blob: f40aa6ef84d1f4e109263ec59028df614cbe9737 [file] [log] [blame]
cerioncee30312004-12-17 20:30:21 +00001
2/*---------------------------------------------------------------*/
sewardj752f9062010-05-03 21:38:49 +00003/*--- begin host_arm_isel.c ---*/
cerioncee30312004-12-17 20:30:21 +00004/*---------------------------------------------------------------*/
5
6/*
sewardj752f9062010-05-03 21:38:49 +00007 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
cerioncee30312004-12-17 20:30:21 +00009
sewardje6c53e02011-10-23 07:33:43 +000010 Copyright (C) 2004-2011 OpenWorks LLP
sewardj752f9062010-05-03 21:38:49 +000011 info@open-works.net
sewardj64733c42010-10-12 10:10:46 +000012
13 NEON support is
sewardje6c53e02011-10-23 07:33:43 +000014 Copyright (C) 2010-2011 Samsung Electronics
sewardj64733c42010-10-12 10:10:46 +000015 contributed by Dmitry Zhurikhin <zhur@ispras.ru>
16 and Kirill Batuzov <batuzovk@ispras.ru>
cerioncee30312004-12-17 20:30:21 +000017
sewardj752f9062010-05-03 21:38:49 +000018 This program is free software; you can redistribute it and/or
19 modify it under the terms of the GNU General Public License as
20 published by the Free Software Foundation; either version 2 of the
21 License, or (at your option) any later version.
cerioncee30312004-12-17 20:30:21 +000022
sewardj752f9062010-05-03 21:38:49 +000023 This program is distributed in the hope that it will be useful, but
24 WITHOUT ANY WARRANTY; without even the implied warranty of
25 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
26 General Public License for more details.
27
28 You should have received a copy of the GNU General Public License
29 along with this program; if not, write to the Free Software
30 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
sewardj7bd6ffe2005-08-03 16:07:36 +000031 02110-1301, USA.
32
sewardj752f9062010-05-03 21:38:49 +000033 The GNU General Public License is contained in the file COPYING.
cerioncee30312004-12-17 20:30:21 +000034*/
35
36#include "libvex_basictypes.h"
37#include "libvex_ir.h"
38#include "libvex.h"
sewardj6c60b322010-08-22 12:48:28 +000039#include "ir_match.h"
cerioncee30312004-12-17 20:30:21 +000040
sewardjcef7d3e2009-07-02 12:21:59 +000041#include "main_util.h"
42#include "main_globals.h"
43#include "host_generic_regs.h"
sewardje2ea1762010-09-22 00:56:37 +000044#include "host_generic_simd64.h" // for 32-bit SIMD helpers
sewardjcef7d3e2009-07-02 12:21:59 +000045#include "host_arm_defs.h"
cerioncee30312004-12-17 20:30:21 +000046
47
cerioncee30312004-12-17 20:30:21 +000048/*---------------------------------------------------------*/
sewardj6c299f32009-12-31 18:00:12 +000049/*--- ARMvfp control word stuff ---*/
50/*---------------------------------------------------------*/
51
/* FPSCR value that Vex-generated code expects to run with.

   The required FPU configuration is: all exceptions masked,
   round-to-nearest, non-vector mode, NZCV flags cleared, and FZ
   (flush to zero) disabled.  Curiously enough, that configuration is
   exactly an FPSCR value of zero.

   Consequently fpscr should be zero on entry to Vex-generated code and
   unchanged at exit (or at least its bottom 28 bits should be zero).
*/
#define DEFAULT_FPSCR 0
63
64
65/*---------------------------------------------------------*/
cerioncee30312004-12-17 20:30:21 +000066/*--- ISelEnv ---*/
67/*---------------------------------------------------------*/
68
69/* This carries around:
70
71 - A mapping from IRTemp to IRType, giving the type of any IRTemp we
72 might encounter. This is computed before insn selection starts,
73 and does not change.
74
75 - A mapping from IRTemp to HReg. This tells the insn selector
76 which virtual register(s) are associated with each IRTemp
77 temporary. This is computed before insn selection starts, and
78 does not change. We expect this mapping to map precisely the
79 same set of IRTemps as the type mapping does.
80
81 - vregmap holds the primary register for the IRTemp.
sewardj6c299f32009-12-31 18:00:12 +000082 - vregmapHI is only used for 64-bit integer-typed
83 IRTemps. It holds the identity of a second
84 32-bit virtual HReg, which holds the high half
85 of the value.
86
cerioncee30312004-12-17 20:30:21 +000087 - The code array, that is, the insns selected so far.
88
89 - A counter, for generating new virtual registers.
90
sewardj6c299f32009-12-31 18:00:12 +000091 - The host hardware capabilities word. This is set at the start
92 and does not change.
93
sewardjc6f970f2012-04-02 21:54:49 +000094 - A Bool for indicating whether we may generate chain-me
95 instructions for control flow transfers, or whether we must use
96 XAssisted.
97
98 - The maximum guest address of any guest insn in this block.
99 Actually, the address of the highest-addressed byte from any insn
100 in this block. Is set at the start and does not change. This is
101 used for detecting jumps which are definitely forward-edges from
102 this block, and therefore can be made (chained) to the fast entry
103 point of the destination, thereby avoiding the destination's
104 event check.
105
106 Note, this is all (well, mostly) host-independent.
107*/
cerioncee30312004-12-17 20:30:21 +0000108
109typedef
110 struct {
sewardjc6f970f2012-04-02 21:54:49 +0000111 /* Constant -- are set at the start and do not change. */
cerioncee30312004-12-17 20:30:21 +0000112 IRTypeEnv* type_env;
113
114 HReg* vregmap;
sewardj6c299f32009-12-31 18:00:12 +0000115 HReg* vregmapHI;
cerioncee30312004-12-17 20:30:21 +0000116 Int n_vregmap;
117
sewardj6c299f32009-12-31 18:00:12 +0000118 UInt hwcaps;
sewardjc6f970f2012-04-02 21:54:49 +0000119
120 Bool chainingAllowed;
121 Addr64 max_ga;
122
123 /* These are modified as we go along. */
124 HInstrArray* code;
125 Int vreg_ctr;
cerioncee30312004-12-17 20:30:21 +0000126 }
127 ISelEnv;
128
129static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp )
130{
131 vassert(tmp >= 0);
132 vassert(tmp < env->n_vregmap);
133 return env->vregmap[tmp];
134}
135
sewardj6c299f32009-12-31 18:00:12 +0000136static void lookupIRTemp64 ( HReg* vrHI, HReg* vrLO, ISelEnv* env, IRTemp tmp )
137{
138 vassert(tmp >= 0);
139 vassert(tmp < env->n_vregmap);
140 vassert(env->vregmapHI[tmp] != INVALID_HREG);
141 *vrLO = env->vregmap[tmp];
142 *vrHI = env->vregmapHI[tmp];
143}
144
cerioncee30312004-12-17 20:30:21 +0000145static void addInstr ( ISelEnv* env, ARMInstr* instr )
146{
147 addHInstr(env->code, instr);
148 if (vex_traceflags & VEX_TRACE_VCODE) {
149 ppARMInstr(instr);
150 vex_printf("\n");
151 }
sewardj6c60b322010-08-22 12:48:28 +0000152#if 0
153 if (instr->tag == ARMin_NUnary || instr->tag == ARMin_NBinary
154 || instr->tag == ARMin_NUnaryS || instr->tag == ARMin_NBinaryS
155 || instr->tag == ARMin_NDual || instr->tag == ARMin_NShift) {
156 ppARMInstr(instr);
157 vex_printf("\n");
158 }
159#endif
cerioncee30312004-12-17 20:30:21 +0000160}
161
162static HReg newVRegI ( ISelEnv* env )
163{
164 HReg reg = mkHReg(env->vreg_ctr, HRcInt32, True/*virtual reg*/);
165 env->vreg_ctr++;
166 return reg;
167}
168
sewardj6c299f32009-12-31 18:00:12 +0000169static HReg newVRegD ( ISelEnv* env )
170{
171 HReg reg = mkHReg(env->vreg_ctr, HRcFlt64, True/*virtual reg*/);
172 env->vreg_ctr++;
173 return reg;
174}
175
176static HReg newVRegF ( ISelEnv* env )
177{
178 HReg reg = mkHReg(env->vreg_ctr, HRcFlt32, True/*virtual reg*/);
179 env->vreg_ctr++;
180 return reg;
181}
cerioncee30312004-12-17 20:30:21 +0000182
sewardj6c60b322010-08-22 12:48:28 +0000183static HReg newVRegV ( ISelEnv* env )
184{
185 HReg reg = mkHReg(env->vreg_ctr, HRcVec128, True/*virtual reg*/);
186 env->vreg_ctr++;
187 return reg;
188}
189
190/* These are duplicated in guest_arm_toIR.c */
191static IRExpr* unop ( IROp op, IRExpr* a )
192{
193 return IRExpr_Unop(op, a);
194}
195
196static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
197{
198 return IRExpr_Binop(op, a1, a2);
199}
200
sewardj6c60b322010-08-22 12:48:28 +0000201static IRExpr* bind ( Int binder )
202{
203 return IRExpr_Binder(binder);
204}
205
cerioncee30312004-12-17 20:30:21 +0000206
207/*---------------------------------------------------------*/
208/*--- ISEL: Forward declarations ---*/
209/*---------------------------------------------------------*/
210
211/* These are organised as iselXXX and iselXXX_wrk pairs. The
212 iselXXX_wrk do the real work, but are not to be called directly.
213 For each XXX, iselXXX calls its iselXXX_wrk counterpart, then
214 checks that all returned registers are virtual. You should not
215 call the _wrk version directly.
216*/
sewardj6c299f32009-12-31 18:00:12 +0000217static ARMAMode1* iselIntExpr_AMode1_wrk ( ISelEnv* env, IRExpr* e );
218static ARMAMode1* iselIntExpr_AMode1 ( ISelEnv* env, IRExpr* e );
cerioncee30312004-12-17 20:30:21 +0000219
sewardj6c299f32009-12-31 18:00:12 +0000220static ARMAMode2* iselIntExpr_AMode2_wrk ( ISelEnv* env, IRExpr* e );
221static ARMAMode2* iselIntExpr_AMode2 ( ISelEnv* env, IRExpr* e );
cerioncee30312004-12-17 20:30:21 +0000222
sewardj6c299f32009-12-31 18:00:12 +0000223static ARMAModeV* iselIntExpr_AModeV_wrk ( ISelEnv* env, IRExpr* e );
224static ARMAModeV* iselIntExpr_AModeV ( ISelEnv* env, IRExpr* e );
cerioncee30312004-12-17 20:30:21 +0000225
sewardjff7f5b72011-07-11 11:43:38 +0000226static ARMAModeN* iselIntExpr_AModeN_wrk ( ISelEnv* env, IRExpr* e );
227static ARMAModeN* iselIntExpr_AModeN ( ISelEnv* env, IRExpr* e );
sewardj6c60b322010-08-22 12:48:28 +0000228
sewardj6c299f32009-12-31 18:00:12 +0000229static ARMRI84* iselIntExpr_RI84_wrk
230 ( /*OUT*/Bool* didInv, Bool mayInv, ISelEnv* env, IRExpr* e );
231static ARMRI84* iselIntExpr_RI84
232 ( /*OUT*/Bool* didInv, Bool mayInv, ISelEnv* env, IRExpr* e );
cerioncee30312004-12-17 20:30:21 +0000233
sewardj6c299f32009-12-31 18:00:12 +0000234static ARMRI5* iselIntExpr_RI5_wrk ( ISelEnv* env, IRExpr* e );
235static ARMRI5* iselIntExpr_RI5 ( ISelEnv* env, IRExpr* e );
cerioncee30312004-12-17 20:30:21 +0000236
sewardj6c299f32009-12-31 18:00:12 +0000237static ARMCondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e );
238static ARMCondCode iselCondCode ( ISelEnv* env, IRExpr* e );
cerioncee30312004-12-17 20:30:21 +0000239
sewardj6c299f32009-12-31 18:00:12 +0000240static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e );
241static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e );
242
243static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo,
244 ISelEnv* env, IRExpr* e );
245static void iselInt64Expr ( HReg* rHi, HReg* rLo,
246 ISelEnv* env, IRExpr* e );
247
248static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e );
249static HReg iselDblExpr ( ISelEnv* env, IRExpr* e );
250
251static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e );
252static HReg iselFltExpr ( ISelEnv* env, IRExpr* e );
cerioncee30312004-12-17 20:30:21 +0000253
sewardj6c60b322010-08-22 12:48:28 +0000254static HReg iselNeon64Expr_wrk ( ISelEnv* env, IRExpr* e );
255static HReg iselNeon64Expr ( ISelEnv* env, IRExpr* e );
256
257static HReg iselNeonExpr_wrk ( ISelEnv* env, IRExpr* e );
258static HReg iselNeonExpr ( ISelEnv* env, IRExpr* e );
cerioncee30312004-12-17 20:30:21 +0000259
260/*---------------------------------------------------------*/
261/*--- ISEL: Misc helpers ---*/
262/*---------------------------------------------------------*/
sewardj6c299f32009-12-31 18:00:12 +0000263
264static UInt ROR32 ( UInt x, UInt sh ) {
265 vassert(sh >= 0 && sh < 32);
266 if (sh == 0)
267 return x;
268 else
269 return (x << (32-sh)) | (x >> sh);
cerioncee30312004-12-17 20:30:21 +0000270}
sewardj6c299f32009-12-31 18:00:12 +0000271
272/* Figure out if 'u' fits in the special shifter-operand 8x4 immediate
273 form, and if so return the components. */
274static Bool fitsIn8x4 ( /*OUT*/UInt* u8, /*OUT*/UInt* u4, UInt u )
275{
276 UInt i;
277 for (i = 0; i < 16; i++) {
278 if (0 == (u & 0xFFFFFF00)) {
279 *u8 = u;
280 *u4 = i;
281 return True;
282 }
283 u = ROR32(u, 30);
284 }
285 vassert(i == 16);
286 return False;
287}
cerioncee30312004-12-17 20:30:21 +0000288
289/* Make a int reg-reg move. */
sewardj6c299f32009-12-31 18:00:12 +0000290static ARMInstr* mk_iMOVds_RR ( HReg dst, HReg src )
cerioncee30312004-12-17 20:30:21 +0000291{
292 vassert(hregClass(src) == HRcInt32);
293 vassert(hregClass(dst) == HRcInt32);
sewardj6c299f32009-12-31 18:00:12 +0000294 return ARMInstr_Mov(dst, ARMRI84_R(src));
cerioncee30312004-12-17 20:30:21 +0000295}
296
sewardj6c299f32009-12-31 18:00:12 +0000297/* Set the VFP unit's rounding mode to default (round to nearest). */
298static void set_VFP_rounding_default ( ISelEnv* env )
cerioncee30312004-12-17 20:30:21 +0000299{
sewardj6c299f32009-12-31 18:00:12 +0000300 /* mov rTmp, #DEFAULT_FPSCR
301 fmxr fpscr, rTmp
302 */
303 HReg rTmp = newVRegI(env);
304 addInstr(env, ARMInstr_Imm32(rTmp, DEFAULT_FPSCR));
305 addInstr(env, ARMInstr_FPSCR(True/*toFPSCR*/, rTmp));
cerioncee30312004-12-17 20:30:21 +0000306}
307
sewardj6c299f32009-12-31 18:00:12 +0000308/* Mess with the VFP unit's rounding mode: 'mode' is an I32-typed
309 expression denoting a value in the range 0 .. 3, indicating a round
310 mode encoded as per type IRRoundingMode. Set FPSCR to have the
311 same rounding.
312*/
313static
314void set_VFP_rounding_mode ( ISelEnv* env, IRExpr* mode )
cerioncee30312004-12-17 20:30:21 +0000315{
sewardj6c299f32009-12-31 18:00:12 +0000316 /* This isn't simple, because 'mode' carries an IR rounding
317 encoding, and we need to translate that to an ARMvfp one:
318 The IR encoding:
319 00 to nearest (the default)
320 10 to +infinity
321 01 to -infinity
322 11 to zero
323 The ARMvfp encoding:
324 00 to nearest
325 01 to +infinity
326 10 to -infinity
327 11 to zero
328 Easy enough to do; just swap the two bits.
329 */
330 HReg irrm = iselIntExpr_R(env, mode);
331 HReg tL = newVRegI(env);
332 HReg tR = newVRegI(env);
333 HReg t3 = newVRegI(env);
334 /* tL = irrm << 1;
335 tR = irrm >> 1; if we're lucky, these will issue together
336 tL &= 2;
337 tR &= 1; ditto
338 t3 = tL | tR;
339 t3 <<= 22;
340 fmxr fpscr, t3
341 */
342 addInstr(env, ARMInstr_Shift(ARMsh_SHL, tL, irrm, ARMRI5_I5(1)));
343 addInstr(env, ARMInstr_Shift(ARMsh_SHR, tR, irrm, ARMRI5_I5(1)));
344 addInstr(env, ARMInstr_Alu(ARMalu_AND, tL, tL, ARMRI84_I84(2,0)));
345 addInstr(env, ARMInstr_Alu(ARMalu_AND, tR, tR, ARMRI84_I84(1,0)));
346 addInstr(env, ARMInstr_Alu(ARMalu_OR, t3, tL, ARMRI84_R(tR)));
347 addInstr(env, ARMInstr_Shift(ARMsh_SHL, t3, t3, ARMRI5_I5(22)));
348 addInstr(env, ARMInstr_FPSCR(True/*toFPSCR*/, t3));
cerioncee30312004-12-17 20:30:21 +0000349}
cerioncee30312004-12-17 20:30:21 +0000350
cerioncee30312004-12-17 20:30:21 +0000351
sewardj6c299f32009-12-31 18:00:12 +0000352/*---------------------------------------------------------*/
353/*--- ISEL: Function call helpers ---*/
354/*---------------------------------------------------------*/
cerioncee30312004-12-17 20:30:21 +0000355
cerioncee30312004-12-17 20:30:21 +0000356/* Used only in doHelperCall. See big comment in doHelperCall re
sewardj6c299f32009-12-31 18:00:12 +0000357 handling of register-parameter args. This function figures out
358 whether evaluation of an expression might require use of a fixed
359 register. If in doubt return True (safe but suboptimal).
cerioncee30312004-12-17 20:30:21 +0000360*/
361static
362Bool mightRequireFixedRegs ( IRExpr* e )
363{
364 switch (e->tag) {
sewardj6c299f32009-12-31 18:00:12 +0000365 case Iex_RdTmp: case Iex_Const: case Iex_Get:
366 return False;
367 default:
368 return True;
cerioncee30312004-12-17 20:30:21 +0000369 }
370}
sewardj6c299f32009-12-31 18:00:12 +0000371
cerioncee30312004-12-17 20:30:21 +0000372
373/* Do a complete function call. guard is a Ity_Bit expression
374 indicating whether or not the call happens. If guard==NULL, the
sewardj6c299f32009-12-31 18:00:12 +0000375 call is unconditional. Returns True iff it managed to handle this
376 combination of arg/return types, else returns False. */
cerioncee30312004-12-17 20:30:21 +0000377
378static
sewardj6c299f32009-12-31 18:00:12 +0000379Bool doHelperCall ( ISelEnv* env,
380 Bool passBBP,
cerioncee30312004-12-17 20:30:21 +0000381 IRExpr* guard, IRCallee* cee, IRExpr** args )
382{
cerioncee30312004-12-17 20:30:21 +0000383 ARMCondCode cc;
sewardj6c299f32009-12-31 18:00:12 +0000384 HReg argregs[ARM_N_ARGREGS];
385 HReg tmpregs[ARM_N_ARGREGS];
386 Bool go_fast;
387 Int n_args, i, nextArgReg;
388 ULong target;
cerioncee30312004-12-17 20:30:21 +0000389
sewardj6c299f32009-12-31 18:00:12 +0000390 vassert(ARM_N_ARGREGS == 4);
cerioncee30312004-12-17 20:30:21 +0000391
sewardj6c299f32009-12-31 18:00:12 +0000392 /* Marshal args for a call and do the call.
cerioncee30312004-12-17 20:30:21 +0000393
sewardj6c299f32009-12-31 18:00:12 +0000394 If passBBP is True, r8 (the baseblock pointer) is to be passed
395 as the first arg.
cerioncee30312004-12-17 20:30:21 +0000396
sewardj6c299f32009-12-31 18:00:12 +0000397 This function only deals with a tiny set of possibilities, which
398 cover all helpers in practice. The restrictions are that only
399 arguments in registers are supported, hence only ARM_N_REGPARMS
400 x 32 integer bits in total can be passed. In fact the only
401 supported arg types are I32 and I64.
cerioncee30312004-12-17 20:30:21 +0000402
sewardj6c299f32009-12-31 18:00:12 +0000403 Generating code which is both efficient and correct when
404 parameters are to be passed in registers is difficult, for the
405 reasons elaborated in detail in comments attached to
406 doHelperCall() in priv/host-x86/isel.c. Here, we use a variant
407 of the method described in those comments.
cerioncee30312004-12-17 20:30:21 +0000408
sewardj6c299f32009-12-31 18:00:12 +0000409 The problem is split into two cases: the fast scheme and the
410 slow scheme. In the fast scheme, arguments are computed
411 directly into the target (real) registers. This is only safe
412 when we can be sure that computation of each argument will not
413 trash any real registers set by computation of any other
414 argument.
cerioncee30312004-12-17 20:30:21 +0000415
sewardj6c299f32009-12-31 18:00:12 +0000416 In the slow scheme, all args are first computed into vregs, and
417 once they are all done, they are moved to the relevant real
418 regs. This always gives correct code, but it also gives a bunch
419 of vreg-to-rreg moves which are usually redundant but are hard
420 for the register allocator to get rid of.
421
422 To decide which scheme to use, all argument expressions are
423 first examined. If they are all so simple that it is clear they
424 will be evaluated without use of any fixed registers, use the
425 fast scheme, else use the slow scheme. Note also that only
426 unconditional calls may use the fast scheme, since having to
427 compute a condition expression could itself trash real
428 registers.
cerioncee30312004-12-17 20:30:21 +0000429
430 Note this requires being able to examine an expression and
431 determine whether or not evaluation of it might use a fixed
sewardj6c299f32009-12-31 18:00:12 +0000432 register. That requires knowledge of how the rest of this insn
433 selector works. Currently just the following 3 are regarded as
434 safe -- hopefully they cover the majority of arguments in
435 practice: IRExpr_Tmp IRExpr_Const IRExpr_Get.
cerioncee30312004-12-17 20:30:21 +0000436 */
cerioncee30312004-12-17 20:30:21 +0000437
sewardj6c299f32009-12-31 18:00:12 +0000438 /* Note that the cee->regparms field is meaningless on ARM hosts
439 (since there is only one calling convention) and so we always
440 ignore it. */
cerioncee30312004-12-17 20:30:21 +0000441
sewardj6c299f32009-12-31 18:00:12 +0000442 n_args = 0;
443 for (i = 0; args[i]; i++)
444 n_args++;
cerioncee30312004-12-17 20:30:21 +0000445
sewardj6c299f32009-12-31 18:00:12 +0000446 argregs[0] = hregARM_R0();
447 argregs[1] = hregARM_R1();
448 argregs[2] = hregARM_R2();
449 argregs[3] = hregARM_R3();
cerioncee30312004-12-17 20:30:21 +0000450
sewardj6c299f32009-12-31 18:00:12 +0000451 tmpregs[0] = tmpregs[1] = tmpregs[2] =
452 tmpregs[3] = INVALID_HREG;
cerioncee30312004-12-17 20:30:21 +0000453
sewardj6c299f32009-12-31 18:00:12 +0000454 /* First decide which scheme (slow or fast) is to be used. First
455 assume the fast scheme, and select slow if any contraindications
456 (wow) appear. */
457
458 go_fast = True;
459
460 if (guard) {
461 if (guard->tag == Iex_Const
462 && guard->Iex.Const.con->tag == Ico_U1
463 && guard->Iex.Const.con->Ico.U1 == True) {
464 /* unconditional */
465 } else {
466 /* Not manifestly unconditional -- be conservative. */
467 go_fast = False;
468 }
cerioncee30312004-12-17 20:30:21 +0000469 }
470
sewardj6c299f32009-12-31 18:00:12 +0000471 if (go_fast) {
472 for (i = 0; i < n_args; i++) {
cerioncee30312004-12-17 20:30:21 +0000473 if (mightRequireFixedRegs(args[i])) {
sewardj6c299f32009-12-31 18:00:12 +0000474 go_fast = False;
cerioncee30312004-12-17 20:30:21 +0000475 break;
476 }
477 }
sewardj6c299f32009-12-31 18:00:12 +0000478 }
479 /* At this point the scheme to use has been established. Generate
480 code to get the arg values into the argument rregs. If we run
481 out of arg regs, give up. */
cerioncee30312004-12-17 20:30:21 +0000482
sewardj6c299f32009-12-31 18:00:12 +0000483 if (go_fast) {
cerioncee30312004-12-17 20:30:21 +0000484
sewardj6c299f32009-12-31 18:00:12 +0000485 /* FAST SCHEME */
486 nextArgReg = 0;
cerioncee30312004-12-17 20:30:21 +0000487 if (passBBP) {
sewardj6c299f32009-12-31 18:00:12 +0000488 addInstr(env, mk_iMOVds_RR( argregs[nextArgReg],
489 hregARM_R8() ));
490 nextArgReg++;
cerioncee30312004-12-17 20:30:21 +0000491 }
492
sewardj6c299f32009-12-31 18:00:12 +0000493 for (i = 0; i < n_args; i++) {
494 IRType aTy = typeOfIRExpr(env->type_env, args[i]);
495 if (nextArgReg >= ARM_N_ARGREGS)
496 return False; /* out of argregs */
497 if (aTy == Ity_I32) {
498 addInstr(env, mk_iMOVds_RR( argregs[nextArgReg],
499 iselIntExpr_R(env, args[i]) ));
500 nextArgReg++;
501 }
502 else if (aTy == Ity_I64) {
503 /* 64-bit args must be passed in an a reg-pair of the form
504 n:n+1, where n is even. Hence either r0:r1 or r2:r3.
505 On a little-endian host, the less significant word is
506 passed in the lower-numbered register. */
507 if (nextArgReg & 1) {
508 if (nextArgReg >= ARM_N_ARGREGS)
509 return False; /* out of argregs */
510 addInstr(env, ARMInstr_Imm32( argregs[nextArgReg], 0xAA ));
511 nextArgReg++;
512 }
513 if (nextArgReg >= ARM_N_ARGREGS)
514 return False; /* out of argregs */
515 HReg raHi, raLo;
516 iselInt64Expr(&raHi, &raLo, env, args[i]);
517 addInstr(env, mk_iMOVds_RR( argregs[nextArgReg], raLo ));
518 nextArgReg++;
519 addInstr(env, mk_iMOVds_RR( argregs[nextArgReg], raHi ));
520 nextArgReg++;
521 }
522 else
523 return False; /* unhandled arg type */
524 }
525
526 /* Fast scheme only applies for unconditional calls. Hence: */
527 cc = ARMcc_AL;
cerioncee30312004-12-17 20:30:21 +0000528
529 } else {
530
sewardj6c299f32009-12-31 18:00:12 +0000531 /* SLOW SCHEME; move via temporaries */
532 nextArgReg = 0;
533
cerioncee30312004-12-17 20:30:21 +0000534 if (passBBP) {
sewardj6c299f32009-12-31 18:00:12 +0000535 /* This is pretty stupid; better to move directly to r0
536 after the rest of the args are done. */
537 tmpregs[nextArgReg] = newVRegI(env);
538 addInstr(env, mk_iMOVds_RR( tmpregs[nextArgReg],
539 hregARM_R8() ));
540 nextArgReg++;
541 }
542
543 for (i = 0; i < n_args; i++) {
544 IRType aTy = typeOfIRExpr(env->type_env, args[i]);
545 if (nextArgReg >= ARM_N_ARGREGS)
546 return False; /* out of argregs */
547 if (aTy == Ity_I32) {
548 tmpregs[nextArgReg] = iselIntExpr_R(env, args[i]);
549 nextArgReg++;
550 }
551 else if (aTy == Ity_I64) {
552 /* Same comment applies as in the Fast-scheme case. */
553 if (nextArgReg & 1)
554 nextArgReg++;
555 if (nextArgReg + 1 >= ARM_N_ARGREGS)
556 return False; /* out of argregs */
557 HReg raHi, raLo;
558 iselInt64Expr(&raHi, &raLo, env, args[i]);
559 tmpregs[nextArgReg] = raLo;
560 nextArgReg++;
561 tmpregs[nextArgReg] = raHi;
562 nextArgReg++;
563 }
564 }
565
566 /* Now we can compute the condition. We can't do it earlier
567 because the argument computations could trash the condition
568 codes. Be a bit clever to handle the common case where the
569 guard is 1:Bit. */
570 cc = ARMcc_AL;
571 if (guard) {
572 if (guard->tag == Iex_Const
573 && guard->Iex.Const.con->tag == Ico_U1
574 && guard->Iex.Const.con->Ico.U1 == True) {
575 /* unconditional -- do nothing */
576 } else {
577 cc = iselCondCode( env, guard );
578 }
579 }
580
581 /* Move the args to their final destinations. */
582 for (i = 0; i < nextArgReg; i++) {
583 if (tmpregs[i] == INVALID_HREG) { // Skip invalid regs
584 addInstr(env, ARMInstr_Imm32( argregs[i], 0xAA ));
585 continue;
586 }
587 /* None of these insns, including any spill code that might
588 be generated, may alter the condition codes. */
589 addInstr( env, mk_iMOVds_RR( argregs[i], tmpregs[i] ) );
cerioncee30312004-12-17 20:30:21 +0000590 }
591
592 }
593
sewardj6c299f32009-12-31 18:00:12 +0000594 /* Should be assured by checks above */
595 vassert(nextArgReg <= ARM_N_ARGREGS);
cerioncee30312004-12-17 20:30:21 +0000596
sewardj6c299f32009-12-31 18:00:12 +0000597 target = (HWord)Ptr_to_ULong(cee->addr);
cerioncee30312004-12-17 20:30:21 +0000598
sewardj6c299f32009-12-31 18:00:12 +0000599 /* nextArgReg doles out argument registers. Since these are
600 assigned in the order r0, r1, r2, r3, its numeric value at this
601 point, which must be between 0 and 4 inclusive, is going to be
602 equal to the number of arg regs in use for the call. Hence bake
603 that number into the call (we'll need to know it when doing
604 register allocation, to know what regs the call reads.)
cerioncee30312004-12-17 20:30:21 +0000605
sewardj6c299f32009-12-31 18:00:12 +0000606 There is a bit of a twist -- harmless but worth recording.
607 Suppose the arg types are (Ity_I32, Ity_I64). Then we will have
608 the first arg in r0 and the second in r3:r2, but r1 isn't used.
609 We nevertheless have nextArgReg==4 and bake that into the call
610 instruction. This will mean the register allocator wil believe
611 this insn reads r1 when in fact it doesn't. But that's
612 harmless; it just artificially extends the live range of r1
613 unnecessarily. The best fix would be to put into the
614 instruction, a bitmask indicating which of r0/1/2/3 carry live
615 values. But that's too much hassle. */
cerioncee30312004-12-17 20:30:21 +0000616
sewardj6c299f32009-12-31 18:00:12 +0000617 /* Finally, the call itself. */
618 addInstr(env, ARMInstr_Call( cc, target, nextArgReg ));
cerioncee30312004-12-17 20:30:21 +0000619
sewardj6c299f32009-12-31 18:00:12 +0000620 return True; /* success */
cerioncee30312004-12-17 20:30:21 +0000621}
622
623
624/*---------------------------------------------------------*/
sewardj6c299f32009-12-31 18:00:12 +0000625/*--- ISEL: Integer expressions (32/16/8 bit) ---*/
cerioncee30312004-12-17 20:30:21 +0000626/*---------------------------------------------------------*/
627
sewardj6c299f32009-12-31 18:00:12 +0000628/* Select insns for an integer-typed expression, and add them to the
629 code list. Return a reg holding the result. This reg will be a
630 virtual register. THE RETURNED REG MUST NOT BE MODIFIED. If you
631 want to modify it, ask for a new vreg, copy it in there, and modify
632 the copy. The register allocator will do its best to map both
633 vregs to the same real register, so the copies will often disappear
634 later in the game.
cerioncee30312004-12-17 20:30:21 +0000635
sewardj6c299f32009-12-31 18:00:12 +0000636 This should handle expressions of 32, 16 and 8-bit type. All
637 results are returned in a 32-bit register. For 16- and 8-bit
638 expressions, the upper 16/24 bits are arbitrary, so you should mask
639 or sign extend partial values if necessary.
cerioncee30312004-12-17 20:30:21 +0000640*/
641
sewardj6c299f32009-12-31 18:00:12 +0000642/* --------------------- AMode1 --------------------- */
643
644/* Return an AMode1 which computes the value of the specified
645 expression, possibly also adding insns to the code list as a
646 result. The expression may only be a 32-bit one.
647*/
cerioncee30312004-12-17 20:30:21 +0000648
649static Bool sane_AMode1 ( ARMAMode1* am )
650{
sewardj6c299f32009-12-31 18:00:12 +0000651 switch (am->tag) {
652 case ARMam1_RI:
653 return
654 toBool( hregClass(am->ARMam1.RI.reg) == HRcInt32
655 && (hregIsVirtual(am->ARMam1.RI.reg)
656 || am->ARMam1.RI.reg == hregARM_R8())
657 && am->ARMam1.RI.simm13 >= -4095
658 && am->ARMam1.RI.simm13 <= 4095 );
659 case ARMam1_RRS:
660 return
661 toBool( hregClass(am->ARMam1.RRS.base) == HRcInt32
662 && hregIsVirtual(am->ARMam1.RRS.base)
663 && hregClass(am->ARMam1.RRS.index) == HRcInt32
664 && hregIsVirtual(am->ARMam1.RRS.index)
665 && am->ARMam1.RRS.shift >= 0
666 && am->ARMam1.RRS.shift <= 3 );
667 default:
668 vpanic("sane_AMode: unknown ARM AMode1 tag");
669 }
cerioncee30312004-12-17 20:30:21 +0000670}
671
672static ARMAMode1* iselIntExpr_AMode1 ( ISelEnv* env, IRExpr* e )
673{
sewardj6c299f32009-12-31 18:00:12 +0000674 ARMAMode1* am = iselIntExpr_AMode1_wrk(env, e);
675 vassert(sane_AMode1(am));
676 return am;
cerioncee30312004-12-17 20:30:21 +0000677}
678
cerioncee30312004-12-17 20:30:21 +0000679static ARMAMode1* iselIntExpr_AMode1_wrk ( ISelEnv* env, IRExpr* e )
680{
sewardj6c299f32009-12-31 18:00:12 +0000681 IRType ty = typeOfIRExpr(env->type_env,e);
682 vassert(ty == Ity_I32);
cerioncee30312004-12-17 20:30:21 +0000683
sewardj6c299f32009-12-31 18:00:12 +0000684 /* FIXME: add RRS matching */
cerioncee30312004-12-17 20:30:21 +0000685
sewardj6c299f32009-12-31 18:00:12 +0000686 /* {Add32,Sub32}(expr,simm13) */
687 if (e->tag == Iex_Binop
688 && (e->Iex.Binop.op == Iop_Add32 || e->Iex.Binop.op == Iop_Sub32)
689 && e->Iex.Binop.arg2->tag == Iex_Const
690 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
691 Int simm = (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
692 if (simm >= -4095 && simm <= 4095) {
693 HReg reg;
694 if (e->Iex.Binop.op == Iop_Sub32)
695 simm = -simm;
696 reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
697 return ARMAMode1_RI(reg, simm);
698 }
699 }
cerioncee30312004-12-17 20:30:21 +0000700
sewardj6c299f32009-12-31 18:00:12 +0000701 /* Doesn't match anything in particular. Generate it into
702 a register and use that. */
703 {
704 HReg reg = iselIntExpr_R(env, e);
705 return ARMAMode1_RI(reg, 0);
706 }
707
cerioncee30312004-12-17 20:30:21 +0000708}
709
710
sewardj6c299f32009-12-31 18:00:12 +0000711/* --------------------- AMode2 --------------------- */
cerioncee30312004-12-17 20:30:21 +0000712
sewardj6c299f32009-12-31 18:00:12 +0000713/* Return an AMode2 which computes the value of the specified
714 expression, possibly also adding insns to the code list as a
715 result. The expression may only be a 32-bit one.
716*/
cerioncee30312004-12-17 20:30:21 +0000717
718static Bool sane_AMode2 ( ARMAMode2* am )
719{
720 switch (am->tag) {
sewardj6c299f32009-12-31 18:00:12 +0000721 case ARMam2_RI:
722 return
723 toBool( hregClass(am->ARMam2.RI.reg) == HRcInt32
724 && hregIsVirtual(am->ARMam2.RI.reg)
725 && am->ARMam2.RI.simm9 >= -255
726 && am->ARMam2.RI.simm9 <= 255 );
727 case ARMam2_RR:
728 return
729 toBool( hregClass(am->ARMam2.RR.base) == HRcInt32
730 && hregIsVirtual(am->ARMam2.RR.base)
731 && hregClass(am->ARMam2.RR.index) == HRcInt32
732 && hregIsVirtual(am->ARMam2.RR.index) );
733 default:
734 vpanic("sane_AMode: unknown ARM AMode2 tag");
cerioncee30312004-12-17 20:30:21 +0000735 }
736}
737
sewardj6c299f32009-12-31 18:00:12 +0000738static ARMAMode2* iselIntExpr_AMode2 ( ISelEnv* env, IRExpr* e )
739{
740 ARMAMode2* am = iselIntExpr_AMode2_wrk(env, e);
741 vassert(sane_AMode2(am));
742 return am;
743}
744
cerioncee30312004-12-17 20:30:21 +0000745static ARMAMode2* iselIntExpr_AMode2_wrk ( ISelEnv* env, IRExpr* e )
746{
sewardj6c299f32009-12-31 18:00:12 +0000747 IRType ty = typeOfIRExpr(env->type_env,e);
748 vassert(ty == Ity_I32);
749
750 /* FIXME: add RR matching */
751
752 /* {Add32,Sub32}(expr,simm8) */
753 if (e->tag == Iex_Binop
754 && (e->Iex.Binop.op == Iop_Add32 || e->Iex.Binop.op == Iop_Sub32)
755 && e->Iex.Binop.arg2->tag == Iex_Const
756 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
757 Int simm = (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
758 if (simm >= -255 && simm <= 255) {
759 HReg reg;
760 if (e->Iex.Binop.op == Iop_Sub32)
761 simm = -simm;
762 reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
763 return ARMAMode2_RI(reg, simm);
764 }
765 }
766
767 /* Doesn't match anything in particular. Generate it into
768 a register and use that. */
769 {
770 HReg reg = iselIntExpr_R(env, e);
771 return ARMAMode2_RI(reg, 0);
772 }
773
cerioncee30312004-12-17 20:30:21 +0000774}
sewardj6c299f32009-12-31 18:00:12 +0000775
776
777/* --------------------- AModeV --------------------- */
778
779/* Return an AModeV which computes the value of the specified
780 expression, possibly also adding insns to the code list as a
781 result. The expression may only be a 32-bit one.
sewardj48b279b2007-11-16 12:43:32 +0000782*/
cerioncee30312004-12-17 20:30:21 +0000783
sewardj6c299f32009-12-31 18:00:12 +0000784static Bool sane_AModeV ( ARMAModeV* am )
785{
786 return toBool( hregClass(am->reg) == HRcInt32
787 && hregIsVirtual(am->reg)
788 && am->simm11 >= -1020 && am->simm11 <= 1020
789 && 0 == (am->simm11 & 3) );
cerioncee30312004-12-17 20:30:21 +0000790}
791
sewardj6c299f32009-12-31 18:00:12 +0000792static ARMAModeV* iselIntExpr_AModeV ( ISelEnv* env, IRExpr* e )
cerioncee30312004-12-17 20:30:21 +0000793{
sewardj6c299f32009-12-31 18:00:12 +0000794 ARMAModeV* am = iselIntExpr_AModeV_wrk(env, e);
795 vassert(sane_AModeV(am));
796 return am;
797}
798
799static ARMAModeV* iselIntExpr_AModeV_wrk ( ISelEnv* env, IRExpr* e )
800{
801 IRType ty = typeOfIRExpr(env->type_env,e);
802 vassert(ty == Ity_I32);
803
804 /* {Add32,Sub32}(expr, simm8 << 2) */
805 if (e->tag == Iex_Binop
806 && (e->Iex.Binop.op == Iop_Add32 || e->Iex.Binop.op == Iop_Sub32)
807 && e->Iex.Binop.arg2->tag == Iex_Const
808 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
809 Int simm = (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
810 if (simm >= -1020 && simm <= 1020 && 0 == (simm & 3)) {
811 HReg reg;
812 if (e->Iex.Binop.op == Iop_Sub32)
813 simm = -simm;
814 reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
815 return mkARMAModeV(reg, simm);
816 }
cerioncee30312004-12-17 20:30:21 +0000817 }
sewardj6c299f32009-12-31 18:00:12 +0000818
819 /* Doesn't match anything in particular. Generate it into
820 a register and use that. */
821 {
822 HReg reg = iselIntExpr_R(env, e);
823 return mkARMAModeV(reg, 0);
824 }
825
cerioncee30312004-12-17 20:30:21 +0000826}
827
sewardj6c60b322010-08-22 12:48:28 +0000828/* -------------------- AModeN -------------------- */
829
830static ARMAModeN* iselIntExpr_AModeN ( ISelEnv* env, IRExpr* e )
831{
832 return iselIntExpr_AModeN_wrk(env, e);
833}
834
835static ARMAModeN* iselIntExpr_AModeN_wrk ( ISelEnv* env, IRExpr* e )
836{
837 HReg reg = iselIntExpr_R(env, e);
838 return mkARMAModeN_R(reg);
839}
840
sewardj6c299f32009-12-31 18:00:12 +0000841
842/* --------------------- RI84 --------------------- */
843
844/* Select instructions to generate 'e' into a RI84. If mayInv is
845 true, then the caller will also accept an I84 form that denotes
846 'not e'. In this case didInv may not be NULL, and *didInv is set
847 to True. This complication is so as to allow generation of an RI84
848 which is suitable for use in either an AND or BIC instruction,
849 without knowing (before this call) which one.
850*/
851static ARMRI84* iselIntExpr_RI84 ( /*OUT*/Bool* didInv, Bool mayInv,
852 ISelEnv* env, IRExpr* e )
cerioncee30312004-12-17 20:30:21 +0000853{
sewardj6c299f32009-12-31 18:00:12 +0000854 ARMRI84* ri;
855 if (mayInv)
856 vassert(didInv != NULL);
857 ri = iselIntExpr_RI84_wrk(didInv, mayInv, env, e);
858 /* sanity checks ... */
859 switch (ri->tag) {
860 case ARMri84_I84:
861 return ri;
862 case ARMri84_R:
863 vassert(hregClass(ri->ARMri84.R.reg) == HRcInt32);
864 vassert(hregIsVirtual(ri->ARMri84.R.reg));
865 return ri;
866 default:
867 vpanic("iselIntExpr_RI84: unknown arm RI84 tag");
cerioncee30312004-12-17 20:30:21 +0000868 }
869}
870
871/* DO NOT CALL THIS DIRECTLY ! */
sewardj6c299f32009-12-31 18:00:12 +0000872static ARMRI84* iselIntExpr_RI84_wrk ( /*OUT*/Bool* didInv, Bool mayInv,
873 ISelEnv* env, IRExpr* e )
cerioncee30312004-12-17 20:30:21 +0000874{
sewardj6c299f32009-12-31 18:00:12 +0000875 IRType ty = typeOfIRExpr(env->type_env,e);
876 vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
cerioncee30312004-12-17 20:30:21 +0000877
sewardj6c299f32009-12-31 18:00:12 +0000878 if (didInv) *didInv = False;
879
880 /* special case: immediate */
881 if (e->tag == Iex_Const) {
882 UInt u, u8 = 0x100, u4 = 0x10; /* both invalid */
883 switch (e->Iex.Const.con->tag) {
884 case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
885 case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
886 case Ico_U8: u = 0xFF & (e->Iex.Const.con->Ico.U8); break;
887 default: vpanic("iselIntExpr_RI84.Iex_Const(armh)");
888 }
889 if (fitsIn8x4(&u8, &u4, u)) {
890 return ARMRI84_I84( (UShort)u8, (UShort)u4 );
891 }
892 if (mayInv && fitsIn8x4(&u8, &u4, ~u)) {
893 vassert(didInv);
894 *didInv = True;
895 return ARMRI84_I84( (UShort)u8, (UShort)u4 );
896 }
897 /* else fail, fall through to default case */
898 }
899
900 /* default case: calculate into a register and return that */
901 {
902 HReg r = iselIntExpr_R ( env, e );
903 return ARMRI84_R(r);
904 }
cerioncee30312004-12-17 20:30:21 +0000905}
906
907
sewardj6c299f32009-12-31 18:00:12 +0000908/* --------------------- RI5 --------------------- */
909
910/* Select instructions to generate 'e' into a RI5. */
911
912static ARMRI5* iselIntExpr_RI5 ( ISelEnv* env, IRExpr* e )
913{
914 ARMRI5* ri = iselIntExpr_RI5_wrk(env, e);
915 /* sanity checks ... */
916 switch (ri->tag) {
917 case ARMri5_I5:
918 return ri;
919 case ARMri5_R:
920 vassert(hregClass(ri->ARMri5.R.reg) == HRcInt32);
921 vassert(hregIsVirtual(ri->ARMri5.R.reg));
922 return ri;
923 default:
924 vpanic("iselIntExpr_RI5: unknown arm RI5 tag");
925 }
926}
927
928/* DO NOT CALL THIS DIRECTLY ! */
929static ARMRI5* iselIntExpr_RI5_wrk ( ISelEnv* env, IRExpr* e )
930{
931 IRType ty = typeOfIRExpr(env->type_env,e);
932 vassert(ty == Ity_I32 || ty == Ity_I8);
933
934 /* special case: immediate */
935 if (e->tag == Iex_Const) {
936 UInt u; /* both invalid */
937 switch (e->Iex.Const.con->tag) {
938 case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
939 case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
940 case Ico_U8: u = 0xFF & (e->Iex.Const.con->Ico.U8); break;
941 default: vpanic("iselIntExpr_RI5.Iex_Const(armh)");
942 }
943 if (u >= 1 && u <= 31) {
944 return ARMRI5_I5(u);
945 }
946 /* else fail, fall through to default case */
947 }
948
949 /* default case: calculate into a register and return that */
950 {
951 HReg r = iselIntExpr_R ( env, e );
952 return ARMRI5_R(r);
953 }
954}
cerioncee30312004-12-17 20:30:21 +0000955
956
sewardj6c299f32009-12-31 18:00:12 +0000957/* ------------------- CondCode ------------------- */
cerioncee30312004-12-17 20:30:21 +0000958
959/* Generate code to evaluated a bit-typed expression, returning the
960 condition code which would correspond when the expression would
961 notionally have returned 1. */
962
963static ARMCondCode iselCondCode ( ISelEnv* env, IRExpr* e )
964{
sewardj6c299f32009-12-31 18:00:12 +0000965 ARMCondCode cc = iselCondCode_wrk(env,e);
sewardj6c60b322010-08-22 12:48:28 +0000966 vassert(cc != ARMcc_NV);
sewardj6c299f32009-12-31 18:00:12 +0000967 return cc;
cerioncee30312004-12-17 20:30:21 +0000968}
969
cerioncee30312004-12-17 20:30:21 +0000970static ARMCondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e )
971{
sewardj6c299f32009-12-31 18:00:12 +0000972 vassert(e);
973 vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);
cerioncee30312004-12-17 20:30:21 +0000974
sewardj6c299f32009-12-31 18:00:12 +0000975 /* var */
976 if (e->tag == Iex_RdTmp) {
977 HReg rTmp = lookupIRTemp(env, e->Iex.RdTmp.tmp);
978 /* CmpOrTst doesn't modify rTmp; so this is OK. */
979 ARMRI84* one = ARMRI84_I84(1,0);
980 addInstr(env, ARMInstr_CmpOrTst(False/*test*/, rTmp, one));
981 return ARMcc_NE;
982 }
983
984 /* Not1(e) */
985 if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) {
986 /* Generate code for the arg, and negate the test condition */
987 return 1 ^ iselCondCode(env, e->Iex.Unop.arg);
988 }
989
990 /* --- patterns rooted at: 32to1 --- */
991
992 if (e->tag == Iex_Unop
993 && e->Iex.Unop.op == Iop_32to1) {
994 HReg rTmp = iselIntExpr_R(env, e->Iex.Unop.arg);
995 ARMRI84* one = ARMRI84_I84(1,0);
996 addInstr(env, ARMInstr_CmpOrTst(False/*test*/, rTmp, one));
997 return ARMcc_NE;
998 }
999
1000 /* --- patterns rooted at: CmpNEZ8 --- */
1001
1002 if (e->tag == Iex_Unop
1003 && e->Iex.Unop.op == Iop_CmpNEZ8) {
1004 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
1005 ARMRI84* xFF = ARMRI84_I84(0xFF,0);
1006 addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r1, xFF));
1007 return ARMcc_NE;
1008 }
1009
1010 /* --- patterns rooted at: CmpNEZ32 --- */
1011
1012 if (e->tag == Iex_Unop
1013 && e->Iex.Unop.op == Iop_CmpNEZ32) {
1014 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
1015 ARMRI84* zero = ARMRI84_I84(0,0);
1016 addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, r1, zero));
1017 return ARMcc_NE;
1018 }
1019
1020 /* --- patterns rooted at: CmpNEZ64 --- */
1021
1022 if (e->tag == Iex_Unop
1023 && e->Iex.Unop.op == Iop_CmpNEZ64) {
1024 HReg tHi, tLo;
1025 HReg tmp = newVRegI(env);
1026 ARMRI84* zero = ARMRI84_I84(0,0);
1027 iselInt64Expr(&tHi, &tLo, env, e->Iex.Unop.arg);
1028 addInstr(env, ARMInstr_Alu(ARMalu_OR, tmp, tHi, ARMRI84_R(tLo)));
1029 addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, tmp, zero));
1030 return ARMcc_NE;
1031 }
1032
1033 /* --- Cmp*32*(x,y) --- */
1034 if (e->tag == Iex_Binop
1035 && (e->Iex.Binop.op == Iop_CmpEQ32
1036 || e->Iex.Binop.op == Iop_CmpNE32
1037 || e->Iex.Binop.op == Iop_CmpLT32S
1038 || e->Iex.Binop.op == Iop_CmpLT32U
1039 || e->Iex.Binop.op == Iop_CmpLE32S
1040 || e->Iex.Binop.op == Iop_CmpLE32U)) {
1041 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1042 ARMRI84* argR = iselIntExpr_RI84(NULL,False,
1043 env, e->Iex.Binop.arg2);
1044 addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, argL, argR));
1045 switch (e->Iex.Binop.op) {
1046 case Iop_CmpEQ32: return ARMcc_EQ;
1047 case Iop_CmpNE32: return ARMcc_NE;
1048 case Iop_CmpLT32S: return ARMcc_LT;
1049 case Iop_CmpLT32U: return ARMcc_LO;
1050 case Iop_CmpLE32S: return ARMcc_LE;
1051 case Iop_CmpLE32U: return ARMcc_LS;
1052 default: vpanic("iselCondCode(arm): CmpXX32");
1053 }
1054 }
1055
sewardj6c60b322010-08-22 12:48:28 +00001056 /* --- CasCmpEQ* --- */
1057 /* Ist_Cas has a dummy argument to compare with, so comparison is
1058 always true. */
1059 if (e->tag == Iex_Binop
1060 && (e->Iex.Binop.op == Iop_CasCmpEQ32
1061 || e->Iex.Binop.op == Iop_CasCmpEQ16
1062 || e->Iex.Binop.op == Iop_CasCmpEQ8)) {
1063 return ARMcc_AL;
1064 }
1065
sewardj6c299f32009-12-31 18:00:12 +00001066 ppIRExpr(e);
1067 vpanic("iselCondCode");
cerioncee30312004-12-17 20:30:21 +00001068}
1069
1070
sewardj6c299f32009-12-31 18:00:12 +00001071/* --------------------- Reg --------------------- */
cerioncee30312004-12-17 20:30:21 +00001072
1073static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e )
1074{
sewardj6c299f32009-12-31 18:00:12 +00001075 HReg r = iselIntExpr_R_wrk(env, e);
1076 /* sanity checks ... */
1077# if 0
1078 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
1079# endif
1080 vassert(hregClass(r) == HRcInt32);
1081 vassert(hregIsVirtual(r));
1082 return r;
cerioncee30312004-12-17 20:30:21 +00001083}
1084
1085/* DO NOT CALL THIS DIRECTLY ! */
1086static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
1087{
sewardj6c299f32009-12-31 18:00:12 +00001088 IRType ty = typeOfIRExpr(env->type_env,e);
1089 vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
sewardj6c60b322010-08-22 12:48:28 +00001090// vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
sewardj6c299f32009-12-31 18:00:12 +00001091
1092 switch (e->tag) {
1093
1094 /* --------- TEMP --------- */
1095 case Iex_RdTmp: {
1096 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
1097 }
1098
1099 /* --------- LOAD --------- */
1100 case Iex_Load: {
1101 HReg dst = newVRegI(env);
1102
1103 if (e->Iex.Load.end != Iend_LE)
1104 goto irreducible;
1105
1106 if (ty == Ity_I32) {
1107 ARMAMode1* amode = iselIntExpr_AMode1 ( env, e->Iex.Load.addr );
1108 addInstr(env, ARMInstr_LdSt32(True/*isLoad*/, dst, amode));
1109 return dst;
1110 }
1111 if (ty == Ity_I16) {
1112 ARMAMode2* amode = iselIntExpr_AMode2 ( env, e->Iex.Load.addr );
1113 addInstr(env, ARMInstr_LdSt16(True/*isLoad*/, False/*!signedLoad*/,
1114 dst, amode));
1115 return dst;
1116 }
1117 if (ty == Ity_I8) {
1118 ARMAMode1* amode = iselIntExpr_AMode1 ( env, e->Iex.Load.addr );
1119 addInstr(env, ARMInstr_LdSt8U(True/*isLoad*/, dst, amode));
1120 return dst;
1121 }
1122
1123//zz if (ty == Ity_I16) {
1124//zz addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
1125//zz return dst;
1126//zz }
1127//zz if (ty == Ity_I8) {
1128//zz addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
1129//zz return dst;
1130//zz }
1131 break;
1132 }
1133
1134//zz /* --------- TERNARY OP --------- */
1135//zz case Iex_Triop: {
florian420bfa92012-06-02 20:29:22 +00001136//zz IRTriop *triop = e->Iex.Triop.details;
sewardj6c299f32009-12-31 18:00:12 +00001137//zz /* C3210 flags following FPU partial remainder (fprem), both
1138//zz IEEE compliant (PREM1) and non-IEEE compliant (PREM). */
florian420bfa92012-06-02 20:29:22 +00001139//zz if (triop->op == Iop_PRemC3210F64
1140//zz || triop->op == Iop_PRem1C3210F64) {
sewardj6c299f32009-12-31 18:00:12 +00001141//zz HReg junk = newVRegF(env);
1142//zz HReg dst = newVRegI(env);
florian420bfa92012-06-02 20:29:22 +00001143//zz HReg srcL = iselDblExpr(env, triop->arg2);
1144//zz HReg srcR = iselDblExpr(env, triop->arg3);
sewardj6c299f32009-12-31 18:00:12 +00001145//zz /* XXXROUNDINGFIXME */
1146//zz /* set roundingmode here */
1147//zz addInstr(env, X86Instr_FpBinary(
1148//zz e->Iex.Binop.op==Iop_PRemC3210F64
1149//zz ? Xfp_PREM : Xfp_PREM1,
1150//zz srcL,srcR,junk
1151//zz ));
1152//zz /* The previous pseudo-insn will have left the FPU's C3210
1153//zz flags set correctly. So bag them. */
1154//zz addInstr(env, X86Instr_FpStSW_AX());
1155//zz addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
1156//zz addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0x4700), dst));
1157//zz return dst;
1158//zz }
1159//zz
1160//zz break;
1161//zz }
1162
1163 /* --------- BINARY OP --------- */
1164 case Iex_Binop: {
1165
1166 ARMAluOp aop = 0; /* invalid */
1167 ARMShiftOp sop = 0; /* invalid */
1168
1169 /* ADD/SUB/AND/OR/XOR */
1170 switch (e->Iex.Binop.op) {
1171 case Iop_And32: {
1172 Bool didInv = False;
1173 HReg dst = newVRegI(env);
1174 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1175 ARMRI84* argR = iselIntExpr_RI84(&didInv, True/*mayInv*/,
1176 env, e->Iex.Binop.arg2);
1177 addInstr(env, ARMInstr_Alu(didInv ? ARMalu_BIC : ARMalu_AND,
1178 dst, argL, argR));
1179 return dst;
1180 }
1181 case Iop_Or32: aop = ARMalu_OR; goto std_binop;
1182 case Iop_Xor32: aop = ARMalu_XOR; goto std_binop;
1183 case Iop_Sub32: aop = ARMalu_SUB; goto std_binop;
1184 case Iop_Add32: aop = ARMalu_ADD; goto std_binop;
1185 std_binop: {
1186 HReg dst = newVRegI(env);
1187 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1188 ARMRI84* argR = iselIntExpr_RI84(NULL, False/*mayInv*/,
1189 env, e->Iex.Binop.arg2);
1190 addInstr(env, ARMInstr_Alu(aop, dst, argL, argR));
1191 return dst;
1192 }
1193 default: break;
1194 }
1195
1196 /* SHL/SHR/SAR */
1197 switch (e->Iex.Binop.op) {
1198 case Iop_Shl32: sop = ARMsh_SHL; goto sh_binop;
1199 case Iop_Shr32: sop = ARMsh_SHR; goto sh_binop;
1200 case Iop_Sar32: sop = ARMsh_SAR; goto sh_binop;
1201 sh_binop: {
1202 HReg dst = newVRegI(env);
1203 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1204 ARMRI5* argR = iselIntExpr_RI5(env, e->Iex.Binop.arg2);
1205 addInstr(env, ARMInstr_Shift(sop, dst, argL, argR));
1206 vassert(ty == Ity_I32); /* else the IR is ill-typed */
1207 return dst;
1208 }
1209 default: break;
1210 }
1211
1212 /* MUL */
1213 if (e->Iex.Binop.op == Iop_Mul32) {
1214 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1215 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1216 HReg dst = newVRegI(env);
1217 addInstr(env, mk_iMOVds_RR(hregARM_R2(), argL));
1218 addInstr(env, mk_iMOVds_RR(hregARM_R3(), argR));
1219 addInstr(env, ARMInstr_Mul(ARMmul_PLAIN));
1220 addInstr(env, mk_iMOVds_RR(dst, hregARM_R0()));
1221 return dst;
1222 }
1223
1224 /* Handle misc other ops. */
1225
1226 if (e->Iex.Binop.op == Iop_Max32U) {
1227 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1228 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1229 HReg dst = newVRegI(env);
sewardj6c60b322010-08-22 12:48:28 +00001230 addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, argL,
1231 ARMRI84_R(argR)));
sewardj6c299f32009-12-31 18:00:12 +00001232 addInstr(env, mk_iMOVds_RR(dst, argL));
1233 addInstr(env, ARMInstr_CMov(ARMcc_LO, dst, ARMRI84_R(argR)));
1234 return dst;
1235 }
1236
1237 if (e->Iex.Binop.op == Iop_CmpF64) {
1238 HReg dL = iselDblExpr(env, e->Iex.Binop.arg1);
1239 HReg dR = iselDblExpr(env, e->Iex.Binop.arg2);
1240 HReg dst = newVRegI(env);
1241 /* Do the compare (FCMPD) and set NZCV in FPSCR. Then also do
1242 FMSTAT, so we can examine the results directly. */
1243 addInstr(env, ARMInstr_VCmpD(dL, dR));
1244 /* Create in dst, the IRCmpF64Result encoded result. */
1245 addInstr(env, ARMInstr_Imm32(dst, 0));
1246 addInstr(env, ARMInstr_CMov(ARMcc_EQ, dst, ARMRI84_I84(0x40,0))); //EQ
1247 addInstr(env, ARMInstr_CMov(ARMcc_MI, dst, ARMRI84_I84(0x01,0))); //LT
1248 addInstr(env, ARMInstr_CMov(ARMcc_GT, dst, ARMRI84_I84(0x00,0))); //GT
1249 addInstr(env, ARMInstr_CMov(ARMcc_VS, dst, ARMRI84_I84(0x45,0))); //UN
1250 return dst;
1251 }
1252
1253 if (e->Iex.Binop.op == Iop_F64toI32S
1254 || e->Iex.Binop.op == Iop_F64toI32U) {
1255 /* Wretched uglyness all round, due to having to deal
1256 with rounding modes. Oh well. */
1257 /* FIXME: if arg1 is a constant indicating round-to-zero,
1258 then we could skip all this arsing around with FPSCR and
1259 simply emit FTO{S,U}IZD. */
1260 Bool syned = e->Iex.Binop.op == Iop_F64toI32S;
1261 HReg valD = iselDblExpr(env, e->Iex.Binop.arg2);
1262 set_VFP_rounding_mode(env, e->Iex.Binop.arg1);
1263 /* FTO{S,U}ID valF, valD */
1264 HReg valF = newVRegF(env);
1265 addInstr(env, ARMInstr_VCvtID(False/*!iToD*/, syned,
1266 valF, valD));
1267 set_VFP_rounding_default(env);
1268 /* VMOV dst, valF */
1269 HReg dst = newVRegI(env);
1270 addInstr(env, ARMInstr_VXferS(False/*!toS*/, valF, dst));
1271 return dst;
1272 }
1273
sewardj6c60b322010-08-22 12:48:28 +00001274 if (e->Iex.Binop.op == Iop_GetElem8x8
1275 || e->Iex.Binop.op == Iop_GetElem16x4
1276 || e->Iex.Binop.op == Iop_GetElem32x2) {
1277 HReg res = newVRegI(env);
florian1d0da842012-06-01 22:04:27 +00001278 HReg arg = iselNeon64Expr(env, e->Iex.Binop.arg1);
sewardj6c60b322010-08-22 12:48:28 +00001279 UInt index, size;
1280 if (e->Iex.Binop.arg2->tag != Iex_Const ||
1281 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
1282 vpanic("ARM target supports GetElem with constant "
1283 "second argument only\n");
1284 }
1285 index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
1286 switch (e->Iex.Binop.op) {
1287 case Iop_GetElem8x8: vassert(index < 8); size = 0; break;
1288 case Iop_GetElem16x4: vassert(index < 4); size = 1; break;
1289 case Iop_GetElem32x2: vassert(index < 2); size = 2; break;
1290 default: vassert(0);
1291 }
1292 addInstr(env, ARMInstr_NUnaryS(ARMneon_GETELEMS,
1293 mkARMNRS(ARMNRS_Reg, res, 0),
1294 mkARMNRS(ARMNRS_Scalar, arg, index),
1295 size, False));
1296 return res;
1297 }
1298
1299 if (e->Iex.Binop.op == Iop_GetElem8x16
1300 || e->Iex.Binop.op == Iop_GetElem16x8
1301 || e->Iex.Binop.op == Iop_GetElem32x4) {
1302 HReg res = newVRegI(env);
florian1d0da842012-06-01 22:04:27 +00001303 HReg arg = iselNeonExpr(env, e->Iex.Binop.arg1);
sewardj6c60b322010-08-22 12:48:28 +00001304 UInt index, size;
1305 if (e->Iex.Binop.arg2->tag != Iex_Const ||
1306 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
1307 vpanic("ARM target supports GetElem with constant "
1308 "second argument only\n");
1309 }
1310 index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
1311 switch (e->Iex.Binop.op) {
1312 case Iop_GetElem8x16: vassert(index < 16); size = 0; break;
1313 case Iop_GetElem16x8: vassert(index < 8); size = 1; break;
1314 case Iop_GetElem32x4: vassert(index < 4); size = 2; break;
1315 default: vassert(0);
1316 }
1317 addInstr(env, ARMInstr_NUnaryS(ARMneon_GETELEMS,
1318 mkARMNRS(ARMNRS_Reg, res, 0),
1319 mkARMNRS(ARMNRS_Scalar, arg, index),
1320 size, True));
1321 return res;
1322 }
1323
sewardje2ea1762010-09-22 00:56:37 +00001324 /* All cases involving host-side helper calls. */
1325 void* fn = NULL;
1326 switch (e->Iex.Binop.op) {
1327 case Iop_Add16x2:
1328 fn = &h_generic_calc_Add16x2; break;
1329 case Iop_Sub16x2:
1330 fn = &h_generic_calc_Sub16x2; break;
1331 case Iop_HAdd16Ux2:
1332 fn = &h_generic_calc_HAdd16Ux2; break;
1333 case Iop_HAdd16Sx2:
1334 fn = &h_generic_calc_HAdd16Sx2; break;
1335 case Iop_HSub16Ux2:
1336 fn = &h_generic_calc_HSub16Ux2; break;
1337 case Iop_HSub16Sx2:
1338 fn = &h_generic_calc_HSub16Sx2; break;
1339 case Iop_QAdd16Sx2:
1340 fn = &h_generic_calc_QAdd16Sx2; break;
1341 case Iop_QSub16Sx2:
1342 fn = &h_generic_calc_QSub16Sx2; break;
1343 case Iop_Add8x4:
1344 fn = &h_generic_calc_Add8x4; break;
1345 case Iop_Sub8x4:
1346 fn = &h_generic_calc_Sub8x4; break;
1347 case Iop_HAdd8Ux4:
1348 fn = &h_generic_calc_HAdd8Ux4; break;
1349 case Iop_HAdd8Sx4:
1350 fn = &h_generic_calc_HAdd8Sx4; break;
1351 case Iop_HSub8Ux4:
1352 fn = &h_generic_calc_HSub8Ux4; break;
1353 case Iop_HSub8Sx4:
1354 fn = &h_generic_calc_HSub8Sx4; break;
1355 case Iop_QAdd8Sx4:
1356 fn = &h_generic_calc_QAdd8Sx4; break;
1357 case Iop_QAdd8Ux4:
1358 fn = &h_generic_calc_QAdd8Ux4; break;
1359 case Iop_QSub8Sx4:
1360 fn = &h_generic_calc_QSub8Sx4; break;
1361 case Iop_QSub8Ux4:
1362 fn = &h_generic_calc_QSub8Ux4; break;
sewardj310d6b22010-10-18 16:29:40 +00001363 case Iop_Sad8Ux4:
1364 fn = &h_generic_calc_Sad8Ux4; break;
sewardje2ea1762010-09-22 00:56:37 +00001365 default:
1366 break;
1367 }
1368
1369 if (fn) {
1370 HReg regL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1371 HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1372 HReg res = newVRegI(env);
1373 addInstr(env, mk_iMOVds_RR(hregARM_R0(), regL));
1374 addInstr(env, mk_iMOVds_RR(hregARM_R1(), regR));
1375 addInstr(env, ARMInstr_Call( ARMcc_AL, (HWord)Ptr_to_ULong(fn), 2 ));
1376 addInstr(env, mk_iMOVds_RR(res, hregARM_R0()));
1377 return res;
1378 }
1379
sewardj6c299f32009-12-31 18:00:12 +00001380 break;
1381 }
1382
1383 /* --------- UNARY OP --------- */
1384 case Iex_Unop: {
1385
1386//zz /* 1Uto8(32to1(expr32)) */
1387//zz if (e->Iex.Unop.op == Iop_1Uto8) {
1388//zz DECLARE_PATTERN(p_32to1_then_1Uto8);
1389//zz DEFINE_PATTERN(p_32to1_then_1Uto8,
1390//zz unop(Iop_1Uto8,unop(Iop_32to1,bind(0))));
1391//zz if (matchIRExpr(&mi,p_32to1_then_1Uto8,e)) {
1392//zz IRExpr* expr32 = mi.bindee[0];
1393//zz HReg dst = newVRegI(env);
1394//zz HReg src = iselIntExpr_R(env, expr32);
1395//zz addInstr(env, mk_iMOVsd_RR(src,dst) );
1396//zz addInstr(env, X86Instr_Alu32R(Xalu_AND,
1397//zz X86RMI_Imm(1), dst));
1398//zz return dst;
1399//zz }
1400//zz }
1401//zz
1402//zz /* 8Uto32(LDle(expr32)) */
1403//zz if (e->Iex.Unop.op == Iop_8Uto32) {
1404//zz DECLARE_PATTERN(p_LDle8_then_8Uto32);
1405//zz DEFINE_PATTERN(p_LDle8_then_8Uto32,
1406//zz unop(Iop_8Uto32,
1407//zz IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
1408//zz if (matchIRExpr(&mi,p_LDle8_then_8Uto32,e)) {
1409//zz HReg dst = newVRegI(env);
1410//zz X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1411//zz addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
1412//zz return dst;
1413//zz }
1414//zz }
1415//zz
1416//zz /* 8Sto32(LDle(expr32)) */
1417//zz if (e->Iex.Unop.op == Iop_8Sto32) {
1418//zz DECLARE_PATTERN(p_LDle8_then_8Sto32);
1419//zz DEFINE_PATTERN(p_LDle8_then_8Sto32,
1420//zz unop(Iop_8Sto32,
1421//zz IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
1422//zz if (matchIRExpr(&mi,p_LDle8_then_8Sto32,e)) {
1423//zz HReg dst = newVRegI(env);
1424//zz X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1425//zz addInstr(env, X86Instr_LoadEX(1,True,amode,dst));
1426//zz return dst;
1427//zz }
1428//zz }
1429//zz
1430//zz /* 16Uto32(LDle(expr32)) */
1431//zz if (e->Iex.Unop.op == Iop_16Uto32) {
1432//zz DECLARE_PATTERN(p_LDle16_then_16Uto32);
1433//zz DEFINE_PATTERN(p_LDle16_then_16Uto32,
1434//zz unop(Iop_16Uto32,
1435//zz IRExpr_Load(Iend_LE,Ity_I16,bind(0))) );
1436//zz if (matchIRExpr(&mi,p_LDle16_then_16Uto32,e)) {
1437//zz HReg dst = newVRegI(env);
1438//zz X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1439//zz addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
1440//zz return dst;
1441//zz }
1442//zz }
1443//zz
1444//zz /* 8Uto32(GET:I8) */
1445//zz if (e->Iex.Unop.op == Iop_8Uto32) {
1446//zz if (e->Iex.Unop.arg->tag == Iex_Get) {
1447//zz HReg dst;
1448//zz X86AMode* amode;
1449//zz vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I8);
1450//zz dst = newVRegI(env);
1451//zz amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
1452//zz hregX86_EBP());
1453//zz addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
1454//zz return dst;
1455//zz }
1456//zz }
1457//zz
1458//zz /* 16to32(GET:I16) */
1459//zz if (e->Iex.Unop.op == Iop_16Uto32) {
1460//zz if (e->Iex.Unop.arg->tag == Iex_Get) {
1461//zz HReg dst;
1462//zz X86AMode* amode;
1463//zz vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I16);
1464//zz dst = newVRegI(env);
1465//zz amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
1466//zz hregX86_EBP());
1467//zz addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
1468//zz return dst;
1469//zz }
1470//zz }
1471
1472 switch (e->Iex.Unop.op) {
1473 case Iop_8Uto32: {
1474 HReg dst = newVRegI(env);
1475 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1476 addInstr(env, ARMInstr_Alu(ARMalu_AND,
1477 dst, src, ARMRI84_I84(0xFF,0)));
1478 return dst;
1479 }
1480//zz case Iop_8Uto16:
1481//zz case Iop_8Uto32:
1482//zz case Iop_16Uto32: {
1483//zz HReg dst = newVRegI(env);
1484//zz HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1485//zz UInt mask = e->Iex.Unop.op==Iop_16Uto32 ? 0xFFFF : 0xFF;
1486//zz addInstr(env, mk_iMOVsd_RR(src,dst) );
1487//zz addInstr(env, X86Instr_Alu32R(Xalu_AND,
1488//zz X86RMI_Imm(mask), dst));
1489//zz return dst;
1490//zz }
1491//zz case Iop_8Sto16:
1492//zz case Iop_8Sto32:
1493 case Iop_16Uto32: {
1494 HReg dst = newVRegI(env);
1495 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1496 ARMRI5* amt = ARMRI5_I5(16);
1497 addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, src, amt));
1498 addInstr(env, ARMInstr_Shift(ARMsh_SHR, dst, dst, amt));
1499 return dst;
1500 }
1501 case Iop_8Sto32:
1502 case Iop_16Sto32: {
1503 HReg dst = newVRegI(env);
1504 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1505 ARMRI5* amt = ARMRI5_I5(e->Iex.Unop.op==Iop_16Sto32 ? 16 : 24);
1506 addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, src, amt));
1507 addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt));
1508 return dst;
1509 }
1510//zz case Iop_Not8:
1511//zz case Iop_Not16:
1512 case Iop_Not32: {
1513 HReg dst = newVRegI(env);
1514 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1515 addInstr(env, ARMInstr_Unary(ARMun_NOT, dst, src));
1516 return dst;
1517 }
1518 case Iop_64HIto32: {
1519 HReg rHi, rLo;
1520 iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1521 return rHi; /* and abandon rLo .. poor wee thing :-) */
1522 }
1523 case Iop_64to32: {
1524 HReg rHi, rLo;
1525 iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1526 return rLo; /* similar stupid comment to the above ... */
1527 }
sewardj6c60b322010-08-22 12:48:28 +00001528 case Iop_64to8: {
1529 HReg rHi, rLo;
sewardjc6f970f2012-04-02 21:54:49 +00001530 if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
sewardj6c60b322010-08-22 12:48:28 +00001531 HReg tHi = newVRegI(env);
1532 HReg tLo = newVRegI(env);
1533 HReg tmp = iselNeon64Expr(env, e->Iex.Unop.arg);
1534 addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo));
1535 rHi = tHi;
1536 rLo = tLo;
1537 } else {
1538 iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1539 }
1540 return rLo;
1541 }
sewardj6c299f32009-12-31 18:00:12 +00001542//zz case Iop_16HIto8:
1543//zz case Iop_32HIto16: {
1544//zz HReg dst = newVRegI(env);
1545//zz HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1546//zz Int shift = e->Iex.Unop.op == Iop_16HIto8 ? 8 : 16;
1547//zz addInstr(env, mk_iMOVsd_RR(src,dst) );
1548//zz addInstr(env, X86Instr_Sh32(Xsh_SHR, shift, dst));
1549//zz return dst;
1550//zz }
1551 case Iop_1Uto32:
1552 case Iop_1Uto8: {
1553 HReg dst = newVRegI(env);
1554 ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1555 addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
1556 addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
1557 return dst;
1558 }
1559
1560 case Iop_1Sto32: {
1561 HReg dst = newVRegI(env);
1562 ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1563 ARMRI5* amt = ARMRI5_I5(31);
1564 /* This is really rough. We could do much better here;
1565 perhaps mvn{cond} dst, #0 as the second insn?
1566 (same applies to 1Sto64) */
1567 addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
1568 addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
1569 addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, dst, amt));
1570 addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt));
1571 return dst;
1572 }
1573
1574
1575//zz case Iop_1Sto8:
1576//zz case Iop_1Sto16:
1577//zz case Iop_1Sto32: {
1578//zz /* could do better than this, but for now ... */
1579//zz HReg dst = newVRegI(env);
1580//zz X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1581//zz addInstr(env, X86Instr_Set32(cond,dst));
1582//zz addInstr(env, X86Instr_Sh32(Xsh_SHL, 31, dst));
1583//zz addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, dst));
1584//zz return dst;
1585//zz }
1586//zz case Iop_Ctz32: {
1587//zz /* Count trailing zeroes, implemented by x86 'bsfl' */
1588//zz HReg dst = newVRegI(env);
1589//zz HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1590//zz addInstr(env, X86Instr_Bsfr32(True,src,dst));
1591//zz return dst;
1592//zz }
1593 case Iop_Clz32: {
1594 /* Count leading zeroes; easy on ARM. */
1595 HReg dst = newVRegI(env);
1596 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1597 addInstr(env, ARMInstr_Unary(ARMun_CLZ, dst, src));
1598 return dst;
1599 }
1600
1601 case Iop_CmpwNEZ32: {
1602 HReg dst = newVRegI(env);
1603 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1604 addInstr(env, ARMInstr_Unary(ARMun_NEG, dst, src));
1605 addInstr(env, ARMInstr_Alu(ARMalu_OR, dst, dst, ARMRI84_R(src)));
1606 addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, ARMRI5_I5(31)));
1607 return dst;
1608 }
1609
1610 case Iop_Left32: {
1611 HReg dst = newVRegI(env);
1612 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1613 addInstr(env, ARMInstr_Unary(ARMun_NEG, dst, src));
1614 addInstr(env, ARMInstr_Alu(ARMalu_OR, dst, dst, ARMRI84_R(src)));
1615 return dst;
1616 }
1617
1618//zz case Iop_V128to32: {
1619//zz HReg dst = newVRegI(env);
1620//zz HReg vec = iselVecExpr(env, e->Iex.Unop.arg);
1621//zz X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
1622//zz sub_from_esp(env, 16);
1623//zz addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, esp0));
1624//zz addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(esp0), dst ));
1625//zz add_to_esp(env, 16);
1626//zz return dst;
1627//zz }
1628//zz
1629 case Iop_ReinterpF32asI32: {
1630 HReg dst = newVRegI(env);
1631 HReg src = iselFltExpr(env, e->Iex.Unop.arg);
1632 addInstr(env, ARMInstr_VXferS(False/*!toS*/, src, dst));
1633 return dst;
1634 }
1635
1636//zz
1637//zz case Iop_16to8:
1638 case Iop_32to8:
1639 case Iop_32to16:
1640 /* These are no-ops. */
1641 return iselIntExpr_R(env, e->Iex.Unop.arg);
1642
sewardj6c60b322010-08-22 12:48:28 +00001643 default:
sewardj6c299f32009-12-31 18:00:12 +00001644 break;
1645 }
sewardje2ea1762010-09-22 00:56:37 +00001646
1647 /* All Unop cases involving host-side helper calls. */
1648 void* fn = NULL;
1649 switch (e->Iex.Unop.op) {
1650 case Iop_CmpNEZ16x2:
1651 fn = &h_generic_calc_CmpNEZ16x2; break;
1652 case Iop_CmpNEZ8x4:
1653 fn = &h_generic_calc_CmpNEZ8x4; break;
1654 default:
1655 break;
1656 }
1657
1658 if (fn) {
1659 HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
1660 HReg res = newVRegI(env);
1661 addInstr(env, mk_iMOVds_RR(hregARM_R0(), arg));
1662 addInstr(env, ARMInstr_Call( ARMcc_AL, (HWord)Ptr_to_ULong(fn), 1 ));
1663 addInstr(env, mk_iMOVds_RR(res, hregARM_R0()));
1664 return res;
1665 }
1666
sewardj6c299f32009-12-31 18:00:12 +00001667 break;
1668 }
1669
1670 /* --------- GET --------- */
1671 case Iex_Get: {
1672 if (ty == Ity_I32
1673 && 0 == (e->Iex.Get.offset & 3)
1674 && e->Iex.Get.offset < 4096-4) {
1675 HReg dst = newVRegI(env);
1676 addInstr(env, ARMInstr_LdSt32(
1677 True/*isLoad*/,
1678 dst,
1679 ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset)));
1680 return dst;
1681 }
1682//zz if (ty == Ity_I8 || ty == Ity_I16) {
1683//zz HReg dst = newVRegI(env);
1684//zz addInstr(env, X86Instr_LoadEX(
1685//zz toUChar(ty==Ity_I8 ? 1 : 2),
1686//zz False,
1687//zz X86AMode_IR(e->Iex.Get.offset,hregX86_EBP()),
1688//zz dst));
1689//zz return dst;
1690//zz }
1691 break;
1692 }
1693
1694//zz case Iex_GetI: {
1695//zz X86AMode* am
1696//zz = genGuestArrayOffset(
1697//zz env, e->Iex.GetI.descr,
1698//zz e->Iex.GetI.ix, e->Iex.GetI.bias );
1699//zz HReg dst = newVRegI(env);
1700//zz if (ty == Ity_I8) {
1701//zz addInstr(env, X86Instr_LoadEX( 1, False, am, dst ));
1702//zz return dst;
1703//zz }
1704//zz if (ty == Ity_I32) {
1705//zz addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(am), dst));
1706//zz return dst;
1707//zz }
1708//zz break;
1709//zz }
1710
1711 /* --------- CCALL --------- */
1712 case Iex_CCall: {
1713 HReg dst = newVRegI(env);
1714 vassert(ty == e->Iex.CCall.retty);
1715
1716 /* be very restrictive for now. Only 32/64-bit ints allowed
1717 for args, and 32 bits for return type. */
1718 if (e->Iex.CCall.retty != Ity_I32)
1719 goto irreducible;
1720
1721 /* Marshal args, do the call, clear stack. */
1722 Bool ok = doHelperCall( env, False,
1723 NULL, e->Iex.CCall.cee, e->Iex.CCall.args );
1724 if (ok) {
1725 addInstr(env, mk_iMOVds_RR(dst, hregARM_R0()));
1726 return dst;
1727 }
1728 /* else fall through; will hit the irreducible: label */
1729 }
1730
1731 /* --------- LITERAL --------- */
1732 /* 32 literals */
1733 case Iex_Const: {
1734 UInt u = 0;
1735 HReg dst = newVRegI(env);
1736 switch (e->Iex.Const.con->tag) {
1737 case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
1738 case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
1739 case Ico_U8: u = 0xFF & (e->Iex.Const.con->Ico.U8); break;
sewardj6c60b322010-08-22 12:48:28 +00001740 default: ppIRExpr(e); vpanic("iselIntExpr_R.Iex_Const(arm)");
sewardj6c299f32009-12-31 18:00:12 +00001741 }
1742 addInstr(env, ARMInstr_Imm32(dst, u));
1743 return dst;
1744 }
1745
1746 /* --------- MULTIPLEX --------- */
1747 case Iex_Mux0X: {
1748 IRExpr* cond = e->Iex.Mux0X.cond;
1749
1750 /* Mux0X( 32to8(1Uto32(ccexpr)), expr0, exprX ) */
1751 if (ty == Ity_I32
1752 && cond->tag == Iex_Unop
1753 && cond->Iex.Unop.op == Iop_32to8
1754 && cond->Iex.Unop.arg->tag == Iex_Unop
1755 && cond->Iex.Unop.arg->Iex.Unop.op == Iop_1Uto32) {
1756 ARMCondCode cc;
1757 HReg rX = iselIntExpr_R(env, e->Iex.Mux0X.exprX);
1758 ARMRI84* r0 = iselIntExpr_RI84(NULL, False, env, e->Iex.Mux0X.expr0);
1759 HReg dst = newVRegI(env);
1760 addInstr(env, mk_iMOVds_RR(dst, rX));
1761 cc = iselCondCode(env, cond->Iex.Unop.arg->Iex.Unop.arg);
1762 addInstr(env, ARMInstr_CMov(cc ^ 1, dst, r0));
1763 return dst;
1764 }
1765
1766 /* Mux0X(cond, expr0, exprX) (general case) */
1767 if (ty == Ity_I32) {
1768 HReg r8;
1769 HReg rX = iselIntExpr_R(env, e->Iex.Mux0X.exprX);
1770 ARMRI84* r0 = iselIntExpr_RI84(NULL, False, env, e->Iex.Mux0X.expr0);
1771 HReg dst = newVRegI(env);
1772 addInstr(env, mk_iMOVds_RR(dst, rX));
1773 r8 = iselIntExpr_R(env, cond);
1774 addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r8,
1775 ARMRI84_I84(0xFF,0)));
1776 addInstr(env, ARMInstr_CMov(ARMcc_EQ, dst, r0));
1777 return dst;
1778 }
1779 break;
1780 }
1781
1782 default:
1783 break;
1784 } /* switch (e->tag) */
1785
1786 /* We get here if no pattern matched. */
1787 irreducible:
1788 ppIRExpr(e);
1789 vpanic("iselIntExpr_R: cannot reduce tree");
cerioncee30312004-12-17 20:30:21 +00001790}
1791
1792
sewardj6c299f32009-12-31 18:00:12 +00001793/* -------------------- 64-bit -------------------- */
1794
1795/* Compute a 64-bit value into a register pair, which is returned as
1796 the first two parameters. As with iselIntExpr_R, these may be
1797 either real or virtual regs; in any case they must not be changed
1798 by subsequent code emitted by the caller. */
1799
1800static void iselInt64Expr ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e )
1801{
1802 iselInt64Expr_wrk(rHi, rLo, env, e);
1803# if 0
1804 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
1805# endif
1806 vassert(hregClass(*rHi) == HRcInt32);
1807 vassert(hregIsVirtual(*rHi));
1808 vassert(hregClass(*rLo) == HRcInt32);
1809 vassert(hregIsVirtual(*rLo));
1810}
1811
1812/* DO NOT CALL THIS DIRECTLY ! */
1813static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e )
1814{
1815 vassert(e);
1816 vassert(typeOfIRExpr(env->type_env,e) == Ity_I64);
1817
1818 /* 64-bit literal */
1819 if (e->tag == Iex_Const) {
1820 ULong w64 = e->Iex.Const.con->Ico.U64;
1821 UInt wHi = toUInt(w64 >> 32);
1822 UInt wLo = toUInt(w64);
1823 HReg tHi = newVRegI(env);
1824 HReg tLo = newVRegI(env);
1825 vassert(e->Iex.Const.con->tag == Ico_U64);
1826 addInstr(env, ARMInstr_Imm32(tHi, wHi));
1827 addInstr(env, ARMInstr_Imm32(tLo, wLo));
1828 *rHi = tHi;
1829 *rLo = tLo;
1830 return;
1831 }
1832
1833 /* read 64-bit IRTemp */
1834 if (e->tag == Iex_RdTmp) {
sewardjc6f970f2012-04-02 21:54:49 +00001835 if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
sewardj6c60b322010-08-22 12:48:28 +00001836 HReg tHi = newVRegI(env);
1837 HReg tLo = newVRegI(env);
1838 HReg tmp = iselNeon64Expr(env, e);
1839 addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo));
1840 *rHi = tHi;
1841 *rLo = tLo;
1842 } else {
1843 lookupIRTemp64( rHi, rLo, env, e->Iex.RdTmp.tmp);
1844 }
sewardj6c299f32009-12-31 18:00:12 +00001845 return;
1846 }
1847
1848 /* 64-bit load */
1849 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
1850 HReg tLo, tHi, rA;
1851 vassert(e->Iex.Load.ty == Ity_I64);
1852 rA = iselIntExpr_R(env, e->Iex.Load.addr);
1853 tHi = newVRegI(env);
1854 tLo = newVRegI(env);
1855 addInstr(env, ARMInstr_LdSt32(True/*isLoad*/, tHi, ARMAMode1_RI(rA, 4)));
1856 addInstr(env, ARMInstr_LdSt32(True/*isLoad*/, tLo, ARMAMode1_RI(rA, 0)));
1857 *rHi = tHi;
1858 *rLo = tLo;
1859 return;
1860 }
1861
1862 /* 64-bit GET */
1863 if (e->tag == Iex_Get) {
1864 ARMAMode1* am0 = ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset + 0);
1865 ARMAMode1* am4 = ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset + 4);
1866 HReg tHi = newVRegI(env);
1867 HReg tLo = newVRegI(env);
1868 addInstr(env, ARMInstr_LdSt32(True/*isLoad*/, tHi, am4));
1869 addInstr(env, ARMInstr_LdSt32(True/*isLoad*/, tLo, am0));
1870 *rHi = tHi;
1871 *rLo = tLo;
1872 return;
1873 }
1874
1875 /* --------- BINARY ops --------- */
1876 if (e->tag == Iex_Binop) {
1877 switch (e->Iex.Binop.op) {
1878
1879 /* 32 x 32 -> 64 multiply */
1880 case Iop_MullS32:
1881 case Iop_MullU32: {
1882 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1883 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1884 HReg tHi = newVRegI(env);
1885 HReg tLo = newVRegI(env);
1886 ARMMulOp mop = e->Iex.Binop.op == Iop_MullS32
1887 ? ARMmul_SX : ARMmul_ZX;
1888 addInstr(env, mk_iMOVds_RR(hregARM_R2(), argL));
1889 addInstr(env, mk_iMOVds_RR(hregARM_R3(), argR));
1890 addInstr(env, ARMInstr_Mul(mop));
1891 addInstr(env, mk_iMOVds_RR(tHi, hregARM_R1()));
1892 addInstr(env, mk_iMOVds_RR(tLo, hregARM_R0()));
1893 *rHi = tHi;
1894 *rLo = tLo;
1895 return;
1896 }
1897
1898 case Iop_Or64: {
1899 HReg xLo, xHi, yLo, yHi;
1900 HReg tHi = newVRegI(env);
1901 HReg tLo = newVRegI(env);
1902 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
1903 iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
1904 addInstr(env, ARMInstr_Alu(ARMalu_OR, tHi, xHi, ARMRI84_R(yHi)));
1905 addInstr(env, ARMInstr_Alu(ARMalu_OR, tLo, xLo, ARMRI84_R(yLo)));
1906 *rHi = tHi;
1907 *rLo = tLo;
1908 return;
1909 }
1910
1911 case Iop_Add64: {
1912 HReg xLo, xHi, yLo, yHi;
1913 HReg tHi = newVRegI(env);
1914 HReg tLo = newVRegI(env);
1915 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
1916 iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
1917 addInstr(env, ARMInstr_Alu(ARMalu_ADDS, tLo, xLo, ARMRI84_R(yLo)));
1918 addInstr(env, ARMInstr_Alu(ARMalu_ADC, tHi, xHi, ARMRI84_R(yHi)));
1919 *rHi = tHi;
1920 *rLo = tLo;
1921 return;
1922 }
1923
1924 /* 32HLto64(e1,e2) */
1925 case Iop_32HLto64: {
1926 *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
1927 *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
1928 return;
1929 }
1930
1931 default:
1932 break;
1933 }
1934 }
1935
1936 /* --------- UNARY ops --------- */
1937 if (e->tag == Iex_Unop) {
1938 switch (e->Iex.Unop.op) {
1939
1940 /* ReinterpF64asI64 */
1941 case Iop_ReinterpF64asI64: {
1942 HReg dstHi = newVRegI(env);
1943 HReg dstLo = newVRegI(env);
1944 HReg src = iselDblExpr(env, e->Iex.Unop.arg);
1945 addInstr(env, ARMInstr_VXferD(False/*!toD*/, src, dstHi, dstLo));
1946 *rHi = dstHi;
1947 *rLo = dstLo;
1948 return;
1949 }
1950
1951 /* Left64(e) */
1952 case Iop_Left64: {
1953 HReg yLo, yHi;
1954 HReg tHi = newVRegI(env);
1955 HReg tLo = newVRegI(env);
1956 HReg zero = newVRegI(env);
1957 /* yHi:yLo = arg */
1958 iselInt64Expr(&yHi, &yLo, env, e->Iex.Unop.arg);
1959 /* zero = 0 */
1960 addInstr(env, ARMInstr_Imm32(zero, 0));
1961 /* tLo = 0 - yLo, and set carry */
sewardj6c60b322010-08-22 12:48:28 +00001962 addInstr(env, ARMInstr_Alu(ARMalu_SUBS,
1963 tLo, zero, ARMRI84_R(yLo)));
sewardj6c299f32009-12-31 18:00:12 +00001964 /* tHi = 0 - yHi - carry */
sewardj6c60b322010-08-22 12:48:28 +00001965 addInstr(env, ARMInstr_Alu(ARMalu_SBC,
1966 tHi, zero, ARMRI84_R(yHi)));
sewardj6c299f32009-12-31 18:00:12 +00001967 /* So now we have tHi:tLo = -arg. To finish off, or 'arg'
1968 back in, so as to give the final result
1969 tHi:tLo = arg | -arg. */
1970 addInstr(env, ARMInstr_Alu(ARMalu_OR, tHi, tHi, ARMRI84_R(yHi)));
1971 addInstr(env, ARMInstr_Alu(ARMalu_OR, tLo, tLo, ARMRI84_R(yLo)));
1972 *rHi = tHi;
1973 *rLo = tLo;
1974 return;
1975 }
1976
1977 /* CmpwNEZ64(e) */
1978 case Iop_CmpwNEZ64: {
1979 HReg srcLo, srcHi;
1980 HReg tmp1 = newVRegI(env);
1981 HReg tmp2 = newVRegI(env);
1982 /* srcHi:srcLo = arg */
1983 iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg);
1984 /* tmp1 = srcHi | srcLo */
1985 addInstr(env, ARMInstr_Alu(ARMalu_OR,
1986 tmp1, srcHi, ARMRI84_R(srcLo)));
1987 /* tmp2 = (tmp1 | -tmp1) >>s 31 */
1988 addInstr(env, ARMInstr_Unary(ARMun_NEG, tmp2, tmp1));
1989 addInstr(env, ARMInstr_Alu(ARMalu_OR,
1990 tmp2, tmp2, ARMRI84_R(tmp1)));
1991 addInstr(env, ARMInstr_Shift(ARMsh_SAR,
1992 tmp2, tmp2, ARMRI5_I5(31)));
1993 *rHi = tmp2;
1994 *rLo = tmp2;
1995 return;
1996 }
1997
1998 case Iop_1Sto64: {
1999 HReg dst = newVRegI(env);
2000 ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
2001 ARMRI5* amt = ARMRI5_I5(31);
2002 /* This is really rough. We could do much better here;
2003 perhaps mvn{cond} dst, #0 as the second insn?
2004 (same applies to 1Sto32) */
2005 addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
2006 addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
2007 addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, dst, amt));
2008 addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt));
2009 *rHi = dst;
2010 *rLo = dst;
2011 return;
2012 }
2013
2014 default:
2015 break;
2016 }
2017 } /* if (e->tag == Iex_Unop) */
2018
2019 /* --------- MULTIPLEX --------- */
2020 if (e->tag == Iex_Mux0X) {
2021 IRType ty8;
2022 HReg r8, rXhi, rXlo, r0hi, r0lo, dstHi, dstLo;
2023 ty8 = typeOfIRExpr(env->type_env,e->Iex.Mux0X.cond);
2024 vassert(ty8 == Ity_I8);
2025 iselInt64Expr(&rXhi, &rXlo, env, e->Iex.Mux0X.exprX);
2026 iselInt64Expr(&r0hi, &r0lo, env, e->Iex.Mux0X.expr0);
2027 dstHi = newVRegI(env);
2028 dstLo = newVRegI(env);
2029 addInstr(env, mk_iMOVds_RR(dstHi, rXhi));
2030 addInstr(env, mk_iMOVds_RR(dstLo, rXlo));
2031 r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond);
2032 addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r8,
2033 ARMRI84_I84(0xFF,0)));
2034 addInstr(env, ARMInstr_CMov(ARMcc_EQ, dstHi, ARMRI84_R(r0hi)));
2035 addInstr(env, ARMInstr_CMov(ARMcc_EQ, dstLo, ARMRI84_R(r0lo)));
2036 *rHi = dstHi;
2037 *rLo = dstLo;
2038 return;
2039 }
2040
sewardj6c60b322010-08-22 12:48:28 +00002041 /* It is convenient sometimes to call iselInt64Expr even when we
2042 have NEON support (e.g. in do_helper_call we need 64-bit
2043 arguments as 2 x 32 regs). */
sewardjc6f970f2012-04-02 21:54:49 +00002044 if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
sewardj6c60b322010-08-22 12:48:28 +00002045 HReg tHi = newVRegI(env);
2046 HReg tLo = newVRegI(env);
2047 HReg tmp = iselNeon64Expr(env, e);
2048 addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo));
2049 *rHi = tHi;
2050 *rLo = tLo;
2051 return ;
2052 }
2053
sewardj6c299f32009-12-31 18:00:12 +00002054 ppIRExpr(e);
2055 vpanic("iselInt64Expr");
2056}
2057
2058
2059/*---------------------------------------------------------*/
sewardj6c60b322010-08-22 12:48:28 +00002060/*--- ISEL: Vector (NEON) expressions (64 or 128 bit) ---*/
2061/*---------------------------------------------------------*/
2062
2063static HReg iselNeon64Expr ( ISelEnv* env, IRExpr* e )
2064{
2065 HReg r = iselNeon64Expr_wrk( env, e );
2066 vassert(hregClass(r) == HRcFlt64);
2067 vassert(hregIsVirtual(r));
2068 return r;
2069}
2070
2071/* DO NOT CALL THIS DIRECTLY */
2072static HReg iselNeon64Expr_wrk ( ISelEnv* env, IRExpr* e )
2073{
2074 IRType ty = typeOfIRExpr(env->type_env, e);
2075 MatchInfo mi;
2076 vassert(e);
2077 vassert(ty == Ity_I64);
2078
2079 if (e->tag == Iex_RdTmp) {
2080 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
2081 }
2082
2083 if (e->tag == Iex_Const) {
2084 HReg rLo, rHi;
2085 HReg res = newVRegD(env);
2086 iselInt64Expr(&rHi, &rLo, env, e);
2087 addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
2088 return res;
2089 }
2090
2091 /* 64-bit load */
2092 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
2093 HReg res = newVRegD(env);
2094 ARMAModeN* am = iselIntExpr_AModeN(env, e->Iex.Load.addr);
2095 vassert(ty == Ity_I64);
2096 addInstr(env, ARMInstr_NLdStD(True, res, am));
2097 return res;
2098 }
2099
2100 /* 64-bit GET */
2101 if (e->tag == Iex_Get) {
2102 HReg addr = newVRegI(env);
2103 HReg res = newVRegD(env);
2104 vassert(ty == Ity_I64);
2105 addInstr(env, ARMInstr_Add32(addr, hregARM_R8(), e->Iex.Get.offset));
2106 addInstr(env, ARMInstr_NLdStD(True, res, mkARMAModeN_R(addr)));
2107 return res;
2108 }
2109
2110 /* --------- BINARY ops --------- */
2111 if (e->tag == Iex_Binop) {
2112 switch (e->Iex.Binop.op) {
2113
2114 /* 32 x 32 -> 64 multiply */
2115 case Iop_MullS32:
2116 case Iop_MullU32: {
2117 HReg rLo, rHi;
2118 HReg res = newVRegD(env);
2119 iselInt64Expr(&rHi, &rLo, env, e);
2120 addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
2121 return res;
2122 }
2123
2124 case Iop_And64: {
2125 HReg res = newVRegD(env);
2126 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2127 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2128 addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
2129 res, argL, argR, 4, False));
2130 return res;
2131 }
2132 case Iop_Or64: {
2133 HReg res = newVRegD(env);
2134 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2135 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2136 addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
2137 res, argL, argR, 4, False));
2138 return res;
2139 }
2140 case Iop_Xor64: {
2141 HReg res = newVRegD(env);
2142 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2143 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2144 addInstr(env, ARMInstr_NBinary(ARMneon_VXOR,
2145 res, argL, argR, 4, False));
2146 return res;
2147 }
2148
2149 /* 32HLto64(e1,e2) */
2150 case Iop_32HLto64: {
2151 HReg rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
2152 HReg rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
2153 HReg res = newVRegD(env);
2154 addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
2155 return res;
2156 }
2157
2158 case Iop_Add8x8:
2159 case Iop_Add16x4:
2160 case Iop_Add32x2:
2161 case Iop_Add64: {
2162 HReg res = newVRegD(env);
2163 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2164 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2165 UInt size;
2166 switch (e->Iex.Binop.op) {
2167 case Iop_Add8x8: size = 0; break;
2168 case Iop_Add16x4: size = 1; break;
2169 case Iop_Add32x2: size = 2; break;
2170 case Iop_Add64: size = 3; break;
2171 default: vassert(0);
2172 }
2173 addInstr(env, ARMInstr_NBinary(ARMneon_VADD,
2174 res, argL, argR, size, False));
2175 return res;
2176 }
2177 case Iop_Add32Fx2: {
2178 HReg res = newVRegD(env);
2179 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2180 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2181 UInt size = 0;
2182 addInstr(env, ARMInstr_NBinary(ARMneon_VADDFP,
2183 res, argL, argR, size, False));
2184 return res;
2185 }
2186 case Iop_Recps32Fx2: {
2187 HReg res = newVRegD(env);
2188 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2189 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2190 UInt size = 0;
2191 addInstr(env, ARMInstr_NBinary(ARMneon_VRECPS,
2192 res, argL, argR, size, False));
2193 return res;
2194 }
2195 case Iop_Rsqrts32Fx2: {
2196 HReg res = newVRegD(env);
2197 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2198 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2199 UInt size = 0;
2200 addInstr(env, ARMInstr_NBinary(ARMneon_VRSQRTS,
2201 res, argL, argR, size, False));
2202 return res;
2203 }
2204 case Iop_InterleaveOddLanes8x8:
2205 case Iop_InterleaveOddLanes16x4:
2206 case Iop_InterleaveLO32x2:
2207 case Iop_InterleaveEvenLanes8x8:
2208 case Iop_InterleaveEvenLanes16x4:
2209 case Iop_InterleaveHI32x2: {
2210 HReg tmp = newVRegD(env);
2211 HReg res = newVRegD(env);
2212 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2213 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2214 UInt size;
2215 UInt is_lo;
2216 switch (e->Iex.Binop.op) {
2217 case Iop_InterleaveOddLanes8x8: is_lo = 1; size = 0; break;
2218 case Iop_InterleaveEvenLanes8x8: is_lo = 0; size = 0; break;
2219 case Iop_InterleaveOddLanes16x4: is_lo = 1; size = 1; break;
2220 case Iop_InterleaveEvenLanes16x4: is_lo = 0; size = 1; break;
2221 case Iop_InterleaveLO32x2: is_lo = 1; size = 2; break;
2222 case Iop_InterleaveHI32x2: is_lo = 0; size = 2; break;
2223 default: vassert(0);
2224 }
2225 if (is_lo) {
2226 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2227 tmp, argL, 4, False));
2228 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2229 res, argR, 4, False));
2230 addInstr(env, ARMInstr_NDual(ARMneon_TRN,
2231 res, tmp, size, False));
2232 } else {
2233 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2234 tmp, argR, 4, False));
2235 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2236 res, argL, 4, False));
2237 addInstr(env, ARMInstr_NDual(ARMneon_TRN,
2238 tmp, res, size, False));
2239 }
2240 return res;
2241 }
2242 case Iop_InterleaveHI8x8:
2243 case Iop_InterleaveHI16x4:
2244 case Iop_InterleaveLO8x8:
2245 case Iop_InterleaveLO16x4: {
2246 HReg tmp = newVRegD(env);
2247 HReg res = newVRegD(env);
2248 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2249 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2250 UInt size;
2251 UInt is_lo;
2252 switch (e->Iex.Binop.op) {
2253 case Iop_InterleaveHI8x8: is_lo = 1; size = 0; break;
2254 case Iop_InterleaveLO8x8: is_lo = 0; size = 0; break;
2255 case Iop_InterleaveHI16x4: is_lo = 1; size = 1; break;
2256 case Iop_InterleaveLO16x4: is_lo = 0; size = 1; break;
2257 default: vassert(0);
2258 }
2259 if (is_lo) {
2260 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2261 tmp, argL, 4, False));
2262 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2263 res, argR, 4, False));
2264 addInstr(env, ARMInstr_NDual(ARMneon_ZIP,
2265 res, tmp, size, False));
2266 } else {
2267 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2268 tmp, argR, 4, False));
2269 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2270 res, argL, 4, False));
2271 addInstr(env, ARMInstr_NDual(ARMneon_ZIP,
2272 tmp, res, size, False));
2273 }
2274 return res;
2275 }
2276 case Iop_CatOddLanes8x8:
2277 case Iop_CatOddLanes16x4:
2278 case Iop_CatEvenLanes8x8:
2279 case Iop_CatEvenLanes16x4: {
2280 HReg tmp = newVRegD(env);
2281 HReg res = newVRegD(env);
2282 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2283 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2284 UInt size;
2285 UInt is_lo;
2286 switch (e->Iex.Binop.op) {
2287 case Iop_CatOddLanes8x8: is_lo = 1; size = 0; break;
2288 case Iop_CatEvenLanes8x8: is_lo = 0; size = 0; break;
2289 case Iop_CatOddLanes16x4: is_lo = 1; size = 1; break;
2290 case Iop_CatEvenLanes16x4: is_lo = 0; size = 1; break;
2291 default: vassert(0);
2292 }
2293 if (is_lo) {
2294 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2295 tmp, argL, 4, False));
2296 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2297 res, argR, 4, False));
2298 addInstr(env, ARMInstr_NDual(ARMneon_UZP,
2299 res, tmp, size, False));
2300 } else {
2301 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2302 tmp, argR, 4, False));
2303 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2304 res, argL, 4, False));
2305 addInstr(env, ARMInstr_NDual(ARMneon_UZP,
2306 tmp, res, size, False));
2307 }
2308 return res;
2309 }
2310 case Iop_QAdd8Ux8:
2311 case Iop_QAdd16Ux4:
2312 case Iop_QAdd32Ux2:
2313 case Iop_QAdd64Ux1: {
2314 HReg res = newVRegD(env);
2315 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2316 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2317 UInt size;
2318 switch (e->Iex.Binop.op) {
2319 case Iop_QAdd8Ux8: size = 0; break;
2320 case Iop_QAdd16Ux4: size = 1; break;
2321 case Iop_QAdd32Ux2: size = 2; break;
2322 case Iop_QAdd64Ux1: size = 3; break;
2323 default: vassert(0);
2324 }
2325 addInstr(env, ARMInstr_NBinary(ARMneon_VQADDU,
2326 res, argL, argR, size, False));
2327 return res;
2328 }
2329 case Iop_QAdd8Sx8:
2330 case Iop_QAdd16Sx4:
2331 case Iop_QAdd32Sx2:
2332 case Iop_QAdd64Sx1: {
2333 HReg res = newVRegD(env);
2334 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2335 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2336 UInt size;
2337 switch (e->Iex.Binop.op) {
2338 case Iop_QAdd8Sx8: size = 0; break;
2339 case Iop_QAdd16Sx4: size = 1; break;
2340 case Iop_QAdd32Sx2: size = 2; break;
2341 case Iop_QAdd64Sx1: size = 3; break;
2342 default: vassert(0);
2343 }
2344 addInstr(env, ARMInstr_NBinary(ARMneon_VQADDS,
2345 res, argL, argR, size, False));
2346 return res;
2347 }
2348 case Iop_Sub8x8:
2349 case Iop_Sub16x4:
2350 case Iop_Sub32x2:
2351 case Iop_Sub64: {
2352 HReg res = newVRegD(env);
2353 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2354 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2355 UInt size;
2356 switch (e->Iex.Binop.op) {
2357 case Iop_Sub8x8: size = 0; break;
2358 case Iop_Sub16x4: size = 1; break;
2359 case Iop_Sub32x2: size = 2; break;
2360 case Iop_Sub64: size = 3; break;
2361 default: vassert(0);
2362 }
2363 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
2364 res, argL, argR, size, False));
2365 return res;
2366 }
2367 case Iop_Sub32Fx2: {
2368 HReg res = newVRegD(env);
2369 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2370 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2371 UInt size = 0;
2372 addInstr(env, ARMInstr_NBinary(ARMneon_VSUBFP,
2373 res, argL, argR, size, False));
2374 return res;
2375 }
2376 case Iop_QSub8Ux8:
2377 case Iop_QSub16Ux4:
2378 case Iop_QSub32Ux2:
2379 case Iop_QSub64Ux1: {
2380 HReg res = newVRegD(env);
2381 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2382 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2383 UInt size;
2384 switch (e->Iex.Binop.op) {
2385 case Iop_QSub8Ux8: size = 0; break;
2386 case Iop_QSub16Ux4: size = 1; break;
2387 case Iop_QSub32Ux2: size = 2; break;
2388 case Iop_QSub64Ux1: size = 3; break;
2389 default: vassert(0);
2390 }
2391 addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBU,
2392 res, argL, argR, size, False));
2393 return res;
2394 }
2395 case Iop_QSub8Sx8:
2396 case Iop_QSub16Sx4:
2397 case Iop_QSub32Sx2:
2398 case Iop_QSub64Sx1: {
2399 HReg res = newVRegD(env);
2400 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2401 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2402 UInt size;
2403 switch (e->Iex.Binop.op) {
2404 case Iop_QSub8Sx8: size = 0; break;
2405 case Iop_QSub16Sx4: size = 1; break;
2406 case Iop_QSub32Sx2: size = 2; break;
2407 case Iop_QSub64Sx1: size = 3; break;
2408 default: vassert(0);
2409 }
2410 addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBS,
2411 res, argL, argR, size, False));
2412 return res;
2413 }
2414 case Iop_Max8Ux8:
2415 case Iop_Max16Ux4:
2416 case Iop_Max32Ux2: {
2417 HReg res = newVRegD(env);
2418 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2419 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2420 UInt size;
2421 switch (e->Iex.Binop.op) {
2422 case Iop_Max8Ux8: size = 0; break;
2423 case Iop_Max16Ux4: size = 1; break;
2424 case Iop_Max32Ux2: size = 2; break;
2425 default: vassert(0);
2426 }
2427 addInstr(env, ARMInstr_NBinary(ARMneon_VMAXU,
2428 res, argL, argR, size, False));
2429 return res;
2430 }
2431 case Iop_Max8Sx8:
2432 case Iop_Max16Sx4:
2433 case Iop_Max32Sx2: {
2434 HReg res = newVRegD(env);
2435 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2436 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2437 UInt size;
2438 switch (e->Iex.Binop.op) {
2439 case Iop_Max8Sx8: size = 0; break;
2440 case Iop_Max16Sx4: size = 1; break;
2441 case Iop_Max32Sx2: size = 2; break;
2442 default: vassert(0);
2443 }
2444 addInstr(env, ARMInstr_NBinary(ARMneon_VMAXS,
2445 res, argL, argR, size, False));
2446 return res;
2447 }
2448 case Iop_Min8Ux8:
2449 case Iop_Min16Ux4:
2450 case Iop_Min32Ux2: {
2451 HReg res = newVRegD(env);
2452 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2453 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2454 UInt size;
2455 switch (e->Iex.Binop.op) {
2456 case Iop_Min8Ux8: size = 0; break;
2457 case Iop_Min16Ux4: size = 1; break;
2458 case Iop_Min32Ux2: size = 2; break;
2459 default: vassert(0);
2460 }
2461 addInstr(env, ARMInstr_NBinary(ARMneon_VMINU,
2462 res, argL, argR, size, False));
2463 return res;
2464 }
2465 case Iop_Min8Sx8:
2466 case Iop_Min16Sx4:
2467 case Iop_Min32Sx2: {
2468 HReg res = newVRegD(env);
2469 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2470 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2471 UInt size;
2472 switch (e->Iex.Binop.op) {
2473 case Iop_Min8Sx8: size = 0; break;
2474 case Iop_Min16Sx4: size = 1; break;
2475 case Iop_Min32Sx2: size = 2; break;
2476 default: vassert(0);
2477 }
2478 addInstr(env, ARMInstr_NBinary(ARMneon_VMINS,
2479 res, argL, argR, size, False));
2480 return res;
2481 }
2482 case Iop_Sar8x8:
2483 case Iop_Sar16x4:
2484 case Iop_Sar32x2: {
2485 HReg res = newVRegD(env);
2486 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2487 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2488 HReg argR2 = newVRegD(env);
2489 HReg zero = newVRegD(env);
2490 UInt size;
2491 switch (e->Iex.Binop.op) {
2492 case Iop_Sar8x8: size = 0; break;
2493 case Iop_Sar16x4: size = 1; break;
2494 case Iop_Sar32x2: size = 2; break;
2495 case Iop_Sar64: size = 3; break;
2496 default: vassert(0);
2497 }
2498 addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
2499 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
2500 argR2, zero, argR, size, False));
2501 addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
2502 res, argL, argR2, size, False));
2503 return res;
2504 }
2505 case Iop_Sal8x8:
2506 case Iop_Sal16x4:
2507 case Iop_Sal32x2:
2508 case Iop_Sal64x1: {
2509 HReg res = newVRegD(env);
2510 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2511 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2512 UInt size;
2513 switch (e->Iex.Binop.op) {
2514 case Iop_Sal8x8: size = 0; break;
2515 case Iop_Sal16x4: size = 1; break;
2516 case Iop_Sal32x2: size = 2; break;
2517 case Iop_Sal64x1: size = 3; break;
2518 default: vassert(0);
2519 }
2520 addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
2521 res, argL, argR, size, False));
2522 return res;
2523 }
2524 case Iop_Shr8x8:
2525 case Iop_Shr16x4:
2526 case Iop_Shr32x2: {
2527 HReg res = newVRegD(env);
2528 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2529 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2530 HReg argR2 = newVRegD(env);
2531 HReg zero = newVRegD(env);
2532 UInt size;
2533 switch (e->Iex.Binop.op) {
2534 case Iop_Shr8x8: size = 0; break;
2535 case Iop_Shr16x4: size = 1; break;
2536 case Iop_Shr32x2: size = 2; break;
2537 default: vassert(0);
2538 }
2539 addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
2540 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
2541 argR2, zero, argR, size, False));
2542 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
2543 res, argL, argR2, size, False));
2544 return res;
2545 }
2546 case Iop_Shl8x8:
2547 case Iop_Shl16x4:
2548 case Iop_Shl32x2: {
2549 HReg res = newVRegD(env);
2550 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2551 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2552 UInt size;
2553 switch (e->Iex.Binop.op) {
2554 case Iop_Shl8x8: size = 0; break;
2555 case Iop_Shl16x4: size = 1; break;
2556 case Iop_Shl32x2: size = 2; break;
2557 default: vassert(0);
2558 }
2559 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
2560 res, argL, argR, size, False));
2561 return res;
2562 }
2563 case Iop_QShl8x8:
2564 case Iop_QShl16x4:
2565 case Iop_QShl32x2:
2566 case Iop_QShl64x1: {
2567 HReg res = newVRegD(env);
2568 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2569 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2570 UInt size;
2571 switch (e->Iex.Binop.op) {
2572 case Iop_QShl8x8: size = 0; break;
2573 case Iop_QShl16x4: size = 1; break;
2574 case Iop_QShl32x2: size = 2; break;
2575 case Iop_QShl64x1: size = 3; break;
2576 default: vassert(0);
2577 }
2578 addInstr(env, ARMInstr_NShift(ARMneon_VQSHL,
2579 res, argL, argR, size, False));
2580 return res;
2581 }
2582 case Iop_QSal8x8:
2583 case Iop_QSal16x4:
2584 case Iop_QSal32x2:
2585 case Iop_QSal64x1: {
2586 HReg res = newVRegD(env);
2587 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2588 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2589 UInt size;
2590 switch (e->Iex.Binop.op) {
2591 case Iop_QSal8x8: size = 0; break;
2592 case Iop_QSal16x4: size = 1; break;
2593 case Iop_QSal32x2: size = 2; break;
2594 case Iop_QSal64x1: size = 3; break;
2595 default: vassert(0);
2596 }
2597 addInstr(env, ARMInstr_NShift(ARMneon_VQSAL,
2598 res, argL, argR, size, False));
2599 return res;
2600 }
2601 case Iop_QShlN8x8:
2602 case Iop_QShlN16x4:
2603 case Iop_QShlN32x2:
2604 case Iop_QShlN64x1: {
2605 HReg res = newVRegD(env);
2606 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2607 UInt size, imm;
2608 if (e->Iex.Binop.arg2->tag != Iex_Const ||
2609 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
2610 vpanic("ARM taget supports Iop_QShlNAxB with constant "
2611 "second argument only\n");
2612 }
2613 imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
2614 switch (e->Iex.Binop.op) {
2615 case Iop_QShlN8x8: size = 8 | imm; break;
2616 case Iop_QShlN16x4: size = 16 | imm; break;
2617 case Iop_QShlN32x2: size = 32 | imm; break;
2618 case Iop_QShlN64x1: size = 64 | imm; break;
2619 default: vassert(0);
2620 }
2621 addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUU,
2622 res, argL, size, False));
2623 return res;
2624 }
2625 case Iop_QShlN8Sx8:
2626 case Iop_QShlN16Sx4:
2627 case Iop_QShlN32Sx2:
2628 case Iop_QShlN64Sx1: {
2629 HReg res = newVRegD(env);
2630 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2631 UInt size, imm;
2632 if (e->Iex.Binop.arg2->tag != Iex_Const ||
2633 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
2634 vpanic("ARM taget supports Iop_QShlNAxB with constant "
2635 "second argument only\n");
2636 }
2637 imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
2638 switch (e->Iex.Binop.op) {
2639 case Iop_QShlN8Sx8: size = 8 | imm; break;
2640 case Iop_QShlN16Sx4: size = 16 | imm; break;
2641 case Iop_QShlN32Sx2: size = 32 | imm; break;
2642 case Iop_QShlN64Sx1: size = 64 | imm; break;
2643 default: vassert(0);
2644 }
2645 addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUS,
2646 res, argL, size, False));
2647 return res;
2648 }
2649 case Iop_QSalN8x8:
2650 case Iop_QSalN16x4:
2651 case Iop_QSalN32x2:
2652 case Iop_QSalN64x1: {
2653 HReg res = newVRegD(env);
2654 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2655 UInt size, imm;
2656 if (e->Iex.Binop.arg2->tag != Iex_Const ||
2657 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
2658 vpanic("ARM taget supports Iop_QShlNAxB with constant "
2659 "second argument only\n");
2660 }
2661 imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
2662 switch (e->Iex.Binop.op) {
2663 case Iop_QSalN8x8: size = 8 | imm; break;
2664 case Iop_QSalN16x4: size = 16 | imm; break;
2665 case Iop_QSalN32x2: size = 32 | imm; break;
2666 case Iop_QSalN64x1: size = 64 | imm; break;
2667 default: vassert(0);
2668 }
2669 addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNSS,
2670 res, argL, size, False));
2671 return res;
2672 }
2673 case Iop_ShrN8x8:
2674 case Iop_ShrN16x4:
2675 case Iop_ShrN32x2:
2676 case Iop_Shr64: {
2677 HReg res = newVRegD(env);
2678 HReg tmp = newVRegD(env);
2679 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2680 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
2681 HReg argR2 = newVRegI(env);
2682 UInt size;
2683 switch (e->Iex.Binop.op) {
2684 case Iop_ShrN8x8: size = 0; break;
2685 case Iop_ShrN16x4: size = 1; break;
2686 case Iop_ShrN32x2: size = 2; break;
2687 case Iop_Shr64: size = 3; break;
2688 default: vassert(0);
2689 }
2690 addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
2691 addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, False));
2692 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
2693 res, argL, tmp, size, False));
2694 return res;
2695 }
2696 case Iop_ShlN8x8:
2697 case Iop_ShlN16x4:
2698 case Iop_ShlN32x2:
2699 case Iop_Shl64: {
2700 HReg res = newVRegD(env);
2701 HReg tmp = newVRegD(env);
2702 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2703 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
2704 UInt size;
2705 switch (e->Iex.Binop.op) {
2706 case Iop_ShlN8x8: size = 0; break;
2707 case Iop_ShlN16x4: size = 1; break;
2708 case Iop_ShlN32x2: size = 2; break;
2709 case Iop_Shl64: size = 3; break;
2710 default: vassert(0);
2711 }
2712 addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR, 0, False));
2713 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
2714 res, argL, tmp, size, False));
2715 return res;
2716 }
2717 case Iop_SarN8x8:
2718 case Iop_SarN16x4:
2719 case Iop_SarN32x2:
2720 case Iop_Sar64: {
2721 HReg res = newVRegD(env);
2722 HReg tmp = newVRegD(env);
2723 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2724 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
2725 HReg argR2 = newVRegI(env);
2726 UInt size;
2727 switch (e->Iex.Binop.op) {
2728 case Iop_SarN8x8: size = 0; break;
2729 case Iop_SarN16x4: size = 1; break;
2730 case Iop_SarN32x2: size = 2; break;
2731 case Iop_Sar64: size = 3; break;
2732 default: vassert(0);
2733 }
2734 addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
2735 addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, False));
2736 addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
2737 res, argL, tmp, size, False));
2738 return res;
2739 }
2740 case Iop_CmpGT8Ux8:
2741 case Iop_CmpGT16Ux4:
2742 case Iop_CmpGT32Ux2: {
2743 HReg res = newVRegD(env);
2744 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2745 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2746 UInt size;
2747 switch (e->Iex.Binop.op) {
2748 case Iop_CmpGT8Ux8: size = 0; break;
2749 case Iop_CmpGT16Ux4: size = 1; break;
2750 case Iop_CmpGT32Ux2: size = 2; break;
2751 default: vassert(0);
2752 }
2753 addInstr(env, ARMInstr_NBinary(ARMneon_VCGTU,
2754 res, argL, argR, size, False));
2755 return res;
2756 }
2757 case Iop_CmpGT8Sx8:
2758 case Iop_CmpGT16Sx4:
2759 case Iop_CmpGT32Sx2: {
2760 HReg res = newVRegD(env);
2761 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2762 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2763 UInt size;
2764 switch (e->Iex.Binop.op) {
2765 case Iop_CmpGT8Sx8: size = 0; break;
2766 case Iop_CmpGT16Sx4: size = 1; break;
2767 case Iop_CmpGT32Sx2: size = 2; break;
2768 default: vassert(0);
2769 }
2770 addInstr(env, ARMInstr_NBinary(ARMneon_VCGTS,
2771 res, argL, argR, size, False));
2772 return res;
2773 }
2774 case Iop_CmpEQ8x8:
2775 case Iop_CmpEQ16x4:
2776 case Iop_CmpEQ32x2: {
2777 HReg res = newVRegD(env);
2778 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2779 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2780 UInt size;
2781 switch (e->Iex.Binop.op) {
2782 case Iop_CmpEQ8x8: size = 0; break;
2783 case Iop_CmpEQ16x4: size = 1; break;
2784 case Iop_CmpEQ32x2: size = 2; break;
2785 default: vassert(0);
2786 }
2787 addInstr(env, ARMInstr_NBinary(ARMneon_VCEQ,
2788 res, argL, argR, size, False));
2789 return res;
2790 }
2791 case Iop_Mul8x8:
2792 case Iop_Mul16x4:
2793 case Iop_Mul32x2: {
2794 HReg res = newVRegD(env);
2795 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2796 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2797 UInt size = 0;
2798 switch(e->Iex.Binop.op) {
2799 case Iop_Mul8x8: size = 0; break;
2800 case Iop_Mul16x4: size = 1; break;
2801 case Iop_Mul32x2: size = 2; break;
2802 default: vassert(0);
2803 }
2804 addInstr(env, ARMInstr_NBinary(ARMneon_VMUL,
2805 res, argL, argR, size, False));
2806 return res;
2807 }
2808 case Iop_Mul32Fx2: {
2809 HReg res = newVRegD(env);
2810 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2811 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2812 UInt size = 0;
2813 addInstr(env, ARMInstr_NBinary(ARMneon_VMULFP,
2814 res, argL, argR, size, False));
2815 return res;
2816 }
2817 case Iop_QDMulHi16Sx4:
2818 case Iop_QDMulHi32Sx2: {
2819 HReg res = newVRegD(env);
2820 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2821 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2822 UInt size = 0;
2823 switch(e->Iex.Binop.op) {
2824 case Iop_QDMulHi16Sx4: size = 1; break;
2825 case Iop_QDMulHi32Sx2: size = 2; break;
2826 default: vassert(0);
2827 }
2828 addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULH,
2829 res, argL, argR, size, False));
2830 return res;
2831 }
2832
2833 case Iop_QRDMulHi16Sx4:
2834 case Iop_QRDMulHi32Sx2: {
2835 HReg res = newVRegD(env);
2836 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2837 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2838 UInt size = 0;
2839 switch(e->Iex.Binop.op) {
2840 case Iop_QRDMulHi16Sx4: size = 1; break;
2841 case Iop_QRDMulHi32Sx2: size = 2; break;
2842 default: vassert(0);
2843 }
2844 addInstr(env, ARMInstr_NBinary(ARMneon_VQRDMULH,
2845 res, argL, argR, size, False));
2846 return res;
2847 }
2848
2849 case Iop_PwAdd8x8:
2850 case Iop_PwAdd16x4:
2851 case Iop_PwAdd32x2: {
2852 HReg res = newVRegD(env);
2853 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2854 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2855 UInt size = 0;
2856 switch(e->Iex.Binop.op) {
2857 case Iop_PwAdd8x8: size = 0; break;
2858 case Iop_PwAdd16x4: size = 1; break;
2859 case Iop_PwAdd32x2: size = 2; break;
2860 default: vassert(0);
2861 }
2862 addInstr(env, ARMInstr_NBinary(ARMneon_VPADD,
2863 res, argL, argR, size, False));
2864 return res;
2865 }
2866 case Iop_PwAdd32Fx2: {
2867 HReg res = newVRegD(env);
2868 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2869 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2870 UInt size = 0;
2871 addInstr(env, ARMInstr_NBinary(ARMneon_VPADDFP,
2872 res, argL, argR, size, False));
2873 return res;
2874 }
2875 case Iop_PwMin8Ux8:
2876 case Iop_PwMin16Ux4:
2877 case Iop_PwMin32Ux2: {
2878 HReg res = newVRegD(env);
2879 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2880 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2881 UInt size = 0;
2882 switch(e->Iex.Binop.op) {
2883 case Iop_PwMin8Ux8: size = 0; break;
2884 case Iop_PwMin16Ux4: size = 1; break;
2885 case Iop_PwMin32Ux2: size = 2; break;
2886 default: vassert(0);
2887 }
2888 addInstr(env, ARMInstr_NBinary(ARMneon_VPMINU,
2889 res, argL, argR, size, False));
2890 return res;
2891 }
2892 case Iop_PwMin8Sx8:
2893 case Iop_PwMin16Sx4:
2894 case Iop_PwMin32Sx2: {
2895 HReg res = newVRegD(env);
2896 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2897 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2898 UInt size = 0;
2899 switch(e->Iex.Binop.op) {
2900 case Iop_PwMin8Sx8: size = 0; break;
2901 case Iop_PwMin16Sx4: size = 1; break;
2902 case Iop_PwMin32Sx2: size = 2; break;
2903 default: vassert(0);
2904 }
2905 addInstr(env, ARMInstr_NBinary(ARMneon_VPMINS,
2906 res, argL, argR, size, False));
2907 return res;
2908 }
2909 case Iop_PwMax8Ux8:
2910 case Iop_PwMax16Ux4:
2911 case Iop_PwMax32Ux2: {
2912 HReg res = newVRegD(env);
2913 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2914 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2915 UInt size = 0;
2916 switch(e->Iex.Binop.op) {
2917 case Iop_PwMax8Ux8: size = 0; break;
2918 case Iop_PwMax16Ux4: size = 1; break;
2919 case Iop_PwMax32Ux2: size = 2; break;
2920 default: vassert(0);
2921 }
2922 addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXU,
2923 res, argL, argR, size, False));
2924 return res;
2925 }
2926 case Iop_PwMax8Sx8:
2927 case Iop_PwMax16Sx4:
2928 case Iop_PwMax32Sx2: {
2929 HReg res = newVRegD(env);
2930 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2931 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2932 UInt size = 0;
2933 switch(e->Iex.Binop.op) {
2934 case Iop_PwMax8Sx8: size = 0; break;
2935 case Iop_PwMax16Sx4: size = 1; break;
2936 case Iop_PwMax32Sx2: size = 2; break;
2937 default: vassert(0);
2938 }
2939 addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXS,
2940 res, argL, argR, size, False));
2941 return res;
2942 }
2943 case Iop_Perm8x8: {
2944 HReg res = newVRegD(env);
2945 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2946 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2947 addInstr(env, ARMInstr_NBinary(ARMneon_VTBL,
2948 res, argL, argR, 0, False));
2949 return res;
2950 }
2951 case Iop_PolynomialMul8x8: {
2952 HReg res = newVRegD(env);
2953 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2954 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2955 UInt size = 0;
2956 addInstr(env, ARMInstr_NBinary(ARMneon_VMULP,
2957 res, argL, argR, size, False));
2958 return res;
2959 }
2960 case Iop_Max32Fx2: {
2961 HReg res = newVRegD(env);
2962 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2963 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2964 addInstr(env, ARMInstr_NBinary(ARMneon_VMAXF,
2965 res, argL, argR, 2, False));
2966 return res;
2967 }
2968 case Iop_Min32Fx2: {
2969 HReg res = newVRegD(env);
2970 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2971 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2972 addInstr(env, ARMInstr_NBinary(ARMneon_VMINF,
2973 res, argL, argR, 2, False));
2974 return res;
2975 }
2976 case Iop_PwMax32Fx2: {
2977 HReg res = newVRegD(env);
2978 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2979 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2980 addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXF,
2981 res, argL, argR, 2, False));
2982 return res;
2983 }
2984 case Iop_PwMin32Fx2: {
2985 HReg res = newVRegD(env);
2986 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2987 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2988 addInstr(env, ARMInstr_NBinary(ARMneon_VPMINF,
2989 res, argL, argR, 2, False));
2990 return res;
2991 }
2992 case Iop_CmpGT32Fx2: {
2993 HReg res = newVRegD(env);
2994 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2995 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2996 addInstr(env, ARMInstr_NBinary(ARMneon_VCGTF,
2997 res, argL, argR, 2, False));
2998 return res;
2999 }
3000 case Iop_CmpGE32Fx2: {
3001 HReg res = newVRegD(env);
3002 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3003 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3004 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEF,
3005 res, argL, argR, 2, False));
3006 return res;
3007 }
3008 case Iop_CmpEQ32Fx2: {
3009 HReg res = newVRegD(env);
3010 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3011 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3012 addInstr(env, ARMInstr_NBinary(ARMneon_VCEQF,
3013 res, argL, argR, 2, False));
3014 return res;
3015 }
3016 case Iop_F32ToFixed32Ux2_RZ:
3017 case Iop_F32ToFixed32Sx2_RZ:
3018 case Iop_Fixed32UToF32x2_RN:
3019 case Iop_Fixed32SToF32x2_RN: {
3020 HReg res = newVRegD(env);
3021 HReg arg = iselNeon64Expr(env, e->Iex.Binop.arg1);
3022 ARMNeonUnOp op;
3023 UInt imm6;
3024 if (e->Iex.Binop.arg2->tag != Iex_Const ||
3025 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
3026 vpanic("ARM supports FP <-> Fixed conversion with constant "
3027 "second argument less than 33 only\n");
3028 }
3029 imm6 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
3030 vassert(imm6 <= 32 && imm6 > 0);
3031 imm6 = 64 - imm6;
3032 switch(e->Iex.Binop.op) {
3033 case Iop_F32ToFixed32Ux2_RZ: op = ARMneon_VCVTFtoFixedU; break;
3034 case Iop_F32ToFixed32Sx2_RZ: op = ARMneon_VCVTFtoFixedS; break;
3035 case Iop_Fixed32UToF32x2_RN: op = ARMneon_VCVTFixedUtoF; break;
3036 case Iop_Fixed32SToF32x2_RN: op = ARMneon_VCVTFixedStoF; break;
3037 default: vassert(0);
3038 }
3039 addInstr(env, ARMInstr_NUnary(op, res, arg, imm6, False));
3040 return res;
3041 }
3042 /*
3043 FIXME: is this here or not?
3044 case Iop_VDup8x8:
3045 case Iop_VDup16x4:
3046 case Iop_VDup32x2: {
3047 HReg res = newVRegD(env);
3048 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3049 UInt index;
3050 UInt imm4;
3051 UInt size = 0;
3052 if (e->Iex.Binop.arg2->tag != Iex_Const ||
3053 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
3054 vpanic("ARM supports Iop_VDup with constant "
3055 "second argument less than 16 only\n");
3056 }
3057 index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
3058 switch(e->Iex.Binop.op) {
3059 case Iop_VDup8x8: imm4 = (index << 1) + 1; break;
3060 case Iop_VDup16x4: imm4 = (index << 2) + 2; break;
3061 case Iop_VDup32x2: imm4 = (index << 3) + 4; break;
3062 default: vassert(0);
3063 }
3064 if (imm4 >= 16) {
3065 vpanic("ARM supports Iop_VDup with constant "
3066 "second argument less than 16 only\n");
3067 }
3068 addInstr(env, ARMInstr_NUnary(ARMneon_VDUP,
3069 res, argL, imm4, False));
3070 return res;
3071 }
3072 */
3073 default:
3074 break;
3075 }
3076 }
3077
3078 /* --------- UNARY ops --------- */
3079 if (e->tag == Iex_Unop) {
3080 switch (e->Iex.Unop.op) {
3081
3082 /* ReinterpF64asI64 */
3083 case Iop_ReinterpF64asI64:
3084 /* Left64(e) */
3085 case Iop_Left64:
3086 /* CmpwNEZ64(e) */
3087 //case Iop_CmpwNEZ64:
3088 case Iop_1Sto64: {
3089 HReg rLo, rHi;
3090 HReg res = newVRegD(env);
3091 iselInt64Expr(&rHi, &rLo, env, e);
3092 addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
3093 return res;
3094 }
3095 case Iop_Not64: {
3096 DECLARE_PATTERN(p_veqz_8x8);
3097 DECLARE_PATTERN(p_veqz_16x4);
3098 DECLARE_PATTERN(p_veqz_32x2);
3099 DECLARE_PATTERN(p_vcge_8sx8);
3100 DECLARE_PATTERN(p_vcge_16sx4);
3101 DECLARE_PATTERN(p_vcge_32sx2);
3102 DECLARE_PATTERN(p_vcge_8ux8);
3103 DECLARE_PATTERN(p_vcge_16ux4);
3104 DECLARE_PATTERN(p_vcge_32ux2);
3105 DEFINE_PATTERN(p_veqz_8x8,
3106 unop(Iop_Not64, unop(Iop_CmpNEZ8x8, bind(0))));
3107 DEFINE_PATTERN(p_veqz_16x4,
3108 unop(Iop_Not64, unop(Iop_CmpNEZ16x4, bind(0))));
3109 DEFINE_PATTERN(p_veqz_32x2,
3110 unop(Iop_Not64, unop(Iop_CmpNEZ32x2, bind(0))));
3111 DEFINE_PATTERN(p_vcge_8sx8,
3112 unop(Iop_Not64, binop(Iop_CmpGT8Sx8, bind(1), bind(0))));
3113 DEFINE_PATTERN(p_vcge_16sx4,
3114 unop(Iop_Not64, binop(Iop_CmpGT16Sx4, bind(1), bind(0))));
3115 DEFINE_PATTERN(p_vcge_32sx2,
3116 unop(Iop_Not64, binop(Iop_CmpGT32Sx2, bind(1), bind(0))));
3117 DEFINE_PATTERN(p_vcge_8ux8,
3118 unop(Iop_Not64, binop(Iop_CmpGT8Ux8, bind(1), bind(0))));
3119 DEFINE_PATTERN(p_vcge_16ux4,
3120 unop(Iop_Not64, binop(Iop_CmpGT16Ux4, bind(1), bind(0))));
3121 DEFINE_PATTERN(p_vcge_32ux2,
3122 unop(Iop_Not64, binop(Iop_CmpGT32Ux2, bind(1), bind(0))));
3123 if (matchIRExpr(&mi, p_veqz_8x8, e)) {
3124 HReg res = newVRegD(env);
3125 HReg arg = iselNeon64Expr(env, mi.bindee[0]);
3126 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 0, False));
3127 return res;
3128 } else if (matchIRExpr(&mi, p_veqz_16x4, e)) {
3129 HReg res = newVRegD(env);
3130 HReg arg = iselNeon64Expr(env, mi.bindee[0]);
3131 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 1, False));
3132 return res;
3133 } else if (matchIRExpr(&mi, p_veqz_32x2, e)) {
3134 HReg res = newVRegD(env);
3135 HReg arg = iselNeon64Expr(env, mi.bindee[0]);
3136 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 2, False));
3137 return res;
3138 } else if (matchIRExpr(&mi, p_vcge_8sx8, e)) {
3139 HReg res = newVRegD(env);
3140 HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3141 HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3142 addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3143 res, argL, argR, 0, False));
3144 return res;
3145 } else if (matchIRExpr(&mi, p_vcge_16sx4, e)) {
3146 HReg res = newVRegD(env);
3147 HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3148 HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3149 addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3150 res, argL, argR, 1, False));
3151 return res;
3152 } else if (matchIRExpr(&mi, p_vcge_32sx2, e)) {
3153 HReg res = newVRegD(env);
3154 HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3155 HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3156 addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3157 res, argL, argR, 2, False));
3158 return res;
3159 } else if (matchIRExpr(&mi, p_vcge_8ux8, e)) {
3160 HReg res = newVRegD(env);
3161 HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3162 HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3163 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3164 res, argL, argR, 0, False));
3165 return res;
3166 } else if (matchIRExpr(&mi, p_vcge_16ux4, e)) {
3167 HReg res = newVRegD(env);
3168 HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3169 HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3170 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3171 res, argL, argR, 1, False));
3172 return res;
3173 } else if (matchIRExpr(&mi, p_vcge_32ux2, e)) {
3174 HReg res = newVRegD(env);
3175 HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3176 HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3177 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3178 res, argL, argR, 2, False));
3179 return res;
3180 } else {
3181 HReg res = newVRegD(env);
3182 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3183 addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, arg, 4, False));
3184 return res;
3185 }
3186 }
3187 case Iop_Dup8x8:
3188 case Iop_Dup16x4:
3189 case Iop_Dup32x2: {
3190 HReg res, arg;
3191 UInt size;
3192 DECLARE_PATTERN(p_vdup_8x8);
3193 DECLARE_PATTERN(p_vdup_16x4);
3194 DECLARE_PATTERN(p_vdup_32x2);
3195 DEFINE_PATTERN(p_vdup_8x8,
3196 unop(Iop_Dup8x8, binop(Iop_GetElem8x8, bind(0), bind(1))));
3197 DEFINE_PATTERN(p_vdup_16x4,
3198 unop(Iop_Dup16x4, binop(Iop_GetElem16x4, bind(0), bind(1))));
3199 DEFINE_PATTERN(p_vdup_32x2,
3200 unop(Iop_Dup32x2, binop(Iop_GetElem32x2, bind(0), bind(1))));
3201 if (matchIRExpr(&mi, p_vdup_8x8, e)) {
3202 UInt index;
3203 UInt imm4;
3204 if (mi.bindee[1]->tag == Iex_Const &&
3205 typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3206 index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3207 imm4 = (index << 1) + 1;
3208 if (index < 8) {
3209 res = newVRegD(env);
3210 arg = iselNeon64Expr(env, mi.bindee[0]);
3211 addInstr(env, ARMInstr_NUnaryS(
3212 ARMneon_VDUP,
3213 mkARMNRS(ARMNRS_Reg, res, 0),
3214 mkARMNRS(ARMNRS_Scalar, arg, index),
3215 imm4, False
3216 ));
3217 return res;
3218 }
3219 }
3220 } else if (matchIRExpr(&mi, p_vdup_16x4, e)) {
3221 UInt index;
3222 UInt imm4;
3223 if (mi.bindee[1]->tag == Iex_Const &&
3224 typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3225 index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3226 imm4 = (index << 2) + 2;
3227 if (index < 4) {
3228 res = newVRegD(env);
3229 arg = iselNeon64Expr(env, mi.bindee[0]);
3230 addInstr(env, ARMInstr_NUnaryS(
3231 ARMneon_VDUP,
3232 mkARMNRS(ARMNRS_Reg, res, 0),
3233 mkARMNRS(ARMNRS_Scalar, arg, index),
3234 imm4, False
3235 ));
3236 return res;
3237 }
3238 }
3239 } else if (matchIRExpr(&mi, p_vdup_32x2, e)) {
3240 UInt index;
3241 UInt imm4;
3242 if (mi.bindee[1]->tag == Iex_Const &&
3243 typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3244 index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3245 imm4 = (index << 3) + 4;
3246 if (index < 2) {
3247 res = newVRegD(env);
3248 arg = iselNeon64Expr(env, mi.bindee[0]);
3249 addInstr(env, ARMInstr_NUnaryS(
3250 ARMneon_VDUP,
3251 mkARMNRS(ARMNRS_Reg, res, 0),
3252 mkARMNRS(ARMNRS_Scalar, arg, index),
3253 imm4, False
3254 ));
3255 return res;
3256 }
3257 }
3258 }
3259 arg = iselIntExpr_R(env, e->Iex.Unop.arg);
3260 res = newVRegD(env);
3261 switch (e->Iex.Unop.op) {
3262 case Iop_Dup8x8: size = 0; break;
3263 case Iop_Dup16x4: size = 1; break;
3264 case Iop_Dup32x2: size = 2; break;
3265 default: vassert(0);
3266 }
3267 addInstr(env, ARMInstr_NUnary(ARMneon_DUP, res, arg, size, False));
3268 return res;
3269 }
3270 case Iop_Abs8x8:
3271 case Iop_Abs16x4:
3272 case Iop_Abs32x2: {
3273 HReg res = newVRegD(env);
3274 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3275 UInt size = 0;
3276 switch(e->Iex.Binop.op) {
3277 case Iop_Abs8x8: size = 0; break;
3278 case Iop_Abs16x4: size = 1; break;
3279 case Iop_Abs32x2: size = 2; break;
3280 default: vassert(0);
3281 }
3282 addInstr(env, ARMInstr_NUnary(ARMneon_ABS, res, arg, size, False));
3283 return res;
3284 }
3285 case Iop_Reverse64_8x8:
3286 case Iop_Reverse64_16x4:
3287 case Iop_Reverse64_32x2: {
3288 HReg res = newVRegD(env);
3289 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3290 UInt size = 0;
3291 switch(e->Iex.Binop.op) {
3292 case Iop_Reverse64_8x8: size = 0; break;
3293 case Iop_Reverse64_16x4: size = 1; break;
3294 case Iop_Reverse64_32x2: size = 2; break;
3295 default: vassert(0);
3296 }
3297 addInstr(env, ARMInstr_NUnary(ARMneon_REV64,
3298 res, arg, size, False));
3299 return res;
3300 }
3301 case Iop_Reverse32_8x8:
3302 case Iop_Reverse32_16x4: {
3303 HReg res = newVRegD(env);
3304 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3305 UInt size = 0;
3306 switch(e->Iex.Binop.op) {
3307 case Iop_Reverse32_8x8: size = 0; break;
3308 case Iop_Reverse32_16x4: size = 1; break;
3309 default: vassert(0);
3310 }
3311 addInstr(env, ARMInstr_NUnary(ARMneon_REV32,
3312 res, arg, size, False));
3313 return res;
3314 }
3315 case Iop_Reverse16_8x8: {
3316 HReg res = newVRegD(env);
3317 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3318 UInt size = 0;
3319 addInstr(env, ARMInstr_NUnary(ARMneon_REV16,
3320 res, arg, size, False));
3321 return res;
3322 }
3323 case Iop_CmpwNEZ64: {
3324 HReg x_lsh = newVRegD(env);
3325 HReg x_rsh = newVRegD(env);
3326 HReg lsh_amt = newVRegD(env);
3327 HReg rsh_amt = newVRegD(env);
3328 HReg zero = newVRegD(env);
3329 HReg tmp = newVRegD(env);
3330 HReg tmp2 = newVRegD(env);
3331 HReg res = newVRegD(env);
3332 HReg x = newVRegD(env);
3333 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3334 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp2, arg, 2, False));
3335 addInstr(env, ARMInstr_NUnary(ARMneon_NOT, x, tmp2, 4, False));
3336 addInstr(env, ARMInstr_NeonImm(lsh_amt, ARMNImm_TI(0, 32)));
3337 addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0, 0)));
3338 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
3339 rsh_amt, zero, lsh_amt, 2, False));
3340 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
3341 x_lsh, x, lsh_amt, 3, False));
3342 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
3343 x_rsh, x, rsh_amt, 3, False));
3344 addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
3345 tmp, x_lsh, x_rsh, 0, False));
3346 addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
3347 res, tmp, x, 0, False));
3348 return res;
3349 }
3350 case Iop_CmpNEZ8x8:
3351 case Iop_CmpNEZ16x4:
3352 case Iop_CmpNEZ32x2: {
3353 HReg res = newVRegD(env);
3354 HReg tmp = newVRegD(env);
3355 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3356 UInt size;
3357 switch (e->Iex.Unop.op) {
3358 case Iop_CmpNEZ8x8: size = 0; break;
3359 case Iop_CmpNEZ16x4: size = 1; break;
3360 case Iop_CmpNEZ32x2: size = 2; break;
3361 default: vassert(0);
3362 }
3363 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp, arg, size, False));
3364 addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, tmp, 4, False));
3365 return res;
3366 }
sewardj5f438dd2011-06-16 11:36:23 +00003367 case Iop_NarrowUn16to8x8:
3368 case Iop_NarrowUn32to16x4:
3369 case Iop_NarrowUn64to32x2: {
sewardj6c60b322010-08-22 12:48:28 +00003370 HReg res = newVRegD(env);
3371 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3372 UInt size = 0;
3373 switch(e->Iex.Binop.op) {
sewardj5f438dd2011-06-16 11:36:23 +00003374 case Iop_NarrowUn16to8x8: size = 0; break;
3375 case Iop_NarrowUn32to16x4: size = 1; break;
3376 case Iop_NarrowUn64to32x2: size = 2; break;
sewardj6c60b322010-08-22 12:48:28 +00003377 default: vassert(0);
3378 }
3379 addInstr(env, ARMInstr_NUnary(ARMneon_COPYN,
3380 res, arg, size, False));
3381 return res;
3382 }
sewardj5f438dd2011-06-16 11:36:23 +00003383 case Iop_QNarrowUn16Sto8Sx8:
3384 case Iop_QNarrowUn32Sto16Sx4:
3385 case Iop_QNarrowUn64Sto32Sx2: {
sewardj6c60b322010-08-22 12:48:28 +00003386 HReg res = newVRegD(env);
3387 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3388 UInt size = 0;
3389 switch(e->Iex.Binop.op) {
sewardj5f438dd2011-06-16 11:36:23 +00003390 case Iop_QNarrowUn16Sto8Sx8: size = 0; break;
3391 case Iop_QNarrowUn32Sto16Sx4: size = 1; break;
3392 case Iop_QNarrowUn64Sto32Sx2: size = 2; break;
sewardj6c60b322010-08-22 12:48:28 +00003393 default: vassert(0);
3394 }
3395 addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNSS,
3396 res, arg, size, False));
3397 return res;
3398 }
sewardj5f438dd2011-06-16 11:36:23 +00003399 case Iop_QNarrowUn16Sto8Ux8:
3400 case Iop_QNarrowUn32Sto16Ux4:
3401 case Iop_QNarrowUn64Sto32Ux2: {
sewardj6c60b322010-08-22 12:48:28 +00003402 HReg res = newVRegD(env);
3403 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3404 UInt size = 0;
3405 switch(e->Iex.Binop.op) {
sewardj5f438dd2011-06-16 11:36:23 +00003406 case Iop_QNarrowUn16Sto8Ux8: size = 0; break;
3407 case Iop_QNarrowUn32Sto16Ux4: size = 1; break;
3408 case Iop_QNarrowUn64Sto32Ux2: size = 2; break;
sewardj6c60b322010-08-22 12:48:28 +00003409 default: vassert(0);
3410 }
3411 addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNUS,
3412 res, arg, size, False));
3413 return res;
3414 }
sewardj5f438dd2011-06-16 11:36:23 +00003415 case Iop_QNarrowUn16Uto8Ux8:
3416 case Iop_QNarrowUn32Uto16Ux4:
3417 case Iop_QNarrowUn64Uto32Ux2: {
sewardj6c60b322010-08-22 12:48:28 +00003418 HReg res = newVRegD(env);
3419 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3420 UInt size = 0;
3421 switch(e->Iex.Binop.op) {
sewardj5f438dd2011-06-16 11:36:23 +00003422 case Iop_QNarrowUn16Uto8Ux8: size = 0; break;
3423 case Iop_QNarrowUn32Uto16Ux4: size = 1; break;
3424 case Iop_QNarrowUn64Uto32Ux2: size = 2; break;
sewardj6c60b322010-08-22 12:48:28 +00003425 default: vassert(0);
3426 }
3427 addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNUU,
3428 res, arg, size, False));
3429 return res;
3430 }
3431 case Iop_PwAddL8Sx8:
3432 case Iop_PwAddL16Sx4:
3433 case Iop_PwAddL32Sx2: {
3434 HReg res = newVRegD(env);
3435 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3436 UInt size = 0;
3437 switch(e->Iex.Binop.op) {
3438 case Iop_PwAddL8Sx8: size = 0; break;
3439 case Iop_PwAddL16Sx4: size = 1; break;
3440 case Iop_PwAddL32Sx2: size = 2; break;
3441 default: vassert(0);
3442 }
3443 addInstr(env, ARMInstr_NUnary(ARMneon_PADDLS,
3444 res, arg, size, False));
3445 return res;
3446 }
3447 case Iop_PwAddL8Ux8:
3448 case Iop_PwAddL16Ux4:
3449 case Iop_PwAddL32Ux2: {
3450 HReg res = newVRegD(env);
3451 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3452 UInt size = 0;
3453 switch(e->Iex.Binop.op) {
3454 case Iop_PwAddL8Ux8: size = 0; break;
3455 case Iop_PwAddL16Ux4: size = 1; break;
3456 case Iop_PwAddL32Ux2: size = 2; break;
3457 default: vassert(0);
3458 }
3459 addInstr(env, ARMInstr_NUnary(ARMneon_PADDLU,
3460 res, arg, size, False));
3461 return res;
3462 }
3463 case Iop_Cnt8x8: {
3464 HReg res = newVRegD(env);
3465 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3466 UInt size = 0;
3467 addInstr(env, ARMInstr_NUnary(ARMneon_CNT,
3468 res, arg, size, False));
3469 return res;
3470 }
3471 case Iop_Clz8Sx8:
3472 case Iop_Clz16Sx4:
3473 case Iop_Clz32Sx2: {
3474 HReg res = newVRegD(env);
3475 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3476 UInt size = 0;
3477 switch(e->Iex.Binop.op) {
3478 case Iop_Clz8Sx8: size = 0; break;
3479 case Iop_Clz16Sx4: size = 1; break;
3480 case Iop_Clz32Sx2: size = 2; break;
3481 default: vassert(0);
3482 }
3483 addInstr(env, ARMInstr_NUnary(ARMneon_CLZ,
3484 res, arg, size, False));
3485 return res;
3486 }
3487 case Iop_Cls8Sx8:
3488 case Iop_Cls16Sx4:
3489 case Iop_Cls32Sx2: {
3490 HReg res = newVRegD(env);
3491 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3492 UInt size = 0;
3493 switch(e->Iex.Binop.op) {
3494 case Iop_Cls8Sx8: size = 0; break;
3495 case Iop_Cls16Sx4: size = 1; break;
3496 case Iop_Cls32Sx2: size = 2; break;
3497 default: vassert(0);
3498 }
3499 addInstr(env, ARMInstr_NUnary(ARMneon_CLS,
3500 res, arg, size, False));
3501 return res;
3502 }
3503 case Iop_FtoI32Sx2_RZ: {
3504 HReg res = newVRegD(env);
3505 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3506 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoS,
3507 res, arg, 2, False));
3508 return res;
3509 }
3510 case Iop_FtoI32Ux2_RZ: {
3511 HReg res = newVRegD(env);
3512 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3513 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoU,
3514 res, arg, 2, False));
3515 return res;
3516 }
3517 case Iop_I32StoFx2: {
3518 HReg res = newVRegD(env);
3519 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3520 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTStoF,
3521 res, arg, 2, False));
3522 return res;
3523 }
3524 case Iop_I32UtoFx2: {
3525 HReg res = newVRegD(env);
3526 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3527 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTUtoF,
3528 res, arg, 2, False));
3529 return res;
3530 }
3531 case Iop_F32toF16x4: {
3532 HReg res = newVRegD(env);
3533 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3534 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTF32toF16,
3535 res, arg, 2, False));
3536 return res;
3537 }
3538 case Iop_Recip32Fx2: {
3539 HReg res = newVRegD(env);
3540 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3541 addInstr(env, ARMInstr_NUnary(ARMneon_VRECIPF,
3542 res, argL, 0, False));
3543 return res;
3544 }
3545 case Iop_Recip32x2: {
3546 HReg res = newVRegD(env);
3547 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3548 addInstr(env, ARMInstr_NUnary(ARMneon_VRECIP,
3549 res, argL, 0, False));
3550 return res;
3551 }
3552 case Iop_Abs32Fx2: {
3553 DECLARE_PATTERN(p_vabd_32fx2);
3554 DEFINE_PATTERN(p_vabd_32fx2,
3555 unop(Iop_Abs32Fx2,
3556 binop(Iop_Sub32Fx2,
3557 bind(0),
3558 bind(1))));
3559 if (matchIRExpr(&mi, p_vabd_32fx2, e)) {
3560 HReg res = newVRegD(env);
3561 HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3562 HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3563 addInstr(env, ARMInstr_NBinary(ARMneon_VABDFP,
3564 res, argL, argR, 0, False));
3565 return res;
3566 } else {
3567 HReg res = newVRegD(env);
3568 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3569 addInstr(env, ARMInstr_NUnary(ARMneon_VABSFP,
3570 res, arg, 0, False));
3571 return res;
3572 }
3573 }
3574 case Iop_Rsqrte32Fx2: {
3575 HReg res = newVRegD(env);
3576 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3577 addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTEFP,
3578 res, arg, 0, False));
3579 return res;
3580 }
3581 case Iop_Rsqrte32x2: {
3582 HReg res = newVRegD(env);
3583 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3584 addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTE,
3585 res, arg, 0, False));
3586 return res;
3587 }
3588 case Iop_Neg32Fx2: {
3589 HReg res = newVRegD(env);
3590 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3591 addInstr(env, ARMInstr_NUnary(ARMneon_VNEGF,
3592 res, arg, 0, False));
3593 return res;
3594 }
3595 default:
3596 break;
3597 }
3598 } /* if (e->tag == Iex_Unop) */
3599
3600 if (e->tag == Iex_Triop) {
florian420bfa92012-06-02 20:29:22 +00003601 IRTriop *triop = e->Iex.Triop.details;
3602
3603 switch (triop->op) {
sewardj6c60b322010-08-22 12:48:28 +00003604 case Iop_Extract64: {
3605 HReg res = newVRegD(env);
florian420bfa92012-06-02 20:29:22 +00003606 HReg argL = iselNeon64Expr(env, triop->arg1);
3607 HReg argR = iselNeon64Expr(env, triop->arg2);
sewardj6c60b322010-08-22 12:48:28 +00003608 UInt imm4;
florian420bfa92012-06-02 20:29:22 +00003609 if (triop->arg3->tag != Iex_Const ||
3610 typeOfIRExpr(env->type_env, triop->arg3) != Ity_I8) {
sewardj6c60b322010-08-22 12:48:28 +00003611 vpanic("ARM target supports Iop_Extract64 with constant "
3612 "third argument less than 16 only\n");
3613 }
florian420bfa92012-06-02 20:29:22 +00003614 imm4 = triop->arg3->Iex.Const.con->Ico.U8;
sewardj6c60b322010-08-22 12:48:28 +00003615 if (imm4 >= 8) {
3616 vpanic("ARM target supports Iop_Extract64 with constant "
3617 "third argument less than 16 only\n");
3618 }
3619 addInstr(env, ARMInstr_NBinary(ARMneon_VEXT,
3620 res, argL, argR, imm4, False));
3621 return res;
3622 }
3623 case Iop_SetElem8x8:
3624 case Iop_SetElem16x4:
3625 case Iop_SetElem32x2: {
3626 HReg res = newVRegD(env);
florian420bfa92012-06-02 20:29:22 +00003627 HReg dreg = iselNeon64Expr(env, triop->arg1);
3628 HReg arg = iselIntExpr_R(env, triop->arg3);
sewardj6c60b322010-08-22 12:48:28 +00003629 UInt index, size;
florian420bfa92012-06-02 20:29:22 +00003630 if (triop->arg2->tag != Iex_Const ||
3631 typeOfIRExpr(env->type_env, triop->arg2) != Ity_I8) {
sewardj6c60b322010-08-22 12:48:28 +00003632 vpanic("ARM target supports SetElem with constant "
3633 "second argument only\n");
3634 }
florian420bfa92012-06-02 20:29:22 +00003635 index = triop->arg2->Iex.Const.con->Ico.U8;
3636 switch (triop->op) {
sewardj6c60b322010-08-22 12:48:28 +00003637 case Iop_SetElem8x8: vassert(index < 8); size = 0; break;
3638 case Iop_SetElem16x4: vassert(index < 4); size = 1; break;
3639 case Iop_SetElem32x2: vassert(index < 2); size = 2; break;
3640 default: vassert(0);
3641 }
3642 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, res, dreg, 4, False));
3643 addInstr(env, ARMInstr_NUnaryS(ARMneon_SETELEM,
3644 mkARMNRS(ARMNRS_Scalar, res, index),
3645 mkARMNRS(ARMNRS_Reg, arg, 0),
3646 size, False));
3647 return res;
3648 }
3649 default:
3650 break;
3651 }
3652 }
3653
3654 /* --------- MULTIPLEX --------- */
3655 if (e->tag == Iex_Mux0X) {
3656 HReg rLo, rHi;
3657 HReg res = newVRegD(env);
3658 iselInt64Expr(&rHi, &rLo, env, e);
3659 addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
3660 return res;
3661 }
3662
3663 ppIRExpr(e);
3664 vpanic("iselNeon64Expr");
3665}
3666
3667static HReg iselNeonExpr ( ISelEnv* env, IRExpr* e )
3668{
3669 HReg r = iselNeonExpr_wrk( env, e );
3670 vassert(hregClass(r) == HRcVec128);
3671 vassert(hregIsVirtual(r));
3672 return r;
3673}
3674
3675/* DO NOT CALL THIS DIRECTLY */
3676static HReg iselNeonExpr_wrk ( ISelEnv* env, IRExpr* e )
3677{
3678 IRType ty = typeOfIRExpr(env->type_env, e);
3679 MatchInfo mi;
3680 vassert(e);
3681 vassert(ty == Ity_V128);
3682
3683 if (e->tag == Iex_RdTmp) {
3684 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
3685 }
3686
3687 if (e->tag == Iex_Const) {
3688 /* At the moment there should be no 128-bit constants in IR for ARM
3689 generated during disassemble. They are represented as Iop_64HLtoV128
3690 binary operation and are handled among binary ops. */
3691 /* But zero can be created by valgrind internal optimizer */
3692 if (e->Iex.Const.con->Ico.V128 == 0) {
3693 HReg res = newVRegV(env);
3694 addInstr(env, ARMInstr_NeonImm(res, ARMNImm_TI(0, 0)));
3695 return res;
3696 }
3697 ppIRExpr(e);
3698 vpanic("128-bit constant is not implemented");
3699 }
3700
3701 if (e->tag == Iex_Load) {
3702 HReg res = newVRegV(env);
3703 ARMAModeN* am = iselIntExpr_AModeN(env, e->Iex.Load.addr);
3704 vassert(ty == Ity_V128);
3705 addInstr(env, ARMInstr_NLdStQ(True, res, am));
3706 return res;
3707 }
3708
3709 if (e->tag == Iex_Get) {
3710 HReg addr = newVRegI(env);
3711 HReg res = newVRegV(env);
3712 vassert(ty == Ity_V128);
3713 addInstr(env, ARMInstr_Add32(addr, hregARM_R8(), e->Iex.Get.offset));
3714 addInstr(env, ARMInstr_NLdStQ(True, res, mkARMAModeN_R(addr)));
3715 return res;
3716 }
3717
3718 if (e->tag == Iex_Unop) {
3719 switch (e->Iex.Unop.op) {
3720 case Iop_NotV128: {
3721 DECLARE_PATTERN(p_veqz_8x16);
3722 DECLARE_PATTERN(p_veqz_16x8);
3723 DECLARE_PATTERN(p_veqz_32x4);
3724 DECLARE_PATTERN(p_vcge_8sx16);
3725 DECLARE_PATTERN(p_vcge_16sx8);
3726 DECLARE_PATTERN(p_vcge_32sx4);
3727 DECLARE_PATTERN(p_vcge_8ux16);
3728 DECLARE_PATTERN(p_vcge_16ux8);
3729 DECLARE_PATTERN(p_vcge_32ux4);
3730 DEFINE_PATTERN(p_veqz_8x16,
3731 unop(Iop_NotV128, unop(Iop_CmpNEZ8x16, bind(0))));
3732 DEFINE_PATTERN(p_veqz_16x8,
3733 unop(Iop_NotV128, unop(Iop_CmpNEZ16x8, bind(0))));
3734 DEFINE_PATTERN(p_veqz_32x4,
3735 unop(Iop_NotV128, unop(Iop_CmpNEZ32x4, bind(0))));
3736 DEFINE_PATTERN(p_vcge_8sx16,
3737 unop(Iop_NotV128, binop(Iop_CmpGT8Sx16, bind(1), bind(0))));
3738 DEFINE_PATTERN(p_vcge_16sx8,
3739 unop(Iop_NotV128, binop(Iop_CmpGT16Sx8, bind(1), bind(0))));
3740 DEFINE_PATTERN(p_vcge_32sx4,
3741 unop(Iop_NotV128, binop(Iop_CmpGT32Sx4, bind(1), bind(0))));
3742 DEFINE_PATTERN(p_vcge_8ux16,
3743 unop(Iop_NotV128, binop(Iop_CmpGT8Ux16, bind(1), bind(0))));
3744 DEFINE_PATTERN(p_vcge_16ux8,
3745 unop(Iop_NotV128, binop(Iop_CmpGT16Ux8, bind(1), bind(0))));
3746 DEFINE_PATTERN(p_vcge_32ux4,
3747 unop(Iop_NotV128, binop(Iop_CmpGT32Ux4, bind(1), bind(0))));
3748 if (matchIRExpr(&mi, p_veqz_8x16, e)) {
3749 HReg res = newVRegV(env);
3750 HReg arg = iselNeonExpr(env, mi.bindee[0]);
3751 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 0, True));
3752 return res;
3753 } else if (matchIRExpr(&mi, p_veqz_16x8, e)) {
3754 HReg res = newVRegV(env);
3755 HReg arg = iselNeonExpr(env, mi.bindee[0]);
3756 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 1, True));
3757 return res;
3758 } else if (matchIRExpr(&mi, p_veqz_32x4, e)) {
3759 HReg res = newVRegV(env);
3760 HReg arg = iselNeonExpr(env, mi.bindee[0]);
3761 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 2, True));
3762 return res;
3763 } else if (matchIRExpr(&mi, p_vcge_8sx16, e)) {
3764 HReg res = newVRegV(env);
3765 HReg argL = iselNeonExpr(env, mi.bindee[0]);
3766 HReg argR = iselNeonExpr(env, mi.bindee[1]);
3767 addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3768 res, argL, argR, 0, True));
3769 return res;
3770 } else if (matchIRExpr(&mi, p_vcge_16sx8, e)) {
3771 HReg res = newVRegV(env);
3772 HReg argL = iselNeonExpr(env, mi.bindee[0]);
3773 HReg argR = iselNeonExpr(env, mi.bindee[1]);
3774 addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3775 res, argL, argR, 1, True));
3776 return res;
3777 } else if (matchIRExpr(&mi, p_vcge_32sx4, e)) {
3778 HReg res = newVRegV(env);
3779 HReg argL = iselNeonExpr(env, mi.bindee[0]);
3780 HReg argR = iselNeonExpr(env, mi.bindee[1]);
3781 addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3782 res, argL, argR, 2, True));
3783 return res;
3784 } else if (matchIRExpr(&mi, p_vcge_8ux16, e)) {
3785 HReg res = newVRegV(env);
3786 HReg argL = iselNeonExpr(env, mi.bindee[0]);
3787 HReg argR = iselNeonExpr(env, mi.bindee[1]);
3788 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3789 res, argL, argR, 0, True));
3790 return res;
3791 } else if (matchIRExpr(&mi, p_vcge_16ux8, e)) {
3792 HReg res = newVRegV(env);
3793 HReg argL = iselNeonExpr(env, mi.bindee[0]);
3794 HReg argR = iselNeonExpr(env, mi.bindee[1]);
3795 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3796 res, argL, argR, 1, True));
3797 return res;
3798 } else if (matchIRExpr(&mi, p_vcge_32ux4, e)) {
3799 HReg res = newVRegV(env);
3800 HReg argL = iselNeonExpr(env, mi.bindee[0]);
3801 HReg argR = iselNeonExpr(env, mi.bindee[1]);
3802 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3803 res, argL, argR, 2, True));
3804 return res;
3805 } else {
3806 HReg res = newVRegV(env);
3807 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3808 addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, arg, 4, True));
3809 return res;
3810 }
3811 }
3812 case Iop_Dup8x16:
3813 case Iop_Dup16x8:
3814 case Iop_Dup32x4: {
3815 HReg res, arg;
3816 UInt size;
3817 DECLARE_PATTERN(p_vdup_8x16);
3818 DECLARE_PATTERN(p_vdup_16x8);
3819 DECLARE_PATTERN(p_vdup_32x4);
3820 DEFINE_PATTERN(p_vdup_8x16,
3821 unop(Iop_Dup8x16, binop(Iop_GetElem8x8, bind(0), bind(1))));
3822 DEFINE_PATTERN(p_vdup_16x8,
3823 unop(Iop_Dup16x8, binop(Iop_GetElem16x4, bind(0), bind(1))));
3824 DEFINE_PATTERN(p_vdup_32x4,
3825 unop(Iop_Dup32x4, binop(Iop_GetElem32x2, bind(0), bind(1))));
3826 if (matchIRExpr(&mi, p_vdup_8x16, e)) {
3827 UInt index;
3828 UInt imm4;
3829 if (mi.bindee[1]->tag == Iex_Const &&
3830 typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3831 index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3832 imm4 = (index << 1) + 1;
3833 if (index < 8) {
3834 res = newVRegV(env);
3835 arg = iselNeon64Expr(env, mi.bindee[0]);
3836 addInstr(env, ARMInstr_NUnaryS(
3837 ARMneon_VDUP,
3838 mkARMNRS(ARMNRS_Reg, res, 0),
3839 mkARMNRS(ARMNRS_Scalar, arg, index),
3840 imm4, True
3841 ));
3842 return res;
3843 }
3844 }
3845 } else if (matchIRExpr(&mi, p_vdup_16x8, e)) {
3846 UInt index;
3847 UInt imm4;
3848 if (mi.bindee[1]->tag == Iex_Const &&
3849 typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3850 index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3851 imm4 = (index << 2) + 2;
3852 if (index < 4) {
3853 res = newVRegV(env);
3854 arg = iselNeon64Expr(env, mi.bindee[0]);
3855 addInstr(env, ARMInstr_NUnaryS(
3856 ARMneon_VDUP,
3857 mkARMNRS(ARMNRS_Reg, res, 0),
3858 mkARMNRS(ARMNRS_Scalar, arg, index),
3859 imm4, True
3860 ));
3861 return res;
3862 }
3863 }
3864 } else if (matchIRExpr(&mi, p_vdup_32x4, e)) {
3865 UInt index;
3866 UInt imm4;
3867 if (mi.bindee[1]->tag == Iex_Const &&
3868 typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3869 index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3870 imm4 = (index << 3) + 4;
3871 if (index < 2) {
3872 res = newVRegV(env);
3873 arg = iselNeon64Expr(env, mi.bindee[0]);
3874 addInstr(env, ARMInstr_NUnaryS(
3875 ARMneon_VDUP,
3876 mkARMNRS(ARMNRS_Reg, res, 0),
3877 mkARMNRS(ARMNRS_Scalar, arg, index),
3878 imm4, True
3879 ));
3880 return res;
3881 }
3882 }
3883 }
3884 arg = iselIntExpr_R(env, e->Iex.Unop.arg);
3885 res = newVRegV(env);
3886 switch (e->Iex.Unop.op) {
3887 case Iop_Dup8x16: size = 0; break;
3888 case Iop_Dup16x8: size = 1; break;
3889 case Iop_Dup32x4: size = 2; break;
3890 default: vassert(0);
3891 }
3892 addInstr(env, ARMInstr_NUnary(ARMneon_DUP, res, arg, size, True));
3893 return res;
3894 }
3895 case Iop_Abs8x16:
3896 case Iop_Abs16x8:
3897 case Iop_Abs32x4: {
3898 HReg res = newVRegV(env);
3899 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3900 UInt size = 0;
3901 switch(e->Iex.Binop.op) {
3902 case Iop_Abs8x16: size = 0; break;
3903 case Iop_Abs16x8: size = 1; break;
3904 case Iop_Abs32x4: size = 2; break;
3905 default: vassert(0);
3906 }
3907 addInstr(env, ARMInstr_NUnary(ARMneon_ABS, res, arg, size, True));
3908 return res;
3909 }
3910 case Iop_Reverse64_8x16:
3911 case Iop_Reverse64_16x8:
3912 case Iop_Reverse64_32x4: {
3913 HReg res = newVRegV(env);
3914 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3915 UInt size = 0;
3916 switch(e->Iex.Binop.op) {
3917 case Iop_Reverse64_8x16: size = 0; break;
3918 case Iop_Reverse64_16x8: size = 1; break;
3919 case Iop_Reverse64_32x4: size = 2; break;
3920 default: vassert(0);
3921 }
3922 addInstr(env, ARMInstr_NUnary(ARMneon_REV64,
3923 res, arg, size, True));
3924 return res;
3925 }
3926 case Iop_Reverse32_8x16:
3927 case Iop_Reverse32_16x8: {
3928 HReg res = newVRegV(env);
3929 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3930 UInt size = 0;
3931 switch(e->Iex.Binop.op) {
3932 case Iop_Reverse32_8x16: size = 0; break;
3933 case Iop_Reverse32_16x8: size = 1; break;
3934 default: vassert(0);
3935 }
3936 addInstr(env, ARMInstr_NUnary(ARMneon_REV32,
3937 res, arg, size, True));
3938 return res;
3939 }
3940 case Iop_Reverse16_8x16: {
3941 HReg res = newVRegV(env);
3942 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3943 UInt size = 0;
3944 addInstr(env, ARMInstr_NUnary(ARMneon_REV16,
3945 res, arg, size, True));
3946 return res;
3947 }
3948 case Iop_CmpNEZ64x2: {
3949 HReg x_lsh = newVRegV(env);
3950 HReg x_rsh = newVRegV(env);
3951 HReg lsh_amt = newVRegV(env);
3952 HReg rsh_amt = newVRegV(env);
3953 HReg zero = newVRegV(env);
3954 HReg tmp = newVRegV(env);
3955 HReg tmp2 = newVRegV(env);
3956 HReg res = newVRegV(env);
3957 HReg x = newVRegV(env);
3958 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3959 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp2, arg, 2, True));
3960 addInstr(env, ARMInstr_NUnary(ARMneon_NOT, x, tmp2, 4, True));
3961 addInstr(env, ARMInstr_NeonImm(lsh_amt, ARMNImm_TI(0, 32)));
3962 addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0, 0)));
3963 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
3964 rsh_amt, zero, lsh_amt, 2, True));
3965 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
3966 x_lsh, x, lsh_amt, 3, True));
3967 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
3968 x_rsh, x, rsh_amt, 3, True));
3969 addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
3970 tmp, x_lsh, x_rsh, 0, True));
3971 addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
3972 res, tmp, x, 0, True));
3973 return res;
3974 }
3975 case Iop_CmpNEZ8x16:
3976 case Iop_CmpNEZ16x8:
3977 case Iop_CmpNEZ32x4: {
3978 HReg res = newVRegV(env);
3979 HReg tmp = newVRegV(env);
3980 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3981 UInt size;
3982 switch (e->Iex.Unop.op) {
3983 case Iop_CmpNEZ8x16: size = 0; break;
3984 case Iop_CmpNEZ16x8: size = 1; break;
3985 case Iop_CmpNEZ32x4: size = 2; break;
3986 default: vassert(0);
3987 }
3988 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp, arg, size, True));
3989 addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, tmp, 4, True));
3990 return res;
3991 }
sewardj5f438dd2011-06-16 11:36:23 +00003992 case Iop_Widen8Uto16x8:
3993 case Iop_Widen16Uto32x4:
3994 case Iop_Widen32Uto64x2: {
sewardj6c60b322010-08-22 12:48:28 +00003995 HReg res = newVRegV(env);
3996 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3997 UInt size;
3998 switch (e->Iex.Unop.op) {
sewardj5f438dd2011-06-16 11:36:23 +00003999 case Iop_Widen8Uto16x8: size = 0; break;
4000 case Iop_Widen16Uto32x4: size = 1; break;
4001 case Iop_Widen32Uto64x2: size = 2; break;
sewardj6c60b322010-08-22 12:48:28 +00004002 default: vassert(0);
4003 }
4004 addInstr(env, ARMInstr_NUnary(ARMneon_COPYLU,
4005 res, arg, size, True));
4006 return res;
4007 }
sewardj5f438dd2011-06-16 11:36:23 +00004008 case Iop_Widen8Sto16x8:
4009 case Iop_Widen16Sto32x4:
4010 case Iop_Widen32Sto64x2: {
sewardj6c60b322010-08-22 12:48:28 +00004011 HReg res = newVRegV(env);
4012 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4013 UInt size;
4014 switch (e->Iex.Unop.op) {
sewardj5f438dd2011-06-16 11:36:23 +00004015 case Iop_Widen8Sto16x8: size = 0; break;
4016 case Iop_Widen16Sto32x4: size = 1; break;
4017 case Iop_Widen32Sto64x2: size = 2; break;
sewardj6c60b322010-08-22 12:48:28 +00004018 default: vassert(0);
4019 }
4020 addInstr(env, ARMInstr_NUnary(ARMneon_COPYLS,
4021 res, arg, size, True));
4022 return res;
4023 }
4024 case Iop_PwAddL8Sx16:
4025 case Iop_PwAddL16Sx8:
4026 case Iop_PwAddL32Sx4: {
4027 HReg res = newVRegV(env);
4028 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4029 UInt size = 0;
4030 switch(e->Iex.Binop.op) {
4031 case Iop_PwAddL8Sx16: size = 0; break;
4032 case Iop_PwAddL16Sx8: size = 1; break;
4033 case Iop_PwAddL32Sx4: size = 2; break;
4034 default: vassert(0);
4035 }
4036 addInstr(env, ARMInstr_NUnary(ARMneon_PADDLS,
4037 res, arg, size, True));
4038 return res;
4039 }
4040 case Iop_PwAddL8Ux16:
4041 case Iop_PwAddL16Ux8:
4042 case Iop_PwAddL32Ux4: {
4043 HReg res = newVRegV(env);
4044 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4045 UInt size = 0;
4046 switch(e->Iex.Binop.op) {
4047 case Iop_PwAddL8Ux16: size = 0; break;
4048 case Iop_PwAddL16Ux8: size = 1; break;
4049 case Iop_PwAddL32Ux4: size = 2; break;
4050 default: vassert(0);
4051 }
4052 addInstr(env, ARMInstr_NUnary(ARMneon_PADDLU,
4053 res, arg, size, True));
4054 return res;
4055 }
4056 case Iop_Cnt8x16: {
4057 HReg res = newVRegV(env);
4058 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4059 UInt size = 0;
4060 addInstr(env, ARMInstr_NUnary(ARMneon_CNT, res, arg, size, True));
4061 return res;
4062 }
4063 case Iop_Clz8Sx16:
4064 case Iop_Clz16Sx8:
4065 case Iop_Clz32Sx4: {
4066 HReg res = newVRegV(env);
4067 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4068 UInt size = 0;
4069 switch(e->Iex.Binop.op) {
4070 case Iop_Clz8Sx16: size = 0; break;
4071 case Iop_Clz16Sx8: size = 1; break;
4072 case Iop_Clz32Sx4: size = 2; break;
4073 default: vassert(0);
4074 }
4075 addInstr(env, ARMInstr_NUnary(ARMneon_CLZ, res, arg, size, True));
4076 return res;
4077 }
4078 case Iop_Cls8Sx16:
4079 case Iop_Cls16Sx8:
4080 case Iop_Cls32Sx4: {
4081 HReg res = newVRegV(env);
4082 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4083 UInt size = 0;
4084 switch(e->Iex.Binop.op) {
4085 case Iop_Cls8Sx16: size = 0; break;
4086 case Iop_Cls16Sx8: size = 1; break;
4087 case Iop_Cls32Sx4: size = 2; break;
4088 default: vassert(0);
4089 }
4090 addInstr(env, ARMInstr_NUnary(ARMneon_CLS, res, arg, size, True));
4091 return res;
4092 }
4093 case Iop_FtoI32Sx4_RZ: {
4094 HReg res = newVRegV(env);
4095 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4096 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoS,
4097 res, arg, 2, True));
4098 return res;
4099 }
4100 case Iop_FtoI32Ux4_RZ: {
4101 HReg res = newVRegV(env);
4102 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4103 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoU,
4104 res, arg, 2, True));
4105 return res;
4106 }
4107 case Iop_I32StoFx4: {
4108 HReg res = newVRegV(env);
4109 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4110 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTStoF,
4111 res, arg, 2, True));
4112 return res;
4113 }
4114 case Iop_I32UtoFx4: {
4115 HReg res = newVRegV(env);
4116 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4117 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTUtoF,
4118 res, arg, 2, True));
4119 return res;
4120 }
4121 case Iop_F16toF32x4: {
4122 HReg res = newVRegV(env);
4123 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4124 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTF16toF32,
4125 res, arg, 2, True));
4126 return res;
4127 }
4128 case Iop_Recip32Fx4: {
4129 HReg res = newVRegV(env);
4130 HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4131 addInstr(env, ARMInstr_NUnary(ARMneon_VRECIPF,
4132 res, argL, 0, True));
4133 return res;
4134 }
4135 case Iop_Recip32x4: {
4136 HReg res = newVRegV(env);
4137 HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4138 addInstr(env, ARMInstr_NUnary(ARMneon_VRECIP,
4139 res, argL, 0, True));
4140 return res;
4141 }
4142 case Iop_Abs32Fx4: {
4143 DECLARE_PATTERN(p_vabd_32fx4);
4144 DEFINE_PATTERN(p_vabd_32fx4,
4145 unop(Iop_Abs32Fx4,
4146 binop(Iop_Sub32Fx4,
4147 bind(0),
4148 bind(1))));
4149 if (matchIRExpr(&mi, p_vabd_32fx4, e)) {
4150 HReg res = newVRegV(env);
4151 HReg argL = iselNeonExpr(env, mi.bindee[0]);
4152 HReg argR = iselNeonExpr(env, mi.bindee[1]);
4153 addInstr(env, ARMInstr_NBinary(ARMneon_VABDFP,
4154 res, argL, argR, 0, True));
4155 return res;
4156 } else {
4157 HReg res = newVRegV(env);
4158 HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4159 addInstr(env, ARMInstr_NUnary(ARMneon_VABSFP,
4160 res, argL, 0, True));
4161 return res;
4162 }
4163 }
4164 case Iop_Rsqrte32Fx4: {
4165 HReg res = newVRegV(env);
4166 HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4167 addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTEFP,
4168 res, argL, 0, True));
4169 return res;
4170 }
4171 case Iop_Rsqrte32x4: {
4172 HReg res = newVRegV(env);
4173 HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4174 addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTE,
4175 res, argL, 0, True));
4176 return res;
4177 }
4178 case Iop_Neg32Fx4: {
4179 HReg res = newVRegV(env);
4180 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4181 addInstr(env, ARMInstr_NUnary(ARMneon_VNEGF,
4182 res, arg, 0, True));
4183 return res;
4184 }
4185 /* ... */
4186 default:
4187 break;
4188 }
4189 }
4190
4191 if (e->tag == Iex_Binop) {
4192 switch (e->Iex.Binop.op) {
4193 case Iop_64HLtoV128:
4194 /* Try to match into single "VMOV reg, imm" instruction */
4195 if (e->Iex.Binop.arg1->tag == Iex_Const &&
4196 e->Iex.Binop.arg2->tag == Iex_Const &&
4197 typeOfIRExpr(env->type_env, e->Iex.Binop.arg1) == Ity_I64 &&
4198 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) == Ity_I64 &&
4199 e->Iex.Binop.arg1->Iex.Const.con->Ico.U64 ==
4200 e->Iex.Binop.arg2->Iex.Const.con->Ico.U64) {
4201 ULong imm64 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
4202 ARMNImm *imm = Imm64_to_ARMNImm(imm64);
4203 if (imm) {
4204 HReg res = newVRegV(env);
4205 addInstr(env, ARMInstr_NeonImm(res, imm));
4206 return res;
4207 }
4208 if ((imm64 >> 32) == 0LL &&
4209 (imm = Imm64_to_ARMNImm(imm64 | (imm64 << 32))) != NULL) {
4210 HReg tmp1 = newVRegV(env);
4211 HReg tmp2 = newVRegV(env);
4212 HReg res = newVRegV(env);
4213 if (imm->type < 10) {
4214 addInstr(env, ARMInstr_NeonImm(tmp1, ARMNImm_TI(9,0x0f)));
4215 addInstr(env, ARMInstr_NeonImm(tmp2, imm));
4216 addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
4217 res, tmp1, tmp2, 4, True));
4218 return res;
4219 }
4220 }
4221 if ((imm64 & 0xFFFFFFFFLL) == 0LL &&
4222 (imm = Imm64_to_ARMNImm(imm64 | (imm64 >> 32))) != NULL) {
4223 HReg tmp1 = newVRegV(env);
4224 HReg tmp2 = newVRegV(env);
4225 HReg res = newVRegV(env);
4226 if (imm->type < 10) {
4227 addInstr(env, ARMInstr_NeonImm(tmp1, ARMNImm_TI(9,0xf0)));
4228 addInstr(env, ARMInstr_NeonImm(tmp2, imm));
4229 addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
4230 res, tmp1, tmp2, 4, True));
4231 return res;
4232 }
4233 }
4234 }
sewardj6828dc72011-09-30 08:49:02 +00004235 /* Does not match "VMOV Reg, Imm" form. We'll have to do
4236 it the slow way. */
4237 {
4238 /* local scope */
4239 /* Done via the stack for ease of use. */
4240 /* FIXME: assumes little endian host */
4241 HReg w3, w2, w1, w0;
4242 HReg res = newVRegV(env);
4243 ARMAMode1* sp_0 = ARMAMode1_RI(hregARM_R13(), 0);
4244 ARMAMode1* sp_4 = ARMAMode1_RI(hregARM_R13(), 4);
4245 ARMAMode1* sp_8 = ARMAMode1_RI(hregARM_R13(), 8);
4246 ARMAMode1* sp_12 = ARMAMode1_RI(hregARM_R13(), 12);
4247 ARMRI84* c_16 = ARMRI84_I84(16,0);
4248 /* Make space for SP */
4249 addInstr(env, ARMInstr_Alu(ARMalu_SUB, hregARM_R13(),
4250 hregARM_R13(), c_16));
4251
4252 /* Store the less significant 64 bits */
4253 iselInt64Expr(&w1, &w0, env, e->Iex.Binop.arg2);
4254 addInstr(env, ARMInstr_LdSt32(False/*store*/, w0, sp_0));
4255 addInstr(env, ARMInstr_LdSt32(False/*store*/, w1, sp_4));
4256
4257 /* Store the more significant 64 bits */
4258 iselInt64Expr(&w3, &w2, env, e->Iex.Binop.arg1);
4259 addInstr(env, ARMInstr_LdSt32(False/*store*/, w2, sp_8));
4260 addInstr(env, ARMInstr_LdSt32(False/*store*/, w3, sp_12));
4261
4262 /* Load result back from stack. */
4263 addInstr(env, ARMInstr_NLdStQ(True/*load*/, res,
4264 mkARMAModeN_R(hregARM_R13())));
4265
4266 /* Restore SP */
4267 addInstr(env, ARMInstr_Alu(ARMalu_ADD, hregARM_R13(),
4268 hregARM_R13(), c_16));
4269 return res;
4270 } /* local scope */
sewardj6c60b322010-08-22 12:48:28 +00004271 goto neon_expr_bad;
4272 case Iop_AndV128: {
4273 HReg res = newVRegV(env);
4274 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4275 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4276 addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
4277 res, argL, argR, 4, True));
4278 return res;
4279 }
4280 case Iop_OrV128: {
4281 HReg res = newVRegV(env);
4282 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4283 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4284 addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
4285 res, argL, argR, 4, True));
4286 return res;
4287 }
4288 case Iop_XorV128: {
4289 HReg res = newVRegV(env);
4290 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4291 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4292 addInstr(env, ARMInstr_NBinary(ARMneon_VXOR,
4293 res, argL, argR, 4, True));
4294 return res;
4295 }
4296 case Iop_Add8x16:
4297 case Iop_Add16x8:
4298 case Iop_Add32x4:
4299 case Iop_Add64x2: {
4300 /*
4301 FIXME: remove this if not used
4302 DECLARE_PATTERN(p_vrhadd_32sx4);
4303 ULong one = (1LL << 32) | 1LL;
4304 DEFINE_PATTERN(p_vrhadd_32sx4,
4305 binop(Iop_Add32x4,
4306 binop(Iop_Add32x4,
4307 binop(Iop_SarN32x4,
4308 bind(0),
4309 mkU8(1)),
4310 binop(Iop_SarN32x4,
4311 bind(1),
4312 mkU8(1))),
4313 binop(Iop_SarN32x4,
4314 binop(Iop_Add32x4,
4315 binop(Iop_Add32x4,
4316 binop(Iop_AndV128,
4317 bind(0),
4318 mkU128(one)),
4319 binop(Iop_AndV128,
4320 bind(1),
4321 mkU128(one))),
4322 mkU128(one)),
4323 mkU8(1))));
4324 */
4325 HReg res = newVRegV(env);
4326 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4327 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4328 UInt size;
4329 switch (e->Iex.Binop.op) {
4330 case Iop_Add8x16: size = 0; break;
4331 case Iop_Add16x8: size = 1; break;
4332 case Iop_Add32x4: size = 2; break;
4333 case Iop_Add64x2: size = 3; break;
4334 default:
4335 ppIROp(e->Iex.Binop.op);
4336 vpanic("Illegal element size in VADD");
4337 }
4338 addInstr(env, ARMInstr_NBinary(ARMneon_VADD,
4339 res, argL, argR, size, True));
4340 return res;
4341 }
4342 case Iop_Add32Fx4: {
4343 HReg res = newVRegV(env);
4344 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4345 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4346 UInt size = 0;
4347 addInstr(env, ARMInstr_NBinary(ARMneon_VADDFP,
4348 res, argL, argR, size, True));
4349 return res;
4350 }
4351 case Iop_Recps32Fx4: {
4352 HReg res = newVRegV(env);
4353 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4354 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4355 UInt size = 0;
4356 addInstr(env, ARMInstr_NBinary(ARMneon_VRECPS,
4357 res, argL, argR, size, True));
4358 return res;
4359 }
4360 case Iop_Rsqrts32Fx4: {
4361 HReg res = newVRegV(env);
4362 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4363 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4364 UInt size = 0;
4365 addInstr(env, ARMInstr_NBinary(ARMneon_VRSQRTS,
4366 res, argL, argR, size, True));
4367 return res;
4368 }
4369 case Iop_InterleaveEvenLanes8x16:
4370 case Iop_InterleaveEvenLanes16x8:
4371 case Iop_InterleaveEvenLanes32x4:
4372 case Iop_InterleaveOddLanes8x16:
4373 case Iop_InterleaveOddLanes16x8:
4374 case Iop_InterleaveOddLanes32x4: {
4375 HReg tmp = newVRegV(env);
4376 HReg res = newVRegV(env);
4377 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4378 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4379 UInt size;
4380 UInt is_lo;
4381 switch (e->Iex.Binop.op) {
4382 case Iop_InterleaveEvenLanes8x16: is_lo = 0; size = 0; break;
4383 case Iop_InterleaveOddLanes8x16: is_lo = 1; size = 0; break;
4384 case Iop_InterleaveEvenLanes16x8: is_lo = 0; size = 1; break;
4385 case Iop_InterleaveOddLanes16x8: is_lo = 1; size = 1; break;
4386 case Iop_InterleaveEvenLanes32x4: is_lo = 0; size = 2; break;
4387 case Iop_InterleaveOddLanes32x4: is_lo = 1; size = 2; break;
4388 default:
4389 ppIROp(e->Iex.Binop.op);
4390 vpanic("Illegal element size in VTRN");
4391 }
4392 if (is_lo) {
4393 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4394 tmp, argL, 4, True));
4395 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4396 res, argR, 4, True));
4397 addInstr(env, ARMInstr_NDual(ARMneon_TRN,
4398 res, tmp, size, True));
4399 } else {
4400 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4401 tmp, argR, 4, True));
4402 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4403 res, argL, 4, True));
4404 addInstr(env, ARMInstr_NDual(ARMneon_TRN,
4405 tmp, res, size, True));
4406 }
4407 return res;
4408 }
4409 case Iop_InterleaveHI8x16:
4410 case Iop_InterleaveHI16x8:
4411 case Iop_InterleaveHI32x4:
4412 case Iop_InterleaveLO8x16:
4413 case Iop_InterleaveLO16x8:
4414 case Iop_InterleaveLO32x4: {
4415 HReg tmp = newVRegV(env);
4416 HReg res = newVRegV(env);
4417 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4418 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4419 UInt size;
4420 UInt is_lo;
4421 switch (e->Iex.Binop.op) {
4422 case Iop_InterleaveHI8x16: is_lo = 1; size = 0; break;
4423 case Iop_InterleaveLO8x16: is_lo = 0; size = 0; break;
4424 case Iop_InterleaveHI16x8: is_lo = 1; size = 1; break;
4425 case Iop_InterleaveLO16x8: is_lo = 0; size = 1; break;
4426 case Iop_InterleaveHI32x4: is_lo = 1; size = 2; break;
4427 case Iop_InterleaveLO32x4: is_lo = 0; size = 2; break;
4428 default:
4429 ppIROp(e->Iex.Binop.op);
4430 vpanic("Illegal element size in VZIP");
4431 }
4432 if (is_lo) {
4433 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4434 tmp, argL, 4, True));
4435 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4436 res, argR, 4, True));
4437 addInstr(env, ARMInstr_NDual(ARMneon_ZIP,
4438 res, tmp, size, True));
4439 } else {
4440 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4441 tmp, argR, 4, True));
4442 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4443 res, argL, 4, True));
4444 addInstr(env, ARMInstr_NDual(ARMneon_ZIP,
4445 tmp, res, size, True));
4446 }
4447 return res;
4448 }
4449 case Iop_CatOddLanes8x16:
4450 case Iop_CatOddLanes16x8:
4451 case Iop_CatOddLanes32x4:
4452 case Iop_CatEvenLanes8x16:
4453 case Iop_CatEvenLanes16x8:
4454 case Iop_CatEvenLanes32x4: {
4455 HReg tmp = newVRegV(env);
4456 HReg res = newVRegV(env);
4457 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4458 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4459 UInt size;
4460 UInt is_lo;
4461 switch (e->Iex.Binop.op) {
4462 case Iop_CatOddLanes8x16: is_lo = 1; size = 0; break;
4463 case Iop_CatEvenLanes8x16: is_lo = 0; size = 0; break;
4464 case Iop_CatOddLanes16x8: is_lo = 1; size = 1; break;
4465 case Iop_CatEvenLanes16x8: is_lo = 0; size = 1; break;
4466 case Iop_CatOddLanes32x4: is_lo = 1; size = 2; break;
4467 case Iop_CatEvenLanes32x4: is_lo = 0; size = 2; break;
4468 default:
4469 ppIROp(e->Iex.Binop.op);
4470 vpanic("Illegal element size in VUZP");
4471 }
4472 if (is_lo) {
4473 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4474 tmp, argL, 4, True));
4475 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4476 res, argR, 4, True));
4477 addInstr(env, ARMInstr_NDual(ARMneon_UZP,
4478 res, tmp, size, True));
4479 } else {
4480 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4481 tmp, argR, 4, True));
4482 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4483 res, argL, 4, True));
4484 addInstr(env, ARMInstr_NDual(ARMneon_UZP,
4485 tmp, res, size, True));
4486 }
4487 return res;
4488 }
4489 case Iop_QAdd8Ux16:
4490 case Iop_QAdd16Ux8:
4491 case Iop_QAdd32Ux4:
4492 case Iop_QAdd64Ux2: {
4493 HReg res = newVRegV(env);
4494 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4495 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4496 UInt size;
4497 switch (e->Iex.Binop.op) {
4498 case Iop_QAdd8Ux16: size = 0; break;
4499 case Iop_QAdd16Ux8: size = 1; break;
4500 case Iop_QAdd32Ux4: size = 2; break;
4501 case Iop_QAdd64Ux2: size = 3; break;
4502 default:
4503 ppIROp(e->Iex.Binop.op);
4504 vpanic("Illegal element size in VQADDU");
4505 }
4506 addInstr(env, ARMInstr_NBinary(ARMneon_VQADDU,
4507 res, argL, argR, size, True));
4508 return res;
4509 }
4510 case Iop_QAdd8Sx16:
4511 case Iop_QAdd16Sx8:
4512 case Iop_QAdd32Sx4:
4513 case Iop_QAdd64Sx2: {
4514 HReg res = newVRegV(env);
4515 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4516 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4517 UInt size;
4518 switch (e->Iex.Binop.op) {
4519 case Iop_QAdd8Sx16: size = 0; break;
4520 case Iop_QAdd16Sx8: size = 1; break;
4521 case Iop_QAdd32Sx4: size = 2; break;
4522 case Iop_QAdd64Sx2: size = 3; break;
4523 default:
4524 ppIROp(e->Iex.Binop.op);
4525 vpanic("Illegal element size in VQADDS");
4526 }
4527 addInstr(env, ARMInstr_NBinary(ARMneon_VQADDS,
4528 res, argL, argR, size, True));
4529 return res;
4530 }
4531 case Iop_Sub8x16:
4532 case Iop_Sub16x8:
4533 case Iop_Sub32x4:
4534 case Iop_Sub64x2: {
4535 HReg res = newVRegV(env);
4536 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4537 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4538 UInt size;
4539 switch (e->Iex.Binop.op) {
4540 case Iop_Sub8x16: size = 0; break;
4541 case Iop_Sub16x8: size = 1; break;
4542 case Iop_Sub32x4: size = 2; break;
4543 case Iop_Sub64x2: size = 3; break;
4544 default:
4545 ppIROp(e->Iex.Binop.op);
4546 vpanic("Illegal element size in VSUB");
4547 }
4548 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
4549 res, argL, argR, size, True));
4550 return res;
4551 }
4552 case Iop_Sub32Fx4: {
4553 HReg res = newVRegV(env);
4554 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4555 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4556 UInt size = 0;
4557 addInstr(env, ARMInstr_NBinary(ARMneon_VSUBFP,
4558 res, argL, argR, size, True));
4559 return res;
4560 }
4561 case Iop_QSub8Ux16:
4562 case Iop_QSub16Ux8:
4563 case Iop_QSub32Ux4:
4564 case Iop_QSub64Ux2: {
4565 HReg res = newVRegV(env);
4566 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4567 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4568 UInt size;
4569 switch (e->Iex.Binop.op) {
4570 case Iop_QSub8Ux16: size = 0; break;
4571 case Iop_QSub16Ux8: size = 1; break;
4572 case Iop_QSub32Ux4: size = 2; break;
4573 case Iop_QSub64Ux2: size = 3; break;
4574 default:
4575 ppIROp(e->Iex.Binop.op);
4576 vpanic("Illegal element size in VQSUBU");
4577 }
4578 addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBU,
4579 res, argL, argR, size, True));
4580 return res;
4581 }
4582 case Iop_QSub8Sx16:
4583 case Iop_QSub16Sx8:
4584 case Iop_QSub32Sx4:
4585 case Iop_QSub64Sx2: {
4586 HReg res = newVRegV(env);
4587 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4588 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4589 UInt size;
4590 switch (e->Iex.Binop.op) {
4591 case Iop_QSub8Sx16: size = 0; break;
4592 case Iop_QSub16Sx8: size = 1; break;
4593 case Iop_QSub32Sx4: size = 2; break;
4594 case Iop_QSub64Sx2: size = 3; break;
4595 default:
4596 ppIROp(e->Iex.Binop.op);
4597 vpanic("Illegal element size in VQSUBS");
4598 }
4599 addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBS,
4600 res, argL, argR, size, True));
4601 return res;
4602 }
4603 case Iop_Max8Ux16:
4604 case Iop_Max16Ux8:
4605 case Iop_Max32Ux4: {
4606 HReg res = newVRegV(env);
4607 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4608 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4609 UInt size;
4610 switch (e->Iex.Binop.op) {
4611 case Iop_Max8Ux16: size = 0; break;
4612 case Iop_Max16Ux8: size = 1; break;
4613 case Iop_Max32Ux4: size = 2; break;
4614 default: vpanic("Illegal element size in VMAXU");
4615 }
4616 addInstr(env, ARMInstr_NBinary(ARMneon_VMAXU,
4617 res, argL, argR, size, True));
4618 return res;
4619 }
4620 case Iop_Max8Sx16:
4621 case Iop_Max16Sx8:
4622 case Iop_Max32Sx4: {
4623 HReg res = newVRegV(env);
4624 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4625 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4626 UInt size;
4627 switch (e->Iex.Binop.op) {
4628 case Iop_Max8Sx16: size = 0; break;
4629 case Iop_Max16Sx8: size = 1; break;
4630 case Iop_Max32Sx4: size = 2; break;
4631 default: vpanic("Illegal element size in VMAXU");
4632 }
4633 addInstr(env, ARMInstr_NBinary(ARMneon_VMAXS,
4634 res, argL, argR, size, True));
4635 return res;
4636 }
4637 case Iop_Min8Ux16:
4638 case Iop_Min16Ux8:
4639 case Iop_Min32Ux4: {
4640 HReg res = newVRegV(env);
4641 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4642 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4643 UInt size;
4644 switch (e->Iex.Binop.op) {
4645 case Iop_Min8Ux16: size = 0; break;
4646 case Iop_Min16Ux8: size = 1; break;
4647 case Iop_Min32Ux4: size = 2; break;
4648 default: vpanic("Illegal element size in VMAXU");
4649 }
4650 addInstr(env, ARMInstr_NBinary(ARMneon_VMINU,
4651 res, argL, argR, size, True));
4652 return res;
4653 }
4654 case Iop_Min8Sx16:
4655 case Iop_Min16Sx8:
4656 case Iop_Min32Sx4: {
4657 HReg res = newVRegV(env);
4658 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4659 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4660 UInt size;
4661 switch (e->Iex.Binop.op) {
4662 case Iop_Min8Sx16: size = 0; break;
4663 case Iop_Min16Sx8: size = 1; break;
4664 case Iop_Min32Sx4: size = 2; break;
4665 default: vpanic("Illegal element size in VMAXU");
4666 }
4667 addInstr(env, ARMInstr_NBinary(ARMneon_VMINS,
4668 res, argL, argR, size, True));
4669 return res;
4670 }
4671 case Iop_Sar8x16:
4672 case Iop_Sar16x8:
4673 case Iop_Sar32x4:
4674 case Iop_Sar64x2: {
4675 HReg res = newVRegV(env);
4676 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4677 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4678 HReg argR2 = newVRegV(env);
4679 HReg zero = newVRegV(env);
4680 UInt size;
4681 switch (e->Iex.Binop.op) {
4682 case Iop_Sar8x16: size = 0; break;
4683 case Iop_Sar16x8: size = 1; break;
4684 case Iop_Sar32x4: size = 2; break;
4685 case Iop_Sar64x2: size = 3; break;
4686 default: vassert(0);
4687 }
4688 addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
4689 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
4690 argR2, zero, argR, size, True));
4691 addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
4692 res, argL, argR2, size, True));
4693 return res;
4694 }
4695 case Iop_Sal8x16:
4696 case Iop_Sal16x8:
4697 case Iop_Sal32x4:
4698 case Iop_Sal64x2: {
4699 HReg res = newVRegV(env);
4700 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4701 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4702 UInt size;
4703 switch (e->Iex.Binop.op) {
4704 case Iop_Sal8x16: size = 0; break;
4705 case Iop_Sal16x8: size = 1; break;
4706 case Iop_Sal32x4: size = 2; break;
4707 case Iop_Sal64x2: size = 3; break;
4708 default: vassert(0);
4709 }
4710 addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
4711 res, argL, argR, size, True));
4712 return res;
4713 }
4714 case Iop_Shr8x16:
4715 case Iop_Shr16x8:
4716 case Iop_Shr32x4:
4717 case Iop_Shr64x2: {
4718 HReg res = newVRegV(env);
4719 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4720 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4721 HReg argR2 = newVRegV(env);
4722 HReg zero = newVRegV(env);
4723 UInt size;
4724 switch (e->Iex.Binop.op) {
4725 case Iop_Shr8x16: size = 0; break;
4726 case Iop_Shr16x8: size = 1; break;
4727 case Iop_Shr32x4: size = 2; break;
4728 case Iop_Shr64x2: size = 3; break;
4729 default: vassert(0);
4730 }
4731 addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
4732 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
4733 argR2, zero, argR, size, True));
4734 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4735 res, argL, argR2, size, True));
4736 return res;
4737 }
4738 case Iop_Shl8x16:
4739 case Iop_Shl16x8:
4740 case Iop_Shl32x4:
4741 case Iop_Shl64x2: {
4742 HReg res = newVRegV(env);
4743 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4744 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4745 UInt size;
4746 switch (e->Iex.Binop.op) {
4747 case Iop_Shl8x16: size = 0; break;
4748 case Iop_Shl16x8: size = 1; break;
4749 case Iop_Shl32x4: size = 2; break;
4750 case Iop_Shl64x2: size = 3; break;
4751 default: vassert(0);
4752 }
4753 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4754 res, argL, argR, size, True));
4755 return res;
4756 }
4757 case Iop_QShl8x16:
4758 case Iop_QShl16x8:
4759 case Iop_QShl32x4:
4760 case Iop_QShl64x2: {
4761 HReg res = newVRegV(env);
4762 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4763 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4764 UInt size;
4765 switch (e->Iex.Binop.op) {
4766 case Iop_QShl8x16: size = 0; break;
4767 case Iop_QShl16x8: size = 1; break;
4768 case Iop_QShl32x4: size = 2; break;
4769 case Iop_QShl64x2: size = 3; break;
4770 default: vassert(0);
4771 }
4772 addInstr(env, ARMInstr_NShift(ARMneon_VQSHL,
4773 res, argL, argR, size, True));
4774 return res;
4775 }
4776 case Iop_QSal8x16:
4777 case Iop_QSal16x8:
4778 case Iop_QSal32x4:
4779 case Iop_QSal64x2: {
4780 HReg res = newVRegV(env);
4781 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4782 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4783 UInt size;
4784 switch (e->Iex.Binop.op) {
4785 case Iop_QSal8x16: size = 0; break;
4786 case Iop_QSal16x8: size = 1; break;
4787 case Iop_QSal32x4: size = 2; break;
4788 case Iop_QSal64x2: size = 3; break;
4789 default: vassert(0);
4790 }
4791 addInstr(env, ARMInstr_NShift(ARMneon_VQSAL,
4792 res, argL, argR, size, True));
4793 return res;
4794 }
4795 case Iop_QShlN8x16:
4796 case Iop_QShlN16x8:
4797 case Iop_QShlN32x4:
4798 case Iop_QShlN64x2: {
4799 HReg res = newVRegV(env);
4800 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4801 UInt size, imm;
4802 if (e->Iex.Binop.arg2->tag != Iex_Const ||
4803 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
4804 vpanic("ARM taget supports Iop_QShlNAxB with constant "
4805 "second argument only\n");
4806 }
4807 imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
4808 switch (e->Iex.Binop.op) {
4809 case Iop_QShlN8x16: size = 8 | imm; break;
4810 case Iop_QShlN16x8: size = 16 | imm; break;
4811 case Iop_QShlN32x4: size = 32 | imm; break;
4812 case Iop_QShlN64x2: size = 64 | imm; break;
4813 default: vassert(0);
4814 }
4815 addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUU,
4816 res, argL, size, True));
4817 return res;
4818 }
4819 case Iop_QShlN8Sx16:
4820 case Iop_QShlN16Sx8:
4821 case Iop_QShlN32Sx4:
4822 case Iop_QShlN64Sx2: {
4823 HReg res = newVRegV(env);
4824 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4825 UInt size, imm;
4826 if (e->Iex.Binop.arg2->tag != Iex_Const ||
4827 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
4828 vpanic("ARM taget supports Iop_QShlNASxB with constant "
4829 "second argument only\n");
4830 }
4831 imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
4832 switch (e->Iex.Binop.op) {
4833 case Iop_QShlN8Sx16: size = 8 | imm; break;
4834 case Iop_QShlN16Sx8: size = 16 | imm; break;
4835 case Iop_QShlN32Sx4: size = 32 | imm; break;
4836 case Iop_QShlN64Sx2: size = 64 | imm; break;
4837 default: vassert(0);
4838 }
4839 addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUS,
4840 res, argL, size, True));
4841 return res;
4842 }
4843 case Iop_QSalN8x16:
4844 case Iop_QSalN16x8:
4845 case Iop_QSalN32x4:
4846 case Iop_QSalN64x2: {
4847 HReg res = newVRegV(env);
4848 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4849 UInt size, imm;
4850 if (e->Iex.Binop.arg2->tag != Iex_Const ||
4851 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
4852 vpanic("ARM taget supports Iop_QShlNAxB with constant "
4853 "second argument only\n");
4854 }
4855 imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
4856 switch (e->Iex.Binop.op) {
4857 case Iop_QSalN8x16: size = 8 | imm; break;
4858 case Iop_QSalN16x8: size = 16 | imm; break;
4859 case Iop_QSalN32x4: size = 32 | imm; break;
4860 case Iop_QSalN64x2: size = 64 | imm; break;
4861 default: vassert(0);
4862 }
4863 addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNSS,
4864 res, argL, size, True));
4865 return res;
4866 }
4867 case Iop_ShrN8x16:
4868 case Iop_ShrN16x8:
4869 case Iop_ShrN32x4:
4870 case Iop_ShrN64x2: {
4871 HReg res = newVRegV(env);
4872 HReg tmp = newVRegV(env);
4873 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4874 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
4875 HReg argR2 = newVRegI(env);
4876 UInt size;
4877 switch (e->Iex.Binop.op) {
4878 case Iop_ShrN8x16: size = 0; break;
4879 case Iop_ShrN16x8: size = 1; break;
4880 case Iop_ShrN32x4: size = 2; break;
4881 case Iop_ShrN64x2: size = 3; break;
4882 default: vassert(0);
4883 }
4884 addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
4885 addInstr(env, ARMInstr_NUnary(ARMneon_DUP,
4886 tmp, argR2, 0, True));
4887 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4888 res, argL, tmp, size, True));
4889 return res;
4890 }
4891 case Iop_ShlN8x16:
4892 case Iop_ShlN16x8:
4893 case Iop_ShlN32x4:
4894 case Iop_ShlN64x2: {
4895 HReg res = newVRegV(env);
4896 HReg tmp = newVRegV(env);
4897 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4898 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
4899 UInt size;
4900 switch (e->Iex.Binop.op) {
4901 case Iop_ShlN8x16: size = 0; break;
4902 case Iop_ShlN16x8: size = 1; break;
4903 case Iop_ShlN32x4: size = 2; break;
4904 case Iop_ShlN64x2: size = 3; break;
4905 default: vassert(0);
4906 }
4907 addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR, 0, True));
4908 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4909 res, argL, tmp, size, True));
4910 return res;
4911 }
4912 case Iop_SarN8x16:
4913 case Iop_SarN16x8:
4914 case Iop_SarN32x4:
4915 case Iop_SarN64x2: {
4916 HReg res = newVRegV(env);
4917 HReg tmp = newVRegV(env);
4918 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4919 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
4920 HReg argR2 = newVRegI(env);
4921 UInt size;
4922 switch (e->Iex.Binop.op) {
4923 case Iop_SarN8x16: size = 0; break;
4924 case Iop_SarN16x8: size = 1; break;
4925 case Iop_SarN32x4: size = 2; break;
4926 case Iop_SarN64x2: size = 3; break;
4927 default: vassert(0);
4928 }
4929 addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
4930 addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, True));
4931 addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
4932 res, argL, tmp, size, True));
4933 return res;
4934 }
4935 case Iop_CmpGT8Ux16:
4936 case Iop_CmpGT16Ux8:
4937 case Iop_CmpGT32Ux4: {
4938 HReg res = newVRegV(env);
4939 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4940 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4941 UInt size;
4942 switch (e->Iex.Binop.op) {
4943 case Iop_CmpGT8Ux16: size = 0; break;
4944 case Iop_CmpGT16Ux8: size = 1; break;
4945 case Iop_CmpGT32Ux4: size = 2; break;
4946 default: vassert(0);
4947 }
4948 addInstr(env, ARMInstr_NBinary(ARMneon_VCGTU,
4949 res, argL, argR, size, True));
4950 return res;
4951 }
4952 case Iop_CmpGT8Sx16:
4953 case Iop_CmpGT16Sx8:
4954 case Iop_CmpGT32Sx4: {
4955 HReg res = newVRegV(env);
4956 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4957 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4958 UInt size;
4959 switch (e->Iex.Binop.op) {
4960 case Iop_CmpGT8Sx16: size = 0; break;
4961 case Iop_CmpGT16Sx8: size = 1; break;
4962 case Iop_CmpGT32Sx4: size = 2; break;
4963 default: vassert(0);
4964 }
4965 addInstr(env, ARMInstr_NBinary(ARMneon_VCGTS,
4966 res, argL, argR, size, True));
4967 return res;
4968 }
4969 case Iop_CmpEQ8x16:
4970 case Iop_CmpEQ16x8:
4971 case Iop_CmpEQ32x4: {
4972 HReg res = newVRegV(env);
4973 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4974 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4975 UInt size;
4976 switch (e->Iex.Binop.op) {
4977 case Iop_CmpEQ8x16: size = 0; break;
4978 case Iop_CmpEQ16x8: size = 1; break;
4979 case Iop_CmpEQ32x4: size = 2; break;
4980 default: vassert(0);
4981 }
4982 addInstr(env, ARMInstr_NBinary(ARMneon_VCEQ,
4983 res, argL, argR, size, True));
4984 return res;
4985 }
4986 case Iop_Mul8x16:
4987 case Iop_Mul16x8:
4988 case Iop_Mul32x4: {
4989 HReg res = newVRegV(env);
4990 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4991 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4992 UInt size = 0;
4993 switch(e->Iex.Binop.op) {
4994 case Iop_Mul8x16: size = 0; break;
4995 case Iop_Mul16x8: size = 1; break;
4996 case Iop_Mul32x4: size = 2; break;
4997 default: vassert(0);
4998 }
4999 addInstr(env, ARMInstr_NBinary(ARMneon_VMUL,
5000 res, argL, argR, size, True));
5001 return res;
5002 }
5003 case Iop_Mul32Fx4: {
5004 HReg res = newVRegV(env);
5005 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5006 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5007 UInt size = 0;
5008 addInstr(env, ARMInstr_NBinary(ARMneon_VMULFP,
5009 res, argL, argR, size, True));
5010 return res;
5011 }
5012 case Iop_Mull8Ux8:
5013 case Iop_Mull16Ux4:
5014 case Iop_Mull32Ux2: {
5015 HReg res = newVRegV(env);
5016 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5017 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
5018 UInt size = 0;
5019 switch(e->Iex.Binop.op) {
5020 case Iop_Mull8Ux8: size = 0; break;
5021 case Iop_Mull16Ux4: size = 1; break;
5022 case Iop_Mull32Ux2: size = 2; break;
5023 default: vassert(0);
5024 }
5025 addInstr(env, ARMInstr_NBinary(ARMneon_VMULLU,
5026 res, argL, argR, size, True));
5027 return res;
5028 }
5029
5030 case Iop_Mull8Sx8:
5031 case Iop_Mull16Sx4:
5032 case Iop_Mull32Sx2: {
5033 HReg res = newVRegV(env);
5034 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5035 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
5036 UInt size = 0;
5037 switch(e->Iex.Binop.op) {
5038 case Iop_Mull8Sx8: size = 0; break;
5039 case Iop_Mull16Sx4: size = 1; break;
5040 case Iop_Mull32Sx2: size = 2; break;
5041 default: vassert(0);
5042 }
5043 addInstr(env, ARMInstr_NBinary(ARMneon_VMULLS,
5044 res, argL, argR, size, True));
5045 return res;
5046 }
5047
5048 case Iop_QDMulHi16Sx8:
5049 case Iop_QDMulHi32Sx4: {
5050 HReg res = newVRegV(env);
5051 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5052 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5053 UInt size = 0;
5054 switch(e->Iex.Binop.op) {
5055 case Iop_QDMulHi16Sx8: size = 1; break;
5056 case Iop_QDMulHi32Sx4: size = 2; break;
5057 default: vassert(0);
5058 }
5059 addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULH,
5060 res, argL, argR, size, True));
5061 return res;
5062 }
5063
5064 case Iop_QRDMulHi16Sx8:
5065 case Iop_QRDMulHi32Sx4: {
5066 HReg res = newVRegV(env);
5067 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5068 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5069 UInt size = 0;
5070 switch(e->Iex.Binop.op) {
5071 case Iop_QRDMulHi16Sx8: size = 1; break;
5072 case Iop_QRDMulHi32Sx4: size = 2; break;
5073 default: vassert(0);
5074 }
5075 addInstr(env, ARMInstr_NBinary(ARMneon_VQRDMULH,
5076 res, argL, argR, size, True));
5077 return res;
5078 }
5079
5080 case Iop_QDMulLong16Sx4:
5081 case Iop_QDMulLong32Sx2: {
5082 HReg res = newVRegV(env);
5083 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5084 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
5085 UInt size = 0;
5086 switch(e->Iex.Binop.op) {
5087 case Iop_QDMulLong16Sx4: size = 1; break;
5088 case Iop_QDMulLong32Sx2: size = 2; break;
5089 default: vassert(0);
5090 }
5091 addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULL,
5092 res, argL, argR, size, True));
5093 return res;
5094 }
5095 case Iop_PolynomialMul8x16: {
5096 HReg res = newVRegV(env);
5097 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5098 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5099 UInt size = 0;
5100 addInstr(env, ARMInstr_NBinary(ARMneon_VMULP,
5101 res, argL, argR, size, True));
5102 return res;
5103 }
5104 case Iop_Max32Fx4: {
5105 HReg res = newVRegV(env);
5106 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5107 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5108 addInstr(env, ARMInstr_NBinary(ARMneon_VMAXF,
5109 res, argL, argR, 2, True));
5110 return res;
5111 }
5112 case Iop_Min32Fx4: {
5113 HReg res = newVRegV(env);
5114 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5115 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5116 addInstr(env, ARMInstr_NBinary(ARMneon_VMINF,
5117 res, argL, argR, 2, True));
5118 return res;
5119 }
5120 case Iop_PwMax32Fx4: {
5121 HReg res = newVRegV(env);
5122 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5123 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5124 addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXF,
5125 res, argL, argR, 2, True));
5126 return res;
5127 }
5128 case Iop_PwMin32Fx4: {
5129 HReg res = newVRegV(env);
5130 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5131 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5132 addInstr(env, ARMInstr_NBinary(ARMneon_VPMINF,
5133 res, argL, argR, 2, True));
5134 return res;
5135 }
5136 case Iop_CmpGT32Fx4: {
5137 HReg res = newVRegV(env);
5138 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5139 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5140 addInstr(env, ARMInstr_NBinary(ARMneon_VCGTF,
5141 res, argL, argR, 2, True));
5142 return res;
5143 }
5144 case Iop_CmpGE32Fx4: {
5145 HReg res = newVRegV(env);
5146 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5147 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5148 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEF,
5149 res, argL, argR, 2, True));
5150 return res;
5151 }
5152 case Iop_CmpEQ32Fx4: {
5153 HReg res = newVRegV(env);
5154 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5155 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5156 addInstr(env, ARMInstr_NBinary(ARMneon_VCEQF,
5157 res, argL, argR, 2, True));
5158 return res;
5159 }
5160
5161 case Iop_PolynomialMull8x8: {
5162 HReg res = newVRegV(env);
5163 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5164 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
5165 UInt size = 0;
5166 addInstr(env, ARMInstr_NBinary(ARMneon_VMULLP,
5167 res, argL, argR, size, True));
5168 return res;
5169 }
5170 case Iop_F32ToFixed32Ux4_RZ:
5171 case Iop_F32ToFixed32Sx4_RZ:
5172 case Iop_Fixed32UToF32x4_RN:
5173 case Iop_Fixed32SToF32x4_RN: {
5174 HReg res = newVRegV(env);
5175 HReg arg = iselNeonExpr(env, e->Iex.Binop.arg1);
5176 ARMNeonUnOp op;
5177 UInt imm6;
5178 if (e->Iex.Binop.arg2->tag != Iex_Const ||
5179 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
5180 vpanic("ARM supports FP <-> Fixed conversion with constant "
5181 "second argument less than 33 only\n");
5182 }
5183 imm6 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
5184 vassert(imm6 <= 32 && imm6 > 0);
5185 imm6 = 64 - imm6;
5186 switch(e->Iex.Binop.op) {
5187 case Iop_F32ToFixed32Ux4_RZ: op = ARMneon_VCVTFtoFixedU; break;
5188 case Iop_F32ToFixed32Sx4_RZ: op = ARMneon_VCVTFtoFixedS; break;
5189 case Iop_Fixed32UToF32x4_RN: op = ARMneon_VCVTFixedUtoF; break;
5190 case Iop_Fixed32SToF32x4_RN: op = ARMneon_VCVTFixedStoF; break;
5191 default: vassert(0);
5192 }
5193 addInstr(env, ARMInstr_NUnary(op, res, arg, imm6, True));
5194 return res;
5195 }
5196 /*
5197 FIXME remove if not used
5198 case Iop_VDup8x16:
5199 case Iop_VDup16x8:
5200 case Iop_VDup32x4: {
5201 HReg res = newVRegV(env);
5202 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5203 UInt imm4;
5204 UInt index;
5205 if (e->Iex.Binop.arg2->tag != Iex_Const ||
5206 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
5207 vpanic("ARM supports Iop_VDup with constant "
5208 "second argument less than 16 only\n");
5209 }
5210 index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
5211 switch(e->Iex.Binop.op) {
5212 case Iop_VDup8x16: imm4 = (index << 1) + 1; break;
5213 case Iop_VDup16x8: imm4 = (index << 2) + 2; break;
5214 case Iop_VDup32x4: imm4 = (index << 3) + 4; break;
5215 default: vassert(0);
5216 }
5217 if (imm4 >= 16) {
5218 vpanic("ARM supports Iop_VDup with constant "
5219 "second argument less than 16 only\n");
5220 }
5221 addInstr(env, ARMInstr_NUnary(ARMneon_VDUP,
5222 res, argL, imm4, True));
5223 return res;
5224 }
5225 */
5226 case Iop_PwAdd8x16:
5227 case Iop_PwAdd16x8:
5228 case Iop_PwAdd32x4: {
5229 HReg res = newVRegV(env);
5230 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5231 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5232 UInt size = 0;
5233 switch(e->Iex.Binop.op) {
5234 case Iop_PwAdd8x16: size = 0; break;
5235 case Iop_PwAdd16x8: size = 1; break;
5236 case Iop_PwAdd32x4: size = 2; break;
5237 default: vassert(0);
5238 }
5239 addInstr(env, ARMInstr_NBinary(ARMneon_VPADD,
5240 res, argL, argR, size, True));
5241 return res;
5242 }
5243 /* ... */
5244 default:
5245 break;
5246 }
5247 }
5248
5249 if (e->tag == Iex_Triop) {
florian420bfa92012-06-02 20:29:22 +00005250 IRTriop *triop = e->Iex.Triop.details;
5251
5252 switch (triop->op) {
sewardj6c60b322010-08-22 12:48:28 +00005253 case Iop_ExtractV128: {
5254 HReg res = newVRegV(env);
florian420bfa92012-06-02 20:29:22 +00005255 HReg argL = iselNeonExpr(env, triop->arg1);
5256 HReg argR = iselNeonExpr(env, triop->arg2);
sewardj6c60b322010-08-22 12:48:28 +00005257 UInt imm4;
florian420bfa92012-06-02 20:29:22 +00005258 if (triop->arg3->tag != Iex_Const ||
5259 typeOfIRExpr(env->type_env, triop->arg3) != Ity_I8) {
sewardj6c60b322010-08-22 12:48:28 +00005260 vpanic("ARM target supports Iop_ExtractV128 with constant "
5261 "third argument less than 16 only\n");
5262 }
florian420bfa92012-06-02 20:29:22 +00005263 imm4 = triop->arg3->Iex.Const.con->Ico.U8;
sewardj6c60b322010-08-22 12:48:28 +00005264 if (imm4 >= 16) {
5265 vpanic("ARM target supports Iop_ExtractV128 with constant "
5266 "third argument less than 16 only\n");
5267 }
5268 addInstr(env, ARMInstr_NBinary(ARMneon_VEXT,
5269 res, argL, argR, imm4, True));
5270 return res;
5271 }
5272 default:
5273 break;
5274 }
5275 }
5276
5277 if (e->tag == Iex_Mux0X) {
5278 HReg r8;
5279 HReg rX = iselNeonExpr(env, e->Iex.Mux0X.exprX);
5280 HReg r0 = iselNeonExpr(env, e->Iex.Mux0X.expr0);
5281 HReg dst = newVRegV(env);
5282 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, dst, rX, 4, True));
5283 r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond);
5284 addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r8,
5285 ARMRI84_I84(0xFF,0)));
5286 addInstr(env, ARMInstr_NCMovQ(ARMcc_EQ, dst, r0));
5287 return dst;
5288 }
5289
5290 neon_expr_bad:
5291 ppIRExpr(e);
5292 vpanic("iselNeonExpr_wrk");
5293}
5294
5295/*---------------------------------------------------------*/
sewardj6c299f32009-12-31 18:00:12 +00005296/*--- ISEL: Floating point expressions (64 bit) ---*/
5297/*---------------------------------------------------------*/
5298
5299/* Compute a 64-bit floating point value into a register, the identity
5300 of which is returned. As with iselIntExpr_R, the reg may be either
5301 real or virtual; in any case it must not be changed by subsequent
5302 code emitted by the caller. */
5303
5304static HReg iselDblExpr ( ISelEnv* env, IRExpr* e )
5305{
5306 HReg r = iselDblExpr_wrk( env, e );
5307# if 0
5308 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
5309# endif
5310 vassert(hregClass(r) == HRcFlt64);
5311 vassert(hregIsVirtual(r));
5312 return r;
5313}
5314
5315/* DO NOT CALL THIS DIRECTLY */
5316static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
5317{
5318 IRType ty = typeOfIRExpr(env->type_env,e);
5319 vassert(e);
5320 vassert(ty == Ity_F64);
5321
5322 if (e->tag == Iex_RdTmp) {
5323 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
5324 }
5325
5326 if (e->tag == Iex_Const) {
5327 /* Just handle the zero case. */
5328 IRConst* con = e->Iex.Const.con;
5329 if (con->tag == Ico_F64i && con->Ico.F64i == 0ULL) {
5330 HReg z32 = newVRegI(env);
5331 HReg dst = newVRegD(env);
5332 addInstr(env, ARMInstr_Imm32(z32, 0));
5333 addInstr(env, ARMInstr_VXferD(True/*toD*/, dst, z32, z32));
5334 return dst;
5335 }
5336 }
5337
5338 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
5339 ARMAModeV* am;
5340 HReg res = newVRegD(env);
5341 vassert(e->Iex.Load.ty == Ity_F64);
5342 am = iselIntExpr_AModeV(env, e->Iex.Load.addr);
5343 addInstr(env, ARMInstr_VLdStD(True/*isLoad*/, res, am));
5344 return res;
5345 }
5346
5347 if (e->tag == Iex_Get) {
5348 // XXX This won't work if offset > 1020 or is not 0 % 4.
5349 // In which case we'll have to generate more longwinded code.
5350 ARMAModeV* am = mkARMAModeV(hregARM_R8(), e->Iex.Get.offset);
5351 HReg res = newVRegD(env);
5352 addInstr(env, ARMInstr_VLdStD(True/*isLoad*/, res, am));
5353 return res;
5354 }
5355
5356 if (e->tag == Iex_Unop) {
5357 switch (e->Iex.Unop.op) {
5358 case Iop_ReinterpI64asF64: {
sewardjc6f970f2012-04-02 21:54:49 +00005359 if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
sewardj6c60b322010-08-22 12:48:28 +00005360 return iselNeon64Expr(env, e->Iex.Unop.arg);
5361 } else {
5362 HReg srcHi, srcLo;
5363 HReg dst = newVRegD(env);
5364 iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg);
5365 addInstr(env, ARMInstr_VXferD(True/*toD*/, dst, srcHi, srcLo));
5366 return dst;
5367 }
sewardj6c299f32009-12-31 18:00:12 +00005368 }
5369 case Iop_NegF64: {
5370 HReg src = iselDblExpr(env, e->Iex.Unop.arg);
5371 HReg dst = newVRegD(env);
5372 addInstr(env, ARMInstr_VUnaryD(ARMvfpu_NEG, dst, src));
5373 return dst;
5374 }
5375 case Iop_AbsF64: {
5376 HReg src = iselDblExpr(env, e->Iex.Unop.arg);
5377 HReg dst = newVRegD(env);
5378 addInstr(env, ARMInstr_VUnaryD(ARMvfpu_ABS, dst, src));
5379 return dst;
5380 }
5381 case Iop_F32toF64: {
5382 HReg src = iselFltExpr(env, e->Iex.Unop.arg);
5383 HReg dst = newVRegD(env);
5384 addInstr(env, ARMInstr_VCvtSD(True/*sToD*/, dst, src));
5385 return dst;
5386 }
5387 case Iop_I32UtoF64:
5388 case Iop_I32StoF64: {
5389 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
5390 HReg f32 = newVRegF(env);
5391 HReg dst = newVRegD(env);
5392 Bool syned = e->Iex.Unop.op == Iop_I32StoF64;
5393 /* VMOV f32, src */
5394 addInstr(env, ARMInstr_VXferS(True/*toS*/, f32, src));
5395 /* FSITOD dst, f32 */
5396 addInstr(env, ARMInstr_VCvtID(True/*iToD*/, syned,
5397 dst, f32));
5398 return dst;
5399 }
5400 default:
5401 break;
5402 }
5403 }
5404
5405 if (e->tag == Iex_Binop) {
5406 switch (e->Iex.Binop.op) {
5407 case Iop_SqrtF64: {
5408 /* first arg is rounding mode; we ignore it. */
5409 HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
5410 HReg dst = newVRegD(env);
5411 addInstr(env, ARMInstr_VUnaryD(ARMvfpu_SQRT, dst, src));
5412 return dst;
5413 }
5414 default:
5415 break;
5416 }
5417 }
5418
5419 if (e->tag == Iex_Triop) {
florian420bfa92012-06-02 20:29:22 +00005420 IRTriop *triop = e->Iex.Triop.details;
5421
5422 switch (triop->op) {
sewardj6c299f32009-12-31 18:00:12 +00005423 case Iop_DivF64:
5424 case Iop_MulF64:
5425 case Iop_AddF64:
5426 case Iop_SubF64: {
5427 ARMVfpOp op = 0; /*INVALID*/
florian420bfa92012-06-02 20:29:22 +00005428 HReg argL = iselDblExpr(env, triop->arg2);
5429 HReg argR = iselDblExpr(env, triop->arg3);
sewardj6c299f32009-12-31 18:00:12 +00005430 HReg dst = newVRegD(env);
florian420bfa92012-06-02 20:29:22 +00005431 switch (triop->op) {
sewardj6c299f32009-12-31 18:00:12 +00005432 case Iop_DivF64: op = ARMvfp_DIV; break;
5433 case Iop_MulF64: op = ARMvfp_MUL; break;
5434 case Iop_AddF64: op = ARMvfp_ADD; break;
5435 case Iop_SubF64: op = ARMvfp_SUB; break;
5436 default: vassert(0);
5437 }
5438 addInstr(env, ARMInstr_VAluD(op, dst, argL, argR));
5439 return dst;
5440 }
5441 default:
5442 break;
5443 }
5444 }
5445
5446 if (e->tag == Iex_Mux0X) {
5447 if (ty == Ity_F64
5448 && typeOfIRExpr(env->type_env,e->Iex.Mux0X.cond) == Ity_I8) {
5449 HReg r8;
5450 HReg rX = iselDblExpr(env, e->Iex.Mux0X.exprX);
5451 HReg r0 = iselDblExpr(env, e->Iex.Mux0X.expr0);
5452 HReg dst = newVRegD(env);
5453 addInstr(env, ARMInstr_VUnaryD(ARMvfpu_COPY, dst, rX));
5454 r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond);
5455 addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r8,
5456 ARMRI84_I84(0xFF,0)));
5457 addInstr(env, ARMInstr_VCMovD(ARMcc_EQ, dst, r0));
5458 return dst;
5459 }
5460 }
5461
5462 ppIRExpr(e);
5463 vpanic("iselDblExpr_wrk");
5464}
5465
5466
5467/*---------------------------------------------------------*/
5468/*--- ISEL: Floating point expressions (32 bit) ---*/
5469/*---------------------------------------------------------*/
5470
/* Compute a 32-bit floating point value into a register, the identity
   of which is returned.  As with iselIntExpr_R, the reg may be either
   real or virtual; in any case it must not be changed by subsequent
   code emitted by the caller.  */
5475
5476static HReg iselFltExpr ( ISelEnv* env, IRExpr* e )
5477{
5478 HReg r = iselFltExpr_wrk( env, e );
5479# if 0
5480 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
5481# endif
5482 vassert(hregClass(r) == HRcFlt32);
5483 vassert(hregIsVirtual(r));
5484 return r;
5485}
5486
5487/* DO NOT CALL THIS DIRECTLY */
5488static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e )
5489{
5490 IRType ty = typeOfIRExpr(env->type_env,e);
5491 vassert(e);
5492 vassert(ty == Ity_F32);
5493
5494 if (e->tag == Iex_RdTmp) {
5495 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
5496 }
5497
5498 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
5499 ARMAModeV* am;
5500 HReg res = newVRegF(env);
5501 vassert(e->Iex.Load.ty == Ity_F32);
5502 am = iselIntExpr_AModeV(env, e->Iex.Load.addr);
5503 addInstr(env, ARMInstr_VLdStS(True/*isLoad*/, res, am));
5504 return res;
5505 }
5506
5507 if (e->tag == Iex_Get) {
5508 // XXX This won't work if offset > 1020 or is not 0 % 4.
5509 // In which case we'll have to generate more longwinded code.
5510 ARMAModeV* am = mkARMAModeV(hregARM_R8(), e->Iex.Get.offset);
5511 HReg res = newVRegF(env);
5512 addInstr(env, ARMInstr_VLdStS(True/*isLoad*/, res, am));
5513 return res;
5514 }
5515
5516 if (e->tag == Iex_Unop) {
5517 switch (e->Iex.Unop.op) {
5518 case Iop_ReinterpI32asF32: {
5519 HReg dst = newVRegF(env);
5520 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
5521 addInstr(env, ARMInstr_VXferS(True/*toS*/, dst, src));
5522 return dst;
5523 }
5524 case Iop_NegF32: {
5525 HReg src = iselFltExpr(env, e->Iex.Unop.arg);
5526 HReg dst = newVRegF(env);
5527 addInstr(env, ARMInstr_VUnaryS(ARMvfpu_NEG, dst, src));
5528 return dst;
5529 }
5530 case Iop_AbsF32: {
5531 HReg src = iselFltExpr(env, e->Iex.Unop.arg);
5532 HReg dst = newVRegF(env);
5533 addInstr(env, ARMInstr_VUnaryS(ARMvfpu_ABS, dst, src));
5534 return dst;
5535 }
5536 default:
5537 break;
5538 }
5539 }
5540
5541 if (e->tag == Iex_Binop) {
5542 switch (e->Iex.Binop.op) {
5543 case Iop_SqrtF32: {
5544 /* first arg is rounding mode; we ignore it. */
5545 HReg src = iselFltExpr(env, e->Iex.Binop.arg2);
5546 HReg dst = newVRegF(env);
5547 addInstr(env, ARMInstr_VUnaryS(ARMvfpu_SQRT, dst, src));
5548 return dst;
5549 }
5550 case Iop_F64toF32: {
5551 HReg valD = iselDblExpr(env, e->Iex.Binop.arg2);
5552 set_VFP_rounding_mode(env, e->Iex.Binop.arg1);
5553 HReg valS = newVRegF(env);
5554 /* FCVTSD valS, valD */
5555 addInstr(env, ARMInstr_VCvtSD(False/*!sToD*/, valS, valD));
5556 set_VFP_rounding_default(env);
5557 return valS;
5558 }
5559 default:
5560 break;
5561 }
5562 }
5563
5564 if (e->tag == Iex_Triop) {
florian420bfa92012-06-02 20:29:22 +00005565 IRTriop *triop = e->Iex.Triop.details;
5566
5567 switch (triop->op) {
sewardj6c299f32009-12-31 18:00:12 +00005568 case Iop_DivF32:
5569 case Iop_MulF32:
5570 case Iop_AddF32:
5571 case Iop_SubF32: {
5572 ARMVfpOp op = 0; /*INVALID*/
florian420bfa92012-06-02 20:29:22 +00005573 HReg argL = iselFltExpr(env, triop->arg2);
5574 HReg argR = iselFltExpr(env, triop->arg3);
sewardj6c299f32009-12-31 18:00:12 +00005575 HReg dst = newVRegF(env);
florian420bfa92012-06-02 20:29:22 +00005576 switch (triop->op) {
sewardj6c299f32009-12-31 18:00:12 +00005577 case Iop_DivF32: op = ARMvfp_DIV; break;
5578 case Iop_MulF32: op = ARMvfp_MUL; break;
5579 case Iop_AddF32: op = ARMvfp_ADD; break;
5580 case Iop_SubF32: op = ARMvfp_SUB; break;
5581 default: vassert(0);
5582 }
5583 addInstr(env, ARMInstr_VAluS(op, dst, argL, argR));
5584 return dst;
5585 }
5586 default:
5587 break;
5588 }
5589 }
5590
5591 if (e->tag == Iex_Mux0X) {
5592 if (ty == Ity_F32
5593 && typeOfIRExpr(env->type_env,e->Iex.Mux0X.cond) == Ity_I8) {
5594 HReg r8;
5595 HReg rX = iselFltExpr(env, e->Iex.Mux0X.exprX);
5596 HReg r0 = iselFltExpr(env, e->Iex.Mux0X.expr0);
5597 HReg dst = newVRegF(env);
5598 addInstr(env, ARMInstr_VUnaryS(ARMvfpu_COPY, dst, rX));
5599 r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond);
5600 addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r8,
5601 ARMRI84_I84(0xFF,0)));
5602 addInstr(env, ARMInstr_VCMovS(ARMcc_EQ, dst, r0));
5603 return dst;
5604 }
5605 }
5606
5607 ppIRExpr(e);
5608 vpanic("iselFltExpr_wrk");
5609}
5610
cerioncee30312004-12-17 20:30:21 +00005611
5612/*---------------------------------------------------------*/
5613/*--- ISEL: Statements ---*/
5614/*---------------------------------------------------------*/
5615
5616static void iselStmt ( ISelEnv* env, IRStmt* stmt )
5617{
5618 if (vex_traceflags & VEX_TRACE_VCODE) {
5619 vex_printf("\n-- ");
5620 ppIRStmt(stmt);
5621 vex_printf("\n");
5622 }
5623 switch (stmt->tag) {
5624
5625 /* --------- STORE --------- */
5626 /* little-endian write to memory */
sewardjaf1ceca2005-06-30 23:31:27 +00005627 case Ist_Store: {
sewardj6c299f32009-12-31 18:00:12 +00005628 IRType tya = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
5629 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
5630 IREndness end = stmt->Ist.Store.end;
sewardjaf1ceca2005-06-30 23:31:27 +00005631
sewardj6c299f32009-12-31 18:00:12 +00005632 if (tya != Ity_I32 || end != Iend_LE)
5633 goto stmt_fail;
sewardjaf1ceca2005-06-30 23:31:27 +00005634
sewardj6c299f32009-12-31 18:00:12 +00005635 if (tyd == Ity_I32) {
5636 HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
5637 ARMAMode1* am = iselIntExpr_AMode1(env, stmt->Ist.Store.addr);
5638 addInstr(env, ARMInstr_LdSt32(False/*!isLoad*/, rD, am));
5639 return;
5640 }
5641 if (tyd == Ity_I16) {
5642 HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
5643 ARMAMode2* am = iselIntExpr_AMode2(env, stmt->Ist.Store.addr);
5644 addInstr(env, ARMInstr_LdSt16(False/*!isLoad*/,
5645 False/*!isSignedLoad*/, rD, am));
5646 return;
5647 }
5648 if (tyd == Ity_I8) {
5649 HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
5650 ARMAMode1* am = iselIntExpr_AMode1(env, stmt->Ist.Store.addr);
5651 addInstr(env, ARMInstr_LdSt8U(False/*!isLoad*/, rD, am));
5652 return;
5653 }
5654 if (tyd == Ity_I64) {
sewardjc6f970f2012-04-02 21:54:49 +00005655 if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
sewardj6c60b322010-08-22 12:48:28 +00005656 HReg dD = iselNeon64Expr(env, stmt->Ist.Store.data);
5657 ARMAModeN* am = iselIntExpr_AModeN(env, stmt->Ist.Store.addr);
5658 addInstr(env, ARMInstr_NLdStD(False, dD, am));
5659 } else {
5660 HReg rDhi, rDlo, rA;
5661 iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.Store.data);
5662 rA = iselIntExpr_R(env, stmt->Ist.Store.addr);
5663 addInstr(env, ARMInstr_LdSt32(False/*!load*/, rDhi,
5664 ARMAMode1_RI(rA,4)));
5665 addInstr(env, ARMInstr_LdSt32(False/*!load*/, rDlo,
5666 ARMAMode1_RI(rA,0)));
5667 }
sewardj6c299f32009-12-31 18:00:12 +00005668 return;
5669 }
5670 if (tyd == Ity_F64) {
5671 HReg dD = iselDblExpr(env, stmt->Ist.Store.data);
5672 ARMAModeV* am = iselIntExpr_AModeV(env, stmt->Ist.Store.addr);
5673 addInstr(env, ARMInstr_VLdStD(False/*!isLoad*/, dD, am));
5674 return;
5675 }
5676 if (tyd == Ity_F32) {
5677 HReg fD = iselFltExpr(env, stmt->Ist.Store.data);
5678 ARMAModeV* am = iselIntExpr_AModeV(env, stmt->Ist.Store.addr);
5679 addInstr(env, ARMInstr_VLdStS(False/*!isLoad*/, fD, am));
5680 return;
5681 }
sewardj6c60b322010-08-22 12:48:28 +00005682 if (tyd == Ity_V128) {
5683 HReg qD = iselNeonExpr(env, stmt->Ist.Store.data);
5684 ARMAModeN* am = iselIntExpr_AModeN(env, stmt->Ist.Store.addr);
5685 addInstr(env, ARMInstr_NLdStQ(False, qD, am));
5686 return;
5687 }
cerioncee30312004-12-17 20:30:21 +00005688
sewardj6c299f32009-12-31 18:00:12 +00005689 break;
cerioncee30312004-12-17 20:30:21 +00005690 }
5691
5692 /* --------- PUT --------- */
5693 /* write guest state, fixed offset */
5694 case Ist_Put: {
5695 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Put.data);
cerioncee30312004-12-17 20:30:21 +00005696
cerioncee30312004-12-17 20:30:21 +00005697 if (tyd == Ity_I32) {
sewardj6c299f32009-12-31 18:00:12 +00005698 HReg rD = iselIntExpr_R(env, stmt->Ist.Put.data);
5699 ARMAMode1* am = ARMAMode1_RI(hregARM_R8(), stmt->Ist.Put.offset);
5700 addInstr(env, ARMInstr_LdSt32(False/*!isLoad*/, rD, am));
5701 return;
cerioncee30312004-12-17 20:30:21 +00005702 }
sewardj6c299f32009-12-31 18:00:12 +00005703 if (tyd == Ity_I64) {
sewardjc6f970f2012-04-02 21:54:49 +00005704 if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
sewardj6c60b322010-08-22 12:48:28 +00005705 HReg addr = newVRegI(env);
5706 HReg qD = iselNeon64Expr(env, stmt->Ist.Put.data);
5707 addInstr(env, ARMInstr_Add32(addr, hregARM_R8(),
5708 stmt->Ist.Put.offset));
5709 addInstr(env, ARMInstr_NLdStD(False, qD, mkARMAModeN_R(addr)));
5710 } else {
5711 HReg rDhi, rDlo;
5712 ARMAMode1* am0 = ARMAMode1_RI(hregARM_R8(),
5713 stmt->Ist.Put.offset + 0);
5714 ARMAMode1* am4 = ARMAMode1_RI(hregARM_R8(),
5715 stmt->Ist.Put.offset + 4);
5716 iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.Put.data);
5717 addInstr(env, ARMInstr_LdSt32(False/*!isLoad*/, rDhi, am4));
5718 addInstr(env, ARMInstr_LdSt32(False/*!isLoad*/, rDlo, am0));
5719 }
sewardj6c299f32009-12-31 18:00:12 +00005720 return;
cerioncee30312004-12-17 20:30:21 +00005721 }
sewardj6c299f32009-12-31 18:00:12 +00005722 if (tyd == Ity_F64) {
5723 // XXX This won't work if offset > 1020 or is not 0 % 4.
5724 // In which case we'll have to generate more longwinded code.
5725 ARMAModeV* am = mkARMAModeV(hregARM_R8(), stmt->Ist.Put.offset);
5726 HReg rD = iselDblExpr(env, stmt->Ist.Put.data);
5727 addInstr(env, ARMInstr_VLdStD(False/*!isLoad*/, rD, am));
5728 return;
cerioncee30312004-12-17 20:30:21 +00005729 }
sewardj6c299f32009-12-31 18:00:12 +00005730 if (tyd == Ity_F32) {
5731 // XXX This won't work if offset > 1020 or is not 0 % 4.
5732 // In which case we'll have to generate more longwinded code.
5733 ARMAModeV* am = mkARMAModeV(hregARM_R8(), stmt->Ist.Put.offset);
5734 HReg rD = iselFltExpr(env, stmt->Ist.Put.data);
5735 addInstr(env, ARMInstr_VLdStS(False/*!isLoad*/, rD, am));
5736 return;
5737 }
sewardj6c60b322010-08-22 12:48:28 +00005738 if (tyd == Ity_V128) {
5739 HReg addr = newVRegI(env);
5740 HReg qD = iselNeonExpr(env, stmt->Ist.Put.data);
5741 addInstr(env, ARMInstr_Add32(addr, hregARM_R8(),
5742 stmt->Ist.Put.offset));
5743 addInstr(env, ARMInstr_NLdStQ(False, qD, mkARMAModeN_R(addr)));
5744 return;
5745 }
cerioncee30312004-12-17 20:30:21 +00005746 break;
5747 }
5748
sewardj6c299f32009-12-31 18:00:12 +00005749//zz /* --------- Indexed PUT --------- */
5750//zz /* write guest state, run-time offset */
5751//zz case Ist_PutI: {
5752//zz ARMAMode2* am2
5753//zz = genGuestArrayOffset(
5754//zz env, stmt->Ist.PutI.descr,
5755//zz stmt->Ist.PutI.ix, stmt->Ist.PutI.bias );
5756//zz
5757//zz IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.PutI.data);
5758//zz
5759//zz if (tyd == Ity_I8) {
5760//zz HReg reg = iselIntExpr_R(env, stmt->Ist.PutI.data);
5761//zz addInstr(env, ARMInstr_StoreB(reg, am2));
5762//zz return;
5763//zz }
5764//zz// CAB: Ity_I32, Ity_I16 ?
5765//zz break;
5766//zz }
cerioncee30312004-12-17 20:30:21 +00005767
5768 /* --------- TMP --------- */
5769 /* assign value to temporary */
sewardjdd40fdf2006-12-24 02:20:24 +00005770 case Ist_WrTmp: {
5771 IRTemp tmp = stmt->Ist.WrTmp.tmp;
cerioncee30312004-12-17 20:30:21 +00005772 IRType ty = typeOfIRTemp(env->type_env, tmp);
5773
5774 if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
sewardj6c299f32009-12-31 18:00:12 +00005775 ARMRI84* ri84 = iselIntExpr_RI84(NULL, False,
5776 env, stmt->Ist.WrTmp.data);
5777 HReg dst = lookupIRTemp(env, tmp);
5778 addInstr(env, ARMInstr_Mov(dst,ri84));
cerioncee30312004-12-17 20:30:21 +00005779 return;
5780 }
sewardj6c299f32009-12-31 18:00:12 +00005781 if (ty == Ity_I1) {
5782 HReg dst = lookupIRTemp(env, tmp);
5783 ARMCondCode cond = iselCondCode(env, stmt->Ist.WrTmp.data);
5784 addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
5785 addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
5786 return;
5787 }
5788 if (ty == Ity_I64) {
sewardjc6f970f2012-04-02 21:54:49 +00005789 if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
sewardj6c60b322010-08-22 12:48:28 +00005790 HReg src = iselNeon64Expr(env, stmt->Ist.WrTmp.data);
5791 HReg dst = lookupIRTemp(env, tmp);
5792 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, dst, src, 4, False));
5793 } else {
5794 HReg rHi, rLo, dstHi, dstLo;
5795 iselInt64Expr(&rHi,&rLo, env, stmt->Ist.WrTmp.data);
5796 lookupIRTemp64( &dstHi, &dstLo, env, tmp);
5797 addInstr(env, mk_iMOVds_RR(dstHi, rHi) );
5798 addInstr(env, mk_iMOVds_RR(dstLo, rLo) );
5799 }
sewardj6c299f32009-12-31 18:00:12 +00005800 return;
5801 }
5802 if (ty == Ity_F64) {
5803 HReg src = iselDblExpr(env, stmt->Ist.WrTmp.data);
5804 HReg dst = lookupIRTemp(env, tmp);
5805 addInstr(env, ARMInstr_VUnaryD(ARMvfpu_COPY, dst, src));
5806 return;
5807 }
5808 if (ty == Ity_F32) {
5809 HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data);
5810 HReg dst = lookupIRTemp(env, tmp);
5811 addInstr(env, ARMInstr_VUnaryS(ARMvfpu_COPY, dst, src));
5812 return;
5813 }
sewardj6c60b322010-08-22 12:48:28 +00005814 if (ty == Ity_V128) {
5815 HReg src = iselNeonExpr(env, stmt->Ist.WrTmp.data);
5816 HReg dst = lookupIRTemp(env, tmp);
5817 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, dst, src, 4, True));
5818 return;
5819 }
cerioncee30312004-12-17 20:30:21 +00005820 break;
5821 }
5822
5823 /* --------- Call to DIRTY helper --------- */
5824 /* call complex ("dirty") helper function */
5825 case Ist_Dirty: {
sewardj6c299f32009-12-31 18:00:12 +00005826 IRType retty;
5827 IRDirty* d = stmt->Ist.Dirty.details;
5828 Bool passBBP = False;
cerioncee30312004-12-17 20:30:21 +00005829
5830 if (d->nFxState == 0)
5831 vassert(!d->needsBBP);
sewardj428fabd2005-03-21 03:11:17 +00005832
5833 passBBP = toBool(d->nFxState > 0 && d->needsBBP);
cerioncee30312004-12-17 20:30:21 +00005834
5835 /* Marshal args, do the call, clear stack. */
sewardj6c299f32009-12-31 18:00:12 +00005836 Bool ok = doHelperCall( env, passBBP, d->guard, d->cee, d->args );
5837 if (!ok)
5838 break; /* will go to stmt_fail: */
cerioncee30312004-12-17 20:30:21 +00005839
5840 /* Now figure out what to do with the returned value, if any. */
5841 if (d->tmp == IRTemp_INVALID)
sewardj6c299f32009-12-31 18:00:12 +00005842 /* No return value. Nothing to do. */
5843 return;
cerioncee30312004-12-17 20:30:21 +00005844
sewardj6c299f32009-12-31 18:00:12 +00005845 retty = typeOfIRTemp(env->type_env, d->tmp);
cerioncee30312004-12-17 20:30:21 +00005846
sewardj6c299f32009-12-31 18:00:12 +00005847 if (retty == Ity_I64) {
sewardjc6f970f2012-04-02 21:54:49 +00005848 if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
sewardj6c60b322010-08-22 12:48:28 +00005849 HReg tmp = lookupIRTemp(env, d->tmp);
5850 addInstr(env, ARMInstr_VXferD(True, tmp, hregARM_R1(),
5851 hregARM_R0()));
5852 } else {
5853 HReg dstHi, dstLo;
5854 /* The returned value is in r1:r0. Park it in the
5855 register-pair associated with tmp. */
5856 lookupIRTemp64( &dstHi, &dstLo, env, d->tmp);
5857 addInstr(env, mk_iMOVds_RR(dstHi, hregARM_R1()) );
5858 addInstr(env, mk_iMOVds_RR(dstLo, hregARM_R0()) );
5859 }
cerioncee30312004-12-17 20:30:21 +00005860 return;
5861 }
sewardj6c299f32009-12-31 18:00:12 +00005862 if (retty == Ity_I32 || retty == Ity_I16 || retty == Ity_I8) {
5863 /* The returned value is in r0. Park it in the register
5864 associated with tmp. */
5865 HReg dst = lookupIRTemp(env, d->tmp);
5866 addInstr(env, mk_iMOVds_RR(dst, hregARM_R0()) );
5867 return;
5868 }
5869
cerioncee30312004-12-17 20:30:21 +00005870 break;
5871 }
5872
sewardj6c299f32009-12-31 18:00:12 +00005873 /* --------- Load Linked and Store Conditional --------- */
5874 case Ist_LLSC: {
5875 if (stmt->Ist.LLSC.storedata == NULL) {
5876 /* LL */
5877 IRTemp res = stmt->Ist.LLSC.result;
5878 IRType ty = typeOfIRTemp(env->type_env, res);
sewardjff7f5b72011-07-11 11:43:38 +00005879 if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
sewardj6c299f32009-12-31 18:00:12 +00005880 Int szB = 0;
5881 HReg r_dst = lookupIRTemp(env, res);
5882 HReg raddr = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
5883 switch (ty) {
5884 case Ity_I8: szB = 1; break;
sewardjff7f5b72011-07-11 11:43:38 +00005885 case Ity_I16: szB = 2; break;
sewardj6c299f32009-12-31 18:00:12 +00005886 case Ity_I32: szB = 4; break;
5887 default: vassert(0);
5888 }
sewardjff7f5b72011-07-11 11:43:38 +00005889 addInstr(env, mk_iMOVds_RR(hregARM_R4(), raddr));
sewardj6c299f32009-12-31 18:00:12 +00005890 addInstr(env, ARMInstr_LdrEX(szB));
sewardjff7f5b72011-07-11 11:43:38 +00005891 addInstr(env, mk_iMOVds_RR(r_dst, hregARM_R2()));
sewardj6c299f32009-12-31 18:00:12 +00005892 return;
5893 }
sewardjff7f5b72011-07-11 11:43:38 +00005894 if (ty == Ity_I64) {
5895 HReg raddr = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
5896 addInstr(env, mk_iMOVds_RR(hregARM_R4(), raddr));
5897 addInstr(env, ARMInstr_LdrEX(8));
5898 /* Result is in r3:r2. On a non-NEON capable CPU, we must
5899 move it into a result register pair. On a NEON capable
5900 CPU, the result register will be a 64 bit NEON
5901 register, so we must move it there instead. */
sewardjc6f970f2012-04-02 21:54:49 +00005902 if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
sewardjff7f5b72011-07-11 11:43:38 +00005903 HReg dst = lookupIRTemp(env, res);
5904 addInstr(env, ARMInstr_VXferD(True, dst, hregARM_R3(),
5905 hregARM_R2()));
5906 } else {
5907 HReg r_dst_hi, r_dst_lo;
5908 lookupIRTemp64(&r_dst_hi, &r_dst_lo, env, res);
5909 addInstr(env, mk_iMOVds_RR(r_dst_lo, hregARM_R2()));
5910 addInstr(env, mk_iMOVds_RR(r_dst_hi, hregARM_R3()));
5911 }
5912 return;
5913 }
5914 /*NOTREACHED*/
5915 vassert(0);
sewardj6c299f32009-12-31 18:00:12 +00005916 } else {
5917 /* SC */
sewardj6c299f32009-12-31 18:00:12 +00005918 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.LLSC.storedata);
sewardjff7f5b72011-07-11 11:43:38 +00005919 if (tyd == Ity_I32 || tyd == Ity_I16 || tyd == Ity_I8) {
5920 Int szB = 0;
5921 HReg rD = iselIntExpr_R(env, stmt->Ist.LLSC.storedata);
5922 HReg rA = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
sewardj6c299f32009-12-31 18:00:12 +00005923 switch (tyd) {
5924 case Ity_I8: szB = 1; break;
sewardjff7f5b72011-07-11 11:43:38 +00005925 case Ity_I16: szB = 2; break;
sewardj6c299f32009-12-31 18:00:12 +00005926 case Ity_I32: szB = 4; break;
5927 default: vassert(0);
5928 }
sewardjff7f5b72011-07-11 11:43:38 +00005929 addInstr(env, mk_iMOVds_RR(hregARM_R2(), rD));
5930 addInstr(env, mk_iMOVds_RR(hregARM_R4(), rA));
sewardj6c299f32009-12-31 18:00:12 +00005931 addInstr(env, ARMInstr_StrEX(szB));
sewardjff7f5b72011-07-11 11:43:38 +00005932 } else {
5933 vassert(tyd == Ity_I64);
5934 /* This is really ugly. There is no is/is-not NEON
5935 decision akin to the case for LL, because iselInt64Expr
5936 fudges this for us, and always gets the result into two
5937 GPRs even if this means moving it from a NEON
5938 register. */
5939 HReg rDhi, rDlo;
5940 iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.LLSC.storedata);
5941 HReg rA = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
5942 addInstr(env, mk_iMOVds_RR(hregARM_R2(), rDlo));
5943 addInstr(env, mk_iMOVds_RR(hregARM_R3(), rDhi));
5944 addInstr(env, mk_iMOVds_RR(hregARM_R4(), rA));
5945 addInstr(env, ARMInstr_StrEX(8));
sewardj6c299f32009-12-31 18:00:12 +00005946 }
sewardjff7f5b72011-07-11 11:43:38 +00005947 /* now r0 is 1 if failed, 0 if success. Change to IR
5948 conventions (0 is fail, 1 is success). Also transfer
5949 result to r_res. */
5950 IRTemp res = stmt->Ist.LLSC.result;
5951 IRType ty = typeOfIRTemp(env->type_env, res);
5952 HReg r_res = lookupIRTemp(env, res);
5953 ARMRI84* one = ARMRI84_I84(1,0);
5954 vassert(ty == Ity_I1);
5955 addInstr(env, ARMInstr_Alu(ARMalu_XOR, r_res, hregARM_R0(), one));
5956 /* And be conservative -- mask off all but the lowest bit */
5957 addInstr(env, ARMInstr_Alu(ARMalu_AND, r_res, r_res, one));
5958 return;
sewardj6c299f32009-12-31 18:00:12 +00005959 }
5960 break;
5961 }
5962
sewardj412098c2010-05-04 08:48:43 +00005963 /* --------- MEM FENCE --------- */
5964 case Ist_MBE:
5965 switch (stmt->Ist.MBE.event) {
5966 case Imbe_Fence:
sewardj6d615ba2011-09-26 16:19:43 +00005967 addInstr(env, ARMInstr_MFence());
5968 return;
5969 case Imbe_CancelReservation:
5970 addInstr(env, ARMInstr_CLREX());
sewardj412098c2010-05-04 08:48:43 +00005971 return;
5972 default:
5973 break;
5974 }
5975 break;
5976
sewardj6c299f32009-12-31 18:00:12 +00005977 /* --------- INSTR MARK --------- */
5978 /* Doesn't generate any executable code ... */
5979 case Ist_IMark:
5980 return;
5981
5982 /* --------- NO-OP --------- */
5983 case Ist_NoOp:
5984 return;
5985
cerioncee30312004-12-17 20:30:21 +00005986 /* --------- EXIT --------- */
cerioncee30312004-12-17 20:30:21 +00005987 case Ist_Exit: {
cerioncee30312004-12-17 20:30:21 +00005988 if (stmt->Ist.Exit.dst->tag != Ico_U32)
5989 vpanic("isel_arm: Ist_Exit: dst is not a 32-bit value");
sewardjc6f970f2012-04-02 21:54:49 +00005990
5991 ARMCondCode cc = iselCondCode(env, stmt->Ist.Exit.guard);
5992 ARMAMode1* amR15T = ARMAMode1_RI(hregARM_R8(),
5993 stmt->Ist.Exit.offsIP);
5994
5995 /* Case: boring transfer to known address */
5996 if (stmt->Ist.Exit.jk == Ijk_Boring
5997 || stmt->Ist.Exit.jk == Ijk_Call
5998 || stmt->Ist.Exit.jk == Ijk_Ret) {
5999 if (env->chainingAllowed) {
6000 /* .. almost always true .. */
6001 /* Skip the event check at the dst if this is a forwards
6002 edge. */
6003 Bool toFastEP
6004 = ((Addr32)stmt->Ist.Exit.dst->Ico.U32) > env->max_ga;
6005 if (0) vex_printf("%s", toFastEP ? "Y" : ",");
6006 addInstr(env, ARMInstr_XDirect(stmt->Ist.Exit.dst->Ico.U32,
6007 amR15T, cc, toFastEP));
6008 } else {
6009 /* .. very occasionally .. */
6010 /* We can't use chaining, so ask for an assisted transfer,
6011 as that's the only alternative that is allowable. */
6012 HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
6013 addInstr(env, ARMInstr_XAssisted(r, amR15T, cc, Ijk_Boring));
6014 }
6015 return;
6016 }
6017
6018 /* Case: assisted transfer to arbitrary address */
6019 switch (stmt->Ist.Exit.jk) {
sewardj2f6902b2012-04-23 09:48:14 +00006020 /* Keep this list in sync with that in iselNext below */
6021 case Ijk_ClientReq:
sewardjc6f970f2012-04-02 21:54:49 +00006022 case Ijk_NoDecode:
sewardj2f6902b2012-04-23 09:48:14 +00006023 case Ijk_NoRedir:
6024 case Ijk_Sys_syscall:
sewardjc6f970f2012-04-02 21:54:49 +00006025 {
6026 HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
6027 addInstr(env, ARMInstr_XAssisted(r, amR15T, cc,
6028 stmt->Ist.Exit.jk));
6029 return;
6030 }
6031 default:
6032 break;
6033 }
6034
6035 /* Do we ever expect to see any other kind? */
6036 goto stmt_fail;
cerioncee30312004-12-17 20:30:21 +00006037 }
6038
6039 default: break;
6040 }
sewardjaf1ceca2005-06-30 23:31:27 +00006041 stmt_fail:
cerioncee30312004-12-17 20:30:21 +00006042 ppIRStmt(stmt);
6043 vpanic("iselStmt");
6044}
6045
6046
6047/*---------------------------------------------------------*/
6048/*--- ISEL: Basic block terminators (Nexts) ---*/
6049/*---------------------------------------------------------*/
6050
sewardjc6f970f2012-04-02 21:54:49 +00006051static void iselNext ( ISelEnv* env,
6052 IRExpr* next, IRJumpKind jk, Int offsIP )
cerioncee30312004-12-17 20:30:21 +00006053{
sewardj6c299f32009-12-31 18:00:12 +00006054 if (vex_traceflags & VEX_TRACE_VCODE) {
sewardjc6f970f2012-04-02 21:54:49 +00006055 vex_printf( "\n-- PUT(%d) = ", offsIP);
6056 ppIRExpr( next );
6057 vex_printf( "; exit-");
sewardj6c299f32009-12-31 18:00:12 +00006058 ppIRJumpKind(jk);
sewardjc6f970f2012-04-02 21:54:49 +00006059 vex_printf( "\n");
sewardj6c299f32009-12-31 18:00:12 +00006060 }
sewardjc6f970f2012-04-02 21:54:49 +00006061
6062 /* Case: boring transfer to known address */
6063 if (next->tag == Iex_Const) {
6064 IRConst* cdst = next->Iex.Const.con;
6065 vassert(cdst->tag == Ico_U32);
6066 if (jk == Ijk_Boring || jk == Ijk_Call) {
6067 /* Boring transfer to known address */
6068 ARMAMode1* amR15T = ARMAMode1_RI(hregARM_R8(), offsIP);
6069 if (env->chainingAllowed) {
6070 /* .. almost always true .. */
6071 /* Skip the event check at the dst if this is a forwards
6072 edge. */
6073 Bool toFastEP
6074 = ((Addr64)cdst->Ico.U32) > env->max_ga;
6075 if (0) vex_printf("%s", toFastEP ? "X" : ".");
6076 addInstr(env, ARMInstr_XDirect(cdst->Ico.U32,
6077 amR15T, ARMcc_AL,
6078 toFastEP));
6079 } else {
6080 /* .. very occasionally .. */
6081 /* We can't use chaining, so ask for an assisted transfer,
6082 as that's the only alternative that is allowable. */
6083 HReg r = iselIntExpr_R(env, next);
6084 addInstr(env, ARMInstr_XAssisted(r, amR15T, ARMcc_AL,
6085 Ijk_Boring));
6086 }
6087 return;
6088 }
6089 }
6090
6091 /* Case: call/return (==boring) transfer to any address */
6092 switch (jk) {
6093 case Ijk_Boring: case Ijk_Ret: case Ijk_Call: {
6094 HReg r = iselIntExpr_R(env, next);
6095 ARMAMode1* amR15T = ARMAMode1_RI(hregARM_R8(), offsIP);
6096 if (env->chainingAllowed) {
6097 addInstr(env, ARMInstr_XIndir(r, amR15T, ARMcc_AL));
6098 } else {
6099 addInstr(env, ARMInstr_XAssisted(r, amR15T, ARMcc_AL,
6100 Ijk_Boring));
6101 }
6102 return;
6103 }
6104 default:
6105 break;
6106 }
6107
sewardj2f6902b2012-04-23 09:48:14 +00006108 /* Case: assisted transfer to arbitrary address */
sewardjc6f970f2012-04-02 21:54:49 +00006109 switch (jk) {
sewardj2f6902b2012-04-23 09:48:14 +00006110 /* Keep this list in sync with that for Ist_Exit above */
6111 case Ijk_ClientReq:
6112 case Ijk_NoDecode:
sewardjc6f970f2012-04-02 21:54:49 +00006113 case Ijk_NoRedir:
sewardj2f6902b2012-04-23 09:48:14 +00006114 case Ijk_Sys_syscall:
sewardjc6f970f2012-04-02 21:54:49 +00006115 {
6116 HReg r = iselIntExpr_R(env, next);
6117 ARMAMode1* amR15T = ARMAMode1_RI(hregARM_R8(), offsIP);
6118 addInstr(env, ARMInstr_XAssisted(r, amR15T, ARMcc_AL, jk));
6119 return;
6120 }
6121 default:
6122 break;
6123 }
6124
6125 vex_printf( "\n-- PUT(%d) = ", offsIP);
6126 ppIRExpr( next );
6127 vex_printf( "; exit-");
6128 ppIRJumpKind(jk);
6129 vex_printf( "\n");
6130 vassert(0); // are we expecting any other kind?
cerioncee30312004-12-17 20:30:21 +00006131}
6132
6133
6134/*---------------------------------------------------------*/
6135/*--- Insn selector top-level ---*/
6136/*---------------------------------------------------------*/
6137
sewardjdd40fdf2006-12-24 02:20:24 +00006138/* Translate an entire SB to arm code. */
cerioncee30312004-12-17 20:30:21 +00006139
sewardjc6f970f2012-04-02 21:54:49 +00006140HInstrArray* iselSB_ARM ( IRSB* bb,
6141 VexArch arch_host,
6142 VexArchInfo* archinfo_host,
6143 VexAbiInfo* vbi/*UNUSED*/,
6144 Int offs_Host_EvC_Counter,
6145 Int offs_Host_EvC_FailAddr,
6146 Bool chainingAllowed,
6147 Bool addProfInc,
6148 Addr64 max_ga )
cerioncee30312004-12-17 20:30:21 +00006149{
sewardjc6f970f2012-04-02 21:54:49 +00006150 Int i, j;
6151 HReg hreg, hregHI;
6152 ISelEnv* env;
6153 UInt hwcaps_host = archinfo_host->hwcaps;
6154 ARMAMode1 *amCounter, *amFailAddr;
cerioncee30312004-12-17 20:30:21 +00006155
sewardj6c299f32009-12-31 18:00:12 +00006156 /* sanity ... */
6157 vassert(arch_host == VexArchARM);
sewardj6c60b322010-08-22 12:48:28 +00006158
6159 /* hwcaps should not change from one ISEL call to another. */
sewardjc6f970f2012-04-02 21:54:49 +00006160 arm_hwcaps = hwcaps_host; // JRS 2012 Mar 31: FIXME (RM)
cerioncee30312004-12-17 20:30:21 +00006161
sewardj6c299f32009-12-31 18:00:12 +00006162 /* Make up an initial environment to use. */
6163 env = LibVEX_Alloc(sizeof(ISelEnv));
6164 env->vreg_ctr = 0;
6165
6166 /* Set up output code array. */
6167 env->code = newHInstrArray();
cerioncee30312004-12-17 20:30:21 +00006168
sewardj6c299f32009-12-31 18:00:12 +00006169 /* Copy BB's type env. */
6170 env->type_env = bb->tyenv;
cerioncee30312004-12-17 20:30:21 +00006171
sewardj6c299f32009-12-31 18:00:12 +00006172 /* Make up an IRTemp -> virtual HReg mapping. This doesn't
6173 change as we go along. */
6174 env->n_vregmap = bb->tyenv->types_used;
6175 env->vregmap = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
6176 env->vregmapHI = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
cerioncee30312004-12-17 20:30:21 +00006177
sewardjc6f970f2012-04-02 21:54:49 +00006178 /* and finally ... */
6179 env->chainingAllowed = chainingAllowed;
6180 env->hwcaps = hwcaps_host;
6181 env->max_ga = max_ga;
6182
sewardj6c299f32009-12-31 18:00:12 +00006183 /* For each IR temporary, allocate a suitably-kinded virtual
6184 register. */
6185 j = 0;
6186 for (i = 0; i < env->n_vregmap; i++) {
6187 hregHI = hreg = INVALID_HREG;
6188 switch (bb->tyenv->types[i]) {
6189 case Ity_I1:
6190 case Ity_I8:
6191 case Ity_I16:
6192 case Ity_I32: hreg = mkHReg(j++, HRcInt32, True); break;
sewardj6c60b322010-08-22 12:48:28 +00006193 case Ity_I64:
sewardjc6f970f2012-04-02 21:54:49 +00006194 if (hwcaps_host & VEX_HWCAPS_ARM_NEON) {
sewardj6c60b322010-08-22 12:48:28 +00006195 hreg = mkHReg(j++, HRcFlt64, True);
sewardj6c60b322010-08-22 12:48:28 +00006196 } else {
6197 hregHI = mkHReg(j++, HRcInt32, True);
6198 hreg = mkHReg(j++, HRcInt32, True);
6199 }
6200 break;
sewardj6c299f32009-12-31 18:00:12 +00006201 case Ity_F32: hreg = mkHReg(j++, HRcFlt32, True); break;
6202 case Ity_F64: hreg = mkHReg(j++, HRcFlt64, True); break;
sewardj06122e72011-03-28 12:14:48 +00006203 case Ity_V128: hreg = mkHReg(j++, HRcVec128, True); break;
sewardj6c299f32009-12-31 18:00:12 +00006204 default: ppIRType(bb->tyenv->types[i]);
6205 vpanic("iselBB: IRTemp type");
6206 }
6207 env->vregmap[i] = hreg;
6208 env->vregmapHI[i] = hregHI;
6209 }
6210 env->vreg_ctr = j;
cerioncee30312004-12-17 20:30:21 +00006211
sewardjc6f970f2012-04-02 21:54:49 +00006212 /* The very first instruction must be an event check. */
6213 amCounter = ARMAMode1_RI(hregARM_R8(), offs_Host_EvC_Counter);
6214 amFailAddr = ARMAMode1_RI(hregARM_R8(), offs_Host_EvC_FailAddr);
6215 addInstr(env, ARMInstr_EvCheck(amCounter, amFailAddr));
6216
6217 /* Possibly a block counter increment (for profiling). At this
6218 point we don't know the address of the counter, so just pretend
6219 it is zero. It will have to be patched later, but before this
6220 translation is used, by a call to LibVEX_patchProfCtr. */
6221 if (addProfInc) {
6222 addInstr(env, ARMInstr_ProfInc());
6223 }
cerioncee30312004-12-17 20:30:21 +00006224
sewardj6c299f32009-12-31 18:00:12 +00006225 /* Ok, finally we can iterate over the statements. */
6226 for (i = 0; i < bb->stmts_used; i++)
sewardjc6f970f2012-04-02 21:54:49 +00006227 iselStmt(env, bb->stmts[i]);
sewardj6c299f32009-12-31 18:00:12 +00006228
sewardjc6f970f2012-04-02 21:54:49 +00006229 iselNext(env, bb->next, bb->jumpkind, bb->offsIP);
sewardj6c299f32009-12-31 18:00:12 +00006230
6231 /* record the number of vregs we used. */
6232 env->code->n_vregs = env->vreg_ctr;
6233 return env->code;
cerioncee30312004-12-17 20:30:21 +00006234}
6235
6236
cerioncee30312004-12-17 20:30:21 +00006237/*---------------------------------------------------------------*/
sewardjcef7d3e2009-07-02 12:21:59 +00006238/*--- end host_arm_isel.c ---*/
cerioncee30312004-12-17 20:30:21 +00006239/*---------------------------------------------------------------*/