blob: 059006bbd73c1e3a4ed410da1616d3342d8bd8cb [file] [log] [blame]
cerioncee30312004-12-17 20:30:21 +00001
2/*---------------------------------------------------------------*/
sewardj752f9062010-05-03 21:38:49 +00003/*--- begin host_arm_isel.c ---*/
cerioncee30312004-12-17 20:30:21 +00004/*---------------------------------------------------------------*/
5
6/*
sewardj752f9062010-05-03 21:38:49 +00007 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
cerioncee30312004-12-17 20:30:21 +00009
sewardje6c53e02011-10-23 07:33:43 +000010 Copyright (C) 2004-2011 OpenWorks LLP
sewardj752f9062010-05-03 21:38:49 +000011 info@open-works.net
sewardj64733c42010-10-12 10:10:46 +000012
13 NEON support is
sewardje6c53e02011-10-23 07:33:43 +000014 Copyright (C) 2010-2011 Samsung Electronics
sewardj64733c42010-10-12 10:10:46 +000015 contributed by Dmitry Zhurikhin <zhur@ispras.ru>
16 and Kirill Batuzov <batuzovk@ispras.ru>
cerioncee30312004-12-17 20:30:21 +000017
sewardj752f9062010-05-03 21:38:49 +000018 This program is free software; you can redistribute it and/or
19 modify it under the terms of the GNU General Public License as
20 published by the Free Software Foundation; either version 2 of the
21 License, or (at your option) any later version.
cerioncee30312004-12-17 20:30:21 +000022
sewardj752f9062010-05-03 21:38:49 +000023 This program is distributed in the hope that it will be useful, but
24 WITHOUT ANY WARRANTY; without even the implied warranty of
25 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
26 General Public License for more details.
27
28 You should have received a copy of the GNU General Public License
29 along with this program; if not, write to the Free Software
30 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
sewardj7bd6ffe2005-08-03 16:07:36 +000031 02110-1301, USA.
32
sewardj752f9062010-05-03 21:38:49 +000033 The GNU General Public License is contained in the file COPYING.
cerioncee30312004-12-17 20:30:21 +000034*/
35
36#include "libvex_basictypes.h"
37#include "libvex_ir.h"
38#include "libvex.h"
sewardj6c60b322010-08-22 12:48:28 +000039#include "ir_match.h"
cerioncee30312004-12-17 20:30:21 +000040
sewardjcef7d3e2009-07-02 12:21:59 +000041#include "main_util.h"
42#include "main_globals.h"
43#include "host_generic_regs.h"
sewardje2ea1762010-09-22 00:56:37 +000044#include "host_generic_simd64.h" // for 32-bit SIMD helpers
sewardjcef7d3e2009-07-02 12:21:59 +000045#include "host_arm_defs.h"
cerioncee30312004-12-17 20:30:21 +000046
47
cerioncee30312004-12-17 20:30:21 +000048/*---------------------------------------------------------*/
sewardj6c299f32009-12-31 18:00:12 +000049/*--- ARMvfp control word stuff ---*/
50/*---------------------------------------------------------*/
51
52/* Vex-generated code expects to run with the FPU set as follows: all
53 exceptions masked, round-to-nearest, non-vector mode, with the NZCV
54 flags cleared, and FZ (flush to zero) disabled. Curiously enough,
55 this corresponds to a FPSCR value of zero.
56
57 fpscr should therefore be zero on entry to Vex-generated code, and
58 should be unchanged at exit. (Or at least the bottom 28 bits
59 should be zero).
60*/
61
62#define DEFAULT_FPSCR 0
63
64
65/*---------------------------------------------------------*/
cerioncee30312004-12-17 20:30:21 +000066/*--- ISelEnv ---*/
67/*---------------------------------------------------------*/
68
69/* This carries around:
70
71 - A mapping from IRTemp to IRType, giving the type of any IRTemp we
72 might encounter. This is computed before insn selection starts,
73 and does not change.
74
75 - A mapping from IRTemp to HReg. This tells the insn selector
76 which virtual register(s) are associated with each IRTemp
77 temporary. This is computed before insn selection starts, and
78 does not change. We expect this mapping to map precisely the
79 same set of IRTemps as the type mapping does.
80
81 - vregmap holds the primary register for the IRTemp.
sewardj6c299f32009-12-31 18:00:12 +000082 - vregmapHI is only used for 64-bit integer-typed
83 IRTemps. It holds the identity of a second
84 32-bit virtual HReg, which holds the high half
85 of the value.
86
cerioncee30312004-12-17 20:30:21 +000087 - The code array, that is, the insns selected so far.
88
89 - A counter, for generating new virtual registers.
90
sewardj6c299f32009-12-31 18:00:12 +000091 - The host hardware capabilities word. This is set at the start
92 and does not change.
93
sewardjc6f970f2012-04-02 21:54:49 +000094 - A Bool for indicating whether we may generate chain-me
95 instructions for control flow transfers, or whether we must use
96 XAssisted.
97
98 - The maximum guest address of any guest insn in this block.
99 Actually, the address of the highest-addressed byte from any insn
100 in this block. Is set at the start and does not change. This is
101 used for detecting jumps which are definitely forward-edges from
102 this block, and therefore can be made (chained) to the fast entry
103 point of the destination, thereby avoiding the destination's
104 event check.
105
106 Note, this is all (well, mostly) host-independent.
107*/
cerioncee30312004-12-17 20:30:21 +0000108
109typedef
110 struct {
sewardjc6f970f2012-04-02 21:54:49 +0000111 /* Constant -- are set at the start and do not change. */
cerioncee30312004-12-17 20:30:21 +0000112 IRTypeEnv* type_env;
113
114 HReg* vregmap;
sewardj6c299f32009-12-31 18:00:12 +0000115 HReg* vregmapHI;
cerioncee30312004-12-17 20:30:21 +0000116 Int n_vregmap;
117
sewardj6c299f32009-12-31 18:00:12 +0000118 UInt hwcaps;
sewardjc6f970f2012-04-02 21:54:49 +0000119
120 Bool chainingAllowed;
121 Addr64 max_ga;
122
123 /* These are modified as we go along. */
124 HInstrArray* code;
125 Int vreg_ctr;
cerioncee30312004-12-17 20:30:21 +0000126 }
127 ISelEnv;
128
129static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp )
130{
131 vassert(tmp >= 0);
132 vassert(tmp < env->n_vregmap);
133 return env->vregmap[tmp];
134}
135
sewardj6c299f32009-12-31 18:00:12 +0000136static void lookupIRTemp64 ( HReg* vrHI, HReg* vrLO, ISelEnv* env, IRTemp tmp )
137{
138 vassert(tmp >= 0);
139 vassert(tmp < env->n_vregmap);
140 vassert(env->vregmapHI[tmp] != INVALID_HREG);
141 *vrLO = env->vregmap[tmp];
142 *vrHI = env->vregmapHI[tmp];
143}
144
cerioncee30312004-12-17 20:30:21 +0000145static void addInstr ( ISelEnv* env, ARMInstr* instr )
146{
147 addHInstr(env->code, instr);
148 if (vex_traceflags & VEX_TRACE_VCODE) {
149 ppARMInstr(instr);
150 vex_printf("\n");
151 }
sewardj6c60b322010-08-22 12:48:28 +0000152#if 0
153 if (instr->tag == ARMin_NUnary || instr->tag == ARMin_NBinary
154 || instr->tag == ARMin_NUnaryS || instr->tag == ARMin_NBinaryS
155 || instr->tag == ARMin_NDual || instr->tag == ARMin_NShift) {
156 ppARMInstr(instr);
157 vex_printf("\n");
158 }
159#endif
cerioncee30312004-12-17 20:30:21 +0000160}
161
162static HReg newVRegI ( ISelEnv* env )
163{
164 HReg reg = mkHReg(env->vreg_ctr, HRcInt32, True/*virtual reg*/);
165 env->vreg_ctr++;
166 return reg;
167}
168
sewardj6c299f32009-12-31 18:00:12 +0000169static HReg newVRegD ( ISelEnv* env )
170{
171 HReg reg = mkHReg(env->vreg_ctr, HRcFlt64, True/*virtual reg*/);
172 env->vreg_ctr++;
173 return reg;
174}
175
176static HReg newVRegF ( ISelEnv* env )
177{
178 HReg reg = mkHReg(env->vreg_ctr, HRcFlt32, True/*virtual reg*/);
179 env->vreg_ctr++;
180 return reg;
181}
cerioncee30312004-12-17 20:30:21 +0000182
sewardj6c60b322010-08-22 12:48:28 +0000183static HReg newVRegV ( ISelEnv* env )
184{
185 HReg reg = mkHReg(env->vreg_ctr, HRcVec128, True/*virtual reg*/);
186 env->vreg_ctr++;
187 return reg;
188}
189
190/* These are duplicated in guest_arm_toIR.c */
191static IRExpr* unop ( IROp op, IRExpr* a )
192{
193 return IRExpr_Unop(op, a);
194}
195
196static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
197{
198 return IRExpr_Binop(op, a1, a2);
199}
200
sewardj6c60b322010-08-22 12:48:28 +0000201static IRExpr* bind ( Int binder )
202{
203 return IRExpr_Binder(binder);
204}
205
cerioncee30312004-12-17 20:30:21 +0000206
207/*---------------------------------------------------------*/
208/*--- ISEL: Forward declarations ---*/
209/*---------------------------------------------------------*/
210
211/* These are organised as iselXXX and iselXXX_wrk pairs. The
212 iselXXX_wrk do the real work, but are not to be called directly.
213 For each XXX, iselXXX calls its iselXXX_wrk counterpart, then
214 checks that all returned registers are virtual. You should not
215 call the _wrk version directly.
216*/
sewardj6c299f32009-12-31 18:00:12 +0000217static ARMAMode1* iselIntExpr_AMode1_wrk ( ISelEnv* env, IRExpr* e );
218static ARMAMode1* iselIntExpr_AMode1 ( ISelEnv* env, IRExpr* e );
cerioncee30312004-12-17 20:30:21 +0000219
sewardj6c299f32009-12-31 18:00:12 +0000220static ARMAMode2* iselIntExpr_AMode2_wrk ( ISelEnv* env, IRExpr* e );
221static ARMAMode2* iselIntExpr_AMode2 ( ISelEnv* env, IRExpr* e );
cerioncee30312004-12-17 20:30:21 +0000222
sewardj6c299f32009-12-31 18:00:12 +0000223static ARMAModeV* iselIntExpr_AModeV_wrk ( ISelEnv* env, IRExpr* e );
224static ARMAModeV* iselIntExpr_AModeV ( ISelEnv* env, IRExpr* e );
cerioncee30312004-12-17 20:30:21 +0000225
sewardjff7f5b72011-07-11 11:43:38 +0000226static ARMAModeN* iselIntExpr_AModeN_wrk ( ISelEnv* env, IRExpr* e );
227static ARMAModeN* iselIntExpr_AModeN ( ISelEnv* env, IRExpr* e );
sewardj6c60b322010-08-22 12:48:28 +0000228
sewardj6c299f32009-12-31 18:00:12 +0000229static ARMRI84* iselIntExpr_RI84_wrk
230 ( /*OUT*/Bool* didInv, Bool mayInv, ISelEnv* env, IRExpr* e );
231static ARMRI84* iselIntExpr_RI84
232 ( /*OUT*/Bool* didInv, Bool mayInv, ISelEnv* env, IRExpr* e );
cerioncee30312004-12-17 20:30:21 +0000233
sewardj6c299f32009-12-31 18:00:12 +0000234static ARMRI5* iselIntExpr_RI5_wrk ( ISelEnv* env, IRExpr* e );
235static ARMRI5* iselIntExpr_RI5 ( ISelEnv* env, IRExpr* e );
cerioncee30312004-12-17 20:30:21 +0000236
sewardj6c299f32009-12-31 18:00:12 +0000237static ARMCondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e );
238static ARMCondCode iselCondCode ( ISelEnv* env, IRExpr* e );
cerioncee30312004-12-17 20:30:21 +0000239
sewardj6c299f32009-12-31 18:00:12 +0000240static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e );
241static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e );
242
243static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo,
244 ISelEnv* env, IRExpr* e );
245static void iselInt64Expr ( HReg* rHi, HReg* rLo,
246 ISelEnv* env, IRExpr* e );
247
248static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e );
249static HReg iselDblExpr ( ISelEnv* env, IRExpr* e );
250
251static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e );
252static HReg iselFltExpr ( ISelEnv* env, IRExpr* e );
cerioncee30312004-12-17 20:30:21 +0000253
sewardj6c60b322010-08-22 12:48:28 +0000254static HReg iselNeon64Expr_wrk ( ISelEnv* env, IRExpr* e );
255static HReg iselNeon64Expr ( ISelEnv* env, IRExpr* e );
256
257static HReg iselNeonExpr_wrk ( ISelEnv* env, IRExpr* e );
258static HReg iselNeonExpr ( ISelEnv* env, IRExpr* e );
cerioncee30312004-12-17 20:30:21 +0000259
260/*---------------------------------------------------------*/
261/*--- ISEL: Misc helpers ---*/
262/*---------------------------------------------------------*/
sewardj6c299f32009-12-31 18:00:12 +0000263
264static UInt ROR32 ( UInt x, UInt sh ) {
265 vassert(sh >= 0 && sh < 32);
266 if (sh == 0)
267 return x;
268 else
269 return (x << (32-sh)) | (x >> sh);
cerioncee30312004-12-17 20:30:21 +0000270}
sewardj6c299f32009-12-31 18:00:12 +0000271
272/* Figure out if 'u' fits in the special shifter-operand 8x4 immediate
273 form, and if so return the components. */
274static Bool fitsIn8x4 ( /*OUT*/UInt* u8, /*OUT*/UInt* u4, UInt u )
275{
276 UInt i;
277 for (i = 0; i < 16; i++) {
278 if (0 == (u & 0xFFFFFF00)) {
279 *u8 = u;
280 *u4 = i;
281 return True;
282 }
283 u = ROR32(u, 30);
284 }
285 vassert(i == 16);
286 return False;
287}
cerioncee30312004-12-17 20:30:21 +0000288
289/* Make a int reg-reg move. */
sewardj6c299f32009-12-31 18:00:12 +0000290static ARMInstr* mk_iMOVds_RR ( HReg dst, HReg src )
cerioncee30312004-12-17 20:30:21 +0000291{
292 vassert(hregClass(src) == HRcInt32);
293 vassert(hregClass(dst) == HRcInt32);
sewardj6c299f32009-12-31 18:00:12 +0000294 return ARMInstr_Mov(dst, ARMRI84_R(src));
cerioncee30312004-12-17 20:30:21 +0000295}
296
sewardj6c299f32009-12-31 18:00:12 +0000297/* Set the VFP unit's rounding mode to default (round to nearest). */
298static void set_VFP_rounding_default ( ISelEnv* env )
cerioncee30312004-12-17 20:30:21 +0000299{
sewardj6c299f32009-12-31 18:00:12 +0000300 /* mov rTmp, #DEFAULT_FPSCR
301 fmxr fpscr, rTmp
302 */
303 HReg rTmp = newVRegI(env);
304 addInstr(env, ARMInstr_Imm32(rTmp, DEFAULT_FPSCR));
305 addInstr(env, ARMInstr_FPSCR(True/*toFPSCR*/, rTmp));
cerioncee30312004-12-17 20:30:21 +0000306}
307
sewardj6c299f32009-12-31 18:00:12 +0000308/* Mess with the VFP unit's rounding mode: 'mode' is an I32-typed
309 expression denoting a value in the range 0 .. 3, indicating a round
310 mode encoded as per type IRRoundingMode. Set FPSCR to have the
311 same rounding.
312*/
313static
314void set_VFP_rounding_mode ( ISelEnv* env, IRExpr* mode )
cerioncee30312004-12-17 20:30:21 +0000315{
sewardj6c299f32009-12-31 18:00:12 +0000316 /* This isn't simple, because 'mode' carries an IR rounding
317 encoding, and we need to translate that to an ARMvfp one:
318 The IR encoding:
319 00 to nearest (the default)
320 10 to +infinity
321 01 to -infinity
322 11 to zero
323 The ARMvfp encoding:
324 00 to nearest
325 01 to +infinity
326 10 to -infinity
327 11 to zero
328 Easy enough to do; just swap the two bits.
329 */
330 HReg irrm = iselIntExpr_R(env, mode);
331 HReg tL = newVRegI(env);
332 HReg tR = newVRegI(env);
333 HReg t3 = newVRegI(env);
334 /* tL = irrm << 1;
335 tR = irrm >> 1; if we're lucky, these will issue together
336 tL &= 2;
337 tR &= 1; ditto
338 t3 = tL | tR;
339 t3 <<= 22;
340 fmxr fpscr, t3
341 */
342 addInstr(env, ARMInstr_Shift(ARMsh_SHL, tL, irrm, ARMRI5_I5(1)));
343 addInstr(env, ARMInstr_Shift(ARMsh_SHR, tR, irrm, ARMRI5_I5(1)));
344 addInstr(env, ARMInstr_Alu(ARMalu_AND, tL, tL, ARMRI84_I84(2,0)));
345 addInstr(env, ARMInstr_Alu(ARMalu_AND, tR, tR, ARMRI84_I84(1,0)));
346 addInstr(env, ARMInstr_Alu(ARMalu_OR, t3, tL, ARMRI84_R(tR)));
347 addInstr(env, ARMInstr_Shift(ARMsh_SHL, t3, t3, ARMRI5_I5(22)));
348 addInstr(env, ARMInstr_FPSCR(True/*toFPSCR*/, t3));
cerioncee30312004-12-17 20:30:21 +0000349}
cerioncee30312004-12-17 20:30:21 +0000350
cerioncee30312004-12-17 20:30:21 +0000351
sewardj6c299f32009-12-31 18:00:12 +0000352/*---------------------------------------------------------*/
353/*--- ISEL: Function call helpers ---*/
354/*---------------------------------------------------------*/
cerioncee30312004-12-17 20:30:21 +0000355
cerioncee30312004-12-17 20:30:21 +0000356/* Used only in doHelperCall. See big comment in doHelperCall re
sewardj6c299f32009-12-31 18:00:12 +0000357 handling of register-parameter args. This function figures out
358 whether evaluation of an expression might require use of a fixed
359 register. If in doubt return True (safe but suboptimal).
cerioncee30312004-12-17 20:30:21 +0000360*/
361static
362Bool mightRequireFixedRegs ( IRExpr* e )
363{
364 switch (e->tag) {
sewardj6c299f32009-12-31 18:00:12 +0000365 case Iex_RdTmp: case Iex_Const: case Iex_Get:
366 return False;
367 default:
368 return True;
cerioncee30312004-12-17 20:30:21 +0000369 }
370}
sewardj6c299f32009-12-31 18:00:12 +0000371
cerioncee30312004-12-17 20:30:21 +0000372
373/* Do a complete function call. guard is a Ity_Bit expression
374 indicating whether or not the call happens. If guard==NULL, the
sewardj6c299f32009-12-31 18:00:12 +0000375 call is unconditional. Returns True iff it managed to handle this
376 combination of arg/return types, else returns False. */
cerioncee30312004-12-17 20:30:21 +0000377
378static
sewardj6c299f32009-12-31 18:00:12 +0000379Bool doHelperCall ( ISelEnv* env,
380 Bool passBBP,
cerioncee30312004-12-17 20:30:21 +0000381 IRExpr* guard, IRCallee* cee, IRExpr** args )
382{
cerioncee30312004-12-17 20:30:21 +0000383 ARMCondCode cc;
sewardj6c299f32009-12-31 18:00:12 +0000384 HReg argregs[ARM_N_ARGREGS];
385 HReg tmpregs[ARM_N_ARGREGS];
386 Bool go_fast;
387 Int n_args, i, nextArgReg;
388 ULong target;
cerioncee30312004-12-17 20:30:21 +0000389
sewardj6c299f32009-12-31 18:00:12 +0000390 vassert(ARM_N_ARGREGS == 4);
cerioncee30312004-12-17 20:30:21 +0000391
sewardj6c299f32009-12-31 18:00:12 +0000392 /* Marshal args for a call and do the call.
cerioncee30312004-12-17 20:30:21 +0000393
sewardj6c299f32009-12-31 18:00:12 +0000394 If passBBP is True, r8 (the baseblock pointer) is to be passed
395 as the first arg.
cerioncee30312004-12-17 20:30:21 +0000396
sewardj6c299f32009-12-31 18:00:12 +0000397 This function only deals with a tiny set of possibilities, which
398 cover all helpers in practice. The restrictions are that only
399 arguments in registers are supported, hence only ARM_N_REGPARMS
400 x 32 integer bits in total can be passed. In fact the only
401 supported arg types are I32 and I64.
cerioncee30312004-12-17 20:30:21 +0000402
sewardj6c299f32009-12-31 18:00:12 +0000403 Generating code which is both efficient and correct when
404 parameters are to be passed in registers is difficult, for the
405 reasons elaborated in detail in comments attached to
406 doHelperCall() in priv/host-x86/isel.c. Here, we use a variant
407 of the method described in those comments.
cerioncee30312004-12-17 20:30:21 +0000408
sewardj6c299f32009-12-31 18:00:12 +0000409 The problem is split into two cases: the fast scheme and the
410 slow scheme. In the fast scheme, arguments are computed
411 directly into the target (real) registers. This is only safe
412 when we can be sure that computation of each argument will not
413 trash any real registers set by computation of any other
414 argument.
cerioncee30312004-12-17 20:30:21 +0000415
sewardj6c299f32009-12-31 18:00:12 +0000416 In the slow scheme, all args are first computed into vregs, and
417 once they are all done, they are moved to the relevant real
418 regs. This always gives correct code, but it also gives a bunch
419 of vreg-to-rreg moves which are usually redundant but are hard
420 for the register allocator to get rid of.
421
422 To decide which scheme to use, all argument expressions are
423 first examined. If they are all so simple that it is clear they
424 will be evaluated without use of any fixed registers, use the
425 fast scheme, else use the slow scheme. Note also that only
426 unconditional calls may use the fast scheme, since having to
427 compute a condition expression could itself trash real
428 registers.
cerioncee30312004-12-17 20:30:21 +0000429
430 Note this requires being able to examine an expression and
431 determine whether or not evaluation of it might use a fixed
sewardj6c299f32009-12-31 18:00:12 +0000432 register. That requires knowledge of how the rest of this insn
433 selector works. Currently just the following 3 are regarded as
434 safe -- hopefully they cover the majority of arguments in
435 practice: IRExpr_Tmp IRExpr_Const IRExpr_Get.
cerioncee30312004-12-17 20:30:21 +0000436 */
cerioncee30312004-12-17 20:30:21 +0000437
sewardj6c299f32009-12-31 18:00:12 +0000438 /* Note that the cee->regparms field is meaningless on ARM hosts
439 (since there is only one calling convention) and so we always
440 ignore it. */
cerioncee30312004-12-17 20:30:21 +0000441
sewardj6c299f32009-12-31 18:00:12 +0000442 n_args = 0;
443 for (i = 0; args[i]; i++)
444 n_args++;
cerioncee30312004-12-17 20:30:21 +0000445
sewardj6c299f32009-12-31 18:00:12 +0000446 argregs[0] = hregARM_R0();
447 argregs[1] = hregARM_R1();
448 argregs[2] = hregARM_R2();
449 argregs[3] = hregARM_R3();
cerioncee30312004-12-17 20:30:21 +0000450
sewardj6c299f32009-12-31 18:00:12 +0000451 tmpregs[0] = tmpregs[1] = tmpregs[2] =
452 tmpregs[3] = INVALID_HREG;
cerioncee30312004-12-17 20:30:21 +0000453
sewardj6c299f32009-12-31 18:00:12 +0000454 /* First decide which scheme (slow or fast) is to be used. First
455 assume the fast scheme, and select slow if any contraindications
456 (wow) appear. */
457
458 go_fast = True;
459
460 if (guard) {
461 if (guard->tag == Iex_Const
462 && guard->Iex.Const.con->tag == Ico_U1
463 && guard->Iex.Const.con->Ico.U1 == True) {
464 /* unconditional */
465 } else {
466 /* Not manifestly unconditional -- be conservative. */
467 go_fast = False;
468 }
cerioncee30312004-12-17 20:30:21 +0000469 }
470
sewardj6c299f32009-12-31 18:00:12 +0000471 if (go_fast) {
472 for (i = 0; i < n_args; i++) {
cerioncee30312004-12-17 20:30:21 +0000473 if (mightRequireFixedRegs(args[i])) {
sewardj6c299f32009-12-31 18:00:12 +0000474 go_fast = False;
cerioncee30312004-12-17 20:30:21 +0000475 break;
476 }
477 }
sewardj6c299f32009-12-31 18:00:12 +0000478 }
479 /* At this point the scheme to use has been established. Generate
480 code to get the arg values into the argument rregs. If we run
481 out of arg regs, give up. */
cerioncee30312004-12-17 20:30:21 +0000482
sewardj6c299f32009-12-31 18:00:12 +0000483 if (go_fast) {
cerioncee30312004-12-17 20:30:21 +0000484
sewardj6c299f32009-12-31 18:00:12 +0000485 /* FAST SCHEME */
486 nextArgReg = 0;
cerioncee30312004-12-17 20:30:21 +0000487 if (passBBP) {
sewardj6c299f32009-12-31 18:00:12 +0000488 addInstr(env, mk_iMOVds_RR( argregs[nextArgReg],
489 hregARM_R8() ));
490 nextArgReg++;
cerioncee30312004-12-17 20:30:21 +0000491 }
492
sewardj6c299f32009-12-31 18:00:12 +0000493 for (i = 0; i < n_args; i++) {
494 IRType aTy = typeOfIRExpr(env->type_env, args[i]);
495 if (nextArgReg >= ARM_N_ARGREGS)
496 return False; /* out of argregs */
497 if (aTy == Ity_I32) {
498 addInstr(env, mk_iMOVds_RR( argregs[nextArgReg],
499 iselIntExpr_R(env, args[i]) ));
500 nextArgReg++;
501 }
502 else if (aTy == Ity_I64) {
503 /* 64-bit args must be passed in an a reg-pair of the form
504 n:n+1, where n is even. Hence either r0:r1 or r2:r3.
505 On a little-endian host, the less significant word is
506 passed in the lower-numbered register. */
507 if (nextArgReg & 1) {
508 if (nextArgReg >= ARM_N_ARGREGS)
509 return False; /* out of argregs */
510 addInstr(env, ARMInstr_Imm32( argregs[nextArgReg], 0xAA ));
511 nextArgReg++;
512 }
513 if (nextArgReg >= ARM_N_ARGREGS)
514 return False; /* out of argregs */
515 HReg raHi, raLo;
516 iselInt64Expr(&raHi, &raLo, env, args[i]);
517 addInstr(env, mk_iMOVds_RR( argregs[nextArgReg], raLo ));
518 nextArgReg++;
519 addInstr(env, mk_iMOVds_RR( argregs[nextArgReg], raHi ));
520 nextArgReg++;
521 }
522 else
523 return False; /* unhandled arg type */
524 }
525
526 /* Fast scheme only applies for unconditional calls. Hence: */
527 cc = ARMcc_AL;
cerioncee30312004-12-17 20:30:21 +0000528
529 } else {
530
sewardj6c299f32009-12-31 18:00:12 +0000531 /* SLOW SCHEME; move via temporaries */
532 nextArgReg = 0;
533
cerioncee30312004-12-17 20:30:21 +0000534 if (passBBP) {
sewardj6c299f32009-12-31 18:00:12 +0000535 /* This is pretty stupid; better to move directly to r0
536 after the rest of the args are done. */
537 tmpregs[nextArgReg] = newVRegI(env);
538 addInstr(env, mk_iMOVds_RR( tmpregs[nextArgReg],
539 hregARM_R8() ));
540 nextArgReg++;
541 }
542
543 for (i = 0; i < n_args; i++) {
544 IRType aTy = typeOfIRExpr(env->type_env, args[i]);
545 if (nextArgReg >= ARM_N_ARGREGS)
546 return False; /* out of argregs */
547 if (aTy == Ity_I32) {
548 tmpregs[nextArgReg] = iselIntExpr_R(env, args[i]);
549 nextArgReg++;
550 }
551 else if (aTy == Ity_I64) {
552 /* Same comment applies as in the Fast-scheme case. */
553 if (nextArgReg & 1)
554 nextArgReg++;
555 if (nextArgReg + 1 >= ARM_N_ARGREGS)
556 return False; /* out of argregs */
557 HReg raHi, raLo;
558 iselInt64Expr(&raHi, &raLo, env, args[i]);
559 tmpregs[nextArgReg] = raLo;
560 nextArgReg++;
561 tmpregs[nextArgReg] = raHi;
562 nextArgReg++;
563 }
564 }
565
566 /* Now we can compute the condition. We can't do it earlier
567 because the argument computations could trash the condition
568 codes. Be a bit clever to handle the common case where the
569 guard is 1:Bit. */
570 cc = ARMcc_AL;
571 if (guard) {
572 if (guard->tag == Iex_Const
573 && guard->Iex.Const.con->tag == Ico_U1
574 && guard->Iex.Const.con->Ico.U1 == True) {
575 /* unconditional -- do nothing */
576 } else {
577 cc = iselCondCode( env, guard );
578 }
579 }
580
581 /* Move the args to their final destinations. */
582 for (i = 0; i < nextArgReg; i++) {
583 if (tmpregs[i] == INVALID_HREG) { // Skip invalid regs
584 addInstr(env, ARMInstr_Imm32( argregs[i], 0xAA ));
585 continue;
586 }
587 /* None of these insns, including any spill code that might
588 be generated, may alter the condition codes. */
589 addInstr( env, mk_iMOVds_RR( argregs[i], tmpregs[i] ) );
cerioncee30312004-12-17 20:30:21 +0000590 }
591
592 }
593
sewardj6c299f32009-12-31 18:00:12 +0000594 /* Should be assured by checks above */
595 vassert(nextArgReg <= ARM_N_ARGREGS);
cerioncee30312004-12-17 20:30:21 +0000596
sewardj6c299f32009-12-31 18:00:12 +0000597 target = (HWord)Ptr_to_ULong(cee->addr);
cerioncee30312004-12-17 20:30:21 +0000598
sewardj6c299f32009-12-31 18:00:12 +0000599 /* nextArgReg doles out argument registers. Since these are
600 assigned in the order r0, r1, r2, r3, its numeric value at this
601 point, which must be between 0 and 4 inclusive, is going to be
602 equal to the number of arg regs in use for the call. Hence bake
603 that number into the call (we'll need to know it when doing
604 register allocation, to know what regs the call reads.)
cerioncee30312004-12-17 20:30:21 +0000605
sewardj6c299f32009-12-31 18:00:12 +0000606 There is a bit of a twist -- harmless but worth recording.
607 Suppose the arg types are (Ity_I32, Ity_I64). Then we will have
608 the first arg in r0 and the second in r3:r2, but r1 isn't used.
609 We nevertheless have nextArgReg==4 and bake that into the call
610 instruction. This will mean the register allocator wil believe
611 this insn reads r1 when in fact it doesn't. But that's
612 harmless; it just artificially extends the live range of r1
613 unnecessarily. The best fix would be to put into the
614 instruction, a bitmask indicating which of r0/1/2/3 carry live
615 values. But that's too much hassle. */
cerioncee30312004-12-17 20:30:21 +0000616
sewardj6c299f32009-12-31 18:00:12 +0000617 /* Finally, the call itself. */
618 addInstr(env, ARMInstr_Call( cc, target, nextArgReg ));
cerioncee30312004-12-17 20:30:21 +0000619
sewardj6c299f32009-12-31 18:00:12 +0000620 return True; /* success */
cerioncee30312004-12-17 20:30:21 +0000621}
622
623
624/*---------------------------------------------------------*/
sewardj6c299f32009-12-31 18:00:12 +0000625/*--- ISEL: Integer expressions (32/16/8 bit) ---*/
cerioncee30312004-12-17 20:30:21 +0000626/*---------------------------------------------------------*/
627
sewardj6c299f32009-12-31 18:00:12 +0000628/* Select insns for an integer-typed expression, and add them to the
629 code list. Return a reg holding the result. This reg will be a
630 virtual register. THE RETURNED REG MUST NOT BE MODIFIED. If you
631 want to modify it, ask for a new vreg, copy it in there, and modify
632 the copy. The register allocator will do its best to map both
633 vregs to the same real register, so the copies will often disappear
634 later in the game.
cerioncee30312004-12-17 20:30:21 +0000635
sewardj6c299f32009-12-31 18:00:12 +0000636 This should handle expressions of 32, 16 and 8-bit type. All
637 results are returned in a 32-bit register. For 16- and 8-bit
638 expressions, the upper 16/24 bits are arbitrary, so you should mask
639 or sign extend partial values if necessary.
cerioncee30312004-12-17 20:30:21 +0000640*/
641
sewardj6c299f32009-12-31 18:00:12 +0000642/* --------------------- AMode1 --------------------- */
643
644/* Return an AMode1 which computes the value of the specified
645 expression, possibly also adding insns to the code list as a
646 result. The expression may only be a 32-bit one.
647*/
cerioncee30312004-12-17 20:30:21 +0000648
649static Bool sane_AMode1 ( ARMAMode1* am )
650{
sewardj6c299f32009-12-31 18:00:12 +0000651 switch (am->tag) {
652 case ARMam1_RI:
653 return
654 toBool( hregClass(am->ARMam1.RI.reg) == HRcInt32
655 && (hregIsVirtual(am->ARMam1.RI.reg)
656 || am->ARMam1.RI.reg == hregARM_R8())
657 && am->ARMam1.RI.simm13 >= -4095
658 && am->ARMam1.RI.simm13 <= 4095 );
659 case ARMam1_RRS:
660 return
661 toBool( hregClass(am->ARMam1.RRS.base) == HRcInt32
662 && hregIsVirtual(am->ARMam1.RRS.base)
663 && hregClass(am->ARMam1.RRS.index) == HRcInt32
664 && hregIsVirtual(am->ARMam1.RRS.index)
665 && am->ARMam1.RRS.shift >= 0
666 && am->ARMam1.RRS.shift <= 3 );
667 default:
668 vpanic("sane_AMode: unknown ARM AMode1 tag");
669 }
cerioncee30312004-12-17 20:30:21 +0000670}
671
672static ARMAMode1* iselIntExpr_AMode1 ( ISelEnv* env, IRExpr* e )
673{
sewardj6c299f32009-12-31 18:00:12 +0000674 ARMAMode1* am = iselIntExpr_AMode1_wrk(env, e);
675 vassert(sane_AMode1(am));
676 return am;
cerioncee30312004-12-17 20:30:21 +0000677}
678
cerioncee30312004-12-17 20:30:21 +0000679static ARMAMode1* iselIntExpr_AMode1_wrk ( ISelEnv* env, IRExpr* e )
680{
sewardj6c299f32009-12-31 18:00:12 +0000681 IRType ty = typeOfIRExpr(env->type_env,e);
682 vassert(ty == Ity_I32);
cerioncee30312004-12-17 20:30:21 +0000683
sewardj6c299f32009-12-31 18:00:12 +0000684 /* FIXME: add RRS matching */
cerioncee30312004-12-17 20:30:21 +0000685
sewardj6c299f32009-12-31 18:00:12 +0000686 /* {Add32,Sub32}(expr,simm13) */
687 if (e->tag == Iex_Binop
688 && (e->Iex.Binop.op == Iop_Add32 || e->Iex.Binop.op == Iop_Sub32)
689 && e->Iex.Binop.arg2->tag == Iex_Const
690 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
691 Int simm = (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
692 if (simm >= -4095 && simm <= 4095) {
693 HReg reg;
694 if (e->Iex.Binop.op == Iop_Sub32)
695 simm = -simm;
696 reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
697 return ARMAMode1_RI(reg, simm);
698 }
699 }
cerioncee30312004-12-17 20:30:21 +0000700
sewardj6c299f32009-12-31 18:00:12 +0000701 /* Doesn't match anything in particular. Generate it into
702 a register and use that. */
703 {
704 HReg reg = iselIntExpr_R(env, e);
705 return ARMAMode1_RI(reg, 0);
706 }
707
cerioncee30312004-12-17 20:30:21 +0000708}
709
710
sewardj6c299f32009-12-31 18:00:12 +0000711/* --------------------- AMode2 --------------------- */
cerioncee30312004-12-17 20:30:21 +0000712
sewardj6c299f32009-12-31 18:00:12 +0000713/* Return an AMode2 which computes the value of the specified
714 expression, possibly also adding insns to the code list as a
715 result. The expression may only be a 32-bit one.
716*/
cerioncee30312004-12-17 20:30:21 +0000717
718static Bool sane_AMode2 ( ARMAMode2* am )
719{
720 switch (am->tag) {
sewardj6c299f32009-12-31 18:00:12 +0000721 case ARMam2_RI:
722 return
723 toBool( hregClass(am->ARMam2.RI.reg) == HRcInt32
724 && hregIsVirtual(am->ARMam2.RI.reg)
725 && am->ARMam2.RI.simm9 >= -255
726 && am->ARMam2.RI.simm9 <= 255 );
727 case ARMam2_RR:
728 return
729 toBool( hregClass(am->ARMam2.RR.base) == HRcInt32
730 && hregIsVirtual(am->ARMam2.RR.base)
731 && hregClass(am->ARMam2.RR.index) == HRcInt32
732 && hregIsVirtual(am->ARMam2.RR.index) );
733 default:
734 vpanic("sane_AMode: unknown ARM AMode2 tag");
cerioncee30312004-12-17 20:30:21 +0000735 }
736}
737
sewardj6c299f32009-12-31 18:00:12 +0000738static ARMAMode2* iselIntExpr_AMode2 ( ISelEnv* env, IRExpr* e )
739{
740 ARMAMode2* am = iselIntExpr_AMode2_wrk(env, e);
741 vassert(sane_AMode2(am));
742 return am;
743}
744
cerioncee30312004-12-17 20:30:21 +0000745static ARMAMode2* iselIntExpr_AMode2_wrk ( ISelEnv* env, IRExpr* e )
746{
sewardj6c299f32009-12-31 18:00:12 +0000747 IRType ty = typeOfIRExpr(env->type_env,e);
748 vassert(ty == Ity_I32);
749
750 /* FIXME: add RR matching */
751
752 /* {Add32,Sub32}(expr,simm8) */
753 if (e->tag == Iex_Binop
754 && (e->Iex.Binop.op == Iop_Add32 || e->Iex.Binop.op == Iop_Sub32)
755 && e->Iex.Binop.arg2->tag == Iex_Const
756 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
757 Int simm = (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
758 if (simm >= -255 && simm <= 255) {
759 HReg reg;
760 if (e->Iex.Binop.op == Iop_Sub32)
761 simm = -simm;
762 reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
763 return ARMAMode2_RI(reg, simm);
764 }
765 }
766
767 /* Doesn't match anything in particular. Generate it into
768 a register and use that. */
769 {
770 HReg reg = iselIntExpr_R(env, e);
771 return ARMAMode2_RI(reg, 0);
772 }
773
cerioncee30312004-12-17 20:30:21 +0000774}
sewardj6c299f32009-12-31 18:00:12 +0000775
776
777/* --------------------- AModeV --------------------- */
778
779/* Return an AModeV which computes the value of the specified
780 expression, possibly also adding insns to the code list as a
781 result. The expression may only be a 32-bit one.
sewardj48b279b2007-11-16 12:43:32 +0000782*/
cerioncee30312004-12-17 20:30:21 +0000783
sewardj6c299f32009-12-31 18:00:12 +0000784static Bool sane_AModeV ( ARMAModeV* am )
785{
786 return toBool( hregClass(am->reg) == HRcInt32
787 && hregIsVirtual(am->reg)
788 && am->simm11 >= -1020 && am->simm11 <= 1020
789 && 0 == (am->simm11 & 3) );
cerioncee30312004-12-17 20:30:21 +0000790}
791
sewardj6c299f32009-12-31 18:00:12 +0000792static ARMAModeV* iselIntExpr_AModeV ( ISelEnv* env, IRExpr* e )
cerioncee30312004-12-17 20:30:21 +0000793{
sewardj6c299f32009-12-31 18:00:12 +0000794 ARMAModeV* am = iselIntExpr_AModeV_wrk(env, e);
795 vassert(sane_AModeV(am));
796 return am;
797}
798
799static ARMAModeV* iselIntExpr_AModeV_wrk ( ISelEnv* env, IRExpr* e )
800{
801 IRType ty = typeOfIRExpr(env->type_env,e);
802 vassert(ty == Ity_I32);
803
804 /* {Add32,Sub32}(expr, simm8 << 2) */
805 if (e->tag == Iex_Binop
806 && (e->Iex.Binop.op == Iop_Add32 || e->Iex.Binop.op == Iop_Sub32)
807 && e->Iex.Binop.arg2->tag == Iex_Const
808 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
809 Int simm = (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
810 if (simm >= -1020 && simm <= 1020 && 0 == (simm & 3)) {
811 HReg reg;
812 if (e->Iex.Binop.op == Iop_Sub32)
813 simm = -simm;
814 reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
815 return mkARMAModeV(reg, simm);
816 }
cerioncee30312004-12-17 20:30:21 +0000817 }
sewardj6c299f32009-12-31 18:00:12 +0000818
819 /* Doesn't match anything in particular. Generate it into
820 a register and use that. */
821 {
822 HReg reg = iselIntExpr_R(env, e);
823 return mkARMAModeV(reg, 0);
824 }
825
cerioncee30312004-12-17 20:30:21 +0000826}
827
sewardj6c60b322010-08-22 12:48:28 +0000828/* -------------------- AModeN -------------------- */
829
830static ARMAModeN* iselIntExpr_AModeN ( ISelEnv* env, IRExpr* e )
831{
832 return iselIntExpr_AModeN_wrk(env, e);
833}
834
835static ARMAModeN* iselIntExpr_AModeN_wrk ( ISelEnv* env, IRExpr* e )
836{
837 HReg reg = iselIntExpr_R(env, e);
838 return mkARMAModeN_R(reg);
839}
840
sewardj6c299f32009-12-31 18:00:12 +0000841
842/* --------------------- RI84 --------------------- */
843
844/* Select instructions to generate 'e' into a RI84. If mayInv is
845 true, then the caller will also accept an I84 form that denotes
846 'not e'. In this case didInv may not be NULL, and *didInv is set
847 to True. This complication is so as to allow generation of an RI84
848 which is suitable for use in either an AND or BIC instruction,
849 without knowing (before this call) which one.
850*/
851static ARMRI84* iselIntExpr_RI84 ( /*OUT*/Bool* didInv, Bool mayInv,
852 ISelEnv* env, IRExpr* e )
cerioncee30312004-12-17 20:30:21 +0000853{
sewardj6c299f32009-12-31 18:00:12 +0000854 ARMRI84* ri;
855 if (mayInv)
856 vassert(didInv != NULL);
857 ri = iselIntExpr_RI84_wrk(didInv, mayInv, env, e);
858 /* sanity checks ... */
859 switch (ri->tag) {
860 case ARMri84_I84:
861 return ri;
862 case ARMri84_R:
863 vassert(hregClass(ri->ARMri84.R.reg) == HRcInt32);
864 vassert(hregIsVirtual(ri->ARMri84.R.reg));
865 return ri;
866 default:
867 vpanic("iselIntExpr_RI84: unknown arm RI84 tag");
cerioncee30312004-12-17 20:30:21 +0000868 }
869}
870
871/* DO NOT CALL THIS DIRECTLY ! */
sewardj6c299f32009-12-31 18:00:12 +0000872static ARMRI84* iselIntExpr_RI84_wrk ( /*OUT*/Bool* didInv, Bool mayInv,
873 ISelEnv* env, IRExpr* e )
cerioncee30312004-12-17 20:30:21 +0000874{
sewardj6c299f32009-12-31 18:00:12 +0000875 IRType ty = typeOfIRExpr(env->type_env,e);
876 vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
cerioncee30312004-12-17 20:30:21 +0000877
sewardj6c299f32009-12-31 18:00:12 +0000878 if (didInv) *didInv = False;
879
880 /* special case: immediate */
881 if (e->tag == Iex_Const) {
882 UInt u, u8 = 0x100, u4 = 0x10; /* both invalid */
883 switch (e->Iex.Const.con->tag) {
884 case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
885 case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
886 case Ico_U8: u = 0xFF & (e->Iex.Const.con->Ico.U8); break;
887 default: vpanic("iselIntExpr_RI84.Iex_Const(armh)");
888 }
889 if (fitsIn8x4(&u8, &u4, u)) {
890 return ARMRI84_I84( (UShort)u8, (UShort)u4 );
891 }
892 if (mayInv && fitsIn8x4(&u8, &u4, ~u)) {
893 vassert(didInv);
894 *didInv = True;
895 return ARMRI84_I84( (UShort)u8, (UShort)u4 );
896 }
897 /* else fail, fall through to default case */
898 }
899
900 /* default case: calculate into a register and return that */
901 {
902 HReg r = iselIntExpr_R ( env, e );
903 return ARMRI84_R(r);
904 }
cerioncee30312004-12-17 20:30:21 +0000905}
906
907
sewardj6c299f32009-12-31 18:00:12 +0000908/* --------------------- RI5 --------------------- */
909
910/* Select instructions to generate 'e' into a RI5. */
911
912static ARMRI5* iselIntExpr_RI5 ( ISelEnv* env, IRExpr* e )
913{
914 ARMRI5* ri = iselIntExpr_RI5_wrk(env, e);
915 /* sanity checks ... */
916 switch (ri->tag) {
917 case ARMri5_I5:
918 return ri;
919 case ARMri5_R:
920 vassert(hregClass(ri->ARMri5.R.reg) == HRcInt32);
921 vassert(hregIsVirtual(ri->ARMri5.R.reg));
922 return ri;
923 default:
924 vpanic("iselIntExpr_RI5: unknown arm RI5 tag");
925 }
926}
927
928/* DO NOT CALL THIS DIRECTLY ! */
929static ARMRI5* iselIntExpr_RI5_wrk ( ISelEnv* env, IRExpr* e )
930{
931 IRType ty = typeOfIRExpr(env->type_env,e);
932 vassert(ty == Ity_I32 || ty == Ity_I8);
933
934 /* special case: immediate */
935 if (e->tag == Iex_Const) {
936 UInt u; /* both invalid */
937 switch (e->Iex.Const.con->tag) {
938 case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
939 case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
940 case Ico_U8: u = 0xFF & (e->Iex.Const.con->Ico.U8); break;
941 default: vpanic("iselIntExpr_RI5.Iex_Const(armh)");
942 }
943 if (u >= 1 && u <= 31) {
944 return ARMRI5_I5(u);
945 }
946 /* else fail, fall through to default case */
947 }
948
949 /* default case: calculate into a register and return that */
950 {
951 HReg r = iselIntExpr_R ( env, e );
952 return ARMRI5_R(r);
953 }
954}
cerioncee30312004-12-17 20:30:21 +0000955
956
sewardj6c299f32009-12-31 18:00:12 +0000957/* ------------------- CondCode ------------------- */
cerioncee30312004-12-17 20:30:21 +0000958
959/* Generate code to evaluated a bit-typed expression, returning the
960 condition code which would correspond when the expression would
961 notionally have returned 1. */
962
963static ARMCondCode iselCondCode ( ISelEnv* env, IRExpr* e )
964{
sewardj6c299f32009-12-31 18:00:12 +0000965 ARMCondCode cc = iselCondCode_wrk(env,e);
sewardj6c60b322010-08-22 12:48:28 +0000966 vassert(cc != ARMcc_NV);
sewardj6c299f32009-12-31 18:00:12 +0000967 return cc;
cerioncee30312004-12-17 20:30:21 +0000968}
969
cerioncee30312004-12-17 20:30:21 +0000970static ARMCondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e )
971{
sewardj6c299f32009-12-31 18:00:12 +0000972 vassert(e);
973 vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);
cerioncee30312004-12-17 20:30:21 +0000974
sewardj6c299f32009-12-31 18:00:12 +0000975 /* var */
976 if (e->tag == Iex_RdTmp) {
977 HReg rTmp = lookupIRTemp(env, e->Iex.RdTmp.tmp);
978 /* CmpOrTst doesn't modify rTmp; so this is OK. */
979 ARMRI84* one = ARMRI84_I84(1,0);
980 addInstr(env, ARMInstr_CmpOrTst(False/*test*/, rTmp, one));
981 return ARMcc_NE;
982 }
983
984 /* Not1(e) */
985 if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) {
986 /* Generate code for the arg, and negate the test condition */
987 return 1 ^ iselCondCode(env, e->Iex.Unop.arg);
988 }
989
990 /* --- patterns rooted at: 32to1 --- */
991
992 if (e->tag == Iex_Unop
993 && e->Iex.Unop.op == Iop_32to1) {
994 HReg rTmp = iselIntExpr_R(env, e->Iex.Unop.arg);
995 ARMRI84* one = ARMRI84_I84(1,0);
996 addInstr(env, ARMInstr_CmpOrTst(False/*test*/, rTmp, one));
997 return ARMcc_NE;
998 }
999
1000 /* --- patterns rooted at: CmpNEZ8 --- */
1001
1002 if (e->tag == Iex_Unop
1003 && e->Iex.Unop.op == Iop_CmpNEZ8) {
1004 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
1005 ARMRI84* xFF = ARMRI84_I84(0xFF,0);
1006 addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r1, xFF));
1007 return ARMcc_NE;
1008 }
1009
1010 /* --- patterns rooted at: CmpNEZ32 --- */
1011
1012 if (e->tag == Iex_Unop
1013 && e->Iex.Unop.op == Iop_CmpNEZ32) {
1014 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
1015 ARMRI84* zero = ARMRI84_I84(0,0);
1016 addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, r1, zero));
1017 return ARMcc_NE;
1018 }
1019
1020 /* --- patterns rooted at: CmpNEZ64 --- */
1021
1022 if (e->tag == Iex_Unop
1023 && e->Iex.Unop.op == Iop_CmpNEZ64) {
1024 HReg tHi, tLo;
1025 HReg tmp = newVRegI(env);
1026 ARMRI84* zero = ARMRI84_I84(0,0);
1027 iselInt64Expr(&tHi, &tLo, env, e->Iex.Unop.arg);
1028 addInstr(env, ARMInstr_Alu(ARMalu_OR, tmp, tHi, ARMRI84_R(tLo)));
1029 addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, tmp, zero));
1030 return ARMcc_NE;
1031 }
1032
1033 /* --- Cmp*32*(x,y) --- */
1034 if (e->tag == Iex_Binop
1035 && (e->Iex.Binop.op == Iop_CmpEQ32
1036 || e->Iex.Binop.op == Iop_CmpNE32
1037 || e->Iex.Binop.op == Iop_CmpLT32S
1038 || e->Iex.Binop.op == Iop_CmpLT32U
1039 || e->Iex.Binop.op == Iop_CmpLE32S
1040 || e->Iex.Binop.op == Iop_CmpLE32U)) {
1041 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1042 ARMRI84* argR = iselIntExpr_RI84(NULL,False,
1043 env, e->Iex.Binop.arg2);
1044 addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, argL, argR));
1045 switch (e->Iex.Binop.op) {
1046 case Iop_CmpEQ32: return ARMcc_EQ;
1047 case Iop_CmpNE32: return ARMcc_NE;
1048 case Iop_CmpLT32S: return ARMcc_LT;
1049 case Iop_CmpLT32U: return ARMcc_LO;
1050 case Iop_CmpLE32S: return ARMcc_LE;
1051 case Iop_CmpLE32U: return ARMcc_LS;
1052 default: vpanic("iselCondCode(arm): CmpXX32");
1053 }
1054 }
1055
sewardj6c60b322010-08-22 12:48:28 +00001056 /* --- CasCmpEQ* --- */
1057 /* Ist_Cas has a dummy argument to compare with, so comparison is
1058 always true. */
1059 if (e->tag == Iex_Binop
1060 && (e->Iex.Binop.op == Iop_CasCmpEQ32
1061 || e->Iex.Binop.op == Iop_CasCmpEQ16
1062 || e->Iex.Binop.op == Iop_CasCmpEQ8)) {
1063 return ARMcc_AL;
1064 }
1065
sewardj6c299f32009-12-31 18:00:12 +00001066 ppIRExpr(e);
1067 vpanic("iselCondCode");
cerioncee30312004-12-17 20:30:21 +00001068}
1069
1070
sewardj6c299f32009-12-31 18:00:12 +00001071/* --------------------- Reg --------------------- */
cerioncee30312004-12-17 20:30:21 +00001072
1073static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e )
1074{
sewardj6c299f32009-12-31 18:00:12 +00001075 HReg r = iselIntExpr_R_wrk(env, e);
1076 /* sanity checks ... */
1077# if 0
1078 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
1079# endif
1080 vassert(hregClass(r) == HRcInt32);
1081 vassert(hregIsVirtual(r));
1082 return r;
cerioncee30312004-12-17 20:30:21 +00001083}
1084
1085/* DO NOT CALL THIS DIRECTLY ! */
1086static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
1087{
sewardj6c299f32009-12-31 18:00:12 +00001088 IRType ty = typeOfIRExpr(env->type_env,e);
1089 vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
sewardj6c60b322010-08-22 12:48:28 +00001090// vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
sewardj6c299f32009-12-31 18:00:12 +00001091
1092 switch (e->tag) {
1093
1094 /* --------- TEMP --------- */
1095 case Iex_RdTmp: {
1096 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
1097 }
1098
1099 /* --------- LOAD --------- */
1100 case Iex_Load: {
1101 HReg dst = newVRegI(env);
1102
1103 if (e->Iex.Load.end != Iend_LE)
1104 goto irreducible;
1105
1106 if (ty == Ity_I32) {
1107 ARMAMode1* amode = iselIntExpr_AMode1 ( env, e->Iex.Load.addr );
1108 addInstr(env, ARMInstr_LdSt32(True/*isLoad*/, dst, amode));
1109 return dst;
1110 }
1111 if (ty == Ity_I16) {
1112 ARMAMode2* amode = iselIntExpr_AMode2 ( env, e->Iex.Load.addr );
1113 addInstr(env, ARMInstr_LdSt16(True/*isLoad*/, False/*!signedLoad*/,
1114 dst, amode));
1115 return dst;
1116 }
1117 if (ty == Ity_I8) {
1118 ARMAMode1* amode = iselIntExpr_AMode1 ( env, e->Iex.Load.addr );
1119 addInstr(env, ARMInstr_LdSt8U(True/*isLoad*/, dst, amode));
1120 return dst;
1121 }
1122
1123//zz if (ty == Ity_I16) {
1124//zz addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
1125//zz return dst;
1126//zz }
1127//zz if (ty == Ity_I8) {
1128//zz addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
1129//zz return dst;
1130//zz }
1131 break;
1132 }
1133
1134//zz /* --------- TERNARY OP --------- */
1135//zz case Iex_Triop: {
florian420bfa92012-06-02 20:29:22 +00001136//zz IRTriop *triop = e->Iex.Triop.details;
sewardj6c299f32009-12-31 18:00:12 +00001137//zz /* C3210 flags following FPU partial remainder (fprem), both
1138//zz IEEE compliant (PREM1) and non-IEEE compliant (PREM). */
florian420bfa92012-06-02 20:29:22 +00001139//zz if (triop->op == Iop_PRemC3210F64
1140//zz || triop->op == Iop_PRem1C3210F64) {
sewardj6c299f32009-12-31 18:00:12 +00001141//zz HReg junk = newVRegF(env);
1142//zz HReg dst = newVRegI(env);
florian420bfa92012-06-02 20:29:22 +00001143//zz HReg srcL = iselDblExpr(env, triop->arg2);
1144//zz HReg srcR = iselDblExpr(env, triop->arg3);
sewardj6c299f32009-12-31 18:00:12 +00001145//zz /* XXXROUNDINGFIXME */
1146//zz /* set roundingmode here */
1147//zz addInstr(env, X86Instr_FpBinary(
1148//zz e->Iex.Binop.op==Iop_PRemC3210F64
1149//zz ? Xfp_PREM : Xfp_PREM1,
1150//zz srcL,srcR,junk
1151//zz ));
1152//zz /* The previous pseudo-insn will have left the FPU's C3210
1153//zz flags set correctly. So bag them. */
1154//zz addInstr(env, X86Instr_FpStSW_AX());
1155//zz addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
1156//zz addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0x4700), dst));
1157//zz return dst;
1158//zz }
1159//zz
1160//zz break;
1161//zz }
1162
1163 /* --------- BINARY OP --------- */
1164 case Iex_Binop: {
1165
1166 ARMAluOp aop = 0; /* invalid */
1167 ARMShiftOp sop = 0; /* invalid */
1168
1169 /* ADD/SUB/AND/OR/XOR */
1170 switch (e->Iex.Binop.op) {
1171 case Iop_And32: {
1172 Bool didInv = False;
1173 HReg dst = newVRegI(env);
1174 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1175 ARMRI84* argR = iselIntExpr_RI84(&didInv, True/*mayInv*/,
1176 env, e->Iex.Binop.arg2);
1177 addInstr(env, ARMInstr_Alu(didInv ? ARMalu_BIC : ARMalu_AND,
1178 dst, argL, argR));
1179 return dst;
1180 }
1181 case Iop_Or32: aop = ARMalu_OR; goto std_binop;
1182 case Iop_Xor32: aop = ARMalu_XOR; goto std_binop;
1183 case Iop_Sub32: aop = ARMalu_SUB; goto std_binop;
1184 case Iop_Add32: aop = ARMalu_ADD; goto std_binop;
1185 std_binop: {
1186 HReg dst = newVRegI(env);
1187 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1188 ARMRI84* argR = iselIntExpr_RI84(NULL, False/*mayInv*/,
1189 env, e->Iex.Binop.arg2);
1190 addInstr(env, ARMInstr_Alu(aop, dst, argL, argR));
1191 return dst;
1192 }
1193 default: break;
1194 }
1195
1196 /* SHL/SHR/SAR */
1197 switch (e->Iex.Binop.op) {
1198 case Iop_Shl32: sop = ARMsh_SHL; goto sh_binop;
1199 case Iop_Shr32: sop = ARMsh_SHR; goto sh_binop;
1200 case Iop_Sar32: sop = ARMsh_SAR; goto sh_binop;
1201 sh_binop: {
1202 HReg dst = newVRegI(env);
1203 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1204 ARMRI5* argR = iselIntExpr_RI5(env, e->Iex.Binop.arg2);
1205 addInstr(env, ARMInstr_Shift(sop, dst, argL, argR));
1206 vassert(ty == Ity_I32); /* else the IR is ill-typed */
1207 return dst;
1208 }
1209 default: break;
1210 }
1211
1212 /* MUL */
1213 if (e->Iex.Binop.op == Iop_Mul32) {
1214 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1215 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1216 HReg dst = newVRegI(env);
1217 addInstr(env, mk_iMOVds_RR(hregARM_R2(), argL));
1218 addInstr(env, mk_iMOVds_RR(hregARM_R3(), argR));
1219 addInstr(env, ARMInstr_Mul(ARMmul_PLAIN));
1220 addInstr(env, mk_iMOVds_RR(dst, hregARM_R0()));
1221 return dst;
1222 }
1223
1224 /* Handle misc other ops. */
1225
1226 if (e->Iex.Binop.op == Iop_Max32U) {
1227 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1228 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1229 HReg dst = newVRegI(env);
sewardj6c60b322010-08-22 12:48:28 +00001230 addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, argL,
1231 ARMRI84_R(argR)));
sewardj6c299f32009-12-31 18:00:12 +00001232 addInstr(env, mk_iMOVds_RR(dst, argL));
1233 addInstr(env, ARMInstr_CMov(ARMcc_LO, dst, ARMRI84_R(argR)));
1234 return dst;
1235 }
1236
1237 if (e->Iex.Binop.op == Iop_CmpF64) {
1238 HReg dL = iselDblExpr(env, e->Iex.Binop.arg1);
1239 HReg dR = iselDblExpr(env, e->Iex.Binop.arg2);
1240 HReg dst = newVRegI(env);
1241 /* Do the compare (FCMPD) and set NZCV in FPSCR. Then also do
1242 FMSTAT, so we can examine the results directly. */
1243 addInstr(env, ARMInstr_VCmpD(dL, dR));
1244 /* Create in dst, the IRCmpF64Result encoded result. */
1245 addInstr(env, ARMInstr_Imm32(dst, 0));
1246 addInstr(env, ARMInstr_CMov(ARMcc_EQ, dst, ARMRI84_I84(0x40,0))); //EQ
1247 addInstr(env, ARMInstr_CMov(ARMcc_MI, dst, ARMRI84_I84(0x01,0))); //LT
1248 addInstr(env, ARMInstr_CMov(ARMcc_GT, dst, ARMRI84_I84(0x00,0))); //GT
1249 addInstr(env, ARMInstr_CMov(ARMcc_VS, dst, ARMRI84_I84(0x45,0))); //UN
1250 return dst;
1251 }
1252
1253 if (e->Iex.Binop.op == Iop_F64toI32S
1254 || e->Iex.Binop.op == Iop_F64toI32U) {
1255 /* Wretched uglyness all round, due to having to deal
1256 with rounding modes. Oh well. */
1257 /* FIXME: if arg1 is a constant indicating round-to-zero,
1258 then we could skip all this arsing around with FPSCR and
1259 simply emit FTO{S,U}IZD. */
1260 Bool syned = e->Iex.Binop.op == Iop_F64toI32S;
1261 HReg valD = iselDblExpr(env, e->Iex.Binop.arg2);
1262 set_VFP_rounding_mode(env, e->Iex.Binop.arg1);
1263 /* FTO{S,U}ID valF, valD */
1264 HReg valF = newVRegF(env);
1265 addInstr(env, ARMInstr_VCvtID(False/*!iToD*/, syned,
1266 valF, valD));
1267 set_VFP_rounding_default(env);
1268 /* VMOV dst, valF */
1269 HReg dst = newVRegI(env);
1270 addInstr(env, ARMInstr_VXferS(False/*!toS*/, valF, dst));
1271 return dst;
1272 }
1273
sewardj6c60b322010-08-22 12:48:28 +00001274 if (e->Iex.Binop.op == Iop_GetElem8x8
1275 || e->Iex.Binop.op == Iop_GetElem16x4
1276 || e->Iex.Binop.op == Iop_GetElem32x2) {
1277 HReg res = newVRegI(env);
florian1d0da842012-06-01 22:04:27 +00001278 HReg arg = iselNeon64Expr(env, e->Iex.Binop.arg1);
sewardj6c60b322010-08-22 12:48:28 +00001279 UInt index, size;
1280 if (e->Iex.Binop.arg2->tag != Iex_Const ||
1281 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
1282 vpanic("ARM target supports GetElem with constant "
1283 "second argument only\n");
1284 }
1285 index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
1286 switch (e->Iex.Binop.op) {
1287 case Iop_GetElem8x8: vassert(index < 8); size = 0; break;
1288 case Iop_GetElem16x4: vassert(index < 4); size = 1; break;
1289 case Iop_GetElem32x2: vassert(index < 2); size = 2; break;
1290 default: vassert(0);
1291 }
1292 addInstr(env, ARMInstr_NUnaryS(ARMneon_GETELEMS,
1293 mkARMNRS(ARMNRS_Reg, res, 0),
1294 mkARMNRS(ARMNRS_Scalar, arg, index),
1295 size, False));
1296 return res;
1297 }
1298
1299 if (e->Iex.Binop.op == Iop_GetElem8x16
1300 || e->Iex.Binop.op == Iop_GetElem16x8
1301 || e->Iex.Binop.op == Iop_GetElem32x4) {
1302 HReg res = newVRegI(env);
florian1d0da842012-06-01 22:04:27 +00001303 HReg arg = iselNeonExpr(env, e->Iex.Binop.arg1);
sewardj6c60b322010-08-22 12:48:28 +00001304 UInt index, size;
1305 if (e->Iex.Binop.arg2->tag != Iex_Const ||
1306 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
1307 vpanic("ARM target supports GetElem with constant "
1308 "second argument only\n");
1309 }
1310 index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
1311 switch (e->Iex.Binop.op) {
1312 case Iop_GetElem8x16: vassert(index < 16); size = 0; break;
1313 case Iop_GetElem16x8: vassert(index < 8); size = 1; break;
1314 case Iop_GetElem32x4: vassert(index < 4); size = 2; break;
1315 default: vassert(0);
1316 }
1317 addInstr(env, ARMInstr_NUnaryS(ARMneon_GETELEMS,
1318 mkARMNRS(ARMNRS_Reg, res, 0),
1319 mkARMNRS(ARMNRS_Scalar, arg, index),
1320 size, True));
1321 return res;
1322 }
1323
sewardje2ea1762010-09-22 00:56:37 +00001324 /* All cases involving host-side helper calls. */
1325 void* fn = NULL;
1326 switch (e->Iex.Binop.op) {
1327 case Iop_Add16x2:
1328 fn = &h_generic_calc_Add16x2; break;
1329 case Iop_Sub16x2:
1330 fn = &h_generic_calc_Sub16x2; break;
1331 case Iop_HAdd16Ux2:
1332 fn = &h_generic_calc_HAdd16Ux2; break;
1333 case Iop_HAdd16Sx2:
1334 fn = &h_generic_calc_HAdd16Sx2; break;
1335 case Iop_HSub16Ux2:
1336 fn = &h_generic_calc_HSub16Ux2; break;
1337 case Iop_HSub16Sx2:
1338 fn = &h_generic_calc_HSub16Sx2; break;
1339 case Iop_QAdd16Sx2:
1340 fn = &h_generic_calc_QAdd16Sx2; break;
1341 case Iop_QSub16Sx2:
1342 fn = &h_generic_calc_QSub16Sx2; break;
1343 case Iop_Add8x4:
1344 fn = &h_generic_calc_Add8x4; break;
1345 case Iop_Sub8x4:
1346 fn = &h_generic_calc_Sub8x4; break;
1347 case Iop_HAdd8Ux4:
1348 fn = &h_generic_calc_HAdd8Ux4; break;
1349 case Iop_HAdd8Sx4:
1350 fn = &h_generic_calc_HAdd8Sx4; break;
1351 case Iop_HSub8Ux4:
1352 fn = &h_generic_calc_HSub8Ux4; break;
1353 case Iop_HSub8Sx4:
1354 fn = &h_generic_calc_HSub8Sx4; break;
1355 case Iop_QAdd8Sx4:
1356 fn = &h_generic_calc_QAdd8Sx4; break;
1357 case Iop_QAdd8Ux4:
1358 fn = &h_generic_calc_QAdd8Ux4; break;
1359 case Iop_QSub8Sx4:
1360 fn = &h_generic_calc_QSub8Sx4; break;
1361 case Iop_QSub8Ux4:
1362 fn = &h_generic_calc_QSub8Ux4; break;
sewardj310d6b22010-10-18 16:29:40 +00001363 case Iop_Sad8Ux4:
1364 fn = &h_generic_calc_Sad8Ux4; break;
sewardj44ce46d2012-07-11 13:19:10 +00001365 case Iop_QAdd32S:
1366 fn = &h_generic_calc_QAdd32S; break;
1367 case Iop_QSub32S:
1368 fn = &h_generic_calc_QSub32S; break;
sewardje2ea1762010-09-22 00:56:37 +00001369 default:
1370 break;
1371 }
1372
1373 if (fn) {
1374 HReg regL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1375 HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1376 HReg res = newVRegI(env);
1377 addInstr(env, mk_iMOVds_RR(hregARM_R0(), regL));
1378 addInstr(env, mk_iMOVds_RR(hregARM_R1(), regR));
1379 addInstr(env, ARMInstr_Call( ARMcc_AL, (HWord)Ptr_to_ULong(fn), 2 ));
1380 addInstr(env, mk_iMOVds_RR(res, hregARM_R0()));
1381 return res;
1382 }
1383
sewardj6c299f32009-12-31 18:00:12 +00001384 break;
1385 }
1386
1387 /* --------- UNARY OP --------- */
1388 case Iex_Unop: {
1389
1390//zz /* 1Uto8(32to1(expr32)) */
1391//zz if (e->Iex.Unop.op == Iop_1Uto8) {
1392//zz DECLARE_PATTERN(p_32to1_then_1Uto8);
1393//zz DEFINE_PATTERN(p_32to1_then_1Uto8,
1394//zz unop(Iop_1Uto8,unop(Iop_32to1,bind(0))));
1395//zz if (matchIRExpr(&mi,p_32to1_then_1Uto8,e)) {
1396//zz IRExpr* expr32 = mi.bindee[0];
1397//zz HReg dst = newVRegI(env);
1398//zz HReg src = iselIntExpr_R(env, expr32);
1399//zz addInstr(env, mk_iMOVsd_RR(src,dst) );
1400//zz addInstr(env, X86Instr_Alu32R(Xalu_AND,
1401//zz X86RMI_Imm(1), dst));
1402//zz return dst;
1403//zz }
1404//zz }
1405//zz
1406//zz /* 8Uto32(LDle(expr32)) */
1407//zz if (e->Iex.Unop.op == Iop_8Uto32) {
1408//zz DECLARE_PATTERN(p_LDle8_then_8Uto32);
1409//zz DEFINE_PATTERN(p_LDle8_then_8Uto32,
1410//zz unop(Iop_8Uto32,
1411//zz IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
1412//zz if (matchIRExpr(&mi,p_LDle8_then_8Uto32,e)) {
1413//zz HReg dst = newVRegI(env);
1414//zz X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1415//zz addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
1416//zz return dst;
1417//zz }
1418//zz }
1419//zz
1420//zz /* 8Sto32(LDle(expr32)) */
1421//zz if (e->Iex.Unop.op == Iop_8Sto32) {
1422//zz DECLARE_PATTERN(p_LDle8_then_8Sto32);
1423//zz DEFINE_PATTERN(p_LDle8_then_8Sto32,
1424//zz unop(Iop_8Sto32,
1425//zz IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
1426//zz if (matchIRExpr(&mi,p_LDle8_then_8Sto32,e)) {
1427//zz HReg dst = newVRegI(env);
1428//zz X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1429//zz addInstr(env, X86Instr_LoadEX(1,True,amode,dst));
1430//zz return dst;
1431//zz }
1432//zz }
1433//zz
1434//zz /* 16Uto32(LDle(expr32)) */
1435//zz if (e->Iex.Unop.op == Iop_16Uto32) {
1436//zz DECLARE_PATTERN(p_LDle16_then_16Uto32);
1437//zz DEFINE_PATTERN(p_LDle16_then_16Uto32,
1438//zz unop(Iop_16Uto32,
1439//zz IRExpr_Load(Iend_LE,Ity_I16,bind(0))) );
1440//zz if (matchIRExpr(&mi,p_LDle16_then_16Uto32,e)) {
1441//zz HReg dst = newVRegI(env);
1442//zz X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1443//zz addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
1444//zz return dst;
1445//zz }
1446//zz }
1447//zz
1448//zz /* 8Uto32(GET:I8) */
1449//zz if (e->Iex.Unop.op == Iop_8Uto32) {
1450//zz if (e->Iex.Unop.arg->tag == Iex_Get) {
1451//zz HReg dst;
1452//zz X86AMode* amode;
1453//zz vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I8);
1454//zz dst = newVRegI(env);
1455//zz amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
1456//zz hregX86_EBP());
1457//zz addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
1458//zz return dst;
1459//zz }
1460//zz }
1461//zz
1462//zz /* 16to32(GET:I16) */
1463//zz if (e->Iex.Unop.op == Iop_16Uto32) {
1464//zz if (e->Iex.Unop.arg->tag == Iex_Get) {
1465//zz HReg dst;
1466//zz X86AMode* amode;
1467//zz vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I16);
1468//zz dst = newVRegI(env);
1469//zz amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
1470//zz hregX86_EBP());
1471//zz addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
1472//zz return dst;
1473//zz }
1474//zz }
1475
1476 switch (e->Iex.Unop.op) {
1477 case Iop_8Uto32: {
1478 HReg dst = newVRegI(env);
1479 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1480 addInstr(env, ARMInstr_Alu(ARMalu_AND,
1481 dst, src, ARMRI84_I84(0xFF,0)));
1482 return dst;
1483 }
1484//zz case Iop_8Uto16:
1485//zz case Iop_8Uto32:
1486//zz case Iop_16Uto32: {
1487//zz HReg dst = newVRegI(env);
1488//zz HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1489//zz UInt mask = e->Iex.Unop.op==Iop_16Uto32 ? 0xFFFF : 0xFF;
1490//zz addInstr(env, mk_iMOVsd_RR(src,dst) );
1491//zz addInstr(env, X86Instr_Alu32R(Xalu_AND,
1492//zz X86RMI_Imm(mask), dst));
1493//zz return dst;
1494//zz }
1495//zz case Iop_8Sto16:
1496//zz case Iop_8Sto32:
1497 case Iop_16Uto32: {
1498 HReg dst = newVRegI(env);
1499 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1500 ARMRI5* amt = ARMRI5_I5(16);
1501 addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, src, amt));
1502 addInstr(env, ARMInstr_Shift(ARMsh_SHR, dst, dst, amt));
1503 return dst;
1504 }
1505 case Iop_8Sto32:
1506 case Iop_16Sto32: {
1507 HReg dst = newVRegI(env);
1508 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1509 ARMRI5* amt = ARMRI5_I5(e->Iex.Unop.op==Iop_16Sto32 ? 16 : 24);
1510 addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, src, amt));
1511 addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt));
1512 return dst;
1513 }
1514//zz case Iop_Not8:
1515//zz case Iop_Not16:
1516 case Iop_Not32: {
1517 HReg dst = newVRegI(env);
1518 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1519 addInstr(env, ARMInstr_Unary(ARMun_NOT, dst, src));
1520 return dst;
1521 }
1522 case Iop_64HIto32: {
1523 HReg rHi, rLo;
1524 iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1525 return rHi; /* and abandon rLo .. poor wee thing :-) */
1526 }
1527 case Iop_64to32: {
1528 HReg rHi, rLo;
1529 iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1530 return rLo; /* similar stupid comment to the above ... */
1531 }
sewardj6c60b322010-08-22 12:48:28 +00001532 case Iop_64to8: {
1533 HReg rHi, rLo;
sewardjc6f970f2012-04-02 21:54:49 +00001534 if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
sewardj6c60b322010-08-22 12:48:28 +00001535 HReg tHi = newVRegI(env);
1536 HReg tLo = newVRegI(env);
1537 HReg tmp = iselNeon64Expr(env, e->Iex.Unop.arg);
1538 addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo));
1539 rHi = tHi;
1540 rLo = tLo;
1541 } else {
1542 iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1543 }
1544 return rLo;
1545 }
sewardj6c299f32009-12-31 18:00:12 +00001546//zz case Iop_16HIto8:
1547//zz case Iop_32HIto16: {
1548//zz HReg dst = newVRegI(env);
1549//zz HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1550//zz Int shift = e->Iex.Unop.op == Iop_16HIto8 ? 8 : 16;
1551//zz addInstr(env, mk_iMOVsd_RR(src,dst) );
1552//zz addInstr(env, X86Instr_Sh32(Xsh_SHR, shift, dst));
1553//zz return dst;
1554//zz }
1555 case Iop_1Uto32:
1556 case Iop_1Uto8: {
1557 HReg dst = newVRegI(env);
1558 ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1559 addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
1560 addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
1561 return dst;
1562 }
1563
1564 case Iop_1Sto32: {
1565 HReg dst = newVRegI(env);
1566 ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1567 ARMRI5* amt = ARMRI5_I5(31);
1568 /* This is really rough. We could do much better here;
1569 perhaps mvn{cond} dst, #0 as the second insn?
1570 (same applies to 1Sto64) */
1571 addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
1572 addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
1573 addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, dst, amt));
1574 addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt));
1575 return dst;
1576 }
1577
1578
1579//zz case Iop_1Sto8:
1580//zz case Iop_1Sto16:
1581//zz case Iop_1Sto32: {
1582//zz /* could do better than this, but for now ... */
1583//zz HReg dst = newVRegI(env);
1584//zz X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1585//zz addInstr(env, X86Instr_Set32(cond,dst));
1586//zz addInstr(env, X86Instr_Sh32(Xsh_SHL, 31, dst));
1587//zz addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, dst));
1588//zz return dst;
1589//zz }
1590//zz case Iop_Ctz32: {
1591//zz /* Count trailing zeroes, implemented by x86 'bsfl' */
1592//zz HReg dst = newVRegI(env);
1593//zz HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1594//zz addInstr(env, X86Instr_Bsfr32(True,src,dst));
1595//zz return dst;
1596//zz }
1597 case Iop_Clz32: {
1598 /* Count leading zeroes; easy on ARM. */
1599 HReg dst = newVRegI(env);
1600 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1601 addInstr(env, ARMInstr_Unary(ARMun_CLZ, dst, src));
1602 return dst;
1603 }
1604
1605 case Iop_CmpwNEZ32: {
1606 HReg dst = newVRegI(env);
1607 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1608 addInstr(env, ARMInstr_Unary(ARMun_NEG, dst, src));
1609 addInstr(env, ARMInstr_Alu(ARMalu_OR, dst, dst, ARMRI84_R(src)));
1610 addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, ARMRI5_I5(31)));
1611 return dst;
1612 }
1613
1614 case Iop_Left32: {
1615 HReg dst = newVRegI(env);
1616 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1617 addInstr(env, ARMInstr_Unary(ARMun_NEG, dst, src));
1618 addInstr(env, ARMInstr_Alu(ARMalu_OR, dst, dst, ARMRI84_R(src)));
1619 return dst;
1620 }
1621
1622//zz case Iop_V128to32: {
1623//zz HReg dst = newVRegI(env);
1624//zz HReg vec = iselVecExpr(env, e->Iex.Unop.arg);
1625//zz X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
1626//zz sub_from_esp(env, 16);
1627//zz addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, esp0));
1628//zz addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(esp0), dst ));
1629//zz add_to_esp(env, 16);
1630//zz return dst;
1631//zz }
1632//zz
1633 case Iop_ReinterpF32asI32: {
1634 HReg dst = newVRegI(env);
1635 HReg src = iselFltExpr(env, e->Iex.Unop.arg);
1636 addInstr(env, ARMInstr_VXferS(False/*!toS*/, src, dst));
1637 return dst;
1638 }
1639
1640//zz
1641//zz case Iop_16to8:
1642 case Iop_32to8:
1643 case Iop_32to16:
1644 /* These are no-ops. */
1645 return iselIntExpr_R(env, e->Iex.Unop.arg);
1646
sewardj6c60b322010-08-22 12:48:28 +00001647 default:
sewardj6c299f32009-12-31 18:00:12 +00001648 break;
1649 }
sewardje2ea1762010-09-22 00:56:37 +00001650
1651 /* All Unop cases involving host-side helper calls. */
1652 void* fn = NULL;
1653 switch (e->Iex.Unop.op) {
1654 case Iop_CmpNEZ16x2:
1655 fn = &h_generic_calc_CmpNEZ16x2; break;
1656 case Iop_CmpNEZ8x4:
1657 fn = &h_generic_calc_CmpNEZ8x4; break;
1658 default:
1659 break;
1660 }
1661
1662 if (fn) {
1663 HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
1664 HReg res = newVRegI(env);
1665 addInstr(env, mk_iMOVds_RR(hregARM_R0(), arg));
1666 addInstr(env, ARMInstr_Call( ARMcc_AL, (HWord)Ptr_to_ULong(fn), 1 ));
1667 addInstr(env, mk_iMOVds_RR(res, hregARM_R0()));
1668 return res;
1669 }
1670
sewardj6c299f32009-12-31 18:00:12 +00001671 break;
1672 }
1673
1674 /* --------- GET --------- */
1675 case Iex_Get: {
1676 if (ty == Ity_I32
1677 && 0 == (e->Iex.Get.offset & 3)
1678 && e->Iex.Get.offset < 4096-4) {
1679 HReg dst = newVRegI(env);
1680 addInstr(env, ARMInstr_LdSt32(
1681 True/*isLoad*/,
1682 dst,
1683 ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset)));
1684 return dst;
1685 }
1686//zz if (ty == Ity_I8 || ty == Ity_I16) {
1687//zz HReg dst = newVRegI(env);
1688//zz addInstr(env, X86Instr_LoadEX(
1689//zz toUChar(ty==Ity_I8 ? 1 : 2),
1690//zz False,
1691//zz X86AMode_IR(e->Iex.Get.offset,hregX86_EBP()),
1692//zz dst));
1693//zz return dst;
1694//zz }
1695 break;
1696 }
1697
1698//zz case Iex_GetI: {
1699//zz X86AMode* am
1700//zz = genGuestArrayOffset(
1701//zz env, e->Iex.GetI.descr,
1702//zz e->Iex.GetI.ix, e->Iex.GetI.bias );
1703//zz HReg dst = newVRegI(env);
1704//zz if (ty == Ity_I8) {
1705//zz addInstr(env, X86Instr_LoadEX( 1, False, am, dst ));
1706//zz return dst;
1707//zz }
1708//zz if (ty == Ity_I32) {
1709//zz addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(am), dst));
1710//zz return dst;
1711//zz }
1712//zz break;
1713//zz }
1714
1715 /* --------- CCALL --------- */
1716 case Iex_CCall: {
1717 HReg dst = newVRegI(env);
1718 vassert(ty == e->Iex.CCall.retty);
1719
1720 /* be very restrictive for now. Only 32/64-bit ints allowed
1721 for args, and 32 bits for return type. */
1722 if (e->Iex.CCall.retty != Ity_I32)
1723 goto irreducible;
1724
1725 /* Marshal args, do the call, clear stack. */
1726 Bool ok = doHelperCall( env, False,
1727 NULL, e->Iex.CCall.cee, e->Iex.CCall.args );
1728 if (ok) {
1729 addInstr(env, mk_iMOVds_RR(dst, hregARM_R0()));
1730 return dst;
1731 }
1732 /* else fall through; will hit the irreducible: label */
1733 }
1734
1735 /* --------- LITERAL --------- */
1736 /* 32 literals */
1737 case Iex_Const: {
1738 UInt u = 0;
1739 HReg dst = newVRegI(env);
1740 switch (e->Iex.Const.con->tag) {
1741 case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
1742 case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
1743 case Ico_U8: u = 0xFF & (e->Iex.Const.con->Ico.U8); break;
sewardj6c60b322010-08-22 12:48:28 +00001744 default: ppIRExpr(e); vpanic("iselIntExpr_R.Iex_Const(arm)");
sewardj6c299f32009-12-31 18:00:12 +00001745 }
1746 addInstr(env, ARMInstr_Imm32(dst, u));
1747 return dst;
1748 }
1749
1750 /* --------- MULTIPLEX --------- */
1751 case Iex_Mux0X: {
1752 IRExpr* cond = e->Iex.Mux0X.cond;
1753
1754 /* Mux0X( 32to8(1Uto32(ccexpr)), expr0, exprX ) */
1755 if (ty == Ity_I32
1756 && cond->tag == Iex_Unop
1757 && cond->Iex.Unop.op == Iop_32to8
1758 && cond->Iex.Unop.arg->tag == Iex_Unop
1759 && cond->Iex.Unop.arg->Iex.Unop.op == Iop_1Uto32) {
1760 ARMCondCode cc;
1761 HReg rX = iselIntExpr_R(env, e->Iex.Mux0X.exprX);
1762 ARMRI84* r0 = iselIntExpr_RI84(NULL, False, env, e->Iex.Mux0X.expr0);
1763 HReg dst = newVRegI(env);
1764 addInstr(env, mk_iMOVds_RR(dst, rX));
1765 cc = iselCondCode(env, cond->Iex.Unop.arg->Iex.Unop.arg);
1766 addInstr(env, ARMInstr_CMov(cc ^ 1, dst, r0));
1767 return dst;
1768 }
1769
1770 /* Mux0X(cond, expr0, exprX) (general case) */
1771 if (ty == Ity_I32) {
1772 HReg r8;
1773 HReg rX = iselIntExpr_R(env, e->Iex.Mux0X.exprX);
1774 ARMRI84* r0 = iselIntExpr_RI84(NULL, False, env, e->Iex.Mux0X.expr0);
1775 HReg dst = newVRegI(env);
1776 addInstr(env, mk_iMOVds_RR(dst, rX));
1777 r8 = iselIntExpr_R(env, cond);
1778 addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r8,
1779 ARMRI84_I84(0xFF,0)));
1780 addInstr(env, ARMInstr_CMov(ARMcc_EQ, dst, r0));
1781 return dst;
1782 }
1783 break;
1784 }
1785
1786 default:
1787 break;
1788 } /* switch (e->tag) */
1789
1790 /* We get here if no pattern matched. */
1791 irreducible:
1792 ppIRExpr(e);
1793 vpanic("iselIntExpr_R: cannot reduce tree");
cerioncee30312004-12-17 20:30:21 +00001794}
1795
1796
sewardj6c299f32009-12-31 18:00:12 +00001797/* -------------------- 64-bit -------------------- */
1798
1799/* Compute a 64-bit value into a register pair, which is returned as
1800 the first two parameters. As with iselIntExpr_R, these may be
1801 either real or virtual regs; in any case they must not be changed
1802 by subsequent code emitted by the caller. */
1803
1804static void iselInt64Expr ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e )
1805{
1806 iselInt64Expr_wrk(rHi, rLo, env, e);
1807# if 0
1808 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
1809# endif
1810 vassert(hregClass(*rHi) == HRcInt32);
1811 vassert(hregIsVirtual(*rHi));
1812 vassert(hregClass(*rLo) == HRcInt32);
1813 vassert(hregIsVirtual(*rLo));
1814}
1815
1816/* DO NOT CALL THIS DIRECTLY ! */
1817static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e )
1818{
1819 vassert(e);
1820 vassert(typeOfIRExpr(env->type_env,e) == Ity_I64);
1821
1822 /* 64-bit literal */
1823 if (e->tag == Iex_Const) {
1824 ULong w64 = e->Iex.Const.con->Ico.U64;
1825 UInt wHi = toUInt(w64 >> 32);
1826 UInt wLo = toUInt(w64);
1827 HReg tHi = newVRegI(env);
1828 HReg tLo = newVRegI(env);
1829 vassert(e->Iex.Const.con->tag == Ico_U64);
1830 addInstr(env, ARMInstr_Imm32(tHi, wHi));
1831 addInstr(env, ARMInstr_Imm32(tLo, wLo));
1832 *rHi = tHi;
1833 *rLo = tLo;
1834 return;
1835 }
1836
1837 /* read 64-bit IRTemp */
1838 if (e->tag == Iex_RdTmp) {
sewardjc6f970f2012-04-02 21:54:49 +00001839 if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
sewardj6c60b322010-08-22 12:48:28 +00001840 HReg tHi = newVRegI(env);
1841 HReg tLo = newVRegI(env);
1842 HReg tmp = iselNeon64Expr(env, e);
1843 addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo));
1844 *rHi = tHi;
1845 *rLo = tLo;
1846 } else {
1847 lookupIRTemp64( rHi, rLo, env, e->Iex.RdTmp.tmp);
1848 }
sewardj6c299f32009-12-31 18:00:12 +00001849 return;
1850 }
1851
1852 /* 64-bit load */
1853 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
1854 HReg tLo, tHi, rA;
1855 vassert(e->Iex.Load.ty == Ity_I64);
1856 rA = iselIntExpr_R(env, e->Iex.Load.addr);
1857 tHi = newVRegI(env);
1858 tLo = newVRegI(env);
1859 addInstr(env, ARMInstr_LdSt32(True/*isLoad*/, tHi, ARMAMode1_RI(rA, 4)));
1860 addInstr(env, ARMInstr_LdSt32(True/*isLoad*/, tLo, ARMAMode1_RI(rA, 0)));
1861 *rHi = tHi;
1862 *rLo = tLo;
1863 return;
1864 }
1865
1866 /* 64-bit GET */
1867 if (e->tag == Iex_Get) {
1868 ARMAMode1* am0 = ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset + 0);
1869 ARMAMode1* am4 = ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset + 4);
1870 HReg tHi = newVRegI(env);
1871 HReg tLo = newVRegI(env);
1872 addInstr(env, ARMInstr_LdSt32(True/*isLoad*/, tHi, am4));
1873 addInstr(env, ARMInstr_LdSt32(True/*isLoad*/, tLo, am0));
1874 *rHi = tHi;
1875 *rLo = tLo;
1876 return;
1877 }
1878
1879 /* --------- BINARY ops --------- */
1880 if (e->tag == Iex_Binop) {
1881 switch (e->Iex.Binop.op) {
1882
1883 /* 32 x 32 -> 64 multiply */
1884 case Iop_MullS32:
1885 case Iop_MullU32: {
1886 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1887 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1888 HReg tHi = newVRegI(env);
1889 HReg tLo = newVRegI(env);
1890 ARMMulOp mop = e->Iex.Binop.op == Iop_MullS32
1891 ? ARMmul_SX : ARMmul_ZX;
1892 addInstr(env, mk_iMOVds_RR(hregARM_R2(), argL));
1893 addInstr(env, mk_iMOVds_RR(hregARM_R3(), argR));
1894 addInstr(env, ARMInstr_Mul(mop));
1895 addInstr(env, mk_iMOVds_RR(tHi, hregARM_R1()));
1896 addInstr(env, mk_iMOVds_RR(tLo, hregARM_R0()));
1897 *rHi = tHi;
1898 *rLo = tLo;
1899 return;
1900 }
1901
1902 case Iop_Or64: {
1903 HReg xLo, xHi, yLo, yHi;
1904 HReg tHi = newVRegI(env);
1905 HReg tLo = newVRegI(env);
1906 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
1907 iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
1908 addInstr(env, ARMInstr_Alu(ARMalu_OR, tHi, xHi, ARMRI84_R(yHi)));
1909 addInstr(env, ARMInstr_Alu(ARMalu_OR, tLo, xLo, ARMRI84_R(yLo)));
1910 *rHi = tHi;
1911 *rLo = tLo;
1912 return;
1913 }
1914
1915 case Iop_Add64: {
1916 HReg xLo, xHi, yLo, yHi;
1917 HReg tHi = newVRegI(env);
1918 HReg tLo = newVRegI(env);
1919 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
1920 iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
1921 addInstr(env, ARMInstr_Alu(ARMalu_ADDS, tLo, xLo, ARMRI84_R(yLo)));
1922 addInstr(env, ARMInstr_Alu(ARMalu_ADC, tHi, xHi, ARMRI84_R(yHi)));
1923 *rHi = tHi;
1924 *rLo = tLo;
1925 return;
1926 }
1927
1928 /* 32HLto64(e1,e2) */
1929 case Iop_32HLto64: {
1930 *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
1931 *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
1932 return;
1933 }
1934
1935 default:
1936 break;
1937 }
1938 }
1939
1940 /* --------- UNARY ops --------- */
1941 if (e->tag == Iex_Unop) {
1942 switch (e->Iex.Unop.op) {
1943
1944 /* ReinterpF64asI64 */
1945 case Iop_ReinterpF64asI64: {
1946 HReg dstHi = newVRegI(env);
1947 HReg dstLo = newVRegI(env);
1948 HReg src = iselDblExpr(env, e->Iex.Unop.arg);
1949 addInstr(env, ARMInstr_VXferD(False/*!toD*/, src, dstHi, dstLo));
1950 *rHi = dstHi;
1951 *rLo = dstLo;
1952 return;
1953 }
1954
1955 /* Left64(e) */
1956 case Iop_Left64: {
1957 HReg yLo, yHi;
1958 HReg tHi = newVRegI(env);
1959 HReg tLo = newVRegI(env);
1960 HReg zero = newVRegI(env);
1961 /* yHi:yLo = arg */
1962 iselInt64Expr(&yHi, &yLo, env, e->Iex.Unop.arg);
1963 /* zero = 0 */
1964 addInstr(env, ARMInstr_Imm32(zero, 0));
1965 /* tLo = 0 - yLo, and set carry */
sewardj6c60b322010-08-22 12:48:28 +00001966 addInstr(env, ARMInstr_Alu(ARMalu_SUBS,
1967 tLo, zero, ARMRI84_R(yLo)));
sewardj6c299f32009-12-31 18:00:12 +00001968 /* tHi = 0 - yHi - carry */
sewardj6c60b322010-08-22 12:48:28 +00001969 addInstr(env, ARMInstr_Alu(ARMalu_SBC,
1970 tHi, zero, ARMRI84_R(yHi)));
sewardj6c299f32009-12-31 18:00:12 +00001971 /* So now we have tHi:tLo = -arg. To finish off, or 'arg'
1972 back in, so as to give the final result
1973 tHi:tLo = arg | -arg. */
1974 addInstr(env, ARMInstr_Alu(ARMalu_OR, tHi, tHi, ARMRI84_R(yHi)));
1975 addInstr(env, ARMInstr_Alu(ARMalu_OR, tLo, tLo, ARMRI84_R(yLo)));
1976 *rHi = tHi;
1977 *rLo = tLo;
1978 return;
1979 }
1980
1981 /* CmpwNEZ64(e) */
1982 case Iop_CmpwNEZ64: {
1983 HReg srcLo, srcHi;
1984 HReg tmp1 = newVRegI(env);
1985 HReg tmp2 = newVRegI(env);
1986 /* srcHi:srcLo = arg */
1987 iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg);
1988 /* tmp1 = srcHi | srcLo */
1989 addInstr(env, ARMInstr_Alu(ARMalu_OR,
1990 tmp1, srcHi, ARMRI84_R(srcLo)));
1991 /* tmp2 = (tmp1 | -tmp1) >>s 31 */
1992 addInstr(env, ARMInstr_Unary(ARMun_NEG, tmp2, tmp1));
1993 addInstr(env, ARMInstr_Alu(ARMalu_OR,
1994 tmp2, tmp2, ARMRI84_R(tmp1)));
1995 addInstr(env, ARMInstr_Shift(ARMsh_SAR,
1996 tmp2, tmp2, ARMRI5_I5(31)));
1997 *rHi = tmp2;
1998 *rLo = tmp2;
1999 return;
2000 }
2001
2002 case Iop_1Sto64: {
2003 HReg dst = newVRegI(env);
2004 ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
2005 ARMRI5* amt = ARMRI5_I5(31);
2006 /* This is really rough. We could do much better here;
2007 perhaps mvn{cond} dst, #0 as the second insn?
2008 (same applies to 1Sto32) */
2009 addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
2010 addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
2011 addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, dst, amt));
2012 addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt));
2013 *rHi = dst;
2014 *rLo = dst;
2015 return;
2016 }
2017
2018 default:
2019 break;
2020 }
2021 } /* if (e->tag == Iex_Unop) */
2022
2023 /* --------- MULTIPLEX --------- */
2024 if (e->tag == Iex_Mux0X) {
2025 IRType ty8;
2026 HReg r8, rXhi, rXlo, r0hi, r0lo, dstHi, dstLo;
2027 ty8 = typeOfIRExpr(env->type_env,e->Iex.Mux0X.cond);
2028 vassert(ty8 == Ity_I8);
2029 iselInt64Expr(&rXhi, &rXlo, env, e->Iex.Mux0X.exprX);
2030 iselInt64Expr(&r0hi, &r0lo, env, e->Iex.Mux0X.expr0);
2031 dstHi = newVRegI(env);
2032 dstLo = newVRegI(env);
2033 addInstr(env, mk_iMOVds_RR(dstHi, rXhi));
2034 addInstr(env, mk_iMOVds_RR(dstLo, rXlo));
2035 r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond);
2036 addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r8,
2037 ARMRI84_I84(0xFF,0)));
2038 addInstr(env, ARMInstr_CMov(ARMcc_EQ, dstHi, ARMRI84_R(r0hi)));
2039 addInstr(env, ARMInstr_CMov(ARMcc_EQ, dstLo, ARMRI84_R(r0lo)));
2040 *rHi = dstHi;
2041 *rLo = dstLo;
2042 return;
2043 }
2044
sewardj6c60b322010-08-22 12:48:28 +00002045 /* It is convenient sometimes to call iselInt64Expr even when we
2046 have NEON support (e.g. in do_helper_call we need 64-bit
2047 arguments as 2 x 32 regs). */
sewardjc6f970f2012-04-02 21:54:49 +00002048 if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
sewardj6c60b322010-08-22 12:48:28 +00002049 HReg tHi = newVRegI(env);
2050 HReg tLo = newVRegI(env);
2051 HReg tmp = iselNeon64Expr(env, e);
2052 addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo));
2053 *rHi = tHi;
2054 *rLo = tLo;
2055 return ;
2056 }
2057
sewardj6c299f32009-12-31 18:00:12 +00002058 ppIRExpr(e);
2059 vpanic("iselInt64Expr");
2060}
2061
2062
2063/*---------------------------------------------------------*/
sewardj6c60b322010-08-22 12:48:28 +00002064/*--- ISEL: Vector (NEON) expressions (64 or 128 bit) ---*/
2065/*---------------------------------------------------------*/
2066
2067static HReg iselNeon64Expr ( ISelEnv* env, IRExpr* e )
2068{
2069 HReg r = iselNeon64Expr_wrk( env, e );
2070 vassert(hregClass(r) == HRcFlt64);
2071 vassert(hregIsVirtual(r));
2072 return r;
2073}
2074
2075/* DO NOT CALL THIS DIRECTLY */
2076static HReg iselNeon64Expr_wrk ( ISelEnv* env, IRExpr* e )
2077{
2078 IRType ty = typeOfIRExpr(env->type_env, e);
2079 MatchInfo mi;
2080 vassert(e);
2081 vassert(ty == Ity_I64);
2082
2083 if (e->tag == Iex_RdTmp) {
2084 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
2085 }
2086
2087 if (e->tag == Iex_Const) {
2088 HReg rLo, rHi;
2089 HReg res = newVRegD(env);
2090 iselInt64Expr(&rHi, &rLo, env, e);
2091 addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
2092 return res;
2093 }
2094
2095 /* 64-bit load */
2096 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
2097 HReg res = newVRegD(env);
2098 ARMAModeN* am = iselIntExpr_AModeN(env, e->Iex.Load.addr);
2099 vassert(ty == Ity_I64);
2100 addInstr(env, ARMInstr_NLdStD(True, res, am));
2101 return res;
2102 }
2103
2104 /* 64-bit GET */
2105 if (e->tag == Iex_Get) {
2106 HReg addr = newVRegI(env);
2107 HReg res = newVRegD(env);
2108 vassert(ty == Ity_I64);
2109 addInstr(env, ARMInstr_Add32(addr, hregARM_R8(), e->Iex.Get.offset));
2110 addInstr(env, ARMInstr_NLdStD(True, res, mkARMAModeN_R(addr)));
2111 return res;
2112 }
2113
2114 /* --------- BINARY ops --------- */
2115 if (e->tag == Iex_Binop) {
2116 switch (e->Iex.Binop.op) {
2117
2118 /* 32 x 32 -> 64 multiply */
2119 case Iop_MullS32:
2120 case Iop_MullU32: {
2121 HReg rLo, rHi;
2122 HReg res = newVRegD(env);
2123 iselInt64Expr(&rHi, &rLo, env, e);
2124 addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
2125 return res;
2126 }
2127
2128 case Iop_And64: {
2129 HReg res = newVRegD(env);
2130 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2131 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2132 addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
2133 res, argL, argR, 4, False));
2134 return res;
2135 }
2136 case Iop_Or64: {
2137 HReg res = newVRegD(env);
2138 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2139 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2140 addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
2141 res, argL, argR, 4, False));
2142 return res;
2143 }
2144 case Iop_Xor64: {
2145 HReg res = newVRegD(env);
2146 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2147 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2148 addInstr(env, ARMInstr_NBinary(ARMneon_VXOR,
2149 res, argL, argR, 4, False));
2150 return res;
2151 }
2152
2153 /* 32HLto64(e1,e2) */
2154 case Iop_32HLto64: {
2155 HReg rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
2156 HReg rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
2157 HReg res = newVRegD(env);
2158 addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
2159 return res;
2160 }
2161
2162 case Iop_Add8x8:
2163 case Iop_Add16x4:
2164 case Iop_Add32x2:
2165 case Iop_Add64: {
2166 HReg res = newVRegD(env);
2167 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2168 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2169 UInt size;
2170 switch (e->Iex.Binop.op) {
2171 case Iop_Add8x8: size = 0; break;
2172 case Iop_Add16x4: size = 1; break;
2173 case Iop_Add32x2: size = 2; break;
2174 case Iop_Add64: size = 3; break;
2175 default: vassert(0);
2176 }
2177 addInstr(env, ARMInstr_NBinary(ARMneon_VADD,
2178 res, argL, argR, size, False));
2179 return res;
2180 }
2181 case Iop_Add32Fx2: {
2182 HReg res = newVRegD(env);
2183 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2184 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2185 UInt size = 0;
2186 addInstr(env, ARMInstr_NBinary(ARMneon_VADDFP,
2187 res, argL, argR, size, False));
2188 return res;
2189 }
2190 case Iop_Recps32Fx2: {
2191 HReg res = newVRegD(env);
2192 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2193 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2194 UInt size = 0;
2195 addInstr(env, ARMInstr_NBinary(ARMneon_VRECPS,
2196 res, argL, argR, size, False));
2197 return res;
2198 }
2199 case Iop_Rsqrts32Fx2: {
2200 HReg res = newVRegD(env);
2201 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2202 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2203 UInt size = 0;
2204 addInstr(env, ARMInstr_NBinary(ARMneon_VRSQRTS,
2205 res, argL, argR, size, False));
2206 return res;
2207 }
2208 case Iop_InterleaveOddLanes8x8:
2209 case Iop_InterleaveOddLanes16x4:
2210 case Iop_InterleaveLO32x2:
2211 case Iop_InterleaveEvenLanes8x8:
2212 case Iop_InterleaveEvenLanes16x4:
2213 case Iop_InterleaveHI32x2: {
2214 HReg tmp = newVRegD(env);
2215 HReg res = newVRegD(env);
2216 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2217 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2218 UInt size;
2219 UInt is_lo;
2220 switch (e->Iex.Binop.op) {
2221 case Iop_InterleaveOddLanes8x8: is_lo = 1; size = 0; break;
2222 case Iop_InterleaveEvenLanes8x8: is_lo = 0; size = 0; break;
2223 case Iop_InterleaveOddLanes16x4: is_lo = 1; size = 1; break;
2224 case Iop_InterleaveEvenLanes16x4: is_lo = 0; size = 1; break;
2225 case Iop_InterleaveLO32x2: is_lo = 1; size = 2; break;
2226 case Iop_InterleaveHI32x2: is_lo = 0; size = 2; break;
2227 default: vassert(0);
2228 }
2229 if (is_lo) {
2230 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2231 tmp, argL, 4, False));
2232 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2233 res, argR, 4, False));
2234 addInstr(env, ARMInstr_NDual(ARMneon_TRN,
2235 res, tmp, size, False));
2236 } else {
2237 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2238 tmp, argR, 4, False));
2239 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2240 res, argL, 4, False));
2241 addInstr(env, ARMInstr_NDual(ARMneon_TRN,
2242 tmp, res, size, False));
2243 }
2244 return res;
2245 }
2246 case Iop_InterleaveHI8x8:
2247 case Iop_InterleaveHI16x4:
2248 case Iop_InterleaveLO8x8:
2249 case Iop_InterleaveLO16x4: {
2250 HReg tmp = newVRegD(env);
2251 HReg res = newVRegD(env);
2252 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2253 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2254 UInt size;
2255 UInt is_lo;
2256 switch (e->Iex.Binop.op) {
2257 case Iop_InterleaveHI8x8: is_lo = 1; size = 0; break;
2258 case Iop_InterleaveLO8x8: is_lo = 0; size = 0; break;
2259 case Iop_InterleaveHI16x4: is_lo = 1; size = 1; break;
2260 case Iop_InterleaveLO16x4: is_lo = 0; size = 1; break;
2261 default: vassert(0);
2262 }
2263 if (is_lo) {
2264 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2265 tmp, argL, 4, False));
2266 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2267 res, argR, 4, False));
2268 addInstr(env, ARMInstr_NDual(ARMneon_ZIP,
2269 res, tmp, size, False));
2270 } else {
2271 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2272 tmp, argR, 4, False));
2273 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2274 res, argL, 4, False));
2275 addInstr(env, ARMInstr_NDual(ARMneon_ZIP,
2276 tmp, res, size, False));
2277 }
2278 return res;
2279 }
2280 case Iop_CatOddLanes8x8:
2281 case Iop_CatOddLanes16x4:
2282 case Iop_CatEvenLanes8x8:
2283 case Iop_CatEvenLanes16x4: {
2284 HReg tmp = newVRegD(env);
2285 HReg res = newVRegD(env);
2286 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2287 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2288 UInt size;
2289 UInt is_lo;
2290 switch (e->Iex.Binop.op) {
2291 case Iop_CatOddLanes8x8: is_lo = 1; size = 0; break;
2292 case Iop_CatEvenLanes8x8: is_lo = 0; size = 0; break;
2293 case Iop_CatOddLanes16x4: is_lo = 1; size = 1; break;
2294 case Iop_CatEvenLanes16x4: is_lo = 0; size = 1; break;
2295 default: vassert(0);
2296 }
2297 if (is_lo) {
2298 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2299 tmp, argL, 4, False));
2300 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2301 res, argR, 4, False));
2302 addInstr(env, ARMInstr_NDual(ARMneon_UZP,
2303 res, tmp, size, False));
2304 } else {
2305 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2306 tmp, argR, 4, False));
2307 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2308 res, argL, 4, False));
2309 addInstr(env, ARMInstr_NDual(ARMneon_UZP,
2310 tmp, res, size, False));
2311 }
2312 return res;
2313 }
2314 case Iop_QAdd8Ux8:
2315 case Iop_QAdd16Ux4:
2316 case Iop_QAdd32Ux2:
2317 case Iop_QAdd64Ux1: {
2318 HReg res = newVRegD(env);
2319 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2320 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2321 UInt size;
2322 switch (e->Iex.Binop.op) {
2323 case Iop_QAdd8Ux8: size = 0; break;
2324 case Iop_QAdd16Ux4: size = 1; break;
2325 case Iop_QAdd32Ux2: size = 2; break;
2326 case Iop_QAdd64Ux1: size = 3; break;
2327 default: vassert(0);
2328 }
2329 addInstr(env, ARMInstr_NBinary(ARMneon_VQADDU,
2330 res, argL, argR, size, False));
2331 return res;
2332 }
2333 case Iop_QAdd8Sx8:
2334 case Iop_QAdd16Sx4:
2335 case Iop_QAdd32Sx2:
2336 case Iop_QAdd64Sx1: {
2337 HReg res = newVRegD(env);
2338 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2339 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2340 UInt size;
2341 switch (e->Iex.Binop.op) {
2342 case Iop_QAdd8Sx8: size = 0; break;
2343 case Iop_QAdd16Sx4: size = 1; break;
2344 case Iop_QAdd32Sx2: size = 2; break;
2345 case Iop_QAdd64Sx1: size = 3; break;
2346 default: vassert(0);
2347 }
2348 addInstr(env, ARMInstr_NBinary(ARMneon_VQADDS,
2349 res, argL, argR, size, False));
2350 return res;
2351 }
2352 case Iop_Sub8x8:
2353 case Iop_Sub16x4:
2354 case Iop_Sub32x2:
2355 case Iop_Sub64: {
2356 HReg res = newVRegD(env);
2357 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2358 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2359 UInt size;
2360 switch (e->Iex.Binop.op) {
2361 case Iop_Sub8x8: size = 0; break;
2362 case Iop_Sub16x4: size = 1; break;
2363 case Iop_Sub32x2: size = 2; break;
2364 case Iop_Sub64: size = 3; break;
2365 default: vassert(0);
2366 }
2367 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
2368 res, argL, argR, size, False));
2369 return res;
2370 }
2371 case Iop_Sub32Fx2: {
2372 HReg res = newVRegD(env);
2373 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2374 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2375 UInt size = 0;
2376 addInstr(env, ARMInstr_NBinary(ARMneon_VSUBFP,
2377 res, argL, argR, size, False));
2378 return res;
2379 }
2380 case Iop_QSub8Ux8:
2381 case Iop_QSub16Ux4:
2382 case Iop_QSub32Ux2:
2383 case Iop_QSub64Ux1: {
2384 HReg res = newVRegD(env);
2385 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2386 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2387 UInt size;
2388 switch (e->Iex.Binop.op) {
2389 case Iop_QSub8Ux8: size = 0; break;
2390 case Iop_QSub16Ux4: size = 1; break;
2391 case Iop_QSub32Ux2: size = 2; break;
2392 case Iop_QSub64Ux1: size = 3; break;
2393 default: vassert(0);
2394 }
2395 addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBU,
2396 res, argL, argR, size, False));
2397 return res;
2398 }
2399 case Iop_QSub8Sx8:
2400 case Iop_QSub16Sx4:
2401 case Iop_QSub32Sx2:
2402 case Iop_QSub64Sx1: {
2403 HReg res = newVRegD(env);
2404 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2405 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2406 UInt size;
2407 switch (e->Iex.Binop.op) {
2408 case Iop_QSub8Sx8: size = 0; break;
2409 case Iop_QSub16Sx4: size = 1; break;
2410 case Iop_QSub32Sx2: size = 2; break;
2411 case Iop_QSub64Sx1: size = 3; break;
2412 default: vassert(0);
2413 }
2414 addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBS,
2415 res, argL, argR, size, False));
2416 return res;
2417 }
2418 case Iop_Max8Ux8:
2419 case Iop_Max16Ux4:
2420 case Iop_Max32Ux2: {
2421 HReg res = newVRegD(env);
2422 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2423 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2424 UInt size;
2425 switch (e->Iex.Binop.op) {
2426 case Iop_Max8Ux8: size = 0; break;
2427 case Iop_Max16Ux4: size = 1; break;
2428 case Iop_Max32Ux2: size = 2; break;
2429 default: vassert(0);
2430 }
2431 addInstr(env, ARMInstr_NBinary(ARMneon_VMAXU,
2432 res, argL, argR, size, False));
2433 return res;
2434 }
2435 case Iop_Max8Sx8:
2436 case Iop_Max16Sx4:
2437 case Iop_Max32Sx2: {
2438 HReg res = newVRegD(env);
2439 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2440 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2441 UInt size;
2442 switch (e->Iex.Binop.op) {
2443 case Iop_Max8Sx8: size = 0; break;
2444 case Iop_Max16Sx4: size = 1; break;
2445 case Iop_Max32Sx2: size = 2; break;
2446 default: vassert(0);
2447 }
2448 addInstr(env, ARMInstr_NBinary(ARMneon_VMAXS,
2449 res, argL, argR, size, False));
2450 return res;
2451 }
2452 case Iop_Min8Ux8:
2453 case Iop_Min16Ux4:
2454 case Iop_Min32Ux2: {
2455 HReg res = newVRegD(env);
2456 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2457 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2458 UInt size;
2459 switch (e->Iex.Binop.op) {
2460 case Iop_Min8Ux8: size = 0; break;
2461 case Iop_Min16Ux4: size = 1; break;
2462 case Iop_Min32Ux2: size = 2; break;
2463 default: vassert(0);
2464 }
2465 addInstr(env, ARMInstr_NBinary(ARMneon_VMINU,
2466 res, argL, argR, size, False));
2467 return res;
2468 }
2469 case Iop_Min8Sx8:
2470 case Iop_Min16Sx4:
2471 case Iop_Min32Sx2: {
2472 HReg res = newVRegD(env);
2473 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2474 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2475 UInt size;
2476 switch (e->Iex.Binop.op) {
2477 case Iop_Min8Sx8: size = 0; break;
2478 case Iop_Min16Sx4: size = 1; break;
2479 case Iop_Min32Sx2: size = 2; break;
2480 default: vassert(0);
2481 }
2482 addInstr(env, ARMInstr_NBinary(ARMneon_VMINS,
2483 res, argL, argR, size, False));
2484 return res;
2485 }
2486 case Iop_Sar8x8:
2487 case Iop_Sar16x4:
2488 case Iop_Sar32x2: {
2489 HReg res = newVRegD(env);
2490 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2491 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2492 HReg argR2 = newVRegD(env);
2493 HReg zero = newVRegD(env);
2494 UInt size;
2495 switch (e->Iex.Binop.op) {
2496 case Iop_Sar8x8: size = 0; break;
2497 case Iop_Sar16x4: size = 1; break;
2498 case Iop_Sar32x2: size = 2; break;
2499 case Iop_Sar64: size = 3; break;
2500 default: vassert(0);
2501 }
2502 addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
2503 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
2504 argR2, zero, argR, size, False));
2505 addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
2506 res, argL, argR2, size, False));
2507 return res;
2508 }
2509 case Iop_Sal8x8:
2510 case Iop_Sal16x4:
2511 case Iop_Sal32x2:
2512 case Iop_Sal64x1: {
2513 HReg res = newVRegD(env);
2514 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2515 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2516 UInt size;
2517 switch (e->Iex.Binop.op) {
2518 case Iop_Sal8x8: size = 0; break;
2519 case Iop_Sal16x4: size = 1; break;
2520 case Iop_Sal32x2: size = 2; break;
2521 case Iop_Sal64x1: size = 3; break;
2522 default: vassert(0);
2523 }
2524 addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
2525 res, argL, argR, size, False));
2526 return res;
2527 }
2528 case Iop_Shr8x8:
2529 case Iop_Shr16x4:
2530 case Iop_Shr32x2: {
2531 HReg res = newVRegD(env);
2532 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2533 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2534 HReg argR2 = newVRegD(env);
2535 HReg zero = newVRegD(env);
2536 UInt size;
2537 switch (e->Iex.Binop.op) {
2538 case Iop_Shr8x8: size = 0; break;
2539 case Iop_Shr16x4: size = 1; break;
2540 case Iop_Shr32x2: size = 2; break;
2541 default: vassert(0);
2542 }
2543 addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
2544 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
2545 argR2, zero, argR, size, False));
2546 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
2547 res, argL, argR2, size, False));
2548 return res;
2549 }
2550 case Iop_Shl8x8:
2551 case Iop_Shl16x4:
2552 case Iop_Shl32x2: {
2553 HReg res = newVRegD(env);
2554 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2555 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2556 UInt size;
2557 switch (e->Iex.Binop.op) {
2558 case Iop_Shl8x8: size = 0; break;
2559 case Iop_Shl16x4: size = 1; break;
2560 case Iop_Shl32x2: size = 2; break;
2561 default: vassert(0);
2562 }
2563 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
2564 res, argL, argR, size, False));
2565 return res;
2566 }
2567 case Iop_QShl8x8:
2568 case Iop_QShl16x4:
2569 case Iop_QShl32x2:
2570 case Iop_QShl64x1: {
2571 HReg res = newVRegD(env);
2572 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2573 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2574 UInt size;
2575 switch (e->Iex.Binop.op) {
2576 case Iop_QShl8x8: size = 0; break;
2577 case Iop_QShl16x4: size = 1; break;
2578 case Iop_QShl32x2: size = 2; break;
2579 case Iop_QShl64x1: size = 3; break;
2580 default: vassert(0);
2581 }
2582 addInstr(env, ARMInstr_NShift(ARMneon_VQSHL,
2583 res, argL, argR, size, False));
2584 return res;
2585 }
2586 case Iop_QSal8x8:
2587 case Iop_QSal16x4:
2588 case Iop_QSal32x2:
2589 case Iop_QSal64x1: {
2590 HReg res = newVRegD(env);
2591 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2592 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2593 UInt size;
2594 switch (e->Iex.Binop.op) {
2595 case Iop_QSal8x8: size = 0; break;
2596 case Iop_QSal16x4: size = 1; break;
2597 case Iop_QSal32x2: size = 2; break;
2598 case Iop_QSal64x1: size = 3; break;
2599 default: vassert(0);
2600 }
2601 addInstr(env, ARMInstr_NShift(ARMneon_VQSAL,
2602 res, argL, argR, size, False));
2603 return res;
2604 }
2605 case Iop_QShlN8x8:
2606 case Iop_QShlN16x4:
2607 case Iop_QShlN32x2:
2608 case Iop_QShlN64x1: {
2609 HReg res = newVRegD(env);
2610 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2611 UInt size, imm;
2612 if (e->Iex.Binop.arg2->tag != Iex_Const ||
2613 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
2614 vpanic("ARM taget supports Iop_QShlNAxB with constant "
2615 "second argument only\n");
2616 }
2617 imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
2618 switch (e->Iex.Binop.op) {
2619 case Iop_QShlN8x8: size = 8 | imm; break;
2620 case Iop_QShlN16x4: size = 16 | imm; break;
2621 case Iop_QShlN32x2: size = 32 | imm; break;
2622 case Iop_QShlN64x1: size = 64 | imm; break;
2623 default: vassert(0);
2624 }
2625 addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUU,
2626 res, argL, size, False));
2627 return res;
2628 }
2629 case Iop_QShlN8Sx8:
2630 case Iop_QShlN16Sx4:
2631 case Iop_QShlN32Sx2:
2632 case Iop_QShlN64Sx1: {
2633 HReg res = newVRegD(env);
2634 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2635 UInt size, imm;
2636 if (e->Iex.Binop.arg2->tag != Iex_Const ||
2637 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
2638 vpanic("ARM taget supports Iop_QShlNAxB with constant "
2639 "second argument only\n");
2640 }
2641 imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
2642 switch (e->Iex.Binop.op) {
2643 case Iop_QShlN8Sx8: size = 8 | imm; break;
2644 case Iop_QShlN16Sx4: size = 16 | imm; break;
2645 case Iop_QShlN32Sx2: size = 32 | imm; break;
2646 case Iop_QShlN64Sx1: size = 64 | imm; break;
2647 default: vassert(0);
2648 }
2649 addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUS,
2650 res, argL, size, False));
2651 return res;
2652 }
2653 case Iop_QSalN8x8:
2654 case Iop_QSalN16x4:
2655 case Iop_QSalN32x2:
2656 case Iop_QSalN64x1: {
2657 HReg res = newVRegD(env);
2658 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2659 UInt size, imm;
2660 if (e->Iex.Binop.arg2->tag != Iex_Const ||
2661 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
2662 vpanic("ARM taget supports Iop_QShlNAxB with constant "
2663 "second argument only\n");
2664 }
2665 imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
2666 switch (e->Iex.Binop.op) {
2667 case Iop_QSalN8x8: size = 8 | imm; break;
2668 case Iop_QSalN16x4: size = 16 | imm; break;
2669 case Iop_QSalN32x2: size = 32 | imm; break;
2670 case Iop_QSalN64x1: size = 64 | imm; break;
2671 default: vassert(0);
2672 }
2673 addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNSS,
2674 res, argL, size, False));
2675 return res;
2676 }
2677 case Iop_ShrN8x8:
2678 case Iop_ShrN16x4:
2679 case Iop_ShrN32x2:
2680 case Iop_Shr64: {
2681 HReg res = newVRegD(env);
2682 HReg tmp = newVRegD(env);
2683 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2684 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
2685 HReg argR2 = newVRegI(env);
2686 UInt size;
2687 switch (e->Iex.Binop.op) {
2688 case Iop_ShrN8x8: size = 0; break;
2689 case Iop_ShrN16x4: size = 1; break;
2690 case Iop_ShrN32x2: size = 2; break;
2691 case Iop_Shr64: size = 3; break;
2692 default: vassert(0);
2693 }
2694 addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
2695 addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, False));
2696 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
2697 res, argL, tmp, size, False));
2698 return res;
2699 }
2700 case Iop_ShlN8x8:
2701 case Iop_ShlN16x4:
2702 case Iop_ShlN32x2:
2703 case Iop_Shl64: {
2704 HReg res = newVRegD(env);
2705 HReg tmp = newVRegD(env);
2706 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2707 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
2708 UInt size;
2709 switch (e->Iex.Binop.op) {
2710 case Iop_ShlN8x8: size = 0; break;
2711 case Iop_ShlN16x4: size = 1; break;
2712 case Iop_ShlN32x2: size = 2; break;
2713 case Iop_Shl64: size = 3; break;
2714 default: vassert(0);
2715 }
2716 addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR, 0, False));
2717 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
2718 res, argL, tmp, size, False));
2719 return res;
2720 }
2721 case Iop_SarN8x8:
2722 case Iop_SarN16x4:
2723 case Iop_SarN32x2:
2724 case Iop_Sar64: {
2725 HReg res = newVRegD(env);
2726 HReg tmp = newVRegD(env);
2727 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2728 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
2729 HReg argR2 = newVRegI(env);
2730 UInt size;
2731 switch (e->Iex.Binop.op) {
2732 case Iop_SarN8x8: size = 0; break;
2733 case Iop_SarN16x4: size = 1; break;
2734 case Iop_SarN32x2: size = 2; break;
2735 case Iop_Sar64: size = 3; break;
2736 default: vassert(0);
2737 }
2738 addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
2739 addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, False));
2740 addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
2741 res, argL, tmp, size, False));
2742 return res;
2743 }
2744 case Iop_CmpGT8Ux8:
2745 case Iop_CmpGT16Ux4:
2746 case Iop_CmpGT32Ux2: {
2747 HReg res = newVRegD(env);
2748 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2749 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2750 UInt size;
2751 switch (e->Iex.Binop.op) {
2752 case Iop_CmpGT8Ux8: size = 0; break;
2753 case Iop_CmpGT16Ux4: size = 1; break;
2754 case Iop_CmpGT32Ux2: size = 2; break;
2755 default: vassert(0);
2756 }
2757 addInstr(env, ARMInstr_NBinary(ARMneon_VCGTU,
2758 res, argL, argR, size, False));
2759 return res;
2760 }
2761 case Iop_CmpGT8Sx8:
2762 case Iop_CmpGT16Sx4:
2763 case Iop_CmpGT32Sx2: {
2764 HReg res = newVRegD(env);
2765 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2766 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2767 UInt size;
2768 switch (e->Iex.Binop.op) {
2769 case Iop_CmpGT8Sx8: size = 0; break;
2770 case Iop_CmpGT16Sx4: size = 1; break;
2771 case Iop_CmpGT32Sx2: size = 2; break;
2772 default: vassert(0);
2773 }
2774 addInstr(env, ARMInstr_NBinary(ARMneon_VCGTS,
2775 res, argL, argR, size, False));
2776 return res;
2777 }
2778 case Iop_CmpEQ8x8:
2779 case Iop_CmpEQ16x4:
2780 case Iop_CmpEQ32x2: {
2781 HReg res = newVRegD(env);
2782 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2783 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2784 UInt size;
2785 switch (e->Iex.Binop.op) {
2786 case Iop_CmpEQ8x8: size = 0; break;
2787 case Iop_CmpEQ16x4: size = 1; break;
2788 case Iop_CmpEQ32x2: size = 2; break;
2789 default: vassert(0);
2790 }
2791 addInstr(env, ARMInstr_NBinary(ARMneon_VCEQ,
2792 res, argL, argR, size, False));
2793 return res;
2794 }
2795 case Iop_Mul8x8:
2796 case Iop_Mul16x4:
2797 case Iop_Mul32x2: {
2798 HReg res = newVRegD(env);
2799 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2800 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2801 UInt size = 0;
2802 switch(e->Iex.Binop.op) {
2803 case Iop_Mul8x8: size = 0; break;
2804 case Iop_Mul16x4: size = 1; break;
2805 case Iop_Mul32x2: size = 2; break;
2806 default: vassert(0);
2807 }
2808 addInstr(env, ARMInstr_NBinary(ARMneon_VMUL,
2809 res, argL, argR, size, False));
2810 return res;
2811 }
2812 case Iop_Mul32Fx2: {
2813 HReg res = newVRegD(env);
2814 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2815 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2816 UInt size = 0;
2817 addInstr(env, ARMInstr_NBinary(ARMneon_VMULFP,
2818 res, argL, argR, size, False));
2819 return res;
2820 }
2821 case Iop_QDMulHi16Sx4:
2822 case Iop_QDMulHi32Sx2: {
2823 HReg res = newVRegD(env);
2824 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2825 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2826 UInt size = 0;
2827 switch(e->Iex.Binop.op) {
2828 case Iop_QDMulHi16Sx4: size = 1; break;
2829 case Iop_QDMulHi32Sx2: size = 2; break;
2830 default: vassert(0);
2831 }
2832 addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULH,
2833 res, argL, argR, size, False));
2834 return res;
2835 }
2836
2837 case Iop_QRDMulHi16Sx4:
2838 case Iop_QRDMulHi32Sx2: {
2839 HReg res = newVRegD(env);
2840 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2841 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2842 UInt size = 0;
2843 switch(e->Iex.Binop.op) {
2844 case Iop_QRDMulHi16Sx4: size = 1; break;
2845 case Iop_QRDMulHi32Sx2: size = 2; break;
2846 default: vassert(0);
2847 }
2848 addInstr(env, ARMInstr_NBinary(ARMneon_VQRDMULH,
2849 res, argL, argR, size, False));
2850 return res;
2851 }
2852
2853 case Iop_PwAdd8x8:
2854 case Iop_PwAdd16x4:
2855 case Iop_PwAdd32x2: {
2856 HReg res = newVRegD(env);
2857 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2858 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2859 UInt size = 0;
2860 switch(e->Iex.Binop.op) {
2861 case Iop_PwAdd8x8: size = 0; break;
2862 case Iop_PwAdd16x4: size = 1; break;
2863 case Iop_PwAdd32x2: size = 2; break;
2864 default: vassert(0);
2865 }
2866 addInstr(env, ARMInstr_NBinary(ARMneon_VPADD,
2867 res, argL, argR, size, False));
2868 return res;
2869 }
2870 case Iop_PwAdd32Fx2: {
2871 HReg res = newVRegD(env);
2872 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2873 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2874 UInt size = 0;
2875 addInstr(env, ARMInstr_NBinary(ARMneon_VPADDFP,
2876 res, argL, argR, size, False));
2877 return res;
2878 }
2879 case Iop_PwMin8Ux8:
2880 case Iop_PwMin16Ux4:
2881 case Iop_PwMin32Ux2: {
2882 HReg res = newVRegD(env);
2883 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2884 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2885 UInt size = 0;
2886 switch(e->Iex.Binop.op) {
2887 case Iop_PwMin8Ux8: size = 0; break;
2888 case Iop_PwMin16Ux4: size = 1; break;
2889 case Iop_PwMin32Ux2: size = 2; break;
2890 default: vassert(0);
2891 }
2892 addInstr(env, ARMInstr_NBinary(ARMneon_VPMINU,
2893 res, argL, argR, size, False));
2894 return res;
2895 }
2896 case Iop_PwMin8Sx8:
2897 case Iop_PwMin16Sx4:
2898 case Iop_PwMin32Sx2: {
2899 HReg res = newVRegD(env);
2900 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2901 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2902 UInt size = 0;
2903 switch(e->Iex.Binop.op) {
2904 case Iop_PwMin8Sx8: size = 0; break;
2905 case Iop_PwMin16Sx4: size = 1; break;
2906 case Iop_PwMin32Sx2: size = 2; break;
2907 default: vassert(0);
2908 }
2909 addInstr(env, ARMInstr_NBinary(ARMneon_VPMINS,
2910 res, argL, argR, size, False));
2911 return res;
2912 }
2913 case Iop_PwMax8Ux8:
2914 case Iop_PwMax16Ux4:
2915 case Iop_PwMax32Ux2: {
2916 HReg res = newVRegD(env);
2917 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2918 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2919 UInt size = 0;
2920 switch(e->Iex.Binop.op) {
2921 case Iop_PwMax8Ux8: size = 0; break;
2922 case Iop_PwMax16Ux4: size = 1; break;
2923 case Iop_PwMax32Ux2: size = 2; break;
2924 default: vassert(0);
2925 }
2926 addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXU,
2927 res, argL, argR, size, False));
2928 return res;
2929 }
2930 case Iop_PwMax8Sx8:
2931 case Iop_PwMax16Sx4:
2932 case Iop_PwMax32Sx2: {
2933 HReg res = newVRegD(env);
2934 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2935 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2936 UInt size = 0;
2937 switch(e->Iex.Binop.op) {
2938 case Iop_PwMax8Sx8: size = 0; break;
2939 case Iop_PwMax16Sx4: size = 1; break;
2940 case Iop_PwMax32Sx2: size = 2; break;
2941 default: vassert(0);
2942 }
2943 addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXS,
2944 res, argL, argR, size, False));
2945 return res;
2946 }
2947 case Iop_Perm8x8: {
2948 HReg res = newVRegD(env);
2949 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2950 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2951 addInstr(env, ARMInstr_NBinary(ARMneon_VTBL,
2952 res, argL, argR, 0, False));
2953 return res;
2954 }
2955 case Iop_PolynomialMul8x8: {
2956 HReg res = newVRegD(env);
2957 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2958 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2959 UInt size = 0;
2960 addInstr(env, ARMInstr_NBinary(ARMneon_VMULP,
2961 res, argL, argR, size, False));
2962 return res;
2963 }
2964 case Iop_Max32Fx2: {
2965 HReg res = newVRegD(env);
2966 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2967 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2968 addInstr(env, ARMInstr_NBinary(ARMneon_VMAXF,
2969 res, argL, argR, 2, False));
2970 return res;
2971 }
2972 case Iop_Min32Fx2: {
2973 HReg res = newVRegD(env);
2974 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2975 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2976 addInstr(env, ARMInstr_NBinary(ARMneon_VMINF,
2977 res, argL, argR, 2, False));
2978 return res;
2979 }
2980 case Iop_PwMax32Fx2: {
2981 HReg res = newVRegD(env);
2982 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2983 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2984 addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXF,
2985 res, argL, argR, 2, False));
2986 return res;
2987 }
2988 case Iop_PwMin32Fx2: {
2989 HReg res = newVRegD(env);
2990 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2991 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2992 addInstr(env, ARMInstr_NBinary(ARMneon_VPMINF,
2993 res, argL, argR, 2, False));
2994 return res;
2995 }
2996 case Iop_CmpGT32Fx2: {
2997 HReg res = newVRegD(env);
2998 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2999 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3000 addInstr(env, ARMInstr_NBinary(ARMneon_VCGTF,
3001 res, argL, argR, 2, False));
3002 return res;
3003 }
3004 case Iop_CmpGE32Fx2: {
3005 HReg res = newVRegD(env);
3006 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3007 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3008 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEF,
3009 res, argL, argR, 2, False));
3010 return res;
3011 }
3012 case Iop_CmpEQ32Fx2: {
3013 HReg res = newVRegD(env);
3014 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3015 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3016 addInstr(env, ARMInstr_NBinary(ARMneon_VCEQF,
3017 res, argL, argR, 2, False));
3018 return res;
3019 }
3020 case Iop_F32ToFixed32Ux2_RZ:
3021 case Iop_F32ToFixed32Sx2_RZ:
3022 case Iop_Fixed32UToF32x2_RN:
3023 case Iop_Fixed32SToF32x2_RN: {
3024 HReg res = newVRegD(env);
3025 HReg arg = iselNeon64Expr(env, e->Iex.Binop.arg1);
3026 ARMNeonUnOp op;
3027 UInt imm6;
3028 if (e->Iex.Binop.arg2->tag != Iex_Const ||
3029 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
3030 vpanic("ARM supports FP <-> Fixed conversion with constant "
3031 "second argument less than 33 only\n");
3032 }
3033 imm6 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
3034 vassert(imm6 <= 32 && imm6 > 0);
3035 imm6 = 64 - imm6;
3036 switch(e->Iex.Binop.op) {
3037 case Iop_F32ToFixed32Ux2_RZ: op = ARMneon_VCVTFtoFixedU; break;
3038 case Iop_F32ToFixed32Sx2_RZ: op = ARMneon_VCVTFtoFixedS; break;
3039 case Iop_Fixed32UToF32x2_RN: op = ARMneon_VCVTFixedUtoF; break;
3040 case Iop_Fixed32SToF32x2_RN: op = ARMneon_VCVTFixedStoF; break;
3041 default: vassert(0);
3042 }
3043 addInstr(env, ARMInstr_NUnary(op, res, arg, imm6, False));
3044 return res;
3045 }
3046 /*
3047 FIXME: is this here or not?
3048 case Iop_VDup8x8:
3049 case Iop_VDup16x4:
3050 case Iop_VDup32x2: {
3051 HReg res = newVRegD(env);
3052 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3053 UInt index;
3054 UInt imm4;
3055 UInt size = 0;
3056 if (e->Iex.Binop.arg2->tag != Iex_Const ||
3057 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
3058 vpanic("ARM supports Iop_VDup with constant "
3059 "second argument less than 16 only\n");
3060 }
3061 index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
3062 switch(e->Iex.Binop.op) {
3063 case Iop_VDup8x8: imm4 = (index << 1) + 1; break;
3064 case Iop_VDup16x4: imm4 = (index << 2) + 2; break;
3065 case Iop_VDup32x2: imm4 = (index << 3) + 4; break;
3066 default: vassert(0);
3067 }
3068 if (imm4 >= 16) {
3069 vpanic("ARM supports Iop_VDup with constant "
3070 "second argument less than 16 only\n");
3071 }
3072 addInstr(env, ARMInstr_NUnary(ARMneon_VDUP,
3073 res, argL, imm4, False));
3074 return res;
3075 }
3076 */
3077 default:
3078 break;
3079 }
3080 }
3081
3082 /* --------- UNARY ops --------- */
3083 if (e->tag == Iex_Unop) {
3084 switch (e->Iex.Unop.op) {
3085
3086 /* ReinterpF64asI64 */
3087 case Iop_ReinterpF64asI64:
3088 /* Left64(e) */
3089 case Iop_Left64:
3090 /* CmpwNEZ64(e) */
3091 //case Iop_CmpwNEZ64:
3092 case Iop_1Sto64: {
3093 HReg rLo, rHi;
3094 HReg res = newVRegD(env);
3095 iselInt64Expr(&rHi, &rLo, env, e);
3096 addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
3097 return res;
3098 }
3099 case Iop_Not64: {
3100 DECLARE_PATTERN(p_veqz_8x8);
3101 DECLARE_PATTERN(p_veqz_16x4);
3102 DECLARE_PATTERN(p_veqz_32x2);
3103 DECLARE_PATTERN(p_vcge_8sx8);
3104 DECLARE_PATTERN(p_vcge_16sx4);
3105 DECLARE_PATTERN(p_vcge_32sx2);
3106 DECLARE_PATTERN(p_vcge_8ux8);
3107 DECLARE_PATTERN(p_vcge_16ux4);
3108 DECLARE_PATTERN(p_vcge_32ux2);
3109 DEFINE_PATTERN(p_veqz_8x8,
3110 unop(Iop_Not64, unop(Iop_CmpNEZ8x8, bind(0))));
3111 DEFINE_PATTERN(p_veqz_16x4,
3112 unop(Iop_Not64, unop(Iop_CmpNEZ16x4, bind(0))));
3113 DEFINE_PATTERN(p_veqz_32x2,
3114 unop(Iop_Not64, unop(Iop_CmpNEZ32x2, bind(0))));
3115 DEFINE_PATTERN(p_vcge_8sx8,
3116 unop(Iop_Not64, binop(Iop_CmpGT8Sx8, bind(1), bind(0))));
3117 DEFINE_PATTERN(p_vcge_16sx4,
3118 unop(Iop_Not64, binop(Iop_CmpGT16Sx4, bind(1), bind(0))));
3119 DEFINE_PATTERN(p_vcge_32sx2,
3120 unop(Iop_Not64, binop(Iop_CmpGT32Sx2, bind(1), bind(0))));
3121 DEFINE_PATTERN(p_vcge_8ux8,
3122 unop(Iop_Not64, binop(Iop_CmpGT8Ux8, bind(1), bind(0))));
3123 DEFINE_PATTERN(p_vcge_16ux4,
3124 unop(Iop_Not64, binop(Iop_CmpGT16Ux4, bind(1), bind(0))));
3125 DEFINE_PATTERN(p_vcge_32ux2,
3126 unop(Iop_Not64, binop(Iop_CmpGT32Ux2, bind(1), bind(0))));
3127 if (matchIRExpr(&mi, p_veqz_8x8, e)) {
3128 HReg res = newVRegD(env);
3129 HReg arg = iselNeon64Expr(env, mi.bindee[0]);
3130 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 0, False));
3131 return res;
3132 } else if (matchIRExpr(&mi, p_veqz_16x4, e)) {
3133 HReg res = newVRegD(env);
3134 HReg arg = iselNeon64Expr(env, mi.bindee[0]);
3135 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 1, False));
3136 return res;
3137 } else if (matchIRExpr(&mi, p_veqz_32x2, e)) {
3138 HReg res = newVRegD(env);
3139 HReg arg = iselNeon64Expr(env, mi.bindee[0]);
3140 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 2, False));
3141 return res;
3142 } else if (matchIRExpr(&mi, p_vcge_8sx8, e)) {
3143 HReg res = newVRegD(env);
3144 HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3145 HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3146 addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3147 res, argL, argR, 0, False));
3148 return res;
3149 } else if (matchIRExpr(&mi, p_vcge_16sx4, e)) {
3150 HReg res = newVRegD(env);
3151 HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3152 HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3153 addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3154 res, argL, argR, 1, False));
3155 return res;
3156 } else if (matchIRExpr(&mi, p_vcge_32sx2, e)) {
3157 HReg res = newVRegD(env);
3158 HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3159 HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3160 addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3161 res, argL, argR, 2, False));
3162 return res;
3163 } else if (matchIRExpr(&mi, p_vcge_8ux8, e)) {
3164 HReg res = newVRegD(env);
3165 HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3166 HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3167 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3168 res, argL, argR, 0, False));
3169 return res;
3170 } else if (matchIRExpr(&mi, p_vcge_16ux4, e)) {
3171 HReg res = newVRegD(env);
3172 HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3173 HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3174 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3175 res, argL, argR, 1, False));
3176 return res;
3177 } else if (matchIRExpr(&mi, p_vcge_32ux2, e)) {
3178 HReg res = newVRegD(env);
3179 HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3180 HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3181 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3182 res, argL, argR, 2, False));
3183 return res;
3184 } else {
3185 HReg res = newVRegD(env);
3186 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3187 addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, arg, 4, False));
3188 return res;
3189 }
3190 }
3191 case Iop_Dup8x8:
3192 case Iop_Dup16x4:
3193 case Iop_Dup32x2: {
3194 HReg res, arg;
3195 UInt size;
3196 DECLARE_PATTERN(p_vdup_8x8);
3197 DECLARE_PATTERN(p_vdup_16x4);
3198 DECLARE_PATTERN(p_vdup_32x2);
3199 DEFINE_PATTERN(p_vdup_8x8,
3200 unop(Iop_Dup8x8, binop(Iop_GetElem8x8, bind(0), bind(1))));
3201 DEFINE_PATTERN(p_vdup_16x4,
3202 unop(Iop_Dup16x4, binop(Iop_GetElem16x4, bind(0), bind(1))));
3203 DEFINE_PATTERN(p_vdup_32x2,
3204 unop(Iop_Dup32x2, binop(Iop_GetElem32x2, bind(0), bind(1))));
3205 if (matchIRExpr(&mi, p_vdup_8x8, e)) {
3206 UInt index;
3207 UInt imm4;
3208 if (mi.bindee[1]->tag == Iex_Const &&
3209 typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3210 index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3211 imm4 = (index << 1) + 1;
3212 if (index < 8) {
3213 res = newVRegD(env);
3214 arg = iselNeon64Expr(env, mi.bindee[0]);
3215 addInstr(env, ARMInstr_NUnaryS(
3216 ARMneon_VDUP,
3217 mkARMNRS(ARMNRS_Reg, res, 0),
3218 mkARMNRS(ARMNRS_Scalar, arg, index),
3219 imm4, False
3220 ));
3221 return res;
3222 }
3223 }
3224 } else if (matchIRExpr(&mi, p_vdup_16x4, e)) {
3225 UInt index;
3226 UInt imm4;
3227 if (mi.bindee[1]->tag == Iex_Const &&
3228 typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3229 index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3230 imm4 = (index << 2) + 2;
3231 if (index < 4) {
3232 res = newVRegD(env);
3233 arg = iselNeon64Expr(env, mi.bindee[0]);
3234 addInstr(env, ARMInstr_NUnaryS(
3235 ARMneon_VDUP,
3236 mkARMNRS(ARMNRS_Reg, res, 0),
3237 mkARMNRS(ARMNRS_Scalar, arg, index),
3238 imm4, False
3239 ));
3240 return res;
3241 }
3242 }
3243 } else if (matchIRExpr(&mi, p_vdup_32x2, e)) {
3244 UInt index;
3245 UInt imm4;
3246 if (mi.bindee[1]->tag == Iex_Const &&
3247 typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3248 index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3249 imm4 = (index << 3) + 4;
3250 if (index < 2) {
3251 res = newVRegD(env);
3252 arg = iselNeon64Expr(env, mi.bindee[0]);
3253 addInstr(env, ARMInstr_NUnaryS(
3254 ARMneon_VDUP,
3255 mkARMNRS(ARMNRS_Reg, res, 0),
3256 mkARMNRS(ARMNRS_Scalar, arg, index),
3257 imm4, False
3258 ));
3259 return res;
3260 }
3261 }
3262 }
3263 arg = iselIntExpr_R(env, e->Iex.Unop.arg);
3264 res = newVRegD(env);
3265 switch (e->Iex.Unop.op) {
3266 case Iop_Dup8x8: size = 0; break;
3267 case Iop_Dup16x4: size = 1; break;
3268 case Iop_Dup32x2: size = 2; break;
3269 default: vassert(0);
3270 }
3271 addInstr(env, ARMInstr_NUnary(ARMneon_DUP, res, arg, size, False));
3272 return res;
3273 }
3274 case Iop_Abs8x8:
3275 case Iop_Abs16x4:
3276 case Iop_Abs32x2: {
3277 HReg res = newVRegD(env);
3278 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3279 UInt size = 0;
3280 switch(e->Iex.Binop.op) {
3281 case Iop_Abs8x8: size = 0; break;
3282 case Iop_Abs16x4: size = 1; break;
3283 case Iop_Abs32x2: size = 2; break;
3284 default: vassert(0);
3285 }
3286 addInstr(env, ARMInstr_NUnary(ARMneon_ABS, res, arg, size, False));
3287 return res;
3288 }
3289 case Iop_Reverse64_8x8:
3290 case Iop_Reverse64_16x4:
3291 case Iop_Reverse64_32x2: {
3292 HReg res = newVRegD(env);
3293 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3294 UInt size = 0;
3295 switch(e->Iex.Binop.op) {
3296 case Iop_Reverse64_8x8: size = 0; break;
3297 case Iop_Reverse64_16x4: size = 1; break;
3298 case Iop_Reverse64_32x2: size = 2; break;
3299 default: vassert(0);
3300 }
3301 addInstr(env, ARMInstr_NUnary(ARMneon_REV64,
3302 res, arg, size, False));
3303 return res;
3304 }
3305 case Iop_Reverse32_8x8:
3306 case Iop_Reverse32_16x4: {
3307 HReg res = newVRegD(env);
3308 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3309 UInt size = 0;
3310 switch(e->Iex.Binop.op) {
3311 case Iop_Reverse32_8x8: size = 0; break;
3312 case Iop_Reverse32_16x4: size = 1; break;
3313 default: vassert(0);
3314 }
3315 addInstr(env, ARMInstr_NUnary(ARMneon_REV32,
3316 res, arg, size, False));
3317 return res;
3318 }
3319 case Iop_Reverse16_8x8: {
3320 HReg res = newVRegD(env);
3321 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3322 UInt size = 0;
3323 addInstr(env, ARMInstr_NUnary(ARMneon_REV16,
3324 res, arg, size, False));
3325 return res;
3326 }
3327 case Iop_CmpwNEZ64: {
3328 HReg x_lsh = newVRegD(env);
3329 HReg x_rsh = newVRegD(env);
3330 HReg lsh_amt = newVRegD(env);
3331 HReg rsh_amt = newVRegD(env);
3332 HReg zero = newVRegD(env);
3333 HReg tmp = newVRegD(env);
3334 HReg tmp2 = newVRegD(env);
3335 HReg res = newVRegD(env);
3336 HReg x = newVRegD(env);
3337 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3338 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp2, arg, 2, False));
3339 addInstr(env, ARMInstr_NUnary(ARMneon_NOT, x, tmp2, 4, False));
3340 addInstr(env, ARMInstr_NeonImm(lsh_amt, ARMNImm_TI(0, 32)));
3341 addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0, 0)));
3342 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
3343 rsh_amt, zero, lsh_amt, 2, False));
3344 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
3345 x_lsh, x, lsh_amt, 3, False));
3346 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
3347 x_rsh, x, rsh_amt, 3, False));
3348 addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
3349 tmp, x_lsh, x_rsh, 0, False));
3350 addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
3351 res, tmp, x, 0, False));
3352 return res;
3353 }
3354 case Iop_CmpNEZ8x8:
3355 case Iop_CmpNEZ16x4:
3356 case Iop_CmpNEZ32x2: {
3357 HReg res = newVRegD(env);
3358 HReg tmp = newVRegD(env);
3359 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3360 UInt size;
3361 switch (e->Iex.Unop.op) {
3362 case Iop_CmpNEZ8x8: size = 0; break;
3363 case Iop_CmpNEZ16x4: size = 1; break;
3364 case Iop_CmpNEZ32x2: size = 2; break;
3365 default: vassert(0);
3366 }
3367 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp, arg, size, False));
3368 addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, tmp, 4, False));
3369 return res;
3370 }
sewardj5f438dd2011-06-16 11:36:23 +00003371 case Iop_NarrowUn16to8x8:
3372 case Iop_NarrowUn32to16x4:
3373 case Iop_NarrowUn64to32x2: {
sewardj6c60b322010-08-22 12:48:28 +00003374 HReg res = newVRegD(env);
3375 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3376 UInt size = 0;
3377 switch(e->Iex.Binop.op) {
sewardj5f438dd2011-06-16 11:36:23 +00003378 case Iop_NarrowUn16to8x8: size = 0; break;
3379 case Iop_NarrowUn32to16x4: size = 1; break;
3380 case Iop_NarrowUn64to32x2: size = 2; break;
sewardj6c60b322010-08-22 12:48:28 +00003381 default: vassert(0);
3382 }
3383 addInstr(env, ARMInstr_NUnary(ARMneon_COPYN,
3384 res, arg, size, False));
3385 return res;
3386 }
sewardj5f438dd2011-06-16 11:36:23 +00003387 case Iop_QNarrowUn16Sto8Sx8:
3388 case Iop_QNarrowUn32Sto16Sx4:
3389 case Iop_QNarrowUn64Sto32Sx2: {
sewardj6c60b322010-08-22 12:48:28 +00003390 HReg res = newVRegD(env);
3391 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3392 UInt size = 0;
3393 switch(e->Iex.Binop.op) {
sewardj5f438dd2011-06-16 11:36:23 +00003394 case Iop_QNarrowUn16Sto8Sx8: size = 0; break;
3395 case Iop_QNarrowUn32Sto16Sx4: size = 1; break;
3396 case Iop_QNarrowUn64Sto32Sx2: size = 2; break;
sewardj6c60b322010-08-22 12:48:28 +00003397 default: vassert(0);
3398 }
3399 addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNSS,
3400 res, arg, size, False));
3401 return res;
3402 }
sewardj5f438dd2011-06-16 11:36:23 +00003403 case Iop_QNarrowUn16Sto8Ux8:
3404 case Iop_QNarrowUn32Sto16Ux4:
3405 case Iop_QNarrowUn64Sto32Ux2: {
sewardj6c60b322010-08-22 12:48:28 +00003406 HReg res = newVRegD(env);
3407 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3408 UInt size = 0;
3409 switch(e->Iex.Binop.op) {
sewardj5f438dd2011-06-16 11:36:23 +00003410 case Iop_QNarrowUn16Sto8Ux8: size = 0; break;
3411 case Iop_QNarrowUn32Sto16Ux4: size = 1; break;
3412 case Iop_QNarrowUn64Sto32Ux2: size = 2; break;
sewardj6c60b322010-08-22 12:48:28 +00003413 default: vassert(0);
3414 }
3415 addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNUS,
3416 res, arg, size, False));
3417 return res;
3418 }
sewardj5f438dd2011-06-16 11:36:23 +00003419 case Iop_QNarrowUn16Uto8Ux8:
3420 case Iop_QNarrowUn32Uto16Ux4:
3421 case Iop_QNarrowUn64Uto32Ux2: {
sewardj6c60b322010-08-22 12:48:28 +00003422 HReg res = newVRegD(env);
3423 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3424 UInt size = 0;
3425 switch(e->Iex.Binop.op) {
sewardj5f438dd2011-06-16 11:36:23 +00003426 case Iop_QNarrowUn16Uto8Ux8: size = 0; break;
3427 case Iop_QNarrowUn32Uto16Ux4: size = 1; break;
3428 case Iop_QNarrowUn64Uto32Ux2: size = 2; break;
sewardj6c60b322010-08-22 12:48:28 +00003429 default: vassert(0);
3430 }
3431 addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNUU,
3432 res, arg, size, False));
3433 return res;
3434 }
3435 case Iop_PwAddL8Sx8:
3436 case Iop_PwAddL16Sx4:
3437 case Iop_PwAddL32Sx2: {
3438 HReg res = newVRegD(env);
3439 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3440 UInt size = 0;
3441 switch(e->Iex.Binop.op) {
3442 case Iop_PwAddL8Sx8: size = 0; break;
3443 case Iop_PwAddL16Sx4: size = 1; break;
3444 case Iop_PwAddL32Sx2: size = 2; break;
3445 default: vassert(0);
3446 }
3447 addInstr(env, ARMInstr_NUnary(ARMneon_PADDLS,
3448 res, arg, size, False));
3449 return res;
3450 }
3451 case Iop_PwAddL8Ux8:
3452 case Iop_PwAddL16Ux4:
3453 case Iop_PwAddL32Ux2: {
3454 HReg res = newVRegD(env);
3455 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3456 UInt size = 0;
3457 switch(e->Iex.Binop.op) {
3458 case Iop_PwAddL8Ux8: size = 0; break;
3459 case Iop_PwAddL16Ux4: size = 1; break;
3460 case Iop_PwAddL32Ux2: size = 2; break;
3461 default: vassert(0);
3462 }
3463 addInstr(env, ARMInstr_NUnary(ARMneon_PADDLU,
3464 res, arg, size, False));
3465 return res;
3466 }
3467 case Iop_Cnt8x8: {
3468 HReg res = newVRegD(env);
3469 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3470 UInt size = 0;
3471 addInstr(env, ARMInstr_NUnary(ARMneon_CNT,
3472 res, arg, size, False));
3473 return res;
3474 }
3475 case Iop_Clz8Sx8:
3476 case Iop_Clz16Sx4:
3477 case Iop_Clz32Sx2: {
3478 HReg res = newVRegD(env);
3479 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3480 UInt size = 0;
3481 switch(e->Iex.Binop.op) {
3482 case Iop_Clz8Sx8: size = 0; break;
3483 case Iop_Clz16Sx4: size = 1; break;
3484 case Iop_Clz32Sx2: size = 2; break;
3485 default: vassert(0);
3486 }
3487 addInstr(env, ARMInstr_NUnary(ARMneon_CLZ,
3488 res, arg, size, False));
3489 return res;
3490 }
3491 case Iop_Cls8Sx8:
3492 case Iop_Cls16Sx4:
3493 case Iop_Cls32Sx2: {
3494 HReg res = newVRegD(env);
3495 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3496 UInt size = 0;
3497 switch(e->Iex.Binop.op) {
3498 case Iop_Cls8Sx8: size = 0; break;
3499 case Iop_Cls16Sx4: size = 1; break;
3500 case Iop_Cls32Sx2: size = 2; break;
3501 default: vassert(0);
3502 }
3503 addInstr(env, ARMInstr_NUnary(ARMneon_CLS,
3504 res, arg, size, False));
3505 return res;
3506 }
3507 case Iop_FtoI32Sx2_RZ: {
3508 HReg res = newVRegD(env);
3509 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3510 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoS,
3511 res, arg, 2, False));
3512 return res;
3513 }
3514 case Iop_FtoI32Ux2_RZ: {
3515 HReg res = newVRegD(env);
3516 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3517 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoU,
3518 res, arg, 2, False));
3519 return res;
3520 }
3521 case Iop_I32StoFx2: {
3522 HReg res = newVRegD(env);
3523 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3524 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTStoF,
3525 res, arg, 2, False));
3526 return res;
3527 }
3528 case Iop_I32UtoFx2: {
3529 HReg res = newVRegD(env);
3530 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3531 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTUtoF,
3532 res, arg, 2, False));
3533 return res;
3534 }
3535 case Iop_F32toF16x4: {
3536 HReg res = newVRegD(env);
3537 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3538 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTF32toF16,
3539 res, arg, 2, False));
3540 return res;
3541 }
3542 case Iop_Recip32Fx2: {
3543 HReg res = newVRegD(env);
3544 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3545 addInstr(env, ARMInstr_NUnary(ARMneon_VRECIPF,
3546 res, argL, 0, False));
3547 return res;
3548 }
3549 case Iop_Recip32x2: {
3550 HReg res = newVRegD(env);
3551 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3552 addInstr(env, ARMInstr_NUnary(ARMneon_VRECIP,
3553 res, argL, 0, False));
3554 return res;
3555 }
3556 case Iop_Abs32Fx2: {
3557 DECLARE_PATTERN(p_vabd_32fx2);
3558 DEFINE_PATTERN(p_vabd_32fx2,
3559 unop(Iop_Abs32Fx2,
3560 binop(Iop_Sub32Fx2,
3561 bind(0),
3562 bind(1))));
3563 if (matchIRExpr(&mi, p_vabd_32fx2, e)) {
3564 HReg res = newVRegD(env);
3565 HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3566 HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3567 addInstr(env, ARMInstr_NBinary(ARMneon_VABDFP,
3568 res, argL, argR, 0, False));
3569 return res;
3570 } else {
3571 HReg res = newVRegD(env);
3572 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3573 addInstr(env, ARMInstr_NUnary(ARMneon_VABSFP,
3574 res, arg, 0, False));
3575 return res;
3576 }
3577 }
3578 case Iop_Rsqrte32Fx2: {
3579 HReg res = newVRegD(env);
3580 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3581 addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTEFP,
3582 res, arg, 0, False));
3583 return res;
3584 }
3585 case Iop_Rsqrte32x2: {
3586 HReg res = newVRegD(env);
3587 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3588 addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTE,
3589 res, arg, 0, False));
3590 return res;
3591 }
3592 case Iop_Neg32Fx2: {
3593 HReg res = newVRegD(env);
3594 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3595 addInstr(env, ARMInstr_NUnary(ARMneon_VNEGF,
3596 res, arg, 0, False));
3597 return res;
3598 }
3599 default:
3600 break;
3601 }
3602 } /* if (e->tag == Iex_Unop) */
3603
3604 if (e->tag == Iex_Triop) {
florian420bfa92012-06-02 20:29:22 +00003605 IRTriop *triop = e->Iex.Triop.details;
3606
3607 switch (triop->op) {
sewardj6c60b322010-08-22 12:48:28 +00003608 case Iop_Extract64: {
3609 HReg res = newVRegD(env);
florian420bfa92012-06-02 20:29:22 +00003610 HReg argL = iselNeon64Expr(env, triop->arg1);
3611 HReg argR = iselNeon64Expr(env, triop->arg2);
sewardj6c60b322010-08-22 12:48:28 +00003612 UInt imm4;
florian420bfa92012-06-02 20:29:22 +00003613 if (triop->arg3->tag != Iex_Const ||
3614 typeOfIRExpr(env->type_env, triop->arg3) != Ity_I8) {
sewardj6c60b322010-08-22 12:48:28 +00003615 vpanic("ARM target supports Iop_Extract64 with constant "
3616 "third argument less than 16 only\n");
3617 }
florian420bfa92012-06-02 20:29:22 +00003618 imm4 = triop->arg3->Iex.Const.con->Ico.U8;
sewardj6c60b322010-08-22 12:48:28 +00003619 if (imm4 >= 8) {
3620 vpanic("ARM target supports Iop_Extract64 with constant "
3621 "third argument less than 16 only\n");
3622 }
3623 addInstr(env, ARMInstr_NBinary(ARMneon_VEXT,
3624 res, argL, argR, imm4, False));
3625 return res;
3626 }
3627 case Iop_SetElem8x8:
3628 case Iop_SetElem16x4:
3629 case Iop_SetElem32x2: {
3630 HReg res = newVRegD(env);
florian420bfa92012-06-02 20:29:22 +00003631 HReg dreg = iselNeon64Expr(env, triop->arg1);
3632 HReg arg = iselIntExpr_R(env, triop->arg3);
sewardj6c60b322010-08-22 12:48:28 +00003633 UInt index, size;
florian420bfa92012-06-02 20:29:22 +00003634 if (triop->arg2->tag != Iex_Const ||
3635 typeOfIRExpr(env->type_env, triop->arg2) != Ity_I8) {
sewardj6c60b322010-08-22 12:48:28 +00003636 vpanic("ARM target supports SetElem with constant "
3637 "second argument only\n");
3638 }
florian420bfa92012-06-02 20:29:22 +00003639 index = triop->arg2->Iex.Const.con->Ico.U8;
3640 switch (triop->op) {
sewardj6c60b322010-08-22 12:48:28 +00003641 case Iop_SetElem8x8: vassert(index < 8); size = 0; break;
3642 case Iop_SetElem16x4: vassert(index < 4); size = 1; break;
3643 case Iop_SetElem32x2: vassert(index < 2); size = 2; break;
3644 default: vassert(0);
3645 }
3646 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, res, dreg, 4, False));
3647 addInstr(env, ARMInstr_NUnaryS(ARMneon_SETELEM,
3648 mkARMNRS(ARMNRS_Scalar, res, index),
3649 mkARMNRS(ARMNRS_Reg, arg, 0),
3650 size, False));
3651 return res;
3652 }
3653 default:
3654 break;
3655 }
3656 }
3657
3658 /* --------- MULTIPLEX --------- */
3659 if (e->tag == Iex_Mux0X) {
3660 HReg rLo, rHi;
3661 HReg res = newVRegD(env);
3662 iselInt64Expr(&rHi, &rLo, env, e);
3663 addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
3664 return res;
3665 }
3666
3667 ppIRExpr(e);
3668 vpanic("iselNeon64Expr");
3669}
3670
3671static HReg iselNeonExpr ( ISelEnv* env, IRExpr* e )
3672{
3673 HReg r = iselNeonExpr_wrk( env, e );
3674 vassert(hregClass(r) == HRcVec128);
3675 vassert(hregIsVirtual(r));
3676 return r;
3677}
3678
3679/* DO NOT CALL THIS DIRECTLY */
3680static HReg iselNeonExpr_wrk ( ISelEnv* env, IRExpr* e )
3681{
3682 IRType ty = typeOfIRExpr(env->type_env, e);
3683 MatchInfo mi;
3684 vassert(e);
3685 vassert(ty == Ity_V128);
3686
3687 if (e->tag == Iex_RdTmp) {
3688 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
3689 }
3690
3691 if (e->tag == Iex_Const) {
3692 /* At the moment there should be no 128-bit constants in IR for ARM
3693 generated during disassemble. They are represented as Iop_64HLtoV128
3694 binary operation and are handled among binary ops. */
3695 /* But zero can be created by valgrind internal optimizer */
3696 if (e->Iex.Const.con->Ico.V128 == 0) {
3697 HReg res = newVRegV(env);
3698 addInstr(env, ARMInstr_NeonImm(res, ARMNImm_TI(0, 0)));
3699 return res;
3700 }
3701 ppIRExpr(e);
3702 vpanic("128-bit constant is not implemented");
3703 }
3704
3705 if (e->tag == Iex_Load) {
3706 HReg res = newVRegV(env);
3707 ARMAModeN* am = iselIntExpr_AModeN(env, e->Iex.Load.addr);
3708 vassert(ty == Ity_V128);
3709 addInstr(env, ARMInstr_NLdStQ(True, res, am));
3710 return res;
3711 }
3712
3713 if (e->tag == Iex_Get) {
3714 HReg addr = newVRegI(env);
3715 HReg res = newVRegV(env);
3716 vassert(ty == Ity_V128);
3717 addInstr(env, ARMInstr_Add32(addr, hregARM_R8(), e->Iex.Get.offset));
3718 addInstr(env, ARMInstr_NLdStQ(True, res, mkARMAModeN_R(addr)));
3719 return res;
3720 }
3721
3722 if (e->tag == Iex_Unop) {
3723 switch (e->Iex.Unop.op) {
3724 case Iop_NotV128: {
3725 DECLARE_PATTERN(p_veqz_8x16);
3726 DECLARE_PATTERN(p_veqz_16x8);
3727 DECLARE_PATTERN(p_veqz_32x4);
3728 DECLARE_PATTERN(p_vcge_8sx16);
3729 DECLARE_PATTERN(p_vcge_16sx8);
3730 DECLARE_PATTERN(p_vcge_32sx4);
3731 DECLARE_PATTERN(p_vcge_8ux16);
3732 DECLARE_PATTERN(p_vcge_16ux8);
3733 DECLARE_PATTERN(p_vcge_32ux4);
3734 DEFINE_PATTERN(p_veqz_8x16,
3735 unop(Iop_NotV128, unop(Iop_CmpNEZ8x16, bind(0))));
3736 DEFINE_PATTERN(p_veqz_16x8,
3737 unop(Iop_NotV128, unop(Iop_CmpNEZ16x8, bind(0))));
3738 DEFINE_PATTERN(p_veqz_32x4,
3739 unop(Iop_NotV128, unop(Iop_CmpNEZ32x4, bind(0))));
3740 DEFINE_PATTERN(p_vcge_8sx16,
3741 unop(Iop_NotV128, binop(Iop_CmpGT8Sx16, bind(1), bind(0))));
3742 DEFINE_PATTERN(p_vcge_16sx8,
3743 unop(Iop_NotV128, binop(Iop_CmpGT16Sx8, bind(1), bind(0))));
3744 DEFINE_PATTERN(p_vcge_32sx4,
3745 unop(Iop_NotV128, binop(Iop_CmpGT32Sx4, bind(1), bind(0))));
3746 DEFINE_PATTERN(p_vcge_8ux16,
3747 unop(Iop_NotV128, binop(Iop_CmpGT8Ux16, bind(1), bind(0))));
3748 DEFINE_PATTERN(p_vcge_16ux8,
3749 unop(Iop_NotV128, binop(Iop_CmpGT16Ux8, bind(1), bind(0))));
3750 DEFINE_PATTERN(p_vcge_32ux4,
3751 unop(Iop_NotV128, binop(Iop_CmpGT32Ux4, bind(1), bind(0))));
3752 if (matchIRExpr(&mi, p_veqz_8x16, e)) {
3753 HReg res = newVRegV(env);
3754 HReg arg = iselNeonExpr(env, mi.bindee[0]);
3755 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 0, True));
3756 return res;
3757 } else if (matchIRExpr(&mi, p_veqz_16x8, e)) {
3758 HReg res = newVRegV(env);
3759 HReg arg = iselNeonExpr(env, mi.bindee[0]);
3760 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 1, True));
3761 return res;
3762 } else if (matchIRExpr(&mi, p_veqz_32x4, e)) {
3763 HReg res = newVRegV(env);
3764 HReg arg = iselNeonExpr(env, mi.bindee[0]);
3765 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 2, True));
3766 return res;
3767 } else if (matchIRExpr(&mi, p_vcge_8sx16, e)) {
3768 HReg res = newVRegV(env);
3769 HReg argL = iselNeonExpr(env, mi.bindee[0]);
3770 HReg argR = iselNeonExpr(env, mi.bindee[1]);
3771 addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3772 res, argL, argR, 0, True));
3773 return res;
3774 } else if (matchIRExpr(&mi, p_vcge_16sx8, e)) {
3775 HReg res = newVRegV(env);
3776 HReg argL = iselNeonExpr(env, mi.bindee[0]);
3777 HReg argR = iselNeonExpr(env, mi.bindee[1]);
3778 addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3779 res, argL, argR, 1, True));
3780 return res;
3781 } else if (matchIRExpr(&mi, p_vcge_32sx4, e)) {
3782 HReg res = newVRegV(env);
3783 HReg argL = iselNeonExpr(env, mi.bindee[0]);
3784 HReg argR = iselNeonExpr(env, mi.bindee[1]);
3785 addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3786 res, argL, argR, 2, True));
3787 return res;
3788 } else if (matchIRExpr(&mi, p_vcge_8ux16, e)) {
3789 HReg res = newVRegV(env);
3790 HReg argL = iselNeonExpr(env, mi.bindee[0]);
3791 HReg argR = iselNeonExpr(env, mi.bindee[1]);
3792 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3793 res, argL, argR, 0, True));
3794 return res;
3795 } else if (matchIRExpr(&mi, p_vcge_16ux8, e)) {
3796 HReg res = newVRegV(env);
3797 HReg argL = iselNeonExpr(env, mi.bindee[0]);
3798 HReg argR = iselNeonExpr(env, mi.bindee[1]);
3799 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3800 res, argL, argR, 1, True));
3801 return res;
3802 } else if (matchIRExpr(&mi, p_vcge_32ux4, e)) {
3803 HReg res = newVRegV(env);
3804 HReg argL = iselNeonExpr(env, mi.bindee[0]);
3805 HReg argR = iselNeonExpr(env, mi.bindee[1]);
3806 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3807 res, argL, argR, 2, True));
3808 return res;
3809 } else {
3810 HReg res = newVRegV(env);
3811 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3812 addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, arg, 4, True));
3813 return res;
3814 }
3815 }
3816 case Iop_Dup8x16:
3817 case Iop_Dup16x8:
3818 case Iop_Dup32x4: {
3819 HReg res, arg;
3820 UInt size;
3821 DECLARE_PATTERN(p_vdup_8x16);
3822 DECLARE_PATTERN(p_vdup_16x8);
3823 DECLARE_PATTERN(p_vdup_32x4);
3824 DEFINE_PATTERN(p_vdup_8x16,
3825 unop(Iop_Dup8x16, binop(Iop_GetElem8x8, bind(0), bind(1))));
3826 DEFINE_PATTERN(p_vdup_16x8,
3827 unop(Iop_Dup16x8, binop(Iop_GetElem16x4, bind(0), bind(1))));
3828 DEFINE_PATTERN(p_vdup_32x4,
3829 unop(Iop_Dup32x4, binop(Iop_GetElem32x2, bind(0), bind(1))));
3830 if (matchIRExpr(&mi, p_vdup_8x16, e)) {
3831 UInt index;
3832 UInt imm4;
3833 if (mi.bindee[1]->tag == Iex_Const &&
3834 typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3835 index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3836 imm4 = (index << 1) + 1;
3837 if (index < 8) {
3838 res = newVRegV(env);
3839 arg = iselNeon64Expr(env, mi.bindee[0]);
3840 addInstr(env, ARMInstr_NUnaryS(
3841 ARMneon_VDUP,
3842 mkARMNRS(ARMNRS_Reg, res, 0),
3843 mkARMNRS(ARMNRS_Scalar, arg, index),
3844 imm4, True
3845 ));
3846 return res;
3847 }
3848 }
3849 } else if (matchIRExpr(&mi, p_vdup_16x8, e)) {
3850 UInt index;
3851 UInt imm4;
3852 if (mi.bindee[1]->tag == Iex_Const &&
3853 typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3854 index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3855 imm4 = (index << 2) + 2;
3856 if (index < 4) {
3857 res = newVRegV(env);
3858 arg = iselNeon64Expr(env, mi.bindee[0]);
3859 addInstr(env, ARMInstr_NUnaryS(
3860 ARMneon_VDUP,
3861 mkARMNRS(ARMNRS_Reg, res, 0),
3862 mkARMNRS(ARMNRS_Scalar, arg, index),
3863 imm4, True
3864 ));
3865 return res;
3866 }
3867 }
3868 } else if (matchIRExpr(&mi, p_vdup_32x4, e)) {
3869 UInt index;
3870 UInt imm4;
3871 if (mi.bindee[1]->tag == Iex_Const &&
3872 typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3873 index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3874 imm4 = (index << 3) + 4;
3875 if (index < 2) {
3876 res = newVRegV(env);
3877 arg = iselNeon64Expr(env, mi.bindee[0]);
3878 addInstr(env, ARMInstr_NUnaryS(
3879 ARMneon_VDUP,
3880 mkARMNRS(ARMNRS_Reg, res, 0),
3881 mkARMNRS(ARMNRS_Scalar, arg, index),
3882 imm4, True
3883 ));
3884 return res;
3885 }
3886 }
3887 }
3888 arg = iselIntExpr_R(env, e->Iex.Unop.arg);
3889 res = newVRegV(env);
3890 switch (e->Iex.Unop.op) {
3891 case Iop_Dup8x16: size = 0; break;
3892 case Iop_Dup16x8: size = 1; break;
3893 case Iop_Dup32x4: size = 2; break;
3894 default: vassert(0);
3895 }
3896 addInstr(env, ARMInstr_NUnary(ARMneon_DUP, res, arg, size, True));
3897 return res;
3898 }
3899 case Iop_Abs8x16:
3900 case Iop_Abs16x8:
3901 case Iop_Abs32x4: {
3902 HReg res = newVRegV(env);
3903 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3904 UInt size = 0;
3905 switch(e->Iex.Binop.op) {
3906 case Iop_Abs8x16: size = 0; break;
3907 case Iop_Abs16x8: size = 1; break;
3908 case Iop_Abs32x4: size = 2; break;
3909 default: vassert(0);
3910 }
3911 addInstr(env, ARMInstr_NUnary(ARMneon_ABS, res, arg, size, True));
3912 return res;
3913 }
3914 case Iop_Reverse64_8x16:
3915 case Iop_Reverse64_16x8:
3916 case Iop_Reverse64_32x4: {
3917 HReg res = newVRegV(env);
3918 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3919 UInt size = 0;
3920 switch(e->Iex.Binop.op) {
3921 case Iop_Reverse64_8x16: size = 0; break;
3922 case Iop_Reverse64_16x8: size = 1; break;
3923 case Iop_Reverse64_32x4: size = 2; break;
3924 default: vassert(0);
3925 }
3926 addInstr(env, ARMInstr_NUnary(ARMneon_REV64,
3927 res, arg, size, True));
3928 return res;
3929 }
3930 case Iop_Reverse32_8x16:
3931 case Iop_Reverse32_16x8: {
3932 HReg res = newVRegV(env);
3933 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3934 UInt size = 0;
3935 switch(e->Iex.Binop.op) {
3936 case Iop_Reverse32_8x16: size = 0; break;
3937 case Iop_Reverse32_16x8: size = 1; break;
3938 default: vassert(0);
3939 }
3940 addInstr(env, ARMInstr_NUnary(ARMneon_REV32,
3941 res, arg, size, True));
3942 return res;
3943 }
3944 case Iop_Reverse16_8x16: {
3945 HReg res = newVRegV(env);
3946 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3947 UInt size = 0;
3948 addInstr(env, ARMInstr_NUnary(ARMneon_REV16,
3949 res, arg, size, True));
3950 return res;
3951 }
3952 case Iop_CmpNEZ64x2: {
3953 HReg x_lsh = newVRegV(env);
3954 HReg x_rsh = newVRegV(env);
3955 HReg lsh_amt = newVRegV(env);
3956 HReg rsh_amt = newVRegV(env);
3957 HReg zero = newVRegV(env);
3958 HReg tmp = newVRegV(env);
3959 HReg tmp2 = newVRegV(env);
3960 HReg res = newVRegV(env);
3961 HReg x = newVRegV(env);
3962 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3963 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp2, arg, 2, True));
3964 addInstr(env, ARMInstr_NUnary(ARMneon_NOT, x, tmp2, 4, True));
3965 addInstr(env, ARMInstr_NeonImm(lsh_amt, ARMNImm_TI(0, 32)));
3966 addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0, 0)));
3967 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
3968 rsh_amt, zero, lsh_amt, 2, True));
3969 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
3970 x_lsh, x, lsh_amt, 3, True));
3971 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
3972 x_rsh, x, rsh_amt, 3, True));
3973 addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
3974 tmp, x_lsh, x_rsh, 0, True));
3975 addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
3976 res, tmp, x, 0, True));
3977 return res;
3978 }
3979 case Iop_CmpNEZ8x16:
3980 case Iop_CmpNEZ16x8:
3981 case Iop_CmpNEZ32x4: {
3982 HReg res = newVRegV(env);
3983 HReg tmp = newVRegV(env);
3984 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3985 UInt size;
3986 switch (e->Iex.Unop.op) {
3987 case Iop_CmpNEZ8x16: size = 0; break;
3988 case Iop_CmpNEZ16x8: size = 1; break;
3989 case Iop_CmpNEZ32x4: size = 2; break;
3990 default: vassert(0);
3991 }
3992 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp, arg, size, True));
3993 addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, tmp, 4, True));
3994 return res;
3995 }
sewardj5f438dd2011-06-16 11:36:23 +00003996 case Iop_Widen8Uto16x8:
3997 case Iop_Widen16Uto32x4:
3998 case Iop_Widen32Uto64x2: {
sewardj6c60b322010-08-22 12:48:28 +00003999 HReg res = newVRegV(env);
4000 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4001 UInt size;
4002 switch (e->Iex.Unop.op) {
sewardj5f438dd2011-06-16 11:36:23 +00004003 case Iop_Widen8Uto16x8: size = 0; break;
4004 case Iop_Widen16Uto32x4: size = 1; break;
4005 case Iop_Widen32Uto64x2: size = 2; break;
sewardj6c60b322010-08-22 12:48:28 +00004006 default: vassert(0);
4007 }
4008 addInstr(env, ARMInstr_NUnary(ARMneon_COPYLU,
4009 res, arg, size, True));
4010 return res;
4011 }
sewardj5f438dd2011-06-16 11:36:23 +00004012 case Iop_Widen8Sto16x8:
4013 case Iop_Widen16Sto32x4:
4014 case Iop_Widen32Sto64x2: {
sewardj6c60b322010-08-22 12:48:28 +00004015 HReg res = newVRegV(env);
4016 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4017 UInt size;
4018 switch (e->Iex.Unop.op) {
sewardj5f438dd2011-06-16 11:36:23 +00004019 case Iop_Widen8Sto16x8: size = 0; break;
4020 case Iop_Widen16Sto32x4: size = 1; break;
4021 case Iop_Widen32Sto64x2: size = 2; break;
sewardj6c60b322010-08-22 12:48:28 +00004022 default: vassert(0);
4023 }
4024 addInstr(env, ARMInstr_NUnary(ARMneon_COPYLS,
4025 res, arg, size, True));
4026 return res;
4027 }
4028 case Iop_PwAddL8Sx16:
4029 case Iop_PwAddL16Sx8:
4030 case Iop_PwAddL32Sx4: {
4031 HReg res = newVRegV(env);
4032 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4033 UInt size = 0;
4034 switch(e->Iex.Binop.op) {
4035 case Iop_PwAddL8Sx16: size = 0; break;
4036 case Iop_PwAddL16Sx8: size = 1; break;
4037 case Iop_PwAddL32Sx4: size = 2; break;
4038 default: vassert(0);
4039 }
4040 addInstr(env, ARMInstr_NUnary(ARMneon_PADDLS,
4041 res, arg, size, True));
4042 return res;
4043 }
4044 case Iop_PwAddL8Ux16:
4045 case Iop_PwAddL16Ux8:
4046 case Iop_PwAddL32Ux4: {
4047 HReg res = newVRegV(env);
4048 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4049 UInt size = 0;
4050 switch(e->Iex.Binop.op) {
4051 case Iop_PwAddL8Ux16: size = 0; break;
4052 case Iop_PwAddL16Ux8: size = 1; break;
4053 case Iop_PwAddL32Ux4: size = 2; break;
4054 default: vassert(0);
4055 }
4056 addInstr(env, ARMInstr_NUnary(ARMneon_PADDLU,
4057 res, arg, size, True));
4058 return res;
4059 }
4060 case Iop_Cnt8x16: {
4061 HReg res = newVRegV(env);
4062 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4063 UInt size = 0;
4064 addInstr(env, ARMInstr_NUnary(ARMneon_CNT, res, arg, size, True));
4065 return res;
4066 }
4067 case Iop_Clz8Sx16:
4068 case Iop_Clz16Sx8:
4069 case Iop_Clz32Sx4: {
4070 HReg res = newVRegV(env);
4071 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4072 UInt size = 0;
4073 switch(e->Iex.Binop.op) {
4074 case Iop_Clz8Sx16: size = 0; break;
4075 case Iop_Clz16Sx8: size = 1; break;
4076 case Iop_Clz32Sx4: size = 2; break;
4077 default: vassert(0);
4078 }
4079 addInstr(env, ARMInstr_NUnary(ARMneon_CLZ, res, arg, size, True));
4080 return res;
4081 }
4082 case Iop_Cls8Sx16:
4083 case Iop_Cls16Sx8:
4084 case Iop_Cls32Sx4: {
4085 HReg res = newVRegV(env);
4086 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4087 UInt size = 0;
4088 switch(e->Iex.Binop.op) {
4089 case Iop_Cls8Sx16: size = 0; break;
4090 case Iop_Cls16Sx8: size = 1; break;
4091 case Iop_Cls32Sx4: size = 2; break;
4092 default: vassert(0);
4093 }
4094 addInstr(env, ARMInstr_NUnary(ARMneon_CLS, res, arg, size, True));
4095 return res;
4096 }
4097 case Iop_FtoI32Sx4_RZ: {
4098 HReg res = newVRegV(env);
4099 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4100 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoS,
4101 res, arg, 2, True));
4102 return res;
4103 }
4104 case Iop_FtoI32Ux4_RZ: {
4105 HReg res = newVRegV(env);
4106 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4107 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoU,
4108 res, arg, 2, True));
4109 return res;
4110 }
4111 case Iop_I32StoFx4: {
4112 HReg res = newVRegV(env);
4113 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4114 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTStoF,
4115 res, arg, 2, True));
4116 return res;
4117 }
4118 case Iop_I32UtoFx4: {
4119 HReg res = newVRegV(env);
4120 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4121 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTUtoF,
4122 res, arg, 2, True));
4123 return res;
4124 }
4125 case Iop_F16toF32x4: {
4126 HReg res = newVRegV(env);
4127 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4128 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTF16toF32,
4129 res, arg, 2, True));
4130 return res;
4131 }
4132 case Iop_Recip32Fx4: {
4133 HReg res = newVRegV(env);
4134 HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4135 addInstr(env, ARMInstr_NUnary(ARMneon_VRECIPF,
4136 res, argL, 0, True));
4137 return res;
4138 }
4139 case Iop_Recip32x4: {
4140 HReg res = newVRegV(env);
4141 HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4142 addInstr(env, ARMInstr_NUnary(ARMneon_VRECIP,
4143 res, argL, 0, True));
4144 return res;
4145 }
4146 case Iop_Abs32Fx4: {
4147 DECLARE_PATTERN(p_vabd_32fx4);
4148 DEFINE_PATTERN(p_vabd_32fx4,
4149 unop(Iop_Abs32Fx4,
4150 binop(Iop_Sub32Fx4,
4151 bind(0),
4152 bind(1))));
4153 if (matchIRExpr(&mi, p_vabd_32fx4, e)) {
4154 HReg res = newVRegV(env);
4155 HReg argL = iselNeonExpr(env, mi.bindee[0]);
4156 HReg argR = iselNeonExpr(env, mi.bindee[1]);
4157 addInstr(env, ARMInstr_NBinary(ARMneon_VABDFP,
4158 res, argL, argR, 0, True));
4159 return res;
4160 } else {
4161 HReg res = newVRegV(env);
4162 HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4163 addInstr(env, ARMInstr_NUnary(ARMneon_VABSFP,
4164 res, argL, 0, True));
4165 return res;
4166 }
4167 }
4168 case Iop_Rsqrte32Fx4: {
4169 HReg res = newVRegV(env);
4170 HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4171 addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTEFP,
4172 res, argL, 0, True));
4173 return res;
4174 }
4175 case Iop_Rsqrte32x4: {
4176 HReg res = newVRegV(env);
4177 HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4178 addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTE,
4179 res, argL, 0, True));
4180 return res;
4181 }
4182 case Iop_Neg32Fx4: {
4183 HReg res = newVRegV(env);
4184 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4185 addInstr(env, ARMInstr_NUnary(ARMneon_VNEGF,
4186 res, arg, 0, True));
4187 return res;
4188 }
4189 /* ... */
4190 default:
4191 break;
4192 }
4193 }
4194
4195 if (e->tag == Iex_Binop) {
4196 switch (e->Iex.Binop.op) {
4197 case Iop_64HLtoV128:
4198 /* Try to match into single "VMOV reg, imm" instruction */
4199 if (e->Iex.Binop.arg1->tag == Iex_Const &&
4200 e->Iex.Binop.arg2->tag == Iex_Const &&
4201 typeOfIRExpr(env->type_env, e->Iex.Binop.arg1) == Ity_I64 &&
4202 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) == Ity_I64 &&
4203 e->Iex.Binop.arg1->Iex.Const.con->Ico.U64 ==
4204 e->Iex.Binop.arg2->Iex.Const.con->Ico.U64) {
4205 ULong imm64 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
4206 ARMNImm *imm = Imm64_to_ARMNImm(imm64);
4207 if (imm) {
4208 HReg res = newVRegV(env);
4209 addInstr(env, ARMInstr_NeonImm(res, imm));
4210 return res;
4211 }
4212 if ((imm64 >> 32) == 0LL &&
4213 (imm = Imm64_to_ARMNImm(imm64 | (imm64 << 32))) != NULL) {
4214 HReg tmp1 = newVRegV(env);
4215 HReg tmp2 = newVRegV(env);
4216 HReg res = newVRegV(env);
4217 if (imm->type < 10) {
4218 addInstr(env, ARMInstr_NeonImm(tmp1, ARMNImm_TI(9,0x0f)));
4219 addInstr(env, ARMInstr_NeonImm(tmp2, imm));
4220 addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
4221 res, tmp1, tmp2, 4, True));
4222 return res;
4223 }
4224 }
4225 if ((imm64 & 0xFFFFFFFFLL) == 0LL &&
4226 (imm = Imm64_to_ARMNImm(imm64 | (imm64 >> 32))) != NULL) {
4227 HReg tmp1 = newVRegV(env);
4228 HReg tmp2 = newVRegV(env);
4229 HReg res = newVRegV(env);
4230 if (imm->type < 10) {
4231 addInstr(env, ARMInstr_NeonImm(tmp1, ARMNImm_TI(9,0xf0)));
4232 addInstr(env, ARMInstr_NeonImm(tmp2, imm));
4233 addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
4234 res, tmp1, tmp2, 4, True));
4235 return res;
4236 }
4237 }
4238 }
sewardj6828dc72011-09-30 08:49:02 +00004239 /* Does not match "VMOV Reg, Imm" form. We'll have to do
4240 it the slow way. */
4241 {
4242 /* local scope */
4243 /* Done via the stack for ease of use. */
4244 /* FIXME: assumes little endian host */
4245 HReg w3, w2, w1, w0;
4246 HReg res = newVRegV(env);
4247 ARMAMode1* sp_0 = ARMAMode1_RI(hregARM_R13(), 0);
4248 ARMAMode1* sp_4 = ARMAMode1_RI(hregARM_R13(), 4);
4249 ARMAMode1* sp_8 = ARMAMode1_RI(hregARM_R13(), 8);
4250 ARMAMode1* sp_12 = ARMAMode1_RI(hregARM_R13(), 12);
4251 ARMRI84* c_16 = ARMRI84_I84(16,0);
4252 /* Make space for SP */
4253 addInstr(env, ARMInstr_Alu(ARMalu_SUB, hregARM_R13(),
4254 hregARM_R13(), c_16));
4255
4256 /* Store the less significant 64 bits */
4257 iselInt64Expr(&w1, &w0, env, e->Iex.Binop.arg2);
4258 addInstr(env, ARMInstr_LdSt32(False/*store*/, w0, sp_0));
4259 addInstr(env, ARMInstr_LdSt32(False/*store*/, w1, sp_4));
4260
4261 /* Store the more significant 64 bits */
4262 iselInt64Expr(&w3, &w2, env, e->Iex.Binop.arg1);
4263 addInstr(env, ARMInstr_LdSt32(False/*store*/, w2, sp_8));
4264 addInstr(env, ARMInstr_LdSt32(False/*store*/, w3, sp_12));
4265
4266 /* Load result back from stack. */
4267 addInstr(env, ARMInstr_NLdStQ(True/*load*/, res,
4268 mkARMAModeN_R(hregARM_R13())));
4269
4270 /* Restore SP */
4271 addInstr(env, ARMInstr_Alu(ARMalu_ADD, hregARM_R13(),
4272 hregARM_R13(), c_16));
4273 return res;
4274 } /* local scope */
sewardj6c60b322010-08-22 12:48:28 +00004275 goto neon_expr_bad;
4276 case Iop_AndV128: {
4277 HReg res = newVRegV(env);
4278 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4279 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4280 addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
4281 res, argL, argR, 4, True));
4282 return res;
4283 }
4284 case Iop_OrV128: {
4285 HReg res = newVRegV(env);
4286 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4287 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4288 addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
4289 res, argL, argR, 4, True));
4290 return res;
4291 }
4292 case Iop_XorV128: {
4293 HReg res = newVRegV(env);
4294 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4295 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4296 addInstr(env, ARMInstr_NBinary(ARMneon_VXOR,
4297 res, argL, argR, 4, True));
4298 return res;
4299 }
4300 case Iop_Add8x16:
4301 case Iop_Add16x8:
4302 case Iop_Add32x4:
4303 case Iop_Add64x2: {
4304 /*
4305 FIXME: remove this if not used
4306 DECLARE_PATTERN(p_vrhadd_32sx4);
4307 ULong one = (1LL << 32) | 1LL;
4308 DEFINE_PATTERN(p_vrhadd_32sx4,
4309 binop(Iop_Add32x4,
4310 binop(Iop_Add32x4,
4311 binop(Iop_SarN32x4,
4312 bind(0),
4313 mkU8(1)),
4314 binop(Iop_SarN32x4,
4315 bind(1),
4316 mkU8(1))),
4317 binop(Iop_SarN32x4,
4318 binop(Iop_Add32x4,
4319 binop(Iop_Add32x4,
4320 binop(Iop_AndV128,
4321 bind(0),
4322 mkU128(one)),
4323 binop(Iop_AndV128,
4324 bind(1),
4325 mkU128(one))),
4326 mkU128(one)),
4327 mkU8(1))));
4328 */
4329 HReg res = newVRegV(env);
4330 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4331 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4332 UInt size;
4333 switch (e->Iex.Binop.op) {
4334 case Iop_Add8x16: size = 0; break;
4335 case Iop_Add16x8: size = 1; break;
4336 case Iop_Add32x4: size = 2; break;
4337 case Iop_Add64x2: size = 3; break;
4338 default:
4339 ppIROp(e->Iex.Binop.op);
4340 vpanic("Illegal element size in VADD");
4341 }
4342 addInstr(env, ARMInstr_NBinary(ARMneon_VADD,
4343 res, argL, argR, size, True));
4344 return res;
4345 }
4346 case Iop_Add32Fx4: {
4347 HReg res = newVRegV(env);
4348 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4349 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4350 UInt size = 0;
4351 addInstr(env, ARMInstr_NBinary(ARMneon_VADDFP,
4352 res, argL, argR, size, True));
4353 return res;
4354 }
4355 case Iop_Recps32Fx4: {
4356 HReg res = newVRegV(env);
4357 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4358 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4359 UInt size = 0;
4360 addInstr(env, ARMInstr_NBinary(ARMneon_VRECPS,
4361 res, argL, argR, size, True));
4362 return res;
4363 }
4364 case Iop_Rsqrts32Fx4: {
4365 HReg res = newVRegV(env);
4366 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4367 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4368 UInt size = 0;
4369 addInstr(env, ARMInstr_NBinary(ARMneon_VRSQRTS,
4370 res, argL, argR, size, True));
4371 return res;
4372 }
4373 case Iop_InterleaveEvenLanes8x16:
4374 case Iop_InterleaveEvenLanes16x8:
4375 case Iop_InterleaveEvenLanes32x4:
4376 case Iop_InterleaveOddLanes8x16:
4377 case Iop_InterleaveOddLanes16x8:
4378 case Iop_InterleaveOddLanes32x4: {
4379 HReg tmp = newVRegV(env);
4380 HReg res = newVRegV(env);
4381 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4382 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4383 UInt size;
4384 UInt is_lo;
4385 switch (e->Iex.Binop.op) {
4386 case Iop_InterleaveEvenLanes8x16: is_lo = 0; size = 0; break;
4387 case Iop_InterleaveOddLanes8x16: is_lo = 1; size = 0; break;
4388 case Iop_InterleaveEvenLanes16x8: is_lo = 0; size = 1; break;
4389 case Iop_InterleaveOddLanes16x8: is_lo = 1; size = 1; break;
4390 case Iop_InterleaveEvenLanes32x4: is_lo = 0; size = 2; break;
4391 case Iop_InterleaveOddLanes32x4: is_lo = 1; size = 2; break;
4392 default:
4393 ppIROp(e->Iex.Binop.op);
4394 vpanic("Illegal element size in VTRN");
4395 }
4396 if (is_lo) {
4397 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4398 tmp, argL, 4, True));
4399 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4400 res, argR, 4, True));
4401 addInstr(env, ARMInstr_NDual(ARMneon_TRN,
4402 res, tmp, size, True));
4403 } else {
4404 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4405 tmp, argR, 4, True));
4406 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4407 res, argL, 4, True));
4408 addInstr(env, ARMInstr_NDual(ARMneon_TRN,
4409 tmp, res, size, True));
4410 }
4411 return res;
4412 }
4413 case Iop_InterleaveHI8x16:
4414 case Iop_InterleaveHI16x8:
4415 case Iop_InterleaveHI32x4:
4416 case Iop_InterleaveLO8x16:
4417 case Iop_InterleaveLO16x8:
4418 case Iop_InterleaveLO32x4: {
4419 HReg tmp = newVRegV(env);
4420 HReg res = newVRegV(env);
4421 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4422 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4423 UInt size;
4424 UInt is_lo;
4425 switch (e->Iex.Binop.op) {
4426 case Iop_InterleaveHI8x16: is_lo = 1; size = 0; break;
4427 case Iop_InterleaveLO8x16: is_lo = 0; size = 0; break;
4428 case Iop_InterleaveHI16x8: is_lo = 1; size = 1; break;
4429 case Iop_InterleaveLO16x8: is_lo = 0; size = 1; break;
4430 case Iop_InterleaveHI32x4: is_lo = 1; size = 2; break;
4431 case Iop_InterleaveLO32x4: is_lo = 0; size = 2; break;
4432 default:
4433 ppIROp(e->Iex.Binop.op);
4434 vpanic("Illegal element size in VZIP");
4435 }
4436 if (is_lo) {
4437 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4438 tmp, argL, 4, True));
4439 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4440 res, argR, 4, True));
4441 addInstr(env, ARMInstr_NDual(ARMneon_ZIP,
4442 res, tmp, size, True));
4443 } else {
4444 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4445 tmp, argR, 4, True));
4446 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4447 res, argL, 4, True));
4448 addInstr(env, ARMInstr_NDual(ARMneon_ZIP,
4449 tmp, res, size, True));
4450 }
4451 return res;
4452 }
4453 case Iop_CatOddLanes8x16:
4454 case Iop_CatOddLanes16x8:
4455 case Iop_CatOddLanes32x4:
4456 case Iop_CatEvenLanes8x16:
4457 case Iop_CatEvenLanes16x8:
4458 case Iop_CatEvenLanes32x4: {
4459 HReg tmp = newVRegV(env);
4460 HReg res = newVRegV(env);
4461 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4462 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4463 UInt size;
4464 UInt is_lo;
4465 switch (e->Iex.Binop.op) {
4466 case Iop_CatOddLanes8x16: is_lo = 1; size = 0; break;
4467 case Iop_CatEvenLanes8x16: is_lo = 0; size = 0; break;
4468 case Iop_CatOddLanes16x8: is_lo = 1; size = 1; break;
4469 case Iop_CatEvenLanes16x8: is_lo = 0; size = 1; break;
4470 case Iop_CatOddLanes32x4: is_lo = 1; size = 2; break;
4471 case Iop_CatEvenLanes32x4: is_lo = 0; size = 2; break;
4472 default:
4473 ppIROp(e->Iex.Binop.op);
4474 vpanic("Illegal element size in VUZP");
4475 }
4476 if (is_lo) {
4477 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4478 tmp, argL, 4, True));
4479 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4480 res, argR, 4, True));
4481 addInstr(env, ARMInstr_NDual(ARMneon_UZP,
4482 res, tmp, size, True));
4483 } else {
4484 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4485 tmp, argR, 4, True));
4486 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4487 res, argL, 4, True));
4488 addInstr(env, ARMInstr_NDual(ARMneon_UZP,
4489 tmp, res, size, True));
4490 }
4491 return res;
4492 }
4493 case Iop_QAdd8Ux16:
4494 case Iop_QAdd16Ux8:
4495 case Iop_QAdd32Ux4:
4496 case Iop_QAdd64Ux2: {
4497 HReg res = newVRegV(env);
4498 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4499 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4500 UInt size;
4501 switch (e->Iex.Binop.op) {
4502 case Iop_QAdd8Ux16: size = 0; break;
4503 case Iop_QAdd16Ux8: size = 1; break;
4504 case Iop_QAdd32Ux4: size = 2; break;
4505 case Iop_QAdd64Ux2: size = 3; break;
4506 default:
4507 ppIROp(e->Iex.Binop.op);
4508 vpanic("Illegal element size in VQADDU");
4509 }
4510 addInstr(env, ARMInstr_NBinary(ARMneon_VQADDU,
4511 res, argL, argR, size, True));
4512 return res;
4513 }
4514 case Iop_QAdd8Sx16:
4515 case Iop_QAdd16Sx8:
4516 case Iop_QAdd32Sx4:
4517 case Iop_QAdd64Sx2: {
4518 HReg res = newVRegV(env);
4519 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4520 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4521 UInt size;
4522 switch (e->Iex.Binop.op) {
4523 case Iop_QAdd8Sx16: size = 0; break;
4524 case Iop_QAdd16Sx8: size = 1; break;
4525 case Iop_QAdd32Sx4: size = 2; break;
4526 case Iop_QAdd64Sx2: size = 3; break;
4527 default:
4528 ppIROp(e->Iex.Binop.op);
4529 vpanic("Illegal element size in VQADDS");
4530 }
4531 addInstr(env, ARMInstr_NBinary(ARMneon_VQADDS,
4532 res, argL, argR, size, True));
4533 return res;
4534 }
4535 case Iop_Sub8x16:
4536 case Iop_Sub16x8:
4537 case Iop_Sub32x4:
4538 case Iop_Sub64x2: {
4539 HReg res = newVRegV(env);
4540 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4541 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4542 UInt size;
4543 switch (e->Iex.Binop.op) {
4544 case Iop_Sub8x16: size = 0; break;
4545 case Iop_Sub16x8: size = 1; break;
4546 case Iop_Sub32x4: size = 2; break;
4547 case Iop_Sub64x2: size = 3; break;
4548 default:
4549 ppIROp(e->Iex.Binop.op);
4550 vpanic("Illegal element size in VSUB");
4551 }
4552 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
4553 res, argL, argR, size, True));
4554 return res;
4555 }
4556 case Iop_Sub32Fx4: {
4557 HReg res = newVRegV(env);
4558 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4559 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4560 UInt size = 0;
4561 addInstr(env, ARMInstr_NBinary(ARMneon_VSUBFP,
4562 res, argL, argR, size, True));
4563 return res;
4564 }
4565 case Iop_QSub8Ux16:
4566 case Iop_QSub16Ux8:
4567 case Iop_QSub32Ux4:
4568 case Iop_QSub64Ux2: {
4569 HReg res = newVRegV(env);
4570 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4571 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4572 UInt size;
4573 switch (e->Iex.Binop.op) {
4574 case Iop_QSub8Ux16: size = 0; break;
4575 case Iop_QSub16Ux8: size = 1; break;
4576 case Iop_QSub32Ux4: size = 2; break;
4577 case Iop_QSub64Ux2: size = 3; break;
4578 default:
4579 ppIROp(e->Iex.Binop.op);
4580 vpanic("Illegal element size in VQSUBU");
4581 }
4582 addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBU,
4583 res, argL, argR, size, True));
4584 return res;
4585 }
4586 case Iop_QSub8Sx16:
4587 case Iop_QSub16Sx8:
4588 case Iop_QSub32Sx4:
4589 case Iop_QSub64Sx2: {
4590 HReg res = newVRegV(env);
4591 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4592 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4593 UInt size;
4594 switch (e->Iex.Binop.op) {
4595 case Iop_QSub8Sx16: size = 0; break;
4596 case Iop_QSub16Sx8: size = 1; break;
4597 case Iop_QSub32Sx4: size = 2; break;
4598 case Iop_QSub64Sx2: size = 3; break;
4599 default:
4600 ppIROp(e->Iex.Binop.op);
4601 vpanic("Illegal element size in VQSUBS");
4602 }
4603 addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBS,
4604 res, argL, argR, size, True));
4605 return res;
4606 }
4607 case Iop_Max8Ux16:
4608 case Iop_Max16Ux8:
4609 case Iop_Max32Ux4: {
4610 HReg res = newVRegV(env);
4611 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4612 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4613 UInt size;
4614 switch (e->Iex.Binop.op) {
4615 case Iop_Max8Ux16: size = 0; break;
4616 case Iop_Max16Ux8: size = 1; break;
4617 case Iop_Max32Ux4: size = 2; break;
4618 default: vpanic("Illegal element size in VMAXU");
4619 }
4620 addInstr(env, ARMInstr_NBinary(ARMneon_VMAXU,
4621 res, argL, argR, size, True));
4622 return res;
4623 }
4624 case Iop_Max8Sx16:
4625 case Iop_Max16Sx8:
4626 case Iop_Max32Sx4: {
4627 HReg res = newVRegV(env);
4628 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4629 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4630 UInt size;
4631 switch (e->Iex.Binop.op) {
4632 case Iop_Max8Sx16: size = 0; break;
4633 case Iop_Max16Sx8: size = 1; break;
4634 case Iop_Max32Sx4: size = 2; break;
4635 default: vpanic("Illegal element size in VMAXU");
4636 }
4637 addInstr(env, ARMInstr_NBinary(ARMneon_VMAXS,
4638 res, argL, argR, size, True));
4639 return res;
4640 }
4641 case Iop_Min8Ux16:
4642 case Iop_Min16Ux8:
4643 case Iop_Min32Ux4: {
4644 HReg res = newVRegV(env);
4645 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4646 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4647 UInt size;
4648 switch (e->Iex.Binop.op) {
4649 case Iop_Min8Ux16: size = 0; break;
4650 case Iop_Min16Ux8: size = 1; break;
4651 case Iop_Min32Ux4: size = 2; break;
4652 default: vpanic("Illegal element size in VMAXU");
4653 }
4654 addInstr(env, ARMInstr_NBinary(ARMneon_VMINU,
4655 res, argL, argR, size, True));
4656 return res;
4657 }
4658 case Iop_Min8Sx16:
4659 case Iop_Min16Sx8:
4660 case Iop_Min32Sx4: {
4661 HReg res = newVRegV(env);
4662 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4663 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4664 UInt size;
4665 switch (e->Iex.Binop.op) {
4666 case Iop_Min8Sx16: size = 0; break;
4667 case Iop_Min16Sx8: size = 1; break;
4668 case Iop_Min32Sx4: size = 2; break;
4669 default: vpanic("Illegal element size in VMAXU");
4670 }
4671 addInstr(env, ARMInstr_NBinary(ARMneon_VMINS,
4672 res, argL, argR, size, True));
4673 return res;
4674 }
4675 case Iop_Sar8x16:
4676 case Iop_Sar16x8:
4677 case Iop_Sar32x4:
4678 case Iop_Sar64x2: {
4679 HReg res = newVRegV(env);
4680 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4681 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4682 HReg argR2 = newVRegV(env);
4683 HReg zero = newVRegV(env);
4684 UInt size;
4685 switch (e->Iex.Binop.op) {
4686 case Iop_Sar8x16: size = 0; break;
4687 case Iop_Sar16x8: size = 1; break;
4688 case Iop_Sar32x4: size = 2; break;
4689 case Iop_Sar64x2: size = 3; break;
4690 default: vassert(0);
4691 }
4692 addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
4693 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
4694 argR2, zero, argR, size, True));
4695 addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
4696 res, argL, argR2, size, True));
4697 return res;
4698 }
4699 case Iop_Sal8x16:
4700 case Iop_Sal16x8:
4701 case Iop_Sal32x4:
4702 case Iop_Sal64x2: {
4703 HReg res = newVRegV(env);
4704 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4705 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4706 UInt size;
4707 switch (e->Iex.Binop.op) {
4708 case Iop_Sal8x16: size = 0; break;
4709 case Iop_Sal16x8: size = 1; break;
4710 case Iop_Sal32x4: size = 2; break;
4711 case Iop_Sal64x2: size = 3; break;
4712 default: vassert(0);
4713 }
4714 addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
4715 res, argL, argR, size, True));
4716 return res;
4717 }
4718 case Iop_Shr8x16:
4719 case Iop_Shr16x8:
4720 case Iop_Shr32x4:
4721 case Iop_Shr64x2: {
4722 HReg res = newVRegV(env);
4723 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4724 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4725 HReg argR2 = newVRegV(env);
4726 HReg zero = newVRegV(env);
4727 UInt size;
4728 switch (e->Iex.Binop.op) {
4729 case Iop_Shr8x16: size = 0; break;
4730 case Iop_Shr16x8: size = 1; break;
4731 case Iop_Shr32x4: size = 2; break;
4732 case Iop_Shr64x2: size = 3; break;
4733 default: vassert(0);
4734 }
4735 addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
4736 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
4737 argR2, zero, argR, size, True));
4738 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4739 res, argL, argR2, size, True));
4740 return res;
4741 }
4742 case Iop_Shl8x16:
4743 case Iop_Shl16x8:
4744 case Iop_Shl32x4:
4745 case Iop_Shl64x2: {
4746 HReg res = newVRegV(env);
4747 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4748 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4749 UInt size;
4750 switch (e->Iex.Binop.op) {
4751 case Iop_Shl8x16: size = 0; break;
4752 case Iop_Shl16x8: size = 1; break;
4753 case Iop_Shl32x4: size = 2; break;
4754 case Iop_Shl64x2: size = 3; break;
4755 default: vassert(0);
4756 }
4757 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4758 res, argL, argR, size, True));
4759 return res;
4760 }
4761 case Iop_QShl8x16:
4762 case Iop_QShl16x8:
4763 case Iop_QShl32x4:
4764 case Iop_QShl64x2: {
4765 HReg res = newVRegV(env);
4766 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4767 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4768 UInt size;
4769 switch (e->Iex.Binop.op) {
4770 case Iop_QShl8x16: size = 0; break;
4771 case Iop_QShl16x8: size = 1; break;
4772 case Iop_QShl32x4: size = 2; break;
4773 case Iop_QShl64x2: size = 3; break;
4774 default: vassert(0);
4775 }
4776 addInstr(env, ARMInstr_NShift(ARMneon_VQSHL,
4777 res, argL, argR, size, True));
4778 return res;
4779 }
4780 case Iop_QSal8x16:
4781 case Iop_QSal16x8:
4782 case Iop_QSal32x4:
4783 case Iop_QSal64x2: {
4784 HReg res = newVRegV(env);
4785 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4786 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4787 UInt size;
4788 switch (e->Iex.Binop.op) {
4789 case Iop_QSal8x16: size = 0; break;
4790 case Iop_QSal16x8: size = 1; break;
4791 case Iop_QSal32x4: size = 2; break;
4792 case Iop_QSal64x2: size = 3; break;
4793 default: vassert(0);
4794 }
4795 addInstr(env, ARMInstr_NShift(ARMneon_VQSAL,
4796 res, argL, argR, size, True));
4797 return res;
4798 }
4799 case Iop_QShlN8x16:
4800 case Iop_QShlN16x8:
4801 case Iop_QShlN32x4:
4802 case Iop_QShlN64x2: {
4803 HReg res = newVRegV(env);
4804 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4805 UInt size, imm;
4806 if (e->Iex.Binop.arg2->tag != Iex_Const ||
4807 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
4808 vpanic("ARM taget supports Iop_QShlNAxB with constant "
4809 "second argument only\n");
4810 }
4811 imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
4812 switch (e->Iex.Binop.op) {
4813 case Iop_QShlN8x16: size = 8 | imm; break;
4814 case Iop_QShlN16x8: size = 16 | imm; break;
4815 case Iop_QShlN32x4: size = 32 | imm; break;
4816 case Iop_QShlN64x2: size = 64 | imm; break;
4817 default: vassert(0);
4818 }
4819 addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUU,
4820 res, argL, size, True));
4821 return res;
4822 }
4823 case Iop_QShlN8Sx16:
4824 case Iop_QShlN16Sx8:
4825 case Iop_QShlN32Sx4:
4826 case Iop_QShlN64Sx2: {
4827 HReg res = newVRegV(env);
4828 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4829 UInt size, imm;
4830 if (e->Iex.Binop.arg2->tag != Iex_Const ||
4831 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
4832 vpanic("ARM taget supports Iop_QShlNASxB with constant "
4833 "second argument only\n");
4834 }
4835 imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
4836 switch (e->Iex.Binop.op) {
4837 case Iop_QShlN8Sx16: size = 8 | imm; break;
4838 case Iop_QShlN16Sx8: size = 16 | imm; break;
4839 case Iop_QShlN32Sx4: size = 32 | imm; break;
4840 case Iop_QShlN64Sx2: size = 64 | imm; break;
4841 default: vassert(0);
4842 }
4843 addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUS,
4844 res, argL, size, True));
4845 return res;
4846 }
4847 case Iop_QSalN8x16:
4848 case Iop_QSalN16x8:
4849 case Iop_QSalN32x4:
4850 case Iop_QSalN64x2: {
4851 HReg res = newVRegV(env);
4852 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4853 UInt size, imm;
4854 if (e->Iex.Binop.arg2->tag != Iex_Const ||
4855 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
4856 vpanic("ARM taget supports Iop_QShlNAxB with constant "
4857 "second argument only\n");
4858 }
4859 imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
4860 switch (e->Iex.Binop.op) {
4861 case Iop_QSalN8x16: size = 8 | imm; break;
4862 case Iop_QSalN16x8: size = 16 | imm; break;
4863 case Iop_QSalN32x4: size = 32 | imm; break;
4864 case Iop_QSalN64x2: size = 64 | imm; break;
4865 default: vassert(0);
4866 }
4867 addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNSS,
4868 res, argL, size, True));
4869 return res;
4870 }
4871 case Iop_ShrN8x16:
4872 case Iop_ShrN16x8:
4873 case Iop_ShrN32x4:
4874 case Iop_ShrN64x2: {
4875 HReg res = newVRegV(env);
4876 HReg tmp = newVRegV(env);
4877 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4878 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
4879 HReg argR2 = newVRegI(env);
4880 UInt size;
4881 switch (e->Iex.Binop.op) {
4882 case Iop_ShrN8x16: size = 0; break;
4883 case Iop_ShrN16x8: size = 1; break;
4884 case Iop_ShrN32x4: size = 2; break;
4885 case Iop_ShrN64x2: size = 3; break;
4886 default: vassert(0);
4887 }
4888 addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
4889 addInstr(env, ARMInstr_NUnary(ARMneon_DUP,
4890 tmp, argR2, 0, True));
4891 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4892 res, argL, tmp, size, True));
4893 return res;
4894 }
4895 case Iop_ShlN8x16:
4896 case Iop_ShlN16x8:
4897 case Iop_ShlN32x4:
4898 case Iop_ShlN64x2: {
4899 HReg res = newVRegV(env);
4900 HReg tmp = newVRegV(env);
4901 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4902 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
4903 UInt size;
4904 switch (e->Iex.Binop.op) {
4905 case Iop_ShlN8x16: size = 0; break;
4906 case Iop_ShlN16x8: size = 1; break;
4907 case Iop_ShlN32x4: size = 2; break;
4908 case Iop_ShlN64x2: size = 3; break;
4909 default: vassert(0);
4910 }
4911 addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR, 0, True));
4912 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4913 res, argL, tmp, size, True));
4914 return res;
4915 }
4916 case Iop_SarN8x16:
4917 case Iop_SarN16x8:
4918 case Iop_SarN32x4:
4919 case Iop_SarN64x2: {
4920 HReg res = newVRegV(env);
4921 HReg tmp = newVRegV(env);
4922 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4923 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
4924 HReg argR2 = newVRegI(env);
4925 UInt size;
4926 switch (e->Iex.Binop.op) {
4927 case Iop_SarN8x16: size = 0; break;
4928 case Iop_SarN16x8: size = 1; break;
4929 case Iop_SarN32x4: size = 2; break;
4930 case Iop_SarN64x2: size = 3; break;
4931 default: vassert(0);
4932 }
4933 addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
4934 addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, True));
4935 addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
4936 res, argL, tmp, size, True));
4937 return res;
4938 }
4939 case Iop_CmpGT8Ux16:
4940 case Iop_CmpGT16Ux8:
4941 case Iop_CmpGT32Ux4: {
4942 HReg res = newVRegV(env);
4943 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4944 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4945 UInt size;
4946 switch (e->Iex.Binop.op) {
4947 case Iop_CmpGT8Ux16: size = 0; break;
4948 case Iop_CmpGT16Ux8: size = 1; break;
4949 case Iop_CmpGT32Ux4: size = 2; break;
4950 default: vassert(0);
4951 }
4952 addInstr(env, ARMInstr_NBinary(ARMneon_VCGTU,
4953 res, argL, argR, size, True));
4954 return res;
4955 }
4956 case Iop_CmpGT8Sx16:
4957 case Iop_CmpGT16Sx8:
4958 case Iop_CmpGT32Sx4: {
4959 HReg res = newVRegV(env);
4960 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4961 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4962 UInt size;
4963 switch (e->Iex.Binop.op) {
4964 case Iop_CmpGT8Sx16: size = 0; break;
4965 case Iop_CmpGT16Sx8: size = 1; break;
4966 case Iop_CmpGT32Sx4: size = 2; break;
4967 default: vassert(0);
4968 }
4969 addInstr(env, ARMInstr_NBinary(ARMneon_VCGTS,
4970 res, argL, argR, size, True));
4971 return res;
4972 }
4973 case Iop_CmpEQ8x16:
4974 case Iop_CmpEQ16x8:
4975 case Iop_CmpEQ32x4: {
4976 HReg res = newVRegV(env);
4977 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4978 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4979 UInt size;
4980 switch (e->Iex.Binop.op) {
4981 case Iop_CmpEQ8x16: size = 0; break;
4982 case Iop_CmpEQ16x8: size = 1; break;
4983 case Iop_CmpEQ32x4: size = 2; break;
4984 default: vassert(0);
4985 }
4986 addInstr(env, ARMInstr_NBinary(ARMneon_VCEQ,
4987 res, argL, argR, size, True));
4988 return res;
4989 }
4990 case Iop_Mul8x16:
4991 case Iop_Mul16x8:
4992 case Iop_Mul32x4: {
4993 HReg res = newVRegV(env);
4994 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4995 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4996 UInt size = 0;
4997 switch(e->Iex.Binop.op) {
4998 case Iop_Mul8x16: size = 0; break;
4999 case Iop_Mul16x8: size = 1; break;
5000 case Iop_Mul32x4: size = 2; break;
5001 default: vassert(0);
5002 }
5003 addInstr(env, ARMInstr_NBinary(ARMneon_VMUL,
5004 res, argL, argR, size, True));
5005 return res;
5006 }
5007 case Iop_Mul32Fx4: {
5008 HReg res = newVRegV(env);
5009 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5010 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5011 UInt size = 0;
5012 addInstr(env, ARMInstr_NBinary(ARMneon_VMULFP,
5013 res, argL, argR, size, True));
5014 return res;
5015 }
5016 case Iop_Mull8Ux8:
5017 case Iop_Mull16Ux4:
5018 case Iop_Mull32Ux2: {
5019 HReg res = newVRegV(env);
5020 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5021 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
5022 UInt size = 0;
5023 switch(e->Iex.Binop.op) {
5024 case Iop_Mull8Ux8: size = 0; break;
5025 case Iop_Mull16Ux4: size = 1; break;
5026 case Iop_Mull32Ux2: size = 2; break;
5027 default: vassert(0);
5028 }
5029 addInstr(env, ARMInstr_NBinary(ARMneon_VMULLU,
5030 res, argL, argR, size, True));
5031 return res;
5032 }
5033
5034 case Iop_Mull8Sx8:
5035 case Iop_Mull16Sx4:
5036 case Iop_Mull32Sx2: {
5037 HReg res = newVRegV(env);
5038 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5039 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
5040 UInt size = 0;
5041 switch(e->Iex.Binop.op) {
5042 case Iop_Mull8Sx8: size = 0; break;
5043 case Iop_Mull16Sx4: size = 1; break;
5044 case Iop_Mull32Sx2: size = 2; break;
5045 default: vassert(0);
5046 }
5047 addInstr(env, ARMInstr_NBinary(ARMneon_VMULLS,
5048 res, argL, argR, size, True));
5049 return res;
5050 }
5051
5052 case Iop_QDMulHi16Sx8:
5053 case Iop_QDMulHi32Sx4: {
5054 HReg res = newVRegV(env);
5055 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5056 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5057 UInt size = 0;
5058 switch(e->Iex.Binop.op) {
5059 case Iop_QDMulHi16Sx8: size = 1; break;
5060 case Iop_QDMulHi32Sx4: size = 2; break;
5061 default: vassert(0);
5062 }
5063 addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULH,
5064 res, argL, argR, size, True));
5065 return res;
5066 }
5067
5068 case Iop_QRDMulHi16Sx8:
5069 case Iop_QRDMulHi32Sx4: {
5070 HReg res = newVRegV(env);
5071 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5072 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5073 UInt size = 0;
5074 switch(e->Iex.Binop.op) {
5075 case Iop_QRDMulHi16Sx8: size = 1; break;
5076 case Iop_QRDMulHi32Sx4: size = 2; break;
5077 default: vassert(0);
5078 }
5079 addInstr(env, ARMInstr_NBinary(ARMneon_VQRDMULH,
5080 res, argL, argR, size, True));
5081 return res;
5082 }
5083
5084 case Iop_QDMulLong16Sx4:
5085 case Iop_QDMulLong32Sx2: {
5086 HReg res = newVRegV(env);
5087 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5088 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
5089 UInt size = 0;
5090 switch(e->Iex.Binop.op) {
5091 case Iop_QDMulLong16Sx4: size = 1; break;
5092 case Iop_QDMulLong32Sx2: size = 2; break;
5093 default: vassert(0);
5094 }
5095 addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULL,
5096 res, argL, argR, size, True));
5097 return res;
5098 }
5099 case Iop_PolynomialMul8x16: {
5100 HReg res = newVRegV(env);
5101 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5102 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5103 UInt size = 0;
5104 addInstr(env, ARMInstr_NBinary(ARMneon_VMULP,
5105 res, argL, argR, size, True));
5106 return res;
5107 }
5108 case Iop_Max32Fx4: {
5109 HReg res = newVRegV(env);
5110 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5111 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5112 addInstr(env, ARMInstr_NBinary(ARMneon_VMAXF,
5113 res, argL, argR, 2, True));
5114 return res;
5115 }
5116 case Iop_Min32Fx4: {
5117 HReg res = newVRegV(env);
5118 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5119 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5120 addInstr(env, ARMInstr_NBinary(ARMneon_VMINF,
5121 res, argL, argR, 2, True));
5122 return res;
5123 }
5124 case Iop_PwMax32Fx4: {
5125 HReg res = newVRegV(env);
5126 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5127 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5128 addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXF,
5129 res, argL, argR, 2, True));
5130 return res;
5131 }
5132 case Iop_PwMin32Fx4: {
5133 HReg res = newVRegV(env);
5134 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5135 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5136 addInstr(env, ARMInstr_NBinary(ARMneon_VPMINF,
5137 res, argL, argR, 2, True));
5138 return res;
5139 }
5140 case Iop_CmpGT32Fx4: {
5141 HReg res = newVRegV(env);
5142 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5143 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5144 addInstr(env, ARMInstr_NBinary(ARMneon_VCGTF,
5145 res, argL, argR, 2, True));
5146 return res;
5147 }
5148 case Iop_CmpGE32Fx4: {
5149 HReg res = newVRegV(env);
5150 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5151 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5152 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEF,
5153 res, argL, argR, 2, True));
5154 return res;
5155 }
5156 case Iop_CmpEQ32Fx4: {
5157 HReg res = newVRegV(env);
5158 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5159 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5160 addInstr(env, ARMInstr_NBinary(ARMneon_VCEQF,
5161 res, argL, argR, 2, True));
5162 return res;
5163 }
5164
5165 case Iop_PolynomialMull8x8: {
5166 HReg res = newVRegV(env);
5167 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5168 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
5169 UInt size = 0;
5170 addInstr(env, ARMInstr_NBinary(ARMneon_VMULLP,
5171 res, argL, argR, size, True));
5172 return res;
5173 }
5174 case Iop_F32ToFixed32Ux4_RZ:
5175 case Iop_F32ToFixed32Sx4_RZ:
5176 case Iop_Fixed32UToF32x4_RN:
5177 case Iop_Fixed32SToF32x4_RN: {
5178 HReg res = newVRegV(env);
5179 HReg arg = iselNeonExpr(env, e->Iex.Binop.arg1);
5180 ARMNeonUnOp op;
5181 UInt imm6;
5182 if (e->Iex.Binop.arg2->tag != Iex_Const ||
5183 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
5184 vpanic("ARM supports FP <-> Fixed conversion with constant "
5185 "second argument less than 33 only\n");
5186 }
5187 imm6 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
5188 vassert(imm6 <= 32 && imm6 > 0);
5189 imm6 = 64 - imm6;
5190 switch(e->Iex.Binop.op) {
5191 case Iop_F32ToFixed32Ux4_RZ: op = ARMneon_VCVTFtoFixedU; break;
5192 case Iop_F32ToFixed32Sx4_RZ: op = ARMneon_VCVTFtoFixedS; break;
5193 case Iop_Fixed32UToF32x4_RN: op = ARMneon_VCVTFixedUtoF; break;
5194 case Iop_Fixed32SToF32x4_RN: op = ARMneon_VCVTFixedStoF; break;
5195 default: vassert(0);
5196 }
5197 addInstr(env, ARMInstr_NUnary(op, res, arg, imm6, True));
5198 return res;
5199 }
5200 /*
5201 FIXME remove if not used
5202 case Iop_VDup8x16:
5203 case Iop_VDup16x8:
5204 case Iop_VDup32x4: {
5205 HReg res = newVRegV(env);
5206 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5207 UInt imm4;
5208 UInt index;
5209 if (e->Iex.Binop.arg2->tag != Iex_Const ||
5210 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
5211 vpanic("ARM supports Iop_VDup with constant "
5212 "second argument less than 16 only\n");
5213 }
5214 index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
5215 switch(e->Iex.Binop.op) {
5216 case Iop_VDup8x16: imm4 = (index << 1) + 1; break;
5217 case Iop_VDup16x8: imm4 = (index << 2) + 2; break;
5218 case Iop_VDup32x4: imm4 = (index << 3) + 4; break;
5219 default: vassert(0);
5220 }
5221 if (imm4 >= 16) {
5222 vpanic("ARM supports Iop_VDup with constant "
5223 "second argument less than 16 only\n");
5224 }
5225 addInstr(env, ARMInstr_NUnary(ARMneon_VDUP,
5226 res, argL, imm4, True));
5227 return res;
5228 }
5229 */
5230 case Iop_PwAdd8x16:
5231 case Iop_PwAdd16x8:
5232 case Iop_PwAdd32x4: {
5233 HReg res = newVRegV(env);
5234 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5235 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5236 UInt size = 0;
5237 switch(e->Iex.Binop.op) {
5238 case Iop_PwAdd8x16: size = 0; break;
5239 case Iop_PwAdd16x8: size = 1; break;
5240 case Iop_PwAdd32x4: size = 2; break;
5241 default: vassert(0);
5242 }
5243 addInstr(env, ARMInstr_NBinary(ARMneon_VPADD,
5244 res, argL, argR, size, True));
5245 return res;
5246 }
5247 /* ... */
5248 default:
5249 break;
5250 }
5251 }
5252
5253 if (e->tag == Iex_Triop) {
florian420bfa92012-06-02 20:29:22 +00005254 IRTriop *triop = e->Iex.Triop.details;
5255
5256 switch (triop->op) {
sewardj6c60b322010-08-22 12:48:28 +00005257 case Iop_ExtractV128: {
5258 HReg res = newVRegV(env);
florian420bfa92012-06-02 20:29:22 +00005259 HReg argL = iselNeonExpr(env, triop->arg1);
5260 HReg argR = iselNeonExpr(env, triop->arg2);
sewardj6c60b322010-08-22 12:48:28 +00005261 UInt imm4;
florian420bfa92012-06-02 20:29:22 +00005262 if (triop->arg3->tag != Iex_Const ||
5263 typeOfIRExpr(env->type_env, triop->arg3) != Ity_I8) {
sewardj6c60b322010-08-22 12:48:28 +00005264 vpanic("ARM target supports Iop_ExtractV128 with constant "
5265 "third argument less than 16 only\n");
5266 }
florian420bfa92012-06-02 20:29:22 +00005267 imm4 = triop->arg3->Iex.Const.con->Ico.U8;
sewardj6c60b322010-08-22 12:48:28 +00005268 if (imm4 >= 16) {
5269 vpanic("ARM target supports Iop_ExtractV128 with constant "
5270 "third argument less than 16 only\n");
5271 }
5272 addInstr(env, ARMInstr_NBinary(ARMneon_VEXT,
5273 res, argL, argR, imm4, True));
5274 return res;
5275 }
5276 default:
5277 break;
5278 }
5279 }
5280
5281 if (e->tag == Iex_Mux0X) {
5282 HReg r8;
5283 HReg rX = iselNeonExpr(env, e->Iex.Mux0X.exprX);
5284 HReg r0 = iselNeonExpr(env, e->Iex.Mux0X.expr0);
5285 HReg dst = newVRegV(env);
5286 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, dst, rX, 4, True));
5287 r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond);
5288 addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r8,
5289 ARMRI84_I84(0xFF,0)));
5290 addInstr(env, ARMInstr_NCMovQ(ARMcc_EQ, dst, r0));
5291 return dst;
5292 }
5293
5294 neon_expr_bad:
5295 ppIRExpr(e);
5296 vpanic("iselNeonExpr_wrk");
5297}
5298
5299/*---------------------------------------------------------*/
sewardj6c299f32009-12-31 18:00:12 +00005300/*--- ISEL: Floating point expressions (64 bit) ---*/
5301/*---------------------------------------------------------*/
5302
5303/* Compute a 64-bit floating point value into a register, the identity
5304 of which is returned. As with iselIntExpr_R, the reg may be either
5305 real or virtual; in any case it must not be changed by subsequent
5306 code emitted by the caller. */
5307
5308static HReg iselDblExpr ( ISelEnv* env, IRExpr* e )
5309{
5310 HReg r = iselDblExpr_wrk( env, e );
5311# if 0
5312 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
5313# endif
5314 vassert(hregClass(r) == HRcFlt64);
5315 vassert(hregIsVirtual(r));
5316 return r;
5317}
5318
5319/* DO NOT CALL THIS DIRECTLY */
5320static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
5321{
5322 IRType ty = typeOfIRExpr(env->type_env,e);
5323 vassert(e);
5324 vassert(ty == Ity_F64);
5325
5326 if (e->tag == Iex_RdTmp) {
5327 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
5328 }
5329
5330 if (e->tag == Iex_Const) {
5331 /* Just handle the zero case. */
5332 IRConst* con = e->Iex.Const.con;
5333 if (con->tag == Ico_F64i && con->Ico.F64i == 0ULL) {
5334 HReg z32 = newVRegI(env);
5335 HReg dst = newVRegD(env);
5336 addInstr(env, ARMInstr_Imm32(z32, 0));
5337 addInstr(env, ARMInstr_VXferD(True/*toD*/, dst, z32, z32));
5338 return dst;
5339 }
5340 }
5341
5342 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
5343 ARMAModeV* am;
5344 HReg res = newVRegD(env);
5345 vassert(e->Iex.Load.ty == Ity_F64);
5346 am = iselIntExpr_AModeV(env, e->Iex.Load.addr);
5347 addInstr(env, ARMInstr_VLdStD(True/*isLoad*/, res, am));
5348 return res;
5349 }
5350
5351 if (e->tag == Iex_Get) {
5352 // XXX This won't work if offset > 1020 or is not 0 % 4.
5353 // In which case we'll have to generate more longwinded code.
5354 ARMAModeV* am = mkARMAModeV(hregARM_R8(), e->Iex.Get.offset);
5355 HReg res = newVRegD(env);
5356 addInstr(env, ARMInstr_VLdStD(True/*isLoad*/, res, am));
5357 return res;
5358 }
5359
5360 if (e->tag == Iex_Unop) {
5361 switch (e->Iex.Unop.op) {
5362 case Iop_ReinterpI64asF64: {
sewardjc6f970f2012-04-02 21:54:49 +00005363 if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
sewardj6c60b322010-08-22 12:48:28 +00005364 return iselNeon64Expr(env, e->Iex.Unop.arg);
5365 } else {
5366 HReg srcHi, srcLo;
5367 HReg dst = newVRegD(env);
5368 iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg);
5369 addInstr(env, ARMInstr_VXferD(True/*toD*/, dst, srcHi, srcLo));
5370 return dst;
5371 }
sewardj6c299f32009-12-31 18:00:12 +00005372 }
5373 case Iop_NegF64: {
5374 HReg src = iselDblExpr(env, e->Iex.Unop.arg);
5375 HReg dst = newVRegD(env);
5376 addInstr(env, ARMInstr_VUnaryD(ARMvfpu_NEG, dst, src));
5377 return dst;
5378 }
5379 case Iop_AbsF64: {
5380 HReg src = iselDblExpr(env, e->Iex.Unop.arg);
5381 HReg dst = newVRegD(env);
5382 addInstr(env, ARMInstr_VUnaryD(ARMvfpu_ABS, dst, src));
5383 return dst;
5384 }
5385 case Iop_F32toF64: {
5386 HReg src = iselFltExpr(env, e->Iex.Unop.arg);
5387 HReg dst = newVRegD(env);
5388 addInstr(env, ARMInstr_VCvtSD(True/*sToD*/, dst, src));
5389 return dst;
5390 }
5391 case Iop_I32UtoF64:
5392 case Iop_I32StoF64: {
5393 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
5394 HReg f32 = newVRegF(env);
5395 HReg dst = newVRegD(env);
5396 Bool syned = e->Iex.Unop.op == Iop_I32StoF64;
5397 /* VMOV f32, src */
5398 addInstr(env, ARMInstr_VXferS(True/*toS*/, f32, src));
5399 /* FSITOD dst, f32 */
5400 addInstr(env, ARMInstr_VCvtID(True/*iToD*/, syned,
5401 dst, f32));
5402 return dst;
5403 }
5404 default:
5405 break;
5406 }
5407 }
5408
5409 if (e->tag == Iex_Binop) {
5410 switch (e->Iex.Binop.op) {
5411 case Iop_SqrtF64: {
5412 /* first arg is rounding mode; we ignore it. */
5413 HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
5414 HReg dst = newVRegD(env);
5415 addInstr(env, ARMInstr_VUnaryD(ARMvfpu_SQRT, dst, src));
5416 return dst;
5417 }
5418 default:
5419 break;
5420 }
5421 }
5422
5423 if (e->tag == Iex_Triop) {
florian420bfa92012-06-02 20:29:22 +00005424 IRTriop *triop = e->Iex.Triop.details;
5425
5426 switch (triop->op) {
sewardj6c299f32009-12-31 18:00:12 +00005427 case Iop_DivF64:
5428 case Iop_MulF64:
5429 case Iop_AddF64:
5430 case Iop_SubF64: {
5431 ARMVfpOp op = 0; /*INVALID*/
florian420bfa92012-06-02 20:29:22 +00005432 HReg argL = iselDblExpr(env, triop->arg2);
5433 HReg argR = iselDblExpr(env, triop->arg3);
sewardj6c299f32009-12-31 18:00:12 +00005434 HReg dst = newVRegD(env);
florian420bfa92012-06-02 20:29:22 +00005435 switch (triop->op) {
sewardj6c299f32009-12-31 18:00:12 +00005436 case Iop_DivF64: op = ARMvfp_DIV; break;
5437 case Iop_MulF64: op = ARMvfp_MUL; break;
5438 case Iop_AddF64: op = ARMvfp_ADD; break;
5439 case Iop_SubF64: op = ARMvfp_SUB; break;
5440 default: vassert(0);
5441 }
5442 addInstr(env, ARMInstr_VAluD(op, dst, argL, argR));
5443 return dst;
5444 }
5445 default:
5446 break;
5447 }
5448 }
5449
5450 if (e->tag == Iex_Mux0X) {
5451 if (ty == Ity_F64
5452 && typeOfIRExpr(env->type_env,e->Iex.Mux0X.cond) == Ity_I8) {
5453 HReg r8;
5454 HReg rX = iselDblExpr(env, e->Iex.Mux0X.exprX);
5455 HReg r0 = iselDblExpr(env, e->Iex.Mux0X.expr0);
5456 HReg dst = newVRegD(env);
5457 addInstr(env, ARMInstr_VUnaryD(ARMvfpu_COPY, dst, rX));
5458 r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond);
5459 addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r8,
5460 ARMRI84_I84(0xFF,0)));
5461 addInstr(env, ARMInstr_VCMovD(ARMcc_EQ, dst, r0));
5462 return dst;
5463 }
5464 }
5465
5466 ppIRExpr(e);
5467 vpanic("iselDblExpr_wrk");
5468}
5469
5470
5471/*---------------------------------------------------------*/
5472/*--- ISEL: Floating point expressions (32 bit) ---*/
5473/*---------------------------------------------------------*/
5474
/* Compute a 32-bit floating point value into a register, the identity
   of which is returned.  As with iselIntExpr_R, the reg may be either
   real or virtual; in any case it must not be changed by subsequent
   code emitted by the caller.  */
5479
5480static HReg iselFltExpr ( ISelEnv* env, IRExpr* e )
5481{
5482 HReg r = iselFltExpr_wrk( env, e );
5483# if 0
5484 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
5485# endif
5486 vassert(hregClass(r) == HRcFlt32);
5487 vassert(hregIsVirtual(r));
5488 return r;
5489}
5490
5491/* DO NOT CALL THIS DIRECTLY */
5492static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e )
5493{
5494 IRType ty = typeOfIRExpr(env->type_env,e);
5495 vassert(e);
5496 vassert(ty == Ity_F32);
5497
5498 if (e->tag == Iex_RdTmp) {
5499 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
5500 }
5501
5502 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
5503 ARMAModeV* am;
5504 HReg res = newVRegF(env);
5505 vassert(e->Iex.Load.ty == Ity_F32);
5506 am = iselIntExpr_AModeV(env, e->Iex.Load.addr);
5507 addInstr(env, ARMInstr_VLdStS(True/*isLoad*/, res, am));
5508 return res;
5509 }
5510
5511 if (e->tag == Iex_Get) {
5512 // XXX This won't work if offset > 1020 or is not 0 % 4.
5513 // In which case we'll have to generate more longwinded code.
5514 ARMAModeV* am = mkARMAModeV(hregARM_R8(), e->Iex.Get.offset);
5515 HReg res = newVRegF(env);
5516 addInstr(env, ARMInstr_VLdStS(True/*isLoad*/, res, am));
5517 return res;
5518 }
5519
5520 if (e->tag == Iex_Unop) {
5521 switch (e->Iex.Unop.op) {
5522 case Iop_ReinterpI32asF32: {
5523 HReg dst = newVRegF(env);
5524 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
5525 addInstr(env, ARMInstr_VXferS(True/*toS*/, dst, src));
5526 return dst;
5527 }
5528 case Iop_NegF32: {
5529 HReg src = iselFltExpr(env, e->Iex.Unop.arg);
5530 HReg dst = newVRegF(env);
5531 addInstr(env, ARMInstr_VUnaryS(ARMvfpu_NEG, dst, src));
5532 return dst;
5533 }
5534 case Iop_AbsF32: {
5535 HReg src = iselFltExpr(env, e->Iex.Unop.arg);
5536 HReg dst = newVRegF(env);
5537 addInstr(env, ARMInstr_VUnaryS(ARMvfpu_ABS, dst, src));
5538 return dst;
5539 }
5540 default:
5541 break;
5542 }
5543 }
5544
5545 if (e->tag == Iex_Binop) {
5546 switch (e->Iex.Binop.op) {
5547 case Iop_SqrtF32: {
5548 /* first arg is rounding mode; we ignore it. */
5549 HReg src = iselFltExpr(env, e->Iex.Binop.arg2);
5550 HReg dst = newVRegF(env);
5551 addInstr(env, ARMInstr_VUnaryS(ARMvfpu_SQRT, dst, src));
5552 return dst;
5553 }
5554 case Iop_F64toF32: {
5555 HReg valD = iselDblExpr(env, e->Iex.Binop.arg2);
5556 set_VFP_rounding_mode(env, e->Iex.Binop.arg1);
5557 HReg valS = newVRegF(env);
5558 /* FCVTSD valS, valD */
5559 addInstr(env, ARMInstr_VCvtSD(False/*!sToD*/, valS, valD));
5560 set_VFP_rounding_default(env);
5561 return valS;
5562 }
5563 default:
5564 break;
5565 }
5566 }
5567
5568 if (e->tag == Iex_Triop) {
florian420bfa92012-06-02 20:29:22 +00005569 IRTriop *triop = e->Iex.Triop.details;
5570
5571 switch (triop->op) {
sewardj6c299f32009-12-31 18:00:12 +00005572 case Iop_DivF32:
5573 case Iop_MulF32:
5574 case Iop_AddF32:
5575 case Iop_SubF32: {
5576 ARMVfpOp op = 0; /*INVALID*/
florian420bfa92012-06-02 20:29:22 +00005577 HReg argL = iselFltExpr(env, triop->arg2);
5578 HReg argR = iselFltExpr(env, triop->arg3);
sewardj6c299f32009-12-31 18:00:12 +00005579 HReg dst = newVRegF(env);
florian420bfa92012-06-02 20:29:22 +00005580 switch (triop->op) {
sewardj6c299f32009-12-31 18:00:12 +00005581 case Iop_DivF32: op = ARMvfp_DIV; break;
5582 case Iop_MulF32: op = ARMvfp_MUL; break;
5583 case Iop_AddF32: op = ARMvfp_ADD; break;
5584 case Iop_SubF32: op = ARMvfp_SUB; break;
5585 default: vassert(0);
5586 }
5587 addInstr(env, ARMInstr_VAluS(op, dst, argL, argR));
5588 return dst;
5589 }
5590 default:
5591 break;
5592 }
5593 }
5594
5595 if (e->tag == Iex_Mux0X) {
5596 if (ty == Ity_F32
5597 && typeOfIRExpr(env->type_env,e->Iex.Mux0X.cond) == Ity_I8) {
5598 HReg r8;
5599 HReg rX = iselFltExpr(env, e->Iex.Mux0X.exprX);
5600 HReg r0 = iselFltExpr(env, e->Iex.Mux0X.expr0);
5601 HReg dst = newVRegF(env);
5602 addInstr(env, ARMInstr_VUnaryS(ARMvfpu_COPY, dst, rX));
5603 r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond);
5604 addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r8,
5605 ARMRI84_I84(0xFF,0)));
5606 addInstr(env, ARMInstr_VCMovS(ARMcc_EQ, dst, r0));
5607 return dst;
5608 }
5609 }
5610
5611 ppIRExpr(e);
5612 vpanic("iselFltExpr_wrk");
5613}
5614
cerioncee30312004-12-17 20:30:21 +00005615
5616/*---------------------------------------------------------*/
5617/*--- ISEL: Statements ---*/
5618/*---------------------------------------------------------*/
5619
5620static void iselStmt ( ISelEnv* env, IRStmt* stmt )
5621{
5622 if (vex_traceflags & VEX_TRACE_VCODE) {
5623 vex_printf("\n-- ");
5624 ppIRStmt(stmt);
5625 vex_printf("\n");
5626 }
5627 switch (stmt->tag) {
5628
5629 /* --------- STORE --------- */
5630 /* little-endian write to memory */
sewardjaf1ceca2005-06-30 23:31:27 +00005631 case Ist_Store: {
sewardj6c299f32009-12-31 18:00:12 +00005632 IRType tya = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
5633 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
5634 IREndness end = stmt->Ist.Store.end;
sewardjaf1ceca2005-06-30 23:31:27 +00005635
sewardj6c299f32009-12-31 18:00:12 +00005636 if (tya != Ity_I32 || end != Iend_LE)
5637 goto stmt_fail;
sewardjaf1ceca2005-06-30 23:31:27 +00005638
sewardj6c299f32009-12-31 18:00:12 +00005639 if (tyd == Ity_I32) {
5640 HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
5641 ARMAMode1* am = iselIntExpr_AMode1(env, stmt->Ist.Store.addr);
5642 addInstr(env, ARMInstr_LdSt32(False/*!isLoad*/, rD, am));
5643 return;
5644 }
5645 if (tyd == Ity_I16) {
5646 HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
5647 ARMAMode2* am = iselIntExpr_AMode2(env, stmt->Ist.Store.addr);
5648 addInstr(env, ARMInstr_LdSt16(False/*!isLoad*/,
5649 False/*!isSignedLoad*/, rD, am));
5650 return;
5651 }
5652 if (tyd == Ity_I8) {
5653 HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
5654 ARMAMode1* am = iselIntExpr_AMode1(env, stmt->Ist.Store.addr);
5655 addInstr(env, ARMInstr_LdSt8U(False/*!isLoad*/, rD, am));
5656 return;
5657 }
5658 if (tyd == Ity_I64) {
sewardjc6f970f2012-04-02 21:54:49 +00005659 if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
sewardj6c60b322010-08-22 12:48:28 +00005660 HReg dD = iselNeon64Expr(env, stmt->Ist.Store.data);
5661 ARMAModeN* am = iselIntExpr_AModeN(env, stmt->Ist.Store.addr);
5662 addInstr(env, ARMInstr_NLdStD(False, dD, am));
5663 } else {
5664 HReg rDhi, rDlo, rA;
5665 iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.Store.data);
5666 rA = iselIntExpr_R(env, stmt->Ist.Store.addr);
5667 addInstr(env, ARMInstr_LdSt32(False/*!load*/, rDhi,
5668 ARMAMode1_RI(rA,4)));
5669 addInstr(env, ARMInstr_LdSt32(False/*!load*/, rDlo,
5670 ARMAMode1_RI(rA,0)));
5671 }
sewardj6c299f32009-12-31 18:00:12 +00005672 return;
5673 }
5674 if (tyd == Ity_F64) {
5675 HReg dD = iselDblExpr(env, stmt->Ist.Store.data);
5676 ARMAModeV* am = iselIntExpr_AModeV(env, stmt->Ist.Store.addr);
5677 addInstr(env, ARMInstr_VLdStD(False/*!isLoad*/, dD, am));
5678 return;
5679 }
5680 if (tyd == Ity_F32) {
5681 HReg fD = iselFltExpr(env, stmt->Ist.Store.data);
5682 ARMAModeV* am = iselIntExpr_AModeV(env, stmt->Ist.Store.addr);
5683 addInstr(env, ARMInstr_VLdStS(False/*!isLoad*/, fD, am));
5684 return;
5685 }
sewardj6c60b322010-08-22 12:48:28 +00005686 if (tyd == Ity_V128) {
5687 HReg qD = iselNeonExpr(env, stmt->Ist.Store.data);
5688 ARMAModeN* am = iselIntExpr_AModeN(env, stmt->Ist.Store.addr);
5689 addInstr(env, ARMInstr_NLdStQ(False, qD, am));
5690 return;
5691 }
cerioncee30312004-12-17 20:30:21 +00005692
sewardj6c299f32009-12-31 18:00:12 +00005693 break;
cerioncee30312004-12-17 20:30:21 +00005694 }
5695
5696 /* --------- PUT --------- */
5697 /* write guest state, fixed offset */
5698 case Ist_Put: {
5699 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Put.data);
cerioncee30312004-12-17 20:30:21 +00005700
cerioncee30312004-12-17 20:30:21 +00005701 if (tyd == Ity_I32) {
sewardj6c299f32009-12-31 18:00:12 +00005702 HReg rD = iselIntExpr_R(env, stmt->Ist.Put.data);
5703 ARMAMode1* am = ARMAMode1_RI(hregARM_R8(), stmt->Ist.Put.offset);
5704 addInstr(env, ARMInstr_LdSt32(False/*!isLoad*/, rD, am));
5705 return;
cerioncee30312004-12-17 20:30:21 +00005706 }
sewardj6c299f32009-12-31 18:00:12 +00005707 if (tyd == Ity_I64) {
sewardjc6f970f2012-04-02 21:54:49 +00005708 if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
sewardj6c60b322010-08-22 12:48:28 +00005709 HReg addr = newVRegI(env);
5710 HReg qD = iselNeon64Expr(env, stmt->Ist.Put.data);
5711 addInstr(env, ARMInstr_Add32(addr, hregARM_R8(),
5712 stmt->Ist.Put.offset));
5713 addInstr(env, ARMInstr_NLdStD(False, qD, mkARMAModeN_R(addr)));
5714 } else {
5715 HReg rDhi, rDlo;
5716 ARMAMode1* am0 = ARMAMode1_RI(hregARM_R8(),
5717 stmt->Ist.Put.offset + 0);
5718 ARMAMode1* am4 = ARMAMode1_RI(hregARM_R8(),
5719 stmt->Ist.Put.offset + 4);
5720 iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.Put.data);
5721 addInstr(env, ARMInstr_LdSt32(False/*!isLoad*/, rDhi, am4));
5722 addInstr(env, ARMInstr_LdSt32(False/*!isLoad*/, rDlo, am0));
5723 }
sewardj6c299f32009-12-31 18:00:12 +00005724 return;
cerioncee30312004-12-17 20:30:21 +00005725 }
sewardj6c299f32009-12-31 18:00:12 +00005726 if (tyd == Ity_F64) {
5727 // XXX This won't work if offset > 1020 or is not 0 % 4.
5728 // In which case we'll have to generate more longwinded code.
5729 ARMAModeV* am = mkARMAModeV(hregARM_R8(), stmt->Ist.Put.offset);
5730 HReg rD = iselDblExpr(env, stmt->Ist.Put.data);
5731 addInstr(env, ARMInstr_VLdStD(False/*!isLoad*/, rD, am));
5732 return;
cerioncee30312004-12-17 20:30:21 +00005733 }
sewardj6c299f32009-12-31 18:00:12 +00005734 if (tyd == Ity_F32) {
5735 // XXX This won't work if offset > 1020 or is not 0 % 4.
5736 // In which case we'll have to generate more longwinded code.
5737 ARMAModeV* am = mkARMAModeV(hregARM_R8(), stmt->Ist.Put.offset);
5738 HReg rD = iselFltExpr(env, stmt->Ist.Put.data);
5739 addInstr(env, ARMInstr_VLdStS(False/*!isLoad*/, rD, am));
5740 return;
5741 }
sewardj6c60b322010-08-22 12:48:28 +00005742 if (tyd == Ity_V128) {
5743 HReg addr = newVRegI(env);
5744 HReg qD = iselNeonExpr(env, stmt->Ist.Put.data);
5745 addInstr(env, ARMInstr_Add32(addr, hregARM_R8(),
5746 stmt->Ist.Put.offset));
5747 addInstr(env, ARMInstr_NLdStQ(False, qD, mkARMAModeN_R(addr)));
5748 return;
5749 }
cerioncee30312004-12-17 20:30:21 +00005750 break;
5751 }
5752
sewardj6c299f32009-12-31 18:00:12 +00005753//zz /* --------- Indexed PUT --------- */
5754//zz /* write guest state, run-time offset */
5755//zz case Ist_PutI: {
5756//zz ARMAMode2* am2
5757//zz = genGuestArrayOffset(
5758//zz env, stmt->Ist.PutI.descr,
5759//zz stmt->Ist.PutI.ix, stmt->Ist.PutI.bias );
5760//zz
5761//zz IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.PutI.data);
5762//zz
5763//zz if (tyd == Ity_I8) {
5764//zz HReg reg = iselIntExpr_R(env, stmt->Ist.PutI.data);
5765//zz addInstr(env, ARMInstr_StoreB(reg, am2));
5766//zz return;
5767//zz }
5768//zz// CAB: Ity_I32, Ity_I16 ?
5769//zz break;
5770//zz }
cerioncee30312004-12-17 20:30:21 +00005771
5772 /* --------- TMP --------- */
5773 /* assign value to temporary */
sewardjdd40fdf2006-12-24 02:20:24 +00005774 case Ist_WrTmp: {
5775 IRTemp tmp = stmt->Ist.WrTmp.tmp;
cerioncee30312004-12-17 20:30:21 +00005776 IRType ty = typeOfIRTemp(env->type_env, tmp);
5777
5778 if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
sewardj6c299f32009-12-31 18:00:12 +00005779 ARMRI84* ri84 = iselIntExpr_RI84(NULL, False,
5780 env, stmt->Ist.WrTmp.data);
5781 HReg dst = lookupIRTemp(env, tmp);
5782 addInstr(env, ARMInstr_Mov(dst,ri84));
cerioncee30312004-12-17 20:30:21 +00005783 return;
5784 }
sewardj6c299f32009-12-31 18:00:12 +00005785 if (ty == Ity_I1) {
5786 HReg dst = lookupIRTemp(env, tmp);
5787 ARMCondCode cond = iselCondCode(env, stmt->Ist.WrTmp.data);
5788 addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
5789 addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
5790 return;
5791 }
5792 if (ty == Ity_I64) {
sewardjc6f970f2012-04-02 21:54:49 +00005793 if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
sewardj6c60b322010-08-22 12:48:28 +00005794 HReg src = iselNeon64Expr(env, stmt->Ist.WrTmp.data);
5795 HReg dst = lookupIRTemp(env, tmp);
5796 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, dst, src, 4, False));
5797 } else {
5798 HReg rHi, rLo, dstHi, dstLo;
5799 iselInt64Expr(&rHi,&rLo, env, stmt->Ist.WrTmp.data);
5800 lookupIRTemp64( &dstHi, &dstLo, env, tmp);
5801 addInstr(env, mk_iMOVds_RR(dstHi, rHi) );
5802 addInstr(env, mk_iMOVds_RR(dstLo, rLo) );
5803 }
sewardj6c299f32009-12-31 18:00:12 +00005804 return;
5805 }
5806 if (ty == Ity_F64) {
5807 HReg src = iselDblExpr(env, stmt->Ist.WrTmp.data);
5808 HReg dst = lookupIRTemp(env, tmp);
5809 addInstr(env, ARMInstr_VUnaryD(ARMvfpu_COPY, dst, src));
5810 return;
5811 }
5812 if (ty == Ity_F32) {
5813 HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data);
5814 HReg dst = lookupIRTemp(env, tmp);
5815 addInstr(env, ARMInstr_VUnaryS(ARMvfpu_COPY, dst, src));
5816 return;
5817 }
sewardj6c60b322010-08-22 12:48:28 +00005818 if (ty == Ity_V128) {
5819 HReg src = iselNeonExpr(env, stmt->Ist.WrTmp.data);
5820 HReg dst = lookupIRTemp(env, tmp);
5821 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, dst, src, 4, True));
5822 return;
5823 }
cerioncee30312004-12-17 20:30:21 +00005824 break;
5825 }
5826
5827 /* --------- Call to DIRTY helper --------- */
5828 /* call complex ("dirty") helper function */
5829 case Ist_Dirty: {
sewardj6c299f32009-12-31 18:00:12 +00005830 IRType retty;
5831 IRDirty* d = stmt->Ist.Dirty.details;
5832 Bool passBBP = False;
cerioncee30312004-12-17 20:30:21 +00005833
5834 if (d->nFxState == 0)
5835 vassert(!d->needsBBP);
sewardj428fabd2005-03-21 03:11:17 +00005836
5837 passBBP = toBool(d->nFxState > 0 && d->needsBBP);
cerioncee30312004-12-17 20:30:21 +00005838
5839 /* Marshal args, do the call, clear stack. */
sewardj6c299f32009-12-31 18:00:12 +00005840 Bool ok = doHelperCall( env, passBBP, d->guard, d->cee, d->args );
5841 if (!ok)
5842 break; /* will go to stmt_fail: */
cerioncee30312004-12-17 20:30:21 +00005843
5844 /* Now figure out what to do with the returned value, if any. */
5845 if (d->tmp == IRTemp_INVALID)
sewardj6c299f32009-12-31 18:00:12 +00005846 /* No return value. Nothing to do. */
5847 return;
cerioncee30312004-12-17 20:30:21 +00005848
sewardj6c299f32009-12-31 18:00:12 +00005849 retty = typeOfIRTemp(env->type_env, d->tmp);
cerioncee30312004-12-17 20:30:21 +00005850
sewardj6c299f32009-12-31 18:00:12 +00005851 if (retty == Ity_I64) {
sewardjc6f970f2012-04-02 21:54:49 +00005852 if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
sewardj6c60b322010-08-22 12:48:28 +00005853 HReg tmp = lookupIRTemp(env, d->tmp);
5854 addInstr(env, ARMInstr_VXferD(True, tmp, hregARM_R1(),
5855 hregARM_R0()));
5856 } else {
5857 HReg dstHi, dstLo;
5858 /* The returned value is in r1:r0. Park it in the
5859 register-pair associated with tmp. */
5860 lookupIRTemp64( &dstHi, &dstLo, env, d->tmp);
5861 addInstr(env, mk_iMOVds_RR(dstHi, hregARM_R1()) );
5862 addInstr(env, mk_iMOVds_RR(dstLo, hregARM_R0()) );
5863 }
cerioncee30312004-12-17 20:30:21 +00005864 return;
5865 }
sewardj6c299f32009-12-31 18:00:12 +00005866 if (retty == Ity_I32 || retty == Ity_I16 || retty == Ity_I8) {
5867 /* The returned value is in r0. Park it in the register
5868 associated with tmp. */
5869 HReg dst = lookupIRTemp(env, d->tmp);
5870 addInstr(env, mk_iMOVds_RR(dst, hregARM_R0()) );
5871 return;
5872 }
5873
cerioncee30312004-12-17 20:30:21 +00005874 break;
5875 }
5876
sewardj6c299f32009-12-31 18:00:12 +00005877 /* --------- Load Linked and Store Conditional --------- */
5878 case Ist_LLSC: {
5879 if (stmt->Ist.LLSC.storedata == NULL) {
5880 /* LL */
5881 IRTemp res = stmt->Ist.LLSC.result;
5882 IRType ty = typeOfIRTemp(env->type_env, res);
sewardjff7f5b72011-07-11 11:43:38 +00005883 if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
sewardj6c299f32009-12-31 18:00:12 +00005884 Int szB = 0;
5885 HReg r_dst = lookupIRTemp(env, res);
5886 HReg raddr = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
5887 switch (ty) {
5888 case Ity_I8: szB = 1; break;
sewardjff7f5b72011-07-11 11:43:38 +00005889 case Ity_I16: szB = 2; break;
sewardj6c299f32009-12-31 18:00:12 +00005890 case Ity_I32: szB = 4; break;
5891 default: vassert(0);
5892 }
sewardjff7f5b72011-07-11 11:43:38 +00005893 addInstr(env, mk_iMOVds_RR(hregARM_R4(), raddr));
sewardj6c299f32009-12-31 18:00:12 +00005894 addInstr(env, ARMInstr_LdrEX(szB));
sewardjff7f5b72011-07-11 11:43:38 +00005895 addInstr(env, mk_iMOVds_RR(r_dst, hregARM_R2()));
sewardj6c299f32009-12-31 18:00:12 +00005896 return;
5897 }
sewardjff7f5b72011-07-11 11:43:38 +00005898 if (ty == Ity_I64) {
5899 HReg raddr = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
5900 addInstr(env, mk_iMOVds_RR(hregARM_R4(), raddr));
5901 addInstr(env, ARMInstr_LdrEX(8));
5902 /* Result is in r3:r2. On a non-NEON capable CPU, we must
5903 move it into a result register pair. On a NEON capable
5904 CPU, the result register will be a 64 bit NEON
5905 register, so we must move it there instead. */
sewardjc6f970f2012-04-02 21:54:49 +00005906 if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
sewardjff7f5b72011-07-11 11:43:38 +00005907 HReg dst = lookupIRTemp(env, res);
5908 addInstr(env, ARMInstr_VXferD(True, dst, hregARM_R3(),
5909 hregARM_R2()));
5910 } else {
5911 HReg r_dst_hi, r_dst_lo;
5912 lookupIRTemp64(&r_dst_hi, &r_dst_lo, env, res);
5913 addInstr(env, mk_iMOVds_RR(r_dst_lo, hregARM_R2()));
5914 addInstr(env, mk_iMOVds_RR(r_dst_hi, hregARM_R3()));
5915 }
5916 return;
5917 }
5918 /*NOTREACHED*/
5919 vassert(0);
sewardj6c299f32009-12-31 18:00:12 +00005920 } else {
5921 /* SC */
sewardj6c299f32009-12-31 18:00:12 +00005922 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.LLSC.storedata);
sewardjff7f5b72011-07-11 11:43:38 +00005923 if (tyd == Ity_I32 || tyd == Ity_I16 || tyd == Ity_I8) {
5924 Int szB = 0;
5925 HReg rD = iselIntExpr_R(env, stmt->Ist.LLSC.storedata);
5926 HReg rA = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
sewardj6c299f32009-12-31 18:00:12 +00005927 switch (tyd) {
5928 case Ity_I8: szB = 1; break;
sewardjff7f5b72011-07-11 11:43:38 +00005929 case Ity_I16: szB = 2; break;
sewardj6c299f32009-12-31 18:00:12 +00005930 case Ity_I32: szB = 4; break;
5931 default: vassert(0);
5932 }
sewardjff7f5b72011-07-11 11:43:38 +00005933 addInstr(env, mk_iMOVds_RR(hregARM_R2(), rD));
5934 addInstr(env, mk_iMOVds_RR(hregARM_R4(), rA));
sewardj6c299f32009-12-31 18:00:12 +00005935 addInstr(env, ARMInstr_StrEX(szB));
sewardjff7f5b72011-07-11 11:43:38 +00005936 } else {
5937 vassert(tyd == Ity_I64);
5938 /* This is really ugly. There is no is/is-not NEON
5939 decision akin to the case for LL, because iselInt64Expr
5940 fudges this for us, and always gets the result into two
5941 GPRs even if this means moving it from a NEON
5942 register. */
5943 HReg rDhi, rDlo;
5944 iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.LLSC.storedata);
5945 HReg rA = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
5946 addInstr(env, mk_iMOVds_RR(hregARM_R2(), rDlo));
5947 addInstr(env, mk_iMOVds_RR(hregARM_R3(), rDhi));
5948 addInstr(env, mk_iMOVds_RR(hregARM_R4(), rA));
5949 addInstr(env, ARMInstr_StrEX(8));
sewardj6c299f32009-12-31 18:00:12 +00005950 }
sewardjff7f5b72011-07-11 11:43:38 +00005951 /* now r0 is 1 if failed, 0 if success. Change to IR
5952 conventions (0 is fail, 1 is success). Also transfer
5953 result to r_res. */
5954 IRTemp res = stmt->Ist.LLSC.result;
5955 IRType ty = typeOfIRTemp(env->type_env, res);
5956 HReg r_res = lookupIRTemp(env, res);
5957 ARMRI84* one = ARMRI84_I84(1,0);
5958 vassert(ty == Ity_I1);
5959 addInstr(env, ARMInstr_Alu(ARMalu_XOR, r_res, hregARM_R0(), one));
5960 /* And be conservative -- mask off all but the lowest bit */
5961 addInstr(env, ARMInstr_Alu(ARMalu_AND, r_res, r_res, one));
5962 return;
sewardj6c299f32009-12-31 18:00:12 +00005963 }
5964 break;
5965 }
5966
sewardj412098c2010-05-04 08:48:43 +00005967 /* --------- MEM FENCE --------- */
5968 case Ist_MBE:
5969 switch (stmt->Ist.MBE.event) {
5970 case Imbe_Fence:
sewardj6d615ba2011-09-26 16:19:43 +00005971 addInstr(env, ARMInstr_MFence());
5972 return;
5973 case Imbe_CancelReservation:
5974 addInstr(env, ARMInstr_CLREX());
sewardj412098c2010-05-04 08:48:43 +00005975 return;
5976 default:
5977 break;
5978 }
5979 break;
5980
sewardj6c299f32009-12-31 18:00:12 +00005981 /* --------- INSTR MARK --------- */
5982 /* Doesn't generate any executable code ... */
5983 case Ist_IMark:
5984 return;
5985
5986 /* --------- NO-OP --------- */
5987 case Ist_NoOp:
5988 return;
5989
cerioncee30312004-12-17 20:30:21 +00005990 /* --------- EXIT --------- */
cerioncee30312004-12-17 20:30:21 +00005991 case Ist_Exit: {
cerioncee30312004-12-17 20:30:21 +00005992 if (stmt->Ist.Exit.dst->tag != Ico_U32)
5993 vpanic("isel_arm: Ist_Exit: dst is not a 32-bit value");
sewardjc6f970f2012-04-02 21:54:49 +00005994
5995 ARMCondCode cc = iselCondCode(env, stmt->Ist.Exit.guard);
5996 ARMAMode1* amR15T = ARMAMode1_RI(hregARM_R8(),
5997 stmt->Ist.Exit.offsIP);
5998
5999 /* Case: boring transfer to known address */
6000 if (stmt->Ist.Exit.jk == Ijk_Boring
6001 || stmt->Ist.Exit.jk == Ijk_Call
6002 || stmt->Ist.Exit.jk == Ijk_Ret) {
6003 if (env->chainingAllowed) {
6004 /* .. almost always true .. */
6005 /* Skip the event check at the dst if this is a forwards
6006 edge. */
6007 Bool toFastEP
6008 = ((Addr32)stmt->Ist.Exit.dst->Ico.U32) > env->max_ga;
6009 if (0) vex_printf("%s", toFastEP ? "Y" : ",");
6010 addInstr(env, ARMInstr_XDirect(stmt->Ist.Exit.dst->Ico.U32,
6011 amR15T, cc, toFastEP));
6012 } else {
6013 /* .. very occasionally .. */
6014 /* We can't use chaining, so ask for an assisted transfer,
6015 as that's the only alternative that is allowable. */
6016 HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
6017 addInstr(env, ARMInstr_XAssisted(r, amR15T, cc, Ijk_Boring));
6018 }
6019 return;
6020 }
6021
6022 /* Case: assisted transfer to arbitrary address */
6023 switch (stmt->Ist.Exit.jk) {
sewardj2f6902b2012-04-23 09:48:14 +00006024 /* Keep this list in sync with that in iselNext below */
6025 case Ijk_ClientReq:
sewardjc6f970f2012-04-02 21:54:49 +00006026 case Ijk_NoDecode:
sewardj2f6902b2012-04-23 09:48:14 +00006027 case Ijk_NoRedir:
6028 case Ijk_Sys_syscall:
sewardjc6f970f2012-04-02 21:54:49 +00006029 {
6030 HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
6031 addInstr(env, ARMInstr_XAssisted(r, amR15T, cc,
6032 stmt->Ist.Exit.jk));
6033 return;
6034 }
6035 default:
6036 break;
6037 }
6038
6039 /* Do we ever expect to see any other kind? */
6040 goto stmt_fail;
cerioncee30312004-12-17 20:30:21 +00006041 }
6042
6043 default: break;
6044 }
sewardjaf1ceca2005-06-30 23:31:27 +00006045 stmt_fail:
cerioncee30312004-12-17 20:30:21 +00006046 ppIRStmt(stmt);
6047 vpanic("iselStmt");
6048}
6049
6050
6051/*---------------------------------------------------------*/
6052/*--- ISEL: Basic block terminators (Nexts) ---*/
6053/*---------------------------------------------------------*/
6054
sewardjc6f970f2012-04-02 21:54:49 +00006055static void iselNext ( ISelEnv* env,
6056 IRExpr* next, IRJumpKind jk, Int offsIP )
cerioncee30312004-12-17 20:30:21 +00006057{
sewardj6c299f32009-12-31 18:00:12 +00006058 if (vex_traceflags & VEX_TRACE_VCODE) {
sewardjc6f970f2012-04-02 21:54:49 +00006059 vex_printf( "\n-- PUT(%d) = ", offsIP);
6060 ppIRExpr( next );
6061 vex_printf( "; exit-");
sewardj6c299f32009-12-31 18:00:12 +00006062 ppIRJumpKind(jk);
sewardjc6f970f2012-04-02 21:54:49 +00006063 vex_printf( "\n");
sewardj6c299f32009-12-31 18:00:12 +00006064 }
sewardjc6f970f2012-04-02 21:54:49 +00006065
6066 /* Case: boring transfer to known address */
6067 if (next->tag == Iex_Const) {
6068 IRConst* cdst = next->Iex.Const.con;
6069 vassert(cdst->tag == Ico_U32);
6070 if (jk == Ijk_Boring || jk == Ijk_Call) {
6071 /* Boring transfer to known address */
6072 ARMAMode1* amR15T = ARMAMode1_RI(hregARM_R8(), offsIP);
6073 if (env->chainingAllowed) {
6074 /* .. almost always true .. */
6075 /* Skip the event check at the dst if this is a forwards
6076 edge. */
6077 Bool toFastEP
6078 = ((Addr64)cdst->Ico.U32) > env->max_ga;
6079 if (0) vex_printf("%s", toFastEP ? "X" : ".");
6080 addInstr(env, ARMInstr_XDirect(cdst->Ico.U32,
6081 amR15T, ARMcc_AL,
6082 toFastEP));
6083 } else {
6084 /* .. very occasionally .. */
6085 /* We can't use chaining, so ask for an assisted transfer,
6086 as that's the only alternative that is allowable. */
6087 HReg r = iselIntExpr_R(env, next);
6088 addInstr(env, ARMInstr_XAssisted(r, amR15T, ARMcc_AL,
6089 Ijk_Boring));
6090 }
6091 return;
6092 }
6093 }
6094
6095 /* Case: call/return (==boring) transfer to any address */
6096 switch (jk) {
6097 case Ijk_Boring: case Ijk_Ret: case Ijk_Call: {
6098 HReg r = iselIntExpr_R(env, next);
6099 ARMAMode1* amR15T = ARMAMode1_RI(hregARM_R8(), offsIP);
6100 if (env->chainingAllowed) {
6101 addInstr(env, ARMInstr_XIndir(r, amR15T, ARMcc_AL));
6102 } else {
6103 addInstr(env, ARMInstr_XAssisted(r, amR15T, ARMcc_AL,
6104 Ijk_Boring));
6105 }
6106 return;
6107 }
6108 default:
6109 break;
6110 }
6111
sewardj2f6902b2012-04-23 09:48:14 +00006112 /* Case: assisted transfer to arbitrary address */
sewardjc6f970f2012-04-02 21:54:49 +00006113 switch (jk) {
sewardj2f6902b2012-04-23 09:48:14 +00006114 /* Keep this list in sync with that for Ist_Exit above */
6115 case Ijk_ClientReq:
6116 case Ijk_NoDecode:
sewardjc6f970f2012-04-02 21:54:49 +00006117 case Ijk_NoRedir:
sewardj2f6902b2012-04-23 09:48:14 +00006118 case Ijk_Sys_syscall:
sewardjc6f970f2012-04-02 21:54:49 +00006119 {
6120 HReg r = iselIntExpr_R(env, next);
6121 ARMAMode1* amR15T = ARMAMode1_RI(hregARM_R8(), offsIP);
6122 addInstr(env, ARMInstr_XAssisted(r, amR15T, ARMcc_AL, jk));
6123 return;
6124 }
6125 default:
6126 break;
6127 }
6128
6129 vex_printf( "\n-- PUT(%d) = ", offsIP);
6130 ppIRExpr( next );
6131 vex_printf( "; exit-");
6132 ppIRJumpKind(jk);
6133 vex_printf( "\n");
6134 vassert(0); // are we expecting any other kind?
cerioncee30312004-12-17 20:30:21 +00006135}
6136
6137
6138/*---------------------------------------------------------*/
6139/*--- Insn selector top-level ---*/
6140/*---------------------------------------------------------*/
6141
sewardjdd40fdf2006-12-24 02:20:24 +00006142/* Translate an entire SB to arm code. */
cerioncee30312004-12-17 20:30:21 +00006143
sewardjc6f970f2012-04-02 21:54:49 +00006144HInstrArray* iselSB_ARM ( IRSB* bb,
6145 VexArch arch_host,
6146 VexArchInfo* archinfo_host,
6147 VexAbiInfo* vbi/*UNUSED*/,
6148 Int offs_Host_EvC_Counter,
6149 Int offs_Host_EvC_FailAddr,
6150 Bool chainingAllowed,
6151 Bool addProfInc,
6152 Addr64 max_ga )
cerioncee30312004-12-17 20:30:21 +00006153{
sewardjc6f970f2012-04-02 21:54:49 +00006154 Int i, j;
6155 HReg hreg, hregHI;
6156 ISelEnv* env;
6157 UInt hwcaps_host = archinfo_host->hwcaps;
6158 ARMAMode1 *amCounter, *amFailAddr;
cerioncee30312004-12-17 20:30:21 +00006159
sewardj6c299f32009-12-31 18:00:12 +00006160 /* sanity ... */
6161 vassert(arch_host == VexArchARM);
sewardj6c60b322010-08-22 12:48:28 +00006162
6163 /* hwcaps should not change from one ISEL call to another. */
sewardjc6f970f2012-04-02 21:54:49 +00006164 arm_hwcaps = hwcaps_host; // JRS 2012 Mar 31: FIXME (RM)
cerioncee30312004-12-17 20:30:21 +00006165
sewardj6c299f32009-12-31 18:00:12 +00006166 /* Make up an initial environment to use. */
6167 env = LibVEX_Alloc(sizeof(ISelEnv));
6168 env->vreg_ctr = 0;
6169
6170 /* Set up output code array. */
6171 env->code = newHInstrArray();
cerioncee30312004-12-17 20:30:21 +00006172
sewardj6c299f32009-12-31 18:00:12 +00006173 /* Copy BB's type env. */
6174 env->type_env = bb->tyenv;
cerioncee30312004-12-17 20:30:21 +00006175
sewardj6c299f32009-12-31 18:00:12 +00006176 /* Make up an IRTemp -> virtual HReg mapping. This doesn't
6177 change as we go along. */
6178 env->n_vregmap = bb->tyenv->types_used;
6179 env->vregmap = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
6180 env->vregmapHI = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
cerioncee30312004-12-17 20:30:21 +00006181
sewardjc6f970f2012-04-02 21:54:49 +00006182 /* and finally ... */
6183 env->chainingAllowed = chainingAllowed;
6184 env->hwcaps = hwcaps_host;
6185 env->max_ga = max_ga;
6186
sewardj6c299f32009-12-31 18:00:12 +00006187 /* For each IR temporary, allocate a suitably-kinded virtual
6188 register. */
6189 j = 0;
6190 for (i = 0; i < env->n_vregmap; i++) {
6191 hregHI = hreg = INVALID_HREG;
6192 switch (bb->tyenv->types[i]) {
6193 case Ity_I1:
6194 case Ity_I8:
6195 case Ity_I16:
6196 case Ity_I32: hreg = mkHReg(j++, HRcInt32, True); break;
sewardj6c60b322010-08-22 12:48:28 +00006197 case Ity_I64:
sewardjc6f970f2012-04-02 21:54:49 +00006198 if (hwcaps_host & VEX_HWCAPS_ARM_NEON) {
sewardj6c60b322010-08-22 12:48:28 +00006199 hreg = mkHReg(j++, HRcFlt64, True);
sewardj6c60b322010-08-22 12:48:28 +00006200 } else {
6201 hregHI = mkHReg(j++, HRcInt32, True);
6202 hreg = mkHReg(j++, HRcInt32, True);
6203 }
6204 break;
sewardj6c299f32009-12-31 18:00:12 +00006205 case Ity_F32: hreg = mkHReg(j++, HRcFlt32, True); break;
6206 case Ity_F64: hreg = mkHReg(j++, HRcFlt64, True); break;
sewardj06122e72011-03-28 12:14:48 +00006207 case Ity_V128: hreg = mkHReg(j++, HRcVec128, True); break;
sewardj6c299f32009-12-31 18:00:12 +00006208 default: ppIRType(bb->tyenv->types[i]);
6209 vpanic("iselBB: IRTemp type");
6210 }
6211 env->vregmap[i] = hreg;
6212 env->vregmapHI[i] = hregHI;
6213 }
6214 env->vreg_ctr = j;
cerioncee30312004-12-17 20:30:21 +00006215
sewardjc6f970f2012-04-02 21:54:49 +00006216 /* The very first instruction must be an event check. */
6217 amCounter = ARMAMode1_RI(hregARM_R8(), offs_Host_EvC_Counter);
6218 amFailAddr = ARMAMode1_RI(hregARM_R8(), offs_Host_EvC_FailAddr);
6219 addInstr(env, ARMInstr_EvCheck(amCounter, amFailAddr));
6220
6221 /* Possibly a block counter increment (for profiling). At this
6222 point we don't know the address of the counter, so just pretend
6223 it is zero. It will have to be patched later, but before this
6224 translation is used, by a call to LibVEX_patchProfCtr. */
6225 if (addProfInc) {
6226 addInstr(env, ARMInstr_ProfInc());
6227 }
cerioncee30312004-12-17 20:30:21 +00006228
sewardj6c299f32009-12-31 18:00:12 +00006229 /* Ok, finally we can iterate over the statements. */
6230 for (i = 0; i < bb->stmts_used; i++)
sewardjc6f970f2012-04-02 21:54:49 +00006231 iselStmt(env, bb->stmts[i]);
sewardj6c299f32009-12-31 18:00:12 +00006232
sewardjc6f970f2012-04-02 21:54:49 +00006233 iselNext(env, bb->next, bb->jumpkind, bb->offsIP);
sewardj6c299f32009-12-31 18:00:12 +00006234
6235 /* record the number of vregs we used. */
6236 env->code->n_vregs = env->vreg_ctr;
6237 return env->code;
cerioncee30312004-12-17 20:30:21 +00006238}
6239
6240
cerioncee30312004-12-17 20:30:21 +00006241/*---------------------------------------------------------------*/
sewardjcef7d3e2009-07-02 12:21:59 +00006242/*--- end host_arm_isel.c ---*/
cerioncee30312004-12-17 20:30:21 +00006243/*---------------------------------------------------------------*/