blob: 98f4f716a2ec63f3af12d450b6dbd72fcc9e22b6 [file] [log] [blame]
cerioncee30312004-12-17 20:30:21 +00001
2/*---------------------------------------------------------------*/
sewardj752f9062010-05-03 21:38:49 +00003/*--- begin host_arm_isel.c ---*/
cerioncee30312004-12-17 20:30:21 +00004/*---------------------------------------------------------------*/
5
6/*
sewardj752f9062010-05-03 21:38:49 +00007 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
cerioncee30312004-12-17 20:30:21 +00009
sewardje6c53e02011-10-23 07:33:43 +000010 Copyright (C) 2004-2011 OpenWorks LLP
sewardj752f9062010-05-03 21:38:49 +000011 info@open-works.net
sewardj64733c42010-10-12 10:10:46 +000012
13 NEON support is
sewardje6c53e02011-10-23 07:33:43 +000014 Copyright (C) 2010-2011 Samsung Electronics
sewardj64733c42010-10-12 10:10:46 +000015 contributed by Dmitry Zhurikhin <zhur@ispras.ru>
16 and Kirill Batuzov <batuzovk@ispras.ru>
cerioncee30312004-12-17 20:30:21 +000017
sewardj752f9062010-05-03 21:38:49 +000018 This program is free software; you can redistribute it and/or
19 modify it under the terms of the GNU General Public License as
20 published by the Free Software Foundation; either version 2 of the
21 License, or (at your option) any later version.
cerioncee30312004-12-17 20:30:21 +000022
sewardj752f9062010-05-03 21:38:49 +000023 This program is distributed in the hope that it will be useful, but
24 WITHOUT ANY WARRANTY; without even the implied warranty of
25 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
26 General Public License for more details.
27
28 You should have received a copy of the GNU General Public License
29 along with this program; if not, write to the Free Software
30 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
sewardj7bd6ffe2005-08-03 16:07:36 +000031 02110-1301, USA.
32
sewardj752f9062010-05-03 21:38:49 +000033 The GNU General Public License is contained in the file COPYING.
cerioncee30312004-12-17 20:30:21 +000034*/
35
36#include "libvex_basictypes.h"
37#include "libvex_ir.h"
38#include "libvex.h"
sewardj6c60b322010-08-22 12:48:28 +000039#include "ir_match.h"
cerioncee30312004-12-17 20:30:21 +000040
sewardjcef7d3e2009-07-02 12:21:59 +000041#include "main_util.h"
42#include "main_globals.h"
43#include "host_generic_regs.h"
sewardje2ea1762010-09-22 00:56:37 +000044#include "host_generic_simd64.h" // for 32-bit SIMD helpers
sewardjcef7d3e2009-07-02 12:21:59 +000045#include "host_arm_defs.h"
cerioncee30312004-12-17 20:30:21 +000046
47
cerioncee30312004-12-17 20:30:21 +000048/*---------------------------------------------------------*/
sewardj6c299f32009-12-31 18:00:12 +000049/*--- ARMvfp control word stuff ---*/
50/*---------------------------------------------------------*/
51
52/* Vex-generated code expects to run with the FPU set as follows: all
53 exceptions masked, round-to-nearest, non-vector mode, with the NZCV
54 flags cleared, and FZ (flush to zero) disabled. Curiously enough,
55 this corresponds to a FPSCR value of zero.
56
57 fpscr should therefore be zero on entry to Vex-generated code, and
58 should be unchanged at exit. (Or at least the bottom 28 bits
59 should be zero).
60*/
61
/* FPSCR value written by set_VFP_rounding_default(). */
#define DEFAULT_FPSCR 0
63
64
65/*---------------------------------------------------------*/
cerioncee30312004-12-17 20:30:21 +000066/*--- ISelEnv ---*/
67/*---------------------------------------------------------*/
68
69/* This carries around:
70
71 - A mapping from IRTemp to IRType, giving the type of any IRTemp we
72 might encounter. This is computed before insn selection starts,
73 and does not change.
74
75 - A mapping from IRTemp to HReg. This tells the insn selector
76 which virtual register(s) are associated with each IRTemp
77 temporary. This is computed before insn selection starts, and
78 does not change. We expect this mapping to map precisely the
79 same set of IRTemps as the type mapping does.
80
81 - vregmap holds the primary register for the IRTemp.
sewardj6c299f32009-12-31 18:00:12 +000082 - vregmapHI is only used for 64-bit integer-typed
83 IRTemps. It holds the identity of a second
84 32-bit virtual HReg, which holds the high half
85 of the value.
86
cerioncee30312004-12-17 20:30:21 +000087 - The code array, that is, the insns selected so far.
88
89 - A counter, for generating new virtual registers.
90
sewardj6c299f32009-12-31 18:00:12 +000091 - The host hardware capabilities word. This is set at the start
92 and does not change.
93
sewardjc6f970f2012-04-02 21:54:49 +000094 - A Bool for indicating whether we may generate chain-me
95 instructions for control flow transfers, or whether we must use
96 XAssisted.
97
98 - The maximum guest address of any guest insn in this block.
99 Actually, the address of the highest-addressed byte from any insn
100 in this block. Is set at the start and does not change. This is
101 used for detecting jumps which are definitely forward-edges from
102 this block, and therefore can be made (chained) to the fast entry
103 point of the destination, thereby avoiding the destination's
104 event check.
105
106 Note, this is all (well, mostly) host-independent.
107*/
cerioncee30312004-12-17 20:30:21 +0000108
109typedef
110 struct {
sewardjc6f970f2012-04-02 21:54:49 +0000111 /* Constant -- are set at the start and do not change. */
cerioncee30312004-12-17 20:30:21 +0000112 IRTypeEnv* type_env;
113
114 HReg* vregmap;
sewardj6c299f32009-12-31 18:00:12 +0000115 HReg* vregmapHI;
cerioncee30312004-12-17 20:30:21 +0000116 Int n_vregmap;
117
sewardj6c299f32009-12-31 18:00:12 +0000118 UInt hwcaps;
sewardjc6f970f2012-04-02 21:54:49 +0000119
120 Bool chainingAllowed;
121 Addr64 max_ga;
122
123 /* These are modified as we go along. */
124 HInstrArray* code;
125 Int vreg_ctr;
cerioncee30312004-12-17 20:30:21 +0000126 }
127 ISelEnv;
128
129static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp )
130{
131 vassert(tmp >= 0);
132 vassert(tmp < env->n_vregmap);
133 return env->vregmap[tmp];
134}
135
sewardj6c299f32009-12-31 18:00:12 +0000136static void lookupIRTemp64 ( HReg* vrHI, HReg* vrLO, ISelEnv* env, IRTemp tmp )
137{
138 vassert(tmp >= 0);
139 vassert(tmp < env->n_vregmap);
140 vassert(env->vregmapHI[tmp] != INVALID_HREG);
141 *vrLO = env->vregmap[tmp];
142 *vrHI = env->vregmapHI[tmp];
143}
144
cerioncee30312004-12-17 20:30:21 +0000145static void addInstr ( ISelEnv* env, ARMInstr* instr )
146{
147 addHInstr(env->code, instr);
148 if (vex_traceflags & VEX_TRACE_VCODE) {
149 ppARMInstr(instr);
150 vex_printf("\n");
151 }
sewardj6c60b322010-08-22 12:48:28 +0000152#if 0
153 if (instr->tag == ARMin_NUnary || instr->tag == ARMin_NBinary
154 || instr->tag == ARMin_NUnaryS || instr->tag == ARMin_NBinaryS
155 || instr->tag == ARMin_NDual || instr->tag == ARMin_NShift) {
156 ppARMInstr(instr);
157 vex_printf("\n");
158 }
159#endif
cerioncee30312004-12-17 20:30:21 +0000160}
161
162static HReg newVRegI ( ISelEnv* env )
163{
164 HReg reg = mkHReg(env->vreg_ctr, HRcInt32, True/*virtual reg*/);
165 env->vreg_ctr++;
166 return reg;
167}
168
sewardj6c299f32009-12-31 18:00:12 +0000169static HReg newVRegD ( ISelEnv* env )
170{
171 HReg reg = mkHReg(env->vreg_ctr, HRcFlt64, True/*virtual reg*/);
172 env->vreg_ctr++;
173 return reg;
174}
175
176static HReg newVRegF ( ISelEnv* env )
177{
178 HReg reg = mkHReg(env->vreg_ctr, HRcFlt32, True/*virtual reg*/);
179 env->vreg_ctr++;
180 return reg;
181}
cerioncee30312004-12-17 20:30:21 +0000182
sewardj6c60b322010-08-22 12:48:28 +0000183static HReg newVRegV ( ISelEnv* env )
184{
185 HReg reg = mkHReg(env->vreg_ctr, HRcVec128, True/*virtual reg*/);
186 env->vreg_ctr++;
187 return reg;
188}
189
190/* These are duplicated in guest_arm_toIR.c */
191static IRExpr* unop ( IROp op, IRExpr* a )
192{
193 return IRExpr_Unop(op, a);
194}
195
196static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
197{
198 return IRExpr_Binop(op, a1, a2);
199}
200
sewardj6c60b322010-08-22 12:48:28 +0000201static IRExpr* bind ( Int binder )
202{
203 return IRExpr_Binder(binder);
204}
205
cerioncee30312004-12-17 20:30:21 +0000206
207/*---------------------------------------------------------*/
208/*--- ISEL: Forward declarations ---*/
209/*---------------------------------------------------------*/
210
/* These are organised as iselXXX and iselXXX_wrk pairs.  The
   iselXXX_wrk functions do the real work, but must not be called
   directly.  For each XXX, iselXXX calls its iselXXX_wrk counterpart
   and then sanity-checks the result (for example, that all returned
   registers are virtual).
*/
sewardj6c299f32009-12-31 18:00:12 +0000217static ARMAMode1* iselIntExpr_AMode1_wrk ( ISelEnv* env, IRExpr* e );
218static ARMAMode1* iselIntExpr_AMode1 ( ISelEnv* env, IRExpr* e );
cerioncee30312004-12-17 20:30:21 +0000219
sewardj6c299f32009-12-31 18:00:12 +0000220static ARMAMode2* iselIntExpr_AMode2_wrk ( ISelEnv* env, IRExpr* e );
221static ARMAMode2* iselIntExpr_AMode2 ( ISelEnv* env, IRExpr* e );
cerioncee30312004-12-17 20:30:21 +0000222
sewardj6c299f32009-12-31 18:00:12 +0000223static ARMAModeV* iselIntExpr_AModeV_wrk ( ISelEnv* env, IRExpr* e );
224static ARMAModeV* iselIntExpr_AModeV ( ISelEnv* env, IRExpr* e );
cerioncee30312004-12-17 20:30:21 +0000225
sewardjff7f5b72011-07-11 11:43:38 +0000226static ARMAModeN* iselIntExpr_AModeN_wrk ( ISelEnv* env, IRExpr* e );
227static ARMAModeN* iselIntExpr_AModeN ( ISelEnv* env, IRExpr* e );
sewardj6c60b322010-08-22 12:48:28 +0000228
sewardj6c299f32009-12-31 18:00:12 +0000229static ARMRI84* iselIntExpr_RI84_wrk
230 ( /*OUT*/Bool* didInv, Bool mayInv, ISelEnv* env, IRExpr* e );
231static ARMRI84* iselIntExpr_RI84
232 ( /*OUT*/Bool* didInv, Bool mayInv, ISelEnv* env, IRExpr* e );
cerioncee30312004-12-17 20:30:21 +0000233
sewardj6c299f32009-12-31 18:00:12 +0000234static ARMRI5* iselIntExpr_RI5_wrk ( ISelEnv* env, IRExpr* e );
235static ARMRI5* iselIntExpr_RI5 ( ISelEnv* env, IRExpr* e );
cerioncee30312004-12-17 20:30:21 +0000236
sewardj6c299f32009-12-31 18:00:12 +0000237static ARMCondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e );
238static ARMCondCode iselCondCode ( ISelEnv* env, IRExpr* e );
cerioncee30312004-12-17 20:30:21 +0000239
sewardj6c299f32009-12-31 18:00:12 +0000240static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e );
241static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e );
242
243static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo,
244 ISelEnv* env, IRExpr* e );
245static void iselInt64Expr ( HReg* rHi, HReg* rLo,
246 ISelEnv* env, IRExpr* e );
247
248static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e );
249static HReg iselDblExpr ( ISelEnv* env, IRExpr* e );
250
251static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e );
252static HReg iselFltExpr ( ISelEnv* env, IRExpr* e );
cerioncee30312004-12-17 20:30:21 +0000253
sewardj6c60b322010-08-22 12:48:28 +0000254static HReg iselNeon64Expr_wrk ( ISelEnv* env, IRExpr* e );
255static HReg iselNeon64Expr ( ISelEnv* env, IRExpr* e );
256
257static HReg iselNeonExpr_wrk ( ISelEnv* env, IRExpr* e );
258static HReg iselNeonExpr ( ISelEnv* env, IRExpr* e );
cerioncee30312004-12-17 20:30:21 +0000259
260/*---------------------------------------------------------*/
261/*--- ISEL: Misc helpers ---*/
262/*---------------------------------------------------------*/
sewardj6c299f32009-12-31 18:00:12 +0000263
264static UInt ROR32 ( UInt x, UInt sh ) {
265 vassert(sh >= 0 && sh < 32);
266 if (sh == 0)
267 return x;
268 else
269 return (x << (32-sh)) | (x >> sh);
cerioncee30312004-12-17 20:30:21 +0000270}
sewardj6c299f32009-12-31 18:00:12 +0000271
272/* Figure out if 'u' fits in the special shifter-operand 8x4 immediate
273 form, and if so return the components. */
274static Bool fitsIn8x4 ( /*OUT*/UInt* u8, /*OUT*/UInt* u4, UInt u )
275{
276 UInt i;
277 for (i = 0; i < 16; i++) {
278 if (0 == (u & 0xFFFFFF00)) {
279 *u8 = u;
280 *u4 = i;
281 return True;
282 }
283 u = ROR32(u, 30);
284 }
285 vassert(i == 16);
286 return False;
287}
cerioncee30312004-12-17 20:30:21 +0000288
289/* Make a int reg-reg move. */
sewardj6c299f32009-12-31 18:00:12 +0000290static ARMInstr* mk_iMOVds_RR ( HReg dst, HReg src )
cerioncee30312004-12-17 20:30:21 +0000291{
292 vassert(hregClass(src) == HRcInt32);
293 vassert(hregClass(dst) == HRcInt32);
sewardj6c299f32009-12-31 18:00:12 +0000294 return ARMInstr_Mov(dst, ARMRI84_R(src));
cerioncee30312004-12-17 20:30:21 +0000295}
296
sewardj6c299f32009-12-31 18:00:12 +0000297/* Set the VFP unit's rounding mode to default (round to nearest). */
298static void set_VFP_rounding_default ( ISelEnv* env )
cerioncee30312004-12-17 20:30:21 +0000299{
sewardj6c299f32009-12-31 18:00:12 +0000300 /* mov rTmp, #DEFAULT_FPSCR
301 fmxr fpscr, rTmp
302 */
303 HReg rTmp = newVRegI(env);
304 addInstr(env, ARMInstr_Imm32(rTmp, DEFAULT_FPSCR));
305 addInstr(env, ARMInstr_FPSCR(True/*toFPSCR*/, rTmp));
cerioncee30312004-12-17 20:30:21 +0000306}
307
sewardj6c299f32009-12-31 18:00:12 +0000308/* Mess with the VFP unit's rounding mode: 'mode' is an I32-typed
309 expression denoting a value in the range 0 .. 3, indicating a round
310 mode encoded as per type IRRoundingMode. Set FPSCR to have the
311 same rounding.
312*/
313static
314void set_VFP_rounding_mode ( ISelEnv* env, IRExpr* mode )
cerioncee30312004-12-17 20:30:21 +0000315{
sewardj6c299f32009-12-31 18:00:12 +0000316 /* This isn't simple, because 'mode' carries an IR rounding
317 encoding, and we need to translate that to an ARMvfp one:
318 The IR encoding:
319 00 to nearest (the default)
320 10 to +infinity
321 01 to -infinity
322 11 to zero
323 The ARMvfp encoding:
324 00 to nearest
325 01 to +infinity
326 10 to -infinity
327 11 to zero
328 Easy enough to do; just swap the two bits.
329 */
330 HReg irrm = iselIntExpr_R(env, mode);
331 HReg tL = newVRegI(env);
332 HReg tR = newVRegI(env);
333 HReg t3 = newVRegI(env);
334 /* tL = irrm << 1;
335 tR = irrm >> 1; if we're lucky, these will issue together
336 tL &= 2;
337 tR &= 1; ditto
338 t3 = tL | tR;
339 t3 <<= 22;
340 fmxr fpscr, t3
341 */
342 addInstr(env, ARMInstr_Shift(ARMsh_SHL, tL, irrm, ARMRI5_I5(1)));
343 addInstr(env, ARMInstr_Shift(ARMsh_SHR, tR, irrm, ARMRI5_I5(1)));
344 addInstr(env, ARMInstr_Alu(ARMalu_AND, tL, tL, ARMRI84_I84(2,0)));
345 addInstr(env, ARMInstr_Alu(ARMalu_AND, tR, tR, ARMRI84_I84(1,0)));
346 addInstr(env, ARMInstr_Alu(ARMalu_OR, t3, tL, ARMRI84_R(tR)));
347 addInstr(env, ARMInstr_Shift(ARMsh_SHL, t3, t3, ARMRI5_I5(22)));
348 addInstr(env, ARMInstr_FPSCR(True/*toFPSCR*/, t3));
cerioncee30312004-12-17 20:30:21 +0000349}
cerioncee30312004-12-17 20:30:21 +0000350
cerioncee30312004-12-17 20:30:21 +0000351
sewardj6c299f32009-12-31 18:00:12 +0000352/*---------------------------------------------------------*/
353/*--- ISEL: Function call helpers ---*/
354/*---------------------------------------------------------*/
cerioncee30312004-12-17 20:30:21 +0000355
cerioncee30312004-12-17 20:30:21 +0000356/* Used only in doHelperCall. See big comment in doHelperCall re
sewardj6c299f32009-12-31 18:00:12 +0000357 handling of register-parameter args. This function figures out
358 whether evaluation of an expression might require use of a fixed
359 register. If in doubt return True (safe but suboptimal).
cerioncee30312004-12-17 20:30:21 +0000360*/
361static
362Bool mightRequireFixedRegs ( IRExpr* e )
363{
364 switch (e->tag) {
sewardj6c299f32009-12-31 18:00:12 +0000365 case Iex_RdTmp: case Iex_Const: case Iex_Get:
366 return False;
367 default:
368 return True;
cerioncee30312004-12-17 20:30:21 +0000369 }
370}
sewardj6c299f32009-12-31 18:00:12 +0000371
cerioncee30312004-12-17 20:30:21 +0000372
373/* Do a complete function call. guard is a Ity_Bit expression
374 indicating whether or not the call happens. If guard==NULL, the
sewardj6c299f32009-12-31 18:00:12 +0000375 call is unconditional. Returns True iff it managed to handle this
376 combination of arg/return types, else returns False. */
cerioncee30312004-12-17 20:30:21 +0000377
378static
sewardj6c299f32009-12-31 18:00:12 +0000379Bool doHelperCall ( ISelEnv* env,
380 Bool passBBP,
cerioncee30312004-12-17 20:30:21 +0000381 IRExpr* guard, IRCallee* cee, IRExpr** args )
382{
cerioncee30312004-12-17 20:30:21 +0000383 ARMCondCode cc;
sewardj6c299f32009-12-31 18:00:12 +0000384 HReg argregs[ARM_N_ARGREGS];
385 HReg tmpregs[ARM_N_ARGREGS];
386 Bool go_fast;
387 Int n_args, i, nextArgReg;
388 ULong target;
cerioncee30312004-12-17 20:30:21 +0000389
sewardj6c299f32009-12-31 18:00:12 +0000390 vassert(ARM_N_ARGREGS == 4);
cerioncee30312004-12-17 20:30:21 +0000391
sewardj6c299f32009-12-31 18:00:12 +0000392 /* Marshal args for a call and do the call.
cerioncee30312004-12-17 20:30:21 +0000393
sewardj6c299f32009-12-31 18:00:12 +0000394 If passBBP is True, r8 (the baseblock pointer) is to be passed
395 as the first arg.
cerioncee30312004-12-17 20:30:21 +0000396
sewardj6c299f32009-12-31 18:00:12 +0000397 This function only deals with a tiny set of possibilities, which
398 cover all helpers in practice. The restrictions are that only
399 arguments in registers are supported, hence only ARM_N_REGPARMS
400 x 32 integer bits in total can be passed. In fact the only
401 supported arg types are I32 and I64.
cerioncee30312004-12-17 20:30:21 +0000402
sewardj6c299f32009-12-31 18:00:12 +0000403 Generating code which is both efficient and correct when
404 parameters are to be passed in registers is difficult, for the
405 reasons elaborated in detail in comments attached to
406 doHelperCall() in priv/host-x86/isel.c. Here, we use a variant
407 of the method described in those comments.
cerioncee30312004-12-17 20:30:21 +0000408
sewardj6c299f32009-12-31 18:00:12 +0000409 The problem is split into two cases: the fast scheme and the
410 slow scheme. In the fast scheme, arguments are computed
411 directly into the target (real) registers. This is only safe
412 when we can be sure that computation of each argument will not
413 trash any real registers set by computation of any other
414 argument.
cerioncee30312004-12-17 20:30:21 +0000415
sewardj6c299f32009-12-31 18:00:12 +0000416 In the slow scheme, all args are first computed into vregs, and
417 once they are all done, they are moved to the relevant real
418 regs. This always gives correct code, but it also gives a bunch
419 of vreg-to-rreg moves which are usually redundant but are hard
420 for the register allocator to get rid of.
421
422 To decide which scheme to use, all argument expressions are
423 first examined. If they are all so simple that it is clear they
424 will be evaluated without use of any fixed registers, use the
425 fast scheme, else use the slow scheme. Note also that only
426 unconditional calls may use the fast scheme, since having to
427 compute a condition expression could itself trash real
428 registers.
cerioncee30312004-12-17 20:30:21 +0000429
430 Note this requires being able to examine an expression and
431 determine whether or not evaluation of it might use a fixed
sewardj6c299f32009-12-31 18:00:12 +0000432 register. That requires knowledge of how the rest of this insn
433 selector works. Currently just the following 3 are regarded as
434 safe -- hopefully they cover the majority of arguments in
435 practice: IRExpr_Tmp IRExpr_Const IRExpr_Get.
cerioncee30312004-12-17 20:30:21 +0000436 */
cerioncee30312004-12-17 20:30:21 +0000437
sewardj6c299f32009-12-31 18:00:12 +0000438 /* Note that the cee->regparms field is meaningless on ARM hosts
439 (since there is only one calling convention) and so we always
440 ignore it. */
cerioncee30312004-12-17 20:30:21 +0000441
sewardj6c299f32009-12-31 18:00:12 +0000442 n_args = 0;
443 for (i = 0; args[i]; i++)
444 n_args++;
cerioncee30312004-12-17 20:30:21 +0000445
sewardj6c299f32009-12-31 18:00:12 +0000446 argregs[0] = hregARM_R0();
447 argregs[1] = hregARM_R1();
448 argregs[2] = hregARM_R2();
449 argregs[3] = hregARM_R3();
cerioncee30312004-12-17 20:30:21 +0000450
sewardj6c299f32009-12-31 18:00:12 +0000451 tmpregs[0] = tmpregs[1] = tmpregs[2] =
452 tmpregs[3] = INVALID_HREG;
cerioncee30312004-12-17 20:30:21 +0000453
sewardj6c299f32009-12-31 18:00:12 +0000454 /* First decide which scheme (slow or fast) is to be used. First
455 assume the fast scheme, and select slow if any contraindications
456 (wow) appear. */
457
458 go_fast = True;
459
460 if (guard) {
461 if (guard->tag == Iex_Const
462 && guard->Iex.Const.con->tag == Ico_U1
463 && guard->Iex.Const.con->Ico.U1 == True) {
464 /* unconditional */
465 } else {
466 /* Not manifestly unconditional -- be conservative. */
467 go_fast = False;
468 }
cerioncee30312004-12-17 20:30:21 +0000469 }
470
sewardj6c299f32009-12-31 18:00:12 +0000471 if (go_fast) {
472 for (i = 0; i < n_args; i++) {
cerioncee30312004-12-17 20:30:21 +0000473 if (mightRequireFixedRegs(args[i])) {
sewardj6c299f32009-12-31 18:00:12 +0000474 go_fast = False;
cerioncee30312004-12-17 20:30:21 +0000475 break;
476 }
477 }
sewardj6c299f32009-12-31 18:00:12 +0000478 }
479 /* At this point the scheme to use has been established. Generate
480 code to get the arg values into the argument rregs. If we run
481 out of arg regs, give up. */
cerioncee30312004-12-17 20:30:21 +0000482
sewardj6c299f32009-12-31 18:00:12 +0000483 if (go_fast) {
cerioncee30312004-12-17 20:30:21 +0000484
sewardj6c299f32009-12-31 18:00:12 +0000485 /* FAST SCHEME */
486 nextArgReg = 0;
cerioncee30312004-12-17 20:30:21 +0000487 if (passBBP) {
sewardj6c299f32009-12-31 18:00:12 +0000488 addInstr(env, mk_iMOVds_RR( argregs[nextArgReg],
489 hregARM_R8() ));
490 nextArgReg++;
cerioncee30312004-12-17 20:30:21 +0000491 }
492
sewardj6c299f32009-12-31 18:00:12 +0000493 for (i = 0; i < n_args; i++) {
494 IRType aTy = typeOfIRExpr(env->type_env, args[i]);
495 if (nextArgReg >= ARM_N_ARGREGS)
496 return False; /* out of argregs */
497 if (aTy == Ity_I32) {
498 addInstr(env, mk_iMOVds_RR( argregs[nextArgReg],
499 iselIntExpr_R(env, args[i]) ));
500 nextArgReg++;
501 }
502 else if (aTy == Ity_I64) {
503 /* 64-bit args must be passed in an a reg-pair of the form
504 n:n+1, where n is even. Hence either r0:r1 or r2:r3.
505 On a little-endian host, the less significant word is
506 passed in the lower-numbered register. */
507 if (nextArgReg & 1) {
508 if (nextArgReg >= ARM_N_ARGREGS)
509 return False; /* out of argregs */
510 addInstr(env, ARMInstr_Imm32( argregs[nextArgReg], 0xAA ));
511 nextArgReg++;
512 }
513 if (nextArgReg >= ARM_N_ARGREGS)
514 return False; /* out of argregs */
515 HReg raHi, raLo;
516 iselInt64Expr(&raHi, &raLo, env, args[i]);
517 addInstr(env, mk_iMOVds_RR( argregs[nextArgReg], raLo ));
518 nextArgReg++;
519 addInstr(env, mk_iMOVds_RR( argregs[nextArgReg], raHi ));
520 nextArgReg++;
521 }
522 else
523 return False; /* unhandled arg type */
524 }
525
526 /* Fast scheme only applies for unconditional calls. Hence: */
527 cc = ARMcc_AL;
cerioncee30312004-12-17 20:30:21 +0000528
529 } else {
530
sewardj6c299f32009-12-31 18:00:12 +0000531 /* SLOW SCHEME; move via temporaries */
532 nextArgReg = 0;
533
cerioncee30312004-12-17 20:30:21 +0000534 if (passBBP) {
sewardj6c299f32009-12-31 18:00:12 +0000535 /* This is pretty stupid; better to move directly to r0
536 after the rest of the args are done. */
537 tmpregs[nextArgReg] = newVRegI(env);
538 addInstr(env, mk_iMOVds_RR( tmpregs[nextArgReg],
539 hregARM_R8() ));
540 nextArgReg++;
541 }
542
543 for (i = 0; i < n_args; i++) {
544 IRType aTy = typeOfIRExpr(env->type_env, args[i]);
545 if (nextArgReg >= ARM_N_ARGREGS)
546 return False; /* out of argregs */
547 if (aTy == Ity_I32) {
548 tmpregs[nextArgReg] = iselIntExpr_R(env, args[i]);
549 nextArgReg++;
550 }
551 else if (aTy == Ity_I64) {
552 /* Same comment applies as in the Fast-scheme case. */
553 if (nextArgReg & 1)
554 nextArgReg++;
555 if (nextArgReg + 1 >= ARM_N_ARGREGS)
556 return False; /* out of argregs */
557 HReg raHi, raLo;
558 iselInt64Expr(&raHi, &raLo, env, args[i]);
559 tmpregs[nextArgReg] = raLo;
560 nextArgReg++;
561 tmpregs[nextArgReg] = raHi;
562 nextArgReg++;
563 }
564 }
565
566 /* Now we can compute the condition. We can't do it earlier
567 because the argument computations could trash the condition
568 codes. Be a bit clever to handle the common case where the
569 guard is 1:Bit. */
570 cc = ARMcc_AL;
571 if (guard) {
572 if (guard->tag == Iex_Const
573 && guard->Iex.Const.con->tag == Ico_U1
574 && guard->Iex.Const.con->Ico.U1 == True) {
575 /* unconditional -- do nothing */
576 } else {
577 cc = iselCondCode( env, guard );
578 }
579 }
580
581 /* Move the args to their final destinations. */
582 for (i = 0; i < nextArgReg; i++) {
583 if (tmpregs[i] == INVALID_HREG) { // Skip invalid regs
584 addInstr(env, ARMInstr_Imm32( argregs[i], 0xAA ));
585 continue;
586 }
587 /* None of these insns, including any spill code that might
588 be generated, may alter the condition codes. */
589 addInstr( env, mk_iMOVds_RR( argregs[i], tmpregs[i] ) );
cerioncee30312004-12-17 20:30:21 +0000590 }
591
592 }
593
sewardj6c299f32009-12-31 18:00:12 +0000594 /* Should be assured by checks above */
595 vassert(nextArgReg <= ARM_N_ARGREGS);
cerioncee30312004-12-17 20:30:21 +0000596
sewardj6c299f32009-12-31 18:00:12 +0000597 target = (HWord)Ptr_to_ULong(cee->addr);
cerioncee30312004-12-17 20:30:21 +0000598
sewardj6c299f32009-12-31 18:00:12 +0000599 /* nextArgReg doles out argument registers. Since these are
600 assigned in the order r0, r1, r2, r3, its numeric value at this
601 point, which must be between 0 and 4 inclusive, is going to be
602 equal to the number of arg regs in use for the call. Hence bake
603 that number into the call (we'll need to know it when doing
604 register allocation, to know what regs the call reads.)
cerioncee30312004-12-17 20:30:21 +0000605
sewardj6c299f32009-12-31 18:00:12 +0000606 There is a bit of a twist -- harmless but worth recording.
607 Suppose the arg types are (Ity_I32, Ity_I64). Then we will have
608 the first arg in r0 and the second in r3:r2, but r1 isn't used.
609 We nevertheless have nextArgReg==4 and bake that into the call
610 instruction. This will mean the register allocator wil believe
611 this insn reads r1 when in fact it doesn't. But that's
612 harmless; it just artificially extends the live range of r1
613 unnecessarily. The best fix would be to put into the
614 instruction, a bitmask indicating which of r0/1/2/3 carry live
615 values. But that's too much hassle. */
cerioncee30312004-12-17 20:30:21 +0000616
sewardj6c299f32009-12-31 18:00:12 +0000617 /* Finally, the call itself. */
618 addInstr(env, ARMInstr_Call( cc, target, nextArgReg ));
cerioncee30312004-12-17 20:30:21 +0000619
sewardj6c299f32009-12-31 18:00:12 +0000620 return True; /* success */
cerioncee30312004-12-17 20:30:21 +0000621}
622
623
624/*---------------------------------------------------------*/
sewardj6c299f32009-12-31 18:00:12 +0000625/*--- ISEL: Integer expressions (32/16/8 bit) ---*/
cerioncee30312004-12-17 20:30:21 +0000626/*---------------------------------------------------------*/
627
sewardj6c299f32009-12-31 18:00:12 +0000628/* Select insns for an integer-typed expression, and add them to the
629 code list. Return a reg holding the result. This reg will be a
630 virtual register. THE RETURNED REG MUST NOT BE MODIFIED. If you
631 want to modify it, ask for a new vreg, copy it in there, and modify
632 the copy. The register allocator will do its best to map both
633 vregs to the same real register, so the copies will often disappear
634 later in the game.
cerioncee30312004-12-17 20:30:21 +0000635
sewardj6c299f32009-12-31 18:00:12 +0000636 This should handle expressions of 32, 16 and 8-bit type. All
637 results are returned in a 32-bit register. For 16- and 8-bit
638 expressions, the upper 16/24 bits are arbitrary, so you should mask
639 or sign extend partial values if necessary.
cerioncee30312004-12-17 20:30:21 +0000640*/
641
sewardj6c299f32009-12-31 18:00:12 +0000642/* --------------------- AMode1 --------------------- */
643
644/* Return an AMode1 which computes the value of the specified
645 expression, possibly also adding insns to the code list as a
646 result. The expression may only be a 32-bit one.
647*/
cerioncee30312004-12-17 20:30:21 +0000648
649static Bool sane_AMode1 ( ARMAMode1* am )
650{
sewardj6c299f32009-12-31 18:00:12 +0000651 switch (am->tag) {
652 case ARMam1_RI:
653 return
654 toBool( hregClass(am->ARMam1.RI.reg) == HRcInt32
655 && (hregIsVirtual(am->ARMam1.RI.reg)
656 || am->ARMam1.RI.reg == hregARM_R8())
657 && am->ARMam1.RI.simm13 >= -4095
658 && am->ARMam1.RI.simm13 <= 4095 );
659 case ARMam1_RRS:
660 return
661 toBool( hregClass(am->ARMam1.RRS.base) == HRcInt32
662 && hregIsVirtual(am->ARMam1.RRS.base)
663 && hregClass(am->ARMam1.RRS.index) == HRcInt32
664 && hregIsVirtual(am->ARMam1.RRS.index)
665 && am->ARMam1.RRS.shift >= 0
666 && am->ARMam1.RRS.shift <= 3 );
667 default:
668 vpanic("sane_AMode: unknown ARM AMode1 tag");
669 }
cerioncee30312004-12-17 20:30:21 +0000670}
671
672static ARMAMode1* iselIntExpr_AMode1 ( ISelEnv* env, IRExpr* e )
673{
sewardj6c299f32009-12-31 18:00:12 +0000674 ARMAMode1* am = iselIntExpr_AMode1_wrk(env, e);
675 vassert(sane_AMode1(am));
676 return am;
cerioncee30312004-12-17 20:30:21 +0000677}
678
cerioncee30312004-12-17 20:30:21 +0000679static ARMAMode1* iselIntExpr_AMode1_wrk ( ISelEnv* env, IRExpr* e )
680{
sewardj6c299f32009-12-31 18:00:12 +0000681 IRType ty = typeOfIRExpr(env->type_env,e);
682 vassert(ty == Ity_I32);
cerioncee30312004-12-17 20:30:21 +0000683
sewardj6c299f32009-12-31 18:00:12 +0000684 /* FIXME: add RRS matching */
cerioncee30312004-12-17 20:30:21 +0000685
sewardj6c299f32009-12-31 18:00:12 +0000686 /* {Add32,Sub32}(expr,simm13) */
687 if (e->tag == Iex_Binop
688 && (e->Iex.Binop.op == Iop_Add32 || e->Iex.Binop.op == Iop_Sub32)
689 && e->Iex.Binop.arg2->tag == Iex_Const
690 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
691 Int simm = (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
692 if (simm >= -4095 && simm <= 4095) {
693 HReg reg;
694 if (e->Iex.Binop.op == Iop_Sub32)
695 simm = -simm;
696 reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
697 return ARMAMode1_RI(reg, simm);
698 }
699 }
cerioncee30312004-12-17 20:30:21 +0000700
sewardj6c299f32009-12-31 18:00:12 +0000701 /* Doesn't match anything in particular. Generate it into
702 a register and use that. */
703 {
704 HReg reg = iselIntExpr_R(env, e);
705 return ARMAMode1_RI(reg, 0);
706 }
707
cerioncee30312004-12-17 20:30:21 +0000708}
709
710
sewardj6c299f32009-12-31 18:00:12 +0000711/* --------------------- AMode2 --------------------- */
cerioncee30312004-12-17 20:30:21 +0000712
sewardj6c299f32009-12-31 18:00:12 +0000713/* Return an AMode2 which computes the value of the specified
714 expression, possibly also adding insns to the code list as a
715 result. The expression may only be a 32-bit one.
716*/
cerioncee30312004-12-17 20:30:21 +0000717
718static Bool sane_AMode2 ( ARMAMode2* am )
719{
720 switch (am->tag) {
sewardj6c299f32009-12-31 18:00:12 +0000721 case ARMam2_RI:
722 return
723 toBool( hregClass(am->ARMam2.RI.reg) == HRcInt32
724 && hregIsVirtual(am->ARMam2.RI.reg)
725 && am->ARMam2.RI.simm9 >= -255
726 && am->ARMam2.RI.simm9 <= 255 );
727 case ARMam2_RR:
728 return
729 toBool( hregClass(am->ARMam2.RR.base) == HRcInt32
730 && hregIsVirtual(am->ARMam2.RR.base)
731 && hregClass(am->ARMam2.RR.index) == HRcInt32
732 && hregIsVirtual(am->ARMam2.RR.index) );
733 default:
734 vpanic("sane_AMode: unknown ARM AMode2 tag");
cerioncee30312004-12-17 20:30:21 +0000735 }
736}
737
sewardj6c299f32009-12-31 18:00:12 +0000738static ARMAMode2* iselIntExpr_AMode2 ( ISelEnv* env, IRExpr* e )
739{
740 ARMAMode2* am = iselIntExpr_AMode2_wrk(env, e);
741 vassert(sane_AMode2(am));
742 return am;
743}
744
cerioncee30312004-12-17 20:30:21 +0000745static ARMAMode2* iselIntExpr_AMode2_wrk ( ISelEnv* env, IRExpr* e )
746{
sewardj6c299f32009-12-31 18:00:12 +0000747 IRType ty = typeOfIRExpr(env->type_env,e);
748 vassert(ty == Ity_I32);
749
750 /* FIXME: add RR matching */
751
752 /* {Add32,Sub32}(expr,simm8) */
753 if (e->tag == Iex_Binop
754 && (e->Iex.Binop.op == Iop_Add32 || e->Iex.Binop.op == Iop_Sub32)
755 && e->Iex.Binop.arg2->tag == Iex_Const
756 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
757 Int simm = (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
758 if (simm >= -255 && simm <= 255) {
759 HReg reg;
760 if (e->Iex.Binop.op == Iop_Sub32)
761 simm = -simm;
762 reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
763 return ARMAMode2_RI(reg, simm);
764 }
765 }
766
767 /* Doesn't match anything in particular. Generate it into
768 a register and use that. */
769 {
770 HReg reg = iselIntExpr_R(env, e);
771 return ARMAMode2_RI(reg, 0);
772 }
773
cerioncee30312004-12-17 20:30:21 +0000774}
sewardj6c299f32009-12-31 18:00:12 +0000775
776
777/* --------------------- AModeV --------------------- */
778
779/* Return an AModeV which computes the value of the specified
780 expression, possibly also adding insns to the code list as a
781 result. The expression may only be a 32-bit one.
sewardj48b279b2007-11-16 12:43:32 +0000782*/
cerioncee30312004-12-17 20:30:21 +0000783
sewardj6c299f32009-12-31 18:00:12 +0000784static Bool sane_AModeV ( ARMAModeV* am )
785{
786 return toBool( hregClass(am->reg) == HRcInt32
787 && hregIsVirtual(am->reg)
788 && am->simm11 >= -1020 && am->simm11 <= 1020
789 && 0 == (am->simm11 & 3) );
cerioncee30312004-12-17 20:30:21 +0000790}
791
sewardj6c299f32009-12-31 18:00:12 +0000792static ARMAModeV* iselIntExpr_AModeV ( ISelEnv* env, IRExpr* e )
cerioncee30312004-12-17 20:30:21 +0000793{
sewardj6c299f32009-12-31 18:00:12 +0000794 ARMAModeV* am = iselIntExpr_AModeV_wrk(env, e);
795 vassert(sane_AModeV(am));
796 return am;
797}
798
799static ARMAModeV* iselIntExpr_AModeV_wrk ( ISelEnv* env, IRExpr* e )
800{
801 IRType ty = typeOfIRExpr(env->type_env,e);
802 vassert(ty == Ity_I32);
803
804 /* {Add32,Sub32}(expr, simm8 << 2) */
805 if (e->tag == Iex_Binop
806 && (e->Iex.Binop.op == Iop_Add32 || e->Iex.Binop.op == Iop_Sub32)
807 && e->Iex.Binop.arg2->tag == Iex_Const
808 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
809 Int simm = (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
810 if (simm >= -1020 && simm <= 1020 && 0 == (simm & 3)) {
811 HReg reg;
812 if (e->Iex.Binop.op == Iop_Sub32)
813 simm = -simm;
814 reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
815 return mkARMAModeV(reg, simm);
816 }
cerioncee30312004-12-17 20:30:21 +0000817 }
sewardj6c299f32009-12-31 18:00:12 +0000818
819 /* Doesn't match anything in particular. Generate it into
820 a register and use that. */
821 {
822 HReg reg = iselIntExpr_R(env, e);
823 return mkARMAModeV(reg, 0);
824 }
825
cerioncee30312004-12-17 20:30:21 +0000826}
827
sewardj6c60b322010-08-22 12:48:28 +0000828/* -------------------- AModeN -------------------- */
829
830static ARMAModeN* iselIntExpr_AModeN ( ISelEnv* env, IRExpr* e )
831{
832 return iselIntExpr_AModeN_wrk(env, e);
833}
834
835static ARMAModeN* iselIntExpr_AModeN_wrk ( ISelEnv* env, IRExpr* e )
836{
837 HReg reg = iselIntExpr_R(env, e);
838 return mkARMAModeN_R(reg);
839}
840
sewardj6c299f32009-12-31 18:00:12 +0000841
842/* --------------------- RI84 --------------------- */
843
844/* Select instructions to generate 'e' into a RI84. If mayInv is
845 true, then the caller will also accept an I84 form that denotes
846 'not e'. In this case didInv may not be NULL, and *didInv is set
847 to True. This complication is so as to allow generation of an RI84
848 which is suitable for use in either an AND or BIC instruction,
849 without knowing (before this call) which one.
850*/
851static ARMRI84* iselIntExpr_RI84 ( /*OUT*/Bool* didInv, Bool mayInv,
852 ISelEnv* env, IRExpr* e )
cerioncee30312004-12-17 20:30:21 +0000853{
sewardj6c299f32009-12-31 18:00:12 +0000854 ARMRI84* ri;
855 if (mayInv)
856 vassert(didInv != NULL);
857 ri = iselIntExpr_RI84_wrk(didInv, mayInv, env, e);
858 /* sanity checks ... */
859 switch (ri->tag) {
860 case ARMri84_I84:
861 return ri;
862 case ARMri84_R:
863 vassert(hregClass(ri->ARMri84.R.reg) == HRcInt32);
864 vassert(hregIsVirtual(ri->ARMri84.R.reg));
865 return ri;
866 default:
867 vpanic("iselIntExpr_RI84: unknown arm RI84 tag");
cerioncee30312004-12-17 20:30:21 +0000868 }
869}
870
871/* DO NOT CALL THIS DIRECTLY ! */
sewardj6c299f32009-12-31 18:00:12 +0000872static ARMRI84* iselIntExpr_RI84_wrk ( /*OUT*/Bool* didInv, Bool mayInv,
873 ISelEnv* env, IRExpr* e )
cerioncee30312004-12-17 20:30:21 +0000874{
sewardj6c299f32009-12-31 18:00:12 +0000875 IRType ty = typeOfIRExpr(env->type_env,e);
876 vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
cerioncee30312004-12-17 20:30:21 +0000877
sewardj6c299f32009-12-31 18:00:12 +0000878 if (didInv) *didInv = False;
879
880 /* special case: immediate */
881 if (e->tag == Iex_Const) {
882 UInt u, u8 = 0x100, u4 = 0x10; /* both invalid */
883 switch (e->Iex.Const.con->tag) {
884 case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
885 case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
886 case Ico_U8: u = 0xFF & (e->Iex.Const.con->Ico.U8); break;
887 default: vpanic("iselIntExpr_RI84.Iex_Const(armh)");
888 }
889 if (fitsIn8x4(&u8, &u4, u)) {
890 return ARMRI84_I84( (UShort)u8, (UShort)u4 );
891 }
892 if (mayInv && fitsIn8x4(&u8, &u4, ~u)) {
893 vassert(didInv);
894 *didInv = True;
895 return ARMRI84_I84( (UShort)u8, (UShort)u4 );
896 }
897 /* else fail, fall through to default case */
898 }
899
900 /* default case: calculate into a register and return that */
901 {
902 HReg r = iselIntExpr_R ( env, e );
903 return ARMRI84_R(r);
904 }
cerioncee30312004-12-17 20:30:21 +0000905}
906
907
sewardj6c299f32009-12-31 18:00:12 +0000908/* --------------------- RI5 --------------------- */
909
910/* Select instructions to generate 'e' into a RI5. */
911
912static ARMRI5* iselIntExpr_RI5 ( ISelEnv* env, IRExpr* e )
913{
914 ARMRI5* ri = iselIntExpr_RI5_wrk(env, e);
915 /* sanity checks ... */
916 switch (ri->tag) {
917 case ARMri5_I5:
918 return ri;
919 case ARMri5_R:
920 vassert(hregClass(ri->ARMri5.R.reg) == HRcInt32);
921 vassert(hregIsVirtual(ri->ARMri5.R.reg));
922 return ri;
923 default:
924 vpanic("iselIntExpr_RI5: unknown arm RI5 tag");
925 }
926}
927
928/* DO NOT CALL THIS DIRECTLY ! */
929static ARMRI5* iselIntExpr_RI5_wrk ( ISelEnv* env, IRExpr* e )
930{
931 IRType ty = typeOfIRExpr(env->type_env,e);
932 vassert(ty == Ity_I32 || ty == Ity_I8);
933
934 /* special case: immediate */
935 if (e->tag == Iex_Const) {
936 UInt u; /* both invalid */
937 switch (e->Iex.Const.con->tag) {
938 case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
939 case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
940 case Ico_U8: u = 0xFF & (e->Iex.Const.con->Ico.U8); break;
941 default: vpanic("iselIntExpr_RI5.Iex_Const(armh)");
942 }
943 if (u >= 1 && u <= 31) {
944 return ARMRI5_I5(u);
945 }
946 /* else fail, fall through to default case */
947 }
948
949 /* default case: calculate into a register and return that */
950 {
951 HReg r = iselIntExpr_R ( env, e );
952 return ARMRI5_R(r);
953 }
954}
cerioncee30312004-12-17 20:30:21 +0000955
956
sewardj6c299f32009-12-31 18:00:12 +0000957/* ------------------- CondCode ------------------- */
cerioncee30312004-12-17 20:30:21 +0000958
959/* Generate code to evaluated a bit-typed expression, returning the
960 condition code which would correspond when the expression would
961 notionally have returned 1. */
962
963static ARMCondCode iselCondCode ( ISelEnv* env, IRExpr* e )
964{
sewardj6c299f32009-12-31 18:00:12 +0000965 ARMCondCode cc = iselCondCode_wrk(env,e);
sewardj6c60b322010-08-22 12:48:28 +0000966 vassert(cc != ARMcc_NV);
sewardj6c299f32009-12-31 18:00:12 +0000967 return cc;
cerioncee30312004-12-17 20:30:21 +0000968}
969
cerioncee30312004-12-17 20:30:21 +0000970static ARMCondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e )
971{
sewardj6c299f32009-12-31 18:00:12 +0000972 vassert(e);
973 vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);
cerioncee30312004-12-17 20:30:21 +0000974
sewardj6c299f32009-12-31 18:00:12 +0000975 /* var */
976 if (e->tag == Iex_RdTmp) {
977 HReg rTmp = lookupIRTemp(env, e->Iex.RdTmp.tmp);
978 /* CmpOrTst doesn't modify rTmp; so this is OK. */
979 ARMRI84* one = ARMRI84_I84(1,0);
980 addInstr(env, ARMInstr_CmpOrTst(False/*test*/, rTmp, one));
981 return ARMcc_NE;
982 }
983
984 /* Not1(e) */
985 if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) {
986 /* Generate code for the arg, and negate the test condition */
987 return 1 ^ iselCondCode(env, e->Iex.Unop.arg);
988 }
989
990 /* --- patterns rooted at: 32to1 --- */
991
992 if (e->tag == Iex_Unop
993 && e->Iex.Unop.op == Iop_32to1) {
994 HReg rTmp = iselIntExpr_R(env, e->Iex.Unop.arg);
995 ARMRI84* one = ARMRI84_I84(1,0);
996 addInstr(env, ARMInstr_CmpOrTst(False/*test*/, rTmp, one));
997 return ARMcc_NE;
998 }
999
1000 /* --- patterns rooted at: CmpNEZ8 --- */
1001
1002 if (e->tag == Iex_Unop
1003 && e->Iex.Unop.op == Iop_CmpNEZ8) {
1004 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
1005 ARMRI84* xFF = ARMRI84_I84(0xFF,0);
1006 addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r1, xFF));
1007 return ARMcc_NE;
1008 }
1009
1010 /* --- patterns rooted at: CmpNEZ32 --- */
1011
1012 if (e->tag == Iex_Unop
1013 && e->Iex.Unop.op == Iop_CmpNEZ32) {
1014 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
1015 ARMRI84* zero = ARMRI84_I84(0,0);
1016 addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, r1, zero));
1017 return ARMcc_NE;
1018 }
1019
1020 /* --- patterns rooted at: CmpNEZ64 --- */
1021
1022 if (e->tag == Iex_Unop
1023 && e->Iex.Unop.op == Iop_CmpNEZ64) {
1024 HReg tHi, tLo;
1025 HReg tmp = newVRegI(env);
1026 ARMRI84* zero = ARMRI84_I84(0,0);
1027 iselInt64Expr(&tHi, &tLo, env, e->Iex.Unop.arg);
1028 addInstr(env, ARMInstr_Alu(ARMalu_OR, tmp, tHi, ARMRI84_R(tLo)));
1029 addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, tmp, zero));
1030 return ARMcc_NE;
1031 }
1032
1033 /* --- Cmp*32*(x,y) --- */
1034 if (e->tag == Iex_Binop
1035 && (e->Iex.Binop.op == Iop_CmpEQ32
1036 || e->Iex.Binop.op == Iop_CmpNE32
1037 || e->Iex.Binop.op == Iop_CmpLT32S
1038 || e->Iex.Binop.op == Iop_CmpLT32U
1039 || e->Iex.Binop.op == Iop_CmpLE32S
1040 || e->Iex.Binop.op == Iop_CmpLE32U)) {
1041 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1042 ARMRI84* argR = iselIntExpr_RI84(NULL,False,
1043 env, e->Iex.Binop.arg2);
1044 addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, argL, argR));
1045 switch (e->Iex.Binop.op) {
1046 case Iop_CmpEQ32: return ARMcc_EQ;
1047 case Iop_CmpNE32: return ARMcc_NE;
1048 case Iop_CmpLT32S: return ARMcc_LT;
1049 case Iop_CmpLT32U: return ARMcc_LO;
1050 case Iop_CmpLE32S: return ARMcc_LE;
1051 case Iop_CmpLE32U: return ARMcc_LS;
1052 default: vpanic("iselCondCode(arm): CmpXX32");
1053 }
1054 }
1055
sewardj6c60b322010-08-22 12:48:28 +00001056 /* --- CasCmpEQ* --- */
1057 /* Ist_Cas has a dummy argument to compare with, so comparison is
1058 always true. */
1059 if (e->tag == Iex_Binop
1060 && (e->Iex.Binop.op == Iop_CasCmpEQ32
1061 || e->Iex.Binop.op == Iop_CasCmpEQ16
1062 || e->Iex.Binop.op == Iop_CasCmpEQ8)) {
1063 return ARMcc_AL;
1064 }
1065
sewardj6c299f32009-12-31 18:00:12 +00001066 ppIRExpr(e);
1067 vpanic("iselCondCode");
cerioncee30312004-12-17 20:30:21 +00001068}
1069
1070
sewardj6c299f32009-12-31 18:00:12 +00001071/* --------------------- Reg --------------------- */
cerioncee30312004-12-17 20:30:21 +00001072
1073static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e )
1074{
sewardj6c299f32009-12-31 18:00:12 +00001075 HReg r = iselIntExpr_R_wrk(env, e);
1076 /* sanity checks ... */
1077# if 0
1078 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
1079# endif
1080 vassert(hregClass(r) == HRcInt32);
1081 vassert(hregIsVirtual(r));
1082 return r;
cerioncee30312004-12-17 20:30:21 +00001083}
1084
1085/* DO NOT CALL THIS DIRECTLY ! */
1086static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
1087{
sewardj6c299f32009-12-31 18:00:12 +00001088 IRType ty = typeOfIRExpr(env->type_env,e);
1089 vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
sewardj6c60b322010-08-22 12:48:28 +00001090// vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
sewardj6c299f32009-12-31 18:00:12 +00001091
1092 switch (e->tag) {
1093
1094 /* --------- TEMP --------- */
1095 case Iex_RdTmp: {
1096 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
1097 }
1098
1099 /* --------- LOAD --------- */
1100 case Iex_Load: {
1101 HReg dst = newVRegI(env);
1102
1103 if (e->Iex.Load.end != Iend_LE)
1104 goto irreducible;
1105
1106 if (ty == Ity_I32) {
1107 ARMAMode1* amode = iselIntExpr_AMode1 ( env, e->Iex.Load.addr );
1108 addInstr(env, ARMInstr_LdSt32(True/*isLoad*/, dst, amode));
1109 return dst;
1110 }
1111 if (ty == Ity_I16) {
1112 ARMAMode2* amode = iselIntExpr_AMode2 ( env, e->Iex.Load.addr );
1113 addInstr(env, ARMInstr_LdSt16(True/*isLoad*/, False/*!signedLoad*/,
1114 dst, amode));
1115 return dst;
1116 }
1117 if (ty == Ity_I8) {
1118 ARMAMode1* amode = iselIntExpr_AMode1 ( env, e->Iex.Load.addr );
1119 addInstr(env, ARMInstr_LdSt8U(True/*isLoad*/, dst, amode));
1120 return dst;
1121 }
1122
1123//zz if (ty == Ity_I16) {
1124//zz addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
1125//zz return dst;
1126//zz }
1127//zz if (ty == Ity_I8) {
1128//zz addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
1129//zz return dst;
1130//zz }
1131 break;
1132 }
1133
1134//zz /* --------- TERNARY OP --------- */
1135//zz case Iex_Triop: {
1136//zz /* C3210 flags following FPU partial remainder (fprem), both
1137//zz IEEE compliant (PREM1) and non-IEEE compliant (PREM). */
1138//zz if (e->Iex.Triop.op == Iop_PRemC3210F64
1139//zz || e->Iex.Triop.op == Iop_PRem1C3210F64) {
1140//zz HReg junk = newVRegF(env);
1141//zz HReg dst = newVRegI(env);
1142//zz HReg srcL = iselDblExpr(env, e->Iex.Triop.arg2);
1143//zz HReg srcR = iselDblExpr(env, e->Iex.Triop.arg3);
1144//zz /* XXXROUNDINGFIXME */
1145//zz /* set roundingmode here */
1146//zz addInstr(env, X86Instr_FpBinary(
1147//zz e->Iex.Binop.op==Iop_PRemC3210F64
1148//zz ? Xfp_PREM : Xfp_PREM1,
1149//zz srcL,srcR,junk
1150//zz ));
1151//zz /* The previous pseudo-insn will have left the FPU's C3210
1152//zz flags set correctly. So bag them. */
1153//zz addInstr(env, X86Instr_FpStSW_AX());
1154//zz addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
1155//zz addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0x4700), dst));
1156//zz return dst;
1157//zz }
1158//zz
1159//zz break;
1160//zz }
1161
1162 /* --------- BINARY OP --------- */
1163 case Iex_Binop: {
1164
1165 ARMAluOp aop = 0; /* invalid */
1166 ARMShiftOp sop = 0; /* invalid */
1167
1168 /* ADD/SUB/AND/OR/XOR */
1169 switch (e->Iex.Binop.op) {
1170 case Iop_And32: {
1171 Bool didInv = False;
1172 HReg dst = newVRegI(env);
1173 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1174 ARMRI84* argR = iselIntExpr_RI84(&didInv, True/*mayInv*/,
1175 env, e->Iex.Binop.arg2);
1176 addInstr(env, ARMInstr_Alu(didInv ? ARMalu_BIC : ARMalu_AND,
1177 dst, argL, argR));
1178 return dst;
1179 }
1180 case Iop_Or32: aop = ARMalu_OR; goto std_binop;
1181 case Iop_Xor32: aop = ARMalu_XOR; goto std_binop;
1182 case Iop_Sub32: aop = ARMalu_SUB; goto std_binop;
1183 case Iop_Add32: aop = ARMalu_ADD; goto std_binop;
1184 std_binop: {
1185 HReg dst = newVRegI(env);
1186 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1187 ARMRI84* argR = iselIntExpr_RI84(NULL, False/*mayInv*/,
1188 env, e->Iex.Binop.arg2);
1189 addInstr(env, ARMInstr_Alu(aop, dst, argL, argR));
1190 return dst;
1191 }
1192 default: break;
1193 }
1194
1195 /* SHL/SHR/SAR */
1196 switch (e->Iex.Binop.op) {
1197 case Iop_Shl32: sop = ARMsh_SHL; goto sh_binop;
1198 case Iop_Shr32: sop = ARMsh_SHR; goto sh_binop;
1199 case Iop_Sar32: sop = ARMsh_SAR; goto sh_binop;
1200 sh_binop: {
1201 HReg dst = newVRegI(env);
1202 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1203 ARMRI5* argR = iselIntExpr_RI5(env, e->Iex.Binop.arg2);
1204 addInstr(env, ARMInstr_Shift(sop, dst, argL, argR));
1205 vassert(ty == Ity_I32); /* else the IR is ill-typed */
1206 return dst;
1207 }
1208 default: break;
1209 }
1210
1211 /* MUL */
1212 if (e->Iex.Binop.op == Iop_Mul32) {
1213 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1214 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1215 HReg dst = newVRegI(env);
1216 addInstr(env, mk_iMOVds_RR(hregARM_R2(), argL));
1217 addInstr(env, mk_iMOVds_RR(hregARM_R3(), argR));
1218 addInstr(env, ARMInstr_Mul(ARMmul_PLAIN));
1219 addInstr(env, mk_iMOVds_RR(dst, hregARM_R0()));
1220 return dst;
1221 }
1222
1223 /* Handle misc other ops. */
1224
1225 if (e->Iex.Binop.op == Iop_Max32U) {
1226 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1227 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1228 HReg dst = newVRegI(env);
sewardj6c60b322010-08-22 12:48:28 +00001229 addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, argL,
1230 ARMRI84_R(argR)));
sewardj6c299f32009-12-31 18:00:12 +00001231 addInstr(env, mk_iMOVds_RR(dst, argL));
1232 addInstr(env, ARMInstr_CMov(ARMcc_LO, dst, ARMRI84_R(argR)));
1233 return dst;
1234 }
1235
1236 if (e->Iex.Binop.op == Iop_CmpF64) {
1237 HReg dL = iselDblExpr(env, e->Iex.Binop.arg1);
1238 HReg dR = iselDblExpr(env, e->Iex.Binop.arg2);
1239 HReg dst = newVRegI(env);
1240 /* Do the compare (FCMPD) and set NZCV in FPSCR. Then also do
1241 FMSTAT, so we can examine the results directly. */
1242 addInstr(env, ARMInstr_VCmpD(dL, dR));
1243 /* Create in dst, the IRCmpF64Result encoded result. */
1244 addInstr(env, ARMInstr_Imm32(dst, 0));
1245 addInstr(env, ARMInstr_CMov(ARMcc_EQ, dst, ARMRI84_I84(0x40,0))); //EQ
1246 addInstr(env, ARMInstr_CMov(ARMcc_MI, dst, ARMRI84_I84(0x01,0))); //LT
1247 addInstr(env, ARMInstr_CMov(ARMcc_GT, dst, ARMRI84_I84(0x00,0))); //GT
1248 addInstr(env, ARMInstr_CMov(ARMcc_VS, dst, ARMRI84_I84(0x45,0))); //UN
1249 return dst;
1250 }
1251
1252 if (e->Iex.Binop.op == Iop_F64toI32S
1253 || e->Iex.Binop.op == Iop_F64toI32U) {
1254 /* Wretched uglyness all round, due to having to deal
1255 with rounding modes. Oh well. */
1256 /* FIXME: if arg1 is a constant indicating round-to-zero,
1257 then we could skip all this arsing around with FPSCR and
1258 simply emit FTO{S,U}IZD. */
1259 Bool syned = e->Iex.Binop.op == Iop_F64toI32S;
1260 HReg valD = iselDblExpr(env, e->Iex.Binop.arg2);
1261 set_VFP_rounding_mode(env, e->Iex.Binop.arg1);
1262 /* FTO{S,U}ID valF, valD */
1263 HReg valF = newVRegF(env);
1264 addInstr(env, ARMInstr_VCvtID(False/*!iToD*/, syned,
1265 valF, valD));
1266 set_VFP_rounding_default(env);
1267 /* VMOV dst, valF */
1268 HReg dst = newVRegI(env);
1269 addInstr(env, ARMInstr_VXferS(False/*!toS*/, valF, dst));
1270 return dst;
1271 }
1272
sewardj6c60b322010-08-22 12:48:28 +00001273 if (e->Iex.Binop.op == Iop_GetElem8x8
1274 || e->Iex.Binop.op == Iop_GetElem16x4
1275 || e->Iex.Binop.op == Iop_GetElem32x2) {
1276 HReg res = newVRegI(env);
1277 HReg arg = iselNeon64Expr(env, e->Iex.Triop.arg1);
1278 UInt index, size;
1279 if (e->Iex.Binop.arg2->tag != Iex_Const ||
1280 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
1281 vpanic("ARM target supports GetElem with constant "
1282 "second argument only\n");
1283 }
1284 index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
1285 switch (e->Iex.Binop.op) {
1286 case Iop_GetElem8x8: vassert(index < 8); size = 0; break;
1287 case Iop_GetElem16x4: vassert(index < 4); size = 1; break;
1288 case Iop_GetElem32x2: vassert(index < 2); size = 2; break;
1289 default: vassert(0);
1290 }
1291 addInstr(env, ARMInstr_NUnaryS(ARMneon_GETELEMS,
1292 mkARMNRS(ARMNRS_Reg, res, 0),
1293 mkARMNRS(ARMNRS_Scalar, arg, index),
1294 size, False));
1295 return res;
1296 }
1297
1298 if (e->Iex.Binop.op == Iop_GetElem8x16
1299 || e->Iex.Binop.op == Iop_GetElem16x8
1300 || e->Iex.Binop.op == Iop_GetElem32x4) {
1301 HReg res = newVRegI(env);
1302 HReg arg = iselNeonExpr(env, e->Iex.Triop.arg1);
1303 UInt index, size;
1304 if (e->Iex.Binop.arg2->tag != Iex_Const ||
1305 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
1306 vpanic("ARM target supports GetElem with constant "
1307 "second argument only\n");
1308 }
1309 index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
1310 switch (e->Iex.Binop.op) {
1311 case Iop_GetElem8x16: vassert(index < 16); size = 0; break;
1312 case Iop_GetElem16x8: vassert(index < 8); size = 1; break;
1313 case Iop_GetElem32x4: vassert(index < 4); size = 2; break;
1314 default: vassert(0);
1315 }
1316 addInstr(env, ARMInstr_NUnaryS(ARMneon_GETELEMS,
1317 mkARMNRS(ARMNRS_Reg, res, 0),
1318 mkARMNRS(ARMNRS_Scalar, arg, index),
1319 size, True));
1320 return res;
1321 }
1322
sewardje2ea1762010-09-22 00:56:37 +00001323 /* All cases involving host-side helper calls. */
1324 void* fn = NULL;
1325 switch (e->Iex.Binop.op) {
1326 case Iop_Add16x2:
1327 fn = &h_generic_calc_Add16x2; break;
1328 case Iop_Sub16x2:
1329 fn = &h_generic_calc_Sub16x2; break;
1330 case Iop_HAdd16Ux2:
1331 fn = &h_generic_calc_HAdd16Ux2; break;
1332 case Iop_HAdd16Sx2:
1333 fn = &h_generic_calc_HAdd16Sx2; break;
1334 case Iop_HSub16Ux2:
1335 fn = &h_generic_calc_HSub16Ux2; break;
1336 case Iop_HSub16Sx2:
1337 fn = &h_generic_calc_HSub16Sx2; break;
1338 case Iop_QAdd16Sx2:
1339 fn = &h_generic_calc_QAdd16Sx2; break;
1340 case Iop_QSub16Sx2:
1341 fn = &h_generic_calc_QSub16Sx2; break;
1342 case Iop_Add8x4:
1343 fn = &h_generic_calc_Add8x4; break;
1344 case Iop_Sub8x4:
1345 fn = &h_generic_calc_Sub8x4; break;
1346 case Iop_HAdd8Ux4:
1347 fn = &h_generic_calc_HAdd8Ux4; break;
1348 case Iop_HAdd8Sx4:
1349 fn = &h_generic_calc_HAdd8Sx4; break;
1350 case Iop_HSub8Ux4:
1351 fn = &h_generic_calc_HSub8Ux4; break;
1352 case Iop_HSub8Sx4:
1353 fn = &h_generic_calc_HSub8Sx4; break;
1354 case Iop_QAdd8Sx4:
1355 fn = &h_generic_calc_QAdd8Sx4; break;
1356 case Iop_QAdd8Ux4:
1357 fn = &h_generic_calc_QAdd8Ux4; break;
1358 case Iop_QSub8Sx4:
1359 fn = &h_generic_calc_QSub8Sx4; break;
1360 case Iop_QSub8Ux4:
1361 fn = &h_generic_calc_QSub8Ux4; break;
sewardj310d6b22010-10-18 16:29:40 +00001362 case Iop_Sad8Ux4:
1363 fn = &h_generic_calc_Sad8Ux4; break;
sewardje2ea1762010-09-22 00:56:37 +00001364 default:
1365 break;
1366 }
1367
1368 if (fn) {
1369 HReg regL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1370 HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1371 HReg res = newVRegI(env);
1372 addInstr(env, mk_iMOVds_RR(hregARM_R0(), regL));
1373 addInstr(env, mk_iMOVds_RR(hregARM_R1(), regR));
1374 addInstr(env, ARMInstr_Call( ARMcc_AL, (HWord)Ptr_to_ULong(fn), 2 ));
1375 addInstr(env, mk_iMOVds_RR(res, hregARM_R0()));
1376 return res;
1377 }
1378
sewardj6c299f32009-12-31 18:00:12 +00001379 break;
1380 }
1381
1382 /* --------- UNARY OP --------- */
1383 case Iex_Unop: {
1384
1385//zz /* 1Uto8(32to1(expr32)) */
1386//zz if (e->Iex.Unop.op == Iop_1Uto8) {
1387//zz DECLARE_PATTERN(p_32to1_then_1Uto8);
1388//zz DEFINE_PATTERN(p_32to1_then_1Uto8,
1389//zz unop(Iop_1Uto8,unop(Iop_32to1,bind(0))));
1390//zz if (matchIRExpr(&mi,p_32to1_then_1Uto8,e)) {
1391//zz IRExpr* expr32 = mi.bindee[0];
1392//zz HReg dst = newVRegI(env);
1393//zz HReg src = iselIntExpr_R(env, expr32);
1394//zz addInstr(env, mk_iMOVsd_RR(src,dst) );
1395//zz addInstr(env, X86Instr_Alu32R(Xalu_AND,
1396//zz X86RMI_Imm(1), dst));
1397//zz return dst;
1398//zz }
1399//zz }
1400//zz
1401//zz /* 8Uto32(LDle(expr32)) */
1402//zz if (e->Iex.Unop.op == Iop_8Uto32) {
1403//zz DECLARE_PATTERN(p_LDle8_then_8Uto32);
1404//zz DEFINE_PATTERN(p_LDle8_then_8Uto32,
1405//zz unop(Iop_8Uto32,
1406//zz IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
1407//zz if (matchIRExpr(&mi,p_LDle8_then_8Uto32,e)) {
1408//zz HReg dst = newVRegI(env);
1409//zz X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1410//zz addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
1411//zz return dst;
1412//zz }
1413//zz }
1414//zz
1415//zz /* 8Sto32(LDle(expr32)) */
1416//zz if (e->Iex.Unop.op == Iop_8Sto32) {
1417//zz DECLARE_PATTERN(p_LDle8_then_8Sto32);
1418//zz DEFINE_PATTERN(p_LDle8_then_8Sto32,
1419//zz unop(Iop_8Sto32,
1420//zz IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
1421//zz if (matchIRExpr(&mi,p_LDle8_then_8Sto32,e)) {
1422//zz HReg dst = newVRegI(env);
1423//zz X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1424//zz addInstr(env, X86Instr_LoadEX(1,True,amode,dst));
1425//zz return dst;
1426//zz }
1427//zz }
1428//zz
1429//zz /* 16Uto32(LDle(expr32)) */
1430//zz if (e->Iex.Unop.op == Iop_16Uto32) {
1431//zz DECLARE_PATTERN(p_LDle16_then_16Uto32);
1432//zz DEFINE_PATTERN(p_LDle16_then_16Uto32,
1433//zz unop(Iop_16Uto32,
1434//zz IRExpr_Load(Iend_LE,Ity_I16,bind(0))) );
1435//zz if (matchIRExpr(&mi,p_LDle16_then_16Uto32,e)) {
1436//zz HReg dst = newVRegI(env);
1437//zz X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1438//zz addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
1439//zz return dst;
1440//zz }
1441//zz }
1442//zz
1443//zz /* 8Uto32(GET:I8) */
1444//zz if (e->Iex.Unop.op == Iop_8Uto32) {
1445//zz if (e->Iex.Unop.arg->tag == Iex_Get) {
1446//zz HReg dst;
1447//zz X86AMode* amode;
1448//zz vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I8);
1449//zz dst = newVRegI(env);
1450//zz amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
1451//zz hregX86_EBP());
1452//zz addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
1453//zz return dst;
1454//zz }
1455//zz }
1456//zz
1457//zz /* 16to32(GET:I16) */
1458//zz if (e->Iex.Unop.op == Iop_16Uto32) {
1459//zz if (e->Iex.Unop.arg->tag == Iex_Get) {
1460//zz HReg dst;
1461//zz X86AMode* amode;
1462//zz vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I16);
1463//zz dst = newVRegI(env);
1464//zz amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
1465//zz hregX86_EBP());
1466//zz addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
1467//zz return dst;
1468//zz }
1469//zz }
1470
1471 switch (e->Iex.Unop.op) {
1472 case Iop_8Uto32: {
1473 HReg dst = newVRegI(env);
1474 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1475 addInstr(env, ARMInstr_Alu(ARMalu_AND,
1476 dst, src, ARMRI84_I84(0xFF,0)));
1477 return dst;
1478 }
1479//zz case Iop_8Uto16:
1480//zz case Iop_8Uto32:
1481//zz case Iop_16Uto32: {
1482//zz HReg dst = newVRegI(env);
1483//zz HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1484//zz UInt mask = e->Iex.Unop.op==Iop_16Uto32 ? 0xFFFF : 0xFF;
1485//zz addInstr(env, mk_iMOVsd_RR(src,dst) );
1486//zz addInstr(env, X86Instr_Alu32R(Xalu_AND,
1487//zz X86RMI_Imm(mask), dst));
1488//zz return dst;
1489//zz }
1490//zz case Iop_8Sto16:
1491//zz case Iop_8Sto32:
1492 case Iop_16Uto32: {
1493 HReg dst = newVRegI(env);
1494 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1495 ARMRI5* amt = ARMRI5_I5(16);
1496 addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, src, amt));
1497 addInstr(env, ARMInstr_Shift(ARMsh_SHR, dst, dst, amt));
1498 return dst;
1499 }
1500 case Iop_8Sto32:
1501 case Iop_16Sto32: {
1502 HReg dst = newVRegI(env);
1503 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1504 ARMRI5* amt = ARMRI5_I5(e->Iex.Unop.op==Iop_16Sto32 ? 16 : 24);
1505 addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, src, amt));
1506 addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt));
1507 return dst;
1508 }
1509//zz case Iop_Not8:
1510//zz case Iop_Not16:
1511 case Iop_Not32: {
1512 HReg dst = newVRegI(env);
1513 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1514 addInstr(env, ARMInstr_Unary(ARMun_NOT, dst, src));
1515 return dst;
1516 }
1517 case Iop_64HIto32: {
1518 HReg rHi, rLo;
1519 iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1520 return rHi; /* and abandon rLo .. poor wee thing :-) */
1521 }
1522 case Iop_64to32: {
1523 HReg rHi, rLo;
1524 iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1525 return rLo; /* similar stupid comment to the above ... */
1526 }
sewardj6c60b322010-08-22 12:48:28 +00001527 case Iop_64to8: {
1528 HReg rHi, rLo;
sewardjc6f970f2012-04-02 21:54:49 +00001529 if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
sewardj6c60b322010-08-22 12:48:28 +00001530 HReg tHi = newVRegI(env);
1531 HReg tLo = newVRegI(env);
1532 HReg tmp = iselNeon64Expr(env, e->Iex.Unop.arg);
1533 addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo));
1534 rHi = tHi;
1535 rLo = tLo;
1536 } else {
1537 iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1538 }
1539 return rLo;
1540 }
sewardj6c299f32009-12-31 18:00:12 +00001541//zz case Iop_16HIto8:
1542//zz case Iop_32HIto16: {
1543//zz HReg dst = newVRegI(env);
1544//zz HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1545//zz Int shift = e->Iex.Unop.op == Iop_16HIto8 ? 8 : 16;
1546//zz addInstr(env, mk_iMOVsd_RR(src,dst) );
1547//zz addInstr(env, X86Instr_Sh32(Xsh_SHR, shift, dst));
1548//zz return dst;
1549//zz }
1550 case Iop_1Uto32:
1551 case Iop_1Uto8: {
1552 HReg dst = newVRegI(env);
1553 ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1554 addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
1555 addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
1556 return dst;
1557 }
1558
1559 case Iop_1Sto32: {
1560 HReg dst = newVRegI(env);
1561 ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1562 ARMRI5* amt = ARMRI5_I5(31);
1563 /* This is really rough. We could do much better here;
1564 perhaps mvn{cond} dst, #0 as the second insn?
1565 (same applies to 1Sto64) */
1566 addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
1567 addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
1568 addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, dst, amt));
1569 addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt));
1570 return dst;
1571 }
1572
1573
1574//zz case Iop_1Sto8:
1575//zz case Iop_1Sto16:
1576//zz case Iop_1Sto32: {
1577//zz /* could do better than this, but for now ... */
1578//zz HReg dst = newVRegI(env);
1579//zz X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1580//zz addInstr(env, X86Instr_Set32(cond,dst));
1581//zz addInstr(env, X86Instr_Sh32(Xsh_SHL, 31, dst));
1582//zz addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, dst));
1583//zz return dst;
1584//zz }
1585//zz case Iop_Ctz32: {
1586//zz /* Count trailing zeroes, implemented by x86 'bsfl' */
1587//zz HReg dst = newVRegI(env);
1588//zz HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1589//zz addInstr(env, X86Instr_Bsfr32(True,src,dst));
1590//zz return dst;
1591//zz }
1592 case Iop_Clz32: {
1593 /* Count leading zeroes; easy on ARM. */
1594 HReg dst = newVRegI(env);
1595 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1596 addInstr(env, ARMInstr_Unary(ARMun_CLZ, dst, src));
1597 return dst;
1598 }
1599
1600 case Iop_CmpwNEZ32: {
1601 HReg dst = newVRegI(env);
1602 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1603 addInstr(env, ARMInstr_Unary(ARMun_NEG, dst, src));
1604 addInstr(env, ARMInstr_Alu(ARMalu_OR, dst, dst, ARMRI84_R(src)));
1605 addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, ARMRI5_I5(31)));
1606 return dst;
1607 }
1608
1609 case Iop_Left32: {
1610 HReg dst = newVRegI(env);
1611 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1612 addInstr(env, ARMInstr_Unary(ARMun_NEG, dst, src));
1613 addInstr(env, ARMInstr_Alu(ARMalu_OR, dst, dst, ARMRI84_R(src)));
1614 return dst;
1615 }
1616
1617//zz case Iop_V128to32: {
1618//zz HReg dst = newVRegI(env);
1619//zz HReg vec = iselVecExpr(env, e->Iex.Unop.arg);
1620//zz X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
1621//zz sub_from_esp(env, 16);
1622//zz addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, esp0));
1623//zz addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(esp0), dst ));
1624//zz add_to_esp(env, 16);
1625//zz return dst;
1626//zz }
1627//zz
1628 case Iop_ReinterpF32asI32: {
1629 HReg dst = newVRegI(env);
1630 HReg src = iselFltExpr(env, e->Iex.Unop.arg);
1631 addInstr(env, ARMInstr_VXferS(False/*!toS*/, src, dst));
1632 return dst;
1633 }
1634
1635//zz
1636//zz case Iop_16to8:
1637 case Iop_32to8:
1638 case Iop_32to16:
1639 /* These are no-ops. */
1640 return iselIntExpr_R(env, e->Iex.Unop.arg);
1641
sewardj6c60b322010-08-22 12:48:28 +00001642 default:
sewardj6c299f32009-12-31 18:00:12 +00001643 break;
1644 }
sewardje2ea1762010-09-22 00:56:37 +00001645
1646 /* All Unop cases involving host-side helper calls. */
1647 void* fn = NULL;
1648 switch (e->Iex.Unop.op) {
1649 case Iop_CmpNEZ16x2:
1650 fn = &h_generic_calc_CmpNEZ16x2; break;
1651 case Iop_CmpNEZ8x4:
1652 fn = &h_generic_calc_CmpNEZ8x4; break;
1653 default:
1654 break;
1655 }
1656
1657 if (fn) {
1658 HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
1659 HReg res = newVRegI(env);
1660 addInstr(env, mk_iMOVds_RR(hregARM_R0(), arg));
1661 addInstr(env, ARMInstr_Call( ARMcc_AL, (HWord)Ptr_to_ULong(fn), 1 ));
1662 addInstr(env, mk_iMOVds_RR(res, hregARM_R0()));
1663 return res;
1664 }
1665
sewardj6c299f32009-12-31 18:00:12 +00001666 break;
1667 }
1668
1669 /* --------- GET --------- */
1670 case Iex_Get: {
1671 if (ty == Ity_I32
1672 && 0 == (e->Iex.Get.offset & 3)
1673 && e->Iex.Get.offset < 4096-4) {
1674 HReg dst = newVRegI(env);
1675 addInstr(env, ARMInstr_LdSt32(
1676 True/*isLoad*/,
1677 dst,
1678 ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset)));
1679 return dst;
1680 }
1681//zz if (ty == Ity_I8 || ty == Ity_I16) {
1682//zz HReg dst = newVRegI(env);
1683//zz addInstr(env, X86Instr_LoadEX(
1684//zz toUChar(ty==Ity_I8 ? 1 : 2),
1685//zz False,
1686//zz X86AMode_IR(e->Iex.Get.offset,hregX86_EBP()),
1687//zz dst));
1688//zz return dst;
1689//zz }
1690 break;
1691 }
1692
1693//zz case Iex_GetI: {
1694//zz X86AMode* am
1695//zz = genGuestArrayOffset(
1696//zz env, e->Iex.GetI.descr,
1697//zz e->Iex.GetI.ix, e->Iex.GetI.bias );
1698//zz HReg dst = newVRegI(env);
1699//zz if (ty == Ity_I8) {
1700//zz addInstr(env, X86Instr_LoadEX( 1, False, am, dst ));
1701//zz return dst;
1702//zz }
1703//zz if (ty == Ity_I32) {
1704//zz addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(am), dst));
1705//zz return dst;
1706//zz }
1707//zz break;
1708//zz }
1709
1710 /* --------- CCALL --------- */
1711 case Iex_CCall: {
1712 HReg dst = newVRegI(env);
1713 vassert(ty == e->Iex.CCall.retty);
1714
1715 /* be very restrictive for now. Only 32/64-bit ints allowed
1716 for args, and 32 bits for return type. */
1717 if (e->Iex.CCall.retty != Ity_I32)
1718 goto irreducible;
1719
1720 /* Marshal args, do the call, clear stack. */
1721 Bool ok = doHelperCall( env, False,
1722 NULL, e->Iex.CCall.cee, e->Iex.CCall.args );
1723 if (ok) {
1724 addInstr(env, mk_iMOVds_RR(dst, hregARM_R0()));
1725 return dst;
1726 }
1727 /* else fall through; will hit the irreducible: label */
1728 }
1729
1730 /* --------- LITERAL --------- */
1731 /* 32 literals */
1732 case Iex_Const: {
1733 UInt u = 0;
1734 HReg dst = newVRegI(env);
1735 switch (e->Iex.Const.con->tag) {
1736 case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
1737 case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
1738 case Ico_U8: u = 0xFF & (e->Iex.Const.con->Ico.U8); break;
sewardj6c60b322010-08-22 12:48:28 +00001739 default: ppIRExpr(e); vpanic("iselIntExpr_R.Iex_Const(arm)");
sewardj6c299f32009-12-31 18:00:12 +00001740 }
1741 addInstr(env, ARMInstr_Imm32(dst, u));
1742 return dst;
1743 }
1744
1745 /* --------- MULTIPLEX --------- */
1746 case Iex_Mux0X: {
1747 IRExpr* cond = e->Iex.Mux0X.cond;
1748
1749 /* Mux0X( 32to8(1Uto32(ccexpr)), expr0, exprX ) */
1750 if (ty == Ity_I32
1751 && cond->tag == Iex_Unop
1752 && cond->Iex.Unop.op == Iop_32to8
1753 && cond->Iex.Unop.arg->tag == Iex_Unop
1754 && cond->Iex.Unop.arg->Iex.Unop.op == Iop_1Uto32) {
1755 ARMCondCode cc;
1756 HReg rX = iselIntExpr_R(env, e->Iex.Mux0X.exprX);
1757 ARMRI84* r0 = iselIntExpr_RI84(NULL, False, env, e->Iex.Mux0X.expr0);
1758 HReg dst = newVRegI(env);
1759 addInstr(env, mk_iMOVds_RR(dst, rX));
1760 cc = iselCondCode(env, cond->Iex.Unop.arg->Iex.Unop.arg);
1761 addInstr(env, ARMInstr_CMov(cc ^ 1, dst, r0));
1762 return dst;
1763 }
1764
1765 /* Mux0X(cond, expr0, exprX) (general case) */
1766 if (ty == Ity_I32) {
1767 HReg r8;
1768 HReg rX = iselIntExpr_R(env, e->Iex.Mux0X.exprX);
1769 ARMRI84* r0 = iselIntExpr_RI84(NULL, False, env, e->Iex.Mux0X.expr0);
1770 HReg dst = newVRegI(env);
1771 addInstr(env, mk_iMOVds_RR(dst, rX));
1772 r8 = iselIntExpr_R(env, cond);
1773 addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r8,
1774 ARMRI84_I84(0xFF,0)));
1775 addInstr(env, ARMInstr_CMov(ARMcc_EQ, dst, r0));
1776 return dst;
1777 }
1778 break;
1779 }
1780
1781 default:
1782 break;
1783 } /* switch (e->tag) */
1784
1785 /* We get here if no pattern matched. */
1786 irreducible:
1787 ppIRExpr(e);
1788 vpanic("iselIntExpr_R: cannot reduce tree");
cerioncee30312004-12-17 20:30:21 +00001789}
1790
1791
/* -------------------- 64-bit -------------------- */
1793
1794/* Compute a 64-bit value into a register pair, which is returned as
1795 the first two parameters. As with iselIntExpr_R, these may be
1796 either real or virtual regs; in any case they must not be changed
1797 by subsequent code emitted by the caller. */
1798
1799static void iselInt64Expr ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e )
1800{
1801 iselInt64Expr_wrk(rHi, rLo, env, e);
1802# if 0
1803 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
1804# endif
1805 vassert(hregClass(*rHi) == HRcInt32);
1806 vassert(hregIsVirtual(*rHi));
1807 vassert(hregClass(*rLo) == HRcInt32);
1808 vassert(hregIsVirtual(*rLo));
1809}
1810
1811/* DO NOT CALL THIS DIRECTLY ! */
1812static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e )
1813{
1814 vassert(e);
1815 vassert(typeOfIRExpr(env->type_env,e) == Ity_I64);
1816
1817 /* 64-bit literal */
1818 if (e->tag == Iex_Const) {
1819 ULong w64 = e->Iex.Const.con->Ico.U64;
1820 UInt wHi = toUInt(w64 >> 32);
1821 UInt wLo = toUInt(w64);
1822 HReg tHi = newVRegI(env);
1823 HReg tLo = newVRegI(env);
1824 vassert(e->Iex.Const.con->tag == Ico_U64);
1825 addInstr(env, ARMInstr_Imm32(tHi, wHi));
1826 addInstr(env, ARMInstr_Imm32(tLo, wLo));
1827 *rHi = tHi;
1828 *rLo = tLo;
1829 return;
1830 }
1831
1832 /* read 64-bit IRTemp */
1833 if (e->tag == Iex_RdTmp) {
sewardjc6f970f2012-04-02 21:54:49 +00001834 if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
sewardj6c60b322010-08-22 12:48:28 +00001835 HReg tHi = newVRegI(env);
1836 HReg tLo = newVRegI(env);
1837 HReg tmp = iselNeon64Expr(env, e);
1838 addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo));
1839 *rHi = tHi;
1840 *rLo = tLo;
1841 } else {
1842 lookupIRTemp64( rHi, rLo, env, e->Iex.RdTmp.tmp);
1843 }
sewardj6c299f32009-12-31 18:00:12 +00001844 return;
1845 }
1846
1847 /* 64-bit load */
1848 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
1849 HReg tLo, tHi, rA;
1850 vassert(e->Iex.Load.ty == Ity_I64);
1851 rA = iselIntExpr_R(env, e->Iex.Load.addr);
1852 tHi = newVRegI(env);
1853 tLo = newVRegI(env);
1854 addInstr(env, ARMInstr_LdSt32(True/*isLoad*/, tHi, ARMAMode1_RI(rA, 4)));
1855 addInstr(env, ARMInstr_LdSt32(True/*isLoad*/, tLo, ARMAMode1_RI(rA, 0)));
1856 *rHi = tHi;
1857 *rLo = tLo;
1858 return;
1859 }
1860
1861 /* 64-bit GET */
1862 if (e->tag == Iex_Get) {
1863 ARMAMode1* am0 = ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset + 0);
1864 ARMAMode1* am4 = ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset + 4);
1865 HReg tHi = newVRegI(env);
1866 HReg tLo = newVRegI(env);
1867 addInstr(env, ARMInstr_LdSt32(True/*isLoad*/, tHi, am4));
1868 addInstr(env, ARMInstr_LdSt32(True/*isLoad*/, tLo, am0));
1869 *rHi = tHi;
1870 *rLo = tLo;
1871 return;
1872 }
1873
1874 /* --------- BINARY ops --------- */
1875 if (e->tag == Iex_Binop) {
1876 switch (e->Iex.Binop.op) {
1877
1878 /* 32 x 32 -> 64 multiply */
1879 case Iop_MullS32:
1880 case Iop_MullU32: {
1881 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1882 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1883 HReg tHi = newVRegI(env);
1884 HReg tLo = newVRegI(env);
1885 ARMMulOp mop = e->Iex.Binop.op == Iop_MullS32
1886 ? ARMmul_SX : ARMmul_ZX;
1887 addInstr(env, mk_iMOVds_RR(hregARM_R2(), argL));
1888 addInstr(env, mk_iMOVds_RR(hregARM_R3(), argR));
1889 addInstr(env, ARMInstr_Mul(mop));
1890 addInstr(env, mk_iMOVds_RR(tHi, hregARM_R1()));
1891 addInstr(env, mk_iMOVds_RR(tLo, hregARM_R0()));
1892 *rHi = tHi;
1893 *rLo = tLo;
1894 return;
1895 }
1896
1897 case Iop_Or64: {
1898 HReg xLo, xHi, yLo, yHi;
1899 HReg tHi = newVRegI(env);
1900 HReg tLo = newVRegI(env);
1901 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
1902 iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
1903 addInstr(env, ARMInstr_Alu(ARMalu_OR, tHi, xHi, ARMRI84_R(yHi)));
1904 addInstr(env, ARMInstr_Alu(ARMalu_OR, tLo, xLo, ARMRI84_R(yLo)));
1905 *rHi = tHi;
1906 *rLo = tLo;
1907 return;
1908 }
1909
1910 case Iop_Add64: {
1911 HReg xLo, xHi, yLo, yHi;
1912 HReg tHi = newVRegI(env);
1913 HReg tLo = newVRegI(env);
1914 iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
1915 iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
1916 addInstr(env, ARMInstr_Alu(ARMalu_ADDS, tLo, xLo, ARMRI84_R(yLo)));
1917 addInstr(env, ARMInstr_Alu(ARMalu_ADC, tHi, xHi, ARMRI84_R(yHi)));
1918 *rHi = tHi;
1919 *rLo = tLo;
1920 return;
1921 }
1922
1923 /* 32HLto64(e1,e2) */
1924 case Iop_32HLto64: {
1925 *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
1926 *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
1927 return;
1928 }
1929
1930 default:
1931 break;
1932 }
1933 }
1934
1935 /* --------- UNARY ops --------- */
1936 if (e->tag == Iex_Unop) {
1937 switch (e->Iex.Unop.op) {
1938
1939 /* ReinterpF64asI64 */
1940 case Iop_ReinterpF64asI64: {
1941 HReg dstHi = newVRegI(env);
1942 HReg dstLo = newVRegI(env);
1943 HReg src = iselDblExpr(env, e->Iex.Unop.arg);
1944 addInstr(env, ARMInstr_VXferD(False/*!toD*/, src, dstHi, dstLo));
1945 *rHi = dstHi;
1946 *rLo = dstLo;
1947 return;
1948 }
1949
1950 /* Left64(e) */
1951 case Iop_Left64: {
1952 HReg yLo, yHi;
1953 HReg tHi = newVRegI(env);
1954 HReg tLo = newVRegI(env);
1955 HReg zero = newVRegI(env);
1956 /* yHi:yLo = arg */
1957 iselInt64Expr(&yHi, &yLo, env, e->Iex.Unop.arg);
1958 /* zero = 0 */
1959 addInstr(env, ARMInstr_Imm32(zero, 0));
1960 /* tLo = 0 - yLo, and set carry */
sewardj6c60b322010-08-22 12:48:28 +00001961 addInstr(env, ARMInstr_Alu(ARMalu_SUBS,
1962 tLo, zero, ARMRI84_R(yLo)));
sewardj6c299f32009-12-31 18:00:12 +00001963 /* tHi = 0 - yHi - carry */
sewardj6c60b322010-08-22 12:48:28 +00001964 addInstr(env, ARMInstr_Alu(ARMalu_SBC,
1965 tHi, zero, ARMRI84_R(yHi)));
sewardj6c299f32009-12-31 18:00:12 +00001966 /* So now we have tHi:tLo = -arg. To finish off, or 'arg'
1967 back in, so as to give the final result
1968 tHi:tLo = arg | -arg. */
1969 addInstr(env, ARMInstr_Alu(ARMalu_OR, tHi, tHi, ARMRI84_R(yHi)));
1970 addInstr(env, ARMInstr_Alu(ARMalu_OR, tLo, tLo, ARMRI84_R(yLo)));
1971 *rHi = tHi;
1972 *rLo = tLo;
1973 return;
1974 }
1975
1976 /* CmpwNEZ64(e) */
1977 case Iop_CmpwNEZ64: {
1978 HReg srcLo, srcHi;
1979 HReg tmp1 = newVRegI(env);
1980 HReg tmp2 = newVRegI(env);
1981 /* srcHi:srcLo = arg */
1982 iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg);
1983 /* tmp1 = srcHi | srcLo */
1984 addInstr(env, ARMInstr_Alu(ARMalu_OR,
1985 tmp1, srcHi, ARMRI84_R(srcLo)));
1986 /* tmp2 = (tmp1 | -tmp1) >>s 31 */
1987 addInstr(env, ARMInstr_Unary(ARMun_NEG, tmp2, tmp1));
1988 addInstr(env, ARMInstr_Alu(ARMalu_OR,
1989 tmp2, tmp2, ARMRI84_R(tmp1)));
1990 addInstr(env, ARMInstr_Shift(ARMsh_SAR,
1991 tmp2, tmp2, ARMRI5_I5(31)));
1992 *rHi = tmp2;
1993 *rLo = tmp2;
1994 return;
1995 }
1996
1997 case Iop_1Sto64: {
1998 HReg dst = newVRegI(env);
1999 ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
2000 ARMRI5* amt = ARMRI5_I5(31);
2001 /* This is really rough. We could do much better here;
2002 perhaps mvn{cond} dst, #0 as the second insn?
2003 (same applies to 1Sto32) */
2004 addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
2005 addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
2006 addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, dst, amt));
2007 addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt));
2008 *rHi = dst;
2009 *rLo = dst;
2010 return;
2011 }
2012
2013 default:
2014 break;
2015 }
2016 } /* if (e->tag == Iex_Unop) */
2017
2018 /* --------- MULTIPLEX --------- */
2019 if (e->tag == Iex_Mux0X) {
2020 IRType ty8;
2021 HReg r8, rXhi, rXlo, r0hi, r0lo, dstHi, dstLo;
2022 ty8 = typeOfIRExpr(env->type_env,e->Iex.Mux0X.cond);
2023 vassert(ty8 == Ity_I8);
2024 iselInt64Expr(&rXhi, &rXlo, env, e->Iex.Mux0X.exprX);
2025 iselInt64Expr(&r0hi, &r0lo, env, e->Iex.Mux0X.expr0);
2026 dstHi = newVRegI(env);
2027 dstLo = newVRegI(env);
2028 addInstr(env, mk_iMOVds_RR(dstHi, rXhi));
2029 addInstr(env, mk_iMOVds_RR(dstLo, rXlo));
2030 r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond);
2031 addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r8,
2032 ARMRI84_I84(0xFF,0)));
2033 addInstr(env, ARMInstr_CMov(ARMcc_EQ, dstHi, ARMRI84_R(r0hi)));
2034 addInstr(env, ARMInstr_CMov(ARMcc_EQ, dstLo, ARMRI84_R(r0lo)));
2035 *rHi = dstHi;
2036 *rLo = dstLo;
2037 return;
2038 }
2039
sewardj6c60b322010-08-22 12:48:28 +00002040 /* It is convenient sometimes to call iselInt64Expr even when we
2041 have NEON support (e.g. in do_helper_call we need 64-bit
2042 arguments as 2 x 32 regs). */
sewardjc6f970f2012-04-02 21:54:49 +00002043 if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
sewardj6c60b322010-08-22 12:48:28 +00002044 HReg tHi = newVRegI(env);
2045 HReg tLo = newVRegI(env);
2046 HReg tmp = iselNeon64Expr(env, e);
2047 addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo));
2048 *rHi = tHi;
2049 *rLo = tLo;
2050 return ;
2051 }
2052
sewardj6c299f32009-12-31 18:00:12 +00002053 ppIRExpr(e);
2054 vpanic("iselInt64Expr");
2055}
2056
2057
/*---------------------------------------------------------*/
/*--- ISEL: Vector (NEON) expressions (64 or 128 bit)   ---*/
/*---------------------------------------------------------*/
2061
2062static HReg iselNeon64Expr ( ISelEnv* env, IRExpr* e )
2063{
2064 HReg r = iselNeon64Expr_wrk( env, e );
2065 vassert(hregClass(r) == HRcFlt64);
2066 vassert(hregIsVirtual(r));
2067 return r;
2068}
2069
2070/* DO NOT CALL THIS DIRECTLY */
2071static HReg iselNeon64Expr_wrk ( ISelEnv* env, IRExpr* e )
2072{
2073 IRType ty = typeOfIRExpr(env->type_env, e);
2074 MatchInfo mi;
2075 vassert(e);
2076 vassert(ty == Ity_I64);
2077
2078 if (e->tag == Iex_RdTmp) {
2079 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
2080 }
2081
2082 if (e->tag == Iex_Const) {
2083 HReg rLo, rHi;
2084 HReg res = newVRegD(env);
2085 iselInt64Expr(&rHi, &rLo, env, e);
2086 addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
2087 return res;
2088 }
2089
2090 /* 64-bit load */
2091 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
2092 HReg res = newVRegD(env);
2093 ARMAModeN* am = iselIntExpr_AModeN(env, e->Iex.Load.addr);
2094 vassert(ty == Ity_I64);
2095 addInstr(env, ARMInstr_NLdStD(True, res, am));
2096 return res;
2097 }
2098
2099 /* 64-bit GET */
2100 if (e->tag == Iex_Get) {
2101 HReg addr = newVRegI(env);
2102 HReg res = newVRegD(env);
2103 vassert(ty == Ity_I64);
2104 addInstr(env, ARMInstr_Add32(addr, hregARM_R8(), e->Iex.Get.offset));
2105 addInstr(env, ARMInstr_NLdStD(True, res, mkARMAModeN_R(addr)));
2106 return res;
2107 }
2108
2109 /* --------- BINARY ops --------- */
2110 if (e->tag == Iex_Binop) {
2111 switch (e->Iex.Binop.op) {
2112
2113 /* 32 x 32 -> 64 multiply */
2114 case Iop_MullS32:
2115 case Iop_MullU32: {
2116 HReg rLo, rHi;
2117 HReg res = newVRegD(env);
2118 iselInt64Expr(&rHi, &rLo, env, e);
2119 addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
2120 return res;
2121 }
2122
2123 case Iop_And64: {
2124 HReg res = newVRegD(env);
2125 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2126 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2127 addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
2128 res, argL, argR, 4, False));
2129 return res;
2130 }
2131 case Iop_Or64: {
2132 HReg res = newVRegD(env);
2133 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2134 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2135 addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
2136 res, argL, argR, 4, False));
2137 return res;
2138 }
2139 case Iop_Xor64: {
2140 HReg res = newVRegD(env);
2141 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2142 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2143 addInstr(env, ARMInstr_NBinary(ARMneon_VXOR,
2144 res, argL, argR, 4, False));
2145 return res;
2146 }
2147
2148 /* 32HLto64(e1,e2) */
2149 case Iop_32HLto64: {
2150 HReg rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
2151 HReg rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
2152 HReg res = newVRegD(env);
2153 addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
2154 return res;
2155 }
2156
2157 case Iop_Add8x8:
2158 case Iop_Add16x4:
2159 case Iop_Add32x2:
2160 case Iop_Add64: {
2161 HReg res = newVRegD(env);
2162 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2163 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2164 UInt size;
2165 switch (e->Iex.Binop.op) {
2166 case Iop_Add8x8: size = 0; break;
2167 case Iop_Add16x4: size = 1; break;
2168 case Iop_Add32x2: size = 2; break;
2169 case Iop_Add64: size = 3; break;
2170 default: vassert(0);
2171 }
2172 addInstr(env, ARMInstr_NBinary(ARMneon_VADD,
2173 res, argL, argR, size, False));
2174 return res;
2175 }
2176 case Iop_Add32Fx2: {
2177 HReg res = newVRegD(env);
2178 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2179 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2180 UInt size = 0;
2181 addInstr(env, ARMInstr_NBinary(ARMneon_VADDFP,
2182 res, argL, argR, size, False));
2183 return res;
2184 }
2185 case Iop_Recps32Fx2: {
2186 HReg res = newVRegD(env);
2187 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2188 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2189 UInt size = 0;
2190 addInstr(env, ARMInstr_NBinary(ARMneon_VRECPS,
2191 res, argL, argR, size, False));
2192 return res;
2193 }
2194 case Iop_Rsqrts32Fx2: {
2195 HReg res = newVRegD(env);
2196 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2197 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2198 UInt size = 0;
2199 addInstr(env, ARMInstr_NBinary(ARMneon_VRSQRTS,
2200 res, argL, argR, size, False));
2201 return res;
2202 }
2203 case Iop_InterleaveOddLanes8x8:
2204 case Iop_InterleaveOddLanes16x4:
2205 case Iop_InterleaveLO32x2:
2206 case Iop_InterleaveEvenLanes8x8:
2207 case Iop_InterleaveEvenLanes16x4:
2208 case Iop_InterleaveHI32x2: {
2209 HReg tmp = newVRegD(env);
2210 HReg res = newVRegD(env);
2211 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2212 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2213 UInt size;
2214 UInt is_lo;
2215 switch (e->Iex.Binop.op) {
2216 case Iop_InterleaveOddLanes8x8: is_lo = 1; size = 0; break;
2217 case Iop_InterleaveEvenLanes8x8: is_lo = 0; size = 0; break;
2218 case Iop_InterleaveOddLanes16x4: is_lo = 1; size = 1; break;
2219 case Iop_InterleaveEvenLanes16x4: is_lo = 0; size = 1; break;
2220 case Iop_InterleaveLO32x2: is_lo = 1; size = 2; break;
2221 case Iop_InterleaveHI32x2: is_lo = 0; size = 2; break;
2222 default: vassert(0);
2223 }
2224 if (is_lo) {
2225 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2226 tmp, argL, 4, False));
2227 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2228 res, argR, 4, False));
2229 addInstr(env, ARMInstr_NDual(ARMneon_TRN,
2230 res, tmp, size, False));
2231 } else {
2232 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2233 tmp, argR, 4, False));
2234 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2235 res, argL, 4, False));
2236 addInstr(env, ARMInstr_NDual(ARMneon_TRN,
2237 tmp, res, size, False));
2238 }
2239 return res;
2240 }
2241 case Iop_InterleaveHI8x8:
2242 case Iop_InterleaveHI16x4:
2243 case Iop_InterleaveLO8x8:
2244 case Iop_InterleaveLO16x4: {
2245 HReg tmp = newVRegD(env);
2246 HReg res = newVRegD(env);
2247 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2248 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2249 UInt size;
2250 UInt is_lo;
2251 switch (e->Iex.Binop.op) {
2252 case Iop_InterleaveHI8x8: is_lo = 1; size = 0; break;
2253 case Iop_InterleaveLO8x8: is_lo = 0; size = 0; break;
2254 case Iop_InterleaveHI16x4: is_lo = 1; size = 1; break;
2255 case Iop_InterleaveLO16x4: is_lo = 0; size = 1; break;
2256 default: vassert(0);
2257 }
2258 if (is_lo) {
2259 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2260 tmp, argL, 4, False));
2261 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2262 res, argR, 4, False));
2263 addInstr(env, ARMInstr_NDual(ARMneon_ZIP,
2264 res, tmp, size, False));
2265 } else {
2266 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2267 tmp, argR, 4, False));
2268 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2269 res, argL, 4, False));
2270 addInstr(env, ARMInstr_NDual(ARMneon_ZIP,
2271 tmp, res, size, False));
2272 }
2273 return res;
2274 }
2275 case Iop_CatOddLanes8x8:
2276 case Iop_CatOddLanes16x4:
2277 case Iop_CatEvenLanes8x8:
2278 case Iop_CatEvenLanes16x4: {
2279 HReg tmp = newVRegD(env);
2280 HReg res = newVRegD(env);
2281 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2282 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2283 UInt size;
2284 UInt is_lo;
2285 switch (e->Iex.Binop.op) {
2286 case Iop_CatOddLanes8x8: is_lo = 1; size = 0; break;
2287 case Iop_CatEvenLanes8x8: is_lo = 0; size = 0; break;
2288 case Iop_CatOddLanes16x4: is_lo = 1; size = 1; break;
2289 case Iop_CatEvenLanes16x4: is_lo = 0; size = 1; break;
2290 default: vassert(0);
2291 }
2292 if (is_lo) {
2293 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2294 tmp, argL, 4, False));
2295 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2296 res, argR, 4, False));
2297 addInstr(env, ARMInstr_NDual(ARMneon_UZP,
2298 res, tmp, size, False));
2299 } else {
2300 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2301 tmp, argR, 4, False));
2302 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2303 res, argL, 4, False));
2304 addInstr(env, ARMInstr_NDual(ARMneon_UZP,
2305 tmp, res, size, False));
2306 }
2307 return res;
2308 }
2309 case Iop_QAdd8Ux8:
2310 case Iop_QAdd16Ux4:
2311 case Iop_QAdd32Ux2:
2312 case Iop_QAdd64Ux1: {
2313 HReg res = newVRegD(env);
2314 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2315 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2316 UInt size;
2317 switch (e->Iex.Binop.op) {
2318 case Iop_QAdd8Ux8: size = 0; break;
2319 case Iop_QAdd16Ux4: size = 1; break;
2320 case Iop_QAdd32Ux2: size = 2; break;
2321 case Iop_QAdd64Ux1: size = 3; break;
2322 default: vassert(0);
2323 }
2324 addInstr(env, ARMInstr_NBinary(ARMneon_VQADDU,
2325 res, argL, argR, size, False));
2326 return res;
2327 }
2328 case Iop_QAdd8Sx8:
2329 case Iop_QAdd16Sx4:
2330 case Iop_QAdd32Sx2:
2331 case Iop_QAdd64Sx1: {
2332 HReg res = newVRegD(env);
2333 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2334 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2335 UInt size;
2336 switch (e->Iex.Binop.op) {
2337 case Iop_QAdd8Sx8: size = 0; break;
2338 case Iop_QAdd16Sx4: size = 1; break;
2339 case Iop_QAdd32Sx2: size = 2; break;
2340 case Iop_QAdd64Sx1: size = 3; break;
2341 default: vassert(0);
2342 }
2343 addInstr(env, ARMInstr_NBinary(ARMneon_VQADDS,
2344 res, argL, argR, size, False));
2345 return res;
2346 }
2347 case Iop_Sub8x8:
2348 case Iop_Sub16x4:
2349 case Iop_Sub32x2:
2350 case Iop_Sub64: {
2351 HReg res = newVRegD(env);
2352 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2353 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2354 UInt size;
2355 switch (e->Iex.Binop.op) {
2356 case Iop_Sub8x8: size = 0; break;
2357 case Iop_Sub16x4: size = 1; break;
2358 case Iop_Sub32x2: size = 2; break;
2359 case Iop_Sub64: size = 3; break;
2360 default: vassert(0);
2361 }
2362 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
2363 res, argL, argR, size, False));
2364 return res;
2365 }
2366 case Iop_Sub32Fx2: {
2367 HReg res = newVRegD(env);
2368 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2369 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2370 UInt size = 0;
2371 addInstr(env, ARMInstr_NBinary(ARMneon_VSUBFP,
2372 res, argL, argR, size, False));
2373 return res;
2374 }
2375 case Iop_QSub8Ux8:
2376 case Iop_QSub16Ux4:
2377 case Iop_QSub32Ux2:
2378 case Iop_QSub64Ux1: {
2379 HReg res = newVRegD(env);
2380 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2381 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2382 UInt size;
2383 switch (e->Iex.Binop.op) {
2384 case Iop_QSub8Ux8: size = 0; break;
2385 case Iop_QSub16Ux4: size = 1; break;
2386 case Iop_QSub32Ux2: size = 2; break;
2387 case Iop_QSub64Ux1: size = 3; break;
2388 default: vassert(0);
2389 }
2390 addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBU,
2391 res, argL, argR, size, False));
2392 return res;
2393 }
2394 case Iop_QSub8Sx8:
2395 case Iop_QSub16Sx4:
2396 case Iop_QSub32Sx2:
2397 case Iop_QSub64Sx1: {
2398 HReg res = newVRegD(env);
2399 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2400 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2401 UInt size;
2402 switch (e->Iex.Binop.op) {
2403 case Iop_QSub8Sx8: size = 0; break;
2404 case Iop_QSub16Sx4: size = 1; break;
2405 case Iop_QSub32Sx2: size = 2; break;
2406 case Iop_QSub64Sx1: size = 3; break;
2407 default: vassert(0);
2408 }
2409 addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBS,
2410 res, argL, argR, size, False));
2411 return res;
2412 }
2413 case Iop_Max8Ux8:
2414 case Iop_Max16Ux4:
2415 case Iop_Max32Ux2: {
2416 HReg res = newVRegD(env);
2417 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2418 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2419 UInt size;
2420 switch (e->Iex.Binop.op) {
2421 case Iop_Max8Ux8: size = 0; break;
2422 case Iop_Max16Ux4: size = 1; break;
2423 case Iop_Max32Ux2: size = 2; break;
2424 default: vassert(0);
2425 }
2426 addInstr(env, ARMInstr_NBinary(ARMneon_VMAXU,
2427 res, argL, argR, size, False));
2428 return res;
2429 }
2430 case Iop_Max8Sx8:
2431 case Iop_Max16Sx4:
2432 case Iop_Max32Sx2: {
2433 HReg res = newVRegD(env);
2434 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2435 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2436 UInt size;
2437 switch (e->Iex.Binop.op) {
2438 case Iop_Max8Sx8: size = 0; break;
2439 case Iop_Max16Sx4: size = 1; break;
2440 case Iop_Max32Sx2: size = 2; break;
2441 default: vassert(0);
2442 }
2443 addInstr(env, ARMInstr_NBinary(ARMneon_VMAXS,
2444 res, argL, argR, size, False));
2445 return res;
2446 }
2447 case Iop_Min8Ux8:
2448 case Iop_Min16Ux4:
2449 case Iop_Min32Ux2: {
2450 HReg res = newVRegD(env);
2451 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2452 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2453 UInt size;
2454 switch (e->Iex.Binop.op) {
2455 case Iop_Min8Ux8: size = 0; break;
2456 case Iop_Min16Ux4: size = 1; break;
2457 case Iop_Min32Ux2: size = 2; break;
2458 default: vassert(0);
2459 }
2460 addInstr(env, ARMInstr_NBinary(ARMneon_VMINU,
2461 res, argL, argR, size, False));
2462 return res;
2463 }
2464 case Iop_Min8Sx8:
2465 case Iop_Min16Sx4:
2466 case Iop_Min32Sx2: {
2467 HReg res = newVRegD(env);
2468 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2469 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2470 UInt size;
2471 switch (e->Iex.Binop.op) {
2472 case Iop_Min8Sx8: size = 0; break;
2473 case Iop_Min16Sx4: size = 1; break;
2474 case Iop_Min32Sx2: size = 2; break;
2475 default: vassert(0);
2476 }
2477 addInstr(env, ARMInstr_NBinary(ARMneon_VMINS,
2478 res, argL, argR, size, False));
2479 return res;
2480 }
2481 case Iop_Sar8x8:
2482 case Iop_Sar16x4:
2483 case Iop_Sar32x2: {
2484 HReg res = newVRegD(env);
2485 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2486 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2487 HReg argR2 = newVRegD(env);
2488 HReg zero = newVRegD(env);
2489 UInt size;
2490 switch (e->Iex.Binop.op) {
2491 case Iop_Sar8x8: size = 0; break;
2492 case Iop_Sar16x4: size = 1; break;
2493 case Iop_Sar32x2: size = 2; break;
2494 case Iop_Sar64: size = 3; break;
2495 default: vassert(0);
2496 }
2497 addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
2498 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
2499 argR2, zero, argR, size, False));
2500 addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
2501 res, argL, argR2, size, False));
2502 return res;
2503 }
2504 case Iop_Sal8x8:
2505 case Iop_Sal16x4:
2506 case Iop_Sal32x2:
2507 case Iop_Sal64x1: {
2508 HReg res = newVRegD(env);
2509 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2510 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2511 UInt size;
2512 switch (e->Iex.Binop.op) {
2513 case Iop_Sal8x8: size = 0; break;
2514 case Iop_Sal16x4: size = 1; break;
2515 case Iop_Sal32x2: size = 2; break;
2516 case Iop_Sal64x1: size = 3; break;
2517 default: vassert(0);
2518 }
2519 addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
2520 res, argL, argR, size, False));
2521 return res;
2522 }
2523 case Iop_Shr8x8:
2524 case Iop_Shr16x4:
2525 case Iop_Shr32x2: {
2526 HReg res = newVRegD(env);
2527 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2528 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2529 HReg argR2 = newVRegD(env);
2530 HReg zero = newVRegD(env);
2531 UInt size;
2532 switch (e->Iex.Binop.op) {
2533 case Iop_Shr8x8: size = 0; break;
2534 case Iop_Shr16x4: size = 1; break;
2535 case Iop_Shr32x2: size = 2; break;
2536 default: vassert(0);
2537 }
2538 addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
2539 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
2540 argR2, zero, argR, size, False));
2541 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
2542 res, argL, argR2, size, False));
2543 return res;
2544 }
2545 case Iop_Shl8x8:
2546 case Iop_Shl16x4:
2547 case Iop_Shl32x2: {
2548 HReg res = newVRegD(env);
2549 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2550 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2551 UInt size;
2552 switch (e->Iex.Binop.op) {
2553 case Iop_Shl8x8: size = 0; break;
2554 case Iop_Shl16x4: size = 1; break;
2555 case Iop_Shl32x2: size = 2; break;
2556 default: vassert(0);
2557 }
2558 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
2559 res, argL, argR, size, False));
2560 return res;
2561 }
2562 case Iop_QShl8x8:
2563 case Iop_QShl16x4:
2564 case Iop_QShl32x2:
2565 case Iop_QShl64x1: {
2566 HReg res = newVRegD(env);
2567 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2568 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2569 UInt size;
2570 switch (e->Iex.Binop.op) {
2571 case Iop_QShl8x8: size = 0; break;
2572 case Iop_QShl16x4: size = 1; break;
2573 case Iop_QShl32x2: size = 2; break;
2574 case Iop_QShl64x1: size = 3; break;
2575 default: vassert(0);
2576 }
2577 addInstr(env, ARMInstr_NShift(ARMneon_VQSHL,
2578 res, argL, argR, size, False));
2579 return res;
2580 }
2581 case Iop_QSal8x8:
2582 case Iop_QSal16x4:
2583 case Iop_QSal32x2:
2584 case Iop_QSal64x1: {
2585 HReg res = newVRegD(env);
2586 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2587 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2588 UInt size;
2589 switch (e->Iex.Binop.op) {
2590 case Iop_QSal8x8: size = 0; break;
2591 case Iop_QSal16x4: size = 1; break;
2592 case Iop_QSal32x2: size = 2; break;
2593 case Iop_QSal64x1: size = 3; break;
2594 default: vassert(0);
2595 }
2596 addInstr(env, ARMInstr_NShift(ARMneon_VQSAL,
2597 res, argL, argR, size, False));
2598 return res;
2599 }
2600 case Iop_QShlN8x8:
2601 case Iop_QShlN16x4:
2602 case Iop_QShlN32x2:
2603 case Iop_QShlN64x1: {
2604 HReg res = newVRegD(env);
2605 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2606 UInt size, imm;
2607 if (e->Iex.Binop.arg2->tag != Iex_Const ||
2608 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
2609 vpanic("ARM taget supports Iop_QShlNAxB with constant "
2610 "second argument only\n");
2611 }
2612 imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
2613 switch (e->Iex.Binop.op) {
2614 case Iop_QShlN8x8: size = 8 | imm; break;
2615 case Iop_QShlN16x4: size = 16 | imm; break;
2616 case Iop_QShlN32x2: size = 32 | imm; break;
2617 case Iop_QShlN64x1: size = 64 | imm; break;
2618 default: vassert(0);
2619 }
2620 addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUU,
2621 res, argL, size, False));
2622 return res;
2623 }
2624 case Iop_QShlN8Sx8:
2625 case Iop_QShlN16Sx4:
2626 case Iop_QShlN32Sx2:
2627 case Iop_QShlN64Sx1: {
2628 HReg res = newVRegD(env);
2629 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2630 UInt size, imm;
2631 if (e->Iex.Binop.arg2->tag != Iex_Const ||
2632 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
2633 vpanic("ARM taget supports Iop_QShlNAxB with constant "
2634 "second argument only\n");
2635 }
2636 imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
2637 switch (e->Iex.Binop.op) {
2638 case Iop_QShlN8Sx8: size = 8 | imm; break;
2639 case Iop_QShlN16Sx4: size = 16 | imm; break;
2640 case Iop_QShlN32Sx2: size = 32 | imm; break;
2641 case Iop_QShlN64Sx1: size = 64 | imm; break;
2642 default: vassert(0);
2643 }
2644 addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUS,
2645 res, argL, size, False));
2646 return res;
2647 }
2648 case Iop_QSalN8x8:
2649 case Iop_QSalN16x4:
2650 case Iop_QSalN32x2:
2651 case Iop_QSalN64x1: {
2652 HReg res = newVRegD(env);
2653 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2654 UInt size, imm;
2655 if (e->Iex.Binop.arg2->tag != Iex_Const ||
2656 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
2657 vpanic("ARM taget supports Iop_QShlNAxB with constant "
2658 "second argument only\n");
2659 }
2660 imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
2661 switch (e->Iex.Binop.op) {
2662 case Iop_QSalN8x8: size = 8 | imm; break;
2663 case Iop_QSalN16x4: size = 16 | imm; break;
2664 case Iop_QSalN32x2: size = 32 | imm; break;
2665 case Iop_QSalN64x1: size = 64 | imm; break;
2666 default: vassert(0);
2667 }
2668 addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNSS,
2669 res, argL, size, False));
2670 return res;
2671 }
2672 case Iop_ShrN8x8:
2673 case Iop_ShrN16x4:
2674 case Iop_ShrN32x2:
2675 case Iop_Shr64: {
2676 HReg res = newVRegD(env);
2677 HReg tmp = newVRegD(env);
2678 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2679 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
2680 HReg argR2 = newVRegI(env);
2681 UInt size;
2682 switch (e->Iex.Binop.op) {
2683 case Iop_ShrN8x8: size = 0; break;
2684 case Iop_ShrN16x4: size = 1; break;
2685 case Iop_ShrN32x2: size = 2; break;
2686 case Iop_Shr64: size = 3; break;
2687 default: vassert(0);
2688 }
2689 addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
2690 addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, False));
2691 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
2692 res, argL, tmp, size, False));
2693 return res;
2694 }
2695 case Iop_ShlN8x8:
2696 case Iop_ShlN16x4:
2697 case Iop_ShlN32x2:
2698 case Iop_Shl64: {
2699 HReg res = newVRegD(env);
2700 HReg tmp = newVRegD(env);
2701 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2702 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
2703 UInt size;
2704 switch (e->Iex.Binop.op) {
2705 case Iop_ShlN8x8: size = 0; break;
2706 case Iop_ShlN16x4: size = 1; break;
2707 case Iop_ShlN32x2: size = 2; break;
2708 case Iop_Shl64: size = 3; break;
2709 default: vassert(0);
2710 }
2711 addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR, 0, False));
2712 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
2713 res, argL, tmp, size, False));
2714 return res;
2715 }
2716 case Iop_SarN8x8:
2717 case Iop_SarN16x4:
2718 case Iop_SarN32x2:
2719 case Iop_Sar64: {
2720 HReg res = newVRegD(env);
2721 HReg tmp = newVRegD(env);
2722 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2723 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
2724 HReg argR2 = newVRegI(env);
2725 UInt size;
2726 switch (e->Iex.Binop.op) {
2727 case Iop_SarN8x8: size = 0; break;
2728 case Iop_SarN16x4: size = 1; break;
2729 case Iop_SarN32x2: size = 2; break;
2730 case Iop_Sar64: size = 3; break;
2731 default: vassert(0);
2732 }
2733 addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
2734 addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, False));
2735 addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
2736 res, argL, tmp, size, False));
2737 return res;
2738 }
2739 case Iop_CmpGT8Ux8:
2740 case Iop_CmpGT16Ux4:
2741 case Iop_CmpGT32Ux2: {
2742 HReg res = newVRegD(env);
2743 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2744 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2745 UInt size;
2746 switch (e->Iex.Binop.op) {
2747 case Iop_CmpGT8Ux8: size = 0; break;
2748 case Iop_CmpGT16Ux4: size = 1; break;
2749 case Iop_CmpGT32Ux2: size = 2; break;
2750 default: vassert(0);
2751 }
2752 addInstr(env, ARMInstr_NBinary(ARMneon_VCGTU,
2753 res, argL, argR, size, False));
2754 return res;
2755 }
2756 case Iop_CmpGT8Sx8:
2757 case Iop_CmpGT16Sx4:
2758 case Iop_CmpGT32Sx2: {
2759 HReg res = newVRegD(env);
2760 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2761 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2762 UInt size;
2763 switch (e->Iex.Binop.op) {
2764 case Iop_CmpGT8Sx8: size = 0; break;
2765 case Iop_CmpGT16Sx4: size = 1; break;
2766 case Iop_CmpGT32Sx2: size = 2; break;
2767 default: vassert(0);
2768 }
2769 addInstr(env, ARMInstr_NBinary(ARMneon_VCGTS,
2770 res, argL, argR, size, False));
2771 return res;
2772 }
2773 case Iop_CmpEQ8x8:
2774 case Iop_CmpEQ16x4:
2775 case Iop_CmpEQ32x2: {
2776 HReg res = newVRegD(env);
2777 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2778 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2779 UInt size;
2780 switch (e->Iex.Binop.op) {
2781 case Iop_CmpEQ8x8: size = 0; break;
2782 case Iop_CmpEQ16x4: size = 1; break;
2783 case Iop_CmpEQ32x2: size = 2; break;
2784 default: vassert(0);
2785 }
2786 addInstr(env, ARMInstr_NBinary(ARMneon_VCEQ,
2787 res, argL, argR, size, False));
2788 return res;
2789 }
2790 case Iop_Mul8x8:
2791 case Iop_Mul16x4:
2792 case Iop_Mul32x2: {
2793 HReg res = newVRegD(env);
2794 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2795 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2796 UInt size = 0;
2797 switch(e->Iex.Binop.op) {
2798 case Iop_Mul8x8: size = 0; break;
2799 case Iop_Mul16x4: size = 1; break;
2800 case Iop_Mul32x2: size = 2; break;
2801 default: vassert(0);
2802 }
2803 addInstr(env, ARMInstr_NBinary(ARMneon_VMUL,
2804 res, argL, argR, size, False));
2805 return res;
2806 }
2807 case Iop_Mul32Fx2: {
2808 HReg res = newVRegD(env);
2809 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2810 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2811 UInt size = 0;
2812 addInstr(env, ARMInstr_NBinary(ARMneon_VMULFP,
2813 res, argL, argR, size, False));
2814 return res;
2815 }
2816 case Iop_QDMulHi16Sx4:
2817 case Iop_QDMulHi32Sx2: {
2818 HReg res = newVRegD(env);
2819 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2820 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2821 UInt size = 0;
2822 switch(e->Iex.Binop.op) {
2823 case Iop_QDMulHi16Sx4: size = 1; break;
2824 case Iop_QDMulHi32Sx2: size = 2; break;
2825 default: vassert(0);
2826 }
2827 addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULH,
2828 res, argL, argR, size, False));
2829 return res;
2830 }
2831
2832 case Iop_QRDMulHi16Sx4:
2833 case Iop_QRDMulHi32Sx2: {
2834 HReg res = newVRegD(env);
2835 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2836 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2837 UInt size = 0;
2838 switch(e->Iex.Binop.op) {
2839 case Iop_QRDMulHi16Sx4: size = 1; break;
2840 case Iop_QRDMulHi32Sx2: size = 2; break;
2841 default: vassert(0);
2842 }
2843 addInstr(env, ARMInstr_NBinary(ARMneon_VQRDMULH,
2844 res, argL, argR, size, False));
2845 return res;
2846 }
2847
2848 case Iop_PwAdd8x8:
2849 case Iop_PwAdd16x4:
2850 case Iop_PwAdd32x2: {
2851 HReg res = newVRegD(env);
2852 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2853 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2854 UInt size = 0;
2855 switch(e->Iex.Binop.op) {
2856 case Iop_PwAdd8x8: size = 0; break;
2857 case Iop_PwAdd16x4: size = 1; break;
2858 case Iop_PwAdd32x2: size = 2; break;
2859 default: vassert(0);
2860 }
2861 addInstr(env, ARMInstr_NBinary(ARMneon_VPADD,
2862 res, argL, argR, size, False));
2863 return res;
2864 }
2865 case Iop_PwAdd32Fx2: {
2866 HReg res = newVRegD(env);
2867 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2868 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2869 UInt size = 0;
2870 addInstr(env, ARMInstr_NBinary(ARMneon_VPADDFP,
2871 res, argL, argR, size, False));
2872 return res;
2873 }
2874 case Iop_PwMin8Ux8:
2875 case Iop_PwMin16Ux4:
2876 case Iop_PwMin32Ux2: {
2877 HReg res = newVRegD(env);
2878 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2879 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2880 UInt size = 0;
2881 switch(e->Iex.Binop.op) {
2882 case Iop_PwMin8Ux8: size = 0; break;
2883 case Iop_PwMin16Ux4: size = 1; break;
2884 case Iop_PwMin32Ux2: size = 2; break;
2885 default: vassert(0);
2886 }
2887 addInstr(env, ARMInstr_NBinary(ARMneon_VPMINU,
2888 res, argL, argR, size, False));
2889 return res;
2890 }
2891 case Iop_PwMin8Sx8:
2892 case Iop_PwMin16Sx4:
2893 case Iop_PwMin32Sx2: {
2894 HReg res = newVRegD(env);
2895 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2896 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2897 UInt size = 0;
2898 switch(e->Iex.Binop.op) {
2899 case Iop_PwMin8Sx8: size = 0; break;
2900 case Iop_PwMin16Sx4: size = 1; break;
2901 case Iop_PwMin32Sx2: size = 2; break;
2902 default: vassert(0);
2903 }
2904 addInstr(env, ARMInstr_NBinary(ARMneon_VPMINS,
2905 res, argL, argR, size, False));
2906 return res;
2907 }
2908 case Iop_PwMax8Ux8:
2909 case Iop_PwMax16Ux4:
2910 case Iop_PwMax32Ux2: {
2911 HReg res = newVRegD(env);
2912 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2913 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2914 UInt size = 0;
2915 switch(e->Iex.Binop.op) {
2916 case Iop_PwMax8Ux8: size = 0; break;
2917 case Iop_PwMax16Ux4: size = 1; break;
2918 case Iop_PwMax32Ux2: size = 2; break;
2919 default: vassert(0);
2920 }
2921 addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXU,
2922 res, argL, argR, size, False));
2923 return res;
2924 }
2925 case Iop_PwMax8Sx8:
2926 case Iop_PwMax16Sx4:
2927 case Iop_PwMax32Sx2: {
2928 HReg res = newVRegD(env);
2929 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2930 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2931 UInt size = 0;
2932 switch(e->Iex.Binop.op) {
2933 case Iop_PwMax8Sx8: size = 0; break;
2934 case Iop_PwMax16Sx4: size = 1; break;
2935 case Iop_PwMax32Sx2: size = 2; break;
2936 default: vassert(0);
2937 }
2938 addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXS,
2939 res, argL, argR, size, False));
2940 return res;
2941 }
2942 case Iop_Perm8x8: {
2943 HReg res = newVRegD(env);
2944 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2945 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2946 addInstr(env, ARMInstr_NBinary(ARMneon_VTBL,
2947 res, argL, argR, 0, False));
2948 return res;
2949 }
2950 case Iop_PolynomialMul8x8: {
2951 HReg res = newVRegD(env);
2952 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2953 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2954 UInt size = 0;
2955 addInstr(env, ARMInstr_NBinary(ARMneon_VMULP,
2956 res, argL, argR, size, False));
2957 return res;
2958 }
2959 case Iop_Max32Fx2: {
2960 HReg res = newVRegD(env);
2961 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2962 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2963 addInstr(env, ARMInstr_NBinary(ARMneon_VMAXF,
2964 res, argL, argR, 2, False));
2965 return res;
2966 }
2967 case Iop_Min32Fx2: {
2968 HReg res = newVRegD(env);
2969 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2970 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2971 addInstr(env, ARMInstr_NBinary(ARMneon_VMINF,
2972 res, argL, argR, 2, False));
2973 return res;
2974 }
2975 case Iop_PwMax32Fx2: {
2976 HReg res = newVRegD(env);
2977 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2978 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2979 addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXF,
2980 res, argL, argR, 2, False));
2981 return res;
2982 }
2983 case Iop_PwMin32Fx2: {
2984 HReg res = newVRegD(env);
2985 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2986 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2987 addInstr(env, ARMInstr_NBinary(ARMneon_VPMINF,
2988 res, argL, argR, 2, False));
2989 return res;
2990 }
2991 case Iop_CmpGT32Fx2: {
2992 HReg res = newVRegD(env);
2993 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2994 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2995 addInstr(env, ARMInstr_NBinary(ARMneon_VCGTF,
2996 res, argL, argR, 2, False));
2997 return res;
2998 }
2999 case Iop_CmpGE32Fx2: {
3000 HReg res = newVRegD(env);
3001 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3002 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3003 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEF,
3004 res, argL, argR, 2, False));
3005 return res;
3006 }
3007 case Iop_CmpEQ32Fx2: {
3008 HReg res = newVRegD(env);
3009 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3010 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3011 addInstr(env, ARMInstr_NBinary(ARMneon_VCEQF,
3012 res, argL, argR, 2, False));
3013 return res;
3014 }
3015 case Iop_F32ToFixed32Ux2_RZ:
3016 case Iop_F32ToFixed32Sx2_RZ:
3017 case Iop_Fixed32UToF32x2_RN:
3018 case Iop_Fixed32SToF32x2_RN: {
3019 HReg res = newVRegD(env);
3020 HReg arg = iselNeon64Expr(env, e->Iex.Binop.arg1);
3021 ARMNeonUnOp op;
3022 UInt imm6;
3023 if (e->Iex.Binop.arg2->tag != Iex_Const ||
3024 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
3025 vpanic("ARM supports FP <-> Fixed conversion with constant "
3026 "second argument less than 33 only\n");
3027 }
3028 imm6 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
3029 vassert(imm6 <= 32 && imm6 > 0);
3030 imm6 = 64 - imm6;
3031 switch(e->Iex.Binop.op) {
3032 case Iop_F32ToFixed32Ux2_RZ: op = ARMneon_VCVTFtoFixedU; break;
3033 case Iop_F32ToFixed32Sx2_RZ: op = ARMneon_VCVTFtoFixedS; break;
3034 case Iop_Fixed32UToF32x2_RN: op = ARMneon_VCVTFixedUtoF; break;
3035 case Iop_Fixed32SToF32x2_RN: op = ARMneon_VCVTFixedStoF; break;
3036 default: vassert(0);
3037 }
3038 addInstr(env, ARMInstr_NUnary(op, res, arg, imm6, False));
3039 return res;
3040 }
3041 /*
3042 FIXME: is this here or not?
3043 case Iop_VDup8x8:
3044 case Iop_VDup16x4:
3045 case Iop_VDup32x2: {
3046 HReg res = newVRegD(env);
3047 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3048 UInt index;
3049 UInt imm4;
3050 UInt size = 0;
3051 if (e->Iex.Binop.arg2->tag != Iex_Const ||
3052 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
3053 vpanic("ARM supports Iop_VDup with constant "
3054 "second argument less than 16 only\n");
3055 }
3056 index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
3057 switch(e->Iex.Binop.op) {
3058 case Iop_VDup8x8: imm4 = (index << 1) + 1; break;
3059 case Iop_VDup16x4: imm4 = (index << 2) + 2; break;
3060 case Iop_VDup32x2: imm4 = (index << 3) + 4; break;
3061 default: vassert(0);
3062 }
3063 if (imm4 >= 16) {
3064 vpanic("ARM supports Iop_VDup with constant "
3065 "second argument less than 16 only\n");
3066 }
3067 addInstr(env, ARMInstr_NUnary(ARMneon_VDUP,
3068 res, argL, imm4, False));
3069 return res;
3070 }
3071 */
3072 default:
3073 break;
3074 }
3075 }
3076
3077 /* --------- UNARY ops --------- */
3078 if (e->tag == Iex_Unop) {
3079 switch (e->Iex.Unop.op) {
3080
3081 /* ReinterpF64asI64 */
3082 case Iop_ReinterpF64asI64:
3083 /* Left64(e) */
3084 case Iop_Left64:
3085 /* CmpwNEZ64(e) */
3086 //case Iop_CmpwNEZ64:
3087 case Iop_1Sto64: {
3088 HReg rLo, rHi;
3089 HReg res = newVRegD(env);
3090 iselInt64Expr(&rHi, &rLo, env, e);
3091 addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
3092 return res;
3093 }
3094 case Iop_Not64: {
3095 DECLARE_PATTERN(p_veqz_8x8);
3096 DECLARE_PATTERN(p_veqz_16x4);
3097 DECLARE_PATTERN(p_veqz_32x2);
3098 DECLARE_PATTERN(p_vcge_8sx8);
3099 DECLARE_PATTERN(p_vcge_16sx4);
3100 DECLARE_PATTERN(p_vcge_32sx2);
3101 DECLARE_PATTERN(p_vcge_8ux8);
3102 DECLARE_PATTERN(p_vcge_16ux4);
3103 DECLARE_PATTERN(p_vcge_32ux2);
3104 DEFINE_PATTERN(p_veqz_8x8,
3105 unop(Iop_Not64, unop(Iop_CmpNEZ8x8, bind(0))));
3106 DEFINE_PATTERN(p_veqz_16x4,
3107 unop(Iop_Not64, unop(Iop_CmpNEZ16x4, bind(0))));
3108 DEFINE_PATTERN(p_veqz_32x2,
3109 unop(Iop_Not64, unop(Iop_CmpNEZ32x2, bind(0))));
3110 DEFINE_PATTERN(p_vcge_8sx8,
3111 unop(Iop_Not64, binop(Iop_CmpGT8Sx8, bind(1), bind(0))));
3112 DEFINE_PATTERN(p_vcge_16sx4,
3113 unop(Iop_Not64, binop(Iop_CmpGT16Sx4, bind(1), bind(0))));
3114 DEFINE_PATTERN(p_vcge_32sx2,
3115 unop(Iop_Not64, binop(Iop_CmpGT32Sx2, bind(1), bind(0))));
3116 DEFINE_PATTERN(p_vcge_8ux8,
3117 unop(Iop_Not64, binop(Iop_CmpGT8Ux8, bind(1), bind(0))));
3118 DEFINE_PATTERN(p_vcge_16ux4,
3119 unop(Iop_Not64, binop(Iop_CmpGT16Ux4, bind(1), bind(0))));
3120 DEFINE_PATTERN(p_vcge_32ux2,
3121 unop(Iop_Not64, binop(Iop_CmpGT32Ux2, bind(1), bind(0))));
3122 if (matchIRExpr(&mi, p_veqz_8x8, e)) {
3123 HReg res = newVRegD(env);
3124 HReg arg = iselNeon64Expr(env, mi.bindee[0]);
3125 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 0, False));
3126 return res;
3127 } else if (matchIRExpr(&mi, p_veqz_16x4, e)) {
3128 HReg res = newVRegD(env);
3129 HReg arg = iselNeon64Expr(env, mi.bindee[0]);
3130 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 1, False));
3131 return res;
3132 } else if (matchIRExpr(&mi, p_veqz_32x2, e)) {
3133 HReg res = newVRegD(env);
3134 HReg arg = iselNeon64Expr(env, mi.bindee[0]);
3135 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 2, False));
3136 return res;
3137 } else if (matchIRExpr(&mi, p_vcge_8sx8, e)) {
3138 HReg res = newVRegD(env);
3139 HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3140 HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3141 addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3142 res, argL, argR, 0, False));
3143 return res;
3144 } else if (matchIRExpr(&mi, p_vcge_16sx4, e)) {
3145 HReg res = newVRegD(env);
3146 HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3147 HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3148 addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3149 res, argL, argR, 1, False));
3150 return res;
3151 } else if (matchIRExpr(&mi, p_vcge_32sx2, e)) {
3152 HReg res = newVRegD(env);
3153 HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3154 HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3155 addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3156 res, argL, argR, 2, False));
3157 return res;
3158 } else if (matchIRExpr(&mi, p_vcge_8ux8, e)) {
3159 HReg res = newVRegD(env);
3160 HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3161 HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3162 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3163 res, argL, argR, 0, False));
3164 return res;
3165 } else if (matchIRExpr(&mi, p_vcge_16ux4, e)) {
3166 HReg res = newVRegD(env);
3167 HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3168 HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3169 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3170 res, argL, argR, 1, False));
3171 return res;
3172 } else if (matchIRExpr(&mi, p_vcge_32ux2, e)) {
3173 HReg res = newVRegD(env);
3174 HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3175 HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3176 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3177 res, argL, argR, 2, False));
3178 return res;
3179 } else {
3180 HReg res = newVRegD(env);
3181 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3182 addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, arg, 4, False));
3183 return res;
3184 }
3185 }
3186 case Iop_Dup8x8:
3187 case Iop_Dup16x4:
3188 case Iop_Dup32x2: {
3189 HReg res, arg;
3190 UInt size;
3191 DECLARE_PATTERN(p_vdup_8x8);
3192 DECLARE_PATTERN(p_vdup_16x4);
3193 DECLARE_PATTERN(p_vdup_32x2);
3194 DEFINE_PATTERN(p_vdup_8x8,
3195 unop(Iop_Dup8x8, binop(Iop_GetElem8x8, bind(0), bind(1))));
3196 DEFINE_PATTERN(p_vdup_16x4,
3197 unop(Iop_Dup16x4, binop(Iop_GetElem16x4, bind(0), bind(1))));
3198 DEFINE_PATTERN(p_vdup_32x2,
3199 unop(Iop_Dup32x2, binop(Iop_GetElem32x2, bind(0), bind(1))));
3200 if (matchIRExpr(&mi, p_vdup_8x8, e)) {
3201 UInt index;
3202 UInt imm4;
3203 if (mi.bindee[1]->tag == Iex_Const &&
3204 typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3205 index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3206 imm4 = (index << 1) + 1;
3207 if (index < 8) {
3208 res = newVRegD(env);
3209 arg = iselNeon64Expr(env, mi.bindee[0]);
3210 addInstr(env, ARMInstr_NUnaryS(
3211 ARMneon_VDUP,
3212 mkARMNRS(ARMNRS_Reg, res, 0),
3213 mkARMNRS(ARMNRS_Scalar, arg, index),
3214 imm4, False
3215 ));
3216 return res;
3217 }
3218 }
3219 } else if (matchIRExpr(&mi, p_vdup_16x4, e)) {
3220 UInt index;
3221 UInt imm4;
3222 if (mi.bindee[1]->tag == Iex_Const &&
3223 typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3224 index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3225 imm4 = (index << 2) + 2;
3226 if (index < 4) {
3227 res = newVRegD(env);
3228 arg = iselNeon64Expr(env, mi.bindee[0]);
3229 addInstr(env, ARMInstr_NUnaryS(
3230 ARMneon_VDUP,
3231 mkARMNRS(ARMNRS_Reg, res, 0),
3232 mkARMNRS(ARMNRS_Scalar, arg, index),
3233 imm4, False
3234 ));
3235 return res;
3236 }
3237 }
3238 } else if (matchIRExpr(&mi, p_vdup_32x2, e)) {
3239 UInt index;
3240 UInt imm4;
3241 if (mi.bindee[1]->tag == Iex_Const &&
3242 typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3243 index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3244 imm4 = (index << 3) + 4;
3245 if (index < 2) {
3246 res = newVRegD(env);
3247 arg = iselNeon64Expr(env, mi.bindee[0]);
3248 addInstr(env, ARMInstr_NUnaryS(
3249 ARMneon_VDUP,
3250 mkARMNRS(ARMNRS_Reg, res, 0),
3251 mkARMNRS(ARMNRS_Scalar, arg, index),
3252 imm4, False
3253 ));
3254 return res;
3255 }
3256 }
3257 }
3258 arg = iselIntExpr_R(env, e->Iex.Unop.arg);
3259 res = newVRegD(env);
3260 switch (e->Iex.Unop.op) {
3261 case Iop_Dup8x8: size = 0; break;
3262 case Iop_Dup16x4: size = 1; break;
3263 case Iop_Dup32x2: size = 2; break;
3264 default: vassert(0);
3265 }
3266 addInstr(env, ARMInstr_NUnary(ARMneon_DUP, res, arg, size, False));
3267 return res;
3268 }
3269 case Iop_Abs8x8:
3270 case Iop_Abs16x4:
3271 case Iop_Abs32x2: {
3272 HReg res = newVRegD(env);
3273 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3274 UInt size = 0;
3275 switch(e->Iex.Binop.op) {
3276 case Iop_Abs8x8: size = 0; break;
3277 case Iop_Abs16x4: size = 1; break;
3278 case Iop_Abs32x2: size = 2; break;
3279 default: vassert(0);
3280 }
3281 addInstr(env, ARMInstr_NUnary(ARMneon_ABS, res, arg, size, False));
3282 return res;
3283 }
3284 case Iop_Reverse64_8x8:
3285 case Iop_Reverse64_16x4:
3286 case Iop_Reverse64_32x2: {
3287 HReg res = newVRegD(env);
3288 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3289 UInt size = 0;
3290 switch(e->Iex.Binop.op) {
3291 case Iop_Reverse64_8x8: size = 0; break;
3292 case Iop_Reverse64_16x4: size = 1; break;
3293 case Iop_Reverse64_32x2: size = 2; break;
3294 default: vassert(0);
3295 }
3296 addInstr(env, ARMInstr_NUnary(ARMneon_REV64,
3297 res, arg, size, False));
3298 return res;
3299 }
3300 case Iop_Reverse32_8x8:
3301 case Iop_Reverse32_16x4: {
3302 HReg res = newVRegD(env);
3303 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3304 UInt size = 0;
3305 switch(e->Iex.Binop.op) {
3306 case Iop_Reverse32_8x8: size = 0; break;
3307 case Iop_Reverse32_16x4: size = 1; break;
3308 default: vassert(0);
3309 }
3310 addInstr(env, ARMInstr_NUnary(ARMneon_REV32,
3311 res, arg, size, False));
3312 return res;
3313 }
3314 case Iop_Reverse16_8x8: {
3315 HReg res = newVRegD(env);
3316 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3317 UInt size = 0;
3318 addInstr(env, ARMInstr_NUnary(ARMneon_REV16,
3319 res, arg, size, False));
3320 return res;
3321 }
3322 case Iop_CmpwNEZ64: {
3323 HReg x_lsh = newVRegD(env);
3324 HReg x_rsh = newVRegD(env);
3325 HReg lsh_amt = newVRegD(env);
3326 HReg rsh_amt = newVRegD(env);
3327 HReg zero = newVRegD(env);
3328 HReg tmp = newVRegD(env);
3329 HReg tmp2 = newVRegD(env);
3330 HReg res = newVRegD(env);
3331 HReg x = newVRegD(env);
3332 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3333 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp2, arg, 2, False));
3334 addInstr(env, ARMInstr_NUnary(ARMneon_NOT, x, tmp2, 4, False));
3335 addInstr(env, ARMInstr_NeonImm(lsh_amt, ARMNImm_TI(0, 32)));
3336 addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0, 0)));
3337 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
3338 rsh_amt, zero, lsh_amt, 2, False));
3339 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
3340 x_lsh, x, lsh_amt, 3, False));
3341 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
3342 x_rsh, x, rsh_amt, 3, False));
3343 addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
3344 tmp, x_lsh, x_rsh, 0, False));
3345 addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
3346 res, tmp, x, 0, False));
3347 return res;
3348 }
3349 case Iop_CmpNEZ8x8:
3350 case Iop_CmpNEZ16x4:
3351 case Iop_CmpNEZ32x2: {
3352 HReg res = newVRegD(env);
3353 HReg tmp = newVRegD(env);
3354 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3355 UInt size;
3356 switch (e->Iex.Unop.op) {
3357 case Iop_CmpNEZ8x8: size = 0; break;
3358 case Iop_CmpNEZ16x4: size = 1; break;
3359 case Iop_CmpNEZ32x2: size = 2; break;
3360 default: vassert(0);
3361 }
3362 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp, arg, size, False));
3363 addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, tmp, 4, False));
3364 return res;
3365 }
sewardj5f438dd2011-06-16 11:36:23 +00003366 case Iop_NarrowUn16to8x8:
3367 case Iop_NarrowUn32to16x4:
3368 case Iop_NarrowUn64to32x2: {
sewardj6c60b322010-08-22 12:48:28 +00003369 HReg res = newVRegD(env);
3370 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3371 UInt size = 0;
3372 switch(e->Iex.Binop.op) {
sewardj5f438dd2011-06-16 11:36:23 +00003373 case Iop_NarrowUn16to8x8: size = 0; break;
3374 case Iop_NarrowUn32to16x4: size = 1; break;
3375 case Iop_NarrowUn64to32x2: size = 2; break;
sewardj6c60b322010-08-22 12:48:28 +00003376 default: vassert(0);
3377 }
3378 addInstr(env, ARMInstr_NUnary(ARMneon_COPYN,
3379 res, arg, size, False));
3380 return res;
3381 }
sewardj5f438dd2011-06-16 11:36:23 +00003382 case Iop_QNarrowUn16Sto8Sx8:
3383 case Iop_QNarrowUn32Sto16Sx4:
3384 case Iop_QNarrowUn64Sto32Sx2: {
sewardj6c60b322010-08-22 12:48:28 +00003385 HReg res = newVRegD(env);
3386 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3387 UInt size = 0;
3388 switch(e->Iex.Binop.op) {
sewardj5f438dd2011-06-16 11:36:23 +00003389 case Iop_QNarrowUn16Sto8Sx8: size = 0; break;
3390 case Iop_QNarrowUn32Sto16Sx4: size = 1; break;
3391 case Iop_QNarrowUn64Sto32Sx2: size = 2; break;
sewardj6c60b322010-08-22 12:48:28 +00003392 default: vassert(0);
3393 }
3394 addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNSS,
3395 res, arg, size, False));
3396 return res;
3397 }
sewardj5f438dd2011-06-16 11:36:23 +00003398 case Iop_QNarrowUn16Sto8Ux8:
3399 case Iop_QNarrowUn32Sto16Ux4:
3400 case Iop_QNarrowUn64Sto32Ux2: {
sewardj6c60b322010-08-22 12:48:28 +00003401 HReg res = newVRegD(env);
3402 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3403 UInt size = 0;
3404 switch(e->Iex.Binop.op) {
sewardj5f438dd2011-06-16 11:36:23 +00003405 case Iop_QNarrowUn16Sto8Ux8: size = 0; break;
3406 case Iop_QNarrowUn32Sto16Ux4: size = 1; break;
3407 case Iop_QNarrowUn64Sto32Ux2: size = 2; break;
sewardj6c60b322010-08-22 12:48:28 +00003408 default: vassert(0);
3409 }
3410 addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNUS,
3411 res, arg, size, False));
3412 return res;
3413 }
sewardj5f438dd2011-06-16 11:36:23 +00003414 case Iop_QNarrowUn16Uto8Ux8:
3415 case Iop_QNarrowUn32Uto16Ux4:
3416 case Iop_QNarrowUn64Uto32Ux2: {
sewardj6c60b322010-08-22 12:48:28 +00003417 HReg res = newVRegD(env);
3418 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3419 UInt size = 0;
3420 switch(e->Iex.Binop.op) {
sewardj5f438dd2011-06-16 11:36:23 +00003421 case Iop_QNarrowUn16Uto8Ux8: size = 0; break;
3422 case Iop_QNarrowUn32Uto16Ux4: size = 1; break;
3423 case Iop_QNarrowUn64Uto32Ux2: size = 2; break;
sewardj6c60b322010-08-22 12:48:28 +00003424 default: vassert(0);
3425 }
3426 addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNUU,
3427 res, arg, size, False));
3428 return res;
3429 }
3430 case Iop_PwAddL8Sx8:
3431 case Iop_PwAddL16Sx4:
3432 case Iop_PwAddL32Sx2: {
3433 HReg res = newVRegD(env);
3434 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3435 UInt size = 0;
3436 switch(e->Iex.Binop.op) {
3437 case Iop_PwAddL8Sx8: size = 0; break;
3438 case Iop_PwAddL16Sx4: size = 1; break;
3439 case Iop_PwAddL32Sx2: size = 2; break;
3440 default: vassert(0);
3441 }
3442 addInstr(env, ARMInstr_NUnary(ARMneon_PADDLS,
3443 res, arg, size, False));
3444 return res;
3445 }
3446 case Iop_PwAddL8Ux8:
3447 case Iop_PwAddL16Ux4:
3448 case Iop_PwAddL32Ux2: {
3449 HReg res = newVRegD(env);
3450 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3451 UInt size = 0;
3452 switch(e->Iex.Binop.op) {
3453 case Iop_PwAddL8Ux8: size = 0; break;
3454 case Iop_PwAddL16Ux4: size = 1; break;
3455 case Iop_PwAddL32Ux2: size = 2; break;
3456 default: vassert(0);
3457 }
3458 addInstr(env, ARMInstr_NUnary(ARMneon_PADDLU,
3459 res, arg, size, False));
3460 return res;
3461 }
3462 case Iop_Cnt8x8: {
3463 HReg res = newVRegD(env);
3464 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3465 UInt size = 0;
3466 addInstr(env, ARMInstr_NUnary(ARMneon_CNT,
3467 res, arg, size, False));
3468 return res;
3469 }
3470 case Iop_Clz8Sx8:
3471 case Iop_Clz16Sx4:
3472 case Iop_Clz32Sx2: {
3473 HReg res = newVRegD(env);
3474 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3475 UInt size = 0;
3476 switch(e->Iex.Binop.op) {
3477 case Iop_Clz8Sx8: size = 0; break;
3478 case Iop_Clz16Sx4: size = 1; break;
3479 case Iop_Clz32Sx2: size = 2; break;
3480 default: vassert(0);
3481 }
3482 addInstr(env, ARMInstr_NUnary(ARMneon_CLZ,
3483 res, arg, size, False));
3484 return res;
3485 }
3486 case Iop_Cls8Sx8:
3487 case Iop_Cls16Sx4:
3488 case Iop_Cls32Sx2: {
3489 HReg res = newVRegD(env);
3490 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3491 UInt size = 0;
3492 switch(e->Iex.Binop.op) {
3493 case Iop_Cls8Sx8: size = 0; break;
3494 case Iop_Cls16Sx4: size = 1; break;
3495 case Iop_Cls32Sx2: size = 2; break;
3496 default: vassert(0);
3497 }
3498 addInstr(env, ARMInstr_NUnary(ARMneon_CLS,
3499 res, arg, size, False));
3500 return res;
3501 }
3502 case Iop_FtoI32Sx2_RZ: {
3503 HReg res = newVRegD(env);
3504 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3505 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoS,
3506 res, arg, 2, False));
3507 return res;
3508 }
3509 case Iop_FtoI32Ux2_RZ: {
3510 HReg res = newVRegD(env);
3511 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3512 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoU,
3513 res, arg, 2, False));
3514 return res;
3515 }
3516 case Iop_I32StoFx2: {
3517 HReg res = newVRegD(env);
3518 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3519 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTStoF,
3520 res, arg, 2, False));
3521 return res;
3522 }
3523 case Iop_I32UtoFx2: {
3524 HReg res = newVRegD(env);
3525 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3526 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTUtoF,
3527 res, arg, 2, False));
3528 return res;
3529 }
3530 case Iop_F32toF16x4: {
3531 HReg res = newVRegD(env);
3532 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3533 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTF32toF16,
3534 res, arg, 2, False));
3535 return res;
3536 }
3537 case Iop_Recip32Fx2: {
3538 HReg res = newVRegD(env);
3539 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3540 addInstr(env, ARMInstr_NUnary(ARMneon_VRECIPF,
3541 res, argL, 0, False));
3542 return res;
3543 }
3544 case Iop_Recip32x2: {
3545 HReg res = newVRegD(env);
3546 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3547 addInstr(env, ARMInstr_NUnary(ARMneon_VRECIP,
3548 res, argL, 0, False));
3549 return res;
3550 }
3551 case Iop_Abs32Fx2: {
3552 DECLARE_PATTERN(p_vabd_32fx2);
3553 DEFINE_PATTERN(p_vabd_32fx2,
3554 unop(Iop_Abs32Fx2,
3555 binop(Iop_Sub32Fx2,
3556 bind(0),
3557 bind(1))));
3558 if (matchIRExpr(&mi, p_vabd_32fx2, e)) {
3559 HReg res = newVRegD(env);
3560 HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3561 HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3562 addInstr(env, ARMInstr_NBinary(ARMneon_VABDFP,
3563 res, argL, argR, 0, False));
3564 return res;
3565 } else {
3566 HReg res = newVRegD(env);
3567 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3568 addInstr(env, ARMInstr_NUnary(ARMneon_VABSFP,
3569 res, arg, 0, False));
3570 return res;
3571 }
3572 }
3573 case Iop_Rsqrte32Fx2: {
3574 HReg res = newVRegD(env);
3575 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3576 addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTEFP,
3577 res, arg, 0, False));
3578 return res;
3579 }
3580 case Iop_Rsqrte32x2: {
3581 HReg res = newVRegD(env);
3582 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3583 addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTE,
3584 res, arg, 0, False));
3585 return res;
3586 }
3587 case Iop_Neg32Fx2: {
3588 HReg res = newVRegD(env);
3589 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3590 addInstr(env, ARMInstr_NUnary(ARMneon_VNEGF,
3591 res, arg, 0, False));
3592 return res;
3593 }
3594 default:
3595 break;
3596 }
3597 } /* if (e->tag == Iex_Unop) */
3598
3599 if (e->tag == Iex_Triop) {
3600 switch (e->Iex.Triop.op) {
3601 case Iop_Extract64: {
3602 HReg res = newVRegD(env);
3603 HReg argL = iselNeon64Expr(env, e->Iex.Triop.arg1);
3604 HReg argR = iselNeon64Expr(env, e->Iex.Triop.arg2);
3605 UInt imm4;
3606 if (e->Iex.Triop.arg3->tag != Iex_Const ||
3607 typeOfIRExpr(env->type_env, e->Iex.Triop.arg3) != Ity_I8) {
3608 vpanic("ARM target supports Iop_Extract64 with constant "
3609 "third argument less than 16 only\n");
3610 }
3611 imm4 = e->Iex.Triop.arg3->Iex.Const.con->Ico.U8;
3612 if (imm4 >= 8) {
3613 vpanic("ARM target supports Iop_Extract64 with constant "
3614 "third argument less than 16 only\n");
3615 }
3616 addInstr(env, ARMInstr_NBinary(ARMneon_VEXT,
3617 res, argL, argR, imm4, False));
3618 return res;
3619 }
3620 case Iop_SetElem8x8:
3621 case Iop_SetElem16x4:
3622 case Iop_SetElem32x2: {
3623 HReg res = newVRegD(env);
3624 HReg dreg = iselNeon64Expr(env, e->Iex.Triop.arg1);
3625 HReg arg = iselIntExpr_R(env, e->Iex.Triop.arg3);
3626 UInt index, size;
3627 if (e->Iex.Triop.arg2->tag != Iex_Const ||
3628 typeOfIRExpr(env->type_env, e->Iex.Triop.arg2) != Ity_I8) {
3629 vpanic("ARM target supports SetElem with constant "
3630 "second argument only\n");
3631 }
3632 index = e->Iex.Triop.arg2->Iex.Const.con->Ico.U8;
3633 switch (e->Iex.Triop.op) {
3634 case Iop_SetElem8x8: vassert(index < 8); size = 0; break;
3635 case Iop_SetElem16x4: vassert(index < 4); size = 1; break;
3636 case Iop_SetElem32x2: vassert(index < 2); size = 2; break;
3637 default: vassert(0);
3638 }
3639 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, res, dreg, 4, False));
3640 addInstr(env, ARMInstr_NUnaryS(ARMneon_SETELEM,
3641 mkARMNRS(ARMNRS_Scalar, res, index),
3642 mkARMNRS(ARMNRS_Reg, arg, 0),
3643 size, False));
3644 return res;
3645 }
3646 default:
3647 break;
3648 }
3649 }
3650
3651 /* --------- MULTIPLEX --------- */
3652 if (e->tag == Iex_Mux0X) {
3653 HReg rLo, rHi;
3654 HReg res = newVRegD(env);
3655 iselInt64Expr(&rHi, &rLo, env, e);
3656 addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
3657 return res;
3658 }
3659
3660 ppIRExpr(e);
3661 vpanic("iselNeon64Expr");
3662}
3663
3664static HReg iselNeonExpr ( ISelEnv* env, IRExpr* e )
3665{
3666 HReg r = iselNeonExpr_wrk( env, e );
3667 vassert(hregClass(r) == HRcVec128);
3668 vassert(hregIsVirtual(r));
3669 return r;
3670}
3671
3672/* DO NOT CALL THIS DIRECTLY */
3673static HReg iselNeonExpr_wrk ( ISelEnv* env, IRExpr* e )
3674{
3675 IRType ty = typeOfIRExpr(env->type_env, e);
3676 MatchInfo mi;
3677 vassert(e);
3678 vassert(ty == Ity_V128);
3679
3680 if (e->tag == Iex_RdTmp) {
3681 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
3682 }
3683
3684 if (e->tag == Iex_Const) {
3685 /* At the moment there should be no 128-bit constants in IR for ARM
3686 generated during disassemble. They are represented as Iop_64HLtoV128
3687 binary operation and are handled among binary ops. */
3688 /* But zero can be created by valgrind internal optimizer */
3689 if (e->Iex.Const.con->Ico.V128 == 0) {
3690 HReg res = newVRegV(env);
3691 addInstr(env, ARMInstr_NeonImm(res, ARMNImm_TI(0, 0)));
3692 return res;
3693 }
3694 ppIRExpr(e);
3695 vpanic("128-bit constant is not implemented");
3696 }
3697
3698 if (e->tag == Iex_Load) {
3699 HReg res = newVRegV(env);
3700 ARMAModeN* am = iselIntExpr_AModeN(env, e->Iex.Load.addr);
3701 vassert(ty == Ity_V128);
3702 addInstr(env, ARMInstr_NLdStQ(True, res, am));
3703 return res;
3704 }
3705
3706 if (e->tag == Iex_Get) {
3707 HReg addr = newVRegI(env);
3708 HReg res = newVRegV(env);
3709 vassert(ty == Ity_V128);
3710 addInstr(env, ARMInstr_Add32(addr, hregARM_R8(), e->Iex.Get.offset));
3711 addInstr(env, ARMInstr_NLdStQ(True, res, mkARMAModeN_R(addr)));
3712 return res;
3713 }
3714
3715 if (e->tag == Iex_Unop) {
3716 switch (e->Iex.Unop.op) {
3717 case Iop_NotV128: {
3718 DECLARE_PATTERN(p_veqz_8x16);
3719 DECLARE_PATTERN(p_veqz_16x8);
3720 DECLARE_PATTERN(p_veqz_32x4);
3721 DECLARE_PATTERN(p_vcge_8sx16);
3722 DECLARE_PATTERN(p_vcge_16sx8);
3723 DECLARE_PATTERN(p_vcge_32sx4);
3724 DECLARE_PATTERN(p_vcge_8ux16);
3725 DECLARE_PATTERN(p_vcge_16ux8);
3726 DECLARE_PATTERN(p_vcge_32ux4);
3727 DEFINE_PATTERN(p_veqz_8x16,
3728 unop(Iop_NotV128, unop(Iop_CmpNEZ8x16, bind(0))));
3729 DEFINE_PATTERN(p_veqz_16x8,
3730 unop(Iop_NotV128, unop(Iop_CmpNEZ16x8, bind(0))));
3731 DEFINE_PATTERN(p_veqz_32x4,
3732 unop(Iop_NotV128, unop(Iop_CmpNEZ32x4, bind(0))));
3733 DEFINE_PATTERN(p_vcge_8sx16,
3734 unop(Iop_NotV128, binop(Iop_CmpGT8Sx16, bind(1), bind(0))));
3735 DEFINE_PATTERN(p_vcge_16sx8,
3736 unop(Iop_NotV128, binop(Iop_CmpGT16Sx8, bind(1), bind(0))));
3737 DEFINE_PATTERN(p_vcge_32sx4,
3738 unop(Iop_NotV128, binop(Iop_CmpGT32Sx4, bind(1), bind(0))));
3739 DEFINE_PATTERN(p_vcge_8ux16,
3740 unop(Iop_NotV128, binop(Iop_CmpGT8Ux16, bind(1), bind(0))));
3741 DEFINE_PATTERN(p_vcge_16ux8,
3742 unop(Iop_NotV128, binop(Iop_CmpGT16Ux8, bind(1), bind(0))));
3743 DEFINE_PATTERN(p_vcge_32ux4,
3744 unop(Iop_NotV128, binop(Iop_CmpGT32Ux4, bind(1), bind(0))));
3745 if (matchIRExpr(&mi, p_veqz_8x16, e)) {
3746 HReg res = newVRegV(env);
3747 HReg arg = iselNeonExpr(env, mi.bindee[0]);
3748 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 0, True));
3749 return res;
3750 } else if (matchIRExpr(&mi, p_veqz_16x8, e)) {
3751 HReg res = newVRegV(env);
3752 HReg arg = iselNeonExpr(env, mi.bindee[0]);
3753 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 1, True));
3754 return res;
3755 } else if (matchIRExpr(&mi, p_veqz_32x4, e)) {
3756 HReg res = newVRegV(env);
3757 HReg arg = iselNeonExpr(env, mi.bindee[0]);
3758 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 2, True));
3759 return res;
3760 } else if (matchIRExpr(&mi, p_vcge_8sx16, e)) {
3761 HReg res = newVRegV(env);
3762 HReg argL = iselNeonExpr(env, mi.bindee[0]);
3763 HReg argR = iselNeonExpr(env, mi.bindee[1]);
3764 addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3765 res, argL, argR, 0, True));
3766 return res;
3767 } else if (matchIRExpr(&mi, p_vcge_16sx8, e)) {
3768 HReg res = newVRegV(env);
3769 HReg argL = iselNeonExpr(env, mi.bindee[0]);
3770 HReg argR = iselNeonExpr(env, mi.bindee[1]);
3771 addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3772 res, argL, argR, 1, True));
3773 return res;
3774 } else if (matchIRExpr(&mi, p_vcge_32sx4, e)) {
3775 HReg res = newVRegV(env);
3776 HReg argL = iselNeonExpr(env, mi.bindee[0]);
3777 HReg argR = iselNeonExpr(env, mi.bindee[1]);
3778 addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3779 res, argL, argR, 2, True));
3780 return res;
3781 } else if (matchIRExpr(&mi, p_vcge_8ux16, e)) {
3782 HReg res = newVRegV(env);
3783 HReg argL = iselNeonExpr(env, mi.bindee[0]);
3784 HReg argR = iselNeonExpr(env, mi.bindee[1]);
3785 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3786 res, argL, argR, 0, True));
3787 return res;
3788 } else if (matchIRExpr(&mi, p_vcge_16ux8, e)) {
3789 HReg res = newVRegV(env);
3790 HReg argL = iselNeonExpr(env, mi.bindee[0]);
3791 HReg argR = iselNeonExpr(env, mi.bindee[1]);
3792 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3793 res, argL, argR, 1, True));
3794 return res;
3795 } else if (matchIRExpr(&mi, p_vcge_32ux4, e)) {
3796 HReg res = newVRegV(env);
3797 HReg argL = iselNeonExpr(env, mi.bindee[0]);
3798 HReg argR = iselNeonExpr(env, mi.bindee[1]);
3799 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3800 res, argL, argR, 2, True));
3801 return res;
3802 } else {
3803 HReg res = newVRegV(env);
3804 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3805 addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, arg, 4, True));
3806 return res;
3807 }
3808 }
3809 case Iop_Dup8x16:
3810 case Iop_Dup16x8:
3811 case Iop_Dup32x4: {
3812 HReg res, arg;
3813 UInt size;
3814 DECLARE_PATTERN(p_vdup_8x16);
3815 DECLARE_PATTERN(p_vdup_16x8);
3816 DECLARE_PATTERN(p_vdup_32x4);
3817 DEFINE_PATTERN(p_vdup_8x16,
3818 unop(Iop_Dup8x16, binop(Iop_GetElem8x8, bind(0), bind(1))));
3819 DEFINE_PATTERN(p_vdup_16x8,
3820 unop(Iop_Dup16x8, binop(Iop_GetElem16x4, bind(0), bind(1))));
3821 DEFINE_PATTERN(p_vdup_32x4,
3822 unop(Iop_Dup32x4, binop(Iop_GetElem32x2, bind(0), bind(1))));
3823 if (matchIRExpr(&mi, p_vdup_8x16, e)) {
3824 UInt index;
3825 UInt imm4;
3826 if (mi.bindee[1]->tag == Iex_Const &&
3827 typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3828 index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3829 imm4 = (index << 1) + 1;
3830 if (index < 8) {
3831 res = newVRegV(env);
3832 arg = iselNeon64Expr(env, mi.bindee[0]);
3833 addInstr(env, ARMInstr_NUnaryS(
3834 ARMneon_VDUP,
3835 mkARMNRS(ARMNRS_Reg, res, 0),
3836 mkARMNRS(ARMNRS_Scalar, arg, index),
3837 imm4, True
3838 ));
3839 return res;
3840 }
3841 }
3842 } else if (matchIRExpr(&mi, p_vdup_16x8, e)) {
3843 UInt index;
3844 UInt imm4;
3845 if (mi.bindee[1]->tag == Iex_Const &&
3846 typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3847 index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3848 imm4 = (index << 2) + 2;
3849 if (index < 4) {
3850 res = newVRegV(env);
3851 arg = iselNeon64Expr(env, mi.bindee[0]);
3852 addInstr(env, ARMInstr_NUnaryS(
3853 ARMneon_VDUP,
3854 mkARMNRS(ARMNRS_Reg, res, 0),
3855 mkARMNRS(ARMNRS_Scalar, arg, index),
3856 imm4, True
3857 ));
3858 return res;
3859 }
3860 }
3861 } else if (matchIRExpr(&mi, p_vdup_32x4, e)) {
3862 UInt index;
3863 UInt imm4;
3864 if (mi.bindee[1]->tag == Iex_Const &&
3865 typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3866 index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3867 imm4 = (index << 3) + 4;
3868 if (index < 2) {
3869 res = newVRegV(env);
3870 arg = iselNeon64Expr(env, mi.bindee[0]);
3871 addInstr(env, ARMInstr_NUnaryS(
3872 ARMneon_VDUP,
3873 mkARMNRS(ARMNRS_Reg, res, 0),
3874 mkARMNRS(ARMNRS_Scalar, arg, index),
3875 imm4, True
3876 ));
3877 return res;
3878 }
3879 }
3880 }
3881 arg = iselIntExpr_R(env, e->Iex.Unop.arg);
3882 res = newVRegV(env);
3883 switch (e->Iex.Unop.op) {
3884 case Iop_Dup8x16: size = 0; break;
3885 case Iop_Dup16x8: size = 1; break;
3886 case Iop_Dup32x4: size = 2; break;
3887 default: vassert(0);
3888 }
3889 addInstr(env, ARMInstr_NUnary(ARMneon_DUP, res, arg, size, True));
3890 return res;
3891 }
3892 case Iop_Abs8x16:
3893 case Iop_Abs16x8:
3894 case Iop_Abs32x4: {
3895 HReg res = newVRegV(env);
3896 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3897 UInt size = 0;
3898 switch(e->Iex.Binop.op) {
3899 case Iop_Abs8x16: size = 0; break;
3900 case Iop_Abs16x8: size = 1; break;
3901 case Iop_Abs32x4: size = 2; break;
3902 default: vassert(0);
3903 }
3904 addInstr(env, ARMInstr_NUnary(ARMneon_ABS, res, arg, size, True));
3905 return res;
3906 }
3907 case Iop_Reverse64_8x16:
3908 case Iop_Reverse64_16x8:
3909 case Iop_Reverse64_32x4: {
3910 HReg res = newVRegV(env);
3911 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3912 UInt size = 0;
3913 switch(e->Iex.Binop.op) {
3914 case Iop_Reverse64_8x16: size = 0; break;
3915 case Iop_Reverse64_16x8: size = 1; break;
3916 case Iop_Reverse64_32x4: size = 2; break;
3917 default: vassert(0);
3918 }
3919 addInstr(env, ARMInstr_NUnary(ARMneon_REV64,
3920 res, arg, size, True));
3921 return res;
3922 }
3923 case Iop_Reverse32_8x16:
3924 case Iop_Reverse32_16x8: {
3925 HReg res = newVRegV(env);
3926 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3927 UInt size = 0;
3928 switch(e->Iex.Binop.op) {
3929 case Iop_Reverse32_8x16: size = 0; break;
3930 case Iop_Reverse32_16x8: size = 1; break;
3931 default: vassert(0);
3932 }
3933 addInstr(env, ARMInstr_NUnary(ARMneon_REV32,
3934 res, arg, size, True));
3935 return res;
3936 }
3937 case Iop_Reverse16_8x16: {
3938 HReg res = newVRegV(env);
3939 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3940 UInt size = 0;
3941 addInstr(env, ARMInstr_NUnary(ARMneon_REV16,
3942 res, arg, size, True));
3943 return res;
3944 }
3945 case Iop_CmpNEZ64x2: {
3946 HReg x_lsh = newVRegV(env);
3947 HReg x_rsh = newVRegV(env);
3948 HReg lsh_amt = newVRegV(env);
3949 HReg rsh_amt = newVRegV(env);
3950 HReg zero = newVRegV(env);
3951 HReg tmp = newVRegV(env);
3952 HReg tmp2 = newVRegV(env);
3953 HReg res = newVRegV(env);
3954 HReg x = newVRegV(env);
3955 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3956 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp2, arg, 2, True));
3957 addInstr(env, ARMInstr_NUnary(ARMneon_NOT, x, tmp2, 4, True));
3958 addInstr(env, ARMInstr_NeonImm(lsh_amt, ARMNImm_TI(0, 32)));
3959 addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0, 0)));
3960 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
3961 rsh_amt, zero, lsh_amt, 2, True));
3962 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
3963 x_lsh, x, lsh_amt, 3, True));
3964 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
3965 x_rsh, x, rsh_amt, 3, True));
3966 addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
3967 tmp, x_lsh, x_rsh, 0, True));
3968 addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
3969 res, tmp, x, 0, True));
3970 return res;
3971 }
3972 case Iop_CmpNEZ8x16:
3973 case Iop_CmpNEZ16x8:
3974 case Iop_CmpNEZ32x4: {
3975 HReg res = newVRegV(env);
3976 HReg tmp = newVRegV(env);
3977 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3978 UInt size;
3979 switch (e->Iex.Unop.op) {
3980 case Iop_CmpNEZ8x16: size = 0; break;
3981 case Iop_CmpNEZ16x8: size = 1; break;
3982 case Iop_CmpNEZ32x4: size = 2; break;
3983 default: vassert(0);
3984 }
3985 addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp, arg, size, True));
3986 addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, tmp, 4, True));
3987 return res;
3988 }
sewardj5f438dd2011-06-16 11:36:23 +00003989 case Iop_Widen8Uto16x8:
3990 case Iop_Widen16Uto32x4:
3991 case Iop_Widen32Uto64x2: {
sewardj6c60b322010-08-22 12:48:28 +00003992 HReg res = newVRegV(env);
3993 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3994 UInt size;
3995 switch (e->Iex.Unop.op) {
sewardj5f438dd2011-06-16 11:36:23 +00003996 case Iop_Widen8Uto16x8: size = 0; break;
3997 case Iop_Widen16Uto32x4: size = 1; break;
3998 case Iop_Widen32Uto64x2: size = 2; break;
sewardj6c60b322010-08-22 12:48:28 +00003999 default: vassert(0);
4000 }
4001 addInstr(env, ARMInstr_NUnary(ARMneon_COPYLU,
4002 res, arg, size, True));
4003 return res;
4004 }
sewardj5f438dd2011-06-16 11:36:23 +00004005 case Iop_Widen8Sto16x8:
4006 case Iop_Widen16Sto32x4:
4007 case Iop_Widen32Sto64x2: {
sewardj6c60b322010-08-22 12:48:28 +00004008 HReg res = newVRegV(env);
4009 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4010 UInt size;
4011 switch (e->Iex.Unop.op) {
sewardj5f438dd2011-06-16 11:36:23 +00004012 case Iop_Widen8Sto16x8: size = 0; break;
4013 case Iop_Widen16Sto32x4: size = 1; break;
4014 case Iop_Widen32Sto64x2: size = 2; break;
sewardj6c60b322010-08-22 12:48:28 +00004015 default: vassert(0);
4016 }
4017 addInstr(env, ARMInstr_NUnary(ARMneon_COPYLS,
4018 res, arg, size, True));
4019 return res;
4020 }
4021 case Iop_PwAddL8Sx16:
4022 case Iop_PwAddL16Sx8:
4023 case Iop_PwAddL32Sx4: {
4024 HReg res = newVRegV(env);
4025 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4026 UInt size = 0;
4027 switch(e->Iex.Binop.op) {
4028 case Iop_PwAddL8Sx16: size = 0; break;
4029 case Iop_PwAddL16Sx8: size = 1; break;
4030 case Iop_PwAddL32Sx4: size = 2; break;
4031 default: vassert(0);
4032 }
4033 addInstr(env, ARMInstr_NUnary(ARMneon_PADDLS,
4034 res, arg, size, True));
4035 return res;
4036 }
4037 case Iop_PwAddL8Ux16:
4038 case Iop_PwAddL16Ux8:
4039 case Iop_PwAddL32Ux4: {
4040 HReg res = newVRegV(env);
4041 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4042 UInt size = 0;
4043 switch(e->Iex.Binop.op) {
4044 case Iop_PwAddL8Ux16: size = 0; break;
4045 case Iop_PwAddL16Ux8: size = 1; break;
4046 case Iop_PwAddL32Ux4: size = 2; break;
4047 default: vassert(0);
4048 }
4049 addInstr(env, ARMInstr_NUnary(ARMneon_PADDLU,
4050 res, arg, size, True));
4051 return res;
4052 }
4053 case Iop_Cnt8x16: {
4054 HReg res = newVRegV(env);
4055 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4056 UInt size = 0;
4057 addInstr(env, ARMInstr_NUnary(ARMneon_CNT, res, arg, size, True));
4058 return res;
4059 }
4060 case Iop_Clz8Sx16:
4061 case Iop_Clz16Sx8:
4062 case Iop_Clz32Sx4: {
4063 HReg res = newVRegV(env);
4064 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4065 UInt size = 0;
4066 switch(e->Iex.Binop.op) {
4067 case Iop_Clz8Sx16: size = 0; break;
4068 case Iop_Clz16Sx8: size = 1; break;
4069 case Iop_Clz32Sx4: size = 2; break;
4070 default: vassert(0);
4071 }
4072 addInstr(env, ARMInstr_NUnary(ARMneon_CLZ, res, arg, size, True));
4073 return res;
4074 }
4075 case Iop_Cls8Sx16:
4076 case Iop_Cls16Sx8:
4077 case Iop_Cls32Sx4: {
4078 HReg res = newVRegV(env);
4079 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4080 UInt size = 0;
4081 switch(e->Iex.Binop.op) {
4082 case Iop_Cls8Sx16: size = 0; break;
4083 case Iop_Cls16Sx8: size = 1; break;
4084 case Iop_Cls32Sx4: size = 2; break;
4085 default: vassert(0);
4086 }
4087 addInstr(env, ARMInstr_NUnary(ARMneon_CLS, res, arg, size, True));
4088 return res;
4089 }
4090 case Iop_FtoI32Sx4_RZ: {
4091 HReg res = newVRegV(env);
4092 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4093 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoS,
4094 res, arg, 2, True));
4095 return res;
4096 }
4097 case Iop_FtoI32Ux4_RZ: {
4098 HReg res = newVRegV(env);
4099 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4100 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoU,
4101 res, arg, 2, True));
4102 return res;
4103 }
4104 case Iop_I32StoFx4: {
4105 HReg res = newVRegV(env);
4106 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4107 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTStoF,
4108 res, arg, 2, True));
4109 return res;
4110 }
4111 case Iop_I32UtoFx4: {
4112 HReg res = newVRegV(env);
4113 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4114 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTUtoF,
4115 res, arg, 2, True));
4116 return res;
4117 }
4118 case Iop_F16toF32x4: {
4119 HReg res = newVRegV(env);
4120 HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4121 addInstr(env, ARMInstr_NUnary(ARMneon_VCVTF16toF32,
4122 res, arg, 2, True));
4123 return res;
4124 }
4125 case Iop_Recip32Fx4: {
4126 HReg res = newVRegV(env);
4127 HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4128 addInstr(env, ARMInstr_NUnary(ARMneon_VRECIPF,
4129 res, argL, 0, True));
4130 return res;
4131 }
4132 case Iop_Recip32x4: {
4133 HReg res = newVRegV(env);
4134 HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4135 addInstr(env, ARMInstr_NUnary(ARMneon_VRECIP,
4136 res, argL, 0, True));
4137 return res;
4138 }
4139 case Iop_Abs32Fx4: {
4140 DECLARE_PATTERN(p_vabd_32fx4);
4141 DEFINE_PATTERN(p_vabd_32fx4,
4142 unop(Iop_Abs32Fx4,
4143 binop(Iop_Sub32Fx4,
4144 bind(0),
4145 bind(1))));
4146 if (matchIRExpr(&mi, p_vabd_32fx4, e)) {
4147 HReg res = newVRegV(env);
4148 HReg argL = iselNeonExpr(env, mi.bindee[0]);
4149 HReg argR = iselNeonExpr(env, mi.bindee[1]);
4150 addInstr(env, ARMInstr_NBinary(ARMneon_VABDFP,
4151 res, argL, argR, 0, True));
4152 return res;
4153 } else {
4154 HReg res = newVRegV(env);
4155 HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4156 addInstr(env, ARMInstr_NUnary(ARMneon_VABSFP,
4157 res, argL, 0, True));
4158 return res;
4159 }
4160 }
4161 case Iop_Rsqrte32Fx4: {
4162 HReg res = newVRegV(env);
4163 HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4164 addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTEFP,
4165 res, argL, 0, True));
4166 return res;
4167 }
4168 case Iop_Rsqrte32x4: {
4169 HReg res = newVRegV(env);
4170 HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4171 addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTE,
4172 res, argL, 0, True));
4173 return res;
4174 }
4175 case Iop_Neg32Fx4: {
4176 HReg res = newVRegV(env);
4177 HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4178 addInstr(env, ARMInstr_NUnary(ARMneon_VNEGF,
4179 res, arg, 0, True));
4180 return res;
4181 }
4182 /* ... */
4183 default:
4184 break;
4185 }
4186 }
4187
4188 if (e->tag == Iex_Binop) {
4189 switch (e->Iex.Binop.op) {
4190 case Iop_64HLtoV128:
4191 /* Try to match into single "VMOV reg, imm" instruction */
4192 if (e->Iex.Binop.arg1->tag == Iex_Const &&
4193 e->Iex.Binop.arg2->tag == Iex_Const &&
4194 typeOfIRExpr(env->type_env, e->Iex.Binop.arg1) == Ity_I64 &&
4195 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) == Ity_I64 &&
4196 e->Iex.Binop.arg1->Iex.Const.con->Ico.U64 ==
4197 e->Iex.Binop.arg2->Iex.Const.con->Ico.U64) {
4198 ULong imm64 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
4199 ARMNImm *imm = Imm64_to_ARMNImm(imm64);
4200 if (imm) {
4201 HReg res = newVRegV(env);
4202 addInstr(env, ARMInstr_NeonImm(res, imm));
4203 return res;
4204 }
4205 if ((imm64 >> 32) == 0LL &&
4206 (imm = Imm64_to_ARMNImm(imm64 | (imm64 << 32))) != NULL) {
4207 HReg tmp1 = newVRegV(env);
4208 HReg tmp2 = newVRegV(env);
4209 HReg res = newVRegV(env);
4210 if (imm->type < 10) {
4211 addInstr(env, ARMInstr_NeonImm(tmp1, ARMNImm_TI(9,0x0f)));
4212 addInstr(env, ARMInstr_NeonImm(tmp2, imm));
4213 addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
4214 res, tmp1, tmp2, 4, True));
4215 return res;
4216 }
4217 }
4218 if ((imm64 & 0xFFFFFFFFLL) == 0LL &&
4219 (imm = Imm64_to_ARMNImm(imm64 | (imm64 >> 32))) != NULL) {
4220 HReg tmp1 = newVRegV(env);
4221 HReg tmp2 = newVRegV(env);
4222 HReg res = newVRegV(env);
4223 if (imm->type < 10) {
4224 addInstr(env, ARMInstr_NeonImm(tmp1, ARMNImm_TI(9,0xf0)));
4225 addInstr(env, ARMInstr_NeonImm(tmp2, imm));
4226 addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
4227 res, tmp1, tmp2, 4, True));
4228 return res;
4229 }
4230 }
4231 }
sewardj6828dc72011-09-30 08:49:02 +00004232 /* Does not match "VMOV Reg, Imm" form. We'll have to do
4233 it the slow way. */
4234 {
4235 /* local scope */
4236 /* Done via the stack for ease of use. */
4237 /* FIXME: assumes little endian host */
4238 HReg w3, w2, w1, w0;
4239 HReg res = newVRegV(env);
4240 ARMAMode1* sp_0 = ARMAMode1_RI(hregARM_R13(), 0);
4241 ARMAMode1* sp_4 = ARMAMode1_RI(hregARM_R13(), 4);
4242 ARMAMode1* sp_8 = ARMAMode1_RI(hregARM_R13(), 8);
4243 ARMAMode1* sp_12 = ARMAMode1_RI(hregARM_R13(), 12);
4244 ARMRI84* c_16 = ARMRI84_I84(16,0);
4245 /* Make space for SP */
4246 addInstr(env, ARMInstr_Alu(ARMalu_SUB, hregARM_R13(),
4247 hregARM_R13(), c_16));
4248
4249 /* Store the less significant 64 bits */
4250 iselInt64Expr(&w1, &w0, env, e->Iex.Binop.arg2);
4251 addInstr(env, ARMInstr_LdSt32(False/*store*/, w0, sp_0));
4252 addInstr(env, ARMInstr_LdSt32(False/*store*/, w1, sp_4));
4253
4254 /* Store the more significant 64 bits */
4255 iselInt64Expr(&w3, &w2, env, e->Iex.Binop.arg1);
4256 addInstr(env, ARMInstr_LdSt32(False/*store*/, w2, sp_8));
4257 addInstr(env, ARMInstr_LdSt32(False/*store*/, w3, sp_12));
4258
4259 /* Load result back from stack. */
4260 addInstr(env, ARMInstr_NLdStQ(True/*load*/, res,
4261 mkARMAModeN_R(hregARM_R13())));
4262
4263 /* Restore SP */
4264 addInstr(env, ARMInstr_Alu(ARMalu_ADD, hregARM_R13(),
4265 hregARM_R13(), c_16));
4266 return res;
4267 } /* local scope */
sewardj6c60b322010-08-22 12:48:28 +00004268 goto neon_expr_bad;
4269 case Iop_AndV128: {
4270 HReg res = newVRegV(env);
4271 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4272 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4273 addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
4274 res, argL, argR, 4, True));
4275 return res;
4276 }
4277 case Iop_OrV128: {
4278 HReg res = newVRegV(env);
4279 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4280 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4281 addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
4282 res, argL, argR, 4, True));
4283 return res;
4284 }
4285 case Iop_XorV128: {
4286 HReg res = newVRegV(env);
4287 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4288 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4289 addInstr(env, ARMInstr_NBinary(ARMneon_VXOR,
4290 res, argL, argR, 4, True));
4291 return res;
4292 }
4293 case Iop_Add8x16:
4294 case Iop_Add16x8:
4295 case Iop_Add32x4:
4296 case Iop_Add64x2: {
4297 /*
4298 FIXME: remove this if not used
4299 DECLARE_PATTERN(p_vrhadd_32sx4);
4300 ULong one = (1LL << 32) | 1LL;
4301 DEFINE_PATTERN(p_vrhadd_32sx4,
4302 binop(Iop_Add32x4,
4303 binop(Iop_Add32x4,
4304 binop(Iop_SarN32x4,
4305 bind(0),
4306 mkU8(1)),
4307 binop(Iop_SarN32x4,
4308 bind(1),
4309 mkU8(1))),
4310 binop(Iop_SarN32x4,
4311 binop(Iop_Add32x4,
4312 binop(Iop_Add32x4,
4313 binop(Iop_AndV128,
4314 bind(0),
4315 mkU128(one)),
4316 binop(Iop_AndV128,
4317 bind(1),
4318 mkU128(one))),
4319 mkU128(one)),
4320 mkU8(1))));
4321 */
4322 HReg res = newVRegV(env);
4323 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4324 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4325 UInt size;
4326 switch (e->Iex.Binop.op) {
4327 case Iop_Add8x16: size = 0; break;
4328 case Iop_Add16x8: size = 1; break;
4329 case Iop_Add32x4: size = 2; break;
4330 case Iop_Add64x2: size = 3; break;
4331 default:
4332 ppIROp(e->Iex.Binop.op);
4333 vpanic("Illegal element size in VADD");
4334 }
4335 addInstr(env, ARMInstr_NBinary(ARMneon_VADD,
4336 res, argL, argR, size, True));
4337 return res;
4338 }
4339 case Iop_Add32Fx4: {
4340 HReg res = newVRegV(env);
4341 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4342 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4343 UInt size = 0;
4344 addInstr(env, ARMInstr_NBinary(ARMneon_VADDFP,
4345 res, argL, argR, size, True));
4346 return res;
4347 }
4348 case Iop_Recps32Fx4: {
4349 HReg res = newVRegV(env);
4350 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4351 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4352 UInt size = 0;
4353 addInstr(env, ARMInstr_NBinary(ARMneon_VRECPS,
4354 res, argL, argR, size, True));
4355 return res;
4356 }
4357 case Iop_Rsqrts32Fx4: {
4358 HReg res = newVRegV(env);
4359 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4360 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4361 UInt size = 0;
4362 addInstr(env, ARMInstr_NBinary(ARMneon_VRSQRTS,
4363 res, argL, argR, size, True));
4364 return res;
4365 }
4366 case Iop_InterleaveEvenLanes8x16:
4367 case Iop_InterleaveEvenLanes16x8:
4368 case Iop_InterleaveEvenLanes32x4:
4369 case Iop_InterleaveOddLanes8x16:
4370 case Iop_InterleaveOddLanes16x8:
4371 case Iop_InterleaveOddLanes32x4: {
4372 HReg tmp = newVRegV(env);
4373 HReg res = newVRegV(env);
4374 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4375 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4376 UInt size;
4377 UInt is_lo;
4378 switch (e->Iex.Binop.op) {
4379 case Iop_InterleaveEvenLanes8x16: is_lo = 0; size = 0; break;
4380 case Iop_InterleaveOddLanes8x16: is_lo = 1; size = 0; break;
4381 case Iop_InterleaveEvenLanes16x8: is_lo = 0; size = 1; break;
4382 case Iop_InterleaveOddLanes16x8: is_lo = 1; size = 1; break;
4383 case Iop_InterleaveEvenLanes32x4: is_lo = 0; size = 2; break;
4384 case Iop_InterleaveOddLanes32x4: is_lo = 1; size = 2; break;
4385 default:
4386 ppIROp(e->Iex.Binop.op);
4387 vpanic("Illegal element size in VTRN");
4388 }
4389 if (is_lo) {
4390 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4391 tmp, argL, 4, True));
4392 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4393 res, argR, 4, True));
4394 addInstr(env, ARMInstr_NDual(ARMneon_TRN,
4395 res, tmp, size, True));
4396 } else {
4397 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4398 tmp, argR, 4, True));
4399 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4400 res, argL, 4, True));
4401 addInstr(env, ARMInstr_NDual(ARMneon_TRN,
4402 tmp, res, size, True));
4403 }
4404 return res;
4405 }
4406 case Iop_InterleaveHI8x16:
4407 case Iop_InterleaveHI16x8:
4408 case Iop_InterleaveHI32x4:
4409 case Iop_InterleaveLO8x16:
4410 case Iop_InterleaveLO16x8:
4411 case Iop_InterleaveLO32x4: {
4412 HReg tmp = newVRegV(env);
4413 HReg res = newVRegV(env);
4414 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4415 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4416 UInt size;
4417 UInt is_lo;
4418 switch (e->Iex.Binop.op) {
4419 case Iop_InterleaveHI8x16: is_lo = 1; size = 0; break;
4420 case Iop_InterleaveLO8x16: is_lo = 0; size = 0; break;
4421 case Iop_InterleaveHI16x8: is_lo = 1; size = 1; break;
4422 case Iop_InterleaveLO16x8: is_lo = 0; size = 1; break;
4423 case Iop_InterleaveHI32x4: is_lo = 1; size = 2; break;
4424 case Iop_InterleaveLO32x4: is_lo = 0; size = 2; break;
4425 default:
4426 ppIROp(e->Iex.Binop.op);
4427 vpanic("Illegal element size in VZIP");
4428 }
4429 if (is_lo) {
4430 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4431 tmp, argL, 4, True));
4432 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4433 res, argR, 4, True));
4434 addInstr(env, ARMInstr_NDual(ARMneon_ZIP,
4435 res, tmp, size, True));
4436 } else {
4437 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4438 tmp, argR, 4, True));
4439 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4440 res, argL, 4, True));
4441 addInstr(env, ARMInstr_NDual(ARMneon_ZIP,
4442 tmp, res, size, True));
4443 }
4444 return res;
4445 }
4446 case Iop_CatOddLanes8x16:
4447 case Iop_CatOddLanes16x8:
4448 case Iop_CatOddLanes32x4:
4449 case Iop_CatEvenLanes8x16:
4450 case Iop_CatEvenLanes16x8:
4451 case Iop_CatEvenLanes32x4: {
4452 HReg tmp = newVRegV(env);
4453 HReg res = newVRegV(env);
4454 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4455 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4456 UInt size;
4457 UInt is_lo;
4458 switch (e->Iex.Binop.op) {
4459 case Iop_CatOddLanes8x16: is_lo = 1; size = 0; break;
4460 case Iop_CatEvenLanes8x16: is_lo = 0; size = 0; break;
4461 case Iop_CatOddLanes16x8: is_lo = 1; size = 1; break;
4462 case Iop_CatEvenLanes16x8: is_lo = 0; size = 1; break;
4463 case Iop_CatOddLanes32x4: is_lo = 1; size = 2; break;
4464 case Iop_CatEvenLanes32x4: is_lo = 0; size = 2; break;
4465 default:
4466 ppIROp(e->Iex.Binop.op);
4467 vpanic("Illegal element size in VUZP");
4468 }
4469 if (is_lo) {
4470 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4471 tmp, argL, 4, True));
4472 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4473 res, argR, 4, True));
4474 addInstr(env, ARMInstr_NDual(ARMneon_UZP,
4475 res, tmp, size, True));
4476 } else {
4477 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4478 tmp, argR, 4, True));
4479 addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4480 res, argL, 4, True));
4481 addInstr(env, ARMInstr_NDual(ARMneon_UZP,
4482 tmp, res, size, True));
4483 }
4484 return res;
4485 }
4486 case Iop_QAdd8Ux16:
4487 case Iop_QAdd16Ux8:
4488 case Iop_QAdd32Ux4:
4489 case Iop_QAdd64Ux2: {
4490 HReg res = newVRegV(env);
4491 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4492 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4493 UInt size;
4494 switch (e->Iex.Binop.op) {
4495 case Iop_QAdd8Ux16: size = 0; break;
4496 case Iop_QAdd16Ux8: size = 1; break;
4497 case Iop_QAdd32Ux4: size = 2; break;
4498 case Iop_QAdd64Ux2: size = 3; break;
4499 default:
4500 ppIROp(e->Iex.Binop.op);
4501 vpanic("Illegal element size in VQADDU");
4502 }
4503 addInstr(env, ARMInstr_NBinary(ARMneon_VQADDU,
4504 res, argL, argR, size, True));
4505 return res;
4506 }
4507 case Iop_QAdd8Sx16:
4508 case Iop_QAdd16Sx8:
4509 case Iop_QAdd32Sx4:
4510 case Iop_QAdd64Sx2: {
4511 HReg res = newVRegV(env);
4512 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4513 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4514 UInt size;
4515 switch (e->Iex.Binop.op) {
4516 case Iop_QAdd8Sx16: size = 0; break;
4517 case Iop_QAdd16Sx8: size = 1; break;
4518 case Iop_QAdd32Sx4: size = 2; break;
4519 case Iop_QAdd64Sx2: size = 3; break;
4520 default:
4521 ppIROp(e->Iex.Binop.op);
4522 vpanic("Illegal element size in VQADDS");
4523 }
4524 addInstr(env, ARMInstr_NBinary(ARMneon_VQADDS,
4525 res, argL, argR, size, True));
4526 return res;
4527 }
4528 case Iop_Sub8x16:
4529 case Iop_Sub16x8:
4530 case Iop_Sub32x4:
4531 case Iop_Sub64x2: {
4532 HReg res = newVRegV(env);
4533 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4534 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4535 UInt size;
4536 switch (e->Iex.Binop.op) {
4537 case Iop_Sub8x16: size = 0; break;
4538 case Iop_Sub16x8: size = 1; break;
4539 case Iop_Sub32x4: size = 2; break;
4540 case Iop_Sub64x2: size = 3; break;
4541 default:
4542 ppIROp(e->Iex.Binop.op);
4543 vpanic("Illegal element size in VSUB");
4544 }
4545 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
4546 res, argL, argR, size, True));
4547 return res;
4548 }
4549 case Iop_Sub32Fx4: {
4550 HReg res = newVRegV(env);
4551 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4552 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4553 UInt size = 0;
4554 addInstr(env, ARMInstr_NBinary(ARMneon_VSUBFP,
4555 res, argL, argR, size, True));
4556 return res;
4557 }
4558 case Iop_QSub8Ux16:
4559 case Iop_QSub16Ux8:
4560 case Iop_QSub32Ux4:
4561 case Iop_QSub64Ux2: {
4562 HReg res = newVRegV(env);
4563 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4564 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4565 UInt size;
4566 switch (e->Iex.Binop.op) {
4567 case Iop_QSub8Ux16: size = 0; break;
4568 case Iop_QSub16Ux8: size = 1; break;
4569 case Iop_QSub32Ux4: size = 2; break;
4570 case Iop_QSub64Ux2: size = 3; break;
4571 default:
4572 ppIROp(e->Iex.Binop.op);
4573 vpanic("Illegal element size in VQSUBU");
4574 }
4575 addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBU,
4576 res, argL, argR, size, True));
4577 return res;
4578 }
4579 case Iop_QSub8Sx16:
4580 case Iop_QSub16Sx8:
4581 case Iop_QSub32Sx4:
4582 case Iop_QSub64Sx2: {
4583 HReg res = newVRegV(env);
4584 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4585 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4586 UInt size;
4587 switch (e->Iex.Binop.op) {
4588 case Iop_QSub8Sx16: size = 0; break;
4589 case Iop_QSub16Sx8: size = 1; break;
4590 case Iop_QSub32Sx4: size = 2; break;
4591 case Iop_QSub64Sx2: size = 3; break;
4592 default:
4593 ppIROp(e->Iex.Binop.op);
4594 vpanic("Illegal element size in VQSUBS");
4595 }
4596 addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBS,
4597 res, argL, argR, size, True));
4598 return res;
4599 }
4600 case Iop_Max8Ux16:
4601 case Iop_Max16Ux8:
4602 case Iop_Max32Ux4: {
4603 HReg res = newVRegV(env);
4604 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4605 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4606 UInt size;
4607 switch (e->Iex.Binop.op) {
4608 case Iop_Max8Ux16: size = 0; break;
4609 case Iop_Max16Ux8: size = 1; break;
4610 case Iop_Max32Ux4: size = 2; break;
4611 default: vpanic("Illegal element size in VMAXU");
4612 }
4613 addInstr(env, ARMInstr_NBinary(ARMneon_VMAXU,
4614 res, argL, argR, size, True));
4615 return res;
4616 }
4617 case Iop_Max8Sx16:
4618 case Iop_Max16Sx8:
4619 case Iop_Max32Sx4: {
4620 HReg res = newVRegV(env);
4621 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4622 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4623 UInt size;
4624 switch (e->Iex.Binop.op) {
4625 case Iop_Max8Sx16: size = 0; break;
4626 case Iop_Max16Sx8: size = 1; break;
4627 case Iop_Max32Sx4: size = 2; break;
4628 default: vpanic("Illegal element size in VMAXU");
4629 }
4630 addInstr(env, ARMInstr_NBinary(ARMneon_VMAXS,
4631 res, argL, argR, size, True));
4632 return res;
4633 }
4634 case Iop_Min8Ux16:
4635 case Iop_Min16Ux8:
4636 case Iop_Min32Ux4: {
4637 HReg res = newVRegV(env);
4638 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4639 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4640 UInt size;
4641 switch (e->Iex.Binop.op) {
4642 case Iop_Min8Ux16: size = 0; break;
4643 case Iop_Min16Ux8: size = 1; break;
4644 case Iop_Min32Ux4: size = 2; break;
4645 default: vpanic("Illegal element size in VMAXU");
4646 }
4647 addInstr(env, ARMInstr_NBinary(ARMneon_VMINU,
4648 res, argL, argR, size, True));
4649 return res;
4650 }
4651 case Iop_Min8Sx16:
4652 case Iop_Min16Sx8:
4653 case Iop_Min32Sx4: {
4654 HReg res = newVRegV(env);
4655 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4656 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4657 UInt size;
4658 switch (e->Iex.Binop.op) {
4659 case Iop_Min8Sx16: size = 0; break;
4660 case Iop_Min16Sx8: size = 1; break;
4661 case Iop_Min32Sx4: size = 2; break;
4662 default: vpanic("Illegal element size in VMAXU");
4663 }
4664 addInstr(env, ARMInstr_NBinary(ARMneon_VMINS,
4665 res, argL, argR, size, True));
4666 return res;
4667 }
4668 case Iop_Sar8x16:
4669 case Iop_Sar16x8:
4670 case Iop_Sar32x4:
4671 case Iop_Sar64x2: {
4672 HReg res = newVRegV(env);
4673 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4674 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4675 HReg argR2 = newVRegV(env);
4676 HReg zero = newVRegV(env);
4677 UInt size;
4678 switch (e->Iex.Binop.op) {
4679 case Iop_Sar8x16: size = 0; break;
4680 case Iop_Sar16x8: size = 1; break;
4681 case Iop_Sar32x4: size = 2; break;
4682 case Iop_Sar64x2: size = 3; break;
4683 default: vassert(0);
4684 }
4685 addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
4686 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
4687 argR2, zero, argR, size, True));
4688 addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
4689 res, argL, argR2, size, True));
4690 return res;
4691 }
4692 case Iop_Sal8x16:
4693 case Iop_Sal16x8:
4694 case Iop_Sal32x4:
4695 case Iop_Sal64x2: {
4696 HReg res = newVRegV(env);
4697 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4698 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4699 UInt size;
4700 switch (e->Iex.Binop.op) {
4701 case Iop_Sal8x16: size = 0; break;
4702 case Iop_Sal16x8: size = 1; break;
4703 case Iop_Sal32x4: size = 2; break;
4704 case Iop_Sal64x2: size = 3; break;
4705 default: vassert(0);
4706 }
4707 addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
4708 res, argL, argR, size, True));
4709 return res;
4710 }
4711 case Iop_Shr8x16:
4712 case Iop_Shr16x8:
4713 case Iop_Shr32x4:
4714 case Iop_Shr64x2: {
4715 HReg res = newVRegV(env);
4716 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4717 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4718 HReg argR2 = newVRegV(env);
4719 HReg zero = newVRegV(env);
4720 UInt size;
4721 switch (e->Iex.Binop.op) {
4722 case Iop_Shr8x16: size = 0; break;
4723 case Iop_Shr16x8: size = 1; break;
4724 case Iop_Shr32x4: size = 2; break;
4725 case Iop_Shr64x2: size = 3; break;
4726 default: vassert(0);
4727 }
4728 addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
4729 addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
4730 argR2, zero, argR, size, True));
4731 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4732 res, argL, argR2, size, True));
4733 return res;
4734 }
4735 case Iop_Shl8x16:
4736 case Iop_Shl16x8:
4737 case Iop_Shl32x4:
4738 case Iop_Shl64x2: {
4739 HReg res = newVRegV(env);
4740 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4741 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4742 UInt size;
4743 switch (e->Iex.Binop.op) {
4744 case Iop_Shl8x16: size = 0; break;
4745 case Iop_Shl16x8: size = 1; break;
4746 case Iop_Shl32x4: size = 2; break;
4747 case Iop_Shl64x2: size = 3; break;
4748 default: vassert(0);
4749 }
4750 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4751 res, argL, argR, size, True));
4752 return res;
4753 }
4754 case Iop_QShl8x16:
4755 case Iop_QShl16x8:
4756 case Iop_QShl32x4:
4757 case Iop_QShl64x2: {
4758 HReg res = newVRegV(env);
4759 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4760 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4761 UInt size;
4762 switch (e->Iex.Binop.op) {
4763 case Iop_QShl8x16: size = 0; break;
4764 case Iop_QShl16x8: size = 1; break;
4765 case Iop_QShl32x4: size = 2; break;
4766 case Iop_QShl64x2: size = 3; break;
4767 default: vassert(0);
4768 }
4769 addInstr(env, ARMInstr_NShift(ARMneon_VQSHL,
4770 res, argL, argR, size, True));
4771 return res;
4772 }
4773 case Iop_QSal8x16:
4774 case Iop_QSal16x8:
4775 case Iop_QSal32x4:
4776 case Iop_QSal64x2: {
4777 HReg res = newVRegV(env);
4778 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4779 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4780 UInt size;
4781 switch (e->Iex.Binop.op) {
4782 case Iop_QSal8x16: size = 0; break;
4783 case Iop_QSal16x8: size = 1; break;
4784 case Iop_QSal32x4: size = 2; break;
4785 case Iop_QSal64x2: size = 3; break;
4786 default: vassert(0);
4787 }
4788 addInstr(env, ARMInstr_NShift(ARMneon_VQSAL,
4789 res, argL, argR, size, True));
4790 return res;
4791 }
4792 case Iop_QShlN8x16:
4793 case Iop_QShlN16x8:
4794 case Iop_QShlN32x4:
4795 case Iop_QShlN64x2: {
4796 HReg res = newVRegV(env);
4797 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4798 UInt size, imm;
4799 if (e->Iex.Binop.arg2->tag != Iex_Const ||
4800 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
4801 vpanic("ARM taget supports Iop_QShlNAxB with constant "
4802 "second argument only\n");
4803 }
4804 imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
4805 switch (e->Iex.Binop.op) {
4806 case Iop_QShlN8x16: size = 8 | imm; break;
4807 case Iop_QShlN16x8: size = 16 | imm; break;
4808 case Iop_QShlN32x4: size = 32 | imm; break;
4809 case Iop_QShlN64x2: size = 64 | imm; break;
4810 default: vassert(0);
4811 }
4812 addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUU,
4813 res, argL, size, True));
4814 return res;
4815 }
4816 case Iop_QShlN8Sx16:
4817 case Iop_QShlN16Sx8:
4818 case Iop_QShlN32Sx4:
4819 case Iop_QShlN64Sx2: {
4820 HReg res = newVRegV(env);
4821 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4822 UInt size, imm;
4823 if (e->Iex.Binop.arg2->tag != Iex_Const ||
4824 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
4825 vpanic("ARM taget supports Iop_QShlNASxB with constant "
4826 "second argument only\n");
4827 }
4828 imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
4829 switch (e->Iex.Binop.op) {
4830 case Iop_QShlN8Sx16: size = 8 | imm; break;
4831 case Iop_QShlN16Sx8: size = 16 | imm; break;
4832 case Iop_QShlN32Sx4: size = 32 | imm; break;
4833 case Iop_QShlN64Sx2: size = 64 | imm; break;
4834 default: vassert(0);
4835 }
4836 addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUS,
4837 res, argL, size, True));
4838 return res;
4839 }
4840 case Iop_QSalN8x16:
4841 case Iop_QSalN16x8:
4842 case Iop_QSalN32x4:
4843 case Iop_QSalN64x2: {
4844 HReg res = newVRegV(env);
4845 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4846 UInt size, imm;
4847 if (e->Iex.Binop.arg2->tag != Iex_Const ||
4848 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
4849 vpanic("ARM taget supports Iop_QShlNAxB with constant "
4850 "second argument only\n");
4851 }
4852 imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
4853 switch (e->Iex.Binop.op) {
4854 case Iop_QSalN8x16: size = 8 | imm; break;
4855 case Iop_QSalN16x8: size = 16 | imm; break;
4856 case Iop_QSalN32x4: size = 32 | imm; break;
4857 case Iop_QSalN64x2: size = 64 | imm; break;
4858 default: vassert(0);
4859 }
4860 addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNSS,
4861 res, argL, size, True));
4862 return res;
4863 }
4864 case Iop_ShrN8x16:
4865 case Iop_ShrN16x8:
4866 case Iop_ShrN32x4:
4867 case Iop_ShrN64x2: {
4868 HReg res = newVRegV(env);
4869 HReg tmp = newVRegV(env);
4870 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4871 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
4872 HReg argR2 = newVRegI(env);
4873 UInt size;
4874 switch (e->Iex.Binop.op) {
4875 case Iop_ShrN8x16: size = 0; break;
4876 case Iop_ShrN16x8: size = 1; break;
4877 case Iop_ShrN32x4: size = 2; break;
4878 case Iop_ShrN64x2: size = 3; break;
4879 default: vassert(0);
4880 }
4881 addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
4882 addInstr(env, ARMInstr_NUnary(ARMneon_DUP,
4883 tmp, argR2, 0, True));
4884 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4885 res, argL, tmp, size, True));
4886 return res;
4887 }
4888 case Iop_ShlN8x16:
4889 case Iop_ShlN16x8:
4890 case Iop_ShlN32x4:
4891 case Iop_ShlN64x2: {
4892 HReg res = newVRegV(env);
4893 HReg tmp = newVRegV(env);
4894 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4895 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
4896 UInt size;
4897 switch (e->Iex.Binop.op) {
4898 case Iop_ShlN8x16: size = 0; break;
4899 case Iop_ShlN16x8: size = 1; break;
4900 case Iop_ShlN32x4: size = 2; break;
4901 case Iop_ShlN64x2: size = 3; break;
4902 default: vassert(0);
4903 }
4904 addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR, 0, True));
4905 addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4906 res, argL, tmp, size, True));
4907 return res;
4908 }
4909 case Iop_SarN8x16:
4910 case Iop_SarN16x8:
4911 case Iop_SarN32x4:
4912 case Iop_SarN64x2: {
4913 HReg res = newVRegV(env);
4914 HReg tmp = newVRegV(env);
4915 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4916 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
4917 HReg argR2 = newVRegI(env);
4918 UInt size;
4919 switch (e->Iex.Binop.op) {
4920 case Iop_SarN8x16: size = 0; break;
4921 case Iop_SarN16x8: size = 1; break;
4922 case Iop_SarN32x4: size = 2; break;
4923 case Iop_SarN64x2: size = 3; break;
4924 default: vassert(0);
4925 }
4926 addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
4927 addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, True));
4928 addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
4929 res, argL, tmp, size, True));
4930 return res;
4931 }
4932 case Iop_CmpGT8Ux16:
4933 case Iop_CmpGT16Ux8:
4934 case Iop_CmpGT32Ux4: {
4935 HReg res = newVRegV(env);
4936 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4937 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4938 UInt size;
4939 switch (e->Iex.Binop.op) {
4940 case Iop_CmpGT8Ux16: size = 0; break;
4941 case Iop_CmpGT16Ux8: size = 1; break;
4942 case Iop_CmpGT32Ux4: size = 2; break;
4943 default: vassert(0);
4944 }
4945 addInstr(env, ARMInstr_NBinary(ARMneon_VCGTU,
4946 res, argL, argR, size, True));
4947 return res;
4948 }
4949 case Iop_CmpGT8Sx16:
4950 case Iop_CmpGT16Sx8:
4951 case Iop_CmpGT32Sx4: {
4952 HReg res = newVRegV(env);
4953 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4954 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4955 UInt size;
4956 switch (e->Iex.Binop.op) {
4957 case Iop_CmpGT8Sx16: size = 0; break;
4958 case Iop_CmpGT16Sx8: size = 1; break;
4959 case Iop_CmpGT32Sx4: size = 2; break;
4960 default: vassert(0);
4961 }
4962 addInstr(env, ARMInstr_NBinary(ARMneon_VCGTS,
4963 res, argL, argR, size, True));
4964 return res;
4965 }
4966 case Iop_CmpEQ8x16:
4967 case Iop_CmpEQ16x8:
4968 case Iop_CmpEQ32x4: {
4969 HReg res = newVRegV(env);
4970 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4971 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4972 UInt size;
4973 switch (e->Iex.Binop.op) {
4974 case Iop_CmpEQ8x16: size = 0; break;
4975 case Iop_CmpEQ16x8: size = 1; break;
4976 case Iop_CmpEQ32x4: size = 2; break;
4977 default: vassert(0);
4978 }
4979 addInstr(env, ARMInstr_NBinary(ARMneon_VCEQ,
4980 res, argL, argR, size, True));
4981 return res;
4982 }
4983 case Iop_Mul8x16:
4984 case Iop_Mul16x8:
4985 case Iop_Mul32x4: {
4986 HReg res = newVRegV(env);
4987 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4988 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4989 UInt size = 0;
4990 switch(e->Iex.Binop.op) {
4991 case Iop_Mul8x16: size = 0; break;
4992 case Iop_Mul16x8: size = 1; break;
4993 case Iop_Mul32x4: size = 2; break;
4994 default: vassert(0);
4995 }
4996 addInstr(env, ARMInstr_NBinary(ARMneon_VMUL,
4997 res, argL, argR, size, True));
4998 return res;
4999 }
5000 case Iop_Mul32Fx4: {
5001 HReg res = newVRegV(env);
5002 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5003 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5004 UInt size = 0;
5005 addInstr(env, ARMInstr_NBinary(ARMneon_VMULFP,
5006 res, argL, argR, size, True));
5007 return res;
5008 }
5009 case Iop_Mull8Ux8:
5010 case Iop_Mull16Ux4:
5011 case Iop_Mull32Ux2: {
5012 HReg res = newVRegV(env);
5013 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5014 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
5015 UInt size = 0;
5016 switch(e->Iex.Binop.op) {
5017 case Iop_Mull8Ux8: size = 0; break;
5018 case Iop_Mull16Ux4: size = 1; break;
5019 case Iop_Mull32Ux2: size = 2; break;
5020 default: vassert(0);
5021 }
5022 addInstr(env, ARMInstr_NBinary(ARMneon_VMULLU,
5023 res, argL, argR, size, True));
5024 return res;
5025 }
5026
5027 case Iop_Mull8Sx8:
5028 case Iop_Mull16Sx4:
5029 case Iop_Mull32Sx2: {
5030 HReg res = newVRegV(env);
5031 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5032 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
5033 UInt size = 0;
5034 switch(e->Iex.Binop.op) {
5035 case Iop_Mull8Sx8: size = 0; break;
5036 case Iop_Mull16Sx4: size = 1; break;
5037 case Iop_Mull32Sx2: size = 2; break;
5038 default: vassert(0);
5039 }
5040 addInstr(env, ARMInstr_NBinary(ARMneon_VMULLS,
5041 res, argL, argR, size, True));
5042 return res;
5043 }
5044
5045 case Iop_QDMulHi16Sx8:
5046 case Iop_QDMulHi32Sx4: {
5047 HReg res = newVRegV(env);
5048 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5049 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5050 UInt size = 0;
5051 switch(e->Iex.Binop.op) {
5052 case Iop_QDMulHi16Sx8: size = 1; break;
5053 case Iop_QDMulHi32Sx4: size = 2; break;
5054 default: vassert(0);
5055 }
5056 addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULH,
5057 res, argL, argR, size, True));
5058 return res;
5059 }
5060
5061 case Iop_QRDMulHi16Sx8:
5062 case Iop_QRDMulHi32Sx4: {
5063 HReg res = newVRegV(env);
5064 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5065 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5066 UInt size = 0;
5067 switch(e->Iex.Binop.op) {
5068 case Iop_QRDMulHi16Sx8: size = 1; break;
5069 case Iop_QRDMulHi32Sx4: size = 2; break;
5070 default: vassert(0);
5071 }
5072 addInstr(env, ARMInstr_NBinary(ARMneon_VQRDMULH,
5073 res, argL, argR, size, True));
5074 return res;
5075 }
5076
5077 case Iop_QDMulLong16Sx4:
5078 case Iop_QDMulLong32Sx2: {
5079 HReg res = newVRegV(env);
5080 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5081 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
5082 UInt size = 0;
5083 switch(e->Iex.Binop.op) {
5084 case Iop_QDMulLong16Sx4: size = 1; break;
5085 case Iop_QDMulLong32Sx2: size = 2; break;
5086 default: vassert(0);
5087 }
5088 addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULL,
5089 res, argL, argR, size, True));
5090 return res;
5091 }
5092 case Iop_PolynomialMul8x16: {
5093 HReg res = newVRegV(env);
5094 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5095 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5096 UInt size = 0;
5097 addInstr(env, ARMInstr_NBinary(ARMneon_VMULP,
5098 res, argL, argR, size, True));
5099 return res;
5100 }
5101 case Iop_Max32Fx4: {
5102 HReg res = newVRegV(env);
5103 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5104 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5105 addInstr(env, ARMInstr_NBinary(ARMneon_VMAXF,
5106 res, argL, argR, 2, True));
5107 return res;
5108 }
5109 case Iop_Min32Fx4: {
5110 HReg res = newVRegV(env);
5111 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5112 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5113 addInstr(env, ARMInstr_NBinary(ARMneon_VMINF,
5114 res, argL, argR, 2, True));
5115 return res;
5116 }
5117 case Iop_PwMax32Fx4: {
5118 HReg res = newVRegV(env);
5119 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5120 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5121 addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXF,
5122 res, argL, argR, 2, True));
5123 return res;
5124 }
5125 case Iop_PwMin32Fx4: {
5126 HReg res = newVRegV(env);
5127 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5128 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5129 addInstr(env, ARMInstr_NBinary(ARMneon_VPMINF,
5130 res, argL, argR, 2, True));
5131 return res;
5132 }
5133 case Iop_CmpGT32Fx4: {
5134 HReg res = newVRegV(env);
5135 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5136 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5137 addInstr(env, ARMInstr_NBinary(ARMneon_VCGTF,
5138 res, argL, argR, 2, True));
5139 return res;
5140 }
5141 case Iop_CmpGE32Fx4: {
5142 HReg res = newVRegV(env);
5143 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5144 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5145 addInstr(env, ARMInstr_NBinary(ARMneon_VCGEF,
5146 res, argL, argR, 2, True));
5147 return res;
5148 }
5149 case Iop_CmpEQ32Fx4: {
5150 HReg res = newVRegV(env);
5151 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5152 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5153 addInstr(env, ARMInstr_NBinary(ARMneon_VCEQF,
5154 res, argL, argR, 2, True));
5155 return res;
5156 }
5157
5158 case Iop_PolynomialMull8x8: {
5159 HReg res = newVRegV(env);
5160 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5161 HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
5162 UInt size = 0;
5163 addInstr(env, ARMInstr_NBinary(ARMneon_VMULLP,
5164 res, argL, argR, size, True));
5165 return res;
5166 }
5167 case Iop_F32ToFixed32Ux4_RZ:
5168 case Iop_F32ToFixed32Sx4_RZ:
5169 case Iop_Fixed32UToF32x4_RN:
5170 case Iop_Fixed32SToF32x4_RN: {
5171 HReg res = newVRegV(env);
5172 HReg arg = iselNeonExpr(env, e->Iex.Binop.arg1);
5173 ARMNeonUnOp op;
5174 UInt imm6;
5175 if (e->Iex.Binop.arg2->tag != Iex_Const ||
5176 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
5177 vpanic("ARM supports FP <-> Fixed conversion with constant "
5178 "second argument less than 33 only\n");
5179 }
5180 imm6 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
5181 vassert(imm6 <= 32 && imm6 > 0);
5182 imm6 = 64 - imm6;
5183 switch(e->Iex.Binop.op) {
5184 case Iop_F32ToFixed32Ux4_RZ: op = ARMneon_VCVTFtoFixedU; break;
5185 case Iop_F32ToFixed32Sx4_RZ: op = ARMneon_VCVTFtoFixedS; break;
5186 case Iop_Fixed32UToF32x4_RN: op = ARMneon_VCVTFixedUtoF; break;
5187 case Iop_Fixed32SToF32x4_RN: op = ARMneon_VCVTFixedStoF; break;
5188 default: vassert(0);
5189 }
5190 addInstr(env, ARMInstr_NUnary(op, res, arg, imm6, True));
5191 return res;
5192 }
5193 /*
5194 FIXME remove if not used
5195 case Iop_VDup8x16:
5196 case Iop_VDup16x8:
5197 case Iop_VDup32x4: {
5198 HReg res = newVRegV(env);
5199 HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5200 UInt imm4;
5201 UInt index;
5202 if (e->Iex.Binop.arg2->tag != Iex_Const ||
5203 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
5204 vpanic("ARM supports Iop_VDup with constant "
5205 "second argument less than 16 only\n");
5206 }
5207 index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
5208 switch(e->Iex.Binop.op) {
5209 case Iop_VDup8x16: imm4 = (index << 1) + 1; break;
5210 case Iop_VDup16x8: imm4 = (index << 2) + 2; break;
5211 case Iop_VDup32x4: imm4 = (index << 3) + 4; break;
5212 default: vassert(0);
5213 }
5214 if (imm4 >= 16) {
5215 vpanic("ARM supports Iop_VDup with constant "
5216 "second argument less than 16 only\n");
5217 }
5218 addInstr(env, ARMInstr_NUnary(ARMneon_VDUP,
5219 res, argL, imm4, True));
5220 return res;
5221 }
5222 */
5223 case Iop_PwAdd8x16:
5224 case Iop_PwAdd16x8:
5225 case Iop_PwAdd32x4: {
5226 HReg res = newVRegV(env);
5227 HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5228 HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5229 UInt size = 0;
5230 switch(e->Iex.Binop.op) {
5231 case Iop_PwAdd8x16: size = 0; break;
5232 case Iop_PwAdd16x8: size = 1; break;
5233 case Iop_PwAdd32x4: size = 2; break;
5234 default: vassert(0);
5235 }
5236 addInstr(env, ARMInstr_NBinary(ARMneon_VPADD,
5237 res, argL, argR, size, True));
5238 return res;
5239 }
5240 /* ... */
5241 default:
5242 break;
5243 }
5244 }
5245
5246 if (e->tag == Iex_Triop) {
5247 switch (e->Iex.Triop.op) {
5248 case Iop_ExtractV128: {
5249 HReg res = newVRegV(env);
5250 HReg argL = iselNeonExpr(env, e->Iex.Triop.arg1);
5251 HReg argR = iselNeonExpr(env, e->Iex.Triop.arg2);
5252 UInt imm4;
5253 if (e->Iex.Triop.arg3->tag != Iex_Const ||
5254 typeOfIRExpr(env->type_env, e->Iex.Triop.arg3) != Ity_I8) {
5255 vpanic("ARM target supports Iop_ExtractV128 with constant "
5256 "third argument less than 16 only\n");
5257 }
5258 imm4 = e->Iex.Triop.arg3->Iex.Const.con->Ico.U8;
5259 if (imm4 >= 16) {
5260 vpanic("ARM target supports Iop_ExtractV128 with constant "
5261 "third argument less than 16 only\n");
5262 }
5263 addInstr(env, ARMInstr_NBinary(ARMneon_VEXT,
5264 res, argL, argR, imm4, True));
5265 return res;
5266 }
5267 default:
5268 break;
5269 }
5270 }
5271
5272 if (e->tag == Iex_Mux0X) {
5273 HReg r8;
5274 HReg rX = iselNeonExpr(env, e->Iex.Mux0X.exprX);
5275 HReg r0 = iselNeonExpr(env, e->Iex.Mux0X.expr0);
5276 HReg dst = newVRegV(env);
5277 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, dst, rX, 4, True));
5278 r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond);
5279 addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r8,
5280 ARMRI84_I84(0xFF,0)));
5281 addInstr(env, ARMInstr_NCMovQ(ARMcc_EQ, dst, r0));
5282 return dst;
5283 }
5284
5285 neon_expr_bad:
5286 ppIRExpr(e);
5287 vpanic("iselNeonExpr_wrk");
5288}
5289
/*---------------------------------------------------------*/
/*--- ISEL: Floating point expressions (64 bit)         ---*/
/*---------------------------------------------------------*/
5293
5294/* Compute a 64-bit floating point value into a register, the identity
5295 of which is returned. As with iselIntExpr_R, the reg may be either
5296 real or virtual; in any case it must not be changed by subsequent
5297 code emitted by the caller. */
5298
5299static HReg iselDblExpr ( ISelEnv* env, IRExpr* e )
5300{
5301 HReg r = iselDblExpr_wrk( env, e );
5302# if 0
5303 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
5304# endif
5305 vassert(hregClass(r) == HRcFlt64);
5306 vassert(hregIsVirtual(r));
5307 return r;
5308}
5309
5310/* DO NOT CALL THIS DIRECTLY */
5311static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
5312{
5313 IRType ty = typeOfIRExpr(env->type_env,e);
5314 vassert(e);
5315 vassert(ty == Ity_F64);
5316
5317 if (e->tag == Iex_RdTmp) {
5318 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
5319 }
5320
5321 if (e->tag == Iex_Const) {
5322 /* Just handle the zero case. */
5323 IRConst* con = e->Iex.Const.con;
5324 if (con->tag == Ico_F64i && con->Ico.F64i == 0ULL) {
5325 HReg z32 = newVRegI(env);
5326 HReg dst = newVRegD(env);
5327 addInstr(env, ARMInstr_Imm32(z32, 0));
5328 addInstr(env, ARMInstr_VXferD(True/*toD*/, dst, z32, z32));
5329 return dst;
5330 }
5331 }
5332
5333 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
5334 ARMAModeV* am;
5335 HReg res = newVRegD(env);
5336 vassert(e->Iex.Load.ty == Ity_F64);
5337 am = iselIntExpr_AModeV(env, e->Iex.Load.addr);
5338 addInstr(env, ARMInstr_VLdStD(True/*isLoad*/, res, am));
5339 return res;
5340 }
5341
5342 if (e->tag == Iex_Get) {
5343 // XXX This won't work if offset > 1020 or is not 0 % 4.
5344 // In which case we'll have to generate more longwinded code.
5345 ARMAModeV* am = mkARMAModeV(hregARM_R8(), e->Iex.Get.offset);
5346 HReg res = newVRegD(env);
5347 addInstr(env, ARMInstr_VLdStD(True/*isLoad*/, res, am));
5348 return res;
5349 }
5350
5351 if (e->tag == Iex_Unop) {
5352 switch (e->Iex.Unop.op) {
5353 case Iop_ReinterpI64asF64: {
sewardjc6f970f2012-04-02 21:54:49 +00005354 if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
sewardj6c60b322010-08-22 12:48:28 +00005355 return iselNeon64Expr(env, e->Iex.Unop.arg);
5356 } else {
5357 HReg srcHi, srcLo;
5358 HReg dst = newVRegD(env);
5359 iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg);
5360 addInstr(env, ARMInstr_VXferD(True/*toD*/, dst, srcHi, srcLo));
5361 return dst;
5362 }
sewardj6c299f32009-12-31 18:00:12 +00005363 }
5364 case Iop_NegF64: {
5365 HReg src = iselDblExpr(env, e->Iex.Unop.arg);
5366 HReg dst = newVRegD(env);
5367 addInstr(env, ARMInstr_VUnaryD(ARMvfpu_NEG, dst, src));
5368 return dst;
5369 }
5370 case Iop_AbsF64: {
5371 HReg src = iselDblExpr(env, e->Iex.Unop.arg);
5372 HReg dst = newVRegD(env);
5373 addInstr(env, ARMInstr_VUnaryD(ARMvfpu_ABS, dst, src));
5374 return dst;
5375 }
5376 case Iop_F32toF64: {
5377 HReg src = iselFltExpr(env, e->Iex.Unop.arg);
5378 HReg dst = newVRegD(env);
5379 addInstr(env, ARMInstr_VCvtSD(True/*sToD*/, dst, src));
5380 return dst;
5381 }
5382 case Iop_I32UtoF64:
5383 case Iop_I32StoF64: {
5384 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
5385 HReg f32 = newVRegF(env);
5386 HReg dst = newVRegD(env);
5387 Bool syned = e->Iex.Unop.op == Iop_I32StoF64;
5388 /* VMOV f32, src */
5389 addInstr(env, ARMInstr_VXferS(True/*toS*/, f32, src));
5390 /* FSITOD dst, f32 */
5391 addInstr(env, ARMInstr_VCvtID(True/*iToD*/, syned,
5392 dst, f32));
5393 return dst;
5394 }
5395 default:
5396 break;
5397 }
5398 }
5399
5400 if (e->tag == Iex_Binop) {
5401 switch (e->Iex.Binop.op) {
5402 case Iop_SqrtF64: {
5403 /* first arg is rounding mode; we ignore it. */
5404 HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
5405 HReg dst = newVRegD(env);
5406 addInstr(env, ARMInstr_VUnaryD(ARMvfpu_SQRT, dst, src));
5407 return dst;
5408 }
5409 default:
5410 break;
5411 }
5412 }
5413
5414 if (e->tag == Iex_Triop) {
5415 switch (e->Iex.Triop.op) {
5416 case Iop_DivF64:
5417 case Iop_MulF64:
5418 case Iop_AddF64:
5419 case Iop_SubF64: {
5420 ARMVfpOp op = 0; /*INVALID*/
5421 HReg argL = iselDblExpr(env, e->Iex.Triop.arg2);
5422 HReg argR = iselDblExpr(env, e->Iex.Triop.arg3);
5423 HReg dst = newVRegD(env);
5424 switch (e->Iex.Triop.op) {
5425 case Iop_DivF64: op = ARMvfp_DIV; break;
5426 case Iop_MulF64: op = ARMvfp_MUL; break;
5427 case Iop_AddF64: op = ARMvfp_ADD; break;
5428 case Iop_SubF64: op = ARMvfp_SUB; break;
5429 default: vassert(0);
5430 }
5431 addInstr(env, ARMInstr_VAluD(op, dst, argL, argR));
5432 return dst;
5433 }
5434 default:
5435 break;
5436 }
5437 }
5438
5439 if (e->tag == Iex_Mux0X) {
5440 if (ty == Ity_F64
5441 && typeOfIRExpr(env->type_env,e->Iex.Mux0X.cond) == Ity_I8) {
5442 HReg r8;
5443 HReg rX = iselDblExpr(env, e->Iex.Mux0X.exprX);
5444 HReg r0 = iselDblExpr(env, e->Iex.Mux0X.expr0);
5445 HReg dst = newVRegD(env);
5446 addInstr(env, ARMInstr_VUnaryD(ARMvfpu_COPY, dst, rX));
5447 r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond);
5448 addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r8,
5449 ARMRI84_I84(0xFF,0)));
5450 addInstr(env, ARMInstr_VCMovD(ARMcc_EQ, dst, r0));
5451 return dst;
5452 }
5453 }
5454
5455 ppIRExpr(e);
5456 vpanic("iselDblExpr_wrk");
5457}
5458
5459
5460/*---------------------------------------------------------*/
5461/*--- ISEL: Floating point expressions (32 bit) ---*/
5462/*---------------------------------------------------------*/
5463
/* Compute a 32-bit floating point value into a register, the identity
   of which is returned.  As with iselIntExpr_R, the reg may be either
   real or virtual; in any case it must not be changed by subsequent
   code emitted by the caller.  */
5468
5469static HReg iselFltExpr ( ISelEnv* env, IRExpr* e )
5470{
5471 HReg r = iselFltExpr_wrk( env, e );
5472# if 0
5473 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
5474# endif
5475 vassert(hregClass(r) == HRcFlt32);
5476 vassert(hregIsVirtual(r));
5477 return r;
5478}
5479
5480/* DO NOT CALL THIS DIRECTLY */
5481static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e )
5482{
5483 IRType ty = typeOfIRExpr(env->type_env,e);
5484 vassert(e);
5485 vassert(ty == Ity_F32);
5486
5487 if (e->tag == Iex_RdTmp) {
5488 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
5489 }
5490
5491 if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
5492 ARMAModeV* am;
5493 HReg res = newVRegF(env);
5494 vassert(e->Iex.Load.ty == Ity_F32);
5495 am = iselIntExpr_AModeV(env, e->Iex.Load.addr);
5496 addInstr(env, ARMInstr_VLdStS(True/*isLoad*/, res, am));
5497 return res;
5498 }
5499
5500 if (e->tag == Iex_Get) {
5501 // XXX This won't work if offset > 1020 or is not 0 % 4.
5502 // In which case we'll have to generate more longwinded code.
5503 ARMAModeV* am = mkARMAModeV(hregARM_R8(), e->Iex.Get.offset);
5504 HReg res = newVRegF(env);
5505 addInstr(env, ARMInstr_VLdStS(True/*isLoad*/, res, am));
5506 return res;
5507 }
5508
5509 if (e->tag == Iex_Unop) {
5510 switch (e->Iex.Unop.op) {
5511 case Iop_ReinterpI32asF32: {
5512 HReg dst = newVRegF(env);
5513 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
5514 addInstr(env, ARMInstr_VXferS(True/*toS*/, dst, src));
5515 return dst;
5516 }
5517 case Iop_NegF32: {
5518 HReg src = iselFltExpr(env, e->Iex.Unop.arg);
5519 HReg dst = newVRegF(env);
5520 addInstr(env, ARMInstr_VUnaryS(ARMvfpu_NEG, dst, src));
5521 return dst;
5522 }
5523 case Iop_AbsF32: {
5524 HReg src = iselFltExpr(env, e->Iex.Unop.arg);
5525 HReg dst = newVRegF(env);
5526 addInstr(env, ARMInstr_VUnaryS(ARMvfpu_ABS, dst, src));
5527 return dst;
5528 }
5529 default:
5530 break;
5531 }
5532 }
5533
5534 if (e->tag == Iex_Binop) {
5535 switch (e->Iex.Binop.op) {
5536 case Iop_SqrtF32: {
5537 /* first arg is rounding mode; we ignore it. */
5538 HReg src = iselFltExpr(env, e->Iex.Binop.arg2);
5539 HReg dst = newVRegF(env);
5540 addInstr(env, ARMInstr_VUnaryS(ARMvfpu_SQRT, dst, src));
5541 return dst;
5542 }
5543 case Iop_F64toF32: {
5544 HReg valD = iselDblExpr(env, e->Iex.Binop.arg2);
5545 set_VFP_rounding_mode(env, e->Iex.Binop.arg1);
5546 HReg valS = newVRegF(env);
5547 /* FCVTSD valS, valD */
5548 addInstr(env, ARMInstr_VCvtSD(False/*!sToD*/, valS, valD));
5549 set_VFP_rounding_default(env);
5550 return valS;
5551 }
5552 default:
5553 break;
5554 }
5555 }
5556
5557 if (e->tag == Iex_Triop) {
5558 switch (e->Iex.Triop.op) {
5559 case Iop_DivF32:
5560 case Iop_MulF32:
5561 case Iop_AddF32:
5562 case Iop_SubF32: {
5563 ARMVfpOp op = 0; /*INVALID*/
5564 HReg argL = iselFltExpr(env, e->Iex.Triop.arg2);
5565 HReg argR = iselFltExpr(env, e->Iex.Triop.arg3);
5566 HReg dst = newVRegF(env);
5567 switch (e->Iex.Triop.op) {
5568 case Iop_DivF32: op = ARMvfp_DIV; break;
5569 case Iop_MulF32: op = ARMvfp_MUL; break;
5570 case Iop_AddF32: op = ARMvfp_ADD; break;
5571 case Iop_SubF32: op = ARMvfp_SUB; break;
5572 default: vassert(0);
5573 }
5574 addInstr(env, ARMInstr_VAluS(op, dst, argL, argR));
5575 return dst;
5576 }
5577 default:
5578 break;
5579 }
5580 }
5581
5582 if (e->tag == Iex_Mux0X) {
5583 if (ty == Ity_F32
5584 && typeOfIRExpr(env->type_env,e->Iex.Mux0X.cond) == Ity_I8) {
5585 HReg r8;
5586 HReg rX = iselFltExpr(env, e->Iex.Mux0X.exprX);
5587 HReg r0 = iselFltExpr(env, e->Iex.Mux0X.expr0);
5588 HReg dst = newVRegF(env);
5589 addInstr(env, ARMInstr_VUnaryS(ARMvfpu_COPY, dst, rX));
5590 r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond);
5591 addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r8,
5592 ARMRI84_I84(0xFF,0)));
5593 addInstr(env, ARMInstr_VCMovS(ARMcc_EQ, dst, r0));
5594 return dst;
5595 }
5596 }
5597
5598 ppIRExpr(e);
5599 vpanic("iselFltExpr_wrk");
5600}
5601
cerioncee30312004-12-17 20:30:21 +00005602
5603/*---------------------------------------------------------*/
5604/*--- ISEL: Statements ---*/
5605/*---------------------------------------------------------*/
5606
/* Select instructions for one IR statement and append them to env
   via addInstr.  If VEX_TRACE_VCODE is set in vex_traceflags the
   statement is printed first.

   Statement kinds handled: Ist_Store, Ist_Put, Ist_WrTmp, Ist_Dirty,
   Ist_LLSC, Ist_MBE, Ist_IMark, Ist_NoOp and Ist_Exit.  Within each
   kind only the data types tested for below are supported.  Every
   handled case returns from inside the switch; whatever is left over
   arrives at stmt_fail, where the statement is printed and vpanic is
   called.  (An LL of an unsupported type hits vassert(0) instead.) */
5607static void iselStmt ( ISelEnv* env, IRStmt* stmt )
5608{
5609 if (vex_traceflags & VEX_TRACE_VCODE) {
5610 vex_printf("\n-- ");
5611 ppIRStmt(stmt);
5612 vex_printf("\n");
5613 }
5614 switch (stmt->tag) {
5615
5616 /* --------- STORE --------- */
5617 /* little-endian write to memory */
sewardjaf1ceca2005-06-30 23:31:27 +00005618 case Ist_Store: {
sewardj6c299f32009-12-31 18:00:12 +00005619 IRType tya = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
5620 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
5621 IREndness end = stmt->Ist.Store.end;
sewardjaf1ceca2005-06-30 23:31:27 +00005622
/* Only 32-bit addresses and little-endian stores are supported. */
sewardj6c299f32009-12-31 18:00:12 +00005623 if (tya != Ity_I32 || end != Iend_LE)
5624 goto stmt_fail;
sewardjaf1ceca2005-06-30 23:31:27 +00005625
sewardj6c299f32009-12-31 18:00:12 +00005626 if (tyd == Ity_I32) {
5627 HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
5628 ARMAMode1* am = iselIntExpr_AMode1(env, stmt->Ist.Store.addr);
5629 addInstr(env, ARMInstr_LdSt32(False/*!isLoad*/, rD, am));
5630 return;
5631 }
5632 if (tyd == Ity_I16) {
5633 HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
5634 ARMAMode2* am = iselIntExpr_AMode2(env, stmt->Ist.Store.addr);
5635 addInstr(env, ARMInstr_LdSt16(False/*!isLoad*/,
5636 False/*!isSignedLoad*/, rD, am));
5637 return;
5638 }
5639 if (tyd == Ity_I8) {
5640 HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
5641 ARMAMode1* am = iselIntExpr_AMode1(env, stmt->Ist.Store.addr);
5642 addInstr(env, ARMInstr_LdSt8U(False/*!isLoad*/, rD, am));
5643 return;
5644 }
5645 if (tyd == Ity_I64) {
/* With NEON the 64-bit value is stored from a single register;
   otherwise it is a register pair written as two 32-bit stores,
   high word at addr+4 and low word at addr+0. */
sewardjc6f970f2012-04-02 21:54:49 +00005646 if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
sewardj6c60b322010-08-22 12:48:28 +00005647 HReg dD = iselNeon64Expr(env, stmt->Ist.Store.data);
5648 ARMAModeN* am = iselIntExpr_AModeN(env, stmt->Ist.Store.addr);
5649 addInstr(env, ARMInstr_NLdStD(False, dD, am));
5650 } else {
5651 HReg rDhi, rDlo, rA;
5652 iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.Store.data);
5653 rA = iselIntExpr_R(env, stmt->Ist.Store.addr);
5654 addInstr(env, ARMInstr_LdSt32(False/*!load*/, rDhi,
5655 ARMAMode1_RI(rA,4)));
5656 addInstr(env, ARMInstr_LdSt32(False/*!load*/, rDlo,
5657 ARMAMode1_RI(rA,0)));
5658 }
sewardj6c299f32009-12-31 18:00:12 +00005659 return;
5660 }
5661 if (tyd == Ity_F64) {
5662 HReg dD = iselDblExpr(env, stmt->Ist.Store.data);
5663 ARMAModeV* am = iselIntExpr_AModeV(env, stmt->Ist.Store.addr);
5664 addInstr(env, ARMInstr_VLdStD(False/*!isLoad*/, dD, am));
5665 return;
5666 }
5667 if (tyd == Ity_F32) {
5668 HReg fD = iselFltExpr(env, stmt->Ist.Store.data);
5669 ARMAModeV* am = iselIntExpr_AModeV(env, stmt->Ist.Store.addr);
5670 addInstr(env, ARMInstr_VLdStS(False/*!isLoad*/, fD, am));
5671 return;
5672 }
/* NOTE(review): unlike the I64 case, this path does not test
   VEX_HWCAPS_ARM_NEON before using the NEON selector -- confirm that
   V128 data cannot occur on non-NEON hosts. */
sewardj6c60b322010-08-22 12:48:28 +00005673 if (tyd == Ity_V128) {
5674 HReg qD = iselNeonExpr(env, stmt->Ist.Store.data);
5675 ARMAModeN* am = iselIntExpr_AModeN(env, stmt->Ist.Store.addr);
5676 addInstr(env, ARMInstr_NLdStQ(False, qD, am));
5677 return;
5678 }
cerioncee30312004-12-17 20:30:21 +00005679
/* Unsupported data type: leave the switch and fail below. */
sewardj6c299f32009-12-31 18:00:12 +00005680 break;
cerioncee30312004-12-17 20:30:21 +00005681 }
5682
5683 /* --------- PUT --------- */
5684 /* write guest state, fixed offset */
/* All guest state accesses below are addressed relative to r8. */
5685 case Ist_Put: {
5686 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Put.data);
cerioncee30312004-12-17 20:30:21 +00005687
cerioncee30312004-12-17 20:30:21 +00005688 if (tyd == Ity_I32) {
sewardj6c299f32009-12-31 18:00:12 +00005689 HReg rD = iselIntExpr_R(env, stmt->Ist.Put.data);
5690 ARMAMode1* am = ARMAMode1_RI(hregARM_R8(), stmt->Ist.Put.offset);
5691 addInstr(env, ARMInstr_LdSt32(False/*!isLoad*/, rD, am));
5692 return;
cerioncee30312004-12-17 20:30:21 +00005693 }
sewardj6c299f32009-12-31 18:00:12 +00005694 if (tyd == Ity_I64) {
/* NEON path: form r8+offset in a fresh integer register and store
   through it.  Non-NEON path: two 32-bit stores, high word at
   offset+4 and low word at offset+0. */
sewardjc6f970f2012-04-02 21:54:49 +00005695 if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
sewardj6c60b322010-08-22 12:48:28 +00005696 HReg addr = newVRegI(env);
5697 HReg qD = iselNeon64Expr(env, stmt->Ist.Put.data);
5698 addInstr(env, ARMInstr_Add32(addr, hregARM_R8(),
5699 stmt->Ist.Put.offset));
5700 addInstr(env, ARMInstr_NLdStD(False, qD, mkARMAModeN_R(addr)));
5701 } else {
5702 HReg rDhi, rDlo;
5703 ARMAMode1* am0 = ARMAMode1_RI(hregARM_R8(),
5704 stmt->Ist.Put.offset + 0);
5705 ARMAMode1* am4 = ARMAMode1_RI(hregARM_R8(),
5706 stmt->Ist.Put.offset + 4);
5707 iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.Put.data);
5708 addInstr(env, ARMInstr_LdSt32(False/*!isLoad*/, rDhi, am4));
5709 addInstr(env, ARMInstr_LdSt32(False/*!isLoad*/, rDlo, am0));
5710 }
sewardj6c299f32009-12-31 18:00:12 +00005711 return;
cerioncee30312004-12-17 20:30:21 +00005712 }
sewardj6c299f32009-12-31 18:00:12 +00005713 if (tyd == Ity_F64) {
5714 // XXX This won't work if offset > 1020 or is not 0 % 4.
5715 // In which case we'll have to generate more longwinded code.
5716 ARMAModeV* am = mkARMAModeV(hregARM_R8(), stmt->Ist.Put.offset);
5717 HReg rD = iselDblExpr(env, stmt->Ist.Put.data);
5718 addInstr(env, ARMInstr_VLdStD(False/*!isLoad*/, rD, am));
5719 return;
cerioncee30312004-12-17 20:30:21 +00005720 }
sewardj6c299f32009-12-31 18:00:12 +00005721 if (tyd == Ity_F32) {
5722 // XXX This won't work if offset > 1020 or is not 0 % 4.
5723 // In which case we'll have to generate more longwinded code.
5724 ARMAModeV* am = mkARMAModeV(hregARM_R8(), stmt->Ist.Put.offset);
5725 HReg rD = iselFltExpr(env, stmt->Ist.Put.data);
5726 addInstr(env, ARMInstr_VLdStS(False/*!isLoad*/, rD, am));
5727 return;
5728 }
sewardj6c60b322010-08-22 12:48:28 +00005729 if (tyd == Ity_V128) {
5730 HReg addr = newVRegI(env);
5731 HReg qD = iselNeonExpr(env, stmt->Ist.Put.data);
5732 addInstr(env, ARMInstr_Add32(addr, hregARM_R8(),
5733 stmt->Ist.Put.offset));
5734 addInstr(env, ARMInstr_NLdStQ(False, qD, mkARMAModeN_R(addr)));
5735 return;
5736 }
cerioncee30312004-12-17 20:30:21 +00005737 break;
5738 }
5739
sewardj6c299f32009-12-31 18:00:12 +00005740//zz /* --------- Indexed PUT --------- */
5741//zz /* write guest state, run-time offset */
5742//zz case Ist_PutI: {
5743//zz ARMAMode2* am2
5744//zz = genGuestArrayOffset(
5745//zz env, stmt->Ist.PutI.descr,
5746//zz stmt->Ist.PutI.ix, stmt->Ist.PutI.bias );
5747//zz
5748//zz IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.PutI.data);
5749//zz
5750//zz if (tyd == Ity_I8) {
5751//zz HReg reg = iselIntExpr_R(env, stmt->Ist.PutI.data);
5752//zz addInstr(env, ARMInstr_StoreB(reg, am2));
5753//zz return;
5754//zz }
5755//zz// CAB: Ity_I32, Ity_I16 ?
5756//zz break;
5757//zz }
cerioncee30312004-12-17 20:30:21 +00005758
5759 /* --------- TMP --------- */
5760 /* assign value to temporary */
/* In each case the value is computed and then copied into the
   register(s) that lookupIRTemp / lookupIRTemp64 return for tmp. */
sewardjdd40fdf2006-12-24 02:20:24 +00005761 case Ist_WrTmp: {
5762 IRTemp tmp = stmt->Ist.WrTmp.tmp;
cerioncee30312004-12-17 20:30:21 +00005763 IRType ty = typeOfIRTemp(env->type_env, tmp);
5764
5765 if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
sewardj6c299f32009-12-31 18:00:12 +00005766 ARMRI84* ri84 = iselIntExpr_RI84(NULL, False,
5767 env, stmt->Ist.WrTmp.data);
5768 HReg dst = lookupIRTemp(env, tmp);
5769 addInstr(env, ARMInstr_Mov(dst,ri84));
cerioncee30312004-12-17 20:30:21 +00005770 return;
5771 }
sewardj6c299f32009-12-31 18:00:12 +00005772 if (ty == Ity_I1) {
/* Materialise the condition as 0 or 1: set dst to 0, then
   conditionally overwrite it with 1. */
5773 HReg dst = lookupIRTemp(env, tmp);
5774 ARMCondCode cond = iselCondCode(env, stmt->Ist.WrTmp.data);
5775 addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
5776 addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
5777 return;
5778 }
5779 if (ty == Ity_I64) {
sewardjc6f970f2012-04-02 21:54:49 +00005780 if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
sewardj6c60b322010-08-22 12:48:28 +00005781 HReg src = iselNeon64Expr(env, stmt->Ist.WrTmp.data);
5782 HReg dst = lookupIRTemp(env, tmp);
5783 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, dst, src, 4, False));
5784 } else {
5785 HReg rHi, rLo, dstHi, dstLo;
5786 iselInt64Expr(&rHi,&rLo, env, stmt->Ist.WrTmp.data);
5787 lookupIRTemp64( &dstHi, &dstLo, env, tmp);
5788 addInstr(env, mk_iMOVds_RR(dstHi, rHi) );
5789 addInstr(env, mk_iMOVds_RR(dstLo, rLo) );
5790 }
sewardj6c299f32009-12-31 18:00:12 +00005791 return;
5792 }
5793 if (ty == Ity_F64) {
5794 HReg src = iselDblExpr(env, stmt->Ist.WrTmp.data);
5795 HReg dst = lookupIRTemp(env, tmp);
5796 addInstr(env, ARMInstr_VUnaryD(ARMvfpu_COPY, dst, src));
5797 return;
5798 }
5799 if (ty == Ity_F32) {
5800 HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data);
5801 HReg dst = lookupIRTemp(env, tmp);
5802 addInstr(env, ARMInstr_VUnaryS(ARMvfpu_COPY, dst, src));
5803 return;
5804 }
sewardj6c60b322010-08-22 12:48:28 +00005805 if (ty == Ity_V128) {
5806 HReg src = iselNeonExpr(env, stmt->Ist.WrTmp.data);
5807 HReg dst = lookupIRTemp(env, tmp);
5808 addInstr(env, ARMInstr_NUnary(ARMneon_COPY, dst, src, 4, True));
5809 return;
5810 }
cerioncee30312004-12-17 20:30:21 +00005811 break;
5812 }
5813
5814 /* --------- Call to DIRTY helper --------- */
5815 /* call complex ("dirty") helper function */
5816 case Ist_Dirty: {
sewardj6c299f32009-12-31 18:00:12 +00005817 IRType retty;
5818 IRDirty* d = stmt->Ist.Dirty.details;
5819 Bool passBBP = False;
cerioncee30312004-12-17 20:30:21 +00005820
/* needsBBP is only allowed when the helper declares some guest
   state effects (nFxState > 0). */
5821 if (d->nFxState == 0)
5822 vassert(!d->needsBBP);
sewardj428fabd2005-03-21 03:11:17 +00005823
5824 passBBP = toBool(d->nFxState > 0 && d->needsBBP);
cerioncee30312004-12-17 20:30:21 +00005825
5826 /* Marshal args, do the call, clear stack. */
sewardj6c299f32009-12-31 18:00:12 +00005827 Bool ok = doHelperCall( env, passBBP, d->guard, d->cee, d->args );
5828 if (!ok)
5829 break; /* will go to stmt_fail: */
cerioncee30312004-12-17 20:30:21 +00005830
5831 /* Now figure out what to do with the returned value, if any. */
5832 if (d->tmp == IRTemp_INVALID)
sewardj6c299f32009-12-31 18:00:12 +00005833 /* No return value. Nothing to do. */
5834 return;
cerioncee30312004-12-17 20:30:21 +00005835
sewardj6c299f32009-12-31 18:00:12 +00005836 retty = typeOfIRTemp(env->type_env, d->tmp);
cerioncee30312004-12-17 20:30:21 +00005837
/* Only I64, I32, I16 and I8 return types are handled; any other
   type breaks out to stmt_fail. */
sewardj6c299f32009-12-31 18:00:12 +00005838 if (retty == Ity_I64) {
sewardjc6f970f2012-04-02 21:54:49 +00005839 if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
sewardj6c60b322010-08-22 12:48:28 +00005840 HReg tmp = lookupIRTemp(env, d->tmp);
5841 addInstr(env, ARMInstr_VXferD(True, tmp, hregARM_R1(),
5842 hregARM_R0()));
5843 } else {
5844 HReg dstHi, dstLo;
5845 /* The returned value is in r1:r0. Park it in the
5846 register-pair associated with tmp. */
5847 lookupIRTemp64( &dstHi, &dstLo, env, d->tmp);
5848 addInstr(env, mk_iMOVds_RR(dstHi, hregARM_R1()) );
5849 addInstr(env, mk_iMOVds_RR(dstLo, hregARM_R0()) );
5850 }
cerioncee30312004-12-17 20:30:21 +00005851 return;
5852 }
sewardj6c299f32009-12-31 18:00:12 +00005853 if (retty == Ity_I32 || retty == Ity_I16 || retty == Ity_I8) {
5854 /* The returned value is in r0. Park it in the register
5855 associated with tmp. */
5856 HReg dst = lookupIRTemp(env, d->tmp);
5857 addInstr(env, mk_iMOVds_RR(dst, hregARM_R0()) );
5858 return;
5859 }
5860
cerioncee30312004-12-17 20:30:21 +00005861 break;
5862 }
5863
sewardj6c299f32009-12-31 18:00:12 +00005864 /* --------- Load Linked and Store Conditional --------- */
/* Operands are staged in fixed real registers around the
   LdrEX/StrEX pseudo-instructions: the address goes in r4, data in
   r2 (and r3 for the high half of a 64-bit value), and the
   store-conditional status is read back from r0. */
5865 case Ist_LLSC: {
5866 if (stmt->Ist.LLSC.storedata == NULL) {
5867 /* LL */
5868 IRTemp res = stmt->Ist.LLSC.result;
5869 IRType ty = typeOfIRTemp(env->type_env, res);
sewardjff7f5b72011-07-11 11:43:38 +00005870 if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
sewardj6c299f32009-12-31 18:00:12 +00005871 Int szB = 0;
5872 HReg r_dst = lookupIRTemp(env, res);
5873 HReg raddr = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
5874 switch (ty) {
5875 case Ity_I8: szB = 1; break;
sewardjff7f5b72011-07-11 11:43:38 +00005876 case Ity_I16: szB = 2; break;
sewardj6c299f32009-12-31 18:00:12 +00005877 case Ity_I32: szB = 4; break;
5878 default: vassert(0);
5879 }
sewardjff7f5b72011-07-11 11:43:38 +00005880 addInstr(env, mk_iMOVds_RR(hregARM_R4(), raddr));
sewardj6c299f32009-12-31 18:00:12 +00005881 addInstr(env, ARMInstr_LdrEX(szB));
sewardjff7f5b72011-07-11 11:43:38 +00005882 addInstr(env, mk_iMOVds_RR(r_dst, hregARM_R2()));
sewardj6c299f32009-12-31 18:00:12 +00005883 return;
5884 }
sewardjff7f5b72011-07-11 11:43:38 +00005885 if (ty == Ity_I64) {
5886 HReg raddr = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
5887 addInstr(env, mk_iMOVds_RR(hregARM_R4(), raddr));
5888 addInstr(env, ARMInstr_LdrEX(8));
5889 /* Result is in r3:r2. On a non-NEON capable CPU, we must
5890 move it into a result register pair. On a NEON capable
5891 CPU, the result register will be a 64 bit NEON
5892 register, so we must move it there instead. */
sewardjc6f970f2012-04-02 21:54:49 +00005893 if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
sewardjff7f5b72011-07-11 11:43:38 +00005894 HReg dst = lookupIRTemp(env, res);
5895 addInstr(env, ARMInstr_VXferD(True, dst, hregARM_R3(),
5896 hregARM_R2()));
5897 } else {
5898 HReg r_dst_hi, r_dst_lo;
5899 lookupIRTemp64(&r_dst_hi, &r_dst_lo, env, res);
5900 addInstr(env, mk_iMOVds_RR(r_dst_lo, hregARM_R2()));
5901 addInstr(env, mk_iMOVds_RR(r_dst_hi, hregARM_R3()));
5902 }
5903 return;
5904 }
5905 /*NOTREACHED*/
5906 vassert(0);
sewardj6c299f32009-12-31 18:00:12 +00005907 } else {
5908 /* SC */
sewardj6c299f32009-12-31 18:00:12 +00005909 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.LLSC.storedata);
sewardjff7f5b72011-07-11 11:43:38 +00005910 if (tyd == Ity_I32 || tyd == Ity_I16 || tyd == Ity_I8) {
5911 Int szB = 0;
5912 HReg rD = iselIntExpr_R(env, stmt->Ist.LLSC.storedata);
5913 HReg rA = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
sewardj6c299f32009-12-31 18:00:12 +00005914 switch (tyd) {
5915 case Ity_I8: szB = 1; break;
sewardjff7f5b72011-07-11 11:43:38 +00005916 case Ity_I16: szB = 2; break;
sewardj6c299f32009-12-31 18:00:12 +00005917 case Ity_I32: szB = 4; break;
5918 default: vassert(0);
5919 }
sewardjff7f5b72011-07-11 11:43:38 +00005920 addInstr(env, mk_iMOVds_RR(hregARM_R2(), rD));
5921 addInstr(env, mk_iMOVds_RR(hregARM_R4(), rA));
sewardj6c299f32009-12-31 18:00:12 +00005922 addInstr(env, ARMInstr_StrEX(szB));
sewardjff7f5b72011-07-11 11:43:38 +00005923 } else {
5924 vassert(tyd == Ity_I64);
5925 /* This is really ugly. There is no is/is-not NEON
5926 decision akin to the case for LL, because iselInt64Expr
5927 fudges this for us, and always gets the result into two
5928 GPRs even if this means moving it from a NEON
5929 register. */
5930 HReg rDhi, rDlo;
5931 iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.LLSC.storedata);
5932 HReg rA = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
5933 addInstr(env, mk_iMOVds_RR(hregARM_R2(), rDlo));
5934 addInstr(env, mk_iMOVds_RR(hregARM_R3(), rDhi));
5935 addInstr(env, mk_iMOVds_RR(hregARM_R4(), rA));
5936 addInstr(env, ARMInstr_StrEX(8));
sewardj6c299f32009-12-31 18:00:12 +00005937 }
sewardjff7f5b72011-07-11 11:43:38 +00005938 /* now r0 is 1 if failed, 0 if success. Change to IR
5939 conventions (0 is fail, 1 is success). Also transfer
5940 result to r_res. */
5941 IRTemp res = stmt->Ist.LLSC.result;
5942 IRType ty = typeOfIRTemp(env->type_env, res);
5943 HReg r_res = lookupIRTemp(env, res);
5944 ARMRI84* one = ARMRI84_I84(1,0);
5945 vassert(ty == Ity_I1);
5946 addInstr(env, ARMInstr_Alu(ARMalu_XOR, r_res, hregARM_R0(), one));
5947 /* And be conservative -- mask off all but the lowest bit */
5948 addInstr(env, ARMInstr_Alu(ARMalu_AND, r_res, r_res, one));
5949 return;
sewardj6c299f32009-12-31 18:00:12 +00005950 }
/* Not reached: both the LL and the SC arm above return or assert. */
5951 break;
5952 }
5953
sewardj412098c2010-05-04 08:48:43 +00005954 /* --------- MEM FENCE --------- */
/* Events other than Fence and CancelReservation fall through to
   stmt_fail. */
5955 case Ist_MBE:
5956 switch (stmt->Ist.MBE.event) {
5957 case Imbe_Fence:
sewardj6d615ba2011-09-26 16:19:43 +00005958 addInstr(env, ARMInstr_MFence());
5959 return;
5960 case Imbe_CancelReservation:
5961 addInstr(env, ARMInstr_CLREX());
sewardj412098c2010-05-04 08:48:43 +00005962 return;
5963 default:
5964 break;
5965 }
5966 break;
5967
sewardj6c299f32009-12-31 18:00:12 +00005968 /* --------- INSTR MARK --------- */
5969 /* Doesn't generate any executable code ... */
5970 case Ist_IMark:
5971 return;
5972
5973 /* --------- NO-OP --------- */
5974 case Ist_NoOp:
5975 return;
5976
cerioncee30312004-12-17 20:30:21 +00005977 /* --------- EXIT --------- */
/* Conditional side exit.  The guard becomes the condition code on
   the transfer instruction; amR15T addresses the guest state slot at
   r8 + offsIP. */
cerioncee30312004-12-17 20:30:21 +00005978 case Ist_Exit: {
cerioncee30312004-12-17 20:30:21 +00005979 if (stmt->Ist.Exit.dst->tag != Ico_U32)
5980 vpanic("isel_arm: Ist_Exit: dst is not a 32-bit value");
sewardjc6f970f2012-04-02 21:54:49 +00005981
5982 ARMCondCode cc = iselCondCode(env, stmt->Ist.Exit.guard);
5983 ARMAMode1* amR15T = ARMAMode1_RI(hregARM_R8(),
5984 stmt->Ist.Exit.offsIP);
5985
5986 /* Case: boring transfer to known address */
5987 if (stmt->Ist.Exit.jk == Ijk_Boring
5988 || stmt->Ist.Exit.jk == Ijk_Call
5989 || stmt->Ist.Exit.jk == Ijk_Ret) {
5990 if (env->chainingAllowed) {
5991 /* .. almost always true .. */
5992 /* Skip the event check at the dst if this is a forwards
5993 edge. */
5994 Bool toFastEP
5995 = ((Addr32)stmt->Ist.Exit.dst->Ico.U32) > env->max_ga;
5996 if (0) vex_printf("%s", toFastEP ? "Y" : ",");
5997 addInstr(env, ARMInstr_XDirect(stmt->Ist.Exit.dst->Ico.U32,
5998 amR15T, cc, toFastEP));
5999 } else {
6000 /* .. very occasionally .. */
6001 /* We can't use chaining, so ask for an assisted transfer,
6002 as that's the only alternative that is allowable. */
/* Note the jump kind passed on is always Ijk_Boring here, even when
   the exit itself is Ijk_Call or Ijk_Ret. */
6003 HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
6004 addInstr(env, ARMInstr_XAssisted(r, amR15T, cc, Ijk_Boring));
6005 }
6006 return;
6007 }
6008
6009 /* Case: assisted transfer to arbitrary address */
6010 switch (stmt->Ist.Exit.jk) {
sewardj2f6902b2012-04-23 09:48:14 +00006011 /* Keep this list in sync with that in iselNext below */
6012 case Ijk_ClientReq:
sewardjc6f970f2012-04-02 21:54:49 +00006013 case Ijk_NoDecode:
sewardj2f6902b2012-04-23 09:48:14 +00006014 case Ijk_NoRedir:
6015 case Ijk_Sys_syscall:
sewardjc6f970f2012-04-02 21:54:49 +00006016 {
6017 HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
6018 addInstr(env, ARMInstr_XAssisted(r, amR15T, cc,
6019 stmt->Ist.Exit.jk));
6020 return;
6021 }
6022 default:
6023 break;
6024 }
6025
6026 /* Do we ever expect to see any other kind? */
6027 goto stmt_fail;
cerioncee30312004-12-17 20:30:21 +00006028 }
6029
6030 default: break;
6031 }
/* Common failure exit: show the statement that could not be
   handled, then panic. */
sewardjaf1ceca2005-06-30 23:31:27 +00006032 stmt_fail:
cerioncee30312004-12-17 20:30:21 +00006033 ppIRStmt(stmt);
6034 vpanic("iselStmt");
6035}
6036
6037
6038/*---------------------------------------------------------*/
6039/*--- ISEL: Basic block terminators (Nexts) ---*/
6040/*---------------------------------------------------------*/
6041
sewardjc6f970f2012-04-02 21:54:49 +00006042static void iselNext ( ISelEnv* env,
6043 IRExpr* next, IRJumpKind jk, Int offsIP )
cerioncee30312004-12-17 20:30:21 +00006044{
sewardj6c299f32009-12-31 18:00:12 +00006045 if (vex_traceflags & VEX_TRACE_VCODE) {
sewardjc6f970f2012-04-02 21:54:49 +00006046 vex_printf( "\n-- PUT(%d) = ", offsIP);
6047 ppIRExpr( next );
6048 vex_printf( "; exit-");
sewardj6c299f32009-12-31 18:00:12 +00006049 ppIRJumpKind(jk);
sewardjc6f970f2012-04-02 21:54:49 +00006050 vex_printf( "\n");
sewardj6c299f32009-12-31 18:00:12 +00006051 }
sewardjc6f970f2012-04-02 21:54:49 +00006052
6053 /* Case: boring transfer to known address */
6054 if (next->tag == Iex_Const) {
6055 IRConst* cdst = next->Iex.Const.con;
6056 vassert(cdst->tag == Ico_U32);
6057 if (jk == Ijk_Boring || jk == Ijk_Call) {
6058 /* Boring transfer to known address */
6059 ARMAMode1* amR15T = ARMAMode1_RI(hregARM_R8(), offsIP);
6060 if (env->chainingAllowed) {
6061 /* .. almost always true .. */
6062 /* Skip the event check at the dst if this is a forwards
6063 edge. */
6064 Bool toFastEP
6065 = ((Addr64)cdst->Ico.U32) > env->max_ga;
6066 if (0) vex_printf("%s", toFastEP ? "X" : ".");
6067 addInstr(env, ARMInstr_XDirect(cdst->Ico.U32,
6068 amR15T, ARMcc_AL,
6069 toFastEP));
6070 } else {
6071 /* .. very occasionally .. */
6072 /* We can't use chaining, so ask for an assisted transfer,
6073 as that's the only alternative that is allowable. */
6074 HReg r = iselIntExpr_R(env, next);
6075 addInstr(env, ARMInstr_XAssisted(r, amR15T, ARMcc_AL,
6076 Ijk_Boring));
6077 }
6078 return;
6079 }
6080 }
6081
6082 /* Case: call/return (==boring) transfer to any address */
6083 switch (jk) {
6084 case Ijk_Boring: case Ijk_Ret: case Ijk_Call: {
6085 HReg r = iselIntExpr_R(env, next);
6086 ARMAMode1* amR15T = ARMAMode1_RI(hregARM_R8(), offsIP);
6087 if (env->chainingAllowed) {
6088 addInstr(env, ARMInstr_XIndir(r, amR15T, ARMcc_AL));
6089 } else {
6090 addInstr(env, ARMInstr_XAssisted(r, amR15T, ARMcc_AL,
6091 Ijk_Boring));
6092 }
6093 return;
6094 }
6095 default:
6096 break;
6097 }
6098
sewardj2f6902b2012-04-23 09:48:14 +00006099 /* Case: assisted transfer to arbitrary address */
sewardjc6f970f2012-04-02 21:54:49 +00006100 switch (jk) {
sewardj2f6902b2012-04-23 09:48:14 +00006101 /* Keep this list in sync with that for Ist_Exit above */
6102 case Ijk_ClientReq:
6103 case Ijk_NoDecode:
sewardjc6f970f2012-04-02 21:54:49 +00006104 case Ijk_NoRedir:
sewardj2f6902b2012-04-23 09:48:14 +00006105 case Ijk_Sys_syscall:
sewardjc6f970f2012-04-02 21:54:49 +00006106 {
6107 HReg r = iselIntExpr_R(env, next);
6108 ARMAMode1* amR15T = ARMAMode1_RI(hregARM_R8(), offsIP);
6109 addInstr(env, ARMInstr_XAssisted(r, amR15T, ARMcc_AL, jk));
6110 return;
6111 }
6112 default:
6113 break;
6114 }
6115
6116 vex_printf( "\n-- PUT(%d) = ", offsIP);
6117 ppIRExpr( next );
6118 vex_printf( "; exit-");
6119 ppIRJumpKind(jk);
6120 vex_printf( "\n");
6121 vassert(0); // are we expecting any other kind?
cerioncee30312004-12-17 20:30:21 +00006122}
6123
6124
6125/*---------------------------------------------------------*/
6126/*--- Insn selector top-level ---*/
6127/*---------------------------------------------------------*/
6128
sewardjdd40fdf2006-12-24 02:20:24 +00006129/* Translate an entire SB to arm code. */
cerioncee30312004-12-17 20:30:21 +00006130
sewardjc6f970f2012-04-02 21:54:49 +00006131HInstrArray* iselSB_ARM ( IRSB* bb,
6132 VexArch arch_host,
6133 VexArchInfo* archinfo_host,
6134 VexAbiInfo* vbi/*UNUSED*/,
6135 Int offs_Host_EvC_Counter,
6136 Int offs_Host_EvC_FailAddr,
6137 Bool chainingAllowed,
6138 Bool addProfInc,
6139 Addr64 max_ga )
cerioncee30312004-12-17 20:30:21 +00006140{
sewardjc6f970f2012-04-02 21:54:49 +00006141 Int i, j;
6142 HReg hreg, hregHI;
6143 ISelEnv* env;
6144 UInt hwcaps_host = archinfo_host->hwcaps;
6145 ARMAMode1 *amCounter, *amFailAddr;
cerioncee30312004-12-17 20:30:21 +00006146
sewardj6c299f32009-12-31 18:00:12 +00006147 /* sanity ... */
6148 vassert(arch_host == VexArchARM);
sewardj6c60b322010-08-22 12:48:28 +00006149
6150 /* hwcaps should not change from one ISEL call to another. */
sewardjc6f970f2012-04-02 21:54:49 +00006151 arm_hwcaps = hwcaps_host; // JRS 2012 Mar 31: FIXME (RM)
cerioncee30312004-12-17 20:30:21 +00006152
sewardj6c299f32009-12-31 18:00:12 +00006153 /* Make up an initial environment to use. */
6154 env = LibVEX_Alloc(sizeof(ISelEnv));
6155 env->vreg_ctr = 0;
6156
6157 /* Set up output code array. */
6158 env->code = newHInstrArray();
cerioncee30312004-12-17 20:30:21 +00006159
sewardj6c299f32009-12-31 18:00:12 +00006160 /* Copy BB's type env. */
6161 env->type_env = bb->tyenv;
cerioncee30312004-12-17 20:30:21 +00006162
sewardj6c299f32009-12-31 18:00:12 +00006163 /* Make up an IRTemp -> virtual HReg mapping. This doesn't
6164 change as we go along. */
6165 env->n_vregmap = bb->tyenv->types_used;
6166 env->vregmap = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
6167 env->vregmapHI = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
cerioncee30312004-12-17 20:30:21 +00006168
sewardjc6f970f2012-04-02 21:54:49 +00006169 /* and finally ... */
6170 env->chainingAllowed = chainingAllowed;
6171 env->hwcaps = hwcaps_host;
6172 env->max_ga = max_ga;
6173
sewardj6c299f32009-12-31 18:00:12 +00006174 /* For each IR temporary, allocate a suitably-kinded virtual
6175 register. */
6176 j = 0;
6177 for (i = 0; i < env->n_vregmap; i++) {
6178 hregHI = hreg = INVALID_HREG;
6179 switch (bb->tyenv->types[i]) {
6180 case Ity_I1:
6181 case Ity_I8:
6182 case Ity_I16:
6183 case Ity_I32: hreg = mkHReg(j++, HRcInt32, True); break;
sewardj6c60b322010-08-22 12:48:28 +00006184 case Ity_I64:
sewardjc6f970f2012-04-02 21:54:49 +00006185 if (hwcaps_host & VEX_HWCAPS_ARM_NEON) {
sewardj6c60b322010-08-22 12:48:28 +00006186 hreg = mkHReg(j++, HRcFlt64, True);
sewardj6c60b322010-08-22 12:48:28 +00006187 } else {
6188 hregHI = mkHReg(j++, HRcInt32, True);
6189 hreg = mkHReg(j++, HRcInt32, True);
6190 }
6191 break;
sewardj6c299f32009-12-31 18:00:12 +00006192 case Ity_F32: hreg = mkHReg(j++, HRcFlt32, True); break;
6193 case Ity_F64: hreg = mkHReg(j++, HRcFlt64, True); break;
sewardj06122e72011-03-28 12:14:48 +00006194 case Ity_V128: hreg = mkHReg(j++, HRcVec128, True); break;
sewardj6c299f32009-12-31 18:00:12 +00006195 default: ppIRType(bb->tyenv->types[i]);
6196 vpanic("iselBB: IRTemp type");
6197 }
6198 env->vregmap[i] = hreg;
6199 env->vregmapHI[i] = hregHI;
6200 }
6201 env->vreg_ctr = j;
cerioncee30312004-12-17 20:30:21 +00006202
sewardjc6f970f2012-04-02 21:54:49 +00006203 /* The very first instruction must be an event check. */
6204 amCounter = ARMAMode1_RI(hregARM_R8(), offs_Host_EvC_Counter);
6205 amFailAddr = ARMAMode1_RI(hregARM_R8(), offs_Host_EvC_FailAddr);
6206 addInstr(env, ARMInstr_EvCheck(amCounter, amFailAddr));
6207
6208 /* Possibly a block counter increment (for profiling). At this
6209 point we don't know the address of the counter, so just pretend
6210 it is zero. It will have to be patched later, but before this
6211 translation is used, by a call to LibVEX_patchProfCtr. */
6212 if (addProfInc) {
6213 addInstr(env, ARMInstr_ProfInc());
6214 }
cerioncee30312004-12-17 20:30:21 +00006215
sewardj6c299f32009-12-31 18:00:12 +00006216 /* Ok, finally we can iterate over the statements. */
6217 for (i = 0; i < bb->stmts_used; i++)
sewardjc6f970f2012-04-02 21:54:49 +00006218 iselStmt(env, bb->stmts[i]);
sewardj6c299f32009-12-31 18:00:12 +00006219
sewardjc6f970f2012-04-02 21:54:49 +00006220 iselNext(env, bb->next, bb->jumpkind, bb->offsIP);
sewardj6c299f32009-12-31 18:00:12 +00006221
6222 /* record the number of vregs we used. */
6223 env->code->n_vregs = env->vreg_ctr;
6224 return env->code;
cerioncee30312004-12-17 20:30:21 +00006225}
6226
6227
cerioncee30312004-12-17 20:30:21 +00006228/*---------------------------------------------------------------*/
sewardjcef7d3e2009-07-02 12:21:59 +00006229/*--- end host_arm_isel.c ---*/
cerioncee30312004-12-17 20:30:21 +00006230/*---------------------------------------------------------------*/