blob: 7916ce26cae079acc5b92e97ea389679c0ae7e0a [file] [log] [blame]
sewardjbbcf1882014-01-12 12:49:10 +00001
2/*---------------------------------------------------------------*/
3/*--- begin host_arm64_isel.c ---*/
4/*---------------------------------------------------------------*/
5
6/*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
9
10 Copyright (C) 2013-2013 OpenWorks
11 info@open-works.net
12
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
17
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26 02110-1301, USA.
27
28 The GNU General Public License is contained in the file COPYING.
29*/
30
31#include "libvex_basictypes.h"
32#include "libvex_ir.h"
33#include "libvex.h"
34#include "ir_match.h"
35
36#include "main_util.h"
37#include "main_globals.h"
38#include "host_generic_regs.h"
39#include "host_generic_simd64.h" // for 32-bit SIMD helpers
40#include "host_arm64_defs.h"
41
42
43//ZZ /*---------------------------------------------------------*/
44//ZZ /*--- ARMvfp control word stuff ---*/
45//ZZ /*---------------------------------------------------------*/
46//ZZ
47//ZZ /* Vex-generated code expects to run with the FPU set as follows: all
48//ZZ exceptions masked, round-to-nearest, non-vector mode, with the NZCV
49//ZZ flags cleared, and FZ (flush to zero) disabled. Curiously enough,
50//ZZ this corresponds to a FPSCR value of zero.
51//ZZ
52//ZZ fpscr should therefore be zero on entry to Vex-generated code, and
53//ZZ should be unchanged at exit. (Or at least the bottom 28 bits
54//ZZ should be zero).
55//ZZ */
56//ZZ
57//ZZ #define DEFAULT_FPSCR 0
58
59
60/*---------------------------------------------------------*/
61/*--- ISelEnv ---*/
62/*---------------------------------------------------------*/
63
64/* This carries around:
65
66 - A mapping from IRTemp to IRType, giving the type of any IRTemp we
67 might encounter. This is computed before insn selection starts,
68 and does not change.
69
70 - A mapping from IRTemp to HReg. This tells the insn selector
71 which virtual register is associated with each IRTemp temporary.
72 This is computed before insn selection starts, and does not
73 change. We expect this mapping to map precisely the same set of
74 IRTemps as the type mapping does.
75
76 |vregmap| holds the primary register for the IRTemp.
77 |vregmapHI| is only used for 128-bit integer-typed
78 IRTemps. It holds the identity of a second
79 64-bit virtual HReg, which holds the high half
80 of the value.
81
82 - The code array, that is, the insns selected so far.
83
84 - A counter, for generating new virtual registers.
85
86 - The host hardware capabilities word. This is set at the start
87 and does not change.
88
89 - A Bool for indicating whether we may generate chain-me
90 instructions for control flow transfers, or whether we must use
91 XAssisted.
92
93 - The maximum guest address of any guest insn in this block.
94 Actually, the address of the highest-addressed byte from any insn
95 in this block. Is set at the start and does not change. This is
96 used for detecting jumps which are definitely forward-edges from
97 this block, and therefore can be made (chained) to the fast entry
98 point of the destination, thereby avoiding the destination's
99 event check.
100
101 - An IRExpr*, which may be NULL, holding the IR expression (an
102 IRRoundingMode-encoded value) to which the FPU's rounding mode
103 was most recently set. Setting to NULL is always safe. Used to
104 avoid redundant settings of the FPU's rounding mode, as
105 described in set_FPCR_rounding_mode below.
106
107 Note, this is all (well, mostly) host-independent.
108*/
109
typedef
   struct {
      /* Constant -- are set at the start and do not change. */
      IRTypeEnv* type_env;          // gives the IRType of every IRTemp

      HReg* vregmap;                // primary vreg for each IRTemp
      HReg* vregmapHI;              // second (high-half) vreg, used only
                                    // for 128-bit integer-typed IRTemps
      Int n_vregmap;                // number of entries in the two maps

      UInt hwcaps;                  // host hardware capabilities word

      Bool chainingAllowed;         // may we emit chain-me transfers?
      Addr64 max_ga;                // highest guest address in this block

      /* These are modified as we go along. */
      HInstrArray* code;            // the insns selected so far
      Int vreg_ctr;                 // counter for allocating new vregs

      IRExpr* previous_rm;          // last rounding-mode expr set, or NULL;
                                    // see set_FPCR_rounding_mode
   }
   ISelEnv;
131
132static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp )
133{
134 vassert(tmp >= 0);
135 vassert(tmp < env->n_vregmap);
136 return env->vregmap[tmp];
137}
138
139static void addInstr ( ISelEnv* env, ARM64Instr* instr )
140{
141 addHInstr(env->code, instr);
142 if (vex_traceflags & VEX_TRACE_VCODE) {
143 ppARM64Instr(instr);
144 vex_printf("\n");
145 }
146}
147
148static HReg newVRegI ( ISelEnv* env )
149{
150 HReg reg = mkHReg(env->vreg_ctr, HRcInt64, True/*virtual reg*/);
151 env->vreg_ctr++;
152 return reg;
153}
154
155static HReg newVRegD ( ISelEnv* env )
156{
157 HReg reg = mkHReg(env->vreg_ctr, HRcFlt64, True/*virtual reg*/);
158 env->vreg_ctr++;
159 return reg;
160}
161
162//ZZ static HReg newVRegF ( ISelEnv* env )
163//ZZ {
164//ZZ HReg reg = mkHReg(env->vreg_ctr, HRcFlt32, True/*virtual reg*/);
165//ZZ env->vreg_ctr++;
166//ZZ return reg;
167//ZZ }
168
169static HReg newVRegV ( ISelEnv* env )
170{
171 HReg reg = mkHReg(env->vreg_ctr, HRcVec128, True/*virtual reg*/);
172 env->vreg_ctr++;
173 return reg;
174}
175
176//ZZ /* These are duplicated in guest_arm_toIR.c */
177//ZZ static IRExpr* unop ( IROp op, IRExpr* a )
178//ZZ {
179//ZZ return IRExpr_Unop(op, a);
180//ZZ }
181//ZZ
182//ZZ static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
183//ZZ {
184//ZZ return IRExpr_Binop(op, a1, a2);
185//ZZ }
186//ZZ
187//ZZ static IRExpr* bind ( Int binder )
188//ZZ {
189//ZZ return IRExpr_Binder(binder);
190//ZZ }
191
192
193/*---------------------------------------------------------*/
194/*--- ISEL: Forward declarations ---*/
195/*---------------------------------------------------------*/
196
197/* These are organised as iselXXX and iselXXX_wrk pairs. The
198 iselXXX_wrk do the real work, but are not to be called directly.
199 For each XXX, iselXXX calls its iselXXX_wrk counterpart, then
200 checks that all returned registers are virtual. You should not
201 call the _wrk version directly.
202
203 Because some forms of ARM64 memory amodes are implicitly scaled by
204 the access size, iselIntExpr_AMode takes an IRType which tells it
205 the type of the access for which the amode is to be used. This
206 type needs to be correct, else you'll get incorrect code.
207*/
208static ARM64AMode* iselIntExpr_AMode_wrk ( ISelEnv* env,
209 IRExpr* e, IRType dty );
210static ARM64AMode* iselIntExpr_AMode ( ISelEnv* env,
211 IRExpr* e, IRType dty );
212
213static ARM64RIA* iselIntExpr_RIA_wrk ( ISelEnv* env, IRExpr* e );
214static ARM64RIA* iselIntExpr_RIA ( ISelEnv* env, IRExpr* e );
215
216static ARM64RIL* iselIntExpr_RIL_wrk ( ISelEnv* env, IRExpr* e );
217static ARM64RIL* iselIntExpr_RIL ( ISelEnv* env, IRExpr* e );
218
219static ARM64RI6* iselIntExpr_RI6_wrk ( ISelEnv* env, IRExpr* e );
220static ARM64RI6* iselIntExpr_RI6 ( ISelEnv* env, IRExpr* e );
221
222static ARM64CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e );
223static ARM64CondCode iselCondCode ( ISelEnv* env, IRExpr* e );
224
225static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e );
226static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e );
227
228static void iselInt128Expr_wrk ( /*OUT*/HReg* rHi, HReg* rLo,
229 ISelEnv* env, IRExpr* e );
230static void iselInt128Expr ( /*OUT*/HReg* rHi, HReg* rLo,
231 ISelEnv* env, IRExpr* e );
232
233
234//ZZ static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo,
235//ZZ ISelEnv* env, IRExpr* e );
236//ZZ static void iselInt64Expr ( HReg* rHi, HReg* rLo,
237//ZZ ISelEnv* env, IRExpr* e );
238
239static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e );
240static HReg iselDblExpr ( ISelEnv* env, IRExpr* e );
241
242static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e );
243static HReg iselFltExpr ( ISelEnv* env, IRExpr* e );
244
245//ZZ static HReg iselNeon64Expr_wrk ( ISelEnv* env, IRExpr* e );
246//ZZ static HReg iselNeon64Expr ( ISelEnv* env, IRExpr* e );
247
248static HReg iselV128Expr_wrk ( ISelEnv* env, IRExpr* e );
249static HReg iselV128Expr ( ISelEnv* env, IRExpr* e );
250
251static ARM64RIL* mb_mkARM64RIL_I ( ULong imm64 );
252
253
254/*---------------------------------------------------------*/
255/*--- ISEL: Misc helpers ---*/
256/*---------------------------------------------------------*/
257
258/* Generate an amode suitable for a 64-bit sized access relative to
259 the baseblock register (X21). This generates an RI12 amode, which
260 means its scaled by the access size, which is why the access size
261 -- 64 bit -- is stated explicitly here. Consequently |off| needs
262 to be divisible by 8. */
263static ARM64AMode* mk_baseblock_64bit_access_amode ( UInt off )
264{
265 vassert(off < (8 << 12)); /* otherwise it's unrepresentable */
266 vassert((off & 7) == 0); /* ditto */
267 return ARM64AMode_RI12(hregARM64_X21(), off >> 3, 8/*scale*/);
268}
269
270/* Ditto, for 32 bit accesses. */
271static ARM64AMode* mk_baseblock_32bit_access_amode ( UInt off )
272{
273 vassert(off < (4 << 12)); /* otherwise it's unrepresentable */
274 vassert((off & 3) == 0); /* ditto */
275 return ARM64AMode_RI12(hregARM64_X21(), off >> 2, 4/*scale*/);
276}
277
278/* Ditto, for 16 bit accesses. */
279static ARM64AMode* mk_baseblock_16bit_access_amode ( UInt off )
280{
281 vassert(off < (2 << 12)); /* otherwise it's unrepresentable */
282 vassert((off & 1) == 0); /* ditto */
283 return ARM64AMode_RI12(hregARM64_X21(), off >> 1, 2/*scale*/);
284}
285
286/* Ditto, for 8 bit accesses. */
287static ARM64AMode* mk_baseblock_8bit_access_amode ( UInt off )
288{
289 vassert(off < (1 << 12)); /* otherwise it's unrepresentable */
290 return ARM64AMode_RI12(hregARM64_X21(), off >> 0, 1/*scale*/);
291}
292
293static HReg mk_baseblock_128bit_access_addr ( ISelEnv* env, UInt off )
294{
295 vassert(off < (1<<12));
296 HReg r = newVRegI(env);
297 addInstr(env, ARM64Instr_Arith(r, hregARM64_X21(),
298 ARM64RIA_I12(off,0), True/*isAdd*/));
299 return r;
300}
301
302static HReg get_baseblock_register ( void )
303{
304 return hregARM64_X21();
305}
306
307/* Generate code to zero extend a 32 bit value in 'src' to 64 bits, in
308 a new register, and return the new register. */
309static HReg widen_z_32_to_64 ( ISelEnv* env, HReg src )
310{
311 HReg dst = newVRegI(env);
312 ARM64RIL* mask = ARM64RIL_I13(1, 0, 31); /* encodes 0xFFFFFFFF */
313 addInstr(env, ARM64Instr_Logic(dst, src, mask, ARM64lo_AND));
314 return dst;
315}
316
317/* Generate code to sign extend a 16 bit value in 'src' to 64 bits, in
318 a new register, and return the new register. */
319static HReg widen_s_16_to_64 ( ISelEnv* env, HReg src )
320{
321 HReg dst = newVRegI(env);
322 ARM64RI6* n48 = ARM64RI6_I6(48);
323 addInstr(env, ARM64Instr_Shift(dst, src, n48, ARM64sh_SHL));
324 addInstr(env, ARM64Instr_Shift(dst, dst, n48, ARM64sh_SAR));
325 return dst;
326}
327
328/* Generate code to zero extend a 16 bit value in 'src' to 64 bits, in
329 a new register, and return the new register. */
330static HReg widen_z_16_to_64 ( ISelEnv* env, HReg src )
331{
332 HReg dst = newVRegI(env);
333 ARM64RI6* n48 = ARM64RI6_I6(48);
334 addInstr(env, ARM64Instr_Shift(dst, src, n48, ARM64sh_SHL));
335 addInstr(env, ARM64Instr_Shift(dst, dst, n48, ARM64sh_SHR));
336 return dst;
337}
338
339/* Generate code to sign extend a 32 bit value in 'src' to 64 bits, in
340 a new register, and return the new register. */
341static HReg widen_s_32_to_64 ( ISelEnv* env, HReg src )
342{
343 HReg dst = newVRegI(env);
344 ARM64RI6* n32 = ARM64RI6_I6(32);
345 addInstr(env, ARM64Instr_Shift(dst, src, n32, ARM64sh_SHL));
346 addInstr(env, ARM64Instr_Shift(dst, dst, n32, ARM64sh_SAR));
347 return dst;
348}
349
350/* Generate code to sign extend a 8 bit value in 'src' to 64 bits, in
351 a new register, and return the new register. */
352static HReg widen_s_8_to_64 ( ISelEnv* env, HReg src )
353{
354 HReg dst = newVRegI(env);
355 ARM64RI6* n56 = ARM64RI6_I6(56);
356 addInstr(env, ARM64Instr_Shift(dst, src, n56, ARM64sh_SHL));
357 addInstr(env, ARM64Instr_Shift(dst, dst, n56, ARM64sh_SAR));
358 return dst;
359}
360
361static HReg widen_z_8_to_64 ( ISelEnv* env, HReg src )
362{
363 HReg dst = newVRegI(env);
364 ARM64RI6* n56 = ARM64RI6_I6(56);
365 addInstr(env, ARM64Instr_Shift(dst, src, n56, ARM64sh_SHL));
366 addInstr(env, ARM64Instr_Shift(dst, dst, n56, ARM64sh_SHR));
367 return dst;
368}
369
370/* Is this IRExpr_Const(IRConst_U64(0)) ? */
371static Bool isZeroU64 ( IRExpr* e ) {
372 if (e->tag != Iex_Const) return False;
373 IRConst* con = e->Iex.Const.con;
374 vassert(con->tag == Ico_U64);
375 return con->Ico.U64 == 0;
376}
377
378
379/*---------------------------------------------------------*/
380/*--- ISEL: FP rounding mode helpers ---*/
381/*---------------------------------------------------------*/
382
383/* Set the FP rounding mode: 'mode' is an I32-typed expression
384 denoting a value in the range 0 .. 3, indicating a round mode
385 encoded as per type IRRoundingMode -- the first four values only
   (Irrm_NEAREST, Irrm_NegINF, Irrm_PosINF, Irrm_ZERO). Set the ARM64
   FPCR to have the same rounding.
388
389 For speed & simplicity, we're setting the *entire* FPCR here.
390
391 Setting the rounding mode is expensive. So this function tries to
392 avoid repeatedly setting the rounding mode to the same thing by
393 first comparing 'mode' to the 'mode' tree supplied in the previous
394 call to this function, if any. (The previous value is stored in
395 env->previous_rm.) If 'mode' is a single IR temporary 't' and
396 env->previous_rm is also just 't', then the setting is skipped.
397
398 This is safe because of the SSA property of IR: an IR temporary can
399 only be defined once and so will have the same value regardless of
400 where it appears in the block. Cool stuff, SSA.
401
402 A safety condition: all attempts to set the RM must be aware of
403 this mechanism - by being routed through the functions here.
404
405 Of course this only helps if blocks where the RM is set more than
406 once and it is set to the same value each time, *and* that value is
407 held in the same IR temporary each time. In order to assure the
408 latter as much as possible, the IR optimiser takes care to do CSE
409 on any block with any sign of floating point activity.
410*/
static
void set_FPCR_rounding_mode ( ISelEnv* env, IRExpr* mode )
{
   /* |mode| is an IRRoundingMode-encoded I32 value; see the comment
      block above for the redundant-set avoidance scheme. */
   vassert(typeOfIRExpr(env->type_env,mode) == Ity_I32);

   /* Do we need to do anything?  Skip the set if |mode| is the same
      IR temporary we most recently set the FPCR from -- SSA form
      guarantees it still holds the same value. */
   if (env->previous_rm
       && env->previous_rm->tag == Iex_RdTmp
       && mode->tag == Iex_RdTmp
       && env->previous_rm->Iex.RdTmp.tmp == mode->Iex.RdTmp.tmp) {
      /* no - setting it to what it was before. */
      vassert(typeOfIRExpr(env->type_env, env->previous_rm) == Ity_I32);
      return;
   }

   /* No luck - we better set it, and remember what we set it to. */
   env->previous_rm = mode;

   /* Only supporting the rounding-mode bits - the rest of FPCR is set
      to zero - so we can set the whole register at once (faster). */

   /* This isn't simple, because 'mode' carries an IR rounding
      encoding, and we need to translate that to an ARM64 FP one:
      The IR encoding:
         00  to nearest (the default)
         10  to +infinity
         01  to -infinity
         11  to zero
      The ARM64 FP encoding:
         00  to nearest
         01  to +infinity
         10  to -infinity
         11  to zero
      Easy enough to do; just swap the two bits.
   */
   HReg irrm = iselIntExpr_R(env, mode);
   HReg tL   = newVRegI(env);
   HReg tR   = newVRegI(env);
   HReg t3   = newVRegI(env);
   /* tL = irrm << 1;
      tR = irrm >> 1;  if we're lucky, these will issue together
      tL &= 2;
      tR &= 1;         ditto
      t3 = tL | tR;
      t3 <<= 22;       position swapped bits at FPCR.RMode [23:22]
      write t3 to the FPCR
   */
   ARM64RIL* ril_one = mb_mkARM64RIL_I(1);
   ARM64RIL* ril_two = mb_mkARM64RIL_I(2);
   /* both 1 and 2 are representable as logic-immediates */
   vassert(ril_one && ril_two);
   addInstr(env, ARM64Instr_Shift(tL, irrm, ARM64RI6_I6(1), ARM64sh_SHL));
   addInstr(env, ARM64Instr_Shift(tR, irrm, ARM64RI6_I6(1), ARM64sh_SHR));
   addInstr(env, ARM64Instr_Logic(tL, tL, ril_two, ARM64lo_AND));
   addInstr(env, ARM64Instr_Logic(tR, tR, ril_one, ARM64lo_AND));
   addInstr(env, ARM64Instr_Logic(t3, tL, ARM64RIL_R(tR), ARM64lo_OR));
   addInstr(env, ARM64Instr_Shift(t3, t3, ARM64RI6_I6(22), ARM64sh_SHL));
   addInstr(env, ARM64Instr_FPCR(True/*toFPCR*/, t3));
}
469
470
471/*---------------------------------------------------------*/
472/*--- ISEL: Function call helpers ---*/
473/*---------------------------------------------------------*/
474
475/* Used only in doHelperCall. See big comment in doHelperCall re
476 handling of register-parameter args. This function figures out
477 whether evaluation of an expression might require use of a fixed
478 register. If in doubt return True (safe but suboptimal).
479*/
480static
481Bool mightRequireFixedRegs ( IRExpr* e )
482{
483 if (UNLIKELY(is_IRExpr_VECRET_or_BBPTR(e))) {
484 // These are always "safe" -- either a copy of SP in some
485 // arbitrary vreg, or a copy of x21, respectively.
486 return False;
487 }
488 /* Else it's a "normal" expression. */
489 switch (e->tag) {
490 case Iex_RdTmp: case Iex_Const: case Iex_Get:
491 return False;
492 default:
493 return True;
494 }
495}
496
497
498/* Do a complete function call. |guard| is a Ity_Bit expression
499 indicating whether or not the call happens. If guard==NULL, the
500 call is unconditional. |retloc| is set to indicate where the
501 return value is after the call. The caller (of this fn) must
502 generate code to add |stackAdjustAfterCall| to the stack pointer
503 after the call is done. Returns True iff it managed to handle this
504 combination of arg/return types, else returns False. */
505
static
Bool doHelperCall ( /*OUT*/UInt*   stackAdjustAfterCall,
                    /*OUT*/RetLoc* retloc,
                    ISelEnv* env,
                    IRExpr* guard,
                    IRCallee* cee, IRType retTy, IRExpr** args )
{
   ARM64CondCode cc;
   HReg          argregs[ARM64_N_ARGREGS];
   HReg          tmpregs[ARM64_N_ARGREGS];
   Bool          go_fast;
   Int           n_args, i, nextArgReg;
   ULong         target;

   vassert(ARM64_N_ARGREGS == 8);

   /* Set default returns.  We'll update them later if needed. */
   *stackAdjustAfterCall = 0;
   *retloc               = mk_RetLoc_INVALID();

   /* These are used for cross-checking that IR-level constraints on
      the use of IRExpr_VECRET() and IRExpr_BBPTR() are observed. */
   UInt nVECRETs = 0;
   UInt nBBPTRs  = 0;

   /* Marshal args for a call and do the call.

      This function only deals with a tiny set of possibilities, which
      cover all helpers in practice.  The restrictions are that only
      arguments in registers are supported, hence only
      ARM64_N_REGPARMS x 64 integer bits in total can be passed.  In
      fact the only supported arg type is I64.

      The return type can be I{64,32} or V128.  In the V128 case, it
      is expected that |args| will contain the special node
      IRExpr_VECRET(), in which case this routine generates code to
      allocate space on the stack for the vector return value.  Since
      we are not passing any scalars on the stack, it is enough to
      preallocate the return space before marshalling any arguments,
      in this case.

      |args| may also contain IRExpr_BBPTR(), in which case the
      value in x21 is passed as the corresponding argument.

      Generating code which is both efficient and correct when
      parameters are to be passed in registers is difficult, for the
      reasons elaborated in detail in comments attached to
      doHelperCall() in priv/host-x86/isel.c.  Here, we use a variant
      of the method described in those comments.

      The problem is split into two cases: the fast scheme and the
      slow scheme.  In the fast scheme, arguments are computed
      directly into the target (real) registers.  This is only safe
      when we can be sure that computation of each argument will not
      trash any real registers set by computation of any other
      argument.

      In the slow scheme, all args are first computed into vregs, and
      once they are all done, they are moved to the relevant real
      regs.  This always gives correct code, but it also gives a bunch
      of vreg-to-rreg moves which are usually redundant but are hard
      for the register allocator to get rid of.

      To decide which scheme to use, all argument expressions are
      first examined.  If they are all so simple that it is clear they
      will be evaluated without use of any fixed registers, use the
      fast scheme, else use the slow scheme.  Note also that only
      unconditional calls may use the fast scheme, since having to
      compute a condition expression could itself trash real
      registers.

      Note this requires being able to examine an expression and
      determine whether or not evaluation of it might use a fixed
      register.  That requires knowledge of how the rest of this insn
      selector works.  Currently just the following 3 are regarded as
      safe -- hopefully they cover the majority of arguments in
      practice: IRExpr_Tmp IRExpr_Const IRExpr_Get.
   */

   /* Note that the cee->regparms field is meaningless on ARM64 hosts
      (since there is only one calling convention) and so we always
      ignore it. */

   /* Count the args, and the VECRET/BBPTR pseudo-args among them. */
   n_args = 0;
   for (i = 0; args[i]; i++) {
      IRExpr* arg = args[i];
      if (UNLIKELY(arg->tag == Iex_VECRET)) {
         nVECRETs++;
      } else if (UNLIKELY(arg->tag == Iex_BBPTR)) {
         nBBPTRs++;
      }
      n_args++;
   }

   /* If this fails, the IR is ill-formed */
   vassert(nBBPTRs == 0 || nBBPTRs == 1);

   /* If we have a VECRET, allocate space on the stack for the return
      value, and record the stack pointer after that. */
   HReg r_vecRetAddr = INVALID_HREG;
   if (nVECRETs == 1) {
      vassert(retTy == Ity_V128 || retTy == Ity_V256);
      vassert(retTy != Ity_V256); // we don't handle that yet (if ever)
      r_vecRetAddr = newVRegI(env);
      addInstr(env, ARM64Instr_AddToSP(-16));
      addInstr(env, ARM64Instr_FromSP(r_vecRetAddr));
   } else {
      // If either of these fail, the IR is ill-formed
      vassert(retTy != Ity_V128 && retTy != Ity_V256);
      vassert(nVECRETs == 0);
   }

   /* The AAPCS64 integer argument registers, in order. */
   argregs[0] = hregARM64_X0();
   argregs[1] = hregARM64_X1();
   argregs[2] = hregARM64_X2();
   argregs[3] = hregARM64_X3();
   argregs[4] = hregARM64_X4();
   argregs[5] = hregARM64_X5();
   argregs[6] = hregARM64_X6();
   argregs[7] = hregARM64_X7();

   tmpregs[0] = tmpregs[1] = tmpregs[2] = tmpregs[3] = INVALID_HREG;
   tmpregs[4] = tmpregs[5] = tmpregs[6] = tmpregs[7] = INVALID_HREG;

   /* First decide which scheme (slow or fast) is to be used.  First
      assume the fast scheme, and select slow if any contraindications
      (wow) appear. */

   go_fast = True;

   /* A non-trivially-true guard forces the slow scheme, since
      evaluating the guard may itself trash real registers. */
   if (guard) {
      if (guard->tag == Iex_Const
          && guard->Iex.Const.con->tag == Ico_U1
          && guard->Iex.Const.con->Ico.U1 == True) {
         /* unconditional */
      } else {
         /* Not manifestly unconditional -- be conservative. */
         go_fast = False;
      }
   }

   /* Any arg whose evaluation might need a fixed register also
      forces the slow scheme. */
   if (go_fast) {
      for (i = 0; i < n_args; i++) {
         if (mightRequireFixedRegs(args[i])) {
            go_fast = False;
            break;
         }
      }
   }

   /* Vector returns involve a VECRET pseudo-arg; always slow. */
   if (go_fast) {
      if (retTy == Ity_V128 || retTy == Ity_V256)
         go_fast = False;
   }

   /* At this point the scheme to use has been established.  Generate
      code to get the arg values into the argument rregs.  If we run
      out of arg regs, give up. */

   if (go_fast) {

      /* FAST SCHEME */
      nextArgReg = 0;

      for (i = 0; i < n_args; i++) {
         IRExpr* arg = args[i];

         IRType  aTy = Ity_INVALID;
         if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg)))
            aTy = typeOfIRExpr(env->type_env, args[i]);

         if (nextArgReg >= ARM64_N_ARGREGS)
            return False; /* out of argregs */

         if (aTy == Ity_I64) {
            addInstr(env, ARM64Instr_MovI( argregs[nextArgReg],
                                           iselIntExpr_R(env, args[i]) ));
            nextArgReg++;
         }
         else if (arg->tag == Iex_BBPTR) {
            vassert(0); //ATC
            addInstr(env, ARM64Instr_MovI( argregs[nextArgReg],
                                           hregARM64_X21() ));
            nextArgReg++;
         }
         else if (arg->tag == Iex_VECRET) {
            // because of the go_fast logic above, we can't get here,
            // since vector return values makes us use the slow path
            // instead.
            vassert(0);
         }
         else
            return False; /* unhandled arg type */
      }

      /* Fast scheme only applies for unconditional calls.  Hence: */
      cc = ARM64cc_AL;

   } else {

      /* SLOW SCHEME; move via temporaries */
      nextArgReg = 0;

      for (i = 0; i < n_args; i++) {
         IRExpr* arg = args[i];

         IRType  aTy = Ity_INVALID;
         if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg)))
            aTy = typeOfIRExpr(env->type_env, args[i]);

         if (nextArgReg >= ARM64_N_ARGREGS)
            return False; /* out of argregs */

         if (aTy == Ity_I64) {
            tmpregs[nextArgReg] = iselIntExpr_R(env, args[i]);
            nextArgReg++;
         }
         else if (arg->tag == Iex_BBPTR) {
            vassert(0); //ATC
            tmpregs[nextArgReg] = hregARM64_X21();
            nextArgReg++;
         }
         else if (arg->tag == Iex_VECRET) {
            /* The stack slot for the vector result was allocated
               above; pass its address as this argument. */
            vassert(!hregIsInvalid(r_vecRetAddr));
            tmpregs[nextArgReg] = r_vecRetAddr;
            nextArgReg++;
         }
         else
            return False; /* unhandled arg type */
      }

      /* Now we can compute the condition.  We can't do it earlier
         because the argument computations could trash the condition
         codes.  Be a bit clever to handle the common case where the
         guard is 1:Bit. */
      cc = ARM64cc_AL;
      if (guard) {
         if (guard->tag == Iex_Const
             && guard->Iex.Const.con->tag == Ico_U1
             && guard->Iex.Const.con->Ico.U1 == True) {
            /* unconditional -- do nothing */
         } else {
            cc = iselCondCode( env, guard );
         }
      }

      /* Move the args to their final destinations. */
      for (i = 0; i < nextArgReg; i++) {
         vassert(!(hregIsInvalid(tmpregs[i])));
         /* None of these insns, including any spill code that might
            be generated, may alter the condition codes. */
         addInstr( env, ARM64Instr_MovI( argregs[i], tmpregs[i] ) );
      }

   }

   /* Should be assured by checks above */
   vassert(nextArgReg <= ARM64_N_ARGREGS);

   /* Do final checks, set the return values, and generate the call
      instruction proper. */
   vassert(nBBPTRs == 0 || nBBPTRs == 1);
   vassert(nVECRETs == (retTy == Ity_V128 || retTy == Ity_V256) ? 1 : 0);
   vassert(*stackAdjustAfterCall == 0);
   vassert(is_RetLoc_INVALID(*retloc));
   switch (retTy) {
      case Ity_INVALID:
         /* Function doesn't return a value. */
         *retloc = mk_RetLoc_simple(RLPri_None);
         break;
      case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
         *retloc = mk_RetLoc_simple(RLPri_Int);
         break;
      case Ity_V128:
         /* Result is returned via the 16-byte stack slot allocated
            above; the caller must pop it after the call. */
         *retloc = mk_RetLoc_spRel(RLPri_V128SpRel, 0);
         *stackAdjustAfterCall = 16;
         break;
      case Ity_V256:
         vassert(0); // ATC
         *retloc = mk_RetLoc_spRel(RLPri_V256SpRel, 0);
         *stackAdjustAfterCall = 32;
         break;
      default:
         /* IR can denote other possible return types, but we don't
            handle those here. */
         vassert(0);
   }

   /* Finally, generate the call itself.  This needs the *retloc value
      set in the switch above, which is why it's at the end. */

   /* nextArgReg doles out argument registers.  Since these are
      assigned in the order x0 .. x7, its numeric value at this point,
      which must be between 0 and 8 inclusive, is going to be equal to
      the number of arg regs in use for the call.  Hence bake that
      number into the call (we'll need to know it when doing register
      allocation, to know what regs the call reads.) */

   target = (HWord)Ptr_to_ULong(cee->addr);
   addInstr(env, ARM64Instr_Call( cc, target, nextArgReg, *retloc ));

   return True; /* success */
}
809
810
811/*---------------------------------------------------------*/
812/*--- ISEL: Integer expressions (64/32 bit) ---*/
813/*---------------------------------------------------------*/
814
815/* Select insns for an integer-typed expression, and add them to the
816 code list. Return a reg holding the result. This reg will be a
817 virtual register. THE RETURNED REG MUST NOT BE MODIFIED. If you
818 want to modify it, ask for a new vreg, copy it in there, and modify
819 the copy. The register allocator will do its best to map both
820 vregs to the same real register, so the copies will often disappear
821 later in the game.
822
823 This should handle expressions of 64- and 32-bit type. All results
824 are returned in a 64-bit register. For 32-bit expressions, the
825 upper 32 bits are arbitrary, so you should mask or sign extend
826 partial values if necessary.
827*/
828
829/* --------------------- AMode --------------------- */
830
831/* Return an AMode which computes the value of the specified
832 expression, possibly also adding insns to the code list as a
833 result. The expression may only be a 64-bit one.
834*/
835
836static Bool isValidScale ( UChar scale )
837{
838 switch (scale) {
839 case 1: case 2: case 4: case 8: /* case 16: ??*/ return True;
840 default: return False;
841 }
842}
843
844static Bool sane_AMode ( ARM64AMode* am )
845{
846 switch (am->tag) {
847 case ARM64am_RI9:
848 return
849 toBool( hregClass(am->ARM64am.RI9.reg) == HRcInt64
850 && (hregIsVirtual(am->ARM64am.RI9.reg)
851 /* || sameHReg(am->ARM64am.RI9.reg,
852 hregARM64_X21()) */ )
853 && am->ARM64am.RI9.simm9 >= -256
854 && am->ARM64am.RI9.simm9 <= 255 );
855 case ARM64am_RI12:
856 return
857 toBool( hregClass(am->ARM64am.RI12.reg) == HRcInt64
858 && (hregIsVirtual(am->ARM64am.RI12.reg)
859 /* || sameHReg(am->ARM64am.RI12.reg,
860 hregARM64_X21()) */ )
861 && am->ARM64am.RI12.uimm12 < 4096
862 && isValidScale(am->ARM64am.RI12.szB) );
863 case ARM64am_RR:
864 return
865 toBool( hregClass(am->ARM64am.RR.base) == HRcInt64
866 && hregIsVirtual(am->ARM64am.RR.base)
867 && hregClass(am->ARM64am.RR.index) == HRcInt64
868 && hregIsVirtual(am->ARM64am.RR.index) );
869 default:
870 vpanic("sane_AMode: unknown ARM64 AMode1 tag");
871 }
872}
873
874static
875ARM64AMode* iselIntExpr_AMode ( ISelEnv* env, IRExpr* e, IRType dty )
876{
877 ARM64AMode* am = iselIntExpr_AMode_wrk(env, e, dty);
878 vassert(sane_AMode(am));
879 return am;
880}
881
static
ARM64AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, IRExpr* e, IRType dty )
{
   /* |e| is the address expression (must be I64); |dty| is the type
      of the access the amode will be used for, needed because RI12
      amodes are implicitly scaled by the access size. */
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_I64);

   /* log2 of the access size in bytes. */
   ULong szBbits = 0;
   switch (dty) {
      case Ity_I64: szBbits = 3; break;
      case Ity_I32: szBbits = 2; break;
      case Ity_I16: szBbits = 1; break;
      case Ity_I8:  szBbits = 0; break;
      default: vassert(0);
   }

   /* {Add64,Sub64}(expr,simm9).  We don't care about |dty| here since
      we're going to create an amode suitable for LDU* or STU*
      instructions, which use unscaled immediate offsets.  */
   if (e->tag == Iex_Binop
       && (e->Iex.Binop.op == Iop_Add64 || e->Iex.Binop.op == Iop_Sub64)
       && e->Iex.Binop.arg2->tag == Iex_Const
       && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64) {
      Long simm = (Long)e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
      if (simm >= -255 && simm <= 255) {
         /* Although the gating condition might seem to be
               simm >= -256 && simm <= 255
            we will need to negate simm in the case where the op is Sub64.
            Hence limit the lower value to -255 in order that its negation
            is representable. */
         HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
         if (e->Iex.Binop.op == Iop_Sub64) simm = -simm;
         return ARM64AMode_RI9(reg, (Int)simm);
      }
   }

   /* Add64(expr, uimm12 * transfer-size): the scaled-immediate form,
      only usable when the offset is aligned to the access size. */
   if (e->tag == Iex_Binop
       && e->Iex.Binop.op == Iop_Add64
       && e->Iex.Binop.arg2->tag == Iex_Const
       && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64) {
      ULong uimm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
      ULong szB  = 1 << szBbits;
      if (0 == (uimm & (szB-1)) /* "uimm is szB-aligned" */
          && (uimm >> szBbits) < 4096) {
         HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
         return ARM64AMode_RI12(reg, (UInt)(uimm >> szBbits), (UChar)szB);
      }
   }

   /* Add64(expr1, expr2): register+register form. */
   if (e->tag == Iex_Binop
       && e->Iex.Binop.op == Iop_Add64) {
      HReg reg1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
      HReg reg2 = iselIntExpr_R(env, e->Iex.Binop.arg2);
      return ARM64AMode_RR(reg1, reg2);
   }

   /* Doesn't match anything in particular.  Generate it into
      a register and use that. */
   HReg reg = iselIntExpr_R(env, e);
   return ARM64AMode_RI9(reg, 0);
}
944
945//ZZ /* --------------------- AModeV --------------------- */
946//ZZ
947//ZZ /* Return an AModeV which computes the value of the specified
948//ZZ expression, possibly also adding insns to the code list as a
949//ZZ result. The expression may only be a 32-bit one.
950//ZZ */
951//ZZ
952//ZZ static Bool sane_AModeV ( ARMAModeV* am )
953//ZZ {
954//ZZ return toBool( hregClass(am->reg) == HRcInt32
955//ZZ && hregIsVirtual(am->reg)
956//ZZ && am->simm11 >= -1020 && am->simm11 <= 1020
957//ZZ && 0 == (am->simm11 & 3) );
958//ZZ }
959//ZZ
960//ZZ static ARMAModeV* iselIntExpr_AModeV ( ISelEnv* env, IRExpr* e )
961//ZZ {
962//ZZ ARMAModeV* am = iselIntExpr_AModeV_wrk(env, e);
963//ZZ vassert(sane_AModeV(am));
964//ZZ return am;
965//ZZ }
966//ZZ
967//ZZ static ARMAModeV* iselIntExpr_AModeV_wrk ( ISelEnv* env, IRExpr* e )
968//ZZ {
969//ZZ IRType ty = typeOfIRExpr(env->type_env,e);
970//ZZ vassert(ty == Ity_I32);
971//ZZ
972//ZZ /* {Add32,Sub32}(expr, simm8 << 2) */
973//ZZ if (e->tag == Iex_Binop
974//ZZ && (e->Iex.Binop.op == Iop_Add32 || e->Iex.Binop.op == Iop_Sub32)
975//ZZ && e->Iex.Binop.arg2->tag == Iex_Const
976//ZZ && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
977//ZZ Int simm = (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
978//ZZ if (simm >= -1020 && simm <= 1020 && 0 == (simm & 3)) {
979//ZZ HReg reg;
980//ZZ if (e->Iex.Binop.op == Iop_Sub32)
981//ZZ simm = -simm;
982//ZZ reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
983//ZZ return mkARMAModeV(reg, simm);
984//ZZ }
985//ZZ }
986//ZZ
987//ZZ /* Doesn't match anything in particular. Generate it into
988//ZZ a register and use that. */
989//ZZ {
990//ZZ HReg reg = iselIntExpr_R(env, e);
991//ZZ return mkARMAModeV(reg, 0);
992//ZZ }
993//ZZ
994//ZZ }
995//ZZ
996//ZZ /* -------------------- AModeN -------------------- */
997//ZZ
998//ZZ static ARMAModeN* iselIntExpr_AModeN ( ISelEnv* env, IRExpr* e )
999//ZZ {
1000//ZZ return iselIntExpr_AModeN_wrk(env, e);
1001//ZZ }
1002//ZZ
1003//ZZ static ARMAModeN* iselIntExpr_AModeN_wrk ( ISelEnv* env, IRExpr* e )
1004//ZZ {
1005//ZZ HReg reg = iselIntExpr_R(env, e);
1006//ZZ return mkARMAModeN_R(reg);
1007//ZZ }
1008//ZZ
1009//ZZ
1010//ZZ /* --------------------- RI84 --------------------- */
1011//ZZ
1012//ZZ /* Select instructions to generate 'e' into a RI84. If mayInv is
1013//ZZ true, then the caller will also accept an I84 form that denotes
1014//ZZ 'not e'. In this case didInv may not be NULL, and *didInv is set
1015//ZZ to True. This complication is so as to allow generation of an RI84
1016//ZZ which is suitable for use in either an AND or BIC instruction,
1017//ZZ without knowing (before this call) which one.
1018//ZZ */
1019//ZZ static ARMRI84* iselIntExpr_RI84 ( /*OUT*/Bool* didInv, Bool mayInv,
1020//ZZ ISelEnv* env, IRExpr* e )
1021//ZZ {
1022//ZZ ARMRI84* ri;
1023//ZZ if (mayInv)
1024//ZZ vassert(didInv != NULL);
1025//ZZ ri = iselIntExpr_RI84_wrk(didInv, mayInv, env, e);
1026//ZZ /* sanity checks ... */
1027//ZZ switch (ri->tag) {
1028//ZZ case ARMri84_I84:
1029//ZZ return ri;
1030//ZZ case ARMri84_R:
1031//ZZ vassert(hregClass(ri->ARMri84.R.reg) == HRcInt32);
1032//ZZ vassert(hregIsVirtual(ri->ARMri84.R.reg));
1033//ZZ return ri;
1034//ZZ default:
1035//ZZ vpanic("iselIntExpr_RI84: unknown arm RI84 tag");
1036//ZZ }
1037//ZZ }
1038//ZZ
1039//ZZ /* DO NOT CALL THIS DIRECTLY ! */
1040//ZZ static ARMRI84* iselIntExpr_RI84_wrk ( /*OUT*/Bool* didInv, Bool mayInv,
1041//ZZ ISelEnv* env, IRExpr* e )
1042//ZZ {
1043//ZZ IRType ty = typeOfIRExpr(env->type_env,e);
1044//ZZ vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
1045//ZZ
1046//ZZ if (didInv) *didInv = False;
1047//ZZ
1048//ZZ /* special case: immediate */
1049//ZZ if (e->tag == Iex_Const) {
1050//ZZ UInt u, u8 = 0x100, u4 = 0x10; /* both invalid */
1051//ZZ switch (e->Iex.Const.con->tag) {
1052//ZZ case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
1053//ZZ case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
1054//ZZ case Ico_U8: u = 0xFF & (e->Iex.Const.con->Ico.U8); break;
1055//ZZ default: vpanic("iselIntExpr_RI84.Iex_Const(armh)");
1056//ZZ }
1057//ZZ if (fitsIn8x4(&u8, &u4, u)) {
1058//ZZ return ARMRI84_I84( (UShort)u8, (UShort)u4 );
1059//ZZ }
1060//ZZ if (mayInv && fitsIn8x4(&u8, &u4, ~u)) {
1061//ZZ vassert(didInv);
1062//ZZ *didInv = True;
1063//ZZ return ARMRI84_I84( (UShort)u8, (UShort)u4 );
1064//ZZ }
1065//ZZ /* else fail, fall through to default case */
1066//ZZ }
1067//ZZ
1068//ZZ /* default case: calculate into a register and return that */
1069//ZZ {
1070//ZZ HReg r = iselIntExpr_R ( env, e );
1071//ZZ return ARMRI84_R(r);
1072//ZZ }
1073//ZZ }
1074
1075
1076/* --------------------- RIA --------------------- */
1077
1078/* Select instructions to generate 'e' into a RIA. */
1079
1080static ARM64RIA* iselIntExpr_RIA ( ISelEnv* env, IRExpr* e )
1081{
1082 ARM64RIA* ri = iselIntExpr_RIA_wrk(env, e);
1083 /* sanity checks ... */
1084 switch (ri->tag) {
1085 case ARM64riA_I12:
1086 vassert(ri->ARM64riA.I12.imm12 < 4096);
1087 vassert(ri->ARM64riA.I12.shift == 0 || ri->ARM64riA.I12.shift == 12);
1088 return ri;
1089 case ARM64riA_R:
1090 vassert(hregClass(ri->ARM64riA.R.reg) == HRcInt64);
1091 vassert(hregIsVirtual(ri->ARM64riA.R.reg));
1092 return ri;
1093 default:
1094 vpanic("iselIntExpr_RIA: unknown arm RIA tag");
1095 }
1096}
1097
1098/* DO NOT CALL THIS DIRECTLY ! */
1099static ARM64RIA* iselIntExpr_RIA_wrk ( ISelEnv* env, IRExpr* e )
1100{
1101 IRType ty = typeOfIRExpr(env->type_env,e);
1102 vassert(ty == Ity_I64 || ty == Ity_I32);
1103
1104 /* special case: immediate */
1105 if (e->tag == Iex_Const) {
1106 ULong u = 0xF000000ULL; /* invalid */
1107 switch (e->Iex.Const.con->tag) {
1108 case Ico_U64: u = e->Iex.Const.con->Ico.U64; break;
1109 case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
1110 default: vpanic("iselIntExpr_RIA.Iex_Const(arm64)");
1111 }
1112 if (0 == (u & ~(0xFFFULL << 0)))
1113 return ARM64RIA_I12((UShort)((u >> 0) & 0xFFFULL), 0);
1114 if (0 == (u & ~(0xFFFULL << 12)))
1115 return ARM64RIA_I12((UShort)((u >> 12) & 0xFFFULL), 12);
1116 /* else fail, fall through to default case */
1117 }
1118
1119 /* default case: calculate into a register and return that */
1120 {
1121 HReg r = iselIntExpr_R ( env, e );
1122 return ARM64RIA_R(r);
1123 }
1124}
1125
1126
1127/* --------------------- RIL --------------------- */
1128
1129/* Select instructions to generate 'e' into a RIL. At this point we
1130 have to deal with the strange bitfield-immediate encoding for logic
1131 instructions. */
1132
1133
1134// The following four functions
1135// CountLeadingZeros CountTrailingZeros CountSetBits isImmLogical
1136// are copied, with modifications, from
1137// https://github.com/armvixl/vixl/blob/master/src/a64/assembler-a64.cc
1138// which has the following copyright notice:
1139/*
1140 Copyright 2013, ARM Limited
1141 All rights reserved.
1142
1143 Redistribution and use in source and binary forms, with or without
1144 modification, are permitted provided that the following conditions are met:
1145
1146 * Redistributions of source code must retain the above copyright notice,
1147 this list of conditions and the following disclaimer.
1148 * Redistributions in binary form must reproduce the above copyright notice,
1149 this list of conditions and the following disclaimer in the documentation
1150 and/or other materials provided with the distribution.
1151 * Neither the name of ARM Limited nor the names of its contributors may be
1152 used to endorse or promote products derived from this software without
1153 specific prior written permission.
1154
1155 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
1156 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
1157 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
1158 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
1159 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
1160 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
1161 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
1162 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
1163 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
1164 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
1165*/
1166
1167static Int CountLeadingZeros(ULong value, Int width)
1168{
1169 vassert(width == 32 || width == 64);
1170 Int count = 0;
1171 ULong bit_test = 1ULL << (width - 1);
1172 while ((count < width) && ((bit_test & value) == 0)) {
1173 count++;
1174 bit_test >>= 1;
1175 }
1176 return count;
1177}
1178
1179static Int CountTrailingZeros(ULong value, Int width)
1180{
1181 vassert(width == 32 || width == 64);
1182 Int count = 0;
1183 while ((count < width) && (((value >> count) & 1) == 0)) {
1184 count++;
1185 }
1186 return count;
1187}
1188
1189static Int CountSetBits(ULong value, Int width)
1190{
1191 // TODO: Other widths could be added here, as the implementation already
1192 // supports them.
1193 vassert(width == 32 || width == 64);
1194
1195 // Mask out unused bits to ensure that they are not counted.
1196 value &= (0xffffffffffffffffULL >> (64-width));
1197
1198 // Add up the set bits.
1199 // The algorithm works by adding pairs of bit fields together iteratively,
1200 // where the size of each bit field doubles each time.
1201 // An example for an 8-bit value:
1202 // Bits: h g f e d c b a
1203 // \ | \ | \ | \ |
1204 // value = h+g f+e d+c b+a
1205 // \ | \ |
1206 // value = h+g+f+e d+c+b+a
1207 // \ |
1208 // value = h+g+f+e+d+c+b+a
sewardjaeeb31d2014-01-12 18:23:45 +00001209 value = ((value >> 1) & 0x5555555555555555ULL)
1210 + (value & 0x5555555555555555ULL);
1211 value = ((value >> 2) & 0x3333333333333333ULL)
1212 + (value & 0x3333333333333333ULL);
1213 value = ((value >> 4) & 0x0f0f0f0f0f0f0f0fULL)
1214 + (value & 0x0f0f0f0f0f0f0f0fULL);
1215 value = ((value >> 8) & 0x00ff00ff00ff00ffULL)
1216 + (value & 0x00ff00ff00ff00ffULL);
1217 value = ((value >> 16) & 0x0000ffff0000ffffULL)
1218 + (value & 0x0000ffff0000ffffULL);
1219 value = ((value >> 32) & 0x00000000ffffffffULL)
1220 + (value & 0x00000000ffffffffULL);
sewardjbbcf1882014-01-12 12:49:10 +00001221
1222 return value;
1223}
1224
static Bool isImmLogical ( /*OUT*/UInt* n,
                           /*OUT*/UInt* imm_s, /*OUT*/UInt* imm_r,
                           ULong value, UInt width )
{
   // Test if a given value can be encoded in the immediate field of a
   // logical instruction.

   // If it can be encoded, the function returns true, and values
   // pointed to by n, imm_s and imm_r are updated with immediates
   // encoded in the format required by the corresponding fields in the
   // logical instruction. If it can not be encoded, the function
   // returns false, and the values pointed to by n, imm_s and imm_r
   // are undefined.
   vassert(n != NULL && imm_s != NULL && imm_r != NULL);
   vassert(width == 32 || width == 64);

   // Logical immediates are encoded using parameters n, imm_s and imm_r using
   // the following table:
   //
   //  N   imms    immr    size        S             R
   //  1  ssssss  rrrrrr    64    UInt(ssssss)  UInt(rrrrrr)
   //  0  0sssss  xrrrrr    32    UInt(sssss)   UInt(rrrrr)
   //  0  10ssss  xxrrrr    16    UInt(ssss)    UInt(rrrr)
   //  0  110sss  xxxrrr     8    UInt(sss)     UInt(rrr)
   //  0  1110ss  xxxxrr     4    UInt(ss)      UInt(rr)
   //  0  11110s  xxxxxr     2    UInt(s)       UInt(r)
   //  (s bits must not be all set)
   //
   // A pattern is constructed of size bits, where the least significant S+1
   // bits are set. The pattern is rotated right by R, and repeated across a
   // 32 or 64-bit value, depending on destination register width.
   //
   // To test if an arbitrary immediate can be encoded using this scheme, an
   // iterative algorithm is used.
   //
   // TODO: This code does not consider using X/W register overlap to support
   // 64-bit immediates where the top 32-bits are zero, and the bottom 32-bits
   // are an encodable logical immediate.

   // 1. If the value has all set or all clear bits, it can't be encoded.
   if ((value == 0) || (value == 0xffffffffffffffffULL) ||
       ((width == 32) && (value == 0xffffffff))) {
      return False;
   }

   // Bit-geometry summary of |value|, used by the encodability tests in
   // steps 3 and 4 below.
   UInt lead_zero = CountLeadingZeros(value, width);
   UInt lead_one = CountLeadingZeros(~value, width);
   UInt trail_zero = CountTrailingZeros(value, width);
   UInt trail_one = CountTrailingZeros(~value, width);
   UInt set_bits = CountSetBits(value, width);

   // The fixed bits in the immediate s field.
   // If width == 64 (X reg), start at 0xFFFFFF80.
   // If width == 32 (W reg), start at 0xFFFFFFC0, as the iteration for 64-bit
   // widths won't be executed.
   Int imm_s_fixed = (width == 64) ? -128 : -64;
   Int imm_s_mask = 0x3F;

   for (;;) {
      // 2. If the value is two bits wide, it can be encoded.
      if (width == 2) {
         *n = 0;
         *imm_s = 0x3C;
         // NOTE(review): at this point (value & 3) is nonzero and not
         // all-ones (step 1 ruled those out for the repeated pattern),
         // so imm_r here is 0 or 1 — confirm against the vixl original.
         *imm_r = (value & 3) - 1;
         return True;
      }

      *n = (width == 64) ? 1 : 0;
      *imm_s = ((imm_s_fixed | (set_bits - 1)) & imm_s_mask);
      if ((lead_zero + set_bits) == width) {
         *imm_r = 0;
      } else {
         *imm_r = (lead_zero > 0) ? (width - trail_zero) : lead_one;
      }

      // 3. If the sum of leading zeros, trailing zeros and set bits is equal to
      //    the bit width of the value, it can be encoded.
      if (lead_zero + trail_zero + set_bits == width) {
         return True;
      }

      // 4. If the sum of leading ones, trailing ones and unset bits in the
      //    value is equal to the bit width of the value, it can be encoded.
      if (lead_one + trail_one + (width - set_bits) == width) {
         return True;
      }

      // 5. If the most-significant half of the bitwise value is equal to the
      //    least-significant half, return to step 2 using the least-significant
      //    half of the value.
      ULong mask = (1ULL << (width >> 1)) - 1;
      if ((value & mask) == ((value >> (width >> 1)) & mask)) {
         width >>= 1;
         set_bits >>= 1;
         imm_s_fixed >>= 1;
         continue;
      }

      // 6. Otherwise, the value can't be encoded.
      return False;
   }
}
1327
1328
1329/* Create a RIL for the given immediate, if it is representable, or
1330 return NULL if not. */
1331
1332static ARM64RIL* mb_mkARM64RIL_I ( ULong imm64 )
1333{
1334 UInt n = 0, imm_s = 0, imm_r = 0;
1335 Bool ok = isImmLogical(&n, &imm_s, &imm_r, imm64, 64);
1336 if (!ok) return NULL;
1337 vassert(n < 2 && imm_s < 64 && imm_r < 64);
1338 return ARM64RIL_I13(n, imm_r, imm_s);
1339}
1340
1341/* So, finally .. */
1342
1343static ARM64RIL* iselIntExpr_RIL ( ISelEnv* env, IRExpr* e )
1344{
1345 ARM64RIL* ri = iselIntExpr_RIL_wrk(env, e);
1346 /* sanity checks ... */
1347 switch (ri->tag) {
1348 case ARM64riL_I13:
1349 vassert(ri->ARM64riL.I13.bitN < 2);
1350 vassert(ri->ARM64riL.I13.immR < 64);
1351 vassert(ri->ARM64riL.I13.immS < 64);
1352 return ri;
1353 case ARM64riL_R:
1354 vassert(hregClass(ri->ARM64riL.R.reg) == HRcInt64);
1355 vassert(hregIsVirtual(ri->ARM64riL.R.reg));
1356 return ri;
1357 default:
1358 vpanic("iselIntExpr_RIL: unknown arm RIL tag");
1359 }
1360}
1361
1362/* DO NOT CALL THIS DIRECTLY ! */
1363static ARM64RIL* iselIntExpr_RIL_wrk ( ISelEnv* env, IRExpr* e )
1364{
1365 IRType ty = typeOfIRExpr(env->type_env,e);
1366 vassert(ty == Ity_I64 || ty == Ity_I32);
1367
1368 /* special case: immediate */
1369 if (e->tag == Iex_Const) {
1370 ARM64RIL* maybe = NULL;
1371 if (ty == Ity_I64) {
1372 vassert(e->Iex.Const.con->tag == Ico_U64);
1373 maybe = mb_mkARM64RIL_I(e->Iex.Const.con->Ico.U64);
1374 } else {
1375 vassert(ty == Ity_I32);
1376 vassert(e->Iex.Const.con->tag == Ico_U32);
1377 UInt u32 = e->Iex.Const.con->Ico.U32;
1378 ULong u64 = (ULong)u32;
1379 /* First try with 32 leading zeroes. */
1380 maybe = mb_mkARM64RIL_I(u64);
1381 /* If that doesn't work, try with 2 copies, since it doesn't
1382 matter what winds up in the upper 32 bits. */
1383 if (!maybe) {
1384 maybe = mb_mkARM64RIL_I((u64 << 32) | u64);
1385 }
1386 }
1387 if (maybe) return maybe;
1388 /* else fail, fall through to default case */
1389 }
1390
1391 /* default case: calculate into a register and return that */
1392 {
1393 HReg r = iselIntExpr_R ( env, e );
1394 return ARM64RIL_R(r);
1395 }
1396}
1397
1398
1399/* --------------------- RI6 --------------------- */
1400
1401/* Select instructions to generate 'e' into a RI6. */
1402
1403static ARM64RI6* iselIntExpr_RI6 ( ISelEnv* env, IRExpr* e )
1404{
1405 ARM64RI6* ri = iselIntExpr_RI6_wrk(env, e);
1406 /* sanity checks ... */
1407 switch (ri->tag) {
1408 case ARM64ri6_I6:
1409 vassert(ri->ARM64ri6.I6.imm6 < 64);
1410 vassert(ri->ARM64ri6.I6.imm6 > 0);
1411 return ri;
1412 case ARM64ri6_R:
1413 vassert(hregClass(ri->ARM64ri6.R.reg) == HRcInt64);
1414 vassert(hregIsVirtual(ri->ARM64ri6.R.reg));
1415 return ri;
1416 default:
1417 vpanic("iselIntExpr_RI6: unknown arm RI6 tag");
1418 }
1419}
1420
1421/* DO NOT CALL THIS DIRECTLY ! */
1422static ARM64RI6* iselIntExpr_RI6_wrk ( ISelEnv* env, IRExpr* e )
1423{
1424 IRType ty = typeOfIRExpr(env->type_env,e);
1425 vassert(ty == Ity_I64 || ty == Ity_I8);
1426
1427 /* special case: immediate */
1428 if (e->tag == Iex_Const) {
1429 switch (e->Iex.Const.con->tag) {
1430 case Ico_U8: {
1431 UInt u = e->Iex.Const.con->Ico.U8;
1432 if (u > 0 && u < 64)
1433 return ARM64RI6_I6(u);
1434 break;
1435 default:
1436 break;
1437 }
1438 }
1439 /* else fail, fall through to default case */
1440 }
1441
1442 /* default case: calculate into a register and return that */
1443 {
1444 HReg r = iselIntExpr_R ( env, e );
1445 return ARM64RI6_R(r);
1446 }
1447}
1448
1449
1450/* ------------------- CondCode ------------------- */
1451
1452/* Generate code to evaluated a bit-typed expression, returning the
1453 condition code which would correspond when the expression would
1454 notionally have returned 1. */
1455
1456static ARM64CondCode iselCondCode ( ISelEnv* env, IRExpr* e )
1457{
1458 ARM64CondCode cc = iselCondCode_wrk(env,e);
1459 vassert(cc != ARM64cc_NV);
1460 return cc;
1461}
1462
1463static ARM64CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e )
1464{
1465 vassert(e);
1466 vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);
1467
1468 /* var */
1469 if (e->tag == Iex_RdTmp) {
1470 HReg rTmp = lookupIRTemp(env, e->Iex.RdTmp.tmp);
1471 /* Cmp doesn't modify rTmp; so this is OK. */
1472 ARM64RIL* one = mb_mkARM64RIL_I(1);
1473 vassert(one);
1474 addInstr(env, ARM64Instr_Test(rTmp, one));
1475 return ARM64cc_NE;
1476 }
1477
1478 /* Not1(e) */
1479 if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) {
1480 /* Generate code for the arg, and negate the test condition */
1481 ARM64CondCode cc = iselCondCode(env, e->Iex.Unop.arg);
1482 if (cc == ARM64cc_AL || cc == ARM64cc_NV) {
1483 return ARM64cc_AL;
1484 } else {
1485 return 1 ^ cc;
1486 }
1487 }
1488
1489 /* --- patterns rooted at: 64to1 --- */
1490
1491 if (e->tag == Iex_Unop
1492 && e->Iex.Unop.op == Iop_64to1) {
1493 HReg rTmp = iselIntExpr_R(env, e->Iex.Unop.arg);
1494 ARM64RIL* one = mb_mkARM64RIL_I(1);
1495 vassert(one); /* '1' must be representable */
1496 addInstr(env, ARM64Instr_Test(rTmp, one));
1497 return ARM64cc_NE;
1498 }
1499
1500 /* --- patterns rooted at: CmpNEZ8 --- */
1501
1502 if (e->tag == Iex_Unop
1503 && e->Iex.Unop.op == Iop_CmpNEZ8) {
1504 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
1505 ARM64RIL* xFF = mb_mkARM64RIL_I(0xFF);
1506 addInstr(env, ARM64Instr_Test(r1, xFF));
1507 return ARM64cc_NE;
1508 }
1509
1510 /* --- patterns rooted at: CmpNEZ64 --- */
1511
1512 if (e->tag == Iex_Unop
1513 && e->Iex.Unop.op == Iop_CmpNEZ64) {
1514 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
1515 ARM64RIA* zero = ARM64RIA_I12(0,0);
1516 addInstr(env, ARM64Instr_Cmp(r1, zero, True/*is64*/));
1517 return ARM64cc_NE;
1518 }
1519
1520 /* --- patterns rooted at: CmpNEZ32 --- */
1521
1522 if (e->tag == Iex_Unop
1523 && e->Iex.Unop.op == Iop_CmpNEZ32) {
1524 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
1525 ARM64RIA* zero = ARM64RIA_I12(0,0);
1526 addInstr(env, ARM64Instr_Cmp(r1, zero, False/*!is64*/));
1527 return ARM64cc_NE;
1528 }
1529
1530 /* --- Cmp*64*(x,y) --- */
1531 if (e->tag == Iex_Binop
1532 && (e->Iex.Binop.op == Iop_CmpEQ64
1533 || e->Iex.Binop.op == Iop_CmpNE64
1534 || e->Iex.Binop.op == Iop_CmpLT64S
1535 || e->Iex.Binop.op == Iop_CmpLT64U
1536 || e->Iex.Binop.op == Iop_CmpLE64S
1537 || e->Iex.Binop.op == Iop_CmpLE64U)) {
1538 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1539 ARM64RIA* argR = iselIntExpr_RIA(env, e->Iex.Binop.arg2);
1540 addInstr(env, ARM64Instr_Cmp(argL, argR, True/*is64*/));
1541 switch (e->Iex.Binop.op) {
1542 case Iop_CmpEQ64: return ARM64cc_EQ;
1543 case Iop_CmpNE64: return ARM64cc_NE;
1544 case Iop_CmpLT64S: return ARM64cc_LT;
1545 case Iop_CmpLT64U: return ARM64cc_CC;
1546 case Iop_CmpLE64S: return ARM64cc_LE;
1547 case Iop_CmpLE64U: return ARM64cc_LS;
1548 default: vpanic("iselCondCode(arm64): CmpXX64");
1549 }
1550 }
1551
1552 /* --- Cmp*32*(x,y) --- */
1553 if (e->tag == Iex_Binop
1554 && (e->Iex.Binop.op == Iop_CmpEQ32
1555 || e->Iex.Binop.op == Iop_CmpNE32
1556 || e->Iex.Binop.op == Iop_CmpLT32S
1557 || e->Iex.Binop.op == Iop_CmpLT32U
1558 || e->Iex.Binop.op == Iop_CmpLE32S
1559 || e->Iex.Binop.op == Iop_CmpLE32U)) {
1560 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1561 ARM64RIA* argR = iselIntExpr_RIA(env, e->Iex.Binop.arg2);
1562 addInstr(env, ARM64Instr_Cmp(argL, argR, False/*!is64*/));
1563 switch (e->Iex.Binop.op) {
1564 case Iop_CmpEQ32: return ARM64cc_EQ;
1565 case Iop_CmpNE32: return ARM64cc_NE;
1566 case Iop_CmpLT32S: return ARM64cc_LT;
1567 case Iop_CmpLT32U: return ARM64cc_CC;
1568 case Iop_CmpLE32S: return ARM64cc_LE;
1569 case Iop_CmpLE32U: return ARM64cc_LS;
1570 default: vpanic("iselCondCode(arm64): CmpXX32");
1571 }
1572 }
1573
1574//ZZ /* const */
1575//ZZ /* Constant 1:Bit */
1576//ZZ if (e->tag == Iex_Const) {
1577//ZZ HReg r;
1578//ZZ vassert(e->Iex.Const.con->tag == Ico_U1);
1579//ZZ vassert(e->Iex.Const.con->Ico.U1 == True
1580//ZZ || e->Iex.Const.con->Ico.U1 == False);
1581//ZZ r = newVRegI(env);
1582//ZZ addInstr(env, ARMInstr_Imm32(r, 0));
1583//ZZ addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, r, ARMRI84_R(r)));
1584//ZZ return e->Iex.Const.con->Ico.U1 ? ARMcc_EQ : ARMcc_NE;
1585//ZZ }
1586//ZZ
1587//ZZ // JRS 2013-Jan-03: this seems completely nonsensical
1588//ZZ /* --- CasCmpEQ* --- */
1589//ZZ /* Ist_Cas has a dummy argument to compare with, so comparison is
1590//ZZ always true. */
1591//ZZ //if (e->tag == Iex_Binop
1592//ZZ // && (e->Iex.Binop.op == Iop_CasCmpEQ32
1593//ZZ // || e->Iex.Binop.op == Iop_CasCmpEQ16
1594//ZZ // || e->Iex.Binop.op == Iop_CasCmpEQ8)) {
1595//ZZ // return ARMcc_AL;
1596//ZZ //}
1597
1598 ppIRExpr(e);
1599 vpanic("iselCondCode");
1600}
1601
1602
1603/* --------------------- Reg --------------------- */
1604
1605static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e )
1606{
1607 HReg r = iselIntExpr_R_wrk(env, e);
1608 /* sanity checks ... */
1609# if 0
1610 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
1611# endif
1612 vassert(hregClass(r) == HRcInt64);
1613 vassert(hregIsVirtual(r));
1614 return r;
1615}
1616
1617/* DO NOT CALL THIS DIRECTLY ! */
1618static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
1619{
1620 IRType ty = typeOfIRExpr(env->type_env,e);
1621 vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
1622
1623 switch (e->tag) {
1624
1625 /* --------- TEMP --------- */
1626 case Iex_RdTmp: {
1627 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
1628 }
1629
1630 /* --------- LOAD --------- */
1631 case Iex_Load: {
1632 HReg dst = newVRegI(env);
1633
1634 if (e->Iex.Load.end != Iend_LE)
1635 goto irreducible;
1636
1637 if (ty == Ity_I64) {
1638 ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty );
1639 addInstr(env, ARM64Instr_LdSt64(True/*isLoad*/, dst, amode));
1640 return dst;
1641 }
1642 if (ty == Ity_I32) {
1643 ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty );
1644 addInstr(env, ARM64Instr_LdSt32(True/*isLoad*/, dst, amode));
1645 return dst;
1646 }
1647 if (ty == Ity_I16) {
1648 ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty );
1649 addInstr(env, ARM64Instr_LdSt16(True/*isLoad*/, dst, amode));
1650 return dst;
1651 }
1652 if (ty == Ity_I8) {
1653 ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty );
1654 addInstr(env, ARM64Instr_LdSt8(True/*isLoad*/, dst, amode));
1655 return dst;
1656 }
1657 break;
1658 }
1659
1660 /* --------- BINARY OP --------- */
1661 case Iex_Binop: {
1662
1663 ARM64LogicOp lop = 0; /* invalid */
1664 ARM64ShiftOp sop = 0; /* invalid */
1665
1666 /* Special-case 0-x into a Neg instruction. Not because it's
1667 particularly useful but more so as to give value flow using
1668 this instruction, so as to check its assembly correctness for
1669 implementation of Left32/Left64. */
1670 switch (e->Iex.Binop.op) {
1671 case Iop_Sub64:
1672 if (isZeroU64(e->Iex.Binop.arg1)) {
1673 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1674 HReg dst = newVRegI(env);
1675 addInstr(env, ARM64Instr_Unary(dst, argR, ARM64un_NEG));
1676 return dst;
1677 }
1678 break;
1679 default:
1680 break;
1681 }
1682
1683 /* ADD/SUB */
1684 switch (e->Iex.Binop.op) {
1685 case Iop_Add64: case Iop_Add32:
1686 case Iop_Sub64: case Iop_Sub32: {
1687 Bool isAdd = e->Iex.Binop.op == Iop_Add64
1688 || e->Iex.Binop.op == Iop_Add32;
1689 HReg dst = newVRegI(env);
1690 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1691 ARM64RIA* argR = iselIntExpr_RIA(env, e->Iex.Binop.arg2);
1692 addInstr(env, ARM64Instr_Arith(dst, argL, argR, isAdd));
1693 return dst;
1694 }
1695 default:
1696 break;
1697 }
1698
1699 /* AND/OR/XOR */
1700 switch (e->Iex.Binop.op) {
1701 case Iop_And64: case Iop_And32: lop = ARM64lo_AND; goto log_binop;
1702 case Iop_Or64: case Iop_Or32: lop = ARM64lo_OR; goto log_binop;
1703 case Iop_Xor64: case Iop_Xor32: lop = ARM64lo_XOR; goto log_binop;
1704 log_binop: {
1705 HReg dst = newVRegI(env);
1706 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1707 ARM64RIL* argR = iselIntExpr_RIL(env, e->Iex.Binop.arg2);
1708 addInstr(env, ARM64Instr_Logic(dst, argL, argR, lop));
1709 return dst;
1710 }
1711 default:
1712 break;
1713 }
1714
1715 /* SHL/SHR/SAR */
1716 switch (e->Iex.Binop.op) {
1717 case Iop_Shr64: sop = ARM64sh_SHR; goto sh_binop;
1718 case Iop_Sar64: sop = ARM64sh_SAR; goto sh_binop;
1719 case Iop_Shl64: case Iop_Shl32: sop = ARM64sh_SHL; goto sh_binop;
1720 sh_binop: {
1721 HReg dst = newVRegI(env);
1722 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1723 ARM64RI6* argR = iselIntExpr_RI6(env, e->Iex.Binop.arg2);
1724 addInstr(env, ARM64Instr_Shift(dst, argL, argR, sop));
1725 return dst;
1726 }
1727 case Iop_Shr32:
1728 case Iop_Sar32: {
1729 Bool zx = e->Iex.Binop.op == Iop_Shr32;
1730 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1731 ARM64RI6* argR = iselIntExpr_RI6(env, e->Iex.Binop.arg2);
1732 HReg dst = zx ? widen_z_32_to_64(env, argL)
1733 : widen_s_32_to_64(env, argL);
1734 addInstr(env, ARM64Instr_Shift(dst, dst, argR, ARM64sh_SHR));
1735 return dst;
1736 }
1737 default: break;
1738 }
1739
1740 /* MUL */
1741 if (e->Iex.Binop.op == Iop_Mul64 || e->Iex.Binop.op == Iop_Mul32) {
1742 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1743 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1744 HReg dst = newVRegI(env);
1745 addInstr(env, ARM64Instr_Mul(dst, argL, argR, ARM64mul_PLAIN));
1746 return dst;
1747 }
1748
1749 /* MULL */
1750 if (e->Iex.Binop.op == Iop_MullU32 || e->Iex.Binop.op == Iop_MullS32) {
1751 Bool isS = e->Iex.Binop.op == Iop_MullS32;
1752 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1753 HReg extL = (isS ? widen_s_32_to_64 : widen_z_32_to_64)(env, argL);
1754 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1755 HReg extR = (isS ? widen_s_32_to_64 : widen_z_32_to_64)(env, argR);
1756 HReg dst = newVRegI(env);
1757 addInstr(env, ARM64Instr_Mul(dst, extL, extR, ARM64mul_PLAIN));
1758 return dst;
1759 }
1760
1761 /* Handle misc other ops. */
1762
sewardj5d384252014-04-08 15:24:15 +00001763 if (e->Iex.Binop.op == Iop_Max32U) {
1764 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1765 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1766 HReg dst = newVRegI(env);
1767 addInstr(env, ARM64Instr_Cmp(argL, ARM64RIA_R(argR), False/*!is64*/));
1768 addInstr(env, ARM64Instr_CSel(dst, argL, argR, ARM64cc_CS));
1769 return dst;
1770 }
sewardjbbcf1882014-01-12 12:49:10 +00001771
1772 if (e->Iex.Binop.op == Iop_32HLto64) {
1773 HReg hi32s = iselIntExpr_R(env, e->Iex.Binop.arg1);
1774 HReg lo32s = iselIntExpr_R(env, e->Iex.Binop.arg2);
1775 HReg lo32 = widen_z_32_to_64(env, lo32s);
1776 HReg hi32 = newVRegI(env);
1777 addInstr(env, ARM64Instr_Shift(hi32, hi32s, ARM64RI6_I6(32),
1778 ARM64sh_SHL));
1779 addInstr(env, ARM64Instr_Logic(hi32, hi32, ARM64RIL_R(lo32),
1780 ARM64lo_OR));
1781 return hi32;
1782 }
1783
1784 if (e->Iex.Binop.op == Iop_CmpF64 || e->Iex.Binop.op == Iop_CmpF32) {
1785 Bool isD = e->Iex.Binop.op == Iop_CmpF64;
1786 HReg dL = (isD ? iselDblExpr : iselFltExpr)(env, e->Iex.Binop.arg1);
1787 HReg dR = (isD ? iselDblExpr : iselFltExpr)(env, e->Iex.Binop.arg2);
1788 HReg dst = newVRegI(env);
1789 HReg imm = newVRegI(env);
1790 /* Do the compare (FCMP), which sets NZCV in PSTATE. Then
1791 create in dst, the IRCmpF64Result encoded result. */
1792 addInstr(env, (isD ? ARM64Instr_VCmpD : ARM64Instr_VCmpS)(dL, dR));
1793 addInstr(env, ARM64Instr_Imm64(dst, 0));
1794 addInstr(env, ARM64Instr_Imm64(imm, 0x40)); // 0x40 = Ircr_EQ
1795 addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_EQ));
1796 addInstr(env, ARM64Instr_Imm64(imm, 0x01)); // 0x01 = Ircr_LT
1797 addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_MI));
1798 addInstr(env, ARM64Instr_Imm64(imm, 0x00)); // 0x00 = Ircr_GT
1799 addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_GT));
1800 addInstr(env, ARM64Instr_Imm64(imm, 0x45)); // 0x45 = Ircr_UN
1801 addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_VS));
1802 return dst;
1803 }
1804
1805 { /* local scope */
1806 ARM64CvtOp cvt_op = ARM64cvt_INVALID;
1807 Bool srcIsD = False;
1808 switch (e->Iex.Binop.op) {
1809 case Iop_F64toI64S:
1810 cvt_op = ARM64cvt_F64_I64S; srcIsD = True; break;
1811 case Iop_F64toI64U:
1812 cvt_op = ARM64cvt_F64_I64U; srcIsD = True; break;
1813 case Iop_F64toI32S:
1814 cvt_op = ARM64cvt_F64_I32S; srcIsD = True; break;
1815 case Iop_F64toI32U:
1816 cvt_op = ARM64cvt_F64_I32U; srcIsD = True; break;
1817 case Iop_F32toI32S:
1818 cvt_op = ARM64cvt_F32_I32S; srcIsD = False; break;
sewardj1eaaec22014-03-07 22:52:19 +00001819 case Iop_F32toI32U:
1820 cvt_op = ARM64cvt_F32_I32U; srcIsD = False; break;
1821 case Iop_F32toI64S:
1822 cvt_op = ARM64cvt_F32_I64S; srcIsD = False; break;
sewardjbbcf1882014-01-12 12:49:10 +00001823 case Iop_F32toI64U:
1824 cvt_op = ARM64cvt_F32_I64U; srcIsD = False; break;
1825 default:
1826 break;
1827 }
1828 if (cvt_op != ARM64cvt_INVALID) {
1829 /* This is all a bit dodgy, because we can't handle a
1830 non-constant (not-known-at-JIT-time) rounding mode
1831 indication. That's because there's no instruction
1832 AFAICS that does this conversion but rounds according to
1833 FPCR.RM, so we have to bake the rounding mode into the
1834 instruction right now. But that should be OK because
1835 (1) the front end attaches a literal Irrm_ value to the
1836 conversion binop, and (2) iropt will never float that
1837 off via CSE, into a literal. Hence we should always
1838 have an Irrm_ value as the first arg. */
1839 IRExpr* arg1 = e->Iex.Binop.arg1;
1840 if (arg1->tag != Iex_Const) goto irreducible;
1841 IRConst* arg1con = arg1->Iex.Const.con;
1842 vassert(arg1con->tag == Ico_U32); // else ill-typed IR
1843 UInt irrm = arg1con->Ico.U32;
1844 /* Find the ARM-encoded equivalent for |irrm|. */
1845 UInt armrm = 4; /* impossible */
1846 switch (irrm) {
1847 case Irrm_NEAREST: armrm = 0; break;
1848 case Irrm_NegINF: armrm = 2; break;
1849 case Irrm_PosINF: armrm = 1; break;
1850 case Irrm_ZERO: armrm = 3; break;
1851 default: goto irreducible;
1852 }
1853 HReg src = (srcIsD ? iselDblExpr : iselFltExpr)
1854 (env, e->Iex.Binop.arg2);
1855 HReg dst = newVRegI(env);
1856 addInstr(env, ARM64Instr_VCvtF2I(cvt_op, dst, src, armrm));
1857 return dst;
1858 }
1859 } /* local scope */
1860
1861//ZZ if (e->Iex.Binop.op == Iop_GetElem8x8
1862//ZZ || e->Iex.Binop.op == Iop_GetElem16x4
1863//ZZ || e->Iex.Binop.op == Iop_GetElem32x2) {
1864//ZZ HReg res = newVRegI(env);
1865//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Binop.arg1);
1866//ZZ UInt index, size;
1867//ZZ if (e->Iex.Binop.arg2->tag != Iex_Const ||
1868//ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
1869//ZZ vpanic("ARM target supports GetElem with constant "
1870//ZZ "second argument only\n");
1871//ZZ }
1872//ZZ index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
1873//ZZ switch (e->Iex.Binop.op) {
1874//ZZ case Iop_GetElem8x8: vassert(index < 8); size = 0; break;
1875//ZZ case Iop_GetElem16x4: vassert(index < 4); size = 1; break;
1876//ZZ case Iop_GetElem32x2: vassert(index < 2); size = 2; break;
1877//ZZ default: vassert(0);
1878//ZZ }
1879//ZZ addInstr(env, ARMInstr_NUnaryS(ARMneon_GETELEMS,
1880//ZZ mkARMNRS(ARMNRS_Reg, res, 0),
1881//ZZ mkARMNRS(ARMNRS_Scalar, arg, index),
1882//ZZ size, False));
1883//ZZ return res;
1884//ZZ }
1885//ZZ
1886//ZZ if (e->Iex.Binop.op == Iop_GetElem8x16
1887//ZZ || e->Iex.Binop.op == Iop_GetElem16x8
1888//ZZ || e->Iex.Binop.op == Iop_GetElem32x4) {
1889//ZZ HReg res = newVRegI(env);
1890//ZZ HReg arg = iselNeonExpr(env, e->Iex.Binop.arg1);
1891//ZZ UInt index, size;
1892//ZZ if (e->Iex.Binop.arg2->tag != Iex_Const ||
1893//ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
1894//ZZ vpanic("ARM target supports GetElem with constant "
1895//ZZ "second argument only\n");
1896//ZZ }
1897//ZZ index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
1898//ZZ switch (e->Iex.Binop.op) {
1899//ZZ case Iop_GetElem8x16: vassert(index < 16); size = 0; break;
1900//ZZ case Iop_GetElem16x8: vassert(index < 8); size = 1; break;
1901//ZZ case Iop_GetElem32x4: vassert(index < 4); size = 2; break;
1902//ZZ default: vassert(0);
1903//ZZ }
1904//ZZ addInstr(env, ARMInstr_NUnaryS(ARMneon_GETELEMS,
1905//ZZ mkARMNRS(ARMNRS_Reg, res, 0),
1906//ZZ mkARMNRS(ARMNRS_Scalar, arg, index),
1907//ZZ size, True));
1908//ZZ return res;
1909//ZZ }
1910
1911 /* All cases involving host-side helper calls. */
1912 void* fn = NULL;
1913 switch (e->Iex.Binop.op) {
1914//ZZ case Iop_Add16x2:
1915//ZZ fn = &h_generic_calc_Add16x2; break;
1916//ZZ case Iop_Sub16x2:
1917//ZZ fn = &h_generic_calc_Sub16x2; break;
1918//ZZ case Iop_HAdd16Ux2:
1919//ZZ fn = &h_generic_calc_HAdd16Ux2; break;
1920//ZZ case Iop_HAdd16Sx2:
1921//ZZ fn = &h_generic_calc_HAdd16Sx2; break;
1922//ZZ case Iop_HSub16Ux2:
1923//ZZ fn = &h_generic_calc_HSub16Ux2; break;
1924//ZZ case Iop_HSub16Sx2:
1925//ZZ fn = &h_generic_calc_HSub16Sx2; break;
1926//ZZ case Iop_QAdd16Sx2:
1927//ZZ fn = &h_generic_calc_QAdd16Sx2; break;
1928//ZZ case Iop_QAdd16Ux2:
1929//ZZ fn = &h_generic_calc_QAdd16Ux2; break;
1930//ZZ case Iop_QSub16Sx2:
1931//ZZ fn = &h_generic_calc_QSub16Sx2; break;
1932//ZZ case Iop_Add8x4:
1933//ZZ fn = &h_generic_calc_Add8x4; break;
1934//ZZ case Iop_Sub8x4:
1935//ZZ fn = &h_generic_calc_Sub8x4; break;
1936//ZZ case Iop_HAdd8Ux4:
1937//ZZ fn = &h_generic_calc_HAdd8Ux4; break;
1938//ZZ case Iop_HAdd8Sx4:
1939//ZZ fn = &h_generic_calc_HAdd8Sx4; break;
1940//ZZ case Iop_HSub8Ux4:
1941//ZZ fn = &h_generic_calc_HSub8Ux4; break;
1942//ZZ case Iop_HSub8Sx4:
1943//ZZ fn = &h_generic_calc_HSub8Sx4; break;
1944//ZZ case Iop_QAdd8Sx4:
1945//ZZ fn = &h_generic_calc_QAdd8Sx4; break;
1946//ZZ case Iop_QAdd8Ux4:
1947//ZZ fn = &h_generic_calc_QAdd8Ux4; break;
1948//ZZ case Iop_QSub8Sx4:
1949//ZZ fn = &h_generic_calc_QSub8Sx4; break;
1950//ZZ case Iop_QSub8Ux4:
1951//ZZ fn = &h_generic_calc_QSub8Ux4; break;
1952//ZZ case Iop_Sad8Ux4:
1953//ZZ fn = &h_generic_calc_Sad8Ux4; break;
1954//ZZ case Iop_QAdd32S:
1955//ZZ fn = &h_generic_calc_QAdd32S; break;
1956//ZZ case Iop_QSub32S:
1957//ZZ fn = &h_generic_calc_QSub32S; break;
1958//ZZ case Iop_QSub16Ux2:
1959//ZZ fn = &h_generic_calc_QSub16Ux2; break;
1960 case Iop_DivU32:
1961 fn = &h_calc_udiv32_w_arm_semantics; break;
1962 case Iop_DivS32:
1963 fn = &h_calc_sdiv32_w_arm_semantics; break;
1964 case Iop_DivU64:
1965 fn = &h_calc_udiv64_w_arm_semantics; break;
1966 case Iop_DivS64:
1967 fn = &h_calc_sdiv64_w_arm_semantics; break;
1968 default:
1969 break;
1970 }
1971
1972 if (fn) {
1973 HReg regL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1974 HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1975 HReg res = newVRegI(env);
1976 addInstr(env, ARM64Instr_MovI(hregARM64_X0(), regL));
1977 addInstr(env, ARM64Instr_MovI(hregARM64_X1(), regR));
1978 addInstr(env, ARM64Instr_Call( ARM64cc_AL, (HWord)Ptr_to_ULong(fn),
1979 2, mk_RetLoc_simple(RLPri_Int) ));
1980 addInstr(env, ARM64Instr_MovI(res, hregARM64_X0()));
1981 return res;
1982 }
1983
1984 break;
1985 }
1986
1987 /* --------- UNARY OP --------- */
1988 case Iex_Unop: {
1989
1990 switch (e->Iex.Unop.op) {
1991 case Iop_16Uto64: {
1992 /* This probably doesn't occur often enough to be worth
1993 rolling the extension into the load. */
1994 IRExpr* arg = e->Iex.Unop.arg;
1995 HReg src = iselIntExpr_R(env, arg);
1996 HReg dst = widen_z_16_to_64(env, src);
1997 return dst;
1998 }
1999 case Iop_32Uto64: {
2000 IRExpr* arg = e->Iex.Unop.arg;
2001 if (arg->tag == Iex_Load) {
2002 /* This correctly zero extends because _LdSt32 is
2003 defined to do a zero extending load. */
2004 HReg dst = newVRegI(env);
2005 ARM64AMode* am
2006 = iselIntExpr_AMode(env, arg->Iex.Load.addr, Ity_I32);
2007 addInstr(env, ARM64Instr_LdSt32(True/*isLoad*/, dst, am));
2008 return dst;
2009 }
2010 /* else be lame and mask it */
2011 HReg src = iselIntExpr_R(env, arg);
2012 HReg dst = widen_z_32_to_64(env, src);
2013 return dst;
2014 }
2015 case Iop_8Uto32: /* Just freeload on the 8Uto64 case */
2016 case Iop_8Uto64: {
2017 IRExpr* arg = e->Iex.Unop.arg;
2018 if (arg->tag == Iex_Load) {
2019 /* This correctly zero extends because _LdSt8 is
2020 defined to do a zero extending load. */
2021 HReg dst = newVRegI(env);
2022 ARM64AMode* am
2023 = iselIntExpr_AMode(env, arg->Iex.Load.addr, Ity_I8);
2024 addInstr(env, ARM64Instr_LdSt8(True/*isLoad*/, dst, am));
2025 return dst;
2026 }
2027 /* else be lame and mask it */
2028 HReg src = iselIntExpr_R(env, arg);
2029 HReg dst = widen_z_8_to_64(env, src);
2030 return dst;
2031 }
2032 case Iop_128HIto64: {
2033 HReg rHi, rLo;
2034 iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
2035 return rHi; /* and abandon rLo */
2036 }
2037 case Iop_8Sto32: case Iop_8Sto64: {
2038 IRExpr* arg = e->Iex.Unop.arg;
2039 HReg src = iselIntExpr_R(env, arg);
2040 HReg dst = widen_s_8_to_64(env, src);
2041 return dst;
2042 }
2043 case Iop_16Sto32: case Iop_16Sto64: {
2044 IRExpr* arg = e->Iex.Unop.arg;
2045 HReg src = iselIntExpr_R(env, arg);
2046 HReg dst = widen_s_16_to_64(env, src);
2047 return dst;
2048 }
2049 case Iop_32Sto64: {
2050 IRExpr* arg = e->Iex.Unop.arg;
2051 HReg src = iselIntExpr_R(env, arg);
2052 HReg dst = widen_s_32_to_64(env, src);
2053 return dst;
2054 }
2055 case Iop_Not32:
2056 case Iop_Not64: {
2057 HReg dst = newVRegI(env);
2058 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2059 addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NOT));
2060 return dst;
2061 }
2062 case Iop_Clz64: {
2063 HReg dst = newVRegI(env);
2064 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2065 addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_CLZ));
2066 return dst;
2067 }
2068 case Iop_Left32:
2069 case Iop_Left64: {
2070 /* Left64(src) = src | -src. Left32 can use the same
2071 implementation since in that case we don't care what
2072 the upper 32 bits become. */
2073 HReg dst = newVRegI(env);
2074 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2075 addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NEG));
2076 addInstr(env, ARM64Instr_Logic(dst, dst, ARM64RIL_R(src),
2077 ARM64lo_OR));
2078 return dst;
2079 }
2080 case Iop_CmpwNEZ64: {
2081 /* CmpwNEZ64(src) = (src == 0) ? 0...0 : 1...1
2082 = Left64(src) >>s 63 */
2083 HReg dst = newVRegI(env);
2084 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2085 addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NEG));
2086 addInstr(env, ARM64Instr_Logic(dst, dst, ARM64RIL_R(src),
2087 ARM64lo_OR));
2088 addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63),
2089 ARM64sh_SAR));
2090 return dst;
2091 }
2092 case Iop_CmpwNEZ32: {
2093 /* CmpwNEZ32(src) = CmpwNEZ64(src & 0xFFFFFFFF)
2094 = Left64(src & 0xFFFFFFFF) >>s 63 */
2095 HReg dst = newVRegI(env);
2096 HReg pre = iselIntExpr_R(env, e->Iex.Unop.arg);
2097 HReg src = widen_z_32_to_64(env, pre);
2098 addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NEG));
2099 addInstr(env, ARM64Instr_Logic(dst, dst, ARM64RIL_R(src),
2100 ARM64lo_OR));
2101 addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63),
2102 ARM64sh_SAR));
2103 return dst;
2104 }
2105 case Iop_V128to64: case Iop_V128HIto64: {
2106 HReg dst = newVRegI(env);
2107 HReg src = iselV128Expr(env, e->Iex.Unop.arg);
2108 UInt laneNo = (e->Iex.Unop.op == Iop_V128HIto64) ? 1 : 0;
2109 addInstr(env, ARM64Instr_VXfromQ(dst, src, laneNo));
2110 return dst;
2111 }
sewardj85fbb022014-06-12 13:16:01 +00002112 case Iop_ReinterpF64asI64: {
2113 HReg dst = newVRegI(env);
2114 HReg src = iselDblExpr(env, e->Iex.Unop.arg);
2115 addInstr(env, ARM64Instr_VXfromDorS(dst, src, True/*fromD*/));
2116 return dst;
2117 }
2118 case Iop_ReinterpF32asI32: {
2119 HReg dst = newVRegI(env);
2120 HReg src = iselFltExpr(env, e->Iex.Unop.arg);
2121 addInstr(env, ARM64Instr_VXfromDorS(dst, src, False/*!fromD*/));
2122 return dst;
2123 }
sewardjbbcf1882014-01-12 12:49:10 +00002124 case Iop_1Sto32:
2125 case Iop_1Sto64: {
2126 /* As with the iselStmt case for 'tmp:I1 = expr', we could
2127 do a lot better here if it ever became necessary. */
2128 HReg zero = newVRegI(env);
2129 HReg one = newVRegI(env);
2130 HReg dst = newVRegI(env);
2131 addInstr(env, ARM64Instr_Imm64(zero, 0));
2132 addInstr(env, ARM64Instr_Imm64(one, 1));
2133 ARM64CondCode cc = iselCondCode(env, e->Iex.Unop.arg);
2134 addInstr(env, ARM64Instr_CSel(dst, one, zero, cc));
2135 addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63),
2136 ARM64sh_SHL));
2137 addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63),
2138 ARM64sh_SAR));
2139 return dst;
2140 }
sewardj1eaaec22014-03-07 22:52:19 +00002141 case Iop_NarrowUn16to8x8:
sewardj606c4ba2014-01-26 19:11:14 +00002142 case Iop_NarrowUn32to16x4:
2143 case Iop_NarrowUn64to32x2: {
2144 HReg src = iselV128Expr(env, e->Iex.Unop.arg);
2145 HReg tmp = newVRegV(env);
2146 HReg dst = newVRegI(env);
2147 UInt dszBlg2 = 3; /* illegal */
2148 switch (e->Iex.Unop.op) {
sewardj1eaaec22014-03-07 22:52:19 +00002149 case Iop_NarrowUn16to8x8: dszBlg2 = 0; break; // 16to8_x8
sewardj606c4ba2014-01-26 19:11:14 +00002150 case Iop_NarrowUn32to16x4: dszBlg2 = 1; break; // 32to16_x4
2151 case Iop_NarrowUn64to32x2: dszBlg2 = 2; break; // 64to32_x2
2152 default: vassert(0);
2153 }
2154 addInstr(env, ARM64Instr_VNarrowV(dszBlg2, tmp, src));
2155 addInstr(env, ARM64Instr_VXfromQ(dst, tmp, 0/*laneNo*/));
2156 return dst;
2157 }
sewardjbbcf1882014-01-12 12:49:10 +00002158//ZZ case Iop_64HIto32: {
2159//ZZ HReg rHi, rLo;
2160//ZZ iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
2161//ZZ return rHi; /* and abandon rLo .. poor wee thing :-) */
2162//ZZ }
2163//ZZ case Iop_64to32: {
2164//ZZ HReg rHi, rLo;
2165//ZZ iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
2166//ZZ return rLo; /* similar stupid comment to the above ... */
2167//ZZ }
2168//ZZ case Iop_64to8: {
2169//ZZ HReg rHi, rLo;
2170//ZZ if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
2171//ZZ HReg tHi = newVRegI(env);
2172//ZZ HReg tLo = newVRegI(env);
2173//ZZ HReg tmp = iselNeon64Expr(env, e->Iex.Unop.arg);
2174//ZZ addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo));
2175//ZZ rHi = tHi;
2176//ZZ rLo = tLo;
2177//ZZ } else {
2178//ZZ iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
2179//ZZ }
2180//ZZ return rLo;
2181//ZZ }
sewardj7d009132014-02-20 17:43:38 +00002182
sewardjf21a6ca2014-03-08 13:08:17 +00002183 case Iop_1Uto64: {
sewardj7d009132014-02-20 17:43:38 +00002184 /* 1Uto64(tmp). */
sewardjf21a6ca2014-03-08 13:08:17 +00002185 HReg dst = newVRegI(env);
sewardj7d009132014-02-20 17:43:38 +00002186 if (e->Iex.Unop.arg->tag == Iex_RdTmp) {
2187 ARM64RIL* one = mb_mkARM64RIL_I(1);
2188 HReg src = lookupIRTemp(env, e->Iex.Unop.arg->Iex.RdTmp.tmp);
sewardj7d009132014-02-20 17:43:38 +00002189 vassert(one);
2190 addInstr(env, ARM64Instr_Logic(dst, src, one, ARM64lo_AND));
sewardjf21a6ca2014-03-08 13:08:17 +00002191 } else {
2192 /* CLONE-01 */
2193 HReg zero = newVRegI(env);
2194 HReg one = newVRegI(env);
2195 addInstr(env, ARM64Instr_Imm64(zero, 0));
2196 addInstr(env, ARM64Instr_Imm64(one, 1));
2197 ARM64CondCode cc = iselCondCode(env, e->Iex.Unop.arg);
2198 addInstr(env, ARM64Instr_CSel(dst, one, zero, cc));
sewardj7d009132014-02-20 17:43:38 +00002199 }
sewardjf21a6ca2014-03-08 13:08:17 +00002200 return dst;
2201 }
sewardjbbcf1882014-01-12 12:49:10 +00002202//ZZ case Iop_1Uto8: {
2203//ZZ HReg dst = newVRegI(env);
2204//ZZ ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
2205//ZZ addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
2206//ZZ addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
2207//ZZ return dst;
2208//ZZ }
2209//ZZ
2210//ZZ case Iop_1Sto32: {
2211//ZZ HReg dst = newVRegI(env);
2212//ZZ ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
2213//ZZ ARMRI5* amt = ARMRI5_I5(31);
2214//ZZ /* This is really rough. We could do much better here;
2215//ZZ perhaps mvn{cond} dst, #0 as the second insn?
2216//ZZ (same applies to 1Sto64) */
2217//ZZ addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
2218//ZZ addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
2219//ZZ addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, dst, amt));
2220//ZZ addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt));
2221//ZZ return dst;
2222//ZZ }
2223//ZZ
2224//ZZ case Iop_Clz32: {
2225//ZZ /* Count leading zeroes; easy on ARM. */
2226//ZZ HReg dst = newVRegI(env);
2227//ZZ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2228//ZZ addInstr(env, ARMInstr_Unary(ARMun_CLZ, dst, src));
2229//ZZ return dst;
2230//ZZ }
2231//ZZ
2232//ZZ case Iop_CmpwNEZ32: {
2233//ZZ HReg dst = newVRegI(env);
2234//ZZ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2235//ZZ addInstr(env, ARMInstr_Unary(ARMun_NEG, dst, src));
2236//ZZ addInstr(env, ARMInstr_Alu(ARMalu_OR, dst, dst, ARMRI84_R(src)));
2237//ZZ addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, ARMRI5_I5(31)));
2238//ZZ return dst;
2239//ZZ }
2240//ZZ
2241//ZZ case Iop_ReinterpF32asI32: {
2242//ZZ HReg dst = newVRegI(env);
2243//ZZ HReg src = iselFltExpr(env, e->Iex.Unop.arg);
2244//ZZ addInstr(env, ARMInstr_VXferS(False/*!toS*/, src, dst));
2245//ZZ return dst;
2246//ZZ }
2247
2248 case Iop_64to32:
2249 case Iop_64to16:
2250 case Iop_64to8:
2251 /* These are no-ops. */
2252 return iselIntExpr_R(env, e->Iex.Unop.arg);
2253
2254 default:
2255 break;
2256 }
2257
2258//ZZ /* All Unop cases involving host-side helper calls. */
2259//ZZ void* fn = NULL;
2260//ZZ switch (e->Iex.Unop.op) {
2261//ZZ case Iop_CmpNEZ16x2:
2262//ZZ fn = &h_generic_calc_CmpNEZ16x2; break;
2263//ZZ case Iop_CmpNEZ8x4:
2264//ZZ fn = &h_generic_calc_CmpNEZ8x4; break;
2265//ZZ default:
2266//ZZ break;
2267//ZZ }
2268//ZZ
2269//ZZ if (fn) {
2270//ZZ HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
2271//ZZ HReg res = newVRegI(env);
2272//ZZ addInstr(env, mk_iMOVds_RR(hregARM_R0(), arg));
2273//ZZ addInstr(env, ARMInstr_Call( ARMcc_AL, (HWord)Ptr_to_ULong(fn),
2274//ZZ 1, RetLocInt ));
2275//ZZ addInstr(env, mk_iMOVds_RR(res, hregARM_R0()));
2276//ZZ return res;
2277//ZZ }
2278
2279 break;
2280 }
2281
2282 /* --------- GET --------- */
2283 case Iex_Get: {
2284 if (ty == Ity_I64
sewardj32d86752014-03-02 12:47:18 +00002285 && 0 == (e->Iex.Get.offset & 7) && e->Iex.Get.offset < (8<<12)-8) {
sewardjbbcf1882014-01-12 12:49:10 +00002286 HReg dst = newVRegI(env);
2287 ARM64AMode* am
2288 = mk_baseblock_64bit_access_amode(e->Iex.Get.offset);
2289 addInstr(env, ARM64Instr_LdSt64(True/*isLoad*/, dst, am));
2290 return dst;
2291 }
2292 if (ty == Ity_I32
sewardj32d86752014-03-02 12:47:18 +00002293 && 0 == (e->Iex.Get.offset & 3) && e->Iex.Get.offset < (4<<12)-4) {
sewardjbbcf1882014-01-12 12:49:10 +00002294 HReg dst = newVRegI(env);
2295 ARM64AMode* am
2296 = mk_baseblock_32bit_access_amode(e->Iex.Get.offset);
2297 addInstr(env, ARM64Instr_LdSt32(True/*isLoad*/, dst, am));
2298 return dst;
2299 }
sewardj32d86752014-03-02 12:47:18 +00002300 if (ty == Ity_I16
2301 && 0 == (e->Iex.Get.offset & 1) && e->Iex.Get.offset < (2<<12)-2) {
2302 HReg dst = newVRegI(env);
2303 ARM64AMode* am
2304 = mk_baseblock_16bit_access_amode(e->Iex.Get.offset);
2305 addInstr(env, ARM64Instr_LdSt16(True/*isLoad*/, dst, am));
2306 return dst;
2307 }
2308 if (ty == Ity_I8
2309 /* && no alignment check */ && e->Iex.Get.offset < (1<<12)-1) {
2310 HReg dst = newVRegI(env);
2311 ARM64AMode* am
2312 = mk_baseblock_8bit_access_amode(e->Iex.Get.offset);
2313 addInstr(env, ARM64Instr_LdSt8(True/*isLoad*/, dst, am));
2314 return dst;
2315 }
sewardjbbcf1882014-01-12 12:49:10 +00002316 break;
2317 }
2318
2319 /* --------- CCALL --------- */
2320 case Iex_CCall: {
2321 HReg dst = newVRegI(env);
2322 vassert(ty == e->Iex.CCall.retty);
2323
2324 /* be very restrictive for now. Only 64-bit ints allowed for
2325 args, and 64 bits for return type. Don't forget to change
2326 the RetLoc if more types are allowed in future. */
2327 if (e->Iex.CCall.retty != Ity_I64)
2328 goto irreducible;
2329
2330 /* Marshal args, do the call, clear stack. */
2331 UInt addToSp = 0;
2332 RetLoc rloc = mk_RetLoc_INVALID();
2333 Bool ok = doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
2334 e->Iex.CCall.cee, e->Iex.CCall.retty,
2335 e->Iex.CCall.args );
2336 /* */
2337 if (ok) {
2338 vassert(is_sane_RetLoc(rloc));
2339 vassert(rloc.pri == RLPri_Int);
2340 vassert(addToSp == 0);
2341 addInstr(env, ARM64Instr_MovI(dst, hregARM64_X0()));
2342 return dst;
2343 }
2344 /* else fall through; will hit the irreducible: label */
2345 }
2346
2347 /* --------- LITERAL --------- */
2348 /* 64-bit literals */
2349 case Iex_Const: {
2350 ULong u = 0;
2351 HReg dst = newVRegI(env);
2352 switch (e->Iex.Const.con->tag) {
2353 case Ico_U64: u = e->Iex.Const.con->Ico.U64; break;
2354 case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
2355 case Ico_U16: u = e->Iex.Const.con->Ico.U16; break;
2356 case Ico_U8: u = e->Iex.Const.con->Ico.U8; break;
2357 default: ppIRExpr(e); vpanic("iselIntExpr_R.Iex_Const(arm64)");
2358 }
2359 addInstr(env, ARM64Instr_Imm64(dst, u));
2360 return dst;
2361 }
2362
2363 /* --------- MULTIPLEX --------- */
2364 case Iex_ITE: {
2365 /* ITE(ccexpr, iftrue, iffalse) */
2366 if (ty == Ity_I64 || ty == Ity_I32) {
2367 ARM64CondCode cc;
2368 HReg r1 = iselIntExpr_R(env, e->Iex.ITE.iftrue);
2369 HReg r0 = iselIntExpr_R(env, e->Iex.ITE.iffalse);
2370 HReg dst = newVRegI(env);
2371 cc = iselCondCode(env, e->Iex.ITE.cond);
2372 addInstr(env, ARM64Instr_CSel(dst, r1, r0, cc));
2373 return dst;
2374 }
2375 break;
2376 }
2377
2378 default:
2379 break;
2380 } /* switch (e->tag) */
2381
2382 /* We get here if no pattern matched. */
2383 irreducible:
2384 ppIRExpr(e);
2385 vpanic("iselIntExpr_R: cannot reduce tree");
2386}
2387
2388
2389/*---------------------------------------------------------*/
2390/*--- ISEL: Integer expressions (128 bit) ---*/
2391/*---------------------------------------------------------*/
2392
2393/* Compute a 128-bit value into a register pair, which is returned as
2394 the first two parameters. As with iselIntExpr_R, these may be
2395 either real or virtual regs; in any case they must not be changed
2396 by subsequent code emitted by the caller. */
2397
2398static void iselInt128Expr ( HReg* rHi, HReg* rLo,
2399 ISelEnv* env, IRExpr* e )
2400{
2401 iselInt128Expr_wrk(rHi, rLo, env, e);
2402# if 0
2403 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
2404# endif
2405 vassert(hregClass(*rHi) == HRcInt64);
2406 vassert(hregIsVirtual(*rHi));
2407 vassert(hregClass(*rLo) == HRcInt64);
2408 vassert(hregIsVirtual(*rLo));
2409}
2410
2411/* DO NOT CALL THIS DIRECTLY ! */
2412static void iselInt128Expr_wrk ( HReg* rHi, HReg* rLo,
2413 ISelEnv* env, IRExpr* e )
2414{
2415 vassert(e);
2416 vassert(typeOfIRExpr(env->type_env,e) == Ity_I128);
2417
2418 /* --------- BINARY ops --------- */
2419 if (e->tag == Iex_Binop) {
2420 switch (e->Iex.Binop.op) {
2421 /* 64 x 64 -> 128 multiply */
2422 case Iop_MullU64:
sewardj7fce7cc2014-05-07 09:41:40 +00002423 case Iop_MullS64: {
sewardjbbcf1882014-01-12 12:49:10 +00002424 Bool syned = toBool(e->Iex.Binop.op == Iop_MullS64);
2425 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
2426 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
2427 HReg dstLo = newVRegI(env);
2428 HReg dstHi = newVRegI(env);
2429 addInstr(env, ARM64Instr_Mul(dstLo, argL, argR,
2430 ARM64mul_PLAIN));
2431 addInstr(env, ARM64Instr_Mul(dstHi, argL, argR,
2432 syned ? ARM64mul_SX : ARM64mul_ZX));
2433 *rHi = dstHi;
2434 *rLo = dstLo;
2435 return;
2436 }
2437 /* 64HLto128(e1,e2) */
2438 case Iop_64HLto128:
2439 *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
2440 *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
2441 return;
2442 default:
2443 break;
2444 }
2445 } /* if (e->tag == Iex_Binop) */
2446
2447 ppIRExpr(e);
2448 vpanic("iselInt128Expr(arm64)");
2449}
2450
2451
2452//ZZ /* -------------------- 64-bit -------------------- */
2453//ZZ
2454//ZZ /* Compute a 64-bit value into a register pair, which is returned as
2455//ZZ the first two parameters. As with iselIntExpr_R, these may be
2456//ZZ either real or virtual regs; in any case they must not be changed
2457//ZZ by subsequent code emitted by the caller. */
2458//ZZ
2459//ZZ static void iselInt64Expr ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e )
2460//ZZ {
2461//ZZ iselInt64Expr_wrk(rHi, rLo, env, e);
2462//ZZ # if 0
2463//ZZ vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
2464//ZZ # endif
2465//ZZ vassert(hregClass(*rHi) == HRcInt32);
2466//ZZ vassert(hregIsVirtual(*rHi));
2467//ZZ vassert(hregClass(*rLo) == HRcInt32);
2468//ZZ vassert(hregIsVirtual(*rLo));
2469//ZZ }
2470//ZZ
2471//ZZ /* DO NOT CALL THIS DIRECTLY ! */
2472//ZZ static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e )
2473//ZZ {
2474//ZZ vassert(e);
2475//ZZ vassert(typeOfIRExpr(env->type_env,e) == Ity_I64);
2476//ZZ
2477//ZZ /* 64-bit literal */
2478//ZZ if (e->tag == Iex_Const) {
2479//ZZ ULong w64 = e->Iex.Const.con->Ico.U64;
2480//ZZ UInt wHi = toUInt(w64 >> 32);
2481//ZZ UInt wLo = toUInt(w64);
2482//ZZ HReg tHi = newVRegI(env);
2483//ZZ HReg tLo = newVRegI(env);
2484//ZZ vassert(e->Iex.Const.con->tag == Ico_U64);
2485//ZZ addInstr(env, ARMInstr_Imm32(tHi, wHi));
2486//ZZ addInstr(env, ARMInstr_Imm32(tLo, wLo));
2487//ZZ *rHi = tHi;
2488//ZZ *rLo = tLo;
2489//ZZ return;
2490//ZZ }
2491//ZZ
2492//ZZ /* read 64-bit IRTemp */
2493//ZZ if (e->tag == Iex_RdTmp) {
2494//ZZ if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
2495//ZZ HReg tHi = newVRegI(env);
2496//ZZ HReg tLo = newVRegI(env);
2497//ZZ HReg tmp = iselNeon64Expr(env, e);
2498//ZZ addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo));
2499//ZZ *rHi = tHi;
2500//ZZ *rLo = tLo;
2501//ZZ } else {
2502//ZZ lookupIRTemp64( rHi, rLo, env, e->Iex.RdTmp.tmp);
2503//ZZ }
2504//ZZ return;
2505//ZZ }
2506//ZZ
2507//ZZ /* 64-bit load */
2508//ZZ if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
2509//ZZ HReg tLo, tHi, rA;
2510//ZZ vassert(e->Iex.Load.ty == Ity_I64);
2511//ZZ rA = iselIntExpr_R(env, e->Iex.Load.addr);
2512//ZZ tHi = newVRegI(env);
2513//ZZ tLo = newVRegI(env);
2514//ZZ addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/,
2515//ZZ tHi, ARMAMode1_RI(rA, 4)));
2516//ZZ addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/,
2517//ZZ tLo, ARMAMode1_RI(rA, 0)));
2518//ZZ *rHi = tHi;
2519//ZZ *rLo = tLo;
2520//ZZ return;
2521//ZZ }
2522//ZZ
2523//ZZ /* 64-bit GET */
2524//ZZ if (e->tag == Iex_Get) {
2525//ZZ ARMAMode1* am0 = ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset + 0);
2526//ZZ ARMAMode1* am4 = ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset + 4);
2527//ZZ HReg tHi = newVRegI(env);
2528//ZZ HReg tLo = newVRegI(env);
2529//ZZ addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/, tHi, am4));
2530//ZZ addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/, tLo, am0));
2531//ZZ *rHi = tHi;
2532//ZZ *rLo = tLo;
2533//ZZ return;
2534//ZZ }
2535//ZZ
2536//ZZ /* --------- BINARY ops --------- */
2537//ZZ if (e->tag == Iex_Binop) {
2538//ZZ switch (e->Iex.Binop.op) {
2539//ZZ
2540//ZZ /* 32 x 32 -> 64 multiply */
2541//ZZ case Iop_MullS32:
2542//ZZ case Iop_MullU32: {
2543//ZZ HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
2544//ZZ HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
2545//ZZ HReg tHi = newVRegI(env);
2546//ZZ HReg tLo = newVRegI(env);
2547//ZZ ARMMulOp mop = e->Iex.Binop.op == Iop_MullS32
2548//ZZ ? ARMmul_SX : ARMmul_ZX;
2549//ZZ addInstr(env, mk_iMOVds_RR(hregARM_R2(), argL));
2550//ZZ addInstr(env, mk_iMOVds_RR(hregARM_R3(), argR));
2551//ZZ addInstr(env, ARMInstr_Mul(mop));
2552//ZZ addInstr(env, mk_iMOVds_RR(tHi, hregARM_R1()));
2553//ZZ addInstr(env, mk_iMOVds_RR(tLo, hregARM_R0()));
2554//ZZ *rHi = tHi;
2555//ZZ *rLo = tLo;
2556//ZZ return;
2557//ZZ }
2558//ZZ
2559//ZZ case Iop_Or64: {
2560//ZZ HReg xLo, xHi, yLo, yHi;
2561//ZZ HReg tHi = newVRegI(env);
2562//ZZ HReg tLo = newVRegI(env);
2563//ZZ iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2564//ZZ iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
2565//ZZ addInstr(env, ARMInstr_Alu(ARMalu_OR, tHi, xHi, ARMRI84_R(yHi)));
2566//ZZ addInstr(env, ARMInstr_Alu(ARMalu_OR, tLo, xLo, ARMRI84_R(yLo)));
2567//ZZ *rHi = tHi;
2568//ZZ *rLo = tLo;
2569//ZZ return;
2570//ZZ }
2571//ZZ
2572//ZZ case Iop_Add64: {
2573//ZZ HReg xLo, xHi, yLo, yHi;
2574//ZZ HReg tHi = newVRegI(env);
2575//ZZ HReg tLo = newVRegI(env);
2576//ZZ iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2577//ZZ iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
2578//ZZ addInstr(env, ARMInstr_Alu(ARMalu_ADDS, tLo, xLo, ARMRI84_R(yLo)));
2579//ZZ addInstr(env, ARMInstr_Alu(ARMalu_ADC, tHi, xHi, ARMRI84_R(yHi)));
2580//ZZ *rHi = tHi;
2581//ZZ *rLo = tLo;
2582//ZZ return;
2583//ZZ }
2584//ZZ
2585//ZZ /* 32HLto64(e1,e2) */
2586//ZZ case Iop_32HLto64: {
2587//ZZ *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
2588//ZZ *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
2589//ZZ return;
2590//ZZ }
2591//ZZ
2592//ZZ default:
2593//ZZ break;
2594//ZZ }
2595//ZZ }
2596//ZZ
2597//ZZ /* --------- UNARY ops --------- */
2598//ZZ if (e->tag == Iex_Unop) {
2599//ZZ switch (e->Iex.Unop.op) {
2600//ZZ
2601//ZZ /* ReinterpF64asI64 */
2602//ZZ case Iop_ReinterpF64asI64: {
2603//ZZ HReg dstHi = newVRegI(env);
2604//ZZ HReg dstLo = newVRegI(env);
2605//ZZ HReg src = iselDblExpr(env, e->Iex.Unop.arg);
2606//ZZ addInstr(env, ARMInstr_VXferD(False/*!toD*/, src, dstHi, dstLo));
2607//ZZ *rHi = dstHi;
2608//ZZ *rLo = dstLo;
2609//ZZ return;
2610//ZZ }
2611//ZZ
2612//ZZ /* Left64(e) */
2613//ZZ case Iop_Left64: {
2614//ZZ HReg yLo, yHi;
2615//ZZ HReg tHi = newVRegI(env);
2616//ZZ HReg tLo = newVRegI(env);
2617//ZZ HReg zero = newVRegI(env);
2618//ZZ /* yHi:yLo = arg */
2619//ZZ iselInt64Expr(&yHi, &yLo, env, e->Iex.Unop.arg);
2620//ZZ /* zero = 0 */
2621//ZZ addInstr(env, ARMInstr_Imm32(zero, 0));
2622//ZZ /* tLo = 0 - yLo, and set carry */
2623//ZZ addInstr(env, ARMInstr_Alu(ARMalu_SUBS,
2624//ZZ tLo, zero, ARMRI84_R(yLo)));
2625//ZZ /* tHi = 0 - yHi - carry */
2626//ZZ addInstr(env, ARMInstr_Alu(ARMalu_SBC,
2627//ZZ tHi, zero, ARMRI84_R(yHi)));
2628//ZZ /* So now we have tHi:tLo = -arg. To finish off, or 'arg'
2629//ZZ back in, so as to give the final result
2630//ZZ tHi:tLo = arg | -arg. */
2631//ZZ addInstr(env, ARMInstr_Alu(ARMalu_OR, tHi, tHi, ARMRI84_R(yHi)));
2632//ZZ addInstr(env, ARMInstr_Alu(ARMalu_OR, tLo, tLo, ARMRI84_R(yLo)));
2633//ZZ *rHi = tHi;
2634//ZZ *rLo = tLo;
2635//ZZ return;
2636//ZZ }
2637//ZZ
2638//ZZ /* CmpwNEZ64(e) */
2639//ZZ case Iop_CmpwNEZ64: {
2640//ZZ HReg srcLo, srcHi;
2641//ZZ HReg tmp1 = newVRegI(env);
2642//ZZ HReg tmp2 = newVRegI(env);
2643//ZZ /* srcHi:srcLo = arg */
2644//ZZ iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg);
2645//ZZ /* tmp1 = srcHi | srcLo */
2646//ZZ addInstr(env, ARMInstr_Alu(ARMalu_OR,
2647//ZZ tmp1, srcHi, ARMRI84_R(srcLo)));
2648//ZZ /* tmp2 = (tmp1 | -tmp1) >>s 31 */
2649//ZZ addInstr(env, ARMInstr_Unary(ARMun_NEG, tmp2, tmp1));
2650//ZZ addInstr(env, ARMInstr_Alu(ARMalu_OR,
2651//ZZ tmp2, tmp2, ARMRI84_R(tmp1)));
2652//ZZ addInstr(env, ARMInstr_Shift(ARMsh_SAR,
2653//ZZ tmp2, tmp2, ARMRI5_I5(31)));
2654//ZZ *rHi = tmp2;
2655//ZZ *rLo = tmp2;
2656//ZZ return;
2657//ZZ }
2658//ZZ
2659//ZZ case Iop_1Sto64: {
2660//ZZ HReg dst = newVRegI(env);
2661//ZZ ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
2662//ZZ ARMRI5* amt = ARMRI5_I5(31);
2663//ZZ /* This is really rough. We could do much better here;
2664//ZZ perhaps mvn{cond} dst, #0 as the second insn?
2665//ZZ (same applies to 1Sto32) */
2666//ZZ addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
2667//ZZ addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
2668//ZZ addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, dst, amt));
2669//ZZ addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt));
2670//ZZ *rHi = dst;
2671//ZZ *rLo = dst;
2672//ZZ return;
2673//ZZ }
2674//ZZ
2675//ZZ default:
2676//ZZ break;
2677//ZZ }
2678//ZZ } /* if (e->tag == Iex_Unop) */
2679//ZZ
2680//ZZ /* --------- MULTIPLEX --------- */
2681//ZZ if (e->tag == Iex_ITE) { // VFD
2682//ZZ IRType tyC;
2683//ZZ HReg r1hi, r1lo, r0hi, r0lo, dstHi, dstLo;
2684//ZZ ARMCondCode cc;
2685//ZZ tyC = typeOfIRExpr(env->type_env,e->Iex.ITE.cond);
2686//ZZ vassert(tyC == Ity_I1);
2687//ZZ iselInt64Expr(&r1hi, &r1lo, env, e->Iex.ITE.iftrue);
2688//ZZ iselInt64Expr(&r0hi, &r0lo, env, e->Iex.ITE.iffalse);
2689//ZZ dstHi = newVRegI(env);
2690//ZZ dstLo = newVRegI(env);
2691//ZZ addInstr(env, mk_iMOVds_RR(dstHi, r1hi));
2692//ZZ addInstr(env, mk_iMOVds_RR(dstLo, r1lo));
2693//ZZ cc = iselCondCode(env, e->Iex.ITE.cond);
2694//ZZ addInstr(env, ARMInstr_CMov(cc ^ 1, dstHi, ARMRI84_R(r0hi)));
2695//ZZ addInstr(env, ARMInstr_CMov(cc ^ 1, dstLo, ARMRI84_R(r0lo)));
2696//ZZ *rHi = dstHi;
2697//ZZ *rLo = dstLo;
2698//ZZ return;
2699//ZZ }
2700//ZZ
2701//ZZ /* It is convenient sometimes to call iselInt64Expr even when we
2702//ZZ have NEON support (e.g. in do_helper_call we need 64-bit
2703//ZZ arguments as 2 x 32 regs). */
2704//ZZ if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
2705//ZZ HReg tHi = newVRegI(env);
2706//ZZ HReg tLo = newVRegI(env);
2707//ZZ HReg tmp = iselNeon64Expr(env, e);
2708//ZZ addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo));
2709//ZZ *rHi = tHi;
2710//ZZ *rLo = tLo;
2711//ZZ return ;
2712//ZZ }
2713//ZZ
2714//ZZ ppIRExpr(e);
2715//ZZ vpanic("iselInt64Expr");
2716//ZZ }
2717//ZZ
2718//ZZ
2719//ZZ /*---------------------------------------------------------*/
2720//ZZ /*--- ISEL: Vector (NEON) expressions (64 bit) ---*/
2721//ZZ /*---------------------------------------------------------*/
2722//ZZ
2723//ZZ static HReg iselNeon64Expr ( ISelEnv* env, IRExpr* e )
2724//ZZ {
2725//ZZ HReg r = iselNeon64Expr_wrk( env, e );
2726//ZZ vassert(hregClass(r) == HRcFlt64);
2727//ZZ vassert(hregIsVirtual(r));
2728//ZZ return r;
2729//ZZ }
2730//ZZ
2731//ZZ /* DO NOT CALL THIS DIRECTLY */
2732//ZZ static HReg iselNeon64Expr_wrk ( ISelEnv* env, IRExpr* e )
2733//ZZ {
2734//ZZ IRType ty = typeOfIRExpr(env->type_env, e);
2735//ZZ MatchInfo mi;
2736//ZZ vassert(e);
2737//ZZ vassert(ty == Ity_I64);
2738//ZZ
2739//ZZ if (e->tag == Iex_RdTmp) {
2740//ZZ return lookupIRTemp(env, e->Iex.RdTmp.tmp);
2741//ZZ }
2742//ZZ
2743//ZZ if (e->tag == Iex_Const) {
2744//ZZ HReg rLo, rHi;
2745//ZZ HReg res = newVRegD(env);
2746//ZZ iselInt64Expr(&rHi, &rLo, env, e);
2747//ZZ addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
2748//ZZ return res;
2749//ZZ }
2750//ZZ
2751//ZZ /* 64-bit load */
2752//ZZ if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
2753//ZZ HReg res = newVRegD(env);
2754//ZZ ARMAModeN* am = iselIntExpr_AModeN(env, e->Iex.Load.addr);
2755//ZZ vassert(ty == Ity_I64);
2756//ZZ addInstr(env, ARMInstr_NLdStD(True, res, am));
2757//ZZ return res;
2758//ZZ }
2759//ZZ
2760//ZZ /* 64-bit GET */
2761//ZZ if (e->tag == Iex_Get) {
2762//ZZ HReg addr = newVRegI(env);
2763//ZZ HReg res = newVRegD(env);
2764//ZZ vassert(ty == Ity_I64);
2765//ZZ addInstr(env, ARMInstr_Add32(addr, hregARM_R8(), e->Iex.Get.offset));
2766//ZZ addInstr(env, ARMInstr_NLdStD(True, res, mkARMAModeN_R(addr)));
2767//ZZ return res;
2768//ZZ }
2769//ZZ
2770//ZZ /* --------- BINARY ops --------- */
2771//ZZ if (e->tag == Iex_Binop) {
2772//ZZ switch (e->Iex.Binop.op) {
2773//ZZ
2774//ZZ /* 32 x 32 -> 64 multiply */
2775//ZZ case Iop_MullS32:
2776//ZZ case Iop_MullU32: {
2777//ZZ HReg rLo, rHi;
2778//ZZ HReg res = newVRegD(env);
2779//ZZ iselInt64Expr(&rHi, &rLo, env, e);
2780//ZZ addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
2781//ZZ return res;
2782//ZZ }
2783//ZZ
2784//ZZ case Iop_And64: {
2785//ZZ HReg res = newVRegD(env);
2786//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2787//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2788//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
2789//ZZ res, argL, argR, 4, False));
2790//ZZ return res;
2791//ZZ }
2792//ZZ case Iop_Or64: {
2793//ZZ HReg res = newVRegD(env);
2794//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2795//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2796//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
2797//ZZ res, argL, argR, 4, False));
2798//ZZ return res;
2799//ZZ }
2800//ZZ case Iop_Xor64: {
2801//ZZ HReg res = newVRegD(env);
2802//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2803//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2804//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VXOR,
2805//ZZ res, argL, argR, 4, False));
2806//ZZ return res;
2807//ZZ }
2808//ZZ
2809//ZZ /* 32HLto64(e1,e2) */
2810//ZZ case Iop_32HLto64: {
2811//ZZ HReg rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
2812//ZZ HReg rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
2813//ZZ HReg res = newVRegD(env);
2814//ZZ addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
2815//ZZ return res;
2816//ZZ }
2817//ZZ
2818//ZZ case Iop_Add8x8:
2819//ZZ case Iop_Add16x4:
2820//ZZ case Iop_Add32x2:
2821//ZZ case Iop_Add64: {
2822//ZZ HReg res = newVRegD(env);
2823//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2824//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2825//ZZ UInt size;
2826//ZZ switch (e->Iex.Binop.op) {
2827//ZZ case Iop_Add8x8: size = 0; break;
2828//ZZ case Iop_Add16x4: size = 1; break;
2829//ZZ case Iop_Add32x2: size = 2; break;
2830//ZZ case Iop_Add64: size = 3; break;
2831//ZZ default: vassert(0);
2832//ZZ }
2833//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VADD,
2834//ZZ res, argL, argR, size, False));
2835//ZZ return res;
2836//ZZ }
2837//ZZ case Iop_Add32Fx2: {
2838//ZZ HReg res = newVRegD(env);
2839//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2840//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2841//ZZ UInt size = 0;
2842//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VADDFP,
2843//ZZ res, argL, argR, size, False));
2844//ZZ return res;
2845//ZZ }
2846//ZZ case Iop_Recps32Fx2: {
2847//ZZ HReg res = newVRegD(env);
2848//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2849//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2850//ZZ UInt size = 0;
2851//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VRECPS,
2852//ZZ res, argL, argR, size, False));
2853//ZZ return res;
2854//ZZ }
2855//ZZ case Iop_Rsqrts32Fx2: {
2856//ZZ HReg res = newVRegD(env);
2857//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2858//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2859//ZZ UInt size = 0;
2860//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VRSQRTS,
2861//ZZ res, argL, argR, size, False));
2862//ZZ return res;
2863//ZZ }
2864//ZZ
2865//ZZ // These 6 verified 18 Apr 2013
2866//ZZ case Iop_InterleaveHI32x2:
2867//ZZ case Iop_InterleaveLO32x2:
2868//ZZ case Iop_InterleaveOddLanes8x8:
2869//ZZ case Iop_InterleaveEvenLanes8x8:
2870//ZZ case Iop_InterleaveOddLanes16x4:
2871//ZZ case Iop_InterleaveEvenLanes16x4: {
2872//ZZ HReg rD = newVRegD(env);
2873//ZZ HReg rM = newVRegD(env);
2874//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2875//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2876//ZZ UInt size;
2877//ZZ Bool resRd; // is the result in rD or rM ?
2878//ZZ switch (e->Iex.Binop.op) {
2879//ZZ case Iop_InterleaveOddLanes8x8: resRd = False; size = 0; break;
2880//ZZ case Iop_InterleaveEvenLanes8x8: resRd = True; size = 0; break;
2881//ZZ case Iop_InterleaveOddLanes16x4: resRd = False; size = 1; break;
2882//ZZ case Iop_InterleaveEvenLanes16x4: resRd = True; size = 1; break;
2883//ZZ case Iop_InterleaveHI32x2: resRd = False; size = 2; break;
2884//ZZ case Iop_InterleaveLO32x2: resRd = True; size = 2; break;
2885//ZZ default: vassert(0);
2886//ZZ }
2887//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, False));
2888//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, False));
2889//ZZ addInstr(env, ARMInstr_NDual(ARMneon_TRN, rD, rM, size, False));
2890//ZZ return resRd ? rD : rM;
2891//ZZ }
2892//ZZ
2893//ZZ // These 4 verified 18 Apr 2013
2894//ZZ case Iop_InterleaveHI8x8:
2895//ZZ case Iop_InterleaveLO8x8:
2896//ZZ case Iop_InterleaveHI16x4:
2897//ZZ case Iop_InterleaveLO16x4: {
2898//ZZ HReg rD = newVRegD(env);
2899//ZZ HReg rM = newVRegD(env);
2900//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2901//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2902//ZZ UInt size;
2903//ZZ Bool resRd; // is the result in rD or rM ?
2904//ZZ switch (e->Iex.Binop.op) {
2905//ZZ case Iop_InterleaveHI8x8: resRd = False; size = 0; break;
2906//ZZ case Iop_InterleaveLO8x8: resRd = True; size = 0; break;
2907//ZZ case Iop_InterleaveHI16x4: resRd = False; size = 1; break;
2908//ZZ case Iop_InterleaveLO16x4: resRd = True; size = 1; break;
2909//ZZ default: vassert(0);
2910//ZZ }
2911//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, False));
2912//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, False));
2913//ZZ addInstr(env, ARMInstr_NDual(ARMneon_ZIP, rD, rM, size, False));
2914//ZZ return resRd ? rD : rM;
2915//ZZ }
2916//ZZ
2917//ZZ // These 4 verified 18 Apr 2013
2918//ZZ case Iop_CatOddLanes8x8:
2919//ZZ case Iop_CatEvenLanes8x8:
2920//ZZ case Iop_CatOddLanes16x4:
2921//ZZ case Iop_CatEvenLanes16x4: {
2922//ZZ HReg rD = newVRegD(env);
2923//ZZ HReg rM = newVRegD(env);
2924//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2925//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2926//ZZ UInt size;
2927//ZZ Bool resRd; // is the result in rD or rM ?
2928//ZZ switch (e->Iex.Binop.op) {
2929//ZZ case Iop_CatOddLanes8x8: resRd = False; size = 0; break;
2930//ZZ case Iop_CatEvenLanes8x8: resRd = True; size = 0; break;
2931//ZZ case Iop_CatOddLanes16x4: resRd = False; size = 1; break;
2932//ZZ case Iop_CatEvenLanes16x4: resRd = True; size = 1; break;
2933//ZZ default: vassert(0);
2934//ZZ }
2935//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, False));
2936//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, False));
2937//ZZ addInstr(env, ARMInstr_NDual(ARMneon_UZP, rD, rM, size, False));
2938//ZZ return resRd ? rD : rM;
2939//ZZ }
2940//ZZ
2941//ZZ case Iop_QAdd8Ux8:
2942//ZZ case Iop_QAdd16Ux4:
2943//ZZ case Iop_QAdd32Ux2:
2944//ZZ case Iop_QAdd64Ux1: {
2945//ZZ HReg res = newVRegD(env);
2946//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2947//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2948//ZZ UInt size;
2949//ZZ switch (e->Iex.Binop.op) {
2950//ZZ case Iop_QAdd8Ux8: size = 0; break;
2951//ZZ case Iop_QAdd16Ux4: size = 1; break;
2952//ZZ case Iop_QAdd32Ux2: size = 2; break;
2953//ZZ case Iop_QAdd64Ux1: size = 3; break;
2954//ZZ default: vassert(0);
2955//ZZ }
2956//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQADDU,
2957//ZZ res, argL, argR, size, False));
2958//ZZ return res;
2959//ZZ }
2960//ZZ case Iop_QAdd8Sx8:
2961//ZZ case Iop_QAdd16Sx4:
2962//ZZ case Iop_QAdd32Sx2:
2963//ZZ case Iop_QAdd64Sx1: {
2964//ZZ HReg res = newVRegD(env);
2965//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2966//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2967//ZZ UInt size;
2968//ZZ switch (e->Iex.Binop.op) {
2969//ZZ case Iop_QAdd8Sx8: size = 0; break;
2970//ZZ case Iop_QAdd16Sx4: size = 1; break;
2971//ZZ case Iop_QAdd32Sx2: size = 2; break;
2972//ZZ case Iop_QAdd64Sx1: size = 3; break;
2973//ZZ default: vassert(0);
2974//ZZ }
2975//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQADDS,
2976//ZZ res, argL, argR, size, False));
2977//ZZ return res;
2978//ZZ }
2979//ZZ case Iop_Sub8x8:
2980//ZZ case Iop_Sub16x4:
2981//ZZ case Iop_Sub32x2:
2982//ZZ case Iop_Sub64: {
2983//ZZ HReg res = newVRegD(env);
2984//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2985//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2986//ZZ UInt size;
2987//ZZ switch (e->Iex.Binop.op) {
2988//ZZ case Iop_Sub8x8: size = 0; break;
2989//ZZ case Iop_Sub16x4: size = 1; break;
2990//ZZ case Iop_Sub32x2: size = 2; break;
2991//ZZ case Iop_Sub64: size = 3; break;
2992//ZZ default: vassert(0);
2993//ZZ }
2994//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
2995//ZZ res, argL, argR, size, False));
2996//ZZ return res;
2997//ZZ }
2998//ZZ case Iop_Sub32Fx2: {
2999//ZZ HReg res = newVRegD(env);
3000//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3001//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3002//ZZ UInt size = 0;
3003//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VSUBFP,
3004//ZZ res, argL, argR, size, False));
3005//ZZ return res;
3006//ZZ }
3007//ZZ case Iop_QSub8Ux8:
3008//ZZ case Iop_QSub16Ux4:
3009//ZZ case Iop_QSub32Ux2:
3010//ZZ case Iop_QSub64Ux1: {
3011//ZZ HReg res = newVRegD(env);
3012//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3013//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3014//ZZ UInt size;
3015//ZZ switch (e->Iex.Binop.op) {
3016//ZZ case Iop_QSub8Ux8: size = 0; break;
3017//ZZ case Iop_QSub16Ux4: size = 1; break;
3018//ZZ case Iop_QSub32Ux2: size = 2; break;
3019//ZZ case Iop_QSub64Ux1: size = 3; break;
3020//ZZ default: vassert(0);
3021//ZZ }
3022//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBU,
3023//ZZ res, argL, argR, size, False));
3024//ZZ return res;
3025//ZZ }
3026//ZZ case Iop_QSub8Sx8:
3027//ZZ case Iop_QSub16Sx4:
3028//ZZ case Iop_QSub32Sx2:
3029//ZZ case Iop_QSub64Sx1: {
3030//ZZ HReg res = newVRegD(env);
3031//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3032//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3033//ZZ UInt size;
3034//ZZ switch (e->Iex.Binop.op) {
3035//ZZ case Iop_QSub8Sx8: size = 0; break;
3036//ZZ case Iop_QSub16Sx4: size = 1; break;
3037//ZZ case Iop_QSub32Sx2: size = 2; break;
3038//ZZ case Iop_QSub64Sx1: size = 3; break;
3039//ZZ default: vassert(0);
3040//ZZ }
3041//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBS,
3042//ZZ res, argL, argR, size, False));
3043//ZZ return res;
3044//ZZ }
3045//ZZ case Iop_Max8Ux8:
3046//ZZ case Iop_Max16Ux4:
3047//ZZ case Iop_Max32Ux2: {
3048//ZZ HReg res = newVRegD(env);
3049//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3050//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3051//ZZ UInt size;
3052//ZZ switch (e->Iex.Binop.op) {
3053//ZZ case Iop_Max8Ux8: size = 0; break;
3054//ZZ case Iop_Max16Ux4: size = 1; break;
3055//ZZ case Iop_Max32Ux2: size = 2; break;
3056//ZZ default: vassert(0);
3057//ZZ }
3058//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMAXU,
3059//ZZ res, argL, argR, size, False));
3060//ZZ return res;
3061//ZZ }
3062//ZZ case Iop_Max8Sx8:
3063//ZZ case Iop_Max16Sx4:
3064//ZZ case Iop_Max32Sx2: {
3065//ZZ HReg res = newVRegD(env);
3066//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3067//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3068//ZZ UInt size;
3069//ZZ switch (e->Iex.Binop.op) {
3070//ZZ case Iop_Max8Sx8: size = 0; break;
3071//ZZ case Iop_Max16Sx4: size = 1; break;
3072//ZZ case Iop_Max32Sx2: size = 2; break;
3073//ZZ default: vassert(0);
3074//ZZ }
3075//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMAXS,
3076//ZZ res, argL, argR, size, False));
3077//ZZ return res;
3078//ZZ }
3079//ZZ case Iop_Min8Ux8:
3080//ZZ case Iop_Min16Ux4:
3081//ZZ case Iop_Min32Ux2: {
3082//ZZ HReg res = newVRegD(env);
3083//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3084//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3085//ZZ UInt size;
3086//ZZ switch (e->Iex.Binop.op) {
3087//ZZ case Iop_Min8Ux8: size = 0; break;
3088//ZZ case Iop_Min16Ux4: size = 1; break;
3089//ZZ case Iop_Min32Ux2: size = 2; break;
3090//ZZ default: vassert(0);
3091//ZZ }
3092//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMINU,
3093//ZZ res, argL, argR, size, False));
3094//ZZ return res;
3095//ZZ }
3096//ZZ case Iop_Min8Sx8:
3097//ZZ case Iop_Min16Sx4:
3098//ZZ case Iop_Min32Sx2: {
3099//ZZ HReg res = newVRegD(env);
3100//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3101//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3102//ZZ UInt size;
3103//ZZ switch (e->Iex.Binop.op) {
3104//ZZ case Iop_Min8Sx8: size = 0; break;
3105//ZZ case Iop_Min16Sx4: size = 1; break;
3106//ZZ case Iop_Min32Sx2: size = 2; break;
3107//ZZ default: vassert(0);
3108//ZZ }
3109//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMINS,
3110//ZZ res, argL, argR, size, False));
3111//ZZ return res;
3112//ZZ }
3113//ZZ case Iop_Sar8x8:
3114//ZZ case Iop_Sar16x4:
3115//ZZ case Iop_Sar32x2: {
3116//ZZ HReg res = newVRegD(env);
3117//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3118//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3119//ZZ HReg argR2 = newVRegD(env);
3120//ZZ HReg zero = newVRegD(env);
3121//ZZ UInt size;
3122//ZZ switch (e->Iex.Binop.op) {
3123//ZZ case Iop_Sar8x8: size = 0; break;
3124//ZZ case Iop_Sar16x4: size = 1; break;
3125//ZZ case Iop_Sar32x2: size = 2; break;
3126//ZZ case Iop_Sar64: size = 3; break;
3127//ZZ default: vassert(0);
3128//ZZ }
3129//ZZ addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
3130//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
3131//ZZ argR2, zero, argR, size, False));
3132//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
3133//ZZ res, argL, argR2, size, False));
3134//ZZ return res;
3135//ZZ }
3136//ZZ case Iop_Sal8x8:
3137//ZZ case Iop_Sal16x4:
3138//ZZ case Iop_Sal32x2:
3139//ZZ case Iop_Sal64x1: {
3140//ZZ HReg res = newVRegD(env);
3141//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3142//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3143//ZZ UInt size;
3144//ZZ switch (e->Iex.Binop.op) {
3145//ZZ case Iop_Sal8x8: size = 0; break;
3146//ZZ case Iop_Sal16x4: size = 1; break;
3147//ZZ case Iop_Sal32x2: size = 2; break;
3148//ZZ case Iop_Sal64x1: size = 3; break;
3149//ZZ default: vassert(0);
3150//ZZ }
3151//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
3152//ZZ res, argL, argR, size, False));
3153//ZZ return res;
3154//ZZ }
3155//ZZ case Iop_Shr8x8:
3156//ZZ case Iop_Shr16x4:
3157//ZZ case Iop_Shr32x2: {
3158//ZZ HReg res = newVRegD(env);
3159//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3160//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3161//ZZ HReg argR2 = newVRegD(env);
3162//ZZ HReg zero = newVRegD(env);
3163//ZZ UInt size;
3164//ZZ switch (e->Iex.Binop.op) {
3165//ZZ case Iop_Shr8x8: size = 0; break;
3166//ZZ case Iop_Shr16x4: size = 1; break;
3167//ZZ case Iop_Shr32x2: size = 2; break;
3168//ZZ default: vassert(0);
3169//ZZ }
3170//ZZ addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
3171//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
3172//ZZ argR2, zero, argR, size, False));
3173//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
3174//ZZ res, argL, argR2, size, False));
3175//ZZ return res;
3176//ZZ }
3177//ZZ case Iop_Shl8x8:
3178//ZZ case Iop_Shl16x4:
3179//ZZ case Iop_Shl32x2: {
3180//ZZ HReg res = newVRegD(env);
3181//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3182//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3183//ZZ UInt size;
3184//ZZ switch (e->Iex.Binop.op) {
3185//ZZ case Iop_Shl8x8: size = 0; break;
3186//ZZ case Iop_Shl16x4: size = 1; break;
3187//ZZ case Iop_Shl32x2: size = 2; break;
3188//ZZ default: vassert(0);
3189//ZZ }
3190//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
3191//ZZ res, argL, argR, size, False));
3192//ZZ return res;
3193//ZZ }
3194//ZZ case Iop_QShl8x8:
3195//ZZ case Iop_QShl16x4:
3196//ZZ case Iop_QShl32x2:
3197//ZZ case Iop_QShl64x1: {
3198//ZZ HReg res = newVRegD(env);
3199//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3200//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3201//ZZ UInt size;
3202//ZZ switch (e->Iex.Binop.op) {
3203//ZZ case Iop_QShl8x8: size = 0; break;
3204//ZZ case Iop_QShl16x4: size = 1; break;
3205//ZZ case Iop_QShl32x2: size = 2; break;
3206//ZZ case Iop_QShl64x1: size = 3; break;
3207//ZZ default: vassert(0);
3208//ZZ }
3209//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VQSHL,
3210//ZZ res, argL, argR, size, False));
3211//ZZ return res;
3212//ZZ }
3213//ZZ case Iop_QSal8x8:
3214//ZZ case Iop_QSal16x4:
3215//ZZ case Iop_QSal32x2:
3216//ZZ case Iop_QSal64x1: {
3217//ZZ HReg res = newVRegD(env);
3218//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3219//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3220//ZZ UInt size;
3221//ZZ switch (e->Iex.Binop.op) {
3222//ZZ case Iop_QSal8x8: size = 0; break;
3223//ZZ case Iop_QSal16x4: size = 1; break;
3224//ZZ case Iop_QSal32x2: size = 2; break;
3225//ZZ case Iop_QSal64x1: size = 3; break;
3226//ZZ default: vassert(0);
3227//ZZ }
3228//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VQSAL,
3229//ZZ res, argL, argR, size, False));
3230//ZZ return res;
3231//ZZ }
3232//ZZ case Iop_QShlN8x8:
3233//ZZ case Iop_QShlN16x4:
3234//ZZ case Iop_QShlN32x2:
3235//ZZ case Iop_QShlN64x1: {
3236//ZZ HReg res = newVRegD(env);
3237//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3238//ZZ UInt size, imm;
3239//ZZ if (e->Iex.Binop.arg2->tag != Iex_Const ||
3240//ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
3241//ZZ             vpanic("ARM target supports Iop_QShlNAxB with constant "
3242//ZZ "second argument only\n");
3243//ZZ }
3244//ZZ imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
3245//ZZ switch (e->Iex.Binop.op) {
3246//ZZ case Iop_QShlN8x8: size = 8 | imm; break;
3247//ZZ case Iop_QShlN16x4: size = 16 | imm; break;
3248//ZZ case Iop_QShlN32x2: size = 32 | imm; break;
3249//ZZ case Iop_QShlN64x1: size = 64 | imm; break;
3250//ZZ default: vassert(0);
3251//ZZ }
3252//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUU,
3253//ZZ res, argL, size, False));
3254//ZZ return res;
3255//ZZ }
3256//ZZ case Iop_QShlN8Sx8:
3257//ZZ case Iop_QShlN16Sx4:
3258//ZZ case Iop_QShlN32Sx2:
3259//ZZ case Iop_QShlN64Sx1: {
3260//ZZ HReg res = newVRegD(env);
3261//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3262//ZZ UInt size, imm;
3263//ZZ if (e->Iex.Binop.arg2->tag != Iex_Const ||
3264//ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
3265//ZZ             vpanic("ARM target supports Iop_QShlNAxB with constant "
3266//ZZ "second argument only\n");
3267//ZZ }
3268//ZZ imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
3269//ZZ switch (e->Iex.Binop.op) {
3270//ZZ case Iop_QShlN8Sx8: size = 8 | imm; break;
3271//ZZ case Iop_QShlN16Sx4: size = 16 | imm; break;
3272//ZZ case Iop_QShlN32Sx2: size = 32 | imm; break;
3273//ZZ case Iop_QShlN64Sx1: size = 64 | imm; break;
3274//ZZ default: vassert(0);
3275//ZZ }
3276//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUS,
3277//ZZ res, argL, size, False));
3278//ZZ return res;
3279//ZZ }
3280//ZZ case Iop_QSalN8x8:
3281//ZZ case Iop_QSalN16x4:
3282//ZZ case Iop_QSalN32x2:
3283//ZZ case Iop_QSalN64x1: {
3284//ZZ HReg res = newVRegD(env);
3285//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3286//ZZ UInt size, imm;
3287//ZZ if (e->Iex.Binop.arg2->tag != Iex_Const ||
3288//ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
3289//ZZ             vpanic("ARM target supports Iop_QShlNAxB with constant "
3290//ZZ "second argument only\n");
3291//ZZ }
3292//ZZ imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
3293//ZZ switch (e->Iex.Binop.op) {
3294//ZZ case Iop_QSalN8x8: size = 8 | imm; break;
3295//ZZ case Iop_QSalN16x4: size = 16 | imm; break;
3296//ZZ case Iop_QSalN32x2: size = 32 | imm; break;
3297//ZZ case Iop_QSalN64x1: size = 64 | imm; break;
3298//ZZ default: vassert(0);
3299//ZZ }
3300//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNSS,
3301//ZZ res, argL, size, False));
3302//ZZ return res;
3303//ZZ }
3304//ZZ case Iop_ShrN8x8:
3305//ZZ case Iop_ShrN16x4:
3306//ZZ case Iop_ShrN32x2:
3307//ZZ case Iop_Shr64: {
3308//ZZ HReg res = newVRegD(env);
3309//ZZ HReg tmp = newVRegD(env);
3310//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3311//ZZ HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
3312//ZZ HReg argR2 = newVRegI(env);
3313//ZZ UInt size;
3314//ZZ switch (e->Iex.Binop.op) {
3315//ZZ case Iop_ShrN8x8: size = 0; break;
3316//ZZ case Iop_ShrN16x4: size = 1; break;
3317//ZZ case Iop_ShrN32x2: size = 2; break;
3318//ZZ case Iop_Shr64: size = 3; break;
3319//ZZ default: vassert(0);
3320//ZZ }
3321//ZZ addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
3322//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, False));
3323//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
3324//ZZ res, argL, tmp, size, False));
3325//ZZ return res;
3326//ZZ }
3327//ZZ case Iop_ShlN8x8:
3328//ZZ case Iop_ShlN16x4:
3329//ZZ case Iop_ShlN32x2:
3330//ZZ case Iop_Shl64: {
3331//ZZ HReg res = newVRegD(env);
3332//ZZ HReg tmp = newVRegD(env);
3333//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3334//ZZ /* special-case Shl64(x, imm8) since the Neon front
3335//ZZ end produces a lot of those for V{LD,ST}{1,2,3,4}. */
3336//ZZ if (e->Iex.Binop.op == Iop_Shl64
3337//ZZ && e->Iex.Binop.arg2->tag == Iex_Const) {
3338//ZZ vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
3339//ZZ Int nshift = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
3340//ZZ if (nshift >= 1 && nshift <= 63) {
3341//ZZ addInstr(env, ARMInstr_NShl64(res, argL, nshift));
3342//ZZ return res;
3343//ZZ }
3344//ZZ /* else fall through to general case */
3345//ZZ }
3346//ZZ HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
3347//ZZ UInt size;
3348//ZZ switch (e->Iex.Binop.op) {
3349//ZZ case Iop_ShlN8x8: size = 0; break;
3350//ZZ case Iop_ShlN16x4: size = 1; break;
3351//ZZ case Iop_ShlN32x2: size = 2; break;
3352//ZZ case Iop_Shl64: size = 3; break;
3353//ZZ default: vassert(0);
3354//ZZ }
3355//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_DUP,
3356//ZZ tmp, argR, 0, False));
3357//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
3358//ZZ res, argL, tmp, size, False));
3359//ZZ return res;
3360//ZZ }
3361//ZZ case Iop_SarN8x8:
3362//ZZ case Iop_SarN16x4:
3363//ZZ case Iop_SarN32x2:
3364//ZZ case Iop_Sar64: {
3365//ZZ HReg res = newVRegD(env);
3366//ZZ HReg tmp = newVRegD(env);
3367//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3368//ZZ HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
3369//ZZ HReg argR2 = newVRegI(env);
3370//ZZ UInt size;
3371//ZZ switch (e->Iex.Binop.op) {
3372//ZZ case Iop_SarN8x8: size = 0; break;
3373//ZZ case Iop_SarN16x4: size = 1; break;
3374//ZZ case Iop_SarN32x2: size = 2; break;
3375//ZZ case Iop_Sar64: size = 3; break;
3376//ZZ default: vassert(0);
3377//ZZ }
3378//ZZ addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
3379//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, False));
3380//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
3381//ZZ res, argL, tmp, size, False));
3382//ZZ return res;
3383//ZZ }
3384//ZZ case Iop_CmpGT8Ux8:
3385//ZZ case Iop_CmpGT16Ux4:
3386//ZZ case Iop_CmpGT32Ux2: {
3387//ZZ HReg res = newVRegD(env);
3388//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3389//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3390//ZZ UInt size;
3391//ZZ switch (e->Iex.Binop.op) {
3392//ZZ case Iop_CmpGT8Ux8: size = 0; break;
3393//ZZ case Iop_CmpGT16Ux4: size = 1; break;
3394//ZZ case Iop_CmpGT32Ux2: size = 2; break;
3395//ZZ default: vassert(0);
3396//ZZ }
3397//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGTU,
3398//ZZ res, argL, argR, size, False));
3399//ZZ return res;
3400//ZZ }
3401//ZZ case Iop_CmpGT8Sx8:
3402//ZZ case Iop_CmpGT16Sx4:
3403//ZZ case Iop_CmpGT32Sx2: {
3404//ZZ HReg res = newVRegD(env);
3405//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3406//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3407//ZZ UInt size;
3408//ZZ switch (e->Iex.Binop.op) {
3409//ZZ case Iop_CmpGT8Sx8: size = 0; break;
3410//ZZ case Iop_CmpGT16Sx4: size = 1; break;
3411//ZZ case Iop_CmpGT32Sx2: size = 2; break;
3412//ZZ default: vassert(0);
3413//ZZ }
3414//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGTS,
3415//ZZ res, argL, argR, size, False));
3416//ZZ return res;
3417//ZZ }
3418//ZZ case Iop_CmpEQ8x8:
3419//ZZ case Iop_CmpEQ16x4:
3420//ZZ case Iop_CmpEQ32x2: {
3421//ZZ HReg res = newVRegD(env);
3422//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3423//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3424//ZZ UInt size;
3425//ZZ switch (e->Iex.Binop.op) {
3426//ZZ case Iop_CmpEQ8x8: size = 0; break;
3427//ZZ case Iop_CmpEQ16x4: size = 1; break;
3428//ZZ case Iop_CmpEQ32x2: size = 2; break;
3429//ZZ default: vassert(0);
3430//ZZ }
3431//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCEQ,
3432//ZZ res, argL, argR, size, False));
3433//ZZ return res;
3434//ZZ }
3435//ZZ case Iop_Mul8x8:
3436//ZZ case Iop_Mul16x4:
3437//ZZ case Iop_Mul32x2: {
3438//ZZ HReg res = newVRegD(env);
3439//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3440//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3441//ZZ UInt size = 0;
3442//ZZ switch(e->Iex.Binop.op) {
3443//ZZ case Iop_Mul8x8: size = 0; break;
3444//ZZ case Iop_Mul16x4: size = 1; break;
3445//ZZ case Iop_Mul32x2: size = 2; break;
3446//ZZ default: vassert(0);
3447//ZZ }
3448//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMUL,
3449//ZZ res, argL, argR, size, False));
3450//ZZ return res;
3451//ZZ }
3452//ZZ case Iop_Mul32Fx2: {
3453//ZZ HReg res = newVRegD(env);
3454//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3455//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3456//ZZ UInt size = 0;
3457//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMULFP,
3458//ZZ res, argL, argR, size, False));
3459//ZZ return res;
3460//ZZ }
3461//ZZ case Iop_QDMulHi16Sx4:
3462//ZZ case Iop_QDMulHi32Sx2: {
3463//ZZ HReg res = newVRegD(env);
3464//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3465//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3466//ZZ UInt size = 0;
3467//ZZ switch(e->Iex.Binop.op) {
3468//ZZ case Iop_QDMulHi16Sx4: size = 1; break;
3469//ZZ case Iop_QDMulHi32Sx2: size = 2; break;
3470//ZZ default: vassert(0);
3471//ZZ }
3472//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULH,
3473//ZZ res, argL, argR, size, False));
3474//ZZ return res;
3475//ZZ }
3476//ZZ
3477//ZZ case Iop_QRDMulHi16Sx4:
3478//ZZ case Iop_QRDMulHi32Sx2: {
3479//ZZ HReg res = newVRegD(env);
3480//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3481//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3482//ZZ UInt size = 0;
3483//ZZ switch(e->Iex.Binop.op) {
3484//ZZ case Iop_QRDMulHi16Sx4: size = 1; break;
3485//ZZ case Iop_QRDMulHi32Sx2: size = 2; break;
3486//ZZ default: vassert(0);
3487//ZZ }
3488//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQRDMULH,
3489//ZZ res, argL, argR, size, False));
3490//ZZ return res;
3491//ZZ }
3492//ZZ
3493//ZZ case Iop_PwAdd8x8:
3494//ZZ case Iop_PwAdd16x4:
3495//ZZ case Iop_PwAdd32x2: {
3496//ZZ HReg res = newVRegD(env);
3497//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3498//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3499//ZZ UInt size = 0;
3500//ZZ switch(e->Iex.Binop.op) {
3501//ZZ case Iop_PwAdd8x8: size = 0; break;
3502//ZZ case Iop_PwAdd16x4: size = 1; break;
3503//ZZ case Iop_PwAdd32x2: size = 2; break;
3504//ZZ default: vassert(0);
3505//ZZ }
3506//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VPADD,
3507//ZZ res, argL, argR, size, False));
3508//ZZ return res;
3509//ZZ }
3510//ZZ case Iop_PwAdd32Fx2: {
3511//ZZ HReg res = newVRegD(env);
3512//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3513//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3514//ZZ UInt size = 0;
3515//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VPADDFP,
3516//ZZ res, argL, argR, size, False));
3517//ZZ return res;
3518//ZZ }
3519//ZZ case Iop_PwMin8Ux8:
3520//ZZ case Iop_PwMin16Ux4:
3521//ZZ case Iop_PwMin32Ux2: {
3522//ZZ HReg res = newVRegD(env);
3523//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3524//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3525//ZZ UInt size = 0;
3526//ZZ switch(e->Iex.Binop.op) {
3527//ZZ case Iop_PwMin8Ux8: size = 0; break;
3528//ZZ case Iop_PwMin16Ux4: size = 1; break;
3529//ZZ case Iop_PwMin32Ux2: size = 2; break;
3530//ZZ default: vassert(0);
3531//ZZ }
3532//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VPMINU,
3533//ZZ res, argL, argR, size, False));
3534//ZZ return res;
3535//ZZ }
3536//ZZ case Iop_PwMin8Sx8:
3537//ZZ case Iop_PwMin16Sx4:
3538//ZZ case Iop_PwMin32Sx2: {
3539//ZZ HReg res = newVRegD(env);
3540//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3541//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3542//ZZ UInt size = 0;
3543//ZZ switch(e->Iex.Binop.op) {
3544//ZZ case Iop_PwMin8Sx8: size = 0; break;
3545//ZZ case Iop_PwMin16Sx4: size = 1; break;
3546//ZZ case Iop_PwMin32Sx2: size = 2; break;
3547//ZZ default: vassert(0);
3548//ZZ }
3549//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VPMINS,
3550//ZZ res, argL, argR, size, False));
3551//ZZ return res;
3552//ZZ }
3553//ZZ case Iop_PwMax8Ux8:
3554//ZZ case Iop_PwMax16Ux4:
3555//ZZ case Iop_PwMax32Ux2: {
3556//ZZ HReg res = newVRegD(env);
3557//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3558//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3559//ZZ UInt size = 0;
3560//ZZ switch(e->Iex.Binop.op) {
3561//ZZ case Iop_PwMax8Ux8: size = 0; break;
3562//ZZ case Iop_PwMax16Ux4: size = 1; break;
3563//ZZ case Iop_PwMax32Ux2: size = 2; break;
3564//ZZ default: vassert(0);
3565//ZZ }
3566//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXU,
3567//ZZ res, argL, argR, size, False));
3568//ZZ return res;
3569//ZZ }
3570//ZZ case Iop_PwMax8Sx8:
3571//ZZ case Iop_PwMax16Sx4:
3572//ZZ case Iop_PwMax32Sx2: {
3573//ZZ HReg res = newVRegD(env);
3574//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3575//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3576//ZZ UInt size = 0;
3577//ZZ switch(e->Iex.Binop.op) {
3578//ZZ case Iop_PwMax8Sx8: size = 0; break;
3579//ZZ case Iop_PwMax16Sx4: size = 1; break;
3580//ZZ case Iop_PwMax32Sx2: size = 2; break;
3581//ZZ default: vassert(0);
3582//ZZ }
3583//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXS,
3584//ZZ res, argL, argR, size, False));
3585//ZZ return res;
3586//ZZ }
3587//ZZ case Iop_Perm8x8: {
3588//ZZ HReg res = newVRegD(env);
3589//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3590//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3591//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VTBL,
3592//ZZ res, argL, argR, 0, False));
3593//ZZ return res;
3594//ZZ }
3595//ZZ case Iop_PolynomialMul8x8: {
3596//ZZ HReg res = newVRegD(env);
3597//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3598//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3599//ZZ UInt size = 0;
3600//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMULP,
3601//ZZ res, argL, argR, size, False));
3602//ZZ return res;
3603//ZZ }
3604//ZZ case Iop_Max32Fx2: {
3605//ZZ HReg res = newVRegD(env);
3606//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3607//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3608//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMAXF,
3609//ZZ res, argL, argR, 2, False));
3610//ZZ return res;
3611//ZZ }
3612//ZZ case Iop_Min32Fx2: {
3613//ZZ HReg res = newVRegD(env);
3614//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3615//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3616//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMINF,
3617//ZZ res, argL, argR, 2, False));
3618//ZZ return res;
3619//ZZ }
3620//ZZ case Iop_PwMax32Fx2: {
3621//ZZ HReg res = newVRegD(env);
3622//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3623//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3624//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXF,
3625//ZZ res, argL, argR, 2, False));
3626//ZZ return res;
3627//ZZ }
3628//ZZ case Iop_PwMin32Fx2: {
3629//ZZ HReg res = newVRegD(env);
3630//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3631//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3632//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VPMINF,
3633//ZZ res, argL, argR, 2, False));
3634//ZZ return res;
3635//ZZ }
3636//ZZ case Iop_CmpGT32Fx2: {
3637//ZZ HReg res = newVRegD(env);
3638//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3639//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3640//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGTF,
3641//ZZ res, argL, argR, 2, False));
3642//ZZ return res;
3643//ZZ }
3644//ZZ case Iop_CmpGE32Fx2: {
3645//ZZ HReg res = newVRegD(env);
3646//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3647//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3648//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGEF,
3649//ZZ res, argL, argR, 2, False));
3650//ZZ return res;
3651//ZZ }
3652//ZZ case Iop_CmpEQ32Fx2: {
3653//ZZ HReg res = newVRegD(env);
3654//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3655//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3656//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCEQF,
3657//ZZ res, argL, argR, 2, False));
3658//ZZ return res;
3659//ZZ }
3660//ZZ case Iop_F32ToFixed32Ux2_RZ:
3661//ZZ case Iop_F32ToFixed32Sx2_RZ:
3662//ZZ case Iop_Fixed32UToF32x2_RN:
3663//ZZ case Iop_Fixed32SToF32x2_RN: {
3664//ZZ HReg res = newVRegD(env);
3665//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Binop.arg1);
3666//ZZ ARMNeonUnOp op;
3667//ZZ UInt imm6;
3668//ZZ if (e->Iex.Binop.arg2->tag != Iex_Const ||
3669//ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
3670//ZZ vpanic("ARM supports FP <-> Fixed conversion with constant "
3671//ZZ "second argument less than 33 only\n");
3672//ZZ }
3673//ZZ imm6 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
3674//ZZ vassert(imm6 <= 32 && imm6 > 0);
3675//ZZ imm6 = 64 - imm6;
3676//ZZ switch(e->Iex.Binop.op) {
3677//ZZ case Iop_F32ToFixed32Ux2_RZ: op = ARMneon_VCVTFtoFixedU; break;
3678//ZZ case Iop_F32ToFixed32Sx2_RZ: op = ARMneon_VCVTFtoFixedS; break;
3679//ZZ case Iop_Fixed32UToF32x2_RN: op = ARMneon_VCVTFixedUtoF; break;
3680//ZZ case Iop_Fixed32SToF32x2_RN: op = ARMneon_VCVTFixedStoF; break;
3681//ZZ default: vassert(0);
3682//ZZ }
3683//ZZ addInstr(env, ARMInstr_NUnary(op, res, arg, imm6, False));
3684//ZZ return res;
3685//ZZ }
3686//ZZ /*
3687//ZZ FIXME: is this here or not?
3688//ZZ case Iop_VDup8x8:
3689//ZZ case Iop_VDup16x4:
3690//ZZ case Iop_VDup32x2: {
3691//ZZ HReg res = newVRegD(env);
3692//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3693//ZZ UInt index;
3694//ZZ UInt imm4;
3695//ZZ UInt size = 0;
3696//ZZ if (e->Iex.Binop.arg2->tag != Iex_Const ||
3697//ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
3698//ZZ vpanic("ARM supports Iop_VDup with constant "
3699//ZZ "second argument less than 16 only\n");
3700//ZZ }
3701//ZZ index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
3702//ZZ switch(e->Iex.Binop.op) {
3703//ZZ case Iop_VDup8x8: imm4 = (index << 1) + 1; break;
3704//ZZ case Iop_VDup16x4: imm4 = (index << 2) + 2; break;
3705//ZZ case Iop_VDup32x2: imm4 = (index << 3) + 4; break;
3706//ZZ default: vassert(0);
3707//ZZ }
3708//ZZ if (imm4 >= 16) {
3709//ZZ vpanic("ARM supports Iop_VDup with constant "
3710//ZZ "second argument less than 16 only\n");
3711//ZZ }
3712//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VDUP,
3713//ZZ res, argL, imm4, False));
3714//ZZ return res;
3715//ZZ }
3716//ZZ */
3717//ZZ default:
3718//ZZ break;
3719//ZZ }
3720//ZZ }
3721//ZZ
3722//ZZ /* --------- UNARY ops --------- */
3723//ZZ if (e->tag == Iex_Unop) {
3724//ZZ switch (e->Iex.Unop.op) {
3725//ZZ
3726//ZZ /* 32Uto64 */
3727//ZZ case Iop_32Uto64: {
3728//ZZ HReg rLo = iselIntExpr_R(env, e->Iex.Unop.arg);
3729//ZZ HReg rHi = newVRegI(env);
3730//ZZ HReg res = newVRegD(env);
3731//ZZ addInstr(env, ARMInstr_Imm32(rHi, 0));
3732//ZZ addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
3733//ZZ return res;
3734//ZZ }
3735//ZZ
3736//ZZ /* 32Sto64 */
3737//ZZ case Iop_32Sto64: {
3738//ZZ HReg rLo = iselIntExpr_R(env, e->Iex.Unop.arg);
3739//ZZ HReg rHi = newVRegI(env);
3740//ZZ addInstr(env, mk_iMOVds_RR(rHi, rLo));
3741//ZZ addInstr(env, ARMInstr_Shift(ARMsh_SAR, rHi, rHi, ARMRI5_I5(31)));
3742//ZZ HReg res = newVRegD(env);
3743//ZZ addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
3744//ZZ return res;
3745//ZZ }
3746//ZZ
3747//ZZ /* The next 3 are pass-throughs */
3748//ZZ /* ReinterpF64asI64 */
3749//ZZ case Iop_ReinterpF64asI64:
3750//ZZ /* Left64(e) */
3751//ZZ case Iop_Left64:
3752//ZZ /* CmpwNEZ64(e) */
3753//ZZ case Iop_1Sto64: {
3754//ZZ HReg rLo, rHi;
3755//ZZ HReg res = newVRegD(env);
3756//ZZ iselInt64Expr(&rHi, &rLo, env, e);
3757//ZZ addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
3758//ZZ return res;
3759//ZZ }
3760//ZZ
3761//ZZ case Iop_Not64: {
3762//ZZ DECLARE_PATTERN(p_veqz_8x8);
3763//ZZ DECLARE_PATTERN(p_veqz_16x4);
3764//ZZ DECLARE_PATTERN(p_veqz_32x2);
3765//ZZ DECLARE_PATTERN(p_vcge_8sx8);
3766//ZZ DECLARE_PATTERN(p_vcge_16sx4);
3767//ZZ DECLARE_PATTERN(p_vcge_32sx2);
3768//ZZ DECLARE_PATTERN(p_vcge_8ux8);
3769//ZZ DECLARE_PATTERN(p_vcge_16ux4);
3770//ZZ DECLARE_PATTERN(p_vcge_32ux2);
3771//ZZ DEFINE_PATTERN(p_veqz_8x8,
3772//ZZ unop(Iop_Not64, unop(Iop_CmpNEZ8x8, bind(0))));
3773//ZZ DEFINE_PATTERN(p_veqz_16x4,
3774//ZZ unop(Iop_Not64, unop(Iop_CmpNEZ16x4, bind(0))));
3775//ZZ DEFINE_PATTERN(p_veqz_32x2,
3776//ZZ unop(Iop_Not64, unop(Iop_CmpNEZ32x2, bind(0))));
3777//ZZ DEFINE_PATTERN(p_vcge_8sx8,
3778//ZZ unop(Iop_Not64, binop(Iop_CmpGT8Sx8, bind(1), bind(0))));
3779//ZZ DEFINE_PATTERN(p_vcge_16sx4,
3780//ZZ unop(Iop_Not64, binop(Iop_CmpGT16Sx4, bind(1), bind(0))));
3781//ZZ DEFINE_PATTERN(p_vcge_32sx2,
3782//ZZ unop(Iop_Not64, binop(Iop_CmpGT32Sx2, bind(1), bind(0))));
3783//ZZ DEFINE_PATTERN(p_vcge_8ux8,
3784//ZZ unop(Iop_Not64, binop(Iop_CmpGT8Ux8, bind(1), bind(0))));
3785//ZZ DEFINE_PATTERN(p_vcge_16ux4,
3786//ZZ unop(Iop_Not64, binop(Iop_CmpGT16Ux4, bind(1), bind(0))));
3787//ZZ DEFINE_PATTERN(p_vcge_32ux2,
3788//ZZ unop(Iop_Not64, binop(Iop_CmpGT32Ux2, bind(1), bind(0))));
3789//ZZ if (matchIRExpr(&mi, p_veqz_8x8, e)) {
3790//ZZ HReg res = newVRegD(env);
3791//ZZ HReg arg = iselNeon64Expr(env, mi.bindee[0]);
3792//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 0, False));
3793//ZZ return res;
3794//ZZ } else if (matchIRExpr(&mi, p_veqz_16x4, e)) {
3795//ZZ HReg res = newVRegD(env);
3796//ZZ HReg arg = iselNeon64Expr(env, mi.bindee[0]);
3797//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 1, False));
3798//ZZ return res;
3799//ZZ } else if (matchIRExpr(&mi, p_veqz_32x2, e)) {
3800//ZZ HReg res = newVRegD(env);
3801//ZZ HReg arg = iselNeon64Expr(env, mi.bindee[0]);
3802//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 2, False));
3803//ZZ return res;
3804//ZZ } else if (matchIRExpr(&mi, p_vcge_8sx8, e)) {
3805//ZZ HReg res = newVRegD(env);
3806//ZZ HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3807//ZZ HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3808//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3809//ZZ res, argL, argR, 0, False));
3810//ZZ return res;
3811//ZZ } else if (matchIRExpr(&mi, p_vcge_16sx4, e)) {
3812//ZZ HReg res = newVRegD(env);
3813//ZZ HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3814//ZZ HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3815//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3816//ZZ res, argL, argR, 1, False));
3817//ZZ return res;
3818//ZZ } else if (matchIRExpr(&mi, p_vcge_32sx2, e)) {
3819//ZZ HReg res = newVRegD(env);
3820//ZZ HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3821//ZZ HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3822//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3823//ZZ res, argL, argR, 2, False));
3824//ZZ return res;
3825//ZZ } else if (matchIRExpr(&mi, p_vcge_8ux8, e)) {
3826//ZZ HReg res = newVRegD(env);
3827//ZZ HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3828//ZZ HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3829//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3830//ZZ res, argL, argR, 0, False));
3831//ZZ return res;
3832//ZZ } else if (matchIRExpr(&mi, p_vcge_16ux4, e)) {
3833//ZZ HReg res = newVRegD(env);
3834//ZZ HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3835//ZZ HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3836//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3837//ZZ res, argL, argR, 1, False));
3838//ZZ return res;
3839//ZZ } else if (matchIRExpr(&mi, p_vcge_32ux2, e)) {
3840//ZZ HReg res = newVRegD(env);
3841//ZZ HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3842//ZZ HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3843//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3844//ZZ res, argL, argR, 2, False));
3845//ZZ return res;
3846//ZZ } else {
3847//ZZ HReg res = newVRegD(env);
3848//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3849//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, arg, 4, False));
3850//ZZ return res;
3851//ZZ }
3852//ZZ }
3853//ZZ case Iop_Dup8x8:
3854//ZZ case Iop_Dup16x4:
3855//ZZ case Iop_Dup32x2: {
3856//ZZ HReg res, arg;
3857//ZZ UInt size;
3858//ZZ DECLARE_PATTERN(p_vdup_8x8);
3859//ZZ DECLARE_PATTERN(p_vdup_16x4);
3860//ZZ DECLARE_PATTERN(p_vdup_32x2);
3861//ZZ DEFINE_PATTERN(p_vdup_8x8,
3862//ZZ unop(Iop_Dup8x8, binop(Iop_GetElem8x8, bind(0), bind(1))));
3863//ZZ DEFINE_PATTERN(p_vdup_16x4,
3864//ZZ unop(Iop_Dup16x4, binop(Iop_GetElem16x4, bind(0), bind(1))));
3865//ZZ DEFINE_PATTERN(p_vdup_32x2,
3866//ZZ unop(Iop_Dup32x2, binop(Iop_GetElem32x2, bind(0), bind(1))));
3867//ZZ if (matchIRExpr(&mi, p_vdup_8x8, e)) {
3868//ZZ UInt index;
3869//ZZ UInt imm4;
3870//ZZ if (mi.bindee[1]->tag == Iex_Const &&
3871//ZZ typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3872//ZZ index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3873//ZZ imm4 = (index << 1) + 1;
3874//ZZ if (index < 8) {
3875//ZZ res = newVRegD(env);
3876//ZZ arg = iselNeon64Expr(env, mi.bindee[0]);
3877//ZZ addInstr(env, ARMInstr_NUnaryS(
3878//ZZ ARMneon_VDUP,
3879//ZZ mkARMNRS(ARMNRS_Reg, res, 0),
3880//ZZ mkARMNRS(ARMNRS_Scalar, arg, index),
3881//ZZ imm4, False
3882//ZZ ));
3883//ZZ return res;
3884//ZZ }
3885//ZZ }
3886//ZZ } else if (matchIRExpr(&mi, p_vdup_16x4, e)) {
3887//ZZ UInt index;
3888//ZZ UInt imm4;
3889//ZZ if (mi.bindee[1]->tag == Iex_Const &&
3890//ZZ typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3891//ZZ index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3892//ZZ imm4 = (index << 2) + 2;
3893//ZZ if (index < 4) {
3894//ZZ res = newVRegD(env);
3895//ZZ arg = iselNeon64Expr(env, mi.bindee[0]);
3896//ZZ addInstr(env, ARMInstr_NUnaryS(
3897//ZZ ARMneon_VDUP,
3898//ZZ mkARMNRS(ARMNRS_Reg, res, 0),
3899//ZZ mkARMNRS(ARMNRS_Scalar, arg, index),
3900//ZZ imm4, False
3901//ZZ ));
3902//ZZ return res;
3903//ZZ }
3904//ZZ }
3905//ZZ } else if (matchIRExpr(&mi, p_vdup_32x2, e)) {
3906//ZZ UInt index;
3907//ZZ UInt imm4;
3908//ZZ if (mi.bindee[1]->tag == Iex_Const &&
3909//ZZ typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3910//ZZ index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3911//ZZ imm4 = (index << 3) + 4;
3912//ZZ if (index < 2) {
3913//ZZ res = newVRegD(env);
3914//ZZ arg = iselNeon64Expr(env, mi.bindee[0]);
3915//ZZ addInstr(env, ARMInstr_NUnaryS(
3916//ZZ ARMneon_VDUP,
3917//ZZ mkARMNRS(ARMNRS_Reg, res, 0),
3918//ZZ mkARMNRS(ARMNRS_Scalar, arg, index),
3919//ZZ imm4, False
3920//ZZ ));
3921//ZZ return res;
3922//ZZ }
3923//ZZ }
3924//ZZ }
3925//ZZ arg = iselIntExpr_R(env, e->Iex.Unop.arg);
3926//ZZ res = newVRegD(env);
3927//ZZ switch (e->Iex.Unop.op) {
3928//ZZ case Iop_Dup8x8: size = 0; break;
3929//ZZ case Iop_Dup16x4: size = 1; break;
3930//ZZ case Iop_Dup32x2: size = 2; break;
3931//ZZ default: vassert(0);
3932//ZZ }
3933//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_DUP, res, arg, size, False));
3934//ZZ return res;
3935//ZZ }
3936//ZZ case Iop_Abs8x8:
3937//ZZ case Iop_Abs16x4:
3938//ZZ case Iop_Abs32x2: {
3939//ZZ HReg res = newVRegD(env);
3940//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3941//ZZ UInt size = 0;
3942//ZZ switch(e->Iex.Binop.op) {
3943//ZZ case Iop_Abs8x8: size = 0; break;
3944//ZZ case Iop_Abs16x4: size = 1; break;
3945//ZZ case Iop_Abs32x2: size = 2; break;
3946//ZZ default: vassert(0);
3947//ZZ }
3948//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_ABS, res, arg, size, False));
3949//ZZ return res;
3950//ZZ }
3951//ZZ case Iop_Reverse64_8x8:
3952//ZZ case Iop_Reverse64_16x4:
3953//ZZ case Iop_Reverse64_32x2: {
3954//ZZ HReg res = newVRegD(env);
3955//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3956//ZZ UInt size = 0;
3957//ZZ switch(e->Iex.Binop.op) {
3958//ZZ case Iop_Reverse64_8x8: size = 0; break;
3959//ZZ case Iop_Reverse64_16x4: size = 1; break;
3960//ZZ case Iop_Reverse64_32x2: size = 2; break;
3961//ZZ default: vassert(0);
3962//ZZ }
3963//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_REV64,
3964//ZZ res, arg, size, False));
3965//ZZ return res;
3966//ZZ }
3967//ZZ case Iop_Reverse32_8x8:
3968//ZZ case Iop_Reverse32_16x4: {
3969//ZZ HReg res = newVRegD(env);
3970//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3971//ZZ UInt size = 0;
3972//ZZ switch(e->Iex.Binop.op) {
3973//ZZ case Iop_Reverse32_8x8: size = 0; break;
3974//ZZ case Iop_Reverse32_16x4: size = 1; break;
3975//ZZ default: vassert(0);
3976//ZZ }
3977//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_REV32,
3978//ZZ res, arg, size, False));
3979//ZZ return res;
3980//ZZ }
3981//ZZ case Iop_Reverse16_8x8: {
3982//ZZ HReg res = newVRegD(env);
3983//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3984//ZZ UInt size = 0;
3985//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_REV16,
3986//ZZ res, arg, size, False));
3987//ZZ return res;
3988//ZZ }
3989//ZZ case Iop_CmpwNEZ64: {
3990//ZZ HReg x_lsh = newVRegD(env);
3991//ZZ HReg x_rsh = newVRegD(env);
3992//ZZ HReg lsh_amt = newVRegD(env);
3993//ZZ HReg rsh_amt = newVRegD(env);
3994//ZZ HReg zero = newVRegD(env);
3995//ZZ HReg tmp = newVRegD(env);
3996//ZZ HReg tmp2 = newVRegD(env);
3997//ZZ HReg res = newVRegD(env);
3998//ZZ HReg x = newVRegD(env);
3999//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4000//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp2, arg, 2, False));
4001//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_NOT, x, tmp2, 4, False));
4002//ZZ addInstr(env, ARMInstr_NeonImm(lsh_amt, ARMNImm_TI(0, 32)));
4003//ZZ addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0, 0)));
4004//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
4005//ZZ rsh_amt, zero, lsh_amt, 2, False));
4006//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4007//ZZ x_lsh, x, lsh_amt, 3, False));
4008//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4009//ZZ x_rsh, x, rsh_amt, 3, False));
4010//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
4011//ZZ tmp, x_lsh, x_rsh, 0, False));
4012//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
4013//ZZ res, tmp, x, 0, False));
4014//ZZ return res;
4015//ZZ }
4016//ZZ case Iop_CmpNEZ8x8:
4017//ZZ case Iop_CmpNEZ16x4:
4018//ZZ case Iop_CmpNEZ32x2: {
4019//ZZ HReg res = newVRegD(env);
4020//ZZ HReg tmp = newVRegD(env);
4021//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4022//ZZ UInt size;
4023//ZZ switch (e->Iex.Unop.op) {
4024//ZZ case Iop_CmpNEZ8x8: size = 0; break;
4025//ZZ case Iop_CmpNEZ16x4: size = 1; break;
4026//ZZ case Iop_CmpNEZ32x2: size = 2; break;
4027//ZZ default: vassert(0);
4028//ZZ }
4029//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp, arg, size, False));
4030//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, tmp, 4, False));
4031//ZZ return res;
4032//ZZ }
4033//ZZ case Iop_NarrowUn16to8x8:
4034//ZZ case Iop_NarrowUn32to16x4:
4035//ZZ case Iop_NarrowUn64to32x2: {
4036//ZZ HReg res = newVRegD(env);
4037//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4038//ZZ UInt size = 0;
4039//ZZ switch(e->Iex.Binop.op) {
4040//ZZ case Iop_NarrowUn16to8x8: size = 0; break;
4041//ZZ case Iop_NarrowUn32to16x4: size = 1; break;
4042//ZZ case Iop_NarrowUn64to32x2: size = 2; break;
4043//ZZ default: vassert(0);
4044//ZZ }
4045//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPYN,
4046//ZZ res, arg, size, False));
4047//ZZ return res;
4048//ZZ }
4049//ZZ case Iop_QNarrowUn16Sto8Sx8:
4050//ZZ case Iop_QNarrowUn32Sto16Sx4:
4051//ZZ case Iop_QNarrowUn64Sto32Sx2: {
4052//ZZ HReg res = newVRegD(env);
4053//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4054//ZZ UInt size = 0;
4055//ZZ switch(e->Iex.Binop.op) {
4056//ZZ case Iop_QNarrowUn16Sto8Sx8: size = 0; break;
4057//ZZ case Iop_QNarrowUn32Sto16Sx4: size = 1; break;
4058//ZZ case Iop_QNarrowUn64Sto32Sx2: size = 2; break;
4059//ZZ default: vassert(0);
4060//ZZ }
4061//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNSS,
4062//ZZ res, arg, size, False));
4063//ZZ return res;
4064//ZZ }
4065//ZZ case Iop_QNarrowUn16Sto8Ux8:
4066//ZZ case Iop_QNarrowUn32Sto16Ux4:
4067//ZZ case Iop_QNarrowUn64Sto32Ux2: {
4068//ZZ HReg res = newVRegD(env);
4069//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4070//ZZ UInt size = 0;
4071//ZZ switch(e->Iex.Binop.op) {
4072//ZZ case Iop_QNarrowUn16Sto8Ux8: size = 0; break;
4073//ZZ case Iop_QNarrowUn32Sto16Ux4: size = 1; break;
4074//ZZ case Iop_QNarrowUn64Sto32Ux2: size = 2; break;
4075//ZZ default: vassert(0);
4076//ZZ }
4077//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNUS,
4078//ZZ res, arg, size, False));
4079//ZZ return res;
4080//ZZ }
4081//ZZ case Iop_QNarrowUn16Uto8Ux8:
4082//ZZ case Iop_QNarrowUn32Uto16Ux4:
4083//ZZ case Iop_QNarrowUn64Uto32Ux2: {
4084//ZZ HReg res = newVRegD(env);
4085//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4086//ZZ UInt size = 0;
4087//ZZ switch(e->Iex.Binop.op) {
4088//ZZ case Iop_QNarrowUn16Uto8Ux8: size = 0; break;
4089//ZZ case Iop_QNarrowUn32Uto16Ux4: size = 1; break;
4090//ZZ case Iop_QNarrowUn64Uto32Ux2: size = 2; break;
4091//ZZ default: vassert(0);
4092//ZZ }
4093//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNUU,
4094//ZZ res, arg, size, False));
4095//ZZ return res;
4096//ZZ }
4097//ZZ case Iop_PwAddL8Sx8:
4098//ZZ case Iop_PwAddL16Sx4:
4099//ZZ case Iop_PwAddL32Sx2: {
4100//ZZ HReg res = newVRegD(env);
4101//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4102//ZZ UInt size = 0;
4103//ZZ switch(e->Iex.Binop.op) {
4104//ZZ case Iop_PwAddL8Sx8: size = 0; break;
4105//ZZ case Iop_PwAddL16Sx4: size = 1; break;
4106//ZZ case Iop_PwAddL32Sx2: size = 2; break;
4107//ZZ default: vassert(0);
4108//ZZ }
4109//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_PADDLS,
4110//ZZ res, arg, size, False));
4111//ZZ return res;
4112//ZZ }
4113//ZZ case Iop_PwAddL8Ux8:
4114//ZZ case Iop_PwAddL16Ux4:
4115//ZZ case Iop_PwAddL32Ux2: {
4116//ZZ HReg res = newVRegD(env);
4117//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4118//ZZ UInt size = 0;
4119//ZZ switch(e->Iex.Binop.op) {
4120//ZZ case Iop_PwAddL8Ux8: size = 0; break;
4121//ZZ case Iop_PwAddL16Ux4: size = 1; break;
4122//ZZ case Iop_PwAddL32Ux2: size = 2; break;
4123//ZZ default: vassert(0);
4124//ZZ }
4125//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_PADDLU,
4126//ZZ res, arg, size, False));
4127//ZZ return res;
4128//ZZ }
4129//ZZ case Iop_Cnt8x8: {
4130//ZZ HReg res = newVRegD(env);
4131//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4132//ZZ UInt size = 0;
4133//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_CNT,
4134//ZZ res, arg, size, False));
4135//ZZ return res;
4136//ZZ }
4137//ZZ case Iop_Clz8Sx8:
4138//ZZ case Iop_Clz16Sx4:
4139//ZZ case Iop_Clz32Sx2: {
4140//ZZ HReg res = newVRegD(env);
4141//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4142//ZZ UInt size = 0;
4143//ZZ switch(e->Iex.Binop.op) {
4144//ZZ case Iop_Clz8Sx8: size = 0; break;
4145//ZZ case Iop_Clz16Sx4: size = 1; break;
4146//ZZ case Iop_Clz32Sx2: size = 2; break;
4147//ZZ default: vassert(0);
4148//ZZ }
4149//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_CLZ,
4150//ZZ res, arg, size, False));
4151//ZZ return res;
4152//ZZ }
4153//ZZ case Iop_Cls8Sx8:
4154//ZZ case Iop_Cls16Sx4:
4155//ZZ case Iop_Cls32Sx2: {
4156//ZZ HReg res = newVRegD(env);
4157//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4158//ZZ UInt size = 0;
4159//ZZ switch(e->Iex.Binop.op) {
4160//ZZ case Iop_Cls8Sx8: size = 0; break;
4161//ZZ case Iop_Cls16Sx4: size = 1; break;
4162//ZZ case Iop_Cls32Sx2: size = 2; break;
4163//ZZ default: vassert(0);
4164//ZZ }
4165//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_CLS,
4166//ZZ res, arg, size, False));
4167//ZZ return res;
4168//ZZ }
4169//ZZ case Iop_FtoI32Sx2_RZ: {
4170//ZZ HReg res = newVRegD(env);
4171//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4172//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoS,
4173//ZZ res, arg, 2, False));
4174//ZZ return res;
4175//ZZ }
4176//ZZ case Iop_FtoI32Ux2_RZ: {
4177//ZZ HReg res = newVRegD(env);
4178//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4179//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoU,
4180//ZZ res, arg, 2, False));
4181//ZZ return res;
4182//ZZ }
4183//ZZ case Iop_I32StoFx2: {
4184//ZZ HReg res = newVRegD(env);
4185//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4186//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTStoF,
4187//ZZ res, arg, 2, False));
4188//ZZ return res;
4189//ZZ }
4190//ZZ case Iop_I32UtoFx2: {
4191//ZZ HReg res = newVRegD(env);
4192//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4193//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTUtoF,
4194//ZZ res, arg, 2, False));
4195//ZZ return res;
4196//ZZ }
4197//ZZ case Iop_F32toF16x4: {
4198//ZZ HReg res = newVRegD(env);
4199//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4200//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTF32toF16,
4201//ZZ res, arg, 2, False));
4202//ZZ return res;
4203//ZZ }
4204//ZZ case Iop_Recip32Fx2: {
4205//ZZ HReg res = newVRegD(env);
4206//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
4207//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VRECIPF,
4208//ZZ res, argL, 0, False));
4209//ZZ return res;
4210//ZZ }
4211//ZZ case Iop_Recip32x2: {
4212//ZZ HReg res = newVRegD(env);
4213//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
4214//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VRECIP,
4215//ZZ res, argL, 0, False));
4216//ZZ return res;
4217//ZZ }
4218//ZZ case Iop_Abs32Fx2: {
4219//ZZ DECLARE_PATTERN(p_vabd_32fx2);
4220//ZZ DEFINE_PATTERN(p_vabd_32fx2,
4221//ZZ unop(Iop_Abs32Fx2,
4222//ZZ binop(Iop_Sub32Fx2,
4223//ZZ bind(0),
4224//ZZ bind(1))));
4225//ZZ if (matchIRExpr(&mi, p_vabd_32fx2, e)) {
4226//ZZ HReg res = newVRegD(env);
4227//ZZ HReg argL = iselNeon64Expr(env, mi.bindee[0]);
4228//ZZ HReg argR = iselNeon64Expr(env, mi.bindee[1]);
4229//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VABDFP,
4230//ZZ res, argL, argR, 0, False));
4231//ZZ return res;
4232//ZZ } else {
4233//ZZ HReg res = newVRegD(env);
4234//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4235//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VABSFP,
4236//ZZ res, arg, 0, False));
4237//ZZ return res;
4238//ZZ }
4239//ZZ }
4240//ZZ case Iop_Rsqrte32Fx2: {
4241//ZZ HReg res = newVRegD(env);
4242//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4243//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTEFP,
4244//ZZ res, arg, 0, False));
4245//ZZ return res;
4246//ZZ }
4247//ZZ case Iop_Rsqrte32x2: {
4248//ZZ HReg res = newVRegD(env);
4249//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4250//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTE,
4251//ZZ res, arg, 0, False));
4252//ZZ return res;
4253//ZZ }
4254//ZZ case Iop_Neg32Fx2: {
4255//ZZ HReg res = newVRegD(env);
4256//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4257//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VNEGF,
4258//ZZ res, arg, 0, False));
4259//ZZ return res;
4260//ZZ }
4261//ZZ default:
4262//ZZ break;
4263//ZZ }
4264//ZZ } /* if (e->tag == Iex_Unop) */
4265//ZZ
4266//ZZ if (e->tag == Iex_Triop) {
4267//ZZ IRTriop *triop = e->Iex.Triop.details;
4268//ZZ
4269//ZZ switch (triop->op) {
4270//ZZ case Iop_Extract64: {
4271//ZZ HReg res = newVRegD(env);
4272//ZZ HReg argL = iselNeon64Expr(env, triop->arg1);
4273//ZZ HReg argR = iselNeon64Expr(env, triop->arg2);
4274//ZZ UInt imm4;
4275//ZZ if (triop->arg3->tag != Iex_Const ||
4276//ZZ typeOfIRExpr(env->type_env, triop->arg3) != Ity_I8) {
4277//ZZ vpanic("ARM target supports Iop_Extract64 with constant "
4278//ZZ "third argument less than 16 only\n");
4279//ZZ }
4280//ZZ imm4 = triop->arg3->Iex.Const.con->Ico.U8;
4281//ZZ if (imm4 >= 8) {
4282//ZZ vpanic("ARM target supports Iop_Extract64 with constant "
4283//ZZ "third argument less than 16 only\n");
4284//ZZ }
4285//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VEXT,
4286//ZZ res, argL, argR, imm4, False));
4287//ZZ return res;
4288//ZZ }
4289//ZZ case Iop_SetElem8x8:
4290//ZZ case Iop_SetElem16x4:
4291//ZZ case Iop_SetElem32x2: {
4292//ZZ HReg res = newVRegD(env);
4293//ZZ HReg dreg = iselNeon64Expr(env, triop->arg1);
4294//ZZ HReg arg = iselIntExpr_R(env, triop->arg3);
4295//ZZ UInt index, size;
4296//ZZ if (triop->arg2->tag != Iex_Const ||
4297//ZZ typeOfIRExpr(env->type_env, triop->arg2) != Ity_I8) {
4298//ZZ vpanic("ARM target supports SetElem with constant "
4299//ZZ "second argument only\n");
4300//ZZ }
4301//ZZ index = triop->arg2->Iex.Const.con->Ico.U8;
4302//ZZ switch (triop->op) {
4303//ZZ case Iop_SetElem8x8: vassert(index < 8); size = 0; break;
4304//ZZ case Iop_SetElem16x4: vassert(index < 4); size = 1; break;
4305//ZZ case Iop_SetElem32x2: vassert(index < 2); size = 2; break;
4306//ZZ default: vassert(0);
4307//ZZ }
4308//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, res, dreg, 4, False));
4309//ZZ addInstr(env, ARMInstr_NUnaryS(ARMneon_SETELEM,
4310//ZZ mkARMNRS(ARMNRS_Scalar, res, index),
4311//ZZ mkARMNRS(ARMNRS_Reg, arg, 0),
4312//ZZ size, False));
4313//ZZ return res;
4314//ZZ }
4315//ZZ default:
4316//ZZ break;
4317//ZZ }
4318//ZZ }
4319//ZZ
4320//ZZ /* --------- MULTIPLEX --------- */
4321//ZZ if (e->tag == Iex_ITE) { // VFD
4322//ZZ HReg rLo, rHi;
4323//ZZ HReg res = newVRegD(env);
4324//ZZ iselInt64Expr(&rHi, &rLo, env, e);
4325//ZZ addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
4326//ZZ return res;
4327//ZZ }
4328//ZZ
4329//ZZ ppIRExpr(e);
4330//ZZ vpanic("iselNeon64Expr");
4331//ZZ }
4332
4333
4334/*---------------------------------------------------------*/
4335/*--- ISEL: Vector (NEON) expressions (128 bit) ---*/
4336/*---------------------------------------------------------*/
4337
4338static HReg iselV128Expr ( ISelEnv* env, IRExpr* e )
4339{
4340 HReg r = iselV128Expr_wrk( env, e );
4341 vassert(hregClass(r) == HRcVec128);
4342 vassert(hregIsVirtual(r));
4343 return r;
4344}
4345
4346/* DO NOT CALL THIS DIRECTLY */
4347static HReg iselV128Expr_wrk ( ISelEnv* env, IRExpr* e )
4348{
4349 IRType ty = typeOfIRExpr(env->type_env, e);
4350 vassert(e);
4351 vassert(ty == Ity_V128);
4352
4353 if (e->tag == Iex_RdTmp) {
4354 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
4355 }
4356
4357 if (e->tag == Iex_Const) {
4358 /* Only a very limited range of constants is handled. */
4359 vassert(e->Iex.Const.con->tag == Ico_V128);
4360 UShort con = e->Iex.Const.con->Ico.V128;
4361 if (con == 0x0000) {
4362 HReg res = newVRegV(env);
4363 addInstr(env, ARM64Instr_VImmQ(res, con));
4364 return res;
4365 }
4366 /* Unhandled */
4367 goto v128_expr_bad;
4368 }
4369
4370 if (e->tag == Iex_Load) {
4371 HReg res = newVRegV(env);
4372 HReg rN = iselIntExpr_R(env, e->Iex.Load.addr);
4373 vassert(ty == Ity_V128);
4374 addInstr(env, ARM64Instr_VLdStQ(True/*isLoad*/, res, rN));
4375 return res;
4376 }
4377
4378 if (e->tag == Iex_Get) {
4379 UInt offs = (UInt)e->Iex.Get.offset;
4380 if (offs < (1<<12)) {
4381 HReg addr = mk_baseblock_128bit_access_addr(env, offs);
4382 HReg res = newVRegV(env);
4383 vassert(ty == Ity_V128);
4384 addInstr(env, ARM64Instr_VLdStQ(True/*isLoad*/, res, addr));
4385 return res;
4386 }
4387 goto v128_expr_bad;
4388 }
4389
sewardjecde6972014-02-05 11:01:19 +00004390 if (e->tag == Iex_Unop) {
4391
4392 /* Iop_ZeroHIXXofV128 cases */
4393 UShort imm16 = 0;
4394 switch (e->Iex.Unop.op) {
sewardj9b1cf5e2014-03-01 11:16:57 +00004395 case Iop_ZeroHI64ofV128: imm16 = 0x00FF; break;
sewardjecde6972014-02-05 11:01:19 +00004396 case Iop_ZeroHI96ofV128: imm16 = 0x000F; break;
4397 case Iop_ZeroHI112ofV128: imm16 = 0x0003; break;
sewardjfab09142014-02-10 10:28:13 +00004398 case Iop_ZeroHI120ofV128: imm16 = 0x0001; break;
sewardjecde6972014-02-05 11:01:19 +00004399 default: break;
4400 }
4401 if (imm16 != 0) {
4402 HReg src = iselV128Expr(env, e->Iex.Unop.arg);
4403 HReg imm = newVRegV(env);
4404 HReg res = newVRegV(env);
4405 addInstr(env, ARM64Instr_VImmQ(imm, imm16));
4406 addInstr(env, ARM64Instr_VBinV(ARM64vecb_AND, res, src, imm));
4407 return res;
4408 }
4409
4410 /* Other cases */
4411 switch (e->Iex.Unop.op) {
sewardje520bb32014-02-17 11:00:53 +00004412 case Iop_NotV128:
sewardj2b6fd5e2014-06-19 14:21:37 +00004413 case Iop_Abs64Fx2: case Iop_Abs32Fx4:
4414 case Iop_Neg64Fx2: case Iop_Neg32Fx4:
4415 case Iop_Abs64x2: case Iop_Abs32x4:
4416 case Iop_Abs16x8: case Iop_Abs8x16:
4417 case Iop_Cls32Sx4: case Iop_Cls16Sx8: case Iop_Cls8Sx16:
4418 case Iop_Clz32Sx4: case Iop_Clz16Sx8: case Iop_Clz8Sx16:
4419 case Iop_Cnt8x16:
sewardj25523c42014-06-15 19:36:29 +00004420 {
sewardjfab09142014-02-10 10:28:13 +00004421 HReg res = newVRegV(env);
4422 HReg arg = iselV128Expr(env, e->Iex.Unop.arg);
4423 ARM64VecUnaryOp op = ARM64vecu_INVALID;
4424 switch (e->Iex.Unop.op) {
sewardje520bb32014-02-17 11:00:53 +00004425 case Iop_NotV128: op = ARM64vecu_NOT; break;
4426 case Iop_Abs64Fx2: op = ARM64vecu_FABS64x2; break;
sewardj2bd1ffe2014-03-27 18:59:00 +00004427 case Iop_Abs32Fx4: op = ARM64vecu_FABS32x4; break;
sewardjfab09142014-02-10 10:28:13 +00004428 case Iop_Neg64Fx2: op = ARM64vecu_FNEG64x2; break;
sewardj950ca7a2014-04-03 23:03:32 +00004429 case Iop_Neg32Fx4: op = ARM64vecu_FNEG32x4; break;
sewardj25523c42014-06-15 19:36:29 +00004430 case Iop_Abs64x2: op = ARM64vecu_ABS64x2; break;
4431 case Iop_Abs32x4: op = ARM64vecu_ABS32x4; break;
4432 case Iop_Abs16x8: op = ARM64vecu_ABS16x8; break;
4433 case Iop_Abs8x16: op = ARM64vecu_ABS8x16; break;
sewardj2b6fd5e2014-06-19 14:21:37 +00004434 case Iop_Cls32Sx4: op = ARM64vecu_CLS32x4; break;
4435 case Iop_Cls16Sx8: op = ARM64vecu_CLS16x8; break;
4436 case Iop_Cls8Sx16: op = ARM64vecu_CLS8x16; break;
4437 case Iop_Clz32Sx4: op = ARM64vecu_CLZ32x4; break;
4438 case Iop_Clz16Sx8: op = ARM64vecu_CLZ16x8; break;
4439 case Iop_Clz8Sx16: op = ARM64vecu_CLZ8x16; break;
4440 case Iop_Cnt8x16: op = ARM64vecu_CNT8x16; break;
sewardjfab09142014-02-10 10:28:13 +00004441 default: vassert(0);
4442 }
4443 addInstr(env, ARM64Instr_VUnaryV(op, res, arg));
4444 return res;
4445 }
sewardj505a27d2014-03-10 10:40:48 +00004446 case Iop_CmpNEZ8x16:
4447 case Iop_CmpNEZ16x8:
4448 case Iop_CmpNEZ32x4:
sewardj99c1f812014-03-09 09:41:56 +00004449 case Iop_CmpNEZ64x2: {
4450 HReg arg = iselV128Expr(env, e->Iex.Unop.arg);
4451 HReg zero = newVRegV(env);
4452 HReg res = newVRegV(env);
4453 ARM64VecBinOp cmp = ARM64vecb_INVALID;
4454 switch (e->Iex.Unop.op) {
4455 case Iop_CmpNEZ64x2: cmp = ARM64vecb_CMEQ64x2; break;
sewardj505a27d2014-03-10 10:40:48 +00004456 case Iop_CmpNEZ32x4: cmp = ARM64vecb_CMEQ32x4; break;
4457 case Iop_CmpNEZ16x8: cmp = ARM64vecb_CMEQ16x8; break;
4458 case Iop_CmpNEZ8x16: cmp = ARM64vecb_CMEQ8x16; break;
sewardj99c1f812014-03-09 09:41:56 +00004459 default: vassert(0);
4460 }
4461 // This is pretty feeble. Better: use CMP against zero
4462 // and avoid the extra instruction and extra register.
4463 addInstr(env, ARM64Instr_VImmQ(zero, 0x0000));
4464 addInstr(env, ARM64Instr_VBinV(cmp, res, arg, zero));
4465 addInstr(env, ARM64Instr_VUnaryV(ARM64vecu_NOT, res, res));
4466 return res;
4467 }
4468
sewardjbbcf1882014-01-12 12:49:10 +00004469//ZZ case Iop_NotV128: {
4470//ZZ DECLARE_PATTERN(p_veqz_8x16);
4471//ZZ DECLARE_PATTERN(p_veqz_16x8);
4472//ZZ DECLARE_PATTERN(p_veqz_32x4);
4473//ZZ DECLARE_PATTERN(p_vcge_8sx16);
4474//ZZ DECLARE_PATTERN(p_vcge_16sx8);
4475//ZZ DECLARE_PATTERN(p_vcge_32sx4);
4476//ZZ DECLARE_PATTERN(p_vcge_8ux16);
4477//ZZ DECLARE_PATTERN(p_vcge_16ux8);
4478//ZZ DECLARE_PATTERN(p_vcge_32ux4);
4479//ZZ DEFINE_PATTERN(p_veqz_8x16,
4480//ZZ unop(Iop_NotV128, unop(Iop_CmpNEZ8x16, bind(0))));
4481//ZZ DEFINE_PATTERN(p_veqz_16x8,
4482//ZZ unop(Iop_NotV128, unop(Iop_CmpNEZ16x8, bind(0))));
4483//ZZ DEFINE_PATTERN(p_veqz_32x4,
4484//ZZ unop(Iop_NotV128, unop(Iop_CmpNEZ32x4, bind(0))));
4485//ZZ DEFINE_PATTERN(p_vcge_8sx16,
4486//ZZ unop(Iop_NotV128, binop(Iop_CmpGT8Sx16, bind(1), bind(0))));
4487//ZZ DEFINE_PATTERN(p_vcge_16sx8,
4488//ZZ unop(Iop_NotV128, binop(Iop_CmpGT16Sx8, bind(1), bind(0))));
4489//ZZ DEFINE_PATTERN(p_vcge_32sx4,
4490//ZZ unop(Iop_NotV128, binop(Iop_CmpGT32Sx4, bind(1), bind(0))));
4491//ZZ DEFINE_PATTERN(p_vcge_8ux16,
4492//ZZ unop(Iop_NotV128, binop(Iop_CmpGT8Ux16, bind(1), bind(0))));
4493//ZZ DEFINE_PATTERN(p_vcge_16ux8,
4494//ZZ unop(Iop_NotV128, binop(Iop_CmpGT16Ux8, bind(1), bind(0))));
4495//ZZ DEFINE_PATTERN(p_vcge_32ux4,
4496//ZZ unop(Iop_NotV128, binop(Iop_CmpGT32Ux4, bind(1), bind(0))));
4497//ZZ if (matchIRExpr(&mi, p_veqz_8x16, e)) {
4498//ZZ HReg res = newVRegV(env);
4499//ZZ HReg arg = iselNeonExpr(env, mi.bindee[0]);
4500//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 0, True));
4501//ZZ return res;
4502//ZZ } else if (matchIRExpr(&mi, p_veqz_16x8, e)) {
4503//ZZ HReg res = newVRegV(env);
4504//ZZ HReg arg = iselNeonExpr(env, mi.bindee[0]);
4505//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 1, True));
4506//ZZ return res;
4507//ZZ } else if (matchIRExpr(&mi, p_veqz_32x4, e)) {
4508//ZZ HReg res = newVRegV(env);
4509//ZZ HReg arg = iselNeonExpr(env, mi.bindee[0]);
4510//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 2, True));
4511//ZZ return res;
4512//ZZ } else if (matchIRExpr(&mi, p_vcge_8sx16, e)) {
4513//ZZ HReg res = newVRegV(env);
4514//ZZ HReg argL = iselNeonExpr(env, mi.bindee[0]);
4515//ZZ HReg argR = iselNeonExpr(env, mi.bindee[1]);
4516//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
4517//ZZ res, argL, argR, 0, True));
4518//ZZ return res;
4519//ZZ } else if (matchIRExpr(&mi, p_vcge_16sx8, e)) {
4520//ZZ HReg res = newVRegV(env);
4521//ZZ HReg argL = iselNeonExpr(env, mi.bindee[0]);
4522//ZZ HReg argR = iselNeonExpr(env, mi.bindee[1]);
4523//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
4524//ZZ res, argL, argR, 1, True));
4525//ZZ return res;
4526//ZZ } else if (matchIRExpr(&mi, p_vcge_32sx4, e)) {
4527//ZZ HReg res = newVRegV(env);
4528//ZZ HReg argL = iselNeonExpr(env, mi.bindee[0]);
4529//ZZ HReg argR = iselNeonExpr(env, mi.bindee[1]);
4530//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
4531//ZZ res, argL, argR, 2, True));
4532//ZZ return res;
4533//ZZ } else if (matchIRExpr(&mi, p_vcge_8ux16, e)) {
4534//ZZ HReg res = newVRegV(env);
4535//ZZ HReg argL = iselNeonExpr(env, mi.bindee[0]);
4536//ZZ HReg argR = iselNeonExpr(env, mi.bindee[1]);
4537//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
4538//ZZ res, argL, argR, 0, True));
4539//ZZ return res;
4540//ZZ } else if (matchIRExpr(&mi, p_vcge_16ux8, e)) {
4541//ZZ HReg res = newVRegV(env);
4542//ZZ HReg argL = iselNeonExpr(env, mi.bindee[0]);
4543//ZZ HReg argR = iselNeonExpr(env, mi.bindee[1]);
4544//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
4545//ZZ res, argL, argR, 1, True));
4546//ZZ return res;
4547//ZZ } else if (matchIRExpr(&mi, p_vcge_32ux4, e)) {
4548//ZZ HReg res = newVRegV(env);
4549//ZZ HReg argL = iselNeonExpr(env, mi.bindee[0]);
4550//ZZ HReg argR = iselNeonExpr(env, mi.bindee[1]);
4551//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
4552//ZZ res, argL, argR, 2, True));
4553//ZZ return res;
4554//ZZ } else {
4555//ZZ HReg res = newVRegV(env);
4556//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4557//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, arg, 4, True));
4558//ZZ return res;
4559//ZZ }
4560//ZZ }
4561//ZZ case Iop_Dup8x16:
4562//ZZ case Iop_Dup16x8:
4563//ZZ case Iop_Dup32x4: {
4564//ZZ HReg res, arg;
4565//ZZ UInt size;
4566//ZZ DECLARE_PATTERN(p_vdup_8x16);
4567//ZZ DECLARE_PATTERN(p_vdup_16x8);
4568//ZZ DECLARE_PATTERN(p_vdup_32x4);
4569//ZZ DEFINE_PATTERN(p_vdup_8x16,
4570//ZZ unop(Iop_Dup8x16, binop(Iop_GetElem8x8, bind(0), bind(1))));
4571//ZZ DEFINE_PATTERN(p_vdup_16x8,
4572//ZZ unop(Iop_Dup16x8, binop(Iop_GetElem16x4, bind(0), bind(1))));
4573//ZZ DEFINE_PATTERN(p_vdup_32x4,
4574//ZZ unop(Iop_Dup32x4, binop(Iop_GetElem32x2, bind(0), bind(1))));
4575//ZZ if (matchIRExpr(&mi, p_vdup_8x16, e)) {
4576//ZZ UInt index;
4577//ZZ UInt imm4;
4578//ZZ if (mi.bindee[1]->tag == Iex_Const &&
4579//ZZ typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
4580//ZZ index = mi.bindee[1]->Iex.Const.con->Ico.U8;
4581//ZZ imm4 = (index << 1) + 1;
4582//ZZ if (index < 8) {
4583//ZZ res = newVRegV(env);
4584//ZZ arg = iselNeon64Expr(env, mi.bindee[0]);
4585//ZZ addInstr(env, ARMInstr_NUnaryS(
4586//ZZ ARMneon_VDUP,
4587//ZZ mkARMNRS(ARMNRS_Reg, res, 0),
4588//ZZ mkARMNRS(ARMNRS_Scalar, arg, index),
4589//ZZ imm4, True
4590//ZZ ));
4591//ZZ return res;
4592//ZZ }
4593//ZZ }
4594//ZZ } else if (matchIRExpr(&mi, p_vdup_16x8, e)) {
4595//ZZ UInt index;
4596//ZZ UInt imm4;
4597//ZZ if (mi.bindee[1]->tag == Iex_Const &&
4598//ZZ typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
4599//ZZ index = mi.bindee[1]->Iex.Const.con->Ico.U8;
4600//ZZ imm4 = (index << 2) + 2;
4601//ZZ if (index < 4) {
4602//ZZ res = newVRegV(env);
4603//ZZ arg = iselNeon64Expr(env, mi.bindee[0]);
4604//ZZ addInstr(env, ARMInstr_NUnaryS(
4605//ZZ ARMneon_VDUP,
4606//ZZ mkARMNRS(ARMNRS_Reg, res, 0),
4607//ZZ mkARMNRS(ARMNRS_Scalar, arg, index),
4608//ZZ imm4, True
4609//ZZ ));
4610//ZZ return res;
4611//ZZ }
4612//ZZ }
4613//ZZ } else if (matchIRExpr(&mi, p_vdup_32x4, e)) {
4614//ZZ UInt index;
4615//ZZ UInt imm4;
4616//ZZ if (mi.bindee[1]->tag == Iex_Const &&
4617//ZZ typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
4618//ZZ index = mi.bindee[1]->Iex.Const.con->Ico.U8;
4619//ZZ imm4 = (index << 3) + 4;
4620//ZZ if (index < 2) {
4621//ZZ res = newVRegV(env);
4622//ZZ arg = iselNeon64Expr(env, mi.bindee[0]);
4623//ZZ addInstr(env, ARMInstr_NUnaryS(
4624//ZZ ARMneon_VDUP,
4625//ZZ mkARMNRS(ARMNRS_Reg, res, 0),
4626//ZZ mkARMNRS(ARMNRS_Scalar, arg, index),
4627//ZZ imm4, True
4628//ZZ ));
4629//ZZ return res;
4630//ZZ }
4631//ZZ }
4632//ZZ }
4633//ZZ arg = iselIntExpr_R(env, e->Iex.Unop.arg);
4634//ZZ res = newVRegV(env);
4635//ZZ switch (e->Iex.Unop.op) {
4636//ZZ case Iop_Dup8x16: size = 0; break;
4637//ZZ case Iop_Dup16x8: size = 1; break;
4638//ZZ case Iop_Dup32x4: size = 2; break;
4639//ZZ default: vassert(0);
4640//ZZ }
4641//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_DUP, res, arg, size, True));
4642//ZZ return res;
4643//ZZ }
4644//ZZ case Iop_Abs8x16:
4645//ZZ case Iop_Abs16x8:
4646//ZZ case Iop_Abs32x4: {
4647//ZZ HReg res = newVRegV(env);
4648//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4649//ZZ UInt size = 0;
4650//ZZ switch(e->Iex.Binop.op) {
4651//ZZ case Iop_Abs8x16: size = 0; break;
4652//ZZ case Iop_Abs16x8: size = 1; break;
4653//ZZ case Iop_Abs32x4: size = 2; break;
4654//ZZ default: vassert(0);
4655//ZZ }
4656//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_ABS, res, arg, size, True));
4657//ZZ return res;
4658//ZZ }
4659//ZZ case Iop_Reverse64_8x16:
4660//ZZ case Iop_Reverse64_16x8:
4661//ZZ case Iop_Reverse64_32x4: {
4662//ZZ HReg res = newVRegV(env);
4663//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4664//ZZ UInt size = 0;
4665//ZZ switch(e->Iex.Binop.op) {
4666//ZZ case Iop_Reverse64_8x16: size = 0; break;
4667//ZZ case Iop_Reverse64_16x8: size = 1; break;
4668//ZZ case Iop_Reverse64_32x4: size = 2; break;
4669//ZZ default: vassert(0);
4670//ZZ }
4671//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_REV64,
4672//ZZ res, arg, size, True));
4673//ZZ return res;
4674//ZZ }
4675//ZZ case Iop_Reverse32_8x16:
4676//ZZ case Iop_Reverse32_16x8: {
4677//ZZ HReg res = newVRegV(env);
4678//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4679//ZZ UInt size = 0;
4680//ZZ switch(e->Iex.Binop.op) {
4681//ZZ case Iop_Reverse32_8x16: size = 0; break;
4682//ZZ case Iop_Reverse32_16x8: size = 1; break;
4683//ZZ default: vassert(0);
4684//ZZ }
4685//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_REV32,
4686//ZZ res, arg, size, True));
4687//ZZ return res;
4688//ZZ }
4689//ZZ case Iop_Reverse16_8x16: {
4690//ZZ HReg res = newVRegV(env);
4691//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4692//ZZ UInt size = 0;
4693//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_REV16,
4694//ZZ res, arg, size, True));
4695//ZZ return res;
4696//ZZ }
4697//ZZ case Iop_CmpNEZ64x2: {
4698//ZZ HReg x_lsh = newVRegV(env);
4699//ZZ HReg x_rsh = newVRegV(env);
4700//ZZ HReg lsh_amt = newVRegV(env);
4701//ZZ HReg rsh_amt = newVRegV(env);
4702//ZZ HReg zero = newVRegV(env);
4703//ZZ HReg tmp = newVRegV(env);
4704//ZZ HReg tmp2 = newVRegV(env);
4705//ZZ HReg res = newVRegV(env);
4706//ZZ HReg x = newVRegV(env);
4707//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4708//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp2, arg, 2, True));
4709//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_NOT, x, tmp2, 4, True));
4710//ZZ addInstr(env, ARMInstr_NeonImm(lsh_amt, ARMNImm_TI(0, 32)));
4711//ZZ addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0, 0)));
4712//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
4713//ZZ rsh_amt, zero, lsh_amt, 2, True));
4714//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4715//ZZ x_lsh, x, lsh_amt, 3, True));
4716//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4717//ZZ x_rsh, x, rsh_amt, 3, True));
4718//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
4719//ZZ tmp, x_lsh, x_rsh, 0, True));
4720//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
4721//ZZ res, tmp, x, 0, True));
4722//ZZ return res;
4723//ZZ }
sewardjbbcf1882014-01-12 12:49:10 +00004724//ZZ case Iop_Widen8Uto16x8:
4725//ZZ case Iop_Widen16Uto32x4:
4726//ZZ case Iop_Widen32Uto64x2: {
4727//ZZ HReg res = newVRegV(env);
4728//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4729//ZZ UInt size;
4730//ZZ switch (e->Iex.Unop.op) {
4731//ZZ case Iop_Widen8Uto16x8: size = 0; break;
4732//ZZ case Iop_Widen16Uto32x4: size = 1; break;
4733//ZZ case Iop_Widen32Uto64x2: size = 2; break;
4734//ZZ default: vassert(0);
4735//ZZ }
4736//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPYLU,
4737//ZZ res, arg, size, True));
4738//ZZ return res;
4739//ZZ }
4740//ZZ case Iop_Widen8Sto16x8:
4741//ZZ case Iop_Widen16Sto32x4:
4742//ZZ case Iop_Widen32Sto64x2: {
4743//ZZ HReg res = newVRegV(env);
4744//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4745//ZZ UInt size;
4746//ZZ switch (e->Iex.Unop.op) {
4747//ZZ case Iop_Widen8Sto16x8: size = 0; break;
4748//ZZ case Iop_Widen16Sto32x4: size = 1; break;
4749//ZZ case Iop_Widen32Sto64x2: size = 2; break;
4750//ZZ default: vassert(0);
4751//ZZ }
4752//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPYLS,
4753//ZZ res, arg, size, True));
4754//ZZ return res;
4755//ZZ }
4756//ZZ case Iop_PwAddL8Sx16:
4757//ZZ case Iop_PwAddL16Sx8:
4758//ZZ case Iop_PwAddL32Sx4: {
4759//ZZ HReg res = newVRegV(env);
4760//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4761//ZZ UInt size = 0;
4762//ZZ switch(e->Iex.Binop.op) {
4763//ZZ case Iop_PwAddL8Sx16: size = 0; break;
4764//ZZ case Iop_PwAddL16Sx8: size = 1; break;
4765//ZZ case Iop_PwAddL32Sx4: size = 2; break;
4766//ZZ default: vassert(0);
4767//ZZ }
4768//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_PADDLS,
4769//ZZ res, arg, size, True));
4770//ZZ return res;
4771//ZZ }
4772//ZZ case Iop_PwAddL8Ux16:
4773//ZZ case Iop_PwAddL16Ux8:
4774//ZZ case Iop_PwAddL32Ux4: {
4775//ZZ HReg res = newVRegV(env);
4776//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4777//ZZ UInt size = 0;
4778//ZZ switch(e->Iex.Binop.op) {
4779//ZZ case Iop_PwAddL8Ux16: size = 0; break;
4780//ZZ case Iop_PwAddL16Ux8: size = 1; break;
4781//ZZ case Iop_PwAddL32Ux4: size = 2; break;
4782//ZZ default: vassert(0);
4783//ZZ }
4784//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_PADDLU,
4785//ZZ res, arg, size, True));
4786//ZZ return res;
4787//ZZ }
4788//ZZ case Iop_Cnt8x16: {
4789//ZZ HReg res = newVRegV(env);
4790//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4791//ZZ UInt size = 0;
4792//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_CNT, res, arg, size, True));
4793//ZZ return res;
4794//ZZ }
4795//ZZ case Iop_Clz8Sx16:
4796//ZZ case Iop_Clz16Sx8:
4797//ZZ case Iop_Clz32Sx4: {
4798//ZZ HReg res = newVRegV(env);
4799//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4800//ZZ UInt size = 0;
4801//ZZ switch(e->Iex.Binop.op) {
4802//ZZ case Iop_Clz8Sx16: size = 0; break;
4803//ZZ case Iop_Clz16Sx8: size = 1; break;
4804//ZZ case Iop_Clz32Sx4: size = 2; break;
4805//ZZ default: vassert(0);
4806//ZZ }
4807//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_CLZ, res, arg, size, True));
4808//ZZ return res;
4809//ZZ }
4810//ZZ case Iop_Cls8Sx16:
4811//ZZ case Iop_Cls16Sx8:
4812//ZZ case Iop_Cls32Sx4: {
4813//ZZ HReg res = newVRegV(env);
4814//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4815//ZZ UInt size = 0;
4816//ZZ switch(e->Iex.Binop.op) {
4817//ZZ case Iop_Cls8Sx16: size = 0; break;
4818//ZZ case Iop_Cls16Sx8: size = 1; break;
4819//ZZ case Iop_Cls32Sx4: size = 2; break;
4820//ZZ default: vassert(0);
4821//ZZ }
4822//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_CLS, res, arg, size, True));
4823//ZZ return res;
4824//ZZ }
4825//ZZ case Iop_FtoI32Sx4_RZ: {
4826//ZZ HReg res = newVRegV(env);
4827//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4828//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoS,
4829//ZZ res, arg, 2, True));
4830//ZZ return res;
4831//ZZ }
4832//ZZ case Iop_FtoI32Ux4_RZ: {
4833//ZZ HReg res = newVRegV(env);
4834//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4835//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoU,
4836//ZZ res, arg, 2, True));
4837//ZZ return res;
4838//ZZ }
4839//ZZ case Iop_I32StoFx4: {
4840//ZZ HReg res = newVRegV(env);
4841//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4842//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTStoF,
4843//ZZ res, arg, 2, True));
4844//ZZ return res;
4845//ZZ }
4846//ZZ case Iop_I32UtoFx4: {
4847//ZZ HReg res = newVRegV(env);
4848//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4849//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTUtoF,
4850//ZZ res, arg, 2, True));
4851//ZZ return res;
4852//ZZ }
4853//ZZ case Iop_F16toF32x4: {
4854//ZZ HReg res = newVRegV(env);
4855//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4856//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTF16toF32,
4857//ZZ res, arg, 2, True));
4858//ZZ return res;
4859//ZZ }
4860//ZZ case Iop_Recip32Fx4: {
4861//ZZ HReg res = newVRegV(env);
4862//ZZ HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4863//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VRECIPF,
4864//ZZ res, argL, 0, True));
4865//ZZ return res;
4866//ZZ }
4867//ZZ case Iop_Recip32x4: {
4868//ZZ HReg res = newVRegV(env);
4869//ZZ HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4870//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VRECIP,
4871//ZZ res, argL, 0, True));
4872//ZZ return res;
4873//ZZ }
sewardjbbcf1882014-01-12 12:49:10 +00004874//ZZ case Iop_Rsqrte32Fx4: {
4875//ZZ HReg res = newVRegV(env);
4876//ZZ HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4877//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTEFP,
4878//ZZ res, argL, 0, True));
4879//ZZ return res;
4880//ZZ }
4881//ZZ case Iop_Rsqrte32x4: {
4882//ZZ HReg res = newVRegV(env);
4883//ZZ HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4884//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTE,
4885//ZZ res, argL, 0, True));
4886//ZZ return res;
4887//ZZ }
sewardjecde6972014-02-05 11:01:19 +00004888 /* ... */
4889 default:
4890 break;
4891 } /* switch on the unop */
4892 } /* if (e->tag == Iex_Unop) */
sewardjbbcf1882014-01-12 12:49:10 +00004893
4894 if (e->tag == Iex_Binop) {
4895 switch (e->Iex.Binop.op) {
4896 case Iop_64HLtoV128: {
4897 HReg res = newVRegV(env);
4898 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
4899 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
4900 addInstr(env, ARM64Instr_VQfromXX(res, argL, argR));
4901 return res;
4902 }
4903//ZZ case Iop_AndV128: {
4904//ZZ HReg res = newVRegV(env);
4905//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4906//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4907//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
4908//ZZ res, argL, argR, 4, True));
4909//ZZ return res;
4910//ZZ }
4911//ZZ case Iop_OrV128: {
4912//ZZ HReg res = newVRegV(env);
4913//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4914//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4915//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
4916//ZZ res, argL, argR, 4, True));
4917//ZZ return res;
4918//ZZ }
4919//ZZ case Iop_XorV128: {
4920//ZZ HReg res = newVRegV(env);
4921//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4922//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4923//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VXOR,
4924//ZZ res, argL, argR, 4, True));
4925//ZZ return res;
4926//ZZ }
4927//ZZ case Iop_Add8x16:
4928//ZZ case Iop_Add16x8:
4929//ZZ case Iop_Add32x4:
sewardjfab09142014-02-10 10:28:13 +00004930 case Iop_AndV128:
4931 case Iop_OrV128:
sewardje520bb32014-02-17 11:00:53 +00004932 case Iop_XorV128:
sewardjecde6972014-02-05 11:01:19 +00004933 case Iop_Max32Ux4:
4934 case Iop_Max16Ux8:
sewardjfab09142014-02-10 10:28:13 +00004935 case Iop_Max8Ux16:
sewardjecde6972014-02-05 11:01:19 +00004936 case Iop_Min32Ux4:
4937 case Iop_Min16Ux8:
sewardjfab09142014-02-10 10:28:13 +00004938 case Iop_Min8Ux16:
sewardjf5b08912014-02-06 12:57:58 +00004939 case Iop_Max32Sx4:
4940 case Iop_Max16Sx8:
sewardj9b1cf5e2014-03-01 11:16:57 +00004941 case Iop_Max8Sx16:
sewardjf5b08912014-02-06 12:57:58 +00004942 case Iop_Min32Sx4:
4943 case Iop_Min16Sx8:
sewardj9b1cf5e2014-03-01 11:16:57 +00004944 case Iop_Min8Sx16:
sewardj606c4ba2014-01-26 19:11:14 +00004945 case Iop_Add64x2:
sewardjf5b08912014-02-06 12:57:58 +00004946 case Iop_Add32x4:
4947 case Iop_Add16x8:
sewardj92d0ae32014-04-03 13:48:54 +00004948 case Iop_Add8x16:
sewardj606c4ba2014-01-26 19:11:14 +00004949 case Iop_Sub64x2:
4950 case Iop_Sub32x4:
sewardjf5b08912014-02-06 12:57:58 +00004951 case Iop_Sub16x8:
sewardj92d0ae32014-04-03 13:48:54 +00004952 case Iop_Sub8x16:
sewardjf5b08912014-02-06 12:57:58 +00004953 case Iop_Mul32x4:
sewardje520bb32014-02-17 11:00:53 +00004954 case Iop_Mul16x8:
sewardj93013432014-04-27 12:02:12 +00004955 case Iop_Mul8x16:
sewardj2bd1ffe2014-03-27 18:59:00 +00004956 case Iop_CmpEQ64x2:
sewardj93013432014-04-27 12:02:12 +00004957 case Iop_CmpEQ32x4:
4958 case Iop_CmpEQ16x8:
4959 case Iop_CmpEQ8x16:
4960 case Iop_CmpGT64Ux2:
4961 case Iop_CmpGT32Ux4:
4962 case Iop_CmpGT16Ux8:
4963 case Iop_CmpGT8Ux16:
4964 case Iop_CmpGT64Sx2:
4965 case Iop_CmpGT32Sx4:
4966 case Iop_CmpGT16Sx8:
4967 case Iop_CmpGT8Sx16:
sewardj2bd1ffe2014-03-27 18:59:00 +00004968 case Iop_CmpEQ64Fx2:
4969 case Iop_CmpEQ32Fx4:
4970 case Iop_CmpLE64Fx2:
4971 case Iop_CmpLE32Fx4:
4972 case Iop_CmpLT64Fx2:
4973 case Iop_CmpLT32Fx4:
sewardj92d0ae32014-04-03 13:48:54 +00004974 case Iop_Perm8x16:
sewardjd96daf62014-06-15 08:17:35 +00004975 case Iop_InterleaveLO64x2:
4976 case Iop_CatEvenLanes32x4:
4977 case Iop_CatEvenLanes16x8:
4978 case Iop_CatEvenLanes8x16:
4979 case Iop_InterleaveHI64x2:
4980 case Iop_CatOddLanes32x4:
4981 case Iop_CatOddLanes16x8:
4982 case Iop_CatOddLanes8x16:
4983 case Iop_InterleaveHI32x4:
4984 case Iop_InterleaveHI16x8:
4985 case Iop_InterleaveHI8x16:
4986 case Iop_InterleaveLO32x4:
4987 case Iop_InterleaveLO16x8:
4988 case Iop_InterleaveLO8x16:
sewardj168c8bd2014-06-25 13:05:23 +00004989 case Iop_PolynomialMul8x16:
sewardj2bd1ffe2014-03-27 18:59:00 +00004990 {
sewardj606c4ba2014-01-26 19:11:14 +00004991 HReg res = newVRegV(env);
4992 HReg argL = iselV128Expr(env, e->Iex.Binop.arg1);
4993 HReg argR = iselV128Expr(env, e->Iex.Binop.arg2);
sewardj2bd1ffe2014-03-27 18:59:00 +00004994 Bool sw = False;
sewardj606c4ba2014-01-26 19:11:14 +00004995 ARM64VecBinOp op = ARM64vecb_INVALID;
4996 switch (e->Iex.Binop.op) {
sewardj2bd1ffe2014-03-27 18:59:00 +00004997 case Iop_AndV128: op = ARM64vecb_AND; break;
4998 case Iop_OrV128: op = ARM64vecb_ORR; break;
4999 case Iop_XorV128: op = ARM64vecb_XOR; break;
5000 case Iop_Max32Ux4: op = ARM64vecb_UMAX32x4; break;
5001 case Iop_Max16Ux8: op = ARM64vecb_UMAX16x8; break;
5002 case Iop_Max8Ux16: op = ARM64vecb_UMAX8x16; break;
5003 case Iop_Min32Ux4: op = ARM64vecb_UMIN32x4; break;
5004 case Iop_Min16Ux8: op = ARM64vecb_UMIN16x8; break;
5005 case Iop_Min8Ux16: op = ARM64vecb_UMIN8x16; break;
5006 case Iop_Max32Sx4: op = ARM64vecb_SMAX32x4; break;
5007 case Iop_Max16Sx8: op = ARM64vecb_SMAX16x8; break;
5008 case Iop_Max8Sx16: op = ARM64vecb_SMAX8x16; break;
5009 case Iop_Min32Sx4: op = ARM64vecb_SMIN32x4; break;
5010 case Iop_Min16Sx8: op = ARM64vecb_SMIN16x8; break;
5011 case Iop_Min8Sx16: op = ARM64vecb_SMIN8x16; break;
5012 case Iop_Add64x2: op = ARM64vecb_ADD64x2; break;
5013 case Iop_Add32x4: op = ARM64vecb_ADD32x4; break;
5014 case Iop_Add16x8: op = ARM64vecb_ADD16x8; break;
sewardj92d0ae32014-04-03 13:48:54 +00005015 case Iop_Add8x16: op = ARM64vecb_ADD8x16; break;
sewardj2bd1ffe2014-03-27 18:59:00 +00005016 case Iop_Sub64x2: op = ARM64vecb_SUB64x2; break;
5017 case Iop_Sub32x4: op = ARM64vecb_SUB32x4; break;
5018 case Iop_Sub16x8: op = ARM64vecb_SUB16x8; break;
sewardj92d0ae32014-04-03 13:48:54 +00005019 case Iop_Sub8x16: op = ARM64vecb_SUB8x16; break;
sewardj2bd1ffe2014-03-27 18:59:00 +00005020 case Iop_Mul32x4: op = ARM64vecb_MUL32x4; break;
5021 case Iop_Mul16x8: op = ARM64vecb_MUL16x8; break;
sewardj93013432014-04-27 12:02:12 +00005022 case Iop_Mul8x16: op = ARM64vecb_MUL8x16; break;
sewardj2bd1ffe2014-03-27 18:59:00 +00005023 case Iop_CmpEQ64x2: op = ARM64vecb_CMEQ64x2; break;
sewardj93013432014-04-27 12:02:12 +00005024 case Iop_CmpEQ32x4: op = ARM64vecb_CMEQ32x4; break;
5025 case Iop_CmpEQ16x8: op = ARM64vecb_CMEQ16x8; break;
5026 case Iop_CmpEQ8x16: op = ARM64vecb_CMEQ8x16; break;
5027 case Iop_CmpGT64Ux2: op = ARM64vecb_CMHI64x2; break;
5028 case Iop_CmpGT32Ux4: op = ARM64vecb_CMHI32x4; break;
5029 case Iop_CmpGT16Ux8: op = ARM64vecb_CMHI16x8; break;
5030 case Iop_CmpGT8Ux16: op = ARM64vecb_CMHI8x16; break;
5031 case Iop_CmpGT64Sx2: op = ARM64vecb_CMGT64x2; break;
5032 case Iop_CmpGT32Sx4: op = ARM64vecb_CMGT32x4; break;
5033 case Iop_CmpGT16Sx8: op = ARM64vecb_CMGT16x8; break;
5034 case Iop_CmpGT8Sx16: op = ARM64vecb_CMGT8x16; break;
sewardj2bd1ffe2014-03-27 18:59:00 +00005035 case Iop_CmpEQ64Fx2: op = ARM64vecb_FCMEQ64x2; break;
5036 case Iop_CmpEQ32Fx4: op = ARM64vecb_FCMEQ32x4; break;
5037 case Iop_CmpLE64Fx2: op = ARM64vecb_FCMGE64x2; sw = True; break;
5038 case Iop_CmpLE32Fx4: op = ARM64vecb_FCMGE32x4; sw = True; break;
5039 case Iop_CmpLT64Fx2: op = ARM64vecb_FCMGT64x2; sw = True; break;
5040 case Iop_CmpLT32Fx4: op = ARM64vecb_FCMGT32x4; sw = True; break;
sewardj92d0ae32014-04-03 13:48:54 +00005041 case Iop_Perm8x16: op = ARM64vecb_TBL1; break;
sewardjd96daf62014-06-15 08:17:35 +00005042 case Iop_InterleaveLO64x2: op = ARM64vecb_UZP164x2; sw = True;
5043 break;
5044 case Iop_CatEvenLanes32x4: op = ARM64vecb_UZP132x4; sw = True;
5045 break;
5046 case Iop_CatEvenLanes16x8: op = ARM64vecb_UZP116x8; sw = True;
5047 break;
5048 case Iop_CatEvenLanes8x16: op = ARM64vecb_UZP18x16; sw = True;
5049 break;
5050 case Iop_InterleaveHI64x2: op = ARM64vecb_UZP264x2; sw = True;
5051 break;
5052 case Iop_CatOddLanes32x4: op = ARM64vecb_UZP232x4; sw = True;
5053 break;
5054 case Iop_CatOddLanes16x8: op = ARM64vecb_UZP216x8; sw = True;
5055 break;
5056 case Iop_CatOddLanes8x16: op = ARM64vecb_UZP28x16; sw = True;
5057 break;
5058 case Iop_InterleaveHI32x4: op = ARM64vecb_ZIP232x4; sw = True;
5059 break;
5060 case Iop_InterleaveHI16x8: op = ARM64vecb_ZIP216x8; sw = True;
5061 break;
5062 case Iop_InterleaveHI8x16: op = ARM64vecb_ZIP28x16; sw = True;
5063 break;
5064 case Iop_InterleaveLO32x4: op = ARM64vecb_ZIP132x4; sw = True;
5065 break;
5066 case Iop_InterleaveLO16x8: op = ARM64vecb_ZIP116x8; sw = True;
5067 break;
5068 case Iop_InterleaveLO8x16: op = ARM64vecb_ZIP18x16; sw = True;
5069 break;
sewardj168c8bd2014-06-25 13:05:23 +00005070 case Iop_PolynomialMul8x16: op = ARM64vecb_PMUL8x16; break;
sewardj606c4ba2014-01-26 19:11:14 +00005071 default: vassert(0);
5072 }
sewardj2bd1ffe2014-03-27 18:59:00 +00005073 if (sw) {
5074 addInstr(env, ARM64Instr_VBinV(op, res, argR, argL));
5075 } else {
5076 addInstr(env, ARM64Instr_VBinV(op, res, argL, argR));
5077 }
sewardj606c4ba2014-01-26 19:11:14 +00005078 return res;
5079 }
sewardjbbcf1882014-01-12 12:49:10 +00005080//ZZ case Iop_Add32Fx4: {
5081//ZZ HReg res = newVRegV(env);
5082//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5083//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5084//ZZ UInt size = 0;
5085//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VADDFP,
5086//ZZ res, argL, argR, size, True));
5087//ZZ return res;
5088//ZZ }
5089//ZZ case Iop_Recps32Fx4: {
5090//ZZ HReg res = newVRegV(env);
5091//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5092//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5093//ZZ UInt size = 0;
5094//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VRECPS,
5095//ZZ res, argL, argR, size, True));
5096//ZZ return res;
5097//ZZ }
5098//ZZ case Iop_Rsqrts32Fx4: {
5099//ZZ HReg res = newVRegV(env);
5100//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5101//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5102//ZZ UInt size = 0;
5103//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VRSQRTS,
5104//ZZ res, argL, argR, size, True));
5105//ZZ return res;
5106//ZZ }
5107//ZZ
5108//ZZ // These 6 verified 18 Apr 2013
5109//ZZ case Iop_InterleaveEvenLanes8x16:
5110//ZZ case Iop_InterleaveOddLanes8x16:
5111//ZZ case Iop_InterleaveEvenLanes16x8:
5112//ZZ case Iop_InterleaveOddLanes16x8:
5113//ZZ case Iop_InterleaveEvenLanes32x4:
5114//ZZ case Iop_InterleaveOddLanes32x4: {
5115//ZZ HReg rD = newVRegV(env);
5116//ZZ HReg rM = newVRegV(env);
5117//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5118//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5119//ZZ UInt size;
5120//ZZ Bool resRd; // is the result in rD or rM ?
5121//ZZ switch (e->Iex.Binop.op) {
5122//ZZ case Iop_InterleaveOddLanes8x16: resRd = False; size = 0; break;
5123//ZZ case Iop_InterleaveEvenLanes8x16: resRd = True; size = 0; break;
5124//ZZ case Iop_InterleaveOddLanes16x8: resRd = False; size = 1; break;
5125//ZZ case Iop_InterleaveEvenLanes16x8: resRd = True; size = 1; break;
5126//ZZ case Iop_InterleaveOddLanes32x4: resRd = False; size = 2; break;
5127//ZZ case Iop_InterleaveEvenLanes32x4: resRd = True; size = 2; break;
5128//ZZ default: vassert(0);
5129//ZZ }
5130//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, True));
5131//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, True));
5132//ZZ addInstr(env, ARMInstr_NDual(ARMneon_TRN, rD, rM, size, True));
5133//ZZ return resRd ? rD : rM;
5134//ZZ }
5135//ZZ
5136//ZZ // These 6 verified 18 Apr 2013
5137//ZZ case Iop_InterleaveHI8x16:
5138//ZZ case Iop_InterleaveLO8x16:
5139//ZZ case Iop_InterleaveHI16x8:
5140//ZZ case Iop_InterleaveLO16x8:
5141//ZZ case Iop_InterleaveHI32x4:
5142//ZZ case Iop_InterleaveLO32x4: {
5143//ZZ HReg rD = newVRegV(env);
5144//ZZ HReg rM = newVRegV(env);
5145//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5146//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5147//ZZ UInt size;
5148//ZZ Bool resRd; // is the result in rD or rM ?
5149//ZZ switch (e->Iex.Binop.op) {
5150//ZZ case Iop_InterleaveHI8x16: resRd = False; size = 0; break;
5151//ZZ case Iop_InterleaveLO8x16: resRd = True; size = 0; break;
5152//ZZ case Iop_InterleaveHI16x8: resRd = False; size = 1; break;
5153//ZZ case Iop_InterleaveLO16x8: resRd = True; size = 1; break;
5154//ZZ case Iop_InterleaveHI32x4: resRd = False; size = 2; break;
5155//ZZ case Iop_InterleaveLO32x4: resRd = True; size = 2; break;
5156//ZZ default: vassert(0);
5157//ZZ }
5158//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, True));
5159//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, True));
5160//ZZ addInstr(env, ARMInstr_NDual(ARMneon_ZIP, rD, rM, size, True));
5161//ZZ return resRd ? rD : rM;
5162//ZZ }
5163//ZZ
5164//ZZ // These 6 verified 18 Apr 2013
5165//ZZ case Iop_CatOddLanes8x16:
5166//ZZ case Iop_CatEvenLanes8x16:
5167//ZZ case Iop_CatOddLanes16x8:
5168//ZZ case Iop_CatEvenLanes16x8:
5169//ZZ case Iop_CatOddLanes32x4:
5170//ZZ case Iop_CatEvenLanes32x4: {
5171//ZZ HReg rD = newVRegV(env);
5172//ZZ HReg rM = newVRegV(env);
5173//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5174//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5175//ZZ UInt size;
5176//ZZ Bool resRd; // is the result in rD or rM ?
5177//ZZ switch (e->Iex.Binop.op) {
5178//ZZ case Iop_CatOddLanes8x16: resRd = False; size = 0; break;
5179//ZZ case Iop_CatEvenLanes8x16: resRd = True; size = 0; break;
5180//ZZ case Iop_CatOddLanes16x8: resRd = False; size = 1; break;
5181//ZZ case Iop_CatEvenLanes16x8: resRd = True; size = 1; break;
5182//ZZ case Iop_CatOddLanes32x4: resRd = False; size = 2; break;
5183//ZZ case Iop_CatEvenLanes32x4: resRd = True; size = 2; break;
5184//ZZ default: vassert(0);
5185//ZZ }
5186//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, True));
5187//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, True));
5188//ZZ addInstr(env, ARMInstr_NDual(ARMneon_UZP, rD, rM, size, True));
5189//ZZ return resRd ? rD : rM;
5190//ZZ }
5191//ZZ
5192//ZZ case Iop_QAdd8Ux16:
5193//ZZ case Iop_QAdd16Ux8:
5194//ZZ case Iop_QAdd32Ux4:
5195//ZZ case Iop_QAdd64Ux2: {
5196//ZZ HReg res = newVRegV(env);
5197//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5198//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5199//ZZ UInt size;
5200//ZZ switch (e->Iex.Binop.op) {
5201//ZZ case Iop_QAdd8Ux16: size = 0; break;
5202//ZZ case Iop_QAdd16Ux8: size = 1; break;
5203//ZZ case Iop_QAdd32Ux4: size = 2; break;
5204//ZZ case Iop_QAdd64Ux2: size = 3; break;
5205//ZZ default:
5206//ZZ ppIROp(e->Iex.Binop.op);
5207//ZZ vpanic("Illegal element size in VQADDU");
5208//ZZ }
5209//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQADDU,
5210//ZZ res, argL, argR, size, True));
5211//ZZ return res;
5212//ZZ }
5213//ZZ case Iop_QAdd8Sx16:
5214//ZZ case Iop_QAdd16Sx8:
5215//ZZ case Iop_QAdd32Sx4:
5216//ZZ case Iop_QAdd64Sx2: {
5217//ZZ HReg res = newVRegV(env);
5218//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5219//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5220//ZZ UInt size;
5221//ZZ switch (e->Iex.Binop.op) {
5222//ZZ case Iop_QAdd8Sx16: size = 0; break;
5223//ZZ case Iop_QAdd16Sx8: size = 1; break;
5224//ZZ case Iop_QAdd32Sx4: size = 2; break;
5225//ZZ case Iop_QAdd64Sx2: size = 3; break;
5226//ZZ default:
5227//ZZ ppIROp(e->Iex.Binop.op);
5228//ZZ vpanic("Illegal element size in VQADDS");
5229//ZZ }
5230//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQADDS,
5231//ZZ res, argL, argR, size, True));
5232//ZZ return res;
5233//ZZ }
5234//ZZ case Iop_Sub8x16:
5235//ZZ case Iop_Sub16x8:
5236//ZZ case Iop_Sub32x4:
5237//ZZ case Iop_Sub64x2: {
5238//ZZ HReg res = newVRegV(env);
5239//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5240//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5241//ZZ UInt size;
5242//ZZ switch (e->Iex.Binop.op) {
5243//ZZ case Iop_Sub8x16: size = 0; break;
5244//ZZ case Iop_Sub16x8: size = 1; break;
5245//ZZ case Iop_Sub32x4: size = 2; break;
5246//ZZ case Iop_Sub64x2: size = 3; break;
5247//ZZ default:
5248//ZZ ppIROp(e->Iex.Binop.op);
5249//ZZ vpanic("Illegal element size in VSUB");
5250//ZZ }
5251//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
5252//ZZ res, argL, argR, size, True));
5253//ZZ return res;
5254//ZZ }
5255//ZZ case Iop_Sub32Fx4: {
5256//ZZ HReg res = newVRegV(env);
5257//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5258//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5259//ZZ UInt size = 0;
5260//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VSUBFP,
5261//ZZ res, argL, argR, size, True));
5262//ZZ return res;
5263//ZZ }
5264//ZZ case Iop_QSub8Ux16:
5265//ZZ case Iop_QSub16Ux8:
5266//ZZ case Iop_QSub32Ux4:
5267//ZZ case Iop_QSub64Ux2: {
5268//ZZ HReg res = newVRegV(env);
5269//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5270//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5271//ZZ UInt size;
5272//ZZ switch (e->Iex.Binop.op) {
5273//ZZ case Iop_QSub8Ux16: size = 0; break;
5274//ZZ case Iop_QSub16Ux8: size = 1; break;
5275//ZZ case Iop_QSub32Ux4: size = 2; break;
5276//ZZ case Iop_QSub64Ux2: size = 3; break;
5277//ZZ default:
5278//ZZ ppIROp(e->Iex.Binop.op);
5279//ZZ vpanic("Illegal element size in VQSUBU");
5280//ZZ }
5281//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBU,
5282//ZZ res, argL, argR, size, True));
5283//ZZ return res;
5284//ZZ }
5285//ZZ case Iop_QSub8Sx16:
5286//ZZ case Iop_QSub16Sx8:
5287//ZZ case Iop_QSub32Sx4:
5288//ZZ case Iop_QSub64Sx2: {
5289//ZZ HReg res = newVRegV(env);
5290//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5291//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5292//ZZ UInt size;
5293//ZZ switch (e->Iex.Binop.op) {
5294//ZZ case Iop_QSub8Sx16: size = 0; break;
5295//ZZ case Iop_QSub16Sx8: size = 1; break;
5296//ZZ case Iop_QSub32Sx4: size = 2; break;
5297//ZZ case Iop_QSub64Sx2: size = 3; break;
5298//ZZ default:
5299//ZZ ppIROp(e->Iex.Binop.op);
5300//ZZ vpanic("Illegal element size in VQSUBS");
5301//ZZ }
5302//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBS,
5303//ZZ res, argL, argR, size, True));
5304//ZZ return res;
5305//ZZ }
5306//ZZ case Iop_Max8Ux16:
5307//ZZ case Iop_Max16Ux8:
5308//ZZ case Iop_Max32Ux4: {
5309//ZZ HReg res = newVRegV(env);
5310//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5311//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5312//ZZ UInt size;
5313//ZZ switch (e->Iex.Binop.op) {
5314//ZZ case Iop_Max8Ux16: size = 0; break;
5315//ZZ case Iop_Max16Ux8: size = 1; break;
5316//ZZ case Iop_Max32Ux4: size = 2; break;
5317//ZZ default: vpanic("Illegal element size in VMAXU");
5318//ZZ }
5319//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMAXU,
5320//ZZ res, argL, argR, size, True));
5321//ZZ return res;
5322//ZZ }
5323//ZZ case Iop_Max8Sx16:
5324//ZZ case Iop_Max16Sx8:
5325//ZZ case Iop_Max32Sx4: {
5326//ZZ HReg res = newVRegV(env);
5327//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5328//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5329//ZZ UInt size;
5330//ZZ switch (e->Iex.Binop.op) {
5331//ZZ case Iop_Max8Sx16: size = 0; break;
5332//ZZ case Iop_Max16Sx8: size = 1; break;
5333//ZZ case Iop_Max32Sx4: size = 2; break;
5334//ZZ             default: vpanic("Illegal element size in VMAXS");
5335//ZZ }
5336//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMAXS,
5337//ZZ res, argL, argR, size, True));
5338//ZZ return res;
5339//ZZ }
5340//ZZ case Iop_Min8Ux16:
5341//ZZ case Iop_Min16Ux8:
5342//ZZ case Iop_Min32Ux4: {
5343//ZZ HReg res = newVRegV(env);
5344//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5345//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5346//ZZ UInt size;
5347//ZZ switch (e->Iex.Binop.op) {
5348//ZZ case Iop_Min8Ux16: size = 0; break;
5349//ZZ case Iop_Min16Ux8: size = 1; break;
5350//ZZ case Iop_Min32Ux4: size = 2; break;
5351//ZZ             default: vpanic("Illegal element size in VMINU");
5352//ZZ }
5353//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMINU,
5354//ZZ res, argL, argR, size, True));
5355//ZZ return res;
5356//ZZ }
5357//ZZ case Iop_Min8Sx16:
5358//ZZ case Iop_Min16Sx8:
5359//ZZ case Iop_Min32Sx4: {
5360//ZZ HReg res = newVRegV(env);
5361//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5362//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5363//ZZ UInt size;
5364//ZZ switch (e->Iex.Binop.op) {
5365//ZZ case Iop_Min8Sx16: size = 0; break;
5366//ZZ case Iop_Min16Sx8: size = 1; break;
5367//ZZ case Iop_Min32Sx4: size = 2; break;
5368//ZZ             default: vpanic("Illegal element size in VMINS");
5369//ZZ }
5370//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMINS,
5371//ZZ res, argL, argR, size, True));
5372//ZZ return res;
5373//ZZ }
5374//ZZ case Iop_Sar8x16:
5375//ZZ case Iop_Sar16x8:
5376//ZZ case Iop_Sar32x4:
5377//ZZ case Iop_Sar64x2: {
5378//ZZ HReg res = newVRegV(env);
5379//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5380//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5381//ZZ HReg argR2 = newVRegV(env);
5382//ZZ HReg zero = newVRegV(env);
5383//ZZ UInt size;
5384//ZZ switch (e->Iex.Binop.op) {
5385//ZZ case Iop_Sar8x16: size = 0; break;
5386//ZZ case Iop_Sar16x8: size = 1; break;
5387//ZZ case Iop_Sar32x4: size = 2; break;
5388//ZZ case Iop_Sar64x2: size = 3; break;
5389//ZZ default: vassert(0);
5390//ZZ }
5391//ZZ addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
5392//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
5393//ZZ argR2, zero, argR, size, True));
5394//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
5395//ZZ res, argL, argR2, size, True));
5396//ZZ return res;
5397//ZZ }
5398//ZZ case Iop_Sal8x16:
5399//ZZ case Iop_Sal16x8:
5400//ZZ case Iop_Sal32x4:
5401//ZZ case Iop_Sal64x2: {
5402//ZZ HReg res = newVRegV(env);
5403//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5404//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5405//ZZ UInt size;
5406//ZZ switch (e->Iex.Binop.op) {
5407//ZZ case Iop_Sal8x16: size = 0; break;
5408//ZZ case Iop_Sal16x8: size = 1; break;
5409//ZZ case Iop_Sal32x4: size = 2; break;
5410//ZZ case Iop_Sal64x2: size = 3; break;
5411//ZZ default: vassert(0);
5412//ZZ }
5413//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
5414//ZZ res, argL, argR, size, True));
5415//ZZ return res;
5416//ZZ }
5417//ZZ case Iop_Shr8x16:
5418//ZZ case Iop_Shr16x8:
5419//ZZ case Iop_Shr32x4:
5420//ZZ case Iop_Shr64x2: {
5421//ZZ HReg res = newVRegV(env);
5422//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5423//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5424//ZZ HReg argR2 = newVRegV(env);
5425//ZZ HReg zero = newVRegV(env);
5426//ZZ UInt size;
5427//ZZ switch (e->Iex.Binop.op) {
5428//ZZ case Iop_Shr8x16: size = 0; break;
5429//ZZ case Iop_Shr16x8: size = 1; break;
5430//ZZ case Iop_Shr32x4: size = 2; break;
5431//ZZ case Iop_Shr64x2: size = 3; break;
5432//ZZ default: vassert(0);
5433//ZZ }
5434//ZZ addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
5435//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
5436//ZZ argR2, zero, argR, size, True));
5437//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
5438//ZZ res, argL, argR2, size, True));
5439//ZZ return res;
5440//ZZ }
5441//ZZ case Iop_Shl8x16:
5442//ZZ case Iop_Shl16x8:
5443//ZZ case Iop_Shl32x4:
5444//ZZ case Iop_Shl64x2: {
5445//ZZ HReg res = newVRegV(env);
5446//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5447//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5448//ZZ UInt size;
5449//ZZ switch (e->Iex.Binop.op) {
5450//ZZ case Iop_Shl8x16: size = 0; break;
5451//ZZ case Iop_Shl16x8: size = 1; break;
5452//ZZ case Iop_Shl32x4: size = 2; break;
5453//ZZ case Iop_Shl64x2: size = 3; break;
5454//ZZ default: vassert(0);
5455//ZZ }
5456//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
5457//ZZ res, argL, argR, size, True));
5458//ZZ return res;
5459//ZZ }
5460//ZZ case Iop_QShl8x16:
5461//ZZ case Iop_QShl16x8:
5462//ZZ case Iop_QShl32x4:
5463//ZZ case Iop_QShl64x2: {
5464//ZZ HReg res = newVRegV(env);
5465//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5466//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5467//ZZ UInt size;
5468//ZZ switch (e->Iex.Binop.op) {
5469//ZZ case Iop_QShl8x16: size = 0; break;
5470//ZZ case Iop_QShl16x8: size = 1; break;
5471//ZZ case Iop_QShl32x4: size = 2; break;
5472//ZZ case Iop_QShl64x2: size = 3; break;
5473//ZZ default: vassert(0);
5474//ZZ }
5475//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VQSHL,
5476//ZZ res, argL, argR, size, True));
5477//ZZ return res;
5478//ZZ }
5479//ZZ case Iop_QSal8x16:
5480//ZZ case Iop_QSal16x8:
5481//ZZ case Iop_QSal32x4:
5482//ZZ case Iop_QSal64x2: {
5483//ZZ HReg res = newVRegV(env);
5484//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5485//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5486//ZZ UInt size;
5487//ZZ switch (e->Iex.Binop.op) {
5488//ZZ case Iop_QSal8x16: size = 0; break;
5489//ZZ case Iop_QSal16x8: size = 1; break;
5490//ZZ case Iop_QSal32x4: size = 2; break;
5491//ZZ case Iop_QSal64x2: size = 3; break;
5492//ZZ default: vassert(0);
5493//ZZ }
5494//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VQSAL,
5495//ZZ res, argL, argR, size, True));
5496//ZZ return res;
5497//ZZ }
5498//ZZ case Iop_QShlN8x16:
5499//ZZ case Iop_QShlN16x8:
5500//ZZ case Iop_QShlN32x4:
5501//ZZ case Iop_QShlN64x2: {
5502//ZZ HReg res = newVRegV(env);
5503//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5504//ZZ UInt size, imm;
5505//ZZ if (e->Iex.Binop.arg2->tag != Iex_Const ||
5506//ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
5507//ZZ            vpanic("ARM target supports Iop_QShlNAxB with constant "
5508//ZZ "second argument only\n");
5509//ZZ }
5510//ZZ imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
5511//ZZ switch (e->Iex.Binop.op) {
5512//ZZ case Iop_QShlN8x16: size = 8 | imm; break;
5513//ZZ case Iop_QShlN16x8: size = 16 | imm; break;
5514//ZZ case Iop_QShlN32x4: size = 32 | imm; break;
5515//ZZ case Iop_QShlN64x2: size = 64 | imm; break;
5516//ZZ default: vassert(0);
5517//ZZ }
5518//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUU,
5519//ZZ res, argL, size, True));
5520//ZZ return res;
5521//ZZ }
5522//ZZ case Iop_QShlN8Sx16:
5523//ZZ case Iop_QShlN16Sx8:
5524//ZZ case Iop_QShlN32Sx4:
5525//ZZ case Iop_QShlN64Sx2: {
5526//ZZ HReg res = newVRegV(env);
5527//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5528//ZZ UInt size, imm;
5529//ZZ if (e->Iex.Binop.arg2->tag != Iex_Const ||
5530//ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
5531//ZZ            vpanic("ARM target supports Iop_QShlNASxB with constant "
5532//ZZ "second argument only\n");
5533//ZZ }
5534//ZZ imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
5535//ZZ switch (e->Iex.Binop.op) {
5536//ZZ case Iop_QShlN8Sx16: size = 8 | imm; break;
5537//ZZ case Iop_QShlN16Sx8: size = 16 | imm; break;
5538//ZZ case Iop_QShlN32Sx4: size = 32 | imm; break;
5539//ZZ case Iop_QShlN64Sx2: size = 64 | imm; break;
5540//ZZ default: vassert(0);
5541//ZZ }
5542//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUS,
5543//ZZ res, argL, size, True));
5544//ZZ return res;
5545//ZZ }
5546//ZZ case Iop_QSalN8x16:
5547//ZZ case Iop_QSalN16x8:
5548//ZZ case Iop_QSalN32x4:
5549//ZZ case Iop_QSalN64x2: {
5550//ZZ HReg res = newVRegV(env);
5551//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5552//ZZ UInt size, imm;
5553//ZZ if (e->Iex.Binop.arg2->tag != Iex_Const ||
5554//ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
5555//ZZ            vpanic("ARM target supports Iop_QShlNAxB with constant "
5556//ZZ "second argument only\n");
5557//ZZ }
5558//ZZ imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
5559//ZZ switch (e->Iex.Binop.op) {
5560//ZZ case Iop_QSalN8x16: size = 8 | imm; break;
5561//ZZ case Iop_QSalN16x8: size = 16 | imm; break;
5562//ZZ case Iop_QSalN32x4: size = 32 | imm; break;
5563//ZZ case Iop_QSalN64x2: size = 64 | imm; break;
5564//ZZ default: vassert(0);
5565//ZZ }
5566//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNSS,
5567//ZZ res, argL, size, True));
5568//ZZ return res;
5569//ZZ }
sewardje520bb32014-02-17 11:00:53 +00005570 case Iop_ShrN64x2:
sewardj93013432014-04-27 12:02:12 +00005571 case Iop_ShrN32x4:
sewardj1eaaec22014-03-07 22:52:19 +00005572 case Iop_ShrN16x8:
sewardj93013432014-04-27 12:02:12 +00005573 case Iop_ShrN8x16:
sewardj32d86752014-03-02 12:47:18 +00005574 case Iop_SarN64x2:
sewardj93013432014-04-27 12:02:12 +00005575 case Iop_SarN32x4:
5576 case Iop_SarN16x8:
5577 case Iop_SarN8x16:
5578 case Iop_ShlN64x2:
sewardj1eaaec22014-03-07 22:52:19 +00005579 case Iop_ShlN32x4:
sewardj93013432014-04-27 12:02:12 +00005580 case Iop_ShlN16x8:
5581 case Iop_ShlN8x16:
sewardj1eaaec22014-03-07 22:52:19 +00005582 {
sewardje520bb32014-02-17 11:00:53 +00005583 IRExpr* argL = e->Iex.Binop.arg1;
5584 IRExpr* argR = e->Iex.Binop.arg2;
5585 if (argR->tag == Iex_Const && argR->Iex.Const.con->tag == Ico_U8) {
5586 UInt amt = argR->Iex.Const.con->Ico.U8;
5587 UInt limit = 0;
5588 ARM64VecShiftOp op = ARM64vecsh_INVALID;
5589 switch (e->Iex.Binop.op) {
5590 case Iop_ShrN64x2:
5591 op = ARM64vecsh_USHR64x2; limit = 63; break;
sewardj93013432014-04-27 12:02:12 +00005592 case Iop_ShrN32x4:
5593 op = ARM64vecsh_USHR32x4; limit = 31; break;
sewardj1eaaec22014-03-07 22:52:19 +00005594 case Iop_ShrN16x8:
5595 op = ARM64vecsh_USHR16x8; limit = 15; break;
sewardj93013432014-04-27 12:02:12 +00005596 case Iop_ShrN8x16:
5597 op = ARM64vecsh_USHR8x16; limit = 7; break;
sewardje520bb32014-02-17 11:00:53 +00005598 case Iop_SarN64x2:
5599 op = ARM64vecsh_SSHR64x2; limit = 63; break;
sewardj93013432014-04-27 12:02:12 +00005600 case Iop_SarN32x4:
5601 op = ARM64vecsh_SSHR32x4; limit = 31; break;
5602 case Iop_SarN16x8:
5603 op = ARM64vecsh_SSHR16x8; limit = 15; break;
5604 case Iop_SarN8x16:
5605 op = ARM64vecsh_SSHR8x16; limit = 7; break;
5606 case Iop_ShlN64x2:
5607 op = ARM64vecsh_SHL64x2; limit = 63; break;
sewardj32d86752014-03-02 12:47:18 +00005608 case Iop_ShlN32x4:
5609 op = ARM64vecsh_SHL32x4; limit = 31; break;
sewardj93013432014-04-27 12:02:12 +00005610 case Iop_ShlN16x8:
5611 op = ARM64vecsh_SHL16x8; limit = 15; break;
5612 case Iop_ShlN8x16:
5613 op = ARM64vecsh_SHL8x16; limit = 7; break;
sewardje520bb32014-02-17 11:00:53 +00005614 default:
5615 vassert(0);
5616 }
sewardjfda314f2014-05-16 11:20:07 +00005617 if (op != ARM64vecsh_INVALID && amt >= 0 && amt <= limit) {
sewardje520bb32014-02-17 11:00:53 +00005618 HReg src = iselV128Expr(env, argL);
5619 HReg dst = newVRegV(env);
sewardjfda314f2014-05-16 11:20:07 +00005620 if (amt > 0) {
5621 addInstr(env, ARM64Instr_VShiftImmV(op, dst, src, amt));
5622 } else {
5623 dst = src;
5624 }
sewardje520bb32014-02-17 11:00:53 +00005625 return dst;
5626 }
5627 }
5628 /* else fall out; this is unhandled */
5629 break;
5630 }
sewardjab33a7a2014-06-19 22:20:47 +00005631
5632 case Iop_ShlV128:
5633 case Iop_ShrV128: {
5634 Bool isSHR = e->Iex.Binop.op == Iop_ShrV128;
5635 /* This is tricky. Generate an EXT instruction with zeroes in
5636 the high operand (shift right) or low operand (shift left).
5637 Note that we can only slice in the EXT instruction at a byte
5638 level of granularity, so the shift amount needs careful
5639 checking. */
5640 IRExpr* argL = e->Iex.Binop.arg1;
5641 IRExpr* argR = e->Iex.Binop.arg2;
5642 if (argR->tag == Iex_Const && argR->Iex.Const.con->tag == Ico_U8) {
5643 UInt amt = argR->Iex.Const.con->Ico.U8;
5644 Bool amtOK = False;
5645 switch (amt) {
5646 case 0x08: case 0x10: case 0x18: case 0x20: case 0x28:
5647 case 0x30: case 0x38: case 0x40: case 0x48: case 0x50:
5648 case 0x58: case 0x60: case 0x68: case 0x70: case 0x78:
5649 amtOK = True; break;
5650 }
5651 /* We could also deal with amt==0 by copying the source to
5652 the destination, but there's no need for that so far. */
5653 if (amtOK) {
5654 HReg src = iselV128Expr(env, argL);
5655 HReg srcZ = newVRegV(env);
5656 addInstr(env, ARM64Instr_VImmQ(srcZ, 0x0000));
5657 UInt immB = amt / 8;
5658 vassert(immB >= 1 && immB <= 15);
5659 HReg dst = newVRegV(env);
5660 if (isSHR) {
5661 addInstr(env, ARM64Instr_VExtV(dst, src/*lo*/, srcZ/*hi*/,
5662 immB));
5663 } else {
5664 addInstr(env, ARM64Instr_VExtV(dst, srcZ/*lo*/, src/*hi*/,
5665 16 - immB));
5666 }
5667 return dst;
5668 }
5669 }
5670 /* else fall out; this is unhandled */
5671 break;
5672 }
5673
sewardjbbcf1882014-01-12 12:49:10 +00005674//ZZ case Iop_CmpGT8Ux16:
5675//ZZ case Iop_CmpGT16Ux8:
5676//ZZ case Iop_CmpGT32Ux4: {
5677//ZZ HReg res = newVRegV(env);
5678//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5679//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5680//ZZ UInt size;
5681//ZZ switch (e->Iex.Binop.op) {
5682//ZZ case Iop_CmpGT8Ux16: size = 0; break;
5683//ZZ case Iop_CmpGT16Ux8: size = 1; break;
5684//ZZ case Iop_CmpGT32Ux4: size = 2; break;
5685//ZZ default: vassert(0);
5686//ZZ }
5687//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGTU,
5688//ZZ res, argL, argR, size, True));
5689//ZZ return res;
5690//ZZ }
5691//ZZ case Iop_CmpGT8Sx16:
5692//ZZ case Iop_CmpGT16Sx8:
5693//ZZ case Iop_CmpGT32Sx4: {
5694//ZZ HReg res = newVRegV(env);
5695//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5696//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5697//ZZ UInt size;
5698//ZZ switch (e->Iex.Binop.op) {
5699//ZZ case Iop_CmpGT8Sx16: size = 0; break;
5700//ZZ case Iop_CmpGT16Sx8: size = 1; break;
5701//ZZ case Iop_CmpGT32Sx4: size = 2; break;
5702//ZZ default: vassert(0);
5703//ZZ }
5704//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGTS,
5705//ZZ res, argL, argR, size, True));
5706//ZZ return res;
5707//ZZ }
5708//ZZ case Iop_CmpEQ8x16:
5709//ZZ case Iop_CmpEQ16x8:
5710//ZZ case Iop_CmpEQ32x4: {
5711//ZZ HReg res = newVRegV(env);
5712//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5713//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5714//ZZ UInt size;
5715//ZZ switch (e->Iex.Binop.op) {
5716//ZZ case Iop_CmpEQ8x16: size = 0; break;
5717//ZZ case Iop_CmpEQ16x8: size = 1; break;
5718//ZZ case Iop_CmpEQ32x4: size = 2; break;
5719//ZZ default: vassert(0);
5720//ZZ }
5721//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCEQ,
5722//ZZ res, argL, argR, size, True));
5723//ZZ return res;
5724//ZZ }
5725//ZZ case Iop_Mul8x16:
5726//ZZ case Iop_Mul16x8:
5727//ZZ case Iop_Mul32x4: {
5728//ZZ HReg res = newVRegV(env);
5729//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5730//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5731//ZZ UInt size = 0;
5732//ZZ switch(e->Iex.Binop.op) {
5733//ZZ case Iop_Mul8x16: size = 0; break;
5734//ZZ case Iop_Mul16x8: size = 1; break;
5735//ZZ case Iop_Mul32x4: size = 2; break;
5736//ZZ default: vassert(0);
5737//ZZ }
5738//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMUL,
5739//ZZ res, argL, argR, size, True));
5740//ZZ return res;
5741//ZZ }
5742//ZZ case Iop_Mul32Fx4: {
5743//ZZ HReg res = newVRegV(env);
5744//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5745//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5746//ZZ UInt size = 0;
5747//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMULFP,
5748//ZZ res, argL, argR, size, True));
5749//ZZ return res;
5750//ZZ }
5751//ZZ case Iop_Mull8Ux8:
5752//ZZ case Iop_Mull16Ux4:
5753//ZZ case Iop_Mull32Ux2: {
5754//ZZ HReg res = newVRegV(env);
5755//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5756//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
5757//ZZ UInt size = 0;
5758//ZZ switch(e->Iex.Binop.op) {
5759//ZZ case Iop_Mull8Ux8: size = 0; break;
5760//ZZ case Iop_Mull16Ux4: size = 1; break;
5761//ZZ case Iop_Mull32Ux2: size = 2; break;
5762//ZZ default: vassert(0);
5763//ZZ }
5764//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMULLU,
5765//ZZ res, argL, argR, size, True));
5766//ZZ return res;
5767//ZZ }
5768//ZZ
5769//ZZ case Iop_Mull8Sx8:
5770//ZZ case Iop_Mull16Sx4:
5771//ZZ case Iop_Mull32Sx2: {
5772//ZZ HReg res = newVRegV(env);
5773//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5774//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
5775//ZZ UInt size = 0;
5776//ZZ switch(e->Iex.Binop.op) {
5777//ZZ case Iop_Mull8Sx8: size = 0; break;
5778//ZZ case Iop_Mull16Sx4: size = 1; break;
5779//ZZ case Iop_Mull32Sx2: size = 2; break;
5780//ZZ default: vassert(0);
5781//ZZ }
5782//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMULLS,
5783//ZZ res, argL, argR, size, True));
5784//ZZ return res;
5785//ZZ }
5786//ZZ
5787//ZZ case Iop_QDMulHi16Sx8:
5788//ZZ case Iop_QDMulHi32Sx4: {
5789//ZZ HReg res = newVRegV(env);
5790//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5791//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5792//ZZ UInt size = 0;
5793//ZZ switch(e->Iex.Binop.op) {
5794//ZZ case Iop_QDMulHi16Sx8: size = 1; break;
5795//ZZ case Iop_QDMulHi32Sx4: size = 2; break;
5796//ZZ default: vassert(0);
5797//ZZ }
5798//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULH,
5799//ZZ res, argL, argR, size, True));
5800//ZZ return res;
5801//ZZ }
5802//ZZ
5803//ZZ case Iop_QRDMulHi16Sx8:
5804//ZZ case Iop_QRDMulHi32Sx4: {
5805//ZZ HReg res = newVRegV(env);
5806//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5807//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5808//ZZ UInt size = 0;
5809//ZZ switch(e->Iex.Binop.op) {
5810//ZZ case Iop_QRDMulHi16Sx8: size = 1; break;
5811//ZZ case Iop_QRDMulHi32Sx4: size = 2; break;
5812//ZZ default: vassert(0);
5813//ZZ }
5814//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQRDMULH,
5815//ZZ res, argL, argR, size, True));
5816//ZZ return res;
5817//ZZ }
5818//ZZ
5819//ZZ case Iop_QDMulLong16Sx4:
5820//ZZ case Iop_QDMulLong32Sx2: {
5821//ZZ HReg res = newVRegV(env);
5822//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5823//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
5824//ZZ UInt size = 0;
5825//ZZ switch(e->Iex.Binop.op) {
5826//ZZ case Iop_QDMulLong16Sx4: size = 1; break;
5827//ZZ case Iop_QDMulLong32Sx2: size = 2; break;
5828//ZZ default: vassert(0);
5829//ZZ }
5830//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULL,
5831//ZZ res, argL, argR, size, True));
5832//ZZ return res;
5833//ZZ }
5834//ZZ case Iop_PolynomialMul8x16: {
5835//ZZ HReg res = newVRegV(env);
5836//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5837//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5838//ZZ UInt size = 0;
5839//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMULP,
5840//ZZ res, argL, argR, size, True));
5841//ZZ return res;
5842//ZZ }
5843//ZZ case Iop_Max32Fx4: {
5844//ZZ HReg res = newVRegV(env);
5845//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5846//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5847//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMAXF,
5848//ZZ res, argL, argR, 2, True));
5849//ZZ return res;
5850//ZZ }
5851//ZZ case Iop_Min32Fx4: {
5852//ZZ HReg res = newVRegV(env);
5853//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5854//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5855//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMINF,
5856//ZZ res, argL, argR, 2, True));
5857//ZZ return res;
5858//ZZ }
5859//ZZ case Iop_PwMax32Fx4: {
5860//ZZ HReg res = newVRegV(env);
5861//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5862//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5863//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXF,
5864//ZZ res, argL, argR, 2, True));
5865//ZZ return res;
5866//ZZ }
5867//ZZ case Iop_PwMin32Fx4: {
5868//ZZ HReg res = newVRegV(env);
5869//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5870//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5871//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VPMINF,
5872//ZZ res, argL, argR, 2, True));
5873//ZZ return res;
5874//ZZ }
5875//ZZ case Iop_CmpGT32Fx4: {
5876//ZZ HReg res = newVRegV(env);
5877//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5878//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5879//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGTF,
5880//ZZ res, argL, argR, 2, True));
5881//ZZ return res;
5882//ZZ }
5883//ZZ case Iop_CmpGE32Fx4: {
5884//ZZ HReg res = newVRegV(env);
5885//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5886//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5887//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGEF,
5888//ZZ res, argL, argR, 2, True));
5889//ZZ return res;
5890//ZZ }
5891//ZZ case Iop_CmpEQ32Fx4: {
5892//ZZ HReg res = newVRegV(env);
5893//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5894//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5895//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCEQF,
5896//ZZ res, argL, argR, 2, True));
5897//ZZ return res;
5898//ZZ }
5899//ZZ
5900//ZZ case Iop_PolynomialMull8x8: {
5901//ZZ HReg res = newVRegV(env);
5902//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5903//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
5904//ZZ UInt size = 0;
5905//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMULLP,
5906//ZZ res, argL, argR, size, True));
5907//ZZ return res;
5908//ZZ }
5909//ZZ case Iop_F32ToFixed32Ux4_RZ:
5910//ZZ case Iop_F32ToFixed32Sx4_RZ:
5911//ZZ case Iop_Fixed32UToF32x4_RN:
5912//ZZ case Iop_Fixed32SToF32x4_RN: {
5913//ZZ HReg res = newVRegV(env);
5914//ZZ HReg arg = iselNeonExpr(env, e->Iex.Binop.arg1);
5915//ZZ ARMNeonUnOp op;
5916//ZZ UInt imm6;
5917//ZZ if (e->Iex.Binop.arg2->tag != Iex_Const ||
5918//ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
5919//ZZ vpanic("ARM supports FP <-> Fixed conversion with constant "
5920//ZZ "second argument less than 33 only\n");
5921//ZZ }
5922//ZZ imm6 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
5923//ZZ vassert(imm6 <= 32 && imm6 > 0);
5924//ZZ imm6 = 64 - imm6;
5925//ZZ switch(e->Iex.Binop.op) {
5926//ZZ case Iop_F32ToFixed32Ux4_RZ: op = ARMneon_VCVTFtoFixedU; break;
5927//ZZ case Iop_F32ToFixed32Sx4_RZ: op = ARMneon_VCVTFtoFixedS; break;
5928//ZZ case Iop_Fixed32UToF32x4_RN: op = ARMneon_VCVTFixedUtoF; break;
5929//ZZ case Iop_Fixed32SToF32x4_RN: op = ARMneon_VCVTFixedStoF; break;
5930//ZZ default: vassert(0);
5931//ZZ }
5932//ZZ addInstr(env, ARMInstr_NUnary(op, res, arg, imm6, True));
5933//ZZ return res;
5934//ZZ }
5935//ZZ /*
5936//ZZ FIXME remove if not used
5937//ZZ case Iop_VDup8x16:
5938//ZZ case Iop_VDup16x8:
5939//ZZ case Iop_VDup32x4: {
5940//ZZ HReg res = newVRegV(env);
5941//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5942//ZZ UInt imm4;
5943//ZZ UInt index;
5944//ZZ if (e->Iex.Binop.arg2->tag != Iex_Const ||
5945//ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
5946//ZZ vpanic("ARM supports Iop_VDup with constant "
5947//ZZ "second argument less than 16 only\n");
5948//ZZ }
5949//ZZ index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
5950//ZZ switch(e->Iex.Binop.op) {
5951//ZZ case Iop_VDup8x16: imm4 = (index << 1) + 1; break;
5952//ZZ case Iop_VDup16x8: imm4 = (index << 2) + 2; break;
5953//ZZ case Iop_VDup32x4: imm4 = (index << 3) + 4; break;
5954//ZZ default: vassert(0);
5955//ZZ }
5956//ZZ if (imm4 >= 16) {
5957//ZZ vpanic("ARM supports Iop_VDup with constant "
5958//ZZ "second argument less than 16 only\n");
5959//ZZ }
5960//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VDUP,
5961//ZZ res, argL, imm4, True));
5962//ZZ return res;
5963//ZZ }
5964//ZZ */
5965//ZZ case Iop_PwAdd8x16:
5966//ZZ case Iop_PwAdd16x8:
5967//ZZ case Iop_PwAdd32x4: {
5968//ZZ HReg res = newVRegV(env);
5969//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5970//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5971//ZZ UInt size = 0;
5972//ZZ switch(e->Iex.Binop.op) {
5973//ZZ case Iop_PwAdd8x16: size = 0; break;
5974//ZZ case Iop_PwAdd16x8: size = 1; break;
5975//ZZ case Iop_PwAdd32x4: size = 2; break;
5976//ZZ default: vassert(0);
5977//ZZ }
5978//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VPADD,
5979//ZZ res, argL, argR, size, True));
5980//ZZ return res;
5981//ZZ }
5982 /* ... */
5983 default:
5984 break;
5985 } /* switch on the binop */
5986 } /* if (e->tag == Iex_Binop) */
5987
sewardj606c4ba2014-01-26 19:11:14 +00005988 if (e->tag == Iex_Triop) {
5989 IRTriop* triop = e->Iex.Triop.details;
5990 ARM64VecBinOp vecbop = ARM64vecb_INVALID;
5991 switch (triop->op) {
5992 case Iop_Add64Fx2: vecbop = ARM64vecb_FADD64x2; break;
5993 case Iop_Sub64Fx2: vecbop = ARM64vecb_FSUB64x2; break;
5994 case Iop_Mul64Fx2: vecbop = ARM64vecb_FMUL64x2; break;
5995 case Iop_Div64Fx2: vecbop = ARM64vecb_FDIV64x2; break;
sewardjecde6972014-02-05 11:01:19 +00005996 case Iop_Add32Fx4: vecbop = ARM64vecb_FADD32x4; break;
5997 case Iop_Sub32Fx4: vecbop = ARM64vecb_FSUB32x4; break;
5998 case Iop_Mul32Fx4: vecbop = ARM64vecb_FMUL32x4; break;
5999 case Iop_Div32Fx4: vecbop = ARM64vecb_FDIV32x4; break;
sewardj606c4ba2014-01-26 19:11:14 +00006000 default: break;
6001 }
6002 if (vecbop != ARM64vecb_INVALID) {
6003 HReg argL = iselV128Expr(env, triop->arg2);
6004 HReg argR = iselV128Expr(env, triop->arg3);
6005 HReg dst = newVRegV(env);
6006 set_FPCR_rounding_mode(env, triop->arg1);
6007 addInstr(env, ARM64Instr_VBinV(vecbop, dst, argL, argR));
6008 return dst;
6009 }
6010
sewardjbbcf1882014-01-12 12:49:10 +00006011//ZZ switch (triop->op) {
6012//ZZ case Iop_ExtractV128: {
6013//ZZ HReg res = newVRegV(env);
6014//ZZ HReg argL = iselNeonExpr(env, triop->arg1);
6015//ZZ HReg argR = iselNeonExpr(env, triop->arg2);
6016//ZZ UInt imm4;
6017//ZZ if (triop->arg3->tag != Iex_Const ||
6018//ZZ typeOfIRExpr(env->type_env, triop->arg3) != Ity_I8) {
6019//ZZ vpanic("ARM target supports Iop_ExtractV128 with constant "
6020//ZZ "third argument less than 16 only\n");
6021//ZZ }
6022//ZZ imm4 = triop->arg3->Iex.Const.con->Ico.U8;
6023//ZZ if (imm4 >= 16) {
6024//ZZ vpanic("ARM target supports Iop_ExtractV128 with constant "
6025//ZZ "third argument less than 16 only\n");
6026//ZZ }
6027//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VEXT,
6028//ZZ res, argL, argR, imm4, True));
6029//ZZ return res;
6030//ZZ }
6031//ZZ default:
6032//ZZ break;
6033//ZZ }
sewardj606c4ba2014-01-26 19:11:14 +00006034 }
6035
sewardjbbcf1882014-01-12 12:49:10 +00006036//ZZ if (e->tag == Iex_ITE) { // VFD
6037//ZZ ARMCondCode cc;
6038//ZZ HReg r1 = iselNeonExpr(env, e->Iex.ITE.iftrue);
6039//ZZ HReg r0 = iselNeonExpr(env, e->Iex.ITE.iffalse);
6040//ZZ HReg dst = newVRegV(env);
6041//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, dst, r1, 4, True));
6042//ZZ cc = iselCondCode(env, e->Iex.ITE.cond);
6043//ZZ addInstr(env, ARMInstr_NCMovQ(cc ^ 1, dst, r0));
6044//ZZ return dst;
6045//ZZ }
6046
6047 v128_expr_bad:
6048 ppIRExpr(e);
6049 vpanic("iselV128Expr_wrk");
6050}
6051
6052
6053/*---------------------------------------------------------*/
6054/*--- ISEL: Floating point expressions (64 bit) ---*/
6055/*---------------------------------------------------------*/
6056
6057/* Compute a 64-bit floating point value into a register, the identity
6058 of which is returned. As with iselIntExpr_R, the reg may be either
6059 real or virtual; in any case it must not be changed by subsequent
6060 code emitted by the caller. */
6061
6062static HReg iselDblExpr ( ISelEnv* env, IRExpr* e )
6063{
6064 HReg r = iselDblExpr_wrk( env, e );
6065# if 0
6066 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
6067# endif
6068 vassert(hregClass(r) == HRcFlt64);
6069 vassert(hregIsVirtual(r));
6070 return r;
6071}
6072
/* DO NOT CALL THIS DIRECTLY */
static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(e);
   vassert(ty == Ity_F64);

   /* Read of an IR temporary: the value is already in a register. */
   if (e->tag == Iex_RdTmp) {
      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
   }

   /* F64 literal: materialise the bit pattern in an integer register
      and move it sideways into a D register. */
   if (e->tag == Iex_Const) {
      IRConst* con = e->Iex.Const.con;
      if (con->tag == Ico_F64i) {
         HReg src = newVRegI(env);
         HReg dst = newVRegD(env);
         addInstr(env, ARM64Instr_Imm64(src, con->Ico.F64i));
         addInstr(env, ARM64Instr_VDfromX(dst, src));
         return dst;
      }
   }

   /* Little-endian 64-bit FP load. */
   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
      vassert(e->Iex.Load.ty == Ity_F64);
      HReg addr = iselIntExpr_R(env, e->Iex.Load.addr);
      HReg res = newVRegD(env);
      addInstr(env, ARM64Instr_VLdStD(True/*isLoad*/, res, addr, 0));
      return res;
   }

   /* Read of guest state, provided the offset is 8-aligned and within
      range of the scaled-immediate form of the load. */
   if (e->tag == Iex_Get) {
      Int offs = e->Iex.Get.offset;
      if (offs >= 0 && offs < 32768 && 0 == (offs & 7)) {
         HReg rD = newVRegD(env);
         HReg rN = get_baseblock_register();
         addInstr(env, ARM64Instr_VLdStD(True/*isLoad*/, rD, rN, offs));
         return rD;
      }
   }

   if (e->tag == Iex_Unop) {
      switch (e->Iex.Unop.op) {
//ZZ      case Iop_ReinterpI64asF64: {
//ZZ         if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
//ZZ            return iselNeon64Expr(env, e->Iex.Unop.arg);
//ZZ         } else {
//ZZ            HReg srcHi, srcLo;
//ZZ            HReg dst = newVRegD(env);
//ZZ            iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg);
//ZZ            addInstr(env, ARMInstr_VXferD(True/*toD*/, dst, srcHi, srcLo));
//ZZ            return dst;
//ZZ         }
//ZZ      }
         case Iop_NegF64: {
            HReg src = iselDblExpr(env, e->Iex.Unop.arg);
            HReg dst = newVRegD(env);
            addInstr(env, ARM64Instr_VUnaryD(ARM64fpu_NEG, dst, src));
            return dst;
         }
         case Iop_AbsF64: {
            HReg src = iselDblExpr(env, e->Iex.Unop.arg);
            HReg dst = newVRegD(env);
            addInstr(env, ARM64Instr_VUnaryD(ARM64fpu_ABS, dst, src));
            return dst;
         }
         case Iop_F32toF64: {
            /* Widening F32->F64 is always exact, so no rounding mode
               is taken here. */
            HReg src = iselFltExpr(env, e->Iex.Unop.arg);
            HReg dst = newVRegD(env);
            addInstr(env, ARM64Instr_VCvtSD(True/*sToD*/, dst, src));
            return dst;
         }
         case Iop_I32UtoF64:
         case Iop_I32StoF64: {
            /* Rounding mode is not involved here, since the
               conversion can always be done without loss of
               precision. */
            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
            HReg dst = newVRegD(env);
            Bool syned = e->Iex.Unop.op == Iop_I32StoF64;
            ARM64CvtOp cvt_op = syned ? ARM64cvt_F64_I32S : ARM64cvt_F64_I32U;
            addInstr(env, ARM64Instr_VCvtI2F(cvt_op, dst, src));
            return dst;
         }
         default:
            break;
      }
   }

   if (e->tag == Iex_Binop) {
      switch (e->Iex.Binop.op) {
         case Iop_RoundF64toInt: {
            /* arg1 carries the IR rounding mode; install it in FPCR
               before issuing the rounding instruction. */
            HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
            HReg dst = newVRegD(env);
            set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
            addInstr(env, ARM64Instr_VUnaryD(ARM64fpu_RINT, dst, src));
            return dst;
         }
         case Iop_SqrtF64: {
            HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
            HReg dst = newVRegD(env);
            set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
            addInstr(env, ARM64Instr_VUnaryD(ARM64fpu_SQRT, dst, src));
            return dst;
         }
         case Iop_I64StoF64:
         case Iop_I64UtoF64: {
            /* Unlike the 32-bit cases above, I64 -> F64 can lose
               precision, hence the explicit rounding mode in arg1. */
            ARM64CvtOp cvt_op = e->Iex.Binop.op == Iop_I64StoF64
                                   ? ARM64cvt_F64_I64S : ARM64cvt_F64_I64U;
            HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2);
            set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
            HReg dstS = newVRegD(env);
            addInstr(env, ARM64Instr_VCvtI2F(cvt_op, dstS, srcI));
            return dstS;
         }
         default:
            break;
      }
   }

   /* F64 arithmetic: arg1 is the rounding mode, arg2/arg3 the
      operands. */
   if (e->tag == Iex_Triop) {
      IRTriop* triop = e->Iex.Triop.details;
      ARM64FpBinOp dblop = ARM64fpb_INVALID;
      switch (triop->op) {
         case Iop_DivF64: dblop = ARM64fpb_DIV; break;
         case Iop_MulF64: dblop = ARM64fpb_MUL; break;
         case Iop_SubF64: dblop = ARM64fpb_SUB; break;
         case Iop_AddF64: dblop = ARM64fpb_ADD; break;
         default: break;
      }
      if (dblop != ARM64fpb_INVALID) {
         HReg argL = iselDblExpr(env, triop->arg2);
         HReg argR = iselDblExpr(env, triop->arg3);
         HReg dst = newVRegD(env);
         set_FPCR_rounding_mode(env, triop->arg1);
         addInstr(env, ARM64Instr_VBinD(dblop, dst, argL, argR));
         return dst;
      }
   }

//ZZ    if (e->tag == Iex_ITE) { // VFD
//ZZ       if (ty == Ity_F64
//ZZ           && typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) {
//ZZ          HReg r1 = iselDblExpr(env, e->Iex.ITE.iftrue);
//ZZ          HReg r0 = iselDblExpr(env, e->Iex.ITE.iffalse);
//ZZ          HReg dst = newVRegD(env);
//ZZ          addInstr(env, ARMInstr_VUnaryD(ARMvfpu_COPY, dst, r1));
//ZZ          ARMCondCode cc = iselCondCode(env, e->Iex.ITE.cond);
//ZZ          addInstr(env, ARMInstr_VCMovD(cc ^ 1, dst, r0));
//ZZ          return dst;
//ZZ       }
//ZZ    }

   /* No pattern matched: give up. */
   ppIRExpr(e);
   vpanic("iselDblExpr_wrk");
}
6228
6229
6230/*---------------------------------------------------------*/
6231/*--- ISEL: Floating point expressions (32 bit) ---*/
6232/*---------------------------------------------------------*/
6233
6234/* Compute a 32-bit floating point value into a register, the identity
6235 of which is returned. As with iselIntExpr_R, the reg may be either
6236 real or virtual; in any case it must not be changed by subsequent
6237 code emitted by the caller. Values are generated into HRcFlt64
6238 registers despite the values themselves being Ity_F32s. */
6239
6240static HReg iselFltExpr ( ISelEnv* env, IRExpr* e )
6241{
6242 HReg r = iselFltExpr_wrk( env, e );
6243# if 0
6244 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
6245# endif
6246 vassert(hregClass(r) == HRcFlt64);
6247 vassert(hregIsVirtual(r));
6248 return r;
6249}
6250
/* DO NOT CALL THIS DIRECTLY */
static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(e);
   vassert(ty == Ity_F32);

   /* Read of an IR temporary: the value is already in a register. */
   if (e->tag == Iex_RdTmp) {
      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
   }

   if (e->tag == Iex_Const) {
      /* This is something of a kludge. Since a 32 bit floating point
         zero is just .. all zeroes, just create a 64 bit zero word
         and transfer it. This avoids having to create a SfromW
         instruction for this specific case. */
      IRConst* con = e->Iex.Const.con;
      if (con->tag == Ico_F32i && con->Ico.F32i == 0) {
         HReg src = newVRegI(env);
         HReg dst = newVRegD(env);
         addInstr(env, ARM64Instr_Imm64(src, 0));
         addInstr(env, ARM64Instr_VDfromX(dst, src));
         return dst;
      }
   }

//ZZ    if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
//ZZ       ARMAModeV* am;
//ZZ       HReg res = newVRegF(env);
//ZZ       vassert(e->Iex.Load.ty == Ity_F32);
//ZZ       am = iselIntExpr_AModeV(env, e->Iex.Load.addr);
//ZZ       addInstr(env, ARMInstr_VLdStS(True/*isLoad*/, res, am));
//ZZ       return res;
//ZZ    }

   /* Read of guest state, provided the offset is 4-aligned and within
      range of the scaled-immediate form of the load. */
   if (e->tag == Iex_Get) {
      Int offs = e->Iex.Get.offset;
      if (offs >= 0 && offs < 16384 && 0 == (offs & 3)) {
         HReg rD = newVRegD(env);
         HReg rN = get_baseblock_register();
         addInstr(env, ARM64Instr_VLdStS(True/*isLoad*/, rD, rN, offs));
         return rD;
      }
   }

   if (e->tag == Iex_Unop) {
      switch (e->Iex.Unop.op) {
//ZZ      case Iop_ReinterpI32asF32: {
//ZZ         HReg dst = newVRegF(env);
//ZZ         HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
//ZZ         addInstr(env, ARMInstr_VXferS(True/*toS*/, dst, src));
//ZZ         return dst;
//ZZ      }
         case Iop_NegF32: {
            HReg src = iselFltExpr(env, e->Iex.Unop.arg);
            HReg dst = newVRegD(env);
            addInstr(env, ARM64Instr_VUnaryS(ARM64fpu_NEG, dst, src));
            return dst;
         }
         case Iop_AbsF32: {
            HReg src = iselFltExpr(env, e->Iex.Unop.arg);
            HReg dst = newVRegD(env);
            addInstr(env, ARM64Instr_VUnaryS(ARM64fpu_ABS, dst, src));
            return dst;
         }
         default:
            break;
      }
   }

   if (e->tag == Iex_Binop) {
      switch (e->Iex.Binop.op) {
         case Iop_RoundF32toInt: {
            /* arg1 carries the IR rounding mode; install it in FPCR
               before issuing the rounding instruction. */
            HReg src = iselFltExpr(env, e->Iex.Binop.arg2);
            HReg dst = newVRegD(env);
            set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
            addInstr(env, ARM64Instr_VUnaryS(ARM64fpu_RINT, dst, src));
            return dst;
         }
         case Iop_SqrtF32: {
            HReg src = iselFltExpr(env, e->Iex.Binop.arg2);
            HReg dst = newVRegD(env);
            set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
            addInstr(env, ARM64Instr_VUnaryS(ARM64fpu_SQRT, dst, src));
            return dst;
         }
         case Iop_F64toF32: {
            /* Narrowing F64->F32 can lose precision, hence the
               explicit rounding mode in arg1. */
            HReg srcD = iselDblExpr(env, e->Iex.Binop.arg2);
            set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
            HReg dstS = newVRegD(env);
            addInstr(env, ARM64Instr_VCvtSD(False/*dToS*/, dstS, srcD));
            return dstS;
         }
         case Iop_I32UtoF32:
         case Iop_I32StoF32:
         case Iop_I64UtoF32:
         case Iop_I64StoF32: {
            /* int -> F32 is inexact in general, so all four variants
               take a rounding mode in arg1. */
            ARM64CvtOp cvt_op = ARM64cvt_INVALID;
            switch (e->Iex.Binop.op) {
               case Iop_I32UtoF32: cvt_op = ARM64cvt_F32_I32U; break;
               case Iop_I32StoF32: cvt_op = ARM64cvt_F32_I32S; break;
               case Iop_I64UtoF32: cvt_op = ARM64cvt_F32_I64U; break;
               case Iop_I64StoF32: cvt_op = ARM64cvt_F32_I64S; break;
               default: vassert(0);
            }
            HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2);
            set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
            HReg dstS = newVRegD(env);
            addInstr(env, ARM64Instr_VCvtI2F(cvt_op, dstS, srcI));
            return dstS;
         }
         default:
            break;
      }
   }

   /* F32 arithmetic: arg1 is the rounding mode, arg2/arg3 the
      operands. */
   if (e->tag == Iex_Triop) {
      IRTriop* triop = e->Iex.Triop.details;
      ARM64FpBinOp sglop = ARM64fpb_INVALID;
      switch (triop->op) {
         case Iop_DivF32: sglop = ARM64fpb_DIV; break;
         case Iop_MulF32: sglop = ARM64fpb_MUL; break;
         case Iop_SubF32: sglop = ARM64fpb_SUB; break;
         case Iop_AddF32: sglop = ARM64fpb_ADD; break;
         default: break;
      }
      if (sglop != ARM64fpb_INVALID) {
         HReg argL = iselFltExpr(env, triop->arg2);
         HReg argR = iselFltExpr(env, triop->arg3);
         HReg dst = newVRegD(env);
         set_FPCR_rounding_mode(env, triop->arg1);
         addInstr(env, ARM64Instr_VBinS(sglop, dst, argL, argR));
         return dst;
      }
   }

//ZZ
//ZZ    if (e->tag == Iex_ITE) { // VFD
//ZZ       if (ty == Ity_F32
//ZZ           && typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) {
//ZZ          ARMCondCode cc;
//ZZ          HReg r1 = iselFltExpr(env, e->Iex.ITE.iftrue);
//ZZ          HReg r0 = iselFltExpr(env, e->Iex.ITE.iffalse);
//ZZ          HReg dst = newVRegF(env);
//ZZ          addInstr(env, ARMInstr_VUnaryS(ARMvfpu_COPY, dst, r1));
//ZZ          cc = iselCondCode(env, e->Iex.ITE.cond);
//ZZ          addInstr(env, ARMInstr_VCMovS(cc ^ 1, dst, r0));
//ZZ          return dst;
//ZZ       }
//ZZ    }

   /* No pattern matched: give up. */
   ppIRExpr(e);
   vpanic("iselFltExpr_wrk");
}
6405
6406
6407/*---------------------------------------------------------*/
6408/*--- ISEL: Statements ---*/
6409/*---------------------------------------------------------*/
6410
/* Select instructions for a single IR statement, appending the
   generated ARM64 instructions to env.  Unhandled statement forms
   fall through to stmt_fail and panic. */
static void iselStmt ( ISelEnv* env, IRStmt* stmt )
{
   if (vex_traceflags & VEX_TRACE_VCODE) {
      vex_printf("\n-- ");
      ppIRStmt(stmt);
      vex_printf("\n");
   }
   switch (stmt->tag) {

      /* --------- STORE --------- */
      /* little-endian write to memory */
      case Ist_Store: {
         IRType tya = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
         IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
         IREndness end = stmt->Ist.Store.end;

         /* Addresses must be 64-bit and stores little-endian. */
         if (tya != Ity_I64 || end != Iend_LE)
            goto stmt_fail;

         if (tyd == Ity_I64) {
            HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
            ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd);
            addInstr(env, ARM64Instr_LdSt64(False/*!isLoad*/, rD, am));
            return;
         }
         if (tyd == Ity_I32) {
            HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
            ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd);
            addInstr(env, ARM64Instr_LdSt32(False/*!isLoad*/, rD, am));
            return;
         }
         if (tyd == Ity_I16) {
            HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
            ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd);
            addInstr(env, ARM64Instr_LdSt16(False/*!isLoad*/, rD, am));
            return;
         }
         if (tyd == Ity_I8) {
            HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
            ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd);
            addInstr(env, ARM64Instr_LdSt8(False/*!isLoad*/, rD, am));
            return;
         }
         if (tyd == Ity_V128) {
            HReg qD = iselV128Expr(env, stmt->Ist.Store.data);
            HReg addr = iselIntExpr_R(env, stmt->Ist.Store.addr);
            addInstr(env, ARM64Instr_VLdStQ(False/*!isLoad*/, qD, addr));
            return;
         }
         if (tyd == Ity_F64) {
            HReg dD = iselDblExpr(env, stmt->Ist.Store.data);
            HReg addr = iselIntExpr_R(env, stmt->Ist.Store.addr);
            addInstr(env, ARM64Instr_VLdStD(False/*!isLoad*/, dD, addr, 0));
            return;
         }
         if (tyd == Ity_F32) {
            HReg sD = iselFltExpr(env, stmt->Ist.Store.data);
            HReg addr = iselIntExpr_R(env, stmt->Ist.Store.addr);
            addInstr(env, ARM64Instr_VLdStS(False/*!isLoad*/, sD, addr, 0));
            return;
         }

//ZZ       if (tyd == Ity_I16) {
//ZZ          HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
//ZZ          ARMAMode2* am = iselIntExpr_AMode2(env, stmt->Ist.Store.addr);
//ZZ          addInstr(env, ARMInstr_LdSt16(ARMcc_AL,
//ZZ                                        False/*!isLoad*/,
//ZZ                                        False/*!isSignedLoad*/, rD, am));
//ZZ          return;
//ZZ       }
//ZZ       if (tyd == Ity_I8) {
//ZZ          HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
//ZZ          ARMAMode1* am = iselIntExpr_AMode1(env, stmt->Ist.Store.addr);
//ZZ          addInstr(env, ARMInstr_LdSt8U(ARMcc_AL, False/*!isLoad*/, rD, am));
//ZZ          return;
//ZZ       }
//ZZ       if (tyd == Ity_I64) {
//ZZ          if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
//ZZ             HReg dD = iselNeon64Expr(env, stmt->Ist.Store.data);
//ZZ             ARMAModeN* am = iselIntExpr_AModeN(env, stmt->Ist.Store.addr);
//ZZ             addInstr(env, ARMInstr_NLdStD(False, dD, am));
//ZZ          } else {
//ZZ             HReg rDhi, rDlo, rA;
//ZZ             iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.Store.data);
//ZZ             rA = iselIntExpr_R(env, stmt->Ist.Store.addr);
//ZZ             addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!load*/, rDhi,
//ZZ                                           ARMAMode1_RI(rA,4)));
//ZZ             addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!load*/, rDlo,
//ZZ                                           ARMAMode1_RI(rA,0)));
//ZZ          }
//ZZ          return;
//ZZ       }
//ZZ       if (tyd == Ity_F64) {
//ZZ          HReg dD = iselDblExpr(env, stmt->Ist.Store.data);
//ZZ          ARMAModeV* am = iselIntExpr_AModeV(env, stmt->Ist.Store.addr);
//ZZ          addInstr(env, ARMInstr_VLdStD(False/*!isLoad*/, dD, am));
//ZZ          return;
//ZZ       }
//ZZ       if (tyd == Ity_F32) {
//ZZ          HReg fD = iselFltExpr(env, stmt->Ist.Store.data);
//ZZ          ARMAModeV* am = iselIntExpr_AModeV(env, stmt->Ist.Store.addr);
//ZZ          addInstr(env, ARMInstr_VLdStS(False/*!isLoad*/, fD, am));
//ZZ          return;
//ZZ       }
//ZZ       if (tyd == Ity_V128) {
//ZZ          HReg qD = iselNeonExpr(env, stmt->Ist.Store.data);
//ZZ          ARMAModeN* am = iselIntExpr_AModeN(env, stmt->Ist.Store.addr);
//ZZ          addInstr(env, ARMInstr_NLdStQ(False, qD, am));
//ZZ          return;
//ZZ       }

         break;
      }

//ZZ    /* --------- CONDITIONAL STORE --------- */
//ZZ    /* conditional little-endian write to memory */
//ZZ    case Ist_StoreG: {
//ZZ       IRStoreG* sg = stmt->Ist.StoreG.details;
//ZZ       IRType tya = typeOfIRExpr(env->type_env, sg->addr);
//ZZ       IRType tyd = typeOfIRExpr(env->type_env, sg->data);
//ZZ       IREndness end = sg->end;
//ZZ
//ZZ       if (tya != Ity_I32 || end != Iend_LE)
//ZZ          goto stmt_fail;
//ZZ
//ZZ       switch (tyd) {
//ZZ          case Ity_I8:
//ZZ          case Ity_I32: {
//ZZ             HReg rD = iselIntExpr_R(env, sg->data);
//ZZ             ARMAMode1* am = iselIntExpr_AMode1(env, sg->addr);
//ZZ             ARMCondCode cc = iselCondCode(env, sg->guard);
//ZZ             addInstr(env, (tyd == Ity_I32 ? ARMInstr_LdSt32 : ARMInstr_LdSt8U)
//ZZ                              (cc, False/*!isLoad*/, rD, am));
//ZZ             return;
//ZZ          }
//ZZ          case Ity_I16: {
//ZZ             HReg rD = iselIntExpr_R(env, sg->data);
//ZZ             ARMAMode2* am = iselIntExpr_AMode2(env, sg->addr);
//ZZ             ARMCondCode cc = iselCondCode(env, sg->guard);
//ZZ             addInstr(env, ARMInstr_LdSt16(cc,
//ZZ                                           False/*!isLoad*/,
//ZZ                                           False/*!isSignedLoad*/, rD, am));
//ZZ             return;
//ZZ          }
//ZZ          default:
//ZZ             break;
//ZZ       }
//ZZ       break;
//ZZ    }
//ZZ
//ZZ    /* --------- CONDITIONAL LOAD --------- */
//ZZ    /* conditional little-endian load from memory */
//ZZ    case Ist_LoadG: {
//ZZ       IRLoadG* lg = stmt->Ist.LoadG.details;
//ZZ       IRType tya = typeOfIRExpr(env->type_env, lg->addr);
//ZZ       IREndness end = lg->end;
//ZZ
//ZZ       if (tya != Ity_I32 || end != Iend_LE)
//ZZ          goto stmt_fail;
//ZZ
//ZZ       switch (lg->cvt) {
//ZZ          case ILGop_8Uto32:
//ZZ          case ILGop_Ident32: {
//ZZ             HReg rAlt = iselIntExpr_R(env, lg->alt);
//ZZ             ARMAMode1* am = iselIntExpr_AMode1(env, lg->addr);
//ZZ             HReg rD = lookupIRTemp(env, lg->dst);
//ZZ             addInstr(env, mk_iMOVds_RR(rD, rAlt));
//ZZ             ARMCondCode cc = iselCondCode(env, lg->guard);
//ZZ             addInstr(env, (lg->cvt == ILGop_Ident32 ? ARMInstr_LdSt32
//ZZ                                                     : ARMInstr_LdSt8U)
//ZZ                              (cc, True/*isLoad*/, rD, am));
//ZZ             return;
//ZZ          }
//ZZ          case ILGop_16Sto32:
//ZZ          case ILGop_16Uto32:
//ZZ          case ILGop_8Sto32: {
//ZZ             HReg rAlt = iselIntExpr_R(env, lg->alt);
//ZZ             ARMAMode2* am = iselIntExpr_AMode2(env, lg->addr);
//ZZ             HReg rD = lookupIRTemp(env, lg->dst);
//ZZ             addInstr(env, mk_iMOVds_RR(rD, rAlt));
//ZZ             ARMCondCode cc = iselCondCode(env, lg->guard);
//ZZ             if (lg->cvt == ILGop_8Sto32) {
//ZZ                addInstr(env, ARMInstr_Ld8S(cc, rD, am));
//ZZ             } else {
//ZZ                vassert(lg->cvt == ILGop_16Sto32 || lg->cvt == ILGop_16Uto32);
//ZZ                Bool sx = lg->cvt == ILGop_16Sto32;
//ZZ                addInstr(env, ARMInstr_LdSt16(cc, True/*isLoad*/, sx, rD, am));
//ZZ             }
//ZZ             return;
//ZZ          }
//ZZ          default:
//ZZ             break;
//ZZ       }
//ZZ       break;
//ZZ    }

      /* --------- PUT --------- */
      /* write guest state, fixed offset */
      case Ist_Put: {
         IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Put.data);
         UInt offs = (UInt)stmt->Ist.Put.offset;
         /* Each case requires suitable alignment of the offset and
            that it fits the scaled 12-bit immediate of the store. */
         if (tyd == Ity_I64 && 0 == (offs & 7) && offs < (8<<12)) {
            HReg rD = iselIntExpr_R(env, stmt->Ist.Put.data);
            ARM64AMode* am = mk_baseblock_64bit_access_amode(offs);
            addInstr(env, ARM64Instr_LdSt64(False/*!isLoad*/, rD, am));
            return;
         }
         if (tyd == Ity_I32 && 0 == (offs & 3) && offs < (4<<12)) {
            HReg rD = iselIntExpr_R(env, stmt->Ist.Put.data);
            ARM64AMode* am = mk_baseblock_32bit_access_amode(offs);
            addInstr(env, ARM64Instr_LdSt32(False/*!isLoad*/, rD, am));
            return;
         }
         if (tyd == Ity_I16 && 0 == (offs & 1) && offs < (2<<12)) {
            HReg rD = iselIntExpr_R(env, stmt->Ist.Put.data);
            ARM64AMode* am = mk_baseblock_16bit_access_amode(offs);
            addInstr(env, ARM64Instr_LdSt16(False/*!isLoad*/, rD, am));
            return;
         }
         if (tyd == Ity_I8 && offs < (1<<12)) {
            HReg rD = iselIntExpr_R(env, stmt->Ist.Put.data);
            ARM64AMode* am = mk_baseblock_8bit_access_amode(offs);
            addInstr(env, ARM64Instr_LdSt8(False/*!isLoad*/, rD, am));
            return;
         }
         if (tyd == Ity_V128 && offs < (1<<12)) {
            HReg qD = iselV128Expr(env, stmt->Ist.Put.data);
            HReg addr = mk_baseblock_128bit_access_addr(env, offs);
            addInstr(env, ARM64Instr_VLdStQ(False/*!isLoad*/, qD, addr));
            return;
         }
         if (tyd == Ity_F64 && 0 == (offs & 7) && offs < (8<<12)) {
            HReg dD = iselDblExpr(env, stmt->Ist.Put.data);
            HReg bbp = get_baseblock_register();
            addInstr(env, ARM64Instr_VLdStD(False/*!isLoad*/, dD, bbp, offs));
            return;
         }
         if (tyd == Ity_F32 && 0 == (offs & 3) && offs < (4<<12)) {
            HReg dD = iselFltExpr(env, stmt->Ist.Put.data);
            HReg bbp = get_baseblock_register();
            addInstr(env, ARM64Instr_VLdStS(False/*!isLoad*/, dD, bbp, offs));
            return;
         }

//ZZ       if (tyd == Ity_I64) {
//ZZ          if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
//ZZ             HReg addr = newVRegI(env);
//ZZ             HReg qD = iselNeon64Expr(env, stmt->Ist.Put.data);
//ZZ             addInstr(env, ARMInstr_Add32(addr, hregARM_R8(),
//ZZ                                          stmt->Ist.Put.offset));
//ZZ             addInstr(env, ARMInstr_NLdStD(False, qD, mkARMAModeN_R(addr)));
//ZZ          } else {
//ZZ             HReg rDhi, rDlo;
//ZZ             ARMAMode1* am0 = ARMAMode1_RI(hregARM_R8(),
//ZZ                                           stmt->Ist.Put.offset + 0);
//ZZ             ARMAMode1* am4 = ARMAMode1_RI(hregARM_R8(),
//ZZ                                           stmt->Ist.Put.offset + 4);
//ZZ             iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.Put.data);
//ZZ             addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!isLoad*/,
//ZZ                                           rDhi, am4));
//ZZ             addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!isLoad*/,
//ZZ                                           rDlo, am0));
//ZZ          }
//ZZ          return;
//ZZ       }
//ZZ       if (tyd == Ity_F64) {
//ZZ          // XXX This won't work if offset > 1020 or is not 0 % 4.
//ZZ          // In which case we'll have to generate more longwinded code.
//ZZ          ARMAModeV* am = mkARMAModeV(hregARM_R8(), stmt->Ist.Put.offset);
//ZZ          HReg rD = iselDblExpr(env, stmt->Ist.Put.data);
//ZZ          addInstr(env, ARMInstr_VLdStD(False/*!isLoad*/, rD, am));
//ZZ          return;
//ZZ       }
//ZZ       if (tyd == Ity_F32) {
//ZZ          // XXX This won't work if offset > 1020 or is not 0 % 4.
//ZZ          // In which case we'll have to generate more longwinded code.
//ZZ          ARMAModeV* am = mkARMAModeV(hregARM_R8(), stmt->Ist.Put.offset);
//ZZ          HReg rD = iselFltExpr(env, stmt->Ist.Put.data);
//ZZ          addInstr(env, ARMInstr_VLdStS(False/*!isLoad*/, rD, am));
//ZZ          return;
//ZZ       }
         break;
      }

      /* --------- TMP --------- */
      /* assign value to temporary */
      case Ist_WrTmp: {
         IRTemp tmp = stmt->Ist.WrTmp.tmp;
         IRType ty = typeOfIRTemp(env->type_env, tmp);

         if (ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
            /* We could do a lot better here. But for the time being: */
            HReg dst = lookupIRTemp(env, tmp);
            HReg rD = iselIntExpr_R(env, stmt->Ist.WrTmp.data);
            addInstr(env, ARM64Instr_MovI(dst, rD));
            return;
         }
         if (ty == Ity_I1) {
            /* Here, we are generating a I1 value into a 64 bit register.
               Make sure the value in the register is only zero or one,
               but no other. This allows optimisation of the
               1Uto64(tmp:I1) case, by making it simply a copy of the
               register holding 'tmp'. The point being that the value in
               the register holding 'tmp' can only have been created
               here. LATER: that seems dangerous; safer to do 'tmp & 1'
               in that case. Also, could do this just with a single CINC
               insn. */
            /* CLONE-01 */
            HReg zero = newVRegI(env);
            HReg one = newVRegI(env);
            HReg dst = lookupIRTemp(env, tmp);
            addInstr(env, ARM64Instr_Imm64(zero, 0));
            addInstr(env, ARM64Instr_Imm64(one, 1));
            ARM64CondCode cc = iselCondCode(env, stmt->Ist.WrTmp.data);
            addInstr(env, ARM64Instr_CSel(dst, one, zero, cc));
            return;
         }
         if (ty == Ity_F64) {
            HReg src = iselDblExpr(env, stmt->Ist.WrTmp.data);
            HReg dst = lookupIRTemp(env, tmp);
            addInstr(env, ARM64Instr_VMov(8, dst, src));
            return;
         }
         if (ty == Ity_F32) {
            /* F32 values are kept in HRcFlt64 registers (see the note
               above iselFltExpr), hence the 8-byte move. */
            HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data);
            HReg dst = lookupIRTemp(env, tmp);
            addInstr(env, ARM64Instr_VMov(8/*yes, really*/, dst, src));
            return;
         }
         if (ty == Ity_V128) {
            HReg src = iselV128Expr(env, stmt->Ist.WrTmp.data);
            HReg dst = lookupIRTemp(env, tmp);
            addInstr(env, ARM64Instr_VMov(16, dst, src));
            return;
         }
         break;
      }

      /* --------- Call to DIRTY helper --------- */
      /* call complex ("dirty") helper function */
      case Ist_Dirty: {
         IRDirty* d = stmt->Ist.Dirty.details;

         /* Figure out the return type, if any. */
         IRType retty = Ity_INVALID;
         if (d->tmp != IRTemp_INVALID)
            retty = typeOfIRTemp(env->type_env, d->tmp);

         Bool retty_ok = False;
         switch (retty) {
            case Ity_INVALID: /* function doesn't return anything */
            case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
            case Ity_V128:
               retty_ok = True; break;
            default:
               break;
         }
         if (!retty_ok)
            break; /* will go to stmt_fail: */

         /* Marshal args, do the call, and set the return value to 0x555..555
            if this is a conditional call that returns a value and the
            call is skipped. */
         UInt addToSp = 0;
         RetLoc rloc = mk_RetLoc_INVALID();
         doHelperCall( &addToSp, &rloc, env, d->guard, d->cee, retty, d->args );
         vassert(is_sane_RetLoc(rloc));

         /* Now figure out what to do with the returned value, if any. */
         switch (retty) {
            case Ity_INVALID: {
               /* No return value. Nothing to do. */
               vassert(d->tmp == IRTemp_INVALID);
               vassert(rloc.pri == RLPri_None);
               vassert(addToSp == 0);
               return;
            }
            case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8: {
               vassert(rloc.pri == RLPri_Int);
               vassert(addToSp == 0);
               /* The returned value is in x0. Park it in the register
                  associated with tmp. */
               HReg dst = lookupIRTemp(env, d->tmp);
               addInstr(env, ARM64Instr_MovI(dst, hregARM64_X0()) );
               return;
            }
            case Ity_V128: {
               /* The returned value is on the stack, and *retloc tells
                  us where. Fish it off the stack and then move the
                  stack pointer upwards to clear it, as directed by
                  doHelperCall. */
               vassert(rloc.pri == RLPri_V128SpRel);
               vassert(rloc.spOff < 256); // stay sane
               vassert(addToSp >= 16); // ditto
               vassert(addToSp < 256); // ditto
               HReg dst = lookupIRTemp(env, d->tmp);
               HReg tmp = newVRegI(env); // the address of the returned value
               addInstr(env, ARM64Instr_FromSP(tmp)); // tmp = SP
               addInstr(env, ARM64Instr_Arith(tmp, tmp,
                                              ARM64RIA_I12((UShort)rloc.spOff, 0),
                                              True/*isAdd*/ ));
               addInstr(env, ARM64Instr_VLdStQ(True/*isLoad*/, dst, tmp));
               addInstr(env, ARM64Instr_AddToSP(addToSp));
               return;
            }
            default:
               /*NOTREACHED*/
               vassert(0);
         }
         break;
      }

      /* --------- Load Linked and Store Conditional --------- */
      case Ist_LLSC: {
         if (stmt->Ist.LLSC.storedata == NULL) {
            /* LL */
            IRTemp res = stmt->Ist.LLSC.result;
            IRType ty = typeOfIRTemp(env->type_env, res);
            if (ty == Ity_I64 || ty == Ity_I32
                || ty == Ity_I16 || ty == Ity_I8) {
               Int szB = 0;
               HReg r_dst = lookupIRTemp(env, res);
               HReg raddr = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
               switch (ty) {
                  case Ity_I8: szB = 1; break;
                  case Ity_I16: szB = 2; break;
                  case Ity_I32: szB = 4; break;
                  case Ity_I64: szB = 8; break;
                  default: vassert(0);
               }
               /* LdrEX uses fixed registers: address in x4, loaded
                  value delivered in x2. */
               addInstr(env, ARM64Instr_MovI(hregARM64_X4(), raddr));
               addInstr(env, ARM64Instr_LdrEX(szB));
               addInstr(env, ARM64Instr_MovI(r_dst, hregARM64_X2()));
               return;
            }
            goto stmt_fail;
         } else {
            /* SC */
            IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.LLSC.storedata);
            if (tyd == Ity_I64 || tyd == Ity_I32
                || tyd == Ity_I16 || tyd == Ity_I8) {
               Int szB = 0;
               HReg rD = iselIntExpr_R(env, stmt->Ist.LLSC.storedata);
               HReg rA = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
               switch (tyd) {
                  case Ity_I8: szB = 1; break;
                  case Ity_I16: szB = 2; break;
                  case Ity_I32: szB = 4; break;
                  case Ity_I64: szB = 8; break;
                  default: vassert(0);
               }
               /* StrEX uses fixed registers: data in x2, address in
                  x4; it leaves its status code in x0. */
               addInstr(env, ARM64Instr_MovI(hregARM64_X2(), rD));
               addInstr(env, ARM64Instr_MovI(hregARM64_X4(), rA));
               addInstr(env, ARM64Instr_StrEX(szB));
            } else {
               goto stmt_fail;
            }
            /* now r0 is 1 if failed, 0 if success. Change to IR
               conventions (0 is fail, 1 is success). Also transfer
               result to r_res. */
            IRTemp res = stmt->Ist.LLSC.result;
            IRType ty = typeOfIRTemp(env->type_env, res);
            HReg r_res = lookupIRTemp(env, res);
            ARM64RIL* one = mb_mkARM64RIL_I(1);
            vassert(ty == Ity_I1);
            vassert(one);
            addInstr(env, ARM64Instr_Logic(r_res, hregARM64_X0(), one,
                                           ARM64lo_XOR));
            /* And be conservative -- mask off all but the lowest bit. */
            addInstr(env, ARM64Instr_Logic(r_res, r_res, one,
                                           ARM64lo_AND));
            return;
         }
         break;
      }

      /* --------- MEM FENCE --------- */
      case Ist_MBE:
         switch (stmt->Ist.MBE.event) {
            case Imbe_Fence:
               addInstr(env, ARM64Instr_MFence());
               return;
//ZZ          case Imbe_CancelReservation:
//ZZ             addInstr(env, ARMInstr_CLREX());
//ZZ             return;
            default:
               break;
         }
         break;

      /* --------- INSTR MARK --------- */
      /* Doesn't generate any executable code ... */
      case Ist_IMark:
         return;

      /* --------- NO-OP --------- */
      case Ist_NoOp:
         return;

      /* --------- EXIT --------- */
      case Ist_Exit: {
         if (stmt->Ist.Exit.dst->tag != Ico_U64)
            vpanic("isel_arm: Ist_Exit: dst is not a 64-bit value");

         ARM64CondCode cc
            = iselCondCode(env, stmt->Ist.Exit.guard);
         ARM64AMode* amPC
            = mk_baseblock_64bit_access_amode(stmt->Ist.Exit.offsIP);

         /* Case: boring transfer to known address */
         if (stmt->Ist.Exit.jk == Ijk_Boring
             /*ATC || stmt->Ist.Exit.jk == Ijk_Call */
             /*ATC || stmt->Ist.Exit.jk == Ijk_Ret */ ) {
            if (env->chainingAllowed) {
               /* .. almost always true .. */
               /* Skip the event check at the dst if this is a forwards
                  edge. */
               Bool toFastEP
                  = ((Addr64)stmt->Ist.Exit.dst->Ico.U64) > env->max_ga;
               if (0) vex_printf("%s", toFastEP ? "Y" : ",");
               addInstr(env, ARM64Instr_XDirect(stmt->Ist.Exit.dst->Ico.U64,
                                                amPC, cc, toFastEP));
            } else {
               /* .. very occasionally .. */
               /* We can't use chaining, so ask for an assisted transfer,
                  as that's the only alternative that is allowable. */
               HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
               addInstr(env, ARM64Instr_XAssisted(r, amPC, cc, Ijk_Boring));
            }
            return;
         }

//ZZ       /* Case: assisted transfer to arbitrary address */
//ZZ       switch (stmt->Ist.Exit.jk) {
//ZZ          /* Keep this list in sync with that in iselNext below */
//ZZ          case Ijk_ClientReq:
//ZZ          case Ijk_NoDecode:
//ZZ          case Ijk_NoRedir:
//ZZ          case Ijk_Sys_syscall:
//ZZ          case Ijk_InvalICache:
//ZZ          case Ijk_Yield:
//ZZ          {
//ZZ             HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
//ZZ             addInstr(env, ARMInstr_XAssisted(r, amR15T, cc,
//ZZ                                              stmt->Ist.Exit.jk));
//ZZ             return;
//ZZ          }
//ZZ          default:
//ZZ             break;
//ZZ       }

         /* Do we ever expect to see any other kind? */
         goto stmt_fail;
      }

      default: break;
   }
  stmt_fail:
   ppIRStmt(stmt);
   vpanic("iselStmt");
}
6972
6973
6974/*---------------------------------------------------------*/
6975/*--- ISEL: Basic block terminators (Nexts) ---*/
6976/*---------------------------------------------------------*/
6977
6978static void iselNext ( ISelEnv* env,
6979 IRExpr* next, IRJumpKind jk, Int offsIP )
6980{
6981 if (vex_traceflags & VEX_TRACE_VCODE) {
6982 vex_printf( "\n-- PUT(%d) = ", offsIP);
6983 ppIRExpr( next );
6984 vex_printf( "; exit-");
6985 ppIRJumpKind(jk);
6986 vex_printf( "\n");
6987 }
6988
6989 /* Case: boring transfer to known address */
6990 if (next->tag == Iex_Const) {
6991 IRConst* cdst = next->Iex.Const.con;
6992 vassert(cdst->tag == Ico_U64);
6993 if (jk == Ijk_Boring || jk == Ijk_Call) {
6994 /* Boring transfer to known address */
6995 ARM64AMode* amPC = mk_baseblock_64bit_access_amode(offsIP);
6996 if (env->chainingAllowed) {
6997 /* .. almost always true .. */
6998 /* Skip the event check at the dst if this is a forwards
6999 edge. */
7000 Bool toFastEP
7001 = ((Addr64)cdst->Ico.U64) > env->max_ga;
7002 if (0) vex_printf("%s", toFastEP ? "X" : ".");
7003 addInstr(env, ARM64Instr_XDirect(cdst->Ico.U64,
7004 amPC, ARM64cc_AL,
7005 toFastEP));
7006 } else {
7007 /* .. very occasionally .. */
7008 /* We can't use chaining, so ask for an assisted transfer,
7009 as that's the only alternative that is allowable. */
7010 HReg r = iselIntExpr_R(env, next);
7011 addInstr(env, ARM64Instr_XAssisted(r, amPC, ARM64cc_AL,
7012 Ijk_Boring));
7013 }
7014 return;
7015 }
7016 }
7017
7018 /* Case: call/return (==boring) transfer to any address */
7019 switch (jk) {
7020 case Ijk_Boring: case Ijk_Ret: case Ijk_Call: {
7021 HReg r = iselIntExpr_R(env, next);
7022 ARM64AMode* amPC = mk_baseblock_64bit_access_amode(offsIP);
7023 if (env->chainingAllowed) {
7024 addInstr(env, ARM64Instr_XIndir(r, amPC, ARM64cc_AL));
7025 } else {
7026 addInstr(env, ARM64Instr_XAssisted(r, amPC, ARM64cc_AL,
7027 Ijk_Boring));
7028 }
7029 return;
7030 }
7031 default:
7032 break;
7033 }
7034
7035 /* Case: assisted transfer to arbitrary address */
7036 switch (jk) {
7037 /* Keep this list in sync with that for Ist_Exit above */
7038 case Ijk_ClientReq:
7039 case Ijk_NoDecode:
sewardj99c1f812014-03-09 09:41:56 +00007040 case Ijk_NoRedir:
sewardjbbcf1882014-01-12 12:49:10 +00007041 case Ijk_Sys_syscall:
sewardj05f5e012014-05-04 10:52:11 +00007042 case Ijk_InvalICache:
sewardj65902992014-05-03 21:20:56 +00007043 case Ijk_FlushDCache:
sewardjbbcf1882014-01-12 12:49:10 +00007044//ZZ case Ijk_Yield:
7045 {
7046 HReg r = iselIntExpr_R(env, next);
7047 ARM64AMode* amPC = mk_baseblock_64bit_access_amode(offsIP);
7048 addInstr(env, ARM64Instr_XAssisted(r, amPC, ARM64cc_AL, jk));
7049 return;
7050 }
7051 default:
7052 break;
7053 }
7054
7055 vex_printf( "\n-- PUT(%d) = ", offsIP);
7056 ppIRExpr( next );
7057 vex_printf( "; exit-");
7058 ppIRJumpKind(jk);
7059 vex_printf( "\n");
7060 vassert(0); // are we expecting any other kind?
7061}
7062
7063
7064/*---------------------------------------------------------*/
7065/*--- Insn selector top-level ---*/
7066/*---------------------------------------------------------*/
7067
7068/* Translate an entire SB to arm64 code. */
7069
7070HInstrArray* iselSB_ARM64 ( IRSB* bb,
7071 VexArch arch_host,
7072 VexArchInfo* archinfo_host,
7073 VexAbiInfo* vbi/*UNUSED*/,
7074 Int offs_Host_EvC_Counter,
7075 Int offs_Host_EvC_FailAddr,
7076 Bool chainingAllowed,
7077 Bool addProfInc,
7078 Addr64 max_ga )
7079{
7080 Int i, j;
7081 HReg hreg, hregHI;
7082 ISelEnv* env;
7083 UInt hwcaps_host = archinfo_host->hwcaps;
7084 ARM64AMode *amCounter, *amFailAddr;
7085
7086 /* sanity ... */
7087 vassert(arch_host == VexArchARM64);
7088
7089 /* guard against unexpected space regressions */
7090 vassert(sizeof(ARM64Instr) <= 32);
7091
7092 /* Make up an initial environment to use. */
7093 env = LibVEX_Alloc(sizeof(ISelEnv));
7094 env->vreg_ctr = 0;
7095
7096 /* Set up output code array. */
7097 env->code = newHInstrArray();
7098
7099 /* Copy BB's type env. */
7100 env->type_env = bb->tyenv;
7101
7102 /* Make up an IRTemp -> virtual HReg mapping. This doesn't
7103 change as we go along. */
7104 env->n_vregmap = bb->tyenv->types_used;
7105 env->vregmap = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
7106 env->vregmapHI = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
7107
7108 /* and finally ... */
7109 env->chainingAllowed = chainingAllowed;
7110 env->hwcaps = hwcaps_host;
7111 env->previous_rm = NULL;
7112 env->max_ga = max_ga;
7113
7114 /* For each IR temporary, allocate a suitably-kinded virtual
7115 register. */
7116 j = 0;
7117 for (i = 0; i < env->n_vregmap; i++) {
7118 hregHI = hreg = INVALID_HREG;
7119 switch (bb->tyenv->types[i]) {
7120 case Ity_I1:
7121 case Ity_I8: case Ity_I16: case Ity_I32: case Ity_I64:
7122 hreg = mkHReg(j++, HRcInt64, True);
7123 break;
7124 case Ity_I128:
7125 hreg = mkHReg(j++, HRcInt64, True);
7126 hregHI = mkHReg(j++, HRcInt64, True);
7127 break;
7128 case Ity_F32: // we'll use HRcFlt64 regs for F32 too
7129 case Ity_F64:
7130 hreg = mkHReg(j++, HRcFlt64, True);
7131 break;
7132 case Ity_V128:
7133 hreg = mkHReg(j++, HRcVec128, True);
7134 break;
7135 default:
7136 ppIRType(bb->tyenv->types[i]);
7137 vpanic("iselBB(arm64): IRTemp type");
7138 }
7139 env->vregmap[i] = hreg;
7140 env->vregmapHI[i] = hregHI;
7141 }
7142 env->vreg_ctr = j;
7143
7144 /* The very first instruction must be an event check. */
7145 amCounter = ARM64AMode_RI9(hregARM64_X21(), offs_Host_EvC_Counter);
7146 amFailAddr = ARM64AMode_RI9(hregARM64_X21(), offs_Host_EvC_FailAddr);
7147 addInstr(env, ARM64Instr_EvCheck(amCounter, amFailAddr));
7148
7149 /* Possibly a block counter increment (for profiling). At this
7150 point we don't know the address of the counter, so just pretend
7151 it is zero. It will have to be patched later, but before this
7152 translation is used, by a call to LibVEX_patchProfCtr. */
7153 if (addProfInc) {
7154 vassert(0);
7155 //addInstr(env, ARM64Instr_ProfInc());
7156 }
7157
7158 /* Ok, finally we can iterate over the statements. */
7159 for (i = 0; i < bb->stmts_used; i++)
7160 iselStmt(env, bb->stmts[i]);
7161
7162 iselNext(env, bb->next, bb->jumpkind, bb->offsIP);
7163
7164 /* record the number of vregs we used. */
7165 env->code->n_vregs = env->vreg_ctr;
7166 return env->code;
7167}
7168
7169
7170/*---------------------------------------------------------------*/
7171/*--- end host_arm64_isel.c ---*/
7172/*---------------------------------------------------------------*/