blob: cb027f13f86d01ce68d2fa831ec234b1388f3f00 [file] [log] [blame]
sewardjbbcf1882014-01-12 12:49:10 +00001
2/*---------------------------------------------------------------*/
3/*--- begin host_arm64_isel.c ---*/
4/*---------------------------------------------------------------*/
5
6/*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
9
10 Copyright (C) 2013-2013 OpenWorks
11 info@open-works.net
12
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
17
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26 02110-1301, USA.
27
28 The GNU General Public License is contained in the file COPYING.
29*/
30
31#include "libvex_basictypes.h"
32#include "libvex_ir.h"
33#include "libvex.h"
34#include "ir_match.h"
35
36#include "main_util.h"
37#include "main_globals.h"
38#include "host_generic_regs.h"
39#include "host_generic_simd64.h" // for 32-bit SIMD helpers
40#include "host_arm64_defs.h"
41
42
43//ZZ /*---------------------------------------------------------*/
44//ZZ /*--- ARMvfp control word stuff ---*/
45//ZZ /*---------------------------------------------------------*/
46//ZZ
47//ZZ /* Vex-generated code expects to run with the FPU set as follows: all
48//ZZ exceptions masked, round-to-nearest, non-vector mode, with the NZCV
49//ZZ flags cleared, and FZ (flush to zero) disabled. Curiously enough,
50//ZZ this corresponds to a FPSCR value of zero.
51//ZZ
52//ZZ fpscr should therefore be zero on entry to Vex-generated code, and
53//ZZ should be unchanged at exit. (Or at least the bottom 28 bits
54//ZZ should be zero).
55//ZZ */
56//ZZ
57//ZZ #define DEFAULT_FPSCR 0
58
59
60/*---------------------------------------------------------*/
61/*--- ISelEnv ---*/
62/*---------------------------------------------------------*/
63
64/* This carries around:
65
66 - A mapping from IRTemp to IRType, giving the type of any IRTemp we
67 might encounter. This is computed before insn selection starts,
68 and does not change.
69
70 - A mapping from IRTemp to HReg. This tells the insn selector
71 which virtual register is associated with each IRTemp temporary.
72 This is computed before insn selection starts, and does not
73 change. We expect this mapping to map precisely the same set of
74 IRTemps as the type mapping does.
75
76 |vregmap| holds the primary register for the IRTemp.
77 |vregmapHI| is only used for 128-bit integer-typed
78 IRTemps. It holds the identity of a second
79 64-bit virtual HReg, which holds the high half
80 of the value.
81
82 - The code array, that is, the insns selected so far.
83
84 - A counter, for generating new virtual registers.
85
86 - The host hardware capabilities word. This is set at the start
87 and does not change.
88
89 - A Bool for indicating whether we may generate chain-me
90 instructions for control flow transfers, or whether we must use
91 XAssisted.
92
93 - The maximum guest address of any guest insn in this block.
94 Actually, the address of the highest-addressed byte from any insn
95 in this block. Is set at the start and does not change. This is
96 used for detecting jumps which are definitely forward-edges from
97 this block, and therefore can be made (chained) to the fast entry
98 point of the destination, thereby avoiding the destination's
99 event check.
100
101 - An IRExpr*, which may be NULL, holding the IR expression (an
102 IRRoundingMode-encoded value) to which the FPU's rounding mode
103 was most recently set. Setting to NULL is always safe. Used to
104 avoid redundant settings of the FPU's rounding mode, as
105 described in set_FPCR_rounding_mode below.
106
107 Note, this is all (well, mostly) host-independent.
108*/
109
typedef
   struct {
      /* Constant -- are set at the start and do not change. */
      IRTypeEnv*   type_env;     /* type of each IRTemp in the block */

      HReg*        vregmap;      /* IRTemp -> primary virtual register */
      HReg*        vregmapHI;    /* second vreg: high 64 bits, used only
                                    for 128-bit integer-typed IRTemps */
      Int          n_vregmap;    /* number of entries in the two maps */

      UInt         hwcaps;       /* host hardware capabilities word */

      Bool         chainingAllowed; /* may we emit chain-me transfers,
                                       or must we use XAssisted? */
      Addr64       max_ga;       /* highest guest address of any insn
                                    in this block */

      /* These are modified as we go along. */
      HInstrArray* code;         /* the insns selected so far */
      Int          vreg_ctr;     /* counter for fresh virtual registers */

      IRExpr*      previous_rm;  /* IRRoundingMode-valued expr the FPCR
                                    was last set from, or NULL; see
                                    set_FPCR_rounding_mode */
   }
   ISelEnv;
131
132static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp )
133{
134 vassert(tmp >= 0);
135 vassert(tmp < env->n_vregmap);
136 return env->vregmap[tmp];
137}
138
139static void addInstr ( ISelEnv* env, ARM64Instr* instr )
140{
141 addHInstr(env->code, instr);
142 if (vex_traceflags & VEX_TRACE_VCODE) {
143 ppARM64Instr(instr);
144 vex_printf("\n");
145 }
146}
147
148static HReg newVRegI ( ISelEnv* env )
149{
150 HReg reg = mkHReg(env->vreg_ctr, HRcInt64, True/*virtual reg*/);
151 env->vreg_ctr++;
152 return reg;
153}
154
155static HReg newVRegD ( ISelEnv* env )
156{
157 HReg reg = mkHReg(env->vreg_ctr, HRcFlt64, True/*virtual reg*/);
158 env->vreg_ctr++;
159 return reg;
160}
161
162//ZZ static HReg newVRegF ( ISelEnv* env )
163//ZZ {
164//ZZ HReg reg = mkHReg(env->vreg_ctr, HRcFlt32, True/*virtual reg*/);
165//ZZ env->vreg_ctr++;
166//ZZ return reg;
167//ZZ }
168
169static HReg newVRegV ( ISelEnv* env )
170{
171 HReg reg = mkHReg(env->vreg_ctr, HRcVec128, True/*virtual reg*/);
172 env->vreg_ctr++;
173 return reg;
174}
175
176//ZZ /* These are duplicated in guest_arm_toIR.c */
177//ZZ static IRExpr* unop ( IROp op, IRExpr* a )
178//ZZ {
179//ZZ return IRExpr_Unop(op, a);
180//ZZ }
181//ZZ
182//ZZ static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
183//ZZ {
184//ZZ return IRExpr_Binop(op, a1, a2);
185//ZZ }
186//ZZ
187//ZZ static IRExpr* bind ( Int binder )
188//ZZ {
189//ZZ return IRExpr_Binder(binder);
190//ZZ }
191
192
193/*---------------------------------------------------------*/
194/*--- ISEL: Forward declarations ---*/
195/*---------------------------------------------------------*/
196
197/* These are organised as iselXXX and iselXXX_wrk pairs. The
198 iselXXX_wrk do the real work, but are not to be called directly.
199 For each XXX, iselXXX calls its iselXXX_wrk counterpart, then
200 checks that all returned registers are virtual. You should not
201 call the _wrk version directly.
202
203 Because some forms of ARM64 memory amodes are implicitly scaled by
204 the access size, iselIntExpr_AMode takes an IRType which tells it
205 the type of the access for which the amode is to be used. This
206 type needs to be correct, else you'll get incorrect code.
207*/
208static ARM64AMode* iselIntExpr_AMode_wrk ( ISelEnv* env,
209 IRExpr* e, IRType dty );
210static ARM64AMode* iselIntExpr_AMode ( ISelEnv* env,
211 IRExpr* e, IRType dty );
212
213static ARM64RIA* iselIntExpr_RIA_wrk ( ISelEnv* env, IRExpr* e );
214static ARM64RIA* iselIntExpr_RIA ( ISelEnv* env, IRExpr* e );
215
216static ARM64RIL* iselIntExpr_RIL_wrk ( ISelEnv* env, IRExpr* e );
217static ARM64RIL* iselIntExpr_RIL ( ISelEnv* env, IRExpr* e );
218
219static ARM64RI6* iselIntExpr_RI6_wrk ( ISelEnv* env, IRExpr* e );
220static ARM64RI6* iselIntExpr_RI6 ( ISelEnv* env, IRExpr* e );
221
222static ARM64CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e );
223static ARM64CondCode iselCondCode ( ISelEnv* env, IRExpr* e );
224
225static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e );
226static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e );
227
228static void iselInt128Expr_wrk ( /*OUT*/HReg* rHi, HReg* rLo,
229 ISelEnv* env, IRExpr* e );
230static void iselInt128Expr ( /*OUT*/HReg* rHi, HReg* rLo,
231 ISelEnv* env, IRExpr* e );
232
233
234//ZZ static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo,
235//ZZ ISelEnv* env, IRExpr* e );
236//ZZ static void iselInt64Expr ( HReg* rHi, HReg* rLo,
237//ZZ ISelEnv* env, IRExpr* e );
238
239static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e );
240static HReg iselDblExpr ( ISelEnv* env, IRExpr* e );
241
242static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e );
243static HReg iselFltExpr ( ISelEnv* env, IRExpr* e );
244
245//ZZ static HReg iselNeon64Expr_wrk ( ISelEnv* env, IRExpr* e );
246//ZZ static HReg iselNeon64Expr ( ISelEnv* env, IRExpr* e );
247
248static HReg iselV128Expr_wrk ( ISelEnv* env, IRExpr* e );
249static HReg iselV128Expr ( ISelEnv* env, IRExpr* e );
250
251static ARM64RIL* mb_mkARM64RIL_I ( ULong imm64 );
252
253
254/*---------------------------------------------------------*/
255/*--- ISEL: Misc helpers ---*/
256/*---------------------------------------------------------*/
257
258/* Generate an amode suitable for a 64-bit sized access relative to
259 the baseblock register (X21). This generates an RI12 amode, which
260 means its scaled by the access size, which is why the access size
261 -- 64 bit -- is stated explicitly here. Consequently |off| needs
262 to be divisible by 8. */
263static ARM64AMode* mk_baseblock_64bit_access_amode ( UInt off )
264{
265 vassert(off < (8 << 12)); /* otherwise it's unrepresentable */
266 vassert((off & 7) == 0); /* ditto */
267 return ARM64AMode_RI12(hregARM64_X21(), off >> 3, 8/*scale*/);
268}
269
270/* Ditto, for 32 bit accesses. */
271static ARM64AMode* mk_baseblock_32bit_access_amode ( UInt off )
272{
273 vassert(off < (4 << 12)); /* otherwise it's unrepresentable */
274 vassert((off & 3) == 0); /* ditto */
275 return ARM64AMode_RI12(hregARM64_X21(), off >> 2, 4/*scale*/);
276}
277
278/* Ditto, for 16 bit accesses. */
279static ARM64AMode* mk_baseblock_16bit_access_amode ( UInt off )
280{
281 vassert(off < (2 << 12)); /* otherwise it's unrepresentable */
282 vassert((off & 1) == 0); /* ditto */
283 return ARM64AMode_RI12(hregARM64_X21(), off >> 1, 2/*scale*/);
284}
285
286/* Ditto, for 8 bit accesses. */
287static ARM64AMode* mk_baseblock_8bit_access_amode ( UInt off )
288{
289 vassert(off < (1 << 12)); /* otherwise it's unrepresentable */
290 return ARM64AMode_RI12(hregARM64_X21(), off >> 0, 1/*scale*/);
291}
292
293static HReg mk_baseblock_128bit_access_addr ( ISelEnv* env, UInt off )
294{
295 vassert(off < (1<<12));
296 HReg r = newVRegI(env);
297 addInstr(env, ARM64Instr_Arith(r, hregARM64_X21(),
298 ARM64RIA_I12(off,0), True/*isAdd*/));
299 return r;
300}
301
/* The register holding the guest state (baseblock) pointer: X21. */
static HReg get_baseblock_register ( void )
{
   return hregARM64_X21();
}
306
307/* Generate code to zero extend a 32 bit value in 'src' to 64 bits, in
308 a new register, and return the new register. */
309static HReg widen_z_32_to_64 ( ISelEnv* env, HReg src )
310{
311 HReg dst = newVRegI(env);
312 ARM64RIL* mask = ARM64RIL_I13(1, 0, 31); /* encodes 0xFFFFFFFF */
313 addInstr(env, ARM64Instr_Logic(dst, src, mask, ARM64lo_AND));
314 return dst;
315}
316
317/* Generate code to sign extend a 16 bit value in 'src' to 64 bits, in
318 a new register, and return the new register. */
319static HReg widen_s_16_to_64 ( ISelEnv* env, HReg src )
320{
321 HReg dst = newVRegI(env);
322 ARM64RI6* n48 = ARM64RI6_I6(48);
323 addInstr(env, ARM64Instr_Shift(dst, src, n48, ARM64sh_SHL));
324 addInstr(env, ARM64Instr_Shift(dst, dst, n48, ARM64sh_SAR));
325 return dst;
326}
327
328/* Generate code to zero extend a 16 bit value in 'src' to 64 bits, in
329 a new register, and return the new register. */
330static HReg widen_z_16_to_64 ( ISelEnv* env, HReg src )
331{
332 HReg dst = newVRegI(env);
333 ARM64RI6* n48 = ARM64RI6_I6(48);
334 addInstr(env, ARM64Instr_Shift(dst, src, n48, ARM64sh_SHL));
335 addInstr(env, ARM64Instr_Shift(dst, dst, n48, ARM64sh_SHR));
336 return dst;
337}
338
339/* Generate code to sign extend a 32 bit value in 'src' to 64 bits, in
340 a new register, and return the new register. */
341static HReg widen_s_32_to_64 ( ISelEnv* env, HReg src )
342{
343 HReg dst = newVRegI(env);
344 ARM64RI6* n32 = ARM64RI6_I6(32);
345 addInstr(env, ARM64Instr_Shift(dst, src, n32, ARM64sh_SHL));
346 addInstr(env, ARM64Instr_Shift(dst, dst, n32, ARM64sh_SAR));
347 return dst;
348}
349
350/* Generate code to sign extend a 8 bit value in 'src' to 64 bits, in
351 a new register, and return the new register. */
352static HReg widen_s_8_to_64 ( ISelEnv* env, HReg src )
353{
354 HReg dst = newVRegI(env);
355 ARM64RI6* n56 = ARM64RI6_I6(56);
356 addInstr(env, ARM64Instr_Shift(dst, src, n56, ARM64sh_SHL));
357 addInstr(env, ARM64Instr_Shift(dst, dst, n56, ARM64sh_SAR));
358 return dst;
359}
360
361static HReg widen_z_8_to_64 ( ISelEnv* env, HReg src )
362{
363 HReg dst = newVRegI(env);
364 ARM64RI6* n56 = ARM64RI6_I6(56);
365 addInstr(env, ARM64Instr_Shift(dst, src, n56, ARM64sh_SHL));
366 addInstr(env, ARM64Instr_Shift(dst, dst, n56, ARM64sh_SHR));
367 return dst;
368}
369
370/* Is this IRExpr_Const(IRConst_U64(0)) ? */
371static Bool isZeroU64 ( IRExpr* e ) {
372 if (e->tag != Iex_Const) return False;
373 IRConst* con = e->Iex.Const.con;
374 vassert(con->tag == Ico_U64);
375 return con->Ico.U64 == 0;
376}
377
378
379/*---------------------------------------------------------*/
380/*--- ISEL: FP rounding mode helpers ---*/
381/*---------------------------------------------------------*/
382
/* Set the FP rounding mode: 'mode' is an I32-typed expression
   denoting a value in the range 0 .. 3, indicating a round mode
   encoded as per type IRRoundingMode -- the first four values only
   (Irrm_NEAREST, Irrm_NegINF, Irrm_PosINF, Irrm_ZERO).  Set the
   ARM64 FPCR to have the same rounding.

   For speed & simplicity, we're setting the *entire* FPCR here.

   Setting the rounding mode is expensive.  So this function tries to
   avoid repeatedly setting the rounding mode to the same thing by
   first comparing 'mode' to the 'mode' tree supplied in the previous
   call to this function, if any.  (The previous value is stored in
   env->previous_rm.)  If 'mode' is a single IR temporary 't' and
   env->previous_rm is also just 't', then the setting is skipped.

   This is safe because of the SSA property of IR: an IR temporary can
   only be defined once and so will have the same value regardless of
   where it appears in the block.  Cool stuff, SSA.

   A safety condition: all attempts to set the RM must be aware of
   this mechanism - by being routed through the functions here.

   Of course this only helps if blocks where the RM is set more than
   once and it is set to the same value each time, *and* that value is
   held in the same IR temporary each time.  In order to assure the
   latter as much as possible, the IR optimiser takes care to do CSE
   on any block with any sign of floating point activity.
*/
static
void set_FPCR_rounding_mode ( ISelEnv* env, IRExpr* mode )
{
   vassert(typeOfIRExpr(env->type_env,mode) == Ity_I32);

   /* Do we need to do anything?  Skip the write if 'mode' is the same
      IR temporary that the FPCR was last set from (safe by SSA). */
   if (env->previous_rm
       && env->previous_rm->tag == Iex_RdTmp
       && mode->tag == Iex_RdTmp
       && env->previous_rm->Iex.RdTmp.tmp == mode->Iex.RdTmp.tmp) {
      /* no - setting it to what it was before. */
      vassert(typeOfIRExpr(env->type_env, env->previous_rm) == Ity_I32);
      return;
   }

   /* No luck - we better set it, and remember what we set it to. */
   env->previous_rm = mode;

   /* Only supporting the rounding-mode bits - the rest of FPCR is set
      to zero - so we can set the whole register at once (faster). */

   /* This isn't simple, because 'mode' carries an IR rounding
      encoding, and we need to translate that to an ARM64 FP one:
      The IR encoding:
         00  to nearest (the default)
         10  to +infinity
         01  to -infinity
         11  to zero
      The ARM64 FP encoding:
         00  to nearest
         01  to +infinity
         10  to -infinity
         11  to zero
      Easy enough to do; just swap the two bits.
   */
   HReg irrm = iselIntExpr_R(env, mode);
   HReg tL   = newVRegI(env);
   HReg tR   = newVRegI(env);
   HReg t3   = newVRegI(env);
   /* tL = irrm << 1;
      tR = irrm >> 1;  if we're lucky, these will issue together
      tL &= 2;
      tR &= 1;         ditto
      t3 = tL | tR;
      t3 <<= 22;       bits [23:22] of FPCR hold the rounding mode
      write t3 to the FPCR (via ARM64Instr_FPCR)
   */
   ARM64RIL* ril_one = mb_mkARM64RIL_I(1);
   ARM64RIL* ril_two = mb_mkARM64RIL_I(2);
   vassert(ril_one && ril_two);
   addInstr(env, ARM64Instr_Shift(tL, irrm, ARM64RI6_I6(1), ARM64sh_SHL));
   addInstr(env, ARM64Instr_Shift(tR, irrm, ARM64RI6_I6(1), ARM64sh_SHR));
   addInstr(env, ARM64Instr_Logic(tL, tL, ril_two, ARM64lo_AND));
   addInstr(env, ARM64Instr_Logic(tR, tR, ril_one, ARM64lo_AND));
   addInstr(env, ARM64Instr_Logic(t3, tL, ARM64RIL_R(tR), ARM64lo_OR));
   addInstr(env, ARM64Instr_Shift(t3, t3, ARM64RI6_I6(22), ARM64sh_SHL));
   addInstr(env, ARM64Instr_FPCR(True/*toFPCR*/, t3));
}
469
470
471/*---------------------------------------------------------*/
472/*--- ISEL: Function call helpers ---*/
473/*---------------------------------------------------------*/
474
475/* Used only in doHelperCall. See big comment in doHelperCall re
476 handling of register-parameter args. This function figures out
477 whether evaluation of an expression might require use of a fixed
478 register. If in doubt return True (safe but suboptimal).
479*/
480static
481Bool mightRequireFixedRegs ( IRExpr* e )
482{
483 if (UNLIKELY(is_IRExpr_VECRET_or_BBPTR(e))) {
484 // These are always "safe" -- either a copy of SP in some
485 // arbitrary vreg, or a copy of x21, respectively.
486 return False;
487 }
488 /* Else it's a "normal" expression. */
489 switch (e->tag) {
490 case Iex_RdTmp: case Iex_Const: case Iex_Get:
491 return False;
492 default:
493 return True;
494 }
495}
496
497
/* Do a complete function call.  |guard| is a Ity_Bit expression
   indicating whether or not the call happens.  If guard==NULL, the
   call is unconditional.  |retloc| is set to indicate where the
   return value is after the call.  The caller (of this fn) must
   generate code to add |stackAdjustAfterCall| to the stack pointer
   after the call is done.  Returns True iff it managed to handle this
   combination of arg/return types, else returns False. */

static
Bool doHelperCall ( /*OUT*/UInt*   stackAdjustAfterCall,
                    /*OUT*/RetLoc* retloc,
                    ISelEnv* env,
                    IRExpr* guard,
                    IRCallee* cee, IRType retTy, IRExpr** args )
{
   ARM64CondCode cc;
   HReg          argregs[ARM64_N_ARGREGS];
   HReg          tmpregs[ARM64_N_ARGREGS];
   Bool          go_fast;
   Int           n_args, i, nextArgReg;
   ULong         target;

   vassert(ARM64_N_ARGREGS == 8);

   /* Set default returns.  We'll update them later if needed. */
   *stackAdjustAfterCall = 0;
   *retloc               = mk_RetLoc_INVALID();

   /* These are used for cross-checking that IR-level constraints on
      the use of IRExpr_VECRET() and IRExpr_BBPTR() are observed. */
   UInt nVECRETs = 0;
   UInt nBBPTRs  = 0;

   /* Marshal args for a call and do the call.

      This function only deals with a tiny set of possibilities, which
      cover all helpers in practice.  The restrictions are that only
      arguments in registers are supported, hence only
      ARM64_N_REGPARMS x 64 integer bits in total can be passed.  In
      fact the only supported arg type is I64.

      The return type can be I{64,32} or V128.  In the V128 case, it
      is expected that |args| will contain the special node
      IRExpr_VECRET(), in which case this routine generates code to
      allocate space on the stack for the vector return value.  Since
      we are not passing any scalars on the stack, it is enough to
      preallocate the return space before marshalling any arguments,
      in this case.

      |args| may also contain IRExpr_BBPTR(), in which case the
      value in x21 is passed as the corresponding argument.

      Generating code which is both efficient and correct when
      parameters are to be passed in registers is difficult, for the
      reasons elaborated in detail in comments attached to
      doHelperCall() in priv/host-x86/isel.c.  Here, we use a variant
      of the method described in those comments.

      The problem is split into two cases: the fast scheme and the
      slow scheme.  In the fast scheme, arguments are computed
      directly into the target (real) registers.  This is only safe
      when we can be sure that computation of each argument will not
      trash any real registers set by computation of any other
      argument.

      In the slow scheme, all args are first computed into vregs, and
      once they are all done, they are moved to the relevant real
      regs.  This always gives correct code, but it also gives a bunch
      of vreg-to-rreg moves which are usually redundant but are hard
      for the register allocator to get rid of.

      To decide which scheme to use, all argument expressions are
      first examined.  If they are all so simple that it is clear they
      will be evaluated without use of any fixed registers, use the
      fast scheme, else use the slow scheme.  Note also that only
      unconditional calls may use the fast scheme, since having to
      compute a condition expression could itself trash real
      registers.

      Note this requires being able to examine an expression and
      determine whether or not evaluation of it might use a fixed
      register.  That requires knowledge of how the rest of this insn
      selector works.  Currently just the following 3 are regarded as
      safe -- hopefully they cover the majority of arguments in
      practice: IRExpr_Tmp IRExpr_Const IRExpr_Get.
   */

   /* Note that the cee->regparms field is meaningless on ARM64 hosts
      (since there is only one calling convention) and so we always
      ignore it. */

   /* Count the args, and the VECRET/BBPTR markers among them. */
   n_args = 0;
   for (i = 0; args[i]; i++) {
      IRExpr* arg = args[i];
      if (UNLIKELY(arg->tag == Iex_VECRET)) {
         nVECRETs++;
      } else if (UNLIKELY(arg->tag == Iex_BBPTR)) {
         nBBPTRs++;
      }
      n_args++;
   }

   /* If this fails, the IR is ill-formed */
   vassert(nBBPTRs == 0 || nBBPTRs == 1);

   /* If we have a VECRET, allocate space on the stack for the return
      value, and record the stack pointer after that. */
   HReg r_vecRetAddr = INVALID_HREG;
   if (nVECRETs == 1) {
      vassert(retTy == Ity_V128 || retTy == Ity_V256);
      vassert(retTy != Ity_V256); // we don't handle that yet (if ever)
      r_vecRetAddr = newVRegI(env);
      addInstr(env, ARM64Instr_AddToSP(-16));
      addInstr(env, ARM64Instr_FromSP(r_vecRetAddr));
   } else {
      // If either of these fail, the IR is ill-formed
      vassert(retTy != Ity_V128 && retTy != Ity_V256);
      vassert(nVECRETs == 0);
   }

   argregs[0] = hregARM64_X0();
   argregs[1] = hregARM64_X1();
   argregs[2] = hregARM64_X2();
   argregs[3] = hregARM64_X3();
   argregs[4] = hregARM64_X4();
   argregs[5] = hregARM64_X5();
   argregs[6] = hregARM64_X6();
   argregs[7] = hregARM64_X7();

   tmpregs[0] = tmpregs[1] = tmpregs[2] = tmpregs[3] = INVALID_HREG;
   tmpregs[4] = tmpregs[5] = tmpregs[6] = tmpregs[7] = INVALID_HREG;

   /* First decide which scheme (slow or fast) is to be used.  First
      assume the fast scheme, and select slow if any contraindications
      (wow) appear. */

   go_fast = True;

   /* A non-trivial guard forces the slow scheme, since evaluating the
      guard could itself use fixed registers. */
   if (guard) {
      if (guard->tag == Iex_Const
          && guard->Iex.Const.con->tag == Ico_U1
          && guard->Iex.Const.con->Ico.U1 == True) {
         /* unconditional */
      } else {
         /* Not manifestly unconditional -- be conservative. */
         go_fast = False;
      }
   }

   if (go_fast) {
      for (i = 0; i < n_args; i++) {
         if (mightRequireFixedRegs(args[i])) {
            go_fast = False;
            break;
         }
      }
   }

   /* Vector returns need the stack-slot address marshalled via a
      temporary, so they always take the slow path. */
   if (go_fast) {
      if (retTy == Ity_V128 || retTy == Ity_V256)
         go_fast = False;
   }

   /* At this point the scheme to use has been established.  Generate
      code to get the arg values into the argument rregs.  If we run
      out of arg regs, give up. */

   if (go_fast) {

      /* FAST SCHEME: compute each arg directly into its target real
         register. */
      nextArgReg = 0;

      for (i = 0; i < n_args; i++) {
         IRExpr* arg = args[i];

         IRType  aTy = Ity_INVALID;
         if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg)))
            aTy = typeOfIRExpr(env->type_env, args[i]);

         if (nextArgReg >= ARM64_N_ARGREGS)
            return False; /* out of argregs */

         if (aTy == Ity_I64) {
            addInstr(env, ARM64Instr_MovI( argregs[nextArgReg],
                                           iselIntExpr_R(env, args[i]) ));
            nextArgReg++;
         }
         else if (arg->tag == Iex_BBPTR) {
            vassert(0); //ATC
            addInstr(env, ARM64Instr_MovI( argregs[nextArgReg],
                                           hregARM64_X21() ));
            nextArgReg++;
         }
         else if (arg->tag == Iex_VECRET) {
            // because of the go_fast logic above, we can't get here,
            // since vector return values makes us use the slow path
            // instead.
            vassert(0);
         }
         else
            return False; /* unhandled arg type */
      }

      /* Fast scheme only applies for unconditional calls.  Hence: */
      cc = ARM64cc_AL;

   } else {

      /* SLOW SCHEME; move via temporaries */
      nextArgReg = 0;

      for (i = 0; i < n_args; i++) {
         IRExpr* arg = args[i];

         IRType  aTy = Ity_INVALID;
         if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg)))
            aTy = typeOfIRExpr(env->type_env, args[i]);

         if (nextArgReg >= ARM64_N_ARGREGS)
            return False; /* out of argregs */

         if (aTy == Ity_I64) {
            tmpregs[nextArgReg] = iselIntExpr_R(env, args[i]);
            nextArgReg++;
         }
         else if (arg->tag == Iex_BBPTR) {
            vassert(0); //ATC
            tmpregs[nextArgReg] = hregARM64_X21();
            nextArgReg++;
         }
         else if (arg->tag == Iex_VECRET) {
            /* Pass the address of the preallocated return slot. */
            vassert(!hregIsInvalid(r_vecRetAddr));
            tmpregs[nextArgReg] = r_vecRetAddr;
            nextArgReg++;
         }
         else
            return False; /* unhandled arg type */
      }

      /* Now we can compute the condition.  We can't do it earlier
         because the argument computations could trash the condition
         codes.  Be a bit clever to handle the common case where the
         guard is 1:Bit. */
      cc = ARM64cc_AL;
      if (guard) {
         if (guard->tag == Iex_Const
             && guard->Iex.Const.con->tag == Ico_U1
             && guard->Iex.Const.con->Ico.U1 == True) {
            /* unconditional -- do nothing */
         } else {
            cc = iselCondCode( env, guard );
         }
      }

      /* Move the args to their final destinations. */
      for (i = 0; i < nextArgReg; i++) {
         vassert(!(hregIsInvalid(tmpregs[i])));
         /* None of these insns, including any spill code that might
            be generated, may alter the condition codes. */
         addInstr( env, ARM64Instr_MovI( argregs[i], tmpregs[i] ) );
      }

   }

   /* Should be assured by checks above */
   vassert(nextArgReg <= ARM64_N_ARGREGS);

   /* Do final checks, set the return values, and generate the call
      instruction proper. */
   vassert(nBBPTRs == 0 || nBBPTRs == 1);
   /* NOTE(review): ?: binds looser than ==, so this parses as
      (nVECRETs == (retTy==V128 || retTy==V256)) ? 1 : 0.  Since the
      ||-expression already evaluates to 0 or 1 in C, this happens to
      check exactly the intended condition -- but parenthesising the
      ?: would make that explicit. */
   vassert(nVECRETs == (retTy == Ity_V128 || retTy == Ity_V256) ? 1 : 0);
   vassert(*stackAdjustAfterCall == 0);
   vassert(is_RetLoc_INVALID(*retloc));
   switch (retTy) {
      case Ity_INVALID:
         /* Function doesn't return a value. */
         *retloc = mk_RetLoc_simple(RLPri_None);
         break;
      case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
         *retloc = mk_RetLoc_simple(RLPri_Int);
         break;
      case Ity_V128:
         /* Result lands in the 16-byte stack slot allocated above;
            caller must pop it afterwards. */
         *retloc = mk_RetLoc_spRel(RLPri_V128SpRel, 0);
         *stackAdjustAfterCall = 16;
         break;
      case Ity_V256:
         vassert(0); // ATC
         *retloc = mk_RetLoc_spRel(RLPri_V256SpRel, 0);
         *stackAdjustAfterCall = 32;
         break;
      default:
         /* IR can denote other possible return types, but we don't
            handle those here. */
         vassert(0);
   }

   /* Finally, generate the call itself.  This needs the *retloc value
      set in the switch above, which is why it's at the end. */

   /* nextArgReg doles out argument registers.  Since these are
      assigned in the order x0 .. x7, its numeric value at this point,
      which must be between 0 and 8 inclusive, is going to be equal to
      the number of arg regs in use for the call.  Hence bake that
      number into the call (we'll need to know it when doing register
      allocation, to know what regs the call reads.) */

   target = (HWord)Ptr_to_ULong(cee->addr);
   addInstr(env, ARM64Instr_Call( cc, target, nextArgReg, *retloc ));

   return True; /* success */
}
809
810
811/*---------------------------------------------------------*/
812/*--- ISEL: Integer expressions (64/32 bit) ---*/
813/*---------------------------------------------------------*/
814
815/* Select insns for an integer-typed expression, and add them to the
816 code list. Return a reg holding the result. This reg will be a
817 virtual register. THE RETURNED REG MUST NOT BE MODIFIED. If you
818 want to modify it, ask for a new vreg, copy it in there, and modify
819 the copy. The register allocator will do its best to map both
820 vregs to the same real register, so the copies will often disappear
821 later in the game.
822
823 This should handle expressions of 64- and 32-bit type. All results
824 are returned in a 64-bit register. For 32-bit expressions, the
825 upper 32 bits are arbitrary, so you should mask or sign extend
826 partial values if necessary.
827*/
828
829/* --------------------- AMode --------------------- */
830
831/* Return an AMode which computes the value of the specified
832 expression, possibly also adding insns to the code list as a
833 result. The expression may only be a 64-bit one.
834*/
835
836static Bool isValidScale ( UChar scale )
837{
838 switch (scale) {
839 case 1: case 2: case 4: case 8: /* case 16: ??*/ return True;
840 default: return False;
841 }
842}
843
844static Bool sane_AMode ( ARM64AMode* am )
845{
846 switch (am->tag) {
847 case ARM64am_RI9:
848 return
849 toBool( hregClass(am->ARM64am.RI9.reg) == HRcInt64
850 && (hregIsVirtual(am->ARM64am.RI9.reg)
851 /* || sameHReg(am->ARM64am.RI9.reg,
852 hregARM64_X21()) */ )
853 && am->ARM64am.RI9.simm9 >= -256
854 && am->ARM64am.RI9.simm9 <= 255 );
855 case ARM64am_RI12:
856 return
857 toBool( hregClass(am->ARM64am.RI12.reg) == HRcInt64
858 && (hregIsVirtual(am->ARM64am.RI12.reg)
859 /* || sameHReg(am->ARM64am.RI12.reg,
860 hregARM64_X21()) */ )
861 && am->ARM64am.RI12.uimm12 < 4096
862 && isValidScale(am->ARM64am.RI12.szB) );
863 case ARM64am_RR:
864 return
865 toBool( hregClass(am->ARM64am.RR.base) == HRcInt64
866 && hregIsVirtual(am->ARM64am.RR.base)
867 && hregClass(am->ARM64am.RR.index) == HRcInt64
868 && hregIsVirtual(am->ARM64am.RR.index) );
869 default:
870 vpanic("sane_AMode: unknown ARM64 AMode1 tag");
871 }
872}
873
874static
875ARM64AMode* iselIntExpr_AMode ( ISelEnv* env, IRExpr* e, IRType dty )
876{
877 ARM64AMode* am = iselIntExpr_AMode_wrk(env, e, dty);
878 vassert(sane_AMode(am));
879 return am;
880}
881
882static
883ARM64AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, IRExpr* e, IRType dty )
884{
885 IRType ty = typeOfIRExpr(env->type_env,e);
886 vassert(ty == Ity_I64);
887
888 ULong szBbits = 0;
889 switch (dty) {
890 case Ity_I64: szBbits = 3; break;
891 case Ity_I32: szBbits = 2; break;
892 case Ity_I16: szBbits = 1; break;
893 case Ity_I8: szBbits = 0; break;
894 default: vassert(0);
895 }
896
897 /* {Add64,Sub64}(expr,simm9). We don't care about |dty| here since
898 we're going to create an amode suitable for LDU* or STU*
899 instructions, which use unscaled immediate offsets. */
900 if (e->tag == Iex_Binop
901 && (e->Iex.Binop.op == Iop_Add64 || e->Iex.Binop.op == Iop_Sub64)
902 && e->Iex.Binop.arg2->tag == Iex_Const
903 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64) {
904 Long simm = (Long)e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
905 if (simm >= -256 && simm <= 255) {
906 HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
907 return ARM64AMode_RI9(reg, (Int)simm);
908 }
909 }
910
911 /* Add64(expr, uimm12 * transfer-size) */
912 if (e->tag == Iex_Binop
913 && e->Iex.Binop.op == Iop_Add64
914 && e->Iex.Binop.arg2->tag == Iex_Const
915 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64) {
916 ULong uimm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
917 ULong szB = 1 << szBbits;
918 if (0 == (uimm & (szB-1)) /* "uimm is szB-aligned" */
919 && (uimm >> szBbits) < 4096) {
920 HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
921 return ARM64AMode_RI12(reg, (UInt)(uimm >> szBbits), (UChar)szB);
922 }
923 }
924
925 /* Add64(expr1, expr2) */
926 if (e->tag == Iex_Binop
927 && e->Iex.Binop.op == Iop_Add64) {
928 HReg reg1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
929 HReg reg2 = iselIntExpr_R(env, e->Iex.Binop.arg2);
930 return ARM64AMode_RR(reg1, reg2);
931 }
932
933 /* Doesn't match anything in particular. Generate it into
934 a register and use that. */
935 HReg reg = iselIntExpr_R(env, e);
936 return ARM64AMode_RI9(reg, 0);
937}
938
939//ZZ /* --------------------- AModeV --------------------- */
940//ZZ
941//ZZ /* Return an AModeV which computes the value of the specified
942//ZZ expression, possibly also adding insns to the code list as a
943//ZZ result. The expression may only be a 32-bit one.
944//ZZ */
945//ZZ
946//ZZ static Bool sane_AModeV ( ARMAModeV* am )
947//ZZ {
948//ZZ return toBool( hregClass(am->reg) == HRcInt32
949//ZZ && hregIsVirtual(am->reg)
950//ZZ && am->simm11 >= -1020 && am->simm11 <= 1020
951//ZZ && 0 == (am->simm11 & 3) );
952//ZZ }
953//ZZ
954//ZZ static ARMAModeV* iselIntExpr_AModeV ( ISelEnv* env, IRExpr* e )
955//ZZ {
956//ZZ ARMAModeV* am = iselIntExpr_AModeV_wrk(env, e);
957//ZZ vassert(sane_AModeV(am));
958//ZZ return am;
959//ZZ }
960//ZZ
961//ZZ static ARMAModeV* iselIntExpr_AModeV_wrk ( ISelEnv* env, IRExpr* e )
962//ZZ {
963//ZZ IRType ty = typeOfIRExpr(env->type_env,e);
964//ZZ vassert(ty == Ity_I32);
965//ZZ
966//ZZ /* {Add32,Sub32}(expr, simm8 << 2) */
967//ZZ if (e->tag == Iex_Binop
968//ZZ && (e->Iex.Binop.op == Iop_Add32 || e->Iex.Binop.op == Iop_Sub32)
969//ZZ && e->Iex.Binop.arg2->tag == Iex_Const
970//ZZ && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
971//ZZ Int simm = (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
972//ZZ if (simm >= -1020 && simm <= 1020 && 0 == (simm & 3)) {
973//ZZ HReg reg;
974//ZZ if (e->Iex.Binop.op == Iop_Sub32)
975//ZZ simm = -simm;
976//ZZ reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
977//ZZ return mkARMAModeV(reg, simm);
978//ZZ }
979//ZZ }
980//ZZ
981//ZZ /* Doesn't match anything in particular. Generate it into
982//ZZ a register and use that. */
983//ZZ {
984//ZZ HReg reg = iselIntExpr_R(env, e);
985//ZZ return mkARMAModeV(reg, 0);
986//ZZ }
987//ZZ
988//ZZ }
989//ZZ
990//ZZ /* -------------------- AModeN -------------------- */
991//ZZ
992//ZZ static ARMAModeN* iselIntExpr_AModeN ( ISelEnv* env, IRExpr* e )
993//ZZ {
994//ZZ return iselIntExpr_AModeN_wrk(env, e);
995//ZZ }
996//ZZ
997//ZZ static ARMAModeN* iselIntExpr_AModeN_wrk ( ISelEnv* env, IRExpr* e )
998//ZZ {
999//ZZ HReg reg = iselIntExpr_R(env, e);
1000//ZZ return mkARMAModeN_R(reg);
1001//ZZ }
1002//ZZ
1003//ZZ
1004//ZZ /* --------------------- RI84 --------------------- */
1005//ZZ
1006//ZZ /* Select instructions to generate 'e' into a RI84. If mayInv is
1007//ZZ true, then the caller will also accept an I84 form that denotes
1008//ZZ 'not e'. In this case didInv may not be NULL, and *didInv is set
1009//ZZ to True. This complication is so as to allow generation of an RI84
1010//ZZ which is suitable for use in either an AND or BIC instruction,
1011//ZZ without knowing (before this call) which one.
1012//ZZ */
1013//ZZ static ARMRI84* iselIntExpr_RI84 ( /*OUT*/Bool* didInv, Bool mayInv,
1014//ZZ ISelEnv* env, IRExpr* e )
1015//ZZ {
1016//ZZ ARMRI84* ri;
1017//ZZ if (mayInv)
1018//ZZ vassert(didInv != NULL);
1019//ZZ ri = iselIntExpr_RI84_wrk(didInv, mayInv, env, e);
1020//ZZ /* sanity checks ... */
1021//ZZ switch (ri->tag) {
1022//ZZ case ARMri84_I84:
1023//ZZ return ri;
1024//ZZ case ARMri84_R:
1025//ZZ vassert(hregClass(ri->ARMri84.R.reg) == HRcInt32);
1026//ZZ vassert(hregIsVirtual(ri->ARMri84.R.reg));
1027//ZZ return ri;
1028//ZZ default:
1029//ZZ vpanic("iselIntExpr_RI84: unknown arm RI84 tag");
1030//ZZ }
1031//ZZ }
1032//ZZ
1033//ZZ /* DO NOT CALL THIS DIRECTLY ! */
1034//ZZ static ARMRI84* iselIntExpr_RI84_wrk ( /*OUT*/Bool* didInv, Bool mayInv,
1035//ZZ ISelEnv* env, IRExpr* e )
1036//ZZ {
1037//ZZ IRType ty = typeOfIRExpr(env->type_env,e);
1038//ZZ vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
1039//ZZ
1040//ZZ if (didInv) *didInv = False;
1041//ZZ
1042//ZZ /* special case: immediate */
1043//ZZ if (e->tag == Iex_Const) {
1044//ZZ UInt u, u8 = 0x100, u4 = 0x10; /* both invalid */
1045//ZZ switch (e->Iex.Const.con->tag) {
1046//ZZ case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
1047//ZZ case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
1048//ZZ case Ico_U8: u = 0xFF & (e->Iex.Const.con->Ico.U8); break;
1049//ZZ default: vpanic("iselIntExpr_RI84.Iex_Const(armh)");
1050//ZZ }
1051//ZZ if (fitsIn8x4(&u8, &u4, u)) {
1052//ZZ return ARMRI84_I84( (UShort)u8, (UShort)u4 );
1053//ZZ }
1054//ZZ if (mayInv && fitsIn8x4(&u8, &u4, ~u)) {
1055//ZZ vassert(didInv);
1056//ZZ *didInv = True;
1057//ZZ return ARMRI84_I84( (UShort)u8, (UShort)u4 );
1058//ZZ }
1059//ZZ /* else fail, fall through to default case */
1060//ZZ }
1061//ZZ
1062//ZZ /* default case: calculate into a register and return that */
1063//ZZ {
1064//ZZ HReg r = iselIntExpr_R ( env, e );
1065//ZZ return ARMRI84_R(r);
1066//ZZ }
1067//ZZ }
1068
1069
1070/* --------------------- RIA --------------------- */
1071
1072/* Select instructions to generate 'e' into a RIA. */
1073
1074static ARM64RIA* iselIntExpr_RIA ( ISelEnv* env, IRExpr* e )
1075{
1076 ARM64RIA* ri = iselIntExpr_RIA_wrk(env, e);
1077 /* sanity checks ... */
1078 switch (ri->tag) {
1079 case ARM64riA_I12:
1080 vassert(ri->ARM64riA.I12.imm12 < 4096);
1081 vassert(ri->ARM64riA.I12.shift == 0 || ri->ARM64riA.I12.shift == 12);
1082 return ri;
1083 case ARM64riA_R:
1084 vassert(hregClass(ri->ARM64riA.R.reg) == HRcInt64);
1085 vassert(hregIsVirtual(ri->ARM64riA.R.reg));
1086 return ri;
1087 default:
1088 vpanic("iselIntExpr_RIA: unknown arm RIA tag");
1089 }
1090}
1091
1092/* DO NOT CALL THIS DIRECTLY ! */
1093static ARM64RIA* iselIntExpr_RIA_wrk ( ISelEnv* env, IRExpr* e )
1094{
1095 IRType ty = typeOfIRExpr(env->type_env,e);
1096 vassert(ty == Ity_I64 || ty == Ity_I32);
1097
1098 /* special case: immediate */
1099 if (e->tag == Iex_Const) {
1100 ULong u = 0xF000000ULL; /* invalid */
1101 switch (e->Iex.Const.con->tag) {
1102 case Ico_U64: u = e->Iex.Const.con->Ico.U64; break;
1103 case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
1104 default: vpanic("iselIntExpr_RIA.Iex_Const(arm64)");
1105 }
1106 if (0 == (u & ~(0xFFFULL << 0)))
1107 return ARM64RIA_I12((UShort)((u >> 0) & 0xFFFULL), 0);
1108 if (0 == (u & ~(0xFFFULL << 12)))
1109 return ARM64RIA_I12((UShort)((u >> 12) & 0xFFFULL), 12);
1110 /* else fail, fall through to default case */
1111 }
1112
1113 /* default case: calculate into a register and return that */
1114 {
1115 HReg r = iselIntExpr_R ( env, e );
1116 return ARM64RIA_R(r);
1117 }
1118}
1119
1120
1121/* --------------------- RIL --------------------- */
1122
1123/* Select instructions to generate 'e' into a RIL. At this point we
1124 have to deal with the strange bitfield-immediate encoding for logic
1125 instructions. */
1126
1127
1128// The following four functions
1129// CountLeadingZeros CountTrailingZeros CountSetBits isImmLogical
1130// are copied, with modifications, from
1131// https://github.com/armvixl/vixl/blob/master/src/a64/assembler-a64.cc
1132// which has the following copyright notice:
1133/*
1134 Copyright 2013, ARM Limited
1135 All rights reserved.
1136
1137 Redistribution and use in source and binary forms, with or without
1138 modification, are permitted provided that the following conditions are met:
1139
1140 * Redistributions of source code must retain the above copyright notice,
1141 this list of conditions and the following disclaimer.
1142 * Redistributions in binary form must reproduce the above copyright notice,
1143 this list of conditions and the following disclaimer in the documentation
1144 and/or other materials provided with the distribution.
1145 * Neither the name of ARM Limited nor the names of its contributors may be
1146 used to endorse or promote products derived from this software without
1147 specific prior written permission.
1148
1149 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
1150 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
1151 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
1152 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
1153 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
1154 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
1155 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
1156 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
1157 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
1158 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
1159*/
1160
1161static Int CountLeadingZeros(ULong value, Int width)
1162{
1163 vassert(width == 32 || width == 64);
1164 Int count = 0;
1165 ULong bit_test = 1ULL << (width - 1);
1166 while ((count < width) && ((bit_test & value) == 0)) {
1167 count++;
1168 bit_test >>= 1;
1169 }
1170 return count;
1171}
1172
1173static Int CountTrailingZeros(ULong value, Int width)
1174{
1175 vassert(width == 32 || width == 64);
1176 Int count = 0;
1177 while ((count < width) && (((value >> count) & 1) == 0)) {
1178 count++;
1179 }
1180 return count;
1181}
1182
1183static Int CountSetBits(ULong value, Int width)
1184{
1185 // TODO: Other widths could be added here, as the implementation already
1186 // supports them.
1187 vassert(width == 32 || width == 64);
1188
1189 // Mask out unused bits to ensure that they are not counted.
1190 value &= (0xffffffffffffffffULL >> (64-width));
1191
1192 // Add up the set bits.
1193 // The algorithm works by adding pairs of bit fields together iteratively,
1194 // where the size of each bit field doubles each time.
1195 // An example for an 8-bit value:
1196 // Bits: h g f e d c b a
1197 // \ | \ | \ | \ |
1198 // value = h+g f+e d+c b+a
1199 // \ | \ |
1200 // value = h+g+f+e d+c+b+a
1201 // \ |
1202 // value = h+g+f+e+d+c+b+a
sewardjaeeb31d2014-01-12 18:23:45 +00001203 value = ((value >> 1) & 0x5555555555555555ULL)
1204 + (value & 0x5555555555555555ULL);
1205 value = ((value >> 2) & 0x3333333333333333ULL)
1206 + (value & 0x3333333333333333ULL);
1207 value = ((value >> 4) & 0x0f0f0f0f0f0f0f0fULL)
1208 + (value & 0x0f0f0f0f0f0f0f0fULL);
1209 value = ((value >> 8) & 0x00ff00ff00ff00ffULL)
1210 + (value & 0x00ff00ff00ff00ffULL);
1211 value = ((value >> 16) & 0x0000ffff0000ffffULL)
1212 + (value & 0x0000ffff0000ffffULL);
1213 value = ((value >> 32) & 0x00000000ffffffffULL)
1214 + (value & 0x00000000ffffffffULL);
sewardjbbcf1882014-01-12 12:49:10 +00001215
1216 return value;
1217}
1218
static Bool isImmLogical ( /*OUT*/UInt* n,
                           /*OUT*/UInt* imm_s, /*OUT*/UInt* imm_r,
                           ULong value, UInt width )
{
   // Test if a given value can be encoded in the immediate field of a
   // logical instruction.

   // If it can be encoded, the function returns true, and values
   // pointed to by n, imm_s and imm_r are updated with immediates
   // encoded in the format required by the corresponding fields in the
   // logical instruction. If it can not be encoded, the function
   // returns false, and the values pointed to by n, imm_s and imm_r
   // are undefined.
   vassert(n != NULL && imm_s != NULL && imm_r != NULL);
   vassert(width == 32 || width == 64);

   // Logical immediates are encoded using parameters n, imm_s and imm_r using
   // the following table:
   //
   //  N   imms    immr    size        S             R
   //  1  ssssss  rrrrrr    64    UInt(ssssss)  UInt(rrrrrr)
   //  0  0sssss  xrrrrr    32    UInt(sssss)   UInt(rrrrr)
   //  0  10ssss  xxrrrr    16    UInt(ssss)    UInt(rrrr)
   //  0  110sss  xxxrrr     8    UInt(sss)     UInt(rrr)
   //  0  1110ss  xxxxrr     4    UInt(ss)      UInt(rr)
   //  0  11110s  xxxxxr     2    UInt(s)       UInt(r)
   //  (s bits must not be all set)
   //
   // A pattern is constructed of size bits, where the least significant S+1
   // bits are set. The pattern is rotated right by R, and repeated across a
   // 32 or 64-bit value, depending on destination register width.
   //
   // To test if an arbitrary immediate can be encoded using this scheme, an
   // iterative algorithm is used.
   //
   // TODO: This code does not consider using X/W register overlap to support
   // 64-bit immediates where the top 32-bits are zero, and the bottom 32-bits
   // are an encodable logical immediate.

   // 1. If the value has all set or all clear bits, it can't be encoded.
   if ((value == 0) || (value == 0xffffffffffffffffULL) ||
       ((width == 32) && (value == 0xffffffff))) {
      return False;
   }

   // These statistics are computed once, at the original |width|, and
   // (apart from set_bits, which is rescaled in step 5 below) are NOT
   // recomputed when |width| is halved inside the loop.
   // CountLeadingZeros/CountTrailingZeros/CountSetBits are defined above.
   UInt lead_zero  = CountLeadingZeros(value, width);
   UInt lead_one   = CountLeadingZeros(~value, width);
   UInt trail_zero = CountTrailingZeros(value, width);
   UInt trail_one  = CountTrailingZeros(~value, width);
   UInt set_bits   = CountSetBits(value, width);

   // The fixed bits in the immediate s field.
   // If width == 64 (X reg), start at 0xFFFFFF80.
   // If width == 32 (W reg), start at 0xFFFFFFC0, as the iteration for 64-bit
   // widths won't be executed.
   Int imm_s_fixed = (width == 64) ? -128 : -64;
   Int imm_s_mask = 0x3F;

   for (;;) {
      // 2. If the value is two bits wide, it can be encoded.
      if (width == 2) {
         *n = 0;
         *imm_s = 0x3C;
         // At this point (value & 3) is 1 or 2, so *imm_r is 0 or 1.
         *imm_r = (value & 3) - 1;
         return True;
      }

      *n = (width == 64) ? 1 : 0;
      *imm_s = ((imm_s_fixed | (set_bits - 1)) & imm_s_mask);
      if ((lead_zero + set_bits) == width) {
         *imm_r = 0;
      } else {
         *imm_r = (lead_zero > 0) ? (width - trail_zero) : lead_one;
      }

      // 3. If the sum of leading zeros, trailing zeros and set bits is equal to
      //    the bit width of the value, it can be encoded.
      if (lead_zero + trail_zero + set_bits == width) {
         return True;
      }

      // 4. If the sum of leading ones, trailing ones and unset bits in the
      //    value is equal to the bit width of the value, it can be encoded.
      if (lead_one + trail_one + (width - set_bits) == width) {
         return True;
      }

      // 5. If the most-significant half of the bitwise value is equal to the
      //    least-significant half, return to step 2 using the least-significant
      //    half of the value.
      ULong mask = (1ULL << (width >> 1)) - 1;
      if ((value & mask) == ((value >> (width >> 1)) & mask)) {
         width >>= 1;
         set_bits >>= 1;
         imm_s_fixed >>= 1;
         continue;
      }

      // 6. Otherwise, the value can't be encoded.
      return False;
   }
}
1321
1322
1323/* Create a RIL for the given immediate, if it is representable, or
1324 return NULL if not. */
1325
1326static ARM64RIL* mb_mkARM64RIL_I ( ULong imm64 )
1327{
1328 UInt n = 0, imm_s = 0, imm_r = 0;
1329 Bool ok = isImmLogical(&n, &imm_s, &imm_r, imm64, 64);
1330 if (!ok) return NULL;
1331 vassert(n < 2 && imm_s < 64 && imm_r < 64);
1332 return ARM64RIL_I13(n, imm_r, imm_s);
1333}
1334
1335/* So, finally .. */
1336
1337static ARM64RIL* iselIntExpr_RIL ( ISelEnv* env, IRExpr* e )
1338{
1339 ARM64RIL* ri = iselIntExpr_RIL_wrk(env, e);
1340 /* sanity checks ... */
1341 switch (ri->tag) {
1342 case ARM64riL_I13:
1343 vassert(ri->ARM64riL.I13.bitN < 2);
1344 vassert(ri->ARM64riL.I13.immR < 64);
1345 vassert(ri->ARM64riL.I13.immS < 64);
1346 return ri;
1347 case ARM64riL_R:
1348 vassert(hregClass(ri->ARM64riL.R.reg) == HRcInt64);
1349 vassert(hregIsVirtual(ri->ARM64riL.R.reg));
1350 return ri;
1351 default:
1352 vpanic("iselIntExpr_RIL: unknown arm RIL tag");
1353 }
1354}
1355
1356/* DO NOT CALL THIS DIRECTLY ! */
1357static ARM64RIL* iselIntExpr_RIL_wrk ( ISelEnv* env, IRExpr* e )
1358{
1359 IRType ty = typeOfIRExpr(env->type_env,e);
1360 vassert(ty == Ity_I64 || ty == Ity_I32);
1361
1362 /* special case: immediate */
1363 if (e->tag == Iex_Const) {
1364 ARM64RIL* maybe = NULL;
1365 if (ty == Ity_I64) {
1366 vassert(e->Iex.Const.con->tag == Ico_U64);
1367 maybe = mb_mkARM64RIL_I(e->Iex.Const.con->Ico.U64);
1368 } else {
1369 vassert(ty == Ity_I32);
1370 vassert(e->Iex.Const.con->tag == Ico_U32);
1371 UInt u32 = e->Iex.Const.con->Ico.U32;
1372 ULong u64 = (ULong)u32;
1373 /* First try with 32 leading zeroes. */
1374 maybe = mb_mkARM64RIL_I(u64);
1375 /* If that doesn't work, try with 2 copies, since it doesn't
1376 matter what winds up in the upper 32 bits. */
1377 if (!maybe) {
1378 maybe = mb_mkARM64RIL_I((u64 << 32) | u64);
1379 }
1380 }
1381 if (maybe) return maybe;
1382 /* else fail, fall through to default case */
1383 }
1384
1385 /* default case: calculate into a register and return that */
1386 {
1387 HReg r = iselIntExpr_R ( env, e );
1388 return ARM64RIL_R(r);
1389 }
1390}
1391
1392
1393/* --------------------- RI6 --------------------- */
1394
1395/* Select instructions to generate 'e' into a RI6. */
1396
1397static ARM64RI6* iselIntExpr_RI6 ( ISelEnv* env, IRExpr* e )
1398{
1399 ARM64RI6* ri = iselIntExpr_RI6_wrk(env, e);
1400 /* sanity checks ... */
1401 switch (ri->tag) {
1402 case ARM64ri6_I6:
1403 vassert(ri->ARM64ri6.I6.imm6 < 64);
1404 vassert(ri->ARM64ri6.I6.imm6 > 0);
1405 return ri;
1406 case ARM64ri6_R:
1407 vassert(hregClass(ri->ARM64ri6.R.reg) == HRcInt64);
1408 vassert(hregIsVirtual(ri->ARM64ri6.R.reg));
1409 return ri;
1410 default:
1411 vpanic("iselIntExpr_RI6: unknown arm RI6 tag");
1412 }
1413}
1414
1415/* DO NOT CALL THIS DIRECTLY ! */
1416static ARM64RI6* iselIntExpr_RI6_wrk ( ISelEnv* env, IRExpr* e )
1417{
1418 IRType ty = typeOfIRExpr(env->type_env,e);
1419 vassert(ty == Ity_I64 || ty == Ity_I8);
1420
1421 /* special case: immediate */
1422 if (e->tag == Iex_Const) {
1423 switch (e->Iex.Const.con->tag) {
1424 case Ico_U8: {
1425 UInt u = e->Iex.Const.con->Ico.U8;
1426 if (u > 0 && u < 64)
1427 return ARM64RI6_I6(u);
1428 break;
1429 default:
1430 break;
1431 }
1432 }
1433 /* else fail, fall through to default case */
1434 }
1435
1436 /* default case: calculate into a register and return that */
1437 {
1438 HReg r = iselIntExpr_R ( env, e );
1439 return ARM64RI6_R(r);
1440 }
1441}
1442
1443
1444/* ------------------- CondCode ------------------- */
1445
1446/* Generate code to evaluated a bit-typed expression, returning the
1447 condition code which would correspond when the expression would
1448 notionally have returned 1. */
1449
1450static ARM64CondCode iselCondCode ( ISelEnv* env, IRExpr* e )
1451{
1452 ARM64CondCode cc = iselCondCode_wrk(env,e);
1453 vassert(cc != ARM64cc_NV);
1454 return cc;
1455}
1456
1457static ARM64CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e )
1458{
1459 vassert(e);
1460 vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);
1461
1462 /* var */
1463 if (e->tag == Iex_RdTmp) {
1464 HReg rTmp = lookupIRTemp(env, e->Iex.RdTmp.tmp);
1465 /* Cmp doesn't modify rTmp; so this is OK. */
1466 ARM64RIL* one = mb_mkARM64RIL_I(1);
1467 vassert(one);
1468 addInstr(env, ARM64Instr_Test(rTmp, one));
1469 return ARM64cc_NE;
1470 }
1471
1472 /* Not1(e) */
1473 if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) {
1474 /* Generate code for the arg, and negate the test condition */
1475 ARM64CondCode cc = iselCondCode(env, e->Iex.Unop.arg);
1476 if (cc == ARM64cc_AL || cc == ARM64cc_NV) {
1477 return ARM64cc_AL;
1478 } else {
1479 return 1 ^ cc;
1480 }
1481 }
1482
1483 /* --- patterns rooted at: 64to1 --- */
1484
1485 if (e->tag == Iex_Unop
1486 && e->Iex.Unop.op == Iop_64to1) {
1487 HReg rTmp = iselIntExpr_R(env, e->Iex.Unop.arg);
1488 ARM64RIL* one = mb_mkARM64RIL_I(1);
1489 vassert(one); /* '1' must be representable */
1490 addInstr(env, ARM64Instr_Test(rTmp, one));
1491 return ARM64cc_NE;
1492 }
1493
1494 /* --- patterns rooted at: CmpNEZ8 --- */
1495
1496 if (e->tag == Iex_Unop
1497 && e->Iex.Unop.op == Iop_CmpNEZ8) {
1498 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
1499 ARM64RIL* xFF = mb_mkARM64RIL_I(0xFF);
1500 addInstr(env, ARM64Instr_Test(r1, xFF));
1501 return ARM64cc_NE;
1502 }
1503
1504 /* --- patterns rooted at: CmpNEZ64 --- */
1505
1506 if (e->tag == Iex_Unop
1507 && e->Iex.Unop.op == Iop_CmpNEZ64) {
1508 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
1509 ARM64RIA* zero = ARM64RIA_I12(0,0);
1510 addInstr(env, ARM64Instr_Cmp(r1, zero, True/*is64*/));
1511 return ARM64cc_NE;
1512 }
1513
1514 /* --- patterns rooted at: CmpNEZ32 --- */
1515
1516 if (e->tag == Iex_Unop
1517 && e->Iex.Unop.op == Iop_CmpNEZ32) {
1518 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
1519 ARM64RIA* zero = ARM64RIA_I12(0,0);
1520 addInstr(env, ARM64Instr_Cmp(r1, zero, False/*!is64*/));
1521 return ARM64cc_NE;
1522 }
1523
1524 /* --- Cmp*64*(x,y) --- */
1525 if (e->tag == Iex_Binop
1526 && (e->Iex.Binop.op == Iop_CmpEQ64
1527 || e->Iex.Binop.op == Iop_CmpNE64
1528 || e->Iex.Binop.op == Iop_CmpLT64S
1529 || e->Iex.Binop.op == Iop_CmpLT64U
1530 || e->Iex.Binop.op == Iop_CmpLE64S
1531 || e->Iex.Binop.op == Iop_CmpLE64U)) {
1532 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1533 ARM64RIA* argR = iselIntExpr_RIA(env, e->Iex.Binop.arg2);
1534 addInstr(env, ARM64Instr_Cmp(argL, argR, True/*is64*/));
1535 switch (e->Iex.Binop.op) {
1536 case Iop_CmpEQ64: return ARM64cc_EQ;
1537 case Iop_CmpNE64: return ARM64cc_NE;
1538 case Iop_CmpLT64S: return ARM64cc_LT;
1539 case Iop_CmpLT64U: return ARM64cc_CC;
1540 case Iop_CmpLE64S: return ARM64cc_LE;
1541 case Iop_CmpLE64U: return ARM64cc_LS;
1542 default: vpanic("iselCondCode(arm64): CmpXX64");
1543 }
1544 }
1545
1546 /* --- Cmp*32*(x,y) --- */
1547 if (e->tag == Iex_Binop
1548 && (e->Iex.Binop.op == Iop_CmpEQ32
1549 || e->Iex.Binop.op == Iop_CmpNE32
1550 || e->Iex.Binop.op == Iop_CmpLT32S
1551 || e->Iex.Binop.op == Iop_CmpLT32U
1552 || e->Iex.Binop.op == Iop_CmpLE32S
1553 || e->Iex.Binop.op == Iop_CmpLE32U)) {
1554 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1555 ARM64RIA* argR = iselIntExpr_RIA(env, e->Iex.Binop.arg2);
1556 addInstr(env, ARM64Instr_Cmp(argL, argR, False/*!is64*/));
1557 switch (e->Iex.Binop.op) {
1558 case Iop_CmpEQ32: return ARM64cc_EQ;
1559 case Iop_CmpNE32: return ARM64cc_NE;
1560 case Iop_CmpLT32S: return ARM64cc_LT;
1561 case Iop_CmpLT32U: return ARM64cc_CC;
1562 case Iop_CmpLE32S: return ARM64cc_LE;
1563 case Iop_CmpLE32U: return ARM64cc_LS;
1564 default: vpanic("iselCondCode(arm64): CmpXX32");
1565 }
1566 }
1567
1568//ZZ /* const */
1569//ZZ /* Constant 1:Bit */
1570//ZZ if (e->tag == Iex_Const) {
1571//ZZ HReg r;
1572//ZZ vassert(e->Iex.Const.con->tag == Ico_U1);
1573//ZZ vassert(e->Iex.Const.con->Ico.U1 == True
1574//ZZ || e->Iex.Const.con->Ico.U1 == False);
1575//ZZ r = newVRegI(env);
1576//ZZ addInstr(env, ARMInstr_Imm32(r, 0));
1577//ZZ addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, r, ARMRI84_R(r)));
1578//ZZ return e->Iex.Const.con->Ico.U1 ? ARMcc_EQ : ARMcc_NE;
1579//ZZ }
1580//ZZ
1581//ZZ // JRS 2013-Jan-03: this seems completely nonsensical
1582//ZZ /* --- CasCmpEQ* --- */
1583//ZZ /* Ist_Cas has a dummy argument to compare with, so comparison is
1584//ZZ always true. */
1585//ZZ //if (e->tag == Iex_Binop
1586//ZZ // && (e->Iex.Binop.op == Iop_CasCmpEQ32
1587//ZZ // || e->Iex.Binop.op == Iop_CasCmpEQ16
1588//ZZ // || e->Iex.Binop.op == Iop_CasCmpEQ8)) {
1589//ZZ // return ARMcc_AL;
1590//ZZ //}
1591
1592 ppIRExpr(e);
1593 vpanic("iselCondCode");
1594}
1595
1596
1597/* --------------------- Reg --------------------- */
1598
1599static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e )
1600{
1601 HReg r = iselIntExpr_R_wrk(env, e);
1602 /* sanity checks ... */
1603# if 0
1604 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
1605# endif
1606 vassert(hregClass(r) == HRcInt64);
1607 vassert(hregIsVirtual(r));
1608 return r;
1609}
1610
1611/* DO NOT CALL THIS DIRECTLY ! */
1612static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
1613{
1614 IRType ty = typeOfIRExpr(env->type_env,e);
1615 vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
1616
1617 switch (e->tag) {
1618
1619 /* --------- TEMP --------- */
1620 case Iex_RdTmp: {
1621 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
1622 }
1623
1624 /* --------- LOAD --------- */
1625 case Iex_Load: {
1626 HReg dst = newVRegI(env);
1627
1628 if (e->Iex.Load.end != Iend_LE)
1629 goto irreducible;
1630
1631 if (ty == Ity_I64) {
1632 ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty );
1633 addInstr(env, ARM64Instr_LdSt64(True/*isLoad*/, dst, amode));
1634 return dst;
1635 }
1636 if (ty == Ity_I32) {
1637 ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty );
1638 addInstr(env, ARM64Instr_LdSt32(True/*isLoad*/, dst, amode));
1639 return dst;
1640 }
1641 if (ty == Ity_I16) {
1642 ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty );
1643 addInstr(env, ARM64Instr_LdSt16(True/*isLoad*/, dst, amode));
1644 return dst;
1645 }
1646 if (ty == Ity_I8) {
1647 ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty );
1648 addInstr(env, ARM64Instr_LdSt8(True/*isLoad*/, dst, amode));
1649 return dst;
1650 }
1651 break;
1652 }
1653
1654 /* --------- BINARY OP --------- */
1655 case Iex_Binop: {
1656
1657 ARM64LogicOp lop = 0; /* invalid */
1658 ARM64ShiftOp sop = 0; /* invalid */
1659
1660 /* Special-case 0-x into a Neg instruction. Not because it's
1661 particularly useful but more so as to give value flow using
1662 this instruction, so as to check its assembly correctness for
1663 implementation of Left32/Left64. */
1664 switch (e->Iex.Binop.op) {
1665 case Iop_Sub64:
1666 if (isZeroU64(e->Iex.Binop.arg1)) {
1667 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1668 HReg dst = newVRegI(env);
1669 addInstr(env, ARM64Instr_Unary(dst, argR, ARM64un_NEG));
1670 return dst;
1671 }
1672 break;
1673 default:
1674 break;
1675 }
1676
1677 /* ADD/SUB */
1678 switch (e->Iex.Binop.op) {
1679 case Iop_Add64: case Iop_Add32:
1680 case Iop_Sub64: case Iop_Sub32: {
1681 Bool isAdd = e->Iex.Binop.op == Iop_Add64
1682 || e->Iex.Binop.op == Iop_Add32;
1683 HReg dst = newVRegI(env);
1684 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1685 ARM64RIA* argR = iselIntExpr_RIA(env, e->Iex.Binop.arg2);
1686 addInstr(env, ARM64Instr_Arith(dst, argL, argR, isAdd));
1687 return dst;
1688 }
1689 default:
1690 break;
1691 }
1692
1693 /* AND/OR/XOR */
1694 switch (e->Iex.Binop.op) {
1695 case Iop_And64: case Iop_And32: lop = ARM64lo_AND; goto log_binop;
1696 case Iop_Or64: case Iop_Or32: lop = ARM64lo_OR; goto log_binop;
1697 case Iop_Xor64: case Iop_Xor32: lop = ARM64lo_XOR; goto log_binop;
1698 log_binop: {
1699 HReg dst = newVRegI(env);
1700 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1701 ARM64RIL* argR = iselIntExpr_RIL(env, e->Iex.Binop.arg2);
1702 addInstr(env, ARM64Instr_Logic(dst, argL, argR, lop));
1703 return dst;
1704 }
1705 default:
1706 break;
1707 }
1708
1709 /* SHL/SHR/SAR */
1710 switch (e->Iex.Binop.op) {
1711 case Iop_Shr64: sop = ARM64sh_SHR; goto sh_binop;
1712 case Iop_Sar64: sop = ARM64sh_SAR; goto sh_binop;
1713 case Iop_Shl64: case Iop_Shl32: sop = ARM64sh_SHL; goto sh_binop;
1714 sh_binop: {
1715 HReg dst = newVRegI(env);
1716 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1717 ARM64RI6* argR = iselIntExpr_RI6(env, e->Iex.Binop.arg2);
1718 addInstr(env, ARM64Instr_Shift(dst, argL, argR, sop));
1719 return dst;
1720 }
1721 case Iop_Shr32:
1722 case Iop_Sar32: {
1723 Bool zx = e->Iex.Binop.op == Iop_Shr32;
1724 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1725 ARM64RI6* argR = iselIntExpr_RI6(env, e->Iex.Binop.arg2);
1726 HReg dst = zx ? widen_z_32_to_64(env, argL)
1727 : widen_s_32_to_64(env, argL);
1728 addInstr(env, ARM64Instr_Shift(dst, dst, argR, ARM64sh_SHR));
1729 return dst;
1730 }
1731 default: break;
1732 }
1733
1734 /* MUL */
1735 if (e->Iex.Binop.op == Iop_Mul64 || e->Iex.Binop.op == Iop_Mul32) {
1736 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1737 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1738 HReg dst = newVRegI(env);
1739 addInstr(env, ARM64Instr_Mul(dst, argL, argR, ARM64mul_PLAIN));
1740 return dst;
1741 }
1742
1743 /* MULL */
1744 if (e->Iex.Binop.op == Iop_MullU32 || e->Iex.Binop.op == Iop_MullS32) {
1745 Bool isS = e->Iex.Binop.op == Iop_MullS32;
1746 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1747 HReg extL = (isS ? widen_s_32_to_64 : widen_z_32_to_64)(env, argL);
1748 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1749 HReg extR = (isS ? widen_s_32_to_64 : widen_z_32_to_64)(env, argR);
1750 HReg dst = newVRegI(env);
1751 addInstr(env, ARM64Instr_Mul(dst, extL, extR, ARM64mul_PLAIN));
1752 return dst;
1753 }
1754
1755 /* Handle misc other ops. */
1756
1757//ZZ if (e->Iex.Binop.op == Iop_Max32U) {
1758//ZZ HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1759//ZZ HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1760//ZZ HReg dst = newVRegI(env);
1761//ZZ addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, argL,
1762//ZZ ARMRI84_R(argR)));
1763//ZZ addInstr(env, mk_iMOVds_RR(dst, argL));
1764//ZZ addInstr(env, ARMInstr_CMov(ARMcc_LO, dst, ARMRI84_R(argR)));
1765//ZZ return dst;
1766//ZZ }
1767
1768 if (e->Iex.Binop.op == Iop_32HLto64) {
1769 HReg hi32s = iselIntExpr_R(env, e->Iex.Binop.arg1);
1770 HReg lo32s = iselIntExpr_R(env, e->Iex.Binop.arg2);
1771 HReg lo32 = widen_z_32_to_64(env, lo32s);
1772 HReg hi32 = newVRegI(env);
1773 addInstr(env, ARM64Instr_Shift(hi32, hi32s, ARM64RI6_I6(32),
1774 ARM64sh_SHL));
1775 addInstr(env, ARM64Instr_Logic(hi32, hi32, ARM64RIL_R(lo32),
1776 ARM64lo_OR));
1777 return hi32;
1778 }
1779
1780 if (e->Iex.Binop.op == Iop_CmpF64 || e->Iex.Binop.op == Iop_CmpF32) {
1781 Bool isD = e->Iex.Binop.op == Iop_CmpF64;
1782 HReg dL = (isD ? iselDblExpr : iselFltExpr)(env, e->Iex.Binop.arg1);
1783 HReg dR = (isD ? iselDblExpr : iselFltExpr)(env, e->Iex.Binop.arg2);
1784 HReg dst = newVRegI(env);
1785 HReg imm = newVRegI(env);
1786 /* Do the compare (FCMP), which sets NZCV in PSTATE. Then
1787 create in dst, the IRCmpF64Result encoded result. */
1788 addInstr(env, (isD ? ARM64Instr_VCmpD : ARM64Instr_VCmpS)(dL, dR));
1789 addInstr(env, ARM64Instr_Imm64(dst, 0));
1790 addInstr(env, ARM64Instr_Imm64(imm, 0x40)); // 0x40 = Ircr_EQ
1791 addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_EQ));
1792 addInstr(env, ARM64Instr_Imm64(imm, 0x01)); // 0x01 = Ircr_LT
1793 addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_MI));
1794 addInstr(env, ARM64Instr_Imm64(imm, 0x00)); // 0x00 = Ircr_GT
1795 addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_GT));
1796 addInstr(env, ARM64Instr_Imm64(imm, 0x45)); // 0x45 = Ircr_UN
1797 addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_VS));
1798 return dst;
1799 }
1800
1801 { /* local scope */
1802 ARM64CvtOp cvt_op = ARM64cvt_INVALID;
1803 Bool srcIsD = False;
1804 switch (e->Iex.Binop.op) {
1805 case Iop_F64toI64S:
1806 cvt_op = ARM64cvt_F64_I64S; srcIsD = True; break;
1807 case Iop_F64toI64U:
1808 cvt_op = ARM64cvt_F64_I64U; srcIsD = True; break;
1809 case Iop_F64toI32S:
1810 cvt_op = ARM64cvt_F64_I32S; srcIsD = True; break;
1811 case Iop_F64toI32U:
1812 cvt_op = ARM64cvt_F64_I32U; srcIsD = True; break;
1813 case Iop_F32toI32S:
1814 cvt_op = ARM64cvt_F32_I32S; srcIsD = False; break;
1815 case Iop_F32toI64U:
1816 cvt_op = ARM64cvt_F32_I64U; srcIsD = False; break;
1817 default:
1818 break;
1819 }
1820 if (cvt_op != ARM64cvt_INVALID) {
1821 /* This is all a bit dodgy, because we can't handle a
1822 non-constant (not-known-at-JIT-time) rounding mode
1823 indication. That's because there's no instruction
1824 AFAICS that does this conversion but rounds according to
1825 FPCR.RM, so we have to bake the rounding mode into the
1826 instruction right now. But that should be OK because
1827 (1) the front end attaches a literal Irrm_ value to the
1828 conversion binop, and (2) iropt will never float that
1829 off via CSE, into a literal. Hence we should always
1830 have an Irrm_ value as the first arg. */
1831 IRExpr* arg1 = e->Iex.Binop.arg1;
1832 if (arg1->tag != Iex_Const) goto irreducible;
1833 IRConst* arg1con = arg1->Iex.Const.con;
1834 vassert(arg1con->tag == Ico_U32); // else ill-typed IR
1835 UInt irrm = arg1con->Ico.U32;
1836 /* Find the ARM-encoded equivalent for |irrm|. */
1837 UInt armrm = 4; /* impossible */
1838 switch (irrm) {
1839 case Irrm_NEAREST: armrm = 0; break;
1840 case Irrm_NegINF: armrm = 2; break;
1841 case Irrm_PosINF: armrm = 1; break;
1842 case Irrm_ZERO: armrm = 3; break;
1843 default: goto irreducible;
1844 }
1845 HReg src = (srcIsD ? iselDblExpr : iselFltExpr)
1846 (env, e->Iex.Binop.arg2);
1847 HReg dst = newVRegI(env);
1848 addInstr(env, ARM64Instr_VCvtF2I(cvt_op, dst, src, armrm));
1849 return dst;
1850 }
1851 } /* local scope */
1852
1853//ZZ if (e->Iex.Binop.op == Iop_GetElem8x8
1854//ZZ || e->Iex.Binop.op == Iop_GetElem16x4
1855//ZZ || e->Iex.Binop.op == Iop_GetElem32x2) {
1856//ZZ HReg res = newVRegI(env);
1857//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Binop.arg1);
1858//ZZ UInt index, size;
1859//ZZ if (e->Iex.Binop.arg2->tag != Iex_Const ||
1860//ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
1861//ZZ vpanic("ARM target supports GetElem with constant "
1862//ZZ "second argument only\n");
1863//ZZ }
1864//ZZ index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
1865//ZZ switch (e->Iex.Binop.op) {
1866//ZZ case Iop_GetElem8x8: vassert(index < 8); size = 0; break;
1867//ZZ case Iop_GetElem16x4: vassert(index < 4); size = 1; break;
1868//ZZ case Iop_GetElem32x2: vassert(index < 2); size = 2; break;
1869//ZZ default: vassert(0);
1870//ZZ }
1871//ZZ addInstr(env, ARMInstr_NUnaryS(ARMneon_GETELEMS,
1872//ZZ mkARMNRS(ARMNRS_Reg, res, 0),
1873//ZZ mkARMNRS(ARMNRS_Scalar, arg, index),
1874//ZZ size, False));
1875//ZZ return res;
1876//ZZ }
1877//ZZ
1878//ZZ if (e->Iex.Binop.op == Iop_GetElem8x16
1879//ZZ || e->Iex.Binop.op == Iop_GetElem16x8
1880//ZZ || e->Iex.Binop.op == Iop_GetElem32x4) {
1881//ZZ HReg res = newVRegI(env);
1882//ZZ HReg arg = iselNeonExpr(env, e->Iex.Binop.arg1);
1883//ZZ UInt index, size;
1884//ZZ if (e->Iex.Binop.arg2->tag != Iex_Const ||
1885//ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
1886//ZZ vpanic("ARM target supports GetElem with constant "
1887//ZZ "second argument only\n");
1888//ZZ }
1889//ZZ index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
1890//ZZ switch (e->Iex.Binop.op) {
1891//ZZ case Iop_GetElem8x16: vassert(index < 16); size = 0; break;
1892//ZZ case Iop_GetElem16x8: vassert(index < 8); size = 1; break;
1893//ZZ case Iop_GetElem32x4: vassert(index < 4); size = 2; break;
1894//ZZ default: vassert(0);
1895//ZZ }
1896//ZZ addInstr(env, ARMInstr_NUnaryS(ARMneon_GETELEMS,
1897//ZZ mkARMNRS(ARMNRS_Reg, res, 0),
1898//ZZ mkARMNRS(ARMNRS_Scalar, arg, index),
1899//ZZ size, True));
1900//ZZ return res;
1901//ZZ }
1902
1903 /* All cases involving host-side helper calls. */
1904 void* fn = NULL;
1905 switch (e->Iex.Binop.op) {
1906//ZZ case Iop_Add16x2:
1907//ZZ fn = &h_generic_calc_Add16x2; break;
1908//ZZ case Iop_Sub16x2:
1909//ZZ fn = &h_generic_calc_Sub16x2; break;
1910//ZZ case Iop_HAdd16Ux2:
1911//ZZ fn = &h_generic_calc_HAdd16Ux2; break;
1912//ZZ case Iop_HAdd16Sx2:
1913//ZZ fn = &h_generic_calc_HAdd16Sx2; break;
1914//ZZ case Iop_HSub16Ux2:
1915//ZZ fn = &h_generic_calc_HSub16Ux2; break;
1916//ZZ case Iop_HSub16Sx2:
1917//ZZ fn = &h_generic_calc_HSub16Sx2; break;
1918//ZZ case Iop_QAdd16Sx2:
1919//ZZ fn = &h_generic_calc_QAdd16Sx2; break;
1920//ZZ case Iop_QAdd16Ux2:
1921//ZZ fn = &h_generic_calc_QAdd16Ux2; break;
1922//ZZ case Iop_QSub16Sx2:
1923//ZZ fn = &h_generic_calc_QSub16Sx2; break;
1924//ZZ case Iop_Add8x4:
1925//ZZ fn = &h_generic_calc_Add8x4; break;
1926//ZZ case Iop_Sub8x4:
1927//ZZ fn = &h_generic_calc_Sub8x4; break;
1928//ZZ case Iop_HAdd8Ux4:
1929//ZZ fn = &h_generic_calc_HAdd8Ux4; break;
1930//ZZ case Iop_HAdd8Sx4:
1931//ZZ fn = &h_generic_calc_HAdd8Sx4; break;
1932//ZZ case Iop_HSub8Ux4:
1933//ZZ fn = &h_generic_calc_HSub8Ux4; break;
1934//ZZ case Iop_HSub8Sx4:
1935//ZZ fn = &h_generic_calc_HSub8Sx4; break;
1936//ZZ case Iop_QAdd8Sx4:
1937//ZZ fn = &h_generic_calc_QAdd8Sx4; break;
1938//ZZ case Iop_QAdd8Ux4:
1939//ZZ fn = &h_generic_calc_QAdd8Ux4; break;
1940//ZZ case Iop_QSub8Sx4:
1941//ZZ fn = &h_generic_calc_QSub8Sx4; break;
1942//ZZ case Iop_QSub8Ux4:
1943//ZZ fn = &h_generic_calc_QSub8Ux4; break;
1944//ZZ case Iop_Sad8Ux4:
1945//ZZ fn = &h_generic_calc_Sad8Ux4; break;
1946//ZZ case Iop_QAdd32S:
1947//ZZ fn = &h_generic_calc_QAdd32S; break;
1948//ZZ case Iop_QSub32S:
1949//ZZ fn = &h_generic_calc_QSub32S; break;
1950//ZZ case Iop_QSub16Ux2:
1951//ZZ fn = &h_generic_calc_QSub16Ux2; break;
1952 case Iop_DivU32:
1953 fn = &h_calc_udiv32_w_arm_semantics; break;
1954 case Iop_DivS32:
1955 fn = &h_calc_sdiv32_w_arm_semantics; break;
1956 case Iop_DivU64:
1957 fn = &h_calc_udiv64_w_arm_semantics; break;
1958 case Iop_DivS64:
1959 fn = &h_calc_sdiv64_w_arm_semantics; break;
1960 default:
1961 break;
1962 }
1963
1964 if (fn) {
1965 HReg regL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1966 HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1967 HReg res = newVRegI(env);
1968 addInstr(env, ARM64Instr_MovI(hregARM64_X0(), regL));
1969 addInstr(env, ARM64Instr_MovI(hregARM64_X1(), regR));
1970 addInstr(env, ARM64Instr_Call( ARM64cc_AL, (HWord)Ptr_to_ULong(fn),
1971 2, mk_RetLoc_simple(RLPri_Int) ));
1972 addInstr(env, ARM64Instr_MovI(res, hregARM64_X0()));
1973 return res;
1974 }
1975
1976 break;
1977 }
1978
1979 /* --------- UNARY OP --------- */
1980 case Iex_Unop: {
1981
1982 switch (e->Iex.Unop.op) {
1983 case Iop_16Uto64: {
1984 /* This probably doesn't occur often enough to be worth
1985 rolling the extension into the load. */
1986 IRExpr* arg = e->Iex.Unop.arg;
1987 HReg src = iselIntExpr_R(env, arg);
1988 HReg dst = widen_z_16_to_64(env, src);
1989 return dst;
1990 }
1991 case Iop_32Uto64: {
1992 IRExpr* arg = e->Iex.Unop.arg;
1993 if (arg->tag == Iex_Load) {
1994 /* This correctly zero extends because _LdSt32 is
1995 defined to do a zero extending load. */
1996 HReg dst = newVRegI(env);
1997 ARM64AMode* am
1998 = iselIntExpr_AMode(env, arg->Iex.Load.addr, Ity_I32);
1999 addInstr(env, ARM64Instr_LdSt32(True/*isLoad*/, dst, am));
2000 return dst;
2001 }
2002 /* else be lame and mask it */
2003 HReg src = iselIntExpr_R(env, arg);
2004 HReg dst = widen_z_32_to_64(env, src);
2005 return dst;
2006 }
2007 case Iop_8Uto32: /* Just freeload on the 8Uto64 case */
2008 case Iop_8Uto64: {
2009 IRExpr* arg = e->Iex.Unop.arg;
2010 if (arg->tag == Iex_Load) {
2011 /* This correctly zero extends because _LdSt8 is
2012 defined to do a zero extending load. */
2013 HReg dst = newVRegI(env);
2014 ARM64AMode* am
2015 = iselIntExpr_AMode(env, arg->Iex.Load.addr, Ity_I8);
2016 addInstr(env, ARM64Instr_LdSt8(True/*isLoad*/, dst, am));
2017 return dst;
2018 }
2019 /* else be lame and mask it */
2020 HReg src = iselIntExpr_R(env, arg);
2021 HReg dst = widen_z_8_to_64(env, src);
2022 return dst;
2023 }
2024 case Iop_128HIto64: {
2025 HReg rHi, rLo;
2026 iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
2027 return rHi; /* and abandon rLo */
2028 }
2029 case Iop_8Sto32: case Iop_8Sto64: {
2030 IRExpr* arg = e->Iex.Unop.arg;
2031 HReg src = iselIntExpr_R(env, arg);
2032 HReg dst = widen_s_8_to_64(env, src);
2033 return dst;
2034 }
2035 case Iop_16Sto32: case Iop_16Sto64: {
2036 IRExpr* arg = e->Iex.Unop.arg;
2037 HReg src = iselIntExpr_R(env, arg);
2038 HReg dst = widen_s_16_to_64(env, src);
2039 return dst;
2040 }
2041 case Iop_32Sto64: {
2042 IRExpr* arg = e->Iex.Unop.arg;
2043 HReg src = iselIntExpr_R(env, arg);
2044 HReg dst = widen_s_32_to_64(env, src);
2045 return dst;
2046 }
2047 case Iop_Not32:
2048 case Iop_Not64: {
2049 HReg dst = newVRegI(env);
2050 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2051 addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NOT));
2052 return dst;
2053 }
2054 case Iop_Clz64: {
2055 HReg dst = newVRegI(env);
2056 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2057 addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_CLZ));
2058 return dst;
2059 }
2060 case Iop_Left32:
2061 case Iop_Left64: {
2062 /* Left64(src) = src | -src. Left32 can use the same
2063 implementation since in that case we don't care what
2064 the upper 32 bits become. */
2065 HReg dst = newVRegI(env);
2066 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2067 addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NEG));
2068 addInstr(env, ARM64Instr_Logic(dst, dst, ARM64RIL_R(src),
2069 ARM64lo_OR));
2070 return dst;
2071 }
2072 case Iop_CmpwNEZ64: {
2073 /* CmpwNEZ64(src) = (src == 0) ? 0...0 : 1...1
2074 = Left64(src) >>s 63 */
2075 HReg dst = newVRegI(env);
2076 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2077 addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NEG));
2078 addInstr(env, ARM64Instr_Logic(dst, dst, ARM64RIL_R(src),
2079 ARM64lo_OR));
2080 addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63),
2081 ARM64sh_SAR));
2082 return dst;
2083 }
2084 case Iop_CmpwNEZ32: {
2085 /* CmpwNEZ32(src) = CmpwNEZ64(src & 0xFFFFFFFF)
2086 = Left64(src & 0xFFFFFFFF) >>s 63 */
2087 HReg dst = newVRegI(env);
2088 HReg pre = iselIntExpr_R(env, e->Iex.Unop.arg);
2089 HReg src = widen_z_32_to_64(env, pre);
2090 addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NEG));
2091 addInstr(env, ARM64Instr_Logic(dst, dst, ARM64RIL_R(src),
2092 ARM64lo_OR));
2093 addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63),
2094 ARM64sh_SAR));
2095 return dst;
2096 }
2097 case Iop_V128to64: case Iop_V128HIto64: {
2098 HReg dst = newVRegI(env);
2099 HReg src = iselV128Expr(env, e->Iex.Unop.arg);
2100 UInt laneNo = (e->Iex.Unop.op == Iop_V128HIto64) ? 1 : 0;
2101 addInstr(env, ARM64Instr_VXfromQ(dst, src, laneNo));
2102 return dst;
2103 }
2104 case Iop_1Sto32:
2105 case Iop_1Sto64: {
2106 /* As with the iselStmt case for 'tmp:I1 = expr', we could
2107 do a lot better here if it ever became necessary. */
2108 HReg zero = newVRegI(env);
2109 HReg one = newVRegI(env);
2110 HReg dst = newVRegI(env);
2111 addInstr(env, ARM64Instr_Imm64(zero, 0));
2112 addInstr(env, ARM64Instr_Imm64(one, 1));
2113 ARM64CondCode cc = iselCondCode(env, e->Iex.Unop.arg);
2114 addInstr(env, ARM64Instr_CSel(dst, one, zero, cc));
2115 addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63),
2116 ARM64sh_SHL));
2117 addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63),
2118 ARM64sh_SAR));
2119 return dst;
2120 }
sewardj606c4ba2014-01-26 19:11:14 +00002121 case Iop_NarrowUn32to16x4:
2122 case Iop_NarrowUn64to32x2: {
2123 HReg src = iselV128Expr(env, e->Iex.Unop.arg);
2124 HReg tmp = newVRegV(env);
2125 HReg dst = newVRegI(env);
2126 UInt dszBlg2 = 3; /* illegal */
2127 switch (e->Iex.Unop.op) {
2128 case Iop_NarrowUn32to16x4: dszBlg2 = 1; break; // 32to16_x4
2129 case Iop_NarrowUn64to32x2: dszBlg2 = 2; break; // 64to32_x2
2130 default: vassert(0);
2131 }
2132 addInstr(env, ARM64Instr_VNarrowV(dszBlg2, tmp, src));
2133 addInstr(env, ARM64Instr_VXfromQ(dst, tmp, 0/*laneNo*/));
2134 return dst;
2135 }
sewardjbbcf1882014-01-12 12:49:10 +00002136//ZZ case Iop_64HIto32: {
2137//ZZ HReg rHi, rLo;
2138//ZZ iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
2139//ZZ return rHi; /* and abandon rLo .. poor wee thing :-) */
2140//ZZ }
2141//ZZ case Iop_64to32: {
2142//ZZ HReg rHi, rLo;
2143//ZZ iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
2144//ZZ return rLo; /* similar stupid comment to the above ... */
2145//ZZ }
2146//ZZ case Iop_64to8: {
2147//ZZ HReg rHi, rLo;
2148//ZZ if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
2149//ZZ HReg tHi = newVRegI(env);
2150//ZZ HReg tLo = newVRegI(env);
2151//ZZ HReg tmp = iselNeon64Expr(env, e->Iex.Unop.arg);
2152//ZZ addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo));
2153//ZZ rHi = tHi;
2154//ZZ rLo = tLo;
2155//ZZ } else {
2156//ZZ iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
2157//ZZ }
2158//ZZ return rLo;
2159//ZZ }
2160//ZZ
2161//ZZ case Iop_1Uto32:
2162//ZZ /* 1Uto32(tmp). Since I1 values generated into registers
2163//ZZ are guaranteed to have value either only zero or one,
2164//ZZ we can simply return the value of the register in this
2165//ZZ case. */
2166//ZZ if (e->Iex.Unop.arg->tag == Iex_RdTmp) {
2167//ZZ HReg dst = lookupIRTemp(env, e->Iex.Unop.arg->Iex.RdTmp.tmp);
2168//ZZ return dst;
2169//ZZ }
2170//ZZ /* else fall through */
2171//ZZ case Iop_1Uto8: {
2172//ZZ HReg dst = newVRegI(env);
2173//ZZ ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
2174//ZZ addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
2175//ZZ addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
2176//ZZ return dst;
2177//ZZ }
2178//ZZ
2179//ZZ case Iop_1Sto32: {
2180//ZZ HReg dst = newVRegI(env);
2181//ZZ ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
2182//ZZ ARMRI5* amt = ARMRI5_I5(31);
2183//ZZ /* This is really rough. We could do much better here;
2184//ZZ perhaps mvn{cond} dst, #0 as the second insn?
2185//ZZ (same applies to 1Sto64) */
2186//ZZ addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
2187//ZZ addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
2188//ZZ addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, dst, amt));
2189//ZZ addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt));
2190//ZZ return dst;
2191//ZZ }
2192//ZZ
2193//ZZ case Iop_Clz32: {
2194//ZZ /* Count leading zeroes; easy on ARM. */
2195//ZZ HReg dst = newVRegI(env);
2196//ZZ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2197//ZZ addInstr(env, ARMInstr_Unary(ARMun_CLZ, dst, src));
2198//ZZ return dst;
2199//ZZ }
2200//ZZ
2201//ZZ case Iop_CmpwNEZ32: {
2202//ZZ HReg dst = newVRegI(env);
2203//ZZ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2204//ZZ addInstr(env, ARMInstr_Unary(ARMun_NEG, dst, src));
2205//ZZ addInstr(env, ARMInstr_Alu(ARMalu_OR, dst, dst, ARMRI84_R(src)));
2206//ZZ addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, ARMRI5_I5(31)));
2207//ZZ return dst;
2208//ZZ }
2209//ZZ
2210//ZZ case Iop_ReinterpF32asI32: {
2211//ZZ HReg dst = newVRegI(env);
2212//ZZ HReg src = iselFltExpr(env, e->Iex.Unop.arg);
2213//ZZ addInstr(env, ARMInstr_VXferS(False/*!toS*/, src, dst));
2214//ZZ return dst;
2215//ZZ }
2216
2217 case Iop_64to32:
2218 case Iop_64to16:
2219 case Iop_64to8:
2220 /* These are no-ops. */
2221 return iselIntExpr_R(env, e->Iex.Unop.arg);
2222
2223 default:
2224 break;
2225 }
2226
2227//ZZ /* All Unop cases involving host-side helper calls. */
2228//ZZ void* fn = NULL;
2229//ZZ switch (e->Iex.Unop.op) {
2230//ZZ case Iop_CmpNEZ16x2:
2231//ZZ fn = &h_generic_calc_CmpNEZ16x2; break;
2232//ZZ case Iop_CmpNEZ8x4:
2233//ZZ fn = &h_generic_calc_CmpNEZ8x4; break;
2234//ZZ default:
2235//ZZ break;
2236//ZZ }
2237//ZZ
2238//ZZ if (fn) {
2239//ZZ HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
2240//ZZ HReg res = newVRegI(env);
2241//ZZ addInstr(env, mk_iMOVds_RR(hregARM_R0(), arg));
2242//ZZ addInstr(env, ARMInstr_Call( ARMcc_AL, (HWord)Ptr_to_ULong(fn),
2243//ZZ 1, RetLocInt ));
2244//ZZ addInstr(env, mk_iMOVds_RR(res, hregARM_R0()));
2245//ZZ return res;
2246//ZZ }
2247
2248 break;
2249 }
2250
2251 /* --------- GET --------- */
2252 case Iex_Get: {
2253 if (ty == Ity_I64
2254 && 0 == (e->Iex.Get.offset & 7) && e->Iex.Get.offset < 8192-8) {
2255 HReg dst = newVRegI(env);
2256 ARM64AMode* am
2257 = mk_baseblock_64bit_access_amode(e->Iex.Get.offset);
2258 addInstr(env, ARM64Instr_LdSt64(True/*isLoad*/, dst, am));
2259 return dst;
2260 }
2261 if (ty == Ity_I32
2262 && 0 == (e->Iex.Get.offset & 3) && e->Iex.Get.offset < 4096-4) {
2263 HReg dst = newVRegI(env);
2264 ARM64AMode* am
2265 = mk_baseblock_32bit_access_amode(e->Iex.Get.offset);
2266 addInstr(env, ARM64Instr_LdSt32(True/*isLoad*/, dst, am));
2267 return dst;
2268 }
2269 break;
2270 }
2271
2272 /* --------- CCALL --------- */
2273 case Iex_CCall: {
2274 HReg dst = newVRegI(env);
2275 vassert(ty == e->Iex.CCall.retty);
2276
2277 /* be very restrictive for now. Only 64-bit ints allowed for
2278 args, and 64 bits for return type. Don't forget to change
2279 the RetLoc if more types are allowed in future. */
2280 if (e->Iex.CCall.retty != Ity_I64)
2281 goto irreducible;
2282
2283 /* Marshal args, do the call, clear stack. */
2284 UInt addToSp = 0;
2285 RetLoc rloc = mk_RetLoc_INVALID();
2286 Bool ok = doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
2287 e->Iex.CCall.cee, e->Iex.CCall.retty,
2288 e->Iex.CCall.args );
2289 /* */
2290 if (ok) {
2291 vassert(is_sane_RetLoc(rloc));
2292 vassert(rloc.pri == RLPri_Int);
2293 vassert(addToSp == 0);
2294 addInstr(env, ARM64Instr_MovI(dst, hregARM64_X0()));
2295 return dst;
2296 }
2297 /* else fall through; will hit the irreducible: label */
2298 }
2299
2300 /* --------- LITERAL --------- */
2301 /* 64-bit literals */
2302 case Iex_Const: {
2303 ULong u = 0;
2304 HReg dst = newVRegI(env);
2305 switch (e->Iex.Const.con->tag) {
2306 case Ico_U64: u = e->Iex.Const.con->Ico.U64; break;
2307 case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
2308 case Ico_U16: u = e->Iex.Const.con->Ico.U16; break;
2309 case Ico_U8: u = e->Iex.Const.con->Ico.U8; break;
2310 default: ppIRExpr(e); vpanic("iselIntExpr_R.Iex_Const(arm64)");
2311 }
2312 addInstr(env, ARM64Instr_Imm64(dst, u));
2313 return dst;
2314 }
2315
2316 /* --------- MULTIPLEX --------- */
2317 case Iex_ITE: {
2318 /* ITE(ccexpr, iftrue, iffalse) */
2319 if (ty == Ity_I64 || ty == Ity_I32) {
2320 ARM64CondCode cc;
2321 HReg r1 = iselIntExpr_R(env, e->Iex.ITE.iftrue);
2322 HReg r0 = iselIntExpr_R(env, e->Iex.ITE.iffalse);
2323 HReg dst = newVRegI(env);
2324 cc = iselCondCode(env, e->Iex.ITE.cond);
2325 addInstr(env, ARM64Instr_CSel(dst, r1, r0, cc));
2326 return dst;
2327 }
2328 break;
2329 }
2330
2331 default:
2332 break;
2333 } /* switch (e->tag) */
2334
2335 /* We get here if no pattern matched. */
2336 irreducible:
2337 ppIRExpr(e);
2338 vpanic("iselIntExpr_R: cannot reduce tree");
2339}
2340
2341
2342/*---------------------------------------------------------*/
2343/*--- ISEL: Integer expressions (128 bit) ---*/
2344/*---------------------------------------------------------*/
2345
2346/* Compute a 128-bit value into a register pair, which is returned as
2347 the first two parameters. As with iselIntExpr_R, these may be
2348 either real or virtual regs; in any case they must not be changed
2349 by subsequent code emitted by the caller. */
2350
2351static void iselInt128Expr ( HReg* rHi, HReg* rLo,
2352 ISelEnv* env, IRExpr* e )
2353{
2354 iselInt128Expr_wrk(rHi, rLo, env, e);
2355# if 0
2356 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
2357# endif
2358 vassert(hregClass(*rHi) == HRcInt64);
2359 vassert(hregIsVirtual(*rHi));
2360 vassert(hregClass(*rLo) == HRcInt64);
2361 vassert(hregIsVirtual(*rLo));
2362}
2363
2364/* DO NOT CALL THIS DIRECTLY ! */
2365static void iselInt128Expr_wrk ( HReg* rHi, HReg* rLo,
2366 ISelEnv* env, IRExpr* e )
2367{
2368 vassert(e);
2369 vassert(typeOfIRExpr(env->type_env,e) == Ity_I128);
2370
2371 /* --------- BINARY ops --------- */
2372 if (e->tag == Iex_Binop) {
2373 switch (e->Iex.Binop.op) {
2374 /* 64 x 64 -> 128 multiply */
2375 case Iop_MullU64:
2376 /*case Iop_MullS64:*/ {
2377 Bool syned = toBool(e->Iex.Binop.op == Iop_MullS64);
2378 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
2379 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
2380 HReg dstLo = newVRegI(env);
2381 HReg dstHi = newVRegI(env);
2382 addInstr(env, ARM64Instr_Mul(dstLo, argL, argR,
2383 ARM64mul_PLAIN));
2384 addInstr(env, ARM64Instr_Mul(dstHi, argL, argR,
2385 syned ? ARM64mul_SX : ARM64mul_ZX));
2386 *rHi = dstHi;
2387 *rLo = dstLo;
2388 return;
2389 }
2390 /* 64HLto128(e1,e2) */
2391 case Iop_64HLto128:
2392 *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
2393 *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
2394 return;
2395 default:
2396 break;
2397 }
2398 } /* if (e->tag == Iex_Binop) */
2399
2400 ppIRExpr(e);
2401 vpanic("iselInt128Expr(arm64)");
2402}
2403
2404
2405//ZZ /* -------------------- 64-bit -------------------- */
2406//ZZ
2407//ZZ /* Compute a 64-bit value into a register pair, which is returned as
2408//ZZ the first two parameters. As with iselIntExpr_R, these may be
2409//ZZ either real or virtual regs; in any case they must not be changed
2410//ZZ by subsequent code emitted by the caller. */
2411//ZZ
2412//ZZ static void iselInt64Expr ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e )
2413//ZZ {
2414//ZZ iselInt64Expr_wrk(rHi, rLo, env, e);
2415//ZZ # if 0
2416//ZZ vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
2417//ZZ # endif
2418//ZZ vassert(hregClass(*rHi) == HRcInt32);
2419//ZZ vassert(hregIsVirtual(*rHi));
2420//ZZ vassert(hregClass(*rLo) == HRcInt32);
2421//ZZ vassert(hregIsVirtual(*rLo));
2422//ZZ }
2423//ZZ
2424//ZZ /* DO NOT CALL THIS DIRECTLY ! */
2425//ZZ static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e )
2426//ZZ {
2427//ZZ vassert(e);
2428//ZZ vassert(typeOfIRExpr(env->type_env,e) == Ity_I64);
2429//ZZ
2430//ZZ /* 64-bit literal */
2431//ZZ if (e->tag == Iex_Const) {
2432//ZZ ULong w64 = e->Iex.Const.con->Ico.U64;
2433//ZZ UInt wHi = toUInt(w64 >> 32);
2434//ZZ UInt wLo = toUInt(w64);
2435//ZZ HReg tHi = newVRegI(env);
2436//ZZ HReg tLo = newVRegI(env);
2437//ZZ vassert(e->Iex.Const.con->tag == Ico_U64);
2438//ZZ addInstr(env, ARMInstr_Imm32(tHi, wHi));
2439//ZZ addInstr(env, ARMInstr_Imm32(tLo, wLo));
2440//ZZ *rHi = tHi;
2441//ZZ *rLo = tLo;
2442//ZZ return;
2443//ZZ }
2444//ZZ
2445//ZZ /* read 64-bit IRTemp */
2446//ZZ if (e->tag == Iex_RdTmp) {
2447//ZZ if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
2448//ZZ HReg tHi = newVRegI(env);
2449//ZZ HReg tLo = newVRegI(env);
2450//ZZ HReg tmp = iselNeon64Expr(env, e);
2451//ZZ addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo));
2452//ZZ *rHi = tHi;
2453//ZZ *rLo = tLo;
2454//ZZ } else {
2455//ZZ lookupIRTemp64( rHi, rLo, env, e->Iex.RdTmp.tmp);
2456//ZZ }
2457//ZZ return;
2458//ZZ }
2459//ZZ
2460//ZZ /* 64-bit load */
2461//ZZ if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
2462//ZZ HReg tLo, tHi, rA;
2463//ZZ vassert(e->Iex.Load.ty == Ity_I64);
2464//ZZ rA = iselIntExpr_R(env, e->Iex.Load.addr);
2465//ZZ tHi = newVRegI(env);
2466//ZZ tLo = newVRegI(env);
2467//ZZ addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/,
2468//ZZ tHi, ARMAMode1_RI(rA, 4)));
2469//ZZ addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/,
2470//ZZ tLo, ARMAMode1_RI(rA, 0)));
2471//ZZ *rHi = tHi;
2472//ZZ *rLo = tLo;
2473//ZZ return;
2474//ZZ }
2475//ZZ
2476//ZZ /* 64-bit GET */
2477//ZZ if (e->tag == Iex_Get) {
2478//ZZ ARMAMode1* am0 = ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset + 0);
2479//ZZ ARMAMode1* am4 = ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset + 4);
2480//ZZ HReg tHi = newVRegI(env);
2481//ZZ HReg tLo = newVRegI(env);
2482//ZZ addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/, tHi, am4));
2483//ZZ addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/, tLo, am0));
2484//ZZ *rHi = tHi;
2485//ZZ *rLo = tLo;
2486//ZZ return;
2487//ZZ }
2488//ZZ
2489//ZZ /* --------- BINARY ops --------- */
2490//ZZ if (e->tag == Iex_Binop) {
2491//ZZ switch (e->Iex.Binop.op) {
2492//ZZ
2493//ZZ /* 32 x 32 -> 64 multiply */
2494//ZZ case Iop_MullS32:
2495//ZZ case Iop_MullU32: {
2496//ZZ HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
2497//ZZ HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
2498//ZZ HReg tHi = newVRegI(env);
2499//ZZ HReg tLo = newVRegI(env);
2500//ZZ ARMMulOp mop = e->Iex.Binop.op == Iop_MullS32
2501//ZZ ? ARMmul_SX : ARMmul_ZX;
2502//ZZ addInstr(env, mk_iMOVds_RR(hregARM_R2(), argL));
2503//ZZ addInstr(env, mk_iMOVds_RR(hregARM_R3(), argR));
2504//ZZ addInstr(env, ARMInstr_Mul(mop));
2505//ZZ addInstr(env, mk_iMOVds_RR(tHi, hregARM_R1()));
2506//ZZ addInstr(env, mk_iMOVds_RR(tLo, hregARM_R0()));
2507//ZZ *rHi = tHi;
2508//ZZ *rLo = tLo;
2509//ZZ return;
2510//ZZ }
2511//ZZ
2512//ZZ case Iop_Or64: {
2513//ZZ HReg xLo, xHi, yLo, yHi;
2514//ZZ HReg tHi = newVRegI(env);
2515//ZZ HReg tLo = newVRegI(env);
2516//ZZ iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2517//ZZ iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
2518//ZZ addInstr(env, ARMInstr_Alu(ARMalu_OR, tHi, xHi, ARMRI84_R(yHi)));
2519//ZZ addInstr(env, ARMInstr_Alu(ARMalu_OR, tLo, xLo, ARMRI84_R(yLo)));
2520//ZZ *rHi = tHi;
2521//ZZ *rLo = tLo;
2522//ZZ return;
2523//ZZ }
2524//ZZ
2525//ZZ case Iop_Add64: {
2526//ZZ HReg xLo, xHi, yLo, yHi;
2527//ZZ HReg tHi = newVRegI(env);
2528//ZZ HReg tLo = newVRegI(env);
2529//ZZ iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2530//ZZ iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
2531//ZZ addInstr(env, ARMInstr_Alu(ARMalu_ADDS, tLo, xLo, ARMRI84_R(yLo)));
2532//ZZ addInstr(env, ARMInstr_Alu(ARMalu_ADC, tHi, xHi, ARMRI84_R(yHi)));
2533//ZZ *rHi = tHi;
2534//ZZ *rLo = tLo;
2535//ZZ return;
2536//ZZ }
2537//ZZ
2538//ZZ /* 32HLto64(e1,e2) */
2539//ZZ case Iop_32HLto64: {
2540//ZZ *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
2541//ZZ *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
2542//ZZ return;
2543//ZZ }
2544//ZZ
2545//ZZ default:
2546//ZZ break;
2547//ZZ }
2548//ZZ }
2549//ZZ
2550//ZZ /* --------- UNARY ops --------- */
2551//ZZ if (e->tag == Iex_Unop) {
2552//ZZ switch (e->Iex.Unop.op) {
2553//ZZ
2554//ZZ /* ReinterpF64asI64 */
2555//ZZ case Iop_ReinterpF64asI64: {
2556//ZZ HReg dstHi = newVRegI(env);
2557//ZZ HReg dstLo = newVRegI(env);
2558//ZZ HReg src = iselDblExpr(env, e->Iex.Unop.arg);
2559//ZZ addInstr(env, ARMInstr_VXferD(False/*!toD*/, src, dstHi, dstLo));
2560//ZZ *rHi = dstHi;
2561//ZZ *rLo = dstLo;
2562//ZZ return;
2563//ZZ }
2564//ZZ
2565//ZZ /* Left64(e) */
2566//ZZ case Iop_Left64: {
2567//ZZ HReg yLo, yHi;
2568//ZZ HReg tHi = newVRegI(env);
2569//ZZ HReg tLo = newVRegI(env);
2570//ZZ HReg zero = newVRegI(env);
2571//ZZ /* yHi:yLo = arg */
2572//ZZ iselInt64Expr(&yHi, &yLo, env, e->Iex.Unop.arg);
2573//ZZ /* zero = 0 */
2574//ZZ addInstr(env, ARMInstr_Imm32(zero, 0));
2575//ZZ /* tLo = 0 - yLo, and set carry */
2576//ZZ addInstr(env, ARMInstr_Alu(ARMalu_SUBS,
2577//ZZ tLo, zero, ARMRI84_R(yLo)));
2578//ZZ /* tHi = 0 - yHi - carry */
2579//ZZ addInstr(env, ARMInstr_Alu(ARMalu_SBC,
2580//ZZ tHi, zero, ARMRI84_R(yHi)));
2581//ZZ /* So now we have tHi:tLo = -arg. To finish off, or 'arg'
2582//ZZ back in, so as to give the final result
2583//ZZ tHi:tLo = arg | -arg. */
2584//ZZ addInstr(env, ARMInstr_Alu(ARMalu_OR, tHi, tHi, ARMRI84_R(yHi)));
2585//ZZ addInstr(env, ARMInstr_Alu(ARMalu_OR, tLo, tLo, ARMRI84_R(yLo)));
2586//ZZ *rHi = tHi;
2587//ZZ *rLo = tLo;
2588//ZZ return;
2589//ZZ }
2590//ZZ
2591//ZZ /* CmpwNEZ64(e) */
2592//ZZ case Iop_CmpwNEZ64: {
2593//ZZ HReg srcLo, srcHi;
2594//ZZ HReg tmp1 = newVRegI(env);
2595//ZZ HReg tmp2 = newVRegI(env);
2596//ZZ /* srcHi:srcLo = arg */
2597//ZZ iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg);
2598//ZZ /* tmp1 = srcHi | srcLo */
2599//ZZ addInstr(env, ARMInstr_Alu(ARMalu_OR,
2600//ZZ tmp1, srcHi, ARMRI84_R(srcLo)));
2601//ZZ /* tmp2 = (tmp1 | -tmp1) >>s 31 */
2602//ZZ addInstr(env, ARMInstr_Unary(ARMun_NEG, tmp2, tmp1));
2603//ZZ addInstr(env, ARMInstr_Alu(ARMalu_OR,
2604//ZZ tmp2, tmp2, ARMRI84_R(tmp1)));
2605//ZZ addInstr(env, ARMInstr_Shift(ARMsh_SAR,
2606//ZZ tmp2, tmp2, ARMRI5_I5(31)));
2607//ZZ *rHi = tmp2;
2608//ZZ *rLo = tmp2;
2609//ZZ return;
2610//ZZ }
2611//ZZ
2612//ZZ case Iop_1Sto64: {
2613//ZZ HReg dst = newVRegI(env);
2614//ZZ ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
2615//ZZ ARMRI5* amt = ARMRI5_I5(31);
2616//ZZ /* This is really rough. We could do much better here;
2617//ZZ perhaps mvn{cond} dst, #0 as the second insn?
2618//ZZ (same applies to 1Sto32) */
2619//ZZ addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
2620//ZZ addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
2621//ZZ addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, dst, amt));
2622//ZZ addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt));
2623//ZZ *rHi = dst;
2624//ZZ *rLo = dst;
2625//ZZ return;
2626//ZZ }
2627//ZZ
2628//ZZ default:
2629//ZZ break;
2630//ZZ }
2631//ZZ } /* if (e->tag == Iex_Unop) */
2632//ZZ
2633//ZZ /* --------- MULTIPLEX --------- */
2634//ZZ if (e->tag == Iex_ITE) { // VFD
2635//ZZ IRType tyC;
2636//ZZ HReg r1hi, r1lo, r0hi, r0lo, dstHi, dstLo;
2637//ZZ ARMCondCode cc;
2638//ZZ tyC = typeOfIRExpr(env->type_env,e->Iex.ITE.cond);
2639//ZZ vassert(tyC == Ity_I1);
2640//ZZ iselInt64Expr(&r1hi, &r1lo, env, e->Iex.ITE.iftrue);
2641//ZZ iselInt64Expr(&r0hi, &r0lo, env, e->Iex.ITE.iffalse);
2642//ZZ dstHi = newVRegI(env);
2643//ZZ dstLo = newVRegI(env);
2644//ZZ addInstr(env, mk_iMOVds_RR(dstHi, r1hi));
2645//ZZ addInstr(env, mk_iMOVds_RR(dstLo, r1lo));
2646//ZZ cc = iselCondCode(env, e->Iex.ITE.cond);
2647//ZZ addInstr(env, ARMInstr_CMov(cc ^ 1, dstHi, ARMRI84_R(r0hi)));
2648//ZZ addInstr(env, ARMInstr_CMov(cc ^ 1, dstLo, ARMRI84_R(r0lo)));
2649//ZZ *rHi = dstHi;
2650//ZZ *rLo = dstLo;
2651//ZZ return;
2652//ZZ }
2653//ZZ
2654//ZZ /* It is convenient sometimes to call iselInt64Expr even when we
2655//ZZ have NEON support (e.g. in do_helper_call we need 64-bit
2656//ZZ arguments as 2 x 32 regs). */
2657//ZZ if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
2658//ZZ HReg tHi = newVRegI(env);
2659//ZZ HReg tLo = newVRegI(env);
2660//ZZ HReg tmp = iselNeon64Expr(env, e);
2661//ZZ addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo));
2662//ZZ *rHi = tHi;
2663//ZZ *rLo = tLo;
2664//ZZ return ;
2665//ZZ }
2666//ZZ
2667//ZZ ppIRExpr(e);
2668//ZZ vpanic("iselInt64Expr");
2669//ZZ }
2670//ZZ
2671//ZZ
2672//ZZ /*---------------------------------------------------------*/
2673//ZZ /*--- ISEL: Vector (NEON) expressions (64 bit) ---*/
2674//ZZ /*---------------------------------------------------------*/
2675//ZZ
2676//ZZ static HReg iselNeon64Expr ( ISelEnv* env, IRExpr* e )
2677//ZZ {
2678//ZZ HReg r = iselNeon64Expr_wrk( env, e );
2679//ZZ vassert(hregClass(r) == HRcFlt64);
2680//ZZ vassert(hregIsVirtual(r));
2681//ZZ return r;
2682//ZZ }
2683//ZZ
2684//ZZ /* DO NOT CALL THIS DIRECTLY */
2685//ZZ static HReg iselNeon64Expr_wrk ( ISelEnv* env, IRExpr* e )
2686//ZZ {
2687//ZZ IRType ty = typeOfIRExpr(env->type_env, e);
2688//ZZ MatchInfo mi;
2689//ZZ vassert(e);
2690//ZZ vassert(ty == Ity_I64);
2691//ZZ
2692//ZZ if (e->tag == Iex_RdTmp) {
2693//ZZ return lookupIRTemp(env, e->Iex.RdTmp.tmp);
2694//ZZ }
2695//ZZ
2696//ZZ if (e->tag == Iex_Const) {
2697//ZZ HReg rLo, rHi;
2698//ZZ HReg res = newVRegD(env);
2699//ZZ iselInt64Expr(&rHi, &rLo, env, e);
2700//ZZ addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
2701//ZZ return res;
2702//ZZ }
2703//ZZ
2704//ZZ /* 64-bit load */
2705//ZZ if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
2706//ZZ HReg res = newVRegD(env);
2707//ZZ ARMAModeN* am = iselIntExpr_AModeN(env, e->Iex.Load.addr);
2708//ZZ vassert(ty == Ity_I64);
2709//ZZ addInstr(env, ARMInstr_NLdStD(True, res, am));
2710//ZZ return res;
2711//ZZ }
2712//ZZ
2713//ZZ /* 64-bit GET */
2714//ZZ if (e->tag == Iex_Get) {
2715//ZZ HReg addr = newVRegI(env);
2716//ZZ HReg res = newVRegD(env);
2717//ZZ vassert(ty == Ity_I64);
2718//ZZ addInstr(env, ARMInstr_Add32(addr, hregARM_R8(), e->Iex.Get.offset));
2719//ZZ addInstr(env, ARMInstr_NLdStD(True, res, mkARMAModeN_R(addr)));
2720//ZZ return res;
2721//ZZ }
2722//ZZ
2723//ZZ /* --------- BINARY ops --------- */
2724//ZZ if (e->tag == Iex_Binop) {
2725//ZZ switch (e->Iex.Binop.op) {
2726//ZZ
2727//ZZ /* 32 x 32 -> 64 multiply */
2728//ZZ case Iop_MullS32:
2729//ZZ case Iop_MullU32: {
2730//ZZ HReg rLo, rHi;
2731//ZZ HReg res = newVRegD(env);
2732//ZZ iselInt64Expr(&rHi, &rLo, env, e);
2733//ZZ addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
2734//ZZ return res;
2735//ZZ }
2736//ZZ
2737//ZZ case Iop_And64: {
2738//ZZ HReg res = newVRegD(env);
2739//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2740//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2741//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
2742//ZZ res, argL, argR, 4, False));
2743//ZZ return res;
2744//ZZ }
2745//ZZ case Iop_Or64: {
2746//ZZ HReg res = newVRegD(env);
2747//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2748//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2749//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
2750//ZZ res, argL, argR, 4, False));
2751//ZZ return res;
2752//ZZ }
2753//ZZ case Iop_Xor64: {
2754//ZZ HReg res = newVRegD(env);
2755//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2756//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2757//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VXOR,
2758//ZZ res, argL, argR, 4, False));
2759//ZZ return res;
2760//ZZ }
2761//ZZ
2762//ZZ /* 32HLto64(e1,e2) */
2763//ZZ case Iop_32HLto64: {
2764//ZZ HReg rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
2765//ZZ HReg rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
2766//ZZ HReg res = newVRegD(env);
2767//ZZ addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
2768//ZZ return res;
2769//ZZ }
2770//ZZ
2771//ZZ case Iop_Add8x8:
2772//ZZ case Iop_Add16x4:
2773//ZZ case Iop_Add32x2:
2774//ZZ case Iop_Add64: {
2775//ZZ HReg res = newVRegD(env);
2776//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2777//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2778//ZZ UInt size;
2779//ZZ switch (e->Iex.Binop.op) {
2780//ZZ case Iop_Add8x8: size = 0; break;
2781//ZZ case Iop_Add16x4: size = 1; break;
2782//ZZ case Iop_Add32x2: size = 2; break;
2783//ZZ case Iop_Add64: size = 3; break;
2784//ZZ default: vassert(0);
2785//ZZ }
2786//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VADD,
2787//ZZ res, argL, argR, size, False));
2788//ZZ return res;
2789//ZZ }
2790//ZZ case Iop_Add32Fx2: {
2791//ZZ HReg res = newVRegD(env);
2792//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2793//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2794//ZZ UInt size = 0;
2795//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VADDFP,
2796//ZZ res, argL, argR, size, False));
2797//ZZ return res;
2798//ZZ }
2799//ZZ case Iop_Recps32Fx2: {
2800//ZZ HReg res = newVRegD(env);
2801//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2802//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2803//ZZ UInt size = 0;
2804//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VRECPS,
2805//ZZ res, argL, argR, size, False));
2806//ZZ return res;
2807//ZZ }
2808//ZZ case Iop_Rsqrts32Fx2: {
2809//ZZ HReg res = newVRegD(env);
2810//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2811//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2812//ZZ UInt size = 0;
2813//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VRSQRTS,
2814//ZZ res, argL, argR, size, False));
2815//ZZ return res;
2816//ZZ }
2817//ZZ
2818//ZZ // These 6 verified 18 Apr 2013
2819//ZZ case Iop_InterleaveHI32x2:
2820//ZZ case Iop_InterleaveLO32x2:
2821//ZZ case Iop_InterleaveOddLanes8x8:
2822//ZZ case Iop_InterleaveEvenLanes8x8:
2823//ZZ case Iop_InterleaveOddLanes16x4:
2824//ZZ case Iop_InterleaveEvenLanes16x4: {
2825//ZZ HReg rD = newVRegD(env);
2826//ZZ HReg rM = newVRegD(env);
2827//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2828//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2829//ZZ UInt size;
2830//ZZ Bool resRd; // is the result in rD or rM ?
2831//ZZ switch (e->Iex.Binop.op) {
2832//ZZ case Iop_InterleaveOddLanes8x8: resRd = False; size = 0; break;
2833//ZZ case Iop_InterleaveEvenLanes8x8: resRd = True; size = 0; break;
2834//ZZ case Iop_InterleaveOddLanes16x4: resRd = False; size = 1; break;
2835//ZZ case Iop_InterleaveEvenLanes16x4: resRd = True; size = 1; break;
2836//ZZ case Iop_InterleaveHI32x2: resRd = False; size = 2; break;
2837//ZZ case Iop_InterleaveLO32x2: resRd = True; size = 2; break;
2838//ZZ default: vassert(0);
2839//ZZ }
2840//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, False));
2841//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, False));
2842//ZZ addInstr(env, ARMInstr_NDual(ARMneon_TRN, rD, rM, size, False));
2843//ZZ return resRd ? rD : rM;
2844//ZZ }
2845//ZZ
2846//ZZ // These 4 verified 18 Apr 2013
2847//ZZ case Iop_InterleaveHI8x8:
2848//ZZ case Iop_InterleaveLO8x8:
2849//ZZ case Iop_InterleaveHI16x4:
2850//ZZ case Iop_InterleaveLO16x4: {
2851//ZZ HReg rD = newVRegD(env);
2852//ZZ HReg rM = newVRegD(env);
2853//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2854//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2855//ZZ UInt size;
2856//ZZ Bool resRd; // is the result in rD or rM ?
2857//ZZ switch (e->Iex.Binop.op) {
2858//ZZ case Iop_InterleaveHI8x8: resRd = False; size = 0; break;
2859//ZZ case Iop_InterleaveLO8x8: resRd = True; size = 0; break;
2860//ZZ case Iop_InterleaveHI16x4: resRd = False; size = 1; break;
2861//ZZ case Iop_InterleaveLO16x4: resRd = True; size = 1; break;
2862//ZZ default: vassert(0);
2863//ZZ }
2864//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, False));
2865//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, False));
2866//ZZ addInstr(env, ARMInstr_NDual(ARMneon_ZIP, rD, rM, size, False));
2867//ZZ return resRd ? rD : rM;
2868//ZZ }
2869//ZZ
2870//ZZ // These 4 verified 18 Apr 2013
2871//ZZ case Iop_CatOddLanes8x8:
2872//ZZ case Iop_CatEvenLanes8x8:
2873//ZZ case Iop_CatOddLanes16x4:
2874//ZZ case Iop_CatEvenLanes16x4: {
2875//ZZ HReg rD = newVRegD(env);
2876//ZZ HReg rM = newVRegD(env);
2877//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2878//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2879//ZZ UInt size;
2880//ZZ Bool resRd; // is the result in rD or rM ?
2881//ZZ switch (e->Iex.Binop.op) {
2882//ZZ case Iop_CatOddLanes8x8: resRd = False; size = 0; break;
2883//ZZ case Iop_CatEvenLanes8x8: resRd = True; size = 0; break;
2884//ZZ case Iop_CatOddLanes16x4: resRd = False; size = 1; break;
2885//ZZ case Iop_CatEvenLanes16x4: resRd = True; size = 1; break;
2886//ZZ default: vassert(0);
2887//ZZ }
2888//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, False));
2889//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, False));
2890//ZZ addInstr(env, ARMInstr_NDual(ARMneon_UZP, rD, rM, size, False));
2891//ZZ return resRd ? rD : rM;
2892//ZZ }
2893//ZZ
2894//ZZ case Iop_QAdd8Ux8:
2895//ZZ case Iop_QAdd16Ux4:
2896//ZZ case Iop_QAdd32Ux2:
2897//ZZ case Iop_QAdd64Ux1: {
2898//ZZ HReg res = newVRegD(env);
2899//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2900//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2901//ZZ UInt size;
2902//ZZ switch (e->Iex.Binop.op) {
2903//ZZ case Iop_QAdd8Ux8: size = 0; break;
2904//ZZ case Iop_QAdd16Ux4: size = 1; break;
2905//ZZ case Iop_QAdd32Ux2: size = 2; break;
2906//ZZ case Iop_QAdd64Ux1: size = 3; break;
2907//ZZ default: vassert(0);
2908//ZZ }
2909//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQADDU,
2910//ZZ res, argL, argR, size, False));
2911//ZZ return res;
2912//ZZ }
2913//ZZ case Iop_QAdd8Sx8:
2914//ZZ case Iop_QAdd16Sx4:
2915//ZZ case Iop_QAdd32Sx2:
2916//ZZ case Iop_QAdd64Sx1: {
2917//ZZ HReg res = newVRegD(env);
2918//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2919//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2920//ZZ UInt size;
2921//ZZ switch (e->Iex.Binop.op) {
2922//ZZ case Iop_QAdd8Sx8: size = 0; break;
2923//ZZ case Iop_QAdd16Sx4: size = 1; break;
2924//ZZ case Iop_QAdd32Sx2: size = 2; break;
2925//ZZ case Iop_QAdd64Sx1: size = 3; break;
2926//ZZ default: vassert(0);
2927//ZZ }
2928//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQADDS,
2929//ZZ res, argL, argR, size, False));
2930//ZZ return res;
2931//ZZ }
2932//ZZ case Iop_Sub8x8:
2933//ZZ case Iop_Sub16x4:
2934//ZZ case Iop_Sub32x2:
2935//ZZ case Iop_Sub64: {
2936//ZZ HReg res = newVRegD(env);
2937//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2938//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2939//ZZ UInt size;
2940//ZZ switch (e->Iex.Binop.op) {
2941//ZZ case Iop_Sub8x8: size = 0; break;
2942//ZZ case Iop_Sub16x4: size = 1; break;
2943//ZZ case Iop_Sub32x2: size = 2; break;
2944//ZZ case Iop_Sub64: size = 3; break;
2945//ZZ default: vassert(0);
2946//ZZ }
2947//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
2948//ZZ res, argL, argR, size, False));
2949//ZZ return res;
2950//ZZ }
2951//ZZ case Iop_Sub32Fx2: {
2952//ZZ HReg res = newVRegD(env);
2953//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2954//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2955//ZZ UInt size = 0;
2956//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VSUBFP,
2957//ZZ res, argL, argR, size, False));
2958//ZZ return res;
2959//ZZ }
2960//ZZ case Iop_QSub8Ux8:
2961//ZZ case Iop_QSub16Ux4:
2962//ZZ case Iop_QSub32Ux2:
2963//ZZ case Iop_QSub64Ux1: {
2964//ZZ HReg res = newVRegD(env);
2965//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2966//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2967//ZZ UInt size;
2968//ZZ switch (e->Iex.Binop.op) {
2969//ZZ case Iop_QSub8Ux8: size = 0; break;
2970//ZZ case Iop_QSub16Ux4: size = 1; break;
2971//ZZ case Iop_QSub32Ux2: size = 2; break;
2972//ZZ case Iop_QSub64Ux1: size = 3; break;
2973//ZZ default: vassert(0);
2974//ZZ }
2975//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBU,
2976//ZZ res, argL, argR, size, False));
2977//ZZ return res;
2978//ZZ }
2979//ZZ case Iop_QSub8Sx8:
2980//ZZ case Iop_QSub16Sx4:
2981//ZZ case Iop_QSub32Sx2:
2982//ZZ case Iop_QSub64Sx1: {
2983//ZZ HReg res = newVRegD(env);
2984//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2985//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2986//ZZ UInt size;
2987//ZZ switch (e->Iex.Binop.op) {
2988//ZZ case Iop_QSub8Sx8: size = 0; break;
2989//ZZ case Iop_QSub16Sx4: size = 1; break;
2990//ZZ case Iop_QSub32Sx2: size = 2; break;
2991//ZZ case Iop_QSub64Sx1: size = 3; break;
2992//ZZ default: vassert(0);
2993//ZZ }
2994//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBS,
2995//ZZ res, argL, argR, size, False));
2996//ZZ return res;
2997//ZZ }
2998//ZZ case Iop_Max8Ux8:
2999//ZZ case Iop_Max16Ux4:
3000//ZZ case Iop_Max32Ux2: {
3001//ZZ HReg res = newVRegD(env);
3002//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3003//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3004//ZZ UInt size;
3005//ZZ switch (e->Iex.Binop.op) {
3006//ZZ case Iop_Max8Ux8: size = 0; break;
3007//ZZ case Iop_Max16Ux4: size = 1; break;
3008//ZZ case Iop_Max32Ux2: size = 2; break;
3009//ZZ default: vassert(0);
3010//ZZ }
3011//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMAXU,
3012//ZZ res, argL, argR, size, False));
3013//ZZ return res;
3014//ZZ }
3015//ZZ case Iop_Max8Sx8:
3016//ZZ case Iop_Max16Sx4:
3017//ZZ case Iop_Max32Sx2: {
3018//ZZ HReg res = newVRegD(env);
3019//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3020//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3021//ZZ UInt size;
3022//ZZ switch (e->Iex.Binop.op) {
3023//ZZ case Iop_Max8Sx8: size = 0; break;
3024//ZZ case Iop_Max16Sx4: size = 1; break;
3025//ZZ case Iop_Max32Sx2: size = 2; break;
3026//ZZ default: vassert(0);
3027//ZZ }
3028//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMAXS,
3029//ZZ res, argL, argR, size, False));
3030//ZZ return res;
3031//ZZ }
3032//ZZ case Iop_Min8Ux8:
3033//ZZ case Iop_Min16Ux4:
3034//ZZ case Iop_Min32Ux2: {
3035//ZZ HReg res = newVRegD(env);
3036//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3037//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3038//ZZ UInt size;
3039//ZZ switch (e->Iex.Binop.op) {
3040//ZZ case Iop_Min8Ux8: size = 0; break;
3041//ZZ case Iop_Min16Ux4: size = 1; break;
3042//ZZ case Iop_Min32Ux2: size = 2; break;
3043//ZZ default: vassert(0);
3044//ZZ }
3045//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMINU,
3046//ZZ res, argL, argR, size, False));
3047//ZZ return res;
3048//ZZ }
3049//ZZ case Iop_Min8Sx8:
3050//ZZ case Iop_Min16Sx4:
3051//ZZ case Iop_Min32Sx2: {
3052//ZZ HReg res = newVRegD(env);
3053//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3054//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3055//ZZ UInt size;
3056//ZZ switch (e->Iex.Binop.op) {
3057//ZZ case Iop_Min8Sx8: size = 0; break;
3058//ZZ case Iop_Min16Sx4: size = 1; break;
3059//ZZ case Iop_Min32Sx2: size = 2; break;
3060//ZZ default: vassert(0);
3061//ZZ }
3062//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMINS,
3063//ZZ res, argL, argR, size, False));
3064//ZZ return res;
3065//ZZ }
3066//ZZ case Iop_Sar8x8:
3067//ZZ case Iop_Sar16x4:
3068//ZZ case Iop_Sar32x2: {
3069//ZZ HReg res = newVRegD(env);
3070//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3071//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3072//ZZ HReg argR2 = newVRegD(env);
3073//ZZ HReg zero = newVRegD(env);
3074//ZZ UInt size;
3075//ZZ switch (e->Iex.Binop.op) {
3076//ZZ case Iop_Sar8x8: size = 0; break;
3077//ZZ case Iop_Sar16x4: size = 1; break;
3078//ZZ case Iop_Sar32x2: size = 2; break;
3079//ZZ case Iop_Sar64: size = 3; break;
3080//ZZ default: vassert(0);
3081//ZZ }
3082//ZZ addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
3083//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
3084//ZZ argR2, zero, argR, size, False));
3085//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
3086//ZZ res, argL, argR2, size, False));
3087//ZZ return res;
3088//ZZ }
3089//ZZ case Iop_Sal8x8:
3090//ZZ case Iop_Sal16x4:
3091//ZZ case Iop_Sal32x2:
3092//ZZ case Iop_Sal64x1: {
3093//ZZ HReg res = newVRegD(env);
3094//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3095//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3096//ZZ UInt size;
3097//ZZ switch (e->Iex.Binop.op) {
3098//ZZ case Iop_Sal8x8: size = 0; break;
3099//ZZ case Iop_Sal16x4: size = 1; break;
3100//ZZ case Iop_Sal32x2: size = 2; break;
3101//ZZ case Iop_Sal64x1: size = 3; break;
3102//ZZ default: vassert(0);
3103//ZZ }
3104//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
3105//ZZ res, argL, argR, size, False));
3106//ZZ return res;
3107//ZZ }
3108//ZZ case Iop_Shr8x8:
3109//ZZ case Iop_Shr16x4:
3110//ZZ case Iop_Shr32x2: {
3111//ZZ HReg res = newVRegD(env);
3112//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3113//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3114//ZZ HReg argR2 = newVRegD(env);
3115//ZZ HReg zero = newVRegD(env);
3116//ZZ UInt size;
3117//ZZ switch (e->Iex.Binop.op) {
3118//ZZ case Iop_Shr8x8: size = 0; break;
3119//ZZ case Iop_Shr16x4: size = 1; break;
3120//ZZ case Iop_Shr32x2: size = 2; break;
3121//ZZ default: vassert(0);
3122//ZZ }
3123//ZZ addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
3124//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
3125//ZZ argR2, zero, argR, size, False));
3126//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
3127//ZZ res, argL, argR2, size, False));
3128//ZZ return res;
3129//ZZ }
3130//ZZ case Iop_Shl8x8:
3131//ZZ case Iop_Shl16x4:
3132//ZZ case Iop_Shl32x2: {
3133//ZZ HReg res = newVRegD(env);
3134//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3135//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3136//ZZ UInt size;
3137//ZZ switch (e->Iex.Binop.op) {
3138//ZZ case Iop_Shl8x8: size = 0; break;
3139//ZZ case Iop_Shl16x4: size = 1; break;
3140//ZZ case Iop_Shl32x2: size = 2; break;
3141//ZZ default: vassert(0);
3142//ZZ }
3143//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
3144//ZZ res, argL, argR, size, False));
3145//ZZ return res;
3146//ZZ }
3147//ZZ case Iop_QShl8x8:
3148//ZZ case Iop_QShl16x4:
3149//ZZ case Iop_QShl32x2:
3150//ZZ case Iop_QShl64x1: {
3151//ZZ HReg res = newVRegD(env);
3152//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3153//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3154//ZZ UInt size;
3155//ZZ switch (e->Iex.Binop.op) {
3156//ZZ case Iop_QShl8x8: size = 0; break;
3157//ZZ case Iop_QShl16x4: size = 1; break;
3158//ZZ case Iop_QShl32x2: size = 2; break;
3159//ZZ case Iop_QShl64x1: size = 3; break;
3160//ZZ default: vassert(0);
3161//ZZ }
3162//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VQSHL,
3163//ZZ res, argL, argR, size, False));
3164//ZZ return res;
3165//ZZ }
3166//ZZ case Iop_QSal8x8:
3167//ZZ case Iop_QSal16x4:
3168//ZZ case Iop_QSal32x2:
3169//ZZ case Iop_QSal64x1: {
3170//ZZ HReg res = newVRegD(env);
3171//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3172//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3173//ZZ UInt size;
3174//ZZ switch (e->Iex.Binop.op) {
3175//ZZ case Iop_QSal8x8: size = 0; break;
3176//ZZ case Iop_QSal16x4: size = 1; break;
3177//ZZ case Iop_QSal32x2: size = 2; break;
3178//ZZ case Iop_QSal64x1: size = 3; break;
3179//ZZ default: vassert(0);
3180//ZZ }
3181//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VQSAL,
3182//ZZ res, argL, argR, size, False));
3183//ZZ return res;
3184//ZZ }
3185//ZZ case Iop_QShlN8x8:
3186//ZZ case Iop_QShlN16x4:
3187//ZZ case Iop_QShlN32x2:
3188//ZZ case Iop_QShlN64x1: {
3189//ZZ HReg res = newVRegD(env);
3190//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3191//ZZ UInt size, imm;
3192//ZZ if (e->Iex.Binop.arg2->tag != Iex_Const ||
3193//ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
3194//ZZ                  vpanic("ARM target supports Iop_QShlNAxB with constant "
3195//ZZ "second argument only\n");
3196//ZZ }
3197//ZZ imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
3198//ZZ switch (e->Iex.Binop.op) {
3199//ZZ case Iop_QShlN8x8: size = 8 | imm; break;
3200//ZZ case Iop_QShlN16x4: size = 16 | imm; break;
3201//ZZ case Iop_QShlN32x2: size = 32 | imm; break;
3202//ZZ case Iop_QShlN64x1: size = 64 | imm; break;
3203//ZZ default: vassert(0);
3204//ZZ }
3205//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUU,
3206//ZZ res, argL, size, False));
3207//ZZ return res;
3208//ZZ }
3209//ZZ case Iop_QShlN8Sx8:
3210//ZZ case Iop_QShlN16Sx4:
3211//ZZ case Iop_QShlN32Sx2:
3212//ZZ case Iop_QShlN64Sx1: {
3213//ZZ HReg res = newVRegD(env);
3214//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3215//ZZ UInt size, imm;
3216//ZZ if (e->Iex.Binop.arg2->tag != Iex_Const ||
3217//ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
3218//ZZ                  vpanic("ARM target supports Iop_QShlNAxB with constant "
3219//ZZ "second argument only\n");
3220//ZZ }
3221//ZZ imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
3222//ZZ switch (e->Iex.Binop.op) {
3223//ZZ case Iop_QShlN8Sx8: size = 8 | imm; break;
3224//ZZ case Iop_QShlN16Sx4: size = 16 | imm; break;
3225//ZZ case Iop_QShlN32Sx2: size = 32 | imm; break;
3226//ZZ case Iop_QShlN64Sx1: size = 64 | imm; break;
3227//ZZ default: vassert(0);
3228//ZZ }
3229//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUS,
3230//ZZ res, argL, size, False));
3231//ZZ return res;
3232//ZZ }
3233//ZZ case Iop_QSalN8x8:
3234//ZZ case Iop_QSalN16x4:
3235//ZZ case Iop_QSalN32x2:
3236//ZZ case Iop_QSalN64x1: {
3237//ZZ HReg res = newVRegD(env);
3238//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3239//ZZ UInt size, imm;
3240//ZZ if (e->Iex.Binop.arg2->tag != Iex_Const ||
3241//ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
3242//ZZ                  vpanic("ARM target supports Iop_QShlNAxB with constant "
3243//ZZ "second argument only\n");
3244//ZZ }
3245//ZZ imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
3246//ZZ switch (e->Iex.Binop.op) {
3247//ZZ case Iop_QSalN8x8: size = 8 | imm; break;
3248//ZZ case Iop_QSalN16x4: size = 16 | imm; break;
3249//ZZ case Iop_QSalN32x2: size = 32 | imm; break;
3250//ZZ case Iop_QSalN64x1: size = 64 | imm; break;
3251//ZZ default: vassert(0);
3252//ZZ }
3253//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNSS,
3254//ZZ res, argL, size, False));
3255//ZZ return res;
3256//ZZ }
3257//ZZ case Iop_ShrN8x8:
3258//ZZ case Iop_ShrN16x4:
3259//ZZ case Iop_ShrN32x2:
3260//ZZ case Iop_Shr64: {
3261//ZZ HReg res = newVRegD(env);
3262//ZZ HReg tmp = newVRegD(env);
3263//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3264//ZZ HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
3265//ZZ HReg argR2 = newVRegI(env);
3266//ZZ UInt size;
3267//ZZ switch (e->Iex.Binop.op) {
3268//ZZ case Iop_ShrN8x8: size = 0; break;
3269//ZZ case Iop_ShrN16x4: size = 1; break;
3270//ZZ case Iop_ShrN32x2: size = 2; break;
3271//ZZ case Iop_Shr64: size = 3; break;
3272//ZZ default: vassert(0);
3273//ZZ }
3274//ZZ addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
3275//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, False));
3276//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
3277//ZZ res, argL, tmp, size, False));
3278//ZZ return res;
3279//ZZ }
3280//ZZ case Iop_ShlN8x8:
3281//ZZ case Iop_ShlN16x4:
3282//ZZ case Iop_ShlN32x2:
3283//ZZ case Iop_Shl64: {
3284//ZZ HReg res = newVRegD(env);
3285//ZZ HReg tmp = newVRegD(env);
3286//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3287//ZZ /* special-case Shl64(x, imm8) since the Neon front
3288//ZZ end produces a lot of those for V{LD,ST}{1,2,3,4}. */
3289//ZZ if (e->Iex.Binop.op == Iop_Shl64
3290//ZZ && e->Iex.Binop.arg2->tag == Iex_Const) {
3291//ZZ vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
3292//ZZ Int nshift = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
3293//ZZ if (nshift >= 1 && nshift <= 63) {
3294//ZZ addInstr(env, ARMInstr_NShl64(res, argL, nshift));
3295//ZZ return res;
3296//ZZ }
3297//ZZ /* else fall through to general case */
3298//ZZ }
3299//ZZ HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
3300//ZZ UInt size;
3301//ZZ switch (e->Iex.Binop.op) {
3302//ZZ case Iop_ShlN8x8: size = 0; break;
3303//ZZ case Iop_ShlN16x4: size = 1; break;
3304//ZZ case Iop_ShlN32x2: size = 2; break;
3305//ZZ case Iop_Shl64: size = 3; break;
3306//ZZ default: vassert(0);
3307//ZZ }
3308//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_DUP,
3309//ZZ tmp, argR, 0, False));
3310//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
3311//ZZ res, argL, tmp, size, False));
3312//ZZ return res;
3313//ZZ }
3314//ZZ case Iop_SarN8x8:
3315//ZZ case Iop_SarN16x4:
3316//ZZ case Iop_SarN32x2:
3317//ZZ case Iop_Sar64: {
3318//ZZ HReg res = newVRegD(env);
3319//ZZ HReg tmp = newVRegD(env);
3320//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3321//ZZ HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
3322//ZZ HReg argR2 = newVRegI(env);
3323//ZZ UInt size;
3324//ZZ switch (e->Iex.Binop.op) {
3325//ZZ case Iop_SarN8x8: size = 0; break;
3326//ZZ case Iop_SarN16x4: size = 1; break;
3327//ZZ case Iop_SarN32x2: size = 2; break;
3328//ZZ case Iop_Sar64: size = 3; break;
3329//ZZ default: vassert(0);
3330//ZZ }
3331//ZZ addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
3332//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, False));
3333//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
3334//ZZ res, argL, tmp, size, False));
3335//ZZ return res;
3336//ZZ }
3337//ZZ case Iop_CmpGT8Ux8:
3338//ZZ case Iop_CmpGT16Ux4:
3339//ZZ case Iop_CmpGT32Ux2: {
3340//ZZ HReg res = newVRegD(env);
3341//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3342//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3343//ZZ UInt size;
3344//ZZ switch (e->Iex.Binop.op) {
3345//ZZ case Iop_CmpGT8Ux8: size = 0; break;
3346//ZZ case Iop_CmpGT16Ux4: size = 1; break;
3347//ZZ case Iop_CmpGT32Ux2: size = 2; break;
3348//ZZ default: vassert(0);
3349//ZZ }
3350//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGTU,
3351//ZZ res, argL, argR, size, False));
3352//ZZ return res;
3353//ZZ }
3354//ZZ case Iop_CmpGT8Sx8:
3355//ZZ case Iop_CmpGT16Sx4:
3356//ZZ case Iop_CmpGT32Sx2: {
3357//ZZ HReg res = newVRegD(env);
3358//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3359//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3360//ZZ UInt size;
3361//ZZ switch (e->Iex.Binop.op) {
3362//ZZ case Iop_CmpGT8Sx8: size = 0; break;
3363//ZZ case Iop_CmpGT16Sx4: size = 1; break;
3364//ZZ case Iop_CmpGT32Sx2: size = 2; break;
3365//ZZ default: vassert(0);
3366//ZZ }
3367//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGTS,
3368//ZZ res, argL, argR, size, False));
3369//ZZ return res;
3370//ZZ }
3371//ZZ case Iop_CmpEQ8x8:
3372//ZZ case Iop_CmpEQ16x4:
3373//ZZ case Iop_CmpEQ32x2: {
3374//ZZ HReg res = newVRegD(env);
3375//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3376//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3377//ZZ UInt size;
3378//ZZ switch (e->Iex.Binop.op) {
3379//ZZ case Iop_CmpEQ8x8: size = 0; break;
3380//ZZ case Iop_CmpEQ16x4: size = 1; break;
3381//ZZ case Iop_CmpEQ32x2: size = 2; break;
3382//ZZ default: vassert(0);
3383//ZZ }
3384//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCEQ,
3385//ZZ res, argL, argR, size, False));
3386//ZZ return res;
3387//ZZ }
3388//ZZ case Iop_Mul8x8:
3389//ZZ case Iop_Mul16x4:
3390//ZZ case Iop_Mul32x2: {
3391//ZZ HReg res = newVRegD(env);
3392//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3393//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3394//ZZ UInt size = 0;
3395//ZZ switch(e->Iex.Binop.op) {
3396//ZZ case Iop_Mul8x8: size = 0; break;
3397//ZZ case Iop_Mul16x4: size = 1; break;
3398//ZZ case Iop_Mul32x2: size = 2; break;
3399//ZZ default: vassert(0);
3400//ZZ }
3401//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMUL,
3402//ZZ res, argL, argR, size, False));
3403//ZZ return res;
3404//ZZ }
3405//ZZ case Iop_Mul32Fx2: {
3406//ZZ HReg res = newVRegD(env);
3407//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3408//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3409//ZZ UInt size = 0;
3410//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMULFP,
3411//ZZ res, argL, argR, size, False));
3412//ZZ return res;
3413//ZZ }
3414//ZZ case Iop_QDMulHi16Sx4:
3415//ZZ case Iop_QDMulHi32Sx2: {
3416//ZZ HReg res = newVRegD(env);
3417//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3418//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3419//ZZ UInt size = 0;
3420//ZZ switch(e->Iex.Binop.op) {
3421//ZZ case Iop_QDMulHi16Sx4: size = 1; break;
3422//ZZ case Iop_QDMulHi32Sx2: size = 2; break;
3423//ZZ default: vassert(0);
3424//ZZ }
3425//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULH,
3426//ZZ res, argL, argR, size, False));
3427//ZZ return res;
3428//ZZ }
3429//ZZ
3430//ZZ case Iop_QRDMulHi16Sx4:
3431//ZZ case Iop_QRDMulHi32Sx2: {
3432//ZZ HReg res = newVRegD(env);
3433//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3434//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3435//ZZ UInt size = 0;
3436//ZZ switch(e->Iex.Binop.op) {
3437//ZZ case Iop_QRDMulHi16Sx4: size = 1; break;
3438//ZZ case Iop_QRDMulHi32Sx2: size = 2; break;
3439//ZZ default: vassert(0);
3440//ZZ }
3441//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQRDMULH,
3442//ZZ res, argL, argR, size, False));
3443//ZZ return res;
3444//ZZ }
3445//ZZ
3446//ZZ case Iop_PwAdd8x8:
3447//ZZ case Iop_PwAdd16x4:
3448//ZZ case Iop_PwAdd32x2: {
3449//ZZ HReg res = newVRegD(env);
3450//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3451//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3452//ZZ UInt size = 0;
3453//ZZ switch(e->Iex.Binop.op) {
3454//ZZ case Iop_PwAdd8x8: size = 0; break;
3455//ZZ case Iop_PwAdd16x4: size = 1; break;
3456//ZZ case Iop_PwAdd32x2: size = 2; break;
3457//ZZ default: vassert(0);
3458//ZZ }
3459//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VPADD,
3460//ZZ res, argL, argR, size, False));
3461//ZZ return res;
3462//ZZ }
3463//ZZ case Iop_PwAdd32Fx2: {
3464//ZZ HReg res = newVRegD(env);
3465//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3466//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3467//ZZ UInt size = 0;
3468//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VPADDFP,
3469//ZZ res, argL, argR, size, False));
3470//ZZ return res;
3471//ZZ }
3472//ZZ case Iop_PwMin8Ux8:
3473//ZZ case Iop_PwMin16Ux4:
3474//ZZ case Iop_PwMin32Ux2: {
3475//ZZ HReg res = newVRegD(env);
3476//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3477//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3478//ZZ UInt size = 0;
3479//ZZ switch(e->Iex.Binop.op) {
3480//ZZ case Iop_PwMin8Ux8: size = 0; break;
3481//ZZ case Iop_PwMin16Ux4: size = 1; break;
3482//ZZ case Iop_PwMin32Ux2: size = 2; break;
3483//ZZ default: vassert(0);
3484//ZZ }
3485//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VPMINU,
3486//ZZ res, argL, argR, size, False));
3487//ZZ return res;
3488//ZZ }
3489//ZZ case Iop_PwMin8Sx8:
3490//ZZ case Iop_PwMin16Sx4:
3491//ZZ case Iop_PwMin32Sx2: {
3492//ZZ HReg res = newVRegD(env);
3493//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3494//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3495//ZZ UInt size = 0;
3496//ZZ switch(e->Iex.Binop.op) {
3497//ZZ case Iop_PwMin8Sx8: size = 0; break;
3498//ZZ case Iop_PwMin16Sx4: size = 1; break;
3499//ZZ case Iop_PwMin32Sx2: size = 2; break;
3500//ZZ default: vassert(0);
3501//ZZ }
3502//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VPMINS,
3503//ZZ res, argL, argR, size, False));
3504//ZZ return res;
3505//ZZ }
3506//ZZ case Iop_PwMax8Ux8:
3507//ZZ case Iop_PwMax16Ux4:
3508//ZZ case Iop_PwMax32Ux2: {
3509//ZZ HReg res = newVRegD(env);
3510//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3511//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3512//ZZ UInt size = 0;
3513//ZZ switch(e->Iex.Binop.op) {
3514//ZZ case Iop_PwMax8Ux8: size = 0; break;
3515//ZZ case Iop_PwMax16Ux4: size = 1; break;
3516//ZZ case Iop_PwMax32Ux2: size = 2; break;
3517//ZZ default: vassert(0);
3518//ZZ }
3519//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXU,
3520//ZZ res, argL, argR, size, False));
3521//ZZ return res;
3522//ZZ }
3523//ZZ case Iop_PwMax8Sx8:
3524//ZZ case Iop_PwMax16Sx4:
3525//ZZ case Iop_PwMax32Sx2: {
3526//ZZ HReg res = newVRegD(env);
3527//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3528//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3529//ZZ UInt size = 0;
3530//ZZ switch(e->Iex.Binop.op) {
3531//ZZ case Iop_PwMax8Sx8: size = 0; break;
3532//ZZ case Iop_PwMax16Sx4: size = 1; break;
3533//ZZ case Iop_PwMax32Sx2: size = 2; break;
3534//ZZ default: vassert(0);
3535//ZZ }
3536//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXS,
3537//ZZ res, argL, argR, size, False));
3538//ZZ return res;
3539//ZZ }
3540//ZZ case Iop_Perm8x8: {
3541//ZZ HReg res = newVRegD(env);
3542//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3543//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3544//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VTBL,
3545//ZZ res, argL, argR, 0, False));
3546//ZZ return res;
3547//ZZ }
3548//ZZ case Iop_PolynomialMul8x8: {
3549//ZZ HReg res = newVRegD(env);
3550//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3551//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3552//ZZ UInt size = 0;
3553//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMULP,
3554//ZZ res, argL, argR, size, False));
3555//ZZ return res;
3556//ZZ }
3557//ZZ case Iop_Max32Fx2: {
3558//ZZ HReg res = newVRegD(env);
3559//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3560//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3561//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMAXF,
3562//ZZ res, argL, argR, 2, False));
3563//ZZ return res;
3564//ZZ }
3565//ZZ case Iop_Min32Fx2: {
3566//ZZ HReg res = newVRegD(env);
3567//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3568//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3569//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMINF,
3570//ZZ res, argL, argR, 2, False));
3571//ZZ return res;
3572//ZZ }
3573//ZZ case Iop_PwMax32Fx2: {
3574//ZZ HReg res = newVRegD(env);
3575//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3576//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3577//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXF,
3578//ZZ res, argL, argR, 2, False));
3579//ZZ return res;
3580//ZZ }
3581//ZZ case Iop_PwMin32Fx2: {
3582//ZZ HReg res = newVRegD(env);
3583//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3584//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3585//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VPMINF,
3586//ZZ res, argL, argR, 2, False));
3587//ZZ return res;
3588//ZZ }
3589//ZZ case Iop_CmpGT32Fx2: {
3590//ZZ HReg res = newVRegD(env);
3591//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3592//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3593//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGTF,
3594//ZZ res, argL, argR, 2, False));
3595//ZZ return res;
3596//ZZ }
3597//ZZ case Iop_CmpGE32Fx2: {
3598//ZZ HReg res = newVRegD(env);
3599//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3600//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3601//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGEF,
3602//ZZ res, argL, argR, 2, False));
3603//ZZ return res;
3604//ZZ }
3605//ZZ case Iop_CmpEQ32Fx2: {
3606//ZZ HReg res = newVRegD(env);
3607//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3608//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3609//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCEQF,
3610//ZZ res, argL, argR, 2, False));
3611//ZZ return res;
3612//ZZ }
3613//ZZ case Iop_F32ToFixed32Ux2_RZ:
3614//ZZ case Iop_F32ToFixed32Sx2_RZ:
3615//ZZ case Iop_Fixed32UToF32x2_RN:
3616//ZZ case Iop_Fixed32SToF32x2_RN: {
3617//ZZ HReg res = newVRegD(env);
3618//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Binop.arg1);
3619//ZZ ARMNeonUnOp op;
3620//ZZ UInt imm6;
3621//ZZ if (e->Iex.Binop.arg2->tag != Iex_Const ||
3622//ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
3623//ZZ vpanic("ARM supports FP <-> Fixed conversion with constant "
3624//ZZ "second argument less than 33 only\n");
3625//ZZ }
3626//ZZ imm6 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
3627//ZZ vassert(imm6 <= 32 && imm6 > 0);
3628//ZZ imm6 = 64 - imm6;
3629//ZZ switch(e->Iex.Binop.op) {
3630//ZZ case Iop_F32ToFixed32Ux2_RZ: op = ARMneon_VCVTFtoFixedU; break;
3631//ZZ case Iop_F32ToFixed32Sx2_RZ: op = ARMneon_VCVTFtoFixedS; break;
3632//ZZ case Iop_Fixed32UToF32x2_RN: op = ARMneon_VCVTFixedUtoF; break;
3633//ZZ case Iop_Fixed32SToF32x2_RN: op = ARMneon_VCVTFixedStoF; break;
3634//ZZ default: vassert(0);
3635//ZZ }
3636//ZZ addInstr(env, ARMInstr_NUnary(op, res, arg, imm6, False));
3637//ZZ return res;
3638//ZZ }
3639//ZZ /*
3640//ZZ FIXME: is this here or not?
3641//ZZ case Iop_VDup8x8:
3642//ZZ case Iop_VDup16x4:
3643//ZZ case Iop_VDup32x2: {
3644//ZZ HReg res = newVRegD(env);
3645//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3646//ZZ UInt index;
3647//ZZ UInt imm4;
3648//ZZ UInt size = 0;
3649//ZZ if (e->Iex.Binop.arg2->tag != Iex_Const ||
3650//ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
3651//ZZ vpanic("ARM supports Iop_VDup with constant "
3652//ZZ "second argument less than 16 only\n");
3653//ZZ }
3654//ZZ index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
3655//ZZ switch(e->Iex.Binop.op) {
3656//ZZ case Iop_VDup8x8: imm4 = (index << 1) + 1; break;
3657//ZZ case Iop_VDup16x4: imm4 = (index << 2) + 2; break;
3658//ZZ case Iop_VDup32x2: imm4 = (index << 3) + 4; break;
3659//ZZ default: vassert(0);
3660//ZZ }
3661//ZZ if (imm4 >= 16) {
3662//ZZ vpanic("ARM supports Iop_VDup with constant "
3663//ZZ "second argument less than 16 only\n");
3664//ZZ }
3665//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VDUP,
3666//ZZ res, argL, imm4, False));
3667//ZZ return res;
3668//ZZ }
3669//ZZ */
3670//ZZ default:
3671//ZZ break;
3672//ZZ }
3673//ZZ }
3674//ZZ
3675//ZZ /* --------- UNARY ops --------- */
3676//ZZ if (e->tag == Iex_Unop) {
3677//ZZ switch (e->Iex.Unop.op) {
3678//ZZ
3679//ZZ /* 32Uto64 */
3680//ZZ case Iop_32Uto64: {
3681//ZZ HReg rLo = iselIntExpr_R(env, e->Iex.Unop.arg);
3682//ZZ HReg rHi = newVRegI(env);
3683//ZZ HReg res = newVRegD(env);
3684//ZZ addInstr(env, ARMInstr_Imm32(rHi, 0));
3685//ZZ addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
3686//ZZ return res;
3687//ZZ }
3688//ZZ
3689//ZZ /* 32Sto64 */
3690//ZZ case Iop_32Sto64: {
3691//ZZ HReg rLo = iselIntExpr_R(env, e->Iex.Unop.arg);
3692//ZZ HReg rHi = newVRegI(env);
3693//ZZ addInstr(env, mk_iMOVds_RR(rHi, rLo));
3694//ZZ addInstr(env, ARMInstr_Shift(ARMsh_SAR, rHi, rHi, ARMRI5_I5(31)));
3695//ZZ HReg res = newVRegD(env);
3696//ZZ addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
3697//ZZ return res;
3698//ZZ }
3699//ZZ
3700//ZZ /* The next 3 are pass-throughs */
3701//ZZ /* ReinterpF64asI64 */
3702//ZZ case Iop_ReinterpF64asI64:
3703//ZZ /* Left64(e) */
3704//ZZ case Iop_Left64:
3705//ZZ /* CmpwNEZ64(e) */
3706//ZZ case Iop_1Sto64: {
3707//ZZ HReg rLo, rHi;
3708//ZZ HReg res = newVRegD(env);
3709//ZZ iselInt64Expr(&rHi, &rLo, env, e);
3710//ZZ addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
3711//ZZ return res;
3712//ZZ }
3713//ZZ
3714//ZZ case Iop_Not64: {
3715//ZZ DECLARE_PATTERN(p_veqz_8x8);
3716//ZZ DECLARE_PATTERN(p_veqz_16x4);
3717//ZZ DECLARE_PATTERN(p_veqz_32x2);
3718//ZZ DECLARE_PATTERN(p_vcge_8sx8);
3719//ZZ DECLARE_PATTERN(p_vcge_16sx4);
3720//ZZ DECLARE_PATTERN(p_vcge_32sx2);
3721//ZZ DECLARE_PATTERN(p_vcge_8ux8);
3722//ZZ DECLARE_PATTERN(p_vcge_16ux4);
3723//ZZ DECLARE_PATTERN(p_vcge_32ux2);
3724//ZZ DEFINE_PATTERN(p_veqz_8x8,
3725//ZZ unop(Iop_Not64, unop(Iop_CmpNEZ8x8, bind(0))));
3726//ZZ DEFINE_PATTERN(p_veqz_16x4,
3727//ZZ unop(Iop_Not64, unop(Iop_CmpNEZ16x4, bind(0))));
3728//ZZ DEFINE_PATTERN(p_veqz_32x2,
3729//ZZ unop(Iop_Not64, unop(Iop_CmpNEZ32x2, bind(0))));
3730//ZZ DEFINE_PATTERN(p_vcge_8sx8,
3731//ZZ unop(Iop_Not64, binop(Iop_CmpGT8Sx8, bind(1), bind(0))));
3732//ZZ DEFINE_PATTERN(p_vcge_16sx4,
3733//ZZ unop(Iop_Not64, binop(Iop_CmpGT16Sx4, bind(1), bind(0))));
3734//ZZ DEFINE_PATTERN(p_vcge_32sx2,
3735//ZZ unop(Iop_Not64, binop(Iop_CmpGT32Sx2, bind(1), bind(0))));
3736//ZZ DEFINE_PATTERN(p_vcge_8ux8,
3737//ZZ unop(Iop_Not64, binop(Iop_CmpGT8Ux8, bind(1), bind(0))));
3738//ZZ DEFINE_PATTERN(p_vcge_16ux4,
3739//ZZ unop(Iop_Not64, binop(Iop_CmpGT16Ux4, bind(1), bind(0))));
3740//ZZ DEFINE_PATTERN(p_vcge_32ux2,
3741//ZZ unop(Iop_Not64, binop(Iop_CmpGT32Ux2, bind(1), bind(0))));
3742//ZZ if (matchIRExpr(&mi, p_veqz_8x8, e)) {
3743//ZZ HReg res = newVRegD(env);
3744//ZZ HReg arg = iselNeon64Expr(env, mi.bindee[0]);
3745//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 0, False));
3746//ZZ return res;
3747//ZZ } else if (matchIRExpr(&mi, p_veqz_16x4, e)) {
3748//ZZ HReg res = newVRegD(env);
3749//ZZ HReg arg = iselNeon64Expr(env, mi.bindee[0]);
3750//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 1, False));
3751//ZZ return res;
3752//ZZ } else if (matchIRExpr(&mi, p_veqz_32x2, e)) {
3753//ZZ HReg res = newVRegD(env);
3754//ZZ HReg arg = iselNeon64Expr(env, mi.bindee[0]);
3755//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 2, False));
3756//ZZ return res;
3757//ZZ } else if (matchIRExpr(&mi, p_vcge_8sx8, e)) {
3758//ZZ HReg res = newVRegD(env);
3759//ZZ HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3760//ZZ HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3761//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3762//ZZ res, argL, argR, 0, False));
3763//ZZ return res;
3764//ZZ } else if (matchIRExpr(&mi, p_vcge_16sx4, e)) {
3765//ZZ HReg res = newVRegD(env);
3766//ZZ HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3767//ZZ HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3768//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3769//ZZ res, argL, argR, 1, False));
3770//ZZ return res;
3771//ZZ } else if (matchIRExpr(&mi, p_vcge_32sx2, e)) {
3772//ZZ HReg res = newVRegD(env);
3773//ZZ HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3774//ZZ HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3775//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3776//ZZ res, argL, argR, 2, False));
3777//ZZ return res;
3778//ZZ } else if (matchIRExpr(&mi, p_vcge_8ux8, e)) {
3779//ZZ HReg res = newVRegD(env);
3780//ZZ HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3781//ZZ HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3782//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3783//ZZ res, argL, argR, 0, False));
3784//ZZ return res;
3785//ZZ } else if (matchIRExpr(&mi, p_vcge_16ux4, e)) {
3786//ZZ HReg res = newVRegD(env);
3787//ZZ HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3788//ZZ HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3789//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3790//ZZ res, argL, argR, 1, False));
3791//ZZ return res;
3792//ZZ } else if (matchIRExpr(&mi, p_vcge_32ux2, e)) {
3793//ZZ HReg res = newVRegD(env);
3794//ZZ HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3795//ZZ HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3796//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3797//ZZ res, argL, argR, 2, False));
3798//ZZ return res;
3799//ZZ } else {
3800//ZZ HReg res = newVRegD(env);
3801//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3802//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, arg, 4, False));
3803//ZZ return res;
3804//ZZ }
3805//ZZ }
3806//ZZ case Iop_Dup8x8:
3807//ZZ case Iop_Dup16x4:
3808//ZZ case Iop_Dup32x2: {
3809//ZZ HReg res, arg;
3810//ZZ UInt size;
3811//ZZ DECLARE_PATTERN(p_vdup_8x8);
3812//ZZ DECLARE_PATTERN(p_vdup_16x4);
3813//ZZ DECLARE_PATTERN(p_vdup_32x2);
3814//ZZ DEFINE_PATTERN(p_vdup_8x8,
3815//ZZ unop(Iop_Dup8x8, binop(Iop_GetElem8x8, bind(0), bind(1))));
3816//ZZ DEFINE_PATTERN(p_vdup_16x4,
3817//ZZ unop(Iop_Dup16x4, binop(Iop_GetElem16x4, bind(0), bind(1))));
3818//ZZ DEFINE_PATTERN(p_vdup_32x2,
3819//ZZ unop(Iop_Dup32x2, binop(Iop_GetElem32x2, bind(0), bind(1))));
3820//ZZ if (matchIRExpr(&mi, p_vdup_8x8, e)) {
3821//ZZ UInt index;
3822//ZZ UInt imm4;
3823//ZZ if (mi.bindee[1]->tag == Iex_Const &&
3824//ZZ typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3825//ZZ index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3826//ZZ imm4 = (index << 1) + 1;
3827//ZZ if (index < 8) {
3828//ZZ res = newVRegD(env);
3829//ZZ arg = iselNeon64Expr(env, mi.bindee[0]);
3830//ZZ addInstr(env, ARMInstr_NUnaryS(
3831//ZZ ARMneon_VDUP,
3832//ZZ mkARMNRS(ARMNRS_Reg, res, 0),
3833//ZZ mkARMNRS(ARMNRS_Scalar, arg, index),
3834//ZZ imm4, False
3835//ZZ ));
3836//ZZ return res;
3837//ZZ }
3838//ZZ }
3839//ZZ } else if (matchIRExpr(&mi, p_vdup_16x4, e)) {
3840//ZZ UInt index;
3841//ZZ UInt imm4;
3842//ZZ if (mi.bindee[1]->tag == Iex_Const &&
3843//ZZ typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3844//ZZ index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3845//ZZ imm4 = (index << 2) + 2;
3846//ZZ if (index < 4) {
3847//ZZ res = newVRegD(env);
3848//ZZ arg = iselNeon64Expr(env, mi.bindee[0]);
3849//ZZ addInstr(env, ARMInstr_NUnaryS(
3850//ZZ ARMneon_VDUP,
3851//ZZ mkARMNRS(ARMNRS_Reg, res, 0),
3852//ZZ mkARMNRS(ARMNRS_Scalar, arg, index),
3853//ZZ imm4, False
3854//ZZ ));
3855//ZZ return res;
3856//ZZ }
3857//ZZ }
3858//ZZ } else if (matchIRExpr(&mi, p_vdup_32x2, e)) {
3859//ZZ UInt index;
3860//ZZ UInt imm4;
3861//ZZ if (mi.bindee[1]->tag == Iex_Const &&
3862//ZZ typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3863//ZZ index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3864//ZZ imm4 = (index << 3) + 4;
3865//ZZ if (index < 2) {
3866//ZZ res = newVRegD(env);
3867//ZZ arg = iselNeon64Expr(env, mi.bindee[0]);
3868//ZZ addInstr(env, ARMInstr_NUnaryS(
3869//ZZ ARMneon_VDUP,
3870//ZZ mkARMNRS(ARMNRS_Reg, res, 0),
3871//ZZ mkARMNRS(ARMNRS_Scalar, arg, index),
3872//ZZ imm4, False
3873//ZZ ));
3874//ZZ return res;
3875//ZZ }
3876//ZZ }
3877//ZZ }
3878//ZZ arg = iselIntExpr_R(env, e->Iex.Unop.arg);
3879//ZZ res = newVRegD(env);
3880//ZZ switch (e->Iex.Unop.op) {
3881//ZZ case Iop_Dup8x8: size = 0; break;
3882//ZZ case Iop_Dup16x4: size = 1; break;
3883//ZZ case Iop_Dup32x2: size = 2; break;
3884//ZZ default: vassert(0);
3885//ZZ }
3886//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_DUP, res, arg, size, False));
3887//ZZ return res;
3888//ZZ }
3889//ZZ case Iop_Abs8x8:
3890//ZZ case Iop_Abs16x4:
3891//ZZ case Iop_Abs32x2: {
3892//ZZ HReg res = newVRegD(env);
3893//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3894//ZZ UInt size = 0;
3895//ZZ switch(e->Iex.Binop.op) {
3896//ZZ case Iop_Abs8x8: size = 0; break;
3897//ZZ case Iop_Abs16x4: size = 1; break;
3898//ZZ case Iop_Abs32x2: size = 2; break;
3899//ZZ default: vassert(0);
3900//ZZ }
3901//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_ABS, res, arg, size, False));
3902//ZZ return res;
3903//ZZ }
3904//ZZ case Iop_Reverse64_8x8:
3905//ZZ case Iop_Reverse64_16x4:
3906//ZZ case Iop_Reverse64_32x2: {
3907//ZZ HReg res = newVRegD(env);
3908//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3909//ZZ UInt size = 0;
3910//ZZ switch(e->Iex.Binop.op) {
3911//ZZ case Iop_Reverse64_8x8: size = 0; break;
3912//ZZ case Iop_Reverse64_16x4: size = 1; break;
3913//ZZ case Iop_Reverse64_32x2: size = 2; break;
3914//ZZ default: vassert(0);
3915//ZZ }
3916//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_REV64,
3917//ZZ res, arg, size, False));
3918//ZZ return res;
3919//ZZ }
3920//ZZ case Iop_Reverse32_8x8:
3921//ZZ case Iop_Reverse32_16x4: {
3922//ZZ HReg res = newVRegD(env);
3923//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3924//ZZ UInt size = 0;
3925//ZZ switch(e->Iex.Binop.op) {
3926//ZZ case Iop_Reverse32_8x8: size = 0; break;
3927//ZZ case Iop_Reverse32_16x4: size = 1; break;
3928//ZZ default: vassert(0);
3929//ZZ }
3930//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_REV32,
3931//ZZ res, arg, size, False));
3932//ZZ return res;
3933//ZZ }
3934//ZZ case Iop_Reverse16_8x8: {
3935//ZZ HReg res = newVRegD(env);
3936//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3937//ZZ UInt size = 0;
3938//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_REV16,
3939//ZZ res, arg, size, False));
3940//ZZ return res;
3941//ZZ }
3942//ZZ case Iop_CmpwNEZ64: {
3943//ZZ HReg x_lsh = newVRegD(env);
3944//ZZ HReg x_rsh = newVRegD(env);
3945//ZZ HReg lsh_amt = newVRegD(env);
3946//ZZ HReg rsh_amt = newVRegD(env);
3947//ZZ HReg zero = newVRegD(env);
3948//ZZ HReg tmp = newVRegD(env);
3949//ZZ HReg tmp2 = newVRegD(env);
3950//ZZ HReg res = newVRegD(env);
3951//ZZ HReg x = newVRegD(env);
3952//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3953//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp2, arg, 2, False));
3954//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_NOT, x, tmp2, 4, False));
3955//ZZ addInstr(env, ARMInstr_NeonImm(lsh_amt, ARMNImm_TI(0, 32)));
3956//ZZ addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0, 0)));
3957//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
3958//ZZ rsh_amt, zero, lsh_amt, 2, False));
3959//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
3960//ZZ x_lsh, x, lsh_amt, 3, False));
3961//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
3962//ZZ x_rsh, x, rsh_amt, 3, False));
3963//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
3964//ZZ tmp, x_lsh, x_rsh, 0, False));
3965//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
3966//ZZ res, tmp, x, 0, False));
3967//ZZ return res;
3968//ZZ }
3969//ZZ case Iop_CmpNEZ8x8:
3970//ZZ case Iop_CmpNEZ16x4:
3971//ZZ case Iop_CmpNEZ32x2: {
3972//ZZ HReg res = newVRegD(env);
3973//ZZ HReg tmp = newVRegD(env);
3974//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3975//ZZ UInt size;
3976//ZZ switch (e->Iex.Unop.op) {
3977//ZZ case Iop_CmpNEZ8x8: size = 0; break;
3978//ZZ case Iop_CmpNEZ16x4: size = 1; break;
3979//ZZ case Iop_CmpNEZ32x2: size = 2; break;
3980//ZZ default: vassert(0);
3981//ZZ }
3982//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp, arg, size, False));
3983//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, tmp, 4, False));
3984//ZZ return res;
3985//ZZ }
3986//ZZ case Iop_NarrowUn16to8x8:
3987//ZZ case Iop_NarrowUn32to16x4:
3988//ZZ case Iop_NarrowUn64to32x2: {
3989//ZZ HReg res = newVRegD(env);
3990//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3991//ZZ UInt size = 0;
3992//ZZ switch(e->Iex.Binop.op) {
3993//ZZ case Iop_NarrowUn16to8x8: size = 0; break;
3994//ZZ case Iop_NarrowUn32to16x4: size = 1; break;
3995//ZZ case Iop_NarrowUn64to32x2: size = 2; break;
3996//ZZ default: vassert(0);
3997//ZZ }
3998//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPYN,
3999//ZZ res, arg, size, False));
4000//ZZ return res;
4001//ZZ }
4002//ZZ case Iop_QNarrowUn16Sto8Sx8:
4003//ZZ case Iop_QNarrowUn32Sto16Sx4:
4004//ZZ case Iop_QNarrowUn64Sto32Sx2: {
4005//ZZ HReg res = newVRegD(env);
4006//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4007//ZZ UInt size = 0;
4008//ZZ switch(e->Iex.Binop.op) {
4009//ZZ case Iop_QNarrowUn16Sto8Sx8: size = 0; break;
4010//ZZ case Iop_QNarrowUn32Sto16Sx4: size = 1; break;
4011//ZZ case Iop_QNarrowUn64Sto32Sx2: size = 2; break;
4012//ZZ default: vassert(0);
4013//ZZ }
4014//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNSS,
4015//ZZ res, arg, size, False));
4016//ZZ return res;
4017//ZZ }
4018//ZZ case Iop_QNarrowUn16Sto8Ux8:
4019//ZZ case Iop_QNarrowUn32Sto16Ux4:
4020//ZZ case Iop_QNarrowUn64Sto32Ux2: {
4021//ZZ HReg res = newVRegD(env);
4022//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4023//ZZ UInt size = 0;
4024//ZZ switch(e->Iex.Binop.op) {
4025//ZZ case Iop_QNarrowUn16Sto8Ux8: size = 0; break;
4026//ZZ case Iop_QNarrowUn32Sto16Ux4: size = 1; break;
4027//ZZ case Iop_QNarrowUn64Sto32Ux2: size = 2; break;
4028//ZZ default: vassert(0);
4029//ZZ }
4030//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNUS,
4031//ZZ res, arg, size, False));
4032//ZZ return res;
4033//ZZ }
4034//ZZ case Iop_QNarrowUn16Uto8Ux8:
4035//ZZ case Iop_QNarrowUn32Uto16Ux4:
4036//ZZ case Iop_QNarrowUn64Uto32Ux2: {
4037//ZZ HReg res = newVRegD(env);
4038//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4039//ZZ UInt size = 0;
4040//ZZ switch(e->Iex.Binop.op) {
4041//ZZ case Iop_QNarrowUn16Uto8Ux8: size = 0; break;
4042//ZZ case Iop_QNarrowUn32Uto16Ux4: size = 1; break;
4043//ZZ case Iop_QNarrowUn64Uto32Ux2: size = 2; break;
4044//ZZ default: vassert(0);
4045//ZZ }
4046//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNUU,
4047//ZZ res, arg, size, False));
4048//ZZ return res;
4049//ZZ }
4050//ZZ case Iop_PwAddL8Sx8:
4051//ZZ case Iop_PwAddL16Sx4:
4052//ZZ case Iop_PwAddL32Sx2: {
4053//ZZ HReg res = newVRegD(env);
4054//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4055//ZZ UInt size = 0;
4056//ZZ switch(e->Iex.Binop.op) {
4057//ZZ case Iop_PwAddL8Sx8: size = 0; break;
4058//ZZ case Iop_PwAddL16Sx4: size = 1; break;
4059//ZZ case Iop_PwAddL32Sx2: size = 2; break;
4060//ZZ default: vassert(0);
4061//ZZ }
4062//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_PADDLS,
4063//ZZ res, arg, size, False));
4064//ZZ return res;
4065//ZZ }
4066//ZZ case Iop_PwAddL8Ux8:
4067//ZZ case Iop_PwAddL16Ux4:
4068//ZZ case Iop_PwAddL32Ux2: {
4069//ZZ HReg res = newVRegD(env);
4070//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4071//ZZ UInt size = 0;
4072//ZZ switch(e->Iex.Binop.op) {
4073//ZZ case Iop_PwAddL8Ux8: size = 0; break;
4074//ZZ case Iop_PwAddL16Ux4: size = 1; break;
4075//ZZ case Iop_PwAddL32Ux2: size = 2; break;
4076//ZZ default: vassert(0);
4077//ZZ }
4078//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_PADDLU,
4079//ZZ res, arg, size, False));
4080//ZZ return res;
4081//ZZ }
4082//ZZ case Iop_Cnt8x8: {
4083//ZZ HReg res = newVRegD(env);
4084//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4085//ZZ UInt size = 0;
4086//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_CNT,
4087//ZZ res, arg, size, False));
4088//ZZ return res;
4089//ZZ }
4090//ZZ case Iop_Clz8Sx8:
4091//ZZ case Iop_Clz16Sx4:
4092//ZZ case Iop_Clz32Sx2: {
4093//ZZ HReg res = newVRegD(env);
4094//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4095//ZZ UInt size = 0;
4096//ZZ switch(e->Iex.Binop.op) {
4097//ZZ case Iop_Clz8Sx8: size = 0; break;
4098//ZZ case Iop_Clz16Sx4: size = 1; break;
4099//ZZ case Iop_Clz32Sx2: size = 2; break;
4100//ZZ default: vassert(0);
4101//ZZ }
4102//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_CLZ,
4103//ZZ res, arg, size, False));
4104//ZZ return res;
4105//ZZ }
4106//ZZ case Iop_Cls8Sx8:
4107//ZZ case Iop_Cls16Sx4:
4108//ZZ case Iop_Cls32Sx2: {
4109//ZZ HReg res = newVRegD(env);
4110//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4111//ZZ UInt size = 0;
4112//ZZ switch(e->Iex.Binop.op) {
4113//ZZ case Iop_Cls8Sx8: size = 0; break;
4114//ZZ case Iop_Cls16Sx4: size = 1; break;
4115//ZZ case Iop_Cls32Sx2: size = 2; break;
4116//ZZ default: vassert(0);
4117//ZZ }
4118//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_CLS,
4119//ZZ res, arg, size, False));
4120//ZZ return res;
4121//ZZ }
4122//ZZ case Iop_FtoI32Sx2_RZ: {
4123//ZZ HReg res = newVRegD(env);
4124//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4125//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoS,
4126//ZZ res, arg, 2, False));
4127//ZZ return res;
4128//ZZ }
4129//ZZ case Iop_FtoI32Ux2_RZ: {
4130//ZZ HReg res = newVRegD(env);
4131//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4132//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoU,
4133//ZZ res, arg, 2, False));
4134//ZZ return res;
4135//ZZ }
4136//ZZ case Iop_I32StoFx2: {
4137//ZZ HReg res = newVRegD(env);
4138//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4139//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTStoF,
4140//ZZ res, arg, 2, False));
4141//ZZ return res;
4142//ZZ }
4143//ZZ case Iop_I32UtoFx2: {
4144//ZZ HReg res = newVRegD(env);
4145//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4146//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTUtoF,
4147//ZZ res, arg, 2, False));
4148//ZZ return res;
4149//ZZ }
4150//ZZ case Iop_F32toF16x4: {
4151//ZZ HReg res = newVRegD(env);
4152//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4153//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTF32toF16,
4154//ZZ res, arg, 2, False));
4155//ZZ return res;
4156//ZZ }
4157//ZZ case Iop_Recip32Fx2: {
4158//ZZ HReg res = newVRegD(env);
4159//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
4160//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VRECIPF,
4161//ZZ res, argL, 0, False));
4162//ZZ return res;
4163//ZZ }
4164//ZZ case Iop_Recip32x2: {
4165//ZZ HReg res = newVRegD(env);
4166//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
4167//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VRECIP,
4168//ZZ res, argL, 0, False));
4169//ZZ return res;
4170//ZZ }
4171//ZZ case Iop_Abs32Fx2: {
4172//ZZ DECLARE_PATTERN(p_vabd_32fx2);
4173//ZZ DEFINE_PATTERN(p_vabd_32fx2,
4174//ZZ unop(Iop_Abs32Fx2,
4175//ZZ binop(Iop_Sub32Fx2,
4176//ZZ bind(0),
4177//ZZ bind(1))));
4178//ZZ if (matchIRExpr(&mi, p_vabd_32fx2, e)) {
4179//ZZ HReg res = newVRegD(env);
4180//ZZ HReg argL = iselNeon64Expr(env, mi.bindee[0]);
4181//ZZ HReg argR = iselNeon64Expr(env, mi.bindee[1]);
4182//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VABDFP,
4183//ZZ res, argL, argR, 0, False));
4184//ZZ return res;
4185//ZZ } else {
4186//ZZ HReg res = newVRegD(env);
4187//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4188//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VABSFP,
4189//ZZ res, arg, 0, False));
4190//ZZ return res;
4191//ZZ }
4192//ZZ }
4193//ZZ case Iop_Rsqrte32Fx2: {
4194//ZZ HReg res = newVRegD(env);
4195//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4196//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTEFP,
4197//ZZ res, arg, 0, False));
4198//ZZ return res;
4199//ZZ }
4200//ZZ case Iop_Rsqrte32x2: {
4201//ZZ HReg res = newVRegD(env);
4202//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4203//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTE,
4204//ZZ res, arg, 0, False));
4205//ZZ return res;
4206//ZZ }
4207//ZZ case Iop_Neg32Fx2: {
4208//ZZ HReg res = newVRegD(env);
4209//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4210//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VNEGF,
4211//ZZ res, arg, 0, False));
4212//ZZ return res;
4213//ZZ }
4214//ZZ default:
4215//ZZ break;
4216//ZZ }
4217//ZZ } /* if (e->tag == Iex_Unop) */
4218//ZZ
4219//ZZ if (e->tag == Iex_Triop) {
4220//ZZ IRTriop *triop = e->Iex.Triop.details;
4221//ZZ
4222//ZZ switch (triop->op) {
4223//ZZ case Iop_Extract64: {
4224//ZZ HReg res = newVRegD(env);
4225//ZZ HReg argL = iselNeon64Expr(env, triop->arg1);
4226//ZZ HReg argR = iselNeon64Expr(env, triop->arg2);
4227//ZZ UInt imm4;
4228//ZZ if (triop->arg3->tag != Iex_Const ||
4229//ZZ typeOfIRExpr(env->type_env, triop->arg3) != Ity_I8) {
4230//ZZ vpanic("ARM target supports Iop_Extract64 with constant "
4231//ZZ "third argument less than 16 only\n");
4232//ZZ }
4233//ZZ imm4 = triop->arg3->Iex.Const.con->Ico.U8;
4234//ZZ if (imm4 >= 8) {
4235//ZZ vpanic("ARM target supports Iop_Extract64 with constant "
4236//ZZ "third argument less than 16 only\n");
4237//ZZ }
4238//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VEXT,
4239//ZZ res, argL, argR, imm4, False));
4240//ZZ return res;
4241//ZZ }
4242//ZZ case Iop_SetElem8x8:
4243//ZZ case Iop_SetElem16x4:
4244//ZZ case Iop_SetElem32x2: {
4245//ZZ HReg res = newVRegD(env);
4246//ZZ HReg dreg = iselNeon64Expr(env, triop->arg1);
4247//ZZ HReg arg = iselIntExpr_R(env, triop->arg3);
4248//ZZ UInt index, size;
4249//ZZ if (triop->arg2->tag != Iex_Const ||
4250//ZZ typeOfIRExpr(env->type_env, triop->arg2) != Ity_I8) {
4251//ZZ vpanic("ARM target supports SetElem with constant "
4252//ZZ "second argument only\n");
4253//ZZ }
4254//ZZ index = triop->arg2->Iex.Const.con->Ico.U8;
4255//ZZ switch (triop->op) {
4256//ZZ case Iop_SetElem8x8: vassert(index < 8); size = 0; break;
4257//ZZ case Iop_SetElem16x4: vassert(index < 4); size = 1; break;
4258//ZZ case Iop_SetElem32x2: vassert(index < 2); size = 2; break;
4259//ZZ default: vassert(0);
4260//ZZ }
4261//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, res, dreg, 4, False));
4262//ZZ addInstr(env, ARMInstr_NUnaryS(ARMneon_SETELEM,
4263//ZZ mkARMNRS(ARMNRS_Scalar, res, index),
4264//ZZ mkARMNRS(ARMNRS_Reg, arg, 0),
4265//ZZ size, False));
4266//ZZ return res;
4267//ZZ }
4268//ZZ default:
4269//ZZ break;
4270//ZZ }
4271//ZZ }
4272//ZZ
4273//ZZ /* --------- MULTIPLEX --------- */
4274//ZZ if (e->tag == Iex_ITE) { // VFD
4275//ZZ HReg rLo, rHi;
4276//ZZ HReg res = newVRegD(env);
4277//ZZ iselInt64Expr(&rHi, &rLo, env, e);
4278//ZZ addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
4279//ZZ return res;
4280//ZZ }
4281//ZZ
4282//ZZ ppIRExpr(e);
4283//ZZ vpanic("iselNeon64Expr");
4284//ZZ }
4285
4286
4287/*---------------------------------------------------------*/
4288/*--- ISEL: Vector (NEON) expressions (128 bit) ---*/
4289/*---------------------------------------------------------*/
4290
4291static HReg iselV128Expr ( ISelEnv* env, IRExpr* e )
4292{
4293 HReg r = iselV128Expr_wrk( env, e );
4294 vassert(hregClass(r) == HRcVec128);
4295 vassert(hregIsVirtual(r));
4296 return r;
4297}
4298
4299/* DO NOT CALL THIS DIRECTLY */
4300static HReg iselV128Expr_wrk ( ISelEnv* env, IRExpr* e )
4301{
4302 IRType ty = typeOfIRExpr(env->type_env, e);
4303 vassert(e);
4304 vassert(ty == Ity_V128);
4305
4306 if (e->tag == Iex_RdTmp) {
4307 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
4308 }
4309
4310 if (e->tag == Iex_Const) {
4311 /* Only a very limited range of constants is handled. */
4312 vassert(e->Iex.Const.con->tag == Ico_V128);
4313 UShort con = e->Iex.Const.con->Ico.V128;
4314 if (con == 0x0000) {
4315 HReg res = newVRegV(env);
4316 addInstr(env, ARM64Instr_VImmQ(res, con));
4317 return res;
4318 }
4319 /* Unhandled */
4320 goto v128_expr_bad;
4321 }
4322
4323 if (e->tag == Iex_Load) {
4324 HReg res = newVRegV(env);
4325 HReg rN = iselIntExpr_R(env, e->Iex.Load.addr);
4326 vassert(ty == Ity_V128);
4327 addInstr(env, ARM64Instr_VLdStQ(True/*isLoad*/, res, rN));
4328 return res;
4329 }
4330
4331 if (e->tag == Iex_Get) {
4332 UInt offs = (UInt)e->Iex.Get.offset;
4333 if (offs < (1<<12)) {
4334 HReg addr = mk_baseblock_128bit_access_addr(env, offs);
4335 HReg res = newVRegV(env);
4336 vassert(ty == Ity_V128);
4337 addInstr(env, ARM64Instr_VLdStQ(True/*isLoad*/, res, addr));
4338 return res;
4339 }
4340 goto v128_expr_bad;
4341 }
4342
sewardjecde6972014-02-05 11:01:19 +00004343 if (e->tag == Iex_Unop) {
4344
4345 /* Iop_ZeroHIXXofV128 cases */
4346 UShort imm16 = 0;
4347 switch (e->Iex.Unop.op) {
4348 case Iop_ZeroHI96ofV128: imm16 = 0x000F; break;
4349 case Iop_ZeroHI112ofV128: imm16 = 0x0003; break;
4350 default: break;
4351 }
4352 if (imm16 != 0) {
4353 HReg src = iselV128Expr(env, e->Iex.Unop.arg);
4354 HReg imm = newVRegV(env);
4355 HReg res = newVRegV(env);
4356 addInstr(env, ARM64Instr_VImmQ(imm, imm16));
4357 addInstr(env, ARM64Instr_VBinV(ARM64vecb_AND, res, src, imm));
4358 return res;
4359 }
4360
4361 /* Other cases */
4362 switch (e->Iex.Unop.op) {
sewardjbbcf1882014-01-12 12:49:10 +00004363//ZZ case Iop_NotV128: {
4364//ZZ DECLARE_PATTERN(p_veqz_8x16);
4365//ZZ DECLARE_PATTERN(p_veqz_16x8);
4366//ZZ DECLARE_PATTERN(p_veqz_32x4);
4367//ZZ DECLARE_PATTERN(p_vcge_8sx16);
4368//ZZ DECLARE_PATTERN(p_vcge_16sx8);
4369//ZZ DECLARE_PATTERN(p_vcge_32sx4);
4370//ZZ DECLARE_PATTERN(p_vcge_8ux16);
4371//ZZ DECLARE_PATTERN(p_vcge_16ux8);
4372//ZZ DECLARE_PATTERN(p_vcge_32ux4);
4373//ZZ DEFINE_PATTERN(p_veqz_8x16,
4374//ZZ unop(Iop_NotV128, unop(Iop_CmpNEZ8x16, bind(0))));
4375//ZZ DEFINE_PATTERN(p_veqz_16x8,
4376//ZZ unop(Iop_NotV128, unop(Iop_CmpNEZ16x8, bind(0))));
4377//ZZ DEFINE_PATTERN(p_veqz_32x4,
4378//ZZ unop(Iop_NotV128, unop(Iop_CmpNEZ32x4, bind(0))));
4379//ZZ DEFINE_PATTERN(p_vcge_8sx16,
4380//ZZ unop(Iop_NotV128, binop(Iop_CmpGT8Sx16, bind(1), bind(0))));
4381//ZZ DEFINE_PATTERN(p_vcge_16sx8,
4382//ZZ unop(Iop_NotV128, binop(Iop_CmpGT16Sx8, bind(1), bind(0))));
4383//ZZ DEFINE_PATTERN(p_vcge_32sx4,
4384//ZZ unop(Iop_NotV128, binop(Iop_CmpGT32Sx4, bind(1), bind(0))));
4385//ZZ DEFINE_PATTERN(p_vcge_8ux16,
4386//ZZ unop(Iop_NotV128, binop(Iop_CmpGT8Ux16, bind(1), bind(0))));
4387//ZZ DEFINE_PATTERN(p_vcge_16ux8,
4388//ZZ unop(Iop_NotV128, binop(Iop_CmpGT16Ux8, bind(1), bind(0))));
4389//ZZ DEFINE_PATTERN(p_vcge_32ux4,
4390//ZZ unop(Iop_NotV128, binop(Iop_CmpGT32Ux4, bind(1), bind(0))));
4391//ZZ if (matchIRExpr(&mi, p_veqz_8x16, e)) {
4392//ZZ HReg res = newVRegV(env);
4393//ZZ HReg arg = iselNeonExpr(env, mi.bindee[0]);
4394//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 0, True));
4395//ZZ return res;
4396//ZZ } else if (matchIRExpr(&mi, p_veqz_16x8, e)) {
4397//ZZ HReg res = newVRegV(env);
4398//ZZ HReg arg = iselNeonExpr(env, mi.bindee[0]);
4399//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 1, True));
4400//ZZ return res;
4401//ZZ } else if (matchIRExpr(&mi, p_veqz_32x4, e)) {
4402//ZZ HReg res = newVRegV(env);
4403//ZZ HReg arg = iselNeonExpr(env, mi.bindee[0]);
4404//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 2, True));
4405//ZZ return res;
4406//ZZ } else if (matchIRExpr(&mi, p_vcge_8sx16, e)) {
4407//ZZ HReg res = newVRegV(env);
4408//ZZ HReg argL = iselNeonExpr(env, mi.bindee[0]);
4409//ZZ HReg argR = iselNeonExpr(env, mi.bindee[1]);
4410//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
4411//ZZ res, argL, argR, 0, True));
4412//ZZ return res;
4413//ZZ } else if (matchIRExpr(&mi, p_vcge_16sx8, e)) {
4414//ZZ HReg res = newVRegV(env);
4415//ZZ HReg argL = iselNeonExpr(env, mi.bindee[0]);
4416//ZZ HReg argR = iselNeonExpr(env, mi.bindee[1]);
4417//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
4418//ZZ res, argL, argR, 1, True));
4419//ZZ return res;
4420//ZZ } else if (matchIRExpr(&mi, p_vcge_32sx4, e)) {
4421//ZZ HReg res = newVRegV(env);
4422//ZZ HReg argL = iselNeonExpr(env, mi.bindee[0]);
4423//ZZ HReg argR = iselNeonExpr(env, mi.bindee[1]);
4424//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
4425//ZZ res, argL, argR, 2, True));
4426//ZZ return res;
4427//ZZ } else if (matchIRExpr(&mi, p_vcge_8ux16, e)) {
4428//ZZ HReg res = newVRegV(env);
4429//ZZ HReg argL = iselNeonExpr(env, mi.bindee[0]);
4430//ZZ HReg argR = iselNeonExpr(env, mi.bindee[1]);
4431//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
4432//ZZ res, argL, argR, 0, True));
4433//ZZ return res;
4434//ZZ } else if (matchIRExpr(&mi, p_vcge_16ux8, e)) {
4435//ZZ HReg res = newVRegV(env);
4436//ZZ HReg argL = iselNeonExpr(env, mi.bindee[0]);
4437//ZZ HReg argR = iselNeonExpr(env, mi.bindee[1]);
4438//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
4439//ZZ res, argL, argR, 1, True));
4440//ZZ return res;
4441//ZZ } else if (matchIRExpr(&mi, p_vcge_32ux4, e)) {
4442//ZZ HReg res = newVRegV(env);
4443//ZZ HReg argL = iselNeonExpr(env, mi.bindee[0]);
4444//ZZ HReg argR = iselNeonExpr(env, mi.bindee[1]);
4445//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
4446//ZZ res, argL, argR, 2, True));
4447//ZZ return res;
4448//ZZ } else {
4449//ZZ HReg res = newVRegV(env);
4450//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4451//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, arg, 4, True));
4452//ZZ return res;
4453//ZZ }
4454//ZZ }
4455//ZZ case Iop_Dup8x16:
4456//ZZ case Iop_Dup16x8:
4457//ZZ case Iop_Dup32x4: {
4458//ZZ HReg res, arg;
4459//ZZ UInt size;
4460//ZZ DECLARE_PATTERN(p_vdup_8x16);
4461//ZZ DECLARE_PATTERN(p_vdup_16x8);
4462//ZZ DECLARE_PATTERN(p_vdup_32x4);
4463//ZZ DEFINE_PATTERN(p_vdup_8x16,
4464//ZZ unop(Iop_Dup8x16, binop(Iop_GetElem8x8, bind(0), bind(1))));
4465//ZZ DEFINE_PATTERN(p_vdup_16x8,
4466//ZZ unop(Iop_Dup16x8, binop(Iop_GetElem16x4, bind(0), bind(1))));
4467//ZZ DEFINE_PATTERN(p_vdup_32x4,
4468//ZZ unop(Iop_Dup32x4, binop(Iop_GetElem32x2, bind(0), bind(1))));
4469//ZZ if (matchIRExpr(&mi, p_vdup_8x16, e)) {
4470//ZZ UInt index;
4471//ZZ UInt imm4;
4472//ZZ if (mi.bindee[1]->tag == Iex_Const &&
4473//ZZ typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
4474//ZZ index = mi.bindee[1]->Iex.Const.con->Ico.U8;
4475//ZZ imm4 = (index << 1) + 1;
4476//ZZ if (index < 8) {
4477//ZZ res = newVRegV(env);
4478//ZZ arg = iselNeon64Expr(env, mi.bindee[0]);
4479//ZZ addInstr(env, ARMInstr_NUnaryS(
4480//ZZ ARMneon_VDUP,
4481//ZZ mkARMNRS(ARMNRS_Reg, res, 0),
4482//ZZ mkARMNRS(ARMNRS_Scalar, arg, index),
4483//ZZ imm4, True
4484//ZZ ));
4485//ZZ return res;
4486//ZZ }
4487//ZZ }
4488//ZZ } else if (matchIRExpr(&mi, p_vdup_16x8, e)) {
4489//ZZ UInt index;
4490//ZZ UInt imm4;
4491//ZZ if (mi.bindee[1]->tag == Iex_Const &&
4492//ZZ typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
4493//ZZ index = mi.bindee[1]->Iex.Const.con->Ico.U8;
4494//ZZ imm4 = (index << 2) + 2;
4495//ZZ if (index < 4) {
4496//ZZ res = newVRegV(env);
4497//ZZ arg = iselNeon64Expr(env, mi.bindee[0]);
4498//ZZ addInstr(env, ARMInstr_NUnaryS(
4499//ZZ ARMneon_VDUP,
4500//ZZ mkARMNRS(ARMNRS_Reg, res, 0),
4501//ZZ mkARMNRS(ARMNRS_Scalar, arg, index),
4502//ZZ imm4, True
4503//ZZ ));
4504//ZZ return res;
4505//ZZ }
4506//ZZ }
4507//ZZ } else if (matchIRExpr(&mi, p_vdup_32x4, e)) {
4508//ZZ UInt index;
4509//ZZ UInt imm4;
4510//ZZ if (mi.bindee[1]->tag == Iex_Const &&
4511//ZZ typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
4512//ZZ index = mi.bindee[1]->Iex.Const.con->Ico.U8;
4513//ZZ imm4 = (index << 3) + 4;
4514//ZZ if (index < 2) {
4515//ZZ res = newVRegV(env);
4516//ZZ arg = iselNeon64Expr(env, mi.bindee[0]);
4517//ZZ addInstr(env, ARMInstr_NUnaryS(
4518//ZZ ARMneon_VDUP,
4519//ZZ mkARMNRS(ARMNRS_Reg, res, 0),
4520//ZZ mkARMNRS(ARMNRS_Scalar, arg, index),
4521//ZZ imm4, True
4522//ZZ ));
4523//ZZ return res;
4524//ZZ }
4525//ZZ }
4526//ZZ }
4527//ZZ arg = iselIntExpr_R(env, e->Iex.Unop.arg);
4528//ZZ res = newVRegV(env);
4529//ZZ switch (e->Iex.Unop.op) {
4530//ZZ case Iop_Dup8x16: size = 0; break;
4531//ZZ case Iop_Dup16x8: size = 1; break;
4532//ZZ case Iop_Dup32x4: size = 2; break;
4533//ZZ default: vassert(0);
4534//ZZ }
4535//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_DUP, res, arg, size, True));
4536//ZZ return res;
4537//ZZ }
4538//ZZ case Iop_Abs8x16:
4539//ZZ case Iop_Abs16x8:
4540//ZZ case Iop_Abs32x4: {
4541//ZZ HReg res = newVRegV(env);
4542//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4543//ZZ UInt size = 0;
4544//ZZ             switch(e->Iex.Unop.op) {
4545//ZZ case Iop_Abs8x16: size = 0; break;
4546//ZZ case Iop_Abs16x8: size = 1; break;
4547//ZZ case Iop_Abs32x4: size = 2; break;
4548//ZZ default: vassert(0);
4549//ZZ }
4550//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_ABS, res, arg, size, True));
4551//ZZ return res;
4552//ZZ }
4553//ZZ case Iop_Reverse64_8x16:
4554//ZZ case Iop_Reverse64_16x8:
4555//ZZ case Iop_Reverse64_32x4: {
4556//ZZ HReg res = newVRegV(env);
4557//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4558//ZZ UInt size = 0;
4559//ZZ             switch(e->Iex.Unop.op) {
4560//ZZ case Iop_Reverse64_8x16: size = 0; break;
4561//ZZ case Iop_Reverse64_16x8: size = 1; break;
4562//ZZ case Iop_Reverse64_32x4: size = 2; break;
4563//ZZ default: vassert(0);
4564//ZZ }
4565//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_REV64,
4566//ZZ res, arg, size, True));
4567//ZZ return res;
4568//ZZ }
4569//ZZ case Iop_Reverse32_8x16:
4570//ZZ case Iop_Reverse32_16x8: {
4571//ZZ HReg res = newVRegV(env);
4572//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4573//ZZ UInt size = 0;
4574//ZZ             switch(e->Iex.Unop.op) {
4575//ZZ case Iop_Reverse32_8x16: size = 0; break;
4576//ZZ case Iop_Reverse32_16x8: size = 1; break;
4577//ZZ default: vassert(0);
4578//ZZ }
4579//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_REV32,
4580//ZZ res, arg, size, True));
4581//ZZ return res;
4582//ZZ }
4583//ZZ case Iop_Reverse16_8x16: {
4584//ZZ HReg res = newVRegV(env);
4585//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4586//ZZ UInt size = 0;
4587//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_REV16,
4588//ZZ res, arg, size, True));
4589//ZZ return res;
4590//ZZ }
4591//ZZ case Iop_CmpNEZ64x2: {
4592//ZZ HReg x_lsh = newVRegV(env);
4593//ZZ HReg x_rsh = newVRegV(env);
4594//ZZ HReg lsh_amt = newVRegV(env);
4595//ZZ HReg rsh_amt = newVRegV(env);
4596//ZZ HReg zero = newVRegV(env);
4597//ZZ HReg tmp = newVRegV(env);
4598//ZZ HReg tmp2 = newVRegV(env);
4599//ZZ HReg res = newVRegV(env);
4600//ZZ HReg x = newVRegV(env);
4601//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4602//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp2, arg, 2, True));
4603//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_NOT, x, tmp2, 4, True));
4604//ZZ addInstr(env, ARMInstr_NeonImm(lsh_amt, ARMNImm_TI(0, 32)));
4605//ZZ addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0, 0)));
4606//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
4607//ZZ rsh_amt, zero, lsh_amt, 2, True));
4608//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4609//ZZ x_lsh, x, lsh_amt, 3, True));
4610//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4611//ZZ x_rsh, x, rsh_amt, 3, True));
4612//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
4613//ZZ tmp, x_lsh, x_rsh, 0, True));
4614//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
4615//ZZ res, tmp, x, 0, True));
4616//ZZ return res;
4617//ZZ }
4618//ZZ case Iop_CmpNEZ8x16:
4619//ZZ case Iop_CmpNEZ16x8:
4620//ZZ case Iop_CmpNEZ32x4: {
4621//ZZ HReg res = newVRegV(env);
4622//ZZ HReg tmp = newVRegV(env);
4623//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4624//ZZ UInt size;
4625//ZZ switch (e->Iex.Unop.op) {
4626//ZZ case Iop_CmpNEZ8x16: size = 0; break;
4627//ZZ case Iop_CmpNEZ16x8: size = 1; break;
4628//ZZ case Iop_CmpNEZ32x4: size = 2; break;
4629//ZZ default: vassert(0);
4630//ZZ }
4631//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp, arg, size, True));
4632//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, tmp, 4, True));
4633//ZZ return res;
4634//ZZ }
4635//ZZ case Iop_Widen8Uto16x8:
4636//ZZ case Iop_Widen16Uto32x4:
4637//ZZ case Iop_Widen32Uto64x2: {
4638//ZZ HReg res = newVRegV(env);
4639//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4640//ZZ UInt size;
4641//ZZ switch (e->Iex.Unop.op) {
4642//ZZ case Iop_Widen8Uto16x8: size = 0; break;
4643//ZZ case Iop_Widen16Uto32x4: size = 1; break;
4644//ZZ case Iop_Widen32Uto64x2: size = 2; break;
4645//ZZ default: vassert(0);
4646//ZZ }
4647//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPYLU,
4648//ZZ res, arg, size, True));
4649//ZZ return res;
4650//ZZ }
4651//ZZ case Iop_Widen8Sto16x8:
4652//ZZ case Iop_Widen16Sto32x4:
4653//ZZ case Iop_Widen32Sto64x2: {
4654//ZZ HReg res = newVRegV(env);
4655//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4656//ZZ UInt size;
4657//ZZ switch (e->Iex.Unop.op) {
4658//ZZ case Iop_Widen8Sto16x8: size = 0; break;
4659//ZZ case Iop_Widen16Sto32x4: size = 1; break;
4660//ZZ case Iop_Widen32Sto64x2: size = 2; break;
4661//ZZ default: vassert(0);
4662//ZZ }
4663//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPYLS,
4664//ZZ res, arg, size, True));
4665//ZZ return res;
4666//ZZ }
4667//ZZ case Iop_PwAddL8Sx16:
4668//ZZ case Iop_PwAddL16Sx8:
4669//ZZ case Iop_PwAddL32Sx4: {
4670//ZZ HReg res = newVRegV(env);
4671//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4672//ZZ UInt size = 0;
4673//ZZ             switch(e->Iex.Unop.op) {
4674//ZZ case Iop_PwAddL8Sx16: size = 0; break;
4675//ZZ case Iop_PwAddL16Sx8: size = 1; break;
4676//ZZ case Iop_PwAddL32Sx4: size = 2; break;
4677//ZZ default: vassert(0);
4678//ZZ }
4679//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_PADDLS,
4680//ZZ res, arg, size, True));
4681//ZZ return res;
4682//ZZ }
4683//ZZ case Iop_PwAddL8Ux16:
4684//ZZ case Iop_PwAddL16Ux8:
4685//ZZ case Iop_PwAddL32Ux4: {
4686//ZZ HReg res = newVRegV(env);
4687//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4688//ZZ UInt size = 0;
4689//ZZ             switch(e->Iex.Unop.op) {
4690//ZZ case Iop_PwAddL8Ux16: size = 0; break;
4691//ZZ case Iop_PwAddL16Ux8: size = 1; break;
4692//ZZ case Iop_PwAddL32Ux4: size = 2; break;
4693//ZZ default: vassert(0);
4694//ZZ }
4695//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_PADDLU,
4696//ZZ res, arg, size, True));
4697//ZZ return res;
4698//ZZ }
4699//ZZ case Iop_Cnt8x16: {
4700//ZZ HReg res = newVRegV(env);
4701//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4702//ZZ UInt size = 0;
4703//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_CNT, res, arg, size, True));
4704//ZZ return res;
4705//ZZ }
4706//ZZ case Iop_Clz8Sx16:
4707//ZZ case Iop_Clz16Sx8:
4708//ZZ case Iop_Clz32Sx4: {
4709//ZZ HReg res = newVRegV(env);
4710//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4711//ZZ UInt size = 0;
4712//ZZ             switch(e->Iex.Unop.op) {
4713//ZZ case Iop_Clz8Sx16: size = 0; break;
4714//ZZ case Iop_Clz16Sx8: size = 1; break;
4715//ZZ case Iop_Clz32Sx4: size = 2; break;
4716//ZZ default: vassert(0);
4717//ZZ }
4718//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_CLZ, res, arg, size, True));
4719//ZZ return res;
4720//ZZ }
4721//ZZ case Iop_Cls8Sx16:
4722//ZZ case Iop_Cls16Sx8:
4723//ZZ case Iop_Cls32Sx4: {
4724//ZZ HReg res = newVRegV(env);
4725//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4726//ZZ UInt size = 0;
4727//ZZ             switch(e->Iex.Unop.op) {
4728//ZZ case Iop_Cls8Sx16: size = 0; break;
4729//ZZ case Iop_Cls16Sx8: size = 1; break;
4730//ZZ case Iop_Cls32Sx4: size = 2; break;
4731//ZZ default: vassert(0);
4732//ZZ }
4733//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_CLS, res, arg, size, True));
4734//ZZ return res;
4735//ZZ }
4736//ZZ case Iop_FtoI32Sx4_RZ: {
4737//ZZ HReg res = newVRegV(env);
4738//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4739//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoS,
4740//ZZ res, arg, 2, True));
4741//ZZ return res;
4742//ZZ }
4743//ZZ case Iop_FtoI32Ux4_RZ: {
4744//ZZ HReg res = newVRegV(env);
4745//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4746//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoU,
4747//ZZ res, arg, 2, True));
4748//ZZ return res;
4749//ZZ }
4750//ZZ case Iop_I32StoFx4: {
4751//ZZ HReg res = newVRegV(env);
4752//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4753//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTStoF,
4754//ZZ res, arg, 2, True));
4755//ZZ return res;
4756//ZZ }
4757//ZZ case Iop_I32UtoFx4: {
4758//ZZ HReg res = newVRegV(env);
4759//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4760//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTUtoF,
4761//ZZ res, arg, 2, True));
4762//ZZ return res;
4763//ZZ }
4764//ZZ case Iop_F16toF32x4: {
4765//ZZ HReg res = newVRegV(env);
4766//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4767//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTF16toF32,
4768//ZZ res, arg, 2, True));
4769//ZZ return res;
4770//ZZ }
4771//ZZ case Iop_Recip32Fx4: {
4772//ZZ HReg res = newVRegV(env);
4773//ZZ HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4774//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VRECIPF,
4775//ZZ res, argL, 0, True));
4776//ZZ return res;
4777//ZZ }
4778//ZZ case Iop_Recip32x4: {
4779//ZZ HReg res = newVRegV(env);
4780//ZZ HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4781//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VRECIP,
4782//ZZ res, argL, 0, True));
4783//ZZ return res;
4784//ZZ }
4785//ZZ case Iop_Abs32Fx4: {
4786//ZZ DECLARE_PATTERN(p_vabd_32fx4);
4787//ZZ DEFINE_PATTERN(p_vabd_32fx4,
4788//ZZ unop(Iop_Abs32Fx4,
4789//ZZ binop(Iop_Sub32Fx4,
4790//ZZ bind(0),
4791//ZZ bind(1))));
4792//ZZ if (matchIRExpr(&mi, p_vabd_32fx4, e)) {
4793//ZZ HReg res = newVRegV(env);
4794//ZZ HReg argL = iselNeonExpr(env, mi.bindee[0]);
4795//ZZ HReg argR = iselNeonExpr(env, mi.bindee[1]);
4796//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VABDFP,
4797//ZZ res, argL, argR, 0, True));
4798//ZZ return res;
4799//ZZ } else {
4800//ZZ HReg res = newVRegV(env);
4801//ZZ HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4802//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VABSFP,
4803//ZZ res, argL, 0, True));
4804//ZZ return res;
4805//ZZ }
4806//ZZ }
4807//ZZ case Iop_Rsqrte32Fx4: {
4808//ZZ HReg res = newVRegV(env);
4809//ZZ HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4810//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTEFP,
4811//ZZ res, argL, 0, True));
4812//ZZ return res;
4813//ZZ }
4814//ZZ case Iop_Rsqrte32x4: {
4815//ZZ HReg res = newVRegV(env);
4816//ZZ HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4817//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTE,
4818//ZZ res, argL, 0, True));
4819//ZZ return res;
4820//ZZ }
4821//ZZ case Iop_Neg32Fx4: {
4822//ZZ HReg res = newVRegV(env);
4823//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4824//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VNEGF,
4825//ZZ res, arg, 0, True));
4826//ZZ return res;
4827//ZZ }
sewardjecde6972014-02-05 11:01:19 +00004828 /* ... */
4829 default:
4830 break;
4831 } /* switch on the unop */
4832 } /* if (e->tag == Iex_Unop) */
sewardjbbcf1882014-01-12 12:49:10 +00004833
4834 if (e->tag == Iex_Binop) {
4835 switch (e->Iex.Binop.op) {
4836 case Iop_64HLtoV128: {
4837 HReg res = newVRegV(env);
4838 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
4839 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
4840 addInstr(env, ARM64Instr_VQfromXX(res, argL, argR));
4841 return res;
4842 }
4843//ZZ case Iop_AndV128: {
4844//ZZ HReg res = newVRegV(env);
4845//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4846//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4847//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
4848//ZZ res, argL, argR, 4, True));
4849//ZZ return res;
4850//ZZ }
4851//ZZ case Iop_OrV128: {
4852//ZZ HReg res = newVRegV(env);
4853//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4854//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4855//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
4856//ZZ res, argL, argR, 4, True));
4857//ZZ return res;
4858//ZZ }
4859//ZZ case Iop_XorV128: {
4860//ZZ HReg res = newVRegV(env);
4861//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4862//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4863//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VXOR,
4864//ZZ res, argL, argR, 4, True));
4865//ZZ return res;
4866//ZZ }
4867//ZZ case Iop_Add8x16:
4868//ZZ case Iop_Add16x8:
4869//ZZ case Iop_Add32x4:
sewardjecde6972014-02-05 11:01:19 +00004870 case Iop_Max32Ux4:
4871 case Iop_Max16Ux8:
4872 case Iop_Min32Ux4:
4873 case Iop_Min16Ux8:
sewardjf5b08912014-02-06 12:57:58 +00004874 case Iop_Max32Sx4:
4875 case Iop_Max16Sx8:
4876 case Iop_Min32Sx4:
4877 case Iop_Min16Sx8:
sewardj606c4ba2014-01-26 19:11:14 +00004878 case Iop_Add64x2:
sewardjf5b08912014-02-06 12:57:58 +00004879 case Iop_Add32x4:
4880 case Iop_Add16x8:
sewardj606c4ba2014-01-26 19:11:14 +00004881 case Iop_Sub64x2:
4882 case Iop_Sub32x4:
sewardjf5b08912014-02-06 12:57:58 +00004883 case Iop_Sub16x8:
4884 case Iop_Mul32x4:
4885 case Iop_Mul16x8: {
sewardj606c4ba2014-01-26 19:11:14 +00004886 HReg res = newVRegV(env);
4887 HReg argL = iselV128Expr(env, e->Iex.Binop.arg1);
4888 HReg argR = iselV128Expr(env, e->Iex.Binop.arg2);
4889 ARM64VecBinOp op = ARM64vecb_INVALID;
4890 switch (e->Iex.Binop.op) {
sewardjecde6972014-02-05 11:01:19 +00004891 case Iop_Max32Ux4: op = ARM64vecb_UMAX32x4; break;
4892 case Iop_Max16Ux8: op = ARM64vecb_UMAX16x8; break;
4893 case Iop_Min32Ux4: op = ARM64vecb_UMIN32x4; break;
4894 case Iop_Min16Ux8: op = ARM64vecb_UMIN16x8; break;
sewardjf5b08912014-02-06 12:57:58 +00004895 case Iop_Max32Sx4: op = ARM64vecb_SMAX32x4; break;
4896 case Iop_Max16Sx8: op = ARM64vecb_SMAX16x8; break;
4897 case Iop_Min32Sx4: op = ARM64vecb_SMIN32x4; break;
4898 case Iop_Min16Sx8: op = ARM64vecb_SMIN16x8; break;
sewardjecde6972014-02-05 11:01:19 +00004899 case Iop_Add64x2: op = ARM64vecb_ADD64x2; break;
sewardjf5b08912014-02-06 12:57:58 +00004900 case Iop_Add32x4: op = ARM64vecb_ADD32x4; break;
4901 case Iop_Add16x8: op = ARM64vecb_ADD16x8; break;
sewardjecde6972014-02-05 11:01:19 +00004902 case Iop_Sub64x2: op = ARM64vecb_SUB64x2; break;
4903 case Iop_Sub32x4: op = ARM64vecb_SUB32x4; break;
4904 case Iop_Sub16x8: op = ARM64vecb_SUB16x8; break;
sewardjf5b08912014-02-06 12:57:58 +00004905 case Iop_Mul32x4: op = ARM64vecb_MUL32x4; break;
4906 case Iop_Mul16x8: op = ARM64vecb_MUL16x8; break;
sewardj606c4ba2014-01-26 19:11:14 +00004907 default: vassert(0);
4908 }
4909 addInstr(env, ARM64Instr_VBinV(op, res, argL, argR));
4910 return res;
4911 }
sewardjbbcf1882014-01-12 12:49:10 +00004912//ZZ case Iop_Add32Fx4: {
4913//ZZ HReg res = newVRegV(env);
4914//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4915//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4916//ZZ UInt size = 0;
4917//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VADDFP,
4918//ZZ res, argL, argR, size, True));
4919//ZZ return res;
4920//ZZ }
4921//ZZ case Iop_Recps32Fx4: {
4922//ZZ HReg res = newVRegV(env);
4923//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4924//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4925//ZZ UInt size = 0;
4926//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VRECPS,
4927//ZZ res, argL, argR, size, True));
4928//ZZ return res;
4929//ZZ }
4930//ZZ case Iop_Rsqrts32Fx4: {
4931//ZZ HReg res = newVRegV(env);
4932//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4933//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4934//ZZ UInt size = 0;
4935//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VRSQRTS,
4936//ZZ res, argL, argR, size, True));
4937//ZZ return res;
4938//ZZ }
4939//ZZ
4940//ZZ // These 6 verified 18 Apr 2013
4941//ZZ case Iop_InterleaveEvenLanes8x16:
4942//ZZ case Iop_InterleaveOddLanes8x16:
4943//ZZ case Iop_InterleaveEvenLanes16x8:
4944//ZZ case Iop_InterleaveOddLanes16x8:
4945//ZZ case Iop_InterleaveEvenLanes32x4:
4946//ZZ case Iop_InterleaveOddLanes32x4: {
4947//ZZ HReg rD = newVRegV(env);
4948//ZZ HReg rM = newVRegV(env);
4949//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4950//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4951//ZZ UInt size;
4952//ZZ Bool resRd; // is the result in rD or rM ?
4953//ZZ switch (e->Iex.Binop.op) {
4954//ZZ case Iop_InterleaveOddLanes8x16: resRd = False; size = 0; break;
4955//ZZ case Iop_InterleaveEvenLanes8x16: resRd = True; size = 0; break;
4956//ZZ case Iop_InterleaveOddLanes16x8: resRd = False; size = 1; break;
4957//ZZ case Iop_InterleaveEvenLanes16x8: resRd = True; size = 1; break;
4958//ZZ case Iop_InterleaveOddLanes32x4: resRd = False; size = 2; break;
4959//ZZ case Iop_InterleaveEvenLanes32x4: resRd = True; size = 2; break;
4960//ZZ default: vassert(0);
4961//ZZ }
4962//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, True));
4963//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, True));
4964//ZZ addInstr(env, ARMInstr_NDual(ARMneon_TRN, rD, rM, size, True));
4965//ZZ return resRd ? rD : rM;
4966//ZZ }
4967//ZZ
4968//ZZ // These 6 verified 18 Apr 2013
4969//ZZ case Iop_InterleaveHI8x16:
4970//ZZ case Iop_InterleaveLO8x16:
4971//ZZ case Iop_InterleaveHI16x8:
4972//ZZ case Iop_InterleaveLO16x8:
4973//ZZ case Iop_InterleaveHI32x4:
4974//ZZ case Iop_InterleaveLO32x4: {
4975//ZZ HReg rD = newVRegV(env);
4976//ZZ HReg rM = newVRegV(env);
4977//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4978//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4979//ZZ UInt size;
4980//ZZ Bool resRd; // is the result in rD or rM ?
4981//ZZ switch (e->Iex.Binop.op) {
4982//ZZ case Iop_InterleaveHI8x16: resRd = False; size = 0; break;
4983//ZZ case Iop_InterleaveLO8x16: resRd = True; size = 0; break;
4984//ZZ case Iop_InterleaveHI16x8: resRd = False; size = 1; break;
4985//ZZ case Iop_InterleaveLO16x8: resRd = True; size = 1; break;
4986//ZZ case Iop_InterleaveHI32x4: resRd = False; size = 2; break;
4987//ZZ case Iop_InterleaveLO32x4: resRd = True; size = 2; break;
4988//ZZ default: vassert(0);
4989//ZZ }
4990//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, True));
4991//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, True));
4992//ZZ addInstr(env, ARMInstr_NDual(ARMneon_ZIP, rD, rM, size, True));
4993//ZZ return resRd ? rD : rM;
4994//ZZ }
4995//ZZ
4996//ZZ // These 6 verified 18 Apr 2013
4997//ZZ case Iop_CatOddLanes8x16:
4998//ZZ case Iop_CatEvenLanes8x16:
4999//ZZ case Iop_CatOddLanes16x8:
5000//ZZ case Iop_CatEvenLanes16x8:
5001//ZZ case Iop_CatOddLanes32x4:
5002//ZZ case Iop_CatEvenLanes32x4: {
5003//ZZ HReg rD = newVRegV(env);
5004//ZZ HReg rM = newVRegV(env);
5005//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5006//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5007//ZZ UInt size;
5008//ZZ Bool resRd; // is the result in rD or rM ?
5009//ZZ switch (e->Iex.Binop.op) {
5010//ZZ case Iop_CatOddLanes8x16: resRd = False; size = 0; break;
5011//ZZ case Iop_CatEvenLanes8x16: resRd = True; size = 0; break;
5012//ZZ case Iop_CatOddLanes16x8: resRd = False; size = 1; break;
5013//ZZ case Iop_CatEvenLanes16x8: resRd = True; size = 1; break;
5014//ZZ case Iop_CatOddLanes32x4: resRd = False; size = 2; break;
5015//ZZ case Iop_CatEvenLanes32x4: resRd = True; size = 2; break;
5016//ZZ default: vassert(0);
5017//ZZ }
5018//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, True));
5019//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, True));
5020//ZZ addInstr(env, ARMInstr_NDual(ARMneon_UZP, rD, rM, size, True));
5021//ZZ return resRd ? rD : rM;
5022//ZZ }
5023//ZZ
5024//ZZ case Iop_QAdd8Ux16:
5025//ZZ case Iop_QAdd16Ux8:
5026//ZZ case Iop_QAdd32Ux4:
5027//ZZ case Iop_QAdd64Ux2: {
5028//ZZ HReg res = newVRegV(env);
5029//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5030//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5031//ZZ UInt size;
5032//ZZ switch (e->Iex.Binop.op) {
5033//ZZ case Iop_QAdd8Ux16: size = 0; break;
5034//ZZ case Iop_QAdd16Ux8: size = 1; break;
5035//ZZ case Iop_QAdd32Ux4: size = 2; break;
5036//ZZ case Iop_QAdd64Ux2: size = 3; break;
5037//ZZ default:
5038//ZZ ppIROp(e->Iex.Binop.op);
5039//ZZ vpanic("Illegal element size in VQADDU");
5040//ZZ }
5041//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQADDU,
5042//ZZ res, argL, argR, size, True));
5043//ZZ return res;
5044//ZZ }
5045//ZZ case Iop_QAdd8Sx16:
5046//ZZ case Iop_QAdd16Sx8:
5047//ZZ case Iop_QAdd32Sx4:
5048//ZZ case Iop_QAdd64Sx2: {
5049//ZZ HReg res = newVRegV(env);
5050//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5051//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5052//ZZ UInt size;
5053//ZZ switch (e->Iex.Binop.op) {
5054//ZZ case Iop_QAdd8Sx16: size = 0; break;
5055//ZZ case Iop_QAdd16Sx8: size = 1; break;
5056//ZZ case Iop_QAdd32Sx4: size = 2; break;
5057//ZZ case Iop_QAdd64Sx2: size = 3; break;
5058//ZZ default:
5059//ZZ ppIROp(e->Iex.Binop.op);
5060//ZZ vpanic("Illegal element size in VQADDS");
5061//ZZ }
5062//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQADDS,
5063//ZZ res, argL, argR, size, True));
5064//ZZ return res;
5065//ZZ }
5066//ZZ case Iop_Sub8x16:
5067//ZZ case Iop_Sub16x8:
5068//ZZ case Iop_Sub32x4:
5069//ZZ case Iop_Sub64x2: {
5070//ZZ HReg res = newVRegV(env);
5071//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5072//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5073//ZZ UInt size;
5074//ZZ switch (e->Iex.Binop.op) {
5075//ZZ case Iop_Sub8x16: size = 0; break;
5076//ZZ case Iop_Sub16x8: size = 1; break;
5077//ZZ case Iop_Sub32x4: size = 2; break;
5078//ZZ case Iop_Sub64x2: size = 3; break;
5079//ZZ default:
5080//ZZ ppIROp(e->Iex.Binop.op);
5081//ZZ vpanic("Illegal element size in VSUB");
5082//ZZ }
5083//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
5084//ZZ res, argL, argR, size, True));
5085//ZZ return res;
5086//ZZ }
5087//ZZ case Iop_Sub32Fx4: {
5088//ZZ HReg res = newVRegV(env);
5089//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5090//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5091//ZZ UInt size = 0;
5092//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VSUBFP,
5093//ZZ res, argL, argR, size, True));
5094//ZZ return res;
5095//ZZ }
5096//ZZ case Iop_QSub8Ux16:
5097//ZZ case Iop_QSub16Ux8:
5098//ZZ case Iop_QSub32Ux4:
5099//ZZ case Iop_QSub64Ux2: {
5100//ZZ HReg res = newVRegV(env);
5101//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5102//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5103//ZZ UInt size;
5104//ZZ switch (e->Iex.Binop.op) {
5105//ZZ case Iop_QSub8Ux16: size = 0; break;
5106//ZZ case Iop_QSub16Ux8: size = 1; break;
5107//ZZ case Iop_QSub32Ux4: size = 2; break;
5108//ZZ case Iop_QSub64Ux2: size = 3; break;
5109//ZZ default:
5110//ZZ ppIROp(e->Iex.Binop.op);
5111//ZZ vpanic("Illegal element size in VQSUBU");
5112//ZZ }
5113//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBU,
5114//ZZ res, argL, argR, size, True));
5115//ZZ return res;
5116//ZZ }
5117//ZZ case Iop_QSub8Sx16:
5118//ZZ case Iop_QSub16Sx8:
5119//ZZ case Iop_QSub32Sx4:
5120//ZZ case Iop_QSub64Sx2: {
5121//ZZ HReg res = newVRegV(env);
5122//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5123//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5124//ZZ UInt size;
5125//ZZ switch (e->Iex.Binop.op) {
5126//ZZ case Iop_QSub8Sx16: size = 0; break;
5127//ZZ case Iop_QSub16Sx8: size = 1; break;
5128//ZZ case Iop_QSub32Sx4: size = 2; break;
5129//ZZ case Iop_QSub64Sx2: size = 3; break;
5130//ZZ default:
5131//ZZ ppIROp(e->Iex.Binop.op);
5132//ZZ vpanic("Illegal element size in VQSUBS");
5133//ZZ }
5134//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBS,
5135//ZZ res, argL, argR, size, True));
5136//ZZ return res;
5137//ZZ }
5138//ZZ case Iop_Max8Ux16:
5139//ZZ case Iop_Max16Ux8:
5140//ZZ case Iop_Max32Ux4: {
5141//ZZ HReg res = newVRegV(env);
5142//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5143//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5144//ZZ UInt size;
5145//ZZ switch (e->Iex.Binop.op) {
5146//ZZ case Iop_Max8Ux16: size = 0; break;
5147//ZZ case Iop_Max16Ux8: size = 1; break;
5148//ZZ case Iop_Max32Ux4: size = 2; break;
5149//ZZ default: vpanic("Illegal element size in VMAXU");
5150//ZZ }
5151//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMAXU,
5152//ZZ res, argL, argR, size, True));
5153//ZZ return res;
5154//ZZ }
5155//ZZ case Iop_Max8Sx16:
5156//ZZ case Iop_Max16Sx8:
5157//ZZ case Iop_Max32Sx4: {
5158//ZZ HReg res = newVRegV(env);
5159//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5160//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5161//ZZ UInt size;
5162//ZZ switch (e->Iex.Binop.op) {
5163//ZZ case Iop_Max8Sx16: size = 0; break;
5164//ZZ case Iop_Max16Sx8: size = 1; break;
5165//ZZ case Iop_Max32Sx4: size = 2; break;
5166//ZZ                default: vpanic("Illegal element size in VMAXS");
5167//ZZ }
5168//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMAXS,
5169//ZZ res, argL, argR, size, True));
5170//ZZ return res;
5171//ZZ }
5172//ZZ case Iop_Min8Ux16:
5173//ZZ case Iop_Min16Ux8:
5174//ZZ case Iop_Min32Ux4: {
5175//ZZ HReg res = newVRegV(env);
5176//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5177//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5178//ZZ UInt size;
5179//ZZ switch (e->Iex.Binop.op) {
5180//ZZ case Iop_Min8Ux16: size = 0; break;
5181//ZZ case Iop_Min16Ux8: size = 1; break;
5182//ZZ case Iop_Min32Ux4: size = 2; break;
5183//ZZ                default: vpanic("Illegal element size in VMINU");
5184//ZZ }
5185//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMINU,
5186//ZZ res, argL, argR, size, True));
5187//ZZ return res;
5188//ZZ }
5189//ZZ case Iop_Min8Sx16:
5190//ZZ case Iop_Min16Sx8:
5191//ZZ case Iop_Min32Sx4: {
5192//ZZ HReg res = newVRegV(env);
5193//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5194//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5195//ZZ UInt size;
5196//ZZ switch (e->Iex.Binop.op) {
5197//ZZ case Iop_Min8Sx16: size = 0; break;
5198//ZZ case Iop_Min16Sx8: size = 1; break;
5199//ZZ case Iop_Min32Sx4: size = 2; break;
5200//ZZ                default: vpanic("Illegal element size in VMINS");
5201//ZZ }
5202//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMINS,
5203//ZZ res, argL, argR, size, True));
5204//ZZ return res;
5205//ZZ }
5206//ZZ case Iop_Sar8x16:
5207//ZZ case Iop_Sar16x8:
5208//ZZ case Iop_Sar32x4:
5209//ZZ case Iop_Sar64x2: {
5210//ZZ HReg res = newVRegV(env);
5211//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5212//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5213//ZZ HReg argR2 = newVRegV(env);
5214//ZZ HReg zero = newVRegV(env);
5215//ZZ UInt size;
5216//ZZ switch (e->Iex.Binop.op) {
5217//ZZ case Iop_Sar8x16: size = 0; break;
5218//ZZ case Iop_Sar16x8: size = 1; break;
5219//ZZ case Iop_Sar32x4: size = 2; break;
5220//ZZ case Iop_Sar64x2: size = 3; break;
5221//ZZ default: vassert(0);
5222//ZZ }
5223//ZZ addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
5224//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
5225//ZZ argR2, zero, argR, size, True));
5226//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
5227//ZZ res, argL, argR2, size, True));
5228//ZZ return res;
5229//ZZ }
5230//ZZ case Iop_Sal8x16:
5231//ZZ case Iop_Sal16x8:
5232//ZZ case Iop_Sal32x4:
5233//ZZ case Iop_Sal64x2: {
5234//ZZ HReg res = newVRegV(env);
5235//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5236//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5237//ZZ UInt size;
5238//ZZ switch (e->Iex.Binop.op) {
5239//ZZ case Iop_Sal8x16: size = 0; break;
5240//ZZ case Iop_Sal16x8: size = 1; break;
5241//ZZ case Iop_Sal32x4: size = 2; break;
5242//ZZ case Iop_Sal64x2: size = 3; break;
5243//ZZ default: vassert(0);
5244//ZZ }
5245//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
5246//ZZ res, argL, argR, size, True));
5247//ZZ return res;
5248//ZZ }
5249//ZZ case Iop_Shr8x16:
5250//ZZ case Iop_Shr16x8:
5251//ZZ case Iop_Shr32x4:
5252//ZZ case Iop_Shr64x2: {
5253//ZZ HReg res = newVRegV(env);
5254//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5255//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5256//ZZ HReg argR2 = newVRegV(env);
5257//ZZ HReg zero = newVRegV(env);
5258//ZZ UInt size;
5259//ZZ switch (e->Iex.Binop.op) {
5260//ZZ case Iop_Shr8x16: size = 0; break;
5261//ZZ case Iop_Shr16x8: size = 1; break;
5262//ZZ case Iop_Shr32x4: size = 2; break;
5263//ZZ case Iop_Shr64x2: size = 3; break;
5264//ZZ default: vassert(0);
5265//ZZ }
5266//ZZ addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
5267//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
5268//ZZ argR2, zero, argR, size, True));
5269//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
5270//ZZ res, argL, argR2, size, True));
5271//ZZ return res;
5272//ZZ }
5273//ZZ case Iop_Shl8x16:
5274//ZZ case Iop_Shl16x8:
5275//ZZ case Iop_Shl32x4:
5276//ZZ case Iop_Shl64x2: {
5277//ZZ HReg res = newVRegV(env);
5278//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5279//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5280//ZZ UInt size;
5281//ZZ switch (e->Iex.Binop.op) {
5282//ZZ case Iop_Shl8x16: size = 0; break;
5283//ZZ case Iop_Shl16x8: size = 1; break;
5284//ZZ case Iop_Shl32x4: size = 2; break;
5285//ZZ case Iop_Shl64x2: size = 3; break;
5286//ZZ default: vassert(0);
5287//ZZ }
5288//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
5289//ZZ res, argL, argR, size, True));
5290//ZZ return res;
5291//ZZ }
5292//ZZ case Iop_QShl8x16:
5293//ZZ case Iop_QShl16x8:
5294//ZZ case Iop_QShl32x4:
5295//ZZ case Iop_QShl64x2: {
5296//ZZ HReg res = newVRegV(env);
5297//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5298//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5299//ZZ UInt size;
5300//ZZ switch (e->Iex.Binop.op) {
5301//ZZ case Iop_QShl8x16: size = 0; break;
5302//ZZ case Iop_QShl16x8: size = 1; break;
5303//ZZ case Iop_QShl32x4: size = 2; break;
5304//ZZ case Iop_QShl64x2: size = 3; break;
5305//ZZ default: vassert(0);
5306//ZZ }
5307//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VQSHL,
5308//ZZ res, argL, argR, size, True));
5309//ZZ return res;
5310//ZZ }
5311//ZZ case Iop_QSal8x16:
5312//ZZ case Iop_QSal16x8:
5313//ZZ case Iop_QSal32x4:
5314//ZZ case Iop_QSal64x2: {
5315//ZZ HReg res = newVRegV(env);
5316//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5317//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5318//ZZ UInt size;
5319//ZZ switch (e->Iex.Binop.op) {
5320//ZZ case Iop_QSal8x16: size = 0; break;
5321//ZZ case Iop_QSal16x8: size = 1; break;
5322//ZZ case Iop_QSal32x4: size = 2; break;
5323//ZZ case Iop_QSal64x2: size = 3; break;
5324//ZZ default: vassert(0);
5325//ZZ }
5326//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VQSAL,
5327//ZZ res, argL, argR, size, True));
5328//ZZ return res;
5329//ZZ }
5330//ZZ case Iop_QShlN8x16:
5331//ZZ case Iop_QShlN16x8:
5332//ZZ case Iop_QShlN32x4:
5333//ZZ case Iop_QShlN64x2: {
5334//ZZ HReg res = newVRegV(env);
5335//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5336//ZZ UInt size, imm;
5337//ZZ if (e->Iex.Binop.arg2->tag != Iex_Const ||
5338//ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
5339//ZZ vpanic("ARM taget supports Iop_QShlNAxB with constant "
5340//ZZ "second argument only\n");
5341//ZZ }
5342//ZZ imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
5343//ZZ switch (e->Iex.Binop.op) {
5344//ZZ case Iop_QShlN8x16: size = 8 | imm; break;
5345//ZZ case Iop_QShlN16x8: size = 16 | imm; break;
5346//ZZ case Iop_QShlN32x4: size = 32 | imm; break;
5347//ZZ case Iop_QShlN64x2: size = 64 | imm; break;
5348//ZZ default: vassert(0);
5349//ZZ }
5350//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUU,
5351//ZZ res, argL, size, True));
5352//ZZ return res;
5353//ZZ }
5354//ZZ case Iop_QShlN8Sx16:
5355//ZZ case Iop_QShlN16Sx8:
5356//ZZ case Iop_QShlN32Sx4:
5357//ZZ case Iop_QShlN64Sx2: {
5358//ZZ HReg res = newVRegV(env);
5359//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5360//ZZ UInt size, imm;
5361//ZZ if (e->Iex.Binop.arg2->tag != Iex_Const ||
5362//ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
5363//ZZ vpanic("ARM taget supports Iop_QShlNASxB with constant "
5364//ZZ "second argument only\n");
5365//ZZ }
5366//ZZ imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
5367//ZZ switch (e->Iex.Binop.op) {
5368//ZZ case Iop_QShlN8Sx16: size = 8 | imm; break;
5369//ZZ case Iop_QShlN16Sx8: size = 16 | imm; break;
5370//ZZ case Iop_QShlN32Sx4: size = 32 | imm; break;
5371//ZZ case Iop_QShlN64Sx2: size = 64 | imm; break;
5372//ZZ default: vassert(0);
5373//ZZ }
5374//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUS,
5375//ZZ res, argL, size, True));
5376//ZZ return res;
5377//ZZ }
5378//ZZ case Iop_QSalN8x16:
5379//ZZ case Iop_QSalN16x8:
5380//ZZ case Iop_QSalN32x4:
5381//ZZ case Iop_QSalN64x2: {
5382//ZZ HReg res = newVRegV(env);
5383//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5384//ZZ UInt size, imm;
5385//ZZ if (e->Iex.Binop.arg2->tag != Iex_Const ||
5386//ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
5387//ZZ vpanic("ARM taget supports Iop_QShlNAxB with constant "
5388//ZZ "second argument only\n");
5389//ZZ }
5390//ZZ imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
5391//ZZ switch (e->Iex.Binop.op) {
5392//ZZ case Iop_QSalN8x16: size = 8 | imm; break;
5393//ZZ case Iop_QSalN16x8: size = 16 | imm; break;
5394//ZZ case Iop_QSalN32x4: size = 32 | imm; break;
5395//ZZ case Iop_QSalN64x2: size = 64 | imm; break;
5396//ZZ default: vassert(0);
5397//ZZ }
5398//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNSS,
5399//ZZ res, argL, size, True));
5400//ZZ return res;
5401//ZZ }
5402//ZZ case Iop_ShrN8x16:
5403//ZZ case Iop_ShrN16x8:
5404//ZZ case Iop_ShrN32x4:
5405//ZZ case Iop_ShrN64x2: {
5406//ZZ HReg res = newVRegV(env);
5407//ZZ HReg tmp = newVRegV(env);
5408//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5409//ZZ HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
5410//ZZ HReg argR2 = newVRegI(env);
5411//ZZ UInt size;
5412//ZZ switch (e->Iex.Binop.op) {
5413//ZZ case Iop_ShrN8x16: size = 0; break;
5414//ZZ case Iop_ShrN16x8: size = 1; break;
5415//ZZ case Iop_ShrN32x4: size = 2; break;
5416//ZZ case Iop_ShrN64x2: size = 3; break;
5417//ZZ default: vassert(0);
5418//ZZ }
5419//ZZ addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
5420//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_DUP,
5421//ZZ tmp, argR2, 0, True));
5422//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
5423//ZZ res, argL, tmp, size, True));
5424//ZZ return res;
5425//ZZ }
5426//ZZ case Iop_ShlN8x16:
5427//ZZ case Iop_ShlN16x8:
5428//ZZ case Iop_ShlN32x4:
5429//ZZ case Iop_ShlN64x2: {
5430//ZZ HReg res = newVRegV(env);
5431//ZZ HReg tmp = newVRegV(env);
5432//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5433//ZZ HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
5434//ZZ UInt size;
5435//ZZ switch (e->Iex.Binop.op) {
5436//ZZ case Iop_ShlN8x16: size = 0; break;
5437//ZZ case Iop_ShlN16x8: size = 1; break;
5438//ZZ case Iop_ShlN32x4: size = 2; break;
5439//ZZ case Iop_ShlN64x2: size = 3; break;
5440//ZZ default: vassert(0);
5441//ZZ }
5442//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR, 0, True));
5443//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
5444//ZZ res, argL, tmp, size, True));
5445//ZZ return res;
5446//ZZ }
5447//ZZ case Iop_SarN8x16:
5448//ZZ case Iop_SarN16x8:
5449//ZZ case Iop_SarN32x4:
5450//ZZ case Iop_SarN64x2: {
5451//ZZ HReg res = newVRegV(env);
5452//ZZ HReg tmp = newVRegV(env);
5453//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5454//ZZ HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
5455//ZZ HReg argR2 = newVRegI(env);
5456//ZZ UInt size;
5457//ZZ switch (e->Iex.Binop.op) {
5458//ZZ case Iop_SarN8x16: size = 0; break;
5459//ZZ case Iop_SarN16x8: size = 1; break;
5460//ZZ case Iop_SarN32x4: size = 2; break;
5461//ZZ case Iop_SarN64x2: size = 3; break;
5462//ZZ default: vassert(0);
5463//ZZ }
5464//ZZ addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
5465//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, True));
5466//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
5467//ZZ res, argL, tmp, size, True));
5468//ZZ return res;
5469//ZZ }
5470//ZZ case Iop_CmpGT8Ux16:
5471//ZZ case Iop_CmpGT16Ux8:
5472//ZZ case Iop_CmpGT32Ux4: {
5473//ZZ HReg res = newVRegV(env);
5474//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5475//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5476//ZZ UInt size;
5477//ZZ switch (e->Iex.Binop.op) {
5478//ZZ case Iop_CmpGT8Ux16: size = 0; break;
5479//ZZ case Iop_CmpGT16Ux8: size = 1; break;
5480//ZZ case Iop_CmpGT32Ux4: size = 2; break;
5481//ZZ default: vassert(0);
5482//ZZ }
5483//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGTU,
5484//ZZ res, argL, argR, size, True));
5485//ZZ return res;
5486//ZZ }
5487//ZZ case Iop_CmpGT8Sx16:
5488//ZZ case Iop_CmpGT16Sx8:
5489//ZZ case Iop_CmpGT32Sx4: {
5490//ZZ HReg res = newVRegV(env);
5491//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5492//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5493//ZZ UInt size;
5494//ZZ switch (e->Iex.Binop.op) {
5495//ZZ case Iop_CmpGT8Sx16: size = 0; break;
5496//ZZ case Iop_CmpGT16Sx8: size = 1; break;
5497//ZZ case Iop_CmpGT32Sx4: size = 2; break;
5498//ZZ default: vassert(0);
5499//ZZ }
5500//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGTS,
5501//ZZ res, argL, argR, size, True));
5502//ZZ return res;
5503//ZZ }
5504//ZZ case Iop_CmpEQ8x16:
5505//ZZ case Iop_CmpEQ16x8:
5506//ZZ case Iop_CmpEQ32x4: {
5507//ZZ HReg res = newVRegV(env);
5508//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5509//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5510//ZZ UInt size;
5511//ZZ switch (e->Iex.Binop.op) {
5512//ZZ case Iop_CmpEQ8x16: size = 0; break;
5513//ZZ case Iop_CmpEQ16x8: size = 1; break;
5514//ZZ case Iop_CmpEQ32x4: size = 2; break;
5515//ZZ default: vassert(0);
5516//ZZ }
5517//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCEQ,
5518//ZZ res, argL, argR, size, True));
5519//ZZ return res;
5520//ZZ }
5521//ZZ case Iop_Mul8x16:
5522//ZZ case Iop_Mul16x8:
5523//ZZ case Iop_Mul32x4: {
5524//ZZ HReg res = newVRegV(env);
5525//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5526//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5527//ZZ UInt size = 0;
5528//ZZ switch(e->Iex.Binop.op) {
5529//ZZ case Iop_Mul8x16: size = 0; break;
5530//ZZ case Iop_Mul16x8: size = 1; break;
5531//ZZ case Iop_Mul32x4: size = 2; break;
5532//ZZ default: vassert(0);
5533//ZZ }
5534//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMUL,
5535//ZZ res, argL, argR, size, True));
5536//ZZ return res;
5537//ZZ }
5538//ZZ case Iop_Mul32Fx4: {
5539//ZZ HReg res = newVRegV(env);
5540//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5541//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5542//ZZ UInt size = 0;
5543//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMULFP,
5544//ZZ res, argL, argR, size, True));
5545//ZZ return res;
5546//ZZ }
5547//ZZ case Iop_Mull8Ux8:
5548//ZZ case Iop_Mull16Ux4:
5549//ZZ case Iop_Mull32Ux2: {
5550//ZZ HReg res = newVRegV(env);
5551//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5552//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
5553//ZZ UInt size = 0;
5554//ZZ switch(e->Iex.Binop.op) {
5555//ZZ case Iop_Mull8Ux8: size = 0; break;
5556//ZZ case Iop_Mull16Ux4: size = 1; break;
5557//ZZ case Iop_Mull32Ux2: size = 2; break;
5558//ZZ default: vassert(0);
5559//ZZ }
5560//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMULLU,
5561//ZZ res, argL, argR, size, True));
5562//ZZ return res;
5563//ZZ }
5564//ZZ
5565//ZZ case Iop_Mull8Sx8:
5566//ZZ case Iop_Mull16Sx4:
5567//ZZ case Iop_Mull32Sx2: {
5568//ZZ HReg res = newVRegV(env);
5569//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5570//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
5571//ZZ UInt size = 0;
5572//ZZ switch(e->Iex.Binop.op) {
5573//ZZ case Iop_Mull8Sx8: size = 0; break;
5574//ZZ case Iop_Mull16Sx4: size = 1; break;
5575//ZZ case Iop_Mull32Sx2: size = 2; break;
5576//ZZ default: vassert(0);
5577//ZZ }
5578//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMULLS,
5579//ZZ res, argL, argR, size, True));
5580//ZZ return res;
5581//ZZ }
5582//ZZ
5583//ZZ case Iop_QDMulHi16Sx8:
5584//ZZ case Iop_QDMulHi32Sx4: {
5585//ZZ HReg res = newVRegV(env);
5586//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5587//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5588//ZZ UInt size = 0;
5589//ZZ switch(e->Iex.Binop.op) {
5590//ZZ case Iop_QDMulHi16Sx8: size = 1; break;
5591//ZZ case Iop_QDMulHi32Sx4: size = 2; break;
5592//ZZ default: vassert(0);
5593//ZZ }
5594//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULH,
5595//ZZ res, argL, argR, size, True));
5596//ZZ return res;
5597//ZZ }
5598//ZZ
5599//ZZ case Iop_QRDMulHi16Sx8:
5600//ZZ case Iop_QRDMulHi32Sx4: {
5601//ZZ HReg res = newVRegV(env);
5602//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5603//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5604//ZZ UInt size = 0;
5605//ZZ switch(e->Iex.Binop.op) {
5606//ZZ case Iop_QRDMulHi16Sx8: size = 1; break;
5607//ZZ case Iop_QRDMulHi32Sx4: size = 2; break;
5608//ZZ default: vassert(0);
5609//ZZ }
5610//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQRDMULH,
5611//ZZ res, argL, argR, size, True));
5612//ZZ return res;
5613//ZZ }
5614//ZZ
5615//ZZ case Iop_QDMulLong16Sx4:
5616//ZZ case Iop_QDMulLong32Sx2: {
5617//ZZ HReg res = newVRegV(env);
5618//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5619//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
5620//ZZ UInt size = 0;
5621//ZZ switch(e->Iex.Binop.op) {
5622//ZZ case Iop_QDMulLong16Sx4: size = 1; break;
5623//ZZ case Iop_QDMulLong32Sx2: size = 2; break;
5624//ZZ default: vassert(0);
5625//ZZ }
5626//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULL,
5627//ZZ res, argL, argR, size, True));
5628//ZZ return res;
5629//ZZ }
5630//ZZ case Iop_PolynomialMul8x16: {
5631//ZZ HReg res = newVRegV(env);
5632//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5633//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5634//ZZ UInt size = 0;
5635//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMULP,
5636//ZZ res, argL, argR, size, True));
5637//ZZ return res;
5638//ZZ }
5639//ZZ case Iop_Max32Fx4: {
5640//ZZ HReg res = newVRegV(env);
5641//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5642//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5643//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMAXF,
5644//ZZ res, argL, argR, 2, True));
5645//ZZ return res;
5646//ZZ }
5647//ZZ case Iop_Min32Fx4: {
5648//ZZ HReg res = newVRegV(env);
5649//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5650//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5651//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMINF,
5652//ZZ res, argL, argR, 2, True));
5653//ZZ return res;
5654//ZZ }
5655//ZZ case Iop_PwMax32Fx4: {
5656//ZZ HReg res = newVRegV(env);
5657//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5658//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5659//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXF,
5660//ZZ res, argL, argR, 2, True));
5661//ZZ return res;
5662//ZZ }
5663//ZZ case Iop_PwMin32Fx4: {
5664//ZZ HReg res = newVRegV(env);
5665//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5666//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5667//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VPMINF,
5668//ZZ res, argL, argR, 2, True));
5669//ZZ return res;
5670//ZZ }
5671//ZZ case Iop_CmpGT32Fx4: {
5672//ZZ HReg res = newVRegV(env);
5673//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5674//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5675//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGTF,
5676//ZZ res, argL, argR, 2, True));
5677//ZZ return res;
5678//ZZ }
5679//ZZ case Iop_CmpGE32Fx4: {
5680//ZZ HReg res = newVRegV(env);
5681//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5682//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5683//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGEF,
5684//ZZ res, argL, argR, 2, True));
5685//ZZ return res;
5686//ZZ }
5687//ZZ case Iop_CmpEQ32Fx4: {
5688//ZZ HReg res = newVRegV(env);
5689//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5690//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5691//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCEQF,
5692//ZZ res, argL, argR, 2, True));
5693//ZZ return res;
5694//ZZ }
5695//ZZ
5696//ZZ case Iop_PolynomialMull8x8: {
5697//ZZ HReg res = newVRegV(env);
5698//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5699//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
5700//ZZ UInt size = 0;
5701//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMULLP,
5702//ZZ res, argL, argR, size, True));
5703//ZZ return res;
5704//ZZ }
5705//ZZ case Iop_F32ToFixed32Ux4_RZ:
5706//ZZ case Iop_F32ToFixed32Sx4_RZ:
5707//ZZ case Iop_Fixed32UToF32x4_RN:
5708//ZZ case Iop_Fixed32SToF32x4_RN: {
5709//ZZ HReg res = newVRegV(env);
5710//ZZ HReg arg = iselNeonExpr(env, e->Iex.Binop.arg1);
5711//ZZ ARMNeonUnOp op;
5712//ZZ UInt imm6;
5713//ZZ if (e->Iex.Binop.arg2->tag != Iex_Const ||
5714//ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
5715//ZZ vpanic("ARM supports FP <-> Fixed conversion with constant "
5716//ZZ "second argument less than 33 only\n");
5717//ZZ }
5718//ZZ imm6 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
5719//ZZ vassert(imm6 <= 32 && imm6 > 0);
5720//ZZ imm6 = 64 - imm6;
5721//ZZ switch(e->Iex.Binop.op) {
5722//ZZ case Iop_F32ToFixed32Ux4_RZ: op = ARMneon_VCVTFtoFixedU; break;
5723//ZZ case Iop_F32ToFixed32Sx4_RZ: op = ARMneon_VCVTFtoFixedS; break;
5724//ZZ case Iop_Fixed32UToF32x4_RN: op = ARMneon_VCVTFixedUtoF; break;
5725//ZZ case Iop_Fixed32SToF32x4_RN: op = ARMneon_VCVTFixedStoF; break;
5726//ZZ default: vassert(0);
5727//ZZ }
5728//ZZ addInstr(env, ARMInstr_NUnary(op, res, arg, imm6, True));
5729//ZZ return res;
5730//ZZ }
5731//ZZ /*
5732//ZZ FIXME remove if not used
5733//ZZ case Iop_VDup8x16:
5734//ZZ case Iop_VDup16x8:
5735//ZZ case Iop_VDup32x4: {
5736//ZZ HReg res = newVRegV(env);
5737//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5738//ZZ UInt imm4;
5739//ZZ UInt index;
5740//ZZ if (e->Iex.Binop.arg2->tag != Iex_Const ||
5741//ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
5742//ZZ vpanic("ARM supports Iop_VDup with constant "
5743//ZZ "second argument less than 16 only\n");
5744//ZZ }
5745//ZZ index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
5746//ZZ switch(e->Iex.Binop.op) {
5747//ZZ case Iop_VDup8x16: imm4 = (index << 1) + 1; break;
5748//ZZ case Iop_VDup16x8: imm4 = (index << 2) + 2; break;
5749//ZZ case Iop_VDup32x4: imm4 = (index << 3) + 4; break;
5750//ZZ default: vassert(0);
5751//ZZ }
5752//ZZ if (imm4 >= 16) {
5753//ZZ vpanic("ARM supports Iop_VDup with constant "
5754//ZZ "second argument less than 16 only\n");
5755//ZZ }
5756//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VDUP,
5757//ZZ res, argL, imm4, True));
5758//ZZ return res;
5759//ZZ }
5760//ZZ */
5761//ZZ case Iop_PwAdd8x16:
5762//ZZ case Iop_PwAdd16x8:
5763//ZZ case Iop_PwAdd32x4: {
5764//ZZ HReg res = newVRegV(env);
5765//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5766//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5767//ZZ UInt size = 0;
5768//ZZ switch(e->Iex.Binop.op) {
5769//ZZ case Iop_PwAdd8x16: size = 0; break;
5770//ZZ case Iop_PwAdd16x8: size = 1; break;
5771//ZZ case Iop_PwAdd32x4: size = 2; break;
5772//ZZ default: vassert(0);
5773//ZZ }
5774//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VPADD,
5775//ZZ res, argL, argR, size, True));
5776//ZZ return res;
5777//ZZ }
5778 /* ... */
5779 default:
5780 break;
5781 } /* switch on the binop */
5782 } /* if (e->tag == Iex_Binop) */
5783
sewardj606c4ba2014-01-26 19:11:14 +00005784 if (e->tag == Iex_Triop) {
5785 IRTriop* triop = e->Iex.Triop.details;
5786 ARM64VecBinOp vecbop = ARM64vecb_INVALID;
5787 switch (triop->op) {
5788 case Iop_Add64Fx2: vecbop = ARM64vecb_FADD64x2; break;
5789 case Iop_Sub64Fx2: vecbop = ARM64vecb_FSUB64x2; break;
5790 case Iop_Mul64Fx2: vecbop = ARM64vecb_FMUL64x2; break;
5791 case Iop_Div64Fx2: vecbop = ARM64vecb_FDIV64x2; break;
sewardjecde6972014-02-05 11:01:19 +00005792 case Iop_Add32Fx4: vecbop = ARM64vecb_FADD32x4; break;
5793 case Iop_Sub32Fx4: vecbop = ARM64vecb_FSUB32x4; break;
5794 case Iop_Mul32Fx4: vecbop = ARM64vecb_FMUL32x4; break;
5795 case Iop_Div32Fx4: vecbop = ARM64vecb_FDIV32x4; break;
sewardj606c4ba2014-01-26 19:11:14 +00005796 default: break;
5797 }
5798 if (vecbop != ARM64vecb_INVALID) {
5799 HReg argL = iselV128Expr(env, triop->arg2);
5800 HReg argR = iselV128Expr(env, triop->arg3);
5801 HReg dst = newVRegV(env);
5802 set_FPCR_rounding_mode(env, triop->arg1);
5803 addInstr(env, ARM64Instr_VBinV(vecbop, dst, argL, argR));
5804 return dst;
5805 }
5806
sewardjbbcf1882014-01-12 12:49:10 +00005807//ZZ switch (triop->op) {
5808//ZZ case Iop_ExtractV128: {
5809//ZZ HReg res = newVRegV(env);
5810//ZZ HReg argL = iselNeonExpr(env, triop->arg1);
5811//ZZ HReg argR = iselNeonExpr(env, triop->arg2);
5812//ZZ UInt imm4;
5813//ZZ if (triop->arg3->tag != Iex_Const ||
5814//ZZ typeOfIRExpr(env->type_env, triop->arg3) != Ity_I8) {
5815//ZZ vpanic("ARM target supports Iop_ExtractV128 with constant "
5816//ZZ "third argument less than 16 only\n");
5817//ZZ }
5818//ZZ imm4 = triop->arg3->Iex.Const.con->Ico.U8;
5819//ZZ if (imm4 >= 16) {
5820//ZZ vpanic("ARM target supports Iop_ExtractV128 with constant "
5821//ZZ "third argument less than 16 only\n");
5822//ZZ }
5823//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VEXT,
5824//ZZ res, argL, argR, imm4, True));
5825//ZZ return res;
5826//ZZ }
5827//ZZ default:
5828//ZZ break;
5829//ZZ }
sewardj606c4ba2014-01-26 19:11:14 +00005830 }
5831
sewardjbbcf1882014-01-12 12:49:10 +00005832//ZZ if (e->tag == Iex_ITE) { // VFD
5833//ZZ ARMCondCode cc;
5834//ZZ HReg r1 = iselNeonExpr(env, e->Iex.ITE.iftrue);
5835//ZZ HReg r0 = iselNeonExpr(env, e->Iex.ITE.iffalse);
5836//ZZ HReg dst = newVRegV(env);
5837//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, dst, r1, 4, True));
5838//ZZ cc = iselCondCode(env, e->Iex.ITE.cond);
5839//ZZ addInstr(env, ARMInstr_NCMovQ(cc ^ 1, dst, r0));
5840//ZZ return dst;
5841//ZZ }
5842
5843 v128_expr_bad:
5844 ppIRExpr(e);
5845 vpanic("iselV128Expr_wrk");
5846}
5847
5848
5849/*---------------------------------------------------------*/
5850/*--- ISEL: Floating point expressions (64 bit) ---*/
5851/*---------------------------------------------------------*/
5852
5853/* Compute a 64-bit floating point value into a register, the identity
5854 of which is returned. As with iselIntExpr_R, the reg may be either
5855 real or virtual; in any case it must not be changed by subsequent
5856 code emitted by the caller. */
5857
5858static HReg iselDblExpr ( ISelEnv* env, IRExpr* e )
5859{
5860 HReg r = iselDblExpr_wrk( env, e );
5861# if 0
5862 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
5863# endif
5864 vassert(hregClass(r) == HRcFlt64);
5865 vassert(hregIsVirtual(r));
5866 return r;
5867}
5868
/* DO NOT CALL THIS DIRECTLY */
static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
{
   /* Instruction selection for an Ity_F64 expression.  Cases are
      tried in order: temp read, F64 constant, little-endian load,
      guest-state read, unops, binops, triops; anything unhandled
      falls through to vpanic.  Where a rounding mode argument is
      present, the operand is selected *before* the FPCR is set, so
      that code generated for the operand cannot disturb the rounding
      mode. */
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(e);
   vassert(ty == Ity_F64);

   /* Reference to an IR temp: return the vreg already bound to it. */
   if (e->tag == Iex_RdTmp) {
      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
   }

   /* F64 literal: materialise the raw 64 bits in an integer register,
      then move them sideways into a D register. */
   if (e->tag == Iex_Const) {
      IRConst* con = e->Iex.Const.con;
      if (con->tag == Ico_F64i) {
         HReg src = newVRegI(env);
         HReg dst = newVRegD(env);
         addInstr(env, ARM64Instr_Imm64(src, con->Ico.F64i));
         addInstr(env, ARM64Instr_VDfromX(dst, src));
         return dst;
      }
   }

   /* Little-endian F64 load from a computed address (zero offset). */
   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
      vassert(e->Iex.Load.ty == Ity_F64);
      HReg addr = iselIntExpr_R(env, e->Iex.Load.addr);
      HReg res = newVRegD(env);
      addInstr(env, ARM64Instr_VLdStD(True/*isLoad*/, res, addr, 0));
      return res;
   }

   /* Read from the guest state.  Only offsets that are 8-aligned and
      fit the scaled-offset form (0 .. 32760) are handled here. */
   if (e->tag == Iex_Get) {
      Int offs = e->Iex.Get.offset;
      if (offs >= 0 && offs < 32768 && 0 == (offs & 7)) {
         HReg rD = newVRegD(env);
         HReg rN = get_baseblock_register();
         addInstr(env, ARM64Instr_VLdStD(True/*isLoad*/, rD, rN, offs));
         return rD;
      }
   }

   if (e->tag == Iex_Unop) {
      switch (e->Iex.Unop.op) {
//ZZ          case Iop_ReinterpI64asF64: {
//ZZ             if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
//ZZ                return iselNeon64Expr(env, e->Iex.Unop.arg);
//ZZ             } else {
//ZZ                HReg srcHi, srcLo;
//ZZ                HReg dst = newVRegD(env);
//ZZ                iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg);
//ZZ                addInstr(env, ARMInstr_VXferD(True/*toD*/, dst, srcHi, srcLo));
//ZZ                return dst;
//ZZ             }
//ZZ          }
         /* Negation: FNEG is exact, no rounding mode needed. */
         case Iop_NegF64: {
            HReg src = iselDblExpr(env, e->Iex.Unop.arg);
            HReg dst = newVRegD(env);
            addInstr(env, ARM64Instr_VUnaryD(ARM64fpu_NEG, dst, src));
            return dst;
         }
         /* Absolute value: likewise exact. */
         case Iop_AbsF64: {
            HReg src = iselDblExpr(env, e->Iex.Unop.arg);
            HReg dst = newVRegD(env);
            addInstr(env, ARM64Instr_VUnaryD(ARM64fpu_ABS, dst, src));
            return dst;
         }
         /* Widening F32 -> F64 is always exact, hence a unop with no
            rounding mode argument. */
         case Iop_F32toF64: {
            HReg src = iselFltExpr(env, e->Iex.Unop.arg);
            HReg dst = newVRegD(env);
            addInstr(env, ARM64Instr_VCvtSD(True/*sToD*/, dst, src));
            return dst;
         }
         case Iop_I32UtoF64:
         case Iop_I32StoF64: {
            /* Rounding mode is not involved here, since the
               conversion can always be done without loss of
               precision. */
            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
            HReg dst = newVRegD(env);
            Bool syned = e->Iex.Unop.op == Iop_I32StoF64;
            ARM64CvtOp cvt_op = syned ? ARM64cvt_F64_I32S : ARM64cvt_F64_I32U;
            addInstr(env, ARM64Instr_VCvtI2F(cvt_op, dst, src));
            return dst;
         }
         default:
            break;
      }
   }

   /* Binops: arg1 is the IR rounding mode, arg2 the operand. */
   if (e->tag == Iex_Binop) {
      switch (e->Iex.Binop.op) {
         /* Round to integral value, in the requested mode (FRINT). */
         case Iop_RoundF64toInt: {
            HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
            HReg dst = newVRegD(env);
            set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
            addInstr(env, ARM64Instr_VUnaryD(ARM64fpu_RINT, dst, src));
            return dst;
         }
         case Iop_SqrtF64: {
            HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
            HReg dst = newVRegD(env);
            set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
            addInstr(env, ARM64Instr_VUnaryD(ARM64fpu_SQRT, dst, src));
            return dst;
         }
         /* I64 -> F64 can lose precision, so the rounding mode
            matters; set it after selecting the integer operand. */
         case Iop_I64StoF64:
         case Iop_I64UtoF64: {
            ARM64CvtOp cvt_op = e->Iex.Binop.op == Iop_I64StoF64
                                   ? ARM64cvt_F64_I64S : ARM64cvt_F64_I64U;
            HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2);
            set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
            HReg dstS = newVRegD(env);
            addInstr(env, ARM64Instr_VCvtI2F(cvt_op, dstS, srcI));
            return dstS;
         }
         default:
            break;
      }
   }

   /* Triops: the four basic FP arithmetic ops, with triop->arg1 being
      the rounding mode and arg2/arg3 the operands. */
   if (e->tag == Iex_Triop) {
      IRTriop* triop = e->Iex.Triop.details;
      ARM64FpBinOp dblop = ARM64fpb_INVALID;
      switch (triop->op) {
         case Iop_DivF64: dblop = ARM64fpb_DIV; break;
         case Iop_MulF64: dblop = ARM64fpb_MUL; break;
         case Iop_SubF64: dblop = ARM64fpb_SUB; break;
         case Iop_AddF64: dblop = ARM64fpb_ADD; break;
         default: break;
      }
      if (dblop != ARM64fpb_INVALID) {
         /* Select both operands first, then set FPCR, so operand code
            cannot clobber the rounding mode. */
         HReg argL = iselDblExpr(env, triop->arg2);
         HReg argR = iselDblExpr(env, triop->arg3);
         HReg dst = newVRegD(env);
         set_FPCR_rounding_mode(env, triop->arg1);
         addInstr(env, ARM64Instr_VBinD(dblop, dst, argL, argR));
         return dst;
      }
   }

//ZZ    if (e->tag == Iex_ITE) { // VFD
//ZZ       if (ty == Ity_F64
//ZZ           && typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) {
//ZZ          HReg r1  = iselDblExpr(env, e->Iex.ITE.iftrue);
//ZZ          HReg r0  = iselDblExpr(env, e->Iex.ITE.iffalse);
//ZZ          HReg dst = newVRegD(env);
//ZZ          addInstr(env, ARMInstr_VUnaryD(ARMvfpu_COPY, dst, r1));
//ZZ          ARMCondCode cc = iselCondCode(env, e->Iex.ITE.cond);
//ZZ          addInstr(env, ARMInstr_VCMovD(cc ^ 1, dst, r0));
//ZZ          return dst;
//ZZ       }
//ZZ    }

   /* No case matched: print the offending expression and give up. */
   ppIRExpr(e);
   vpanic("iselDblExpr_wrk");
}
6024
6025
6026/*---------------------------------------------------------*/
6027/*--- ISEL: Floating point expressions (32 bit) ---*/
6028/*---------------------------------------------------------*/
6029
6030/* Compute a 32-bit floating point value into a register, the identity
6031 of which is returned. As with iselIntExpr_R, the reg may be either
6032 real or virtual; in any case it must not be changed by subsequent
6033 code emitted by the caller. Values are generated into HRcFlt64
6034 registers despite the values themselves being Ity_F32s. */
6035
6036static HReg iselFltExpr ( ISelEnv* env, IRExpr* e )
6037{
6038 HReg r = iselFltExpr_wrk( env, e );
6039# if 0
6040 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
6041# endif
6042 vassert(hregClass(r) == HRcFlt64);
6043 vassert(hregIsVirtual(r));
6044 return r;
6045}
6046
/* DO NOT CALL THIS DIRECTLY */
static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e )
{
   /* Instruction selection for an Ity_F32 expression; the result is
      nonetheless produced in an HRcFlt64-class register (see the
      comment on iselFltExpr).  Cases are tried in order: temp read,
      zero constant, guest-state read, unops, binops, triops; anything
      unhandled falls through to vpanic.  Where a rounding mode
      argument is present, the operand is selected *before* the FPCR
      is set, so operand code cannot disturb the rounding mode. */
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(e);
   vassert(ty == Ity_F32);

   /* Reference to an IR temp: return the vreg already bound to it. */
   if (e->tag == Iex_RdTmp) {
      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
   }

   if (e->tag == Iex_Const) {
      /* This is something of a kludge.  Since a 32 bit floating point
         zero is just .. all zeroes, just create a 64 bit zero word
         and transfer it.  This avoids having to create a SfromW
         instruction for this specific case. */
      IRConst* con = e->Iex.Const.con;
      if (con->tag == Ico_F32i && con->Ico.F32i == 0) {
         HReg src = newVRegI(env);
         HReg dst = newVRegD(env);
         addInstr(env, ARM64Instr_Imm64(src, 0));
         addInstr(env, ARM64Instr_VDfromX(dst, src));
         return dst;
      }
   }

//ZZ    if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
//ZZ       ARMAModeV* am;
//ZZ       HReg res = newVRegF(env);
//ZZ       vassert(e->Iex.Load.ty == Ity_F32);
//ZZ       am = iselIntExpr_AModeV(env, e->Iex.Load.addr);
//ZZ       addInstr(env, ARMInstr_VLdStS(True/*isLoad*/, res, am));
//ZZ       return res;
//ZZ    }

   /* Read from the guest state.  Only offsets that are 4-aligned and
      fit the scaled-offset form (0 .. 16380) are handled here. */
   if (e->tag == Iex_Get) {
      Int offs = e->Iex.Get.offset;
      if (offs >= 0 && offs < 16384 && 0 == (offs & 3)) {
         HReg rD = newVRegD(env);
         HReg rN = get_baseblock_register();
         addInstr(env, ARM64Instr_VLdStS(True/*isLoad*/, rD, rN, offs));
         return rD;
      }
   }

   if (e->tag == Iex_Unop) {
      switch (e->Iex.Unop.op) {
//ZZ          case Iop_ReinterpI32asF32: {
//ZZ             HReg dst = newVRegF(env);
//ZZ             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
//ZZ             addInstr(env, ARMInstr_VXferS(True/*toS*/, dst, src));
//ZZ             return dst;
//ZZ          }
         /* Negation: FNEG is exact, no rounding mode needed. */
         case Iop_NegF32: {
            HReg src = iselFltExpr(env, e->Iex.Unop.arg);
            HReg dst = newVRegD(env);
            addInstr(env, ARM64Instr_VUnaryS(ARM64fpu_NEG, dst, src));
            return dst;
         }
         /* Absolute value: likewise exact. */
         case Iop_AbsF32: {
            HReg src = iselFltExpr(env, e->Iex.Unop.arg);
            HReg dst = newVRegD(env);
            addInstr(env, ARM64Instr_VUnaryS(ARM64fpu_ABS, dst, src));
            return dst;
         }
         default:
            break;
      }
   }

   /* Binops: arg1 is the IR rounding mode, arg2 the operand. */
   if (e->tag == Iex_Binop) {
      switch (e->Iex.Binop.op) {
         /* Round to integral value, in the requested mode (FRINT). */
         case Iop_RoundF32toInt: {
            HReg src = iselFltExpr(env, e->Iex.Binop.arg2);
            HReg dst = newVRegD(env);
            set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
            addInstr(env, ARM64Instr_VUnaryS(ARM64fpu_RINT, dst, src));
            return dst;
         }
         case Iop_SqrtF32: {
            HReg src = iselFltExpr(env, e->Iex.Binop.arg2);
            HReg dst = newVRegD(env);
            set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
            addInstr(env, ARM64Instr_VUnaryS(ARM64fpu_SQRT, dst, src));
            return dst;
         }
         /* Narrowing F64 -> F32 can lose precision, so the rounding
            mode matters. */
         case Iop_F64toF32: {
            HReg srcD = iselDblExpr(env, e->Iex.Binop.arg2);
            set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
            HReg dstS = newVRegD(env);
            addInstr(env, ARM64Instr_VCvtSD(False/*dToS*/, dstS, srcD));
            return dstS;
         }
         /* Int -> F32 conversions that can lose precision, hence the
            rounding mode argument.  NOTE(review): Iop_I32UtoF32 is not
            handled here — presumably no ARM64cvt_F32_I32U exists at
            this revision; confirm before adding it. */
         case Iop_I32StoF32:
         case Iop_I64UtoF32:
         case Iop_I64StoF32: {
            ARM64CvtOp cvt_op = ARM64cvt_INVALID;
            switch (e->Iex.Binop.op) {
               case Iop_I32StoF32: cvt_op = ARM64cvt_F32_I32S; break;
               case Iop_I64UtoF32: cvt_op = ARM64cvt_F32_I64U; break;
               case Iop_I64StoF32: cvt_op = ARM64cvt_F32_I64S; break;
               default: vassert(0);
            }
            HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2);
            set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
            HReg dstS = newVRegD(env);
            addInstr(env, ARM64Instr_VCvtI2F(cvt_op, dstS, srcI));
            return dstS;
         }
         default:
            break;
      }
   }

   /* Triops: the four basic FP arithmetic ops, with triop->arg1 being
      the rounding mode and arg2/arg3 the operands. */
   if (e->tag == Iex_Triop) {
      IRTriop* triop = e->Iex.Triop.details;
      ARM64FpBinOp sglop = ARM64fpb_INVALID;
      switch (triop->op) {
         case Iop_DivF32: sglop = ARM64fpb_DIV; break;
         case Iop_MulF32: sglop = ARM64fpb_MUL; break;
         case Iop_SubF32: sglop = ARM64fpb_SUB; break;
         case Iop_AddF32: sglop = ARM64fpb_ADD; break;
         default: break;
      }
      if (sglop != ARM64fpb_INVALID) {
         /* Select both operands first, then set FPCR, so operand code
            cannot clobber the rounding mode. */
         HReg argL = iselFltExpr(env, triop->arg2);
         HReg argR = iselFltExpr(env, triop->arg3);
         HReg dst = newVRegD(env);
         set_FPCR_rounding_mode(env, triop->arg1);
         addInstr(env, ARM64Instr_VBinS(sglop, dst, argL, argR));
         return dst;
      }
   }

//ZZ 
//ZZ    if (e->tag == Iex_ITE) { // VFD
//ZZ       if (ty == Ity_F32
//ZZ           && typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) {
//ZZ          ARMCondCode cc;
//ZZ          HReg r1  = iselFltExpr(env, e->Iex.ITE.iftrue);
//ZZ          HReg r0  = iselFltExpr(env, e->Iex.ITE.iffalse);
//ZZ          HReg dst = newVRegF(env);
//ZZ          addInstr(env, ARMInstr_VUnaryS(ARMvfpu_COPY, dst, r1));
//ZZ          cc = iselCondCode(env, e->Iex.ITE.cond);
//ZZ          addInstr(env, ARMInstr_VCMovS(cc ^ 1, dst, r0));
//ZZ          return dst;
//ZZ       }
//ZZ    }

   /* No case matched: print the offending expression and give up. */
   ppIRExpr(e);
   vpanic("iselFltExpr_wrk");
}
6199
6200
6201/*---------------------------------------------------------*/
6202/*--- ISEL: Statements ---*/
6203/*---------------------------------------------------------*/
6204
/* Select ARM64 host instructions for a single IR statement, appending
   them to env->code.  Any statement form without a handler falls
   through to stmt_fail, which prints the statement and panics.
   NOTE(review): the //ZZ-prefixed regions are disabled 32-bit ARM
   code retained as a reference during the arm64 port; they are not
   compiled. */
6205static void iselStmt ( ISelEnv* env, IRStmt* stmt )
6206{
6207   if (vex_traceflags & VEX_TRACE_VCODE) {
6208      vex_printf("\n-- ");
6209      ppIRStmt(stmt);
6210      vex_printf("\n");
6211   }
6212   switch (stmt->tag) {
6213
6214   /* --------- STORE --------- */
6215   /* little-endian write to memory */
6216   case Ist_Store: {
6217      IRType    tya  = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
6218      IRType    tyd  = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
6219      IREndness end  = stmt->Ist.Store.end;
6220
      /* Only 64-bit little-endian addresses are accepted on arm64. */
6221      if (tya != Ity_I64 || end != Iend_LE)
6222         goto stmt_fail;
6223
6224      if (tyd == Ity_I64) {
6225         HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
6226         ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd);
6227         addInstr(env, ARM64Instr_LdSt64(False/*!isLoad*/, rD, am));
6228         return;
6229      }
6230      if (tyd == Ity_I32) {
6231         HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
6232         ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd);
6233         addInstr(env, ARM64Instr_LdSt32(False/*!isLoad*/, rD, am));
6234         return;
6235      }
6236      if (tyd == Ity_I16) {
6237         HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
6238         ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd);
6239         addInstr(env, ARM64Instr_LdSt16(False/*!isLoad*/, rD, am));
6240         return;
6241      }
6242      if (tyd == Ity_I8) {
6243         HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
6244         ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd);
6245         addInstr(env, ARM64Instr_LdSt8(False/*!isLoad*/, rD, am));
6246         return;
6247      }
6248      if (tyd == Ity_V128) {
         /* Q-register load/store takes a plain register address rather
            than an ARM64AMode. */
6249         HReg qD   = iselV128Expr(env, stmt->Ist.Store.data);
6250         HReg addr = iselIntExpr_R(env, stmt->Ist.Store.addr);
6251         addInstr(env, ARM64Instr_VLdStQ(False/*!isLoad*/, qD, addr));
6252         return;
6253      }
6254      if (tyd == Ity_F64) {
6255         HReg dD   = iselDblExpr(env, stmt->Ist.Store.data);
6256         HReg addr = iselIntExpr_R(env, stmt->Ist.Store.addr);
6257         addInstr(env, ARM64Instr_VLdStD(False/*!isLoad*/, dD, addr, 0));
6258         return;
6259      }
6260
6261//ZZ       if (tyd == Ity_I16) {
6262//ZZ          HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
6263//ZZ          ARMAMode2* am = iselIntExpr_AMode2(env, stmt->Ist.Store.addr);
6264//ZZ          addInstr(env, ARMInstr_LdSt16(ARMcc_AL,
6265//ZZ                                        False/*!isLoad*/,
6266//ZZ                                        False/*!isSignedLoad*/, rD, am));
6267//ZZ          return;
6268//ZZ       }
6269//ZZ       if (tyd == Ity_I8) {
6270//ZZ          HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
6271//ZZ          ARMAMode1* am = iselIntExpr_AMode1(env, stmt->Ist.Store.addr);
6272//ZZ          addInstr(env, ARMInstr_LdSt8U(ARMcc_AL, False/*!isLoad*/, rD, am));
6273//ZZ          return;
6274//ZZ       }
6275//ZZ       if (tyd == Ity_I64) {
6276//ZZ          if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
6277//ZZ             HReg dD = iselNeon64Expr(env, stmt->Ist.Store.data);
6278//ZZ             ARMAModeN* am = iselIntExpr_AModeN(env, stmt->Ist.Store.addr);
6279//ZZ             addInstr(env, ARMInstr_NLdStD(False, dD, am));
6280//ZZ          } else {
6281//ZZ             HReg rDhi, rDlo, rA;
6282//ZZ             iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.Store.data);
6283//ZZ             rA = iselIntExpr_R(env, stmt->Ist.Store.addr);
6284//ZZ             addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!load*/, rDhi,
6285//ZZ                                           ARMAMode1_RI(rA,4)));
6286//ZZ             addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!load*/, rDlo,
6287//ZZ                                           ARMAMode1_RI(rA,0)));
6288//ZZ          }
6289//ZZ          return;
6290//ZZ       }
6291//ZZ       if (tyd == Ity_F64) {
6292//ZZ          HReg dD = iselDblExpr(env, stmt->Ist.Store.data);
6293//ZZ          ARMAModeV* am = iselIntExpr_AModeV(env, stmt->Ist.Store.addr);
6294//ZZ          addInstr(env, ARMInstr_VLdStD(False/*!isLoad*/, dD, am));
6295//ZZ          return;
6296//ZZ       }
6297//ZZ       if (tyd == Ity_F32) {
6298//ZZ          HReg fD = iselFltExpr(env, stmt->Ist.Store.data);
6299//ZZ          ARMAModeV* am = iselIntExpr_AModeV(env, stmt->Ist.Store.addr);
6300//ZZ          addInstr(env, ARMInstr_VLdStS(False/*!isLoad*/, fD, am));
6301//ZZ          return;
6302//ZZ       }
6303//ZZ       if (tyd == Ity_V128) {
6304//ZZ          HReg qD = iselNeonExpr(env, stmt->Ist.Store.data);
6305//ZZ          ARMAModeN* am = iselIntExpr_AModeN(env, stmt->Ist.Store.addr);
6306//ZZ          addInstr(env, ARMInstr_NLdStQ(False, qD, am));
6307//ZZ          return;
6308//ZZ       }
6309
6310      break;
6311   }
6312
6313//ZZ    /* --------- CONDITIONAL STORE --------- */
6314//ZZ    /* conditional little-endian write to memory */
6315//ZZ    case Ist_StoreG: {
6316//ZZ       IRStoreG* sg   = stmt->Ist.StoreG.details;
6317//ZZ       IRType    tya  = typeOfIRExpr(env->type_env, sg->addr);
6318//ZZ       IRType    tyd  = typeOfIRExpr(env->type_env, sg->data);
6319//ZZ       IREndness end  = sg->end;
6320//ZZ
6321//ZZ       if (tya != Ity_I32 || end != Iend_LE)
6322//ZZ          goto stmt_fail;
6323//ZZ
6324//ZZ       switch (tyd) {
6325//ZZ          case Ity_I8:
6326//ZZ          case Ity_I32: {
6327//ZZ             HReg        rD = iselIntExpr_R(env, sg->data);
6328//ZZ             ARMAMode1*  am = iselIntExpr_AMode1(env, sg->addr);
6329//ZZ             ARMCondCode cc = iselCondCode(env, sg->guard);
6330//ZZ             addInstr(env, (tyd == Ity_I32 ? ARMInstr_LdSt32 : ARMInstr_LdSt8U)
6331//ZZ                              (cc, False/*!isLoad*/, rD, am));
6332//ZZ             return;
6333//ZZ          }
6334//ZZ          case Ity_I16: {
6335//ZZ             HReg        rD = iselIntExpr_R(env, sg->data);
6336//ZZ             ARMAMode2*  am = iselIntExpr_AMode2(env, sg->addr);
6337//ZZ             ARMCondCode cc = iselCondCode(env, sg->guard);
6338//ZZ             addInstr(env, ARMInstr_LdSt16(cc,
6339//ZZ                                           False/*!isLoad*/,
6340//ZZ                                           False/*!isSignedLoad*/, rD, am));
6341//ZZ             return;
6342//ZZ          }
6343//ZZ          default:
6344//ZZ             break;
6345//ZZ       }
6346//ZZ       break;
6347//ZZ    }
6348//ZZ
6349//ZZ    /* --------- CONDITIONAL LOAD --------- */
6350//ZZ    /* conditional little-endian load from memory */
6351//ZZ    case Ist_LoadG: {
6352//ZZ       IRLoadG* lg = stmt->Ist.LoadG.details;
6353//ZZ       IRType   tya = typeOfIRExpr(env->type_env, lg->addr);
6354//ZZ       IREndness end = lg->end;
6355//ZZ
6356//ZZ       if (tya != Ity_I32 || end != Iend_LE)
6357//ZZ          goto stmt_fail;
6358//ZZ
6359//ZZ       switch (lg->cvt) {
6360//ZZ          case ILGop_8Uto32:
6361//ZZ          case ILGop_Ident32: {
6362//ZZ             HReg        rAlt = iselIntExpr_R(env, lg->alt);
6363//ZZ             ARMAMode1*  am   = iselIntExpr_AMode1(env, lg->addr);
6364//ZZ             HReg        rD   = lookupIRTemp(env, lg->dst);
6365//ZZ             addInstr(env, mk_iMOVds_RR(rD, rAlt));
6366//ZZ             ARMCondCode cc   = iselCondCode(env, lg->guard);
6367//ZZ             addInstr(env, (lg->cvt == ILGop_Ident32 ? ARMInstr_LdSt32
6368//ZZ                                                     : ARMInstr_LdSt8U)
6369//ZZ                              (cc, True/*isLoad*/, rD, am));
6370//ZZ             return;
6371//ZZ          }
6372//ZZ          case ILGop_16Sto32:
6373//ZZ          case ILGop_16Uto32:
6374//ZZ          case ILGop_8Sto32: {
6375//ZZ             HReg        rAlt = iselIntExpr_R(env, lg->alt);
6376//ZZ             ARMAMode2*  am   = iselIntExpr_AMode2(env, lg->addr);
6377//ZZ             HReg        rD   = lookupIRTemp(env, lg->dst);
6378//ZZ             addInstr(env, mk_iMOVds_RR(rD, rAlt));
6379//ZZ             ARMCondCode cc   = iselCondCode(env, lg->guard);
6380//ZZ             if (lg->cvt == ILGop_8Sto32) {
6381//ZZ                addInstr(env, ARMInstr_Ld8S(cc, rD, am));
6382//ZZ             } else {
6383//ZZ                vassert(lg->cvt == ILGop_16Sto32 || lg->cvt == ILGop_16Uto32);
6384//ZZ                Bool sx = lg->cvt == ILGop_16Sto32;
6385//ZZ                addInstr(env, ARMInstr_LdSt16(cc, True/*isLoad*/, sx, rD, am));
6386//ZZ             }
6387//ZZ             return;
6388//ZZ          }
6389//ZZ          default:
6390//ZZ             break;
6391//ZZ       }
6392//ZZ       break;
6393//ZZ    }
6394
6395   /* --------- PUT --------- */
6396   /* write guest state, fixed offset */
6397   case Ist_Put: {
6398      IRType tyd  = typeOfIRExpr(env->type_env, stmt->Ist.Put.data);
6399      UInt   offs = (UInt)stmt->Ist.Put.offset;
      /* Each case below guards 'offs' for the alignment and range the
         corresponding baseblock-access amode builder can encode; an
         offset that fits no case drops out of the switch to stmt_fail. */
6400      if (tyd == Ity_I64 && 0 == (offs & 7) && offs < (8<<12)) {
6401         HReg rD = iselIntExpr_R(env, stmt->Ist.Put.data);
6402         ARM64AMode* am = mk_baseblock_64bit_access_amode(offs);
6403         addInstr(env, ARM64Instr_LdSt64(False/*!isLoad*/, rD, am));
6404         return;
6405      }
6406      if (tyd == Ity_I32 && 0 == (offs & 3) && offs < (4<<12)) {
6407         HReg rD = iselIntExpr_R(env, stmt->Ist.Put.data);
6408         ARM64AMode* am = mk_baseblock_32bit_access_amode(offs);
6409         addInstr(env, ARM64Instr_LdSt32(False/*!isLoad*/, rD, am));
6410         return;
6411      }
6412      if (tyd == Ity_I16 && 0 == (offs & 1) && offs < (2<<12)) {
6413         HReg rD = iselIntExpr_R(env, stmt->Ist.Put.data);
6414         ARM64AMode* am = mk_baseblock_16bit_access_amode(offs);
6415         addInstr(env, ARM64Instr_LdSt16(False/*!isLoad*/, rD, am));
6416         return;
6417      }
6418      if (tyd == Ity_I8 && offs < (1<<12)) {
6419         HReg rD = iselIntExpr_R(env, stmt->Ist.Put.data);
6420         ARM64AMode* am = mk_baseblock_8bit_access_amode(offs);
6421         addInstr(env, ARM64Instr_LdSt8(False/*!isLoad*/, rD, am));
6422         return;
6423      }
6424      if (tyd == Ity_V128 && offs < (1<<12)) {
6425         HReg qD   = iselV128Expr(env, stmt->Ist.Put.data);
6426         HReg addr = mk_baseblock_128bit_access_addr(env, offs);
6427         addInstr(env, ARM64Instr_VLdStQ(False/*!isLoad*/, qD, addr));
6428         return;
6429      }
6430      if (tyd == Ity_F64 && 0 == (offs & 7) && offs < (8<<12)) {
6431         HReg dD  = iselDblExpr(env, stmt->Ist.Put.data);
6432         HReg bbp = get_baseblock_register();
6433         addInstr(env, ARM64Instr_VLdStD(False/*!isLoad*/, dD, bbp, offs));
6434         return;
6435      }
6436      if (tyd == Ity_F32 && 0 == (offs & 3) && offs < (4<<12)) {
6437         HReg dD  = iselFltExpr(env, stmt->Ist.Put.data);
6438         HReg bbp = get_baseblock_register();
6439         addInstr(env, ARM64Instr_VLdStS(False/*!isLoad*/, dD, bbp, offs));
6440         return;
6441      }
6442
6443//ZZ       if (tyd == Ity_I64) {
6444//ZZ          if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
6445//ZZ             HReg addr = newVRegI(env);
6446//ZZ             HReg qD = iselNeon64Expr(env, stmt->Ist.Put.data);
6447//ZZ             addInstr(env, ARMInstr_Add32(addr, hregARM_R8(),
6448//ZZ                                          stmt->Ist.Put.offset));
6449//ZZ             addInstr(env, ARMInstr_NLdStD(False, qD, mkARMAModeN_R(addr)));
6450//ZZ          } else {
6451//ZZ             HReg rDhi, rDlo;
6452//ZZ             ARMAMode1* am0 = ARMAMode1_RI(hregARM_R8(),
6453//ZZ                                           stmt->Ist.Put.offset + 0);
6454//ZZ             ARMAMode1* am4 = ARMAMode1_RI(hregARM_R8(),
6455//ZZ                                           stmt->Ist.Put.offset + 4);
6456//ZZ             iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.Put.data);
6457//ZZ             addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!isLoad*/,
6458//ZZ                                           rDhi, am4));
6459//ZZ             addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!isLoad*/,
6460//ZZ                                           rDlo, am0));
6461//ZZ          }
6462//ZZ          return;
6463//ZZ       }
6464//ZZ       if (tyd == Ity_F64) {
6465//ZZ          // XXX This won't work if offset > 1020 or is not 0 % 4.
6466//ZZ          // In which case we'll have to generate more longwinded code.
6467//ZZ          ARMAModeV* am = mkARMAModeV(hregARM_R8(), stmt->Ist.Put.offset);
6468//ZZ          HReg rD = iselDblExpr(env, stmt->Ist.Put.data);
6469//ZZ          addInstr(env, ARMInstr_VLdStD(False/*!isLoad*/, rD, am));
6470//ZZ          return;
6471//ZZ       }
6472//ZZ       if (tyd == Ity_F32) {
6473//ZZ          // XXX This won't work if offset > 1020 or is not 0 % 4.
6474//ZZ          // In which case we'll have to generate more longwinded code.
6475//ZZ          ARMAModeV* am = mkARMAModeV(hregARM_R8(), stmt->Ist.Put.offset);
6476//ZZ          HReg rD = iselFltExpr(env, stmt->Ist.Put.data);
6477//ZZ          addInstr(env, ARMInstr_VLdStS(False/*!isLoad*/, rD, am));
6478//ZZ          return;
6479//ZZ       }
6480      break;
6481   }
6482
6483   /* --------- TMP --------- */
6484   /* assign value to temporary */
6485   case Ist_WrTmp: {
6486      IRTemp tmp = stmt->Ist.WrTmp.tmp;
6487      IRType ty  = typeOfIRTemp(env->type_env, tmp);
6488
6489      if (ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
6490         /* We could do a lot better here.  But for the time being: */
6491         HReg dst = lookupIRTemp(env, tmp);
6492         HReg rD  = iselIntExpr_R(env, stmt->Ist.WrTmp.data);
6493         addInstr(env, ARM64Instr_MovI(dst, rD));
6494         return;
6495      }
6496      if (ty == Ity_I1) {
6497         /* Here, we are generating a I1 value into a 64 bit register.
6498            Make sure the value in the register is only zero or one,
6499            but no other.  This allows optimisation of the
6500            1Uto64(tmp:I1) case, by making it simply a copy of the
6501            register holding 'tmp'.  The point being that the value in
6502            the register holding 'tmp' can only have been created
6503            here.  LATER: that seems dangerous; safer to do 'tmp & 1'
6504            in that case.  Also, could do this just with a single CINC
6505            insn. */
6506         HReg zero = newVRegI(env);
6507         HReg one  = newVRegI(env);
6508         HReg dst  = lookupIRTemp(env, tmp);
6509         addInstr(env, ARM64Instr_Imm64(zero, 0));
6510         addInstr(env, ARM64Instr_Imm64(one,  1));
6511         ARM64CondCode cc = iselCondCode(env, stmt->Ist.WrTmp.data);
6512         addInstr(env, ARM64Instr_CSel(dst, one, zero, cc));
6513         return;
6514      }
6515      if (ty == Ity_F64) {
6516         HReg src = iselDblExpr(env, stmt->Ist.WrTmp.data);
6517         HReg dst = lookupIRTemp(env, tmp);
6518         addInstr(env, ARM64Instr_VMov(8, dst, src));
6519         return;
6520      }
6521      if (ty == Ity_F32) {
         /* F32 temps live in 64-bit FP registers (see the vreg
            allocation in iselSB_ARM64), hence the 8-byte move. */
6522         HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data);
6523         HReg dst = lookupIRTemp(env, tmp);
6524         addInstr(env, ARM64Instr_VMov(8/*yes, really*/, dst, src));
6525         return;
6526      }
6527      if (ty == Ity_V128) {
6528         HReg src = iselV128Expr(env, stmt->Ist.WrTmp.data);
6529         HReg dst = lookupIRTemp(env, tmp);
6530         addInstr(env, ARM64Instr_VMov(16, dst, src));
6531         return;
6532      }
6533      break;
6534   }
6535
6536   /* --------- Call to DIRTY helper --------- */
6537   /* call complex ("dirty") helper function */
6538   case Ist_Dirty: {
6539      IRDirty* d = stmt->Ist.Dirty.details;
6540
6541      /* Figure out the return type, if any. */
6542      IRType retty = Ity_INVALID;
6543      if (d->tmp != IRTemp_INVALID)
6544         retty = typeOfIRTemp(env->type_env, d->tmp);
6545
6546      Bool retty_ok = False;
6547      switch (retty) {
6548         case Ity_INVALID: /* function doesn't return anything */
6549         case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
6550         case Ity_V128:
6551            retty_ok = True; break;
6552         default:
6553            break;
6554      }
6555      if (!retty_ok)
6556         break; /* will go to stmt_fail: */
6557
6558      /* Marshal args, do the call, and set the return value to 0x555..555
6559         if this is a conditional call that returns a value and the
6560         call is skipped. */
6561      UInt   addToSp = 0;
6562      RetLoc rloc    = mk_RetLoc_INVALID();
6563      doHelperCall( &addToSp, &rloc, env, d->guard, d->cee, retty, d->args );
6564      vassert(is_sane_RetLoc(rloc));
6565
6566      /* Now figure out what to do with the returned value, if any. */
6567      switch (retty) {
6568         case Ity_INVALID: {
6569            /* No return value.  Nothing to do. */
6570            vassert(d->tmp == IRTemp_INVALID);
6571            vassert(rloc.pri == RLPri_None);
6572            vassert(addToSp == 0);
6573            return;
6574         }
6575         case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8: {
6576            vassert(rloc.pri == RLPri_Int);
6577            vassert(addToSp == 0);
6578            /* The returned value is in x0.  Park it in the register
6579               associated with tmp. */
6580            HReg dst = lookupIRTemp(env, d->tmp);
6581            addInstr(env, ARM64Instr_MovI(dst, hregARM64_X0()) );
6582            return;
6583         }
6584         case Ity_V128: {
6585            /* The returned value is on the stack, and *retloc tells
6586               us where.  Fish it off the stack and then move the
6587               stack pointer upwards to clear it, as directed by
6588               doHelperCall. */
6589            vassert(rloc.pri == RLPri_V128SpRel);
6590            vassert(rloc.spOff < 256); // stay sane
6591            vassert(addToSp >= 16); // ditto
6592            vassert(addToSp < 256); // ditto
6593            HReg dst = lookupIRTemp(env, d->tmp);
6594            HReg tmp = newVRegI(env); // the address of the returned value
6595            addInstr(env, ARM64Instr_FromSP(tmp)); // tmp = SP
6596            addInstr(env, ARM64Instr_Arith(tmp, tmp,
6597                                           ARM64RIA_I12((UShort)rloc.spOff, 0),
6598                                           True/*isAdd*/ ));
6599            addInstr(env, ARM64Instr_VLdStQ(True/*isLoad*/, dst, tmp));
6600            addInstr(env, ARM64Instr_AddToSP(addToSp));
6601            return;
6602         }
6603         default:
6604            /*NOTREACHED*/
6605            vassert(0);
6606      }
6607      break;
6608   }
6609
6610//ZZ    /* --------- Load Linked and Store Conditional --------- */
6611//ZZ    case Ist_LLSC: {
6612//ZZ       if (stmt->Ist.LLSC.storedata == NULL) {
6613//ZZ          /* LL */
6614//ZZ          IRTemp res = stmt->Ist.LLSC.result;
6615//ZZ          IRType ty  = typeOfIRTemp(env->type_env, res);
6616//ZZ          if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
6617//ZZ             Int  szB   = 0;
6618//ZZ             HReg r_dst = lookupIRTemp(env, res);
6619//ZZ             HReg raddr = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
6620//ZZ             switch (ty) {
6621//ZZ                case Ity_I8:  szB = 1; break;
6622//ZZ                case Ity_I16: szB = 2; break;
6623//ZZ                case Ity_I32: szB = 4; break;
6624//ZZ                default:      vassert(0);
6625//ZZ             }
6626//ZZ             addInstr(env, mk_iMOVds_RR(hregARM_R4(), raddr));
6627//ZZ             addInstr(env, ARMInstr_LdrEX(szB));
6628//ZZ             addInstr(env, mk_iMOVds_RR(r_dst, hregARM_R2()));
6629//ZZ             return;
6630//ZZ          }
6631//ZZ          if (ty == Ity_I64) {
6632//ZZ             HReg raddr = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
6633//ZZ             addInstr(env, mk_iMOVds_RR(hregARM_R4(), raddr));
6634//ZZ             addInstr(env, ARMInstr_LdrEX(8));
6635//ZZ             /* Result is in r3:r2.  On a non-NEON capable CPU, we must
6636//ZZ                move it into a result register pair.  On a NEON capable
6637//ZZ                CPU, the result register will be a 64 bit NEON
6638//ZZ                register, so we must move it there instead. */
6639//ZZ             if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
6640//ZZ                HReg dst = lookupIRTemp(env, res);
6641//ZZ                addInstr(env, ARMInstr_VXferD(True, dst, hregARM_R3(),
6642//ZZ                                              hregARM_R2()));
6643//ZZ             } else {
6644//ZZ                HReg r_dst_hi, r_dst_lo;
6645//ZZ                lookupIRTemp64(&r_dst_hi, &r_dst_lo, env, res);
6646//ZZ                addInstr(env, mk_iMOVds_RR(r_dst_lo, hregARM_R2()));
6647//ZZ                addInstr(env, mk_iMOVds_RR(r_dst_hi, hregARM_R3()));
6648//ZZ             }
6649//ZZ             return;
6650//ZZ          }
6651//ZZ          /*NOTREACHED*/
6652//ZZ          vassert(0);
6653//ZZ       } else {
6654//ZZ          /* SC */
6655//ZZ          IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.LLSC.storedata);
6656//ZZ          if (tyd == Ity_I32 || tyd == Ity_I16 || tyd == Ity_I8) {
6657//ZZ             Int  szB = 0;
6658//ZZ             HReg rD  = iselIntExpr_R(env, stmt->Ist.LLSC.storedata);
6659//ZZ             HReg rA  = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
6660//ZZ             switch (tyd) {
6661//ZZ                case Ity_I8:  szB = 1; break;
6662//ZZ                case Ity_I16: szB = 2; break;
6663//ZZ                case Ity_I32: szB = 4; break;
6664//ZZ                default:      vassert(0);
6665//ZZ             }
6666//ZZ             addInstr(env, mk_iMOVds_RR(hregARM_R2(), rD));
6667//ZZ             addInstr(env, mk_iMOVds_RR(hregARM_R4(), rA));
6668//ZZ             addInstr(env, ARMInstr_StrEX(szB));
6669//ZZ          } else {
6670//ZZ             vassert(tyd == Ity_I64);
6671//ZZ             /* This is really ugly.  There is no is/is-not NEON
6672//ZZ                decision akin to the case for LL, because iselInt64Expr
6673//ZZ                fudges this for us, and always gets the result into two
6674//ZZ                GPRs even if this means moving it from a NEON
6675//ZZ                register. */
6676//ZZ             HReg rDhi, rDlo;
6677//ZZ             iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.LLSC.storedata);
6678//ZZ             HReg rA = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
6679//ZZ             addInstr(env, mk_iMOVds_RR(hregARM_R2(), rDlo));
6680//ZZ             addInstr(env, mk_iMOVds_RR(hregARM_R3(), rDhi));
6681//ZZ             addInstr(env, mk_iMOVds_RR(hregARM_R4(), rA));
6682//ZZ             addInstr(env, ARMInstr_StrEX(8));
6683//ZZ          }
6684//ZZ          /* now r0 is 1 if failed, 0 if success.  Change to IR
6685//ZZ             conventions (0 is fail, 1 is success).  Also transfer
6686//ZZ             result to r_res. */
6687//ZZ          IRTemp   res   = stmt->Ist.LLSC.result;
6688//ZZ          IRType   ty    = typeOfIRTemp(env->type_env, res);
6689//ZZ          HReg     r_res = lookupIRTemp(env, res);
6690//ZZ          ARMRI84* one   = ARMRI84_I84(1,0);
6691//ZZ          vassert(ty == Ity_I1);
6692//ZZ          addInstr(env, ARMInstr_Alu(ARMalu_XOR, r_res, hregARM_R0(), one));
6693//ZZ          /* And be conservative -- mask off all but the lowest bit */
6694//ZZ          addInstr(env, ARMInstr_Alu(ARMalu_AND, r_res, r_res, one));
6695//ZZ          return;
6696//ZZ       }
6697//ZZ       break;
6698//ZZ    }
6699//ZZ
6700//ZZ    /* --------- MEM FENCE --------- */
6701//ZZ    case Ist_MBE:
6702//ZZ       switch (stmt->Ist.MBE.event) {
6703//ZZ          case Imbe_Fence:
6704//ZZ             addInstr(env, ARMInstr_MFence());
6705//ZZ             return;
6706//ZZ          case Imbe_CancelReservation:
6707//ZZ             addInstr(env, ARMInstr_CLREX());
6708//ZZ             return;
6709//ZZ          default:
6710//ZZ             break;
6711//ZZ       }
6712//ZZ       break;
6713
6714   /* --------- INSTR MARK --------- */
6715   /* Doesn't generate any executable code ... */
6716   case Ist_IMark:
6717       return;
6718
6719   /* --------- NO-OP --------- */
6720   case Ist_NoOp:
6721       return;
6722
6723   /* --------- EXIT --------- */
6724   case Ist_Exit: {
6725      if (stmt->Ist.Exit.dst->tag != Ico_U64)
6726         vpanic("isel_arm: Ist_Exit: dst is not a 64-bit value");
6727
6728      ARM64CondCode cc
6729         = iselCondCode(env, stmt->Ist.Exit.guard);
      /* amPC addresses the guest-IP slot in the baseblock; the X*
         transfer instructions write the destination there. */
6730      ARM64AMode* amPC
6731         = mk_baseblock_64bit_access_amode(stmt->Ist.Exit.offsIP);
6732
6733
6734      /* Case: boring transfer to known address */
6735      if (stmt->Ist.Exit.jk == Ijk_Boring
6736          /*ATC || stmt->Ist.Exit.jk == Ijk_Call */
6737          /*ATC || stmt->Ist.Exit.jk == Ijk_Ret */ ) {
6738         if (env->chainingAllowed) {
6739            /* .. almost always true .. */
6740            /* Skip the event check at the dst if this is a forwards
6741               edge. */
6742            Bool toFastEP
6743              = ((Addr64)stmt->Ist.Exit.dst->Ico.U64) > env->max_ga;
6744            if (0) vex_printf("%s", toFastEP ? "Y" : ",");
6745            addInstr(env, ARM64Instr_XDirect(stmt->Ist.Exit.dst->Ico.U64,
6746                                             amPC, cc, toFastEP));
6747         } else {
6748            /* .. very occasionally .. */
6749            /* We can't use chaining, so ask for an assisted transfer,
6750               as that's the only alternative that is allowable. */
6751            HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
6752            addInstr(env, ARM64Instr_XAssisted(r, amPC, cc, Ijk_Boring));
6753         }
6754         return;
6755      }
6756
6757//ZZ       /* Case: assisted transfer to arbitrary address */
6758//ZZ       switch (stmt->Ist.Exit.jk) {
6759//ZZ          /* Keep this list in sync with that in iselNext below */
6760//ZZ          case Ijk_ClientReq:
6761//ZZ          case Ijk_NoDecode:
6762//ZZ          case Ijk_NoRedir:
6763//ZZ          case Ijk_Sys_syscall:
6764//ZZ          case Ijk_TInval:
6765//ZZ          case Ijk_Yield:
6766//ZZ          {
6767//ZZ             HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
6768//ZZ             addInstr(env, ARMInstr_XAssisted(r, amR15T, cc,
6769//ZZ                                              stmt->Ist.Exit.jk));
6770//ZZ             return;
6771//ZZ          }
6772//ZZ          default:
6773//ZZ             break;
6774//ZZ       }
6775
6776      /* Do we ever expect to see any other kind? */
6777      goto stmt_fail;
6778   }
6779
6780   default: break;
6781   }
6782  stmt_fail:
6783   ppIRStmt(stmt);
6784   vpanic("iselStmt");
6785}
6786
6787
6788/*---------------------------------------------------------*/
6789/*--- ISEL: Basic block terminators (Nexts) ---*/
6790/*---------------------------------------------------------*/
6791
/* Generate code for the block's terminal transfer: write the next
   guest IP into the baseblock slot at 'offsIP' and jump.  A constant
   Boring/Call destination uses a chainable direct transfer (XDirect)
   when env->chainingAllowed, otherwise an assisted transfer; a
   non-constant Boring/Ret/Call destination uses an indirect or
   assisted transfer; the listed special jump kinds always use an
   assisted transfer carrying the kind.  Any other jump kind is
   printed and asserted against. */
6792static void iselNext ( ISelEnv* env,
6793                       IRExpr* next, IRJumpKind jk, Int offsIP )
6794{
6795   if (vex_traceflags & VEX_TRACE_VCODE) {
6796      vex_printf( "\n-- PUT(%d) = ", offsIP);
6797      ppIRExpr( next );
6798      vex_printf( "; exit-");
6799      ppIRJumpKind(jk);
6800      vex_printf( "\n");
6801   }
6802
6803   /* Case: boring transfer to known address */
6804   if (next->tag == Iex_Const) {
6805      IRConst* cdst = next->Iex.Const.con;
6806      vassert(cdst->tag == Ico_U64);
6807      if (jk == Ijk_Boring || jk == Ijk_Call) {
6808         /* Boring transfer to known address */
6809         ARM64AMode* amPC = mk_baseblock_64bit_access_amode(offsIP);
6810         if (env->chainingAllowed) {
6811            /* .. almost always true .. */
6812            /* Skip the event check at the dst if this is a forwards
6813               edge. */
6814            Bool toFastEP
6815               = ((Addr64)cdst->Ico.U64) > env->max_ga;
6816            if (0) vex_printf("%s", toFastEP ? "X" : ".");
6817            addInstr(env, ARM64Instr_XDirect(cdst->Ico.U64,
6818                                             amPC, ARM64cc_AL,
6819                                             toFastEP));
6820         } else {
6821            /* .. very occasionally .. */
6822            /* We can't use chaining, so ask for an assisted transfer,
6823               as that's the only alternative that is allowable. */
6824            HReg r = iselIntExpr_R(env, next);
6825            addInstr(env, ARM64Instr_XAssisted(r, amPC, ARM64cc_AL,
6826                                               Ijk_Boring));
6827         }
6828         return;
6829      }
6830   }
6831
6832   /* Case: call/return (==boring) transfer to any address */
6833   switch (jk) {
6834      case Ijk_Boring: case Ijk_Ret: case Ijk_Call: {
6835         HReg        r    = iselIntExpr_R(env, next);
6836         ARM64AMode* amPC = mk_baseblock_64bit_access_amode(offsIP);
6837         if (env->chainingAllowed) {
6838            addInstr(env, ARM64Instr_XIndir(r, amPC, ARM64cc_AL));
6839         } else {
6840            addInstr(env, ARM64Instr_XAssisted(r, amPC, ARM64cc_AL,
6841                                               Ijk_Boring));
6842         }
6843         return;
6844      }
6845      default:
6846         break;
6847   }
6848
6849   /* Case: assisted transfer to arbitrary address */
6850   switch (jk) {
6851      /* Keep this list in sync with that for Ist_Exit above */
6852      case Ijk_ClientReq:
6853      case Ijk_NoDecode:
6854//ZZ       case Ijk_NoRedir:
6855      case Ijk_Sys_syscall:
6856//ZZ       case Ijk_TInval:
6857//ZZ       case Ijk_Yield:
6858      {
6859         HReg        r    = iselIntExpr_R(env, next);
6860         ARM64AMode* amPC = mk_baseblock_64bit_access_amode(offsIP);
6861         addInstr(env, ARM64Instr_XAssisted(r, amPC, ARM64cc_AL, jk));
6862         return;
6863      }
6864      default:
6865         break;
6866   }
6867
   /* No handler matched: dump the terminator for diagnosis and die. */
6868   vex_printf( "\n-- PUT(%d) = ", offsIP);
6869   ppIRExpr( next );
6870   vex_printf( "; exit-");
6871   ppIRJumpKind(jk);
6872   vex_printf( "\n");
6873   vassert(0); // are we expecting any other kind?
6874}
6875
6876
6877/*---------------------------------------------------------*/
6878/*--- Insn selector top-level ---*/
6879/*---------------------------------------------------------*/
6880
/* Translate an entire SB to arm64 code.
   Builds a fresh ISelEnv: one virtual register per IRTemp (two for
   Ity_I128, via vregmap/vregmapHI; F32 temps share HRcFlt64
   registers with F64).  Emits the mandatory event check as the very
   first instruction, then selects code for each statement and the
   block terminator, and returns the populated HInstrArray with
   n_vregs recorded.  addProfInc is currently unimplemented
   (vassert(0)); vbi is unused. */
6882
6883HInstrArray* iselSB_ARM64 ( IRSB* bb,
6884                            VexArch      arch_host,
6885                            VexArchInfo* archinfo_host,
6886                            VexAbiInfo*  vbi/*UNUSED*/,
6887                            Int offs_Host_EvC_Counter,
6888                            Int offs_Host_EvC_FailAddr,
6889                            Bool chainingAllowed,
6890                            Bool addProfInc,
6891                            Addr64 max_ga )
6892{
6893   Int        i, j;
6894   HReg       hreg, hregHI;
6895   ISelEnv*   env;
6896   UInt       hwcaps_host = archinfo_host->hwcaps;
6897   ARM64AMode *amCounter, *amFailAddr;
6898
6899   /* sanity ... */
6900   vassert(arch_host == VexArchARM64);
6901
6902   /* guard against unexpected space regressions */
6903   vassert(sizeof(ARM64Instr) <= 32);
6904
6905   /* Make up an initial environment to use. */
6906   env = LibVEX_Alloc(sizeof(ISelEnv));
6907   env->vreg_ctr = 0;
6908
6909   /* Set up output code array. */
6910   env->code = newHInstrArray();
6911
6912   /* Copy BB's type env. */
6913   env->type_env = bb->tyenv;
6914
6915   /* Make up an IRTemp -> virtual HReg mapping.  This doesn't
6916      change as we go along. */
6917   env->n_vregmap = bb->tyenv->types_used;
6918   env->vregmap   = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
6919   env->vregmapHI = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
6920
6921   /* and finally ... */
6922   env->chainingAllowed = chainingAllowed;
6923   env->hwcaps          = hwcaps_host;
6924   env->previous_rm     = NULL;
6925   env->max_ga          = max_ga;
6926
6927   /* For each IR temporary, allocate a suitably-kinded virtual
6928      register. */
6929   j = 0;
6930   for (i = 0; i < env->n_vregmap; i++) {
6931      hregHI = hreg = INVALID_HREG;
6932      switch (bb->tyenv->types[i]) {
6933         case Ity_I1:
6934         case Ity_I8: case Ity_I16: case Ity_I32: case Ity_I64:
6935            hreg = mkHReg(j++, HRcInt64, True);
6936            break;
6937         case Ity_I128:
            /* 128-bit integer temps need a register pair: low half in
               vregmap, high half in vregmapHI. */
6938            hreg   = mkHReg(j++, HRcInt64, True);
6939            hregHI = mkHReg(j++, HRcInt64, True);
6940            break;
6941         case Ity_F32: // we'll use HRcFlt64 regs for F32 too
6942         case Ity_F64:
6943            hreg = mkHReg(j++, HRcFlt64, True);
6944            break;
6945         case Ity_V128:
6946            hreg = mkHReg(j++, HRcVec128, True);
6947            break;
6948         default:
6949            ppIRType(bb->tyenv->types[i]);
6950            vpanic("iselBB(arm64): IRTemp type");
6951      }
6952      env->vregmap[i]   = hreg;
6953      env->vregmapHI[i] = hregHI;
6954   }
6955   env->vreg_ctr = j;
6956
6957   /* The very first instruction must be an event check. */
   /* Both amodes are 9-bit-immediate offsets from x21, the baseblock
      register. */
6958   amCounter  = ARM64AMode_RI9(hregARM64_X21(), offs_Host_EvC_Counter);
6959   amFailAddr = ARM64AMode_RI9(hregARM64_X21(), offs_Host_EvC_FailAddr);
6960   addInstr(env, ARM64Instr_EvCheck(amCounter, amFailAddr));
6961
6962   /* Possibly a block counter increment (for profiling).  At this
6963      point we don't know the address of the counter, so just pretend
6964      it is zero.  It will have to be patched later, but before this
6965      translation is used, by a call to LibVEX_patchProfCtr. */
6966   if (addProfInc) {
6967      vassert(0);
6968      //addInstr(env, ARM64Instr_ProfInc());
6969   }
6970
6971   /* Ok, finally we can iterate over the statements. */
6972   for (i = 0; i < bb->stmts_used; i++)
6973      iselStmt(env, bb->stmts[i]);
6974
6975   iselNext(env, bb->next, bb->jumpkind, bb->offsIP);
6976
6977   /* record the number of vregs we used. */
6978   env->code->n_vregs = env->vreg_ctr;
6979   return env->code;
6980}
6981
6982
6983/*---------------------------------------------------------------*/
6984/*--- end host_arm64_isel.c ---*/
6985/*---------------------------------------------------------------*/