blob: 805d00cd8897af001fb009de9f4e056a3c4cda15 [file] [log] [blame]
sewardjbbcf1882014-01-12 12:49:10 +00001
2/*---------------------------------------------------------------*/
3/*--- begin host_arm64_isel.c ---*/
4/*---------------------------------------------------------------*/
5
6/*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
9
10 Copyright (C) 2013-2013 OpenWorks
11 info@open-works.net
12
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
17
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26 02110-1301, USA.
27
28 The GNU General Public License is contained in the file COPYING.
29*/
30
31#include "libvex_basictypes.h"
32#include "libvex_ir.h"
33#include "libvex.h"
34#include "ir_match.h"
35
36#include "main_util.h"
37#include "main_globals.h"
38#include "host_generic_regs.h"
39#include "host_generic_simd64.h" // for 32-bit SIMD helpers
40#include "host_arm64_defs.h"
41
42
43//ZZ /*---------------------------------------------------------*/
44//ZZ /*--- ARMvfp control word stuff ---*/
45//ZZ /*---------------------------------------------------------*/
46//ZZ
47//ZZ /* Vex-generated code expects to run with the FPU set as follows: all
48//ZZ exceptions masked, round-to-nearest, non-vector mode, with the NZCV
49//ZZ flags cleared, and FZ (flush to zero) disabled. Curiously enough,
50//ZZ this corresponds to a FPSCR value of zero.
51//ZZ
52//ZZ fpscr should therefore be zero on entry to Vex-generated code, and
53//ZZ should be unchanged at exit. (Or at least the bottom 28 bits
54//ZZ should be zero).
55//ZZ */
56//ZZ
57//ZZ #define DEFAULT_FPSCR 0
58
59
60/*---------------------------------------------------------*/
61/*--- ISelEnv ---*/
62/*---------------------------------------------------------*/
63
64/* This carries around:
65
66 - A mapping from IRTemp to IRType, giving the type of any IRTemp we
67 might encounter. This is computed before insn selection starts,
68 and does not change.
69
70 - A mapping from IRTemp to HReg. This tells the insn selector
71 which virtual register is associated with each IRTemp temporary.
72 This is computed before insn selection starts, and does not
73 change. We expect this mapping to map precisely the same set of
74 IRTemps as the type mapping does.
75
76 |vregmap| holds the primary register for the IRTemp.
77 |vregmapHI| is only used for 128-bit integer-typed
78 IRTemps. It holds the identity of a second
79 64-bit virtual HReg, which holds the high half
80 of the value.
81
82 - The code array, that is, the insns selected so far.
83
84 - A counter, for generating new virtual registers.
85
86 - The host hardware capabilities word. This is set at the start
87 and does not change.
88
89 - A Bool for indicating whether we may generate chain-me
90 instructions for control flow transfers, or whether we must use
91 XAssisted.
92
93 - The maximum guest address of any guest insn in this block.
94 Actually, the address of the highest-addressed byte from any insn
95 in this block. Is set at the start and does not change. This is
96 used for detecting jumps which are definitely forward-edges from
97 this block, and therefore can be made (chained) to the fast entry
98 point of the destination, thereby avoiding the destination's
99 event check.
100
101 - An IRExpr*, which may be NULL, holding the IR expression (an
102 IRRoundingMode-encoded value) to which the FPU's rounding mode
103 was most recently set. Setting to NULL is always safe. Used to
104 avoid redundant settings of the FPU's rounding mode, as
105 described in set_FPCR_rounding_mode below.
106
107 Note, this is all (well, mostly) host-independent.
108*/
109
typedef
   struct {
      /* Constant -- are set at the start and do not change. */
      IRTypeEnv* type_env;       /* gives the IRType of every IRTemp */

      HReg* vregmap;             /* IRTemp -> its (primary) virtual reg */
      HReg* vregmapHI;           /* second vreg: high 64 bits, used only
                                    for 128-bit integer-typed IRTemps */
      Int n_vregmap;             /* number of entries in the maps above */

      UInt hwcaps;               /* host capabilities word, fixed at start */

      Bool chainingAllowed;      /* may we generate chain-me transfers? */
      Addr64 max_ga;             /* highest guest address in this block */

      /* These are modified as we go along. */
      HInstrArray* code;         /* the instructions selected so far */
      Int vreg_ctr;              /* counter for minting fresh vregs */

      IRExpr* previous_rm;       /* IR rounding-mode expr most recently
                                    written to FPCR, or NULL; see
                                    set_FPCR_rounding_mode */
   }
   ISelEnv;
131
132static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp )
133{
134 vassert(tmp >= 0);
135 vassert(tmp < env->n_vregmap);
136 return env->vregmap[tmp];
137}
138
139static void addInstr ( ISelEnv* env, ARM64Instr* instr )
140{
141 addHInstr(env->code, instr);
142 if (vex_traceflags & VEX_TRACE_VCODE) {
143 ppARM64Instr(instr);
144 vex_printf("\n");
145 }
146}
147
148static HReg newVRegI ( ISelEnv* env )
149{
150 HReg reg = mkHReg(env->vreg_ctr, HRcInt64, True/*virtual reg*/);
151 env->vreg_ctr++;
152 return reg;
153}
154
155static HReg newVRegD ( ISelEnv* env )
156{
157 HReg reg = mkHReg(env->vreg_ctr, HRcFlt64, True/*virtual reg*/);
158 env->vreg_ctr++;
159 return reg;
160}
161
162//ZZ static HReg newVRegF ( ISelEnv* env )
163//ZZ {
164//ZZ HReg reg = mkHReg(env->vreg_ctr, HRcFlt32, True/*virtual reg*/);
165//ZZ env->vreg_ctr++;
166//ZZ return reg;
167//ZZ }
168
169static HReg newVRegV ( ISelEnv* env )
170{
171 HReg reg = mkHReg(env->vreg_ctr, HRcVec128, True/*virtual reg*/);
172 env->vreg_ctr++;
173 return reg;
174}
175
176//ZZ /* These are duplicated in guest_arm_toIR.c */
177//ZZ static IRExpr* unop ( IROp op, IRExpr* a )
178//ZZ {
179//ZZ return IRExpr_Unop(op, a);
180//ZZ }
181//ZZ
182//ZZ static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
183//ZZ {
184//ZZ return IRExpr_Binop(op, a1, a2);
185//ZZ }
186//ZZ
187//ZZ static IRExpr* bind ( Int binder )
188//ZZ {
189//ZZ return IRExpr_Binder(binder);
190//ZZ }
191
192
193/*---------------------------------------------------------*/
194/*--- ISEL: Forward declarations ---*/
195/*---------------------------------------------------------*/
196
197/* These are organised as iselXXX and iselXXX_wrk pairs. The
198 iselXXX_wrk do the real work, but are not to be called directly.
199 For each XXX, iselXXX calls its iselXXX_wrk counterpart, then
200 checks that all returned registers are virtual. You should not
201 call the _wrk version directly.
202
203 Because some forms of ARM64 memory amodes are implicitly scaled by
204 the access size, iselIntExpr_AMode takes an IRType which tells it
205 the type of the access for which the amode is to be used. This
206 type needs to be correct, else you'll get incorrect code.
207*/
208static ARM64AMode* iselIntExpr_AMode_wrk ( ISelEnv* env,
209 IRExpr* e, IRType dty );
210static ARM64AMode* iselIntExpr_AMode ( ISelEnv* env,
211 IRExpr* e, IRType dty );
212
213static ARM64RIA* iselIntExpr_RIA_wrk ( ISelEnv* env, IRExpr* e );
214static ARM64RIA* iselIntExpr_RIA ( ISelEnv* env, IRExpr* e );
215
216static ARM64RIL* iselIntExpr_RIL_wrk ( ISelEnv* env, IRExpr* e );
217static ARM64RIL* iselIntExpr_RIL ( ISelEnv* env, IRExpr* e );
218
219static ARM64RI6* iselIntExpr_RI6_wrk ( ISelEnv* env, IRExpr* e );
220static ARM64RI6* iselIntExpr_RI6 ( ISelEnv* env, IRExpr* e );
221
222static ARM64CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e );
223static ARM64CondCode iselCondCode ( ISelEnv* env, IRExpr* e );
224
225static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e );
226static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e );
227
228static void iselInt128Expr_wrk ( /*OUT*/HReg* rHi, HReg* rLo,
229 ISelEnv* env, IRExpr* e );
230static void iselInt128Expr ( /*OUT*/HReg* rHi, HReg* rLo,
231 ISelEnv* env, IRExpr* e );
232
233
234//ZZ static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo,
235//ZZ ISelEnv* env, IRExpr* e );
236//ZZ static void iselInt64Expr ( HReg* rHi, HReg* rLo,
237//ZZ ISelEnv* env, IRExpr* e );
238
239static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e );
240static HReg iselDblExpr ( ISelEnv* env, IRExpr* e );
241
242static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e );
243static HReg iselFltExpr ( ISelEnv* env, IRExpr* e );
244
245//ZZ static HReg iselNeon64Expr_wrk ( ISelEnv* env, IRExpr* e );
246//ZZ static HReg iselNeon64Expr ( ISelEnv* env, IRExpr* e );
247
248static HReg iselV128Expr_wrk ( ISelEnv* env, IRExpr* e );
249static HReg iselV128Expr ( ISelEnv* env, IRExpr* e );
250
251static ARM64RIL* mb_mkARM64RIL_I ( ULong imm64 );
252
253
254/*---------------------------------------------------------*/
255/*--- ISEL: Misc helpers ---*/
256/*---------------------------------------------------------*/
257
258/* Generate an amode suitable for a 64-bit sized access relative to
259 the baseblock register (X21). This generates an RI12 amode, which
260 means its scaled by the access size, which is why the access size
261 -- 64 bit -- is stated explicitly here. Consequently |off| needs
262 to be divisible by 8. */
263static ARM64AMode* mk_baseblock_64bit_access_amode ( UInt off )
264{
265 vassert(off < (8 << 12)); /* otherwise it's unrepresentable */
266 vassert((off & 7) == 0); /* ditto */
267 return ARM64AMode_RI12(hregARM64_X21(), off >> 3, 8/*scale*/);
268}
269
270/* Ditto, for 32 bit accesses. */
271static ARM64AMode* mk_baseblock_32bit_access_amode ( UInt off )
272{
273 vassert(off < (4 << 12)); /* otherwise it's unrepresentable */
274 vassert((off & 3) == 0); /* ditto */
275 return ARM64AMode_RI12(hregARM64_X21(), off >> 2, 4/*scale*/);
276}
277
278/* Ditto, for 16 bit accesses. */
279static ARM64AMode* mk_baseblock_16bit_access_amode ( UInt off )
280{
281 vassert(off < (2 << 12)); /* otherwise it's unrepresentable */
282 vassert((off & 1) == 0); /* ditto */
283 return ARM64AMode_RI12(hregARM64_X21(), off >> 1, 2/*scale*/);
284}
285
286/* Ditto, for 8 bit accesses. */
287static ARM64AMode* mk_baseblock_8bit_access_amode ( UInt off )
288{
289 vassert(off < (1 << 12)); /* otherwise it's unrepresentable */
290 return ARM64AMode_RI12(hregARM64_X21(), off >> 0, 1/*scale*/);
291}
292
293static HReg mk_baseblock_128bit_access_addr ( ISelEnv* env, UInt off )
294{
295 vassert(off < (1<<12));
296 HReg r = newVRegI(env);
297 addInstr(env, ARM64Instr_Arith(r, hregARM64_X21(),
298 ARM64RIA_I12(off,0), True/*isAdd*/));
299 return r;
300}
301
302static HReg get_baseblock_register ( void )
303{
304 return hregARM64_X21();
305}
306
307/* Generate code to zero extend a 32 bit value in 'src' to 64 bits, in
308 a new register, and return the new register. */
309static HReg widen_z_32_to_64 ( ISelEnv* env, HReg src )
310{
311 HReg dst = newVRegI(env);
312 ARM64RIL* mask = ARM64RIL_I13(1, 0, 31); /* encodes 0xFFFFFFFF */
313 addInstr(env, ARM64Instr_Logic(dst, src, mask, ARM64lo_AND));
314 return dst;
315}
316
317/* Generate code to sign extend a 16 bit value in 'src' to 64 bits, in
318 a new register, and return the new register. */
319static HReg widen_s_16_to_64 ( ISelEnv* env, HReg src )
320{
321 HReg dst = newVRegI(env);
322 ARM64RI6* n48 = ARM64RI6_I6(48);
323 addInstr(env, ARM64Instr_Shift(dst, src, n48, ARM64sh_SHL));
324 addInstr(env, ARM64Instr_Shift(dst, dst, n48, ARM64sh_SAR));
325 return dst;
326}
327
328/* Generate code to zero extend a 16 bit value in 'src' to 64 bits, in
329 a new register, and return the new register. */
330static HReg widen_z_16_to_64 ( ISelEnv* env, HReg src )
331{
332 HReg dst = newVRegI(env);
333 ARM64RI6* n48 = ARM64RI6_I6(48);
334 addInstr(env, ARM64Instr_Shift(dst, src, n48, ARM64sh_SHL));
335 addInstr(env, ARM64Instr_Shift(dst, dst, n48, ARM64sh_SHR));
336 return dst;
337}
338
339/* Generate code to sign extend a 32 bit value in 'src' to 64 bits, in
340 a new register, and return the new register. */
341static HReg widen_s_32_to_64 ( ISelEnv* env, HReg src )
342{
343 HReg dst = newVRegI(env);
344 ARM64RI6* n32 = ARM64RI6_I6(32);
345 addInstr(env, ARM64Instr_Shift(dst, src, n32, ARM64sh_SHL));
346 addInstr(env, ARM64Instr_Shift(dst, dst, n32, ARM64sh_SAR));
347 return dst;
348}
349
350/* Generate code to sign extend a 8 bit value in 'src' to 64 bits, in
351 a new register, and return the new register. */
352static HReg widen_s_8_to_64 ( ISelEnv* env, HReg src )
353{
354 HReg dst = newVRegI(env);
355 ARM64RI6* n56 = ARM64RI6_I6(56);
356 addInstr(env, ARM64Instr_Shift(dst, src, n56, ARM64sh_SHL));
357 addInstr(env, ARM64Instr_Shift(dst, dst, n56, ARM64sh_SAR));
358 return dst;
359}
360
361static HReg widen_z_8_to_64 ( ISelEnv* env, HReg src )
362{
363 HReg dst = newVRegI(env);
364 ARM64RI6* n56 = ARM64RI6_I6(56);
365 addInstr(env, ARM64Instr_Shift(dst, src, n56, ARM64sh_SHL));
366 addInstr(env, ARM64Instr_Shift(dst, dst, n56, ARM64sh_SHR));
367 return dst;
368}
369
370/* Is this IRExpr_Const(IRConst_U64(0)) ? */
371static Bool isZeroU64 ( IRExpr* e ) {
372 if (e->tag != Iex_Const) return False;
373 IRConst* con = e->Iex.Const.con;
374 vassert(con->tag == Ico_U64);
375 return con->Ico.U64 == 0;
376}
377
378
379/*---------------------------------------------------------*/
380/*--- ISEL: FP rounding mode helpers ---*/
381/*---------------------------------------------------------*/
382
/* Set the FP rounding mode: 'mode' is an I32-typed expression
   denoting a value in the range 0 .. 3, indicating a round mode
   encoded as per type IRRoundingMode -- the first four values only
   (Irrm_NEAREST, Irrm_NegINF, Irrm_PosINF, Irrm_ZERO). Set the ARM64
   FPCR to have the same rounding.
388
389 For speed & simplicity, we're setting the *entire* FPCR here.
390
391 Setting the rounding mode is expensive. So this function tries to
392 avoid repeatedly setting the rounding mode to the same thing by
393 first comparing 'mode' to the 'mode' tree supplied in the previous
394 call to this function, if any. (The previous value is stored in
395 env->previous_rm.) If 'mode' is a single IR temporary 't' and
396 env->previous_rm is also just 't', then the setting is skipped.
397
398 This is safe because of the SSA property of IR: an IR temporary can
399 only be defined once and so will have the same value regardless of
400 where it appears in the block. Cool stuff, SSA.
401
402 A safety condition: all attempts to set the RM must be aware of
403 this mechanism - by being routed through the functions here.
404
405 Of course this only helps if blocks where the RM is set more than
406 once and it is set to the same value each time, *and* that value is
407 held in the same IR temporary each time. In order to assure the
408 latter as much as possible, the IR optimiser takes care to do CSE
409 on any block with any sign of floating point activity.
410*/
static
void set_FPCR_rounding_mode ( ISelEnv* env, IRExpr* mode )
{
   vassert(typeOfIRExpr(env->type_env,mode) == Ity_I32);

   /* Do we need to do anything?  If |mode| is the same IR temporary
      as the one most recently written to FPCR, SSA guarantees the
      value is unchanged, so skip the (expensive) FPCR write. */
   if (env->previous_rm
       && env->previous_rm->tag == Iex_RdTmp
       && mode->tag == Iex_RdTmp
       && env->previous_rm->Iex.RdTmp.tmp == mode->Iex.RdTmp.tmp) {
      /* no - setting it to what it was before. */
      vassert(typeOfIRExpr(env->type_env, env->previous_rm) == Ity_I32);
      return;
   }

   /* No luck - we better set it, and remember what we set it to. */
   env->previous_rm = mode;

   /* Only supporting the rounding-mode bits - the rest of FPCR is set
      to zero - so we can set the whole register at once (faster). */

   /* This isn't simple, because 'mode' carries an IR rounding
      encoding, and we need to translate that to an ARM64 FP one:
      The IR encoding:
         00  to nearest (the default)
         10  to +infinity
         01  to -infinity
         11  to zero
      The ARM64 FP encoding:
         00  to nearest
         01  to +infinity
         10  to -infinity
         11  to zero
      Easy enough to do; just swap the two bits.
   */
   HReg irrm = iselIntExpr_R(env, mode);
   HReg tL = newVRegI(env);
   HReg tR = newVRegI(env);
   HReg t3 = newVRegI(env);
   /* tL = irrm << 1;
      tR = irrm >> 1;  if we're lucky, these will issue together
      tL &= 2;
      tR &= 1;         ditto
      t3 = tL | tR;    t3 now holds the bit-swapped 2-bit RM value
      t3 <<= 22;       position it at FPCR.RMode
      write t3 to FPCR (via ARM64Instr_FPCR)
   */
   ARM64RIL* ril_one = mb_mkARM64RIL_I(1);
   ARM64RIL* ril_two = mb_mkARM64RIL_I(2);
   vassert(ril_one && ril_two);
   addInstr(env, ARM64Instr_Shift(tL, irrm, ARM64RI6_I6(1), ARM64sh_SHL));
   addInstr(env, ARM64Instr_Shift(tR, irrm, ARM64RI6_I6(1), ARM64sh_SHR));
   addInstr(env, ARM64Instr_Logic(tL, tL, ril_two, ARM64lo_AND));
   addInstr(env, ARM64Instr_Logic(tR, tR, ril_one, ARM64lo_AND));
   addInstr(env, ARM64Instr_Logic(t3, tL, ARM64RIL_R(tR), ARM64lo_OR));
   addInstr(env, ARM64Instr_Shift(t3, t3, ARM64RI6_I6(22), ARM64sh_SHL));
   addInstr(env, ARM64Instr_FPCR(True/*toFPCR*/, t3));
}
469
470
471/*---------------------------------------------------------*/
472/*--- ISEL: Function call helpers ---*/
473/*---------------------------------------------------------*/
474
475/* Used only in doHelperCall. See big comment in doHelperCall re
476 handling of register-parameter args. This function figures out
477 whether evaluation of an expression might require use of a fixed
478 register. If in doubt return True (safe but suboptimal).
479*/
480static
481Bool mightRequireFixedRegs ( IRExpr* e )
482{
483 if (UNLIKELY(is_IRExpr_VECRET_or_BBPTR(e))) {
484 // These are always "safe" -- either a copy of SP in some
485 // arbitrary vreg, or a copy of x21, respectively.
486 return False;
487 }
488 /* Else it's a "normal" expression. */
489 switch (e->tag) {
490 case Iex_RdTmp: case Iex_Const: case Iex_Get:
491 return False;
492 default:
493 return True;
494 }
495}
496
497
498/* Do a complete function call. |guard| is a Ity_Bit expression
499 indicating whether or not the call happens. If guard==NULL, the
500 call is unconditional. |retloc| is set to indicate where the
501 return value is after the call. The caller (of this fn) must
502 generate code to add |stackAdjustAfterCall| to the stack pointer
503 after the call is done. Returns True iff it managed to handle this
504 combination of arg/return types, else returns False. */
505
506static
507Bool doHelperCall ( /*OUT*/UInt* stackAdjustAfterCall,
508 /*OUT*/RetLoc* retloc,
509 ISelEnv* env,
510 IRExpr* guard,
511 IRCallee* cee, IRType retTy, IRExpr** args )
512{
513 ARM64CondCode cc;
514 HReg argregs[ARM64_N_ARGREGS];
515 HReg tmpregs[ARM64_N_ARGREGS];
516 Bool go_fast;
517 Int n_args, i, nextArgReg;
518 ULong target;
519
520 vassert(ARM64_N_ARGREGS == 8);
521
522 /* Set default returns. We'll update them later if needed. */
523 *stackAdjustAfterCall = 0;
524 *retloc = mk_RetLoc_INVALID();
525
526 /* These are used for cross-checking that IR-level constraints on
527 the use of IRExpr_VECRET() and IRExpr_BBPTR() are observed. */
528 UInt nVECRETs = 0;
529 UInt nBBPTRs = 0;
530
531 /* Marshal args for a call and do the call.
532
533 This function only deals with a tiny set of possibilities, which
534 cover all helpers in practice. The restrictions are that only
535 arguments in registers are supported, hence only
536 ARM64_N_REGPARMS x 64 integer bits in total can be passed. In
537 fact the only supported arg type is I64.
538
539 The return type can be I{64,32} or V128. In the V128 case, it
540 is expected that |args| will contain the special node
541 IRExpr_VECRET(), in which case this routine generates code to
542 allocate space on the stack for the vector return value. Since
543 we are not passing any scalars on the stack, it is enough to
544 preallocate the return space before marshalling any arguments,
545 in this case.
546
547 |args| may also contain IRExpr_BBPTR(), in which case the
548 value in x21 is passed as the corresponding argument.
549
550 Generating code which is both efficient and correct when
551 parameters are to be passed in registers is difficult, for the
552 reasons elaborated in detail in comments attached to
553 doHelperCall() in priv/host-x86/isel.c. Here, we use a variant
554 of the method described in those comments.
555
556 The problem is split into two cases: the fast scheme and the
557 slow scheme. In the fast scheme, arguments are computed
558 directly into the target (real) registers. This is only safe
559 when we can be sure that computation of each argument will not
560 trash any real registers set by computation of any other
561 argument.
562
563 In the slow scheme, all args are first computed into vregs, and
564 once they are all done, they are moved to the relevant real
565 regs. This always gives correct code, but it also gives a bunch
566 of vreg-to-rreg moves which are usually redundant but are hard
567 for the register allocator to get rid of.
568
569 To decide which scheme to use, all argument expressions are
570 first examined. If they are all so simple that it is clear they
571 will be evaluated without use of any fixed registers, use the
572 fast scheme, else use the slow scheme. Note also that only
573 unconditional calls may use the fast scheme, since having to
574 compute a condition expression could itself trash real
575 registers.
576
577 Note this requires being able to examine an expression and
578 determine whether or not evaluation of it might use a fixed
579 register. That requires knowledge of how the rest of this insn
580 selector works. Currently just the following 3 are regarded as
581 safe -- hopefully they cover the majority of arguments in
582 practice: IRExpr_Tmp IRExpr_Const IRExpr_Get.
583 */
584
585 /* Note that the cee->regparms field is meaningless on ARM64 hosts
586 (since there is only one calling convention) and so we always
587 ignore it. */
588
589 n_args = 0;
590 for (i = 0; args[i]; i++) {
591 IRExpr* arg = args[i];
592 if (UNLIKELY(arg->tag == Iex_VECRET)) {
593 nVECRETs++;
594 } else if (UNLIKELY(arg->tag == Iex_BBPTR)) {
595 nBBPTRs++;
596 }
597 n_args++;
598 }
599
600 /* If this fails, the IR is ill-formed */
601 vassert(nBBPTRs == 0 || nBBPTRs == 1);
602
603 /* If we have a VECRET, allocate space on the stack for the return
604 value, and record the stack pointer after that. */
605 HReg r_vecRetAddr = INVALID_HREG;
606 if (nVECRETs == 1) {
607 vassert(retTy == Ity_V128 || retTy == Ity_V256);
608 vassert(retTy != Ity_V256); // we don't handle that yet (if ever)
609 r_vecRetAddr = newVRegI(env);
610 addInstr(env, ARM64Instr_AddToSP(-16));
611 addInstr(env, ARM64Instr_FromSP(r_vecRetAddr));
612 } else {
613 // If either of these fail, the IR is ill-formed
614 vassert(retTy != Ity_V128 && retTy != Ity_V256);
615 vassert(nVECRETs == 0);
616 }
617
618 argregs[0] = hregARM64_X0();
619 argregs[1] = hregARM64_X1();
620 argregs[2] = hregARM64_X2();
621 argregs[3] = hregARM64_X3();
622 argregs[4] = hregARM64_X4();
623 argregs[5] = hregARM64_X5();
624 argregs[6] = hregARM64_X6();
625 argregs[7] = hregARM64_X7();
626
627 tmpregs[0] = tmpregs[1] = tmpregs[2] = tmpregs[3] = INVALID_HREG;
628 tmpregs[4] = tmpregs[5] = tmpregs[6] = tmpregs[7] = INVALID_HREG;
629
630 /* First decide which scheme (slow or fast) is to be used. First
631 assume the fast scheme, and select slow if any contraindications
632 (wow) appear. */
633
634 go_fast = True;
635
636 if (guard) {
637 if (guard->tag == Iex_Const
638 && guard->Iex.Const.con->tag == Ico_U1
639 && guard->Iex.Const.con->Ico.U1 == True) {
640 /* unconditional */
641 } else {
642 /* Not manifestly unconditional -- be conservative. */
643 go_fast = False;
644 }
645 }
646
647 if (go_fast) {
648 for (i = 0; i < n_args; i++) {
649 if (mightRequireFixedRegs(args[i])) {
650 go_fast = False;
651 break;
652 }
653 }
654 }
655
656 if (go_fast) {
657 if (retTy == Ity_V128 || retTy == Ity_V256)
658 go_fast = False;
659 }
660
661 /* At this point the scheme to use has been established. Generate
662 code to get the arg values into the argument rregs. If we run
663 out of arg regs, give up. */
664
665 if (go_fast) {
666
667 /* FAST SCHEME */
668 nextArgReg = 0;
669
670 for (i = 0; i < n_args; i++) {
671 IRExpr* arg = args[i];
672
673 IRType aTy = Ity_INVALID;
674 if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg)))
675 aTy = typeOfIRExpr(env->type_env, args[i]);
676
677 if (nextArgReg >= ARM64_N_ARGREGS)
678 return False; /* out of argregs */
679
680 if (aTy == Ity_I64) {
681 addInstr(env, ARM64Instr_MovI( argregs[nextArgReg],
682 iselIntExpr_R(env, args[i]) ));
683 nextArgReg++;
684 }
685 else if (arg->tag == Iex_BBPTR) {
686 vassert(0); //ATC
687 addInstr(env, ARM64Instr_MovI( argregs[nextArgReg],
688 hregARM64_X21() ));
689 nextArgReg++;
690 }
691 else if (arg->tag == Iex_VECRET) {
692 // because of the go_fast logic above, we can't get here,
693 // since vector return values makes us use the slow path
694 // instead.
695 vassert(0);
696 }
697 else
698 return False; /* unhandled arg type */
699 }
700
701 /* Fast scheme only applies for unconditional calls. Hence: */
702 cc = ARM64cc_AL;
703
704 } else {
705
706 /* SLOW SCHEME; move via temporaries */
707 nextArgReg = 0;
708
709 for (i = 0; i < n_args; i++) {
710 IRExpr* arg = args[i];
711
712 IRType aTy = Ity_INVALID;
713 if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg)))
714 aTy = typeOfIRExpr(env->type_env, args[i]);
715
716 if (nextArgReg >= ARM64_N_ARGREGS)
717 return False; /* out of argregs */
718
719 if (aTy == Ity_I64) {
720 tmpregs[nextArgReg] = iselIntExpr_R(env, args[i]);
721 nextArgReg++;
722 }
723 else if (arg->tag == Iex_BBPTR) {
724 vassert(0); //ATC
725 tmpregs[nextArgReg] = hregARM64_X21();
726 nextArgReg++;
727 }
728 else if (arg->tag == Iex_VECRET) {
729 vassert(!hregIsInvalid(r_vecRetAddr));
730 tmpregs[nextArgReg] = r_vecRetAddr;
731 nextArgReg++;
732 }
733 else
734 return False; /* unhandled arg type */
735 }
736
737 /* Now we can compute the condition. We can't do it earlier
738 because the argument computations could trash the condition
739 codes. Be a bit clever to handle the common case where the
740 guard is 1:Bit. */
741 cc = ARM64cc_AL;
742 if (guard) {
743 if (guard->tag == Iex_Const
744 && guard->Iex.Const.con->tag == Ico_U1
745 && guard->Iex.Const.con->Ico.U1 == True) {
746 /* unconditional -- do nothing */
747 } else {
748 cc = iselCondCode( env, guard );
749 }
750 }
751
752 /* Move the args to their final destinations. */
753 for (i = 0; i < nextArgReg; i++) {
754 vassert(!(hregIsInvalid(tmpregs[i])));
755 /* None of these insns, including any spill code that might
756 be generated, may alter the condition codes. */
757 addInstr( env, ARM64Instr_MovI( argregs[i], tmpregs[i] ) );
758 }
759
760 }
761
762 /* Should be assured by checks above */
763 vassert(nextArgReg <= ARM64_N_ARGREGS);
764
765 /* Do final checks, set the return values, and generate the call
766 instruction proper. */
767 vassert(nBBPTRs == 0 || nBBPTRs == 1);
768 vassert(nVECRETs == (retTy == Ity_V128 || retTy == Ity_V256) ? 1 : 0);
769 vassert(*stackAdjustAfterCall == 0);
770 vassert(is_RetLoc_INVALID(*retloc));
771 switch (retTy) {
772 case Ity_INVALID:
773 /* Function doesn't return a value. */
774 *retloc = mk_RetLoc_simple(RLPri_None);
775 break;
776 case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
777 *retloc = mk_RetLoc_simple(RLPri_Int);
778 break;
779 case Ity_V128:
780 *retloc = mk_RetLoc_spRel(RLPri_V128SpRel, 0);
781 *stackAdjustAfterCall = 16;
782 break;
783 case Ity_V256:
784 vassert(0); // ATC
785 *retloc = mk_RetLoc_spRel(RLPri_V256SpRel, 0);
786 *stackAdjustAfterCall = 32;
787 break;
788 default:
789 /* IR can denote other possible return types, but we don't
790 handle those here. */
791 vassert(0);
792 }
793
794 /* Finally, generate the call itself. This needs the *retloc value
795 set in the switch above, which is why it's at the end. */
796
797 /* nextArgReg doles out argument registers. Since these are
798 assigned in the order x0 .. x7, its numeric value at this point,
799 which must be between 0 and 8 inclusive, is going to be equal to
800 the number of arg regs in use for the call. Hence bake that
801 number into the call (we'll need to know it when doing register
802 allocation, to know what regs the call reads.) */
803
804 target = (HWord)Ptr_to_ULong(cee->addr);
805 addInstr(env, ARM64Instr_Call( cc, target, nextArgReg, *retloc ));
806
807 return True; /* success */
808}
809
810
811/*---------------------------------------------------------*/
812/*--- ISEL: Integer expressions (64/32 bit) ---*/
813/*---------------------------------------------------------*/
814
815/* Select insns for an integer-typed expression, and add them to the
816 code list. Return a reg holding the result. This reg will be a
817 virtual register. THE RETURNED REG MUST NOT BE MODIFIED. If you
818 want to modify it, ask for a new vreg, copy it in there, and modify
819 the copy. The register allocator will do its best to map both
820 vregs to the same real register, so the copies will often disappear
821 later in the game.
822
823 This should handle expressions of 64- and 32-bit type. All results
824 are returned in a 64-bit register. For 32-bit expressions, the
825 upper 32 bits are arbitrary, so you should mask or sign extend
826 partial values if necessary.
827*/
828
829/* --------------------- AMode --------------------- */
830
831/* Return an AMode which computes the value of the specified
832 expression, possibly also adding insns to the code list as a
833 result. The expression may only be a 64-bit one.
834*/
835
836static Bool isValidScale ( UChar scale )
837{
838 switch (scale) {
839 case 1: case 2: case 4: case 8: /* case 16: ??*/ return True;
840 default: return False;
841 }
842}
843
/* Sanity-check an amode produced by iselIntExpr_AMode_wrk: every
   register involved must be a 64-bit-integer-class virtual register,
   and each immediate must lie in the encodable range for its form. */
static Bool sane_AMode ( ARM64AMode* am )
{
   switch (am->tag) {
      case ARM64am_RI9:
         /* reg + signed 9-bit unscaled offset */
         return
            toBool( hregClass(am->ARM64am.RI9.reg) == HRcInt64
                    && (hregIsVirtual(am->ARM64am.RI9.reg)
                        /* || sameHReg(am->ARM64am.RI9.reg,
                                       hregARM64_X21()) */ )
                    && am->ARM64am.RI9.simm9 >= -256
                    && am->ARM64am.RI9.simm9 <= 255 );
      case ARM64am_RI12:
         /* reg + unsigned 12-bit offset, scaled by the access size */
         return
            toBool( hregClass(am->ARM64am.RI12.reg) == HRcInt64
                    && (hregIsVirtual(am->ARM64am.RI12.reg)
                        /* || sameHReg(am->ARM64am.RI12.reg,
                                       hregARM64_X21()) */ )
                    && am->ARM64am.RI12.uimm12 < 4096
                    && isValidScale(am->ARM64am.RI12.szB) );
      case ARM64am_RR:
         /* reg + reg */
         return
            toBool( hregClass(am->ARM64am.RR.base) == HRcInt64
                    && hregIsVirtual(am->ARM64am.RR.base)
                    && hregClass(am->ARM64am.RR.index) == HRcInt64
                    && hregIsVirtual(am->ARM64am.RR.index) );
      default:
         vpanic("sane_AMode: unknown ARM64 AMode1 tag");
   }
}
873
874static
875ARM64AMode* iselIntExpr_AMode ( ISelEnv* env, IRExpr* e, IRType dty )
876{
877 ARM64AMode* am = iselIntExpr_AMode_wrk(env, e, dty);
878 vassert(sane_AMode(am));
879 return am;
880}
881
882static
883ARM64AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, IRExpr* e, IRType dty )
884{
885 IRType ty = typeOfIRExpr(env->type_env,e);
886 vassert(ty == Ity_I64);
887
888 ULong szBbits = 0;
889 switch (dty) {
890 case Ity_I64: szBbits = 3; break;
891 case Ity_I32: szBbits = 2; break;
892 case Ity_I16: szBbits = 1; break;
893 case Ity_I8: szBbits = 0; break;
894 default: vassert(0);
895 }
896
897 /* {Add64,Sub64}(expr,simm9). We don't care about |dty| here since
898 we're going to create an amode suitable for LDU* or STU*
899 instructions, which use unscaled immediate offsets. */
900 if (e->tag == Iex_Binop
901 && (e->Iex.Binop.op == Iop_Add64 || e->Iex.Binop.op == Iop_Sub64)
902 && e->Iex.Binop.arg2->tag == Iex_Const
903 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64) {
904 Long simm = (Long)e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
905 if (simm >= -256 && simm <= 255) {
906 HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
907 return ARM64AMode_RI9(reg, (Int)simm);
908 }
909 }
910
911 /* Add64(expr, uimm12 * transfer-size) */
912 if (e->tag == Iex_Binop
913 && e->Iex.Binop.op == Iop_Add64
914 && e->Iex.Binop.arg2->tag == Iex_Const
915 && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64) {
916 ULong uimm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
917 ULong szB = 1 << szBbits;
918 if (0 == (uimm & (szB-1)) /* "uimm is szB-aligned" */
919 && (uimm >> szBbits) < 4096) {
920 HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
921 return ARM64AMode_RI12(reg, (UInt)(uimm >> szBbits), (UChar)szB);
922 }
923 }
924
925 /* Add64(expr1, expr2) */
926 if (e->tag == Iex_Binop
927 && e->Iex.Binop.op == Iop_Add64) {
928 HReg reg1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
929 HReg reg2 = iselIntExpr_R(env, e->Iex.Binop.arg2);
930 return ARM64AMode_RR(reg1, reg2);
931 }
932
933 /* Doesn't match anything in particular. Generate it into
934 a register and use that. */
935 HReg reg = iselIntExpr_R(env, e);
936 return ARM64AMode_RI9(reg, 0);
937}
938
939//ZZ /* --------------------- AModeV --------------------- */
940//ZZ
941//ZZ /* Return an AModeV which computes the value of the specified
942//ZZ expression, possibly also adding insns to the code list as a
943//ZZ result. The expression may only be a 32-bit one.
944//ZZ */
945//ZZ
946//ZZ static Bool sane_AModeV ( ARMAModeV* am )
947//ZZ {
948//ZZ return toBool( hregClass(am->reg) == HRcInt32
949//ZZ && hregIsVirtual(am->reg)
950//ZZ && am->simm11 >= -1020 && am->simm11 <= 1020
951//ZZ && 0 == (am->simm11 & 3) );
952//ZZ }
953//ZZ
954//ZZ static ARMAModeV* iselIntExpr_AModeV ( ISelEnv* env, IRExpr* e )
955//ZZ {
956//ZZ ARMAModeV* am = iselIntExpr_AModeV_wrk(env, e);
957//ZZ vassert(sane_AModeV(am));
958//ZZ return am;
959//ZZ }
960//ZZ
961//ZZ static ARMAModeV* iselIntExpr_AModeV_wrk ( ISelEnv* env, IRExpr* e )
962//ZZ {
963//ZZ IRType ty = typeOfIRExpr(env->type_env,e);
964//ZZ vassert(ty == Ity_I32);
965//ZZ
966//ZZ /* {Add32,Sub32}(expr, simm8 << 2) */
967//ZZ if (e->tag == Iex_Binop
968//ZZ && (e->Iex.Binop.op == Iop_Add32 || e->Iex.Binop.op == Iop_Sub32)
969//ZZ && e->Iex.Binop.arg2->tag == Iex_Const
970//ZZ && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
971//ZZ Int simm = (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
972//ZZ if (simm >= -1020 && simm <= 1020 && 0 == (simm & 3)) {
973//ZZ HReg reg;
974//ZZ if (e->Iex.Binop.op == Iop_Sub32)
975//ZZ simm = -simm;
976//ZZ reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
977//ZZ return mkARMAModeV(reg, simm);
978//ZZ }
979//ZZ }
980//ZZ
981//ZZ /* Doesn't match anything in particular. Generate it into
982//ZZ a register and use that. */
983//ZZ {
984//ZZ HReg reg = iselIntExpr_R(env, e);
985//ZZ return mkARMAModeV(reg, 0);
986//ZZ }
987//ZZ
988//ZZ }
989//ZZ
990//ZZ /* -------------------- AModeN -------------------- */
991//ZZ
992//ZZ static ARMAModeN* iselIntExpr_AModeN ( ISelEnv* env, IRExpr* e )
993//ZZ {
994//ZZ return iselIntExpr_AModeN_wrk(env, e);
995//ZZ }
996//ZZ
997//ZZ static ARMAModeN* iselIntExpr_AModeN_wrk ( ISelEnv* env, IRExpr* e )
998//ZZ {
999//ZZ HReg reg = iselIntExpr_R(env, e);
1000//ZZ return mkARMAModeN_R(reg);
1001//ZZ }
1002//ZZ
1003//ZZ
1004//ZZ /* --------------------- RI84 --------------------- */
1005//ZZ
1006//ZZ /* Select instructions to generate 'e' into a RI84. If mayInv is
1007//ZZ true, then the caller will also accept an I84 form that denotes
1008//ZZ 'not e'. In this case didInv may not be NULL, and *didInv is set
1009//ZZ to True. This complication is so as to allow generation of an RI84
1010//ZZ which is suitable for use in either an AND or BIC instruction,
1011//ZZ without knowing (before this call) which one.
1012//ZZ */
1013//ZZ static ARMRI84* iselIntExpr_RI84 ( /*OUT*/Bool* didInv, Bool mayInv,
1014//ZZ ISelEnv* env, IRExpr* e )
1015//ZZ {
1016//ZZ ARMRI84* ri;
1017//ZZ if (mayInv)
1018//ZZ vassert(didInv != NULL);
1019//ZZ ri = iselIntExpr_RI84_wrk(didInv, mayInv, env, e);
1020//ZZ /* sanity checks ... */
1021//ZZ switch (ri->tag) {
1022//ZZ case ARMri84_I84:
1023//ZZ return ri;
1024//ZZ case ARMri84_R:
1025//ZZ vassert(hregClass(ri->ARMri84.R.reg) == HRcInt32);
1026//ZZ vassert(hregIsVirtual(ri->ARMri84.R.reg));
1027//ZZ return ri;
1028//ZZ default:
1029//ZZ vpanic("iselIntExpr_RI84: unknown arm RI84 tag");
1030//ZZ }
1031//ZZ }
1032//ZZ
1033//ZZ /* DO NOT CALL THIS DIRECTLY ! */
1034//ZZ static ARMRI84* iselIntExpr_RI84_wrk ( /*OUT*/Bool* didInv, Bool mayInv,
1035//ZZ ISelEnv* env, IRExpr* e )
1036//ZZ {
1037//ZZ IRType ty = typeOfIRExpr(env->type_env,e);
1038//ZZ vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
1039//ZZ
1040//ZZ if (didInv) *didInv = False;
1041//ZZ
1042//ZZ /* special case: immediate */
1043//ZZ if (e->tag == Iex_Const) {
1044//ZZ UInt u, u8 = 0x100, u4 = 0x10; /* both invalid */
1045//ZZ switch (e->Iex.Const.con->tag) {
1046//ZZ case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
1047//ZZ case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
1048//ZZ case Ico_U8: u = 0xFF & (e->Iex.Const.con->Ico.U8); break;
1049//ZZ default: vpanic("iselIntExpr_RI84.Iex_Const(armh)");
1050//ZZ }
1051//ZZ if (fitsIn8x4(&u8, &u4, u)) {
1052//ZZ return ARMRI84_I84( (UShort)u8, (UShort)u4 );
1053//ZZ }
1054//ZZ if (mayInv && fitsIn8x4(&u8, &u4, ~u)) {
1055//ZZ vassert(didInv);
1056//ZZ *didInv = True;
1057//ZZ return ARMRI84_I84( (UShort)u8, (UShort)u4 );
1058//ZZ }
1059//ZZ /* else fail, fall through to default case */
1060//ZZ }
1061//ZZ
1062//ZZ /* default case: calculate into a register and return that */
1063//ZZ {
1064//ZZ HReg r = iselIntExpr_R ( env, e );
1065//ZZ return ARMRI84_R(r);
1066//ZZ }
1067//ZZ }
1068
1069
1070/* --------------------- RIA --------------------- */
1071
1072/* Select instructions to generate 'e' into a RIA. */
1073
1074static ARM64RIA* iselIntExpr_RIA ( ISelEnv* env, IRExpr* e )
1075{
1076 ARM64RIA* ri = iselIntExpr_RIA_wrk(env, e);
1077 /* sanity checks ... */
1078 switch (ri->tag) {
1079 case ARM64riA_I12:
1080 vassert(ri->ARM64riA.I12.imm12 < 4096);
1081 vassert(ri->ARM64riA.I12.shift == 0 || ri->ARM64riA.I12.shift == 12);
1082 return ri;
1083 case ARM64riA_R:
1084 vassert(hregClass(ri->ARM64riA.R.reg) == HRcInt64);
1085 vassert(hregIsVirtual(ri->ARM64riA.R.reg));
1086 return ri;
1087 default:
1088 vpanic("iselIntExpr_RIA: unknown arm RIA tag");
1089 }
1090}
1091
1092/* DO NOT CALL THIS DIRECTLY ! */
1093static ARM64RIA* iselIntExpr_RIA_wrk ( ISelEnv* env, IRExpr* e )
1094{
1095 IRType ty = typeOfIRExpr(env->type_env,e);
1096 vassert(ty == Ity_I64 || ty == Ity_I32);
1097
1098 /* special case: immediate */
1099 if (e->tag == Iex_Const) {
1100 ULong u = 0xF000000ULL; /* invalid */
1101 switch (e->Iex.Const.con->tag) {
1102 case Ico_U64: u = e->Iex.Const.con->Ico.U64; break;
1103 case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
1104 default: vpanic("iselIntExpr_RIA.Iex_Const(arm64)");
1105 }
1106 if (0 == (u & ~(0xFFFULL << 0)))
1107 return ARM64RIA_I12((UShort)((u >> 0) & 0xFFFULL), 0);
1108 if (0 == (u & ~(0xFFFULL << 12)))
1109 return ARM64RIA_I12((UShort)((u >> 12) & 0xFFFULL), 12);
1110 /* else fail, fall through to default case */
1111 }
1112
1113 /* default case: calculate into a register and return that */
1114 {
1115 HReg r = iselIntExpr_R ( env, e );
1116 return ARM64RIA_R(r);
1117 }
1118}
1119
1120
1121/* --------------------- RIL --------------------- */
1122
1123/* Select instructions to generate 'e' into a RIL. At this point we
1124 have to deal with the strange bitfield-immediate encoding for logic
1125 instructions. */
1126
1127
1128// The following four functions
1129// CountLeadingZeros CountTrailingZeros CountSetBits isImmLogical
1130// are copied, with modifications, from
1131// https://github.com/armvixl/vixl/blob/master/src/a64/assembler-a64.cc
1132// which has the following copyright notice:
1133/*
1134 Copyright 2013, ARM Limited
1135 All rights reserved.
1136
1137 Redistribution and use in source and binary forms, with or without
1138 modification, are permitted provided that the following conditions are met:
1139
1140 * Redistributions of source code must retain the above copyright notice,
1141 this list of conditions and the following disclaimer.
1142 * Redistributions in binary form must reproduce the above copyright notice,
1143 this list of conditions and the following disclaimer in the documentation
1144 and/or other materials provided with the distribution.
1145 * Neither the name of ARM Limited nor the names of its contributors may be
1146 used to endorse or promote products derived from this software without
1147 specific prior written permission.
1148
1149 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
1150 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
1151 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
1152 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
1153 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
1154 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
1155 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
1156 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
1157 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
1158 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
1159*/
1160
1161static Int CountLeadingZeros(ULong value, Int width)
1162{
1163 vassert(width == 32 || width == 64);
1164 Int count = 0;
1165 ULong bit_test = 1ULL << (width - 1);
1166 while ((count < width) && ((bit_test & value) == 0)) {
1167 count++;
1168 bit_test >>= 1;
1169 }
1170 return count;
1171}
1172
1173static Int CountTrailingZeros(ULong value, Int width)
1174{
1175 vassert(width == 32 || width == 64);
1176 Int count = 0;
1177 while ((count < width) && (((value >> count) & 1) == 0)) {
1178 count++;
1179 }
1180 return count;
1181}
1182
1183static Int CountSetBits(ULong value, Int width)
1184{
1185 // TODO: Other widths could be added here, as the implementation already
1186 // supports them.
1187 vassert(width == 32 || width == 64);
1188
1189 // Mask out unused bits to ensure that they are not counted.
1190 value &= (0xffffffffffffffffULL >> (64-width));
1191
1192 // Add up the set bits.
1193 // The algorithm works by adding pairs of bit fields together iteratively,
1194 // where the size of each bit field doubles each time.
1195 // An example for an 8-bit value:
1196 // Bits: h g f e d c b a
1197 // \ | \ | \ | \ |
1198 // value = h+g f+e d+c b+a
1199 // \ | \ |
1200 // value = h+g+f+e d+c+b+a
1201 // \ |
1202 // value = h+g+f+e+d+c+b+a
1203 value = ((value >> 1) & 0x5555555555555555) + (value & 0x5555555555555555);
1204 value = ((value >> 2) & 0x3333333333333333) + (value & 0x3333333333333333);
1205 value = ((value >> 4) & 0x0f0f0f0f0f0f0f0f) + (value & 0x0f0f0f0f0f0f0f0f);
1206 value = ((value >> 8) & 0x00ff00ff00ff00ff) + (value & 0x00ff00ff00ff00ff);
1207 value = ((value >> 16) & 0x0000ffff0000ffff) + (value & 0x0000ffff0000ffff);
1208 value = ((value >> 32) & 0x00000000ffffffff) + (value & 0x00000000ffffffff);
1209
1210 return value;
1211}
1212
static Bool isImmLogical ( /*OUT*/UInt* n,
                           /*OUT*/UInt* imm_s, /*OUT*/UInt* imm_r,
                           ULong value, UInt width )
{
   // Test if a given value can be encoded in the immediate field of a
   // logical instruction.

   // If it can be encoded, the function returns true, and values
   // pointed to by n, imm_s and imm_r are updated with immediates
   // encoded in the format required by the corresponding fields in the
   // logical instruction. If it can not be encoded, the function
   // returns false, and the values pointed to by n, imm_s and imm_r
   // are undefined.
   vassert(n != NULL && imm_s != NULL && imm_r != NULL);
   vassert(width == 32 || width == 64);

   // Logical immediates are encoded using parameters n, imm_s and imm_r using
   // the following table:
   //
   //  N   imms    immr    size        S             R
   //  1  ssssss  rrrrrr    64    UInt(ssssss)  UInt(rrrrrr)
   //  0  0sssss  xrrrrr    32    UInt(sssss)   UInt(rrrrr)
   //  0  10ssss  xxrrrr    16    UInt(ssss)    UInt(rrrr)
   //  0  110sss  xxxrrr     8    UInt(sss)     UInt(rrr)
   //  0  1110ss  xxxxrr     4    UInt(ss)      UInt(rr)
   //  0  11110s  xxxxxr     2    UInt(s)       UInt(r)
   //  (s bits must not be all set)
   //
   // A pattern is constructed of size bits, where the least significant S+1
   // bits are set. The pattern is rotated right by R, and repeated across a
   // 32 or 64-bit value, depending on destination register width.
   //
   // To test if an arbitrary immediate can be encoded using this scheme, an
   // iterative algorithm is used.
   //
   // TODO: This code does not consider using X/W register overlap to support
   // 64-bit immediates where the top 32-bits are zero, and the bottom 32-bits
   // are an encodable logical immediate.

   // 1. If the value has all set or all clear bits, it can't be encoded.
   if ((value == 0) || (value == 0xffffffffffffffffULL) ||
       ((width == 32) && (value == 0xffffffff))) {
      return False;
   }

   // Characterise the value: these five counts drive every test below.
   UInt lead_zero  = CountLeadingZeros(value, width);
   UInt lead_one   = CountLeadingZeros(~value, width);
   UInt trail_zero = CountTrailingZeros(value, width);
   UInt trail_one  = CountTrailingZeros(~value, width);
   UInt set_bits   = CountSetBits(value, width);

   // The fixed bits in the immediate s field.
   // If width == 64 (X reg), start at 0xFFFFFF80.
   // If width == 32 (W reg), start at 0xFFFFFFC0, as the iteration for 64-bit
   // widths won't be executed.
   Int imm_s_fixed = (width == 64) ? -128 : -64;
   Int imm_s_mask = 0x3F;

   // Each trip around the loop halves |width| (step 5 below), so the
   // loop terminates after at most six iterations.
   for (;;) {
      // 2. If the value is two bits wide, it can be encoded.
      if (width == 2) {
         *n = 0;
         *imm_s = 0x3C;
         *imm_r = (value & 3) - 1;
         return True;
      }

      *n = (width == 64) ? 1 : 0;
      *imm_s = ((imm_s_fixed | (set_bits - 1)) & imm_s_mask);
      // The rotation (immr) is derived from where the run of set bits
      // sits within the value.
      if ((lead_zero + set_bits) == width) {
         *imm_r = 0;
      } else {
         *imm_r = (lead_zero > 0) ? (width - trail_zero) : lead_one;
      }

      // 3. If the sum of leading zeros, trailing zeros and set bits is equal to
      //    the bit width of the value, it can be encoded.
      if (lead_zero + trail_zero + set_bits == width) {
         return True;
      }

      // 4. If the sum of leading ones, trailing ones and unset bits in the
      //    value is equal to the bit width of the value, it can be encoded.
      if (lead_one + trail_one + (width - set_bits) == width) {
         return True;
      }

      // 5. If the most-significant half of the bitwise value is equal to the
      //    least-significant half, return to step 2 using the least-significant
      //    half of the value.
      ULong mask = (1ULL << (width >> 1)) - 1;
      if ((value & mask) == ((value >> (width >> 1)) & mask)) {
         width >>= 1;
         set_bits >>= 1;
         imm_s_fixed >>= 1;
         continue;
      }

      // 6. Otherwise, the value can't be encoded.
      return False;
   }
}
1315
1316
1317/* Create a RIL for the given immediate, if it is representable, or
1318 return NULL if not. */
1319
1320static ARM64RIL* mb_mkARM64RIL_I ( ULong imm64 )
1321{
1322 UInt n = 0, imm_s = 0, imm_r = 0;
1323 Bool ok = isImmLogical(&n, &imm_s, &imm_r, imm64, 64);
1324 if (!ok) return NULL;
1325 vassert(n < 2 && imm_s < 64 && imm_r < 64);
1326 return ARM64RIL_I13(n, imm_r, imm_s);
1327}
1328
1329/* So, finally .. */
1330
1331static ARM64RIL* iselIntExpr_RIL ( ISelEnv* env, IRExpr* e )
1332{
1333 ARM64RIL* ri = iselIntExpr_RIL_wrk(env, e);
1334 /* sanity checks ... */
1335 switch (ri->tag) {
1336 case ARM64riL_I13:
1337 vassert(ri->ARM64riL.I13.bitN < 2);
1338 vassert(ri->ARM64riL.I13.immR < 64);
1339 vassert(ri->ARM64riL.I13.immS < 64);
1340 return ri;
1341 case ARM64riL_R:
1342 vassert(hregClass(ri->ARM64riL.R.reg) == HRcInt64);
1343 vassert(hregIsVirtual(ri->ARM64riL.R.reg));
1344 return ri;
1345 default:
1346 vpanic("iselIntExpr_RIL: unknown arm RIL tag");
1347 }
1348}
1349
1350/* DO NOT CALL THIS DIRECTLY ! */
1351static ARM64RIL* iselIntExpr_RIL_wrk ( ISelEnv* env, IRExpr* e )
1352{
1353 IRType ty = typeOfIRExpr(env->type_env,e);
1354 vassert(ty == Ity_I64 || ty == Ity_I32);
1355
1356 /* special case: immediate */
1357 if (e->tag == Iex_Const) {
1358 ARM64RIL* maybe = NULL;
1359 if (ty == Ity_I64) {
1360 vassert(e->Iex.Const.con->tag == Ico_U64);
1361 maybe = mb_mkARM64RIL_I(e->Iex.Const.con->Ico.U64);
1362 } else {
1363 vassert(ty == Ity_I32);
1364 vassert(e->Iex.Const.con->tag == Ico_U32);
1365 UInt u32 = e->Iex.Const.con->Ico.U32;
1366 ULong u64 = (ULong)u32;
1367 /* First try with 32 leading zeroes. */
1368 maybe = mb_mkARM64RIL_I(u64);
1369 /* If that doesn't work, try with 2 copies, since it doesn't
1370 matter what winds up in the upper 32 bits. */
1371 if (!maybe) {
1372 maybe = mb_mkARM64RIL_I((u64 << 32) | u64);
1373 }
1374 }
1375 if (maybe) return maybe;
1376 /* else fail, fall through to default case */
1377 }
1378
1379 /* default case: calculate into a register and return that */
1380 {
1381 HReg r = iselIntExpr_R ( env, e );
1382 return ARM64RIL_R(r);
1383 }
1384}
1385
1386
1387/* --------------------- RI6 --------------------- */
1388
1389/* Select instructions to generate 'e' into a RI6. */
1390
1391static ARM64RI6* iselIntExpr_RI6 ( ISelEnv* env, IRExpr* e )
1392{
1393 ARM64RI6* ri = iselIntExpr_RI6_wrk(env, e);
1394 /* sanity checks ... */
1395 switch (ri->tag) {
1396 case ARM64ri6_I6:
1397 vassert(ri->ARM64ri6.I6.imm6 < 64);
1398 vassert(ri->ARM64ri6.I6.imm6 > 0);
1399 return ri;
1400 case ARM64ri6_R:
1401 vassert(hregClass(ri->ARM64ri6.R.reg) == HRcInt64);
1402 vassert(hregIsVirtual(ri->ARM64ri6.R.reg));
1403 return ri;
1404 default:
1405 vpanic("iselIntExpr_RI6: unknown arm RI6 tag");
1406 }
1407}
1408
1409/* DO NOT CALL THIS DIRECTLY ! */
1410static ARM64RI6* iselIntExpr_RI6_wrk ( ISelEnv* env, IRExpr* e )
1411{
1412 IRType ty = typeOfIRExpr(env->type_env,e);
1413 vassert(ty == Ity_I64 || ty == Ity_I8);
1414
1415 /* special case: immediate */
1416 if (e->tag == Iex_Const) {
1417 switch (e->Iex.Const.con->tag) {
1418 case Ico_U8: {
1419 UInt u = e->Iex.Const.con->Ico.U8;
1420 if (u > 0 && u < 64)
1421 return ARM64RI6_I6(u);
1422 break;
1423 default:
1424 break;
1425 }
1426 }
1427 /* else fail, fall through to default case */
1428 }
1429
1430 /* default case: calculate into a register and return that */
1431 {
1432 HReg r = iselIntExpr_R ( env, e );
1433 return ARM64RI6_R(r);
1434 }
1435}
1436
1437
1438/* ------------------- CondCode ------------------- */
1439
1440/* Generate code to evaluated a bit-typed expression, returning the
1441 condition code which would correspond when the expression would
1442 notionally have returned 1. */
1443
1444static ARM64CondCode iselCondCode ( ISelEnv* env, IRExpr* e )
1445{
1446 ARM64CondCode cc = iselCondCode_wrk(env,e);
1447 vassert(cc != ARM64cc_NV);
1448 return cc;
1449}
1450
1451static ARM64CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e )
1452{
1453 vassert(e);
1454 vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);
1455
1456 /* var */
1457 if (e->tag == Iex_RdTmp) {
1458 HReg rTmp = lookupIRTemp(env, e->Iex.RdTmp.tmp);
1459 /* Cmp doesn't modify rTmp; so this is OK. */
1460 ARM64RIL* one = mb_mkARM64RIL_I(1);
1461 vassert(one);
1462 addInstr(env, ARM64Instr_Test(rTmp, one));
1463 return ARM64cc_NE;
1464 }
1465
1466 /* Not1(e) */
1467 if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) {
1468 /* Generate code for the arg, and negate the test condition */
1469 ARM64CondCode cc = iselCondCode(env, e->Iex.Unop.arg);
1470 if (cc == ARM64cc_AL || cc == ARM64cc_NV) {
1471 return ARM64cc_AL;
1472 } else {
1473 return 1 ^ cc;
1474 }
1475 }
1476
1477 /* --- patterns rooted at: 64to1 --- */
1478
1479 if (e->tag == Iex_Unop
1480 && e->Iex.Unop.op == Iop_64to1) {
1481 HReg rTmp = iselIntExpr_R(env, e->Iex.Unop.arg);
1482 ARM64RIL* one = mb_mkARM64RIL_I(1);
1483 vassert(one); /* '1' must be representable */
1484 addInstr(env, ARM64Instr_Test(rTmp, one));
1485 return ARM64cc_NE;
1486 }
1487
1488 /* --- patterns rooted at: CmpNEZ8 --- */
1489
1490 if (e->tag == Iex_Unop
1491 && e->Iex.Unop.op == Iop_CmpNEZ8) {
1492 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
1493 ARM64RIL* xFF = mb_mkARM64RIL_I(0xFF);
1494 addInstr(env, ARM64Instr_Test(r1, xFF));
1495 return ARM64cc_NE;
1496 }
1497
1498 /* --- patterns rooted at: CmpNEZ64 --- */
1499
1500 if (e->tag == Iex_Unop
1501 && e->Iex.Unop.op == Iop_CmpNEZ64) {
1502 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
1503 ARM64RIA* zero = ARM64RIA_I12(0,0);
1504 addInstr(env, ARM64Instr_Cmp(r1, zero, True/*is64*/));
1505 return ARM64cc_NE;
1506 }
1507
1508 /* --- patterns rooted at: CmpNEZ32 --- */
1509
1510 if (e->tag == Iex_Unop
1511 && e->Iex.Unop.op == Iop_CmpNEZ32) {
1512 HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
1513 ARM64RIA* zero = ARM64RIA_I12(0,0);
1514 addInstr(env, ARM64Instr_Cmp(r1, zero, False/*!is64*/));
1515 return ARM64cc_NE;
1516 }
1517
1518 /* --- Cmp*64*(x,y) --- */
1519 if (e->tag == Iex_Binop
1520 && (e->Iex.Binop.op == Iop_CmpEQ64
1521 || e->Iex.Binop.op == Iop_CmpNE64
1522 || e->Iex.Binop.op == Iop_CmpLT64S
1523 || e->Iex.Binop.op == Iop_CmpLT64U
1524 || e->Iex.Binop.op == Iop_CmpLE64S
1525 || e->Iex.Binop.op == Iop_CmpLE64U)) {
1526 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1527 ARM64RIA* argR = iselIntExpr_RIA(env, e->Iex.Binop.arg2);
1528 addInstr(env, ARM64Instr_Cmp(argL, argR, True/*is64*/));
1529 switch (e->Iex.Binop.op) {
1530 case Iop_CmpEQ64: return ARM64cc_EQ;
1531 case Iop_CmpNE64: return ARM64cc_NE;
1532 case Iop_CmpLT64S: return ARM64cc_LT;
1533 case Iop_CmpLT64U: return ARM64cc_CC;
1534 case Iop_CmpLE64S: return ARM64cc_LE;
1535 case Iop_CmpLE64U: return ARM64cc_LS;
1536 default: vpanic("iselCondCode(arm64): CmpXX64");
1537 }
1538 }
1539
1540 /* --- Cmp*32*(x,y) --- */
1541 if (e->tag == Iex_Binop
1542 && (e->Iex.Binop.op == Iop_CmpEQ32
1543 || e->Iex.Binop.op == Iop_CmpNE32
1544 || e->Iex.Binop.op == Iop_CmpLT32S
1545 || e->Iex.Binop.op == Iop_CmpLT32U
1546 || e->Iex.Binop.op == Iop_CmpLE32S
1547 || e->Iex.Binop.op == Iop_CmpLE32U)) {
1548 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1549 ARM64RIA* argR = iselIntExpr_RIA(env, e->Iex.Binop.arg2);
1550 addInstr(env, ARM64Instr_Cmp(argL, argR, False/*!is64*/));
1551 switch (e->Iex.Binop.op) {
1552 case Iop_CmpEQ32: return ARM64cc_EQ;
1553 case Iop_CmpNE32: return ARM64cc_NE;
1554 case Iop_CmpLT32S: return ARM64cc_LT;
1555 case Iop_CmpLT32U: return ARM64cc_CC;
1556 case Iop_CmpLE32S: return ARM64cc_LE;
1557 case Iop_CmpLE32U: return ARM64cc_LS;
1558 default: vpanic("iselCondCode(arm64): CmpXX32");
1559 }
1560 }
1561
1562//ZZ /* const */
1563//ZZ /* Constant 1:Bit */
1564//ZZ if (e->tag == Iex_Const) {
1565//ZZ HReg r;
1566//ZZ vassert(e->Iex.Const.con->tag == Ico_U1);
1567//ZZ vassert(e->Iex.Const.con->Ico.U1 == True
1568//ZZ || e->Iex.Const.con->Ico.U1 == False);
1569//ZZ r = newVRegI(env);
1570//ZZ addInstr(env, ARMInstr_Imm32(r, 0));
1571//ZZ addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, r, ARMRI84_R(r)));
1572//ZZ return e->Iex.Const.con->Ico.U1 ? ARMcc_EQ : ARMcc_NE;
1573//ZZ }
1574//ZZ
1575//ZZ // JRS 2013-Jan-03: this seems completely nonsensical
1576//ZZ /* --- CasCmpEQ* --- */
1577//ZZ /* Ist_Cas has a dummy argument to compare with, so comparison is
1578//ZZ always true. */
1579//ZZ //if (e->tag == Iex_Binop
1580//ZZ // && (e->Iex.Binop.op == Iop_CasCmpEQ32
1581//ZZ // || e->Iex.Binop.op == Iop_CasCmpEQ16
1582//ZZ // || e->Iex.Binop.op == Iop_CasCmpEQ8)) {
1583//ZZ // return ARMcc_AL;
1584//ZZ //}
1585
1586 ppIRExpr(e);
1587 vpanic("iselCondCode");
1588}
1589
1590
1591/* --------------------- Reg --------------------- */
1592
1593static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e )
1594{
1595 HReg r = iselIntExpr_R_wrk(env, e);
1596 /* sanity checks ... */
1597# if 0
1598 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
1599# endif
1600 vassert(hregClass(r) == HRcInt64);
1601 vassert(hregIsVirtual(r));
1602 return r;
1603}
1604
1605/* DO NOT CALL THIS DIRECTLY ! */
1606static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
1607{
1608 IRType ty = typeOfIRExpr(env->type_env,e);
1609 vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
1610
1611 switch (e->tag) {
1612
1613 /* --------- TEMP --------- */
1614 case Iex_RdTmp: {
1615 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
1616 }
1617
1618 /* --------- LOAD --------- */
1619 case Iex_Load: {
1620 HReg dst = newVRegI(env);
1621
1622 if (e->Iex.Load.end != Iend_LE)
1623 goto irreducible;
1624
1625 if (ty == Ity_I64) {
1626 ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty );
1627 addInstr(env, ARM64Instr_LdSt64(True/*isLoad*/, dst, amode));
1628 return dst;
1629 }
1630 if (ty == Ity_I32) {
1631 ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty );
1632 addInstr(env, ARM64Instr_LdSt32(True/*isLoad*/, dst, amode));
1633 return dst;
1634 }
1635 if (ty == Ity_I16) {
1636 ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty );
1637 addInstr(env, ARM64Instr_LdSt16(True/*isLoad*/, dst, amode));
1638 return dst;
1639 }
1640 if (ty == Ity_I8) {
1641 ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty );
1642 addInstr(env, ARM64Instr_LdSt8(True/*isLoad*/, dst, amode));
1643 return dst;
1644 }
1645 break;
1646 }
1647
1648 /* --------- BINARY OP --------- */
1649 case Iex_Binop: {
1650
1651 ARM64LogicOp lop = 0; /* invalid */
1652 ARM64ShiftOp sop = 0; /* invalid */
1653
1654 /* Special-case 0-x into a Neg instruction. Not because it's
1655 particularly useful but more so as to give value flow using
1656 this instruction, so as to check its assembly correctness for
1657 implementation of Left32/Left64. */
1658 switch (e->Iex.Binop.op) {
1659 case Iop_Sub64:
1660 if (isZeroU64(e->Iex.Binop.arg1)) {
1661 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1662 HReg dst = newVRegI(env);
1663 addInstr(env, ARM64Instr_Unary(dst, argR, ARM64un_NEG));
1664 return dst;
1665 }
1666 break;
1667 default:
1668 break;
1669 }
1670
1671 /* ADD/SUB */
1672 switch (e->Iex.Binop.op) {
1673 case Iop_Add64: case Iop_Add32:
1674 case Iop_Sub64: case Iop_Sub32: {
1675 Bool isAdd = e->Iex.Binop.op == Iop_Add64
1676 || e->Iex.Binop.op == Iop_Add32;
1677 HReg dst = newVRegI(env);
1678 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1679 ARM64RIA* argR = iselIntExpr_RIA(env, e->Iex.Binop.arg2);
1680 addInstr(env, ARM64Instr_Arith(dst, argL, argR, isAdd));
1681 return dst;
1682 }
1683 default:
1684 break;
1685 }
1686
1687 /* AND/OR/XOR */
1688 switch (e->Iex.Binop.op) {
1689 case Iop_And64: case Iop_And32: lop = ARM64lo_AND; goto log_binop;
1690 case Iop_Or64: case Iop_Or32: lop = ARM64lo_OR; goto log_binop;
1691 case Iop_Xor64: case Iop_Xor32: lop = ARM64lo_XOR; goto log_binop;
1692 log_binop: {
1693 HReg dst = newVRegI(env);
1694 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1695 ARM64RIL* argR = iselIntExpr_RIL(env, e->Iex.Binop.arg2);
1696 addInstr(env, ARM64Instr_Logic(dst, argL, argR, lop));
1697 return dst;
1698 }
1699 default:
1700 break;
1701 }
1702
1703 /* SHL/SHR/SAR */
1704 switch (e->Iex.Binop.op) {
1705 case Iop_Shr64: sop = ARM64sh_SHR; goto sh_binop;
1706 case Iop_Sar64: sop = ARM64sh_SAR; goto sh_binop;
1707 case Iop_Shl64: case Iop_Shl32: sop = ARM64sh_SHL; goto sh_binop;
1708 sh_binop: {
1709 HReg dst = newVRegI(env);
1710 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1711 ARM64RI6* argR = iselIntExpr_RI6(env, e->Iex.Binop.arg2);
1712 addInstr(env, ARM64Instr_Shift(dst, argL, argR, sop));
1713 return dst;
1714 }
1715 case Iop_Shr32:
1716 case Iop_Sar32: {
1717 Bool zx = e->Iex.Binop.op == Iop_Shr32;
1718 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1719 ARM64RI6* argR = iselIntExpr_RI6(env, e->Iex.Binop.arg2);
1720 HReg dst = zx ? widen_z_32_to_64(env, argL)
1721 : widen_s_32_to_64(env, argL);
1722 addInstr(env, ARM64Instr_Shift(dst, dst, argR, ARM64sh_SHR));
1723 return dst;
1724 }
1725 default: break;
1726 }
1727
1728 /* MUL */
1729 if (e->Iex.Binop.op == Iop_Mul64 || e->Iex.Binop.op == Iop_Mul32) {
1730 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1731 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1732 HReg dst = newVRegI(env);
1733 addInstr(env, ARM64Instr_Mul(dst, argL, argR, ARM64mul_PLAIN));
1734 return dst;
1735 }
1736
1737 /* MULL */
1738 if (e->Iex.Binop.op == Iop_MullU32 || e->Iex.Binop.op == Iop_MullS32) {
1739 Bool isS = e->Iex.Binop.op == Iop_MullS32;
1740 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1741 HReg extL = (isS ? widen_s_32_to_64 : widen_z_32_to_64)(env, argL);
1742 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1743 HReg extR = (isS ? widen_s_32_to_64 : widen_z_32_to_64)(env, argR);
1744 HReg dst = newVRegI(env);
1745 addInstr(env, ARM64Instr_Mul(dst, extL, extR, ARM64mul_PLAIN));
1746 return dst;
1747 }
1748
1749 /* Handle misc other ops. */
1750
1751//ZZ if (e->Iex.Binop.op == Iop_Max32U) {
1752//ZZ HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1753//ZZ HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1754//ZZ HReg dst = newVRegI(env);
1755//ZZ addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, argL,
1756//ZZ ARMRI84_R(argR)));
1757//ZZ addInstr(env, mk_iMOVds_RR(dst, argL));
1758//ZZ addInstr(env, ARMInstr_CMov(ARMcc_LO, dst, ARMRI84_R(argR)));
1759//ZZ return dst;
1760//ZZ }
1761
1762 if (e->Iex.Binop.op == Iop_32HLto64) {
1763 HReg hi32s = iselIntExpr_R(env, e->Iex.Binop.arg1);
1764 HReg lo32s = iselIntExpr_R(env, e->Iex.Binop.arg2);
1765 HReg lo32 = widen_z_32_to_64(env, lo32s);
1766 HReg hi32 = newVRegI(env);
1767 addInstr(env, ARM64Instr_Shift(hi32, hi32s, ARM64RI6_I6(32),
1768 ARM64sh_SHL));
1769 addInstr(env, ARM64Instr_Logic(hi32, hi32, ARM64RIL_R(lo32),
1770 ARM64lo_OR));
1771 return hi32;
1772 }
1773
1774 if (e->Iex.Binop.op == Iop_CmpF64 || e->Iex.Binop.op == Iop_CmpF32) {
1775 Bool isD = e->Iex.Binop.op == Iop_CmpF64;
1776 HReg dL = (isD ? iselDblExpr : iselFltExpr)(env, e->Iex.Binop.arg1);
1777 HReg dR = (isD ? iselDblExpr : iselFltExpr)(env, e->Iex.Binop.arg2);
1778 HReg dst = newVRegI(env);
1779 HReg imm = newVRegI(env);
1780 /* Do the compare (FCMP), which sets NZCV in PSTATE. Then
1781 create in dst, the IRCmpF64Result encoded result. */
1782 addInstr(env, (isD ? ARM64Instr_VCmpD : ARM64Instr_VCmpS)(dL, dR));
1783 addInstr(env, ARM64Instr_Imm64(dst, 0));
1784 addInstr(env, ARM64Instr_Imm64(imm, 0x40)); // 0x40 = Ircr_EQ
1785 addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_EQ));
1786 addInstr(env, ARM64Instr_Imm64(imm, 0x01)); // 0x01 = Ircr_LT
1787 addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_MI));
1788 addInstr(env, ARM64Instr_Imm64(imm, 0x00)); // 0x00 = Ircr_GT
1789 addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_GT));
1790 addInstr(env, ARM64Instr_Imm64(imm, 0x45)); // 0x45 = Ircr_UN
1791 addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_VS));
1792 return dst;
1793 }
1794
1795 { /* local scope */
1796 ARM64CvtOp cvt_op = ARM64cvt_INVALID;
1797 Bool srcIsD = False;
1798 switch (e->Iex.Binop.op) {
1799 case Iop_F64toI64S:
1800 cvt_op = ARM64cvt_F64_I64S; srcIsD = True; break;
1801 case Iop_F64toI64U:
1802 cvt_op = ARM64cvt_F64_I64U; srcIsD = True; break;
1803 case Iop_F64toI32S:
1804 cvt_op = ARM64cvt_F64_I32S; srcIsD = True; break;
1805 case Iop_F64toI32U:
1806 cvt_op = ARM64cvt_F64_I32U; srcIsD = True; break;
1807 case Iop_F32toI32S:
1808 cvt_op = ARM64cvt_F32_I32S; srcIsD = False; break;
1809 case Iop_F32toI64U:
1810 cvt_op = ARM64cvt_F32_I64U; srcIsD = False; break;
1811 default:
1812 break;
1813 }
1814 if (cvt_op != ARM64cvt_INVALID) {
1815 /* This is all a bit dodgy, because we can't handle a
1816 non-constant (not-known-at-JIT-time) rounding mode
1817 indication. That's because there's no instruction
1818 AFAICS that does this conversion but rounds according to
1819 FPCR.RM, so we have to bake the rounding mode into the
1820 instruction right now. But that should be OK because
1821 (1) the front end attaches a literal Irrm_ value to the
1822 conversion binop, and (2) iropt will never float that
1823 off via CSE, into a literal. Hence we should always
1824 have an Irrm_ value as the first arg. */
1825 IRExpr* arg1 = e->Iex.Binop.arg1;
1826 if (arg1->tag != Iex_Const) goto irreducible;
1827 IRConst* arg1con = arg1->Iex.Const.con;
1828 vassert(arg1con->tag == Ico_U32); // else ill-typed IR
1829 UInt irrm = arg1con->Ico.U32;
1830 /* Find the ARM-encoded equivalent for |irrm|. */
1831 UInt armrm = 4; /* impossible */
1832 switch (irrm) {
1833 case Irrm_NEAREST: armrm = 0; break;
1834 case Irrm_NegINF: armrm = 2; break;
1835 case Irrm_PosINF: armrm = 1; break;
1836 case Irrm_ZERO: armrm = 3; break;
1837 default: goto irreducible;
1838 }
1839 HReg src = (srcIsD ? iselDblExpr : iselFltExpr)
1840 (env, e->Iex.Binop.arg2);
1841 HReg dst = newVRegI(env);
1842 addInstr(env, ARM64Instr_VCvtF2I(cvt_op, dst, src, armrm));
1843 return dst;
1844 }
1845 } /* local scope */
1846
1847//ZZ if (e->Iex.Binop.op == Iop_GetElem8x8
1848//ZZ || e->Iex.Binop.op == Iop_GetElem16x4
1849//ZZ || e->Iex.Binop.op == Iop_GetElem32x2) {
1850//ZZ HReg res = newVRegI(env);
1851//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Binop.arg1);
1852//ZZ UInt index, size;
1853//ZZ if (e->Iex.Binop.arg2->tag != Iex_Const ||
1854//ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
1855//ZZ vpanic("ARM target supports GetElem with constant "
1856//ZZ "second argument only\n");
1857//ZZ }
1858//ZZ index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
1859//ZZ switch (e->Iex.Binop.op) {
1860//ZZ case Iop_GetElem8x8: vassert(index < 8); size = 0; break;
1861//ZZ case Iop_GetElem16x4: vassert(index < 4); size = 1; break;
1862//ZZ case Iop_GetElem32x2: vassert(index < 2); size = 2; break;
1863//ZZ default: vassert(0);
1864//ZZ }
1865//ZZ addInstr(env, ARMInstr_NUnaryS(ARMneon_GETELEMS,
1866//ZZ mkARMNRS(ARMNRS_Reg, res, 0),
1867//ZZ mkARMNRS(ARMNRS_Scalar, arg, index),
1868//ZZ size, False));
1869//ZZ return res;
1870//ZZ }
1871//ZZ
1872//ZZ if (e->Iex.Binop.op == Iop_GetElem8x16
1873//ZZ || e->Iex.Binop.op == Iop_GetElem16x8
1874//ZZ || e->Iex.Binop.op == Iop_GetElem32x4) {
1875//ZZ HReg res = newVRegI(env);
1876//ZZ HReg arg = iselNeonExpr(env, e->Iex.Binop.arg1);
1877//ZZ UInt index, size;
1878//ZZ if (e->Iex.Binop.arg2->tag != Iex_Const ||
1879//ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
1880//ZZ vpanic("ARM target supports GetElem with constant "
1881//ZZ "second argument only\n");
1882//ZZ }
1883//ZZ index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
1884//ZZ switch (e->Iex.Binop.op) {
1885//ZZ case Iop_GetElem8x16: vassert(index < 16); size = 0; break;
1886//ZZ case Iop_GetElem16x8: vassert(index < 8); size = 1; break;
1887//ZZ case Iop_GetElem32x4: vassert(index < 4); size = 2; break;
1888//ZZ default: vassert(0);
1889//ZZ }
1890//ZZ addInstr(env, ARMInstr_NUnaryS(ARMneon_GETELEMS,
1891//ZZ mkARMNRS(ARMNRS_Reg, res, 0),
1892//ZZ mkARMNRS(ARMNRS_Scalar, arg, index),
1893//ZZ size, True));
1894//ZZ return res;
1895//ZZ }
1896
1897 /* All cases involving host-side helper calls. */
1898 void* fn = NULL;
1899 switch (e->Iex.Binop.op) {
1900//ZZ case Iop_Add16x2:
1901//ZZ fn = &h_generic_calc_Add16x2; break;
1902//ZZ case Iop_Sub16x2:
1903//ZZ fn = &h_generic_calc_Sub16x2; break;
1904//ZZ case Iop_HAdd16Ux2:
1905//ZZ fn = &h_generic_calc_HAdd16Ux2; break;
1906//ZZ case Iop_HAdd16Sx2:
1907//ZZ fn = &h_generic_calc_HAdd16Sx2; break;
1908//ZZ case Iop_HSub16Ux2:
1909//ZZ fn = &h_generic_calc_HSub16Ux2; break;
1910//ZZ case Iop_HSub16Sx2:
1911//ZZ fn = &h_generic_calc_HSub16Sx2; break;
1912//ZZ case Iop_QAdd16Sx2:
1913//ZZ fn = &h_generic_calc_QAdd16Sx2; break;
1914//ZZ case Iop_QAdd16Ux2:
1915//ZZ fn = &h_generic_calc_QAdd16Ux2; break;
1916//ZZ case Iop_QSub16Sx2:
1917//ZZ fn = &h_generic_calc_QSub16Sx2; break;
1918//ZZ case Iop_Add8x4:
1919//ZZ fn = &h_generic_calc_Add8x4; break;
1920//ZZ case Iop_Sub8x4:
1921//ZZ fn = &h_generic_calc_Sub8x4; break;
1922//ZZ case Iop_HAdd8Ux4:
1923//ZZ fn = &h_generic_calc_HAdd8Ux4; break;
1924//ZZ case Iop_HAdd8Sx4:
1925//ZZ fn = &h_generic_calc_HAdd8Sx4; break;
1926//ZZ case Iop_HSub8Ux4:
1927//ZZ fn = &h_generic_calc_HSub8Ux4; break;
1928//ZZ case Iop_HSub8Sx4:
1929//ZZ fn = &h_generic_calc_HSub8Sx4; break;
1930//ZZ case Iop_QAdd8Sx4:
1931//ZZ fn = &h_generic_calc_QAdd8Sx4; break;
1932//ZZ case Iop_QAdd8Ux4:
1933//ZZ fn = &h_generic_calc_QAdd8Ux4; break;
1934//ZZ case Iop_QSub8Sx4:
1935//ZZ fn = &h_generic_calc_QSub8Sx4; break;
1936//ZZ case Iop_QSub8Ux4:
1937//ZZ fn = &h_generic_calc_QSub8Ux4; break;
1938//ZZ case Iop_Sad8Ux4:
1939//ZZ fn = &h_generic_calc_Sad8Ux4; break;
1940//ZZ case Iop_QAdd32S:
1941//ZZ fn = &h_generic_calc_QAdd32S; break;
1942//ZZ case Iop_QSub32S:
1943//ZZ fn = &h_generic_calc_QSub32S; break;
1944//ZZ case Iop_QSub16Ux2:
1945//ZZ fn = &h_generic_calc_QSub16Ux2; break;
1946 case Iop_DivU32:
1947 fn = &h_calc_udiv32_w_arm_semantics; break;
1948 case Iop_DivS32:
1949 fn = &h_calc_sdiv32_w_arm_semantics; break;
1950 case Iop_DivU64:
1951 fn = &h_calc_udiv64_w_arm_semantics; break;
1952 case Iop_DivS64:
1953 fn = &h_calc_sdiv64_w_arm_semantics; break;
1954 default:
1955 break;
1956 }
1957
1958 if (fn) {
1959 HReg regL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1960 HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1961 HReg res = newVRegI(env);
1962 addInstr(env, ARM64Instr_MovI(hregARM64_X0(), regL));
1963 addInstr(env, ARM64Instr_MovI(hregARM64_X1(), regR));
1964 addInstr(env, ARM64Instr_Call( ARM64cc_AL, (HWord)Ptr_to_ULong(fn),
1965 2, mk_RetLoc_simple(RLPri_Int) ));
1966 addInstr(env, ARM64Instr_MovI(res, hregARM64_X0()));
1967 return res;
1968 }
1969
1970 break;
1971 }
1972
1973 /* --------- UNARY OP --------- */
1974 case Iex_Unop: {
1975
1976 switch (e->Iex.Unop.op) {
1977 case Iop_16Uto64: {
1978 /* This probably doesn't occur often enough to be worth
1979 rolling the extension into the load. */
1980 IRExpr* arg = e->Iex.Unop.arg;
1981 HReg src = iselIntExpr_R(env, arg);
1982 HReg dst = widen_z_16_to_64(env, src);
1983 return dst;
1984 }
1985 case Iop_32Uto64: {
1986 IRExpr* arg = e->Iex.Unop.arg;
1987 if (arg->tag == Iex_Load) {
1988 /* This correctly zero extends because _LdSt32 is
1989 defined to do a zero extending load. */
1990 HReg dst = newVRegI(env);
1991 ARM64AMode* am
1992 = iselIntExpr_AMode(env, arg->Iex.Load.addr, Ity_I32);
1993 addInstr(env, ARM64Instr_LdSt32(True/*isLoad*/, dst, am));
1994 return dst;
1995 }
1996 /* else be lame and mask it */
1997 HReg src = iselIntExpr_R(env, arg);
1998 HReg dst = widen_z_32_to_64(env, src);
1999 return dst;
2000 }
2001 case Iop_8Uto32: /* Just freeload on the 8Uto64 case */
2002 case Iop_8Uto64: {
2003 IRExpr* arg = e->Iex.Unop.arg;
2004 if (arg->tag == Iex_Load) {
2005 /* This correctly zero extends because _LdSt8 is
2006 defined to do a zero extending load. */
2007 HReg dst = newVRegI(env);
2008 ARM64AMode* am
2009 = iselIntExpr_AMode(env, arg->Iex.Load.addr, Ity_I8);
2010 addInstr(env, ARM64Instr_LdSt8(True/*isLoad*/, dst, am));
2011 return dst;
2012 }
2013 /* else be lame and mask it */
2014 HReg src = iselIntExpr_R(env, arg);
2015 HReg dst = widen_z_8_to_64(env, src);
2016 return dst;
2017 }
2018 case Iop_128HIto64: {
2019 HReg rHi, rLo;
2020 iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
2021 return rHi; /* and abandon rLo */
2022 }
2023 case Iop_8Sto32: case Iop_8Sto64: {
2024 IRExpr* arg = e->Iex.Unop.arg;
2025 HReg src = iselIntExpr_R(env, arg);
2026 HReg dst = widen_s_8_to_64(env, src);
2027 return dst;
2028 }
2029 case Iop_16Sto32: case Iop_16Sto64: {
2030 IRExpr* arg = e->Iex.Unop.arg;
2031 HReg src = iselIntExpr_R(env, arg);
2032 HReg dst = widen_s_16_to_64(env, src);
2033 return dst;
2034 }
2035 case Iop_32Sto64: {
2036 IRExpr* arg = e->Iex.Unop.arg;
2037 HReg src = iselIntExpr_R(env, arg);
2038 HReg dst = widen_s_32_to_64(env, src);
2039 return dst;
2040 }
2041 case Iop_Not32:
2042 case Iop_Not64: {
2043 HReg dst = newVRegI(env);
2044 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2045 addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NOT));
2046 return dst;
2047 }
2048 case Iop_Clz64: {
2049 HReg dst = newVRegI(env);
2050 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2051 addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_CLZ));
2052 return dst;
2053 }
2054 case Iop_Left32:
2055 case Iop_Left64: {
2056 /* Left64(src) = src | -src. Left32 can use the same
2057 implementation since in that case we don't care what
2058 the upper 32 bits become. */
2059 HReg dst = newVRegI(env);
2060 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2061 addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NEG));
2062 addInstr(env, ARM64Instr_Logic(dst, dst, ARM64RIL_R(src),
2063 ARM64lo_OR));
2064 return dst;
2065 }
2066 case Iop_CmpwNEZ64: {
2067 /* CmpwNEZ64(src) = (src == 0) ? 0...0 : 1...1
2068 = Left64(src) >>s 63 */
2069 HReg dst = newVRegI(env);
2070 HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2071 addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NEG));
2072 addInstr(env, ARM64Instr_Logic(dst, dst, ARM64RIL_R(src),
2073 ARM64lo_OR));
2074 addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63),
2075 ARM64sh_SAR));
2076 return dst;
2077 }
2078 case Iop_CmpwNEZ32: {
2079 /* CmpwNEZ32(src) = CmpwNEZ64(src & 0xFFFFFFFF)
2080 = Left64(src & 0xFFFFFFFF) >>s 63 */
2081 HReg dst = newVRegI(env);
2082 HReg pre = iselIntExpr_R(env, e->Iex.Unop.arg);
2083 HReg src = widen_z_32_to_64(env, pre);
2084 addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NEG));
2085 addInstr(env, ARM64Instr_Logic(dst, dst, ARM64RIL_R(src),
2086 ARM64lo_OR));
2087 addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63),
2088 ARM64sh_SAR));
2089 return dst;
2090 }
2091 case Iop_V128to64: case Iop_V128HIto64: {
2092 HReg dst = newVRegI(env);
2093 HReg src = iselV128Expr(env, e->Iex.Unop.arg);
2094 UInt laneNo = (e->Iex.Unop.op == Iop_V128HIto64) ? 1 : 0;
2095 addInstr(env, ARM64Instr_VXfromQ(dst, src, laneNo));
2096 return dst;
2097 }
2098 case Iop_1Sto32:
2099 case Iop_1Sto64: {
2100 /* As with the iselStmt case for 'tmp:I1 = expr', we could
2101 do a lot better here if it ever became necessary. */
2102 HReg zero = newVRegI(env);
2103 HReg one = newVRegI(env);
2104 HReg dst = newVRegI(env);
2105 addInstr(env, ARM64Instr_Imm64(zero, 0));
2106 addInstr(env, ARM64Instr_Imm64(one, 1));
2107 ARM64CondCode cc = iselCondCode(env, e->Iex.Unop.arg);
2108 addInstr(env, ARM64Instr_CSel(dst, one, zero, cc));
2109 addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63),
2110 ARM64sh_SHL));
2111 addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63),
2112 ARM64sh_SAR));
2113 return dst;
2114 }
2115
2116//ZZ case Iop_64HIto32: {
2117//ZZ HReg rHi, rLo;
2118//ZZ iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
2119//ZZ return rHi; /* and abandon rLo .. poor wee thing :-) */
2120//ZZ }
2121//ZZ case Iop_64to32: {
2122//ZZ HReg rHi, rLo;
2123//ZZ iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
2124//ZZ return rLo; /* similar stupid comment to the above ... */
2125//ZZ }
2126//ZZ case Iop_64to8: {
2127//ZZ HReg rHi, rLo;
2128//ZZ if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
2129//ZZ HReg tHi = newVRegI(env);
2130//ZZ HReg tLo = newVRegI(env);
2131//ZZ HReg tmp = iselNeon64Expr(env, e->Iex.Unop.arg);
2132//ZZ addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo));
2133//ZZ rHi = tHi;
2134//ZZ rLo = tLo;
2135//ZZ } else {
2136//ZZ iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
2137//ZZ }
2138//ZZ return rLo;
2139//ZZ }
2140//ZZ
2141//ZZ case Iop_1Uto32:
2142//ZZ /* 1Uto32(tmp). Since I1 values generated into registers
2143//ZZ are guaranteed to have value either only zero or one,
2144//ZZ we can simply return the value of the register in this
2145//ZZ case. */
2146//ZZ if (e->Iex.Unop.arg->tag == Iex_RdTmp) {
2147//ZZ HReg dst = lookupIRTemp(env, e->Iex.Unop.arg->Iex.RdTmp.tmp);
2148//ZZ return dst;
2149//ZZ }
2150//ZZ /* else fall through */
2151//ZZ case Iop_1Uto8: {
2152//ZZ HReg dst = newVRegI(env);
2153//ZZ ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
2154//ZZ addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
2155//ZZ addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
2156//ZZ return dst;
2157//ZZ }
2158//ZZ
2159//ZZ case Iop_1Sto32: {
2160//ZZ HReg dst = newVRegI(env);
2161//ZZ ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
2162//ZZ ARMRI5* amt = ARMRI5_I5(31);
2163//ZZ /* This is really rough. We could do much better here;
2164//ZZ perhaps mvn{cond} dst, #0 as the second insn?
2165//ZZ (same applies to 1Sto64) */
2166//ZZ addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
2167//ZZ addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
2168//ZZ addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, dst, amt));
2169//ZZ addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt));
2170//ZZ return dst;
2171//ZZ }
2172//ZZ
2173//ZZ case Iop_Clz32: {
2174//ZZ /* Count leading zeroes; easy on ARM. */
2175//ZZ HReg dst = newVRegI(env);
2176//ZZ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2177//ZZ addInstr(env, ARMInstr_Unary(ARMun_CLZ, dst, src));
2178//ZZ return dst;
2179//ZZ }
2180//ZZ
2181//ZZ case Iop_CmpwNEZ32: {
2182//ZZ HReg dst = newVRegI(env);
2183//ZZ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2184//ZZ addInstr(env, ARMInstr_Unary(ARMun_NEG, dst, src));
2185//ZZ addInstr(env, ARMInstr_Alu(ARMalu_OR, dst, dst, ARMRI84_R(src)));
2186//ZZ addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, ARMRI5_I5(31)));
2187//ZZ return dst;
2188//ZZ }
2189//ZZ
2190//ZZ case Iop_ReinterpF32asI32: {
2191//ZZ HReg dst = newVRegI(env);
2192//ZZ HReg src = iselFltExpr(env, e->Iex.Unop.arg);
2193//ZZ addInstr(env, ARMInstr_VXferS(False/*!toS*/, src, dst));
2194//ZZ return dst;
2195//ZZ }
2196
2197 case Iop_64to32:
2198 case Iop_64to16:
2199 case Iop_64to8:
2200 /* These are no-ops. */
2201 return iselIntExpr_R(env, e->Iex.Unop.arg);
2202
2203 default:
2204 break;
2205 }
2206
2207//ZZ /* All Unop cases involving host-side helper calls. */
2208//ZZ void* fn = NULL;
2209//ZZ switch (e->Iex.Unop.op) {
2210//ZZ case Iop_CmpNEZ16x2:
2211//ZZ fn = &h_generic_calc_CmpNEZ16x2; break;
2212//ZZ case Iop_CmpNEZ8x4:
2213//ZZ fn = &h_generic_calc_CmpNEZ8x4; break;
2214//ZZ default:
2215//ZZ break;
2216//ZZ }
2217//ZZ
2218//ZZ if (fn) {
2219//ZZ HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
2220//ZZ HReg res = newVRegI(env);
2221//ZZ addInstr(env, mk_iMOVds_RR(hregARM_R0(), arg));
2222//ZZ addInstr(env, ARMInstr_Call( ARMcc_AL, (HWord)Ptr_to_ULong(fn),
2223//ZZ 1, RetLocInt ));
2224//ZZ addInstr(env, mk_iMOVds_RR(res, hregARM_R0()));
2225//ZZ return res;
2226//ZZ }
2227
2228 break;
2229 }
2230
2231 /* --------- GET --------- */
2232 case Iex_Get: {
2233 if (ty == Ity_I64
2234 && 0 == (e->Iex.Get.offset & 7) && e->Iex.Get.offset < 8192-8) {
2235 HReg dst = newVRegI(env);
2236 ARM64AMode* am
2237 = mk_baseblock_64bit_access_amode(e->Iex.Get.offset);
2238 addInstr(env, ARM64Instr_LdSt64(True/*isLoad*/, dst, am));
2239 return dst;
2240 }
2241 if (ty == Ity_I32
2242 && 0 == (e->Iex.Get.offset & 3) && e->Iex.Get.offset < 4096-4) {
2243 HReg dst = newVRegI(env);
2244 ARM64AMode* am
2245 = mk_baseblock_32bit_access_amode(e->Iex.Get.offset);
2246 addInstr(env, ARM64Instr_LdSt32(True/*isLoad*/, dst, am));
2247 return dst;
2248 }
2249 break;
2250 }
2251
2252 /* --------- CCALL --------- */
2253 case Iex_CCall: {
2254 HReg dst = newVRegI(env);
2255 vassert(ty == e->Iex.CCall.retty);
2256
2257 /* be very restrictive for now. Only 64-bit ints allowed for
2258 args, and 64 bits for return type. Don't forget to change
2259 the RetLoc if more types are allowed in future. */
2260 if (e->Iex.CCall.retty != Ity_I64)
2261 goto irreducible;
2262
2263 /* Marshal args, do the call, clear stack. */
2264 UInt addToSp = 0;
2265 RetLoc rloc = mk_RetLoc_INVALID();
2266 Bool ok = doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
2267 e->Iex.CCall.cee, e->Iex.CCall.retty,
2268 e->Iex.CCall.args );
2269 /* */
2270 if (ok) {
2271 vassert(is_sane_RetLoc(rloc));
2272 vassert(rloc.pri == RLPri_Int);
2273 vassert(addToSp == 0);
2274 addInstr(env, ARM64Instr_MovI(dst, hregARM64_X0()));
2275 return dst;
2276 }
2277 /* else fall through; will hit the irreducible: label */
2278 }
2279
2280 /* --------- LITERAL --------- */
2281 /* 64-bit literals */
2282 case Iex_Const: {
2283 ULong u = 0;
2284 HReg dst = newVRegI(env);
2285 switch (e->Iex.Const.con->tag) {
2286 case Ico_U64: u = e->Iex.Const.con->Ico.U64; break;
2287 case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
2288 case Ico_U16: u = e->Iex.Const.con->Ico.U16; break;
2289 case Ico_U8: u = e->Iex.Const.con->Ico.U8; break;
2290 default: ppIRExpr(e); vpanic("iselIntExpr_R.Iex_Const(arm64)");
2291 }
2292 addInstr(env, ARM64Instr_Imm64(dst, u));
2293 return dst;
2294 }
2295
2296 /* --------- MULTIPLEX --------- */
2297 case Iex_ITE: {
2298 /* ITE(ccexpr, iftrue, iffalse) */
2299 if (ty == Ity_I64 || ty == Ity_I32) {
2300 ARM64CondCode cc;
2301 HReg r1 = iselIntExpr_R(env, e->Iex.ITE.iftrue);
2302 HReg r0 = iselIntExpr_R(env, e->Iex.ITE.iffalse);
2303 HReg dst = newVRegI(env);
2304 cc = iselCondCode(env, e->Iex.ITE.cond);
2305 addInstr(env, ARM64Instr_CSel(dst, r1, r0, cc));
2306 return dst;
2307 }
2308 break;
2309 }
2310
2311 default:
2312 break;
2313 } /* switch (e->tag) */
2314
2315 /* We get here if no pattern matched. */
2316 irreducible:
2317 ppIRExpr(e);
2318 vpanic("iselIntExpr_R: cannot reduce tree");
2319}
2320
2321
2322/*---------------------------------------------------------*/
2323/*--- ISEL: Integer expressions (128 bit) ---*/
2324/*---------------------------------------------------------*/
2325
2326/* Compute a 128-bit value into a register pair, which is returned as
2327 the first two parameters. As with iselIntExpr_R, these may be
2328 either real or virtual regs; in any case they must not be changed
2329 by subsequent code emitted by the caller. */
2330
2331static void iselInt128Expr ( HReg* rHi, HReg* rLo,
2332 ISelEnv* env, IRExpr* e )
2333{
2334 iselInt128Expr_wrk(rHi, rLo, env, e);
2335# if 0
2336 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
2337# endif
2338 vassert(hregClass(*rHi) == HRcInt64);
2339 vassert(hregIsVirtual(*rHi));
2340 vassert(hregClass(*rLo) == HRcInt64);
2341 vassert(hregIsVirtual(*rLo));
2342}
2343
2344/* DO NOT CALL THIS DIRECTLY ! */
2345static void iselInt128Expr_wrk ( HReg* rHi, HReg* rLo,
2346 ISelEnv* env, IRExpr* e )
2347{
2348 vassert(e);
2349 vassert(typeOfIRExpr(env->type_env,e) == Ity_I128);
2350
2351 /* --------- BINARY ops --------- */
2352 if (e->tag == Iex_Binop) {
2353 switch (e->Iex.Binop.op) {
2354 /* 64 x 64 -> 128 multiply */
2355 case Iop_MullU64:
2356 /*case Iop_MullS64:*/ {
2357 Bool syned = toBool(e->Iex.Binop.op == Iop_MullS64);
2358 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
2359 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
2360 HReg dstLo = newVRegI(env);
2361 HReg dstHi = newVRegI(env);
2362 addInstr(env, ARM64Instr_Mul(dstLo, argL, argR,
2363 ARM64mul_PLAIN));
2364 addInstr(env, ARM64Instr_Mul(dstHi, argL, argR,
2365 syned ? ARM64mul_SX : ARM64mul_ZX));
2366 *rHi = dstHi;
2367 *rLo = dstLo;
2368 return;
2369 }
2370 /* 64HLto128(e1,e2) */
2371 case Iop_64HLto128:
2372 *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
2373 *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
2374 return;
2375 default:
2376 break;
2377 }
2378 } /* if (e->tag == Iex_Binop) */
2379
2380 ppIRExpr(e);
2381 vpanic("iselInt128Expr(arm64)");
2382}
2383
2384
2385//ZZ /* -------------------- 64-bit -------------------- */
2386//ZZ
2387//ZZ /* Compute a 64-bit value into a register pair, which is returned as
2388//ZZ the first two parameters. As with iselIntExpr_R, these may be
2389//ZZ either real or virtual regs; in any case they must not be changed
2390//ZZ by subsequent code emitted by the caller. */
2391//ZZ
2392//ZZ static void iselInt64Expr ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e )
2393//ZZ {
2394//ZZ iselInt64Expr_wrk(rHi, rLo, env, e);
2395//ZZ # if 0
2396//ZZ vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
2397//ZZ # endif
2398//ZZ vassert(hregClass(*rHi) == HRcInt32);
2399//ZZ vassert(hregIsVirtual(*rHi));
2400//ZZ vassert(hregClass(*rLo) == HRcInt32);
2401//ZZ vassert(hregIsVirtual(*rLo));
2402//ZZ }
2403//ZZ
2404//ZZ /* DO NOT CALL THIS DIRECTLY ! */
2405//ZZ static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e )
2406//ZZ {
2407//ZZ vassert(e);
2408//ZZ vassert(typeOfIRExpr(env->type_env,e) == Ity_I64);
2409//ZZ
2410//ZZ /* 64-bit literal */
2411//ZZ if (e->tag == Iex_Const) {
2412//ZZ ULong w64 = e->Iex.Const.con->Ico.U64;
2413//ZZ UInt wHi = toUInt(w64 >> 32);
2414//ZZ UInt wLo = toUInt(w64);
2415//ZZ HReg tHi = newVRegI(env);
2416//ZZ HReg tLo = newVRegI(env);
2417//ZZ vassert(e->Iex.Const.con->tag == Ico_U64);
2418//ZZ addInstr(env, ARMInstr_Imm32(tHi, wHi));
2419//ZZ addInstr(env, ARMInstr_Imm32(tLo, wLo));
2420//ZZ *rHi = tHi;
2421//ZZ *rLo = tLo;
2422//ZZ return;
2423//ZZ }
2424//ZZ
2425//ZZ /* read 64-bit IRTemp */
2426//ZZ if (e->tag == Iex_RdTmp) {
2427//ZZ if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
2428//ZZ HReg tHi = newVRegI(env);
2429//ZZ HReg tLo = newVRegI(env);
2430//ZZ HReg tmp = iselNeon64Expr(env, e);
2431//ZZ addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo));
2432//ZZ *rHi = tHi;
2433//ZZ *rLo = tLo;
2434//ZZ } else {
2435//ZZ lookupIRTemp64( rHi, rLo, env, e->Iex.RdTmp.tmp);
2436//ZZ }
2437//ZZ return;
2438//ZZ }
2439//ZZ
2440//ZZ /* 64-bit load */
2441//ZZ if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
2442//ZZ HReg tLo, tHi, rA;
2443//ZZ vassert(e->Iex.Load.ty == Ity_I64);
2444//ZZ rA = iselIntExpr_R(env, e->Iex.Load.addr);
2445//ZZ tHi = newVRegI(env);
2446//ZZ tLo = newVRegI(env);
2447//ZZ addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/,
2448//ZZ tHi, ARMAMode1_RI(rA, 4)));
2449//ZZ addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/,
2450//ZZ tLo, ARMAMode1_RI(rA, 0)));
2451//ZZ *rHi = tHi;
2452//ZZ *rLo = tLo;
2453//ZZ return;
2454//ZZ }
2455//ZZ
2456//ZZ /* 64-bit GET */
2457//ZZ if (e->tag == Iex_Get) {
2458//ZZ ARMAMode1* am0 = ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset + 0);
2459//ZZ ARMAMode1* am4 = ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset + 4);
2460//ZZ HReg tHi = newVRegI(env);
2461//ZZ HReg tLo = newVRegI(env);
2462//ZZ addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/, tHi, am4));
2463//ZZ addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/, tLo, am0));
2464//ZZ *rHi = tHi;
2465//ZZ *rLo = tLo;
2466//ZZ return;
2467//ZZ }
2468//ZZ
2469//ZZ /* --------- BINARY ops --------- */
2470//ZZ if (e->tag == Iex_Binop) {
2471//ZZ switch (e->Iex.Binop.op) {
2472//ZZ
2473//ZZ /* 32 x 32 -> 64 multiply */
2474//ZZ case Iop_MullS32:
2475//ZZ case Iop_MullU32: {
2476//ZZ HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
2477//ZZ HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
2478//ZZ HReg tHi = newVRegI(env);
2479//ZZ HReg tLo = newVRegI(env);
2480//ZZ ARMMulOp mop = e->Iex.Binop.op == Iop_MullS32
2481//ZZ ? ARMmul_SX : ARMmul_ZX;
2482//ZZ addInstr(env, mk_iMOVds_RR(hregARM_R2(), argL));
2483//ZZ addInstr(env, mk_iMOVds_RR(hregARM_R3(), argR));
2484//ZZ addInstr(env, ARMInstr_Mul(mop));
2485//ZZ addInstr(env, mk_iMOVds_RR(tHi, hregARM_R1()));
2486//ZZ addInstr(env, mk_iMOVds_RR(tLo, hregARM_R0()));
2487//ZZ *rHi = tHi;
2488//ZZ *rLo = tLo;
2489//ZZ return;
2490//ZZ }
2491//ZZ
2492//ZZ case Iop_Or64: {
2493//ZZ HReg xLo, xHi, yLo, yHi;
2494//ZZ HReg tHi = newVRegI(env);
2495//ZZ HReg tLo = newVRegI(env);
2496//ZZ iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2497//ZZ iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
2498//ZZ addInstr(env, ARMInstr_Alu(ARMalu_OR, tHi, xHi, ARMRI84_R(yHi)));
2499//ZZ addInstr(env, ARMInstr_Alu(ARMalu_OR, tLo, xLo, ARMRI84_R(yLo)));
2500//ZZ *rHi = tHi;
2501//ZZ *rLo = tLo;
2502//ZZ return;
2503//ZZ }
2504//ZZ
2505//ZZ case Iop_Add64: {
2506//ZZ HReg xLo, xHi, yLo, yHi;
2507//ZZ HReg tHi = newVRegI(env);
2508//ZZ HReg tLo = newVRegI(env);
2509//ZZ iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2510//ZZ iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
2511//ZZ addInstr(env, ARMInstr_Alu(ARMalu_ADDS, tLo, xLo, ARMRI84_R(yLo)));
2512//ZZ addInstr(env, ARMInstr_Alu(ARMalu_ADC, tHi, xHi, ARMRI84_R(yHi)));
2513//ZZ *rHi = tHi;
2514//ZZ *rLo = tLo;
2515//ZZ return;
2516//ZZ }
2517//ZZ
2518//ZZ /* 32HLto64(e1,e2) */
2519//ZZ case Iop_32HLto64: {
2520//ZZ *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
2521//ZZ *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
2522//ZZ return;
2523//ZZ }
2524//ZZ
2525//ZZ default:
2526//ZZ break;
2527//ZZ }
2528//ZZ }
2529//ZZ
2530//ZZ /* --------- UNARY ops --------- */
2531//ZZ if (e->tag == Iex_Unop) {
2532//ZZ switch (e->Iex.Unop.op) {
2533//ZZ
2534//ZZ /* ReinterpF64asI64 */
2535//ZZ case Iop_ReinterpF64asI64: {
2536//ZZ HReg dstHi = newVRegI(env);
2537//ZZ HReg dstLo = newVRegI(env);
2538//ZZ HReg src = iselDblExpr(env, e->Iex.Unop.arg);
2539//ZZ addInstr(env, ARMInstr_VXferD(False/*!toD*/, src, dstHi, dstLo));
2540//ZZ *rHi = dstHi;
2541//ZZ *rLo = dstLo;
2542//ZZ return;
2543//ZZ }
2544//ZZ
2545//ZZ /* Left64(e) */
2546//ZZ case Iop_Left64: {
2547//ZZ HReg yLo, yHi;
2548//ZZ HReg tHi = newVRegI(env);
2549//ZZ HReg tLo = newVRegI(env);
2550//ZZ HReg zero = newVRegI(env);
2551//ZZ /* yHi:yLo = arg */
2552//ZZ iselInt64Expr(&yHi, &yLo, env, e->Iex.Unop.arg);
2553//ZZ /* zero = 0 */
2554//ZZ addInstr(env, ARMInstr_Imm32(zero, 0));
2555//ZZ /* tLo = 0 - yLo, and set carry */
2556//ZZ addInstr(env, ARMInstr_Alu(ARMalu_SUBS,
2557//ZZ tLo, zero, ARMRI84_R(yLo)));
2558//ZZ /* tHi = 0 - yHi - carry */
2559//ZZ addInstr(env, ARMInstr_Alu(ARMalu_SBC,
2560//ZZ tHi, zero, ARMRI84_R(yHi)));
2561//ZZ /* So now we have tHi:tLo = -arg. To finish off, or 'arg'
2562//ZZ back in, so as to give the final result
2563//ZZ tHi:tLo = arg | -arg. */
2564//ZZ addInstr(env, ARMInstr_Alu(ARMalu_OR, tHi, tHi, ARMRI84_R(yHi)));
2565//ZZ addInstr(env, ARMInstr_Alu(ARMalu_OR, tLo, tLo, ARMRI84_R(yLo)));
2566//ZZ *rHi = tHi;
2567//ZZ *rLo = tLo;
2568//ZZ return;
2569//ZZ }
2570//ZZ
2571//ZZ /* CmpwNEZ64(e) */
2572//ZZ case Iop_CmpwNEZ64: {
2573//ZZ HReg srcLo, srcHi;
2574//ZZ HReg tmp1 = newVRegI(env);
2575//ZZ HReg tmp2 = newVRegI(env);
2576//ZZ /* srcHi:srcLo = arg */
2577//ZZ iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg);
2578//ZZ /* tmp1 = srcHi | srcLo */
2579//ZZ addInstr(env, ARMInstr_Alu(ARMalu_OR,
2580//ZZ tmp1, srcHi, ARMRI84_R(srcLo)));
2581//ZZ /* tmp2 = (tmp1 | -tmp1) >>s 31 */
2582//ZZ addInstr(env, ARMInstr_Unary(ARMun_NEG, tmp2, tmp1));
2583//ZZ addInstr(env, ARMInstr_Alu(ARMalu_OR,
2584//ZZ tmp2, tmp2, ARMRI84_R(tmp1)));
2585//ZZ addInstr(env, ARMInstr_Shift(ARMsh_SAR,
2586//ZZ tmp2, tmp2, ARMRI5_I5(31)));
2587//ZZ *rHi = tmp2;
2588//ZZ *rLo = tmp2;
2589//ZZ return;
2590//ZZ }
2591//ZZ
2592//ZZ case Iop_1Sto64: {
2593//ZZ HReg dst = newVRegI(env);
2594//ZZ ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
2595//ZZ ARMRI5* amt = ARMRI5_I5(31);
2596//ZZ /* This is really rough. We could do much better here;
2597//ZZ perhaps mvn{cond} dst, #0 as the second insn?
2598//ZZ (same applies to 1Sto32) */
2599//ZZ addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
2600//ZZ addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
2601//ZZ addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, dst, amt));
2602//ZZ addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt));
2603//ZZ *rHi = dst;
2604//ZZ *rLo = dst;
2605//ZZ return;
2606//ZZ }
2607//ZZ
2608//ZZ default:
2609//ZZ break;
2610//ZZ }
2611//ZZ } /* if (e->tag == Iex_Unop) */
2612//ZZ
2613//ZZ /* --------- MULTIPLEX --------- */
2614//ZZ if (e->tag == Iex_ITE) { // VFD
2615//ZZ IRType tyC;
2616//ZZ HReg r1hi, r1lo, r0hi, r0lo, dstHi, dstLo;
2617//ZZ ARMCondCode cc;
2618//ZZ tyC = typeOfIRExpr(env->type_env,e->Iex.ITE.cond);
2619//ZZ vassert(tyC == Ity_I1);
2620//ZZ iselInt64Expr(&r1hi, &r1lo, env, e->Iex.ITE.iftrue);
2621//ZZ iselInt64Expr(&r0hi, &r0lo, env, e->Iex.ITE.iffalse);
2622//ZZ dstHi = newVRegI(env);
2623//ZZ dstLo = newVRegI(env);
2624//ZZ addInstr(env, mk_iMOVds_RR(dstHi, r1hi));
2625//ZZ addInstr(env, mk_iMOVds_RR(dstLo, r1lo));
2626//ZZ cc = iselCondCode(env, e->Iex.ITE.cond);
2627//ZZ addInstr(env, ARMInstr_CMov(cc ^ 1, dstHi, ARMRI84_R(r0hi)));
2628//ZZ addInstr(env, ARMInstr_CMov(cc ^ 1, dstLo, ARMRI84_R(r0lo)));
2629//ZZ *rHi = dstHi;
2630//ZZ *rLo = dstLo;
2631//ZZ return;
2632//ZZ }
2633//ZZ
2634//ZZ /* It is convenient sometimes to call iselInt64Expr even when we
2635//ZZ have NEON support (e.g. in do_helper_call we need 64-bit
2636//ZZ arguments as 2 x 32 regs). */
2637//ZZ if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
2638//ZZ HReg tHi = newVRegI(env);
2639//ZZ HReg tLo = newVRegI(env);
2640//ZZ HReg tmp = iselNeon64Expr(env, e);
2641//ZZ addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo));
2642//ZZ *rHi = tHi;
2643//ZZ *rLo = tLo;
2644//ZZ return ;
2645//ZZ }
2646//ZZ
2647//ZZ ppIRExpr(e);
2648//ZZ vpanic("iselInt64Expr");
2649//ZZ }
2650//ZZ
2651//ZZ
2652//ZZ /*---------------------------------------------------------*/
2653//ZZ /*--- ISEL: Vector (NEON) expressions (64 bit) ---*/
2654//ZZ /*---------------------------------------------------------*/
2655//ZZ
2656//ZZ static HReg iselNeon64Expr ( ISelEnv* env, IRExpr* e )
2657//ZZ {
2658//ZZ HReg r = iselNeon64Expr_wrk( env, e );
2659//ZZ vassert(hregClass(r) == HRcFlt64);
2660//ZZ vassert(hregIsVirtual(r));
2661//ZZ return r;
2662//ZZ }
2663//ZZ
2664//ZZ /* DO NOT CALL THIS DIRECTLY */
2665//ZZ static HReg iselNeon64Expr_wrk ( ISelEnv* env, IRExpr* e )
2666//ZZ {
2667//ZZ IRType ty = typeOfIRExpr(env->type_env, e);
2668//ZZ MatchInfo mi;
2669//ZZ vassert(e);
2670//ZZ vassert(ty == Ity_I64);
2671//ZZ
2672//ZZ if (e->tag == Iex_RdTmp) {
2673//ZZ return lookupIRTemp(env, e->Iex.RdTmp.tmp);
2674//ZZ }
2675//ZZ
2676//ZZ if (e->tag == Iex_Const) {
2677//ZZ HReg rLo, rHi;
2678//ZZ HReg res = newVRegD(env);
2679//ZZ iselInt64Expr(&rHi, &rLo, env, e);
2680//ZZ addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
2681//ZZ return res;
2682//ZZ }
2683//ZZ
2684//ZZ /* 64-bit load */
2685//ZZ if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
2686//ZZ HReg res = newVRegD(env);
2687//ZZ ARMAModeN* am = iselIntExpr_AModeN(env, e->Iex.Load.addr);
2688//ZZ vassert(ty == Ity_I64);
2689//ZZ addInstr(env, ARMInstr_NLdStD(True, res, am));
2690//ZZ return res;
2691//ZZ }
2692//ZZ
2693//ZZ /* 64-bit GET */
2694//ZZ if (e->tag == Iex_Get) {
2695//ZZ HReg addr = newVRegI(env);
2696//ZZ HReg res = newVRegD(env);
2697//ZZ vassert(ty == Ity_I64);
2698//ZZ addInstr(env, ARMInstr_Add32(addr, hregARM_R8(), e->Iex.Get.offset));
2699//ZZ addInstr(env, ARMInstr_NLdStD(True, res, mkARMAModeN_R(addr)));
2700//ZZ return res;
2701//ZZ }
2702//ZZ
2703//ZZ /* --------- BINARY ops --------- */
2704//ZZ if (e->tag == Iex_Binop) {
2705//ZZ switch (e->Iex.Binop.op) {
2706//ZZ
2707//ZZ /* 32 x 32 -> 64 multiply */
2708//ZZ case Iop_MullS32:
2709//ZZ case Iop_MullU32: {
2710//ZZ HReg rLo, rHi;
2711//ZZ HReg res = newVRegD(env);
2712//ZZ iselInt64Expr(&rHi, &rLo, env, e);
2713//ZZ addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
2714//ZZ return res;
2715//ZZ }
2716//ZZ
2717//ZZ case Iop_And64: {
2718//ZZ HReg res = newVRegD(env);
2719//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2720//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2721//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
2722//ZZ res, argL, argR, 4, False));
2723//ZZ return res;
2724//ZZ }
2725//ZZ case Iop_Or64: {
2726//ZZ HReg res = newVRegD(env);
2727//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2728//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2729//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
2730//ZZ res, argL, argR, 4, False));
2731//ZZ return res;
2732//ZZ }
2733//ZZ case Iop_Xor64: {
2734//ZZ HReg res = newVRegD(env);
2735//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2736//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2737//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VXOR,
2738//ZZ res, argL, argR, 4, False));
2739//ZZ return res;
2740//ZZ }
2741//ZZ
2742//ZZ /* 32HLto64(e1,e2) */
2743//ZZ case Iop_32HLto64: {
2744//ZZ HReg rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
2745//ZZ HReg rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
2746//ZZ HReg res = newVRegD(env);
2747//ZZ addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
2748//ZZ return res;
2749//ZZ }
2750//ZZ
2751//ZZ case Iop_Add8x8:
2752//ZZ case Iop_Add16x4:
2753//ZZ case Iop_Add32x2:
2754//ZZ case Iop_Add64: {
2755//ZZ HReg res = newVRegD(env);
2756//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2757//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2758//ZZ UInt size;
2759//ZZ switch (e->Iex.Binop.op) {
2760//ZZ case Iop_Add8x8: size = 0; break;
2761//ZZ case Iop_Add16x4: size = 1; break;
2762//ZZ case Iop_Add32x2: size = 2; break;
2763//ZZ case Iop_Add64: size = 3; break;
2764//ZZ default: vassert(0);
2765//ZZ }
2766//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VADD,
2767//ZZ res, argL, argR, size, False));
2768//ZZ return res;
2769//ZZ }
2770//ZZ case Iop_Add32Fx2: {
2771//ZZ HReg res = newVRegD(env);
2772//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2773//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2774//ZZ UInt size = 0;
2775//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VADDFP,
2776//ZZ res, argL, argR, size, False));
2777//ZZ return res;
2778//ZZ }
2779//ZZ case Iop_Recps32Fx2: {
2780//ZZ HReg res = newVRegD(env);
2781//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2782//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2783//ZZ UInt size = 0;
2784//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VRECPS,
2785//ZZ res, argL, argR, size, False));
2786//ZZ return res;
2787//ZZ }
2788//ZZ case Iop_Rsqrts32Fx2: {
2789//ZZ HReg res = newVRegD(env);
2790//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2791//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2792//ZZ UInt size = 0;
2793//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VRSQRTS,
2794//ZZ res, argL, argR, size, False));
2795//ZZ return res;
2796//ZZ }
2797//ZZ
2798//ZZ // These 6 verified 18 Apr 2013
2799//ZZ case Iop_InterleaveHI32x2:
2800//ZZ case Iop_InterleaveLO32x2:
2801//ZZ case Iop_InterleaveOddLanes8x8:
2802//ZZ case Iop_InterleaveEvenLanes8x8:
2803//ZZ case Iop_InterleaveOddLanes16x4:
2804//ZZ case Iop_InterleaveEvenLanes16x4: {
2805//ZZ HReg rD = newVRegD(env);
2806//ZZ HReg rM = newVRegD(env);
2807//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2808//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2809//ZZ UInt size;
2810//ZZ Bool resRd; // is the result in rD or rM ?
2811//ZZ switch (e->Iex.Binop.op) {
2812//ZZ case Iop_InterleaveOddLanes8x8: resRd = False; size = 0; break;
2813//ZZ case Iop_InterleaveEvenLanes8x8: resRd = True; size = 0; break;
2814//ZZ case Iop_InterleaveOddLanes16x4: resRd = False; size = 1; break;
2815//ZZ case Iop_InterleaveEvenLanes16x4: resRd = True; size = 1; break;
2816//ZZ case Iop_InterleaveHI32x2: resRd = False; size = 2; break;
2817//ZZ case Iop_InterleaveLO32x2: resRd = True; size = 2; break;
2818//ZZ default: vassert(0);
2819//ZZ }
2820//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, False));
2821//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, False));
2822//ZZ addInstr(env, ARMInstr_NDual(ARMneon_TRN, rD, rM, size, False));
2823//ZZ return resRd ? rD : rM;
2824//ZZ }
2825//ZZ
2826//ZZ // These 4 verified 18 Apr 2013
2827//ZZ case Iop_InterleaveHI8x8:
2828//ZZ case Iop_InterleaveLO8x8:
2829//ZZ case Iop_InterleaveHI16x4:
2830//ZZ case Iop_InterleaveLO16x4: {
2831//ZZ HReg rD = newVRegD(env);
2832//ZZ HReg rM = newVRegD(env);
2833//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2834//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2835//ZZ UInt size;
2836//ZZ Bool resRd; // is the result in rD or rM ?
2837//ZZ switch (e->Iex.Binop.op) {
2838//ZZ case Iop_InterleaveHI8x8: resRd = False; size = 0; break;
2839//ZZ case Iop_InterleaveLO8x8: resRd = True; size = 0; break;
2840//ZZ case Iop_InterleaveHI16x4: resRd = False; size = 1; break;
2841//ZZ case Iop_InterleaveLO16x4: resRd = True; size = 1; break;
2842//ZZ default: vassert(0);
2843//ZZ }
2844//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, False));
2845//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, False));
2846//ZZ addInstr(env, ARMInstr_NDual(ARMneon_ZIP, rD, rM, size, False));
2847//ZZ return resRd ? rD : rM;
2848//ZZ }
2849//ZZ
2850//ZZ // These 4 verified 18 Apr 2013
2851//ZZ case Iop_CatOddLanes8x8:
2852//ZZ case Iop_CatEvenLanes8x8:
2853//ZZ case Iop_CatOddLanes16x4:
2854//ZZ case Iop_CatEvenLanes16x4: {
2855//ZZ HReg rD = newVRegD(env);
2856//ZZ HReg rM = newVRegD(env);
2857//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2858//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2859//ZZ UInt size;
2860//ZZ Bool resRd; // is the result in rD or rM ?
2861//ZZ switch (e->Iex.Binop.op) {
2862//ZZ case Iop_CatOddLanes8x8: resRd = False; size = 0; break;
2863//ZZ case Iop_CatEvenLanes8x8: resRd = True; size = 0; break;
2864//ZZ case Iop_CatOddLanes16x4: resRd = False; size = 1; break;
2865//ZZ case Iop_CatEvenLanes16x4: resRd = True; size = 1; break;
2866//ZZ default: vassert(0);
2867//ZZ }
2868//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, False));
2869//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, False));
2870//ZZ addInstr(env, ARMInstr_NDual(ARMneon_UZP, rD, rM, size, False));
2871//ZZ return resRd ? rD : rM;
2872//ZZ }
2873//ZZ
2874//ZZ case Iop_QAdd8Ux8:
2875//ZZ case Iop_QAdd16Ux4:
2876//ZZ case Iop_QAdd32Ux2:
2877//ZZ case Iop_QAdd64Ux1: {
2878//ZZ HReg res = newVRegD(env);
2879//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2880//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2881//ZZ UInt size;
2882//ZZ switch (e->Iex.Binop.op) {
2883//ZZ case Iop_QAdd8Ux8: size = 0; break;
2884//ZZ case Iop_QAdd16Ux4: size = 1; break;
2885//ZZ case Iop_QAdd32Ux2: size = 2; break;
2886//ZZ case Iop_QAdd64Ux1: size = 3; break;
2887//ZZ default: vassert(0);
2888//ZZ }
2889//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQADDU,
2890//ZZ res, argL, argR, size, False));
2891//ZZ return res;
2892//ZZ }
2893//ZZ case Iop_QAdd8Sx8:
2894//ZZ case Iop_QAdd16Sx4:
2895//ZZ case Iop_QAdd32Sx2:
2896//ZZ case Iop_QAdd64Sx1: {
2897//ZZ HReg res = newVRegD(env);
2898//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2899//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2900//ZZ UInt size;
2901//ZZ switch (e->Iex.Binop.op) {
2902//ZZ case Iop_QAdd8Sx8: size = 0; break;
2903//ZZ case Iop_QAdd16Sx4: size = 1; break;
2904//ZZ case Iop_QAdd32Sx2: size = 2; break;
2905//ZZ case Iop_QAdd64Sx1: size = 3; break;
2906//ZZ default: vassert(0);
2907//ZZ }
2908//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQADDS,
2909//ZZ res, argL, argR, size, False));
2910//ZZ return res;
2911//ZZ }
2912//ZZ case Iop_Sub8x8:
2913//ZZ case Iop_Sub16x4:
2914//ZZ case Iop_Sub32x2:
2915//ZZ case Iop_Sub64: {
2916//ZZ HReg res = newVRegD(env);
2917//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2918//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2919//ZZ UInt size;
2920//ZZ switch (e->Iex.Binop.op) {
2921//ZZ case Iop_Sub8x8: size = 0; break;
2922//ZZ case Iop_Sub16x4: size = 1; break;
2923//ZZ case Iop_Sub32x2: size = 2; break;
2924//ZZ case Iop_Sub64: size = 3; break;
2925//ZZ default: vassert(0);
2926//ZZ }
2927//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
2928//ZZ res, argL, argR, size, False));
2929//ZZ return res;
2930//ZZ }
2931//ZZ case Iop_Sub32Fx2: {
2932//ZZ HReg res = newVRegD(env);
2933//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2934//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2935//ZZ UInt size = 0;
2936//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VSUBFP,
2937//ZZ res, argL, argR, size, False));
2938//ZZ return res;
2939//ZZ }
2940//ZZ case Iop_QSub8Ux8:
2941//ZZ case Iop_QSub16Ux4:
2942//ZZ case Iop_QSub32Ux2:
2943//ZZ case Iop_QSub64Ux1: {
2944//ZZ HReg res = newVRegD(env);
2945//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2946//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2947//ZZ UInt size;
2948//ZZ switch (e->Iex.Binop.op) {
2949//ZZ case Iop_QSub8Ux8: size = 0; break;
2950//ZZ case Iop_QSub16Ux4: size = 1; break;
2951//ZZ case Iop_QSub32Ux2: size = 2; break;
2952//ZZ case Iop_QSub64Ux1: size = 3; break;
2953//ZZ default: vassert(0);
2954//ZZ }
2955//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBU,
2956//ZZ res, argL, argR, size, False));
2957//ZZ return res;
2958//ZZ }
2959//ZZ case Iop_QSub8Sx8:
2960//ZZ case Iop_QSub16Sx4:
2961//ZZ case Iop_QSub32Sx2:
2962//ZZ case Iop_QSub64Sx1: {
2963//ZZ HReg res = newVRegD(env);
2964//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2965//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2966//ZZ UInt size;
2967//ZZ switch (e->Iex.Binop.op) {
2968//ZZ case Iop_QSub8Sx8: size = 0; break;
2969//ZZ case Iop_QSub16Sx4: size = 1; break;
2970//ZZ case Iop_QSub32Sx2: size = 2; break;
2971//ZZ case Iop_QSub64Sx1: size = 3; break;
2972//ZZ default: vassert(0);
2973//ZZ }
2974//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBS,
2975//ZZ res, argL, argR, size, False));
2976//ZZ return res;
2977//ZZ }
2978//ZZ case Iop_Max8Ux8:
2979//ZZ case Iop_Max16Ux4:
2980//ZZ case Iop_Max32Ux2: {
2981//ZZ HReg res = newVRegD(env);
2982//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2983//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2984//ZZ UInt size;
2985//ZZ switch (e->Iex.Binop.op) {
2986//ZZ case Iop_Max8Ux8: size = 0; break;
2987//ZZ case Iop_Max16Ux4: size = 1; break;
2988//ZZ case Iop_Max32Ux2: size = 2; break;
2989//ZZ default: vassert(0);
2990//ZZ }
2991//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMAXU,
2992//ZZ res, argL, argR, size, False));
2993//ZZ return res;
2994//ZZ }
2995//ZZ case Iop_Max8Sx8:
2996//ZZ case Iop_Max16Sx4:
2997//ZZ case Iop_Max32Sx2: {
2998//ZZ HReg res = newVRegD(env);
2999//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3000//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3001//ZZ UInt size;
3002//ZZ switch (e->Iex.Binop.op) {
3003//ZZ case Iop_Max8Sx8: size = 0; break;
3004//ZZ case Iop_Max16Sx4: size = 1; break;
3005//ZZ case Iop_Max32Sx2: size = 2; break;
3006//ZZ default: vassert(0);
3007//ZZ }
3008//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMAXS,
3009//ZZ res, argL, argR, size, False));
3010//ZZ return res;
3011//ZZ }
3012//ZZ case Iop_Min8Ux8:
3013//ZZ case Iop_Min16Ux4:
3014//ZZ case Iop_Min32Ux2: {
3015//ZZ HReg res = newVRegD(env);
3016//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3017//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3018//ZZ UInt size;
3019//ZZ switch (e->Iex.Binop.op) {
3020//ZZ case Iop_Min8Ux8: size = 0; break;
3021//ZZ case Iop_Min16Ux4: size = 1; break;
3022//ZZ case Iop_Min32Ux2: size = 2; break;
3023//ZZ default: vassert(0);
3024//ZZ }
3025//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMINU,
3026//ZZ res, argL, argR, size, False));
3027//ZZ return res;
3028//ZZ }
3029//ZZ case Iop_Min8Sx8:
3030//ZZ case Iop_Min16Sx4:
3031//ZZ case Iop_Min32Sx2: {
3032//ZZ HReg res = newVRegD(env);
3033//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3034//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3035//ZZ UInt size;
3036//ZZ switch (e->Iex.Binop.op) {
3037//ZZ case Iop_Min8Sx8: size = 0; break;
3038//ZZ case Iop_Min16Sx4: size = 1; break;
3039//ZZ case Iop_Min32Sx2: size = 2; break;
3040//ZZ default: vassert(0);
3041//ZZ }
3042//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMINS,
3043//ZZ res, argL, argR, size, False));
3044//ZZ return res;
3045//ZZ }
3046//ZZ case Iop_Sar8x8:
3047//ZZ case Iop_Sar16x4:
3048//ZZ case Iop_Sar32x2: {
3049//ZZ HReg res = newVRegD(env);
3050//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3051//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3052//ZZ HReg argR2 = newVRegD(env);
3053//ZZ HReg zero = newVRegD(env);
3054//ZZ UInt size;
3055//ZZ switch (e->Iex.Binop.op) {
3056//ZZ case Iop_Sar8x8: size = 0; break;
3057//ZZ case Iop_Sar16x4: size = 1; break;
3058//ZZ case Iop_Sar32x2: size = 2; break;
3059//ZZ case Iop_Sar64: size = 3; break;
3060//ZZ default: vassert(0);
3061//ZZ }
3062//ZZ addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
3063//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
3064//ZZ argR2, zero, argR, size, False));
3065//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
3066//ZZ res, argL, argR2, size, False));
3067//ZZ return res;
3068//ZZ }
3069//ZZ case Iop_Sal8x8:
3070//ZZ case Iop_Sal16x4:
3071//ZZ case Iop_Sal32x2:
3072//ZZ case Iop_Sal64x1: {
3073//ZZ HReg res = newVRegD(env);
3074//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3075//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3076//ZZ UInt size;
3077//ZZ switch (e->Iex.Binop.op) {
3078//ZZ case Iop_Sal8x8: size = 0; break;
3079//ZZ case Iop_Sal16x4: size = 1; break;
3080//ZZ case Iop_Sal32x2: size = 2; break;
3081//ZZ case Iop_Sal64x1: size = 3; break;
3082//ZZ default: vassert(0);
3083//ZZ }
3084//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
3085//ZZ res, argL, argR, size, False));
3086//ZZ return res;
3087//ZZ }
3088//ZZ case Iop_Shr8x8:
3089//ZZ case Iop_Shr16x4:
3090//ZZ case Iop_Shr32x2: {
3091//ZZ HReg res = newVRegD(env);
3092//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3093//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3094//ZZ HReg argR2 = newVRegD(env);
3095//ZZ HReg zero = newVRegD(env);
3096//ZZ UInt size;
3097//ZZ switch (e->Iex.Binop.op) {
3098//ZZ case Iop_Shr8x8: size = 0; break;
3099//ZZ case Iop_Shr16x4: size = 1; break;
3100//ZZ case Iop_Shr32x2: size = 2; break;
3101//ZZ default: vassert(0);
3102//ZZ }
3103//ZZ addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
3104//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
3105//ZZ argR2, zero, argR, size, False));
3106//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
3107//ZZ res, argL, argR2, size, False));
3108//ZZ return res;
3109//ZZ }
3110//ZZ case Iop_Shl8x8:
3111//ZZ case Iop_Shl16x4:
3112//ZZ case Iop_Shl32x2: {
3113//ZZ HReg res = newVRegD(env);
3114//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3115//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3116//ZZ UInt size;
3117//ZZ switch (e->Iex.Binop.op) {
3118//ZZ case Iop_Shl8x8: size = 0; break;
3119//ZZ case Iop_Shl16x4: size = 1; break;
3120//ZZ case Iop_Shl32x2: size = 2; break;
3121//ZZ default: vassert(0);
3122//ZZ }
3123//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
3124//ZZ res, argL, argR, size, False));
3125//ZZ return res;
3126//ZZ }
3127//ZZ case Iop_QShl8x8:
3128//ZZ case Iop_QShl16x4:
3129//ZZ case Iop_QShl32x2:
3130//ZZ case Iop_QShl64x1: {
3131//ZZ HReg res = newVRegD(env);
3132//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3133//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3134//ZZ UInt size;
3135//ZZ switch (e->Iex.Binop.op) {
3136//ZZ case Iop_QShl8x8: size = 0; break;
3137//ZZ case Iop_QShl16x4: size = 1; break;
3138//ZZ case Iop_QShl32x2: size = 2; break;
3139//ZZ case Iop_QShl64x1: size = 3; break;
3140//ZZ default: vassert(0);
3141//ZZ }
3142//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VQSHL,
3143//ZZ res, argL, argR, size, False));
3144//ZZ return res;
3145//ZZ }
3146//ZZ case Iop_QSal8x8:
3147//ZZ case Iop_QSal16x4:
3148//ZZ case Iop_QSal32x2:
3149//ZZ case Iop_QSal64x1: {
3150//ZZ HReg res = newVRegD(env);
3151//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3152//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3153//ZZ UInt size;
3154//ZZ switch (e->Iex.Binop.op) {
3155//ZZ case Iop_QSal8x8: size = 0; break;
3156//ZZ case Iop_QSal16x4: size = 1; break;
3157//ZZ case Iop_QSal32x2: size = 2; break;
3158//ZZ case Iop_QSal64x1: size = 3; break;
3159//ZZ default: vassert(0);
3160//ZZ }
3161//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VQSAL,
3162//ZZ res, argL, argR, size, False));
3163//ZZ return res;
3164//ZZ }
3165//ZZ case Iop_QShlN8x8:
3166//ZZ case Iop_QShlN16x4:
3167//ZZ case Iop_QShlN32x2:
3168//ZZ case Iop_QShlN64x1: {
3169//ZZ HReg res = newVRegD(env);
3170//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3171//ZZ UInt size, imm;
3172//ZZ if (e->Iex.Binop.arg2->tag != Iex_Const ||
3173//ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
3174//ZZ               vpanic("ARM target supports Iop_QShlNAxB with constant "
3175//ZZ                      "second argument only\n");
3176//ZZ }
3177//ZZ imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
3178//ZZ switch (e->Iex.Binop.op) {
3179//ZZ case Iop_QShlN8x8: size = 8 | imm; break;
3180//ZZ case Iop_QShlN16x4: size = 16 | imm; break;
3181//ZZ case Iop_QShlN32x2: size = 32 | imm; break;
3182//ZZ case Iop_QShlN64x1: size = 64 | imm; break;
3183//ZZ default: vassert(0);
3184//ZZ }
3185//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUU,
3186//ZZ res, argL, size, False));
3187//ZZ return res;
3188//ZZ }
3189//ZZ case Iop_QShlN8Sx8:
3190//ZZ case Iop_QShlN16Sx4:
3191//ZZ case Iop_QShlN32Sx2:
3192//ZZ case Iop_QShlN64Sx1: {
3193//ZZ HReg res = newVRegD(env);
3194//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3195//ZZ UInt size, imm;
3196//ZZ if (e->Iex.Binop.arg2->tag != Iex_Const ||
3197//ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
3198//ZZ               vpanic("ARM target supports Iop_QShlNAxB with constant "
3199//ZZ                      "second argument only\n");
3200//ZZ }
3201//ZZ imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
3202//ZZ switch (e->Iex.Binop.op) {
3203//ZZ case Iop_QShlN8Sx8: size = 8 | imm; break;
3204//ZZ case Iop_QShlN16Sx4: size = 16 | imm; break;
3205//ZZ case Iop_QShlN32Sx2: size = 32 | imm; break;
3206//ZZ case Iop_QShlN64Sx1: size = 64 | imm; break;
3207//ZZ default: vassert(0);
3208//ZZ }
3209//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUS,
3210//ZZ res, argL, size, False));
3211//ZZ return res;
3212//ZZ }
3213//ZZ case Iop_QSalN8x8:
3214//ZZ case Iop_QSalN16x4:
3215//ZZ case Iop_QSalN32x2:
3216//ZZ case Iop_QSalN64x1: {
3217//ZZ HReg res = newVRegD(env);
3218//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3219//ZZ UInt size, imm;
3220//ZZ if (e->Iex.Binop.arg2->tag != Iex_Const ||
3221//ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
3222//ZZ               vpanic("ARM target supports Iop_QShlNAxB with constant "
3223//ZZ                      "second argument only\n");
3224//ZZ }
3225//ZZ imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
3226//ZZ switch (e->Iex.Binop.op) {
3227//ZZ case Iop_QSalN8x8: size = 8 | imm; break;
3228//ZZ case Iop_QSalN16x4: size = 16 | imm; break;
3229//ZZ case Iop_QSalN32x2: size = 32 | imm; break;
3230//ZZ case Iop_QSalN64x1: size = 64 | imm; break;
3231//ZZ default: vassert(0);
3232//ZZ }
3233//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNSS,
3234//ZZ res, argL, size, False));
3235//ZZ return res;
3236//ZZ }
3237//ZZ case Iop_ShrN8x8:
3238//ZZ case Iop_ShrN16x4:
3239//ZZ case Iop_ShrN32x2:
3240//ZZ case Iop_Shr64: {
3241//ZZ HReg res = newVRegD(env);
3242//ZZ HReg tmp = newVRegD(env);
3243//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3244//ZZ HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
3245//ZZ HReg argR2 = newVRegI(env);
3246//ZZ UInt size;
3247//ZZ switch (e->Iex.Binop.op) {
3248//ZZ case Iop_ShrN8x8: size = 0; break;
3249//ZZ case Iop_ShrN16x4: size = 1; break;
3250//ZZ case Iop_ShrN32x2: size = 2; break;
3251//ZZ case Iop_Shr64: size = 3; break;
3252//ZZ default: vassert(0);
3253//ZZ }
3254//ZZ addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
3255//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, False));
3256//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
3257//ZZ res, argL, tmp, size, False));
3258//ZZ return res;
3259//ZZ }
3260//ZZ case Iop_ShlN8x8:
3261//ZZ case Iop_ShlN16x4:
3262//ZZ case Iop_ShlN32x2:
3263//ZZ case Iop_Shl64: {
3264//ZZ HReg res = newVRegD(env);
3265//ZZ HReg tmp = newVRegD(env);
3266//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3267//ZZ /* special-case Shl64(x, imm8) since the Neon front
3268//ZZ end produces a lot of those for V{LD,ST}{1,2,3,4}. */
3269//ZZ if (e->Iex.Binop.op == Iop_Shl64
3270//ZZ && e->Iex.Binop.arg2->tag == Iex_Const) {
3271//ZZ vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
3272//ZZ Int nshift = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
3273//ZZ if (nshift >= 1 && nshift <= 63) {
3274//ZZ addInstr(env, ARMInstr_NShl64(res, argL, nshift));
3275//ZZ return res;
3276//ZZ }
3277//ZZ /* else fall through to general case */
3278//ZZ }
3279//ZZ HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
3280//ZZ UInt size;
3281//ZZ switch (e->Iex.Binop.op) {
3282//ZZ case Iop_ShlN8x8: size = 0; break;
3283//ZZ case Iop_ShlN16x4: size = 1; break;
3284//ZZ case Iop_ShlN32x2: size = 2; break;
3285//ZZ case Iop_Shl64: size = 3; break;
3286//ZZ default: vassert(0);
3287//ZZ }
3288//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_DUP,
3289//ZZ tmp, argR, 0, False));
3290//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
3291//ZZ res, argL, tmp, size, False));
3292//ZZ return res;
3293//ZZ }
3294//ZZ case Iop_SarN8x8:
3295//ZZ case Iop_SarN16x4:
3296//ZZ case Iop_SarN32x2:
3297//ZZ case Iop_Sar64: {
3298//ZZ HReg res = newVRegD(env);
3299//ZZ HReg tmp = newVRegD(env);
3300//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3301//ZZ HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
3302//ZZ HReg argR2 = newVRegI(env);
3303//ZZ UInt size;
3304//ZZ switch (e->Iex.Binop.op) {
3305//ZZ case Iop_SarN8x8: size = 0; break;
3306//ZZ case Iop_SarN16x4: size = 1; break;
3307//ZZ case Iop_SarN32x2: size = 2; break;
3308//ZZ case Iop_Sar64: size = 3; break;
3309//ZZ default: vassert(0);
3310//ZZ }
3311//ZZ addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
3312//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, False));
3313//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
3314//ZZ res, argL, tmp, size, False));
3315//ZZ return res;
3316//ZZ }
3317//ZZ case Iop_CmpGT8Ux8:
3318//ZZ case Iop_CmpGT16Ux4:
3319//ZZ case Iop_CmpGT32Ux2: {
3320//ZZ HReg res = newVRegD(env);
3321//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3322//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3323//ZZ UInt size;
3324//ZZ switch (e->Iex.Binop.op) {
3325//ZZ case Iop_CmpGT8Ux8: size = 0; break;
3326//ZZ case Iop_CmpGT16Ux4: size = 1; break;
3327//ZZ case Iop_CmpGT32Ux2: size = 2; break;
3328//ZZ default: vassert(0);
3329//ZZ }
3330//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGTU,
3331//ZZ res, argL, argR, size, False));
3332//ZZ return res;
3333//ZZ }
3334//ZZ case Iop_CmpGT8Sx8:
3335//ZZ case Iop_CmpGT16Sx4:
3336//ZZ case Iop_CmpGT32Sx2: {
3337//ZZ HReg res = newVRegD(env);
3338//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3339//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3340//ZZ UInt size;
3341//ZZ switch (e->Iex.Binop.op) {
3342//ZZ case Iop_CmpGT8Sx8: size = 0; break;
3343//ZZ case Iop_CmpGT16Sx4: size = 1; break;
3344//ZZ case Iop_CmpGT32Sx2: size = 2; break;
3345//ZZ default: vassert(0);
3346//ZZ }
3347//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGTS,
3348//ZZ res, argL, argR, size, False));
3349//ZZ return res;
3350//ZZ }
3351//ZZ case Iop_CmpEQ8x8:
3352//ZZ case Iop_CmpEQ16x4:
3353//ZZ case Iop_CmpEQ32x2: {
3354//ZZ HReg res = newVRegD(env);
3355//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3356//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3357//ZZ UInt size;
3358//ZZ switch (e->Iex.Binop.op) {
3359//ZZ case Iop_CmpEQ8x8: size = 0; break;
3360//ZZ case Iop_CmpEQ16x4: size = 1; break;
3361//ZZ case Iop_CmpEQ32x2: size = 2; break;
3362//ZZ default: vassert(0);
3363//ZZ }
3364//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCEQ,
3365//ZZ res, argL, argR, size, False));
3366//ZZ return res;
3367//ZZ }
3368//ZZ case Iop_Mul8x8:
3369//ZZ case Iop_Mul16x4:
3370//ZZ case Iop_Mul32x2: {
3371//ZZ HReg res = newVRegD(env);
3372//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3373//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3374//ZZ UInt size = 0;
3375//ZZ switch(e->Iex.Binop.op) {
3376//ZZ case Iop_Mul8x8: size = 0; break;
3377//ZZ case Iop_Mul16x4: size = 1; break;
3378//ZZ case Iop_Mul32x2: size = 2; break;
3379//ZZ default: vassert(0);
3380//ZZ }
3381//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMUL,
3382//ZZ res, argL, argR, size, False));
3383//ZZ return res;
3384//ZZ }
3385//ZZ case Iop_Mul32Fx2: {
3386//ZZ HReg res = newVRegD(env);
3387//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3388//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3389//ZZ UInt size = 0;
3390//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMULFP,
3391//ZZ res, argL, argR, size, False));
3392//ZZ return res;
3393//ZZ }
3394//ZZ case Iop_QDMulHi16Sx4:
3395//ZZ case Iop_QDMulHi32Sx2: {
3396//ZZ HReg res = newVRegD(env);
3397//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3398//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3399//ZZ UInt size = 0;
3400//ZZ switch(e->Iex.Binop.op) {
3401//ZZ case Iop_QDMulHi16Sx4: size = 1; break;
3402//ZZ case Iop_QDMulHi32Sx2: size = 2; break;
3403//ZZ default: vassert(0);
3404//ZZ }
3405//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULH,
3406//ZZ res, argL, argR, size, False));
3407//ZZ return res;
3408//ZZ }
3409//ZZ
3410//ZZ case Iop_QRDMulHi16Sx4:
3411//ZZ case Iop_QRDMulHi32Sx2: {
3412//ZZ HReg res = newVRegD(env);
3413//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3414//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3415//ZZ UInt size = 0;
3416//ZZ switch(e->Iex.Binop.op) {
3417//ZZ case Iop_QRDMulHi16Sx4: size = 1; break;
3418//ZZ case Iop_QRDMulHi32Sx2: size = 2; break;
3419//ZZ default: vassert(0);
3420//ZZ }
3421//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQRDMULH,
3422//ZZ res, argL, argR, size, False));
3423//ZZ return res;
3424//ZZ }
3425//ZZ
3426//ZZ case Iop_PwAdd8x8:
3427//ZZ case Iop_PwAdd16x4:
3428//ZZ case Iop_PwAdd32x2: {
3429//ZZ HReg res = newVRegD(env);
3430//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3431//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3432//ZZ UInt size = 0;
3433//ZZ switch(e->Iex.Binop.op) {
3434//ZZ case Iop_PwAdd8x8: size = 0; break;
3435//ZZ case Iop_PwAdd16x4: size = 1; break;
3436//ZZ case Iop_PwAdd32x2: size = 2; break;
3437//ZZ default: vassert(0);
3438//ZZ }
3439//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VPADD,
3440//ZZ res, argL, argR, size, False));
3441//ZZ return res;
3442//ZZ }
3443//ZZ case Iop_PwAdd32Fx2: {
3444//ZZ HReg res = newVRegD(env);
3445//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3446//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3447//ZZ UInt size = 0;
3448//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VPADDFP,
3449//ZZ res, argL, argR, size, False));
3450//ZZ return res;
3451//ZZ }
3452//ZZ case Iop_PwMin8Ux8:
3453//ZZ case Iop_PwMin16Ux4:
3454//ZZ case Iop_PwMin32Ux2: {
3455//ZZ HReg res = newVRegD(env);
3456//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3457//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3458//ZZ UInt size = 0;
3459//ZZ switch(e->Iex.Binop.op) {
3460//ZZ case Iop_PwMin8Ux8: size = 0; break;
3461//ZZ case Iop_PwMin16Ux4: size = 1; break;
3462//ZZ case Iop_PwMin32Ux2: size = 2; break;
3463//ZZ default: vassert(0);
3464//ZZ }
3465//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VPMINU,
3466//ZZ res, argL, argR, size, False));
3467//ZZ return res;
3468//ZZ }
3469//ZZ case Iop_PwMin8Sx8:
3470//ZZ case Iop_PwMin16Sx4:
3471//ZZ case Iop_PwMin32Sx2: {
3472//ZZ HReg res = newVRegD(env);
3473//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3474//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3475//ZZ UInt size = 0;
3476//ZZ switch(e->Iex.Binop.op) {
3477//ZZ case Iop_PwMin8Sx8: size = 0; break;
3478//ZZ case Iop_PwMin16Sx4: size = 1; break;
3479//ZZ case Iop_PwMin32Sx2: size = 2; break;
3480//ZZ default: vassert(0);
3481//ZZ }
3482//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VPMINS,
3483//ZZ res, argL, argR, size, False));
3484//ZZ return res;
3485//ZZ }
3486//ZZ case Iop_PwMax8Ux8:
3487//ZZ case Iop_PwMax16Ux4:
3488//ZZ case Iop_PwMax32Ux2: {
3489//ZZ HReg res = newVRegD(env);
3490//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3491//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3492//ZZ UInt size = 0;
3493//ZZ switch(e->Iex.Binop.op) {
3494//ZZ case Iop_PwMax8Ux8: size = 0; break;
3495//ZZ case Iop_PwMax16Ux4: size = 1; break;
3496//ZZ case Iop_PwMax32Ux2: size = 2; break;
3497//ZZ default: vassert(0);
3498//ZZ }
3499//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXU,
3500//ZZ res, argL, argR, size, False));
3501//ZZ return res;
3502//ZZ }
3503//ZZ case Iop_PwMax8Sx8:
3504//ZZ case Iop_PwMax16Sx4:
3505//ZZ case Iop_PwMax32Sx2: {
3506//ZZ HReg res = newVRegD(env);
3507//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3508//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3509//ZZ UInt size = 0;
3510//ZZ switch(e->Iex.Binop.op) {
3511//ZZ case Iop_PwMax8Sx8: size = 0; break;
3512//ZZ case Iop_PwMax16Sx4: size = 1; break;
3513//ZZ case Iop_PwMax32Sx2: size = 2; break;
3514//ZZ default: vassert(0);
3515//ZZ }
3516//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXS,
3517//ZZ res, argL, argR, size, False));
3518//ZZ return res;
3519//ZZ }
3520//ZZ case Iop_Perm8x8: {
3521//ZZ HReg res = newVRegD(env);
3522//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3523//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3524//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VTBL,
3525//ZZ res, argL, argR, 0, False));
3526//ZZ return res;
3527//ZZ }
3528//ZZ case Iop_PolynomialMul8x8: {
3529//ZZ HReg res = newVRegD(env);
3530//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3531//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3532//ZZ UInt size = 0;
3533//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMULP,
3534//ZZ res, argL, argR, size, False));
3535//ZZ return res;
3536//ZZ }
3537//ZZ case Iop_Max32Fx2: {
3538//ZZ HReg res = newVRegD(env);
3539//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3540//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3541//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMAXF,
3542//ZZ res, argL, argR, 2, False));
3543//ZZ return res;
3544//ZZ }
3545//ZZ case Iop_Min32Fx2: {
3546//ZZ HReg res = newVRegD(env);
3547//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3548//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3549//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMINF,
3550//ZZ res, argL, argR, 2, False));
3551//ZZ return res;
3552//ZZ }
3553//ZZ case Iop_PwMax32Fx2: {
3554//ZZ HReg res = newVRegD(env);
3555//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3556//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3557//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXF,
3558//ZZ res, argL, argR, 2, False));
3559//ZZ return res;
3560//ZZ }
3561//ZZ case Iop_PwMin32Fx2: {
3562//ZZ HReg res = newVRegD(env);
3563//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3564//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3565//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VPMINF,
3566//ZZ res, argL, argR, 2, False));
3567//ZZ return res;
3568//ZZ }
3569//ZZ case Iop_CmpGT32Fx2: {
3570//ZZ HReg res = newVRegD(env);
3571//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3572//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3573//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGTF,
3574//ZZ res, argL, argR, 2, False));
3575//ZZ return res;
3576//ZZ }
3577//ZZ case Iop_CmpGE32Fx2: {
3578//ZZ HReg res = newVRegD(env);
3579//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3580//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3581//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGEF,
3582//ZZ res, argL, argR, 2, False));
3583//ZZ return res;
3584//ZZ }
3585//ZZ case Iop_CmpEQ32Fx2: {
3586//ZZ HReg res = newVRegD(env);
3587//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3588//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3589//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCEQF,
3590//ZZ res, argL, argR, 2, False));
3591//ZZ return res;
3592//ZZ }
3593//ZZ case Iop_F32ToFixed32Ux2_RZ:
3594//ZZ case Iop_F32ToFixed32Sx2_RZ:
3595//ZZ case Iop_Fixed32UToF32x2_RN:
3596//ZZ case Iop_Fixed32SToF32x2_RN: {
3597//ZZ HReg res = newVRegD(env);
3598//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Binop.arg1);
3599//ZZ ARMNeonUnOp op;
3600//ZZ UInt imm6;
3601//ZZ if (e->Iex.Binop.arg2->tag != Iex_Const ||
3602//ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
3603//ZZ vpanic("ARM supports FP <-> Fixed conversion with constant "
3604//ZZ "second argument less than 33 only\n");
3605//ZZ }
3606//ZZ imm6 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
3607//ZZ vassert(imm6 <= 32 && imm6 > 0);
3608//ZZ imm6 = 64 - imm6;
3609//ZZ switch(e->Iex.Binop.op) {
3610//ZZ case Iop_F32ToFixed32Ux2_RZ: op = ARMneon_VCVTFtoFixedU; break;
3611//ZZ case Iop_F32ToFixed32Sx2_RZ: op = ARMneon_VCVTFtoFixedS; break;
3612//ZZ case Iop_Fixed32UToF32x2_RN: op = ARMneon_VCVTFixedUtoF; break;
3613//ZZ case Iop_Fixed32SToF32x2_RN: op = ARMneon_VCVTFixedStoF; break;
3614//ZZ default: vassert(0);
3615//ZZ }
3616//ZZ addInstr(env, ARMInstr_NUnary(op, res, arg, imm6, False));
3617//ZZ return res;
3618//ZZ }
3619//ZZ /*
3620//ZZ FIXME: is this here or not?
3621//ZZ case Iop_VDup8x8:
3622//ZZ case Iop_VDup16x4:
3623//ZZ case Iop_VDup32x2: {
3624//ZZ HReg res = newVRegD(env);
3625//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3626//ZZ UInt index;
3627//ZZ UInt imm4;
3628//ZZ UInt size = 0;
3629//ZZ if (e->Iex.Binop.arg2->tag != Iex_Const ||
3630//ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
3631//ZZ vpanic("ARM supports Iop_VDup with constant "
3632//ZZ "second argument less than 16 only\n");
3633//ZZ }
3634//ZZ index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
3635//ZZ switch(e->Iex.Binop.op) {
3636//ZZ case Iop_VDup8x8: imm4 = (index << 1) + 1; break;
3637//ZZ case Iop_VDup16x4: imm4 = (index << 2) + 2; break;
3638//ZZ case Iop_VDup32x2: imm4 = (index << 3) + 4; break;
3639//ZZ default: vassert(0);
3640//ZZ }
3641//ZZ if (imm4 >= 16) {
3642//ZZ vpanic("ARM supports Iop_VDup with constant "
3643//ZZ "second argument less than 16 only\n");
3644//ZZ }
3645//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VDUP,
3646//ZZ res, argL, imm4, False));
3647//ZZ return res;
3648//ZZ }
3649//ZZ */
3650//ZZ default:
3651//ZZ break;
3652//ZZ }
3653//ZZ }
3654//ZZ
3655//ZZ /* --------- UNARY ops --------- */
3656//ZZ if (e->tag == Iex_Unop) {
3657//ZZ switch (e->Iex.Unop.op) {
3658//ZZ
3659//ZZ /* 32Uto64 */
3660//ZZ case Iop_32Uto64: {
3661//ZZ HReg rLo = iselIntExpr_R(env, e->Iex.Unop.arg);
3662//ZZ HReg rHi = newVRegI(env);
3663//ZZ HReg res = newVRegD(env);
3664//ZZ addInstr(env, ARMInstr_Imm32(rHi, 0));
3665//ZZ addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
3666//ZZ return res;
3667//ZZ }
3668//ZZ
3669//ZZ /* 32Sto64 */
3670//ZZ case Iop_32Sto64: {
3671//ZZ HReg rLo = iselIntExpr_R(env, e->Iex.Unop.arg);
3672//ZZ HReg rHi = newVRegI(env);
3673//ZZ addInstr(env, mk_iMOVds_RR(rHi, rLo));
3674//ZZ addInstr(env, ARMInstr_Shift(ARMsh_SAR, rHi, rHi, ARMRI5_I5(31)));
3675//ZZ HReg res = newVRegD(env);
3676//ZZ addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
3677//ZZ return res;
3678//ZZ }
3679//ZZ
3680//ZZ /* The next 3 are pass-throughs */
3681//ZZ /* ReinterpF64asI64 */
3682//ZZ case Iop_ReinterpF64asI64:
3683//ZZ /* Left64(e) */
3684//ZZ case Iop_Left64:
3685//ZZ /* CmpwNEZ64(e) */
3686//ZZ case Iop_1Sto64: {
3687//ZZ HReg rLo, rHi;
3688//ZZ HReg res = newVRegD(env);
3689//ZZ iselInt64Expr(&rHi, &rLo, env, e);
3690//ZZ addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
3691//ZZ return res;
3692//ZZ }
3693//ZZ
3694//ZZ case Iop_Not64: {
3695//ZZ DECLARE_PATTERN(p_veqz_8x8);
3696//ZZ DECLARE_PATTERN(p_veqz_16x4);
3697//ZZ DECLARE_PATTERN(p_veqz_32x2);
3698//ZZ DECLARE_PATTERN(p_vcge_8sx8);
3699//ZZ DECLARE_PATTERN(p_vcge_16sx4);
3700//ZZ DECLARE_PATTERN(p_vcge_32sx2);
3701//ZZ DECLARE_PATTERN(p_vcge_8ux8);
3702//ZZ DECLARE_PATTERN(p_vcge_16ux4);
3703//ZZ DECLARE_PATTERN(p_vcge_32ux2);
3704//ZZ DEFINE_PATTERN(p_veqz_8x8,
3705//ZZ unop(Iop_Not64, unop(Iop_CmpNEZ8x8, bind(0))));
3706//ZZ DEFINE_PATTERN(p_veqz_16x4,
3707//ZZ unop(Iop_Not64, unop(Iop_CmpNEZ16x4, bind(0))));
3708//ZZ DEFINE_PATTERN(p_veqz_32x2,
3709//ZZ unop(Iop_Not64, unop(Iop_CmpNEZ32x2, bind(0))));
3710//ZZ DEFINE_PATTERN(p_vcge_8sx8,
3711//ZZ unop(Iop_Not64, binop(Iop_CmpGT8Sx8, bind(1), bind(0))));
3712//ZZ DEFINE_PATTERN(p_vcge_16sx4,
3713//ZZ unop(Iop_Not64, binop(Iop_CmpGT16Sx4, bind(1), bind(0))));
3714//ZZ DEFINE_PATTERN(p_vcge_32sx2,
3715//ZZ unop(Iop_Not64, binop(Iop_CmpGT32Sx2, bind(1), bind(0))));
3716//ZZ DEFINE_PATTERN(p_vcge_8ux8,
3717//ZZ unop(Iop_Not64, binop(Iop_CmpGT8Ux8, bind(1), bind(0))));
3718//ZZ DEFINE_PATTERN(p_vcge_16ux4,
3719//ZZ unop(Iop_Not64, binop(Iop_CmpGT16Ux4, bind(1), bind(0))));
3720//ZZ DEFINE_PATTERN(p_vcge_32ux2,
3721//ZZ unop(Iop_Not64, binop(Iop_CmpGT32Ux2, bind(1), bind(0))));
3722//ZZ if (matchIRExpr(&mi, p_veqz_8x8, e)) {
3723//ZZ HReg res = newVRegD(env);
3724//ZZ HReg arg = iselNeon64Expr(env, mi.bindee[0]);
3725//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 0, False));
3726//ZZ return res;
3727//ZZ } else if (matchIRExpr(&mi, p_veqz_16x4, e)) {
3728//ZZ HReg res = newVRegD(env);
3729//ZZ HReg arg = iselNeon64Expr(env, mi.bindee[0]);
3730//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 1, False));
3731//ZZ return res;
3732//ZZ } else if (matchIRExpr(&mi, p_veqz_32x2, e)) {
3733//ZZ HReg res = newVRegD(env);
3734//ZZ HReg arg = iselNeon64Expr(env, mi.bindee[0]);
3735//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 2, False));
3736//ZZ return res;
3737//ZZ } else if (matchIRExpr(&mi, p_vcge_8sx8, e)) {
3738//ZZ HReg res = newVRegD(env);
3739//ZZ HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3740//ZZ HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3741//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3742//ZZ res, argL, argR, 0, False));
3743//ZZ return res;
3744//ZZ } else if (matchIRExpr(&mi, p_vcge_16sx4, e)) {
3745//ZZ HReg res = newVRegD(env);
3746//ZZ HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3747//ZZ HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3748//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3749//ZZ res, argL, argR, 1, False));
3750//ZZ return res;
3751//ZZ } else if (matchIRExpr(&mi, p_vcge_32sx2, e)) {
3752//ZZ HReg res = newVRegD(env);
3753//ZZ HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3754//ZZ HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3755//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3756//ZZ res, argL, argR, 2, False));
3757//ZZ return res;
3758//ZZ } else if (matchIRExpr(&mi, p_vcge_8ux8, e)) {
3759//ZZ HReg res = newVRegD(env);
3760//ZZ HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3761//ZZ HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3762//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3763//ZZ res, argL, argR, 0, False));
3764//ZZ return res;
3765//ZZ } else if (matchIRExpr(&mi, p_vcge_16ux4, e)) {
3766//ZZ HReg res = newVRegD(env);
3767//ZZ HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3768//ZZ HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3769//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3770//ZZ res, argL, argR, 1, False));
3771//ZZ return res;
3772//ZZ } else if (matchIRExpr(&mi, p_vcge_32ux2, e)) {
3773//ZZ HReg res = newVRegD(env);
3774//ZZ HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3775//ZZ HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3776//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3777//ZZ res, argL, argR, 2, False));
3778//ZZ return res;
3779//ZZ } else {
3780//ZZ HReg res = newVRegD(env);
3781//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3782//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, arg, 4, False));
3783//ZZ return res;
3784//ZZ }
3785//ZZ }
3786//ZZ case Iop_Dup8x8:
3787//ZZ case Iop_Dup16x4:
3788//ZZ case Iop_Dup32x2: {
3789//ZZ HReg res, arg;
3790//ZZ UInt size;
3791//ZZ DECLARE_PATTERN(p_vdup_8x8);
3792//ZZ DECLARE_PATTERN(p_vdup_16x4);
3793//ZZ DECLARE_PATTERN(p_vdup_32x2);
3794//ZZ DEFINE_PATTERN(p_vdup_8x8,
3795//ZZ unop(Iop_Dup8x8, binop(Iop_GetElem8x8, bind(0), bind(1))));
3796//ZZ DEFINE_PATTERN(p_vdup_16x4,
3797//ZZ unop(Iop_Dup16x4, binop(Iop_GetElem16x4, bind(0), bind(1))));
3798//ZZ DEFINE_PATTERN(p_vdup_32x2,
3799//ZZ unop(Iop_Dup32x2, binop(Iop_GetElem32x2, bind(0), bind(1))));
3800//ZZ if (matchIRExpr(&mi, p_vdup_8x8, e)) {
3801//ZZ UInt index;
3802//ZZ UInt imm4;
3803//ZZ if (mi.bindee[1]->tag == Iex_Const &&
3804//ZZ typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3805//ZZ index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3806//ZZ imm4 = (index << 1) + 1;
3807//ZZ if (index < 8) {
3808//ZZ res = newVRegD(env);
3809//ZZ arg = iselNeon64Expr(env, mi.bindee[0]);
3810//ZZ addInstr(env, ARMInstr_NUnaryS(
3811//ZZ ARMneon_VDUP,
3812//ZZ mkARMNRS(ARMNRS_Reg, res, 0),
3813//ZZ mkARMNRS(ARMNRS_Scalar, arg, index),
3814//ZZ imm4, False
3815//ZZ ));
3816//ZZ return res;
3817//ZZ }
3818//ZZ }
3819//ZZ } else if (matchIRExpr(&mi, p_vdup_16x4, e)) {
3820//ZZ UInt index;
3821//ZZ UInt imm4;
3822//ZZ if (mi.bindee[1]->tag == Iex_Const &&
3823//ZZ typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3824//ZZ index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3825//ZZ imm4 = (index << 2) + 2;
3826//ZZ if (index < 4) {
3827//ZZ res = newVRegD(env);
3828//ZZ arg = iselNeon64Expr(env, mi.bindee[0]);
3829//ZZ addInstr(env, ARMInstr_NUnaryS(
3830//ZZ ARMneon_VDUP,
3831//ZZ mkARMNRS(ARMNRS_Reg, res, 0),
3832//ZZ mkARMNRS(ARMNRS_Scalar, arg, index),
3833//ZZ imm4, False
3834//ZZ ));
3835//ZZ return res;
3836//ZZ }
3837//ZZ }
3838//ZZ } else if (matchIRExpr(&mi, p_vdup_32x2, e)) {
3839//ZZ UInt index;
3840//ZZ UInt imm4;
3841//ZZ if (mi.bindee[1]->tag == Iex_Const &&
3842//ZZ typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3843//ZZ index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3844//ZZ imm4 = (index << 3) + 4;
3845//ZZ if (index < 2) {
3846//ZZ res = newVRegD(env);
3847//ZZ arg = iselNeon64Expr(env, mi.bindee[0]);
3848//ZZ addInstr(env, ARMInstr_NUnaryS(
3849//ZZ ARMneon_VDUP,
3850//ZZ mkARMNRS(ARMNRS_Reg, res, 0),
3851//ZZ mkARMNRS(ARMNRS_Scalar, arg, index),
3852//ZZ imm4, False
3853//ZZ ));
3854//ZZ return res;
3855//ZZ }
3856//ZZ }
3857//ZZ }
3858//ZZ arg = iselIntExpr_R(env, e->Iex.Unop.arg);
3859//ZZ res = newVRegD(env);
3860//ZZ switch (e->Iex.Unop.op) {
3861//ZZ case Iop_Dup8x8: size = 0; break;
3862//ZZ case Iop_Dup16x4: size = 1; break;
3863//ZZ case Iop_Dup32x2: size = 2; break;
3864//ZZ default: vassert(0);
3865//ZZ }
3866//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_DUP, res, arg, size, False));
3867//ZZ return res;
3868//ZZ }
3869//ZZ case Iop_Abs8x8:
3870//ZZ case Iop_Abs16x4:
3871//ZZ case Iop_Abs32x2: {
3872//ZZ HReg res = newVRegD(env);
3873//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3874//ZZ UInt size = 0;
3875//ZZ switch(e->Iex.Binop.op) {
3876//ZZ case Iop_Abs8x8: size = 0; break;
3877//ZZ case Iop_Abs16x4: size = 1; break;
3878//ZZ case Iop_Abs32x2: size = 2; break;
3879//ZZ default: vassert(0);
3880//ZZ }
3881//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_ABS, res, arg, size, False));
3882//ZZ return res;
3883//ZZ }
3884//ZZ case Iop_Reverse64_8x8:
3885//ZZ case Iop_Reverse64_16x4:
3886//ZZ case Iop_Reverse64_32x2: {
3887//ZZ HReg res = newVRegD(env);
3888//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3889//ZZ UInt size = 0;
3890//ZZ switch(e->Iex.Binop.op) {
3891//ZZ case Iop_Reverse64_8x8: size = 0; break;
3892//ZZ case Iop_Reverse64_16x4: size = 1; break;
3893//ZZ case Iop_Reverse64_32x2: size = 2; break;
3894//ZZ default: vassert(0);
3895//ZZ }
3896//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_REV64,
3897//ZZ res, arg, size, False));
3898//ZZ return res;
3899//ZZ }
3900//ZZ case Iop_Reverse32_8x8:
3901//ZZ case Iop_Reverse32_16x4: {
3902//ZZ HReg res = newVRegD(env);
3903//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3904//ZZ UInt size = 0;
3905//ZZ switch(e->Iex.Binop.op) {
3906//ZZ case Iop_Reverse32_8x8: size = 0; break;
3907//ZZ case Iop_Reverse32_16x4: size = 1; break;
3908//ZZ default: vassert(0);
3909//ZZ }
3910//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_REV32,
3911//ZZ res, arg, size, False));
3912//ZZ return res;
3913//ZZ }
3914//ZZ case Iop_Reverse16_8x8: {
3915//ZZ HReg res = newVRegD(env);
3916//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3917//ZZ UInt size = 0;
3918//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_REV16,
3919//ZZ res, arg, size, False));
3920//ZZ return res;
3921//ZZ }
3922//ZZ case Iop_CmpwNEZ64: {
3923//ZZ HReg x_lsh = newVRegD(env);
3924//ZZ HReg x_rsh = newVRegD(env);
3925//ZZ HReg lsh_amt = newVRegD(env);
3926//ZZ HReg rsh_amt = newVRegD(env);
3927//ZZ HReg zero = newVRegD(env);
3928//ZZ HReg tmp = newVRegD(env);
3929//ZZ HReg tmp2 = newVRegD(env);
3930//ZZ HReg res = newVRegD(env);
3931//ZZ HReg x = newVRegD(env);
3932//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3933//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp2, arg, 2, False));
3934//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_NOT, x, tmp2, 4, False));
3935//ZZ addInstr(env, ARMInstr_NeonImm(lsh_amt, ARMNImm_TI(0, 32)));
3936//ZZ addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0, 0)));
3937//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
3938//ZZ rsh_amt, zero, lsh_amt, 2, False));
3939//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
3940//ZZ x_lsh, x, lsh_amt, 3, False));
3941//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
3942//ZZ x_rsh, x, rsh_amt, 3, False));
3943//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
3944//ZZ tmp, x_lsh, x_rsh, 0, False));
3945//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
3946//ZZ res, tmp, x, 0, False));
3947//ZZ return res;
3948//ZZ }
3949//ZZ case Iop_CmpNEZ8x8:
3950//ZZ case Iop_CmpNEZ16x4:
3951//ZZ case Iop_CmpNEZ32x2: {
3952//ZZ HReg res = newVRegD(env);
3953//ZZ HReg tmp = newVRegD(env);
3954//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3955//ZZ UInt size;
3956//ZZ switch (e->Iex.Unop.op) {
3957//ZZ case Iop_CmpNEZ8x8: size = 0; break;
3958//ZZ case Iop_CmpNEZ16x4: size = 1; break;
3959//ZZ case Iop_CmpNEZ32x2: size = 2; break;
3960//ZZ default: vassert(0);
3961//ZZ }
3962//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp, arg, size, False));
3963//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, tmp, 4, False));
3964//ZZ return res;
3965//ZZ }
3966//ZZ case Iop_NarrowUn16to8x8:
3967//ZZ case Iop_NarrowUn32to16x4:
3968//ZZ case Iop_NarrowUn64to32x2: {
3969//ZZ HReg res = newVRegD(env);
3970//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3971//ZZ UInt size = 0;
3972//ZZ switch(e->Iex.Binop.op) {
3973//ZZ case Iop_NarrowUn16to8x8: size = 0; break;
3974//ZZ case Iop_NarrowUn32to16x4: size = 1; break;
3975//ZZ case Iop_NarrowUn64to32x2: size = 2; break;
3976//ZZ default: vassert(0);
3977//ZZ }
3978//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPYN,
3979//ZZ res, arg, size, False));
3980//ZZ return res;
3981//ZZ }
3982//ZZ case Iop_QNarrowUn16Sto8Sx8:
3983//ZZ case Iop_QNarrowUn32Sto16Sx4:
3984//ZZ case Iop_QNarrowUn64Sto32Sx2: {
3985//ZZ HReg res = newVRegD(env);
3986//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3987//ZZ UInt size = 0;
3988//ZZ switch(e->Iex.Binop.op) {
3989//ZZ case Iop_QNarrowUn16Sto8Sx8: size = 0; break;
3990//ZZ case Iop_QNarrowUn32Sto16Sx4: size = 1; break;
3991//ZZ case Iop_QNarrowUn64Sto32Sx2: size = 2; break;
3992//ZZ default: vassert(0);
3993//ZZ }
3994//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNSS,
3995//ZZ res, arg, size, False));
3996//ZZ return res;
3997//ZZ }
3998//ZZ case Iop_QNarrowUn16Sto8Ux8:
3999//ZZ case Iop_QNarrowUn32Sto16Ux4:
4000//ZZ case Iop_QNarrowUn64Sto32Ux2: {
4001//ZZ HReg res = newVRegD(env);
4002//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4003//ZZ UInt size = 0;
4004//ZZ switch(e->Iex.Binop.op) {
4005//ZZ case Iop_QNarrowUn16Sto8Ux8: size = 0; break;
4006//ZZ case Iop_QNarrowUn32Sto16Ux4: size = 1; break;
4007//ZZ case Iop_QNarrowUn64Sto32Ux2: size = 2; break;
4008//ZZ default: vassert(0);
4009//ZZ }
4010//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNUS,
4011//ZZ res, arg, size, False));
4012//ZZ return res;
4013//ZZ }
4014//ZZ case Iop_QNarrowUn16Uto8Ux8:
4015//ZZ case Iop_QNarrowUn32Uto16Ux4:
4016//ZZ case Iop_QNarrowUn64Uto32Ux2: {
4017//ZZ HReg res = newVRegD(env);
4018//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4019//ZZ UInt size = 0;
4020//ZZ switch(e->Iex.Binop.op) {
4021//ZZ case Iop_QNarrowUn16Uto8Ux8: size = 0; break;
4022//ZZ case Iop_QNarrowUn32Uto16Ux4: size = 1; break;
4023//ZZ case Iop_QNarrowUn64Uto32Ux2: size = 2; break;
4024//ZZ default: vassert(0);
4025//ZZ }
4026//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNUU,
4027//ZZ res, arg, size, False));
4028//ZZ return res;
4029//ZZ }
4030//ZZ case Iop_PwAddL8Sx8:
4031//ZZ case Iop_PwAddL16Sx4:
4032//ZZ case Iop_PwAddL32Sx2: {
4033//ZZ HReg res = newVRegD(env);
4034//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4035//ZZ UInt size = 0;
4036//ZZ switch(e->Iex.Binop.op) {
4037//ZZ case Iop_PwAddL8Sx8: size = 0; break;
4038//ZZ case Iop_PwAddL16Sx4: size = 1; break;
4039//ZZ case Iop_PwAddL32Sx2: size = 2; break;
4040//ZZ default: vassert(0);
4041//ZZ }
4042//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_PADDLS,
4043//ZZ res, arg, size, False));
4044//ZZ return res;
4045//ZZ }
4046//ZZ case Iop_PwAddL8Ux8:
4047//ZZ case Iop_PwAddL16Ux4:
4048//ZZ case Iop_PwAddL32Ux2: {
4049//ZZ HReg res = newVRegD(env);
4050//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4051//ZZ UInt size = 0;
4052//ZZ switch(e->Iex.Binop.op) {
4053//ZZ case Iop_PwAddL8Ux8: size = 0; break;
4054//ZZ case Iop_PwAddL16Ux4: size = 1; break;
4055//ZZ case Iop_PwAddL32Ux2: size = 2; break;
4056//ZZ default: vassert(0);
4057//ZZ }
4058//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_PADDLU,
4059//ZZ res, arg, size, False));
4060//ZZ return res;
4061//ZZ }
4062//ZZ case Iop_Cnt8x8: {
4063//ZZ HReg res = newVRegD(env);
4064//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4065//ZZ UInt size = 0;
4066//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_CNT,
4067//ZZ res, arg, size, False));
4068//ZZ return res;
4069//ZZ }
4070//ZZ case Iop_Clz8Sx8:
4071//ZZ case Iop_Clz16Sx4:
4072//ZZ case Iop_Clz32Sx2: {
4073//ZZ HReg res = newVRegD(env);
4074//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4075//ZZ UInt size = 0;
4076//ZZ switch(e->Iex.Binop.op) {
4077//ZZ case Iop_Clz8Sx8: size = 0; break;
4078//ZZ case Iop_Clz16Sx4: size = 1; break;
4079//ZZ case Iop_Clz32Sx2: size = 2; break;
4080//ZZ default: vassert(0);
4081//ZZ }
4082//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_CLZ,
4083//ZZ res, arg, size, False));
4084//ZZ return res;
4085//ZZ }
4086//ZZ case Iop_Cls8Sx8:
4087//ZZ case Iop_Cls16Sx4:
4088//ZZ case Iop_Cls32Sx2: {
4089//ZZ HReg res = newVRegD(env);
4090//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4091//ZZ UInt size = 0;
4092//ZZ switch(e->Iex.Binop.op) {
4093//ZZ case Iop_Cls8Sx8: size = 0; break;
4094//ZZ case Iop_Cls16Sx4: size = 1; break;
4095//ZZ case Iop_Cls32Sx2: size = 2; break;
4096//ZZ default: vassert(0);
4097//ZZ }
4098//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_CLS,
4099//ZZ res, arg, size, False));
4100//ZZ return res;
4101//ZZ }
4102//ZZ case Iop_FtoI32Sx2_RZ: {
4103//ZZ HReg res = newVRegD(env);
4104//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4105//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoS,
4106//ZZ res, arg, 2, False));
4107//ZZ return res;
4108//ZZ }
4109//ZZ case Iop_FtoI32Ux2_RZ: {
4110//ZZ HReg res = newVRegD(env);
4111//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4112//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoU,
4113//ZZ res, arg, 2, False));
4114//ZZ return res;
4115//ZZ }
4116//ZZ case Iop_I32StoFx2: {
4117//ZZ HReg res = newVRegD(env);
4118//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4119//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTStoF,
4120//ZZ res, arg, 2, False));
4121//ZZ return res;
4122//ZZ }
4123//ZZ case Iop_I32UtoFx2: {
4124//ZZ HReg res = newVRegD(env);
4125//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4126//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTUtoF,
4127//ZZ res, arg, 2, False));
4128//ZZ return res;
4129//ZZ }
4130//ZZ case Iop_F32toF16x4: {
4131//ZZ HReg res = newVRegD(env);
4132//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4133//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTF32toF16,
4134//ZZ res, arg, 2, False));
4135//ZZ return res;
4136//ZZ }
4137//ZZ case Iop_Recip32Fx2: {
4138//ZZ HReg res = newVRegD(env);
4139//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
4140//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VRECIPF,
4141//ZZ res, argL, 0, False));
4142//ZZ return res;
4143//ZZ }
4144//ZZ case Iop_Recip32x2: {
4145//ZZ HReg res = newVRegD(env);
4146//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
4147//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VRECIP,
4148//ZZ res, argL, 0, False));
4149//ZZ return res;
4150//ZZ }
4151//ZZ case Iop_Abs32Fx2: {
4152//ZZ DECLARE_PATTERN(p_vabd_32fx2);
4153//ZZ DEFINE_PATTERN(p_vabd_32fx2,
4154//ZZ unop(Iop_Abs32Fx2,
4155//ZZ binop(Iop_Sub32Fx2,
4156//ZZ bind(0),
4157//ZZ bind(1))));
4158//ZZ if (matchIRExpr(&mi, p_vabd_32fx2, e)) {
4159//ZZ HReg res = newVRegD(env);
4160//ZZ HReg argL = iselNeon64Expr(env, mi.bindee[0]);
4161//ZZ HReg argR = iselNeon64Expr(env, mi.bindee[1]);
4162//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VABDFP,
4163//ZZ res, argL, argR, 0, False));
4164//ZZ return res;
4165//ZZ } else {
4166//ZZ HReg res = newVRegD(env);
4167//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4168//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VABSFP,
4169//ZZ res, arg, 0, False));
4170//ZZ return res;
4171//ZZ }
4172//ZZ }
4173//ZZ case Iop_Rsqrte32Fx2: {
4174//ZZ HReg res = newVRegD(env);
4175//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4176//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTEFP,
4177//ZZ res, arg, 0, False));
4178//ZZ return res;
4179//ZZ }
4180//ZZ case Iop_Rsqrte32x2: {
4181//ZZ HReg res = newVRegD(env);
4182//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4183//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTE,
4184//ZZ res, arg, 0, False));
4185//ZZ return res;
4186//ZZ }
4187//ZZ case Iop_Neg32Fx2: {
4188//ZZ HReg res = newVRegD(env);
4189//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4190//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VNEGF,
4191//ZZ res, arg, 0, False));
4192//ZZ return res;
4193//ZZ }
4194//ZZ default:
4195//ZZ break;
4196//ZZ }
4197//ZZ } /* if (e->tag == Iex_Unop) */
4198//ZZ
4199//ZZ if (e->tag == Iex_Triop) {
4200//ZZ IRTriop *triop = e->Iex.Triop.details;
4201//ZZ
4202//ZZ switch (triop->op) {
4203//ZZ case Iop_Extract64: {
4204//ZZ HReg res = newVRegD(env);
4205//ZZ HReg argL = iselNeon64Expr(env, triop->arg1);
4206//ZZ HReg argR = iselNeon64Expr(env, triop->arg2);
4207//ZZ UInt imm4;
4208//ZZ if (triop->arg3->tag != Iex_Const ||
4209//ZZ typeOfIRExpr(env->type_env, triop->arg3) != Ity_I8) {
4210//ZZ vpanic("ARM target supports Iop_Extract64 with constant "
4211//ZZ "third argument less than 16 only\n");
4212//ZZ }
4213//ZZ imm4 = triop->arg3->Iex.Const.con->Ico.U8;
4214//ZZ if (imm4 >= 8) {
4215//ZZ vpanic("ARM target supports Iop_Extract64 with constant "
4216//ZZ "third argument less than 16 only\n");
4217//ZZ }
4218//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VEXT,
4219//ZZ res, argL, argR, imm4, False));
4220//ZZ return res;
4221//ZZ }
4222//ZZ case Iop_SetElem8x8:
4223//ZZ case Iop_SetElem16x4:
4224//ZZ case Iop_SetElem32x2: {
4225//ZZ HReg res = newVRegD(env);
4226//ZZ HReg dreg = iselNeon64Expr(env, triop->arg1);
4227//ZZ HReg arg = iselIntExpr_R(env, triop->arg3);
4228//ZZ UInt index, size;
4229//ZZ if (triop->arg2->tag != Iex_Const ||
4230//ZZ typeOfIRExpr(env->type_env, triop->arg2) != Ity_I8) {
4231//ZZ vpanic("ARM target supports SetElem with constant "
4232//ZZ "second argument only\n");
4233//ZZ }
4234//ZZ index = triop->arg2->Iex.Const.con->Ico.U8;
4235//ZZ switch (triop->op) {
4236//ZZ case Iop_SetElem8x8: vassert(index < 8); size = 0; break;
4237//ZZ case Iop_SetElem16x4: vassert(index < 4); size = 1; break;
4238//ZZ case Iop_SetElem32x2: vassert(index < 2); size = 2; break;
4239//ZZ default: vassert(0);
4240//ZZ }
4241//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, res, dreg, 4, False));
4242//ZZ addInstr(env, ARMInstr_NUnaryS(ARMneon_SETELEM,
4243//ZZ mkARMNRS(ARMNRS_Scalar, res, index),
4244//ZZ mkARMNRS(ARMNRS_Reg, arg, 0),
4245//ZZ size, False));
4246//ZZ return res;
4247//ZZ }
4248//ZZ default:
4249//ZZ break;
4250//ZZ }
4251//ZZ }
4252//ZZ
4253//ZZ /* --------- MULTIPLEX --------- */
4254//ZZ if (e->tag == Iex_ITE) { // VFD
4255//ZZ HReg rLo, rHi;
4256//ZZ HReg res = newVRegD(env);
4257//ZZ iselInt64Expr(&rHi, &rLo, env, e);
4258//ZZ addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
4259//ZZ return res;
4260//ZZ }
4261//ZZ
4262//ZZ ppIRExpr(e);
4263//ZZ vpanic("iselNeon64Expr");
4264//ZZ }
4265
4266
4267/*---------------------------------------------------------*/
4268/*--- ISEL: Vector (NEON) expressions (128 bit) ---*/
4269/*---------------------------------------------------------*/
4270
4271static HReg iselV128Expr ( ISelEnv* env, IRExpr* e )
4272{
4273 HReg r = iselV128Expr_wrk( env, e );
4274 vassert(hregClass(r) == HRcVec128);
4275 vassert(hregIsVirtual(r));
4276 return r;
4277}
4278
4279/* DO NOT CALL THIS DIRECTLY */
4280static HReg iselV128Expr_wrk ( ISelEnv* env, IRExpr* e )
4281{
4282 IRType ty = typeOfIRExpr(env->type_env, e);
4283 vassert(e);
4284 vassert(ty == Ity_V128);
4285
4286 if (e->tag == Iex_RdTmp) {
4287 return lookupIRTemp(env, e->Iex.RdTmp.tmp);
4288 }
4289
4290 if (e->tag == Iex_Const) {
4291 /* Only a very limited range of constants is handled. */
4292 vassert(e->Iex.Const.con->tag == Ico_V128);
4293 UShort con = e->Iex.Const.con->Ico.V128;
4294 if (con == 0x0000) {
4295 HReg res = newVRegV(env);
4296 addInstr(env, ARM64Instr_VImmQ(res, con));
4297 return res;
4298 }
4299 /* Unhandled */
4300 goto v128_expr_bad;
4301 }
4302
4303 if (e->tag == Iex_Load) {
4304 HReg res = newVRegV(env);
4305 HReg rN = iselIntExpr_R(env, e->Iex.Load.addr);
4306 vassert(ty == Ity_V128);
4307 addInstr(env, ARM64Instr_VLdStQ(True/*isLoad*/, res, rN));
4308 return res;
4309 }
4310
4311 if (e->tag == Iex_Get) {
4312 UInt offs = (UInt)e->Iex.Get.offset;
4313 if (offs < (1<<12)) {
4314 HReg addr = mk_baseblock_128bit_access_addr(env, offs);
4315 HReg res = newVRegV(env);
4316 vassert(ty == Ity_V128);
4317 addInstr(env, ARM64Instr_VLdStQ(True/*isLoad*/, res, addr));
4318 return res;
4319 }
4320 goto v128_expr_bad;
4321 }
4322
4323//ZZ if (e->tag == Iex_Unop) {
4324//ZZ switch (e->Iex.Unop.op) {
4325//ZZ case Iop_NotV128: {
4326//ZZ DECLARE_PATTERN(p_veqz_8x16);
4327//ZZ DECLARE_PATTERN(p_veqz_16x8);
4328//ZZ DECLARE_PATTERN(p_veqz_32x4);
4329//ZZ DECLARE_PATTERN(p_vcge_8sx16);
4330//ZZ DECLARE_PATTERN(p_vcge_16sx8);
4331//ZZ DECLARE_PATTERN(p_vcge_32sx4);
4332//ZZ DECLARE_PATTERN(p_vcge_8ux16);
4333//ZZ DECLARE_PATTERN(p_vcge_16ux8);
4334//ZZ DECLARE_PATTERN(p_vcge_32ux4);
4335//ZZ DEFINE_PATTERN(p_veqz_8x16,
4336//ZZ unop(Iop_NotV128, unop(Iop_CmpNEZ8x16, bind(0))));
4337//ZZ DEFINE_PATTERN(p_veqz_16x8,
4338//ZZ unop(Iop_NotV128, unop(Iop_CmpNEZ16x8, bind(0))));
4339//ZZ DEFINE_PATTERN(p_veqz_32x4,
4340//ZZ unop(Iop_NotV128, unop(Iop_CmpNEZ32x4, bind(0))));
4341//ZZ DEFINE_PATTERN(p_vcge_8sx16,
4342//ZZ unop(Iop_NotV128, binop(Iop_CmpGT8Sx16, bind(1), bind(0))));
4343//ZZ DEFINE_PATTERN(p_vcge_16sx8,
4344//ZZ unop(Iop_NotV128, binop(Iop_CmpGT16Sx8, bind(1), bind(0))));
4345//ZZ DEFINE_PATTERN(p_vcge_32sx4,
4346//ZZ unop(Iop_NotV128, binop(Iop_CmpGT32Sx4, bind(1), bind(0))));
4347//ZZ DEFINE_PATTERN(p_vcge_8ux16,
4348//ZZ unop(Iop_NotV128, binop(Iop_CmpGT8Ux16, bind(1), bind(0))));
4349//ZZ DEFINE_PATTERN(p_vcge_16ux8,
4350//ZZ unop(Iop_NotV128, binop(Iop_CmpGT16Ux8, bind(1), bind(0))));
4351//ZZ DEFINE_PATTERN(p_vcge_32ux4,
4352//ZZ unop(Iop_NotV128, binop(Iop_CmpGT32Ux4, bind(1), bind(0))));
4353//ZZ if (matchIRExpr(&mi, p_veqz_8x16, e)) {
4354//ZZ HReg res = newVRegV(env);
4355//ZZ HReg arg = iselNeonExpr(env, mi.bindee[0]);
4356//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 0, True));
4357//ZZ return res;
4358//ZZ } else if (matchIRExpr(&mi, p_veqz_16x8, e)) {
4359//ZZ HReg res = newVRegV(env);
4360//ZZ HReg arg = iselNeonExpr(env, mi.bindee[0]);
4361//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 1, True));
4362//ZZ return res;
4363//ZZ } else if (matchIRExpr(&mi, p_veqz_32x4, e)) {
4364//ZZ HReg res = newVRegV(env);
4365//ZZ HReg arg = iselNeonExpr(env, mi.bindee[0]);
4366//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 2, True));
4367//ZZ return res;
4368//ZZ } else if (matchIRExpr(&mi, p_vcge_8sx16, e)) {
4369//ZZ HReg res = newVRegV(env);
4370//ZZ HReg argL = iselNeonExpr(env, mi.bindee[0]);
4371//ZZ HReg argR = iselNeonExpr(env, mi.bindee[1]);
4372//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
4373//ZZ res, argL, argR, 0, True));
4374//ZZ return res;
4375//ZZ } else if (matchIRExpr(&mi, p_vcge_16sx8, e)) {
4376//ZZ HReg res = newVRegV(env);
4377//ZZ HReg argL = iselNeonExpr(env, mi.bindee[0]);
4378//ZZ HReg argR = iselNeonExpr(env, mi.bindee[1]);
4379//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
4380//ZZ res, argL, argR, 1, True));
4381//ZZ return res;
4382//ZZ } else if (matchIRExpr(&mi, p_vcge_32sx4, e)) {
4383//ZZ HReg res = newVRegV(env);
4384//ZZ HReg argL = iselNeonExpr(env, mi.bindee[0]);
4385//ZZ HReg argR = iselNeonExpr(env, mi.bindee[1]);
4386//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
4387//ZZ res, argL, argR, 2, True));
4388//ZZ return res;
4389//ZZ } else if (matchIRExpr(&mi, p_vcge_8ux16, e)) {
4390//ZZ HReg res = newVRegV(env);
4391//ZZ HReg argL = iselNeonExpr(env, mi.bindee[0]);
4392//ZZ HReg argR = iselNeonExpr(env, mi.bindee[1]);
4393//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
4394//ZZ res, argL, argR, 0, True));
4395//ZZ return res;
4396//ZZ } else if (matchIRExpr(&mi, p_vcge_16ux8, e)) {
4397//ZZ HReg res = newVRegV(env);
4398//ZZ HReg argL = iselNeonExpr(env, mi.bindee[0]);
4399//ZZ HReg argR = iselNeonExpr(env, mi.bindee[1]);
4400//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
4401//ZZ res, argL, argR, 1, True));
4402//ZZ return res;
4403//ZZ } else if (matchIRExpr(&mi, p_vcge_32ux4, e)) {
4404//ZZ HReg res = newVRegV(env);
4405//ZZ HReg argL = iselNeonExpr(env, mi.bindee[0]);
4406//ZZ HReg argR = iselNeonExpr(env, mi.bindee[1]);
4407//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
4408//ZZ res, argL, argR, 2, True));
4409//ZZ return res;
4410//ZZ } else {
4411//ZZ HReg res = newVRegV(env);
4412//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4413//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, arg, 4, True));
4414//ZZ return res;
4415//ZZ }
4416//ZZ }
4417//ZZ case Iop_Dup8x16:
4418//ZZ case Iop_Dup16x8:
4419//ZZ case Iop_Dup32x4: {
4420//ZZ HReg res, arg;
4421//ZZ UInt size;
4422//ZZ DECLARE_PATTERN(p_vdup_8x16);
4423//ZZ DECLARE_PATTERN(p_vdup_16x8);
4424//ZZ DECLARE_PATTERN(p_vdup_32x4);
4425//ZZ DEFINE_PATTERN(p_vdup_8x16,
4426//ZZ unop(Iop_Dup8x16, binop(Iop_GetElem8x8, bind(0), bind(1))));
4427//ZZ DEFINE_PATTERN(p_vdup_16x8,
4428//ZZ unop(Iop_Dup16x8, binop(Iop_GetElem16x4, bind(0), bind(1))));
4429//ZZ DEFINE_PATTERN(p_vdup_32x4,
4430//ZZ unop(Iop_Dup32x4, binop(Iop_GetElem32x2, bind(0), bind(1))));
4431//ZZ if (matchIRExpr(&mi, p_vdup_8x16, e)) {
4432//ZZ UInt index;
4433//ZZ UInt imm4;
4434//ZZ if (mi.bindee[1]->tag == Iex_Const &&
4435//ZZ typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
4436//ZZ index = mi.bindee[1]->Iex.Const.con->Ico.U8;
4437//ZZ imm4 = (index << 1) + 1;
4438//ZZ if (index < 8) {
4439//ZZ res = newVRegV(env);
4440//ZZ arg = iselNeon64Expr(env, mi.bindee[0]);
4441//ZZ addInstr(env, ARMInstr_NUnaryS(
4442//ZZ ARMneon_VDUP,
4443//ZZ mkARMNRS(ARMNRS_Reg, res, 0),
4444//ZZ mkARMNRS(ARMNRS_Scalar, arg, index),
4445//ZZ imm4, True
4446//ZZ ));
4447//ZZ return res;
4448//ZZ }
4449//ZZ }
4450//ZZ } else if (matchIRExpr(&mi, p_vdup_16x8, e)) {
4451//ZZ UInt index;
4452//ZZ UInt imm4;
4453//ZZ if (mi.bindee[1]->tag == Iex_Const &&
4454//ZZ typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
4455//ZZ index = mi.bindee[1]->Iex.Const.con->Ico.U8;
4456//ZZ imm4 = (index << 2) + 2;
4457//ZZ if (index < 4) {
4458//ZZ res = newVRegV(env);
4459//ZZ arg = iselNeon64Expr(env, mi.bindee[0]);
4460//ZZ addInstr(env, ARMInstr_NUnaryS(
4461//ZZ ARMneon_VDUP,
4462//ZZ mkARMNRS(ARMNRS_Reg, res, 0),
4463//ZZ mkARMNRS(ARMNRS_Scalar, arg, index),
4464//ZZ imm4, True
4465//ZZ ));
4466//ZZ return res;
4467//ZZ }
4468//ZZ }
4469//ZZ } else if (matchIRExpr(&mi, p_vdup_32x4, e)) {
4470//ZZ UInt index;
4471//ZZ UInt imm4;
4472//ZZ if (mi.bindee[1]->tag == Iex_Const &&
4473//ZZ typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
4474//ZZ index = mi.bindee[1]->Iex.Const.con->Ico.U8;
4475//ZZ imm4 = (index << 3) + 4;
4476//ZZ if (index < 2) {
4477//ZZ res = newVRegV(env);
4478//ZZ arg = iselNeon64Expr(env, mi.bindee[0]);
4479//ZZ addInstr(env, ARMInstr_NUnaryS(
4480//ZZ ARMneon_VDUP,
4481//ZZ mkARMNRS(ARMNRS_Reg, res, 0),
4482//ZZ mkARMNRS(ARMNRS_Scalar, arg, index),
4483//ZZ imm4, True
4484//ZZ ));
4485//ZZ return res;
4486//ZZ }
4487//ZZ }
4488//ZZ }
4489//ZZ arg = iselIntExpr_R(env, e->Iex.Unop.arg);
4490//ZZ res = newVRegV(env);
4491//ZZ switch (e->Iex.Unop.op) {
4492//ZZ case Iop_Dup8x16: size = 0; break;
4493//ZZ case Iop_Dup16x8: size = 1; break;
4494//ZZ case Iop_Dup32x4: size = 2; break;
4495//ZZ default: vassert(0);
4496//ZZ }
4497//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_DUP, res, arg, size, True));
4498//ZZ return res;
4499//ZZ }
4500//ZZ case Iop_Abs8x16:
4501//ZZ case Iop_Abs16x8:
4502//ZZ case Iop_Abs32x4: {
4503//ZZ HReg res = newVRegV(env);
4504//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4505//ZZ UInt size = 0;
4506//ZZ switch(e->Iex.Binop.op) {
4507//ZZ case Iop_Abs8x16: size = 0; break;
4508//ZZ case Iop_Abs16x8: size = 1; break;
4509//ZZ case Iop_Abs32x4: size = 2; break;
4510//ZZ default: vassert(0);
4511//ZZ }
4512//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_ABS, res, arg, size, True));
4513//ZZ return res;
4514//ZZ }
4515//ZZ case Iop_Reverse64_8x16:
4516//ZZ case Iop_Reverse64_16x8:
4517//ZZ case Iop_Reverse64_32x4: {
4518//ZZ HReg res = newVRegV(env);
4519//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4520//ZZ UInt size = 0;
4521//ZZ switch(e->Iex.Binop.op) {
4522//ZZ case Iop_Reverse64_8x16: size = 0; break;
4523//ZZ case Iop_Reverse64_16x8: size = 1; break;
4524//ZZ case Iop_Reverse64_32x4: size = 2; break;
4525//ZZ default: vassert(0);
4526//ZZ }
4527//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_REV64,
4528//ZZ res, arg, size, True));
4529//ZZ return res;
4530//ZZ }
4531//ZZ case Iop_Reverse32_8x16:
4532//ZZ case Iop_Reverse32_16x8: {
4533//ZZ HReg res = newVRegV(env);
4534//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4535//ZZ UInt size = 0;
4536//ZZ switch(e->Iex.Binop.op) {
4537//ZZ case Iop_Reverse32_8x16: size = 0; break;
4538//ZZ case Iop_Reverse32_16x8: size = 1; break;
4539//ZZ default: vassert(0);
4540//ZZ }
4541//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_REV32,
4542//ZZ res, arg, size, True));
4543//ZZ return res;
4544//ZZ }
4545//ZZ case Iop_Reverse16_8x16: {
4546//ZZ HReg res = newVRegV(env);
4547//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4548//ZZ UInt size = 0;
4549//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_REV16,
4550//ZZ res, arg, size, True));
4551//ZZ return res;
4552//ZZ }
4553//ZZ case Iop_CmpNEZ64x2: {
4554//ZZ HReg x_lsh = newVRegV(env);
4555//ZZ HReg x_rsh = newVRegV(env);
4556//ZZ HReg lsh_amt = newVRegV(env);
4557//ZZ HReg rsh_amt = newVRegV(env);
4558//ZZ HReg zero = newVRegV(env);
4559//ZZ HReg tmp = newVRegV(env);
4560//ZZ HReg tmp2 = newVRegV(env);
4561//ZZ HReg res = newVRegV(env);
4562//ZZ HReg x = newVRegV(env);
4563//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4564//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp2, arg, 2, True));
4565//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_NOT, x, tmp2, 4, True));
4566//ZZ addInstr(env, ARMInstr_NeonImm(lsh_amt, ARMNImm_TI(0, 32)));
4567//ZZ addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0, 0)));
4568//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
4569//ZZ rsh_amt, zero, lsh_amt, 2, True));
4570//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4571//ZZ x_lsh, x, lsh_amt, 3, True));
4572//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4573//ZZ x_rsh, x, rsh_amt, 3, True));
4574//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
4575//ZZ tmp, x_lsh, x_rsh, 0, True));
4576//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
4577//ZZ res, tmp, x, 0, True));
4578//ZZ return res;
4579//ZZ }
4580//ZZ case Iop_CmpNEZ8x16:
4581//ZZ case Iop_CmpNEZ16x8:
4582//ZZ case Iop_CmpNEZ32x4: {
4583//ZZ HReg res = newVRegV(env);
4584//ZZ HReg tmp = newVRegV(env);
4585//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4586//ZZ UInt size;
4587//ZZ switch (e->Iex.Unop.op) {
4588//ZZ case Iop_CmpNEZ8x16: size = 0; break;
4589//ZZ case Iop_CmpNEZ16x8: size = 1; break;
4590//ZZ case Iop_CmpNEZ32x4: size = 2; break;
4591//ZZ default: vassert(0);
4592//ZZ }
4593//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp, arg, size, True));
4594//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, tmp, 4, True));
4595//ZZ return res;
4596//ZZ }
4597//ZZ case Iop_Widen8Uto16x8:
4598//ZZ case Iop_Widen16Uto32x4:
4599//ZZ case Iop_Widen32Uto64x2: {
4600//ZZ HReg res = newVRegV(env);
4601//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4602//ZZ UInt size;
4603//ZZ switch (e->Iex.Unop.op) {
4604//ZZ case Iop_Widen8Uto16x8: size = 0; break;
4605//ZZ case Iop_Widen16Uto32x4: size = 1; break;
4606//ZZ case Iop_Widen32Uto64x2: size = 2; break;
4607//ZZ default: vassert(0);
4608//ZZ }
4609//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPYLU,
4610//ZZ res, arg, size, True));
4611//ZZ return res;
4612//ZZ }
4613//ZZ case Iop_Widen8Sto16x8:
4614//ZZ case Iop_Widen16Sto32x4:
4615//ZZ case Iop_Widen32Sto64x2: {
4616//ZZ HReg res = newVRegV(env);
4617//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4618//ZZ UInt size;
4619//ZZ switch (e->Iex.Unop.op) {
4620//ZZ case Iop_Widen8Sto16x8: size = 0; break;
4621//ZZ case Iop_Widen16Sto32x4: size = 1; break;
4622//ZZ case Iop_Widen32Sto64x2: size = 2; break;
4623//ZZ default: vassert(0);
4624//ZZ }
4625//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPYLS,
4626//ZZ res, arg, size, True));
4627//ZZ return res;
4628//ZZ }
4629//ZZ case Iop_PwAddL8Sx16:
4630//ZZ case Iop_PwAddL16Sx8:
4631//ZZ case Iop_PwAddL32Sx4: {
4632//ZZ HReg res = newVRegV(env);
4633//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4634//ZZ UInt size = 0;
4635//ZZ switch(e->Iex.Binop.op) {
4636//ZZ case Iop_PwAddL8Sx16: size = 0; break;
4637//ZZ case Iop_PwAddL16Sx8: size = 1; break;
4638//ZZ case Iop_PwAddL32Sx4: size = 2; break;
4639//ZZ default: vassert(0);
4640//ZZ }
4641//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_PADDLS,
4642//ZZ res, arg, size, True));
4643//ZZ return res;
4644//ZZ }
4645//ZZ case Iop_PwAddL8Ux16:
4646//ZZ case Iop_PwAddL16Ux8:
4647//ZZ case Iop_PwAddL32Ux4: {
4648//ZZ HReg res = newVRegV(env);
4649//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4650//ZZ UInt size = 0;
4651//ZZ switch(e->Iex.Binop.op) {
4652//ZZ case Iop_PwAddL8Ux16: size = 0; break;
4653//ZZ case Iop_PwAddL16Ux8: size = 1; break;
4654//ZZ case Iop_PwAddL32Ux4: size = 2; break;
4655//ZZ default: vassert(0);
4656//ZZ }
4657//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_PADDLU,
4658//ZZ res, arg, size, True));
4659//ZZ return res;
4660//ZZ }
4661//ZZ case Iop_Cnt8x16: {
4662//ZZ HReg res = newVRegV(env);
4663//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4664//ZZ UInt size = 0;
4665//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_CNT, res, arg, size, True));
4666//ZZ return res;
4667//ZZ }
4668//ZZ case Iop_Clz8Sx16:
4669//ZZ case Iop_Clz16Sx8:
4670//ZZ case Iop_Clz32Sx4: {
4671//ZZ HReg res = newVRegV(env);
4672//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4673//ZZ UInt size = 0;
4674//ZZ switch(e->Iex.Binop.op) {
4675//ZZ case Iop_Clz8Sx16: size = 0; break;
4676//ZZ case Iop_Clz16Sx8: size = 1; break;
4677//ZZ case Iop_Clz32Sx4: size = 2; break;
4678//ZZ default: vassert(0);
4679//ZZ }
4680//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_CLZ, res, arg, size, True));
4681//ZZ return res;
4682//ZZ }
4683//ZZ case Iop_Cls8Sx16:
4684//ZZ case Iop_Cls16Sx8:
4685//ZZ case Iop_Cls32Sx4: {
4686//ZZ HReg res = newVRegV(env);
4687//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4688//ZZ UInt size = 0;
4689//ZZ switch(e->Iex.Binop.op) {
4690//ZZ case Iop_Cls8Sx16: size = 0; break;
4691//ZZ case Iop_Cls16Sx8: size = 1; break;
4692//ZZ case Iop_Cls32Sx4: size = 2; break;
4693//ZZ default: vassert(0);
4694//ZZ }
4695//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_CLS, res, arg, size, True));
4696//ZZ return res;
4697//ZZ }
4698//ZZ case Iop_FtoI32Sx4_RZ: {
4699//ZZ HReg res = newVRegV(env);
4700//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4701//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoS,
4702//ZZ res, arg, 2, True));
4703//ZZ return res;
4704//ZZ }
4705//ZZ case Iop_FtoI32Ux4_RZ: {
4706//ZZ HReg res = newVRegV(env);
4707//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4708//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoU,
4709//ZZ res, arg, 2, True));
4710//ZZ return res;
4711//ZZ }
4712//ZZ case Iop_I32StoFx4: {
4713//ZZ HReg res = newVRegV(env);
4714//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4715//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTStoF,
4716//ZZ res, arg, 2, True));
4717//ZZ return res;
4718//ZZ }
4719//ZZ case Iop_I32UtoFx4: {
4720//ZZ HReg res = newVRegV(env);
4721//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4722//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTUtoF,
4723//ZZ res, arg, 2, True));
4724//ZZ return res;
4725//ZZ }
4726//ZZ case Iop_F16toF32x4: {
4727//ZZ HReg res = newVRegV(env);
4728//ZZ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4729//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTF16toF32,
4730//ZZ res, arg, 2, True));
4731//ZZ return res;
4732//ZZ }
4733//ZZ case Iop_Recip32Fx4: {
4734//ZZ HReg res = newVRegV(env);
4735//ZZ HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4736//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VRECIPF,
4737//ZZ res, argL, 0, True));
4738//ZZ return res;
4739//ZZ }
4740//ZZ case Iop_Recip32x4: {
4741//ZZ HReg res = newVRegV(env);
4742//ZZ HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4743//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VRECIP,
4744//ZZ res, argL, 0, True));
4745//ZZ return res;
4746//ZZ }
4747//ZZ case Iop_Abs32Fx4: {
4748//ZZ DECLARE_PATTERN(p_vabd_32fx4);
4749//ZZ DEFINE_PATTERN(p_vabd_32fx4,
4750//ZZ unop(Iop_Abs32Fx4,
4751//ZZ binop(Iop_Sub32Fx4,
4752//ZZ bind(0),
4753//ZZ bind(1))));
4754//ZZ if (matchIRExpr(&mi, p_vabd_32fx4, e)) {
4755//ZZ HReg res = newVRegV(env);
4756//ZZ HReg argL = iselNeonExpr(env, mi.bindee[0]);
4757//ZZ HReg argR = iselNeonExpr(env, mi.bindee[1]);
4758//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VABDFP,
4759//ZZ res, argL, argR, 0, True));
4760//ZZ return res;
4761//ZZ } else {
4762//ZZ HReg res = newVRegV(env);
4763//ZZ HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4764//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VABSFP,
4765//ZZ res, argL, 0, True));
4766//ZZ return res;
4767//ZZ }
4768//ZZ }
4769//ZZ case Iop_Rsqrte32Fx4: {
4770//ZZ HReg res = newVRegV(env);
4771//ZZ HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4772//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTEFP,
4773//ZZ res, argL, 0, True));
4774//ZZ return res;
4775//ZZ }
4776//ZZ case Iop_Rsqrte32x4: {
4777//ZZ HReg res = newVRegV(env);
4778//ZZ HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4779//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTE,
4780//ZZ res, argL, 0, True));
4781//ZZ return res;
4782//ZZ }
4783//ZZ case Iop_Neg32Fx4: {
4784//ZZ HReg res = newVRegV(env);
4785//ZZ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4786//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VNEGF,
4787//ZZ res, arg, 0, True));
4788//ZZ return res;
4789//ZZ }
4790//ZZ /* ... */
4791//ZZ default:
4792//ZZ break;
4793//ZZ }
4794//ZZ }
4795
4796 if (e->tag == Iex_Binop) {
4797 switch (e->Iex.Binop.op) {
4798 case Iop_64HLtoV128: {
4799 HReg res = newVRegV(env);
4800 HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
4801 HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
4802 addInstr(env, ARM64Instr_VQfromXX(res, argL, argR));
4803 return res;
4804 }
4805//ZZ case Iop_AndV128: {
4806//ZZ HReg res = newVRegV(env);
4807//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4808//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4809//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
4810//ZZ res, argL, argR, 4, True));
4811//ZZ return res;
4812//ZZ }
4813//ZZ case Iop_OrV128: {
4814//ZZ HReg res = newVRegV(env);
4815//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4816//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4817//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
4818//ZZ res, argL, argR, 4, True));
4819//ZZ return res;
4820//ZZ }
4821//ZZ case Iop_XorV128: {
4822//ZZ HReg res = newVRegV(env);
4823//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4824//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4825//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VXOR,
4826//ZZ res, argL, argR, 4, True));
4827//ZZ return res;
4828//ZZ }
4829//ZZ case Iop_Add8x16:
4830//ZZ case Iop_Add16x8:
4831//ZZ case Iop_Add32x4:
4832//ZZ case Iop_Add64x2: {
4833//ZZ /*
4834//ZZ FIXME: remove this if not used
4835//ZZ DECLARE_PATTERN(p_vrhadd_32sx4);
4836//ZZ ULong one = (1LL << 32) | 1LL;
4837//ZZ DEFINE_PATTERN(p_vrhadd_32sx4,
4838//ZZ binop(Iop_Add32x4,
4839//ZZ binop(Iop_Add32x4,
4840//ZZ binop(Iop_SarN32x4,
4841//ZZ bind(0),
4842//ZZ mkU8(1)),
4843//ZZ binop(Iop_SarN32x4,
4844//ZZ bind(1),
4845//ZZ mkU8(1))),
4846//ZZ binop(Iop_SarN32x4,
4847//ZZ binop(Iop_Add32x4,
4848//ZZ binop(Iop_Add32x4,
4849//ZZ binop(Iop_AndV128,
4850//ZZ bind(0),
4851//ZZ mkU128(one)),
4852//ZZ binop(Iop_AndV128,
4853//ZZ bind(1),
4854//ZZ mkU128(one))),
4855//ZZ mkU128(one)),
4856//ZZ mkU8(1))));
4857//ZZ */
4858//ZZ HReg res = newVRegV(env);
4859//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4860//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4861//ZZ UInt size;
4862//ZZ switch (e->Iex.Binop.op) {
4863//ZZ case Iop_Add8x16: size = 0; break;
4864//ZZ case Iop_Add16x8: size = 1; break;
4865//ZZ case Iop_Add32x4: size = 2; break;
4866//ZZ case Iop_Add64x2: size = 3; break;
4867//ZZ default:
4868//ZZ ppIROp(e->Iex.Binop.op);
4869//ZZ vpanic("Illegal element size in VADD");
4870//ZZ }
4871//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VADD,
4872//ZZ res, argL, argR, size, True));
4873//ZZ return res;
4874//ZZ }
4875//ZZ case Iop_Add32Fx4: {
4876//ZZ HReg res = newVRegV(env);
4877//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4878//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4879//ZZ UInt size = 0;
4880//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VADDFP,
4881//ZZ res, argL, argR, size, True));
4882//ZZ return res;
4883//ZZ }
4884//ZZ case Iop_Recps32Fx4: {
4885//ZZ HReg res = newVRegV(env);
4886//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4887//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4888//ZZ UInt size = 0;
4889//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VRECPS,
4890//ZZ res, argL, argR, size, True));
4891//ZZ return res;
4892//ZZ }
4893//ZZ case Iop_Rsqrts32Fx4: {
4894//ZZ HReg res = newVRegV(env);
4895//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4896//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4897//ZZ UInt size = 0;
4898//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VRSQRTS,
4899//ZZ res, argL, argR, size, True));
4900//ZZ return res;
4901//ZZ }
4902//ZZ
4903//ZZ // These 6 verified 18 Apr 2013
4904//ZZ case Iop_InterleaveEvenLanes8x16:
4905//ZZ case Iop_InterleaveOddLanes8x16:
4906//ZZ case Iop_InterleaveEvenLanes16x8:
4907//ZZ case Iop_InterleaveOddLanes16x8:
4908//ZZ case Iop_InterleaveEvenLanes32x4:
4909//ZZ case Iop_InterleaveOddLanes32x4: {
4910//ZZ HReg rD = newVRegV(env);
4911//ZZ HReg rM = newVRegV(env);
4912//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4913//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4914//ZZ UInt size;
4915//ZZ Bool resRd; // is the result in rD or rM ?
4916//ZZ switch (e->Iex.Binop.op) {
4917//ZZ case Iop_InterleaveOddLanes8x16: resRd = False; size = 0; break;
4918//ZZ case Iop_InterleaveEvenLanes8x16: resRd = True; size = 0; break;
4919//ZZ case Iop_InterleaveOddLanes16x8: resRd = False; size = 1; break;
4920//ZZ case Iop_InterleaveEvenLanes16x8: resRd = True; size = 1; break;
4921//ZZ case Iop_InterleaveOddLanes32x4: resRd = False; size = 2; break;
4922//ZZ case Iop_InterleaveEvenLanes32x4: resRd = True; size = 2; break;
4923//ZZ default: vassert(0);
4924//ZZ }
4925//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, True));
4926//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, True));
4927//ZZ addInstr(env, ARMInstr_NDual(ARMneon_TRN, rD, rM, size, True));
4928//ZZ return resRd ? rD : rM;
4929//ZZ }
4930//ZZ
4931//ZZ // These 6 verified 18 Apr 2013
4932//ZZ case Iop_InterleaveHI8x16:
4933//ZZ case Iop_InterleaveLO8x16:
4934//ZZ case Iop_InterleaveHI16x8:
4935//ZZ case Iop_InterleaveLO16x8:
4936//ZZ case Iop_InterleaveHI32x4:
4937//ZZ case Iop_InterleaveLO32x4: {
4938//ZZ HReg rD = newVRegV(env);
4939//ZZ HReg rM = newVRegV(env);
4940//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4941//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4942//ZZ UInt size;
4943//ZZ Bool resRd; // is the result in rD or rM ?
4944//ZZ switch (e->Iex.Binop.op) {
4945//ZZ case Iop_InterleaveHI8x16: resRd = False; size = 0; break;
4946//ZZ case Iop_InterleaveLO8x16: resRd = True; size = 0; break;
4947//ZZ case Iop_InterleaveHI16x8: resRd = False; size = 1; break;
4948//ZZ case Iop_InterleaveLO16x8: resRd = True; size = 1; break;
4949//ZZ case Iop_InterleaveHI32x4: resRd = False; size = 2; break;
4950//ZZ case Iop_InterleaveLO32x4: resRd = True; size = 2; break;
4951//ZZ default: vassert(0);
4952//ZZ }
4953//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, True));
4954//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, True));
4955//ZZ addInstr(env, ARMInstr_NDual(ARMneon_ZIP, rD, rM, size, True));
4956//ZZ return resRd ? rD : rM;
4957//ZZ }
4958//ZZ
4959//ZZ // These 6 verified 18 Apr 2013
4960//ZZ case Iop_CatOddLanes8x16:
4961//ZZ case Iop_CatEvenLanes8x16:
4962//ZZ case Iop_CatOddLanes16x8:
4963//ZZ case Iop_CatEvenLanes16x8:
4964//ZZ case Iop_CatOddLanes32x4:
4965//ZZ case Iop_CatEvenLanes32x4: {
4966//ZZ HReg rD = newVRegV(env);
4967//ZZ HReg rM = newVRegV(env);
4968//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4969//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4970//ZZ UInt size;
4971//ZZ Bool resRd; // is the result in rD or rM ?
4972//ZZ switch (e->Iex.Binop.op) {
4973//ZZ case Iop_CatOddLanes8x16: resRd = False; size = 0; break;
4974//ZZ case Iop_CatEvenLanes8x16: resRd = True; size = 0; break;
4975//ZZ case Iop_CatOddLanes16x8: resRd = False; size = 1; break;
4976//ZZ case Iop_CatEvenLanes16x8: resRd = True; size = 1; break;
4977//ZZ case Iop_CatOddLanes32x4: resRd = False; size = 2; break;
4978//ZZ case Iop_CatEvenLanes32x4: resRd = True; size = 2; break;
4979//ZZ default: vassert(0);
4980//ZZ }
4981//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, True));
4982//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, True));
4983//ZZ addInstr(env, ARMInstr_NDual(ARMneon_UZP, rD, rM, size, True));
4984//ZZ return resRd ? rD : rM;
4985//ZZ }
4986//ZZ
4987//ZZ case Iop_QAdd8Ux16:
4988//ZZ case Iop_QAdd16Ux8:
4989//ZZ case Iop_QAdd32Ux4:
4990//ZZ case Iop_QAdd64Ux2: {
4991//ZZ HReg res = newVRegV(env);
4992//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4993//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4994//ZZ UInt size;
4995//ZZ switch (e->Iex.Binop.op) {
4996//ZZ case Iop_QAdd8Ux16: size = 0; break;
4997//ZZ case Iop_QAdd16Ux8: size = 1; break;
4998//ZZ case Iop_QAdd32Ux4: size = 2; break;
4999//ZZ case Iop_QAdd64Ux2: size = 3; break;
5000//ZZ default:
5001//ZZ ppIROp(e->Iex.Binop.op);
5002//ZZ vpanic("Illegal element size in VQADDU");
5003//ZZ }
5004//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQADDU,
5005//ZZ res, argL, argR, size, True));
5006//ZZ return res;
5007//ZZ }
5008//ZZ case Iop_QAdd8Sx16:
5009//ZZ case Iop_QAdd16Sx8:
5010//ZZ case Iop_QAdd32Sx4:
5011//ZZ case Iop_QAdd64Sx2: {
5012//ZZ HReg res = newVRegV(env);
5013//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5014//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5015//ZZ UInt size;
5016//ZZ switch (e->Iex.Binop.op) {
5017//ZZ case Iop_QAdd8Sx16: size = 0; break;
5018//ZZ case Iop_QAdd16Sx8: size = 1; break;
5019//ZZ case Iop_QAdd32Sx4: size = 2; break;
5020//ZZ case Iop_QAdd64Sx2: size = 3; break;
5021//ZZ default:
5022//ZZ ppIROp(e->Iex.Binop.op);
5023//ZZ vpanic("Illegal element size in VQADDS");
5024//ZZ }
5025//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQADDS,
5026//ZZ res, argL, argR, size, True));
5027//ZZ return res;
5028//ZZ }
5029//ZZ case Iop_Sub8x16:
5030//ZZ case Iop_Sub16x8:
5031//ZZ case Iop_Sub32x4:
5032//ZZ case Iop_Sub64x2: {
5033//ZZ HReg res = newVRegV(env);
5034//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5035//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5036//ZZ UInt size;
5037//ZZ switch (e->Iex.Binop.op) {
5038//ZZ case Iop_Sub8x16: size = 0; break;
5039//ZZ case Iop_Sub16x8: size = 1; break;
5040//ZZ case Iop_Sub32x4: size = 2; break;
5041//ZZ case Iop_Sub64x2: size = 3; break;
5042//ZZ default:
5043//ZZ ppIROp(e->Iex.Binop.op);
5044//ZZ vpanic("Illegal element size in VSUB");
5045//ZZ }
5046//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
5047//ZZ res, argL, argR, size, True));
5048//ZZ return res;
5049//ZZ }
5050//ZZ case Iop_Sub32Fx4: {
5051//ZZ HReg res = newVRegV(env);
5052//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5053//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5054//ZZ UInt size = 0;
5055//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VSUBFP,
5056//ZZ res, argL, argR, size, True));
5057//ZZ return res;
5058//ZZ }
5059//ZZ case Iop_QSub8Ux16:
5060//ZZ case Iop_QSub16Ux8:
5061//ZZ case Iop_QSub32Ux4:
5062//ZZ case Iop_QSub64Ux2: {
5063//ZZ HReg res = newVRegV(env);
5064//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5065//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5066//ZZ UInt size;
5067//ZZ switch (e->Iex.Binop.op) {
5068//ZZ case Iop_QSub8Ux16: size = 0; break;
5069//ZZ case Iop_QSub16Ux8: size = 1; break;
5070//ZZ case Iop_QSub32Ux4: size = 2; break;
5071//ZZ case Iop_QSub64Ux2: size = 3; break;
5072//ZZ default:
5073//ZZ ppIROp(e->Iex.Binop.op);
5074//ZZ vpanic("Illegal element size in VQSUBU");
5075//ZZ }
5076//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBU,
5077//ZZ res, argL, argR, size, True));
5078//ZZ return res;
5079//ZZ }
5080//ZZ case Iop_QSub8Sx16:
5081//ZZ case Iop_QSub16Sx8:
5082//ZZ case Iop_QSub32Sx4:
5083//ZZ case Iop_QSub64Sx2: {
5084//ZZ HReg res = newVRegV(env);
5085//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5086//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5087//ZZ UInt size;
5088//ZZ switch (e->Iex.Binop.op) {
5089//ZZ case Iop_QSub8Sx16: size = 0; break;
5090//ZZ case Iop_QSub16Sx8: size = 1; break;
5091//ZZ case Iop_QSub32Sx4: size = 2; break;
5092//ZZ case Iop_QSub64Sx2: size = 3; break;
5093//ZZ default:
5094//ZZ ppIROp(e->Iex.Binop.op);
5095//ZZ vpanic("Illegal element size in VQSUBS");
5096//ZZ }
5097//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBS,
5098//ZZ res, argL, argR, size, True));
5099//ZZ return res;
5100//ZZ }
5101//ZZ case Iop_Max8Ux16:
5102//ZZ case Iop_Max16Ux8:
5103//ZZ case Iop_Max32Ux4: {
5104//ZZ HReg res = newVRegV(env);
5105//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5106//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5107//ZZ UInt size;
5108//ZZ switch (e->Iex.Binop.op) {
5109//ZZ case Iop_Max8Ux16: size = 0; break;
5110//ZZ case Iop_Max16Ux8: size = 1; break;
5111//ZZ case Iop_Max32Ux4: size = 2; break;
5112//ZZ default: vpanic("Illegal element size in VMAXU");
5113//ZZ }
5114//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMAXU,
5115//ZZ res, argL, argR, size, True));
5116//ZZ return res;
5117//ZZ }
5118//ZZ case Iop_Max8Sx16:
5119//ZZ case Iop_Max16Sx8:
5120//ZZ case Iop_Max32Sx4: {
5121//ZZ HReg res = newVRegV(env);
5122//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5123//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5124//ZZ UInt size;
5125//ZZ switch (e->Iex.Binop.op) {
5126//ZZ case Iop_Max8Sx16: size = 0; break;
5127//ZZ case Iop_Max16Sx8: size = 1; break;
5128//ZZ case Iop_Max32Sx4: size = 2; break;
5129//ZZ             default: vpanic("Illegal element size in VMAXS");
5130//ZZ }
5131//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMAXS,
5132//ZZ res, argL, argR, size, True));
5133//ZZ return res;
5134//ZZ }
5135//ZZ case Iop_Min8Ux16:
5136//ZZ case Iop_Min16Ux8:
5137//ZZ case Iop_Min32Ux4: {
5138//ZZ HReg res = newVRegV(env);
5139//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5140//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5141//ZZ UInt size;
5142//ZZ switch (e->Iex.Binop.op) {
5143//ZZ case Iop_Min8Ux16: size = 0; break;
5144//ZZ case Iop_Min16Ux8: size = 1; break;
5145//ZZ case Iop_Min32Ux4: size = 2; break;
5146//ZZ             default: vpanic("Illegal element size in VMINU");
5147//ZZ }
5148//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMINU,
5149//ZZ res, argL, argR, size, True));
5150//ZZ return res;
5151//ZZ }
5152//ZZ case Iop_Min8Sx16:
5153//ZZ case Iop_Min16Sx8:
5154//ZZ case Iop_Min32Sx4: {
5155//ZZ HReg res = newVRegV(env);
5156//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5157//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5158//ZZ UInt size;
5159//ZZ switch (e->Iex.Binop.op) {
5160//ZZ case Iop_Min8Sx16: size = 0; break;
5161//ZZ case Iop_Min16Sx8: size = 1; break;
5162//ZZ case Iop_Min32Sx4: size = 2; break;
5163//ZZ             default: vpanic("Illegal element size in VMINS");
5164//ZZ }
5165//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMINS,
5166//ZZ res, argL, argR, size, True));
5167//ZZ return res;
5168//ZZ }
5169//ZZ case Iop_Sar8x16:
5170//ZZ case Iop_Sar16x8:
5171//ZZ case Iop_Sar32x4:
5172//ZZ case Iop_Sar64x2: {
5173//ZZ HReg res = newVRegV(env);
5174//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5175//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5176//ZZ HReg argR2 = newVRegV(env);
5177//ZZ HReg zero = newVRegV(env);
5178//ZZ UInt size;
5179//ZZ switch (e->Iex.Binop.op) {
5180//ZZ case Iop_Sar8x16: size = 0; break;
5181//ZZ case Iop_Sar16x8: size = 1; break;
5182//ZZ case Iop_Sar32x4: size = 2; break;
5183//ZZ case Iop_Sar64x2: size = 3; break;
5184//ZZ default: vassert(0);
5185//ZZ }
5186//ZZ addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
5187//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
5188//ZZ argR2, zero, argR, size, True));
5189//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
5190//ZZ res, argL, argR2, size, True));
5191//ZZ return res;
5192//ZZ }
5193//ZZ case Iop_Sal8x16:
5194//ZZ case Iop_Sal16x8:
5195//ZZ case Iop_Sal32x4:
5196//ZZ case Iop_Sal64x2: {
5197//ZZ HReg res = newVRegV(env);
5198//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5199//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5200//ZZ UInt size;
5201//ZZ switch (e->Iex.Binop.op) {
5202//ZZ case Iop_Sal8x16: size = 0; break;
5203//ZZ case Iop_Sal16x8: size = 1; break;
5204//ZZ case Iop_Sal32x4: size = 2; break;
5205//ZZ case Iop_Sal64x2: size = 3; break;
5206//ZZ default: vassert(0);
5207//ZZ }
5208//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
5209//ZZ res, argL, argR, size, True));
5210//ZZ return res;
5211//ZZ }
5212//ZZ case Iop_Shr8x16:
5213//ZZ case Iop_Shr16x8:
5214//ZZ case Iop_Shr32x4:
5215//ZZ case Iop_Shr64x2: {
5216//ZZ HReg res = newVRegV(env);
5217//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5218//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5219//ZZ HReg argR2 = newVRegV(env);
5220//ZZ HReg zero = newVRegV(env);
5221//ZZ UInt size;
5222//ZZ switch (e->Iex.Binop.op) {
5223//ZZ case Iop_Shr8x16: size = 0; break;
5224//ZZ case Iop_Shr16x8: size = 1; break;
5225//ZZ case Iop_Shr32x4: size = 2; break;
5226//ZZ case Iop_Shr64x2: size = 3; break;
5227//ZZ default: vassert(0);
5228//ZZ }
5229//ZZ addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
5230//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
5231//ZZ argR2, zero, argR, size, True));
5232//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
5233//ZZ res, argL, argR2, size, True));
5234//ZZ return res;
5235//ZZ }
5236//ZZ case Iop_Shl8x16:
5237//ZZ case Iop_Shl16x8:
5238//ZZ case Iop_Shl32x4:
5239//ZZ case Iop_Shl64x2: {
5240//ZZ HReg res = newVRegV(env);
5241//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5242//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5243//ZZ UInt size;
5244//ZZ switch (e->Iex.Binop.op) {
5245//ZZ case Iop_Shl8x16: size = 0; break;
5246//ZZ case Iop_Shl16x8: size = 1; break;
5247//ZZ case Iop_Shl32x4: size = 2; break;
5248//ZZ case Iop_Shl64x2: size = 3; break;
5249//ZZ default: vassert(0);
5250//ZZ }
5251//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
5252//ZZ res, argL, argR, size, True));
5253//ZZ return res;
5254//ZZ }
5255//ZZ case Iop_QShl8x16:
5256//ZZ case Iop_QShl16x8:
5257//ZZ case Iop_QShl32x4:
5258//ZZ case Iop_QShl64x2: {
5259//ZZ HReg res = newVRegV(env);
5260//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5261//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5262//ZZ UInt size;
5263//ZZ switch (e->Iex.Binop.op) {
5264//ZZ case Iop_QShl8x16: size = 0; break;
5265//ZZ case Iop_QShl16x8: size = 1; break;
5266//ZZ case Iop_QShl32x4: size = 2; break;
5267//ZZ case Iop_QShl64x2: size = 3; break;
5268//ZZ default: vassert(0);
5269//ZZ }
5270//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VQSHL,
5271//ZZ res, argL, argR, size, True));
5272//ZZ return res;
5273//ZZ }
5274//ZZ case Iop_QSal8x16:
5275//ZZ case Iop_QSal16x8:
5276//ZZ case Iop_QSal32x4:
5277//ZZ case Iop_QSal64x2: {
5278//ZZ HReg res = newVRegV(env);
5279//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5280//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5281//ZZ UInt size;
5282//ZZ switch (e->Iex.Binop.op) {
5283//ZZ case Iop_QSal8x16: size = 0; break;
5284//ZZ case Iop_QSal16x8: size = 1; break;
5285//ZZ case Iop_QSal32x4: size = 2; break;
5286//ZZ case Iop_QSal64x2: size = 3; break;
5287//ZZ default: vassert(0);
5288//ZZ }
5289//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VQSAL,
5290//ZZ res, argL, argR, size, True));
5291//ZZ return res;
5292//ZZ }
5293//ZZ case Iop_QShlN8x16:
5294//ZZ case Iop_QShlN16x8:
5295//ZZ case Iop_QShlN32x4:
5296//ZZ case Iop_QShlN64x2: {
5297//ZZ HReg res = newVRegV(env);
5298//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5299//ZZ UInt size, imm;
5300//ZZ if (e->Iex.Binop.arg2->tag != Iex_Const ||
5301//ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
5302//ZZ               vpanic("ARM target supports Iop_QShlNAxB with constant "
5303//ZZ "second argument only\n");
5304//ZZ }
5305//ZZ imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
5306//ZZ switch (e->Iex.Binop.op) {
5307//ZZ case Iop_QShlN8x16: size = 8 | imm; break;
5308//ZZ case Iop_QShlN16x8: size = 16 | imm; break;
5309//ZZ case Iop_QShlN32x4: size = 32 | imm; break;
5310//ZZ case Iop_QShlN64x2: size = 64 | imm; break;
5311//ZZ default: vassert(0);
5312//ZZ }
5313//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUU,
5314//ZZ res, argL, size, True));
5315//ZZ return res;
5316//ZZ }
5317//ZZ case Iop_QShlN8Sx16:
5318//ZZ case Iop_QShlN16Sx8:
5319//ZZ case Iop_QShlN32Sx4:
5320//ZZ case Iop_QShlN64Sx2: {
5321//ZZ HReg res = newVRegV(env);
5322//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5323//ZZ UInt size, imm;
5324//ZZ if (e->Iex.Binop.arg2->tag != Iex_Const ||
5325//ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
5326//ZZ               vpanic("ARM target supports Iop_QShlNASxB with constant "
5327//ZZ "second argument only\n");
5328//ZZ }
5329//ZZ imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
5330//ZZ switch (e->Iex.Binop.op) {
5331//ZZ case Iop_QShlN8Sx16: size = 8 | imm; break;
5332//ZZ case Iop_QShlN16Sx8: size = 16 | imm; break;
5333//ZZ case Iop_QShlN32Sx4: size = 32 | imm; break;
5334//ZZ case Iop_QShlN64Sx2: size = 64 | imm; break;
5335//ZZ default: vassert(0);
5336//ZZ }
5337//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUS,
5338//ZZ res, argL, size, True));
5339//ZZ return res;
5340//ZZ }
5341//ZZ case Iop_QSalN8x16:
5342//ZZ case Iop_QSalN16x8:
5343//ZZ case Iop_QSalN32x4:
5344//ZZ case Iop_QSalN64x2: {
5345//ZZ HReg res = newVRegV(env);
5346//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5347//ZZ UInt size, imm;
5348//ZZ if (e->Iex.Binop.arg2->tag != Iex_Const ||
5349//ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
5350//ZZ             vpanic("ARM target supports Iop_QShlNAxB with constant "
5351//ZZ "second argument only\n");
5352//ZZ }
5353//ZZ imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
5354//ZZ switch (e->Iex.Binop.op) {
5355//ZZ case Iop_QSalN8x16: size = 8 | imm; break;
5356//ZZ case Iop_QSalN16x8: size = 16 | imm; break;
5357//ZZ case Iop_QSalN32x4: size = 32 | imm; break;
5358//ZZ case Iop_QSalN64x2: size = 64 | imm; break;
5359//ZZ default: vassert(0);
5360//ZZ }
5361//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNSS,
5362//ZZ res, argL, size, True));
5363//ZZ return res;
5364//ZZ }
5365//ZZ case Iop_ShrN8x16:
5366//ZZ case Iop_ShrN16x8:
5367//ZZ case Iop_ShrN32x4:
5368//ZZ case Iop_ShrN64x2: {
5369//ZZ HReg res = newVRegV(env);
5370//ZZ HReg tmp = newVRegV(env);
5371//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5372//ZZ HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
5373//ZZ HReg argR2 = newVRegI(env);
5374//ZZ UInt size;
5375//ZZ switch (e->Iex.Binop.op) {
5376//ZZ case Iop_ShrN8x16: size = 0; break;
5377//ZZ case Iop_ShrN16x8: size = 1; break;
5378//ZZ case Iop_ShrN32x4: size = 2; break;
5379//ZZ case Iop_ShrN64x2: size = 3; break;
5380//ZZ default: vassert(0);
5381//ZZ }
5382//ZZ addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
5383//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_DUP,
5384//ZZ tmp, argR2, 0, True));
5385//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
5386//ZZ res, argL, tmp, size, True));
5387//ZZ return res;
5388//ZZ }
5389//ZZ case Iop_ShlN8x16:
5390//ZZ case Iop_ShlN16x8:
5391//ZZ case Iop_ShlN32x4:
5392//ZZ case Iop_ShlN64x2: {
5393//ZZ HReg res = newVRegV(env);
5394//ZZ HReg tmp = newVRegV(env);
5395//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5396//ZZ HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
5397//ZZ UInt size;
5398//ZZ switch (e->Iex.Binop.op) {
5399//ZZ case Iop_ShlN8x16: size = 0; break;
5400//ZZ case Iop_ShlN16x8: size = 1; break;
5401//ZZ case Iop_ShlN32x4: size = 2; break;
5402//ZZ case Iop_ShlN64x2: size = 3; break;
5403//ZZ default: vassert(0);
5404//ZZ }
5405//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR, 0, True));
5406//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
5407//ZZ res, argL, tmp, size, True));
5408//ZZ return res;
5409//ZZ }
5410//ZZ case Iop_SarN8x16:
5411//ZZ case Iop_SarN16x8:
5412//ZZ case Iop_SarN32x4:
5413//ZZ case Iop_SarN64x2: {
5414//ZZ HReg res = newVRegV(env);
5415//ZZ HReg tmp = newVRegV(env);
5416//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5417//ZZ HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
5418//ZZ HReg argR2 = newVRegI(env);
5419//ZZ UInt size;
5420//ZZ switch (e->Iex.Binop.op) {
5421//ZZ case Iop_SarN8x16: size = 0; break;
5422//ZZ case Iop_SarN16x8: size = 1; break;
5423//ZZ case Iop_SarN32x4: size = 2; break;
5424//ZZ case Iop_SarN64x2: size = 3; break;
5425//ZZ default: vassert(0);
5426//ZZ }
5427//ZZ addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
5428//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, True));
5429//ZZ addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
5430//ZZ res, argL, tmp, size, True));
5431//ZZ return res;
5432//ZZ }
5433//ZZ case Iop_CmpGT8Ux16:
5434//ZZ case Iop_CmpGT16Ux8:
5435//ZZ case Iop_CmpGT32Ux4: {
5436//ZZ HReg res = newVRegV(env);
5437//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5438//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5439//ZZ UInt size;
5440//ZZ switch (e->Iex.Binop.op) {
5441//ZZ case Iop_CmpGT8Ux16: size = 0; break;
5442//ZZ case Iop_CmpGT16Ux8: size = 1; break;
5443//ZZ case Iop_CmpGT32Ux4: size = 2; break;
5444//ZZ default: vassert(0);
5445//ZZ }
5446//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGTU,
5447//ZZ res, argL, argR, size, True));
5448//ZZ return res;
5449//ZZ }
5450//ZZ case Iop_CmpGT8Sx16:
5451//ZZ case Iop_CmpGT16Sx8:
5452//ZZ case Iop_CmpGT32Sx4: {
5453//ZZ HReg res = newVRegV(env);
5454//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5455//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5456//ZZ UInt size;
5457//ZZ switch (e->Iex.Binop.op) {
5458//ZZ case Iop_CmpGT8Sx16: size = 0; break;
5459//ZZ case Iop_CmpGT16Sx8: size = 1; break;
5460//ZZ case Iop_CmpGT32Sx4: size = 2; break;
5461//ZZ default: vassert(0);
5462//ZZ }
5463//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGTS,
5464//ZZ res, argL, argR, size, True));
5465//ZZ return res;
5466//ZZ }
5467//ZZ case Iop_CmpEQ8x16:
5468//ZZ case Iop_CmpEQ16x8:
5469//ZZ case Iop_CmpEQ32x4: {
5470//ZZ HReg res = newVRegV(env);
5471//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5472//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5473//ZZ UInt size;
5474//ZZ switch (e->Iex.Binop.op) {
5475//ZZ case Iop_CmpEQ8x16: size = 0; break;
5476//ZZ case Iop_CmpEQ16x8: size = 1; break;
5477//ZZ case Iop_CmpEQ32x4: size = 2; break;
5478//ZZ default: vassert(0);
5479//ZZ }
5480//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCEQ,
5481//ZZ res, argL, argR, size, True));
5482//ZZ return res;
5483//ZZ }
5484//ZZ case Iop_Mul8x16:
5485//ZZ case Iop_Mul16x8:
5486//ZZ case Iop_Mul32x4: {
5487//ZZ HReg res = newVRegV(env);
5488//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5489//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5490//ZZ UInt size = 0;
5491//ZZ switch(e->Iex.Binop.op) {
5492//ZZ case Iop_Mul8x16: size = 0; break;
5493//ZZ case Iop_Mul16x8: size = 1; break;
5494//ZZ case Iop_Mul32x4: size = 2; break;
5495//ZZ default: vassert(0);
5496//ZZ }
5497//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMUL,
5498//ZZ res, argL, argR, size, True));
5499//ZZ return res;
5500//ZZ }
5501//ZZ case Iop_Mul32Fx4: {
5502//ZZ HReg res = newVRegV(env);
5503//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5504//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5505//ZZ UInt size = 0;
5506//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMULFP,
5507//ZZ res, argL, argR, size, True));
5508//ZZ return res;
5509//ZZ }
5510//ZZ case Iop_Mull8Ux8:
5511//ZZ case Iop_Mull16Ux4:
5512//ZZ case Iop_Mull32Ux2: {
5513//ZZ HReg res = newVRegV(env);
5514//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5515//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
5516//ZZ UInt size = 0;
5517//ZZ switch(e->Iex.Binop.op) {
5518//ZZ case Iop_Mull8Ux8: size = 0; break;
5519//ZZ case Iop_Mull16Ux4: size = 1; break;
5520//ZZ case Iop_Mull32Ux2: size = 2; break;
5521//ZZ default: vassert(0);
5522//ZZ }
5523//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMULLU,
5524//ZZ res, argL, argR, size, True));
5525//ZZ return res;
5526//ZZ }
5527//ZZ
5528//ZZ case Iop_Mull8Sx8:
5529//ZZ case Iop_Mull16Sx4:
5530//ZZ case Iop_Mull32Sx2: {
5531//ZZ HReg res = newVRegV(env);
5532//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5533//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
5534//ZZ UInt size = 0;
5535//ZZ switch(e->Iex.Binop.op) {
5536//ZZ case Iop_Mull8Sx8: size = 0; break;
5537//ZZ case Iop_Mull16Sx4: size = 1; break;
5538//ZZ case Iop_Mull32Sx2: size = 2; break;
5539//ZZ default: vassert(0);
5540//ZZ }
5541//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMULLS,
5542//ZZ res, argL, argR, size, True));
5543//ZZ return res;
5544//ZZ }
5545//ZZ
5546//ZZ case Iop_QDMulHi16Sx8:
5547//ZZ case Iop_QDMulHi32Sx4: {
5548//ZZ HReg res = newVRegV(env);
5549//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5550//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5551//ZZ UInt size = 0;
5552//ZZ switch(e->Iex.Binop.op) {
5553//ZZ case Iop_QDMulHi16Sx8: size = 1; break;
5554//ZZ case Iop_QDMulHi32Sx4: size = 2; break;
5555//ZZ default: vassert(0);
5556//ZZ }
5557//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULH,
5558//ZZ res, argL, argR, size, True));
5559//ZZ return res;
5560//ZZ }
5561//ZZ
5562//ZZ case Iop_QRDMulHi16Sx8:
5563//ZZ case Iop_QRDMulHi32Sx4: {
5564//ZZ HReg res = newVRegV(env);
5565//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5566//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5567//ZZ UInt size = 0;
5568//ZZ switch(e->Iex.Binop.op) {
5569//ZZ case Iop_QRDMulHi16Sx8: size = 1; break;
5570//ZZ case Iop_QRDMulHi32Sx4: size = 2; break;
5571//ZZ default: vassert(0);
5572//ZZ }
5573//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQRDMULH,
5574//ZZ res, argL, argR, size, True));
5575//ZZ return res;
5576//ZZ }
5577//ZZ
5578//ZZ case Iop_QDMulLong16Sx4:
5579//ZZ case Iop_QDMulLong32Sx2: {
5580//ZZ HReg res = newVRegV(env);
5581//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5582//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
5583//ZZ UInt size = 0;
5584//ZZ switch(e->Iex.Binop.op) {
5585//ZZ case Iop_QDMulLong16Sx4: size = 1; break;
5586//ZZ case Iop_QDMulLong32Sx2: size = 2; break;
5587//ZZ default: vassert(0);
5588//ZZ }
5589//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULL,
5590//ZZ res, argL, argR, size, True));
5591//ZZ return res;
5592//ZZ }
5593//ZZ case Iop_PolynomialMul8x16: {
5594//ZZ HReg res = newVRegV(env);
5595//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5596//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5597//ZZ UInt size = 0;
5598//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMULP,
5599//ZZ res, argL, argR, size, True));
5600//ZZ return res;
5601//ZZ }
5602//ZZ case Iop_Max32Fx4: {
5603//ZZ HReg res = newVRegV(env);
5604//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5605//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5606//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMAXF,
5607//ZZ res, argL, argR, 2, True));
5608//ZZ return res;
5609//ZZ }
5610//ZZ case Iop_Min32Fx4: {
5611//ZZ HReg res = newVRegV(env);
5612//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5613//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5614//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMINF,
5615//ZZ res, argL, argR, 2, True));
5616//ZZ return res;
5617//ZZ }
5618//ZZ case Iop_PwMax32Fx4: {
5619//ZZ HReg res = newVRegV(env);
5620//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5621//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5622//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXF,
5623//ZZ res, argL, argR, 2, True));
5624//ZZ return res;
5625//ZZ }
5626//ZZ case Iop_PwMin32Fx4: {
5627//ZZ HReg res = newVRegV(env);
5628//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5629//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5630//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VPMINF,
5631//ZZ res, argL, argR, 2, True));
5632//ZZ return res;
5633//ZZ }
5634//ZZ case Iop_CmpGT32Fx4: {
5635//ZZ HReg res = newVRegV(env);
5636//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5637//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5638//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGTF,
5639//ZZ res, argL, argR, 2, True));
5640//ZZ return res;
5641//ZZ }
5642//ZZ case Iop_CmpGE32Fx4: {
5643//ZZ HReg res = newVRegV(env);
5644//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5645//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5646//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCGEF,
5647//ZZ res, argL, argR, 2, True));
5648//ZZ return res;
5649//ZZ }
5650//ZZ case Iop_CmpEQ32Fx4: {
5651//ZZ HReg res = newVRegV(env);
5652//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5653//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5654//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VCEQF,
5655//ZZ res, argL, argR, 2, True));
5656//ZZ return res;
5657//ZZ }
5658//ZZ
5659//ZZ case Iop_PolynomialMull8x8: {
5660//ZZ HReg res = newVRegV(env);
5661//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5662//ZZ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
5663//ZZ UInt size = 0;
5664//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VMULLP,
5665//ZZ res, argL, argR, size, True));
5666//ZZ return res;
5667//ZZ }
5668//ZZ case Iop_F32ToFixed32Ux4_RZ:
5669//ZZ case Iop_F32ToFixed32Sx4_RZ:
5670//ZZ case Iop_Fixed32UToF32x4_RN:
5671//ZZ case Iop_Fixed32SToF32x4_RN: {
5672//ZZ HReg res = newVRegV(env);
5673//ZZ HReg arg = iselNeonExpr(env, e->Iex.Binop.arg1);
5674//ZZ ARMNeonUnOp op;
5675//ZZ UInt imm6;
5676//ZZ if (e->Iex.Binop.arg2->tag != Iex_Const ||
5677//ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
5678//ZZ vpanic("ARM supports FP <-> Fixed conversion with constant "
5679//ZZ "second argument less than 33 only\n");
5680//ZZ }
5681//ZZ imm6 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
5682//ZZ vassert(imm6 <= 32 && imm6 > 0);
5683//ZZ imm6 = 64 - imm6;
5684//ZZ switch(e->Iex.Binop.op) {
5685//ZZ case Iop_F32ToFixed32Ux4_RZ: op = ARMneon_VCVTFtoFixedU; break;
5686//ZZ case Iop_F32ToFixed32Sx4_RZ: op = ARMneon_VCVTFtoFixedS; break;
5687//ZZ case Iop_Fixed32UToF32x4_RN: op = ARMneon_VCVTFixedUtoF; break;
5688//ZZ case Iop_Fixed32SToF32x4_RN: op = ARMneon_VCVTFixedStoF; break;
5689//ZZ default: vassert(0);
5690//ZZ }
5691//ZZ addInstr(env, ARMInstr_NUnary(op, res, arg, imm6, True));
5692//ZZ return res;
5693//ZZ }
5694//ZZ /*
5695//ZZ FIXME remove if not used
5696//ZZ case Iop_VDup8x16:
5697//ZZ case Iop_VDup16x8:
5698//ZZ case Iop_VDup32x4: {
5699//ZZ HReg res = newVRegV(env);
5700//ZZ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5701//ZZ UInt imm4;
5702//ZZ UInt index;
5703//ZZ if (e->Iex.Binop.arg2->tag != Iex_Const ||
5704//ZZ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
5705//ZZ vpanic("ARM supports Iop_VDup with constant "
5706//ZZ "second argument less than 16 only\n");
5707//ZZ }
5708//ZZ index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
5709//ZZ switch(e->Iex.Binop.op) {
5710//ZZ case Iop_VDup8x16: imm4 = (index << 1) + 1; break;
5711//ZZ case Iop_VDup16x8: imm4 = (index << 2) + 2; break;
5712//ZZ case Iop_VDup32x4: imm4 = (index << 3) + 4; break;
5713//ZZ default: vassert(0);
5714//ZZ }
5715//ZZ if (imm4 >= 16) {
5716//ZZ vpanic("ARM supports Iop_VDup with constant "
5717//ZZ "second argument less than 16 only\n");
5718//ZZ }
5719//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_VDUP,
5720//ZZ res, argL, imm4, True));
5721//ZZ return res;
5722//ZZ }
5723//ZZ */
5724//ZZ case Iop_PwAdd8x16:
5725//ZZ case Iop_PwAdd16x8:
5726//ZZ case Iop_PwAdd32x4: {
5727//ZZ HReg res = newVRegV(env);
5728//ZZ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5729//ZZ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5730//ZZ UInt size = 0;
5731//ZZ switch(e->Iex.Binop.op) {
5732//ZZ case Iop_PwAdd8x16: size = 0; break;
5733//ZZ case Iop_PwAdd16x8: size = 1; break;
5734//ZZ case Iop_PwAdd32x4: size = 2; break;
5735//ZZ default: vassert(0);
5736//ZZ }
5737//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VPADD,
5738//ZZ res, argL, argR, size, True));
5739//ZZ return res;
5740//ZZ }
5741 /* ... */
5742 default:
5743 break;
5744 } /* switch on the binop */
5745 } /* if (e->tag == Iex_Binop) */
5746
5747//ZZ if (e->tag == Iex_Triop) {
5748//ZZ IRTriop *triop = e->Iex.Triop.details;
5749//ZZ
5750//ZZ switch (triop->op) {
5751//ZZ case Iop_ExtractV128: {
5752//ZZ HReg res = newVRegV(env);
5753//ZZ HReg argL = iselNeonExpr(env, triop->arg1);
5754//ZZ HReg argR = iselNeonExpr(env, triop->arg2);
5755//ZZ UInt imm4;
5756//ZZ if (triop->arg3->tag != Iex_Const ||
5757//ZZ typeOfIRExpr(env->type_env, triop->arg3) != Ity_I8) {
5758//ZZ vpanic("ARM target supports Iop_ExtractV128 with constant "
5759//ZZ "third argument less than 16 only\n");
5760//ZZ }
5761//ZZ imm4 = triop->arg3->Iex.Const.con->Ico.U8;
5762//ZZ if (imm4 >= 16) {
5763//ZZ vpanic("ARM target supports Iop_ExtractV128 with constant "
5764//ZZ "third argument less than 16 only\n");
5765//ZZ }
5766//ZZ addInstr(env, ARMInstr_NBinary(ARMneon_VEXT,
5767//ZZ res, argL, argR, imm4, True));
5768//ZZ return res;
5769//ZZ }
5770//ZZ default:
5771//ZZ break;
5772//ZZ }
5773//ZZ }
5774//ZZ
5775//ZZ if (e->tag == Iex_ITE) { // VFD
5776//ZZ ARMCondCode cc;
5777//ZZ HReg r1 = iselNeonExpr(env, e->Iex.ITE.iftrue);
5778//ZZ HReg r0 = iselNeonExpr(env, e->Iex.ITE.iffalse);
5779//ZZ HReg dst = newVRegV(env);
5780//ZZ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, dst, r1, 4, True));
5781//ZZ cc = iselCondCode(env, e->Iex.ITE.cond);
5782//ZZ addInstr(env, ARMInstr_NCMovQ(cc ^ 1, dst, r0));
5783//ZZ return dst;
5784//ZZ }
5785
5786 v128_expr_bad:
5787 ppIRExpr(e);
5788 vpanic("iselV128Expr_wrk");
5789}
5790
5791
5792/*---------------------------------------------------------*/
5793/*--- ISEL: Floating point expressions (64 bit) ---*/
5794/*---------------------------------------------------------*/
5795
5796/* Compute a 64-bit floating point value into a register, the identity
5797 of which is returned. As with iselIntExpr_R, the reg may be either
5798 real or virtual; in any case it must not be changed by subsequent
5799 code emitted by the caller. */
5800
5801static HReg iselDblExpr ( ISelEnv* env, IRExpr* e )
5802{
5803 HReg r = iselDblExpr_wrk( env, e );
5804# if 0
5805 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
5806# endif
5807 vassert(hregClass(r) == HRcFlt64);
5808 vassert(hregIsVirtual(r));
5809 return r;
5810}
5811
5812/* DO NOT CALL THIS DIRECTLY */
static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(e);
   vassert(ty == Ity_F64);

   /* Temporary: the value already lives in a virtual register. */
   if (e->tag == Iex_RdTmp) {
      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
   }

   /* F64 literal: materialise the raw 64 bits in an integer register,
      then move them sideways into an FP register. */
   if (e->tag == Iex_Const) {
      IRConst* con = e->Iex.Const.con;
      if (con->tag == Ico_F64i) {
         HReg src = newVRegI(env);
         HReg dst = newVRegD(env);
         addInstr(env, ARM64Instr_Imm64(src, con->Ico.F64i));
         addInstr(env, ARM64Instr_VDfromX(dst, src));
         return dst;
      }
   }

   /* 64-bit little-endian FP load from memory. */
   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
      vassert(e->Iex.Load.ty == Ity_F64);
      HReg addr = iselIntExpr_R(env, e->Iex.Load.addr);
      HReg res = newVRegD(env);
      addInstr(env, ARM64Instr_VLdStD(True/*isLoad*/, res, addr, 0));
      return res;
   }

   /* Read from the guest state.  Only handled for 8-aligned offsets
      small enough for the load's immediate field; other offsets fall
      through to the panic at the end. */
   if (e->tag == Iex_Get) {
      Int offs = e->Iex.Get.offset;
      if (offs >= 0 && offs < 32768 && 0 == (offs & 7)) {
         HReg rD = newVRegD(env);
         HReg rN = get_baseblock_register();
         addInstr(env, ARM64Instr_VLdStD(True/*isLoad*/, rD, rN, offs));
         return rD;
      }
   }

   if (e->tag == Iex_Unop) {
      switch (e->Iex.Unop.op) {
//ZZ          case Iop_ReinterpI64asF64: {
//ZZ             if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
//ZZ                return iselNeon64Expr(env, e->Iex.Unop.arg);
//ZZ             } else {
//ZZ                HReg srcHi, srcLo;
//ZZ                HReg dst = newVRegD(env);
//ZZ                iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg);
//ZZ                addInstr(env, ARMInstr_VXferD(True/*toD*/, dst, srcHi, srcLo));
//ZZ                return dst;
//ZZ             }
//ZZ          }
         case Iop_NegF64: {
            HReg src = iselDblExpr(env, e->Iex.Unop.arg);
            HReg dst = newVRegD(env);
            addInstr(env, ARM64Instr_VUnaryD(ARM64fpu_NEG, dst, src));
            return dst;
         }
         case Iop_AbsF64: {
            HReg src = iselDblExpr(env, e->Iex.Unop.arg);
            HReg dst = newVRegD(env);
            addInstr(env, ARM64Instr_VUnaryD(ARM64fpu_ABS, dst, src));
            return dst;
         }
         case Iop_F32toF64: {
            /* Widening conversion; no rounding mode is set here. */
            HReg src = iselFltExpr(env, e->Iex.Unop.arg);
            HReg dst = newVRegD(env);
            addInstr(env, ARM64Instr_VCvtSD(True/*sToD*/, dst, src));
            return dst;
         }
         case Iop_I32UtoF64:
         case Iop_I32StoF64: {
            /* Rounding mode is not involved here, since the
               conversion can always be done without loss of
               precision. */
            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
            HReg dst = newVRegD(env);
            Bool syned = e->Iex.Unop.op == Iop_I32StoF64;
            ARM64CvtOp cvt_op = syned ? ARM64cvt_F64_I32S : ARM64cvt_F64_I32U;
            addInstr(env, ARM64Instr_VCvtI2F(cvt_op, dst, src));
            return dst;
         }
         default:
            break;
      }
   }

   if (e->tag == Iex_Binop) {
      switch (e->Iex.Binop.op) {
         case Iop_RoundF64toInt: {
            /* arg1 is the IR rounding mode; install it in FPCR
               before emitting the rounding instruction. */
            HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
            HReg dst = newVRegD(env);
            set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
            addInstr(env, ARM64Instr_VUnaryD(ARM64fpu_RINT, dst, src));
            return dst;
         }
         case Iop_SqrtF64: {
            HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
            HReg dst = newVRegD(env);
            set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
            addInstr(env, ARM64Instr_VUnaryD(ARM64fpu_SQRT, dst, src));
            return dst;
         }
         case Iop_I64StoF64:
         case Iop_I64UtoF64: {
            /* 64-bit int -> F64 can lose precision, so the rounding
               mode (arg1) matters.  Note the source is evaluated
               first; FPCR is set immediately before the conversion. */
            ARM64CvtOp cvt_op = e->Iex.Binop.op == Iop_I64StoF64
                                   ? ARM64cvt_F64_I64S : ARM64cvt_F64_I64U;
            HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2);
            set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
            HReg dstS = newVRegD(env);
            addInstr(env, ARM64Instr_VCvtI2F(cvt_op, dstS, srcI));
            return dstS;
         }
         default:
            break;
      }
   }

   /* Dyadic FP arithmetic: triop->arg1 is the rounding mode. */
   if (e->tag == Iex_Triop) {
      IRTriop* triop = e->Iex.Triop.details;
      ARM64FpBinOp dblop = ARM64fpb_INVALID;
      switch (triop->op) {
         case Iop_DivF64: dblop = ARM64fpb_DIV; break;
         case Iop_MulF64: dblop = ARM64fpb_MUL; break;
         case Iop_SubF64: dblop = ARM64fpb_SUB; break;
         case Iop_AddF64: dblop = ARM64fpb_ADD; break;
         default: break;
      }
      if (dblop != ARM64fpb_INVALID) {
         HReg argL = iselDblExpr(env, triop->arg2);
         HReg argR = iselDblExpr(env, triop->arg3);
         HReg dst = newVRegD(env);
         set_FPCR_rounding_mode(env, triop->arg1);
         addInstr(env, ARM64Instr_VBinD(dblop, dst, argL, argR));
         return dst;
      }
   }

//ZZ    if (e->tag == Iex_ITE) { // VFD
//ZZ       if (ty == Ity_F64
//ZZ           && typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) {
//ZZ          HReg r1  = iselDblExpr(env, e->Iex.ITE.iftrue);
//ZZ          HReg r0  = iselDblExpr(env, e->Iex.ITE.iffalse);
//ZZ          HReg dst = newVRegD(env);
//ZZ          addInstr(env, ARMInstr_VUnaryD(ARMvfpu_COPY, dst, r1));
//ZZ          ARMCondCode cc = iselCondCode(env, e->Iex.ITE.cond);
//ZZ          addInstr(env, ARMInstr_VCMovD(cc ^ 1, dst, r0));
//ZZ          return dst;
//ZZ       }
//ZZ    }

   /* No case matched: print the offending expression and give up. */
   ppIRExpr(e);
   vpanic("iselDblExpr_wrk");
}
5967
5968
5969/*---------------------------------------------------------*/
5970/*--- ISEL: Floating point expressions (32 bit) ---*/
5971/*---------------------------------------------------------*/
5972
5973/* Compute a 32-bit floating point value into a register, the identity
5974 of which is returned. As with iselIntExpr_R, the reg may be either
5975 real or virtual; in any case it must not be changed by subsequent
5976 code emitted by the caller. Values are generated into HRcFlt64
5977 registers despite the values themselves being Ity_F32s. */
5978
5979static HReg iselFltExpr ( ISelEnv* env, IRExpr* e )
5980{
5981 HReg r = iselFltExpr_wrk( env, e );
5982# if 0
5983 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
5984# endif
5985 vassert(hregClass(r) == HRcFlt64);
5986 vassert(hregIsVirtual(r));
5987 return r;
5988}
5989
5990/* DO NOT CALL THIS DIRECTLY */
static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(e);
   vassert(ty == Ity_F32);

   /* Temporary: the value already lives in a virtual register. */
   if (e->tag == Iex_RdTmp) {
      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
   }

   if (e->tag == Iex_Const) {
      /* This is something of a kludge. Since a 32 bit floating point
         zero is just .. all zeroes, just create a 64 bit zero word
         and transfer it. This avoids having to create a SfromW
         instruction for this specific case. */
      IRConst* con = e->Iex.Const.con;
      if (con->tag == Ico_F32i && con->Ico.F32i == 0) {
         HReg src = newVRegI(env);
         HReg dst = newVRegD(env);
         addInstr(env, ARM64Instr_Imm64(src, 0));
         addInstr(env, ARM64Instr_VDfromX(dst, src));
         return dst;
      }
   }

//ZZ    if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
//ZZ       ARMAModeV* am;
//ZZ       HReg res = newVRegF(env);
//ZZ       vassert(e->Iex.Load.ty == Ity_F32);
//ZZ       am = iselIntExpr_AModeV(env, e->Iex.Load.addr);
//ZZ       addInstr(env, ARMInstr_VLdStS(True/*isLoad*/, res, am));
//ZZ       return res;
//ZZ    }

   /* Read from the guest state.  Only handled for 4-aligned offsets
      small enough for the load's immediate field; other offsets fall
      through to the panic at the end. */
   if (e->tag == Iex_Get) {
      Int offs = e->Iex.Get.offset;
      if (offs >= 0 && offs < 16384 && 0 == (offs & 3)) {
         HReg rD = newVRegD(env);
         HReg rN = get_baseblock_register();
         addInstr(env, ARM64Instr_VLdStS(True/*isLoad*/, rD, rN, offs));
         return rD;
      }
   }

   if (e->tag == Iex_Unop) {
      switch (e->Iex.Unop.op) {
//ZZ       case Iop_ReinterpI32asF32: {
//ZZ          HReg dst = newVRegF(env);
//ZZ          HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
//ZZ          addInstr(env, ARMInstr_VXferS(True/*toS*/, dst, src));
//ZZ          return dst;
//ZZ       }
         case Iop_NegF32: {
            HReg src = iselFltExpr(env, e->Iex.Unop.arg);
            HReg dst = newVRegD(env);
            addInstr(env, ARM64Instr_VUnaryS(ARM64fpu_NEG, dst, src));
            return dst;
         }
         case Iop_AbsF32: {
            HReg src = iselFltExpr(env, e->Iex.Unop.arg);
            HReg dst = newVRegD(env);
            addInstr(env, ARM64Instr_VUnaryS(ARM64fpu_ABS, dst, src));
            return dst;
         }
         default:
            break;
      }
   }

   if (e->tag == Iex_Binop) {
      switch (e->Iex.Binop.op) {
         case Iop_RoundF32toInt: {
            /* arg1 is the IR rounding mode; install it in FPCR
               before emitting the rounding instruction. */
            HReg src = iselFltExpr(env, e->Iex.Binop.arg2);
            HReg dst = newVRegD(env);
            set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
            addInstr(env, ARM64Instr_VUnaryS(ARM64fpu_RINT, dst, src));
            return dst;
         }
         case Iop_SqrtF32: {
            HReg src = iselFltExpr(env, e->Iex.Binop.arg2);
            HReg dst = newVRegD(env);
            set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
            addInstr(env, ARM64Instr_VUnaryS(ARM64fpu_SQRT, dst, src));
            return dst;
         }
         case Iop_F64toF32: {
            /* Narrowing conversion, so the rounding mode (arg1)
               matters. */
            HReg srcD = iselDblExpr(env, e->Iex.Binop.arg2);
            set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
            HReg dstS = newVRegD(env);
            addInstr(env, ARM64Instr_VCvtSD(False/*dToS*/, dstS, srcD));
            return dstS;
         }
         case Iop_I32StoF32:
         case Iop_I64UtoF32:
         case Iop_I64StoF32: {
            /* Int -> F32 can lose precision, so the rounding mode
               (arg1) matters.  Note the source is evaluated first;
               FPCR is set immediately before the conversion. */
            ARM64CvtOp cvt_op = ARM64cvt_INVALID;
            switch (e->Iex.Binop.op) {
               case Iop_I32StoF32: cvt_op = ARM64cvt_F32_I32S; break;
               case Iop_I64UtoF32: cvt_op = ARM64cvt_F32_I64U; break;
               case Iop_I64StoF32: cvt_op = ARM64cvt_F32_I64S; break;
               default: vassert(0);
            }
            HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2);
            set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
            HReg dstS = newVRegD(env);
            addInstr(env, ARM64Instr_VCvtI2F(cvt_op, dstS, srcI));
            return dstS;
         }
         default:
            break;
      }
   }

   /* Dyadic FP arithmetic: triop->arg1 is the rounding mode. */
   if (e->tag == Iex_Triop) {
      IRTriop* triop = e->Iex.Triop.details;
      ARM64FpBinOp sglop = ARM64fpb_INVALID;
      switch (triop->op) {
         case Iop_DivF32: sglop = ARM64fpb_DIV; break;
         case Iop_MulF32: sglop = ARM64fpb_MUL; break;
         case Iop_SubF32: sglop = ARM64fpb_SUB; break;
         case Iop_AddF32: sglop = ARM64fpb_ADD; break;
         default: break;
      }
      if (sglop != ARM64fpb_INVALID) {
         HReg argL = iselFltExpr(env, triop->arg2);
         HReg argR = iselFltExpr(env, triop->arg3);
         HReg dst = newVRegD(env);
         set_FPCR_rounding_mode(env, triop->arg1);
         addInstr(env, ARM64Instr_VBinS(sglop, dst, argL, argR));
         return dst;
      }
   }

//ZZ
//ZZ    if (e->tag == Iex_ITE) { // VFD
//ZZ       if (ty == Ity_F32
//ZZ           && typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) {
//ZZ          ARMCondCode cc;
//ZZ          HReg r1  = iselFltExpr(env, e->Iex.ITE.iftrue);
//ZZ          HReg r0  = iselFltExpr(env, e->Iex.ITE.iffalse);
//ZZ          HReg dst = newVRegF(env);
//ZZ          addInstr(env, ARMInstr_VUnaryS(ARMvfpu_COPY, dst, r1));
//ZZ          cc = iselCondCode(env, e->Iex.ITE.cond);
//ZZ          addInstr(env, ARMInstr_VCMovS(cc ^ 1, dst, r0));
//ZZ          return dst;
//ZZ       }
//ZZ    }

   /* No case matched: print the offending expression and give up. */
   ppIRExpr(e);
   vpanic("iselFltExpr_wrk");
}
6142
6143
6144/*---------------------------------------------------------*/
6145/*--- ISEL: Statements ---*/
6146/*---------------------------------------------------------*/
6147
6148static void iselStmt ( ISelEnv* env, IRStmt* stmt )
6149{
6150 if (vex_traceflags & VEX_TRACE_VCODE) {
6151 vex_printf("\n-- ");
6152 ppIRStmt(stmt);
6153 vex_printf("\n");
6154 }
6155 switch (stmt->tag) {
6156
6157 /* --------- STORE --------- */
6158 /* little-endian write to memory */
6159 case Ist_Store: {
6160 IRType tya = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
6161 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
6162 IREndness end = stmt->Ist.Store.end;
6163
6164 if (tya != Ity_I64 || end != Iend_LE)
6165 goto stmt_fail;
6166
6167 if (tyd == Ity_I64) {
6168 HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
6169 ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd);
6170 addInstr(env, ARM64Instr_LdSt64(False/*!isLoad*/, rD, am));
6171 return;
6172 }
6173 if (tyd == Ity_I32) {
6174 HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
6175 ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd);
6176 addInstr(env, ARM64Instr_LdSt32(False/*!isLoad*/, rD, am));
6177 return;
6178 }
6179 if (tyd == Ity_I16) {
6180 HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
6181 ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd);
6182 addInstr(env, ARM64Instr_LdSt16(False/*!isLoad*/, rD, am));
6183 return;
6184 }
6185 if (tyd == Ity_I8) {
6186 HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
6187 ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd);
6188 addInstr(env, ARM64Instr_LdSt8(False/*!isLoad*/, rD, am));
6189 return;
6190 }
6191 if (tyd == Ity_V128) {
6192 HReg qD = iselV128Expr(env, stmt->Ist.Store.data);
6193 HReg addr = iselIntExpr_R(env, stmt->Ist.Store.addr);
6194 addInstr(env, ARM64Instr_VLdStQ(False/*!isLoad*/, qD, addr));
6195 return;
6196 }
6197 if (tyd == Ity_F64) {
6198 HReg dD = iselDblExpr(env, stmt->Ist.Store.data);
6199 HReg addr = iselIntExpr_R(env, stmt->Ist.Store.addr);
6200 addInstr(env, ARM64Instr_VLdStD(False/*!isLoad*/, dD, addr, 0));
6201 return;
6202 }
6203
6204//ZZ if (tyd == Ity_I16) {
6205//ZZ HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
6206//ZZ ARMAMode2* am = iselIntExpr_AMode2(env, stmt->Ist.Store.addr);
6207//ZZ addInstr(env, ARMInstr_LdSt16(ARMcc_AL,
6208//ZZ False/*!isLoad*/,
6209//ZZ False/*!isSignedLoad*/, rD, am));
6210//ZZ return;
6211//ZZ }
6212//ZZ if (tyd == Ity_I8) {
6213//ZZ HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
6214//ZZ ARMAMode1* am = iselIntExpr_AMode1(env, stmt->Ist.Store.addr);
6215//ZZ addInstr(env, ARMInstr_LdSt8U(ARMcc_AL, False/*!isLoad*/, rD, am));
6216//ZZ return;
6217//ZZ }
6218//ZZ if (tyd == Ity_I64) {
6219//ZZ if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
6220//ZZ HReg dD = iselNeon64Expr(env, stmt->Ist.Store.data);
6221//ZZ ARMAModeN* am = iselIntExpr_AModeN(env, stmt->Ist.Store.addr);
6222//ZZ addInstr(env, ARMInstr_NLdStD(False, dD, am));
6223//ZZ } else {
6224//ZZ HReg rDhi, rDlo, rA;
6225//ZZ iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.Store.data);
6226//ZZ rA = iselIntExpr_R(env, stmt->Ist.Store.addr);
6227//ZZ addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!load*/, rDhi,
6228//ZZ ARMAMode1_RI(rA,4)));
6229//ZZ addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!load*/, rDlo,
6230//ZZ ARMAMode1_RI(rA,0)));
6231//ZZ }
6232//ZZ return;
6233//ZZ }
6234//ZZ if (tyd == Ity_F64) {
6235//ZZ HReg dD = iselDblExpr(env, stmt->Ist.Store.data);
6236//ZZ ARMAModeV* am = iselIntExpr_AModeV(env, stmt->Ist.Store.addr);
6237//ZZ addInstr(env, ARMInstr_VLdStD(False/*!isLoad*/, dD, am));
6238//ZZ return;
6239//ZZ }
6240//ZZ if (tyd == Ity_F32) {
6241//ZZ HReg fD = iselFltExpr(env, stmt->Ist.Store.data);
6242//ZZ ARMAModeV* am = iselIntExpr_AModeV(env, stmt->Ist.Store.addr);
6243//ZZ addInstr(env, ARMInstr_VLdStS(False/*!isLoad*/, fD, am));
6244//ZZ return;
6245//ZZ }
6246//ZZ if (tyd == Ity_V128) {
6247//ZZ HReg qD = iselNeonExpr(env, stmt->Ist.Store.data);
6248//ZZ ARMAModeN* am = iselIntExpr_AModeN(env, stmt->Ist.Store.addr);
6249//ZZ addInstr(env, ARMInstr_NLdStQ(False, qD, am));
6250//ZZ return;
6251//ZZ }
6252
6253 break;
6254 }
6255
6256//ZZ /* --------- CONDITIONAL STORE --------- */
6257//ZZ /* conditional little-endian write to memory */
6258//ZZ case Ist_StoreG: {
6259//ZZ IRStoreG* sg = stmt->Ist.StoreG.details;
6260//ZZ IRType tya = typeOfIRExpr(env->type_env, sg->addr);
6261//ZZ IRType tyd = typeOfIRExpr(env->type_env, sg->data);
6262//ZZ IREndness end = sg->end;
6263//ZZ
6264//ZZ if (tya != Ity_I32 || end != Iend_LE)
6265//ZZ goto stmt_fail;
6266//ZZ
6267//ZZ switch (tyd) {
6268//ZZ case Ity_I8:
6269//ZZ case Ity_I32: {
6270//ZZ HReg rD = iselIntExpr_R(env, sg->data);
6271//ZZ ARMAMode1* am = iselIntExpr_AMode1(env, sg->addr);
6272//ZZ ARMCondCode cc = iselCondCode(env, sg->guard);
6273//ZZ addInstr(env, (tyd == Ity_I32 ? ARMInstr_LdSt32 : ARMInstr_LdSt8U)
6274//ZZ (cc, False/*!isLoad*/, rD, am));
6275//ZZ return;
6276//ZZ }
6277//ZZ case Ity_I16: {
6278//ZZ HReg rD = iselIntExpr_R(env, sg->data);
6279//ZZ ARMAMode2* am = iselIntExpr_AMode2(env, sg->addr);
6280//ZZ ARMCondCode cc = iselCondCode(env, sg->guard);
6281//ZZ addInstr(env, ARMInstr_LdSt16(cc,
6282//ZZ False/*!isLoad*/,
6283//ZZ False/*!isSignedLoad*/, rD, am));
6284//ZZ return;
6285//ZZ }
6286//ZZ default:
6287//ZZ break;
6288//ZZ }
6289//ZZ break;
6290//ZZ }
6291//ZZ
6292//ZZ /* --------- CONDITIONAL LOAD --------- */
6293//ZZ /* conditional little-endian load from memory */
6294//ZZ case Ist_LoadG: {
6295//ZZ IRLoadG* lg = stmt->Ist.LoadG.details;
6296//ZZ IRType tya = typeOfIRExpr(env->type_env, lg->addr);
6297//ZZ IREndness end = lg->end;
6298//ZZ
6299//ZZ if (tya != Ity_I32 || end != Iend_LE)
6300//ZZ goto stmt_fail;
6301//ZZ
6302//ZZ switch (lg->cvt) {
6303//ZZ case ILGop_8Uto32:
6304//ZZ case ILGop_Ident32: {
6305//ZZ HReg rAlt = iselIntExpr_R(env, lg->alt);
6306//ZZ ARMAMode1* am = iselIntExpr_AMode1(env, lg->addr);
6307//ZZ HReg rD = lookupIRTemp(env, lg->dst);
6308//ZZ addInstr(env, mk_iMOVds_RR(rD, rAlt));
6309//ZZ ARMCondCode cc = iselCondCode(env, lg->guard);
6310//ZZ addInstr(env, (lg->cvt == ILGop_Ident32 ? ARMInstr_LdSt32
6311//ZZ : ARMInstr_LdSt8U)
6312//ZZ (cc, True/*isLoad*/, rD, am));
6313//ZZ return;
6314//ZZ }
6315//ZZ case ILGop_16Sto32:
6316//ZZ case ILGop_16Uto32:
6317//ZZ case ILGop_8Sto32: {
6318//ZZ HReg rAlt = iselIntExpr_R(env, lg->alt);
6319//ZZ ARMAMode2* am = iselIntExpr_AMode2(env, lg->addr);
6320//ZZ HReg rD = lookupIRTemp(env, lg->dst);
6321//ZZ addInstr(env, mk_iMOVds_RR(rD, rAlt));
6322//ZZ ARMCondCode cc = iselCondCode(env, lg->guard);
6323//ZZ if (lg->cvt == ILGop_8Sto32) {
6324//ZZ addInstr(env, ARMInstr_Ld8S(cc, rD, am));
6325//ZZ } else {
6326//ZZ vassert(lg->cvt == ILGop_16Sto32 || lg->cvt == ILGop_16Uto32);
6327//ZZ Bool sx = lg->cvt == ILGop_16Sto32;
6328//ZZ addInstr(env, ARMInstr_LdSt16(cc, True/*isLoad*/, sx, rD, am));
6329//ZZ }
6330//ZZ return;
6331//ZZ }
6332//ZZ default:
6333//ZZ break;
6334//ZZ }
6335//ZZ break;
6336//ZZ }
6337
6338 /* --------- PUT --------- */
6339 /* write guest state, fixed offset */
6340 case Ist_Put: {
6341 IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Put.data);
6342 UInt offs = (UInt)stmt->Ist.Put.offset;
6343 if (tyd == Ity_I64 && 0 == (offs & 7) && offs < (8<<12)) {
6344 HReg rD = iselIntExpr_R(env, stmt->Ist.Put.data);
6345 ARM64AMode* am = mk_baseblock_64bit_access_amode(offs);
6346 addInstr(env, ARM64Instr_LdSt64(False/*!isLoad*/, rD, am));
6347 return;
6348 }
6349 if (tyd == Ity_I32 && 0 == (offs & 3) && offs < (4<<12)) {
6350 HReg rD = iselIntExpr_R(env, stmt->Ist.Put.data);
6351 ARM64AMode* am = mk_baseblock_32bit_access_amode(offs);
6352 addInstr(env, ARM64Instr_LdSt32(False/*!isLoad*/, rD, am));
6353 return;
6354 }
6355 if (tyd == Ity_I16 && 0 == (offs & 1) && offs < (2<<12)) {
6356 HReg rD = iselIntExpr_R(env, stmt->Ist.Put.data);
6357 ARM64AMode* am = mk_baseblock_16bit_access_amode(offs);
6358 addInstr(env, ARM64Instr_LdSt16(False/*!isLoad*/, rD, am));
6359 return;
6360 }
6361 if (tyd == Ity_I8 && offs < (1<<12)) {
6362 HReg rD = iselIntExpr_R(env, stmt->Ist.Put.data);
6363 ARM64AMode* am = mk_baseblock_8bit_access_amode(offs);
6364 addInstr(env, ARM64Instr_LdSt8(False/*!isLoad*/, rD, am));
6365 return;
6366 }
6367 if (tyd == Ity_V128 && offs < (1<<12)) {
6368 HReg qD = iselV128Expr(env, stmt->Ist.Put.data);
6369 HReg addr = mk_baseblock_128bit_access_addr(env, offs);
6370 addInstr(env, ARM64Instr_VLdStQ(False/*!isLoad*/, qD, addr));
6371 return;
6372 }
6373 if (tyd == Ity_F64 && 0 == (offs & 7) && offs < (8<<12)) {
6374 HReg dD = iselDblExpr(env, stmt->Ist.Put.data);
6375 HReg bbp = get_baseblock_register();
6376 addInstr(env, ARM64Instr_VLdStD(False/*!isLoad*/, dD, bbp, offs));
6377 return;
6378 }
6379 if (tyd == Ity_F32 && 0 == (offs & 3) && offs < (4<<12)) {
6380 HReg dD = iselFltExpr(env, stmt->Ist.Put.data);
6381 HReg bbp = get_baseblock_register();
6382 addInstr(env, ARM64Instr_VLdStS(False/*!isLoad*/, dD, bbp, offs));
6383 return;
6384 }
6385
6386//ZZ if (tyd == Ity_I64) {
6387//ZZ if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
6388//ZZ HReg addr = newVRegI(env);
6389//ZZ HReg qD = iselNeon64Expr(env, stmt->Ist.Put.data);
6390//ZZ addInstr(env, ARMInstr_Add32(addr, hregARM_R8(),
6391//ZZ stmt->Ist.Put.offset));
6392//ZZ addInstr(env, ARMInstr_NLdStD(False, qD, mkARMAModeN_R(addr)));
6393//ZZ } else {
6394//ZZ HReg rDhi, rDlo;
6395//ZZ ARMAMode1* am0 = ARMAMode1_RI(hregARM_R8(),
6396//ZZ stmt->Ist.Put.offset + 0);
6397//ZZ ARMAMode1* am4 = ARMAMode1_RI(hregARM_R8(),
6398//ZZ stmt->Ist.Put.offset + 4);
6399//ZZ iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.Put.data);
6400//ZZ addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!isLoad*/,
6401//ZZ rDhi, am4));
6402//ZZ addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!isLoad*/,
6403//ZZ rDlo, am0));
6404//ZZ }
6405//ZZ return;
6406//ZZ }
6407//ZZ if (tyd == Ity_F64) {
6408//ZZ // XXX This won't work if offset > 1020 or is not 0 % 4.
6409//ZZ // In which case we'll have to generate more longwinded code.
6410//ZZ ARMAModeV* am = mkARMAModeV(hregARM_R8(), stmt->Ist.Put.offset);
6411//ZZ HReg rD = iselDblExpr(env, stmt->Ist.Put.data);
6412//ZZ addInstr(env, ARMInstr_VLdStD(False/*!isLoad*/, rD, am));
6413//ZZ return;
6414//ZZ }
6415//ZZ if (tyd == Ity_F32) {
6416//ZZ // XXX This won't work if offset > 1020 or is not 0 % 4.
6417//ZZ // In which case we'll have to generate more longwinded code.
6418//ZZ ARMAModeV* am = mkARMAModeV(hregARM_R8(), stmt->Ist.Put.offset);
6419//ZZ HReg rD = iselFltExpr(env, stmt->Ist.Put.data);
6420//ZZ addInstr(env, ARMInstr_VLdStS(False/*!isLoad*/, rD, am));
6421//ZZ return;
6422//ZZ }
6423 break;
6424 }
6425
6426 /* --------- TMP --------- */
6427 /* assign value to temporary */
6428 case Ist_WrTmp: {
6429 IRTemp tmp = stmt->Ist.WrTmp.tmp;
6430 IRType ty = typeOfIRTemp(env->type_env, tmp);
6431
6432 if (ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
6433 /* We could do a lot better here. But for the time being: */
6434 HReg dst = lookupIRTemp(env, tmp);
6435 HReg rD = iselIntExpr_R(env, stmt->Ist.WrTmp.data);
6436 addInstr(env, ARM64Instr_MovI(dst, rD));
6437 return;
6438 }
6439 if (ty == Ity_I1) {
6440 /* Here, we are generating a I1 value into a 64 bit register.
6441 Make sure the value in the register is only zero or one,
6442 but no other. This allows optimisation of the
6443 1Uto64(tmp:I1) case, by making it simply a copy of the
6444 register holding 'tmp'. The point being that the value in
6445 the register holding 'tmp' can only have been created
6446 here. LATER: that seems dangerous; safer to do 'tmp & 1'
6447 in that case. Also, could do this just with a single CINC
6448 insn. */
6449 HReg zero = newVRegI(env);
6450 HReg one = newVRegI(env);
6451 HReg dst = lookupIRTemp(env, tmp);
6452 addInstr(env, ARM64Instr_Imm64(zero, 0));
6453 addInstr(env, ARM64Instr_Imm64(one, 1));
6454 ARM64CondCode cc = iselCondCode(env, stmt->Ist.WrTmp.data);
6455 addInstr(env, ARM64Instr_CSel(dst, one, zero, cc));
6456 return;
6457 }
6458 if (ty == Ity_F64) {
6459 HReg src = iselDblExpr(env, stmt->Ist.WrTmp.data);
6460 HReg dst = lookupIRTemp(env, tmp);
6461 addInstr(env, ARM64Instr_VMov(8, dst, src));
6462 return;
6463 }
6464 if (ty == Ity_F32) {
6465 HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data);
6466 HReg dst = lookupIRTemp(env, tmp);
6467 addInstr(env, ARM64Instr_VMov(8/*yes, really*/, dst, src));
6468 return;
6469 }
6470 if (ty == Ity_V128) {
6471 HReg src = iselV128Expr(env, stmt->Ist.WrTmp.data);
6472 HReg dst = lookupIRTemp(env, tmp);
6473 addInstr(env, ARM64Instr_VMov(16, dst, src));
6474 return;
6475 }
6476 break;
6477 }
6478
6479 /* --------- Call to DIRTY helper --------- */
6480 /* call complex ("dirty") helper function */
6481 case Ist_Dirty: {
6482 IRDirty* d = stmt->Ist.Dirty.details;
6483
6484 /* Figure out the return type, if any. */
6485 IRType retty = Ity_INVALID;
6486 if (d->tmp != IRTemp_INVALID)
6487 retty = typeOfIRTemp(env->type_env, d->tmp);
6488
6489 Bool retty_ok = False;
6490 switch (retty) {
6491 case Ity_INVALID: /* function doesn't return anything */
6492 case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
6493 case Ity_V128:
6494 retty_ok = True; break;
6495 default:
6496 break;
6497 }
6498 if (!retty_ok)
6499 break; /* will go to stmt_fail: */
6500
6501 /* Marshal args, do the call, and set the return value to 0x555..555
6502 if this is a conditional call that returns a value and the
6503 call is skipped. */
6504 UInt addToSp = 0;
6505 RetLoc rloc = mk_RetLoc_INVALID();
6506 doHelperCall( &addToSp, &rloc, env, d->guard, d->cee, retty, d->args );
6507 vassert(is_sane_RetLoc(rloc));
6508
6509 /* Now figure out what to do with the returned value, if any. */
6510 switch (retty) {
6511 case Ity_INVALID: {
6512 /* No return value. Nothing to do. */
6513 vassert(d->tmp == IRTemp_INVALID);
6514 vassert(rloc.pri == RLPri_None);
6515 vassert(addToSp == 0);
6516 return;
6517 }
6518 case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8: {
6519 vassert(rloc.pri == RLPri_Int);
6520 vassert(addToSp == 0);
6521 /* The returned value is in x0. Park it in the register
6522 associated with tmp. */
6523 HReg dst = lookupIRTemp(env, d->tmp);
6524 addInstr(env, ARM64Instr_MovI(dst, hregARM64_X0()) );
6525 return;
6526 }
6527 case Ity_V128: {
6528 /* The returned value is on the stack, and *retloc tells
6529 us where. Fish it off the stack and then move the
6530 stack pointer upwards to clear it, as directed by
6531 doHelperCall. */
6532 vassert(rloc.pri == RLPri_V128SpRel);
6533 vassert(rloc.spOff < 256); // stay sane
6534 vassert(addToSp >= 16); // ditto
6535 vassert(addToSp < 256); // ditto
6536 HReg dst = lookupIRTemp(env, d->tmp);
6537 HReg tmp = newVRegI(env); // the address of the returned value
6538 addInstr(env, ARM64Instr_FromSP(tmp)); // tmp = SP
6539 addInstr(env, ARM64Instr_Arith(tmp, tmp,
6540 ARM64RIA_I12((UShort)rloc.spOff, 0),
6541 True/*isAdd*/ ));
6542 addInstr(env, ARM64Instr_VLdStQ(True/*isLoad*/, dst, tmp));
6543 addInstr(env, ARM64Instr_AddToSP(addToSp));
6544 return;
6545 }
6546 default:
6547 /*NOTREACHED*/
6548 vassert(0);
6549 }
6550 break;
6551 }
6552
6553//ZZ /* --------- Load Linked and Store Conditional --------- */
6554//ZZ case Ist_LLSC: {
6555//ZZ if (stmt->Ist.LLSC.storedata == NULL) {
6556//ZZ /* LL */
6557//ZZ IRTemp res = stmt->Ist.LLSC.result;
6558//ZZ IRType ty = typeOfIRTemp(env->type_env, res);
6559//ZZ if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
6560//ZZ Int szB = 0;
6561//ZZ HReg r_dst = lookupIRTemp(env, res);
6562//ZZ HReg raddr = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
6563//ZZ switch (ty) {
6564//ZZ case Ity_I8: szB = 1; break;
6565//ZZ case Ity_I16: szB = 2; break;
6566//ZZ case Ity_I32: szB = 4; break;
6567//ZZ default: vassert(0);
6568//ZZ }
6569//ZZ addInstr(env, mk_iMOVds_RR(hregARM_R4(), raddr));
6570//ZZ addInstr(env, ARMInstr_LdrEX(szB));
6571//ZZ addInstr(env, mk_iMOVds_RR(r_dst, hregARM_R2()));
6572//ZZ return;
6573//ZZ }
6574//ZZ if (ty == Ity_I64) {
6575//ZZ HReg raddr = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
6576//ZZ addInstr(env, mk_iMOVds_RR(hregARM_R4(), raddr));
6577//ZZ addInstr(env, ARMInstr_LdrEX(8));
6578//ZZ /* Result is in r3:r2. On a non-NEON capable CPU, we must
6579//ZZ move it into a result register pair. On a NEON capable
6580//ZZ CPU, the result register will be a 64 bit NEON
6581//ZZ register, so we must move it there instead. */
6582//ZZ if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
6583//ZZ HReg dst = lookupIRTemp(env, res);
6584//ZZ addInstr(env, ARMInstr_VXferD(True, dst, hregARM_R3(),
6585//ZZ hregARM_R2()));
6586//ZZ } else {
6587//ZZ HReg r_dst_hi, r_dst_lo;
6588//ZZ lookupIRTemp64(&r_dst_hi, &r_dst_lo, env, res);
6589//ZZ addInstr(env, mk_iMOVds_RR(r_dst_lo, hregARM_R2()));
6590//ZZ addInstr(env, mk_iMOVds_RR(r_dst_hi, hregARM_R3()));
6591//ZZ }
6592//ZZ return;
6593//ZZ }
6594//ZZ /*NOTREACHED*/
6595//ZZ vassert(0);
6596//ZZ } else {
6597//ZZ /* SC */
6598//ZZ IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.LLSC.storedata);
6599//ZZ if (tyd == Ity_I32 || tyd == Ity_I16 || tyd == Ity_I8) {
6600//ZZ Int szB = 0;
6601//ZZ HReg rD = iselIntExpr_R(env, stmt->Ist.LLSC.storedata);
6602//ZZ HReg rA = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
6603//ZZ switch (tyd) {
6604//ZZ case Ity_I8: szB = 1; break;
6605//ZZ case Ity_I16: szB = 2; break;
6606//ZZ case Ity_I32: szB = 4; break;
6607//ZZ default: vassert(0);
6608//ZZ }
6609//ZZ addInstr(env, mk_iMOVds_RR(hregARM_R2(), rD));
6610//ZZ addInstr(env, mk_iMOVds_RR(hregARM_R4(), rA));
6611//ZZ addInstr(env, ARMInstr_StrEX(szB));
6612//ZZ } else {
6613//ZZ vassert(tyd == Ity_I64);
6614//ZZ /* This is really ugly. There is no is/is-not NEON
6615//ZZ decision akin to the case for LL, because iselInt64Expr
6616//ZZ fudges this for us, and always gets the result into two
6617//ZZ GPRs even if this means moving it from a NEON
6618//ZZ register. */
6619//ZZ HReg rDhi, rDlo;
6620//ZZ iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.LLSC.storedata);
6621//ZZ HReg rA = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
6622//ZZ addInstr(env, mk_iMOVds_RR(hregARM_R2(), rDlo));
6623//ZZ addInstr(env, mk_iMOVds_RR(hregARM_R3(), rDhi));
6624//ZZ addInstr(env, mk_iMOVds_RR(hregARM_R4(), rA));
6625//ZZ addInstr(env, ARMInstr_StrEX(8));
6626//ZZ }
6627//ZZ /* now r0 is 1 if failed, 0 if success. Change to IR
6628//ZZ conventions (0 is fail, 1 is success). Also transfer
6629//ZZ result to r_res. */
6630//ZZ IRTemp res = stmt->Ist.LLSC.result;
6631//ZZ IRType ty = typeOfIRTemp(env->type_env, res);
6632//ZZ HReg r_res = lookupIRTemp(env, res);
6633//ZZ ARMRI84* one = ARMRI84_I84(1,0);
6634//ZZ vassert(ty == Ity_I1);
6635//ZZ addInstr(env, ARMInstr_Alu(ARMalu_XOR, r_res, hregARM_R0(), one));
6636//ZZ /* And be conservative -- mask off all but the lowest bit */
6637//ZZ addInstr(env, ARMInstr_Alu(ARMalu_AND, r_res, r_res, one));
6638//ZZ return;
6639//ZZ }
6640//ZZ break;
6641//ZZ }
6642//ZZ
6643//ZZ /* --------- MEM FENCE --------- */
6644//ZZ case Ist_MBE:
6645//ZZ switch (stmt->Ist.MBE.event) {
6646//ZZ case Imbe_Fence:
6647//ZZ addInstr(env, ARMInstr_MFence());
6648//ZZ return;
6649//ZZ case Imbe_CancelReservation:
6650//ZZ addInstr(env, ARMInstr_CLREX());
6651//ZZ return;
6652//ZZ default:
6653//ZZ break;
6654//ZZ }
6655//ZZ break;
6656
6657 /* --------- INSTR MARK --------- */
6658 /* Doesn't generate any executable code ... */
6659 case Ist_IMark:
6660 return;
6661
6662 /* --------- NO-OP --------- */
6663 case Ist_NoOp:
6664 return;
6665
6666 /* --------- EXIT --------- */
6667 case Ist_Exit: {
6668 if (stmt->Ist.Exit.dst->tag != Ico_U64)
6669 vpanic("isel_arm: Ist_Exit: dst is not a 64-bit value");
6670
6671 ARM64CondCode cc
6672 = iselCondCode(env, stmt->Ist.Exit.guard);
6673 ARM64AMode* amPC
6674 = mk_baseblock_64bit_access_amode(stmt->Ist.Exit.offsIP);
6675
6676
6677 /* Case: boring transfer to known address */
6678 if (stmt->Ist.Exit.jk == Ijk_Boring
6679 /*ATC || stmt->Ist.Exit.jk == Ijk_Call */
6680 /*ATC || stmt->Ist.Exit.jk == Ijk_Ret */ ) {
6681 if (env->chainingAllowed) {
6682 /* .. almost always true .. */
6683 /* Skip the event check at the dst if this is a forwards
6684 edge. */
6685 Bool toFastEP
6686 = ((Addr64)stmt->Ist.Exit.dst->Ico.U64) > env->max_ga;
6687 if (0) vex_printf("%s", toFastEP ? "Y" : ",");
6688 addInstr(env, ARM64Instr_XDirect(stmt->Ist.Exit.dst->Ico.U64,
6689 amPC, cc, toFastEP));
6690 } else {
6691 /* .. very occasionally .. */
6692 /* We can't use chaining, so ask for an assisted transfer,
6693 as that's the only alternative that is allowable. */
6694 HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
6695 addInstr(env, ARM64Instr_XAssisted(r, amPC, cc, Ijk_Boring));
6696 }
6697 return;
6698 }
6699
6700//ZZ /* Case: assisted transfer to arbitrary address */
6701//ZZ switch (stmt->Ist.Exit.jk) {
6702//ZZ /* Keep this list in sync with that in iselNext below */
6703//ZZ case Ijk_ClientReq:
6704//ZZ case Ijk_NoDecode:
6705//ZZ case Ijk_NoRedir:
6706//ZZ case Ijk_Sys_syscall:
6707//ZZ case Ijk_TInval:
6708//ZZ case Ijk_Yield:
6709//ZZ {
6710//ZZ HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
6711//ZZ addInstr(env, ARMInstr_XAssisted(r, amR15T, cc,
6712//ZZ stmt->Ist.Exit.jk));
6713//ZZ return;
6714//ZZ }
6715//ZZ default:
6716//ZZ break;
6717//ZZ }
6718
6719 /* Do we ever expect to see any other kind? */
6720 goto stmt_fail;
6721 }
6722
6723 default: break;
6724 }
6725 stmt_fail:
6726 ppIRStmt(stmt);
6727 vpanic("iselStmt");
6728}
6729
6730
6731/*---------------------------------------------------------*/
6732/*--- ISEL: Basic block terminators (Nexts) ---*/
6733/*---------------------------------------------------------*/
6734
6735static void iselNext ( ISelEnv* env,
6736 IRExpr* next, IRJumpKind jk, Int offsIP )
6737{
6738 if (vex_traceflags & VEX_TRACE_VCODE) {
6739 vex_printf( "\n-- PUT(%d) = ", offsIP);
6740 ppIRExpr( next );
6741 vex_printf( "; exit-");
6742 ppIRJumpKind(jk);
6743 vex_printf( "\n");
6744 }
6745
6746 /* Case: boring transfer to known address */
6747 if (next->tag == Iex_Const) {
6748 IRConst* cdst = next->Iex.Const.con;
6749 vassert(cdst->tag == Ico_U64);
6750 if (jk == Ijk_Boring || jk == Ijk_Call) {
6751 /* Boring transfer to known address */
6752 ARM64AMode* amPC = mk_baseblock_64bit_access_amode(offsIP);
6753 if (env->chainingAllowed) {
6754 /* .. almost always true .. */
6755 /* Skip the event check at the dst if this is a forwards
6756 edge. */
6757 Bool toFastEP
6758 = ((Addr64)cdst->Ico.U64) > env->max_ga;
6759 if (0) vex_printf("%s", toFastEP ? "X" : ".");
6760 addInstr(env, ARM64Instr_XDirect(cdst->Ico.U64,
6761 amPC, ARM64cc_AL,
6762 toFastEP));
6763 } else {
6764 /* .. very occasionally .. */
6765 /* We can't use chaining, so ask for an assisted transfer,
6766 as that's the only alternative that is allowable. */
6767 HReg r = iselIntExpr_R(env, next);
6768 addInstr(env, ARM64Instr_XAssisted(r, amPC, ARM64cc_AL,
6769 Ijk_Boring));
6770 }
6771 return;
6772 }
6773 }
6774
6775 /* Case: call/return (==boring) transfer to any address */
6776 switch (jk) {
6777 case Ijk_Boring: case Ijk_Ret: case Ijk_Call: {
6778 HReg r = iselIntExpr_R(env, next);
6779 ARM64AMode* amPC = mk_baseblock_64bit_access_amode(offsIP);
6780 if (env->chainingAllowed) {
6781 addInstr(env, ARM64Instr_XIndir(r, amPC, ARM64cc_AL));
6782 } else {
6783 addInstr(env, ARM64Instr_XAssisted(r, amPC, ARM64cc_AL,
6784 Ijk_Boring));
6785 }
6786 return;
6787 }
6788 default:
6789 break;
6790 }
6791
6792 /* Case: assisted transfer to arbitrary address */
6793 switch (jk) {
6794 /* Keep this list in sync with that for Ist_Exit above */
6795 case Ijk_ClientReq:
6796 case Ijk_NoDecode:
6797//ZZ case Ijk_NoRedir:
6798 case Ijk_Sys_syscall:
6799//ZZ case Ijk_TInval:
6800//ZZ case Ijk_Yield:
6801 {
6802 HReg r = iselIntExpr_R(env, next);
6803 ARM64AMode* amPC = mk_baseblock_64bit_access_amode(offsIP);
6804 addInstr(env, ARM64Instr_XAssisted(r, amPC, ARM64cc_AL, jk));
6805 return;
6806 }
6807 default:
6808 break;
6809 }
6810
6811 vex_printf( "\n-- PUT(%d) = ", offsIP);
6812 ppIRExpr( next );
6813 vex_printf( "; exit-");
6814 ppIRJumpKind(jk);
6815 vex_printf( "\n");
6816 vassert(0); // are we expecting any other kind?
6817}
6818
6819
6820/*---------------------------------------------------------*/
6821/*--- Insn selector top-level ---*/
6822/*---------------------------------------------------------*/
6823
6824/* Translate an entire SB to arm64 code. */
6825
6826HInstrArray* iselSB_ARM64 ( IRSB* bb,
6827 VexArch arch_host,
6828 VexArchInfo* archinfo_host,
6829 VexAbiInfo* vbi/*UNUSED*/,
6830 Int offs_Host_EvC_Counter,
6831 Int offs_Host_EvC_FailAddr,
6832 Bool chainingAllowed,
6833 Bool addProfInc,
6834 Addr64 max_ga )
6835{
6836 Int i, j;
6837 HReg hreg, hregHI;
6838 ISelEnv* env;
6839 UInt hwcaps_host = archinfo_host->hwcaps;
6840 ARM64AMode *amCounter, *amFailAddr;
6841
6842 /* sanity ... */
6843 vassert(arch_host == VexArchARM64);
6844
6845 /* guard against unexpected space regressions */
6846 vassert(sizeof(ARM64Instr) <= 32);
6847
6848 /* Make up an initial environment to use. */
6849 env = LibVEX_Alloc(sizeof(ISelEnv));
6850 env->vreg_ctr = 0;
6851
6852 /* Set up output code array. */
6853 env->code = newHInstrArray();
6854
6855 /* Copy BB's type env. */
6856 env->type_env = bb->tyenv;
6857
6858 /* Make up an IRTemp -> virtual HReg mapping. This doesn't
6859 change as we go along. */
6860 env->n_vregmap = bb->tyenv->types_used;
6861 env->vregmap = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
6862 env->vregmapHI = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
6863
6864 /* and finally ... */
6865 env->chainingAllowed = chainingAllowed;
6866 env->hwcaps = hwcaps_host;
6867 env->previous_rm = NULL;
6868 env->max_ga = max_ga;
6869
6870 /* For each IR temporary, allocate a suitably-kinded virtual
6871 register. */
6872 j = 0;
6873 for (i = 0; i < env->n_vregmap; i++) {
6874 hregHI = hreg = INVALID_HREG;
6875 switch (bb->tyenv->types[i]) {
6876 case Ity_I1:
6877 case Ity_I8: case Ity_I16: case Ity_I32: case Ity_I64:
6878 hreg = mkHReg(j++, HRcInt64, True);
6879 break;
6880 case Ity_I128:
6881 hreg = mkHReg(j++, HRcInt64, True);
6882 hregHI = mkHReg(j++, HRcInt64, True);
6883 break;
6884 case Ity_F32: // we'll use HRcFlt64 regs for F32 too
6885 case Ity_F64:
6886 hreg = mkHReg(j++, HRcFlt64, True);
6887 break;
6888 case Ity_V128:
6889 hreg = mkHReg(j++, HRcVec128, True);
6890 break;
6891 default:
6892 ppIRType(bb->tyenv->types[i]);
6893 vpanic("iselBB(arm64): IRTemp type");
6894 }
6895 env->vregmap[i] = hreg;
6896 env->vregmapHI[i] = hregHI;
6897 }
6898 env->vreg_ctr = j;
6899
6900 /* The very first instruction must be an event check. */
6901 amCounter = ARM64AMode_RI9(hregARM64_X21(), offs_Host_EvC_Counter);
6902 amFailAddr = ARM64AMode_RI9(hregARM64_X21(), offs_Host_EvC_FailAddr);
6903 addInstr(env, ARM64Instr_EvCheck(amCounter, amFailAddr));
6904
6905 /* Possibly a block counter increment (for profiling). At this
6906 point we don't know the address of the counter, so just pretend
6907 it is zero. It will have to be patched later, but before this
6908 translation is used, by a call to LibVEX_patchProfCtr. */
6909 if (addProfInc) {
6910 vassert(0);
6911 //addInstr(env, ARM64Instr_ProfInc());
6912 }
6913
6914 /* Ok, finally we can iterate over the statements. */
6915 for (i = 0; i < bb->stmts_used; i++)
6916 iselStmt(env, bb->stmts[i]);
6917
6918 iselNext(env, bb->next, bb->jumpkind, bb->offsIP);
6919
6920 /* record the number of vregs we used. */
6921 env->code->n_vregs = env->vreg_ctr;
6922 return env->code;
6923}
6924
6925
6926/*---------------------------------------------------------------*/
6927/*--- end host_arm64_isel.c ---*/
6928/*---------------------------------------------------------------*/