
/*---------------------------------------------------------------*/
/*--- begin                                 host_arm64_isel.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2013-2013 OpenWorks
      info@open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "libvex_basictypes.h"
#include "libvex_ir.h"
#include "libvex.h"
#include "ir_match.h"

#include "main_util.h"
#include "main_globals.h"
#include "host_generic_regs.h"
#include "host_generic_simd64.h"  // for 32-bit SIMD helpers
#include "host_arm64_defs.h"


//ZZ /*---------------------------------------------------------*/
//ZZ /*--- ARMvfp control word stuff                          ---*/
//ZZ /*---------------------------------------------------------*/
//ZZ
//ZZ /* Vex-generated code expects to run with the FPU set as follows: all
//ZZ    exceptions masked, round-to-nearest, non-vector mode, with the NZCV
//ZZ    flags cleared, and FZ (flush to zero) disabled.  Curiously enough,
//ZZ    this corresponds to a FPSCR value of zero.
//ZZ
//ZZ    fpscr should therefore be zero on entry to Vex-generated code, and
//ZZ    should be unchanged at exit.  (Or at least the bottom 28 bits
//ZZ    should be zero).
//ZZ */
//ZZ
//ZZ #define DEFAULT_FPSCR 0


/*---------------------------------------------------------*/
/*--- ISelEnv                                            ---*/
/*---------------------------------------------------------*/

/* This carries around:

   - A mapping from IRTemp to IRType, giving the type of any IRTemp we
     might encounter.  This is computed before insn selection starts,
     and does not change.

   - A mapping from IRTemp to HReg.  This tells the insn selector
     which virtual register is associated with each IRTemp temporary.
     This is computed before insn selection starts, and does not
     change.  We expect this mapping to map precisely the same set of
     IRTemps as the type mapping does.

     |vregmap|   holds the primary register for the IRTemp.
     |vregmapHI| is only used for 128-bit integer-typed
                 IRTemps.  It holds the identity of a second
                 64-bit virtual HReg, which holds the high half
                 of the value.

   - The code array, that is, the insns selected so far.

   - A counter, for generating new virtual registers.

   - The host hardware capabilities word.  This is set at the start
     and does not change.

   - A Bool for indicating whether we may generate chain-me
     instructions for control flow transfers, or whether we must use
     XAssisted.

   - The maximum guest address of any guest insn in this block.
     Actually, the address of the highest-addressed byte from any insn
     in this block.  Is set at the start and does not change.  This is
     used for detecting jumps which are definitely forward-edges from
     this block, and therefore can be made (chained) to the fast entry
     point of the destination, thereby avoiding the destination's
     event check.

   - An IRExpr*, which may be NULL, holding the IR expression (an
     IRRoundingMode-encoded value) to which the FPU's rounding mode
     was most recently set.  Setting to NULL is always safe.  Used to
     avoid redundant settings of the FPU's rounding mode, as
     described in set_FPCR_rounding_mode below.

   Note, this is all (well, mostly) host-independent.
*/

typedef
   struct {
      /* Constants -- set at the start; they do not change. */
      IRTypeEnv*   type_env;

      HReg*        vregmap;
      HReg*        vregmapHI;
      Int          n_vregmap;

      UInt         hwcaps;

      Bool         chainingAllowed;
      Addr64       max_ga;

      /* These are modified as we go along. */
      HInstrArray* code;
      Int          vreg_ctr;

      IRExpr*      previous_rm;
   }
   ISelEnv;

static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp )
{
   vassert(tmp >= 0);
   vassert(tmp < env->n_vregmap);
   return env->vregmap[tmp];
}

static void lookupIRTempPair ( HReg* vrHI, HReg* vrLO,
                               ISelEnv* env, IRTemp tmp )
{
   vassert(tmp >= 0);
   vassert(tmp < env->n_vregmap);
   vassert(! hregIsInvalid(env->vregmapHI[tmp]));
   *vrLO = env->vregmap[tmp];
   *vrHI = env->vregmapHI[tmp];
}

static void addInstr ( ISelEnv* env, ARM64Instr* instr )
{
   addHInstr(env->code, instr);
   if (vex_traceflags & VEX_TRACE_VCODE) {
      ppARM64Instr(instr);
      vex_printf("\n");
   }
}

static HReg newVRegI ( ISelEnv* env )
{
   HReg reg = mkHReg(env->vreg_ctr, HRcInt64, True/*virtual reg*/);
   env->vreg_ctr++;
   return reg;
}

static HReg newVRegD ( ISelEnv* env )
{
   HReg reg = mkHReg(env->vreg_ctr, HRcFlt64, True/*virtual reg*/);
   env->vreg_ctr++;
   return reg;
}

static HReg newVRegV ( ISelEnv* env )
{
   HReg reg = mkHReg(env->vreg_ctr, HRcVec128, True/*virtual reg*/);
   env->vreg_ctr++;
   return reg;
}


/*---------------------------------------------------------*/
/*--- ISEL: Forward declarations                         ---*/
/*---------------------------------------------------------*/

/* These are organised as iselXXX and iselXXX_wrk pairs.  The
   iselXXX_wrk do the real work, but are not to be called directly.
   For each XXX, iselXXX calls its iselXXX_wrk counterpart, then
   checks that all returned registers are virtual.  You should not
   call the _wrk version directly.

   Because some forms of ARM64 memory amodes are implicitly scaled by
   the access size, iselIntExpr_AMode takes an IRType which tells it
   the type of the access for which the amode is to be used.  This
   type needs to be correct, else you'll get incorrect code.
*/
static ARM64AMode* iselIntExpr_AMode_wrk ( ISelEnv* env,
                                           IRExpr* e, IRType dty );
static ARM64AMode* iselIntExpr_AMode     ( ISelEnv* env,
                                           IRExpr* e, IRType dty );

static ARM64RIA*   iselIntExpr_RIA_wrk   ( ISelEnv* env, IRExpr* e );
static ARM64RIA*   iselIntExpr_RIA       ( ISelEnv* env, IRExpr* e );

static ARM64RIL*   iselIntExpr_RIL_wrk   ( ISelEnv* env, IRExpr* e );
static ARM64RIL*   iselIntExpr_RIL       ( ISelEnv* env, IRExpr* e );

static ARM64RI6*   iselIntExpr_RI6_wrk   ( ISelEnv* env, IRExpr* e );
static ARM64RI6*   iselIntExpr_RI6       ( ISelEnv* env, IRExpr* e );

static ARM64CondCode iselCondCode_wrk    ( ISelEnv* env, IRExpr* e );
static ARM64CondCode iselCondCode        ( ISelEnv* env, IRExpr* e );

static HReg        iselIntExpr_R_wrk     ( ISelEnv* env, IRExpr* e );
static HReg        iselIntExpr_R         ( ISelEnv* env, IRExpr* e );

static void        iselInt128Expr_wrk    ( /*OUT*/HReg* rHi, HReg* rLo,
                                           ISelEnv* env, IRExpr* e );
static void        iselInt128Expr        ( /*OUT*/HReg* rHi, HReg* rLo,
                                           ISelEnv* env, IRExpr* e );

static HReg        iselDblExpr_wrk       ( ISelEnv* env, IRExpr* e );
static HReg        iselDblExpr           ( ISelEnv* env, IRExpr* e );

static HReg        iselFltExpr_wrk       ( ISelEnv* env, IRExpr* e );
static HReg        iselFltExpr           ( ISelEnv* env, IRExpr* e );

static HReg        iselV128Expr_wrk      ( ISelEnv* env, IRExpr* e );
static HReg        iselV128Expr          ( ISelEnv* env, IRExpr* e );

static void        iselV256Expr_wrk      ( /*OUT*/HReg* rHi, HReg* rLo,
                                           ISelEnv* env, IRExpr* e );
static void        iselV256Expr          ( /*OUT*/HReg* rHi, HReg* rLo,
                                           ISelEnv* env, IRExpr* e );

static ARM64RIL* mb_mkARM64RIL_I ( ULong imm64 );


/*---------------------------------------------------------*/
/*--- ISEL: Misc helpers                                 ---*/
/*---------------------------------------------------------*/

/* Generate an amode suitable for a 64-bit sized access relative to
   the baseblock register (X21).  This generates an RI12 amode, which
   means it's scaled by the access size, which is why the access size
   -- 64 bit -- is stated explicitly here.  Consequently |off| needs
   to be divisible by 8. */
static ARM64AMode* mk_baseblock_64bit_access_amode ( UInt off )
{
   vassert(off < (8 << 12)); /* otherwise it's unrepresentable */
   vassert((off & 7) == 0);  /* ditto */
   return ARM64AMode_RI12(hregARM64_X21(), off >> 3, 8/*scale*/);
}
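
/* For example, because RI12 amodes are scaled, an offset of 24 bytes
   is encoded with an immediate of 3:
      mk_baseblock_64bit_access_amode(24)  ==>  RI12(X21, 3, 8)
   since 24 >> 3 == 3 and the scale is the 8-byte access size. */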

/* Ditto, for 32 bit accesses. */
static ARM64AMode* mk_baseblock_32bit_access_amode ( UInt off )
{
   vassert(off < (4 << 12)); /* otherwise it's unrepresentable */
   vassert((off & 3) == 0);  /* ditto */
   return ARM64AMode_RI12(hregARM64_X21(), off >> 2, 4/*scale*/);
}

/* Ditto, for 16 bit accesses. */
static ARM64AMode* mk_baseblock_16bit_access_amode ( UInt off )
{
   vassert(off < (2 << 12)); /* otherwise it's unrepresentable */
   vassert((off & 1) == 0);  /* ditto */
   return ARM64AMode_RI12(hregARM64_X21(), off >> 1, 2/*scale*/);
}

/* Ditto, for 8 bit accesses. */
static ARM64AMode* mk_baseblock_8bit_access_amode ( UInt off )
{
   vassert(off < (1 << 12)); /* otherwise it's unrepresentable */
   return ARM64AMode_RI12(hregARM64_X21(), off >> 0, 1/*scale*/);
}

static HReg mk_baseblock_128bit_access_addr ( ISelEnv* env, UInt off )
{
   vassert(off < (1<<12));
   HReg r = newVRegI(env);
   addInstr(env, ARM64Instr_Arith(r, hregARM64_X21(),
                                     ARM64RIA_I12(off,0), True/*isAdd*/));
   return r;
}

static HReg get_baseblock_register ( void )
{
   return hregARM64_X21();
}

/* Generate code to zero extend a 32 bit value in 'src' to 64 bits, in
   a new register, and return the new register. */
static HReg widen_z_32_to_64 ( ISelEnv* env, HReg src )
{
   HReg dst = newVRegI(env);
   ARM64RIL* mask = ARM64RIL_I13(1, 0, 31); /* encodes 0xFFFFFFFF */
   addInstr(env, ARM64Instr_Logic(dst, src, mask, ARM64lo_AND));
   return dst;
}

/* Generate code to sign extend a 16 bit value in 'src' to 64 bits, in
   a new register, and return the new register. */
static HReg widen_s_16_to_64 ( ISelEnv* env, HReg src )
{
   HReg dst = newVRegI(env);
   ARM64RI6* n48 = ARM64RI6_I6(48);
   addInstr(env, ARM64Instr_Shift(dst, src, n48, ARM64sh_SHL));
   addInstr(env, ARM64Instr_Shift(dst, dst, n48, ARM64sh_SAR));
   return dst;
}

/* Generate code to zero extend a 16 bit value in 'src' to 64 bits, in
   a new register, and return the new register. */
static HReg widen_z_16_to_64 ( ISelEnv* env, HReg src )
{
   HReg dst = newVRegI(env);
   ARM64RI6* n48 = ARM64RI6_I6(48);
   addInstr(env, ARM64Instr_Shift(dst, src, n48, ARM64sh_SHL));
   addInstr(env, ARM64Instr_Shift(dst, dst, n48, ARM64sh_SHR));
   return dst;
}

/* Generate code to sign extend a 32 bit value in 'src' to 64 bits, in
   a new register, and return the new register. */
static HReg widen_s_32_to_64 ( ISelEnv* env, HReg src )
{
   HReg dst = newVRegI(env);
   ARM64RI6* n32 = ARM64RI6_I6(32);
   addInstr(env, ARM64Instr_Shift(dst, src, n32, ARM64sh_SHL));
   addInstr(env, ARM64Instr_Shift(dst, dst, n32, ARM64sh_SAR));
   return dst;
}

/* Generate code to sign extend an 8 bit value in 'src' to 64 bits, in
   a new register, and return the new register. */
static HReg widen_s_8_to_64 ( ISelEnv* env, HReg src )
{
   HReg dst = newVRegI(env);
   ARM64RI6* n56 = ARM64RI6_I6(56);
   addInstr(env, ARM64Instr_Shift(dst, src, n56, ARM64sh_SHL));
   addInstr(env, ARM64Instr_Shift(dst, dst, n56, ARM64sh_SAR));
   return dst;
}

/* Ditto, but zero extending. */
static HReg widen_z_8_to_64 ( ISelEnv* env, HReg src )
{
   HReg dst = newVRegI(env);
   ARM64RI6* n56 = ARM64RI6_I6(56);
   addInstr(env, ARM64Instr_Shift(dst, src, n56, ARM64sh_SHL));
   addInstr(env, ARM64Instr_Shift(dst, dst, n56, ARM64sh_SHR));
   return dst;
}
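
/* Worked example of the shift-pair widening trick above: for an 8 bit
   value 0x80 (-128 as a signed byte) in the low bits of |src|,
      widen_s_8_to_64:  (0x80 << 56) >>signed 56  ==  0xFFFFFFFFFFFFFF80
      widen_z_8_to_64:  (0x80 << 56) >>unsigned 56  ==  0x0000000000000080
   i.e. the left shift discards whatever junk sits in the upper bits,
   and the arithmetic/logical right shift replicates or clears the
   top bits as required. */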

/* Is this IRExpr_Const(IRConst_U64(0)) ? */
static Bool isZeroU64 ( IRExpr* e ) {
   if (e->tag != Iex_Const) return False;
   IRConst* con = e->Iex.Const.con;
   vassert(con->tag == Ico_U64);
   return con->Ico.U64 == 0;
}


/*---------------------------------------------------------*/
/*--- ISEL: FP rounding mode helpers                     ---*/
/*---------------------------------------------------------*/

/* Set the FP rounding mode: 'mode' is an I32-typed expression
   denoting a value in the range 0 .. 3, indicating a round mode
   encoded as per type IRRoundingMode -- the first four values only
   (Irrm_NEAREST, Irrm_NegINF, Irrm_PosINF, Irrm_ZERO).  Set the
   ARM64 FPCR to have the same rounding.

   For speed & simplicity, we're setting the *entire* FPCR here.

   Setting the rounding mode is expensive.  So this function tries to
   avoid repeatedly setting the rounding mode to the same thing by
   first comparing 'mode' to the 'mode' tree supplied in the previous
   call to this function, if any.  (The previous value is stored in
   env->previous_rm.)  If 'mode' is a single IR temporary 't' and
   env->previous_rm is also just 't', then the setting is skipped.

   This is safe because of the SSA property of IR: an IR temporary can
   only be defined once and so will have the same value regardless of
   where it appears in the block.  Cool stuff, SSA.

   A safety condition: all attempts to set the RM must be aware of
   this mechanism - by being routed through the functions here.

   Of course this only helps in blocks where the RM is set more than
   once, it is set to the same value each time, *and* that value is
   held in the same IR temporary each time.  In order to assure the
   latter as much as possible, the IR optimiser takes care to do CSE
   on any block with any sign of floating point activity.
*/
static
void set_FPCR_rounding_mode ( ISelEnv* env, IRExpr* mode )
{
   vassert(typeOfIRExpr(env->type_env,mode) == Ity_I32);

   /* Do we need to do anything? */
   if (env->previous_rm
       && env->previous_rm->tag == Iex_RdTmp
       && mode->tag == Iex_RdTmp
       && env->previous_rm->Iex.RdTmp.tmp == mode->Iex.RdTmp.tmp) {
      /* no - setting it to what it was before. */
      vassert(typeOfIRExpr(env->type_env, env->previous_rm) == Ity_I32);
      return;
   }

   /* No luck - we better set it, and remember what we set it to. */
   env->previous_rm = mode;

   /* Only supporting the rounding-mode bits - the rest of FPCR is set
      to zero - so we can set the whole register at once (faster). */

   /* This isn't simple, because 'mode' carries an IR rounding
      encoding, and we need to translate that to an ARM64 FP one:
      The IR encoding:
         00  to nearest (the default)
         10  to +infinity
         01  to -infinity
         11  to zero
      The ARM64 FP encoding:
         00  to nearest
         01  to +infinity
         10  to -infinity
         11  to zero
      Easy enough to do; just swap the two bits.
   */
   HReg irrm = iselIntExpr_R(env, mode);
   HReg tL   = newVRegI(env);
   HReg tR   = newVRegI(env);
   HReg t3   = newVRegI(env);
   /* tL = irrm << 1;
      tR = irrm >> 1;  if we're lucky, these will issue together
      tL &= 2;
      tR &= 1;         ditto
      t3 = tL | tR;
      t3 <<= 22;
      msr fpcr, t3
   */
   ARM64RIL* ril_one = mb_mkARM64RIL_I(1);
   ARM64RIL* ril_two = mb_mkARM64RIL_I(2);
   vassert(ril_one && ril_two);
   addInstr(env, ARM64Instr_Shift(tL, irrm, ARM64RI6_I6(1), ARM64sh_SHL));
   addInstr(env, ARM64Instr_Shift(tR, irrm, ARM64RI6_I6(1), ARM64sh_SHR));
   addInstr(env, ARM64Instr_Logic(tL, tL, ril_two, ARM64lo_AND));
   addInstr(env, ARM64Instr_Logic(tR, tR, ril_one, ARM64lo_AND));
   addInstr(env, ARM64Instr_Logic(t3, tL, ARM64RIL_R(tR), ARM64lo_OR));
   addInstr(env, ARM64Instr_Shift(t3, t3, ARM64RI6_I6(22), ARM64sh_SHL));
   addInstr(env, ARM64Instr_FPCR(True/*toFPCR*/, t3));
}
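
/* Worked example of the bit swap above: for mode == Irrm_PosINF (IR
   encoding 2, binary 10):
      tL = (2 << 1) & 2   ==  0
      tR = (2 >> 1) & 1   ==  1
      t3 = (0 | 1) << 22  ==  1 << 22
   which puts 01 ("to +infinity") into FPCR.RMode, bits [23:22]. */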


/*---------------------------------------------------------*/
/*--- ISEL: Function call helpers                        ---*/
/*---------------------------------------------------------*/

/* Used only in doHelperCall.  See big comment in doHelperCall re
   handling of register-parameter args.  This function figures out
   whether evaluation of an expression might require use of a fixed
   register.  If in doubt return True (safe but suboptimal).
*/
static
Bool mightRequireFixedRegs ( IRExpr* e )
{
   if (UNLIKELY(is_IRExpr_VECRET_or_BBPTR(e))) {
      // These are always "safe" -- either a copy of SP in some
      // arbitrary vreg, or a copy of x21, respectively.
      return False;
   }
   /* Else it's a "normal" expression. */
   switch (e->tag) {
      case Iex_RdTmp: case Iex_Const: case Iex_Get:
         return False;
      default:
         return True;
   }
}


/* Do a complete function call.  |guard| is a Ity_Bit expression
   indicating whether or not the call happens.  If guard==NULL, the
   call is unconditional.  |retloc| is set to indicate where the
   return value is after the call.  The caller (of this fn) must
   generate code to add |stackAdjustAfterCall| to the stack pointer
   after the call is done.  Returns True iff it managed to handle this
   combination of arg/return types, else returns False. */

static
Bool doHelperCall ( /*OUT*/UInt*   stackAdjustAfterCall,
                    /*OUT*/RetLoc* retloc,
                    ISelEnv* env,
                    IRExpr* guard,
                    IRCallee* cee, IRType retTy, IRExpr** args )
{
   ARM64CondCode cc;
   HReg          argregs[ARM64_N_ARGREGS];
   HReg          tmpregs[ARM64_N_ARGREGS];
   Bool          go_fast;
   Int           n_args, i, nextArgReg;
   ULong         target;

   vassert(ARM64_N_ARGREGS == 8);

   /* Set default returns.  We'll update them later if needed. */
   *stackAdjustAfterCall = 0;
   *retloc               = mk_RetLoc_INVALID();

   /* These are used for cross-checking that IR-level constraints on
      the use of IRExpr_VECRET() and IRExpr_BBPTR() are observed. */
   UInt nVECRETs = 0;
   UInt nBBPTRs  = 0;

   /* Marshal args for a call and do the call.

      This function only deals with a tiny set of possibilities, which
      cover all helpers in practice.  The restrictions are that only
      arguments in registers are supported, hence only
      ARM64_N_REGPARMS x 64 integer bits in total can be passed.  In
      fact the only supported arg type is I64.

      The return type can be I{64,32} or V128.  In the V128 case, it
      is expected that |args| will contain the special node
      IRExpr_VECRET(), in which case this routine generates code to
      allocate space on the stack for the vector return value.  Since
      we are not passing any scalars on the stack, it is enough to
      preallocate the return space before marshalling any arguments,
      in this case.

      |args| may also contain IRExpr_BBPTR(), in which case the
      value in x21 is passed as the corresponding argument.

      Generating code which is both efficient and correct when
      parameters are to be passed in registers is difficult, for the
      reasons elaborated in detail in comments attached to
      doHelperCall() in priv/host-x86/isel.c.  Here, we use a variant
      of the method described in those comments.

      The problem is split into two cases: the fast scheme and the
      slow scheme.  In the fast scheme, arguments are computed
      directly into the target (real) registers.  This is only safe
      when we can be sure that computation of each argument will not
      trash any real registers set by computation of any other
      argument.

      In the slow scheme, all args are first computed into vregs, and
      once they are all done, they are moved to the relevant real
      regs.  This always gives correct code, but it also gives a bunch
      of vreg-to-rreg moves which are usually redundant but are hard
      for the register allocator to get rid of.

      To decide which scheme to use, all argument expressions are
      first examined.  If they are all so simple that it is clear they
      will be evaluated without use of any fixed registers, use the
      fast scheme, else use the slow scheme.  Note also that only
      unconditional calls may use the fast scheme, since having to
      compute a condition expression could itself trash real
      registers.

      Note this requires being able to examine an expression and
      determine whether or not evaluation of it might use a fixed
      register.  That requires knowledge of how the rest of this insn
      selector works.  Currently just the following 3 are regarded as
      safe -- hopefully they cover the majority of arguments in
      practice: IRExpr_Tmp IRExpr_Const IRExpr_Get.
   */
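
   /* For example, a (hypothetical) call  h(t1, 0x10:I64)  has args
      which are just Iex_RdTmp and Iex_Const, so
      mightRequireFixedRegs() is False for both and the fast scheme
      computes them straight into x0 and x1.  A call  h(Add64(t1,t2))
      has a "complex" first arg, so the slow scheme is used: the arg
      goes into a fresh vreg first, and is only copied to x0 once all
      args (and the guard condition, if any) have been computed. */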

   /* Note that the cee->regparms field is meaningless on ARM64 hosts
      (since there is only one calling convention) and so we always
      ignore it. */

   n_args = 0;
   for (i = 0; args[i]; i++) {
      IRExpr* arg = args[i];
      if (UNLIKELY(arg->tag == Iex_VECRET)) {
         nVECRETs++;
      } else if (UNLIKELY(arg->tag == Iex_BBPTR)) {
         nBBPTRs++;
      }
      n_args++;
   }

   /* If this fails, the IR is ill-formed */
   vassert(nBBPTRs == 0 || nBBPTRs == 1);

   /* If we have a VECRET, allocate space on the stack for the return
      value, and record the stack pointer after that. */
   HReg r_vecRetAddr = INVALID_HREG;
   if (nVECRETs == 1) {
      vassert(retTy == Ity_V128 || retTy == Ity_V256);
      vassert(retTy != Ity_V256); // we don't handle that yet (if ever)
      r_vecRetAddr = newVRegI(env);
      addInstr(env, ARM64Instr_AddToSP(-16));
      addInstr(env, ARM64Instr_FromSP(r_vecRetAddr));
   } else {
      // If either of these fail, the IR is ill-formed
      vassert(retTy != Ity_V128 && retTy != Ity_V256);
      vassert(nVECRETs == 0);
   }

   argregs[0] = hregARM64_X0();
   argregs[1] = hregARM64_X1();
   argregs[2] = hregARM64_X2();
   argregs[3] = hregARM64_X3();
   argregs[4] = hregARM64_X4();
   argregs[5] = hregARM64_X5();
   argregs[6] = hregARM64_X6();
   argregs[7] = hregARM64_X7();

   tmpregs[0] = tmpregs[1] = tmpregs[2] = tmpregs[3] = INVALID_HREG;
   tmpregs[4] = tmpregs[5] = tmpregs[6] = tmpregs[7] = INVALID_HREG;

   /* First decide which scheme (slow or fast) is to be used.  First
      assume the fast scheme, and select slow if any contraindications
      (wow) appear. */

   go_fast = True;

   if (guard) {
      if (guard->tag == Iex_Const
          && guard->Iex.Const.con->tag == Ico_U1
          && guard->Iex.Const.con->Ico.U1 == True) {
         /* unconditional */
      } else {
         /* Not manifestly unconditional -- be conservative. */
         go_fast = False;
      }
   }

   if (go_fast) {
      for (i = 0; i < n_args; i++) {
         if (mightRequireFixedRegs(args[i])) {
            go_fast = False;
            break;
         }
      }
   }

   if (go_fast) {
      if (retTy == Ity_V128 || retTy == Ity_V256)
         go_fast = False;
   }

   /* At this point the scheme to use has been established.  Generate
      code to get the arg values into the argument rregs.  If we run
      out of arg regs, give up. */

   if (go_fast) {

      /* FAST SCHEME */
      nextArgReg = 0;

      for (i = 0; i < n_args; i++) {
         IRExpr* arg = args[i];

         IRType aTy = Ity_INVALID;
         if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg)))
            aTy = typeOfIRExpr(env->type_env, args[i]);

         if (nextArgReg >= ARM64_N_ARGREGS)
            return False; /* out of argregs */

         if (aTy == Ity_I64) {
            addInstr(env, ARM64Instr_MovI( argregs[nextArgReg],
                                           iselIntExpr_R(env, args[i]) ));
            nextArgReg++;
         }
         else if (arg->tag == Iex_BBPTR) {
            vassert(0); //ATC
            addInstr(env, ARM64Instr_MovI( argregs[nextArgReg],
                                           hregARM64_X21() ));
            nextArgReg++;
         }
         else if (arg->tag == Iex_VECRET) {
            // because of the go_fast logic above, we can't get here,
            // since vector return values makes us use the slow path
            // instead.
            vassert(0);
         }
         else
            return False; /* unhandled arg type */
      }

      /* Fast scheme only applies for unconditional calls.  Hence: */
      cc = ARM64cc_AL;

   } else {

      /* SLOW SCHEME; move via temporaries */
      nextArgReg = 0;

      for (i = 0; i < n_args; i++) {
         IRExpr* arg = args[i];

         IRType aTy = Ity_INVALID;
         if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg)))
            aTy = typeOfIRExpr(env->type_env, args[i]);

         if (nextArgReg >= ARM64_N_ARGREGS)
            return False; /* out of argregs */

         if (aTy == Ity_I64) {
            tmpregs[nextArgReg] = iselIntExpr_R(env, args[i]);
            nextArgReg++;
         }
         else if (arg->tag == Iex_BBPTR) {
            vassert(0); //ATC
            tmpregs[nextArgReg] = hregARM64_X21();
            nextArgReg++;
         }
         else if (arg->tag == Iex_VECRET) {
            vassert(!hregIsInvalid(r_vecRetAddr));
            tmpregs[nextArgReg] = r_vecRetAddr;
            nextArgReg++;
         }
         else
            return False; /* unhandled arg type */
      }

      /* Now we can compute the condition.  We can't do it earlier
         because the argument computations could trash the condition
         codes.  Be a bit clever to handle the common case where the
         guard is 1:Bit. */
      cc = ARM64cc_AL;
      if (guard) {
         if (guard->tag == Iex_Const
             && guard->Iex.Const.con->tag == Ico_U1
             && guard->Iex.Const.con->Ico.U1 == True) {
            /* unconditional -- do nothing */
         } else {
            cc = iselCondCode( env, guard );
         }
      }

      /* Move the args to their final destinations. */
      for (i = 0; i < nextArgReg; i++) {
         vassert(!(hregIsInvalid(tmpregs[i])));
         /* None of these insns, including any spill code that might
            be generated, may alter the condition codes. */
         addInstr( env, ARM64Instr_MovI( argregs[i], tmpregs[i] ) );
      }

   }

   /* Should be assured by checks above */
   vassert(nextArgReg <= ARM64_N_ARGREGS);

   /* Do final checks, set the return values, and generate the call
      instruction proper. */
   vassert(nBBPTRs == 0 || nBBPTRs == 1);
   vassert(nVECRETs == ((retTy == Ity_V128 || retTy == Ity_V256) ? 1 : 0));
   vassert(*stackAdjustAfterCall == 0);
   vassert(is_RetLoc_INVALID(*retloc));
   switch (retTy) {
      case Ity_INVALID:
         /* Function doesn't return a value. */
         *retloc = mk_RetLoc_simple(RLPri_None);
         break;
      case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
         *retloc = mk_RetLoc_simple(RLPri_Int);
         break;
      case Ity_V128:
         *retloc = mk_RetLoc_spRel(RLPri_V128SpRel, 0);
         *stackAdjustAfterCall = 16;
         break;
      case Ity_V256:
         vassert(0); // ATC
         *retloc = mk_RetLoc_spRel(RLPri_V256SpRel, 0);
         *stackAdjustAfterCall = 32;
         break;
      default:
         /* IR can denote other possible return types, but we don't
            handle those here. */
         vassert(0);
   }

   /* Finally, generate the call itself.  This needs the *retloc value
      set in the switch above, which is why it's at the end. */

   /* nextArgReg doles out argument registers.  Since these are
      assigned in the order x0 .. x7, its numeric value at this point,
      which must be between 0 and 8 inclusive, is going to be equal to
      the number of arg regs in use for the call.  Hence bake that
      number into the call (we'll need to know it when doing register
      allocation, to know what regs the call reads.) */

   target = (HWord)Ptr_to_ULong(cee->addr);
   addInstr(env, ARM64Instr_Call( cc, target, nextArgReg, *retloc ));

   return True; /* success */
}


/*---------------------------------------------------------*/
/*--- ISEL: Integer expressions (64/32 bit)              ---*/
/*---------------------------------------------------------*/

/* Select insns for an integer-typed expression, and add them to the
   code list.  Return a reg holding the result.  This reg will be a
   virtual register.  THE RETURNED REG MUST NOT BE MODIFIED.  If you
   want to modify it, ask for a new vreg, copy it in there, and modify
   the copy.  The register allocator will do its best to map both
   vregs to the same real register, so the copies will often disappear
   later in the game.

   This should handle expressions of 64- and 32-bit type.  All results
   are returned in a 64-bit register.  For 32-bit expressions, the
   upper 32 bits are arbitrary, so you should mask or sign extend
   partial values if necessary.
*/

/* --------------------- AMode --------------------- */

/* Return an AMode which computes the value of the specified
   expression, possibly also adding insns to the code list as a
   result.  The expression may only be a 64-bit one.
*/

static Bool isValidScale ( UChar scale )
{
   switch (scale) {
      case 1: case 2: case 4: case 8: /* case 16: ??*/ return True;
      default: return False;
   }
}

static Bool sane_AMode ( ARM64AMode* am )
{
   switch (am->tag) {
      case ARM64am_RI9:
         return
            toBool( hregClass(am->ARM64am.RI9.reg) == HRcInt64
                    && (hregIsVirtual(am->ARM64am.RI9.reg)
                        /* || sameHReg(am->ARM64am.RI9.reg,
                                       hregARM64_X21()) */ )
                    && am->ARM64am.RI9.simm9 >= -256
                    && am->ARM64am.RI9.simm9 <= 255 );
      case ARM64am_RI12:
         return
            toBool( hregClass(am->ARM64am.RI12.reg) == HRcInt64
                    && (hregIsVirtual(am->ARM64am.RI12.reg)
                        /* || sameHReg(am->ARM64am.RI12.reg,
                                       hregARM64_X21()) */ )
                    && am->ARM64am.RI12.uimm12 < 4096
                    && isValidScale(am->ARM64am.RI12.szB) );
      case ARM64am_RR:
         return
            toBool( hregClass(am->ARM64am.RR.base) == HRcInt64
                    && hregIsVirtual(am->ARM64am.RR.base)
                    && hregClass(am->ARM64am.RR.index) == HRcInt64
                    && hregIsVirtual(am->ARM64am.RR.index) );
      default:
         vpanic("sane_AMode: unknown ARM64 AMode1 tag");
   }
}

static
ARM64AMode* iselIntExpr_AMode ( ISelEnv* env, IRExpr* e, IRType dty )
{
   ARM64AMode* am = iselIntExpr_AMode_wrk(env, e, dty);
   vassert(sane_AMode(am));
   return am;
}

static
ARM64AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, IRExpr* e, IRType dty )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_I64);

   ULong szBbits = 0;
   switch (dty) {
      case Ity_I64: szBbits = 3; break;
      case Ity_I32: szBbits = 2; break;
      case Ity_I16: szBbits = 1; break;
      case Ity_I8:  szBbits = 0; break;
      default: vassert(0);
   }

   /* {Add64,Sub64}(expr,simm9).  We don't care about |dty| here since
      we're going to create an amode suitable for LDU* or STU*
      instructions, which use unscaled immediate offsets. */
   if (e->tag == Iex_Binop
       && (e->Iex.Binop.op == Iop_Add64 || e->Iex.Binop.op == Iop_Sub64)
       && e->Iex.Binop.arg2->tag == Iex_Const
       && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64) {
      Long simm = (Long)e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
      if (simm >= -255 && simm <= 255) {
         /* Although the gating condition might seem to be
               simm >= -256 && simm <= 255
            we will need to negate simm in the case where the op is Sub64.
            Hence limit the lower value to -255 in order that its negation
            is representable. */
         HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
         if (e->Iex.Binop.op == Iop_Sub64) simm = -simm;
         return ARM64AMode_RI9(reg, (Int)simm);
      }
   }

   /* Add64(expr, uimm12 * transfer-size) */
   if (e->tag == Iex_Binop
       && e->Iex.Binop.op == Iop_Add64
       && e->Iex.Binop.arg2->tag == Iex_Const
       && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64) {
      ULong uimm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
      ULong szB  = 1 << szBbits;
      if (0 == (uimm & (szB-1)) /* "uimm is szB-aligned" */
          && (uimm >> szBbits) < 4096) {
         HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
         return ARM64AMode_RI12(reg, (UInt)(uimm >> szBbits), (UChar)szB);
      }
   }

   /* Add64(expr1, expr2) */
   if (e->tag == Iex_Binop
       && e->Iex.Binop.op == Iop_Add64) {
      HReg reg1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
      HReg reg2 = iselIntExpr_R(env, e->Iex.Binop.arg2);
      return ARM64AMode_RR(reg1, reg2);
   }

   /* Doesn't match anything in particular.  Generate it into
      a register and use that. */
   HReg reg = iselIntExpr_R(env, e);
   return ARM64AMode_RI9(reg, 0);
}
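
/* For example, with dty == Ity_I32:
      Add64(t, 0x8:I64)    ==>  RI9(t, 8)       (simm9 rule)
      Add64(t, 0x104:I64)  ==>  RI12(t, 65, 4)  (260 is outside the simm9
                                                 range, but 260 >> 2 == 65
                                                 fits the scaled-uimm12 rule)
   while Add64(t, 0x105:I64) matches neither immediate rule (261 is out
   of simm9 range and not 4-aligned) and so is handled by the generic
   Add64(expr1,expr2) ==> RR case. */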


/* --------------------- RIA --------------------- */

/* Select instructions to generate 'e' into a RIA. */

static ARM64RIA* iselIntExpr_RIA ( ISelEnv* env, IRExpr* e )
{
   ARM64RIA* ri = iselIntExpr_RIA_wrk(env, e);
   /* sanity checks ... */
   switch (ri->tag) {
      case ARM64riA_I12:
         vassert(ri->ARM64riA.I12.imm12 < 4096);
         vassert(ri->ARM64riA.I12.shift == 0 || ri->ARM64riA.I12.shift == 12);
         return ri;
      case ARM64riA_R:
         vassert(hregClass(ri->ARM64riA.R.reg) == HRcInt64);
         vassert(hregIsVirtual(ri->ARM64riA.R.reg));
         return ri;
      default:
         vpanic("iselIntExpr_RIA: unknown arm RIA tag");
   }
}

/* DO NOT CALL THIS DIRECTLY ! */
static ARM64RIA* iselIntExpr_RIA_wrk ( ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_I64 || ty == Ity_I32);

   /* special case: immediate */
   if (e->tag == Iex_Const) {
      ULong u = 0xF000000ULL; /* invalid */
      switch (e->Iex.Const.con->tag) {
         case Ico_U64: u = e->Iex.Const.con->Ico.U64; break;
         case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
         default: vpanic("iselIntExpr_RIA.Iex_Const(arm64)");
      }
      if (0 == (u & ~(0xFFFULL << 0)))
         return ARM64RIA_I12((UShort)((u >> 0) & 0xFFFULL), 0);
      if (0 == (u & ~(0xFFFULL << 12)))
         return ARM64RIA_I12((UShort)((u >> 12) & 0xFFFULL), 12);
      /* else fail, fall through to default case */
   }

   /* default case: calculate into a register and return that */
   {
      HReg r = iselIntExpr_R ( env, e );
      return ARM64RIA_R(r);
   }
}
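
/* For example:
      0xABC:I64     ==>  I12(0xABC, 0)   (fits in the low 12 bits)
      0xABC000:I64  ==>  I12(0xABC, 12)  (fits in bits [23:12])
      0xABC123:I64  ==>  R(vreg)         (straddles both fields, so it is
                                          materialised into a register)
   This mirrors the two immediate forms of the A64 ADD/SUB (immediate)
   instructions: a 12-bit value, optionally shifted left by 12. */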


/* --------------------- RIL --------------------- */

/* Select instructions to generate 'e' into a RIL.  At this point we
   have to deal with the strange bitfield-immediate encoding for logic
   instructions. */


// The following four functions
// CountLeadingZeros CountTrailingZeros CountSetBits isImmLogical
// are copied, with modifications, from
// https://github.com/armvixl/vixl/blob/master/src/a64/assembler-a64.cc
// which has the following copyright notice:
/*
   Copyright 2013, ARM Limited
   All rights reserved.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are met:

   * Redistributions of source code must retain the above copyright notice,
     this list of conditions and the following disclaimer.
   * Redistributions in binary form must reproduce the above copyright notice,
     this list of conditions and the following disclaimer in the documentation
     and/or other materials provided with the distribution.
   * Neither the name of ARM Limited nor the names of its contributors may be
     used to endorse or promote products derived from this software without
     specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   POSSIBILITY OF SUCH DAMAGE.
*/

static Int CountLeadingZeros(ULong value, Int width)
{
   vassert(width == 32 || width == 64);
   Int count = 0;
   ULong bit_test = 1ULL << (width - 1);
   while ((count < width) && ((bit_test & value) == 0)) {
      count++;
      bit_test >>= 1;
   }
   return count;
}

static Int CountTrailingZeros(ULong value, Int width)
{
   vassert(width == 32 || width == 64);
   Int count = 0;
   while ((count < width) && (((value >> count) & 1) == 0)) {
      count++;
   }
   return count;
}

static Int CountSetBits(ULong value, Int width)
{
   // TODO: Other widths could be added here, as the implementation already
   // supports them.
   vassert(width == 32 || width == 64);

   // Mask out unused bits to ensure that they are not counted.
   value &= (0xffffffffffffffffULL >> (64-width));

   // Add up the set bits.
   // The algorithm works by adding pairs of bit fields together iteratively,
   // where the size of each bit field doubles each time.
   // An example for an 8-bit value:
   //   Bits:  h  g  f  e  d  c  b  a
   //           \ |   \ |   \ |   \ |
   //   value = h+g   f+e   d+c   b+a
   //              \   |       \   |
   //   value =  h+g+f+e      d+c+b+a
   //                   \         |
   //   value =     h+g+f+e+d+c+b+a
   value = ((value >>  1) & 0x5555555555555555ULL)
           + (value       & 0x5555555555555555ULL);
   value = ((value >>  2) & 0x3333333333333333ULL)
           + (value       & 0x3333333333333333ULL);
   value = ((value >>  4) & 0x0f0f0f0f0f0f0f0fULL)
           + (value       & 0x0f0f0f0f0f0f0f0fULL);
   value = ((value >>  8) & 0x00ff00ff00ff00ffULL)
           + (value       & 0x00ff00ff00ff00ffULL);
   value = ((value >> 16) & 0x0000ffff0000ffffULL)
           + (value       & 0x0000ffff0000ffffULL);
   value = ((value >> 32) & 0x00000000ffffffffULL)
           + (value       & 0x00000000ffffffffULL);

   return value;
}
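
#if 0
/* Illustrative only, not compiled in: expected behaviour of the three
   counting helpers above. */
static void example_bit_counts ( void )
{
   vassert(CountLeadingZeros (0x00FF000000000000ULL, 64) == 8);
   vassert(CountTrailingZeros(0x00FF000000000000ULL, 64) == 48);
   vassert(CountSetBits      (0x00FF000000000000ULL, 64) == 8);
}
#endif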

static Bool isImmLogical ( /*OUT*/UInt* n,
                           /*OUT*/UInt* imm_s, /*OUT*/UInt* imm_r,
                           ULong value, UInt width )
{
   // Test if a given value can be encoded in the immediate field of a
   // logical instruction.

   // If it can be encoded, the function returns true, and values
   // pointed to by n, imm_s and imm_r are updated with immediates
   // encoded in the format required by the corresponding fields in the
   // logical instruction.  If it can not be encoded, the function
   // returns false, and the values pointed to by n, imm_s and imm_r
   // are undefined.
   vassert(n != NULL && imm_s != NULL && imm_r != NULL);
   vassert(width == 32 || width == 64);

   // Logical immediates are encoded using parameters n, imm_s and imm_r using
   // the following table:
   //
   //  N   imms    immr    size        S             R
   //  1  ssssss  rrrrrr    64    UInt(ssssss)  UInt(rrrrrr)
   //  0  0sssss  xrrrrr    32    UInt(sssss)   UInt(rrrrr)
   //  0  10ssss  xxrrrr    16    UInt(ssss)    UInt(rrrr)
   //  0  110sss  xxxrrr     8    UInt(sss)     UInt(rrr)
   //  0  1110ss  xxxxrr     4    UInt(ss)      UInt(rr)
   //  0  11110s  xxxxxr     2    UInt(s)       UInt(r)
   // (s bits must not be all set)
   //
   // A pattern is constructed of size bits, where the least significant S+1
   // bits are set.  The pattern is rotated right by R, and repeated across a
   // 32 or 64-bit value, depending on destination register width.
   //
   // To test if an arbitrary immediate can be encoded using this scheme, an
   // iterative algorithm is used.
   //
   // TODO: This code does not consider using X/W register overlap to support
   // 64-bit immediates where the top 32-bits are zero, and the bottom 32-bits
   // are an encodable logical immediate.

   // 1. If the value has all set or all clear bits, it can't be encoded.
   if ((value == 0) || (value == 0xffffffffffffffffULL) ||
       ((width == 32) && (value == 0xffffffff))) {
      return False;
   }

   UInt lead_zero  = CountLeadingZeros(value, width);
   UInt lead_one   = CountLeadingZeros(~value, width);
   UInt trail_zero = CountTrailingZeros(value, width);
   UInt trail_one  = CountTrailingZeros(~value, width);
   UInt set_bits   = CountSetBits(value, width);

   // The fixed bits in the immediate s field.
   // If width == 64 (X reg), start at 0xFFFFFF80.
   // If width == 32 (W reg), start at 0xFFFFFFC0, as the iteration for 64-bit
   // widths won't be executed.
   Int imm_s_fixed = (width == 64) ? -128 : -64;
   Int imm_s_mask  = 0x3F;

   for (;;) {
      // 2. If the value is two bits wide, it can be encoded.
      if (width == 2) {
         *n     = 0;
         *imm_s = 0x3C;
         *imm_r = (value & 3) - 1;
         return True;
      }

      *n = (width == 64) ? 1 : 0;
      *imm_s = ((imm_s_fixed | (set_bits - 1)) & imm_s_mask);
      if ((lead_zero + set_bits) == width) {
         *imm_r = 0;
      } else {
         *imm_r = (lead_zero > 0) ? (width - trail_zero) : lead_one;
      }

      // 3. If the sum of leading zeros, trailing zeros and set bits is
      //    equal to the bit width of the value, it can be encoded.
      if (lead_zero + trail_zero + set_bits == width) {
         return True;
      }

      // 4. If the sum of leading ones, trailing ones and unset bits in the
      //    value is equal to the bit width of the value, it can be encoded.
      if (lead_one + trail_one + (width - set_bits) == width) {
         return True;
      }

      // 5. If the most-significant half of the bitwise value is equal to
      //    the least-significant half, return to step 2 using the
      //    least-significant half of the value.
      ULong mask = (1ULL << (width >> 1)) - 1;
      if ((value & mask) == ((value >> (width >> 1)) & mask)) {
         width       >>= 1;
         set_bits    >>= 1;
         imm_s_fixed >>= 1;
         continue;
      }

      // 6. Otherwise, the value can't be encoded.
      return False;
   }
}
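
#if 0
/* Illustrative only, not compiled in: 0xFF is a single run of 8 ones,
   so it is encodable (N=1, imms=7, immr=0), whereas 0xABCD has no
   repeating rotated-run structure and is rejected. */
static void example_isImmLogical ( void )
{
   UInt n, s, r;
   vassert(  isImmLogical(&n, &s, &r, 0xFFULL,   64) );
   vassert(n == 1 && s == 7 && r == 0);
   vassert( !isImmLogical(&n, &s, &r, 0xABCDULL, 64) );
}
#endif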


/* Create a RIL for the given immediate, if it is representable, or
   return NULL if not. */

static ARM64RIL* mb_mkARM64RIL_I ( ULong imm64 )
{
   UInt n = 0, imm_s = 0, imm_r = 0;
   Bool ok = isImmLogical(&n, &imm_s, &imm_r, imm64, 64);
   if (!ok) return NULL;
   vassert(n < 2 && imm_s < 64 && imm_r < 64);
   return ARM64RIL_I13(n, imm_r, imm_s);
}
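
/* So, for instance, mb_mkARM64RIL_I(0xFF) gives I13(1, 0, 7), while
   mb_mkARM64RIL_I(0xABCD) gives NULL.  Note the argument order:
   ARM64RIL_I13 takes (bitN, immR, immS). */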

/* So, finally .. */

static ARM64RIL* iselIntExpr_RIL ( ISelEnv* env, IRExpr* e )
{
   ARM64RIL* ri = iselIntExpr_RIL_wrk(env, e);
   /* sanity checks ... */
   switch (ri->tag) {
      case ARM64riL_I13:
         vassert(ri->ARM64riL.I13.bitN < 2);
         vassert(ri->ARM64riL.I13.immR < 64);
         vassert(ri->ARM64riL.I13.immS < 64);
         return ri;
      case ARM64riL_R:
         vassert(hregClass(ri->ARM64riL.R.reg) == HRcInt64);
         vassert(hregIsVirtual(ri->ARM64riL.R.reg));
         return ri;
      default:
         vpanic("iselIntExpr_RIL: unknown arm RIL tag");
   }
}

/* DO NOT CALL THIS DIRECTLY ! */
static ARM64RIL* iselIntExpr_RIL_wrk ( ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_I64 || ty == Ity_I32);

   /* special case: immediate */
   if (e->tag == Iex_Const) {
      ARM64RIL* maybe = NULL;
      if (ty == Ity_I64) {
         vassert(e->Iex.Const.con->tag == Ico_U64);
         maybe = mb_mkARM64RIL_I(e->Iex.Const.con->Ico.U64);
      } else {
         vassert(ty == Ity_I32);
         vassert(e->Iex.Const.con->tag == Ico_U32);
         UInt  u32 = e->Iex.Const.con->Ico.U32;
         ULong u64 = (ULong)u32;
         /* First try with 32 leading zeroes. */
         maybe = mb_mkARM64RIL_I(u64);
         /* If that doesn't work, try with 2 copies, since it doesn't
            matter what winds up in the upper 32 bits. */
         if (!maybe) {
            maybe = mb_mkARM64RIL_I((u64 << 32) | u64);
         }
      }
      if (maybe) return maybe;
      /* else fail, fall through to default case */
   }

   /* default case: calculate into a register and return that */
   {
      HReg r = iselIntExpr_R ( env, e );
      return ARM64RIL_R(r);
   }
}
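/* The 2-copies trick above matters in practice.  For example, the I32
   constant 0xF0F0F0F0 is not encodable as a 64-bit logical immediate
   when zero-extended (0x00000000F0F0F0F0 has no repeating structure),
   but the doubled value 0xF0F0F0F0F0F0F0F0 is a rotated run of 4 ones
   repeating every 8 bits, and so encodes fine.  Since the insn will
   only be used in a 32-bit (W register) context, whatever lands in
   the upper 32 bits of the pattern is irrelevant. */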


/* --------------------- RI6 --------------------- */

/* Select instructions to generate 'e' into a RI6. */

static ARM64RI6* iselIntExpr_RI6 ( ISelEnv* env, IRExpr* e )
{
   ARM64RI6* ri = iselIntExpr_RI6_wrk(env, e);
   /* sanity checks ... */
   switch (ri->tag) {
      case ARM64ri6_I6:
         vassert(ri->ARM64ri6.I6.imm6 < 64);
         vassert(ri->ARM64ri6.I6.imm6 > 0);
         return ri;
      case ARM64ri6_R:
         vassert(hregClass(ri->ARM64ri6.R.reg) == HRcInt64);
         vassert(hregIsVirtual(ri->ARM64ri6.R.reg));
         return ri;
      default:
         vpanic("iselIntExpr_RI6: unknown arm RI6 tag");
   }
}

/* DO NOT CALL THIS DIRECTLY ! */
static ARM64RI6* iselIntExpr_RI6_wrk ( ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_I64 || ty == Ity_I8);

   /* special case: immediate */
   if (e->tag == Iex_Const) {
      switch (e->Iex.Const.con->tag) {
         case Ico_U8: {
            UInt u = e->Iex.Const.con->Ico.U8;
            if (u > 0 && u < 64)
               return ARM64RI6_I6(u);
            break;
         }
         default:
            break;
      }
      /* else fail, fall through to default case */
   }

   /* default case: calculate into a register and return that */
   {
      HReg r = iselIntExpr_R ( env, e );
      return ARM64RI6_R(r);
   }
}


/* ------------------- CondCode ------------------- */

/* Generate code to evaluate a bit-typed expression, returning the
   condition code which would correspond when the expression would
   notionally have returned 1. */

static ARM64CondCode iselCondCode ( ISelEnv* env, IRExpr* e )
{
   ARM64CondCode cc = iselCondCode_wrk(env,e);
   vassert(cc != ARM64cc_NV);
   return cc;
}

static ARM64CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e )
{
   vassert(e);
   vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);

   /* var */
   if (e->tag == Iex_RdTmp) {
      HReg rTmp = lookupIRTemp(env, e->Iex.RdTmp.tmp);
      /* Cmp doesn't modify rTmp; so this is OK. */
      ARM64RIL* one = mb_mkARM64RIL_I(1);
      vassert(one);
      addInstr(env, ARM64Instr_Test(rTmp, one));
      return ARM64cc_NE;
   }

   /* Not1(e) */
   if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) {
      /* Generate code for the arg, and negate the test condition */
      ARM64CondCode cc = iselCondCode(env, e->Iex.Unop.arg);
      if (cc == ARM64cc_AL || cc == ARM64cc_NV) {
         return ARM64cc_AL;
      } else {
         return 1 ^ cc;
      }
   }
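
   /* The "1 ^ cc" trick works because ARM64 condition codes come in
      even/odd pairs that are logical complements, e.g. EQ(0)/NE(1),
      CS(2)/CC(3), MI(4)/PL(5); flipping the bottom bit therefore
      negates the condition.  AL and NV are the exception, handled
      specially above. */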

   /* --- patterns rooted at: 64to1 --- */

   if (e->tag == Iex_Unop
       && e->Iex.Unop.op == Iop_64to1) {
      HReg rTmp = iselIntExpr_R(env, e->Iex.Unop.arg);
      ARM64RIL* one = mb_mkARM64RIL_I(1);
      vassert(one); /* '1' must be representable */
      addInstr(env, ARM64Instr_Test(rTmp, one));
      return ARM64cc_NE;
   }

   /* --- patterns rooted at: CmpNEZ8 --- */

   if (e->tag == Iex_Unop
       && e->Iex.Unop.op == Iop_CmpNEZ8) {
      HReg r1  = iselIntExpr_R(env, e->Iex.Unop.arg);
      ARM64RIL* xFF = mb_mkARM64RIL_I(0xFF);
      addInstr(env, ARM64Instr_Test(r1, xFF));
      return ARM64cc_NE;
   }

   /* --- patterns rooted at: CmpNEZ64 --- */

   if (e->tag == Iex_Unop
       && e->Iex.Unop.op == Iop_CmpNEZ64) {
      HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
      ARM64RIA* zero = ARM64RIA_I12(0,0);
      addInstr(env, ARM64Instr_Cmp(r1, zero, True/*is64*/));
      return ARM64cc_NE;
   }

   /* --- patterns rooted at: CmpNEZ32 --- */

   if (e->tag == Iex_Unop
       && e->Iex.Unop.op == Iop_CmpNEZ32) {
      HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
      ARM64RIA* zero = ARM64RIA_I12(0,0);
      addInstr(env, ARM64Instr_Cmp(r1, zero, False/*!is64*/));
      return ARM64cc_NE;
   }

   /* --- Cmp*64*(x,y) --- */
   if (e->tag == Iex_Binop
       && (e->Iex.Binop.op == Iop_CmpEQ64
           || e->Iex.Binop.op == Iop_CmpNE64
           || e->Iex.Binop.op == Iop_CmpLT64S
           || e->Iex.Binop.op == Iop_CmpLT64U
           || e->Iex.Binop.op == Iop_CmpLE64S
           || e->Iex.Binop.op == Iop_CmpLE64U)) {
      HReg      argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
      ARM64RIA* argR = iselIntExpr_RIA(env, e->Iex.Binop.arg2);
      addInstr(env, ARM64Instr_Cmp(argL, argR, True/*is64*/));
      switch (e->Iex.Binop.op) {
         case Iop_CmpEQ64:  return ARM64cc_EQ;
         case Iop_CmpNE64:  return ARM64cc_NE;
         case Iop_CmpLT64S: return ARM64cc_LT;
         case Iop_CmpLT64U: return ARM64cc_CC;
         case Iop_CmpLE64S: return ARM64cc_LE;
         case Iop_CmpLE64U: return ARM64cc_LS;
         default: vpanic("iselCondCode(arm64): CmpXX64");
      }
   }

   /* --- Cmp*32*(x,y) --- */
   if (e->tag == Iex_Binop
       && (e->Iex.Binop.op == Iop_CmpEQ32
           || e->Iex.Binop.op == Iop_CmpNE32
           || e->Iex.Binop.op == Iop_CmpLT32S
           || e->Iex.Binop.op == Iop_CmpLT32U
           || e->Iex.Binop.op == Iop_CmpLE32S
           || e->Iex.Binop.op == Iop_CmpLE32U)) {
      HReg      argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
      ARM64RIA* argR = iselIntExpr_RIA(env, e->Iex.Binop.arg2);
      addInstr(env, ARM64Instr_Cmp(argL, argR, False/*!is64*/));
      switch (e->Iex.Binop.op) {
         case Iop_CmpEQ32:  return ARM64cc_EQ;
         case Iop_CmpNE32:  return ARM64cc_NE;
         case Iop_CmpLT32S: return ARM64cc_LT;
         case Iop_CmpLT32U: return ARM64cc_CC;
         case Iop_CmpLE32S: return ARM64cc_LE;
         case Iop_CmpLE32U: return ARM64cc_LS;
         default: vpanic("iselCondCode(arm64): CmpXX32");
      }
   }

   ppIRExpr(e);
   vpanic("iselCondCode");
}


/* --------------------- Reg --------------------- */

static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e )
{
   HReg r = iselIntExpr_R_wrk(env, e);
   /* sanity checks ... */
#  if 0
   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
#  endif
   vassert(hregClass(r) == HRcInt64);
   vassert(hregIsVirtual(r));
   return r;
}

/* DO NOT CALL THIS DIRECTLY ! */
static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);

   switch (e->tag) {

      /* --------- TEMP --------- */
      case Iex_RdTmp: {
         return lookupIRTemp(env, e->Iex.RdTmp.tmp);
      }

      /* --------- LOAD --------- */
      case Iex_Load: {
         HReg dst = newVRegI(env);

         if (e->Iex.Load.end != Iend_LE)
            goto irreducible;

         if (ty == Ity_I64) {
            ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty );
            addInstr(env, ARM64Instr_LdSt64(True/*isLoad*/, dst, amode));
            return dst;
         }
         if (ty == Ity_I32) {
            ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty );
            addInstr(env, ARM64Instr_LdSt32(True/*isLoad*/, dst, amode));
            return dst;
         }
         if (ty == Ity_I16) {
            ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty );
            addInstr(env, ARM64Instr_LdSt16(True/*isLoad*/, dst, amode));
            return dst;
         }
         if (ty == Ity_I8) {
            ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty );
            addInstr(env, ARM64Instr_LdSt8(True/*isLoad*/, dst, amode));
            return dst;
         }
         break;
      }

      /* --------- BINARY OP --------- */
      case Iex_Binop: {

         ARM64LogicOp lop = 0; /* invalid */
         ARM64ShiftOp sop = 0; /* invalid */

         /* Special-case 0-x into a Neg instruction.  Not because it's
            particularly useful, but rather so as to give value flow
            through this instruction, and hence check its assembly
            correctness, for the implementation of Left32/Left64. */
         switch (e->Iex.Binop.op) {
            case Iop_Sub64:
               if (isZeroU64(e->Iex.Binop.arg1)) {
                  HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
                  HReg dst  = newVRegI(env);
                  addInstr(env, ARM64Instr_Unary(dst, argR, ARM64un_NEG));
                  return dst;
               }
               break;
            default:
               break;
         }

         /* ADD/SUB */
         switch (e->Iex.Binop.op) {
            case Iop_Add64: case Iop_Add32:
            case Iop_Sub64: case Iop_Sub32: {
               Bool      isAdd = e->Iex.Binop.op == Iop_Add64
                                 || e->Iex.Binop.op == Iop_Add32;
               HReg      dst   = newVRegI(env);
               HReg      argL  = iselIntExpr_R(env, e->Iex.Binop.arg1);
               ARM64RIA* argR  = iselIntExpr_RIA(env, e->Iex.Binop.arg2);
               addInstr(env, ARM64Instr_Arith(dst, argL, argR, isAdd));
               return dst;
            }
            default:
               break;
         }

         /* AND/OR/XOR */
         switch (e->Iex.Binop.op) {
            case Iop_And64: case Iop_And32: lop = ARM64lo_AND; goto log_binop;
            case Iop_Or64:  case Iop_Or32:  lop = ARM64lo_OR;  goto log_binop;
            case Iop_Xor64: case Iop_Xor32: lop = ARM64lo_XOR; goto log_binop;
            log_binop: {
               HReg      dst  = newVRegI(env);
               HReg      argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
               ARM64RIL* argR = iselIntExpr_RIL(env, e->Iex.Binop.arg2);
               addInstr(env, ARM64Instr_Logic(dst, argL, argR, lop));
               return dst;
            }
            default:
               break;
         }

         /* SHL/SHR/SAR */
         switch (e->Iex.Binop.op) {
            case Iop_Shr64:                 sop = ARM64sh_SHR; goto sh_binop;
            case Iop_Sar64:                 sop = ARM64sh_SAR; goto sh_binop;
            case Iop_Shl64: case Iop_Shl32: sop = ARM64sh_SHL; goto sh_binop;
            sh_binop: {
               HReg      dst  = newVRegI(env);
               HReg      argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
               ARM64RI6* argR = iselIntExpr_RI6(env, e->Iex.Binop.arg2);
               addInstr(env, ARM64Instr_Shift(dst, argL, argR, sop));
               return dst;
            }
            case Iop_Shr32:
            case Iop_Sar32: {
               Bool      zx   = e->Iex.Binop.op == Iop_Shr32;
               HReg      argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
               ARM64RI6* argR = iselIntExpr_RI6(env, e->Iex.Binop.arg2);
               HReg      dst  = zx ? widen_z_32_to_64(env, argL)
                                   : widen_s_32_to_64(env, argL);
               addInstr(env, ARM64Instr_Shift(dst, dst, argR, ARM64sh_SHR));
               return dst;
            }
            default: break;
         }

         /* MUL */
         if (e->Iex.Binop.op == Iop_Mul64 || e->Iex.Binop.op == Iop_Mul32) {
            HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
            HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
            HReg dst  = newVRegI(env);
            addInstr(env, ARM64Instr_Mul(dst, argL, argR, ARM64mul_PLAIN));
            return dst;
         }

         /* MULL */
         if (e->Iex.Binop.op == Iop_MullU32 || e->Iex.Binop.op == Iop_MullS32) {
            Bool isS  = e->Iex.Binop.op == Iop_MullS32;
            HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
            HReg extL = (isS ? widen_s_32_to_64 : widen_z_32_to_64)(env, argL);
            HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
            HReg extR = (isS ? widen_s_32_to_64 : widen_z_32_to_64)(env, argR);
            HReg dst  = newVRegI(env);
            addInstr(env, ARM64Instr_Mul(dst, extL, extR, ARM64mul_PLAIN));
            return dst;
         }

         /* Handle misc other ops. */

         if (e->Iex.Binop.op == Iop_Max32U) {
            HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
            HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
            HReg dst  = newVRegI(env);
            addInstr(env, ARM64Instr_Cmp(argL, ARM64RIA_R(argR),
                                         False/*!is64*/));
            addInstr(env, ARM64Instr_CSel(dst, argL, argR, ARM64cc_CS));
            return dst;
         }

         if (e->Iex.Binop.op == Iop_32HLto64) {
            HReg hi32s = iselIntExpr_R(env, e->Iex.Binop.arg1);
            HReg lo32s = iselIntExpr_R(env, e->Iex.Binop.arg2);
            HReg lo32  = widen_z_32_to_64(env, lo32s);
            HReg hi32  = newVRegI(env);
            addInstr(env, ARM64Instr_Shift(hi32, hi32s, ARM64RI6_I6(32),
                                           ARM64sh_SHL));
            addInstr(env, ARM64Instr_Logic(hi32, hi32, ARM64RIL_R(lo32),
                                           ARM64lo_OR));
            return hi32;
         }
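
         /* For example, 32HLto64(0xDEADBEEF:I32, 0xCAFEBABE:I32)
            computes  (0xDEADBEEF << 32) | zx32(0xCAFEBABE)
            == 0xDEADBEEFCAFEBABE.  The zero-extension of the low half
            matters because, per the convention above, the upper 32
            bits of a 32-bit value's register are arbitrary. */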

         if (e->Iex.Binop.op == Iop_CmpF64 || e->Iex.Binop.op == Iop_CmpF32) {
            Bool isD = e->Iex.Binop.op == Iop_CmpF64;
            HReg dL  = (isD ? iselDblExpr : iselFltExpr)(env, e->Iex.Binop.arg1);
            HReg dR  = (isD ? iselDblExpr : iselFltExpr)(env, e->Iex.Binop.arg2);
            HReg dst = newVRegI(env);
            HReg imm = newVRegI(env);
            /* Do the compare (FCMP), which sets NZCV in PSTATE.  Then
               create in dst, the IRCmpF64Result encoded result. */
            addInstr(env, (isD ? ARM64Instr_VCmpD : ARM64Instr_VCmpS)(dL, dR));
            addInstr(env, ARM64Instr_Imm64(dst, 0));
            addInstr(env, ARM64Instr_Imm64(imm, 0x40)); // 0x40 = Ircr_EQ
            addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_EQ));
            addInstr(env, ARM64Instr_Imm64(imm, 0x01)); // 0x01 = Ircr_LT
            addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_MI));
            addInstr(env, ARM64Instr_Imm64(imm, 0x00)); // 0x00 = Ircr_GT
            addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_GT));
            addInstr(env, ARM64Instr_Imm64(imm, 0x45)); // 0x45 = Ircr_UN
            addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_VS));
            return dst;
         }
1633
1634 { /* local scope */
1635 ARM64CvtOp cvt_op = ARM64cvt_INVALID;
1636 Bool srcIsD = False;
1637 switch (e->Iex.Binop.op) {
1638 case Iop_F64toI64S:
1639 cvt_op = ARM64cvt_F64_I64S; srcIsD = True; break;
1640 case Iop_F64toI64U:
1641 cvt_op = ARM64cvt_F64_I64U; srcIsD = True; break;
1642 case Iop_F64toI32S:
1643 cvt_op = ARM64cvt_F64_I32S; srcIsD = True; break;
1644 case Iop_F64toI32U:
1645 cvt_op = ARM64cvt_F64_I32U; srcIsD = True; break;
1646 case Iop_F32toI32S:
1647 cvt_op = ARM64cvt_F32_I32S; srcIsD = False; break;
sewardj1eaaec22014-03-07 22:52:19 +00001648 case Iop_F32toI32U:
1649 cvt_op = ARM64cvt_F32_I32U; srcIsD = False; break;
1650 case Iop_F32toI64S:
1651 cvt_op = ARM64cvt_F32_I64S; srcIsD = False; break;
sewardjbbcf1882014-01-12 12:49:10 +00001652 case Iop_F32toI64U:
1653 cvt_op = ARM64cvt_F32_I64U; srcIsD = False; break;
1654 default:
1655 break;
1656 }
1657 if (cvt_op != ARM64cvt_INVALID) {
1658 /* This is all a bit dodgy, because we can't handle a
1659 non-constant (not-known-at-JIT-time) rounding mode
1660 indication. That's because there's no instruction
1661 AFAICS that does this conversion but rounds according to
1662 FPCR.RM, so we have to bake the rounding mode into the
1663 instruction right now. But that should be OK because
1664 (1) the front end attaches a literal Irrm_ value to the
1665 conversion binop, and (2) iropt will never float that
1666 off via CSE, into a literal. Hence we should always
1667 have an Irrm_ value as the first arg. */
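               /* For example (a sketch): F64toI64S(Irrm_ZERO, t1) should
                  end up as a "convert toward zero" instruction (FCVTZS),
                  since the armrm encoding chosen below (3 for
                  round-to-zero) follows FPCR.RMode's RN/RP/RM/RZ
                  numbering. */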
               IRExpr* arg1 = e->Iex.Binop.arg1;
               if (arg1->tag != Iex_Const) goto irreducible;
               IRConst* arg1con = arg1->Iex.Const.con;
               vassert(arg1con->tag == Ico_U32); // else ill-typed IR
               UInt irrm = arg1con->Ico.U32;
               /* Find the ARM-encoded equivalent for |irrm|. */
               UInt armrm = 4; /* impossible */
               switch (irrm) {
                  case Irrm_NEAREST: armrm = 0; break;
                  case Irrm_NegINF:  armrm = 2; break;
                  case Irrm_PosINF:  armrm = 1; break;
                  case Irrm_ZERO:    armrm = 3; break;
                  default: goto irreducible;
               }
               HReg src = (srcIsD ? iselDblExpr : iselFltExpr)
                             (env, e->Iex.Binop.arg2);
               HReg dst = newVRegI(env);
               addInstr(env, ARM64Instr_VCvtF2I(cvt_op, dst, src, armrm));
               return dst;
            }
         } /* local scope */

         /* All cases involving host-side helper calls. */
         void* fn = NULL;
         switch (e->Iex.Binop.op) {
            case Iop_DivU32:
               fn = &h_calc_udiv32_w_arm_semantics; break;
            case Iop_DivS32:
               fn = &h_calc_sdiv32_w_arm_semantics; break;
            case Iop_DivU64:
               fn = &h_calc_udiv64_w_arm_semantics; break;
            case Iop_DivS64:
               fn = &h_calc_sdiv64_w_arm_semantics; break;
            default:
               break;
         }

         if (fn) {
            HReg regL = iselIntExpr_R(env, e->Iex.Binop.arg1);
            HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2);
            HReg res  = newVRegI(env);
            addInstr(env, ARM64Instr_MovI(hregARM64_X0(), regL));
            addInstr(env, ARM64Instr_MovI(hregARM64_X1(), regR));
            addInstr(env, ARM64Instr_Call( ARM64cc_AL, (HWord)Ptr_to_ULong(fn),
                                           2, mk_RetLoc_simple(RLPri_Int) ));
            addInstr(env, ARM64Instr_MovI(res, hregARM64_X0()));
            return res;
         }

         break;
      }

      /* --------- UNARY OP --------- */
      case Iex_Unop: {

         switch (e->Iex.Unop.op) {
            case Iop_16Uto64: {
               /* This probably doesn't occur often enough to be worth
                  rolling the extension into the load. */
               IRExpr* arg = e->Iex.Unop.arg;
               HReg src = iselIntExpr_R(env, arg);
               HReg dst = widen_z_16_to_64(env, src);
               return dst;
            }
            case Iop_32Uto64: {
               IRExpr* arg = e->Iex.Unop.arg;
               if (arg->tag == Iex_Load) {
                  /* This correctly zero extends because _LdSt32 is
                     defined to do a zero extending load. */
                  HReg dst = newVRegI(env);
                  ARM64AMode* am
                     = iselIntExpr_AMode(env, arg->Iex.Load.addr, Ity_I32);
                  addInstr(env, ARM64Instr_LdSt32(True/*isLoad*/, dst, am));
                  return dst;
               }
               /* else be lame and mask it */
               HReg src = iselIntExpr_R(env, arg);
               HReg dst = widen_z_32_to_64(env, src);
               return dst;
            }
            case Iop_8Uto32: /* Just freeload on the 8Uto64 case */
            case Iop_8Uto64: {
               IRExpr* arg = e->Iex.Unop.arg;
               if (arg->tag == Iex_Load) {
                  /* This correctly zero extends because _LdSt8 is
                     defined to do a zero extending load. */
                  HReg dst = newVRegI(env);
                  ARM64AMode* am
                     = iselIntExpr_AMode(env, arg->Iex.Load.addr, Ity_I8);
                  addInstr(env, ARM64Instr_LdSt8(True/*isLoad*/, dst, am));
                  return dst;
               }
               /* else be lame and mask it */
               HReg src = iselIntExpr_R(env, arg);
               HReg dst = widen_z_8_to_64(env, src);
               return dst;
            }
            case Iop_128HIto64: {
               HReg rHi, rLo;
               iselInt128Expr(&rHi, &rLo, env, e->Iex.Unop.arg);
               return rHi; /* and abandon rLo */
            }
            case Iop_8Sto32: case Iop_8Sto64: {
               IRExpr* arg = e->Iex.Unop.arg;
               HReg src = iselIntExpr_R(env, arg);
               HReg dst = widen_s_8_to_64(env, src);
               return dst;
            }
            case Iop_16Sto32: case Iop_16Sto64: {
               IRExpr* arg = e->Iex.Unop.arg;
               HReg src = iselIntExpr_R(env, arg);
               HReg dst = widen_s_16_to_64(env, src);
               return dst;
            }
            case Iop_32Sto64: {
               IRExpr* arg = e->Iex.Unop.arg;
               HReg src = iselIntExpr_R(env, arg);
               HReg dst = widen_s_32_to_64(env, src);
               return dst;
            }
            case Iop_Not32:
            case Iop_Not64: {
               HReg dst = newVRegI(env);
               HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
               addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NOT));
               return dst;
            }
            case Iop_Clz64: {
               HReg dst = newVRegI(env);
               HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
               addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_CLZ));
               return dst;
            }
            case Iop_Left32:
            case Iop_Left64: {
               /* Left64(src) = src | -src.  Left32 can use the same
                  implementation since in that case we don't care what
                  the upper 32 bits become. */
               HReg dst = newVRegI(env);
               HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
               addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NEG));
               addInstr(env, ARM64Instr_Logic(dst, dst, ARM64RIL_R(src),
                                              ARM64lo_OR));
               return dst;
            }
            case Iop_CmpwNEZ64: {
               /* CmpwNEZ64(src) = (src == 0) ? 0...0 : 1...1
                                 = Left64(src) >>s 63 */
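               /* For instance, src = 1 gives 1 | -1 = all ones, and the
                  arithmetic shift by 63 keeps it all ones; src = 0 gives
                  0 | 0 = 0, which stays 0. */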
               HReg dst = newVRegI(env);
               HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
               addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NEG));
               addInstr(env, ARM64Instr_Logic(dst, dst, ARM64RIL_R(src),
                                              ARM64lo_OR));
               addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63),
                                              ARM64sh_SAR));
               return dst;
            }
            case Iop_CmpwNEZ32: {
               /* CmpwNEZ32(src) = CmpwNEZ64(src & 0xFFFFFFFF)
                                 = Left64(src & 0xFFFFFFFF) >>s 63 */
               HReg dst = newVRegI(env);
               HReg pre = iselIntExpr_R(env, e->Iex.Unop.arg);
               HReg src = widen_z_32_to_64(env, pre);
               addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NEG));
               addInstr(env, ARM64Instr_Logic(dst, dst, ARM64RIL_R(src),
                                              ARM64lo_OR));
               addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63),
                                              ARM64sh_SAR));
               return dst;
            }
            case Iop_V128to64: case Iop_V128HIto64: {
               HReg dst = newVRegI(env);
               HReg src = iselV128Expr(env, e->Iex.Unop.arg);
               UInt laneNo = (e->Iex.Unop.op == Iop_V128HIto64) ? 1 : 0;
               addInstr(env, ARM64Instr_VXfromQ(dst, src, laneNo));
               return dst;
            }
            case Iop_ReinterpF64asI64: {
               HReg dst = newVRegI(env);
               HReg src = iselDblExpr(env, e->Iex.Unop.arg);
               addInstr(env, ARM64Instr_VXfromDorS(dst, src, True/*fromD*/));
               return dst;
            }
            case Iop_ReinterpF32asI32: {
               HReg dst = newVRegI(env);
               HReg src = iselFltExpr(env, e->Iex.Unop.arg);
               addInstr(env, ARM64Instr_VXfromDorS(dst, src, False/*!fromD*/));
               return dst;
            }
            case Iop_1Sto32:
            case Iop_1Sto64: {
               /* As with the iselStmt case for 'tmp:I1 = expr', we could
                  do a lot better here if it ever became necessary. */
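               /* The CSel below leaves 0 or 1 in dst; the SHL-by-63 /
                  SAR-by-63 pair then smears bit 0 across the whole
                  register: 1 becomes all ones, 0 stays 0. */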
               HReg zero = newVRegI(env);
               HReg one  = newVRegI(env);
               HReg dst  = newVRegI(env);
               addInstr(env, ARM64Instr_Imm64(zero, 0));
               addInstr(env, ARM64Instr_Imm64(one, 1));
               ARM64CondCode cc = iselCondCode(env, e->Iex.Unop.arg);
               addInstr(env, ARM64Instr_CSel(dst, one, zero, cc));
               addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63),
                                              ARM64sh_SHL));
               addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63),
                                              ARM64sh_SAR));
               return dst;
            }
            case Iop_NarrowUn16to8x8:
            case Iop_NarrowUn32to16x4:
            case Iop_NarrowUn64to32x2:
            case Iop_QNarrowUn16Sto8Sx8:
            case Iop_QNarrowUn32Sto16Sx4:
            case Iop_QNarrowUn64Sto32Sx2:
            case Iop_QNarrowUn16Uto8Ux8:
            case Iop_QNarrowUn32Uto16Ux4:
            case Iop_QNarrowUn64Uto32Ux2:
            case Iop_QNarrowUn16Sto8Ux8:
            case Iop_QNarrowUn32Sto16Ux4:
            case Iop_QNarrowUn64Sto32Ux2:
            {
               HReg src = iselV128Expr(env, e->Iex.Unop.arg);
               HReg tmp = newVRegV(env);
               HReg dst = newVRegI(env);
               UInt dszBlg2 = 3; /* illegal */
               ARM64VecNarrowOp op = ARM64vecna_INVALID;
               switch (e->Iex.Unop.op) {
                  case Iop_NarrowUn16to8x8:
                     dszBlg2 = 0; op = ARM64vecna_XTN; break;
                  case Iop_NarrowUn32to16x4:
                     dszBlg2 = 1; op = ARM64vecna_XTN; break;
                  case Iop_NarrowUn64to32x2:
                     dszBlg2 = 2; op = ARM64vecna_XTN; break;
                  case Iop_QNarrowUn16Sto8Sx8:
                     dszBlg2 = 0; op = ARM64vecna_SQXTN; break;
                  case Iop_QNarrowUn32Sto16Sx4:
                     dszBlg2 = 1; op = ARM64vecna_SQXTN; break;
                  case Iop_QNarrowUn64Sto32Sx2:
                     dszBlg2 = 2; op = ARM64vecna_SQXTN; break;
                  case Iop_QNarrowUn16Uto8Ux8:
                     dszBlg2 = 0; op = ARM64vecna_UQXTN; break;
                  case Iop_QNarrowUn32Uto16Ux4:
                     dszBlg2 = 1; op = ARM64vecna_UQXTN; break;
                  case Iop_QNarrowUn64Uto32Ux2:
                     dszBlg2 = 2; op = ARM64vecna_UQXTN; break;
                  case Iop_QNarrowUn16Sto8Ux8:
                     dszBlg2 = 0; op = ARM64vecna_SQXTUN; break;
                  case Iop_QNarrowUn32Sto16Ux4:
                     dszBlg2 = 1; op = ARM64vecna_SQXTUN; break;
                  case Iop_QNarrowUn64Sto32Ux2:
                     dszBlg2 = 2; op = ARM64vecna_SQXTUN; break;
                  default:
                     vassert(0);
               }
               addInstr(env, ARM64Instr_VNarrowV(op, dszBlg2, tmp, src));
               addInstr(env, ARM64Instr_VXfromQ(dst, tmp, 0/*laneNo*/));
               return dst;
            }
            case Iop_1Uto64: {
               /* 1Uto64(tmp). */
               HReg dst = newVRegI(env);
               if (e->Iex.Unop.arg->tag == Iex_RdTmp) {
                  ARM64RIL* one = mb_mkARM64RIL_I(1);
                  HReg src = lookupIRTemp(env, e->Iex.Unop.arg->Iex.RdTmp.tmp);
                  vassert(one);
                  addInstr(env, ARM64Instr_Logic(dst, src, one, ARM64lo_AND));
               } else {
                  /* CLONE-01 */
                  HReg zero = newVRegI(env);
                  HReg one  = newVRegI(env);
                  addInstr(env, ARM64Instr_Imm64(zero, 0));
                  addInstr(env, ARM64Instr_Imm64(one, 1));
                  ARM64CondCode cc = iselCondCode(env, e->Iex.Unop.arg);
                  addInstr(env, ARM64Instr_CSel(dst, one, zero, cc));
               }
               return dst;
            }
            case Iop_64to32:
            case Iop_64to16:
            case Iop_64to8:
               /* These are no-ops. */
               return iselIntExpr_R(env, e->Iex.Unop.arg);

            default:
               break;
         }

         break;
      }

      /* --------- GET --------- */
      case Iex_Get: {
         if (ty == Ity_I64
             && 0 == (e->Iex.Get.offset & 7) && e->Iex.Get.offset < (8<<12)-8) {
            HReg dst = newVRegI(env);
            ARM64AMode* am
               = mk_baseblock_64bit_access_amode(e->Iex.Get.offset);
            addInstr(env, ARM64Instr_LdSt64(True/*isLoad*/, dst, am));
            return dst;
         }
         if (ty == Ity_I32
             && 0 == (e->Iex.Get.offset & 3) && e->Iex.Get.offset < (4<<12)-4) {
            HReg dst = newVRegI(env);
            ARM64AMode* am
               = mk_baseblock_32bit_access_amode(e->Iex.Get.offset);
            addInstr(env, ARM64Instr_LdSt32(True/*isLoad*/, dst, am));
            return dst;
         }
         if (ty == Ity_I16
             && 0 == (e->Iex.Get.offset & 1) && e->Iex.Get.offset < (2<<12)-2) {
            HReg dst = newVRegI(env);
            ARM64AMode* am
               = mk_baseblock_16bit_access_amode(e->Iex.Get.offset);
            addInstr(env, ARM64Instr_LdSt16(True/*isLoad*/, dst, am));
            return dst;
         }
         if (ty == Ity_I8
             /* && no alignment check */ && e->Iex.Get.offset < (1<<12)-1) {
            HReg dst = newVRegI(env);
            ARM64AMode* am
               = mk_baseblock_8bit_access_amode(e->Iex.Get.offset);
            addInstr(env, ARM64Instr_LdSt8(True/*isLoad*/, dst, am));
            return dst;
         }
         break;
      }

      /* --------- CCALL --------- */
      case Iex_CCall: {
         HReg dst = newVRegI(env);
         vassert(ty == e->Iex.CCall.retty);

         /* be very restrictive for now.  Only 64-bit ints allowed for
            args, and 64 bits for return type.  Don't forget to change
            the RetLoc if more types are allowed in future. */
         if (e->Iex.CCall.retty != Ity_I64)
            goto irreducible;

         /* Marshal args, do the call, clear stack. */
         UInt addToSp = 0;
         RetLoc rloc = mk_RetLoc_INVALID();
         Bool ok = doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
                                 e->Iex.CCall.cee, e->Iex.CCall.retty,
                                 e->Iex.CCall.args );
         /* */
         if (ok) {
            vassert(is_sane_RetLoc(rloc));
            vassert(rloc.pri == RLPri_Int);
            vassert(addToSp == 0);
            addInstr(env, ARM64Instr_MovI(dst, hregARM64_X0()));
            return dst;
         }
         /* else fall through; will hit the irreducible: label */
      }

      /* --------- LITERAL --------- */
      /* 64-bit literals */
      case Iex_Const: {
         ULong u = 0;
         HReg dst = newVRegI(env);
         switch (e->Iex.Const.con->tag) {
            case Ico_U64: u = e->Iex.Const.con->Ico.U64; break;
            case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
            case Ico_U16: u = e->Iex.Const.con->Ico.U16; break;
            case Ico_U8:  u = e->Iex.Const.con->Ico.U8;  break;
            default: ppIRExpr(e); vpanic("iselIntExpr_R.Iex_Const(arm64)");
         }
         addInstr(env, ARM64Instr_Imm64(dst, u));
         return dst;
      }

      /* --------- MULTIPLEX --------- */
      case Iex_ITE: {
         /* ITE(ccexpr, iftrue, iffalse) */
         if (ty == Ity_I64 || ty == Ity_I32) {
            ARM64CondCode cc;
            HReg r1  = iselIntExpr_R(env, e->Iex.ITE.iftrue);
            HReg r0  = iselIntExpr_R(env, e->Iex.ITE.iffalse);
            HReg dst = newVRegI(env);
            cc = iselCondCode(env, e->Iex.ITE.cond);
            addInstr(env, ARM64Instr_CSel(dst, r1, r0, cc));
            return dst;
         }
         break;
      }

      default:
         break;
   } /* switch (e->tag) */

   /* We get here if no pattern matched. */
  irreducible:
   ppIRExpr(e);
   vpanic("iselIntExpr_R: cannot reduce tree");
}


/*---------------------------------------------------------*/
/*--- ISEL: Integer expressions (128 bit) ---*/
/*---------------------------------------------------------*/

/* Compute a 128-bit value into a register pair, which is returned as
   the first two parameters.  As with iselIntExpr_R, these may be
   either real or virtual regs; in any case they must not be changed
   by subsequent code emitted by the caller. */

static void iselInt128Expr ( HReg* rHi, HReg* rLo,
                             ISelEnv* env, IRExpr* e )
{
   iselInt128Expr_wrk(rHi, rLo, env, e);
#  if 0
   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
#  endif
   vassert(hregClass(*rHi) == HRcInt64);
   vassert(hregIsVirtual(*rHi));
   vassert(hregClass(*rLo) == HRcInt64);
   vassert(hregIsVirtual(*rLo));
}

/* DO NOT CALL THIS DIRECTLY ! */
static void iselInt128Expr_wrk ( HReg* rHi, HReg* rLo,
                                 ISelEnv* env, IRExpr* e )
{
   vassert(e);
   vassert(typeOfIRExpr(env->type_env,e) == Ity_I128);

   /* --------- BINARY ops --------- */
   if (e->tag == Iex_Binop) {
      switch (e->Iex.Binop.op) {
         /* 64 x 64 -> 128 multiply */
         case Iop_MullU64:
         case Iop_MullS64: {
            Bool syned = toBool(e->Iex.Binop.op == Iop_MullS64);
            HReg argL  = iselIntExpr_R(env, e->Iex.Binop.arg1);
            HReg argR  = iselIntExpr_R(env, e->Iex.Binop.arg2);
            HReg dstLo = newVRegI(env);
            HReg dstHi = newVRegI(env);
            addInstr(env, ARM64Instr_Mul(dstLo, argL, argR,
                                         ARM64mul_PLAIN));
            addInstr(env, ARM64Instr_Mul(dstHi, argL, argR,
                                         syned ? ARM64mul_SX : ARM64mul_ZX));
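            /* Presumably ARM64mul_PLAIN yields the low 64 bits (MUL) and
               ARM64mul_SX/ZX yield the high 64 bits (SMULH/UMULH), so
               the pair forms the full 128-bit product. */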
            *rHi = dstHi;
            *rLo = dstLo;
            return;
         }
         /* 64HLto128(e1,e2) */
         case Iop_64HLto128:
            *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
            *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
            return;
         default:
            break;
      }
   } /* if (e->tag == Iex_Binop) */

   ppIRExpr(e);
   vpanic("iselInt128Expr(arm64)");
}


/*---------------------------------------------------------*/
/*--- ISEL: Vector expressions (128 bit) ---*/
/*---------------------------------------------------------*/

static HReg iselV128Expr ( ISelEnv* env, IRExpr* e )
{
   HReg r = iselV128Expr_wrk( env, e );
   vassert(hregClass(r) == HRcVec128);
   vassert(hregIsVirtual(r));
   return r;
}

/* DO NOT CALL THIS DIRECTLY */
static HReg iselV128Expr_wrk ( ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env, e);
   vassert(e);
   vassert(ty == Ity_V128);

   if (e->tag == Iex_RdTmp) {
      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
   }

   if (e->tag == Iex_Const) {
      /* Only a very limited range of constants is handled. */
      vassert(e->Iex.Const.con->tag == Ico_V128);
      UShort con = e->Iex.Const.con->Ico.V128;
      HReg res = newVRegV(env);
      switch (con) {
         case 0x0000: case 0x000F: case 0x003F: case 0x00FF: case 0xFFFF:
            addInstr(env, ARM64Instr_VImmQ(res, con));
            return res;
         case 0x00F0:
            addInstr(env, ARM64Instr_VImmQ(res, 0x000F));
            addInstr(env, ARM64Instr_VExtV(res, res, res, 12));
            return res;
         case 0x0F00:
            addInstr(env, ARM64Instr_VImmQ(res, 0x000F));
            addInstr(env, ARM64Instr_VExtV(res, res, res, 8));
            return res;
         case 0x0FF0:
            addInstr(env, ARM64Instr_VImmQ(res, 0x00FF));
            addInstr(env, ARM64Instr_VExtV(res, res, res, 12));
            return res;
         case 0x0FFF:
            addInstr(env, ARM64Instr_VImmQ(res, 0x000F));
            addInstr(env, ARM64Instr_VExtV(res, res, res, 4));
            addInstr(env, ARM64Instr_VUnaryV(ARM64vecu_NOT, res, res));
            return res;
         case 0xF000:
            addInstr(env, ARM64Instr_VImmQ(res, 0x000F));
            addInstr(env, ARM64Instr_VExtV(res, res, res, 4));
            return res;
         case 0xFF00:
            addInstr(env, ARM64Instr_VImmQ(res, 0x00FF));
            addInstr(env, ARM64Instr_VExtV(res, res, res, 8));
            return res;
         default:
            break;
      }
      /* Unhandled */
      goto v128_expr_bad;
   }

   if (e->tag == Iex_Load) {
      HReg res = newVRegV(env);
      HReg rN  = iselIntExpr_R(env, e->Iex.Load.addr);
      vassert(ty == Ity_V128);
      addInstr(env, ARM64Instr_VLdStQ(True/*isLoad*/, res, rN));
      return res;
   }

   if (e->tag == Iex_Get) {
      UInt offs = (UInt)e->Iex.Get.offset;
      if (offs < (1<<12)) {
         HReg addr = mk_baseblock_128bit_access_addr(env, offs);
         HReg res  = newVRegV(env);
         vassert(ty == Ity_V128);
         addInstr(env, ARM64Instr_VLdStQ(True/*isLoad*/, res, addr));
         return res;
      }
      goto v128_expr_bad;
   }

   if (e->tag == Iex_Unop) {

      /* Iop_ZeroHIXXofV128 cases */
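      /* The 16-bit immediate of VImmQ appears to be a per-byte mask:
         bit i set means byte i of the 128-bit result is 0xFF.  Hence
         0x00FF below keeps the low 64 bits and clears the rest. */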
      UShort imm16 = 0;
      switch (e->Iex.Unop.op) {
         case Iop_ZeroHI64ofV128:  imm16 = 0x00FF; break;
         case Iop_ZeroHI96ofV128:  imm16 = 0x000F; break;
         case Iop_ZeroHI112ofV128: imm16 = 0x0003; break;
         case Iop_ZeroHI120ofV128: imm16 = 0x0001; break;
         default: break;
      }
      if (imm16 != 0) {
         HReg src = iselV128Expr(env, e->Iex.Unop.arg);
         HReg imm = newVRegV(env);
         HReg res = newVRegV(env);
         addInstr(env, ARM64Instr_VImmQ(imm, imm16));
         addInstr(env, ARM64Instr_VBinV(ARM64vecb_AND, res, src, imm));
         return res;
      }

      /* Other cases */
      switch (e->Iex.Unop.op) {
         case Iop_NotV128:
         case Iop_Abs64Fx2: case Iop_Abs32Fx4:
         case Iop_Neg64Fx2: case Iop_Neg32Fx4:
         case Iop_Abs64x2:  case Iop_Abs32x4:
         case Iop_Abs16x8:  case Iop_Abs8x16:
         case Iop_Cls32x4:  case Iop_Cls16x8: case Iop_Cls8x16:
         case Iop_Clz32x4:  case Iop_Clz16x8: case Iop_Clz8x16:
         case Iop_Cnt8x16:
         case Iop_Reverse1sIn8_x16:
         case Iop_Reverse8sIn16_x8:
         case Iop_Reverse8sIn32_x4: case Iop_Reverse16sIn32_x4:
         case Iop_Reverse8sIn64_x2: case Iop_Reverse16sIn64_x2:
         case Iop_Reverse32sIn64_x2:
         case Iop_RecipEst32Ux4:
         case Iop_RSqrtEst32Ux4:
         {
            HReg res = newVRegV(env);
            HReg arg = iselV128Expr(env, e->Iex.Unop.arg);
            ARM64VecUnaryOp op = ARM64vecu_INVALID;
            switch (e->Iex.Unop.op) {
               case Iop_NotV128:           op = ARM64vecu_NOT;         break;
               case Iop_Abs64Fx2:          op = ARM64vecu_FABS64x2;    break;
               case Iop_Abs32Fx4:          op = ARM64vecu_FABS32x4;    break;
               case Iop_Neg64Fx2:          op = ARM64vecu_FNEG64x2;    break;
               case Iop_Neg32Fx4:          op = ARM64vecu_FNEG32x4;    break;
               case Iop_Abs64x2:           op = ARM64vecu_ABS64x2;     break;
               case Iop_Abs32x4:           op = ARM64vecu_ABS32x4;     break;
               case Iop_Abs16x8:           op = ARM64vecu_ABS16x8;     break;
               case Iop_Abs8x16:           op = ARM64vecu_ABS8x16;     break;
               case Iop_Cls32x4:           op = ARM64vecu_CLS32x4;     break;
               case Iop_Cls16x8:           op = ARM64vecu_CLS16x8;     break;
               case Iop_Cls8x16:           op = ARM64vecu_CLS8x16;     break;
               case Iop_Clz32x4:           op = ARM64vecu_CLZ32x4;     break;
               case Iop_Clz16x8:           op = ARM64vecu_CLZ16x8;     break;
               case Iop_Clz8x16:           op = ARM64vecu_CLZ8x16;     break;
               case Iop_Cnt8x16:           op = ARM64vecu_CNT8x16;     break;
               case Iop_Reverse1sIn8_x16:  op = ARM64vecu_RBIT;        break;
               case Iop_Reverse8sIn16_x8:  op = ARM64vecu_REV1616B;    break;
               case Iop_Reverse8sIn32_x4:  op = ARM64vecu_REV3216B;    break;
               case Iop_Reverse16sIn32_x4: op = ARM64vecu_REV328H;     break;
               case Iop_Reverse8sIn64_x2:  op = ARM64vecu_REV6416B;    break;
               case Iop_Reverse16sIn64_x2: op = ARM64vecu_REV648H;     break;
               case Iop_Reverse32sIn64_x2: op = ARM64vecu_REV644S;     break;
               case Iop_RecipEst32Ux4:     op = ARM64vecu_URECPE32x4;  break;
               case Iop_RSqrtEst32Ux4:     op = ARM64vecu_URSQRTE32x4; break;
               default: vassert(0);
            }
            addInstr(env, ARM64Instr_VUnaryV(op, res, arg));
            return res;
         }
         case Iop_CmpNEZ8x16:
         case Iop_CmpNEZ16x8:
         case Iop_CmpNEZ32x4:
         case Iop_CmpNEZ64x2: {
            HReg arg  = iselV128Expr(env, e->Iex.Unop.arg);
            HReg zero = newVRegV(env);
            HReg res  = newVRegV(env);
            ARM64VecBinOp cmp = ARM64vecb_INVALID;
            switch (e->Iex.Unop.op) {
               case Iop_CmpNEZ64x2: cmp = ARM64vecb_CMEQ64x2; break;
               case Iop_CmpNEZ32x4: cmp = ARM64vecb_CMEQ32x4; break;
               case Iop_CmpNEZ16x8: cmp = ARM64vecb_CMEQ16x8; break;
               case Iop_CmpNEZ8x16: cmp = ARM64vecb_CMEQ8x16; break;
               default: vassert(0);
            }
            // This is pretty feeble.  Better: use CMP against zero
            // and avoid the extra instruction and extra register.
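            // (Presumably that means the compare-against-zero form,
            // CMEQ Vd.T, Vn.T, #0, which needs no zero register.)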
            addInstr(env, ARM64Instr_VImmQ(zero, 0x0000));
            addInstr(env, ARM64Instr_VBinV(cmp, res, arg, zero));
            addInstr(env, ARM64Instr_VUnaryV(ARM64vecu_NOT, res, res));
            return res;
         }
         case Iop_V256toV128_0:
         case Iop_V256toV128_1: {
            HReg vHi, vLo;
            iselV256Expr(&vHi, &vLo, env, e->Iex.Unop.arg);
            return (e->Iex.Unop.op == Iop_V256toV128_1) ? vHi : vLo;
         }
         case Iop_64UtoV128: {
            HReg res = newVRegV(env);
            HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, ARM64Instr_VQfromX(res, arg));
            return res;
         }
         case Iop_Widen8Sto16x8: {
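            /* Sketch of the trick used here and in the two cases below:
               ZIP1 with the source as both operands duplicates each lane
               into both halves of a double-width lane, and the signed
               shift right by the lane width then sign-extends the low
               copy. */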
            HReg res = newVRegV(env);
            HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, ARM64Instr_VQfromX(res, arg));
            addInstr(env, ARM64Instr_VBinV(ARM64vecb_ZIP18x16, res, res, res));
            addInstr(env, ARM64Instr_VShiftImmV(ARM64vecshi_SSHR16x8,
                                                res, res, 8));
            return res;
         }
         case Iop_Widen16Sto32x4: {
            HReg res = newVRegV(env);
            HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, ARM64Instr_VQfromX(res, arg));
            addInstr(env, ARM64Instr_VBinV(ARM64vecb_ZIP116x8, res, res, res));
            addInstr(env, ARM64Instr_VShiftImmV(ARM64vecshi_SSHR32x4,
                                                res, res, 16));
            return res;
         }
         case Iop_Widen32Sto64x2: {
            HReg res = newVRegV(env);
            HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, ARM64Instr_VQfromX(res, arg));
            addInstr(env, ARM64Instr_VBinV(ARM64vecb_ZIP132x4, res, res, res));
            addInstr(env, ARM64Instr_VShiftImmV(ARM64vecshi_SSHR64x2,
                                                res, res, 32));
            return res;
         }
         /* ... */
         default:
            break;
      } /* switch on the unop */
   } /* if (e->tag == Iex_Unop) */

   if (e->tag == Iex_Binop) {
      switch (e->Iex.Binop.op) {
         case Iop_64HLtoV128: {
            HReg res  = newVRegV(env);
            HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
            HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
            addInstr(env, ARM64Instr_VQfromXX(res, argL, argR));
            return res;
         }
         /* -- Cases where we can generate a simple three-reg instruction. -- */
         case Iop_AndV128:
         case Iop_OrV128:
         case Iop_XorV128:
         case Iop_Max32Ux4: case Iop_Max16Ux8: case Iop_Max8Ux16:
         case Iop_Min32Ux4: case Iop_Min16Ux8: case Iop_Min8Ux16:
         case Iop_Max32Sx4: case Iop_Max16Sx8: case Iop_Max8Sx16:
         case Iop_Min32Sx4: case Iop_Min16Sx8: case Iop_Min8Sx16:
         case Iop_Add64x2: case Iop_Add32x4:
         case Iop_Add16x8: case Iop_Add8x16:
         case Iop_Sub64x2: case Iop_Sub32x4:
         case Iop_Sub16x8: case Iop_Sub8x16:
         case Iop_Mul32x4: case Iop_Mul16x8: case Iop_Mul8x16:
         case Iop_CmpEQ64x2: case Iop_CmpEQ32x4:
         case Iop_CmpEQ16x8: case Iop_CmpEQ8x16:
         case Iop_CmpGT64Ux2: case Iop_CmpGT32Ux4:
         case Iop_CmpGT16Ux8: case Iop_CmpGT8Ux16:
         case Iop_CmpGT64Sx2: case Iop_CmpGT32Sx4:
         case Iop_CmpGT16Sx8: case Iop_CmpGT8Sx16:
         case Iop_CmpEQ64Fx2: case Iop_CmpEQ32Fx4:
         case Iop_CmpLE64Fx2: case Iop_CmpLE32Fx4:
         case Iop_CmpLT64Fx2: case Iop_CmpLT32Fx4:
         case Iop_Perm8x16:
         case Iop_InterleaveLO64x2: case Iop_CatEvenLanes32x4:
         case Iop_CatEvenLanes16x8: case Iop_CatEvenLanes8x16:
         case Iop_InterleaveHI64x2: case Iop_CatOddLanes32x4:
         case Iop_CatOddLanes16x8:  case Iop_CatOddLanes8x16:
         case Iop_InterleaveHI32x4:
         case Iop_InterleaveHI16x8: case Iop_InterleaveHI8x16:
         case Iop_InterleaveLO32x4:
         case Iop_InterleaveLO16x8: case Iop_InterleaveLO8x16:
         case Iop_PolynomialMul8x16:
         case Iop_QAdd64Sx2: case Iop_QAdd32Sx4:
         case Iop_QAdd16Sx8: case Iop_QAdd8Sx16:
         case Iop_QAdd64Ux2: case Iop_QAdd32Ux4:
         case Iop_QAdd16Ux8: case Iop_QAdd8Ux16:
         case Iop_QSub64Sx2: case Iop_QSub32Sx4:
         case Iop_QSub16Sx8: case Iop_QSub8Sx16:
         case Iop_QSub64Ux2: case Iop_QSub32Ux4:
         case Iop_QSub16Ux8: case Iop_QSub8Ux16:
         case Iop_QDMulHi32Sx4:  case Iop_QDMulHi16Sx8:
         case Iop_QRDMulHi32Sx4: case Iop_QRDMulHi16Sx8:
         case Iop_Sh8Sx16:  case Iop_Sh16Sx8:
         case Iop_Sh32Sx4:  case Iop_Sh64Sx2:
         case Iop_Sh8Ux16:  case Iop_Sh16Ux8:
         case Iop_Sh32Ux4:  case Iop_Sh64Ux2:
         case Iop_Rsh8Sx16: case Iop_Rsh16Sx8:
         case Iop_Rsh32Sx4: case Iop_Rsh64Sx2:
         case Iop_Rsh8Ux16: case Iop_Rsh16Ux8:
         case Iop_Rsh32Ux4: case Iop_Rsh64Ux2:
         case Iop_Max64Fx2: case Iop_Max32Fx4:
         case Iop_Min64Fx2: case Iop_Min32Fx4:
         {
            HReg res  = newVRegV(env);
            HReg argL = iselV128Expr(env, e->Iex.Binop.arg1);
            HReg argR = iselV128Expr(env, e->Iex.Binop.arg2);
            Bool sw   = False;
            ARM64VecBinOp op = ARM64vecb_INVALID;
            switch (e->Iex.Binop.op) {
               case Iop_AndV128:    op = ARM64vecb_AND;       break;
               case Iop_OrV128:     op = ARM64vecb_ORR;       break;
               case Iop_XorV128:    op = ARM64vecb_XOR;       break;
               case Iop_Max32Ux4:   op = ARM64vecb_UMAX32x4;  break;
               case Iop_Max16Ux8:   op = ARM64vecb_UMAX16x8;  break;
               case Iop_Max8Ux16:   op = ARM64vecb_UMAX8x16;  break;
               case Iop_Min32Ux4:   op = ARM64vecb_UMIN32x4;  break;
               case Iop_Min16Ux8:   op = ARM64vecb_UMIN16x8;  break;
               case Iop_Min8Ux16:   op = ARM64vecb_UMIN8x16;  break;
               case Iop_Max32Sx4:   op = ARM64vecb_SMAX32x4;  break;
               case Iop_Max16Sx8:   op = ARM64vecb_SMAX16x8;  break;
               case Iop_Max8Sx16:   op = ARM64vecb_SMAX8x16;  break;
               case Iop_Min32Sx4:   op = ARM64vecb_SMIN32x4;  break;
               case Iop_Min16Sx8:   op = ARM64vecb_SMIN16x8;  break;
               case Iop_Min8Sx16:   op = ARM64vecb_SMIN8x16;  break;
               case Iop_Add64x2:    op = ARM64vecb_ADD64x2;   break;
               case Iop_Add32x4:    op = ARM64vecb_ADD32x4;   break;
               case Iop_Add16x8:    op = ARM64vecb_ADD16x8;   break;
               case Iop_Add8x16:    op = ARM64vecb_ADD8x16;   break;
               case Iop_Sub64x2:    op = ARM64vecb_SUB64x2;   break;
               case Iop_Sub32x4:    op = ARM64vecb_SUB32x4;   break;
               case Iop_Sub16x8:    op = ARM64vecb_SUB16x8;   break;
               case Iop_Sub8x16:    op = ARM64vecb_SUB8x16;   break;
               case Iop_Mul32x4:    op = ARM64vecb_MUL32x4;   break;
               case Iop_Mul16x8:    op = ARM64vecb_MUL16x8;   break;
               case Iop_Mul8x16:    op = ARM64vecb_MUL8x16;   break;
               case Iop_CmpEQ64x2:  op = ARM64vecb_CMEQ64x2;  break;
               case Iop_CmpEQ32x4:  op = ARM64vecb_CMEQ32x4;  break;
               case Iop_CmpEQ16x8:  op = ARM64vecb_CMEQ16x8;  break;
               case Iop_CmpEQ8x16:  op = ARM64vecb_CMEQ8x16;  break;
               case Iop_CmpGT64Ux2: op = ARM64vecb_CMHI64x2;  break;
               case Iop_CmpGT32Ux4: op = ARM64vecb_CMHI32x4;  break;
               case Iop_CmpGT16Ux8: op = ARM64vecb_CMHI16x8;  break;
               case Iop_CmpGT8Ux16: op = ARM64vecb_CMHI8x16;  break;
               case Iop_CmpGT64Sx2: op = ARM64vecb_CMGT64x2;  break;
               case Iop_CmpGT32Sx4: op = ARM64vecb_CMGT32x4;  break;
               case Iop_CmpGT16Sx8: op = ARM64vecb_CMGT16x8;  break;
               case Iop_CmpGT8Sx16: op = ARM64vecb_CMGT8x16;  break;
               case Iop_CmpEQ64Fx2: op = ARM64vecb_FCMEQ64x2; break;
               case Iop_CmpEQ32Fx4: op = ARM64vecb_FCMEQ32x4; break;
               case Iop_CmpLE64Fx2: op = ARM64vecb_FCMGE64x2; sw = True; break;
               case Iop_CmpLE32Fx4: op = ARM64vecb_FCMGE32x4; sw = True; break;
               case Iop_CmpLT64Fx2: op = ARM64vecb_FCMGT64x2; sw = True; break;
               case Iop_CmpLT32Fx4: op = ARM64vecb_FCMGT32x4; sw = True; break;
               case Iop_Perm8x16:   op = ARM64vecb_TBL1;      break;
               case Iop_InterleaveLO64x2: op = ARM64vecb_UZP164x2; sw = True;
                  break;
               case Iop_CatEvenLanes32x4: op = ARM64vecb_UZP132x4; sw = True;
                  break;
               case Iop_CatEvenLanes16x8: op = ARM64vecb_UZP116x8; sw = True;
                  break;
               case Iop_CatEvenLanes8x16: op = ARM64vecb_UZP18x16; sw = True;
                  break;
               case Iop_InterleaveHI64x2: op = ARM64vecb_UZP264x2; sw = True;
                  break;
               case Iop_CatOddLanes32x4:  op = ARM64vecb_UZP232x4; sw = True;
                  break;
               case Iop_CatOddLanes16x8:  op = ARM64vecb_UZP216x8; sw = True;
                  break;
               case Iop_CatOddLanes8x16:  op = ARM64vecb_UZP28x16; sw = True;
                  break;
               case Iop_InterleaveHI32x4: op = ARM64vecb_ZIP232x4; sw = True;
                  break;
               case Iop_InterleaveHI16x8: op = ARM64vecb_ZIP216x8; sw = True;
                  break;
               case Iop_InterleaveHI8x16: op = ARM64vecb_ZIP28x16; sw = True;
                  break;
               case Iop_InterleaveLO32x4: op = ARM64vecb_ZIP132x4; sw = True;
                  break;
               case Iop_InterleaveLO16x8: op = ARM64vecb_ZIP116x8; sw = True;
                  break;
               case Iop_InterleaveLO8x16: op = ARM64vecb_ZIP18x16; sw = True;
                  break;
               case Iop_PolynomialMul8x16: op = ARM64vecb_PMUL8x16; break;
               case Iop_QAdd64Sx2:     op = ARM64vecb_SQADD64x2;    break;
               case Iop_QAdd32Sx4:     op = ARM64vecb_SQADD32x4;    break;
               case Iop_QAdd16Sx8:     op = ARM64vecb_SQADD16x8;    break;
               case Iop_QAdd8Sx16:     op = ARM64vecb_SQADD8x16;    break;
               case Iop_QAdd64Ux2:     op = ARM64vecb_UQADD64x2;    break;
               case Iop_QAdd32Ux4:     op = ARM64vecb_UQADD32x4;    break;
               case Iop_QAdd16Ux8:     op = ARM64vecb_UQADD16x8;    break;
               case Iop_QAdd8Ux16:     op = ARM64vecb_UQADD8x16;    break;
               case Iop_QSub64Sx2:     op = ARM64vecb_SQSUB64x2;    break;
               case Iop_QSub32Sx4:     op = ARM64vecb_SQSUB32x4;    break;
               case Iop_QSub16Sx8:     op = ARM64vecb_SQSUB16x8;    break;
               case Iop_QSub8Sx16:     op = ARM64vecb_SQSUB8x16;    break;
               case Iop_QSub64Ux2:     op = ARM64vecb_UQSUB64x2;    break;
               case Iop_QSub32Ux4:     op = ARM64vecb_UQSUB32x4;    break;
               case Iop_QSub16Ux8:     op = ARM64vecb_UQSUB16x8;    break;
               case Iop_QSub8Ux16:     op = ARM64vecb_UQSUB8x16;    break;
               case Iop_QDMulHi32Sx4:  op = ARM64vecb_SQDMULH32x4;  break;
               case Iop_QDMulHi16Sx8:  op = ARM64vecb_SQDMULH16x8;  break;
               case Iop_QRDMulHi32Sx4: op = ARM64vecb_SQRDMULH32x4; break;
               case Iop_QRDMulHi16Sx8: op = ARM64vecb_SQRDMULH16x8; break;
               case Iop_Sh8Sx16:  op = ARM64vecb_SSHL8x16;  break;
               case Iop_Sh16Sx8:  op = ARM64vecb_SSHL16x8;  break;
               case Iop_Sh32Sx4:  op = ARM64vecb_SSHL32x4;  break;
               case Iop_Sh64Sx2:  op = ARM64vecb_SSHL64x2;  break;
               case Iop_Sh8Ux16:  op = ARM64vecb_USHL8x16;  break;
               case Iop_Sh16Ux8:  op = ARM64vecb_USHL16x8;  break;
               case Iop_Sh32Ux4:  op = ARM64vecb_USHL32x4;  break;
               case Iop_Sh64Ux2:  op = ARM64vecb_USHL64x2;  break;
               case Iop_Rsh8Sx16: op = ARM64vecb_SRSHL8x16; break;
               case Iop_Rsh16Sx8: op = ARM64vecb_SRSHL16x8; break;
               case Iop_Rsh32Sx4: op = ARM64vecb_SRSHL32x4; break;
               case Iop_Rsh64Sx2: op = ARM64vecb_SRSHL64x2; break;
               case Iop_Rsh8Ux16: op = ARM64vecb_URSHL8x16; break;
               case Iop_Rsh16Ux8: op = ARM64vecb_URSHL16x8; break;
               case Iop_Rsh32Ux4: op = ARM64vecb_URSHL32x4; break;
               case Iop_Rsh64Ux2: op = ARM64vecb_URSHL64x2; break;
               case Iop_Max64Fx2: op = ARM64vecb_FMAX64x2;  break;
               case Iop_Max32Fx4: op = ARM64vecb_FMAX32x4;  break;
               case Iop_Min64Fx2: op = ARM64vecb_FMIN64x2;  break;
               case Iop_Min32Fx4: op = ARM64vecb_FMIN32x4;  break;
               default: vassert(0);
            }
            if (sw) {
               addInstr(env, ARM64Instr_VBinV(op, res, argR, argL));
            } else {
               addInstr(env, ARM64Instr_VBinV(op, res, argL, argR));
            }
            return res;
         }
         /* -- These ops have only two-operand instructions, so we must
            first move one of the arguments into a fresh register, for
            in-place modification. -- */
         case Iop_QAddExtUSsatSS8x16: case Iop_QAddExtUSsatSS16x8:
         case Iop_QAddExtUSsatSS32x4: case Iop_QAddExtUSsatSS64x2:
         case Iop_QAddExtSUsatUU8x16: case Iop_QAddExtSUsatUU16x8:
         case Iop_QAddExtSUsatUU32x4: case Iop_QAddExtSUsatUU64x2:
         {
            HReg res  = newVRegV(env);
            HReg argL = iselV128Expr(env, e->Iex.Binop.arg1);
            HReg argR = iselV128Expr(env, e->Iex.Binop.arg2);
            ARM64VecModifyOp op = ARM64vecmo_INVALID;
            switch (e->Iex.Binop.op) {
               /* In the following 8 cases, the US - SU switching is
                  intended.  See comments in libvex_ir.h for details,
                  and in the ARM64 front end, where these primops are
                  generated. */
               case Iop_QAddExtUSsatSS8x16: op = ARM64vecmo_SUQADD8x16; break;
               case Iop_QAddExtUSsatSS16x8: op = ARM64vecmo_SUQADD16x8; break;
               case Iop_QAddExtUSsatSS32x4: op = ARM64vecmo_SUQADD32x4; break;
               case Iop_QAddExtUSsatSS64x2: op = ARM64vecmo_SUQADD64x2; break;
               case Iop_QAddExtSUsatUU8x16: op = ARM64vecmo_USQADD8x16; break;
               case Iop_QAddExtSUsatUU16x8: op = ARM64vecmo_USQADD16x8; break;
               case Iop_QAddExtSUsatUU32x4: op = ARM64vecmo_USQADD32x4; break;
               case Iop_QAddExtSUsatUU64x2: op = ARM64vecmo_USQADD64x2; break;
               default: vassert(0);
            }
            /* The order of the operands matters.  Although this is
               basically addition, the two operands are extended
               differently, so they must end up in the correct registers
               of the instruction. */
            addInstr(env, ARM64Instr_VMov(16, res, argR));
            addInstr(env, ARM64Instr_VModifyV(op, res, argL));
            return res;
         }
         /* -- Shifts by an immediate. -- */
         case Iop_ShrN64x2: case Iop_ShrN32x4:
         case Iop_ShrN16x8: case Iop_ShrN8x16:
         case Iop_SarN64x2: case Iop_SarN32x4:
         case Iop_SarN16x8: case Iop_SarN8x16:
         case Iop_ShlN64x2: case Iop_ShlN32x4:
         case Iop_ShlN16x8: case Iop_ShlN8x16:
         case Iop_QShlNsatUU64x2: case Iop_QShlNsatUU32x4:
         case Iop_QShlNsatUU16x8: case Iop_QShlNsatUU8x16:
         case Iop_QShlNsatSS64x2: case Iop_QShlNsatSS32x4:
         case Iop_QShlNsatSS16x8: case Iop_QShlNsatSS8x16:
         case Iop_QShlNsatSU64x2: case Iop_QShlNsatSU32x4:
         case Iop_QShlNsatSU16x8: case Iop_QShlNsatSU8x16:
         {
            IRExpr* argL = e->Iex.Binop.arg1;
            IRExpr* argR = e->Iex.Binop.arg2;
            if (argR->tag == Iex_Const && argR->Iex.Const.con->tag == Ico_U8) {
               UInt amt   = argR->Iex.Const.con->Ico.U8;
               UInt limLo = 0;
               UInt limHi = 0;
               ARM64VecShiftImmOp op = ARM64vecshi_INVALID;
               /* Establish the instruction to use. */
               switch (e->Iex.Binop.op) {
                  case Iop_ShrN64x2:       op = ARM64vecshi_USHR64x2;   break;
                  case Iop_ShrN32x4:       op = ARM64vecshi_USHR32x4;   break;
                  case Iop_ShrN16x8:       op = ARM64vecshi_USHR16x8;   break;
                  case Iop_ShrN8x16:       op = ARM64vecshi_USHR8x16;   break;
                  case Iop_SarN64x2:       op = ARM64vecshi_SSHR64x2;   break;
                  case Iop_SarN32x4:       op = ARM64vecshi_SSHR32x4;   break;
                  case Iop_SarN16x8:       op = ARM64vecshi_SSHR16x8;   break;
                  case Iop_SarN8x16:       op = ARM64vecshi_SSHR8x16;   break;
                  case Iop_ShlN64x2:       op = ARM64vecshi_SHL64x2;    break;
                  case Iop_ShlN32x4:       op = ARM64vecshi_SHL32x4;    break;
                  case Iop_ShlN16x8:       op = ARM64vecshi_SHL16x8;    break;
                  case Iop_ShlN8x16:       op = ARM64vecshi_SHL8x16;    break;
                  case Iop_QShlNsatUU64x2: op = ARM64vecshi_UQSHL64x2;  break;
                  case Iop_QShlNsatUU32x4: op = ARM64vecshi_UQSHL32x4;  break;
                  case Iop_QShlNsatUU16x8: op = ARM64vecshi_UQSHL16x8;  break;
                  case Iop_QShlNsatUU8x16: op = ARM64vecshi_UQSHL8x16;  break;
                  case Iop_QShlNsatSS64x2: op = ARM64vecshi_SQSHL64x2;  break;
                  case Iop_QShlNsatSS32x4: op = ARM64vecshi_SQSHL32x4;  break;
                  case Iop_QShlNsatSS16x8: op = ARM64vecshi_SQSHL16x8;  break;
                  case Iop_QShlNsatSS8x16: op = ARM64vecshi_SQSHL8x16;  break;
                  case Iop_QShlNsatSU64x2: op = ARM64vecshi_SQSHLU64x2; break;
                  case Iop_QShlNsatSU32x4: op = ARM64vecshi_SQSHLU32x4; break;
                  case Iop_QShlNsatSU16x8: op = ARM64vecshi_SQSHLU16x8; break;
                  case Iop_QShlNsatSU8x16: op = ARM64vecshi_SQSHLU8x16; break;
                  default: vassert(0);
               }
               /* Establish the shift limits, for sanity check purposes only. */
               switch (e->Iex.Binop.op) {
                  case Iop_ShrN64x2:       limLo = 1; limHi = 64; break;
                  case Iop_ShrN32x4:       limLo = 1; limHi = 32; break;
                  case Iop_ShrN16x8:       limLo = 1; limHi = 16; break;
                  case Iop_ShrN8x16:       limLo = 1; limHi = 8;  break;
                  case Iop_SarN64x2:       limLo = 1; limHi = 64; break;
                  case Iop_SarN32x4:       limLo = 1; limHi = 32; break;
                  case Iop_SarN16x8:       limLo = 1; limHi = 16; break;
                  case Iop_SarN8x16:       limLo = 1; limHi = 8;  break;
                  case Iop_ShlN64x2:       limLo = 0; limHi = 63; break;
                  case Iop_ShlN32x4:       limLo = 0; limHi = 31; break;
                  case Iop_ShlN16x8:       limLo = 0; limHi = 15; break;
                  case Iop_ShlN8x16:       limLo = 0; limHi = 7;  break;
                  case Iop_QShlNsatUU64x2: limLo = 0; limHi = 63; break;
                  case Iop_QShlNsatUU32x4: limLo = 0; limHi = 31; break;
                  case Iop_QShlNsatUU16x8: limLo = 0; limHi = 15; break;
                  case Iop_QShlNsatUU8x16: limLo = 0; limHi = 7;  break;
                  case Iop_QShlNsatSS64x2: limLo = 0; limHi = 63; break;
                  case Iop_QShlNsatSS32x4: limLo = 0; limHi = 31; break;
                  case Iop_QShlNsatSS16x8: limLo = 0; limHi = 15; break;
                  case Iop_QShlNsatSS8x16: limLo = 0; limHi = 7;  break;
                  case Iop_QShlNsatSU64x2: limLo = 0; limHi = 63; break;
                  case Iop_QShlNsatSU32x4: limLo = 0; limHi = 31; break;
                  case Iop_QShlNsatSU16x8: limLo = 0; limHi = 15; break;
                  case Iop_QShlNsatSU8x16: limLo = 0; limHi = 7;  break;
                  default: vassert(0);
               }
               /* For left shifts, the allowable amt values are
                  0 .. lane_bits-1.  For right shifts the allowable
                  values are 1 .. lane_bits. */
               if (op != ARM64vecshi_INVALID && amt >= limLo && amt <= limHi) {
                  HReg src = iselV128Expr(env, argL);
                  HReg dst = newVRegV(env);
                  addInstr(env, ARM64Instr_VShiftImmV(op, dst, src, amt));
                  return dst;
               }
               /* Special case some no-op shifts that the arm64 front end
                  throws at us.  We can't generate any instructions for
                  these, but we don't need to either. */
               switch (e->Iex.Binop.op) {
                  case Iop_ShrN64x2: case Iop_ShrN32x4:
                  case Iop_ShrN16x8: case Iop_ShrN8x16:
                     if (amt == 0) {
                        return iselV128Expr(env, argL);
                     }
                     break;
                  default:
                     break;
               }
               /* otherwise unhandled */
            }
            /* else fall out; this is unhandled */
            break;
         }
         /* -- Saturating narrowing by an immediate -- */
         /* uu */
         case Iop_QandQShrNnarrow16Uto8Ux8:
         case Iop_QandQShrNnarrow32Uto16Ux4:
         case Iop_QandQShrNnarrow64Uto32Ux2:
         /* ss */
         case Iop_QandQSarNnarrow16Sto8Sx8:
         case Iop_QandQSarNnarrow32Sto16Sx4:
         case Iop_QandQSarNnarrow64Sto32Sx2:
         /* su */
         case Iop_QandQSarNnarrow16Sto8Ux8:
         case Iop_QandQSarNnarrow32Sto16Ux4:
         case Iop_QandQSarNnarrow64Sto32Ux2:
         /* ruu */
         case Iop_QandQRShrNnarrow16Uto8Ux8:
         case Iop_QandQRShrNnarrow32Uto16Ux4:
         case Iop_QandQRShrNnarrow64Uto32Ux2:
         /* rss */
         case Iop_QandQRSarNnarrow16Sto8Sx8:
         case Iop_QandQRSarNnarrow32Sto16Sx4:
         case Iop_QandQRSarNnarrow64Sto32Sx2:
         /* rsu */
         case Iop_QandQRSarNnarrow16Sto8Ux8:
         case Iop_QandQRSarNnarrow32Sto16Ux4:
         case Iop_QandQRSarNnarrow64Sto32Ux2:
         {
            IRExpr* argL = e->Iex.Binop.arg1;
            IRExpr* argR = e->Iex.Binop.arg2;
            if (argR->tag == Iex_Const && argR->Iex.Const.con->tag == Ico_U8) {
               UInt amt   = argR->Iex.Const.con->Ico.U8;
               UInt limit = 0;
               ARM64VecShiftImmOp op = ARM64vecshi_INVALID;
               switch (e->Iex.Binop.op) {
                  /* uu */
                  case Iop_QandQShrNnarrow64Uto32Ux2:
                     op = ARM64vecshi_UQSHRN2SD;  limit = 64; break;
                  case Iop_QandQShrNnarrow32Uto16Ux4:
                     op = ARM64vecshi_UQSHRN4HS;  limit = 32; break;
                  case Iop_QandQShrNnarrow16Uto8Ux8:
                     op = ARM64vecshi_UQSHRN8BH;  limit = 16; break;
                  /* ss */
                  case Iop_QandQSarNnarrow64Sto32Sx2:
                     op = ARM64vecshi_SQSHRN2SD;  limit = 64; break;
                  case Iop_QandQSarNnarrow32Sto16Sx4:
                     op = ARM64vecshi_SQSHRN4HS;  limit = 32; break;
                  case Iop_QandQSarNnarrow16Sto8Sx8:
                     op = ARM64vecshi_SQSHRN8BH;  limit = 16; break;
                  /* su */
                  case Iop_QandQSarNnarrow64Sto32Ux2:
                     op = ARM64vecshi_SQSHRUN2SD; limit = 64; break;
                  case Iop_QandQSarNnarrow32Sto16Ux4:
                     op = ARM64vecshi_SQSHRUN4HS; limit = 32; break;
                  case Iop_QandQSarNnarrow16Sto8Ux8:
                     op = ARM64vecshi_SQSHRUN8BH; limit = 16; break;
                  /* ruu */
                  case Iop_QandQRShrNnarrow64Uto32Ux2:
                     op = ARM64vecshi_UQRSHRN2SD; limit = 64; break;
                  case Iop_QandQRShrNnarrow32Uto16Ux4:
                     op = ARM64vecshi_UQRSHRN4HS; limit = 32; break;
                  case Iop_QandQRShrNnarrow16Uto8Ux8:
                     op = ARM64vecshi_UQRSHRN8BH; limit = 16; break;
                  /* rss */
                  case Iop_QandQRSarNnarrow64Sto32Sx2:
                     op = ARM64vecshi_SQRSHRN2SD; limit = 64; break;
                  case Iop_QandQRSarNnarrow32Sto16Sx4:
                     op = ARM64vecshi_SQRSHRN4HS; limit = 32; break;
                  case Iop_QandQRSarNnarrow16Sto8Sx8:
                     op = ARM64vecshi_SQRSHRN8BH; limit = 16; break;
                  /* rsu */
                  case Iop_QandQRSarNnarrow64Sto32Ux2:
                     op = ARM64vecshi_SQRSHRUN2SD; limit = 64; break;
                  case Iop_QandQRSarNnarrow32Sto16Ux4:
                     op = ARM64vecshi_SQRSHRUN4HS; limit = 32; break;
                  case Iop_QandQRSarNnarrow16Sto8Ux8:
                     op = ARM64vecshi_SQRSHRUN8BH; limit = 16; break;
                  /**/
                  default:
                     vassert(0);
               }
               if (op != ARM64vecshi_INVALID && amt >= 1 && amt <= limit) {
                  HReg src  = iselV128Expr(env, argL);
                  HReg dst  = newVRegV(env);
                  HReg fpsr = newVRegI(env);
                  /* Clear FPSR.Q, do the operation, and return both its
                     result and the new value of FPSR.Q.  We can simply
                     zero out FPSR since all the other bits have no
                     relevance in VEX generated code. */
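                  /* FPSR.QC, the cumulative saturation flag, is bit 27;
                     hence the SHR by 27 and the AND with 1 below. */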
                  addInstr(env, ARM64Instr_Imm64(fpsr, 0));
                  addInstr(env, ARM64Instr_FPSR(True/*toFPSR*/, fpsr));
                  addInstr(env, ARM64Instr_VShiftImmV(op, dst, src, amt));
                  addInstr(env, ARM64Instr_FPSR(False/*!toFPSR*/, fpsr));
                  addInstr(env, ARM64Instr_Shift(fpsr, fpsr, ARM64RI6_I6(27),
                                                 ARM64sh_SHR));
                  ARM64RIL* ril_one = mb_mkARM64RIL_I(1);
                  vassert(ril_one);
                  addInstr(env, ARM64Instr_Logic(fpsr,
                                                 fpsr, ril_one, ARM64lo_AND));
                  /* Now we have: the main (shift) result in the bottom half
                     of |dst|, and the Q bit at the bottom of |fpsr|.
                     Combining them with a "InterleaveLO64x2" style operation
                     produces a 128 bit value, dst[63:0]:fpsr[63:0],
                     which is what we want. */
                  HReg scratch = newVRegV(env);
                  addInstr(env, ARM64Instr_VQfromX(scratch, fpsr));
                  addInstr(env, ARM64Instr_VBinV(ARM64vecb_UZP164x2,
                                                 dst, dst, scratch));
                  return dst;
               }
            }
            /* else fall out; this is unhandled */
            break;
         }

         // Use Iop_SliceV128 in preference to Iop_ShlV128 and Iop_ShrV128,
         // as it is in some ways more general and often leads to better
         // code overall.
         case Iop_ShlV128:
         case Iop_ShrV128: {
            Bool isSHR = e->Iex.Binop.op == Iop_ShrV128;
            /* This is tricky.  Generate an EXT instruction with zeroes in
               the high operand (shift right) or low operand (shift left).
               Note that we can only slice in the EXT instruction at a byte
               level of granularity, so the shift amount needs careful
               checking. */
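            /* A sketch of the right-shift case: ShrV128(v, 0x20) becomes
               EXT dst, src, zeroes, #4, which takes bytes 4..15 of src
               followed by bytes 0..3 of zeroes, i.e. v shifted right by
               32 bits. */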
2801 IRExpr* argL = e->Iex.Binop.arg1;
2802 IRExpr* argR = e->Iex.Binop.arg2;
2803 if (argR->tag == Iex_Const && argR->Iex.Const.con->tag == Ico_U8) {
2804 UInt amt = argR->Iex.Const.con->Ico.U8;
2805 Bool amtOK = False;
2806 switch (amt) {
2807 case 0x08: case 0x10: case 0x18: case 0x20: case 0x28:
2808 case 0x30: case 0x38: case 0x40: case 0x48: case 0x50:
2809 case 0x58: case 0x60: case 0x68: case 0x70: case 0x78:
2810 amtOK = True; break;
2811 }
2812 /* We could also deal with amt==0 by copying the source to
2813 the destination, but there's no need for that so far. */
2814 if (amtOK) {
2815 HReg src = iselV128Expr(env, argL);
2816 HReg srcZ = newVRegV(env);
2817 addInstr(env, ARM64Instr_VImmQ(srcZ, 0x0000));
2818 UInt immB = amt / 8;
2819 vassert(immB >= 1 && immB <= 15);
2820 HReg dst = newVRegV(env);
2821 if (isSHR) {
2822 addInstr(env, ARM64Instr_VExtV(dst, src/*lo*/, srcZ/*hi*/,
2823 immB));
2824 } else {
2825 addInstr(env, ARM64Instr_VExtV(dst, srcZ/*lo*/, src/*hi*/,
2826 16 - immB));
2827 }
2828 return dst;
2829 }
2830 }
2831 /* else fall out; this is unhandled */
2832 break;
2833 }
sewardjab33a7a2014-06-19 22:20:47 +00002834
sewardj6f312d02014-06-28 12:21:37 +00002835 case Iop_PolynomialMull8x8:
2836 case Iop_Mull32Ux2:
2837 case Iop_Mull16Ux4:
2838 case Iop_Mull8Ux8:
2839 case Iop_Mull32Sx2:
2840 case Iop_Mull16Sx4:
2841 case Iop_Mull8Sx8:
sewardj51d012a2014-07-21 09:19:50 +00002842 case Iop_QDMull32Sx2:
2843 case Iop_QDMull16Sx4:
sewardj6f312d02014-06-28 12:21:37 +00002844 {
sewardj31b5a952014-06-26 07:41:14 +00002845 HReg iSrcL = iselIntExpr_R(env, e->Iex.Binop.arg1);
2846 HReg iSrcR = iselIntExpr_R(env, e->Iex.Binop.arg2);
2847 HReg vSrcL = newVRegV(env);
2848 HReg vSrcR = newVRegV(env);
2849 HReg dst = newVRegV(env);
sewardj6f312d02014-06-28 12:21:37 +00002850 ARM64VecBinOp op = ARM64vecb_INVALID;
2851 switch (e->Iex.Binop.op) {
sewardj51d012a2014-07-21 09:19:50 +00002852 case Iop_PolynomialMull8x8: op = ARM64vecb_PMULL8x8; break;
2853 case Iop_Mull32Ux2: op = ARM64vecb_UMULL2DSS; break;
2854 case Iop_Mull16Ux4: op = ARM64vecb_UMULL4SHH; break;
2855 case Iop_Mull8Ux8: op = ARM64vecb_UMULL8HBB; break;
2856 case Iop_Mull32Sx2: op = ARM64vecb_SMULL2DSS; break;
2857 case Iop_Mull16Sx4: op = ARM64vecb_SMULL4SHH; break;
2858 case Iop_Mull8Sx8: op = ARM64vecb_SMULL8HBB; break;
2859 case Iop_QDMull32Sx2: op = ARM64vecb_SQDMULL2DSS; break;
2860 case Iop_QDMull16Sx4: op = ARM64vecb_SQDMULL4SHH; break;
sewardj6f312d02014-06-28 12:21:37 +00002861 default: vassert(0);
2862 }
sewardj31b5a952014-06-26 07:41:14 +00002863 addInstr(env, ARM64Instr_VQfromXX(vSrcL, iSrcL, iSrcL));
2864 addInstr(env, ARM64Instr_VQfromXX(vSrcR, iSrcR, iSrcR));
sewardj6f312d02014-06-28 12:21:37 +00002865 addInstr(env, ARM64Instr_VBinV(op, dst, vSrcL, vSrcR));
sewardj31b5a952014-06-26 07:41:14 +00002866 return dst;
2867 }
2868
sewardjbbcf1882014-01-12 12:49:10 +00002869 /* ... */
2870 default:
2871 break;
2872 } /* switch on the binop */
2873 } /* if (e->tag == Iex_Binop) */
2874
sewardj606c4ba2014-01-26 19:11:14 +00002875 if (e->tag == Iex_Triop) {
2876 IRTriop* triop = e->Iex.Triop.details;
2877 ARM64VecBinOp vecbop = ARM64vecb_INVALID;
2878 switch (triop->op) {
2879 case Iop_Add64Fx2: vecbop = ARM64vecb_FADD64x2; break;
2880 case Iop_Sub64Fx2: vecbop = ARM64vecb_FSUB64x2; break;
2881 case Iop_Mul64Fx2: vecbop = ARM64vecb_FMUL64x2; break;
2882 case Iop_Div64Fx2: vecbop = ARM64vecb_FDIV64x2; break;
sewardjecde6972014-02-05 11:01:19 +00002883 case Iop_Add32Fx4: vecbop = ARM64vecb_FADD32x4; break;
2884 case Iop_Sub32Fx4: vecbop = ARM64vecb_FSUB32x4; break;
2885 case Iop_Mul32Fx4: vecbop = ARM64vecb_FMUL32x4; break;
2886 case Iop_Div32Fx4: vecbop = ARM64vecb_FDIV32x4; break;
sewardj606c4ba2014-01-26 19:11:14 +00002887 default: break;
2888 }
2889 if (vecbop != ARM64vecb_INVALID) {
2890 HReg argL = iselV128Expr(env, triop->arg2);
2891 HReg argR = iselV128Expr(env, triop->arg3);
2892 HReg dst = newVRegV(env);
2893 set_FPCR_rounding_mode(env, triop->arg1);
2894 addInstr(env, ARM64Instr_VBinV(vecbop, dst, argL, argR));
2895 return dst;
2896 }
sewardj8def0492014-09-01 14:13:15 +00002897
2898 if (triop->op == Iop_SliceV128) {
2899 /* Note that, compared to ShlV128/ShrV128 just above, the shift
2900 amount here is in bytes, not bits. */
2901 IRExpr* argHi = triop->arg1;
2902 IRExpr* argLo = triop->arg2;
2903 IRExpr* argAmt = triop->arg3;
         if (argAmt->tag == Iex_Const && argAmt->Iex.Const.con->tag == Ico_U8) {
            UInt amt   = argAmt->Iex.Const.con->Ico.U8;
            Bool amtOK = amt >= 1 && amt <= 15;
            /* We could also deal with amt == 0 by copying argLo to the
               destination, but there has been no need for that so far. */
            if (amtOK) {
               HReg srcHi = iselV128Expr(env, argHi);
               HReg srcLo = iselV128Expr(env, argLo);
               HReg dst   = newVRegV(env);
               addInstr(env, ARM64Instr_VExtV(dst, srcLo, srcHi, amt));
               return dst;
            }
         }
         /* else fall out; this is unhandled */
      }

   } /* if (e->tag == Iex_Triop) */

  v128_expr_bad:
   ppIRExpr(e);
   vpanic("iselV128Expr_wrk");
}


/*---------------------------------------------------------*/
/*--- ISEL: Floating point expressions (64 bit)         ---*/
/*---------------------------------------------------------*/

/* Compute a 64-bit floating point value into a register, the identity
   of which is returned.  As with iselIntExpr_R, the reg may be either
   real or virtual; in any case it must not be changed by subsequent
   code emitted by the caller. */

static HReg iselDblExpr ( ISelEnv* env, IRExpr* e )
{
   HReg r = iselDblExpr_wrk( env, e );
#  if 0
   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
#  endif
   vassert(hregClass(r) == HRcFlt64);
   vassert(hregIsVirtual(r));
   return r;
}

/* DO NOT CALL THIS DIRECTLY */
static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(e);
   vassert(ty == Ity_F64);

   if (e->tag == Iex_RdTmp) {
      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
   }

   if (e->tag == Iex_Const) {
      IRConst* con = e->Iex.Const.con;
      if (con->tag == Ico_F64i) {
         HReg src = newVRegI(env);
         HReg dst = newVRegD(env);
         addInstr(env, ARM64Instr_Imm64(src, con->Ico.F64i));
         addInstr(env, ARM64Instr_VDfromX(dst, src));
         return dst;
      }
      if (con->tag == Ico_F64) {
         HReg src = newVRegI(env);
         HReg dst = newVRegD(env);
         union { Double d64; ULong u64; } u;
         vassert(sizeof(u) == 8);
         u.d64 = con->Ico.F64;
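         /* Reinterpret the F64 bits as a ULong so they can be
            materialised with Imm64 and then copied sideways into a D
            register via VDfromX. */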
         addInstr(env, ARM64Instr_Imm64(src, u.u64));
         addInstr(env, ARM64Instr_VDfromX(dst, src));
         return dst;
      }
   }

   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
      vassert(e->Iex.Load.ty == Ity_F64);
      HReg addr = iselIntExpr_R(env, e->Iex.Load.addr);
      HReg res  = newVRegD(env);
      addInstr(env, ARM64Instr_VLdStD(True/*isLoad*/, res, addr, 0));
      return res;
   }

   if (e->tag == Iex_Get) {
      Int offs = e->Iex.Get.offset;
      if (offs >= 0 && offs < 32768 && 0 == (offs & 7)) {
         HReg rD = newVRegD(env);
         HReg rN = get_baseblock_register();
         addInstr(env, ARM64Instr_VLdStD(True/*isLoad*/, rD, rN, offs));
         return rD;
      }
   }

   if (e->tag == Iex_Unop) {
      switch (e->Iex.Unop.op) {
         case Iop_NegF64: {
            HReg src = iselDblExpr(env, e->Iex.Unop.arg);
            HReg dst = newVRegD(env);
            addInstr(env, ARM64Instr_VUnaryD(ARM64fpu_NEG, dst, src));
            return dst;
         }
         case Iop_AbsF64: {
            HReg src = iselDblExpr(env, e->Iex.Unop.arg);
            HReg dst = newVRegD(env);
            addInstr(env, ARM64Instr_VUnaryD(ARM64fpu_ABS, dst, src));
            return dst;
         }
         case Iop_F32toF64: {
            HReg src = iselFltExpr(env, e->Iex.Unop.arg);
            HReg dst = newVRegD(env);
            addInstr(env, ARM64Instr_VCvtSD(True/*sToD*/, dst, src));
            return dst;
         }
         case Iop_I32UtoF64:
         case Iop_I32StoF64: {
            /* Rounding mode is not involved here, since the conversion
               can always be done without loss of precision: every
               32-bit integer is exactly representable in F64's 53-bit
               significand. */
            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
            HReg dst = newVRegD(env);
            Bool syned = e->Iex.Unop.op == Iop_I32StoF64;
            ARM64CvtOp cvt_op = syned ? ARM64cvt_F64_I32S : ARM64cvt_F64_I32U;
            addInstr(env, ARM64Instr_VCvtI2F(cvt_op, dst, src));
            return dst;
         }
         default:
            break;
      }
   }

   if (e->tag == Iex_Binop) {
      switch (e->Iex.Binop.op) {
         case Iop_RoundF64toInt: {
            HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
            HReg dst = newVRegD(env);
            set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
            addInstr(env, ARM64Instr_VUnaryD(ARM64fpu_RINT, dst, src));
            return dst;
         }
         case Iop_SqrtF64: {
            HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
            HReg dst = newVRegD(env);
            set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
            addInstr(env, ARM64Instr_VUnaryD(ARM64fpu_SQRT, dst, src));
            return dst;
         }
         case Iop_I64StoF64:
         case Iop_I64UtoF64: {
            ARM64CvtOp cvt_op = e->Iex.Binop.op == Iop_I64StoF64
                                   ? ARM64cvt_F64_I64S : ARM64cvt_F64_I64U;
            HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2);
            set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
            HReg dstS = newVRegD(env);
            addInstr(env, ARM64Instr_VCvtI2F(cvt_op, dstS, srcI));
            return dstS;
         }
         default:
            break;
      }
   }

   if (e->tag == Iex_Triop) {
      IRTriop*     triop = e->Iex.Triop.details;
      ARM64FpBinOp dblop = ARM64fpb_INVALID;
      switch (triop->op) {
         case Iop_DivF64: dblop = ARM64fpb_DIV; break;
         case Iop_MulF64: dblop = ARM64fpb_MUL; break;
         case Iop_SubF64: dblop = ARM64fpb_SUB; break;
         case Iop_AddF64: dblop = ARM64fpb_ADD; break;
         default: break;
      }
      if (dblop != ARM64fpb_INVALID) {
         HReg argL = iselDblExpr(env, triop->arg2);
         HReg argR = iselDblExpr(env, triop->arg3);
         HReg dst  = newVRegD(env);
         set_FPCR_rounding_mode(env, triop->arg1);
         addInstr(env, ARM64Instr_VBinD(dblop, dst, argL, argR));
         return dst;
      }
   }

   if (e->tag == Iex_ITE) {
      /* ITE(ccexpr, iftrue, iffalse) */
      ARM64CondCode cc;
      HReg r1  = iselDblExpr(env, e->Iex.ITE.iftrue);
      HReg r0  = iselDblExpr(env, e->Iex.ITE.iffalse);
      HReg dst = newVRegD(env);
      cc = iselCondCode(env, e->Iex.ITE.cond);
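      /* One conditional-select instruction (FCSEL-style, going by the
         VFCSel name) does the whole job, so no branch is needed. */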
      addInstr(env, ARM64Instr_VFCSel(dst, r1, r0, cc, True/*64-bit*/));
      return dst;
   }

   ppIRExpr(e);
   vpanic("iselDblExpr_wrk");
}


/*---------------------------------------------------------*/
/*--- ISEL: Floating point expressions (32 bit)         ---*/
/*---------------------------------------------------------*/

/* Compute a 32-bit floating point value into a register, the identity
   of which is returned.  As with iselIntExpr_R, the reg may be either
   real or virtual; in any case it must not be changed by subsequent
   code emitted by the caller.  Values are generated into HRcFlt64
   registers despite the values themselves being Ity_F32s. */

static HReg iselFltExpr ( ISelEnv* env, IRExpr* e )
{
   HReg r = iselFltExpr_wrk( env, e );
#  if 0
   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
#  endif
   vassert(hregClass(r) == HRcFlt64);
   vassert(hregIsVirtual(r));
   return r;
}

/* DO NOT CALL THIS DIRECTLY */
static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(e);
   vassert(ty == Ity_F32);

   if (e->tag == Iex_RdTmp) {
      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
   }

   if (e->tag == Iex_Const) {
3134 if (e->tag == Iex_Const) {
3135 /* This is something of a kludge. Since a 32 bit floating point
3136 zero is just .. all zeroes, just create a 64 bit zero word
3137 and transfer it. This avoids having to create a SfromW
3138 instruction for this specific case. */
      IRConst* con = e->Iex.Const.con;
      if (con->tag == Ico_F32i && con->Ico.F32i == 0) {
         HReg src = newVRegI(env);
         HReg dst = newVRegD(env);
         addInstr(env, ARM64Instr_Imm64(src, 0));
         addInstr(env, ARM64Instr_VDfromX(dst, src));
         return dst;
      }
      if (con->tag == Ico_F32) {
         HReg src = newVRegI(env);
         HReg dst = newVRegD(env);
         union { Float f32; UInt u32; } u;
         vassert(sizeof(u) == 4);
         u.f32 = con->Ico.F32;
         addInstr(env, ARM64Instr_Imm64(src, (ULong)u.u32));
         addInstr(env, ARM64Instr_VDfromX(dst, src));
         return dst;
      }
   }

   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
      vassert(e->Iex.Load.ty == Ity_F32);
      HReg addr = iselIntExpr_R(env, e->Iex.Load.addr);
      HReg res  = newVRegD(env);
      addInstr(env, ARM64Instr_VLdStS(True/*isLoad*/, res, addr, 0));
      return res;
   }

   if (e->tag == Iex_Get) {
      Int offs = e->Iex.Get.offset;
      if (offs >= 0 && offs < 16384 && 0 == (offs & 3)) {
         HReg rD = newVRegD(env);
         HReg rN = get_baseblock_register();
         addInstr(env, ARM64Instr_VLdStS(True/*isLoad*/, rD, rN, offs));
         return rD;
      }
   }

   if (e->tag == Iex_Unop) {
      switch (e->Iex.Unop.op) {
         case Iop_NegF32: {
            HReg src = iselFltExpr(env, e->Iex.Unop.arg);
            HReg dst = newVRegD(env);
            addInstr(env, ARM64Instr_VUnaryS(ARM64fpu_NEG, dst, src));
            return dst;
         }
         case Iop_AbsF32: {
            HReg src = iselFltExpr(env, e->Iex.Unop.arg);
            HReg dst = newVRegD(env);
            addInstr(env, ARM64Instr_VUnaryS(ARM64fpu_ABS, dst, src));
            return dst;
         }
         default:
            break;
      }
   }

   if (e->tag == Iex_Binop) {
      switch (e->Iex.Binop.op) {
         case Iop_RoundF32toInt: {
            HReg src = iselFltExpr(env, e->Iex.Binop.arg2);
            HReg dst = newVRegD(env);
            set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
            addInstr(env, ARM64Instr_VUnaryS(ARM64fpu_RINT, dst, src));
            return dst;
         }
         case Iop_SqrtF32: {
            HReg src = iselFltExpr(env, e->Iex.Binop.arg2);
            HReg dst = newVRegD(env);
            set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
            addInstr(env, ARM64Instr_VUnaryS(ARM64fpu_SQRT, dst, src));
            return dst;
         }
         case Iop_F64toF32: {
            HReg srcD = iselDblExpr(env, e->Iex.Binop.arg2);
            set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
            HReg dstS = newVRegD(env);
            addInstr(env, ARM64Instr_VCvtSD(False/*dToS*/, dstS, srcD));
            return dstS;
         }
         case Iop_I32UtoF32:
         case Iop_I32StoF32:
         case Iop_I64UtoF32:
         case Iop_I64StoF32: {
            ARM64CvtOp cvt_op = ARM64cvt_INVALID;
            switch (e->Iex.Binop.op) {
               case Iop_I32UtoF32: cvt_op = ARM64cvt_F32_I32U; break;
               case Iop_I32StoF32: cvt_op = ARM64cvt_F32_I32S; break;
               case Iop_I64UtoF32: cvt_op = ARM64cvt_F32_I64U; break;
               case Iop_I64StoF32: cvt_op = ARM64cvt_F32_I64S; break;
               default: vassert(0);
            }
            HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2);
            set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
            HReg dstS = newVRegD(env);
            addInstr(env, ARM64Instr_VCvtI2F(cvt_op, dstS, srcI));
            return dstS;
         }
         default:
            break;
      }
   }

   if (e->tag == Iex_Triop) {
      IRTriop*     triop = e->Iex.Triop.details;
      ARM64FpBinOp sglop = ARM64fpb_INVALID;
      switch (triop->op) {
         case Iop_DivF32: sglop = ARM64fpb_DIV; break;
         case Iop_MulF32: sglop = ARM64fpb_MUL; break;
         case Iop_SubF32: sglop = ARM64fpb_SUB; break;
         case Iop_AddF32: sglop = ARM64fpb_ADD; break;
         default: break;
      }
      if (sglop != ARM64fpb_INVALID) {
         HReg argL = iselFltExpr(env, triop->arg2);
         HReg argR = iselFltExpr(env, triop->arg3);
         HReg dst  = newVRegD(env);
         set_FPCR_rounding_mode(env, triop->arg1);
         addInstr(env, ARM64Instr_VBinS(sglop, dst, argL, argR));
         return dst;
      }
   }

   if (e->tag == Iex_ITE) {
      /* ITE(ccexpr, iftrue, iffalse) */
      ARM64CondCode cc;
      HReg r1  = iselFltExpr(env, e->Iex.ITE.iftrue);
      HReg r0  = iselFltExpr(env, e->Iex.ITE.iffalse);
      HReg dst = newVRegD(env);
      cc = iselCondCode(env, e->Iex.ITE.cond);
      addInstr(env, ARM64Instr_VFCSel(dst, r1, r0, cc, False/*!64-bit*/));
      return dst;
   }

   ppIRExpr(e);
   vpanic("iselFltExpr_wrk");
}


/*---------------------------------------------------------*/
/*--- ISEL: Vector expressions (256 bit)                ---*/
/*---------------------------------------------------------*/

static void iselV256Expr ( /*OUT*/HReg* rHi, HReg* rLo,
                           ISelEnv* env, IRExpr* e )
{
   iselV256Expr_wrk( rHi, rLo, env, e );
   vassert(hregClass(*rHi) == HRcVec128);
   vassert(hregClass(*rLo) == HRcVec128);
   vassert(hregIsVirtual(*rHi));
   vassert(hregIsVirtual(*rLo));
}

/* DO NOT CALL THIS DIRECTLY */
static void iselV256Expr_wrk ( /*OUT*/HReg* rHi, /*OUT*/HReg* rLo,
                               ISelEnv* env, IRExpr* e )
{
   vassert(e);
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_V256);

   /* read 256-bit IRTemp */
   if (e->tag == Iex_RdTmp) {
      lookupIRTempPair( rHi, rLo, env, e->Iex.RdTmp.tmp);
      return;
   }

   if (e->tag == Iex_Binop) {
      switch (e->Iex.Binop.op) {
         case Iop_V128HLtoV256: {
            *rHi = iselV128Expr(env, e->Iex.Binop.arg1);
            *rLo = iselV128Expr(env, e->Iex.Binop.arg2);
            return;
         }
         case Iop_QandSQsh64x2:
         case Iop_QandSQsh32x4:
         case Iop_QandSQsh16x8:
         case Iop_QandSQsh8x16:
         case Iop_QandUQsh64x2:
         case Iop_QandUQsh32x4:
         case Iop_QandUQsh16x8:
         case Iop_QandUQsh8x16:
         case Iop_QandSQRsh64x2:
         case Iop_QandSQRsh32x4:
         case Iop_QandSQRsh16x8:
         case Iop_QandSQRsh8x16:
         case Iop_QandUQRsh64x2:
         case Iop_QandUQRsh32x4:
         case Iop_QandUQRsh16x8:
         case Iop_QandUQRsh8x16:
         {
            HReg argL  = iselV128Expr(env, e->Iex.Binop.arg1);
            HReg argR  = iselV128Expr(env, e->Iex.Binop.arg2);
            HReg fpsr  = newVRegI(env);
            HReg resHi = newVRegV(env);
            HReg resLo = newVRegV(env);
            ARM64VecBinOp op = ARM64vecb_INVALID;
            switch (e->Iex.Binop.op) {
               case Iop_QandSQsh64x2:  op = ARM64vecb_SQSHL64x2;  break;
               case Iop_QandSQsh32x4:  op = ARM64vecb_SQSHL32x4;  break;
               case Iop_QandSQsh16x8:  op = ARM64vecb_SQSHL16x8;  break;
               case Iop_QandSQsh8x16:  op = ARM64vecb_SQSHL8x16;  break;
               case Iop_QandUQsh64x2:  op = ARM64vecb_UQSHL64x2;  break;
               case Iop_QandUQsh32x4:  op = ARM64vecb_UQSHL32x4;  break;
               case Iop_QandUQsh16x8:  op = ARM64vecb_UQSHL16x8;  break;
               case Iop_QandUQsh8x16:  op = ARM64vecb_UQSHL8x16;  break;
               case Iop_QandSQRsh64x2: op = ARM64vecb_SQRSHL64x2; break;
               case Iop_QandSQRsh32x4: op = ARM64vecb_SQRSHL32x4; break;
               case Iop_QandSQRsh16x8: op = ARM64vecb_SQRSHL16x8; break;
               case Iop_QandSQRsh8x16: op = ARM64vecb_SQRSHL8x16; break;
               case Iop_QandUQRsh64x2: op = ARM64vecb_UQRSHL64x2; break;
               case Iop_QandUQRsh32x4: op = ARM64vecb_UQRSHL32x4; break;
               case Iop_QandUQRsh16x8: op = ARM64vecb_UQRSHL16x8; break;
               case Iop_QandUQRsh8x16: op = ARM64vecb_UQRSHL8x16; break;
               default: vassert(0);
            }
            /* Clear FPSR.Q, do the operation, and return both its
               result and the new value of FPSR.Q.  We can simply zero
               out FPSR since all the other bits have no relevance in
               VEX generated code. */
            addInstr(env, ARM64Instr_Imm64(fpsr, 0));
            addInstr(env, ARM64Instr_FPSR(True/*toFPSR*/, fpsr));
            addInstr(env, ARM64Instr_VBinV(op, resLo, argL, argR));
            addInstr(env, ARM64Instr_FPSR(False/*!toFPSR*/, fpsr));
            addInstr(env, ARM64Instr_Shift(fpsr, fpsr, ARM64RI6_I6(27),
                                           ARM64sh_SHR));
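            /* FPSR.QC, the cumulative saturation flag, is bit 27 of
               FPSR; the shift above brings it down to bit 0. */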
            ARM64RIL* ril_one = mb_mkARM64RIL_I(1);
            vassert(ril_one);
            addInstr(env, ARM64Instr_Logic(fpsr, fpsr, ril_one, ARM64lo_AND));
            /* Now we have: the main (shift) result in |resLo|, and the
               Q bit at the bottom of |fpsr|. */
            addInstr(env, ARM64Instr_VQfromX(resHi, fpsr));
            *rHi = resHi;
            *rLo = resLo;
            return;
         }

         /* ... */
         default:
            break;
      } /* switch on the binop */
   } /* if (e->tag == Iex_Binop) */

   ppIRExpr(e);
   vpanic("iselV256Expr_wrk");
}


/*---------------------------------------------------------*/
/*--- ISEL: Statements                                  ---*/
/*---------------------------------------------------------*/

static void iselStmt ( ISelEnv* env, IRStmt* stmt )
{
   if (vex_traceflags & VEX_TRACE_VCODE) {
      vex_printf("\n-- ");
      ppIRStmt(stmt);
      vex_printf("\n");
   }
   switch (stmt->tag) {

   /* --------- STORE --------- */
   /* little-endian write to memory */
   case Ist_Store: {
      IRType    tya = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
      IRType    tyd = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
      IREndness end = stmt->Ist.Store.end;

      if (tya != Ity_I64 || end != Iend_LE)
         goto stmt_fail;

      if (tyd == Ity_I64) {
         HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
         ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd);
         addInstr(env, ARM64Instr_LdSt64(False/*!isLoad*/, rD, am));
         return;
      }
      if (tyd == Ity_I32) {
         HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
         ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd);
         addInstr(env, ARM64Instr_LdSt32(False/*!isLoad*/, rD, am));
         return;
      }
      if (tyd == Ity_I16) {
         HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
         ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd);
         addInstr(env, ARM64Instr_LdSt16(False/*!isLoad*/, rD, am));
         return;
      }
      if (tyd == Ity_I8) {
         HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
         ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd);
         addInstr(env, ARM64Instr_LdSt8(False/*!isLoad*/, rD, am));
         return;
      }
      if (tyd == Ity_V128) {
         HReg qD   = iselV128Expr(env, stmt->Ist.Store.data);
         HReg addr = iselIntExpr_R(env, stmt->Ist.Store.addr);
         addInstr(env, ARM64Instr_VLdStQ(False/*!isLoad*/, qD, addr));
         return;
      }
      if (tyd == Ity_F64) {
         HReg dD   = iselDblExpr(env, stmt->Ist.Store.data);
         HReg addr = iselIntExpr_R(env, stmt->Ist.Store.addr);
         addInstr(env, ARM64Instr_VLdStD(False/*!isLoad*/, dD, addr, 0));
         return;
      }
      if (tyd == Ity_F32) {
         HReg sD   = iselFltExpr(env, stmt->Ist.Store.data);
         HReg addr = iselIntExpr_R(env, stmt->Ist.Store.addr);
         addInstr(env, ARM64Instr_VLdStS(False/*!isLoad*/, sD, addr, 0));
         return;
      }
      break;
   }

   /* --------- PUT --------- */
   /* write guest state, fixed offset */
   case Ist_Put: {
      IRType tyd  = typeOfIRExpr(env->type_env, stmt->Ist.Put.data);
      UInt   offs = (UInt)stmt->Ist.Put.offset;
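      /* The range checks below track the scaled, unsigned 12-bit
         immediate offsets of AArch64 LDR/STR: the limit is 8 << 12
         bytes for 64-bit accesses, 4 << 12 for 32-bit, and so on. */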
      if (tyd == Ity_I64 && 0 == (offs & 7) && offs < (8<<12)) {
         HReg rD = iselIntExpr_R(env, stmt->Ist.Put.data);
         ARM64AMode* am = mk_baseblock_64bit_access_amode(offs);
         addInstr(env, ARM64Instr_LdSt64(False/*!isLoad*/, rD, am));
         return;
      }
      if (tyd == Ity_I32 && 0 == (offs & 3) && offs < (4<<12)) {
         HReg rD = iselIntExpr_R(env, stmt->Ist.Put.data);
         ARM64AMode* am = mk_baseblock_32bit_access_amode(offs);
         addInstr(env, ARM64Instr_LdSt32(False/*!isLoad*/, rD, am));
         return;
      }
      if (tyd == Ity_I16 && 0 == (offs & 1) && offs < (2<<12)) {
         HReg rD = iselIntExpr_R(env, stmt->Ist.Put.data);
         ARM64AMode* am = mk_baseblock_16bit_access_amode(offs);
         addInstr(env, ARM64Instr_LdSt16(False/*!isLoad*/, rD, am));
         return;
      }
      if (tyd == Ity_I8 && offs < (1<<12)) {
         HReg rD = iselIntExpr_R(env, stmt->Ist.Put.data);
         ARM64AMode* am = mk_baseblock_8bit_access_amode(offs);
         addInstr(env, ARM64Instr_LdSt8(False/*!isLoad*/, rD, am));
         return;
      }
      if (tyd == Ity_V128 && offs < (1<<12)) {
         HReg qD   = iselV128Expr(env, stmt->Ist.Put.data);
         HReg addr = mk_baseblock_128bit_access_addr(env, offs);
         addInstr(env, ARM64Instr_VLdStQ(False/*!isLoad*/, qD, addr));
         return;
      }
      if (tyd == Ity_F64 && 0 == (offs & 7) && offs < (8<<12)) {
         HReg dD  = iselDblExpr(env, stmt->Ist.Put.data);
         HReg bbp = get_baseblock_register();
         addInstr(env, ARM64Instr_VLdStD(False/*!isLoad*/, dD, bbp, offs));
         return;
      }
      if (tyd == Ity_F32 && 0 == (offs & 3) && offs < (4<<12)) {
         HReg dD  = iselFltExpr(env, stmt->Ist.Put.data);
         HReg bbp = get_baseblock_register();
         addInstr(env, ARM64Instr_VLdStS(False/*!isLoad*/, dD, bbp, offs));
         return;
      }

      break;
   }

   /* --------- TMP --------- */
   /* assign value to temporary */
   case Ist_WrTmp: {
      IRTemp tmp = stmt->Ist.WrTmp.tmp;
      IRType ty  = typeOfIRTemp(env->type_env, tmp);

      if (ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
         /* We could do a lot better here.  But for the time being: */
         HReg dst = lookupIRTemp(env, tmp);
         HReg rD  = iselIntExpr_R(env, stmt->Ist.WrTmp.data);
         addInstr(env, ARM64Instr_MovI(dst, rD));
         return;
      }
      if (ty == Ity_I1) {
         /* Here, we are generating an I1 value into a 64 bit register.
            Make sure the value in the register is only zero or one,
            and nothing else.  This allows optimisation of the
            1Uto64(tmp:I1) case, by making it simply a copy of the
            register holding 'tmp'.  The point being that the value in
            the register holding 'tmp' can only have been created
            here.  LATER: that seems dangerous; it would be safer to do
            'tmp & 1' in that case.  Also, this could be done with a
            single CINC insn. */
         /* CLONE-01 */
         HReg zero = newVRegI(env);
         HReg one  = newVRegI(env);
         HReg dst  = lookupIRTemp(env, tmp);
         addInstr(env, ARM64Instr_Imm64(zero, 0));
         addInstr(env, ARM64Instr_Imm64(one,  1));
         ARM64CondCode cc = iselCondCode(env, stmt->Ist.WrTmp.data);
         addInstr(env, ARM64Instr_CSel(dst, one, zero, cc));
         return;
      }
      if (ty == Ity_F64) {
         HReg src = iselDblExpr(env, stmt->Ist.WrTmp.data);
         HReg dst = lookupIRTemp(env, tmp);
         addInstr(env, ARM64Instr_VMov(8, dst, src));
         return;
      }
      if (ty == Ity_F32) {
         HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data);
         HReg dst = lookupIRTemp(env, tmp);
         addInstr(env, ARM64Instr_VMov(8/*yes, really*/, dst, src));
         return;
      }
      if (ty == Ity_V128) {
         HReg src = iselV128Expr(env, stmt->Ist.WrTmp.data);
         HReg dst = lookupIRTemp(env, tmp);
         addInstr(env, ARM64Instr_VMov(16, dst, src));
         return;
      }
      if (ty == Ity_V256) {
         HReg srcHi, srcLo, dstHi, dstLo;
         iselV256Expr(&srcHi, &srcLo, env, stmt->Ist.WrTmp.data);
         lookupIRTempPair( &dstHi, &dstLo, env, tmp);
         addInstr(env, ARM64Instr_VMov(16, dstHi, srcHi));
         addInstr(env, ARM64Instr_VMov(16, dstLo, srcLo));
         return;
      }
      break;
   }

   /* --------- Call to DIRTY helper --------- */
   /* call complex ("dirty") helper function */
   case Ist_Dirty: {
      IRDirty* d = stmt->Ist.Dirty.details;

      /* Figure out the return type, if any. */
      IRType retty = Ity_INVALID;
      if (d->tmp != IRTemp_INVALID)
         retty = typeOfIRTemp(env->type_env, d->tmp);

      Bool retty_ok = False;
      switch (retty) {
         case Ity_INVALID: /* function doesn't return anything */
         case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
         case Ity_V128:
            retty_ok = True; break;
         default:
            break;
      }
      if (!retty_ok)
         break; /* will go to stmt_fail: */

      /* Marshal args, do the call, and set the return value to
         0x555..555 if this is a conditional call that returns a value
         and the call is skipped. */
      UInt   addToSp = 0;
      RetLoc rloc    = mk_RetLoc_INVALID();
      doHelperCall( &addToSp, &rloc, env, d->guard, d->cee, retty, d->args );
      vassert(is_sane_RetLoc(rloc));

      /* Now figure out what to do with the returned value, if any. */
      switch (retty) {
         case Ity_INVALID: {
            /* No return value.  Nothing to do. */
            vassert(d->tmp == IRTemp_INVALID);
            vassert(rloc.pri == RLPri_None);
            vassert(addToSp == 0);
            return;
         }
         case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8: {
            vassert(rloc.pri == RLPri_Int);
            vassert(addToSp == 0);
            /* The returned value is in x0.  Park it in the register
               associated with tmp. */
            HReg dst = lookupIRTemp(env, d->tmp);
            addInstr(env, ARM64Instr_MovI(dst, hregARM64_X0()) );
            return;
         }
         case Ity_V128: {
            /* The returned value is on the stack, and *retloc tells
               us where.  Fish it off the stack and then move the
               stack pointer upwards to clear it, as directed by
               doHelperCall. */
            vassert(rloc.pri == RLPri_V128SpRel);
            vassert(rloc.spOff < 256); // stay sane
            vassert(addToSp >= 16);    // ditto
            vassert(addToSp < 256);    // ditto
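            /* These bounds also ensure that rloc.spOff fits in the
               12-bit unsigned immediate field used by ARM64RIA_I12
               just below. */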
            HReg dst = lookupIRTemp(env, d->tmp);
            HReg tmp = newVRegI(env); // the address of the returned value
            addInstr(env, ARM64Instr_FromSP(tmp)); // tmp = SP
            addInstr(env, ARM64Instr_Arith(tmp, tmp,
                                           ARM64RIA_I12((UShort)rloc.spOff, 0),
                                           True/*isAdd*/ ));
            addInstr(env, ARM64Instr_VLdStQ(True/*isLoad*/, dst, tmp));
            addInstr(env, ARM64Instr_AddToSP(addToSp));
            return;
         }
         default:
            /*NOTREACHED*/
            vassert(0);
      }
      break;
   }

   /* --------- Load Linked and Store Conditional --------- */
   case Ist_LLSC: {
      if (stmt->Ist.LLSC.storedata == NULL) {
         /* LL */
         IRTemp res = stmt->Ist.LLSC.result;
         IRType ty  = typeOfIRTemp(env->type_env, res);
         if (ty == Ity_I64 || ty == Ity_I32
             || ty == Ity_I16 || ty == Ity_I8) {
            Int  szB   = 0;
            HReg r_dst = lookupIRTemp(env, res);
            HReg raddr = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
            switch (ty) {
               case Ity_I8:  szB = 1; break;
               case Ity_I16: szB = 2; break;
               case Ity_I32: szB = 4; break;
               case Ity_I64: szB = 8; break;
               default: vassert(0);
            }
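            /* This backend's LL convention: the address is passed in
               X4 and LdrEX delivers the loaded value in X2, hence the
               register moves around it. */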
            addInstr(env, ARM64Instr_MovI(hregARM64_X4(), raddr));
            addInstr(env, ARM64Instr_LdrEX(szB));
            addInstr(env, ARM64Instr_MovI(r_dst, hregARM64_X2()));
            return;
         }
         goto stmt_fail;
      } else {
         /* SC */
         IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.LLSC.storedata);
         if (tyd == Ity_I64 || tyd == Ity_I32
             || tyd == Ity_I16 || tyd == Ity_I8) {
            Int  szB = 0;
            HReg rD  = iselIntExpr_R(env, stmt->Ist.LLSC.storedata);
            HReg rA  = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
            switch (tyd) {
               case Ity_I8:  szB = 1; break;
               case Ity_I16: szB = 2; break;
               case Ity_I32: szB = 4; break;
               case Ity_I64: szB = 8; break;
               default: vassert(0);
            }
            addInstr(env, ARM64Instr_MovI(hregARM64_X2(), rD));
            addInstr(env, ARM64Instr_MovI(hregARM64_X4(), rA));
            addInstr(env, ARM64Instr_StrEX(szB));
         } else {
            goto stmt_fail;
         }
         /* Now X0 is 1 if the store failed and 0 if it succeeded.
            Change to IR conventions (0 is fail, 1 is success).  Also
            transfer the result to r_res. */
         IRTemp    res   = stmt->Ist.LLSC.result;
         IRType    ty    = typeOfIRTemp(env->type_env, res);
         HReg      r_res = lookupIRTemp(env, res);
         ARM64RIL* one   = mb_mkARM64RIL_I(1);
         vassert(ty == Ity_I1);
         vassert(one);
         addInstr(env, ARM64Instr_Logic(r_res, hregARM64_X0(), one,
                                        ARM64lo_XOR));
         /* And be conservative -- mask off all but the lowest bit. */
         addInstr(env, ARM64Instr_Logic(r_res, r_res, one,
                                        ARM64lo_AND));
         return;
      }
      break;
   }

   /* --------- MEM FENCE --------- */
   case Ist_MBE:
      switch (stmt->Ist.MBE.event) {
         case Imbe_Fence:
            addInstr(env, ARM64Instr_MFence());
            return;
         default:
            break;
      }
      break;

   /* --------- INSTR MARK --------- */
   /* Doesn't generate any executable code ... */
   case Ist_IMark:
      return;

   /* --------- NO-OP --------- */
   case Ist_NoOp:
      return;

   /* --------- EXIT --------- */
   case Ist_Exit: {
      if (stmt->Ist.Exit.dst->tag != Ico_U64)
         vpanic("isel_arm64: Ist_Exit: dst is not a 64-bit value");

      ARM64CondCode cc
         = iselCondCode(env, stmt->Ist.Exit.guard);
      ARM64AMode* amPC
         = mk_baseblock_64bit_access_amode(stmt->Ist.Exit.offsIP);

      /* Case: boring transfer to known address */
      if (stmt->Ist.Exit.jk == Ijk_Boring
          /*ATC || stmt->Ist.Exit.jk == Ijk_Call */
          /*ATC || stmt->Ist.Exit.jk == Ijk_Ret */ ) {
         if (env->chainingAllowed) {
            /* .. almost always true .. */
            /* Skip the event check at the dst if this is a forwards
               edge. */
            Bool toFastEP
               = ((Addr64)stmt->Ist.Exit.dst->Ico.U64) > env->max_ga;
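            /* The fast entry point skips the event check at the
               destination block, so only forward edges may use it; a
               backward edge could otherwise loop indefinitely without
               an event check ever firing. */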
            if (0) vex_printf("%s", toFastEP ? "Y" : ",");
            addInstr(env, ARM64Instr_XDirect(stmt->Ist.Exit.dst->Ico.U64,
                                             amPC, cc, toFastEP));
         } else {
            /* .. very occasionally .. */
            /* We can't use chaining, so ask for an assisted transfer,
               as that's the only alternative that is allowable. */
            HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
            addInstr(env, ARM64Instr_XAssisted(r, amPC, cc, Ijk_Boring));
         }
         return;
      }

      /* Do we ever expect to see any other kind? */
      goto stmt_fail;
   }

   default: break;
   }

  stmt_fail:
   ppIRStmt(stmt);
   vpanic("iselStmt");
}


/*---------------------------------------------------------*/
/*--- ISEL: Basic block terminators (Nexts)             ---*/
/*---------------------------------------------------------*/

static void iselNext ( ISelEnv* env,
                       IRExpr* next, IRJumpKind jk, Int offsIP )
{
   if (vex_traceflags & VEX_TRACE_VCODE) {
      vex_printf( "\n-- PUT(%d) = ", offsIP);
      ppIRExpr( next );
      vex_printf( "; exit-");
      ppIRJumpKind(jk);
      vex_printf( "\n");
   }

   /* Case: boring transfer to known address */
   if (next->tag == Iex_Const) {
      IRConst* cdst = next->Iex.Const.con;
      vassert(cdst->tag == Ico_U64);
      if (jk == Ijk_Boring || jk == Ijk_Call) {
         /* Boring transfer to known address */
         ARM64AMode* amPC = mk_baseblock_64bit_access_amode(offsIP);
         if (env->chainingAllowed) {
            /* .. almost always true .. */
            /* Skip the event check at the dst if this is a forwards
               edge. */
            Bool toFastEP
               = ((Addr64)cdst->Ico.U64) > env->max_ga;
            if (0) vex_printf("%s", toFastEP ? "X" : ".");
            addInstr(env, ARM64Instr_XDirect(cdst->Ico.U64,
                                             amPC, ARM64cc_AL,
                                             toFastEP));
         } else {
            /* .. very occasionally .. */
            /* We can't use chaining, so ask for an assisted transfer,
               as that's the only alternative that is allowable. */
            HReg r = iselIntExpr_R(env, next);
            addInstr(env, ARM64Instr_XAssisted(r, amPC, ARM64cc_AL,
                                               Ijk_Boring));
         }
         return;
      }
   }

   /* Case: call/return (==boring) transfer to any address */
   switch (jk) {
      case Ijk_Boring: case Ijk_Ret: case Ijk_Call: {
         HReg r = iselIntExpr_R(env, next);
         ARM64AMode* amPC = mk_baseblock_64bit_access_amode(offsIP);
         if (env->chainingAllowed) {
            addInstr(env, ARM64Instr_XIndir(r, amPC, ARM64cc_AL));
         } else {
            addInstr(env, ARM64Instr_XAssisted(r, amPC, ARM64cc_AL,
                                               Ijk_Boring));
         }
         return;
      }
      default:
         break;
   }

   /* Case: assisted transfer to arbitrary address */
   switch (jk) {
      /* Keep this list in sync with that for Ist_Exit above */
      case Ijk_ClientReq:
      case Ijk_NoDecode:
      case Ijk_NoRedir:
      case Ijk_Sys_syscall:
      case Ijk_InvalICache:
      case Ijk_FlushDCache:
      case Ijk_SigTRAP:
      {
         HReg r = iselIntExpr_R(env, next);
         ARM64AMode* amPC = mk_baseblock_64bit_access_amode(offsIP);
         addInstr(env, ARM64Instr_XAssisted(r, amPC, ARM64cc_AL, jk));
         return;
      }
      default:
         break;
   }

   vex_printf( "\n-- PUT(%d) = ", offsIP);
   ppIRExpr( next );
   vex_printf( "; exit-");
   ppIRJumpKind(jk);
   vex_printf( "\n");
   vassert(0); // are we expecting any other kind?
}


/*---------------------------------------------------------*/
/*--- Insn selector top-level                           ---*/
/*---------------------------------------------------------*/

/* Translate an entire SB to arm64 code. */

HInstrArray* iselSB_ARM64 ( IRSB* bb,
                            VexArch arch_host,
                            const VexArchInfo* archinfo_host,
                            const VexAbiInfo* vbi/*UNUSED*/,
                            Int offs_Host_EvC_Counter,
                            Int offs_Host_EvC_FailAddr,
                            Bool chainingAllowed,
                            Bool addProfInc,
                            Addr64 max_ga )
{
   Int        i, j;
   HReg       hreg, hregHI;
   ISelEnv*   env;
   UInt       hwcaps_host = archinfo_host->hwcaps;
   ARM64AMode *amCounter, *amFailAddr;

   /* sanity ... */
   vassert(arch_host == VexArchARM64);

   /* Check that the host's endianness is as expected. */
   vassert(archinfo_host->endness == VexEndnessLE);

   /* guard against unexpected space regressions */
   vassert(sizeof(ARM64Instr) <= 32);

   /* Make up an initial environment to use. */
   env = LibVEX_Alloc(sizeof(ISelEnv));
   env->vreg_ctr = 0;

   /* Set up output code array. */
   env->code = newHInstrArray();

   /* Copy BB's type env. */
   env->type_env = bb->tyenv;

   /* Make up an IRTemp -> virtual HReg mapping.  This doesn't
      change as we go along. */
   env->n_vregmap = bb->tyenv->types_used;
   env->vregmap   = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
   env->vregmapHI = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));

   /* and finally ... */
   env->chainingAllowed = chainingAllowed;
   env->hwcaps          = hwcaps_host;
   env->previous_rm     = NULL;
   env->max_ga          = max_ga;

   /* For each IR temporary, allocate a suitably-kinded virtual
      register. */
   j = 0;
   for (i = 0; i < env->n_vregmap; i++) {
      hregHI = hreg = INVALID_HREG;
      switch (bb->tyenv->types[i]) {
         case Ity_I1:
         case Ity_I8: case Ity_I16: case Ity_I32: case Ity_I64:
            hreg = mkHReg(j++, HRcInt64, True);
            break;
         case Ity_I128:
            hreg   = mkHReg(j++, HRcInt64, True);
            hregHI = mkHReg(j++, HRcInt64, True);
            break;
         case Ity_F32: // we'll use HRcFlt64 regs for F32 too
         case Ity_F64:
            hreg = mkHReg(j++, HRcFlt64, True);
            break;
         case Ity_V128:
            hreg = mkHReg(j++, HRcVec128, True);
            break;
         case Ity_V256:
            hreg   = mkHReg(j++, HRcVec128, True);
            hregHI = mkHReg(j++, HRcVec128, True);
            break;
         default:
            ppIRType(bb->tyenv->types[i]);
            vpanic("iselBB(arm64): IRTemp type");
      }
      env->vregmap[i]   = hreg;
      env->vregmapHI[i] = hregHI;
   }
   env->vreg_ctr = j;

   /* The very first instruction must be an event check. */
   amCounter  = ARM64AMode_RI9(hregARM64_X21(), offs_Host_EvC_Counter);
   amFailAddr = ARM64AMode_RI9(hregARM64_X21(), offs_Host_EvC_FailAddr);
   addInstr(env, ARM64Instr_EvCheck(amCounter, amFailAddr));
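   /* Roughly speaking, the check decrements the counter found at
      |amCounter| and, when it goes negative, transfers control to the
      address stored at |amFailAddr|; the exact sequence is defined by
      the ARM64Instr_EvCheck emitter. */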

   /* Possibly a block counter increment (for profiling).  At this
      point we don't know the address of the counter, so just pretend
      it is zero.  It will have to be patched later, but before this
      translation is used, by a call to LibVEX_patchProfCtr. */
   if (addProfInc) {
      addInstr(env, ARM64Instr_ProfInc());
   }

   /* Ok, finally we can iterate over the statements. */
   for (i = 0; i < bb->stmts_used; i++)
      iselStmt(env, bb->stmts[i]);

   iselNext(env, bb->next, bb->jumpkind, bb->offsIP);

   /* record the number of vregs we used. */
   env->code->n_vregs = env->vreg_ctr;
   return env->code;
}


/*---------------------------------------------------------------*/
/*--- end                                   host_arm64_isel.c ---*/
/*---------------------------------------------------------------*/