Initial import of valgrind 3.6.0.
diff --git a/VEX/priv/guest_amd64_defs.h b/VEX/priv/guest_amd64_defs.h
new file mode 100644
index 0000000..42451fa
--- /dev/null
+++ b/VEX/priv/guest_amd64_defs.h
@@ -0,0 +1,480 @@
+
+/*---------------------------------------------------------------*/
+/*--- begin guest_amd64_defs.h ---*/
+/*---------------------------------------------------------------*/
+
+/*
+ This file is part of Valgrind, a dynamic binary instrumentation
+ framework.
+
+ Copyright (C) 2004-2010 OpenWorks LLP
+ info@open-works.net
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA.
+
+ The GNU General Public License is contained in the file COPYING.
+
+ Neither the names of the U.S. Department of Energy nor the
+ University of California nor the names of its contributors may be
+ used to endorse or promote products derived from this software
+ without prior written permission.
+*/
+
+/* Only to be used within the guest-amd64 directory. */
+
+#ifndef __VEX_GUEST_AMD64_DEFS_H
+#define __VEX_GUEST_AMD64_DEFS_H
+
+
+/*---------------------------------------------------------*/
+/*--- amd64 to IR conversion ---*/
+/*---------------------------------------------------------*/
+
+/* Convert one amd64 insn to IR. See the type DisOneInstrFn in
+   guest_generic_bb_to_IR.h. */
+extern
+DisResult disInstr_AMD64 ( IRSB* irbb,
+ Bool put_IP,
+ Bool (*resteerOkFn) ( void*, Addr64 ),
+ Bool resteerCisOk,
+ void* callback_opaque,
+ UChar* guest_code,
+ Long delta,
+ Addr64 guest_IP,
+ VexArch guest_arch,
+ VexArchInfo* archinfo,
+ VexAbiInfo* abiinfo,
+ Bool host_bigendian );
+
+/* Used by the optimiser to specialise calls to helpers. */
+extern
+IRExpr* guest_amd64_spechelper ( HChar* function_name,
+ IRExpr** args,
+ IRStmt** precedingStmts,
+ Int n_precedingStmts );
+
+/* Describes to the optimiser which parts of the guest state require
+   precise memory exceptions. This is logically part of the guest
+   state description. */
+extern
+Bool guest_amd64_state_requires_precise_mem_exns ( Int, Int );
+
+extern
+VexGuestLayout amd64guest_layout;
+
+
+/*---------------------------------------------------------*/
+/*--- amd64 guest helpers ---*/
+/*---------------------------------------------------------*/
+
+/* --- CLEAN HELPERS --- */
+
+extern ULong amd64g_calculate_rflags_all (
+ ULong cc_op,
+ ULong cc_dep1, ULong cc_dep2, ULong cc_ndep
+ );
+
+extern ULong amd64g_calculate_rflags_c (
+ ULong cc_op,
+ ULong cc_dep1, ULong cc_dep2, ULong cc_ndep
+ );
+
+extern ULong amd64g_calculate_condition (
+ ULong/*AMD64Condcode*/ cond,
+ ULong cc_op,
+ ULong cc_dep1, ULong cc_dep2, ULong cc_ndep
+ );
+
+extern ULong amd64g_calculate_FXAM ( ULong tag, ULong dbl );
+
+extern ULong amd64g_calculate_RCR (
+ ULong arg, ULong rot_amt, ULong rflags_in, Long sz
+ );
+
+extern ULong amd64g_calculate_RCL (
+ ULong arg, ULong rot_amt, ULong rflags_in, Long sz
+ );
+
+extern ULong amd64g_calculate_pclmul(ULong s1, ULong s2, ULong which);
+
+extern ULong amd64g_check_fldcw ( ULong fpucw );
+
+extern ULong amd64g_create_fpucw ( ULong fpround );
+
+extern ULong amd64g_check_ldmxcsr ( ULong mxcsr );
+
+extern ULong amd64g_create_mxcsr ( ULong sseround );
+
+extern VexEmWarn amd64g_dirtyhelper_FLDENV ( VexGuestAMD64State*, HWord );
+
+extern void amd64g_dirtyhelper_FSTENV ( VexGuestAMD64State*, HWord );
+
+/* Translate a guest virtual_addr into a guest linear address by
+ consulting the supplied LDT/GDT structures. Their representation
+ must be as specified in pub/libvex_guest_amd64.h. To indicate a
+ translation failure, 1<<32 is returned. On success, the lower 32
+ bits of the returned result indicate the linear address.
+*/
+//extern
+//ULong amd64g_use_seg_selector ( HWord ldt, HWord gdt,
+// UInt seg_selector, UInt virtual_addr );
+
+extern ULong amd64g_calculate_mmx_pmaddwd ( ULong, ULong );
+extern ULong amd64g_calculate_mmx_psadbw ( ULong, ULong );
+extern ULong amd64g_calculate_mmx_pmovmskb ( ULong );
+extern ULong amd64g_calculate_sse_pmovmskb ( ULong w64hi, ULong w64lo );
+
+
+/* --- DIRTY HELPERS --- */
+
+extern ULong amd64g_dirtyhelper_loadF80le ( ULong/*addr*/ );
+
+extern void amd64g_dirtyhelper_storeF80le ( ULong/*addr*/, ULong/*data*/ );
+
+extern void amd64g_dirtyhelper_CPUID_baseline ( VexGuestAMD64State* st );
+extern void amd64g_dirtyhelper_CPUID_sse3_and_cx16 ( VexGuestAMD64State* st );
+extern void amd64g_dirtyhelper_CPUID_sse42_and_cx16 ( VexGuestAMD64State* st );
+
+extern void amd64g_dirtyhelper_FINIT ( VexGuestAMD64State* );
+
+extern void amd64g_dirtyhelper_FXSAVE ( VexGuestAMD64State*, HWord );
+
+extern ULong amd64g_dirtyhelper_RDTSC ( void );
+
+extern ULong amd64g_dirtyhelper_IN ( ULong portno, ULong sz/*1,2 or 4*/ );
+extern void amd64g_dirtyhelper_OUT ( ULong portno, ULong data,
+ ULong sz/*1,2 or 4*/ );
+
+extern void amd64g_dirtyhelper_SxDT ( void* address,
+ ULong op /* 0 or 1 */ );
+
+/* Helps with PCMP{I,E}STR{I,M}.
+
+   CALLED FROM GENERATED CODE: DIRTY HELPER(s). (Not really dirty,
+   in fact; it could be a clean helper, were it not that we can't
+   pass 2 x V128 by value to a clean helper, nor have one returned.)
+ Reads guest state, writes to guest state for the xSTRM cases, no
+ accesses of memory, is a pure function.
+
+   opc4_and_imm contains (4th byte of opcode << 8) | the-imm8-byte so
+   the callee knows which I/E and I/M variant it is dealing with and
+   what the specific operation is. The 4th byte of the opcode is in
+   the range 0x60 to 0x63:
+ istri 66 0F 3A 63
+ istrm 66 0F 3A 62
+ estri 66 0F 3A 61
+ estrm 66 0F 3A 60
+
+ gstOffL and gstOffR are the guest state offsets for the two XMM
+ register inputs. We never have to deal with the memory case since
+ that is handled by pre-loading the relevant value into the fake
+ XMM16 register.
+
+ For ESTRx variants, edxIN and eaxIN hold the values of those two
+ registers.
+
+ In all cases, the bottom 16 bits of the result contain the new
+ OSZACP %rflags values. For xSTRI variants, bits[31:16] of the
+ result hold the new %ecx value. For xSTRM variants, the helper
+ writes the result directly to the guest XMM0.
+
+ Declarable side effects: in all cases, reads guest state at
+ [gstOffL, +16) and [gstOffR, +16). For xSTRM variants, also writes
+ guest_XMM0.
+
+ Is expected to be called with opc_and_imm combinations which have
+ actually been validated, and will assert if otherwise. The front
+ end should ensure we're only called with verified values.
+*/
+extern ULong amd64g_dirtyhelper_PCMPxSTRx (
+ VexGuestAMD64State*,
+ HWord opc4_and_imm,
+ HWord gstOffL, HWord gstOffR,
+ HWord edxIN, HWord eaxIN
+ );
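+
+/* Illustrative only: with the encoding described above, a front end
+   handling "pcmpistri $0x4A, %xmm2, %xmm1" (4th opcode byte 0x63)
+   would pass opc4_and_imm = (0x63 << 8) | 0x4A = 0x634A. */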
+
+
+//extern void amd64g_dirtyhelper_CPUID_sse0 ( VexGuestAMD64State* );
+//extern void amd64g_dirtyhelper_CPUID_sse1 ( VexGuestAMD64State* );
+//extern void amd64g_dirtyhelper_CPUID_sse2 ( VexGuestAMD64State* );
+
+//extern void amd64g_dirtyhelper_FSAVE ( VexGuestAMD64State*, HWord );
+
+//extern VexEmWarn
+// amd64g_dirtyhelper_FRSTOR ( VexGuestAMD64State*, HWord );
+
+//extern void amd64g_dirtyhelper_FSTENV ( VexGuestAMD64State*, HWord );
+
+//extern VexEmWarn
+// amd64g_dirtyhelper_FLDENV ( VexGuestAMD64State*, HWord );
+
+
+
+/*---------------------------------------------------------*/
+/*--- Condition code stuff ---*/
+/*---------------------------------------------------------*/
+
+/* rflags masks */
+#define AMD64G_CC_SHIFT_O 11
+#define AMD64G_CC_SHIFT_S 7
+#define AMD64G_CC_SHIFT_Z 6
+#define AMD64G_CC_SHIFT_A 4
+#define AMD64G_CC_SHIFT_C 0
+#define AMD64G_CC_SHIFT_P 2
+
+#define AMD64G_CC_MASK_O (1ULL << AMD64G_CC_SHIFT_O)
+#define AMD64G_CC_MASK_S (1ULL << AMD64G_CC_SHIFT_S)
+#define AMD64G_CC_MASK_Z (1ULL << AMD64G_CC_SHIFT_Z)
+#define AMD64G_CC_MASK_A (1ULL << AMD64G_CC_SHIFT_A)
+#define AMD64G_CC_MASK_C (1ULL << AMD64G_CC_SHIFT_C)
+#define AMD64G_CC_MASK_P (1ULL << AMD64G_CC_SHIFT_P)
+
+/* FPU flag masks */
+#define AMD64G_FC_SHIFT_C3 14
+#define AMD64G_FC_SHIFT_C2 10
+#define AMD64G_FC_SHIFT_C1 9
+#define AMD64G_FC_SHIFT_C0 8
+
+#define AMD64G_FC_MASK_C3 (1ULL << AMD64G_FC_SHIFT_C3)
+#define AMD64G_FC_MASK_C2 (1ULL << AMD64G_FC_SHIFT_C2)
+#define AMD64G_FC_MASK_C1 (1ULL << AMD64G_FC_SHIFT_C1)
+#define AMD64G_FC_MASK_C0 (1ULL << AMD64G_FC_SHIFT_C0)
+
+
+/* %RFLAGS thunk descriptors. A four-word thunk is used to record
+ details of the most recent flag-setting operation, so the flags can
+ be computed later if needed. It is possible to do this a little
+ more efficiently using a 3-word thunk, but that makes it impossible
+ to describe the flag data dependencies sufficiently accurately for
+ Memcheck. Hence 4 words are used, with minimal loss of efficiency.
+
+ The four words are:
+
+ CC_OP, which describes the operation.
+
+ CC_DEP1 and CC_DEP2. These are arguments to the operation.
+ We want Memcheck to believe that the resulting flags are
+ data-dependent on both CC_DEP1 and CC_DEP2, hence the
+ name DEP.
+
+ CC_NDEP. This is a 3rd argument to the operation which is
+ sometimes needed. We arrange things so that Memcheck does
+ not believe the resulting flags are data-dependent on CC_NDEP
+ ("not dependent").
+
+ To make Memcheck believe that (the definedness of) the encoded
+ flags depends only on (the definedness of) CC_DEP1 and CC_DEP2
+ requires two things:
+
+ (1) In the guest state layout info (amd64guest_layout), CC_OP and
+ CC_NDEP are marked as always defined.
+
+ (2) When passing the thunk components to an evaluation function
+ (calculate_condition, calculate_eflags, calculate_eflags_c) the
+ IRCallee's mcx_mask must be set so as to exclude from
+ consideration all passed args except CC_DEP1 and CC_DEP2.
+
+ Strictly speaking only (2) is necessary for correctness. However,
+ (1) helps efficiency in that since (2) means we never ask about the
+ definedness of CC_OP or CC_NDEP, we may as well not even bother to
+ track their definedness.
+
+ When building the thunk, it is always necessary to write words into
+ CC_DEP1 and CC_DEP2, even if those args are not used given the
+   CC_OP field (eg, CC_DEP2 is not used if CC_OP is LOGICB/W/L/Q).
+ This is important because otherwise Memcheck could give false
+ positives as it does not understand the relationship between the
+ CC_OP field and CC_DEP1 and CC_DEP2, and so believes that the
+ definedness of the stored flags always depends on both CC_DEP1 and
+ CC_DEP2.
+
+ However, it is only necessary to set CC_NDEP when the CC_OP value
+ requires it, because Memcheck ignores CC_NDEP, and the evaluation
+ functions do understand the CC_OP fields and will only examine
+ CC_NDEP for suitable values of CC_OP.
+
+ A summary of the field usages is:
+
+ Operation DEP1 DEP2 NDEP
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ add/sub/mul first arg second arg unused
+
+ adc/sbb first arg (second arg)
+ XOR old_carry old_carry
+
+ and/or/xor result zero unused
+
+ inc/dec result zero old_carry
+
+ shl/shr/sar result subshifted- unused
+ result
+
+ rol/ror result zero old_flags
+
+ copy old_flags zero unused.
+
+
+ Therefore Memcheck will believe the following:
+
+ * add/sub/mul -- definedness of result flags depends on definedness
+ of both args.
+
+ * adc/sbb -- definedness of result flags depends on definedness of
+ both args and definedness of the old C flag. Because only two
+ DEP fields are available, the old C flag is XOR'd into the second
+ arg so that Memcheck sees the data dependency on it. That means
+ the NDEP field must contain a second copy of the old C flag
+ so that the evaluation functions can correctly recover the second
+ arg.
+
+ * and/or/xor are straightforward -- definedness of result flags
+ depends on definedness of result value.
+
+ * inc/dec -- definedness of result flags depends only on
+ definedness of result. This isn't really true -- it also depends
+ on the old C flag. However, we don't want Memcheck to see that,
+ and so the old C flag must be passed in NDEP and not in DEP2.
+ It's inconceivable that a compiler would generate code that puts
+ the C flag in an undefined state, then does an inc/dec, which
+ leaves C unchanged, and then makes a conditional jump/move based
+ on C. So our fiction seems a good approximation.
+
+ * shl/shr/sar -- straightforward, again, definedness of result
+ flags depends on definedness of result value. The subshifted
+ value (value shifted one less) is also needed, but its
+ definedness is the same as the definedness of the shifted value.
+
+   * rol/ror -- these only set O and C, and leave A Z S P alone.
+ However it seems prudent (as per inc/dec) to say the definedness
+ of all resulting flags depends on the definedness of the result,
+ hence the old flags must go in as NDEP and not DEP2.
+
+ * rcl/rcr are too difficult to do in-line, and so are done by a
+ helper function. They are not part of this scheme. The helper
+ function takes the value to be rotated, the rotate amount and the
+ old flags, and returns the new flags and the rotated value.
+ Since the helper's mcx_mask does not have any set bits, Memcheck
+ will lazily propagate undefinedness from any of the 3 args into
+ both results (flags and actual value).
+*/
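+
+/* Worked example (illustrative): after "addl %ebx, %eax" the front
+   end would store the thunk
+      CC_OP   = AMD64G_CC_OP_ADDL
+      CC_DEP1 = argL (the old value of %eax)
+      CC_DEP2 = argR (%ebx)
+      CC_NDEP = unused for ADD
+   and a later "jz" is then evaluated lazily as
+   amd64g_calculate_condition(AMD64CondZ, CC_OP, CC_DEP1, CC_DEP2,
+   CC_NDEP), without the flags ever being computed eagerly. */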
+enum {
+ AMD64G_CC_OP_COPY=0, /* DEP1 = current flags, DEP2 = 0, NDEP = unused */
+ /* just copy DEP1 to output */
+
+ AMD64G_CC_OP_ADDB, /* 1 */
+ AMD64G_CC_OP_ADDW, /* 2 DEP1 = argL, DEP2 = argR, NDEP = unused */
+ AMD64G_CC_OP_ADDL, /* 3 */
+ AMD64G_CC_OP_ADDQ, /* 4 */
+
+ AMD64G_CC_OP_SUBB, /* 5 */
+ AMD64G_CC_OP_SUBW, /* 6 DEP1 = argL, DEP2 = argR, NDEP = unused */
+ AMD64G_CC_OP_SUBL, /* 7 */
+ AMD64G_CC_OP_SUBQ, /* 8 */
+
+ AMD64G_CC_OP_ADCB, /* 9 */
+ AMD64G_CC_OP_ADCW, /* 10 DEP1 = argL, DEP2 = argR ^ oldCarry, NDEP = oldCarry */
+ AMD64G_CC_OP_ADCL, /* 11 */
+ AMD64G_CC_OP_ADCQ, /* 12 */
+
+ AMD64G_CC_OP_SBBB, /* 13 */
+ AMD64G_CC_OP_SBBW, /* 14 DEP1 = argL, DEP2 = argR ^ oldCarry, NDEP = oldCarry */
+ AMD64G_CC_OP_SBBL, /* 15 */
+ AMD64G_CC_OP_SBBQ, /* 16 */
+
+ AMD64G_CC_OP_LOGICB, /* 17 */
+ AMD64G_CC_OP_LOGICW, /* 18 DEP1 = result, DEP2 = 0, NDEP = unused */
+ AMD64G_CC_OP_LOGICL, /* 19 */
+ AMD64G_CC_OP_LOGICQ, /* 20 */
+
+ AMD64G_CC_OP_INCB, /* 21 */
+ AMD64G_CC_OP_INCW, /* 22 DEP1 = result, DEP2 = 0, NDEP = oldCarry (0 or 1) */
+ AMD64G_CC_OP_INCL, /* 23 */
+ AMD64G_CC_OP_INCQ, /* 24 */
+
+ AMD64G_CC_OP_DECB, /* 25 */
+ AMD64G_CC_OP_DECW, /* 26 DEP1 = result, DEP2 = 0, NDEP = oldCarry (0 or 1) */
+ AMD64G_CC_OP_DECL, /* 27 */
+ AMD64G_CC_OP_DECQ, /* 28 */
+
+ AMD64G_CC_OP_SHLB, /* 29 DEP1 = res, DEP2 = res', NDEP = unused */
+ AMD64G_CC_OP_SHLW, /* 30 where res' is like res but shifted one bit less */
+ AMD64G_CC_OP_SHLL, /* 31 */
+ AMD64G_CC_OP_SHLQ, /* 32 */
+
+ AMD64G_CC_OP_SHRB, /* 33 DEP1 = res, DEP2 = res', NDEP = unused */
+ AMD64G_CC_OP_SHRW, /* 34 where res' is like res but shifted one bit less */
+ AMD64G_CC_OP_SHRL, /* 35 */
+ AMD64G_CC_OP_SHRQ, /* 36 */
+
+ AMD64G_CC_OP_ROLB, /* 37 */
+ AMD64G_CC_OP_ROLW, /* 38 DEP1 = res, DEP2 = 0, NDEP = old flags */
+ AMD64G_CC_OP_ROLL, /* 39 */
+ AMD64G_CC_OP_ROLQ, /* 40 */
+
+ AMD64G_CC_OP_RORB, /* 41 */
+ AMD64G_CC_OP_RORW, /* 42 DEP1 = res, DEP2 = 0, NDEP = old flags */
+ AMD64G_CC_OP_RORL, /* 43 */
+ AMD64G_CC_OP_RORQ, /* 44 */
+
+ AMD64G_CC_OP_UMULB, /* 45 */
+ AMD64G_CC_OP_UMULW, /* 46 DEP1 = argL, DEP2 = argR, NDEP = unused */
+ AMD64G_CC_OP_UMULL, /* 47 */
+ AMD64G_CC_OP_UMULQ, /* 48 */
+
+ AMD64G_CC_OP_SMULB, /* 49 */
+ AMD64G_CC_OP_SMULW, /* 50 DEP1 = argL, DEP2 = argR, NDEP = unused */
+ AMD64G_CC_OP_SMULL, /* 51 */
+ AMD64G_CC_OP_SMULQ, /* 52 */
+
+ AMD64G_CC_OP_NUMBER
+};
+
+typedef
+ enum {
+ AMD64CondO = 0, /* overflow */
+ AMD64CondNO = 1, /* no overflow */
+
+ AMD64CondB = 2, /* below */
+ AMD64CondNB = 3, /* not below */
+
+ AMD64CondZ = 4, /* zero */
+ AMD64CondNZ = 5, /* not zero */
+
+ AMD64CondBE = 6, /* below or equal */
+ AMD64CondNBE = 7, /* not below or equal */
+
+ AMD64CondS = 8, /* negative */
+ AMD64CondNS = 9, /* not negative */
+
+ AMD64CondP = 10, /* parity even */
+ AMD64CondNP = 11, /* not parity even */
+
+      AMD64CondL      = 12, /* less */
+ AMD64CondNL = 13, /* not less */
+
+ AMD64CondLE = 14, /* less or equal */
+ AMD64CondNLE = 15, /* not less or equal */
+
+ AMD64CondAlways = 16 /* HACK */
+ }
+ AMD64Condcode;
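+
+/* Note the pairing: each negated condition is its base condition with
+   bit 0 set (eg AMD64CondNZ == AMD64CondZ | 1), which is why
+   amd64g_calculate_condition can evaluate the base predicate and then
+   invert it with "inv = cond & 1". */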
+
+#endif /* ndef __VEX_GUEST_AMD64_DEFS_H */
+
+/*---------------------------------------------------------------*/
+/*--- end guest_amd64_defs.h ---*/
+/*---------------------------------------------------------------*/
diff --git a/VEX/priv/guest_amd64_helpers.c b/VEX/priv/guest_amd64_helpers.c
new file mode 100644
index 0000000..a920ecd
--- /dev/null
+++ b/VEX/priv/guest_amd64_helpers.c
@@ -0,0 +1,2874 @@
+
+/*---------------------------------------------------------------*/
+/*--- begin guest_amd64_helpers.c ---*/
+/*---------------------------------------------------------------*/
+
+/*
+ This file is part of Valgrind, a dynamic binary instrumentation
+ framework.
+
+ Copyright (C) 2004-2010 OpenWorks LLP
+ info@open-works.net
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA.
+
+ The GNU General Public License is contained in the file COPYING.
+
+ Neither the names of the U.S. Department of Energy nor the
+ University of California nor the names of its contributors may be
+ used to endorse or promote products derived from this software
+ without prior written permission.
+*/
+
+#include "libvex_basictypes.h"
+#include "libvex_emwarn.h"
+#include "libvex_guest_amd64.h"
+#include "libvex_ir.h"
+#include "libvex.h"
+
+#include "main_util.h"
+#include "guest_generic_bb_to_IR.h"
+#include "guest_amd64_defs.h"
+#include "guest_generic_x87.h"
+
+
+/* This file contains helper functions for amd64 guest code.
+ Calls to these functions are generated by the back end.
+ These calls are of course in the host machine code and
+ this file will be compiled to host machine code, so that
+ all makes sense.
+
+   Only change the signatures of these helper functions very
+   carefully. If you change the signature here, you'll have to change
+   the parameters passed to it in the IR calls constructed by
+   guest_amd64_toIR.c.
+
+ The convention used is that all functions called from generated
+ code are named amd64g_<something>, and any function whose name lacks
+ that prefix is not called from generated code. Note that some
+ LibVEX_* functions can however be called by VEX's client, but that
+ is not the same as calling them from VEX-generated code.
+*/
+
+
+/* Set to 1 to get detailed profiling info about use of the flag
+ machinery. */
+#define PROFILE_RFLAGS 0
+
+
+/*---------------------------------------------------------------*/
+/*--- %rflags run-time helpers. ---*/
+/*---------------------------------------------------------------*/
+
+/* Do 64x64 -> 128 signed/unsigned multiplies, for computing flags
+ after imulq/mulq. */
+
+static void mullS64 ( Long u, Long v, Long* rHi, Long* rLo )
+{
+ ULong u0, v0, w0;
+ Long u1, v1, w1, w2, t;
+ u0 = u & 0xFFFFFFFFULL;
+ u1 = u >> 32;
+ v0 = v & 0xFFFFFFFFULL;
+ v1 = v >> 32;
+ w0 = u0 * v0;
+ t = u1 * v0 + (w0 >> 32);
+ w1 = t & 0xFFFFFFFFULL;
+ w2 = t >> 32;
+ w1 = u0 * v1 + w1;
+ *rHi = u1 * v1 + w2 + (w1 >> 32);
+ *rLo = u * v;
+}
+
+static void mullU64 ( ULong u, ULong v, ULong* rHi, ULong* rLo )
+{
+ ULong u0, v0, w0;
+ ULong u1, v1, w1,w2,t;
+ u0 = u & 0xFFFFFFFFULL;
+ u1 = u >> 32;
+ v0 = v & 0xFFFFFFFFULL;
+ v1 = v >> 32;
+ w0 = u0 * v0;
+ t = u1 * v0 + (w0 >> 32);
+ w1 = t & 0xFFFFFFFFULL;
+ w2 = t >> 32;
+ w1 = u0 * v1 + w1;
+ *rHi = u1 * v1 + w2 + (w1 >> 32);
+ *rLo = u * v;
+}
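+
+/* Quick sanity check (illustrative): mullU64(0xFFFFFFFFFFFFFFFFULL, 2,
+   &hi, &lo) computes (2^64 - 1) * 2 = 2^65 - 2, giving
+   hi = 1 and lo = 0xFFFFFFFFFFFFFFFEULL. */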
+
+
+static const UChar parity_table[256] = {
+ AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
+ 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
+ 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
+ AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
+ 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
+ AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
+ AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
+ 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
+ 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
+ AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
+ AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
+ 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
+ AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
+ 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
+ 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
+ AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
+ 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
+ AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
+ AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
+ 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
+ AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
+ 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
+ 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
+ AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
+ AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
+ 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
+ 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
+ AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
+ 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
+ AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
+ AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
+ 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
+};
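+
+/* The table encodes the x86 PF rule: PF is set iff the low byte of
+   the result contains an even number of 1 bits. A sketch of how its
+   contents could be regenerated (illustrative only, not part of the
+   build):
+
+      Int i, j, bits;
+      for (i = 0; i < 256; i++) {
+         for (bits = 0, j = 0; j < 8; j++)
+            bits += (i >> j) & 1;
+         table[i] = (bits & 1) ? 0 : AMD64G_CC_MASK_P;
+      }
+*/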
+
+/* generalised left-shifter */
+static inline Long lshift ( Long x, Int n )
+{
+ if (n >= 0)
+ return x << n;
+ else
+ return x >> (-n);
+}
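+
+/* Note that x is signed, so a negative n gives an arithmetic right
+   shift; eg lshift(res, 8 - 64) in the quad-sized cases below shifts
+   res right by 56 places, bringing the sign bit down to bit 7. */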
+
+/* identity on ULong */
+static inline ULong idULong ( ULong x )
+{
+ return x;
+}
+
+
+#define PREAMBLE(__data_bits) \
+ /* const */ ULong DATA_MASK \
+ = __data_bits==8 \
+ ? 0xFFULL \
+ : (__data_bits==16 \
+ ? 0xFFFFULL \
+ : (__data_bits==32 \
+ ? 0xFFFFFFFFULL \
+ : 0xFFFFFFFFFFFFFFFFULL)); \
+ /* const */ ULong SIGN_MASK = 1ULL << (__data_bits - 1); \
+ /* const */ ULong CC_DEP1 = cc_dep1_formal; \
+ /* const */ ULong CC_DEP2 = cc_dep2_formal; \
+ /* const */ ULong CC_NDEP = cc_ndep_formal; \
+ /* Four bogus assignments, which hopefully gcc can */ \
+ /* optimise away, and which stop it complaining about */ \
+ /* unused variables. */ \
+ SIGN_MASK = SIGN_MASK; \
+ DATA_MASK = DATA_MASK; \
+ CC_DEP2 = CC_DEP2; \
+ CC_NDEP = CC_NDEP;
+
+
+/*-------------------------------------------------------------*/
+
+#define ACTIONS_ADD(DATA_BITS,DATA_UTYPE) \
+{ \
+ PREAMBLE(DATA_BITS); \
+ { Long cf, pf, af, zf, sf, of; \
+ Long argL, argR, res; \
+ argL = CC_DEP1; \
+ argR = CC_DEP2; \
+ res = argL + argR; \
+ cf = (DATA_UTYPE)res < (DATA_UTYPE)argL; \
+ pf = parity_table[(UChar)res]; \
+ af = (res ^ argL ^ argR) & 0x10; \
+ zf = ((DATA_UTYPE)res == 0) << 6; \
+ sf = lshift(res, 8 - DATA_BITS) & 0x80; \
+ of = lshift((argL ^ argR ^ -1) & (argL ^ res), \
+ 12 - DATA_BITS) & AMD64G_CC_MASK_O; \
+ return cf | pf | af | zf | sf | of; \
+ } \
+}
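+
+/* The OF term above is the usual signed-overflow test: overflow
+   occurs when both addends have the same sign and the result's sign
+   differs. Illustrative 8-bit case: argL = 0x7F, argR = 0x01 gives
+   res = 0x80; (argL ^ argR ^ -1) & (argL ^ res) has bit 7 set, and
+   lshift by 12 - 8 = 4 moves it to bit 11, which is
+   AMD64G_CC_MASK_O. */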
+
+/*-------------------------------------------------------------*/
+
+#define ACTIONS_SUB(DATA_BITS,DATA_UTYPE) \
+{ \
+ PREAMBLE(DATA_BITS); \
+ { Long cf, pf, af, zf, sf, of; \
+ Long argL, argR, res; \
+ argL = CC_DEP1; \
+ argR = CC_DEP2; \
+ res = argL - argR; \
+ cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR; \
+ pf = parity_table[(UChar)res]; \
+ af = (res ^ argL ^ argR) & 0x10; \
+ zf = ((DATA_UTYPE)res == 0) << 6; \
+ sf = lshift(res, 8 - DATA_BITS) & 0x80; \
+ of = lshift((argL ^ argR) & (argL ^ res), \
+ 12 - DATA_BITS) & AMD64G_CC_MASK_O; \
+ return cf | pf | af | zf | sf | of; \
+ } \
+}
+
+/*-------------------------------------------------------------*/
+
+#define ACTIONS_ADC(DATA_BITS,DATA_UTYPE) \
+{ \
+ PREAMBLE(DATA_BITS); \
+ { Long cf, pf, af, zf, sf, of; \
+ Long argL, argR, oldC, res; \
+ oldC = CC_NDEP & AMD64G_CC_MASK_C; \
+ argL = CC_DEP1; \
+ argR = CC_DEP2 ^ oldC; \
+ res = (argL + argR) + oldC; \
+ if (oldC) \
+ cf = (DATA_UTYPE)res <= (DATA_UTYPE)argL; \
+ else \
+ cf = (DATA_UTYPE)res < (DATA_UTYPE)argL; \
+ pf = parity_table[(UChar)res]; \
+ af = (res ^ argL ^ argR) & 0x10; \
+ zf = ((DATA_UTYPE)res == 0) << 6; \
+ sf = lshift(res, 8 - DATA_BITS) & 0x80; \
+ of = lshift((argL ^ argR ^ -1) & (argL ^ res), \
+ 12 - DATA_BITS) & AMD64G_CC_MASK_O; \
+ return cf | pf | af | zf | sf | of; \
+ } \
+}
+
+/*-------------------------------------------------------------*/
+
+#define ACTIONS_SBB(DATA_BITS,DATA_UTYPE) \
+{ \
+ PREAMBLE(DATA_BITS); \
+ { Long cf, pf, af, zf, sf, of; \
+ Long argL, argR, oldC, res; \
+ oldC = CC_NDEP & AMD64G_CC_MASK_C; \
+ argL = CC_DEP1; \
+ argR = CC_DEP2 ^ oldC; \
+ res = (argL - argR) - oldC; \
+ if (oldC) \
+ cf = (DATA_UTYPE)argL <= (DATA_UTYPE)argR; \
+ else \
+ cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR; \
+ pf = parity_table[(UChar)res]; \
+ af = (res ^ argL ^ argR) & 0x10; \
+ zf = ((DATA_UTYPE)res == 0) << 6; \
+ sf = lshift(res, 8 - DATA_BITS) & 0x80; \
+ of = lshift((argL ^ argR) & (argL ^ res), \
+ 12 - DATA_BITS) & AMD64G_CC_MASK_O; \
+ return cf | pf | af | zf | sf | of; \
+ } \
+}
+
+/*-------------------------------------------------------------*/
+
+#define ACTIONS_LOGIC(DATA_BITS,DATA_UTYPE) \
+{ \
+ PREAMBLE(DATA_BITS); \
+ { Long cf, pf, af, zf, sf, of; \
+ cf = 0; \
+ pf = parity_table[(UChar)CC_DEP1]; \
+ af = 0; \
+ zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6; \
+ sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80; \
+ of = 0; \
+ return cf | pf | af | zf | sf | of; \
+ } \
+}
+
+/*-------------------------------------------------------------*/
+
+#define ACTIONS_INC(DATA_BITS,DATA_UTYPE) \
+{ \
+ PREAMBLE(DATA_BITS); \
+ { Long cf, pf, af, zf, sf, of; \
+ Long argL, argR, res; \
+ res = CC_DEP1; \
+ argL = res - 1; \
+ argR = 1; \
+ cf = CC_NDEP & AMD64G_CC_MASK_C; \
+ pf = parity_table[(UChar)res]; \
+ af = (res ^ argL ^ argR) & 0x10; \
+ zf = ((DATA_UTYPE)res == 0) << 6; \
+ sf = lshift(res, 8 - DATA_BITS) & 0x80; \
+ of = ((res & DATA_MASK) == SIGN_MASK) << 11; \
+ return cf | pf | af | zf | sf | of; \
+ } \
+}
+
+/*-------------------------------------------------------------*/
+
+#define ACTIONS_DEC(DATA_BITS,DATA_UTYPE) \
+{ \
+ PREAMBLE(DATA_BITS); \
+ { Long cf, pf, af, zf, sf, of; \
+ Long argL, argR, res; \
+ res = CC_DEP1; \
+ argL = res + 1; \
+ argR = 1; \
+ cf = CC_NDEP & AMD64G_CC_MASK_C; \
+ pf = parity_table[(UChar)res]; \
+ af = (res ^ argL ^ argR) & 0x10; \
+ zf = ((DATA_UTYPE)res == 0) << 6; \
+ sf = lshift(res, 8 - DATA_BITS) & 0x80; \
+ of = ((res & DATA_MASK) \
+ == ((ULong)SIGN_MASK - 1)) << 11; \
+ return cf | pf | af | zf | sf | of; \
+ } \
+}
+
+/*-------------------------------------------------------------*/
+
+#define ACTIONS_SHL(DATA_BITS,DATA_UTYPE) \
+{ \
+ PREAMBLE(DATA_BITS); \
+ { Long cf, pf, af, zf, sf, of; \
+ cf = (CC_DEP2 >> (DATA_BITS - 1)) & AMD64G_CC_MASK_C; \
+ pf = parity_table[(UChar)CC_DEP1]; \
+ af = 0; /* undefined */ \
+ zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6; \
+ sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80; \
+ /* of is defined if shift count == 1 */ \
+ of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS) \
+ & AMD64G_CC_MASK_O; \
+ return cf | pf | af | zf | sf | of; \
+ } \
+}
+
+/*-------------------------------------------------------------*/
+
+#define ACTIONS_SHR(DATA_BITS,DATA_UTYPE) \
+{ \
+ PREAMBLE(DATA_BITS); \
+ { Long cf, pf, af, zf, sf, of; \
+ cf = CC_DEP2 & 1; \
+ pf = parity_table[(UChar)CC_DEP1]; \
+ af = 0; /* undefined */ \
+ zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6; \
+ sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80; \
+ /* of is defined if shift count == 1 */ \
+ of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS) \
+ & AMD64G_CC_MASK_O; \
+ return cf | pf | af | zf | sf | of; \
+ } \
+}
+
+/*-------------------------------------------------------------*/
+
+/* ROL: cf' = lsb(result). of' = msb(result) ^ lsb(result). */
+/* DEP1 = result, NDEP = old flags */
+#define ACTIONS_ROL(DATA_BITS,DATA_UTYPE) \
+{ \
+ PREAMBLE(DATA_BITS); \
+ { Long fl \
+ = (CC_NDEP & ~(AMD64G_CC_MASK_O | AMD64G_CC_MASK_C)) \
+ | (AMD64G_CC_MASK_C & CC_DEP1) \
+ | (AMD64G_CC_MASK_O & (lshift(CC_DEP1, \
+ 11-(DATA_BITS-1)) \
+ ^ lshift(CC_DEP1, 11))); \
+ return fl; \
+ } \
+}
+
+/*-------------------------------------------------------------*/
+
+/* ROR: cf' = msb(result). of' = msb(result) ^ msb-1(result). */
+/* DEP1 = result, NDEP = old flags */
+#define ACTIONS_ROR(DATA_BITS,DATA_UTYPE) \
+{ \
+ PREAMBLE(DATA_BITS); \
+ { Long fl \
+ = (CC_NDEP & ~(AMD64G_CC_MASK_O | AMD64G_CC_MASK_C)) \
+ | (AMD64G_CC_MASK_C & (CC_DEP1 >> (DATA_BITS-1))) \
+ | (AMD64G_CC_MASK_O & (lshift(CC_DEP1, \
+ 11-(DATA_BITS-1)) \
+ ^ lshift(CC_DEP1, 11-(DATA_BITS-1)+1))); \
+ return fl; \
+ } \
+}
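+
+/* Illustrative 8-bit ROR check: rotating 0x01 right by 1 gives
+   CC_DEP1 = 0x80, so cf' = msb = 1 and of' = msb ^ msb-1 = 1 ^ 0 = 1:
+   lshift(0x80,4) ^ lshift(0x80,5) = 0x800 ^ 0x1000, and masking with
+   AMD64G_CC_MASK_O (bit 11) keeps the 0x800. */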
+
+/*-------------------------------------------------------------*/
+
+#define ACTIONS_UMUL(DATA_BITS, DATA_UTYPE, NARROWtoU, \
+ DATA_U2TYPE, NARROWto2U) \
+{ \
+ PREAMBLE(DATA_BITS); \
+ { Long cf, pf, af, zf, sf, of; \
+ DATA_UTYPE hi; \
+ DATA_UTYPE lo \
+ = NARROWtoU( ((DATA_UTYPE)CC_DEP1) \
+ * ((DATA_UTYPE)CC_DEP2) ); \
+ DATA_U2TYPE rr \
+ = NARROWto2U( \
+ ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP1)) \
+ * ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP2)) ); \
+ hi = NARROWtoU(rr >>/*u*/ DATA_BITS); \
+ cf = (hi != 0); \
+ pf = parity_table[(UChar)lo]; \
+ af = 0; /* undefined */ \
+ zf = (lo == 0) << 6; \
+ sf = lshift(lo, 8 - DATA_BITS) & 0x80; \
+ of = cf << 11; \
+ return cf | pf | af | zf | sf | of; \
+ } \
+}
+
+/*-------------------------------------------------------------*/
+
+#define ACTIONS_SMUL(DATA_BITS, DATA_STYPE, NARROWtoS, \
+ DATA_S2TYPE, NARROWto2S) \
+{ \
+ PREAMBLE(DATA_BITS); \
+ { Long cf, pf, af, zf, sf, of; \
+ DATA_STYPE hi; \
+ DATA_STYPE lo \
+ = NARROWtoS( ((DATA_STYPE)CC_DEP1) \
+ * ((DATA_STYPE)CC_DEP2) ); \
+ DATA_S2TYPE rr \
+ = NARROWto2S( \
+ ((DATA_S2TYPE)((DATA_STYPE)CC_DEP1)) \
+ * ((DATA_S2TYPE)((DATA_STYPE)CC_DEP2)) ); \
+ hi = NARROWtoS(rr >>/*s*/ DATA_BITS); \
+ cf = (hi != (lo >>/*s*/ (DATA_BITS-1))); \
+ pf = parity_table[(UChar)lo]; \
+ af = 0; /* undefined */ \
+ zf = (lo == 0) << 6; \
+ sf = lshift(lo, 8 - DATA_BITS) & 0x80; \
+ of = cf << 11; \
+ return cf | pf | af | zf | sf | of; \
+ } \
+}
+
+/*-------------------------------------------------------------*/
+
+#define ACTIONS_UMULQ \
+{ \
+ PREAMBLE(64); \
+ { Long cf, pf, af, zf, sf, of; \
+ ULong lo, hi; \
+ mullU64( (ULong)CC_DEP1, (ULong)CC_DEP2, &hi, &lo ); \
+ cf = (hi != 0); \
+ pf = parity_table[(UChar)lo]; \
+ af = 0; /* undefined */ \
+ zf = (lo == 0) << 6; \
+ sf = lshift(lo, 8 - 64) & 0x80; \
+ of = cf << 11; \
+ return cf | pf | af | zf | sf | of; \
+ } \
+}
+
+/*-------------------------------------------------------------*/
+
+#define ACTIONS_SMULQ \
+{ \
+ PREAMBLE(64); \
+ { Long cf, pf, af, zf, sf, of; \
+ Long lo, hi; \
+ mullS64( (Long)CC_DEP1, (Long)CC_DEP2, &hi, &lo ); \
+ cf = (hi != (lo >>/*s*/ (64-1))); \
+ pf = parity_table[(UChar)lo]; \
+ af = 0; /* undefined */ \
+ zf = (lo == 0) << 6; \
+ sf = lshift(lo, 8 - 64) & 0x80; \
+ of = cf << 11; \
+ return cf | pf | af | zf | sf | of; \
+ } \
+}
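+
+/* In the signed cases, CF (and hence OF) is clear exactly when hi is
+   the sign-extension of lo, ie when the full product fits in the low
+   half. Eg (-1) * 1: lo is all-ones, lo >>s 63 == -1 == hi, so
+   cf = 0. */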
+
+
+#if PROFILE_RFLAGS
+
+static Bool initted = False;
+
+/* C flag, fast route */
+static UInt tabc_fast[AMD64G_CC_OP_NUMBER];
+/* C flag, slow route */
+static UInt tabc_slow[AMD64G_CC_OP_NUMBER];
+/* table for calculate_cond */
+static UInt tab_cond[AMD64G_CC_OP_NUMBER][16];
+/* total entry counts for calc_all, calc_c, calc_cond. */
+static UInt n_calc_all = 0;
+static UInt n_calc_c = 0;
+static UInt n_calc_cond = 0;
+
+#define SHOW_COUNTS_NOW (0 == (0x3FFFFF & (n_calc_all+n_calc_c+n_calc_cond)))
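+/* ie, dump the counts roughly once every 2^22 (~4.2 million) calls */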
+
+
+static void showCounts ( void )
+{
+ Int op, co;
+ Char ch;
+ vex_printf("\nTotal calls: calc_all=%u calc_cond=%u calc_c=%u\n",
+ n_calc_all, n_calc_cond, n_calc_c);
+
+ vex_printf(" cSLOW cFAST O NO B NB Z NZ BE NBE"
+ " S NS P NP L NL LE NLE\n");
+ vex_printf(" -----------------------------------------------------"
+ "----------------------------------------\n");
+ for (op = 0; op < AMD64G_CC_OP_NUMBER; op++) {
+
+ ch = ' ';
+ if (op > 0 && (op-1) % 4 == 0)
+ ch = 'B';
+ if (op > 0 && (op-1) % 4 == 1)
+ ch = 'W';
+ if (op > 0 && (op-1) % 4 == 2)
+ ch = 'L';
+ if (op > 0 && (op-1) % 4 == 3)
+ ch = 'Q';
+
+ vex_printf("%2d%c: ", op, ch);
+ vex_printf("%6u ", tabc_slow[op]);
+ vex_printf("%6u ", tabc_fast[op]);
+ for (co = 0; co < 16; co++) {
+ Int n = tab_cond[op][co];
+ if (n >= 1000) {
+ vex_printf(" %3dK", n / 1000);
+ } else
+ if (n >= 0) {
+ vex_printf(" %3d ", n );
+ } else {
+ vex_printf(" ");
+ }
+ }
+ vex_printf("\n");
+ }
+ vex_printf("\n");
+}
+
+static void initCounts ( void )
+{
+ Int op, co;
+ initted = True;
+ for (op = 0; op < AMD64G_CC_OP_NUMBER; op++) {
+ tabc_fast[op] = tabc_slow[op] = 0;
+ for (co = 0; co < 16; co++)
+ tab_cond[op][co] = 0;
+ }
+}
+
+#endif /* PROFILE_RFLAGS */
+
+
+/* Calculate all 6 flags from the supplied thunk parameters.
+   Worker function, not directly called from generated code; the
+   clean-helper wrappers below are. */
+static
+ULong amd64g_calculate_rflags_all_WRK ( ULong cc_op,
+ ULong cc_dep1_formal,
+ ULong cc_dep2_formal,
+ ULong cc_ndep_formal )
+{
+ switch (cc_op) {
+ case AMD64G_CC_OP_COPY:
+ return cc_dep1_formal
+ & (AMD64G_CC_MASK_O | AMD64G_CC_MASK_S | AMD64G_CC_MASK_Z
+ | AMD64G_CC_MASK_A | AMD64G_CC_MASK_C | AMD64G_CC_MASK_P);
+
+ case AMD64G_CC_OP_ADDB: ACTIONS_ADD( 8, UChar );
+ case AMD64G_CC_OP_ADDW: ACTIONS_ADD( 16, UShort );
+ case AMD64G_CC_OP_ADDL: ACTIONS_ADD( 32, UInt );
+ case AMD64G_CC_OP_ADDQ: ACTIONS_ADD( 64, ULong );
+
+ case AMD64G_CC_OP_ADCB: ACTIONS_ADC( 8, UChar );
+ case AMD64G_CC_OP_ADCW: ACTIONS_ADC( 16, UShort );
+ case AMD64G_CC_OP_ADCL: ACTIONS_ADC( 32, UInt );
+ case AMD64G_CC_OP_ADCQ: ACTIONS_ADC( 64, ULong );
+
+ case AMD64G_CC_OP_SUBB: ACTIONS_SUB( 8, UChar );
+ case AMD64G_CC_OP_SUBW: ACTIONS_SUB( 16, UShort );
+ case AMD64G_CC_OP_SUBL: ACTIONS_SUB( 32, UInt );
+ case AMD64G_CC_OP_SUBQ: ACTIONS_SUB( 64, ULong );
+
+ case AMD64G_CC_OP_SBBB: ACTIONS_SBB( 8, UChar );
+ case AMD64G_CC_OP_SBBW: ACTIONS_SBB( 16, UShort );
+ case AMD64G_CC_OP_SBBL: ACTIONS_SBB( 32, UInt );
+ case AMD64G_CC_OP_SBBQ: ACTIONS_SBB( 64, ULong );
+
+ case AMD64G_CC_OP_LOGICB: ACTIONS_LOGIC( 8, UChar );
+ case AMD64G_CC_OP_LOGICW: ACTIONS_LOGIC( 16, UShort );
+ case AMD64G_CC_OP_LOGICL: ACTIONS_LOGIC( 32, UInt );
+ case AMD64G_CC_OP_LOGICQ: ACTIONS_LOGIC( 64, ULong );
+
+ case AMD64G_CC_OP_INCB: ACTIONS_INC( 8, UChar );
+ case AMD64G_CC_OP_INCW: ACTIONS_INC( 16, UShort );
+ case AMD64G_CC_OP_INCL: ACTIONS_INC( 32, UInt );
+ case AMD64G_CC_OP_INCQ: ACTIONS_INC( 64, ULong );
+
+ case AMD64G_CC_OP_DECB: ACTIONS_DEC( 8, UChar );
+ case AMD64G_CC_OP_DECW: ACTIONS_DEC( 16, UShort );
+ case AMD64G_CC_OP_DECL: ACTIONS_DEC( 32, UInt );
+ case AMD64G_CC_OP_DECQ: ACTIONS_DEC( 64, ULong );
+
+ case AMD64G_CC_OP_SHLB: ACTIONS_SHL( 8, UChar );
+ case AMD64G_CC_OP_SHLW: ACTIONS_SHL( 16, UShort );
+ case AMD64G_CC_OP_SHLL: ACTIONS_SHL( 32, UInt );
+ case AMD64G_CC_OP_SHLQ: ACTIONS_SHL( 64, ULong );
+
+ case AMD64G_CC_OP_SHRB: ACTIONS_SHR( 8, UChar );
+ case AMD64G_CC_OP_SHRW: ACTIONS_SHR( 16, UShort );
+ case AMD64G_CC_OP_SHRL: ACTIONS_SHR( 32, UInt );
+ case AMD64G_CC_OP_SHRQ: ACTIONS_SHR( 64, ULong );
+
+ case AMD64G_CC_OP_ROLB: ACTIONS_ROL( 8, UChar );
+ case AMD64G_CC_OP_ROLW: ACTIONS_ROL( 16, UShort );
+ case AMD64G_CC_OP_ROLL: ACTIONS_ROL( 32, UInt );
+ case AMD64G_CC_OP_ROLQ: ACTIONS_ROL( 64, ULong );
+
+ case AMD64G_CC_OP_RORB: ACTIONS_ROR( 8, UChar );
+ case AMD64G_CC_OP_RORW: ACTIONS_ROR( 16, UShort );
+ case AMD64G_CC_OP_RORL: ACTIONS_ROR( 32, UInt );
+ case AMD64G_CC_OP_RORQ: ACTIONS_ROR( 64, ULong );
+
+ case AMD64G_CC_OP_UMULB: ACTIONS_UMUL( 8, UChar, toUChar,
+ UShort, toUShort );
+ case AMD64G_CC_OP_UMULW: ACTIONS_UMUL( 16, UShort, toUShort,
+ UInt, toUInt );
+ case AMD64G_CC_OP_UMULL: ACTIONS_UMUL( 32, UInt, toUInt,
+ ULong, idULong );
+
+ case AMD64G_CC_OP_UMULQ: ACTIONS_UMULQ;
+
+ case AMD64G_CC_OP_SMULB: ACTIONS_SMUL( 8, Char, toUChar,
+ Short, toUShort );
+ case AMD64G_CC_OP_SMULW: ACTIONS_SMUL( 16, Short, toUShort,
+ Int, toUInt );
+ case AMD64G_CC_OP_SMULL: ACTIONS_SMUL( 32, Int, toUInt,
+ Long, idULong );
+
+ case AMD64G_CC_OP_SMULQ: ACTIONS_SMULQ;
+
+ default:
+ /* shouldn't really make these calls from generated code */
+ vex_printf("amd64g_calculate_rflags_all_WRK(AMD64)"
+ "( %llu, 0x%llx, 0x%llx, 0x%llx )\n",
+ cc_op, cc_dep1_formal, cc_dep2_formal, cc_ndep_formal );
+ vpanic("amd64g_calculate_rflags_all_WRK(AMD64)");
+ }
+}
+
+
+/* CALLED FROM GENERATED CODE: CLEAN HELPER */
+/* Calculate all 6 flags from the supplied thunk parameters. */
+ULong amd64g_calculate_rflags_all ( ULong cc_op,
+ ULong cc_dep1,
+ ULong cc_dep2,
+ ULong cc_ndep )
+{
+# if PROFILE_RFLAGS
+ if (!initted) initCounts();
+ n_calc_all++;
+ if (SHOW_COUNTS_NOW) showCounts();
+# endif
+ return
+ amd64g_calculate_rflags_all_WRK ( cc_op, cc_dep1, cc_dep2, cc_ndep );
+}
+
+
+/* CALLED FROM GENERATED CODE: CLEAN HELPER */
+/* Calculate just the carry flag from the supplied thunk parameters. */
+ULong amd64g_calculate_rflags_c ( ULong cc_op,
+ ULong cc_dep1,
+ ULong cc_dep2,
+ ULong cc_ndep )
+{
+# if PROFILE_RFLAGS
+ if (!initted) initCounts();
+ n_calc_c++;
+ tabc_fast[cc_op]++;
+ if (SHOW_COUNTS_NOW) showCounts();
+# endif
+
+ /* Fast-case some common ones. */
+ switch (cc_op) {
+ case AMD64G_CC_OP_COPY:
+ return (cc_dep1 >> AMD64G_CC_SHIFT_C) & 1;
+ case AMD64G_CC_OP_LOGICQ:
+ case AMD64G_CC_OP_LOGICL:
+ case AMD64G_CC_OP_LOGICW:
+ case AMD64G_CC_OP_LOGICB:
+ return 0;
+ // case AMD64G_CC_OP_SUBL:
+ // return ((UInt)cc_dep1) < ((UInt)cc_dep2)
+ // ? AMD64G_CC_MASK_C : 0;
+ // case AMD64G_CC_OP_SUBW:
+ // return ((UInt)(cc_dep1 & 0xFFFF)) < ((UInt)(cc_dep2 & 0xFFFF))
+ // ? AMD64G_CC_MASK_C : 0;
+ // case AMD64G_CC_OP_SUBB:
+ // return ((UInt)(cc_dep1 & 0xFF)) < ((UInt)(cc_dep2 & 0xFF))
+ // ? AMD64G_CC_MASK_C : 0;
+ // case AMD64G_CC_OP_INCL:
+ // case AMD64G_CC_OP_DECL:
+ // return cc_ndep & AMD64G_CC_MASK_C;
+ default:
+ break;
+ }
+
+# if PROFILE_RFLAGS
+ tabc_fast[cc_op]--;
+ tabc_slow[cc_op]++;
+# endif
+
+ return amd64g_calculate_rflags_all_WRK(cc_op,cc_dep1,cc_dep2,cc_ndep)
+ & AMD64G_CC_MASK_C;
+}
+
+
+/* CALLED FROM GENERATED CODE: CLEAN HELPER */
+/* returns 1 or 0 */
+ULong amd64g_calculate_condition ( ULong/*AMD64Condcode*/ cond,
+ ULong cc_op,
+ ULong cc_dep1,
+ ULong cc_dep2,
+ ULong cc_ndep )
+{
+ ULong rflags = amd64g_calculate_rflags_all_WRK(cc_op, cc_dep1,
+ cc_dep2, cc_ndep);
+ ULong of,sf,zf,cf,pf;
+ ULong inv = cond & 1;
+
+# if PROFILE_RFLAGS
+ if (!initted) initCounts();
+ tab_cond[cc_op][cond]++;
+ n_calc_cond++;
+ if (SHOW_COUNTS_NOW) showCounts();
+# endif
+
+ switch (cond) {
+ case AMD64CondNO:
+ case AMD64CondO: /* OF == 1 */
+ of = rflags >> AMD64G_CC_SHIFT_O;
+ return 1 & (inv ^ of);
+
+ case AMD64CondNZ:
+ case AMD64CondZ: /* ZF == 1 */
+ zf = rflags >> AMD64G_CC_SHIFT_Z;
+ return 1 & (inv ^ zf);
+
+ case AMD64CondNB:
+ case AMD64CondB: /* CF == 1 */
+ cf = rflags >> AMD64G_CC_SHIFT_C;
+ return 1 & (inv ^ cf);
+
+ case AMD64CondNBE:
+ case AMD64CondBE: /* (CF or ZF) == 1 */
+ cf = rflags >> AMD64G_CC_SHIFT_C;
+ zf = rflags >> AMD64G_CC_SHIFT_Z;
+ return 1 & (inv ^ (cf | zf));
+
+ case AMD64CondNS:
+ case AMD64CondS: /* SF == 1 */
+ sf = rflags >> AMD64G_CC_SHIFT_S;
+ return 1 & (inv ^ sf);
+
+ case AMD64CondNP:
+ case AMD64CondP: /* PF == 1 */
+ pf = rflags >> AMD64G_CC_SHIFT_P;
+ return 1 & (inv ^ pf);
+
+ case AMD64CondNL:
+ case AMD64CondL: /* (SF xor OF) == 1 */
+ sf = rflags >> AMD64G_CC_SHIFT_S;
+ of = rflags >> AMD64G_CC_SHIFT_O;
+ return 1 & (inv ^ (sf ^ of));
+
+ case AMD64CondNLE:
+ case AMD64CondLE: /* ((SF xor OF) or ZF) == 1 */
+ sf = rflags >> AMD64G_CC_SHIFT_S;
+ of = rflags >> AMD64G_CC_SHIFT_O;
+ zf = rflags >> AMD64G_CC_SHIFT_Z;
+ return 1 & (inv ^ ((sf ^ of) | zf));
+
+ default:
+ /* shouldn't really make these calls from generated code */
+ vex_printf("amd64g_calculate_condition"
+ "( %llu, %llu, 0x%llx, 0x%llx, 0x%llx )\n",
+ cond, cc_op, cc_dep1, cc_dep2, cc_ndep );
+ vpanic("amd64g_calculate_condition");
+ }
+}
+
+
+/* VISIBLE TO LIBVEX CLIENT */
+ULong LibVEX_GuestAMD64_get_rflags ( /*IN*/VexGuestAMD64State* vex_state )
+{
+ ULong rflags = amd64g_calculate_rflags_all_WRK(
+ vex_state->guest_CC_OP,
+ vex_state->guest_CC_DEP1,
+ vex_state->guest_CC_DEP2,
+ vex_state->guest_CC_NDEP
+ );
+ Long dflag = vex_state->guest_DFLAG;
+ vassert(dflag == 1 || dflag == -1);
+   if (dflag == -1)
+      rflags |= (1<<10);  /* DF: direction flag is bit 10 */
+   if (vex_state->guest_IDFLAG == 1)
+      rflags |= (1<<21);  /* ID: CPUID-detection flag is bit 21 */
+   if (vex_state->guest_ACFLAG == 1)
+      rflags |= (1<<18);  /* AC: alignment-check flag is bit 18 */
+
+ return rflags;
+}
+
+/* VISIBLE TO LIBVEX CLIENT */
+void
+LibVEX_GuestAMD64_put_rflag_c ( ULong new_carry_flag,
+ /*MOD*/VexGuestAMD64State* vex_state )
+{
+ ULong oszacp = amd64g_calculate_rflags_all_WRK(
+ vex_state->guest_CC_OP,
+ vex_state->guest_CC_DEP1,
+ vex_state->guest_CC_DEP2,
+ vex_state->guest_CC_NDEP
+ );
+ if (new_carry_flag & 1) {
+ oszacp |= AMD64G_CC_MASK_C;
+ } else {
+ oszacp &= ~AMD64G_CC_MASK_C;
+ }
+ vex_state->guest_CC_OP = AMD64G_CC_OP_COPY;
+ vex_state->guest_CC_DEP1 = oszacp;
+ vex_state->guest_CC_DEP2 = 0;
+ vex_state->guest_CC_NDEP = 0;
+}
+
+
+/*---------------------------------------------------------------*/
+/*--- %rflags translation-time function specialisers. ---*/
+/*--- These help iropt specialise calls the above run-time ---*/
+/*--- %rflags functions. ---*/
+/*---------------------------------------------------------------*/
+
+/* Used by the optimiser to try specialisations. Returns an
+ equivalent expression, or NULL if none. */
+
+static Bool isU64 ( IRExpr* e, ULong n )
+{
+ return toBool( e->tag == Iex_Const
+ && e->Iex.Const.con->tag == Ico_U64
+ && e->Iex.Const.con->Ico.U64 == n );
+}
+
+IRExpr* guest_amd64_spechelper ( HChar* function_name,
+ IRExpr** args,
+ IRStmt** precedingStmts,
+ Int n_precedingStmts )
+{
+# define unop(_op,_a1) IRExpr_Unop((_op),(_a1))
+# define binop(_op,_a1,_a2) IRExpr_Binop((_op),(_a1),(_a2))
+# define mkU64(_n) IRExpr_Const(IRConst_U64(_n))
+# define mkU8(_n) IRExpr_Const(IRConst_U8(_n))
+
+ Int i, arity = 0;
+ for (i = 0; args[i]; i++)
+ arity++;
+# if 0
+ vex_printf("spec request:\n");
+ vex_printf(" %s ", function_name);
+ for (i = 0; i < arity; i++) {
+ vex_printf(" ");
+ ppIRExpr(args[i]);
+ }
+ vex_printf("\n");
+# endif
+
+ /* --------- specialising "amd64g_calculate_condition" --------- */
+
+ if (vex_streq(function_name, "amd64g_calculate_condition")) {
+ /* specialise calls to above "calculate condition" function */
+ IRExpr *cond, *cc_op, *cc_dep1, *cc_dep2;
+ vassert(arity == 5);
+ cond = args[0];
+ cc_op = args[1];
+ cc_dep1 = args[2];
+ cc_dep2 = args[3];
+
+ /*---------------- ADDQ ----------------*/
+
+ if (isU64(cc_op, AMD64G_CC_OP_ADDQ) && isU64(cond, AMD64CondZ)) {
+ /* long long add, then Z --> test (dst+src == 0) */
+ return unop(Iop_1Uto64,
+ binop(Iop_CmpEQ64,
+ binop(Iop_Add64, cc_dep1, cc_dep2),
+ mkU64(0)));
+ }
+
+ /*---------------- SUBQ ----------------*/
+
+ if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondZ)) {
+ /* long long sub/cmp, then Z --> test dst==src */
+ return unop(Iop_1Uto64,
+ binop(Iop_CmpEQ64,cc_dep1,cc_dep2));
+ }
+ if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNZ)) {
+ /* long long sub/cmp, then NZ --> test dst!=src */
+ return unop(Iop_1Uto64,
+ binop(Iop_CmpNE64,cc_dep1,cc_dep2));
+ }
+
+ if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondL)) {
+ /* long long sub/cmp, then L (signed less than)
+ --> test dst <s src */
+ return unop(Iop_1Uto64,
+ binop(Iop_CmpLT64S, cc_dep1, cc_dep2));
+ }
+
+ if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondB)) {
+ /* long long sub/cmp, then B (unsigned less than)
+ --> test dst <u src */
+ return unop(Iop_1Uto64,
+ binop(Iop_CmpLT64U, cc_dep1, cc_dep2));
+ }
+ if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNB)) {
+ /* long long sub/cmp, then NB (unsigned greater than or equal)
+ --> test src <=u dst */
+ /* Note, args are opposite way round from the usual */
+ return unop(Iop_1Uto64,
+ binop(Iop_CmpLE64U, cc_dep2, cc_dep1));
+ }
+
+ if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondBE)) {
+ /* long long sub/cmp, then BE (unsigned less than or equal)
+ --> test dst <=u src */
+ return unop(Iop_1Uto64,
+ binop(Iop_CmpLE64U, cc_dep1, cc_dep2));
+ }
+
+ /*---------------- SUBL ----------------*/
+
+ if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondZ)) {
+ /* long sub/cmp, then Z --> test dst==src */
+ return unop(Iop_1Uto64,
+ binop(Iop_CmpEQ64,
+ binop(Iop_Shl64,cc_dep1,mkU8(32)),
+ binop(Iop_Shl64,cc_dep2,mkU8(32))));
+ }
+ if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNZ)) {
+ /* long sub/cmp, then NZ --> test dst!=src */
+ return unop(Iop_1Uto64,
+ binop(Iop_CmpNE64,
+ binop(Iop_Shl64,cc_dep1,mkU8(32)),
+ binop(Iop_Shl64,cc_dep2,mkU8(32))));
+ }
+
+ if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondL)) {
+ /* long sub/cmp, then L (signed less than)
+ --> test dst <s src */
+ return unop(Iop_1Uto64,
+ binop(Iop_CmpLT64S,
+ binop(Iop_Shl64,cc_dep1,mkU8(32)),
+ binop(Iop_Shl64,cc_dep2,mkU8(32))));
+ }
+
+ if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondLE)) {
+ /* long sub/cmp, then LE (signed less than or equal)
+ --> test dst <=s src */
+ return unop(Iop_1Uto64,
+ binop(Iop_CmpLE64S,
+ binop(Iop_Shl64,cc_dep1,mkU8(32)),
+ binop(Iop_Shl64,cc_dep2,mkU8(32))));
+
+ }
+ if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNLE)) {
+ /* long sub/cmp, then NLE (signed greater than)
+ --> test !(dst <=s src)
+ --> test (dst >s src)
+ --> test (src <s dst) */
+ return unop(Iop_1Uto64,
+ binop(Iop_CmpLT64S,
+ binop(Iop_Shl64,cc_dep2,mkU8(32)),
+ binop(Iop_Shl64,cc_dep1,mkU8(32))));
+
+ }
+
+ if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondBE)) {
+ /* long sub/cmp, then BE (unsigned less than or equal)
+ --> test dst <=u src */
+ return unop(Iop_1Uto64,
+ binop(Iop_CmpLE64U,
+ binop(Iop_Shl64,cc_dep1,mkU8(32)),
+ binop(Iop_Shl64,cc_dep2,mkU8(32))));
+ }
+ if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNBE)) {
+ /* long sub/cmp, then NBE (unsigned greater than)
+ --> test src <u dst */
+ /* Note, args are opposite way round from the usual */
+ return unop(Iop_1Uto64,
+ binop(Iop_CmpLT64U,
+ binop(Iop_Shl64,cc_dep2,mkU8(32)),
+ binop(Iop_Shl64,cc_dep1,mkU8(32))));
+ }
+
+ if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondS)) {
+ /* long sub/cmp, then S (negative) --> test (dst-src <s 0) */
+ return unop(Iop_1Uto64,
+ binop(Iop_CmpLT64S,
+ binop(Iop_Sub64,
+ binop(Iop_Shl64, cc_dep1, mkU8(32)),
+ binop(Iop_Shl64, cc_dep2, mkU8(32))),
+ mkU64(0)));
+ }
+
+ /*---------------- SUBW ----------------*/
+
+ if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondZ)) {
+ /* word sub/cmp, then Z --> test dst==src */
+ return unop(Iop_1Uto64,
+ binop(Iop_CmpEQ16,
+ unop(Iop_64to16,cc_dep1),
+ unop(Iop_64to16,cc_dep2)));
+ }
+ if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondNZ)) {
+ /* word sub/cmp, then NZ --> test dst!=src */
+ return unop(Iop_1Uto64,
+ binop(Iop_CmpNE16,
+ unop(Iop_64to16,cc_dep1),
+ unop(Iop_64to16,cc_dep2)));
+ }
+
+ if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondLE)) {
+ /* word sub/cmp, then LE (signed less than or equal)
+ --> test dst <=s src */
+ return unop(Iop_1Uto64,
+ binop(Iop_CmpLE64S,
+ binop(Iop_Shl64,cc_dep1,mkU8(48)),
+ binop(Iop_Shl64,cc_dep2,mkU8(48))));
+
+ }
+
+ /*---------------- SUBB ----------------*/
+
+ if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondZ)) {
+ /* byte sub/cmp, then Z --> test dst==src */
+ return unop(Iop_1Uto64,
+ binop(Iop_CmpEQ8,
+ unop(Iop_64to8,cc_dep1),
+ unop(Iop_64to8,cc_dep2)));
+ }
+ if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondNZ)) {
+ /* byte sub/cmp, then NZ --> test dst!=src */
+ return unop(Iop_1Uto64,
+ binop(Iop_CmpNE8,
+ unop(Iop_64to8,cc_dep1),
+ unop(Iop_64to8,cc_dep2)));
+ }
+
+ if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondS)
+ && isU64(cc_dep2, 0)) {
+ /* byte sub/cmp of zero, then S --> test (dst-0 <s 0)
+ --> test dst <s 0
+ --> (ULong)dst[7]
+ This is yet another scheme by which gcc figures out if the
+ top bit of a byte is 1 or 0. See also LOGICB/CondS below. */
+ /* Note: isU64(cc_dep2, 0) is correct, even though this is
+ for an 8-bit comparison, since the args to the helper
+ function are always U64s. */
+ return binop(Iop_And64,
+ binop(Iop_Shr64,cc_dep1,mkU8(7)),
+ mkU64(1));
+ }
+ if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondNS)
+ && isU64(cc_dep2, 0)) {
+ /* byte sub/cmp of zero, then NS --> test !(dst-0 <s 0)
+ --> test !(dst <s 0)
+ --> (ULong) !dst[7]
+ */
+ return binop(Iop_Xor64,
+ binop(Iop_And64,
+ binop(Iop_Shr64,cc_dep1,mkU8(7)),
+ mkU64(1)),
+ mkU64(1));
+ }
+
+ /*---------------- LOGICQ ----------------*/
+
+ if (isU64(cc_op, AMD64G_CC_OP_LOGICQ) && isU64(cond, AMD64CondZ)) {
+ /* long long and/or/xor, then Z --> test dst==0 */
+ return unop(Iop_1Uto64,
+ binop(Iop_CmpEQ64, cc_dep1, mkU64(0)));
+ }
+
+ if (isU64(cc_op, AMD64G_CC_OP_LOGICQ) && isU64(cond, AMD64CondL)) {
+ /* long long and/or/xor, then L
+ LOGIC sets SF and ZF according to the
+ result and makes OF be zero. L computes SF ^ OF, but
+ OF is zero, so this reduces to SF -- which will be 1 iff
+ the result is < signed 0. Hence ...
+ */
+ return unop(Iop_1Uto64,
+ binop(Iop_CmpLT64S,
+ cc_dep1,
+ mkU64(0)));
+ }
+
+ /*---------------- LOGICL ----------------*/
+
+ if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondZ)) {
+ /* long and/or/xor, then Z --> test dst==0 */
+ return unop(Iop_1Uto64,
+ binop(Iop_CmpEQ64,
+ binop(Iop_Shl64,cc_dep1,mkU8(32)),
+ mkU64(0)));
+ }
+
+ if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondNZ)) {
+ /* long and/or/xor, then NZ --> test dst!=0 */
+ return unop(Iop_1Uto64,
+ binop(Iop_CmpNE64,
+ binop(Iop_Shl64,cc_dep1,mkU8(32)),
+ mkU64(0)));
+ }
+
+ if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondLE)) {
+ /* long and/or/xor, then LE
+ This is pretty subtle. LOGIC sets SF and ZF according to the
+ result and makes OF be zero. LE computes (SF ^ OF) | ZF, but
+ OF is zero, so this reduces to SF | ZF -- which will be 1 iff
+ the result is <=signed 0. Hence ...
+ */
+ return unop(Iop_1Uto64,
+ binop(Iop_CmpLE64S,
+ binop(Iop_Shl64,cc_dep1,mkU8(32)),
+ mkU64(0)));
+ }
+
+ /*---------------- LOGICB ----------------*/
+
+ if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondZ)) {
+ /* byte and/or/xor, then Z --> test dst==0 */
+ return unop(Iop_1Uto64,
+ binop(Iop_CmpEQ64, binop(Iop_And64,cc_dep1,mkU64(255)),
+ mkU64(0)));
+ }
+ if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondNZ)) {
+ /* byte and/or/xor, then NZ --> test dst!=0 */
+ return unop(Iop_1Uto64,
+ binop(Iop_CmpNE64, binop(Iop_And64,cc_dep1,mkU64(255)),
+ mkU64(0)));
+ }
+
+ if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondS)) {
+ /* this is an idiom gcc sometimes uses to find out if the top
+ bit of a byte register is set: eg testb %al,%al; js ..
+ Since it just depends on the top bit of the byte, extract
+ that bit and explicitly get rid of all the rest. This
+ helps memcheck avoid false positives in the case where any
+ of the other bits in the byte are undefined. */
+         /* byte and/or/xor, then S --> (ULong)result[7] */
+ return binop(Iop_And64,
+ binop(Iop_Shr64,cc_dep1,mkU8(7)),
+ mkU64(1));
+ }
+
+ /*---------------- INCB ----------------*/
+
+ if (isU64(cc_op, AMD64G_CC_OP_INCB) && isU64(cond, AMD64CondLE)) {
+ /* 8-bit inc, then LE --> sign bit of the arg */
+ return binop(Iop_And64,
+ binop(Iop_Shr64,
+ binop(Iop_Sub64, cc_dep1, mkU64(1)),
+ mkU8(7)),
+ mkU64(1));
+ }
+
+ /*---------------- INCW ----------------*/
+
+ if (isU64(cc_op, AMD64G_CC_OP_INCW) && isU64(cond, AMD64CondZ)) {
+ /* 16-bit inc, then Z --> test dst == 0 */
+ return unop(Iop_1Uto64,
+ binop(Iop_CmpEQ64,
+ binop(Iop_Shl64,cc_dep1,mkU8(48)),
+ mkU64(0)));
+ }
+
+ /*---------------- DECL ----------------*/
+
+ if (isU64(cc_op, AMD64G_CC_OP_DECL) && isU64(cond, AMD64CondZ)) {
+ /* dec L, then Z --> test dst == 0 */
+ return unop(Iop_1Uto64,
+ binop(Iop_CmpEQ64,
+ binop(Iop_Shl64,cc_dep1,mkU8(32)),
+ mkU64(0)));
+ }
+
+ /*---------------- DECW ----------------*/
+
+ if (isU64(cc_op, AMD64G_CC_OP_DECW) && isU64(cond, AMD64CondNZ)) {
+ /* 16-bit dec, then NZ --> test dst != 0 */
+ return unop(Iop_1Uto64,
+ binop(Iop_CmpNE64,
+ binop(Iop_Shl64,cc_dep1,mkU8(48)),
+ mkU64(0)));
+ }
+
+ /*---------------- COPY ----------------*/
+ /* This can happen, as a result of amd64 FP compares: "comisd ... ;
+ jbe" for example. */
+
+ if (isU64(cc_op, AMD64G_CC_OP_COPY) &&
+ (isU64(cond, AMD64CondBE) || isU64(cond, AMD64CondNBE))) {
+ /* COPY, then BE --> extract C and Z from dep1, and test (C
+ or Z == 1). */
+ /* COPY, then NBE --> extract C and Z from dep1, and test (C
+ or Z == 0). */
+ ULong nnn = isU64(cond, AMD64CondBE) ? 1 : 0;
+ return
+ unop(
+ Iop_1Uto64,
+ binop(
+ Iop_CmpEQ64,
+ binop(
+ Iop_And64,
+ binop(
+ Iop_Or64,
+ binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_C)),
+ binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_Z))
+ ),
+ mkU64(1)
+ ),
+ mkU64(nnn)
+ )
+ );
+ }
+
+ if (isU64(cc_op, AMD64G_CC_OP_COPY) && isU64(cond, AMD64CondB)) {
+ /* COPY, then B --> extract C dep1, and test (C == 1). */
+ return
+ unop(
+ Iop_1Uto64,
+ binop(
+ Iop_CmpNE64,
+ binop(
+ Iop_And64,
+ binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_C)),
+ mkU64(1)
+ ),
+ mkU64(0)
+ )
+ );
+ }
+
+ if (isU64(cc_op, AMD64G_CC_OP_COPY)
+ && (isU64(cond, AMD64CondZ) || isU64(cond, AMD64CondNZ))) {
+ /* COPY, then Z --> extract Z from dep1, and test (Z == 1). */
+ /* COPY, then NZ --> extract Z from dep1, and test (Z == 0). */
+      ULong nnn = isU64(cond, AMD64CondZ) ? 1 : 0;
+ return
+ unop(
+ Iop_1Uto64,
+ binop(
+ Iop_CmpEQ64,
+ binop(
+ Iop_And64,
+ binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_Z)),
+ mkU64(1)
+ ),
+ mkU64(nnn)
+ )
+ );
+ }
+
+ if (isU64(cc_op, AMD64G_CC_OP_COPY) && isU64(cond, AMD64CondP)) {
+ /* COPY, then P --> extract P from dep1, and test (P == 1). */
+ return
+ unop(
+ Iop_1Uto64,
+ binop(
+ Iop_CmpNE64,
+ binop(
+ Iop_And64,
+ binop(Iop_Shr64, cc_dep1, mkU8(AMD64G_CC_SHIFT_P)),
+ mkU64(1)
+ ),
+ mkU64(0)
+ )
+ );
+ }
+
+ return NULL;
+ }
+
+ /* --------- specialising "amd64g_calculate_rflags_c" --------- */
+
+ if (vex_streq(function_name, "amd64g_calculate_rflags_c")) {
+ /* specialise calls to above "calculate_rflags_c" function */
+ IRExpr *cc_op, *cc_dep1, *cc_dep2, *cc_ndep;
+ vassert(arity == 4);
+ cc_op = args[0];
+ cc_dep1 = args[1];
+ cc_dep2 = args[2];
+ cc_ndep = args[3];
+
+ if (isU64(cc_op, AMD64G_CC_OP_SUBQ)) {
+ /* C after sub denotes unsigned less than */
+ return unop(Iop_1Uto64,
+ binop(Iop_CmpLT64U,
+ cc_dep1,
+ cc_dep2));
+ }
+ if (isU64(cc_op, AMD64G_CC_OP_SUBL)) {
+ /* C after sub denotes unsigned less than */
+ return unop(Iop_1Uto64,
+ binop(Iop_CmpLT64U,
+ binop(Iop_Shl64,cc_dep1,mkU8(32)),
+ binop(Iop_Shl64,cc_dep2,mkU8(32))));
+ }
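+      /* (Shifting both args left by 32 throws away the irrelevant
+         upper bits while preserving the unsigned ordering of the low
+         32 bits; the SUBB case below uses masking to the same end.) */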
+ if (isU64(cc_op, AMD64G_CC_OP_SUBB)) {
+ /* C after sub denotes unsigned less than */
+ return unop(Iop_1Uto64,
+ binop(Iop_CmpLT64U,
+ binop(Iop_And64,cc_dep1,mkU64(0xFF)),
+ binop(Iop_And64,cc_dep2,mkU64(0xFF))));
+ }
+ if (isU64(cc_op, AMD64G_CC_OP_LOGICQ)
+ || isU64(cc_op, AMD64G_CC_OP_LOGICL)
+ || isU64(cc_op, AMD64G_CC_OP_LOGICW)
+ || isU64(cc_op, AMD64G_CC_OP_LOGICB)) {
+ /* cflag after logic is zero */
+ return mkU64(0);
+ }
+ if (isU64(cc_op, AMD64G_CC_OP_DECL) || isU64(cc_op, AMD64G_CC_OP_INCL)
+ || isU64(cc_op, AMD64G_CC_OP_DECQ) || isU64(cc_op, AMD64G_CC_OP_INCQ)) {
+         /* If the thunk is dec or inc, the cflag is supplied as CC_NDEP,
+            since inc/dec themselves leave the carry flag unchanged. */
+ return cc_ndep;
+ }
+
+# if 0
+ if (cc_op->tag == Iex_Const) {
+ vex_printf("CFLAG "); ppIRExpr(cc_op); vex_printf("\n");
+ }
+# endif
+
+ return NULL;
+ }
+
+# undef unop
+# undef binop
+# undef mkU64
+# undef mkU8
+
+ return NULL;
+}
+
+
+/*---------------------------------------------------------------*/
+/*--- Supporting functions for x87 FPU activities. ---*/
+/*---------------------------------------------------------------*/
+
+static inline Bool host_is_little_endian ( void )
+{
+ UInt x = 0x76543210;
+ UChar* p = (UChar*)(&x);
+ return toBool(*p == 0x10);
+}
+
+/* Inspect a value and its tag, as per the x87 'FXAM' instruction. */
+/* CALLED FROM GENERATED CODE: CLEAN HELPER */
+ULong amd64g_calculate_FXAM ( ULong tag, ULong dbl )
+{
+ Bool mantissaIsZero;
+ Int bexp;
+ UChar sign;
+ UChar* f64;
+
+ vassert(host_is_little_endian());
+
+ /* vex_printf("calculate_FXAM ( %d, %llx ) .. ", tag, dbl ); */
+
+ f64 = (UChar*)(&dbl);
+ sign = toUChar( (f64[7] >> 7) & 1 );
+
+ /* First off, if the tag indicates the register was empty,
+ return 1,0,sign,1 */
+ if (tag == 0) {
+ /* vex_printf("Empty\n"); */
+ return AMD64G_FC_MASK_C3 | 0 | (sign << AMD64G_FC_SHIFT_C1)
+ | AMD64G_FC_MASK_C0;
+ }
+
+ bexp = (f64[7] << 4) | ((f64[6] >> 4) & 0x0F);
+ bexp &= 0x7FF;
+
+ mantissaIsZero
+ = toBool(
+ (f64[6] & 0x0F) == 0
+ && (f64[5] | f64[4] | f64[3] | f64[2] | f64[1] | f64[0]) == 0
+ );
+
+ /* If both exponent and mantissa are zero, the value is zero.
+ Return 1,0,sign,0. */
+ if (bexp == 0 && mantissaIsZero) {
+ /* vex_printf("Zero\n"); */
+ return AMD64G_FC_MASK_C3 | 0
+ | (sign << AMD64G_FC_SHIFT_C1) | 0;
+ }
+
+ /* If exponent is zero but mantissa isn't, it's a denormal.
+ Return 1,1,sign,0. */
+ if (bexp == 0 && !mantissaIsZero) {
+ /* vex_printf("Denormal\n"); */
+ return AMD64G_FC_MASK_C3 | AMD64G_FC_MASK_C2
+ | (sign << AMD64G_FC_SHIFT_C1) | 0;
+ }
+
+ /* If the exponent is 7FF and the mantissa is zero, this is an infinity.
+ Return 0,1,sign,1. */
+ if (bexp == 0x7FF && mantissaIsZero) {
+ /* vex_printf("Inf\n"); */
+ return 0 | AMD64G_FC_MASK_C2 | (sign << AMD64G_FC_SHIFT_C1)
+ | AMD64G_FC_MASK_C0;
+ }
+
+ /* If the exponent is 7FF and the mantissa isn't zero, this is a NaN.
+ Return 0,0,sign,1. */
+ if (bexp == 0x7FF && !mantissaIsZero) {
+ /* vex_printf("NaN\n"); */
+ return 0 | 0 | (sign << AMD64G_FC_SHIFT_C1) | AMD64G_FC_MASK_C0;
+ }
+
+ /* Uh, ok, we give up. It must be a normal finite number.
+ Return 0,1,sign,0.
+ */
+ /* vex_printf("normal\n"); */
+ return 0 | AMD64G_FC_MASK_C2 | (sign << AMD64G_FC_SHIFT_C1) | 0;
+}
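+
+/* Summary of the classification above, as (C3,C2,C1,C0):
+     Empty     1 0 sign 1
+     Zero      1 0 sign 0
+     Denormal  1 1 sign 0
+     Inf       0 1 sign 1
+     NaN       0 0 sign 1
+     Normal    0 1 sign 0  */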
+
+
+/* Create an x87 FPU state from the guest state, as close as
+ we can approximate it. */
+static
+void do_get_x87 ( /*IN*/VexGuestAMD64State* vex_state,
+ /*OUT*/UChar* x87_state )
+{
+ Int i, stno, preg;
+ UInt tagw;
+ ULong* vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
+ UChar* vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
+ Fpu_State* x87 = (Fpu_State*)x87_state;
+ UInt ftop = vex_state->guest_FTOP;
+ UInt c3210 = vex_state->guest_FC3210;
+
+ for (i = 0; i < 14; i++)
+ x87->env[i] = 0;
+
+ x87->env[1] = x87->env[3] = x87->env[5] = x87->env[13] = 0xFFFF;
+ x87->env[FP_ENV_STAT]
+ = toUShort(((ftop & 7) << 11) | (c3210 & 0x4700));
+ x87->env[FP_ENV_CTRL]
+ = toUShort(amd64g_create_fpucw( vex_state->guest_FPROUND ));
+
+ /* Dump the register stack in ST order. */
+ tagw = 0;
+ for (stno = 0; stno < 8; stno++) {
+ preg = (stno + ftop) & 7;
+ if (vexTags[preg] == 0) {
+ /* register is empty */
+ tagw |= (3 << (2*preg));
+ convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
+ &x87->reg[10*stno] );
+ } else {
+ /* register is full. */
+ tagw |= (0 << (2*preg));
+ convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
+ &x87->reg[10*stno] );
+ }
+ }
+ x87->env[FP_ENV_TAG] = toUShort(tagw);
+}
+
+
+/* CALLED FROM GENERATED CODE */
+/* DIRTY HELPER (reads guest state, writes guest mem) */
+/* NOTE: only handles 32-bit format (no REX.W on the insn) */
+void amd64g_dirtyhelper_FXSAVE ( VexGuestAMD64State* gst, HWord addr )
+{
+ /* Derived from values obtained from
+ vendor_id : AuthenticAMD
+ cpu family : 15
+ model : 12
+ model name : AMD Athlon(tm) 64 Processor 3200+
+ stepping : 0
+ cpu MHz : 2200.000
+ cache size : 512 KB
+ */
+ /* Somewhat roundabout, but at least it's simple. */
+ Fpu_State tmp;
+ UShort* addrS = (UShort*)addr;
+ UChar* addrC = (UChar*)addr;
+ U128* xmm = (U128*)(addr + 160);
+ UInt mxcsr;
+ UShort fp_tags;
+ UInt summary_tags;
+ Int r, stno;
+ UShort *srcS, *dstS;
+
+ do_get_x87( gst, (UChar*)&tmp );
+ mxcsr = amd64g_create_mxcsr( gst->guest_SSEROUND );
+
+ /* Now build the proper fxsave image from the x87 image we just
+ made. */
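+
+   /* Byte offsets within the image being built:
+        0  FCW    2  FSW    4  FTW(abridged) + pad    6  FOP
+        8  RIP   16  RDP   24  MXCSR   28  MXCSR_MASK
+       32  ST0..ST7, 16 bytes each (10 used, 6 zeroed)
+      160  XMM0..XMM15, 16 bytes each */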
+
+ addrS[0] = tmp.env[FP_ENV_CTRL]; /* FCW: fpu control word */
+   addrS[1] = tmp.env[FP_ENV_STAT]; /* FSW: fpu status word */
+
+ /* set addrS[2] in an endian-independent way */
+ summary_tags = 0;
+ fp_tags = tmp.env[FP_ENV_TAG];
+ for (r = 0; r < 8; r++) {
+ if ( ((fp_tags >> (2*r)) & 3) != 3 )
+ summary_tags |= (1 << r);
+ }
+ addrC[4] = toUChar(summary_tags); /* FTW: tag summary byte */
+ addrC[5] = 0; /* pad */
+
+ /* FOP: faulting fpu opcode. From experimentation, the real CPU
+ does not write this field. (?!) */
+ addrS[3] = 0; /* BOGUS */
+
+ /* RIP (Last x87 instruction pointer). From experimentation, the
+ real CPU does not write this field. (?!) */
+ addrS[4] = 0; /* BOGUS */
+ addrS[5] = 0; /* BOGUS */
+ addrS[6] = 0; /* BOGUS */
+ addrS[7] = 0; /* BOGUS */
+
+ /* RDP (Last x87 data pointer). From experimentation, the real CPU
+ does not write this field. (?!) */
+ addrS[8] = 0; /* BOGUS */
+ addrS[9] = 0; /* BOGUS */
+ addrS[10] = 0; /* BOGUS */
+ addrS[11] = 0; /* BOGUS */
+
+ addrS[12] = toUShort(mxcsr); /* MXCSR */
+ addrS[13] = toUShort(mxcsr >> 16);
+
+ addrS[14] = 0xFFFF; /* MXCSR mask (lo16) */
+ addrS[15] = 0x0000; /* MXCSR mask (hi16) */
+
+ /* Copy in the FP registers, in ST order. */
+ for (stno = 0; stno < 8; stno++) {
+ srcS = (UShort*)(&tmp.reg[10*stno]);
+ dstS = (UShort*)(&addrS[16 + 8*stno]);
+ dstS[0] = srcS[0];
+ dstS[1] = srcS[1];
+ dstS[2] = srcS[2];
+ dstS[3] = srcS[3];
+ dstS[4] = srcS[4];
+ dstS[5] = 0;
+ dstS[6] = 0;
+ dstS[7] = 0;
+ }
+
+ /* That's the first 160 bytes of the image done. Now only %xmm0
+ .. %xmm15 remain to be copied. If the host is big-endian, these
+ need to be byte-swapped. */
+ vassert(host_is_little_endian());
+
+# define COPY_U128(_dst,_src) \
+ do { _dst[0] = _src[0]; _dst[1] = _src[1]; \
+ _dst[2] = _src[2]; _dst[3] = _src[3]; } \
+ while (0)
+
+ COPY_U128( xmm[0], gst->guest_XMM0 );
+ COPY_U128( xmm[1], gst->guest_XMM1 );
+ COPY_U128( xmm[2], gst->guest_XMM2 );
+ COPY_U128( xmm[3], gst->guest_XMM3 );
+ COPY_U128( xmm[4], gst->guest_XMM4 );
+ COPY_U128( xmm[5], gst->guest_XMM5 );
+ COPY_U128( xmm[6], gst->guest_XMM6 );
+ COPY_U128( xmm[7], gst->guest_XMM7 );
+ COPY_U128( xmm[8], gst->guest_XMM8 );
+ COPY_U128( xmm[9], gst->guest_XMM9 );
+ COPY_U128( xmm[10], gst->guest_XMM10 );
+ COPY_U128( xmm[11], gst->guest_XMM11 );
+ COPY_U128( xmm[12], gst->guest_XMM12 );
+ COPY_U128( xmm[13], gst->guest_XMM13 );
+ COPY_U128( xmm[14], gst->guest_XMM14 );
+ COPY_U128( xmm[15], gst->guest_XMM15 );
+
+# undef COPY_U128
+}
+
+
+/* DIRTY HELPER (writes guest state) */
+/* Initialise the x87 FPU state as per 'finit'. */
+void amd64g_dirtyhelper_FINIT ( VexGuestAMD64State* gst )
+{
+ Int i;
+ gst->guest_FTOP = 0;
+ for (i = 0; i < 8; i++) {
+ gst->guest_FPTAG[i] = 0; /* empty */
+ gst->guest_FPREG[i] = 0; /* IEEE754 64-bit zero */
+ }
+ gst->guest_FPROUND = (ULong)Irrm_NEAREST;
+ gst->guest_FC3210 = 0;
+}
+
+
+/* CALLED FROM GENERATED CODE */
+/* DIRTY HELPER (reads guest memory) */
+ULong amd64g_dirtyhelper_loadF80le ( ULong addrU )
+{
+ ULong f64;
+ convert_f80le_to_f64le ( (UChar*)ULong_to_Ptr(addrU), (UChar*)&f64 );
+ return f64;
+}
+
+/* CALLED FROM GENERATED CODE */
+/* DIRTY HELPER (writes guest memory) */
+void amd64g_dirtyhelper_storeF80le ( ULong addrU, ULong f64 )
+{
+ convert_f64le_to_f80le( (UChar*)&f64, (UChar*)ULong_to_Ptr(addrU) );
+}
+
+
+/* CALLED FROM GENERATED CODE */
+/* CLEAN HELPER */
+/* mxcsr[15:0] contains an SSE native format MXCSR value.
+ Extract from it the required SSEROUND value and any resulting
+ emulation warning, and return (warn << 32) | sseround value.
+*/
+ULong amd64g_check_ldmxcsr ( ULong mxcsr )
+{
+ /* Decide on a rounding mode. mxcsr[14:13] holds it. */
+ /* NOTE, encoded exactly as per enum IRRoundingMode. */
+ ULong rmode = (mxcsr >> 13) & 3;
+
+ /* Detect any required emulation warnings. */
+ VexEmWarn ew = EmWarn_NONE;
+
+ if ((mxcsr & 0x1F80) != 0x1F80) {
+ /* unmasked exceptions! */
+ ew = EmWarn_X86_sseExns;
+ }
+ else
+ if (mxcsr & (1<<15)) {
+ /* FZ is set */
+ ew = EmWarn_X86_fz;
+ }
+ else
+ if (mxcsr & (1<<6)) {
+ /* DAZ is set */
+ ew = EmWarn_X86_daz;
+ }
+
+ return (((ULong)ew) << 32) | ((ULong)rmode);
+}
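+
+/* For example, the power-on MXCSR value 0x1F80 has all exceptions
+   masked and FZ, DAZ and RC all zero, so amd64g_check_ldmxcsr(0x1F80)
+   returns 0: EmWarn_NONE in the top half, round-nearest in the
+   bottom. */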
+
+
+/* CALLED FROM GENERATED CODE */
+/* CLEAN HELPER */
+/* Given sseround as an IRRoundingMode value, create a suitable SSE
+ native format MXCSR value. */
+ULong amd64g_create_mxcsr ( ULong sseround )
+{
+ sseround &= 3;
+ return 0x1F80 | (sseround << 13);
+}
+
+
+/* CLEAN HELPER */
+/* fpucw[15:0] contains an x87 native format FPU control word.
+ Extract from it the required FPROUND value and any resulting
+ emulation warning, and return (warn << 32) | fpround value.
+*/
+ULong amd64g_check_fldcw ( ULong fpucw )
+{
+ /* Decide on a rounding mode. fpucw[11:10] holds it. */
+ /* NOTE, encoded exactly as per enum IRRoundingMode. */
+ ULong rmode = (fpucw >> 10) & 3;
+
+ /* Detect any required emulation warnings. */
+ VexEmWarn ew = EmWarn_NONE;
+
+ if ((fpucw & 0x3F) != 0x3F) {
+ /* unmasked exceptions! */
+ ew = EmWarn_X86_x87exns;
+ }
+ else
+ if (((fpucw >> 8) & 3) != 3) {
+ /* unsupported precision */
+ ew = EmWarn_X86_x87precision;
+ }
+
+ return (((ULong)ew) << 32) | ((ULong)rmode);
+}
+
+
+/* CLEAN HELPER */
+/* Given fpround as an IRRoundingMode value, create a suitable x87
+ native format FPU control word. */
+ULong amd64g_create_fpucw ( ULong fpround )
+{
+ fpround &= 3;
+ return 0x037F | (fpround << 10);
+}
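+
+/* A small sketch (kept uncompiled) of how the two control-word
+   helpers above invert each other: for the power-on control word
+   0x037F, amd64g_check_fldcw extracts rounding mode 0 (nearest) and
+   no warning, and amd64g_create_fpucw maps that back to 0x037F. */
+# if 0
+static void example_fpucw_roundtrip ( void )
+{
+   ULong pair  = amd64g_check_fldcw(0x037F);
+   ULong rmode = pair & 0xFFFFFFFFULL;   /* 0 == Irrm_NEAREST */
+   ULong warn  = pair >> 32;             /* 0 == EmWarn_NONE */
+   vassert(warn == 0 && rmode == 0);
+   vassert(amd64g_create_fpucw(rmode) == 0x037F);
+}
+# endif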
+
+
+/* This is used to implement 'fldenv'.
+ Reads 28 bytes at x87_state[0 .. 27]. */
+/* CALLED FROM GENERATED CODE */
+/* DIRTY HELPER */
+VexEmWarn amd64g_dirtyhelper_FLDENV ( /*OUT*/VexGuestAMD64State* vex_state,
+ /*IN*/HWord x87_state)
+{
+ Int stno, preg;
+ UInt tag;
+ UChar* vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
+ Fpu_State* x87 = (Fpu_State*)x87_state;
+ UInt ftop = (x87->env[FP_ENV_STAT] >> 11) & 7;
+ UInt tagw = x87->env[FP_ENV_TAG];
+ UInt fpucw = x87->env[FP_ENV_CTRL];
+ ULong c3210 = x87->env[FP_ENV_STAT] & 0x4700;
+ VexEmWarn ew;
+ ULong fpround;
+ ULong pair;
+
+ /* Copy tags */
+ for (stno = 0; stno < 8; stno++) {
+ preg = (stno + ftop) & 7;
+ tag = (tagw >> (2*preg)) & 3;
+ if (tag == 3) {
+ /* register is empty */
+ vexTags[preg] = 0;
+ } else {
+ /* register is non-empty */
+ vexTags[preg] = 1;
+ }
+ }
+
+ /* stack pointer */
+ vex_state->guest_FTOP = ftop;
+
+ /* status word */
+ vex_state->guest_FC3210 = c3210;
+
+ /* handle the control word, setting FPROUND and detecting any
+ emulation warnings. */
+ pair = amd64g_check_fldcw ( (ULong)fpucw );
+ fpround = pair & 0xFFFFFFFFULL;
+ ew = (VexEmWarn)(pair >> 32);
+
+ vex_state->guest_FPROUND = fpround & 3;
+
+ /* emulation warnings --> caller */
+ return ew;
+}
+
+
+/* CALLED FROM GENERATED CODE */
+/* DIRTY HELPER */
+/* Create an x87 FPU env from the guest state, as close as we can
+ approximate it. Writes 28 bytes at x87_state[0..27]. */
+void amd64g_dirtyhelper_FSTENV ( /*IN*/VexGuestAMD64State* vex_state,
+ /*OUT*/HWord x87_state )
+{
+ Int i, stno, preg;
+ UInt tagw;
+ UChar* vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
+ Fpu_State* x87 = (Fpu_State*)x87_state;
+ UInt ftop = vex_state->guest_FTOP;
+ ULong c3210 = vex_state->guest_FC3210;
+
+ for (i = 0; i < 14; i++)
+ x87->env[i] = 0;
+
+ x87->env[1] = x87->env[3] = x87->env[5] = x87->env[13] = 0xFFFF;
+ x87->env[FP_ENV_STAT]
+ = toUShort(toUInt( ((ftop & 7) << 11) | (c3210 & 0x4700) ));
+ x87->env[FP_ENV_CTRL]
+ = toUShort(toUInt( amd64g_create_fpucw( vex_state->guest_FPROUND ) ));
+
+ /* Compute the x87 tag word. */
+ tagw = 0;
+ for (stno = 0; stno < 8; stno++) {
+ preg = (stno + ftop) & 7;
+ if (vexTags[preg] == 0) {
+ /* register is empty */
+ tagw |= (3 << (2*preg));
+ } else {
+ /* register is full. */
+ tagw |= (0 << (2*preg));
+ }
+ }
+ x87->env[FP_ENV_TAG] = toUShort(tagw);
+
+   /* We don't dump the x87 registers, though. */
+}
+
+
+/*---------------------------------------------------------------*/
+/*--- Misc integer helpers, including rotates and CPUID. ---*/
+/*---------------------------------------------------------------*/
+
+/* Claim to be the following CPU, which is probably representative of
+ the lowliest (earliest) amd64 offerings. It can do neither sse3
+ nor cx16.
+
+ vendor_id : AuthenticAMD
+ cpu family : 15
+ model : 5
+ model name : AMD Opteron (tm) Processor 848
+ stepping : 10
+ cpu MHz : 1797.682
+ cache size : 1024 KB
+ fpu : yes
+ fpu_exception : yes
+ cpuid level : 1
+ wp : yes
+ flags : fpu vme de pse tsc msr pae mce cx8 apic sep
+ mtrr pge mca cmov pat pse36 clflush mmx fxsr
+ sse sse2 syscall nx mmxext lm 3dnowext 3dnow
+ bogomips : 3600.62
+ TLB size : 1088 4K pages
+ clflush size : 64
+ cache_alignment : 64
+ address sizes : 40 bits physical, 48 bits virtual
+ power management: ts fid vid ttp
+*/
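+/* In leaf 0 below, EBX:EDX:ECX hold the ASCII vendor string
+   "AuthenticAMD" (0x68747541 = "Auth", 0x69746e65 = "enti",
+   0x444d4163 = "cAMD", little-endian within each register). */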
+void amd64g_dirtyhelper_CPUID_baseline ( VexGuestAMD64State* st )
+{
+# define SET_ABCD(_a,_b,_c,_d) \
+ do { st->guest_RAX = (ULong)(_a); \
+ st->guest_RBX = (ULong)(_b); \
+ st->guest_RCX = (ULong)(_c); \
+ st->guest_RDX = (ULong)(_d); \
+ } while (0)
+
+ switch (0xFFFFFFFF & st->guest_RAX) {
+ case 0x00000000:
+ SET_ABCD(0x00000001, 0x68747541, 0x444d4163, 0x69746e65);
+ break;
+ case 0x00000001:
+ SET_ABCD(0x00000f5a, 0x01000800, 0x00000000, 0x078bfbff);
+ break;
+ case 0x80000000:
+ SET_ABCD(0x80000018, 0x68747541, 0x444d4163, 0x69746e65);
+ break;
+ case 0x80000001:
+ SET_ABCD(0x00000f5a, 0x00000505, 0x00000000, 0xe1d3fbff);
+ break;
+ case 0x80000002:
+ SET_ABCD(0x20444d41, 0x6574704f, 0x206e6f72, 0x296d7428);
+ break;
+ case 0x80000003:
+ SET_ABCD(0x6f725020, 0x73736563, 0x3820726f, 0x00003834);
+ break;
+ case 0x80000004:
+ SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
+ break;
+ case 0x80000005:
+ SET_ABCD(0xff08ff08, 0xff20ff20, 0x40020140, 0x40020140);
+ break;
+ case 0x80000006:
+ SET_ABCD(0x00000000, 0x42004200, 0x04008140, 0x00000000);
+ break;
+ case 0x80000007:
+ SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x0000000f);
+ break;
+ case 0x80000008:
+ SET_ABCD(0x00003028, 0x00000000, 0x00000000, 0x00000000);
+ break;
+ default:
+ SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
+ break;
+ }
+# undef SET_ABCD
+}
+
+
+/* Claim to be the following CPU (2 x ...), which is sse3 and cx16
+ capable.
+
+ vendor_id : GenuineIntel
+ cpu family : 6
+ model : 15
+ model name : Intel(R) Core(TM)2 CPU 6600 @ 2.40GHz
+ stepping : 6
+ cpu MHz : 2394.000
+ cache size : 4096 KB
+ physical id : 0
+ siblings : 2
+ core id : 0
+ cpu cores : 2
+ fpu : yes
+ fpu_exception : yes
+ cpuid level : 10
+ wp : yes
+ flags : fpu vme de pse tsc msr pae mce cx8 apic sep
+ mtrr pge mca cmov pat pse36 clflush dts acpi
+ mmx fxsr sse sse2 ss ht tm syscall nx lm
+ constant_tsc pni monitor ds_cpl vmx est tm2
+ cx16 xtpr lahf_lm
+ bogomips : 4798.78
+ clflush size : 64
+ cache_alignment : 64
+ address sizes : 36 bits physical, 48 bits virtual
+ power management:
+*/
+void amd64g_dirtyhelper_CPUID_sse3_and_cx16 ( VexGuestAMD64State* st )
+{
+# define SET_ABCD(_a,_b,_c,_d) \
+ do { st->guest_RAX = (ULong)(_a); \
+ st->guest_RBX = (ULong)(_b); \
+ st->guest_RCX = (ULong)(_c); \
+ st->guest_RDX = (ULong)(_d); \
+ } while (0)
+
+ switch (0xFFFFFFFF & st->guest_RAX) {
+ case 0x00000000:
+ SET_ABCD(0x0000000a, 0x756e6547, 0x6c65746e, 0x49656e69);
+ break;
+ case 0x00000001:
+ SET_ABCD(0x000006f6, 0x00020800, 0x0000e3bd, 0xbfebfbff);
+ break;
+ case 0x00000002:
+ SET_ABCD(0x05b0b101, 0x005657f0, 0x00000000, 0x2cb43049);
+ break;
+ case 0x00000003:
+ SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
+ break;
+ case 0x00000004: {
+ switch (0xFFFFFFFF & st->guest_RCX) {
+ case 0x00000000: SET_ABCD(0x04000121, 0x01c0003f,
+ 0x0000003f, 0x00000001); break;
+ case 0x00000001: SET_ABCD(0x04000122, 0x01c0003f,
+ 0x0000003f, 0x00000001); break;
+ case 0x00000002: SET_ABCD(0x04004143, 0x03c0003f,
+ 0x00000fff, 0x00000001); break;
+ default: SET_ABCD(0x00000000, 0x00000000,
+ 0x00000000, 0x00000000); break;
+ }
+ break;
+ }
+ case 0x00000005:
+ SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00000020);
+ break;
+ case 0x00000006:
+ SET_ABCD(0x00000001, 0x00000002, 0x00000001, 0x00000000);
+ break;
+ case 0x00000007:
+ SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
+ break;
+ case 0x00000008:
+ SET_ABCD(0x00000400, 0x00000000, 0x00000000, 0x00000000);
+ break;
+ case 0x00000009:
+ SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
+ break;
+ case 0x0000000a:
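+      /* (also the landing point for any leaf we don't know about) */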
+ unhandled_eax_value:
+ SET_ABCD(0x07280202, 0x00000000, 0x00000000, 0x00000000);
+ break;
+ case 0x80000000:
+ SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000);
+ break;
+ case 0x80000001:
+ SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x20100800);
+ break;
+ case 0x80000002:
+ SET_ABCD(0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865);
+ break;
+ case 0x80000003:
+ SET_ABCD(0x43203229, 0x20205550, 0x20202020, 0x20202020);
+ break;
+ case 0x80000004:
+ SET_ABCD(0x30303636, 0x20402020, 0x30342e32, 0x007a4847);
+ break;
+ case 0x80000005:
+ SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
+ break;
+ case 0x80000006:
+ SET_ABCD(0x00000000, 0x00000000, 0x10008040, 0x00000000);
+ break;
+ case 0x80000007:
+ SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
+ break;
+ case 0x80000008:
+ SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000);
+ break;
+ default:
+ goto unhandled_eax_value;
+ }
+# undef SET_ABCD
+}
+
+
+/* Claim to be the following CPU (4 x ...), which is sse4.2 and cx16
+ capable.
+
+ vendor_id : GenuineIntel
+ cpu family : 6
+ model : 37
+ model name : Intel(R) Core(TM) i5 CPU 670 @ 3.47GHz
+ stepping : 2
+ cpu MHz : 3334.000
+ cache size : 4096 KB
+ physical id : 0
+ siblings : 4
+ core id : 0
+ cpu cores : 2
+ apicid : 0
+ initial apicid : 0
+ fpu : yes
+ fpu_exception : yes
+ cpuid level : 11
+ wp : yes
+ flags : fpu vme de pse tsc msr pae mce cx8 apic sep
+ mtrr pge mca cmov pat pse36 clflush dts acpi
+ mmx fxsr sse sse2 ss ht tm pbe syscall nx rdtscp
+ lm constant_tsc arch_perfmon pebs bts rep_good
+ xtopology nonstop_tsc aperfmperf pni pclmulqdq
+ dtes64 monitor ds_cpl vmx smx est tm2 ssse3 cx16
+ xtpr pdcm sse4_1 sse4_2 popcnt aes lahf_lm ida
+ arat tpr_shadow vnmi flexpriority ept vpid
+ MINUS aes (see below)
+ bogomips : 6957.57
+ clflush size : 64
+ cache_alignment : 64
+ address sizes : 36 bits physical, 48 bits virtual
+ power management:
+*/
+void amd64g_dirtyhelper_CPUID_sse42_and_cx16 ( VexGuestAMD64State* st )
+{
+# define SET_ABCD(_a,_b,_c,_d) \
+ do { st->guest_RAX = (ULong)(_a); \
+ st->guest_RBX = (ULong)(_b); \
+ st->guest_RCX = (ULong)(_c); \
+ st->guest_RDX = (ULong)(_d); \
+ } while (0)
+
+ UInt old_eax = (UInt)st->guest_RAX;
+ UInt old_ecx = (UInt)st->guest_RCX;
+
+ switch (old_eax) {
+ case 0x00000000:
+ SET_ABCD(0x0000000b, 0x756e6547, 0x6c65746e, 0x49656e69);
+ break;
+ case 0x00000001:
+ // & ~(1<<25): don't claim to support AES insns. See
+ // bug 249991.
+ SET_ABCD(0x00020652, 0x00100800, 0x0298e3ff & ~(1<<25),
+ 0xbfebfbff);
+ break;
+ case 0x00000002:
+ SET_ABCD(0x55035a01, 0x00f0b2e3, 0x00000000, 0x09ca212c);
+ break;
+ case 0x00000003:
+ SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
+ break;
+ case 0x00000004:
+ switch (old_ecx) {
+ case 0x00000000: SET_ABCD(0x1c004121, 0x01c0003f,
+ 0x0000003f, 0x00000000); break;
+ case 0x00000001: SET_ABCD(0x1c004122, 0x00c0003f,
+ 0x0000007f, 0x00000000); break;
+ case 0x00000002: SET_ABCD(0x1c004143, 0x01c0003f,
+ 0x000001ff, 0x00000000); break;
+ case 0x00000003: SET_ABCD(0x1c03c163, 0x03c0003f,
+ 0x00000fff, 0x00000002); break;
+ default: SET_ABCD(0x00000000, 0x00000000,
+ 0x00000000, 0x00000000); break;
+ }
+ break;
+ case 0x00000005:
+ SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00001120);
+ break;
+ case 0x00000006:
+ SET_ABCD(0x00000007, 0x00000002, 0x00000001, 0x00000000);
+ break;
+ case 0x00000007:
+ SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
+ break;
+ case 0x00000008:
+ SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
+ break;
+ case 0x00000009:
+ SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
+ break;
+ case 0x0000000a:
+ SET_ABCD(0x07300403, 0x00000004, 0x00000000, 0x00000603);
+ break;
+ case 0x0000000b:
+ switch (old_ecx) {
+ case 0x00000000:
+ SET_ABCD(0x00000001, 0x00000002,
+ 0x00000100, 0x00000000); break;
+ case 0x00000001:
+ SET_ABCD(0x00000004, 0x00000004,
+ 0x00000201, 0x00000000); break;
+ default:
+ SET_ABCD(0x00000000, 0x00000000,
+ old_ecx, 0x00000000); break;
+ }
+ break;
+ case 0x0000000c:
+ SET_ABCD(0x00000001, 0x00000002, 0x00000100, 0x00000000);
+ break;
+ case 0x0000000d:
+ switch (old_ecx) {
+ case 0x00000000: SET_ABCD(0x00000001, 0x00000002,
+ 0x00000100, 0x00000000); break;
+ case 0x00000001: SET_ABCD(0x00000004, 0x00000004,
+ 0x00000201, 0x00000000); break;
+ default: SET_ABCD(0x00000000, 0x00000000,
+ old_ecx, 0x00000000); break;
+ }
+ break;
+ case 0x80000000:
+ SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000);
+ break;
+ case 0x80000001:
+ SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x28100800);
+ break;
+ case 0x80000002:
+ SET_ABCD(0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865);
+ break;
+ case 0x80000003:
+ SET_ABCD(0x35692029, 0x55504320, 0x20202020, 0x20202020);
+ break;
+ case 0x80000004:
+ SET_ABCD(0x30373620, 0x20402020, 0x37342e33, 0x007a4847);
+ break;
+ case 0x80000005:
+ SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
+ break;
+ case 0x80000006:
+ SET_ABCD(0x00000000, 0x00000000, 0x01006040, 0x00000000);
+ break;
+ case 0x80000007:
+ SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000100);
+ break;
+ case 0x80000008:
+ SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000);
+ break;
+ default:
+ SET_ABCD(0x00000001, 0x00000002, 0x00000100, 0x00000000);
+ break;
+ }
+# undef SET_ABCD
+}
+
+
+ULong amd64g_calculate_RCR ( ULong arg,
+ ULong rot_amt,
+ ULong rflags_in,
+ Long szIN )
+{
+ Bool wantRflags = toBool(szIN < 0);
+ ULong sz = wantRflags ? (-szIN) : szIN;
+ ULong tempCOUNT = rot_amt & (sz == 8 ? 0x3F : 0x1F);
+ ULong cf=0, of=0, tempcf;
+
+ switch (sz) {
+ case 8:
+ cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
+ of = ((arg >> 63) ^ cf) & 1;
+ while (tempCOUNT > 0) {
+ tempcf = arg & 1;
+ arg = (arg >> 1) | (cf << 63);
+ cf = tempcf;
+ tempCOUNT--;
+ }
+ break;
+ case 4:
+ while (tempCOUNT >= 33) tempCOUNT -= 33;
+ cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
+ of = ((arg >> 31) ^ cf) & 1;
+ while (tempCOUNT > 0) {
+ tempcf = arg & 1;
+ arg = ((arg >> 1) & 0x7FFFFFFFULL) | (cf << 31);
+ cf = tempcf;
+ tempCOUNT--;
+ }
+ break;
+ case 2:
+ while (tempCOUNT >= 17) tempCOUNT -= 17;
+ cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
+ of = ((arg >> 15) ^ cf) & 1;
+ while (tempCOUNT > 0) {
+ tempcf = arg & 1;
+ arg = ((arg >> 1) & 0x7FFFULL) | (cf << 15);
+ cf = tempcf;
+ tempCOUNT--;
+ }
+ break;
+ case 1:
+ while (tempCOUNT >= 9) tempCOUNT -= 9;
+ cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
+ of = ((arg >> 7) ^ cf) & 1;
+ while (tempCOUNT > 0) {
+ tempcf = arg & 1;
+ arg = ((arg >> 1) & 0x7FULL) | (cf << 7);
+ cf = tempcf;
+ tempCOUNT--;
+ }
+ break;
+ default:
+ vpanic("calculate_RCR(amd64g): invalid size");
+ }
+
+ cf &= 1;
+ of &= 1;
+ rflags_in &= ~(AMD64G_CC_MASK_C | AMD64G_CC_MASK_O);
+ rflags_in |= (cf << AMD64G_CC_SHIFT_C) | (of << AMD64G_CC_SHIFT_O);
+
+ /* caller can ask to have back either the resulting flags or
+ resulting value, but not both */
+ return wantRflags ? rflags_in : arg;
+}
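+
+/* Worked example: an 8-bit RCR by 1 of 0x01 with carry clear rotates
+   the old CF into bit 7 and bit 0 into CF, so
+   amd64g_calculate_RCR(0x01, 1, 0, 1) returns 0x00, while the same
+   call with szIN == -1 returns rflags with C set and O clear. */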
+
+ULong amd64g_calculate_RCL ( ULong arg,
+ ULong rot_amt,
+ ULong rflags_in,
+ Long szIN )
+{
+ Bool wantRflags = toBool(szIN < 0);
+ ULong sz = wantRflags ? (-szIN) : szIN;
+ ULong tempCOUNT = rot_amt & (sz == 8 ? 0x3F : 0x1F);
+ ULong cf=0, of=0, tempcf;
+
+ switch (sz) {
+ case 8:
+ cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
+ while (tempCOUNT > 0) {
+ tempcf = (arg >> 63) & 1;
+ arg = (arg << 1) | (cf & 1);
+ cf = tempcf;
+ tempCOUNT--;
+ }
+ of = ((arg >> 63) ^ cf) & 1;
+ break;
+ case 4:
+ while (tempCOUNT >= 33) tempCOUNT -= 33;
+ cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
+ while (tempCOUNT > 0) {
+ tempcf = (arg >> 31) & 1;
+ arg = 0xFFFFFFFFULL & ((arg << 1) | (cf & 1));
+ cf = tempcf;
+ tempCOUNT--;
+ }
+ of = ((arg >> 31) ^ cf) & 1;
+ break;
+ case 2:
+ while (tempCOUNT >= 17) tempCOUNT -= 17;
+ cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
+ while (tempCOUNT > 0) {
+ tempcf = (arg >> 15) & 1;
+ arg = 0xFFFFULL & ((arg << 1) | (cf & 1));
+ cf = tempcf;
+ tempCOUNT--;
+ }
+ of = ((arg >> 15) ^ cf) & 1;
+ break;
+ case 1:
+ while (tempCOUNT >= 9) tempCOUNT -= 9;
+ cf = (rflags_in >> AMD64G_CC_SHIFT_C) & 1;
+ while (tempCOUNT > 0) {
+ tempcf = (arg >> 7) & 1;
+ arg = 0xFFULL & ((arg << 1) | (cf & 1));
+ cf = tempcf;
+ tempCOUNT--;
+ }
+ of = ((arg >> 7) ^ cf) & 1;
+ break;
+ default:
+ vpanic("calculate_RCL(amd64g): invalid size");
+ }
+
+ cf &= 1;
+ of &= 1;
+ rflags_in &= ~(AMD64G_CC_MASK_C | AMD64G_CC_MASK_O);
+ rflags_in |= (cf << AMD64G_CC_SHIFT_C) | (of << AMD64G_CC_SHIFT_O);
+
+ return wantRflags ? rflags_in : arg;
+}
+
+/* Taken from gf2x-0.9.5, released under GPLv2+ (later versions LGPLv2+)
+ * svn://scm.gforge.inria.fr/svn/gf2x/trunk/hardware/opteron/gf2x_mul1.h@25
+ */
+ULong amd64g_calculate_pclmul(ULong a, ULong b, ULong which)
+{
+ ULong hi, lo, tmp, A[16];
+
+ A[0] = 0; A[1] = a;
+ A[2] = A[1] << 1; A[3] = A[2] ^ a;
+ A[4] = A[2] << 1; A[5] = A[4] ^ a;
+ A[6] = A[3] << 1; A[7] = A[6] ^ a;
+ A[8] = A[4] << 1; A[9] = A[8] ^ a;
+ A[10] = A[5] << 1; A[11] = A[10] ^ a;
+ A[12] = A[6] << 1; A[13] = A[12] ^ a;
+ A[14] = A[7] << 1; A[15] = A[14] ^ a;
+
+ lo = (A[b >> 60] << 4) ^ A[(b >> 56) & 15];
+ hi = lo >> 56;
+ lo = (lo << 8) ^ (A[(b >> 52) & 15] << 4) ^ A[(b >> 48) & 15];
+ hi = (hi << 8) | (lo >> 56);
+ lo = (lo << 8) ^ (A[(b >> 44) & 15] << 4) ^ A[(b >> 40) & 15];
+ hi = (hi << 8) | (lo >> 56);
+ lo = (lo << 8) ^ (A[(b >> 36) & 15] << 4) ^ A[(b >> 32) & 15];
+ hi = (hi << 8) | (lo >> 56);
+ lo = (lo << 8) ^ (A[(b >> 28) & 15] << 4) ^ A[(b >> 24) & 15];
+ hi = (hi << 8) | (lo >> 56);
+ lo = (lo << 8) ^ (A[(b >> 20) & 15] << 4) ^ A[(b >> 16) & 15];
+ hi = (hi << 8) | (lo >> 56);
+ lo = (lo << 8) ^ (A[(b >> 12) & 15] << 4) ^ A[(b >> 8) & 15];
+ hi = (hi << 8) | (lo >> 56);
+ lo = (lo << 8) ^ (A[(b >> 4) & 15] << 4) ^ A[b & 15];
+
+ ULong m0 = -1;
+ m0 /= 255;
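+   /* m0 == 0x0101010101010101, so (m0 * c) replicates the byte c
+      into all eight byte lanes of the masks below */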
+ tmp = -((a >> 63) & 1); tmp &= ((b & (m0 * 0xfe)) >> 1); hi = hi ^ tmp;
+ tmp = -((a >> 62) & 1); tmp &= ((b & (m0 * 0xfc)) >> 2); hi = hi ^ tmp;
+ tmp = -((a >> 61) & 1); tmp &= ((b & (m0 * 0xf8)) >> 3); hi = hi ^ tmp;
+ tmp = -((a >> 60) & 1); tmp &= ((b & (m0 * 0xf0)) >> 4); hi = hi ^ tmp;
+ tmp = -((a >> 59) & 1); tmp &= ((b & (m0 * 0xe0)) >> 5); hi = hi ^ tmp;
+ tmp = -((a >> 58) & 1); tmp &= ((b & (m0 * 0xc0)) >> 6); hi = hi ^ tmp;
+ tmp = -((a >> 57) & 1); tmp &= ((b & (m0 * 0x80)) >> 7); hi = hi ^ tmp;
+
+ return which ? hi : lo;
+}
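+
+/* This is a carry-less (GF(2)[x]) multiply: partial products are
+   combined with XOR rather than addition.  For instance
+   amd64g_calculate_pclmul(3, 5, 0) == 15, since
+   (x+1) * (x^2+1) == x^3+x^2+x+1. */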
+
+
+/* CALLED FROM GENERATED CODE */
+/* DIRTY HELPER (non-referentially-transparent) */
+/* Horrible hack. On non-amd64 platforms, return 1. */
+ULong amd64g_dirtyhelper_RDTSC ( void )
+{
+# if defined(__x86_64__)
+ UInt eax, edx;
+ __asm__ __volatile__("rdtsc" : "=a" (eax), "=d" (edx));
+ return (((ULong)edx) << 32) | ((ULong)eax);
+# else
+ return 1ULL;
+# endif
+}
+
+
+/* CALLED FROM GENERATED CODE */
+/* DIRTY HELPER (non-referentially-transparent) */
+/* Horrible hack. On non-amd64 platforms, return 0. */
+ULong amd64g_dirtyhelper_IN ( ULong portno, ULong sz/*1,2 or 4*/ )
+{
+# if defined(__x86_64__)
+ ULong r = 0;
+ portno &= 0xFFFF;
+ switch (sz) {
+ case 4:
+ __asm__ __volatile__("movq $0,%%rax; inl %w1,%%eax; movq %%rax,%0"
+ : "=a" (r) : "Nd" (portno));
+ break;
+ case 2:
+ __asm__ __volatile__("movq $0,%%rax; inw %w1,%w0"
+ : "=a" (r) : "Nd" (portno));
+ break;
+ case 1:
+ __asm__ __volatile__("movq $0,%%rax; inb %w1,%b0"
+ : "=a" (r) : "Nd" (portno));
+ break;
+ default:
+ break; /* note: no 64-bit version of insn exists */
+ }
+ return r;
+# else
+ return 0;
+# endif
+}
+
+
+/* CALLED FROM GENERATED CODE */
+/* DIRTY HELPER (non-referentially-transparent) */
+/* Horrible hack. On non-amd64 platforms, do nothing. */
+void amd64g_dirtyhelper_OUT ( ULong portno, ULong data, ULong sz/*1,2 or 4*/ )
+{
+# if defined(__x86_64__)
+ portno &= 0xFFFF;
+ switch (sz) {
+ case 4:
+ __asm__ __volatile__("movq %0,%%rax; outl %%eax, %w1"
+ : : "a" (data), "Nd" (portno));
+ break;
+ case 2:
+ __asm__ __volatile__("outw %w0, %w1"
+ : : "a" (data), "Nd" (portno));
+ break;
+ case 1:
+ __asm__ __volatile__("outb %b0, %w1"
+ : : "a" (data), "Nd" (portno));
+ break;
+ default:
+ break; /* note: no 64-bit version of insn exists */
+ }
+# else
+ /* do nothing */
+# endif
+}
+
+/* CALLED FROM GENERATED CODE */
+/* DIRTY HELPER (non-referentially-transparent) */
+/* Horrible hack. On non-amd64 platforms, zero the result area. */
+/* op = 0: call the native SGDT instruction.
+ op = 1: call the native SIDT instruction.
+*/
+void amd64g_dirtyhelper_SxDT ( void *address, ULong op ) {
+# if defined(__x86_64__)
+ switch (op) {
+ case 0:
+ __asm__ __volatile__("sgdt (%0)" : : "r" (address) : "memory");
+ break;
+ case 1:
+ __asm__ __volatile__("sidt (%0)" : : "r" (address) : "memory");
+ break;
+ default:
+ vpanic("amd64g_dirtyhelper_SxDT");
+ }
+# else
+   /* not amd64: just zero out the 10-byte result area */
+ UChar* p = (UChar*)address;
+ p[0] = p[1] = p[2] = p[3] = p[4] = p[5] = 0;
+ p[6] = p[7] = p[8] = p[9] = 0;
+# endif
+}
+
+/*---------------------------------------------------------------*/
+/*--- Helpers for MMX/SSE/SSE2. ---*/
+/*---------------------------------------------------------------*/
+
+static inline UChar abdU8 ( UChar xx, UChar yy ) {
+ return toUChar(xx>yy ? xx-yy : yy-xx);
+}
+
+static inline ULong mk32x2 ( UInt w1, UInt w0 ) {
+ return (((ULong)w1) << 32) | ((ULong)w0);
+}
+
+static inline UShort sel16x4_3 ( ULong w64 ) {
+ UInt hi32 = toUInt(w64 >> 32);
+ return toUShort(hi32 >> 16);
+}
+static inline UShort sel16x4_2 ( ULong w64 ) {
+ UInt hi32 = toUInt(w64 >> 32);
+ return toUShort(hi32);
+}
+static inline UShort sel16x4_1 ( ULong w64 ) {
+ UInt lo32 = toUInt(w64);
+ return toUShort(lo32 >> 16);
+}
+static inline UShort sel16x4_0 ( ULong w64 ) {
+ UInt lo32 = toUInt(w64);
+ return toUShort(lo32);
+}
+
+static inline UChar sel8x8_7 ( ULong w64 ) {
+ UInt hi32 = toUInt(w64 >> 32);
+ return toUChar(hi32 >> 24);
+}
+static inline UChar sel8x8_6 ( ULong w64 ) {
+ UInt hi32 = toUInt(w64 >> 32);
+ return toUChar(hi32 >> 16);
+}
+static inline UChar sel8x8_5 ( ULong w64 ) {
+ UInt hi32 = toUInt(w64 >> 32);
+ return toUChar(hi32 >> 8);
+}
+static inline UChar sel8x8_4 ( ULong w64 ) {
+ UInt hi32 = toUInt(w64 >> 32);
+ return toUChar(hi32 >> 0);
+}
+static inline UChar sel8x8_3 ( ULong w64 ) {
+ UInt lo32 = toUInt(w64);
+ return toUChar(lo32 >> 24);
+}
+static inline UChar sel8x8_2 ( ULong w64 ) {
+ UInt lo32 = toUInt(w64);
+ return toUChar(lo32 >> 16);
+}
+static inline UChar sel8x8_1 ( ULong w64 ) {
+ UInt lo32 = toUInt(w64);
+ return toUChar(lo32 >> 8);
+}
+static inline UChar sel8x8_0 ( ULong w64 ) {
+ UInt lo32 = toUInt(w64);
+ return toUChar(lo32 >> 0);
+}
+
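+/* pmaddwd: multiply corresponding signed 16-bit lanes of xx and yy
+   and add adjacent products into two signed 32-bit lanes.  E.g. with
+   xx == yy == 0x0001000200030004, the result is 0x0000000500000019:
+   1*1 + 2*2 == 5 in the high lane, 3*3 + 4*4 == 25 in the low. */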
+/* CALLED FROM GENERATED CODE: CLEAN HELPER */
+ULong amd64g_calculate_mmx_pmaddwd ( ULong xx, ULong yy )
+{
+ return
+ mk32x2(
+ (((Int)(Short)sel16x4_3(xx)) * ((Int)(Short)sel16x4_3(yy)))
+ + (((Int)(Short)sel16x4_2(xx)) * ((Int)(Short)sel16x4_2(yy))),
+ (((Int)(Short)sel16x4_1(xx)) * ((Int)(Short)sel16x4_1(yy)))
+ + (((Int)(Short)sel16x4_0(xx)) * ((Int)(Short)sel16x4_0(yy)))
+ );
+}
+
+/* CALLED FROM GENERATED CODE: CLEAN HELPER */
+ULong amd64g_calculate_mmx_pmovmskb ( ULong xx )
+{
+ ULong r = 0;
+ if (xx & (1ULL << (64-1))) r |= (1<<7);
+ if (xx & (1ULL << (56-1))) r |= (1<<6);
+ if (xx & (1ULL << (48-1))) r |= (1<<5);
+ if (xx & (1ULL << (40-1))) r |= (1<<4);
+ if (xx & (1ULL << (32-1))) r |= (1<<3);
+ if (xx & (1ULL << (24-1))) r |= (1<<2);
+ if (xx & (1ULL << (16-1))) r |= (1<<1);
+ if (xx & (1ULL << ( 8-1))) r |= (1<<0);
+ return r;
+}
+
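+/* psadbw: sum of the absolute differences of the eight byte lanes,
+   delivered in the low 16 bits of the result.  E.g. with xx == 0 and
+   yy == 0x0101010101010101, every lane differs by 1, giving 8. */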
+/* CALLED FROM GENERATED CODE: CLEAN HELPER */
+ULong amd64g_calculate_mmx_psadbw ( ULong xx, ULong yy )
+{
+ UInt t = 0;
+ t += (UInt)abdU8( sel8x8_7(xx), sel8x8_7(yy) );
+ t += (UInt)abdU8( sel8x8_6(xx), sel8x8_6(yy) );
+ t += (UInt)abdU8( sel8x8_5(xx), sel8x8_5(yy) );
+ t += (UInt)abdU8( sel8x8_4(xx), sel8x8_4(yy) );
+ t += (UInt)abdU8( sel8x8_3(xx), sel8x8_3(yy) );
+ t += (UInt)abdU8( sel8x8_2(xx), sel8x8_2(yy) );
+ t += (UInt)abdU8( sel8x8_1(xx), sel8x8_1(yy) );
+ t += (UInt)abdU8( sel8x8_0(xx), sel8x8_0(yy) );
+ t &= 0xFFFF;
+ return (ULong)t;
+}
+
+/* CALLED FROM GENERATED CODE: CLEAN HELPER */
+ULong amd64g_calculate_sse_pmovmskb ( ULong w64hi, ULong w64lo )
+{
+ ULong rHi8 = amd64g_calculate_mmx_pmovmskb ( w64hi );
+ ULong rLo8 = amd64g_calculate_mmx_pmovmskb ( w64lo );
+ return ((rHi8 & 0xFF) << 8) | (rLo8 & 0xFF);
+}
+
+
+/*---------------------------------------------------------------*/
+/*--- Helpers for SSE4.2 PCMP{E,I}STR{I,M} ---*/
+/*---------------------------------------------------------------*/
+
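+/* Build a 16-bit mask with bit i set iff byte i of the vector is
+   zero: an all-zero V128 gives 0xFFFF, one with no zero bytes gives
+   0. */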
+static UInt zmask_from_V128 ( V128* arg )
+{
+ UInt i, res = 0;
+ for (i = 0; i < 16; i++) {
+ res |= ((arg->w8[i] == 0) ? 1 : 0) << i;
+ }
+ return res;
+}
+
+/* Helps with PCMP{I,E}STR{I,M}.
+
+   CALLED FROM GENERATED CODE: DIRTY HELPER(s).  (Not really -- it
+   could be a clean helper, except that we can't pass two V128s by
+   value to a clean helper, nor have one returned.)
+   Reads guest state, writes to guest state for the xSTRM cases,
+   makes no memory accesses, and is otherwise a pure function.
+
+   opc4_and_imm contains (4th byte of opcode << 8) | the-imm8-byte so
+ the callee knows which I/E and I/M variant it is dealing with and
+ what the specific operation is. 4th byte of opcode is in the range
+ 0x60 to 0x63:
+ istri 66 0F 3A 63
+ istrm 66 0F 3A 62
+ estri 66 0F 3A 61
+ estrm 66 0F 3A 60
+
+ gstOffL and gstOffR are the guest state offsets for the two XMM
+ register inputs. We never have to deal with the memory case since
+ that is handled by pre-loading the relevant value into the fake
+ XMM16 register.
+
+ For ESTRx variants, edxIN and eaxIN hold the values of those two
+ registers.
+
+ In all cases, the bottom 16 bits of the result contain the new
+ OSZACP %rflags values. For xSTRI variants, bits[31:16] of the
+ result hold the new %ecx value. For xSTRM variants, the helper
+ writes the result directly to the guest XMM0.
+
+ Declarable side effects: in all cases, reads guest state at
+ [gstOffL, +16) and [gstOffR, +16). For xSTRM variants, also writes
+ guest_XMM0.
+
+   Is expected to be called with opc4_and_imm combinations which have
+   actually been validated, and will assert otherwise.  The front
+   end should ensure we're only called with verified values.
+*/
+ULong amd64g_dirtyhelper_PCMPxSTRx (
+ VexGuestAMD64State* gst,
+ HWord opc4_and_imm,
+ HWord gstOffL, HWord gstOffR,
+ HWord edxIN, HWord eaxIN
+ )
+{
+ HWord opc4 = (opc4_and_imm >> 8) & 0xFF;
+ HWord imm8 = opc4_and_imm & 0xFF;
+ HWord isISTRx = opc4 & 2;
+ HWord isxSTRM = (opc4 & 1) ^ 1;
+ vassert((opc4 & 0xFC) == 0x60); /* 0x60 .. 0x63 */
+ vassert((imm8 & 1) == 0); /* we support byte-size cases only */
+
+ // where the args are
+ V128* argL = (V128*)( ((UChar*)gst) + gstOffL );
+ V128* argR = (V128*)( ((UChar*)gst) + gstOffR );
+
+ /* Create the arg validity masks, either from the vectors
+ themselves or from the supplied edx/eax values. */
+ // FIXME: this is only right for the 8-bit data cases.
+ // At least that is asserted above.
+ UInt zmaskL, zmaskR;
+ if (isISTRx) {
+ zmaskL = zmask_from_V128(argL);
+ zmaskR = zmask_from_V128(argR);
+ } else {
+ Int tmp;
+ tmp = edxIN & 0xFFFFFFFF;
+ if (tmp < -16) tmp = -16;
+ if (tmp > 16) tmp = 16;
+ if (tmp < 0) tmp = -tmp;
+ vassert(tmp >= 0 && tmp <= 16);
+ zmaskL = (1 << tmp) & 0xFFFF;
+ tmp = eaxIN & 0xFFFFFFFF;
+ if (tmp < -16) tmp = -16;
+ if (tmp > 16) tmp = 16;
+ if (tmp < 0) tmp = -tmp;
+ vassert(tmp >= 0 && tmp <= 16);
+ zmaskR = (1 << tmp) & 0xFFFF;
+ }
+
+ // temp spot for the resulting flags and vector.
+ V128 resV;
+ UInt resOSZACP;
+
+   // do the math
+ Bool ok = compute_PCMPxSTRx (
+ &resV, &resOSZACP, argL, argR,
+ zmaskL, zmaskR, imm8, (Bool)isxSTRM
+ );
+
+ // front end shouldn't pass us any imm8 variants we can't
+ // handle. Hence:
+ vassert(ok);
+
+ // So, finally we need to get the results back to the caller.
+ // In all cases, the new OSZACP value is the lowest 16 of
+ // the return value.
+ if (isxSTRM) {
+      /* gst->guest_XMM0 = resV; */ // gcc doesn't like that
+ gst->guest_XMM0[0] = resV.w32[0];
+ gst->guest_XMM0[1] = resV.w32[1];
+ gst->guest_XMM0[2] = resV.w32[2];
+ gst->guest_XMM0[3] = resV.w32[3];
+ return resOSZACP & 0x8D5;
+ } else {
+ UInt newECX = resV.w32[0] & 0xFFFF;
+ return (newECX << 16) | (resOSZACP & 0x8D5);
+ }
+}
+
+
+/*---------------------------------------------------------------*/
+/*--- Helpers for dealing with, and describing, ---*/
+/*--- guest state as a whole. ---*/
+/*---------------------------------------------------------------*/
+
+/* Initialise the entire amd64 guest state. */
+/* VISIBLE TO LIBVEX CLIENT */
+void LibVEX_GuestAMD64_initialise ( /*OUT*/VexGuestAMD64State* vex_state )
+{
+ vex_state->guest_RAX = 0;
+ vex_state->guest_RCX = 0;
+ vex_state->guest_RDX = 0;
+ vex_state->guest_RBX = 0;
+ vex_state->guest_RSP = 0;
+ vex_state->guest_RBP = 0;
+ vex_state->guest_RSI = 0;
+ vex_state->guest_RDI = 0;
+ vex_state->guest_R8 = 0;
+ vex_state->guest_R9 = 0;
+ vex_state->guest_R10 = 0;
+ vex_state->guest_R11 = 0;
+ vex_state->guest_R12 = 0;
+ vex_state->guest_R13 = 0;
+ vex_state->guest_R14 = 0;
+ vex_state->guest_R15 = 0;
+
+ vex_state->guest_CC_OP = AMD64G_CC_OP_COPY;
+ vex_state->guest_CC_DEP1 = 0;
+ vex_state->guest_CC_DEP2 = 0;
+ vex_state->guest_CC_NDEP = 0;
+
+ vex_state->guest_DFLAG = 1; /* forwards */
+ vex_state->guest_IDFLAG = 0;
+
+ /* HACK: represent the offset associated with %fs==0. This
+ assumes that %fs is only ever zero. */
+ vex_state->guest_FS_ZERO = 0;
+
+ vex_state->guest_RIP = 0;
+
+ /* Initialise the simulated FPU */
+ amd64g_dirtyhelper_FINIT( vex_state );
+
+ /* Initialise the SSE state. */
+# define SSEZERO(_xmm) _xmm[0]=_xmm[1]=_xmm[2]=_xmm[3] = 0;
+
+ vex_state->guest_SSEROUND = (ULong)Irrm_NEAREST;
+ SSEZERO(vex_state->guest_XMM0);
+ SSEZERO(vex_state->guest_XMM1);
+ SSEZERO(vex_state->guest_XMM2);
+ SSEZERO(vex_state->guest_XMM3);
+ SSEZERO(vex_state->guest_XMM4);
+ SSEZERO(vex_state->guest_XMM5);
+ SSEZERO(vex_state->guest_XMM6);
+ SSEZERO(vex_state->guest_XMM7);
+ SSEZERO(vex_state->guest_XMM8);
+ SSEZERO(vex_state->guest_XMM9);
+ SSEZERO(vex_state->guest_XMM10);
+ SSEZERO(vex_state->guest_XMM11);
+ SSEZERO(vex_state->guest_XMM12);
+ SSEZERO(vex_state->guest_XMM13);
+ SSEZERO(vex_state->guest_XMM14);
+ SSEZERO(vex_state->guest_XMM15);
+ SSEZERO(vex_state->guest_XMM16);
+
+# undef SSEZERO
+
+ vex_state->guest_EMWARN = EmWarn_NONE;
+
+ /* These should not ever be either read or written, but we
+ initialise them anyway. */
+ vex_state->guest_TISTART = 0;
+ vex_state->guest_TILEN = 0;
+
+ vex_state->guest_NRADDR = 0;
+ vex_state->guest_SC_CLASS = 0;
+ vex_state->guest_GS_0x60 = 0;
+
+ vex_state->guest_IP_AT_SYSCALL = 0;
+ /* vex_state->padding = 0; */
+}
+
+
+/* Figure out if any part of the guest state contained in minoff
+ .. maxoff requires precise memory exceptions. If in doubt return
+   True (but this generates significantly slower code).
+
+ By default we enforce precise exns for guest %RSP, %RBP and %RIP
+ only. These are the minimum needed to extract correct stack
+ backtraces from amd64 code.
+*/
+Bool guest_amd64_state_requires_precise_mem_exns ( Int minoff,
+ Int maxoff)
+{
+ Int rbp_min = offsetof(VexGuestAMD64State, guest_RBP);
+ Int rbp_max = rbp_min + 8 - 1;
+ Int rsp_min = offsetof(VexGuestAMD64State, guest_RSP);
+ Int rsp_max = rsp_min + 8 - 1;
+ Int rip_min = offsetof(VexGuestAMD64State, guest_RIP);
+ Int rip_max = rip_min + 8 - 1;
+
+ if (maxoff < rbp_min || minoff > rbp_max) {
+ /* no overlap with rbp */
+ } else {
+ return True;
+ }
+
+ if (maxoff < rsp_min || minoff > rsp_max) {
+ /* no overlap with rsp */
+ } else {
+ return True;
+ }
+
+ if (maxoff < rip_min || minoff > rip_max) {
+      /* no overlap with rip */
+ } else {
+ return True;
+ }
+
+ return False;
+}
+
+
+#define ALWAYSDEFD(field) \
+ { offsetof(VexGuestAMD64State, field), \
+ (sizeof ((VexGuestAMD64State*)0)->field) }
+
+VexGuestLayout
+ amd64guest_layout
+ = {
+ /* Total size of the guest state, in bytes. */
+ .total_sizeB = sizeof(VexGuestAMD64State),
+
+ /* Describe the stack pointer. */
+ .offset_SP = offsetof(VexGuestAMD64State,guest_RSP),
+ .sizeof_SP = 8,
+
+ /* Describe the frame pointer. */
+ .offset_FP = offsetof(VexGuestAMD64State,guest_RBP),
+ .sizeof_FP = 8,
+
+ /* Describe the instruction pointer. */
+ .offset_IP = offsetof(VexGuestAMD64State,guest_RIP),
+ .sizeof_IP = 8,
+
+ /* Describe any sections to be regarded by Memcheck as
+ 'always-defined'. */
+ .n_alwaysDefd = 16,
+
+ /* flags thunk: OP and NDEP are always defd, whereas DEP1
+ and DEP2 have to be tracked. See detailed comment in
+ gdefs.h on meaning of thunk fields. */
+ .alwaysDefd
+ = { /* 0 */ ALWAYSDEFD(guest_CC_OP),
+ /* 1 */ ALWAYSDEFD(guest_CC_NDEP),
+ /* 2 */ ALWAYSDEFD(guest_DFLAG),
+ /* 3 */ ALWAYSDEFD(guest_IDFLAG),
+ /* 4 */ ALWAYSDEFD(guest_RIP),
+ /* 5 */ ALWAYSDEFD(guest_FS_ZERO),
+ /* 6 */ ALWAYSDEFD(guest_FTOP),
+ /* 7 */ ALWAYSDEFD(guest_FPTAG),
+ /* 8 */ ALWAYSDEFD(guest_FPROUND),
+ /* 9 */ ALWAYSDEFD(guest_FC3210),
+ // /* */ ALWAYSDEFD(guest_CS),
+ // /* */ ALWAYSDEFD(guest_DS),
+ // /* */ ALWAYSDEFD(guest_ES),
+ // /* */ ALWAYSDEFD(guest_FS),
+ // /* */ ALWAYSDEFD(guest_GS),
+ // /* */ ALWAYSDEFD(guest_SS),
+ // /* */ ALWAYSDEFD(guest_LDT),
+ // /* */ ALWAYSDEFD(guest_GDT),
+ /* 10 */ ALWAYSDEFD(guest_EMWARN),
+ /* 11 */ ALWAYSDEFD(guest_SSEROUND),
+ /* 12 */ ALWAYSDEFD(guest_TISTART),
+ /* 13 */ ALWAYSDEFD(guest_TILEN),
+ /* 14 */ ALWAYSDEFD(guest_SC_CLASS),
+ /* 15 */ ALWAYSDEFD(guest_IP_AT_SYSCALL)
+ }
+ };
+
+
+/*---------------------------------------------------------------*/
+/*--- end guest_amd64_helpers.c ---*/
+/*---------------------------------------------------------------*/
diff --git a/VEX/priv/guest_amd64_toIR.c b/VEX/priv/guest_amd64_toIR.c
new file mode 100644
index 0000000..79b1269
--- /dev/null
+++ b/VEX/priv/guest_amd64_toIR.c
@@ -0,0 +1,18294 @@
+
+/*--------------------------------------------------------------------*/
+/*--- begin guest_amd64_toIR.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+ This file is part of Valgrind, a dynamic binary instrumentation
+ framework.
+
+ Copyright (C) 2004-2010 OpenWorks LLP
+ info@open-works.net
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA.
+
+ The GNU General Public License is contained in the file COPYING.
+
+ Neither the names of the U.S. Department of Energy nor the
+ University of California nor the names of its contributors may be
+ used to endorse or promote products derived from this software
+ without prior written permission.
+*/
+
+/* Translates AMD64 code to IR. */
+
+/* TODO:
+
+ All Puts to CC_OP/CC_DEP1/CC_DEP2/CC_NDEP should really be checked
+ to ensure a 64-bit value is being written.
+
+ x87 FP Limitations:
+
+ * all arithmetic done at 64 bits
+
+ * no FP exceptions, except for handling stack over/underflow
+
+ * FP rounding mode observed only for float->int conversions and
+ int->float conversions which could lose accuracy, and for
+ float-to-float rounding. For all other operations,
+ round-to-nearest is used, regardless.
+
+ * FP sin/cos/tan/sincos: C2 flag is always cleared. IOW the
+ simulation claims the argument is in-range (-2^63 <= arg <= 2^63)
+ even when it isn't.
+
+ * some of the FCOM cases could do with testing -- not convinced
+ that the args are the right way round.
+
+ * FSAVE does not re-initialise the FPU; it should do
+
+ * FINIT not only initialises the FPU environment, it also zeroes
+ all the FP registers. It should leave the registers unchanged.
+
+ RDTSC returns zero, always.
+
+ SAHF should cause eflags[1] == 1, and in fact it produces 0. As
+ per Intel docs this bit has no meaning anyway. Since PUSHF is the
+ only way to observe eflags[1], a proper fix would be to make that
+ bit be set by PUSHF.
+
+ This module uses global variables and so is not MT-safe (if that
+ should ever become relevant).
+*/
+
+/* Notes re address size overrides (0x67).
+
+ According to the AMD documentation (24594 Rev 3.09, Sept 2003,
+ "AMD64 Architecture Programmer's Manual Volume 3: General-Purpose
+ and System Instructions"), Section 1.2.3 ("Address-Size Override
+ Prefix"):
+
+ 0x67 applies to all explicit memory references, causing the top
+ 32 bits of the effective address to become zero.
+
+ 0x67 has no effect on stack references (push/pop); these always
+ use a 64-bit address.
+
+ 0x67 changes the interpretation of instructions which implicitly
+ reference RCX/RSI/RDI, so that in fact ECX/ESI/EDI are used
+ instead. These are:
+
+ cmp{s,sb,sw,sd,sq}
+ in{s,sb,sw,sd}
+ jcxz, jecxz, jrcxz
+ lod{s,sb,sw,sd,sq}
+ loop{,e,bz,be,z}
+ mov{s,sb,sw,sd,sq}
+ out{s,sb,sw,sd}
+ rep{,e,ne,nz}
+ sca{s,sb,sw,sd,sq}
+ sto{s,sb,sw,sd,sq}
+ xlat{,b} */
+
+/* "Special" instructions.
+
+ This instruction decoder can decode three special instructions
+ which mean nothing natively (are no-ops as far as regs/mem are
+ concerned) but have meaning for supporting Valgrind. A special
+ instruction is flagged by the 16-byte preamble 48C1C703 48C1C70D
+ 48C1C73D 48C1C733 (in the standard interpretation, that means: rolq
+ $3, %rdi; rolq $13, %rdi; rolq $61, %rdi; rolq $51, %rdi).
+   Following that, one of the following 3 is allowed (standard
+ interpretation in parentheses):
+
+ 4887DB (xchgq %rbx,%rbx) %RDX = client_request ( %RAX )
+ 4887C9 (xchgq %rcx,%rcx) %RAX = guest_NRADDR
+ 4887D2 (xchgq %rdx,%rdx) call-noredir *%RAX
+
+ Any other bytes following the 16-byte preamble are illegal and
+ constitute a failure in instruction decoding. This all assumes
+ that the preamble will never occur except in specific code
+ fragments designed for Valgrind to catch.
+
+ No prefixes may precede a "Special" instruction.
+*/
+
+/* casLE (implementation of lock-prefixed insns) and rep-prefixed
+ insns: the side-exit back to the start of the insn is done with
+ Ijk_Boring. This is quite wrong, it should be done with
+ Ijk_NoRedir, since otherwise the side exit, which is intended to
+ restart the instruction for whatever reason, could go somewhere
+ entirely else. Doing it right (with Ijk_NoRedir jumps) would make
+ no-redir jumps performance critical, at least for rep-prefixed
+ instructions, since all iterations thereof would involve such a
+ jump. It's not such a big deal with casLE since the side exit is
+ only taken if the CAS fails, that is, the location is contended,
+ which is relatively unlikely.
+
+ Note also, the test for CAS success vs failure is done using
+ Iop_CasCmp{EQ,NE}{8,16,32,64} rather than the ordinary
+ Iop_Cmp{EQ,NE} equivalents. This is so as to tell Memcheck that it
+ shouldn't definedness-check these comparisons. See
+ COMMENT_ON_CasCmpEQ in memcheck/mc_translate.c for
+ background/rationale.
+*/
+
+/* LOCK prefixed instructions. These are translated using IR-level
+ CAS statements (IRCAS) and are believed to preserve atomicity, even
+ from the point of view of some other process racing against a
+ simulated one (presumably they communicate via a shared memory
+ segment).
+
+ Handlers which are aware of LOCK prefixes are:
+ dis_op2_G_E (add, or, adc, sbb, and, sub, xor)
+ dis_cmpxchg_G_E (cmpxchg)
+ dis_Grp1 (add, or, adc, sbb, and, sub, xor)
+ dis_Grp3 (not, neg)
+ dis_Grp4 (inc, dec)
+ dis_Grp5 (inc, dec)
+ dis_Grp8_Imm (bts, btc, btr)
+ dis_bt_G_E (bts, btc, btr)
+ dis_xadd_G_E (xadd)
+*/
+
+
+#include "libvex_basictypes.h"
+#include "libvex_ir.h"
+#include "libvex.h"
+#include "libvex_guest_amd64.h"
+
+#include "main_util.h"
+#include "main_globals.h"
+#include "guest_generic_bb_to_IR.h"
+#include "guest_generic_x87.h"
+#include "guest_amd64_defs.h"
+
+
+/*------------------------------------------------------------*/
+/*--- Globals ---*/
+/*------------------------------------------------------------*/
+
+/* These are set at the start of the translation of a BB, or of an
+   individual insn (right down in disInstr_AMD64), so that we don't
+   have to pass them around endlessly.  They are all constant during
+   the translation of any given insn. */
+
+/* We need to know this to do sub-register accesses correctly. */
+static Bool host_is_bigendian;
+
+/* Pointer to the guest code area (points to start of BB, not to the
+ insn being processed). */
+static UChar* guest_code;
+
+/* The guest address corresponding to guest_code[0]. */
+static Addr64 guest_RIP_bbstart;
+
+/* The guest address for the instruction currently being
+ translated. */
+static Addr64 guest_RIP_curr_instr;
+
+/* The IRSB* into which we're generating code. */
+static IRSB* irsb;
+
+/* For ensuring that %rip-relative addressing is done right. A read
+ of %rip generates the address of the next instruction. It may be
+ that we don't conveniently know that inside disAMode(). For sanity
+ checking, if the next insn %rip is needed, we make a guess at what
+ it is, record that guess here, and set the accompanying Bool to
+ indicate that -- after this insn's decode is finished -- that guess
+ needs to be checked. */
+
+/* At the start of each insn decode, is set to (0, False).
+ After the decode, if _mustcheck is now True, _assumed is
+ checked. */
+
+static Addr64 guest_RIP_next_assumed;
+static Bool guest_RIP_next_mustcheck;
+
+
+/*------------------------------------------------------------*/
+/*--- Helpers for constructing IR. ---*/
+/*------------------------------------------------------------*/
+
+/* Generate a new temporary of the given type. */
+static IRTemp newTemp ( IRType ty )
+{
+ vassert(isPlausibleIRType(ty));
+ return newIRTemp( irsb->tyenv, ty );
+}
+
+/* Add a statement to the list held by "irsb". */
+static void stmt ( IRStmt* st )
+{
+ addStmtToIRSB( irsb, st );
+}
+
+/* Generate a statement "dst := e". */
+static void assign ( IRTemp dst, IRExpr* e )
+{
+ stmt( IRStmt_WrTmp(dst, e) );
+}
+
+static IRExpr* unop ( IROp op, IRExpr* a )
+{
+ return IRExpr_Unop(op, a);
+}
+
+static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
+{
+ return IRExpr_Binop(op, a1, a2);
+}
+
+static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 )
+{
+ return IRExpr_Triop(op, a1, a2, a3);
+}
+
+static IRExpr* mkexpr ( IRTemp tmp )
+{
+ return IRExpr_RdTmp(tmp);
+}
+
+static IRExpr* mkU8 ( ULong i )
+{
+ vassert(i < 256);
+ return IRExpr_Const(IRConst_U8( (UChar)i ));
+}
+
+static IRExpr* mkU16 ( ULong i )
+{
+ vassert(i < 0x10000ULL);
+ return IRExpr_Const(IRConst_U16( (UShort)i ));
+}
+
+static IRExpr* mkU32 ( ULong i )
+{
+ vassert(i < 0x100000000ULL);
+ return IRExpr_Const(IRConst_U32( (UInt)i ));
+}
+
+static IRExpr* mkU64 ( ULong i )
+{
+ return IRExpr_Const(IRConst_U64(i));
+}
+
+static IRExpr* mkU ( IRType ty, ULong i )
+{
+ switch (ty) {
+ case Ity_I8: return mkU8(i);
+ case Ity_I16: return mkU16(i);
+ case Ity_I32: return mkU32(i);
+ case Ity_I64: return mkU64(i);
+ default: vpanic("mkU(amd64)");
+ }
+}
+
+static void storeLE ( IRExpr* addr, IRExpr* data )
+{
+ stmt( IRStmt_Store(Iend_LE, addr, data) );
+}
+
+static IRExpr* loadLE ( IRType ty, IRExpr* addr )
+{
+ return IRExpr_Load(Iend_LE, ty, addr);
+}
+
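+/* Produce the 'ty'-sized variant of an 8-bit IROp.  This relies on
+   the IROp enumeration laying out the 8/16/32/64-bit variants of
+   each such operation consecutively, e.g. Iop_Add16 == Iop_Add8 + 1. */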
+static IROp mkSizedOp ( IRType ty, IROp op8 )
+{
+ vassert(op8 == Iop_Add8 || op8 == Iop_Sub8
+ || op8 == Iop_Mul8
+ || op8 == Iop_Or8 || op8 == Iop_And8 || op8 == Iop_Xor8
+ || op8 == Iop_Shl8 || op8 == Iop_Shr8 || op8 == Iop_Sar8
+ || op8 == Iop_CmpEQ8 || op8 == Iop_CmpNE8
+ || op8 == Iop_CasCmpNE8
+ || op8 == Iop_Not8 );
+ switch (ty) {
+ case Ity_I8: return 0 +op8;
+ case Ity_I16: return 1 +op8;
+ case Ity_I32: return 2 +op8;
+ case Ity_I64: return 3 +op8;
+ default: vpanic("mkSizedOp(amd64)");
+ }
+}
+
+static
+IRExpr* doScalarWidening ( Int szSmall, Int szBig, Bool signd, IRExpr* src )
+{
+ if (szSmall == 1 && szBig == 4) {
+ return unop(signd ? Iop_8Sto32 : Iop_8Uto32, src);
+ }
+ if (szSmall == 1 && szBig == 2) {
+ return unop(signd ? Iop_8Sto16 : Iop_8Uto16, src);
+ }
+ if (szSmall == 2 && szBig == 4) {
+ return unop(signd ? Iop_16Sto32 : Iop_16Uto32, src);
+ }
+ if (szSmall == 1 && szBig == 8 && !signd) {
+ return unop(Iop_8Uto64, src);
+ }
+ if (szSmall == 1 && szBig == 8 && signd) {
+ return unop(Iop_8Sto64, src);
+ }
+ if (szSmall == 2 && szBig == 8 && !signd) {
+ return unop(Iop_16Uto64, src);
+ }
+ if (szSmall == 2 && szBig == 8 && signd) {
+ return unop(Iop_16Sto64, src);
+ }
+ vpanic("doScalarWidening(amd64)");
+}
+
+
+
+/*------------------------------------------------------------*/
+/*--- Debugging output ---*/
+/*------------------------------------------------------------*/
+
+/* Bomb out if we can't handle something. */
+__attribute__ ((noreturn))
+static void unimplemented ( HChar* str )
+{
+ vex_printf("amd64toIR: unimplemented feature\n");
+ vpanic(str);
+}
+
+#define DIP(format, args...) \
+ if (vex_traceflags & VEX_TRACE_FE) \
+ vex_printf(format, ## args)
+
+#define DIS(buf, format, args...) \
+ if (vex_traceflags & VEX_TRACE_FE) \
+ vex_sprintf(buf, format, ## args)
+
+
+/*------------------------------------------------------------*/
+/*--- Offsets of various parts of the amd64 guest state. ---*/
+/*------------------------------------------------------------*/
+
+#define OFFB_RAX offsetof(VexGuestAMD64State,guest_RAX)
+#define OFFB_RBX offsetof(VexGuestAMD64State,guest_RBX)
+#define OFFB_RCX offsetof(VexGuestAMD64State,guest_RCX)
+#define OFFB_RDX offsetof(VexGuestAMD64State,guest_RDX)
+#define OFFB_RSP offsetof(VexGuestAMD64State,guest_RSP)
+#define OFFB_RBP offsetof(VexGuestAMD64State,guest_RBP)
+#define OFFB_RSI offsetof(VexGuestAMD64State,guest_RSI)
+#define OFFB_RDI offsetof(VexGuestAMD64State,guest_RDI)
+#define OFFB_R8 offsetof(VexGuestAMD64State,guest_R8)
+#define OFFB_R9 offsetof(VexGuestAMD64State,guest_R9)
+#define OFFB_R10 offsetof(VexGuestAMD64State,guest_R10)
+#define OFFB_R11 offsetof(VexGuestAMD64State,guest_R11)
+#define OFFB_R12 offsetof(VexGuestAMD64State,guest_R12)
+#define OFFB_R13 offsetof(VexGuestAMD64State,guest_R13)
+#define OFFB_R14 offsetof(VexGuestAMD64State,guest_R14)
+#define OFFB_R15 offsetof(VexGuestAMD64State,guest_R15)
+
+#define OFFB_RIP offsetof(VexGuestAMD64State,guest_RIP)
+
+#define OFFB_FS_ZERO offsetof(VexGuestAMD64State,guest_FS_ZERO)
+#define OFFB_GS_0x60 offsetof(VexGuestAMD64State,guest_GS_0x60)
+
+#define OFFB_CC_OP offsetof(VexGuestAMD64State,guest_CC_OP)
+#define OFFB_CC_DEP1 offsetof(VexGuestAMD64State,guest_CC_DEP1)
+#define OFFB_CC_DEP2 offsetof(VexGuestAMD64State,guest_CC_DEP2)
+#define OFFB_CC_NDEP offsetof(VexGuestAMD64State,guest_CC_NDEP)
+
+#define OFFB_FPREGS offsetof(VexGuestAMD64State,guest_FPREG[0])
+#define OFFB_FPTAGS offsetof(VexGuestAMD64State,guest_FPTAG[0])
+#define OFFB_DFLAG offsetof(VexGuestAMD64State,guest_DFLAG)
+#define OFFB_ACFLAG offsetof(VexGuestAMD64State,guest_ACFLAG)
+#define OFFB_IDFLAG offsetof(VexGuestAMD64State,guest_IDFLAG)
+#define OFFB_FTOP offsetof(VexGuestAMD64State,guest_FTOP)
+#define OFFB_FC3210 offsetof(VexGuestAMD64State,guest_FC3210)
+#define OFFB_FPROUND offsetof(VexGuestAMD64State,guest_FPROUND)
+//..
+//.. #define OFFB_CS offsetof(VexGuestX86State,guest_CS)
+//.. #define OFFB_DS offsetof(VexGuestX86State,guest_DS)
+//.. #define OFFB_ES offsetof(VexGuestX86State,guest_ES)
+//.. #define OFFB_FS offsetof(VexGuestX86State,guest_FS)
+//.. #define OFFB_GS offsetof(VexGuestX86State,guest_GS)
+//.. #define OFFB_SS offsetof(VexGuestX86State,guest_SS)
+//.. #define OFFB_LDT offsetof(VexGuestX86State,guest_LDT)
+//.. #define OFFB_GDT offsetof(VexGuestX86State,guest_GDT)
+
+#define OFFB_SSEROUND offsetof(VexGuestAMD64State,guest_SSEROUND)
+#define OFFB_XMM0 offsetof(VexGuestAMD64State,guest_XMM0)
+#define OFFB_XMM1 offsetof(VexGuestAMD64State,guest_XMM1)
+#define OFFB_XMM2 offsetof(VexGuestAMD64State,guest_XMM2)
+#define OFFB_XMM3 offsetof(VexGuestAMD64State,guest_XMM3)
+#define OFFB_XMM4 offsetof(VexGuestAMD64State,guest_XMM4)
+#define OFFB_XMM5 offsetof(VexGuestAMD64State,guest_XMM5)
+#define OFFB_XMM6 offsetof(VexGuestAMD64State,guest_XMM6)
+#define OFFB_XMM7 offsetof(VexGuestAMD64State,guest_XMM7)
+#define OFFB_XMM8 offsetof(VexGuestAMD64State,guest_XMM8)
+#define OFFB_XMM9 offsetof(VexGuestAMD64State,guest_XMM9)
+#define OFFB_XMM10 offsetof(VexGuestAMD64State,guest_XMM10)
+#define OFFB_XMM11 offsetof(VexGuestAMD64State,guest_XMM11)
+#define OFFB_XMM12 offsetof(VexGuestAMD64State,guest_XMM12)
+#define OFFB_XMM13 offsetof(VexGuestAMD64State,guest_XMM13)
+#define OFFB_XMM14 offsetof(VexGuestAMD64State,guest_XMM14)
+#define OFFB_XMM15 offsetof(VexGuestAMD64State,guest_XMM15)
+#define OFFB_XMM16 offsetof(VexGuestAMD64State,guest_XMM16)
+
+#define OFFB_EMWARN offsetof(VexGuestAMD64State,guest_EMWARN)
+#define OFFB_TISTART offsetof(VexGuestAMD64State,guest_TISTART)
+#define OFFB_TILEN offsetof(VexGuestAMD64State,guest_TILEN)
+
+#define OFFB_NRADDR offsetof(VexGuestAMD64State,guest_NRADDR)
+
+
+/*------------------------------------------------------------*/
+/*--- Helper bits and pieces for deconstructing the ---*/
+/*--- amd64 insn stream. ---*/
+/*------------------------------------------------------------*/
+
+/* This is the AMD64 register encoding -- integer regs. */
+#define R_RAX 0
+#define R_RCX 1
+#define R_RDX 2
+#define R_RBX 3
+#define R_RSP 4
+#define R_RBP 5
+#define R_RSI 6
+#define R_RDI 7
+#define R_R8 8
+#define R_R9 9
+#define R_R10 10
+#define R_R11 11
+#define R_R12 12
+#define R_R13 13
+#define R_R14 14
+#define R_R15 15
+
+//.. #define R_AL (0+R_EAX)
+//.. #define R_AH (4+R_EAX)
+
+/* This is the Intel register encoding -- segment regs. */
+#define R_ES 0
+#define R_CS 1
+#define R_SS 2
+#define R_DS 3
+#define R_FS 4
+#define R_GS 5
+
+
+/* Various simple conversions */
+
+static ULong extend_s_8to64 ( UChar x )
+{
+ return (ULong)((((Long)x) << 56) >> 56);
+}
+
+static ULong extend_s_16to64 ( UShort x )
+{
+ return (ULong)((((Long)x) << 48) >> 48);
+}
+
+static ULong extend_s_32to64 ( UInt x )
+{
+ return (ULong)((((Long)x) << 32) >> 32);
+}
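+
+/* For example, extend_s_8to64(0x80) == 0xFFFFFFFFFFFFFF80ULL, while
+   extend_s_8to64(0x7F) == 0x7FULL. */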
+
+/* Figure out whether the mod and rm parts of a modRM byte refer to a
+ register or memory. If so, the byte will have the form 11XXXYYY,
+ where YYY is the register number. */
+inline
+static Bool epartIsReg ( UChar mod_reg_rm )
+{
+ return toBool(0xC0 == (mod_reg_rm & 0xC0));
+}
+
+/* Extract the 'g' field from a modRM byte. This only produces 3
+ bits, which is not a complete register number. You should avoid
+ this function if at all possible. */
+inline
+static Int gregLO3ofRM ( UChar mod_reg_rm )
+{
+ return (Int)( (mod_reg_rm >> 3) & 7 );
+}
+
+/* Ditto the 'e' field of a modRM byte. */
+inline
+static Int eregLO3ofRM ( UChar mod_reg_rm )
+{
+ return (Int)(mod_reg_rm & 0x7);
+}
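+
+/* Worked example (illustrative): for the modRM byte 0xD8, which is
+   11 011 000 in binary, epartIsReg returns True, gregLO3ofRM
+   returns 3 and eregLO3ofRM returns 0. */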
+
+/* Get an 8/16-bit unsigned value out of the insn stream. */
+
+static UChar getUChar ( Long delta )
+{
+ UChar v = guest_code[delta+0];
+ return v;
+}
+
+static UInt getUDisp16 ( Long delta )
+{
+ UInt v = guest_code[delta+1]; v <<= 8;
+ v |= guest_code[delta+0];
+ return v & 0xFFFF;
+}
+
+//.. static UInt getUDisp ( Int size, Long delta )
+//.. {
+//.. switch (size) {
+//.. case 4: return getUDisp32(delta);
+//.. case 2: return getUDisp16(delta);
+//.. case 1: return getUChar(delta);
+//.. default: vpanic("getUDisp(x86)");
+//.. }
+//.. return 0; /*notreached*/
+//.. }
+
+
+/* Get a byte value out of the insn stream and sign-extend to 64
+ bits. */
+static Long getSDisp8 ( Long delta )
+{
+ return extend_s_8to64( guest_code[delta] );
+}
+
+/* Get a 16-bit value out of the insn stream and sign-extend to 64
+ bits. */
+static Long getSDisp16 ( Long delta )
+{
+ UInt v = guest_code[delta+1]; v <<= 8;
+ v |= guest_code[delta+0];
+ return extend_s_16to64( (UShort)v );
+}
+
+/* Get a 32-bit value out of the insn stream and sign-extend to 64
+ bits. */
+static Long getSDisp32 ( Long delta )
+{
+ UInt v = guest_code[delta+3]; v <<= 8;
+ v |= guest_code[delta+2]; v <<= 8;
+ v |= guest_code[delta+1]; v <<= 8;
+ v |= guest_code[delta+0];
+ return extend_s_32to64( v );
+}
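+
+/* For example, if the four bytes at 'delta' are FE FF FF FF, the
+   little-endian 32-bit value assembled above is 0xFFFFFFFE, and
+   getSDisp32 returns -2. */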
+
+/* Get a 64-bit value out of the insn stream. */
+static Long getDisp64 ( Long delta )
+{
+ ULong v = 0;
+ v |= guest_code[delta+7]; v <<= 8;
+ v |= guest_code[delta+6]; v <<= 8;
+ v |= guest_code[delta+5]; v <<= 8;
+ v |= guest_code[delta+4]; v <<= 8;
+ v |= guest_code[delta+3]; v <<= 8;
+ v |= guest_code[delta+2]; v <<= 8;
+ v |= guest_code[delta+1]; v <<= 8;
+ v |= guest_code[delta+0];
+ return v;
+}
+
+/* Note: because AMD64 doesn't allow 64-bit literals except in the
+   MOV-immediate case (which is fetched via getDisp64 instead), it is
+   an error if this is called with size==8.  Should not happen. */
+static Long getSDisp ( Int size, Long delta )
+{
+ switch (size) {
+ case 4: return getSDisp32(delta);
+ case 2: return getSDisp16(delta);
+ case 1: return getSDisp8(delta);
+ default: vpanic("getSDisp(amd64)");
+ }
+}
+
+static ULong mkSizeMask ( Int sz )
+{
+ switch (sz) {
+ case 1: return 0x00000000000000FFULL;
+ case 2: return 0x000000000000FFFFULL;
+ case 4: return 0x00000000FFFFFFFFULL;
+ case 8: return 0xFFFFFFFFFFFFFFFFULL;
+ default: vpanic("mkSzMask(amd64)");
+ }
+}
+
+static Int imin ( Int a, Int b )
+{
+ return (a < b) ? a : b;
+}
+
+static IRType szToITy ( Int n )
+{
+ switch (n) {
+ case 1: return Ity_I8;
+ case 2: return Ity_I16;
+ case 4: return Ity_I32;
+ case 8: return Ity_I64;
+ default: vex_printf("\nszToITy(%d)\n", n);
+ vpanic("szToITy(amd64)");
+ }
+}
+
+
+/*------------------------------------------------------------*/
+/*--- For dealing with prefixes. ---*/
+/*------------------------------------------------------------*/
+
+/* The idea is to pass around an int holding a bitmask summarising
+ info from the prefixes seen on the current instruction, including
+ info from the REX byte. This info is used in various places, but
+ most especially when making sense of register fields in
+ instructions.
+
+ The top 16 bits of the prefix are 0x3141, just as a hacky way
+ to ensure it really is a valid prefix.
+
+ Things you can safely assume about a well-formed prefix:
+ * at most one segment-override bit (CS,DS,ES,FS,GS,SS) is set.
+ * if REX is not present then REXW,REXR,REXX,REXB will read
+ as zero.
+ * F2 and F3 will not both be 1.
+*/
+
+typedef UInt Prefix;
+
+#define PFX_ASO (1<<0) /* address-size override present (0x67) */
+#define PFX_66 (1<<1) /* operand-size override-to-16 present (0x66) */
+#define PFX_REX (1<<2) /* REX byte present (0x40 to 0x4F) */
+#define PFX_REXW (1<<3) /* REX W bit, if REX present, else 0 */
+#define PFX_REXR (1<<4) /* REX R bit, if REX present, else 0 */
+#define PFX_REXX (1<<5) /* REX X bit, if REX present, else 0 */
+#define PFX_REXB (1<<6) /* REX B bit, if REX present, else 0 */
+#define PFX_LOCK (1<<7) /* bus LOCK prefix present (0xF0) */
+#define PFX_F2 (1<<8) /* REPNE/REPNZ prefix present (0xF2) */
+#define PFX_F3 (1<<9) /* REP/REPE/REPZ prefix present (0xF3) */
+#define PFX_CS (1<<10) /* CS segment prefix present (0x2E) */
+#define PFX_DS (1<<11) /* DS segment prefix present (0x3E) */
+#define PFX_ES (1<<12) /* ES segment prefix present (0x26) */
+#define PFX_FS (1<<13) /* FS segment prefix present (0x64) */
+#define PFX_GS (1<<14) /* GS segment prefix present (0x65) */
+#define PFX_SS (1<<15) /* SS segment prefix present (0x36) */
+
+#define PFX_EMPTY 0x31410000
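+
+/* Illustrative example: an instruction carrying the prefix bytes
+   66 48 (an operand-size override, then REX with W set) would be
+   summarised as PFX_EMPTY | PFX_66 | PFX_REX | PFX_REXW. */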
+
+static Bool IS_VALID_PFX ( Prefix pfx ) {
+ return toBool((pfx & 0xFFFF0000) == PFX_EMPTY);
+}
+
+static Bool haveREX ( Prefix pfx ) {
+ return toBool(pfx & PFX_REX);
+}
+
+static Int getRexW ( Prefix pfx ) {
+ return (pfx & PFX_REXW) ? 1 : 0;
+}
+/* Apparently unused.
+static Int getRexR ( Prefix pfx ) {
+ return (pfx & PFX_REXR) ? 1 : 0;
+}
+*/
+static Int getRexX ( Prefix pfx ) {
+ return (pfx & PFX_REXX) ? 1 : 0;
+}
+static Int getRexB ( Prefix pfx ) {
+ return (pfx & PFX_REXB) ? 1 : 0;
+}
+
+/* Check a prefix doesn't have F2 or F3 set in it, since usually that
+ completely changes what instruction it really is. */
+static Bool haveF2orF3 ( Prefix pfx ) {
+ return toBool((pfx & (PFX_F2|PFX_F3)) > 0);
+}
+static Bool haveF2 ( Prefix pfx ) {
+ return toBool((pfx & PFX_F2) > 0);
+}
+static Bool haveF3 ( Prefix pfx ) {
+ return toBool((pfx & PFX_F3) > 0);
+}
+
+static Bool have66 ( Prefix pfx ) {
+ return toBool((pfx & PFX_66) > 0);
+}
+static Bool haveASO ( Prefix pfx ) {
+ return toBool((pfx & PFX_ASO) > 0);
+}
+
+/* Return True iff pfx has 66 set and F2 and F3 clear */
+static Bool have66noF2noF3 ( Prefix pfx )
+{
+ return
+ toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == PFX_66);
+}
+
+/* Return True iff pfx has F2 set and 66 and F3 clear */
+static Bool haveF2no66noF3 ( Prefix pfx )
+{
+ return
+ toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == PFX_F2);
+}
+
+/* Return True iff pfx has F3 set and 66 and F2 clear */
+static Bool haveF3no66noF2 ( Prefix pfx )
+{
+ return
+ toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == PFX_F3);
+}
+
+/* Return True iff pfx has F3 set and F2 clear */
+static Bool haveF3noF2 ( Prefix pfx )
+{
+ return
+ toBool((pfx & (PFX_F2|PFX_F3)) == PFX_F3);
+}
+
+/* Return True iff pfx has 66, F2 and F3 clear */
+static Bool haveNo66noF2noF3 ( Prefix pfx )
+{
+ return
+ toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == 0);
+}
+
+/* Return True iff pfx has any of 66, F2 and F3 set */
+static Bool have66orF2orF3 ( Prefix pfx )
+{
+ return toBool( ! haveNo66noF2noF3(pfx) );
+}
+
+/* Return True iff pfx has 66 or F2 set */
+static Bool have66orF2 ( Prefix pfx )
+{
+ return toBool((pfx & (PFX_66|PFX_F2)) > 0);
+}
+
+/* Clear all the segment-override bits in a prefix. */
+static Prefix clearSegBits ( Prefix p )
+{
+ return
+ p & ~(PFX_CS | PFX_DS | PFX_ES | PFX_FS | PFX_GS | PFX_SS);
+}
+
+
+/*------------------------------------------------------------*/
+/*--- For dealing with integer registers ---*/
+/*------------------------------------------------------------*/
+
+/* This is somewhat complex. The rules are:
+
+ For 64, 32 and 16 bit register references, the e or g fields in the
+ modrm bytes supply the low 3 bits of the register number. The
+ fourth (most-significant) bit of the register number is supplied by
+ the REX byte, if it is present; else that bit is taken to be zero.
+
+ The REX.R bit supplies the high bit corresponding to the g register
+ field, and the REX.B bit supplies the high bit corresponding to the
+ e register field (when the mod part of modrm indicates that modrm's
+ e component refers to a register and not to memory).
+
+   The REX.X bit supplies the high bit of the index register in SIB
+   address modes, and is rarely used.
+
+ For 8 bit register references, the presence of the REX byte itself
+ has significance. If there is no REX present, then the 3-bit
+ number extracted from the modrm e or g field is treated as an index
+ into the sequence %al %cl %dl %bl %ah %ch %dh %bh -- that is, the
+ old x86 encoding scheme.
+
+   But if there is a REX present, the register reference is
+   interpreted in the same way as for 64/32/16-bit references: a high
+   bit is extracted from REX, giving a 4-bit number, and the denoted
+   register is the lowest 8 bits of the integer register selected by
+   that number.  In particular, values 4 through 7 of this sequence
+   do not refer to %ah %ch %dh %bh but instead to the lowest 8 bits
+   of %rsp %rbp %rsi %rdi.
+
+ The REX.W bit has no bearing at all on register numbers. Instead
+ its presence indicates that the operand size is to be overridden
+ from its default value (32 bits) to 64 bits instead. This is in
+ the same fashion that an 0x66 prefix indicates the operand size is
+ to be overridden from 32 bits down to 16 bits. When both REX.W and
+ 0x66 are present there is a conflict, and REX.W takes precedence.
+
+ Rather than try to handle this complexity using a single huge
+ function, several smaller ones are provided. The aim is to make it
+ as difficult as possible to screw up register decoding in a subtle
+ and hard-to-track-down way.
+
+ Because these routines fish around in the host's memory (that is,
+ in the guest state area) for sub-parts of guest registers, their
+ correctness depends on the host's endianness. So far these
+ routines only work for little-endian hosts. Those for which
+ endianness is important have assertions to ensure sanity.
+*/
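+
+/* Two illustrative cases of the 8-bit rules above: with no REX
+   present, an e or g field of 4 denotes %ah (the old x86 scheme);
+   with a REX present and the relevant extension bit clear, the same
+   field denotes %spl, the lowest 8 bits of %rsp. */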
+
+
+/* About the simplest question you can ask: where do the 64-bit
+ integer registers live (in the guest state) ? */
+
+static Int integerGuestReg64Offset ( UInt reg )
+{
+ switch (reg) {
+ case R_RAX: return OFFB_RAX;
+ case R_RCX: return OFFB_RCX;
+ case R_RDX: return OFFB_RDX;
+ case R_RBX: return OFFB_RBX;
+ case R_RSP: return OFFB_RSP;
+ case R_RBP: return OFFB_RBP;
+ case R_RSI: return OFFB_RSI;
+ case R_RDI: return OFFB_RDI;
+ case R_R8: return OFFB_R8;
+ case R_R9: return OFFB_R9;
+ case R_R10: return OFFB_R10;
+ case R_R11: return OFFB_R11;
+ case R_R12: return OFFB_R12;
+ case R_R13: return OFFB_R13;
+ case R_R14: return OFFB_R14;
+ case R_R15: return OFFB_R15;
+ default: vpanic("integerGuestReg64Offset(amd64)");
+ }
+}
+
+
+/* Produce the name of an integer register, for printing purposes.
+ reg is a number in the range 0 .. 15 that has been generated from a
+ 3-bit reg-field number and a REX extension bit. irregular denotes
+ the case where sz==1 and no REX byte is present. */
+
+static
+HChar* nameIReg ( Int sz, UInt reg, Bool irregular )
+{
+ static HChar* ireg64_names[16]
+ = { "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
+ "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15" };
+ static HChar* ireg32_names[16]
+ = { "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
+ "%r8d", "%r9d", "%r10d","%r11d","%r12d","%r13d","%r14d","%r15d" };
+ static HChar* ireg16_names[16]
+ = { "%ax", "%cx", "%dx", "%bx", "%sp", "%bp", "%si", "%di",
+ "%r8w", "%r9w", "%r10w","%r11w","%r12w","%r13w","%r14w","%r15w" };
+ static HChar* ireg8_names[16]
+ = { "%al", "%cl", "%dl", "%bl", "%spl", "%bpl", "%sil", "%dil",
+ "%r8b", "%r9b", "%r10b","%r11b","%r12b","%r13b","%r14b","%r15b" };
+ static HChar* ireg8_irregular[8]
+ = { "%al", "%cl", "%dl", "%bl", "%ah", "%ch", "%dh", "%bh" };
+
+ vassert(reg < 16);
+ if (sz == 1) {
+ if (irregular)
+ vassert(reg < 8);
+ } else {
+ vassert(irregular == False);
+ }
+
+ switch (sz) {
+ case 8: return ireg64_names[reg];
+ case 4: return ireg32_names[reg];
+ case 2: return ireg16_names[reg];
+ case 1: if (irregular) {
+ return ireg8_irregular[reg];
+ } else {
+ return ireg8_names[reg];
+ }
+ default: vpanic("nameIReg(amd64)");
+ }
+}
+
+/* Using the same argument conventions as nameIReg, produce the
+ guest state offset of an integer register. */
+
+static
+Int offsetIReg ( Int sz, UInt reg, Bool irregular )
+{
+ vassert(reg < 16);
+ if (sz == 1) {
+ if (irregular)
+ vassert(reg < 8);
+ } else {
+ vassert(irregular == False);
+ }
+
+ /* Deal with irregular case -- sz==1 and no REX present */
+ if (sz == 1 && irregular) {
+ switch (reg) {
+ case R_RSP: return 1+ OFFB_RAX;
+ case R_RBP: return 1+ OFFB_RCX;
+ case R_RSI: return 1+ OFFB_RDX;
+ case R_RDI: return 1+ OFFB_RBX;
+ default: break; /* use the normal case */
+ }
+ }
+
+ /* Normal case */
+ return integerGuestReg64Offset(reg);
+}
+
+
+/* Read the %CL register :: Ity_I8, for shift/rotate operations. */
+
+static IRExpr* getIRegCL ( void )
+{
+ vassert(!host_is_bigendian);
+ return IRExpr_Get( OFFB_RCX, Ity_I8 );
+}
+
+
+/* Write to the %AH register. */
+
+static void putIRegAH ( IRExpr* e )
+{
+ vassert(!host_is_bigendian);
+ vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I8);
+ stmt( IRStmt_Put( OFFB_RAX+1, e ) );
+}
+
+
+/* Read/write various widths of %RAX, as it has various
+ special-purpose uses. */
+
+static HChar* nameIRegRAX ( Int sz )
+{
+ switch (sz) {
+ case 1: return "%al";
+ case 2: return "%ax";
+ case 4: return "%eax";
+ case 8: return "%rax";
+ default: vpanic("nameIRegRAX(amd64)");
+ }
+}
+
+static IRExpr* getIRegRAX ( Int sz )
+{
+ vassert(!host_is_bigendian);
+ switch (sz) {
+ case 1: return IRExpr_Get( OFFB_RAX, Ity_I8 );
+ case 2: return IRExpr_Get( OFFB_RAX, Ity_I16 );
+ case 4: return unop(Iop_64to32, IRExpr_Get( OFFB_RAX, Ity_I64 ));
+ case 8: return IRExpr_Get( OFFB_RAX, Ity_I64 );
+ default: vpanic("getIRegRAX(amd64)");
+ }
+}
+
+static void putIRegRAX ( Int sz, IRExpr* e )
+{
+ IRType ty = typeOfIRExpr(irsb->tyenv, e);
+ vassert(!host_is_bigendian);
+ switch (sz) {
+ case 8: vassert(ty == Ity_I64);
+ stmt( IRStmt_Put( OFFB_RAX, e ));
+ break;
+ case 4: vassert(ty == Ity_I32);
+ stmt( IRStmt_Put( OFFB_RAX, unop(Iop_32Uto64,e) ));
+ break;
+ case 2: vassert(ty == Ity_I16);
+ stmt( IRStmt_Put( OFFB_RAX, e ));
+ break;
+ case 1: vassert(ty == Ity_I8);
+ stmt( IRStmt_Put( OFFB_RAX, e ));
+ break;
+ default: vpanic("putIRegRAX(amd64)");
+ }
+}
+
+
+/* Read/write various widths of %RDX, as it has various
+ special-purpose uses. */
+
+static HChar* nameIRegRDX ( Int sz )
+{
+ switch (sz) {
+ case 1: return "%dl";
+ case 2: return "%dx";
+ case 4: return "%edx";
+ case 8: return "%rdx";
+ default: vpanic("nameIRegRDX(amd64)");
+ }
+}
+
+static IRExpr* getIRegRDX ( Int sz )
+{
+ vassert(!host_is_bigendian);
+ switch (sz) {
+ case 1: return IRExpr_Get( OFFB_RDX, Ity_I8 );
+ case 2: return IRExpr_Get( OFFB_RDX, Ity_I16 );
+ case 4: return unop(Iop_64to32, IRExpr_Get( OFFB_RDX, Ity_I64 ));
+ case 8: return IRExpr_Get( OFFB_RDX, Ity_I64 );
+ default: vpanic("getIRegRDX(amd64)");
+ }
+}
+
+static void putIRegRDX ( Int sz, IRExpr* e )
+{
+ vassert(!host_is_bigendian);
+ vassert(typeOfIRExpr(irsb->tyenv, e) == szToITy(sz));
+ switch (sz) {
+ case 8: stmt( IRStmt_Put( OFFB_RDX, e ));
+ break;
+ case 4: stmt( IRStmt_Put( OFFB_RDX, unop(Iop_32Uto64,e) ));
+ break;
+ case 2: stmt( IRStmt_Put( OFFB_RDX, e ));
+ break;
+ case 1: stmt( IRStmt_Put( OFFB_RDX, e ));
+ break;
+ default: vpanic("putIRegRDX(amd64)");
+ }
+}
+
+
+/* Simplistic functions to deal with the integer registers as a
+ straightforward bank of 16 64-bit regs. */
+
+static IRExpr* getIReg64 ( UInt regno )
+{
+ return IRExpr_Get( integerGuestReg64Offset(regno),
+ Ity_I64 );
+}
+
+static void putIReg64 ( UInt regno, IRExpr* e )
+{
+ vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
+ stmt( IRStmt_Put( integerGuestReg64Offset(regno), e ) );
+}
+
+static HChar* nameIReg64 ( UInt regno )
+{
+ return nameIReg( 8, regno, False );
+}
+
+
+/* Simplistic functions to deal with the lower halves of integer
+ registers as a straightforward bank of 16 32-bit regs. */
+
+static IRExpr* getIReg32 ( UInt regno )
+{
+ vassert(!host_is_bigendian);
+ return unop(Iop_64to32,
+ IRExpr_Get( integerGuestReg64Offset(regno),
+ Ity_I64 ));
+}
+
+static void putIReg32 ( UInt regno, IRExpr* e )
+{
+ vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I32);
+ stmt( IRStmt_Put( integerGuestReg64Offset(regno),
+ unop(Iop_32Uto64,e) ) );
+}
+
+static HChar* nameIReg32 ( UInt regno )
+{
+ return nameIReg( 4, regno, False );
+}
+
+
+/* Simplistic functions to deal with the lower quarters of integer
+ registers as a straightforward bank of 16 16-bit regs. */
+
+static IRExpr* getIReg16 ( UInt regno )
+{
+ vassert(!host_is_bigendian);
+ return IRExpr_Get( integerGuestReg64Offset(regno),
+ Ity_I16 );
+}
+
+static HChar* nameIReg16 ( UInt regno )
+{
+ return nameIReg( 2, regno, False );
+}
+
+
+/* Sometimes what we know is a 3-bit register number, a REX byte, and
+ which field of the REX byte is to be used to extend to a 4-bit
+ number. These functions cater for that situation.
+*/
+static IRExpr* getIReg64rexX ( Prefix pfx, UInt lo3bits )
+{
+ vassert(lo3bits < 8);
+ vassert(IS_VALID_PFX(pfx));
+ return getIReg64( lo3bits | (getRexX(pfx) << 3) );
+}
+
+static HChar* nameIReg64rexX ( Prefix pfx, UInt lo3bits )
+{
+ vassert(lo3bits < 8);
+ vassert(IS_VALID_PFX(pfx));
+ return nameIReg( 8, lo3bits | (getRexX(pfx) << 3), False );
+}
+
+static HChar* nameIRegRexB ( Int sz, Prefix pfx, UInt lo3bits )
+{
+ vassert(lo3bits < 8);
+ vassert(IS_VALID_PFX(pfx));
+ vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
+ return nameIReg( sz, lo3bits | (getRexB(pfx) << 3),
+ toBool(sz==1 && !haveREX(pfx)) );
+}
+
+static IRExpr* getIRegRexB ( Int sz, Prefix pfx, UInt lo3bits )
+{
+ vassert(lo3bits < 8);
+ vassert(IS_VALID_PFX(pfx));
+ vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
+ if (sz == 4) {
+ sz = 8;
+ return unop(Iop_64to32,
+ IRExpr_Get(
+ offsetIReg( sz, lo3bits | (getRexB(pfx) << 3),
+ toBool(sz==1 && !haveREX(pfx)) ),
+ szToITy(sz)
+ )
+ );
+ } else {
+ return IRExpr_Get(
+ offsetIReg( sz, lo3bits | (getRexB(pfx) << 3),
+ toBool(sz==1 && !haveREX(pfx)) ),
+ szToITy(sz)
+ );
+ }
+}
+
+static void putIRegRexB ( Int sz, Prefix pfx, UInt lo3bits, IRExpr* e )
+{
+ vassert(lo3bits < 8);
+ vassert(IS_VALID_PFX(pfx));
+ vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
+ vassert(typeOfIRExpr(irsb->tyenv, e) == szToITy(sz));
+ stmt( IRStmt_Put(
+ offsetIReg( sz, lo3bits | (getRexB(pfx) << 3),
+ toBool(sz==1 && !haveREX(pfx)) ),
+ sz==4 ? unop(Iop_32Uto64,e) : e
+ ));
+}
+
+
+/* Functions for getting register numbers from modrm bytes and REX
+ when we don't have to consider the complexities of integer subreg
+ accesses.
+*/
+/* Extract the g reg field from a modRM byte, and augment it using the
+ REX.R bit from the supplied REX byte. The R bit usually is
+ associated with the g register field.
+*/
+static UInt gregOfRexRM ( Prefix pfx, UChar mod_reg_rm )
+{
+ Int reg = (Int)( (mod_reg_rm >> 3) & 7 );
+ reg += (pfx & PFX_REXR) ? 8 : 0;
+ return reg;
+}
+
+/* Extract the e reg field from a modRM byte, and augment it using the
+ REX.B bit from the supplied REX byte. The B bit usually is
+ associated with the e register field (when modrm indicates e is a
+ register, that is).
+*/
+static UInt eregOfRexRM ( Prefix pfx, UChar mod_reg_rm )
+{
+ Int rm;
+ vassert(epartIsReg(mod_reg_rm));
+ rm = (Int)(mod_reg_rm & 0x7);
+ rm += (pfx & PFX_REXB) ? 8 : 0;
+ return rm;
+}
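+
+/* For example, given a prefix with REX.B set, the modRM byte 0xC1
+   (mod=11, rm=001) yields 1 + 8 == 9 from eregOfRexRM, denoting
+   %r9. */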
+
+
+/* General functions for dealing with integer register access. */
+
+/* Produce the guest state offset for a reference to the 'g' register
+ field in a modrm byte, taking into account REX (or its absence),
+ and the size of the access.
+*/
+static UInt offsetIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm )
+{
+ UInt reg;
+ vassert(!host_is_bigendian);
+ vassert(IS_VALID_PFX(pfx));
+ vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
+ reg = gregOfRexRM( pfx, mod_reg_rm );
+ return offsetIReg( sz, reg, toBool(sz == 1 && !haveREX(pfx)) );
+}
+
+static
+IRExpr* getIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm )
+{
+ if (sz == 4) {
+ sz = 8;
+ return unop(Iop_64to32,
+ IRExpr_Get( offsetIRegG( sz, pfx, mod_reg_rm ),
+ szToITy(sz) ));
+ } else {
+ return IRExpr_Get( offsetIRegG( sz, pfx, mod_reg_rm ),
+ szToITy(sz) );
+ }
+}
+
+static
+void putIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm, IRExpr* e )
+{
+ vassert(typeOfIRExpr(irsb->tyenv,e) == szToITy(sz));
+ if (sz == 4) {
+ e = unop(Iop_32Uto64,e);
+ }
+ stmt( IRStmt_Put( offsetIRegG( sz, pfx, mod_reg_rm ), e ) );
+}
+
+static
+HChar* nameIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm )
+{
+ return nameIReg( sz, gregOfRexRM(pfx,mod_reg_rm),
+ toBool(sz==1 && !haveREX(pfx)) );
+}
+
+
+/* Produce the guest state offset for a reference to the 'e' register
+ field in a modrm byte, taking into account REX (or its absence),
+ and the size of the access. eregOfRexRM will assert if mod_reg_rm
+ denotes a memory access rather than a register access.
+*/
+static UInt offsetIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm )
+{
+ UInt reg;
+ vassert(!host_is_bigendian);
+ vassert(IS_VALID_PFX(pfx));
+ vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
+ reg = eregOfRexRM( pfx, mod_reg_rm );
+ return offsetIReg( sz, reg, toBool(sz == 1 && !haveREX(pfx)) );
+}
+
+static
+IRExpr* getIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm )
+{
+ if (sz == 4) {
+ sz = 8;
+ return unop(Iop_64to32,
+ IRExpr_Get( offsetIRegE( sz, pfx, mod_reg_rm ),
+ szToITy(sz) ));
+ } else {
+ return IRExpr_Get( offsetIRegE( sz, pfx, mod_reg_rm ),
+ szToITy(sz) );
+ }
+}
+
+static
+void putIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm, IRExpr* e )
+{
+ vassert(typeOfIRExpr(irsb->tyenv,e) == szToITy(sz));
+ if (sz == 4) {
+ e = unop(Iop_32Uto64,e);
+ }
+ stmt( IRStmt_Put( offsetIRegE( sz, pfx, mod_reg_rm ), e ) );
+}
+
+static
+HChar* nameIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm )
+{
+ return nameIReg( sz, eregOfRexRM(pfx,mod_reg_rm),
+ toBool(sz==1 && !haveREX(pfx)) );
+}
+
+
+/*------------------------------------------------------------*/
+/*--- For dealing with XMM registers ---*/
+/*------------------------------------------------------------*/
+
+//.. static Int segmentGuestRegOffset ( UInt sreg )
+//.. {
+//.. switch (sreg) {
+//.. case R_ES: return OFFB_ES;
+//.. case R_CS: return OFFB_CS;
+//.. case R_SS: return OFFB_SS;
+//.. case R_DS: return OFFB_DS;
+//.. case R_FS: return OFFB_FS;
+//.. case R_GS: return OFFB_GS;
+//.. default: vpanic("segmentGuestRegOffset(x86)");
+//.. }
+//.. }
+
+static Int xmmGuestRegOffset ( UInt xmmreg )
+{
+ switch (xmmreg) {
+ case 0: return OFFB_XMM0;
+ case 1: return OFFB_XMM1;
+ case 2: return OFFB_XMM2;
+ case 3: return OFFB_XMM3;
+ case 4: return OFFB_XMM4;
+ case 5: return OFFB_XMM5;
+ case 6: return OFFB_XMM6;
+ case 7: return OFFB_XMM7;
+ case 8: return OFFB_XMM8;
+ case 9: return OFFB_XMM9;
+ case 10: return OFFB_XMM10;
+ case 11: return OFFB_XMM11;
+ case 12: return OFFB_XMM12;
+ case 13: return OFFB_XMM13;
+ case 14: return OFFB_XMM14;
+ case 15: return OFFB_XMM15;
+ default: vpanic("xmmGuestRegOffset(amd64)");
+ }
+}
+
+/* Lanes of vector registers are always numbered from zero being the
+ least significant lane (rightmost in the register). */
+
+static Int xmmGuestRegLane16offset ( UInt xmmreg, Int laneno )
+{
+ /* Correct for little-endian host only. */
+ vassert(!host_is_bigendian);
+ vassert(laneno >= 0 && laneno < 8);
+ return xmmGuestRegOffset( xmmreg ) + 2 * laneno;
+}
+
+static Int xmmGuestRegLane32offset ( UInt xmmreg, Int laneno )
+{
+ /* Correct for little-endian host only. */
+ vassert(!host_is_bigendian);
+ vassert(laneno >= 0 && laneno < 4);
+ return xmmGuestRegOffset( xmmreg ) + 4 * laneno;
+}
+
+static Int xmmGuestRegLane64offset ( UInt xmmreg, Int laneno )
+{
+ /* Correct for little-endian host only. */
+ vassert(!host_is_bigendian);
+ vassert(laneno >= 0 && laneno < 2);
+ return xmmGuestRegOffset( xmmreg ) + 8 * laneno;
+}
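+
+/* For example, xmmGuestRegLane32offset(3, 2) is OFFB_XMM3 + 8, the
+   offset of lane 2 (bits 95:64) of %xmm3 under the little-endian
+   lane numbering described above. */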
+
+//.. static IRExpr* getSReg ( UInt sreg )
+//.. {
+//.. return IRExpr_Get( segmentGuestRegOffset(sreg), Ity_I16 );
+//.. }
+//..
+//.. static void putSReg ( UInt sreg, IRExpr* e )
+//.. {
+//.. vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I16);
+//.. stmt( IRStmt_Put( segmentGuestRegOffset(sreg), e ) );
+//.. }
+
+static IRExpr* getXMMReg ( UInt xmmreg )
+{
+ return IRExpr_Get( xmmGuestRegOffset(xmmreg), Ity_V128 );
+}
+
+static IRExpr* getXMMRegLane64 ( UInt xmmreg, Int laneno )
+{
+ return IRExpr_Get( xmmGuestRegLane64offset(xmmreg,laneno), Ity_I64 );
+}
+
+static IRExpr* getXMMRegLane64F ( UInt xmmreg, Int laneno )
+{
+ return IRExpr_Get( xmmGuestRegLane64offset(xmmreg,laneno), Ity_F64 );
+}
+
+static IRExpr* getXMMRegLane32 ( UInt xmmreg, Int laneno )
+{
+ return IRExpr_Get( xmmGuestRegLane32offset(xmmreg,laneno), Ity_I32 );
+}
+
+static IRExpr* getXMMRegLane32F ( UInt xmmreg, Int laneno )
+{
+ return IRExpr_Get( xmmGuestRegLane32offset(xmmreg,laneno), Ity_F32 );
+}
+
+static IRExpr* getXMMRegLane16 ( UInt xmmreg, Int laneno )
+{
+ return IRExpr_Get( xmmGuestRegLane16offset(xmmreg,laneno), Ity_I16 );
+}
+
+static void putXMMReg ( UInt xmmreg, IRExpr* e )
+{
+ vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_V128);
+ stmt( IRStmt_Put( xmmGuestRegOffset(xmmreg), e ) );
+}
+
+static void putXMMRegLane64 ( UInt xmmreg, Int laneno, IRExpr* e )
+{
+ vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
+ stmt( IRStmt_Put( xmmGuestRegLane64offset(xmmreg,laneno), e ) );
+}
+
+static void putXMMRegLane64F ( UInt xmmreg, Int laneno, IRExpr* e )
+{
+ vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F64);
+ stmt( IRStmt_Put( xmmGuestRegLane64offset(xmmreg,laneno), e ) );
+}
+
+static void putXMMRegLane32F ( UInt xmmreg, Int laneno, IRExpr* e )
+{
+ vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F32);
+ stmt( IRStmt_Put( xmmGuestRegLane32offset(xmmreg,laneno), e ) );
+}
+
+static void putXMMRegLane32 ( UInt xmmreg, Int laneno, IRExpr* e )
+{
+ vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I32);
+ stmt( IRStmt_Put( xmmGuestRegLane32offset(xmmreg,laneno), e ) );
+}
+
+static void putXMMRegLane16 ( UInt xmmreg, Int laneno, IRExpr* e )
+{
+ vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I16);
+ stmt( IRStmt_Put( xmmGuestRegLane16offset(xmmreg,laneno), e ) );
+}
+
+static IRExpr* mkV128 ( UShort mask )
+{
+ return IRExpr_Const(IRConst_V128(mask));
+}
+
+static IRExpr* mkAnd1 ( IRExpr* x, IRExpr* y )
+{
+ vassert(typeOfIRExpr(irsb->tyenv,x) == Ity_I1);
+ vassert(typeOfIRExpr(irsb->tyenv,y) == Ity_I1);
+ return unop(Iop_64to1,
+ binop(Iop_And64,
+ unop(Iop_1Uto64,x),
+ unop(Iop_1Uto64,y)));
+}
+
+/* Generate a compare-and-swap operation, operating on memory at
+ 'addr'. The expected value is 'expVal' and the new value is
+ 'newVal'. If the operation fails, then transfer control (with a
+ no-redir jump (XXX no -- see comment at top of this file)) to
+ 'restart_point', which is presumably the address of the guest
+ instruction again -- retrying, essentially. */
+static void casLE ( IRExpr* addr, IRExpr* expVal, IRExpr* newVal,
+ Addr64 restart_point )
+{
+ IRCAS* cas;
+ IRType tyE = typeOfIRExpr(irsb->tyenv, expVal);
+ IRType tyN = typeOfIRExpr(irsb->tyenv, newVal);
+ IRTemp oldTmp = newTemp(tyE);
+ IRTemp expTmp = newTemp(tyE);
+ vassert(tyE == tyN);
+ vassert(tyE == Ity_I64 || tyE == Ity_I32
+ || tyE == Ity_I16 || tyE == Ity_I8);
+ assign(expTmp, expVal);
+ cas = mkIRCAS( IRTemp_INVALID, oldTmp, Iend_LE, addr,
+ NULL, mkexpr(expTmp), NULL, newVal );
+ stmt( IRStmt_CAS(cas) );
+ stmt( IRStmt_Exit(
+ binop( mkSizedOp(tyE,Iop_CasCmpNE8),
+ mkexpr(oldTmp), mkexpr(expTmp) ),
+ Ijk_Boring, /*Ijk_NoRedir*/
+ IRConst_U64( restart_point )
+ ));
+}
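+
+/* Sketch of intended use (the temporaries 'taddr', 'told' and 'tnew'
+   are hypothetical, as is the use of guest_RIP_curr_instr as the
+   restart address): a LOCK-prefixed read-modify-write would compute
+   the updated value into 'tnew' and then do
+
+      casLE( mkexpr(taddr), mkexpr(told), mkexpr(tnew),
+             guest_RIP_curr_instr );
+
+   so that, if another thread changed the location in between, the
+   whole guest instruction is restarted rather than completed with a
+   stale value. */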
+
+
+/*------------------------------------------------------------*/
+/*--- Helpers for %rflags. ---*/
+/*------------------------------------------------------------*/
+
+/* -------------- Evaluating the flags-thunk. -------------- */
+
+/* Build IR to calculate all the eflags from stored
+ CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
+ Ity_I64. */
+static IRExpr* mk_amd64g_calculate_rflags_all ( void )
+{
+ IRExpr** args
+ = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I64),
+ IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
+ IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
+ IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
+ IRExpr* call
+ = mkIRExprCCall(
+ Ity_I64,
+ 0/*regparm*/,
+ "amd64g_calculate_rflags_all", &amd64g_calculate_rflags_all,
+ args
+ );
+ /* Exclude OP and NDEP from definedness checking. We're only
+ interested in DEP1 and DEP2. */
+ call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
+ return call;
+}
+
+/* Build IR to calculate some particular condition from stored
+   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
+   Ity_I1. */
+static IRExpr* mk_amd64g_calculate_condition ( AMD64Condcode cond )
+{
+ IRExpr** args
+ = mkIRExprVec_5( mkU64(cond),
+ IRExpr_Get(OFFB_CC_OP, Ity_I64),
+ IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
+ IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
+ IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
+ IRExpr* call
+ = mkIRExprCCall(
+ Ity_I64,
+ 0/*regparm*/,
+ "amd64g_calculate_condition", &amd64g_calculate_condition,
+ args
+ );
+ /* Exclude the requested condition, OP and NDEP from definedness
+ checking. We're only interested in DEP1 and DEP2. */
+ call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<1) | (1<<4);
+ return unop(Iop_64to1, call);
+}
+
+/* Build IR to calculate just the carry flag from stored
+ CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression :: Ity_I64. */
+static IRExpr* mk_amd64g_calculate_rflags_c ( void )
+{
+ IRExpr** args
+ = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I64),
+ IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
+ IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
+ IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
+ IRExpr* call
+ = mkIRExprCCall(
+ Ity_I64,
+ 0/*regparm*/,
+ "amd64g_calculate_rflags_c", &amd64g_calculate_rflags_c,
+ args
+ );
+ /* Exclude OP and NDEP from definedness checking. We're only
+ interested in DEP1 and DEP2. */
+ call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
+ return call;
+}
+
+
+/* -------------- Building the flags-thunk. -------------- */
+
+/* The machinery in this section builds the flag-thunk following a
+ flag-setting operation. Hence the various setFlags_* functions.
+*/
+
+static Bool isAddSub ( IROp op8 )
+{
+ return toBool(op8 == Iop_Add8 || op8 == Iop_Sub8);
+}
+
+static Bool isLogic ( IROp op8 )
+{
+ return toBool(op8 == Iop_And8 || op8 == Iop_Or8 || op8 == Iop_Xor8);
+}
+
+/* U-widen 8/16/32/64 bit int expr to 64. */
+static IRExpr* widenUto64 ( IRExpr* e )
+{
+ switch (typeOfIRExpr(irsb->tyenv,e)) {
+ case Ity_I64: return e;
+ case Ity_I32: return unop(Iop_32Uto64, e);
+ case Ity_I16: return unop(Iop_16Uto64, e);
+ case Ity_I8: return unop(Iop_8Uto64, e);
+ default: vpanic("widenUto64");
+ }
+}
+
+/* S-widen 8/16/32/64 bit int expr to 64. */
+static IRExpr* widenSto64 ( IRExpr* e )
+{
+ switch (typeOfIRExpr(irsb->tyenv,e)) {
+ case Ity_I64: return e;
+ case Ity_I32: return unop(Iop_32Sto64, e);
+ case Ity_I16: return unop(Iop_16Sto64, e);
+ case Ity_I8: return unop(Iop_8Sto64, e);
+ default: vpanic("widenSto64");
+ }
+}
+
+/* Narrow 8/16/32/64 bit int expr to 8/16/32/64. Clearly only some
+ of these combinations make sense. */
+static IRExpr* narrowTo ( IRType dst_ty, IRExpr* e )
+{
+ IRType src_ty = typeOfIRExpr(irsb->tyenv,e);
+ if (src_ty == dst_ty)
+ return e;
+ if (src_ty == Ity_I32 && dst_ty == Ity_I16)
+ return unop(Iop_32to16, e);
+ if (src_ty == Ity_I32 && dst_ty == Ity_I8)
+ return unop(Iop_32to8, e);
+ if (src_ty == Ity_I64 && dst_ty == Ity_I32)
+ return unop(Iop_64to32, e);
+ if (src_ty == Ity_I64 && dst_ty == Ity_I16)
+ return unop(Iop_64to16, e);
+ if (src_ty == Ity_I64 && dst_ty == Ity_I8)
+ return unop(Iop_64to8, e);
+
+ vex_printf("\nsrc, dst tys are: ");
+ ppIRType(src_ty);
+ vex_printf(", ");
+ ppIRType(dst_ty);
+ vex_printf("\n");
+ vpanic("narrowTo(amd64)");
+}
+
+
+/* Set the flags thunk OP, DEP1 and DEP2 fields. The supplied op is
+ auto-sized up to the real op. */
+
+static
+void setFlags_DEP1_DEP2 ( IROp op8, IRTemp dep1, IRTemp dep2, IRType ty )
+{
+ Int ccOp = 0;
+ switch (ty) {
+ case Ity_I8: ccOp = 0; break;
+ case Ity_I16: ccOp = 1; break;
+ case Ity_I32: ccOp = 2; break;
+ case Ity_I64: ccOp = 3; break;
+ default: vassert(0);
+ }
+ switch (op8) {
+ case Iop_Add8: ccOp += AMD64G_CC_OP_ADDB; break;
+ case Iop_Sub8: ccOp += AMD64G_CC_OP_SUBB; break;
+ default: ppIROp(op8);
+ vpanic("setFlags_DEP1_DEP2(amd64)");
+ }
+ stmt( IRStmt_Put( OFFB_CC_OP, mkU64(ccOp)) );
+ stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dep1))) );
+ stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(dep2))) );
+}
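+
+/* For example, a 32-bit ADD arrives here with op8 == Iop_Add8 and
+   ty == Ity_I32, producing ccOp == AMD64G_CC_OP_ADDB + 2; the thunk
+   op codes for the B/W/L/Q variants are consecutive, so that is
+   AMD64G_CC_OP_ADDL. */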
+
+
+/* Set the OP and DEP1 fields only, and write zero to DEP2. */
+
+static
+void setFlags_DEP1 ( IROp op8, IRTemp dep1, IRType ty )
+{
+ Int ccOp = 0;
+ switch (ty) {
+ case Ity_I8: ccOp = 0; break;
+ case Ity_I16: ccOp = 1; break;
+ case Ity_I32: ccOp = 2; break;
+ case Ity_I64: ccOp = 3; break;
+ default: vassert(0);
+ }
+ switch (op8) {
+ case Iop_Or8:
+ case Iop_And8:
+ case Iop_Xor8: ccOp += AMD64G_CC_OP_LOGICB; break;
+ default: ppIROp(op8);
+ vpanic("setFlags_DEP1(amd64)");
+ }
+ stmt( IRStmt_Put( OFFB_CC_OP, mkU64(ccOp)) );
+ stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dep1))) );
+ stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0)) );
+}
+
+
+/* For shift operations, we put in the result and the undershifted
+   result.  If, however, the shift amount is zero, the thunk is left
+   unchanged. */
+
+static void setFlags_DEP1_DEP2_shift ( IROp op64,
+ IRTemp res,
+ IRTemp resUS,
+ IRType ty,
+ IRTemp guard )
+{
+ Int ccOp = 0;
+ switch (ty) {
+ case Ity_I8: ccOp = 0; break;
+ case Ity_I16: ccOp = 1; break;
+ case Ity_I32: ccOp = 2; break;
+ case Ity_I64: ccOp = 3; break;
+ default: vassert(0);
+ }
+
+ vassert(guard);
+
+ /* Both kinds of right shifts are handled by the same thunk
+ operation. */
+ switch (op64) {
+ case Iop_Shr64:
+ case Iop_Sar64: ccOp += AMD64G_CC_OP_SHRB; break;
+ case Iop_Shl64: ccOp += AMD64G_CC_OP_SHLB; break;
+ default: ppIROp(op64);
+ vpanic("setFlags_DEP1_DEP2_shift(amd64)");
+ }
+
+ /* DEP1 contains the result, DEP2 contains the undershifted value. */
+ stmt( IRStmt_Put( OFFB_CC_OP,
+ IRExpr_Mux0X( mkexpr(guard),
+ IRExpr_Get(OFFB_CC_OP,Ity_I64),
+ mkU64(ccOp))) );
+ stmt( IRStmt_Put( OFFB_CC_DEP1,
+ IRExpr_Mux0X( mkexpr(guard),
+ IRExpr_Get(OFFB_CC_DEP1,Ity_I64),
+ widenUto64(mkexpr(res)))) );
+ stmt( IRStmt_Put( OFFB_CC_DEP2,
+ IRExpr_Mux0X( mkexpr(guard),
+ IRExpr_Get(OFFB_CC_DEP2,Ity_I64),
+ widenUto64(mkexpr(resUS)))) );
+}
+
+
+/* For the inc/dec case, we store in DEP1 the result value and in NDEP
+ the former value of the carry flag, which unfortunately we have to
+ compute. */
+
+static void setFlags_INC_DEC ( Bool inc, IRTemp res, IRType ty )
+{
+ Int ccOp = inc ? AMD64G_CC_OP_INCB : AMD64G_CC_OP_DECB;
+
+ switch (ty) {
+ case Ity_I8: ccOp += 0; break;
+ case Ity_I16: ccOp += 1; break;
+ case Ity_I32: ccOp += 2; break;
+ case Ity_I64: ccOp += 3; break;
+ default: vassert(0);
+ }
+
+ /* This has to come first, because calculating the C flag
+ may require reading all four thunk fields. */
+ stmt( IRStmt_Put( OFFB_CC_NDEP, mk_amd64g_calculate_rflags_c()) );
+ stmt( IRStmt_Put( OFFB_CC_OP, mkU64(ccOp)) );
+ stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(res))) );
+ stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0)) );
+}
+
+
+/* Multiplies are pretty much like add and sub: DEP1 and DEP2 hold the
+ two arguments. */
+
+static
+void setFlags_MUL ( IRType ty, IRTemp arg1, IRTemp arg2, ULong base_op )
+{
+ switch (ty) {
+ case Ity_I8:
+ stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+0) ) );
+ break;
+ case Ity_I16:
+ stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+1) ) );
+ break;
+ case Ity_I32:
+ stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+2) ) );
+ break;
+ case Ity_I64:
+ stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+3) ) );
+ break;
+ default:
+ vpanic("setFlags_MUL(amd64)");
+ }
+ stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(arg1)) ));
+ stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(arg2)) ));
+}
+
+
+/* -------------- Condition codes. -------------- */
+
+/* Condition codes, using the AMD encoding. */
+
+static HChar* name_AMD64Condcode ( AMD64Condcode cond )
+{
+ switch (cond) {
+ case AMD64CondO: return "o";
+ case AMD64CondNO: return "no";
+ case AMD64CondB: return "b";
+ case AMD64CondNB: return "ae"; /*"nb";*/
+ case AMD64CondZ: return "e"; /*"z";*/
+ case AMD64CondNZ: return "ne"; /*"nz";*/
+ case AMD64CondBE: return "be";
+ case AMD64CondNBE: return "a"; /*"nbe";*/
+ case AMD64CondS: return "s";
+ case AMD64CondNS: return "ns";
+ case AMD64CondP: return "p";
+ case AMD64CondNP: return "np";
+ case AMD64CondL: return "l";
+ case AMD64CondNL: return "ge"; /*"nl";*/
+ case AMD64CondLE: return "le";
+ case AMD64CondNLE: return "g"; /*"nle";*/
+ case AMD64CondAlways: return "ALWAYS";
+ default: vpanic("name_AMD64Condcode");
+ }
+}
+
+static
+AMD64Condcode positiveIse_AMD64Condcode ( AMD64Condcode cond,
+ /*OUT*/Bool* needInvert )
+{
+ vassert(cond >= AMD64CondO && cond <= AMD64CondNLE);
+ if (cond & 1) {
+ *needInvert = True;
+ return cond-1;
+ } else {
+ *needInvert = False;
+ return cond;
+ }
+}
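+
+/* For example, AMD64CondNZ (odd-numbered, a negated condition) comes
+   back as AMD64CondZ with *needInvert set to True, whereas
+   AMD64CondZ is returned unchanged with *needInvert False. */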
+
+
+/* -------------- Helpers for ADD/SUB with carry. -------------- */
+
+/* Given ta1, ta2 and tres, compute tres = ADC(ta1,ta2) and set flags
+ appropriately.
+
+ Optionally, generate a store for the 'tres' value. This can either
+ be a normal store, or it can be a cas-with-possible-failure style
+ store:
+
+ if taddr is IRTemp_INVALID, then no store is generated.
+
+ if taddr is not IRTemp_INVALID, then a store (using taddr as
+ the address) is generated:
+
+ if texpVal is IRTemp_INVALID then a normal store is
+ generated, and restart_point must be zero (it is irrelevant).
+
+ if texpVal is not IRTemp_INVALID then a cas-style store is
+ generated. texpVal is the expected value, restart_point
+ is the restart point if the store fails, and texpVal must
+ have the same type as tres.
+
+*/
+static void helper_ADC ( Int sz,
+ IRTemp tres, IRTemp ta1, IRTemp ta2,
+ /* info about optional store: */
+                         IRTemp taddr, IRTemp texpVal, Addr64 restart_point )
+{
+ UInt thunkOp;
+ IRType ty = szToITy(sz);
+ IRTemp oldc = newTemp(Ity_I64);
+ IRTemp oldcn = newTemp(ty);
+ IROp plus = mkSizedOp(ty, Iop_Add8);
+ IROp xor = mkSizedOp(ty, Iop_Xor8);
+
+ vassert(typeOfIRTemp(irsb->tyenv, tres) == ty);
+
+ switch (sz) {
+ case 8: thunkOp = AMD64G_CC_OP_ADCQ; break;
+ case 4: thunkOp = AMD64G_CC_OP_ADCL; break;
+ case 2: thunkOp = AMD64G_CC_OP_ADCW; break;
+ case 1: thunkOp = AMD64G_CC_OP_ADCB; break;
+ default: vassert(0);
+ }
+
+ /* oldc = old carry flag, 0 or 1 */
+ assign( oldc, binop(Iop_And64,
+ mk_amd64g_calculate_rflags_c(),
+ mkU64(1)) );
+
+ assign( oldcn, narrowTo(ty, mkexpr(oldc)) );
+
+ assign( tres, binop(plus,
+ binop(plus,mkexpr(ta1),mkexpr(ta2)),
+ mkexpr(oldcn)) );
+
+ /* Possibly generate a store of 'tres' to 'taddr'. See comment at
+ start of this function. */
+ if (taddr != IRTemp_INVALID) {
+ if (texpVal == IRTemp_INVALID) {
+ vassert(restart_point == 0);
+ storeLE( mkexpr(taddr), mkexpr(tres) );
+ } else {
+ vassert(typeOfIRTemp(irsb->tyenv, texpVal) == ty);
+ /* .. and hence 'texpVal' has the same type as 'tres'. */
+ casLE( mkexpr(taddr),
+ mkexpr(texpVal), mkexpr(tres), restart_point );
+ }
+ }
+
+ stmt( IRStmt_Put( OFFB_CC_OP, mkU64(thunkOp) ) );
+ stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(ta1)) ));
+ stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(binop(xor, mkexpr(ta2),
+ mkexpr(oldcn)) )) );
+ stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(oldc) ) );
+}
+
+
+/* Given ta1, ta2 and tres, compute tres = SBB(ta1,ta2) and set flags
+ appropriately. As with helper_ADC, possibly generate a store of
+ the result -- see comments on helper_ADC for details.
+*/
+static void helper_SBB ( Int sz,
+ IRTemp tres, IRTemp ta1, IRTemp ta2,
+ /* info about optional store: */
+                         IRTemp taddr, IRTemp texpVal, Addr64 restart_point )
+{
+ UInt thunkOp;
+ IRType ty = szToITy(sz);
+ IRTemp oldc = newTemp(Ity_I64);
+ IRTemp oldcn = newTemp(ty);
+ IROp minus = mkSizedOp(ty, Iop_Sub8);
+ IROp xor = mkSizedOp(ty, Iop_Xor8);
+
+ vassert(typeOfIRTemp(irsb->tyenv, tres) == ty);
+
+ switch (sz) {
+ case 8: thunkOp = AMD64G_CC_OP_SBBQ; break;
+ case 4: thunkOp = AMD64G_CC_OP_SBBL; break;
+ case 2: thunkOp = AMD64G_CC_OP_SBBW; break;
+ case 1: thunkOp = AMD64G_CC_OP_SBBB; break;
+ default: vassert(0);
+ }
+
+ /* oldc = old carry flag, 0 or 1 */
+ assign( oldc, binop(Iop_And64,
+ mk_amd64g_calculate_rflags_c(),
+ mkU64(1)) );
+
+ assign( oldcn, narrowTo(ty, mkexpr(oldc)) );
+
+ assign( tres, binop(minus,
+ binop(minus,mkexpr(ta1),mkexpr(ta2)),
+ mkexpr(oldcn)) );
+
+ /* Possibly generate a store of 'tres' to 'taddr'. See comment at
+ start of this function. */
+ if (taddr != IRTemp_INVALID) {
+ if (texpVal == IRTemp_INVALID) {
+ vassert(restart_point == 0);
+ storeLE( mkexpr(taddr), mkexpr(tres) );
+ } else {
+ vassert(typeOfIRTemp(irsb->tyenv, texpVal) == ty);
+ /* .. and hence 'texpVal' has the same type as 'tres'. */
+ casLE( mkexpr(taddr),
+ mkexpr(texpVal), mkexpr(tres), restart_point );
+ }
+ }
+
+ stmt( IRStmt_Put( OFFB_CC_OP, mkU64(thunkOp) ) );
+ stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(ta1) )) );
+ stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(binop(xor, mkexpr(ta2),
+ mkexpr(oldcn)) )) );
+ stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(oldc) ) );
+}
+
+
+/* -------------- Helpers for disassembly printing. -------------- */
+
+static HChar* nameGrp1 ( Int opc_aux )
+{
+ static HChar* grp1_names[8]
+ = { "add", "or", "adc", "sbb", "and", "sub", "xor", "cmp" };
+ if (opc_aux < 0 || opc_aux > 7) vpanic("nameGrp1(amd64)");
+ return grp1_names[opc_aux];
+}
+
+static HChar* nameGrp2 ( Int opc_aux )
+{
+ static HChar* grp2_names[8]
+ = { "rol", "ror", "rcl", "rcr", "shl", "shr", "shl", "sar" };
+ if (opc_aux < 0 || opc_aux > 7) vpanic("nameGrp2(amd64)");
+ return grp2_names[opc_aux];
+}
+
+static HChar* nameGrp4 ( Int opc_aux )
+{
+ static HChar* grp4_names[8]
+ = { "inc", "dec", "???", "???", "???", "???", "???", "???" };
+ if (opc_aux < 0 || opc_aux > 1) vpanic("nameGrp4(amd64)");
+ return grp4_names[opc_aux];
+}
+
+static HChar* nameGrp5 ( Int opc_aux )
+{
+ static HChar* grp5_names[8]
+ = { "inc", "dec", "call*", "call*", "jmp*", "jmp*", "push", "???" };
+ if (opc_aux < 0 || opc_aux > 6) vpanic("nameGrp5(amd64)");
+ return grp5_names[opc_aux];
+}
+
+static HChar* nameGrp8 ( Int opc_aux )
+{
+ static HChar* grp8_names[8]
+ = { "???", "???", "???", "???", "bt", "bts", "btr", "btc" };
+ if (opc_aux < 4 || opc_aux > 7) vpanic("nameGrp8(amd64)");
+ return grp8_names[opc_aux];
+}
+
+//.. static HChar* nameSReg ( UInt sreg )
+//.. {
+//.. switch (sreg) {
+//.. case R_ES: return "%es";
+//.. case R_CS: return "%cs";
+//.. case R_SS: return "%ss";
+//.. case R_DS: return "%ds";
+//.. case R_FS: return "%fs";
+//.. case R_GS: return "%gs";
+//.. default: vpanic("nameSReg(x86)");
+//.. }
+//.. }
+
+static HChar* nameMMXReg ( Int mmxreg )
+{
+ static HChar* mmx_names[8]
+ = { "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7" };
+ if (mmxreg < 0 || mmxreg > 7) vpanic("nameMMXReg(amd64,guest)");
+ return mmx_names[mmxreg];
+}
+
+static HChar* nameXMMReg ( Int xmmreg )
+{
+ static HChar* xmm_names[16]
+ = { "%xmm0", "%xmm1", "%xmm2", "%xmm3",
+ "%xmm4", "%xmm5", "%xmm6", "%xmm7",
+ "%xmm8", "%xmm9", "%xmm10", "%xmm11",
+ "%xmm12", "%xmm13", "%xmm14", "%xmm15" };
+ if (xmmreg < 0 || xmmreg > 15) vpanic("nameXMMReg(amd64)");
+ return xmm_names[xmmreg];
+}
+
+static HChar* nameMMXGran ( Int gran )
+{
+ switch (gran) {
+ case 0: return "b";
+ case 1: return "w";
+ case 2: return "d";
+ case 3: return "q";
+ default: vpanic("nameMMXGran(amd64,guest)");
+ }
+}
+
+static HChar nameISize ( Int size )
+{
+ switch (size) {
+ case 8: return 'q';
+ case 4: return 'l';
+ case 2: return 'w';
+ case 1: return 'b';
+ default: vpanic("nameISize(amd64)");
+ }
+}
+
+
+/*------------------------------------------------------------*/
+/*--- JMP helpers ---*/
+/*------------------------------------------------------------*/
+
+static void jmp_lit( IRJumpKind kind, Addr64 d64 )
+{
+ irsb->next = mkU64(d64);
+ irsb->jumpkind = kind;
+}
+
+static void jmp_treg( IRJumpKind kind, IRTemp t )
+{
+ irsb->next = mkexpr(t);
+ irsb->jumpkind = kind;
+}
+
+static
+void jcc_01 ( AMD64Condcode cond, Addr64 d64_false, Addr64 d64_true )
+{
+ Bool invert;
+ AMD64Condcode condPos;
+ condPos = positiveIse_AMD64Condcode ( cond, &invert );
+ if (invert) {
+ stmt( IRStmt_Exit( mk_amd64g_calculate_condition(condPos),
+ Ijk_Boring,
+ IRConst_U64(d64_false) ) );
+ irsb->next = mkU64(d64_true);
+ irsb->jumpkind = Ijk_Boring;
+ } else {
+ stmt( IRStmt_Exit( mk_amd64g_calculate_condition(condPos),
+ Ijk_Boring,
+ IRConst_U64(d64_true) ) );
+ irsb->next = mkU64(d64_false);
+ irsb->jumpkind = Ijk_Boring;
+ }
+}
+
+/* Let new_rsp be the %rsp value after a call/return. Let nia be the
+ guest address of the next instruction to be executed.
+
+ This function generates an AbiHint to say that -128(%rsp)
+ .. -1(%rsp) should now be regarded as uninitialised.
+*/
+static
+void make_redzone_AbiHint ( VexAbiInfo* vbi,
+ IRTemp new_rsp, IRTemp nia, HChar* who )
+{
+ Int szB = vbi->guest_stack_redzone_size;
+ vassert(szB >= 0);
+
+   /* A bit of a kludge. Currently the only ABI we've guested AMD64
+      for is ELF. So just check it's the expected 128 value
+      (paranoia). */
+ vassert(szB == 128);
+
+ if (0) vex_printf("AbiHint: %s\n", who);
+ vassert(typeOfIRTemp(irsb->tyenv, new_rsp) == Ity_I64);
+ vassert(typeOfIRTemp(irsb->tyenv, nia) == Ity_I64);
+ if (szB > 0)
+ stmt( IRStmt_AbiHint(
+ binop(Iop_Sub64, mkexpr(new_rsp), mkU64(szB)),
+ szB,
+ mkexpr(nia)
+ ));
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Disassembling addressing modes ---*/
+/*------------------------------------------------------------*/
+
+static
+HChar* segRegTxt ( Prefix pfx )
+{
+ if (pfx & PFX_CS) return "%cs:";
+ if (pfx & PFX_DS) return "%ds:";
+ if (pfx & PFX_ES) return "%es:";
+ if (pfx & PFX_FS) return "%fs:";
+ if (pfx & PFX_GS) return "%gs:";
+ if (pfx & PFX_SS) return "%ss:";
+ return ""; /* no override */
+}
+
+
+/* 'virtual' is an IRExpr* holding a virtual address. Convert it to a
+ linear address by adding any required segment override as indicated
+ by sorb, and also dealing with any address size override
+ present. */
+static
+IRExpr* handleAddrOverrides ( VexAbiInfo* vbi,
+ Prefix pfx, IRExpr* virtual )
+{
+ /* --- segment overrides --- */
+ if (pfx & PFX_FS) {
+ if (vbi->guest_amd64_assume_fs_is_zero) {
+ /* Note that this is a linux-kernel specific hack that relies
+ on the assumption that %fs is always zero. */
+ /* return virtual + guest_FS_ZERO. */
+ virtual = binop(Iop_Add64, virtual,
+ IRExpr_Get(OFFB_FS_ZERO, Ity_I64));
+ } else {
+ unimplemented("amd64 %fs segment override");
+ }
+ }
+
+ if (pfx & PFX_GS) {
+ if (vbi->guest_amd64_assume_gs_is_0x60) {
+ /* Note that this is a darwin-kernel specific hack that relies
+ on the assumption that %gs is always 0x60. */
+ /* return virtual + guest_GS_0x60. */
+ virtual = binop(Iop_Add64, virtual,
+ IRExpr_Get(OFFB_GS_0x60, Ity_I64));
+ } else {
+ unimplemented("amd64 %gs segment override");
+ }
+ }
+
+ /* cs, ds, es and ss are simply ignored in 64-bit mode. */
+
+ /* --- address size override --- */
+ if (haveASO(pfx))
+ virtual = unop(Iop_32Uto64, unop(Iop_64to32, virtual));
+
+ return virtual;
+}
+
+//.. {
+//.. Int sreg;
+//.. IRType hWordTy;
+//.. IRTemp ldt_ptr, gdt_ptr, seg_selector, r64;
+//..
+//.. if (sorb == 0)
+//.. /* the common case - no override */
+//.. return virtual;
+//..
+//.. switch (sorb) {
+//.. case 0x3E: sreg = R_DS; break;
+//.. case 0x26: sreg = R_ES; break;
+//.. case 0x64: sreg = R_FS; break;
+//.. case 0x65: sreg = R_GS; break;
+//.. default: vpanic("handleAddrOverrides(x86,guest)");
+//.. }
+//..
+//.. hWordTy = sizeof(HWord)==4 ? Ity_I32 : Ity_I64;
+//..
+//.. seg_selector = newTemp(Ity_I32);
+//.. ldt_ptr = newTemp(hWordTy);
+//.. gdt_ptr = newTemp(hWordTy);
+//.. r64 = newTemp(Ity_I64);
+//..
+//.. assign( seg_selector, unop(Iop_16Uto32, getSReg(sreg)) );
+//.. assign( ldt_ptr, IRExpr_Get( OFFB_LDT, hWordTy ));
+//.. assign( gdt_ptr, IRExpr_Get( OFFB_GDT, hWordTy ));
+//..
+//.. /*
+//.. Call this to do the translation and limit checks:
+//.. ULong x86g_use_seg_selector ( HWord ldt, HWord gdt,
+//.. UInt seg_selector, UInt virtual_addr )
+//.. */
+//.. assign(
+//.. r64,
+//.. mkIRExprCCall(
+//.. Ity_I64,
+//.. 0/*regparms*/,
+//.. "x86g_use_seg_selector",
+//.. &x86g_use_seg_selector,
+//.. mkIRExprVec_4( mkexpr(ldt_ptr), mkexpr(gdt_ptr),
+//.. mkexpr(seg_selector), virtual)
+//.. )
+//.. );
+//..
+//.. /* If the high 32 of the result are non-zero, there was a
+//.. failure in address translation. In which case, make a
+//.. quick exit.
+//.. */
+//.. stmt(
+//.. IRStmt_Exit(
+//.. binop(Iop_CmpNE32, unop(Iop_64HIto32, mkexpr(r64)), mkU32(0)),
+//.. Ijk_MapFail,
+//.. IRConst_U32( guest_eip_curr_instr )
+//.. )
+//.. );
+//..
+//.. /* otherwise, here's the translated result. */
+//.. return unop(Iop_64to32, mkexpr(r64));
+//.. }
+
+
+/* Generate IR to calculate an address indicated by a ModRM and
+ following SIB bytes. The expression, and the number of bytes in
+ the address mode, are returned (the latter in *len). Note that
+ this fn should not be called if the R/M part of the address denotes
+   a register instead of memory.  Text of the addressing mode is
+   placed in buf when front-end tracing (VEX_TRACE_FE) is enabled.
+
+ The computed address is stored in a new tempreg, and the
+ identity of the tempreg is returned.
+
+ extra_bytes holds the number of bytes after the amode, as supplied
+ by the caller. This is needed to make sense of %rip-relative
+ addresses. Note that the value that *len is set to is only the
+ length of the amode itself and does not include the value supplied
+ in extra_bytes.
+ */
+
+static IRTemp disAMode_copy2tmp ( IRExpr* addr64 )
+{
+ IRTemp tmp = newTemp(Ity_I64);
+ assign( tmp, addr64 );
+ return tmp;
+}
+
+static
+IRTemp disAMode ( /*OUT*/Int* len,
+ VexAbiInfo* vbi, Prefix pfx, Long delta,
+ /*OUT*/HChar* buf, Int extra_bytes )
+{
+ UChar mod_reg_rm = getUChar(delta);
+ delta++;
+
+ buf[0] = (UChar)0;
+ vassert(extra_bytes >= 0 && extra_bytes < 10);
+
+ /* squeeze out the reg field from mod_reg_rm, since a 256-entry
+ jump table seems a bit excessive.
+ */
+ mod_reg_rm &= 0xC7; /* is now XX000YYY */
+ mod_reg_rm = toUChar(mod_reg_rm | (mod_reg_rm >> 3));
+ /* is now XX0XXYYY */
+ mod_reg_rm &= 0x1F; /* is now 000XXYYY */
+ switch (mod_reg_rm) {
+
+ /* REX.B==0: (%rax) .. (%rdi), not including (%rsp) or (%rbp).
+ REX.B==1: (%r8) .. (%r15), not including (%r12) or (%r13).
+ */
+ case 0x00: case 0x01: case 0x02: case 0x03:
+ /* ! 04 */ /* ! 05 */ case 0x06: case 0x07:
+ { UChar rm = toUChar(mod_reg_rm & 7);
+ DIS(buf, "%s(%s)", segRegTxt(pfx), nameIRegRexB(8,pfx,rm));
+ *len = 1;
+ return disAMode_copy2tmp(
+ handleAddrOverrides(vbi, pfx, getIRegRexB(8,pfx,rm)));
+ }
+
+ /* REX.B==0: d8(%rax) ... d8(%rdi), not including d8(%rsp)
+ REX.B==1: d8(%r8) ... d8(%r15), not including d8(%r12)
+ */
+ case 0x08: case 0x09: case 0x0A: case 0x0B:
+ /* ! 0C */ case 0x0D: case 0x0E: case 0x0F:
+ { UChar rm = toUChar(mod_reg_rm & 7);
+ Long d = getSDisp8(delta);
+ if (d == 0) {
+ DIS(buf, "%s(%s)", segRegTxt(pfx), nameIRegRexB(8,pfx,rm));
+ } else {
+ DIS(buf, "%s%lld(%s)", segRegTxt(pfx), d, nameIRegRexB(8,pfx,rm));
+ }
+ *len = 2;
+ return disAMode_copy2tmp(
+ handleAddrOverrides(vbi, pfx,
+ binop(Iop_Add64,getIRegRexB(8,pfx,rm),mkU64(d))));
+ }
+
+ /* REX.B==0: d32(%rax) ... d32(%rdi), not including d32(%rsp)
+ REX.B==1: d32(%r8) ... d32(%r15), not including d32(%r12)
+ */
+ case 0x10: case 0x11: case 0x12: case 0x13:
+ /* ! 14 */ case 0x15: case 0x16: case 0x17:
+ { UChar rm = toUChar(mod_reg_rm & 7);
+ Long d = getSDisp32(delta);
+ DIS(buf, "%s%lld(%s)", segRegTxt(pfx), d, nameIRegRexB(8,pfx,rm));
+ *len = 5;
+ return disAMode_copy2tmp(
+ handleAddrOverrides(vbi, pfx,
+ binop(Iop_Add64,getIRegRexB(8,pfx,rm),mkU64(d))));
+ }
+
+ /* REX.B==0: a register, %rax .. %rdi. This shouldn't happen. */
+   /* REX.B==1: a register, %r8 .. %r15. This shouldn't happen. */
+ case 0x18: case 0x19: case 0x1A: case 0x1B:
+ case 0x1C: case 0x1D: case 0x1E: case 0x1F:
+ vpanic("disAMode(amd64): not an addr!");
+
+ /* RIP + disp32. This assumes that guest_RIP_curr_instr is set
+ correctly at the start of handling each instruction. */
+ case 0x05:
+ { Long d = getSDisp32(delta);
+ *len = 5;
+ DIS(buf, "%s%lld(%%rip)", segRegTxt(pfx), d);
+ /* We need to know the next instruction's start address.
+ Try and figure out what it is, record the guess, and ask
+ the top-level driver logic (bbToIR_AMD64) to check we
+ guessed right, after the instruction is completely
+ decoded. */
+ guest_RIP_next_mustcheck = True;
+ guest_RIP_next_assumed = guest_RIP_bbstart
+ + delta+4 + extra_bytes;
+ return disAMode_copy2tmp(
+ handleAddrOverrides(vbi, pfx,
+ binop(Iop_Add64, mkU64(guest_RIP_next_assumed),
+ mkU64(d))));
+ }
+
+ case 0x04: {
+ /* SIB, with no displacement. Special cases:
+ -- %rsp cannot act as an index value.
+ If index_r indicates %rsp, zero is used for the index.
+ -- when mod is zero and base indicates RBP or R13, base is
+ instead a 32-bit sign-extended literal.
+ It's all madness, I tell you. Extract %index, %base and
+ scale from the SIB byte. The value denoted is then:
+ | %index == %RSP && (%base == %RBP || %base == %R13)
+ = d32 following SIB byte
+ | %index == %RSP && !(%base == %RBP || %base == %R13)
+ = %base
+ | %index != %RSP && (%base == %RBP || %base == %R13)
+ = d32 following SIB byte + (%index << scale)
+ | %index != %RSP && !(%base == %RBP || %base == %R13)
+ = %base + (%index << scale)
+ */
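+ /* Illustrative decode: a SIB byte of 0x8D (scale=2, index=001,
+ base=101) with mod=00 hits the third case above: the d32
+ following the SIB byte plus (%rcx << 2), assuming REX.X and
+ REX.B are both zero. */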
+ UChar sib = getUChar(delta);
+ UChar scale = toUChar((sib >> 6) & 3);
+ UChar index_r = toUChar((sib >> 3) & 7);
+ UChar base_r = toUChar(sib & 7);
+ /* correct since #(R13) == 8 + #(RBP) */
+ Bool base_is_BPor13 = toBool(base_r == R_RBP);
+ Bool index_is_SP = toBool(index_r == R_RSP && 0==getRexX(pfx));
+ delta++;
+
+ if ((!index_is_SP) && (!base_is_BPor13)) {
+ if (scale == 0) {
+ DIS(buf, "%s(%s,%s)", segRegTxt(pfx),
+ nameIRegRexB(8,pfx,base_r),
+ nameIReg64rexX(pfx,index_r));
+ } else {
+ DIS(buf, "%s(%s,%s,%d)", segRegTxt(pfx),
+ nameIRegRexB(8,pfx,base_r),
+ nameIReg64rexX(pfx,index_r), 1<<scale);
+ }
+ *len = 2;
+ return
+ disAMode_copy2tmp(
+ handleAddrOverrides(vbi, pfx,
+ binop(Iop_Add64,
+ getIRegRexB(8,pfx,base_r),
+ binop(Iop_Shl64, getIReg64rexX(pfx,index_r),
+ mkU8(scale)))));
+ }
+
+ if ((!index_is_SP) && base_is_BPor13) {
+ Long d = getSDisp32(delta);
+ DIS(buf, "%s%lld(,%s,%d)", segRegTxt(pfx), d,
+ nameIReg64rexX(pfx,index_r), 1<<scale);
+ *len = 6;
+ return
+ disAMode_copy2tmp(
+ handleAddrOverrides(vbi, pfx,
+ binop(Iop_Add64,
+ binop(Iop_Shl64, getIReg64rexX(pfx,index_r),
+ mkU8(scale)),
+ mkU64(d))));
+ }
+
+ if (index_is_SP && (!base_is_BPor13)) {
+ DIS(buf, "%s(%s)", segRegTxt(pfx), nameIRegRexB(8,pfx,base_r));
+ *len = 2;
+ return disAMode_copy2tmp(
+ handleAddrOverrides(vbi, pfx, getIRegRexB(8,pfx,base_r)));
+ }
+
+ if (index_is_SP && base_is_BPor13) {
+ Long d = getSDisp32(delta);
+ DIS(buf, "%s%lld", segRegTxt(pfx), d);
+ *len = 6;
+ return disAMode_copy2tmp(
+ handleAddrOverrides(vbi, pfx, mkU64(d)));
+ }
+
+ vassert(0);
+ }
+
+ /* SIB, with 8-bit displacement. Special cases:
+ -- %rsp cannot act as an index value.
+ If index_r indicates %rsp, zero is used for the index.
+ Denoted value is:
+ | %index == %RSP
+ = d8 + %base
+ | %index != %RSP
+ = d8 + %base + (%index << scale)
+ */
+ case 0x0C: {
+ UChar sib = getUChar(delta);
+ UChar scale = toUChar((sib >> 6) & 3);
+ UChar index_r = toUChar((sib >> 3) & 7);
+ UChar base_r = toUChar(sib & 7);
+ Long d = getSDisp8(delta+1);
+
+ if (index_r == R_RSP && 0==getRexX(pfx)) {
+ DIS(buf, "%s%lld(%s)", segRegTxt(pfx),
+ d, nameIRegRexB(8,pfx,base_r));
+ *len = 3;
+ return disAMode_copy2tmp(
+ handleAddrOverrides(vbi, pfx,
+ binop(Iop_Add64, getIRegRexB(8,pfx,base_r), mkU64(d)) ));
+ } else {
+ if (scale == 0) {
+ DIS(buf, "%s%lld(%s,%s)", segRegTxt(pfx), d,
+ nameIRegRexB(8,pfx,base_r),
+ nameIReg64rexX(pfx,index_r));
+ } else {
+ DIS(buf, "%s%lld(%s,%s,%d)", segRegTxt(pfx), d,
+ nameIRegRexB(8,pfx,base_r),
+ nameIReg64rexX(pfx,index_r), 1<<scale);
+ }
+ *len = 3;
+ return
+ disAMode_copy2tmp(
+ handleAddrOverrides(vbi, pfx,
+ binop(Iop_Add64,
+ binop(Iop_Add64,
+ getIRegRexB(8,pfx,base_r),
+ binop(Iop_Shl64,
+ getIReg64rexX(pfx,index_r), mkU8(scale))),
+ mkU64(d))));
+ }
+ vassert(0); /*NOTREACHED*/
+ }
+
+ /* SIB, with 32-bit displacement. Special cases:
+ -- %rsp cannot act as an index value.
+ If index_r indicates %rsp, zero is used for the index.
+ Denoted value is:
+ | %index == %RSP
+ = d32 + %base
+ | %index != %RSP
+ = d32 + %base + (%index << scale)
+ */
+ case 0x14: {
+ UChar sib = getUChar(delta);
+ UChar scale = toUChar((sib >> 6) & 3);
+ UChar index_r = toUChar((sib >> 3) & 7);
+ UChar base_r = toUChar(sib & 7);
+ Long d = getSDisp32(delta+1);
+
+ if (index_r == R_RSP && 0==getRexX(pfx)) {
+ DIS(buf, "%s%lld(%s)", segRegTxt(pfx),
+ d, nameIRegRexB(8,pfx,base_r));
+ *len = 6;
+ return disAMode_copy2tmp(
+ handleAddrOverrides(vbi, pfx,
+ binop(Iop_Add64, getIRegRexB(8,pfx,base_r), mkU64(d)) ));
+ } else {
+ if (scale == 0) {
+ DIS(buf, "%s%lld(%s,%s)", segRegTxt(pfx), d,
+ nameIRegRexB(8,pfx,base_r),
+ nameIReg64rexX(pfx,index_r));
+ } else {
+ DIS(buf, "%s%lld(%s,%s,%d)", segRegTxt(pfx), d,
+ nameIRegRexB(8,pfx,base_r),
+ nameIReg64rexX(pfx,index_r), 1<<scale);
+ }
+ *len = 6;
+ return
+ disAMode_copy2tmp(
+ handleAddrOverrides(vbi, pfx,
+ binop(Iop_Add64,
+ binop(Iop_Add64,
+ getIRegRexB(8,pfx,base_r),
+ binop(Iop_Shl64,
+ getIReg64rexX(pfx,index_r), mkU8(scale))),
+ mkU64(d))));
+ }
+ vassert(0); /*NOTREACHED*/
+ }
+
+ default:
+ vpanic("disAMode(amd64)");
+ return 0; /*notreached*/
+ }
+}
+
+
+/* Figure out the number of (insn-stream) bytes constituting the amode
+ beginning at delta. This is useful for getting hold of literals
+ beyond the end of the amode before it has been disassembled. */
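+/* (e.g. a modRM byte of 0x04 whose SIB byte names %rbp as base
+ decodes to modRM + SIB + d32, hence 6 bytes.) */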
+
+static UInt lengthAMode ( Prefix pfx, Long delta )
+{
+ UChar mod_reg_rm = getUChar(delta);
+ delta++;
+
+ /* squeeze out the reg field from mod_reg_rm, since a 256-entry
+ jump table seems a bit excessive.
+ */
+ mod_reg_rm &= 0xC7; /* is now XX000YYY */
+ mod_reg_rm = toUChar(mod_reg_rm | (mod_reg_rm >> 3));
+ /* is now XX0XXYYY */
+ mod_reg_rm &= 0x1F; /* is now 000XXYYY */
+ switch (mod_reg_rm) {
+
+ /* REX.B==0: (%rax) .. (%rdi), not including (%rsp) or (%rbp).
+ REX.B==1: (%r8) .. (%r15), not including (%r12) or (%r13).
+ */
+ case 0x00: case 0x01: case 0x02: case 0x03:
+ /* ! 04 */ /* ! 05 */ case 0x06: case 0x07:
+ return 1;
+
+ /* REX.B==0: d8(%rax) ... d8(%rdi), not including d8(%rsp)
+ REX.B==1: d8(%r8) ... d8(%r15), not including d8(%r12)
+ */
+ case 0x08: case 0x09: case 0x0A: case 0x0B:
+ /* ! 0C */ case 0x0D: case 0x0E: case 0x0F:
+ return 2;
+
+ /* REX.B==0: d32(%rax) ... d32(%rdi), not including d32(%rsp)
+ REX.B==1: d32(%r8) ... d32(%r15), not including d32(%r12)
+ */
+ case 0x10: case 0x11: case 0x12: case 0x13:
+ /* ! 14 */ case 0x15: case 0x16: case 0x17:
+ return 5;
+
+ /* REX.B==0: a register, %rax .. %rdi. This shouldn't happen. */
+ /* REX.B==1: a register, %r8 .. %r15. This shouldn't happen. */
+ /* Not an address, but still handled. */
+ case 0x18: case 0x19: case 0x1A: case 0x1B:
+ case 0x1C: case 0x1D: case 0x1E: case 0x1F:
+ return 1;
+
+ /* RIP + disp32. */
+ case 0x05:
+ return 5;
+
+ case 0x04: {
+ /* SIB, with no displacement. */
+ UChar sib = getUChar(delta);
+ UChar base_r = toUChar(sib & 7);
+ /* correct since #(R13) == 8 + #(RBP) */
+ Bool base_is_BPor13 = toBool(base_r == R_RBP);
+
+ if (base_is_BPor13) {
+ return 6;
+ } else {
+ return 2;
+ }
+ }
+
+ /* SIB, with 8-bit displacement. */
+ case 0x0C:
+ return 3;
+
+ /* SIB, with 32-bit displacement. */
+ case 0x14:
+ return 6;
+
+ default:
+ vpanic("lengthAMode(amd64)");
+ return 0; /*notreached*/
+ }
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Disassembling common idioms ---*/
+/*------------------------------------------------------------*/
+
+/* Handle binary integer instructions of the form
+ op E, G meaning
+ op reg-or-mem, reg
+ Is passed the offset (delta0) of the modRM byte, the actual
+ operation, and the data size. Returns the delta advanced completely
+ over this instruction.
+
+ E(src) is reg-or-mem
+ G(dst) is reg.
+
+ If E is reg, --> GET %G, tmp
+ OP %E, tmp
+ PUT tmp, %G
+
+ If E is mem and OP is not reversible,
+ --> (getAddr E) -> tmpa
+ LD (tmpa), tmpa
+ GET %G, tmp2
+ OP tmpa, tmp2
+ PUT tmp2, %G
+
+ If E is mem and OP is reversible
+ --> (getAddr E) -> tmpa
+ LD (tmpa), tmpa
+ OP %G, tmpa
+ PUT tmpa, %G
+*/
+static
+ULong dis_op2_E_G ( VexAbiInfo* vbi,
+ Prefix pfx,
+ Bool addSubCarry,
+ IROp op8,
+ Bool keep,
+ Int size,
+ Long delta0,
+ HChar* t_amd64opc )
+{
+ HChar dis_buf[50];
+ Int len;
+ IRType ty = szToITy(size);
+ IRTemp dst1 = newTemp(ty);
+ IRTemp src = newTemp(ty);
+ IRTemp dst0 = newTemp(ty);
+ UChar rm = getUChar(delta0);
+ IRTemp addr = IRTemp_INVALID;
+
+ /* addSubCarry == True indicates the intended operation is
+ add-with-carry or subtract-with-borrow. */
+ if (addSubCarry) {
+ vassert(op8 == Iop_Add8 || op8 == Iop_Sub8);
+ vassert(keep);
+ }
+
+ if (epartIsReg(rm)) {
+ /* Specially handle XOR reg,reg, because that doesn't really
+ depend on reg, and doing the obvious thing potentially
+ generates a spurious value check failure due to the bogus
+ dependency. */
+ if ((op8 == Iop_Xor8 || (op8 == Iop_Sub8 && addSubCarry))
+ && offsetIRegG(size,pfx,rm) == offsetIRegE(size,pfx,rm)) {
+ if (False && op8 == Iop_Sub8)
+ vex_printf("vex amd64->IR: sbb %%r,%%r optimisation(1)\n");
+ putIRegG(size,pfx,rm, mkU(ty,0));
+ }
+
+ assign( dst0, getIRegG(size,pfx,rm) );
+ assign( src, getIRegE(size,pfx,rm) );
+
+ if (addSubCarry && op8 == Iop_Add8) {
+ helper_ADC( size, dst1, dst0, src,
+ /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
+ putIRegG(size, pfx, rm, mkexpr(dst1));
+ } else
+ if (addSubCarry && op8 == Iop_Sub8) {
+ helper_SBB( size, dst1, dst0, src,
+ /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
+ putIRegG(size, pfx, rm, mkexpr(dst1));
+ } else {
+ assign( dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
+ if (isAddSub(op8))
+ setFlags_DEP1_DEP2(op8, dst0, src, ty);
+ else
+ setFlags_DEP1(op8, dst1, ty);
+ if (keep)
+ putIRegG(size, pfx, rm, mkexpr(dst1));
+ }
+
+ DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size),
+ nameIRegE(size,pfx,rm),
+ nameIRegG(size,pfx,rm));
+ return 1+delta0;
+ } else {
+ /* E refers to memory */
+ addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
+ assign( dst0, getIRegG(size,pfx,rm) );
+ assign( src, loadLE(szToITy(size), mkexpr(addr)) );
+
+ if (addSubCarry && op8 == Iop_Add8) {
+ helper_ADC( size, dst1, dst0, src,
+ /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
+ putIRegG(size, pfx, rm, mkexpr(dst1));
+ } else
+ if (addSubCarry && op8 == Iop_Sub8) {
+ helper_SBB( size, dst1, dst0, src,
+ /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
+ putIRegG(size, pfx, rm, mkexpr(dst1));
+ } else {
+ assign( dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
+ if (isAddSub(op8))
+ setFlags_DEP1_DEP2(op8, dst0, src, ty);
+ else
+ setFlags_DEP1(op8, dst1, ty);
+ if (keep)
+ putIRegG(size, pfx, rm, mkexpr(dst1));
+ }
+
+ DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size),
+ dis_buf, nameIRegG(size, pfx, rm));
+ return len+delta0;
+ }
+}
+
+
+
+/* Handle binary integer instructions of the form
+ op G, E meaning
+ op reg, reg-or-mem
+ Is passed the offset (delta0) of the modRM byte, the actual
+ operation, and the data size. Returns the delta advanced completely
+ over this instruction.
+
+ G(src) is reg.
+ E(dst) is reg-or-mem
+
+ If E is reg, --> GET %E, tmp
+ OP %G, tmp
+ PUT tmp, %E
+
+ If E is mem, --> (getAddr E) -> tmpa
+ LD (tmpa), tmpv
+ OP %G, tmpv
+ ST tmpv, (tmpa)
+*/
+static
+ULong dis_op2_G_E ( VexAbiInfo* vbi,
+ Prefix pfx,
+ Bool addSubCarry,
+ IROp op8,
+ Bool keep,
+ Int size,
+ Long delta0,
+ HChar* t_amd64opc )
+{
+ HChar dis_buf[50];
+ Int len;
+ IRType ty = szToITy(size);
+ IRTemp dst1 = newTemp(ty);
+ IRTemp src = newTemp(ty);
+ IRTemp dst0 = newTemp(ty);
+ UChar rm = getUChar(delta0);
+ IRTemp addr = IRTemp_INVALID;
+
+ /* addSubCarry == True indicates the intended operation is
+ add-with-carry or subtract-with-borrow. */
+ if (addSubCarry) {
+ vassert(op8 == Iop_Add8 || op8 == Iop_Sub8);
+ vassert(keep);
+ }
+
+ if (epartIsReg(rm)) {
+ /* Specially handle XOR reg,reg, because that doesn't really
+ depend on reg, and doing the obvious thing potentially
+ generates a spurious value check failure due to the bogus
+ dependency. Ditto SBB reg,reg. */
+ if ((op8 == Iop_Xor8 || (op8 == Iop_Sub8 && addSubCarry))
+ && offsetIRegG(size,pfx,rm) == offsetIRegE(size,pfx,rm)) {
+ putIRegE(size,pfx,rm, mkU(ty,0));
+ }
+
+ assign(dst0, getIRegE(size,pfx,rm));
+ assign(src, getIRegG(size,pfx,rm));
+
+ if (addSubCarry && op8 == Iop_Add8) {
+ helper_ADC( size, dst1, dst0, src,
+ /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
+ putIRegE(size, pfx, rm, mkexpr(dst1));
+ } else
+ if (addSubCarry && op8 == Iop_Sub8) {
+ helper_SBB( size, dst1, dst0, src,
+ /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
+ putIRegE(size, pfx, rm, mkexpr(dst1));
+ } else {
+ assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
+ if (isAddSub(op8))
+ setFlags_DEP1_DEP2(op8, dst0, src, ty);
+ else
+ setFlags_DEP1(op8, dst1, ty);
+ if (keep)
+ putIRegE(size, pfx, rm, mkexpr(dst1));
+ }
+
+ DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size),
+ nameIRegG(size,pfx,rm),
+ nameIRegE(size,pfx,rm));
+ return 1+delta0;
+ }
+
+ /* E refers to memory */
+ {
+ addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
+ assign(dst0, loadLE(ty,mkexpr(addr)));
+ assign(src, getIRegG(size,pfx,rm));
+
+ if (addSubCarry && op8 == Iop_Add8) {
+ if (pfx & PFX_LOCK) {
+ /* cas-style store */
+ helper_ADC( size, dst1, dst0, src,
+ /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr );
+ } else {
+ /* normal store */
+ helper_ADC( size, dst1, dst0, src,
+ /*store*/addr, IRTemp_INVALID, 0 );
+ }
+ } else
+ if (addSubCarry && op8 == Iop_Sub8) {
+ if (pfx & PFX_LOCK) {
+ /* cas-style store */
+ helper_SBB( size, dst1, dst0, src,
+ /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr );
+ } else {
+ /* normal store */
+ helper_SBB( size, dst1, dst0, src,
+ /*store*/addr, IRTemp_INVALID, 0 );
+ }
+ } else {
+ assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
+ if (keep) {
+ if (pfx & PFX_LOCK) {
+ if (0) vex_printf("locked case\n" );
+ casLE( mkexpr(addr),
+ mkexpr(dst0)/*expval*/,
+ mkexpr(dst1)/*newval*/, guest_RIP_curr_instr );
+ } else {
+ if (0) vex_printf("nonlocked case\n");
+ storeLE(mkexpr(addr), mkexpr(dst1));
+ }
+ }
+ if (isAddSub(op8))
+ setFlags_DEP1_DEP2(op8, dst0, src, ty);
+ else
+ setFlags_DEP1(op8, dst1, ty);
+ }
+
+ DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size),
+ nameIRegG(size,pfx,rm), dis_buf);
+ return len+delta0;
+ }
+}
+
+
+/* Handle move instructions of the form
+ mov E, G meaning
+ mov reg-or-mem, reg
+ Is passed the offset (delta0) of the modRM byte, and the data size.
+ Returns the delta advanced completely over this instruction.
+
+ E(src) is reg-or-mem
+ G(dst) is reg.
+
+ If E is reg, --> GET %E, tmpv
+ PUT tmpv, %G
+
+ If E is mem --> (getAddr E) -> tmpa
+ LD (tmpa), tmpb
+ PUT tmpb, %G
+*/
+static
+ULong dis_mov_E_G ( VexAbiInfo* vbi,
+ Prefix pfx,
+ Int size,
+ Long delta0 )
+{
+ Int len;
+ UChar rm = getUChar(delta0);
+ HChar dis_buf[50];
+
+ if (epartIsReg(rm)) {
+ putIRegG(size, pfx, rm, getIRegE(size, pfx, rm));
+ DIP("mov%c %s,%s\n", nameISize(size),
+ nameIRegE(size,pfx,rm),
+ nameIRegG(size,pfx,rm));
+ return 1+delta0;
+ }
+
+ /* E refers to memory */
+ {
+ IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
+ putIRegG(size, pfx, rm, loadLE(szToITy(size), mkexpr(addr)));
+ DIP("mov%c %s,%s\n", nameISize(size),
+ dis_buf,
+ nameIRegG(size,pfx,rm));
+ return delta0+len;
+ }
+}
+
+
+/* Handle move instructions of the form
+ mov G, E meaning
+ mov reg, reg-or-mem
+ Is passed the offset (delta0) of the modRM byte, and the data size.
+ Returns the delta advanced completely over this instruction.
+
+ G(src) is reg.
+ E(dst) is reg-or-mem
+
+ If E is reg, --> GET %G, tmp
+ PUT tmp, %E
+
+ If E is mem, --> (getAddr E) -> tmpa
+ GET %G, tmpv
+ ST tmpv, (tmpa)
+*/
+static
+ULong dis_mov_G_E ( VexAbiInfo* vbi,
+ Prefix pfx,
+ Int size,
+ Long delta0 )
+{
+ Int len;
+ UChar rm = getUChar(delta0);
+ HChar dis_buf[50];
+
+ if (epartIsReg(rm)) {
+ putIRegE(size, pfx, rm, getIRegG(size, pfx, rm));
+ DIP("mov%c %s,%s\n", nameISize(size),
+ nameIRegG(size,pfx,rm),
+ nameIRegE(size,pfx,rm));
+ return 1+delta0;
+ }
+
+ /* E refers to memory */
+ {
+ IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
+ storeLE( mkexpr(addr), getIRegG(size, pfx, rm) );
+ DIP("mov%c %s,%s\n", nameISize(size),
+ nameIRegG(size,pfx,rm),
+ dis_buf);
+ return len+delta0;
+ }
+}
+
+
+/* op $immediate, AL/AX/EAX/RAX. */
+static
+ULong dis_op_imm_A ( Int size,
+ Bool carrying,
+ IROp op8,
+ Bool keep,
+ Long delta,
+ HChar* t_amd64opc )
+{
+ Int size4 = imin(size,4);
+ IRType ty = szToITy(size);
+ IRTemp dst0 = newTemp(ty);
+ IRTemp src = newTemp(ty);
+ IRTemp dst1 = newTemp(ty);
+ Long lit = getSDisp(size4,delta);
+ assign(dst0, getIRegRAX(size));
+ assign(src, mkU(ty,lit & mkSizeMask(size)));
+
+ if (isAddSub(op8) && !carrying) {
+ assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
+ setFlags_DEP1_DEP2(op8, dst0, src, ty);
+ }
+ else
+ if (isLogic(op8)) {
+ vassert(!carrying);
+ assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
+ setFlags_DEP1(op8, dst1, ty);
+ }
+ else
+ if (op8 == Iop_Add8 && carrying) {
+ helper_ADC( size, dst1, dst0, src,
+ /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
+ }
+ else
+ if (op8 == Iop_Sub8 && carrying) {
+ helper_SBB( size, dst1, dst0, src,
+ /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
+ }
+ else
+ vpanic("dis_op_imm_A(amd64,guest)");
+
+ if (keep)
+ putIRegRAX(size, mkexpr(dst1));
+
+ DIP("%s%c $%lld, %s\n", t_amd64opc, nameISize(size),
+ lit, nameIRegRAX(size));
+ return delta+size4;
+}
+
+
+/* Sign- and Zero-extending moves. */
+static
+ULong dis_movx_E_G ( VexAbiInfo* vbi,
+ Prefix pfx,
+ Long delta, Int szs, Int szd, Bool sign_extend )
+{
+ UChar rm = getUChar(delta);
+ if (epartIsReg(rm)) {
+ putIRegG(szd, pfx, rm,
+ doScalarWidening(
+ szs,szd,sign_extend,
+ getIRegE(szs,pfx,rm)));
+ DIP("mov%c%c%c %s,%s\n", sign_extend ? 's' : 'z',
+ nameISize(szs),
+ nameISize(szd),
+ nameIRegE(szs,pfx,rm),
+ nameIRegG(szd,pfx,rm));
+ return 1+delta;
+ }
+
+ /* E refers to memory */
+ {
+ Int len;
+ HChar dis_buf[50];
+ IRTemp addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 0 );
+ putIRegG(szd, pfx, rm,
+ doScalarWidening(
+ szs,szd,sign_extend,
+ loadLE(szToITy(szs),mkexpr(addr))));
+ DIP("mov%c%c%c %s,%s\n", sign_extend ? 's' : 'z',
+ nameISize(szs),
+ nameISize(szd),
+ dis_buf,
+ nameIRegG(szd,pfx,rm));
+ return len+delta;
+ }
+}
+
+
+/* Generate code to divide ArchRegs RDX:RAX / EDX:EAX / DX:AX / AX by
+ the 64 / 32 / 16 / 8 bit quantity in the given IRTemp. */
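+/* For example, for sz == 4, EDX:EAX is glued into a 64-bit value with
+ Iop_32HLto64; Iop_DivModU64to32 (or its signed variant) then yields
+ the quotient in its low 32 bits and the remainder in its high 32
+ bits, which are written back to EAX and EDX respectively. */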
+static
+void codegen_div ( Int sz, IRTemp t, Bool signed_divide )
+{
+ /* special-case the 64-bit case */
+ if (sz == 8) {
+ IROp op = signed_divide ? Iop_DivModS128to64
+ : Iop_DivModU128to64;
+ IRTemp src128 = newTemp(Ity_I128);
+ IRTemp dst128 = newTemp(Ity_I128);
+ assign( src128, binop(Iop_64HLto128,
+ getIReg64(R_RDX),
+ getIReg64(R_RAX)) );
+ assign( dst128, binop(op, mkexpr(src128), mkexpr(t)) );
+ putIReg64( R_RAX, unop(Iop_128to64,mkexpr(dst128)) );
+ putIReg64( R_RDX, unop(Iop_128HIto64,mkexpr(dst128)) );
+ } else {
+ IROp op = signed_divide ? Iop_DivModS64to32
+ : Iop_DivModU64to32;
+ IRTemp src64 = newTemp(Ity_I64);
+ IRTemp dst64 = newTemp(Ity_I64);
+ switch (sz) {
+ case 4:
+ assign( src64,
+ binop(Iop_32HLto64, getIRegRDX(4), getIRegRAX(4)) );
+ assign( dst64,
+ binop(op, mkexpr(src64), mkexpr(t)) );
+ putIRegRAX( 4, unop(Iop_64to32,mkexpr(dst64)) );
+ putIRegRDX( 4, unop(Iop_64HIto32,mkexpr(dst64)) );
+ break;
+ case 2: {
+ IROp widen3264 = signed_divide ? Iop_32Sto64 : Iop_32Uto64;
+ IROp widen1632 = signed_divide ? Iop_16Sto32 : Iop_16Uto32;
+ assign( src64, unop(widen3264,
+ binop(Iop_16HLto32,
+ getIRegRDX(2),
+ getIRegRAX(2))) );
+ assign( dst64, binop(op, mkexpr(src64), unop(widen1632,mkexpr(t))) );
+ putIRegRAX( 2, unop(Iop_32to16,unop(Iop_64to32,mkexpr(dst64))) );
+ putIRegRDX( 2, unop(Iop_32to16,unop(Iop_64HIto32,mkexpr(dst64))) );
+ break;
+ }
+ case 1: {
+ IROp widen3264 = signed_divide ? Iop_32Sto64 : Iop_32Uto64;
+ IROp widen1632 = signed_divide ? Iop_16Sto32 : Iop_16Uto32;
+ IROp widen816 = signed_divide ? Iop_8Sto16 : Iop_8Uto16;
+ assign( src64, unop(widen3264,
+ unop(widen1632, getIRegRAX(2))) );
+ assign( dst64,
+ binop(op, mkexpr(src64),
+ unop(widen1632, unop(widen816, mkexpr(t)))) );
+ putIRegRAX( 1, unop(Iop_16to8,
+ unop(Iop_32to16,
+ unop(Iop_64to32,mkexpr(dst64)))) );
+ putIRegAH( unop(Iop_16to8,
+ unop(Iop_32to16,
+ unop(Iop_64HIto32,mkexpr(dst64)))) );
+ break;
+ }
+ default:
+ vpanic("codegen_div(amd64)");
+ }
+ }
+}
+
+static
+ULong dis_Grp1 ( VexAbiInfo* vbi,
+ Prefix pfx,
+ Long delta, UChar modrm,
+ Int am_sz, Int d_sz, Int sz, Long d64 )
+{
+ Int len;
+ HChar dis_buf[50];
+ IRType ty = szToITy(sz);
+ IRTemp dst1 = newTemp(ty);
+ IRTemp src = newTemp(ty);
+ IRTemp dst0 = newTemp(ty);
+ IRTemp addr = IRTemp_INVALID;
+ IROp op8 = Iop_INVALID;
+ ULong mask = mkSizeMask(sz);
+
+ switch (gregLO3ofRM(modrm)) {
+ case 0: op8 = Iop_Add8; break; case 1: op8 = Iop_Or8; break;
+ case 2: break; // ADC
+ case 3: break; // SBB
+ case 4: op8 = Iop_And8; break; case 5: op8 = Iop_Sub8; break;
+ case 6: op8 = Iop_Xor8; break; case 7: op8 = Iop_Sub8; break;
+ /*NOTREACHED*/
+ default: vpanic("dis_Grp1(amd64): unhandled case");
+ }
+
+ if (epartIsReg(modrm)) {
+ vassert(am_sz == 1);
+
+ assign(dst0, getIRegE(sz,pfx,modrm));
+ assign(src, mkU(ty,d64 & mask));
+
+ if (gregLO3ofRM(modrm) == 2 /* ADC */) {
+ helper_ADC( sz, dst1, dst0, src,
+ /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
+ } else
+ if (gregLO3ofRM(modrm) == 3 /* SBB */) {
+ helper_SBB( sz, dst1, dst0, src,
+ /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
+ } else {
+ assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
+ if (isAddSub(op8))
+ setFlags_DEP1_DEP2(op8, dst0, src, ty);
+ else
+ setFlags_DEP1(op8, dst1, ty);
+ }
+
+ if (gregLO3ofRM(modrm) < 7)
+ putIRegE(sz, pfx, modrm, mkexpr(dst1));
+
+ delta += (am_sz + d_sz);
+ DIP("%s%c $%lld, %s\n",
+ nameGrp1(gregLO3ofRM(modrm)), nameISize(sz), d64,
+ nameIRegE(sz,pfx,modrm));
+ } else {
+ addr = disAMode ( &len, vbi, pfx, delta, dis_buf, /*xtra*/d_sz );
+
+ assign(dst0, loadLE(ty,mkexpr(addr)));
+ assign(src, mkU(ty,d64 & mask));
+
+ if (gregLO3ofRM(modrm) == 2 /* ADC */) {
+ if (pfx & PFX_LOCK) {
+ /* cas-style store */
+ helper_ADC( sz, dst1, dst0, src,
+ /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr );
+ } else {
+ /* normal store */
+ helper_ADC( sz, dst1, dst0, src,
+ /*store*/addr, IRTemp_INVALID, 0 );
+ }
+ } else
+ if (gregLO3ofRM(modrm) == 3 /* SBB */) {
+ if (pfx & PFX_LOCK) {
+ /* cas-style store */
+ helper_SBB( sz, dst1, dst0, src,
+ /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr );
+ } else {
+ /* normal store */
+ helper_SBB( sz, dst1, dst0, src,
+ /*store*/addr, IRTemp_INVALID, 0 );
+ }
+ } else {
+ assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
+ if (gregLO3ofRM(modrm) < 7) {
+ if (pfx & PFX_LOCK) {
+ casLE( mkexpr(addr), mkexpr(dst0)/*expVal*/,
+ mkexpr(dst1)/*newVal*/,
+ guest_RIP_curr_instr );
+ } else {
+ storeLE(mkexpr(addr), mkexpr(dst1));
+ }
+ }
+ if (isAddSub(op8))
+ setFlags_DEP1_DEP2(op8, dst0, src, ty);
+ else
+ setFlags_DEP1(op8, dst1, ty);
+ }
+
+ delta += (len+d_sz);
+ DIP("%s%c $%lld, %s\n",
+ nameGrp1(gregLO3ofRM(modrm)), nameISize(sz),
+ d64, dis_buf);
+ }
+ return delta;
+}
+
+
+/* Group 2 extended opcodes. shift_expr must be an 8-bit typed
+ expression. */
+
+static
+ULong dis_Grp2 ( VexAbiInfo* vbi,
+ Prefix pfx,
+ Long delta, UChar modrm,
+ Int am_sz, Int d_sz, Int sz, IRExpr* shift_expr,
+ HChar* shift_expr_txt, Bool* decode_OK )
+{
+ /* delta on entry points at the modrm byte. */
+ HChar dis_buf[50];
+ Int len;
+ Bool isShift, isRotate, isRotateC;
+ IRType ty = szToITy(sz);
+ IRTemp dst0 = newTemp(ty);
+ IRTemp dst1 = newTemp(ty);
+ IRTemp addr = IRTemp_INVALID;
+
+ *decode_OK = True;
+
+ vassert(sz == 1 || sz == 2 || sz == 4 || sz == 8);
+
+ /* Put value to shift/rotate in dst0. */
+ if (epartIsReg(modrm)) {
+ assign(dst0, getIRegE(sz, pfx, modrm));
+ delta += (am_sz + d_sz);
+ } else {
+ addr = disAMode ( &len, vbi, pfx, delta, dis_buf, /*xtra*/d_sz );
+ assign(dst0, loadLE(ty,mkexpr(addr)));
+ delta += len + d_sz;
+ }
+
+ isShift = False;
+ switch (gregLO3ofRM(modrm)) { case 4: case 5: case 7: isShift = True; }
+
+ isRotate = False;
+ switch (gregLO3ofRM(modrm)) { case 0: case 1: isRotate = True; }
+
+ isRotateC = False;
+ switch (gregLO3ofRM(modrm)) { case 2: case 3: isRotateC = True; }
+
+ if (gregLO3ofRM(modrm) == 6) {
+ *decode_OK = False;
+ return delta;
+ }
+
+ if (!isShift && !isRotate && !isRotateC) {
+ /*NOTREACHED*/
+ vpanic("dis_Grp2(Reg): unhandled case(amd64)");
+ }
+
+ if (isRotateC) {
+ /* Call a helper; this insn is so ridiculous it does not deserve
+ better. One problem is, the helper has to calculate both the
+ new value and the new flags. This is more than 64 bits, and
+ there is no way to return more than 64 bits from the helper.
+ Hence the crude and obvious solution is to call it twice,
+ using the sign of the sz field to indicate whether it is the
+ value or rflags result we want.
+ */
+ Bool left = toBool(gregLO3ofRM(modrm) == 2);
+ IRExpr** argsVALUE;
+ IRExpr** argsRFLAGS;
+
+ IRTemp new_value = newTemp(Ity_I64);
+ IRTemp new_rflags = newTemp(Ity_I64);
+ IRTemp old_rflags = newTemp(Ity_I64);
+
+ assign( old_rflags, widenUto64(mk_amd64g_calculate_rflags_all()) );
+
+ argsVALUE
+ = mkIRExprVec_4( widenUto64(mkexpr(dst0)), /* thing to rotate */
+ widenUto64(shift_expr), /* rotate amount */
+ mkexpr(old_rflags),
+ mkU64(sz) );
+ assign( new_value,
+ mkIRExprCCall(
+ Ity_I64,
+ 0/*regparm*/,
+ left ? "amd64g_calculate_RCL" : "amd64g_calculate_RCR",
+ left ? &amd64g_calculate_RCL : &amd64g_calculate_RCR,
+ argsVALUE
+ )
+ );
+
+ argsRFLAGS
+ = mkIRExprVec_4( widenUto64(mkexpr(dst0)), /* thing to rotate */
+ widenUto64(shift_expr), /* rotate amount */
+ mkexpr(old_rflags),
+ mkU64(-sz) );
+ assign( new_rflags,
+ mkIRExprCCall(
+ Ity_I64,
+ 0/*regparm*/,
+ left ? "amd64g_calculate_RCL" : "amd64g_calculate_RCR",
+ left ? &amd64g_calculate_RCL : &amd64g_calculate_RCR,
+ argsRFLAGS
+ )
+ );
+
+ assign( dst1, narrowTo(ty, mkexpr(new_value)) );
+ stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
+ stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(new_rflags) ));
+ stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
+ stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
+ }
+
+ else
+ if (isShift) {
+
+ IRTemp pre64 = newTemp(Ity_I64);
+ IRTemp res64 = newTemp(Ity_I64);
+ IRTemp res64ss = newTemp(Ity_I64);
+ IRTemp shift_amt = newTemp(Ity_I8);
+ UChar mask = toUChar(sz==8 ? 63 : 31);
+ IROp op64;
+
+ switch (gregLO3ofRM(modrm)) {
+ case 4: op64 = Iop_Shl64; break;
+ case 5: op64 = Iop_Shr64; break;
+ case 7: op64 = Iop_Sar64; break;
+ /*NOTREACHED*/
+ default: vpanic("dis_Grp2:shift"); break;
+ }
+
+ /* Widen the value to be shifted to 64 bits, do the shift, and
+ narrow back down. This seems surprisingly long-winded, but
+ unfortunately the AMD semantics require that 8/16/32-bit
+ shifts give defined results for shift values all the way up
+ to 31, and this seems the simplest way to do it. It has the
+ advantage that the only IR level shifts generated are of 64
+ bit values, and the shift amount is guaranteed to be in the
+ range 0 .. 63, thereby observing the IR semantics requiring
+ all shift values to be in the range 0 .. 2^word_size-1.
+
+ Therefore the shift amount is masked with 63 for 64-bit shifts
+ and 31 for all others.
+ */
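+ /* Worked example (illustrative): an 8-bit SHL with a count of 9
+ keeps the count as 9 after masking with 31, widens the operand
+ to 64 bits, shifts left by 9 and narrows back to 8 bits, giving
+ the architecturally defined result 0. */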
+ /* shift_amt = shift_expr & MASK, regardless of operation size */
+ assign( shift_amt, binop(Iop_And8, shift_expr, mkU8(mask)) );
+
+ /* suitably widen the value to be shifted to 64 bits. */
+ assign( pre64, op64==Iop_Sar64 ? widenSto64(mkexpr(dst0))
+ : widenUto64(mkexpr(dst0)) );
+
+ /* res64 = pre64 `shift` shift_amt */
+ assign( res64, binop(op64, mkexpr(pre64), mkexpr(shift_amt)) );
+
+ /* res64ss = pre64 `shift` ((shift_amt - 1) & MASK) */
+ assign( res64ss,
+ binop(op64,
+ mkexpr(pre64),
+ binop(Iop_And8,
+ binop(Iop_Sub8,
+ mkexpr(shift_amt), mkU8(1)),
+ mkU8(mask))) );
+
+ /* Build the flags thunk. */
+ setFlags_DEP1_DEP2_shift(op64, res64, res64ss, ty, shift_amt);
+
+ /* Narrow the result back down. */
+ assign( dst1, narrowTo(ty, mkexpr(res64)) );
+
+ } /* if (isShift) */
+
+ else
+ if (isRotate) {
+ Int ccOp = ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 1
+ : (ty==Ity_I32 ? 2 : 3));
+ Bool left = toBool(gregLO3ofRM(modrm) == 0);
+ IRTemp rot_amt = newTemp(Ity_I8);
+ IRTemp rot_amt64 = newTemp(Ity_I8);
+ IRTemp oldFlags = newTemp(Ity_I64);
+ UChar mask = toUChar(sz==8 ? 63 : 31);
+
+ /* rot_amt = shift_expr & mask */
+ /* By masking the rotate amount thusly, the IR-level Shl/Shr
+ expressions never shift beyond the word size and thus remain
+ well defined. */
+ assign(rot_amt64, binop(Iop_And8, shift_expr, mkU8(mask)));
+
+ if (ty == Ity_I64)
+ assign(rot_amt, mkexpr(rot_amt64));
+ else
+ assign(rot_amt, binop(Iop_And8, mkexpr(rot_amt64), mkU8(8*sz-1)));
+
+ if (left) {
+
+ /* dst1 = (dst0 << rot_amt) | (dst0 >>u (wordsize-rot_amt)) */
+ assign(dst1,
+ binop( mkSizedOp(ty,Iop_Or8),
+ binop( mkSizedOp(ty,Iop_Shl8),
+ mkexpr(dst0),
+ mkexpr(rot_amt)
+ ),
+ binop( mkSizedOp(ty,Iop_Shr8),
+ mkexpr(dst0),
+ binop(Iop_Sub8,mkU8(8*sz), mkexpr(rot_amt))
+ )
+ )
+ );
+ ccOp += AMD64G_CC_OP_ROLB;
+
+ } else { /* right */
+
+ /* dst1 = (dst0 >>u rot_amt) | (dst0 << (wordsize-rot_amt)) */
+ assign(dst1,
+ binop( mkSizedOp(ty,Iop_Or8),
+ binop( mkSizedOp(ty,Iop_Shr8),
+ mkexpr(dst0),
+ mkexpr(rot_amt)
+ ),
+ binop( mkSizedOp(ty,Iop_Shl8),
+ mkexpr(dst0),
+ binop(Iop_Sub8,mkU8(8*sz), mkexpr(rot_amt))
+ )
+ )
+ );
+ ccOp += AMD64G_CC_OP_RORB;
+
+ }
+
+ /* dst1 now holds the rotated value. Build flag thunk. We
+ need the resulting value for this, and the previous flags.
+ Except don't set it if the rotate count is zero. */
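+ /* Reminder: IRExpr_Mux0X(c, e0, eX) yields e0 when the I8 value c
+ is zero and eX otherwise, so a zero rotate amount leaves the
+ existing thunk untouched below. */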
+
+ assign(oldFlags, mk_amd64g_calculate_rflags_all());
+
+ /* CC_DEP1 is the rotated value. CC_NDEP is flags before. */
+ stmt( IRStmt_Put( OFFB_CC_OP,
+ IRExpr_Mux0X( mkexpr(rot_amt64),
+ IRExpr_Get(OFFB_CC_OP,Ity_I64),
+ mkU64(ccOp))) );
+ stmt( IRStmt_Put( OFFB_CC_DEP1,
+ IRExpr_Mux0X( mkexpr(rot_amt64),
+ IRExpr_Get(OFFB_CC_DEP1,Ity_I64),
+ widenUto64(mkexpr(dst1)))) );
+ stmt( IRStmt_Put( OFFB_CC_DEP2,
+ IRExpr_Mux0X( mkexpr(rot_amt64),
+ IRExpr_Get(OFFB_CC_DEP2,Ity_I64),
+ mkU64(0))) );
+ stmt( IRStmt_Put( OFFB_CC_NDEP,
+ IRExpr_Mux0X( mkexpr(rot_amt64),
+ IRExpr_Get(OFFB_CC_NDEP,Ity_I64),
+ mkexpr(oldFlags))) );
+ } /* if (isRotate) */
+
+ /* Save result, and finish up. */
+ if (epartIsReg(modrm)) {
+ putIRegE(sz, pfx, modrm, mkexpr(dst1));
+ if (vex_traceflags & VEX_TRACE_FE) {
+ vex_printf("%s%c ",
+ nameGrp2(gregLO3ofRM(modrm)), nameISize(sz) );
+ if (shift_expr_txt)
+ vex_printf("%s", shift_expr_txt);
+ else
+ ppIRExpr(shift_expr);
+ vex_printf(", %s\n", nameIRegE(sz,pfx,modrm));
+ }
+ } else {
+ storeLE(mkexpr(addr), mkexpr(dst1));
+ if (vex_traceflags & VEX_TRACE_FE) {
+ vex_printf("%s%c ",
+ nameGrp2(gregLO3ofRM(modrm)), nameISize(sz) );
+ if (shift_expr_txt)
+ vex_printf("%s", shift_expr_txt);
+ else
+ ppIRExpr(shift_expr);
+ vex_printf(", %s\n", dis_buf);
+ }
+ }
+ return delta;
+}
+
+
+/* Group 8 extended opcodes (but BT/BTS/BTC/BTR only). */
+static
+ULong dis_Grp8_Imm ( VexAbiInfo* vbi,
+ Prefix pfx,
+ Long delta, UChar modrm,
+ Int am_sz, Int sz, ULong src_val,
+ Bool* decode_OK )
+{
+ /* src_val denotes a d8 (the bit offset), and delta on entry
+ points at the modrm byte. */
+
+ IRType ty = szToITy(sz);
+ IRTemp t2 = newTemp(Ity_I64);
+ IRTemp t2m = newTemp(Ity_I64);
+ IRTemp t_addr = IRTemp_INVALID;
+ HChar dis_buf[50];
+ ULong mask;
+
+ /* we're optimists :-) */
+ *decode_OK = True;
+
+ /* Limit src_val -- the bit offset -- to something within a word.
+ The Intel docs say that literal offsets larger than a word are
+ masked in this way. */
+ switch (sz) {
+ case 2: src_val &= 15; break;
+ case 4: src_val &= 31; break;
+ case 8: src_val &= 63; break;
+ default: *decode_OK = False; return delta;
+ }
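+ /* e.g. "btw $17, %ax" (sz == 2): the offset is masked to
+ 17 & 15 == 1. */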
+
+ /* Invent a mask suitable for the operation. */
+ switch (gregLO3ofRM(modrm)) {
+ case 4: /* BT */ mask = 0; break;
+ case 5: /* BTS */ mask = 1ULL << src_val; break;
+ case 6: /* BTR */ mask = ~(1ULL << src_val); break;
+ case 7: /* BTC */ mask = 1ULL << src_val; break;
+ /* If this needs to be extended, it is probably simplest to
+ make a new function to handle the other cases (0 .. 3). The
+ Intel docs do not, however, indicate any use for 0 .. 3, so
+ we don't expect this to happen. */
+ default: *decode_OK = False; return delta;
+ }
+
+ /* Fetch the value to be tested and modified into t2, which is
+ 64-bits wide regardless of sz. */
+ if (epartIsReg(modrm)) {
+ vassert(am_sz == 1);
+ assign( t2, widenUto64(getIRegE(sz, pfx, modrm)) );
+ delta += (am_sz + 1);
+ DIP("%s%c $0x%llx, %s\n", nameGrp8(gregLO3ofRM(modrm)),
+ nameISize(sz),
+ src_val, nameIRegE(sz,pfx,modrm));
+ } else {
+ Int len;
+ t_addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 1 );
+ delta += (len+1);
+ assign( t2, widenUto64(loadLE(ty, mkexpr(t_addr))) );
+ DIP("%s%c $0x%llx, %s\n", nameGrp8(gregLO3ofRM(modrm)),
+ nameISize(sz),
+ src_val, dis_buf);
+ }
+
+ /* Compute the new value into t2m, if non-BT. */
+ switch (gregLO3ofRM(modrm)) {
+ case 4: /* BT */
+ break;
+ case 5: /* BTS */
+ assign( t2m, binop(Iop_Or64, mkU64(mask), mkexpr(t2)) );
+ break;
+ case 6: /* BTR */
+ assign( t2m, binop(Iop_And64, mkU64(mask), mkexpr(t2)) );
+ break;
+ case 7: /* BTC */
+ assign( t2m, binop(Iop_Xor64, mkU64(mask), mkexpr(t2)) );
+ break;
+ default:
+ /*NOTREACHED*/ /*the previous switch guards this*/
+ vassert(0);
+ }
+
+ /* Write the result back, if non-BT. */
+ if (gregLO3ofRM(modrm) != 4 /* BT */) {
+ if (epartIsReg(modrm)) {
+ putIRegE(sz, pfx, modrm, narrowTo(ty, mkexpr(t2m)));
+ } else {
+ if (pfx & PFX_LOCK) {
+ casLE( mkexpr(t_addr),
+ narrowTo(ty, mkexpr(t2))/*expd*/,
+ narrowTo(ty, mkexpr(t2m))/*new*/,
+ guest_RIP_curr_instr );
+ } else {
+ storeLE(mkexpr(t_addr), narrowTo(ty, mkexpr(t2m)));
+ }
+ }
+ }
+
+ /* Copy relevant bit from t2 into the carry flag. */
+ /* Flags: C=selected bit, O,S,Z,A,P undefined, so are set to zero. */
+ stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
+ stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
+ stmt( IRStmt_Put(
+ OFFB_CC_DEP1,
+ binop(Iop_And64,
+ binop(Iop_Shr64, mkexpr(t2), mkU8(src_val)),
+ mkU64(1))
+ ));
+ /* Set NDEP even though it isn't used. This makes redundant-PUT
+ elimination of previous stores to this field work better. */
+ stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
+
+ return delta;
+}
+
+
+/* Signed/unsigned widening multiply. Generate IR to multiply the
+ value in RAX/EAX/AX/AL by the given IRTemp, and park the result in
+ RDX:RAX/EDX:EAX/DX:AX/AX.
+*/
+static void codegen_mulL_A_D ( Int sz, Bool syned,
+ IRTemp tmp, HChar* tmp_txt )
+{
+ IRType ty = szToITy(sz);
+ IRTemp t1 = newTemp(ty);
+
+ assign( t1, getIRegRAX(sz) );
+
+ switch (ty) {
+ case Ity_I64: {
+ IRTemp res128 = newTemp(Ity_I128);
+ IRTemp resHi = newTemp(Ity_I64);
+ IRTemp resLo = newTemp(Ity_I64);
+ IROp mulOp = syned ? Iop_MullS64 : Iop_MullU64;
+ UInt tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB;
+ setFlags_MUL ( Ity_I64, t1, tmp, tBaseOp );
+ assign( res128, binop(mulOp, mkexpr(t1), mkexpr(tmp)) );
+ assign( resHi, unop(Iop_128HIto64,mkexpr(res128)));
+ assign( resLo, unop(Iop_128to64,mkexpr(res128)));
+ putIReg64(R_RDX, mkexpr(resHi));
+ putIReg64(R_RAX, mkexpr(resLo));
+ break;
+ }
+ case Ity_I32: {
+ IRTemp res64 = newTemp(Ity_I64);
+ IRTemp resHi = newTemp(Ity_I32);
+ IRTemp resLo = newTemp(Ity_I32);
+ IROp mulOp = syned ? Iop_MullS32 : Iop_MullU32;
+ UInt tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB;
+ setFlags_MUL ( Ity_I32, t1, tmp, tBaseOp );
+ assign( res64, binop(mulOp, mkexpr(t1), mkexpr(tmp)) );
+ assign( resHi, unop(Iop_64HIto32,mkexpr(res64)));
+ assign( resLo, unop(Iop_64to32,mkexpr(res64)));
+ putIRegRDX(4, mkexpr(resHi));
+ putIRegRAX(4, mkexpr(resLo));
+ break;
+ }
+ case Ity_I16: {
+ IRTemp res32 = newTemp(Ity_I32);
+ IRTemp resHi = newTemp(Ity_I16);
+ IRTemp resLo = newTemp(Ity_I16);
+ IROp mulOp = syned ? Iop_MullS16 : Iop_MullU16;
+ UInt tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB;
+ setFlags_MUL ( Ity_I16, t1, tmp, tBaseOp );
+ assign( res32, binop(mulOp, mkexpr(t1), mkexpr(tmp)) );
+ assign( resHi, unop(Iop_32HIto16,mkexpr(res32)));
+ assign( resLo, unop(Iop_32to16,mkexpr(res32)));
+ putIRegRDX(2, mkexpr(resHi));
+ putIRegRAX(2, mkexpr(resLo));
+ break;
+ }
+ case Ity_I8: {
+ IRTemp res16 = newTemp(Ity_I16);
+ IRTemp resHi = newTemp(Ity_I8);
+ IRTemp resLo = newTemp(Ity_I8);
+ IROp mulOp = syned ? Iop_MullS8 : Iop_MullU8;
+ UInt tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB;
+ setFlags_MUL ( Ity_I8, t1, tmp, tBaseOp );
+ assign( res16, binop(mulOp, mkexpr(t1), mkexpr(tmp)) );
+ assign( resHi, unop(Iop_16HIto8,mkexpr(res16)));
+ assign( resLo, unop(Iop_16to8,mkexpr(res16)));
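+ /* For the 8-bit case the whole 16-bit product goes to AX
+ (AL = low half, AH = high half), so resHi/resLo are not
+ actually needed here. */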
+ putIRegRAX(2, mkexpr(res16));
+ break;
+ }
+ default:
+ ppIRType(ty);
+ vpanic("codegen_mulL_A_D(amd64)");
+ }
+ DIP("%s%c %s\n", syned ? "imul" : "mul", nameISize(sz), tmp_txt);
+}
+
+
+/* Group 3 extended opcodes. */
+static
+ULong dis_Grp3 ( VexAbiInfo* vbi,
+ Prefix pfx, Int sz, Long delta, Bool* decode_OK )
+{
+ Long d64;
+ UChar modrm;
+ HChar dis_buf[50];
+ Int len;
+ IRTemp addr;
+ IRType ty = szToITy(sz);
+ IRTemp t1 = newTemp(ty);
+ IRTemp dst1, src, dst0;
+ *decode_OK = True;
+ modrm = getUChar(delta);
+ if (epartIsReg(modrm)) {
+ switch (gregLO3ofRM(modrm)) {
+ case 0: { /* TEST */
+ delta++;
+ d64 = getSDisp(imin(4,sz), delta);
+ delta += imin(4,sz);
+ dst1 = newTemp(ty);
+ assign(dst1, binop(mkSizedOp(ty,Iop_And8),
+ getIRegE(sz,pfx,modrm),
+ mkU(ty, d64 & mkSizeMask(sz))));
+ setFlags_DEP1( Iop_And8, dst1, ty );
+ DIP("test%c $%lld, %s\n",
+ nameISize(sz), d64,
+ nameIRegE(sz, pfx, modrm));
+ break;
+ }
+ case 1:
+ *decode_OK = False;
+ return delta;
+ case 2: /* NOT */
+ delta++;
+ putIRegE(sz, pfx, modrm,
+ unop(mkSizedOp(ty,Iop_Not8),
+ getIRegE(sz, pfx, modrm)));
+ DIP("not%c %s\n", nameISize(sz),
+ nameIRegE(sz, pfx, modrm));
+ break;
+ case 3: /* NEG */
+ delta++;
+ dst0 = newTemp(ty);
+ src = newTemp(ty);
+ dst1 = newTemp(ty);
+ assign(dst0, mkU(ty,0));
+ assign(src, getIRegE(sz, pfx, modrm));
+ assign(dst1, binop(mkSizedOp(ty,Iop_Sub8), mkexpr(dst0),
+ mkexpr(src)));
+ setFlags_DEP1_DEP2(Iop_Sub8, dst0, src, ty);
+ putIRegE(sz, pfx, modrm, mkexpr(dst1));
+ DIP("neg%c %s\n", nameISize(sz), nameIRegE(sz, pfx, modrm));
+ break;
+ case 4: /* MUL (unsigned widening) */
+ delta++;
+ src = newTemp(ty);
+ assign(src, getIRegE(sz,pfx,modrm));
+ codegen_mulL_A_D ( sz, False, src,
+ nameIRegE(sz,pfx,modrm) );
+ break;
+ case 5: /* IMUL (signed widening) */
+ delta++;
+ src = newTemp(ty);
+ assign(src, getIRegE(sz,pfx,modrm));
+ codegen_mulL_A_D ( sz, True, src,
+ nameIRegE(sz,pfx,modrm) );
+ break;
+ case 6: /* DIV */
+ delta++;
+ assign( t1, getIRegE(sz, pfx, modrm) );
+ codegen_div ( sz, t1, False );
+ DIP("div%c %s\n", nameISize(sz),
+ nameIRegE(sz, pfx, modrm));
+ break;
+ case 7: /* IDIV */
+ delta++;
+ assign( t1, getIRegE(sz, pfx, modrm) );
+ codegen_div ( sz, t1, True );
+ DIP("idiv%c %s\n", nameISize(sz),
+ nameIRegE(sz, pfx, modrm));
+ break;
+ default:
+ /*NOTREACHED*/
+ vpanic("Grp3(amd64,R)");
+ }
+ } else {
+ addr = disAMode ( &len, vbi, pfx, delta, dis_buf,
+ /* we have to inform disAMode of any immediate
+ bytes used */
+ gregLO3ofRM(modrm)==0/*TEST*/
+ ? imin(4,sz)
+ : 0
+ );
+ t1 = newTemp(ty);
+ delta += len;
+ assign(t1, loadLE(ty,mkexpr(addr)));
+ switch (gregLO3ofRM(modrm)) {
+ case 0: { /* TEST */
+ d64 = getSDisp(imin(4,sz), delta);
+ delta += imin(4,sz);
+ dst1 = newTemp(ty);
+ assign(dst1, binop(mkSizedOp(ty,Iop_And8),
+ mkexpr(t1),
+ mkU(ty, d64 & mkSizeMask(sz))));
+ setFlags_DEP1( Iop_And8, dst1, ty );
+ DIP("test%c $%lld, %s\n", nameISize(sz), d64, dis_buf);
+ break;
+ }
+ case 1:
+ *decode_OK = False;
+ return delta;
+ case 2: /* NOT */
+ dst1 = newTemp(ty);
+ assign(dst1, unop(mkSizedOp(ty,Iop_Not8), mkexpr(t1)));
+ if (pfx & PFX_LOCK) {
+ casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(dst1)/*new*/,
+ guest_RIP_curr_instr );
+ } else {
+ storeLE( mkexpr(addr), mkexpr(dst1) );
+ }
+ DIP("not%c %s\n", nameISize(sz), dis_buf);
+ break;
+ case 3: /* NEG */
+ dst0 = newTemp(ty);
+ src = newTemp(ty);
+ dst1 = newTemp(ty);
+ assign(dst0, mkU(ty,0));
+ assign(src, mkexpr(t1));
+ assign(dst1, binop(mkSizedOp(ty,Iop_Sub8), mkexpr(dst0),
+ mkexpr(src)));
+ if (pfx & PFX_LOCK) {
+ casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(dst1)/*new*/,
+ guest_RIP_curr_instr );
+ } else {
+ storeLE( mkexpr(addr), mkexpr(dst1) );
+ }
+ setFlags_DEP1_DEP2(Iop_Sub8, dst0, src, ty);
+ DIP("neg%c %s\n", nameISize(sz), dis_buf);
+ break;
+ case 4: /* MUL (unsigned widening) */
+ codegen_mulL_A_D ( sz, False, t1, dis_buf );
+ break;
+ case 5: /* IMUL */
+ codegen_mulL_A_D ( sz, True, t1, dis_buf );
+ break;
+ case 6: /* DIV */
+ codegen_div ( sz, t1, False );
+ DIP("div%c %s\n", nameISize(sz), dis_buf);
+ break;
+ case 7: /* IDIV */
+ codegen_div ( sz, t1, True );
+ DIP("idiv%c %s\n", nameISize(sz), dis_buf);
+ break;
+ default:
+ /*NOTREACHED*/
+ vpanic("Grp3(amd64,M)");
+ }
+ }
+ return delta;
+}
+
+
+/* Group 4 extended opcodes. */
+static
+ULong dis_Grp4 ( VexAbiInfo* vbi,
+ Prefix pfx, Long delta, Bool* decode_OK )
+{
+ Int alen;
+ UChar modrm;
+ HChar dis_buf[50];
+ IRType ty = Ity_I8;
+ IRTemp t1 = newTemp(ty);
+ IRTemp t2 = newTemp(ty);
+
+ *decode_OK = True;
+
+ modrm = getUChar(delta);
+ if (epartIsReg(modrm)) {
+ assign(t1, getIRegE(1, pfx, modrm));
+ switch (gregLO3ofRM(modrm)) {
+ case 0: /* INC */
+ assign(t2, binop(Iop_Add8, mkexpr(t1), mkU8(1)));
+ putIRegE(1, pfx, modrm, mkexpr(t2));
+ setFlags_INC_DEC( True, t2, ty );
+ break;
+ case 1: /* DEC */
+ assign(t2, binop(Iop_Sub8, mkexpr(t1), mkU8(1)));
+ putIRegE(1, pfx, modrm, mkexpr(t2));
+ setFlags_INC_DEC( False, t2, ty );
+ break;
+ default:
+ *decode_OK = False;
+ return delta;
+ }
+ delta++;
+ DIP("%sb %s\n", nameGrp4(gregLO3ofRM(modrm)),
+ nameIRegE(1, pfx, modrm));
+ } else {
+ IRTemp addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
+ assign( t1, loadLE(ty, mkexpr(addr)) );
+ switch (gregLO3ofRM(modrm)) {
+ case 0: /* INC */
+ assign(t2, binop(Iop_Add8, mkexpr(t1), mkU8(1)));
+ if (pfx & PFX_LOCK) {
+ casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(t2)/*new*/,
+ guest_RIP_curr_instr );
+ } else {
+ storeLE( mkexpr(addr), mkexpr(t2) );
+ }
+ setFlags_INC_DEC( True, t2, ty );
+ break;
+ case 1: /* DEC */
+ assign(t2, binop(Iop_Sub8, mkexpr(t1), mkU8(1)));
+ if (pfx & PFX_LOCK) {
+ casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(t2)/*new*/,
+ guest_RIP_curr_instr );
+ } else {
+ storeLE( mkexpr(addr), mkexpr(t2) );
+ }
+ setFlags_INC_DEC( False, t2, ty );
+ break;
+ default:
+ *decode_OK = False;
+ return delta;
+ }
+ delta += alen;
+ DIP("%sb %s\n", nameGrp4(gregLO3ofRM(modrm)), dis_buf);
+ }
+ return delta;
+}
+
+
+/* Group 5 extended opcodes. */
+static
+ULong dis_Grp5 ( VexAbiInfo* vbi,
+ Prefix pfx, Int sz, Long delta,
+ DisResult* dres, Bool* decode_OK )
+{
+ Int len;
+ UChar modrm;
+ HChar dis_buf[50];
+ IRTemp addr = IRTemp_INVALID;
+ IRType ty = szToITy(sz);
+ IRTemp t1 = newTemp(ty);
+ IRTemp t2 = IRTemp_INVALID;
+ IRTemp t3 = IRTemp_INVALID;
+ Bool showSz = True;
+
+ *decode_OK = True;
+
+ modrm = getUChar(delta);
+ if (epartIsReg(modrm)) {
+ assign(t1, getIRegE(sz,pfx,modrm));
+ switch (gregLO3ofRM(modrm)) {
+ case 0: /* INC */
+ t2 = newTemp(ty);
+ assign(t2, binop(mkSizedOp(ty,Iop_Add8),
+ mkexpr(t1), mkU(ty,1)));
+ setFlags_INC_DEC( True, t2, ty );
+ putIRegE(sz,pfx,modrm, mkexpr(t2));
+ break;
+ case 1: /* DEC */
+ t2 = newTemp(ty);
+ assign(t2, binop(mkSizedOp(ty,Iop_Sub8),
+ mkexpr(t1), mkU(ty,1)));
+ setFlags_INC_DEC( False, t2, ty );
+ putIRegE(sz,pfx,modrm, mkexpr(t2));
+ break;
+ case 2: /* call Ev */
+ /* Ignore any sz value and operate as if sz==8. */
+ if (!(sz == 4 || sz == 8)) goto unhandled;
+ sz = 8;
+ t3 = newTemp(Ity_I64);
+ assign(t3, getIRegE(sz,pfx,modrm));
+ t2 = newTemp(Ity_I64);
+ assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8)));
+ putIReg64(R_RSP, mkexpr(t2));
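+ /* push the return address: delta points at the modrm byte,
+ so the reg-form call insn ends 1 byte further on */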
+ storeLE( mkexpr(t2), mkU64(guest_RIP_bbstart+delta+1));
+ make_redzone_AbiHint(vbi, t2, t3/*nia*/, "call-Ev(reg)");
+ jmp_treg(Ijk_Call,t3);
+ dres->whatNext = Dis_StopHere;
+ showSz = False;
+ break;
+ case 4: /* jmp Ev */
+ /* Ignore any sz value and operate as if sz==8. */
+ if (!(sz == 4 || sz == 8)) goto unhandled;
+ sz = 8;
+ t3 = newTemp(Ity_I64);
+ assign(t3, getIRegE(sz,pfx,modrm));
+ jmp_treg(Ijk_Boring,t3);
+ dres->whatNext = Dis_StopHere;
+ showSz = False;
+ break;
+ default:
+ *decode_OK = False;
+ return delta;
+ }
+ delta++;
+ DIP("%s%c %s\n", nameGrp5(gregLO3ofRM(modrm)),
+ showSz ? nameISize(sz) : ' ',
+ nameIRegE(sz, pfx, modrm));
+ } else {
+ addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 0 );
+ if (gregLO3ofRM(modrm) != 2 && gregLO3ofRM(modrm) != 4
+ && gregLO3ofRM(modrm) != 6) {
+ assign(t1, loadLE(ty,mkexpr(addr)));
+ }
+ switch (gregLO3ofRM(modrm)) {
+ case 0: /* INC */
+ t2 = newTemp(ty);
+ assign(t2, binop(mkSizedOp(ty,Iop_Add8),
+ mkexpr(t1), mkU(ty,1)));
+ if (pfx & PFX_LOCK) {
+ casLE( mkexpr(addr),
+ mkexpr(t1), mkexpr(t2), guest_RIP_curr_instr );
+ } else {
+ storeLE(mkexpr(addr),mkexpr(t2));
+ }
+ setFlags_INC_DEC( True, t2, ty );
+ break;
+ case 1: /* DEC */
+ t2 = newTemp(ty);
+ assign(t2, binop(mkSizedOp(ty,Iop_Sub8),
+ mkexpr(t1), mkU(ty,1)));
+ if (pfx & PFX_LOCK) {
+ casLE( mkexpr(addr),
+ mkexpr(t1), mkexpr(t2), guest_RIP_curr_instr );
+ } else {
+ storeLE(mkexpr(addr),mkexpr(t2));
+ }
+ setFlags_INC_DEC( False, t2, ty );
+ break;
+ case 2: /* call Ev */
+ /* Ignore any sz value and operate as if sz==8. */
+ if (!(sz == 4 || sz == 8)) goto unhandled;
+ sz = 8;
+ t3 = newTemp(Ity_I64);
+ assign(t3, loadLE(Ity_I64,mkexpr(addr)));
+ t2 = newTemp(Ity_I64);
+ assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8)));
+ putIReg64(R_RSP, mkexpr(t2));
+ storeLE( mkexpr(t2), mkU64(guest_RIP_bbstart+delta+len));
+ make_redzone_AbiHint(vbi, t2, t3/*nia*/, "call-Ev(mem)");
+ jmp_treg(Ijk_Call,t3);
+ dres->whatNext = Dis_StopHere;
+ showSz = False;
+ break;
+ case 4: /* JMP Ev */
+ /* Ignore any sz value and operate as if sz==8. */
+ if (!(sz == 4 || sz == 8)) goto unhandled;
+ sz = 8;
+ t3 = newTemp(Ity_I64);
+ assign(t3, loadLE(Ity_I64,mkexpr(addr)));
+ jmp_treg(Ijk_Boring,t3);
+ dres->whatNext = Dis_StopHere;
+ showSz = False;
+ break;
+ case 6: /* PUSH Ev */
+ /* There is no encoding for 32-bit operand size; hence ... */
+ if (sz == 4) sz = 8;
+ if (!(sz == 8 || sz == 2)) goto unhandled;
+ if (sz == 8) {
+ t3 = newTemp(Ity_I64);
+ assign(t3, loadLE(Ity_I64,mkexpr(addr)));
+ t2 = newTemp(Ity_I64);
+ assign( t2, binop(Iop_Sub64,getIReg64(R_RSP),mkU64(sz)) );
+ putIReg64(R_RSP, mkexpr(t2) );
+ storeLE( mkexpr(t2), mkexpr(t3) );
+ break;
+ } else {
+ goto unhandled; /* awaiting test case */
+ }
+ default:
+ unhandled:
+ *decode_OK = False;
+ return delta;
+ }
+ delta += len;
+ DIP("%s%c %s\n", nameGrp5(gregLO3ofRM(modrm)),
+ showSz ? nameISize(sz) : ' ',
+ dis_buf);
+ }
+ return delta;
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Disassembling string ops (including REP prefixes) ---*/
+/*------------------------------------------------------------*/
+
+/* Code shared by all the string ops */
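+/* (The guest DFLAG field holds +1 or -1, so shifting it left by
+ log2(sz) yields the per-iteration increment: e.g. +4 or -4 for
+ the 4-byte string ops.) */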
+static
+void dis_string_op_increment ( Int sz, IRTemp t_inc )
+{
+ UChar logSz;
+ if (sz == 8 || sz == 4 || sz == 2) {
+ logSz = 1;
+ if (sz == 4) logSz = 2;
+ if (sz == 8) logSz = 3;
+ assign( t_inc,
+ binop(Iop_Shl64, IRExpr_Get( OFFB_DFLAG, Ity_I64 ),
+ mkU8(logSz) ) );
+ } else {
+ assign( t_inc,
+ IRExpr_Get( OFFB_DFLAG, Ity_I64 ) );
+ }
+}
+
+static
+void dis_string_op( void (*dis_OP)( Int, IRTemp ),
+ Int sz, HChar* name, Prefix pfx )
+{
+ IRTemp t_inc = newTemp(Ity_I64);
+ /* Really we ought to inspect the override prefixes, but we don't.
+ The following assertion catches any resulting silliness. */
+ vassert(pfx == clearSegBits(pfx));
+ dis_string_op_increment(sz, t_inc);
+ dis_OP( sz, t_inc );
+ DIP("%s%c\n", name, nameISize(sz));
+}
+
+static
+void dis_MOVS ( Int sz, IRTemp t_inc )
+{
+ IRType ty = szToITy(sz);
+ IRTemp td = newTemp(Ity_I64); /* RDI */
+ IRTemp ts = newTemp(Ity_I64); /* RSI */
+
+ assign( td, getIReg64(R_RDI) );
+ assign( ts, getIReg64(R_RSI) );
+
+ storeLE( mkexpr(td), loadLE(ty,mkexpr(ts)) );
+
+ putIReg64( R_RDI, binop(Iop_Add64, mkexpr(td), mkexpr(t_inc)) );
+ putIReg64( R_RSI, binop(Iop_Add64, mkexpr(ts), mkexpr(t_inc)) );
+}
+
+static
+void dis_LODS ( Int sz, IRTemp t_inc )
+{
+ IRType ty = szToITy(sz);
+ IRTemp ts = newTemp(Ity_I64); /* RSI */
+
+ assign( ts, getIReg64(R_RSI) );
+
+ putIRegRAX ( sz, loadLE(ty, mkexpr(ts)) );
+
+ putIReg64( R_RSI, binop(Iop_Add64, mkexpr(ts), mkexpr(t_inc)) );
+}
+
+static
+void dis_STOS ( Int sz, IRTemp t_inc )
+{
+ IRType ty = szToITy(sz);
+ IRTemp ta = newTemp(ty); /* rAX */
+ IRTemp td = newTemp(Ity_I64); /* RDI */
+
+ assign( ta, getIRegRAX(sz) );
+
+ assign( td, getIReg64(R_RDI) );
+
+ storeLE( mkexpr(td), mkexpr(ta) );
+
+ putIReg64( R_RDI, binop(Iop_Add64, mkexpr(td), mkexpr(t_inc)) );
+}
+
+static
+void dis_CMPS ( Int sz, IRTemp t_inc )
+{
+ IRType ty = szToITy(sz);
+ IRTemp tdv = newTemp(ty); /* (RDI) */
+ IRTemp tsv = newTemp(ty); /* (RSI) */
+ IRTemp td = newTemp(Ity_I64); /* RDI */
+ IRTemp ts = newTemp(Ity_I64); /* RSI */
+
+ assign( td, getIReg64(R_RDI) );
+
+ assign( ts, getIReg64(R_RSI) );
+
+ assign( tdv, loadLE(ty,mkexpr(td)) );
+
+ assign( tsv, loadLE(ty,mkexpr(ts)) );
+
+ setFlags_DEP1_DEP2 ( Iop_Sub8, tsv, tdv, ty );
+
+ putIReg64(R_RDI, binop(Iop_Add64, mkexpr(td), mkexpr(t_inc)) );
+
+ putIReg64(R_RSI, binop(Iop_Add64, mkexpr(ts), mkexpr(t_inc)) );
+}
+
+static
+void dis_SCAS ( Int sz, IRTemp t_inc )
+{
+ IRType ty = szToITy(sz);
+ IRTemp ta = newTemp(ty); /* rAX */
+ IRTemp td = newTemp(Ity_I64); /* RDI */
+ IRTemp tdv = newTemp(ty); /* (RDI) */
+
+ assign( ta, getIRegRAX(sz) );
+
+ assign( td, getIReg64(R_RDI) );
+
+ assign( tdv, loadLE(ty,mkexpr(td)) );
+
+ setFlags_DEP1_DEP2 ( Iop_Sub8, ta, tdv, ty );
+
+ putIReg64(R_RDI, binop(Iop_Add64, mkexpr(td), mkexpr(t_inc)) );
+}
+
+
+/* Wrap the appropriate string op inside a REP/REPE/REPNE. We assume
+ the insn is the last one in the basic block, and so emit a jump to
+ the next insn, rather than just falling through. */
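+/* (For plain REP the caller passes AMD64CondAlways, which gives an
+ unconditional back-edge to rip; REPE/REPNE instead pass a condition
+ that is tested after the string op itself.) */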
+static
+void dis_REP_op ( AMD64Condcode cond,
+ void (*dis_OP)(Int, IRTemp),
+ Int sz, Addr64 rip, Addr64 rip_next, HChar* name,
+ Prefix pfx )
+{
+ IRTemp t_inc = newTemp(Ity_I64);
+ IRTemp tc = newTemp(Ity_I64); /* RCX */
+
+ /* Really we ought to inspect the override prefixes, but we don't.
+ The following assertion catches any resulting silliness. */
+ vassert(pfx == clearSegBits(pfx));
+
+ assign( tc, getIReg64(R_RCX) );
+
+ stmt( IRStmt_Exit( binop(Iop_CmpEQ64,mkexpr(tc),mkU64(0)),
+ Ijk_Boring,
+ IRConst_U64(rip_next) ) );
+
+ putIReg64(R_RCX, binop(Iop_Sub64, mkexpr(tc), mkU64(1)) );
+
+ dis_string_op_increment(sz, t_inc);
+ dis_OP (sz, t_inc);
+
+ if (cond == AMD64CondAlways) {
+ jmp_lit(Ijk_Boring,rip);
+ } else {
+ stmt( IRStmt_Exit( mk_amd64g_calculate_condition(cond),
+ Ijk_Boring,
+ IRConst_U64(rip) ) );
+ jmp_lit(Ijk_Boring,rip_next);
+ }
+ DIP("%s%c\n", name, nameISize(sz));
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Arithmetic, etc. ---*/
+/*------------------------------------------------------------*/
+
+/* IMUL E, G. Supplied delta points at the modR/M byte. */
+static
+ULong dis_mul_E_G ( VexAbiInfo* vbi,
+ Prefix pfx,
+ Int size,
+ Long delta0 )
+{
+ Int alen;
+ HChar dis_buf[50];
+ UChar rm = getUChar(delta0);
+ IRType ty = szToITy(size);
+ IRTemp te = newTemp(ty);
+ IRTemp tg = newTemp(ty);
+ IRTemp resLo = newTemp(ty);
+
+ assign( tg, getIRegG(size, pfx, rm) );
+ if (epartIsReg(rm)) {
+ assign( te, getIRegE(size, pfx, rm) );
+ } else {
+ IRTemp addr = disAMode( &alen, vbi, pfx, delta0, dis_buf, 0 );
+ assign( te, loadLE(ty,mkexpr(addr)) );
+ }
+
+ setFlags_MUL ( ty, te, tg, AMD64G_CC_OP_SMULB );
+
+ assign( resLo, binop( mkSizedOp(ty, Iop_Mul8), mkexpr(te), mkexpr(tg) ) );
+
+ putIRegG(size, pfx, rm, mkexpr(resLo) );
+
+ if (epartIsReg(rm)) {
+ DIP("imul%c %s, %s\n", nameISize(size),
+ nameIRegE(size,pfx,rm),
+ nameIRegG(size,pfx,rm));
+ return 1+delta0;
+ } else {
+ DIP("imul%c %s, %s\n", nameISize(size),
+ dis_buf,
+ nameIRegG(size,pfx,rm));
+ return alen+delta0;
+ }
+}
+
+
+/* IMUL I * E -> G. Supplied delta points at the modR/M byte. */
+static
+ULong dis_imul_I_E_G ( VexAbiInfo* vbi,
+ Prefix pfx,
+ Int size,
+ Long delta,
+ Int litsize )
+{
+ Long d64;
+ Int alen;
+ HChar dis_buf[50];
+ UChar rm = getUChar(delta);
+ IRType ty = szToITy(size);
+ IRTemp te = newTemp(ty);
+ IRTemp tl = newTemp(ty);
+ IRTemp resLo = newTemp(ty);
+
+ vassert(/*size == 1 ||*/ size == 2 || size == 4 || size == 8);
+
+ if (epartIsReg(rm)) {
+ assign(te, getIRegE(size, pfx, rm));
+ delta++;
+ } else {
+ IRTemp addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
+ imin(4,litsize) );
+ assign(te, loadLE(ty, mkexpr(addr)));
+ delta += alen;
+ }
+ d64 = getSDisp(imin(4,litsize),delta);
+ delta += imin(4,litsize);
+
+ d64 &= mkSizeMask(size);
+ assign(tl, mkU(ty,d64));
+
+ assign( resLo, binop( mkSizedOp(ty, Iop_Mul8), mkexpr(te), mkexpr(tl) ));
+
+ setFlags_MUL ( ty, te, tl, AMD64G_CC_OP_SMULB );
+
+ putIRegG(size, pfx, rm, mkexpr(resLo));
+
+ DIP("imul%c $%lld, %s, %s\n",
+ nameISize(size), d64,
+ ( epartIsReg(rm) ? nameIRegE(size,pfx,rm) : dis_buf ),
+ nameIRegG(size,pfx,rm) );
+ return delta;
+}
+
+
+/* Generate an IR sequence to do a popcount operation on the supplied
+ IRTemp, and return a new IRTemp holding the result. 'ty' may be
+ Ity_I16, Ity_I32 or Ity_I64 only. */
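+/* This is the classic parallel bit-summing scheme. A worked 16-bit
+ example (illustrative): starting from 0xF0F0,
+ pass 1 (mask 0x5555, shift 1) -> 0xA0A0
+ pass 2 (mask 0x3333, shift 2) -> 0x4040
+ pass 3 (mask 0x0F0F, shift 4) -> 0x0404
+ pass 4 (mask 0x00FF, shift 8) -> 0x0008 == popcount(0xF0F0). */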
+static IRTemp gen_POPCOUNT ( IRType ty, IRTemp src )
+{
+ Int i;
+ if (ty == Ity_I16) {
+ IRTemp old = IRTemp_INVALID;
+ IRTemp nyu = IRTemp_INVALID;
+ IRTemp mask[4];
+ Int shift[4];
+ for (i = 0; i < 4; i++) {
+ mask[i] = newTemp(ty);
+ shift[i] = 1 << i;
+ }
+ assign(mask[0], mkU16(0x5555));
+ assign(mask[1], mkU16(0x3333));
+ assign(mask[2], mkU16(0x0F0F));
+ assign(mask[3], mkU16(0x00FF));
+ old = src;
+ for (i = 0; i < 4; i++) {
+ nyu = newTemp(ty);
+ assign(nyu,
+ binop(Iop_Add16,
+ binop(Iop_And16,
+ mkexpr(old),
+ mkexpr(mask[i])),
+ binop(Iop_And16,
+ binop(Iop_Shr16, mkexpr(old), mkU8(shift[i])),
+ mkexpr(mask[i]))));
+ old = nyu;
+ }
+ return nyu;
+ }
+ if (ty == Ity_I32) {
+ IRTemp old = IRTemp_INVALID;
+ IRTemp nyu = IRTemp_INVALID;
+ IRTemp mask[5];
+ Int shift[5];
+ for (i = 0; i < 5; i++) {
+ mask[i] = newTemp(ty);
+ shift[i] = 1 << i;
+ }
+ assign(mask[0], mkU32(0x55555555));
+ assign(mask[1], mkU32(0x33333333));
+ assign(mask[2], mkU32(0x0F0F0F0F));
+ assign(mask[3], mkU32(0x00FF00FF));
+ assign(mask[4], mkU32(0x0000FFFF));
+ old = src;
+ for (i = 0; i < 5; i++) {
+ nyu = newTemp(ty);
+ assign(nyu,
+ binop(Iop_Add32,
+ binop(Iop_And32,
+ mkexpr(old),
+ mkexpr(mask[i])),
+ binop(Iop_And32,
+ binop(Iop_Shr32, mkexpr(old), mkU8(shift[i])),
+ mkexpr(mask[i]))));
+ old = nyu;
+ }
+ return nyu;
+ }
+ if (ty == Ity_I64) {
+ IRTemp old = IRTemp_INVALID;
+ IRTemp nyu = IRTemp_INVALID;
+ IRTemp mask[6];
+ Int shift[6];
+ for (i = 0; i < 6; i++) {
+ mask[i] = newTemp(ty);
+ shift[i] = 1 << i;
+ }
+ assign(mask[0], mkU64(0x5555555555555555ULL));
+ assign(mask[1], mkU64(0x3333333333333333ULL));
+ assign(mask[2], mkU64(0x0F0F0F0F0F0F0F0FULL));
+ assign(mask[3], mkU64(0x00FF00FF00FF00FFULL));
+ assign(mask[4], mkU64(0x0000FFFF0000FFFFULL));
+ assign(mask[5], mkU64(0x00000000FFFFFFFFULL));
+ old = src;
+ for (i = 0; i < 6; i++) {
+ nyu = newTemp(ty);
+ assign(nyu,
+ binop(Iop_Add64,
+ binop(Iop_And64,
+ mkexpr(old),
+ mkexpr(mask[i])),
+ binop(Iop_And64,
+ binop(Iop_Shr64, mkexpr(old), mkU8(shift[i])),
+ mkexpr(mask[i]))));
+ old = nyu;
+ }
+ return nyu;
+ }
+ /*NOTREACHED*/
+ vassert(0);
+}
+
+
+/* Generate an IR sequence to do a count-leading-zeroes operation on
+ the supplied IRTemp, and return a new IRTemp holding the result.
+ 'ty' may be Ity_I16, Ity_I32 or Ity_I64 only. In the case where
+ the argument is zero, return the number of bits in the word (the
+ natural semantics). */
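+/* Worked example (illustrative): for a 16-bit source of 0x0001 the
+ value is shifted left by 48 so that its MSB lands at bit 63;
+ Iop_Clz64 then counts 15 leading zeroes, which is the 16-bit
+ result. A zero source takes the Mux0X path and yields 16. */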
+static IRTemp gen_LZCNT ( IRType ty, IRTemp src )
+{
+ vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16);
+
+ IRTemp src64 = newTemp(Ity_I64);
+ assign(src64, widenUto64( mkexpr(src) ));
+
+ IRTemp src64x = newTemp(Ity_I64);
+ assign(src64x,
+ binop(Iop_Shl64, mkexpr(src64),
+ mkU8(64 - 8 * sizeofIRType(ty))));
+
+ // Clz64 has undefined semantics when its input is zero, so
+ // special-case around that.
+ IRTemp res64 = newTemp(Ity_I64);
+ assign(res64,
+ IRExpr_Mux0X(
+ unop(Iop_1Uto8,
+ binop(Iop_CmpEQ64, mkexpr(src64x), mkU64(0))),
+ unop(Iop_Clz64, mkexpr(src64x)),
+ mkU64(8 * sizeofIRType(ty))
+ ));
+
+ IRTemp res = newTemp(ty);
+ assign(res, narrowTo(ty, mkexpr(res64)));
+ return res;
+}
+
+
+/*------------------------------------------------------------*/
+/*--- ---*/
+/*--- x87 FLOATING POINT INSTRUCTIONS ---*/
+/*--- ---*/
+/*------------------------------------------------------------*/
+
+/* --- Helper functions for dealing with the register stack. --- */
+
+/* --- Set the emulation-warning pseudo-register. --- */
+
+static void put_emwarn ( IRExpr* e /* :: Ity_I32 */ )
+{
+ vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
+ stmt( IRStmt_Put( OFFB_EMWARN, e ) );
+}
+
+/* --- Produce an IRExpr* denoting a 64-bit QNaN. --- */
+
+static IRExpr* mkQNaN64 ( void )
+{
+ /* QNaN is sign=0, exponent=2047 (all ones), fraction=1 then 51 zeroes
+ == 0b 0 11111111111 1 0(51 times)
+ == 0x7FF8 0000 0000 0000
+ */
+ return IRExpr_Const(IRConst_F64i(0x7FF8000000000000ULL));
+}
+
+/* --------- Get/put the top-of-stack pointer :: Ity_I32 --------- */
+
+static IRExpr* get_ftop ( void )
+{
+ return IRExpr_Get( OFFB_FTOP, Ity_I32 );
+}
+
+static void put_ftop ( IRExpr* e )
+{
+ vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
+ stmt( IRStmt_Put( OFFB_FTOP, e ) );
+}
+
+/* --------- Get/put the C3210 bits. --------- */
+
+static IRExpr* /* :: Ity_I64 */ get_C3210 ( void )
+{
+ return IRExpr_Get( OFFB_FC3210, Ity_I64 );
+}
+
+static void put_C3210 ( IRExpr* e /* :: Ity_I64 */ )
+{
+ vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
+ stmt( IRStmt_Put( OFFB_FC3210, e ) );
+}
+
+/* --------- Get/put the FPU rounding mode. --------- */
+static IRExpr* /* :: Ity_I32 */ get_fpround ( void )
+{
+ return unop(Iop_64to32, IRExpr_Get( OFFB_FPROUND, Ity_I64 ));
+}
+
+static void put_fpround ( IRExpr* /* :: Ity_I32 */ e )
+{
+ vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
+ stmt( IRStmt_Put( OFFB_FPROUND, unop(Iop_32Uto64,e) ) );
+}
+
+
+/* --------- Synthesise a 2-bit FPU rounding mode. --------- */
+/* Produces a value in 0 .. 3, which is encoded as per the type
+ IRRoundingMode. Since the guest_FPROUND value is also encoded as
+ per IRRoundingMode, we merely need to get it and mask it for
+ safety.
+*/
+static IRExpr* /* :: Ity_I32 */ get_roundingmode ( void )
+{
+ return binop( Iop_And32, get_fpround(), mkU32(3) );
+}
+
+static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void )
+{
+ return mkU32(Irrm_NEAREST);
+}
+
+
+/* --------- Get/set FP register tag bytes. --------- */
+
+/* Given i, and some expression e, generate 'ST_TAG(i) = e'. */
+
+static void put_ST_TAG ( Int i, IRExpr* value )
+{
+ IRRegArray* descr;
+ vassert(typeOfIRExpr(irsb->tyenv, value) == Ity_I8);
+ descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
+ stmt( IRStmt_PutI( descr, get_ftop(), i, value ) );
+}
+
+/* Given i, generate an expression yielding 'ST_TAG(i)'. This will be
+ zero to indicate "Empty" and nonzero to indicate "NonEmpty". */
+
+static IRExpr* get_ST_TAG ( Int i )
+{
+ IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
+ return IRExpr_GetI( descr, get_ftop(), i );
+}
+
+
+/* --------- Get/set FP registers. --------- */
+
+/* Given i, and some expression e, emit 'ST(i) = e' and set the
+ register's tag to indicate the register is full. The previous
+ state of the register is not checked. */
+
+static void put_ST_UNCHECKED ( Int i, IRExpr* value )
+{
+ IRRegArray* descr;
+ vassert(typeOfIRExpr(irsb->tyenv, value) == Ity_F64);
+ descr = mkIRRegArray( OFFB_FPREGS, Ity_F64, 8 );
+ stmt( IRStmt_PutI( descr, get_ftop(), i, value ) );
+ /* Mark the register as in-use. */
+ put_ST_TAG(i, mkU8(1));
+}
+
+/* Given i, and some expression e, emit
+ ST(i) = is_full(i) ? NaN : e
+ and set the tag accordingly.
+*/
+
+static void put_ST ( Int i, IRExpr* value )
+{
+ put_ST_UNCHECKED( i,
+ IRExpr_Mux0X( get_ST_TAG(i),
+ /* 0 means empty */
+ value,
+ /* non-0 means full */
+ mkQNaN64()
+ )
+ );
+}
+
+
+/* Given i, generate an expression yielding 'ST(i)'. */
+
+static IRExpr* get_ST_UNCHECKED ( Int i )
+{
+ IRRegArray* descr = mkIRRegArray( OFFB_FPREGS, Ity_F64, 8 );
+ return IRExpr_GetI( descr, get_ftop(), i );
+}
+
+
+/* Given i, generate an expression yielding
+ is_full(i) ? ST(i) : NaN
+*/
+
+static IRExpr* get_ST ( Int i )
+{
+ return
+ IRExpr_Mux0X( get_ST_TAG(i),
+ /* 0 means empty */
+ mkQNaN64(),
+ /* non-0 means full */
+ get_ST_UNCHECKED(i));
+}
+
+
+/* Adjust FTOP downwards by one register. */
+
+static void fp_push ( void )
+{
+ put_ftop( binop(Iop_Sub32, get_ftop(), mkU32(1)) );
+}
+
+/* Adjust FTOP upwards by one register, and mark the vacated register
+ as empty. */
+
+static void fp_pop ( void )
+{
+ put_ST_TAG(0, mkU8(0));
+ put_ftop( binop(Iop_Add32, get_ftop(), mkU32(1)) );
+}
+
+/* Clear the C2 bit of the FPU status register, for
+ sin/cos/tan/sincos. */
+
+static void clear_C2 ( void )
+{
+ put_C3210( binop(Iop_And64, get_C3210(), mkU64(~AMD64G_FC_MASK_C2)) );
+}
+
+/* Invent a plausible-looking FPU status word value:
+ ((ftop & 7) << 11) | (c3210 & 0x4700)
+ */
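+/* (In the status word, FTOP occupies bits 13..11, while C3,C2,C1,C0
+   live at bits 14,10,9,8 respectively -- hence the 0x4700 mask.) */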
+static IRExpr* get_FPU_sw ( void )
+{
+ return
+ unop(Iop_32to16,
+ binop(Iop_Or32,
+ binop(Iop_Shl32,
+ binop(Iop_And32, get_ftop(), mkU32(7)),
+ mkU8(11)),
+ binop(Iop_And32, unop(Iop_64to32, get_C3210()),
+ mkU32(0x4700))
+ ));
+}
+
+
+/* ------------------------------------------------------- */
+/* Given all that stack-mangling junk, we can now go ahead
+ and describe FP instructions.
+*/
+
+/* ST(0) = ST(0) `op` mem64/32(addr)
+ Need to check ST(0)'s tag on read, but not on write.
+*/
+static
+void fp_do_op_mem_ST_0 ( IRTemp addr, HChar* op_txt, HChar* dis_buf,
+ IROp op, Bool dbl )
+{
+ DIP("f%s%c %s\n", op_txt, dbl?'l':'s', dis_buf);
+ if (dbl) {
+ put_ST_UNCHECKED(0,
+ triop( op,
+ get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
+ get_ST(0),
+ loadLE(Ity_F64,mkexpr(addr))
+ ));
+ } else {
+ put_ST_UNCHECKED(0,
+ triop( op,
+ get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
+ get_ST(0),
+ unop(Iop_F32toF64, loadLE(Ity_F32,mkexpr(addr)))
+ ));
+ }
+}
+
+
+/* ST(0) = mem64/32(addr) `op` ST(0)
+ Need to check ST(0)'s tag on read, but not on write.
+*/
+static
+void fp_do_oprev_mem_ST_0 ( IRTemp addr, HChar* op_txt, HChar* dis_buf,
+ IROp op, Bool dbl )
+{
+ DIP("f%s%c %s\n", op_txt, dbl?'l':'s', dis_buf);
+ if (dbl) {
+ put_ST_UNCHECKED(0,
+ triop( op,
+ get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
+ loadLE(Ity_F64,mkexpr(addr)),
+ get_ST(0)
+ ));
+ } else {
+ put_ST_UNCHECKED(0,
+ triop( op,
+ get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
+ unop(Iop_F32toF64, loadLE(Ity_F32,mkexpr(addr))),
+ get_ST(0)
+ ));
+ }
+}
+
+
+/* ST(dst) = ST(dst) `op` ST(src).
+ Check dst and src tags when reading but not on write.
+*/
+static
+void fp_do_op_ST_ST ( HChar* op_txt, IROp op, UInt st_src, UInt st_dst,
+ Bool pop_after )
+{
+ DIP("f%s%s st(%u), st(%u)\n", op_txt, pop_after?"p":"", st_src, st_dst );
+ put_ST_UNCHECKED(
+ st_dst,
+ triop( op,
+ get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
+ get_ST(st_dst),
+ get_ST(st_src) )
+ );
+ if (pop_after)
+ fp_pop();
+}
+
+/* ST(dst) = ST(src) `op` ST(dst).
+ Check dst and src tags when reading but not on write.
+*/
+static
+void fp_do_oprev_ST_ST ( HChar* op_txt, IROp op, UInt st_src, UInt st_dst,
+ Bool pop_after )
+{
+ DIP("f%s%s st(%u), st(%u)\n", op_txt, pop_after?"p":"", st_src, st_dst );
+ put_ST_UNCHECKED(
+ st_dst,
+ triop( op,
+ get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
+ get_ST(st_src),
+ get_ST(st_dst) )
+ );
+ if (pop_after)
+ fp_pop();
+}
+
+/* %rflags(Z,P,C) = UCOMI( st(0), st(i) ) */
+static void fp_do_ucomi_ST0_STi ( UInt i, Bool pop_after )
+{
+ DIP("fucomi%s %%st(0),%%st(%u)\n", pop_after ? "p" : "", i);
+ /* This is a bit of a hack (and isn't really right). It sets
+ Z,P,C,O correctly, but forces A and S to zero, whereas the Intel
+ documentation implies A and S are unchanged.
+ */
+ /* It's also fishy in that it is used both for COMIP and
+ UCOMIP, and they aren't the same (although similar). */
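+   /* Iop_CmpF64 produces a value in the IRCmpF64Result encoding
+      (UN=0x45, EQ=0x40, LT=0x01, GT=0x00), which lines up exactly
+      with the ZF/PF/CF positions in %rflags (0x40/0x04/0x01).
+      Masking with 0x45 therefore yields the required flag triple
+      directly. */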
+ stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
+ stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
+ stmt( IRStmt_Put(
+ OFFB_CC_DEP1,
+ binop( Iop_And64,
+ unop( Iop_32Uto64,
+ binop(Iop_CmpF64, get_ST(0), get_ST(i))),
+ mkU64(0x45)
+ )));
+ if (pop_after)
+ fp_pop();
+}
+
+
+/* returns
+ 32to16( if e32 <s -32768 || e32 >s 32767 then -32768 else e32 )
+*/
+static IRExpr* x87ishly_qnarrow_32_to_16 ( IRExpr* e32 )
+{
+ IRTemp t32 = newTemp(Ity_I32);
+ assign( t32, e32 );
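+   /* Biasing trick: t32 lies in [-32768 .. 32767] exactly when the
+      unsigned value (t32 + 32768) is below 65536.  The Mux0X picks
+      its first arm (0x8000) when that test fails (condition 0), and
+      the plain truncation otherwise. */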
+ return
+ IRExpr_Mux0X(
+ unop(Iop_1Uto8,
+ binop(Iop_CmpLT64U,
+ unop(Iop_32Uto64,
+ binop(Iop_Add32, mkexpr(t32), mkU32(32768))),
+ mkU64(65536))),
+ mkU16( 0x8000 ),
+ unop(Iop_32to16, mkexpr(t32)));
+}
+
+
+static
+ULong dis_FPU ( /*OUT*/Bool* decode_ok,
+ VexAbiInfo* vbi, Prefix pfx, Long delta )
+{
+ Int len;
+ UInt r_src, r_dst;
+ HChar dis_buf[50];
+ IRTemp t1, t2;
+
+ /* On entry, delta points at the second byte of the insn (the modrm
+ byte).*/
+ UChar first_opcode = getUChar(delta-1);
+ UChar modrm = getUChar(delta+0);
+
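+   /* All x87 insns live in the one-byte opcode range 0xD8..0xDF.
+      In each case, modrm < 0xC0 denotes a memory-operand form in
+      which bits 5,4,3 of the modrm act as an opcode extension,
+      whereas modrm >= 0xC0 denotes a register form selected by the
+      modrm byte itself. */
+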
+ /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xD8 opcodes +-+-+-+-+-+-+-+ */
+
+ if (first_opcode == 0xD8) {
+ if (modrm < 0xC0) {
+
+ /* bits 5,4,3 are an opcode extension, and the modRM also
+ specifies an address. */
+ IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
+ delta += len;
+
+ switch (gregLO3ofRM(modrm)) {
+
+ case 0: /* FADD single-real */
+ fp_do_op_mem_ST_0 ( addr, "add", dis_buf, Iop_AddF64, False );
+ break;
+
+ case 1: /* FMUL single-real */
+ fp_do_op_mem_ST_0 ( addr, "mul", dis_buf, Iop_MulF64, False );
+ break;
+
+//.. case 2: /* FCOM single-real */
+//.. DIP("fcoms %s\n", dis_buf);
+//.. /* This forces C1 to zero, which isn't right. */
+//.. put_C3210(
+//.. binop( Iop_And32,
+//.. binop(Iop_Shl32,
+//.. binop(Iop_CmpF64,
+//.. get_ST(0),
+//.. unop(Iop_F32toF64,
+//.. loadLE(Ity_F32,mkexpr(addr)))),
+//.. mkU8(8)),
+//.. mkU32(0x4500)
+//.. ));
+//.. break;
+//..
+//.. case 3: /* FCOMP single-real */
+//.. DIP("fcomps %s\n", dis_buf);
+//.. /* This forces C1 to zero, which isn't right. */
+//.. put_C3210(
+//.. binop( Iop_And32,
+//.. binop(Iop_Shl32,
+//.. binop(Iop_CmpF64,
+//.. get_ST(0),
+//.. unop(Iop_F32toF64,
+//.. loadLE(Ity_F32,mkexpr(addr)))),
+//.. mkU8(8)),
+//.. mkU32(0x4500)
+//.. ));
+//.. fp_pop();
+//.. break;
+
+ case 4: /* FSUB single-real */
+ fp_do_op_mem_ST_0 ( addr, "sub", dis_buf, Iop_SubF64, False );
+ break;
+
+ case 5: /* FSUBR single-real */
+ fp_do_oprev_mem_ST_0 ( addr, "subr", dis_buf, Iop_SubF64, False );
+ break;
+
+ case 6: /* FDIV single-real */
+ fp_do_op_mem_ST_0 ( addr, "div", dis_buf, Iop_DivF64, False );
+ break;
+
+ case 7: /* FDIVR single-real */
+ fp_do_oprev_mem_ST_0 ( addr, "divr", dis_buf, Iop_DivF64, False );
+ break;
+
+ default:
+ vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm));
+ vex_printf("first_opcode == 0xD8\n");
+ goto decode_fail;
+ }
+ } else {
+ delta++;
+ switch (modrm) {
+
+ case 0xC0 ... 0xC7: /* FADD %st(?),%st(0) */
+ fp_do_op_ST_ST ( "add", Iop_AddF64, modrm - 0xC0, 0, False );
+ break;
+
+ case 0xC8 ... 0xCF: /* FMUL %st(?),%st(0) */
+ fp_do_op_ST_ST ( "mul", Iop_MulF64, modrm - 0xC8, 0, False );
+ break;
+
+ /* Dunno if this is right */
+ case 0xD0 ... 0xD7: /* FCOM %st(?),%st(0) */
+ r_dst = (UInt)modrm - 0xD0;
+ DIP("fcom %%st(0),%%st(%d)\n", r_dst);
+ /* This forces C1 to zero, which isn't right. */
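+            /* Iop_CmpF64 yields 0x45 (unordered), 0x40 (EQ), 0x01
+               (LT) or 0x00 (GT); shifted left by 8 and masked with
+               0x4500 this lands in the C3 (bit 14), C2 (bit 10) and
+               C0 (bit 8) positions, as FCOM requires. */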
+ put_C3210(
+ unop(Iop_32Uto64,
+ binop( Iop_And32,
+ binop(Iop_Shl32,
+ binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
+ mkU8(8)),
+ mkU32(0x4500)
+ )));
+ break;
+
+ /* Dunno if this is right */
+ case 0xD8 ... 0xDF: /* FCOMP %st(?),%st(0) */
+ r_dst = (UInt)modrm - 0xD8;
+ DIP("fcomp %%st(0),%%st(%d)\n", r_dst);
+ /* This forces C1 to zero, which isn't right. */
+ put_C3210(
+ unop(Iop_32Uto64,
+ binop( Iop_And32,
+ binop(Iop_Shl32,
+ binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
+ mkU8(8)),
+ mkU32(0x4500)
+ )));
+ fp_pop();
+ break;
+
+ case 0xE0 ... 0xE7: /* FSUB %st(?),%st(0) */
+ fp_do_op_ST_ST ( "sub", Iop_SubF64, modrm - 0xE0, 0, False );
+ break;
+
+ case 0xE8 ... 0xEF: /* FSUBR %st(?),%st(0) */
+ fp_do_oprev_ST_ST ( "subr", Iop_SubF64, modrm - 0xE8, 0, False );
+ break;
+
+ case 0xF0 ... 0xF7: /* FDIV %st(?),%st(0) */
+ fp_do_op_ST_ST ( "div", Iop_DivF64, modrm - 0xF0, 0, False );
+ break;
+
+ case 0xF8 ... 0xFF: /* FDIVR %st(?),%st(0) */
+ fp_do_oprev_ST_ST ( "divr", Iop_DivF64, modrm - 0xF8, 0, False );
+ break;
+
+ default:
+ goto decode_fail;
+ }
+ }
+ }
+
+ /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xD9 opcodes +-+-+-+-+-+-+-+ */
+ else
+ if (first_opcode == 0xD9) {
+ if (modrm < 0xC0) {
+
+ /* bits 5,4,3 are an opcode extension, and the modRM also
+ specifies an address. */
+ IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
+ delta += len;
+
+ switch (gregLO3ofRM(modrm)) {
+
+ case 0: /* FLD single-real */
+ DIP("flds %s\n", dis_buf);
+ fp_push();
+ put_ST(0, unop(Iop_F32toF64,
+ loadLE(Ity_F32, mkexpr(addr))));
+ break;
+
+ case 2: /* FST single-real */
+ DIP("fsts %s\n", dis_buf);
+ storeLE(mkexpr(addr),
+ binop(Iop_F64toF32, get_roundingmode(), get_ST(0)));
+ break;
+
+ case 3: /* FSTP single-real */
+ DIP("fstps %s\n", dis_buf);
+ storeLE(mkexpr(addr),
+ binop(Iop_F64toF32, get_roundingmode(), get_ST(0)));
+ fp_pop();
+ break;
+
+ case 4: { /* FLDENV m28 */
+ /* Uses dirty helper:
+                 VexEmWarn amd64g_dirtyhelper_FLDENV ( VexGuestAMD64State*, HWord ) */
+ IRTemp ew = newTemp(Ity_I32);
+ IRTemp w64 = newTemp(Ity_I64);
+ IRDirty* d = unsafeIRDirty_0_N (
+ 0/*regparms*/,
+ "amd64g_dirtyhelper_FLDENV",
+ &amd64g_dirtyhelper_FLDENV,
+ mkIRExprVec_1( mkexpr(addr) )
+ );
+ d->needsBBP = True;
+ d->tmp = w64;
+ /* declare we're reading memory */
+ d->mFx = Ifx_Read;
+ d->mAddr = mkexpr(addr);
+ d->mSize = 28;
+
+ /* declare we're writing guest state */
+ d->nFxState = 4;
+
+ d->fxState[0].fx = Ifx_Write;
+ d->fxState[0].offset = OFFB_FTOP;
+ d->fxState[0].size = sizeof(UInt);
+
+ d->fxState[1].fx = Ifx_Write;
+ d->fxState[1].offset = OFFB_FPTAGS;
+ d->fxState[1].size = 8 * sizeof(UChar);
+
+ d->fxState[2].fx = Ifx_Write;
+ d->fxState[2].offset = OFFB_FPROUND;
+ d->fxState[2].size = sizeof(ULong);
+
+ d->fxState[3].fx = Ifx_Write;
+ d->fxState[3].offset = OFFB_FC3210;
+ d->fxState[3].size = sizeof(ULong);
+
+ stmt( IRStmt_Dirty(d) );
+
+ /* ew contains any emulation warning we may need to
+ issue. If needed, side-exit to the next insn,
+ reporting the warning, so that Valgrind's dispatcher
+ sees the warning. */
+ assign(ew, unop(Iop_64to32,mkexpr(w64)) );
+ put_emwarn( mkexpr(ew) );
+ stmt(
+ IRStmt_Exit(
+ binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
+ Ijk_EmWarn,
+ IRConst_U64( guest_RIP_bbstart+delta )
+ )
+ );
+
+ DIP("fldenv %s\n", dis_buf);
+ break;
+ }
+
+ case 5: {/* FLDCW */
+ /* The only thing we observe in the control word is the
+ rounding mode. Therefore, pass the 16-bit value
+ (x87 native-format control word) to a clean helper,
+ getting back a 64-bit value, the lower half of which
+ is the FPROUND value to store, and the upper half of
+ which is the emulation-warning token which may be
+ generated.
+ */
+            /* ULong amd64g_check_fldcw ( ULong ); */
+ IRTemp t64 = newTemp(Ity_I64);
+ IRTemp ew = newTemp(Ity_I32);
+ DIP("fldcw %s\n", dis_buf);
+ assign( t64, mkIRExprCCall(
+ Ity_I64, 0/*regparms*/,
+ "amd64g_check_fldcw",
+ &amd64g_check_fldcw,
+ mkIRExprVec_1(
+ unop( Iop_16Uto64,
+ loadLE(Ity_I16, mkexpr(addr)))
+ )
+ )
+ );
+
+ put_fpround( unop(Iop_64to32, mkexpr(t64)) );
+ assign( ew, unop(Iop_64HIto32, mkexpr(t64) ) );
+ put_emwarn( mkexpr(ew) );
+ /* Finally, if an emulation warning was reported,
+ side-exit to the next insn, reporting the warning,
+ so that Valgrind's dispatcher sees the warning. */
+ stmt(
+ IRStmt_Exit(
+ binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
+ Ijk_EmWarn,
+ IRConst_U64( guest_RIP_bbstart+delta )
+ )
+ );
+ break;
+ }
+
+ case 6: { /* FNSTENV m28 */
+ /* Uses dirty helper:
+               void amd64g_dirtyhelper_FSTENV ( VexGuestAMD64State*, HWord ) */
+ IRDirty* d = unsafeIRDirty_0_N (
+ 0/*regparms*/,
+ "amd64g_dirtyhelper_FSTENV",
+ &amd64g_dirtyhelper_FSTENV,
+ mkIRExprVec_1( mkexpr(addr) )
+ );
+ d->needsBBP = True;
+ /* declare we're writing memory */
+ d->mFx = Ifx_Write;
+ d->mAddr = mkexpr(addr);
+ d->mSize = 28;
+
+ /* declare we're reading guest state */
+ d->nFxState = 4;
+
+ d->fxState[0].fx = Ifx_Read;
+ d->fxState[0].offset = OFFB_FTOP;
+ d->fxState[0].size = sizeof(UInt);
+
+ d->fxState[1].fx = Ifx_Read;
+ d->fxState[1].offset = OFFB_FPTAGS;
+ d->fxState[1].size = 8 * sizeof(UChar);
+
+ d->fxState[2].fx = Ifx_Read;
+ d->fxState[2].offset = OFFB_FPROUND;
+ d->fxState[2].size = sizeof(ULong);
+
+ d->fxState[3].fx = Ifx_Read;
+ d->fxState[3].offset = OFFB_FC3210;
+ d->fxState[3].size = sizeof(ULong);
+
+ stmt( IRStmt_Dirty(d) );
+
+ DIP("fnstenv %s\n", dis_buf);
+ break;
+ }
+
+ case 7: /* FNSTCW */
+ /* Fake up a native x87 FPU control word. The only
+ thing it depends on is FPROUND[1:0], so call a clean
+ helper to cook it up. */
+ /* ULong amd64g_create_fpucw ( ULong fpround ) */
+ DIP("fnstcw %s\n", dis_buf);
+ storeLE(
+ mkexpr(addr),
+ unop( Iop_64to16,
+ mkIRExprCCall(
+ Ity_I64, 0/*regp*/,
+ "amd64g_create_fpucw", &amd64g_create_fpucw,
+ mkIRExprVec_1( unop(Iop_32Uto64, get_fpround()) )
+ )
+ )
+ );
+ break;
+
+ default:
+ vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm));
+ vex_printf("first_opcode == 0xD9\n");
+ goto decode_fail;
+ }
+
+ } else {
+ delta++;
+ switch (modrm) {
+
+ case 0xC0 ... 0xC7: /* FLD %st(?) */
+ r_src = (UInt)modrm - 0xC0;
+ DIP("fld %%st(%u)\n", r_src);
+ t1 = newTemp(Ity_F64);
+ assign(t1, get_ST(r_src));
+ fp_push();
+ put_ST(0, mkexpr(t1));
+ break;
+
+ case 0xC8 ... 0xCF: /* FXCH %st(?) */
+ r_src = (UInt)modrm - 0xC8;
+ DIP("fxch %%st(%u)\n", r_src);
+ t1 = newTemp(Ity_F64);
+ t2 = newTemp(Ity_F64);
+ assign(t1, get_ST(0));
+ assign(t2, get_ST(r_src));
+ put_ST_UNCHECKED(0, mkexpr(t2));
+ put_ST_UNCHECKED(r_src, mkexpr(t1));
+ break;
+
+ case 0xE0: /* FCHS */
+ DIP("fchs\n");
+ put_ST_UNCHECKED(0, unop(Iop_NegF64, get_ST(0)));
+ break;
+
+ case 0xE1: /* FABS */
+ DIP("fabs\n");
+ put_ST_UNCHECKED(0, unop(Iop_AbsF64, get_ST(0)));
+ break;
+
+ case 0xE5: { /* FXAM */
+ /* This is an interesting one. It examines %st(0),
+ regardless of whether the tag says it's empty or not.
+ Here, just pass both the tag (in our format) and the
+ value (as a double, actually a ULong) to a helper
+ function. */
+ IRExpr** args
+ = mkIRExprVec_2( unop(Iop_8Uto64, get_ST_TAG(0)),
+ unop(Iop_ReinterpF64asI64,
+ get_ST_UNCHECKED(0)) );
+ put_C3210(mkIRExprCCall(
+ Ity_I64,
+ 0/*regparm*/,
+ "amd64g_calculate_FXAM", &amd64g_calculate_FXAM,
+ args
+ ));
+ DIP("fxam\n");
+ break;
+ }
+
+ case 0xE8: /* FLD1 */
+ DIP("fld1\n");
+ fp_push();
+ /* put_ST(0, IRExpr_Const(IRConst_F64(1.0))); */
+ put_ST(0, IRExpr_Const(IRConst_F64i(0x3ff0000000000000ULL)));
+ break;
+
+ case 0xE9: /* FLDL2T */
+ DIP("fldl2t\n");
+ fp_push();
+ /* put_ST(0, IRExpr_Const(IRConst_F64(3.32192809488736234781))); */
+ put_ST(0, IRExpr_Const(IRConst_F64i(0x400a934f0979a371ULL)));
+ break;
+
+ case 0xEA: /* FLDL2E */
+ DIP("fldl2e\n");
+ fp_push();
+ /* put_ST(0, IRExpr_Const(IRConst_F64(1.44269504088896340739))); */
+ put_ST(0, IRExpr_Const(IRConst_F64i(0x3ff71547652b82feULL)));
+ break;
+
+ case 0xEB: /* FLDPI */
+ DIP("fldpi\n");
+ fp_push();
+ /* put_ST(0, IRExpr_Const(IRConst_F64(3.14159265358979323851))); */
+ put_ST(0, IRExpr_Const(IRConst_F64i(0x400921fb54442d18ULL)));
+ break;
+
+ case 0xEC: /* FLDLG2 */
+ DIP("fldlg2\n");
+ fp_push();
+ /* put_ST(0, IRExpr_Const(IRConst_F64(0.301029995663981143))); */
+ put_ST(0, IRExpr_Const(IRConst_F64i(0x3fd34413509f79ffULL)));
+ break;
+
+ case 0xED: /* FLDLN2 */
+ DIP("fldln2\n");
+ fp_push();
+ /* put_ST(0, IRExpr_Const(IRConst_F64(0.69314718055994530942))); */
+ put_ST(0, IRExpr_Const(IRConst_F64i(0x3fe62e42fefa39efULL)));
+ break;
+
+ case 0xEE: /* FLDZ */
+ DIP("fldz\n");
+ fp_push();
+ /* put_ST(0, IRExpr_Const(IRConst_F64(0.0))); */
+ put_ST(0, IRExpr_Const(IRConst_F64i(0x0000000000000000ULL)));
+ break;
+
+ case 0xF0: /* F2XM1 */
+ DIP("f2xm1\n");
+ put_ST_UNCHECKED(0,
+ binop(Iop_2xm1F64,
+ get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
+ get_ST(0)));
+ break;
+
+ case 0xF1: /* FYL2X */
+ DIP("fyl2x\n");
+ put_ST_UNCHECKED(1,
+ triop(Iop_Yl2xF64,
+ get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
+ get_ST(1),
+ get_ST(0)));
+ fp_pop();
+ break;
+
+ case 0xF2: /* FPTAN */
+ DIP("ftan\n");
+ put_ST_UNCHECKED(0,
+ binop(Iop_TanF64,
+ get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
+ get_ST(0)));
+ fp_push();
+ put_ST(0, IRExpr_Const(IRConst_F64(1.0)));
+ clear_C2(); /* HACK */
+ break;
+
+ case 0xF3: /* FPATAN */
+ DIP("fpatan\n");
+ put_ST_UNCHECKED(1,
+ triop(Iop_AtanF64,
+ get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
+ get_ST(1),
+ get_ST(0)));
+ fp_pop();
+ break;
+
+ case 0xF4: { /* FXTRACT */
+ IRTemp argF = newTemp(Ity_F64);
+ IRTemp sigF = newTemp(Ity_F64);
+ IRTemp expF = newTemp(Ity_F64);
+ IRTemp argI = newTemp(Ity_I64);
+ IRTemp sigI = newTemp(Ity_I64);
+ IRTemp expI = newTemp(Ity_I64);
+ DIP("fxtract\n");
+ assign( argF, get_ST(0) );
+ assign( argI, unop(Iop_ReinterpF64asI64, mkexpr(argF)));
+ assign( sigI,
+ mkIRExprCCall(
+ Ity_I64, 0/*regparms*/,
+ "x86amd64g_calculate_FXTRACT",
+ &x86amd64g_calculate_FXTRACT,
+ mkIRExprVec_2( mkexpr(argI),
+ mkIRExpr_HWord(0)/*sig*/ ))
+ );
+ assign( expI,
+ mkIRExprCCall(
+ Ity_I64, 0/*regparms*/,
+ "x86amd64g_calculate_FXTRACT",
+ &x86amd64g_calculate_FXTRACT,
+ mkIRExprVec_2( mkexpr(argI),
+ mkIRExpr_HWord(1)/*exp*/ ))
+ );
+ assign( sigF, unop(Iop_ReinterpI64asF64, mkexpr(sigI)) );
+ assign( expF, unop(Iop_ReinterpI64asF64, mkexpr(expI)) );
+ /* exponent */
+ put_ST_UNCHECKED(0, mkexpr(expF) );
+ fp_push();
+ /* significand */
+ put_ST(0, mkexpr(sigF) );
+ break;
+ }
+
+ case 0xF5: { /* FPREM1 -- IEEE compliant */
+ IRTemp a1 = newTemp(Ity_F64);
+ IRTemp a2 = newTemp(Ity_F64);
+ DIP("fprem1\n");
+ /* Do FPREM1 twice, once to get the remainder, and once
+ to get the C3210 flag values. */
+ assign( a1, get_ST(0) );
+ assign( a2, get_ST(1) );
+ put_ST_UNCHECKED(0,
+ triop(Iop_PRem1F64,
+ get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
+ mkexpr(a1),
+ mkexpr(a2)));
+ put_C3210(
+ unop(Iop_32Uto64,
+ triop(Iop_PRem1C3210F64,
+ get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
+ mkexpr(a1),
+ mkexpr(a2)) ));
+ break;
+ }
+
+ case 0xF7: /* FINCSTP */
+ DIP("fincstp\n");
+ put_ftop( binop(Iop_Add32, get_ftop(), mkU32(1)) );
+ break;
+
+ case 0xF8: { /* FPREM -- not IEEE compliant */
+ IRTemp a1 = newTemp(Ity_F64);
+ IRTemp a2 = newTemp(Ity_F64);
+ DIP("fprem\n");
+ /* Do FPREM twice, once to get the remainder, and once
+ to get the C3210 flag values. */
+ assign( a1, get_ST(0) );
+ assign( a2, get_ST(1) );
+ put_ST_UNCHECKED(0,
+ triop(Iop_PRemF64,
+ get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
+ mkexpr(a1),
+ mkexpr(a2)));
+ put_C3210(
+ unop(Iop_32Uto64,
+ triop(Iop_PRemC3210F64,
+ get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
+ mkexpr(a1),
+ mkexpr(a2)) ));
+ break;
+ }
+
+ case 0xF9: /* FYL2XP1 */
+ DIP("fyl2xp1\n");
+ put_ST_UNCHECKED(1,
+ triop(Iop_Yl2xp1F64,
+ get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
+ get_ST(1),
+ get_ST(0)));
+ fp_pop();
+ break;
+
+ case 0xFA: /* FSQRT */
+ DIP("fsqrt\n");
+ put_ST_UNCHECKED(0,
+ binop(Iop_SqrtF64,
+ get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
+ get_ST(0)));
+ break;
+
+ case 0xFB: { /* FSINCOS */
+ IRTemp a1 = newTemp(Ity_F64);
+ assign( a1, get_ST(0) );
+ DIP("fsincos\n");
+ put_ST_UNCHECKED(0,
+ binop(Iop_SinF64,
+ get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
+ mkexpr(a1)));
+ fp_push();
+ put_ST(0,
+ binop(Iop_CosF64,
+ get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
+ mkexpr(a1)));
+ clear_C2(); /* HACK */
+ break;
+ }
+
+ case 0xFC: /* FRNDINT */
+ DIP("frndint\n");
+ put_ST_UNCHECKED(0,
+ binop(Iop_RoundF64toInt, get_roundingmode(), get_ST(0)) );
+ break;
+
+ case 0xFD: /* FSCALE */
+ DIP("fscale\n");
+ put_ST_UNCHECKED(0,
+ triop(Iop_ScaleF64,
+ get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
+ get_ST(0),
+ get_ST(1)));
+ break;
+
+ case 0xFE: /* FSIN */
+ DIP("fsin\n");
+ put_ST_UNCHECKED(0,
+ binop(Iop_SinF64,
+ get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
+ get_ST(0)));
+ clear_C2(); /* HACK */
+ break;
+
+ case 0xFF: /* FCOS */
+ DIP("fcos\n");
+ put_ST_UNCHECKED(0,
+ binop(Iop_CosF64,
+ get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
+ get_ST(0)));
+ clear_C2(); /* HACK */
+ break;
+
+ default:
+ goto decode_fail;
+ }
+ }
+ }
+
+ /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDA opcodes +-+-+-+-+-+-+-+ */
+ else
+ if (first_opcode == 0xDA) {
+
+ if (modrm < 0xC0) {
+
+ /* bits 5,4,3 are an opcode extension, and the modRM also
+ specifies an address. */
+ IROp fop;
+ IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
+ delta += len;
+ switch (gregLO3ofRM(modrm)) {
+
+ case 0: /* FIADD m32int */ /* ST(0) += m32int */
+ DIP("fiaddl %s\n", dis_buf);
+ fop = Iop_AddF64;
+ goto do_fop_m32;
+
+ case 1: /* FIMUL m32int */ /* ST(0) *= m32int */
+ DIP("fimull %s\n", dis_buf);
+ fop = Iop_MulF64;
+ goto do_fop_m32;
+
+ case 4: /* FISUB m32int */ /* ST(0) -= m32int */
+ DIP("fisubl %s\n", dis_buf);
+ fop = Iop_SubF64;
+ goto do_fop_m32;
+
+ case 5: /* FISUBR m32int */ /* ST(0) = m32int - ST(0) */
+ DIP("fisubrl %s\n", dis_buf);
+ fop = Iop_SubF64;
+ goto do_foprev_m32;
+
+ case 6: /* FIDIV m32int */ /* ST(0) /= m32int */
+ DIP("fisubl %s\n", dis_buf);
+ fop = Iop_DivF64;
+ goto do_fop_m32;
+
+ case 7: /* FIDIVR m32int */ /* ST(0) = m32int / ST(0) */
+ DIP("fidivrl %s\n", dis_buf);
+ fop = Iop_DivF64;
+ goto do_foprev_m32;
+
+ do_fop_m32:
+ put_ST_UNCHECKED(0,
+ triop(fop,
+ get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
+ get_ST(0),
+ unop(Iop_I32StoF64,
+ loadLE(Ity_I32, mkexpr(addr)))));
+ break;
+
+ do_foprev_m32:
+ put_ST_UNCHECKED(0,
+ triop(fop,
+ get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
+ unop(Iop_I32StoF64,
+ loadLE(Ity_I32, mkexpr(addr))),
+ get_ST(0)));
+ break;
+
+ default:
+ vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm));
+ vex_printf("first_opcode == 0xDA\n");
+ goto decode_fail;
+ }
+
+ } else {
+
+ delta++;
+ switch (modrm) {
+
+ case 0xC0 ... 0xC7: /* FCMOVB ST(i), ST(0) */
+ r_src = (UInt)modrm - 0xC0;
+ DIP("fcmovb %%st(%u), %%st(0)\n", r_src);
+ put_ST_UNCHECKED(0,
+ IRExpr_Mux0X(
+ unop(Iop_1Uto8,
+ mk_amd64g_calculate_condition(AMD64CondB)),
+ get_ST(0), get_ST(r_src)) );
+ break;
+
+ case 0xC8 ... 0xCF: /* FCMOVE(Z) ST(i), ST(0) */
+ r_src = (UInt)modrm - 0xC8;
+ DIP("fcmovz %%st(%u), %%st(0)\n", r_src);
+ put_ST_UNCHECKED(0,
+ IRExpr_Mux0X(
+ unop(Iop_1Uto8,
+ mk_amd64g_calculate_condition(AMD64CondZ)),
+ get_ST(0), get_ST(r_src)) );
+ break;
+
+ case 0xD0 ... 0xD7: /* FCMOVBE ST(i), ST(0) */
+ r_src = (UInt)modrm - 0xD0;
+ DIP("fcmovbe %%st(%u), %%st(0)\n", r_src);
+ put_ST_UNCHECKED(0,
+ IRExpr_Mux0X(
+ unop(Iop_1Uto8,
+ mk_amd64g_calculate_condition(AMD64CondBE)),
+ get_ST(0), get_ST(r_src)) );
+ break;
+
+ case 0xD8 ... 0xDF: /* FCMOVU ST(i), ST(0) */
+ r_src = (UInt)modrm - 0xD8;
+ DIP("fcmovu %%st(%u), %%st(0)\n", r_src);
+ put_ST_UNCHECKED(0,
+ IRExpr_Mux0X(
+ unop(Iop_1Uto8,
+ mk_amd64g_calculate_condition(AMD64CondP)),
+ get_ST(0), get_ST(r_src)) );
+ break;
+
+ case 0xE9: /* FUCOMPP %st(0),%st(1) */
+ DIP("fucompp %%st(0),%%st(1)\n");
+ /* This forces C1 to zero, which isn't right. */
+ put_C3210(
+ unop(Iop_32Uto64,
+ binop( Iop_And32,
+ binop(Iop_Shl32,
+ binop(Iop_CmpF64, get_ST(0), get_ST(1)),
+ mkU8(8)),
+ mkU32(0x4500)
+ )));
+ fp_pop();
+ fp_pop();
+ break;
+
+ default:
+ goto decode_fail;
+ }
+
+ }
+ }
+
+ /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDB opcodes +-+-+-+-+-+-+-+ */
+ else
+ if (first_opcode == 0xDB) {
+ if (modrm < 0xC0) {
+
+ /* bits 5,4,3 are an opcode extension, and the modRM also
+ specifies an address. */
+ IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
+ delta += len;
+
+ switch (gregLO3ofRM(modrm)) {
+
+ case 0: /* FILD m32int */
+ DIP("fildl %s\n", dis_buf);
+ fp_push();
+ put_ST(0, unop(Iop_I32StoF64,
+ loadLE(Ity_I32, mkexpr(addr))));
+ break;
+
+ case 1: /* FISTTPL m32 (SSE3) */
+ DIP("fisttpl %s\n", dis_buf);
+ storeLE( mkexpr(addr),
+ binop(Iop_F64toI32S, mkU32(Irrm_ZERO), get_ST(0)) );
+ fp_pop();
+ break;
+
+ case 2: /* FIST m32 */
+ DIP("fistl %s\n", dis_buf);
+ storeLE( mkexpr(addr),
+ binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) );
+ break;
+
+ case 3: /* FISTP m32 */
+ DIP("fistpl %s\n", dis_buf);
+ storeLE( mkexpr(addr),
+ binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) );
+ fp_pop();
+ break;
+
+ case 5: { /* FLD extended-real */
+ /* Uses dirty helper:
+                 ULong amd64g_dirtyhelper_loadF80le ( ULong )
+ addr holds the address. First, do a dirty call to
+ get hold of the data. */
+ IRTemp val = newTemp(Ity_I64);
+ IRExpr** args = mkIRExprVec_1 ( mkexpr(addr) );
+
+ IRDirty* d = unsafeIRDirty_1_N (
+ val,
+ 0/*regparms*/,
+ "amd64g_dirtyhelper_loadF80le",
+ &amd64g_dirtyhelper_loadF80le,
+ args
+ );
+ /* declare that we're reading memory */
+ d->mFx = Ifx_Read;
+ d->mAddr = mkexpr(addr);
+ d->mSize = 10;
+
+ /* execute the dirty call, dumping the result in val. */
+ stmt( IRStmt_Dirty(d) );
+ fp_push();
+ put_ST(0, unop(Iop_ReinterpI64asF64, mkexpr(val)));
+
+ DIP("fldt %s\n", dis_buf);
+ break;
+ }
+
+ case 7: { /* FSTP extended-real */
+ /* Uses dirty helper:
+                 void amd64g_dirtyhelper_storeF80le ( ULong addr, ULong data )
+ */
+ IRExpr** args
+ = mkIRExprVec_2( mkexpr(addr),
+ unop(Iop_ReinterpF64asI64, get_ST(0)) );
+
+ IRDirty* d = unsafeIRDirty_0_N (
+ 0/*regparms*/,
+ "amd64g_dirtyhelper_storeF80le",
+ &amd64g_dirtyhelper_storeF80le,
+ args
+ );
+ /* declare we're writing memory */
+ d->mFx = Ifx_Write;
+ d->mAddr = mkexpr(addr);
+ d->mSize = 10;
+
+ /* execute the dirty call. */
+ stmt( IRStmt_Dirty(d) );
+ fp_pop();
+
+ DIP("fstpt\n %s", dis_buf);
+ break;
+ }
+
+ default:
+ vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm));
+ vex_printf("first_opcode == 0xDB\n");
+ goto decode_fail;
+ }
+
+ } else {
+
+ delta++;
+ switch (modrm) {
+
+ case 0xC0 ... 0xC7: /* FCMOVNB ST(i), ST(0) */
+ r_src = (UInt)modrm - 0xC0;
+ DIP("fcmovnb %%st(%u), %%st(0)\n", r_src);
+ put_ST_UNCHECKED(0,
+ IRExpr_Mux0X(
+ unop(Iop_1Uto8,
+ mk_amd64g_calculate_condition(AMD64CondNB)),
+ get_ST(0), get_ST(r_src)) );
+ break;
+
+ case 0xC8 ... 0xCF: /* FCMOVNE(NZ) ST(i), ST(0) */
+ r_src = (UInt)modrm - 0xC8;
+ DIP("fcmovnz %%st(%u), %%st(0)\n", r_src);
+ put_ST_UNCHECKED(
+ 0,
+ IRExpr_Mux0X(
+ unop(Iop_1Uto8,
+ mk_amd64g_calculate_condition(AMD64CondNZ)),
+ get_ST(0),
+ get_ST(r_src)
+ )
+ );
+ break;
+
+ case 0xD0 ... 0xD7: /* FCMOVNBE ST(i), ST(0) */
+ r_src = (UInt)modrm - 0xD0;
+ DIP("fcmovnbe %%st(%u), %%st(0)\n", r_src);
+ put_ST_UNCHECKED(
+ 0,
+ IRExpr_Mux0X(
+ unop(Iop_1Uto8,
+ mk_amd64g_calculate_condition(AMD64CondNBE)),
+ get_ST(0),
+ get_ST(r_src)
+ )
+ );
+ break;
+
+ case 0xD8 ... 0xDF: /* FCMOVNU ST(i), ST(0) */
+ r_src = (UInt)modrm - 0xD8;
+ DIP("fcmovnu %%st(%u), %%st(0)\n", r_src);
+ put_ST_UNCHECKED(
+ 0,
+ IRExpr_Mux0X(
+ unop(Iop_1Uto8,
+ mk_amd64g_calculate_condition(AMD64CondNP)),
+ get_ST(0),
+ get_ST(r_src)
+ )
+ );
+ break;
+
+ case 0xE2:
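+            /* FNCLEX.  Pending x87 exception state is not modelled,
+               so there is nothing to clear; this is a no-op. */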
+ DIP("fnclex\n");
+ break;
+
+ case 0xE3: {
+ /* Uses dirty helper:
+               void amd64g_dirtyhelper_FINIT ( VexGuestAMD64State* ) */
+ IRDirty* d = unsafeIRDirty_0_N (
+ 0/*regparms*/,
+ "amd64g_dirtyhelper_FINIT",
+ &amd64g_dirtyhelper_FINIT,
+ mkIRExprVec_0()
+ );
+ d->needsBBP = True;
+
+ /* declare we're writing guest state */
+ d->nFxState = 5;
+
+ d->fxState[0].fx = Ifx_Write;
+ d->fxState[0].offset = OFFB_FTOP;
+ d->fxState[0].size = sizeof(UInt);
+
+ d->fxState[1].fx = Ifx_Write;
+ d->fxState[1].offset = OFFB_FPREGS;
+ d->fxState[1].size = 8 * sizeof(ULong);
+
+ d->fxState[2].fx = Ifx_Write;
+ d->fxState[2].offset = OFFB_FPTAGS;
+ d->fxState[2].size = 8 * sizeof(UChar);
+
+ d->fxState[3].fx = Ifx_Write;
+ d->fxState[3].offset = OFFB_FPROUND;
+ d->fxState[3].size = sizeof(ULong);
+
+ d->fxState[4].fx = Ifx_Write;
+ d->fxState[4].offset = OFFB_FC3210;
+ d->fxState[4].size = sizeof(ULong);
+
+ stmt( IRStmt_Dirty(d) );
+
+ DIP("fninit\n");
+ break;
+ }
+
+ case 0xE8 ... 0xEF: /* FUCOMI %st(0),%st(?) */
+ fp_do_ucomi_ST0_STi( (UInt)modrm - 0xE8, False );
+ break;
+
+ case 0xF0 ... 0xF7: /* FCOMI %st(0),%st(?) */
+ fp_do_ucomi_ST0_STi( (UInt)modrm - 0xF0, False );
+ break;
+
+ default:
+ goto decode_fail;
+ }
+ }
+ }
+
+ /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDC opcodes +-+-+-+-+-+-+-+ */
+ else
+ if (first_opcode == 0xDC) {
+ if (modrm < 0xC0) {
+
+ /* bits 5,4,3 are an opcode extension, and the modRM also
+ specifies an address. */
+ IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
+ delta += len;
+
+ switch (gregLO3ofRM(modrm)) {
+
+ case 0: /* FADD double-real */
+ fp_do_op_mem_ST_0 ( addr, "add", dis_buf, Iop_AddF64, True );
+ break;
+
+ case 1: /* FMUL double-real */
+ fp_do_op_mem_ST_0 ( addr, "mul", dis_buf, Iop_MulF64, True );
+ break;
+
+//.. case 2: /* FCOM double-real */
+//.. DIP("fcoml %s\n", dis_buf);
+//.. /* This forces C1 to zero, which isn't right. */
+//.. put_C3210(
+//.. binop( Iop_And32,
+//.. binop(Iop_Shl32,
+//.. binop(Iop_CmpF64,
+//.. get_ST(0),
+//.. loadLE(Ity_F64,mkexpr(addr))),
+//.. mkU8(8)),
+//.. mkU32(0x4500)
+//.. ));
+//.. break;
+
+ case 3: /* FCOMP double-real */
+ DIP("fcompl %s\n", dis_buf);
+ /* This forces C1 to zero, which isn't right. */
+ put_C3210(
+ unop(Iop_32Uto64,
+ binop( Iop_And32,
+ binop(Iop_Shl32,
+ binop(Iop_CmpF64,
+ get_ST(0),
+ loadLE(Ity_F64,mkexpr(addr))),
+ mkU8(8)),
+ mkU32(0x4500)
+ )));
+ fp_pop();
+ break;
+
+ case 4: /* FSUB double-real */
+ fp_do_op_mem_ST_0 ( addr, "sub", dis_buf, Iop_SubF64, True );
+ break;
+
+ case 5: /* FSUBR double-real */
+ fp_do_oprev_mem_ST_0 ( addr, "subr", dis_buf, Iop_SubF64, True );
+ break;
+
+ case 6: /* FDIV double-real */
+ fp_do_op_mem_ST_0 ( addr, "div", dis_buf, Iop_DivF64, True );
+ break;
+
+ case 7: /* FDIVR double-real */
+ fp_do_oprev_mem_ST_0 ( addr, "divr", dis_buf, Iop_DivF64, True );
+ break;
+
+ default:
+ vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm));
+ vex_printf("first_opcode == 0xDC\n");
+ goto decode_fail;
+ }
+
+ } else {
+
+ delta++;
+ switch (modrm) {
+
+ case 0xC0 ... 0xC7: /* FADD %st(0),%st(?) */
+ fp_do_op_ST_ST ( "add", Iop_AddF64, 0, modrm - 0xC0, False );
+ break;
+
+ case 0xC8 ... 0xCF: /* FMUL %st(0),%st(?) */
+ fp_do_op_ST_ST ( "mul", Iop_MulF64, 0, modrm - 0xC8, False );
+ break;
+
+ case 0xE0 ... 0xE7: /* FSUBR %st(0),%st(?) */
+ fp_do_oprev_ST_ST ( "subr", Iop_SubF64, 0, modrm - 0xE0, False );
+ break;
+
+ case 0xE8 ... 0xEF: /* FSUB %st(0),%st(?) */
+ fp_do_op_ST_ST ( "sub", Iop_SubF64, 0, modrm - 0xE8, False );
+ break;
+
+ case 0xF0 ... 0xF7: /* FDIVR %st(0),%st(?) */
+ fp_do_oprev_ST_ST ( "divr", Iop_DivF64, 0, modrm - 0xF0, False );
+ break;
+
+ case 0xF8 ... 0xFF: /* FDIV %st(0),%st(?) */
+ fp_do_op_ST_ST ( "div", Iop_DivF64, 0, modrm - 0xF8, False );
+ break;
+
+ default:
+ goto decode_fail;
+ }
+
+ }
+ }
+
+ /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDD opcodes +-+-+-+-+-+-+-+ */
+ else
+ if (first_opcode == 0xDD) {
+
+ if (modrm < 0xC0) {
+
+ /* bits 5,4,3 are an opcode extension, and the modRM also
+ specifies an address. */
+ IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
+ delta += len;
+
+ switch (gregLO3ofRM(modrm)) {
+
+ case 0: /* FLD double-real */
+ DIP("fldl %s\n", dis_buf);
+ fp_push();
+ put_ST(0, loadLE(Ity_F64, mkexpr(addr)));
+ break;
+
+ case 1: /* FISTTPQ m64 (SSE3) */
+ DIP("fistppll %s\n", dis_buf);
+ storeLE( mkexpr(addr),
+ binop(Iop_F64toI64S, mkU32(Irrm_ZERO), get_ST(0)) );
+ fp_pop();
+ break;
+
+ case 2: /* FST double-real */
+ DIP("fstl %s\n", dis_buf);
+ storeLE(mkexpr(addr), get_ST(0));
+ break;
+
+ case 3: /* FSTP double-real */
+ DIP("fstpl %s\n", dis_buf);
+ storeLE(mkexpr(addr), get_ST(0));
+ fp_pop();
+ break;
+
+//.. case 4: { /* FRSTOR m108 */
+//.. /* Uses dirty helper:
+//.. VexEmWarn x86g_do_FRSTOR ( VexGuestX86State*, Addr32 ) */
+//.. IRTemp ew = newTemp(Ity_I32);
+//.. IRDirty* d = unsafeIRDirty_0_N (
+//.. 0/*regparms*/,
+//.. "x86g_dirtyhelper_FRSTOR",
+//.. &x86g_dirtyhelper_FRSTOR,
+//.. mkIRExprVec_1( mkexpr(addr) )
+//.. );
+//.. d->needsBBP = True;
+//.. d->tmp = ew;
+//.. /* declare we're reading memory */
+//.. d->mFx = Ifx_Read;
+//.. d->mAddr = mkexpr(addr);
+//.. d->mSize = 108;
+//..
+//.. /* declare we're writing guest state */
+//.. d->nFxState = 5;
+//..
+//.. d->fxState[0].fx = Ifx_Write;
+//.. d->fxState[0].offset = OFFB_FTOP;
+//.. d->fxState[0].size = sizeof(UInt);
+//..
+//.. d->fxState[1].fx = Ifx_Write;
+//.. d->fxState[1].offset = OFFB_FPREGS;
+//.. d->fxState[1].size = 8 * sizeof(ULong);
+//..
+//.. d->fxState[2].fx = Ifx_Write;
+//.. d->fxState[2].offset = OFFB_FPTAGS;
+//.. d->fxState[2].size = 8 * sizeof(UChar);
+//..
+//.. d->fxState[3].fx = Ifx_Write;
+//.. d->fxState[3].offset = OFFB_FPROUND;
+//.. d->fxState[3].size = sizeof(UInt);
+//..
+//.. d->fxState[4].fx = Ifx_Write;
+//.. d->fxState[4].offset = OFFB_FC3210;
+//.. d->fxState[4].size = sizeof(UInt);
+//..
+//.. stmt( IRStmt_Dirty(d) );
+//..
+//.. /* ew contains any emulation warning we may need to
+//.. issue. If needed, side-exit to the next insn,
+//.. reporting the warning, so that Valgrind's dispatcher
+//.. sees the warning. */
+//.. put_emwarn( mkexpr(ew) );
+//.. stmt(
+//.. IRStmt_Exit(
+//.. binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
+//.. Ijk_EmWarn,
+//.. IRConst_U32( ((Addr32)guest_eip_bbstart)+delta)
+//.. )
+//.. );
+//..
+//.. DIP("frstor %s\n", dis_buf);
+//.. break;
+//.. }
+//..
+//.. case 6: { /* FNSAVE m108 */
+//.. /* Uses dirty helper:
+//.. void x86g_do_FSAVE ( VexGuestX86State*, UInt ) */
+//.. IRDirty* d = unsafeIRDirty_0_N (
+//.. 0/*regparms*/,
+//.. "x86g_dirtyhelper_FSAVE",
+//.. &x86g_dirtyhelper_FSAVE,
+//.. mkIRExprVec_1( mkexpr(addr) )
+//.. );
+//.. d->needsBBP = True;
+//.. /* declare we're writing memory */
+//.. d->mFx = Ifx_Write;
+//.. d->mAddr = mkexpr(addr);
+//.. d->mSize = 108;
+//..
+//.. /* declare we're reading guest state */
+//.. d->nFxState = 5;
+//..
+//.. d->fxState[0].fx = Ifx_Read;
+//.. d->fxState[0].offset = OFFB_FTOP;
+//.. d->fxState[0].size = sizeof(UInt);
+//..
+//.. d->fxState[1].fx = Ifx_Read;
+//.. d->fxState[1].offset = OFFB_FPREGS;
+//.. d->fxState[1].size = 8 * sizeof(ULong);
+//..
+//.. d->fxState[2].fx = Ifx_Read;
+//.. d->fxState[2].offset = OFFB_FPTAGS;
+//.. d->fxState[2].size = 8 * sizeof(UChar);
+//..
+//.. d->fxState[3].fx = Ifx_Read;
+//.. d->fxState[3].offset = OFFB_FPROUND;
+//.. d->fxState[3].size = sizeof(UInt);
+//..
+//.. d->fxState[4].fx = Ifx_Read;
+//.. d->fxState[4].offset = OFFB_FC3210;
+//.. d->fxState[4].size = sizeof(UInt);
+//..
+//.. stmt( IRStmt_Dirty(d) );
+//..
+//.. DIP("fnsave %s\n", dis_buf);
+//.. break;
+//.. }
+
+ case 7: { /* FNSTSW m16 */
+ IRExpr* sw = get_FPU_sw();
+ vassert(typeOfIRExpr(irsb->tyenv, sw) == Ity_I16);
+ storeLE( mkexpr(addr), sw );
+ DIP("fnstsw %s\n", dis_buf);
+ break;
+ }
+
+ default:
+ vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm));
+ vex_printf("first_opcode == 0xDD\n");
+ goto decode_fail;
+ }
+ } else {
+ delta++;
+ switch (modrm) {
+
+ case 0xC0 ... 0xC7: /* FFREE %st(?) */
+ r_dst = (UInt)modrm - 0xC0;
+ DIP("ffree %%st(%u)\n", r_dst);
+ put_ST_TAG ( r_dst, mkU8(0) );
+ break;
+
+ case 0xD0 ... 0xD7: /* FST %st(0),%st(?) */
+ r_dst = (UInt)modrm - 0xD0;
+ DIP("fst %%st(0),%%st(%u)\n", r_dst);
+            /* P4 manual says: "If the destination operand is a
+               non-empty register, the invalid-operation exception
+               is not generated."  Hence put_ST_UNCHECKED. */
+ put_ST_UNCHECKED(r_dst, get_ST(0));
+ break;
+
+ case 0xD8 ... 0xDF: /* FSTP %st(0),%st(?) */
+ r_dst = (UInt)modrm - 0xD8;
+ DIP("fstp %%st(0),%%st(%u)\n", r_dst);
+            /* P4 manual says: "If the destination operand is a
+               non-empty register, the invalid-operation exception
+               is not generated."  Hence put_ST_UNCHECKED. */
+ put_ST_UNCHECKED(r_dst, get_ST(0));
+ fp_pop();
+ break;
+
+ case 0xE0 ... 0xE7: /* FUCOM %st(0),%st(?) */
+ r_dst = (UInt)modrm - 0xE0;
+ DIP("fucom %%st(0),%%st(%u)\n", r_dst);
+ /* This forces C1 to zero, which isn't right. */
+ put_C3210(
+ unop(Iop_32Uto64,
+ binop( Iop_And32,
+ binop(Iop_Shl32,
+ binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
+ mkU8(8)),
+ mkU32(0x4500)
+ )));
+ break;
+
+ case 0xE8 ... 0xEF: /* FUCOMP %st(0),%st(?) */
+ r_dst = (UInt)modrm - 0xE8;
+ DIP("fucomp %%st(0),%%st(%u)\n", r_dst);
+ /* This forces C1 to zero, which isn't right. */
+ put_C3210(
+ unop(Iop_32Uto64,
+ binop( Iop_And32,
+ binop(Iop_Shl32,
+ binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
+ mkU8(8)),
+ mkU32(0x4500)
+ )));
+ fp_pop();
+ break;
+
+ default:
+ goto decode_fail;
+ }
+ }
+ }
+
+ /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDE opcodes +-+-+-+-+-+-+-+ */
+ else
+ if (first_opcode == 0xDE) {
+
+ if (modrm < 0xC0) {
+
+ /* bits 5,4,3 are an opcode extension, and the modRM also
+ specifies an address. */
+ IROp fop;
+ IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
+ delta += len;
+
+ switch (gregLO3ofRM(modrm)) {
+
+ case 0: /* FIADD m16int */ /* ST(0) += m16int */
+ DIP("fiaddw %s\n", dis_buf);
+ fop = Iop_AddF64;
+ goto do_fop_m16;
+
+ case 1: /* FIMUL m16int */ /* ST(0) *= m16int */
+ DIP("fimulw %s\n", dis_buf);
+ fop = Iop_MulF64;
+ goto do_fop_m16;
+
+ case 4: /* FISUB m16int */ /* ST(0) -= m16int */
+ DIP("fisubw %s\n", dis_buf);
+ fop = Iop_SubF64;
+ goto do_fop_m16;
+
+ case 5: /* FISUBR m16int */ /* ST(0) = m16int - ST(0) */
+ DIP("fisubrw %s\n", dis_buf);
+ fop = Iop_SubF64;
+ goto do_foprev_m16;
+
+ case 6: /* FIDIV m16int */ /* ST(0) /= m16int */
+ DIP("fisubw %s\n", dis_buf);
+ fop = Iop_DivF64;
+ goto do_fop_m16;
+
+ case 7: /* FIDIVR m16int */ /* ST(0) = m16int / ST(0) */
+ DIP("fidivrw %s\n", dis_buf);
+ fop = Iop_DivF64;
+ goto do_foprev_m16;
+
+ do_fop_m16:
+ put_ST_UNCHECKED(0,
+ triop(fop,
+ get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
+ get_ST(0),
+ unop(Iop_I32StoF64,
+ unop(Iop_16Sto32,
+ loadLE(Ity_I16, mkexpr(addr))))));
+ break;
+
+ do_foprev_m16:
+ put_ST_UNCHECKED(0,
+ triop(fop,
+ get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
+ unop(Iop_I32StoF64,
+ unop(Iop_16Sto32,
+ loadLE(Ity_I16, mkexpr(addr)))),
+ get_ST(0)));
+ break;
+
+ default:
+ vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm));
+ vex_printf("first_opcode == 0xDE\n");
+ goto decode_fail;
+ }
+
+ } else {
+
+ delta++;
+ switch (modrm) {
+
+ case 0xC0 ... 0xC7: /* FADDP %st(0),%st(?) */
+ fp_do_op_ST_ST ( "add", Iop_AddF64, 0, modrm - 0xC0, True );
+ break;
+
+ case 0xC8 ... 0xCF: /* FMULP %st(0),%st(?) */
+ fp_do_op_ST_ST ( "mul", Iop_MulF64, 0, modrm - 0xC8, True );
+ break;
+
+ case 0xD9: /* FCOMPP %st(0),%st(1) */
+ DIP("fcompp %%st(0),%%st(1)\n");
+ /* This forces C1 to zero, which isn't right. */
+ put_C3210(
+ unop(Iop_32Uto64,
+ binop( Iop_And32,
+ binop(Iop_Shl32,
+ binop(Iop_CmpF64, get_ST(0), get_ST(1)),
+ mkU8(8)),
+ mkU32(0x4500)
+ )));
+ fp_pop();
+ fp_pop();
+ break;
+
+ case 0xE0 ... 0xE7: /* FSUBRP %st(0),%st(?) */
+ fp_do_oprev_ST_ST ( "subr", Iop_SubF64, 0, modrm - 0xE0, True );
+ break;
+
+ case 0xE8 ... 0xEF: /* FSUBP %st(0),%st(?) */
+ fp_do_op_ST_ST ( "sub", Iop_SubF64, 0, modrm - 0xE8, True );
+ break;
+
+ case 0xF0 ... 0xF7: /* FDIVRP %st(0),%st(?) */
+ fp_do_oprev_ST_ST ( "divr", Iop_DivF64, 0, modrm - 0xF0, True );
+ break;
+
+ case 0xF8 ... 0xFF: /* FDIVP %st(0),%st(?) */
+ fp_do_op_ST_ST ( "div", Iop_DivF64, 0, modrm - 0xF8, True );
+ break;
+
+ default:
+ goto decode_fail;
+ }
+
+ }
+ }
+
+ /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDF opcodes +-+-+-+-+-+-+-+ */
+ else
+ if (first_opcode == 0xDF) {
+
+ if (modrm < 0xC0) {
+
+ /* bits 5,4,3 are an opcode extension, and the modRM also
+ specifies an address. */
+ IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
+ delta += len;
+
+ switch (gregLO3ofRM(modrm)) {
+
+ case 0: /* FILD m16int */
+ DIP("fildw %s\n", dis_buf);
+ fp_push();
+ put_ST(0, unop(Iop_I32StoF64,
+ unop(Iop_16Sto32,
+ loadLE(Ity_I16, mkexpr(addr)))));
+ break;
+
+ case 1: /* FISTTPS m16 (SSE3) */
+ DIP("fisttps %s\n", dis_buf);
+ storeLE( mkexpr(addr),
+ x87ishly_qnarrow_32_to_16(
+ binop(Iop_F64toI32S, mkU32(Irrm_ZERO), get_ST(0)) ));
+ fp_pop();
+ break;
+
+ case 2: /* FIST m16 */
+ DIP("fists %s\n", dis_buf);
+ storeLE( mkexpr(addr),
+ x87ishly_qnarrow_32_to_16(
+ binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) ));
+ break;
+
+ case 3: /* FISTP m16 */
+ DIP("fistps %s\n", dis_buf);
+ storeLE( mkexpr(addr),
+ x87ishly_qnarrow_32_to_16(
+ binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) ));
+ fp_pop();
+ break;
+
+ case 5: /* FILD m64 */
+ DIP("fildll %s\n", dis_buf);
+ fp_push();
+ put_ST(0, binop(Iop_I64StoF64,
+ get_roundingmode(),
+ loadLE(Ity_I64, mkexpr(addr))));
+ break;
+
+ case 7: /* FISTP m64 */
+ DIP("fistpll %s\n", dis_buf);
+ storeLE( mkexpr(addr),
+ binop(Iop_F64toI64S, get_roundingmode(), get_ST(0)) );
+ fp_pop();
+ break;
+
+ default:
+ vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm));
+ vex_printf("first_opcode == 0xDF\n");
+ goto decode_fail;
+ }
+
+ } else {
+
+ delta++;
+ switch (modrm) {
+
+ case 0xC0: /* FFREEP %st(0) */
+ DIP("ffreep %%st(%d)\n", 0);
+ put_ST_TAG ( 0, mkU8(0) );
+ fp_pop();
+ break;
+
+ case 0xE0: /* FNSTSW %ax */
+ DIP("fnstsw %%ax\n");
+            /* Dump the FPU status word in %AX.  The value is
+               synthesised exactly as in get_FPU_sw above:
+               ((ftop & 7) << 11) | (c3210 & 0x4700)
+            */
+            putIRegRAX( 2, get_FPU_sw() );
+ break;
+
+ case 0xE8 ... 0xEF: /* FUCOMIP %st(0),%st(?) */
+ fp_do_ucomi_ST0_STi( (UInt)modrm - 0xE8, True );
+ break;
+
+ case 0xF0 ... 0xF7: /* FCOMIP %st(0),%st(?) */
+ /* not really right since COMIP != UCOMIP */
+ fp_do_ucomi_ST0_STi( (UInt)modrm - 0xF0, True );
+ break;
+
+ default:
+ goto decode_fail;
+ }
+ }
+
+ }
+
+ else
+ goto decode_fail;
+
+ *decode_ok = True;
+ return delta;
+
+ decode_fail:
+ *decode_ok = False;
+ return delta;
+}
+
+
+/*------------------------------------------------------------*/
+/*--- ---*/
+/*--- MMX INSTRUCTIONS ---*/
+/*--- ---*/
+/*------------------------------------------------------------*/
+
+/* Effect of MMX insns on x87 FPU state (table 11-2 of
+ IA32 arch manual, volume 3):
+
+ Read from, or write to MMX register (viz, any insn except EMMS):
+ * All tags set to Valid (non-empty) -- FPTAGS[i] := nonzero
+ * FP stack pointer set to zero
+
+ EMMS:
+ * All tags set to Invalid (empty) -- FPTAGS[i] := zero
+ * FP stack pointer set to zero
+*/
+
+static void do_MMX_preamble ( void )
+{
+ Int i;
+ IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
+ IRExpr* zero = mkU32(0);
+ IRExpr* tag1 = mkU8(1);
+ put_ftop(zero);
+ for (i = 0; i < 8; i++)
+ stmt( IRStmt_PutI( descr, zero, i, tag1 ) );
+}
+
+static void do_EMMS_preamble ( void )
+{
+ Int i;
+ IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
+ IRExpr* zero = mkU32(0);
+ IRExpr* tag0 = mkU8(0);
+ put_ftop(zero);
+ for (i = 0; i < 8; i++)
+ stmt( IRStmt_PutI( descr, zero, i, tag0 ) );
+}
+
+
+static IRExpr* getMMXReg ( UInt archreg )
+{
+ vassert(archreg < 8);
+ return IRExpr_Get( OFFB_FPREGS + 8 * archreg, Ity_I64 );
+}
+
+
+static void putMMXReg ( UInt archreg, IRExpr* e )
+{
+ vassert(archreg < 8);
+ vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
+ stmt( IRStmt_Put( OFFB_FPREGS + 8 * archreg, e ) );
+}
+
+
+/* Helper for non-shift MMX insns. Note this is incomplete in the
+ sense that it does not first call do_MMX_preamble() -- that is the
+ responsibility of its caller. */
+
+static
+ULong dis_MMXop_regmem_to_reg ( VexAbiInfo* vbi,
+ Prefix pfx,
+ Long delta,
+ UChar opc,
+ HChar* name,
+ Bool show_granularity )
+{
+ HChar dis_buf[50];
+ UChar modrm = getUChar(delta);
+ Bool isReg = epartIsReg(modrm);
+ IRExpr* argL = NULL;
+ IRExpr* argR = NULL;
+ IRExpr* argG = NULL;
+ IRExpr* argE = NULL;
+ IRTemp res = newTemp(Ity_I64);
+
+ Bool invG = False;
+ IROp op = Iop_INVALID;
+ void* hAddr = NULL;
+ HChar* hName = NULL;
+ Bool eLeft = False;
+
+# define XXX(_name) do { hAddr = &_name; hName = #_name; } while (0)
+
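+   /* Each opcode is handled either by a native IR binop ('op') or,
+      where no suitable IROp exists, by a clean helper ('hAddr' and
+      'hName', set via XXX); exactly one of the two alternatives is
+      chosen by the switch below. */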
+ switch (opc) {
+ /* Original MMX ones */
+ case 0xFC: op = Iop_Add8x8; break;
+ case 0xFD: op = Iop_Add16x4; break;
+ case 0xFE: op = Iop_Add32x2; break;
+
+ case 0xEC: op = Iop_QAdd8Sx8; break;
+ case 0xED: op = Iop_QAdd16Sx4; break;
+
+ case 0xDC: op = Iop_QAdd8Ux8; break;
+ case 0xDD: op = Iop_QAdd16Ux4; break;
+
+ case 0xF8: op = Iop_Sub8x8; break;
+ case 0xF9: op = Iop_Sub16x4; break;
+ case 0xFA: op = Iop_Sub32x2; break;
+
+ case 0xE8: op = Iop_QSub8Sx8; break;
+ case 0xE9: op = Iop_QSub16Sx4; break;
+
+ case 0xD8: op = Iop_QSub8Ux8; break;
+ case 0xD9: op = Iop_QSub16Ux4; break;
+
+ case 0xE5: op = Iop_MulHi16Sx4; break;
+ case 0xD5: op = Iop_Mul16x4; break;
+ case 0xF5: XXX(amd64g_calculate_mmx_pmaddwd); break;
+
+ case 0x74: op = Iop_CmpEQ8x8; break;
+ case 0x75: op = Iop_CmpEQ16x4; break;
+ case 0x76: op = Iop_CmpEQ32x2; break;
+
+ case 0x64: op = Iop_CmpGT8Sx8; break;
+ case 0x65: op = Iop_CmpGT16Sx4; break;
+ case 0x66: op = Iop_CmpGT32Sx2; break;
+
+ case 0x6B: op = Iop_QNarrow32Sx2; eLeft = True; break;
+ case 0x63: op = Iop_QNarrow16Sx4; eLeft = True; break;
+ case 0x67: op = Iop_QNarrow16Ux4; eLeft = True; break;
+
+ case 0x68: op = Iop_InterleaveHI8x8; eLeft = True; break;
+ case 0x69: op = Iop_InterleaveHI16x4; eLeft = True; break;
+ case 0x6A: op = Iop_InterleaveHI32x2; eLeft = True; break;
+
+ case 0x60: op = Iop_InterleaveLO8x8; eLeft = True; break;
+ case 0x61: op = Iop_InterleaveLO16x4; eLeft = True; break;
+ case 0x62: op = Iop_InterleaveLO32x2; eLeft = True; break;
+
+ case 0xDB: op = Iop_And64; break;
+ case 0xDF: op = Iop_And64; invG = True; break;
+ case 0xEB: op = Iop_Or64; break;
+ case 0xEF: /* Possibly do better here if argL and argR are the
+ same reg */
+ op = Iop_Xor64; break;
+
+ /* Introduced in SSE1 */
+ case 0xE0: op = Iop_Avg8Ux8; break;
+ case 0xE3: op = Iop_Avg16Ux4; break;
+ case 0xEE: op = Iop_Max16Sx4; break;
+ case 0xDE: op = Iop_Max8Ux8; break;
+ case 0xEA: op = Iop_Min16Sx4; break;
+ case 0xDA: op = Iop_Min8Ux8; break;
+ case 0xE4: op = Iop_MulHi16Ux4; break;
+ case 0xF6: XXX(amd64g_calculate_mmx_psadbw); break;
+
+ /* Introduced in SSE2 */
+ case 0xD4: op = Iop_Add64; break;
+ case 0xFB: op = Iop_Sub64; break;
+
+ default:
+ vex_printf("\n0x%x\n", (Int)opc);
+ vpanic("dis_MMXop_regmem_to_reg");
+ }
+
+# undef XXX
+
+ argG = getMMXReg(gregLO3ofRM(modrm));
+ if (invG)
+ argG = unop(Iop_Not64, argG);
+
+ if (isReg) {
+ delta++;
+ argE = getMMXReg(eregLO3ofRM(modrm));
+ } else {
+ Int len;
+ IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
+ delta += len;
+ argE = loadLE(Ity_I64, mkexpr(addr));
+ }
+
+ if (eLeft) {
+ argL = argE;
+ argR = argG;
+ } else {
+ argL = argG;
+ argR = argE;
+ }
+
+ if (op != Iop_INVALID) {
+ vassert(hName == NULL);
+ vassert(hAddr == NULL);
+ assign(res, binop(op, argL, argR));
+ } else {
+ vassert(hName != NULL);
+ vassert(hAddr != NULL);
+ assign( res,
+ mkIRExprCCall(
+ Ity_I64,
+ 0/*regparms*/, hName, hAddr,
+ mkIRExprVec_2( argL, argR )
+ )
+ );
+ }
+
+ putMMXReg( gregLO3ofRM(modrm), mkexpr(res) );
+
+ DIP("%s%s %s, %s\n",
+ name, show_granularity ? nameMMXGran(opc & 3) : "",
+ ( isReg ? nameMMXReg(eregLO3ofRM(modrm)) : dis_buf ),
+ nameMMXReg(gregLO3ofRM(modrm)) );
+
+ return delta;
+}
+
+
+/* Vector by scalar shift of G by the amount specified at the bottom
+ of E. This is a straight copy of dis_SSE_shiftG_byE. */
+
+static ULong dis_MMX_shiftG_byE ( VexAbiInfo* vbi,
+ Prefix pfx, Long delta,
+ HChar* opname, IROp op )
+{
+ HChar dis_buf[50];
+ Int alen, size;
+ IRTemp addr;
+ Bool shl, shr, sar;
+ UChar rm = getUChar(delta);
+ IRTemp g0 = newTemp(Ity_I64);
+ IRTemp g1 = newTemp(Ity_I64);
+ IRTemp amt = newTemp(Ity_I64);
+ IRTemp amt8 = newTemp(Ity_I8);
+
+ if (epartIsReg(rm)) {
+ assign( amt, getMMXReg(eregLO3ofRM(rm)) );
+ DIP("%s %s,%s\n", opname,
+ nameMMXReg(eregLO3ofRM(rm)),
+ nameMMXReg(gregLO3ofRM(rm)) );
+ delta++;
+ } else {
+ addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
+ assign( amt, loadLE(Ity_I64, mkexpr(addr)) );
+ DIP("%s %s,%s\n", opname,
+ dis_buf,
+ nameMMXReg(gregLO3ofRM(rm)) );
+ delta += alen;
+ }
+ assign( g0, getMMXReg(gregLO3ofRM(rm)) );
+ assign( amt8, unop(Iop_64to8, mkexpr(amt)) );
+
+ shl = shr = sar = False;
+ size = 0;
+ switch (op) {
+      case Iop_ShlN16x4: shl = True; size = 16; break;
+ case Iop_ShlN32x2: shl = True; size = 32; break;
+ case Iop_Shl64: shl = True; size = 64; break;
+ case Iop_ShrN16x4: shr = True; size = 16; break;
+ case Iop_ShrN32x2: shr = True; size = 32; break;
+ case Iop_Shr64: shr = True; size = 64; break;
+ case Iop_SarN16x4: sar = True; size = 16; break;
+ case Iop_SarN32x2: sar = True; size = 32; break;
+ default: vassert(0);
+ }
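+   /* Mirror hardware semantics for out-of-range shift amounts:
+      logical shifts (shl/shr) by >= the lane size produce zero,
+      while arithmetic shifts (sar) saturate at size-1, replicating
+      the sign bit.  The amount is only known at run time, so the
+      choice is made with a Mux0X. */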
+
+ if (shl || shr) {
+ assign(
+ g1,
+ IRExpr_Mux0X(
+ unop(Iop_1Uto8,binop(Iop_CmpLT64U,mkexpr(amt),mkU64(size))),
+ mkU64(0),
+ binop(op, mkexpr(g0), mkexpr(amt8))
+ )
+ );
+ } else
+ if (sar) {
+ assign(
+ g1,
+ IRExpr_Mux0X(
+ unop(Iop_1Uto8,binop(Iop_CmpLT64U,mkexpr(amt),mkU64(size))),
+ binop(op, mkexpr(g0), mkU8(size-1)),
+ binop(op, mkexpr(g0), mkexpr(amt8))
+ )
+ );
+ } else {
+ vassert(0);
+ }
+
+ putMMXReg( gregLO3ofRM(rm), mkexpr(g1) );
+ return delta;
+}
+
+
+/* Vector by scalar shift of E by an immediate byte. This is a
+ straight copy of dis_SSE_shiftE_imm. */
+
+static
+ULong dis_MMX_shiftE_imm ( Long delta, HChar* opname, IROp op )
+{
+ Bool shl, shr, sar;
+ UChar rm = getUChar(delta);
+ IRTemp e0 = newTemp(Ity_I64);
+ IRTemp e1 = newTemp(Ity_I64);
+ UChar amt, size;
+ vassert(epartIsReg(rm));
+ vassert(gregLO3ofRM(rm) == 2
+ || gregLO3ofRM(rm) == 4 || gregLO3ofRM(rm) == 6);
+ amt = getUChar(delta+1);
+ delta += 2;
+ DIP("%s $%d,%s\n", opname,
+ (Int)amt,
+ nameMMXReg(eregLO3ofRM(rm)) );
+
+ assign( e0, getMMXReg(eregLO3ofRM(rm)) );
+
+ shl = shr = sar = False;
+ size = 0;
+ switch (op) {
+ case Iop_ShlN16x4: shl = True; size = 16; break;
+ case Iop_ShlN32x2: shl = True; size = 32; break;
+ case Iop_Shl64: shl = True; size = 64; break;
+ case Iop_SarN16x4: sar = True; size = 16; break;
+ case Iop_SarN32x2: sar = True; size = 32; break;
+ case Iop_ShrN16x4: shr = True; size = 16; break;
+ case Iop_ShrN32x2: shr = True; size = 32; break;
+ case Iop_Shr64: shr = True; size = 64; break;
+ default: vassert(0);
+ }
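+   /* Same out-of-range semantics as in dis_MMX_shiftG_byE, but here
+      the amount is an immediate known at translation time, so the
+      selection is resolved now rather than with a Mux0X. */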
+
+ if (shl || shr) {
+ assign( e1, amt >= size
+ ? mkU64(0)
+ : binop(op, mkexpr(e0), mkU8(amt))
+ );
+ } else
+ if (sar) {
+ assign( e1, amt >= size
+ ? binop(op, mkexpr(e0), mkU8(size-1))
+ : binop(op, mkexpr(e0), mkU8(amt))
+ );
+ } else {
+ vassert(0);
+ }
+
+ putMMXReg( eregLO3ofRM(rm), mkexpr(e1) );
+ return delta;
+}
+
+
+/* Completely handle all MMX instructions except emms. */
+
+static
+ULong dis_MMX ( Bool* decode_ok,
+ VexAbiInfo* vbi, Prefix pfx, Int sz, Long delta )
+{
+ Int len;
+ UChar modrm;
+ HChar dis_buf[50];
+ UChar opc = getUChar(delta);
+ delta++;
+
+ /* dis_MMX handles all insns except emms. */
+ do_MMX_preamble();
+
+ switch (opc) {
+
+ case 0x6E:
+ if (sz == 4) {
+ /* MOVD (src)ireg32-or-mem32 (E), (dst)mmxreg (G)*/
+ modrm = getUChar(delta);
+ if (epartIsReg(modrm)) {
+ delta++;
+ putMMXReg(
+ gregLO3ofRM(modrm),
+ binop( Iop_32HLto64,
+ mkU32(0),
+ getIReg32(eregOfRexRM(pfx,modrm)) ) );
+ DIP("movd %s, %s\n",
+ nameIReg32(eregOfRexRM(pfx,modrm)),
+ nameMMXReg(gregLO3ofRM(modrm)));
+ } else {
+ IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
+ delta += len;
+ putMMXReg(
+ gregLO3ofRM(modrm),
+ binop( Iop_32HLto64,
+ mkU32(0),
+ loadLE(Ity_I32, mkexpr(addr)) ) );
+ DIP("movd %s, %s\n", dis_buf, nameMMXReg(gregLO3ofRM(modrm)));
+ }
+ }
+ else
+ if (sz == 8) {
+ /* MOVD (src)ireg64-or-mem64 (E), (dst)mmxreg (G)*/
+ modrm = getUChar(delta);
+ if (epartIsReg(modrm)) {
+ delta++;
+ putMMXReg( gregLO3ofRM(modrm),
+ getIReg64(eregOfRexRM(pfx,modrm)) );
+ DIP("movd %s, %s\n",
+ nameIReg64(eregOfRexRM(pfx,modrm)),
+ nameMMXReg(gregLO3ofRM(modrm)));
+ } else {
+ IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
+ delta += len;
+ putMMXReg( gregLO3ofRM(modrm),
+ loadLE(Ity_I64, mkexpr(addr)) );
+ DIP("movd{64} %s, %s\n", dis_buf, nameMMXReg(gregLO3ofRM(modrm)));
+ }
+ }
+ else {
+ goto mmx_decode_failure;
+ }
+ break;
+
+ case 0x7E:
+ if (sz == 4) {
+ /* MOVD (src)mmxreg (G), (dst)ireg32-or-mem32 (E) */
+ modrm = getUChar(delta);
+ if (epartIsReg(modrm)) {
+ delta++;
+ putIReg32( eregOfRexRM(pfx,modrm),
+ unop(Iop_64to32, getMMXReg(gregLO3ofRM(modrm)) ) );
+ DIP("movd %s, %s\n",
+ nameMMXReg(gregLO3ofRM(modrm)),
+ nameIReg32(eregOfRexRM(pfx,modrm)));
+ } else {
+ IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
+ delta += len;
+ storeLE( mkexpr(addr),
+ unop(Iop_64to32, getMMXReg(gregLO3ofRM(modrm)) ) );
+ DIP("movd %s, %s\n", nameMMXReg(gregLO3ofRM(modrm)), dis_buf);
+ }
+ }
+ else
+ if (sz == 8) {
+ /* MOVD (src)mmxreg (G), (dst)ireg64-or-mem64 (E) */
+ modrm = getUChar(delta);
+ if (epartIsReg(modrm)) {
+ delta++;
+ putIReg64( eregOfRexRM(pfx,modrm),
+ getMMXReg(gregLO3ofRM(modrm)) );
+ DIP("movd %s, %s\n",
+ nameMMXReg(gregLO3ofRM(modrm)),
+ nameIReg64(eregOfRexRM(pfx,modrm)));
+ } else {
+ IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
+ delta += len;
+ storeLE( mkexpr(addr),
+ getMMXReg(gregLO3ofRM(modrm)) );
+ DIP("movd{64} %s, %s\n", nameMMXReg(gregLO3ofRM(modrm)), dis_buf);
+ }
+ } else {
+ goto mmx_decode_failure;
+ }
+ break;
+
+ case 0x6F:
+ /* MOVQ (src)mmxreg-or-mem, (dst)mmxreg */
+ if (sz != 4
+ && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx)))
+ goto mmx_decode_failure;
+ modrm = getUChar(delta);
+ if (epartIsReg(modrm)) {
+ delta++;
+ putMMXReg( gregLO3ofRM(modrm), getMMXReg(eregLO3ofRM(modrm)) );
+ DIP("movq %s, %s\n",
+ nameMMXReg(eregLO3ofRM(modrm)),
+ nameMMXReg(gregLO3ofRM(modrm)));
+ } else {
+ IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
+ delta += len;
+ putMMXReg( gregLO3ofRM(modrm), loadLE(Ity_I64, mkexpr(addr)) );
+ DIP("movq %s, %s\n",
+ dis_buf, nameMMXReg(gregLO3ofRM(modrm)));
+ }
+ break;
+
+ case 0x7F:
+ /* MOVQ (src)mmxreg, (dst)mmxreg-or-mem */
+ if (sz != 4
+ && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx)))
+ goto mmx_decode_failure;
+ modrm = getUChar(delta);
+ if (epartIsReg(modrm)) {
+         /* The assembler doesn't appear to generate the reg-reg
+            form of this, so treat it as undecodable. */
+ goto mmx_decode_failure;
+ } else {
+ IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
+ delta += len;
+ storeLE( mkexpr(addr), getMMXReg(gregLO3ofRM(modrm)) );
+ DIP("mov(nt)q %s, %s\n",
+ nameMMXReg(gregLO3ofRM(modrm)), dis_buf);
+ }
+ break;
+
+ case 0xFC:
+ case 0xFD:
+ case 0xFE: /* PADDgg (src)mmxreg-or-mem, (dst)mmxreg */
+ if (sz != 4)
+ goto mmx_decode_failure;
+ delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "padd", True );
+ break;
+
+ case 0xEC:
+ case 0xED: /* PADDSgg (src)mmxreg-or-mem, (dst)mmxreg */
+ if (sz != 4
+ && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx)))
+ goto mmx_decode_failure;
+ delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "padds", True );
+ break;
+
+ case 0xDC:
+ case 0xDD: /* PADDUSgg (src)mmxreg-or-mem, (dst)mmxreg */
+ if (sz != 4)
+ goto mmx_decode_failure;
+ delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "paddus", True );
+ break;
+
+ case 0xF8:
+ case 0xF9:
+ case 0xFA: /* PSUBgg (src)mmxreg-or-mem, (dst)mmxreg */
+ if (sz != 4)
+ goto mmx_decode_failure;
+ delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "psub", True );
+ break;
+
+ case 0xE8:
+ case 0xE9: /* PSUBSgg (src)mmxreg-or-mem, (dst)mmxreg */
+ if (sz != 4)
+ goto mmx_decode_failure;
+ delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "psubs", True );
+ break;
+
+ case 0xD8:
+ case 0xD9: /* PSUBUSgg (src)mmxreg-or-mem, (dst)mmxreg */
+ if (sz != 4)
+ goto mmx_decode_failure;
+ delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "psubus", True );
+ break;
+
+ case 0xE5: /* PMULHW (src)mmxreg-or-mem, (dst)mmxreg */
+ if (sz != 4)
+ goto mmx_decode_failure;
+ delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pmulhw", False );
+ break;
+
+ case 0xD5: /* PMULLW (src)mmxreg-or-mem, (dst)mmxreg */
+ if (sz != 4)
+ goto mmx_decode_failure;
+ delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pmullw", False );
+ break;
+
+ case 0xF5: /* PMADDWD (src)mmxreg-or-mem, (dst)mmxreg */
+ vassert(sz == 4);
+ delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pmaddwd", False );
+ break;
+
+ case 0x74:
+ case 0x75:
+ case 0x76: /* PCMPEQgg (src)mmxreg-or-mem, (dst)mmxreg */
+ if (sz != 4)
+ goto mmx_decode_failure;
+ delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pcmpeq", True );
+ break;
+
+ case 0x64:
+ case 0x65:
+ case 0x66: /* PCMPGTgg (src)mmxreg-or-mem, (dst)mmxreg */
+ if (sz != 4)
+ goto mmx_decode_failure;
+ delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pcmpgt", True );
+ break;
+
+ case 0x6B: /* PACKSSDW (src)mmxreg-or-mem, (dst)mmxreg */
+ if (sz != 4)
+ goto mmx_decode_failure;
+ delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "packssdw", False );
+ break;
+
+ case 0x63: /* PACKSSWB (src)mmxreg-or-mem, (dst)mmxreg */
+ if (sz != 4)
+ goto mmx_decode_failure;
+ delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "packsswb", False );
+ break;
+
+ case 0x67: /* PACKUSWB (src)mmxreg-or-mem, (dst)mmxreg */
+ if (sz != 4)
+ goto mmx_decode_failure;
+ delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "packuswb", False );
+ break;
+
+ case 0x68:
+ case 0x69:
+ case 0x6A: /* PUNPCKHgg (src)mmxreg-or-mem, (dst)mmxreg */
+ if (sz != 4
+ && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx)))
+ goto mmx_decode_failure;
+ delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "punpckh", True );
+ break;
+
+ case 0x60:
+ case 0x61:
+ case 0x62: /* PUNPCKLgg (src)mmxreg-or-mem, (dst)mmxreg */
+ if (sz != 4
+ && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx)))
+ goto mmx_decode_failure;
+ delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "punpckl", True );
+ break;
+
+ case 0xDB: /* PAND (src)mmxreg-or-mem, (dst)mmxreg */
+ if (sz != 4)
+ goto mmx_decode_failure;
+ delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pand", False );
+ break;
+
+ case 0xDF: /* PANDN (src)mmxreg-or-mem, (dst)mmxreg */
+ if (sz != 4)
+ goto mmx_decode_failure;
+ delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pandn", False );
+ break;
+
+ case 0xEB: /* POR (src)mmxreg-or-mem, (dst)mmxreg */
+ if (sz != 4)
+ goto mmx_decode_failure;
+ delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "por", False );
+ break;
+
+ case 0xEF: /* PXOR (src)mmxreg-or-mem, (dst)mmxreg */
+ if (sz != 4)
+ goto mmx_decode_failure;
+ delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pxor", False );
+ break;
+
+# define SHIFT_BY_REG(_name,_op) \
+ delta = dis_MMX_shiftG_byE(vbi, pfx, delta, _name, _op); \
+ break;
+
+ /* PSLLgg (src)mmxreg-or-mem, (dst)mmxreg */
+ case 0xF1: SHIFT_BY_REG("psllw", Iop_ShlN16x4);
+ case 0xF2: SHIFT_BY_REG("pslld", Iop_ShlN32x2);
+ case 0xF3: SHIFT_BY_REG("psllq", Iop_Shl64);
+
+ /* PSRLgg (src)mmxreg-or-mem, (dst)mmxreg */
+ case 0xD1: SHIFT_BY_REG("psrlw", Iop_ShrN16x4);
+ case 0xD2: SHIFT_BY_REG("psrld", Iop_ShrN32x2);
+ case 0xD3: SHIFT_BY_REG("psrlq", Iop_Shr64);
+
+ /* PSRAgg (src)mmxreg-or-mem, (dst)mmxreg */
+ case 0xE1: SHIFT_BY_REG("psraw", Iop_SarN16x4);
+ case 0xE2: SHIFT_BY_REG("psrad", Iop_SarN32x2);
+
+# undef SHIFT_BY_REG
+
+ case 0x71:
+ case 0x72:
+ case 0x73: {
+ /* (sz==4): PSLLgg/PSRAgg/PSRLgg mmxreg by imm8 */
+ UChar byte2, subopc;
+ if (sz != 4)
+ goto mmx_decode_failure;
+ byte2 = getUChar(delta); /* amode / sub-opcode */
+ subopc = toUChar( (byte2 >> 3) & 7 );
+
+# define SHIFT_BY_IMM(_name,_op) \
+ do { delta = dis_MMX_shiftE_imm(delta,_name,_op); \
+ } while (0)
+
+ if (subopc == 2 /*SRL*/ && opc == 0x71)
+ SHIFT_BY_IMM("psrlw", Iop_ShrN16x4);
+ else if (subopc == 2 /*SRL*/ && opc == 0x72)
+ SHIFT_BY_IMM("psrld", Iop_ShrN32x2);
+ else if (subopc == 2 /*SRL*/ && opc == 0x73)
+ SHIFT_BY_IMM("psrlq", Iop_Shr64);
+
+ else if (subopc == 4 /*SAR*/ && opc == 0x71)
+ SHIFT_BY_IMM("psraw", Iop_SarN16x4);
+ else if (subopc == 4 /*SAR*/ && opc == 0x72)
+ SHIFT_BY_IMM("psrad", Iop_SarN32x2);
+
+ else if (subopc == 6 /*SHL*/ && opc == 0x71)
+ SHIFT_BY_IMM("psllw", Iop_ShlN16x4);
+ else if (subopc == 6 /*SHL*/ && opc == 0x72)
+ SHIFT_BY_IMM("pslld", Iop_ShlN32x2);
+ else if (subopc == 6 /*SHL*/ && opc == 0x73)
+ SHIFT_BY_IMM("psllq", Iop_Shl64);
+
+ else goto mmx_decode_failure;
+
+# undef SHIFT_BY_IMM
+ break;
+ }
+
+ case 0xF7: {
+ IRTemp addr = newTemp(Ity_I64);
+ IRTemp regD = newTemp(Ity_I64);
+ IRTemp regM = newTemp(Ity_I64);
+ IRTemp mask = newTemp(Ity_I64);
+ IRTemp olddata = newTemp(Ity_I64);
+ IRTemp newdata = newTemp(Ity_I64);
+
+ modrm = getUChar(delta);
+ if (sz != 4 || (!epartIsReg(modrm)))
+ goto mmx_decode_failure;
+ delta++;
+
+ assign( addr, handleAddrOverrides( vbi, pfx, getIReg64(R_RDI) ));
+ assign( regM, getMMXReg( eregLO3ofRM(modrm) ));
+ assign( regD, getMMXReg( gregLO3ofRM(modrm) ));
+ assign( mask, binop(Iop_SarN8x8, mkexpr(regM), mkU8(7)) );
+ assign( olddata, loadLE( Ity_I64, mkexpr(addr) ));
+ assign( newdata,
+ binop(Iop_Or64,
+ binop(Iop_And64,
+ mkexpr(regD),
+ mkexpr(mask) ),
+ binop(Iop_And64,
+ mkexpr(olddata),
+ unop(Iop_Not64, mkexpr(mask)))) );
+ storeLE( mkexpr(addr), mkexpr(newdata) );
+ DIP("maskmovq %s,%s\n", nameMMXReg( eregLO3ofRM(modrm) ),
+ nameMMXReg( gregLO3ofRM(modrm) ) );
+ break;
+ }
+
+ /* --- MMX decode failure --- */
+ default:
+ mmx_decode_failure:
+ *decode_ok = False;
+ return delta; /* ignored */
+
+ }
+
+ *decode_ok = True;
+ return delta;
+}
+
+
+/*------------------------------------------------------------*/
+/*--- More misc arithmetic and other obscure insns. ---*/
+/*------------------------------------------------------------*/
+
+/* Generate base << amt with vacated places filled with stuff
+ from xtra. amt guaranteed in 0 .. 63. */
+static
+IRExpr* shiftL64_with_extras ( IRTemp base, IRTemp xtra, IRTemp amt )
+{
+ /* if amt == 0
+ then base
+ else (base << amt) | (xtra >>u (64-amt))
+ */
+ return
+ IRExpr_Mux0X(
+ mkexpr(amt),
+ mkexpr(base),
+ binop(Iop_Or64,
+ binop(Iop_Shl64, mkexpr(base), mkexpr(amt)),
+ binop(Iop_Shr64, mkexpr(xtra),
+ binop(Iop_Sub8, mkU8(64), mkexpr(amt)))
+ )
+ );
+}
+
+/* Generate base >>u amt with vacated places filled with stuff
+ from xtra. amt guaranteed in 0 .. 63. */
+static
+IRExpr* shiftR64_with_extras ( IRTemp xtra, IRTemp base, IRTemp amt )
+{
+ /* if amt == 0
+ then base
+ else (base >>u amt) | (xtra << (64-amt))
+ */
+ return
+ IRExpr_Mux0X(
+ mkexpr(amt),
+ mkexpr(base),
+ binop(Iop_Or64,
+ binop(Iop_Shr64, mkexpr(base), mkexpr(amt)),
+ binop(Iop_Shl64, mkexpr(xtra),
+ binop(Iop_Sub8, mkU8(64), mkexpr(amt)))
+ )
+ );
+}
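+
+/* Illustrative scalar model of the two helpers above (a sketch for
+   documentation only; the name is hypothetical and the decoder does
+   not call it).  It also shows why the amt == 0 case is treated
+   specially: otherwise the right-shift amount would be 64, which is
+   undefined in C and in IR alike. */
+static ULong ref_shiftL64_with_extras ( ULong base, ULong xtra, UInt amt )
+{
+   /* amt guaranteed in 0 .. 63 */
+   if (amt == 0) return base;
+   return (base << amt) | (xtra >> (64 - amt));
+}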
+
+/* Double length left and right shifts. Apparently only required in
+ v-size (no b- variant). */
+static
+ULong dis_SHLRD_Gv_Ev ( VexAbiInfo* vbi,
+ Prefix pfx,
+ Long delta, UChar modrm,
+ Int sz,
+ IRExpr* shift_amt,
+ Bool amt_is_literal,
+ HChar* shift_amt_txt,
+ Bool left_shift )
+{
+   /* shift_amt :: Ity_I8 is the amount to shift.  shift_amt_txt is used
+      for printing it.  On entry, delta points at the modrm byte. */
+ Int len;
+ HChar dis_buf[50];
+
+ IRType ty = szToITy(sz);
+ IRTemp gsrc = newTemp(ty);
+ IRTemp esrc = newTemp(ty);
+ IRTemp addr = IRTemp_INVALID;
+ IRTemp tmpSH = newTemp(Ity_I8);
+ IRTemp tmpSS = newTemp(Ity_I8);
+ IRTemp tmp64 = IRTemp_INVALID;
+ IRTemp res64 = IRTemp_INVALID;
+ IRTemp rss64 = IRTemp_INVALID;
+ IRTemp resTy = IRTemp_INVALID;
+ IRTemp rssTy = IRTemp_INVALID;
+ Int mask = sz==8 ? 63 : 31;
+
+ vassert(sz == 2 || sz == 4 || sz == 8);
+
+ /* The E-part is the destination; this is shifted. The G-part
+ supplies bits to be shifted into the E-part, but is not
+ changed.
+
+ If shifting left, form a double-length word with E at the top
+ and G at the bottom, and shift this left. The result is then in
+ the high part.
+
+ If shifting right, form a double-length word with G at the top
+ and E at the bottom, and shift this right. The result is then
+ at the bottom. */
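+
+   /* Worked example (illustrative): for shldl $4, %ebx, %eax with
+      %eax = 0xAAAAAAAA (E) and %ebx = 0xBBBBBBBB (G), the
+      double-length word is 0xAAAAAAAABBBBBBBB; shifting it left by 4
+      gives 0xAAAAAAABBBBBBBB0, and the new E is the high half,
+      0xAAAAAAAB. */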
+
+ /* Fetch the operands. */
+
+ assign( gsrc, getIRegG(sz, pfx, modrm) );
+
+ if (epartIsReg(modrm)) {
+ delta++;
+ assign( esrc, getIRegE(sz, pfx, modrm) );
+ DIP("sh%cd%c %s, %s, %s\n",
+ ( left_shift ? 'l' : 'r' ), nameISize(sz),
+ shift_amt_txt,
+ nameIRegG(sz, pfx, modrm), nameIRegE(sz, pfx, modrm));
+ } else {
+ addr = disAMode ( &len, vbi, pfx, delta, dis_buf,
+ /* # bytes following amode */
+ amt_is_literal ? 1 : 0 );
+ delta += len;
+ assign( esrc, loadLE(ty, mkexpr(addr)) );
+ DIP("sh%cd%c %s, %s, %s\n",
+ ( left_shift ? 'l' : 'r' ), nameISize(sz),
+ shift_amt_txt,
+ nameIRegG(sz, pfx, modrm), dis_buf);
+ }
+
+ /* Calculate the masked shift amount (tmpSH), the masked subshift
+ amount (tmpSS), the shifted value (res64) and the subshifted
+ value (rss64). */
+
+ assign( tmpSH, binop(Iop_And8, shift_amt, mkU8(mask)) );
+ assign( tmpSS, binop(Iop_And8,
+ binop(Iop_Sub8, mkexpr(tmpSH), mkU8(1) ),
+ mkU8(mask)));
+
+ tmp64 = newTemp(Ity_I64);
+ res64 = newTemp(Ity_I64);
+ rss64 = newTemp(Ity_I64);
+
+ if (sz == 2 || sz == 4) {
+
+ /* G is xtra; E is data */
+ /* what a freaking nightmare: */
+ if (sz == 4 && left_shift) {
+ assign( tmp64, binop(Iop_32HLto64, mkexpr(esrc), mkexpr(gsrc)) );
+ assign( res64,
+ binop(Iop_Shr64,
+ binop(Iop_Shl64, mkexpr(tmp64), mkexpr(tmpSH)),
+ mkU8(32)) );
+ assign( rss64,
+ binop(Iop_Shr64,
+ binop(Iop_Shl64, mkexpr(tmp64), mkexpr(tmpSS)),
+ mkU8(32)) );
+ }
+ else
+ if (sz == 4 && !left_shift) {
+ assign( tmp64, binop(Iop_32HLto64, mkexpr(gsrc), mkexpr(esrc)) );
+ assign( res64, binop(Iop_Shr64, mkexpr(tmp64), mkexpr(tmpSH)) );
+ assign( rss64, binop(Iop_Shr64, mkexpr(tmp64), mkexpr(tmpSS)) );
+ }
+ else
+ if (sz == 2 && left_shift) {
+ assign( tmp64,
+ binop(Iop_32HLto64,
+ binop(Iop_16HLto32, mkexpr(esrc), mkexpr(gsrc)),
+ binop(Iop_16HLto32, mkexpr(gsrc), mkexpr(gsrc))
+ ));
+ /* result formed by shifting [esrc'gsrc'gsrc'gsrc] */
+ assign( res64,
+ binop(Iop_Shr64,
+ binop(Iop_Shl64, mkexpr(tmp64), mkexpr(tmpSH)),
+ mkU8(48)) );
+ /* subshift formed by shifting [esrc'0000'0000'0000] */
+ assign( rss64,
+ binop(Iop_Shr64,
+ binop(Iop_Shl64,
+ binop(Iop_Shl64, unop(Iop_16Uto64, mkexpr(esrc)),
+ mkU8(48)),
+ mkexpr(tmpSS)),
+ mkU8(48)) );
+ }
+ else
+ if (sz == 2 && !left_shift) {
+ assign( tmp64,
+ binop(Iop_32HLto64,
+ binop(Iop_16HLto32, mkexpr(gsrc), mkexpr(gsrc)),
+ binop(Iop_16HLto32, mkexpr(gsrc), mkexpr(esrc))
+ ));
+ /* result formed by shifting [gsrc'gsrc'gsrc'esrc] */
+ assign( res64, binop(Iop_Shr64, mkexpr(tmp64), mkexpr(tmpSH)) );
+ /* subshift formed by shifting [0000'0000'0000'esrc] */
+ assign( rss64, binop(Iop_Shr64,
+ unop(Iop_16Uto64, mkexpr(esrc)),
+ mkexpr(tmpSS)) );
+ }
+
+ } else {
+
+ vassert(sz == 8);
+ if (left_shift) {
+ assign( res64, shiftL64_with_extras( esrc, gsrc, tmpSH ));
+ assign( rss64, shiftL64_with_extras( esrc, gsrc, tmpSS ));
+ } else {
+ assign( res64, shiftR64_with_extras( gsrc, esrc, tmpSH ));
+ assign( rss64, shiftR64_with_extras( gsrc, esrc, tmpSS ));
+ }
+
+ }
+
+ resTy = newTemp(ty);
+ rssTy = newTemp(ty);
+ assign( resTy, narrowTo(ty, mkexpr(res64)) );
+ assign( rssTy, narrowTo(ty, mkexpr(rss64)) );
+
+ /* Put result back and write the flags thunk. */
+ setFlags_DEP1_DEP2_shift ( left_shift ? Iop_Shl64 : Iop_Sar64,
+ resTy, rssTy, ty, tmpSH );
+
+ if (epartIsReg(modrm)) {
+ putIRegE(sz, pfx, modrm, mkexpr(resTy));
+ } else {
+ storeLE( mkexpr(addr), mkexpr(resTy) );
+ }
+
+ if (amt_is_literal) delta++;
+ return delta;
+}
+
+
+/* Handle BT/BTS/BTR/BTC Gv, Ev. Apparently b-size is not
+ required. */
+
+typedef enum { BtOpNone, BtOpSet, BtOpReset, BtOpComp } BtOp;
+
+static HChar* nameBtOp ( BtOp op )
+{
+ switch (op) {
+ case BtOpNone: return "";
+ case BtOpSet: return "s";
+ case BtOpReset: return "r";
+ case BtOpComp: return "c";
+ default: vpanic("nameBtOp(amd64)");
+ }
+}
+
+
+static
+ULong dis_bt_G_E ( VexAbiInfo* vbi,
+ Prefix pfx, Int sz, Long delta, BtOp op )
+{
+ HChar dis_buf[50];
+ UChar modrm;
+ Int len;
+ IRTemp t_fetched, t_bitno0, t_bitno1, t_bitno2, t_addr0,
+ t_addr1, t_rsp, t_mask, t_new;
+
+ vassert(sz == 2 || sz == 4 || sz == 8);
+
+ t_fetched = t_bitno0 = t_bitno1 = t_bitno2
+ = t_addr0 = t_addr1 = t_rsp
+ = t_mask = t_new = IRTemp_INVALID;
+
+ t_fetched = newTemp(Ity_I8);
+ t_new = newTemp(Ity_I8);
+ t_bitno0 = newTemp(Ity_I64);
+ t_bitno1 = newTemp(Ity_I64);
+ t_bitno2 = newTemp(Ity_I8);
+ t_addr1 = newTemp(Ity_I64);
+ modrm = getUChar(delta);
+
+ assign( t_bitno0, widenSto64(getIRegG(sz, pfx, modrm)) );
+
+ if (epartIsReg(modrm)) {
+ delta++;
+ /* Get it onto the client's stack. Oh, this is a horrible
+ kludge. See https://bugs.kde.org/show_bug.cgi?id=245925.
+ Because of the ELF ABI stack redzone, there may be live data
+ up to 128 bytes below %RSP. So we can't just push it on the
+ stack, else we may wind up trashing live data, and causing
+ impossible-to-find simulation errors. (Yes, this did
+         happen.)  So we need to drop RSP by at least 128 before
+ pushing it. That unfortunately means hitting Memcheck's
+ fast-case painting code. Ideally we should drop more than
+ 128, to reduce the chances of breaking buggy programs that
+ have live data below -128(%RSP). Memcheck fast-cases moves
+ of 288 bytes due to the need to handle ppc64-linux quickly,
+ so let's use 288. Of course the real fix is to get rid of
+ this kludge entirely. */
+ t_rsp = newTemp(Ity_I64);
+ t_addr0 = newTemp(Ity_I64);
+
+ vassert(vbi->guest_stack_redzone_size == 128);
+ assign( t_rsp, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(288)) );
+ putIReg64(R_RSP, mkexpr(t_rsp));
+
+ storeLE( mkexpr(t_rsp), getIRegE(sz, pfx, modrm) );
+
+ /* Make t_addr0 point at it. */
+ assign( t_addr0, mkexpr(t_rsp) );
+
+ /* Mask out upper bits of the shift amount, since we're doing a
+ reg. */
+ assign( t_bitno1, binop(Iop_And64,
+ mkexpr(t_bitno0),
+ mkU64(sz == 8 ? 63 : sz == 4 ? 31 : 15)) );
+
+ } else {
+ t_addr0 = disAMode ( &len, vbi, pfx, delta, dis_buf, 0 );
+ delta += len;
+ assign( t_bitno1, mkexpr(t_bitno0) );
+ }
+
+ /* At this point: t_addr0 is the address being operated on. If it
+ was a reg, we will have pushed it onto the client's stack.
+ t_bitno1 is the bit number, suitably masked in the case of a
+ reg. */
+
+ /* Now the main sequence. */
+ assign( t_addr1,
+ binop(Iop_Add64,
+ mkexpr(t_addr0),
+ binop(Iop_Sar64, mkexpr(t_bitno1), mkU8(3))) );
+
+ /* t_addr1 now holds effective address */
+
+ assign( t_bitno2,
+ unop(Iop_64to8,
+ binop(Iop_And64, mkexpr(t_bitno1), mkU64(7))) );
+
+ /* t_bitno2 contains offset of bit within byte */
+
+ if (op != BtOpNone) {
+ t_mask = newTemp(Ity_I8);
+ assign( t_mask, binop(Iop_Shl8, mkU8(1), mkexpr(t_bitno2)) );
+ }
+
+ /* t_mask is now a suitable byte mask */
+
+ assign( t_fetched, loadLE(Ity_I8, mkexpr(t_addr1)) );
+
+ if (op != BtOpNone) {
+ switch (op) {
+ case BtOpSet:
+ assign( t_new,
+ binop(Iop_Or8, mkexpr(t_fetched), mkexpr(t_mask)) );
+ break;
+ case BtOpComp:
+ assign( t_new,
+ binop(Iop_Xor8, mkexpr(t_fetched), mkexpr(t_mask)) );
+ break;
+ case BtOpReset:
+ assign( t_new,
+ binop(Iop_And8, mkexpr(t_fetched),
+ unop(Iop_Not8, mkexpr(t_mask))) );
+ break;
+ default:
+ vpanic("dis_bt_G_E(amd64)");
+ }
+ if ((pfx & PFX_LOCK) && !epartIsReg(modrm)) {
+ casLE( mkexpr(t_addr1), mkexpr(t_fetched)/*expd*/,
+ mkexpr(t_new)/*new*/,
+ guest_RIP_curr_instr );
+ } else {
+ storeLE( mkexpr(t_addr1), mkexpr(t_new) );
+ }
+ }
+
+ /* Side effect done; now get selected bit into Carry flag */
+ /* Flags: C=selected bit, O,S,Z,A,P undefined, so are set to zero. */
+ stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
+ stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
+ stmt( IRStmt_Put(
+ OFFB_CC_DEP1,
+ binop(Iop_And64,
+ binop(Iop_Shr64,
+ unop(Iop_8Uto64, mkexpr(t_fetched)),
+ mkexpr(t_bitno2)),
+ mkU64(1)))
+ );
+ /* Set NDEP even though it isn't used. This makes redundant-PUT
+ elimination of previous stores to this field work better. */
+ stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
+
+ /* Move reg operand from stack back to reg */
+ if (epartIsReg(modrm)) {
+ /* t_rsp still points at it. */
+ /* only write the reg if actually modifying it; doing otherwise
+ zeroes the top half erroneously when doing btl due to
+ standard zero-extend rule */
+ if (op != BtOpNone)
+ putIRegE(sz, pfx, modrm, loadLE(szToITy(sz), mkexpr(t_rsp)) );
+ putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t_rsp), mkU64(288)) );
+ }
+
+ DIP("bt%s%c %s, %s\n",
+ nameBtOp(op), nameISize(sz), nameIRegG(sz, pfx, modrm),
+ ( epartIsReg(modrm) ? nameIRegE(sz, pfx, modrm) : dis_buf ) );
+
+ return delta;
+}
+
+
+
+/* Handle BSF/BSR. Only v-size seems necessary. */
+static
+ULong dis_bs_E_G ( VexAbiInfo* vbi,
+ Prefix pfx, Int sz, Long delta, Bool fwds )
+{
+ Bool isReg;
+ UChar modrm;
+ HChar dis_buf[50];
+
+ IRType ty = szToITy(sz);
+ IRTemp src = newTemp(ty);
+ IRTemp dst = newTemp(ty);
+ IRTemp src64 = newTemp(Ity_I64);
+ IRTemp dst64 = newTemp(Ity_I64);
+ IRTemp src8 = newTemp(Ity_I8);
+
+ vassert(sz == 8 || sz == 4 || sz == 2);
+
+ modrm = getUChar(delta);
+ isReg = epartIsReg(modrm);
+ if (isReg) {
+ delta++;
+ assign( src, getIRegE(sz, pfx, modrm) );
+ } else {
+ Int len;
+ IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
+ delta += len;
+ assign( src, loadLE(ty, mkexpr(addr)) );
+ }
+
+ DIP("bs%c%c %s, %s\n",
+ fwds ? 'f' : 'r', nameISize(sz),
+ ( isReg ? nameIRegE(sz, pfx, modrm) : dis_buf ),
+ nameIRegG(sz, pfx, modrm));
+
+ /* First, widen src to 64 bits if it is not already. */
+ assign( src64, widenUto64(mkexpr(src)) );
+
+ /* Generate an 8-bit expression which is zero iff the
+ original is zero, and nonzero otherwise */
+ assign( src8,
+ unop(Iop_1Uto8,
+ binop(Iop_CmpNE64,
+ mkexpr(src64), mkU64(0))) );
+
+ /* Flags: Z is 1 iff source value is zero. All others
+ are undefined -- we force them to zero. */
+ stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
+ stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
+ stmt( IRStmt_Put(
+ OFFB_CC_DEP1,
+ IRExpr_Mux0X( mkexpr(src8),
+ /* src==0 */
+ mkU64(AMD64G_CC_MASK_Z),
+ /* src!=0 */
+ mkU64(0)
+ )
+ ));
+ /* Set NDEP even though it isn't used. This makes redundant-PUT
+ elimination of previous stores to this field work better. */
+ stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
+
+   /* Result: if the source value is zero, we can't use
+ Iop_Clz64/Iop_Ctz64 as they have no defined result in that case.
+ But anyway, amd64 semantics say the result is undefined in
+ such situations. Hence handle the zero case specially. */
+
+ /* Bleh. What we compute:
+
+ bsf64: if src == 0 then {dst is unchanged}
+ else Ctz64(src)
+
+ bsr64: if src == 0 then {dst is unchanged}
+ else 63 - Clz64(src)
+
+ bsf32: if src == 0 then {dst is unchanged}
+ else Ctz64(32Uto64(src))
+
+ bsr32: if src == 0 then {dst is unchanged}
+ else 63 - Clz64(32Uto64(src))
+
+ bsf16: if src == 0 then {dst is unchanged}
+ else Ctz64(32Uto64(16Uto32(src)))
+
+ bsr16: if src == 0 then {dst is unchanged}
+ else 63 - Clz64(32Uto64(16Uto32(src)))
+ */
+
+ /* The main computation, guarding against zero. */
+ assign( dst64,
+ IRExpr_Mux0X(
+ mkexpr(src8),
+ /* src == 0 -- leave dst unchanged */
+ widenUto64( getIRegG( sz, pfx, modrm ) ),
+ /* src != 0 */
+ fwds ? unop(Iop_Ctz64, mkexpr(src64))
+ : binop(Iop_Sub64,
+ mkU64(63),
+ unop(Iop_Clz64, mkexpr(src64)))
+ )
+ );
+
+ if (sz == 2)
+ assign( dst, unop(Iop_64to16, mkexpr(dst64)) );
+ else
+ if (sz == 4)
+ assign( dst, unop(Iop_64to32, mkexpr(dst64)) );
+ else
+ assign( dst, mkexpr(dst64) );
+
+ /* dump result back */
+ putIRegG( sz, pfx, modrm, mkexpr(dst) );
+
+ return delta;
+}
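+
+/* Scalar sketch of the 64-bit cases above (hypothetical helpers, not
+   used by the decoder; __builtin_ctzll/__builtin_clzll assume a
+   gcc-style compiler): */
+static ULong ref_bsf64 ( ULong src, ULong old_dst )
+{
+   if (src == 0) return old_dst;             /* dst unchanged */
+   return (ULong)__builtin_ctzll(src);       /* Ctz64(src) */
+}
+static ULong ref_bsr64 ( ULong src, ULong old_dst )
+{
+   if (src == 0) return old_dst;             /* dst unchanged */
+   return 63 - (ULong)__builtin_clzll(src);  /* 63 - Clz64(src) */
+}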
+
+
+/* swap rAX with the reg specified by reg and REX.B */
+static
+void codegen_xchg_rAX_Reg ( Prefix pfx, Int sz, UInt regLo3 )
+{
+ IRType ty = szToITy(sz);
+ IRTemp t1 = newTemp(ty);
+ IRTemp t2 = newTemp(ty);
+ vassert(sz == 4 || sz == 8);
+ vassert(regLo3 < 8);
+ if (sz == 8) {
+ assign( t1, getIReg64(R_RAX) );
+ assign( t2, getIRegRexB(8, pfx, regLo3) );
+ putIReg64( R_RAX, mkexpr(t2) );
+ putIRegRexB(8, pfx, regLo3, mkexpr(t1) );
+ } else {
+ assign( t1, getIReg32(R_RAX) );
+ assign( t2, getIRegRexB(4, pfx, regLo3) );
+ putIReg32( R_RAX, mkexpr(t2) );
+ putIRegRexB(4, pfx, regLo3, mkexpr(t1) );
+ }
+ DIP("xchg%c %s, %s\n",
+ nameISize(sz), nameIRegRAX(sz),
+ nameIRegRexB(sz,pfx, regLo3));
+}
+
+
+static
+void codegen_SAHF ( void )
+{
+ /* Set the flags to:
+      (amd64g_calculate_rflags_all() & AMD64G_CC_MASK_O)
+         -- retain the old O flag
+      | (%AH & (AMD64G_CC_MASK_S|AMD64G_CC_MASK_Z|AMD64G_CC_MASK_A
+                |AMD64G_CC_MASK_P|AMD64G_CC_MASK_C))
+ */
+ ULong mask_SZACP = AMD64G_CC_MASK_S|AMD64G_CC_MASK_Z|AMD64G_CC_MASK_A
+ |AMD64G_CC_MASK_C|AMD64G_CC_MASK_P;
+ IRTemp oldflags = newTemp(Ity_I64);
+ assign( oldflags, mk_amd64g_calculate_rflags_all() );
+ stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
+ stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
+ stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
+ stmt( IRStmt_Put( OFFB_CC_DEP1,
+ binop(Iop_Or64,
+ binop(Iop_And64, mkexpr(oldflags), mkU64(AMD64G_CC_MASK_O)),
+ binop(Iop_And64,
+ binop(Iop_Shr64, getIReg64(R_RAX), mkU8(8)),
+ mkU64(mask_SZACP))
+ )
+ ));
+}
+
+
+static
+void codegen_LAHF ( void )
+{
+ /* AH <- EFLAGS(SF:ZF:0:AF:0:PF:1:CF) */
+ IRExpr* rax_with_hole;
+ IRExpr* new_byte;
+ IRExpr* new_rax;
+ ULong mask_SZACP = AMD64G_CC_MASK_S|AMD64G_CC_MASK_Z|AMD64G_CC_MASK_A
+ |AMD64G_CC_MASK_C|AMD64G_CC_MASK_P;
+
+ IRTemp flags = newTemp(Ity_I64);
+ assign( flags, mk_amd64g_calculate_rflags_all() );
+
+ rax_with_hole
+ = binop(Iop_And64, getIReg64(R_RAX), mkU64(~0xFF00ULL));
+ new_byte
+ = binop(Iop_Or64, binop(Iop_And64, mkexpr(flags), mkU64(mask_SZACP)),
+ mkU64(1<<1));
+ new_rax
+ = binop(Iop_Or64, rax_with_hole,
+ binop(Iop_Shl64, new_byte, mkU8(8)));
+ putIReg64(R_RAX, new_rax);
+}
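+
+/* For reference, the byte moved between %AH and the flags by
+   LAHF/SAHF is laid out as follows (bit 1 is architecturally always
+   1, bits 3 and 5 always 0):
+
+      bit:   7    6    5    4    3    2    1    0
+             SF   ZF   0    AF   0    PF   1    CF
+*/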
+
+
+static
+ULong dis_cmpxchg_G_E ( /*OUT*/Bool* ok,
+ VexAbiInfo* vbi,
+ Prefix pfx,
+ Int size,
+ Long delta0 )
+{
+ HChar dis_buf[50];
+ Int len;
+
+ IRType ty = szToITy(size);
+ IRTemp acc = newTemp(ty);
+ IRTemp src = newTemp(ty);
+ IRTemp dest = newTemp(ty);
+ IRTemp dest2 = newTemp(ty);
+ IRTemp acc2 = newTemp(ty);
+ IRTemp cond8 = newTemp(Ity_I8);
+ IRTemp addr = IRTemp_INVALID;
+ UChar rm = getUChar(delta0);
+
+ /* There are 3 cases to consider:
+
+ reg-reg: ignore any lock prefix, generate sequence based
+ on Mux0X
+
+ reg-mem, not locked: ignore any lock prefix, generate sequence
+ based on Mux0X
+
+ reg-mem, locked: use IRCAS
+ */
+
+ if (epartIsReg(rm)) {
+ /* case 1 */
+ assign( dest, getIRegE(size, pfx, rm) );
+ delta0++;
+ assign( src, getIRegG(size, pfx, rm) );
+ assign( acc, getIRegRAX(size) );
+ setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty);
+ assign( cond8, unop(Iop_1Uto8, mk_amd64g_calculate_condition(AMD64CondZ)) );
+ assign( dest2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(src)) );
+ assign( acc2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(acc)) );
+ putIRegRAX(size, mkexpr(acc2));
+ putIRegE(size, pfx, rm, mkexpr(dest2));
+ DIP("cmpxchg%c %s,%s\n", nameISize(size),
+ nameIRegG(size,pfx,rm),
+ nameIRegE(size,pfx,rm) );
+ }
+ else if (!epartIsReg(rm) && !(pfx & PFX_LOCK)) {
+ /* case 2 */
+ addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
+ assign( dest, loadLE(ty, mkexpr(addr)) );
+ delta0 += len;
+ assign( src, getIRegG(size, pfx, rm) );
+ assign( acc, getIRegRAX(size) );
+ setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty);
+ assign( cond8, unop(Iop_1Uto8, mk_amd64g_calculate_condition(AMD64CondZ)) );
+ assign( dest2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(src)) );
+ assign( acc2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(acc)) );
+ putIRegRAX(size, mkexpr(acc2));
+ storeLE( mkexpr(addr), mkexpr(dest2) );
+ DIP("cmpxchg%c %s,%s\n", nameISize(size),
+ nameIRegG(size,pfx,rm), dis_buf);
+ }
+ else if (!epartIsReg(rm) && (pfx & PFX_LOCK)) {
+ /* case 3 */
+ /* src is new value. acc is expected value. dest is old value.
+ Compute success from the output of the IRCAS, and steer the
+ new value for RAX accordingly: in case of success, RAX is
+ unchanged. */
+ addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
+ delta0 += len;
+ assign( src, getIRegG(size, pfx, rm) );
+ assign( acc, getIRegRAX(size) );
+ stmt( IRStmt_CAS(
+ mkIRCAS( IRTemp_INVALID, dest, Iend_LE, mkexpr(addr),
+ NULL, mkexpr(acc), NULL, mkexpr(src) )
+ ));
+ setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty);
+ assign( cond8, unop(Iop_1Uto8, mk_amd64g_calculate_condition(AMD64CondZ)) );
+ assign( acc2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(acc)) );
+ putIRegRAX(size, mkexpr(acc2));
+ DIP("cmpxchg%c %s,%s\n", nameISize(size),
+ nameIRegG(size,pfx,rm), dis_buf);
+ }
+ else vassert(0);
+
+ *ok = True;
+ return delta0;
+}
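+
+/* Scalar sketch of the cmpxchg semantics implemented above
+   (hypothetical helper, shown for the 64-bit case only): */
+static void ref_cmpxchg64 ( ULong* dest, ULong* rax, ULong src )
+{
+   if (*rax == *dest) {
+      *dest = src;     /* success: ZF=1, RAX unchanged */
+   } else {
+      *rax = *dest;    /* failure: ZF=0, old value lands in RAX */
+   }
+}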
+
+
+/* Handle conditional move instructions of the form
+ cmovcc E(reg-or-mem), G(reg)
+
+ E(src) is reg-or-mem
+ G(dst) is reg.
+
+ If E is reg, --> GET %E, tmps
+ GET %G, tmpd
+ CMOVcc tmps, tmpd
+ PUT tmpd, %G
+
+ If E is mem --> (getAddr E) -> tmpa
+ LD (tmpa), tmps
+ GET %G, tmpd
+ CMOVcc tmps, tmpd
+ PUT tmpd, %G
+*/
+static
+ULong dis_cmov_E_G ( VexAbiInfo* vbi,
+ Prefix pfx,
+ Int sz,
+ AMD64Condcode cond,
+ Long delta0 )
+{
+ UChar rm = getUChar(delta0);
+ HChar dis_buf[50];
+ Int len;
+
+ IRType ty = szToITy(sz);
+ IRTemp tmps = newTemp(ty);
+ IRTemp tmpd = newTemp(ty);
+
+ if (epartIsReg(rm)) {
+ assign( tmps, getIRegE(sz, pfx, rm) );
+ assign( tmpd, getIRegG(sz, pfx, rm) );
+
+ putIRegG( sz, pfx, rm,
+ IRExpr_Mux0X( unop(Iop_1Uto8,
+ mk_amd64g_calculate_condition(cond)),
+ mkexpr(tmpd),
+ mkexpr(tmps) )
+ );
+ DIP("cmov%s %s,%s\n", name_AMD64Condcode(cond),
+ nameIRegE(sz,pfx,rm),
+ nameIRegG(sz,pfx,rm));
+ return 1+delta0;
+ }
+
+ /* E refers to memory */
+ {
+ IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
+ assign( tmps, loadLE(ty, mkexpr(addr)) );
+ assign( tmpd, getIRegG(sz, pfx, rm) );
+
+ putIRegG( sz, pfx, rm,
+ IRExpr_Mux0X( unop(Iop_1Uto8,
+ mk_amd64g_calculate_condition(cond)),
+ mkexpr(tmpd),
+ mkexpr(tmps) )
+ );
+
+ DIP("cmov%s %s,%s\n", name_AMD64Condcode(cond),
+ dis_buf,
+ nameIRegG(sz,pfx,rm));
+ return len+delta0;
+ }
+}
+
+
+static
+ULong dis_xadd_G_E ( /*OUT*/Bool* decode_ok,
+ VexAbiInfo* vbi,
+ Prefix pfx, Int sz, Long delta0 )
+{
+ Int len;
+ UChar rm = getUChar(delta0);
+ HChar dis_buf[50];
+
+ IRType ty = szToITy(sz);
+ IRTemp tmpd = newTemp(ty);
+ IRTemp tmpt0 = newTemp(ty);
+ IRTemp tmpt1 = newTemp(ty);
+
+ /* There are 3 cases to consider:
+
+ reg-reg: ignore any lock prefix,
+ generate 'naive' (non-atomic) sequence
+
+ reg-mem, not locked: ignore any lock prefix, generate 'naive'
+ (non-atomic) sequence
+
+ reg-mem, locked: use IRCAS
+ */
+
+ if (epartIsReg(rm)) {
+ /* case 1 */
+ assign( tmpd, getIRegE(sz, pfx, rm) );
+ assign( tmpt0, getIRegG(sz, pfx, rm) );
+ assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8),
+ mkexpr(tmpd), mkexpr(tmpt0)) );
+ setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty );
+ putIRegG(sz, pfx, rm, mkexpr(tmpd));
+ putIRegE(sz, pfx, rm, mkexpr(tmpt1));
+ DIP("xadd%c %s, %s\n",
+ nameISize(sz), nameIRegG(sz,pfx,rm),
+ nameIRegE(sz,pfx,rm));
+ *decode_ok = True;
+ return 1+delta0;
+ }
+ else if (!epartIsReg(rm) && !(pfx & PFX_LOCK)) {
+ /* case 2 */
+ IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
+ assign( tmpd, loadLE(ty, mkexpr(addr)) );
+ assign( tmpt0, getIRegG(sz, pfx, rm) );
+ assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8),
+ mkexpr(tmpd), mkexpr(tmpt0)) );
+ setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty );
+ storeLE( mkexpr(addr), mkexpr(tmpt1) );
+ putIRegG(sz, pfx, rm, mkexpr(tmpd));
+ DIP("xadd%c %s, %s\n",
+ nameISize(sz), nameIRegG(sz,pfx,rm), dis_buf);
+ *decode_ok = True;
+ return len+delta0;
+ }
+ else if (!epartIsReg(rm) && (pfx & PFX_LOCK)) {
+ /* case 3 */
+ IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
+ assign( tmpd, loadLE(ty, mkexpr(addr)) );
+ assign( tmpt0, getIRegG(sz, pfx, rm) );
+ assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8),
+ mkexpr(tmpd), mkexpr(tmpt0)) );
+ casLE( mkexpr(addr), mkexpr(tmpd)/*expVal*/,
+ mkexpr(tmpt1)/*newVal*/, guest_RIP_curr_instr );
+ setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty );
+ putIRegG(sz, pfx, rm, mkexpr(tmpd));
+ DIP("xadd%c %s, %s\n",
+ nameISize(sz), nameIRegG(sz,pfx,rm), dis_buf);
+ *decode_ok = True;
+ return len+delta0;
+ }
+ /*UNREACHED*/
+ vassert(0);
+}
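+
+/* Scalar sketch of XADD's effect (hypothetical helper; the locked
+   variant above additionally makes the read-modify-write atomic via
+   casLE): */
+static void ref_xadd64 ( ULong* e, ULong* g )
+{
+   ULong sum = *e + *g;   /* flags are set from this addition */
+   *g = *e;               /* old destination value goes to G */
+   *e = sum;              /* destination receives the sum */
+}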
+
+//.. /* Move 16 bits from Ew (ireg or mem) to G (a segment register). */
+//..
+//.. static
+//.. UInt dis_mov_Ew_Sw ( UChar sorb, Long delta0 )
+//.. {
+//.. Int len;
+//.. IRTemp addr;
+//.. UChar rm = getUChar(delta0);
+//.. HChar dis_buf[50];
+//..
+//.. if (epartIsReg(rm)) {
+//.. putSReg( gregOfRM(rm), getIReg(2, eregOfRM(rm)) );
+//.. DIP("movw %s,%s\n", nameIReg(2,eregOfRM(rm)), nameSReg(gregOfRM(rm)));
+//.. return 1+delta0;
+//.. } else {
+//.. addr = disAMode ( &len, sorb, delta0, dis_buf );
+//.. putSReg( gregOfRM(rm), loadLE(Ity_I16, mkexpr(addr)) );
+//.. DIP("movw %s,%s\n", dis_buf, nameSReg(gregOfRM(rm)));
+//.. return len+delta0;
+//.. }
+//.. }
+//..
+//.. /* Move 16 bits from G (a segment register) to Ew (ireg or mem). If
+//.. dst is ireg and sz==4, zero out top half of it. */
+//..
+//.. static
+//.. UInt dis_mov_Sw_Ew ( UChar sorb,
+//.. Int sz,
+//.. UInt delta0 )
+//.. {
+//.. Int len;
+//.. IRTemp addr;
+//.. UChar rm = getUChar(delta0);
+//.. HChar dis_buf[50];
+//..
+//.. vassert(sz == 2 || sz == 4);
+//..
+//.. if (epartIsReg(rm)) {
+//.. if (sz == 4)
+//.. putIReg(4, eregOfRM(rm), unop(Iop_16Uto32, getSReg(gregOfRM(rm))));
+//.. else
+//.. putIReg(2, eregOfRM(rm), getSReg(gregOfRM(rm)));
+//..
+//.. DIP("mov %s,%s\n", nameSReg(gregOfRM(rm)), nameIReg(sz,eregOfRM(rm)));
+//.. return 1+delta0;
+//.. } else {
+//.. addr = disAMode ( &len, sorb, delta0, dis_buf );
+//.. storeLE( mkexpr(addr), getSReg(gregOfRM(rm)) );
+//.. DIP("mov %s,%s\n", nameSReg(gregOfRM(rm)), dis_buf);
+//.. return len+delta0;
+//.. }
+//.. }
+//..
+//..
+//.. static
+//.. void dis_push_segreg ( UInt sreg, Int sz )
+//.. {
+//.. IRTemp t1 = newTemp(Ity_I16);
+//.. IRTemp ta = newTemp(Ity_I32);
+//.. vassert(sz == 2 || sz == 4);
+//..
+//.. assign( t1, getSReg(sreg) );
+//.. assign( ta, binop(Iop_Sub32, getIReg(4, R_ESP), mkU32(sz)) );
+//.. putIReg(4, R_ESP, mkexpr(ta));
+//.. storeLE( mkexpr(ta), mkexpr(t1) );
+//..
+//.. DIP("pushw %s\n", nameSReg(sreg));
+//.. }
+//..
+//.. static
+//.. void dis_pop_segreg ( UInt sreg, Int sz )
+//.. {
+//.. IRTemp t1 = newTemp(Ity_I16);
+//.. IRTemp ta = newTemp(Ity_I32);
+//.. vassert(sz == 2 || sz == 4);
+//..
+//.. assign( ta, getIReg(4, R_ESP) );
+//.. assign( t1, loadLE(Ity_I16, mkexpr(ta)) );
+//..
+//.. putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(ta), mkU32(sz)) );
+//.. putSReg( sreg, mkexpr(t1) );
+//.. DIP("pop %s\n", nameSReg(sreg));
+//.. }
+
+static
+void dis_ret ( VexAbiInfo* vbi, ULong d64 )
+{
+ IRTemp t1 = newTemp(Ity_I64);
+ IRTemp t2 = newTemp(Ity_I64);
+ IRTemp t3 = newTemp(Ity_I64);
+ assign(t1, getIReg64(R_RSP));
+ assign(t2, loadLE(Ity_I64,mkexpr(t1)));
+ assign(t3, binop(Iop_Add64, mkexpr(t1), mkU64(8+d64)));
+ putIReg64(R_RSP, mkexpr(t3));
+ make_redzone_AbiHint(vbi, t3, t2/*nia*/, "ret");
+ jmp_treg(Ijk_Ret,t2);
+}
+
+
+/*------------------------------------------------------------*/
+/*--- SSE/SSE2/SSE3 helpers ---*/
+/*------------------------------------------------------------*/
+
+/* Worker function; do not call directly.
+ Handles full width G = G `op` E and G = (not G) `op` E.
+*/
+
+static ULong dis_SSE_E_to_G_all_wrk (
+ VexAbiInfo* vbi,
+ Prefix pfx, Long delta,
+ HChar* opname, IROp op,
+ Bool invertG
+ )
+{
+ HChar dis_buf[50];
+ Int alen;
+ IRTemp addr;
+ UChar rm = getUChar(delta);
+ IRExpr* gpart
+ = invertG ? unop(Iop_NotV128, getXMMReg(gregOfRexRM(pfx,rm)))
+ : getXMMReg(gregOfRexRM(pfx,rm));
+ if (epartIsReg(rm)) {
+ putXMMReg( gregOfRexRM(pfx,rm),
+ binop(op, gpart,
+ getXMMReg(eregOfRexRM(pfx,rm))) );
+ DIP("%s %s,%s\n", opname,
+ nameXMMReg(eregOfRexRM(pfx,rm)),
+ nameXMMReg(gregOfRexRM(pfx,rm)) );
+ return delta+1;
+ } else {
+ addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
+ putXMMReg( gregOfRexRM(pfx,rm),
+ binop(op, gpart,
+ loadLE(Ity_V128, mkexpr(addr))) );
+ DIP("%s %s,%s\n", opname,
+ dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,rm)) );
+ return delta+alen;
+ }
+}
+
+
+/* All lanes SSE binary operation, G = G `op` E. */
+
+static
+ULong dis_SSE_E_to_G_all ( VexAbiInfo* vbi,
+ Prefix pfx, Long delta,
+ HChar* opname, IROp op )
+{
+ return dis_SSE_E_to_G_all_wrk( vbi, pfx, delta, opname, op, False );
+}
+
+/* All lanes SSE binary operation, G = (not G) `op` E. */
+
+static
+ULong dis_SSE_E_to_G_all_invG ( VexAbiInfo* vbi,
+ Prefix pfx, Long delta,
+ HChar* opname, IROp op )
+{
+ return dis_SSE_E_to_G_all_wrk( vbi, pfx, delta, opname, op, True );
+}
+
+
+/* Lowest 32-bit lane only SSE binary operation, G = G `op` E. */
+
+static ULong dis_SSE_E_to_G_lo32 ( VexAbiInfo* vbi,
+ Prefix pfx, Long delta,
+ HChar* opname, IROp op )
+{
+ HChar dis_buf[50];
+ Int alen;
+ IRTemp addr;
+ UChar rm = getUChar(delta);
+ IRExpr* gpart = getXMMReg(gregOfRexRM(pfx,rm));
+ if (epartIsReg(rm)) {
+ putXMMReg( gregOfRexRM(pfx,rm),
+ binop(op, gpart,
+ getXMMReg(eregOfRexRM(pfx,rm))) );
+ DIP("%s %s,%s\n", opname,
+ nameXMMReg(eregOfRexRM(pfx,rm)),
+ nameXMMReg(gregOfRexRM(pfx,rm)) );
+ return delta+1;
+ } else {
+ /* We can only do a 32-bit memory read, so the upper 3/4 of the
+         E operand needs to be filled with zeroes. */
+ IRTemp epart = newTemp(Ity_V128);
+ addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
+ assign( epart, unop( Iop_32UtoV128,
+ loadLE(Ity_I32, mkexpr(addr))) );
+ putXMMReg( gregOfRexRM(pfx,rm),
+ binop(op, gpart, mkexpr(epart)) );
+ DIP("%s %s,%s\n", opname,
+ dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,rm)) );
+ return delta+alen;
+ }
+}
+
+
+/* Lower 64-bit lane only SSE binary operation, G = G `op` E. */
+
+static ULong dis_SSE_E_to_G_lo64 ( VexAbiInfo* vbi,
+ Prefix pfx, Long delta,
+ HChar* opname, IROp op )
+{
+ HChar dis_buf[50];
+ Int alen;
+ IRTemp addr;
+ UChar rm = getUChar(delta);
+ IRExpr* gpart = getXMMReg(gregOfRexRM(pfx,rm));
+ if (epartIsReg(rm)) {
+ putXMMReg( gregOfRexRM(pfx,rm),
+ binop(op, gpart,
+ getXMMReg(eregOfRexRM(pfx,rm))) );
+ DIP("%s %s,%s\n", opname,
+ nameXMMReg(eregOfRexRM(pfx,rm)),
+ nameXMMReg(gregOfRexRM(pfx,rm)) );
+ return delta+1;
+ } else {
+ /* We can only do a 64-bit memory read, so the upper half of the
+         E operand needs to be filled with zeroes. */
+ IRTemp epart = newTemp(Ity_V128);
+ addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
+ assign( epart, unop( Iop_64UtoV128,
+ loadLE(Ity_I64, mkexpr(addr))) );
+ putXMMReg( gregOfRexRM(pfx,rm),
+ binop(op, gpart, mkexpr(epart)) );
+ DIP("%s %s,%s\n", opname,
+ dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,rm)) );
+ return delta+alen;
+ }
+}
+
+
+/* All lanes unary SSE operation, G = op(E). */
+
+static ULong dis_SSE_E_to_G_unary_all (
+ VexAbiInfo* vbi,
+ Prefix pfx, Long delta,
+ HChar* opname, IROp op
+ )
+{
+ HChar dis_buf[50];
+ Int alen;
+ IRTemp addr;
+ UChar rm = getUChar(delta);
+ if (epartIsReg(rm)) {
+ putXMMReg( gregOfRexRM(pfx,rm),
+ unop(op, getXMMReg(eregOfRexRM(pfx,rm))) );
+ DIP("%s %s,%s\n", opname,
+ nameXMMReg(eregOfRexRM(pfx,rm)),
+ nameXMMReg(gregOfRexRM(pfx,rm)) );
+ return delta+1;
+ } else {
+ addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
+ putXMMReg( gregOfRexRM(pfx,rm),
+ unop(op, loadLE(Ity_V128, mkexpr(addr))) );
+ DIP("%s %s,%s\n", opname,
+ dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,rm)) );
+ return delta+alen;
+ }
+}
+
+
+/* Lowest 32-bit lane only unary SSE operation, G = op(E). */
+
+static ULong dis_SSE_E_to_G_unary_lo32 (
+ VexAbiInfo* vbi,
+ Prefix pfx, Long delta,
+ HChar* opname, IROp op
+ )
+{
+ /* First we need to get the old G value and patch the low 32 bits
+ of the E operand into it. Then apply op and write back to G. */
+ HChar dis_buf[50];
+ Int alen;
+ IRTemp addr;
+ UChar rm = getUChar(delta);
+ IRTemp oldG0 = newTemp(Ity_V128);
+ IRTemp oldG1 = newTemp(Ity_V128);
+
+ assign( oldG0, getXMMReg(gregOfRexRM(pfx,rm)) );
+
+ if (epartIsReg(rm)) {
+ assign( oldG1,
+ binop( Iop_SetV128lo32,
+ mkexpr(oldG0),
+ getXMMRegLane32(eregOfRexRM(pfx,rm), 0)) );
+ putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) );
+ DIP("%s %s,%s\n", opname,
+ nameXMMReg(eregOfRexRM(pfx,rm)),
+ nameXMMReg(gregOfRexRM(pfx,rm)) );
+ return delta+1;
+ } else {
+ addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
+ assign( oldG1,
+ binop( Iop_SetV128lo32,
+ mkexpr(oldG0),
+ loadLE(Ity_I32, mkexpr(addr)) ));
+ putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) );
+ DIP("%s %s,%s\n", opname,
+ dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,rm)) );
+ return delta+alen;
+ }
+}
+
+
+/* Lowest 64-bit lane only unary SSE operation, G = op(E). */
+
+static ULong dis_SSE_E_to_G_unary_lo64 (
+ VexAbiInfo* vbi,
+ Prefix pfx, Long delta,
+ HChar* opname, IROp op
+ )
+{
+ /* First we need to get the old G value and patch the low 64 bits
+ of the E operand into it. Then apply op and write back to G. */
+ HChar dis_buf[50];
+ Int alen;
+ IRTemp addr;
+ UChar rm = getUChar(delta);
+ IRTemp oldG0 = newTemp(Ity_V128);
+ IRTemp oldG1 = newTemp(Ity_V128);
+
+ assign( oldG0, getXMMReg(gregOfRexRM(pfx,rm)) );
+
+ if (epartIsReg(rm)) {
+ assign( oldG1,
+ binop( Iop_SetV128lo64,
+ mkexpr(oldG0),
+ getXMMRegLane64(eregOfRexRM(pfx,rm), 0)) );
+ putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) );
+ DIP("%s %s,%s\n", opname,
+ nameXMMReg(eregOfRexRM(pfx,rm)),
+ nameXMMReg(gregOfRexRM(pfx,rm)) );
+ return delta+1;
+ } else {
+ addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
+ assign( oldG1,
+ binop( Iop_SetV128lo64,
+ mkexpr(oldG0),
+ loadLE(Ity_I64, mkexpr(addr)) ));
+ putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) );
+ DIP("%s %s,%s\n", opname,
+ dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,rm)) );
+ return delta+alen;
+ }
+}
+
+
+/* SSE integer binary operation:
+ G = G `op` E (eLeft == False)
+ G = E `op` G (eLeft == True)
+*/
+static ULong dis_SSEint_E_to_G(
+ VexAbiInfo* vbi,
+ Prefix pfx, Long delta,
+ HChar* opname, IROp op,
+ Bool eLeft
+ )
+{
+ HChar dis_buf[50];
+ Int alen;
+ IRTemp addr;
+ UChar rm = getUChar(delta);
+ IRExpr* gpart = getXMMReg(gregOfRexRM(pfx,rm));
+ IRExpr* epart = NULL;
+ if (epartIsReg(rm)) {
+ epart = getXMMReg(eregOfRexRM(pfx,rm));
+ DIP("%s %s,%s\n", opname,
+ nameXMMReg(eregOfRexRM(pfx,rm)),
+ nameXMMReg(gregOfRexRM(pfx,rm)) );
+ delta += 1;
+ } else {
+ addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
+ epart = loadLE(Ity_V128, mkexpr(addr));
+ DIP("%s %s,%s\n", opname,
+ dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,rm)) );
+ delta += alen;
+ }
+ putXMMReg( gregOfRexRM(pfx,rm),
+ eLeft ? binop(op, epart, gpart)
+ : binop(op, gpart, epart) );
+ return delta;
+}
+
+
+/* Helper for doing SSE FP comparisons. */
+
+static void findSSECmpOp ( Bool* needNot, IROp* op,
+ Int imm8, Bool all_lanes, Int sz )
+{
+ imm8 &= 7;
+ *needNot = False;
+ *op = Iop_INVALID;
+ if (imm8 >= 4) {
+ *needNot = True;
+ imm8 -= 4;
+ }
+
+ if (sz == 4 && all_lanes) {
+ switch (imm8) {
+ case 0: *op = Iop_CmpEQ32Fx4; return;
+ case 1: *op = Iop_CmpLT32Fx4; return;
+ case 2: *op = Iop_CmpLE32Fx4; return;
+ case 3: *op = Iop_CmpUN32Fx4; return;
+ default: break;
+ }
+ }
+ if (sz == 4 && !all_lanes) {
+ switch (imm8) {
+ case 0: *op = Iop_CmpEQ32F0x4; return;
+ case 1: *op = Iop_CmpLT32F0x4; return;
+ case 2: *op = Iop_CmpLE32F0x4; return;
+ case 3: *op = Iop_CmpUN32F0x4; return;
+ default: break;
+ }
+ }
+ if (sz == 8 && all_lanes) {
+ switch (imm8) {
+ case 0: *op = Iop_CmpEQ64Fx2; return;
+ case 1: *op = Iop_CmpLT64Fx2; return;
+ case 2: *op = Iop_CmpLE64Fx2; return;
+ case 3: *op = Iop_CmpUN64Fx2; return;
+ default: break;
+ }
+ }
+ if (sz == 8 && !all_lanes) {
+ switch (imm8) {
+ case 0: *op = Iop_CmpEQ64F0x2; return;
+ case 1: *op = Iop_CmpLT64F0x2; return;
+ case 2: *op = Iop_CmpLE64F0x2; return;
+ case 3: *op = Iop_CmpUN64F0x2; return;
+ default: break;
+ }
+ }
+ vpanic("findSSECmpOp(amd64,guest)");
+}
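+
+/* Example (illustrative): imm8 == 5 encodes a not-less-than (NLT)
+   comparison.  Since 5 >= 4, needNot is set and imm8 becomes 1,
+   selecting the LT comparison; the caller then negates the
+   lane-wise LT result to obtain NLT. */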
+
+/* Handles SSE 32F/64F comparisons. */
+
+static ULong dis_SSEcmp_E_to_G ( VexAbiInfo* vbi,
+ Prefix pfx, Long delta,
+ HChar* opname, Bool all_lanes, Int sz )
+{
+ HChar dis_buf[50];
+ Int alen, imm8;
+ IRTemp addr;
+ Bool needNot = False;
+ IROp op = Iop_INVALID;
+ IRTemp plain = newTemp(Ity_V128);
+ UChar rm = getUChar(delta);
+ UShort mask = 0;
+ vassert(sz == 4 || sz == 8);
+ if (epartIsReg(rm)) {
+ imm8 = getUChar(delta+1);
+ findSSECmpOp(&needNot, &op, imm8, all_lanes, sz);
+ assign( plain, binop(op, getXMMReg(gregOfRexRM(pfx,rm)),
+ getXMMReg(eregOfRexRM(pfx,rm))) );
+ delta += 2;
+ DIP("%s $%d,%s,%s\n", opname,
+ (Int)imm8,
+ nameXMMReg(eregOfRexRM(pfx,rm)),
+ nameXMMReg(gregOfRexRM(pfx,rm)) );
+ } else {
+ addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
+ imm8 = getUChar(delta+alen);
+ findSSECmpOp(&needNot, &op, imm8, all_lanes, sz);
+ assign( plain,
+ binop(
+ op,
+ getXMMReg(gregOfRexRM(pfx,rm)),
+ all_lanes ? loadLE(Ity_V128, mkexpr(addr))
+ : sz == 8 ? unop( Iop_64UtoV128, loadLE(Ity_I64, mkexpr(addr)))
+ : /*sz==4*/ unop( Iop_32UtoV128, loadLE(Ity_I32, mkexpr(addr)))
+ )
+ );
+ delta += alen+1;
+ DIP("%s $%d,%s,%s\n", opname,
+ (Int)imm8,
+ dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,rm)) );
+ }
+
+ if (needNot && all_lanes) {
+ putXMMReg( gregOfRexRM(pfx,rm),
+ unop(Iop_NotV128, mkexpr(plain)) );
+ }
+ else
+ if (needNot && !all_lanes) {
+ mask = toUShort(sz==4 ? 0x000F : 0x00FF);
+ putXMMReg( gregOfRexRM(pfx,rm),
+ binop(Iop_XorV128, mkexpr(plain), mkV128(mask)) );
+ }
+ else {
+ putXMMReg( gregOfRexRM(pfx,rm), mkexpr(plain) );
+ }
+
+ return delta;
+}
+
+
+/* Vector by scalar shift of G by the amount specified at the bottom
+ of E. */
+
+static ULong dis_SSE_shiftG_byE ( VexAbiInfo* vbi,
+ Prefix pfx, Long delta,
+ HChar* opname, IROp op )
+{
+ HChar dis_buf[50];
+ Int alen, size;
+ IRTemp addr;
+ Bool shl, shr, sar;
+ UChar rm = getUChar(delta);
+ IRTemp g0 = newTemp(Ity_V128);
+ IRTemp g1 = newTemp(Ity_V128);
+ IRTemp amt = newTemp(Ity_I32);
+ IRTemp amt8 = newTemp(Ity_I8);
+ if (epartIsReg(rm)) {
+ assign( amt, getXMMRegLane32(eregOfRexRM(pfx,rm), 0) );
+ DIP("%s %s,%s\n", opname,
+ nameXMMReg(eregOfRexRM(pfx,rm)),
+ nameXMMReg(gregOfRexRM(pfx,rm)) );
+ delta++;
+ } else {
+ addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
+ assign( amt, loadLE(Ity_I32, mkexpr(addr)) );
+ DIP("%s %s,%s\n", opname,
+ dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,rm)) );
+ delta += alen;
+ }
+ assign( g0, getXMMReg(gregOfRexRM(pfx,rm)) );
+ assign( amt8, unop(Iop_32to8, mkexpr(amt)) );
+
+ shl = shr = sar = False;
+ size = 0;
+ switch (op) {
+      case Iop_ShlN16x8: shl = True; size = 16; break;
+ case Iop_ShlN32x4: shl = True; size = 32; break;
+ case Iop_ShlN64x2: shl = True; size = 64; break;
+ case Iop_SarN16x8: sar = True; size = 16; break;
+ case Iop_SarN32x4: sar = True; size = 32; break;
+ case Iop_ShrN16x8: shr = True; size = 16; break;
+ case Iop_ShrN32x4: shr = True; size = 32; break;
+ case Iop_ShrN64x2: shr = True; size = 64; break;
+ default: vassert(0);
+ }
+
+ if (shl || shr) {
+ assign(
+ g1,
+ IRExpr_Mux0X(
+ unop(Iop_1Uto8,
+ binop(Iop_CmpLT64U, unop(Iop_32Uto64,mkexpr(amt)), mkU64(size))),
+ mkV128(0x0000),
+ binop(op, mkexpr(g0), mkexpr(amt8))
+ )
+ );
+ } else
+ if (sar) {
+ assign(
+ g1,
+ IRExpr_Mux0X(
+ unop(Iop_1Uto8,
+ binop(Iop_CmpLT64U, unop(Iop_32Uto64,mkexpr(amt)), mkU64(size))),
+ binop(op, mkexpr(g0), mkU8(size-1)),
+ binop(op, mkexpr(g0), mkexpr(amt8))
+ )
+ );
+ } else {
+ vassert(0);
+ }
+
+ putXMMReg( gregOfRexRM(pfx,rm), mkexpr(g1) );
+ return delta;
+}
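+
+/* Per-lane model of the out-of-range handling above (hypothetical
+   helpers for the 16-bit lane case; assumes >> on a negative signed
+   value is an arithmetic shift, as with gcc): a count >= the lane
+   size zeroes logical shifts, while arithmetic right shifts behave
+   like a shift by size-1. */
+static UShort ref_psrlw_lane ( UShort x, ULong amt )
+{
+   return amt >= 16 ? 0 : (UShort)(x >> amt);
+}
+static Short ref_psraw_lane ( Short x, ULong amt )
+{
+   return (Short)(x >> (amt >= 16 ? 15 : amt));
+}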
+
+
+/* Vector by scalar shift of E by an immediate byte. */
+
+static
+ULong dis_SSE_shiftE_imm ( Prefix pfx,
+ Long delta, HChar* opname, IROp op )
+{
+ Bool shl, shr, sar;
+ UChar rm = getUChar(delta);
+ IRTemp e0 = newTemp(Ity_V128);
+ IRTemp e1 = newTemp(Ity_V128);
+ UChar amt, size;
+ vassert(epartIsReg(rm));
+ vassert(gregLO3ofRM(rm) == 2
+ || gregLO3ofRM(rm) == 4 || gregLO3ofRM(rm) == 6);
+ amt = getUChar(delta+1);
+ delta += 2;
+ DIP("%s $%d,%s\n", opname,
+ (Int)amt,
+ nameXMMReg(eregOfRexRM(pfx,rm)) );
+ assign( e0, getXMMReg(eregOfRexRM(pfx,rm)) );
+
+ shl = shr = sar = False;
+ size = 0;
+ switch (op) {
+ case Iop_ShlN16x8: shl = True; size = 16; break;
+ case Iop_ShlN32x4: shl = True; size = 32; break;
+ case Iop_ShlN64x2: shl = True; size = 64; break;
+ case Iop_SarN16x8: sar = True; size = 16; break;
+ case Iop_SarN32x4: sar = True; size = 32; break;
+ case Iop_ShrN16x8: shr = True; size = 16; break;
+ case Iop_ShrN32x4: shr = True; size = 32; break;
+ case Iop_ShrN64x2: shr = True; size = 64; break;
+ default: vassert(0);
+ }
+
+ if (shl || shr) {
+ assign( e1, amt >= size
+ ? mkV128(0x0000)
+ : binop(op, mkexpr(e0), mkU8(amt))
+ );
+ } else
+ if (sar) {
+ assign( e1, amt >= size
+ ? binop(op, mkexpr(e0), mkU8(size-1))
+ : binop(op, mkexpr(e0), mkU8(amt))
+ );
+ } else {
+ vassert(0);
+ }
+
+ putXMMReg( eregOfRexRM(pfx,rm), mkexpr(e1) );
+ return delta;
+}
+
+
+/* Get the current SSE rounding mode. */
+
+static IRExpr* /* :: Ity_I32 */ get_sse_roundingmode ( void )
+{
+ return
+ unop( Iop_64to32,
+ binop( Iop_And64,
+ IRExpr_Get( OFFB_SSEROUND, Ity_I64 ),
+ mkU64(3) ));
+}
+
+static void put_sse_roundingmode ( IRExpr* sseround )
+{
+ vassert(typeOfIRExpr(irsb->tyenv, sseround) == Ity_I32);
+ stmt( IRStmt_Put( OFFB_SSEROUND,
+ unop(Iop_32Uto64,sseround) ) );
+}
+
+/* Break a 128-bit value up into four 32-bit ints. */
+
+static void breakup128to32s ( IRTemp t128,
+ /*OUTs*/
+ IRTemp* t3, IRTemp* t2,
+ IRTemp* t1, IRTemp* t0 )
+{
+ IRTemp hi64 = newTemp(Ity_I64);
+ IRTemp lo64 = newTemp(Ity_I64);
+ assign( hi64, unop(Iop_V128HIto64, mkexpr(t128)) );
+ assign( lo64, unop(Iop_V128to64, mkexpr(t128)) );
+
+ vassert(t0 && *t0 == IRTemp_INVALID);
+ vassert(t1 && *t1 == IRTemp_INVALID);
+ vassert(t2 && *t2 == IRTemp_INVALID);
+ vassert(t3 && *t3 == IRTemp_INVALID);
+
+ *t0 = newTemp(Ity_I32);
+ *t1 = newTemp(Ity_I32);
+ *t2 = newTemp(Ity_I32);
+ *t3 = newTemp(Ity_I32);
+ assign( *t0, unop(Iop_64to32, mkexpr(lo64)) );
+ assign( *t1, unop(Iop_64HIto32, mkexpr(lo64)) );
+ assign( *t2, unop(Iop_64to32, mkexpr(hi64)) );
+ assign( *t3, unop(Iop_64HIto32, mkexpr(hi64)) );
+}
+
+/* Construct a 128-bit value from four 32-bit ints. */
+
+static IRExpr* mk128from32s ( IRTemp t3, IRTemp t2,
+ IRTemp t1, IRTemp t0 )
+{
+ return
+ binop( Iop_64HLtoV128,
+ binop(Iop_32HLto64, mkexpr(t3), mkexpr(t2)),
+ binop(Iop_32HLto64, mkexpr(t1), mkexpr(t0))
+ );
+}
+
+/* Break a 64-bit value up into four 16-bit ints. */
+
+static void breakup64to16s ( IRTemp t64,
+ /*OUTs*/
+ IRTemp* t3, IRTemp* t2,
+ IRTemp* t1, IRTemp* t0 )
+{
+ IRTemp hi32 = newTemp(Ity_I32);
+ IRTemp lo32 = newTemp(Ity_I32);
+ assign( hi32, unop(Iop_64HIto32, mkexpr(t64)) );
+ assign( lo32, unop(Iop_64to32, mkexpr(t64)) );
+
+ vassert(t0 && *t0 == IRTemp_INVALID);
+ vassert(t1 && *t1 == IRTemp_INVALID);
+ vassert(t2 && *t2 == IRTemp_INVALID);
+ vassert(t3 && *t3 == IRTemp_INVALID);
+
+ *t0 = newTemp(Ity_I16);
+ *t1 = newTemp(Ity_I16);
+ *t2 = newTemp(Ity_I16);
+ *t3 = newTemp(Ity_I16);
+ assign( *t0, unop(Iop_32to16, mkexpr(lo32)) );
+ assign( *t1, unop(Iop_32HIto16, mkexpr(lo32)) );
+ assign( *t2, unop(Iop_32to16, mkexpr(hi32)) );
+ assign( *t3, unop(Iop_32HIto16, mkexpr(hi32)) );
+}
+
+/* Construct a 64-bit value from four 16-bit ints. */
+
+static IRExpr* mk64from16s ( IRTemp t3, IRTemp t2,
+ IRTemp t1, IRTemp t0 )
+{
+ return
+ binop( Iop_32HLto64,
+ binop(Iop_16HLto32, mkexpr(t3), mkexpr(t2)),
+ binop(Iop_16HLto32, mkexpr(t1), mkexpr(t0))
+ );
+}
+
+
+/* Helper for the SSSE3 (not SSE3) PMULHRSW insns. Given two 64-bit
+ values (aa,bb), computes, for each of the 4 16-bit lanes:
+
+ (((aa_lane *s32 bb_lane) >>u 14) + 1) >>u 1
+*/
+static IRExpr* dis_PMULHRSW_helper ( IRExpr* aax, IRExpr* bbx )
+{
+ IRTemp aa = newTemp(Ity_I64);
+ IRTemp bb = newTemp(Ity_I64);
+ IRTemp aahi32s = newTemp(Ity_I64);
+ IRTemp aalo32s = newTemp(Ity_I64);
+ IRTemp bbhi32s = newTemp(Ity_I64);
+ IRTemp bblo32s = newTemp(Ity_I64);
+ IRTemp rHi = newTemp(Ity_I64);
+ IRTemp rLo = newTemp(Ity_I64);
+ IRTemp one32x2 = newTemp(Ity_I64);
+ assign(aa, aax);
+ assign(bb, bbx);
+ assign( aahi32s,
+ binop(Iop_SarN32x2,
+ binop(Iop_InterleaveHI16x4, mkexpr(aa), mkexpr(aa)),
+ mkU8(16) ));
+ assign( aalo32s,
+ binop(Iop_SarN32x2,
+ binop(Iop_InterleaveLO16x4, mkexpr(aa), mkexpr(aa)),
+ mkU8(16) ));
+ assign( bbhi32s,
+ binop(Iop_SarN32x2,
+ binop(Iop_InterleaveHI16x4, mkexpr(bb), mkexpr(bb)),
+ mkU8(16) ));
+ assign( bblo32s,
+ binop(Iop_SarN32x2,
+ binop(Iop_InterleaveLO16x4, mkexpr(bb), mkexpr(bb)),
+ mkU8(16) ));
+ assign(one32x2, mkU64( (1ULL << 32) + 1 ));
+ assign(
+ rHi,
+ binop(
+ Iop_ShrN32x2,
+ binop(
+ Iop_Add32x2,
+ binop(
+ Iop_ShrN32x2,
+ binop(Iop_Mul32x2, mkexpr(aahi32s), mkexpr(bbhi32s)),
+ mkU8(14)
+ ),
+ mkexpr(one32x2)
+ ),
+ mkU8(1)
+ )
+ );
+ assign(
+ rLo,
+ binop(
+ Iop_ShrN32x2,
+ binop(
+ Iop_Add32x2,
+ binop(
+ Iop_ShrN32x2,
+ binop(Iop_Mul32x2, mkexpr(aalo32s), mkexpr(bblo32s)),
+ mkU8(14)
+ ),
+ mkexpr(one32x2)
+ ),
+ mkU8(1)
+ )
+ );
+ return
+ binop(Iop_CatEvenLanes16x4, mkexpr(rHi), mkexpr(rLo));
+}
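+
+/* Scalar reference for one 16-bit lane of PMULHRSW (hypothetical
+   helper; assumes arithmetic >> on signed values, as with gcc).
+   The only overflow case, 0x8000 * 0x8000, yields 0x8000, matching
+   the hardware: */
+static Short ref_pmulhrsw_lane ( Short a, Short b )
+{
+   Int t = (((Int)a * (Int)b) >> 14) + 1;
+   return (Short)(t >> 1);
+}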
+
+/* Helper for the SSSE3 (not SSE3) PSIGN{B,W,D} insns. Given two 64-bit
+ values (aa,bb), computes, for each lane:
+
+ if aa_lane < 0 then - bb_lane
+ else if aa_lane > 0 then bb_lane
+ else 0
+*/
+static IRExpr* dis_PSIGN_helper ( IRExpr* aax, IRExpr* bbx, Int laneszB )
+{
+ IRTemp aa = newTemp(Ity_I64);
+ IRTemp bb = newTemp(Ity_I64);
+ IRTemp zero = newTemp(Ity_I64);
+ IRTemp bbNeg = newTemp(Ity_I64);
+ IRTemp negMask = newTemp(Ity_I64);
+ IRTemp posMask = newTemp(Ity_I64);
+ IROp opSub = Iop_INVALID;
+ IROp opCmpGTS = Iop_INVALID;
+
+ switch (laneszB) {
+ case 1: opSub = Iop_Sub8x8; opCmpGTS = Iop_CmpGT8Sx8; break;
+ case 2: opSub = Iop_Sub16x4; opCmpGTS = Iop_CmpGT16Sx4; break;
+ case 4: opSub = Iop_Sub32x2; opCmpGTS = Iop_CmpGT32Sx2; break;
+ default: vassert(0);
+ }
+
+ assign( aa, aax );
+ assign( bb, bbx );
+ assign( zero, mkU64(0) );
+ assign( bbNeg, binop(opSub, mkexpr(zero), mkexpr(bb)) );
+ assign( negMask, binop(opCmpGTS, mkexpr(zero), mkexpr(aa)) );
+ assign( posMask, binop(opCmpGTS, mkexpr(aa), mkexpr(zero)) );
+
+ return
+ binop(Iop_Or64,
+ binop(Iop_And64, mkexpr(bb), mkexpr(posMask)),
+ binop(Iop_And64, mkexpr(bbNeg), mkexpr(negMask)) );
+
+}
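+
+/* Scalar sketch for one 16-bit lane of PSIGNW (hypothetical helper;
+   the negation wraps on two's-complement narrowing, so b == -32768
+   stays -32768, as on hardware): */
+static Short ref_psignw_lane ( Short a, Short b )
+{
+   if (a < 0)  return (Short)(-(Int)b);
+   if (a > 0)  return b;
+   return 0;
+}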
+
+/* Helper for the SSSE3 (not SSE3) PABS{B,W,D} insns. Given a 64-bit
+ value aa, computes, for each lane
+
+ if aa < 0 then -aa else aa
+
+ Note that the result is interpreted as unsigned, so that the
+ absolute value of the most negative signed input can be
+ represented.
+*/
+static IRExpr* dis_PABS_helper ( IRExpr* aax, Int laneszB )
+{
+ IRTemp aa = newTemp(Ity_I64);
+ IRTemp zero = newTemp(Ity_I64);
+ IRTemp aaNeg = newTemp(Ity_I64);
+ IRTemp negMask = newTemp(Ity_I64);
+ IRTemp posMask = newTemp(Ity_I64);
+ IROp opSub = Iop_INVALID;
+ IROp opSarN = Iop_INVALID;
+
+ switch (laneszB) {
+ case 1: opSub = Iop_Sub8x8; opSarN = Iop_SarN8x8; break;
+ case 2: opSub = Iop_Sub16x4; opSarN = Iop_SarN16x4; break;
+ case 4: opSub = Iop_Sub32x2; opSarN = Iop_SarN32x2; break;
+ default: vassert(0);
+ }
+
+ assign( aa, aax );
+ assign( negMask, binop(opSarN, mkexpr(aa), mkU8(8*laneszB-1)) );
+ assign( posMask, unop(Iop_Not64, mkexpr(negMask)) );
+ assign( zero, mkU64(0) );
+ assign( aaNeg, binop(opSub, mkexpr(zero), mkexpr(aa)) );
+ return
+ binop(Iop_Or64,
+ binop(Iop_And64, mkexpr(aa), mkexpr(posMask)),
+ binop(Iop_And64, mkexpr(aaNeg), mkexpr(negMask)) );
+}
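+
+/* Scalar sketch for one 16-bit lane of PABSW (hypothetical helper);
+   returning an unsigned type lets abs(-32768) == 0x8000 be
+   represented, per the note above: */
+static UShort ref_pabsw_lane ( Short a )
+{
+   return a < 0 ? (UShort)(-(Int)a) : (UShort)a;
+}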
+
+static IRExpr* dis_PALIGNR_XMM_helper ( IRTemp hi64,
+ IRTemp lo64, Long byteShift )
+{
+ vassert(byteShift >= 1 && byteShift <= 7);
+ return
+ binop(Iop_Or64,
+ binop(Iop_Shl64, mkexpr(hi64), mkU8(8*(8-byteShift))),
+ binop(Iop_Shr64, mkexpr(lo64), mkU8(8*byteShift))
+ );
+}
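+
+/* Worked example (illustrative): with hi64 = 0x1122334455667788,
+   lo64 = 0x99AABBCCDDEEFF00 and byteShift = 3, the result is
+   (hi64 << 40) | (lo64 >> 24) = 0x66778899AABBCCDD, i.e. the
+   16-byte concatenation hi64:lo64 shifted right by 3 bytes. */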
+
+/* Generate a SIGSEGV followed by a restart of the current instruction
+ if effective_addr is not 16-aligned. This is required behaviour
+ for some SSE3 instructions and all 128-bit SSSE3 instructions.
+ This assumes that guest_RIP_curr_instr is set correctly! */
+static void gen_SEGV_if_not_16_aligned ( IRTemp effective_addr )
+{
+ stmt(
+ IRStmt_Exit(
+ binop(Iop_CmpNE64,
+ binop(Iop_And64,mkexpr(effective_addr),mkU64(0xF)),
+ mkU64(0)),
+ Ijk_SigSEGV,
+ IRConst_U64(guest_RIP_curr_instr)
+ )
+ );
+}
+
+
+/* Helper for deciding whether a given insn (starting at the opcode
+ byte) may validly be used with a LOCK prefix. The following insns
+ may be used with LOCK when their destination operand is in memory.
+ AFAICS this is exactly the same for both 32-bit and 64-bit mode.
+
+ ADD 80 /0, 81 /0, 82 /0, 83 /0, 00, 01
+ OR 80 /1, 81 /1, 82 /x, 83 /1, 08, 09
+ ADC 80 /2, 81 /2, 82 /2, 83 /2, 10, 11
+   SBB    80 /3, 81 /3, 82 /x, 83 /3, 18, 19
+ AND 80 /4, 81 /4, 82 /x, 83 /4, 20, 21
+ SUB 80 /5, 81 /5, 82 /x, 83 /5, 28, 29
+ XOR 80 /6, 81 /6, 82 /x, 83 /6, 30, 31
+
+ DEC FE /1, FF /1
+ INC FE /0, FF /0
+
+ NEG F6 /3, F7 /3
+ NOT F6 /2, F7 /2
+
+ XCHG 86, 87
+
+ BTC 0F BB, 0F BA /7
+ BTR 0F B3, 0F BA /6
+ BTS 0F AB, 0F BA /5
+
+ CMPXCHG 0F B0, 0F B1
+ CMPXCHG8B 0F C7 /1
+
+ XADD 0F C0, 0F C1
+
+ ------------------------------
+
+ 80 /0 = addb $imm8, rm8
+ 81 /0 = addl $imm32, rm32 and addw $imm16, rm16
+ 82 /0 = addb $imm8, rm8
+ 83 /0 = addl $simm8, rm32 and addw $simm8, rm16
+
+ 00 = addb r8, rm8
+ 01 = addl r32, rm32 and addw r16, rm16
+
+ Same for ADD OR ADC SBB AND SUB XOR
+
+ FE /1 = dec rm8
+ FF /1 = dec rm32 and dec rm16
+
+ FE /0 = inc rm8
+ FF /0 = inc rm32 and inc rm16
+
+ F6 /3 = neg rm8
+ F7 /3 = neg rm32 and neg rm16
+
+ F6 /2 = not rm8
+ F7 /2 = not rm32 and not rm16
+
+ 0F BB = btcw r16, rm16 and btcl r32, rm32
+   0F BA /7 = btcw $imm8, rm16 and btcl $imm8, rm32
+
+ Same for BTS, BTR
+*/
+static Bool can_be_used_with_LOCK_prefix ( UChar* opc )
+{
+ switch (opc[0]) {
+ case 0x00: case 0x01: case 0x08: case 0x09:
+ case 0x10: case 0x11: case 0x18: case 0x19:
+ case 0x20: case 0x21: case 0x28: case 0x29:
+ case 0x30: case 0x31:
+ if (!epartIsReg(opc[1]))
+ return True;
+ break;
+
+ case 0x80: case 0x81: case 0x82: case 0x83:
+ if (gregLO3ofRM(opc[1]) >= 0 && gregLO3ofRM(opc[1]) <= 6
+ && !epartIsReg(opc[1]))
+ return True;
+ break;
+
+ case 0xFE: case 0xFF:
+ if (gregLO3ofRM(opc[1]) >= 0 && gregLO3ofRM(opc[1]) <= 1
+ && !epartIsReg(opc[1]))
+ return True;
+ break;
+
+ case 0xF6: case 0xF7:
+ if (gregLO3ofRM(opc[1]) >= 2 && gregLO3ofRM(opc[1]) <= 3
+ && !epartIsReg(opc[1]))
+ return True;
+ break;
+
+ case 0x86: case 0x87:
+ if (!epartIsReg(opc[1]))
+ return True;
+ break;
+
+ case 0x0F: {
+ switch (opc[1]) {
+ case 0xBB: case 0xB3: case 0xAB:
+ if (!epartIsReg(opc[2]))
+ return True;
+ break;
+ case 0xBA:
+ if (gregLO3ofRM(opc[2]) >= 5 && gregLO3ofRM(opc[2]) <= 7
+ && !epartIsReg(opc[2]))
+ return True;
+ break;
+ case 0xB0: case 0xB1:
+ if (!epartIsReg(opc[2]))
+ return True;
+ break;
+ case 0xC7:
+ if (gregLO3ofRM(opc[2]) == 1 && !epartIsReg(opc[2]) )
+ return True;
+ break;
+ case 0xC0: case 0xC1:
+ if (!epartIsReg(opc[2]))
+ return True;
+ break;
+ default:
+ break;
+ } /* switch (opc[1]) */
+ break;
+ }
+
+ default:
+ break;
+ } /* switch (opc[0]) */
+
+ return False;
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Disassemble a single instruction ---*/
+/*------------------------------------------------------------*/
+
+/* Disassemble a single instruction into IR. The instruction is
+ located in host memory at &guest_code[delta]. */
+
+static
+DisResult disInstr_AMD64_WRK (
+ /*OUT*/Bool* expect_CAS,
+ Bool put_IP,
+ Bool (*resteerOkFn) ( /*opaque*/void*, Addr64 ),
+ Bool resteerCisOk,
+ void* callback_opaque,
+ Long delta64,
+ VexArchInfo* archinfo,
+ VexAbiInfo* vbi
+ )
+{
+ IRType ty;
+ IRTemp addr, t0, t1, t2, t3, t4, t5, t6;
+ Int alen;
+ UChar opc, modrm, abyte, pre;
+ Long d64;
+ HChar dis_buf[50];
+ Int am_sz, d_sz, n, n_prefixes;
+ DisResult dres;
+ UChar* insn; /* used in SSE decoders */
+
+ /* The running delta */
+ Long delta = delta64;
+
+   /* Holds rip at the start of the insn, so that we can print
+ consistent error messages for unimplemented insns. */
+ Long delta_start = delta;
+
+ /* sz denotes the nominal data-op size of the insn; we change it to
+ 2 if an 0x66 prefix is seen and 8 if REX.W is 1. In case of
+ conflict REX.W takes precedence. */
+ Int sz = 4;
+
+ /* pfx holds the summary of prefixes. */
+ Prefix pfx = PFX_EMPTY;
+
+ /* Set result defaults. */
+ dres.whatNext = Dis_Continue;
+ dres.len = 0;
+ dres.continueAt = 0;
+
+ *expect_CAS = False;
+
+ vassert(guest_RIP_next_assumed == 0);
+ vassert(guest_RIP_next_mustcheck == False);
+
+ addr = t0 = t1 = t2 = t3 = t4 = t5 = t6 = IRTemp_INVALID;
+
+ DIP("\t0x%llx: ", guest_RIP_bbstart+delta);
+
+ /* We may be asked to update the guest RIP before going further. */
+ if (put_IP)
+ stmt( IRStmt_Put( OFFB_RIP, mkU64(guest_RIP_curr_instr)) );
+
+ /* Spot "Special" instructions (see comment at top of file). */
+ {
+ UChar* code = (UChar*)(guest_code + delta);
+ /* Spot the 16-byte preamble:
+ 48C1C703 rolq $3, %rdi
+ 48C1C70D rolq $13, %rdi
+ 48C1C73D rolq $61, %rdi
+ 48C1C733 rolq $51, %rdi
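+         (these four rotate amounts sum to 128, i.e. 0 mod 64, so
+         the preamble is a no-op on %rdi when executed by a real CPU)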
+ */
+ if (code[ 0] == 0x48 && code[ 1] == 0xC1 && code[ 2] == 0xC7
+ && code[ 3] == 0x03 &&
+ code[ 4] == 0x48 && code[ 5] == 0xC1 && code[ 6] == 0xC7
+ && code[ 7] == 0x0D &&
+ code[ 8] == 0x48 && code[ 9] == 0xC1 && code[10] == 0xC7
+ && code[11] == 0x3D &&
+ code[12] == 0x48 && code[13] == 0xC1 && code[14] == 0xC7
+ && code[15] == 0x33) {
+ /* Got a "Special" instruction preamble. Which one is it? */
+ if (code[16] == 0x48 && code[17] == 0x87
+ && code[18] == 0xDB /* xchgq %rbx,%rbx */) {
+ /* %RDX = client_request ( %RAX ) */
+ DIP("%%rdx = client_request ( %%rax )\n");
+ delta += 19;
+ jmp_lit(Ijk_ClientReq, guest_RIP_bbstart+delta);
+ dres.whatNext = Dis_StopHere;
+ goto decode_success;
+ }
+ else
+ if (code[16] == 0x48 && code[17] == 0x87
+ && code[18] == 0xC9 /* xchgq %rcx,%rcx */) {
+ /* %RAX = guest_NRADDR */
+ DIP("%%rax = guest_NRADDR\n");
+ delta += 19;
+ putIRegRAX(8, IRExpr_Get( OFFB_NRADDR, Ity_I64 ));
+ goto decode_success;
+ }
+ else
+ if (code[16] == 0x48 && code[17] == 0x87
+ && code[18] == 0xD2 /* xchgq %rdx,%rdx */) {
+ /* call-noredir *%RAX */
+ DIP("call-noredir *%%rax\n");
+ delta += 19;
+ t1 = newTemp(Ity_I64);
+ assign(t1, getIRegRAX(8));
+ t2 = newTemp(Ity_I64);
+ assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8)));
+ putIReg64(R_RSP, mkexpr(t2));
+ storeLE( mkexpr(t2), mkU64(guest_RIP_bbstart+delta));
+ jmp_treg(Ijk_NoRedir,t1);
+ dres.whatNext = Dis_StopHere;
+ goto decode_success;
+ }
+ /* We don't know what it is. */
+ goto decode_failure;
+ /*NOTREACHED*/
+ }
+ }
+
+ /* Eat prefixes, summarising the result in pfx and sz, and rejecting
+ as many invalid combinations as possible. */
+ n_prefixes = 0;
+ while (True) {
+ if (n_prefixes > 7) goto decode_failure;
+ pre = getUChar(delta);
+ switch (pre) {
+ case 0x66: pfx |= PFX_66; break;
+ case 0x67: pfx |= PFX_ASO; break;
+ case 0xF2: pfx |= PFX_F2; break;
+ case 0xF3: pfx |= PFX_F3; break;
+ case 0xF0: pfx |= PFX_LOCK; *expect_CAS = True; break;
+ case 0x2E: pfx |= PFX_CS; break;
+ case 0x3E: pfx |= PFX_DS; break;
+ case 0x26: pfx |= PFX_ES; break;
+ case 0x64: pfx |= PFX_FS; break;
+ case 0x65: pfx |= PFX_GS; break;
+ case 0x36: pfx |= PFX_SS; break;
+ case 0x40 ... 0x4F:
+ pfx |= PFX_REX;
+ if (pre & (1<<3)) pfx |= PFX_REXW;
+ if (pre & (1<<2)) pfx |= PFX_REXR;
+ if (pre & (1<<1)) pfx |= PFX_REXX;
+ if (pre & (1<<0)) pfx |= PFX_REXB;
+ break;
+ default:
+ goto not_a_prefix;
+ }
+ n_prefixes++;
+ delta++;
+ }
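+   /* Note on the REX case above: e.g. the byte 0x48 (0100.1000)
+      sets PFX_REX|PFX_REXW, while 0x44 (0100.0100) sets
+      PFX_REX|PFX_REXR, extending the reg field of the following
+      modrm byte. */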
+
+ not_a_prefix:
+
+   /* Reject invalid prefix combinations */
+ n = 0;
+ if (pfx & PFX_F2) n++;
+ if (pfx & PFX_F3) n++;
+ if (n > 1)
+ goto decode_failure; /* can't have both */
+
+ n = 0;
+ if (pfx & PFX_CS) n++;
+ if (pfx & PFX_DS) n++;
+ if (pfx & PFX_ES) n++;
+ if (pfx & PFX_FS) n++;
+ if (pfx & PFX_GS) n++;
+ if (pfx & PFX_SS) n++;
+ if (n > 1)
+ goto decode_failure; /* multiple seg overrides == illegal */
+
+ /* We have a %fs prefix. Reject it if there's no evidence in 'vbi'
+ that we should accept it. */
+ if ((pfx & PFX_FS) && !vbi->guest_amd64_assume_fs_is_zero)
+ goto decode_failure;
+
+ /* Ditto for %gs prefixes. */
+ if ((pfx & PFX_GS) && !vbi->guest_amd64_assume_gs_is_0x60)
+ goto decode_failure;
+
+ /* Set up sz. */
+ sz = 4;
+ if (pfx & PFX_66) sz = 2;
+ if ((pfx & PFX_REX) && (pfx & PFX_REXW)) sz = 8;
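+   /* e.g. with both a 66 prefix and REX.W present (66 48 ...), sz
+      ends up as 8, since the REX.W assignment comes last. */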
+
+ /* Now we should be looking at the primary opcode byte or the
+ leading F2 or F3. Check that any LOCK prefix is actually
+ allowed. */
+
+ if (pfx & PFX_LOCK) {
+ if (can_be_used_with_LOCK_prefix( (UChar*)&guest_code[delta] )) {
+ DIP("lock ");
+ } else {
+ *expect_CAS = False;
+ goto decode_failure;
+ }
+ }
+
+
+ /* ---------------------------------------------------- */
+ /* --- The SSE/SSE2 decoder. --- */
+ /* ---------------------------------------------------- */
+
+ /* What did I do to deserve SSE ? Perhaps I was really bad in a
+ previous life? */
+
+ /* Note, this doesn't handle SSE3 right now. All amd64s support
+ SSE2 as a minimum so there is no point distinguishing SSE1 vs
+ SSE2. */
+
+ insn = (UChar*)&guest_code[delta];
+
+   /* FXSAVE appears first here only because it occupies the same
+      position in guest-x86/toIR.c. */
+
+ /* 0F AE /0 = FXSAVE m512 -- write x87 and SSE state to memory.
+ Note that REX.W 0F AE /0 writes a slightly different format and
+ we don't handle that here. */
+ if (haveNo66noF2noF3(pfx) && sz == 4
+ && insn[0] == 0x0F && insn[1] == 0xAE
+ && !epartIsReg(insn[2]) && gregOfRexRM(pfx,insn[2]) == 0) {
+ IRDirty* d;
+ modrm = getUChar(delta+2);
+ vassert(sz == 4);
+ vassert(!epartIsReg(modrm));
+ /* REX.W must not be set. That should be assured us by sz == 4
+ above. */
+ vassert(!(pfx & PFX_REXW));
+
+ addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
+ delta += 2+alen;
+
+ DIP("fxsave %s\n", dis_buf);
+
+      /* Uses dirty helper:
+            void amd64g_dirtyhelper_FXSAVE ( VexGuestAMD64State*, ULong ) */
+ d = unsafeIRDirty_0_N (
+ 0/*regparms*/,
+ "amd64g_dirtyhelper_FXSAVE",
+ &amd64g_dirtyhelper_FXSAVE,
+ mkIRExprVec_1( mkexpr(addr) )
+ );
+ d->needsBBP = True;
+
+ /* declare we're writing memory */
+ d->mFx = Ifx_Write;
+ d->mAddr = mkexpr(addr);
+ d->mSize = 512;
+
+ /* declare we're reading guest state */
+ d->nFxState = 7;
+
+ d->fxState[0].fx = Ifx_Read;
+ d->fxState[0].offset = OFFB_FTOP;
+ d->fxState[0].size = sizeof(UInt);
+
+ d->fxState[1].fx = Ifx_Read;
+ d->fxState[1].offset = OFFB_FPREGS;
+ d->fxState[1].size = 8 * sizeof(ULong);
+
+ d->fxState[2].fx = Ifx_Read;
+ d->fxState[2].offset = OFFB_FPTAGS;
+ d->fxState[2].size = 8 * sizeof(UChar);
+
+ d->fxState[3].fx = Ifx_Read;
+ d->fxState[3].offset = OFFB_FPROUND;
+ d->fxState[3].size = sizeof(ULong);
+
+ d->fxState[4].fx = Ifx_Read;
+ d->fxState[4].offset = OFFB_FC3210;
+ d->fxState[4].size = sizeof(ULong);
+
+ d->fxState[5].fx = Ifx_Read;
+ d->fxState[5].offset = OFFB_XMM0;
+ d->fxState[5].size = 16 * sizeof(U128);
+
+ d->fxState[6].fx = Ifx_Read;
+ d->fxState[6].offset = OFFB_SSEROUND;
+ d->fxState[6].size = sizeof(ULong);
+
+ /* Be paranoid ... this assertion tries to ensure the 16 %xmm
+ images are packed back-to-back. If not, the value of
+ d->fxState[5].size is wrong. */
+ vassert(16 == sizeof(U128));
+ vassert(OFFB_XMM15 == (OFFB_XMM0 + 15 * 16));
+
+ stmt( IRStmt_Dirty(d) );
+
+ goto decode_success;
+ }
+
+ /* ------ SSE decoder main ------ */
+
+ /* 0F 58 = ADDPS -- add 32Fx4 from R/M to R */
+ if (haveNo66noF2noF3(pfx) && sz == 4
+ && insn[0] == 0x0F && insn[1] == 0x58) {
+ delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "addps", Iop_Add32Fx4 );
+ goto decode_success;
+ }
+
+ /* F3 0F 58 = ADDSS -- add 32F0x4 from R/M to R */
+ if (haveF3no66noF2(pfx) && sz == 4
+ && insn[0] == 0x0F && insn[1] == 0x58) {
+ delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta+2, "addss", Iop_Add32F0x4 );
+ goto decode_success;
+ }
+
+ /* 0F 55 = ANDNPS -- G = (not G) and E */
+ if (haveNo66noF2noF3(pfx) && sz == 4
+ && insn[0] == 0x0F && insn[1] == 0x55) {
+ delta = dis_SSE_E_to_G_all_invG( vbi, pfx, delta+2, "andnps", Iop_AndV128 );
+ goto decode_success;
+ }
+
+ /* 0F 54 = ANDPS -- G = G and E */
+ if (haveNo66noF2noF3(pfx) && sz == 4
+ && insn[0] == 0x0F && insn[1] == 0x54) {
+ delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "andps", Iop_AndV128 );
+ goto decode_success;
+ }
+
+ /* 0F C2 = CMPPS -- 32Fx4 comparison from R/M to R */
+ if (haveNo66noF2noF3(pfx) && sz == 4
+ && insn[0] == 0x0F && insn[1] == 0xC2) {
+ delta = dis_SSEcmp_E_to_G( vbi, pfx, delta+2, "cmpps", True, 4 );
+ goto decode_success;
+ }
+
+ /* F3 0F C2 = CMPSS -- 32F0x4 comparison from R/M to R */
+ if (haveF3no66noF2(pfx) && sz == 4
+ && insn[0] == 0x0F && insn[1] == 0xC2) {
+ delta = dis_SSEcmp_E_to_G( vbi, pfx, delta+2, "cmpss", False, 4 );
+ goto decode_success;
+ }
+
+ /* 0F 2F = COMISS -- 32F0x4 comparison G,E, and set ZCP */
+ /* 0F 2E = UCOMISS -- 32F0x4 comparison G,E, and set ZCP */
+ if (haveNo66noF2noF3(pfx) && sz == 4
+ && insn[0] == 0x0F && (insn[1] == 0x2F || insn[1] == 0x2E)) {
+ IRTemp argL = newTemp(Ity_F32);
+ IRTemp argR = newTemp(Ity_F32);
+ modrm = getUChar(delta+2);
+ if (epartIsReg(modrm)) {
+ assign( argR, getXMMRegLane32F( eregOfRexRM(pfx,modrm),
+ 0/*lowest lane*/ ) );
+ delta += 2+1;
+ DIP("%scomiss %s,%s\n", insn[1]==0x2E ? "u" : "",
+ nameXMMReg(eregOfRexRM(pfx,modrm)),
+ nameXMMReg(gregOfRexRM(pfx,modrm)) );
+ } else {
+ addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
+ assign( argR, loadLE(Ity_F32, mkexpr(addr)) );
+ delta += 2+alen;
+ DIP("%scomiss %s,%s\n", insn[1]==0x2E ? "u" : "",
+ dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)) );
+ }
+ assign( argL, getXMMRegLane32F( gregOfRexRM(pfx,modrm),
+ 0/*lowest lane*/ ) );
+
+ stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
+ stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
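+      /* VEX's Iop_CmpF64 encodes its result using the x86 FCOMI
+         flag encoding (0x45 = unordered, 0x40 = EQ, 0x01 = LT,
+         0x00 = GT), so masking with 0x45 keeps exactly the ZF, PF
+         and CF positions that COMISS/UCOMISS define. */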
+ stmt( IRStmt_Put(
+ OFFB_CC_DEP1,
+ binop( Iop_And64,
+ unop( Iop_32Uto64,
+ binop(Iop_CmpF64,
+ unop(Iop_F32toF64,mkexpr(argL)),
+ unop(Iop_F32toF64,mkexpr(argR)))),
+ mkU64(0x45)
+ )));
+
+ goto decode_success;
+ }
+
+ /* 0F 2A = CVTPI2PS -- convert 2 x I32 in mem/mmx to 2 x F32 in low
+ half xmm */
+ if (haveNo66noF2noF3(pfx) && sz == 4
+ && insn[0] == 0x0F && insn[1] == 0x2A) {
+ IRTemp arg64 = newTemp(Ity_I64);
+ IRTemp rmode = newTemp(Ity_I32);
+
+ modrm = getUChar(delta+2);
+ do_MMX_preamble();
+ if (epartIsReg(modrm)) {
+ assign( arg64, getMMXReg(eregLO3ofRM(modrm)) );
+ delta += 2+1;
+ DIP("cvtpi2ps %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ } else {
+ addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
+ assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
+ delta += 2+alen;
+ DIP("cvtpi2ps %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)) );
+ }
+
+ assign( rmode, get_sse_roundingmode() );
+
+ putXMMRegLane32F(
+ gregOfRexRM(pfx,modrm), 0,
+ binop(Iop_F64toF32,
+ mkexpr(rmode),
+ unop(Iop_I32StoF64,
+ unop(Iop_64to32, mkexpr(arg64)) )) );
+
+ putXMMRegLane32F(
+ gregOfRexRM(pfx,modrm), 1,
+ binop(Iop_F64toF32,
+ mkexpr(rmode),
+ unop(Iop_I32StoF64,
+ unop(Iop_64HIto32, mkexpr(arg64)) )) );
+
+ goto decode_success;
+ }
+
+ /* F3 0F 2A = CVTSI2SS
+ -- sz==4: convert I32 in mem/ireg to F32 in low quarter xmm
+ -- sz==8: convert I64 in mem/ireg to F32 in low quarter xmm */
+ if (haveF3no66noF2(pfx) && (sz == 4 || sz == 8)
+ && insn[0] == 0x0F && insn[1] == 0x2A) {
+
+ IRTemp rmode = newTemp(Ity_I32);
+ assign( rmode, get_sse_roundingmode() );
+ modrm = getUChar(delta+2);
+
+ if (sz == 4) {
+ IRTemp arg32 = newTemp(Ity_I32);
+ if (epartIsReg(modrm)) {
+ assign( arg32, getIReg32(eregOfRexRM(pfx,modrm)) );
+ delta += 2+1;
+ DIP("cvtsi2ss %s,%s\n", nameIReg32(eregOfRexRM(pfx,modrm)),
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ } else {
+ addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
+ assign( arg32, loadLE(Ity_I32, mkexpr(addr)) );
+ delta += 2+alen;
+ DIP("cvtsi2ss %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)) );
+ }
+ putXMMRegLane32F(
+ gregOfRexRM(pfx,modrm), 0,
+ binop(Iop_F64toF32,
+ mkexpr(rmode),
+ unop(Iop_I32StoF64, mkexpr(arg32)) ) );
+ } else {
+ /* sz == 8 */
+ IRTemp arg64 = newTemp(Ity_I64);
+ if (epartIsReg(modrm)) {
+ assign( arg64, getIReg64(eregOfRexRM(pfx,modrm)) );
+ delta += 2+1;
+ DIP("cvtsi2ssq %s,%s\n", nameIReg64(eregOfRexRM(pfx,modrm)),
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ } else {
+ addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
+ assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
+ delta += 2+alen;
+ DIP("cvtsi2ssq %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)) );
+ }
+ putXMMRegLane32F(
+ gregOfRexRM(pfx,modrm), 0,
+ binop(Iop_F64toF32,
+ mkexpr(rmode),
+ binop(Iop_I64StoF64, mkexpr(rmode), mkexpr(arg64)) ) );
+ }
+
+ goto decode_success;
+ }
+
+ /* 0F 2D = CVTPS2PI -- convert 2 x F32 in mem/low half xmm to 2 x
+ I32 in mmx, according to prevailing SSE rounding mode */
+ /* 0F 2C = CVTTPS2PI -- convert 2 x F32 in mem/low half xmm to 2 x
+ I32 in mmx, rounding towards zero */
+ if (haveNo66noF2noF3(pfx) && sz == 4
+ && insn[0] == 0x0F && (insn[1] == 0x2D || insn[1] == 0x2C)) {
+ IRTemp dst64 = newTemp(Ity_I64);
+ IRTemp rmode = newTemp(Ity_I32);
+ IRTemp f32lo = newTemp(Ity_F32);
+ IRTemp f32hi = newTemp(Ity_F32);
+ Bool r2zero = toBool(insn[1] == 0x2C);
+
+ do_MMX_preamble();
+ modrm = getUChar(delta+2);
+
+ if (epartIsReg(modrm)) {
+ delta += 2+1;
+ assign(f32lo, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 0));
+ assign(f32hi, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 1));
+ DIP("cvt%sps2pi %s,%s\n", r2zero ? "t" : "",
+ nameXMMReg(eregOfRexRM(pfx,modrm)),
+ nameMMXReg(gregLO3ofRM(modrm)));
+ } else {
+ addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
+ assign(f32lo, loadLE(Ity_F32, mkexpr(addr)));
+ assign(f32hi, loadLE(Ity_F32, binop( Iop_Add64,
+ mkexpr(addr),
+ mkU64(4) )));
+ delta += 2+alen;
+ DIP("cvt%sps2pi %s,%s\n", r2zero ? "t" : "",
+ dis_buf,
+ nameMMXReg(gregLO3ofRM(modrm)));
+ }
+
+ if (r2zero) {
+ assign(rmode, mkU32((UInt)Irrm_ZERO) );
+ } else {
+ assign( rmode, get_sse_roundingmode() );
+ }
+
+ assign(
+ dst64,
+ binop( Iop_32HLto64,
+ binop( Iop_F64toI32S,
+ mkexpr(rmode),
+ unop( Iop_F32toF64, mkexpr(f32hi) ) ),
+ binop( Iop_F64toI32S,
+ mkexpr(rmode),
+ unop( Iop_F32toF64, mkexpr(f32lo) ) )
+ )
+ );
+
+ putMMXReg(gregLO3ofRM(modrm), mkexpr(dst64));
+ goto decode_success;
+ }
+
+ /* F3 0F 2D = CVTSS2SI
+ when sz==4 -- convert F32 in mem/low quarter xmm to I32 in ireg,
+ according to prevailing SSE rounding mode
+ when sz==8 -- convert F32 in mem/low quarter xmm to I64 in ireg,
+ according to prevailing SSE rounding mode
+ */
+ /* F3 0F 2C = CVTTSS2SI
+ when sz==4 -- convert F32 in mem/low quarter xmm to I32 in ireg,
+ truncating towards zero
+ when sz==8 -- convert F32 in mem/low quarter xmm to I64 in ireg,
+ truncating towards zero
+ */
+ if (haveF3no66noF2(pfx)
+ && insn[0] == 0x0F
+ && (insn[1] == 0x2D || insn[1] == 0x2C)) {
+ IRTemp rmode = newTemp(Ity_I32);
+ IRTemp f32lo = newTemp(Ity_F32);
+ Bool r2zero = toBool(insn[1] == 0x2C);
+ vassert(sz == 4 || sz == 8);
+
+ modrm = getUChar(delta+2);
+ if (epartIsReg(modrm)) {
+ delta += 2+1;
+ assign(f32lo, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 0));
+ DIP("cvt%sss2si %s,%s\n", r2zero ? "t" : "",
+ nameXMMReg(eregOfRexRM(pfx,modrm)),
+ nameIReg(sz, gregOfRexRM(pfx,modrm), False));
+ } else {
+ addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
+ assign(f32lo, loadLE(Ity_F32, mkexpr(addr)));
+ delta += 2+alen;
+ DIP("cvt%sss2si %s,%s\n", r2zero ? "t" : "",
+ dis_buf,
+ nameIReg(sz, gregOfRexRM(pfx,modrm), False));
+ }
+
+ if (r2zero) {
+ assign( rmode, mkU32((UInt)Irrm_ZERO) );
+ } else {
+ assign( rmode, get_sse_roundingmode() );
+ }
+
+ if (sz == 4) {
+ putIReg32( gregOfRexRM(pfx,modrm),
+ binop( Iop_F64toI32S,
+ mkexpr(rmode),
+ unop(Iop_F32toF64, mkexpr(f32lo))) );
+ } else {
+ putIReg64( gregOfRexRM(pfx,modrm),
+ binop( Iop_F64toI64S,
+ mkexpr(rmode),
+ unop(Iop_F32toF64, mkexpr(f32lo))) );
+ }
+
+ goto decode_success;
+ }
+
+ /* 0F 5E = DIVPS -- div 32Fx4 from R/M to R */
+ if (haveNo66noF2noF3(pfx) && sz == 4
+ && insn[0] == 0x0F && insn[1] == 0x5E) {
+ delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "divps", Iop_Div32Fx4 );
+ goto decode_success;
+ }
+
+ /* F3 0F 5E = DIVSS -- div 32F0x4 from R/M to R */
+ if (haveF3no66noF2(pfx) && sz == 4
+ && insn[0] == 0x0F && insn[1] == 0x5E) {
+ delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta+2, "divss", Iop_Div32F0x4 );
+ goto decode_success;
+ }
+
+ /* 0F AE /2 = LDMXCSR m32 -- load %mxcsr */
+ if (insn[0] == 0x0F && insn[1] == 0xAE
+ && haveNo66noF2noF3(pfx)
+ && !epartIsReg(insn[2]) && gregLO3ofRM(insn[2]) == 2) {
+
+ IRTemp t64 = newTemp(Ity_I64);
+ IRTemp ew = newTemp(Ity_I32);
+
+ vassert(sz == 4);
+ addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
+ delta += 2+alen;
+ DIP("ldmxcsr %s\n", dis_buf);
+
+ /* The only thing we observe in %mxcsr is the rounding mode.
+ Therefore, pass the 32-bit value (SSE native-format control
+ word) to a clean helper, getting back a 64-bit value, the
+ lower half of which is the SSEROUND value to store, and the
+ upper half of which is the emulation-warning token which may
+ be generated.
+ */
+   /* ULong amd64g_check_ldmxcsr ( ULong ); */
+ assign( t64, mkIRExprCCall(
+ Ity_I64, 0/*regparms*/,
+ "amd64g_check_ldmxcsr",
+ &amd64g_check_ldmxcsr,
+ mkIRExprVec_1(
+ unop(Iop_32Uto64,
+ loadLE(Ity_I32, mkexpr(addr))
+ )
+ )
+ )
+ );
+
+ put_sse_roundingmode( unop(Iop_64to32, mkexpr(t64)) );
+ assign( ew, unop(Iop_64HIto32, mkexpr(t64) ) );
+ put_emwarn( mkexpr(ew) );
+ /* Finally, if an emulation warning was reported, side-exit to
+ the next insn, reporting the warning, so that Valgrind's
+ dispatcher sees the warning. */
+ stmt(
+ IRStmt_Exit(
+ binop(Iop_CmpNE64, unop(Iop_32Uto64,mkexpr(ew)), mkU64(0)),
+ Ijk_EmWarn,
+ IRConst_U64(guest_RIP_bbstart+delta)
+ )
+ );
+ goto decode_success;
+ }
+
+ /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
+ /* 0F F7 = MASKMOVQ -- 8x8 masked store */
+ if (haveNo66noF2noF3(pfx) && sz == 4
+ && insn[0] == 0x0F && insn[1] == 0xF7) {
+ Bool ok = False;
+ delta = dis_MMX( &ok, vbi, pfx, sz, delta+1 );
+ if (!ok)
+ goto decode_failure;
+ goto decode_success;
+ }
+
+ /* 0F 5F = MAXPS -- max 32Fx4 from R/M to R */
+ if (haveNo66noF2noF3(pfx) && sz == 4
+ && insn[0] == 0x0F && insn[1] == 0x5F) {
+ delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "maxps", Iop_Max32Fx4 );
+ goto decode_success;
+ }
+
+ /* F3 0F 5F = MAXSS -- max 32F0x4 from R/M to R */
+ if (haveF3no66noF2(pfx) && sz == 4
+ && insn[0] == 0x0F && insn[1] == 0x5F) {
+ delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta+2, "maxss", Iop_Max32F0x4 );
+ goto decode_success;
+ }
+
+ /* 0F 5D = MINPS -- min 32Fx4 from R/M to R */
+ if (haveNo66noF2noF3(pfx) && sz == 4
+ && insn[0] == 0x0F && insn[1] == 0x5D) {
+ delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "minps", Iop_Min32Fx4 );
+ goto decode_success;
+ }
+
+ /* F3 0F 5D = MINSS -- min 32F0x4 from R/M to R */
+ if (haveF3no66noF2(pfx) && sz == 4
+ && insn[0] == 0x0F && insn[1] == 0x5D) {
+ delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta+2, "minss", Iop_Min32F0x4 );
+ goto decode_success;
+ }
+
+ /* 0F 28 = MOVAPS -- move from E (mem or xmm) to G (xmm). */
+ /* 0F 10 = MOVUPS -- move from E (mem or xmm) to G (xmm). */
+ if (haveNo66noF2noF3(pfx)
+ && (sz == 4 || /* ignore redundant REX.W */ sz == 8)
+ && insn[0] == 0x0F && (insn[1] == 0x28 || insn[1] == 0x10)) {
+ modrm = getUChar(delta+2);
+ if (epartIsReg(modrm)) {
+ putXMMReg( gregOfRexRM(pfx,modrm),
+ getXMMReg( eregOfRexRM(pfx,modrm) ));
+ DIP("mov[ua]ps %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ delta += 2+1;
+ } else {
+ addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
+ if (insn[1] == 0x28/*movaps*/)
+ gen_SEGV_if_not_16_aligned( addr );
+ putXMMReg( gregOfRexRM(pfx,modrm),
+ loadLE(Ity_V128, mkexpr(addr)) );
+ DIP("mov[ua]ps %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ delta += 2+alen;
+ }
+ goto decode_success;
+ }
+
+ /* 0F 29 = MOVAPS -- move from G (xmm) to E (mem or xmm). */
+ /* 0F 11 = MOVUPS -- move from G (xmm) to E (mem or xmm). */
+ if (haveNo66noF2noF3(pfx)
+ && (sz == 4 || /* ignore redundant REX.W */ sz == 8)
+ && insn[0] == 0x0F && (insn[1] == 0x29 || insn[1] == 0x11)) {
+ modrm = getUChar(delta+2);
+ if (epartIsReg(modrm)) {
+ /* fall through; awaiting test case */
+ } else {
+ addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
+ if (insn[1] == 0x29/*movaps*/)
+ gen_SEGV_if_not_16_aligned( addr );
+ storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
+ DIP("mov[ua]ps %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
+ dis_buf );
+ delta += 2+alen;
+ goto decode_success;
+ }
+ }
+
+ /* 0F 16 = MOVHPS -- move from mem to high half of XMM. */
+ /* 0F 16 = MOVLHPS -- move from lo half to hi half of XMM. */
+ if (haveNo66noF2noF3(pfx)
+ && (sz == 4 || /* ignore redundant REX.W */ sz == 8)
+ && insn[0] == 0x0F && insn[1] == 0x16) {
+ modrm = getUChar(delta+2);
+ if (epartIsReg(modrm)) {
+ delta += 2+1;
+ putXMMRegLane64( gregOfRexRM(pfx,modrm), 1/*upper lane*/,
+ getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 ) );
+ DIP("movhps %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ } else {
+ addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
+ delta += 2+alen;
+ putXMMRegLane64( gregOfRexRM(pfx,modrm), 1/*upper lane*/,
+ loadLE(Ity_I64, mkexpr(addr)) );
+ DIP("movhps %s,%s\n", dis_buf,
+ nameXMMReg( gregOfRexRM(pfx,modrm) ));
+ }
+ goto decode_success;
+ }
+
+ /* 0F 17 = MOVHPS -- move from high half of XMM to mem. */
+ if (haveNo66noF2noF3(pfx)
+ && (sz == 4 || /* ignore redundant REX.W */ sz == 8)
+ && insn[0] == 0x0F && insn[1] == 0x17) {
+ if (!epartIsReg(insn[2])) {
+ delta += 2;
+ addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
+ delta += alen;
+ storeLE( mkexpr(addr),
+ getXMMRegLane64( gregOfRexRM(pfx,insn[2]),
+ 1/*upper lane*/ ) );
+ DIP("movhps %s,%s\n", nameXMMReg( gregOfRexRM(pfx,insn[2]) ),
+ dis_buf);
+ goto decode_success;
+ }
+ /* else fall through */
+ }
+
+ /* 0F 12 = MOVLPS -- move from mem to low half of XMM. */
+   /* 0F 12 = MOVHLPS -- move from hi half to lo half of XMM. */
+ if (haveNo66noF2noF3(pfx)
+ && (sz == 4 || /* ignore redundant REX.W */ sz == 8)
+ && insn[0] == 0x0F && insn[1] == 0x12) {
+ modrm = getUChar(delta+2);
+ if (epartIsReg(modrm)) {
+ delta += 2+1;
+ putXMMRegLane64( gregOfRexRM(pfx,modrm),
+ 0/*lower lane*/,
+ getXMMRegLane64( eregOfRexRM(pfx,modrm), 1 ));
+ DIP("movhlps %s, %s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ } else {
+ addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
+ delta += 2+alen;
+ putXMMRegLane64( gregOfRexRM(pfx,modrm), 0/*lower lane*/,
+ loadLE(Ity_I64, mkexpr(addr)) );
+ DIP("movlps %s, %s\n",
+ dis_buf, nameXMMReg( gregOfRexRM(pfx,modrm) ));
+ }
+ goto decode_success;
+ }
+
+ /* 0F 13 = MOVLPS -- move from low half of XMM to mem. */
+ if (haveNo66noF2noF3(pfx)
+ && (sz == 4 || /* ignore redundant REX.W */ sz == 8)
+ && insn[0] == 0x0F && insn[1] == 0x13) {
+ if (!epartIsReg(insn[2])) {
+ delta += 2;
+ addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
+ delta += alen;
+ storeLE( mkexpr(addr),
+ getXMMRegLane64( gregOfRexRM(pfx,insn[2]),
+ 0/*lower lane*/ ) );
+ DIP("movlps %s, %s\n", nameXMMReg( gregOfRexRM(pfx,insn[2]) ),
+ dis_buf);
+ goto decode_success;
+ }
+ /* else fall through */
+ }
+
+   /* 0F 50 = MOVMSKPS -- move 4 sign bits from 4 x F32 in xmm(E)
+      to 4 lowest bits of ireg(G) */
+ if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)
+ && insn[0] == 0x0F && insn[1] == 0x50) {
+ /* sz == 8 is a kludge to handle insns with REX.W redundantly
+ set to 1, which has been known to happen:
+
+ 4c 0f 50 d9 rex64X movmskps %xmm1,%r11d
+
+ 20071106: Intel docs say that REX.W isn't redundant: when
+ present, a 64-bit register is written; when not present, only
+ the 32-bit half is written. However, testing on a Core2
+ machine suggests the entire 64 bit register is written
+ irrespective of the status of REX.W. That could be because
+ of the default rule that says "if the lower half of a 32-bit
+ register is written, the upper half is zeroed". By using
+      putIReg32 here we inadvertently produce the same behaviour as
+ the Core2, for the same reason -- putIReg32 implements said
+ rule.
+
+ AMD docs give no indication that REX.W is even valid for this
+ insn. */
+ modrm = getUChar(delta+2);
+ if (epartIsReg(modrm)) {
+ Int src;
+ t0 = newTemp(Ity_I32);
+ t1 = newTemp(Ity_I32);
+ t2 = newTemp(Ity_I32);
+ t3 = newTemp(Ity_I32);
+ delta += 2+1;
+ src = eregOfRexRM(pfx,modrm);
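+         /* Each tN below shifts lane N's sign bit down to result
+            bit N and masks it, so OR-ing t0..t3 yields the 4-bit
+            MOVMSKPS result. */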
+ assign( t0, binop( Iop_And32,
+ binop(Iop_Shr32, getXMMRegLane32(src,0), mkU8(31)),
+ mkU32(1) ));
+ assign( t1, binop( Iop_And32,
+ binop(Iop_Shr32, getXMMRegLane32(src,1), mkU8(30)),
+ mkU32(2) ));
+ assign( t2, binop( Iop_And32,
+ binop(Iop_Shr32, getXMMRegLane32(src,2), mkU8(29)),
+ mkU32(4) ));
+ assign( t3, binop( Iop_And32,
+ binop(Iop_Shr32, getXMMRegLane32(src,3), mkU8(28)),
+ mkU32(8) ));
+ putIReg32( gregOfRexRM(pfx,modrm),
+ binop(Iop_Or32,
+ binop(Iop_Or32, mkexpr(t0), mkexpr(t1)),
+ binop(Iop_Or32, mkexpr(t2), mkexpr(t3))
+ )
+ );
+ DIP("movmskps %s,%s\n", nameXMMReg(src),
+ nameIReg32(gregOfRexRM(pfx,modrm)));
+ goto decode_success;
+ }
+ /* else fall through */
+ }
+
+ /* 66 0F 2B = MOVNTPD -- for us, just a plain SSE store. */
+ /* 0F 2B = MOVNTPS -- for us, just a plain SSE store. */
+ if ( ( (haveNo66noF2noF3(pfx) && sz == 4)
+ || (have66noF2noF3(pfx) && sz == 2)
+ )
+ && insn[0] == 0x0F && insn[1] == 0x2B) {
+ modrm = getUChar(delta+2);
+ if (!epartIsReg(modrm)) {
+ addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
+ gen_SEGV_if_not_16_aligned( addr );
+ storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
+ DIP("movntp%s %s,%s\n", sz==2 ? "d" : "s",
+ dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ delta += 2+alen;
+ goto decode_success;
+ }
+ /* else fall through */
+ }
+
+ /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
+ /* 0F E7 = MOVNTQ -- for us, just a plain MMX store. Note, the
+ Intel manual does not say anything about the usual business of
+ the FP reg tags getting trashed whenever an MMX insn happens.
+ So we just leave them alone.
+ */
+ if (haveNo66noF2noF3(pfx) && sz == 4
+ && insn[0] == 0x0F && insn[1] == 0xE7) {
+ modrm = getUChar(delta+2);
+ if (!epartIsReg(modrm)) {
+ /* do_MMX_preamble(); Intel docs don't specify this */
+ addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
+ storeLE( mkexpr(addr), getMMXReg(gregLO3ofRM(modrm)) );
+ DIP("movntq %s,%s\n", dis_buf,
+ nameMMXReg(gregLO3ofRM(modrm)));
+ delta += 2+alen;
+ goto decode_success;
+ }
+ /* else fall through */
+ }
+
+ /* F3 0F 10 = MOVSS -- move 32 bits from E (mem or lo 1/4 xmm) to G
+ (lo 1/4 xmm). If E is mem, upper 3/4 of G is zeroed out. */
+ if (haveF3no66noF2(pfx)
+ && (sz == 4 || /* ignore redundant REX.W */ sz == 8)
+ && insn[0] == 0x0F && insn[1] == 0x10) {
+ modrm = getUChar(delta+2);
+ if (epartIsReg(modrm)) {
+ putXMMRegLane32( gregOfRexRM(pfx,modrm), 0,
+ getXMMRegLane32( eregOfRexRM(pfx,modrm), 0 ));
+ DIP("movss %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ delta += 2+1;
+ } else {
+ addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
+ putXMMReg( gregOfRexRM(pfx,modrm), mkV128(0) );
+ putXMMRegLane32( gregOfRexRM(pfx,modrm), 0,
+ loadLE(Ity_I32, mkexpr(addr)) );
+ DIP("movss %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ delta += 2+alen;
+ }
+ goto decode_success;
+ }
+
+ /* F3 0F 11 = MOVSS -- move 32 bits from G (lo 1/4 xmm) to E (mem
+ or lo 1/4 xmm). */
+ if (haveF3no66noF2(pfx) && sz == 4
+ && insn[0] == 0x0F && insn[1] == 0x11) {
+ modrm = getUChar(delta+2);
+ if (epartIsReg(modrm)) {
+ /* fall through, we don't yet have a test case */
+ } else {
+ addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
+ storeLE( mkexpr(addr),
+ getXMMRegLane32(gregOfRexRM(pfx,modrm), 0) );
+ DIP("movss %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
+ dis_buf);
+ delta += 2+alen;
+ goto decode_success;
+ }
+ }
+
+ /* 0F 59 = MULPS -- mul 32Fx4 from R/M to R */
+ if (haveNo66noF2noF3(pfx) && sz == 4
+ && insn[0] == 0x0F && insn[1] == 0x59) {
+ delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "mulps", Iop_Mul32Fx4 );
+ goto decode_success;
+ }
+
+ /* F3 0F 59 = MULSS -- mul 32F0x4 from R/M to R */
+ if (haveF3no66noF2(pfx) && sz == 4
+ && insn[0] == 0x0F && insn[1] == 0x59) {
+ delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta+2, "mulss", Iop_Mul32F0x4 );
+ goto decode_success;
+ }
+
+   /* 0F 56 = ORPS -- G = G or E */
+ if (haveNo66noF2noF3(pfx) && sz == 4
+ && insn[0] == 0x0F && insn[1] == 0x56) {
+ delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "orps", Iop_OrV128 );
+ goto decode_success;
+ }
+
+ /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
+ /* 0F E0 = PAVGB -- 8x8 unsigned Packed Average, with rounding */
+ if (haveNo66noF2noF3(pfx) && sz == 4
+ && insn[0] == 0x0F && insn[1] == 0xE0) {
+ do_MMX_preamble();
+ delta = dis_MMXop_regmem_to_reg (
+ vbi, pfx, delta+2, insn[1], "pavgb", False );
+ goto decode_success;
+ }
+
+ /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
+ /* 0F E3 = PAVGW -- 16x4 unsigned Packed Average, with rounding */
+ if (haveNo66noF2noF3(pfx) && sz == 4
+ && insn[0] == 0x0F && insn[1] == 0xE3) {
+ do_MMX_preamble();
+ delta = dis_MMXop_regmem_to_reg (
+ vbi, pfx, delta+2, insn[1], "pavgw", False );
+ goto decode_success;
+ }
+
+ /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
+ /* 0F C5 = PEXTRW -- extract 16-bit field from mmx(E) and put
+ zero-extend of it in ireg(G). */
+ if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)
+ && insn[0] == 0x0F && insn[1] == 0xC5) {
+ modrm = insn[2];
+ if (epartIsReg(modrm)) {
+ IRTemp sV = newTemp(Ity_I64);
+ t5 = newTemp(Ity_I16);
+ do_MMX_preamble();
+ assign(sV, getMMXReg(eregLO3ofRM(modrm)));
+ breakup64to16s( sV, &t3, &t2, &t1, &t0 );
+ switch (insn[3] & 3) {
+ case 0: assign(t5, mkexpr(t0)); break;
+ case 1: assign(t5, mkexpr(t1)); break;
+ case 2: assign(t5, mkexpr(t2)); break;
+ case 3: assign(t5, mkexpr(t3)); break;
+ default: vassert(0);
+ }
+ if (sz == 8)
+ putIReg64(gregOfRexRM(pfx,modrm), unop(Iop_16Uto64, mkexpr(t5)));
+ else
+ putIReg32(gregOfRexRM(pfx,modrm), unop(Iop_16Uto32, mkexpr(t5)));
+ DIP("pextrw $%d,%s,%s\n",
+ (Int)insn[3], nameMMXReg(eregLO3ofRM(modrm)),
+ sz==8 ? nameIReg64(gregOfRexRM(pfx,modrm))
+ : nameIReg32(gregOfRexRM(pfx,modrm))
+ );
+ delta += 4;
+ goto decode_success;
+ }
+ /* else fall through */
+ /* note, for anyone filling in the mem case: this insn has one
+ byte after the amode and therefore you must pass 1 as the
+ last arg to disAMode */
+ }
+
+ /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
+ /* 0F C4 = PINSRW -- get 16 bits from E(mem or low half ireg) and
+ put it into the specified lane of mmx(G). */
+ if (haveNo66noF2noF3(pfx)
+ && (sz == 4 || /* ignore redundant REX.W */ sz == 8)
+ && insn[0] == 0x0F && insn[1] == 0xC4) {
+ /* Use t0 .. t3 to hold the 4 original 16-bit lanes of the
+ mmx reg. t4 is the new lane value. t5 is the original
+ mmx value. t6 is the new mmx value. */
+ Int lane;
+ t4 = newTemp(Ity_I16);
+ t5 = newTemp(Ity_I64);
+ t6 = newTemp(Ity_I64);
+ modrm = insn[2];
+ do_MMX_preamble();
+
+ assign(t5, getMMXReg(gregLO3ofRM(modrm)));
+ breakup64to16s( t5, &t3, &t2, &t1, &t0 );
+
+ if (epartIsReg(modrm)) {
+ assign(t4, getIReg16(eregOfRexRM(pfx,modrm)));
+ delta += 3+1;
+ lane = insn[3+1-1];
+ DIP("pinsrw $%d,%s,%s\n", (Int)lane,
+ nameIReg16(eregOfRexRM(pfx,modrm)),
+ nameMMXReg(gregLO3ofRM(modrm)));
+ } else {
+ addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 1 );
+ delta += 3+alen;
+ lane = insn[3+alen-1];
+ assign(t4, loadLE(Ity_I16, mkexpr(addr)));
+ DIP("pinsrw $%d,%s,%s\n", (Int)lane,
+ dis_buf,
+ nameMMXReg(gregLO3ofRM(modrm)));
+ }
+
+ switch (lane & 3) {
+ case 0: assign(t6, mk64from16s(t3,t2,t1,t4)); break;
+ case 1: assign(t6, mk64from16s(t3,t2,t4,t0)); break;
+ case 2: assign(t6, mk64from16s(t3,t4,t1,t0)); break;
+ case 3: assign(t6, mk64from16s(t4,t2,t1,t0)); break;
+ default: vassert(0);
+ }
+ putMMXReg(gregLO3ofRM(modrm), mkexpr(t6));
+ goto decode_success;
+ }
+
+ /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
+ /* 0F EE = PMAXSW -- 16x4 signed max */
+ if (haveNo66noF2noF3(pfx) && sz == 4
+ && insn[0] == 0x0F && insn[1] == 0xEE) {
+ do_MMX_preamble();
+ delta = dis_MMXop_regmem_to_reg (
+ vbi, pfx, delta+2, insn[1], "pmaxsw", False );
+ goto decode_success;
+ }
+
+ /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
+ /* 0F DE = PMAXUB -- 8x8 unsigned max */
+ if (haveNo66noF2noF3(pfx) && sz == 4
+ && insn[0] == 0x0F && insn[1] == 0xDE) {
+ do_MMX_preamble();
+ delta = dis_MMXop_regmem_to_reg (
+ vbi, pfx, delta+2, insn[1], "pmaxub", False );
+ goto decode_success;
+ }
+
+ /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
+ /* 0F EA = PMINSW -- 16x4 signed min */
+ if (haveNo66noF2noF3(pfx) && sz == 4
+ && insn[0] == 0x0F && insn[1] == 0xEA) {
+ do_MMX_preamble();
+ delta = dis_MMXop_regmem_to_reg (
+ vbi, pfx, delta+2, insn[1], "pminsw", False );
+ goto decode_success;
+ }
+
+ /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
+ /* 0F DA = PMINUB -- 8x8 unsigned min */
+ if (haveNo66noF2noF3(pfx) && sz == 4
+ && insn[0] == 0x0F && insn[1] == 0xDA) {
+ do_MMX_preamble();
+ delta = dis_MMXop_regmem_to_reg (
+ vbi, pfx, delta+2, insn[1], "pminub", False );
+ goto decode_success;
+ }
+
+ /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
+ /* 0F D7 = PMOVMSKB -- extract sign bits from each of 8 lanes in
+      mmx(E), turn them into a byte, and put zero-extend of it in
+ ireg(G). */
+ if (haveNo66noF2noF3(pfx) && sz == 4
+ && insn[0] == 0x0F && insn[1] == 0xD7) {
+ modrm = insn[2];
+ if (epartIsReg(modrm)) {
+ do_MMX_preamble();
+ t0 = newTemp(Ity_I64);
+ t1 = newTemp(Ity_I64);
+ assign(t0, getMMXReg(eregLO3ofRM(modrm)));
+ assign(t1, mkIRExprCCall(
+ Ity_I64, 0/*regparms*/,
+ "amd64g_calculate_mmx_pmovmskb",
+ &amd64g_calculate_mmx_pmovmskb,
+ mkIRExprVec_1(mkexpr(t0))));
+ putIReg32(gregOfRexRM(pfx,modrm), unop(Iop_64to32,mkexpr(t1)));
+ DIP("pmovmskb %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
+ nameIReg32(gregOfRexRM(pfx,modrm)));
+ delta += 3;
+ goto decode_success;
+ }
+ /* else fall through */
+ }
+
+ /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
+   /* 0F E4 = PMULHUW -- 16x4 hi-half of unsigned widening multiply */
+ if (haveNo66noF2noF3(pfx) && sz == 4
+ && insn[0] == 0x0F && insn[1] == 0xE4) {
+ do_MMX_preamble();
+      delta = dis_MMXop_regmem_to_reg (
+                 vbi, pfx, delta+2, insn[1], "pmulhuw", False );
+ goto decode_success;
+ }
+
+ /* 0F 18 /0 = PREFETCHNTA -- prefetch into caches, */
+   /* 0F 18 /1 = PREFETCHT0 -- with various different hints */
+   /* 0F 18 /2 = PREFETCHT1 */
+   /* 0F 18 /3 = PREFETCHT2 */
+ if (insn[0] == 0x0F && insn[1] == 0x18
+ && haveNo66noF2noF3(pfx)
+ && !epartIsReg(insn[2])
+ && gregLO3ofRM(insn[2]) >= 0 && gregLO3ofRM(insn[2]) <= 3) {
+ HChar* hintstr = "??";
+
+ modrm = getUChar(delta+2);
+ vassert(!epartIsReg(modrm));
+
+ addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
+ delta += 2+alen;
+
+ switch (gregLO3ofRM(modrm)) {
+ case 0: hintstr = "nta"; break;
+ case 1: hintstr = "t0"; break;
+ case 2: hintstr = "t1"; break;
+ case 3: hintstr = "t2"; break;
+ default: vassert(0);
+ }
+
+ DIP("prefetch%s %s\n", hintstr, dis_buf);
+ goto decode_success;
+ }
+
+ /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
+ /* 0F F6 = PSADBW -- sum of 8Ux8 absolute differences */
+ if (haveNo66noF2noF3(pfx) && sz == 4
+ && insn[0] == 0x0F && insn[1] == 0xF6) {
+ do_MMX_preamble();
+ delta = dis_MMXop_regmem_to_reg (
+ vbi, pfx, delta+2, insn[1], "psadbw", False );
+ goto decode_success;
+ }
+
+ /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
+ /* 0F 70 = PSHUFW -- rearrange 4x16 from E(mmx or mem) to G(mmx) */
+ if (haveNo66noF2noF3(pfx) && sz == 4
+ && insn[0] == 0x0F && insn[1] == 0x70) {
+ Int order;
+ IRTemp sV, dV, s3, s2, s1, s0;
+ s3 = s2 = s1 = s0 = IRTemp_INVALID;
+ sV = newTemp(Ity_I64);
+ dV = newTemp(Ity_I64);
+ do_MMX_preamble();
+ modrm = insn[2];
+ if (epartIsReg(modrm)) {
+ assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
+ order = (Int)insn[3];
+ delta += 2+2;
+ DIP("pshufw $%d,%s,%s\n", order,
+ nameMMXReg(eregLO3ofRM(modrm)),
+ nameMMXReg(gregLO3ofRM(modrm)));
+ } else {
+ addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf,
+ 1/*extra byte after amode*/ );
+ assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
+ order = (Int)insn[2+alen];
+ delta += 3+alen;
+ DIP("pshufw $%d,%s,%s\n", order,
+ dis_buf,
+ nameMMXReg(gregLO3ofRM(modrm)));
+ }
+ breakup64to16s( sV, &s3, &s2, &s1, &s0 );
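+   /* Two 'order' bits select each result lane; e.g. order == 0xE4
+      (binary 11 10 01 00) picks s3,s2,s1,s0, the identity shuffle. */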
+# define SEL(n) \
+ ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
+ assign(dV,
+ mk64from16s( SEL((order>>6)&3), SEL((order>>4)&3),
+ SEL((order>>2)&3), SEL((order>>0)&3) )
+ );
+ putMMXReg(gregLO3ofRM(modrm), mkexpr(dV));
+# undef SEL
+ goto decode_success;
+ }
+
+ /* 0F 53 = RCPPS -- approx reciprocal 32Fx4 from R/M to R */
+ if (haveNo66noF2noF3(pfx) && sz == 4
+ && insn[0] == 0x0F && insn[1] == 0x53) {
+ delta = dis_SSE_E_to_G_unary_all( vbi, pfx, delta+2,
+ "rcpps", Iop_Recip32Fx4 );
+ goto decode_success;
+ }
+
+ /* F3 0F 53 = RCPSS -- approx reciprocal 32F0x4 from R/M to R */
+ if (haveF3no66noF2(pfx) && sz == 4
+ && insn[0] == 0x0F && insn[1] == 0x53) {
+ delta = dis_SSE_E_to_G_unary_lo32( vbi, pfx, delta+2,
+ "rcpss", Iop_Recip32F0x4 );
+ goto decode_success;
+ }
+
+ /* 0F 52 = RSQRTPS -- approx reciprocal sqrt 32Fx4 from R/M to R */
+ if (haveNo66noF2noF3(pfx) && sz == 4
+ && insn[0] == 0x0F && insn[1] == 0x52) {
+ delta = dis_SSE_E_to_G_unary_all( vbi, pfx, delta+2,
+ "rsqrtps", Iop_RSqrt32Fx4 );
+ goto decode_success;
+ }
+
+ /* F3 0F 52 = RSQRTSS -- approx reciprocal sqrt 32F0x4 from R/M to R */
+ if (haveF3no66noF2(pfx) && sz == 4
+ && insn[0] == 0x0F && insn[1] == 0x52) {
+ delta = dis_SSE_E_to_G_unary_lo32( vbi, pfx, delta+2,
+ "rsqrtss", Iop_RSqrt32F0x4 );
+ goto decode_success;
+ }
+
+ /* 0F AE /7 = SFENCE -- flush pending operations to memory */
+ if (haveNo66noF2noF3(pfx)
+ && insn[0] == 0x0F && insn[1] == 0xAE
+ && epartIsReg(insn[2]) && gregLO3ofRM(insn[2]) == 7
+ && sz == 4) {
+ delta += 3;
+ /* Insert a memory fence. It's sometimes important that these
+ are carried through to the generated code. */
+ stmt( IRStmt_MBE(Imbe_Fence) );
+ DIP("sfence\n");
+ goto decode_success;
+ }
+
+ /* 0F C6 /r ib = SHUFPS -- shuffle packed F32s */
+ if (haveNo66noF2noF3(pfx) && sz == 4
+ && insn[0] == 0x0F && insn[1] == 0xC6) {
+ Int select;
+ IRTemp sV, dV;
+ IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
+ sV = newTemp(Ity_V128);
+ dV = newTemp(Ity_V128);
+ s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
+ modrm = insn[2];
+ assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );
+
+ if (epartIsReg(modrm)) {
+ assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
+ select = (Int)insn[3];
+ delta += 2+2;
+ DIP("shufps $%d,%s,%s\n", select,
+ nameXMMReg(eregOfRexRM(pfx,modrm)),
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ } else {
+ addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf,
+ 1/*byte at end of insn*/ );
+ assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
+ select = (Int)insn[2+alen];
+ delta += 3+alen;
+ DIP("shufps $%d,%s,%s\n", select,
+ dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ }
+
+ breakup128to32s( dV, &d3, &d2, &d1, &d0 );
+ breakup128to32s( sV, &s3, &s2, &s1, &s0 );
+
+# define SELD(n) ((n)==0 ? d0 : ((n)==1 ? d1 : ((n)==2 ? d2 : d3)))
+# define SELS(n) ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
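+
+   /* Result lanes 0..1 of SHUFPS are selected from the dest (SELD)
+      and lanes 2..3 from the source (SELS), two 'select' bits per
+      lane. */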
+
+ putXMMReg(
+ gregOfRexRM(pfx,modrm),
+ mk128from32s( SELS((select>>6)&3), SELS((select>>4)&3),
+ SELD((select>>2)&3), SELD((select>>0)&3) )
+ );
+
+# undef SELD
+# undef SELS
+
+ goto decode_success;
+ }
+
+ /* 0F 51 = SQRTPS -- approx sqrt 32Fx4 from R/M to R */
+ if (haveNo66noF2noF3(pfx) && sz == 4
+ && insn[0] == 0x0F && insn[1] == 0x51) {
+ delta = dis_SSE_E_to_G_unary_all( vbi, pfx, delta+2,
+ "sqrtps", Iop_Sqrt32Fx4 );
+ goto decode_success;
+ }
+
+ /* F3 0F 51 = SQRTSS -- approx sqrt 32F0x4 from R/M to R */
+ if (haveF3no66noF2(pfx) && sz == 4
+ && insn[0] == 0x0F && insn[1] == 0x51) {
+ delta = dis_SSE_E_to_G_unary_lo32( vbi, pfx, delta+2,
+ "sqrtss", Iop_Sqrt32F0x4 );
+ goto decode_success;
+ }
+
+ /* 0F AE /3 = STMXCSR m32 -- store %mxcsr */
+ if (insn[0] == 0x0F && insn[1] == 0xAE
+ && haveNo66noF2noF3(pfx)
+ && !epartIsReg(insn[2]) && gregLO3ofRM(insn[2]) == 3) {
+
+ vassert(sz == 4);
+ addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
+ delta += 2+alen;
+
+ /* Fake up a native SSE mxcsr word. The only thing it depends
+ on is SSEROUND[1:0], so call a clean helper to cook it up.
+ */
+   /* ULong amd64g_create_mxcsr ( ULong sseround ) */
+ DIP("stmxcsr %s\n", dis_buf);
+ storeLE(
+ mkexpr(addr),
+ unop(Iop_64to32,
+ mkIRExprCCall(
+ Ity_I64, 0/*regp*/,
+ "amd64g_create_mxcsr", &amd64g_create_mxcsr,
+ mkIRExprVec_1( unop(Iop_32Uto64,get_sse_roundingmode()) )
+ )
+ )
+ );
+ goto decode_success;
+ }
+
+ /* 0F 5C = SUBPS -- sub 32Fx4 from R/M to R */
+ if (haveNo66noF2noF3(pfx) && sz == 4
+ && insn[0] == 0x0F && insn[1] == 0x5C) {
+ delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "subps", Iop_Sub32Fx4 );
+ goto decode_success;
+ }
+
+ /* F3 0F 5C = SUBSS -- sub 32F0x4 from R/M to R */
+ if (haveF3no66noF2(pfx) && sz == 4
+ && insn[0] == 0x0F && insn[1] == 0x5C) {
+ delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta+2, "subss", Iop_Sub32F0x4 );
+ goto decode_success;
+ }
+
+ /* 0F 15 = UNPCKHPS -- unpack and interleave high part F32s */
+ /* 0F 14 = UNPCKLPS -- unpack and interleave low part F32s */
+ /* These just appear to be special cases of SHUFPS */
+ if (haveNo66noF2noF3(pfx) && sz == 4
+ && insn[0] == 0x0F && (insn[1] == 0x15 || insn[1] == 0x14)) {
+ IRTemp sV, dV;
+ IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
+ Bool hi = toBool(insn[1] == 0x15);
+ sV = newTemp(Ity_V128);
+ dV = newTemp(Ity_V128);
+ s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
+ modrm = insn[2];
+ assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );
+
+ if (epartIsReg(modrm)) {
+ assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
+ delta += 2+1;
+ DIP("unpck%sps %s,%s\n", hi ? "h" : "l",
+ nameXMMReg(eregOfRexRM(pfx,modrm)),
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ } else {
+ addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
+ assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
+ delta += 2+alen;
+ DIP("unpck%sps %s,%s\n", hi ? "h" : "l",
+ dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ }
+
+ breakup128to32s( dV, &d3, &d2, &d1, &d0 );
+ breakup128to32s( sV, &s3, &s2, &s1, &s0 );
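+   /* hi: interleave the upper two lanes of each operand, giving
+      s3 d3 s2 d2; lo: the lower two, giving s1 d1 s0 d0. */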
+
+ if (hi) {
+ putXMMReg( gregOfRexRM(pfx,modrm), mk128from32s( s3, d3, s2, d2 ) );
+ } else {
+ putXMMReg( gregOfRexRM(pfx,modrm), mk128from32s( s1, d1, s0, d0 ) );
+ }
+
+ goto decode_success;
+ }
+
+   /* 0F 57 = XORPS -- G = G xor E */
+ if (haveNo66noF2noF3(pfx) && sz == 4
+ && insn[0] == 0x0F && insn[1] == 0x57) {
+ delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "xorps", Iop_XorV128 );
+ goto decode_success;
+ }
+
+ /* ---------------------------------------------------- */
+ /* --- end of the SSE decoder. --- */
+ /* ---------------------------------------------------- */
+
+ /* ---------------------------------------------------- */
+ /* --- start of the SSE2 decoder. --- */
+ /* ---------------------------------------------------- */
+
+   /* 66 0F 58 = ADDPD -- add 64Fx2 from R/M to R */
+ if (have66noF2noF3(pfx)
+ && (sz == 2 || /* ignore redundant REX.W */ sz == 8)
+ && insn[0] == 0x0F && insn[1] == 0x58) {
+ delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "addpd", Iop_Add64Fx2 );
+ goto decode_success;
+ }
+
+ /* F2 0F 58 = ADDSD -- add 64F0x2 from R/M to R */
+ if (haveF2no66noF3(pfx)
+ && (sz == 4 || /* ignore redundant REX.W */ sz == 8)
+ && insn[0] == 0x0F && insn[1] == 0x58) {
+ delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta+2, "addsd", Iop_Add64F0x2 );
+ goto decode_success;
+ }
+
+ /* 66 0F 55 = ANDNPD -- G = (not G) and E */
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x55) {
+ delta = dis_SSE_E_to_G_all_invG( vbi, pfx, delta+2, "andnpd", Iop_AndV128 );
+ goto decode_success;
+ }
+
+ /* 66 0F 54 = ANDPD -- G = G and E */
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x54) {
+ delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "andpd", Iop_AndV128 );
+ goto decode_success;
+ }
+
+ /* 66 0F C2 = CMPPD -- 64Fx2 comparison from R/M to R */
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0xC2) {
+ delta = dis_SSEcmp_E_to_G( vbi, pfx, delta+2, "cmppd", True, 8 );
+ goto decode_success;
+ }
+
+ /* F2 0F C2 = CMPSD -- 64F0x2 comparison from R/M to R */
+ if (haveF2no66noF3(pfx) && sz == 4
+ && insn[0] == 0x0F && insn[1] == 0xC2) {
+ delta = dis_SSEcmp_E_to_G( vbi, pfx, delta+2, "cmpsd", False, 8 );
+ goto decode_success;
+ }
+
+ /* 66 0F 2F = COMISD -- 64F0x2 comparison G,E, and set ZCP */
+ /* 66 0F 2E = UCOMISD -- 64F0x2 comparison G,E, and set ZCP */
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && (insn[1] == 0x2F || insn[1] == 0x2E)) {
+ IRTemp argL = newTemp(Ity_F64);
+ IRTemp argR = newTemp(Ity_F64);
+ modrm = getUChar(delta+2);
+ if (epartIsReg(modrm)) {
+ assign( argR, getXMMRegLane64F( eregOfRexRM(pfx,modrm),
+ 0/*lowest lane*/ ) );
+ delta += 2+1;
+ DIP("%scomisd %s,%s\n", insn[1]==0x2E ? "u" : "",
+ nameXMMReg(eregOfRexRM(pfx,modrm)),
+ nameXMMReg(gregOfRexRM(pfx,modrm)) );
+ } else {
+ addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
+ assign( argR, loadLE(Ity_F64, mkexpr(addr)) );
+ delta += 2+alen;
+ DIP("%scomisd %s,%s\n", insn[1]==0x2E ? "u" : "",
+ dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)) );
+ }
+ assign( argL, getXMMRegLane64F( gregOfRexRM(pfx,modrm),
+ 0/*lowest lane*/ ) );
+
+ stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
+ stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
+ stmt( IRStmt_Put(
+ OFFB_CC_DEP1,
+ binop( Iop_And64,
+ unop( Iop_32Uto64,
+ binop(Iop_CmpF64, mkexpr(argL), mkexpr(argR)) ),
+ mkU64(0x45)
+ )));
+
+ goto decode_success;
+ }
+
+ /* F3 0F E6 = CVTDQ2PD -- convert 2 x I32 in mem/lo half xmm to 2 x
+ F64 in xmm(G) */
+ if (haveF3no66noF2(pfx) && insn[0] == 0x0F && insn[1] == 0xE6) {
+ IRTemp arg64 = newTemp(Ity_I64);
+ if (sz != 4) goto decode_failure;
+
+ modrm = getUChar(delta+2);
+ if (epartIsReg(modrm)) {
+ assign( arg64, getXMMRegLane64(eregOfRexRM(pfx,modrm), 0) );
+ delta += 2+1;
+ DIP("cvtdq2pd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ } else {
+ addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
+ assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
+ delta += 2+alen;
+ DIP("cvtdq2pd %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)) );
+ }
+
+ putXMMRegLane64F(
+ gregOfRexRM(pfx,modrm), 0,
+ unop(Iop_I32StoF64, unop(Iop_64to32, mkexpr(arg64)))
+ );
+
+ putXMMRegLane64F(
+ gregOfRexRM(pfx,modrm), 1,
+ unop(Iop_I32StoF64, unop(Iop_64HIto32, mkexpr(arg64)))
+ );
+
+ goto decode_success;
+ }
+
+ /* 0F 5B = CVTDQ2PS -- convert 4 x I32 in mem/xmm to 4 x F32 in
+ xmm(G) */
+ if (haveNo66noF2noF3(pfx) && sz == 4
+ && insn[0] == 0x0F && insn[1] == 0x5B) {
+ IRTemp argV = newTemp(Ity_V128);
+ IRTemp rmode = newTemp(Ity_I32);
+
+ modrm = getUChar(delta+2);
+ if (epartIsReg(modrm)) {
+ assign( argV, getXMMReg(eregOfRexRM(pfx,modrm)) );
+ delta += 2+1;
+ DIP("cvtdq2ps %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ } else {
+ addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
+ assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
+ delta += 2+alen;
+ DIP("cvtdq2ps %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)) );
+ }
+
+ assign( rmode, get_sse_roundingmode() );
+ breakup128to32s( argV, &t3, &t2, &t1, &t0 );
+
+# define CVT(_t) binop( Iop_F64toF32, \
+ mkexpr(rmode), \
+ unop(Iop_I32StoF64,mkexpr(_t)))
+
+ putXMMRegLane32F( gregOfRexRM(pfx,modrm), 3, CVT(t3) );
+ putXMMRegLane32F( gregOfRexRM(pfx,modrm), 2, CVT(t2) );
+ putXMMRegLane32F( gregOfRexRM(pfx,modrm), 1, CVT(t1) );
+ putXMMRegLane32F( gregOfRexRM(pfx,modrm), 0, CVT(t0) );
+
+# undef CVT
+
+ goto decode_success;
+ }
+
+ /* 66 0F E6 = CVTTPD2DQ -- convert 2 x F64 in mem/xmm to 2 x I32 in
+ lo half xmm(G), and zero upper half, rounding towards zero */
+ /* F2 0F E6 = CVTPD2DQ -- convert 2 x F64 in mem/xmm to 2 x I32 in
+ lo half xmm(G), according to prevailing rounding mode, and zero
+ upper half */
+ if ( ( (haveF2no66noF3(pfx) && sz == 4)
+ || (have66noF2noF3(pfx) && sz == 2)
+ )
+ && insn[0] == 0x0F && insn[1] == 0xE6) {
+ IRTemp argV = newTemp(Ity_V128);
+ IRTemp rmode = newTemp(Ity_I32);
+ Bool r2zero = toBool(sz == 2);
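+      /* sz == 2 here means the 66-prefixed form, i.e. the
+         truncating variant CVTTPD2DQ. */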
+
+ modrm = getUChar(delta+2);
+ if (epartIsReg(modrm)) {
+ assign( argV, getXMMReg(eregOfRexRM(pfx,modrm)) );
+ delta += 2+1;
+ DIP("cvt%spd2dq %s,%s\n", r2zero ? "t" : "",
+ nameXMMReg(eregOfRexRM(pfx,modrm)),
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ } else {
+ addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
+ assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
+ delta += 2+alen;
+ DIP("cvt%spd2dq %s,%s\n", r2zero ? "t" : "",
+ dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)) );
+ }
+
+ if (r2zero) {
+ assign(rmode, mkU32((UInt)Irrm_ZERO) );
+ } else {
+ assign( rmode, get_sse_roundingmode() );
+ }
+
+ t0 = newTemp(Ity_F64);
+ t1 = newTemp(Ity_F64);
+ assign( t0, unop(Iop_ReinterpI64asF64,
+ unop(Iop_V128to64, mkexpr(argV))) );
+ assign( t1, unop(Iop_ReinterpI64asF64,
+ unop(Iop_V128HIto64, mkexpr(argV))) );
+
+# define CVT(_t) binop( Iop_F64toI32S, \
+ mkexpr(rmode), \
+ mkexpr(_t) )
+
+ putXMMRegLane32( gregOfRexRM(pfx,modrm), 3, mkU32(0) );
+ putXMMRegLane32( gregOfRexRM(pfx,modrm), 2, mkU32(0) );
+ putXMMRegLane32( gregOfRexRM(pfx,modrm), 1, CVT(t1) );
+ putXMMRegLane32( gregOfRexRM(pfx,modrm), 0, CVT(t0) );
+
+# undef CVT
+
+ goto decode_success;
+ }
+
+ /* 66 0F 2D = CVTPD2PI -- convert 2 x F64 in mem/xmm to 2 x
+ I32 in mmx, according to prevailing SSE rounding mode */
+ /* 66 0F 2C = CVTTPD2PI -- convert 2 x F64 in mem/xmm to 2 x
+ I32 in mmx, rounding towards zero */
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && (insn[1] == 0x2D || insn[1] == 0x2C)) {
+ IRTemp dst64 = newTemp(Ity_I64);
+ IRTemp rmode = newTemp(Ity_I32);
+ IRTemp f64lo = newTemp(Ity_F64);
+ IRTemp f64hi = newTemp(Ity_F64);
+ Bool r2zero = toBool(insn[1] == 0x2C);
+
+ do_MMX_preamble();
+ modrm = getUChar(delta+2);
+
+ if (epartIsReg(modrm)) {
+ delta += 2+1;
+ assign(f64lo, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 0));
+ assign(f64hi, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 1));
+ DIP("cvt%spd2pi %s,%s\n", r2zero ? "t" : "",
+ nameXMMReg(eregOfRexRM(pfx,modrm)),
+ nameMMXReg(gregLO3ofRM(modrm)));
+ } else {
+ addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
+ assign(f64lo, loadLE(Ity_F64, mkexpr(addr)));
+ assign(f64hi, loadLE(Ity_F64, binop( Iop_Add64,
+ mkexpr(addr),
+ mkU64(8) )));
+ delta += 2+alen;
+ DIP("cvt%spf2pi %s,%s\n", r2zero ? "t" : "",
+ dis_buf,
+ nameMMXReg(gregLO3ofRM(modrm)));
+ }
+
+ if (r2zero) {
+ assign(rmode, mkU32((UInt)Irrm_ZERO) );
+ } else {
+ assign( rmode, get_sse_roundingmode() );
+ }
+
+ assign(
+ dst64,
+ binop( Iop_32HLto64,
+ binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64hi) ),
+ binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64lo) )
+ )
+ );
+
+ putMMXReg(gregLO3ofRM(modrm), mkexpr(dst64));
+ goto decode_success;
+ }
+
+ /* 66 0F 5A = CVTPD2PS -- convert 2 x F64 in mem/xmm to 2 x F32 in
+ lo half xmm(G), rounding according to prevailing SSE rounding
+ mode, and zero upper half */
+ /* Note, this is practically identical to CVTPD2DQ. It would have
+ been nicer to merge them together, but the insn[] offsets differ
+ by one. */
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x5A) {
+ IRTemp argV = newTemp(Ity_V128);
+ IRTemp rmode = newTemp(Ity_I32);
+
+ modrm = getUChar(delta+2);
+ if (epartIsReg(modrm)) {
+ assign( argV, getXMMReg(eregOfRexRM(pfx,modrm)) );
+ delta += 2+1;
+ DIP("cvtpd2ps %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ } else {
+ addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
+ assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
+ delta += 2+alen;
+ DIP("cvtpd2ps %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)) );
+ }
+
+ assign( rmode, get_sse_roundingmode() );
+ t0 = newTemp(Ity_F64);
+ t1 = newTemp(Ity_F64);
+ assign( t0, unop(Iop_ReinterpI64asF64,
+ unop(Iop_V128to64, mkexpr(argV))) );
+ assign( t1, unop(Iop_ReinterpI64asF64,
+ unop(Iop_V128HIto64, mkexpr(argV))) );
+
+# define CVT(_t) binop( Iop_F64toF32, \
+ mkexpr(rmode), \
+ mkexpr(_t) )
+
+ putXMMRegLane32( gregOfRexRM(pfx,modrm), 3, mkU32(0) );
+ putXMMRegLane32( gregOfRexRM(pfx,modrm), 2, mkU32(0) );
+ putXMMRegLane32F( gregOfRexRM(pfx,modrm), 1, CVT(t1) );
+ putXMMRegLane32F( gregOfRexRM(pfx,modrm), 0, CVT(t0) );
+
+# undef CVT
+
+ goto decode_success;
+ }
+
+ /* 66 0F 2A = CVTPI2PD -- convert 2 x I32 in mem/mmx to 2 x F64 in
+ xmm(G) */
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x2A) {
+ IRTemp arg64 = newTemp(Ity_I64);
+
+ modrm = getUChar(delta+2);
+ if (epartIsReg(modrm)) {
+         /* Only switch to MMX mode if the source is an MMX register.
+            This is inconsistent with all other instructions which
+            convert between XMM and (M64 or MMX), which always switch
+            to MMX mode even if the 64-bit operand is M64 rather than
+            an MMX register.  At least, that's what the Intel docs
+            seem to me to say.  Fixes #210264. */
+ do_MMX_preamble();
+ assign( arg64, getMMXReg(eregLO3ofRM(modrm)) );
+ delta += 2+1;
+ DIP("cvtpi2pd %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ } else {
+ addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
+ assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
+ delta += 2+alen;
+ DIP("cvtpi2pd %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)) );
+ }
+
+ putXMMRegLane64F(
+ gregOfRexRM(pfx,modrm), 0,
+ unop(Iop_I32StoF64, unop(Iop_64to32, mkexpr(arg64)) )
+ );
+
+ putXMMRegLane64F(
+ gregOfRexRM(pfx,modrm), 1,
+ unop(Iop_I32StoF64, unop(Iop_64HIto32, mkexpr(arg64)) )
+ );
+
+ goto decode_success;
+ }
+
+ /* F3 0F 5B = CVTTPS2DQ -- convert 4 x F32 in mem/xmm to 4 x I32 in
+ xmm(G), rounding towards zero */
+ /* 66 0F 5B = CVTPS2DQ -- convert 4 x F32 in mem/xmm to 4 x I32 in
+ xmm(G), as per the prevailing rounding mode */
+ if ( ( (have66noF2noF3(pfx) && sz == 2)
+ || (haveF3no66noF2(pfx) && sz == 4)
+ )
+ && insn[0] == 0x0F && insn[1] == 0x5B) {
+ IRTemp argV = newTemp(Ity_V128);
+ IRTemp rmode = newTemp(Ity_I32);
+ Bool r2zero = toBool(sz == 4);
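+      /* sz == 4 here means the F3-prefixed form, i.e. the
+         truncating variant CVTTPS2DQ. */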
+
+ modrm = getUChar(delta+2);
+ if (epartIsReg(modrm)) {
+ assign( argV, getXMMReg(eregOfRexRM(pfx,modrm)) );
+ delta += 2+1;
+ DIP("cvtps2dq %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ } else {
+ addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
+ assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
+ delta += 2+alen;
+ DIP("cvtps2dq %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)) );
+ }
+
+ if (r2zero) {
+ assign( rmode, mkU32((UInt)Irrm_ZERO) );
+ } else {
+ assign( rmode, get_sse_roundingmode() );
+ }
+
+ breakup128to32s( argV, &t3, &t2, &t1, &t0 );
+
+ /* This is less than ideal. If it turns out to be a performance
+ bottleneck it can be improved. */
+# define CVT(_t) \
+ binop( Iop_F64toI32S, \
+ mkexpr(rmode), \
+ unop( Iop_F32toF64, \
+ unop( Iop_ReinterpI32asF32, mkexpr(_t))) )
+
+ putXMMRegLane32( gregOfRexRM(pfx,modrm), 3, CVT(t3) );
+ putXMMRegLane32( gregOfRexRM(pfx,modrm), 2, CVT(t2) );
+ putXMMRegLane32( gregOfRexRM(pfx,modrm), 1, CVT(t1) );
+ putXMMRegLane32( gregOfRexRM(pfx,modrm), 0, CVT(t0) );
+
+# undef CVT
+
+ goto decode_success;
+ }
+
+ /* 0F 5A = CVTPS2PD -- convert 2 x F32 in low half mem/xmm to 2 x
+ F64 in xmm(G). */
+ if (haveNo66noF2noF3(pfx) && sz == 4
+ && insn[0] == 0x0F && insn[1] == 0x5A) {
+ IRTemp f32lo = newTemp(Ity_F32);
+ IRTemp f32hi = newTemp(Ity_F32);
+
+ modrm = getUChar(delta+2);
+ if (epartIsReg(modrm)) {
+ assign( f32lo, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 0) );
+ assign( f32hi, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 1) );
+ delta += 2+1;
+ DIP("cvtps2pd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ } else {
+ addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
+ assign( f32lo, loadLE(Ity_F32, mkexpr(addr)) );
+ assign( f32hi, loadLE(Ity_F32,
+ binop(Iop_Add64,mkexpr(addr),mkU64(4))) );
+ delta += 2+alen;
+ DIP("cvtps2pd %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)) );
+ }
+
+ putXMMRegLane64F( gregOfRexRM(pfx,modrm), 1,
+ unop(Iop_F32toF64, mkexpr(f32hi)) );
+ putXMMRegLane64F( gregOfRexRM(pfx,modrm), 0,
+ unop(Iop_F32toF64, mkexpr(f32lo)) );
+
+ goto decode_success;
+ }
+
+ /* F2 0F 2D = CVTSD2SI
+ when sz==4 -- convert F64 in mem/low half xmm to I32 in ireg,
+ according to prevailing SSE rounding mode
+ when sz==8 -- convert F64 in mem/low half xmm to I64 in ireg,
+ according to prevailing SSE rounding mode
+ */
+ /* F2 0F 2C = CVTTSD2SI
+ when sz==4 -- convert F64 in mem/low half xmm to I32 in ireg,
+ truncating towards zero
+ when sz==8 -- convert F64 in mem/low half xmm to I64 in ireg,
+ truncating towards zero
+ */
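+   /* e.g. for f64lo = 2.7, CVTTSD2SI gives 2 (truncation), whereas
+      CVTSD2SI under the default round-to-nearest-even mode gives 3. */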
+ if (haveF2no66noF3(pfx)
+ && insn[0] == 0x0F
+ && (insn[1] == 0x2D || insn[1] == 0x2C)) {
+ IRTemp rmode = newTemp(Ity_I32);
+ IRTemp f64lo = newTemp(Ity_F64);
+ Bool r2zero = toBool(insn[1] == 0x2C);
+ vassert(sz == 4 || sz == 8);
+
+ modrm = getUChar(delta+2);
+ if (epartIsReg(modrm)) {
+ delta += 2+1;
+ assign(f64lo, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 0));
+ DIP("cvt%ssd2si %s,%s\n", r2zero ? "t" : "",
+ nameXMMReg(eregOfRexRM(pfx,modrm)),
+ nameIReg(sz, gregOfRexRM(pfx,modrm), False));
+ } else {
+ addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
+ assign(f64lo, loadLE(Ity_F64, mkexpr(addr)));
+ delta += 2+alen;
+ DIP("cvt%ssd2si %s,%s\n", r2zero ? "t" : "",
+ dis_buf,
+ nameIReg(sz, gregOfRexRM(pfx,modrm), False));
+ }
+
+ if (r2zero) {
+ assign( rmode, mkU32((UInt)Irrm_ZERO) );
+ } else {
+ assign( rmode, get_sse_roundingmode() );
+ }
+
+ if (sz == 4) {
+ putIReg32( gregOfRexRM(pfx,modrm),
+ binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64lo)) );
+ } else {
+ putIReg64( gregOfRexRM(pfx,modrm),
+ binop( Iop_F64toI64S, mkexpr(rmode), mkexpr(f64lo)) );
+ }
+
+ goto decode_success;
+ }
+
+ /* F2 0F 5A = CVTSD2SS -- convert F64 in mem/low half xmm to F32 in
+ low 1/4 xmm(G), according to prevailing SSE rounding mode */
+ if (haveF2no66noF3(pfx) && sz == 4
+ && insn[0] == 0x0F && insn[1] == 0x5A) {
+ IRTemp rmode = newTemp(Ity_I32);
+ IRTemp f64lo = newTemp(Ity_F64);
+ vassert(sz == 4);
+
+ modrm = getUChar(delta+2);
+ if (epartIsReg(modrm)) {
+ delta += 2+1;
+ assign(f64lo, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 0));
+ DIP("cvtsd2ss %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ } else {
+ addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
+ assign(f64lo, loadLE(Ity_F64, mkexpr(addr)));
+ delta += 2+alen;
+ DIP("cvtsd2ss %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ }
+
+ assign( rmode, get_sse_roundingmode() );
+ putXMMRegLane32F(
+ gregOfRexRM(pfx,modrm), 0,
+ binop( Iop_F64toF32, mkexpr(rmode), mkexpr(f64lo) )
+ );
+
+ goto decode_success;
+ }
+
+ /* F2 0F 2A = CVTSI2SD
+ when sz==4 -- convert I32 in mem/ireg to F64 in low half xmm
+ when sz==8 -- convert I64 in mem/ireg to F64 in low half xmm
+ */
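+   /* Note the asymmetry below: the I32 case uses the unary, exact
+      Iop_I32StoF64, since every I32 is exactly representable as an
+      F64, whereas the I64 case must consult the SSE rounding mode,
+      since an I64 can exceed the 53-bit precision of an F64. */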
+ if (haveF2no66noF3(pfx) && (sz == 4 || sz == 8)
+ && insn[0] == 0x0F && insn[1] == 0x2A) {
+ modrm = getUChar(delta+2);
+
+ if (sz == 4) {
+ IRTemp arg32 = newTemp(Ity_I32);
+ if (epartIsReg(modrm)) {
+ assign( arg32, getIReg32(eregOfRexRM(pfx,modrm)) );
+ delta += 2+1;
+ DIP("cvtsi2sd %s,%s\n", nameIReg32(eregOfRexRM(pfx,modrm)),
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ } else {
+ addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
+ assign( arg32, loadLE(Ity_I32, mkexpr(addr)) );
+ delta += 2+alen;
+ DIP("cvtsi2sd %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)) );
+ }
+ putXMMRegLane64F( gregOfRexRM(pfx,modrm), 0,
+ unop(Iop_I32StoF64, mkexpr(arg32))
+ );
+ } else {
+ /* sz == 8 */
+ IRTemp arg64 = newTemp(Ity_I64);
+ if (epartIsReg(modrm)) {
+ assign( arg64, getIReg64(eregOfRexRM(pfx,modrm)) );
+ delta += 2+1;
+ DIP("cvtsi2sdq %s,%s\n", nameIReg64(eregOfRexRM(pfx,modrm)),
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ } else {
+ addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
+ assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
+ delta += 2+alen;
+ DIP("cvtsi2sdq %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)) );
+ }
+ putXMMRegLane64F(
+ gregOfRexRM(pfx,modrm),
+ 0,
+ binop( Iop_I64StoF64,
+ get_sse_roundingmode(),
+ mkexpr(arg64)
+ )
+ );
+
+ }
+
+ goto decode_success;
+ }
+
+ /* F3 0F 5A = CVTSS2SD -- convert F32 in mem/low 1/4 xmm to F64 in
+ low half xmm(G) */
+ if (haveF3no66noF2(pfx) && sz == 4
+ && insn[0] == 0x0F && insn[1] == 0x5A) {
+ IRTemp f32lo = newTemp(Ity_F32);
+
+ modrm = getUChar(delta+2);
+ if (epartIsReg(modrm)) {
+ delta += 2+1;
+ assign(f32lo, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 0));
+ DIP("cvtss2sd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ } else {
+ addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
+ assign(f32lo, loadLE(Ity_F32, mkexpr(addr)));
+ delta += 2+alen;
+ DIP("cvtss2sd %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ }
+
+ putXMMRegLane64F( gregOfRexRM(pfx,modrm), 0,
+ unop( Iop_F32toF64, mkexpr(f32lo) ) );
+
+ goto decode_success;
+ }
+
+ /* 66 0F 5E = DIVPD -- div 64Fx2 from R/M to R */
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x5E) {
+ delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "divpd", Iop_Div64Fx2 );
+ goto decode_success;
+ }
+
+ /* F2 0F 5E = DIVSD -- div 64F0x2 from R/M to R */
+ if (haveF2no66noF3(pfx) && insn[0] == 0x0F && insn[1] == 0x5E) {
+ vassert(sz == 4);
+ delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta+2, "divsd", Iop_Div64F0x2 );
+ goto decode_success;
+ }
+
+ /* 0F AE /5 = LFENCE -- flush pending operations to memory */
+ /* 0F AE /6 = MFENCE -- flush pending operations to memory */
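+   /* Both are modelled by the same single IR fence (Imbe_Fence),
+      which is at least as strong as either instruction requires. */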
+ if (haveNo66noF2noF3(pfx) && sz == 4
+ && insn[0] == 0x0F && insn[1] == 0xAE
+ && epartIsReg(insn[2])
+ && (gregLO3ofRM(insn[2]) == 5 || gregLO3ofRM(insn[2]) == 6)) {
+ delta += 3;
+ /* Insert a memory fence. It's sometimes important that these
+ are carried through to the generated code. */
+ stmt( IRStmt_MBE(Imbe_Fence) );
+ DIP("%sfence\n", gregLO3ofRM(insn[2])==5 ? "l" : "m");
+ goto decode_success;
+ }
+
+ /* 66 0F 5F = MAXPD -- max 64Fx2 from R/M to R */
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x5F) {
+ delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "maxpd", Iop_Max64Fx2 );
+ goto decode_success;
+ }
+
+ /* F2 0F 5F = MAXSD -- max 64F0x2 from R/M to R */
+ if (haveF2no66noF3(pfx) && sz == 4
+ && insn[0] == 0x0F && insn[1] == 0x5F) {
+ delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta+2, "maxsd", Iop_Max64F0x2 );
+ goto decode_success;
+ }
+
+ /* 66 0F 5D = MINPD -- min 64Fx2 from R/M to R */
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x5D) {
+ delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "minpd", Iop_Min64Fx2 );
+ goto decode_success;
+ }
+
+ /* F2 0F 5D = MINSD -- min 64F0x2 from R/M to R */
+ if (haveF2no66noF3(pfx) && sz == 4
+ && insn[0] == 0x0F && insn[1] == 0x5D) {
+ delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta+2, "minsd", Iop_Min64F0x2 );
+ goto decode_success;
+ }
+
+ /* 66 0F 28 = MOVAPD -- move from E (mem or xmm) to G (xmm). */
+ /* 66 0F 10 = MOVUPD -- move from E (mem or xmm) to G (xmm). */
+ /* 66 0F 6F = MOVDQA -- move from E (mem or xmm) to G (xmm). */
+ if (have66noF2noF3(pfx)
+ && (sz == 2 || /* ignore redundant REX.W */ sz == 8)
+ && insn[0] == 0x0F
+ && (insn[1] == 0x28 || insn[1] == 0x10 || insn[1] == 0x6F)) {
+ HChar* wot = insn[1]==0x28 ? "apd" :
+ insn[1]==0x10 ? "upd" : "dqa";
+ modrm = getUChar(delta+2);
+ if (epartIsReg(modrm)) {
+ putXMMReg( gregOfRexRM(pfx,modrm),
+ getXMMReg( eregOfRexRM(pfx,modrm) ));
+ DIP("mov%s %s,%s\n", wot, nameXMMReg(eregOfRexRM(pfx,modrm)),
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ delta += 2+1;
+ } else {
+ addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
+ if (insn[1] == 0x28/*movapd*/ || insn[1] == 0x6F/*movdqa*/)
+ gen_SEGV_if_not_16_aligned( addr );
+ putXMMReg( gregOfRexRM(pfx,modrm),
+ loadLE(Ity_V128, mkexpr(addr)) );
+ DIP("mov%s %s,%s\n", wot, dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ delta += 2+alen;
+ }
+ goto decode_success;
+ }
+
+ /* 66 0F 29 = MOVAPD -- move from G (xmm) to E (mem or xmm). */
+ /* 66 0F 11 = MOVUPD -- move from G (xmm) to E (mem or xmm). */
+ if (have66noF2noF3(pfx) && insn[0] == 0x0F
+ && (insn[1] == 0x29 || insn[1] == 0x11)) {
+ HChar* wot = insn[1]==0x29 ? "apd" : "upd";
+ modrm = getUChar(delta+2);
+ if (epartIsReg(modrm)) {
+ putXMMReg( eregOfRexRM(pfx,modrm),
+ getXMMReg( gregOfRexRM(pfx,modrm) ) );
+ DIP("mov%s %s,%s\n", wot, nameXMMReg(gregOfRexRM(pfx,modrm)),
+ nameXMMReg(eregOfRexRM(pfx,modrm)));
+ delta += 2+1;
+ } else {
+ addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
+ if (insn[1] == 0x29/*movapd*/)
+ gen_SEGV_if_not_16_aligned( addr );
+ storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
+ DIP("mov%s %s,%s\n", wot, nameXMMReg(gregOfRexRM(pfx,modrm)),
+ dis_buf );
+ delta += 2+alen;
+ }
+ goto decode_success;
+ }
+
+ /* 66 0F 6E = MOVD from ireg32/m32 to xmm lo 1/4, zeroing high 3/4 of xmm. */
+ /* or from ireg64/m64 to xmm lo 1/2, zeroing high 1/2 of xmm. */
+ if (have66noF2noF3(pfx) && insn[0] == 0x0F && insn[1] == 0x6E) {
+ vassert(sz == 2 || sz == 8);
+ if (sz == 2) sz = 4;
+ modrm = getUChar(delta+2);
+ if (epartIsReg(modrm)) {
+ delta += 2+1;
+ if (sz == 4) {
+ putXMMReg(
+ gregOfRexRM(pfx,modrm),
+ unop( Iop_32UtoV128, getIReg32(eregOfRexRM(pfx,modrm)) )
+ );
+ DIP("movd %s, %s\n", nameIReg32(eregOfRexRM(pfx,modrm)),
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ } else {
+ putXMMReg(
+ gregOfRexRM(pfx,modrm),
+ unop( Iop_64UtoV128, getIReg64(eregOfRexRM(pfx,modrm)) )
+ );
+ DIP("movq %s, %s\n", nameIReg64(eregOfRexRM(pfx,modrm)),
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ }
+ } else {
+ addr = disAMode( &alen, vbi, pfx, delta+2, dis_buf, 0 );
+ delta += 2+alen;
+ putXMMReg(
+ gregOfRexRM(pfx,modrm),
+ sz == 4
+ ? unop( Iop_32UtoV128,loadLE(Ity_I32, mkexpr(addr)) )
+ : unop( Iop_64UtoV128,loadLE(Ity_I64, mkexpr(addr)) )
+ );
+ DIP("mov%c %s, %s\n", sz == 4 ? 'd' : 'q', dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ }
+ goto decode_success;
+ }
+
+ /* 66 0F 7E = MOVD from xmm low 1/4 to ireg32 or m32. */
+ /* or from xmm low 1/2 to ireg64 or m64. */
+ if (have66noF2noF3(pfx) && insn[0] == 0x0F && insn[1] == 0x7E) {
+ if (sz == 2) sz = 4;
+ vassert(sz == 4 || sz == 8);
+ modrm = getUChar(delta+2);
+ if (epartIsReg(modrm)) {
+ delta += 2+1;
+ if (sz == 4) {
+ putIReg32( eregOfRexRM(pfx,modrm),
+ getXMMRegLane32(gregOfRexRM(pfx,modrm), 0) );
+ DIP("movd %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
+ nameIReg32(eregOfRexRM(pfx,modrm)));
+ } else {
+ putIReg64( eregOfRexRM(pfx,modrm),
+ getXMMRegLane64(gregOfRexRM(pfx,modrm), 0) );
+ DIP("movq %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
+ nameIReg64(eregOfRexRM(pfx,modrm)));
+ }
+ } else {
+ addr = disAMode( &alen, vbi, pfx, delta+2, dis_buf, 0 );
+ delta += 2+alen;
+ storeLE( mkexpr(addr),
+ sz == 4
+ ? getXMMRegLane32(gregOfRexRM(pfx,modrm),0)
+ : getXMMRegLane64(gregOfRexRM(pfx,modrm),0) );
+ DIP("mov%c %s, %s\n", sz == 4 ? 'd' : 'q',
+ nameXMMReg(gregOfRexRM(pfx,modrm)), dis_buf);
+ }
+ goto decode_success;
+ }
+
+ /* 66 0F 7F = MOVDQA -- move from G (xmm) to E (mem or xmm). */
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x7F) {
+ modrm = getUChar(delta+2);
+ if (epartIsReg(modrm)) {
+ delta += 2+1;
+ putXMMReg( eregOfRexRM(pfx,modrm),
+ getXMMReg(gregOfRexRM(pfx,modrm)) );
+ DIP("movdqa %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
+ nameXMMReg(eregOfRexRM(pfx,modrm)));
+ } else {
+ addr = disAMode( &alen, vbi, pfx, delta+2, dis_buf, 0 );
+ gen_SEGV_if_not_16_aligned( addr );
+ delta += 2+alen;
+ storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
+ DIP("movdqa %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), dis_buf);
+ }
+ goto decode_success;
+ }
+
+ /* F3 0F 6F = MOVDQU -- move from E (mem or xmm) to G (xmm). */
+ if (haveF3no66noF2(pfx) && sz == 4
+ && insn[0] == 0x0F && insn[1] == 0x6F) {
+ modrm = getUChar(delta+2);
+ if (epartIsReg(modrm)) {
+ putXMMReg( gregOfRexRM(pfx,modrm),
+ getXMMReg( eregOfRexRM(pfx,modrm) ));
+ DIP("movdqu %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ delta += 2+1;
+ } else {
+ addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
+ putXMMReg( gregOfRexRM(pfx,modrm),
+ loadLE(Ity_V128, mkexpr(addr)) );
+ DIP("movdqu %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ delta += 2+alen;
+ }
+ goto decode_success;
+ }
+
+ /* F3 0F 7F = MOVDQU -- move from G (xmm) to E (mem or xmm). */
+ if (haveF3no66noF2(pfx) && sz == 4
+ && insn[0] == 0x0F && insn[1] == 0x7F) {
+ modrm = getUChar(delta+2);
+ if (epartIsReg(modrm)) {
+ goto decode_failure; /* awaiting test case */
+ delta += 2+1;
+ putXMMReg( eregOfRexRM(pfx,modrm),
+ getXMMReg(gregOfRexRM(pfx,modrm)) );
+ DIP("movdqu %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
+ nameXMMReg(eregOfRexRM(pfx,modrm)));
+ } else {
+ addr = disAMode( &alen, vbi, pfx, delta+2, dis_buf, 0 );
+ delta += 2+alen;
+ storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
+ DIP("movdqu %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), dis_buf);
+ }
+ goto decode_success;
+ }
+
+ /* F2 0F D6 = MOVDQ2Q -- move from E (lo half xmm, not mem) to G (mmx). */
+ if (haveF2no66noF3(pfx) && sz == 4
+ && insn[0] == 0x0F && insn[1] == 0xD6) {
+ modrm = getUChar(delta+2);
+ if (epartIsReg(modrm)) {
+ do_MMX_preamble();
+ putMMXReg( gregLO3ofRM(modrm),
+ getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 ));
+ DIP("movdq2q %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
+ nameMMXReg(gregLO3ofRM(modrm)));
+ delta += 2+1;
+ goto decode_success;
+ } else {
+ /* apparently no mem case for this insn */
+ goto decode_failure;
+ }
+ }
+
+ /* 66 0F 16 = MOVHPD -- move from mem to high half of XMM. */
+   /* This seems identical to MOVHPS.  This instruction encoding is
+      completely crazy. */
+ if (have66noF2noF3(pfx) && insn[0] == 0x0F && insn[1] == 0x16) {
+ modrm = getUChar(delta+2);
+ if (epartIsReg(modrm)) {
+ /* fall through; apparently reg-reg is not possible */
+ } else {
+ addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
+ delta += 2+alen;
+ putXMMRegLane64( gregOfRexRM(pfx,modrm), 1/*upper lane*/,
+ loadLE(Ity_I64, mkexpr(addr)) );
+ DIP("movhpd %s,%s\n", dis_buf,
+ nameXMMReg( gregOfRexRM(pfx,modrm) ));
+ goto decode_success;
+ }
+ }
+
+ /* 66 0F 17 = MOVHPD -- move from high half of XMM to mem. */
+ /* Again, this seems identical to MOVHPS. */
+ if (have66noF2noF3(pfx) && insn[0] == 0x0F && insn[1] == 0x17) {
+ if (!epartIsReg(insn[2])) {
+ delta += 2;
+ addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
+ delta += alen;
+ storeLE( mkexpr(addr),
+ getXMMRegLane64( gregOfRexRM(pfx,insn[2]),
+ 1/*upper lane*/ ) );
+ DIP("movhpd %s,%s\n", nameXMMReg( gregOfRexRM(pfx,insn[2]) ),
+ dis_buf);
+ goto decode_success;
+ }
+ /* else fall through */
+ }
+
+ /* 66 0F 12 = MOVLPD -- move from mem to low half of XMM. */
+ /* Identical to MOVLPS ? */
+ if (have66noF2noF3(pfx) && insn[0] == 0x0F && insn[1] == 0x12) {
+ modrm = getUChar(delta+2);
+ if (epartIsReg(modrm)) {
+ /* fall through; apparently reg-reg is not possible */
+ } else {
+ addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
+ delta += 2+alen;
+ putXMMRegLane64( gregOfRexRM(pfx,modrm),
+ 0/*lower lane*/,
+ loadLE(Ity_I64, mkexpr(addr)) );
+ DIP("movlpd %s, %s\n",
+ dis_buf, nameXMMReg( gregOfRexRM(pfx,modrm) ));
+ goto decode_success;
+ }
+ }
+
+ /* 66 0F 13 = MOVLPD -- move from low half of XMM to mem. */
+ /* Identical to MOVLPS ? */
+ if (have66noF2noF3(pfx) && insn[0] == 0x0F && insn[1] == 0x13) {
+ modrm = getUChar(delta+2);
+ if (!epartIsReg(modrm)) {
+ addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
+ delta += 2+alen;
+ storeLE( mkexpr(addr),
+ getXMMRegLane64( gregOfRexRM(pfx,modrm),
+ 0/*lower lane*/ ) );
+ DIP("movlpd %s, %s\n", nameXMMReg( gregOfRexRM(pfx,modrm) ),
+ dis_buf);
+ goto decode_success;
+ }
+ /* else fall through */
+ }
+
+ /* 66 0F 50 = MOVMSKPD - move 2 sign bits from 2 x F64 in xmm(E) to
+ 2 lowest bits of ireg(G) */
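+   /* e.g. xmm(E) = { lo = -1.0, hi = +2.0 } gives bit 0 = 1 (sign of
+      the low F64) and bit 1 = 0 (sign of the high F64), so ireg(G)
+      gets 1. */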
+ if (have66noF2noF3(pfx) && (sz == 2 || sz == 8)
+ && insn[0] == 0x0F && insn[1] == 0x50) {
+ /* sz == 8 is a kludge to handle insns with REX.W redundantly
+ set to 1, which has been known to happen:
+ 66 4c 0f 50 d9 rex64X movmskpd %xmm1,%r11d
+ 20071106: see further comments on MOVMSKPS implementation above.
+ */
+ modrm = getUChar(delta+2);
+ if (epartIsReg(modrm)) {
+ Int src;
+ t0 = newTemp(Ity_I32);
+ t1 = newTemp(Ity_I32);
+ delta += 2+1;
+ src = eregOfRexRM(pfx,modrm);
+ assign( t0, binop( Iop_And32,
+ binop(Iop_Shr32, getXMMRegLane32(src,1), mkU8(31)),
+ mkU32(1) ));
+ assign( t1, binop( Iop_And32,
+ binop(Iop_Shr32, getXMMRegLane32(src,3), mkU8(30)),
+ mkU32(2) ));
+ putIReg32( gregOfRexRM(pfx,modrm),
+ binop(Iop_Or32, mkexpr(t0), mkexpr(t1))
+ );
+ DIP("movmskpd %s,%s\n", nameXMMReg(src),
+ nameIReg32(gregOfRexRM(pfx,modrm)));
+ goto decode_success;
+ }
+      /* no memory form of this insn exists */
+      goto decode_failure;
+ }
+
+ /* 66 0F F7 = MASKMOVDQU -- store selected bytes of double quadword */
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0xF7) {
+ modrm = getUChar(delta+2);
+ if (epartIsReg(modrm)) {
+ IRTemp regD = newTemp(Ity_V128);
+ IRTemp mask = newTemp(Ity_V128);
+ IRTemp olddata = newTemp(Ity_V128);
+ IRTemp newdata = newTemp(Ity_V128);
+ addr = newTemp(Ity_I64);
+
+ assign( addr, handleAddrOverrides( vbi, pfx, getIReg64(R_RDI) ));
+ assign( regD, getXMMReg( gregOfRexRM(pfx,modrm) ));
+
+ /* Unfortunately can't do the obvious thing with SarN8x16
+ here since that can't be re-emitted as SSE2 code - no such
+ insn. */
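+         /* Instead, build the mask in two 64-bit halves: SarN8x8 by 7
+            copies each byte's sign bit through the whole byte, e.g.
+            0x80 -> 0xFF and 0x7F -> 0x00.  Note that this emulation
+            does a full 16-byte load and store, whereas the real insn
+            writes only the selected bytes. */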
+ assign(
+ mask,
+ binop(Iop_64HLtoV128,
+ binop(Iop_SarN8x8,
+ getXMMRegLane64( eregOfRexRM(pfx,modrm), 1 ),
+ mkU8(7) ),
+ binop(Iop_SarN8x8,
+ getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 ),
+ mkU8(7) ) ));
+ assign( olddata, loadLE( Ity_V128, mkexpr(addr) ));
+ assign( newdata,
+ binop(Iop_OrV128,
+ binop(Iop_AndV128,
+ mkexpr(regD),
+ mkexpr(mask) ),
+ binop(Iop_AndV128,
+ mkexpr(olddata),
+ unop(Iop_NotV128, mkexpr(mask)))) );
+ storeLE( mkexpr(addr), mkexpr(newdata) );
+
+ delta += 2+1;
+ DIP("maskmovdqu %s,%s\n", nameXMMReg( eregOfRexRM(pfx,modrm) ),
+ nameXMMReg( gregOfRexRM(pfx,modrm) ) );
+ goto decode_success;
+ }
+ /* else fall through */
+ }
+
+ /* 66 0F E7 = MOVNTDQ -- for us, just a plain SSE store. */
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0xE7) {
+ modrm = getUChar(delta+2);
+ if (!epartIsReg(modrm)) {
+ addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
+ gen_SEGV_if_not_16_aligned( addr );
+ storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
+ DIP("movntdq %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ delta += 2+alen;
+ goto decode_success;
+ }
+      /* the register form of this insn does not exist */
+      goto decode_failure;
+ }
+
+ /* 0F C3 = MOVNTI -- for us, just a plain ireg store. */
+ if (haveNo66noF2noF3(pfx) &&
+ insn[0] == 0x0F && insn[1] == 0xC3) {
+ vassert(sz == 4 || sz == 8);
+ modrm = getUChar(delta+2);
+ if (!epartIsReg(modrm)) {
+ addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
+ storeLE( mkexpr(addr), getIRegG(sz, pfx, modrm) );
+ DIP("movnti %s,%s\n", dis_buf,
+ nameIRegG(sz, pfx, modrm));
+ delta += 2+alen;
+ goto decode_success;
+ }
+ /* else fall through */
+ }
+
+ /* 66 0F D6 = MOVQ -- move 64 bits from G (lo half xmm) to E (mem
+ or lo half xmm). */
+ if (have66noF2noF3(pfx)
+ && (sz == 2 || /* ignore redundant REX.W */ sz == 8)
+ && insn[0] == 0x0F && insn[1] == 0xD6) {
+ modrm = getUChar(delta+2);
+ if (epartIsReg(modrm)) {
+ /* fall through, awaiting test case */
+ /* dst: lo half copied, hi half zeroed */
+ } else {
+ addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
+ storeLE( mkexpr(addr),
+ getXMMRegLane64( gregOfRexRM(pfx,modrm), 0 ));
+ DIP("movq %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), dis_buf );
+ delta += 2+alen;
+ goto decode_success;
+ }
+ }
+
+ /* F3 0F D6 = MOVQ2DQ -- move from E (mmx) to G (lo half xmm, zero
+ hi half). */
+ if (haveF3no66noF2(pfx) && sz == 4
+ && insn[0] == 0x0F && insn[1] == 0xD6) {
+ modrm = getUChar(delta+2);
+ if (epartIsReg(modrm)) {
+ do_MMX_preamble();
+ putXMMReg( gregOfRexRM(pfx,modrm),
+ unop(Iop_64UtoV128, getMMXReg( eregLO3ofRM(modrm) )) );
+ DIP("movq2dq %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ delta += 2+1;
+ goto decode_success;
+ } else {
+ /* apparently no mem case for this insn */
+ goto decode_failure;
+ }
+ }
+
+ /* F3 0F 7E = MOVQ -- move 64 bits from E (mem or lo half xmm) to
+ G (lo half xmm). Upper half of G is zeroed out. */
+ /* F2 0F 10 = MOVSD -- move 64 bits from E (mem or lo half xmm) to
+ G (lo half xmm). If E is mem, upper half of G is zeroed out.
+ If E is reg, upper half of G is unchanged. */
+ if ( (haveF2no66noF3(pfx)
+ && (sz == 4 || /* ignore redundant REX.W */ sz == 8)
+ && insn[0] == 0x0F && insn[1] == 0x10)
+ ||
+ (haveF3no66noF2(pfx)
+ && (sz == 4 || /* ignore redundant REX.W */ sz == 8)
+ && insn[0] == 0x0F && insn[1] == 0x7E)
+ ) {
+ modrm = getUChar(delta+2);
+ if (epartIsReg(modrm)) {
+ putXMMRegLane64( gregOfRexRM(pfx,modrm), 0,
+ getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 ));
+ if (insn[1] == 0x7E/*MOVQ*/) {
+ /* zero bits 127:64 */
+ putXMMRegLane64( gregOfRexRM(pfx,modrm), 1, mkU64(0) );
+ }
+ DIP("movsd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ delta += 2+1;
+ } else {
+ addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
+ putXMMReg( gregOfRexRM(pfx,modrm), mkV128(0) );
+ putXMMRegLane64( gregOfRexRM(pfx,modrm), 0,
+ loadLE(Ity_I64, mkexpr(addr)) );
+ DIP("movsd %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ delta += 2+alen;
+ }
+ goto decode_success;
+ }
+
+ /* F2 0F 11 = MOVSD -- move 64 bits from G (lo half xmm) to E (mem
+ or lo half xmm). */
+ if (haveF2no66noF3(pfx)
+ && (sz == 4 || /* ignore redundant REX.W */ sz == 8)
+ && insn[0] == 0x0F && insn[1] == 0x11) {
+ modrm = getUChar(delta+2);
+ if (epartIsReg(modrm)) {
+ putXMMRegLane64( eregOfRexRM(pfx,modrm), 0,
+ getXMMRegLane64( gregOfRexRM(pfx,modrm), 0 ));
+ DIP("movsd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
+ nameXMMReg(eregOfRexRM(pfx,modrm)));
+ delta += 2+1;
+ } else {
+ addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
+ storeLE( mkexpr(addr),
+ getXMMRegLane64(gregOfRexRM(pfx,modrm), 0) );
+ DIP("movsd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
+ dis_buf);
+ delta += 2+alen;
+ }
+ goto decode_success;
+ }
+
+ /* 66 0F 59 = MULPD -- mul 64Fx2 from R/M to R */
+ if (have66noF2noF3(pfx)
+ && (sz == 2 || /* ignore redundant REX.W */ sz == 8)
+ && insn[0] == 0x0F && insn[1] == 0x59) {
+ delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "mulpd", Iop_Mul64Fx2 );
+ goto decode_success;
+ }
+
+ /* F2 0F 59 = MULSD -- mul 64F0x2 from R/M to R */
+ if (haveF2no66noF3(pfx)
+ && (sz == 4 || /* ignore redundant REX.W */ sz == 8)
+ && insn[0] == 0x0F && insn[1] == 0x59) {
+ delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta+2, "mulsd", Iop_Mul64F0x2 );
+ goto decode_success;
+ }
+
+   /* 66 0F 56 = ORPD -- G = G or E */
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x56) {
+ delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "orpd", Iop_OrV128 );
+ goto decode_success;
+ }
+
+ /* 66 0F C6 /r ib = SHUFPD -- shuffle packed F64s */
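+   /* The result is { lo = dV.w64[select bit 0], hi = sV.w64[select
+      bit 1] }; e.g. select = 2 (binary 10) gives { lo = d0, hi = s1 }. */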
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0xC6) {
+ Int select;
+ IRTemp sV = newTemp(Ity_V128);
+ IRTemp dV = newTemp(Ity_V128);
+ IRTemp s1 = newTemp(Ity_I64);
+ IRTemp s0 = newTemp(Ity_I64);
+ IRTemp d1 = newTemp(Ity_I64);
+ IRTemp d0 = newTemp(Ity_I64);
+
+ modrm = insn[2];
+ assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );
+
+ if (epartIsReg(modrm)) {
+ assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
+ select = (Int)insn[3];
+ delta += 2+2;
+ DIP("shufpd $%d,%s,%s\n", select,
+ nameXMMReg(eregOfRexRM(pfx,modrm)),
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ } else {
+ addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 1 );
+ assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
+ select = (Int)insn[2+alen];
+ delta += 3+alen;
+ DIP("shufpd $%d,%s,%s\n", select,
+ dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ }
+
+ assign( d1, unop(Iop_V128HIto64, mkexpr(dV)) );
+ assign( d0, unop(Iop_V128to64, mkexpr(dV)) );
+ assign( s1, unop(Iop_V128HIto64, mkexpr(sV)) );
+ assign( s0, unop(Iop_V128to64, mkexpr(sV)) );
+
+# define SELD(n) mkexpr((n)==0 ? d0 : d1)
+# define SELS(n) mkexpr((n)==0 ? s0 : s1)
+
+ putXMMReg(
+ gregOfRexRM(pfx,modrm),
+ binop(Iop_64HLtoV128, SELS((select>>1)&1), SELD((select>>0)&1) )
+ );
+
+# undef SELD
+# undef SELS
+
+ goto decode_success;
+ }
+
+ /* 66 0F 51 = SQRTPD -- approx sqrt 64Fx2 from R/M to R */
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x51) {
+ delta = dis_SSE_E_to_G_unary_all( vbi, pfx, delta+2,
+ "sqrtpd", Iop_Sqrt64Fx2 );
+ goto decode_success;
+ }
+
+ /* F2 0F 51 = SQRTSD -- approx sqrt 64F0x2 from R/M to R */
+ if (haveF2no66noF3(pfx) && insn[0] == 0x0F && insn[1] == 0x51) {
+ vassert(sz == 4);
+ delta = dis_SSE_E_to_G_unary_lo64( vbi, pfx, delta+2,
+ "sqrtsd", Iop_Sqrt64F0x2 );
+ goto decode_success;
+ }
+
+ /* 66 0F 5C = SUBPD -- sub 64Fx2 from R/M to R */
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x5C) {
+ delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "subpd", Iop_Sub64Fx2 );
+ goto decode_success;
+ }
+
+ /* F2 0F 5C = SUBSD -- sub 64F0x2 from R/M to R */
+ if (haveF2no66noF3(pfx)
+ && (sz == 4 || /* ignore redundant REX.W */ sz == 8)
+ && insn[0] == 0x0F && insn[1] == 0x5C) {
+ delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta+2, "subsd", Iop_Sub64F0x2 );
+ goto decode_success;
+ }
+
+ /* 66 0F 15 = UNPCKHPD -- unpack and interleave high part F64s */
+ /* 66 0F 14 = UNPCKLPD -- unpack and interleave low part F64s */
+   /* These just appear to be special cases of SHUFPD */
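+   /* UNPCKLPD gives { lo = d0, hi = s0 } and UNPCKHPD gives
+      { lo = d1, hi = s1 }, i.e. SHUFPD with select = 0 and select = 3
+      respectively. */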
+ if (have66noF2noF3(pfx)
+ && sz == 2 /* could be 8 if rex also present */
+ && insn[0] == 0x0F && (insn[1] == 0x15 || insn[1] == 0x14)) {
+ IRTemp s1 = newTemp(Ity_I64);
+ IRTemp s0 = newTemp(Ity_I64);
+ IRTemp d1 = newTemp(Ity_I64);
+ IRTemp d0 = newTemp(Ity_I64);
+ IRTemp sV = newTemp(Ity_V128);
+ IRTemp dV = newTemp(Ity_V128);
+ Bool hi = toBool(insn[1] == 0x15);
+
+ modrm = insn[2];
+ assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );
+
+ if (epartIsReg(modrm)) {
+ assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
+ delta += 2+1;
+ DIP("unpck%sps %s,%s\n", hi ? "h" : "l",
+ nameXMMReg(eregOfRexRM(pfx,modrm)),
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ } else {
+ addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
+ assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
+ delta += 2+alen;
+ DIP("unpck%sps %s,%s\n", hi ? "h" : "l",
+ dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ }
+
+ assign( d1, unop(Iop_V128HIto64, mkexpr(dV)) );
+ assign( d0, unop(Iop_V128to64, mkexpr(dV)) );
+ assign( s1, unop(Iop_V128HIto64, mkexpr(sV)) );
+ assign( s0, unop(Iop_V128to64, mkexpr(sV)) );
+
+ if (hi) {
+ putXMMReg( gregOfRexRM(pfx,modrm),
+ binop(Iop_64HLtoV128, mkexpr(s1), mkexpr(d1)) );
+ } else {
+ putXMMReg( gregOfRexRM(pfx,modrm),
+ binop(Iop_64HLtoV128, mkexpr(s0), mkexpr(d0)) );
+ }
+
+ goto decode_success;
+ }
+
+ /* 66 0F 57 = XORPD -- G = G xor E */
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x57) {
+ delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "xorpd", Iop_XorV128 );
+ goto decode_success;
+ }
+
+ /* 66 0F 6B = PACKSSDW */
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x6B) {
+ delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
+ "packssdw", Iop_QNarrow32Sx4, True );
+ goto decode_success;
+ }
+
+ /* 66 0F 63 = PACKSSWB */
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x63) {
+ delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
+ "packsswb", Iop_QNarrow16Sx8, True );
+ goto decode_success;
+ }
+
+ /* 66 0F 67 = PACKUSWB */
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x67) {
+ delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
+ "packuswb", Iop_QNarrow16Ux8, True );
+ goto decode_success;
+ }
+
+ /* 66 0F FC = PADDB */
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0xFC) {
+ delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
+ "paddb", Iop_Add8x16, False );
+ goto decode_success;
+ }
+
+ /* 66 0F FE = PADDD */
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0xFE) {
+ delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
+ "paddd", Iop_Add32x4, False );
+ goto decode_success;
+ }
+
+ /* ***--- this is an MMX class insn introduced in SSE2 ---*** */
+ /* 0F D4 = PADDQ -- add 64x1 */
+ if (haveNo66noF2noF3(pfx) && sz == 4
+ && insn[0] == 0x0F && insn[1] == 0xD4) {
+ do_MMX_preamble();
+ delta = dis_MMXop_regmem_to_reg (
+ vbi, pfx, delta+2, insn[1], "paddq", False );
+ goto decode_success;
+ }
+
+ /* 66 0F D4 = PADDQ */
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0xD4) {
+ delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
+ "paddq", Iop_Add64x2, False );
+ goto decode_success;
+ }
+
+ /* 66 0F FD = PADDW */
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0xFD) {
+ delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
+ "paddw", Iop_Add16x8, False );
+ goto decode_success;
+ }
+
+ /* 66 0F EC = PADDSB */
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0xEC) {
+ delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
+ "paddsb", Iop_QAdd8Sx16, False );
+ goto decode_success;
+ }
+
+ /* 66 0F ED = PADDSW */
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0xED) {
+ delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
+ "paddsw", Iop_QAdd16Sx8, False );
+ goto decode_success;
+ }
+
+ /* 66 0F DC = PADDUSB */
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0xDC) {
+ delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
+ "paddusb", Iop_QAdd8Ux16, False );
+ goto decode_success;
+ }
+
+ /* 66 0F DD = PADDUSW */
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0xDD) {
+ delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
+ "paddusw", Iop_QAdd16Ux8, False );
+ goto decode_success;
+ }
+
+ /* 66 0F DB = PAND */
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0xDB) {
+ delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "pand", Iop_AndV128 );
+ goto decode_success;
+ }
+
+ /* 66 0F DF = PANDN */
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0xDF) {
+ delta = dis_SSE_E_to_G_all_invG( vbi, pfx, delta+2, "pandn", Iop_AndV128 );
+ goto decode_success;
+ }
+
+ /* 66 0F E0 = PAVGB */
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0xE0) {
+ delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
+ "pavgb", Iop_Avg8Ux16, False );
+ goto decode_success;
+ }
+
+ /* 66 0F E3 = PAVGW */
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0xE3) {
+ delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
+ "pavgw", Iop_Avg16Ux8, False );
+ goto decode_success;
+ }
+
+ /* 66 0F 74 = PCMPEQB */
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x74) {
+ delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
+ "pcmpeqb", Iop_CmpEQ8x16, False );
+ goto decode_success;
+ }
+
+ /* 66 0F 76 = PCMPEQD */
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x76) {
+ delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
+ "pcmpeqd", Iop_CmpEQ32x4, False );
+ goto decode_success;
+ }
+
+ /* 66 0F 75 = PCMPEQW */
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x75) {
+ delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
+ "pcmpeqw", Iop_CmpEQ16x8, False );
+ goto decode_success;
+ }
+
+ /* 66 0F 64 = PCMPGTB */
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x64) {
+ delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
+ "pcmpgtb", Iop_CmpGT8Sx16, False );
+ goto decode_success;
+ }
+
+ /* 66 0F 66 = PCMPGTD */
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x66) {
+ delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
+ "pcmpgtd", Iop_CmpGT32Sx4, False );
+ goto decode_success;
+ }
+
+ /* 66 0F 65 = PCMPGTW */
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x65) {
+ delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
+ "pcmpgtw", Iop_CmpGT16Sx8, False );
+ goto decode_success;
+ }
+
+ /* 66 0F C5 = PEXTRW -- extract 16-bit field from xmm(E) and put
+ zero-extend of it in ireg(G). */
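+   /* Lane n of xmm(E) occupies bits [16n+15 : 16n]; e.g. an immediate
+      of 5 selects bits 95:80, the high half of the third 32-bit
+      chunk. */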
+ if (have66noF2noF3(pfx)
+ && (sz == 2 || /* ignore redundant REX.W */ sz == 8)
+ && insn[0] == 0x0F && insn[1] == 0xC5) {
+ modrm = insn[2];
+ if (epartIsReg(modrm)) {
+ t5 = newTemp(Ity_V128);
+ t4 = newTemp(Ity_I16);
+ assign(t5, getXMMReg(eregOfRexRM(pfx,modrm)));
+ breakup128to32s( t5, &t3, &t2, &t1, &t0 );
+ switch (insn[3] & 7) {
+ case 0: assign(t4, unop(Iop_32to16, mkexpr(t0))); break;
+ case 1: assign(t4, unop(Iop_32HIto16, mkexpr(t0))); break;
+ case 2: assign(t4, unop(Iop_32to16, mkexpr(t1))); break;
+ case 3: assign(t4, unop(Iop_32HIto16, mkexpr(t1))); break;
+ case 4: assign(t4, unop(Iop_32to16, mkexpr(t2))); break;
+ case 5: assign(t4, unop(Iop_32HIto16, mkexpr(t2))); break;
+ case 6: assign(t4, unop(Iop_32to16, mkexpr(t3))); break;
+ case 7: assign(t4, unop(Iop_32HIto16, mkexpr(t3))); break;
+ default: vassert(0);
+ }
+ putIReg32(gregOfRexRM(pfx,modrm), unop(Iop_16Uto32, mkexpr(t4)));
+ DIP("pextrw $%d,%s,%s\n",
+ (Int)insn[3], nameXMMReg(eregOfRexRM(pfx,modrm)),
+ nameIReg32(gregOfRexRM(pfx,modrm)));
+ delta += 4;
+ goto decode_success;
+ }
+ /* else fall through */
+ /* note, if memory case is ever filled in, there is 1 byte after
+ amode */
+ }
+
+ /* 66 0F C4 = PINSRW -- get 16 bits from E(mem or low half ireg) and
+ put it into the specified lane of xmm(G). */
+ if (have66noF2noF3(pfx)
+ && (sz == 2 || /* ignore redundant REX.W */ sz == 8)
+ && insn[0] == 0x0F && insn[1] == 0xC4) {
+ Int lane;
+ t4 = newTemp(Ity_I16);
+ modrm = insn[2];
+
+ if (epartIsReg(modrm)) {
+ assign(t4, getIReg16(eregOfRexRM(pfx,modrm)));
+ delta += 3+1;
+ lane = insn[3+1-1];
+ DIP("pinsrw $%d,%s,%s\n", (Int)lane,
+ nameIReg16(eregOfRexRM(pfx,modrm)),
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ } else {
+ addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf,
+ 1/*byte after the amode*/ );
+ delta += 3+alen;
+ lane = insn[3+alen-1];
+ assign(t4, loadLE(Ity_I16, mkexpr(addr)));
+ DIP("pinsrw $%d,%s,%s\n", (Int)lane,
+ dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ }
+
+ putXMMRegLane16( gregOfRexRM(pfx,modrm), lane & 7, mkexpr(t4) );
+ goto decode_success;
+ }
+
+ /* 66 0F F5 = PMADDWD -- Multiply and add packed integers from
+ E(xmm or mem) to G(xmm) */
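+   /* Lane-wise: result32[i] = s1.w[2i]*s2.w[2i] + s1.w[2i+1]*s2.w[2i+1],
+      using signed 16x16->32 multiplies; e.g. if every 16-bit lane of
+      both operands is 1, each 32-bit result lane is 2. */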
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0xF5) {
+ IRTemp s1V = newTemp(Ity_V128);
+ IRTemp s2V = newTemp(Ity_V128);
+ IRTemp dV = newTemp(Ity_V128);
+ IRTemp s1Hi = newTemp(Ity_I64);
+ IRTemp s1Lo = newTemp(Ity_I64);
+ IRTemp s2Hi = newTemp(Ity_I64);
+ IRTemp s2Lo = newTemp(Ity_I64);
+ IRTemp dHi = newTemp(Ity_I64);
+ IRTemp dLo = newTemp(Ity_I64);
+ modrm = insn[2];
+ if (epartIsReg(modrm)) {
+ assign( s1V, getXMMReg(eregOfRexRM(pfx,modrm)) );
+ delta += 2+1;
+ DIP("pmaddwd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ } else {
+ addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
+ assign( s1V, loadLE(Ity_V128, mkexpr(addr)) );
+ delta += 2+alen;
+ DIP("pmaddwd %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ }
+ assign( s2V, getXMMReg(gregOfRexRM(pfx,modrm)) );
+ assign( s1Hi, unop(Iop_V128HIto64, mkexpr(s1V)) );
+ assign( s1Lo, unop(Iop_V128to64, mkexpr(s1V)) );
+ assign( s2Hi, unop(Iop_V128HIto64, mkexpr(s2V)) );
+ assign( s2Lo, unop(Iop_V128to64, mkexpr(s2V)) );
+ assign( dHi, mkIRExprCCall(
+ Ity_I64, 0/*regparms*/,
+ "amd64g_calculate_mmx_pmaddwd",
+ &amd64g_calculate_mmx_pmaddwd,
+ mkIRExprVec_2( mkexpr(s1Hi), mkexpr(s2Hi))
+ ));
+ assign( dLo, mkIRExprCCall(
+ Ity_I64, 0/*regparms*/,
+ "amd64g_calculate_mmx_pmaddwd",
+ &amd64g_calculate_mmx_pmaddwd,
+ mkIRExprVec_2( mkexpr(s1Lo), mkexpr(s2Lo))
+ ));
+      assign( dV, binop(Iop_64HLtoV128, mkexpr(dHi), mkexpr(dLo)) );
+ putXMMReg(gregOfRexRM(pfx,modrm), mkexpr(dV));
+ goto decode_success;
+ }
+
+ /* 66 0F EE = PMAXSW -- 16x8 signed max */
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0xEE) {
+ delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
+ "pmaxsw", Iop_Max16Sx8, False );
+ goto decode_success;
+ }
+
+ /* 66 0F DE = PMAXUB -- 8x16 unsigned max */
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0xDE) {
+ delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
+ "pmaxub", Iop_Max8Ux16, False );
+ goto decode_success;
+ }
+
+ /* 66 0F EA = PMINSW -- 16x8 signed min */
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0xEA) {
+ delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
+ "pminsw", Iop_Min16Sx8, False );
+ goto decode_success;
+ }
+
+ /* 66 0F DA = PMINUB -- 8x16 unsigned min */
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0xDA) {
+ delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
+ "pminub", Iop_Min8Ux16, False );
+ goto decode_success;
+ }
+
+   /* 66 0F D7 = PMOVMSKB -- extract sign bits from each of 16 lanes
+      in xmm(E), turn them into a 16-bit value, and put the
+      zero-extend of it in ireg(G).  Doing this directly is just too
+      cumbersome; give up therefore and call a helper. */
+   /* ULong amd64g_calculate_sse_pmovmskb ( ULong w64hi, ULong w64lo ); */
+ if (have66noF2noF3(pfx)
+ && (sz == 2 || /* ignore redundant REX.W */ sz == 8)
+ && insn[0] == 0x0F && insn[1] == 0xD7) {
+ modrm = insn[2];
+ if (epartIsReg(modrm)) {
+ t0 = newTemp(Ity_I64);
+ t1 = newTemp(Ity_I64);
+ assign(t0, getXMMRegLane64(eregOfRexRM(pfx,modrm), 0));
+ assign(t1, getXMMRegLane64(eregOfRexRM(pfx,modrm), 1));
+ t5 = newTemp(Ity_I64);
+ assign(t5, mkIRExprCCall(
+ Ity_I64, 0/*regparms*/,
+ "amd64g_calculate_sse_pmovmskb",
+ &amd64g_calculate_sse_pmovmskb,
+ mkIRExprVec_2( mkexpr(t1), mkexpr(t0) )));
+ putIReg32(gregOfRexRM(pfx,modrm), unop(Iop_64to32,mkexpr(t5)));
+ DIP("pmovmskb %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
+ nameIReg32(gregOfRexRM(pfx,modrm)));
+ delta += 3;
+ goto decode_success;
+ }
+ /* else fall through */
+ }
+
+ /* 66 0F E4 = PMULHUW -- 16x8 hi-half of unsigned widening multiply */
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0xE4) {
+ delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
+ "pmulhuw", Iop_MulHi16Ux8, False );
+ goto decode_success;
+ }
+
+ /* 66 0F E5 = PMULHW -- 16x8 hi-half of signed widening multiply */
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0xE5) {
+ delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
+ "pmulhw", Iop_MulHi16Sx8, False );
+ goto decode_success;
+ }
+
+   /* 66 0F D5 = PMULLW -- 16x8 multiply */
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0xD5) {
+ delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
+ "pmullw", Iop_Mul16x8, False );
+ goto decode_success;
+ }
+
+ /* ***--- this is an MMX class insn introduced in SSE2 ---*** */
+   /* 0F F4 = PMULUDQ -- unsigned widening multiply of 32-bit lanes
+      0 x 0 to form a 64-bit result */
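+   /* e.g. MullU32(0xFFFFFFFF, 2) gives 0x1FFFFFFFE -- the full 64-bit
+      product is retained. */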
+ if (haveNo66noF2noF3(pfx) && sz == 4
+ && insn[0] == 0x0F && insn[1] == 0xF4) {
+ IRTemp sV = newTemp(Ity_I64);
+ IRTemp dV = newTemp(Ity_I64);
+ t1 = newTemp(Ity_I32);
+ t0 = newTemp(Ity_I32);
+ modrm = insn[2];
+
+ do_MMX_preamble();
+ assign( dV, getMMXReg(gregLO3ofRM(modrm)) );
+
+ if (epartIsReg(modrm)) {
+ assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
+ delta += 2+1;
+ DIP("pmuludq %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
+ nameMMXReg(gregLO3ofRM(modrm)));
+ } else {
+ addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
+ assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
+ delta += 2+alen;
+ DIP("pmuludq %s,%s\n", dis_buf,
+ nameMMXReg(gregLO3ofRM(modrm)));
+ }
+
+ assign( t0, unop(Iop_64to32, mkexpr(dV)) );
+ assign( t1, unop(Iop_64to32, mkexpr(sV)) );
+ putMMXReg( gregLO3ofRM(modrm),
+ binop( Iop_MullU32, mkexpr(t0), mkexpr(t1) ) );
+ goto decode_success;
+ }
+
+   /* 66 0F F4 = PMULUDQ -- unsigned widening multiply of 32-bit lanes
+      0 x 0 to form the lower 64-bit half, and of lanes 2 x 2 to form
+      the upper 64-bit half */
+ /* This is a really poor translation -- could be improved if
+ performance critical */
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0xF4) {
+ IRTemp sV, dV;
+ IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
+ sV = newTemp(Ity_V128);
+ dV = newTemp(Ity_V128);
+ s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
+ t1 = newTemp(Ity_I64);
+ t0 = newTemp(Ity_I64);
+ modrm = insn[2];
+ assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );
+
+ if (epartIsReg(modrm)) {
+ assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
+ delta += 2+1;
+ DIP("pmuludq %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ } else {
+ addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
+ assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
+ delta += 2+alen;
+ DIP("pmuludq %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ }
+
+ breakup128to32s( dV, &d3, &d2, &d1, &d0 );
+ breakup128to32s( sV, &s3, &s2, &s1, &s0 );
+
+ assign( t0, binop( Iop_MullU32, mkexpr(d0), mkexpr(s0)) );
+ putXMMRegLane64( gregOfRexRM(pfx,modrm), 0, mkexpr(t0) );
+ assign( t1, binop( Iop_MullU32, mkexpr(d2), mkexpr(s2)) );
+ putXMMRegLane64( gregOfRexRM(pfx,modrm), 1, mkexpr(t1) );
+ goto decode_success;
+ }
+
+ /* 66 0F EB = POR */
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0xEB) {
+ delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "por", Iop_OrV128 );
+ goto decode_success;
+ }
+
+ /* 66 0F F6 = PSADBW -- 2 x (8x8 -> 48 zeroes ++ u16) Sum Abs Diffs
+ from E(xmm or mem) to G(xmm) */
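+   /* Each 64-bit half is computed independently: the sum of the eight
+      absolute byte differences lands in its low 16 bits, zero-extended
+      to 64; e.g. if corresponding bytes differ by 1 everywhere, each
+      half holds 8. */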
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0xF6) {
+ IRTemp s1V = newTemp(Ity_V128);
+ IRTemp s2V = newTemp(Ity_V128);
+ IRTemp dV = newTemp(Ity_V128);
+ IRTemp s1Hi = newTemp(Ity_I64);
+ IRTemp s1Lo = newTemp(Ity_I64);
+ IRTemp s2Hi = newTemp(Ity_I64);
+ IRTemp s2Lo = newTemp(Ity_I64);
+ IRTemp dHi = newTemp(Ity_I64);
+ IRTemp dLo = newTemp(Ity_I64);
+ modrm = insn[2];
+ if (epartIsReg(modrm)) {
+ assign( s1V, getXMMReg(eregOfRexRM(pfx,modrm)) );
+ delta += 2+1;
+ DIP("psadbw %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ } else {
+ addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
+ assign( s1V, loadLE(Ity_V128, mkexpr(addr)) );
+ delta += 2+alen;
+ DIP("psadbw %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ }
+ assign( s2V, getXMMReg(gregOfRexRM(pfx,modrm)) );
+ assign( s1Hi, unop(Iop_V128HIto64, mkexpr(s1V)) );
+ assign( s1Lo, unop(Iop_V128to64, mkexpr(s1V)) );
+ assign( s2Hi, unop(Iop_V128HIto64, mkexpr(s2V)) );
+ assign( s2Lo, unop(Iop_V128to64, mkexpr(s2V)) );
+ assign( dHi, mkIRExprCCall(
+ Ity_I64, 0/*regparms*/,
+ "amd64g_calculate_mmx_psadbw",
+ &amd64g_calculate_mmx_psadbw,
+ mkIRExprVec_2( mkexpr(s1Hi), mkexpr(s2Hi))
+ ));
+ assign( dLo, mkIRExprCCall(
+ Ity_I64, 0/*regparms*/,
+ "amd64g_calculate_mmx_psadbw",
+ &amd64g_calculate_mmx_psadbw,
+ mkIRExprVec_2( mkexpr(s1Lo), mkexpr(s2Lo))
+ ));
+      assign( dV, binop(Iop_64HLtoV128, mkexpr(dHi), mkexpr(dLo)) );
+ putXMMReg(gregOfRexRM(pfx,modrm), mkexpr(dV));
+ goto decode_success;
+ }
+
+ /* 66 0F 70 = PSHUFD -- rearrange 4x32 from E(xmm or mem) to G(xmm) */
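+   /* Lane i of the result is source lane (order >> (2*i)) & 3; e.g.
+      order = 0x1B (binary 00 01 10 11) reverses the four lanes. */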
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x70) {
+ Int order;
+ IRTemp sV, dV, s3, s2, s1, s0;
+ s3 = s2 = s1 = s0 = IRTemp_INVALID;
+ sV = newTemp(Ity_V128);
+ dV = newTemp(Ity_V128);
+ modrm = insn[2];
+ if (epartIsReg(modrm)) {
+ assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
+ order = (Int)insn[3];
+ delta += 3+1;
+ DIP("pshufd $%d,%s,%s\n", order,
+ nameXMMReg(eregOfRexRM(pfx,modrm)),
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ } else {
+ addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf,
+ 1/*byte after the amode*/ );
+ assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
+ order = (Int)insn[2+alen];
+ delta += 2+alen+1;
+ DIP("pshufd $%d,%s,%s\n", order,
+ dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ }
+ breakup128to32s( sV, &s3, &s2, &s1, &s0 );
+
+# define SEL(n) \
+ ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
+ assign(dV,
+ mk128from32s( SEL((order>>6)&3), SEL((order>>4)&3),
+ SEL((order>>2)&3), SEL((order>>0)&3) )
+ );
+ putXMMReg(gregOfRexRM(pfx,modrm), mkexpr(dV));
+# undef SEL
+ goto decode_success;
+ }
+
+ /* F3 0F 70 = PSHUFHW -- rearrange upper half 4x16 from E(xmm or
+ mem) to G(xmm), and copy lower half */
+ if (haveF3no66noF2(pfx) && sz == 4
+ && insn[0] == 0x0F && insn[1] == 0x70) {
+ Int order;
+ IRTemp sVhi, dVhi, sV, dV, s3, s2, s1, s0;
+ s3 = s2 = s1 = s0 = IRTemp_INVALID;
+ sV = newTemp(Ity_V128);
+ dV = newTemp(Ity_V128);
+ sVhi = newTemp(Ity_I64);
+ dVhi = newTemp(Ity_I64);
+ modrm = insn[2];
+ if (epartIsReg(modrm)) {
+ assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
+ order = (Int)insn[3];
+ delta += 3+1;
+ DIP("pshufhw $%d,%s,%s\n", order,
+ nameXMMReg(eregOfRexRM(pfx,modrm)),
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ } else {
+ addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf,
+ 1/*byte after the amode*/ );
+ assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
+ order = (Int)insn[2+alen];
+ delta += 2+alen+1;
+ DIP("pshufhw $%d,%s,%s\n", order,
+ dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ }
+ assign( sVhi, unop(Iop_V128HIto64, mkexpr(sV)) );
+ breakup64to16s( sVhi, &s3, &s2, &s1, &s0 );
+
+# define SEL(n) \
+ ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
+ assign(dVhi,
+ mk64from16s( SEL((order>>6)&3), SEL((order>>4)&3),
+ SEL((order>>2)&3), SEL((order>>0)&3) )
+ );
+ assign(dV, binop( Iop_64HLtoV128,
+ mkexpr(dVhi),
+ unop(Iop_V128to64, mkexpr(sV))) );
+ putXMMReg(gregOfRexRM(pfx,modrm), mkexpr(dV));
+# undef SEL
+ goto decode_success;
+ }
+
+ /* F2 0F 70 = PSHUFLW -- rearrange lower half 4x16 from E(xmm or
+ mem) to G(xmm), and copy upper half */
+ if (haveF2no66noF3(pfx) && sz == 4
+ && insn[0] == 0x0F && insn[1] == 0x70) {
+ Int order;
+ IRTemp sVlo, dVlo, sV, dV, s3, s2, s1, s0;
+ s3 = s2 = s1 = s0 = IRTemp_INVALID;
+ sV = newTemp(Ity_V128);
+ dV = newTemp(Ity_V128);
+ sVlo = newTemp(Ity_I64);
+ dVlo = newTemp(Ity_I64);
+ modrm = insn[2];
+ if (epartIsReg(modrm)) {
+ assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
+ order = (Int)insn[3];
+ delta += 3+1;
+ DIP("pshuflw $%d,%s,%s\n", order,
+ nameXMMReg(eregOfRexRM(pfx,modrm)),
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ } else {
+ addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf,
+ 1/*byte after the amode*/ );
+ assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
+ order = (Int)insn[2+alen];
+ delta += 2+alen+1;
+ DIP("pshuflw $%d,%s,%s\n", order,
+ dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ }
+ assign( sVlo, unop(Iop_V128to64, mkexpr(sV)) );
+ breakup64to16s( sVlo, &s3, &s2, &s1, &s0 );
+
+# define SEL(n) \
+ ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
+ assign(dVlo,
+ mk64from16s( SEL((order>>6)&3), SEL((order>>4)&3),
+ SEL((order>>2)&3), SEL((order>>0)&3) )
+ );
+ assign(dV, binop( Iop_64HLtoV128,
+ unop(Iop_V128HIto64, mkexpr(sV)),
+ mkexpr(dVlo) ) );
+ putXMMReg(gregOfRexRM(pfx,modrm), mkexpr(dV));
+# undef SEL
+ goto decode_success;
+ }
+
+ /* 66 0F 72 /6 ib = PSLLD by immediate */
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x72
+ && epartIsReg(insn[2])
+ && gregLO3ofRM(insn[2]) == 6) {
+ delta = dis_SSE_shiftE_imm( pfx, delta+2, "pslld", Iop_ShlN32x4 );
+ goto decode_success;
+ }
+
+ /* 66 0F F2 = PSLLD by E */
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0xF2) {
+ delta = dis_SSE_shiftG_byE( vbi, pfx, delta+2, "pslld", Iop_ShlN32x4 );
+ goto decode_success;
+ }
+
+ /* 66 0F 73 /7 ib = PSLLDQ by immediate */
+ /* note, if mem case ever filled in, 1 byte after amode */
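+   /* Byte-shift example: for imm = 3 the result is lo64r = lo64 << 24
+      and hi64r = (hi64 << 24) | (lo64 >> 40); for imm = 12 it is
+      lo64r = 0 and hi64r = lo64 << 32. */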
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x73
+ && epartIsReg(insn[2])
+ && gregLO3ofRM(insn[2]) == 7) {
+ IRTemp sV, dV, hi64, lo64, hi64r, lo64r;
+ Int imm = (Int)insn[3];
+ Int reg = eregOfRexRM(pfx,insn[2]);
+ DIP("pslldq $%d,%s\n", imm, nameXMMReg(reg));
+ vassert(imm >= 0 && imm <= 255);
+ delta += 4;
+
+ sV = newTemp(Ity_V128);
+ dV = newTemp(Ity_V128);
+ hi64 = newTemp(Ity_I64);
+ lo64 = newTemp(Ity_I64);
+ hi64r = newTemp(Ity_I64);
+ lo64r = newTemp(Ity_I64);
+
+ if (imm >= 16) {
+ putXMMReg(reg, mkV128(0x0000));
+ goto decode_success;
+ }
+
+ assign( sV, getXMMReg(reg) );
+ assign( hi64, unop(Iop_V128HIto64, mkexpr(sV)) );
+ assign( lo64, unop(Iop_V128to64, mkexpr(sV)) );
+
+ if (imm == 0) {
+ assign( lo64r, mkexpr(lo64) );
+ assign( hi64r, mkexpr(hi64) );
+ }
+ else
+ if (imm == 8) {
+ assign( lo64r, mkU64(0) );
+ assign( hi64r, mkexpr(lo64) );
+ }
+ else
+ if (imm > 8) {
+ assign( lo64r, mkU64(0) );
+ assign( hi64r, binop( Iop_Shl64,
+ mkexpr(lo64),
+ mkU8( 8*(imm-8) ) ));
+ } else {
+ assign( lo64r, binop( Iop_Shl64,
+ mkexpr(lo64),
+ mkU8(8 * imm) ));
+ assign( hi64r,
+ binop( Iop_Or64,
+ binop(Iop_Shl64, mkexpr(hi64),
+ mkU8(8 * imm)),
+ binop(Iop_Shr64, mkexpr(lo64),
+ mkU8(8 * (8 - imm)) )
+ )
+ );
+ }
+ assign( dV, binop(Iop_64HLtoV128, mkexpr(hi64r), mkexpr(lo64r)) );
+ putXMMReg(reg, mkexpr(dV));
+ goto decode_success;
+ }
+
+ /* 66 0F 73 /6 ib = PSLLQ by immediate */
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x73
+ && epartIsReg(insn[2])
+ && gregLO3ofRM(insn[2]) == 6) {
+ delta = dis_SSE_shiftE_imm( pfx, delta+2, "psllq", Iop_ShlN64x2 );
+ goto decode_success;
+ }
+
+ /* 66 0F F3 = PSLLQ by E */
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0xF3) {
+ delta = dis_SSE_shiftG_byE( vbi, pfx, delta+2, "psllq", Iop_ShlN64x2 );
+ goto decode_success;
+ }
+
+ /* 66 0F 71 /6 ib = PSLLW by immediate */
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x71
+ && epartIsReg(insn[2])
+ && gregLO3ofRM(insn[2]) == 6) {
+ delta = dis_SSE_shiftE_imm( pfx, delta+2, "psllw", Iop_ShlN16x8 );
+ goto decode_success;
+ }
+
+ /* 66 0F F1 = PSLLW by E */
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0xF1) {
+ delta = dis_SSE_shiftG_byE( vbi, pfx, delta+2, "psllw", Iop_ShlN16x8 );
+ goto decode_success;
+ }
+
+ /* 66 0F 72 /4 ib = PSRAD by immediate */
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x72
+ && epartIsReg(insn[2])
+ && gregLO3ofRM(insn[2]) == 4) {
+ delta = dis_SSE_shiftE_imm( pfx, delta+2, "psrad", Iop_SarN32x4 );
+ goto decode_success;
+ }
+
+ /* 66 0F E2 = PSRAD by E */
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0xE2) {
+ delta = dis_SSE_shiftG_byE( vbi, pfx, delta+2, "psrad", Iop_SarN32x4 );
+ goto decode_success;
+ }
+
+ /* 66 0F 71 /4 ib = PSRAW by immediate */
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x71
+ && epartIsReg(insn[2])
+ && gregLO3ofRM(insn[2]) == 4) {
+ delta = dis_SSE_shiftE_imm( pfx, delta+2, "psraw", Iop_SarN16x8 );
+ goto decode_success;
+ }
+
+ /* 66 0F E1 = PSRAW by E */
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0xE1) {
+ delta = dis_SSE_shiftG_byE( vbi, pfx, delta+2, "psraw", Iop_SarN16x8 );
+ goto decode_success;
+ }
+
+ /* 66 0F 72 /2 ib = PSRLD by immediate */
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x72
+ && epartIsReg(insn[2])
+ && gregLO3ofRM(insn[2]) == 2) {
+ delta = dis_SSE_shiftE_imm( pfx, delta+2, "psrld", Iop_ShrN32x4 );
+ goto decode_success;
+ }
+
+ /* 66 0F D2 = PSRLD by E */
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0xD2) {
+ delta = dis_SSE_shiftG_byE( vbi, pfx, delta+2, "psrld", Iop_ShrN32x4 );
+ goto decode_success;
+ }
+
+ /* 66 0F 73 /3 ib = PSRLDQ by immediate */
+ /* note, if mem case ever filled in, 1 byte after amode */
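+   /* Mirror image of PSLLDQ above; e.g. for imm = 3 the result is
+      hi64r = hi64 >> 24 and lo64r = (lo64 >> 24) | (hi64 << 40). */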
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x73
+ && epartIsReg(insn[2])
+ && gregLO3ofRM(insn[2]) == 3) {
+ IRTemp sV, dV, hi64, lo64, hi64r, lo64r;
+ Int imm = (Int)insn[3];
+ Int reg = eregOfRexRM(pfx,insn[2]);
+ DIP("psrldq $%d,%s\n", imm, nameXMMReg(reg));
+ vassert(imm >= 0 && imm <= 255);
+ delta += 4;
+
+ sV = newTemp(Ity_V128);
+ dV = newTemp(Ity_V128);
+ hi64 = newTemp(Ity_I64);
+ lo64 = newTemp(Ity_I64);
+ hi64r = newTemp(Ity_I64);
+ lo64r = newTemp(Ity_I64);
+
+ if (imm >= 16) {
+ putXMMReg(reg, mkV128(0x0000));
+ goto decode_success;
+ }
+
+ assign( sV, getXMMReg(reg) );
+ assign( hi64, unop(Iop_V128HIto64, mkexpr(sV)) );
+ assign( lo64, unop(Iop_V128to64, mkexpr(sV)) );
+
+ if (imm == 0) {
+ assign( lo64r, mkexpr(lo64) );
+ assign( hi64r, mkexpr(hi64) );
+ }
+ else
+ if (imm == 8) {
+ assign( hi64r, mkU64(0) );
+ assign( lo64r, mkexpr(hi64) );
+ }
+ else
+ if (imm > 8) {
+ assign( hi64r, mkU64(0) );
+ assign( lo64r, binop( Iop_Shr64,
+ mkexpr(hi64),
+ mkU8( 8*(imm-8) ) ));
+ } else {
+ assign( hi64r, binop( Iop_Shr64,
+ mkexpr(hi64),
+ mkU8(8 * imm) ));
+ assign( lo64r,
+ binop( Iop_Or64,
+ binop(Iop_Shr64, mkexpr(lo64),
+ mkU8(8 * imm)),
+ binop(Iop_Shl64, mkexpr(hi64),
+ mkU8(8 * (8 - imm)) )
+ )
+ );
+ }
+
+ assign( dV, binop(Iop_64HLtoV128, mkexpr(hi64r), mkexpr(lo64r)) );
+ putXMMReg(reg, mkexpr(dV));
+ goto decode_success;
+ }
+
+ /* 66 0F 73 /2 ib = PSRLQ by immediate */
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x73
+ && epartIsReg(insn[2])
+ && gregLO3ofRM(insn[2]) == 2) {
+ delta = dis_SSE_shiftE_imm( pfx, delta+2, "psrlq", Iop_ShrN64x2 );
+ goto decode_success;
+ }
+
+ /* 66 0F D3 = PSRLQ by E */
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0xD3) {
+ delta = dis_SSE_shiftG_byE( vbi, pfx, delta+2, "psrlq", Iop_ShrN64x2 );
+ goto decode_success;
+ }
+
+ /* 66 0F 71 /2 ib = PSRLW by immediate */
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x71
+ && epartIsReg(insn[2])
+ && gregLO3ofRM(insn[2]) == 2) {
+ delta = dis_SSE_shiftE_imm( pfx, delta+2, "psrlw", Iop_ShrN16x8 );
+ goto decode_success;
+ }
+
+ /* 66 0F D1 = PSRLW by E */
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0xD1) {
+ delta = dis_SSE_shiftG_byE( vbi, pfx, delta+2, "psrlw", Iop_ShrN16x8 );
+ goto decode_success;
+ }
+
+ /* 66 0F F8 = PSUBB */
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0xF8) {
+ delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
+ "psubb", Iop_Sub8x16, False );
+ goto decode_success;
+ }
+
+ /* 66 0F FA = PSUBD */
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0xFA) {
+ delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
+ "psubd", Iop_Sub32x4, False );
+ goto decode_success;
+ }
+
+ /* ***--- this is an MMX class insn introduced in SSE2 ---*** */
+ /* 0F FB = PSUBQ -- sub 64x1 */
+ if (haveNo66noF2noF3(pfx) && sz == 4
+ && insn[0] == 0x0F && insn[1] == 0xFB) {
+ do_MMX_preamble();
+ delta = dis_MMXop_regmem_to_reg (
+ vbi, pfx, delta+2, insn[1], "psubq", False );
+ goto decode_success;
+ }
+
+ /* 66 0F FB = PSUBQ */
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0xFB) {
+ delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
+ "psubq", Iop_Sub64x2, False );
+ goto decode_success;
+ }
+
+ /* 66 0F F9 = PSUBW */
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0xF9) {
+ delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
+ "psubw", Iop_Sub16x8, False );
+ goto decode_success;
+ }
+
+ /* 66 0F E8 = PSUBSB */
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0xE8) {
+ delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
+ "psubsb", Iop_QSub8Sx16, False );
+ goto decode_success;
+ }
+
+ /* 66 0F E9 = PSUBSW */
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0xE9) {
+ delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
+ "psubsw", Iop_QSub16Sx8, False );
+ goto decode_success;
+ }
+
+   /* 66 0F D8 = PSUBUSB */
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0xD8) {
+ delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
+ "psubusb", Iop_QSub8Ux16, False );
+ goto decode_success;
+ }
+
+   /* 66 0F D9 = PSUBUSW */
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0xD9) {
+ delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
+ "psubusw", Iop_QSub16Ux8, False );
+ goto decode_success;
+ }
+
+ /* 66 0F 68 = PUNPCKHBW */
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x68) {
+ delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
+ "punpckhbw",
+ Iop_InterleaveHI8x16, True );
+ goto decode_success;
+ }
+
+ /* 66 0F 6A = PUNPCKHDQ */
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x6A) {
+ delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
+ "punpckhdq",
+ Iop_InterleaveHI32x4, True );
+ goto decode_success;
+ }
+
+ /* 66 0F 6D = PUNPCKHQDQ */
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x6D) {
+ delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
+ "punpckhqdq",
+ Iop_InterleaveHI64x2, True );
+ goto decode_success;
+ }
+
+ /* 66 0F 69 = PUNPCKHWD */
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x69) {
+ delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
+ "punpckhwd",
+ Iop_InterleaveHI16x8, True );
+ goto decode_success;
+ }
+
+ /* 66 0F 60 = PUNPCKLBW */
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x60) {
+ delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
+ "punpcklbw",
+ Iop_InterleaveLO8x16, True );
+ goto decode_success;
+ }
+
+ /* 66 0F 62 = PUNPCKLDQ */
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x62) {
+ delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
+ "punpckldq",
+ Iop_InterleaveLO32x4, True );
+ goto decode_success;
+ }
+
+ /* 66 0F 6C = PUNPCKLQDQ */
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x6C) {
+ delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
+ "punpcklqdq",
+ Iop_InterleaveLO64x2, True );
+ goto decode_success;
+ }
+
+ /* 66 0F 61 = PUNPCKLWD */
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x61) {
+ delta = dis_SSEint_E_to_G( vbi, pfx, delta+2,
+ "punpcklwd",
+ Iop_InterleaveLO16x8, True );
+ goto decode_success;
+ }
+
+ /* 66 0F EF = PXOR */
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0xEF) {
+ delta = dis_SSE_E_to_G_all( vbi, pfx, delta+2, "pxor", Iop_XorV128 );
+ goto decode_success;
+ }
+
+//.. //-- /* FXSAVE/FXRSTOR m32 -- load/store the FPU/MMX/SSE state. */
+//.. //-- if (insn[0] == 0x0F && insn[1] == 0xAE
+//.. //-- && (!epartIsReg(insn[2]))
+//.. //-- && (gregOfRM(insn[2]) == 1 || gregOfRM(insn[2]) == 0) ) {
+//.. //-- Bool store = gregOfRM(insn[2]) == 0;
+//.. //-- vg_assert(sz == 4);
+//.. //-- pair = disAMode ( cb, sorb, eip+2, dis_buf );
+//.. //-- t1 = LOW24(pair);
+//.. //-- eip += 2+HI8(pair);
+//.. //-- uInstr3(cb, store ? SSE2a_MemWr : SSE2a_MemRd, 512,
+//.. //-- Lit16, (((UShort)insn[0]) << 8) | (UShort)insn[1],
+//.. //-- Lit16, (UShort)insn[2],
+//.. //-- TempReg, t1 );
+//.. //-- DIP("fx%s %s\n", store ? "save" : "rstor", dis_buf );
+//.. //-- goto decode_success;
+//.. //-- }
+
+ /* 0F AE /7 = CLFLUSH -- flush cache line */
+ if (haveNo66noF2noF3(pfx) && sz == 4
+ && insn[0] == 0x0F && insn[1] == 0xAE
+ && !epartIsReg(insn[2]) && gregLO3ofRM(insn[2]) == 7) {
+
+ /* This is something of a hack. We need to know the size of the
+ cache line containing addr. Since we don't (easily), assume
+ 256 on the basis that no real cache would have a line that
+ big. It's safe to invalidate more stuff than we need, just
+ inefficient. */
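+ /* For example, with lineszB == 256, an addr of 0x12345 is
+ rounded down to 0x12300 by the And64 with ~0xFF below, and
+ TILEN is set to the full 256 bytes.  (Illustrative only.) */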
+ ULong lineszB = 256ULL;
+
+ addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
+ delta += 2+alen;
+
+ /* Round addr down to the start of the containing block. */
+ stmt( IRStmt_Put(
+ OFFB_TISTART,
+ binop( Iop_And64,
+ mkexpr(addr),
+ mkU64( ~(lineszB-1) ))) );
+
+ stmt( IRStmt_Put(OFFB_TILEN, mkU64(lineszB) ) );
+
+ irsb->jumpkind = Ijk_TInval;
+ irsb->next = mkU64(guest_RIP_bbstart+delta);
+ dres.whatNext = Dis_StopHere;
+
+ DIP("clflush %s\n", dis_buf);
+ goto decode_success;
+ }
+
+ /* ---------------------------------------------------- */
+ /* --- end of the SSE/SSE2 decoder. --- */
+ /* ---------------------------------------------------- */
+
+ /* ---------------------------------------------------- */
+ /* --- start of the SSE3 decoder. --- */
+ /* ---------------------------------------------------- */
+
+ /* F3 0F 12 = MOVSLDUP -- move from E (mem or xmm) to G (xmm),
+ duplicating some lanes (2:2:0:0). */
+ /* F3 0F 16 = MOVSHDUP -- move from E (mem or xmm) to G (xmm),
+ duplicating some lanes (3:3:1:1). */
+ if (haveF3no66noF2(pfx) && sz == 4
+ && insn[0] == 0x0F && (insn[1] == 0x12 || insn[1] == 0x16)) {
+ IRTemp s3, s2, s1, s0;
+ IRTemp sV = newTemp(Ity_V128);
+ Bool isH = insn[1] == 0x16;
+ s3 = s2 = s1 = s0 = IRTemp_INVALID;
+
+ modrm = insn[2];
+ if (epartIsReg(modrm)) {
+ assign( sV, getXMMReg( eregOfRexRM(pfx,modrm)) );
+ DIP("movs%cdup %s,%s\n", isH ? 'h' : 'l',
+ nameXMMReg(eregOfRexRM(pfx,modrm)),
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ delta += 2+1;
+ } else {
+ addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
+ gen_SEGV_if_not_16_aligned( addr );
+ assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
+ DIP("movs%cdup %s,%s\n", isH ? 'h' : 'l',
+ dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ delta += 2+alen;
+ }
+
+ breakup128to32s( sV, &s3, &s2, &s1, &s0 );
+ putXMMReg( gregOfRexRM(pfx,modrm),
+ isH ? mk128from32s( s3, s3, s1, s1 )
+ : mk128from32s( s2, s2, s0, s0 ) );
+ goto decode_success;
+ }
+
+ /* F2 0F 12 = MOVDDUP -- move from E (mem or xmm) to G (xmm),
+ duplicating some lanes (0:1:0:1). */
+ if (haveF2no66noF3(pfx)
+ && (sz == 4 || /* ignore redundant REX.W */ sz == 8)
+ && insn[0] == 0x0F && insn[1] == 0x12) {
+ IRTemp sV = newTemp(Ity_V128);
+ IRTemp d0 = newTemp(Ity_I64);
+
+ modrm = insn[2];
+ if (epartIsReg(modrm)) {
+ assign( sV, getXMMReg( eregOfRexRM(pfx,modrm)) );
+ DIP("movddup %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ delta += 2+1;
+ assign ( d0, unop(Iop_V128to64, mkexpr(sV)) );
+ } else {
+ addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
+ assign( d0, loadLE(Ity_I64, mkexpr(addr)) );
+ DIP("movddup %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ delta += 2+alen;
+ }
+
+ putXMMReg( gregOfRexRM(pfx,modrm),
+ binop(Iop_64HLtoV128,mkexpr(d0),mkexpr(d0)) );
+ goto decode_success;
+ }
+
+ /* F2 0F D0 = ADDSUBPS -- 32x4 +/-/+/- from E (mem or xmm) to G (xmm). */
+ if (haveF2no66noF3(pfx) && sz == 4
+ && insn[0] == 0x0F && insn[1] == 0xD0) {
+ IRTemp a3, a2, a1, a0, s3, s2, s1, s0;
+ IRTemp eV = newTemp(Ity_V128);
+ IRTemp gV = newTemp(Ity_V128);
+ IRTemp addV = newTemp(Ity_V128);
+ IRTemp subV = newTemp(Ity_V128);
+ a3 = a2 = a1 = a0 = s3 = s2 = s1 = s0 = IRTemp_INVALID;
+
+ modrm = insn[2];
+ if (epartIsReg(modrm)) {
+ assign( eV, getXMMReg( eregOfRexRM(pfx,modrm)) );
+ DIP("addsubps %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ delta += 2+1;
+ } else {
+ addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
+ assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
+ DIP("addsubps %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ delta += 2+alen;
+ }
+
+ assign( gV, getXMMReg(gregOfRexRM(pfx,modrm)) );
+
+ assign( addV, binop(Iop_Add32Fx4, mkexpr(gV), mkexpr(eV)) );
+ assign( subV, binop(Iop_Sub32Fx4, mkexpr(gV), mkexpr(eV)) );
+
+ breakup128to32s( addV, &a3, &a2, &a1, &a0 );
+ breakup128to32s( subV, &s3, &s2, &s1, &s0 );
+
+ putXMMReg( gregOfRexRM(pfx,modrm), mk128from32s( a3, s2, a1, s0 ));
+ goto decode_success;
+ }
+
+ /* 66 0F D0 = ADDSUBPD -- 64x2 +/- from E (mem or xmm) to G (xmm). */
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0xD0) {
+ IRTemp eV = newTemp(Ity_V128);
+ IRTemp gV = newTemp(Ity_V128);
+ IRTemp addV = newTemp(Ity_V128);
+ IRTemp subV = newTemp(Ity_V128);
+ IRTemp a1 = newTemp(Ity_I64);
+ IRTemp s0 = newTemp(Ity_I64);
+
+ modrm = insn[2];
+ if (epartIsReg(modrm)) {
+ assign( eV, getXMMReg( eregOfRexRM(pfx,modrm)) );
+ DIP("addsubpd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ delta += 2+1;
+ } else {
+ addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
+ assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
+ DIP("addsubpd %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ delta += 2+alen;
+ }
+
+ assign( gV, getXMMReg(gregOfRexRM(pfx,modrm)) );
+
+ assign( addV, binop(Iop_Add64Fx2, mkexpr(gV), mkexpr(eV)) );
+ assign( subV, binop(Iop_Sub64Fx2, mkexpr(gV), mkexpr(eV)) );
+
+ assign( a1, unop(Iop_V128HIto64, mkexpr(addV) ));
+ assign( s0, unop(Iop_V128to64, mkexpr(subV) ));
+
+ putXMMReg( gregOfRexRM(pfx,modrm),
+ binop(Iop_64HLtoV128, mkexpr(a1), mkexpr(s0)) );
+ goto decode_success;
+ }
+
+ /* F2 0F 7D = HSUBPS -- 32x4 sub across from E (mem or xmm) to G (xmm). */
+ /* F2 0F 7C = HADDPS -- 32x4 add across from E (mem or xmm) to G (xmm). */
+ if (haveF2no66noF3(pfx) && sz == 4
+ && insn[0] == 0x0F && (insn[1] == 0x7C || insn[1] == 0x7D)) {
+ IRTemp e3, e2, e1, e0, g3, g2, g1, g0;
+ IRTemp eV = newTemp(Ity_V128);
+ IRTemp gV = newTemp(Ity_V128);
+ IRTemp leftV = newTemp(Ity_V128);
+ IRTemp rightV = newTemp(Ity_V128);
+ Bool isAdd = insn[1] == 0x7C;
+ HChar* str = isAdd ? "add" : "sub";
+ e3 = e2 = e1 = e0 = g3 = g2 = g1 = g0 = IRTemp_INVALID;
+
+ modrm = insn[2];
+ if (epartIsReg(modrm)) {
+ assign( eV, getXMMReg( eregOfRexRM(pfx,modrm)) );
+ DIP("h%sps %s,%s\n", str, nameXMMReg(eregOfRexRM(pfx,modrm)),
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ delta += 2+1;
+ } else {
+ addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
+ assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
+ DIP("h%sps %s,%s\n", str, dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ delta += 2+alen;
+ }
+
+ assign( gV, getXMMReg(gregOfRexRM(pfx,modrm)) );
+
+ breakup128to32s( eV, &e3, &e2, &e1, &e0 );
+ breakup128to32s( gV, &g3, &g2, &g1, &g0 );
+
+ assign( leftV, mk128from32s( e2, e0, g2, g0 ) );
+ assign( rightV, mk128from32s( e3, e1, g3, g1 ) );
+
+ putXMMReg( gregOfRexRM(pfx,modrm),
+ binop(isAdd ? Iop_Add32Fx4 : Iop_Sub32Fx4,
+ mkexpr(leftV), mkexpr(rightV) ) );
+ goto decode_success;
+ }
+
+ /* 66 0F 7D = HSUBPD -- 64x2 sub across from E (mem or xmm) to G (xmm). */
+ /* 66 0F 7C = HADDPD -- 64x2 add across from E (mem or xmm) to G (xmm). */
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && (insn[1] == 0x7C || insn[1] == 0x7D)) {
+ IRTemp e1 = newTemp(Ity_I64);
+ IRTemp e0 = newTemp(Ity_I64);
+ IRTemp g1 = newTemp(Ity_I64);
+ IRTemp g0 = newTemp(Ity_I64);
+ IRTemp eV = newTemp(Ity_V128);
+ IRTemp gV = newTemp(Ity_V128);
+ IRTemp leftV = newTemp(Ity_V128);
+ IRTemp rightV = newTemp(Ity_V128);
+ Bool isAdd = insn[1] == 0x7C;
+ HChar* str = isAdd ? "add" : "sub";
+
+ modrm = insn[2];
+ if (epartIsReg(modrm)) {
+ assign( eV, getXMMReg( eregOfRexRM(pfx,modrm)) );
+ DIP("h%spd %s,%s\n", str, nameXMMReg(eregOfRexRM(pfx,modrm)),
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ delta += 2+1;
+ } else {
+ addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
+ assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
+ DIP("h%spd %s,%s\n", str, dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ delta += 2+alen;
+ }
+
+ assign( gV, getXMMReg(gregOfRexRM(pfx,modrm)) );
+
+ assign( e1, unop(Iop_V128HIto64, mkexpr(eV) ));
+ assign( e0, unop(Iop_V128to64, mkexpr(eV) ));
+ assign( g1, unop(Iop_V128HIto64, mkexpr(gV) ));
+ assign( g0, unop(Iop_V128to64, mkexpr(gV) ));
+
+ assign( leftV, binop(Iop_64HLtoV128, mkexpr(e0),mkexpr(g0)) );
+ assign( rightV, binop(Iop_64HLtoV128, mkexpr(e1),mkexpr(g1)) );
+
+ putXMMReg( gregOfRexRM(pfx,modrm),
+ binop(isAdd ? Iop_Add64Fx2 : Iop_Sub64Fx2,
+ mkexpr(leftV), mkexpr(rightV) ) );
+ goto decode_success;
+ }
+
+ /* F2 0F F0 = LDDQU -- move from E (mem or xmm) to G (xmm). */
+ if (haveF2no66noF3(pfx) && sz == 4
+ && insn[0] == 0x0F && insn[1] == 0xF0) {
+ modrm = insn[2];
+ if (epartIsReg(modrm)) {
+ goto decode_failure;
+ } else {
+ addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 );
+ putXMMReg( gregOfRexRM(pfx,modrm),
+ loadLE(Ity_V128, mkexpr(addr)) );
+ DIP("lddqu %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ delta += 2+alen;
+ }
+ goto decode_success;
+ }
+
+ /* ---------------------------------------------------- */
+ /* --- end of the SSE3 decoder. --- */
+ /* ---------------------------------------------------- */
+
+ /* ---------------------------------------------------- */
+ /* --- start of the SSSE3 decoder. --- */
+ /* ---------------------------------------------------- */
+
+ /* 0F 38 04 = PMADDUBSW -- Multiply and Add Packed Signed and
+ Unsigned Bytes (MMX) */
+ if (haveNo66noF2noF3(pfx)
+ && sz == 4
+ && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x04) {
+ IRTemp sV = newTemp(Ity_I64);
+ IRTemp dV = newTemp(Ity_I64);
+ IRTemp sVoddsSX = newTemp(Ity_I64);
+ IRTemp sVevensSX = newTemp(Ity_I64);
+ IRTemp dVoddsZX = newTemp(Ity_I64);
+ IRTemp dVevensZX = newTemp(Ity_I64);
+
+ modrm = insn[3];
+ do_MMX_preamble();
+ assign( dV, getMMXReg(gregLO3ofRM(modrm)) );
+
+ if (epartIsReg(modrm)) {
+ assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
+ delta += 3+1;
+ DIP("pmaddubsw %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
+ nameMMXReg(gregLO3ofRM(modrm)));
+ } else {
+ addr = disAMode ( &alen, vbi, pfx, delta+3, dis_buf, 0 );
+ assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
+ delta += 3+alen;
+ DIP("pmaddubsw %s,%s\n", dis_buf,
+ nameMMXReg(gregLO3ofRM(modrm)));
+ }
+
+ /* compute dV unsigned x sV signed */
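+ /* The widening trick: SarN16x4(sV,8) arithmetically shifts each
+ 16-bit lane right by 8, leaving the odd-numbered source bytes
+ sign-extended to 16 bits; the Shl-then-Sar pair does the same
+ for the even bytes.  The logical ShrN16x4 variants give the
+ zero-extended d bytes. */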
+ assign( sVoddsSX,
+ binop(Iop_SarN16x4, mkexpr(sV), mkU8(8)) );
+ assign( sVevensSX,
+ binop(Iop_SarN16x4,
+ binop(Iop_ShlN16x4, mkexpr(sV), mkU8(8)),
+ mkU8(8)) );
+ assign( dVoddsZX,
+ binop(Iop_ShrN16x4, mkexpr(dV), mkU8(8)) );
+ assign( dVevensZX,
+ binop(Iop_ShrN16x4,
+ binop(Iop_ShlN16x4, mkexpr(dV), mkU8(8)),
+ mkU8(8)) );
+
+ putMMXReg(
+ gregLO3ofRM(modrm),
+ binop(Iop_QAdd16Sx4,
+ binop(Iop_Mul16x4, mkexpr(sVoddsSX), mkexpr(dVoddsZX)),
+ binop(Iop_Mul16x4, mkexpr(sVevensSX), mkexpr(dVevensZX))
+ )
+ );
+ goto decode_success;
+ }
+
+ /* 66 0F 38 04 = PMADDUBSW -- Multiply and Add Packed Signed and
+ Unsigned Bytes (XMM) */
+ if (have66noF2noF3(pfx)
+ && (sz == 2 || /*redundant REX.W*/ sz == 8)
+ && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x04) {
+ IRTemp sV = newTemp(Ity_V128);
+ IRTemp dV = newTemp(Ity_V128);
+ IRTemp sVoddsSX = newTemp(Ity_V128);
+ IRTemp sVevensSX = newTemp(Ity_V128);
+ IRTemp dVoddsZX = newTemp(Ity_V128);
+ IRTemp dVevensZX = newTemp(Ity_V128);
+
+ modrm = insn[3];
+ assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );
+
+ if (epartIsReg(modrm)) {
+ assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
+ delta += 3+1;
+ DIP("pmaddubsw %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ } else {
+ addr = disAMode ( &alen, vbi, pfx, delta+3, dis_buf, 0 );
+ gen_SEGV_if_not_16_aligned( addr );
+ assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
+ delta += 3+alen;
+ DIP("pmaddubsw %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ }
+
+ /* compute dV unsigned x sV signed */
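+ /* Same even/odd widening trick as the MMX case above, applied
+ to 16x8 lanes. */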
+ assign( sVoddsSX,
+ binop(Iop_SarN16x8, mkexpr(sV), mkU8(8)) );
+ assign( sVevensSX,
+ binop(Iop_SarN16x8,
+ binop(Iop_ShlN16x8, mkexpr(sV), mkU8(8)),
+ mkU8(8)) );
+ assign( dVoddsZX,
+ binop(Iop_ShrN16x8, mkexpr(dV), mkU8(8)) );
+ assign( dVevensZX,
+ binop(Iop_ShrN16x8,
+ binop(Iop_ShlN16x8, mkexpr(dV), mkU8(8)),
+ mkU8(8)) );
+
+ putXMMReg(
+ gregOfRexRM(pfx,modrm),
+ binop(Iop_QAdd16Sx8,
+ binop(Iop_Mul16x8, mkexpr(sVoddsSX), mkexpr(dVoddsZX)),
+ binop(Iop_Mul16x8, mkexpr(sVevensSX), mkexpr(dVevensZX))
+ )
+ );
+ goto decode_success;
+ }
+
+ /* ***--- these are MMX class insns introduced in SSSE3 ---*** */
+ /* 0F 38 03 = PHADDSW -- 16x4 signed qadd across from E (mem or
+ mmx) and G to G (mmx). */
+ /* 0F 38 07 = PHSUBSW -- 16x4 signed qsub across from E (mem or
+ mmx) and G to G (mmx). */
+ /* 0F 38 01 = PHADDW -- 16x4 add across from E (mem or mmx) and G
+ to G (mmx). */
+ /* 0F 38 05 = PHSUBW -- 16x4 sub across from E (mem or mmx) and G
+ to G (mmx). */
+ /* 0F 38 02 = PHADDD -- 32x2 add across from E (mem or mmx) and G
+ to G (mmx). */
+ /* 0F 38 06 = PHSUBD -- 32x2 sub across from E (mem or mmx) and G
+ to G (mmx). */
+
+ if (haveNo66noF2noF3(pfx)
+ && sz == 4
+ && insn[0] == 0x0F && insn[1] == 0x38
+ && (insn[2] == 0x03 || insn[2] == 0x07 || insn[2] == 0x01
+ || insn[2] == 0x05 || insn[2] == 0x02 || insn[2] == 0x06)) {
+ HChar* str = "???";
+ IROp opV64 = Iop_INVALID;
+ IROp opCatO = Iop_CatOddLanes16x4;
+ IROp opCatE = Iop_CatEvenLanes16x4;
+ IRTemp sV = newTemp(Ity_I64);
+ IRTemp dV = newTemp(Ity_I64);
+
+ modrm = insn[3];
+
+ switch (insn[2]) {
+ case 0x03: opV64 = Iop_QAdd16Sx4; str = "addsw"; break;
+ case 0x07: opV64 = Iop_QSub16Sx4; str = "subsw"; break;
+ case 0x01: opV64 = Iop_Add16x4; str = "addw"; break;
+ case 0x05: opV64 = Iop_Sub16x4; str = "subw"; break;
+ case 0x02: opV64 = Iop_Add32x2; str = "addd"; break;
+ case 0x06: opV64 = Iop_Sub32x2; str = "subd"; break;
+ default: vassert(0);
+ }
+ if (insn[2] == 0x02 || insn[2] == 0x06) {
+ opCatO = Iop_InterleaveHI32x2;
+ opCatE = Iop_InterleaveLO32x2;
+ }
+
+ do_MMX_preamble();
+ assign( dV, getMMXReg(gregLO3ofRM(modrm)) );
+
+ if (epartIsReg(modrm)) {
+ assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
+ delta += 3+1;
+ DIP("ph%s %s,%s\n", str, nameMMXReg(eregLO3ofRM(modrm)),
+ nameMMXReg(gregLO3ofRM(modrm)));
+ } else {
+ addr = disAMode ( &alen, vbi, pfx, delta+3, dis_buf, 0 );
+ assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
+ delta += 3+alen;
+ DIP("ph%s %s,%s\n", str, dis_buf,
+ nameMMXReg(gregLO3ofRM(modrm)));
+ }
+
+ putMMXReg(
+ gregLO3ofRM(modrm),
+ binop(opV64,
+ binop(opCatE,mkexpr(sV),mkexpr(dV)),
+ binop(opCatO,mkexpr(sV),mkexpr(dV))
+ )
+ );
+ goto decode_success;
+ }
+
+ /* 66 0F 38 03 = PHADDSW -- 16x8 signed qadd across from E (mem or
+ xmm) and G to G (xmm). */
+ /* 66 0F 38 07 = PHSUBSW -- 16x8 signed qsub across from E (mem or
+ xmm) and G to G (xmm). */
+ /* 66 0F 38 01 = PHADDW -- 16x8 add across from E (mem or xmm) and
+ G to G (xmm). */
+ /* 66 0F 38 05 = PHSUBW -- 16x8 sub across from E (mem or xmm) and
+ G to G (xmm). */
+ /* 66 0F 38 02 = PHADDD -- 32x4 add across from E (mem or xmm) and
+ G to G (xmm). */
+ /* 66 0F 38 06 = PHSUBD -- 32x4 sub across from E (mem or xmm) and
+ G to G (xmm). */
+
+ if (have66noF2noF3(pfx)
+ && (sz == 2 || /*redundant REX.W*/ sz == 8)
+ && insn[0] == 0x0F && insn[1] == 0x38
+ && (insn[2] == 0x03 || insn[2] == 0x07 || insn[2] == 0x01
+ || insn[2] == 0x05 || insn[2] == 0x02 || insn[2] == 0x06)) {
+ HChar* str = "???";
+ IROp opV64 = Iop_INVALID;
+ IROp opCatO = Iop_CatOddLanes16x4;
+ IROp opCatE = Iop_CatEvenLanes16x4;
+ IRTemp sV = newTemp(Ity_V128);
+ IRTemp dV = newTemp(Ity_V128);
+ IRTemp sHi = newTemp(Ity_I64);
+ IRTemp sLo = newTemp(Ity_I64);
+ IRTemp dHi = newTemp(Ity_I64);
+ IRTemp dLo = newTemp(Ity_I64);
+
+ modrm = insn[3];
+
+ switch (insn[2]) {
+ case 0x03: opV64 = Iop_QAdd16Sx4; str = "addsw"; break;
+ case 0x07: opV64 = Iop_QSub16Sx4; str = "subsw"; break;
+ case 0x01: opV64 = Iop_Add16x4; str = "addw"; break;
+ case 0x05: opV64 = Iop_Sub16x4; str = "subw"; break;
+ case 0x02: opV64 = Iop_Add32x2; str = "addd"; break;
+ case 0x06: opV64 = Iop_Sub32x2; str = "subd"; break;
+ default: vassert(0);
+ }
+ if (insn[2] == 0x02 || insn[2] == 0x06) {
+ opCatO = Iop_InterleaveHI32x2;
+ opCatE = Iop_InterleaveLO32x2;
+ }
+
+ assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );
+
+ if (epartIsReg(modrm)) {
+ assign( sV, getXMMReg( eregOfRexRM(pfx,modrm)) );
+ DIP("ph%s %s,%s\n", str, nameXMMReg(eregOfRexRM(pfx,modrm)),
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ delta += 3+1;
+ } else {
+ addr = disAMode ( &alen, vbi, pfx, delta+3, dis_buf, 0 );
+ gen_SEGV_if_not_16_aligned( addr );
+ assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
+ DIP("ph%s %s,%s\n", str, dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ delta += 3+alen;
+ }
+
+ assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
+ assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
+ assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
+ assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
+
+ /* This isn't a particularly efficient way to compute the
+ result, but at least it avoids a proliferation of IROps,
+ hence avoids complicating all the backends. */
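+ /* Concretely, for phaddw: CatEvenLanes16x4(sHi,sLo) gathers s
+ lanes 0,2,4,6 and CatOddLanes16x4 gathers lanes 1,3,5,7, so
+ opV64 on the pair gives (s0+s1, s2+s3, s4+s5, s6+s7), lowest
+ lane first, for the high half of the result; likewise for d in
+ the low half.  (Derived from the CatLanes semantics;
+ illustrative.) */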
+ putXMMReg(
+ gregOfRexRM(pfx,modrm),
+ binop(Iop_64HLtoV128,
+ binop(opV64,
+ binop(opCatE,mkexpr(sHi),mkexpr(sLo)),
+ binop(opCatO,mkexpr(sHi),mkexpr(sLo))
+ ),
+ binop(opV64,
+ binop(opCatE,mkexpr(dHi),mkexpr(dLo)),
+ binop(opCatO,mkexpr(dHi),mkexpr(dLo))
+ )
+ )
+ );
+ goto decode_success;
+ }
+
+ /* 0F 38 0B = PMULHRSW -- Packed Multiply High with Round and Scale
+ (MMX) */
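+ /* Per the Intel docs, each 16-bit lane computes
+ (((x*y) >> 14) + 1) >> 1, i.e. a rounded high-half multiply;
+ the lane arithmetic lives in dis_PMULHRSW_helper. */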
+ if (haveNo66noF2noF3(pfx)
+ && sz == 4
+ && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x0B) {
+ IRTemp sV = newTemp(Ity_I64);
+ IRTemp dV = newTemp(Ity_I64);
+
+ modrm = insn[3];
+ do_MMX_preamble();
+ assign( dV, getMMXReg(gregLO3ofRM(modrm)) );
+
+ if (epartIsReg(modrm)) {
+ assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
+ delta += 3+1;
+ DIP("pmulhrsw %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
+ nameMMXReg(gregLO3ofRM(modrm)));
+ } else {
+ addr = disAMode ( &alen, vbi, pfx, delta+3, dis_buf, 0 );
+ assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
+ delta += 3+alen;
+ DIP("pmulhrsw %s,%s\n", dis_buf,
+ nameMMXReg(gregLO3ofRM(modrm)));
+ }
+
+ putMMXReg(
+ gregLO3ofRM(modrm),
+ dis_PMULHRSW_helper( mkexpr(sV), mkexpr(dV) )
+ );
+ goto decode_success;
+ }
+
+ /* 66 0F 38 0B = PMULHRSW -- Packed Multiply High with Round and
+ Scale (XMM) */
+ if (have66noF2noF3(pfx)
+ && (sz == 2 || /*redundant REX.W*/ sz == 8)
+ && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x0B) {
+ IRTemp sV = newTemp(Ity_V128);
+ IRTemp dV = newTemp(Ity_V128);
+ IRTemp sHi = newTemp(Ity_I64);
+ IRTemp sLo = newTemp(Ity_I64);
+ IRTemp dHi = newTemp(Ity_I64);
+ IRTemp dLo = newTemp(Ity_I64);
+
+ modrm = insn[3];
+ assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );
+
+ if (epartIsReg(modrm)) {
+ assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
+ delta += 3+1;
+ DIP("pmulhrsw %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ } else {
+ addr = disAMode ( &alen, vbi, pfx, delta+3, dis_buf, 0 );
+ gen_SEGV_if_not_16_aligned( addr );
+ assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
+ delta += 3+alen;
+ DIP("pmulhrsw %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ }
+
+ assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
+ assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
+ assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
+ assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
+
+ putXMMReg(
+ gregOfRexRM(pfx,modrm),
+ binop(Iop_64HLtoV128,
+ dis_PMULHRSW_helper( mkexpr(sHi), mkexpr(dHi) ),
+ dis_PMULHRSW_helper( mkexpr(sLo), mkexpr(dLo) )
+ )
+ );
+ goto decode_success;
+ }
+
+ /* 0F 38 08 = PSIGNB -- Packed Sign 8x8 (MMX) */
+ /* 0F 38 09 = PSIGNW -- Packed Sign 16x4 (MMX) */
+ /* 0F 38 0A = PSIGND -- Packed Sign 32x2 (MMX) */
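+ /* For each lane, dis_PSIGN_helper produces -d if the s lane is
+ negative, 0 if it is zero, and d unchanged otherwise. */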
+ if (haveNo66noF2noF3(pfx)
+ && sz == 4
+ && insn[0] == 0x0F && insn[1] == 0x38
+ && (insn[2] == 0x08 || insn[2] == 0x09 || insn[2] == 0x0A)) {
+ IRTemp sV = newTemp(Ity_I64);
+ IRTemp dV = newTemp(Ity_I64);
+ HChar* str = "???";
+ Int laneszB = 0;
+
+ switch (insn[2]) {
+ case 0x08: laneszB = 1; str = "b"; break;
+ case 0x09: laneszB = 2; str = "w"; break;
+ case 0x0A: laneszB = 4; str = "d"; break;
+ default: vassert(0);
+ }
+
+ modrm = insn[3];
+ do_MMX_preamble();
+ assign( dV, getMMXReg(gregLO3ofRM(modrm)) );
+
+ if (epartIsReg(modrm)) {
+ assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
+ delta += 3+1;
+ DIP("psign%s %s,%s\n", str, nameMMXReg(eregLO3ofRM(modrm)),
+ nameMMXReg(gregLO3ofRM(modrm)));
+ } else {
+ addr = disAMode ( &alen, vbi, pfx, delta+3, dis_buf, 0 );
+ assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
+ delta += 3+alen;
+ DIP("psign%s %s,%s\n", str, dis_buf,
+ nameMMXReg(gregLO3ofRM(modrm)));
+ }
+
+ putMMXReg(
+ gregLO3ofRM(modrm),
+ dis_PSIGN_helper( mkexpr(sV), mkexpr(dV), laneszB )
+ );
+ goto decode_success;
+ }
+
+ /* 66 0F 38 08 = PSIGNB -- Packed Sign 8x16 (XMM) */
+ /* 66 0F 38 09 = PSIGNW -- Packed Sign 16x8 (XMM) */
+ /* 66 0F 38 0A = PSIGND -- Packed Sign 32x4 (XMM) */
+ if (have66noF2noF3(pfx)
+ && (sz == 2 || /*redundant REX.W*/ sz == 8)
+ && insn[0] == 0x0F && insn[1] == 0x38
+ && (insn[2] == 0x08 || insn[2] == 0x09 || insn[2] == 0x0A)) {
+ IRTemp sV = newTemp(Ity_V128);
+ IRTemp dV = newTemp(Ity_V128);
+ IRTemp sHi = newTemp(Ity_I64);
+ IRTemp sLo = newTemp(Ity_I64);
+ IRTemp dHi = newTemp(Ity_I64);
+ IRTemp dLo = newTemp(Ity_I64);
+ HChar* str = "???";
+ Int laneszB = 0;
+
+ switch (insn[2]) {
+ case 0x08: laneszB = 1; str = "b"; break;
+ case 0x09: laneszB = 2; str = "w"; break;
+ case 0x0A: laneszB = 4; str = "d"; break;
+ default: vassert(0);
+ }
+
+ modrm = insn[3];
+ assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );
+
+ if (epartIsReg(modrm)) {
+ assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
+ delta += 3+1;
+ DIP("psign%s %s,%s\n", str, nameXMMReg(eregOfRexRM(pfx,modrm)),
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ } else {
+ addr = disAMode ( &alen, vbi, pfx, delta+3, dis_buf, 0 );
+ gen_SEGV_if_not_16_aligned( addr );
+ assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
+ delta += 3+alen;
+ DIP("psign%s %s,%s\n", str, dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ }
+
+ assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
+ assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
+ assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
+ assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
+
+ putXMMReg(
+ gregOfRexRM(pfx,modrm),
+ binop(Iop_64HLtoV128,
+ dis_PSIGN_helper( mkexpr(sHi), mkexpr(dHi), laneszB ),
+ dis_PSIGN_helper( mkexpr(sLo), mkexpr(dLo), laneszB )
+ )
+ );
+ goto decode_success;
+ }
+
+ /* 0F 38 1C = PABSB -- Packed Absolute Value 8x8 (MMX) */
+ /* 0F 38 1D = PABSW -- Packed Absolute Value 16x4 (MMX) */
+ /* 0F 38 1E = PABSD -- Packed Absolute Value 32x2 (MMX) */
+ if (haveNo66noF2noF3(pfx)
+ && sz == 4
+ && insn[0] == 0x0F && insn[1] == 0x38
+ && (insn[2] == 0x1C || insn[2] == 0x1D || insn[2] == 0x1E)) {
+ IRTemp sV = newTemp(Ity_I64);
+ HChar* str = "???";
+ Int laneszB = 0;
+
+ switch (insn[2]) {
+ case 0x1C: laneszB = 1; str = "b"; break;
+ case 0x1D: laneszB = 2; str = "w"; break;
+ case 0x1E: laneszB = 4; str = "d"; break;
+ default: vassert(0);
+ }
+
+ modrm = insn[3];
+ do_MMX_preamble();
+
+ if (epartIsReg(modrm)) {
+ assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
+ delta += 3+1;
+ DIP("pabs%s %s,%s\n", str, nameMMXReg(eregLO3ofRM(modrm)),
+ nameMMXReg(gregLO3ofRM(modrm)));
+ } else {
+ addr = disAMode ( &alen, vbi, pfx, delta+3, dis_buf, 0 );
+ assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
+ delta += 3+alen;
+ DIP("pabs%s %s,%s\n", str, dis_buf,
+ nameMMXReg(gregLO3ofRM(modrm)));
+ }
+
+ putMMXReg(
+ gregLO3ofRM(modrm),
+ dis_PABS_helper( mkexpr(sV), laneszB )
+ );
+ goto decode_success;
+ }
+
+ /* 66 0F 38 1C = PABSB -- Packed Absolute Value 8x16 (XMM) */
+ /* 66 0F 38 1D = PABSW -- Packed Absolute Value 16x8 (XMM) */
+ /* 66 0F 38 1E = PABSD -- Packed Absolute Value 32x4 (XMM) */
+ if (have66noF2noF3(pfx)
+ && (sz == 2 || /*redundant REX.W*/ sz == 8)
+ && insn[0] == 0x0F && insn[1] == 0x38
+ && (insn[2] == 0x1C || insn[2] == 0x1D || insn[2] == 0x1E)) {
+ IRTemp sV = newTemp(Ity_V128);
+ IRTemp sHi = newTemp(Ity_I64);
+ IRTemp sLo = newTemp(Ity_I64);
+ HChar* str = "???";
+ Int laneszB = 0;
+
+ switch (insn[2]) {
+ case 0x1C: laneszB = 1; str = "b"; break;
+ case 0x1D: laneszB = 2; str = "w"; break;
+ case 0x1E: laneszB = 4; str = "d"; break;
+ default: vassert(0);
+ }
+
+ modrm = insn[3];
+
+ if (epartIsReg(modrm)) {
+ assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
+ delta += 3+1;
+ DIP("pabs%s %s,%s\n", str, nameXMMReg(eregOfRexRM(pfx,modrm)),
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ } else {
+ addr = disAMode ( &alen, vbi, pfx, delta+3, dis_buf, 0 );
+ gen_SEGV_if_not_16_aligned( addr );
+ assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
+ delta += 3+alen;
+ DIP("pabs%s %s,%s\n", str, dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ }
+
+ assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
+ assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
+
+ putXMMReg(
+ gregOfRexRM(pfx,modrm),
+ binop(Iop_64HLtoV128,
+ dis_PABS_helper( mkexpr(sHi), laneszB ),
+ dis_PABS_helper( mkexpr(sLo), laneszB )
+ )
+ );
+ goto decode_success;
+ }
+
+ /* 0F 3A 0F = PALIGNR -- Packed Align Right (MMX) */
+ if (haveNo66noF2noF3(pfx) && sz == 4
+ && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x0F) {
+ IRTemp sV = newTemp(Ity_I64);
+ IRTemp dV = newTemp(Ity_I64);
+ IRTemp res = newTemp(Ity_I64);
+
+ modrm = insn[3];
+ do_MMX_preamble();
+ assign( dV, getMMXReg(gregLO3ofRM(modrm)) );
+
+ if (epartIsReg(modrm)) {
+ assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
+ d64 = (Long)insn[3+1];
+ delta += 3+1+1;
+ DIP("palignr $%d,%s,%s\n", (Int)d64,
+ nameMMXReg(eregLO3ofRM(modrm)),
+ nameMMXReg(gregLO3ofRM(modrm)));
+ } else {
+ addr = disAMode ( &alen, vbi, pfx, delta+3, dis_buf, 1 );
+ assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
+ d64 = (Long)insn[3+alen];
+ delta += 3+alen+1;
+ DIP("palignr $%d%s,%s\n", (Int)d64,
+ dis_buf,
+ nameMMXReg(gregLO3ofRM(modrm)));
+ }
+
+ if (d64 == 0) {
+ assign( res, mkexpr(sV) );
+ }
+ else if (d64 >= 1 && d64 <= 7) {
+ assign(res,
+ binop(Iop_Or64,
+ binop(Iop_Shr64, mkexpr(sV), mkU8(8*d64)),
+ binop(Iop_Shl64, mkexpr(dV), mkU8(8*(8-d64))
+ )));
+ }
+ else if (d64 == 8) {
+ assign( res, mkexpr(dV) );
+ }
+ else if (d64 >= 9 && d64 <= 15) {
+ assign( res, binop(Iop_Shr64, mkexpr(dV), mkU8(8*(d64-8))) );
+ }
+ else if (d64 >= 16 && d64 <= 255) {
+ assign( res, mkU64(0) );
+ }
+ else
+ vassert(0);
+
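+ /* Illustrative case: d64 == 3 gives res = (sV >> 24) | (dV << 40),
+ so result bytes 0..4 are sV bytes 3..7 and bytes 5..7 are dV
+ bytes 0..2. */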
+ putMMXReg( gregLO3ofRM(modrm), mkexpr(res) );
+ goto decode_success;
+ }
+
+ /* 66 0F 3A 0F = PALIGNR -- Packed Align Right (XMM) */
+ if (have66noF2noF3(pfx)
+ && (sz == 2 || /*redundant REX.W*/ sz == 8)
+ && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x0F) {
+ IRTemp sV = newTemp(Ity_V128);
+ IRTemp dV = newTemp(Ity_V128);
+ IRTemp sHi = newTemp(Ity_I64);
+ IRTemp sLo = newTemp(Ity_I64);
+ IRTemp dHi = newTemp(Ity_I64);
+ IRTemp dLo = newTemp(Ity_I64);
+ IRTemp rHi = newTemp(Ity_I64);
+ IRTemp rLo = newTemp(Ity_I64);
+
+ modrm = insn[3];
+ assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );
+
+ if (epartIsReg(modrm)) {
+ assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
+ d64 = (Long)insn[3+1];
+ delta += 3+1+1;
+ DIP("palignr $%d,%s,%s\n", (Int)d64,
+ nameXMMReg(eregOfRexRM(pfx,modrm)),
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ } else {
+ addr = disAMode ( &alen, vbi, pfx, delta+3, dis_buf, 1 );
+ gen_SEGV_if_not_16_aligned( addr );
+ assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
+ d64 = (Long)insn[3+alen];
+ delta += 3+alen+1;
+ DIP("palignr $%d,%s,%s\n", (Int)d64,
+ dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ }
+
+ assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
+ assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
+ assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
+ assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
+
+ if (d64 == 0) {
+ assign( rHi, mkexpr(sHi) );
+ assign( rLo, mkexpr(sLo) );
+ }
+ else if (d64 >= 1 && d64 <= 7) {
+ assign( rHi, dis_PALIGNR_XMM_helper(dLo, sHi, d64) );
+ assign( rLo, dis_PALIGNR_XMM_helper(sHi, sLo, d64) );
+ }
+ else if (d64 == 8) {
+ assign( rHi, mkexpr(dLo) );
+ assign( rLo, mkexpr(sHi) );
+ }
+ else if (d64 >= 9 && d64 <= 15) {
+ assign( rHi, dis_PALIGNR_XMM_helper(dHi, dLo, d64-8) );
+ assign( rLo, dis_PALIGNR_XMM_helper(dLo, sHi, d64-8) );
+ }
+ else if (d64 == 16) {
+ assign( rHi, mkexpr(dHi) );
+ assign( rLo, mkexpr(dLo) );
+ }
+ else if (d64 >= 17 && d64 <= 23) {
+ assign( rHi, binop(Iop_Shr64, mkexpr(dHi), mkU8(8*(d64-16))) );
+ assign( rLo, dis_PALIGNR_XMM_helper(dHi, dLo, d64-16) );
+ }
+ else if (d64 == 24) {
+ assign( rHi, mkU64(0) );
+ assign( rLo, mkexpr(dHi) );
+ }
+ else if (d64 >= 25 && d64 <= 31) {
+ assign( rHi, mkU64(0) );
+ assign( rLo, binop(Iop_Shr64, mkexpr(dHi), mkU8(8*(d64-24))) );
+ }
+ else if (d64 >= 32 && d64 <= 255) {
+ assign( rHi, mkU64(0) );
+ assign( rLo, mkU64(0) );
+ }
+ else
+ vassert(0);
+
+ putXMMReg(
+ gregOfRexRM(pfx,modrm),
+ binop(Iop_64HLtoV128, mkexpr(rHi), mkexpr(rLo))
+ );
+ goto decode_success;
+ }
+
+ /* 0F 38 00 = PSHUFB -- Packed Shuffle Bytes 8x8 (MMX) */
+ if (haveNo66noF2noF3(pfx)
+ && sz == 4
+ && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x00) {
+ IRTemp sV = newTemp(Ity_I64);
+ IRTemp dV = newTemp(Ity_I64);
+
+ modrm = insn[3];
+ do_MMX_preamble();
+ assign( dV, getMMXReg(gregLO3ofRM(modrm)) );
+
+ if (epartIsReg(modrm)) {
+ assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
+ delta += 3+1;
+ DIP("pshufb %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
+ nameMMXReg(gregLO3ofRM(modrm)));
+ } else {
+ addr = disAMode ( &alen, vbi, pfx, delta+3, dis_buf, 0 );
+ assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
+ delta += 3+alen;
+ DIP("pshufb %s,%s\n", dis_buf,
+ nameMMXReg(gregLO3ofRM(modrm)));
+ }
+
+ putMMXReg(
+ gregLO3ofRM(modrm),
+ binop(
+ Iop_And64,
+ /* permute the lanes */
+ binop(
+ Iop_Perm8x8,
+ mkexpr(dV),
+ binop(Iop_And64, mkexpr(sV), mkU64(0x0707070707070707ULL))
+ ),
+ /* mask off lanes which have (index & 0x80) == 0x80 */
+ unop(Iop_Not64, binop(Iop_SarN8x8, mkexpr(sV), mkU8(7)))
+ )
+ );
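+ /* Example: a control byte of 0x83 in sV has bit 7 set, so the
+ corresponding result byte is forced to zero; a control byte of
+ 0x03 selects byte 3 of dV. */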
+ goto decode_success;
+ }
+
+ /* 66 0F 38 00 = PSHUFB -- Packed Shuffle Bytes 8x16 (XMM) */
+ if (have66noF2noF3(pfx)
+ && (sz == 2 || /*redundant REX.W*/ sz == 8)
+ && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x00) {
+ IRTemp sV = newTemp(Ity_V128);
+ IRTemp dV = newTemp(Ity_V128);
+ IRTemp sHi = newTemp(Ity_I64);
+ IRTemp sLo = newTemp(Ity_I64);
+ IRTemp dHi = newTemp(Ity_I64);
+ IRTemp dLo = newTemp(Ity_I64);
+ IRTemp rHi = newTemp(Ity_I64);
+ IRTemp rLo = newTemp(Ity_I64);
+ IRTemp sevens = newTemp(Ity_I64);
+ IRTemp mask0x80hi = newTemp(Ity_I64);
+ IRTemp mask0x80lo = newTemp(Ity_I64);
+ IRTemp maskBit3hi = newTemp(Ity_I64);
+ IRTemp maskBit3lo = newTemp(Ity_I64);
+ IRTemp sAnd7hi = newTemp(Ity_I64);
+ IRTemp sAnd7lo = newTemp(Ity_I64);
+ IRTemp permdHi = newTemp(Ity_I64);
+ IRTemp permdLo = newTemp(Ity_I64);
+
+ modrm = insn[3];
+ assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );
+
+ if (epartIsReg(modrm)) {
+ assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
+ delta += 3+1;
+ DIP("pshufb %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ } else {
+ addr = disAMode ( &alen, vbi, pfx, delta+3, dis_buf, 0 );
+ gen_SEGV_if_not_16_aligned( addr );
+ assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
+ delta += 3+alen;
+ DIP("pshufb %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ }
+
+ assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
+ assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
+ assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
+ assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
+
+ assign( sevens, mkU64(0x0707070707070707ULL) );
+
+ /*
+ mask0x80hi = Not(SarN8x8(sHi,7))
+ maskBit3hi = SarN8x8(ShlN8x8(sHi,4),7)
+ sAnd7hi = And(sHi,sevens)
+ permdHi = Or( And(Perm8x8(dHi,sAnd7hi),maskBit3hi),
+ And(Perm8x8(dLo,sAnd7hi),Not(maskBit3hi)) )
+ rHi = And(permdHi,mask0x80hi)
+ */
+ assign(
+ mask0x80hi,
+ unop(Iop_Not64, binop(Iop_SarN8x8,mkexpr(sHi),mkU8(7))));
+
+ assign(
+ maskBit3hi,
+ binop(Iop_SarN8x8,
+ binop(Iop_ShlN8x8,mkexpr(sHi),mkU8(4)),
+ mkU8(7)));
+
+ assign(sAnd7hi, binop(Iop_And64,mkexpr(sHi),mkexpr(sevens)));
+
+ assign(
+ permdHi,
+ binop(
+ Iop_Or64,
+ binop(Iop_And64,
+ binop(Iop_Perm8x8,mkexpr(dHi),mkexpr(sAnd7hi)),
+ mkexpr(maskBit3hi)),
+ binop(Iop_And64,
+ binop(Iop_Perm8x8,mkexpr(dLo),mkexpr(sAnd7hi)),
+ unop(Iop_Not64,mkexpr(maskBit3hi))) ));
+
+ assign(rHi, binop(Iop_And64,mkexpr(permdHi),mkexpr(mask0x80hi)) );
+
+ /* And the same for the lower half of the result. What fun. */
+
+ assign(
+ mask0x80lo,
+ unop(Iop_Not64, binop(Iop_SarN8x8,mkexpr(sLo),mkU8(7))));
+
+ assign(
+ maskBit3lo,
+ binop(Iop_SarN8x8,
+ binop(Iop_ShlN8x8,mkexpr(sLo),mkU8(4)),
+ mkU8(7)));
+
+ assign(sAnd7lo, binop(Iop_And64,mkexpr(sLo),mkexpr(sevens)));
+
+ assign(
+ permdLo,
+ binop(
+ Iop_Or64,
+ binop(Iop_And64,
+ binop(Iop_Perm8x8,mkexpr(dHi),mkexpr(sAnd7lo)),
+ mkexpr(maskBit3lo)),
+ binop(Iop_And64,
+ binop(Iop_Perm8x8,mkexpr(dLo),mkexpr(sAnd7lo)),
+ unop(Iop_Not64,mkexpr(maskBit3lo))) ));
+
+ assign(rLo, binop(Iop_And64,mkexpr(permdLo),mkexpr(mask0x80lo)) );
+
+ putXMMReg(
+ gregOfRexRM(pfx,modrm),
+ binop(Iop_64HLtoV128, mkexpr(rHi), mkexpr(rLo))
+ );
+ goto decode_success;
+ }
+
+ /* ---------------------------------------------------- */
+ /* --- end of the SSSE3 decoder. --- */
+ /* ---------------------------------------------------- */
+
+ /* ---------------------------------------------------- */
+ /* --- start of the SSE4 decoder --- */
+ /* ---------------------------------------------------- */
+
+ /* 66 0F 3A 0D /r ib = BLENDPD xmm1, xmm2/m128, imm8
+ Blend Packed Double Precision Floating-Point Values (XMM) */
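+ /* imm8 bit 0 selects the low double (src if set, else dst) and
+ bit 1 the high double; the remaining imm8 bits are ignored. */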
+ if ( have66noF2noF3( pfx )
+ && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x0D ) {
+
+ Int imm8;
+ UShort imm8_mask_16;
+
+ IRTemp dst_vec = newTemp(Ity_V128);
+ IRTemp src_vec = newTemp(Ity_V128);
+ IRTemp imm8_mask = newTemp(Ity_V128);
+
+ modrm = insn[3];
+ assign( dst_vec, getXMMReg( gregOfRexRM(pfx, modrm) ) );
+
+ if ( epartIsReg( modrm ) ) {
+ imm8 = (Int)insn[4];
+ assign( src_vec, getXMMReg( eregOfRexRM(pfx, modrm) ) );
+ delta += 3+1+1;
+ DIP( "blendpd $%d, %s,%s\n", imm8,
+ nameXMMReg( eregOfRexRM(pfx, modrm) ),
+ nameXMMReg( gregOfRexRM(pfx, modrm) ) );
+ } else {
+ addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf,
+ 1/* imm8 is 1 byte after the amode */ );
+ gen_SEGV_if_not_16_aligned( addr );
+ assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) );
+ imm8 = (Int)insn[2+alen+1];
+ delta += 3+alen+1;
+ DIP( "blendpd $%d, %s,%s\n",
+ imm8, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
+ }
+
+ switch( imm8 & 3 ) {
+ case 0: imm8_mask_16 = 0x0000; break;
+ case 1: imm8_mask_16 = 0x00FF; break;
+ case 2: imm8_mask_16 = 0xFF00; break;
+ case 3: imm8_mask_16 = 0xFFFF; break;
+ default: vassert(0); break;
+ }
+ assign( imm8_mask, mkV128( imm8_mask_16 ) );
+
+ putXMMReg( gregOfRexRM(pfx, modrm),
+ binop( Iop_OrV128,
+ binop( Iop_AndV128, mkexpr(src_vec), mkexpr(imm8_mask) ),
+ binop( Iop_AndV128, mkexpr(dst_vec),
+ unop( Iop_NotV128, mkexpr(imm8_mask) ) ) ) );
+
+ goto decode_success;
+ }
+
+
+ /* 66 0F 3A 0C /r ib = BLENDPS xmm1, xmm2/m128, imm8
+ Blend Packed Single Precision Floating-Point Values (XMM) */
+ if ( have66noF2noF3( pfx )
+ && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x0C ) {
+
+ Int imm8;
+ IRTemp dst_vec = newTemp(Ity_V128);
+ IRTemp src_vec = newTemp(Ity_V128);
+
+ modrm = insn[3];
+
+ assign( dst_vec, getXMMReg( gregOfRexRM(pfx, modrm) ) );
+
+ if ( epartIsReg( modrm ) ) {
+ imm8 = (Int)insn[3+1];
+ assign( src_vec, getXMMReg( eregOfRexRM(pfx, modrm) ) );
+ delta += 3+1+1;
+ DIP( "blendps $%d, %s,%s\n", imm8,
+ nameXMMReg( eregOfRexRM(pfx, modrm) ),
+ nameXMMReg( gregOfRexRM(pfx, modrm) ) );
+ } else {
+ addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf,
+ 1/* imm8 is 1 byte after the amode */ );
+ gen_SEGV_if_not_16_aligned( addr );
+ assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) );
+ imm8 = (Int)insn[3+alen];
+ delta += 3+alen+1;
+ DIP( "blendpd $%d, %s,%s\n",
+ imm8, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
+ }
+
+ UShort imm8_perms[16] = { 0x0000, 0x000F, 0x00F0, 0x00FF, 0x0F00, 0x0F0F,
+ 0x0FF0, 0x0FFF, 0xF000, 0xF00F, 0xF0F0, 0xF0FF,
+ 0xFF00, 0xFF0F, 0xFFF0, 0xFFFF };
+ IRTemp imm8_mask = newTemp(Ity_V128);
+ assign( imm8_mask, mkV128( imm8_perms[ (imm8 & 15) ] ) );
+
+ putXMMReg( gregOfRexRM(pfx, modrm),
+ binop( Iop_OrV128,
+ binop( Iop_AndV128, mkexpr(src_vec), mkexpr(imm8_mask) ),
+ binop( Iop_AndV128, mkexpr(dst_vec),
+ unop( Iop_NotV128, mkexpr(imm8_mask) ) ) ) );
+
+ goto decode_success;
+ }
+
+
+ /* 66 0F 3A 44 /r ib = PCLMULQDQ xmm1, xmm2/m128, imm8
+ * Carry-less multiplication of selected XMM quadwords into XMM
+ * registers (a.k.a. multiplication of polynomials over GF(2))
+ */
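+ /* Carry-less means partial products are XORed rather than added:
+ e.g. 0b11 (x) 0b11 = 0b11 ^ 0b110 = 0b101, whereas the ordinary
+ product 3*3 is 0b1001. */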
+ if ( have66noF2noF3( pfx )
+ && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x44 ) {
+
+ Int imm8;
+ IRTemp svec = newTemp(Ity_V128);
+ IRTemp dvec = newTemp(Ity_V128);
+
+ modrm = insn[3];
+
+ assign( dvec, getXMMReg( gregOfRexRM(pfx, modrm) ) );
+
+ if ( epartIsReg( modrm ) ) {
+ imm8 = (Int)insn[4];
+ assign( svec, getXMMReg( eregOfRexRM(pfx, modrm) ) );
+ delta += 3+1+1;
+ DIP( "pclmulqdq $%d, %s,%s\n", imm8,
+ nameXMMReg( eregOfRexRM(pfx, modrm) ),
+ nameXMMReg( gregOfRexRM(pfx, modrm) ) );
+ } else {
+ addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf,
+ 1/* imm8 is 1 byte after the amode */ );
+ gen_SEGV_if_not_16_aligned( addr );
+ assign( svec, loadLE( Ity_V128, mkexpr(addr) ) );
+ imm8 = (Int)insn[2+alen+1];
+ delta += 3+alen+1;
+ DIP( "pclmulqdq $%d, %s,%s\n",
+ imm8, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
+ }
+
+ t0 = newTemp(Ity_I64);
+ t1 = newTemp(Ity_I64);
+ assign(t0, unop((imm8&1)? Iop_V128HIto64 : Iop_V128to64, mkexpr(dvec)));
+ assign(t1, unop((imm8&16) ? Iop_V128HIto64 : Iop_V128to64, mkexpr(svec)));
+
+ t2 = newTemp(Ity_I64);
+ t3 = newTemp(Ity_I64);
+
+ IRExpr** args;
+
+ args = mkIRExprVec_3(mkexpr(t0), mkexpr(t1), mkU64(0));
+ assign(t2,
+ mkIRExprCCall(Ity_I64,0, "amd64g_calculate_pclmul",
+ &amd64g_calculate_pclmul, args));
+ args = mkIRExprVec_3(mkexpr(t0), mkexpr(t1), mkU64(1));
+ assign(t3,
+ mkIRExprCCall(Ity_I64,0, "amd64g_calculate_pclmul",
+ &amd64g_calculate_pclmul, args));
+
+ IRTemp res = newTemp(Ity_V128);
+ assign(res, binop(Iop_64HLtoV128, mkexpr(t3), mkexpr(t2)));
+ putXMMReg( gregOfRexRM(pfx,modrm), mkexpr(res) );
+
+ goto decode_success;
+ }
+
+ /* 66 0F 3A 41 /r ib = DPPD xmm1, xmm2/m128, imm8
+ Dot Product of Packed Double Precision Floating-Point Values (XMM) */
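+ /* Per the Intel docs, imm8 bits 5:4 mask the two products before
+ the horizontal add, and imm8 bits 1:0 select which result lanes
+ receive the sum; the other lanes are zeroed. */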
+ if ( have66noF2noF3( pfx )
+ && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x41 ) {
+
+ Int imm8;
+ IRTemp src_vec = newTemp(Ity_V128);
+ IRTemp dst_vec = newTemp(Ity_V128);
+ IRTemp and_vec = newTemp(Ity_V128);
+ IRTemp sum_vec = newTemp(Ity_V128);
+
+ modrm = insn[3];
+
+ assign( dst_vec, getXMMReg( gregOfRexRM(pfx, modrm) ) );
+
+ if ( epartIsReg( modrm ) ) {
+ imm8 = (Int)insn[4];
+ assign( src_vec, getXMMReg( eregOfRexRM(pfx, modrm) ) );
+ delta += 3+1+1;
+ DIP( "dppd $%d, %s,%s\n", imm8,
+ nameXMMReg( eregOfRexRM(pfx, modrm) ),
+ nameXMMReg( gregOfRexRM(pfx, modrm) ) );
+ } else {
+ addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf,
+ 1/* imm8 is 1 byte after the amode */ );
+ gen_SEGV_if_not_16_aligned( addr );
+ assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) );
+ imm8 = (Int)insn[2+alen+1];
+ delta += 3+alen+1;
+ DIP( "dppd $%d, %s,%s\n",
+ imm8, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
+ }
+
+ UShort imm8_perms[4] = { 0x0000, 0x00FF, 0xFF00, 0xFFFF };
+
+ assign( and_vec, binop( Iop_AndV128,
+ binop( Iop_Mul64Fx2,
+ mkexpr(dst_vec), mkexpr(src_vec) ),
+ mkV128( imm8_perms[ ((imm8 >> 4) & 3) ] ) ) );
+
+ assign( sum_vec, binop( Iop_Add64F0x2,
+ binop( Iop_InterleaveHI64x2,
+ mkexpr(and_vec), mkexpr(and_vec) ),
+ binop( Iop_InterleaveLO64x2,
+ mkexpr(and_vec), mkexpr(and_vec) ) ) );
+
+ putXMMReg( gregOfRexRM( pfx, modrm ),
+ binop( Iop_AndV128,
+ binop( Iop_InterleaveLO64x2,
+ mkexpr(sum_vec), mkexpr(sum_vec) ),
+ mkV128( imm8_perms[ (imm8 & 3) ] ) ) );
+
+ goto decode_success;
+ }
+
+
+ /* 66 0F 3A 40 /r ib = DPPS xmm1, xmm2/m128, imm8
+ Dot Product of Packed Single Precision Floating-Point Values (XMM) */
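+ /* As for DPPD but with four lanes: imm8 bits 7:4 mask the
+ products and imm8 bits 3:0 select the result lanes written. */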
+ if ( have66noF2noF3( pfx )
+ && sz == 2
+ && insn[0] == 0x0F
+ && insn[1] == 0x3A
+ && insn[2] == 0x40 ) {
+
+ Int imm8;
+ IRTemp xmm1_vec = newTemp(Ity_V128);
+ IRTemp xmm2_vec = newTemp(Ity_V128);
+ IRTemp tmp_prod_vec = newTemp(Ity_V128);
+ IRTemp prod_vec = newTemp(Ity_V128);
+ IRTemp sum_vec = newTemp(Ity_V128);
+ IRTemp v3, v2, v1, v0;
+ v3 = v2 = v1 = v0 = IRTemp_INVALID;
+
+ modrm = insn[3];
+
+ assign( xmm1_vec, getXMMReg( gregOfRexRM(pfx, modrm) ) );
+
+ if ( epartIsReg( modrm ) ) {
+ imm8 = (Int)insn[4];
+ assign( xmm2_vec, getXMMReg( eregOfRexRM(pfx, modrm) ) );
+ delta += 3+1+1;
+ DIP( "dpps $%d, %s,%s\n", imm8,
+ nameXMMReg( eregOfRexRM(pfx, modrm) ),
+ nameXMMReg( gregOfRexRM(pfx, modrm) ) );
+ } else {
+ addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf,
+ 1/* imm8 is 1 byte after the amode */ );
+ gen_SEGV_if_not_16_aligned( addr );
+ assign( xmm2_vec, loadLE( Ity_V128, mkexpr(addr) ) );
+ imm8 = (Int)insn[2+alen+1];
+ delta += 3+alen+1;
+ DIP( "dpps $%d, %s,%s\n",
+ imm8, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
+ }
+
+ UShort imm8_perms[16] = { 0x0000, 0x000F, 0x00F0, 0x00FF, 0x0F00,
+ 0x0F0F, 0x0FF0, 0x0FFF, 0xF000, 0xF00F,
+ 0xF0F0, 0xF0FF, 0xFF00, 0xFF0F, 0xFFF0, 0xFFFF };
+
+ assign( tmp_prod_vec,
+ binop( Iop_AndV128,
+ binop( Iop_Mul32Fx4, mkexpr(xmm1_vec), mkexpr(xmm2_vec) ),
+ mkV128( imm8_perms[((imm8 >> 4)& 15)] ) ) );
+ breakup128to32s( tmp_prod_vec, &v3, &v2, &v1, &v0 );
+ assign( prod_vec, mk128from32s( v3, v1, v2, v0 ) );
+
+ assign( sum_vec, binop( Iop_Add32Fx4,
+ binop( Iop_InterleaveHI32x4,
+ mkexpr(prod_vec), mkexpr(prod_vec) ),
+ binop( Iop_InterleaveLO32x4,
+ mkexpr(prod_vec), mkexpr(prod_vec) ) ) );
+
+ putXMMReg( gregOfRexRM(pfx, modrm),
+ binop( Iop_AndV128,
+ binop( Iop_Add32Fx4,
+ binop( Iop_InterleaveHI32x4,
+ mkexpr(sum_vec), mkexpr(sum_vec) ),
+ binop( Iop_InterleaveLO32x4,
+ mkexpr(sum_vec), mkexpr(sum_vec) ) ),
+ mkV128( imm8_perms[ (imm8 & 15) ] ) ) );
+
+ goto decode_success;
+ }
+
+
+ /* 66 0F 3A 21 /r ib = INSERTPS xmm1, xmm2/m32, imm8
+ Insert Packed Single Precision Floating-Point Value (XMM) */
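+ /* imm8 layout (per the Intel docs): bits 7:6 select the source
+ lane (register form only), bits 5:4 select the destination
+ lane, and bits 3:0 form a zero-mask applied to the result. */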
+ if ( have66noF2noF3( pfx )
+ && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x21 ) {
+
+ Int imm8;
+ Int imm8_count_s;
+ Int imm8_count_d;
+ Int imm8_zmask;
+ IRTemp dstVec = newTemp(Ity_V128);
+ IRTemp srcDWord = newTemp(Ity_I32);
+
+ modrm = insn[3];
+
+ assign( dstVec, getXMMReg( gregOfRexRM(pfx, modrm) ) );
+
+ if ( epartIsReg( modrm ) ) {
+ IRTemp src_vec = newTemp(Ity_V128);
+ assign( src_vec, getXMMReg( eregOfRexRM(pfx, modrm) ) );
+
+ IRTemp src_lane_0 = IRTemp_INVALID;
+ IRTemp src_lane_1 = IRTemp_INVALID;
+ IRTemp src_lane_2 = IRTemp_INVALID;
+ IRTemp src_lane_3 = IRTemp_INVALID;
+ breakup128to32s( src_vec,
+ &src_lane_3, &src_lane_2, &src_lane_1, &src_lane_0 );
+
+ imm8 = (Int)insn[4];
+ imm8_count_s = ((imm8 >> 6) & 3);
+ switch( imm8_count_s ) {
+ case 0: assign( srcDWord, mkexpr(src_lane_0) ); break;
+ case 1: assign( srcDWord, mkexpr(src_lane_1) ); break;
+ case 2: assign( srcDWord, mkexpr(src_lane_2) ); break;
+ case 3: assign( srcDWord, mkexpr(src_lane_3) ); break;
+ default: vassert(0); break;
+ }
+
+ delta += 3+1+1;
+ DIP( "insertps $%d, %s,%s\n", imm8,
+ nameXMMReg( eregOfRexRM(pfx, modrm) ),
+ nameXMMReg( gregOfRexRM(pfx, modrm) ) );
+ } else {
+ addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf,
+ 1/* const imm8 is 1 byte after the amode */ );
+ assign( srcDWord, loadLE( Ity_I32, mkexpr(addr) ) );
+ imm8 = (Int)insn[2+alen+1];
+ imm8_count_s = 0;
+ delta += 3+alen+1;
+ DIP( "insertps $%d, %s,%s\n",
+ imm8, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
+ }
+
+ IRTemp dst_lane_0 = IRTemp_INVALID;
+ IRTemp dst_lane_1 = IRTemp_INVALID;
+ IRTemp dst_lane_2 = IRTemp_INVALID;
+ IRTemp dst_lane_3 = IRTemp_INVALID;
+ breakup128to32s( dstVec,
+ &dst_lane_3, &dst_lane_2, &dst_lane_1, &dst_lane_0 );
+
+ imm8_count_d = ((imm8 >> 4) & 3);
+ switch( imm8_count_d ) {
+ case 0: dst_lane_0 = srcDWord; break;
+ case 1: dst_lane_1 = srcDWord; break;
+ case 2: dst_lane_2 = srcDWord; break;
+ case 3: dst_lane_3 = srcDWord; break;
+ default: vassert(0); break;
+ }
+
+ imm8_zmask = (imm8 & 15);
+ IRTemp zero_32 = newTemp(Ity_I32);
+ assign( zero_32, mkU32(0) );
+
+ IRExpr* ire_vec_128 = mk128from32s(
+ ((imm8_zmask & 8) == 8) ? zero_32 : dst_lane_3,
+ ((imm8_zmask & 4) == 4) ? zero_32 : dst_lane_2,
+ ((imm8_zmask & 2) == 2) ? zero_32 : dst_lane_1,
+ ((imm8_zmask & 1) == 1) ? zero_32 : dst_lane_0 );
+
+ putXMMReg( gregOfRexRM(pfx, modrm), ire_vec_128 );
+
+ goto decode_success;
+ }
+
+
+ /* 66 0F 3A 14 /r ib = PEXTRB reg/m8, xmm, imm8
+ Extract Byte from xmm, store in mem or zero-extend + store in gen.reg. (XMM) */
+ if ( have66noF2noF3( pfx )
+ && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x14 ) {
+
+ Int imm8;
+ IRTemp xmm_vec = newTemp(Ity_V128);
+ IRTemp sel_lane = newTemp(Ity_I32);
+ IRTemp shr_lane = newTemp(Ity_I32);
+
+ modrm = insn[3];
+ assign( xmm_vec, getXMMReg( gregOfRexRM(pfx,modrm) ) );
+ breakup128to32s( xmm_vec, &t3, &t2, &t1, &t0 );
+
+ if ( epartIsReg( modrm ) ) {
+ imm8 = (Int)insn[3+1];
+ } else {
+ addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 1 );
+ imm8 = (Int)insn[3+alen];
+ }
+ switch( (imm8 >> 2) & 3 ) {
+ case 0: assign( sel_lane, mkexpr(t0) ); break;
+ case 1: assign( sel_lane, mkexpr(t1) ); break;
+ case 2: assign( sel_lane, mkexpr(t2) ); break;
+ case 3: assign( sel_lane, mkexpr(t3) ); break;
+ default: vassert(0);
+ }
+ assign( shr_lane,
+ binop( Iop_Shr32, mkexpr(sel_lane), mkU8(((imm8 & 3)*8)) ) );
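+ /* e.g. imm8 == 6: (6 >> 2) & 3 == 1 picks dword t1, and
+ (6 & 3) * 8 == 16 shifts source byte 6 into the low 8 bits. */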
+
+ if ( epartIsReg( modrm ) ) {
+ putIReg64( eregOfRexRM(pfx,modrm),
+ unop( Iop_32Uto64,
+ binop(Iop_And32, mkexpr(shr_lane), mkU32(255)) ) );
+
+ delta += 3+1+1;
+ DIP( "pextrb $%d, %s,%s\n", imm8,
+ nameXMMReg( gregOfRexRM(pfx, modrm) ),
+ nameIReg64( eregOfRexRM(pfx, modrm) ) );
+ } else {
+ storeLE( mkexpr(addr), unop(Iop_32to8, mkexpr(shr_lane) ) );
+ delta += 3+alen+1;
+ DIP( "$%d, pextrb %s,%s\n",
+ imm8, nameXMMReg( gregOfRexRM(pfx, modrm) ), dis_buf );
+ }
+
+ goto decode_success;
+ }
+
+
+ /* 66 0F 3A 16 /r ib = PEXTRD reg/mem32, xmm2, imm8
+ Extract Doubleword int from xmm reg and store in gen.reg or mem. (XMM)
+ Note that this insn has the same opcodes as PEXTRQ, but
+ here the REX.W bit is _not_ present */
+ if ( have66noF2noF3( pfx )
+ && sz == 2 /* REX.W is _not_ present */
+ && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x16 ) {
+
+ Int imm8_10;
+ IRTemp xmm_vec = newTemp(Ity_V128);
+ IRTemp src_dword = newTemp(Ity_I32);
+
+ modrm = insn[3];
+ assign( xmm_vec, getXMMReg( gregOfRexRM(pfx,modrm) ) );
+ breakup128to32s( xmm_vec, &t3, &t2, &t1, &t0 );
+
+ if ( epartIsReg( modrm ) ) {
+ imm8_10 = (Int)(insn[3+1] & 3);
+ } else {
+ addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 1 );
+ imm8_10 = (Int)(insn[3+alen] & 3);
+ }
+
+ switch ( imm8_10 ) {
+ case 0: assign( src_dword, mkexpr(t0) ); break;
+ case 1: assign( src_dword, mkexpr(t1) ); break;
+ case 2: assign( src_dword, mkexpr(t2) ); break;
+ case 3: assign( src_dword, mkexpr(t3) ); break;
+ default: vassert(0);
+ }
+
+ if ( epartIsReg( modrm ) ) {
+ putIReg32( eregOfRexRM(pfx,modrm), mkexpr(src_dword) );
+ delta += 3+1+1;
+ DIP( "pextrd $%d, %s,%s\n", imm8_10,
+ nameXMMReg( gregOfRexRM(pfx, modrm) ),
+ nameIReg32( eregOfRexRM(pfx, modrm) ) );
+ } else {
+ storeLE( mkexpr(addr), mkexpr(src_dword) );
+ delta += 3+alen+1;
+ DIP( "pextrd $%d, %s,%s\n",
+ imm8_10, nameXMMReg( gregOfRexRM(pfx, modrm) ), dis_buf );
+ }
+
+ goto decode_success;
+ }
+
+
+ /* 66 REX.W 0F 3A 16 /r ib = PEXTRQ reg/mem64, xmm2, imm8
+ Extract Quadword int from xmm reg and store in gen.reg or mem. (XMM)
+ Note that this insn has the same opcodes as PEXTRD, but
+ here the REX.W bit is present */
+ if ( have66noF2noF3( pfx )
+ && sz == 8 /* REX.W is present */
+ && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x16 ) {
+
+ Int imm8_0;
+ IRTemp xmm_vec = newTemp(Ity_V128);
+ IRTemp src_qword = newTemp(Ity_I64);
+
+ modrm = insn[3];
+ assign( xmm_vec, getXMMReg( gregOfRexRM(pfx,modrm) ) );
+
+ if ( epartIsReg( modrm ) ) {
+ imm8_0 = (Int)(insn[3+1] & 1);
+ } else {
+ addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 1 );
+ imm8_0 = (Int)(insn[3+alen] & 1);
+ }
+ switch ( imm8_0 ) {
+ case 0: assign( src_qword, unop(Iop_V128to64, mkexpr(xmm_vec)) ); break;
+ case 1: assign( src_qword, unop(Iop_V128HIto64, mkexpr(xmm_vec)) ); break;
+ default: vassert(0);
+ }
+
+ if ( epartIsReg( modrm ) ) {
+ putIReg64( eregOfRexRM(pfx,modrm), mkexpr(src_qword) );
+ delta += 3+1+1;
+ DIP( "pextrq $%d, %s,%s\n", imm8_0,
+ nameXMMReg( gregOfRexRM(pfx, modrm) ),
+ nameIReg64( eregOfRexRM(pfx, modrm) ) );
+ } else {
+ storeLE( mkexpr(addr), mkexpr(src_qword) );
+ delta += 3+alen+1;
+ DIP( "pextrq $%d, %s,%s\n",
+ imm8_0, nameXMMReg( gregOfRexRM(pfx, modrm) ), dis_buf );
+ }
+
+ goto decode_success;
+ }
+
+
+ /* 66 0F 3A 15 /r ib = PEXTRW r/m16, xmm, imm8
+ Extract Word from xmm, store in mem or zero-extend + store in gen.reg. (XMM) */
+ if ( have66noF2noF3( pfx )
+ && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x15 ) {
+
+ Int imm8_20;
+ IRTemp xmm_vec = newTemp(Ity_V128);
+ IRTemp src_word = newTemp(Ity_I16);
+
+ modrm = insn[3];
+ assign( xmm_vec, getXMMReg( gregOfRexRM(pfx,modrm) ) );
+ breakup128to32s( xmm_vec, &t3, &t2, &t1, &t0 );
+
+ if ( epartIsReg( modrm ) ) {
+ imm8_20 = (Int)(insn[3+1] & 7);
+ } else {
+ addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 1 );
+ imm8_20 = (Int)(insn[3+alen] & 7);
+ }
+
+ switch ( imm8_20 ) {
+ case 0: assign( src_word, unop(Iop_32to16, mkexpr(t0)) ); break;
+ case 1: assign( src_word, unop(Iop_32HIto16, mkexpr(t0)) ); break;
+ case 2: assign( src_word, unop(Iop_32to16, mkexpr(t1)) ); break;
+ case 3: assign( src_word, unop(Iop_32HIto16, mkexpr(t1)) ); break;
+ case 4: assign( src_word, unop(Iop_32to16, mkexpr(t2)) ); break;
+ case 5: assign( src_word, unop(Iop_32HIto16, mkexpr(t2)) ); break;
+ case 6: assign( src_word, unop(Iop_32to16, mkexpr(t3)) ); break;
+ case 7: assign( src_word, unop(Iop_32HIto16, mkexpr(t3)) ); break;
+ default: vassert(0);
+ }
+
+ if ( epartIsReg( modrm ) ) {
+ putIReg64( eregOfRexRM(pfx,modrm), unop(Iop_16Uto64, mkexpr(src_word)) );
+ delta += 3+1+1;
+ DIP( "pextrw $%d, %s,%s\n", imm8_20,
+ nameXMMReg( gregOfRexRM(pfx, modrm) ),
+ nameIReg64( eregOfRexRM(pfx, modrm) ) );
+ } else {
+ storeLE( mkexpr(addr), mkexpr(src_word) );
+ delta += 3+alen+1;
+ DIP( "pextrw $%d, %s,%s\n",
+ imm8_20, nameXMMReg( gregOfRexRM(pfx, modrm) ), dis_buf );
+ }
+
+ goto decode_success;
+ }
+
+
+ /* 66 REX.W 0F 3A 22 /r ib = PINSRQ xmm1, r/m64, imm8
+ Extract Quadword int from gen.reg/mem64 and insert into xmm1 */
+ if ( have66noF2noF3( pfx )
+ && sz == 8 /* REX.W is present */
+ && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x22 ) {
+
+ Int imm8_0;
+ IRTemp src_elems = newTemp(Ity_I64);
+ IRTemp src_vec = newTemp(Ity_V128);
+
+ modrm = insn[3];
+
+ if ( epartIsReg( modrm ) ) {
+ imm8_0 = (Int)(insn[3+1] & 1);
+ assign( src_elems, getIReg64( eregOfRexRM(pfx,modrm) ) );
+ delta += 3+1+1;
+ DIP( "pinsrq $%d, %s,%s\n", imm8_0,
+ nameIReg64( eregOfRexRM(pfx, modrm) ),
+ nameXMMReg( gregOfRexRM(pfx, modrm) ) );
+ } else {
+ addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 1 );
+ imm8_0 = (Int)(insn[3+alen] & 1);
+ assign( src_elems, loadLE( Ity_I64, mkexpr(addr) ) );
+ delta += 3+alen+1;
+ DIP( "pinsrq $%d, %s,%s\n",
+ imm8_0, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
+ }
+
+ UShort mask = 0;
+ if ( imm8_0 == 0 ) {
+ mask = 0xFF00;
+ assign( src_vec, binop( Iop_64HLtoV128, mkU64(0), mkexpr(src_elems) ) );
+ } else {
+ mask = 0x00FF;
+ assign( src_vec, binop( Iop_64HLtoV128, mkexpr(src_elems), mkU64(0) ) );
+ }
+
+ putXMMReg( gregOfRexRM(pfx, modrm),
+ binop( Iop_OrV128, mkexpr(src_vec),
+ binop( Iop_AndV128,
+ getXMMReg( gregOfRexRM(pfx, modrm) ),
+ mkV128(mask) ) ) );
+
+ goto decode_success;
+ }
+
+
+ /* 66 no-REX.W 0F 3A 22 /r ib = PINSRD xmm1, r/m32, imm8
+ Extract Doubleword int from gen.reg/mem32 and insert into xmm1 */
+ if ( have66noF2noF3( pfx )
+ && sz == 2 /* REX.W is NOT present */
+ && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x22 ) {
+
+ Int imm8_10;
+ IRTemp src_elems = newTemp(Ity_I32);
+ IRTemp src_vec = newTemp(Ity_V128);
+ IRTemp z32 = newTemp(Ity_I32);
+
+ modrm = insn[3];
+
+ if ( epartIsReg( modrm ) ) {
+ imm8_10 = (Int)(insn[3+1] & 3);
+ assign( src_elems, getIReg32( eregOfRexRM(pfx,modrm) ) );
+ delta += 3+1+1;
+ DIP( "pinsrd $%d, %s,%s\n", imm8_10,
+ nameIReg32( eregOfRexRM(pfx, modrm) ),
+ nameXMMReg( gregOfRexRM(pfx, modrm) ) );
+ } else {
+ addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 1 );
+ imm8_10 = (Int)(insn[3+alen] & 3);
+ assign( src_elems, loadLE( Ity_I32, mkexpr(addr) ) );
+ delta += 3+alen+1;
+ DIP( "pinsrd $%d, %s,%s\n",
+ imm8_10, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
+ }
+
+ assign(z32, mkU32(0));
+
+ UShort mask = 0;
+ switch (imm8_10) {
+ case 3: mask = 0x0FFF;
+ assign(src_vec, mk128from32s(src_elems, z32, z32, z32));
+ break;
+ case 2: mask = 0xF0FF;
+ assign(src_vec, mk128from32s(z32, src_elems, z32, z32));
+ break;
+ case 1: mask = 0xFF0F;
+ assign(src_vec, mk128from32s(z32, z32, src_elems, z32));
+ break;
+ case 0: mask = 0xFFF0;
+ assign(src_vec, mk128from32s(z32, z32, z32, src_elems));
+ break;
+ default: vassert(0);
+ }
+
+ putXMMReg( gregOfRexRM(pfx, modrm),
+ binop( Iop_OrV128, mkexpr(src_vec),
+ binop( Iop_AndV128,
+ getXMMReg( gregOfRexRM(pfx, modrm) ),
+ mkV128(mask) ) ) );
+
+ goto decode_success;
+ }
+
+ /* 66 0F 3A 20 /r ib = PINSRB xmm1, r32/m8, imm8
+ Extract byte from r32/m8 and insert into xmm1 */
+ if ( have66noF2noF3( pfx )
+ && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x20 ) {
+
+ Int imm8;
+ IRTemp new8 = newTemp(Ity_I64);
+
+ modrm = insn[3];
+
+ if ( epartIsReg( modrm ) ) {
+ imm8 = (Int)(insn[3+1] & 0xF);
+ assign( new8, binop(Iop_And64,
+ unop(Iop_32Uto64,
+ getIReg32(eregOfRexRM(pfx,modrm))),
+ mkU64(0xFF)));
+ delta += 3+1+1;
+ DIP( "pinsrb $%d,%s,%s\n", imm8,
+ nameIReg32( eregOfRexRM(pfx, modrm) ),
+ nameXMMReg( gregOfRexRM(pfx, modrm) ) );
+ } else {
+ addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 1 );
+ imm8 = (Int)(insn[3+alen] & 0xF);
+ assign( new8, unop(Iop_8Uto64, loadLE( Ity_I8, mkexpr(addr) )));
+ delta += 3+alen+1;
+ DIP( "pinsrb $%d,%s,%s\n",
+ imm8, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
+ }
+
+ // Create a V128 value which has the selected byte in the
+ // specified lane, and zeroes everywhere else.
+ IRTemp tmp128 = newTemp(Ity_V128);
+ IRTemp halfshift = newTemp(Ity_I64);
+ assign(halfshift, binop(Iop_Shl64,
+ mkexpr(new8), mkU8(8 * (imm8 & 7))));
+ vassert(imm8 >= 0 && imm8 <= 15);
+ if (imm8 < 8) {
+ assign(tmp128, binop(Iop_64HLtoV128, mkU64(0), mkexpr(halfshift)));
+ } else {
+ assign(tmp128, binop(Iop_64HLtoV128, mkexpr(halfshift), mkU64(0)));
+ }
+
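+      // The mask has every bit set except the one for lane imm8, so
+      // the AndV128 below zeroes just that byte of the destination
+      // before the new byte is OR'd in.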
+ UShort mask = ~(1 << imm8);
+
+ putXMMReg( gregOfRexRM(pfx, modrm),
+ binop( Iop_OrV128,
+ mkexpr(tmp128),
+ binop( Iop_AndV128,
+ getXMMReg( gregOfRexRM(pfx, modrm) ),
+ mkV128(mask) ) ) );
+
+ goto decode_success;
+ }
+
+   /* 66 0F 38 37 = PCMPGTQ
+      64x2 comparison (signed; the SDM does specify a signed compare)
+   */
+ if ( have66noF2noF3( pfx ) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x37) {
+ /* FIXME: this needs an alignment check */
+ delta = dis_SSEint_E_to_G( vbi, pfx, delta+3,
+ "pcmpgtq", Iop_CmpGT64Sx2, False );
+ goto decode_success;
+ }
+
+ /* 66 0F 38 3D /r = PMAXSD xmm1, xmm2/m128
+ Maximum of Packed Signed Double Word Integers (XMM)
+ 66 0F 38 39 /r = PMINSD xmm1, xmm2/m128
+ Minimum of Packed Signed Double Word Integers (XMM) */
+ if ( have66noF2noF3( pfx ) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x38
+ && (insn[2] == 0x3D || insn[2] == 0x39)) {
+ /* FIXME: this needs an alignment check */
+ Bool isMAX = insn[2] == 0x3D;
+ delta = dis_SSEint_E_to_G(
+ vbi, pfx, delta+3,
+ isMAX ? "pmaxsd" : "pminsd",
+ isMAX ? Iop_Max32Sx4 : Iop_Min32Sx4,
+ False
+ );
+ goto decode_success;
+ }
+
+ /* 66 0F 38 3F /r = PMAXUD xmm1, xmm2/m128
+ Maximum of Packed Unsigned Doubleword Integers (XMM)
+ 66 0F 38 3B /r = PMINUD xmm1, xmm2/m128
+ Minimum of Packed Unsigned Doubleword Integers (XMM) */
+ if ( have66noF2noF3( pfx ) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x38
+ && (insn[2] == 0x3F || insn[2] == 0x3B)) {
+ /* FIXME: this needs an alignment check */
+ Bool isMAX = insn[2] == 0x3F;
+ delta = dis_SSEint_E_to_G(
+ vbi, pfx, delta+3,
+ isMAX ? "pmaxud" : "pminud",
+ isMAX ? Iop_Max32Ux4 : Iop_Min32Ux4,
+ False
+ );
+ goto decode_success;
+ }
+
+ /* 66 0F 38 3E /r = PMAXUW xmm1, xmm2/m128
+ Maximum of Packed Unsigned Word Integers (XMM)
+ 66 0F 38 3A /r = PMINUW xmm1, xmm2/m128
+ Minimum of Packed Unsigned Word Integers (XMM)
+ */
+ if ( have66noF2noF3( pfx ) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x38
+ && (insn[2] == 0x3E || insn[2] == 0x3A)) {
+ /* FIXME: this needs an alignment check */
+ Bool isMAX = insn[2] == 0x3E;
+ delta = dis_SSEint_E_to_G(
+ vbi, pfx, delta+3,
+ isMAX ? "pmaxuw" : "pminuw",
+ isMAX ? Iop_Max16Ux8 : Iop_Min16Ux8,
+ False
+ );
+ goto decode_success;
+ }
+
+ /* 66 0F 38 3C /r = PMAXSB xmm1, xmm2/m128
+ 8Sx16 (signed) max
+ 66 0F 38 38 /r = PMINSB xmm1, xmm2/m128
+ 8Sx16 (signed) min
+ */
+ if ( have66noF2noF3( pfx ) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x38
+ && (insn[2] == 0x3C || insn[2] == 0x38)) {
+ /* FIXME: this needs an alignment check */
+ Bool isMAX = insn[2] == 0x3C;
+ delta = dis_SSEint_E_to_G(
+ vbi, pfx, delta+3,
+ isMAX ? "pmaxsb" : "pminsb",
+ isMAX ? Iop_Max8Sx16 : Iop_Min8Sx16,
+ False
+ );
+ goto decode_success;
+ }
+
+ /* 66 0f 38 20 /r = PMOVSXBW xmm1, xmm2/m64
+ Packed Move with Sign Extend from Byte to Word (XMM) */
+ if ( have66noF2noF3( pfx )
+ && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x20 ) {
+
+ modrm = insn[3];
+
+ IRTemp srcVec = newTemp(Ity_V128);
+
+ if ( epartIsReg( modrm ) ) {
+ assign( srcVec, getXMMReg( eregOfRexRM(pfx, modrm) ) );
+ delta += 3+1;
+ DIP( "pmovsxbw %s,%s\n",
+ nameXMMReg( eregOfRexRM(pfx, modrm) ),
+ nameXMMReg( gregOfRexRM(pfx, modrm) ) );
+ } else {
+ addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 );
+ assign( srcVec,
+ unop( Iop_64UtoV128, loadLE( Ity_I64, mkexpr(addr) ) ) );
+ delta += 3+alen;
+ DIP( "pmovsxbw %s,%s\n",
+ dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
+ }
+
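+      /* Interleave the 8 low source bytes with zero bytes to widen
+         them to 16-bit lanes, then shift each lane left and
+         arithmetic-shift it right by 8 to sign-extend from bit 7. */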
+ putXMMReg( gregOfRexRM(pfx, modrm),
+ binop( Iop_SarN16x8,
+ binop( Iop_ShlN16x8,
+ binop( Iop_InterleaveLO8x16,
+ IRExpr_Const( IRConst_V128(0) ),
+ mkexpr(srcVec) ),
+ mkU8(8) ),
+ mkU8(8) ) );
+
+ goto decode_success;
+ }
+
+
+ /* 66 0f 38 21 /r = PMOVSXBD xmm1, xmm2/m32
+ Packed Move with Sign Extend from Byte to DWord (XMM) */
+ if ( have66noF2noF3( pfx )
+ && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x21 ) {
+
+ modrm = insn[3];
+
+ IRTemp srcVec = newTemp(Ity_V128);
+
+ if ( epartIsReg( modrm ) ) {
+ assign( srcVec, getXMMReg( eregOfRexRM(pfx, modrm) ) );
+ delta += 3+1;
+ DIP( "pmovsxbd %s,%s\n",
+ nameXMMReg( eregOfRexRM(pfx, modrm) ),
+ nameXMMReg( gregOfRexRM(pfx, modrm) ) );
+ } else {
+ addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 );
+ assign( srcVec,
+ unop( Iop_32UtoV128, loadLE( Ity_I32, mkexpr(addr) ) ) );
+ delta += 3+alen;
+ DIP( "pmovsxbd %s,%s\n",
+ dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
+ }
+
+ IRTemp zeroVec = newTemp(Ity_V128);
+ assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) );
+
+ putXMMReg( gregOfRexRM(pfx, modrm),
+ binop( Iop_SarN32x4,
+ binop( Iop_ShlN32x4,
+ binop( Iop_InterleaveLO8x16,
+ mkexpr(zeroVec),
+ binop( Iop_InterleaveLO8x16,
+ mkexpr(zeroVec),
+ mkexpr(srcVec) ) ),
+ mkU8(24) ), mkU8(24) ) );
+
+ goto decode_success;
+ }
+
+
+ /* 66 0f 38 22 /r = PMOVSXBQ xmm1, xmm2/m16
+ Packed Move with Sign Extend from Byte to QWord (XMM) */
+ if ( have66noF2noF3(pfx)
+ && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x22 ) {
+
+ modrm = insn[3];
+
+ IRTemp srcBytes = newTemp(Ity_I16);
+
+ if ( epartIsReg(modrm) ) {
+ assign( srcBytes, getXMMRegLane16( eregOfRexRM(pfx, modrm), 0 ) );
+ delta += 3+1;
+ DIP( "pmovsxbq %s,%s\n",
+ nameXMMReg( eregOfRexRM(pfx, modrm) ),
+ nameXMMReg( gregOfRexRM(pfx, modrm) ) );
+ } else {
+ addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 );
+ assign( srcBytes, loadLE( Ity_I16, mkexpr(addr) ) );
+ delta += 3+alen;
+ DIP( "pmovsxbq %s,%s\n",
+ dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
+ }
+
+ putXMMReg( gregOfRexRM( pfx, modrm ),
+ binop( Iop_64HLtoV128,
+ unop( Iop_8Sto64,
+ unop( Iop_16HIto8,
+ mkexpr(srcBytes) ) ),
+ unop( Iop_8Sto64,
+ unop( Iop_16to8, mkexpr(srcBytes) ) ) ) );
+
+ goto decode_success;
+ }
+
+
+ /* 66 0f 38 23 /r = PMOVSXWD xmm1, xmm2/m64
+ Packed Move with Sign Extend from Word to DWord (XMM) */
+ if ( have66noF2noF3( pfx )
+ && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x23 ) {
+
+ modrm = insn[3];
+
+ IRTemp srcVec = newTemp(Ity_V128);
+
+ if ( epartIsReg(modrm) ) {
+ assign( srcVec, getXMMReg( eregOfRexRM(pfx, modrm) ) );
+ delta += 3+1;
+ DIP( "pmovsxwd %s,%s\n",
+ nameXMMReg( eregOfRexRM(pfx, modrm) ),
+ nameXMMReg( gregOfRexRM(pfx, modrm) ) );
+ } else {
+ addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 );
+ assign( srcVec,
+ unop( Iop_64UtoV128, loadLE( Ity_I64, mkexpr(addr) ) ) );
+ delta += 3+alen;
+ DIP( "pmovsxwd %s,%s\n",
+ dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
+ }
+
+ putXMMReg( gregOfRexRM(pfx, modrm),
+ binop( Iop_SarN32x4,
+ binop( Iop_ShlN32x4,
+ binop( Iop_InterleaveLO16x8,
+ IRExpr_Const( IRConst_V128(0) ),
+ mkexpr(srcVec) ),
+ mkU8(16) ),
+ mkU8(16) ) );
+
+ goto decode_success;
+ }
+
+
+ /* 66 0f 38 24 /r = PMOVSXWQ xmm1, xmm2/m32
+ Packed Move with Sign Extend from Word to QWord (XMM) */
+ if ( have66noF2noF3( pfx )
+ && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x24 ) {
+
+ modrm = insn[3];
+
+ IRTemp srcBytes = newTemp(Ity_I32);
+
+ if ( epartIsReg( modrm ) ) {
+ assign( srcBytes, getXMMRegLane32( eregOfRexRM(pfx, modrm), 0 ) );
+ delta += 3+1;
+ DIP( "pmovsxwq %s,%s\n",
+ nameXMMReg( eregOfRexRM(pfx, modrm) ),
+ nameXMMReg( gregOfRexRM(pfx, modrm) ) );
+ } else {
+ addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 );
+ assign( srcBytes, loadLE( Ity_I32, mkexpr(addr) ) );
+ delta += 3+alen;
+ DIP( "pmovsxwq %s,%s\n",
+ dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
+ }
+
+ putXMMReg( gregOfRexRM( pfx, modrm ),
+ binop( Iop_64HLtoV128,
+ unop( Iop_16Sto64,
+ unop( Iop_32HIto16, mkexpr(srcBytes) ) ),
+ unop( Iop_16Sto64,
+ unop( Iop_32to16, mkexpr(srcBytes) ) ) ) );
+
+ goto decode_success;
+ }
+
+
+ /* 66 0f 38 25 /r = PMOVSXDQ xmm1, xmm2/m64
+ Packed Move with Sign Extend from Double Word to Quad Word (XMM) */
+ if ( have66noF2noF3( pfx )
+ && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x25 ) {
+
+ modrm = insn[3];
+
+ IRTemp srcBytes = newTemp(Ity_I64);
+
+ if ( epartIsReg(modrm) ) {
+ assign( srcBytes, getXMMRegLane64( eregOfRexRM(pfx, modrm), 0 ) );
+ delta += 3+1;
+ DIP( "pmovsxdq %s,%s\n",
+ nameXMMReg( eregOfRexRM(pfx, modrm) ),
+ nameXMMReg( gregOfRexRM(pfx, modrm) ) );
+ } else {
+ addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 );
+ assign( srcBytes, loadLE( Ity_I64, mkexpr(addr) ) );
+ delta += 3+alen;
+ DIP( "pmovsxdq %s,%s\n",
+ dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
+ }
+
+ putXMMReg( gregOfRexRM(pfx, modrm),
+ binop( Iop_64HLtoV128,
+ unop( Iop_32Sto64,
+ unop( Iop_64HIto32, mkexpr(srcBytes) ) ),
+ unop( Iop_32Sto64,
+ unop( Iop_64to32, mkexpr(srcBytes) ) ) ) );
+
+ goto decode_success;
+ }
+
+
+ /* 66 0f 38 30 /r = PMOVZXBW xmm1, xmm2/m64
+ Packed Move with Zero Extend from Byte to Word (XMM) */
+ if ( have66noF2noF3(pfx)
+ && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x30 ) {
+
+ modrm = insn[3];
+
+ IRTemp srcVec = newTemp(Ity_V128);
+
+ if ( epartIsReg(modrm) ) {
+ assign( srcVec, getXMMReg( eregOfRexRM(pfx, modrm) ) );
+ delta += 3+1;
+ DIP( "pmovzxbw %s,%s\n",
+ nameXMMReg( eregOfRexRM(pfx, modrm) ),
+ nameXMMReg( gregOfRexRM(pfx, modrm) ) );
+ } else {
+ addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 );
+ assign( srcVec,
+ unop( Iop_64UtoV128, loadLE( Ity_I64, mkexpr(addr) ) ) );
+ delta += 3+alen;
+ DIP( "pmovzxbw %s,%s\n",
+ dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
+ }
+
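+      /* Interleaving the 8 low source bytes with zero bytes yields
+         the zero-extended 16-bit lanes directly. */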
+ putXMMReg( gregOfRexRM(pfx, modrm),
+ binop( Iop_InterleaveLO8x16,
+ IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) ) );
+
+ goto decode_success;
+ }
+
+
+ /* 66 0f 38 31 /r = PMOVZXBD xmm1, xmm2/m32
+ Packed Move with Zero Extend from Byte to DWord (XMM) */
+ if ( have66noF2noF3( pfx )
+ && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x31 ) {
+
+ modrm = insn[3];
+
+ IRTemp srcVec = newTemp(Ity_V128);
+
+ if ( epartIsReg(modrm) ) {
+ assign( srcVec, getXMMReg( eregOfRexRM(pfx, modrm) ) );
+ delta += 3+1;
+ DIP( "pmovzxbd %s,%s\n",
+ nameXMMReg( eregOfRexRM(pfx, modrm) ),
+ nameXMMReg( gregOfRexRM(pfx, modrm) ) );
+ } else {
+ addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 );
+ assign( srcVec,
+ unop( Iop_32UtoV128, loadLE( Ity_I32, mkexpr(addr) ) ) );
+ delta += 3+alen;
+ DIP( "pmovzxbd %s,%s\n",
+ dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
+ }
+
+ IRTemp zeroVec = newTemp(Ity_V128);
+ assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) );
+
+ putXMMReg( gregOfRexRM( pfx, modrm ),
+ binop( Iop_InterleaveLO8x16,
+ mkexpr(zeroVec),
+ binop( Iop_InterleaveLO8x16,
+ mkexpr(zeroVec), mkexpr(srcVec) ) ) );
+
+ goto decode_success;
+ }
+
+
+ /* 66 0f 38 32 /r = PMOVZXBQ xmm1, xmm2/m16
+ Packed Move with Zero Extend from Byte to QWord (XMM) */
+ if ( have66noF2noF3( pfx )
+ && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x32 ) {
+
+ modrm = insn[3];
+
+ IRTemp srcVec = newTemp(Ity_V128);
+
+ if ( epartIsReg(modrm) ) {
+ assign( srcVec, getXMMReg( eregOfRexRM(pfx, modrm) ) );
+ delta += 3+1;
+ DIP( "pmovzxbq %s,%s\n",
+ nameXMMReg( eregOfRexRM(pfx, modrm) ),
+ nameXMMReg( gregOfRexRM(pfx, modrm) ) );
+ } else {
+ addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 );
+ assign( srcVec,
+ unop( Iop_32UtoV128,
+ unop( Iop_16Uto32, loadLE( Ity_I16, mkexpr(addr) ) ) ) );
+ delta += 3+alen;
+ DIP( "pmovzxbq %s,%s\n",
+ dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
+ }
+
+ IRTemp zeroVec = newTemp(Ity_V128);
+ assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) );
+
+ putXMMReg( gregOfRexRM( pfx, modrm ),
+ binop( Iop_InterleaveLO8x16,
+ mkexpr(zeroVec),
+ binop( Iop_InterleaveLO8x16,
+ mkexpr(zeroVec),
+ binop( Iop_InterleaveLO8x16,
+ mkexpr(zeroVec), mkexpr(srcVec) ) ) ) );
+
+ goto decode_success;
+ }
+
+
+ /* 66 0f 38 33 /r = PMOVZXWD xmm1, xmm2/m64
+ Packed Move with Zero Extend from Word to DWord (XMM) */
+ if ( have66noF2noF3( pfx )
+ && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x33 ) {
+
+ modrm = insn[3];
+
+ IRTemp srcVec = newTemp(Ity_V128);
+
+ if ( epartIsReg(modrm) ) {
+ assign( srcVec, getXMMReg( eregOfRexRM(pfx, modrm) ) );
+ delta += 3+1;
+ DIP( "pmovzxwd %s,%s\n",
+ nameXMMReg( eregOfRexRM(pfx, modrm) ),
+ nameXMMReg( gregOfRexRM(pfx, modrm) ) );
+ } else {
+ addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 );
+ assign( srcVec,
+ unop( Iop_64UtoV128, loadLE( Ity_I64, mkexpr(addr) ) ) );
+ delta += 3+alen;
+ DIP( "pmovzxwd %s,%s\n",
+ dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
+ }
+
+ putXMMReg( gregOfRexRM(pfx, modrm),
+ binop( Iop_InterleaveLO16x8,
+ IRExpr_Const( IRConst_V128(0) ),
+ mkexpr(srcVec) ) );
+
+ goto decode_success;
+ }
+
+
+ /* 66 0f 38 34 /r = PMOVZXWQ xmm1, xmm2/m32
+ Packed Move with Zero Extend from Word to QWord (XMM) */
+ if ( have66noF2noF3( pfx )
+ && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x34 ) {
+
+ modrm = insn[3];
+
+ IRTemp srcVec = newTemp(Ity_V128);
+
+ if ( epartIsReg( modrm ) ) {
+ assign( srcVec, getXMMReg( eregOfRexRM(pfx, modrm) ) );
+ delta += 3+1;
+ DIP( "pmovzxwq %s,%s\n",
+ nameXMMReg( eregOfRexRM(pfx, modrm) ),
+ nameXMMReg( gregOfRexRM(pfx, modrm) ) );
+ } else {
+ addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 );
+ assign( srcVec,
+ unop( Iop_32UtoV128, loadLE( Ity_I32, mkexpr(addr) ) ) );
+ delta += 3+alen;
+ DIP( "pmovzxwq %s,%s\n",
+ dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
+ }
+
+ IRTemp zeroVec = newTemp( Ity_V128 );
+ assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) );
+
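+      /* Two rounds of 16-bit interleaving with zero widen each of
+         the two low words to a zero-extended 64-bit lane. */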
+ putXMMReg( gregOfRexRM( pfx, modrm ),
+ binop( Iop_InterleaveLO16x8,
+ mkexpr(zeroVec),
+ binop( Iop_InterleaveLO16x8,
+ mkexpr(zeroVec), mkexpr(srcVec) ) ) );
+
+ goto decode_success;
+ }
+
+
+ /* 66 0f 38 35 /r = PMOVZXDQ xmm1, xmm2/m64
+ Packed Move with Zero Extend from DWord to QWord (XMM) */
+ if ( have66noF2noF3( pfx )
+ && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x35 ) {
+
+ modrm = insn[3];
+
+ IRTemp srcVec = newTemp(Ity_V128);
+
+ if ( epartIsReg(modrm) ) {
+ assign( srcVec, getXMMReg( eregOfRexRM(pfx, modrm) ) );
+ delta += 3+1;
+ DIP( "pmovzxdq %s,%s\n",
+ nameXMMReg( eregOfRexRM(pfx, modrm) ),
+ nameXMMReg( gregOfRexRM(pfx, modrm) ) );
+ } else {
+ addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 );
+ assign( srcVec,
+ unop( Iop_64UtoV128, loadLE( Ity_I64, mkexpr(addr) ) ) );
+ delta += 3+alen;
+ DIP( "pmovzxdq %s,%s\n",
+ dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
+ }
+
+ putXMMReg( gregOfRexRM(pfx, modrm),
+ binop( Iop_InterleaveLO32x4,
+ IRExpr_Const( IRConst_V128(0) ),
+ mkexpr(srcVec) ) );
+
+ goto decode_success;
+ }
+
+
+ /* 66 0f 38 40 /r = PMULLD xmm1, xmm2/m128
+ 32x4 integer multiply from xmm2/m128 to xmm1 */
+ if ( have66noF2noF3( pfx )
+ && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x40 ) {
+
+ modrm = insn[3];
+
+ IRTemp argL = newTemp(Ity_V128);
+ IRTemp argR = newTemp(Ity_V128);
+
+ if ( epartIsReg(modrm) ) {
+ assign( argL, getXMMReg( eregOfRexRM(pfx, modrm) ) );
+ delta += 3+1;
+ DIP( "pmulld %s,%s\n",
+ nameXMMReg( eregOfRexRM(pfx, modrm) ),
+ nameXMMReg( gregOfRexRM(pfx, modrm) ) );
+ } else {
+ addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 );
+ gen_SEGV_if_not_16_aligned( addr );
+ assign( argL, loadLE( Ity_V128, mkexpr(addr) ));
+ delta += 3+alen;
+ DIP( "pmulld %s,%s\n",
+ dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
+ }
+
+ assign(argR, getXMMReg( gregOfRexRM(pfx, modrm) ));
+
+ putXMMReg( gregOfRexRM(pfx, modrm),
+ binop( Iop_Mul32x4, mkexpr(argL), mkexpr(argR)) );
+
+ goto decode_success;
+ }
+
+
+ /* F3 0F B8 = POPCNT{W,L,Q}
+ Count the number of 1 bits in a register
+ */
+ if (haveF3noF2(pfx) /* so both 66 and 48 are possibilities */
+ && insn[0] == 0x0F && insn[1] == 0xB8) {
+ vassert(sz == 2 || sz == 4 || sz == 8);
+ /*IRType*/ ty = szToITy(sz);
+ IRTemp src = newTemp(ty);
+ modrm = insn[2];
+ if (epartIsReg(modrm)) {
+ assign(src, getIRegE(sz, pfx, modrm));
+ delta += 2+1;
+ DIP("popcnt%c %s, %s\n", nameISize(sz), nameIRegE(sz, pfx, modrm),
+ nameIRegG(sz, pfx, modrm));
+ } else {
+ addr = disAMode( &alen, vbi, pfx, delta+2, dis_buf, 0);
+ assign(src, loadLE(ty, mkexpr(addr)));
+ delta += 2+alen;
+ DIP("popcnt%c %s, %s\n", nameISize(sz), dis_buf,
+ nameIRegG(sz, pfx, modrm));
+ }
+
+ IRTemp result = gen_POPCOUNT(ty, src);
+ putIRegG(sz, pfx, modrm, mkexpr(result));
+
+ // Update flags. This is pretty lame .. perhaps can do better
+ // if this turns out to be performance critical.
+ // O S A C P are cleared. Z is set if SRC == 0.
+ stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
+ stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
+ stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
+ stmt( IRStmt_Put( OFFB_CC_DEP1,
+ binop(Iop_Shl64,
+ unop(Iop_1Uto64,
+ binop(Iop_CmpEQ64,
+ widenUto64(mkexpr(src)),
+ mkU64(0))),
+ mkU8(AMD64G_CC_SHIFT_Z))));
+
+ goto decode_success;
+ }
+
+
+ /* 66 0F 3A 0B /r ib = ROUNDSD imm8, xmm2/m64, xmm1
+ (Partial implementation only -- only deal with cases where
+ the rounding mode is specified directly by the immediate byte.)
+ 66 0F 3A 0A /r ib = ROUNDSS imm8, xmm2/m32, xmm1
+ (Limitations ditto)
+ */
+ if (have66noF2noF3(pfx)
+ && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x3A
+ && (insn[2] == 0x0B || insn[2] == 0x0A)) {
+
+ Bool isD = insn[2] == 0x0B;
+ IRTemp src = newTemp(isD ? Ity_F64 : Ity_F32);
+ IRTemp res = newTemp(isD ? Ity_F64 : Ity_F32);
+ Int imm = 0;
+
+ modrm = insn[3];
+
+ if (epartIsReg(modrm)) {
+ assign( src,
+ isD ? getXMMRegLane64F( eregOfRexRM(pfx, modrm), 0 )
+ : getXMMRegLane32F( eregOfRexRM(pfx, modrm), 0 ) );
+ imm = insn[3+1];
+ if (imm & ~3) goto decode_failure;
+ delta += 3+1+1;
+ DIP( "rounds%c $%d,%s,%s\n",
+ isD ? 'd' : 's',
+ imm, nameXMMReg( eregOfRexRM(pfx, modrm) ),
+ nameXMMReg( gregOfRexRM(pfx, modrm) ) );
+ } else {
+ addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 );
+ assign( src, loadLE( isD ? Ity_F64 : Ity_F32, mkexpr(addr) ));
+ imm = insn[3+alen];
+ if (imm & ~3) goto decode_failure;
+ delta += 3+alen+1;
+         DIP( "rounds%c $%d,%s,%s\n",
+              isD ? 'd' : 's',
+              imm, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
+ }
+
+ /* (imm & 3) contains an Intel-encoded rounding mode. Because
+ that encoding is the same as the encoding for IRRoundingMode,
+ we can use that value directly in the IR as a rounding
+ mode. */
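+      /* For reference, that shared encoding is:
+           0 -> round to nearest, ties to even
+           1 -> round towards -infinity
+           2 -> round towards +infinity
+           3 -> round towards zero (truncate) */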
+ assign(res, binop(isD ? Iop_RoundF64toInt : Iop_RoundF32toInt,
+ mkU32(imm & 3), mkexpr(src)) );
+
+ if (isD)
+ putXMMRegLane64F( gregOfRexRM(pfx, modrm), 0, mkexpr(res) );
+ else
+ putXMMRegLane32F( gregOfRexRM(pfx, modrm), 0, mkexpr(res) );
+
+ goto decode_success;
+ }
+
+   /* F3 0F BD -- LZCNT (count leading zeroes).  An AMD extension,
+      which we can only decode if we're sure this is an AMD cpu that
+      supports LZCNT, since otherwise it's BSR, which behaves
+      differently. */
+ if (haveF3noF2(pfx) /* so both 66 and 48 are possibilities */
+ && insn[0] == 0x0F && insn[1] == 0xBD
+ && 0 != (archinfo->hwcaps & VEX_HWCAPS_AMD64_LZCNT)) {
+ vassert(sz == 2 || sz == 4 || sz == 8);
+ /*IRType*/ ty = szToITy(sz);
+ IRTemp src = newTemp(ty);
+ modrm = insn[2];
+ if (epartIsReg(modrm)) {
+ assign(src, getIRegE(sz, pfx, modrm));
+ delta += 2+1;
+ DIP("lzcnt%c %s, %s\n", nameISize(sz), nameIRegE(sz, pfx, modrm),
+ nameIRegG(sz, pfx, modrm));
+ } else {
+ addr = disAMode( &alen, vbi, pfx, delta+2, dis_buf, 0);
+ assign(src, loadLE(ty, mkexpr(addr)));
+ delta += 2+alen;
+ DIP("lzcnt%c %s, %s\n", nameISize(sz), dis_buf,
+ nameIRegG(sz, pfx, modrm));
+ }
+
+ IRTemp res = gen_LZCNT(ty, src);
+ putIRegG(sz, pfx, modrm, mkexpr(res));
+
+ // Update flags. This is pretty lame .. perhaps can do better
+ // if this turns out to be performance critical.
+ // O S A P are cleared. Z is set if RESULT == 0.
+ // C is set if SRC is zero.
+ IRTemp src64 = newTemp(Ity_I64);
+ IRTemp res64 = newTemp(Ity_I64);
+ assign(src64, widenUto64(mkexpr(src)));
+ assign(res64, widenUto64(mkexpr(res)));
+
+ IRTemp oszacp = newTemp(Ity_I64);
+ assign(
+ oszacp,
+ binop(Iop_Or64,
+ binop(Iop_Shl64,
+ unop(Iop_1Uto64,
+ binop(Iop_CmpEQ64, mkexpr(res64), mkU64(0))),
+ mkU8(AMD64G_CC_SHIFT_Z)),
+ binop(Iop_Shl64,
+ unop(Iop_1Uto64,
+ binop(Iop_CmpEQ64, mkexpr(src64), mkU64(0))),
+ mkU8(AMD64G_CC_SHIFT_C))
+ )
+ );
+
+ stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
+ stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
+ stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
+ stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(oszacp) ));
+
+ goto decode_success;
+ }
+
+ /* 66 0F 3A 63 /r ib = PCMPISTRI imm8, xmm2/m128, xmm1
+ 66 0F 3A 62 /r ib = PCMPISTRM imm8, xmm2/m128, xmm1
+ 66 0F 3A 61 /r ib = PCMPESTRI imm8, xmm2/m128, xmm1
+ 66 0F 3A 60 /r ib = PCMPESTRM imm8, xmm2/m128, xmm1
+ (selected special cases that actually occur in glibc,
+ not by any means a complete implementation.)
+ */
+ if (have66noF2noF3(pfx)
+ && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x3A
+ && (insn[2] >= 0x60 && insn[2] <= 0x63)) {
+
+ UInt isISTRx = insn[2] & 2;
+ UInt isxSTRM = (insn[2] & 1) ^ 1;
+ UInt regNoL = 0;
+ UInt regNoR = 0;
+ UChar imm = 0;
+
+ /* This is a nasty kludge. We need to pass 2 x V128 to the
+ helper (which is clean). Since we can't do that, use a dirty
+ helper to compute the results directly from the XMM regs in
+ the guest state. That means for the memory case, we need to
+ move the left operand into a pseudo-register (XMM16, let's
+ call it). */
+ modrm = insn[3];
+ if (epartIsReg(modrm)) {
+ regNoL = eregOfRexRM(pfx, modrm);
+ regNoR = gregOfRexRM(pfx, modrm);
+ imm = insn[3+1];
+ delta += 3+1+1;
+ } else {
+ regNoL = 16; /* use XMM16 as an intermediary */
+ regNoR = gregOfRexRM(pfx, modrm);
+ addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 );
+ /* No alignment check; I guess that makes sense, given that
+ these insns are for dealing with C style strings. */
+ stmt( IRStmt_Put( OFFB_XMM16, loadLE(Ity_V128, mkexpr(addr)) ));
+ imm = insn[3+alen];
+ delta += 3+alen+1;
+ }
+
+ /* Now we know the XMM reg numbers for the operands, and the
+ immediate byte. Is it one we can actually handle? Throw out
+ any cases for which the helper function has not been
+ verified. */
+ switch (imm) {
+ case 0x02: case 0x08: case 0x0A: case 0x0C: case 0x12:
+ case 0x1A: case 0x3A: case 0x44: case 0x4A:
+ break;
+ default:
+ goto decode_failure;
+ }
+
+ /* Who ya gonna call? Presumably not Ghostbusters. */
+ void* fn = &amd64g_dirtyhelper_PCMPxSTRx;
+ HChar* nm = "amd64g_dirtyhelper_PCMPxSTRx";
+
+ /* Round up the arguments. Note that this is a kludge -- the
+ use of mkU64 rather than mkIRExpr_HWord implies the
+ assumption that the host's word size is 64-bit. */
+ UInt gstOffL = regNoL == 16 ? OFFB_XMM16 : xmmGuestRegOffset(regNoL);
+ UInt gstOffR = xmmGuestRegOffset(regNoR);
+
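+      /* opc4_and_imm packs the final opcode byte (0x60..0x63) into
+         bits 15:8 and the imm8 into bits 7:0, so the helper can
+         select both the variant and its mode from one argument. */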
+ IRExpr* opc4_and_imm = mkU64((insn[2] << 8) | (imm & 0xFF));
+ IRExpr* gstOffLe = mkU64(gstOffL);
+ IRExpr* gstOffRe = mkU64(gstOffR);
+ IRExpr* edxIN = isISTRx ? mkU64(0) : getIRegRDX(8);
+ IRExpr* eaxIN = isISTRx ? mkU64(0) : getIRegRAX(8);
+ IRExpr** args
+ = mkIRExprVec_5( opc4_and_imm, gstOffLe, gstOffRe, edxIN, eaxIN );
+
+ IRTemp resT = newTemp(Ity_I64);
+ IRDirty* d = unsafeIRDirty_1_N( resT, 0/*regparms*/, nm, fn, args );
+ /* It's not really a dirty call, but we can't use the clean
+ helper mechanism here for the very lame reason that we can't
+ pass 2 x V128s by value to a helper, nor get one back. Hence
+ this roundabout scheme. */
+ d->needsBBP = True;
+ d->nFxState = 2;
+ d->fxState[0].fx = Ifx_Read;
+ d->fxState[0].offset = gstOffL;
+ d->fxState[0].size = sizeof(U128);
+ d->fxState[1].fx = Ifx_Read;
+ d->fxState[1].offset = gstOffR;
+ d->fxState[1].size = sizeof(U128);
+ if (isxSTRM) {
+ /* Declare that the helper writes XMM0. */
+ d->nFxState = 3;
+ d->fxState[2].fx = Ifx_Write;
+ d->fxState[2].offset = xmmGuestRegOffset(0);
+ d->fxState[2].size = sizeof(U128);
+ }
+
+ stmt( IRStmt_Dirty(d) );
+
+ /* Now resT[15:0] holds the new OSZACP values, so the condition
+         codes must be updated.  And for an xSTRI case, resT[31:16]
+ holds the new ECX value, so stash that too. */
+ if (!isxSTRM) {
+ putIReg64(R_RCX, binop(Iop_And64,
+ binop(Iop_Shr64, mkexpr(resT), mkU8(16)),
+ mkU64(0xFFFF)));
+ }
+
+ stmt( IRStmt_Put(
+ OFFB_CC_DEP1,
+ binop(Iop_And64, mkexpr(resT), mkU64(0xFFFF))
+ ));
+ stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
+ stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
+ stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
+
+ if (regNoL == 16) {
+ DIP("pcmp%cstr%c $%x,%s,%s\n",
+ isISTRx ? 'i' : 'e', isxSTRM ? 'm' : 'i',
+ (UInt)imm, dis_buf, nameXMMReg(regNoR));
+ } else {
+ DIP("pcmp%cstr%c $%x,%s,%s\n",
+ isISTRx ? 'i' : 'e', isxSTRM ? 'm' : 'i',
+ (UInt)imm, nameXMMReg(regNoL), nameXMMReg(regNoR));
+ }
+
+ goto decode_success;
+ }
+
+
+ /* 66 0f 38 17 /r = PTEST xmm1, xmm2/m128
+ Logical compare (set ZF and CF from AND/ANDN of the operands) */
+ if (have66noF2noF3( pfx ) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x17) {
+ modrm = insn[3];
+ IRTemp vecE = newTemp(Ity_V128);
+ IRTemp vecG = newTemp(Ity_V128);
+
+ if ( epartIsReg(modrm) ) {
+ assign(vecE, getXMMReg(eregOfRexRM(pfx, modrm)));
+ delta += 3+1;
+ DIP( "ptest %s,%s\n",
+ nameXMMReg( eregOfRexRM(pfx, modrm) ),
+ nameXMMReg( gregOfRexRM(pfx, modrm) ) );
+ } else {
+ addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 );
+ gen_SEGV_if_not_16_aligned( addr );
+ assign(vecE, loadLE( Ity_V128, mkexpr(addr) ));
+ delta += 3+alen;
+ DIP( "ptest %s,%s\n",
+ dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
+ }
+
+ assign(vecG, getXMMReg(gregOfRexRM(pfx, modrm)));
+
+ /* Set Z=1 iff (vecE & vecG) == 0
+ Set C=1 iff (vecE & not vecG) == 0
+ */
+
+ /* andV, andnV: vecE & vecG, vecE and not(vecG) */
+ IRTemp andV = newTemp(Ity_V128);
+ IRTemp andnV = newTemp(Ity_V128);
+ assign(andV, binop(Iop_AndV128, mkexpr(vecE), mkexpr(vecG)));
+ assign(andnV, binop(Iop_AndV128,
+ mkexpr(vecE),
+ binop(Iop_XorV128, mkexpr(vecG),
+ mkV128(0xFFFF))));
+
+ /* The same, but reduced to 64-bit values, by or-ing the top
+ and bottom 64-bits together. It relies on this trick:
+
+ InterleaveLO64x2([a,b],[c,d]) == [b,d] hence
+
+ InterleaveLO64x2([a,b],[a,b]) == [b,b] and similarly
+ InterleaveHI64x2([a,b],[a,b]) == [a,a]
+
+ and so the OR of the above 2 exprs produces
+ [a OR b, a OR b], from which we simply take the lower half.
+ */
+ IRTemp and64 = newTemp(Ity_I64);
+ IRTemp andn64 = newTemp(Ity_I64);
+
+ assign(
+ and64,
+ unop(Iop_V128to64,
+ binop(Iop_OrV128,
+ binop(Iop_InterleaveLO64x2, mkexpr(andV), mkexpr(andV)),
+ binop(Iop_InterleaveHI64x2, mkexpr(andV), mkexpr(andV))
+ )
+ )
+ );
+
+ assign(
+ andn64,
+ unop(Iop_V128to64,
+ binop(Iop_OrV128,
+ binop(Iop_InterleaveLO64x2, mkexpr(andnV), mkexpr(andnV)),
+ binop(Iop_InterleaveHI64x2, mkexpr(andnV), mkexpr(andnV))
+ )
+ )
+ );
+
+ /* Now convert and64, andn64 to all-zeroes or all-1s, so we can
+ slice out the Z and C bits conveniently. We use the standard
+ trick all-zeroes -> all-zeroes, anything-else -> all-ones
+ done by "(x | -x) >>s (word-size - 1)".
+ */
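+      /* Worked example: and64 == 0 gives (0 | -0) >>s 63 == 0, and
+         Not64 of that is all-ones, from which MASK_Z is sliced below,
+         hence Z=1.  Any nonzero and64 has the sign bit set in
+         (x | -x), so the shift yields all-ones and Not64 gives zero,
+         hence Z=0. */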
+ IRTemp z64 = newTemp(Ity_I64);
+ IRTemp c64 = newTemp(Ity_I64);
+ assign(z64,
+ unop(Iop_Not64,
+ binop(Iop_Sar64,
+ binop(Iop_Or64,
+ binop(Iop_Sub64, mkU64(0), mkexpr(and64)),
+ mkexpr(and64)
+ ),
+ mkU8(63)))
+ );
+
+ assign(c64,
+ unop(Iop_Not64,
+ binop(Iop_Sar64,
+ binop(Iop_Or64,
+ binop(Iop_Sub64, mkU64(0), mkexpr(andn64)),
+ mkexpr(andn64)
+ ),
+ mkU8(63)))
+ );
+
+ /* And finally, slice out the Z and C flags and set the flags
+ thunk to COPY for them. OSAP are set to zero. */
+ IRTemp newOSZACP = newTemp(Ity_I64);
+ assign(newOSZACP,
+ binop(Iop_Or64,
+ binop(Iop_And64, mkexpr(z64), mkU64(AMD64G_CC_MASK_Z)),
+ binop(Iop_And64, mkexpr(c64), mkU64(AMD64G_CC_MASK_C))
+ )
+ );
+
+ stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(newOSZACP)));
+ stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
+ stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
+ stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
+
+ goto decode_success;
+ }
+
+
+ /* ---------------------------------------------------- */
+ /* --- end of the SSE4 decoder --- */
+ /* ---------------------------------------------------- */
+
+ /*after_sse_decoders:*/
+
+ /* Get the primary opcode. */
+ opc = getUChar(delta); delta++;
+
+ /* We get here if the current insn isn't SSE, or this CPU doesn't
+ support SSE. */
+
+ switch (opc) {
+
+ /* ------------------------ Control flow --------------- */
+
+ case 0xC2: /* RET imm16 */
+ if (have66orF2orF3(pfx)) goto decode_failure;
+ d64 = getUDisp16(delta);
+ delta += 2;
+ dis_ret(vbi, d64);
+ dres.whatNext = Dis_StopHere;
+ DIP("ret %lld\n", d64);
+ break;
+
+ case 0xC3: /* RET */
+ if (have66orF2(pfx)) goto decode_failure;
+ /* F3 is acceptable on AMD. */
+ dis_ret(vbi, 0);
+ dres.whatNext = Dis_StopHere;
+ DIP(haveF3(pfx) ? "rep ; ret\n" : "ret\n");
+ break;
+
+ case 0xE8: /* CALL J4 */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ d64 = getSDisp32(delta); delta += 4;
+ d64 += (guest_RIP_bbstart+delta);
+ /* (guest_RIP_bbstart+delta) == return-to addr, d64 == call-to addr */
+ t1 = newTemp(Ity_I64);
+ assign(t1, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8)));
+ putIReg64(R_RSP, mkexpr(t1));
+ storeLE( mkexpr(t1), mkU64(guest_RIP_bbstart+delta));
+ t2 = newTemp(Ity_I64);
+ assign(t2, mkU64((Addr64)d64));
+ make_redzone_AbiHint(vbi, t1, t2/*nia*/, "call-d32");
+ if (resteerOkFn( callback_opaque, (Addr64)d64) ) {
+ /* follow into the call target. */
+ dres.whatNext = Dis_ResteerU;
+ dres.continueAt = d64;
+ } else {
+ jmp_lit(Ijk_Call,d64);
+ dres.whatNext = Dis_StopHere;
+ }
+ DIP("call 0x%llx\n",d64);
+ break;
+
+//.. //-- case 0xC8: /* ENTER */
+//.. //-- d32 = getUDisp16(eip); eip += 2;
+//.. //-- abyte = getUChar(delta); delta++;
+//.. //--
+//.. //-- vg_assert(sz == 4);
+//.. //-- vg_assert(abyte == 0);
+//.. //--
+//.. //-- t1 = newTemp(cb); t2 = newTemp(cb);
+//.. //-- uInstr2(cb, GET, sz, ArchReg, R_EBP, TempReg, t1);
+//.. //-- uInstr2(cb, GET, 4, ArchReg, R_ESP, TempReg, t2);
+//.. //-- uInstr2(cb, SUB, 4, Literal, 0, TempReg, t2);
+//.. //-- uLiteral(cb, sz);
+//.. //-- uInstr2(cb, PUT, 4, TempReg, t2, ArchReg, R_ESP);
+//.. //-- uInstr2(cb, STORE, 4, TempReg, t1, TempReg, t2);
+//.. //-- uInstr2(cb, PUT, 4, TempReg, t2, ArchReg, R_EBP);
+//.. //-- if (d32) {
+//.. //-- uInstr2(cb, SUB, 4, Literal, 0, TempReg, t2);
+//.. //-- uLiteral(cb, d32);
+//.. //-- uInstr2(cb, PUT, 4, TempReg, t2, ArchReg, R_ESP);
+//.. //-- }
+//.. //-- DIP("enter 0x%x, 0x%x", d32, abyte);
+//.. //-- break;
+
+ case 0xC9: /* LEAVE */
+ /* In 64-bit mode this defaults to a 64-bit operand size. There
+ is no way to encode a 32-bit variant. Hence sz==4 but we do
+            it as if sz==8. */
+ if (sz != 4)
+ goto decode_failure;
+ t1 = newTemp(Ity_I64);
+ t2 = newTemp(Ity_I64);
+ assign(t1, getIReg64(R_RBP));
+         /* First PUT RSP looks redundant, but we need it because RSP
+            must always be up-to-date for Memcheck to work... */
+ putIReg64(R_RSP, mkexpr(t1));
+ assign(t2, loadLE(Ity_I64,mkexpr(t1)));
+ putIReg64(R_RBP, mkexpr(t2));
+ putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t1), mkU64(8)) );
+ DIP("leave\n");
+ break;
+
+//.. //-- /* ---------------- Misc weird-ass insns --------------- */
+//.. //--
+//.. //-- case 0x27: /* DAA */
+//.. //-- case 0x2F: /* DAS */
+//.. //-- t1 = newTemp(cb);
+//.. //-- uInstr2(cb, GET, 1, ArchReg, R_AL, TempReg, t1);
+//.. //-- /* Widen %AL to 32 bits, so it's all defined when we push it. */
+//.. //-- uInstr1(cb, WIDEN, 4, TempReg, t1);
+//.. //-- uWiden(cb, 1, False);
+//.. //-- uInstr0(cb, CALLM_S, 0);
+//.. //-- uInstr1(cb, PUSH, 4, TempReg, t1);
+//.. //-- uInstr1(cb, CALLM, 0, Lit16,
+//.. //-- opc == 0x27 ? VGOFF_(helper_DAA) : VGOFF_(helper_DAS) );
+//.. //-- uFlagsRWU(cb, FlagsAC, FlagsSZACP, FlagO);
+//.. //-- uInstr1(cb, POP, 4, TempReg, t1);
+//.. //-- uInstr0(cb, CALLM_E, 0);
+//.. //-- uInstr2(cb, PUT, 1, TempReg, t1, ArchReg, R_AL);
+//.. //-- DIP(opc == 0x27 ? "daa\n" : "das\n");
+//.. //-- break;
+//.. //--
+//.. //-- case 0x37: /* AAA */
+//.. //-- case 0x3F: /* AAS */
+//.. //-- t1 = newTemp(cb);
+//.. //-- uInstr2(cb, GET, 2, ArchReg, R_EAX, TempReg, t1);
+//.. //-- /* Widen %AL to 32 bits, so it's all defined when we push it. */
+//.. //-- uInstr1(cb, WIDEN, 4, TempReg, t1);
+//.. //-- uWiden(cb, 2, False);
+//.. //-- uInstr0(cb, CALLM_S, 0);
+//.. //-- uInstr1(cb, PUSH, 4, TempReg, t1);
+//.. //-- uInstr1(cb, CALLM, 0, Lit16,
+//.. //-- opc == 0x37 ? VGOFF_(helper_AAA) : VGOFF_(helper_AAS) );
+//.. //-- uFlagsRWU(cb, FlagA, FlagsAC, FlagsEmpty);
+//.. //-- uInstr1(cb, POP, 4, TempReg, t1);
+//.. //-- uInstr0(cb, CALLM_E, 0);
+//.. //-- uInstr2(cb, PUT, 2, TempReg, t1, ArchReg, R_EAX);
+//.. //-- DIP(opc == 0x37 ? "aaa\n" : "aas\n");
+//.. //-- break;
+//.. //--
+//.. //-- case 0xD4: /* AAM */
+//.. //-- case 0xD5: /* AAD */
+//.. //-- d32 = getUChar(delta); delta++;
+//.. //-- if (d32 != 10) VG_(core_panic)("disInstr: AAM/AAD but base not 10 !");
+//.. //-- t1 = newTemp(cb);
+//.. //-- uInstr2(cb, GET, 2, ArchReg, R_EAX, TempReg, t1);
+//.. //-- /* Widen %AX to 32 bits, so it's all defined when we push it. */
+//.. //-- uInstr1(cb, WIDEN, 4, TempReg, t1);
+//.. //-- uWiden(cb, 2, False);
+//.. //-- uInstr0(cb, CALLM_S, 0);
+//.. //-- uInstr1(cb, PUSH, 4, TempReg, t1);
+//.. //-- uInstr1(cb, CALLM, 0, Lit16,
+//.. //-- opc == 0xD4 ? VGOFF_(helper_AAM) : VGOFF_(helper_AAD) );
+//.. //-- uFlagsRWU(cb, FlagsEmpty, FlagsSZP, FlagsEmpty);
+//.. //-- uInstr1(cb, POP, 4, TempReg, t1);
+//.. //-- uInstr0(cb, CALLM_E, 0);
+//.. //-- uInstr2(cb, PUT, 2, TempReg, t1, ArchReg, R_EAX);
+//.. //-- DIP(opc == 0xD4 ? "aam\n" : "aad\n");
+//.. //-- break;
+
+ /* ------------------------ CWD/CDQ -------------------- */
+
+      case 0x98: /* CBW / CWDE / CDQE */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ if (sz == 8) {
+ putIRegRAX( 8, unop(Iop_32Sto64, getIRegRAX(4)) );
+            DIP(/*"cdqe\n"*/"cltq\n");
+ break;
+ }
+ if (sz == 4) {
+ putIRegRAX( 4, unop(Iop_16Sto32, getIRegRAX(2)) );
+ DIP("cwtl\n");
+ break;
+ }
+ if (sz == 2) {
+ putIRegRAX( 2, unop(Iop_8Sto16, getIRegRAX(1)) );
+ DIP("cbw\n");
+ break;
+ }
+ goto decode_failure;
+
+ case 0x99: /* CWD/CDQ/CQO */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ vassert(sz == 2 || sz == 4 || sz == 8);
+ ty = szToITy(sz);
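+         /* Fill rDX with copies of rAX's sign bit, by arithmetically
+            shifting rAX right by (width-1) bits; e.g. for sz==4 this
+            is "EDX = EAX >>s 31". */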
+ putIRegRDX( sz,
+ binop(mkSizedOp(ty,Iop_Sar8),
+ getIRegRAX(sz),
+ mkU8(sz == 2 ? 15 : (sz == 4 ? 31 : 63))) );
+ DIP(sz == 2 ? "cwd\n"
+ : (sz == 4 ? /*"cdq\n"*/ "cltd\n"
+ : "cqo\n"));
+ break;
+
+ /* ------------------------ FPU ops -------------------- */
+
+ case 0x9E: /* SAHF */
+ codegen_SAHF();
+ DIP("sahf\n");
+ break;
+
+ case 0x9F: /* LAHF */
+ codegen_LAHF();
+ DIP("lahf\n");
+ break;
+
+ case 0x9B: /* FWAIT */
+ /* ignore? */
+ DIP("fwait\n");
+ break;
+
+ case 0xD8:
+ case 0xD9:
+ case 0xDA:
+ case 0xDB:
+ case 0xDC:
+ case 0xDD:
+ case 0xDE:
+ case 0xDF: {
+ Bool redundantREXWok = False;
+
+ if (haveF2orF3(pfx))
+ goto decode_failure;
+
+ /* kludge to tolerate redundant rex.w prefixes (should do this
+ properly one day) */
+ /* mono 1.1.18.1 produces 48 D9 FA, which is rex.w fsqrt */
+ if ( (opc == 0xD9 && getUChar(delta+0) == 0xFA)/*fsqrt*/ )
+ redundantREXWok = True;
+
+ if ( (sz == 4
+ || (sz == 8 && redundantREXWok))
+ && haveNo66noF2noF3(pfx)) {
+ Long delta0 = delta;
+ Bool decode_OK = False;
+ delta = dis_FPU ( &decode_OK, vbi, pfx, delta );
+ if (!decode_OK) {
+ delta = delta0;
+ goto decode_failure;
+ }
+ break;
+ } else {
+ goto decode_failure;
+ }
+ }
+
+ /* ------------------------ INT ------------------------ */
+
+ case 0xCC: /* INT 3 */
+ jmp_lit(Ijk_SigTRAP, guest_RIP_bbstart + delta);
+ dres.whatNext = Dis_StopHere;
+ DIP("int $0x3\n");
+ break;
+
+ case 0xCD: { /* INT imm8 */
+ IRJumpKind jk = Ijk_Boring;
+ if (have66orF2orF3(pfx)) goto decode_failure;
+ d64 = getUChar(delta); delta++;
+ switch (d64) {
+ case 32: jk = Ijk_Sys_int32; break;
+ default: goto decode_failure;
+ }
+ guest_RIP_next_mustcheck = True;
+ guest_RIP_next_assumed = guest_RIP_bbstart + delta;
+ jmp_lit(jk, guest_RIP_next_assumed);
+ /* It's important that all ArchRegs carry their up-to-date value
+ at this point. So we declare an end-of-block here, which
+ forces any TempRegs caching ArchRegs to be flushed. */
+ dres.whatNext = Dis_StopHere;
+ DIP("int $0x%02x\n", (UInt)d64);
+ break;
+ }
+
+ /* ------------------------ Jcond, byte offset --------- */
+
+ case 0xEB: /* Jb (jump, byte offset) */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ if (sz != 4)
+ goto decode_failure; /* JRS added 2004 July 11 */
+ d64 = (guest_RIP_bbstart+delta+1) + getSDisp8(delta);
+ delta++;
+ if (resteerOkFn(callback_opaque,d64)) {
+ dres.whatNext = Dis_ResteerU;
+ dres.continueAt = d64;
+ } else {
+ jmp_lit(Ijk_Boring,d64);
+ dres.whatNext = Dis_StopHere;
+ }
+ DIP("jmp-8 0x%llx\n", d64);
+ break;
+
+ case 0xE9: /* Jv (jump, 16/32 offset) */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ if (sz != 4)
+ goto decode_failure; /* JRS added 2004 July 11 */
+ d64 = (guest_RIP_bbstart+delta+sz) + getSDisp(sz,delta);
+ delta += sz;
+ if (resteerOkFn(callback_opaque,d64)) {
+ dres.whatNext = Dis_ResteerU;
+ dres.continueAt = d64;
+ } else {
+ jmp_lit(Ijk_Boring,d64);
+ dres.whatNext = Dis_StopHere;
+ }
+ DIP("jmp 0x%llx\n", d64);
+ break;
+
+ case 0x70:
+ case 0x71:
+ case 0x72: /* JBb/JNAEb (jump below) */
+ case 0x73: /* JNBb/JAEb (jump not below) */
+ case 0x74: /* JZb/JEb (jump zero) */
+ case 0x75: /* JNZb/JNEb (jump not zero) */
+ case 0x76: /* JBEb/JNAb (jump below or equal) */
+ case 0x77: /* JNBEb/JAb (jump not below or equal) */
+ case 0x78: /* JSb (jump negative) */
+      case 0x79: /* JNSb (jump not negative) */
+ case 0x7A: /* JP (jump parity even) */
+ case 0x7B: /* JNP/JPO (jump parity odd) */
+ case 0x7C: /* JLb/JNGEb (jump less) */
+ case 0x7D: /* JGEb/JNLb (jump greater or equal) */
+ case 0x7E: /* JLEb/JNGb (jump less or equal) */
+ case 0x7F: /* JGb/JNLEb (jump greater) */
+ { Long jmpDelta;
+ HChar* comment = "";
+ if (haveF2orF3(pfx)) goto decode_failure;
+ jmpDelta = getSDisp8(delta);
+ vassert(-128 <= jmpDelta && jmpDelta < 128);
+ d64 = (guest_RIP_bbstart+delta+1) + jmpDelta;
+ delta++;
+ if (resteerCisOk
+ && vex_control.guest_chase_cond
+ && (Addr64)d64 != (Addr64)guest_RIP_bbstart
+ && jmpDelta < 0
+ && resteerOkFn( callback_opaque, d64) ) {
+ /* Speculation: assume this backward branch is taken. So we
+ need to emit a side-exit to the insn following this one,
+ on the negation of the condition, and continue at the
+ branch target address (d64). If we wind up back at the
+ first instruction of the trace, just stop; it's better to
+ let the IR loop unroller handle that case. */
+ stmt( IRStmt_Exit(
+ mk_amd64g_calculate_condition(
+ (AMD64Condcode)(1 ^ (opc - 0x70))),
+ Ijk_Boring,
+ IRConst_U64(guest_RIP_bbstart+delta) ) );
+ dres.whatNext = Dis_ResteerC;
+ dres.continueAt = d64;
+ comment = "(assumed taken)";
+ }
+ else
+ if (resteerCisOk
+ && vex_control.guest_chase_cond
+ && (Addr64)d64 != (Addr64)guest_RIP_bbstart
+ && jmpDelta >= 0
+ && resteerOkFn( callback_opaque, guest_RIP_bbstart+delta ) ) {
+ /* Speculation: assume this forward branch is not taken. So
+ we need to emit a side-exit to d64 (the dest) and continue
+ disassembling at the insn immediately following this
+ one. */
+ stmt( IRStmt_Exit(
+ mk_amd64g_calculate_condition((AMD64Condcode)(opc - 0x70)),
+ Ijk_Boring,
+ IRConst_U64(d64) ) );
+ dres.whatNext = Dis_ResteerC;
+ dres.continueAt = guest_RIP_bbstart+delta;
+ comment = "(assumed not taken)";
+ }
+ else {
+ /* Conservative default translation - end the block at this
+ point. */
+ jcc_01( (AMD64Condcode)(opc - 0x70),
+ guest_RIP_bbstart+delta,
+ d64 );
+ dres.whatNext = Dis_StopHere;
+ }
+ DIP("j%s-8 0x%llx %s\n", name_AMD64Condcode(opc - 0x70), d64, comment);
+ break;
+ }
+
+ case 0xE3:
+         /* JRCXZ or JECXZ, depending on the address size override. */
+ if (have66orF2orF3(pfx)) goto decode_failure;
+ d64 = (guest_RIP_bbstart+delta+1) + getSDisp8(delta);
+ delta++;
+ if (haveASO(pfx)) {
+ /* 32-bit */
+ stmt( IRStmt_Exit( binop(Iop_CmpEQ64,
+ unop(Iop_32Uto64, getIReg32(R_RCX)),
+ mkU64(0)),
+ Ijk_Boring,
+ IRConst_U64(d64))
+ );
+ DIP("jecxz 0x%llx\n", d64);
+ } else {
+ /* 64-bit */
+ stmt( IRStmt_Exit( binop(Iop_CmpEQ64,
+ getIReg64(R_RCX),
+ mkU64(0)),
+ Ijk_Boring,
+ IRConst_U64(d64))
+ );
+ DIP("jrcxz 0x%llx\n", d64);
+ }
+ break;
+
+ case 0xE0: /* LOOPNE disp8: decrement count, jump if count != 0 && ZF==0 */
+ case 0xE1: /* LOOPE disp8: decrement count, jump if count != 0 && ZF==1 */
+ case 0xE2: /* LOOP disp8: decrement count, jump if count != 0 */
+      { /* The docs say this uses rCX as a count, with the width chosen
+           by the address size override, not the operand-size one.  We
+           reject address-size overrides just below, so the count is
+           always RCX. */
+ IRExpr* zbit = NULL;
+ IRExpr* count = NULL;
+ IRExpr* cond = NULL;
+ HChar* xtra = NULL;
+
+ if (have66orF2orF3(pfx) || haveASO(pfx)) goto decode_failure;
+ d64 = guest_RIP_bbstart+delta+1 + getSDisp8(delta);
+ delta++;
+ putIReg64(R_RCX, binop(Iop_Sub64, getIReg64(R_RCX), mkU64(1)));
+
+ count = getIReg64(R_RCX);
+ cond = binop(Iop_CmpNE64, count, mkU64(0));
+ switch (opc) {
+ case 0xE2:
+ xtra = "";
+ break;
+ case 0xE1:
+ xtra = "e";
+ zbit = mk_amd64g_calculate_condition( AMD64CondZ );
+ cond = mkAnd1(cond, zbit);
+ break;
+ case 0xE0:
+ xtra = "ne";
+ zbit = mk_amd64g_calculate_condition( AMD64CondNZ );
+ cond = mkAnd1(cond, zbit);
+ break;
+ default:
+ vassert(0);
+ }
+ stmt( IRStmt_Exit(cond, Ijk_Boring, IRConst_U64(d64)) );
+
+ DIP("loop%s 0x%llx\n", xtra, d64);
+ break;
+ }
+
+ /* ------------------------ IMUL ----------------------- */
+
+ case 0x69: /* IMUL Iv, Ev, Gv */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ delta = dis_imul_I_E_G ( vbi, pfx, sz, delta, sz );
+ break;
+ case 0x6B: /* IMUL Ib, Ev, Gv */
+ delta = dis_imul_I_E_G ( vbi, pfx, sz, delta, 1 );
+ break;
+
+ /* ------------------------ MOV ------------------------ */
+
+ case 0x88: /* MOV Gb,Eb */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ delta = dis_mov_G_E(vbi, pfx, 1, delta);
+ break;
+
+ case 0x89: /* MOV Gv,Ev */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ delta = dis_mov_G_E(vbi, pfx, sz, delta);
+ break;
+
+ case 0x8A: /* MOV Eb,Gb */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ delta = dis_mov_E_G(vbi, pfx, 1, delta);
+ break;
+
+ case 0x8B: /* MOV Ev,Gv */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ delta = dis_mov_E_G(vbi, pfx, sz, delta);
+ break;
+
+ case 0x8D: /* LEA M,Gv */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ if (sz != 4 && sz != 8)
+ goto decode_failure;
+ modrm = getUChar(delta);
+ if (epartIsReg(modrm))
+ goto decode_failure;
+ /* NOTE! this is the one place where a segment override prefix
+ has no effect on the address calculation. Therefore we clear
+ any segment override bits in pfx. */
+ addr = disAMode ( &alen, vbi, clearSegBits(pfx), delta, dis_buf, 0 );
+ delta += alen;
+         /* This is a hack.  But it isn't clear that doing the
+ calculation at 32 bits is really worth it. Hence for leal,
+ do the full 64-bit calculation and then truncate it. */
+ putIRegG( sz, pfx, modrm,
+ sz == 4
+ ? unop(Iop_64to32, mkexpr(addr))
+ : mkexpr(addr)
+ );
+ DIP("lea%c %s, %s\n", nameISize(sz), dis_buf,
+ nameIRegG(sz,pfx,modrm));
+ break;
+
+//.. case 0x8C: /* MOV Sw,Ew -- MOV from a SEGMENT REGISTER */
+//.. delta = dis_mov_Sw_Ew(sorb, sz, delta);
+//.. break;
+//..
+//.. case 0x8E: /* MOV Ew,Sw -- MOV to a SEGMENT REGISTER */
+//.. delta = dis_mov_Ew_Sw(sorb, delta);
+//.. break;
+
+ case 0xA0: /* MOV Ob,AL */
+ if (have66orF2orF3(pfx)) goto decode_failure;
+ sz = 1;
+ /* Fall through ... */
+ case 0xA1: /* MOV Ov,eAX */
+ if (sz != 8 && sz != 4 && sz != 2 && sz != 1)
+ goto decode_failure;
+ d64 = getDisp64(delta);
+ delta += 8;
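+         /* In 64-bit mode the moffs forms (A0..A3) carry a full
+            64-bit absolute address, hence the 8-byte fetch above. */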
+ ty = szToITy(sz);
+ addr = newTemp(Ity_I64);
+ assign( addr, handleAddrOverrides(vbi, pfx, mkU64(d64)) );
+ putIRegRAX(sz, loadLE( ty, mkexpr(addr) ));
+ DIP("mov%c %s0x%llx, %s\n", nameISize(sz),
+ segRegTxt(pfx), d64,
+ nameIRegRAX(sz));
+ break;
+
+ case 0xA2: /* MOV AL,Ob */
+ if (have66orF2orF3(pfx)) goto decode_failure;
+ sz = 1;
+ /* Fall through ... */
+ case 0xA3: /* MOV eAX,Ov */
+ if (sz != 8 && sz != 4 && sz != 2 && sz != 1)
+ goto decode_failure;
+ d64 = getDisp64(delta);
+ delta += 8;
+ ty = szToITy(sz);
+ addr = newTemp(Ity_I64);
+ assign( addr, handleAddrOverrides(vbi, pfx, mkU64(d64)) );
+ storeLE( mkexpr(addr), getIRegRAX(sz) );
+ DIP("mov%c %s, %s0x%llx\n", nameISize(sz), nameIRegRAX(sz),
+ segRegTxt(pfx), d64);
+ break;
+
+ /* XXXX be careful here with moves to AH/BH/CH/DH */
+ case 0xB0: /* MOV imm,AL */
+ case 0xB1: /* MOV imm,CL */
+ case 0xB2: /* MOV imm,DL */
+ case 0xB3: /* MOV imm,BL */
+ case 0xB4: /* MOV imm,AH */
+ case 0xB5: /* MOV imm,CH */
+ case 0xB6: /* MOV imm,DH */
+ case 0xB7: /* MOV imm,BH */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ d64 = getUChar(delta);
+ delta += 1;
+ putIRegRexB(1, pfx, opc-0xB0, mkU8(d64));
+ DIP("movb $%lld,%s\n", d64, nameIRegRexB(1,pfx,opc-0xB0));
+ break;
+
+ case 0xB8: /* MOV imm,eAX */
+ case 0xB9: /* MOV imm,eCX */
+ case 0xBA: /* MOV imm,eDX */
+ case 0xBB: /* MOV imm,eBX */
+ case 0xBC: /* MOV imm,eSP */
+ case 0xBD: /* MOV imm,eBP */
+ case 0xBE: /* MOV imm,eSI */
+ case 0xBF: /* MOV imm,eDI */
+ /* This is the one-and-only place where 64-bit literals are
+ allowed in the instruction stream. */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ if (sz == 8) {
+ d64 = getDisp64(delta);
+ delta += 8;
+ putIRegRexB(8, pfx, opc-0xB8, mkU64(d64));
+ DIP("movabsq $%lld,%s\n", (Long)d64,
+ nameIRegRexB(8,pfx,opc-0xB8));
+ } else {
+ d64 = getSDisp(imin(4,sz),delta);
+ delta += imin(4,sz);
+ putIRegRexB(sz, pfx, opc-0xB8,
+ mkU(szToITy(sz), d64 & mkSizeMask(sz)));
+ DIP("mov%c $%lld,%s\n", nameISize(sz),
+ (Long)d64,
+ nameIRegRexB(sz,pfx,opc-0xB8));
+ }
+ break;
+
+ case 0xC6: /* MOV Ib,Eb */
+ sz = 1;
+ goto do_Mov_I_E;
+ case 0xC7: /* MOV Iv,Ev */
+ goto do_Mov_I_E;
+
+ do_Mov_I_E:
+ if (haveF2orF3(pfx)) goto decode_failure;
+ modrm = getUChar(delta);
+ if (epartIsReg(modrm)) {
+ delta++; /* mod/rm byte */
+ d64 = getSDisp(imin(4,sz),delta);
+ delta += imin(4,sz);
+ putIRegE(sz, pfx, modrm,
+ mkU(szToITy(sz), d64 & mkSizeMask(sz)));
+ DIP("mov%c $%lld, %s\n", nameISize(sz),
+ (Long)d64,
+ nameIRegE(sz,pfx,modrm));
+ } else {
+ addr = disAMode ( &alen, vbi, pfx, delta, dis_buf,
+ /*xtra*/imin(4,sz) );
+ delta += alen;
+ d64 = getSDisp(imin(4,sz),delta);
+ delta += imin(4,sz);
+ storeLE(mkexpr(addr),
+ mkU(szToITy(sz), d64 & mkSizeMask(sz)));
+ DIP("mov%c $%lld, %s\n", nameISize(sz), (Long)d64, dis_buf);
+ }
+ break;
+
+ /* ------------------------ MOVx ------------------------ */
+
+ case 0x63: /* MOVSX */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ if (haveREX(pfx) && 1==getRexW(pfx)) {
+ vassert(sz == 8);
+ /* movsx r/m32 to r64 */
+ modrm = getUChar(delta);
+ if (epartIsReg(modrm)) {
+ delta++;
+ putIRegG(8, pfx, modrm,
+ unop(Iop_32Sto64,
+ getIRegE(4, pfx, modrm)));
+ DIP("movslq %s,%s\n",
+ nameIRegE(4, pfx, modrm),
+ nameIRegG(8, pfx, modrm));
+ break;
+ } else {
+ addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
+ delta += alen;
+ putIRegG(8, pfx, modrm,
+ unop(Iop_32Sto64,
+ loadLE(Ity_I32, mkexpr(addr))));
+ DIP("movslq %s,%s\n", dis_buf,
+ nameIRegG(8, pfx, modrm));
+ break;
+ }
+ } else {
+ goto decode_failure;
+ }
+
+ /* ------------------------ opl imm, A ----------------- */
+
+ case 0x04: /* ADD Ib, AL */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ delta = dis_op_imm_A( 1, False, Iop_Add8, True, delta, "add" );
+ break;
+ case 0x05: /* ADD Iv, eAX */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ delta = dis_op_imm_A(sz, False, Iop_Add8, True, delta, "add" );
+ break;
+
+ case 0x0C: /* OR Ib, AL */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ delta = dis_op_imm_A( 1, False, Iop_Or8, True, delta, "or" );
+ break;
+ case 0x0D: /* OR Iv, eAX */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ delta = dis_op_imm_A( sz, False, Iop_Or8, True, delta, "or" );
+ break;
+
+ case 0x14: /* ADC Ib, AL */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ delta = dis_op_imm_A( 1, True, Iop_Add8, True, delta, "adc" );
+ break;
+ case 0x15: /* ADC Iv, eAX */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ delta = dis_op_imm_A( sz, True, Iop_Add8, True, delta, "adc" );
+ break;
+
+ case 0x1C: /* SBB Ib, AL */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ delta = dis_op_imm_A( 1, True, Iop_Sub8, True, delta, "sbb" );
+ break;
+ case 0x1D: /* SBB Iv, eAX */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ delta = dis_op_imm_A( sz, True, Iop_Sub8, True, delta, "sbb" );
+ break;
+
+ case 0x24: /* AND Ib, AL */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ delta = dis_op_imm_A( 1, False, Iop_And8, True, delta, "and" );
+ break;
+ case 0x25: /* AND Iv, eAX */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ delta = dis_op_imm_A( sz, False, Iop_And8, True, delta, "and" );
+ break;
+
+ case 0x2C: /* SUB Ib, AL */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ delta = dis_op_imm_A(1, False, Iop_Sub8, True, delta, "sub" );
+ break;
+ case 0x2D: /* SUB Iv, eAX */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ delta = dis_op_imm_A( sz, False, Iop_Sub8, True, delta, "sub" );
+ break;
+
+ case 0x34: /* XOR Ib, AL */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ delta = dis_op_imm_A( 1, False, Iop_Xor8, True, delta, "xor" );
+ break;
+ case 0x35: /* XOR Iv, eAX */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ delta = dis_op_imm_A( sz, False, Iop_Xor8, True, delta, "xor" );
+ break;
+
+ case 0x3C: /* CMP Ib, AL */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ delta = dis_op_imm_A( 1, False, Iop_Sub8, False, delta, "cmp" );
+ break;
+ case 0x3D: /* CMP Iv, eAX */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ delta = dis_op_imm_A( sz, False, Iop_Sub8, False, delta, "cmp" );
+ break;
+
+ case 0xA8: /* TEST Ib, AL */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ delta = dis_op_imm_A( 1, False, Iop_And8, False, delta, "test" );
+ break;
+ case 0xA9: /* TEST Iv, eAX */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ delta = dis_op_imm_A( sz, False, Iop_And8, False, delta, "test" );
+ break;
+
+ /* ------------------------ opl Ev, Gv ----------------- */
+
+ case 0x02: /* ADD Eb,Gb */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ delta = dis_op2_E_G ( vbi, pfx, False, Iop_Add8, True, 1, delta, "add" );
+ break;
+ case 0x03: /* ADD Ev,Gv */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ delta = dis_op2_E_G ( vbi, pfx, False, Iop_Add8, True, sz, delta, "add" );
+ break;
+
+ case 0x0A: /* OR Eb,Gb */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ delta = dis_op2_E_G ( vbi, pfx, False, Iop_Or8, True, 1, delta, "or" );
+ break;
+ case 0x0B: /* OR Ev,Gv */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ delta = dis_op2_E_G ( vbi, pfx, False, Iop_Or8, True, sz, delta, "or" );
+ break;
+
+ case 0x12: /* ADC Eb,Gb */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ delta = dis_op2_E_G ( vbi, pfx, True, Iop_Add8, True, 1, delta, "adc" );
+ break;
+ case 0x13: /* ADC Ev,Gv */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ delta = dis_op2_E_G ( vbi, pfx, True, Iop_Add8, True, sz, delta, "adc" );
+ break;
+
+ case 0x1A: /* SBB Eb,Gb */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ delta = dis_op2_E_G ( vbi, pfx, True, Iop_Sub8, True, 1, delta, "sbb" );
+ break;
+ case 0x1B: /* SBB Ev,Gv */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ delta = dis_op2_E_G ( vbi, pfx, True, Iop_Sub8, True, sz, delta, "sbb" );
+ break;
+
+ case 0x22: /* AND Eb,Gb */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ delta = dis_op2_E_G ( vbi, pfx, False, Iop_And8, True, 1, delta, "and" );
+ break;
+ case 0x23: /* AND Ev,Gv */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ delta = dis_op2_E_G ( vbi, pfx, False, Iop_And8, True, sz, delta, "and" );
+ break;
+
+ case 0x2A: /* SUB Eb,Gb */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ delta = dis_op2_E_G ( vbi, pfx, False, Iop_Sub8, True, 1, delta, "sub" );
+ break;
+ case 0x2B: /* SUB Ev,Gv */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ delta = dis_op2_E_G ( vbi, pfx, False, Iop_Sub8, True, sz, delta, "sub" );
+ break;
+
+ case 0x32: /* XOR Eb,Gb */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ delta = dis_op2_E_G ( vbi, pfx, False, Iop_Xor8, True, 1, delta, "xor" );
+ break;
+ case 0x33: /* XOR Ev,Gv */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ delta = dis_op2_E_G ( vbi, pfx, False, Iop_Xor8, True, sz, delta, "xor" );
+ break;
+
+ case 0x3A: /* CMP Eb,Gb */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ delta = dis_op2_E_G ( vbi, pfx, False, Iop_Sub8, False, 1, delta, "cmp" );
+ break;
+ case 0x3B: /* CMP Ev,Gv */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ delta = dis_op2_E_G ( vbi, pfx, False, Iop_Sub8, False, sz, delta, "cmp" );
+ break;
+
+ case 0x84: /* TEST Eb,Gb */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ delta = dis_op2_E_G ( vbi, pfx, False, Iop_And8, False, 1, delta, "test" );
+ break;
+ case 0x85: /* TEST Ev,Gv */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ delta = dis_op2_E_G ( vbi, pfx, False, Iop_And8, False, sz, delta, "test" );
+ break;
+
+ /* ------------------------ opl Gv, Ev ----------------- */
+
+ case 0x00: /* ADD Gb,Eb */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ delta = dis_op2_G_E ( vbi, pfx, False, Iop_Add8, True, 1, delta, "add" );
+ break;
+ case 0x01: /* ADD Gv,Ev */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ delta = dis_op2_G_E ( vbi, pfx, False, Iop_Add8, True, sz, delta, "add" );
+ break;
+
+ case 0x08: /* OR Gb,Eb */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ delta = dis_op2_G_E ( vbi, pfx, False, Iop_Or8, True, 1, delta, "or" );
+ break;
+ case 0x09: /* OR Gv,Ev */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ delta = dis_op2_G_E ( vbi, pfx, False, Iop_Or8, True, sz, delta, "or" );
+ break;
+
+ case 0x10: /* ADC Gb,Eb */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ delta = dis_op2_G_E ( vbi, pfx, True, Iop_Add8, True, 1, delta, "adc" );
+ break;
+ case 0x11: /* ADC Gv,Ev */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ delta = dis_op2_G_E ( vbi, pfx, True, Iop_Add8, True, sz, delta, "adc" );
+ break;
+
+ case 0x18: /* SBB Gb,Eb */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ delta = dis_op2_G_E ( vbi, pfx, True, Iop_Sub8, True, 1, delta, "sbb" );
+ break;
+ case 0x19: /* SBB Gv,Ev */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ delta = dis_op2_G_E ( vbi, pfx, True, Iop_Sub8, True, sz, delta, "sbb" );
+ break;
+
+ case 0x20: /* AND Gb,Eb */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ delta = dis_op2_G_E ( vbi, pfx, False, Iop_And8, True, 1, delta, "and" );
+ break;
+ case 0x21: /* AND Gv,Ev */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ delta = dis_op2_G_E ( vbi, pfx, False, Iop_And8, True, sz, delta, "and" );
+ break;
+
+ case 0x28: /* SUB Gb,Eb */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ delta = dis_op2_G_E ( vbi, pfx, False, Iop_Sub8, True, 1, delta, "sub" );
+ break;
+ case 0x29: /* SUB Gv,Ev */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ delta = dis_op2_G_E ( vbi, pfx, False, Iop_Sub8, True, sz, delta, "sub" );
+ break;
+
+ case 0x30: /* XOR Gb,Eb */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ delta = dis_op2_G_E ( vbi, pfx, False, Iop_Xor8, True, 1, delta, "xor" );
+ break;
+ case 0x31: /* XOR Gv,Ev */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ delta = dis_op2_G_E ( vbi, pfx, False, Iop_Xor8, True, sz, delta, "xor" );
+ break;
+
+ case 0x38: /* CMP Gb,Eb */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ delta = dis_op2_G_E ( vbi, pfx, False, Iop_Sub8, False, 1, delta, "cmp" );
+ break;
+ case 0x39: /* CMP Gv,Ev */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ delta = dis_op2_G_E ( vbi, pfx, False, Iop_Sub8, False, sz, delta, "cmp" );
+ break;
+
+ /* ------------------------ POP ------------------------ */
+
+ case 0x58: /* POP eAX */
+ case 0x59: /* POP eCX */
+ case 0x5A: /* POP eDX */
+ case 0x5B: /* POP eBX */
+ case 0x5D: /* POP eBP */
+ case 0x5E: /* POP eSI */
+ case 0x5F: /* POP eDI */
+ case 0x5C: /* POP eSP */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ vassert(sz == 2 || sz == 4 || sz == 8);
+ if (sz == 4)
+ sz = 8; /* there is no encoding for 32-bit pop in 64-bit mode */
+ t1 = newTemp(szToITy(sz));
+ t2 = newTemp(Ity_I64);
+ assign(t2, getIReg64(R_RSP));
+ assign(t1, loadLE(szToITy(sz),mkexpr(t2)));
+ putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t2), mkU64(sz)));
+ putIRegRexB(sz, pfx, opc-0x58, mkexpr(t1));
+ DIP("pop%c %s\n", nameISize(sz), nameIRegRexB(sz,pfx,opc-0x58));
+ break;
+
+ case 0x9D: /* POPF */
+ /* Note. There is no encoding for a 32-bit popf in 64-bit mode.
+ So sz==4 actually means sz==8. */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ vassert(sz == 2 || sz == 4);
+ if (sz == 4) sz = 8;
+ if (sz != 8) goto decode_failure; // until we know a sz==2 test case exists
+ t1 = newTemp(Ity_I64); t2 = newTemp(Ity_I64);
+ assign(t2, getIReg64(R_RSP));
+ assign(t1, widenUto64(loadLE(szToITy(sz),mkexpr(t2))));
+ putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t2), mkU64(sz)));
+ /* t1 is the flag word. Mask out everything except OSZACP and
+ set the flags thunk to AMD64G_CC_OP_COPY. */
+ stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
+ stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
+ stmt( IRStmt_Put( OFFB_CC_DEP1,
+ binop(Iop_And64,
+ mkexpr(t1),
+ mkU64( AMD64G_CC_MASK_C | AMD64G_CC_MASK_P
+ | AMD64G_CC_MASK_A | AMD64G_CC_MASK_Z
+ | AMD64G_CC_MASK_S| AMD64G_CC_MASK_O )
+ )
+ )
+ );
+
+ /* Also need to set the D flag, which is held in bit 10 of t1.
+ If zero, put 1 in OFFB_DFLAG, else -1 in OFFB_DFLAG. */
+ stmt( IRStmt_Put(
+ OFFB_DFLAG,
+ IRExpr_Mux0X(
+ unop(Iop_32to8,
+ unop(Iop_64to32,
+ binop(Iop_And64,
+ binop(Iop_Shr64, mkexpr(t1), mkU8(10)),
+ mkU64(1)))),
+ mkU64(1),
+ mkU64(0xFFFFFFFFFFFFFFFFULL)))
+ );
+
+ /* And set the ID flag */
+ stmt( IRStmt_Put(
+ OFFB_IDFLAG,
+ IRExpr_Mux0X(
+ unop(Iop_32to8,
+ unop(Iop_64to32,
+ binop(Iop_And64,
+ binop(Iop_Shr64, mkexpr(t1), mkU8(21)),
+ mkU64(1)))),
+ mkU64(0),
+ mkU64(1)))
+ );
+
+ /* And set the AC flag too */
+ stmt( IRStmt_Put(
+ OFFB_ACFLAG,
+ IRExpr_Mux0X(
+ unop(Iop_32to8,
+ unop(Iop_64to32,
+ binop(Iop_And64,
+ binop(Iop_Shr64, mkexpr(t1), mkU8(18)),
+ mkU64(1)))),
+ mkU64(0),
+ mkU64(1)))
+ );
+
+ DIP("popf%c\n", nameISize(sz));
+ break;
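+
+      /* Summary of the thunk convention relied on above (assuming
+         the AMD64G_CC_* masks sit at the architectural rflags bit
+         positions): with CC_OP == AMD64G_CC_OP_COPY, the computed
+         rflags are simply
+             CC_DEP1 & (O|S|Z|A|C|P)
+         so storing the masked popped word in CC_DEP1 makes the
+         OSZACP flags take exactly the popped values.  D, ID and AC
+         do not live in the thunk, hence the three explicit Puts. */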
+
+//.. case 0x61: /* POPA */
+//.. /* This is almost certainly wrong for sz==2. So ... */
+//.. if (sz != 4) goto decode_failure;
+//..
+//.. /* t5 is the old %ESP value. */
+//.. t5 = newTemp(Ity_I32);
+//.. assign( t5, getIReg(4, R_ESP) );
+//..
+//.. /* Reload all the registers, except %esp. */
+//.. putIReg(4,R_EAX, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32(28)) ));
+//.. putIReg(4,R_ECX, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32(24)) ));
+//.. putIReg(4,R_EDX, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32(20)) ));
+//.. putIReg(4,R_EBX, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32(16)) ));
+//.. /* ignore saved %ESP */
+//.. putIReg(4,R_EBP, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32( 8)) ));
+//.. putIReg(4,R_ESI, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32( 4)) ));
+//.. putIReg(4,R_EDI, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32( 0)) ));
+//..
+//.. /* and move %ESP back up */
+//.. putIReg( 4, R_ESP, binop(Iop_Add32, mkexpr(t5), mkU32(8*4)) );
+//..
+//..       DIP("popa%c\n", nameISize(sz));
+//.. break;
+
+ case 0x8F: { /* POPQ m64 / POPW m16 */
+ Int len;
+ UChar rm;
+ /* There is no encoding for 32-bit pop in 64-bit mode.
+ So sz==4 actually means sz==8. */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ vassert(sz == 2 || sz == 4);
+ if (sz == 4) sz = 8;
+ if (sz != 8) goto decode_failure; // until we know a sz==2 test case exists
+
+ rm = getUChar(delta);
+
+      /* make sure this instruction really is a POP */
+      if (epartIsReg(rm) || gregLO3ofRM(rm) != 0)
+         goto decode_failure;
+      /* and has the correct size */
+      vassert(sz == 8);
+
+ t1 = newTemp(Ity_I64);
+ t3 = newTemp(Ity_I64);
+ assign( t1, getIReg64(R_RSP) );
+ assign( t3, loadLE(Ity_I64, mkexpr(t1)) );
+
+ /* Increase RSP; must be done before the STORE. Intel manual
+ says: If the RSP register is used as a base register for
+ addressing a destination operand in memory, the POP
+ instruction computes the effective address of the operand
+ after it increments the RSP register. */
+ putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t1), mkU64(sz)) );
+
+ addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 0 );
+ storeLE( mkexpr(addr), mkexpr(t3) );
+
+ DIP("popl %s\n", dis_buf);
+
+ delta += len;
+ break;
+ }
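+
+   /* Concrete example of the quirk above: for "popq (%rsp)", the
+      value is loaded from the old RSP, RSP is bumped by 8, and the
+      store then goes to the address computed from the *new* RSP,
+      viz. old RSP + 8.  Hence disAMode must run only after the RSP
+      update. */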
+
+//.. //-- case 0x1F: /* POP %DS */
+//.. //-- dis_pop_segreg( cb, R_DS, sz ); break;
+//.. //-- case 0x07: /* POP %ES */
+//.. //-- dis_pop_segreg( cb, R_ES, sz ); break;
+//.. //-- case 0x17: /* POP %SS */
+//.. //-- dis_pop_segreg( cb, R_SS, sz ); break;
+
+ /* ------------------------ PUSH ----------------------- */
+
+ case 0x50: /* PUSH eAX */
+ case 0x51: /* PUSH eCX */
+ case 0x52: /* PUSH eDX */
+ case 0x53: /* PUSH eBX */
+ case 0x55: /* PUSH eBP */
+ case 0x56: /* PUSH eSI */
+ case 0x57: /* PUSH eDI */
+ case 0x54: /* PUSH eSP */
+ /* This is the Right Way, in that the value to be pushed is
+ established before %rsp is changed, so that pushq %rsp
+ correctly pushes the old value. */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ vassert(sz == 2 || sz == 4 || sz == 8);
+ if (sz == 4)
+ sz = 8; /* there is no encoding for 32-bit push in 64-bit mode */
+ ty = sz==2 ? Ity_I16 : Ity_I64;
+ t1 = newTemp(ty);
+ t2 = newTemp(Ity_I64);
+ assign(t1, getIRegRexB(sz, pfx, opc-0x50));
+ assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(sz)));
+ putIReg64(R_RSP, mkexpr(t2) );
+ storeLE(mkexpr(t2),mkexpr(t1));
+ DIP("push%c %s\n", nameISize(sz), nameIRegRexB(sz,pfx,opc-0x50));
+ break;
+
+ case 0x68: /* PUSH Iv */
+ if (haveF2orF3(pfx)) goto decode_failure;
+      /* Note, there is no encoding for a 32-bit push in 64-bit
+         mode, so sz==4 really means sz==8.  Hence ... */
+ if (sz == 4) sz = 8;
+ d64 = getSDisp(imin(4,sz),delta);
+ delta += imin(4,sz);
+ goto do_push_I;
+ case 0x6A: /* PUSH Ib, sign-extended to sz */
+ if (haveF2orF3(pfx)) goto decode_failure;
+      /* Note, there is no encoding for a 32-bit push in 64-bit
+         mode, so sz==4 really means sz==8.  Hence ... */
+ if (sz == 4) sz = 8;
+ d64 = getSDisp8(delta); delta += 1;
+ goto do_push_I;
+ do_push_I:
+ ty = szToITy(sz);
+ t1 = newTemp(Ity_I64);
+ t2 = newTemp(ty);
+ assign( t1, binop(Iop_Sub64,getIReg64(R_RSP),mkU64(sz)) );
+ putIReg64(R_RSP, mkexpr(t1) );
+      /* stop mkU16 asserting if d64 is a negative 16-bit number
+         (bug #132813) */
+ if (ty == Ity_I16)
+ d64 &= 0xFFFF;
+ storeLE( mkexpr(t1), mkU(ty,d64) );
+ DIP("push%c $%lld\n", nameISize(sz), (Long)d64);
+ break;
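+
+      /* Worked example: "pushq $-1" (6A FF) sign-extends the byte,
+         so RSP drops by 8 and 0xFFFFFFFFFFFFFFFF is stored; with a
+         66 prefix ("pushw $-1") only 0xFFFF is stored, which is why
+         d64 is masked to 16 bits above. */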
+
+ case 0x9C: /* PUSHF */ {
+ /* Note. There is no encoding for a 32-bit pushf in 64-bit
+ mode. So sz==4 actually means sz==8. */
+ /* 24 July 06: has also been seen with a redundant REX prefix,
+ so must also allow sz==8. */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ vassert(sz == 2 || sz == 4 || sz == 8);
+ if (sz == 4) sz = 8;
+ if (sz != 8) goto decode_failure; // until we know a sz==2 test case exists
+
+ t1 = newTemp(Ity_I64);
+ assign( t1, binop(Iop_Sub64,getIReg64(R_RSP),mkU64(sz)) );
+ putIReg64(R_RSP, mkexpr(t1) );
+
+ t2 = newTemp(Ity_I64);
+ assign( t2, mk_amd64g_calculate_rflags_all() );
+
+   /* Patch in the D flag.  Since the guest DFLAG field holds 1 or
+      -1 (all ones), its bit 10 is exactly the architectural D bit. */
+ t3 = newTemp(Ity_I64);
+ assign( t3, binop(Iop_Or64,
+ mkexpr(t2),
+ binop(Iop_And64,
+ IRExpr_Get(OFFB_DFLAG,Ity_I64),
+ mkU64(1<<10)))
+ );
+
+ /* And patch in the ID flag. */
+ t4 = newTemp(Ity_I64);
+ assign( t4, binop(Iop_Or64,
+ mkexpr(t3),
+ binop(Iop_And64,
+ binop(Iop_Shl64, IRExpr_Get(OFFB_IDFLAG,Ity_I64),
+ mkU8(21)),
+ mkU64(1<<21)))
+ );
+
+ /* And patch in the AC flag too. */
+ t5 = newTemp(Ity_I64);
+ assign( t5, binop(Iop_Or64,
+ mkexpr(t4),
+ binop(Iop_And64,
+ binop(Iop_Shl64, IRExpr_Get(OFFB_ACFLAG,Ity_I64),
+ mkU8(18)),
+ mkU64(1<<18)))
+ );
+
+ /* if sz==2, the stored value needs to be narrowed. */
+ if (sz == 2)
+ storeLE( mkexpr(t1), unop(Iop_32to16,
+ unop(Iop_64to32,mkexpr(t5))) );
+ else
+ storeLE( mkexpr(t1), mkexpr(t5) );
+
+ DIP("pushf%c\n", nameISize(sz));
+ break;
+ }
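+
+   /* So the stored word is assembled as (a sketch):
+        rflags = OSZACP(thunk)          // from the flags thunk
+               | (DFLAG & (1 << 10))    // DFLAG is 1 or -1 (all ones)
+               | (IDFLAG << 21)         // IDFLAG is 0 or 1
+               | (ACFLAG << 18)         // ACFLAG is 0 or 1
+      matching the architectural D, ID and AC bit positions. */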
+
+//.. case 0x60: /* PUSHA */
+//.. /* This is almost certainly wrong for sz==2. So ... */
+//.. if (sz != 4) goto decode_failure;
+//..
+//.. /* This is the Right Way, in that the value to be pushed is
+//.. established before %esp is changed, so that pusha
+//.. correctly pushes the old %esp value. New value of %esp is
+//.. pushed at start. */
+//.. /* t0 is the %ESP value we're going to push. */
+//.. t0 = newTemp(Ity_I32);
+//.. assign( t0, getIReg(4, R_ESP) );
+//..
+//.. /* t5 will be the new %ESP value. */
+//.. t5 = newTemp(Ity_I32);
+//.. assign( t5, binop(Iop_Sub32, mkexpr(t0), mkU32(8*4)) );
+//..
+//.. /* Update guest state before prodding memory. */
+//.. putIReg(4, R_ESP, mkexpr(t5));
+//..
+//.. /* Dump all the registers. */
+//.. storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(28)), getIReg(4,R_EAX) );
+//.. storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(24)), getIReg(4,R_ECX) );
+//.. storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(20)), getIReg(4,R_EDX) );
+//.. storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(16)), getIReg(4,R_EBX) );
+//.. storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(12)), mkexpr(t0) /*esp*/);
+//.. storeLE( binop(Iop_Add32,mkexpr(t5),mkU32( 8)), getIReg(4,R_EBP) );
+//.. storeLE( binop(Iop_Add32,mkexpr(t5),mkU32( 4)), getIReg(4,R_ESI) );
+//.. storeLE( binop(Iop_Add32,mkexpr(t5),mkU32( 0)), getIReg(4,R_EDI) );
+//..
+//.. DIP("pusha%c\n", nameISize(sz));
+//.. break;
+//..
+//..
+//.. //-- case 0x0E: /* PUSH %CS */
+//.. //-- dis_push_segreg( cb, R_CS, sz ); break;
+//.. //-- case 0x1E: /* PUSH %DS */
+//.. //-- dis_push_segreg( cb, R_DS, sz ); break;
+//.. //-- case 0x06: /* PUSH %ES */
+//.. //-- dis_push_segreg( cb, R_ES, sz ); break;
+//.. //-- case 0x16: /* PUSH %SS */
+//.. //-- dis_push_segreg( cb, R_SS, sz ); break;
+//..
+//.. /* ------------------------ SCAS et al ----------------- */
+//..
+//.. case 0xA4: /* MOVS, no REP prefix */
+//.. case 0xA5:
+//.. dis_string_op( dis_MOVS, ( opc == 0xA4 ? 1 : sz ), "movs", sorb );
+//.. break;
+//..
+//.. case 0xA6: /* CMPSb, no REP prefix */
+//.. //-- case 0xA7:
+//.. dis_string_op( dis_CMPS, ( opc == 0xA6 ? 1 : sz ), "cmps", sorb );
+//.. break;
+//.. //--
+//.. //--
+ case 0xAC: /* LODS, no REP prefix */
+ case 0xAD:
+ dis_string_op( dis_LODS, ( opc == 0xAC ? 1 : sz ), "lods", pfx );
+ break;
+//..
+//.. case 0xAE: /* SCAS, no REP prefix */
+//.. case 0xAF:
+//.. dis_string_op( dis_SCAS, ( opc == 0xAE ? 1 : sz ), "scas", sorb );
+//.. break;
+
+
+ case 0xFC: /* CLD */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ stmt( IRStmt_Put( OFFB_DFLAG, mkU64(1)) );
+ DIP("cld\n");
+ break;
+
+ case 0xFD: /* STD */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ stmt( IRStmt_Put( OFFB_DFLAG, mkU64(-1ULL)) );
+ DIP("std\n");
+ break;
+
+ case 0xF8: /* CLC */
+ case 0xF9: /* STC */
+ case 0xF5: /* CMC */
+ t0 = newTemp(Ity_I64);
+ t1 = newTemp(Ity_I64);
+ assign( t0, mk_amd64g_calculate_rflags_all() );
+ switch (opc) {
+ case 0xF8:
+ assign( t1, binop(Iop_And64, mkexpr(t0),
+ mkU64(~AMD64G_CC_MASK_C)));
+ DIP("clc\n");
+ break;
+ case 0xF9:
+ assign( t1, binop(Iop_Or64, mkexpr(t0),
+ mkU64(AMD64G_CC_MASK_C)));
+ DIP("stc\n");
+ break;
+ case 0xF5:
+ assign( t1, binop(Iop_Xor64, mkexpr(t0),
+ mkU64(AMD64G_CC_MASK_C)));
+ DIP("cmc\n");
+ break;
+ default:
+            vpanic("disInstr(amd64)(clc/stc/cmc)");
+ }
+ stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
+ stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
+ stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t1) ));
+ /* Set NDEP even though it isn't used. This makes redundant-PUT
+ elimination of previous stores to this field work better. */
+ stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
+ break;
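+
+      /* In effect, starting from the fully-recomputed rflags:
+             clc: new = old & ~C;   stc: new = old | C;
+             cmc: new = old ^ C;    (C == AMD64G_CC_MASK_C)
+         and the result is reinstalled via the COPY thunk. */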
+
+//.. /* REPNE prefix insn */
+//.. case 0xF2: {
+//.. Addr32 eip_orig = guest_eip_bbstart + delta - 1;
+//.. vassert(sorb == 0);
+//.. abyte = getUChar(delta); delta++;
+//..
+//.. if (abyte == 0x66) { sz = 2; abyte = getUChar(delta); delta++; }
+//.. whatNext = Dis_StopHere;
+//..
+//.. switch (abyte) {
+//.. /* According to the Intel manual, "repne movs" should never occur, but
+//.. * in practice it has happened, so allow for it here... */
+//.. case 0xA4: sz = 1; /* REPNE MOVS<sz> */
+//.. goto decode_failure;
+//.. //-- case 0xA5:
+//.. // dis_REP_op ( CondNZ, dis_MOVS, sz, eip_orig,
+//.. // guest_eip_bbstart+delta, "repne movs" );
+//.. // break;
+//.. //--
+//.. //-- case 0xA6: sz = 1; /* REPNE CMPS<sz> */
+//.. //-- case 0xA7:
+//.. //-- dis_REP_op ( cb, CondNZ, dis_CMPS, sz, eip_orig, eip, "repne cmps" );
+//.. //-- break;
+//.. //--
+//.. case 0xAE: sz = 1; /* REPNE SCAS<sz> */
+//.. case 0xAF:
+//.. dis_REP_op ( X86CondNZ, dis_SCAS, sz, eip_orig,
+//.. guest_eip_bbstart+delta, "repne scas" );
+//.. break;
+//..
+//.. default:
+//.. goto decode_failure;
+//.. }
+//.. break;
+//.. }
+
+ /* ------ AE: SCAS variants ------ */
+ case 0xAE:
+ case 0xAF:
+ /* F2 AE/AF: repne scasb/repne scas{w,l,q} */
+ if (haveASO(pfx))
+ goto decode_failure;
+ if (haveF2(pfx) && !haveF3(pfx)) {
+ if (opc == 0xAE)
+ sz = 1;
+ dis_REP_op ( AMD64CondNZ, dis_SCAS, sz,
+ guest_RIP_curr_instr,
+ guest_RIP_bbstart+delta, "repne scas", pfx );
+ dres.whatNext = Dis_StopHere;
+ break;
+ }
+ /* F3 AE/AF: repe scasb/repe scas{w,l,q} */
+ if (haveASO(pfx))
+ goto decode_failure;
+ if (!haveF2(pfx) && haveF3(pfx)) {
+ if (opc == 0xAE)
+ sz = 1;
+ dis_REP_op ( AMD64CondZ, dis_SCAS, sz,
+ guest_RIP_curr_instr,
+ guest_RIP_bbstart+delta, "repe scas", pfx );
+ dres.whatNext = Dis_StopHere;
+ break;
+ }
+ /* AE/AF: scasb/scas{w,l,q} */
+ if (!haveF2(pfx) && !haveF3(pfx)) {
+ if (opc == 0xAE)
+ sz = 1;
+ dis_string_op( dis_SCAS, sz, "scas", pfx );
+ break;
+ }
+ goto decode_failure;
+
+ /* ------ A6, A7: CMPS variants ------ */
+ case 0xA6:
+ case 0xA7:
+      /* F3 A6/A7: repe cmpsb/repe cmps{w,l,q} */
+ if (haveASO(pfx))
+ goto decode_failure;
+ if (haveF3(pfx) && !haveF2(pfx)) {
+ if (opc == 0xA6)
+ sz = 1;
+ dis_REP_op ( AMD64CondZ, dis_CMPS, sz,
+ guest_RIP_curr_instr,
+ guest_RIP_bbstart+delta, "repe cmps", pfx );
+ dres.whatNext = Dis_StopHere;
+ break;
+ }
+ goto decode_failure;
+
+ /* ------ AA, AB: STOS variants ------ */
+ case 0xAA:
+ case 0xAB:
+ /* F3 AA/AB: rep stosb/rep stos{w,l,q} */
+ if (haveASO(pfx))
+ goto decode_failure;
+ if (haveF3(pfx) && !haveF2(pfx)) {
+ if (opc == 0xAA)
+ sz = 1;
+ dis_REP_op ( AMD64CondAlways, dis_STOS, sz,
+ guest_RIP_curr_instr,
+ guest_RIP_bbstart+delta, "rep stos", pfx );
+ dres.whatNext = Dis_StopHere;
+ break;
+ }
+ /* AA/AB: stosb/stos{w,l,q} */
+ if (!haveF3(pfx) && !haveF2(pfx)) {
+ if (opc == 0xAA)
+ sz = 1;
+ dis_string_op( dis_STOS, sz, "stos", pfx );
+ break;
+ }
+ goto decode_failure;
+
+ /* ------ A4, A5: MOVS variants ------ */
+ case 0xA4:
+ case 0xA5:
+      /* F3 A4/A5: rep movsb/rep movs{w,l,q} */
+ if (haveASO(pfx))
+ goto decode_failure;
+ if (haveF3(pfx) && !haveF2(pfx)) {
+ if (opc == 0xA4)
+ sz = 1;
+ dis_REP_op ( AMD64CondAlways, dis_MOVS, sz,
+ guest_RIP_curr_instr,
+ guest_RIP_bbstart+delta, "rep movs", pfx );
+ dres.whatNext = Dis_StopHere;
+ break;
+ }
+      /* A4/A5: movsb/movs{w,l,q} */
+ if (!haveF3(pfx) && !haveF2(pfx)) {
+ if (opc == 0xA4)
+ sz = 1;
+ dis_string_op( dis_MOVS, sz, "movs", pfx );
+ break;
+ }
+ goto decode_failure;
+
+
+ /* ------------------------ XCHG ----------------------- */
+
+   /* XCHG reg,mem automatically asserts LOCK# even without a LOCK
+      prefix.  Therefore, the memory case below is implemented as a
+      compare-and-swap (casLE), and *expect_CAS is set.  But be
+      careful: if it is used with an explicit LOCK prefix, we don't
+      want to generate the atomic machinery twice -- once here and
+      once by the generic LOCK logic at the top of disInstr. */
+ case 0x86: /* XCHG Gb,Eb */
+ sz = 1;
+ /* Fall through ... */
+ case 0x87: /* XCHG Gv,Ev */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ modrm = getUChar(delta);
+ ty = szToITy(sz);
+ t1 = newTemp(ty); t2 = newTemp(ty);
+ if (epartIsReg(modrm)) {
+ assign(t1, getIRegE(sz, pfx, modrm));
+ assign(t2, getIRegG(sz, pfx, modrm));
+ putIRegG(sz, pfx, modrm, mkexpr(t1));
+ putIRegE(sz, pfx, modrm, mkexpr(t2));
+ delta++;
+ DIP("xchg%c %s, %s\n",
+ nameISize(sz), nameIRegG(sz, pfx, modrm),
+ nameIRegE(sz, pfx, modrm));
+ } else {
+ *expect_CAS = True;
+ addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
+ assign( t1, loadLE(ty, mkexpr(addr)) );
+ assign( t2, getIRegG(sz, pfx, modrm) );
+ casLE( mkexpr(addr),
+ mkexpr(t1), mkexpr(t2), guest_RIP_curr_instr );
+ putIRegG( sz, pfx, modrm, mkexpr(t1) );
+ delta += alen;
+ DIP("xchg%c %s, %s\n", nameISize(sz),
+ nameIRegG(sz, pfx, modrm), dis_buf);
+ }
+ break;
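+
+      /* The memory case above expresses the atomic exchange as an
+         assumed-successful compare-and-swap (a sketch):
+             old = *addr;
+             CAS(addr, expected = old, new = Greg);
+             Greg = old;
+         If the CAS fails because another thread wrote to addr in
+         between, the guest_RIP_curr_instr argument to casLE allows
+         the whole instruction to be restarted. */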
+
+ case 0x90: /* XCHG eAX,eAX */
+ /* detect and handle F3 90 (rep nop) specially */
+ if (!have66(pfx) && !haveF2(pfx) && haveF3(pfx)) {
+ DIP("rep nop (P4 pause)\n");
+ /* "observe" the hint. The Vex client needs to be careful not
+ to cause very long delays as a result, though. */
+ jmp_lit(Ijk_Yield, guest_RIP_bbstart+delta);
+ dres.whatNext = Dis_StopHere;
+ break;
+ }
+ /* detect and handle NOPs specially */
+ if (/* F2/F3 probably change meaning completely */
+ !haveF2orF3(pfx)
+ /* If REX.B is 1, we're not exchanging rAX with itself */
+ && getRexB(pfx)==0 ) {
+ DIP("nop\n");
+ break;
+ }
+ /* else fall through to normal case. */
+ case 0x91: /* XCHG rAX,rCX */
+ case 0x92: /* XCHG rAX,rDX */
+ case 0x93: /* XCHG rAX,rBX */
+ case 0x94: /* XCHG rAX,rSP */
+ case 0x95: /* XCHG rAX,rBP */
+ case 0x96: /* XCHG rAX,rSI */
+ case 0x97: /* XCHG rAX,rDI */
+
+ /* guard against mutancy */
+ if (haveF2orF3(pfx)) goto decode_failure;
+
+ /* sz == 2 could legitimately happen, but we don't handle it yet */
+ if (sz == 2) goto decode_failure; /* awaiting test case */
+
+ codegen_xchg_rAX_Reg ( pfx, sz, opc - 0x90 );
+ break;
+
+//.. //-- /* ------------------------ XLAT ----------------------- */
+//.. //--
+//.. //-- case 0xD7: /* XLAT */
+//.. //-- t1 = newTemp(cb); t2 = newTemp(cb);
+//.. //-- uInstr2(cb, GET, sz, ArchReg, R_EBX, TempReg, t1); /* get eBX */
+//.. //-- handleAddrOverrides( cb, sorb, t1 ); /* make t1 DS:eBX */
+//.. //-- uInstr2(cb, GET, 1, ArchReg, R_AL, TempReg, t2); /* get AL */
+//.. //-- /* Widen %AL to 32 bits, so it's all defined when we add it. */
+//.. //-- uInstr1(cb, WIDEN, 4, TempReg, t2);
+//.. //-- uWiden(cb, 1, False);
+//.. //-- uInstr2(cb, ADD, sz, TempReg, t2, TempReg, t1); /* add AL to eBX */
+//.. //-- uInstr2(cb, LOAD, 1, TempReg, t1, TempReg, t2); /* get byte at t1 into t2 */
+//.. //-- uInstr2(cb, PUT, 1, TempReg, t2, ArchReg, R_AL); /* put byte into AL */
+//.. //--
+//.. //-- DIP("xlat%c [ebx]\n", nameISize(sz));
+//.. //-- break;
+
+ /* ------------------------ IN / OUT ----------------------- */
+
+ case 0xE4: /* IN imm8, AL */
+ sz = 1;
+ t1 = newTemp(Ity_I64);
+ abyte = getUChar(delta); delta++;
+ assign(t1, mkU64( abyte & 0xFF ));
+ DIP("in%c $%d,%s\n", nameISize(sz), (Int)abyte, nameIRegRAX(sz));
+ goto do_IN;
+ case 0xE5: /* IN imm8, eAX */
+ if (!(sz == 2 || sz == 4)) goto decode_failure;
+ t1 = newTemp(Ity_I64);
+ abyte = getUChar(delta); delta++;
+ assign(t1, mkU64( abyte & 0xFF ));
+ DIP("in%c $%d,%s\n", nameISize(sz), (Int)abyte, nameIRegRAX(sz));
+ goto do_IN;
+ case 0xEC: /* IN %DX, AL */
+ sz = 1;
+ t1 = newTemp(Ity_I64);
+ assign(t1, unop(Iop_16Uto64, getIRegRDX(2)));
+ DIP("in%c %s,%s\n", nameISize(sz), nameIRegRDX(2),
+ nameIRegRAX(sz));
+ goto do_IN;
+ case 0xED: /* IN %DX, eAX */
+ if (!(sz == 2 || sz == 4)) goto decode_failure;
+ t1 = newTemp(Ity_I64);
+ assign(t1, unop(Iop_16Uto64, getIRegRDX(2)));
+ DIP("in%c %s,%s\n", nameISize(sz), nameIRegRDX(2),
+ nameIRegRAX(sz));
+ goto do_IN;
+ do_IN: {
+      /* At this point, sz indicates the width, and t1 is a 64-bit
+         value giving the port number. */
+ IRDirty* d;
+ if (haveF2orF3(pfx)) goto decode_failure;
+ vassert(sz == 1 || sz == 2 || sz == 4);
+ ty = szToITy(sz);
+ t2 = newTemp(Ity_I64);
+ d = unsafeIRDirty_1_N(
+ t2,
+ 0/*regparms*/,
+ "amd64g_dirtyhelper_IN",
+ &amd64g_dirtyhelper_IN,
+ mkIRExprVec_2( mkexpr(t1), mkU64(sz) )
+ );
+ /* do the call, dumping the result in t2. */
+ stmt( IRStmt_Dirty(d) );
+ putIRegRAX(sz, narrowTo( ty, mkexpr(t2) ) );
+ break;
+ }
+
+ case 0xE6: /* OUT AL, imm8 */
+ sz = 1;
+ t1 = newTemp(Ity_I64);
+ abyte = getUChar(delta); delta++;
+ assign( t1, mkU64( abyte & 0xFF ) );
+ DIP("out%c %s,$%d\n", nameISize(sz), nameIRegRAX(sz), (Int)abyte);
+ goto do_OUT;
+ case 0xE7: /* OUT eAX, imm8 */
+ if (!(sz == 2 || sz == 4)) goto decode_failure;
+ t1 = newTemp(Ity_I64);
+ abyte = getUChar(delta); delta++;
+ assign( t1, mkU64( abyte & 0xFF ) );
+ DIP("out%c %s,$%d\n", nameISize(sz), nameIRegRAX(sz), (Int)abyte);
+ goto do_OUT;
+ case 0xEE: /* OUT AL, %DX */
+ sz = 1;
+ t1 = newTemp(Ity_I64);
+ assign( t1, unop(Iop_16Uto64, getIRegRDX(2)) );
+ DIP("out%c %s,%s\n", nameISize(sz), nameIRegRAX(sz),
+ nameIRegRDX(2));
+ goto do_OUT;
+ case 0xEF: /* OUT eAX, %DX */
+ if (!(sz == 2 || sz == 4)) goto decode_failure;
+ t1 = newTemp(Ity_I64);
+ assign( t1, unop(Iop_16Uto64, getIRegRDX(2)) );
+ DIP("out%c %s,%s\n", nameISize(sz), nameIRegRAX(sz),
+ nameIRegRDX(2));
+ goto do_OUT;
+ do_OUT: {
+      /* At this point, sz indicates the width, and t1 is a 64-bit
+         value giving the port number. */
+ IRDirty* d;
+ if (haveF2orF3(pfx)) goto decode_failure;
+ vassert(sz == 1 || sz == 2 || sz == 4);
+ ty = szToITy(sz);
+ d = unsafeIRDirty_0_N(
+ 0/*regparms*/,
+ "amd64g_dirtyhelper_OUT",
+ &amd64g_dirtyhelper_OUT,
+ mkIRExprVec_3( mkexpr(t1),
+ widenUto64( getIRegRAX(sz) ),
+ mkU64(sz) )
+ );
+ stmt( IRStmt_Dirty(d) );
+ break;
+ }
+
+ /* ------------------------ (Grp1 extensions) ---------- */
+
+ case 0x80: /* Grp1 Ib,Eb */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ modrm = getUChar(delta);
+ am_sz = lengthAMode(pfx,delta);
+ sz = 1;
+ d_sz = 1;
+ d64 = getSDisp8(delta + am_sz);
+ delta = dis_Grp1 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, d64 );
+ break;
+
+ case 0x81: /* Grp1 Iv,Ev */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ modrm = getUChar(delta);
+ am_sz = lengthAMode(pfx,delta);
+ d_sz = imin(sz,4);
+ d64 = getSDisp(d_sz, delta + am_sz);
+ delta = dis_Grp1 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, d64 );
+ break;
+
+ case 0x83: /* Grp1 Ib,Ev */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ modrm = getUChar(delta);
+ am_sz = lengthAMode(pfx,delta);
+ d_sz = 1;
+ d64 = getSDisp8(delta + am_sz);
+ delta = dis_Grp1 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, d64 );
+ break;
+
+ /* ------------------------ (Grp2 extensions) ---------- */
+
+ case 0xC0: { /* Grp2 Ib,Eb */
+ Bool decode_OK = True;
+ if (haveF2orF3(pfx)) goto decode_failure;
+ modrm = getUChar(delta);
+ am_sz = lengthAMode(pfx,delta);
+ d_sz = 1;
+ d64 = getUChar(delta + am_sz);
+ sz = 1;
+ delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz,
+ mkU8(d64 & 0xFF), NULL, &decode_OK );
+ if (!decode_OK) goto decode_failure;
+ break;
+ }
+ case 0xC1: { /* Grp2 Ib,Ev */
+ Bool decode_OK = True;
+ if (haveF2orF3(pfx)) goto decode_failure;
+ modrm = getUChar(delta);
+ am_sz = lengthAMode(pfx,delta);
+ d_sz = 1;
+ d64 = getUChar(delta + am_sz);
+ delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz,
+ mkU8(d64 & 0xFF), NULL, &decode_OK );
+ if (!decode_OK) goto decode_failure;
+ break;
+ }
+ case 0xD0: { /* Grp2 1,Eb */
+ Bool decode_OK = True;
+ if (haveF2orF3(pfx)) goto decode_failure;
+ modrm = getUChar(delta);
+ am_sz = lengthAMode(pfx,delta);
+ d_sz = 0;
+ d64 = 1;
+ sz = 1;
+ delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz,
+ mkU8(d64), NULL, &decode_OK );
+ if (!decode_OK) goto decode_failure;
+ break;
+ }
+ case 0xD1: { /* Grp2 1,Ev */
+ Bool decode_OK = True;
+ if (haveF2orF3(pfx)) goto decode_failure;
+ modrm = getUChar(delta);
+ am_sz = lengthAMode(pfx,delta);
+ d_sz = 0;
+ d64 = 1;
+ delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz,
+ mkU8(d64), NULL, &decode_OK );
+ if (!decode_OK) goto decode_failure;
+ break;
+ }
+ case 0xD2: { /* Grp2 CL,Eb */
+ Bool decode_OK = True;
+ if (haveF2orF3(pfx)) goto decode_failure;
+ modrm = getUChar(delta);
+ am_sz = lengthAMode(pfx,delta);
+ d_sz = 0;
+ sz = 1;
+ delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz,
+ getIRegCL(), "%cl", &decode_OK );
+ if (!decode_OK) goto decode_failure;
+ break;
+ }
+ case 0xD3: { /* Grp2 CL,Ev */
+ Bool decode_OK = True;
+ if (haveF2orF3(pfx)) goto decode_failure;
+ modrm = getUChar(delta);
+ am_sz = lengthAMode(pfx,delta);
+ d_sz = 0;
+ delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz,
+ getIRegCL(), "%cl", &decode_OK );
+ if (!decode_OK) goto decode_failure;
+ break;
+ }
+
+ /* ------------------------ (Grp3 extensions) ---------- */
+
+ case 0xF6: { /* Grp3 Eb */
+ Bool decode_OK = True;
+ if (haveF2orF3(pfx)) goto decode_failure;
+ delta = dis_Grp3 ( vbi, pfx, 1, delta, &decode_OK );
+ if (!decode_OK) goto decode_failure;
+ break;
+ }
+ case 0xF7: { /* Grp3 Ev */
+ Bool decode_OK = True;
+ if (haveF2orF3(pfx)) goto decode_failure;
+ delta = dis_Grp3 ( vbi, pfx, sz, delta, &decode_OK );
+ if (!decode_OK) goto decode_failure;
+ break;
+ }
+
+ /* ------------------------ (Grp4 extensions) ---------- */
+
+ case 0xFE: { /* Grp4 Eb */
+ Bool decode_OK = True;
+ if (haveF2orF3(pfx)) goto decode_failure;
+ delta = dis_Grp4 ( vbi, pfx, delta, &decode_OK );
+ if (!decode_OK) goto decode_failure;
+ break;
+ }
+
+ /* ------------------------ (Grp5 extensions) ---------- */
+
+ case 0xFF: { /* Grp5 Ev */
+ Bool decode_OK = True;
+ if (haveF2orF3(pfx)) goto decode_failure;
+ delta = dis_Grp5 ( vbi, pfx, sz, delta, &dres, &decode_OK );
+ if (!decode_OK) goto decode_failure;
+ break;
+ }
+
+ /* ------------------------ Escapes to 2-byte opcodes -- */
+
+ case 0x0F: {
+ opc = getUChar(delta); delta++;
+ switch (opc) {
+
+ /* =-=-=-=-=-=-=-=-=- Grp8 =-=-=-=-=-=-=-=-=-=-=-= */
+
+ case 0xBA: { /* Grp8 Ib,Ev */
+ Bool decode_OK = False;
+ if (haveF2orF3(pfx)) goto decode_failure;
+ modrm = getUChar(delta);
+ am_sz = lengthAMode(pfx,delta);
+ d64 = getSDisp8(delta + am_sz);
+ delta = dis_Grp8_Imm ( vbi, pfx, delta, modrm, am_sz, sz, d64,
+ &decode_OK );
+ if (!decode_OK)
+ goto decode_failure;
+ break;
+ }
+
+ /* =-=-=-=-=-=-=-=-=- BSF/BSR -=-=-=-=-=-=-=-=-=-= */
+
+ case 0xBC: /* BSF Gv,Ev */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ delta = dis_bs_E_G ( vbi, pfx, sz, delta, True );
+ break;
+ case 0xBD: /* BSR Gv,Ev */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ delta = dis_bs_E_G ( vbi, pfx, sz, delta, False );
+ break;
+
+ /* =-=-=-=-=-=-=-=-=- BSWAP -=-=-=-=-=-=-=-=-=-=-= */
+
+ case 0xC8: /* BSWAP %eax */
+ case 0xC9:
+ case 0xCA:
+ case 0xCB:
+ case 0xCC:
+ case 0xCD:
+ case 0xCE:
+ case 0xCF: /* BSWAP %edi */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ /* According to the AMD64 docs, this insn can have size 4 or
+ 8. */
+ if (sz == 4) {
+ t1 = newTemp(Ity_I32);
+ t2 = newTemp(Ity_I32);
+ assign( t1, getIRegRexB(4, pfx, opc-0xC8) );
+ assign( t2,
+ binop(Iop_Or32,
+ binop(Iop_Shl32, mkexpr(t1), mkU8(24)),
+ binop(Iop_Or32,
+ binop(Iop_And32, binop(Iop_Shl32, mkexpr(t1), mkU8(8)),
+ mkU32(0x00FF0000)),
+ binop(Iop_Or32,
+ binop(Iop_And32, binop(Iop_Shr32, mkexpr(t1), mkU8(8)),
+ mkU32(0x0000FF00)),
+ binop(Iop_And32, binop(Iop_Shr32, mkexpr(t1), mkU8(24)),
+ mkU32(0x000000FF) )
+ )))
+ );
+ putIRegRexB(4, pfx, opc-0xC8, mkexpr(t2));
+ DIP("bswapl %s\n", nameIRegRexB(4, pfx, opc-0xC8));
+ break;
+ }
+ else if (sz == 8) {
+ IRTemp m8 = newTemp(Ity_I64);
+ IRTemp s8 = newTemp(Ity_I64);
+ IRTemp m16 = newTemp(Ity_I64);
+ IRTemp s16 = newTemp(Ity_I64);
+ IRTemp m32 = newTemp(Ity_I64);
+ t1 = newTemp(Ity_I64);
+ t2 = newTemp(Ity_I64);
+ assign( t1, getIRegRexB(8, pfx, opc-0xC8) );
+
+ assign( m8, mkU64(0xFF00FF00FF00FF00ULL) );
+ assign( s8,
+ binop(Iop_Or64,
+ binop(Iop_Shr64,
+ binop(Iop_And64,mkexpr(t1),mkexpr(m8)),
+ mkU8(8)),
+ binop(Iop_And64,
+ binop(Iop_Shl64,mkexpr(t1),mkU8(8)),
+ mkexpr(m8))
+ )
+ );
+
+ assign( m16, mkU64(0xFFFF0000FFFF0000ULL) );
+ assign( s16,
+ binop(Iop_Or64,
+ binop(Iop_Shr64,
+ binop(Iop_And64,mkexpr(s8),mkexpr(m16)),
+ mkU8(16)),
+ binop(Iop_And64,
+ binop(Iop_Shl64,mkexpr(s8),mkU8(16)),
+ mkexpr(m16))
+ )
+ );
+
+ assign( m32, mkU64(0xFFFFFFFF00000000ULL) );
+ assign( t2,
+ binop(Iop_Or64,
+ binop(Iop_Shr64,
+ binop(Iop_And64,mkexpr(s16),mkexpr(m32)),
+ mkU8(32)),
+ binop(Iop_And64,
+ binop(Iop_Shl64,mkexpr(s16),mkU8(32)),
+ mkexpr(m32))
+ )
+ );
+
+ putIRegRexB(8, pfx, opc-0xC8, mkexpr(t2));
+ DIP("bswapq %s\n", nameIRegRexB(8, pfx, opc-0xC8));
+ break;
+ } else {
+ goto decode_failure;
+ }
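+
+      /* The 64-bit case above is the standard mask-and-shift byte
+         swap.  A plain C restatement (a sketch, not part of the IR):
+
+            uint64_t bswap64 ( uint64_t x ) {
+               x = ((x & 0xFF00FF00FF00FF00ULL) >> 8)
+                   | ((x << 8)  & 0xFF00FF00FF00FF00ULL);
+               x = ((x & 0xFFFF0000FFFF0000ULL) >> 16)
+                   | ((x << 16) & 0xFFFF0000FFFF0000ULL);
+               x = ((x & 0xFFFFFFFF00000000ULL) >> 32)
+                   | ((x << 32) & 0xFFFFFFFF00000000ULL);
+               return x;
+            }
+
+         i.e. swap adjacent bytes, then adjacent 16-bit units, then
+         the two 32-bit halves. */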
+
+ /* =-=-=-=-=-=-=-=-=- BT/BTS/BTR/BTC =-=-=-=-=-=-= */
+
+      /* All of these are possible at sizes 2, 4 and 8, and all
+         three sizes are accepted below. */
+
+ case 0xA3: /* BT Gv,Ev */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ if (sz != 8 && sz != 4 && sz != 2) goto decode_failure;
+ delta = dis_bt_G_E ( vbi, pfx, sz, delta, BtOpNone );
+ break;
+ case 0xB3: /* BTR Gv,Ev */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ if (sz != 8 && sz != 4 && sz != 2) goto decode_failure;
+ delta = dis_bt_G_E ( vbi, pfx, sz, delta, BtOpReset );
+ break;
+ case 0xAB: /* BTS Gv,Ev */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ if (sz != 8 && sz != 4 && sz != 2) goto decode_failure;
+ delta = dis_bt_G_E ( vbi, pfx, sz, delta, BtOpSet );
+ break;
+ case 0xBB: /* BTC Gv,Ev */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ if (sz != 8 && sz != 4 && sz != 2) goto decode_failure;
+ delta = dis_bt_G_E ( vbi, pfx, sz, delta, BtOpComp );
+ break;
+
+ /* =-=-=-=-=-=-=-=-=- CMOV =-=-=-=-=-=-=-=-=-=-=-= */
+
+      case 0x40: /* CMOVOb (cmov overflow) */
+      case 0x41: /* CMOVNOb (cmov no overflow) */
+ case 0x42: /* CMOVBb/CMOVNAEb (cmov below) */
+ case 0x43: /* CMOVNBb/CMOVAEb (cmov not below) */
+ case 0x44: /* CMOVZb/CMOVEb (cmov zero) */
+ case 0x45: /* CMOVNZb/CMOVNEb (cmov not zero) */
+ case 0x46: /* CMOVBEb/CMOVNAb (cmov below or equal) */
+ case 0x47: /* CMOVNBEb/CMOVAb (cmov not below or equal) */
+ case 0x48: /* CMOVSb (cmov negative) */
+      case 0x49: /* CMOVNSb (cmov not negative) */
+ case 0x4A: /* CMOVP (cmov parity even) */
+ case 0x4B: /* CMOVNP (cmov parity odd) */
+ case 0x4C: /* CMOVLb/CMOVNGEb (cmov less) */
+ case 0x4D: /* CMOVGEb/CMOVNLb (cmov greater or equal) */
+ case 0x4E: /* CMOVLEb/CMOVNGb (cmov less or equal) */
+ case 0x4F: /* CMOVGb/CMOVNLEb (cmov greater) */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ delta = dis_cmov_E_G(vbi, pfx, sz, (AMD64Condcode)(opc - 0x40), delta);
+ break;
+
+ /* =-=-=-=-=-=-=-=-=- CMPXCHG -=-=-=-=-=-=-=-=-=-= */
+
+ case 0xB0: { /* CMPXCHG Gb,Eb */
+ Bool ok = True;
+ if (haveF2orF3(pfx)) goto decode_failure;
+ delta = dis_cmpxchg_G_E ( &ok, vbi, pfx, 1, delta );
+ if (!ok) goto decode_failure;
+ break;
+ }
+ case 0xB1: { /* CMPXCHG Gv,Ev (allowed in 16,32,64 bit) */
+ Bool ok = True;
+ if (haveF2orF3(pfx)) goto decode_failure;
+ if (sz != 2 && sz != 4 && sz != 8) goto decode_failure;
+ delta = dis_cmpxchg_G_E ( &ok, vbi, pfx, sz, delta );
+ if (!ok) goto decode_failure;
+ break;
+ }
+
+ case 0xC7: { /* CMPXCHG8B Ev, CMPXCHG16B Ev */
+ IRType elemTy = sz==4 ? Ity_I32 : Ity_I64;
+ IRTemp expdHi = newTemp(elemTy);
+ IRTemp expdLo = newTemp(elemTy);
+ IRTemp dataHi = newTemp(elemTy);
+ IRTemp dataLo = newTemp(elemTy);
+ IRTemp oldHi = newTemp(elemTy);
+ IRTemp oldLo = newTemp(elemTy);
+ IRTemp flags_old = newTemp(Ity_I64);
+ IRTemp flags_new = newTemp(Ity_I64);
+ IRTemp success = newTemp(Ity_I1);
+ IROp opOR = sz==4 ? Iop_Or32 : Iop_Or64;
+ IROp opXOR = sz==4 ? Iop_Xor32 : Iop_Xor64;
+ IROp opCasCmpEQ = sz==4 ? Iop_CasCmpEQ32 : Iop_CasCmpEQ64;
+ IRExpr* zero = sz==4 ? mkU32(0) : mkU64(0);
+ IRTemp expdHi64 = newTemp(Ity_I64);
+ IRTemp expdLo64 = newTemp(Ity_I64);
+
+ /* Translate this using a DCAS, even if there is no LOCK
+ prefix. Life is too short to bother with generating two
+ different translations for the with/without-LOCK-prefix
+ cases. */
+ *expect_CAS = True;
+
+ /* Decode, and generate address. */
+ if (have66orF2orF3(pfx)) goto decode_failure;
+ if (sz != 4 && sz != 8) goto decode_failure;
+ if (sz == 8 && !(archinfo->hwcaps & VEX_HWCAPS_AMD64_CX16))
+ goto decode_failure;
+ modrm = getUChar(delta);
+ if (epartIsReg(modrm)) goto decode_failure;
+ if (gregLO3ofRM(modrm) != 1) goto decode_failure;
+ addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
+ delta += alen;
+
+ /* cmpxchg16b requires an alignment check. */
+ if (sz == 8)
+ gen_SEGV_if_not_16_aligned( addr );
+
+ /* Get the expected and new values. */
+ assign( expdHi64, getIReg64(R_RDX) );
+ assign( expdLo64, getIReg64(R_RAX) );
+
+ /* These are the correctly-sized expected and new values.
+ However, we also get expdHi64/expdLo64 above as 64-bits
+ regardless, because we will need them later in the 32-bit
+ case (paradoxically). */
+ assign( expdHi, sz==4 ? unop(Iop_64to32, mkexpr(expdHi64))
+ : mkexpr(expdHi64) );
+ assign( expdLo, sz==4 ? unop(Iop_64to32, mkexpr(expdLo64))
+ : mkexpr(expdLo64) );
+ assign( dataHi, sz==4 ? getIReg32(R_RCX) : getIReg64(R_RCX) );
+ assign( dataLo, sz==4 ? getIReg32(R_RBX) : getIReg64(R_RBX) );
+
+ /* Do the DCAS */
+ stmt( IRStmt_CAS(
+ mkIRCAS( oldHi, oldLo,
+ Iend_LE, mkexpr(addr),
+ mkexpr(expdHi), mkexpr(expdLo),
+ mkexpr(dataHi), mkexpr(dataLo)
+ )));
+
+ /* success when oldHi:oldLo == expdHi:expdLo */
+ assign( success,
+ binop(opCasCmpEQ,
+ binop(opOR,
+ binop(opXOR, mkexpr(oldHi), mkexpr(expdHi)),
+ binop(opXOR, mkexpr(oldLo), mkexpr(expdLo))
+ ),
+ zero
+ ));
+
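+         /* I.e. success == (((oldHi ^ expdHi) | (oldLo ^ expdLo)) == 0),
+            meaning both halves matched; computed branch-free so it
+            folds into a single CasCmpEQ against zero. */
+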
+ /* If the DCAS is successful, that is to say oldHi:oldLo ==
+ expdHi:expdLo, then put expdHi:expdLo back in RDX:RAX,
+ which is where they came from originally. Both the actual
+ contents of these two regs, and any shadow values, are
+ unchanged. If the DCAS fails then we're putting into
+ RDX:RAX the value seen in memory. */
+ /* Now of course there's a complication in the 32-bit case
+ (bah!): if the DCAS succeeds, we need to leave RDX:RAX
+ unchanged; but if we use the same scheme as in the 64-bit
+ case, we get hit by the standard rule that a write to the
+ bottom 32 bits of an integer register zeros the upper 32
+ bits. And so the upper halves of RDX and RAX mysteriously
+ become zero. So we have to stuff back in the original
+ 64-bit values which we previously stashed in
+ expdHi64:expdLo64, even if we're doing a cmpxchg8b. */
+ /* It's just _so_ much fun ... */
+ putIRegRDX( 8,
+ IRExpr_Mux0X( unop(Iop_1Uto8, mkexpr(success)),
+ sz == 4 ? unop(Iop_32Uto64, mkexpr(oldHi))
+ : mkexpr(oldHi),
+ mkexpr(expdHi64)
+ ));
+ putIRegRAX( 8,
+ IRExpr_Mux0X( unop(Iop_1Uto8, mkexpr(success)),
+ sz == 4 ? unop(Iop_32Uto64, mkexpr(oldLo))
+ : mkexpr(oldLo),
+ mkexpr(expdLo64)
+ ));
+
+ /* Copy the success bit into the Z flag and leave the others
+ unchanged */
+ assign( flags_old, widenUto64(mk_amd64g_calculate_rflags_all()));
+ assign(
+ flags_new,
+ binop(Iop_Or64,
+ binop(Iop_And64, mkexpr(flags_old),
+ mkU64(~AMD64G_CC_MASK_Z)),
+ binop(Iop_Shl64,
+ binop(Iop_And64,
+ unop(Iop_1Uto64, mkexpr(success)), mkU64(1)),
+ mkU8(AMD64G_CC_SHIFT_Z)) ));
+
+ stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
+ stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(flags_new) ));
+ stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
+ /* Set NDEP even though it isn't used. This makes
+ redundant-PUT elimination of previous stores to this field
+ work better. */
+ stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
+
+ /* Sheesh. Aren't you glad it was me and not you that had to
+ write and validate all this grunge? */
+
+ DIP("cmpxchg8b %s\n", dis_buf);
+ break;
+
+ }
+
+ /* =-=-=-=-=-=-=-=-=- CPUID -=-=-=-=-=-=-=-=-=-=-= */
+
+ case 0xA2: { /* CPUID */
+ /* Uses dirty helper:
+ void amd64g_dirtyhelper_CPUID ( VexGuestAMD64State* )
+ declared to mod rax, wr rbx, rcx, rdx
+ */
+ IRDirty* d = NULL;
+ HChar* fName = NULL;
+ void* fAddr = NULL;
+ if (haveF2orF3(pfx)) goto decode_failure;
+ if (archinfo->hwcaps == (VEX_HWCAPS_AMD64_SSE3
+ |VEX_HWCAPS_AMD64_CX16)) {
+         /* Core-2-like machine:
+            fName = "amd64g_dirtyhelper_CPUID_sse3_and_cx16";
+            fAddr = &amd64g_dirtyhelper_CPUID_sse3_and_cx16; */
+         /* Core-i5-like machine */
+         fName = "amd64g_dirtyhelper_CPUID_sse42_and_cx16";
+         fAddr = &amd64g_dirtyhelper_CPUID_sse42_and_cx16;
+ }
+ else {
+ /* Give a CPUID for at least a baseline machine, SSE2
+ only, and no CX16 */
+ fName = "amd64g_dirtyhelper_CPUID_baseline";
+ fAddr = &amd64g_dirtyhelper_CPUID_baseline;
+ }
+
+ vassert(fName); vassert(fAddr);
+ d = unsafeIRDirty_0_N ( 0/*regparms*/,
+ fName, fAddr, mkIRExprVec_0() );
+ /* declare guest state effects */
+ d->needsBBP = True;
+ d->nFxState = 4;
+ d->fxState[0].fx = Ifx_Modify;
+ d->fxState[0].offset = OFFB_RAX;
+ d->fxState[0].size = 8;
+ d->fxState[1].fx = Ifx_Write;
+ d->fxState[1].offset = OFFB_RBX;
+ d->fxState[1].size = 8;
+ d->fxState[2].fx = Ifx_Modify;
+ d->fxState[2].offset = OFFB_RCX;
+ d->fxState[2].size = 8;
+ d->fxState[3].fx = Ifx_Write;
+ d->fxState[3].offset = OFFB_RDX;
+ d->fxState[3].size = 8;
+ /* execute the dirty call, side-effecting guest state */
+ stmt( IRStmt_Dirty(d) );
+ /* CPUID is a serialising insn. So, just in case someone is
+ using it as a memory fence ... */
+ stmt( IRStmt_MBE(Imbe_Fence) );
+ DIP("cpuid\n");
+ break;
+ }
+
+ /* =-=-=-=-=-=-=-=-=- MOVZX, MOVSX =-=-=-=-=-=-=-= */
+
+ case 0xB6: /* MOVZXb Eb,Gv */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ if (sz != 2 && sz != 4 && sz != 8)
+ goto decode_failure;
+ delta = dis_movx_E_G ( vbi, pfx, delta, 1, sz, False );
+ break;
+ case 0xB7: /* MOVZXw Ew,Gv */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ if (sz != 4 && sz != 8)
+ goto decode_failure;
+ delta = dis_movx_E_G ( vbi, pfx, delta, 2, sz, False );
+ break;
+
+ case 0xBE: /* MOVSXb Eb,Gv */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ if (sz != 2 && sz != 4 && sz != 8)
+ goto decode_failure;
+ delta = dis_movx_E_G ( vbi, pfx, delta, 1, sz, True );
+ break;
+ case 0xBF: /* MOVSXw Ew,Gv */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ if (sz != 4 && sz != 8)
+ goto decode_failure;
+ delta = dis_movx_E_G ( vbi, pfx, delta, 2, sz, True );
+ break;
+
+//.. //-- /* =-=-=-=-=-=-=-=-=-=-= MOVNTI -=-=-=-=-=-=-=-=-= */
+//.. //--
+//.. //-- case 0xC3: /* MOVNTI Gv,Ev */
+//.. //-- vg_assert(sz == 4);
+//.. //-- modrm = getUChar(eip);
+//.. //-- vg_assert(!epartIsReg(modrm));
+//.. //-- t1 = newTemp(cb);
+//.. //-- uInstr2(cb, GET, 4, ArchReg, gregOfRM(modrm), TempReg, t1);
+//.. //-- pair = disAMode ( cb, sorb, eip, dis_buf );
+//.. //-- t2 = LOW24(pair);
+//.. //-- eip += HI8(pair);
+//.. //-- uInstr2(cb, STORE, 4, TempReg, t1, TempReg, t2);
+//.. //-- DIP("movnti %s,%s\n", nameIReg(4,gregOfRM(modrm)), dis_buf);
+//.. //-- break;
+
+ /* =-=-=-=-=-=-=-=-=- MUL/IMUL =-=-=-=-=-=-=-=-=-= */
+
+ case 0xAF: /* IMUL Ev, Gv */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ delta = dis_mul_E_G ( vbi, pfx, sz, delta );
+ break;
+
+ /* =-=-=-=-=-=-=-=-=- NOPs =-=-=-=-=-=-=-=-=-=-=-= */
+
+ case 0x1F:
+ if (haveF2orF3(pfx)) goto decode_failure;
+ modrm = getUChar(delta);
+ if (epartIsReg(modrm)) goto decode_failure;
+ addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
+ delta += alen;
+ DIP("nop%c %s\n", nameISize(sz), dis_buf);
+ break;
+
+ /* =-=-=-=-=-=-=-=-=- Jcond d32 -=-=-=-=-=-=-=-=-= */
+      case 0x80: /* JOb (jump overflow) */
+      case 0x81: /* JNOb (jump no overflow) */
+ case 0x82: /* JBb/JNAEb (jump below) */
+ case 0x83: /* JNBb/JAEb (jump not below) */
+ case 0x84: /* JZb/JEb (jump zero) */
+ case 0x85: /* JNZb/JNEb (jump not zero) */
+ case 0x86: /* JBEb/JNAb (jump below or equal) */
+ case 0x87: /* JNBEb/JAb (jump not below or equal) */
+ case 0x88: /* JSb (jump negative) */
+      case 0x89: /* JNSb (jump not negative) */
+ case 0x8A: /* JP (jump parity even) */
+ case 0x8B: /* JNP/JPO (jump parity odd) */
+ case 0x8C: /* JLb/JNGEb (jump less) */
+ case 0x8D: /* JGEb/JNLb (jump greater or equal) */
+ case 0x8E: /* JLEb/JNGb (jump less or equal) */
+ case 0x8F: /* JGb/JNLEb (jump greater) */
+ { Long jmpDelta;
+ HChar* comment = "";
+ if (haveF2orF3(pfx)) goto decode_failure;
+ jmpDelta = getSDisp32(delta);
+ d64 = (guest_RIP_bbstart+delta+4) + jmpDelta;
+ delta += 4;
+ if (resteerCisOk
+ && vex_control.guest_chase_cond
+ && (Addr64)d64 != (Addr64)guest_RIP_bbstart
+ && jmpDelta < 0
+ && resteerOkFn( callback_opaque, d64) ) {
+ /* Speculation: assume this backward branch is taken. So
+ we need to emit a side-exit to the insn following this
+ one, on the negation of the condition, and continue at
+ the branch target address (d64). If we wind up back at
+ the first instruction of the trace, just stop; it's
+ better to let the IR loop unroller handle that case. */
+ stmt( IRStmt_Exit(
+ mk_amd64g_calculate_condition(
+ (AMD64Condcode)(1 ^ (opc - 0x80))),
+ Ijk_Boring,
+ IRConst_U64(guest_RIP_bbstart+delta) ) );
+ dres.whatNext = Dis_ResteerC;
+ dres.continueAt = d64;
+ comment = "(assumed taken)";
+ }
+ else
+ if (resteerCisOk
+ && vex_control.guest_chase_cond
+ && (Addr64)d64 != (Addr64)guest_RIP_bbstart
+ && jmpDelta >= 0
+ && resteerOkFn( callback_opaque, guest_RIP_bbstart+delta ) ) {
+ /* Speculation: assume this forward branch is not taken.
+ So we need to emit a side-exit to d64 (the dest) and
+ continue disassembling at the insn immediately
+ following this one. */
+ stmt( IRStmt_Exit(
+ mk_amd64g_calculate_condition((AMD64Condcode)
+ (opc - 0x80)),
+ Ijk_Boring,
+ IRConst_U64(d64) ) );
+ dres.whatNext = Dis_ResteerC;
+ dres.continueAt = guest_RIP_bbstart+delta;
+ comment = "(assumed not taken)";
+ }
+ else {
+ /* Conservative default translation - end the block at
+ this point. */
+ jcc_01( (AMD64Condcode)(opc - 0x80),
+ guest_RIP_bbstart+delta,
+ d64 );
+ dres.whatNext = Dis_StopHere;
+ }
+ DIP("j%s-32 0x%llx %s\n", name_AMD64Condcode(opc - 0x80), d64, comment);
+ break;
+ }
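+
+      /* Summary of the three translations above (a sketch):
+           backward Jcc, assumed taken:
+             if (!cond) goto next_insn;    // side exit
+             ... continue tracing at d64 ...
+           forward Jcc, assumed not taken:
+             if (cond) goto d64;           // side exit
+             ... continue tracing at next_insn ...
+           otherwise: end the block with a conditional two-way
+           exit to d64 or next_insn. */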
+
+ /* =-=-=-=-=-=-=-=-=- PREFETCH =-=-=-=-=-=-=-=-=-= */
+ case 0x0D: /* 0F 0D /0 -- prefetch mem8 */
+ /* 0F 0D /1 -- prefetchw mem8 */
+ if (have66orF2orF3(pfx)) goto decode_failure;
+ modrm = getUChar(delta);
+ if (epartIsReg(modrm)) goto decode_failure;
+ if (gregLO3ofRM(modrm) != 0 && gregLO3ofRM(modrm) != 1)
+ goto decode_failure;
+
+ addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
+ delta += alen;
+
+ switch (gregLO3ofRM(modrm)) {
+ case 0: DIP("prefetch %s\n", dis_buf); break;
+ case 1: DIP("prefetchw %s\n", dis_buf); break;
+ default: vassert(0); /*NOTREACHED*/
+ }
+ break;
+
+ /* =-=-=-=-=-=-=-=-=- RDTSC -=-=-=-=-=-=-=-=-=-=-= */
+ case 0x31: { /* RDTSC */
+ IRTemp val = newTemp(Ity_I64);
+ IRExpr** args = mkIRExprVec_0();
+ IRDirty* d = unsafeIRDirty_1_N (
+ val,
+ 0/*regparms*/,
+ "amd64g_dirtyhelper_RDTSC",
+ &amd64g_dirtyhelper_RDTSC,
+ args
+ );
+ if (have66orF2orF3(pfx)) goto decode_failure;
+ /* execute the dirty call, dumping the result in val. */
+ stmt( IRStmt_Dirty(d) );
+ putIRegRDX(4, unop(Iop_64HIto32, mkexpr(val)));
+ putIRegRAX(4, unop(Iop_64to32, mkexpr(val)));
+ DIP("rdtsc\n");
+ break;
+ }
+
+//.. /* =-=-=-=-=-=-=-=-=- PUSH/POP Sreg =-=-=-=-=-=-=-=-=-= */
+//..
+//.. case 0xA1: /* POP %FS */
+//.. dis_pop_segreg( R_FS, sz ); break;
+//.. case 0xA9: /* POP %GS */
+//.. dis_pop_segreg( R_GS, sz ); break;
+//..
+//.. case 0xA0: /* PUSH %FS */
+//.. dis_push_segreg( R_FS, sz ); break;
+//.. case 0xA8: /* PUSH %GS */
+//.. dis_push_segreg( R_GS, sz ); break;
+
+ /* =-=-=-=-=-=-=-=-=- SETcc Eb =-=-=-=-=-=-=-=-=-= */
+      case 0x90: /* set-Ob (set if overflow) */
+      case 0x91: /* set-NOb (set if no overflow) */
+ case 0x92: /* set-Bb/set-NAEb (set if below) */
+ case 0x93: /* set-NBb/set-AEb (set if not below) */
+ case 0x94: /* set-Zb/set-Eb (set if zero) */
+ case 0x95: /* set-NZb/set-NEb (set if not zero) */
+ case 0x96: /* set-BEb/set-NAb (set if below or equal) */
+ case 0x97: /* set-NBEb/set-Ab (set if not below or equal) */
+ case 0x98: /* set-Sb (set if negative) */
+      case 0x99: /* set-NSb (set if not negative) */
+ case 0x9A: /* set-P (set if parity even) */
+ case 0x9B: /* set-NP (set if parity odd) */
+ case 0x9C: /* set-Lb/set-NGEb (set if less) */
+ case 0x9D: /* set-GEb/set-NLb (set if greater or equal) */
+ case 0x9E: /* set-LEb/set-NGb (set if less or equal) */
+ case 0x9F: /* set-Gb/set-NLEb (set if greater) */
+ if (haveF2orF3(pfx)) goto decode_failure;
+ t1 = newTemp(Ity_I8);
+ assign( t1, unop(Iop_1Uto8,mk_amd64g_calculate_condition(opc-0x90)) );
+ modrm = getUChar(delta);
+ if (epartIsReg(modrm)) {
+ delta++;
+ putIRegE(1, pfx, modrm, mkexpr(t1));
+ DIP("set%s %s\n", name_AMD64Condcode(opc-0x90),
+ nameIRegE(1,pfx,modrm));
+ } else {
+ addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
+ delta += alen;
+ storeLE( mkexpr(addr), mkexpr(t1) );
+ DIP("set%s %s\n", name_AMD64Condcode(opc-0x90), dis_buf);
+ }
+ break;
+
+ /* =-=-=-=-=-=-=-=-=- SHLD/SHRD -=-=-=-=-=-=-=-=-= */
+
+ case 0xA4: /* SHLDv imm8,Gv,Ev */
+ modrm = getUChar(delta);
+ d64 = delta + lengthAMode(pfx, delta);
+ vex_sprintf(dis_buf, "$%d", (Int)getUChar(d64));
+ delta = dis_SHLRD_Gv_Ev (
+ vbi, pfx, delta, modrm, sz,
+ mkU8(getUChar(d64)), True, /* literal */
+ dis_buf, True /* left */ );
+ break;
+ case 0xA5: /* SHLDv %cl,Gv,Ev */
+ modrm = getUChar(delta);
+ delta = dis_SHLRD_Gv_Ev (
+ vbi, pfx, delta, modrm, sz,
+ getIRegCL(), False, /* not literal */
+ "%cl", True /* left */ );
+ break;
+
+ case 0xAC: /* SHRDv imm8,Gv,Ev */
+ modrm = getUChar(delta);
+ d64 = delta + lengthAMode(pfx, delta);
+ vex_sprintf(dis_buf, "$%d", (Int)getUChar(d64));
+ delta = dis_SHLRD_Gv_Ev (
+ vbi, pfx, delta, modrm, sz,
+ mkU8(getUChar(d64)), True, /* literal */
+ dis_buf, False /* right */ );
+ break;
+ case 0xAD: /* SHRDv %cl,Gv,Ev */
+ modrm = getUChar(delta);
+ delta = dis_SHLRD_Gv_Ev (
+ vbi, pfx, delta, modrm, sz,
+ getIRegCL(), False, /* not literal */
+ "%cl", False /* right */);
+ break;
+
+ /* =-=-=-=-=-=-=-=-=- SYSCALL -=-=-=-=-=-=-=-=-=-= */
+ case 0x05: /* SYSCALL */
+ guest_RIP_next_mustcheck = True;
+ guest_RIP_next_assumed = guest_RIP_bbstart + delta;
+ putIReg64( R_RCX, mkU64(guest_RIP_next_assumed) );
+ /* It's important that all guest state is up-to-date
+ at this point. So we declare an end-of-block here, which
+ forces any cached guest state to be flushed. */
+ jmp_lit(Ijk_Sys_syscall, guest_RIP_next_assumed);
+ dres.whatNext = Dis_StopHere;
+ DIP("syscall\n");
+ break;
+
+ /* =-=-=-=-=-=-=-=-=- XADD -=-=-=-=-=-=-=-=-=-= */
+
+ case 0xC0: { /* XADD Gb,Eb */
+ Bool decode_OK = False;
+ delta = dis_xadd_G_E ( &decode_OK, vbi, pfx, 1, delta );
+ if (!decode_OK)
+ goto decode_failure;
+ break;
+ }
+ case 0xC1: { /* XADD Gv,Ev */
+ Bool decode_OK = False;
+ delta = dis_xadd_G_E ( &decode_OK, vbi, pfx, sz, delta );
+ if (!decode_OK)
+ goto decode_failure;
+ break;
+ }
+
+ /* =-=-=-=-=-=-=-=-=- MMXery =-=-=-=-=-=-=-=-=-=-= */
+
+ case 0x71:
+ case 0x72:
+ case 0x73: /* PSLLgg/PSRAgg/PSRLgg mmxreg by imm8 */
+
+ case 0x6E: /* MOVD (src)ireg-or-mem, (dst)mmxreg */
+ case 0x7E: /* MOVD (src)mmxreg, (dst)ireg-or-mem */
+ case 0x7F: /* MOVQ (src)mmxreg, (dst)mmxreg-or-mem */
+ case 0x6F: /* MOVQ (src)mmxreg-or-mem, (dst)mmxreg */
+
+ case 0xFC:
+ case 0xFD:
+ case 0xFE: /* PADDgg (src)mmxreg-or-mem, (dst)mmxreg */
+
+ case 0xEC:
+ case 0xED: /* PADDSgg (src)mmxreg-or-mem, (dst)mmxreg */
+
+ case 0xDC:
+ case 0xDD: /* PADDUSgg (src)mmxreg-or-mem, (dst)mmxreg */
+
+ case 0xF8:
+ case 0xF9:
+ case 0xFA: /* PSUBgg (src)mmxreg-or-mem, (dst)mmxreg */
+
+ case 0xE8:
+ case 0xE9: /* PSUBSgg (src)mmxreg-or-mem, (dst)mmxreg */
+
+ case 0xD8:
+ case 0xD9: /* PSUBUSgg (src)mmxreg-or-mem, (dst)mmxreg */
+
+ case 0xE5: /* PMULHW (src)mmxreg-or-mem, (dst)mmxreg */
+ case 0xD5: /* PMULLW (src)mmxreg-or-mem, (dst)mmxreg */
+
+ case 0xF5: /* PMADDWD (src)mmxreg-or-mem, (dst)mmxreg */
+
+ case 0x74:
+ case 0x75:
+ case 0x76: /* PCMPEQgg (src)mmxreg-or-mem, (dst)mmxreg */
+
+ case 0x64:
+ case 0x65:
+ case 0x66: /* PCMPGTgg (src)mmxreg-or-mem, (dst)mmxreg */
+
+ case 0x6B: /* PACKSSDW (src)mmxreg-or-mem, (dst)mmxreg */
+ case 0x63: /* PACKSSWB (src)mmxreg-or-mem, (dst)mmxreg */
+ case 0x67: /* PACKUSWB (src)mmxreg-or-mem, (dst)mmxreg */
+
+ case 0x68:
+ case 0x69:
+ case 0x6A: /* PUNPCKHgg (src)mmxreg-or-mem, (dst)mmxreg */
+
+ case 0x60:
+ case 0x61:
+ case 0x62: /* PUNPCKLgg (src)mmxreg-or-mem, (dst)mmxreg */
+
+ case 0xDB: /* PAND (src)mmxreg-or-mem, (dst)mmxreg */
+ case 0xDF: /* PANDN (src)mmxreg-or-mem, (dst)mmxreg */
+ case 0xEB: /* POR (src)mmxreg-or-mem, (dst)mmxreg */
+ case 0xEF: /* PXOR (src)mmxreg-or-mem, (dst)mmxreg */
+
+ case 0xF1: /* PSLLgg (src)mmxreg-or-mem, (dst)mmxreg */
+ case 0xF2:
+ case 0xF3:
+
+ case 0xD1: /* PSRLgg (src)mmxreg-or-mem, (dst)mmxreg */
+ case 0xD2:
+ case 0xD3:
+
+ case 0xE1: /* PSRAgg (src)mmxreg-or-mem, (dst)mmxreg */
+ case 0xE2:
+ {
+ Long delta0 = delta-1;
+ Bool decode_OK = False;
+
+         /* If sz==2 this is SSE, and we assume the SSE decoder has
+            already spotted those cases by now. */
+ if (sz != 4 && sz != 8)
+ goto decode_failure;
+ if (have66orF2orF3(pfx))
+ goto decode_failure;
+
+ delta = dis_MMX ( &decode_OK, vbi, pfx, sz, delta-1 );
+ if (!decode_OK) {
+ delta = delta0;
+ goto decode_failure;
+ }
+ break;
+ }
+
+ case 0x0E: /* FEMMS */
+ case 0x77: /* EMMS */
+ if (sz != 4)
+ goto decode_failure;
+ do_EMMS_preamble();
+ DIP("{f}emms\n");
+ break;
+
+ /* =-=-=-=-=-=-=-=-=- SGDT and SIDT =-=-=-=-=-=-=-=-=-=-= */
+ case 0x01: /* 0F 01 /0 -- SGDT */
+ /* 0F 01 /1 -- SIDT */
+ {
+ /* This is really revolting, but ... since each processor
+ (core) only has one IDT and one GDT, just let the guest
+ see it (pass-through semantics). I can't see any way to
+ construct a faked-up value, so don't bother to try. */
+ modrm = getUChar(delta);
+ addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
+ delta += alen;
+ if (epartIsReg(modrm)) goto decode_failure;
+ if (gregLO3ofRM(modrm) != 0 && gregLO3ofRM(modrm) != 1)
+ goto decode_failure;
+ switch (gregLO3ofRM(modrm)) {
+ case 0: DIP("sgdt %s\n", dis_buf); break;
+ case 1: DIP("sidt %s\n", dis_buf); break;
+ default: vassert(0); /*NOTREACHED*/
+ }
+
+ IRDirty* d = unsafeIRDirty_0_N (
+ 0/*regparms*/,
+ "amd64g_dirtyhelper_SxDT",
+ &amd64g_dirtyhelper_SxDT,
+ mkIRExprVec_2( mkexpr(addr),
+ mkU64(gregLO3ofRM(modrm)) )
+ );
+ /* declare we're writing memory */
+ d->mFx = Ifx_Write;
+ d->mAddr = mkexpr(addr);
+ d->mSize = 6;
+ stmt( IRStmt_Dirty(d) );
+ break;
+ }
+
+ /* =-=-=-=-=-=-=-=-=- unimp2 =-=-=-=-=-=-=-=-=-=-= */
+
+ default:
+ goto decode_failure;
+ } /* switch (opc) for the 2-byte opcodes */
+ goto decode_success;
+ } /* case 0x0F: of primary opcode */
+
+ /* ------------------------ ??? ------------------------ */
+
+ default:
+ decode_failure:
+ /* All decode failures end up here. */
+ vex_printf("vex amd64->IR: unhandled instruction bytes: "
+ "0x%x 0x%x 0x%x 0x%x 0x%x 0x%x\n",
+ (Int)getUChar(delta_start+0),
+ (Int)getUChar(delta_start+1),
+ (Int)getUChar(delta_start+2),
+ (Int)getUChar(delta_start+3),
+ (Int)getUChar(delta_start+4),
+ (Int)getUChar(delta_start+5) );
+
+   /* Tell the dispatcher that this insn cannot be decoded, and so
+      has not been executed, and (is currently) the next to be
+      executed.  RIP should be up-to-date since it was made so at the
+      start of each insn, but be paranoid and update it again right
+      now. */
+ stmt( IRStmt_Put( OFFB_RIP, mkU64(guest_RIP_curr_instr) ) );
+ jmp_lit(Ijk_NoDecode, guest_RIP_curr_instr);
+ dres.whatNext = Dis_StopHere;
+ dres.len = 0;
+   /* We also need to say that a CAS is not expected now, regardless
+      of what it might have been set to at the start of the function,
+      since the IR that we've emitted just above (to synthesise a
+      SIGILL) does not involve any CAS, and presumably no other IR has
+      been emitted for this (non-decoded) insn. */
+ *expect_CAS = False;
+ return dres;
+
+ } /* switch (opc) for the main (primary) opcode switch. */
+
+ decode_success:
+ /* All decode successes end up here. */
+ DIP("\n");
+ dres.len = (Int)toUInt(delta - delta_start);
+ return dres;
+}
+
+#undef DIP
+#undef DIS
+
+
+/*------------------------------------------------------------*/
+/*--- Top-level fn ---*/
+/*------------------------------------------------------------*/
+
+/* Disassemble a single instruction into IR. The instruction
+ is located in host memory at &guest_code[delta]. */
+
+DisResult disInstr_AMD64 ( IRSB* irsb_IN,
+ Bool put_IP,
+ Bool (*resteerOkFn) ( void*, Addr64 ),
+ Bool resteerCisOk,
+ void* callback_opaque,
+ UChar* guest_code_IN,
+ Long delta,
+ Addr64 guest_IP,
+ VexArch guest_arch,
+ VexArchInfo* archinfo,
+ VexAbiInfo* abiinfo,
+ Bool host_bigendian_IN )
+{
+ Int i, x1, x2;
+ Bool expect_CAS, has_CAS;
+ DisResult dres;
+
+ /* Set globals (see top of this file) */
+ vassert(guest_arch == VexArchAMD64);
+ guest_code = guest_code_IN;
+ irsb = irsb_IN;
+ host_is_bigendian = host_bigendian_IN;
+ guest_RIP_curr_instr = guest_IP;
+ guest_RIP_bbstart = guest_IP - delta;
+
+ /* We'll consult these after doing disInstr_AMD64_WRK. */
+ guest_RIP_next_assumed = 0;
+ guest_RIP_next_mustcheck = False;
+
+ x1 = irsb_IN->stmts_used;
+ expect_CAS = False;
+ dres = disInstr_AMD64_WRK ( &expect_CAS, put_IP, resteerOkFn,
+ resteerCisOk,
+ callback_opaque,
+ delta, archinfo, abiinfo );
+ x2 = irsb_IN->stmts_used;
+ vassert(x2 >= x1);
+
+ /* If disInstr_AMD64_WRK tried to figure out the next rip, check it
+ got it right. Failure of this assertion is serious and denotes
+ a bug in disInstr. */
+ if (guest_RIP_next_mustcheck
+ && guest_RIP_next_assumed != guest_RIP_curr_instr + dres.len) {
+ vex_printf("\n");
+ vex_printf("assumed next %%rip = 0x%llx\n",
+ guest_RIP_next_assumed );
+ vex_printf(" actual next %%rip = 0x%llx\n",
+ guest_RIP_curr_instr + dres.len );
+ vpanic("disInstr_AMD64: disInstr miscalculated next %rip");
+ }
+
+ /* See comment at the top of disInstr_AMD64_WRK for meaning of
+ expect_CAS. Here, we (sanity-)check for the presence/absence of
+ IRCAS as directed by the returned expect_CAS value. */
+ has_CAS = False;
+ for (i = x1; i < x2; i++) {
+ if (irsb_IN->stmts[i]->tag == Ist_CAS)
+ has_CAS = True;
+ }
+
+ if (expect_CAS != has_CAS) {
+ /* inconsistency detected. re-disassemble the instruction so as
+ to generate a useful error message; then assert. */
+ vex_traceflags |= VEX_TRACE_FE;
+ dres = disInstr_AMD64_WRK ( &expect_CAS, put_IP, resteerOkFn,
+ resteerCisOk,
+ callback_opaque,
+ delta, archinfo, abiinfo );
+ for (i = x1; i < x2; i++) {
+ vex_printf("\t\t");
+ ppIRStmt(irsb_IN->stmts[i]);
+ vex_printf("\n");
+ }
+ /* Failure of this assertion is serious and denotes a bug in
+ disInstr. */
+ vpanic("disInstr_AMD64: inconsistency in LOCK prefix handling");
+ }
+
+ return dres;
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Unused stuff ---*/
+/*------------------------------------------------------------*/
+
+// A potentially more Memcheck-friendly version of gen_LZCNT, if
+// this should ever be needed.
+//
+//static IRTemp gen_LZCNT ( IRType ty, IRTemp src )
+//{
+// /* Scheme is simple: propagate the most significant 1-bit into all
+// lower positions in the word. This gives a word of the form
+// 0---01---1. Now invert it, giving a word of the form
+// 1---10---0, then do a population-count idiom (to count the 1s,
+// which is the number of leading zeroes, or the word size if the
+// original word was 0.
+// */
+// Int i;
+// IRTemp t[7];
+// for (i = 0; i < 7; i++) {
+// t[i] = newTemp(ty);
+// }
+// if (ty == Ity_I64) {
+// assign(t[0], binop(Iop_Or64, mkexpr(src),
+// binop(Iop_Shr64, mkexpr(src), mkU8(1))));
+// assign(t[1], binop(Iop_Or64, mkexpr(t[0]),
+// binop(Iop_Shr64, mkexpr(t[0]), mkU8(2))));
+// assign(t[2], binop(Iop_Or64, mkexpr(t[1]),
+// binop(Iop_Shr64, mkexpr(t[1]), mkU8(4))));
+// assign(t[3], binop(Iop_Or64, mkexpr(t[2]),
+// binop(Iop_Shr64, mkexpr(t[2]), mkU8(8))));
+// assign(t[4], binop(Iop_Or64, mkexpr(t[3]),
+// binop(Iop_Shr64, mkexpr(t[3]), mkU8(16))));
+// assign(t[5], binop(Iop_Or64, mkexpr(t[4]),
+// binop(Iop_Shr64, mkexpr(t[4]), mkU8(32))));
+// assign(t[6], unop(Iop_Not64, mkexpr(t[5])));
+// return gen_POPCOUNT(ty, t[6]);
+// }
+// if (ty == Ity_I32) {
+// assign(t[0], binop(Iop_Or32, mkexpr(src),
+// binop(Iop_Shr32, mkexpr(src), mkU8(1))));
+// assign(t[1], binop(Iop_Or32, mkexpr(t[0]),
+// binop(Iop_Shr32, mkexpr(t[0]), mkU8(2))));
+// assign(t[2], binop(Iop_Or32, mkexpr(t[1]),
+// binop(Iop_Shr32, mkexpr(t[1]), mkU8(4))));
+// assign(t[3], binop(Iop_Or32, mkexpr(t[2]),
+// binop(Iop_Shr32, mkexpr(t[2]), mkU8(8))));
+// assign(t[4], binop(Iop_Or32, mkexpr(t[3]),
+// binop(Iop_Shr32, mkexpr(t[3]), mkU8(16))));
+// assign(t[5], unop(Iop_Not32, mkexpr(t[4])));
+// return gen_POPCOUNT(ty, t[5]);
+// }
+// if (ty == Ity_I16) {
+// assign(t[0], binop(Iop_Or16, mkexpr(src),
+// binop(Iop_Shr16, mkexpr(src), mkU8(1))));
+// assign(t[1], binop(Iop_Or16, mkexpr(t[0]),
+// binop(Iop_Shr16, mkexpr(t[0]), mkU8(2))));
+// assign(t[2], binop(Iop_Or16, mkexpr(t[1]),
+// binop(Iop_Shr16, mkexpr(t[1]), mkU8(4))));
+// assign(t[3], binop(Iop_Or16, mkexpr(t[2]),
+// binop(Iop_Shr16, mkexpr(t[2]), mkU8(8))));
+// assign(t[4], unop(Iop_Not16, mkexpr(t[3])));
+// return gen_POPCOUNT(ty, t[4]);
+// }
+// vassert(0);
+//}
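+
+// Worked example of the idiom (using an 8-bit word for brevity):
+//      src        = 0b00010100
+//      propagated = 0b00011111   (msb smeared rightwards)
+//      inverted   = 0b11100000
+//      popcount   = 3 == number of leading zeroes of src.
+// src == 0 propagates to 0, inverts to all-ones, and popcount then
+// returns the word size, as required.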
+
+
+/*--------------------------------------------------------------------*/
+/*--- end guest_amd64_toIR.c ---*/
+/*--------------------------------------------------------------------*/
diff --git a/VEX/priv/guest_arm_defs.h b/VEX/priv/guest_arm_defs.h
new file mode 100644
index 0000000..02078c4
--- /dev/null
+++ b/VEX/priv/guest_arm_defs.h
@@ -0,0 +1,238 @@
+
+/*---------------------------------------------------------------*/
+/*--- begin guest_arm_defs.h ---*/
+/*---------------------------------------------------------------*/
+/*
+ This file is part of Valgrind, a dynamic binary instrumentation
+ framework.
+
+ Copyright (C) 2004-2010 OpenWorks LLP
+ info@open-works.net
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA.
+
+ The GNU General Public License is contained in the file COPYING.
+*/
+
+/* Only to be used within the guest-arm directory. */
+
+#ifndef __VEX_GUEST_ARM_DEFS_H
+#define __VEX_GUEST_ARM_DEFS_H
+
+
+/*---------------------------------------------------------*/
+/*--- arm to IR conversion ---*/
+/*---------------------------------------------------------*/
+
+/* Convert one ARM insn to IR. See the type DisOneInstrFn in
+ bb_to_IR.h. */
+extern
+DisResult disInstr_ARM ( IRSB* irbb,
+ Bool put_IP,
+ Bool (*resteerOkFn) ( void*, Addr64 ),
+ Bool resteerCisOk,
+ void* callback_opaque,
+ UChar* guest_code,
+ Long delta,
+ Addr64 guest_IP,
+ VexArch guest_arch,
+ VexArchInfo* archinfo,
+ VexAbiInfo* abiinfo,
+ Bool host_bigendian );
+
+/* Used by the optimiser to specialise calls to helpers. */
+extern
+IRExpr* guest_arm_spechelper ( HChar* function_name,
+ IRExpr** args,
+ IRStmt** precedingStmts,
+ Int n_precedingStmts );
+
+/* Describes to the optimiser which parts of the guest state require
+ precise memory exceptions. This is logically part of the guest
+ state description. */
+extern
+Bool guest_arm_state_requires_precise_mem_exns ( Int, Int );
+
+extern
+VexGuestLayout armGuest_layout;
+
+
+/*---------------------------------------------------------*/
+/*--- arm guest helpers ---*/
+/*---------------------------------------------------------*/
+
+/* --- CLEAN HELPERS --- */
+
+/* Calculate NZCV from the supplied thunk components, in the positions
+   they appear in the CPSR, viz bits 31:28 for N Z C V respectively.
+ Returned bits 27:0 are zero. */
+extern
+UInt armg_calculate_flags_nzcv ( UInt cc_op, UInt cc_dep1,
+ UInt cc_dep2, UInt cc_dep3 );
+
+/* Calculate the C flag from the thunk components, in the lowest bit
+ of the word (bit 0). */
+extern
+UInt armg_calculate_flag_c ( UInt cc_op, UInt cc_dep1,
+ UInt cc_dep2, UInt cc_dep3 );
+
+/* Calculate the V flag from the thunk components, in the lowest bit
+ of the word (bit 0). */
+extern
+UInt armg_calculate_flag_v ( UInt cc_op, UInt cc_dep1,
+ UInt cc_dep2, UInt cc_dep3 );
+
+/* Calculate the specified condition from the thunk components, in the
+ lowest bit of the word (bit 0). */
+extern
+UInt armg_calculate_condition ( UInt cond_n_op /* ARMCondcode << 4 | cc_op */,
+ UInt cc_dep1,
+ UInt cc_dep2, UInt cc_dep3 );
+
+/* Calculate the QC flag from the thunk components, in the lowest bit
+ of the word (bit 0). */
+extern
+UInt armg_calculate_flag_qc ( UInt resL1, UInt resL2,
+ UInt resR1, UInt resR2 );
+
+
+/*---------------------------------------------------------*/
+/*--- Condition code stuff ---*/
+/*---------------------------------------------------------*/
+
+/* Flags masks. Defines positions of flags bits in the CPSR. */
+#define ARMG_CC_SHIFT_N 31
+#define ARMG_CC_SHIFT_Z 30
+#define ARMG_CC_SHIFT_C 29
+#define ARMG_CC_SHIFT_V 28
+#define ARMG_CC_SHIFT_Q 27
+
+#define ARMG_CC_MASK_N (1 << ARMG_CC_SHIFT_N)
+#define ARMG_CC_MASK_Z (1 << ARMG_CC_SHIFT_Z)
+#define ARMG_CC_MASK_C (1 << ARMG_CC_SHIFT_C)
+#define ARMG_CC_MASK_V (1 << ARMG_CC_SHIFT_V)
+#define ARMG_CC_MASK_Q (1 << ARMG_CC_SHIFT_Q)
+
+/* Flag thunk descriptors. A four-word thunk is used to record
+ details of the most recent flag-setting operation, so NZCV can
+ be computed later if needed.
+
+ The four words are:
+
+ CC_OP, which describes the operation.
+
+ CC_DEP1, CC_DEP2, CC_DEP3. These are arguments to the
+      operation. We want to set up the mcx_masks in flag helper calls
+ involving these fields so that Memcheck "believes" that the
+ resulting flags are data-dependent on both CC_DEP1 and
+ CC_DEP2. Hence the name DEP.
+
+ When building the thunk, it is always necessary to write words into
+ CC_DEP1/2/3, even if those args are not used given the
+ CC_OP field. This is important because otherwise Memcheck could
+ give false positives as it does not understand the relationship
+ between the CC_OP field and CC_DEP1/2/3, and so believes
+ that the definedness of the stored flags always depends on
+ all 3 DEP values.
+
+ A summary of the field usages is:
+
+      OP         DEP1           DEP2          DEP3
+      ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+      OP_COPY    current NZCV   unused        unused
+      OP_ADD     argL           argR          unused
+      OP_SUB     argL           argR          unused
+      OP_ADC     argL           argR          old_C
+      OP_SBB     argL           argR          old_C
+      OP_LOGIC   result         shifter_co    old_V
+      OP_MUL     result         unused        old_C:old_V
+      OP_MULL    resLO32        resHI32       old_C:old_V
+*/
+
+enum {
+ ARMG_CC_OP_COPY=0, /* DEP1 = NZCV in 31:28, DEP2 = 0, DEP3 = 0
+ just copy DEP1 to output */
+
+ ARMG_CC_OP_ADD, /* DEP1 = argL (Rn), DEP2 = argR (shifter_op),
+ DEP3 = 0 */
+
+ ARMG_CC_OP_SUB, /* DEP1 = argL (Rn), DEP2 = argR (shifter_op),
+ DEP3 = 0 */
+
+ ARMG_CC_OP_ADC, /* DEP1 = argL (Rn), DEP2 = arg2 (shifter_op),
+ DEP3 = oldC (in LSB) */
+
+ ARMG_CC_OP_SBB, /* DEP1 = argL (Rn), DEP2 = arg2 (shifter_op),
+ DEP3 = oldC (in LSB) */
+
+ ARMG_CC_OP_LOGIC, /* DEP1 = result, DEP2 = shifter_carry_out (in LSB),
+ DEP3 = old V flag (in LSB) */
+
+ ARMG_CC_OP_MUL, /* DEP1 = result, DEP2 = 0, DEP3 = oldC:old_V
+ (in bits 1:0) */
+
+ ARMG_CC_OP_MULL, /* DEP1 = resLO32, DEP2 = resHI32, DEP3 = oldC:old_V
+ (in bits 1:0) */
+
+ ARMG_CC_OP_NUMBER
+};
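+
+/* Worked example (illustrative only): after "SUBS Rd, Rn, Rm" the
+   thunk is built as CC_OP = ARMG_CC_OP_SUB, CC_DEP1 = the Rn value,
+   CC_DEP2 = the shifter-operand value (here, Rm), CC_DEP3 = 0.  A
+   subsequent EQ test then amounts to evaluating
+
+      armg_calculate_condition( (ARMCondEQ << 4) | ARMG_CC_OP_SUB,
+                                rn_val, rm_val, 0 )
+
+   which the spechelper in guest_arm_helpers.c reduces to the simple
+   test rn_val == rm_val.  (rn_val and rm_val are placeholder names
+   for the recorded operand values.) */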
+
+/* XXXX because of the calling conventions for
+   armg_calculate_condition, all these OP values MUST be in the range
+   0 .. 15 only (viz, 4 bits). */
+
+
+
+/* Defines conditions which we can ask for (ARM ARM 2e page A3-6) */
+
+typedef
+ enum {
+ ARMCondEQ = 0, /* equal : Z=1 */
+ ARMCondNE = 1, /* not equal : Z=0 */
+
+ ARMCondHS = 2, /* >=u (higher or same) : C=1 */
+ ARMCondLO = 3, /* <u (lower) : C=0 */
+
+ ARMCondMI = 4, /* minus (negative) : N=1 */
+ ARMCondPL = 5, /* plus (zero or +ve) : N=0 */
+
+ ARMCondVS = 6, /* overflow : V=1 */
+ ARMCondVC = 7, /* no overflow : V=0 */
+
+ ARMCondHI = 8, /* >u (higher) : C=1 && Z=0 */
+ ARMCondLS = 9, /* <=u (lower or same) : C=0 || Z=1 */
+
+ ARMCondGE = 10, /* >=s (signed greater or equal) : N=V */
+ ARMCondLT = 11, /* <s (signed less than) : N!=V */
+
+ ARMCondGT = 12, /* >s (signed greater) : Z=0 && N=V */
+ ARMCondLE = 13, /* <=s (signed less or equal) : Z=1 || N!=V */
+
+ ARMCondAL = 14, /* always (unconditional) : 1 */
+    ARMCondNV = 15 /* never (unconditional) : 0 */
+ /* NB: ARM have deprecated the use of the NV condition code.
+ You are now supposed to use MOV R0,R0 as a noop rather than
+ MOVNV R0,R0 as was previously recommended. Future processors
+ may have the NV condition code reused to do other things. */
+ }
+ ARMCondcode;
+
+#endif /* ndef __VEX_GUEST_ARM_DEFS_H */
+
+/*---------------------------------------------------------------*/
+/*--- end guest_arm_defs.h ---*/
+/*---------------------------------------------------------------*/
diff --git a/VEX/priv/guest_arm_helpers.c b/VEX/priv/guest_arm_helpers.c
new file mode 100644
index 0000000..f6689a0
--- /dev/null
+++ b/VEX/priv/guest_arm_helpers.c
@@ -0,0 +1,701 @@
+
+/*---------------------------------------------------------------*/
+/*--- begin guest_arm_helpers.c ---*/
+/*---------------------------------------------------------------*/
+
+/*
+ This file is part of Valgrind, a dynamic binary instrumentation
+ framework.
+
+ Copyright (C) 2004-2010 OpenWorks LLP
+ info@open-works.net
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA.
+
+ The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "libvex_basictypes.h"
+#include "libvex_emwarn.h"
+#include "libvex_guest_arm.h"
+#include "libvex_ir.h"
+#include "libvex.h"
+
+#include "main_util.h"
+#include "guest_generic_bb_to_IR.h"
+#include "guest_arm_defs.h"
+
+
+/* This file contains helper functions for arm guest code. Calls to
+ these functions are generated by the back end. These calls are of
+ course in the host machine code and this file will be compiled to
+ host machine code, so that all makes sense.
+
+ Only change the signatures of these helper functions very
+ carefully. If you change the signature here, you'll have to change
+ the parameters passed to it in the IR calls constructed by
+   guest_arm_toIR.c.
+*/
+
+
+
+/* generalised left-shifter */
+static inline UInt lshift ( UInt x, Int n )
+{
+ if (n >= 0)
+ return x << n;
+ else
+ return x >> (-n);
+}
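+/* E.g. lshift(x, 3) == x << 3, while lshift(x, -3) == x >> 3; a
+   negative shift amount selects a right shift. */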
+
+
+/* CALLED FROM GENERATED CODE: CLEAN HELPER */
+/* Calculate NZCV from the supplied thunk components, in the positions
+ they appear in the CPSR, viz bits 31:28 for N Z C V respectively.
+ Returned bits 27:0 are zero. */
+UInt armg_calculate_flags_nzcv ( UInt cc_op, UInt cc_dep1,
+ UInt cc_dep2, UInt cc_dep3 )
+{
+ switch (cc_op) {
+ case ARMG_CC_OP_COPY:
+ /* (nzcv, unused, unused) */
+ return cc_dep1;
+ case ARMG_CC_OP_ADD: {
+ /* (argL, argR, unused) */
+ UInt argL = cc_dep1;
+ UInt argR = cc_dep2;
+ UInt res = argL + argR;
+ UInt nf = lshift( res & (1<<31), ARMG_CC_SHIFT_N - 31 );
+ UInt zf = lshift( res == 0, ARMG_CC_SHIFT_Z );
+ // CF and VF need verification
+ UInt cf = lshift( res < argL, ARMG_CC_SHIFT_C );
+ UInt vf = lshift( (res ^ argL) & (res ^ argR),
+ ARMG_CC_SHIFT_V + 1 - 32 )
+ & ARMG_CC_MASK_V;
+ //vex_printf("%08x %08x -> n %x z %x c %x v %x\n",
+ // argL, argR, nf, zf, cf, vf);
+ return nf | zf | cf | vf;
+ }
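+      /* For the ADD case above: e.g. argL = argR = 0x80000000 gives
+         res == 0 and hence N=0 Z=1 C=1 V=1, i.e. a returned value
+         of 0x70000000. */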
+ case ARMG_CC_OP_SUB: {
+ /* (argL, argR, unused) */
+ UInt argL = cc_dep1;
+ UInt argR = cc_dep2;
+ UInt res = argL - argR;
+ UInt nf = lshift( res & (1<<31), ARMG_CC_SHIFT_N - 31 );
+ UInt zf = lshift( res == 0, ARMG_CC_SHIFT_Z );
+ // XXX cf is inverted relative to normal sense
+ UInt cf = lshift( argL >= argR, ARMG_CC_SHIFT_C );
+ UInt vf = lshift( (argL ^ argR) & (argL ^ res),
+ ARMG_CC_SHIFT_V + 1 - 32 )
+ & ARMG_CC_MASK_V;
+ //vex_printf("%08x %08x -> n %x z %x c %x v %x\n",
+ // argL, argR, nf, zf, cf, vf);
+ return nf | zf | cf | vf;
+ }
+ case ARMG_CC_OP_ADC: {
+ /* (argL, argR, oldC) */
+ UInt argL = cc_dep1;
+ UInt argR = cc_dep2;
+ UInt oldC = cc_dep3;
+ UInt res = (argL + argR) + oldC;
+ UInt nf = lshift( res & (1<<31), ARMG_CC_SHIFT_N - 31 );
+ UInt zf = lshift( res == 0, ARMG_CC_SHIFT_Z );
+ UInt cf = oldC ? lshift( res <= argL, ARMG_CC_SHIFT_C )
+ : lshift( res < argL, ARMG_CC_SHIFT_C );
+ UInt vf = lshift( (res ^ argL) & (res ^ argR),
+ ARMG_CC_SHIFT_V + 1 - 32 )
+ & ARMG_CC_MASK_V;
+ //vex_printf("%08x %08x -> n %x z %x c %x v %x\n",
+ // argL, argR, nf, zf, cf, vf);
+ return nf | zf | cf | vf;
+ }
+ case ARMG_CC_OP_SBB: {
+ /* (argL, argR, oldC) */
+ UInt argL = cc_dep1;
+ UInt argR = cc_dep2;
+ UInt oldC = cc_dep3;
+ UInt res = argL - argR - (oldC ^ 1);
+ UInt nf = lshift( res & (1<<31), ARMG_CC_SHIFT_N - 31 );
+ UInt zf = lshift( res == 0, ARMG_CC_SHIFT_Z );
+ UInt cf = oldC ? lshift( argL >= argR, ARMG_CC_SHIFT_C )
+ : lshift( argL > argR, ARMG_CC_SHIFT_C );
+ UInt vf = lshift( (argL ^ argR) & (argL ^ res),
+ ARMG_CC_SHIFT_V + 1 - 32 )
+ & ARMG_CC_MASK_V;
+ //vex_printf("%08x %08x -> n %x z %x c %x v %x\n",
+ // argL, argR, nf, zf, cf, vf);
+ return nf | zf | cf | vf;
+ }
+ case ARMG_CC_OP_LOGIC: {
+ /* (res, shco, oldV) */
+ UInt res = cc_dep1;
+ UInt shco = cc_dep2;
+ UInt oldV = cc_dep3;
+ UInt nf = lshift( res & (1<<31), ARMG_CC_SHIFT_N - 31 );
+ UInt zf = lshift( res == 0, ARMG_CC_SHIFT_Z );
+ UInt cf = lshift( shco & 1, ARMG_CC_SHIFT_C );
+ UInt vf = lshift( oldV & 1, ARMG_CC_SHIFT_V );
+ return nf | zf | cf | vf;
+ }
+ case ARMG_CC_OP_MUL: {
+ /* (res, unused, oldC:oldV) */
+ UInt res = cc_dep1;
+ UInt oldC = (cc_dep3 >> 1) & 1;
+ UInt oldV = (cc_dep3 >> 0) & 1;
+ UInt nf = lshift( res & (1<<31), ARMG_CC_SHIFT_N - 31 );
+ UInt zf = lshift( res == 0, ARMG_CC_SHIFT_Z );
+ UInt cf = lshift( oldC & 1, ARMG_CC_SHIFT_C );
+ UInt vf = lshift( oldV & 1, ARMG_CC_SHIFT_V );
+ return nf | zf | cf | vf;
+ }
+ case ARMG_CC_OP_MULL: {
+ /* (resLo32, resHi32, oldC:oldV) */
+ UInt resLo32 = cc_dep1;
+ UInt resHi32 = cc_dep2;
+ UInt oldC = (cc_dep3 >> 1) & 1;
+ UInt oldV = (cc_dep3 >> 0) & 1;
+ UInt nf = lshift( resHi32 & (1<<31), ARMG_CC_SHIFT_N - 31 );
+ UInt zf = lshift( (resHi32|resLo32) == 0, ARMG_CC_SHIFT_Z );
+ UInt cf = lshift( oldC & 1, ARMG_CC_SHIFT_C );
+ UInt vf = lshift( oldV & 1, ARMG_CC_SHIFT_V );
+ return nf | zf | cf | vf;
+ }
+ default:
+ /* shouldn't really make these calls from generated code */
+ vex_printf("armg_calculate_flags_nzcv"
+ "( op=%u, dep1=0x%x, dep2=0x%x, dep3=0x%x )\n",
+ cc_op, cc_dep1, cc_dep2, cc_dep3 );
+ vpanic("armg_calculate_flags_nzcv");
+ }
+}
+
+
+/* CALLED FROM GENERATED CODE: CLEAN HELPER */
+/* Calculate the C flag from the thunk components, in the lowest bit
+ of the word (bit 0). */
+UInt armg_calculate_flag_c ( UInt cc_op, UInt cc_dep1,
+ UInt cc_dep2, UInt cc_dep3 )
+{
+ UInt r = armg_calculate_flags_nzcv(cc_op, cc_dep1, cc_dep2, cc_dep3);
+ return (r >> ARMG_CC_SHIFT_C) & 1;
+}
+
+
+/* CALLED FROM GENERATED CODE: CLEAN HELPER */
+/* Calculate the V flag from the thunk components, in the lowest bit
+ of the word (bit 0). */
+UInt armg_calculate_flag_v ( UInt cc_op, UInt cc_dep1,
+ UInt cc_dep2, UInt cc_dep3 )
+{
+ UInt r = armg_calculate_flags_nzcv(cc_op, cc_dep1, cc_dep2, cc_dep3);
+ return (r >> ARMG_CC_SHIFT_V) & 1;
+}
+
+/* CALLED FROM GENERATED CODE: CLEAN HELPER */
+/* Calculate the QC flag from the arguments, in the lowest bit
+ of the word (bit 0). Urr, having this out of line is bizarre.
+ Push back inline. */
+UInt armg_calculate_flag_qc ( UInt resL1, UInt resL2,
+ UInt resR1, UInt resR2 )
+{
+ if (resL1 != resR1 || resL2 != resR2)
+ return 1;
+ else
+ return 0;
+}
+
+/* CALLED FROM GENERATED CODE: CLEAN HELPER */
+/* Calculate the specified condition from the thunk components, in the
+ lowest bit of the word (bit 0). */
+extern
+UInt armg_calculate_condition ( UInt cond_n_op /* ARMCondcode << 4 | cc_op */,
+ UInt cc_dep1,
+ UInt cc_dep2, UInt cc_dep3 )
+{
+ UInt cond = cond_n_op >> 4;
+ UInt cc_op = cond_n_op & 0xF;
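+   /* Example: a GE test following a SUB arrives here with
+      cond_n_op == (ARMCondGE << 4) | ARMG_CC_OP_SUB == 0xA2,
+      giving cond == 10 and cc_op == 2. */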
+ UInt nf, zf, vf, cf, nzcv, inv;
+ // vex_printf("XXXXXXXX %x %x %x %x\n",
+ // cond_n_op, cc_dep1, cc_dep2, cc_dep3);
+
+ // skip flags computation in this case
+ if (cond == ARMCondAL) return 1;
+
+ inv = cond & 1;
+ nzcv = armg_calculate_flags_nzcv(cc_op, cc_dep1, cc_dep2, cc_dep3);
+
+ switch (cond) {
+ case ARMCondEQ: // Z=1 => z
+ case ARMCondNE: // Z=0
+ zf = nzcv >> ARMG_CC_SHIFT_Z;
+ return 1 & (inv ^ zf);
+
+ case ARMCondHS: // C=1 => c
+ case ARMCondLO: // C=0
+ cf = nzcv >> ARMG_CC_SHIFT_C;
+ return 1 & (inv ^ cf);
+
+ case ARMCondMI: // N=1 => n
+ case ARMCondPL: // N=0
+ nf = nzcv >> ARMG_CC_SHIFT_N;
+ return 1 & (inv ^ nf);
+
+ case ARMCondVS: // V=1 => v
+ case ARMCondVC: // V=0
+ vf = nzcv >> ARMG_CC_SHIFT_V;
+ return 1 & (inv ^ vf);
+
+ case ARMCondHI: // C=1 && Z=0 => c & ~z
+ case ARMCondLS: // C=0 || Z=1
+ cf = nzcv >> ARMG_CC_SHIFT_C;
+ zf = nzcv >> ARMG_CC_SHIFT_Z;
+ return 1 & (inv ^ (cf & ~zf));
+
+ case ARMCondGE: // N=V => ~(n^v)
+ case ARMCondLT: // N!=V
+ nf = nzcv >> ARMG_CC_SHIFT_N;
+ vf = nzcv >> ARMG_CC_SHIFT_V;
+ return 1 & (inv ^ ~(nf ^ vf));
+
+ case ARMCondGT: // Z=0 && N=V => ~z & ~(n^v) => ~(z | (n^v))
+ case ARMCondLE: // Z=1 || N!=V
+ nf = nzcv >> ARMG_CC_SHIFT_N;
+ vf = nzcv >> ARMG_CC_SHIFT_V;
+ zf = nzcv >> ARMG_CC_SHIFT_Z;
+ return 1 & (inv ^ ~(zf | (nf ^ vf)));
+
+ case ARMCondAL: // handled above
+ case ARMCondNV: // should never get here: Illegal instr
+ default:
+ /* shouldn't really make these calls from generated code */
+ vex_printf("armg_calculate_condition(ARM)"
+ "( %u, %u, 0x%x, 0x%x, 0x%x )\n",
+ cond, cc_op, cc_dep1, cc_dep2, cc_dep3 );
+ vpanic("armg_calculate_condition(ARM)");
+ }
+}
+
+
+/*---------------------------------------------------------------*/
+/*--- Flag-helpers translation-time function specialisers. ---*/
+/*--- These help iropt specialise calls to the above run-time ---*/
+/*--- flags functions. ---*/
+/*---------------------------------------------------------------*/
+
+/* Used by the optimiser to try specialisations. Returns an
+ equivalent expression, or NULL if none. */
+
+static Bool isU32 ( IRExpr* e, UInt n )
+{
+ return
+ toBool( e->tag == Iex_Const
+ && e->Iex.Const.con->tag == Ico_U32
+ && e->Iex.Const.con->Ico.U32 == n );
+}
+
+IRExpr* guest_arm_spechelper ( HChar* function_name,
+ IRExpr** args,
+ IRStmt** precedingStmts,
+ Int n_precedingStmts )
+{
+# define unop(_op,_a1) IRExpr_Unop((_op),(_a1))
+# define binop(_op,_a1,_a2) IRExpr_Binop((_op),(_a1),(_a2))
+# define mkU32(_n) IRExpr_Const(IRConst_U32(_n))
+# define mkU8(_n) IRExpr_Const(IRConst_U8(_n))
+
+ Int i, arity = 0;
+ for (i = 0; args[i]; i++)
+ arity++;
+# if 0
+ vex_printf("spec request:\n");
+ vex_printf(" %s ", function_name);
+ for (i = 0; i < arity; i++) {
+ vex_printf(" ");
+ ppIRExpr(args[i]);
+ }
+ vex_printf("\n");
+# endif
+
+ /* --------- specialising "armg_calculate_condition" --------- */
+
+ if (vex_streq(function_name, "armg_calculate_condition")) {
+      /* specialise calls to the above "armg_calculate_condition" function */
+ IRExpr *cond_n_op, *cc_dep1, *cc_dep2, *cc_dep3;
+ vassert(arity == 4);
+ cond_n_op = args[0]; /* ARMCondcode << 4 | ARMG_CC_OP_* */
+ cc_dep1 = args[1];
+ cc_dep2 = args[2];
+ cc_dep3 = args[3];
+
+ /*---------------- SUB ----------------*/
+
+ if (isU32(cond_n_op, (ARMCondEQ << 4) | ARMG_CC_OP_SUB)) {
+ /* EQ after SUB --> test argL == argR */
+ return unop(Iop_1Uto32,
+ binop(Iop_CmpEQ32, cc_dep1, cc_dep2));
+ }
+ if (isU32(cond_n_op, (ARMCondNE << 4) | ARMG_CC_OP_SUB)) {
+ /* NE after SUB --> test argL != argR */
+ return unop(Iop_1Uto32,
+ binop(Iop_CmpNE32, cc_dep1, cc_dep2));
+ }
+
+ if (isU32(cond_n_op, (ARMCondLE << 4) | ARMG_CC_OP_SUB)) {
+ /* LE after SUB --> test argL <=s argR */
+ return unop(Iop_1Uto32,
+ binop(Iop_CmpLE32S, cc_dep1, cc_dep2));
+ }
+
+ if (isU32(cond_n_op, (ARMCondLT << 4) | ARMG_CC_OP_SUB)) {
+ /* LT after SUB --> test argL <s argR */
+ return unop(Iop_1Uto32,
+ binop(Iop_CmpLT32S, cc_dep1, cc_dep2));
+ }
+
+ if (isU32(cond_n_op, (ARMCondGE << 4) | ARMG_CC_OP_SUB)) {
+ /* GE after SUB --> test argL >=s argR
+ --> test argR <=s argL */
+ return unop(Iop_1Uto32,
+ binop(Iop_CmpLE32S, cc_dep2, cc_dep1));
+ }
+
+ if (isU32(cond_n_op, (ARMCondHS << 4) | ARMG_CC_OP_SUB)) {
+ /* HS after SUB --> test argL >=u argR
+ --> test argR <=u argL */
+ return unop(Iop_1Uto32,
+ binop(Iop_CmpLE32U, cc_dep2, cc_dep1));
+ }
+
+ if (isU32(cond_n_op, (ARMCondLS << 4) | ARMG_CC_OP_SUB)) {
+ /* LS after SUB --> test argL <=u argR */
+ return unop(Iop_1Uto32,
+ binop(Iop_CmpLE32U, cc_dep1, cc_dep2));
+ }
+
+ /*---------------- LOGIC ----------------*/
+ if (isU32(cond_n_op, (ARMCondEQ << 4) | ARMG_CC_OP_LOGIC)) {
+ /* EQ after LOGIC --> test res == 0 */
+ return unop(Iop_1Uto32,
+ binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
+ }
+ if (isU32(cond_n_op, (ARMCondNE << 4) | ARMG_CC_OP_LOGIC)) {
+ /* NE after LOGIC --> test res != 0 */
+ return unop(Iop_1Uto32,
+ binop(Iop_CmpNE32, cc_dep1, mkU32(0)));
+ }
+
+ /*----------------- AL -----------------*/
+ /* A critically important case for Thumb code.
+
+ What we're trying to spot is the case where cond_n_op is an
+ expression of the form Or32(..., 0xE0) since that means the
+ caller is asking for CondAL and we can simply return 1
+ without caring what the ... part is. This is a potentially
+ dodgy kludge in that it assumes that the ... part has zeroes
+ in bits 7:4, so that the result of the Or32 is guaranteed to
+         be 0xE in bits 7:4. Given that the places where this first
+         arg is constructed (in guest_arm_toIR.c) are very
+         constrained, we can get away with this. To make this
+         guaranteed safe would require a new primop, Slice44
+         or some such, thusly
+
+ Slice44(arg1, arg2) = 0--(24)--0 arg1[7:4] arg2[3:0]
+
+ and we would then look for Slice44(0xE0, ...)
+ which would give the required safety property.
+
+ It would be infeasibly expensive to scan backwards through
+ the entire block looking for an assignment to the temp, so
+ just look at the previous 16 statements. That should find it
+ if it is an interesting case, as a result of how the
+ boilerplate guff at the start of each Thumb insn translation
+ is made.
+ */
+ if (cond_n_op->tag == Iex_RdTmp) {
+ Int j;
+ IRTemp look_for = cond_n_op->Iex.RdTmp.tmp;
+ Int limit = n_precedingStmts - 16;
+ if (limit < 0) limit = 0;
+ if (0) vex_printf("scanning %d .. %d\n", n_precedingStmts-1, limit);
+ for (j = n_precedingStmts - 1; j >= limit; j--) {
+ IRStmt* st = precedingStmts[j];
+ if (st->tag == Ist_WrTmp
+ && st->Ist.WrTmp.tmp == look_for
+ && st->Ist.WrTmp.data->tag == Iex_Binop
+ && st->Ist.WrTmp.data->Iex.Binop.op == Iop_Or32
+ && isU32(st->Ist.WrTmp.data->Iex.Binop.arg2, (ARMCondAL << 4)))
+ return mkU32(1);
+ }
+ /* Didn't find any useful binding to the first arg
+ in the previous 16 stmts. */
+ }
+ }
+
+# undef unop
+# undef binop
+# undef mkU32
+# undef mkU8
+
+ return NULL;
+}
+
+
+/*----------------------------------------------*/
+/*--- The exported fns .. ---*/
+/*----------------------------------------------*/
+
+/* VISIBLE TO LIBVEX CLIENT */
+#if 0
+void LibVEX_GuestARM_put_flags ( UInt flags_native,
+ /*OUT*/VexGuestARMState* vex_state )
+{
+ vassert(0); // FIXME
+
+ /* Mask out everything except N Z V C. */
+ flags_native
+ &= (ARMG_CC_MASK_N | ARMG_CC_MASK_Z | ARMG_CC_MASK_V | ARMG_CC_MASK_C);
+
+ vex_state->guest_CC_OP = ARMG_CC_OP_COPY;
+ vex_state->guest_CC_DEP1 = flags_native;
+ vex_state->guest_CC_DEP2 = 0;
+ vex_state->guest_CC_NDEP = 0;
+}
+#endif
+
+/* VISIBLE TO LIBVEX CLIENT */
+UInt LibVEX_GuestARM_get_cpsr ( /*IN*/VexGuestARMState* vex_state )
+{
+ UInt cpsr = 0;
+ // NZCV
+ cpsr |= armg_calculate_flags_nzcv(
+ vex_state->guest_CC_OP,
+ vex_state->guest_CC_DEP1,
+ vex_state->guest_CC_DEP2,
+ vex_state->guest_CC_NDEP
+ );
+ vassert(0 == (cpsr & 0x0FFFFFFF));
+ // Q
+ if (vex_state->guest_QFLAG32 > 0)
+ cpsr |= (1 << 27);
+ // GE
+ if (vex_state->guest_GEFLAG0 > 0)
+ cpsr |= (1 << 16);
+ if (vex_state->guest_GEFLAG1 > 0)
+ cpsr |= (1 << 17);
+ if (vex_state->guest_GEFLAG2 > 0)
+ cpsr |= (1 << 18);
+ if (vex_state->guest_GEFLAG3 > 0)
+ cpsr |= (1 << 19);
+ // M
+ cpsr |= (1 << 4); // 0b10000 means user-mode
+ // J,T J (bit 24) is zero by initialisation above
+ // T we copy from R15T[0]
+ if (vex_state->guest_R15T & 1)
+ cpsr |= (1 << 5);
+ // ITSTATE we punt on for the time being. Could compute it
+ // if needed though.
+ // E, endianness, 0 (littleendian) from initialisation above
+ // A,I,F disable some async exceptions. Not sure about these.
+ // Leave as zero for the time being.
+ return cpsr;
+}
+
+/* VISIBLE TO LIBVEX CLIENT */
+void LibVEX_GuestARM_initialise ( /*OUT*/VexGuestARMState* vex_state )
+{
+ vex_state->guest_R0 = 0;
+ vex_state->guest_R1 = 0;
+ vex_state->guest_R2 = 0;
+ vex_state->guest_R3 = 0;
+ vex_state->guest_R4 = 0;
+ vex_state->guest_R5 = 0;
+ vex_state->guest_R6 = 0;
+ vex_state->guest_R7 = 0;
+ vex_state->guest_R8 = 0;
+ vex_state->guest_R9 = 0;
+ vex_state->guest_R10 = 0;
+ vex_state->guest_R11 = 0;
+ vex_state->guest_R12 = 0;
+ vex_state->guest_R13 = 0;
+ vex_state->guest_R14 = 0;
+ vex_state->guest_R15T = 0; /* NB: implies ARM mode */
+
+ vex_state->guest_CC_OP = ARMG_CC_OP_COPY;
+ vex_state->guest_CC_DEP1 = 0;
+ vex_state->guest_CC_DEP2 = 0;
+ vex_state->guest_CC_NDEP = 0;
+ vex_state->guest_QFLAG32 = 0;
+ vex_state->guest_GEFLAG0 = 0;
+ vex_state->guest_GEFLAG1 = 0;
+ vex_state->guest_GEFLAG2 = 0;
+ vex_state->guest_GEFLAG3 = 0;
+
+ vex_state->guest_EMWARN = 0;
+ vex_state->guest_TISTART = 0;
+ vex_state->guest_TILEN = 0;
+ vex_state->guest_NRADDR = 0;
+ vex_state->guest_IP_AT_SYSCALL = 0;
+
+ vex_state->guest_D0 = 0;
+ vex_state->guest_D1 = 0;
+ vex_state->guest_D2 = 0;
+ vex_state->guest_D3 = 0;
+ vex_state->guest_D4 = 0;
+ vex_state->guest_D5 = 0;
+ vex_state->guest_D6 = 0;
+ vex_state->guest_D7 = 0;
+ vex_state->guest_D8 = 0;
+ vex_state->guest_D9 = 0;
+ vex_state->guest_D10 = 0;
+ vex_state->guest_D11 = 0;
+ vex_state->guest_D12 = 0;
+ vex_state->guest_D13 = 0;
+ vex_state->guest_D14 = 0;
+ vex_state->guest_D15 = 0;
+ vex_state->guest_D16 = 0;
+ vex_state->guest_D17 = 0;
+ vex_state->guest_D18 = 0;
+ vex_state->guest_D19 = 0;
+ vex_state->guest_D20 = 0;
+ vex_state->guest_D21 = 0;
+ vex_state->guest_D22 = 0;
+ vex_state->guest_D23 = 0;
+ vex_state->guest_D24 = 0;
+ vex_state->guest_D25 = 0;
+ vex_state->guest_D26 = 0;
+ vex_state->guest_D27 = 0;
+ vex_state->guest_D28 = 0;
+ vex_state->guest_D29 = 0;
+ vex_state->guest_D30 = 0;
+ vex_state->guest_D31 = 0;
+
+ /* ARM encoded; zero is the default as it happens (result flags
+ (NZCV) cleared, FZ disabled, round to nearest, non-vector mode,
+ all exns masked, all exn sticky bits cleared). */
+ vex_state->guest_FPSCR = 0;
+
+ vex_state->guest_TPIDRURO = 0;
+
+ /* Not in a Thumb IT block. */
+ vex_state->guest_ITSTATE = 0;
+
+ vex_state->padding1 = 0;
+ vex_state->padding2 = 0;
+ vex_state->padding3 = 0;
+}
+
+
+/*-----------------------------------------------------------*/
+/*--- Describing the arm guest state, for the benefit ---*/
+/*--- of iropt and instrumenters. ---*/
+/*-----------------------------------------------------------*/
+
+/* Figure out if any part of the guest state contained in minoff
+ .. maxoff requires precise memory exceptions. If in doubt return
+   True (but this generates significantly slower code).
+
+ We enforce precise exns for guest R13(sp), R15T(pc).
+*/
+Bool guest_arm_state_requires_precise_mem_exns ( Int minoff,
+ Int maxoff)
+{
+ Int sp_min = offsetof(VexGuestARMState, guest_R13);
+ Int sp_max = sp_min + 4 - 1;
+ Int pc_min = offsetof(VexGuestARMState, guest_R15T);
+ Int pc_max = pc_min + 4 - 1;
+
+ if (maxoff < sp_min || minoff > sp_max) {
+ /* no overlap with sp */
+ } else {
+ return True;
+ }
+
+ if (maxoff < pc_min || minoff > pc_max) {
+ /* no overlap with pc */
+ } else {
+ return True;
+ }
+
+ /* We appear to need precise updates of R11 in order to get proper
+ stacktraces from non-optimised code. */
+ Int r11_min = offsetof(VexGuestARMState, guest_R11);
+ Int r11_max = r11_min + 4 - 1;
+
+ if (maxoff < r11_min || minoff > r11_max) {
+ /* no overlap with r11 */
+ } else {
+ return True;
+ }
+
+ /* Ditto R7, particularly needed for proper stacktraces in Thumb
+ code. */
+ Int r7_min = offsetof(VexGuestARMState, guest_R7);
+ Int r7_max = r7_min + 4 - 1;
+
+ if (maxoff < r7_min || minoff > r7_max) {
+ /* no overlap with r7 */
+ } else {
+ return True;
+ }
+
+ return False;
+}
+
+
+
+#define ALWAYSDEFD(field) \
+ { offsetof(VexGuestARMState, field), \
+ (sizeof ((VexGuestARMState*)0)->field) }
+
+VexGuestLayout
+ armGuest_layout
+ = {
+ /* Total size of the guest state, in bytes. */
+ .total_sizeB = sizeof(VexGuestARMState),
+
+ /* Describe the stack pointer. */
+ .offset_SP = offsetof(VexGuestARMState,guest_R13),
+ .sizeof_SP = 4,
+
+ /* Describe the instruction pointer. */
+ .offset_IP = offsetof(VexGuestARMState,guest_R15T),
+ .sizeof_IP = 4,
+
+ /* Describe any sections to be regarded by Memcheck as
+ 'always-defined'. */
+ .n_alwaysDefd = 10,
+
+ /* flags thunk: OP is always defd, whereas DEP1 and DEP2
+ have to be tracked. See detailed comment in gdefs.h on
+ meaning of thunk fields. */
+ .alwaysDefd
+ = { /* 0 */ ALWAYSDEFD(guest_R15T),
+ /* 1 */ ALWAYSDEFD(guest_CC_OP),
+ /* 2 */ ALWAYSDEFD(guest_CC_NDEP),
+ /* 3 */ ALWAYSDEFD(guest_EMWARN),
+ /* 4 */ ALWAYSDEFD(guest_TISTART),
+ /* 5 */ ALWAYSDEFD(guest_TILEN),
+ /* 6 */ ALWAYSDEFD(guest_NRADDR),
+ /* 7 */ ALWAYSDEFD(guest_IP_AT_SYSCALL),
+ /* 8 */ ALWAYSDEFD(guest_TPIDRURO),
+ /* 9 */ ALWAYSDEFD(guest_ITSTATE)
+ }
+ };
+
+
+/*---------------------------------------------------------------*/
+/*--- end guest_arm_helpers.c ---*/
+/*---------------------------------------------------------------*/
diff --git a/VEX/priv/guest_arm_toIR.c b/VEX/priv/guest_arm_toIR.c
new file mode 100644
index 0000000..c1f9211
--- /dev/null
+++ b/VEX/priv/guest_arm_toIR.c
@@ -0,0 +1,17970 @@
+
+/*--------------------------------------------------------------------*/
+/*--- begin guest_arm_toIR.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+ This file is part of Valgrind, a dynamic binary instrumentation
+ framework.
+
+ Copyright (C) 2004-2010 OpenWorks LLP
+ info@open-works.net
+
+ NEON support is
+ Copyright (C) 2010-2010 Samsung Electronics
+ contributed by Dmitry Zhurikhin <zhur@ispras.ru>
+ and Kirill Batuzov <batuzovk@ispras.ru>
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA.
+
+ The GNU General Public License is contained in the file COPYING.
+*/
+
+/* XXXX thumb to check:
+ that all cases where putIRegT writes r15, we generate a jump.
+
+ All uses of newTemp assign to an IRTemp and not a UInt
+
+ For all thumb loads and stores, including VFP ones, new-ITSTATE is
+ backed out before the memory op, and restored afterwards. This
+ needs to happen even after we go uncond. (and for sure it doesn't
+ happen for VFP loads/stores right now).
+
+ VFP on thumb: check that we exclude all r13/r15 cases that we
+ should.
+
+ XXXX thumb to do: improve the ITSTATE-zeroing optimisation by
+ taking into account the number of insns guarded by an IT.
+
+ remove the nasty hack, in the spechelper, of looking for Or32(...,
+   0xE0) as the first arg to armg_calculate_condition, and instead
+ use Slice44 as specified in comments in the spechelper.
+
+ add specialisations for armg_calculate_flag_c and _v, as they
+ are moderately often needed in Thumb code.
+
+ Correctness: ITSTATE handling in Thumb SVCs is wrong.
+
+ Correctness (obscure): in m_transtab, when invalidating code
+ address ranges, invalidate up to 18 bytes after the end of the
+ range. This is because the ITSTATE optimisation at the top of
+ _THUMB_WRK below analyses up to 18 bytes before the start of any
+ given instruction, and so might depend on the invalidated area.
+*/
+
+/* Limitations, etc
+
+ - pretty dodgy exception semantics for {LD,ST}Mxx, no doubt
+
+ - SWP: the restart jump back is Ijk_Boring; it should be
+ Ijk_NoRedir but that's expensive. See comments on casLE() in
+ guest_x86_toIR.c.
+*/
+
+/* "Special" instructions.
+
+ This instruction decoder can decode four special instructions
+ which mean nothing natively (are no-ops as far as regs/mem are
+ concerned) but have meaning for supporting Valgrind. A special
+ instruction is flagged by a 16-byte preamble:
+
+ E1A0C1EC E1A0C6EC E1A0CEEC E1A0C9EC
+ (mov r12, r12, ROR #3; mov r12, r12, ROR #13;
+ mov r12, r12, ROR #29; mov r12, r12, ROR #19)
+
+   Following that, one of the following 3 is allowed
+ (standard interpretation in parentheses):
+
+ E18AA00A (orr r10,r10,r10) R3 = client_request ( R4 )
+ E18BB00B (orr r11,r11,r11) R3 = guest_NRADDR
+ E18CC00C (orr r12,r12,r12) branch-and-link-to-noredir R4
+
+ Any other bytes following the 16-byte preamble are illegal and
+ constitute a failure in instruction decoding. This all assumes
+ that the preamble will never occur except in specific code
+ fragments designed for Valgrind to catch.
+*/
+
+/* Translates ARM(v5) code to IR. */
+
+#include "libvex_basictypes.h"
+#include "libvex_ir.h"
+#include "libvex.h"
+#include "libvex_guest_arm.h"
+
+#include "main_util.h"
+#include "main_globals.h"
+#include "guest_generic_bb_to_IR.h"
+#include "guest_arm_defs.h"
+
+
+/*------------------------------------------------------------*/
+/*--- Globals ---*/
+/*------------------------------------------------------------*/
+
+/* These are set at the start of the translation of an instruction, so
+ that we don't have to pass them around endlessly. CONST means does
+ not change during translation of the instruction.
+*/
+
+/* CONST: is the host bigendian? This has to do with float vs double
+ register accesses on VFP, but it's complex and not properly thought
+ out. */
+static Bool host_is_bigendian;
+
+/* CONST: The guest address for the instruction currently being
+ translated. This is the real, "decoded" address (not subject
+ to the CPSR.T kludge). */
+static Addr32 guest_R15_curr_instr_notENC;
+
+/* CONST, FOR ASSERTIONS ONLY. Indicates whether currently processed
+ insn is Thumb (True) or ARM (False). */
+static Bool __curr_is_Thumb;
+
+/* MOD: The IRSB* into which we're generating code. */
+static IRSB* irsb;
+
+/* These are to do with handling writes to r15. They are initially
+ set at the start of disInstr_ARM_WRK to indicate no update,
+ possibly updated during the routine, and examined again at the end.
+ If they have been set to indicate a r15 update then a jump is
+ generated. Note, "explicit" jumps (b, bx, etc) are generated
+ directly, not using this mechanism -- this is intended to handle
+ the implicit-style jumps resulting from (eg) assigning to r15 as
+ the result of insns we wouldn't normally consider branchy. */
+
+/* MOD. Initially False; set to True iff abovementioned handling is
+ required. */
+static Bool r15written;
+
+/* MOD. Initially IRTemp_INVALID. If the r15 branch to be generated
+ is conditional, this holds the gating IRTemp :: Ity_I32. If the
+ branch to be generated is unconditional, this remains
+ IRTemp_INVALID. */
+static IRTemp r15guard; /* :: Ity_I32, 0 or 1 */
+
+/* MOD. Initially Ijk_Boring. If an r15 branch is to be generated,
+ this holds the jump kind. */
+static IRJumpKind r15kind;
+
+
+/*------------------------------------------------------------*/
+/*--- Debugging output ---*/
+/*------------------------------------------------------------*/
+
+#define DIP(format, args...) \
+ if (vex_traceflags & VEX_TRACE_FE) \
+ vex_printf(format, ## args)
+
+#define DIS(buf, format, args...) \
+ if (vex_traceflags & VEX_TRACE_FE) \
+ vex_sprintf(buf, format, ## args)
+
+#define ASSERT_IS_THUMB \
+ do { vassert(__curr_is_Thumb); } while (0)
+
+#define ASSERT_IS_ARM \
+ do { vassert(! __curr_is_Thumb); } while (0)
+
+
+/*------------------------------------------------------------*/
+/*--- Helper bits and pieces for deconstructing the ---*/
+/*--- arm insn stream. ---*/
+/*------------------------------------------------------------*/
+
+/* Do a little-endian load of a 32-bit word, regardless of the
+ endianness of the underlying host. */
+static inline UInt getUIntLittleEndianly ( UChar* p )
+{
+ UInt w = 0;
+ w = (w << 8) | p[3];
+ w = (w << 8) | p[2];
+ w = (w << 8) | p[1];
+ w = (w << 8) | p[0];
+ return w;
+}
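+/* E.g. bytes p[0..3] = { 0x78, 0x56, 0x34, 0x12 } yield 0x12345678,
+   whatever the endianness of the host. */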
+
+/* Do a little-endian load of a 16-bit word, regardless of the
+ endianness of the underlying host. */
+static inline UShort getUShortLittleEndianly ( UChar* p )
+{
+ UShort w = 0;
+ w = (w << 8) | p[1];
+ w = (w << 8) | p[0];
+ return w;
+}
+
+static UInt ROR32 ( UInt x, UInt sh ) {
+ vassert(sh >= 0 && sh < 32);
+ if (sh == 0)
+ return x;
+ else
+ return (x << (32-sh)) | (x >> sh);
+}
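+/* E.g. ROR32(0x80000001, 1) == 0xC0000000, and ROR32(x, 0) == x. */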
+
+static Int popcount32 ( UInt x )
+{
+ Int res = 0, i;
+ for (i = 0; i < 32; i++) {
+ res += (x & 1);
+ x >>= 1;
+ }
+ return res;
+}
+
+static UInt setbit32 ( UInt x, Int ix, UInt b )
+{
+ UInt mask = 1 << ix;
+ x &= ~mask;
+ x |= ((b << ix) & mask);
+ return x;
+}
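+/* E.g. setbit32(0, 5, 1) == 0x20 and setbit32(0xFF, 0, 0) == 0xFE. */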
+
+#define BITS2(_b1,_b0) \
+ (((_b1) << 1) | (_b0))
+
+#define BITS3(_b2,_b1,_b0) \
+ (((_b2) << 2) | ((_b1) << 1) | (_b0))
+
+#define BITS4(_b3,_b2,_b1,_b0) \
+ (((_b3) << 3) | ((_b2) << 2) | ((_b1) << 1) | (_b0))
+
+#define BITS8(_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
+ ((BITS4((_b7),(_b6),(_b5),(_b4)) << 4) \
+ | BITS4((_b3),(_b2),(_b1),(_b0)))
+
+#define BITS5(_b4,_b3,_b2,_b1,_b0) \
+ (BITS8(0,0,0,(_b4),(_b3),(_b2),(_b1),(_b0)))
+#define BITS6(_b5,_b4,_b3,_b2,_b1,_b0) \
+ (BITS8(0,0,(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
+#define BITS7(_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
+ (BITS8(0,(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
+
+#define BITS9(_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
+ (((_b8) << 8) \
+ | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
+
+#define BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
+ (((_b9) << 9) | ((_b8) << 8) \
+ | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
+
+/* produces _uint[_bMax:_bMin] */
+#define SLICE_UInt(_uint,_bMax,_bMin) \
+ (( ((UInt)(_uint)) >> (_bMin)) \
+ & (UInt)((1ULL << ((_bMax) - (_bMin) + 1)) - 1ULL))
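+
+/* E.g. BITS4(1,0,0,1) == 9, BITS8(1,1,1,0,0,0,0,0) == 0xE0, and
+   SLICE_UInt(0xABCD, 15, 8) == 0xAB. */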
+
+
+/*------------------------------------------------------------*/
+/*--- Helper bits and pieces for creating IR fragments. ---*/
+/*------------------------------------------------------------*/
+
+static IRExpr* mkU64 ( ULong i )
+{
+ return IRExpr_Const(IRConst_U64(i));
+}
+
+static IRExpr* mkU32 ( UInt i )
+{
+ return IRExpr_Const(IRConst_U32(i));
+}
+
+static IRExpr* mkU8 ( UInt i )
+{
+ vassert(i < 256);
+ return IRExpr_Const(IRConst_U8( (UChar)i ));
+}
+
+static IRExpr* mkexpr ( IRTemp tmp )
+{
+ return IRExpr_RdTmp(tmp);
+}
+
+static IRExpr* unop ( IROp op, IRExpr* a )
+{
+ return IRExpr_Unop(op, a);
+}
+
+static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
+{
+ return IRExpr_Binop(op, a1, a2);
+}
+
+static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 )
+{
+ return IRExpr_Triop(op, a1, a2, a3);
+}
+
+static IRExpr* loadLE ( IRType ty, IRExpr* addr )
+{
+ return IRExpr_Load(Iend_LE, ty, addr);
+}
+
+/* Add a statement to the list held by "irbb". */
+static void stmt ( IRStmt* st )
+{
+ addStmtToIRSB( irsb, st );
+}
+
+static void assign ( IRTemp dst, IRExpr* e )
+{
+ stmt( IRStmt_WrTmp(dst, e) );
+}
+
+static void storeLE ( IRExpr* addr, IRExpr* data )
+{
+ stmt( IRStmt_Store(Iend_LE, addr, data) );
+}
+
+/* Generate a new temporary of the given type. */
+static IRTemp newTemp ( IRType ty )
+{
+ vassert(isPlausibleIRType(ty));
+ return newIRTemp( irsb->tyenv, ty );
+}
+
+/* Produces a value in 0 .. 3, which is encoded as per the type
+ IRRoundingMode. */
+static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void )
+{
+ return mkU32(Irrm_NEAREST);
+}
+
+/* Generate an expression for SRC rotated right by ROT. */
+static IRExpr* genROR32( IRTemp src, Int rot )
+{
+ vassert(rot >= 0 && rot < 32);
+ if (rot == 0)
+ return mkexpr(src);
+ return
+ binop(Iop_Or32,
+ binop(Iop_Shl32, mkexpr(src), mkU8(32 - rot)),
+ binop(Iop_Shr32, mkexpr(src), mkU8(rot)));
+}
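+/* E.g. genROR32(src, 8) emits Or32(Shl32(src, 24), Shr32(src, 8)),
+   the IR counterpart of ROR32 above. */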
+
+static IRExpr* mkU128 ( ULong i )
+{
+ return binop(Iop_64HLtoV128, mkU64(i), mkU64(i));
+}
+
+/* Generate a 4-aligned version of the given expression if
+ the given condition is true. Else return it unchanged. */
+static IRExpr* align4if ( IRExpr* e, Bool b )
+{
+ if (b)
+ return binop(Iop_And32, e, mkU32(~3));
+ else
+ return e;
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Helpers for accessing guest registers. ---*/
+/*------------------------------------------------------------*/
+
+#define OFFB_R0 offsetof(VexGuestARMState,guest_R0)
+#define OFFB_R1 offsetof(VexGuestARMState,guest_R1)
+#define OFFB_R2 offsetof(VexGuestARMState,guest_R2)
+#define OFFB_R3 offsetof(VexGuestARMState,guest_R3)
+#define OFFB_R4 offsetof(VexGuestARMState,guest_R4)
+#define OFFB_R5 offsetof(VexGuestARMState,guest_R5)
+#define OFFB_R6 offsetof(VexGuestARMState,guest_R6)
+#define OFFB_R7 offsetof(VexGuestARMState,guest_R7)
+#define OFFB_R8 offsetof(VexGuestARMState,guest_R8)
+#define OFFB_R9 offsetof(VexGuestARMState,guest_R9)
+#define OFFB_R10 offsetof(VexGuestARMState,guest_R10)
+#define OFFB_R11 offsetof(VexGuestARMState,guest_R11)
+#define OFFB_R12 offsetof(VexGuestARMState,guest_R12)
+#define OFFB_R13 offsetof(VexGuestARMState,guest_R13)
+#define OFFB_R14 offsetof(VexGuestARMState,guest_R14)
+#define OFFB_R15T offsetof(VexGuestARMState,guest_R15T)
+
+#define OFFB_CC_OP offsetof(VexGuestARMState,guest_CC_OP)
+#define OFFB_CC_DEP1 offsetof(VexGuestARMState,guest_CC_DEP1)
+#define OFFB_CC_DEP2 offsetof(VexGuestARMState,guest_CC_DEP2)
+#define OFFB_CC_NDEP offsetof(VexGuestARMState,guest_CC_NDEP)
+#define OFFB_NRADDR offsetof(VexGuestARMState,guest_NRADDR)
+
+#define OFFB_D0 offsetof(VexGuestARMState,guest_D0)
+#define OFFB_D1 offsetof(VexGuestARMState,guest_D1)
+#define OFFB_D2 offsetof(VexGuestARMState,guest_D2)
+#define OFFB_D3 offsetof(VexGuestARMState,guest_D3)
+#define OFFB_D4 offsetof(VexGuestARMState,guest_D4)
+#define OFFB_D5 offsetof(VexGuestARMState,guest_D5)
+#define OFFB_D6 offsetof(VexGuestARMState,guest_D6)
+#define OFFB_D7 offsetof(VexGuestARMState,guest_D7)
+#define OFFB_D8 offsetof(VexGuestARMState,guest_D8)
+#define OFFB_D9 offsetof(VexGuestARMState,guest_D9)
+#define OFFB_D10 offsetof(VexGuestARMState,guest_D10)
+#define OFFB_D11 offsetof(VexGuestARMState,guest_D11)
+#define OFFB_D12 offsetof(VexGuestARMState,guest_D12)
+#define OFFB_D13 offsetof(VexGuestARMState,guest_D13)
+#define OFFB_D14 offsetof(VexGuestARMState,guest_D14)
+#define OFFB_D15 offsetof(VexGuestARMState,guest_D15)
+#define OFFB_D16 offsetof(VexGuestARMState,guest_D16)
+#define OFFB_D17 offsetof(VexGuestARMState,guest_D17)
+#define OFFB_D18 offsetof(VexGuestARMState,guest_D18)
+#define OFFB_D19 offsetof(VexGuestARMState,guest_D19)
+#define OFFB_D20 offsetof(VexGuestARMState,guest_D20)
+#define OFFB_D21 offsetof(VexGuestARMState,guest_D21)
+#define OFFB_D22 offsetof(VexGuestARMState,guest_D22)
+#define OFFB_D23 offsetof(VexGuestARMState,guest_D23)
+#define OFFB_D24 offsetof(VexGuestARMState,guest_D24)
+#define OFFB_D25 offsetof(VexGuestARMState,guest_D25)
+#define OFFB_D26 offsetof(VexGuestARMState,guest_D26)
+#define OFFB_D27 offsetof(VexGuestARMState,guest_D27)
+#define OFFB_D28 offsetof(VexGuestARMState,guest_D28)
+#define OFFB_D29 offsetof(VexGuestARMState,guest_D29)
+#define OFFB_D30 offsetof(VexGuestARMState,guest_D30)
+#define OFFB_D31 offsetof(VexGuestARMState,guest_D31)
+
+#define OFFB_FPSCR offsetof(VexGuestARMState,guest_FPSCR)
+#define OFFB_TPIDRURO offsetof(VexGuestARMState,guest_TPIDRURO)
+#define OFFB_ITSTATE offsetof(VexGuestARMState,guest_ITSTATE)
+#define OFFB_QFLAG32 offsetof(VexGuestARMState,guest_QFLAG32)
+#define OFFB_GEFLAG0 offsetof(VexGuestARMState,guest_GEFLAG0)
+#define OFFB_GEFLAG1 offsetof(VexGuestARMState,guest_GEFLAG1)
+#define OFFB_GEFLAG2 offsetof(VexGuestARMState,guest_GEFLAG2)
+#define OFFB_GEFLAG3 offsetof(VexGuestARMState,guest_GEFLAG3)
+
+
+/* ---------------- Integer registers ---------------- */
+
+static Int integerGuestRegOffset ( UInt iregNo )
+{
+ /* Do we care about endianness here? We do if sub-parts of integer
+ registers are accessed, but I don't think that ever happens on
+ ARM. */
+ switch (iregNo) {
+ case 0: return OFFB_R0;
+ case 1: return OFFB_R1;
+ case 2: return OFFB_R2;
+ case 3: return OFFB_R3;
+ case 4: return OFFB_R4;
+ case 5: return OFFB_R5;
+ case 6: return OFFB_R6;
+ case 7: return OFFB_R7;
+ case 8: return OFFB_R8;
+ case 9: return OFFB_R9;
+ case 10: return OFFB_R10;
+ case 11: return OFFB_R11;
+ case 12: return OFFB_R12;
+ case 13: return OFFB_R13;
+ case 14: return OFFB_R14;
+ case 15: return OFFB_R15T;
+ default: vassert(0);
+ }
+}
+
+/* Plain ("low level") read from a reg; no +8 offset magic for r15. */
+static IRExpr* llGetIReg ( UInt iregNo )
+{
+ vassert(iregNo < 16);
+ return IRExpr_Get( integerGuestRegOffset(iregNo), Ity_I32 );
+}
+
+/* Architected read from a reg in ARM mode. This automagically adds 8
+ to all reads of r15. */
+static IRExpr* getIRegA ( UInt iregNo )
+{
+ IRExpr* e;
+ ASSERT_IS_ARM;
+ vassert(iregNo < 16);
+ if (iregNo == 15) {
+ /* If asked for r15, don't read the guest state value, as that
+ may not be up to date in the case where loop unrolling has
+ happened, because the first insn's write to the block is
+ omitted; hence in the 2nd and subsequent unrollings we don't
+ have a correct value in guest r15. Instead produce the
+ constant that we know would be produced at this point. */
+ vassert(0 == (guest_R15_curr_instr_notENC & 3));
+ e = mkU32(guest_R15_curr_instr_notENC + 8);
+ } else {
+ e = IRExpr_Get( integerGuestRegOffset(iregNo), Ity_I32 );
+ }
+ return e;
+}
+
+/* Architected read from a reg in Thumb mode. This automagically adds
+ 4 to all reads of r15. */
+static IRExpr* getIRegT ( UInt iregNo )
+{
+ IRExpr* e;
+ ASSERT_IS_THUMB;
+ vassert(iregNo < 16);
+ if (iregNo == 15) {
+ /* Ditto comment in getIReg. */
+ vassert(0 == (guest_R15_curr_instr_notENC & 1));
+ e = mkU32(guest_R15_curr_instr_notENC + 4);
+ } else {
+ e = IRExpr_Get( integerGuestRegOffset(iregNo), Ity_I32 );
+ }
+ return e;
+}
+
+/* Plain ("low level") write to a reg; no jump or alignment magic for
+ r15. */
+static void llPutIReg ( UInt iregNo, IRExpr* e )
+{
+ vassert(iregNo < 16);
+ vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
+ stmt( IRStmt_Put(integerGuestRegOffset(iregNo), e) );
+}
+
+/* Architected write to an integer register in ARM mode. If it is to
+ r15, record info so at the end of this insn's translation, a branch
+ to it can be made. Also handles conditional writes to the
+ register: if guardT == IRTemp_INVALID then the write is
+ unconditional. If writing r15, also 4-align it. */
+static void putIRegA ( UInt iregNo,
+ IRExpr* e,
+ IRTemp guardT /* :: Ity_I32, 0 or 1 */,
+ IRJumpKind jk /* if a jump is generated */ )
+{
+ /* if writing r15, force e to be 4-aligned. */
+ // INTERWORKING FIXME. this needs to be relaxed so that
+ // puts caused by LDMxx which load r15 interwork right.
+   // but is doing no alignment at all too relaxed?
+ //if (iregNo == 15)
+ // e = binop(Iop_And32, e, mkU32(~3));
+ ASSERT_IS_ARM;
+ /* So, generate either an unconditional or a conditional write to
+ the reg. */
+ if (guardT == IRTemp_INVALID) {
+ /* unconditional write */
+ llPutIReg( iregNo, e );
+ } else {
+ llPutIReg( iregNo,
+ IRExpr_Mux0X( unop(Iop_32to8, mkexpr(guardT)),
+ llGetIReg(iregNo),
+ e ));
+ }
+ if (iregNo == 15) {
+ // assert against competing r15 updates. Shouldn't
+ // happen; should be ruled out by the instr matching
+ // logic.
+ vassert(r15written == False);
+ vassert(r15guard == IRTemp_INVALID);
+ vassert(r15kind == Ijk_Boring);
+ r15written = True;
+ r15guard = guardT;
+ r15kind = jk;
+ }
+}
+
+
+/* Architected write to an integer register in Thumb mode. Writes to
+ r15 are not allowed. Handles conditional writes to the register:
+ if guardT == IRTemp_INVALID then the write is unconditional. */
+static void putIRegT ( UInt iregNo,
+ IRExpr* e,
+ IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
+{
+ /* So, generate either an unconditional or a conditional write to
+ the reg. */
+ ASSERT_IS_THUMB;
+ vassert(iregNo >= 0 && iregNo <= 14);
+ if (guardT == IRTemp_INVALID) {
+ /* unconditional write */
+ llPutIReg( iregNo, e );
+ } else {
+ llPutIReg( iregNo,
+ IRExpr_Mux0X( unop(Iop_32to8, mkexpr(guardT)),
+ llGetIReg(iregNo),
+ e ));
+ }
+}
+
+
+/* Thumb16 and Thumb32 only.
+ Returns true if reg is 13 or 15. Implements the BadReg
+ predicate in the ARM ARM. */
+static Bool isBadRegT ( UInt r )
+{
+ vassert(r <= 15);
+ ASSERT_IS_THUMB;
+ return r == 13 || r == 15;
+}
+
+
+/* ---------------- Double registers ---------------- */
+
+static Int doubleGuestRegOffset ( UInt dregNo )
+{
+ /* Do we care about endianness here? Probably do if we ever get
+ into the situation of dealing with the single-precision VFP
+ registers. */
+ switch (dregNo) {
+ case 0: return OFFB_D0;
+ case 1: return OFFB_D1;
+ case 2: return OFFB_D2;
+ case 3: return OFFB_D3;
+ case 4: return OFFB_D4;
+ case 5: return OFFB_D5;
+ case 6: return OFFB_D6;
+ case 7: return OFFB_D7;
+ case 8: return OFFB_D8;
+ case 9: return OFFB_D9;
+ case 10: return OFFB_D10;
+ case 11: return OFFB_D11;
+ case 12: return OFFB_D12;
+ case 13: return OFFB_D13;
+ case 14: return OFFB_D14;
+ case 15: return OFFB_D15;
+ case 16: return OFFB_D16;
+ case 17: return OFFB_D17;
+ case 18: return OFFB_D18;
+ case 19: return OFFB_D19;
+ case 20: return OFFB_D20;
+ case 21: return OFFB_D21;
+ case 22: return OFFB_D22;
+ case 23: return OFFB_D23;
+ case 24: return OFFB_D24;
+ case 25: return OFFB_D25;
+ case 26: return OFFB_D26;
+ case 27: return OFFB_D27;
+ case 28: return OFFB_D28;
+ case 29: return OFFB_D29;
+ case 30: return OFFB_D30;
+ case 31: return OFFB_D31;
+ default: vassert(0);
+ }
+}
+
+/* Plain ("low level") read from a VFP Dreg. */
+static IRExpr* llGetDReg ( UInt dregNo )
+{
+ vassert(dregNo < 32);
+ return IRExpr_Get( doubleGuestRegOffset(dregNo), Ity_F64 );
+}
+
+/* Architected read from a VFP Dreg. */
+static IRExpr* getDReg ( UInt dregNo ) {
+ return llGetDReg( dregNo );
+}
+
+/* Plain ("low level") write to a VFP Dreg. */
+static void llPutDReg ( UInt dregNo, IRExpr* e )
+{
+ vassert(dregNo < 32);
+ vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_F64);
+ stmt( IRStmt_Put(doubleGuestRegOffset(dregNo), e) );
+}
+
+/* Architected write to a VFP Dreg. Handles conditional writes to the
+ register: if guardT == IRTemp_INVALID then the write is
+ unconditional. */
+static void putDReg ( UInt dregNo,
+ IRExpr* e,
+ IRTemp guardT /* :: Ity_I32, 0 or 1 */)
+{
+ /* So, generate either an unconditional or a conditional write to
+ the reg. */
+ if (guardT == IRTemp_INVALID) {
+ /* unconditional write */
+ llPutDReg( dregNo, e );
+ } else {
+ llPutDReg( dregNo,
+ IRExpr_Mux0X( unop(Iop_32to8, mkexpr(guardT)),
+ llGetDReg(dregNo),
+ e ));
+ }
+}
+
+/* And now exactly the same stuff all over again, but this time
+ taking/returning I64 rather than F64, to support 64-bit Neon
+ ops. */
+
+/* Plain ("low level") read from a Neon Integer Dreg. */
+static IRExpr* llGetDRegI64 ( UInt dregNo )
+{
+ vassert(dregNo < 32);
+ return IRExpr_Get( doubleGuestRegOffset(dregNo), Ity_I64 );
+}
+
+/* Architected read from a Neon Integer Dreg. */
+static IRExpr* getDRegI64 ( UInt dregNo ) {
+ return llGetDRegI64( dregNo );
+}
+
+/* Plain ("low level") write to a Neon Integer Dreg. */
+static void llPutDRegI64 ( UInt dregNo, IRExpr* e )
+{
+ vassert(dregNo < 32);
+ vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
+ stmt( IRStmt_Put(doubleGuestRegOffset(dregNo), e) );
+}
+
+/* Architected write to a Neon Integer Dreg. Handles conditional
+ writes to the register: if guardT == IRTemp_INVALID then the write
+ is unconditional. */
+static void putDRegI64 ( UInt dregNo,
+ IRExpr* e,
+ IRTemp guardT /* :: Ity_I32, 0 or 1 */)
+{
+ /* So, generate either an unconditional or a conditional write to
+ the reg. */
+ if (guardT == IRTemp_INVALID) {
+ /* unconditional write */
+ llPutDRegI64( dregNo, e );
+ } else {
+ llPutDRegI64( dregNo,
+ IRExpr_Mux0X( unop(Iop_32to8, mkexpr(guardT)),
+ llGetDRegI64(dregNo),
+ e ));
+ }
+}
+
+/* ---------------- Quad registers ---------------- */
+
+static Int quadGuestRegOffset ( UInt qregNo )
+{
+ /* Do we care about endianness here? Probably do if we ever get
+      into the situation of dealing with the 128 bit Neon registers. */
+ switch (qregNo) {
+ case 0: return OFFB_D0;
+ case 1: return OFFB_D2;
+ case 2: return OFFB_D4;
+ case 3: return OFFB_D6;
+ case 4: return OFFB_D8;
+ case 5: return OFFB_D10;
+ case 6: return OFFB_D12;
+ case 7: return OFFB_D14;
+ case 8: return OFFB_D16;
+ case 9: return OFFB_D18;
+ case 10: return OFFB_D20;
+ case 11: return OFFB_D22;
+ case 12: return OFFB_D24;
+ case 13: return OFFB_D26;
+ case 14: return OFFB_D28;
+ case 15: return OFFB_D30;
+ default: vassert(0);
+ }
+}
+
+/* Plain ("low level") read from a Neon Qreg. */
+static IRExpr* llGetQReg ( UInt qregNo )
+{
+ vassert(qregNo < 16);
+ return IRExpr_Get( quadGuestRegOffset(qregNo), Ity_V128 );
+}
+
+/* Architected read from a Neon Qreg. */
+static IRExpr* getQReg ( UInt qregNo ) {
+ return llGetQReg( qregNo );
+}
+
+/* Plain ("low level") write to a Neon Qreg. */
+static void llPutQReg ( UInt qregNo, IRExpr* e )
+{
+ vassert(qregNo < 16);
+ vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_V128);
+ stmt( IRStmt_Put(quadGuestRegOffset(qregNo), e) );
+}
+
+/* Architected write to a Neon Qreg. Handles conditional writes to the
+ register: if guardT == IRTemp_INVALID then the write is
+ unconditional. */
+static void putQReg ( UInt qregNo,
+ IRExpr* e,
+ IRTemp guardT /* :: Ity_I32, 0 or 1 */)
+{
+ /* So, generate either an unconditional or a conditional write to
+ the reg. */
+ if (guardT == IRTemp_INVALID) {
+ /* unconditional write */
+ llPutQReg( qregNo, e );
+ } else {
+ llPutQReg( qregNo,
+ IRExpr_Mux0X( unop(Iop_32to8, mkexpr(guardT)),
+ llGetQReg(qregNo),
+ e ));
+ }
+}
+
+
+/* ---------------- Float registers ---------------- */
+
+static Int floatGuestRegOffset ( UInt fregNo )
+{
+ /* Start with the offset of the containing double, and then correct
+ for endianness. Actually this is completely bogus and needs
+ careful thought. */
+ Int off;
+ vassert(fregNo < 32);
+ off = doubleGuestRegOffset(fregNo >> 1);
+ if (host_is_bigendian) {
+ vassert(0);
+ } else {
+ if (fregNo & 1)
+ off += 4;
+ }
+ return off;
+}
+
+/* Plain ("low level") read from a VFP Freg. */
+static IRExpr* llGetFReg ( UInt fregNo )
+{
+ vassert(fregNo < 32);
+ return IRExpr_Get( floatGuestRegOffset(fregNo), Ity_F32 );
+}
+
+/* Architected read from a VFP Freg. */
+static IRExpr* getFReg ( UInt fregNo ) {
+ return llGetFReg( fregNo );
+}
+
+/* Plain ("low level") write to a VFP Freg. */
+static void llPutFReg ( UInt fregNo, IRExpr* e )
+{
+ vassert(fregNo < 32);
+ vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_F32);
+ stmt( IRStmt_Put(floatGuestRegOffset(fregNo), e) );
+}
+
+/* Architected write to a VFP Freg. Handles conditional writes to the
+ register: if guardT == IRTemp_INVALID then the write is
+ unconditional. */
+static void putFReg ( UInt fregNo,
+ IRExpr* e,
+ IRTemp guardT /* :: Ity_I32, 0 or 1 */)
+{
+ /* So, generate either an unconditional or a conditional write to
+ the reg. */
+ if (guardT == IRTemp_INVALID) {
+ /* unconditional write */
+ llPutFReg( fregNo, e );
+ } else {
+ llPutFReg( fregNo,
+ IRExpr_Mux0X( unop(Iop_32to8, mkexpr(guardT)),
+ llGetFReg(fregNo),
+ e ));
+ }
+}
+
+
+/* ---------------- Misc registers ---------------- */
+
+static void putMiscReg32 ( UInt gsoffset,
+ IRExpr* e, /* :: Ity_I32 */
+ IRTemp guardT /* :: Ity_I32, 0 or 1 */)
+{
+ switch (gsoffset) {
+ case OFFB_FPSCR: break;
+ case OFFB_QFLAG32: break;
+ case OFFB_GEFLAG0: break;
+ case OFFB_GEFLAG1: break;
+ case OFFB_GEFLAG2: break;
+ case OFFB_GEFLAG3: break;
+ default: vassert(0); /* awaiting more cases */
+ }
+ vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
+
+ if (guardT == IRTemp_INVALID) {
+ /* unconditional write */
+ stmt(IRStmt_Put(gsoffset, e));
+ } else {
+ stmt(IRStmt_Put(
+ gsoffset,
+ IRExpr_Mux0X( unop(Iop_32to8, mkexpr(guardT)),
+ IRExpr_Get(gsoffset, Ity_I32),
+ e
+ )
+ ));
+ }
+}
+
+static IRTemp get_ITSTATE ( void )
+{
+ ASSERT_IS_THUMB;
+ IRTemp t = newTemp(Ity_I32);
+ assign(t, IRExpr_Get( OFFB_ITSTATE, Ity_I32));
+ return t;
+}
+
+static void put_ITSTATE ( IRTemp t )
+{
+ ASSERT_IS_THUMB;
+ stmt( IRStmt_Put( OFFB_ITSTATE, mkexpr(t)) );
+}
+
+static IRTemp get_QFLAG32 ( void )
+{
+ IRTemp t = newTemp(Ity_I32);
+ assign(t, IRExpr_Get( OFFB_QFLAG32, Ity_I32));
+ return t;
+}
+
+static void put_QFLAG32 ( IRTemp t, IRTemp condT )
+{
+ putMiscReg32( OFFB_QFLAG32, mkexpr(t), condT );
+}
+
+/* Stickily set the 'Q' flag (APSR bit 27) of the APSR (Application Program
+ Status Register) to indicate that overflow or saturation occurred.
+   Nb: e must be zero to denote no saturation, and any nonzero
+ value to indicate saturation. */
+static void or_into_QFLAG32 ( IRExpr* e, IRTemp condT )
+{
+ IRTemp old = get_QFLAG32();
+ IRTemp nyu = newTemp(Ity_I32);
+ assign(nyu, binop(Iop_Or32, mkexpr(old), e) );
+ put_QFLAG32(nyu, condT);
+}
+
+/* Generate code to set APSR.GE[flagNo]. Each fn call sets 1 bit.
+ flagNo: which flag bit to set [3...0]
+ lowbits_to_ignore: 0 = look at all 32 bits
+ 8 = look at top 24 bits only
+ 16 = look at top 16 bits only
+ 31 = look at the top bit only
+ e: input value to be evaluated.
+ The new value is taken from 'e' with the lowest 'lowbits_to_ignore'
+ masked out. If the resulting value is zero then the GE flag is
+ set to 0; any other value sets the flag to 1. */
+static void put_GEFLAG32 ( Int flagNo, /* 0, 1, 2 or 3 */
+ Int lowbits_to_ignore, /* 0, 8, 16 or 31 */
+ IRExpr* e, /* Ity_I32 */
+ IRTemp condT )
+{
+ vassert( flagNo >= 0 && flagNo <= 3 );
+ vassert( lowbits_to_ignore == 0 ||
+ lowbits_to_ignore == 8 ||
+ lowbits_to_ignore == 16 ||
+ lowbits_to_ignore == 31 );
+ IRTemp masked = newTemp(Ity_I32);
+ assign(masked, binop(Iop_Shr32, e, mkU8(lowbits_to_ignore)));
+
+ switch (flagNo) {
+ case 0: putMiscReg32(OFFB_GEFLAG0, mkexpr(masked), condT); break;
+ case 1: putMiscReg32(OFFB_GEFLAG1, mkexpr(masked), condT); break;
+ case 2: putMiscReg32(OFFB_GEFLAG2, mkexpr(masked), condT); break;
+ case 3: putMiscReg32(OFFB_GEFLAG3, mkexpr(masked), condT); break;
+ default: vassert(0);
+ }
+}
+
+/* Return the specified GE flag, in the 32-bit zero-or-nonzero
+   representation scheme. */
+static IRExpr* get_GEFLAG32( Int flagNo /* 0, 1, 2, 3 */ )
+{
+ switch (flagNo) {
+ case 0: return IRExpr_Get( OFFB_GEFLAG0, Ity_I32 );
+ case 1: return IRExpr_Get( OFFB_GEFLAG1, Ity_I32 );
+ case 2: return IRExpr_Get( OFFB_GEFLAG2, Ity_I32 );
+ case 3: return IRExpr_Get( OFFB_GEFLAG3, Ity_I32 );
+ default: vassert(0);
+ }
+}
+
+/* Set all 4 GE flags from the given 32-bit value as follows: GE 3 and
+ 2 are set from bit 31 of the value, and GE 1 and 0 are set from bit
+ 15 of the value. All other bits are ignored. */
+static void set_GE_32_10_from_bits_31_15 ( IRTemp t32, IRTemp condT )
+{
+ IRTemp ge10 = newTemp(Ity_I32);
+ IRTemp ge32 = newTemp(Ity_I32);
+ assign(ge10, binop(Iop_And32, mkexpr(t32), mkU32(0x00008000)));
+ assign(ge32, binop(Iop_And32, mkexpr(t32), mkU32(0x80000000)));
+ put_GEFLAG32( 0, 0, mkexpr(ge10), condT );
+ put_GEFLAG32( 1, 0, mkexpr(ge10), condT );
+ put_GEFLAG32( 2, 0, mkexpr(ge32), condT );
+ put_GEFLAG32( 3, 0, mkexpr(ge32), condT );
+}
+
+
+/* Set all 4 GE flags from the given 32-bit value as follows: GE 3
+   from bit 31, GE 2 from bit 23, GE 1 from bit 15, and GE 0 from
+ bit 7. All other bits are ignored. */
+static void set_GE_3_2_1_0_from_bits_31_23_15_7 ( IRTemp t32, IRTemp condT )
+{
+ IRTemp ge0 = newTemp(Ity_I32);
+ IRTemp ge1 = newTemp(Ity_I32);
+ IRTemp ge2 = newTemp(Ity_I32);
+ IRTemp ge3 = newTemp(Ity_I32);
+ assign(ge0, binop(Iop_And32, mkexpr(t32), mkU32(0x00000080)));
+ assign(ge1, binop(Iop_And32, mkexpr(t32), mkU32(0x00008000)));
+ assign(ge2, binop(Iop_And32, mkexpr(t32), mkU32(0x00800000)));
+ assign(ge3, binop(Iop_And32, mkexpr(t32), mkU32(0x80000000)));
+ put_GEFLAG32( 0, 0, mkexpr(ge0), condT );
+ put_GEFLAG32( 1, 0, mkexpr(ge1), condT );
+ put_GEFLAG32( 2, 0, mkexpr(ge2), condT );
+ put_GEFLAG32( 3, 0, mkexpr(ge3), condT );
+}
+
+
+/* ---------------- FPSCR stuff ---------------- */
+
+/* Generate IR to get hold of the rounding mode bits in FPSCR, and
+ convert them to IR format. Bind the final result to the
+ returned temp. */
+static IRTemp /* :: Ity_I32 */ mk_get_IR_rounding_mode ( void )
+{
+ /* The ARMvfp encoding for rounding mode bits is:
+ 00 to nearest
+ 01 to +infinity
+ 10 to -infinity
+ 11 to zero
+ We need to convert that to the IR encoding:
+ 00 to nearest (the default)
+ 10 to +infinity
+ 01 to -infinity
+ 11 to zero
+ Which can be done by swapping bits 0 and 1.
+ The rmode bits are at 23:22 in FPSCR.
+ */
+ IRTemp armEncd = newTemp(Ity_I32);
+ IRTemp swapped = newTemp(Ity_I32);
+ /* Fish FPSCR[23:22] out, and slide to bottom. Doesn't matter that
+ we don't zero out bits 24 and above, since the assignment to
+ 'swapped' will mask them out anyway. */
+ assign(armEncd,
+ binop(Iop_Shr32, IRExpr_Get(OFFB_FPSCR, Ity_I32), mkU8(22)));
+ /* Now swap them. */
+ assign(swapped,
+ binop(Iop_Or32,
+ binop(Iop_And32,
+ binop(Iop_Shl32, mkexpr(armEncd), mkU8(1)),
+ mkU32(2)),
+ binop(Iop_And32,
+ binop(Iop_Shr32, mkexpr(armEncd), mkU8(1)),
+ mkU32(1))
+ ));
+ return swapped;
+}
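+
+/* Illustrative check of the bit swap above (not executed): for the ARM
+   encoding 01 (round to +infinity), ((1 << 1) & 2) | ((1 >> 1) & 1)
+   == 2, which is the IR encoding 10 for +infinity; 00 and 11 map to
+   themselves. */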
+
+
+/*------------------------------------------------------------*/
+/*--- Helpers for flag handling and conditional insns ---*/
+/*------------------------------------------------------------*/
+
+static HChar* name_ARMCondcode ( ARMCondcode cond )
+{
+ switch (cond) {
+ case ARMCondEQ: return "{eq}";
+ case ARMCondNE: return "{ne}";
+ case ARMCondHS: return "{hs}"; // or 'cs'
+ case ARMCondLO: return "{lo}"; // or 'cc'
+ case ARMCondMI: return "{mi}";
+ case ARMCondPL: return "{pl}";
+ case ARMCondVS: return "{vs}";
+ case ARMCondVC: return "{vc}";
+ case ARMCondHI: return "{hi}";
+ case ARMCondLS: return "{ls}";
+ case ARMCondGE: return "{ge}";
+ case ARMCondLT: return "{lt}";
+ case ARMCondGT: return "{gt}";
+ case ARMCondLE: return "{le}";
+      case ARMCondAL: return ""; // {al} is the default
+ case ARMCondNV: return "{nv}";
+ default: vpanic("name_ARMCondcode");
+ }
+}
+/* and a handy shorthand for it */
+static HChar* nCC ( ARMCondcode cond ) {
+ return name_ARMCondcode(cond);
+}
+
+
+/* Build IR to calculate some particular condition from stored
+ CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression of type
+ Ity_I32, suitable for narrowing. Although the return type is
+ Ity_I32, the returned value is either 0 or 1. 'cond' must be
+ :: Ity_I32 and must denote the condition to compute in
+ bits 7:4, and be zero everywhere else.
+*/
+static IRExpr* mk_armg_calculate_condition_dyn ( IRExpr* cond )
+{
+ vassert(typeOfIRExpr(irsb->tyenv, cond) == Ity_I32);
+   /* And 'cond' had better produce a value in which only bits 7:4
+      are nonzero.  However, obviously we can't assert for that. */
+
+   /* So what we're constructing for the first argument is
+      "(cond << 4) | stored-operation".  However, as per the comments
+      above, 'cond' must be supplied pre-shifted to this function.
+
+
+ This pairing scheme requires that the ARM_CC_OP_ values all fit
+ in 4 bits. Hence we are passing a (COND, OP) pair in the lowest
+ 8 bits of the first argument. */
+ IRExpr** args
+ = mkIRExprVec_4(
+ binop(Iop_Or32, IRExpr_Get(OFFB_CC_OP, Ity_I32), cond),
+ IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
+ IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
+ IRExpr_Get(OFFB_CC_NDEP, Ity_I32)
+ );
+ IRExpr* call
+ = mkIRExprCCall(
+ Ity_I32,
+ 0/*regparm*/,
+ "armg_calculate_condition", &armg_calculate_condition,
+ args
+ );
+
+ /* Exclude the requested condition, OP and NDEP from definedness
+ checking. We're only interested in DEP1 and DEP2. */
+ call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
+ return call;
+}
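+
+/* A minimal usage sketch (illustrative, not part of the decoder): to
+   evaluate, say, condition NE against the current thunk, a caller
+   passes the condition pre-shifted, e.g.
+
+      IRExpr* isNE = mk_armg_calculate_condition_dyn(
+                        mkU32(ARMCondNE << 4) );
+
+   so the helper sees (ARMCondNE << 4) | CC_OP in its first argument.
+   The fixed-condition wrapper below does exactly this. */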
+
+
+/* Build IR to calculate some particular condition from stored
+ CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression of type
+ Ity_I32, suitable for narrowing. Although the return type is
+ Ity_I32, the returned value is either 0 or 1.
+*/
+static IRExpr* mk_armg_calculate_condition ( ARMCondcode cond )
+{
+   /* First arg is "(cond << 4) | cc_op".  This requires that the
+      ARM_CC_OP_ values all fit in 4 bits.  Hence we are passing a
+      (COND, OP) pair in the lowest 8 bits of the first argument. */
+ vassert(cond >= 0 && cond <= 15);
+ return mk_armg_calculate_condition_dyn( mkU32(cond << 4) );
+}
+
+
+/* Build IR to calculate just the carry flag from stored
+ CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
+ Ity_I32. */
+static IRExpr* mk_armg_calculate_flag_c ( void )
+{
+ IRExpr** args
+ = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I32),
+ IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
+ IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
+ IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
+ IRExpr* call
+ = mkIRExprCCall(
+ Ity_I32,
+ 0/*regparm*/,
+ "armg_calculate_flag_c", &armg_calculate_flag_c,
+ args
+ );
+ /* Exclude OP and NDEP from definedness checking. We're only
+ interested in DEP1 and DEP2. */
+ call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
+ return call;
+}
+
+
+/* Build IR to calculate just the overflow flag from stored
+ CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
+ Ity_I32. */
+static IRExpr* mk_armg_calculate_flag_v ( void )
+{
+ IRExpr** args
+ = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I32),
+ IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
+ IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
+ IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
+ IRExpr* call
+ = mkIRExprCCall(
+ Ity_I32,
+ 0/*regparm*/,
+ "armg_calculate_flag_v", &armg_calculate_flag_v,
+ args
+ );
+ /* Exclude OP and NDEP from definedness checking. We're only
+ interested in DEP1 and DEP2. */
+ call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
+ return call;
+}
+
+
+/* Build IR to calculate N Z C V in bits 31:28 of the
+ returned word. */
+static IRExpr* mk_armg_calculate_flags_nzcv ( void )
+{
+ IRExpr** args
+ = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I32),
+ IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
+ IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
+ IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
+ IRExpr* call
+ = mkIRExprCCall(
+ Ity_I32,
+ 0/*regparm*/,
+ "armg_calculate_flags_nzcv", &armg_calculate_flags_nzcv,
+ args
+ );
+ /* Exclude OP and NDEP from definedness checking. We're only
+ interested in DEP1 and DEP2. */
+ call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
+ return call;
+}
+
+static IRExpr* mk_armg_calculate_flag_qc ( IRExpr* resL, IRExpr* resR, Bool Q )
+{
+ IRExpr** args1;
+ IRExpr** args2;
+ IRExpr *call1, *call2, *res;
+
+ if (Q) {
+ args1 = mkIRExprVec_4 ( binop(Iop_GetElem32x4, resL, mkU8(0)),
+ binop(Iop_GetElem32x4, resL, mkU8(1)),
+ binop(Iop_GetElem32x4, resR, mkU8(0)),
+ binop(Iop_GetElem32x4, resR, mkU8(1)) );
+ args2 = mkIRExprVec_4 ( binop(Iop_GetElem32x4, resL, mkU8(2)),
+ binop(Iop_GetElem32x4, resL, mkU8(3)),
+ binop(Iop_GetElem32x4, resR, mkU8(2)),
+ binop(Iop_GetElem32x4, resR, mkU8(3)) );
+ } else {
+ args1 = mkIRExprVec_4 ( binop(Iop_GetElem32x2, resL, mkU8(0)),
+ binop(Iop_GetElem32x2, resL, mkU8(1)),
+ binop(Iop_GetElem32x2, resR, mkU8(0)),
+ binop(Iop_GetElem32x2, resR, mkU8(1)) );
+ }
+
+#if 1
+ call1 = mkIRExprCCall(
+ Ity_I32,
+ 0/*regparm*/,
+ "armg_calculate_flag_qc", &armg_calculate_flag_qc,
+ args1
+ );
+ if (Q) {
+ call2 = mkIRExprCCall(
+ Ity_I32,
+ 0/*regparm*/,
+ "armg_calculate_flag_qc", &armg_calculate_flag_qc,
+ args2
+ );
+ }
+ if (Q) {
+ res = binop(Iop_Or32, call1, call2);
+ } else {
+ res = call1;
+ }
+#else
+ if (Q) {
+ res = unop(Iop_1Uto32,
+ binop(Iop_CmpNE32,
+ binop(Iop_Or32,
+ binop(Iop_Or32,
+ binop(Iop_Xor32,
+ args1[0],
+ args1[2]),
+ binop(Iop_Xor32,
+ args1[1],
+ args1[3])),
+ binop(Iop_Or32,
+ binop(Iop_Xor32,
+ args2[0],
+ args2[2]),
+ binop(Iop_Xor32,
+ args2[1],
+ args2[3]))),
+ mkU32(0)));
+ } else {
+ res = unop(Iop_1Uto32,
+ binop(Iop_CmpNE32,
+ binop(Iop_Or32,
+ binop(Iop_Xor32,
+ args1[0],
+ args1[2]),
+ binop(Iop_Xor32,
+ args1[1],
+ args1[3])),
+ mkU32(0)));
+ }
+#endif
+ return res;
+}
+
+// FIXME: this is named wrongly .. it's a sticky OR into the QC
+// flag, not a plain write to it.
+static void setFlag_QC ( IRExpr* resL, IRExpr* resR, Bool Q,
+ IRTemp condT )
+{
+ putMiscReg32 (OFFB_FPSCR,
+ binop(Iop_Or32,
+ IRExpr_Get(OFFB_FPSCR, Ity_I32),
+ binop(Iop_Shl32,
+ mk_armg_calculate_flag_qc(resL, resR, Q),
+ mkU8(27))),
+ condT);
+}
+
+/* Build IR to conditionally set the flags thunk. As with putIReg, if
+ guard is IRTemp_INVALID then it's unconditional, else it holds a
+ condition :: Ity_I32. */
+static
+void setFlags_D1_D2_ND ( UInt cc_op, IRTemp t_dep1,
+ IRTemp t_dep2, IRTemp t_ndep,
+ IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
+{
+ IRTemp c8;
+   vassert(typeOfIRTemp(irsb->tyenv, t_dep1) == Ity_I32);
+   vassert(typeOfIRTemp(irsb->tyenv, t_dep2) == Ity_I32);
+   vassert(typeOfIRTemp(irsb->tyenv, t_ndep) == Ity_I32);
+ vassert(cc_op >= ARMG_CC_OP_COPY && cc_op < ARMG_CC_OP_NUMBER);
+ if (guardT == IRTemp_INVALID) {
+ /* unconditional */
+ stmt( IRStmt_Put( OFFB_CC_OP, mkU32(cc_op) ));
+ stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t_dep1) ));
+ stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(t_dep2) ));
+ stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(t_ndep) ));
+ } else {
+ /* conditional */
+ c8 = newTemp(Ity_I8);
+ assign( c8, unop(Iop_32to8, mkexpr(guardT)) );
+ stmt( IRStmt_Put(
+ OFFB_CC_OP,
+ IRExpr_Mux0X( mkexpr(c8),
+ IRExpr_Get(OFFB_CC_OP, Ity_I32),
+ mkU32(cc_op) )));
+ stmt( IRStmt_Put(
+ OFFB_CC_DEP1,
+ IRExpr_Mux0X( mkexpr(c8),
+ IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
+ mkexpr(t_dep1) )));
+ stmt( IRStmt_Put(
+ OFFB_CC_DEP2,
+ IRExpr_Mux0X( mkexpr(c8),
+ IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
+ mkexpr(t_dep2) )));
+ stmt( IRStmt_Put(
+ OFFB_CC_NDEP,
+ IRExpr_Mux0X( mkexpr(c8),
+ IRExpr_Get(OFFB_CC_NDEP, Ity_I32),
+ mkexpr(t_ndep) )));
+ }
+}
+
+
+/* Minor variant of the above that sets NDEP to zero (if it
+ sets it at all) */
+static void setFlags_D1_D2 ( UInt cc_op, IRTemp t_dep1,
+ IRTemp t_dep2,
+ IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
+{
+ IRTemp z32 = newTemp(Ity_I32);
+ assign( z32, mkU32(0) );
+ setFlags_D1_D2_ND( cc_op, t_dep1, t_dep2, z32, guardT );
+}
+
+
+/* Minor variant of the above that sets DEP2 to zero (if it
+ sets it at all) */
+static void setFlags_D1_ND ( UInt cc_op, IRTemp t_dep1,
+ IRTemp t_ndep,
+ IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
+{
+ IRTemp z32 = newTemp(Ity_I32);
+ assign( z32, mkU32(0) );
+ setFlags_D1_D2_ND( cc_op, t_dep1, z32, t_ndep, guardT );
+}
+
+
+/* Minor variant of the above that sets DEP2 and NDEP to zero (if it
+ sets them at all) */
+static void setFlags_D1 ( UInt cc_op, IRTemp t_dep1,
+ IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
+{
+ IRTemp z32 = newTemp(Ity_I32);
+ assign( z32, mkU32(0) );
+ setFlags_D1_D2_ND( cc_op, t_dep1, z32, z32, guardT );
+}
+
+
+/* ARM only */
+/* Generate a side-exit to the next instruction, if the given guard
+ expression :: Ity_I32 is 0 (note! the side exit is taken if the
+   condition is false!).  This is used to skip over conditional
+ instructions which we can't generate straight-line code for, either
+ because they are too complex or (more likely) they potentially
+ generate exceptions.
+*/
+static void mk_skip_over_A32_if_cond_is_false (
+ IRTemp guardT /* :: Ity_I32, 0 or 1 */
+ )
+{
+ ASSERT_IS_ARM;
+ vassert(guardT != IRTemp_INVALID);
+ vassert(0 == (guest_R15_curr_instr_notENC & 3));
+ stmt( IRStmt_Exit(
+ unop(Iop_Not1, unop(Iop_32to1, mkexpr(guardT))),
+ Ijk_Boring,
+ IRConst_U32(toUInt(guest_R15_curr_instr_notENC + 4))
+ ));
+}
+
+/* Thumb16 only */
+/* ditto, but jump over a 16-bit thumb insn */
+static void mk_skip_over_T16_if_cond_is_false (
+ IRTemp guardT /* :: Ity_I32, 0 or 1 */
+ )
+{
+ ASSERT_IS_THUMB;
+ vassert(guardT != IRTemp_INVALID);
+ vassert(0 == (guest_R15_curr_instr_notENC & 1));
+ stmt( IRStmt_Exit(
+ unop(Iop_Not1, unop(Iop_32to1, mkexpr(guardT))),
+ Ijk_Boring,
+ IRConst_U32(toUInt((guest_R15_curr_instr_notENC + 2) | 1))
+ ));
+}
+
+
+/* Thumb32 only */
+/* ditto, but jump over a 32-bit thumb insn */
+static void mk_skip_over_T32_if_cond_is_false (
+ IRTemp guardT /* :: Ity_I32, 0 or 1 */
+ )
+{
+ ASSERT_IS_THUMB;
+ vassert(guardT != IRTemp_INVALID);
+ vassert(0 == (guest_R15_curr_instr_notENC & 1));
+ stmt( IRStmt_Exit(
+ unop(Iop_Not1, unop(Iop_32to1, mkexpr(guardT))),
+ Ijk_Boring,
+ IRConst_U32(toUInt((guest_R15_curr_instr_notENC + 4) | 1))
+ ));
+}
+
+
+/* Thumb16 and Thumb32 only
+ Generate a SIGILL followed by a restart of the current instruction
+ if the given temp is nonzero. */
+static void gen_SIGILL_T_if_nonzero ( IRTemp t /* :: Ity_I32 */ )
+{
+ ASSERT_IS_THUMB;
+ vassert(t != IRTemp_INVALID);
+ vassert(0 == (guest_R15_curr_instr_notENC & 1));
+ stmt(
+ IRStmt_Exit(
+ binop(Iop_CmpNE32, mkexpr(t), mkU32(0)),
+ Ijk_NoDecode,
+ IRConst_U32(toUInt(guest_R15_curr_instr_notENC | 1))
+ )
+ );
+}
+
+
+/* Inspect the old_itstate, and generate a SIGILL if it indicates that
+ we are currently in an IT block and are not the last in the block.
+ This also rolls back guest_ITSTATE to its old value before the exit
+ and restores it to its new value afterwards. This is so that if
+ the exit is taken, we have an up to date version of ITSTATE
+ available. Without doing that, we have no hope of making precise
+ exceptions work. */
+static void gen_SIGILL_T_if_in_but_NLI_ITBlock (
+ IRTemp old_itstate /* :: Ity_I32 */,
+ IRTemp new_itstate /* :: Ity_I32 */
+ )
+{
+ ASSERT_IS_THUMB;
+ put_ITSTATE(old_itstate); // backout
+ IRTemp guards_for_next3 = newTemp(Ity_I32);
+ assign(guards_for_next3,
+ binop(Iop_Shr32, mkexpr(old_itstate), mkU8(8)));
+ gen_SIGILL_T_if_nonzero(guards_for_next3);
+ put_ITSTATE(new_itstate); //restore
+}
+
+
+/* Simpler version of the above, which generates a SIGILL if
+ we're anywhere within an IT block. */
+static void gen_SIGILL_T_if_in_ITBlock (
+ IRTemp old_itstate /* :: Ity_I32 */,
+ IRTemp new_itstate /* :: Ity_I32 */
+ )
+{
+ put_ITSTATE(old_itstate); // backout
+ gen_SIGILL_T_if_nonzero(old_itstate);
+ put_ITSTATE(new_itstate); //restore
+}
+
+
+/* Generate an APSR value, from the NZCV thunk, and
+ from QFLAG32 and GEFLAG0 .. GEFLAG3. */
+static IRTemp synthesise_APSR ( void )
+{
+ IRTemp res1 = newTemp(Ity_I32);
+ // Get NZCV
+ assign( res1, mk_armg_calculate_flags_nzcv() );
+ // OR in the Q value
+ IRTemp res2 = newTemp(Ity_I32);
+ assign(
+ res2,
+ binop(Iop_Or32,
+ mkexpr(res1),
+ binop(Iop_Shl32,
+ unop(Iop_1Uto32,
+ binop(Iop_CmpNE32,
+ mkexpr(get_QFLAG32()),
+ mkU32(0))),
+ mkU8(ARMG_CC_SHIFT_Q)))
+ );
+ // OR in GE0 .. GE3
+ IRExpr* ge0
+ = unop(Iop_1Uto32, binop(Iop_CmpNE32, get_GEFLAG32(0), mkU32(0)));
+ IRExpr* ge1
+ = unop(Iop_1Uto32, binop(Iop_CmpNE32, get_GEFLAG32(1), mkU32(0)));
+ IRExpr* ge2
+ = unop(Iop_1Uto32, binop(Iop_CmpNE32, get_GEFLAG32(2), mkU32(0)));
+ IRExpr* ge3
+ = unop(Iop_1Uto32, binop(Iop_CmpNE32, get_GEFLAG32(3), mkU32(0)));
+ IRTemp res3 = newTemp(Ity_I32);
+ assign(res3,
+ binop(Iop_Or32,
+ mkexpr(res2),
+ binop(Iop_Or32,
+ binop(Iop_Or32,
+ binop(Iop_Shl32, ge0, mkU8(16)),
+ binop(Iop_Shl32, ge1, mkU8(17))),
+ binop(Iop_Or32,
+ binop(Iop_Shl32, ge2, mkU8(18)),
+ binop(Iop_Shl32, ge3, mkU8(19))) )));
+ return res3;
+}
+
+
+/* and the inverse transformation: given an APSR value,
+ set the NZCV thunk, the Q flag, and the GE flags. */
+static void desynthesise_APSR ( Bool write_nzcvq, Bool write_ge,
+ IRTemp apsrT, IRTemp condT )
+{
+ vassert(write_nzcvq || write_ge);
+ if (write_nzcvq) {
+ // Do NZCV
+ IRTemp immT = newTemp(Ity_I32);
+ assign(immT, binop(Iop_And32, mkexpr(apsrT), mkU32(0xF0000000)) );
+ setFlags_D1(ARMG_CC_OP_COPY, immT, condT);
+ // Do Q
+ IRTemp qnewT = newTemp(Ity_I32);
+ assign(qnewT, binop(Iop_And32, mkexpr(apsrT), mkU32(ARMG_CC_MASK_Q)));
+ put_QFLAG32(qnewT, condT);
+ }
+ if (write_ge) {
+ // Do GE3..0
+ put_GEFLAG32(0, 0, binop(Iop_And32, mkexpr(apsrT), mkU32(1<<16)),
+ condT);
+ put_GEFLAG32(1, 0, binop(Iop_And32, mkexpr(apsrT), mkU32(1<<17)),
+ condT);
+ put_GEFLAG32(2, 0, binop(Iop_And32, mkexpr(apsrT), mkU32(1<<18)),
+ condT);
+ put_GEFLAG32(3, 0, binop(Iop_And32, mkexpr(apsrT), mkU32(1<<19)),
+ condT);
+ }
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Helpers for saturation ---*/
+/*------------------------------------------------------------*/
+
+/* FIXME: absolutely the only difference between (a) armUnsignedSatQ
+   and (b) armSignedSatQ is that in (a) the floor is set to 0, whereas
+   in (b) the floor is computed from the value of imm5.  These two fns
+   should be commoned up. */
+
+/* UnsignedSatQ(): 'clamp' each value so that 0 <= x <= (2^N)-1.
+ Optionally return flag resQ saying whether saturation occurred.
+ See definition in manual, section A2.2.1, page 41
+ (bits(N), boolean) UnsignedSatQ( integer i, integer N )
+ {
+ if ( i > (2^N)-1 ) { result = (2^N)-1; saturated = TRUE; }
+ elsif ( i < 0 ) { result = 0; saturated = TRUE; }
+ else { result = i; saturated = FALSE; }
+ return ( result<N-1:0>, saturated );
+ }
+*/
+static void armUnsignedSatQ( IRTemp* res, /* OUT - Ity_I32 */
+ IRTemp* resQ, /* OUT - Ity_I32 */
+ IRTemp regT, /* value to clamp - Ity_I32 */
+ UInt imm5 ) /* saturation ceiling */
+{
+ UInt ceil = (1 << imm5) - 1; // (2^imm5)-1
+ UInt floor = 0;
+
+ IRTemp node0 = newTemp(Ity_I32);
+ IRTemp node1 = newTemp(Ity_I32);
+ IRTemp node2 = newTemp(Ity_I1);
+ IRTemp node3 = newTemp(Ity_I32);
+ IRTemp node4 = newTemp(Ity_I32);
+ IRTemp node5 = newTemp(Ity_I1);
+ IRTemp node6 = newTemp(Ity_I32);
+
+ assign( node0, mkexpr(regT) );
+ assign( node1, mkU32(ceil) );
+ assign( node2, binop( Iop_CmpLT32S, mkexpr(node1), mkexpr(node0) ) );
+ assign( node3, IRExpr_Mux0X( unop(Iop_1Uto8, mkexpr(node2)),
+ mkexpr(node0),
+ mkexpr(node1) ) );
+ assign( node4, mkU32(floor) );
+ assign( node5, binop( Iop_CmpLT32S, mkexpr(node3), mkexpr(node4) ) );
+ assign( node6, IRExpr_Mux0X( unop(Iop_1Uto8, mkexpr(node5)),
+ mkexpr(node3),
+ mkexpr(node4) ) );
+ assign( *res, mkexpr(node6) );
+
+   /* If saturation occurred, then resQ is set to some nonzero value;
+      if sat did not occur, resQ is guaranteed to be zero. */
+ if (resQ) {
+ assign( *resQ, binop(Iop_Xor32, mkexpr(*res), mkexpr(regT)) );
+ }
+}
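+
+/* Worked example for the above (illustrative only): with imm5 == 8,
+   ceil is 255.  An input of 0x1FF clamps to 255 and *resQ becomes
+   0x1FF ^ 0xFF == 0x100 (nonzero, hence saturated); an input of 7
+   passes through and *resQ is 7 ^ 7 == 0. */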
+
+
+/* SignedSatQ(): 'clamp' each value so that -2^(N-1) <= x <= 2^(N-1)-1.
+   Optionally return flag resQ saying whether saturation occurred.
+   - see definition in manual, section A2.2.1, page 41
+   (bits(N), boolean ) SignedSatQ( integer i, integer N )
+   {
+     if ( i > 2^(N-1) - 1 )    { result = 2^(N-1) - 1; saturated = TRUE; }
+     elsif ( i < -(2^(N-1)) )  { result = -(2^(N-1));  saturated = TRUE; }
+     else                      { result = i;           saturated = FALSE; }
+     return ( result<N-1:0>, saturated );
+   }
+*/
+static void armSignedSatQ( IRTemp regT, /* value to clamp - Ity_I32 */
+ UInt imm5, /* saturation ceiling */
+ IRTemp* res, /* OUT - Ity_I32 */
+ IRTemp* resQ ) /* OUT - Ity_I32 */
+{
+ Int ceil = (1 << (imm5-1)) - 1; // (2^(imm5-1))-1
+ Int floor = -(1 << (imm5-1)); // -(2^(imm5-1))
+
+ IRTemp node0 = newTemp(Ity_I32);
+ IRTemp node1 = newTemp(Ity_I32);
+ IRTemp node2 = newTemp(Ity_I1);
+ IRTemp node3 = newTemp(Ity_I32);
+ IRTemp node4 = newTemp(Ity_I32);
+ IRTemp node5 = newTemp(Ity_I1);
+ IRTemp node6 = newTemp(Ity_I32);
+
+ assign( node0, mkexpr(regT) );
+ assign( node1, mkU32(ceil) );
+ assign( node2, binop( Iop_CmpLT32S, mkexpr(node1), mkexpr(node0) ) );
+ assign( node3, IRExpr_Mux0X( unop(Iop_1Uto8, mkexpr(node2)),
+ mkexpr(node0), mkexpr(node1) ) );
+ assign( node4, mkU32(floor) );
+ assign( node5, binop( Iop_CmpLT32S, mkexpr(node3), mkexpr(node4) ) );
+ assign( node6, IRExpr_Mux0X( unop(Iop_1Uto8, mkexpr(node5)),
+ mkexpr(node3), mkexpr(node4) ) );
+ assign( *res, mkexpr(node6) );
+
+   /* If saturation occurred, then resQ is set to some nonzero value;
+      if sat did not occur, resQ is guaranteed to be zero. */
+ if (resQ) {
+ assign( *resQ, binop(Iop_Xor32, mkexpr(*res), mkexpr(regT)) );
+ }
+}
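+
+/* Worked example for the above (illustrative only): with imm5 == 8,
+   ceil is 127 and floor is -128.  An input of 200 clamps to 127 and
+   *resQ == 200 ^ 127 is nonzero (saturated); an input of -5 is
+   unchanged and *resQ is zero. */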
+
+
+/* Compute a value 0 :: I32 or 1 :: I32, indicating whether signed
+ overflow occurred for 32-bit addition. Needs both args and the
+ result. HD p27. */
+static
+IRExpr* signed_overflow_after_Add32 ( IRExpr* resE,
+ IRTemp argL, IRTemp argR )
+{
+ IRTemp res = newTemp(Ity_I32);
+ assign(res, resE);
+ return
+ binop( Iop_Shr32,
+ binop( Iop_And32,
+ binop( Iop_Xor32, mkexpr(res), mkexpr(argL) ),
+ binop( Iop_Xor32, mkexpr(res), mkexpr(argR) )),
+ mkU8(31) );
+}
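+
+/* Illustrative check of the formula (not executed): overflow occurs
+   iff the result's sign differs from the signs of both operands.
+   E.g. 0x40000000 + 0x40000000 == 0x80000000 gives
+   (res ^ argL) & (res ^ argR) == 0xC0000000, whose bit 31 is 1;
+   for 1 + 1 == 2 the same expression is 3, whose bit 31 is 0. */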
+
+
+/*------------------------------------------------------------*/
+/*--- Larger helpers ---*/
+/*------------------------------------------------------------*/
+
+/* Compute both the result and new C flag value for an LSL by an imm5
+   or by a register operand.  May generate reads of the old C value
+   (hence only safe to use before any writes to guest state happen).
+   These are factored out so they can be used by both ARM and Thumb.
+
+   Note that in compute_result_and_C_after_{LSL,LSR,ASR}_by{imm5,reg},
+   "res" (the result) is a.k.a. "shop", the shifter operand, and
+   "newC" (the new C) is a.k.a. "shco", the shifter carry out.
+
+   The calling convention for res and newC is a bit funny.  They could
+   be passed by value, but instead are passed by ref.
+*/
+
+static void compute_result_and_C_after_LSL_by_imm5 (
+ /*OUT*/HChar* buf,
+ IRTemp* res,
+ IRTemp* newC,
+ IRTemp rMt, UInt shift_amt, /* operands */
+ UInt rM /* only for debug printing */
+ )
+{
+ if (shift_amt == 0) {
+ if (newC) {
+ assign( *newC, mk_armg_calculate_flag_c() );
+ }
+ assign( *res, mkexpr(rMt) );
+ DIS(buf, "r%u", rM);
+ } else {
+ vassert(shift_amt >= 1 && shift_amt <= 31);
+ if (newC) {
+ assign( *newC,
+ binop(Iop_And32,
+ binop(Iop_Shr32, mkexpr(rMt),
+ mkU8(32 - shift_amt)),
+ mkU32(1)));
+ }
+ assign( *res,
+ binop(Iop_Shl32, mkexpr(rMt), mkU8(shift_amt)) );
+ DIS(buf, "r%u, LSL #%u", rM, shift_amt);
+ }
+}
+
+
+static void compute_result_and_C_after_LSL_by_reg (
+ /*OUT*/HChar* buf,
+ IRTemp* res,
+ IRTemp* newC,
+ IRTemp rMt, IRTemp rSt, /* operands */
+ UInt rM, UInt rS /* only for debug printing */
+ )
+{
+ // shift left in range 0 .. 255
+ // amt = rS & 255
+ // res = amt < 32 ? Rm << amt : 0
+ // newC = amt == 0 ? oldC :
+ // amt in 1..32 ? Rm[32-amt] : 0
+ IRTemp amtT = newTemp(Ity_I32);
+ assign( amtT, binop(Iop_And32, mkexpr(rSt), mkU32(255)) );
+ if (newC) {
+      /* mux0X(amt == 0,
+            mux0X(amt <= 32,
+                  0,
+                  Rm[(32-amt) & 31]),
+            oldC)
+      */
+ /* About the best you can do is pray that iropt is able
+ to nuke most or all of the following junk. */
+ IRTemp oldC = newTemp(Ity_I32);
+ assign(oldC, mk_armg_calculate_flag_c() );
+ assign(
+ *newC,
+ IRExpr_Mux0X(
+ unop(Iop_1Uto8,
+ binop(Iop_CmpEQ32, mkexpr(amtT), mkU32(0))),
+ IRExpr_Mux0X(
+ unop(Iop_1Uto8,
+ binop(Iop_CmpLE32U, mkexpr(amtT), mkU32(32))),
+ mkU32(0),
+ binop(Iop_Shr32,
+ mkexpr(rMt),
+ unop(Iop_32to8,
+ binop(Iop_And32,
+ binop(Iop_Sub32,
+ mkU32(32),
+ mkexpr(amtT)),
+ mkU32(31)
+ )
+ )
+ )
+ ),
+ mkexpr(oldC)
+ )
+ );
+ }
+ // (Rm << (Rs & 31)) & (((Rs & 255) - 32) >>s 31)
+ // Lhs of the & limits the shift to 31 bits, so as to
+ // give known IR semantics. Rhs of the & is all 1s for
+ // Rs <= 31 and all 0s for Rs >= 32.
+ assign(
+ *res,
+ binop(
+ Iop_And32,
+ binop(Iop_Shl32,
+ mkexpr(rMt),
+ unop(Iop_32to8,
+ binop(Iop_And32, mkexpr(rSt), mkU32(31)))),
+ binop(Iop_Sar32,
+ binop(Iop_Sub32,
+ mkexpr(amtT),
+ mkU32(32)),
+ mkU8(31))));
+ DIS(buf, "r%u, LSL r%u", rM, rS);
+}
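+
+/* Illustrative check of the masking trick above (not executed): for
+   Rs == 5, ((5 & 255) - 32) >>s 31 is all ones, so the result is
+   Rm << 5; for Rs == 33, Rm is shifted by 33 & 31 == 1, but
+   (33 - 32) >>s 31 is zero, so the result collapses to 0, as the
+   architecture requires for shift amounts of 32 or more. */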
+
+
+static void compute_result_and_C_after_LSR_by_imm5 (
+ /*OUT*/HChar* buf,
+ IRTemp* res,
+ IRTemp* newC,
+ IRTemp rMt, UInt shift_amt, /* operands */
+ UInt rM /* only for debug printing */
+ )
+{
+ if (shift_amt == 0) {
+ // conceptually a 32-bit shift, however:
+ // res = 0
+ // newC = Rm[31]
+ if (newC) {
+ assign( *newC,
+ binop(Iop_And32,
+ binop(Iop_Shr32, mkexpr(rMt), mkU8(31)),
+ mkU32(1)));
+ }
+ assign( *res, mkU32(0) );
+ DIS(buf, "r%u, LSR #0(a.k.a. 32)", rM);
+ } else {
+ // shift in range 1..31
+ // res = Rm >>u shift_amt
+ // newC = Rm[shift_amt - 1]
+ vassert(shift_amt >= 1 && shift_amt <= 31);
+ if (newC) {
+ assign( *newC,
+ binop(Iop_And32,
+ binop(Iop_Shr32, mkexpr(rMt),
+ mkU8(shift_amt - 1)),
+ mkU32(1)));
+ }
+ assign( *res,
+ binop(Iop_Shr32, mkexpr(rMt), mkU8(shift_amt)) );
+ DIS(buf, "r%u, LSR #%u", rM, shift_amt);
+ }
+}
+
+
+static void compute_result_and_C_after_LSR_by_reg (
+ /*OUT*/HChar* buf,
+ IRTemp* res,
+ IRTemp* newC,
+ IRTemp rMt, IRTemp rSt, /* operands */
+ UInt rM, UInt rS /* only for debug printing */
+ )
+{
+ // shift right in range 0 .. 255
+ // amt = rS & 255
+ // res = amt < 32 ? Rm >>u amt : 0
+ // newC = amt == 0 ? oldC :
+ // amt in 1..32 ? Rm[amt-1] : 0
+ IRTemp amtT = newTemp(Ity_I32);
+ assign( amtT, binop(Iop_And32, mkexpr(rSt), mkU32(255)) );
+ if (newC) {
+      /* mux0X(amt == 0,
+            mux0X(amt <= 32,
+                  0,
+                  Rm[(amt-1) & 31]),
+            oldC)
+      */
+ IRTemp oldC = newTemp(Ity_I32);
+ assign(oldC, mk_armg_calculate_flag_c() );
+ assign(
+ *newC,
+ IRExpr_Mux0X(
+ unop(Iop_1Uto8,
+ binop(Iop_CmpEQ32, mkexpr(amtT), mkU32(0))),
+ IRExpr_Mux0X(
+ unop(Iop_1Uto8,
+ binop(Iop_CmpLE32U, mkexpr(amtT), mkU32(32))),
+ mkU32(0),
+ binop(Iop_Shr32,
+ mkexpr(rMt),
+ unop(Iop_32to8,
+ binop(Iop_And32,
+ binop(Iop_Sub32,
+ mkexpr(amtT),
+ mkU32(1)),
+ mkU32(31)
+ )
+ )
+ )
+ ),
+ mkexpr(oldC)
+ )
+ );
+ }
+ // (Rm >>u (Rs & 31)) & (((Rs & 255) - 32) >>s 31)
+ // Lhs of the & limits the shift to 31 bits, so as to
+ // give known IR semantics. Rhs of the & is all 1s for
+ // Rs <= 31 and all 0s for Rs >= 32.
+ assign(
+ *res,
+ binop(
+ Iop_And32,
+ binop(Iop_Shr32,
+ mkexpr(rMt),
+ unop(Iop_32to8,
+ binop(Iop_And32, mkexpr(rSt), mkU32(31)))),
+ binop(Iop_Sar32,
+ binop(Iop_Sub32,
+ mkexpr(amtT),
+ mkU32(32)),
+ mkU8(31))));
+ DIS(buf, "r%u, LSR r%u", rM, rS);
+}
+
+
+static void compute_result_and_C_after_ASR_by_imm5 (
+ /*OUT*/HChar* buf,
+ IRTemp* res,
+ IRTemp* newC,
+ IRTemp rMt, UInt shift_amt, /* operands */
+ UInt rM /* only for debug printing */
+ )
+{
+ if (shift_amt == 0) {
+ // conceptually a 32-bit shift, however:
+ // res = Rm >>s 31
+ // newC = Rm[31]
+ if (newC) {
+ assign( *newC,
+ binop(Iop_And32,
+ binop(Iop_Shr32, mkexpr(rMt), mkU8(31)),
+ mkU32(1)));
+ }
+ assign( *res, binop(Iop_Sar32, mkexpr(rMt), mkU8(31)) );
+ DIS(buf, "r%u, ASR #0(a.k.a. 32)", rM);
+ } else {
+ // shift in range 1..31
+ // res = Rm >>s shift_amt
+ // newC = Rm[shift_amt - 1]
+ vassert(shift_amt >= 1 && shift_amt <= 31);
+ if (newC) {
+ assign( *newC,
+ binop(Iop_And32,
+ binop(Iop_Shr32, mkexpr(rMt),
+ mkU8(shift_amt - 1)),
+ mkU32(1)));
+ }
+ assign( *res,
+ binop(Iop_Sar32, mkexpr(rMt), mkU8(shift_amt)) );
+ DIS(buf, "r%u, ASR #%u", rM, shift_amt);
+ }
+}
+
+
+static void compute_result_and_C_after_ASR_by_reg (
+ /*OUT*/HChar* buf,
+ IRTemp* res,
+ IRTemp* newC,
+ IRTemp rMt, IRTemp rSt, /* operands */
+ UInt rM, UInt rS /* only for debug printing */
+ )
+{
+ // arithmetic shift right in range 0 .. 255
+ // amt = rS & 255
+ // res = amt < 32 ? Rm >>s amt : Rm >>s 31
+ // newC = amt == 0 ? oldC :
+ // amt in 1..32 ? Rm[amt-1] : Rm[31]
+ IRTemp amtT = newTemp(Ity_I32);
+ assign( amtT, binop(Iop_And32, mkexpr(rSt), mkU32(255)) );
+ if (newC) {
+      /* mux0X(amt == 0,
+            mux0X(amt <= 32,
+                  Rm[31],
+                  Rm[(amt-1) & 31]),
+            oldC)
+      */
+ IRTemp oldC = newTemp(Ity_I32);
+ assign(oldC, mk_armg_calculate_flag_c() );
+ assign(
+ *newC,
+ IRExpr_Mux0X(
+ unop(Iop_1Uto8,
+ binop(Iop_CmpEQ32, mkexpr(amtT), mkU32(0))),
+ IRExpr_Mux0X(
+ unop(Iop_1Uto8,
+ binop(Iop_CmpLE32U, mkexpr(amtT), mkU32(32))),
+ binop(Iop_Shr32,
+ mkexpr(rMt),
+ mkU8(31)
+ ),
+ binop(Iop_Shr32,
+ mkexpr(rMt),
+ unop(Iop_32to8,
+ binop(Iop_And32,
+ binop(Iop_Sub32,
+ mkexpr(amtT),
+ mkU32(1)),
+ mkU32(31)
+ )
+ )
+ )
+ ),
+ mkexpr(oldC)
+ )
+ );
+ }
+ // (Rm >>s (amt <u 32 ? amt : 31))
+ assign(
+ *res,
+ binop(
+ Iop_Sar32,
+ mkexpr(rMt),
+ unop(
+ Iop_32to8,
+ IRExpr_Mux0X(
+ unop(
+ Iop_1Uto8,
+ binop(Iop_CmpLT32U, mkexpr(amtT), mkU32(32))),
+ mkU32(31),
+ mkexpr(amtT)))));
+ DIS(buf, "r%u, ASR r%u", rM, rS);
+}
+
+
+static void compute_result_and_C_after_ROR_by_reg (
+ /*OUT*/HChar* buf,
+ IRTemp* res,
+ IRTemp* newC,
+ IRTemp rMt, IRTemp rSt, /* operands */
+ UInt rM, UInt rS /* only for debug printing */
+ )
+{
+ // rotate right in range 0 .. 255
+ // amt = rS & 255
+ // shop = Rm `ror` (amt & 31)
+ // shco = amt == 0 ? oldC : Rm[(amt-1) & 31]
+ IRTemp amtT = newTemp(Ity_I32);
+ assign( amtT, binop(Iop_And32, mkexpr(rSt), mkU32(255)) );
+ IRTemp amt5T = newTemp(Ity_I32);
+ assign( amt5T, binop(Iop_And32, mkexpr(rSt), mkU32(31)) );
+ IRTemp oldC = newTemp(Ity_I32);
+ assign(oldC, mk_armg_calculate_flag_c() );
+ if (newC) {
+ assign(
+ *newC,
+ IRExpr_Mux0X(
+ unop(Iop_32to8, mkexpr(amtT)),
+ mkexpr(oldC),
+ binop(Iop_And32,
+ binop(Iop_Shr32,
+ mkexpr(rMt),
+ unop(Iop_32to8,
+ binop(Iop_And32,
+ binop(Iop_Sub32,
+ mkexpr(amtT),
+ mkU32(1)
+ ),
+ mkU32(31)
+ )
+ )
+ ),
+ mkU32(1)
+ )
+ )
+ );
+ }
+ assign(
+ *res,
+ IRExpr_Mux0X(
+ unop(Iop_32to8, mkexpr(amt5T)), mkexpr(rMt),
+ binop(Iop_Or32,
+ binop(Iop_Shr32,
+ mkexpr(rMt),
+ unop(Iop_32to8, mkexpr(amt5T))
+ ),
+ binop(Iop_Shl32,
+ mkexpr(rMt),
+ unop(Iop_32to8,
+ binop(Iop_Sub32, mkU32(32), mkexpr(amt5T))
+ )
+ )
+ )
+ )
+ );
+ DIS(buf, "r%u, ROR r#%u", rM, rS);
+}
+
+
+/* Generate an expression corresponding to the immediate-shift case of
+ a shifter operand. This is used both for ARM and Thumb2.
+
+ Bind it to a temporary, and return that via *res. If newC is
+ non-NULL, also compute a value for the shifter's carry out (in the
+   LSB of a word), bind it to a temporary, and return that via *newC.
+
+ Generates GETs from the guest state and is therefore not safe to
+ use once we start doing PUTs to it, for any given instruction.
+
+ 'how' is encoded thusly:
+ 00b LSL, 01b LSR, 10b ASR, 11b ROR
+ Most but not all ARM and Thumb integer insns use this encoding.
+ Be careful to ensure the right value is passed here.
+*/
+static void compute_result_and_C_after_shift_by_imm5 (
+ /*OUT*/HChar* buf,
+ /*OUT*/IRTemp* res,
+ /*OUT*/IRTemp* newC,
+ IRTemp rMt, /* reg to shift */
+ UInt how, /* what kind of shift */
+ UInt shift_amt, /* shift amount (0..31) */
+ UInt rM /* only for debug printing */
+ )
+{
+ vassert(shift_amt < 32);
+ vassert(how < 4);
+
+ switch (how) {
+
+ case 0:
+ compute_result_and_C_after_LSL_by_imm5(
+ buf, res, newC, rMt, shift_amt, rM
+ );
+ break;
+
+ case 1:
+ compute_result_and_C_after_LSR_by_imm5(
+ buf, res, newC, rMt, shift_amt, rM
+ );
+ break;
+
+ case 2:
+ compute_result_and_C_after_ASR_by_imm5(
+ buf, res, newC, rMt, shift_amt, rM
+ );
+ break;
+
+ case 3:
+ if (shift_amt == 0) {
+ IRTemp oldcT = newTemp(Ity_I32);
+         // rotate right 1 bit through carry
+ // RRX -- described at ARM ARM A5-17
+ // res = (oldC << 31) | (Rm >>u 1)
+ // newC = Rm[0]
+ if (newC) {
+ assign( *newC,
+ binop(Iop_And32, mkexpr(rMt), mkU32(1)));
+ }
+ assign( oldcT, mk_armg_calculate_flag_c() );
+ assign( *res,
+ binop(Iop_Or32,
+ binop(Iop_Shl32, mkexpr(oldcT), mkU8(31)),
+ binop(Iop_Shr32, mkexpr(rMt), mkU8(1))) );
+ DIS(buf, "r%u, RRX", rM);
+ } else {
+ // rotate right in range 1..31
+ // res = Rm `ror` shift_amt
+ // newC = Rm[shift_amt - 1]
+ vassert(shift_amt >= 1 && shift_amt <= 31);
+ if (newC) {
+ assign( *newC,
+ binop(Iop_And32,
+ binop(Iop_Shr32, mkexpr(rMt),
+ mkU8(shift_amt - 1)),
+ mkU32(1)));
+ }
+ assign( *res,
+ binop(Iop_Or32,
+ binop(Iop_Shr32, mkexpr(rMt), mkU8(shift_amt)),
+ binop(Iop_Shl32, mkexpr(rMt),
+ mkU8(32-shift_amt))));
+ DIS(buf, "r%u, ROR #%u", rM, shift_amt);
+ }
+ break;
+
+ default:
+ /*NOTREACHED*/
+ vassert(0);
+ }
+}
+
+
+/* Generate an expression corresponding to the register-shift case of
+ a shifter operand. This is used both for ARM and Thumb2.
+
+ Bind it to a temporary, and return that via *res. If newC is
+ non-NULL, also compute a value for the shifter's carry out (in the
+   LSB of a word), bind it to a temporary, and return that via *newC.
+
+ Generates GETs from the guest state and is therefore not safe to
+ use once we start doing PUTs to it, for any given instruction.
+
+ 'how' is encoded thusly:
+ 00b LSL, 01b LSR, 10b ASR, 11b ROR
+ Most but not all ARM and Thumb integer insns use this encoding.
+ Be careful to ensure the right value is passed here.
+*/
+static void compute_result_and_C_after_shift_by_reg (
+ /*OUT*/HChar* buf,
+ /*OUT*/IRTemp* res,
+ /*OUT*/IRTemp* newC,
+ IRTemp rMt, /* reg to shift */
+ UInt how, /* what kind of shift */
+ IRTemp rSt, /* shift amount */
+ UInt rM, /* only for debug printing */
+ UInt rS /* only for debug printing */
+ )
+{
+ vassert(how < 4);
+ switch (how) {
+ case 0: { /* LSL */
+ compute_result_and_C_after_LSL_by_reg(
+ buf, res, newC, rMt, rSt, rM, rS
+ );
+ break;
+ }
+ case 1: { /* LSR */
+ compute_result_and_C_after_LSR_by_reg(
+ buf, res, newC, rMt, rSt, rM, rS
+ );
+ break;
+ }
+ case 2: { /* ASR */
+ compute_result_and_C_after_ASR_by_reg(
+ buf, res, newC, rMt, rSt, rM, rS
+ );
+ break;
+ }
+ case 3: { /* ROR */
+ compute_result_and_C_after_ROR_by_reg(
+ buf, res, newC, rMt, rSt, rM, rS
+ );
+ break;
+ }
+ default:
+ /*NOTREACHED*/
+ vassert(0);
+ }
+}
+
+
+/* Generate an expression corresponding to a shifter_operand, bind it
+ to a temporary, and return that via *shop. If shco is non-NULL,
+ also compute a value for the shifter's carry out (in the LSB of a
+ word), bind it to a temporary, and return that via *shco.
+
+ If for some reason we can't come up with a shifter operand (missing
+ case? not really a shifter operand?) return False.
+
+ Generates GETs from the guest state and is therefore not safe to
+ use once we start doing PUTs to it, for any given instruction.
+
+ For ARM insns only; not for Thumb.
+*/
+static Bool mk_shifter_operand ( UInt insn_25, UInt insn_11_0,
+ /*OUT*/IRTemp* shop,
+ /*OUT*/IRTemp* shco,
+ /*OUT*/HChar* buf )
+{
+ UInt insn_4 = (insn_11_0 >> 4) & 1;
+ UInt insn_7 = (insn_11_0 >> 7) & 1;
+ vassert(insn_25 <= 0x1);
+ vassert(insn_11_0 <= 0xFFF);
+
+ vassert(shop && *shop == IRTemp_INVALID);
+ *shop = newTemp(Ity_I32);
+
+ if (shco) {
+ vassert(*shco == IRTemp_INVALID);
+ *shco = newTemp(Ity_I32);
+ }
+
+ /* 32-bit immediate */
+
+ if (insn_25 == 1) {
+ /* immediate: (7:0) rotated right by 2 * (11:8) */
+ UInt imm = (insn_11_0 >> 0) & 0xFF;
+ UInt rot = 2 * ((insn_11_0 >> 8) & 0xF);
+ vassert(rot <= 30);
+ imm = ROR32(imm, rot);
+ if (shco) {
+ if (rot == 0) {
+ assign( *shco, mk_armg_calculate_flag_c() );
+ } else {
+ assign( *shco, mkU32( (imm >> 31) & 1 ) );
+ }
+ }
+ DIS(buf, "#0x%x", imm);
+ assign( *shop, mkU32(imm) );
+ return True;
+ }
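+
+   /* Illustrative decode of the case above (not executed): insn_11_0
+      == 0x2FF gives imm == 0xFF and rot == 4, so the operand is
+      ROR32(0xFF, 4) == 0xF000000F and, since rot != 0, the carry-out
+      is its bit 31, which is 1. */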
+
+ /* Shift/rotate by immediate */
+
+ if (insn_25 == 0 && insn_4 == 0) {
+ /* Rm (3:0) shifted (6:5) by immediate (11:7) */
+ UInt shift_amt = (insn_11_0 >> 7) & 0x1F;
+ UInt rM = (insn_11_0 >> 0) & 0xF;
+ UInt how = (insn_11_0 >> 5) & 3;
+ /* how: 00 = Shl, 01 = Shr, 10 = Sar, 11 = Ror */
+ IRTemp rMt = newTemp(Ity_I32);
+ assign(rMt, getIRegA(rM));
+
+ vassert(shift_amt <= 31);
+
+ compute_result_and_C_after_shift_by_imm5(
+ buf, shop, shco, rMt, how, shift_amt, rM
+ );
+ return True;
+ }
+
+ /* Shift/rotate by register */
+ if (insn_25 == 0 && insn_4 == 1) {
+ /* Rm (3:0) shifted (6:5) by Rs (11:8) */
+ UInt rM = (insn_11_0 >> 0) & 0xF;
+ UInt rS = (insn_11_0 >> 8) & 0xF;
+ UInt how = (insn_11_0 >> 5) & 3;
+ /* how: 00 = Shl, 01 = Shr, 10 = Sar, 11 = Ror */
+ IRTemp rMt = newTemp(Ity_I32);
+ IRTemp rSt = newTemp(Ity_I32);
+
+ if (insn_7 == 1)
+ return False; /* not really a shifter operand */
+
+ assign(rMt, getIRegA(rM));
+ assign(rSt, getIRegA(rS));
+
+ compute_result_and_C_after_shift_by_reg(
+ buf, shop, shco, rMt, how, rSt, rM, rS
+ );
+ return True;
+ }
+
+ vex_printf("mk_shifter_operand(0x%x,0x%x)\n", insn_25, insn_11_0 );
+ return False;
+}
+
+
+/* ARM only */
+static
+IRExpr* mk_EA_reg_plusminus_imm12 ( UInt rN, UInt bU, UInt imm12,
+ /*OUT*/HChar* buf )
+{
+ vassert(rN < 16);
+ vassert(bU < 2);
+ vassert(imm12 < 0x1000);
+ UChar opChar = bU == 1 ? '+' : '-';
+ DIS(buf, "[r%u, #%c%u]", rN, opChar, imm12);
+ return
+ binop( (bU == 1 ? Iop_Add32 : Iop_Sub32),
+ getIRegA(rN),
+ mkU32(imm12) );
+}
+
+
+/* ARM only.
+   NB: This is "DecodeImmShift" in newer versions of the ARM ARM.
+*/
+static
+IRExpr* mk_EA_reg_plusminus_shifted_reg ( UInt rN, UInt bU, UInt rM,
+ UInt sh2, UInt imm5,
+ /*OUT*/HChar* buf )
+{
+ vassert(rN < 16);
+ vassert(bU < 2);
+ vassert(rM < 16);
+ vassert(sh2 < 4);
+ vassert(imm5 < 32);
+ UChar opChar = bU == 1 ? '+' : '-';
+ IRExpr* index = NULL;
+ switch (sh2) {
+ case 0: /* LSL */
+ /* imm5 can be in the range 0 .. 31 inclusive. */
+ index = binop(Iop_Shl32, getIRegA(rM), mkU8(imm5));
+ DIS(buf, "[r%u, %c r%u LSL #%u]", rN, opChar, rM, imm5);
+ break;
+ case 1: /* LSR */
+ if (imm5 == 0) {
+ index = mkU32(0);
+            vassert(0); // ATC (awaiting test case)
+ } else {
+ index = binop(Iop_Shr32, getIRegA(rM), mkU8(imm5));
+ }
+ DIS(buf, "[r%u, %cr%u, LSR #%u]",
+ rN, opChar, rM, imm5 == 0 ? 32 : imm5);
+ break;
+ case 2: /* ASR */
+ /* Doesn't this just mean that the behaviour with imm5 == 0
+ is the same as if it had been 31 ? */
+ if (imm5 == 0) {
+ index = binop(Iop_Sar32, getIRegA(rM), mkU8(31));
+            vassert(0); // ATC (awaiting test case)
+ } else {
+ index = binop(Iop_Sar32, getIRegA(rM), mkU8(imm5));
+ }
+ DIS(buf, "[r%u, %cr%u, ASR #%u]",
+ rN, opChar, rM, imm5 == 0 ? 32 : imm5);
+ break;
+ case 3: /* ROR or RRX */
+ if (imm5 == 0) {
+ IRTemp rmT = newTemp(Ity_I32);
+ IRTemp cflagT = newTemp(Ity_I32);
+ assign(rmT, getIRegA(rM));
+ assign(cflagT, mk_armg_calculate_flag_c());
+ index = binop(Iop_Or32,
+ binop(Iop_Shl32, mkexpr(cflagT), mkU8(31)),
+ binop(Iop_Shr32, mkexpr(rmT), mkU8(1)));
+ DIS(buf, "[r%u, %cr%u, RRX]", rN, opChar, rM);
+ } else {
+ IRTemp rmT = newTemp(Ity_I32);
+ assign(rmT, getIRegA(rM));
+ vassert(imm5 >= 1 && imm5 <= 31);
+ index = binop(Iop_Or32,
+ binop(Iop_Shl32, mkexpr(rmT), mkU8(32-imm5)),
+ binop(Iop_Shr32, mkexpr(rmT), mkU8(imm5)));
+ DIS(buf, "[r%u, %cr%u, ROR #%u]", rN, opChar, rM, imm5);
+ }
+ break;
+ default:
+ vassert(0);
+ }
+ vassert(index);
+ return binop(bU == 1 ? Iop_Add32 : Iop_Sub32,
+ getIRegA(rN), index);
+}
+
+
+/* ARM only */
+static
+IRExpr* mk_EA_reg_plusminus_imm8 ( UInt rN, UInt bU, UInt imm8,
+ /*OUT*/HChar* buf )
+{
+ vassert(rN < 16);
+ vassert(bU < 2);
+ vassert(imm8 < 0x100);
+ UChar opChar = bU == 1 ? '+' : '-';
+ DIS(buf, "[r%u, #%c%u]", rN, opChar, imm8);
+ return
+ binop( (bU == 1 ? Iop_Add32 : Iop_Sub32),
+ getIRegA(rN),
+ mkU32(imm8) );
+}
+
+
+/* ARM only */
+static
+IRExpr* mk_EA_reg_plusminus_reg ( UInt rN, UInt bU, UInt rM,
+ /*OUT*/HChar* buf )
+{
+ vassert(rN < 16);
+ vassert(bU < 2);
+ vassert(rM < 16);
+ UChar opChar = bU == 1 ? '+' : '-';
+ IRExpr* index = getIRegA(rM);
+ DIS(buf, "[r%u, %c r%u]", rN, opChar, rM);
+ return binop(bU == 1 ? Iop_Add32 : Iop_Sub32,
+ getIRegA(rN), index);
+}
+
+
+/* irRes :: Ity_I32 holds a floating point comparison result encoded
+ as an IRCmpF64Result. Generate code to convert it to an
+ ARM-encoded (N,Z,C,V) group in the lowest 4 bits of an I32 value.
+ Assign a new temp to hold that value, and return the temp. */
+static
+IRTemp mk_convert_IRCmpF64Result_to_NZCV ( IRTemp irRes )
+{
+ IRTemp ix = newTemp(Ity_I32);
+ IRTemp termL = newTemp(Ity_I32);
+ IRTemp termR = newTemp(Ity_I32);
+ IRTemp nzcv = newTemp(Ity_I32);
+
+ /* This is where the fun starts. We have to convert 'irRes' from
+ an IR-convention return result (IRCmpF64Result) to an
+ ARM-encoded (N,Z,C,V) group. The final result is in the bottom
+ 4 bits of 'nzcv'. */
+ /* Map compare result from IR to ARM(nzcv) */
+ /*
+ FP cmp result | IR | ARM(nzcv)
+ --------------------------------
+ UN 0x45 0011
+ LT 0x01 1000
+ GT 0x00 0010
+ EQ 0x40 0110
+ */
+ /* Now since you're probably wondering WTF ..
+
+ ix fishes the useful bits out of the IR value, bits 6 and 0, and
+ places them side by side, giving a number which is 0, 1, 2 or 3.
+
+      termL is a sequence cooked up by GNU superopt.  It converts ix
+      into an almost correct NZCV value (incredibly), except
+      for the case of UN, where it produces 0100 instead of the
+      required 0011.
+
+      termR is therefore a correction term, also computed from ix.  It
+      is 1 in the UN case and 0 for LT, GT and EQ.  Hence, to get
+      the final correct value, we subtract termR from termL.
+
+ Don't take my word for it. There's a test program at the bottom
+ of this file, to try this out with.
+ */
+ assign(
+ ix,
+ binop(Iop_Or32,
+ binop(Iop_And32,
+ binop(Iop_Shr32, mkexpr(irRes), mkU8(5)),
+ mkU32(3)),
+ binop(Iop_And32, mkexpr(irRes), mkU32(1))));
+
+ assign(
+ termL,
+ binop(Iop_Add32,
+ binop(Iop_Shr32,
+ binop(Iop_Sub32,
+ binop(Iop_Shl32,
+ binop(Iop_Xor32, mkexpr(ix), mkU32(1)),
+ mkU8(30)),
+ mkU32(1)),
+ mkU8(29)),
+ mkU32(1)));
+
+ assign(
+ termR,
+ binop(Iop_And32,
+ binop(Iop_And32,
+ mkexpr(ix),
+ binop(Iop_Shr32, mkexpr(ix), mkU8(1))),
+ mkU32(1)));
+
+ assign(nzcv, binop(Iop_Sub32, mkexpr(termL), mkexpr(termR)));
+ return nzcv;
+}
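+
+/* Illustrative trace of the above (not executed): for irRes == 0x45
+   (unordered), ix == ((0x45 >> 5) & 3) | (0x45 & 1) == 3.  Then
+   termL == ((((3 ^ 1) << 30) - 1) >>u 29) + 1 == 4 and
+   termR == (3 & (3 >> 1)) & 1 == 1, so nzcv == 4 - 1 == 3 == 0011b,
+   exactly the ARM encoding required for an unordered compare. */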
+
+
+/* Thumb32 only. This is "ThumbExpandImm" in the ARM ARM. If
+ updatesC is non-NULL, a boolean is written to it indicating whether
+ or not the C flag is updated, as per ARM ARM "ThumbExpandImm_C".
+*/
+static UInt thumbExpandImm ( Bool* updatesC,
+ UInt imm1, UInt imm3, UInt imm8 )
+{
+ vassert(imm1 < (1<<1));
+ vassert(imm3 < (1<<3));
+ vassert(imm8 < (1<<8));
+ UInt i_imm3_a = (imm1 << 4) | (imm3 << 1) | ((imm8 >> 7) & 1);
+ UInt abcdefgh = imm8;
+ UInt lbcdefgh = imm8 | 0x80;
+ if (updatesC) {
+ *updatesC = i_imm3_a >= 8;
+ }
+ switch (i_imm3_a) {
+ case 0: case 1:
+ return abcdefgh;
+ case 2: case 3:
+ return (abcdefgh << 16) | abcdefgh;
+ case 4: case 5:
+ return (abcdefgh << 24) | (abcdefgh << 8);
+ case 6: case 7:
+ return (abcdefgh << 24) | (abcdefgh << 16)
+ | (abcdefgh << 8) | abcdefgh;
+ case 8 ... 31:
+ return lbcdefgh << (32 - i_imm3_a);
+ default:
+ break;
+ }
+ /*NOTREACHED*/vassert(0);
+}
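+
+/* Illustrative expansions (not executed): imm1:imm3:imm8 == 0:001:0xAB
+   gives i_imm3_a == 3, hence the result 0x00AB00AB, with no C update.
+   For imm1:imm3:imm8 == 1:000:0x60, i_imm3_a == 16, so the result is
+   (0x60 | 0x80) << (32 - 16) == 0x00E00000, and C is updated. */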
+
+
+/* Version of thumbExpandImm where we simply feed it the
+ instruction halfwords (the lowest addressed one is I0). */
+static UInt thumbExpandImm_from_I0_I1 ( Bool* updatesC,
+ UShort i0s, UShort i1s )
+{
+ UInt i0 = (UInt)i0s;
+ UInt i1 = (UInt)i1s;
+ UInt imm1 = SLICE_UInt(i0,10,10);
+ UInt imm3 = SLICE_UInt(i1,14,12);
+ UInt imm8 = SLICE_UInt(i1,7,0);
+ return thumbExpandImm(updatesC, imm1, imm3, imm8);
+}
+
+
+/* Thumb16 only. Given the firstcond and mask fields from an IT
+ instruction, compute the 32-bit ITSTATE value implied, as described
+ in libvex_guest_arm.h. This is not the ARM ARM representation.
+ Also produce the t/e chars for the 2nd, 3rd, 4th insns, for
+ disassembly printing. Returns False if firstcond or mask
+ denote something invalid.
+
+ The number and conditions for the instructions to be
+ conditionalised depend on firstcond and mask:
+
+ mask cond 1 cond 2 cond 3 cond 4
+
+ 1000 fc[3:0]
+ x100 fc[3:0] fc[3:1]:x
+ xy10 fc[3:0] fc[3:1]:x fc[3:1]:y
+ xyz1 fc[3:0] fc[3:1]:x fc[3:1]:y fc[3:1]:z
+
+ The condition fields are assembled in *itstate backwards (cond 4 at
+ the top, cond 1 at the bottom). Conditions are << 4'd and then
+ ^0xE'd, and those fields that correspond to instructions in the IT
+ block are tagged with a 1 bit.
+*/
+static Bool compute_ITSTATE ( /*OUT*/UInt* itstate,
+ /*OUT*/UChar* ch1,
+ /*OUT*/UChar* ch2,
+ /*OUT*/UChar* ch3,
+ UInt firstcond, UInt mask )
+{
+ vassert(firstcond <= 0xF);
+ vassert(mask <= 0xF);
+ *itstate = 0;
+ *ch1 = *ch2 = *ch3 = '.';
+ if (mask == 0)
+ return False; /* the logic below actually ensures this anyway,
+ but clearer to make it explicit. */
+ if (firstcond == 0xF)
+ return False; /* NV is not allowed */
+ if (firstcond == 0xE && popcount32(mask) != 1)
+ return False; /* if firstcond is AL then all the rest must be too */
+
+ UInt m3 = (mask >> 3) & 1;
+ UInt m2 = (mask >> 2) & 1;
+ UInt m1 = (mask >> 1) & 1;
+ UInt m0 = (mask >> 0) & 1;
+
+ UInt fc = (firstcond << 4) | 1/*in-IT-block*/;
+ UInt ni = (0xE/*AL*/ << 4) | 0/*not-in-IT-block*/;
+
+ if (m3 == 1 && (m2|m1|m0) == 0) {
+ *itstate = (ni << 24) | (ni << 16) | (ni << 8) | fc;
+ *itstate ^= 0xE0E0E0E0;
+ return True;
+ }
+
+ if (m2 == 1 && (m1|m0) == 0) {
+ *itstate = (ni << 24) | (ni << 16) | (setbit32(fc, 4, m3) << 8) | fc;
+ *itstate ^= 0xE0E0E0E0;
+ *ch1 = m3 == (firstcond & 1) ? 't' : 'e';
+ return True;
+ }
+
+ if (m1 == 1 && m0 == 0) {
+ *itstate = (ni << 24)
+ | (setbit32(fc, 4, m2) << 16)
+ | (setbit32(fc, 4, m3) << 8) | fc;
+ *itstate ^= 0xE0E0E0E0;
+ *ch1 = m3 == (firstcond & 1) ? 't' : 'e';
+ *ch2 = m2 == (firstcond & 1) ? 't' : 'e';
+ return True;
+ }
+
+ if (m0 == 1) {
+ *itstate = (setbit32(fc, 4, m1) << 24)
+ | (setbit32(fc, 4, m2) << 16)
+ | (setbit32(fc, 4, m3) << 8) | fc;
+ *itstate ^= 0xE0E0E0E0;
+ *ch1 = m3 == (firstcond & 1) ? 't' : 'e';
+ *ch2 = m2 == (firstcond & 1) ? 't' : 'e';
+ *ch3 = m1 == (firstcond & 1) ? 't' : 'e';
+ return True;
+ }
+
+ return False;
+}
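+
+/* Illustrative encoding (not executed): a plain "IT EQ" has firstcond
+   == 0 and mask == 1000b, so fc == 0x01, ni == 0xE0, and *itstate ==
+   0xE0E0E001 ^ 0xE0E0E0E0 == 0x000000E1: one guarded slot at the
+   bottom, with the other three slots marked not-in-IT-block. */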
+
+
+/* Generate IR to do 32-bit bit reversal, a la Hacker's Delight
+ Chapter 7 Section 1. */
+static IRTemp gen_BITREV ( IRTemp x0 )
+{
+ IRTemp x1 = newTemp(Ity_I32);
+ IRTemp x2 = newTemp(Ity_I32);
+ IRTemp x3 = newTemp(Ity_I32);
+ IRTemp x4 = newTemp(Ity_I32);
+ IRTemp x5 = newTemp(Ity_I32);
+ UInt c1 = 0x55555555;
+ UInt c2 = 0x33333333;
+ UInt c3 = 0x0F0F0F0F;
+ UInt c4 = 0x00FF00FF;
+ UInt c5 = 0x0000FFFF;
+ assign(x1,
+ binop(Iop_Or32,
+ binop(Iop_Shl32,
+ binop(Iop_And32, mkexpr(x0), mkU32(c1)),
+ mkU8(1)),
+ binop(Iop_Shr32,
+ binop(Iop_And32, mkexpr(x0), mkU32(~c1)),
+ mkU8(1))
+ ));
+ assign(x2,
+ binop(Iop_Or32,
+ binop(Iop_Shl32,
+ binop(Iop_And32, mkexpr(x1), mkU32(c2)),
+ mkU8(2)),
+ binop(Iop_Shr32,
+ binop(Iop_And32, mkexpr(x1), mkU32(~c2)),
+ mkU8(2))
+ ));
+ assign(x3,
+ binop(Iop_Or32,
+ binop(Iop_Shl32,
+ binop(Iop_And32, mkexpr(x2), mkU32(c3)),
+ mkU8(4)),
+ binop(Iop_Shr32,
+ binop(Iop_And32, mkexpr(x2), mkU32(~c3)),
+ mkU8(4))
+ ));
+ assign(x4,
+ binop(Iop_Or32,
+ binop(Iop_Shl32,
+ binop(Iop_And32, mkexpr(x3), mkU32(c4)),
+ mkU8(8)),
+ binop(Iop_Shr32,
+ binop(Iop_And32, mkexpr(x3), mkU32(~c4)),
+ mkU8(8))
+ ));
+ assign(x5,
+ binop(Iop_Or32,
+ binop(Iop_Shl32,
+ binop(Iop_And32, mkexpr(x4), mkU32(c5)),
+ mkU8(16)),
+ binop(Iop_Shr32,
+ binop(Iop_And32, mkexpr(x4), mkU32(~c5)),
+ mkU8(16))
+ ));
+ return x5;
+}
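+
+/* Sanity check for the above (illustrative only): the five stages swap
+   adjacent bits, then 2-bit pairs, nibbles, bytes, and finally
+   halfwords, so 0x00000001 reverses to 0x80000000 and 0x80000002 to
+   0x40000001. */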
+
+
+/* Generate IR to rearrange bytes 3:2:1:0 in a word into the order
+   0:1:2:3 (aka byte-swap). */
+static IRTemp gen_REV ( IRTemp arg )
+{
+ IRTemp res = newTemp(Ity_I32);
+ assign(res,
+ binop(Iop_Or32,
+ binop(Iop_Shl32, mkexpr(arg), mkU8(24)),
+ binop(Iop_Or32,
+ binop(Iop_And32, binop(Iop_Shl32, mkexpr(arg), mkU8(8)),
+ mkU32(0x00FF0000)),
+ binop(Iop_Or32,
+ binop(Iop_And32, binop(Iop_Shr32, mkexpr(arg), mkU8(8)),
+ mkU32(0x0000FF00)),
+ binop(Iop_And32, binop(Iop_Shr32, mkexpr(arg), mkU8(24)),
+ mkU32(0x000000FF) )
+ ))));
+ return res;
+}
+
+
+/* Generate IR to rearrange bytes 3:2:1:0 in a word into the order
+   2:3:0:1 (swap within lo and hi halves). */
+static IRTemp gen_REV16 ( IRTemp arg )
+{
+ IRTemp res = newTemp(Ity_I32);
+ assign(res,
+ binop(Iop_Or32,
+ binop(Iop_And32,
+ binop(Iop_Shl32, mkexpr(arg), mkU8(8)),
+ mkU32(0xFF00FF00)),
+ binop(Iop_And32,
+ binop(Iop_Shr32, mkexpr(arg), mkU8(8)),
+ mkU32(0x00FF00FF))));
+ return res;
+}
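+
+/* Illustrative results (not executed): for the input 0x11223344,
+   gen_REV yields 0x44332211 and gen_REV16 yields 0x22114433. */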
+
+
+/*------------------------------------------------------------*/
+/*--- Advanced SIMD (NEON) instructions ---*/
+/*------------------------------------------------------------*/
+
+/*------------------------------------------------------------*/
+/*--- NEON data processing ---*/
+/*------------------------------------------------------------*/
+
+/* For all NEON DP ops, we use the normal scheme to handle conditional
+ writes to registers -- pass in condT and hand that on to the
+ put*Reg functions. In ARM mode condT is always IRTemp_INVALID
+ since NEON is unconditional for ARM. In Thumb mode condT is
+ derived from the ITSTATE shift register in the normal way. */
+
+static
+UInt get_neon_d_regno(UInt theInstr)
+{
+ UInt x = ((theInstr >> 18) & 0x10) | ((theInstr >> 12) & 0xF);
+ if (theInstr & 0x40) {
+ if (x & 1) {
+ x = x + 0x100;
+ } else {
+ x = x >> 1;
+ }
+ }
+ return x;
+}
+
+static
+UInt get_neon_n_regno(UInt theInstr)
+{
+ UInt x = ((theInstr >> 3) & 0x10) | ((theInstr >> 16) & 0xF);
+ if (theInstr & 0x40) {
+ if (x & 1) {
+ x = x + 0x100;
+ } else {
+ x = x >> 1;
+ }
+ }
+ return x;
+}
+
+static
+UInt get_neon_m_regno(UInt theInstr)
+{
+ UInt x = ((theInstr >> 1) & 0x10) | (theInstr & 0xF);
+ if (theInstr & 0x40) {
+ if (x & 1) {
+ x = x + 0x100;
+ } else {
+ x = x >> 1;
+ }
+ }
+ return x;
+}
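+
+/* Note on the three decoders above: bit 6 of the instruction selects a
+   quad (Q) operation, in which case the encoded register number must
+   be even; even values are halved to give the Q register number, while
+   odd (architecturally invalid) encodings are flagged by adding 0x100,
+   which callers can then reject (see the >= 0x100 checks in
+   dis_neon_vtb below). */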
+
+static
+Bool dis_neon_vext ( UInt theInstr, IRTemp condT )
+{
+ UInt dreg = get_neon_d_regno(theInstr);
+ UInt mreg = get_neon_m_regno(theInstr);
+ UInt nreg = get_neon_n_regno(theInstr);
+ UInt imm4 = (theInstr >> 8) & 0xf;
+ UInt Q = (theInstr >> 6) & 1;
+ HChar reg_t = Q ? 'q' : 'd';
+
+ if (Q) {
+ putQReg(dreg, triop(Iop_ExtractV128, getQReg(nreg),
+ getQReg(mreg), mkU8(imm4)), condT);
+ } else {
+ putDRegI64(dreg, triop(Iop_Extract64, getDRegI64(nreg),
+ getDRegI64(mreg), mkU8(imm4)), condT);
+ }
+ DIP("vext.8 %c%d, %c%d, %c%d, #%d\n", reg_t, dreg, reg_t, nreg,
+ reg_t, mreg, imm4);
+ return True;
+}
+
+/* VTBL, VTBX */
+static
+Bool dis_neon_vtb ( UInt theInstr, IRTemp condT )
+{
+ UInt op = (theInstr >> 6) & 1;
+ UInt dreg = get_neon_d_regno(theInstr & ~(1 << 6));
+ UInt nreg = get_neon_n_regno(theInstr & ~(1 << 6));
+ UInt mreg = get_neon_m_regno(theInstr & ~(1 << 6));
+ UInt len = (theInstr >> 8) & 3;
+ Int i;
+ IROp cmp;
+ ULong imm;
+ IRTemp arg_l;
+ IRTemp old_mask, new_mask, cur_mask;
+ IRTemp old_res, new_res;
+ IRTemp old_arg, new_arg;
+
+ if (dreg >= 0x100 || mreg >= 0x100 || nreg >= 0x100)
+ return False;
+ if (nreg + len > 31)
+ return False;
+
+ cmp = Iop_CmpGT8Ux8;
+
+ old_mask = newTemp(Ity_I64);
+ old_res = newTemp(Ity_I64);
+ old_arg = newTemp(Ity_I64);
+ assign(old_mask, mkU64(0));
+ assign(old_res, mkU64(0));
+ assign(old_arg, getDRegI64(mreg));
+ imm = 8;
+ imm = (imm << 8) | imm;
+ imm = (imm << 16) | imm;
+ imm = (imm << 32) | imm;
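+   /* At this point imm == 0x0808080808080808: the constant 8 in each
+      byte lane, used both to select the indices that fall within the
+      current 8-entry table chunk and to step the indices down for the
+      next chunk. */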
+
+ for (i = 0; i <= len; i++) {
+ arg_l = newTemp(Ity_I64);
+ new_mask = newTemp(Ity_I64);
+ cur_mask = newTemp(Ity_I64);
+ new_res = newTemp(Ity_I64);
+ new_arg = newTemp(Ity_I64);
+ assign(arg_l, getDRegI64(nreg+i));
+ assign(new_arg, binop(Iop_Sub8x8, mkexpr(old_arg), mkU64(imm)));
+ assign(cur_mask, binop(cmp, mkU64(imm), mkexpr(old_arg)));
+ assign(new_mask, binop(Iop_Or64, mkexpr(old_mask), mkexpr(cur_mask)));
+ assign(new_res, binop(Iop_Or64,
+ mkexpr(old_res),
+ binop(Iop_And64,
+ binop(Iop_Perm8x8,
+ mkexpr(arg_l),
+ binop(Iop_And64,
+ mkexpr(old_arg),
+ mkexpr(cur_mask))),
+ mkexpr(cur_mask))));
+
+ old_arg = new_arg;
+ old_mask = new_mask;
+ old_res = new_res;
+ }
+ if (op) {
+ new_res = newTemp(Ity_I64);
+ assign(new_res, binop(Iop_Or64,
+ binop(Iop_And64,
+ getDRegI64(dreg),
+ unop(Iop_Not64, mkexpr(old_mask))),
+ mkexpr(old_res)));
+ old_res = new_res;
+ }
+
+ putDRegI64(dreg, mkexpr(old_res), condT);
+ DIP("vtb%c.8 d%u, {", op ? 'x' : 'l', dreg);
+ if (len > 0) {
+ DIP("d%u-d%u", nreg, nreg + len);
+ } else {
+ DIP("d%u", nreg);
+ }
+ DIP("}, d%u\n", mreg);
+ return True;
+}
+
+/* VDUP (scalar) */
+static
+Bool dis_neon_vdup ( UInt theInstr, IRTemp condT )
+{
+ UInt Q = (theInstr >> 6) & 1;
+ UInt dreg = ((theInstr >> 18) & 0x10) | ((theInstr >> 12) & 0xF);
+ UInt mreg = ((theInstr >> 1) & 0x10) | (theInstr & 0xF);
+ UInt imm4 = (theInstr >> 16) & 0xF;
+ UInt index;
+ UInt size;
+ IRTemp arg_m;
+ IRTemp res;
+ IROp op, op2;
+
+ if ((imm4 == 0) || (imm4 == 8))
+ return False;
+ if ((Q == 1) && ((dreg & 1) == 1))
+ return False;
+ if (Q)
+ dreg >>= 1;
+ arg_m = newTemp(Ity_I64);
+ assign(arg_m, getDRegI64(mreg));
+ if (Q)
+ res = newTemp(Ity_V128);
+ else
+ res = newTemp(Ity_I64);
+ if ((imm4 & 1) == 1) {
+ op = Q ? Iop_Dup8x16 : Iop_Dup8x8;
+ op2 = Iop_GetElem8x8;
+ index = imm4 >> 1;
+ size = 8;
+ } else if ((imm4 & 3) == 2) {
+ op = Q ? Iop_Dup16x8 : Iop_Dup16x4;
+ op2 = Iop_GetElem16x4;
+ index = imm4 >> 2;
+ size = 16;
+ } else if ((imm4 & 7) == 4) {
+ op = Q ? Iop_Dup32x4 : Iop_Dup32x2;
+ op2 = Iop_GetElem32x2;
+ index = imm4 >> 3;
+ size = 32;
+ } else {
+ return False; // can this ever happen?
+ }
+ assign(res, unop(op, binop(op2, mkexpr(arg_m), mkU8(index))));
+ if (Q) {
+ putQReg(dreg, mkexpr(res), condT);
+ } else {
+ putDRegI64(dreg, mkexpr(res), condT);
+ }
+ DIP("vdup.%d %c%d, d%d[%d]\n", size, Q ? 'q' : 'd', dreg, mreg, index);
+ return True;
+}
+
+/* A7.4.1 Three registers of the same length */
+static
+Bool dis_neon_data_3same ( UInt theInstr, IRTemp condT )
+{
+ UInt Q = (theInstr >> 6) & 1;
+ UInt dreg = get_neon_d_regno(theInstr);
+ UInt nreg = get_neon_n_regno(theInstr);
+ UInt mreg = get_neon_m_regno(theInstr);
+ UInt A = (theInstr >> 8) & 0xF;
+ UInt B = (theInstr >> 4) & 1;
+ UInt C = (theInstr >> 20) & 0x3;
+ UInt U = (theInstr >> 24) & 1;
+ UInt size = C;
+
+ IRTemp arg_n;
+ IRTemp arg_m;
+ IRTemp res;
+
+ if (Q) {
+ arg_n = newTemp(Ity_V128);
+ arg_m = newTemp(Ity_V128);
+ res = newTemp(Ity_V128);
+ assign(arg_n, getQReg(nreg));
+ assign(arg_m, getQReg(mreg));
+ } else {
+ arg_n = newTemp(Ity_I64);
+ arg_m = newTemp(Ity_I64);
+ res = newTemp(Ity_I64);
+ assign(arg_n, getDRegI64(nreg));
+ assign(arg_m, getDRegI64(mreg));
+ }
+
+ switch(A) {
+ case 0:
+ if (B == 0) {
+ /* VHADD */
+ ULong imm = 0;
+ IRExpr *imm_val;
+ IROp addOp;
+ IROp andOp;
+ IROp shOp;
+ char regType = Q ? 'q' : 'd';
+
+ if (size == 3)
+ return False;
+ switch(size) {
+ case 0: imm = 0x101010101010101LL; break;
+ case 1: imm = 0x1000100010001LL; break;
+ case 2: imm = 0x100000001LL; break;
+ default: vassert(0);
+ }
+ if (Q) {
+ imm_val = binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm));
+ andOp = Iop_AndV128;
+ } else {
+ imm_val = mkU64(imm);
+ andOp = Iop_And64;
+ }
+ if (U) {
+ switch(size) {
+ case 0:
+ addOp = Q ? Iop_Add8x16 : Iop_Add8x8;
+ shOp = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
+ break;
+ case 1:
+ addOp = Q ? Iop_Add16x8 : Iop_Add16x4;
+ shOp = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
+ break;
+ case 2:
+ addOp = Q ? Iop_Add32x4 : Iop_Add32x2;
+ shOp = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
+ break;
+ default:
+ vassert(0);
+ }
+ } else {
+ switch(size) {
+ case 0:
+ addOp = Q ? Iop_Add8x16 : Iop_Add8x8;
+ shOp = Q ? Iop_SarN8x16 : Iop_SarN8x8;
+ break;
+ case 1:
+ addOp = Q ? Iop_Add16x8 : Iop_Add16x4;
+ shOp = Q ? Iop_SarN16x8 : Iop_SarN16x4;
+ break;
+ case 2:
+ addOp = Q ? Iop_Add32x4 : Iop_Add32x2;
+ shOp = Q ? Iop_SarN32x4 : Iop_SarN32x2;
+ break;
+ default:
+ vassert(0);
+ }
+ }
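+         /* Editorial: the assign below is the standard widening-free
+            halving add, (a + b) >> 1 computed per lane as
+            (a >> 1) + (b >> 1) + (((a & 1) + (b & 1)) >> 1); e.g. for
+            vhadd.u8, a = 0xFF, b = 0x01: 0x7F + 0x00 + 0x01 = 0x80,
+            with no 9-bit intermediate needed. */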
+ assign(res,
+ binop(addOp,
+ binop(addOp,
+ binop(shOp, mkexpr(arg_m), mkU8(1)),
+ binop(shOp, mkexpr(arg_n), mkU8(1))),
+ binop(shOp,
+ binop(addOp,
+ binop(andOp, mkexpr(arg_m), imm_val),
+ binop(andOp, mkexpr(arg_n), imm_val)),
+ mkU8(1))));
+ DIP("vhadd.%c%d %c%d, %c%d, %c%d\n",
+ U ? 'u' : 's', 8 << size, regType,
+ dreg, regType, nreg, regType, mreg);
+ } else {
+ /* VQADD */
+ IROp op, op2;
+ IRTemp tmp;
+         HChar reg_t = Q ? 'q' : 'd';
+ if (Q) {
+ switch (size) {
+ case 0:
+ op = U ? Iop_QAdd8Ux16 : Iop_QAdd8Sx16;
+ op2 = Iop_Add8x16;
+ break;
+ case 1:
+ op = U ? Iop_QAdd16Ux8 : Iop_QAdd16Sx8;
+ op2 = Iop_Add16x8;
+ break;
+ case 2:
+ op = U ? Iop_QAdd32Ux4 : Iop_QAdd32Sx4;
+ op2 = Iop_Add32x4;
+ break;
+ case 3:
+ op = U ? Iop_QAdd64Ux2 : Iop_QAdd64Sx2;
+ op2 = Iop_Add64x2;
+ break;
+ default:
+ vassert(0);
+ }
+ } else {
+ switch (size) {
+ case 0:
+ op = U ? Iop_QAdd8Ux8 : Iop_QAdd8Sx8;
+ op2 = Iop_Add8x8;
+ break;
+ case 1:
+ op = U ? Iop_QAdd16Ux4 : Iop_QAdd16Sx4;
+ op2 = Iop_Add16x4;
+ break;
+ case 2:
+ op = U ? Iop_QAdd32Ux2 : Iop_QAdd32Sx2;
+ op2 = Iop_Add32x2;
+ break;
+ case 3:
+ op = U ? Iop_QAdd64Ux1 : Iop_QAdd64Sx1;
+ op2 = Iop_Add64;
+ break;
+ default:
+ vassert(0);
+ }
+ }
+ if (Q) {
+ tmp = newTemp(Ity_V128);
+ } else {
+ tmp = newTemp(Ity_I64);
+ }
+ assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
+#ifndef DISABLE_QC_FLAG
+ assign(tmp, binop(op2, mkexpr(arg_n), mkexpr(arg_m)));
+ setFlag_QC(mkexpr(res), mkexpr(tmp), Q, condT);
+#endif
+ DIP("vqadd.%c%d %c%d, %c%d, %c%d\n",
+ U ? 'u' : 's',
+ 8 << size, reg_t, dreg, reg_t, nreg, reg_t, mreg);
+ }
+ break;
+ case 1:
+ if (B == 0) {
+ /* VRHADD */
+ /* VRHADD C, A, B ::=
+ C = (A >> 1) + (B >> 1) + (((A & 1) + (B & 1) + 1) >> 1) */
+ IROp shift_op, add_op;
+ IRTemp cc;
+ ULong one = 1;
+ HChar reg_t = Q ? 'q' : 'd';
+ switch (size) {
+ case 0: one = (one << 8) | one; /* fall through */
+ case 1: one = (one << 16) | one; /* fall through */
+ case 2: one = (one << 32) | one; break;
+ case 3: return False;
+ default: vassert(0);
+ }
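+         /* Editorial worked example of the formula above for .u8:
+            A = 5, B = 4: (5 >> 1) + (4 >> 1) + ((1 + 0 + 1) >> 1)
+            = 2 + 2 + 1 = 5, which matches (5 + 4 + 1) >> 1 without
+            needing a widened intermediate. */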
+ if (Q) {
+ switch (size) {
+ case 0:
+ shift_op = U ? Iop_ShrN8x16 : Iop_SarN8x16;
+ add_op = Iop_Add8x16;
+ break;
+ case 1:
+ shift_op = U ? Iop_ShrN16x8 : Iop_SarN16x8;
+ add_op = Iop_Add16x8;
+ break;
+ case 2:
+ shift_op = U ? Iop_ShrN32x4 : Iop_SarN32x4;
+ add_op = Iop_Add32x4;
+ break;
+ case 3:
+ return False;
+ default:
+ vassert(0);
+ }
+ } else {
+ switch (size) {
+ case 0:
+ shift_op = U ? Iop_ShrN8x8 : Iop_SarN8x8;
+ add_op = Iop_Add8x8;
+ break;
+ case 1:
+ shift_op = U ? Iop_ShrN16x4 : Iop_SarN16x4;
+ add_op = Iop_Add16x4;
+ break;
+ case 2:
+ shift_op = U ? Iop_ShrN32x2 : Iop_SarN32x2;
+ add_op = Iop_Add32x2;
+ break;
+ case 3:
+ return False;
+ default:
+ vassert(0);
+ }
+ }
+ if (Q) {
+ cc = newTemp(Ity_V128);
+ assign(cc, binop(shift_op,
+ binop(add_op,
+ binop(add_op,
+ binop(Iop_AndV128,
+ mkexpr(arg_n),
+ binop(Iop_64HLtoV128,
+ mkU64(one),
+ mkU64(one))),
+ binop(Iop_AndV128,
+ mkexpr(arg_m),
+ binop(Iop_64HLtoV128,
+ mkU64(one),
+ mkU64(one)))),
+ binop(Iop_64HLtoV128,
+ mkU64(one),
+ mkU64(one))),
+ mkU8(1)));
+ assign(res, binop(add_op,
+ binop(add_op,
+ binop(shift_op,
+ mkexpr(arg_n),
+ mkU8(1)),
+ binop(shift_op,
+ mkexpr(arg_m),
+ mkU8(1))),
+ mkexpr(cc)));
+ } else {
+ cc = newTemp(Ity_I64);
+ assign(cc, binop(shift_op,
+ binop(add_op,
+ binop(add_op,
+ binop(Iop_And64,
+ mkexpr(arg_n),
+ mkU64(one)),
+ binop(Iop_And64,
+ mkexpr(arg_m),
+ mkU64(one))),
+ mkU64(one)),
+ mkU8(1)));
+ assign(res, binop(add_op,
+ binop(add_op,
+ binop(shift_op,
+ mkexpr(arg_n),
+ mkU8(1)),
+ binop(shift_op,
+ mkexpr(arg_m),
+ mkU8(1))),
+ mkexpr(cc)));
+ }
+ DIP("vrhadd.%c%d %c%d, %c%d, %c%d\n",
+ U ? 'u' : 's',
+ 8 << size, reg_t, dreg, reg_t, nreg, reg_t, mreg);
+ } else {
+ if (U == 0) {
+ switch(C) {
+ case 0: {
+ /* VAND */
+ HChar reg_t = Q ? 'q' : 'd';
+ if (Q) {
+ assign(res, binop(Iop_AndV128, mkexpr(arg_n),
+ mkexpr(arg_m)));
+ } else {
+ assign(res, binop(Iop_And64, mkexpr(arg_n),
+ mkexpr(arg_m)));
+ }
+ DIP("vand %c%d, %c%d, %c%d\n",
+ reg_t, dreg, reg_t, nreg, reg_t, mreg);
+ break;
+ }
+ case 1: {
+ /* VBIC */
+ HChar reg_t = Q ? 'q' : 'd';
+ if (Q) {
+ assign(res, binop(Iop_AndV128,mkexpr(arg_n),
+ unop(Iop_NotV128, mkexpr(arg_m))));
+ } else {
+ assign(res, binop(Iop_And64, mkexpr(arg_n),
+ unop(Iop_Not64, mkexpr(arg_m))));
+ }
+ DIP("vbic %c%d, %c%d, %c%d\n",
+ reg_t, dreg, reg_t, nreg, reg_t, mreg);
+ break;
+ }
+ case 2:
+               if (nreg != mreg) {
+ /* VORR */
+ HChar reg_t = Q ? 'q' : 'd';
+ if (Q) {
+ assign(res, binop(Iop_OrV128, mkexpr(arg_n),
+ mkexpr(arg_m)));
+ } else {
+ assign(res, binop(Iop_Or64, mkexpr(arg_n),
+ mkexpr(arg_m)));
+ }
+ DIP("vorr %c%d, %c%d, %c%d\n",
+ reg_t, dreg, reg_t, nreg, reg_t, mreg);
+ } else {
+ /* VMOV */
+ HChar reg_t = Q ? 'q' : 'd';
+ assign(res, mkexpr(arg_m));
+ DIP("vmov %c%d, %c%d\n", reg_t, dreg, reg_t, mreg);
+ }
+ break;
+ case 3:{
+ /* VORN */
+ HChar reg_t = Q ? 'q' : 'd';
+ if (Q) {
+ assign(res, binop(Iop_OrV128,mkexpr(arg_n),
+ unop(Iop_NotV128, mkexpr(arg_m))));
+ } else {
+ assign(res, binop(Iop_Or64, mkexpr(arg_n),
+ unop(Iop_Not64, mkexpr(arg_m))));
+ }
+ DIP("vorn %c%d, %c%d, %c%d\n",
+ reg_t, dreg, reg_t, nreg, reg_t, mreg);
+ break;
+ }
+ }
+ } else {
+ switch(C) {
+ case 0:
+ /* VEOR (XOR) */
+ if (Q) {
+ assign(res, binop(Iop_XorV128, mkexpr(arg_n),
+ mkexpr(arg_m)));
+ } else {
+ assign(res, binop(Iop_Xor64, mkexpr(arg_n),
+ mkexpr(arg_m)));
+ }
+ DIP("veor %c%u, %c%u, %c%u\n", Q ? 'q' : 'd', dreg,
+ Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
+ break;
+ case 1:
+ /* VBSL */
+ if (Q) {
+ IRTemp reg_d = newTemp(Ity_V128);
+ assign(reg_d, getQReg(dreg));
+ assign(res,
+ binop(Iop_OrV128,
+ binop(Iop_AndV128, mkexpr(arg_n),
+ mkexpr(reg_d)),
+ binop(Iop_AndV128,
+ mkexpr(arg_m),
+ unop(Iop_NotV128,
+ mkexpr(reg_d)) ) ) );
+ } else {
+ IRTemp reg_d = newTemp(Ity_I64);
+ assign(reg_d, getDRegI64(dreg));
+ assign(res,
+ binop(Iop_Or64,
+ binop(Iop_And64, mkexpr(arg_n),
+ mkexpr(reg_d)),
+ binop(Iop_And64,
+ mkexpr(arg_m),
+ unop(Iop_Not64, mkexpr(reg_d)))));
+ }
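+            /* Editorial: VBSL is a bitwise select with the mask taken
+               from the destination: res = (n & d) | (m & ~d), so bits
+               of n are chosen where d is 1 and bits of m where d is 0.
+               VBIT and VBIF below are the same select with m as the
+               mask. */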
+ DIP("vbsl %c%u, %c%u, %c%u\n",
+ Q ? 'q' : 'd', dreg,
+ Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
+ break;
+ case 2:
+ /* VBIT */
+ if (Q) {
+ IRTemp reg_d = newTemp(Ity_V128);
+ assign(reg_d, getQReg(dreg));
+ assign(res,
+ binop(Iop_OrV128,
+ binop(Iop_AndV128, mkexpr(arg_n),
+ mkexpr(arg_m)),
+ binop(Iop_AndV128,
+ mkexpr(reg_d),
+ unop(Iop_NotV128, mkexpr(arg_m)))));
+ } else {
+ IRTemp reg_d = newTemp(Ity_I64);
+ assign(reg_d, getDRegI64(dreg));
+ assign(res,
+ binop(Iop_Or64,
+ binop(Iop_And64, mkexpr(arg_n),
+ mkexpr(arg_m)),
+ binop(Iop_And64,
+ mkexpr(reg_d),
+ unop(Iop_Not64, mkexpr(arg_m)))));
+ }
+ DIP("vbit %c%u, %c%u, %c%u\n",
+ Q ? 'q' : 'd', dreg,
+ Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
+ break;
+ case 3:
+ /* VBIF */
+ if (Q) {
+ IRTemp reg_d = newTemp(Ity_V128);
+ assign(reg_d, getQReg(dreg));
+ assign(res,
+ binop(Iop_OrV128,
+ binop(Iop_AndV128, mkexpr(reg_d),
+ mkexpr(arg_m)),
+ binop(Iop_AndV128,
+ mkexpr(arg_n),
+ unop(Iop_NotV128, mkexpr(arg_m)))));
+ } else {
+ IRTemp reg_d = newTemp(Ity_I64);
+ assign(reg_d, getDRegI64(dreg));
+ assign(res,
+ binop(Iop_Or64,
+ binop(Iop_And64, mkexpr(reg_d),
+ mkexpr(arg_m)),
+ binop(Iop_And64,
+ mkexpr(arg_n),
+ unop(Iop_Not64, mkexpr(arg_m)))));
+ }
+ DIP("vbif %c%u, %c%u, %c%u\n",
+ Q ? 'q' : 'd', dreg,
+ Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
+ break;
+ }
+ }
+ }
+ break;
+ case 2:
+ if (B == 0) {
+ /* VHSUB */
+ /* (A >> 1) - (B >> 1) - (NOT (A) & B & 1) */
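+         /* Editorial sanity check of the formula, per lane for .s8:
+            A = 0, B = 1: (0 >> 1) - (1 >> 1) - (~0 & 1 & 1) = -1,
+            matching the flooring halved subtract (0 - 1) >> 1. */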
+ ULong imm = 0;
+ IRExpr *imm_val;
+ IROp subOp;
+ IROp notOp;
+ IROp andOp;
+ IROp shOp;
+ if (size == 3)
+ return False;
+ switch(size) {
+ case 0: imm = 0x101010101010101LL; break;
+ case 1: imm = 0x1000100010001LL; break;
+ case 2: imm = 0x100000001LL; break;
+ default: vassert(0);
+ }
+ if (Q) {
+ imm_val = binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm));
+ andOp = Iop_AndV128;
+ notOp = Iop_NotV128;
+ } else {
+ imm_val = mkU64(imm);
+ andOp = Iop_And64;
+ notOp = Iop_Not64;
+ }
+ if (U) {
+ switch(size) {
+ case 0:
+ subOp = Q ? Iop_Sub8x16 : Iop_Sub8x8;
+ shOp = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
+ break;
+ case 1:
+ subOp = Q ? Iop_Sub16x8 : Iop_Sub16x4;
+ shOp = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
+ break;
+ case 2:
+ subOp = Q ? Iop_Sub32x4 : Iop_Sub32x2;
+ shOp = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
+ break;
+ default:
+ vassert(0);
+ }
+ } else {
+ switch(size) {
+ case 0:
+ subOp = Q ? Iop_Sub8x16 : Iop_Sub8x8;
+ shOp = Q ? Iop_SarN8x16 : Iop_SarN8x8;
+ break;
+ case 1:
+ subOp = Q ? Iop_Sub16x8 : Iop_Sub16x4;
+ shOp = Q ? Iop_SarN16x8 : Iop_SarN16x4;
+ break;
+ case 2:
+ subOp = Q ? Iop_Sub32x4 : Iop_Sub32x2;
+ shOp = Q ? Iop_SarN32x4 : Iop_SarN32x2;
+ break;
+ default:
+ vassert(0);
+ }
+ }
+ assign(res,
+ binop(subOp,
+ binop(subOp,
+ binop(shOp, mkexpr(arg_n), mkU8(1)),
+ binop(shOp, mkexpr(arg_m), mkU8(1))),
+ binop(andOp,
+ binop(andOp,
+ unop(notOp, mkexpr(arg_n)),
+ mkexpr(arg_m)),
+ imm_val)));
+ DIP("vhsub.%c%u %c%u, %c%u, %c%u\n",
+ U ? 'u' : 's', 8 << size,
+ Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
+ mreg);
+ } else {
+ /* VQSUB */
+ IROp op, op2;
+ IRTemp tmp;
+ if (Q) {
+ switch (size) {
+ case 0:
+ op = U ? Iop_QSub8Ux16 : Iop_QSub8Sx16;
+ op2 = Iop_Sub8x16;
+ break;
+ case 1:
+ op = U ? Iop_QSub16Ux8 : Iop_QSub16Sx8;
+ op2 = Iop_Sub16x8;
+ break;
+ case 2:
+ op = U ? Iop_QSub32Ux4 : Iop_QSub32Sx4;
+ op2 = Iop_Sub32x4;
+ break;
+ case 3:
+ op = U ? Iop_QSub64Ux2 : Iop_QSub64Sx2;
+ op2 = Iop_Sub64x2;
+ break;
+ default:
+ vassert(0);
+ }
+ } else {
+ switch (size) {
+ case 0:
+ op = U ? Iop_QSub8Ux8 : Iop_QSub8Sx8;
+ op2 = Iop_Sub8x8;
+ break;
+ case 1:
+ op = U ? Iop_QSub16Ux4 : Iop_QSub16Sx4;
+ op2 = Iop_Sub16x4;
+ break;
+ case 2:
+ op = U ? Iop_QSub32Ux2 : Iop_QSub32Sx2;
+ op2 = Iop_Sub32x2;
+ break;
+ case 3:
+ op = U ? Iop_QSub64Ux1 : Iop_QSub64Sx1;
+ op2 = Iop_Sub64;
+ break;
+ default:
+ vassert(0);
+ }
+ }
+ if (Q)
+ tmp = newTemp(Ity_V128);
+ else
+ tmp = newTemp(Ity_I64);
+ assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
+#ifndef DISABLE_QC_FLAG
+ assign(tmp, binop(op2, mkexpr(arg_n), mkexpr(arg_m)));
+ setFlag_QC(mkexpr(res), mkexpr(tmp), Q, condT);
+#endif
+ DIP("vqsub.%c%u %c%u, %c%u, %c%u\n",
+ U ? 'u' : 's', 8 << size,
+ Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
+ mreg);
+ }
+ break;
+ case 3: {
+ IROp op;
+ if (Q) {
+ switch (size) {
+ case 0: op = U ? Iop_CmpGT8Ux16 : Iop_CmpGT8Sx16; break;
+ case 1: op = U ? Iop_CmpGT16Ux8 : Iop_CmpGT16Sx8; break;
+ case 2: op = U ? Iop_CmpGT32Ux4 : Iop_CmpGT32Sx4; break;
+ case 3: return False;
+ default: vassert(0);
+ }
+ } else {
+ switch (size) {
+ case 0: op = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8; break;
+ case 1: op = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4; break;
+            case 2: op = U ? Iop_CmpGT32Ux2 : Iop_CmpGT32Sx2; break;
+ case 3: return False;
+ default: vassert(0);
+ }
+ }
+ if (B == 0) {
+ /* VCGT */
+ assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
+ DIP("vcgt.%c%u %c%u, %c%u, %c%u\n",
+ U ? 'u' : 's', 8 << size,
+ Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
+ mreg);
+ } else {
+ /* VCGE */
+ /* VCGE res, argn, argm
+ is equal to
+ VCGT tmp, argm, argn
+ VNOT res, tmp */
+ assign(res,
+ unop(Q ? Iop_NotV128 : Iop_Not64,
+ binop(op, mkexpr(arg_m), mkexpr(arg_n))));
+ DIP("vcge.%c%u %c%u, %c%u, %c%u\n",
+ U ? 'u' : 's', 8 << size,
+ Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
+ mreg);
+ }
+ }
+ break;
+ case 4:
+ if (B == 0) {
+ /* VSHL */
+ IROp op, sub_op;
+ IRTemp tmp;
+ if (U) {
+ switch (size) {
+ case 0: op = Q ? Iop_Shl8x16 : Iop_Shl8x8; break;
+ case 1: op = Q ? Iop_Shl16x8 : Iop_Shl16x4; break;
+ case 2: op = Q ? Iop_Shl32x4 : Iop_Shl32x2; break;
+ case 3: op = Q ? Iop_Shl64x2 : Iop_Shl64; break;
+ default: vassert(0);
+ }
+ } else {
+ tmp = newTemp(Q ? Ity_V128 : Ity_I64);
+ switch (size) {
+ case 0:
+ op = Q ? Iop_Sar8x16 : Iop_Sar8x8;
+ sub_op = Q ? Iop_Sub8x16 : Iop_Sub8x8;
+ break;
+ case 1:
+ op = Q ? Iop_Sar16x8 : Iop_Sar16x4;
+ sub_op = Q ? Iop_Sub16x8 : Iop_Sub16x4;
+ break;
+ case 2:
+ op = Q ? Iop_Sar32x4 : Iop_Sar32x2;
+ sub_op = Q ? Iop_Sub32x4 : Iop_Sub32x2;
+ break;
+ case 3:
+ op = Q ? Iop_Sar64x2 : Iop_Sar64;
+ sub_op = Q ? Iop_Sub64x2 : Iop_Sub64;
+ break;
+ default:
+ vassert(0);
+ }
+ }
+ if (U) {
+ if (!Q && (size == 3))
+ assign(res, binop(op, mkexpr(arg_m),
+ unop(Iop_64to8, mkexpr(arg_n))));
+ else
+ assign(res, binop(op, mkexpr(arg_m), mkexpr(arg_n)));
+ } else {
+ if (Q)
+ assign(tmp, binop(sub_op,
+ binop(Iop_64HLtoV128, mkU64(0), mkU64(0)),
+ mkexpr(arg_n)));
+ else
+ assign(tmp, binop(sub_op, mkU64(0), mkexpr(arg_n)));
+ if (!Q && (size == 3))
+ assign(res, binop(op, mkexpr(arg_m),
+ unop(Iop_64to8, mkexpr(tmp))));
+ else
+ assign(res, binop(op, mkexpr(arg_m), mkexpr(tmp)));
+ }
+ DIP("vshl.%c%u %c%u, %c%u, %c%u\n",
+ U ? 'u' : 's', 8 << size,
+ Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, Q ? 'q' : 'd',
+ nreg);
+ } else {
+ /* VQSHL */
+ IROp op, op_rev, op_shrn, op_shln, cmp_neq, cmp_gt;
+ IRTemp tmp, shval, mask, old_shval;
+ UInt i;
+ ULong esize;
+ cmp_neq = Q ? Iop_CmpNEZ8x16 : Iop_CmpNEZ8x8;
+ cmp_gt = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8;
+ if (U) {
+ switch (size) {
+ case 0:
+ op = Q ? Iop_QShl8x16 : Iop_QShl8x8;
+ op_rev = Q ? Iop_Shr8x16 : Iop_Shr8x8;
+ op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
+ op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
+ break;
+ case 1:
+ op = Q ? Iop_QShl16x8 : Iop_QShl16x4;
+ op_rev = Q ? Iop_Shr16x8 : Iop_Shr16x4;
+ op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
+ op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
+ break;
+ case 2:
+ op = Q ? Iop_QShl32x4 : Iop_QShl32x2;
+ op_rev = Q ? Iop_Shr32x4 : Iop_Shr32x2;
+ op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
+ op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
+ break;
+ case 3:
+ op = Q ? Iop_QShl64x2 : Iop_QShl64x1;
+ op_rev = Q ? Iop_Shr64x2 : Iop_Shr64;
+ op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
+ op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
+ break;
+ default:
+ vassert(0);
+ }
+ } else {
+ switch (size) {
+ case 0:
+ op = Q ? Iop_QSal8x16 : Iop_QSal8x8;
+ op_rev = Q ? Iop_Sar8x16 : Iop_Sar8x8;
+ op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
+ op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
+ break;
+ case 1:
+ op = Q ? Iop_QSal16x8 : Iop_QSal16x4;
+ op_rev = Q ? Iop_Sar16x8 : Iop_Sar16x4;
+ op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
+ op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
+ break;
+ case 2:
+ op = Q ? Iop_QSal32x4 : Iop_QSal32x2;
+ op_rev = Q ? Iop_Sar32x4 : Iop_Sar32x2;
+ op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
+ op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
+ break;
+ case 3:
+ op = Q ? Iop_QSal64x2 : Iop_QSal64x1;
+ op_rev = Q ? Iop_Sar64x2 : Iop_Sar64;
+ op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
+ op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
+ break;
+ default:
+ vassert(0);
+ }
+ }
+ if (Q) {
+ tmp = newTemp(Ity_V128);
+ shval = newTemp(Ity_V128);
+ mask = newTemp(Ity_V128);
+ } else {
+ tmp = newTemp(Ity_I64);
+ shval = newTemp(Ity_I64);
+ mask = newTemp(Ity_I64);
+ }
+ assign(res, binop(op, mkexpr(arg_m), mkexpr(arg_n)));
+#ifndef DISABLE_QC_FLAG
+         /* Only the least significant byte of the second operand is
+            used; replicate it across the whole vector element. */
+ assign(shval, binop(op_shrn,
+ binop(op_shln,
+ mkexpr(arg_n),
+ mkU8((8 << size) - 8)),
+ mkU8((8 << size) - 8)));
+ for(i = 0; i < size; i++) {
+ old_shval = shval;
+ shval = newTemp(Q ? Ity_V128 : Ity_I64);
+ assign(shval, binop(Q ? Iop_OrV128 : Iop_Or64,
+ mkexpr(old_shval),
+ binop(op_shln,
+ mkexpr(old_shval),
+ mkU8(8 << i))));
+ }
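+         /* Editorial illustration of the replication loop above for
+            size == 2 (32-bit lanes): a lane 0x12345603 becomes
+            0x00000003 after the shl/shr pair, 0x00000303 after i == 0
+            and 0x03030303 after i == 1, i.e. the shift byte is copied
+            into every byte of its lane. */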
+         /* If the shift amount is greater than or equal to the element
+            size and the element is non-zero, the QC flag must be set. */
+ esize = (8 << size) - 1;
+ esize = (esize << 8) | esize;
+ esize = (esize << 16) | esize;
+ esize = (esize << 32) | esize;
+ setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
+ binop(cmp_gt, mkexpr(shval),
+ Q ? mkU128(esize) : mkU64(esize)),
+ unop(cmp_neq, mkexpr(arg_m))),
+ Q ? mkU128(0) : mkU64(0),
+ Q, condT);
+         /* Otherwise the QC flag must be set if the shift amount is
+            positive and shifting the result back right by the same
+            amount does not reproduce the shifted operand. */
+ assign(mask, binop(cmp_gt, mkexpr(shval),
+ Q ? mkU128(0) : mkU64(0)));
+ if (!Q && size == 3)
+ assign(tmp, binop(op_rev, mkexpr(res),
+ unop(Iop_64to8, mkexpr(arg_n))));
+ else
+ assign(tmp, binop(op_rev, mkexpr(res), mkexpr(arg_n)));
+ setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
+ mkexpr(tmp), mkexpr(mask)),
+ binop(Q ? Iop_AndV128 : Iop_And64,
+ mkexpr(arg_m), mkexpr(mask)),
+ Q, condT);
+#endif
+ DIP("vqshl.%c%u %c%u, %c%u, %c%u\n",
+ U ? 'u' : 's', 8 << size,
+ Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, Q ? 'q' : 'd',
+ nreg);
+ }
+ break;
+ case 5:
+ if (B == 0) {
+ /* VRSHL */
+ IROp op, op_shrn, op_shln, cmp_gt, op_sub, op_add;
+ IRTemp shval, old_shval, imm_val, round;
+ UInt i;
+ ULong imm;
+ cmp_gt = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8;
+ imm = 1L;
+ switch (size) {
+ case 0: imm = (imm << 8) | imm; /* fall through */
+ case 1: imm = (imm << 16) | imm; /* fall through */
+ case 2: imm = (imm << 32) | imm; /* fall through */
+ case 3: break;
+ default: vassert(0);
+ }
+ imm_val = newTemp(Q ? Ity_V128 : Ity_I64);
+ round = newTemp(Q ? Ity_V128 : Ity_I64);
+ assign(imm_val, Q ? mkU128(imm) : mkU64(imm));
+ if (U) {
+ switch (size) {
+ case 0:
+ op = Q ? Iop_Shl8x16 : Iop_Shl8x8;
+ op_sub = Q ? Iop_Sub8x16 : Iop_Sub8x8;
+ op_add = Q ? Iop_Add8x16 : Iop_Add8x8;
+ op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
+ op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
+ break;
+ case 1:
+ op = Q ? Iop_Shl16x8 : Iop_Shl16x4;
+ op_sub = Q ? Iop_Sub16x8 : Iop_Sub16x4;
+ op_add = Q ? Iop_Add16x8 : Iop_Add16x4;
+ op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
+ op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
+ break;
+ case 2:
+ op = Q ? Iop_Shl32x4 : Iop_Shl32x2;
+ op_sub = Q ? Iop_Sub32x4 : Iop_Sub32x2;
+ op_add = Q ? Iop_Add32x4 : Iop_Add32x2;
+ op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
+ op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
+ break;
+ case 3:
+ op = Q ? Iop_Shl64x2 : Iop_Shl64;
+ op_sub = Q ? Iop_Sub64x2 : Iop_Sub64;
+ op_add = Q ? Iop_Add64x2 : Iop_Add64;
+ op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
+ op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
+ break;
+ default:
+ vassert(0);
+ }
+ } else {
+ switch (size) {
+ case 0:
+ op = Q ? Iop_Sal8x16 : Iop_Sal8x8;
+ op_sub = Q ? Iop_Sub8x16 : Iop_Sub8x8;
+ op_add = Q ? Iop_Add8x16 : Iop_Add8x8;
+ op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
+ op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
+ break;
+ case 1:
+ op = Q ? Iop_Sal16x8 : Iop_Sal16x4;
+ op_sub = Q ? Iop_Sub16x8 : Iop_Sub16x4;
+ op_add = Q ? Iop_Add16x8 : Iop_Add16x4;
+ op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
+ op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
+ break;
+ case 2:
+ op = Q ? Iop_Sal32x4 : Iop_Sal32x2;
+ op_sub = Q ? Iop_Sub32x4 : Iop_Sub32x2;
+ op_add = Q ? Iop_Add32x4 : Iop_Add32x2;
+ op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
+ op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
+ break;
+ case 3:
+ op = Q ? Iop_Sal64x2 : Iop_Sal64x1;
+ op_sub = Q ? Iop_Sub64x2 : Iop_Sub64;
+ op_add = Q ? Iop_Add64x2 : Iop_Add64;
+ op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
+ op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
+ break;
+ default:
+ vassert(0);
+ }
+ }
+ if (Q) {
+ shval = newTemp(Ity_V128);
+ } else {
+ shval = newTemp(Ity_I64);
+ }
+         /* Only the least significant byte of the second operand is
+            used; replicate it across the whole vector element. */
+ assign(shval, binop(op_shrn,
+ binop(op_shln,
+ mkexpr(arg_n),
+ mkU8((8 << size) - 8)),
+ mkU8((8 << size) - 8)));
+ for (i = 0; i < size; i++) {
+ old_shval = shval;
+ shval = newTemp(Q ? Ity_V128 : Ity_I64);
+ assign(shval, binop(Q ? Iop_OrV128 : Iop_Or64,
+ mkexpr(old_shval),
+ binop(op_shln,
+ mkexpr(old_shval),
+ mkU8(8 << i))));
+ }
+ /* Compute the result */
+ if (!Q && size == 3 && U) {
+ assign(round, binop(Q ? Iop_AndV128 : Iop_And64,
+ binop(op,
+ mkexpr(arg_m),
+ unop(Iop_64to8,
+ binop(op_add,
+ mkexpr(arg_n),
+ mkexpr(imm_val)))),
+ binop(Q ? Iop_AndV128 : Iop_And64,
+ mkexpr(imm_val),
+ binop(cmp_gt,
+ Q ? mkU128(0) : mkU64(0),
+ mkexpr(arg_n)))));
+ assign(res, binop(op_add,
+ binop(op,
+ mkexpr(arg_m),
+ unop(Iop_64to8, mkexpr(arg_n))),
+ mkexpr(round)));
+ } else {
+ assign(round, binop(Q ? Iop_AndV128 : Iop_And64,
+ binop(op,
+ mkexpr(arg_m),
+ binop(op_add,
+ mkexpr(arg_n),
+ mkexpr(imm_val))),
+ binop(Q ? Iop_AndV128 : Iop_And64,
+ mkexpr(imm_val),
+ binop(cmp_gt,
+ Q ? mkU128(0) : mkU64(0),
+ mkexpr(arg_n)))));
+ assign(res, binop(op_add,
+ binop(op, mkexpr(arg_m), mkexpr(arg_n)),
+ mkexpr(round)));
+ }
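+         /* Editorial rounding illustration: for a negative (right)
+            shift the "round" term recovers the last bit shifted out,
+            e.g. vrshl.s8 with arg_n = -2, arg_m = 6: op gives
+            6 >> 2 = 1 and round = bit 1 of 6 = 1, so res = 2, which
+            equals (6 + 2) >> 2. */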
+ DIP("vrshl.%c%u %c%u, %c%u, %c%u\n",
+ U ? 'u' : 's', 8 << size,
+ Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, Q ? 'q' : 'd',
+ nreg);
+ } else {
+ /* VQRSHL */
+ IROp op, op_rev, op_shrn, op_shln, cmp_neq, cmp_gt, op_sub, op_add;
+ IRTemp tmp, shval, mask, old_shval, imm_val, round;
+ UInt i;
+ ULong esize, imm;
+ cmp_neq = Q ? Iop_CmpNEZ8x16 : Iop_CmpNEZ8x8;
+ cmp_gt = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8;
+ imm = 1L;
+ switch (size) {
+ case 0: imm = (imm << 8) | imm; /* fall through */
+ case 1: imm = (imm << 16) | imm; /* fall through */
+ case 2: imm = (imm << 32) | imm; /* fall through */
+ case 3: break;
+ default: vassert(0);
+ }
+ imm_val = newTemp(Q ? Ity_V128 : Ity_I64);
+ round = newTemp(Q ? Ity_V128 : Ity_I64);
+ assign(imm_val, Q ? mkU128(imm) : mkU64(imm));
+ if (U) {
+ switch (size) {
+ case 0:
+ op = Q ? Iop_QShl8x16 : Iop_QShl8x8;
+ op_sub = Q ? Iop_Sub8x16 : Iop_Sub8x8;
+ op_add = Q ? Iop_Add8x16 : Iop_Add8x8;
+ op_rev = Q ? Iop_Shr8x16 : Iop_Shr8x8;
+ op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
+ op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
+ break;
+ case 1:
+ op = Q ? Iop_QShl16x8 : Iop_QShl16x4;
+ op_sub = Q ? Iop_Sub16x8 : Iop_Sub16x4;
+ op_add = Q ? Iop_Add16x8 : Iop_Add16x4;
+ op_rev = Q ? Iop_Shr16x8 : Iop_Shr16x4;
+ op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
+ op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
+ break;
+ case 2:
+ op = Q ? Iop_QShl32x4 : Iop_QShl32x2;
+ op_sub = Q ? Iop_Sub32x4 : Iop_Sub32x2;
+ op_add = Q ? Iop_Add32x4 : Iop_Add32x2;
+ op_rev = Q ? Iop_Shr32x4 : Iop_Shr32x2;
+ op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
+ op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
+ break;
+ case 3:
+ op = Q ? Iop_QShl64x2 : Iop_QShl64x1;
+ op_sub = Q ? Iop_Sub64x2 : Iop_Sub64;
+ op_add = Q ? Iop_Add64x2 : Iop_Add64;
+ op_rev = Q ? Iop_Shr64x2 : Iop_Shr64;
+ op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
+ op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
+ break;
+ default:
+ vassert(0);
+ }
+ } else {
+ switch (size) {
+ case 0:
+ op = Q ? Iop_QSal8x16 : Iop_QSal8x8;
+ op_sub = Q ? Iop_Sub8x16 : Iop_Sub8x8;
+ op_add = Q ? Iop_Add8x16 : Iop_Add8x8;
+ op_rev = Q ? Iop_Sar8x16 : Iop_Sar8x8;
+ op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
+ op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
+ break;
+ case 1:
+ op = Q ? Iop_QSal16x8 : Iop_QSal16x4;
+ op_sub = Q ? Iop_Sub16x8 : Iop_Sub16x4;
+ op_add = Q ? Iop_Add16x8 : Iop_Add16x4;
+ op_rev = Q ? Iop_Sar16x8 : Iop_Sar16x4;
+ op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
+ op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
+ break;
+ case 2:
+ op = Q ? Iop_QSal32x4 : Iop_QSal32x2;
+ op_sub = Q ? Iop_Sub32x4 : Iop_Sub32x2;
+ op_add = Q ? Iop_Add32x4 : Iop_Add32x2;
+ op_rev = Q ? Iop_Sar32x4 : Iop_Sar32x2;
+ op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
+ op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
+ break;
+ case 3:
+ op = Q ? Iop_QSal64x2 : Iop_QSal64x1;
+ op_sub = Q ? Iop_Sub64x2 : Iop_Sub64;
+ op_add = Q ? Iop_Add64x2 : Iop_Add64;
+ op_rev = Q ? Iop_Sar64x2 : Iop_Sar64;
+ op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
+ op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
+ break;
+ default:
+ vassert(0);
+ }
+ }
+ if (Q) {
+ tmp = newTemp(Ity_V128);
+ shval = newTemp(Ity_V128);
+ mask = newTemp(Ity_V128);
+ } else {
+ tmp = newTemp(Ity_I64);
+ shval = newTemp(Ity_I64);
+ mask = newTemp(Ity_I64);
+ }
+         /* Only the least significant byte of the second operand is
+            used; replicate it across the whole vector element. */
+ assign(shval, binop(op_shrn,
+ binop(op_shln,
+ mkexpr(arg_n),
+ mkU8((8 << size) - 8)),
+ mkU8((8 << size) - 8)));
+ for (i = 0; i < size; i++) {
+ old_shval = shval;
+ shval = newTemp(Q ? Ity_V128 : Ity_I64);
+ assign(shval, binop(Q ? Iop_OrV128 : Iop_Or64,
+ mkexpr(old_shval),
+ binop(op_shln,
+ mkexpr(old_shval),
+ mkU8(8 << i))));
+ }
+ /* Compute the result */
+ assign(round, binop(Q ? Iop_AndV128 : Iop_And64,
+ binop(op,
+ mkexpr(arg_m),
+ binop(op_add,
+ mkexpr(arg_n),
+ mkexpr(imm_val))),
+ binop(Q ? Iop_AndV128 : Iop_And64,
+ mkexpr(imm_val),
+ binop(cmp_gt,
+ Q ? mkU128(0) : mkU64(0),
+ mkexpr(arg_n)))));
+ assign(res, binop(op_add,
+ binop(op, mkexpr(arg_m), mkexpr(arg_n)),
+ mkexpr(round)));
+#ifndef DISABLE_QC_FLAG
+         /* If the shift amount is greater than or equal to the element
+            size and the element is non-zero, the QC flag must be set. */
+ esize = (8 << size) - 1;
+ esize = (esize << 8) | esize;
+ esize = (esize << 16) | esize;
+ esize = (esize << 32) | esize;
+ setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
+ binop(cmp_gt, mkexpr(shval),
+ Q ? mkU128(esize) : mkU64(esize)),
+ unop(cmp_neq, mkexpr(arg_m))),
+ Q ? mkU128(0) : mkU64(0),
+ Q, condT);
+         /* Otherwise the QC flag must be set if the shift amount is
+            positive and shifting the result back right by the same
+            amount does not reproduce the shifted operand. */
+ assign(mask, binop(cmp_gt, mkexpr(shval),
+ Q ? mkU128(0) : mkU64(0)));
+ if (!Q && size == 3)
+ assign(tmp, binop(op_rev, mkexpr(res),
+ unop(Iop_64to8, mkexpr(arg_n))));
+ else
+ assign(tmp, binop(op_rev, mkexpr(res), mkexpr(arg_n)));
+ setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
+ mkexpr(tmp), mkexpr(mask)),
+ binop(Q ? Iop_AndV128 : Iop_And64,
+ mkexpr(arg_m), mkexpr(mask)),
+ Q, condT);
+#endif
+ DIP("vqrshl.%c%u %c%u, %c%u, %c%u\n",
+ U ? 'u' : 's', 8 << size,
+ Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, Q ? 'q' : 'd',
+ nreg);
+ }
+ break;
+ case 6:
+ /* VMAX, VMIN */
+ if (B == 0) {
+ /* VMAX */
+ IROp op;
+ if (U == 0) {
+ switch (size) {
+ case 0: op = Q ? Iop_Max8Sx16 : Iop_Max8Sx8; break;
+ case 1: op = Q ? Iop_Max16Sx8 : Iop_Max16Sx4; break;
+ case 2: op = Q ? Iop_Max32Sx4 : Iop_Max32Sx2; break;
+ case 3: return False;
+ default: vassert(0);
+ }
+ } else {
+ switch (size) {
+ case 0: op = Q ? Iop_Max8Ux16 : Iop_Max8Ux8; break;
+ case 1: op = Q ? Iop_Max16Ux8 : Iop_Max16Ux4; break;
+ case 2: op = Q ? Iop_Max32Ux4 : Iop_Max32Ux2; break;
+ case 3: return False;
+ default: vassert(0);
+ }
+ }
+ assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
+ DIP("vmax.%c%u %c%u, %c%u, %c%u\n",
+ U ? 'u' : 's', 8 << size,
+ Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
+ mreg);
+ } else {
+ /* VMIN */
+ IROp op;
+ if (U == 0) {
+ switch (size) {
+ case 0: op = Q ? Iop_Min8Sx16 : Iop_Min8Sx8; break;
+ case 1: op = Q ? Iop_Min16Sx8 : Iop_Min16Sx4; break;
+ case 2: op = Q ? Iop_Min32Sx4 : Iop_Min32Sx2; break;
+ case 3: return False;
+ default: vassert(0);
+ }
+ } else {
+ switch (size) {
+ case 0: op = Q ? Iop_Min8Ux16 : Iop_Min8Ux8; break;
+ case 1: op = Q ? Iop_Min16Ux8 : Iop_Min16Ux4; break;
+ case 2: op = Q ? Iop_Min32Ux4 : Iop_Min32Ux2; break;
+ case 3: return False;
+ default: vassert(0);
+ }
+ }
+ assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
+ DIP("vmin.%c%u %c%u, %c%u, %c%u\n",
+ U ? 'u' : 's', 8 << size,
+ Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
+ mreg);
+ }
+ break;
+ case 7:
+ if (B == 0) {
+ /* VABD */
+ IROp op_cmp, op_sub;
+ IRTemp cond;
+ if ((theInstr >> 23) & 1) {
+ vpanic("VABDL should not be in dis_neon_data_3same\n");
+ }
+ if (Q) {
+ switch (size) {
+ case 0:
+ op_cmp = U ? Iop_CmpGT8Ux16 : Iop_CmpGT8Sx16;
+ op_sub = Iop_Sub8x16;
+ break;
+ case 1:
+ op_cmp = U ? Iop_CmpGT16Ux8 : Iop_CmpGT16Sx8;
+ op_sub = Iop_Sub16x8;
+ break;
+ case 2:
+ op_cmp = U ? Iop_CmpGT32Ux4 : Iop_CmpGT32Sx4;
+ op_sub = Iop_Sub32x4;
+ break;
+ case 3:
+ return False;
+ default:
+ vassert(0);
+ }
+ } else {
+ switch (size) {
+ case 0:
+ op_cmp = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8;
+ op_sub = Iop_Sub8x8;
+ break;
+ case 1:
+ op_cmp = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4;
+ op_sub = Iop_Sub16x4;
+ break;
+ case 2:
+ op_cmp = U ? Iop_CmpGT32Ux2 : Iop_CmpGT32Sx2;
+ op_sub = Iop_Sub32x2;
+ break;
+ case 3:
+ return False;
+ default:
+ vassert(0);
+ }
+ }
+ if (Q) {
+ cond = newTemp(Ity_V128);
+ } else {
+ cond = newTemp(Ity_I64);
+ }
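+            /* Editorial: the assigns below compute a branchless
+               |n - m|: cond = (n > m) as an all-ones/all-zeros lane
+               mask, then res = ((n - m) & cond) | ((m - n) & ~cond),
+               keeping whichever difference is non-negative per lane. */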
+ assign(cond, binop(op_cmp, mkexpr(arg_n), mkexpr(arg_m)));
+ assign(res, binop(Q ? Iop_OrV128 : Iop_Or64,
+ binop(Q ? Iop_AndV128 : Iop_And64,
+ binop(op_sub, mkexpr(arg_n),
+ mkexpr(arg_m)),
+ mkexpr(cond)),
+ binop(Q ? Iop_AndV128 : Iop_And64,
+ binop(op_sub, mkexpr(arg_m),
+ mkexpr(arg_n)),
+ unop(Q ? Iop_NotV128 : Iop_Not64,
+ mkexpr(cond)))));
+ DIP("vabd.%c%u %c%u, %c%u, %c%u\n",
+ U ? 'u' : 's', 8 << size,
+ Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
+ mreg);
+ } else {
+ /* VABA */
+ IROp op_cmp, op_sub, op_add;
+ IRTemp cond, acc, tmp;
+ if ((theInstr >> 23) & 1) {
+ vpanic("VABAL should not be in dis_neon_data_3same");
+ }
+ if (Q) {
+ switch (size) {
+ case 0:
+ op_cmp = U ? Iop_CmpGT8Ux16 : Iop_CmpGT8Sx16;
+ op_sub = Iop_Sub8x16;
+ op_add = Iop_Add8x16;
+ break;
+ case 1:
+ op_cmp = U ? Iop_CmpGT16Ux8 : Iop_CmpGT16Sx8;
+ op_sub = Iop_Sub16x8;
+ op_add = Iop_Add16x8;
+ break;
+ case 2:
+ op_cmp = U ? Iop_CmpGT32Ux4 : Iop_CmpGT32Sx4;
+ op_sub = Iop_Sub32x4;
+ op_add = Iop_Add32x4;
+ break;
+ case 3:
+ return False;
+ default:
+ vassert(0);
+ }
+ } else {
+ switch (size) {
+ case 0:
+ op_cmp = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8;
+ op_sub = Iop_Sub8x8;
+ op_add = Iop_Add8x8;
+ break;
+ case 1:
+ op_cmp = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4;
+ op_sub = Iop_Sub16x4;
+ op_add = Iop_Add16x4;
+ break;
+ case 2:
+ op_cmp = U ? Iop_CmpGT32Ux2 : Iop_CmpGT32Sx2;
+ op_sub = Iop_Sub32x2;
+ op_add = Iop_Add32x2;
+ break;
+ case 3:
+ return False;
+ default:
+ vassert(0);
+ }
+ }
+ if (Q) {
+ cond = newTemp(Ity_V128);
+ acc = newTemp(Ity_V128);
+ tmp = newTemp(Ity_V128);
+ assign(acc, getQReg(dreg));
+ } else {
+ cond = newTemp(Ity_I64);
+ acc = newTemp(Ity_I64);
+ tmp = newTemp(Ity_I64);
+ assign(acc, getDRegI64(dreg));
+ }
+ assign(cond, binop(op_cmp, mkexpr(arg_n), mkexpr(arg_m)));
+ assign(tmp, binop(Q ? Iop_OrV128 : Iop_Or64,
+ binop(Q ? Iop_AndV128 : Iop_And64,
+ binop(op_sub, mkexpr(arg_n),
+ mkexpr(arg_m)),
+ mkexpr(cond)),
+ binop(Q ? Iop_AndV128 : Iop_And64,
+ binop(op_sub, mkexpr(arg_m),
+ mkexpr(arg_n)),
+ unop(Q ? Iop_NotV128 : Iop_Not64,
+ mkexpr(cond)))));
+ assign(res, binop(op_add, mkexpr(acc), mkexpr(tmp)));
+ DIP("vaba.%c%u %c%u, %c%u, %c%u\n",
+ U ? 'u' : 's', 8 << size,
+ Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
+ mreg);
+ }
+ break;
+ case 8:
+ if (B == 0) {
+ IROp op;
+ if (U == 0) {
+ /* VADD */
+ switch (size) {
+ case 0: op = Q ? Iop_Add8x16 : Iop_Add8x8; break;
+ case 1: op = Q ? Iop_Add16x8 : Iop_Add16x4; break;
+ case 2: op = Q ? Iop_Add32x4 : Iop_Add32x2; break;
+ case 3: op = Q ? Iop_Add64x2 : Iop_Add64; break;
+ default: vassert(0);
+ }
+ DIP("vadd.i%u %c%u, %c%u, %c%u\n",
+ 8 << size, Q ? 'q' : 'd',
+ dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
+ } else {
+ /* VSUB */
+ switch (size) {
+ case 0: op = Q ? Iop_Sub8x16 : Iop_Sub8x8; break;
+ case 1: op = Q ? Iop_Sub16x8 : Iop_Sub16x4; break;
+ case 2: op = Q ? Iop_Sub32x4 : Iop_Sub32x2; break;
+ case 3: op = Q ? Iop_Sub64x2 : Iop_Sub64; break;
+ default: vassert(0);
+ }
+ DIP("vsub.i%u %c%u, %c%u, %c%u\n",
+ 8 << size, Q ? 'q' : 'd',
+ dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
+ }
+ assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
+ } else {
+ IROp op;
+ switch (size) {
+ case 0: op = Q ? Iop_CmpNEZ8x16 : Iop_CmpNEZ8x8; break;
+ case 1: op = Q ? Iop_CmpNEZ16x8 : Iop_CmpNEZ16x4; break;
+ case 2: op = Q ? Iop_CmpNEZ32x4 : Iop_CmpNEZ32x2; break;
+ case 3: op = Q ? Iop_CmpNEZ64x2 : Iop_CmpwNEZ64; break;
+ default: vassert(0);
+ }
+ if (U == 0) {
+ /* VTST */
+ assign(res, unop(op, binop(Q ? Iop_AndV128 : Iop_And64,
+ mkexpr(arg_n),
+ mkexpr(arg_m))));
+ DIP("vtst.%u %c%u, %c%u, %c%u\n",
+ 8 << size, Q ? 'q' : 'd',
+ dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
+ } else {
+ /* VCEQ */
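+            /* Editorial: lanes are equal exactly when their XOR is
+               zero, so VCEQ is computed as NOT(CmpNEZ(n ^ m)) per
+               lane, e.g. equal lanes yield all ones. */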
+ assign(res, unop(Q ? Iop_NotV128 : Iop_Not64,
+ unop(op,
+ binop(Q ? Iop_XorV128 : Iop_Xor64,
+ mkexpr(arg_n),
+ mkexpr(arg_m)))));
+ DIP("vceq.i%u %c%u, %c%u, %c%u\n",
+ 8 << size, Q ? 'q' : 'd',
+ dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
+ }
+ }
+ break;
+ case 9:
+ if (B == 0) {
+ /* VMLA, VMLS (integer) */
+ IROp op, op2;
+ UInt P = (theInstr >> 24) & 1;
+ if (P) {
+ switch (size) {
+ case 0:
+ op = Q ? Iop_Mul8x16 : Iop_Mul8x8;
+ op2 = Q ? Iop_Sub8x16 : Iop_Sub8x8;
+ break;
+ case 1:
+ op = Q ? Iop_Mul16x8 : Iop_Mul16x4;
+ op2 = Q ? Iop_Sub16x8 : Iop_Sub16x4;
+ break;
+ case 2:
+ op = Q ? Iop_Mul32x4 : Iop_Mul32x2;
+ op2 = Q ? Iop_Sub32x4 : Iop_Sub32x2;
+ break;
+ case 3:
+ return False;
+ default:
+ vassert(0);
+ }
+ } else {
+ switch (size) {
+ case 0:
+ op = Q ? Iop_Mul8x16 : Iop_Mul8x8;
+ op2 = Q ? Iop_Add8x16 : Iop_Add8x8;
+ break;
+ case 1:
+ op = Q ? Iop_Mul16x8 : Iop_Mul16x4;
+ op2 = Q ? Iop_Add16x8 : Iop_Add16x4;
+ break;
+ case 2:
+ op = Q ? Iop_Mul32x4 : Iop_Mul32x2;
+ op2 = Q ? Iop_Add32x4 : Iop_Add32x2;
+ break;
+ case 3:
+ return False;
+ default:
+ vassert(0);
+ }
+ }
+ assign(res, binop(op2,
+ Q ? getQReg(dreg) : getDRegI64(dreg),
+ binop(op, mkexpr(arg_n), mkexpr(arg_m))));
+ DIP("vml%c.i%u %c%u, %c%u, %c%u\n",
+ P ? 's' : 'a', 8 << size,
+ Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
+ mreg);
+ } else {
+ /* VMUL */
+ IROp op;
+ UInt P = (theInstr >> 24) & 1;
+ if (P) {
+ switch (size) {
+ case 0:
+ op = Q ? Iop_PolynomialMul8x16 : Iop_PolynomialMul8x8;
+ break;
+ case 1: case 2: case 3: return False;
+ default: vassert(0);
+ }
+ } else {
+ switch (size) {
+ case 0: op = Q ? Iop_Mul8x16 : Iop_Mul8x8; break;
+ case 1: op = Q ? Iop_Mul16x8 : Iop_Mul16x4; break;
+ case 2: op = Q ? Iop_Mul32x4 : Iop_Mul32x2; break;
+ case 3: return False;
+ default: vassert(0);
+ }
+ }
+ assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
+ DIP("vmul.%c%u %c%u, %c%u, %c%u\n",
+ P ? 'p' : 'i', 8 << size,
+ Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
+ mreg);
+ }
+ break;
+ case 10: {
+ /* VPMAX, VPMIN */
+ UInt P = (theInstr >> 4) & 1;
+ IROp op;
+ if (Q)
+ return False;
+ if (P) {
+ switch (size) {
+ case 0: op = U ? Iop_PwMin8Ux8 : Iop_PwMin8Sx8; break;
+ case 1: op = U ? Iop_PwMin16Ux4 : Iop_PwMin16Sx4; break;
+ case 2: op = U ? Iop_PwMin32Ux2 : Iop_PwMin32Sx2; break;
+ case 3: return False;
+ default: vassert(0);
+ }
+ } else {
+ switch (size) {
+ case 0: op = U ? Iop_PwMax8Ux8 : Iop_PwMax8Sx8; break;
+ case 1: op = U ? Iop_PwMax16Ux4 : Iop_PwMax16Sx4; break;
+ case 2: op = U ? Iop_PwMax32Ux2 : Iop_PwMax32Sx2; break;
+ case 3: return False;
+ default: vassert(0);
+ }
+ }
+ assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
+ DIP("vp%s.%c%u %c%u, %c%u, %c%u\n",
+ P ? "min" : "max", U ? 'u' : 's',
+ 8 << size, Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg,
+ Q ? 'q' : 'd', mreg);
+ break;
+ }
+ case 11:
+ if (B == 0) {
+ if (U == 0) {
+ /* VQDMULH */
+            IROp op, op2;
+ ULong imm;
+ switch (size) {
+ case 0: case 3:
+ return False;
+ case 1:
+ op = Q ? Iop_QDMulHi16Sx8 : Iop_QDMulHi16Sx4;
+ op2 = Q ? Iop_CmpEQ16x8 : Iop_CmpEQ16x4;
+ imm = 1LL << 15;
+ imm = (imm << 16) | imm;
+ imm = (imm << 32) | imm;
+ break;
+ case 2:
+ op = Q ? Iop_QDMulHi32Sx4 : Iop_QDMulHi32Sx2;
+ op2 = Q ? Iop_CmpEQ32x4 : Iop_CmpEQ32x2;
+ imm = 1LL << 31;
+ imm = (imm << 32) | imm;
+ break;
+ default:
+ vassert(0);
+ }
+ assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
+#ifndef DISABLE_QC_FLAG
+ setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
+ binop(op2, mkexpr(arg_n),
+ Q ? mkU128(imm) : mkU64(imm)),
+ binop(op2, mkexpr(arg_m),
+ Q ? mkU128(imm) : mkU64(imm))),
+ Q ? mkU128(0) : mkU64(0),
+ Q, condT);
+#endif
+ DIP("vqdmulh.s%u %c%u, %c%u, %c%u\n",
+ 8 << size, Q ? 'q' : 'd',
+ dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
+ } else {
+ /* VQRDMULH */
+            IROp op, op2;
+ ULong imm;
+ switch(size) {
+ case 0: case 3:
+ return False;
+ case 1:
+ imm = 1LL << 15;
+ imm = (imm << 16) | imm;
+ imm = (imm << 32) | imm;
+ op = Q ? Iop_QRDMulHi16Sx8 : Iop_QRDMulHi16Sx4;
+ op2 = Q ? Iop_CmpEQ16x8 : Iop_CmpEQ16x4;
+ break;
+ case 2:
+ imm = 1LL << 31;
+ imm = (imm << 32) | imm;
+ op = Q ? Iop_QRDMulHi32Sx4 : Iop_QRDMulHi32Sx2;
+ op2 = Q ? Iop_CmpEQ32x4 : Iop_CmpEQ32x2;
+ break;
+ default:
+ vassert(0);
+ }
+ assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
+#ifndef DISABLE_QC_FLAG
+ setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
+ binop(op2, mkexpr(arg_n),
+ Q ? mkU128(imm) : mkU64(imm)),
+ binop(op2, mkexpr(arg_m),
+ Q ? mkU128(imm) : mkU64(imm))),
+ Q ? mkU128(0) : mkU64(0),
+ Q, condT);
+#endif
+ DIP("vqrdmulh.s%u %c%u, %c%u, %c%u\n",
+ 8 << size, Q ? 'q' : 'd',
+ dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
+ }
+ } else {
+ if (U == 0) {
+ /* VPADD */
+ IROp op;
+ if (Q)
+ return False;
+ switch (size) {
+ case 0: op = Q ? Iop_PwAdd8x16 : Iop_PwAdd8x8; break;
+ case 1: op = Q ? Iop_PwAdd16x8 : Iop_PwAdd16x4; break;
+ case 2: op = Q ? Iop_PwAdd32x4 : Iop_PwAdd32x2; break;
+ case 3: return False;
+ default: vassert(0);
+ }
+ assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
+ DIP("vpadd.i%d %c%u, %c%u, %c%u\n",
+ 8 << size, Q ? 'q' : 'd',
+ dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
+ }
+ }
+ break;
+      /* The remaining cases are floating-point SIMD */
+ case 13:
+ if (B == 0) {
+ IROp op;
+ if (U == 0) {
+ if ((C >> 1) == 0) {
+ /* VADD */
+               op = Q ? Iop_Add32Fx4 : Iop_Add32Fx2;
+ DIP("vadd.f32 %c%u, %c%u, %c%u\n",
+ Q ? 'q' : 'd', dreg,
+ Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
+ } else {
+ /* VSUB */
+               op = Q ? Iop_Sub32Fx4 : Iop_Sub32Fx2;
+ DIP("vsub.f32 %c%u, %c%u, %c%u\n",
+ Q ? 'q' : 'd', dreg,
+ Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
+ }
+ } else {
+ if ((C >> 1) == 0) {
+ /* VPADD */
+ if (Q)
+ return False;
+ op = Iop_PwAdd32Fx2;
+ DIP("vpadd.f32 d%u, d%u, d%u\n", dreg, nreg, mreg);
+ } else {
+ /* VABD */
+ if (Q) {
+ assign(res, unop(Iop_Abs32Fx4,
+ binop(Iop_Sub32Fx4,
+ mkexpr(arg_n),
+ mkexpr(arg_m))));
+ } else {
+ assign(res, unop(Iop_Abs32Fx2,
+ binop(Iop_Sub32Fx2,
+ mkexpr(arg_n),
+ mkexpr(arg_m))));
+ }
+ DIP("vabd.f32 %c%u, %c%u, %c%u\n",
+ Q ? 'q' : 'd', dreg,
+ Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
+ break;
+ }
+ }
+ assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
+ } else {
+ if (U == 0) {
+ /* VMLA, VMLS */
+ IROp op, op2;
+ UInt P = (theInstr >> 21) & 1;
+ if (P) {
+ switch (size & 1) {
+ case 0:
+ op = Q ? Iop_Mul32Fx4 : Iop_Mul32Fx2;
+ op2 = Q ? Iop_Sub32Fx4 : Iop_Sub32Fx2;
+ break;
+ case 1: return False;
+ default: vassert(0);
+ }
+ } else {
+ switch (size & 1) {
+ case 0:
+ op = Q ? Iop_Mul32Fx4 : Iop_Mul32Fx2;
+ op2 = Q ? Iop_Add32Fx4 : Iop_Add32Fx2;
+ break;
+ case 1: return False;
+ default: vassert(0);
+ }
+ }
+ assign(res, binop(op2,
+ Q ? getQReg(dreg) : getDRegI64(dreg),
+ binop(op, mkexpr(arg_n), mkexpr(arg_m))));
+
+ DIP("vml%c.f32 %c%u, %c%u, %c%u\n",
+ P ? 's' : 'a', Q ? 'q' : 'd',
+ dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
+ } else {
+ /* VMUL */
+ IROp op;
+ if ((C >> 1) != 0)
+ return False;
+            op = Q ? Iop_Mul32Fx4 : Iop_Mul32Fx2;
+ assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
+ DIP("vmul.f32 %c%u, %c%u, %c%u\n",
+ Q ? 'q' : 'd', dreg,
+ Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
+ }
+ }
+ break;
+ case 14:
+ if (B == 0) {
+ if (U == 0) {
+ if ((C >> 1) == 0) {
+ /* VCEQ */
+ IROp op;
+ if ((theInstr >> 20) & 1)
+ return False;
+ op = Q ? Iop_CmpEQ32Fx4 : Iop_CmpEQ32Fx2;
+ assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
+ DIP("vceq.f32 %c%u, %c%u, %c%u\n",
+ Q ? 'q' : 'd', dreg,
+ Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
+ } else {
+ return False;
+ }
+ } else {
+ if ((C >> 1) == 0) {
+ /* VCGE */
+ IROp op;
+ if ((theInstr >> 20) & 1)
+ return False;
+ op = Q ? Iop_CmpGE32Fx4 : Iop_CmpGE32Fx2;
+ assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
+ DIP("vcge.f32 %c%u, %c%u, %c%u\n",
+ Q ? 'q' : 'd', dreg,
+ Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
+ } else {
+ /* VCGT */
+ IROp op;
+ if ((theInstr >> 20) & 1)
+ return False;
+ op = Q ? Iop_CmpGT32Fx4 : Iop_CmpGT32Fx2;
+ assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
+ DIP("vcgt.f32 %c%u, %c%u, %c%u\n",
+ Q ? 'q' : 'd', dreg,
+ Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
+ }
+ }
+ } else {
+ if (U == 1) {
+ /* VACGE, VACGT */
+ UInt op_bit = (theInstr >> 21) & 1;
+ IROp op, op2;
+ op2 = Q ? Iop_Abs32Fx4 : Iop_Abs32Fx2;
+ if (op_bit) {
+ op = Q ? Iop_CmpGT32Fx4 : Iop_CmpGT32Fx2;
+ assign(res, binop(op,
+ unop(op2, mkexpr(arg_n)),
+ unop(op2, mkexpr(arg_m))));
+ } else {
+ op = Q ? Iop_CmpGE32Fx4 : Iop_CmpGE32Fx2;
+ assign(res, binop(op,
+ unop(op2, mkexpr(arg_n)),
+ unop(op2, mkexpr(arg_m))));
+ }
+ DIP("vacg%c.f32 %c%u, %c%u, %c%u\n", op_bit ? 't' : 'e',
+ Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg,
+ Q ? 'q' : 'd', mreg);
+ }
+ }
+ break;
+ case 15:
+ if (B == 0) {
+ if (U == 0) {
+ /* VMAX, VMIN */
+ IROp op;
+ if ((theInstr >> 20) & 1)
+ return False;
+ if ((theInstr >> 21) & 1) {
+ op = Q ? Iop_Min32Fx4 : Iop_Min32Fx2;
+ DIP("vmin.f32 %c%u, %c%u, %c%u\n", Q ? 'q' : 'd', dreg,
+ Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
+ } else {
+ op = Q ? Iop_Max32Fx4 : Iop_Max32Fx2;
+ DIP("vmax.f32 %c%u, %c%u, %c%u\n", Q ? 'q' : 'd', dreg,
+ Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
+ }
+ assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
+ } else {
+ /* VPMAX, VPMIN */
+ IROp op;
+ if (Q)
+ return False;
+ if ((theInstr >> 20) & 1)
+ return False;
+ if ((theInstr >> 21) & 1) {
+ op = Iop_PwMin32Fx2;
+ DIP("vpmin.f32 d%u, d%u, d%u\n", dreg, nreg, mreg);
+ } else {
+ op = Iop_PwMax32Fx2;
+ DIP("vpmax.f32 d%u, d%u, d%u\n", dreg, nreg, mreg);
+ }
+ assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
+ }
+ } else {
+ if (U == 0) {
+ if ((C >> 1) == 0) {
+ /* VRECPS */
+ if ((theInstr >> 20) & 1)
+ return False;
+ assign(res, binop(Q ? Iop_Recps32Fx4 : Iop_Recps32Fx2,
+ mkexpr(arg_n),
+ mkexpr(arg_m)));
+ DIP("vrecps.f32 %c%u, %c%u, %c%u\n", Q ? 'q' : 'd', dreg,
+ Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
+ } else {
+ /* VRSQRTS */
+ if ((theInstr >> 20) & 1)
+ return False;
+ assign(res, binop(Q ? Iop_Rsqrts32Fx4 : Iop_Rsqrts32Fx2,
+ mkexpr(arg_n),
+ mkexpr(arg_m)));
+ DIP("vrsqrts.f32 %c%u, %c%u, %c%u\n", Q ? 'q' : 'd', dreg,
+ Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
+ }
+ }
+ }
+ break;
+ }
+
+ if (Q) {
+ putQReg(dreg, mkexpr(res), condT);
+ } else {
+ putDRegI64(dreg, mkexpr(res), condT);
+ }
+
+ return True;
+}
+
+/* A7.4.2 Three registers of different length */
+static
+Bool dis_neon_data_3diff ( UInt theInstr, IRTemp condT )
+{
+ UInt A = (theInstr >> 8) & 0xf;
+ UInt B = (theInstr >> 20) & 3;
+ UInt U = (theInstr >> 24) & 1;
+ UInt P = (theInstr >> 9) & 1;
+ UInt mreg = get_neon_m_regno(theInstr);
+ UInt nreg = get_neon_n_regno(theInstr);
+ UInt dreg = get_neon_d_regno(theInstr);
+ UInt size = B;
+ ULong imm;
+ IRTemp res, arg_m, arg_n, cond, tmp;
+ IROp cvt, cvt2, cmp, op, op2, sh, add;
+ switch (A) {
+ case 0: case 1: case 2: case 3:
+ /* VADDL, VADDW, VSUBL, VSUBW */
+ if (dreg & 1)
+ return False;
+ dreg >>= 1;
+ size = B;
+ switch (size) {
+ case 0:
+ cvt = U ? Iop_Longen8Ux8 : Iop_Longen8Sx8;
+ op = (A & 2) ? Iop_Sub16x8 : Iop_Add16x8;
+ break;
+ case 1:
+ cvt = U ? Iop_Longen16Ux4 : Iop_Longen16Sx4;
+ op = (A & 2) ? Iop_Sub32x4 : Iop_Add32x4;
+ break;
+ case 2:
+ cvt = U ? Iop_Longen32Ux2 : Iop_Longen32Sx2;
+ op = (A & 2) ? Iop_Sub64x2 : Iop_Add64x2;
+ break;
+ case 3:
+ return False;
+ default:
+ vassert(0);
+ }
+ arg_n = newTemp(Ity_V128);
+ arg_m = newTemp(Ity_V128);
+ if (A & 1) {
+ if (nreg & 1)
+ return False;
+ nreg >>= 1;
+ assign(arg_n, getQReg(nreg));
+ } else {
+ assign(arg_n, unop(cvt, getDRegI64(nreg)));
+ }
+ assign(arg_m, unop(cvt, getDRegI64(mreg)));
+ putQReg(dreg, binop(op, mkexpr(arg_n), mkexpr(arg_m)),
+ condT);
+ DIP("v%s%c.%c%u q%u, %c%u, d%u\n", (A & 2) ? "sub" : "add",
+ (A & 1) ? 'w' : 'l', U ? 'u' : 's', 8 << size, dreg,
+ (A & 1) ? 'q' : 'd', nreg, mreg);
+ return True;
+ case 4:
+ /* VADDHN, VRADDHN */
+ if (mreg & 1)
+ return False;
+ mreg >>= 1;
+ if (nreg & 1)
+ return False;
+ nreg >>= 1;
+ size = B;
+ switch (size) {
+ case 0:
+ op = Iop_Add16x8;
+ cvt = Iop_Shorten16x8;
+ sh = Iop_ShrN16x8;
+ imm = 1U << 7;
+ imm = (imm << 16) | imm;
+ imm = (imm << 32) | imm;
+ break;
+ case 1:
+ op = Iop_Add32x4;
+ cvt = Iop_Shorten32x4;
+ sh = Iop_ShrN32x4;
+ imm = 1U << 15;
+ imm = (imm << 32) | imm;
+ break;
+ case 2:
+ op = Iop_Add64x2;
+ cvt = Iop_Shorten64x2;
+ sh = Iop_ShrN64x2;
+ imm = 1U << 31;
+ break;
+ case 3:
+ return False;
+ default:
+ vassert(0);
+ }
+ tmp = newTemp(Ity_V128);
+ res = newTemp(Ity_V128);
+ assign(tmp, binop(op, getQReg(nreg), getQReg(mreg)));
+ if (U) {
+ /* VRADDHN */
+ assign(res, binop(op, mkexpr(tmp),
+ binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm))));
+ } else {
+ assign(res, mkexpr(tmp));
+ }
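+      /* Editorial: for VRADDHN the constant adds 1 << 7 to each
+         16-bit lane (for .i16) before the high byte is taken, e.g.
+         (0x0040 + 0x0041 + 0x0080) >> 8 = 1 where plain VADDHN would
+         truncate 0x0081 >> 8 to 0. */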
+ putDRegI64(dreg, unop(cvt, binop(sh, mkexpr(res), mkU8(8 << size))),
+ condT);
+ DIP("v%saddhn.i%u d%u, q%u, q%u\n", U ? "r" : "", 16 << size, dreg,
+ nreg, mreg);
+ return True;
+ case 5:
+ /* VABAL */
+ if (!((theInstr >> 23) & 1)) {
+ vpanic("VABA should not be in dis_neon_data_3diff\n");
+ }
+ if (dreg & 1)
+ return False;
+ dreg >>= 1;
+ switch (size) {
+ case 0:
+ cmp = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8;
+ cvt = U ? Iop_Longen8Ux8 : Iop_Longen8Sx8;
+ cvt2 = Iop_Longen8Sx8;
+ op = Iop_Sub16x8;
+ op2 = Iop_Add16x8;
+ break;
+ case 1:
+ cmp = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4;
+ cvt = U ? Iop_Longen16Ux4 : Iop_Longen16Sx4;
+ cvt2 = Iop_Longen16Sx4;
+ op = Iop_Sub32x4;
+ op2 = Iop_Add32x4;
+ break;
+ case 2:
+ cmp = U ? Iop_CmpGT32Ux2 : Iop_CmpGT32Sx2;
+ cvt = U ? Iop_Longen32Ux2 : Iop_Longen32Sx2;
+ cvt2 = Iop_Longen32Sx2;
+ op = Iop_Sub64x2;
+ op2 = Iop_Add64x2;
+ break;
+ case 3:
+ return False;
+ default:
+ vassert(0);
+ }
+ arg_n = newTemp(Ity_V128);
+ arg_m = newTemp(Ity_V128);
+ cond = newTemp(Ity_V128);
+ res = newTemp(Ity_V128);
+ assign(arg_n, unop(cvt, getDRegI64(nreg)));
+ assign(arg_m, unop(cvt, getDRegI64(mreg)));
+ assign(cond, unop(cvt2, binop(cmp, getDRegI64(nreg),
+ getDRegI64(mreg))));
+ assign(res, binop(op2,
+ binop(Iop_OrV128,
+ binop(Iop_AndV128,
+ binop(op, mkexpr(arg_n), mkexpr(arg_m)),
+ mkexpr(cond)),
+ binop(Iop_AndV128,
+ binop(op, mkexpr(arg_m), mkexpr(arg_n)),
+ unop(Iop_NotV128, mkexpr(cond)))),
+ getQReg(dreg)));
+ putQReg(dreg, mkexpr(res), condT);
+ DIP("vabal.%c%u q%u, d%u, d%u\n", U ? 'u' : 's', 8 << size, dreg,
+ nreg, mreg);
+ return True;
+ case 6:
+ /* VSUBHN, VRSUBHN */
+ if (mreg & 1)
+ return False;
+ mreg >>= 1;
+ if (nreg & 1)
+ return False;
+ nreg >>= 1;
+ size = B;
+ switch (size) {
+ case 0:
+ op = Iop_Sub16x8;
+ op2 = Iop_Add16x8;
+ cvt = Iop_Shorten16x8;
+ sh = Iop_ShrN16x8;
+ imm = 1U << 7;
+ imm = (imm << 16) | imm;
+ imm = (imm << 32) | imm;
+ break;
+ case 1:
+ op = Iop_Sub32x4;
+ op2 = Iop_Add32x4;
+ cvt = Iop_Shorten32x4;
+ sh = Iop_ShrN32x4;
+ imm = 1U << 15;
+ imm = (imm << 32) | imm;
+ break;
+ case 2:
+ op = Iop_Sub64x2;
+ op2 = Iop_Add64x2;
+ cvt = Iop_Shorten64x2;
+ sh = Iop_ShrN64x2;
+ imm = 1U << 31;
+ break;
+ case 3:
+ return False;
+ default:
+ vassert(0);
+ }
+ tmp = newTemp(Ity_V128);
+ res = newTemp(Ity_V128);
+ assign(tmp, binop(op, getQReg(nreg), getQReg(mreg)));
+ if (U) {
+ /* VRSUBHN */
+ assign(res, binop(op2, mkexpr(tmp),
+ binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm))));
+ } else {
+ assign(res, mkexpr(tmp));
+ }
+ putDRegI64(dreg, unop(cvt, binop(sh, mkexpr(res), mkU8(8 << size))),
+ condT);
+ DIP("v%ssubhn.i%u d%u, q%u, q%u\n", U ? "r" : "", 16 << size, dreg,
+ nreg, mreg);
+ return True;
+ case 7:
+ /* VABDL */
+ if (!((theInstr >> 23) & 1)) {
+ vpanic("VABL should not be in dis_neon_data_3diff\n");
+ }
+ if (dreg & 1)
+ return False;
+ dreg >>= 1;
+ switch (size) {
+ case 0:
+ cmp = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8;
+ cvt = U ? Iop_Longen8Ux8 : Iop_Longen8Sx8;
+ cvt2 = Iop_Longen8Sx8;
+ op = Iop_Sub16x8;
+ break;
+ case 1:
+ cmp = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4;
+ cvt = U ? Iop_Longen16Ux4 : Iop_Longen16Sx4;
+ cvt2 = Iop_Longen16Sx4;
+ op = Iop_Sub32x4;
+ break;
+ case 2:
+ cmp = U ? Iop_CmpGT32Ux2 : Iop_CmpGT32Sx2;
+ cvt = U ? Iop_Longen32Ux2 : Iop_Longen32Sx2;
+ cvt2 = Iop_Longen32Sx2;
+ op = Iop_Sub64x2;
+ break;
+ case 3:
+ return False;
+ default:
+ vassert(0);
+ }
+ arg_n = newTemp(Ity_V128);
+ arg_m = newTemp(Ity_V128);
+ cond = newTemp(Ity_V128);
+ res = newTemp(Ity_V128);
+ assign(arg_n, unop(cvt, getDRegI64(nreg)));
+ assign(arg_m, unop(cvt, getDRegI64(mreg)));
+ assign(cond, unop(cvt2, binop(cmp, getDRegI64(nreg),
+ getDRegI64(mreg))));
+ assign(res, binop(Iop_OrV128,
+ binop(Iop_AndV128,
+ binop(op, mkexpr(arg_n), mkexpr(arg_m)),
+ mkexpr(cond)),
+ binop(Iop_AndV128,
+ binop(op, mkexpr(arg_m), mkexpr(arg_n)),
+ unop(Iop_NotV128, mkexpr(cond)))));
+ putQReg(dreg, mkexpr(res), condT);
+ DIP("vabdl.%c%u q%u, d%u, d%u\n", U ? 'u' : 's', 8 << size, dreg,
+ nreg, mreg);
+ return True;
+ case 8:
+ case 10:
+ /* VMLAL, VMLSL (integer) */
+ if (dreg & 1)
+ return False;
+ dreg >>= 1;
+ size = B;
+ switch (size) {
+ case 0:
+ op = U ? Iop_Mull8Ux8 : Iop_Mull8Sx8;
+ op2 = P ? Iop_Sub16x8 : Iop_Add16x8;
+ break;
+ case 1:
+ op = U ? Iop_Mull16Ux4 : Iop_Mull16Sx4;
+ op2 = P ? Iop_Sub32x4 : Iop_Add32x4;
+ break;
+ case 2:
+ op = U ? Iop_Mull32Ux2 : Iop_Mull32Sx2;
+ op2 = P ? Iop_Sub64x2 : Iop_Add64x2;
+ break;
+ case 3:
+ return False;
+ default:
+ vassert(0);
+ }
+ res = newTemp(Ity_V128);
+ assign(res, binop(op, getDRegI64(nreg),getDRegI64(mreg)));
+ putQReg(dreg, binop(op2, getQReg(dreg), mkexpr(res)), condT);
+ DIP("vml%cl.%c%u q%u, d%u, d%u\n", P ? 's' : 'a', U ? 'u' : 's',
+ 8 << size, dreg, nreg, mreg);
+ return True;
+ case 9:
+ case 11:
+ /* VQDMLAL, VQDMLSL */
+ if (U)
+ return False;
+ if (dreg & 1)
+ return False;
+ dreg >>= 1;
+ size = B;
+ switch (size) {
+ case 0: case 3:
+ return False;
+ case 1:
+ op = Iop_QDMulLong16Sx4;
+ cmp = Iop_CmpEQ16x4;
+ add = P ? Iop_QSub32Sx4 : Iop_QAdd32Sx4;
+ op2 = P ? Iop_Sub32x4 : Iop_Add32x4;
+ imm = 1LL << 15;
+ imm = (imm << 16) | imm;
+ imm = (imm << 32) | imm;
+ break;
+ case 2:
+ op = Iop_QDMulLong32Sx2;
+ cmp = Iop_CmpEQ32x2;
+ add = P ? Iop_QSub64Sx2 : Iop_QAdd64Sx2;
+ op2 = P ? Iop_Sub64x2 : Iop_Add64x2;
+ imm = 1LL << 31;
+ imm = (imm << 32) | imm;
+ break;
+ default:
+ vassert(0);
+ }
+ res = newTemp(Ity_V128);
+ tmp = newTemp(Ity_V128);
+ assign(res, binop(op, getDRegI64(nreg), getDRegI64(mreg)));
+#ifndef DISABLE_QC_FLAG
+ assign(tmp, binop(op2, getQReg(dreg), mkexpr(res)));
+ setFlag_QC(mkexpr(tmp), binop(add, getQReg(dreg), mkexpr(res)),
+ True, condT);
+ setFlag_QC(binop(Iop_And64,
+ binop(cmp, getDRegI64(nreg), mkU64(imm)),
+ binop(cmp, getDRegI64(mreg), mkU64(imm))),
+ mkU64(0),
+ False, condT);
+#endif
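+      /* Editorial: the equality-with-0x8000 check above catches the
+         one input pair the doubling multiply cannot represent, e.g.
+         for .s16 both operands -32768: 2 * (-32768 * -32768) = 2^31,
+         which overflows the signed 32-bit product lane. */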
+ putQReg(dreg, binop(add, getQReg(dreg), mkexpr(res)), condT);
+ DIP("vqdml%cl.s%u q%u, d%u, d%u\n", P ? 's' : 'a', 8 << size, dreg,
+ nreg, mreg);
+ return True;
+ case 12:
+ case 14:
+ /* VMULL (integer or polynomial) */
+ if (dreg & 1)
+ return False;
+ dreg >>= 1;
+ size = B;
+ switch (size) {
+ case 0:
+ op = (U) ? Iop_Mull8Ux8 : Iop_Mull8Sx8;
+ if (P)
+ op = Iop_PolynomialMull8x8;
+ break;
+ case 1:
+ op = (U) ? Iop_Mull16Ux4 : Iop_Mull16Sx4;
+ break;
+ case 2:
+ op = (U) ? Iop_Mull32Ux2 : Iop_Mull32Sx2;
+ break;
+ default:
+ vassert(0);
+ }
+ putQReg(dreg, binop(op, getDRegI64(nreg),
+ getDRegI64(mreg)), condT);
+ DIP("vmull.%c%u q%u, d%u, d%u\n", P ? 'p' : (U ? 'u' : 's'),
+ 8 << size, dreg, nreg, mreg);
+ return True;
+ case 13:
+ /* VQDMULL */
+ if (U)
+ return False;
+ if (dreg & 1)
+ return False;
+ dreg >>= 1;
+ size = B;
+ switch (size) {
+ case 0:
+ case 3:
+ return False;
+ case 1:
+ op = Iop_QDMulLong16Sx4;
+ op2 = Iop_CmpEQ16x4;
+ imm = 1LL << 15;
+ imm = (imm << 16) | imm;
+ imm = (imm << 32) | imm;
+ break;
+ case 2:
+ op = Iop_QDMulLong32Sx2;
+ op2 = Iop_CmpEQ32x2;
+ imm = 1LL << 31;
+ imm = (imm << 32) | imm;
+ break;
+ default:
+ vassert(0);
+ }
+ putQReg(dreg, binop(op, getDRegI64(nreg), getDRegI64(mreg)),
+ condT);
+#ifndef DISABLE_QC_FLAG
+ setFlag_QC(binop(Iop_And64,
+ binop(op2, getDRegI64(nreg), mkU64(imm)),
+ binop(op2, getDRegI64(mreg), mkU64(imm))),
+ mkU64(0),
+ False, condT);
+#endif
+ DIP("vqdmull.s%u q%u, d%u, d%u\n", 8 << size, dreg, nreg, mreg);
+ return True;
+ default:
+ return False;
+ }
+ return False;
+}
+
+/* A7.4.3 Two registers and a scalar */
+static
+Bool dis_neon_data_2reg_and_scalar ( UInt theInstr, IRTemp condT )
+{
+# define INSN(_bMax,_bMin) SLICE_UInt(theInstr, (_bMax), (_bMin))
+ UInt U = INSN(24,24);
+ UInt dreg = get_neon_d_regno(theInstr & ~(1 << 6));
+ UInt nreg = get_neon_n_regno(theInstr & ~(1 << 6));
+ UInt mreg = get_neon_m_regno(theInstr & ~(1 << 6));
+ UInt size = INSN(21,20);
+ UInt index;
+ UInt Q = INSN(24,24);
+
+ if (INSN(27,25) != 1 || INSN(23,23) != 1
+ || INSN(6,6) != 1 || INSN(4,4) != 0)
+ return False;
+
+ /* VMLA, VMLS (scalar) */
+ if ((INSN(11,8) & BITS4(1,0,1,0)) == BITS4(0,0,0,0)) {
+ IRTemp res, arg_m, arg_n;
+ IROp dup, get, op, op2, add, sub;
+ if (Q) {
+ if ((dreg & 1) || (nreg & 1))
+ return False;
+ dreg >>= 1;
+ nreg >>= 1;
+ res = newTemp(Ity_V128);
+ arg_m = newTemp(Ity_V128);
+ arg_n = newTemp(Ity_V128);
+ assign(arg_n, getQReg(nreg));
+ switch(size) {
+ case 1:
+ dup = Iop_Dup16x8;
+ get = Iop_GetElem16x4;
+ index = mreg >> 3;
+ mreg &= 7;
+ break;
+ case 2:
+ dup = Iop_Dup32x4;
+ get = Iop_GetElem32x2;
+ index = mreg >> 4;
+ mreg &= 0xf;
+ break;
+ case 0:
+ case 3:
+ return False;
+ default:
+ vassert(0);
+ }
+ assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
+ } else {
+ res = newTemp(Ity_I64);
+ arg_m = newTemp(Ity_I64);
+ arg_n = newTemp(Ity_I64);
+ assign(arg_n, getDRegI64(nreg));
+ switch(size) {
+ case 1:
+ dup = Iop_Dup16x4;
+ get = Iop_GetElem16x4;
+ index = mreg >> 3;
+ mreg &= 7;
+ break;
+ case 2:
+ dup = Iop_Dup32x2;
+ get = Iop_GetElem32x2;
+ index = mreg >> 4;
+ mreg &= 0xf;
+ break;
+ case 0:
+ case 3:
+ return False;
+ default:
+ vassert(0);
+ }
+ assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
+ }
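+      /* Editorial note on the scalar decoding above: for a 16-bit
+         scalar the index and register share the M:Vm field as
+         index * 8 + register (e.g. 0x15 -> d5[2]); for a 32-bit
+         scalar it is index * 16 + register. */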
+ if (INSN(8,8)) {
+ switch (size) {
+ case 2:
+ op = Q ? Iop_Mul32Fx4 : Iop_Mul32Fx2;
+ add = Q ? Iop_Add32Fx4 : Iop_Add32Fx2;
+ sub = Q ? Iop_Sub32Fx4 : Iop_Sub32Fx2;
+ break;
+ case 0:
+ case 1:
+ case 3:
+ return False;
+ default:
+ vassert(0);
+ }
+ } else {
+ switch (size) {
+ case 1:
+ op = Q ? Iop_Mul16x8 : Iop_Mul16x4;
+ add = Q ? Iop_Add16x8 : Iop_Add16x4;
+ sub = Q ? Iop_Sub16x8 : Iop_Sub16x4;
+ break;
+ case 2:
+ op = Q ? Iop_Mul32x4 : Iop_Mul32x2;
+ add = Q ? Iop_Add32x4 : Iop_Add32x2;
+ sub = Q ? Iop_Sub32x4 : Iop_Sub32x2;
+ break;
+ case 0:
+ case 3:
+ return False;
+ default:
+ vassert(0);
+ }
+ }
+ op2 = INSN(10,10) ? sub : add;
+ assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
+ if (Q)
+ putQReg(dreg, binop(op2, getQReg(dreg), mkexpr(res)),
+ condT);
+ else
+ putDRegI64(dreg, binop(op2, getDRegI64(dreg), mkexpr(res)),
+ condT);
+ DIP("vml%c.%c%u %c%u, %c%u, d%u[%u]\n", INSN(10,10) ? 's' : 'a',
+ INSN(8,8) ? 'f' : 'i', 8 << size,
+ Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, mreg, index);
+ return True;
+ }
+
+ /* VMLAL, VMLSL (scalar) */
+ if ((INSN(11,8) & BITS4(1,0,1,1)) == BITS4(0,0,1,0)) {
+ IRTemp res, arg_m, arg_n;
+ IROp dup, get, op, op2, add, sub;
+ if (dreg & 1)
+ return False;
+ dreg >>= 1;
+ res = newTemp(Ity_V128);
+ arg_m = newTemp(Ity_I64);
+ arg_n = newTemp(Ity_I64);
+ assign(arg_n, getDRegI64(nreg));
+ switch(size) {
+ case 1:
+ dup = Iop_Dup16x4;
+ get = Iop_GetElem16x4;
+ index = mreg >> 3;
+ mreg &= 7;
+ break;
+ case 2:
+ dup = Iop_Dup32x2;
+ get = Iop_GetElem32x2;
+ index = mreg >> 4;
+ mreg &= 0xf;
+ break;
+ case 0:
+ case 3:
+ return False;
+ default:
+ vassert(0);
+ }
+ assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
+ switch (size) {
+ case 1:
+ op = U ? Iop_Mull16Ux4 : Iop_Mull16Sx4;
+ add = Iop_Add32x4;
+ sub = Iop_Sub32x4;
+ break;
+ case 2:
+ op = U ? Iop_Mull32Ux2 : Iop_Mull32Sx2;
+ add = Iop_Add64x2;
+ sub = Iop_Sub64x2;
+ break;
+ case 0:
+ case 3:
+ return False;
+ default:
+ vassert(0);
+ }
+ op2 = INSN(10,10) ? sub : add;
+ assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
+ putQReg(dreg, binop(op2, getQReg(dreg), mkexpr(res)), condT);
+ DIP("vml%cl.%c%u q%u, d%u, d%u[%u]\n",
+ INSN(10,10) ? 's' : 'a', U ? 'u' : 's',
+ 8 << size, dreg, nreg, mreg, index);
+ return True;
+ }
+
+ /* VQDMLAL, VQDMLSL (scalar) */
+ if ((INSN(11,8) & BITS4(1,0,1,1)) == BITS4(0,0,1,1) && !U) {
+ IRTemp res, arg_m, arg_n, tmp;
+ IROp dup, get, op, op2, add, cmp;
+ UInt P = INSN(10,10);
+ ULong imm;
+ if (dreg & 1)
+ return False;
+ dreg >>= 1;
+ res = newTemp(Ity_V128);
+ arg_m = newTemp(Ity_I64);
+ arg_n = newTemp(Ity_I64);
+ assign(arg_n, getDRegI64(nreg));
+ switch(size) {
+ case 1:
+ dup = Iop_Dup16x4;
+ get = Iop_GetElem16x4;
+ index = mreg >> 3;
+ mreg &= 7;
+ break;
+ case 2:
+ dup = Iop_Dup32x2;
+ get = Iop_GetElem32x2;
+ index = mreg >> 4;
+ mreg &= 0xf;
+ break;
+ case 0:
+ case 3:
+ return False;
+ default:
+ vassert(0);
+ }
+ assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
+ switch (size) {
+ case 0:
+ case 3:
+ return False;
+ case 1:
+ op = Iop_QDMulLong16Sx4;
+ cmp = Iop_CmpEQ16x4;
+ add = P ? Iop_QSub32Sx4 : Iop_QAdd32Sx4;
+ op2 = P ? Iop_Sub32x4 : Iop_Add32x4;
+ imm = 1LL << 15;
+ imm = (imm << 16) | imm;
+ imm = (imm << 32) | imm;
+ break;
+ case 2:
+ op = Iop_QDMulLong32Sx2;
+ cmp = Iop_CmpEQ32x2;
+ add = P ? Iop_QSub64Sx2 : Iop_QAdd64Sx2;
+ op2 = P ? Iop_Sub64x2 : Iop_Add64x2;
+ imm = 1LL << 31;
+ imm = (imm << 32) | imm;
+ break;
+ default:
+ vassert(0);
+ }
+ res = newTemp(Ity_V128);
+ tmp = newTemp(Ity_V128);
+ assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
+#ifndef DISABLE_QC_FLAG
+ assign(tmp, binop(op2, getQReg(dreg), mkexpr(res)));
+ setFlag_QC(binop(Iop_And64,
+ binop(cmp, mkexpr(arg_n), mkU64(imm)),
+ binop(cmp, mkexpr(arg_m), mkU64(imm))),
+ mkU64(0),
+ False, condT);
+ setFlag_QC(mkexpr(tmp), binop(add, getQReg(dreg), mkexpr(res)),
+ True, condT);
+#endif
+ putQReg(dreg, binop(add, getQReg(dreg), mkexpr(res)), condT);
+ DIP("vqdml%cl.s%u q%u, d%u, d%u[%u]\n", P ? 's' : 'a', 8 << size,
+ dreg, nreg, mreg, index);
+ return True;
+ }
+
+ /* VMUL (by scalar) */
+ if ((INSN(11,8) & BITS4(1,1,1,0)) == BITS4(1,0,0,0)) {
+ IRTemp res, arg_m, arg_n;
+ IROp dup, get, op;
+ if (Q) {
+ if ((dreg & 1) || (nreg & 1))
+ return False;
+ dreg >>= 1;
+ nreg >>= 1;
+ res = newTemp(Ity_V128);
+ arg_m = newTemp(Ity_V128);
+ arg_n = newTemp(Ity_V128);
+ assign(arg_n, getQReg(nreg));
+ switch(size) {
+ case 1:
+ dup = Iop_Dup16x8;
+ get = Iop_GetElem16x4;
+ index = mreg >> 3;
+ mreg &= 7;
+ break;
+ case 2:
+ dup = Iop_Dup32x4;
+ get = Iop_GetElem32x2;
+ index = mreg >> 4;
+ mreg &= 0xf;
+ break;
+ case 0:
+ case 3:
+ return False;
+ default:
+ vassert(0);
+ }
+ assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
+ } else {
+ res = newTemp(Ity_I64);
+ arg_m = newTemp(Ity_I64);
+ arg_n = newTemp(Ity_I64);
+ assign(arg_n, getDRegI64(nreg));
+ switch(size) {
+ case 1:
+ dup = Iop_Dup16x4;
+ get = Iop_GetElem16x4;
+ index = mreg >> 3;
+ mreg &= 7;
+ break;
+ case 2:
+ dup = Iop_Dup32x2;
+ get = Iop_GetElem32x2;
+ index = mreg >> 4;
+ mreg &= 0xf;
+ break;
+ case 0:
+ case 3:
+ return False;
+ default:
+ vassert(0);
+ }
+ assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
+ }
+ switch (size) {
+ case 1:
+ op = Q ? Iop_Mul16x8 : Iop_Mul16x4;
+ break;
+ case 2:
+ op = Q ? Iop_Mul32x4 : Iop_Mul32x2;
+ break;
+ case 0:
+ case 3:
+ return False;
+ default:
+ vassert(0);
+ }
+ assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
+ if (Q)
+ putQReg(dreg, mkexpr(res), condT);
+ else
+ putDRegI64(dreg, mkexpr(res), condT);
+ DIP("vmul.i%u %c%u, %c%u, d%u[%u]\n", 8 << size, Q ? 'q' : 'd', dreg,
+ Q ? 'q' : 'd', nreg, mreg, index);
+ return True;
+ }
+
+ /* VMULL (scalar) */
+ if (INSN(11,8) == BITS4(1,0,1,0)) {
+ IRTemp res, arg_m, arg_n;
+ IROp dup, get, op;
+ if (dreg & 1)
+ return False;
+ dreg >>= 1;
+ res = newTemp(Ity_V128);
+ arg_m = newTemp(Ity_I64);
+ arg_n = newTemp(Ity_I64);
+ assign(arg_n, getDRegI64(nreg));
+ switch(size) {
+ case 1:
+ dup = Iop_Dup16x4;
+ get = Iop_GetElem16x4;
+ index = mreg >> 3;
+ mreg &= 7;
+ break;
+ case 2:
+ dup = Iop_Dup32x2;
+ get = Iop_GetElem32x2;
+ index = mreg >> 4;
+ mreg &= 0xf;
+ break;
+ case 0:
+ case 3:
+ return False;
+ default:
+ vassert(0);
+ }
+ assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
+ switch (size) {
+ case 1: op = U ? Iop_Mull16Ux4 : Iop_Mull16Sx4; break;
+ case 2: op = U ? Iop_Mull32Ux2 : Iop_Mull32Sx2; break;
+ case 0: case 3: return False;
+ default: vassert(0);
+ }
+ assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
+ putQReg(dreg, mkexpr(res), condT);
+ DIP("vmull.%c%u q%u, d%u, d%u[%u]\n", U ? 'u' : 's', 8 << size, dreg,
+ nreg, mreg, index);
+ return True;
+ }
+
+ /* VQDMULL */
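+ /* 2*(x*y) overflows the double-width result only when x and y are
+ both the most negative value, so QC is set exactly when
+ corresponding lanes of both operands equal 'imm' (INT16_MIN or
+ INT32_MIN replicated across lanes). */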
+ if (INSN(11,8) == BITS4(1,0,1,1) && !U) {
+ IROp op, op2, dup, get;
+ ULong imm;
+ IRTemp res, arg_m, arg_n;
+ if (dreg & 1)
+ return False;
+ dreg >>= 1;
+ res = newTemp(Ity_V128);
+ arg_m = newTemp(Ity_I64);
+ arg_n = newTemp(Ity_I64);
+ assign(arg_n, getDRegI64(nreg));
+ switch(size) {
+ case 1:
+ dup = Iop_Dup16x4;
+ get = Iop_GetElem16x4;
+ index = mreg >> 3;
+ mreg &= 7;
+ break;
+ case 2:
+ dup = Iop_Dup32x2;
+ get = Iop_GetElem32x2;
+ index = mreg >> 4;
+ mreg &= 0xf;
+ break;
+ case 0:
+ case 3:
+ return False;
+ default:
+ vassert(0);
+ }
+ assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
+ switch (size) {
+ case 0:
+ case 3:
+ return False;
+ case 1:
+ op = Iop_QDMulLong16Sx4;
+ op2 = Iop_CmpEQ16x4;
+ imm = 1LL << 15;
+ imm = (imm << 16) | imm;
+ imm = (imm << 32) | imm;
+ break;
+ case 2:
+ op = Iop_QDMulLong32Sx2;
+ op2 = Iop_CmpEQ32x2;
+ imm = 1LL << 31;
+ imm = (imm << 32) | imm;
+ break;
+ default:
+ vassert(0);
+ }
+ putQReg(dreg, binop(op, mkexpr(arg_n), mkexpr(arg_m)),
+ condT);
+#ifndef DISABLE_QC_FLAG
+ setFlag_QC(binop(Iop_And64,
+ binop(op2, mkexpr(arg_n), mkU64(imm)),
+ binop(op2, mkexpr(arg_m), mkU64(imm))),
+ mkU64(0),
+ False, condT);
+#endif
+ DIP("vqdmull.s%u q%u, d%u, d%u[%u]\n", 8 << size, dreg, nreg, mreg,
+ index);
+ return True;
+ }
+
+ /* VQDMULH */
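+ /* Doubling multiply returning the high half.  As with VQDMULL, the
+ only saturating case is when both operands are the most negative
+ value, so the QC check just compares each operand against 'imm'
+ (INT_MIN per lane).  VQRDMULH below uses the same scheme. */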
+ if (INSN(11,8) == BITS4(1,1,0,0)) {
+ IROp op, op2, dup, get;
+ ULong imm;
+ IRTemp res, arg_m, arg_n;
+ if (Q) {
+ if ((dreg & 1) || (nreg & 1))
+ return False;
+ dreg >>= 1;
+ nreg >>= 1;
+ res = newTemp(Ity_V128);
+ arg_m = newTemp(Ity_V128);
+ arg_n = newTemp(Ity_V128);
+ assign(arg_n, getQReg(nreg));
+ switch(size) {
+ case 1:
+ dup = Iop_Dup16x8;
+ get = Iop_GetElem16x4;
+ index = mreg >> 3;
+ mreg &= 7;
+ break;
+ case 2:
+ dup = Iop_Dup32x4;
+ get = Iop_GetElem32x2;
+ index = mreg >> 4;
+ mreg &= 0xf;
+ break;
+ case 0:
+ case 3:
+ return False;
+ default:
+ vassert(0);
+ }
+ assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
+ } else {
+ res = newTemp(Ity_I64);
+ arg_m = newTemp(Ity_I64);
+ arg_n = newTemp(Ity_I64);
+ assign(arg_n, getDRegI64(nreg));
+ switch(size) {
+ case 1:
+ dup = Iop_Dup16x4;
+ get = Iop_GetElem16x4;
+ index = mreg >> 3;
+ mreg &= 7;
+ break;
+ case 2:
+ dup = Iop_Dup32x2;
+ get = Iop_GetElem32x2;
+ index = mreg >> 4;
+ mreg &= 0xf;
+ break;
+ case 0:
+ case 3:
+ return False;
+ default:
+ vassert(0);
+ }
+ assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
+ }
+ switch (size) {
+ case 0:
+ case 3:
+ return False;
+ case 1:
+ op = Q ? Iop_QDMulHi16Sx8 : Iop_QDMulHi16Sx4;
+ op2 = Q ? Iop_CmpEQ16x8 : Iop_CmpEQ16x4;
+ imm = 1LL << 15;
+ imm = (imm << 16) | imm;
+ imm = (imm << 32) | imm;
+ break;
+ case 2:
+ op = Q ? Iop_QDMulHi32Sx4 : Iop_QDMulHi32Sx2;
+ op2 = Q ? Iop_CmpEQ32x4 : Iop_CmpEQ32x2;
+ imm = 1LL << 31;
+ imm = (imm << 32) | imm;
+ break;
+ default:
+ vassert(0);
+ }
+ assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
+#ifndef DISABLE_QC_FLAG
+ setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
+ binop(op2, mkexpr(arg_n),
+ Q ? mkU128(imm) : mkU64(imm)),
+ binop(op2, mkexpr(arg_m),
+ Q ? mkU128(imm) : mkU64(imm))),
+ Q ? mkU128(0) : mkU64(0),
+ Q, condT);
+#endif
+ if (Q)
+ putQReg(dreg, mkexpr(res), condT);
+ else
+ putDRegI64(dreg, mkexpr(res), condT);
+ DIP("vqdmulh.s%u %c%u, %c%u, d%u[%u]\n",
+ 8 << size, Q ? 'q' : 'd', dreg,
+ Q ? 'q' : 'd', nreg, mreg, index);
+ return True;
+ }
+
+ /* VQRDMULH (scalar) */
+ if (INSN(11,8) == BITS4(1,1,0,1)) {
+ IROp op, op2, dup, get;
+ ULong imm;
+ IRTemp res, arg_m, arg_n;
+ if (Q) {
+ if ((dreg & 1) || (nreg & 1))
+ return False;
+ dreg >>= 1;
+ nreg >>= 1;
+ res = newTemp(Ity_V128);
+ arg_m = newTemp(Ity_V128);
+ arg_n = newTemp(Ity_V128);
+ assign(arg_n, getQReg(nreg));
+ switch(size) {
+ case 1:
+ dup = Iop_Dup16x8;
+ get = Iop_GetElem16x4;
+ index = mreg >> 3;
+ mreg &= 7;
+ break;
+ case 2:
+ dup = Iop_Dup32x4;
+ get = Iop_GetElem32x2;
+ index = mreg >> 4;
+ mreg &= 0xf;
+ break;
+ case 0:
+ case 3:
+ return False;
+ default:
+ vassert(0);
+ }
+ assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
+ } else {
+ res = newTemp(Ity_I64);
+ arg_m = newTemp(Ity_I64);
+ arg_n = newTemp(Ity_I64);
+ assign(arg_n, getDRegI64(nreg));
+ switch(size) {
+ case 1:
+ dup = Iop_Dup16x4;
+ get = Iop_GetElem16x4;
+ index = mreg >> 3;
+ mreg &= 7;
+ break;
+ case 2:
+ dup = Iop_Dup32x2;
+ get = Iop_GetElem32x2;
+ index = mreg >> 4;
+ mreg &= 0xf;
+ break;
+ case 0:
+ case 3:
+ return False;
+ default:
+ vassert(0);
+ }
+ assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
+ }
+ switch (size) {
+ case 0:
+ case 3:
+ return False;
+ case 1:
+ op = Q ? Iop_QRDMulHi16Sx8 : Iop_QRDMulHi16Sx4;
+ op2 = Q ? Iop_CmpEQ16x8 : Iop_CmpEQ16x4;
+ imm = 1LL << 15;
+ imm = (imm << 16) | imm;
+ imm = (imm << 32) | imm;
+ break;
+ case 2:
+ op = Q ? Iop_QRDMulHi32Sx4 : Iop_QRDMulHi32Sx2;
+ op2 = Q ? Iop_CmpEQ32x4 : Iop_CmpEQ32x2;
+ imm = 1LL << 31;
+ imm = (imm << 32) | imm;
+ break;
+ default:
+ vassert(0);
+ }
+ assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
+#ifndef DISABLE_QC_FLAG
+ setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
+ binop(op2, mkexpr(arg_n),
+ Q ? mkU128(imm) : mkU64(imm)),
+ binop(op2, mkexpr(arg_m),
+ Q ? mkU128(imm) : mkU64(imm))),
+ Q ? mkU128(0) : mkU64(0),
+ Q, condT);
+#endif
+ if (Q)
+ putQReg(dreg, mkexpr(res), condT);
+ else
+ putDRegI64(dreg, mkexpr(res), condT);
+ DIP("vqrdmulh.s%u %c%u, %c%u, d%u[%u]\n",
+ 8 << size, Q ? 'q' : 'd', dreg,
+ Q ? 'q' : 'd', nreg, mreg, index);
+ return True;
+ }
+
+ return False;
+# undef INSN
+}
+
+/* A7.4.4 Two registers and a shift amount */
+static
+Bool dis_neon_data_2reg_and_shift ( UInt theInstr, IRTemp condT )
+{
+ UInt A = (theInstr >> 8) & 0xf;
+ UInt B = (theInstr >> 6) & 1;
+ UInt L = (theInstr >> 7) & 1;
+ UInt U = (theInstr >> 24) & 1;
+ UInt Q = B;
+ UInt imm6 = (theInstr >> 16) & 0x3f;
+ UInt shift_imm;
+ UInt size = 4;
+ UInt tmp;
+ UInt mreg = get_neon_m_regno(theInstr);
+ UInt dreg = get_neon_d_regno(theInstr);
+ ULong imm = 0;
+ IROp op, cvt, add = Iop_INVALID, cvt2, op_rev;
+ IRTemp reg_m, res, mask;
+
+ if (L == 0 && ((theInstr >> 19) & 7) == 0)
+ /* This is the "one register and a modified immediate" case (A7.4.6) */
+ return False;
+
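+ /* The position of the leading 1 in (L:imm6) encodes the element
+ size; the shift amount for the right-shift forms is then
+ (2*esize) - imm6, which for 64-bit elements degenerates to
+ 64 - imm6. */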
+ tmp = (L << 6) | imm6;
+ if (tmp & 0x40) {
+ size = 3;
+ shift_imm = 64 - imm6;
+ } else if (tmp & 0x20) {
+ size = 2;
+ shift_imm = 64 - imm6;
+ } else if (tmp & 0x10) {
+ size = 1;
+ shift_imm = 32 - imm6;
+ } else if (tmp & 0x8) {
+ size = 0;
+ shift_imm = 16 - imm6;
+ } else {
+ return False;
+ }
+
+ switch (A) {
+ case 3:
+ case 2:
+ /* VRSHR, VRSRA */
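+ /* Rounding right shift: per lane this computes
+ (m >> n) + ((m >> (n-1)) & 1),
+ i.e. the last bit shifted out is added back in as the rounding
+ increment.  'imm' below is the per-lane LSB mask used to isolate
+ that bit. */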
+ if (shift_imm > 0) {
+ IRExpr *imm_val;
+ imm = 1L;
+ switch (size) {
+ case 0:
+ imm = (imm << 8) | imm;
+ /* fall through */
+ case 1:
+ imm = (imm << 16) | imm;
+ /* fall through */
+ case 2:
+ imm = (imm << 32) | imm;
+ /* fall through */
+ case 3:
+ break;
+ default:
+ vassert(0);
+ }
+ if (Q) {
+ reg_m = newTemp(Ity_V128);
+ res = newTemp(Ity_V128);
+ imm_val = binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm));
+ assign(reg_m, getQReg(mreg));
+ switch (size) {
+ case 0:
+ add = Iop_Add8x16;
+ op = U ? Iop_ShrN8x16 : Iop_SarN8x16;
+ break;
+ case 1:
+ add = Iop_Add16x8;
+ op = U ? Iop_ShrN16x8 : Iop_SarN16x8;
+ break;
+ case 2:
+ add = Iop_Add32x4;
+ op = U ? Iop_ShrN32x4 : Iop_SarN32x4;
+ break;
+ case 3:
+ add = Iop_Add64x2;
+ op = U ? Iop_ShrN64x2 : Iop_SarN64x2;
+ break;
+ default:
+ vassert(0);
+ }
+ } else {
+ reg_m = newTemp(Ity_I64);
+ res = newTemp(Ity_I64);
+ imm_val = mkU64(imm);
+ assign(reg_m, getDRegI64(mreg));
+ switch (size) {
+ case 0:
+ add = Iop_Add8x8;
+ op = U ? Iop_ShrN8x8 : Iop_SarN8x8;
+ break;
+ case 1:
+ add = Iop_Add16x4;
+ op = U ? Iop_ShrN16x4 : Iop_SarN16x4;
+ break;
+ case 2:
+ add = Iop_Add32x2;
+ op = U ? Iop_ShrN32x2 : Iop_SarN32x2;
+ break;
+ case 3:
+ add = Iop_Add64;
+ op = U ? Iop_Shr64 : Iop_Sar64;
+ break;
+ default:
+ vassert(0);
+ }
+ }
+ assign(res,
+ binop(add,
+ binop(op,
+ mkexpr(reg_m),
+ mkU8(shift_imm)),
+ binop(Q ? Iop_AndV128 : Iop_And64,
+ binop(op,
+ mkexpr(reg_m),
+ mkU8(shift_imm - 1)),
+ imm_val)));
+ } else {
+ if (Q) {
+ res = newTemp(Ity_V128);
+ assign(res, getQReg(mreg));
+ } else {
+ res = newTemp(Ity_I64);
+ assign(res, getDRegI64(mreg));
+ }
+ }
+ if (A == 3) {
+ if (Q) {
+ putQReg(dreg, binop(add, mkexpr(res), getQReg(dreg)),
+ condT);
+ } else {
+ putDRegI64(dreg, binop(add, mkexpr(res), getDRegI64(dreg)),
+ condT);
+ }
+ DIP("vrsra.%c%u %c%u, %c%u, #%u\n",
+ U ? 'u' : 's', 8 << size,
+ Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
+ } else {
+ if (Q) {
+ putQReg(dreg, mkexpr(res), condT);
+ } else {
+ putDRegI64(dreg, mkexpr(res), condT);
+ }
+ DIP("vrshr.%c%u %c%u, %c%u, #%u\n", U ? 'u' : 's', 8 << size,
+ Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
+ }
+ return True;
+ case 1:
+ case 0:
+ /* VSHR, VSRA */
+ if (Q) {
+ reg_m = newTemp(Ity_V128);
+ assign(reg_m, getQReg(mreg));
+ res = newTemp(Ity_V128);
+ } else {
+ reg_m = newTemp(Ity_I64);
+ assign(reg_m, getDRegI64(mreg));
+ res = newTemp(Ity_I64);
+ }
+ if (Q) {
+ switch (size) {
+ case 0:
+ op = U ? Iop_ShrN8x16 : Iop_SarN8x16;
+ add = Iop_Add8x16;
+ break;
+ case 1:
+ op = U ? Iop_ShrN16x8 : Iop_SarN16x8;
+ add = Iop_Add16x8;
+ break;
+ case 2:
+ op = U ? Iop_ShrN32x4 : Iop_SarN32x4;
+ add = Iop_Add32x4;
+ break;
+ case 3:
+ op = U ? Iop_ShrN64x2 : Iop_SarN64x2;
+ add = Iop_Add64x2;
+ break;
+ default:
+ vassert(0);
+ }
+ } else {
+ switch (size) {
+ case 0:
+ op = U ? Iop_ShrN8x8 : Iop_SarN8x8;
+ add = Iop_Add8x8;
+ break;
+ case 1:
+ op = U ? Iop_ShrN16x4 : Iop_SarN16x4;
+ add = Iop_Add16x4;
+ break;
+ case 2:
+ op = U ? Iop_ShrN32x2 : Iop_SarN32x2;
+ add = Iop_Add32x2;
+ break;
+ case 3:
+ op = U ? Iop_Shr64 : Iop_Sar64;
+ add = Iop_Add64;
+ break;
+ default:
+ vassert(0);
+ }
+ }
+ assign(res, binop(op, mkexpr(reg_m), mkU8(shift_imm)));
+ if (A == 1) {
+ if (Q) {
+ putQReg(dreg, binop(add, mkexpr(res), getQReg(dreg)),
+ condT);
+ } else {
+ putDRegI64(dreg, binop(add, mkexpr(res), getDRegI64(dreg)),
+ condT);
+ }
+ DIP("vsra.%c%u %c%u, %c%u, #%u\n", U ? 'u' : 's', 8 << size,
+ Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
+ } else {
+ if (Q) {
+ putQReg(dreg, mkexpr(res), condT);
+ } else {
+ putDRegI64(dreg, mkexpr(res), condT);
+ }
+ DIP("vshr.%c%u %c%u, %c%u, #%u\n", U ? 'u' : 's', 8 << size,
+ Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
+ }
+ return True;
+ case 4:
+ /* VSRI */
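+ /* Shift-right-insert: the top 'shift_imm' bits of each destination
+ lane are preserved, the rest is replaced by the shifted source.
+ 'mask' (all-ones >> shift_imm, per lane) selects the inserted
+ bits. */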
+ if (!U)
+ return False;
+ if (Q) {
+ res = newTemp(Ity_V128);
+ mask = newTemp(Ity_V128);
+ } else {
+ res = newTemp(Ity_I64);
+ mask = newTemp(Ity_I64);
+ }
+ switch (size) {
+ case 0: op = Q ? Iop_ShrN8x16 : Iop_ShrN8x8; break;
+ case 1: op = Q ? Iop_ShrN16x8 : Iop_ShrN16x4; break;
+ case 2: op = Q ? Iop_ShrN32x4 : Iop_ShrN32x2; break;
+ case 3: op = Q ? Iop_ShrN64x2 : Iop_Shr64; break;
+ default: vassert(0);
+ }
+ if (Q) {
+ assign(mask, binop(op, binop(Iop_64HLtoV128,
+ mkU64(0xFFFFFFFFFFFFFFFFLL),
+ mkU64(0xFFFFFFFFFFFFFFFFLL)),
+ mkU8(shift_imm)));
+ assign(res, binop(Iop_OrV128,
+ binop(Iop_AndV128,
+ getQReg(dreg),
+ unop(Iop_NotV128,
+ mkexpr(mask))),
+ binop(op,
+ getQReg(mreg),
+ mkU8(shift_imm))));
+ putQReg(dreg, mkexpr(res), condT);
+ } else {
+ assign(mask, binop(op, mkU64(0xFFFFFFFFFFFFFFFFLL),
+ mkU8(shift_imm)));
+ assign(res, binop(Iop_Or64,
+ binop(Iop_And64,
+ getDRegI64(dreg),
+ unop(Iop_Not64,
+ mkexpr(mask))),
+ binop(op,
+ getDRegI64(mreg),
+ mkU8(shift_imm))));
+ putDRegI64(dreg, mkexpr(res), condT);
+ }
+ DIP("vsri.%u %c%u, %c%u, #%u\n",
+ 8 << size, Q ? 'q' : 'd', dreg,
+ Q ? 'q' : 'd', mreg, shift_imm);
+ return True;
+ case 5:
+ if (U) {
+ /* VSLI */
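+ /* Shift-left-insert: shift_imm is first converted from the decoded
+ right-shift form into the left-shift amount.  The low 'shift_imm'
+ bits of each destination lane are preserved; the rest come from
+ the left-shifted source. */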
+ shift_imm = 8 * (1 << size) - shift_imm;
+ if (Q) {
+ res = newTemp(Ity_V128);
+ mask = newTemp(Ity_V128);
+ } else {
+ res = newTemp(Ity_I64);
+ mask = newTemp(Ity_I64);
+ }
+ switch (size) {
+ case 0: op = Q ? Iop_ShlN8x16 : Iop_ShlN8x8; break;
+ case 1: op = Q ? Iop_ShlN16x8 : Iop_ShlN16x4; break;
+ case 2: op = Q ? Iop_ShlN32x4 : Iop_ShlN32x2; break;
+ case 3: op = Q ? Iop_ShlN64x2 : Iop_Shl64; break;
+ default: vassert(0);
+ }
+ if (Q) {
+ assign(mask, binop(op, binop(Iop_64HLtoV128,
+ mkU64(0xFFFFFFFFFFFFFFFFLL),
+ mkU64(0xFFFFFFFFFFFFFFFFLL)),
+ mkU8(shift_imm)));
+ assign(res, binop(Iop_OrV128,
+ binop(Iop_AndV128,
+ getQReg(dreg),
+ unop(Iop_NotV128,
+ mkexpr(mask))),
+ binop(op,
+ getQReg(mreg),
+ mkU8(shift_imm))));
+ putQReg(dreg, mkexpr(res), condT);
+ } else {
+ assign(mask, binop(op, mkU64(0xFFFFFFFFFFFFFFFFLL),
+ mkU8(shift_imm)));
+ assign(res, binop(Iop_Or64,
+ binop(Iop_And64,
+ getDRegI64(dreg),
+ unop(Iop_Not64,
+ mkexpr(mask))),
+ binop(op,
+ getDRegI64(mreg),
+ mkU8(shift_imm))));
+ putDRegI64(dreg, mkexpr(res), condT);
+ }
+ DIP("vsli.%u %c%u, %c%u, #%u\n",
+ 8 << size, Q ? 'q' : 'd', dreg,
+ Q ? 'q' : 'd', mreg, shift_imm);
+ return True;
+ } else {
+ /* VSHL #imm */
+ shift_imm = 8 * (1 << size) - shift_imm;
+ if (Q) {
+ res = newTemp(Ity_V128);
+ } else {
+ res = newTemp(Ity_I64);
+ }
+ switch (size) {
+ case 0: op = Q ? Iop_ShlN8x16 : Iop_ShlN8x8; break;
+ case 1: op = Q ? Iop_ShlN16x8 : Iop_ShlN16x4; break;
+ case 2: op = Q ? Iop_ShlN32x4 : Iop_ShlN32x2; break;
+ case 3: op = Q ? Iop_ShlN64x2 : Iop_Shl64; break;
+ default: vassert(0);
+ }
+ assign(res, binop(op, Q ? getQReg(mreg) : getDRegI64(mreg),
+ mkU8(shift_imm)));
+ if (Q) {
+ putQReg(dreg, mkexpr(res), condT);
+ } else {
+ putDRegI64(dreg, mkexpr(res), condT);
+ }
+ DIP("vshl.i%u %c%u, %c%u, #%u\n",
+ 8 << size, Q ? 'q' : 'd', dreg,
+ Q ? 'q' : 'd', mreg, shift_imm);
+ return True;
+ }
+ break;
+ case 6:
+ case 7:
+ /* VQSHL, VQSHLU */
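+ /* shift_imm is converted from the decoded right-shift form into the
+ left-shift amount.  The QC check below shifts the saturated
+ result back right ('op_rev') and compares it with the original
+ operand: any mismatch means bits were clipped off the top. */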
+ shift_imm = 8 * (1 << size) - shift_imm;
+ if (U) {
+ if (A & 1) {
+ switch (size) {
+ case 0:
+ op = Q ? Iop_QShlN8x16 : Iop_QShlN8x8;
+ op_rev = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
+ break;
+ case 1:
+ op = Q ? Iop_QShlN16x8 : Iop_QShlN16x4;
+ op_rev = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
+ break;
+ case 2:
+ op = Q ? Iop_QShlN32x4 : Iop_QShlN32x2;
+ op_rev = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
+ break;
+ case 3:
+ op = Q ? Iop_QShlN64x2 : Iop_QShlN64x1;
+ op_rev = Q ? Iop_ShrN64x2 : Iop_Shr64;
+ break;
+ default:
+ vassert(0);
+ }
+ DIP("vqshl.u%u %c%u, %c%u, #%u\n",
+ 8 << size,
+ Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
+ } else {
+ switch (size) {
+ case 0:
+ op = Q ? Iop_QShlN8Sx16 : Iop_QShlN8Sx8;
+ op_rev = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
+ break;
+ case 1:
+ op = Q ? Iop_QShlN16Sx8 : Iop_QShlN16Sx4;
+ op_rev = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
+ break;
+ case 2:
+ op = Q ? Iop_QShlN32Sx4 : Iop_QShlN32Sx2;
+ op_rev = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
+ break;
+ case 3:
+ op = Q ? Iop_QShlN64Sx2 : Iop_QShlN64Sx1;
+ op_rev = Q ? Iop_ShrN64x2 : Iop_Shr64;
+ break;
+ default:
+ vassert(0);
+ }
+ DIP("vqshlu.s%u %c%u, %c%u, #%u\n",
+ 8 << size,
+ Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
+ }
+ } else {
+ if (!(A & 1))
+ return False;
+ switch (size) {
+ case 0:
+ op = Q ? Iop_QSalN8x16 : Iop_QSalN8x8;
+ op_rev = Q ? Iop_SarN8x16 : Iop_SarN8x8;
+ break;
+ case 1:
+ op = Q ? Iop_QSalN16x8 : Iop_QSalN16x4;
+ op_rev = Q ? Iop_SarN16x8 : Iop_SarN16x4;
+ break;
+ case 2:
+ op = Q ? Iop_QSalN32x4 : Iop_QSalN32x2;
+ op_rev = Q ? Iop_SarN32x4 : Iop_SarN32x2;
+ break;
+ case 3:
+ op = Q ? Iop_QSalN64x2 : Iop_QSalN64x1;
+ op_rev = Q ? Iop_SarN64x2 : Iop_Sar64;
+ break;
+ default:
+ vassert(0);
+ }
+ DIP("vqshl.s%u %c%u, %c%u, #%u\n",
+ 8 << size,
+ Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
+ }
+ if (Q) {
+ tmp = newTemp(Ity_V128);
+ res = newTemp(Ity_V128);
+ reg_m = newTemp(Ity_V128);
+ assign(reg_m, getQReg(mreg));
+ } else {
+ tmp = newTemp(Ity_I64);
+ res = newTemp(Ity_I64);
+ reg_m = newTemp(Ity_I64);
+ assign(reg_m, getDRegI64(mreg));
+ }
+ assign(res, binop(op, mkexpr(reg_m), mkU8(shift_imm)));
+#ifndef DISABLE_QC_FLAG
+ assign(tmp, binop(op_rev, mkexpr(res), mkU8(shift_imm)));
+ setFlag_QC(mkexpr(tmp), mkexpr(reg_m), Q, condT);
+#endif
+ if (Q)
+ putQReg(dreg, mkexpr(res), condT);
+ else
+ putDRegI64(dreg, mkexpr(res), condT);
+ return True;
+ case 8:
+ if (!U) {
+ if (L == 1)
+ return False;
+ size++;
+ dreg = ((theInstr >> 18) & 0x10) | ((theInstr >> 12) & 0xF);
+ mreg = ((theInstr >> 1) & 0x10) | (theInstr & 0xF);
+ if (mreg & 1)
+ return False;
+ mreg >>= 1;
+ if (!B) {
+ /* VSHRN */
+ IROp narOp;
+ reg_m = newTemp(Ity_V128);
+ assign(reg_m, getQReg(mreg));
+ res = newTemp(Ity_I64);
+ switch (size) {
+ case 1:
+ op = Iop_ShrN16x8;
+ narOp = Iop_Shorten16x8;
+ break;
+ case 2:
+ op = Iop_ShrN32x4;
+ narOp = Iop_Shorten32x4;
+ break;
+ case 3:
+ op = Iop_ShrN64x2;
+ narOp = Iop_Shorten64x2;
+ break;
+ default:
+ vassert(0);
+ }
+ assign(res, unop(narOp,
+ binop(op,
+ mkexpr(reg_m),
+ mkU8(shift_imm))));
+ putDRegI64(dreg, mkexpr(res), condT);
+ DIP("vshrn.i%u d%u, q%u, #%u\n", 8 << size, dreg, mreg,
+ shift_imm);
+ return True;
+ } else {
+ /* VRSHRN */
+ IROp addOp, shOp, narOp;
+ IRExpr *imm_val;
+ reg_m = newTemp(Ity_V128);
+ assign(reg_m, getQReg(mreg));
+ res = newTemp(Ity_I64);
+ imm = 1L;
+ switch (size) {
+ case 0: imm = (imm << 8) | imm; /* fall through */
+ case 1: imm = (imm << 16) | imm; /* fall through */
+ case 2: imm = (imm << 32) | imm; /* fall through */
+ case 3: break;
+ default: vassert(0);
+ }
+ imm_val = binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm));
+ switch (size) {
+ case 1:
+ addOp = Iop_Add16x8;
+ shOp = Iop_ShrN16x8;
+ narOp = Iop_Shorten16x8;
+ break;
+ case 2:
+ addOp = Iop_Add32x4;
+ shOp = Iop_ShrN32x4;
+ narOp = Iop_Shorten32x4;
+ break;
+ case 3:
+ addOp = Iop_Add64x2;
+ shOp = Iop_ShrN64x2;
+ narOp = Iop_Shorten64x2;
+ break;
+ default:
+ vassert(0);
+ }
+ assign(res, unop(narOp,
+ binop(addOp,
+ binop(shOp,
+ mkexpr(reg_m),
+ mkU8(shift_imm)),
+ binop(Iop_AndV128,
+ binop(shOp,
+ mkexpr(reg_m),
+ mkU8(shift_imm - 1)),
+ imm_val))));
+ putDRegI64(dreg, mkexpr(res), condT);
+ if (shift_imm == 0) {
+ DIP("vmov%u d%u, q%u, #%u\n", 8 << size, dreg, mreg,
+ shift_imm);
+ } else {
+ DIP("vrshrn.i%u d%u, q%u, #%u\n", 8 << size, dreg, mreg,
+ shift_imm);
+ }
+ return True;
+ }
+ } else {
+ /* fall through */
+ }
+ case 9:
+ dreg = ((theInstr >> 18) & 0x10) | ((theInstr >> 12) & 0xF);
+ mreg = ((theInstr >> 1) & 0x10) | (theInstr & 0xF);
+ if (mreg & 1)
+ return False;
+ mreg >>= 1;
+ size++;
+ if ((theInstr >> 8) & 1) {
+ switch (size) {
+ case 1:
+ op = U ? Iop_ShrN16x8 : Iop_SarN16x8;
+ cvt = U ? Iop_QShortenU16Ux8 : Iop_QShortenS16Sx8;
+ cvt2 = U ? Iop_Longen8Ux8 : Iop_Longen8Sx8;
+ break;
+ case 2:
+ op = U ? Iop_ShrN32x4 : Iop_SarN32x4;
+ cvt = U ? Iop_QShortenU32Ux4 : Iop_QShortenS32Sx4;
+ cvt2 = U ? Iop_Longen16Ux4 : Iop_Longen16Sx4;
+ break;
+ case 3:
+ op = U ? Iop_ShrN64x2 : Iop_SarN64x2;
+ cvt = U ? Iop_QShortenU64Ux2 : Iop_QShortenS64Sx2;
+ cvt2 = U ? Iop_Longen32Ux2 : Iop_Longen32Sx2;
+ break;
+ default:
+ vassert(0);
+ }
+ DIP("vq%sshrn.%c%u d%u, q%u, #%u\n", B ? "r" : "",
+ U ? 'u' : 's', 8 << size, dreg, mreg, shift_imm);
+ } else {
+ vassert(U);
+ switch (size) {
+ case 1:
+ op = Iop_SarN16x8;
+ cvt = Iop_QShortenU16Sx8;
+ cvt2 = Iop_Longen8Ux8;
+ break;
+ case 2:
+ op = Iop_SarN32x4;
+ cvt = Iop_QShortenU32Sx4;
+ cvt2 = Iop_Longen16Ux4;
+ break;
+ case 3:
+ op = Iop_SarN64x2;
+ cvt = Iop_QShortenU64Sx2;
+ cvt2 = Iop_Longen32Ux2;
+ break;
+ default:
+ vassert(0);
+ }
+ DIP("vq%sshrun.s%u d%u, q%u, #%u\n", B ? "r" : "",
+ 8 << size, dreg, mreg, shift_imm);
+ }
+ if (B) {
+ if (shift_imm > 0) {
+ imm = 1;
+ switch (size) {
+ case 1: imm = (imm << 16) | imm; /* fall through */
+ case 2: imm = (imm << 32) | imm; /* fall through */
+ case 3: break;
+ case 0: default: vassert(0);
+ }
+ switch (size) {
+ case 1: add = Iop_Add16x8; break;
+ case 2: add = Iop_Add32x4; break;
+ case 3: add = Iop_Add64x2; break;
+ case 0: default: vassert(0);
+ }
+ }
+ }
+ reg_m = newTemp(Ity_V128);
+ res = newTemp(Ity_V128);
+ assign(reg_m, getQReg(mreg));
+ if (B) {
+ /* VQRSHRN, VQRSHRUN */
+ assign(res, binop(add,
+ binop(op, mkexpr(reg_m), mkU8(shift_imm)),
+ binop(Iop_AndV128,
+ binop(op,
+ mkexpr(reg_m),
+ mkU8(shift_imm - 1)),
+ mkU128(imm))));
+ } else {
+ /* VQSHRN, VQSHRUN */
+ assign(res, binop(op, mkexpr(reg_m), mkU8(shift_imm)));
+ }
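+ /* Saturation check: re-widen the narrowed result ('cvt2' after
+ 'cvt') and compare with the pre-narrowing value; lanes that fail
+ the round trip were clipped and set the sticky QC flag. */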
+#ifndef DISABLE_QC_FLAG
+ setFlag_QC(unop(cvt2, unop(cvt, mkexpr(res))), mkexpr(res),
+ True, condT);
+#endif
+ putDRegI64(dreg, unop(cvt, mkexpr(res)), condT);
+ return True;
+ case 10:
+ /* VSHLL
+ VMOVL ::= VSHLL #0 */
+ if (B)
+ return False;
+ if (dreg & 1)
+ return False;
+ dreg >>= 1;
+ shift_imm = (8 << size) - shift_imm;
+ res = newTemp(Ity_V128);
+ switch (size) {
+ case 0:
+ op = Iop_ShlN16x8;
+ cvt = U ? Iop_Longen8Ux8 : Iop_Longen8Sx8;
+ break;
+ case 1:
+ op = Iop_ShlN32x4;
+ cvt = U ? Iop_Longen16Ux4 : Iop_Longen16Sx4;
+ break;
+ case 2:
+ op = Iop_ShlN64x2;
+ cvt = U ? Iop_Longen32Ux2 : Iop_Longen32Sx2;
+ break;
+ case 3:
+ return False;
+ default:
+ vassert(0);
+ }
+ assign(res, binop(op, unop(cvt, getDRegI64(mreg)), mkU8(shift_imm)));
+ putQReg(dreg, mkexpr(res), condT);
+ if (shift_imm == 0) {
+ DIP("vmovl.%c%u q%u, d%u\n", U ? 'u' : 's', 8 << size,
+ dreg, mreg);
+ } else {
+ DIP("vshll.%c%u q%u, d%u, #%u\n", U ? 'u' : 's', 8 << size,
+ dreg, mreg, shift_imm);
+ }
+ return True;
+ case 14:
+ case 15:
+ /* VCVT floating-point <-> fixed-point */
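+ /* The number of fraction bits is 64 - imm6.  Only 32-bit elements
+ are supported, and imm6<5> (bit 21) must be set, so imm6 lies in
+ 32..63 and #fbits in 1..32. */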
+ if ((theInstr >> 8) & 1) {
+ if (U) {
+ op = Q ? Iop_F32ToFixed32Ux4_RZ : Iop_F32ToFixed32Ux2_RZ;
+ } else {
+ op = Q ? Iop_F32ToFixed32Sx4_RZ : Iop_F32ToFixed32Sx2_RZ;
+ }
+ DIP("vcvt.%c32.f32 %c%u, %c%u, #%u\n", U ? 'u' : 's',
+ Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg,
+ 64 - ((theInstr >> 16) & 0x3f));
+ } else {
+ if (U) {
+ op = Q ? Iop_Fixed32UToF32x4_RN : Iop_Fixed32UToF32x2_RN;
+ } else {
+ op = Q ? Iop_Fixed32SToF32x4_RN : Iop_Fixed32SToF32x2_RN;
+ }
+ DIP("vcvt.f32.%c32 %c%u, %c%u, #%u\n", U ? 'u' : 's',
+ Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg,
+ 64 - ((theInstr >> 16) & 0x3f));
+ }
+ if (((theInstr >> 21) & 1) == 0)
+ return False;
+ if (Q) {
+ putQReg(dreg, binop(op, getQReg(mreg),
+ mkU8(64 - ((theInstr >> 16) & 0x3f))), condT);
+ } else {
+ putDRegI64(dreg, binop(op, getDRegI64(mreg),
+ mkU8(64 - ((theInstr >> 16) & 0x3f))), condT);
+ }
+ return True;
+ default:
+ return False;
+
+ }
+ return False;
+}
+
+/* A7.4.5 Two registers, miscellaneous */
+static
+Bool dis_neon_data_2reg_misc ( UInt theInstr, IRTemp condT )
+{
+ UInt A = (theInstr >> 16) & 3;
+ UInt B = (theInstr >> 6) & 0x1f;
+ UInt Q = (theInstr >> 6) & 1;
+ UInt U = (theInstr >> 24) & 1;
+ UInt size = (theInstr >> 18) & 3;
+ UInt dreg = get_neon_d_regno(theInstr);
+ UInt mreg = get_neon_m_regno(theInstr);
+ UInt F = (theInstr >> 10) & 1;
+ IRTemp arg_d;
+ IRTemp arg_m;
+ IRTemp res;
+ switch (A) {
+ case 0:
+ if (Q) {
+ arg_m = newTemp(Ity_V128);
+ res = newTemp(Ity_V128);
+ assign(arg_m, getQReg(mreg));
+ } else {
+ arg_m = newTemp(Ity_I64);
+ res = newTemp(Ity_I64);
+ assign(arg_m, getDRegI64(mreg));
+ }
+ switch (B >> 1) {
+ case 0: {
+ /* VREV64 */
+ IROp op;
+ switch (size) {
+ case 0:
+ op = Q ? Iop_Reverse64_8x16 : Iop_Reverse64_8x8;
+ break;
+ case 1:
+ op = Q ? Iop_Reverse64_16x8 : Iop_Reverse64_16x4;
+ break;
+ case 2:
+ op = Q ? Iop_Reverse64_32x4 : Iop_Reverse64_32x2;
+ break;
+ case 3:
+ return False;
+ default:
+ vassert(0);
+ }
+ assign(res, unop(op, mkexpr(arg_m)));
+ DIP("vrev64.%u %c%u, %c%u\n", 8 << size,
+ Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
+ break;
+ }
+ case 1: {
+ /* VREV32 */
+ IROp op;
+ switch (size) {
+ case 0:
+ op = Q ? Iop_Reverse32_8x16 : Iop_Reverse32_8x8;
+ break;
+ case 1:
+ op = Q ? Iop_Reverse32_16x8 : Iop_Reverse32_16x4;
+ break;
+ case 2:
+ case 3:
+ return False;
+ default:
+ vassert(0);
+ }
+ assign(res, unop(op, mkexpr(arg_m)));
+ DIP("vrev32.%u %c%u, %c%u\n", 8 << size,
+ Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
+ break;
+ }
+ case 2: {
+ /* VREV16 */
+ IROp op;
+ switch (size) {
+ case 0:
+ op = Q ? Iop_Reverse16_8x16 : Iop_Reverse16_8x8;
+ break;
+ case 1:
+ case 2:
+ case 3:
+ return False;
+ default:
+ vassert(0);
+ }
+ assign(res, unop(op, mkexpr(arg_m)));
+ DIP("vrev16.%u %c%u, %c%u\n", 8 << size,
+ Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
+ break;
+ }
+ case 3:
+ return False;
+ case 4:
+ case 5: {
+ /* VPADDL */
+ IROp op;
+ U = (theInstr >> 7) & 1;
+ if (Q) {
+ switch (size) {
+ case 0: op = U ? Iop_PwAddL8Ux16 : Iop_PwAddL8Sx16; break;
+ case 1: op = U ? Iop_PwAddL16Ux8 : Iop_PwAddL16Sx8; break;
+ case 2: op = U ? Iop_PwAddL32Ux4 : Iop_PwAddL32Sx4; break;
+ case 3: return False;
+ default: vassert(0);
+ }
+ } else {
+ switch (size) {
+ case 0: op = U ? Iop_PwAddL8Ux8 : Iop_PwAddL8Sx8; break;
+ case 1: op = U ? Iop_PwAddL16Ux4 : Iop_PwAddL16Sx4; break;
+ case 2: op = U ? Iop_PwAddL32Ux2 : Iop_PwAddL32Sx2; break;
+ case 3: return False;
+ default: vassert(0);
+ }
+ }
+ assign(res, unop(op, mkexpr(arg_m)));
+ DIP("vpaddl.%c%u %c%u, %c%u\n", U ? 'u' : 's', 8 << size,
+ Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
+ break;
+ }
+ case 6:
+ case 7:
+ return False;
+ case 8: {
+ /* VCLS */
+ IROp op;
+ switch (size) {
+ case 0: op = Q ? Iop_Cls8Sx16 : Iop_Cls8Sx8; break;
+ case 1: op = Q ? Iop_Cls16Sx8 : Iop_Cls16Sx4; break;
+ case 2: op = Q ? Iop_Cls32Sx4 : Iop_Cls32Sx2; break;
+ case 3: return False;
+ default: vassert(0);
+ }
+ assign(res, unop(op, mkexpr(arg_m)));
+ DIP("vcls.s%u %c%u, %c%u\n", 8 << size, Q ? 'q' : 'd', dreg,
+ Q ? 'q' : 'd', mreg);
+ break;
+ }
+ case 9: {
+ /* VCLZ */
+ IROp op;
+ switch (size) {
+ case 0: op = Q ? Iop_Clz8Sx16 : Iop_Clz8Sx8; break;
+ case 1: op = Q ? Iop_Clz16Sx8 : Iop_Clz16Sx4; break;
+ case 2: op = Q ? Iop_Clz32Sx4 : Iop_Clz32Sx2; break;
+ case 3: return False;
+ default: vassert(0);
+ }
+ assign(res, unop(op, mkexpr(arg_m)));
+ DIP("vclz.i%u %c%u, %c%u\n", 8 << size, Q ? 'q' : 'd', dreg,
+ Q ? 'q' : 'd', mreg);
+ break;
+ }
+ case 10:
+ /* VCNT */
+ assign(res, unop(Q ? Iop_Cnt8x16 : Iop_Cnt8x8, mkexpr(arg_m)));
+ DIP("vcnt.8 %c%u, %c%u\n", Q ? 'q' : 'd', dreg, Q ? 'q' : 'd',
+ mreg);
+ break;
+ case 11:
+ /* VMVN */
+ if (Q)
+ assign(res, unop(Iop_NotV128, mkexpr(arg_m)));
+ else
+ assign(res, unop(Iop_Not64, mkexpr(arg_m)));
+ DIP("vmvn %c%u, %c%u\n", Q ? 'q' : 'd', dreg, Q ? 'q' : 'd',
+ mreg);
+ break;
+ case 12:
+ case 13: {
+ /* VPADAL */
+ IROp op, add_op;
+ U = (theInstr >> 7) & 1;
+ if (Q) {
+ switch (size) {
+ case 0:
+ op = U ? Iop_PwAddL8Ux16 : Iop_PwAddL8Sx16;
+ add_op = Iop_Add16x8;
+ break;
+ case 1:
+ op = U ? Iop_PwAddL16Ux8 : Iop_PwAddL16Sx8;
+ add_op = Iop_Add32x4;
+ break;
+ case 2:
+ op = U ? Iop_PwAddL32Ux4 : Iop_PwAddL32Sx4;
+ add_op = Iop_Add64x2;
+ break;
+ case 3:
+ return False;
+ default:
+ vassert(0);
+ }
+ } else {
+ switch (size) {
+ case 0:
+ op = U ? Iop_PwAddL8Ux8 : Iop_PwAddL8Sx8;
+ add_op = Iop_Add16x4;
+ break;
+ case 1:
+ op = U ? Iop_PwAddL16Ux4 : Iop_PwAddL16Sx4;
+ add_op = Iop_Add32x2;
+ break;
+ case 2:
+ op = U ? Iop_PwAddL32Ux2 : Iop_PwAddL32Sx2;
+ add_op = Iop_Add64;
+ break;
+ case 3:
+ return False;
+ default:
+ vassert(0);
+ }
+ }
+ if (Q) {
+ arg_d = newTemp(Ity_V128);
+ assign(arg_d, getQReg(dreg));
+ } else {
+ arg_d = newTemp(Ity_I64);
+ assign(arg_d, getDRegI64(dreg));
+ }
+ assign(res, binop(add_op, unop(op, mkexpr(arg_m)),
+ mkexpr(arg_d)));
+ DIP("vpadal.%c%u %c%u, %c%u\n", U ? 'u' : 's', 8 << size,
+ Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
+ break;
+ }
+ case 14: {
+ /* VQABS */
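+ /* Branch-free absolute value: mask = (m > 0), then
+ res = (mask & m) | (~mask & qsub(0, m)).
+ Saturating and wrapping negation differ only for the most
+ negative value, so comparing the two variants sets QC exactly
+ when a lane of m is INT_MIN. */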
+ IROp op_sub, op_qsub, op_cmp;
+ IRTemp mask, tmp;
+ IRExpr *zero1, *zero2;
+ IRExpr *neg, *neg2;
+ if (Q) {
+ zero1 = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
+ zero2 = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
+ mask = newTemp(Ity_V128);
+ tmp = newTemp(Ity_V128);
+ } else {
+ zero1 = mkU64(0);
+ zero2 = mkU64(0);
+ mask = newTemp(Ity_I64);
+ tmp = newTemp(Ity_I64);
+ }
+ switch (size) {
+ case 0:
+ op_sub = Q ? Iop_Sub8x16 : Iop_Sub8x8;
+ op_qsub = Q ? Iop_QSub8Sx16 : Iop_QSub8Sx8;
+ op_cmp = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8;
+ break;
+ case 1:
+ op_sub = Q ? Iop_Sub16x8 : Iop_Sub16x4;
+ op_qsub = Q ? Iop_QSub16Sx8 : Iop_QSub16Sx4;
+ op_cmp = Q ? Iop_CmpGT16Sx8 : Iop_CmpGT16Sx4;
+ break;
+ case 2:
+ op_sub = Q ? Iop_Sub32x4 : Iop_Sub32x2;
+ op_qsub = Q ? Iop_QSub32Sx4 : Iop_QSub32Sx2;
+ op_cmp = Q ? Iop_CmpGT32Sx4 : Iop_CmpGT32Sx2;
+ break;
+ case 3:
+ return False;
+ default:
+ vassert(0);
+ }
+ assign(mask, binop(op_cmp, mkexpr(arg_m), zero1));
+ neg = binop(op_qsub, zero2, mkexpr(arg_m));
+ neg2 = binop(op_sub, zero2, mkexpr(arg_m));
+ assign(res, binop(Q ? Iop_OrV128 : Iop_Or64,
+ binop(Q ? Iop_AndV128 : Iop_And64,
+ mkexpr(mask),
+ mkexpr(arg_m)),
+ binop(Q ? Iop_AndV128 : Iop_And64,
+ unop(Q ? Iop_NotV128 : Iop_Not64,
+ mkexpr(mask)),
+ neg)));
+#ifndef DISABLE_QC_FLAG
+ assign(tmp, binop(Q ? Iop_OrV128 : Iop_Or64,
+ binop(Q ? Iop_AndV128 : Iop_And64,
+ mkexpr(mask),
+ mkexpr(arg_m)),
+ binop(Q ? Iop_AndV128 : Iop_And64,
+ unop(Q ? Iop_NotV128 : Iop_Not64,
+ mkexpr(mask)),
+ neg2)));
+ setFlag_QC(mkexpr(res), mkexpr(tmp), Q, condT);
+#endif
+ DIP("vqabs.s%u %c%u, %c%u\n", 8 << size, Q ? 'q' : 'd', dreg,
+ Q ? 'q' : 'd', mreg);
+ break;
+ }
+ case 15: {
+ /* VQNEG */
+ IROp op, op2;
+ IRExpr *zero;
+ if (Q) {
+ zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
+ } else {
+ zero = mkU64(0);
+ }
+ switch (size) {
+ case 0:
+ op = Q ? Iop_QSub8Sx16 : Iop_QSub8Sx8;
+ op2 = Q ? Iop_Sub8x16 : Iop_Sub8x8;
+ break;
+ case 1:
+ op = Q ? Iop_QSub16Sx8 : Iop_QSub16Sx4;
+ op2 = Q ? Iop_Sub16x8 : Iop_Sub16x4;
+ break;
+ case 2:
+ op = Q ? Iop_QSub32Sx4 : Iop_QSub32Sx2;
+ op2 = Q ? Iop_Sub32x4 : Iop_Sub32x2;
+ break;
+ case 3:
+ return False;
+ default:
+ vassert(0);
+ }
+ assign(res, binop(op, zero, mkexpr(arg_m)));
+#ifndef DISABLE_QC_FLAG
+ setFlag_QC(mkexpr(res), binop(op2, zero, mkexpr(arg_m)),
+ Q, condT);
+#endif
+ DIP("vqneg.s%u %c%u, %c%u\n", 8 << size, Q ? 'q' : 'd', dreg,
+ Q ? 'q' : 'd', mreg);
+ break;
+ }
+ default:
+ vassert(0);
+ }
+ if (Q) {
+ putQReg(dreg, mkexpr(res), condT);
+ } else {
+ putDRegI64(dreg, mkexpr(res), condT);
+ }
+ return True;
+ case 1:
+ if (Q) {
+ arg_m = newTemp(Ity_V128);
+ res = newTemp(Ity_V128);
+ assign(arg_m, getQReg(mreg));
+ } else {
+ arg_m = newTemp(Ity_I64);
+ res = newTemp(Ity_I64);
+ assign(arg_m, getDRegI64(mreg));
+ }
+ switch ((B >> 1) & 0x7) {
+ case 0: {
+ /* VCGT #0 */
+ IRExpr *zero;
+ IROp op;
+ if (Q) {
+ zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
+ } else {
+ zero = mkU64(0);
+ }
+ if (F) {
+ switch (size) {
+ case 0: case 1: case 3: return False;
+ case 2: op = Q ? Iop_CmpGT32Fx4 : Iop_CmpGT32Fx2; break;
+ default: vassert(0);
+ }
+ } else {
+ switch (size) {
+ case 0: op = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8; break;
+ case 1: op = Q ? Iop_CmpGT16Sx8 : Iop_CmpGT16Sx4; break;
+ case 2: op = Q ? Iop_CmpGT32Sx4 : Iop_CmpGT32Sx2; break;
+ case 3: return False;
+ default: vassert(0);
+ }
+ }
+ assign(res, binop(op, mkexpr(arg_m), zero));
+ DIP("vcgt.%c%u %c%u, %c%u, #0\n", F ? 'f' : 's', 8 << size,
+ Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
+ break;
+ }
+ case 1: {
+ /* VCGE #0 */
+ IROp op;
+ IRExpr *zero;
+ if (Q) {
+ zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
+ } else {
+ zero = mkU64(0);
+ }
+ if (F) {
+ switch (size) {
+ case 0: case 1: case 3: return False;
+ case 2: op = Q ? Iop_CmpGE32Fx4 : Iop_CmpGE32Fx2; break;
+ default: vassert(0);
+ }
+ assign(res, binop(op, mkexpr(arg_m), zero));
+ } else {
+ switch (size) {
+ case 0: op = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8; break;
+ case 1: op = Q ? Iop_CmpGT16Sx8 : Iop_CmpGT16Sx4; break;
+ case 2: op = Q ? Iop_CmpGT32Sx4 : Iop_CmpGT32Sx2; break;
+ case 3: return False;
+ default: vassert(0);
+ }
+ assign(res, unop(Q ? Iop_NotV128 : Iop_Not64,
+ binop(op, zero, mkexpr(arg_m))));
+ }
+ DIP("vcge.%c%u %c%u, %c%u, #0\n", F ? 'f' : 's', 8 << size,
+ Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
+ break;
+ }
+ case 2: {
+ /* VCEQ #0 */
+ IROp op;
+ IRExpr *zero;
+ if (F) {
+ if (Q) {
+ zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
+ } else {
+ zero = mkU64(0);
+ }
+ switch (size) {
+ case 0: case 1: case 3: return False;
+ case 2: op = Q ? Iop_CmpEQ32Fx4 : Iop_CmpEQ32Fx2; break;
+ default: vassert(0);
+ }
+ assign(res, binop(op, zero, mkexpr(arg_m)));
+ } else {
+ switch (size) {
+ case 0: op = Q ? Iop_CmpNEZ8x16 : Iop_CmpNEZ8x8; break;
+ case 1: op = Q ? Iop_CmpNEZ16x8 : Iop_CmpNEZ16x4; break;
+ case 2: op = Q ? Iop_CmpNEZ32x4 : Iop_CmpNEZ32x2; break;
+ case 3: return False;
+ default: vassert(0);
+ }
+ assign(res, unop(Q ? Iop_NotV128 : Iop_Not64,
+ unop(op, mkexpr(arg_m))));
+ }
+ DIP("vceq.%c%u %c%u, %c%u, #0\n", F ? 'f' : 'i', 8 << size,
+ Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
+ break;
+ }
+ case 3: {
+ /* VCLE #0 */
+ IRExpr *zero;
+ IROp op;
+ if (Q) {
+ zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
+ } else {
+ zero = mkU64(0);
+ }
+ if (F) {
+ switch (size) {
+ case 0: case 1: case 3: return False;
+ case 2: op = Q ? Iop_CmpGE32Fx4 : Iop_CmpGE32Fx2; break;
+ default: vassert(0);
+ }
+ assign(res, binop(op, zero, mkexpr(arg_m)));
+ } else {
+ switch (size) {
+ case 0: op = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8; break;
+ case 1: op = Q ? Iop_CmpGT16Sx8 : Iop_CmpGT16Sx4; break;
+ case 2: op = Q ? Iop_CmpGT32Sx4 : Iop_CmpGT32Sx2; break;
+ case 3: return False;
+ default: vassert(0);
+ }
+ assign(res, unop(Q ? Iop_NotV128 : Iop_Not64,
+ binop(op, mkexpr(arg_m), zero)));
+ }
+ DIP("vcle.%c%u %c%u, %c%u, #0\n", F ? 'f' : 's', 8 << size,
+ Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
+ break;
+ }
+ case 4: {
+ /* VCLT #0 */
+ IROp op;
+ IRExpr *zero;
+ if (Q) {
+ zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
+ } else {
+ zero = mkU64(0);
+ }
+ if (F) {
+ switch (size) {
+ case 0: case 1: case 3: return False;
+ case 2: op = Q ? Iop_CmpGT32Fx4 : Iop_CmpGT32Fx2; break;
+ default: vassert(0);
+ }
+ assign(res, binop(op, zero, mkexpr(arg_m)));
+ } else {
+ switch (size) {
+ case 0: op = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8; break;
+ case 1: op = Q ? Iop_CmpGT16Sx8 : Iop_CmpGT16Sx4; break;
+ case 2: op = Q ? Iop_CmpGT32Sx4 : Iop_CmpGT32Sx2; break;
+ case 3: return False;
+ default: vassert(0);
+ }
+ assign(res, binop(op, zero, mkexpr(arg_m)));
+ }
+ DIP("vclt.%c%u %c%u, %c%u, #0\n", F ? 'f' : 's', 8 << size,
+ Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
+ break;
+ }
+ case 5:
+ return False;
+ case 6: {
+ /* VABS */
+ if (!F) {
+ IROp op;
+ switch(size) {
+ case 0: op = Q ? Iop_Abs8x16 : Iop_Abs8x8; break;
+ case 1: op = Q ? Iop_Abs16x8 : Iop_Abs16x4; break;
+ case 2: op = Q ? Iop_Abs32x4 : Iop_Abs32x2; break;
+ case 3: return False;
+ default: vassert(0);
+ }
+ assign(res, unop(op, mkexpr(arg_m)));
+ } else {
+ assign(res, unop(Q ? Iop_Abs32Fx4 : Iop_Abs32Fx2,
+ mkexpr(arg_m)));
+ }
+ DIP("vabs.%c%u %c%u, %c%u\n",
+ F ? 'f' : 's', 8 << size, Q ? 'q' : 'd', dreg,
+ Q ? 'q' : 'd', mreg);
+ break;
+ }
+ case 7: {
+ /* VNEG */
+ IROp op;
+ IRExpr *zero;
+ if (F) {
+ switch (size) {
+ case 0: case 1: case 3: return False;
+ case 2: op = Q ? Iop_Neg32Fx4 : Iop_Neg32Fx2; break;
+ default: vassert(0);
+ }
+ assign(res, unop(op, mkexpr(arg_m)));
+ } else {
+ if (Q) {
+ zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
+ } else {
+ zero = mkU64(0);
+ }
+ switch (size) {
+ case 0: op = Q ? Iop_Sub8x16 : Iop_Sub8x8; break;
+ case 1: op = Q ? Iop_Sub16x8 : Iop_Sub16x4; break;
+ case 2: op = Q ? Iop_Sub32x4 : Iop_Sub32x2; break;
+ case 3: return False;
+ default: vassert(0);
+ }
+ assign(res, binop(op, zero, mkexpr(arg_m)));
+ }
+ DIP("vneg.%c%u %c%u, %c%u\n",
+ F ? 'f' : 's', 8 << size, Q ? 'q' : 'd', dreg,
+ Q ? 'q' : 'd', mreg);
+ break;
+ }
+ default:
+ vassert(0);
+ }
+ if (Q) {
+ putQReg(dreg, mkexpr(res), condT);
+ } else {
+ putDRegI64(dreg, mkexpr(res), condT);
+ }
+ return True;
+ case 2:
+ if ((B >> 1) == 0) {
+ /* VSWP */
+ if (Q) {
+ arg_m = newTemp(Ity_V128);
+ assign(arg_m, getQReg(mreg));
+ putQReg(mreg, getQReg(dreg), condT);
+ putQReg(dreg, mkexpr(arg_m), condT);
+ } else {
+ arg_m = newTemp(Ity_I64);
+ assign(arg_m, getDRegI64(mreg));
+ putDRegI64(mreg, getDRegI64(dreg), condT);
+ putDRegI64(dreg, mkexpr(arg_m), condT);
+ }
+ DIP("vswp %c%u, %c%u\n",
+ Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
+ return True;
+ } else if ((B >> 1) == 1) {
+ /* VTRN */
+ IROp op_lo, op_hi;
+ IRTemp res1, res2;
+ if (Q) {
+ arg_m = newTemp(Ity_V128);
+ arg_d = newTemp(Ity_V128);
+ res1 = newTemp(Ity_V128);
+ res2 = newTemp(Ity_V128);
+ assign(arg_m, getQReg(mreg));
+ assign(arg_d, getQReg(dreg));
+ } else {
+ res1 = newTemp(Ity_I64);
+ res2 = newTemp(Ity_I64);
+ arg_m = newTemp(Ity_I64);
+ arg_d = newTemp(Ity_I64);
+ assign(arg_m, getDRegI64(mreg));
+ assign(arg_d, getDRegI64(dreg));
+ }
+ if (Q) {
+ switch (size) {
+ case 0:
+ op_lo = Iop_InterleaveOddLanes8x16;
+ op_hi = Iop_InterleaveEvenLanes8x16;
+ break;
+ case 1:
+ op_lo = Iop_InterleaveOddLanes16x8;
+ op_hi = Iop_InterleaveEvenLanes16x8;
+ break;
+ case 2:
+ op_lo = Iop_InterleaveOddLanes32x4;
+ op_hi = Iop_InterleaveEvenLanes32x4;
+ break;
+ case 3:
+ return False;
+ default:
+ vassert(0);
+ }
+ } else {
+ switch (size) {
+ case 0:
+ op_lo = Iop_InterleaveOddLanes8x8;
+ op_hi = Iop_InterleaveEvenLanes8x8;
+ break;
+ case 1:
+ op_lo = Iop_InterleaveOddLanes16x4;
+ op_hi = Iop_InterleaveEvenLanes16x4;
+ break;
+ case 2:
+ op_lo = Iop_InterleaveLO32x2;
+ op_hi = Iop_InterleaveHI32x2;
+ break;
+ case 3:
+ return False;
+ default:
+ vassert(0);
+ }
+ }
+ assign(res1, binop(op_lo, mkexpr(arg_m), mkexpr(arg_d)));
+ assign(res2, binop(op_hi, mkexpr(arg_m), mkexpr(arg_d)));
+ if (Q) {
+ putQReg(dreg, mkexpr(res1), condT);
+ putQReg(mreg, mkexpr(res2), condT);
+ } else {
+ putDRegI64(dreg, mkexpr(res1), condT);
+ putDRegI64(mreg, mkexpr(res2), condT);
+ }
+ DIP("vtrn.%u %c%u, %c%u\n",
+ 8 << size, Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
+ return True;
+ } else if ((B >> 1) == 2) {
+ /* VUZP */
+ IROp op_lo, op_hi;
+ IRTemp res1, res2;
+ if (!Q && size == 2)
+ return False;
+ if (Q) {
+ arg_m = newTemp(Ity_V128);
+ arg_d = newTemp(Ity_V128);
+ res1 = newTemp(Ity_V128);
+ res2 = newTemp(Ity_V128);
+ assign(arg_m, getQReg(mreg));
+ assign(arg_d, getQReg(dreg));
+ } else {
+ res1 = newTemp(Ity_I64);
+ res2 = newTemp(Ity_I64);
+ arg_m = newTemp(Ity_I64);
+ arg_d = newTemp(Ity_I64);
+ assign(arg_m, getDRegI64(mreg));
+ assign(arg_d, getDRegI64(dreg));
+ }
+ switch (size) {
+ case 0:
+ op_lo = Q ? Iop_CatOddLanes8x16 : Iop_CatOddLanes8x8;
+ op_hi = Q ? Iop_CatEvenLanes8x16 : Iop_CatEvenLanes8x8;
+ break;
+ case 1:
+ op_lo = Q ? Iop_CatOddLanes16x8 : Iop_CatOddLanes16x4;
+ op_hi = Q ? Iop_CatEvenLanes16x8 : Iop_CatEvenLanes16x4;
+ break;
+ case 2:
+ op_lo = Iop_CatOddLanes32x4;
+ op_hi = Iop_CatEvenLanes32x4;
+ break;
+ case 3:
+ return False;
+ default:
+ vassert(0);
+ }
+ assign(res1, binop(op_lo, mkexpr(arg_m), mkexpr(arg_d)));
+ assign(res2, binop(op_hi, mkexpr(arg_m), mkexpr(arg_d)));
+ if (Q) {
+ putQReg(dreg, mkexpr(res1), condT);
+ putQReg(mreg, mkexpr(res2), condT);
+ } else {
+ putDRegI64(dreg, mkexpr(res1), condT);
+ putDRegI64(mreg, mkexpr(res2), condT);
+ }
+ DIP("vuzp.%u %c%u, %c%u\n",
+ 8 << size, Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
+ return True;
+ } else if ((B >> 1) == 3) {
+ /* VZIP */
+ IROp op_lo, op_hi;
+ IRTemp res1, res2;
+ if (!Q && size == 2)
+ return False;
+ if (Q) {
+ arg_m = newTemp(Ity_V128);
+ arg_d = newTemp(Ity_V128);
+ res1 = newTemp(Ity_V128);
+ res2 = newTemp(Ity_V128);
+ assign(arg_m, getQReg(mreg));
+ assign(arg_d, getQReg(dreg));
+ } else {
+ res1 = newTemp(Ity_I64);
+ res2 = newTemp(Ity_I64);
+ arg_m = newTemp(Ity_I64);
+ arg_d = newTemp(Ity_I64);
+ assign(arg_m, getDRegI64(mreg));
+ assign(arg_d, getDRegI64(dreg));
+ }
+ switch (size) {
+ case 0:
+ op_lo = Q ? Iop_InterleaveHI8x16 : Iop_InterleaveHI8x8;
+ op_hi = Q ? Iop_InterleaveLO8x16 : Iop_InterleaveLO8x8;
+ break;
+ case 1:
+ op_lo = Q ? Iop_InterleaveHI16x8 : Iop_InterleaveHI16x4;
+ op_hi = Q ? Iop_InterleaveLO16x8 : Iop_InterleaveLO16x4;
+ break;
+ case 2:
+ op_lo = Iop_InterleaveHI32x4;
+ op_hi = Iop_InterleaveLO32x4;
+ break;
+ case 3:
+ return False;
+ default:
+ vassert(0);
+ }
+ assign(res1, binop(op_lo, mkexpr(arg_m), mkexpr(arg_d)));
+ assign(res2, binop(op_hi, mkexpr(arg_m), mkexpr(arg_d)));
+ if (Q) {
+ putQReg(dreg, mkexpr(res1), condT);
+ putQReg(mreg, mkexpr(res2), condT);
+ } else {
+ putDRegI64(dreg, mkexpr(res1), condT);
+ putDRegI64(mreg, mkexpr(res2), condT);
+ }
+ DIP("vzip.%u %c%u, %c%u\n",
+ 8 << size, Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
+ return True;
+ } else if (B == 8) {
+ /* VMOVN */
+ IROp op;
+ mreg >>= 1;
+ switch (size) {
+ case 0: op = Iop_Shorten16x8; break;
+ case 1: op = Iop_Shorten32x4; break;
+ case 2: op = Iop_Shorten64x2; break;
+ case 3: return False;
+ default: vassert(0);
+ }
+ putDRegI64(dreg, unop(op, getQReg(mreg)), condT);
+ DIP("vmovn.i%u d%u, q%u\n", 16 << size, dreg, mreg);
+ return True;
+ } else if (B == 9 || (B >> 1) == 5) {
+ /* VQMOVN, VQMOVUN */
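+ /* Narrow with saturation ('op') and, in parallel, by plain
+ truncation ('op2'); any lane that saturated makes the two results
+ differ, which sets the sticky QC flag. */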
+ IROp op, op2;
+ IRTemp tmp;
+ dreg = ((theInstr >> 18) & 0x10) | ((theInstr >> 12) & 0xF);
+ mreg = ((theInstr >> 1) & 0x10) | (theInstr & 0xF);
+ if (mreg & 1)
+ return False;
+ mreg >>= 1;
+ switch (size) {
+ case 0: op2 = Iop_Shorten16x8; break;
+ case 1: op2 = Iop_Shorten32x4; break;
+ case 2: op2 = Iop_Shorten64x2; break;
+ case 3: return False;
+ default: vassert(0);
+ }
+ switch (B & 3) {
+ case 0:
+ vassert(0);
+ case 1:
+ switch (size) {
+ case 0: op = Iop_QShortenU16Sx8; break;
+ case 1: op = Iop_QShortenU32Sx4; break;
+ case 2: op = Iop_QShortenU64Sx2; break;
+ case 3: return False;
+ default: vassert(0);
+ }
+ DIP("vqmovun.s%u d%u, q%u\n", 16 << size, dreg, mreg);
+ break;
+ case 2:
+ switch (size) {
+ case 0: op = Iop_QShortenS16Sx8; break;
+ case 1: op = Iop_QShortenS32Sx4; break;
+ case 2: op = Iop_QShortenS64Sx2; break;
+ case 3: return False;
+ default: vassert(0);
+ }
+ DIP("vqmovn.s%u d%u, q%u\n", 16 << size, dreg, mreg);
+ break;
+ case 3:
+ switch (size) {
+ case 0: op = Iop_QShortenU16Ux8; break;
+ case 1: op = Iop_QShortenU32Ux4; break;
+ case 2: op = Iop_QShortenU64Ux2; break;
+ case 3: return False;
+ default: vassert(0);
+ }
+ DIP("vqmovn.u%u d%u, q%u\n", 16 << size, dreg, mreg);
+ break;
+ default:
+ vassert(0);
+ }
+ res = newTemp(Ity_I64);
+ tmp = newTemp(Ity_I64);
+ assign(res, unop(op, getQReg(mreg)));
+#ifndef DISABLE_QC_FLAG
+ assign(tmp, unop(op2, getQReg(mreg)));
+ setFlag_QC(mkexpr(res), mkexpr(tmp), False, condT);
+#endif
+ putDRegI64(dreg, mkexpr(res), condT);
+ return True;
+ } else if (B == 12) {
+ /* VSHLL (maximum shift) */
+ IROp op, cvt;
+ UInt shift_imm;
+ if (Q)
+ return False;
+ if (dreg & 1)
+ return False;
+ dreg >>= 1;
+ shift_imm = 8 << size;
+ res = newTemp(Ity_V128);
+ switch (size) {
+ case 0: op = Iop_ShlN16x8; cvt = Iop_Longen8Ux8; break;
+ case 1: op = Iop_ShlN32x4; cvt = Iop_Longen16Ux4; break;
+ case 2: op = Iop_ShlN64x2; cvt = Iop_Longen32Ux2; break;
+ case 3: return False;
+ default: vassert(0);
+ }
+ assign(res, binop(op, unop(cvt, getDRegI64(mreg)),
+ mkU8(shift_imm)));
+ putQReg(dreg, mkexpr(res), condT);
+ DIP("vshll.i%u q%u, d%u, #%u\n", 8 << size, dreg, mreg, 8 << size);
+ return True;
+ } else if ((B >> 3) == 3 && (B & 3) == 0) {
+ /* VCVT (half<->single) */
+ /* Half-precision extensions are needed to run this */
+ vassert(0); // ATC
+ if (((theInstr >> 18) & 3) != 1)
+ return False;
+ if ((theInstr >> 8) & 1) {
+ if (dreg & 1)
+ return False;
+ dreg >>= 1;
+ putQReg(dreg, unop(Iop_F16toF32x4, getDRegI64(mreg)),
+ condT);
+ DIP("vcvt.f32.f16 q%u, d%u\n", dreg, mreg);
+ } else {
+ if (mreg & 1)
+ return False;
+ mreg >>= 1;
+ putDRegI64(dreg, unop(Iop_F32toF16x4, getQReg(mreg)),
+ condT);
+ DIP("vcvt.f16.f32 d%u, q%u\n", dreg, mreg);
+ }
+ return True;
+ } else {
+ return False;
+ }
+ vassert(0);
+ return True;
+ case 3:
+ if (((B >> 1) & BITS4(1,1,0,1)) == BITS4(1,0,0,0)) {
+ /* VRECPE */
+ IROp op;
+ F = (theInstr >> 8) & 1;
+ if (size != 2)
+ return False;
+ if (Q) {
+ op = F ? Iop_Recip32Fx4 : Iop_Recip32x4;
+ putQReg(dreg, unop(op, getQReg(mreg)), condT);
+ DIP("vrecpe.%c32 q%u, q%u\n", F ? 'f' : 'u', dreg, mreg);
+ } else {
+ op = F ? Iop_Recip32Fx2 : Iop_Recip32x2;
+ putDRegI64(dreg, unop(op, getDRegI64(mreg)), condT);
+ DIP("vrecpe.%c32 d%u, d%u\n", F ? 'f' : 'u', dreg, mreg);
+ }
+ return True;
+ } else if (((B >> 1) & BITS4(1,1,0,1)) == BITS4(1,0,0,1)) {
+ /* VRSQRTE */
+ IROp op;
+ F = (B >> 2) & 1;
+ if (size != 2)
+ return False;
+ if (F) {
+ /* fp */
+ op = Q ? Iop_Rsqrte32Fx4 : Iop_Rsqrte32Fx2;
+ } else {
+ /* unsigned int */
+ op = Q ? Iop_Rsqrte32x4 : Iop_Rsqrte32x2;
+ }
+ if (Q) {
+ putQReg(dreg, unop(op, getQReg(mreg)), condT);
+ DIP("vrsqrte.%c32 q%u, q%u\n", F ? 'f' : 'u', dreg, mreg);
+ } else {
+ putDRegI64(dreg, unop(op, getDRegI64(mreg)), condT);
+ DIP("vrsqrte.%c32 d%u, d%u\n", F ? 'f' : 'u', dreg, mreg);
+ }
+ return True;
+ } else if ((B >> 3) == 3) {
+ /* VCVT (fp<->integer) */
+ IROp op;
+ if (size != 2)
+ return False;
+ switch ((B >> 1) & 3) {
+ case 0:
+ op = Q ? Iop_I32StoFx4 : Iop_I32StoFx2;
+ DIP("vcvt.f32.s32 %c%u, %c%u\n",
+ Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
+ break;
+ case 1:
+ op = Q ? Iop_I32UtoFx4 : Iop_I32UtoFx2;
+ DIP("vcvt.f32.u32 %c%u, %c%u\n",
+ Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
+ break;
+ case 2:
+ op = Q ? Iop_FtoI32Sx4_RZ : Iop_FtoI32Sx2_RZ;
+ DIP("vcvt.s32.f32 %c%u, %c%u\n",
+ Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
+ break;
+ case 3:
+ op = Q ? Iop_FtoI32Ux4_RZ : Iop_FtoI32Ux2_RZ;
+ DIP("vcvt.u32.f32 %c%u, %c%u\n",
+ Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
+ break;
+ default:
+ vassert(0);
+ }
+ if (Q) {
+ putQReg(dreg, unop(op, getQReg(mreg)), condT);
+ } else {
+ putDRegI64(dreg, unop(op, getDRegI64(mreg)), condT);
+ }
+ return True;
+ } else {
+ return False;
+ }
+ vassert(0);
+ return True;
+ default:
+ vassert(0);
+ }
+ return False;
+}
+
+/* A7.4.6 One register and a modified immediate value */
+static
+void ppNeonImm(UInt imm, UInt cmode, UInt op)
+{
+ int i;
+ switch (cmode) {
+ case 0: case 1: case 8: case 9:
+ vex_printf("0x%x", imm);
+ break;
+ case 2: case 3: case 10: case 11:
+ vex_printf("0x%x00", imm);
+ break;
+ case 4: case 5:
+ vex_printf("0x%x0000", imm);
+ break;
+ case 6: case 7:
+ vex_printf("0x%x000000", imm);
+ break;
+ case 12:
+ vex_printf("0x%xff", imm);
+ break;
+ case 13:
+ vex_printf("0x%xffff", imm);
+ break;
+ case 14:
+ if (op) {
+ vex_printf("0x");
+ for (i = 7; i >= 0; i--)
+ vex_printf("%s", (imm & (1 << i)) ? "ff" : "00");
+ } else {
+ vex_printf("0x%x", imm);
+ }
+ break;
+ case 15:
+ vex_printf("0x%x", imm);
+ break;
+ }
+}
+
+static
+const char *ppNeonImmType(UInt cmode, UInt op)
+{
+ switch (cmode) {
+ case 0 ... 7:
+ case 12: case 13:
+ return "i32";
+ case 8 ... 11:
+ return "i16";
+ case 14:
+ if (op)
+ return "i64";
+ else
+ return "i8";
+ case 15:
+ if (op)
+ vassert(0);
+ else
+ return "f32";
+ default:
+ vassert(0);
+ }
+}
+
+static
+void DIPimm(UInt imm, UInt cmode, UInt op,
+ const char *instr, UInt Q, UInt dreg)
+{
+ if (vex_traceflags & VEX_TRACE_FE) {
+ vex_printf("%s.%s %c%u, #", instr,
+ ppNeonImmType(cmode, op), Q ? 'q' : 'd', dreg);
+ ppNeonImm(imm, cmode, op);
+ vex_printf("\n");
+ }
+}
+
+static
+Bool dis_neon_data_1reg_and_imm ( UInt theInstr, IRTemp condT )
+{
+ UInt dreg = get_neon_d_regno(theInstr);
+ ULong imm_raw = ((theInstr >> 17) & 0x80) | ((theInstr >> 12) & 0x70) |
+ (theInstr & 0xf);
+ ULong imm_raw_pp = imm_raw;
+ UInt cmode = (theInstr >> 8) & 0xf;
+ UInt op_bit = (theInstr >> 5) & 1;
+ ULong imm = 0;
+ UInt Q = (theInstr >> 6) & 1;
+ int i, j;
+ UInt tmp;
+ IRExpr *imm_val;
+ IRExpr *expr;
+ IRTemp tmp_var;
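+ /* Expand imm8 (gathered into imm_raw from three instruction fields)
+ into a replicated 64-bit constant, following the ARM
+ AdvSIMDExpandImm() pseudocode: cmode selects which byte/halfword
+ position imm8 occupies (cmodes 12/13 fill the bytes below it with
+ 0xff), cmode 14 with op=1 turns each imm8 bit into a 0x00 or 0xff
+ byte, and cmode 15 builds a 32-bit floating-point immediate,
+ replicated into both halves. */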
+ switch(cmode) {
+ case 7: case 6:
+ imm_raw = imm_raw << 8;
+ /* fallthrough */
+ case 5: case 4:
+ imm_raw = imm_raw << 8;
+ /* fallthrough */
+ case 3: case 2:
+ imm_raw = imm_raw << 8;
+ /* fallthrough */
+ case 0: case 1:
+ imm = (imm_raw << 32) | imm_raw;
+ break;
+ case 11: case 10:
+ imm_raw = imm_raw << 8;
+ /* fallthrough */
+ case 9: case 8:
+ imm_raw = (imm_raw << 16) | imm_raw;
+ imm = (imm_raw << 32) | imm_raw;
+ break;
+ case 13:
+ imm_raw = (imm_raw << 8) | 0xff;
+ /* fallthrough */
+ case 12:
+ imm_raw = (imm_raw << 8) | 0xff;
+ imm = (imm_raw << 32) | imm_raw;
+ break;
+ case 14:
+ if (! op_bit) {
+ for(i = 0; i < 8; i++) {
+ imm = (imm << 8) | imm_raw;
+ }
+ } else {
+ for(i = 7; i >= 0; i--) {
+ tmp = 0;
+ for(j = 0; j < 8; j++) {
+ tmp = (tmp << 1) | ((imm_raw >> i) & 1);
+ }
+ imm = (imm << 8) | tmp;
+ }
+ }
+ break;
+ case 15:
+ imm = (imm_raw & 0x80) << 5;
+ imm |= ((~imm_raw & 0x40) << 5);  /* bit 11 = NOT(b) */
+ for(i = 1; i <= 4; i++)
+ imm |= (imm_raw & 0x40) << i;
+ imm |= (imm_raw & 0x7f);
+ imm = imm << 19;
+ imm = (imm << 32) | imm;
+ break;
+ default:
+ return False;
+ }
+ if (Q) {
+ imm_val = binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm));
+ } else {
+ imm_val = mkU64(imm);
+ }
+ if (((op_bit == 0) &&
+ (((cmode & 9) == 0) || ((cmode & 13) == 8) || ((cmode & 12) == 12))) ||
+ ((op_bit == 1) && (cmode == 14))) {
+ /* VMOV (immediate) */
+ if (Q) {
+ putQReg(dreg, imm_val, condT);
+ } else {
+ putDRegI64(dreg, imm_val, condT);
+ }
+ DIPimm(imm_raw_pp, cmode, op_bit, "vmov", Q, dreg);
+ return True;
+ }
+ if ((op_bit == 1) &&
+ (((cmode & 9) == 0) || ((cmode & 13) == 8) || ((cmode & 14) == 12))) {
+ /* VMVN (immediate) */
+ if (Q) {
+ putQReg(dreg, unop(Iop_NotV128, imm_val), condT);
+ } else {
+ putDRegI64(dreg, unop(Iop_Not64, imm_val), condT);
+ }
+ DIPimm(imm_raw_pp, cmode, op_bit, "vmvn", Q, dreg);
+ return True;
+ }
+ if (Q) {
+ tmp_var = newTemp(Ity_V128);
+ assign(tmp_var, getQReg(dreg));
+ } else {
+ tmp_var = newTemp(Ity_I64);
+ assign(tmp_var, getDRegI64(dreg));
+ }
+ if ((op_bit == 0) && (((cmode & 9) == 1) || ((cmode & 13) == 9))) {
+ /* VORR (immediate) */
+ if (Q)
+ expr = binop(Iop_OrV128, mkexpr(tmp_var), imm_val);
+ else
+ expr = binop(Iop_Or64, mkexpr(tmp_var), imm_val);
+ DIPimm(imm_raw_pp, cmode, op_bit, "vorr", Q, dreg);
+ } else if ((op_bit == 1) && (((cmode & 9) == 1) || ((cmode & 13) == 9))) {
+ /* VBIC (immediate) */
+ if (Q)
+ expr = binop(Iop_AndV128, mkexpr(tmp_var),
+ unop(Iop_NotV128, imm_val));
+ else
+ expr = binop(Iop_And64, mkexpr(tmp_var), unop(Iop_Not64, imm_val));
+ DIPimm(imm_raw_pp, cmode, op_bit, "vbic", Q, dreg);
+ } else {
+ return False;
+ }
+ if (Q)
+ putQReg(dreg, expr, condT);
+ else
+ putDRegI64(dreg, expr, condT);
+ return True;
+}
+
+/* A7.4 Advanced SIMD data-processing instructions */
+static
+Bool dis_neon_data_processing ( UInt theInstr, IRTemp condT )
+{
+ UInt A = (theInstr >> 19) & 0x1F;
+ UInt B = (theInstr >> 8) & 0xF;
+ UInt C = (theInstr >> 4) & 0xF;
+ UInt U = (theInstr >> 24) & 0x1;
+
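+ /* Dispatch on the A (bits 23:19), B (bits 11:8), C (bits 7:4) and
+ U (bit 24) fields, following the A7.4 decode table. */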
+ if (! (A & 0x10)) {
+ return dis_neon_data_3same(theInstr, condT);
+ }
+ if (((A & 0x17) == 0x10) && ((C & 0x9) == 0x1)) {
+ return dis_neon_data_1reg_and_imm(theInstr, condT);
+ }
+ if ((C & 1) == 1) {
+ return dis_neon_data_2reg_and_shift(theInstr, condT);
+ }
+ if (((C & 5) == 0) && (((A & 0x14) == 0x10) || ((A & 0x16) == 0x14))) {
+ return dis_neon_data_3diff(theInstr, condT);
+ }
+ if (((C & 5) == 4) && (((A & 0x14) == 0x10) || ((A & 0x16) == 0x14))) {
+ return dis_neon_data_2reg_and_scalar(theInstr, condT);
+ }
+ if ((A & 0x16) == 0x16) {
+ if ((U == 0) && ((C & 1) == 0)) {
+ return dis_neon_vext(theInstr, condT);
+ }
+ if ((U != 1) || ((C & 1) == 1))
+ return False;
+ if ((B & 8) == 0) {
+ return dis_neon_data_2reg_misc(theInstr, condT);
+ }
+ if ((B & 12) == 8) {
+ return dis_neon_vtb(theInstr, condT);
+ }
+ if ((B == 12) && ((C & 9) == 0)) {
+ return dis_neon_vdup(theInstr, condT);
+ }
+ }
+ return False;
+}
+
+
+/*------------------------------------------------------------*/
+/*--- NEON loads and stores ---*/
+/*------------------------------------------------------------*/
+
+/* For NEON memory operations, we use the standard scheme to handle
+ conditionalisation: generate a jump around the instruction if the
+ condition is false. That's only necessary in Thumb mode, however,
+ since in ARM mode NEON instructions are unconditional. */
+
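+/* Concretely (a sketch): for a conditional Thumb NEON access the
+ generated IR has the shape
+
+ if (!<cond>) goto <next insn>;  // mk_skip_over_T32_if_cond_is_false
+ ... unconditional IR for the memory operation ...
+
+ so the IR for the operation itself never needs per-statement
+ guards. */
+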
+/* A helper function for what follows.  It loads one (8 << size)-bit
+ element from memory into lane 'index' of each of the N+1 registers
+ rD, rD+inc, ..., with register rD+i*inc reading from 'addr' plus i
+ element-sizes.  It assumes we already went uncond as per comments
+ at the top of this section. */
+static
+void mk_neon_elem_load_to_one_lane( UInt rD, UInt inc, UInt index,
+ UInt N, UInt size, IRTemp addr )
+{
+ UInt i;
+ switch (size) {
+ case 0:
+ putDRegI64(rD, triop(Iop_SetElem8x8, getDRegI64(rD), mkU8(index),
+ loadLE(Ity_I8, mkexpr(addr))), IRTemp_INVALID);
+ break;
+ case 1:
+ putDRegI64(rD, triop(Iop_SetElem16x4, getDRegI64(rD), mkU8(index),
+ loadLE(Ity_I16, mkexpr(addr))), IRTemp_INVALID);
+ break;
+ case 2:
+ putDRegI64(rD, triop(Iop_SetElem32x2, getDRegI64(rD), mkU8(index),
+ loadLE(Ity_I32, mkexpr(addr))), IRTemp_INVALID);
+ break;
+ default:
+ vassert(0);
+ }
+ for (i = 1; i <= N; i++) {
+ switch (size) {
+ case 0:
+ putDRegI64(rD + i * inc,
+ triop(Iop_SetElem8x8,
+ getDRegI64(rD + i * inc),
+ mkU8(index),
+ loadLE(Ity_I8, binop(Iop_Add32,
+ mkexpr(addr),
+ mkU32(i * 1)))),
+ IRTemp_INVALID);
+ break;
+ case 1:
+ putDRegI64(rD + i * inc,
+ triop(Iop_SetElem16x4,
+ getDRegI64(rD + i * inc),
+ mkU8(index),
+ loadLE(Ity_I16, binop(Iop_Add32,
+ mkexpr(addr),
+ mkU32(i * 2)))),
+ IRTemp_INVALID);
+ break;
+ case 2:
+ putDRegI64(rD + i * inc,
+ triop(Iop_SetElem32x2,
+ getDRegI64(rD + i * inc),
+ mkU8(index),
+ loadLE(Ity_I32, binop(Iop_Add32,
+ mkexpr(addr),
+ mkU32(i * 4)))),
+ IRTemp_INVALID);
+ break;
+ default:
+ vassert(0);
+ }
+ }
+}
+
+/* A(nother) helper function for what follows. It assumes we already
+ went uncond as per comments at the top of this section. */
+static
+void mk_neon_elem_store_from_one_lane( UInt rD, UInt inc, UInt index,
+ UInt N, UInt size, IRTemp addr )
+{
+ UInt i;
+ switch (size) {
+ case 0:
+ storeLE(mkexpr(addr),
+ binop(Iop_GetElem8x8, getDRegI64(rD), mkU8(index)));
+ break;
+ case 1:
+ storeLE(mkexpr(addr),
+ binop(Iop_GetElem16x4, getDRegI64(rD), mkU8(index)));
+ break;
+ case 2:
+ storeLE(mkexpr(addr),
+ binop(Iop_GetElem32x2, getDRegI64(rD), mkU8(index)));
+ break;
+ default:
+ vassert(0);
+ }
+ for (i = 1; i <= N; i++) {
+ switch (size) {
+ case 0:
+ storeLE(binop(Iop_Add32, mkexpr(addr), mkU32(i * 1)),
+ binop(Iop_GetElem8x8, getDRegI64(rD + i * inc),
+ mkU8(index)));
+ break;
+ case 1:
+ storeLE(binop(Iop_Add32, mkexpr(addr), mkU32(i * 2)),
+ binop(Iop_GetElem16x4, getDRegI64(rD + i * inc),
+ mkU8(index)));
+ break;
+ case 2:
+ storeLE(binop(Iop_Add32, mkexpr(addr), mkU32(i * 4)),
+ binop(Iop_GetElem32x2, getDRegI64(rD + i * inc),
+ mkU8(index)));
+ break;
+ default:
+ vassert(0);
+ }
+ }
+}
+
+/* A7.7 Advanced SIMD element or structure load/store instructions */
+static
+Bool dis_neon_elem_or_struct_load ( UInt theInstr,
+ Bool isT, IRTemp condT )
+{
+# define INSN(_bMax,_bMin) SLICE_UInt(theInstr, (_bMax), (_bMin))
+ UInt A = INSN(23,23);
+ UInt B = INSN(11,8);
+ UInt L = INSN(21,21);
+ UInt rD = (INSN(22,22) << 4) | INSN(15,12);
+ UInt rN = INSN(19,16);
+ UInt rM = INSN(3,0);
+ UInt N, size, i, j;
+ UInt inc;
+ UInt regs = 1;
+
+ if (isT) {
+ vassert(condT != IRTemp_INVALID);
+ } else {
+ vassert(condT == IRTemp_INVALID);
+ }
+ /* So now, if condT is not IRTemp_INVALID, we know we're
+ dealing with Thumb code. */
+
+ if (INSN(20,20) != 0)
+ return False;
+
+ IRTemp initialRn = newTemp(Ity_I32);
+ assign(initialRn, isT ? getIRegT(rN) : getIRegA(rN));
+
+ IRTemp initialRm = newTemp(Ity_I32);
+ assign(initialRm, isT ? getIRegT(rM) : getIRegA(rM));
+
+ if (A) {
+ N = B & 3;
+ if ((B >> 2) < 3) {
+ /* VSTn / VLDn (n-element structure from/to one lane) */
+
+ size = B >> 2;
+
+ switch (size) {
+ case 0: i = INSN(7,5); inc = 1; break;
+ case 1: i = INSN(7,6); inc = INSN(5,5) ? 2 : 1; break;
+ case 2: i = INSN(7,7); inc = INSN(6,6) ? 2 : 1; break;
+ case 3: return False;
+ default: vassert(0);
+ }
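+         /* Example: with size == 1 (16-bit elements) the lane index
+            comes from bits [7:6] and bit [5] selects a register
+            stride of 2, as in VLD3.16 {d0[1],d2[1],d4[1]}, [r0]. */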
+
+ IRTemp addr = newTemp(Ity_I32);
+ assign(addr, mkexpr(initialRn));
+
+ // go uncond
+ if (condT != IRTemp_INVALID)
+ mk_skip_over_T32_if_cond_is_false(condT);
+ // now uncond
+
+ if (L)
+ mk_neon_elem_load_to_one_lane(rD, inc, i, N, size, addr);
+ else
+ mk_neon_elem_store_from_one_lane(rD, inc, i, N, size, addr);
+ DIP("v%s%u.%u {", L ? "ld" : "st", N + 1, 8 << size);
+ for (j = 0; j <= N; j++) {
+ if (j)
+ DIP(", ");
+ DIP("d%u[%u]", rD + j * inc, i);
+ }
+ DIP("}, [r%u]", rN);
+ if (rM != 13 && rM != 15) {
+ DIP(", r%u\n", rM);
+ } else {
+ DIP("%s\n", (rM != 15) ? "!" : "");
+ }
+ } else {
+ /* VLDn (single element to all lanes) */
+ UInt r;
+ if (L == 0)
+ return False;
+
+ inc = INSN(5,5) + 1;
+ size = INSN(7,6);
+
+ /* size == 3 and size == 2 cases differ in alignment constraints */
+ if (size == 3 && N == 3 && INSN(4,4) == 1)
+ size = 2;
+
+ if (size == 0 && N == 0 && INSN(4,4) == 1)
+ return False;
+ if (N == 2 && INSN(4,4) == 1)
+ return False;
+ if (size == 3)
+ return False;
+
+ // go uncond
+ if (condT != IRTemp_INVALID)
+ mk_skip_over_T32_if_cond_is_false(condT);
+ // now uncond
+
+ IRTemp addr = newTemp(Ity_I32);
+ assign(addr, mkexpr(initialRn));
+
+ if (N == 0 && INSN(5,5))
+ regs = 2;
+
+ for (r = 0; r < regs; r++) {
+ switch (size) {
+ case 0:
+ putDRegI64(rD + r, unop(Iop_Dup8x8,
+ loadLE(Ity_I8, mkexpr(addr))),
+ IRTemp_INVALID);
+ break;
+ case 1:
+ putDRegI64(rD + r, unop(Iop_Dup16x4,
+ loadLE(Ity_I16, mkexpr(addr))),
+ IRTemp_INVALID);
+ break;
+ case 2:
+ putDRegI64(rD + r, unop(Iop_Dup32x2,
+ loadLE(Ity_I32, mkexpr(addr))),
+ IRTemp_INVALID);
+ break;
+ default:
+ vassert(0);
+ }
+ for (i = 1; i <= N; i++) {
+ switch (size) {
+ case 0:
+ putDRegI64(rD + r + i * inc,
+ unop(Iop_Dup8x8,
+ loadLE(Ity_I8, binop(Iop_Add32,
+ mkexpr(addr),
+ mkU32(i * 1)))),
+ IRTemp_INVALID);
+ break;
+ case 1:
+ putDRegI64(rD + r + i * inc,
+ unop(Iop_Dup16x4,
+ loadLE(Ity_I16, binop(Iop_Add32,
+ mkexpr(addr),
+ mkU32(i * 2)))),
+ IRTemp_INVALID);
+ break;
+ case 2:
+ putDRegI64(rD + r + i * inc,
+ unop(Iop_Dup32x2,
+ loadLE(Ity_I32, binop(Iop_Add32,
+ mkexpr(addr),
+ mkU32(i * 4)))),
+ IRTemp_INVALID);
+ break;
+ default:
+ vassert(0);
+ }
+ }
+ }
+ DIP("vld%u.%u {", N + 1, 8 << size);
+ for (r = 0; r < regs; r++) {
+ for (i = 0; i <= N; i++) {
+ if (i || r)
+ DIP(", ");
+ DIP("d%u[]", rD + r + i * inc);
+ }
+ }
+ DIP("}, [r%u]", rN);
+ if (rM != 13 && rM != 15) {
+ DIP(", r%u\n", rM);
+ } else {
+ DIP("%s\n", (rM != 15) ? "!" : "");
+ }
+ }
+ /* Writeback. We're uncond here, so no condT-ing. */
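+   /* Example: VLD2.16 {d0[1],d1[1]}, [r2]! encodes rM == 13, so r2
+      is advanced by (1 << size) * (N + 1) = 2 * 2 = 4, the number of
+      bytes transferred. */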
+ if (rM != 15) {
+ if (rM == 13) {
+ IRExpr* e = binop(Iop_Add32,
+ mkexpr(initialRn),
+ mkU32((1 << size) * (N + 1)));
+ if (isT)
+ putIRegT(rN, e, IRTemp_INVALID);
+ else
+ putIRegA(rN, e, IRTemp_INVALID, Ijk_Boring);
+ } else {
+ IRExpr* e = binop(Iop_Add32,
+ mkexpr(initialRn),
+ mkexpr(initialRm));
+ if (isT)
+ putIRegT(rN, e, IRTemp_INVALID);
+ else
+ putIRegA(rN, e, IRTemp_INVALID, Ijk_Boring);
+ }
+ }
+ return True;
+ } else {
+ IRTemp tmp;
+ UInt r, elems;
+ /* VSTn / VLDn (multiple n-element structures) */
+ if (B == BITS4(0,0,1,0) || B == BITS4(0,1,1,0)
+ || B == BITS4(0,1,1,1) || B == BITS4(1,0,1,0)) {
+ N = 0;
+ } else if (B == BITS4(0,0,1,1) || B == BITS4(1,0,0,0)
+ || B == BITS4(1,0,0,1)) {
+ N = 1;
+ } else if (B == BITS4(0,1,0,0) || B == BITS4(0,1,0,1)) {
+ N = 2;
+ } else if (B == BITS4(0,0,0,0) || B == BITS4(0,0,0,1)) {
+ N = 3;
+ } else {
+ return False;
+ }
+ inc = (B & 1) + 1;
+ if (N == 1 && B == BITS4(0,0,1,1)) {
+ regs = 2;
+ } else if (N == 0) {
+ if (B == BITS4(1,0,1,0)) {
+ regs = 2;
+ } else if (B == BITS4(0,1,1,0)) {
+ regs = 3;
+ } else if (B == BITS4(0,0,1,0)) {
+ regs = 4;
+ }
+ }
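+      /* Example: B == 0110 is a 1-element structure (N == 0) with
+         inc == (B & 1) + 1 == 1 and regs == 3, i.e.
+         vld1.32 {d0,d1,d2}, [r1]. */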
+
+ size = INSN(7,6);
+ if (N == 0 && size == 3)
+ size = 2;
+ if (size == 3)
+ return False;
+
+ elems = 8 / (1 << size);
+
+ // go uncond
+ if (condT != IRTemp_INVALID)
+ mk_skip_over_T32_if_cond_is_false(condT);
+ // now uncond
+
+ IRTemp addr = newTemp(Ity_I32);
+ assign(addr, mkexpr(initialRn));
+
+ for (r = 0; r < regs; r++) {
+ for (i = 0; i < elems; i++) {
+ if (L)
+ mk_neon_elem_load_to_one_lane(rD + r, inc, i, N, size, addr);
+ else
+ mk_neon_elem_store_from_one_lane(rD + r, inc, i, N, size, addr);
+ tmp = newTemp(Ity_I32);
+ assign(tmp, binop(Iop_Add32, mkexpr(addr),
+ mkU32((1 << size) * (N + 1))));
+ addr = tmp;
+ }
+ }
+ /* Writeback */
+ if (rM != 15) {
+ if (rM == 13) {
+ IRExpr* e = binop(Iop_Add32,
+ mkexpr(initialRn),
+ mkU32(8 * (N + 1) * regs));
+ if (isT)
+ putIRegT(rN, e, IRTemp_INVALID);
+ else
+ putIRegA(rN, e, IRTemp_INVALID, Ijk_Boring);
+ } else {
+ IRExpr* e = binop(Iop_Add32,
+ mkexpr(initialRn),
+ mkexpr(initialRm));
+ if (isT)
+ putIRegT(rN, e, IRTemp_INVALID);
+ else
+ putIRegA(rN, e, IRTemp_INVALID, Ijk_Boring);
+ }
+ }
+ DIP("v%s%u.%u {", L ? "ld" : "st", N + 1, 8 << INSN(7,6));
+ if ((inc == 1 && regs * (N + 1) > 1)
+ || (inc == 2 && regs > 1 && N > 0)) {
+ DIP("d%u-d%u", rD, rD + regs * (N + 1) - 1);
+ } else {
+ for (r = 0; r < regs; r++) {
+ for (i = 0; i <= N; i++) {
+ if (i || r)
+ DIP(", ");
+ DIP("d%u", rD + r + i * inc);
+ }
+ }
+ }
+ DIP("}, [r%u]", rN);
+ if (rM != 13 && rM != 15) {
+ DIP(", r%u\n", rM);
+ } else {
+ DIP("%s\n", (rM != 15) ? "!" : "");
+ }
+ return True;
+ }
+# undef INSN
+}
+
+
+/*------------------------------------------------------------*/
+/*--- NEON, top level control ---*/
+/*------------------------------------------------------------*/
+
+/* Both ARM and Thumb */
+
+/* Translate a NEON instruction. On success, returns True; *dres may
+   or may not be updated. On failure, returns False without changing
+   *dres or creating any IR.
+
+   The Thumb and ARM encodings are similar for the bottom 24 bits, but
+   the top 8 bits differ slightly. In both cases, the caller must
+   pass the entire 32 bits. Callers may pass any instruction; this
+   function ignores non-NEON ones.
+
+ Caller must supply an IRTemp 'condT' holding the gating condition,
+ or IRTemp_INVALID indicating the insn is always executed. In ARM
+ code, this must always be IRTemp_INVALID because NEON insns are
+ unconditional for ARM.
+
+ Finally, the caller must indicate whether this occurs in ARM or in
+ Thumb code.
+*/
+static Bool decode_NEON_instruction (
+ /*MOD*/DisResult* dres,
+ UInt insn32,
+ IRTemp condT,
+ Bool isT
+ )
+{
+# define INSN(_bMax,_bMin) SLICE_UInt(insn32, (_bMax), (_bMin))
+
+ /* There are two kinds of instruction to deal with: load/store and
+ data processing. In each case, in ARM mode we merely identify
+ the kind, and pass it on to the relevant sub-handler. In Thumb
+ mode we identify the kind, swizzle the bits around to make it
+ have the same encoding as in ARM, and hand it on to the
+ sub-handler.
+ */
+
+ /* In ARM mode, NEON instructions can't be conditional. */
+ if (!isT)
+ vassert(condT == IRTemp_INVALID);
+
+ /* Data processing:
+ Thumb: 111U 1111 AAAA Axxx xxxx BBBB CCCC xxxx
+ ARM: 1111 001U AAAA Axxx xxxx BBBB CCCC xxxx
+ */
+ if (!isT && INSN(31,25) == BITS7(1,1,1,1,0,0,1)) {
+ // ARM, DP
+ return dis_neon_data_processing(INSN(31,0), condT);
+ }
+ if (isT && INSN(31,29) == BITS3(1,1,1)
+ && INSN(27,24) == BITS4(1,1,1,1)) {
+ // Thumb, DP
+ UInt reformatted = INSN(23,0);
+ reformatted |= (INSN(28,28) << 24); // U bit
+ reformatted |= (BITS7(1,1,1,1,0,0,1) << 25);
+ return dis_neon_data_processing(reformatted, condT);
+ }
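+   /* Example of the swizzle: the Thumb opcode 0xFF123456 has U == 1
+      (bit 28) and so is rewritten as 0xF3123456, which carries the
+      same A, B and C fields in the ARM encoding shown above. */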
+
+ /* Load/store:
+ Thumb: 1111 1001 AxL0 xxxx xxxx BBBB xxxx xxxx
+ ARM: 1111 0100 AxL0 xxxx xxxx BBBB xxxx xxxx
+ */
+ if (!isT && INSN(31,24) == BITS8(1,1,1,1,0,1,0,0)) {
+ // ARM, memory
+ return dis_neon_elem_or_struct_load(INSN(31,0), isT, condT);
+ }
+ if (isT && INSN(31,24) == BITS8(1,1,1,1,1,0,0,1)) {
+ UInt reformatted = INSN(23,0);
+ reformatted |= (BITS8(1,1,1,1,0,1,0,0) << 24);
+ return dis_neon_elem_or_struct_load(reformatted, isT, condT);
+ }
+
+ /* Doesn't match. */
+ return False;
+
+# undef INSN
+}
+
+
+/*------------------------------------------------------------*/
+/*--- V6 MEDIA instructions ---*/
+/*------------------------------------------------------------*/
+
+/* Both ARM and Thumb */
+
+/* Translate a V6 media instruction. On success, returns True; *dres
+   may or may not be updated. On failure, returns False without
+   changing *dres or creating any IR.
+
+ The Thumb and ARM encodings are completely different. In Thumb
+ mode, the caller must pass the entire 32 bits. In ARM mode it must
+ pass the lower 28 bits. Apart from that, callers may pass any
+ instruction; this function ignores anything it doesn't recognise.
+
+ Caller must supply an IRTemp 'condT' holding the gating condition,
+ or IRTemp_INVALID indicating the insn is always executed.
+
+   Caller must also supply an ARMCondcode 'conq'. This is used only
+   for debug printing and for no other purpose. For ARM, it is simply
+   the top 4 bits of the original instruction. For Thumb, the
+   condition is not (really) known until run time, so ARMCondAL
+   should be passed, merely so that printing of these instructions
+   does not show any condition.
+
+ Finally, the caller must indicate whether this occurs in ARM or in
+ Thumb code.
+*/
+static Bool decode_V6MEDIA_instruction (
+ /*MOD*/DisResult* dres,
+ UInt insnv6m,
+ IRTemp condT,
+ ARMCondcode conq,
+ Bool isT
+ )
+{
+# define INSNA(_bMax,_bMin) SLICE_UInt(insnv6m, (_bMax), (_bMin))
+# define INSNT0(_bMax,_bMin) SLICE_UInt( ((insnv6m >> 16) & 0xFFFF), \
+ (_bMax), (_bMin) )
+# define INSNT1(_bMax,_bMin) SLICE_UInt( ((insnv6m >> 0) & 0xFFFF), \
+ (_bMax), (_bMin) )
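+   /* For Thumb, insnv6m carries the first halfword of the insn in
+      bits [31:16] (hence INSNT0) and the second halfword in bits
+      [15:0] (INSNT1). */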
+ HChar dis_buf[128];
+ dis_buf[0] = 0;
+
+ if (isT) {
+ vassert(conq == ARMCondAL);
+ } else {
+ vassert(INSNA(31,28) == BITS4(0,0,0,0)); // caller's obligation
+ vassert(conq >= ARMCondEQ && conq <= ARMCondAL);
+ }
+
+ /* ----------- smulbb, smulbt, smultb, smultt ----------- */
+ {
+ UInt regD = 99, regM = 99, regN = 99, bitM = 0, bitN = 0;
+ Bool gate = False;
+
+ if (isT) {
+ if (INSNT0(15,4) == 0xFB1 && INSNT1(15,12) == BITS4(1,1,1,1)
+ && INSNT1(7,6) == BITS2(0,0)) {
+ regD = INSNT1(11,8);
+ regM = INSNT1(3,0);
+ regN = INSNT0(3,0);
+ bitM = INSNT1(4,4);
+ bitN = INSNT1(5,5);
+ if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
+ gate = True;
+ }
+ } else {
+ if (BITS8(0,0,0,1,0,1,1,0) == INSNA(27,20) &&
+ BITS4(0,0,0,0) == INSNA(15,12) &&
+ BITS4(1,0,0,0) == (INSNA(7,4) & BITS4(1,0,0,1)) ) {
+ regD = INSNA(19,16);
+ regM = INSNA(11,8);
+ regN = INSNA(3,0);
+ bitM = INSNA(6,6);
+ bitN = INSNA(5,5);
+ if (regD != 15 && regN != 15 && regM != 15)
+ gate = True;
+ }
+ }
+
+ if (gate) {
+ IRTemp srcN = newTemp(Ity_I32);
+ IRTemp srcM = newTemp(Ity_I32);
+ IRTemp res = newTemp(Ity_I32);
+
+ assign( srcN, binop(Iop_Sar32,
+ binop(Iop_Shl32,
+ isT ? getIRegT(regN) : getIRegA(regN),
+ mkU8(bitN ? 0 : 16)), mkU8(16)) );
+ assign( srcM, binop(Iop_Sar32,
+ binop(Iop_Shl32,
+ isT ? getIRegT(regM) : getIRegA(regM),
+ mkU8(bitM ? 0 : 16)), mkU8(16)) );
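+      /* The Shl32/Sar32 pairs sign-extend the selected halfword: with
+         bitN == 0 the operand is shifted left 16 then arithmetically
+         right 16 (bottom half); with bitN == 1 only the arithmetic
+         right shift by 16 is applied (top half). */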
+ assign( res, binop(Iop_Mul32, mkexpr(srcN), mkexpr(srcM)) );
+
+ if (isT)
+ putIRegT( regD, mkexpr(res), condT );
+ else
+ putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
+
+ DIP( "smul%c%c%s r%u, r%u, r%u\n", bitN ? 't' : 'b', bitM ? 't' : 'b',
+ nCC(conq), regD, regN, regM );
+ return True;
+ }
+ /* fall through */
+ }
+
+   /* ------------ smulwb<c> <Rd>,<Rn>,<Rm> ------------- */
+   /* ------------ smulwt<c> <Rd>,<Rn>,<Rm> ------------- */
+ {
+ UInt regD = 99, regN = 99, regM = 99, bitM = 0;
+ Bool gate = False;
+
+ if (isT) {
+ if (INSNT0(15,4) == 0xFB3 && INSNT1(15,12) == BITS4(1,1,1,1)
+ && INSNT1(7,5) == BITS3(0,0,0)) {
+ regN = INSNT0(3,0);
+ regD = INSNT1(11,8);
+ regM = INSNT1(3,0);
+ bitM = INSNT1(4,4);
+ if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
+ gate = True;
+ }
+ } else {
+ if (INSNA(27,20) == BITS8(0,0,0,1,0,0,1,0) &&
+ INSNA(15,12) == BITS4(0,0,0,0) &&
+ (INSNA(7,4) & BITS4(1,0,1,1)) == BITS4(1,0,1,0)) {
+ regD = INSNA(19,16);
+ regN = INSNA(3,0);
+ regM = INSNA(11,8);
+ bitM = INSNA(6,6);
+ if (regD != 15 && regN != 15 && regM != 15)
+ gate = True;
+ }
+ }
+
+ if (gate) {
+ IRTemp irt_prod = newTemp(Ity_I64);
+
+ assign( irt_prod,
+ binop(Iop_MullS32,
+ isT ? getIRegT(regN) : getIRegA(regN),
+ binop(Iop_Sar32,
+ binop(Iop_Shl32,
+ isT ? getIRegT(regM) : getIRegA(regM),
+ mkU8(bitM ? 0 : 16)),
+ mkU8(16))) );
+
+ IRExpr* ire_result = binop(Iop_Or32,
+ binop( Iop_Shl32,
+ unop(Iop_64HIto32, mkexpr(irt_prod)),
+ mkU8(16) ),
+ binop( Iop_Shr32,
+ unop(Iop_64to32, mkexpr(irt_prod)),
+ mkU8(16) ) );
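+      /* ire_result is bits [47:16] of the 64-bit product: the top
+         half of the low word OR'd beneath the low half of the high
+         word, i.e. the 32-bit value (Rn * sx16(half of Rm)) >> 16. */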
+
+ if (isT)
+ putIRegT( regD, ire_result, condT );
+ else
+ putIRegA( regD, ire_result, condT, Ijk_Boring );
+
+ DIP("smulw%c%s r%u, r%u, r%u\n",
+ bitM ? 't' : 'b', nCC(conq),regD,regN,regM);
+ return True;
+ }
+ /* fall through */
+ }
+
+ /* ------------ pkhbt<c> Rd, Rn, Rm {,LSL #imm} ------------- */
+ /* ------------ pkhtb<c> Rd, Rn, Rm {,ASR #imm} ------------- */
+ {
+ UInt regD = 99, regN = 99, regM = 99, imm5 = 99, shift_type = 99;
+ Bool tbform = False;
+ Bool gate = False;
+
+ if (isT) {
+ if (INSNT0(15,4) == 0xEAC
+ && INSNT1(15,15) == 0 && INSNT1(4,4) == 0) {
+ regN = INSNT0(3,0);
+ regD = INSNT1(11,8);
+ regM = INSNT1(3,0);
+ imm5 = (INSNT1(14,12) << 2) | INSNT1(7,6);
+ shift_type = (INSNT1(5,5) << 1) | 0;
+ tbform = (INSNT1(5,5) == 0) ? False : True;
+ if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
+ gate = True;
+ }
+ } else {
+ if (INSNA(27,20) == BITS8(0,1,1,0,1,0,0,0) &&
+ INSNA(5,4) == BITS2(0,1) &&
+ (INSNA(6,6) == 0 || INSNA(6,6) == 1) ) {
+ regD = INSNA(15,12);
+ regN = INSNA(19,16);
+ regM = INSNA(3,0);
+ imm5 = INSNA(11,7);
+ shift_type = (INSNA(6,6) << 1) | 0;
+ tbform = (INSNA(6,6) == 0) ? False : True;
+ if (regD != 15 && regN != 15 && regM != 15)
+ gate = True;
+ }
+ }
+
+ if (gate) {
+ IRTemp irt_regM = newTemp(Ity_I32);
+ IRTemp irt_regM_shift = newTemp(Ity_I32);
+ assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
+ compute_result_and_C_after_shift_by_imm5(
+ dis_buf, &irt_regM_shift, NULL, irt_regM, shift_type, imm5, regM );
+
+ UInt mask = (tbform == True) ? 0x0000FFFF : 0xFFFF0000;
+ IRExpr* ire_result
+ = binop( Iop_Or32,
+ binop(Iop_And32, mkexpr(irt_regM_shift), mkU32(mask)),
+ binop(Iop_And32, isT ? getIRegT(regN) : getIRegA(regN),
+ unop(Iop_Not32, mkU32(mask))) );
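+      /* For pkhbt, mask == 0xFFFF0000: the result takes the shifted
+         Rm's top half and Rn's bottom half.  For pkhtb the mask is
+         0x0000FFFF, selecting the shifted Rm's bottom half and Rn's
+         top half. */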
+
+ if (isT)
+ putIRegT( regD, ire_result, condT );
+ else
+ putIRegA( regD, ire_result, condT, Ijk_Boring );
+
+ DIP( "pkh%s%s r%u, r%u, r%u %s\n", tbform ? "tb" : "bt",
+ nCC(conq), regD, regN, regM, dis_buf );
+
+ return True;
+ }
+ /* fall through */
+ }
+
+ /* ---------- usat<c> <Rd>,#<imm5>,<Rn>{,<shift>} ----------- */
+ {
+ UInt regD = 99, regN = 99, shift_type = 99, imm5 = 99, sat_imm = 99;
+ Bool gate = False;
+
+ if (isT) {
+ if (INSNT0(15,6) == BITS10(1,1,1,1,0,0,1,1,1,0)
+ && INSNT0(4,4) == 0
+ && INSNT1(15,15) == 0 && INSNT1(5,5) == 0) {
+ regD = INSNT1(11,8);
+ regN = INSNT0(3,0);
+ shift_type = (INSNT0(5,5) << 1) | 0;
+ imm5 = (INSNT1(14,12) << 2) | INSNT1(7,6);
+ sat_imm = INSNT1(4,0);
+ if (!isBadRegT(regD) && !isBadRegT(regN))
+ gate = True;
+ if (shift_type == BITS2(1,0) && imm5 == 0)
+ gate = False;
+ }
+ } else {
+ if (INSNA(27,21) == BITS7(0,1,1,0,1,1,1) &&
+ INSNA(5,4) == BITS2(0,1)) {
+ regD = INSNA(15,12);
+ regN = INSNA(3,0);
+ shift_type = (INSNA(6,6) << 1) | 0;
+ imm5 = INSNA(11,7);
+ sat_imm = INSNA(20,16);
+ if (regD != 15 && regN != 15)
+ gate = True;
+ }
+ }
+
+ if (gate) {
+ IRTemp irt_regN = newTemp(Ity_I32);
+ IRTemp irt_regN_shift = newTemp(Ity_I32);
+ IRTemp irt_sat_Q = newTemp(Ity_I32);
+ IRTemp irt_result = newTemp(Ity_I32);
+
+ assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
+ compute_result_and_C_after_shift_by_imm5(
+ dis_buf, &irt_regN_shift, NULL,
+ irt_regN, shift_type, imm5, regN );
+
+ armUnsignedSatQ( &irt_result, &irt_sat_Q, irt_regN_shift, sat_imm );
+ or_into_QFLAG32( mkexpr(irt_sat_Q), condT );
+
+ if (isT)
+ putIRegT( regD, mkexpr(irt_result), condT );
+ else
+ putIRegA( regD, mkexpr(irt_result), condT, Ijk_Boring );
+
+ DIP("usat%s r%u, #0x%04x, %s\n",
+             nCC(conq), regD, sat_imm, dis_buf);
+ return True;
+ }
+ /* fall through */
+ }
+
+ /* ----------- ssat<c> <Rd>,#<imm5>,<Rn>{,<shift>} ----------- */
+ {
+ UInt regD = 99, regN = 99, shift_type = 99, imm5 = 99, sat_imm = 99;
+ Bool gate = False;
+
+ if (isT) {
+ if (INSNT0(15,6) == BITS10(1,1,1,1,0,0,1,1,0,0)
+ && INSNT0(4,4) == 0
+ && INSNT1(15,15) == 0 && INSNT1(5,5) == 0) {
+ regD = INSNT1(11,8);
+ regN = INSNT0(3,0);
+ shift_type = (INSNT0(5,5) << 1) | 0;
+ imm5 = (INSNT1(14,12) << 2) | INSNT1(7,6);
+ sat_imm = INSNT1(4,0) + 1;
+ if (!isBadRegT(regD) && !isBadRegT(regN))
+ gate = True;
+ if (shift_type == BITS2(1,0) && imm5 == 0)
+ gate = False;
+ }
+ } else {
+ if (INSNA(27,21) == BITS7(0,1,1,0,1,0,1) &&
+ INSNA(5,4) == BITS2(0,1)) {
+ regD = INSNA(15,12);
+ regN = INSNA(3,0);
+ shift_type = (INSNA(6,6) << 1) | 0;
+ imm5 = INSNA(11,7);
+ sat_imm = INSNA(20,16) + 1;
+ if (regD != 15 && regN != 15)
+ gate = True;
+ }
+ }
+
+ if (gate) {
+ IRTemp irt_regN = newTemp(Ity_I32);
+ IRTemp irt_regN_shift = newTemp(Ity_I32);
+ IRTemp irt_sat_Q = newTemp(Ity_I32);
+ IRTemp irt_result = newTemp(Ity_I32);
+
+ assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
+ compute_result_and_C_after_shift_by_imm5(
+ dis_buf, &irt_regN_shift, NULL,
+ irt_regN, shift_type, imm5, regN );
+
+ armSignedSatQ( irt_regN_shift, sat_imm, &irt_result, &irt_sat_Q );
+ or_into_QFLAG32( mkexpr(irt_sat_Q), condT );
+
+ if (isT)
+ putIRegT( regD, mkexpr(irt_result), condT );
+ else
+ putIRegA( regD, mkexpr(irt_result), condT, Ijk_Boring );
+
+ DIP( "ssat%s r%u, #0x%04x, %s\n",
+              nCC(conq), regD, sat_imm, dis_buf);
+ return True;
+ }
+ /* fall through */
+ }
+
+ /* -------------- usat16<c> <Rd>,#<imm4>,<Rn> --------------- */
+ {
+ UInt regD = 99, regN = 99, sat_imm = 99;
+ Bool gate = False;
+
+ if (isT) {
+ if (INSNT0(15,4) == 0xF3A && (INSNT1(15,0) & 0xF0F0) == 0x0000) {
+ regN = INSNT0(3,0);
+ regD = INSNT1(11,8);
+ sat_imm = INSNT1(3,0);
+ if (!isBadRegT(regD) && !isBadRegT(regN))
+ gate = True;
+ }
+ } else {
+ if (INSNA(27,20) == BITS8(0,1,1,0,1,1,1,0) &&
+ INSNA(11,8) == BITS4(1,1,1,1) &&
+ INSNA(7,4) == BITS4(0,0,1,1)) {
+ regD = INSNA(15,12);
+ regN = INSNA(3,0);
+ sat_imm = INSNA(19,16);
+ if (regD != 15 && regN != 15)
+ gate = True;
+ }
+ }
+
+ if (gate) {
+ IRTemp irt_regN = newTemp(Ity_I32);
+ IRTemp irt_regN_lo = newTemp(Ity_I32);
+ IRTemp irt_regN_hi = newTemp(Ity_I32);
+ IRTemp irt_Q_lo = newTemp(Ity_I32);
+ IRTemp irt_Q_hi = newTemp(Ity_I32);
+ IRTemp irt_res_lo = newTemp(Ity_I32);
+ IRTemp irt_res_hi = newTemp(Ity_I32);
+
+ assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
+ assign( irt_regN_lo, binop( Iop_Sar32,
+ binop(Iop_Shl32, mkexpr(irt_regN), mkU8(16)),
+ mkU8(16)) );
+ assign( irt_regN_hi, binop(Iop_Sar32, mkexpr(irt_regN), mkU8(16)) );
+
+ armUnsignedSatQ( &irt_res_lo, &irt_Q_lo, irt_regN_lo, sat_imm );
+ or_into_QFLAG32( mkexpr(irt_Q_lo), condT );
+
+ armUnsignedSatQ( &irt_res_hi, &irt_Q_hi, irt_regN_hi, sat_imm );
+ or_into_QFLAG32( mkexpr(irt_Q_hi), condT );
+
+ IRExpr* ire_result = binop( Iop_Or32,
+ binop(Iop_Shl32, mkexpr(irt_res_hi), mkU8(16)),
+ mkexpr(irt_res_lo) );
+
+ if (isT)
+ putIRegT( regD, ire_result, condT );
+ else
+ putIRegA( regD, ire_result, condT, Ijk_Boring );
+
+ DIP( "usat16%s r%u, #0x%04x, r%u\n", nCC(conq), regD, sat_imm, regN );
+ return True;
+ }
+ /* fall through */
+ }
+
+ /* -------------- uadd16<c> <Rd>,<Rn>,<Rm> -------------- */
+ {
+ UInt regD = 99, regN = 99, regM = 99;
+ Bool gate = False;
+
+ if (isT) {
+ if (INSNT0(15,4) == 0xFA9 && (INSNT1(15,0) & 0xF0F0) == 0xF040) {
+ regN = INSNT0(3,0);
+ regD = INSNT1(11,8);
+ regM = INSNT1(3,0);
+ if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
+ gate = True;
+ }
+ } else {
+ if (INSNA(27,20) == BITS8(0,1,1,0,0,1,0,1) &&
+ INSNA(11,8) == BITS4(1,1,1,1) &&
+ INSNA(7,4) == BITS4(0,0,0,1)) {
+ regD = INSNA(15,12);
+ regN = INSNA(19,16);
+ regM = INSNA(3,0);
+ if (regD != 15 && regN != 15 && regM != 15)
+ gate = True;
+ }
+ }
+
+ if (gate) {
+ IRTemp rNt = newTemp(Ity_I32);
+ IRTemp rMt = newTemp(Ity_I32);
+ IRTemp res = newTemp(Ity_I32);
+ IRTemp reso = newTemp(Ity_I32);
+
+ assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
+ assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
+
+ assign(res, binop(Iop_Add16x2, mkexpr(rNt), mkexpr(rMt)));
+ if (isT)
+ putIRegT( regD, mkexpr(res), condT );
+ else
+ putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
+
+ assign(reso, binop(Iop_HAdd16Ux2, mkexpr(rNt), mkexpr(rMt)));
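+      /* The unsigned halving add computes (a + b) >> 1 per lane and
+         cannot overflow, so bits 15 and 31 of 'reso' are the
+         carry-outs of the low and high 16-bit adds; they become
+         GE[1:0] and GE[3:2] respectively. */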
+ set_GE_32_10_from_bits_31_15(reso, condT);
+
+ DIP("uadd16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
+ return True;
+ }
+ /* fall through */
+ }
+
+ /* -------------- sadd16<c> <Rd>,<Rn>,<Rm> -------------- */
+ {
+ UInt regD = 99, regN = 99, regM = 99;
+ Bool gate = False;
+
+ if (isT) {
+ if (INSNT0(15,4) == 0xFA9 && (INSNT1(15,0) & 0xF0F0) == 0xF000) {
+ regN = INSNT0(3,0);
+ regD = INSNT1(11,8);
+ regM = INSNT1(3,0);
+ if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
+ gate = True;
+ }
+ } else {
+ if (INSNA(27,20) == BITS8(0,1,1,0,0,0,0,1) &&
+ INSNA(11,8) == BITS4(1,1,1,1) &&
+ INSNA(7,4) == BITS4(0,0,0,1)) {
+ regD = INSNA(15,12);
+ regN = INSNA(19,16);
+ regM = INSNA(3,0);
+ if (regD != 15 && regN != 15 && regM != 15)
+ gate = True;
+ }
+ }
+
+ if (gate) {
+ IRTemp rNt = newTemp(Ity_I32);
+ IRTemp rMt = newTemp(Ity_I32);
+ IRTemp res = newTemp(Ity_I32);
+ IRTemp reso = newTemp(Ity_I32);
+
+ assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
+ assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
+
+ assign(res, binop(Iop_Add16x2, mkexpr(rNt), mkexpr(rMt)));
+ if (isT)
+ putIRegT( regD, mkexpr(res), condT );
+ else
+ putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
+
+ assign(reso, unop(Iop_Not32,
+ binop(Iop_HAdd16Sx2, mkexpr(rNt), mkexpr(rMt))));
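+      /* In the signed case, GE is set when a lane's full (17-bit) sum
+         is >= 0.  Bit 15/31 of the signed halving add is the sign of
+         that sum, so it is inverted before landing in GE. */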
+ set_GE_32_10_from_bits_31_15(reso, condT);
+
+ DIP("sadd16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
+ return True;
+ }
+ /* fall through */
+ }
+
+ /* ---------------- usub16<c> <Rd>,<Rn>,<Rm> ---------------- */
+ {
+ UInt regD = 99, regN = 99, regM = 99;
+ Bool gate = False;
+
+ if (isT) {
+ if (INSNT0(15,4) == 0xFAD && (INSNT1(15,0) & 0xF0F0) == 0xF040) {
+ regN = INSNT0(3,0);
+ regD = INSNT1(11,8);
+ regM = INSNT1(3,0);
+ if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
+ gate = True;
+ }
+ } else {
+ if (INSNA(27,20) == BITS8(0,1,1,0,0,1,0,1) &&
+ INSNA(11,8) == BITS4(1,1,1,1) &&
+ INSNA(7,4) == BITS4(0,1,1,1)) {
+ regD = INSNA(15,12);
+ regN = INSNA(19,16);
+ regM = INSNA(3,0);
+ if (regD != 15 && regN != 15 && regM != 15)
+ gate = True;
+ }
+ }
+
+ if (gate) {
+ IRTemp rNt = newTemp(Ity_I32);
+ IRTemp rMt = newTemp(Ity_I32);
+ IRTemp res = newTemp(Ity_I32);
+ IRTemp reso = newTemp(Ity_I32);
+
+ assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
+ assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
+
+ assign(res, binop(Iop_Sub16x2, mkexpr(rNt), mkexpr(rMt)));
+ if (isT)
+ putIRegT( regD, mkexpr(res), condT );
+ else
+ putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
+
+ assign(reso, unop(Iop_Not32,
+ binop(Iop_HSub16Ux2, mkexpr(rNt), mkexpr(rMt))));
+ set_GE_32_10_from_bits_31_15(reso, condT);
+
+ DIP("usub16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
+ return True;
+ }
+ /* fall through */
+ }
+
+ /* -------------- ssub16<c> <Rd>,<Rn>,<Rm> -------------- */
+ {
+ UInt regD = 99, regN = 99, regM = 99;
+ Bool gate = False;
+
+ if (isT) {
+ if (INSNT0(15,4) == 0xFAD && (INSNT1(15,0) & 0xF0F0) == 0xF000) {
+ regN = INSNT0(3,0);
+ regD = INSNT1(11,8);
+ regM = INSNT1(3,0);
+ if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
+ gate = True;
+ }
+ } else {
+ if (INSNA(27,20) == BITS8(0,1,1,0,0,0,0,1) &&
+ INSNA(11,8) == BITS4(1,1,1,1) &&
+ INSNA(7,4) == BITS4(0,1,1,1)) {
+ regD = INSNA(15,12);
+ regN = INSNA(19,16);
+ regM = INSNA(3,0);
+ if (regD != 15 && regN != 15 && regM != 15)
+ gate = True;
+ }
+ }
+
+ if (gate) {
+ IRTemp rNt = newTemp(Ity_I32);
+ IRTemp rMt = newTemp(Ity_I32);
+ IRTemp res = newTemp(Ity_I32);
+ IRTemp reso = newTemp(Ity_I32);
+
+ assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
+ assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
+
+ assign(res, binop(Iop_Sub16x2, mkexpr(rNt), mkexpr(rMt)));
+ if (isT)
+ putIRegT( regD, mkexpr(res), condT );
+ else
+ putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
+
+ assign(reso, unop(Iop_Not32,
+ binop(Iop_HSub16Sx2, mkexpr(rNt), mkexpr(rMt))));
+ set_GE_32_10_from_bits_31_15(reso, condT);
+
+ DIP("ssub16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
+ return True;
+ }
+ /* fall through */
+ }
+
+ /* ----------------- uadd8<c> <Rd>,<Rn>,<Rm> ---------------- */
+ {
+ UInt regD = 99, regN = 99, regM = 99;
+ Bool gate = False;
+
+ if (isT) {
+ if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF040) {
+ regN = INSNT0(3,0);
+ regD = INSNT1(11,8);
+ regM = INSNT1(3,0);
+ if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
+ gate = True;
+ }
+ } else {
+ if (INSNA(27,20) == BITS8(0,1,1,0,0,1,0,1) &&
+ INSNA(11,8) == BITS4(1,1,1,1) &&
+ (INSNA(7,4) == BITS4(1,0,0,1))) {
+ regD = INSNA(15,12);
+ regN = INSNA(19,16);
+ regM = INSNA(3,0);
+ if (regD != 15 && regN != 15 && regM != 15)
+ gate = True;
+ }
+ }
+
+ if (gate) {
+ IRTemp rNt = newTemp(Ity_I32);
+ IRTemp rMt = newTemp(Ity_I32);
+ IRTemp res = newTemp(Ity_I32);
+ IRTemp reso = newTemp(Ity_I32);
+
+ assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
+ assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
+
+ assign(res, binop(Iop_Add8x4, mkexpr(rNt), mkexpr(rMt)));
+ if (isT)
+ putIRegT( regD, mkexpr(res), condT );
+ else
+ putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
+
+ assign(reso, binop(Iop_HAdd8Ux4, mkexpr(rNt), mkexpr(rMt)));
+ set_GE_3_2_1_0_from_bits_31_23_15_7(reso, condT);
+
+ DIP("uadd8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
+ return True;
+ }
+ /* fall through */
+ }
+
+ /* ------------------- sadd8<c> <Rd>,<Rn>,<Rm> ------------------ */
+ {
+ UInt regD = 99, regN = 99, regM = 99;
+ Bool gate = False;
+
+ if (isT) {
+ if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF000) {
+ regN = INSNT0(3,0);
+ regD = INSNT1(11,8);
+ regM = INSNT1(3,0);
+ if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
+ gate = True;
+ }
+ } else {
+ if (INSNA(27,20) == BITS8(0,1,1,0,0,0,0,1) &&
+ INSNA(11,8) == BITS4(1,1,1,1) &&
+ (INSNA(7,4) == BITS4(1,0,0,1))) {
+ regD = INSNA(15,12);
+ regN = INSNA(19,16);
+ regM = INSNA(3,0);
+ if (regD != 15 && regN != 15 && regM != 15)
+ gate = True;
+ }
+ }
+
+ if (gate) {
+ IRTemp rNt = newTemp(Ity_I32);
+ IRTemp rMt = newTemp(Ity_I32);
+ IRTemp res = newTemp(Ity_I32);
+ IRTemp reso = newTemp(Ity_I32);
+
+ assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
+ assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
+
+ assign(res, binop(Iop_Add8x4, mkexpr(rNt), mkexpr(rMt)));
+ if (isT)
+ putIRegT( regD, mkexpr(res), condT );
+ else
+ putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
+
+ assign(reso, unop(Iop_Not32,
+ binop(Iop_HAdd8Sx4, mkexpr(rNt), mkexpr(rMt))));
+ set_GE_3_2_1_0_from_bits_31_23_15_7(reso, condT);
+
+ DIP("sadd8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
+ return True;
+ }
+ /* fall through */
+ }
+
+ /* ------------------- usub8<c> <Rd>,<Rn>,<Rm> ------------------ */
+ {
+ UInt regD = 99, regN = 99, regM = 99;
+ Bool gate = False;
+
+ if (isT) {
+ if (INSNT0(15,4) == 0xFAC && (INSNT1(15,0) & 0xF0F0) == 0xF040) {
+ regN = INSNT0(3,0);
+ regD = INSNT1(11,8);
+ regM = INSNT1(3,0);
+ if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
+ gate = True;
+ }
+ } else {
+ if (INSNA(27,20) == BITS8(0,1,1,0,0,1,0,1) &&
+ INSNA(11,8) == BITS4(1,1,1,1) &&
+ (INSNA(7,4) == BITS4(1,1,1,1))) {
+ regD = INSNA(15,12);
+ regN = INSNA(19,16);
+ regM = INSNA(3,0);
+ if (regD != 15 && regN != 15 && regM != 15)
+ gate = True;
+ }
+ }
+
+ if (gate) {
+ IRTemp rNt = newTemp(Ity_I32);
+ IRTemp rMt = newTemp(Ity_I32);
+ IRTemp res = newTemp(Ity_I32);
+ IRTemp reso = newTemp(Ity_I32);
+
+ assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
+ assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
+
+ assign(res, binop(Iop_Sub8x4, mkexpr(rNt), mkexpr(rMt)));
+ if (isT)
+ putIRegT( regD, mkexpr(res), condT );
+ else
+ putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
+
+ assign(reso, unop(Iop_Not32,
+ binop(Iop_HSub8Ux4, mkexpr(rNt), mkexpr(rMt))));
+ set_GE_3_2_1_0_from_bits_31_23_15_7(reso, condT);
+
+ DIP("usub8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
+ return True;
+ }
+ /* fall through */
+ }
+
+ /* ------------------- ssub8<c> <Rd>,<Rn>,<Rm> ------------------ */
+ {
+ UInt regD = 99, regN = 99, regM = 99;
+ Bool gate = False;
+
+ if (isT) {
+ if (INSNT0(15,4) == 0xFAC && (INSNT1(15,0) & 0xF0F0) == 0xF000) {
+ regN = INSNT0(3,0);
+ regD = INSNT1(11,8);
+ regM = INSNT1(3,0);
+ if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
+ gate = True;
+ }
+ } else {
+ if (INSNA(27,20) == BITS8(0,1,1,0,0,0,0,1) &&
+ INSNA(11,8) == BITS4(1,1,1,1) &&
+ INSNA(7,4) == BITS4(1,1,1,1)) {
+ regD = INSNA(15,12);
+ regN = INSNA(19,16);
+ regM = INSNA(3,0);
+ if (regD != 15 && regN != 15 && regM != 15)
+ gate = True;
+ }
+ }
+
+ if (gate) {
+ IRTemp rNt = newTemp(Ity_I32);
+ IRTemp rMt = newTemp(Ity_I32);
+ IRTemp res = newTemp(Ity_I32);
+ IRTemp reso = newTemp(Ity_I32);
+
+ assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
+ assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
+
+ assign(res, binop(Iop_Sub8x4, mkexpr(rNt), mkexpr(rMt)));
+ if (isT)
+ putIRegT( regD, mkexpr(res), condT );
+ else
+ putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
+
+ assign(reso, unop(Iop_Not32,
+ binop(Iop_HSub8Sx4, mkexpr(rNt), mkexpr(rMt))));
+ set_GE_3_2_1_0_from_bits_31_23_15_7(reso, condT);
+
+ DIP("ssub8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
+ return True;
+ }
+ /* fall through */
+ }
+
+ /* ------------------ qadd8<c> <Rd>,<Rn>,<Rm> ------------------- */
+ {
+ UInt regD = 99, regN = 99, regM = 99;
+ Bool gate = False;
+
+ if (isT) {
+ if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF010) {
+ regN = INSNT0(3,0);
+ regD = INSNT1(11,8);
+ regM = INSNT1(3,0);
+ if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
+ gate = True;
+ }
+ } else {
+ if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,0) &&
+ INSNA(11,8) == BITS4(1,1,1,1) &&
+ INSNA(7,4) == BITS4(1,0,0,1)) {
+ regD = INSNA(15,12);
+ regN = INSNA(19,16);
+ regM = INSNA(3,0);
+ if (regD != 15 && regN != 15 && regM != 15)
+ gate = True;
+ }
+ }
+
+ if (gate) {
+ IRTemp rNt = newTemp(Ity_I32);
+ IRTemp rMt = newTemp(Ity_I32);
+ IRTemp res_q = newTemp(Ity_I32);
+
+ assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
+ assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
+
+ assign(res_q, binop(Iop_QAdd8Sx4, mkexpr(rNt), mkexpr(rMt)));
+ if (isT)
+ putIRegT( regD, mkexpr(res_q), condT );
+ else
+ putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
+
+ DIP("qadd8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
+ return True;
+ }
+ /* fall through */
+ }
+
+ /* ------------------ qsub8<c> <Rd>,<Rn>,<Rm> ------------------- */
+ {
+ UInt regD = 99, regN = 99, regM = 99;
+ Bool gate = False;
+
+ if (isT) {
+ if (INSNT0(15,4) == 0xFAC && (INSNT1(15,0) & 0xF0F0) == 0xF010) {
+ regN = INSNT0(3,0);
+ regD = INSNT1(11,8);
+ regM = INSNT1(3,0);
+ if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
+ gate = True;
+ }
+ } else {
+ if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,0) &&
+ INSNA(11,8) == BITS4(1,1,1,1) &&
+ INSNA(7,4) == BITS4(1,1,1,1)) {
+ regD = INSNA(15,12);
+ regN = INSNA(19,16);
+ regM = INSNA(3,0);
+ if (regD != 15 && regN != 15 && regM != 15)
+ gate = True;
+ }
+ }
+
+ if (gate) {
+ IRTemp rNt = newTemp(Ity_I32);
+ IRTemp rMt = newTemp(Ity_I32);
+ IRTemp res_q = newTemp(Ity_I32);
+
+ assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
+ assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
+
+ assign(res_q, binop(Iop_QSub8Sx4, mkexpr(rNt), mkexpr(rMt)));
+ if (isT)
+ putIRegT( regD, mkexpr(res_q), condT );
+ else
+ putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
+
+ DIP("qsub8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
+ return True;
+ }
+ /* fall through */
+ }
+
+ /* ------------------ uqadd8<c> <Rd>,<Rn>,<Rm> ------------------ */
+ {
+ UInt regD = 99, regN = 99, regM = 99;
+ Bool gate = False;
+
+ if (isT) {
+ if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF050) {
+ regN = INSNT0(3,0);
+ regD = INSNT1(11,8);
+ regM = INSNT1(3,0);
+ if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
+ gate = True;
+ }
+ } else {
+ if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,0) &&
+ INSNA(11,8) == BITS4(1,1,1,1) &&
+ (INSNA(7,4) == BITS4(1,0,0,1))) {
+ regD = INSNA(15,12);
+ regN = INSNA(19,16);
+ regM = INSNA(3,0);
+ if (regD != 15 && regN != 15 && regM != 15)
+ gate = True;
+ }
+ }
+
+ if (gate) {
+ IRTemp rNt = newTemp(Ity_I32);
+ IRTemp rMt = newTemp(Ity_I32);
+ IRTemp res_q = newTemp(Ity_I32);
+
+ assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
+ assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
+
+ assign(res_q, binop(Iop_QAdd8Ux4, mkexpr(rNt), mkexpr(rMt)));
+ if (isT)
+ putIRegT( regD, mkexpr(res_q), condT );
+ else
+ putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
+
+ DIP("uqadd8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
+ return True;
+ }
+ /* fall through */
+ }
+
+ /* ------------------ uqsub8<c> <Rd>,<Rn>,<Rm> ------------------ */
+ {
+ UInt regD = 99, regN = 99, regM = 99;
+ Bool gate = False;
+
+ if (isT) {
+ if (INSNT0(15,4) == 0xFAC && (INSNT1(15,0) & 0xF0F0) == 0xF050) {
+ regN = INSNT0(3,0);
+ regD = INSNT1(11,8);
+ regM = INSNT1(3,0);
+ if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
+ gate = True;
+ }
+ } else {
+ if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,0) &&
+ INSNA(11,8) == BITS4(1,1,1,1) &&
+ (INSNA(7,4) == BITS4(1,1,1,1))) {
+ regD = INSNA(15,12);
+ regN = INSNA(19,16);
+ regM = INSNA(3,0);
+ if (regD != 15 && regN != 15 && regM != 15)
+ gate = True;
+ }
+ }
+
+ if (gate) {
+ IRTemp rNt = newTemp(Ity_I32);
+ IRTemp rMt = newTemp(Ity_I32);
+ IRTemp res_q = newTemp(Ity_I32);
+
+ assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
+ assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
+
+ assign(res_q, binop(Iop_QSub8Ux4, mkexpr(rNt), mkexpr(rMt)));
+ if (isT)
+ putIRegT( regD, mkexpr(res_q), condT );
+ else
+ putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
+
+ DIP("uqsub8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
+ return True;
+ }
+ /* fall through */
+ }
+
+ /* ----------------- uhadd8<c> <Rd>,<Rn>,<Rm> ------------------- */
+ {
+ UInt regD = 99, regN = 99, regM = 99;
+ Bool gate = False;
+
+ if (isT) {
+ if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF060) {
+ regN = INSNT0(3,0);
+ regD = INSNT1(11,8);
+ regM = INSNT1(3,0);
+ if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
+ gate = True;
+ }
+ } else {
+ if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,1) &&
+ INSNA(11,8) == BITS4(1,1,1,1) &&
+ INSNA(7,4) == BITS4(1,0,0,1)) {
+ regD = INSNA(15,12);
+ regN = INSNA(19,16);
+ regM = INSNA(3,0);
+ if (regD != 15 && regN != 15 && regM != 15)
+ gate = True;
+ }
+ }
+
+ if (gate) {
+ IRTemp rNt = newTemp(Ity_I32);
+ IRTemp rMt = newTemp(Ity_I32);
+ IRTemp res_q = newTemp(Ity_I32);
+
+ assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
+ assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
+
+ assign(res_q, binop(Iop_HAdd8Ux4, mkexpr(rNt), mkexpr(rMt)));
+ if (isT)
+ putIRegT( regD, mkexpr(res_q), condT );
+ else
+ putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
+
+ DIP("uhadd8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
+ return True;
+ }
+ /* fall through */
+ }
+
+ /* ----------------- shadd8<c> <Rd>,<Rn>,<Rm> ------------------- */
+ {
+ UInt regD = 99, regN = 99, regM = 99;
+ Bool gate = False;
+
+ if (isT) {
+ if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF020) {
+ regN = INSNT0(3,0);
+ regD = INSNT1(11,8);
+ regM = INSNT1(3,0);
+ if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
+ gate = True;
+ }
+ } else {
+ if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,1) &&
+ INSNA(11,8) == BITS4(1,1,1,1) &&
+ INSNA(7,4) == BITS4(1,0,0,1)) {
+ regD = INSNA(15,12);
+ regN = INSNA(19,16);
+ regM = INSNA(3,0);
+ if (regD != 15 && regN != 15 && regM != 15)
+ gate = True;
+ }
+ }
+
+ if (gate) {
+ IRTemp rNt = newTemp(Ity_I32);
+ IRTemp rMt = newTemp(Ity_I32);
+ IRTemp res_q = newTemp(Ity_I32);
+
+ assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
+ assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
+
+ assign(res_q, binop(Iop_HAdd8Sx4, mkexpr(rNt), mkexpr(rMt)));
+ if (isT)
+ putIRegT( regD, mkexpr(res_q), condT );
+ else
+ putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
+
+ DIP("shadd8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
+ return True;
+ }
+ /* fall through */
+ }
+
+ /* ------------------ qadd16<c> <Rd>,<Rn>,<Rm> ------------------ */
+ {
+ UInt regD = 99, regN = 99, regM = 99;
+ Bool gate = False;
+
+ if (isT) {
+ if (INSNT0(15,4) == 0xFA9 && (INSNT1(15,0) & 0xF0F0) == 0xF010) {
+ regN = INSNT0(3,0);
+ regD = INSNT1(11,8);
+ regM = INSNT1(3,0);
+ if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
+ gate = True;
+ }
+ } else {
+ if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,0) &&
+ INSNA(11,8) == BITS4(1,1,1,1) &&
+ INSNA(7,4) == BITS4(0,0,0,1)) {
+ regD = INSNA(15,12);
+ regN = INSNA(19,16);
+ regM = INSNA(3,0);
+ if (regD != 15 && regN != 15 && regM != 15)
+ gate = True;
+ }
+ }
+
+ if (gate) {
+ IRTemp rNt = newTemp(Ity_I32);
+ IRTemp rMt = newTemp(Ity_I32);
+ IRTemp res_q = newTemp(Ity_I32);
+
+ assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
+ assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
+
+ assign(res_q, binop(Iop_QAdd16Sx2, mkexpr(rNt), mkexpr(rMt)));
+ if (isT)
+ putIRegT( regD, mkexpr(res_q), condT );
+ else
+ putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
+
+ DIP("qadd16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
+ return True;
+ }
+ /* fall through */
+ }
+
+ /* ------------------ qsub16<c> <Rd>,<Rn>,<Rm> ------------------ */
+ {
+ UInt regD = 99, regN = 99, regM = 99;
+ Bool gate = False;
+
+ if (isT) {
+ if (INSNT0(15,4) == 0xFAD && (INSNT1(15,0) & 0xF0F0) == 0xF010) {
+ regN = INSNT0(3,0);
+ regD = INSNT1(11,8);
+ regM = INSNT1(3,0);
+ if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
+ gate = True;
+ }
+ } else {
+ if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,0) &&
+ INSNA(11,8) == BITS4(1,1,1,1) &&
+ INSNA(7,4) == BITS4(0,1,1,1)) {
+ regD = INSNA(15,12);
+ regN = INSNA(19,16);
+ regM = INSNA(3,0);
+ if (regD != 15 && regN != 15 && regM != 15)
+ gate = True;
+ }
+ }
+
+ if (gate) {
+ IRTemp rNt = newTemp(Ity_I32);
+ IRTemp rMt = newTemp(Ity_I32);
+ IRTemp res_q = newTemp(Ity_I32);
+
+ assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
+ assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
+
+ assign(res_q, binop(Iop_QSub16Sx2, mkexpr(rNt), mkexpr(rMt)));
+ if (isT)
+ putIRegT( regD, mkexpr(res_q), condT );
+ else
+ putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
+
+ DIP("qsub16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
+ return True;
+ }
+ /* fall through */
+ }
+
+ /////////////////////////////////////////////////////////////////
+ /////////////////////////////////////////////////////////////////
+ /////////////////////////////////////////////////////////////////
+ /////////////////////////////////////////////////////////////////
+ /////////////////////////////////////////////////////////////////
+
+ /* ------------------- qsax<c> <Rd>,<Rn>,<Rm> ------------------- */
+   /* note: the hardware seems to construct the result differently
+      from what the manual says. */
+ {
+ UInt regD = 99, regN = 99, regM = 99;
+ Bool gate = False;
+
+ if (isT) {
+ if (INSNT0(15,4) == 0xFAE && (INSNT1(15,0) & 0xF0F0) == 0xF010) {
+ regN = INSNT0(3,0);
+ regD = INSNT1(11,8);
+ regM = INSNT1(3,0);
+ if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
+ gate = True;
+ }
+ } else {
+ if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,0) &&
+ INSNA(11,8) == BITS4(1,1,1,1) &&
+ INSNA(7,4) == BITS4(0,1,0,1)) {
+ regD = INSNA(15,12);
+ regN = INSNA(19,16);
+ regM = INSNA(3,0);
+ if (regD != 15 && regN != 15 && regM != 15)
+ gate = True;
+ }
+ }
+
+ if (gate) {
+ IRTemp irt_regN = newTemp(Ity_I32);
+ IRTemp irt_regM = newTemp(Ity_I32);
+ IRTemp irt_sum = newTemp(Ity_I32);
+ IRTemp irt_diff = newTemp(Ity_I32);
+ IRTemp irt_sum_res = newTemp(Ity_I32);
+ IRTemp irt_diff_res = newTemp(Ity_I32);
+
+ assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
+ assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
+
+ assign( irt_diff,
+ binop( Iop_Sub32,
+ binop( Iop_Sar32, mkexpr(irt_regN), mkU8(16) ),
+ binop( Iop_Sar32,
+ binop(Iop_Shl32, mkexpr(irt_regM), mkU8(16)),
+ mkU8(16) ) ) );
+ armSignedSatQ( irt_diff, 0x10, &irt_diff_res, NULL);
+
+ assign( irt_sum,
+ binop( Iop_Add32,
+ binop( Iop_Sar32,
+ binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
+ mkU8(16) ),
+ binop( Iop_Sar32, mkexpr(irt_regM), mkU8(16) )) );
+ armSignedSatQ( irt_sum, 0x10, &irt_sum_res, NULL );
+
+ IRExpr* ire_result = binop( Iop_Or32,
+ binop( Iop_Shl32, mkexpr(irt_diff_res),
+ mkU8(16) ),
+ binop( Iop_And32, mkexpr(irt_sum_res),
+ mkU32(0xFFFF)) );
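+      /* The saturated difference of the top halves lands in Rd[31:16]
+         and the saturated sum in Rd[15:0]. */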
+
+ if (isT)
+ putIRegT( regD, ire_result, condT );
+ else
+ putIRegA( regD, ire_result, condT, Ijk_Boring );
+
+ DIP( "qsax%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
+ return True;
+ }
+ /* fall through */
+ }
+
+ /* ------------------- qasx<c> <Rd>,<Rn>,<Rm> ------------------- */
+ {
+ UInt regD = 99, regN = 99, regM = 99;
+ Bool gate = False;
+
+ if (isT) {
+ if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF010) {
+ regN = INSNT0(3,0);
+ regD = INSNT1(11,8);
+ regM = INSNT1(3,0);
+ if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
+ gate = True;
+ }
+ } else {
+ if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,0) &&
+ INSNA(11,8) == BITS4(1,1,1,1) &&
+ INSNA(7,4) == BITS4(0,0,1,1)) {
+ regD = INSNA(15,12);
+ regN = INSNA(19,16);
+ regM = INSNA(3,0);
+ if (regD != 15 && regN != 15 && regM != 15)
+ gate = True;
+ }
+ }
+
+ if (gate) {
+ IRTemp irt_regN = newTemp(Ity_I32);
+ IRTemp irt_regM = newTemp(Ity_I32);
+ IRTemp irt_sum = newTemp(Ity_I32);
+ IRTemp irt_diff = newTemp(Ity_I32);
+ IRTemp irt_res_sum = newTemp(Ity_I32);
+ IRTemp irt_res_diff = newTemp(Ity_I32);
+
+ assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
+ assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
+
+ assign( irt_diff,
+ binop( Iop_Sub32,
+ binop( Iop_Sar32,
+ binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
+ mkU8(16) ),
+ binop( Iop_Sar32, mkexpr(irt_regM), mkU8(16) ) ) );
+ armSignedSatQ( irt_diff, 0x10, &irt_res_diff, NULL );
+
+ assign( irt_sum,
+ binop( Iop_Add32,
+ binop( Iop_Sar32, mkexpr(irt_regN), mkU8(16) ),
+ binop( Iop_Sar32,
+ binop( Iop_Shl32, mkexpr(irt_regM), mkU8(16) ),
+ mkU8(16) ) ) );
+ armSignedSatQ( irt_sum, 0x10, &irt_res_sum, NULL );
+
+ IRExpr* ire_result
+ = binop( Iop_Or32,
+ binop( Iop_Shl32, mkexpr(irt_res_sum), mkU8(16) ),
+ binop( Iop_And32, mkexpr(irt_res_diff), mkU32(0xFFFF) ) );
+
+ if (isT)
+ putIRegT( regD, ire_result, condT );
+ else
+ putIRegA( regD, ire_result, condT, Ijk_Boring );
+
+ DIP( "qasx%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
+ return True;
+ }
+ /* fall through */
+ }
+
+ /* ------------------- sasx<c> <Rd>,<Rn>,<Rm> ------------------- */
+ {
+ UInt regD = 99, regN = 99, regM = 99;
+ Bool gate = False;
+
+ if (isT) {
+ if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF000) {
+ regN = INSNT0(3,0);
+ regD = INSNT1(11,8);
+ regM = INSNT1(3,0);
+ if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
+ gate = True;
+ }
+ } else {
+ if (INSNA(27,20) == BITS8(0,1,1,0,0,0,0,1) &&
+ INSNA(11,8) == BITS4(1,1,1,1) &&
+ INSNA(7,4) == BITS4(0,0,1,1)) {
+ regD = INSNA(15,12);
+ regN = INSNA(19,16);
+ regM = INSNA(3,0);
+ if (regD != 15 && regN != 15 && regM != 15)
+ gate = True;
+ }
+ }
+
+ if (gate) {
+ IRTemp irt_regN = newTemp(Ity_I32);
+ IRTemp irt_regM = newTemp(Ity_I32);
+ IRTemp irt_sum = newTemp(Ity_I32);
+ IRTemp irt_diff = newTemp(Ity_I32);
+
+ assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
+ assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
+
+ assign( irt_diff,
+ binop( Iop_Sub32,
+ binop( Iop_Sar32,
+ binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
+ mkU8(16) ),
+ binop( Iop_Sar32, mkexpr(irt_regM), mkU8(16) ) ) );
+
+ assign( irt_sum,
+ binop( Iop_Add32,
+ binop( Iop_Sar32, mkexpr(irt_regN), mkU8(16) ),
+ binop( Iop_Sar32,
+ binop( Iop_Shl32, mkexpr(irt_regM), mkU8(16) ),
+ mkU8(16) ) ) );
+
+ IRExpr* ire_result
+ = binop( Iop_Or32,
+ binop( Iop_Shl32, mkexpr(irt_sum), mkU8(16) ),
+ binop( Iop_And32, mkexpr(irt_diff), mkU32(0xFFFF) ) );
+
+ IRTemp ge10 = newTemp(Ity_I32);
+ assign(ge10, unop(Iop_Not32, mkexpr(irt_diff)));
+ put_GEFLAG32( 0, 31, mkexpr(ge10), condT );
+ put_GEFLAG32( 1, 31, mkexpr(ge10), condT );
+
+ IRTemp ge32 = newTemp(Ity_I32);
+ assign(ge32, unop(Iop_Not32, mkexpr(irt_sum)));
+ put_GEFLAG32( 2, 31, mkexpr(ge32), condT );
+ put_GEFLAG32( 3, 31, mkexpr(ge32), condT );
+
+ if (isT)
+ putIRegT( regD, ire_result, condT );
+ else
+ putIRegA( regD, ire_result, condT, Ijk_Boring );
+
+ DIP( "sasx%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
+ return True;
+ }
+ /* fall through */
+ }
+
+   /* --------------- smuad, smuadx<c> <Rd>,<Rn>,<Rm> --------------- */
+   /* --------------- smusd, smusdx<c> <Rd>,<Rn>,<Rm> --------------- */
+ {
+ UInt regD = 99, regN = 99, regM = 99, bitM = 99;
+ Bool gate = False, isAD = False;
+
+ if (isT) {
+ if ((INSNT0(15,4) == 0xFB2 || INSNT0(15,4) == 0xFB4)
+ && (INSNT1(15,0) & 0xF0E0) == 0xF000) {
+ regN = INSNT0(3,0);
+ regD = INSNT1(11,8);
+ regM = INSNT1(3,0);
+ bitM = INSNT1(4,4);
+ isAD = INSNT0(15,4) == 0xFB2;
+ if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
+ gate = True;
+ }
+ } else {
+ if (INSNA(27,20) == BITS8(0,1,1,1,0,0,0,0) &&
+ INSNA(15,12) == BITS4(1,1,1,1) &&
+ (INSNA(7,4) & BITS4(1,0,0,1)) == BITS4(0,0,0,1) ) {
+ regD = INSNA(19,16);
+ regN = INSNA(3,0);
+ regM = INSNA(11,8);
+ bitM = INSNA(5,5);
+ isAD = INSNA(6,6) == 0;
+ if (regD != 15 && regN != 15 && regM != 15)
+ gate = True;
+ }
+ }
+
+ if (gate) {
+ IRTemp irt_regN = newTemp(Ity_I32);
+ IRTemp irt_regM = newTemp(Ity_I32);
+ IRTemp irt_prod_lo = newTemp(Ity_I32);
+ IRTemp irt_prod_hi = newTemp(Ity_I32);
+ IRTemp tmpM = newTemp(Ity_I32);
+
+ assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
+
+ assign( tmpM, isT ? getIRegT(regM) : getIRegA(regM) );
+ assign( irt_regM, genROR32(tmpM, (bitM & 1) ? 16 : 0) );
+
+ assign( irt_prod_lo,
+ binop( Iop_Mul32,
+ binop( Iop_Sar32,
+ binop(Iop_Shl32, mkexpr(irt_regN), mkU8(16)),
+ mkU8(16) ),
+ binop( Iop_Sar32,
+ binop(Iop_Shl32, mkexpr(irt_regM), mkU8(16)),
+ mkU8(16) ) ) );
+ assign( irt_prod_hi, binop(Iop_Mul32,
+ binop(Iop_Sar32, mkexpr(irt_regN), mkU8(16)),
+ binop(Iop_Sar32, mkexpr(irt_regM), mkU8(16))) );
+ IRExpr* ire_result
+ = binop( isAD ? Iop_Add32 : Iop_Sub32,
+ mkexpr(irt_prod_lo), mkexpr(irt_prod_hi) );
+
+ if (isT)
+ putIRegT( regD, ire_result, condT );
+ else
+ putIRegA( regD, ire_result, condT, Ijk_Boring );
+
+ if (isAD) {
+ or_into_QFLAG32(
+ signed_overflow_after_Add32( ire_result,
+ irt_prod_lo, irt_prod_hi ),
+ condT
+ );
+ }
+
+ DIP("smu%cd%s%s r%u, r%u, r%u\n",
+ isAD ? 'a' : 's',
+ bitM ? "x" : "", nCC(conq), regD, regN, regM);
+ return True;
+ }
+ /* fall through */
+ }
+
+ /* --------------- smlad{X}<c> <Rd>,<Rn>,<Rm>,<Ra> -------------- */
+ /* --------------- smlsd{X}<c> <Rd>,<Rn>,<Rm>,<Ra> -------------- */
+ {
+ UInt regD = 99, regN = 99, regM = 99, regA = 99, bitM = 99;
+ Bool gate = False, isAD = False;
+
+ if (isT) {
+ if ((INSNT0(15,4) == 0xFB2 || INSNT0(15,4) == 0xFB4)
+ && INSNT1(7,5) == BITS3(0,0,0)) {
+ regN = INSNT0(3,0);
+ regD = INSNT1(11,8);
+ regM = INSNT1(3,0);
+ regA = INSNT1(15,12);
+ bitM = INSNT1(4,4);
+ isAD = INSNT0(15,4) == 0xFB2;
+ if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM)
+ && !isBadRegT(regA))
+ gate = True;
+ }
+ } else {
+ if (INSNA(27,20) == BITS8(0,1,1,1,0,0,0,0) &&
+ (INSNA(7,4) & BITS4(1,0,0,1)) == BITS4(0,0,0,1)) {
+ regD = INSNA(19,16);
+ regA = INSNA(15,12);
+ regN = INSNA(3,0);
+ regM = INSNA(11,8);
+ bitM = INSNA(5,5);
+ isAD = INSNA(6,6) == 0;
+ if (regD != 15 && regN != 15 && regM != 15 && regA != 15)
+ gate = True;
+ }
+ }
+
+ if (gate) {
+ IRTemp irt_regN = newTemp(Ity_I32);
+ IRTemp irt_regM = newTemp(Ity_I32);
+ IRTemp irt_regA = newTemp(Ity_I32);
+ IRTemp irt_prod_lo = newTemp(Ity_I32);
+ IRTemp irt_prod_hi = newTemp(Ity_I32);
+ IRTemp irt_sum = newTemp(Ity_I32);
+ IRTemp tmpM = newTemp(Ity_I32);
+
+ assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
+ assign( irt_regA, isT ? getIRegT(regA) : getIRegA(regA) );
+
+ assign( tmpM, isT ? getIRegT(regM) : getIRegA(regM) );
+ assign( irt_regM, genROR32(tmpM, (bitM & 1) ? 16 : 0) );
+
+ assign( irt_prod_lo,
+ binop(Iop_Mul32,
+ binop(Iop_Sar32,
+ binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
+ mkU8(16)),
+ binop(Iop_Sar32,
+ binop( Iop_Shl32, mkexpr(irt_regM), mkU8(16) ),
+ mkU8(16))) );
+ assign( irt_prod_hi,
+ binop( Iop_Mul32,
+ binop( Iop_Sar32, mkexpr(irt_regN), mkU8(16) ),
+ binop( Iop_Sar32, mkexpr(irt_regM), mkU8(16) ) ) );
+ assign( irt_sum, binop( isAD ? Iop_Add32 : Iop_Sub32,
+ mkexpr(irt_prod_lo), mkexpr(irt_prod_hi) ) );
+
+ IRExpr* ire_result = binop(Iop_Add32, mkexpr(irt_sum), mkexpr(irt_regA));
+
+ if (isT)
+ putIRegT( regD, ire_result, condT );
+ else
+ putIRegA( regD, ire_result, condT, Ijk_Boring );
+
+ if (isAD) {
+ or_into_QFLAG32(
+ signed_overflow_after_Add32( mkexpr(irt_sum),
+ irt_prod_lo, irt_prod_hi ),
+ condT
+ );
+ }
+
+ or_into_QFLAG32(
+ signed_overflow_after_Add32( ire_result, irt_sum, irt_regA ),
+ condT
+ );
+
+ DIP("sml%cd%s%s r%u, r%u, r%u, r%u\n",
+ isAD ? 'a' : 's',
+ bitM ? "x" : "", nCC(conq), regD, regN, regM, regA);
+ return True;
+ }
+ /* fall through */
+ }
+
+ /* ----- smlabb, smlabt, smlatb, smlatt <Rd>,<Rn>,<Rm>,<Ra> ----- */
+ {
+ UInt regD = 99, regN = 99, regM = 99, regA = 99, bitM = 99, bitN = 99;
+ Bool gate = False;
+
+ if (isT) {
+ if (INSNT0(15,4) == 0xFB1 && INSNT1(7,6) == BITS2(0,0)) {
+ regN = INSNT0(3,0);
+ regD = INSNT1(11,8);
+ regM = INSNT1(3,0);
+ regA = INSNT1(15,12);
+ bitM = INSNT1(4,4);
+ bitN = INSNT1(5,5);
+ if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM)
+ && !isBadRegT(regA))
+ gate = True;
+ }
+ } else {
+ if (INSNA(27,20) == BITS8(0,0,0,1,0,0,0,0) &&
+ (INSNA(7,4) & BITS4(1,0,0,1)) == BITS4(1,0,0,0)) {
+ regD = INSNA(19,16);
+ regN = INSNA(3,0);
+ regM = INSNA(11,8);
+ regA = INSNA(15,12);
+ bitM = INSNA(6,6);
+ bitN = INSNA(5,5);
+ if (regD != 15 && regN != 15 && regM != 15 && regA != 15)
+ gate = True;
+ }
+ }
+
+ if (gate) {
+ IRTemp irt_regA = newTemp(Ity_I32);
+ IRTemp irt_prod = newTemp(Ity_I32);
+
+ assign( irt_prod,
+ binop(Iop_Mul32,
+ binop(Iop_Sar32,
+ binop(Iop_Shl32,
+ isT ? getIRegT(regN) : getIRegA(regN),
+ mkU8(bitN ? 0 : 16)),
+ mkU8(16)),
+ binop(Iop_Sar32,
+ binop(Iop_Shl32,
+ isT ? getIRegT(regM) : getIRegA(regM),
+ mkU8(bitM ? 0 : 16)),
+ mkU8(16))) );
+
+ assign( irt_regA, isT ? getIRegT(regA) : getIRegA(regA) );
+
+ IRExpr* ire_result = binop(Iop_Add32, mkexpr(irt_prod), mkexpr(irt_regA));
+
+ if (isT)
+ putIRegT( regD, ire_result, condT );
+ else
+ putIRegA( regD, ire_result, condT, Ijk_Boring );
+
+ or_into_QFLAG32(
+ signed_overflow_after_Add32( ire_result, irt_prod, irt_regA ),
+ condT
+ );
+
+ DIP( "smla%c%c%s r%u, r%u, r%u, r%u\n",
+ bitN ? 't' : 'b', bitM ? 't' : 'b',
+ nCC(conq), regD, regN, regM, regA );
+ return True;
+ }
+ /* fall through */
+ }
+
+ /* ----- smlawb, smlawt <Rd>,<Rn>,<Rm>,<Ra> ----- */
+ {
+ UInt regD = 99, regN = 99, regM = 99, regA = 99, bitM = 99;
+ Bool gate = False;
+
+ if (isT) {
+ if (INSNT0(15,4) == 0xFB3 && INSNT1(7,5) == BITS3(0,0,0)) {
+ regN = INSNT0(3,0);
+ regD = INSNT1(11,8);
+ regM = INSNT1(3,0);
+ regA = INSNT1(15,12);
+ bitM = INSNT1(4,4);
+ if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM)
+ && !isBadRegT(regA))
+ gate = True;
+ }
+ } else {
+ if (INSNA(27,20) == BITS8(0,0,0,1,0,0,1,0) &&
+ (INSNA(7,4) & BITS4(1,0,1,1)) == BITS4(1,0,0,0)) {
+ regD = INSNA(19,16);
+ regN = INSNA(3,0);
+ regM = INSNA(11,8);
+ regA = INSNA(15,12);
+ bitM = INSNA(6,6);
+ if (regD != 15 && regN != 15 && regM != 15 && regA != 15)
+ gate = True;
+ }
+ }
+
+ if (gate) {
+ IRTemp irt_regA = newTemp(Ity_I32);
+ IRTemp irt_prod = newTemp(Ity_I64);
+
+ assign( irt_prod,
+ binop(Iop_MullS32,
+ isT ? getIRegT(regN) : getIRegA(regN),
+ binop(Iop_Sar32,
+ binop(Iop_Shl32,
+ isT ? getIRegT(regM) : getIRegA(regM),
+ mkU8(bitM ? 0 : 16)),
+ mkU8(16))) );
+
+ assign( irt_regA, isT ? getIRegT(regA) : getIRegA(regA) );
+
+ IRTemp prod32 = newTemp(Ity_I32);
+ assign(prod32,
+ binop(Iop_Or32,
+ binop(Iop_Shl32, unop(Iop_64HIto32, mkexpr(irt_prod)), mkU8(16)),
+ binop(Iop_Shr32, unop(Iop_64to32, mkexpr(irt_prod)), mkU8(16))
+ ));
+
+ IRExpr* ire_result = binop(Iop_Add32, mkexpr(prod32), mkexpr(irt_regA));
+
+ if (isT)
+ putIRegT( regD, ire_result, condT );
+ else
+ putIRegA( regD, ire_result, condT, Ijk_Boring );
+
+ or_into_QFLAG32(
+ signed_overflow_after_Add32( ire_result, prod32, irt_regA ),
+ condT
+ );
+
+ DIP( "smlaw%c%s r%u, r%u, r%u, r%u\n",
+ bitM ? 't' : 'b',
+ nCC(conq), regD, regN, regM, regA );
+ return True;
+ }
+ /* fall through */
+ }
+
+ /* ------------------- sel<c> <Rd>,<Rn>,<Rm> -------------------- */
+ /* fixme: fix up the test in v6media.c so that we can pass the ge
+ flags as part of the test. */
+ {
+ UInt regD = 99, regN = 99, regM = 99;
+ Bool gate = False;
+
+ if (isT) {
+ if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF080) {
+ regN = INSNT0(3,0);
+ regD = INSNT1(11,8);
+ regM = INSNT1(3,0);
+ if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
+ gate = True;
+ }
+ } else {
+ if (INSNA(27,20) == BITS8(0,1,1,0,1,0,0,0) &&
+ INSNA(11,8) == BITS4(1,1,1,1) &&
+ INSNA(7,4) == BITS4(1,0,1,1)) {
+ regD = INSNA(15,12);
+ regN = INSNA(19,16);
+ regM = INSNA(3,0);
+ if (regD != 15 && regN != 15 && regM != 15)
+ gate = True;
+ }
+ }
+
+ if (gate) {
+ IRTemp irt_ge_flag0 = newTemp(Ity_I32);
+ IRTemp irt_ge_flag1 = newTemp(Ity_I32);
+ IRTemp irt_ge_flag2 = newTemp(Ity_I32);
+ IRTemp irt_ge_flag3 = newTemp(Ity_I32);
+
+ assign( irt_ge_flag0, get_GEFLAG32(0) );
+ assign( irt_ge_flag1, get_GEFLAG32(1) );
+ assign( irt_ge_flag2, get_GEFLAG32(2) );
+ assign( irt_ge_flag3, get_GEFLAG32(3) );
+
+ IRExpr* ire_ge_flag0_or
+ = binop(Iop_Or32, mkexpr(irt_ge_flag0),
+ binop(Iop_Sub32, mkU32(0), mkexpr(irt_ge_flag0)));
+ IRExpr* ire_ge_flag1_or
+ = binop(Iop_Or32, mkexpr(irt_ge_flag1),
+ binop(Iop_Sub32, mkU32(0), mkexpr(irt_ge_flag1)));
+ IRExpr* ire_ge_flag2_or
+ = binop(Iop_Or32, mkexpr(irt_ge_flag2),
+ binop(Iop_Sub32, mkU32(0), mkexpr(irt_ge_flag2)));
+ IRExpr* ire_ge_flag3_or
+ = binop(Iop_Or32, mkexpr(irt_ge_flag3),
+ binop(Iop_Sub32, mkU32(0), mkexpr(irt_ge_flag3)));
+
+ IRExpr* ire_ge_flags
+ = binop( Iop_Or32,
+ binop(Iop_Or32,
+ binop(Iop_And32,
+ binop(Iop_Sar32, ire_ge_flag0_or, mkU8(31)),
+ mkU32(0x000000ff)),
+ binop(Iop_And32,
+ binop(Iop_Sar32, ire_ge_flag1_or, mkU8(31)),
+ mkU32(0x0000ff00))),
+ binop(Iop_Or32,
+ binop(Iop_And32,
+ binop(Iop_Sar32, ire_ge_flag2_or, mkU8(31)),
+ mkU32(0x00ff0000)),
+ binop(Iop_And32,
+ binop(Iop_Sar32, ire_ge_flag3_or, mkU8(31)),
+ mkU32(0xff000000))) );
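+      /* For each flag value x, x | (0 - x) has bit 31 set iff
+         x != 0; arithmetic-shifting that right by 31 then yields
+         0xFFFFFFFF if the flag is set and 0 otherwise.  ANDing with
+         the per-lane masks builds a byte-granularity select mask:
+         the result below takes each byte from Rn where the
+         corresponding GE bit is set, and from Rm where it is not. */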
+
+ IRExpr* ire_result
+ = binop(Iop_Or32,
+ binop(Iop_And32,
+ isT ? getIRegT(regN) : getIRegA(regN),
+ ire_ge_flags ),
+ binop(Iop_And32,
+ isT ? getIRegT(regM) : getIRegA(regM),
+ unop(Iop_Not32, ire_ge_flags)));
+
+ if (isT)
+ putIRegT( regD, ire_result, condT );
+ else
+ putIRegA( regD, ire_result, condT, Ijk_Boring );
+
+ DIP("sel%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
+ return True;
+ }
+ /* fall through */
+ }
+
+ /* ----------------- uxtab16<c> Rd,Rn,Rm{,rot} ------------------ */
+ {
+ UInt regD = 99, regN = 99, regM = 99, rotate = 99;
+ Bool gate = False;
+
+ if (isT) {
+ if (INSNT0(15,4) == 0xFA3 && (INSNT1(15,0) & 0xF0C0) == 0xF080) {
+ regN = INSNT0(3,0);
+ regD = INSNT1(11,8);
+ regM = INSNT1(3,0);
+ rotate = INSNT1(5,4);
+ if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
+ gate = True;
+ }
+ } else {
+ if (INSNA(27,20) == BITS8(0,1,1,0,1,1,0,0) &&
+ INSNA(9,4) == BITS6(0,0,0,1,1,1) ) {
+ regD = INSNA(15,12);
+ regN = INSNA(19,16);
+ regM = INSNA(3,0);
+ rotate = INSNA(11,10);
+ if (regD != 15 && regN != 15 && regM != 15)
+ gate = True;
+ }
+ }
+
+ if (gate) {
+ IRTemp irt_regN = newTemp(Ity_I32);
+ assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
+
+ IRTemp irt_regM = newTemp(Ity_I32);
+ assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
+
+ IRTemp irt_rot = newTemp(Ity_I32);
+ assign( irt_rot, binop(Iop_And32,
+ genROR32(irt_regM, 8 * rotate),
+ mkU32(0x00FF00FF)) );
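+      /* genROR32 rotates Rm right by 0, 8, 16 or 24 bits; the
+         0x00FF00FF mask then leaves one zero-extended byte in each
+         16-bit lane.  E.g. Rm = 0xAABBCCDD with ROR #8 rotates to
+         0xDDAABBCC and masks to 0x00AA00CC, so 0xCC is added into
+         the low halfword of Rn and 0xAA into the high halfword. */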
+
+ IRExpr* resLo
+ = binop(Iop_And32,
+ binop(Iop_Add32, mkexpr(irt_regN), mkexpr(irt_rot)),
+ mkU32(0x0000FFFF));
+
+ IRExpr* resHi
+ = binop(Iop_Add32,
+ binop(Iop_And32, mkexpr(irt_regN), mkU32(0xFFFF0000)),
+ binop(Iop_And32, mkexpr(irt_rot), mkU32(0xFFFF0000)));
+
+ IRExpr* ire_result
+ = binop( Iop_Or32, resHi, resLo );
+
+ if (isT)
+ putIRegT( regD, ire_result, condT );
+ else
+ putIRegA( regD, ire_result, condT, Ijk_Boring );
+
+ DIP( "uxtab16%s r%u, r%u, r%u, ROR #%u\n",
+ nCC(conq), regD, regN, regM, 8 * rotate );
+ return True;
+ }
+ /* fall through */
+ }
+
+ /* --------------- usad8 Rd,Rn,Rm ---------------- */
+ /* --------------- usada8 Rd,Rn,Rm,Ra ---------------- */
+ {
+ UInt rD = 99, rN = 99, rM = 99, rA = 99;
+ Bool gate = False;
+
+ if (isT) {
+ if (INSNT0(15,4) == 0xFB7 && INSNT1(7,4) == BITS4(0,0,0,0)) {
+ rN = INSNT0(3,0);
+ rA = INSNT1(15,12);
+ rD = INSNT1(11,8);
+ rM = INSNT1(3,0);
+ if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM) && rA != 13)
+ gate = True;
+ }
+ } else {
+ if (INSNA(27,20) == BITS8(0,1,1,1,1,0,0,0) &&
+ INSNA(7,4) == BITS4(0,0,0,1) ) {
+ rD = INSNA(19,16);
+ rA = INSNA(15,12);
+ rM = INSNA(11,8);
+ rN = INSNA(3,0);
+ if (rD != 15 && rN != 15 && rM != 15 /* but rA can be 15 */)
+ gate = True;
+ }
+ }
+ /* We allow rA == 15, to denote the usad8 (no accumulator) case. */
+
+ if (gate) {
+ IRExpr* rNe = isT ? getIRegT(rN) : getIRegA(rN);
+ IRExpr* rMe = isT ? getIRegT(rM) : getIRegA(rM);
+ IRExpr* rAe = rA == 15 ? mkU32(0)
+ : (isT ? getIRegT(rA) : getIRegA(rA));
+ IRExpr* res = binop(Iop_Add32,
+ binop(Iop_Sad8Ux4, rNe, rMe),
+ rAe);
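+      /* Iop_Sad8Ux4 sums the absolute differences of the four
+         unsigned byte lanes.  E.g. rN = 0x01020304, rM = 0x04030201
+         gives |1-4| + |2-3| + |3-2| + |4-1| = 8, to which rA (or
+         zero, in the usad8 case) is added. */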
+ if (isT)
+ putIRegT( rD, res, condT );
+ else
+ putIRegA( rD, res, condT, Ijk_Boring );
+
+ if (rA == 15) {
+ DIP( "usad8%s r%u, r%u, r%u\n",
+ nCC(conq), rD, rN, rM );
+ } else {
+ DIP( "usada8%s r%u, r%u, r%u, r%u\n",
+ nCC(conq), rD, rN, rM, rA );
+ }
+ return True;
+ }
+ /* fall through */
+ }
+
+ /* ---------- Doesn't match anything. ---------- */
+ return False;
+
+# undef INSNA
+# undef INSNT0
+# undef INSNT1
+}
+
+
+/*------------------------------------------------------------*/
+/*--- LDMxx/STMxx helper (both ARM and Thumb32) ---*/
+/*------------------------------------------------------------*/
+
+/* Generate IR for LDMxx and STMxx. This is complex. Assumes it's
+ unconditional, so the caller must produce a jump-around before
+ calling this, if the insn is to be conditional. Caller is
+ responsible for all validation of parameters. For LDMxx, if PC is
+ amongst the values loaded, caller is also responsible for
+ generating the jump. */
+static void mk_ldm_stm ( Bool arm, /* True: ARM, False: Thumb */
+ UInt rN, /* base reg */
+ UInt bINC, /* 1: inc, 0: dec */
+ UInt bBEFORE, /* 1: inc/dec before, 0: after */
+ UInt bW, /* 1: writeback to Rn */
+ UInt bL, /* 1: load, 0: store */
+ UInt regList )
+{
+ Int i, r, m, nRegs;
+
+ /* Get hold of the old Rn value. We might need to write its value
+ to memory during a store, and if it's also the writeback
+ register then we need to get its value now. We can't treat it
+ exactly like the other registers we're going to transfer,
+ because for xxMDA and xxMDB writeback forms, the generated IR
+ updates Rn in the guest state before any transfers take place.
+      We have to do this as per comments below, so that if Rn is
+      the stack pointer then it always has a value below or equal
+      to any of the transfer addresses.  Ick. */
+ IRTemp oldRnT = newTemp(Ity_I32);
+ assign(oldRnT, arm ? getIRegA(rN) : getIRegT(rN));
+
+ IRTemp anchorT = newTemp(Ity_I32);
+ /* The old (Addison-Wesley) ARM ARM seems to say that LDMxx/STMxx
+ ignore the bottom two bits of the address. However, Cortex-A8
+ doesn't seem to care. Hence: */
+ /* No .. don't force alignment .. */
+ /* assign(anchorT, binop(Iop_And32, mkexpr(oldRnT), mkU32(~3U))); */
+ /* Instead, use the potentially misaligned address directly. */
+ assign(anchorT, mkexpr(oldRnT));
+
+ IROp opADDorSUB = bINC ? Iop_Add32 : Iop_Sub32;
+ // bINC == 1: xxMIA, xxMIB
+ // bINC == 0: xxMDA, xxMDB
+
+   // For xxMDA and xxMDB, update Rn first if necessary.  We have to
+   // do this first because, in the common idiom of pushing stuff onto
+   // a stack that is growing down onto allocate-on-fault pages (as
+   // Valgrind simulates), the transfers may fault, and so the SP must
+   // already be up-to-date, "covering" (pointing below) the transfer
+   // area.  For the same reason, if we are doing xxMIA or xxMIB, do
+   // the transfer first, and then update rN afterwards.
+ nRegs = 0;
+ for (i = 0; i < 16; i++) {
+ if ((regList & (1 << i)) != 0)
+ nRegs++;
+ }
+ if (bW == 1 && !bINC) {
+ IRExpr* e = binop(opADDorSUB, mkexpr(oldRnT), mkU32(4*nRegs));
+ if (arm)
+ putIRegA( rN, e, IRTemp_INVALID, Ijk_Boring );
+ else
+ putIRegT( rN, e, IRTemp_INVALID );
+ }
+
+ // Make up a list of the registers to transfer, and their offsets
+ // in memory relative to the anchor. If the base reg (Rn) is part
+ // of the transfer, then do it last for a load and first for a store.
+ UInt xReg[16], xOff[16];
+ Int nX = 0;
+ m = 0;
+ for (i = 0; i < 16; i++) {
+ r = bINC ? i : (15-i);
+ if (0 == (regList & (1<<r)))
+ continue;
+ if (bBEFORE)
+ m++;
+ /* paranoia: check we aren't transferring the writeback
+ register during a load. Should be assured by decode-point
+ check above. */
+ if (bW == 1 && bL == 1)
+ vassert(r != rN);
+
+ xOff[nX] = 4 * m;
+ xReg[nX] = r;
+ nX++;
+
+ if (!bBEFORE)
+ m++;
+ }
+ vassert(m == nRegs);
+ vassert(nX == nRegs);
+ vassert(nX <= 16);
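+   /* Worked example: for STMDB r13!, {r4,r5,r14}, bINC == 0 and
+      bBEFORE == 1, so the loop above visits r14, r5, r4 in that
+      order and assigns them offsets 4, 8, 12.  The transfer loop
+      below subtracts those from the anchor, storing r14 at
+      [r13-4], r5 at [r13-8] and r4 at [r13-12] -- lowest-numbered
+      register at the lowest address, as the architecture requires. */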
+
+ if (bW == 0 && (regList & (1<<rN)) != 0) {
+ /* Non-writeback, and basereg is to be transferred. Do its
+ transfer last for a load and first for a store. Requires
+ reordering xOff/xReg. */
+ if (0) {
+ vex_printf("\nREG_LIST_PRE: (rN=%d)\n", rN);
+ for (i = 0; i < nX; i++)
+ vex_printf("reg %d off %d\n", xReg[i], xOff[i]);
+ vex_printf("\n");
+ }
+
+ vassert(nX > 0);
+ for (i = 0; i < nX; i++) {
+ if (xReg[i] == rN)
+ break;
+ }
+ vassert(i < nX); /* else we didn't find it! */
+ UInt tReg = xReg[i];
+ UInt tOff = xOff[i];
+ if (bL == 1) {
+ /* load; make this transfer happen last */
+ if (i < nX-1) {
+ for (m = i+1; m < nX; m++) {
+ xReg[m-1] = xReg[m];
+ xOff[m-1] = xOff[m];
+ }
+ vassert(m == nX);
+ xReg[m-1] = tReg;
+ xOff[m-1] = tOff;
+ }
+ } else {
+ /* store; make this transfer happen first */
+ if (i > 0) {
+ for (m = i-1; m >= 0; m--) {
+ xReg[m+1] = xReg[m];
+ xOff[m+1] = xOff[m];
+ }
+ vassert(m == -1);
+ xReg[0] = tReg;
+ xOff[0] = tOff;
+ }
+ }
+
+ if (0) {
+ vex_printf("REG_LIST_POST:\n");
+ for (i = 0; i < nX; i++)
+ vex_printf("reg %d off %d\n", xReg[i], xOff[i]);
+ vex_printf("\n");
+ }
+ }
+
+ /* Actually generate the transfers */
+ for (i = 0; i < nX; i++) {
+ r = xReg[i];
+ if (bL == 1) {
+ IRExpr* e = loadLE(Ity_I32,
+ binop(opADDorSUB, mkexpr(anchorT),
+ mkU32(xOff[i])));
+ if (arm) {
+ putIRegA( r, e, IRTemp_INVALID, Ijk_Ret );
+ } else {
+ // no: putIRegT( r, e, IRTemp_INVALID );
+ // putIRegT refuses to write to R15. But that might happen.
+ // Since this is uncond, and we need to be able to
+ // write the PC, just use the low level put:
+ llPutIReg( r, e );
+ }
+ } else {
+ /* if we're storing Rn, make sure we use the correct
+ value, as per extensive comments above */
+ storeLE( binop(opADDorSUB, mkexpr(anchorT), mkU32(xOff[i])),
+ r == rN ? mkexpr(oldRnT)
+ : (arm ? getIRegA(r) : getIRegT(r) ) );
+ }
+ }
+
+ // If we are doing xxMIA or xxMIB,
+ // do the transfer first, and then update rN afterwards.
+ if (bW == 1 && bINC) {
+ IRExpr* e = binop(opADDorSUB, mkexpr(oldRnT), mkU32(4*nRegs));
+ if (arm)
+ putIRegA( rN, e, IRTemp_INVALID, Ijk_Boring );
+ else
+ putIRegT( rN, e, IRTemp_INVALID );
+ }
+}
+
+
+/*------------------------------------------------------------*/
+/*--- VFP (CP 10 and 11) instructions ---*/
+/*------------------------------------------------------------*/
+
+/* Both ARM and Thumb */
+
+/* Translate a CP10 or CP11 instruction.  If successful, returns
+   True, and *dres may or may not be updated.  On failure, returns
+   False, and neither changes *dres nor creates any IR.
+
+ The ARM and Thumb encodings are identical for the low 28 bits of
+   the insn (yay!) and that's what the caller must supply, iow, insn28
+ has the top 4 bits masked out. Caller is responsible for
+ determining whether the masked-out bits are valid for a CP10/11
+ insn. The rules for the top 4 bits are:
+
+ ARM: 0000 to 1110 allowed, and this is the gating condition.
+ 1111 (NV) is not allowed.
+
+ Thumb: must be 1110. The gating condition is taken from
+ ITSTATE in the normal way.
+
+ Conditionalisation:
+
+ Caller must supply an IRTemp 'condT' holding the gating condition,
+ or IRTemp_INVALID indicating the insn is always executed.
+
+ Caller must also supply an ARMCondcode 'cond'. This is only used
+ for debug printing, no other purpose. For ARM, this is simply the
+ top 4 bits of the original instruction. For Thumb, the condition
+ is not (really) known until run time, and so ARMCondAL should be
+ passed, only so that printing of these instructions does not show
+ any condition.
+
+ Finally, the caller must indicate whether this occurs in ARM or
+ Thumb code.
+*/
+static Bool decode_CP10_CP11_instruction (
+ /*MOD*/DisResult* dres,
+ UInt insn28,
+ IRTemp condT,
+ ARMCondcode conq,
+ Bool isT
+ )
+{
+# define INSN(_bMax,_bMin) SLICE_UInt(insn28, (_bMax), (_bMin))
+
+ vassert(INSN(31,28) == BITS4(0,0,0,0)); // caller's obligation
+
+ if (isT) {
+ vassert(conq == ARMCondAL);
+ } else {
+ vassert(conq >= ARMCondEQ && conq <= ARMCondAL);
+ }
+
+ /* ----------------------------------------------------------- */
+ /* -- VFP instructions -- double precision (mostly) -- */
+ /* ----------------------------------------------------------- */
+
+ /* --------------------- fldmx, fstmx --------------------- */
+ /*
+ 31 27 23 19 15 11 7 0
+ P U WL
+ C4-100, C5-26 1 FSTMX cond 1100 1000 Rn Dd 1011 offset
+ C4-100, C5-28 2 FSTMIAX cond 1100 1010 Rn Dd 1011 offset
+ C4-100, C5-30 3 FSTMDBX cond 1101 0010 Rn Dd 1011 offset
+
+ C4-42, C5-26 1 FLDMX cond 1100 1001 Rn Dd 1011 offset
+ C4-42, C5-28 2 FLDMIAX cond 1100 1011 Rn Dd 1011 offset
+ C4-42, C5-30 3 FLDMDBX cond 1101 0011 Rn Dd 1011 offset
+
+ Regs transferred: Dd .. D(d + (offset-3)/2)
+ offset must be odd, must not imply a reg > 15
+ IA/DB: Rn is changed by (4 + 8 x # regs transferred)
+
+ case coding:
+ 1 at-Rn (access at Rn)
+ 2 ia-Rn (access at Rn, then Rn += 4+8n)
+ 3 db-Rn (Rn -= 4+8n, then access at Rn)
+ */
+ if (BITS8(1,1,0,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,0,0,0,0,0))
+ && INSN(11,8) == BITS4(1,0,1,1)) {
+ UInt bP = (insn28 >> 24) & 1;
+ UInt bU = (insn28 >> 23) & 1;
+ UInt bW = (insn28 >> 21) & 1;
+ UInt bL = (insn28 >> 20) & 1;
+ UInt offset = (insn28 >> 0) & 0xFF;
+ UInt rN = INSN(19,16);
+ UInt dD = (INSN(22,22) << 4) | INSN(15,12);
+ UInt nRegs = (offset - 1) / 2;
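+      /* For the X forms, offset == 2*nRegs + 1: each D register
+         occupies two words, plus one extra word of (apparently
+         implementation-defined) data, which is why the writeback
+         cases move Rn by 4 + 8*nRegs, per the table above. */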
+ UInt summary = 0;
+ Int i;
+
+ /**/ if (bP == 0 && bU == 1 && bW == 0) {
+ summary = 1;
+ }
+ else if (bP == 0 && bU == 1 && bW == 1) {
+ summary = 2;
+ }
+ else if (bP == 1 && bU == 0 && bW == 1) {
+ summary = 3;
+ }
+ else goto after_vfp_fldmx_fstmx;
+
+ /* no writebacks to r15 allowed. No use of r15 in thumb mode. */
+ if (rN == 15 && (summary == 2 || summary == 3 || isT))
+ goto after_vfp_fldmx_fstmx;
+
+ /* offset must be odd, and specify at least one register */
+ if (0 == (offset & 1) || offset < 3)
+ goto after_vfp_fldmx_fstmx;
+
+      /* can't transfer regs after D31 */
+ if (dD + nRegs - 1 >= 32)
+ goto after_vfp_fldmx_fstmx;
+
+ /* Now, we can't do a conditional load or store, since that very
+ likely will generate an exception. So we have to take a side
+ exit at this point if the condition is false. */
+ if (condT != IRTemp_INVALID) {
+ if (isT)
+ mk_skip_over_T32_if_cond_is_false( condT );
+ else
+ mk_skip_over_A32_if_cond_is_false( condT );
+ condT = IRTemp_INVALID;
+ }
+ /* Ok, now we're unconditional. Do the load or store. */
+
+ /* get the old Rn value */
+ IRTemp rnT = newTemp(Ity_I32);
+ assign(rnT, align4if(isT ? getIRegT(rN) : getIRegA(rN),
+ rN == 15));
+
+ /* make a new value for Rn, post-insn */
+ IRTemp rnTnew = IRTemp_INVALID;
+ if (summary == 2 || summary == 3) {
+ rnTnew = newTemp(Ity_I32);
+ assign(rnTnew, binop(summary == 2 ? Iop_Add32 : Iop_Sub32,
+ mkexpr(rnT),
+ mkU32(4 + 8 * nRegs)));
+ }
+
+ /* decide on the base transfer address */
+ IRTemp taT = newTemp(Ity_I32);
+ assign(taT, summary == 3 ? mkexpr(rnTnew) : mkexpr(rnT));
+
+ /* update Rn if necessary -- in case 3, we're moving it down, so
+ update before any memory reference, in order to keep Memcheck
+ and V's stack-extending logic (on linux) happy */
+ if (summary == 3) {
+ if (isT)
+ putIRegT(rN, mkexpr(rnTnew), IRTemp_INVALID);
+ else
+ putIRegA(rN, mkexpr(rnTnew), IRTemp_INVALID, Ijk_Boring);
+ }
+
+ /* generate the transfers */
+ for (i = 0; i < nRegs; i++) {
+ IRExpr* addr = binop(Iop_Add32, mkexpr(taT), mkU32(8*i));
+ if (bL) {
+ putDReg(dD + i, loadLE(Ity_F64, addr), IRTemp_INVALID);
+ } else {
+ storeLE(addr, getDReg(dD + i));
+ }
+ }
+
+ /* update Rn if necessary -- in case 2, we're moving it up, so
+ update after any memory reference, in order to keep Memcheck
+ and V's stack-extending logic (on linux) happy */
+ if (summary == 2) {
+ if (isT)
+ putIRegT(rN, mkexpr(rnTnew), IRTemp_INVALID);
+ else
+ putIRegA(rN, mkexpr(rnTnew), IRTemp_INVALID, Ijk_Boring);
+ }
+
+ HChar* nm = bL==1 ? "ld" : "st";
+ switch (summary) {
+ case 1: DIP("f%smx%s r%u, {d%u-d%u}\n",
+ nm, nCC(conq), rN, dD, dD + nRegs - 1);
+ break;
+ case 2: DIP("f%smiax%s r%u!, {d%u-d%u}\n",
+ nm, nCC(conq), rN, dD, dD + nRegs - 1);
+ break;
+ case 3: DIP("f%smdbx%s r%u!, {d%u-d%u}\n",
+ nm, nCC(conq), rN, dD, dD + nRegs - 1);
+ break;
+ default: vassert(0);
+ }
+
+ goto decode_success_vfp;
+ /* FIXME alignment constraints? */
+ }
+
+ after_vfp_fldmx_fstmx:
+
+ /* --------------------- fldmd, fstmd --------------------- */
+ /*
+ 31 27 23 19 15 11 7 0
+ P U WL
+ C4-96, C5-26 1 FSTMD cond 1100 1000 Rn Dd 1011 offset
+      C4-96, C5-28 2 FSTMIAD cond 1100 1010 Rn Dd 1011 offset
+      C4-96, C5-30 3 FSTMDBD cond 1101 0010 Rn Dd 1011 offset
+
+ C4-38, C5-26 1 FLDMD cond 1100 1001 Rn Dd 1011 offset
+ C4-38, C5-28 2 FLDMIAD cond 1100 1011 Rn Dd 1011 offset
+ C4-38, C5-30 3 FLDMDBD cond 1101 0011 Rn Dd 1011 offset
+
+ Regs transferred: Dd .. D(d + (offset-2)/2)
+ offset must be even, must not imply a reg > 15
+ IA/DB: Rn is changed by (8 x # regs transferred)
+
+ case coding:
+ 1 at-Rn (access at Rn)
+ 2 ia-Rn (access at Rn, then Rn += 8n)
+ 3 db-Rn (Rn -= 8n, then access at Rn)
+ */
+ if (BITS8(1,1,0,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,0,0,0,0,0))
+ && INSN(11,8) == BITS4(1,0,1,1)) {
+ UInt bP = (insn28 >> 24) & 1;
+ UInt bU = (insn28 >> 23) & 1;
+ UInt bW = (insn28 >> 21) & 1;
+ UInt bL = (insn28 >> 20) & 1;
+ UInt offset = (insn28 >> 0) & 0xFF;
+ UInt rN = INSN(19,16);
+ UInt dD = (INSN(22,22) << 4) | INSN(15,12);
+ UInt nRegs = offset / 2;
+ UInt summary = 0;
+ Int i;
+
+ /**/ if (bP == 0 && bU == 1 && bW == 0) {
+ summary = 1;
+ }
+ else if (bP == 0 && bU == 1 && bW == 1) {
+ summary = 2;
+ }
+ else if (bP == 1 && bU == 0 && bW == 1) {
+ summary = 3;
+ }
+ else goto after_vfp_fldmd_fstmd;
+
+ /* no writebacks to r15 allowed. No use of r15 in thumb mode. */
+ if (rN == 15 && (summary == 2 || summary == 3 || isT))
+ goto after_vfp_fldmd_fstmd;
+
+ /* offset must be even, and specify at least one register */
+ if (1 == (offset & 1) || offset < 2)
+ goto after_vfp_fldmd_fstmd;
+
+      /* can't transfer regs after D31 */
+ if (dD + nRegs - 1 >= 32)
+ goto after_vfp_fldmd_fstmd;
+
+ /* Now, we can't do a conditional load or store, since that very
+ likely will generate an exception. So we have to take a side
+ exit at this point if the condition is false. */
+ if (condT != IRTemp_INVALID) {
+ if (isT)
+ mk_skip_over_T32_if_cond_is_false( condT );
+ else
+ mk_skip_over_A32_if_cond_is_false( condT );
+ condT = IRTemp_INVALID;
+ }
+ /* Ok, now we're unconditional. Do the load or store. */
+
+ /* get the old Rn value */
+ IRTemp rnT = newTemp(Ity_I32);
+ assign(rnT, align4if(isT ? getIRegT(rN) : getIRegA(rN),
+ rN == 15));
+
+ /* make a new value for Rn, post-insn */
+ IRTemp rnTnew = IRTemp_INVALID;
+ if (summary == 2 || summary == 3) {
+ rnTnew = newTemp(Ity_I32);
+ assign(rnTnew, binop(summary == 2 ? Iop_Add32 : Iop_Sub32,
+ mkexpr(rnT),
+ mkU32(8 * nRegs)));
+ }
+
+ /* decide on the base transfer address */
+ IRTemp taT = newTemp(Ity_I32);
+ assign(taT, summary == 3 ? mkexpr(rnTnew) : mkexpr(rnT));
+
+ /* update Rn if necessary -- in case 3, we're moving it down, so
+ update before any memory reference, in order to keep Memcheck
+ and V's stack-extending logic (on linux) happy */
+ if (summary == 3) {
+ if (isT)
+ putIRegT(rN, mkexpr(rnTnew), IRTemp_INVALID);
+ else
+ putIRegA(rN, mkexpr(rnTnew), IRTemp_INVALID, Ijk_Boring);
+ }
+
+ /* generate the transfers */
+ for (i = 0; i < nRegs; i++) {
+ IRExpr* addr = binop(Iop_Add32, mkexpr(taT), mkU32(8*i));
+ if (bL) {
+ putDReg(dD + i, loadLE(Ity_F64, addr), IRTemp_INVALID);
+ } else {
+ storeLE(addr, getDReg(dD + i));
+ }
+ }
+
+ /* update Rn if necessary -- in case 2, we're moving it up, so
+ update after any memory reference, in order to keep Memcheck
+ and V's stack-extending logic (on linux) happy */
+ if (summary == 2) {
+ if (isT)
+ putIRegT(rN, mkexpr(rnTnew), IRTemp_INVALID);
+ else
+ putIRegA(rN, mkexpr(rnTnew), IRTemp_INVALID, Ijk_Boring);
+ }
+
+ HChar* nm = bL==1 ? "ld" : "st";
+ switch (summary) {
+ case 1: DIP("f%smd%s r%u, {d%u-d%u}\n",
+ nm, nCC(conq), rN, dD, dD + nRegs - 1);
+ break;
+ case 2: DIP("f%smiad%s r%u!, {d%u-d%u}\n",
+ nm, nCC(conq), rN, dD, dD + nRegs - 1);
+ break;
+ case 3: DIP("f%smdbd%s r%u!, {d%u-d%u}\n",
+ nm, nCC(conq), rN, dD, dD + nRegs - 1);
+ break;
+ default: vassert(0);
+ }
+
+ goto decode_success_vfp;
+ /* FIXME alignment constraints? */
+ }
+
+ after_vfp_fldmd_fstmd:
+
+ /* ------------------- fmrx, fmxr ------------------- */
+ if (BITS8(1,1,1,0,1,1,1,1) == INSN(27,20)
+ && BITS4(1,0,1,0) == INSN(11,8)
+ && BITS8(0,0,0,1,0,0,0,0) == (insn28 & 0xFF)) {
+ UInt rD = INSN(15,12);
+ UInt reg = INSN(19,16);
+ if (reg == BITS4(0,0,0,1)) {
+ if (rD == 15) {
+ IRTemp nzcvT = newTemp(Ity_I32);
+ /* When rD is 15, we are copying the top 4 bits of FPSCR
+ into CPSR. That is, set the flags thunk to COPY and
+ install FPSCR[31:28] as the value to copy. */
+ assign(nzcvT, binop(Iop_And32,
+ IRExpr_Get(OFFB_FPSCR, Ity_I32),
+ mkU32(0xF0000000)));
+ setFlags_D1(ARMG_CC_OP_COPY, nzcvT, condT);
+ DIP("fmstat%s\n", nCC(conq));
+ } else {
+ /* Otherwise, merely transfer FPSCR to r0 .. r14. */
+ IRExpr* e = IRExpr_Get(OFFB_FPSCR, Ity_I32);
+ if (isT)
+ putIRegT(rD, e, condT);
+ else
+ putIRegA(rD, e, condT, Ijk_Boring);
+ DIP("fmrx%s r%u, fpscr\n", nCC(conq), rD);
+ }
+ goto decode_success_vfp;
+ }
+ /* fall through */
+ }
+
+ if (BITS8(1,1,1,0,1,1,1,0) == INSN(27,20)
+ && BITS4(1,0,1,0) == INSN(11,8)
+ && BITS8(0,0,0,1,0,0,0,0) == (insn28 & 0xFF)) {
+ UInt rD = INSN(15,12);
+ UInt reg = INSN(19,16);
+ if (reg == BITS4(0,0,0,1)) {
+ putMiscReg32(OFFB_FPSCR,
+ isT ? getIRegT(rD) : getIRegA(rD), condT);
+ DIP("fmxr%s fpscr, r%u\n", nCC(conq), rD);
+ goto decode_success_vfp;
+ }
+ /* fall through */
+ }
+
+ /* --------------------- vmov --------------------- */
+ // VMOV dM, rD, rN
+ if (0x0C400B10 == (insn28 & 0x0FF00FD0)) {
+ UInt dM = INSN(3,0) | (INSN(5,5) << 4);
+ UInt rD = INSN(15,12); /* lo32 */
+ UInt rN = INSN(19,16); /* hi32 */
+ if (rD == 15 || rN == 15 || (isT && (rD == 13 || rN == 13))) {
+ /* fall through */
+ } else {
+ putDReg(dM,
+ unop(Iop_ReinterpI64asF64,
+ binop(Iop_32HLto64,
+ isT ? getIRegT(rN) : getIRegA(rN),
+ isT ? getIRegT(rD) : getIRegA(rD))),
+ condT);
+ DIP("vmov%s d%u, r%u, r%u\n", nCC(conq), dM, rD, rN);
+ goto decode_success_vfp;
+ }
+ /* fall through */
+ }
+
+ // VMOV rD, rN, dM
+ if (0x0C500B10 == (insn28 & 0x0FF00FD0)) {
+ UInt dM = INSN(3,0) | (INSN(5,5) << 4);
+ UInt rD = INSN(15,12); /* lo32 */
+ UInt rN = INSN(19,16); /* hi32 */
+ if (rD == 15 || rN == 15 || (isT && (rD == 13 || rN == 13))
+ || rD == rN) {
+ /* fall through */
+ } else {
+ IRTemp i64 = newTemp(Ity_I64);
+ assign(i64, unop(Iop_ReinterpF64asI64, getDReg(dM)));
+ IRExpr* hi32 = unop(Iop_64HIto32, mkexpr(i64));
+ IRExpr* lo32 = unop(Iop_64to32, mkexpr(i64));
+ if (isT) {
+ putIRegT(rN, hi32, condT);
+ putIRegT(rD, lo32, condT);
+ } else {
+ putIRegA(rN, hi32, condT, Ijk_Boring);
+ putIRegA(rD, lo32, condT, Ijk_Boring);
+ }
+ DIP("vmov%s r%u, r%u, d%u\n", nCC(conq), rD, rN, dM);
+ goto decode_success_vfp;
+ }
+ /* fall through */
+ }
+
+ // VMOV sD, sD+1, rN, rM
+ if (0x0C400A10 == (insn28 & 0x0FF00FD0)) {
+ UInt sD = (INSN(3,0) << 1) | INSN(5,5);
+ UInt rN = INSN(15,12);
+ UInt rM = INSN(19,16);
+ if (rM == 15 || rN == 15 || (isT && (rM == 13 || rN == 13))
+ || sD == 31) {
+ /* fall through */
+ } else {
+ putFReg(sD,
+ unop(Iop_ReinterpI32asF32, isT ? getIRegT(rN) : getIRegA(rN)),
+ condT);
+ putFReg(sD+1,
+ unop(Iop_ReinterpI32asF32, isT ? getIRegT(rM) : getIRegA(rM)),
+ condT);
+ DIP("vmov%s, s%u, s%u, r%u, r%u\n",
+ nCC(conq), sD, sD + 1, rN, rM);
+ goto decode_success_vfp;
+ }
+ }
+
+ // VMOV rN, rM, sD, sD+1
+ if (0x0C500A10 == (insn28 & 0x0FF00FD0)) {
+ UInt sD = (INSN(3,0) << 1) | INSN(5,5);
+ UInt rN = INSN(15,12);
+ UInt rM = INSN(19,16);
+ if (rM == 15 || rN == 15 || (isT && (rM == 13 || rN == 13))
+ || sD == 31 || rN == rM) {
+ /* fall through */
+ } else {
+ IRExpr* res0 = unop(Iop_ReinterpF32asI32, getFReg(sD));
+ IRExpr* res1 = unop(Iop_ReinterpF32asI32, getFReg(sD+1));
+ if (isT) {
+ putIRegT(rN, res0, condT);
+ putIRegT(rM, res1, condT);
+ } else {
+ putIRegA(rN, res0, condT, Ijk_Boring);
+ putIRegA(rM, res1, condT, Ijk_Boring);
+ }
+ DIP("vmov%s, r%u, r%u, s%u, s%u\n",
+ nCC(conq), rN, rM, sD, sD + 1);
+ goto decode_success_vfp;
+ }
+ }
+
+ // VMOV rD[x], rT (ARM core register to scalar)
+ if (0x0E000B10 == (insn28 & 0x0F900F1F)) {
+ UInt rD = (INSN(7,7) << 4) | INSN(19,16);
+ UInt rT = INSN(15,12);
+ UInt opc = (INSN(22,21) << 2) | INSN(6,5);
+ UInt index;
+ if (rT == 15 || (isT && rT == 13)) {
+ /* fall through */
+ } else {
+ if ((opc & BITS4(1,0,0,0)) == BITS4(1,0,0,0)) {
+ index = opc & 7;
+ putDRegI64(rD, triop(Iop_SetElem8x8,
+ getDRegI64(rD),
+ mkU8(index),
+ unop(Iop_32to8,
+ isT ? getIRegT(rT) : getIRegA(rT))),
+ condT);
+ DIP("vmov%s.8 d%u[%u], r%u\n", nCC(conq), rD, index, rT);
+ goto decode_success_vfp;
+ }
+ else if ((opc & BITS4(1,0,0,1)) == BITS4(0,0,0,1)) {
+ index = (opc >> 1) & 3;
+ putDRegI64(rD, triop(Iop_SetElem16x4,
+ getDRegI64(rD),
+ mkU8(index),
+ unop(Iop_32to16,
+ isT ? getIRegT(rT) : getIRegA(rT))),
+ condT);
+ DIP("vmov%s.16 d%u[%u], r%u\n", nCC(conq), rD, index, rT);
+ goto decode_success_vfp;
+ }
+ else if ((opc & BITS4(1,0,1,1)) == BITS4(0,0,0,0)) {
+ index = (opc >> 2) & 1;
+ putDRegI64(rD, triop(Iop_SetElem32x2,
+ getDRegI64(rD),
+ mkU8(index),
+ isT ? getIRegT(rT) : getIRegA(rT)),
+ condT);
+ DIP("vmov%s.32 d%u[%u], r%u\n", nCC(conq), rD, index, rT);
+ goto decode_success_vfp;
+ } else {
+ /* fall through */
+ }
+ }
+ }
+
+ // VMOV (scalar to ARM core register)
+ // VMOV rT, rD[x]
+ if (0x0E100B10 == (insn28 & 0x0F100F1F)) {
+ UInt rN = (INSN(7,7) << 4) | INSN(19,16);
+ UInt rT = INSN(15,12);
+ UInt U = INSN(23,23);
+ UInt opc = (INSN(22,21) << 2) | INSN(6,5);
+ UInt index;
+ if (rT == 15 || (isT && rT == 13)) {
+ /* fall through */
+ } else {
+ if ((opc & BITS4(1,0,0,0)) == BITS4(1,0,0,0)) {
+ index = opc & 7;
+ IRExpr* e = unop(U ? Iop_8Uto32 : Iop_8Sto32,
+ binop(Iop_GetElem8x8,
+ getDRegI64(rN),
+ mkU8(index)));
+ if (isT)
+ putIRegT(rT, e, condT);
+ else
+ putIRegA(rT, e, condT, Ijk_Boring);
+ DIP("vmov%s.%c8 r%u, d%u[%u]\n", nCC(conq), U ? 'u' : 's',
+ rT, rN, index);
+ goto decode_success_vfp;
+ }
+ else if ((opc & BITS4(1,0,0,1)) == BITS4(0,0,0,1)) {
+ index = (opc >> 1) & 3;
+ IRExpr* e = unop(U ? Iop_16Uto32 : Iop_16Sto32,
+ binop(Iop_GetElem16x4,
+ getDRegI64(rN),
+ mkU8(index)));
+ if (isT)
+ putIRegT(rT, e, condT);
+ else
+ putIRegA(rT, e, condT, Ijk_Boring);
+ DIP("vmov%s.%c16 r%u, d%u[%u]\n", nCC(conq), U ? 'u' : 's',
+ rT, rN, index);
+ goto decode_success_vfp;
+ }
+ else if ((opc & BITS4(1,0,1,1)) == BITS4(0,0,0,0) && U == 0) {
+ index = (opc >> 2) & 1;
+ IRExpr* e = binop(Iop_GetElem32x2, getDRegI64(rN), mkU8(index));
+ if (isT)
+ putIRegT(rT, e, condT);
+ else
+ putIRegA(rT, e, condT, Ijk_Boring);
+ DIP("vmov%s.32 r%u, d%u[%u]\n", nCC(conq), rT, rN, index);
+ goto decode_success_vfp;
+ } else {
+ /* fall through */
+ }
+ }
+ }
+
+ // VMOV.F32 sD, #imm
+ // FCONSTS sD, #imm
+ if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
+ && BITS4(0,0,0,0) == INSN(7,4) && INSN(11,8) == BITS4(1,0,1,0)) {
+ UInt rD = (INSN(15,12) << 1) | INSN(22,22);
+ UInt imm8 = (INSN(19,16) << 4) | INSN(3,0);
+ UInt b = (imm8 >> 6) & 1;
+ UInt imm;
+ imm = (BITS8((imm8 >> 7) & 1,(~b) & 1,b,b,b,b,b,(imm8 >> 5) & 1) << 8)
+ | ((imm8 & 0x1f) << 3);
+ imm <<= 16;
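+      /* imm now holds sign : NOT(b) : b b b b b : imm8[5:0],
+         followed by 19 zeroes, with b = imm8[6].  E.g. imm8 = 0x70
+         expands to 0x3F800000, ie 1.0f. */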
+ putFReg(rD, unop(Iop_ReinterpI32asF32, mkU32(imm)), condT);
+ DIP("fconsts%s s%u #%u", nCC(conq), rD, imm8);
+ goto decode_success_vfp;
+ }
+
+ // VMOV.F64 dD, #imm
+ // FCONSTD dD, #imm
+ if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
+ && BITS4(0,0,0,0) == INSN(7,4) && INSN(11,8) == BITS4(1,0,1,1)) {
+ UInt rD = INSN(15,12) | (INSN(22,22) << 4);
+ UInt imm8 = (INSN(19,16) << 4) | INSN(3,0);
+ UInt b = (imm8 >> 6) & 1;
+ ULong imm;
+ imm = (BITS8((imm8 >> 7) & 1,(~b) & 1,b,b,b,b,b,b) << 8)
+ | BITS8(b,b,0,0,0,0,0,0) | (imm8 & 0x3f);
+ imm <<= 48;
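+      /* Same expansion at double width: sign : NOT(b) : b x 8 :
+         imm8[5:0], followed by 48 zeroes.  E.g. imm8 = 0x70
+         expands to 0x3FF0000000000000, ie 1.0. */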
+ putDReg(rD, unop(Iop_ReinterpI64asF64, mkU64(imm)), condT);
+ DIP("fconstd%s d%u #%u", nCC(conq), rD, imm8);
+ goto decode_success_vfp;
+ }
+
+ /* ---------------------- vdup ------------------------- */
+ // VDUP dD, rT
+ // VDUP qD, rT
+ if (BITS8(1,1,1,0,1,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,0,1))
+ && BITS4(1,0,1,1) == INSN(11,8) && INSN(6,6) == 0 && INSN(4,4) == 1) {
+ UInt rD = (INSN(7,7) << 4) | INSN(19,16);
+ UInt rT = INSN(15,12);
+ UInt Q = INSN(21,21);
+ UInt size = (INSN(22,22) << 1) | INSN(5,5);
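+      /* size (b22:b5) selects the lane width: 0 -> 32 bit,
+         1 -> 16 bit, 2 -> 8 bit; 3 is undefined.  Q (bit 21)
+         selects a 128-bit destination, which must be an
+         even-numbered pair of D registers. */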
+      if (rT == 15 || (isT && rT == 13) || size == 3 || (Q && (rD & 1))) {
+ /* fall through */
+ } else {
+ IRExpr* e = isT ? getIRegT(rT) : getIRegA(rT);
+ if (Q) {
+ rD >>= 1;
+ switch (size) {
+ case 0:
+ putQReg(rD, unop(Iop_Dup32x4, e), condT);
+ break;
+ case 1:
+ putQReg(rD, unop(Iop_Dup16x8, unop(Iop_32to16, e)),
+ condT);
+ break;
+ case 2:
+ putQReg(rD, unop(Iop_Dup8x16, unop(Iop_32to8, e)),
+ condT);
+ break;
+ default:
+ vassert(0);
+ }
+ DIP("vdup.%u q%u, r%u\n", 32 / (1<<size), rD, rT);
+ } else {
+ switch (size) {
+ case 0:
+ putDRegI64(rD, unop(Iop_Dup32x2, e), condT);
+ break;
+ case 1:
+ putDRegI64(rD, unop(Iop_Dup16x4, unop(Iop_32to16, e)),
+ condT);
+ break;
+ case 2:
+ putDRegI64(rD, unop(Iop_Dup8x8, unop(Iop_32to8, e)),
+ condT);
+ break;
+ default:
+ vassert(0);
+ }
+ DIP("vdup.%u d%u, r%u\n", 32 / (1<<size), rD, rT);
+ }
+ goto decode_success_vfp;
+ }
+ }
+
+ /* --------------------- f{ld,st}d --------------------- */
+ // FLDD, FSTD
+ if (BITS8(1,1,0,1,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,0,0,1,0))
+ && BITS4(1,0,1,1) == INSN(11,8)) {
+ UInt dD = INSN(15,12) | (INSN(22,22) << 4);
+ UInt rN = INSN(19,16);
+ UInt offset = (insn28 & 0xFF) << 2;
+ UInt bU = (insn28 >> 23) & 1; /* 1: +offset 0: -offset */
+ UInt bL = (insn28 >> 20) & 1; /* 1: load 0: store */
+ /* make unconditional */
+ if (condT != IRTemp_INVALID) {
+ if (isT)
+ mk_skip_over_T32_if_cond_is_false( condT );
+ else
+ mk_skip_over_A32_if_cond_is_false( condT );
+ condT = IRTemp_INVALID;
+ }
+ IRTemp ea = newTemp(Ity_I32);
+ assign(ea, binop(bU ? Iop_Add32 : Iop_Sub32,
+ align4if(isT ? getIRegT(rN) : getIRegA(rN),
+ rN == 15),
+ mkU32(offset)));
+ if (bL) {
+ putDReg(dD, loadLE(Ity_F64,mkexpr(ea)), IRTemp_INVALID);
+ } else {
+ storeLE(mkexpr(ea), getDReg(dD));
+ }
+ DIP("f%sd%s d%u, [r%u, %c#%u]\n",
+ bL ? "ld" : "st", nCC(conq), dD, rN,
+ bU ? '+' : '-', offset);
+ goto decode_success_vfp;
+ }
+
+ /* --------------------- dp insns (D) --------------------- */
+ if (BITS8(1,1,1,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,0,0,0,0))
+ && BITS4(1,0,1,1) == INSN(11,8)
+ && BITS4(0,0,0,0) == (INSN(7,4) & BITS4(0,0,0,1))) {
+ UInt dM = INSN(3,0) | (INSN(5,5) << 4); /* argR */
+ UInt dD = INSN(15,12) | (INSN(22,22) << 4); /* dst/acc */
+ UInt dN = INSN(19,16) | (INSN(7,7) << 4); /* argL */
+ UInt bP = (insn28 >> 23) & 1;
+ UInt bQ = (insn28 >> 21) & 1;
+ UInt bR = (insn28 >> 20) & 1;
+ UInt bS = (insn28 >> 6) & 1;
+ UInt opc = (bP << 3) | (bQ << 2) | (bR << 1) | bS;
+ IRExpr* rm = get_FAKE_roundingmode(); /* XXXROUNDINGFIXME */
+ switch (opc) {
+ case BITS4(0,0,0,0): /* MAC: d + n * m */
+ putDReg(dD, triop(Iop_AddF64, rm,
+ getDReg(dD),
+ triop(Iop_MulF64, rm, getDReg(dN),
+ getDReg(dM))),
+ condT);
+ DIP("fmacd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
+ goto decode_success_vfp;
+ case BITS4(0,0,0,1): /* NMAC: d + -(n * m) */
+ putDReg(dD, triop(Iop_AddF64, rm,
+ getDReg(dD),
+ unop(Iop_NegF64,
+ triop(Iop_MulF64, rm, getDReg(dN),
+ getDReg(dM)))),
+ condT);
+ DIP("fnmacd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
+ goto decode_success_vfp;
+ case BITS4(0,0,1,0): /* MSC: - d + n * m */
+ putDReg(dD, triop(Iop_AddF64, rm,
+ unop(Iop_NegF64, getDReg(dD)),
+ triop(Iop_MulF64, rm, getDReg(dN),
+ getDReg(dM))),
+ condT);
+ DIP("fmscd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
+ goto decode_success_vfp;
+ case BITS4(0,0,1,1): /* NMSC: - d + -(n * m) */
+ putDReg(dD, triop(Iop_AddF64, rm,
+ unop(Iop_NegF64, getDReg(dD)),
+ unop(Iop_NegF64,
+ triop(Iop_MulF64, rm, getDReg(dN),
+ getDReg(dM)))),
+ condT);
+ DIP("fnmscd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
+ goto decode_success_vfp;
+ case BITS4(0,1,0,0): /* MUL: n * m */
+ putDReg(dD, triop(Iop_MulF64, rm, getDReg(dN), getDReg(dM)),
+ condT);
+ DIP("fmuld%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
+ goto decode_success_vfp;
+ case BITS4(0,1,0,1): /* NMUL: - n * m */
+ putDReg(dD, unop(Iop_NegF64,
+ triop(Iop_MulF64, rm, getDReg(dN),
+ getDReg(dM))),
+ condT);
+ DIP("fnmuld%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
+ goto decode_success_vfp;
+ case BITS4(0,1,1,0): /* ADD: n + m */
+ putDReg(dD, triop(Iop_AddF64, rm, getDReg(dN), getDReg(dM)),
+ condT);
+ DIP("faddd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
+ goto decode_success_vfp;
+ case BITS4(0,1,1,1): /* SUB: n - m */
+ putDReg(dD, triop(Iop_SubF64, rm, getDReg(dN), getDReg(dM)),
+ condT);
+ DIP("fsubd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
+ goto decode_success_vfp;
+ case BITS4(1,0,0,0): /* DIV: n / m */
+ putDReg(dD, triop(Iop_DivF64, rm, getDReg(dN), getDReg(dM)),
+ condT);
+ DIP("fdivd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
+ goto decode_success_vfp;
+ default:
+ break;
+ }
+ }
+
+ /* --------------------- compares (D) --------------------- */
+ /* 31 27 23 19 15 11 7 3
+ 28 24 20 16 12 8 4 0
+ FCMPD cond 1110 1D11 0100 Dd 1011 0100 Dm
+ FCMPED cond 1110 1D11 0100 Dd 1011 1100 Dm
+ FCMPZD cond 1110 1D11 0101 Dd 1011 0100 0000
+ FCMPZED cond 1110 1D11 0101 Dd 1011 1100 0000
+ Z N
+
+ Z=0 Compare Dd vs Dm and set FPSCR 31:28 accordingly
+ Z=1 Compare Dd vs zero
+
+ N=1 generates Invalid Operation exn if either arg is any kind of NaN
+ N=0 generates Invalid Operation exn if either arg is a signalling NaN
+ (Not that we pay any attention to N here)
+ */
+ if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
+ && BITS4(0,1,0,0) == (INSN(19,16) & BITS4(1,1,1,0))
+ && BITS4(1,0,1,1) == INSN(11,8)
+ && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
+ UInt bZ = (insn28 >> 16) & 1;
+ UInt bN = (insn28 >> 7) & 1;
+ UInt dD = INSN(15,12) | (INSN(22,22) << 4);
+ UInt dM = INSN(3,0) | (INSN(5,5) << 4);
+ if (bZ && INSN(3,0) != 0) {
+ /* does not decode; fall through */
+ } else {
+ IRTemp argL = newTemp(Ity_F64);
+ IRTemp argR = newTemp(Ity_F64);
+ IRTemp irRes = newTemp(Ity_I32);
+ assign(argL, getDReg(dD));
+ assign(argR, bZ ? IRExpr_Const(IRConst_F64i(0)) : getDReg(dM));
+ assign(irRes, binop(Iop_CmpF64, mkexpr(argL), mkexpr(argR)));
+
+ IRTemp nzcv = IRTemp_INVALID;
+ IRTemp oldFPSCR = newTemp(Ity_I32);
+ IRTemp newFPSCR = newTemp(Ity_I32);
+
+ /* This is where the fun starts. We have to convert 'irRes'
+ from an IR-convention return result (IRCmpF64Result) to an
+ ARM-encoded (N,Z,C,V) group. The final result is in the
+ bottom 4 bits of 'nzcv'. */
+ /* Map compare result from IR to ARM(nzcv) */
+ /*
+ FP cmp result | IR | ARM(nzcv)
+ --------------------------------
+ UN 0x45 0011
+ LT 0x01 1000
+ GT 0x00 0010
+ EQ 0x40 0110
+ */
+ nzcv = mk_convert_IRCmpF64Result_to_NZCV(irRes);
+
+ /* And update FPSCR accordingly */
+ assign(oldFPSCR, IRExpr_Get(OFFB_FPSCR, Ity_I32));
+ assign(newFPSCR,
+ binop(Iop_Or32,
+ binop(Iop_And32, mkexpr(oldFPSCR), mkU32(0x0FFFFFFF)),
+ binop(Iop_Shl32, mkexpr(nzcv), mkU8(28))));
+
+ putMiscReg32(OFFB_FPSCR, mkexpr(newFPSCR), condT);
+
+ if (bZ) {
+ DIP("fcmpz%sd%s d%u\n", bN ? "e" : "", nCC(conq), dD);
+ } else {
+ DIP("fcmp%sd%s d%u, d%u\n", bN ? "e" : "", nCC(conq), dD, dM);
+ }
+ goto decode_success_vfp;
+ }
+ /* fall through */
+ }
+
+ /* --------------------- unary (D) --------------------- */
+ if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
+ && BITS4(0,0,0,0) == (INSN(19,16) & BITS4(1,1,1,0))
+ && BITS4(1,0,1,1) == INSN(11,8)
+ && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
+ UInt dD = INSN(15,12) | (INSN(22,22) << 4);
+ UInt dM = INSN(3,0) | (INSN(5,5) << 4);
+ UInt b16 = (insn28 >> 16) & 1;
+ UInt b7 = (insn28 >> 7) & 1;
+ /**/ if (b16 == 0 && b7 == 0) {
+ // FCPYD
+ putDReg(dD, getDReg(dM), condT);
+ DIP("fcpyd%s d%u, d%u\n", nCC(conq), dD, dM);
+ goto decode_success_vfp;
+ }
+ else if (b16 == 0 && b7 == 1) {
+ // FABSD
+ putDReg(dD, unop(Iop_AbsF64, getDReg(dM)), condT);
+ DIP("fabsd%s d%u, d%u\n", nCC(conq), dD, dM);
+ goto decode_success_vfp;
+ }
+ else if (b16 == 1 && b7 == 0) {
+ // FNEGD
+ putDReg(dD, unop(Iop_NegF64, getDReg(dM)), condT);
+ DIP("fnegd%s d%u, d%u\n", nCC(conq), dD, dM);
+ goto decode_success_vfp;
+ }
+ else if (b16 == 1 && b7 == 1) {
+ // FSQRTD
+ IRExpr* rm = get_FAKE_roundingmode(); /* XXXROUNDINGFIXME */
+ putDReg(dD, binop(Iop_SqrtF64, rm, getDReg(dM)), condT);
+ DIP("fsqrtd%s d%u, d%u\n", nCC(conq), dD, dM);
+ goto decode_success_vfp;
+ }
+ else
+ vassert(0);
+
+ /* fall through */
+ }
+
+ /* ----------------- I <-> D conversions ----------------- */
+
+ // F{S,U}ITOD dD, fM
+ if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
+ && BITS4(1,0,0,0) == (INSN(19,16) & BITS4(1,1,1,1))
+ && BITS4(1,0,1,1) == INSN(11,8)
+ && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
+ UInt bM = (insn28 >> 5) & 1;
+ UInt fM = (INSN(3,0) << 1) | bM;
+ UInt dD = INSN(15,12) | (INSN(22,22) << 4);
+ UInt syned = (insn28 >> 7) & 1;
+ if (syned) {
+ // FSITOD
+ putDReg(dD, unop(Iop_I32StoF64,
+ unop(Iop_ReinterpF32asI32, getFReg(fM))),
+ condT);
+ DIP("fsitod%s d%u, s%u\n", nCC(conq), dD, fM);
+ } else {
+ // FUITOD
+ putDReg(dD, unop(Iop_I32UtoF64,
+ unop(Iop_ReinterpF32asI32, getFReg(fM))),
+ condT);
+ DIP("fuitod%s d%u, s%u\n", nCC(conq), dD, fM);
+ }
+ goto decode_success_vfp;
+ }
+
+ // FTO{S,U}ID fD, dM
+ if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
+ && BITS4(1,1,0,0) == (INSN(19,16) & BITS4(1,1,1,0))
+ && BITS4(1,0,1,1) == INSN(11,8)
+ && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
+ UInt bD = (insn28 >> 22) & 1;
+ UInt fD = (INSN(15,12) << 1) | bD;
+ UInt dM = INSN(3,0) | (INSN(5,5) << 4);
+ UInt bZ = (insn28 >> 7) & 1;
+ UInt syned = (insn28 >> 16) & 1;
+ IRTemp rmode = newTemp(Ity_I32);
+ assign(rmode, bZ ? mkU32(Irrm_ZERO)
+ : mkexpr(mk_get_IR_rounding_mode()));
+ if (syned) {
+ // FTOSID
+ putFReg(fD, unop(Iop_ReinterpI32asF32,
+ binop(Iop_F64toI32S, mkexpr(rmode),
+ getDReg(dM))),
+ condT);
+ DIP("ftosi%sd%s s%u, d%u\n", bZ ? "z" : "",
+ nCC(conq), fD, dM);
+ } else {
+ // FTOUID
+ putFReg(fD, unop(Iop_ReinterpI32asF32,
+ binop(Iop_F64toI32U, mkexpr(rmode),
+ getDReg(dM))),
+ condT);
+ DIP("ftoui%sd%s s%u, d%u\n", bZ ? "z" : "",
+ nCC(conq), fD, dM);
+ }
+ goto decode_success_vfp;
+ }
+
+ /* ----------------------------------------------------------- */
+ /* -- VFP instructions -- single precision -- */
+ /* ----------------------------------------------------------- */
+
+ /* --------------------- fldms, fstms --------------------- */
+ /*
+ 31 27 23 19 15 11 7 0
+ P UDWL
+      C4-98, C5-26 1 FSTMS cond 1100 1x00 Rn Fd 1010 offset
+      C4-98, C5-28 2 FSTMIAS cond 1100 1x10 Rn Fd 1010 offset
+      C4-98, C5-30 3 FSTMDBS cond 1101 0x10 Rn Fd 1010 offset
+
+      C4-40, C5-26 1 FLDMS cond 1100 1x01 Rn Fd 1010 offset
+      C4-40, C5-28 2 FLDMIAS cond 1100 1x11 Rn Fd 1010 offset
+      C4-40, C5-30 3 FLDMDBS cond 1101 0x11 Rn Fd 1010 offset
+
+      Regs transferred: F(Fd:D) .. F(Fd:D + offset - 1)
+ offset must not imply a reg > 15
+ IA/DB: Rn is changed by (4 x # regs transferred)
+
+ case coding:
+ 1 at-Rn (access at Rn)
+ 2 ia-Rn (access at Rn, then Rn += 4n)
+ 3 db-Rn (Rn -= 4n, then access at Rn)
+ */
+ if (BITS8(1,1,0,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,0,0,0,0,0))
+ && INSN(11,8) == BITS4(1,0,1,0)) {
+ UInt bP = (insn28 >> 24) & 1;
+ UInt bU = (insn28 >> 23) & 1;
+ UInt bW = (insn28 >> 21) & 1;
+ UInt bL = (insn28 >> 20) & 1;
+ UInt bD = (insn28 >> 22) & 1;
+ UInt offset = (insn28 >> 0) & 0xFF;
+ UInt rN = INSN(19,16);
+ UInt fD = (INSN(15,12) << 1) | bD;
+ UInt nRegs = offset;
+ UInt summary = 0;
+ Int i;
+
+ /**/ if (bP == 0 && bU == 1 && bW == 0) {
+ summary = 1;
+ }
+ else if (bP == 0 && bU == 1 && bW == 1) {
+ summary = 2;
+ }
+ else if (bP == 1 && bU == 0 && bW == 1) {
+ summary = 3;
+ }
+ else goto after_vfp_fldms_fstms;
+
+ /* no writebacks to r15 allowed. No use of r15 in thumb mode. */
+ if (rN == 15 && (summary == 2 || summary == 3 || isT))
+ goto after_vfp_fldms_fstms;
+
+ /* offset must specify at least one register */
+ if (offset < 1)
+ goto after_vfp_fldms_fstms;
+
+ /* can't transfer regs after S31 */
+ if (fD + nRegs - 1 >= 32)
+ goto after_vfp_fldms_fstms;
+
+ /* Now, we can't do a conditional load or store, since that very
+ likely will generate an exception. So we have to take a side
+ exit at this point if the condition is false. */
+ if (condT != IRTemp_INVALID) {
+ if (isT)
+ mk_skip_over_T32_if_cond_is_false( condT );
+ else
+ mk_skip_over_A32_if_cond_is_false( condT );
+ condT = IRTemp_INVALID;
+ }
+ /* Ok, now we're unconditional. Do the load or store. */
+
+ /* get the old Rn value */
+ IRTemp rnT = newTemp(Ity_I32);
+ assign(rnT, align4if(isT ? getIRegT(rN) : getIRegA(rN),
+ rN == 15));
+
+ /* make a new value for Rn, post-insn */
+ IRTemp rnTnew = IRTemp_INVALID;
+ if (summary == 2 || summary == 3) {
+ rnTnew = newTemp(Ity_I32);
+ assign(rnTnew, binop(summary == 2 ? Iop_Add32 : Iop_Sub32,
+ mkexpr(rnT),
+ mkU32(4 * nRegs)));
+ }
+
+ /* decide on the base transfer address */
+ IRTemp taT = newTemp(Ity_I32);
+ assign(taT, summary == 3 ? mkexpr(rnTnew) : mkexpr(rnT));
+
+ /* update Rn if necessary -- in case 3, we're moving it down, so
+ update before any memory reference, in order to keep Memcheck
+ and V's stack-extending logic (on linux) happy */
+ if (summary == 3) {
+ if (isT)
+ putIRegT(rN, mkexpr(rnTnew), IRTemp_INVALID);
+ else
+ putIRegA(rN, mkexpr(rnTnew), IRTemp_INVALID, Ijk_Boring);
+ }
+
+ /* generate the transfers */
+ for (i = 0; i < nRegs; i++) {
+ IRExpr* addr = binop(Iop_Add32, mkexpr(taT), mkU32(4*i));
+ if (bL) {
+ putFReg(fD + i, loadLE(Ity_F32, addr), IRTemp_INVALID);
+ } else {
+ storeLE(addr, getFReg(fD + i));
+ }
+ }
+
+ /* update Rn if necessary -- in case 2, we're moving it up, so
+ update after any memory reference, in order to keep Memcheck
+ and V's stack-extending logic (on linux) happy */
+ if (summary == 2) {
+ if (isT)
+ putIRegT(rN, mkexpr(rnTnew), IRTemp_INVALID);
+ else
+ putIRegA(rN, mkexpr(rnTnew), IRTemp_INVALID, Ijk_Boring);
+ }
+
+ HChar* nm = bL==1 ? "ld" : "st";
+ switch (summary) {
+ case 1: DIP("f%sms%s r%u, {s%u-s%u}\n",
+ nm, nCC(conq), rN, fD, fD + nRegs - 1);
+ break;
+ case 2: DIP("f%smias%s r%u!, {s%u-s%u}\n",
+ nm, nCC(conq), rN, fD, fD + nRegs - 1);
+ break;
+ case 3: DIP("f%smdbs%s r%u!, {s%u-s%u}\n",
+ nm, nCC(conq), rN, fD, fD + nRegs - 1);
+ break;
+ default: vassert(0);
+ }
+
+ goto decode_success_vfp;
+ /* FIXME alignment constraints? */
+ }
+
+ after_vfp_fldms_fstms:
+
+ /* --------------------- fmsr, fmrs --------------------- */
+ if (BITS8(1,1,1,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,1,1,1,0))
+ && BITS4(1,0,1,0) == INSN(11,8)
+ && BITS4(0,0,0,0) == INSN(3,0)
+ && BITS4(0,0,0,1) == (INSN(7,4) & BITS4(0,1,1,1))) {
+ UInt rD = INSN(15,12);
+ UInt b7 = (insn28 >> 7) & 1;
+ UInt fN = (INSN(19,16) << 1) | b7;
+ UInt b20 = (insn28 >> 20) & 1;
+ if (rD == 15) {
+ /* fall through */
+ /* Let's assume that no sane person would want to do
+ floating-point transfers to or from the program counter,
+ and simply decline to decode the instruction. The ARM ARM
+ doesn't seem to explicitly disallow this case, though. */
+ } else {
+ if (b20) {
+ IRExpr* res = unop(Iop_ReinterpF32asI32, getFReg(fN));
+ if (isT)
+ putIRegT(rD, res, condT);
+ else
+ putIRegA(rD, res, condT, Ijk_Boring);
+ DIP("fmrs%s r%u, s%u\n", nCC(conq), rD, fN);
+ } else {
+ putFReg(fN, unop(Iop_ReinterpI32asF32,
+ isT ? getIRegT(rD) : getIRegA(rD)),
+ condT);
+ DIP("fmsr%s s%u, r%u\n", nCC(conq), fN, rD);
+ }
+ goto decode_success_vfp;
+ }
+ /* fall through */
+ }
+
+ /* --------------------- f{ld,st}s --------------------- */
+ // FLDS, FSTS
+ if (BITS8(1,1,0,1,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,0,0,1,0))
+ && BITS4(1,0,1,0) == INSN(11,8)) {
+ UInt bD = (insn28 >> 22) & 1;
+ UInt fD = (INSN(15,12) << 1) | bD;
+ UInt rN = INSN(19,16);
+ UInt offset = (insn28 & 0xFF) << 2;
+ UInt bU = (insn28 >> 23) & 1; /* 1: +offset 0: -offset */
+ UInt bL = (insn28 >> 20) & 1; /* 1: load 0: store */
+ /* make unconditional */
+ if (condT != IRTemp_INVALID) {
+ if (isT)
+ mk_skip_over_T32_if_cond_is_false( condT );
+ else
+ mk_skip_over_A32_if_cond_is_false( condT );
+ condT = IRTemp_INVALID;
+ }
+ IRTemp ea = newTemp(Ity_I32);
+ assign(ea, binop(bU ? Iop_Add32 : Iop_Sub32,
+ align4if(isT ? getIRegT(rN) : getIRegA(rN),
+ rN == 15),
+ mkU32(offset)));
+ if (bL) {
+ putFReg(fD, loadLE(Ity_F32,mkexpr(ea)), IRTemp_INVALID);
+ } else {
+ storeLE(mkexpr(ea), getFReg(fD));
+ }
+ DIP("f%ss%s s%u, [r%u, %c#%u]\n",
+ bL ? "ld" : "st", nCC(conq), fD, rN,
+ bU ? '+' : '-', offset);
+ goto decode_success_vfp;
+ }
+
+ /* --------------------- dp insns (F) --------------------- */
+ if (BITS8(1,1,1,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,0,0,0,0))
+ && BITS4(1,0,1,0) == (INSN(11,8) & BITS4(1,1,1,0))
+ && BITS4(0,0,0,0) == (INSN(7,4) & BITS4(0,0,0,1))) {
+ UInt bM = (insn28 >> 5) & 1;
+ UInt bD = (insn28 >> 22) & 1;
+ UInt bN = (insn28 >> 7) & 1;
+ UInt fM = (INSN(3,0) << 1) | bM; /* argR */
+ UInt fD = (INSN(15,12) << 1) | bD; /* dst/acc */
+ UInt fN = (INSN(19,16) << 1) | bN; /* argL */
+ UInt bP = (insn28 >> 23) & 1;
+ UInt bQ = (insn28 >> 21) & 1;
+ UInt bR = (insn28 >> 20) & 1;
+ UInt bS = (insn28 >> 6) & 1;
+ UInt opc = (bP << 3) | (bQ << 2) | (bR << 1) | bS;
+ IRExpr* rm = get_FAKE_roundingmode(); /* XXXROUNDINGFIXME */
+ switch (opc) {
+ case BITS4(0,0,0,0): /* MAC: d + n * m */
+ putFReg(fD, triop(Iop_AddF32, rm,
+ getFReg(fD),
+ triop(Iop_MulF32, rm, getFReg(fN), getFReg(fM))),
+ condT);
+ DIP("fmacs%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
+ goto decode_success_vfp;
+ case BITS4(0,0,0,1): /* NMAC: d + -(n * m) */
+ putFReg(fD, triop(Iop_AddF32, rm,
+ getFReg(fD),
+ unop(Iop_NegF32,
+ triop(Iop_MulF32, rm, getFReg(fN),
+ getFReg(fM)))),
+ condT);
+ DIP("fnmacs%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
+ goto decode_success_vfp;
+ case BITS4(0,0,1,0): /* MSC: - d + n * m */
+ putFReg(fD, triop(Iop_AddF32, rm,
+ unop(Iop_NegF32, getFReg(fD)),
+ triop(Iop_MulF32, rm, getFReg(fN), getFReg(fM))),
+ condT);
+ DIP("fmscs%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
+ goto decode_success_vfp;
+ case BITS4(0,0,1,1): /* NMSC: - d + -(n * m) */
+ putFReg(fD, triop(Iop_AddF32, rm,
+ unop(Iop_NegF32, getFReg(fD)),
+ unop(Iop_NegF32,
+ triop(Iop_MulF32, rm,
+ getFReg(fN),
+ getFReg(fM)))),
+ condT);
+ DIP("fnmscs%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
+ goto decode_success_vfp;
+ case BITS4(0,1,0,0): /* MUL: n * m */
+ putFReg(fD, triop(Iop_MulF32, rm, getFReg(fN), getFReg(fM)),
+ condT);
+ DIP("fmuls%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
+ goto decode_success_vfp;
+ case BITS4(0,1,0,1): /* NMUL: - n * m */
+ putFReg(fD, unop(Iop_NegF32,
+ triop(Iop_MulF32, rm, getFReg(fN),
+ getFReg(fM))),
+ condT);
+ DIP("fnmuls%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
+ goto decode_success_vfp;
+ case BITS4(0,1,1,0): /* ADD: n + m */
+ putFReg(fD, triop(Iop_AddF32, rm, getFReg(fN), getFReg(fM)),
+ condT);
+ DIP("fadds%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
+ goto decode_success_vfp;
+ case BITS4(0,1,1,1): /* SUB: n - m */
+ putFReg(fD, triop(Iop_SubF32, rm, getFReg(fN), getFReg(fM)),
+ condT);
+ DIP("fsubs%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
+ goto decode_success_vfp;
+ case BITS4(1,0,0,0): /* DIV: n / m */
+ putFReg(fD, triop(Iop_DivF32, rm, getFReg(fN), getFReg(fM)),
+ condT);
+ DIP("fdivs%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
+ goto decode_success_vfp;
+ default:
+ break;
+ }
+ }
+
+ /* --------------------- compares (S) --------------------- */
+ /* 31 27 23 19 15 11 7 3
+ 28 24 20 16 12 8 4 0
+ FCMPS cond 1110 1D11 0100 Fd 1010 01M0 Fm
+ FCMPES cond 1110 1D11 0100 Fd 1010 11M0 Fm
+ FCMPZS cond 1110 1D11 0101 Fd 1010 0100 0000
+      FCMPZES cond 1110 1D11 0101 Fd 1010 1100 0000
+ Z N
+
+ Z=0 Compare Fd:D vs Fm:M and set FPSCR 31:28 accordingly
+ Z=1 Compare Fd:D vs zero
+
+ N=1 generates Invalid Operation exn if either arg is any kind of NaN
+ N=0 generates Invalid Operation exn if either arg is a signalling NaN
+ (Not that we pay any attention to N here)
+ */
+ if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
+ && BITS4(0,1,0,0) == (INSN(19,16) & BITS4(1,1,1,0))
+ && BITS4(1,0,1,0) == INSN(11,8)
+ && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
+ UInt bZ = (insn28 >> 16) & 1;
+ UInt bN = (insn28 >> 7) & 1;
+ UInt bD = (insn28 >> 22) & 1;
+ UInt bM = (insn28 >> 5) & 1;
+ UInt fD = (INSN(15,12) << 1) | bD;
+ UInt fM = (INSN(3,0) << 1) | bM;
+ if (bZ && (INSN(3,0) != 0 || (INSN(7,4) & 3) != 0)) {
+ /* does not decode; fall through */
+ } else {
+ IRTemp argL = newTemp(Ity_F64);
+ IRTemp argR = newTemp(Ity_F64);
+ IRTemp irRes = newTemp(Ity_I32);
+
+ assign(argL, unop(Iop_F32toF64, getFReg(fD)));
+ assign(argR, bZ ? IRExpr_Const(IRConst_F64i(0))
+ : unop(Iop_F32toF64, getFReg(fM)));
+ assign(irRes, binop(Iop_CmpF64, mkexpr(argL), mkexpr(argR)));
+
+ IRTemp nzcv = IRTemp_INVALID;
+ IRTemp oldFPSCR = newTemp(Ity_I32);
+ IRTemp newFPSCR = newTemp(Ity_I32);
+
+ /* This is where the fun starts. We have to convert 'irRes'
+ from an IR-convention return result (IRCmpF64Result) to an
+ ARM-encoded (N,Z,C,V) group. The final result is in the
+ bottom 4 bits of 'nzcv'. */
+ /* Map compare result from IR to ARM(nzcv) */
+ /*
+ FP cmp result | IR | ARM(nzcv)
+ --------------------------------
+ UN 0x45 0011
+ LT 0x01 1000
+ GT 0x00 0010
+ EQ 0x40 0110
+ */
+ nzcv = mk_convert_IRCmpF64Result_to_NZCV(irRes);
+
+ /* And update FPSCR accordingly */
+ assign(oldFPSCR, IRExpr_Get(OFFB_FPSCR, Ity_I32));
+ assign(newFPSCR,
+ binop(Iop_Or32,
+ binop(Iop_And32, mkexpr(oldFPSCR), mkU32(0x0FFFFFFF)),
+ binop(Iop_Shl32, mkexpr(nzcv), mkU8(28))));
+
+ putMiscReg32(OFFB_FPSCR, mkexpr(newFPSCR), condT);
+
+ if (bZ) {
+ DIP("fcmpz%ss%s s%u\n", bN ? "e" : "", nCC(conq), fD);
+ } else {
+ DIP("fcmp%ss%s s%u, s%u\n", bN ? "e" : "",
+ nCC(conq), fD, fM);
+ }
+ goto decode_success_vfp;
+ }
+ /* fall through */
+ }
+
+ /* --------------------- unary (S) --------------------- */
+ if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
+ && BITS4(0,0,0,0) == (INSN(19,16) & BITS4(1,1,1,0))
+ && BITS4(1,0,1,0) == INSN(11,8)
+ && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
+ UInt bD = (insn28 >> 22) & 1;
+ UInt bM = (insn28 >> 5) & 1;
+ UInt fD = (INSN(15,12) << 1) | bD;
+ UInt fM = (INSN(3,0) << 1) | bM;
+ UInt b16 = (insn28 >> 16) & 1;
+ UInt b7 = (insn28 >> 7) & 1;
+ /**/ if (b16 == 0 && b7 == 0) {
+ // FCPYS
+ putFReg(fD, getFReg(fM), condT);
+ DIP("fcpys%s s%u, s%u\n", nCC(conq), fD, fM);
+ goto decode_success_vfp;
+ }
+ else if (b16 == 0 && b7 == 1) {
+ // FABSS
+ putFReg(fD, unop(Iop_AbsF32, getFReg(fM)), condT);
+ DIP("fabss%s s%u, s%u\n", nCC(conq), fD, fM);
+ goto decode_success_vfp;
+ }
+ else if (b16 == 1 && b7 == 0) {
+ // FNEGS
+ putFReg(fD, unop(Iop_NegF32, getFReg(fM)), condT);
+ DIP("fnegs%s s%u, s%u\n", nCC(conq), fD, fM);
+ goto decode_success_vfp;
+ }
+ else if (b16 == 1 && b7 == 1) {
+ // FSQRTS
+ IRExpr* rm = get_FAKE_roundingmode(); /* XXXROUNDINGFIXME */
+ putFReg(fD, binop(Iop_SqrtF32, rm, getFReg(fM)), condT);
+ DIP("fsqrts%s s%u, s%u\n", nCC(conq), fD, fM);
+ goto decode_success_vfp;
+ }
+ else
+ vassert(0);
+
+ /* fall through */
+ }
+
+ /* ----------------- I <-> S conversions ----------------- */
+
+ // F{S,U}ITOS fD, fM
+ /* These are more complex than FSITOD/FUITOD. In the D cases, a 32
+ bit int will always fit within the 53 bit mantissa, so there's
+ no possibility of a loss of precision, but that's obviously not
+ the case here. Hence this case possibly requires rounding, and
+ so it drags in the current rounding mode. */
+ if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
+ && BITS4(1,0,0,0) == INSN(19,16)
+ && BITS4(1,0,1,0) == (INSN(11,8) & BITS4(1,1,1,0))
+ && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
+ UInt bM = (insn28 >> 5) & 1;
+ UInt bD = (insn28 >> 22) & 1;
+ UInt fM = (INSN(3,0) << 1) | bM;
+ UInt fD = (INSN(15,12) << 1) | bD;
+ UInt syned = (insn28 >> 7) & 1;
+ IRTemp rmode = newTemp(Ity_I32);
+ assign(rmode, mkexpr(mk_get_IR_rounding_mode()));
+ if (syned) {
+ // FSITOS
+ putFReg(fD, binop(Iop_F64toF32,
+ mkexpr(rmode),
+ unop(Iop_I32StoF64,
+ unop(Iop_ReinterpF32asI32, getFReg(fM)))),
+ condT);
+ DIP("fsitos%s s%u, s%u\n", nCC(conq), fD, fM);
+ } else {
+ // FUITOS
+ putFReg(fD, binop(Iop_F64toF32,
+ mkexpr(rmode),
+ unop(Iop_I32UtoF64,
+ unop(Iop_ReinterpF32asI32, getFReg(fM)))),
+ condT);
+ DIP("fuitos%s s%u, s%u\n", nCC(conq), fD, fM);
+ }
+ goto decode_success_vfp;
+ }
+
+ // FTO{S,U}IS fD, fM
+ if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
+ && BITS4(1,1,0,0) == (INSN(19,16) & BITS4(1,1,1,0))
+ && BITS4(1,0,1,0) == INSN(11,8)
+ && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
+ UInt bM = (insn28 >> 5) & 1;
+ UInt bD = (insn28 >> 22) & 1;
+ UInt fD = (INSN(15,12) << 1) | bD;
+ UInt fM = (INSN(3,0) << 1) | bM;
+ UInt bZ = (insn28 >> 7) & 1;
+ UInt syned = (insn28 >> 16) & 1;
+ IRTemp rmode = newTemp(Ity_I32);
+ assign(rmode, bZ ? mkU32(Irrm_ZERO)
+ : mkexpr(mk_get_IR_rounding_mode()));
+ if (syned) {
+ // FTOSIS
+ putFReg(fD, unop(Iop_ReinterpI32asF32,
+ binop(Iop_F64toI32S, mkexpr(rmode),
+ unop(Iop_F32toF64, getFReg(fM)))),
+ condT);
+ DIP("ftosi%ss%s s%u, d%u\n", bZ ? "z" : "",
+ nCC(conq), fD, fM);
+ goto decode_success_vfp;
+ } else {
+ // FTOUIS
+ putFReg(fD, unop(Iop_ReinterpI32asF32,
+ binop(Iop_F64toI32U, mkexpr(rmode),
+ unop(Iop_F32toF64, getFReg(fM)))),
+ condT);
+ DIP("ftoui%ss%s s%u, d%u\n", bZ ? "z" : "",
+ nCC(conq), fD, fM);
+ goto decode_success_vfp;
+ }
+ }
+
+ /* ----------------- S <-> D conversions ----------------- */
+
+ // FCVTDS
+ if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
+ && BITS4(0,1,1,1) == INSN(19,16)
+ && BITS4(1,0,1,0) == INSN(11,8)
+ && BITS4(1,1,0,0) == (INSN(7,4) & BITS4(1,1,0,1))) {
+ UInt dD = INSN(15,12) | (INSN(22,22) << 4);
+ UInt bM = (insn28 >> 5) & 1;
+ UInt fM = (INSN(3,0) << 1) | bM;
+ putDReg(dD, unop(Iop_F32toF64, getFReg(fM)), condT);
+ DIP("fcvtds%s d%u, s%u\n", nCC(conq), dD, fM);
+ goto decode_success_vfp;
+ }
+
+ // FCVTSD
+ if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
+ && BITS4(0,1,1,1) == INSN(19,16)
+ && BITS4(1,0,1,1) == INSN(11,8)
+ && BITS4(1,1,0,0) == (INSN(7,4) & BITS4(1,1,0,1))) {
+ UInt bD = (insn28 >> 22) & 1;
+ UInt fD = (INSN(15,12) << 1) | bD;
+ UInt dM = INSN(3,0) | (INSN(5,5) << 4);
+ IRTemp rmode = newTemp(Ity_I32);
+ assign(rmode, mkexpr(mk_get_IR_rounding_mode()));
+ putFReg(fD, binop(Iop_F64toF32, mkexpr(rmode), getDReg(dM)),
+ condT);
+ DIP("fcvtsd%s s%u, d%u\n", nCC(conq), fD, dM);
+ goto decode_success_vfp;
+ }
+
+ /* FAILURE */
+ return False;
+
+ decode_success_vfp:
+ /* Check that any accepted insn really is a CP10 or CP11 insn, iow,
+ assert that we aren't accepting, in this fn, insns that actually
+ should be handled somewhere else. */
+ vassert(INSN(11,9) == BITS3(1,0,1)); // 11:8 = 1010 or 1011
+ return True;
+
+# undef INSN
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Instructions in NV (never) space ---*/
+/*------------------------------------------------------------*/
+
+/* ARM only */
+/* Translate a NV space instruction.  If successful, returns True,
+   and *dres may or may not be updated.  On failure, returns False,
+   and neither changes *dres nor creates any IR.
+
+ Note that all NEON instructions (in ARM mode) are handled through
+ here, since they are all in NV space.
+*/
+static Bool decode_NV_instruction ( /*MOD*/DisResult* dres,
+ VexArchInfo* archinfo,
+ UInt insn )
+{
+# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
+# define INSN_COND SLICE_UInt(insn, 31, 28)
+
+ HChar dis_buf[128];
+
+ // Should only be called for NV instructions
+ vassert(BITS4(1,1,1,1) == INSN_COND);
+
+ /* ------------------------ pld ------------------------ */
+ if (BITS8(0,1,0,1, 0, 1,0,1) == (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1))
+ && BITS4(1,1,1,1) == INSN(15,12)) {
+ UInt rN = INSN(19,16);
+ UInt imm12 = INSN(11,0);
+ UInt bU = INSN(23,23);
+ DIP("pld [r%u, #%c%u]\n", rN, bU ? '+' : '-', imm12);
+ return True;
+ }
+
+ if (BITS8(0,1,1,1, 0, 1,0,1) == (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1))
+ && BITS4(1,1,1,1) == INSN(15,12)
+ && 0 == INSN(4,4)) {
+ UInt rN = INSN(19,16);
+ UInt rM = INSN(3,0);
+ UInt imm5 = INSN(11,7);
+ UInt sh2 = INSN(6,5);
+ UInt bU = INSN(23,23);
+ if (rM != 15) {
+ IRExpr* eaE = mk_EA_reg_plusminus_shifted_reg(rN, bU, rM,
+ sh2, imm5, dis_buf);
+ IRTemp eaT = newTemp(Ity_I32);
+ /* Bind eaE to a temp merely for debugging-vex purposes, so we
+ can check it's a plausible decoding. It will get removed
+ by iropt a little later on. */
+ vassert(eaE);
+ assign(eaT, eaE);
+ DIP("pld %s\n", dis_buf);
+ return True;
+ }
+ /* fall through */
+ }
+
+ /* ------------------------ pli ------------------------ */
+ if (BITS8(0,1,0,0, 0, 1,0,1) == (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1))
+ && BITS4(1,1,1,1) == INSN(15,12)) {
+ UInt rN = INSN(19,16);
+ UInt imm12 = INSN(11,0);
+ UInt bU = INSN(23,23);
+ DIP("pli [r%u, #%c%u]\n", rN, bU ? '+' : '-', imm12);
+ return True;
+ }
+
+ /* --------------------- Interworking branches --------------------- */
+
+ // BLX (1), viz, unconditional branch and link to R15+simm24
+ // and set CPSR.T = 1, that is, switch to Thumb mode
+ if (INSN(31,25) == BITS7(1,1,1,1,1,0,1)) {
+ UInt bitH = INSN(24,24);
+ Int uimm24 = INSN(23,0);
+ Int simm24 = (((uimm24 << 8) >> 8) << 2) + (bitH << 1);
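+ /* The "<< 8 then >> 8" pair sign-extends the 24-bit field to 32
+ bits; this relies on >> of a negative signed int being an
+ arithmetic shift, which C leaves implementation-defined but
+ which holds for compilers in practical use. The "<< 2" scales
+ words to bytes, and bitH supplies the extra halfword offset
+ that BLX(1) encodes. E.g. uimm24 = 0xFFFFFF, bitH = 1 gives
+ simm24 = -4 + 2 = -2. */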
+ /* Now this is a bit tricky. Since we're decoding an ARM insn,
+ it is implied that CPSR.T == 0. Hence the current insn's
+ address is guaranteed to be of the form X--(30)--X00. So, no
+ need to mask any bits off it. But need to set the lowest bit
+ to 1 to denote we're in Thumb mode after this, since
+ guest_R15T has CPSR.T as the lowest bit. And we can't chase
+ into the call, so end the block at this point. */
+ UInt dst = guest_R15_curr_instr_notENC + 8 + (simm24 | 1);
+ putIRegA( 14, mkU32(guest_R15_curr_instr_notENC + 4),
+ IRTemp_INVALID/*because AL*/, Ijk_Boring );
+ irsb->next = mkU32(dst);
+ irsb->jumpkind = Ijk_Call;
+ dres->whatNext = Dis_StopHere;
+ DIP("blx 0x%x (and switch to Thumb mode)\n", dst - 1);
+ return True;
+ }
+
+ /* ------------------- v7 barrier insns ------------------- */
+ switch (insn) {
+ case 0xF57FF06F: /* ISB */
+ stmt( IRStmt_MBE(Imbe_Fence) );
+ DIP("ISB\n");
+ return True;
+ case 0xF57FF04F: /* DSB */
+ stmt( IRStmt_MBE(Imbe_Fence) );
+ DIP("DSB\n");
+ return True;
+ case 0xF57FF05F: /* DMB */
+ stmt( IRStmt_MBE(Imbe_Fence) );
+ DIP("DMB\n");
+ return True;
+ default:
+ break;
+ }
+
+ /* ------------------- NEON ------------------- */
+ if (archinfo->hwcaps & VEX_HWCAPS_ARM_NEON) {
+ Bool ok_neon = decode_NEON_instruction(
+ dres, insn, IRTemp_INVALID/*unconditional*/,
+ False/*!isT*/
+ );
+ if (ok_neon)
+ return True;
+ }
+
+ // unrecognised
+ return False;
+
+# undef INSN_COND
+# undef INSN
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Disassemble a single ARM instruction ---*/
+/*------------------------------------------------------------*/
+
+/* Disassemble a single ARM instruction into IR. The instruction is
+ located in host memory at guest_instr, and has (decoded) guest IP
+ of guest_R15_curr_instr_notENC, which will have been set before the
+ call here. */
+
+static
+DisResult disInstr_ARM_WRK (
+ Bool put_IP,
+ Bool (*resteerOkFn) ( /*opaque*/void*, Addr64 ),
+ Bool resteerCisOk,
+ void* callback_opaque,
+ UChar* guest_instr,
+ VexArchInfo* archinfo,
+ VexAbiInfo* abiinfo
+ )
+{
+ // A macro to fish bits out of 'insn'.
+# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
+# define INSN_COND SLICE_UInt(insn, 31, 28)
+
+ DisResult dres;
+ UInt insn;
+ //Bool allow_VFP = False;
+ //UInt hwcaps = archinfo->hwcaps;
+ IRTemp condT; /* :: Ity_I32 */
+ UInt summary;
+ HChar dis_buf[128]; // big enough to hold LDMIA etc text
+
+ /* What insn variants are we supporting today? */
+ //allow_VFP = (0 != (hwcaps & VEX_HWCAPS_ARM_VFP));
+ // etc etc
+
+ /* Set result defaults. */
+ dres.whatNext = Dis_Continue;
+ dres.len = 4;
+ dres.continueAt = 0;
+
+ /* Set default actions for post-insn handling of writes to r15, if
+ required. */
+ r15written = False;
+ r15guard = IRTemp_INVALID; /* unconditional */
+ r15kind = Ijk_Boring;
+
+ /* At least this is simple on ARM: insns are all 4 bytes long, and
+ 4-aligned. So just fish the whole thing out of memory right now
+ and have done. */
+ insn = getUIntLittleEndianly( guest_instr );
+
+ if (0) vex_printf("insn: 0x%x\n", insn);
+
+ DIP("\t(arm) 0x%x: ", (UInt)guest_R15_curr_instr_notENC);
+
+ /* We may be asked to update the guest R15 before going further. */
+ vassert(0 == (guest_R15_curr_instr_notENC & 3));
+ if (put_IP) {
+ llPutIReg( 15, mkU32(guest_R15_curr_instr_notENC) );
+ }
+
+ /* ----------------------------------------------------------- */
+
+ /* Spot "Special" instructions (see comment at top of file). */
+ {
+ UChar* code = (UChar*)guest_instr;
+ /* Spot the 16-byte preamble:
+
+ e1a0c1ec mov r12, r12, ROR #3
+ e1a0c6ec mov r12, r12, ROR #13
+ e1a0ceec mov r12, r12, ROR #29
+ e1a0c9ec mov r12, r12, ROR #19
+ */
+ UInt word1 = 0xE1A0C1EC;
+ UInt word2 = 0xE1A0C6EC;
+ UInt word3 = 0xE1A0CEEC;
+ UInt word4 = 0xE1A0C9EC;
+ if (getUIntLittleEndianly(code+ 0) == word1 &&
+ getUIntLittleEndianly(code+ 4) == word2 &&
+ getUIntLittleEndianly(code+ 8) == word3 &&
+ getUIntLittleEndianly(code+12) == word4) {
+ /* Got a "Special" instruction preamble. Which one is it? */
+ if (getUIntLittleEndianly(code+16) == 0xE18AA00A
+ /* orr r10,r10,r10 */) {
+ /* R3 = client_request ( R4 ) */
+ DIP("r3 = client_request ( %%r4 )\n");
+ irsb->next = mkU32( guest_R15_curr_instr_notENC + 20 );
+ irsb->jumpkind = Ijk_ClientReq;
+ dres.whatNext = Dis_StopHere;
+ goto decode_success;
+ }
+ else
+ if (getUIntLittleEndianly(code+16) == 0xE18BB00B
+ /* orr r11,r11,r11 */) {
+ /* R3 = guest_NRADDR */
+ DIP("r3 = guest_NRADDR\n");
+ dres.len = 20;
+ llPutIReg(3, IRExpr_Get( OFFB_NRADDR, Ity_I32 ));
+ goto decode_success;
+ }
+ else
+ if (getUIntLittleEndianly(code+16) == 0xE18CC00C
+ /* orr r12,r12,r12 */) {
+ /* branch-and-link-to-noredir R4 */
+ DIP("branch-and-link-to-noredir r4\n");
+ llPutIReg(14, mkU32( guest_R15_curr_instr_notENC + 20) );
+ irsb->next = llGetIReg(4);
+ irsb->jumpkind = Ijk_NoRedir;
+ dres.whatNext = Dis_StopHere;
+ goto decode_success;
+ }
+ /* We don't know what it is. Set insn so decode_failure
+ can print the insn following the Special-insn preamble. */
+ insn = getUIntLittleEndianly(code+16);
+ goto decode_failure;
+ /*NOTREACHED*/
+ }
+
+ }
+
+ /* ----------------------------------------------------------- */
+
+ /* Main ARM instruction decoder starts here. */
+
+ /* Deal with the condition. Strategy is to merely generate a
+ condition temporary at this point (or IRTemp_INVALID, meaning
+ unconditional). We leave it to lower-level instruction decoders
+ to decide whether they can generate straight-line code, or
+ whether they must generate a side exit before the instruction.
+ condT :: Ity_I32 and is always either zero or one. */
+ condT = IRTemp_INVALID;
+ switch ( (ARMCondcode)INSN_COND ) {
+ case ARMCondNV: {
+ // Illegal instruction prior to v5 (see ARM ARM A3-5), but
+ // some cases are acceptable
+ Bool ok = decode_NV_instruction(&dres, archinfo, insn);
+ if (ok)
+ goto decode_success;
+ else
+ goto decode_failure;
+ }
+ case ARMCondAL: // Always executed
+ break;
+ case ARMCondEQ: case ARMCondNE: case ARMCondHS: case ARMCondLO:
+ case ARMCondMI: case ARMCondPL: case ARMCondVS: case ARMCondVC:
+ case ARMCondHI: case ARMCondLS: case ARMCondGE: case ARMCondLT:
+ case ARMCondGT: case ARMCondLE:
+ condT = newTemp(Ity_I32);
+ assign( condT, mk_armg_calculate_condition( INSN_COND ));
+ break;
+ }
+
+ /* ----------------------------------------------------------- */
+ /* -- ARMv5 integer instructions -- */
+ /* ----------------------------------------------------------- */
+
+ /* ---------------- Data processing ops ------------------- */
+
+ if (0 == (INSN(27,20) & BITS8(1,1,0,0,0,0,0,0))
+ && !(INSN(25,25) == 0 && INSN(7,7) == 1 && INSN(4,4) == 1)) {
+ IRTemp shop = IRTemp_INVALID; /* shifter operand */
+ IRTemp shco = IRTemp_INVALID; /* shifter carry out */
+ UInt rD = (insn >> 12) & 0xF; /* 15:12 */
+ UInt rN = (insn >> 16) & 0xF; /* 19:16 */
+ UInt bitS = (insn >> 20) & 1; /* 20:20 */
+ IRTemp rNt = IRTemp_INVALID;
+ IRTemp res = IRTemp_INVALID;
+ IRTemp oldV = IRTemp_INVALID;
+ IRTemp oldC = IRTemp_INVALID;
+ HChar* name = NULL;
+ IROp op = Iop_INVALID;
+ Bool ok;
+
+ switch (INSN(24,21)) {
+
+ /* --------- ADD, SUB, AND, OR --------- */
+ case BITS4(0,1,0,0): /* ADD: Rd = Rn + shifter_operand */
+ name = "add"; op = Iop_Add32; goto rd_eq_rn_op_SO;
+ case BITS4(0,0,1,0): /* SUB: Rd = Rn - shifter_operand */
+ name = "sub"; op = Iop_Sub32; goto rd_eq_rn_op_SO;
+ case BITS4(0,0,1,1): /* RSB: Rd = shifter_operand - Rn */
+ name = "rsb"; op = Iop_Sub32; goto rd_eq_rn_op_SO;
+ case BITS4(0,0,0,0): /* AND: Rd = Rn & shifter_operand */
+ name = "and"; op = Iop_And32; goto rd_eq_rn_op_SO;
+ case BITS4(1,1,0,0): /* OR: Rd = Rn | shifter_operand */
+ name = "orr"; op = Iop_Or32; goto rd_eq_rn_op_SO;
+ case BITS4(0,0,0,1): /* EOR: Rd = Rn ^ shifter_operand */
+ name = "eor"; op = Iop_Xor32; goto rd_eq_rn_op_SO;
+ case BITS4(1,1,1,0): /* BIC: Rd = Rn & ~shifter_operand */
+ name = "bic"; op = Iop_And32; goto rd_eq_rn_op_SO;
+ rd_eq_rn_op_SO: {
+ Bool isRSB = False;
+ Bool isBIC = False;
+ switch (INSN(24,21)) {
+ case BITS4(0,0,1,1):
+ vassert(op == Iop_Sub32); isRSB = True; break;
+ case BITS4(1,1,1,0):
+ vassert(op == Iop_And32); isBIC = True; break;
+ default:
+ break;
+ }
+ rNt = newTemp(Ity_I32);
+ assign(rNt, getIRegA(rN));
+ ok = mk_shifter_operand(
+ INSN(25,25), INSN(11,0),
+ &shop, bitS ? &shco : NULL, dis_buf
+ );
+ if (!ok)
+ break;
+ res = newTemp(Ity_I32);
+ // compute the main result
+ if (isRSB) {
+ // reverse-subtract: shifter_operand - Rn
+ vassert(op == Iop_Sub32);
+ assign(res, binop(op, mkexpr(shop), mkexpr(rNt)) );
+ } else if (isBIC) {
+ // bic: Rn & ~shifter_operand
+ vassert(op == Iop_And32);
+ assign(res, binop(op, mkexpr(rNt),
+ unop(Iop_Not32, mkexpr(shop))) );
+ } else {
+ // normal: Rn op shifter_operand
+ assign(res, binop(op, mkexpr(rNt), mkexpr(shop)) );
+ }
+ // but don't commit it until after we've finished
+ // all necessary reads from the guest state
+ if (bitS
+ && (op == Iop_And32 || op == Iop_Or32 || op == Iop_Xor32)) {
+ oldV = newTemp(Ity_I32);
+ assign( oldV, mk_armg_calculate_flag_v() );
+ }
+ // can't safely read guest state after here
+ // now safe to put the main result
+ putIRegA( rD, mkexpr(res), condT, Ijk_Boring );
+ // XXXX!! not safe to read any guest state after
+ // this point (I think the code below doesn't do that).
+ if (!bitS)
+ vassert(shco == IRTemp_INVALID);
+ /* Update the flags thunk if necessary */
+ if (bitS) {
+ vassert(shco != IRTemp_INVALID);
+ switch (op) {
+ case Iop_Add32:
+ setFlags_D1_D2( ARMG_CC_OP_ADD, rNt, shop, condT );
+ break;
+ case Iop_Sub32:
+ if (isRSB) {
+ setFlags_D1_D2( ARMG_CC_OP_SUB, shop, rNt, condT );
+ } else {
+ setFlags_D1_D2( ARMG_CC_OP_SUB, rNt, shop, condT );
+ }
+ break;
+ case Iop_And32: /* BIC and AND set the flags the same */
+ case Iop_Or32:
+ case Iop_Xor32:
+ // oldV has been read just above
+ setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC,
+ res, shco, oldV, condT );
+ break;
+ default:
+ vassert(0);
+ }
+ }
+ DIP("%s%s%s r%u, r%u, %s\n",
+ name, nCC(INSN_COND), bitS ? "s" : "", rD, rN, dis_buf );
+ goto decode_success;
+ }
+
+ /* --------- MOV, MVN --------- */
+ case BITS4(1,1,0,1): /* MOV: Rd = shifter_operand */
+ case BITS4(1,1,1,1): { /* MVN: Rd = not(shifter_operand) */
+ Bool isMVN = INSN(24,21) == BITS4(1,1,1,1);
+ if (rN != 0)
+ break; /* rN must be zero */
+ ok = mk_shifter_operand(
+ INSN(25,25), INSN(11,0),
+ &shop, bitS ? &shco : NULL, dis_buf
+ );
+ if (!ok)
+ break;
+ res = newTemp(Ity_I32);
+ assign( res, isMVN ? unop(Iop_Not32, mkexpr(shop))
+ : mkexpr(shop) );
+ if (bitS) {
+ vassert(shco != IRTemp_INVALID);
+ oldV = newTemp(Ity_I32);
+ assign( oldV, mk_armg_calculate_flag_v() );
+ } else {
+ vassert(shco == IRTemp_INVALID);
+ }
+ // can't safely read guest state after here
+ putIRegA( rD, mkexpr(res), condT, Ijk_Boring );
+ /* Update the flags thunk if necessary */
+ if (bitS) {
+ setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC,
+ res, shco, oldV, condT );
+ }
+ DIP("%s%s%s r%u, %s\n",
+ isMVN ? "mvn" : "mov",
+ nCC(INSN_COND), bitS ? "s" : "", rD, dis_buf );
+ goto decode_success;
+ }
+
+ /* --------- CMP --------- */
+ case BITS4(1,0,1,0): /* CMP: (void) Rn - shifter_operand */
+ case BITS4(1,0,1,1): { /* CMN: (void) Rn + shifter_operand */
+ Bool isCMN = INSN(24,21) == BITS4(1,0,1,1);
+ if (rD != 0)
+ break; /* rD must be zero */
+ if (bitS == 0)
+ break; /* if S (bit 20) is not set, it's not CMP/CMN */
+ rNt = newTemp(Ity_I32);
+ assign(rNt, getIRegA(rN));
+ ok = mk_shifter_operand(
+ INSN(25,25), INSN(11,0),
+ &shop, NULL, dis_buf
+ );
+ if (!ok)
+ break;
+ // can't safely read guest state after here
+ /* Update the flags thunk. */
+ setFlags_D1_D2( isCMN ? ARMG_CC_OP_ADD : ARMG_CC_OP_SUB,
+ rNt, shop, condT );
+ DIP("%s%s r%u, %s\n",
+ isCMN ? "cmn" : "cmp",
+ nCC(INSN_COND), rN, dis_buf );
+ goto decode_success;
+ }
+
+ /* --------- TST --------- */
+ case BITS4(1,0,0,0): /* TST: (void) Rn & shifter_operand */
+ case BITS4(1,0,0,1): { /* TEQ: (void) Rn ^ shifter_operand */
+ Bool isTEQ = INSN(24,21) == BITS4(1,0,0,1);
+ if (rD != 0)
+ break; /* rD must be zero */
+ if (bitS == 0)
+ break; /* if S (bit 20) is not set, it's not TST/TEQ */
+ rNt = newTemp(Ity_I32);
+ assign(rNt, getIRegA(rN));
+ ok = mk_shifter_operand(
+ INSN(25,25), INSN(11,0),
+ &shop, &shco, dis_buf
+ );
+ if (!ok)
+ break;
+ /* Update the flags thunk. */
+ res = newTemp(Ity_I32);
+ assign( res, binop(isTEQ ? Iop_Xor32 : Iop_And32,
+ mkexpr(rNt), mkexpr(shop)) );
+ oldV = newTemp(Ity_I32);
+ assign( oldV, mk_armg_calculate_flag_v() );
+ // can't safely read guest state after here
+ setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC,
+ res, shco, oldV, condT );
+ DIP("%s%s r%u, %s\n",
+ isTEQ ? "teq" : "tst",
+ nCC(INSN_COND), rN, dis_buf );
+ goto decode_success;
+ }
+
+ /* --------- ADC, SBC, RSC --------- */
+ case BITS4(0,1,0,1): /* ADC: Rd = Rn + shifter_operand + oldC */
+ name = "adc"; goto rd_eq_rn_op_SO_op_oldC;
+ case BITS4(0,1,1,0): /* SBC: Rd = Rn - shifter_operand - (oldC ^ 1) */
+ name = "sbc"; goto rd_eq_rn_op_SO_op_oldC;
+ case BITS4(0,1,1,1): /* RSC: Rd = shifter_operand - Rn - (oldC ^ 1) */
+ name = "rsc"; goto rd_eq_rn_op_SO_op_oldC;
+ rd_eq_rn_op_SO_op_oldC: {
+ // FIXME: shco isn't used for anything. Get rid of it.
+ rNt = newTemp(Ity_I32);
+ assign(rNt, getIRegA(rN));
+ ok = mk_shifter_operand(
+ INSN(25,25), INSN(11,0),
+ &shop, bitS ? &shco : NULL, dis_buf
+ );
+ if (!ok)
+ break;
+ oldC = newTemp(Ity_I32);
+ assign( oldC, mk_armg_calculate_flag_c() );
+ res = newTemp(Ity_I32);
+ // compute the main result
+ switch (INSN(24,21)) {
+ case BITS4(0,1,0,1): /* ADC */
+ assign(res,
+ binop(Iop_Add32,
+ binop(Iop_Add32, mkexpr(rNt), mkexpr(shop)),
+ mkexpr(oldC) ));
+ break;
+ case BITS4(0,1,1,0): /* SBC */
+ assign(res,
+ binop(Iop_Sub32,
+ binop(Iop_Sub32, mkexpr(rNt), mkexpr(shop)),
+ binop(Iop_Xor32, mkexpr(oldC), mkU32(1)) ));
+ break;
+ case BITS4(0,1,1,1): /* RSC */
+ assign(res,
+ binop(Iop_Sub32,
+ binop(Iop_Sub32, mkexpr(shop), mkexpr(rNt)),
+ binop(Iop_Xor32, mkexpr(oldC), mkU32(1)) ));
+ break;
+ default:
+ vassert(0);
+ }
+ // but don't commit it until after we've finished
+ // all necessary reads from the guest state
+ // now safe to put the main result
+ putIRegA( rD, mkexpr(res), condT, Ijk_Boring );
+ // XXXX!! not safe to read any guest state after
+ // this point (I think the code below doesn't do that).
+ if (!bitS)
+ vassert(shco == IRTemp_INVALID);
+ /* Update the flags thunk if necessary */
+ if (bitS) {
+ vassert(shco != IRTemp_INVALID);
+ switch (INSN(24,21)) {
+ case BITS4(0,1,0,1): /* ADC */
+ setFlags_D1_D2_ND( ARMG_CC_OP_ADC,
+ rNt, shop, oldC, condT );
+ break;
+ case BITS4(0,1,1,0): /* SBC */
+ setFlags_D1_D2_ND( ARMG_CC_OP_SBB,
+ rNt, shop, oldC, condT );
+ break;
+ case BITS4(0,1,1,1): /* RSC */
+ setFlags_D1_D2_ND( ARMG_CC_OP_SBB,
+ shop, rNt, oldC, condT );
+ break;
+ default:
+ vassert(0);
+ }
+ }
+ DIP("%s%s%s r%u, r%u, %s\n",
+ name, nCC(INSN_COND), bitS ? "s" : "", rD, rN, dis_buf );
+ goto decode_success;
+ }
+
+ /* --------- ??? --------- */
+ default:
+ break;
+ }
+ } /* if (0 == (INSN(27,20) & BITS8(1,1,0,0,0,0,0,0)) */
+
+ /* --------------------- Load/store (ubyte & word) -------- */
+ // LDR STR LDRB STRB
+ /* 31 27 23 19 15 11 6 4 3 # highest bit
+ 28 24 20 16 12
+ A5-20 1 | 16 cond 0101 UB0L Rn Rd imm12
+ A5-22 1 | 32 cond 0111 UB0L Rn Rd imm5 sh2 0 Rm
+ A5-24 2 | 16 cond 0101 UB1L Rn Rd imm12
+ A5-26 2 | 32 cond 0111 UB1L Rn Rd imm5 sh2 0 Rm
+ A5-28 3 | 16 cond 0100 UB0L Rn Rd imm12
+ A5-32 3 | 32 cond 0110 UB0L Rn Rd imm5 sh2 0 Rm
+ */
+ /* case coding:
+ 1 at-ea (access at ea)
+ 2 at-ea-then-upd (access at ea, then Rn = ea)
+ 3 at-Rn-then-upd (access at Rn, then Rn = ea)
+ ea coding
+ 16 Rn +/- imm12
+ 32 Rn +/- Rm sh2 imm5
+ */
+ /* Quickly skip over all of this for hopefully most instructions */
+ if ((INSN(27,24) & BITS4(1,1,0,0)) != BITS4(0,1,0,0))
+ goto after_load_store_ubyte_or_word;
+
+ summary = 0;
+
+ /**/ if (INSN(27,24) == BITS4(0,1,0,1) && INSN(21,21) == 0) {
+ summary = 1 | 16;
+ }
+ else if (INSN(27,24) == BITS4(0,1,1,1) && INSN(21,21) == 0
+ && INSN(4,4) == 0) {
+ summary = 1 | 32;
+ }
+ else if (INSN(27,24) == BITS4(0,1,0,1) && INSN(21,21) == 1) {
+ summary = 2 | 16;
+ }
+ else if (INSN(27,24) == BITS4(0,1,1,1) && INSN(21,21) == 1
+ && INSN(4,4) == 0) {
+ summary = 2 | 32;
+ }
+ else if (INSN(27,24) == BITS4(0,1,0,0) && INSN(21,21) == 0) {
+ summary = 3 | 16;
+ }
+ else if (INSN(27,24) == BITS4(0,1,1,0) && INSN(21,21) == 0
+ && INSN(4,4) == 0) {
+ summary = 3 | 32;
+ }
+ else goto after_load_store_ubyte_or_word;
+
+ { UInt rN = (insn >> 16) & 0xF; /* 19:16 */
+ UInt rD = (insn >> 12) & 0xF; /* 15:12 */
+ UInt rM = (insn >> 0) & 0xF; /* 3:0 */
+ UInt bU = (insn >> 23) & 1; /* 23 */
+ UInt bB = (insn >> 22) & 1; /* 22 */
+ UInt bL = (insn >> 20) & 1; /* 20 */
+ UInt imm12 = (insn >> 0) & 0xFFF; /* 11:0 */
+ UInt imm5 = (insn >> 7) & 0x1F; /* 11:7 */
+ UInt sh2 = (insn >> 5) & 3; /* 6:5 */
+
+ /* Skip some invalid cases, which would lead to two competing
+ updates to the same register, or which are otherwise
+ disallowed by the spec. */
+ switch (summary) {
+ case 1 | 16:
+ break;
+ case 1 | 32:
+ if (rM == 15) goto after_load_store_ubyte_or_word;
+ break;
+ case 2 | 16: case 3 | 16:
+ if (rN == 15) goto after_load_store_ubyte_or_word;
+ if (bL == 1 && rN == rD) goto after_load_store_ubyte_or_word;
+ break;
+ case 2 | 32: case 3 | 32:
+ if (rM == 15) goto after_load_store_ubyte_or_word;
+ if (rN == 15) goto after_load_store_ubyte_or_word;
+ if (rN == rM) goto after_load_store_ubyte_or_word;
+ if (bL == 1 && rN == rD) goto after_load_store_ubyte_or_word;
+ break;
+ default:
+ vassert(0);
+ }
+
+ /* Now, we can't do a conditional load or store, since that very
+ likely will generate an exception. So we have to take a side
+ exit at this point if the condition is false. */
+ if (condT != IRTemp_INVALID) {
+ mk_skip_over_A32_if_cond_is_false( condT );
+ condT = IRTemp_INVALID;
+ }
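+ /* (mk_skip_over_A32_if_cond_is_false emits a side exit to the
+ next instruction, taken when the condition is false; all IR
+ emitted after it therefore executes unconditionally.) */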
+ /* Ok, now we're unconditional. Do the load or store. */
+
+ /* compute the effective address. Bind it to a tmp since we
+ may need to use it twice. */
+ IRExpr* eaE = NULL;
+ switch (summary & 0xF0) {
+ case 16:
+ eaE = mk_EA_reg_plusminus_imm12( rN, bU, imm12, dis_buf );
+ break;
+ case 32:
+ eaE = mk_EA_reg_plusminus_shifted_reg( rN, bU, rM, sh2, imm5,
+ dis_buf );
+ break;
+ }
+ vassert(eaE);
+ IRTemp eaT = newTemp(Ity_I32);
+ assign(eaT, eaE);
+
+ /* get the old Rn value */
+ IRTemp rnT = newTemp(Ity_I32);
+ assign(rnT, getIRegA(rN));
+
+ /* decide on the transfer address */
+ IRTemp taT = IRTemp_INVALID;
+ switch (summary & 0x0F) {
+ case 1: case 2: taT = eaT; break;
+ case 3: taT = rnT; break;
+ }
+ vassert(taT != IRTemp_INVALID);
+
+ if (bL == 0) {
+ /* Store. If necessary, update the base register before the
+ store itself, so that the common idiom of "str rX, [sp,
+ #-4]!" (store rX at sp-4, then do new sp = sp-4, a.k.a "push
+ rX") doesn't cause Memcheck to complain that the access is
+ below the stack pointer. Also, not updating sp before the
+ store confuses Valgrind's dynamic stack-extending logic. So
+ do it before the store. Hence we need to snarf the store
+ data before doing the basereg update. */
+
+ /* get hold of the data to be stored */
+ IRTemp rDt = newTemp(Ity_I32);
+ assign(rDt, getIRegA(rD));
+
+ /* Update Rn if necessary. */
+ switch (summary & 0x0F) {
+ case 2: case 3:
+ putIRegA( rN, mkexpr(eaT), IRTemp_INVALID, Ijk_Boring );
+ break;
+ }
+
+ /* generate the transfer */
+ if (bB == 0) { // word store
+ storeLE( mkexpr(taT), mkexpr(rDt) );
+ } else { // byte store
+ vassert(bB == 1);
+ storeLE( mkexpr(taT), unop(Iop_32to8, mkexpr(rDt)) );
+ }
+
+ } else {
+ /* Load */
+ vassert(bL == 1);
+
+ /* generate the transfer */
+ if (bB == 0) { // word load
+ putIRegA( rD, loadLE(Ity_I32, mkexpr(taT)),
+ IRTemp_INVALID, Ijk_Boring );
+ } else { // byte load
+ vassert(bB == 1);
+ putIRegA( rD, unop(Iop_8Uto32, loadLE(Ity_I8, mkexpr(taT))),
+ IRTemp_INVALID, Ijk_Boring );
+ }
+
+ /* Update Rn if necessary. */
+ switch (summary & 0x0F) {
+ case 2: case 3:
+ // should be assured by logic above:
+ if (bL == 1)
+ vassert(rD != rN); /* since we just wrote rD */
+ putIRegA( rN, mkexpr(eaT), IRTemp_INVALID, Ijk_Boring );
+ break;
+ }
+ }
+
+ switch (summary & 0x0F) {
+ case 1: DIP("%sr%s%s r%u, %s\n",
+ bL == 0 ? "st" : "ld",
+ bB == 0 ? "" : "b", nCC(INSN_COND), rD, dis_buf);
+ break;
+ case 2: DIP("%sr%s%s r%u, %s! (at-EA-then-Rn=EA)\n",
+ bL == 0 ? "st" : "ld",
+ bB == 0 ? "" : "b", nCC(INSN_COND), rD, dis_buf);
+ break;
+ case 3: DIP("%sr%s%s r%u, %s! (at-Rn-then-Rn=EA)\n",
+ bL == 0 ? "st" : "ld",
+ bB == 0 ? "" : "b", nCC(INSN_COND), rD, dis_buf);
+ break;
+ default: vassert(0);
+ }
+
+ /* XXX deal with alignment constraints */
+
+ goto decode_success;
+
+ /* Complications:
+
+ For all loads: if the Amode specifies base register
+ writeback, and the same register is specified for Rd and Rn,
+ the results are UNPREDICTABLE.
+
+ For all loads and stores: if R15 is written, branch to
+ that address afterwards.
+
+ STRB: straightforward
+ LDRB: loaded data is zero extended
+ STR: lowest 2 bits of address are ignored
+ LDR: if the lowest 2 bits of the address are nonzero
+ then the loaded value is rotated right by 8 * the lowest 2 bits
+ */
+ }
+
+ after_load_store_ubyte_or_word:
+
+ /* --------------------- Load/store (sbyte & hword) -------- */
+ // LDRH LDRSH STRH LDRSB
+ /* 31 27 23 19 15 11 7 3 # highest bit
+ 28 24 20 16 12 8 4 0
+ A5-36 1 | 16 cond 0001 U10L Rn Rd im4h 1SH1 im4l
+ A5-38 1 | 32 cond 0001 U00L Rn Rd 0000 1SH1 Rm
+ A5-40 2 | 16 cond 0001 U11L Rn Rd im4h 1SH1 im4l
+ A5-42 2 | 32 cond 0001 U01L Rn Rd 0000 1SH1 Rm
+ A5-44 3 | 16 cond 0000 U10L Rn Rd im4h 1SH1 im4l
+ A5-46 3 | 32 cond 0000 U00L Rn Rd 0000 1SH1 Rm
+ */
+ /* case coding:
+ 1 at-ea (access at ea)
+ 2 at-ea-then-upd (access at ea, then Rn = ea)
+ 3 at-Rn-then-upd (access at Rn, then Rn = ea)
+ ea coding
+ 16 Rn +/- imm8
+ 32 Rn +/- Rm
+ */
+ /* Quickly skip over all of this for hopefully most instructions */
+ if ((INSN(27,24) & BITS4(1,1,1,0)) != BITS4(0,0,0,0))
+ goto after_load_store_sbyte_or_hword;
+
+ /* Check the "1SH1" thing. */
+ if ((INSN(7,4) & BITS4(1,0,0,1)) != BITS4(1,0,0,1))
+ goto after_load_store_sbyte_or_hword;
+
+ summary = 0;
+
+ /**/ if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,21) == BITS2(1,0)) {
+ summary = 1 | 16;
+ }
+ else if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,21) == BITS2(0,0)) {
+ summary = 1 | 32;
+ }
+ else if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,21) == BITS2(1,1)) {
+ summary = 2 | 16;
+ }
+ else if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,21) == BITS2(0,1)) {
+ summary = 2 | 32;
+ }
+ else if (INSN(27,24) == BITS4(0,0,0,0) && INSN(22,21) == BITS2(1,0)) {
+ summary = 3 | 16;
+ }
+ else if (INSN(27,24) == BITS4(0,0,0,0) && INSN(22,21) == BITS2(0,0)) {
+ summary = 3 | 32;
+ }
+ else goto after_load_store_sbyte_or_hword;
+
+ { UInt rN = (insn >> 16) & 0xF; /* 19:16 */
+ UInt rD = (insn >> 12) & 0xF; /* 15:12 */
+ UInt rM = (insn >> 0) & 0xF; /* 3:0 */
+ UInt bU = (insn >> 23) & 1; /* 23 U=1 offset+, U=0 offset- */
+ UInt bL = (insn >> 20) & 1; /* 20 L=1 load, L=0 store */
+ UInt bH = (insn >> 5) & 1; /* H=1 halfword, H=0 byte */
+ UInt bS = (insn >> 6) & 1; /* S=1 signed, S=0 unsigned */
+ UInt imm8 = ((insn >> 4) & 0xF0) | (insn & 0xF); /* 11:8, 3:0 */
+
+ /* Skip combinations that are either meaningless or already
+ handled by main word-or-unsigned-byte load-store
+ instructions. */
+ if (bS == 0 && bH == 0) /* "unsigned byte" */
+ goto after_load_store_sbyte_or_hword;
+ if (bS == 1 && bL == 0) /* "signed store" */
+ goto after_load_store_sbyte_or_hword;
+
+ /* Require 11:8 == 0 for Rn +/- Rm cases */
+ if ((summary & 32) != 0 && (imm8 & 0xF0) != 0)
+ goto after_load_store_sbyte_or_hword;
+
+ /* Skip some invalid cases, which would lead to two competing
+ updates to the same register, or which are otherwise
+ disallowed by the spec. */
+ switch (summary) {
+ case 1 | 16:
+ break;
+ case 1 | 32:
+ if (rM == 15) goto after_load_store_sbyte_or_hword;
+ break;
+ case 2 | 16: case 3 | 16:
+ if (rN == 15) goto after_load_store_sbyte_or_hword;
+ if (bL == 1 && rN == rD) goto after_load_store_sbyte_or_hword;
+ break;
+ case 2 | 32: case 3 | 32:
+ if (rM == 15) goto after_load_store_sbyte_or_hword;
+ if (rN == 15) goto after_load_store_sbyte_or_hword;
+ if (rN == rM) goto after_load_store_sbyte_or_hword;
+ if (bL == 1 && rN == rD) goto after_load_store_sbyte_or_hword;
+ break;
+ default:
+ vassert(0);
+ }
+
+ /* Now, we can't do a conditional load or store, since that very
+ likely will generate an exception. So we have to take a side
+ exit at this point if the condition is false. */
+ if (condT != IRTemp_INVALID) {
+ mk_skip_over_A32_if_cond_is_false( condT );
+ condT = IRTemp_INVALID;
+ }
+ /* Ok, now we're unconditional. Do the load or store. */
+
+ /* compute the effective address. Bind it to a tmp since we
+ may need to use it twice. */
+ IRExpr* eaE = NULL;
+ switch (summary & 0xF0) {
+ case 16:
+ eaE = mk_EA_reg_plusminus_imm8( rN, bU, imm8, dis_buf );
+ break;
+ case 32:
+ eaE = mk_EA_reg_plusminus_reg( rN, bU, rM, dis_buf );
+ break;
+ }
+ vassert(eaE);
+ IRTemp eaT = newTemp(Ity_I32);
+ assign(eaT, eaE);
+
+ /* get the old Rn value */
+ IRTemp rnT = newTemp(Ity_I32);
+ assign(rnT, getIRegA(rN));
+
+ /* decide on the transfer address */
+ IRTemp taT = IRTemp_INVALID;
+ switch (summary & 0x0F) {
+ case 1: case 2: taT = eaT; break;
+ case 3: taT = rnT; break;
+ }
+ vassert(taT != IRTemp_INVALID);
+
+ /* halfword store H 1 L 0 S 0
+ uhalf load H 1 L 1 S 0
+ shalf load H 1 L 1 S 1
+ sbyte load H 0 L 1 S 1
+ */
+ HChar* name = NULL;
+ /* generate the transfer */
+ /**/ if (bH == 1 && bL == 0 && bS == 0) { // halfword store
+ storeLE( mkexpr(taT), unop(Iop_32to16, getIRegA(rD)) );
+ name = "strh";
+ }
+ else if (bH == 1 && bL == 1 && bS == 0) { // uhalf load
+ putIRegA( rD, unop(Iop_16Uto32, loadLE(Ity_I16, mkexpr(taT))),
+ IRTemp_INVALID, Ijk_Boring );
+ name = "ldrh";
+ }
+ else if (bH == 1 && bL == 1 && bS == 1) { // shalf load
+ putIRegA( rD, unop(Iop_16Sto32, loadLE(Ity_I16, mkexpr(taT))),
+ IRTemp_INVALID, Ijk_Boring );
+ name = "ldrsh";
+ }
+ else if (bH == 0 && bL == 1 && bS == 1) { // sbyte load
+ putIRegA( rD, unop(Iop_8Sto32, loadLE(Ity_I8, mkexpr(taT))),
+ IRTemp_INVALID, Ijk_Boring );
+ name = "ldrsb";
+ }
+ else
+ vassert(0); // should be assured by logic above
+
+ /* Update Rn if necessary. */
+ switch (summary & 0x0F) {
+ case 2: case 3:
+ // should be assured by logic above:
+ if (bL == 1)
+ vassert(rD != rN); /* since we just wrote rD */
+ putIRegA( rN, mkexpr(eaT), IRTemp_INVALID, Ijk_Boring );
+ break;
+ }
+
+ switch (summary & 0x0F) {
+ case 1: DIP("%s%s r%u, %s\n", name, nCC(INSN_COND), rD, dis_buf);
+ break;
+ case 2: DIP("%s%s r%u, %s! (at-EA-then-Rn=EA)\n",
+ name, nCC(INSN_COND), rD, dis_buf);
+ break;
+ case 3: DIP("%s%s r%u, %s! (at-Rn-then-Rn=EA)\n",
+ name, nCC(INSN_COND), rD, dis_buf);
+ break;
+ default: vassert(0);
+ }
+
+ /* XXX deal with alignment constraints */
+
+ goto decode_success;
+
+ /* Complications:
+
+ For all loads: if the Amode specifies base register
+ writeback, and the same register is specified for Rd and Rn,
+ the results are UNPREDICTABLE.
+
+ For all loads and stores: if R15 is written, branch to
+ that address afterwards.
+
+ Misaligned halfword stores => Unpredictable
+ Misaligned halfword loads => Unpredictable
+ */
+ }
+
+ after_load_store_sbyte_or_hword:
+
+ /* --------------------- Load/store multiple -------------- */
+ // LD/STMIA LD/STMIB LD/STMDA LD/STMDB
+ // Remarkably complex and difficult to get right
+ // match 27:20 as 100XX0WL
+ if (BITS8(1,0,0,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,0,0,1,0,0))) {
+ // A5-50 LD/STMIA cond 1000 10WL Rn RegList
+ // A5-51 LD/STMIB cond 1001 10WL Rn RegList
+ // A5-53 LD/STMDA cond 1000 00WL Rn RegList
+ // A5-53 LD/STMDB cond 1001 00WL Rn RegList
+ // 28 24 20 16 0
+
+ UInt bINC = (insn >> 23) & 1;
+ UInt bBEFORE = (insn >> 24) & 1;
+
+ UInt bL = (insn >> 20) & 1; /* load=1, store=0 */
+ UInt bW = (insn >> 21) & 1; /* Rn wback=1, no wback=0 */
+ UInt rN = (insn >> 16) & 0xF;
+ UInt regList = insn & 0xFFFF;
+ /* Skip some invalid cases, which would lead to two competing
+ updates to the same register, or which are otherwise
+ disallowed by the spec. Note the test above has required
+ that S == 0, since that looks like a kernel-mode only thing.
+ Done by forcing the real pattern, viz 100XXSWL to actually be
+ 100XX0WL. */
+ if (rN == 15) goto after_load_store_multiple;
+ // reglist can't be empty
+ if (regList == 0) goto after_load_store_multiple;
+ // if requested to writeback Rn, and this is a load instruction,
+ // then Rn can't appear in RegList, since we'd have two competing
+ // new values for Rn. We do however accept this case for store
+ // instructions.
+ if (bW == 1 && bL == 1 && ((1 << rN) & regList) > 0)
+ goto after_load_store_multiple;
+
+ /* Now, we can't do a conditional load or store, since that very
+ likely will generate an exception. So we have to take a side
+ exit at this point if the condition is false. */
+ if (condT != IRTemp_INVALID) {
+ mk_skip_over_A32_if_cond_is_false( condT );
+ condT = IRTemp_INVALID;
+ }
+
+ /* Ok, now we're unconditional. Generate the IR. */
+ mk_ldm_stm( True/*arm*/, rN, bINC, bBEFORE, bW, bL, regList );
+
+ DIP("%sm%c%c%s r%u%s, {0x%04x}\n",
+ bL == 1 ? "ld" : "st", bINC ? 'i' : 'd', bBEFORE ? 'b' : 'a',
+ nCC(INSN_COND),
+ rN, bW ? "!" : "", regList);
+
+ goto decode_success;
+ }
+
+ after_load_store_multiple:
+
+ /* --------------------- Control flow --------------------- */
+ // B, BL (Branch, or Branch-and-Link, to immediate offset)
+ //
+ if (BITS8(1,0,1,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,0,0,0,0,0))) {
+ UInt link = (insn >> 24) & 1;
+ UInt uimm24 = insn & ((1<<24)-1);
+ Int simm24 = (Int)uimm24;
+ UInt dst = guest_R15_curr_instr_notENC + 8
+ + (((simm24 << 8) >> 8) << 2);
+ IRJumpKind jk = link ? Ijk_Call : Ijk_Boring;
+ if (link) {
+ putIRegA(14, mkU32(guest_R15_curr_instr_notENC + 4),
+ condT, Ijk_Boring);
+ }
+ if (condT == IRTemp_INVALID) {
+ /* unconditional transfer to 'dst'. See if we can simply
+ continue tracing at the destination. */
+ if (resteerOkFn( callback_opaque, (Addr64)dst )) {
+ /* yes */
+ dres.whatNext = Dis_ResteerU;
+ dres.continueAt = (Addr64)dst;
+ } else {
+ /* no; terminate the SB at this point. */
+ irsb->next = mkU32(dst);
+ irsb->jumpkind = jk;
+ dres.whatNext = Dis_StopHere;
+ }
+ DIP("b%s 0x%x\n", link ? "l" : "", dst);
+ } else {
+ /* conditional transfer to 'dst' */
+ HChar* comment = "";
+
+ /* First see if we can do some speculative chasing into one
+ arm or the other. Be conservative and only chase if
+ !link, that is, this is a normal conditional branch to a
+ known destination. */
+ if (!link
+ && resteerCisOk
+ && vex_control.guest_chase_cond
+ && dst < guest_R15_curr_instr_notENC
+ && resteerOkFn( callback_opaque, (Addr64)(Addr32)dst) ) {
+ /* Speculation: assume this backward branch is taken. So
+ we need to emit a side-exit to the insn following this
+ one, on the negation of the condition, and continue at
+ the branch target address (dst). */
+ stmt( IRStmt_Exit( unop(Iop_Not1,
+ unop(Iop_32to1, mkexpr(condT))),
+ Ijk_Boring,
+ IRConst_U32(guest_R15_curr_instr_notENC+4) ));
+ dres.whatNext = Dis_ResteerC;
+ dres.continueAt = (Addr64)(Addr32)dst;
+ comment = "(assumed taken)";
+ }
+ else
+ if (!link
+ && resteerCisOk
+ && vex_control.guest_chase_cond
+ && dst >= guest_R15_curr_instr_notENC
+ && resteerOkFn( callback_opaque,
+ (Addr64)(Addr32)
+ (guest_R15_curr_instr_notENC+4)) ) {
+ /* Speculation: assume this forward branch is not taken.
+ So we need to emit a side-exit to dst (the dest) and
+ continue disassembling at the insn immediately
+ following this one. */
+ stmt( IRStmt_Exit( unop(Iop_32to1, mkexpr(condT)),
+ Ijk_Boring,
+ IRConst_U32(dst) ));
+ dres.whatNext = Dis_ResteerC;
+ dres.continueAt = (Addr64)(Addr32)
+ (guest_R15_curr_instr_notENC+4);
+ comment = "(assumed not taken)";
+ }
+ else {
+ /* Conservative default translation - end the block at
+ this point. */
+ stmt( IRStmt_Exit( unop(Iop_32to1, mkexpr(condT)),
+ jk, IRConst_U32(dst) ));
+ irsb->next = mkU32(guest_R15_curr_instr_notENC + 4);
+ irsb->jumpkind = jk;
+ dres.whatNext = Dis_StopHere;
+ }
+ DIP("b%s%s 0x%x %s\n", link ? "l" : "", nCC(INSN_COND),
+ dst, comment);
+ }
+ goto decode_success;
+ }
+
+ // B, BL (Branch, or Branch-and-Link, to a register)
+ // NB: interworking branch
+ if (INSN(27,20) == BITS8(0,0,0,1,0,0,1,0)
+ && INSN(19,12) == BITS8(1,1,1,1,1,1,1,1)
+ && (INSN(11,4) == BITS8(1,1,1,1,0,0,1,1)
+ || INSN(11,4) == BITS8(1,1,1,1,0,0,0,1))) {
+ IRExpr* dst;
+ UInt link = (INSN(11,4) >> 1) & 1;
+ UInt rM = INSN(3,0);
+ // we don't decode the case (link && rM == 15), as that's
+ // Unpredictable.
+ if (!(link && rM == 15)) {
+ if (condT != IRTemp_INVALID) {
+ mk_skip_over_A32_if_cond_is_false( condT );
+ }
+ // rM contains an interworking address exactly as we require
+ // (with continuation CPSR.T in bit 0), so we can use it
+ // as-is, with no masking.
+ dst = getIRegA(rM);
+ if (link) {
+ putIRegA( 14, mkU32(guest_R15_curr_instr_notENC + 4),
+ IRTemp_INVALID/*because AL*/, Ijk_Boring );
+ }
+ irsb->next = dst;
+ irsb->jumpkind = link ? Ijk_Call
+ : (rM == 14 ? Ijk_Ret : Ijk_Boring);
+ dres.whatNext = Dis_StopHere;
+ if (condT == IRTemp_INVALID) {
+ DIP("b%sx r%u\n", link ? "l" : "", rM);
+ } else {
+ DIP("b%sx%s r%u\n", link ? "l" : "", nCC(INSN_COND), rM);
+ }
+ goto decode_success;
+ }
+ /* else: (link && rM == 15): just fall through */
+ }
+
+ /* --- NB: ARM interworking branches are in NV space, hence
+ are handled elsewhere by decode_NV_instruction.
+ ---
+ */
+
+ /* --------------------- Clz --------------------- */
+ // CLZ
+ if (INSN(27,20) == BITS8(0,0,0,1,0,1,1,0)
+ && INSN(19,16) == BITS4(1,1,1,1)
+ && INSN(11,4) == BITS8(1,1,1,1,0,0,0,1)) {
+ UInt rD = INSN(15,12);
+ UInt rM = INSN(3,0);
+ IRTemp arg = newTemp(Ity_I32);
+ IRTemp res = newTemp(Ity_I32);
+ assign(arg, getIRegA(rM));
+ assign(res, IRExpr_Mux0X(
+ unop(Iop_1Uto8,binop(Iop_CmpEQ32, mkexpr(arg),
+ mkU32(0))),
+ unop(Iop_Clz32, mkexpr(arg)),
+ mkU32(32)
+ ));
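+ /* IRExpr_Mux0X yields its second arg when the condition byte
+ is zero and its third arg otherwise. So this produces
+ Clz32(arg) for nonzero inputs and 32 when arg == 0, matching
+ the ARM definition of CLZ and sidestepping Iop_Clz32, whose
+ result is undefined for a zero input. */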
+ putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
+ DIP("clz%s r%u, r%u\n", nCC(INSN_COND), rD, rM);
+ goto decode_success;
+ }
+
+ /* --------------------- Mul etc --------------------- */
+ // MUL
+ if (BITS8(0,0,0,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,1,1,1,0))
+ && INSN(15,12) == BITS4(0,0,0,0)
+ && INSN(7,4) == BITS4(1,0,0,1)) {
+ UInt bitS = (insn >> 20) & 1; /* 20:20 */
+ UInt rD = INSN(19,16);
+ UInt rS = INSN(11,8);
+ UInt rM = INSN(3,0);
+ if (rD == 15 || rM == 15 || rS == 15) {
+ /* Unpredictable; don't decode; fall through */
+ } else {
+ IRTemp argL = newTemp(Ity_I32);
+ IRTemp argR = newTemp(Ity_I32);
+ IRTemp res = newTemp(Ity_I32);
+ IRTemp oldC = IRTemp_INVALID;
+ IRTemp oldV = IRTemp_INVALID;
+ assign( argL, getIRegA(rM));
+ assign( argR, getIRegA(rS));
+ assign( res, binop(Iop_Mul32, mkexpr(argL), mkexpr(argR)) );
+ if (bitS) {
+ oldC = newTemp(Ity_I32);
+ assign(oldC, mk_armg_calculate_flag_c());
+ oldV = newTemp(Ity_I32);
+ assign(oldV, mk_armg_calculate_flag_v());
+ }
+ // now update guest state
+ putIRegA( rD, mkexpr(res), condT, Ijk_Boring );
+ if (bitS) {
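+ /* MUL leaves C and V unchanged, so pack the old values
+ (C in bit 1, V in bit 0) and pass them through unchanged
+ via setFlags_D1_ND as the thunk's not-dependent arg. */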
+ IRTemp pair = newTemp(Ity_I32);
+ assign( pair, binop(Iop_Or32,
+ binop(Iop_Shl32, mkexpr(oldC), mkU8(1)),
+ mkexpr(oldV)) );
+ setFlags_D1_ND( ARMG_CC_OP_MUL, res, pair, condT );
+ }
+ DIP("mul%c%s r%u, r%u, r%u\n",
+ bitS ? 's' : ' ', nCC(INSN_COND), rD, rM, rS);
+ goto decode_success;
+ }
+ /* fall through */
+ }
+
+ // MLA, MLS
+ if (BITS8(0,0,0,0,0,0,1,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,0))
+ && INSN(7,4) == BITS4(1,0,0,1)) {
+ UInt bitS = (insn >> 20) & 1; /* 20:20 */
+ UInt isMLS = (insn >> 22) & 1; /* 22:22 */
+ UInt rD = INSN(19,16);
+ UInt rN = INSN(15,12);
+ UInt rS = INSN(11,8);
+ UInt rM = INSN(3,0);
+ if (bitS == 1 && isMLS == 1) {
+ /* This isn't allowed (MLS that sets flags). Don't decode;
+ fall through. */
+ }
+ else
+ if (rD == 15 || rM == 15 || rS == 15 || rN == 15) {
+ /* Unpredictable; don't decode; fall through */
+ } else {
+ IRTemp argL = newTemp(Ity_I32);
+ IRTemp argR = newTemp(Ity_I32);
+ IRTemp argP = newTemp(Ity_I32);
+ IRTemp res = newTemp(Ity_I32);
+ IRTemp oldC = IRTemp_INVALID;
+ IRTemp oldV = IRTemp_INVALID;
+ assign( argL, getIRegA(rM));
+ assign( argR, getIRegA(rS));
+ assign( argP, getIRegA(rN));
+ assign( res, binop(isMLS ? Iop_Sub32 : Iop_Add32,
+ mkexpr(argP),
+ binop(Iop_Mul32, mkexpr(argL), mkexpr(argR)) ));
+ if (bitS) {
+ vassert(!isMLS); // guaranteed above
+ oldC = newTemp(Ity_I32);
+ assign(oldC, mk_armg_calculate_flag_c());
+ oldV = newTemp(Ity_I32);
+ assign(oldV, mk_armg_calculate_flag_v());
+ }
+ // now update guest state
+ putIRegA( rD, mkexpr(res), condT, Ijk_Boring );
+ if (bitS) {
+ IRTemp pair = newTemp(Ity_I32);
+ assign( pair, binop(Iop_Or32,
+ binop(Iop_Shl32, mkexpr(oldC), mkU8(1)),
+ mkexpr(oldV)) );
+ setFlags_D1_ND( ARMG_CC_OP_MUL, res, pair, condT );
+ }
+ DIP("ml%c%c%s r%u, r%u, r%u, r%u\n",
+ isMLS ? 's' : 'a', bitS ? 's' : ' ',
+ nCC(INSN_COND), rD, rM, rS, rN);
+ goto decode_success;
+ }
+ /* fall through */
+ }
+
+ // SMULL, UMULL
+ if (BITS8(0,0,0,0,1,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,0))
+ && INSN(7,4) == BITS4(1,0,0,1)) {
+ UInt bitS = (insn >> 20) & 1; /* 20:20 */
+ UInt rDhi = INSN(19,16);
+ UInt rDlo = INSN(15,12);
+ UInt rS = INSN(11,8);
+ UInt rM = INSN(3,0);
+ UInt isS = (INSN(27,20) >> 2) & 1; /* 22:22 */
+ if (rDhi == 15 || rDlo == 15 || rM == 15 || rS == 15 || rDhi == rDlo) {
+ /* Unpredictable; don't decode; fall through */
+ } else {
+ IRTemp argL = newTemp(Ity_I32);
+ IRTemp argR = newTemp(Ity_I32);
+ IRTemp res = newTemp(Ity_I64);
+ IRTemp resHi = newTemp(Ity_I32);
+ IRTemp resLo = newTemp(Ity_I32);
+ IRTemp oldC = IRTemp_INVALID;
+ IRTemp oldV = IRTemp_INVALID;
+ IROp mulOp = isS ? Iop_MullS32 : Iop_MullU32;
+ assign( argL, getIRegA(rM));
+ assign( argR, getIRegA(rS));
+ assign( res, binop(mulOp, mkexpr(argL), mkexpr(argR)) );
+ assign( resHi, unop(Iop_64HIto32, mkexpr(res)) );
+ assign( resLo, unop(Iop_64to32, mkexpr(res)) );
+ if (bitS) {
+ oldC = newTemp(Ity_I32);
+ assign(oldC, mk_armg_calculate_flag_c());
+ oldV = newTemp(Ity_I32);
+ assign(oldV, mk_armg_calculate_flag_v());
+ }
+ // now update guest state
+ putIRegA( rDhi, mkexpr(resHi), condT, Ijk_Boring );
+ putIRegA( rDlo, mkexpr(resLo), condT, Ijk_Boring );
+ if (bitS) {
+ IRTemp pair = newTemp(Ity_I32);
+ assign( pair, binop(Iop_Or32,
+ binop(Iop_Shl32, mkexpr(oldC), mkU8(1)),
+ mkexpr(oldV)) );
+ setFlags_D1_D2_ND( ARMG_CC_OP_MULL, resLo, resHi, pair, condT );
+ }
+ DIP("%cmull%c%s r%u, r%u, r%u, r%u\n",
+ isS ? 's' : 'u', bitS ? 's' : ' ',
+ nCC(INSN_COND), rDlo, rDhi, rM, rS);
+ goto decode_success;
+ }
+ /* fall through */
+ }
+
+ // SMLAL, UMLAL
+ if (BITS8(0,0,0,0,1,0,1,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,0))
+ && INSN(7,4) == BITS4(1,0,0,1)) {
+ UInt bitS = (insn >> 20) & 1; /* 20:20 */
+ UInt rDhi = INSN(19,16);
+ UInt rDlo = INSN(15,12);
+ UInt rS = INSN(11,8);
+ UInt rM = INSN(3,0);
+ UInt isS = (INSN(27,20) >> 2) & 1; /* 22:22 */
+ if (rDhi == 15 || rDlo == 15 || rM == 15 || rS == 15 || rDhi == rDlo) {
+ /* Unpredictable; don't decode; fall through */
+ } else {
+ IRTemp argL = newTemp(Ity_I32);
+ IRTemp argR = newTemp(Ity_I32);
+ IRTemp old = newTemp(Ity_I64);
+ IRTemp res = newTemp(Ity_I64);
+ IRTemp resHi = newTemp(Ity_I32);
+ IRTemp resLo = newTemp(Ity_I32);
+ IRTemp oldC = IRTemp_INVALID;
+ IRTemp oldV = IRTemp_INVALID;
+ IROp mulOp = isS ? Iop_MullS32 : Iop_MullU32;
+ assign( argL, getIRegA(rM));
+ assign( argR, getIRegA(rS));
+ assign( old, binop(Iop_32HLto64, getIRegA(rDhi), getIRegA(rDlo)) );
+ assign( res, binop(Iop_Add64,
+ mkexpr(old),
+ binop(mulOp, mkexpr(argL), mkexpr(argR))) );
+ assign( resHi, unop(Iop_64HIto32, mkexpr(res)) );
+ assign( resLo, unop(Iop_64to32, mkexpr(res)) );
+ if (bitS) {
+ oldC = newTemp(Ity_I32);
+ assign(oldC, mk_armg_calculate_flag_c());
+ oldV = newTemp(Ity_I32);
+ assign(oldV, mk_armg_calculate_flag_v());
+ }
+ // now update guest state
+ putIRegA( rDhi, mkexpr(resHi), condT, Ijk_Boring );
+ putIRegA( rDlo, mkexpr(resLo), condT, Ijk_Boring );
+ if (bitS) {
+ IRTemp pair = newTemp(Ity_I32);
+ assign( pair, binop(Iop_Or32,
+ binop(Iop_Shl32, mkexpr(oldC), mkU8(1)),
+ mkexpr(oldV)) );
+ setFlags_D1_D2_ND( ARMG_CC_OP_MULL, resLo, resHi, pair, condT );
+ }
+ DIP("%cmlal%c%s r%u, r%u, r%u, r%u\n",
+ isS ? 's' : 'u', bitS ? 's' : ' ', nCC(INSN_COND),
+ rDlo, rDhi, rM, rS);
+ goto decode_success;
+ }
+ /* fall through */
+ }
+
+ /* --------------------- Msr etc --------------------- */
+
+ // MSR apsr, #imm
+ if (INSN(27,20) == BITS8(0,0,1,1,0,0,1,0)
+ && INSN(17,12) == BITS6(0,0,1,1,1,1)) {
+ UInt write_ge = INSN(18,18);
+ UInt write_nzcvq = INSN(19,19);
+ if (write_nzcvq || write_ge) {
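+ /* Standard ARM immediate: an 8-bit value rotated right by
+ twice the 4-bit rotate field, so rot is in {0, 2, .., 30}. */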
+ UInt imm = (INSN(11,0) >> 0) & 0xFF;
+ UInt rot = 2 * ((INSN(11,0) >> 8) & 0xF);
+ IRTemp immT = newTemp(Ity_I32);
+ vassert(rot <= 30);
+ imm = ROR32(imm, rot);
+ assign(immT, mkU32(imm));
+ desynthesise_APSR( write_nzcvq, write_ge, immT, condT );
+ DIP("msr%s cpsr%s%sf, #0x%08x\n", nCC(INSN_COND),
+ write_nzcvq ? "f" : "", write_ge ? "g" : "", imm);
+ goto decode_success;
+ }
+ /* fall through */
+ }
+
+ // MSR apsr, reg
+ if (INSN(27,20) == BITS8(0,0,0,1,0,0,1,0)
+ && INSN(17,12) == BITS6(0,0,1,1,1,1)
+ && INSN(11,4) == BITS8(0,0,0,0,0,0,0,0)) {
+ UInt rN = INSN(3,0);
+ UInt write_ge = INSN(18,18);
+ UInt write_nzcvq = INSN(19,19);
+ if (rN != 15 && (write_nzcvq || write_ge)) {
+ IRTemp rNt = newTemp(Ity_I32);
+ assign(rNt, getIRegA(rN));
+ desynthesise_APSR( write_nzcvq, write_ge, rNt, condT );
+ DIP("msr%s cpsr_%s%s, r%u\n", nCC(INSN_COND),
+ write_nzcvq ? "f" : "", write_ge ? "g" : "", rN);
+ goto decode_success;
+ }
+ /* fall through */
+ }
+
+ // MRS rD, cpsr
+ if ((insn & 0x0FFF0FFF) == 0x010F0000) {
+ UInt rD = INSN(15,12);
+ if (rD != 15) {
+ IRTemp apsr = synthesise_APSR();
+ putIRegA( rD, mkexpr(apsr), condT, Ijk_Boring );
+ DIP("mrs%s r%u, cpsr\n", nCC(INSN_COND), rD);
+ goto decode_success;
+ }
+ /* fall through */
+ }
+
+ /* --------------------- Svc --------------------- */
+ if (BITS8(1,1,1,1,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,0,0,0,0))) {
+ UInt imm24 = (insn >> 0) & 0xFFFFFF;
+ if (imm24 == 0) {
+ /* A syscall. We can't do this conditionally, hence: */
+ if (condT != IRTemp_INVALID) {
+ mk_skip_over_A32_if_cond_is_false( condT );
+ }
+ // AL after here
+ irsb->next = mkU32( guest_R15_curr_instr_notENC + 4 );
+ irsb->jumpkind = Ijk_Sys_syscall;
+ dres.whatNext = Dis_StopHere;
+ DIP("svc%s #0x%08x\n", nCC(INSN_COND), imm24);
+ goto decode_success;
+ }
+ /* fall through */
+ }
+
+ /* ------------------------ swp ------------------------ */
+
+ // SWP, SWPB
+ if (BITS8(0,0,0,1,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
+ && BITS4(0,0,0,0) == INSN(11,8)
+ && BITS4(1,0,0,1) == INSN(7,4)) {
+ UInt rN = INSN(19,16);
+ UInt rD = INSN(15,12);
+ UInt rM = INSN(3,0);
+ IRTemp tRn = newTemp(Ity_I32);
+ IRTemp tNew = newTemp(Ity_I32);
+ IRTemp tOld = IRTemp_INVALID;
+ IRTemp tSC1 = newTemp(Ity_I1);
+ UInt isB = (insn >> 22) & 1;
+
+ if (rD == 15 || rN == 15 || rM == 15 || rN == rM || rN == rD) {
+ /* undecodable; fall through */
+ } else {
+ /* make unconditional */
+ if (condT != IRTemp_INVALID) {
+ mk_skip_over_A32_if_cond_is_false( condT );
+ condT = IRTemp_INVALID;
+ }
+ /* Ok, now we're unconditional. Generate a LL-SC loop. */
+ assign(tRn, getIRegA(rN));
+ assign(tNew, getIRegA(rM));
+ if (isB) {
+ /* swpb */
+ tOld = newTemp(Ity_I8);
+ stmt( IRStmt_LLSC(Iend_LE, tOld, mkexpr(tRn),
+ NULL/*=>isLL*/) );
+ stmt( IRStmt_LLSC(Iend_LE, tSC1, mkexpr(tRn),
+ unop(Iop_32to8, mkexpr(tNew))) );
+ } else {
+ /* swp */
+ tOld = newTemp(Ity_I32);
+ stmt( IRStmt_LLSC(Iend_LE, tOld, mkexpr(tRn),
+ NULL/*=>isLL*/) );
+ stmt( IRStmt_LLSC(Iend_LE, tSC1, mkexpr(tRn),
+ mkexpr(tNew)) );
+ }
+ stmt( IRStmt_Exit(unop(Iop_Not1, mkexpr(tSC1)),
+ /*Ijk_NoRedir*/Ijk_Boring,
+ IRConst_U32(guest_R15_curr_instr_notENC)) );
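+ /* If the store-conditional failed, loop back to this same
+ instruction and retry the whole LL/SC pair; on success fall
+ through with the old memory value in tOld. */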
+ putIRegA(rD, isB ? unop(Iop_8Uto32, mkexpr(tOld)) : mkexpr(tOld),
+ IRTemp_INVALID, Ijk_Boring);
+ DIP("swp%s%s r%u, r%u, [r%u]\n",
+ isB ? "b" : "", nCC(INSN_COND), rD, rM, rN);
+ goto decode_success;
+ }
+ /* fall through */
+ }
+
+ /* ----------------------------------------------------------- */
+ /* -- ARMv6 instructions -- */
+ /* ----------------------------------------------------------- */
+
+ /* --------------------- ldrex, strex --------------------- */
+
+ // LDREX
+ if (0x01900F9F == (insn & 0x0FF00FFF)) {
+ UInt rT = INSN(15,12);
+ UInt rN = INSN(19,16);
+ if (rT == 15 || rN == 15) {
+ /* undecodable; fall through */
+ } else {
+ IRTemp res;
+ /* make unconditional */
+ if (condT != IRTemp_INVALID) {
+ mk_skip_over_A32_if_cond_is_false( condT );
+ condT = IRTemp_INVALID;
+ }
+ /* Ok, now we're unconditional. Do the load. */
+ res = newTemp(Ity_I32);
+ stmt( IRStmt_LLSC(Iend_LE, res, getIRegA(rN),
+ NULL/*this is a load*/) );
+ putIRegA(rT, mkexpr(res), IRTemp_INVALID, Ijk_Boring);
+ DIP("ldrex%s r%u, [r%u]\n", nCC(INSN_COND), rT, rN);
+ goto decode_success;
+ }
+ /* fall through */
+ }
+
+ // STREX
+ if (0x01800F90 == (insn & 0x0FF00FF0)) {
+ UInt rT = INSN(3,0);
+ UInt rN = INSN(19,16);
+ UInt rD = INSN(15,12);
+ if (rT == 15 || rN == 15 || rD == 15
+ || rD == rT || rD == rN) {
+ /* undecodable; fall through */
+ } else {
+ IRTemp resSC1, resSC32;
+
+ /* make unconditional */
+ if (condT != IRTemp_INVALID) {
+ mk_skip_over_A32_if_cond_is_false( condT );
+ condT = IRTemp_INVALID;
+ }
+
+ /* Ok, now we're unconditional. Do the store. */
+ resSC1 = newTemp(Ity_I1);
+ stmt( IRStmt_LLSC(Iend_LE, resSC1, getIRegA(rN), getIRegA(rT)) );
+
+ /* Set rD to 1 on failure, 0 on success. Currently we have
+ resSC1 == 0 on failure, 1 on success. */
+ resSC32 = newTemp(Ity_I32);
+ assign(resSC32,
+ unop(Iop_1Uto32, unop(Iop_Not1, mkexpr(resSC1))));
+
+ putIRegA(rD, mkexpr(resSC32),
+ IRTemp_INVALID, Ijk_Boring);
+ DIP("strex%s r%u, r%u, [r%u]\n", nCC(INSN_COND), rD, rT, rN);
+ goto decode_success;
+ }
+ /* fall through */
+ }
+
+ /* --------------------- movw, movt --------------------- */
+ if (0x03000000 == (insn & 0x0FF00000)
+ || 0x03400000 == (insn & 0x0FF00000)) /* pray for CSE */ {
+ UInt rD = INSN(15,12);
+ UInt imm16 = (insn & 0xFFF) | ((insn >> 4) & 0x0000F000);
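+ /* imm16 = imm4 (insn 19:16) : imm12 (insn 11:0) */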
+ UInt isT = (insn >> 22) & 1;
+ if (rD == 15) {
+ /* forget it */
+ } else {
+ if (isT) {
+ putIRegA(rD,
+ binop(Iop_Or32,
+ binop(Iop_And32, getIRegA(rD), mkU32(0xFFFF)),
+ mkU32(imm16 << 16)),
+ condT, Ijk_Boring);
+ DIP("movt%s r%u, #0x%04x\n", nCC(INSN_COND), rD, imm16);
+ goto decode_success;
+ } else {
+ putIRegA(rD, mkU32(imm16), condT, Ijk_Boring);
+ DIP("movw%s r%u, #0x%04x\n", nCC(INSN_COND), rD, imm16);
+ goto decode_success;
+ }
+ }
+ /* fall through */
+ }
+
+ /* ----------- uxtb, sxtb, uxth, sxth, uxtb16, sxtb16 ----------- */
+ /* FIXME: this is an exact duplicate of the Thumb version. They
+ should be commoned up. */
+ if (BITS8(0,1,1,0,1, 0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,0,0))
+ && BITS4(1,1,1,1) == INSN(19,16)
+ && BITS4(0,1,1,1) == INSN(7,4)
+ && BITS4(0,0, 0,0) == (INSN(11,8) & BITS4(0,0,1,1))) {
+ UInt subopc = INSN(27,20) & BITS8(0,0,0,0,0, 1,1,1);
+ if (subopc != BITS4(0,0,0,1) && subopc != BITS4(0,1,0,1)) {
+ Int rot = (INSN(11,8) >> 2) & 3;
+ UInt rM = INSN(3,0);
+ UInt rD = INSN(15,12);
+ IRTemp srcT = newTemp(Ity_I32);
+ IRTemp rotT = newTemp(Ity_I32);
+ IRTemp dstT = newTemp(Ity_I32);
+ HChar* nm = "???";
+ assign(srcT, getIRegA(rM));
+ assign(rotT, genROR32(srcT, 8 * rot)); /* 0, 8, 16 or 24 only */
+ switch (subopc) {
+ case BITS4(0,1,1,0): // UXTB
+ assign(dstT, unop(Iop_8Uto32, unop(Iop_32to8, mkexpr(rotT))));
+ nm = "uxtb";
+ break;
+ case BITS4(0,0,1,0): // SXTB
+ assign(dstT, unop(Iop_8Sto32, unop(Iop_32to8, mkexpr(rotT))));
+ nm = "sxtb";
+ break;
+ case BITS4(0,1,1,1): // UXTH
+ assign(dstT, unop(Iop_16Uto32, unop(Iop_32to16, mkexpr(rotT))));
+ nm = "uxth";
+ break;
+ case BITS4(0,0,1,1): // SXTH
+ assign(dstT, unop(Iop_16Sto32, unop(Iop_32to16, mkexpr(rotT))));
+ nm = "sxth";
+ break;
+ case BITS4(0,1,0,0): // UXTB16
+ assign(dstT, binop(Iop_And32, mkexpr(rotT), mkU32(0x00FF00FF)));
+ nm = "uxtb16";
+ break;
+ case BITS4(0,0,0,0): { // SXTB16
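+ /* Sign-extend byte 0 into halfword 0 and byte 2 into
+ halfword 1, discarding bytes 1 and 3 of the source. */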
+ IRTemp lo32 = newTemp(Ity_I32);
+ IRTemp hi32 = newTemp(Ity_I32);
+ assign(lo32, binop(Iop_And32, mkexpr(rotT), mkU32(0xFF)));
+ assign(hi32, binop(Iop_Shr32, mkexpr(rotT), mkU8(16)));
+ assign(
+ dstT,
+ binop(Iop_Or32,
+ binop(Iop_And32,
+ unop(Iop_8Sto32,
+ unop(Iop_32to8, mkexpr(lo32))),
+ mkU32(0xFFFF)),
+ binop(Iop_Shl32,
+ unop(Iop_8Sto32,
+ unop(Iop_32to8, mkexpr(hi32))),
+ mkU8(16))
+ ));
+ nm = "sxtb16";
+ break;
+ }
+ default:
+ vassert(0); // guarded by "if" above
+ }
+ putIRegA(rD, mkexpr(dstT), condT, Ijk_Boring);
+ DIP("%s%s r%u, r%u, ROR #%u\n", nm, nCC(INSN_COND), rD, rM, rot);
+ goto decode_success;
+ }
+ /* fall through */
+ }
+
+ /* ------------------- bfi, bfc ------------------- */
+ if (BITS8(0,1,1,1,1,1,0, 0) == (INSN(27,20) & BITS8(1,1,1,1,1,1,1,0))
+ && BITS4(0, 0,0,1) == (INSN(7,4) & BITS4(0,1,1,1))) {
+ UInt rD = INSN(15,12);
+ UInt rN = INSN(3,0);
+ UInt msb = (insn >> 16) & 0x1F; /* 20:16 */
+ UInt lsb = (insn >> 7) & 0x1F; /* 11:7 */
+ if (rD == 15 || msb < lsb) {
+ /* undecodable; fall through */
+ } else {
+ IRTemp src = newTemp(Ity_I32);
+ IRTemp olddst = newTemp(Ity_I32);
+ IRTemp newdst = newTemp(Ity_I32);
+ UInt mask = 1 << (msb - lsb);
+ mask = (mask - 1) + mask;
+ vassert(mask != 0); // guaranteed by "msb < lsb" check above
+ mask <<= lsb;
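+ /* mask now covers bits msb:lsb. The two-step "(mask - 1) + mask"
+ form computes 2^(msb-lsb+1) - 1 without the 1 << 32 overflow
+ that a 32-bit-wide field would otherwise cause. E.g. msb = 7,
+ lsb = 4: 1 << 3 = 8, 7 + 8 = 0xF, 0xF << 4 = 0xF0. */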
+
+ assign(src, rN == 15 ? mkU32(0) : getIRegA(rN));
+ assign(olddst, getIRegA(rD));
+ assign(newdst,
+ binop(Iop_Or32,
+ binop(Iop_And32,
+ binop(Iop_Shl32, mkexpr(src), mkU8(lsb)),
+ mkU32(mask)),
+ binop(Iop_And32,
+ mkexpr(olddst),
+ mkU32(~mask)))
+ );
+
+ putIRegA(rD, mkexpr(newdst), condT, Ijk_Boring);
+
+ if (rN == 15) {
+ DIP("bfc%s r%u, #%u, #%u\n",
+ nCC(INSN_COND), rD, lsb, msb-lsb+1);
+ } else {
+ DIP("bfi%s r%u, r%u, #%u, #%u\n",
+ nCC(INSN_COND), rD, rN, lsb, msb-lsb+1);
+ }
+ goto decode_success;
+ }
+ /* fall through */
+ }
+
+ /* ------------------- {u,s}bfx ------------------- */
+ if (BITS8(0,1,1,1,1,0,1,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,0))
+ && BITS4(0,1,0,1) == (INSN(7,4) & BITS4(0,1,1,1))) {
+ UInt rD = INSN(15,12);
+ UInt rN = INSN(3,0);
+ UInt wm1 = (insn >> 16) & 0x1F; /* 20:16 */
+ UInt lsb = (insn >> 7) & 0x1F; /* 11:7 */
+ UInt msb = lsb + wm1;
+ UInt isU = (insn >> 22) & 1; /* 22:22 */
+ if (rD == 15 || rN == 15 || msb >= 32) {
+ /* undecodable; fall through */
+ } else {
+ IRTemp src = newTemp(Ity_I32);
+ IRTemp tmp = newTemp(Ity_I32);
+ IRTemp res = newTemp(Ity_I32);
+ UInt mask = ((1 << wm1) - 1) + (1 << wm1);
+ vassert(msb >= 0 && msb <= 31);
+ vassert(mask != 0); // guaranteed by msb being in 0 .. 31 inclusive
+
+ assign(src, getIRegA(rN));
+ assign(tmp, binop(Iop_And32,
+ binop(Iop_Shr32, mkexpr(src), mkU8(lsb)),
+ mkU32(mask)));
+ assign(res, binop(isU ? Iop_Shr32 : Iop_Sar32,
+ binop(Iop_Shl32, mkexpr(tmp), mkU8(31-wm1)),
+ mkU8(31-wm1)));
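+ /* Shifting left by 31-wm1 parks the field's top bit at bit 31;
+ shifting back down by the same amount then zero- (Shr32) or
+ sign- (Sar32) extends it. E.g. lsb = 4, wm1 = 3: the 4-bit
+ field moves to bits 31:28, and Sar32 replicates bit 31
+ downwards. */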
+
+ putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
+
+ DIP("%s%s r%u, r%u, #%u, #%u\n",
+ isU ? "ubfx" : "sbfx",
+ nCC(INSN_COND), rD, rN, lsb, wm1 + 1);
+ goto decode_success;
+ }
+ /* fall through */
+ }
+
+ /* --------------------- Load/store doubleword ------------- */
+ // LDRD STRD
+ /* 31 27 23 19 15 11 7 3 # highest bit
+ 28 24 20 16 12 8 4 0
+ A5-36 1 | 16 cond 0001 U100 Rn Rd im4h 11S1 im4l
+ A5-38 1 | 32 cond 0001 U000 Rn Rd 0000 11S1 Rm
+ A5-40 2 | 16 cond 0001 U110 Rn Rd im4h 11S1 im4l
+ A5-42 2 | 32 cond 0001 U010 Rn Rd 0000 11S1 Rm
+ A5-44 3 | 16 cond 0000 U100 Rn Rd im4h 11S1 im4l
+ A5-46 3 | 32 cond 0000 U000 Rn Rd 0000 11S1 Rm
+ */
+ /* case coding:
+ 1 at-ea (access at ea)
+ 2 at-ea-then-upd (access at ea, then Rn = ea)
+ 3 at-Rn-then-upd (access at Rn, then Rn = ea)
+ ea coding
+ 16 Rn +/- imm8
+ 32 Rn +/- Rm
+ */
+ /* Quickly skip over all of this for hopefully most instructions */
+ if ((INSN(27,24) & BITS4(1,1,1,0)) != BITS4(0,0,0,0))
+ goto after_load_store_doubleword;
+
+ /* Check the "11S1" thing. */
+ if ((INSN(7,4) & BITS4(1,1,0,1)) != BITS4(1,1,0,1))
+ goto after_load_store_doubleword;
+
+ summary = 0;
+
+ /**/ if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,20) == BITS3(1,0,0)) {
+ summary = 1 | 16;
+ }
+ else if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,20) == BITS3(0,0,0)) {
+ summary = 1 | 32;
+ }
+ else if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,20) == BITS3(1,1,0)) {
+ summary = 2 | 16;
+ }
+ else if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,20) == BITS3(0,1,0)) {
+ summary = 2 | 32;
+ }
+ else if (INSN(27,24) == BITS4(0,0,0,0) && INSN(22,20) == BITS3(1,0,0)) {
+ summary = 3 | 16;
+ }
+ else if (INSN(27,24) == BITS4(0,0,0,0) && INSN(22,20) == BITS3(0,0,0)) {
+ summary = 3 | 32;
+ }
+ else goto after_load_store_doubleword;
+
+ { UInt rN = (insn >> 16) & 0xF; /* 19:16 */
+ UInt rD = (insn >> 12) & 0xF; /* 15:12 */
+ UInt rM = (insn >> 0) & 0xF; /* 3:0 */
+ UInt bU = (insn >> 23) & 1; /* 23 U=1 offset+, U=0 offset- */
+ UInt bS = (insn >> 5) & 1; /* S=1 store, S=0 load */
+ UInt imm8 = ((insn >> 4) & 0xF0) | (insn & 0xF); /* 11:8, 3:0 */
+
+ /* Require rD to be an even numbered register */
+ if ((rD & 1) != 0)
+ goto after_load_store_doubleword;
+
+ /* Require 11:8 == 0 for Rn +/- Rm cases */
+ if ((summary & 32) != 0 && (imm8 & 0xF0) != 0)
+ goto after_load_store_doubleword;
+
+ /* Skip some invalid cases, which would lead to two competing
+ updates to the same register, or which are otherwise
+ disallowed by the spec. */
+ switch (summary) {
+ case 1 | 16:
+ break;
+ case 1 | 32:
+ if (rM == 15) goto after_load_store_doubleword;
+ break;
+ case 2 | 16: case 3 | 16:
+ if (rN == 15) goto after_load_store_doubleword;
+ if (bS == 0 && (rN == rD || rN == rD+1))
+ goto after_load_store_doubleword;
+ break;
+ case 2 | 32: case 3 | 32:
+ if (rM == 15) goto after_load_store_doubleword;
+ if (rN == 15) goto after_load_store_doubleword;
+ if (rN == rM) goto after_load_store_doubleword;
+ if (bS == 0 && (rN == rD || rN == rD+1))
+ goto after_load_store_doubleword;
+ break;
+ default:
+ vassert(0);
+ }
+
+ /* Now, we can't do a conditional load or store, since that very
+ likely will generate an exception. So we have to take a side
+ exit at this point if the condition is false. */
+ if (condT != IRTemp_INVALID) {
+ mk_skip_over_A32_if_cond_is_false( condT );
+ condT = IRTemp_INVALID;
+ }
+ /* Ok, now we're unconditional. Do the load or store. */
+
+ /* compute the effective address. Bind it to a tmp since we
+ may need to use it twice. */
+ IRExpr* eaE = NULL;
+ switch (summary & 0xF0) {
+ case 16:
+ eaE = mk_EA_reg_plusminus_imm8( rN, bU, imm8, dis_buf );
+ break;
+ case 32:
+ eaE = mk_EA_reg_plusminus_reg( rN, bU, rM, dis_buf );
+ break;
+ }
+ vassert(eaE);
+ IRTemp eaT = newTemp(Ity_I32);
+ assign(eaT, eaE);
+
+ /* get the old Rn value */
+ IRTemp rnT = newTemp(Ity_I32);
+ assign(rnT, getIRegA(rN));
+
+ /* decide on the transfer address */
+ IRTemp taT = IRTemp_INVALID;
+ switch (summary & 0x0F) {
+ case 1: case 2: taT = eaT; break;
+ case 3: taT = rnT; break;
+ }
+ vassert(taT != IRTemp_INVALID);
+
+ /* XXX deal with alignment constraints */
+ /* XXX: but the A8 doesn't seem to trap for misaligned loads, so,
+ ignore alignment issues for the time being. */
+
+ /* doubleword store S 1
+ doubleword load S 0
+ */
+ HChar* name = NULL;
+ /* generate the transfers */
+ if (bS == 1) { // doubleword store
+ storeLE( binop(Iop_Add32, mkexpr(taT), mkU32(0)), getIRegA(rD+0) );
+ storeLE( binop(Iop_Add32, mkexpr(taT), mkU32(4)), getIRegA(rD+1) );
+ name = "strd";
+ } else { // doubleword load
+ putIRegA( rD+0,
+ loadLE(Ity_I32, binop(Iop_Add32, mkexpr(taT), mkU32(0))),
+ IRTemp_INVALID, Ijk_Boring );
+ putIRegA( rD+1,
+ loadLE(Ity_I32, binop(Iop_Add32, mkexpr(taT), mkU32(4))),
+ IRTemp_INVALID, Ijk_Boring );
+ name = "ldrd";
+ }
+
+ /* Update Rn if necessary. */
+ switch (summary & 0x0F) {
+ case 2: case 3:
+ // should be assured by logic above:
+ if (bS == 0) {
+ vassert(rD+0 != rN); /* since we just wrote rD+0 */
+ vassert(rD+1 != rN); /* since we just wrote rD+1 */
+ }
+ putIRegA( rN, mkexpr(eaT), IRTemp_INVALID, Ijk_Boring );
+ break;
+ }
+
+ switch (summary & 0x0F) {
+ case 1: DIP("%s%s r%u, %s\n", name, nCC(INSN_COND), rD, dis_buf);
+ break;
+ case 2: DIP("%s%s r%u, %s! (at-EA-then-Rn=EA)\n",
+ name, nCC(INSN_COND), rD, dis_buf);
+ break;
+ case 3: DIP("%s%s r%u, %s! (at-Rn-then-Rn=EA)\n",
+ name, nCC(INSN_COND), rD, dis_buf);
+ break;
+ default: vassert(0);
+ }
+
+ goto decode_success;
+ }
+
+ after_load_store_doubleword:
+
+ /* ------------------- {s,u}xtab ------------- */
+ if (BITS8(0,1,1,0,1,0,1,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
+ && BITS4(0,0,0,0) == (INSN(11,8) & BITS4(0,0,1,1))
+ && BITS4(0,1,1,1) == INSN(7,4)) {
+ UInt rN = INSN(19,16);
+ UInt rD = INSN(15,12);
+ UInt rM = INSN(3,0);
+ UInt rot = (insn >> 10) & 3;
+ UInt isU = INSN(22,22);
+ if (rN == 15/*it's {S,U}XTB*/ || rD == 15 || rM == 15) {
+ /* undecodable; fall through */
+ } else {
+ IRTemp srcL = newTemp(Ity_I32);
+ IRTemp srcR = newTemp(Ity_I32);
+ IRTemp res = newTemp(Ity_I32);
+ assign(srcR, getIRegA(rM));
+ assign(srcL, getIRegA(rN));
+ assign(res, binop(Iop_Add32,
+ mkexpr(srcL),
+ unop(isU ? Iop_8Uto32 : Iop_8Sto32,
+ unop(Iop_32to8,
+ genROR32(srcR, 8 * rot)))));
+ putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
+ DIP("%cxtab%s r%u, r%u, r%u, ror #%u\n",
+ isU ? 'u' : 's', nCC(INSN_COND), rD, rN, rM, rot);
+ goto decode_success;
+ }
+ /* fall through */
+ }
+
+ /* ------------------- {s,u}xtah ------------- */
+ if (BITS8(0,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
+ && BITS4(0,0,0,0) == (INSN(11,8) & BITS4(0,0,1,1))
+ && BITS4(0,1,1,1) == INSN(7,4)) {
+ UInt rN = INSN(19,16);
+ UInt rD = INSN(15,12);
+ UInt rM = INSN(3,0);
+ UInt rot = (insn >> 10) & 3;
+ UInt isU = INSN(22,22);
+ if (rN == 15/*it's {S,U}XTH*/ || rD == 15 || rM == 15) {
+ /* undecodable; fall through */
+ } else {
+ IRTemp srcL = newTemp(Ity_I32);
+ IRTemp srcR = newTemp(Ity_I32);
+ IRTemp res = newTemp(Ity_I32);
+ assign(srcR, getIRegA(rM));
+ assign(srcL, getIRegA(rN));
+ assign(res, binop(Iop_Add32,
+ mkexpr(srcL),
+ unop(isU ? Iop_16Uto32 : Iop_16Sto32,
+ unop(Iop_32to16,
+ genROR32(srcR, 8 * rot)))));
+ putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
+
+ DIP("%cxtah%s r%u, r%u, r%u, ror #%u\n",
+ isU ? 'u' : 's', nCC(INSN_COND), rD, rN, rM, rot);
+ goto decode_success;
+ }
+ /* fall through */
+ }
+
+ /* ------------------- rev16, rev ------------------ */
+ if (INSN(27,16) == 0x6BF
+ && (INSN(11,4) == 0xFB/*rev16*/ || INSN(11,4) == 0xF3/*rev*/)) {
+ Bool isREV = INSN(11,4) == 0xF3;
+ UInt rM = INSN(3,0);
+ UInt rD = INSN(15,12);
+ if (rM != 15 && rD != 15) {
+ IRTemp rMt = newTemp(Ity_I32);
+ assign(rMt, getIRegA(rM));
+ IRTemp res = isREV ? gen_REV(rMt) : gen_REV16(rMt);
+ putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
+ DIP("rev%s%s r%u, r%u\n", isREV ? "" : "16",
+ nCC(INSN_COND), rD, rM);
+ goto decode_success;
+ }
+ }
+
+ /* ------------------- rbit ------------------ */
+ if (INSN(27,16) == 0x6FF && INSN(11,4) == 0xF3) {
+ UInt rD = INSN(15,12);
+ UInt rM = INSN(3,0);
+ if (rD != 15 && rM != 15) {
+ IRTemp arg = newTemp(Ity_I32);
+ assign(arg, getIRegA(rM));
+ IRTemp res = gen_BITREV(arg);
+ putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
+         DIP("rbit%s r%u, r%u\n", nCC(INSN_COND), rD, rM);
+ goto decode_success;
+ }
+ }
+
+ /* ------------------- smmul ------------------ */
+ if (INSN(27,20) == BITS8(0,1,1,1,0,1,0,1)
+ && INSN(15,12) == BITS4(1,1,1,1)
+ && (INSN(7,4) & BITS4(1,1,0,1)) == BITS4(0,0,0,1)) {
+ UInt bitR = INSN(5,5);
+ UInt rD = INSN(19,16);
+ UInt rM = INSN(11,8);
+ UInt rN = INSN(3,0);
+ if (rD != 15 && rM != 15 && rN != 15) {
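+         /* For SMMULR (bitR == 1), adding 2^31 to the 64-bit product
+            before taking the top half rounds the result to nearest
+            rather than truncating it. */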
+ IRExpr* res
+ = unop(Iop_64HIto32,
+ binop(Iop_Add64,
+ binop(Iop_MullS32, getIRegA(rN), getIRegA(rM)),
+ mkU64(bitR ? 0x80000000ULL : 0ULL)));
+ putIRegA(rD, res, condT, Ijk_Boring);
+         DIP("smmul%s%s r%u, r%u, r%u\n",
+             bitR ? "r" : "", nCC(INSN_COND), rD, rN, rM);
+ goto decode_success;
+ }
+ }
+
+ /* ------------------- NOP ------------------ */
+ if (0x0320F000 == (insn & 0x0FFFFFFF)) {
+ DIP("nop%s\n", nCC(INSN_COND));
+ goto decode_success;
+ }
+
+ /* ----------------------------------------------------------- */
+ /* -- ARMv7 instructions -- */
+ /* ----------------------------------------------------------- */
+
+ /* -------------- read CP15 TPIDRURO register ------------- */
+ /* mrc p15, 0, r0, c13, c0, 3 up to
+ mrc p15, 0, r14, c13, c0, 3
+ */
+ /* I don't know whether this is really v7-only. But anyway, we
+ have to support it since arm-linux uses TPIDRURO as a thread
+ state register. */
+ if (0x0E1D0F70 == (insn & 0x0FFF0FFF)) {
+ UInt rD = INSN(15,12);
+ if (rD <= 14) {
+ /* skip r15, that's too stupid to handle */
+ putIRegA(rD, IRExpr_Get(OFFB_TPIDRURO, Ity_I32),
+ condT, Ijk_Boring);
+         DIP("mrc%s p15, 0, r%u, c13, c0, 3\n", nCC(INSN_COND), rD);
+ goto decode_success;
+ }
+ /* fall through */
+ }
+
+ /* Handle various kinds of barriers. This is rather indiscriminate
+ in the sense that they are all turned into an IR Fence, which
+ means we don't know which they are, so the back end has to
+      re-emit them all when it comes across an IR Fence.
+ */
+ switch (insn) {
+ case 0xEE070F9A: /* v6 */
+ /* mcr 15, 0, r0, c7, c10, 4 (v6) equiv to DSB (v7). Data
+ Synch Barrier -- ensures completion of memory accesses. */
+ stmt( IRStmt_MBE(Imbe_Fence) );
+ DIP("mcr 15, 0, r0, c7, c10, 4 (data synch barrier)\n");
+ goto decode_success;
+ case 0xEE070FBA: /* v6 */
+ /* mcr 15, 0, r0, c7, c10, 5 (v6) equiv to DMB (v7). Data
+ Memory Barrier -- ensures ordering of memory accesses. */
+ stmt( IRStmt_MBE(Imbe_Fence) );
+ DIP("mcr 15, 0, r0, c7, c10, 5 (data memory barrier)\n");
+ goto decode_success;
+ case 0xEE070F95: /* v6 */
+ /* mcr 15, 0, r0, c7, c5, 4 (v6) equiv to ISB (v7).
+ Instruction Synchronisation Barrier (or Flush Prefetch
+ Buffer) -- a pipe flush, I think. I suspect we could
+ ignore those, but to be on the safe side emit a fence
+ anyway. */
+ stmt( IRStmt_MBE(Imbe_Fence) );
+ DIP("mcr 15, 0, r0, c7, c5, 4 (insn synch barrier)\n");
+ goto decode_success;
+ default:
+ break;
+ }
+
+ /* ----------------------------------------------------------- */
+ /* -- VFP (CP 10, CP 11) instructions (in ARM mode) -- */
+ /* ----------------------------------------------------------- */
+
+ if (INSN_COND != ARMCondNV) {
+ Bool ok_vfp = decode_CP10_CP11_instruction (
+ &dres, INSN(27,0), condT, INSN_COND,
+ False/*!isT*/
+ );
+ if (ok_vfp)
+ goto decode_success;
+ }
+
+ /* ----------------------------------------------------------- */
+ /* -- NEON instructions (in ARM mode) -- */
+ /* ----------------------------------------------------------- */
+
+ /* These are all in NV space, and so are taken care of (far) above,
+ by a call from this function to decode_NV_instruction(). */
+
+ /* ----------------------------------------------------------- */
+ /* -- v6 media instructions (in ARM mode) -- */
+ /* ----------------------------------------------------------- */
+
+ { Bool ok_v6m = decode_V6MEDIA_instruction(
+ &dres, INSN(27,0), condT, INSN_COND,
+ False/*!isT*/
+ );
+ if (ok_v6m)
+ goto decode_success;
+ }
+
+ /* ----------------------------------------------------------- */
+ /* -- Undecodable -- */
+ /* ----------------------------------------------------------- */
+
+ goto decode_failure;
+ /*NOTREACHED*/
+
+ decode_failure:
+ /* All decode failures end up here. */
+ vex_printf("disInstr(arm): unhandled instruction: "
+ "0x%x\n", insn);
+ vex_printf(" cond=%d(0x%x) 27:20=%u(0x%02x) "
+ "4:4=%d "
+ "3:0=%u(0x%x)\n",
+ (Int)INSN_COND, (UInt)INSN_COND,
+ (Int)INSN(27,20), (UInt)INSN(27,20),
+ (Int)INSN(4,4),
+ (Int)INSN(3,0), (UInt)INSN(3,0) );
+
+ /* Tell the dispatcher that this insn cannot be decoded, and so has
+ not been executed, and (is currently) the next to be executed.
+      R15 should be up-to-date since it was made so at the start of each
+ insn, but nevertheless be paranoid and update it again right
+ now. */
+ vassert(0 == (guest_R15_curr_instr_notENC & 3));
+ llPutIReg( 15, mkU32(guest_R15_curr_instr_notENC) );
+ irsb->next = mkU32(guest_R15_curr_instr_notENC);
+ irsb->jumpkind = Ijk_NoDecode;
+ dres.whatNext = Dis_StopHere;
+ dres.len = 0;
+ return dres;
+
+ decode_success:
+ /* All decode successes end up here. */
+ DIP("\n");
+
+ vassert(dres.len == 4 || dres.len == 20);
+
+ /* Now then. Do we have an implicit jump to r15 to deal with? */
+ if (r15written) {
+      /* If we get a jump to deal with, we assume that there's been no
+         other competing branch stuff previously generated for this
+         insn.  That's reasonable, in the sense that the ARM insn set
+         appears to declare as "Unpredictable" any instruction which
+         generates more than one possible new value for r15.  Hence
+         just assert.  The decoders themselves should check against
+         all such instructions which are thus Unpredictable, and
+ decline to decode them. Hence we should never get here if we
+ have competing new values for r15, and hence it is safe to
+ assert here. */
+ vassert(dres.whatNext == Dis_Continue);
+ vassert(irsb->next == NULL);
+      vassert(irsb->jumpkind == Ijk_Boring);
+ /* If r15 is unconditionally written, terminate the block by
+ jumping to it. If it's conditionally written, still
+ terminate the block (a shame, but we can't do side exits to
+ arbitrary destinations), but first jump to the next
+ instruction if the condition doesn't hold. */
+ /* We can't use getIReg(15) to get the destination, since that
+ will produce r15+8, which isn't what we want. Must use
+ llGetIReg(15) instead. */
+ if (r15guard == IRTemp_INVALID) {
+ /* unconditional */
+ } else {
+ /* conditional */
+ stmt( IRStmt_Exit(
+ unop(Iop_32to1,
+ binop(Iop_Xor32,
+ mkexpr(r15guard), mkU32(1))),
+ r15kind,
+ IRConst_U32(guest_R15_curr_instr_notENC + 4)
+ ));
+ }
+ irsb->next = llGetIReg(15);
+ irsb->jumpkind = r15kind;
+ dres.whatNext = Dis_StopHere;
+ }
+
+ return dres;
+
+# undef INSN_COND
+# undef INSN
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Disassemble a single Thumb2 instruction ---*/
+/*------------------------------------------------------------*/
+
+/* NB: in Thumb mode we do fetches of regs with getIRegT, which
+ automagically adds 4 to fetches of r15. However, writes to regs
+ are done with putIRegT, which disallows writes to r15. Hence any
+ r15 writes and associated jumps have to be done "by hand". */
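+
+/* For example (purely illustrative): a 16-bit insn at 0x8000 that
+   reads r15 via getIRegT sees 0x8004, the architected PC-read value
+   for Thumb.  A write of a new PC must instead be emitted by hand,
+   in the style the decoders below use:
+
+      irsb->next     = <destination expr, with bit 0 set for Thumb>;
+      irsb->jumpkind = Ijk_Boring;
+      dres.whatNext  = Dis_StopHere;
+*/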
+
+/* Disassemble a single Thumb instruction into IR. The instruction is
+ located in host memory at guest_instr, and has (decoded) guest IP
+ of guest_R15_curr_instr_notENC, which will have been set before the
+ call here. */
+
+static
+DisResult disInstr_THUMB_WRK (
+ Bool put_IP,
+ Bool (*resteerOkFn) ( /*opaque*/void*, Addr64 ),
+ Bool resteerCisOk,
+ void* callback_opaque,
+ UChar* guest_instr,
+ VexArchInfo* archinfo,
+ VexAbiInfo* abiinfo
+ )
+{
+ /* A macro to fish bits out of insn0. There's also INSN1, to fish
+ bits out of insn1, but that's defined only after the end of the
+ 16-bit insn decoder, so as to stop it mistakenly being used
+ therein. */
+# define INSN0(_bMax,_bMin) SLICE_UInt(((UInt)insn0), (_bMax), (_bMin))
+
+ DisResult dres;
+ UShort insn0; /* first 16 bits of the insn */
+ //Bool allow_VFP = False;
+ //UInt hwcaps = archinfo->hwcaps;
+ HChar dis_buf[128]; // big enough to hold LDMIA etc text
+
+ /* Summary result of the ITxxx backwards analysis: False == safe
+ but suboptimal. */
+ Bool guaranteedUnconditional = False;
+
+ /* What insn variants are we supporting today? */
+ //allow_VFP = (0 != (hwcaps & VEX_HWCAPS_ARM_VFP));
+ // etc etc
+
+ /* Set result defaults. */
+ dres.whatNext = Dis_Continue;
+ dres.len = 2;
+ dres.continueAt = 0;
+
+ /* Set default actions for post-insn handling of writes to r15, if
+ required. */
+ r15written = False;
+ r15guard = IRTemp_INVALID; /* unconditional */
+ r15kind = Ijk_Boring;
+
+ /* Insns could be 2 or 4 bytes long. Just get the first 16 bits at
+ this point. If we need the second 16, get them later. We can't
+ get them both out immediately because it risks a fault (very
+      unlikely, but possible) if the second 16 bits aren't actually
+ necessary. */
+ insn0 = getUShortLittleEndianly( guest_instr );
+
+ if (0) vex_printf("insn: 0x%x\n", insn0);
+
+ DIP("\t(thumb) 0x%x: ", (UInt)guest_R15_curr_instr_notENC);
+
+ /* We may be asked to update the guest R15 before going further. */
+ vassert(0 == (guest_R15_curr_instr_notENC & 1));
+ if (put_IP) {
+ llPutIReg( 15, mkU32(guest_R15_curr_instr_notENC | 1) );
+ }
+
+ /* ----------------------------------------------------------- */
+ /* Spot "Special" instructions (see comment at top of file). */
+ {
+ UChar* code = (UChar*)guest_instr;
+ /* Spot the 16-byte preamble:
+
+ ea4f 0cfc mov.w ip, ip, ror #3
+ ea4f 3c7c mov.w ip, ip, ror #13
+ ea4f 7c7c mov.w ip, ip, ror #29
+ ea4f 4cfc mov.w ip, ip, ror #19
+ */
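+      /* Each constant below is the 32-bit little-endian read of one
+         halfword pair above (each 16-bit half is itself stored
+         little-endian, hence the swapped appearance). */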
+ UInt word1 = 0x0CFCEA4F;
+ UInt word2 = 0x3C7CEA4F;
+ UInt word3 = 0x7C7CEA4F;
+ UInt word4 = 0x4CFCEA4F;
+ if (getUIntLittleEndianly(code+ 0) == word1 &&
+ getUIntLittleEndianly(code+ 4) == word2 &&
+ getUIntLittleEndianly(code+ 8) == word3 &&
+ getUIntLittleEndianly(code+12) == word4) {
+ /* Got a "Special" instruction preamble. Which one is it? */
+ // 0x 0A 0A EA 4A
+ if (getUIntLittleEndianly(code+16) == 0x0A0AEA4A
+ /* orr.w r10,r10,r10 */) {
+ /* R3 = client_request ( R4 ) */
+ DIP("r3 = client_request ( %%r4 )\n");
+ irsb->next = mkU32( (guest_R15_curr_instr_notENC + 20) | 1 );
+ irsb->jumpkind = Ijk_ClientReq;
+ dres.whatNext = Dis_StopHere;
+ goto decode_success;
+ }
+ else
+ // 0x 0B 0B EA 4B
+ if (getUIntLittleEndianly(code+16) == 0x0B0BEA4B
+ /* orr r11,r11,r11 */) {
+ /* R3 = guest_NRADDR */
+ DIP("r3 = guest_NRADDR\n");
+ dres.len = 20;
+ llPutIReg(3, IRExpr_Get( OFFB_NRADDR, Ity_I32 ));
+ goto decode_success;
+ }
+ else
+ // 0x 0C 0C EA 4C
+ if (getUIntLittleEndianly(code+16) == 0x0C0CEA4C
+ /* orr r12,r12,r12 */) {
+ /* branch-and-link-to-noredir R4 */
+ DIP("branch-and-link-to-noredir r4\n");
+ llPutIReg(14, mkU32( (guest_R15_curr_instr_notENC + 20) | 1 ));
+ irsb->next = getIRegT(4);
+ irsb->jumpkind = Ijk_NoRedir;
+ dres.whatNext = Dis_StopHere;
+ goto decode_success;
+ }
+ /* We don't know what it is. Set insn0 so decode_failure
+ can print the insn following the Special-insn preamble. */
+ insn0 = getUShortLittleEndianly(code+16);
+ goto decode_failure;
+ /*NOTREACHED*/
+ }
+
+ }
+
+ /* ----------------------------------------------------------- */
+
+ /* Main Thumb instruction decoder starts here. It's a series of
+ switches which examine ever longer bit sequences at the MSB of
+ the instruction word, first for 16-bit insns, then for 32-bit
+ insns. */
+
+ /* --- BEGIN ITxxx optimisation analysis --- */
+ /* This is a crucial optimisation for the ITState boilerplate that
+ follows. Examine the 9 halfwords preceding this instruction,
+ and if we are absolutely sure that none of them constitute an
+ 'it' instruction, then we can be sure that this instruction is
+ not under the control of any 'it' instruction, and so
+ guest_ITSTATE must be zero. So write zero into ITSTATE right
+ now, so that iropt can fold out almost all of the resulting
+ junk.
+
+ If we aren't sure, we can always safely skip this step. So be a
+ bit conservative about it: only poke around in the same page as
+ this instruction, lest we get a fault from the previous page
+ that would not otherwise have happened. The saving grace is
+ that such skipping is pretty rare -- it only happens,
+ statistically, 18/4096ths of the time, so is judged unlikely to
+      be a performance problem.
+
+ FIXME: do better. Take into account the number of insns covered
+ by any IT insns we find, to rule out cases where an IT clearly
+ cannot cover this instruction. This would improve behaviour for
+ branch targets immediately following an IT-guarded group that is
+ not of full length. Eg, (and completely ignoring issues of 16-
+ vs 32-bit insn length):
+
+ ite cond
+ insn1
+ insn2
+ label: insn3
+ insn4
+
+ The 'it' only conditionalises insn1 and insn2. However, the
+ current analysis is conservative and considers insn3 and insn4
+ also possibly guarded. Hence if 'label:' is the start of a hot
+ loop we will get a big performance hit.
+ */
+ {
+ /* Summary result of this analysis: False == safe but
+ suboptimal. */
+ vassert(guaranteedUnconditional == False);
+
+ UInt pc = guest_R15_curr_instr_notENC;
+ vassert(0 == (pc & 1));
+
+ UInt pageoff = pc & 0xFFF;
+ if (pageoff >= 18) {
+ /* It's safe to poke about in the 9 halfwords preceding this
+ insn. So, have a look at them. */
+ guaranteedUnconditional = True; /* assume no 'it' insn found, till we do */
+
+ UShort* hwp = (UShort*)(HWord)pc;
+ Int i;
+ for (i = -1; i >= -9; i--) {
+ /* We're in the same page. (True, but commented out due
+ to expense.) */
+ /*
+ vassert( ( ((UInt)(&hwp[i])) & 0xFFFFF000 )
+ == ( pc & 0xFFFFF000 ) );
+ */
+ /* All valid IT instructions must have the form 0xBFxy,
+ where x can be anything, but y must be nonzero. */
+ if ((hwp[i] & 0xFF00) == 0xBF00 && (hwp[i] & 0xF) != 0) {
+ /* might be an 'it' insn. Play safe. */
+ guaranteedUnconditional = False;
+ break;
+ }
+ }
+ }
+ }
+ /* --- END ITxxx optimisation analysis --- */
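+   /* Example of the test above: 0xBF08 ("it eq", mask 0x8) is duly
+      treated as a possible 'it' insn, whereas 0xBF00 (a plain NOP
+      hint) has a zero mask and is correctly ignored. */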
+
+ /* Generate the guarding condition for this insn, by examining
+ ITSTATE. Assign it to condT. Also, generate new
+ values for ITSTATE ready for stuffing back into the
+ guest state, but don't actually do the Put yet, since it will
+      need to be stuffed back in only after the instruction gets to a
+ point where it is sure to complete. Mostly we let the code at
+ decode_success handle this, but in cases where the insn contains
+ a side exit, we have to update them before the exit. */
+
+ /* If the ITxxx optimisation analysis above could not prove that
+ this instruction is guaranteed unconditional, we insert a
+ lengthy IR preamble to compute the guarding condition at
+ runtime. If it can prove it (which obviously we hope is the
+ normal case) then we insert a minimal preamble, which is
+ equivalent to setting guest_ITSTATE to zero and then folding
+ that through the full preamble (which completely disappears). */
+
+ IRTemp condT = IRTemp_INVALID;
+ IRTemp old_itstate = IRTemp_INVALID;
+ IRTemp new_itstate = IRTemp_INVALID;
+ IRTemp cond_AND_notInIT_T = IRTemp_INVALID;
+
+ if (guaranteedUnconditional) {
+ /* BEGIN "partial eval { ITSTATE = 0; STANDARD_PREAMBLE; }" */
+
+ // ITSTATE = 0 :: I32
+ IRTemp z32 = newTemp(Ity_I32);
+ assign(z32, mkU32(0));
+ put_ITSTATE(z32);
+
+ // old_itstate = 0 :: I32
+ //
+ // old_itstate = get_ITSTATE();
+ old_itstate = z32; /* 0 :: I32 */
+
+ // new_itstate = old_itstate >> 8
+ // = 0 >> 8
+ // = 0 :: I32
+ //
+ // new_itstate = newTemp(Ity_I32);
+ // assign(new_itstate,
+ // binop(Iop_Shr32, mkexpr(old_itstate), mkU8(8)));
+ new_itstate = z32;
+
+ // ITSTATE = 0 :: I32(again)
+ //
+ // put_ITSTATE(new_itstate);
+
+ // condT1 = calc_cond_dyn( xor(and(old_istate,0xF0), 0xE0) )
+ // = calc_cond_dyn( xor(0,0xE0) )
+ // = calc_cond_dyn ( 0xE0 )
+ // = 1 :: I32
+ // Not that this matters, since the computed value is not used:
+ // see condT folding below
+ //
+ // IRTemp condT1 = newTemp(Ity_I32);
+ // assign(condT1,
+ // mk_armg_calculate_condition_dyn(
+ // binop(Iop_Xor32,
+ // binop(Iop_And32, mkexpr(old_itstate), mkU32(0xF0)),
+ // mkU32(0xE0))
+ // )
+ // );
+
+ // condT = 32to8(and32(old_itstate,0xF0)) == 0 ? 1 : condT1
+ // = 32to8(and32(0,0xF0)) == 0 ? 1 : condT1
+ // = 32to8(0) == 0 ? 1 : condT1
+ // = 0 == 0 ? 1 : condT1
+ // = 1
+ //
+ // condT = newTemp(Ity_I32);
+ // assign(condT, IRExpr_Mux0X(
+ // unop(Iop_32to8, binop(Iop_And32,
+ // mkexpr(old_itstate),
+ // mkU32(0xF0))),
+ // mkU32(1),
+ // mkexpr(condT1)
+ // ));
+ condT = newTemp(Ity_I32);
+ assign(condT, mkU32(1));
+
+ // notInITt = xor32(and32(old_itstate, 1), 1)
+ // = xor32(and32(0, 1), 1)
+ // = xor32(0, 1)
+ // = 1 :: I32
+ //
+ // IRTemp notInITt = newTemp(Ity_I32);
+ // assign(notInITt,
+ // binop(Iop_Xor32,
+ // binop(Iop_And32, mkexpr(old_itstate), mkU32(1)),
+ // mkU32(1)));
+
+ // cond_AND_notInIT_T = and32(notInITt, condT)
+ // = and32(1, 1)
+ // = 1
+ //
+ // cond_AND_notInIT_T = newTemp(Ity_I32);
+ // assign(cond_AND_notInIT_T,
+ // binop(Iop_And32, mkexpr(notInITt), mkexpr(condT)));
+ cond_AND_notInIT_T = condT; /* 1 :: I32 */
+
+ /* END "partial eval { ITSTATE = 0; STANDARD_PREAMBLE; }" */
+ } else {
+ /* BEGIN { STANDARD PREAMBLE; } */
+
+ old_itstate = get_ITSTATE();
+
+ new_itstate = newTemp(Ity_I32);
+ assign(new_itstate,
+ binop(Iop_Shr32, mkexpr(old_itstate), mkU8(8)));
+
+ put_ITSTATE(new_itstate);
+
+ /* Same strategy as for ARM insns: generate a condition
+ temporary at this point (or IRTemp_INVALID, meaning
+ unconditional). We leave it to lower-level instruction
+ decoders to decide whether they can generate straight-line
+ code, or whether they must generate a side exit before the
+ instruction. condT :: Ity_I32 and is always either zero or
+ one. */
+ IRTemp condT1 = newTemp(Ity_I32);
+ assign(condT1,
+ mk_armg_calculate_condition_dyn(
+ binop(Iop_Xor32,
+ binop(Iop_And32, mkexpr(old_itstate), mkU32(0xF0)),
+ mkU32(0xE0))
+ )
+ );
+
+ /* This is a bit complex, but needed to make Memcheck understand
+ that, if the condition in old_itstate[7:4] denotes AL (that
+ is, if this instruction is to be executed unconditionally),
+ then condT does not depend on the results of calling the
+ helper.
+
+ We test explicitly for old_itstate[7:4] == AL ^ 0xE, and in
+ that case set condT directly to 1. Else we use the results
+ of the helper. Since old_itstate is always defined and
+ because Memcheck does lazy V-bit propagation through Mux0X,
+ this will cause condT to always be a defined 1 if the
+ condition is 'AL'. From an execution semantics point of view
+ this is irrelevant since we're merely duplicating part of the
+ behaviour of the helper. But it makes it clear to Memcheck,
+ in this case, that condT does not in fact depend on the
+ contents of the condition code thunk. Without it, we get
+ quite a lot of false errors.
+
+ So, just to clarify: from a straight semantics point of view,
+ we can simply do "assign(condT, mkexpr(condT1))", and the
+ simulator still runs fine. It's just that we get loads of
+ false errors from Memcheck. */
+ condT = newTemp(Ity_I32);
+ assign(condT, IRExpr_Mux0X(
+ unop(Iop_32to8, binop(Iop_And32,
+ mkexpr(old_itstate),
+ mkU32(0xF0))),
+ mkU32(1),
+ mkexpr(condT1)
+ ));
+
+ /* Something we don't have in ARM: generate a 0 or 1 value
+ indicating whether or not we are in an IT block (NB: 0 = in
+ IT block, 1 = not in IT block). This is used to gate
+ condition code updates in 16-bit Thumb instructions. */
+ IRTemp notInITt = newTemp(Ity_I32);
+ assign(notInITt,
+ binop(Iop_Xor32,
+ binop(Iop_And32, mkexpr(old_itstate), mkU32(1)),
+ mkU32(1)));
+
+ /* Compute 'condT && notInITt' -- that is, the instruction is
+ going to execute, and we're not in an IT block. This is the
+ gating condition for updating condition codes in 16-bit Thumb
+ instructions, except for CMP, CMN and TST. */
+ cond_AND_notInIT_T = newTemp(Ity_I32);
+ assign(cond_AND_notInIT_T,
+ binop(Iop_And32, mkexpr(notInITt), mkexpr(condT)));
+ /* END { STANDARD PREAMBLE; } */
+ }
+
+
+ /* At this point:
+ * ITSTATE has been updated
+ * condT holds the guarding condition for this instruction (0 or 1),
+ * notInITt is 1 if we're in "normal" code, 0 if in an IT block
+ * cond_AND_notInIT_T is the AND of the above two.
+
+ If the instruction proper can't trap, then there's nothing else
+      to do w.r.t. ITSTATE -- just go and generate IR for the
+ insn, taking into account the guarding condition.
+
+ If, however, the instruction might trap, then we must back up
+ ITSTATE to the old value, and re-update it after the potentially
+ trapping IR section. A trap can happen either via a memory
+ reference or because we need to throw SIGILL.
+
+ If an instruction has a side exit, we need to be sure that any
+ ITSTATE backup is re-updated before the side exit.
+ */
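+
+   /* A minimal sketch of that back-out pattern, as used by the
+      memory-referencing cases below:
+
+         mk_skip_over_T16_if_cond_is_false(condT);
+         condT = IRTemp_INVALID;    // now unconditional
+         put_ITSTATE(old_itstate);  // back out the ITSTATE update
+         ... the potentially-trapping load or store ...
+         put_ITSTATE(new_itstate);  // reinstate the update
+   */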
+
+ /* ----------------------------------------------------------- */
+ /* -- -- */
+ /* -- Thumb 16-bit integer instructions -- */
+ /* -- -- */
+ /* -- IMPORTANT: references to insn1 or INSN1 are -- */
+ /* -- not allowed in this section -- */
+ /* -- -- */
+ /* ----------------------------------------------------------- */
+
+ /* 16-bit instructions inside an IT block, apart from CMP, CMN and
+ TST, do not set the condition codes. Hence we must dynamically
+ test for this case for every condition code update. */
+
+ IROp anOp = Iop_INVALID;
+ HChar* anOpNm = NULL;
+
+ /* ================ 16-bit 15:6 cases ================ */
+
+ switch (INSN0(15,6)) {
+
+ case 0x10a: // CMP
+ case 0x10b: { // CMN
+         /* ---------------- CMP Rn, Rm ---------------- */
+         /* ---------------- CMN Rn, Rm ---------------- */
+ Bool isCMN = INSN0(15,6) == 0x10b;
+ UInt rN = INSN0(2,0);
+ UInt rM = INSN0(5,3);
+ IRTemp argL = newTemp(Ity_I32);
+ IRTemp argR = newTemp(Ity_I32);
+ assign( argL, getIRegT(rN) );
+ assign( argR, getIRegT(rM) );
+ /* Update flags regardless of whether in an IT block or not. */
+ setFlags_D1_D2( isCMN ? ARMG_CC_OP_ADD : ARMG_CC_OP_SUB,
+ argL, argR, condT );
+ DIP("%s r%u, r%u\n", isCMN ? "cmn" : "cmp", rN, rM);
+ goto decode_success;
+ }
+
+ case 0x108: {
+ /* ---------------- TST Rn, Rm ---------------- */
+ UInt rN = INSN0(2,0);
+ UInt rM = INSN0(5,3);
+ IRTemp oldC = newTemp(Ity_I32);
+ IRTemp oldV = newTemp(Ity_I32);
+ IRTemp res = newTemp(Ity_I32);
+ assign( oldC, mk_armg_calculate_flag_c() );
+ assign( oldV, mk_armg_calculate_flag_v() );
+ assign( res, binop(Iop_And32, getIRegT(rN), getIRegT(rM)) );
+ /* Update flags regardless of whether in an IT block or not. */
+ setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV, condT );
+ DIP("tst r%u, r%u\n", rN, rM);
+ goto decode_success;
+ }
+
+ case 0x109: {
+ /* ---------------- NEGS Rd, Rm ---------------- */
+ /* Rd = -Rm */
+ UInt rM = INSN0(5,3);
+ UInt rD = INSN0(2,0);
+ IRTemp arg = newTemp(Ity_I32);
+ IRTemp zero = newTemp(Ity_I32);
+ assign(arg, getIRegT(rM));
+ assign(zero, mkU32(0));
+ // rD can never be r15
+ putIRegT(rD, binop(Iop_Sub32, mkexpr(zero), mkexpr(arg)), condT);
+ setFlags_D1_D2( ARMG_CC_OP_SUB, zero, arg, cond_AND_notInIT_T);
+ DIP("negs r%u, r%u\n", rD, rM);
+ goto decode_success;
+ }
+
+ case 0x10F: {
+ /* ---------------- MVNS Rd, Rm ---------------- */
+ /* Rd = ~Rm */
+ UInt rM = INSN0(5,3);
+ UInt rD = INSN0(2,0);
+ IRTemp oldV = newTemp(Ity_I32);
+ IRTemp oldC = newTemp(Ity_I32);
+ IRTemp res = newTemp(Ity_I32);
+ assign( oldV, mk_armg_calculate_flag_v() );
+ assign( oldC, mk_armg_calculate_flag_c() );
+ assign(res, unop(Iop_Not32, getIRegT(rM)));
+ // rD can never be r15
+ putIRegT(rD, mkexpr(res), condT);
+ setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
+ cond_AND_notInIT_T );
+ DIP("mvns r%u, r%u\n", rD, rM);
+ goto decode_success;
+ }
+
+ case 0x10C:
+ /* ---------------- ORRS Rd, Rm ---------------- */
+ anOp = Iop_Or32; anOpNm = "orr"; goto and_orr_eor_mul;
+ case 0x100:
+ /* ---------------- ANDS Rd, Rm ---------------- */
+ anOp = Iop_And32; anOpNm = "and"; goto and_orr_eor_mul;
+ case 0x101:
+ /* ---------------- EORS Rd, Rm ---------------- */
+ anOp = Iop_Xor32; anOpNm = "eor"; goto and_orr_eor_mul;
+ case 0x10d:
+ /* ---------------- MULS Rd, Rm ---------------- */
+ anOp = Iop_Mul32; anOpNm = "mul"; goto and_orr_eor_mul;
+ and_orr_eor_mul: {
+ /* Rd = Rd `op` Rm */
+ UInt rM = INSN0(5,3);
+ UInt rD = INSN0(2,0);
+ IRTemp res = newTemp(Ity_I32);
+ IRTemp oldV = newTemp(Ity_I32);
+ IRTemp oldC = newTemp(Ity_I32);
+ assign( oldV, mk_armg_calculate_flag_v() );
+ assign( oldC, mk_armg_calculate_flag_c() );
+ assign( res, binop(anOp, getIRegT(rD), getIRegT(rM) ));
+ // not safe to read guest state after here
+ // rD can never be r15
+ putIRegT(rD, mkexpr(res), condT);
+ setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
+ cond_AND_notInIT_T );
+ DIP("%s r%u, r%u\n", anOpNm, rD, rM);
+ goto decode_success;
+ }
+
+ case 0x10E: {
+ /* ---------------- BICS Rd, Rm ---------------- */
+ /* Rd = Rd & ~Rm */
+ UInt rM = INSN0(5,3);
+ UInt rD = INSN0(2,0);
+ IRTemp res = newTemp(Ity_I32);
+ IRTemp oldV = newTemp(Ity_I32);
+ IRTemp oldC = newTemp(Ity_I32);
+ assign( oldV, mk_armg_calculate_flag_v() );
+ assign( oldC, mk_armg_calculate_flag_c() );
+ assign( res, binop(Iop_And32, getIRegT(rD),
+ unop(Iop_Not32, getIRegT(rM) )));
+ // not safe to read guest state after here
+ // rD can never be r15
+ putIRegT(rD, mkexpr(res), condT);
+ setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
+ cond_AND_notInIT_T );
+ DIP("bics r%u, r%u\n", rD, rM);
+ goto decode_success;
+ }
+
+ case 0x105: {
+ /* ---------------- ADCS Rd, Rm ---------------- */
+ /* Rd = Rd + Rm + oldC */
+ UInt rM = INSN0(5,3);
+ UInt rD = INSN0(2,0);
+ IRTemp argL = newTemp(Ity_I32);
+ IRTemp argR = newTemp(Ity_I32);
+ IRTemp oldC = newTemp(Ity_I32);
+ IRTemp res = newTemp(Ity_I32);
+ assign(argL, getIRegT(rD));
+ assign(argR, getIRegT(rM));
+ assign(oldC, mk_armg_calculate_flag_c());
+ assign(res, binop(Iop_Add32,
+ binop(Iop_Add32, mkexpr(argL), mkexpr(argR)),
+ mkexpr(oldC)));
+ // rD can never be r15
+ putIRegT(rD, mkexpr(res), condT);
+ setFlags_D1_D2_ND( ARMG_CC_OP_ADC, argL, argR, oldC,
+ cond_AND_notInIT_T );
+ DIP("adcs r%u, r%u\n", rD, rM);
+ goto decode_success;
+ }
+
+ case 0x106: {
+ /* ---------------- SBCS Rd, Rm ---------------- */
+ /* Rd = Rd - Rm - (oldC ^ 1) */
+ UInt rM = INSN0(5,3);
+ UInt rD = INSN0(2,0);
+ IRTemp argL = newTemp(Ity_I32);
+ IRTemp argR = newTemp(Ity_I32);
+ IRTemp oldC = newTemp(Ity_I32);
+ IRTemp res = newTemp(Ity_I32);
+ assign(argL, getIRegT(rD));
+ assign(argR, getIRegT(rM));
+ assign(oldC, mk_armg_calculate_flag_c());
+ assign(res, binop(Iop_Sub32,
+ binop(Iop_Sub32, mkexpr(argL), mkexpr(argR)),
+ binop(Iop_Xor32, mkexpr(oldC), mkU32(1))));
+ // rD can never be r15
+ putIRegT(rD, mkexpr(res), condT);
+ setFlags_D1_D2_ND( ARMG_CC_OP_SBB, argL, argR, oldC,
+ cond_AND_notInIT_T );
+ DIP("sbcs r%u, r%u\n", rD, rM);
+ goto decode_success;
+ }
+
+ case 0x2CB: {
+ /* ---------------- UXTB Rd, Rm ---------------- */
+ /* Rd = 8Uto32(Rm) */
+ UInt rM = INSN0(5,3);
+ UInt rD = INSN0(2,0);
+ putIRegT(rD, binop(Iop_And32, getIRegT(rM), mkU32(0xFF)),
+ condT);
+ DIP("uxtb r%u, r%u\n", rD, rM);
+ goto decode_success;
+ }
+
+ case 0x2C9: {
+ /* ---------------- SXTB Rd, Rm ---------------- */
+ /* Rd = 8Sto32(Rm) */
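+         /* Implemented as (Rm << 24) >>s 24: the arithmetic right
+            shift replicates bit 7 of Rm into bits 31:8. */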
+ UInt rM = INSN0(5,3);
+ UInt rD = INSN0(2,0);
+ putIRegT(rD, binop(Iop_Sar32,
+ binop(Iop_Shl32, getIRegT(rM), mkU8(24)),
+ mkU8(24)),
+ condT);
+ DIP("sxtb r%u, r%u\n", rD, rM);
+ goto decode_success;
+ }
+
+ case 0x2CA: {
+ /* ---------------- UXTH Rd, Rm ---------------- */
+ /* Rd = 16Uto32(Rm) */
+ UInt rM = INSN0(5,3);
+ UInt rD = INSN0(2,0);
+ putIRegT(rD, binop(Iop_And32, getIRegT(rM), mkU32(0xFFFF)),
+ condT);
+ DIP("uxth r%u, r%u\n", rD, rM);
+ goto decode_success;
+ }
+
+ case 0x2C8: {
+ /* ---------------- SXTH Rd, Rm ---------------- */
+ /* Rd = 16Sto32(Rm) */
+ UInt rM = INSN0(5,3);
+ UInt rD = INSN0(2,0);
+ putIRegT(rD, binop(Iop_Sar32,
+ binop(Iop_Shl32, getIRegT(rM), mkU8(16)),
+ mkU8(16)),
+ condT);
+ DIP("sxth r%u, r%u\n", rD, rM);
+ goto decode_success;
+ }
+
+ case 0x102: // LSLS
+ case 0x103: // LSRS
+ case 0x104: // ASRS
+ case 0x107: { // RORS
+         /* ---------------- LSLS Rd, Rs ---------------- */
+         /* ---------------- LSRS Rd, Rs ---------------- */
+         /* ---------------- ASRS Rd, Rs ---------------- */
+         /* ---------------- RORS Rd, Rs ---------------- */
+ /* Rd = Rd `op` Rs, and set flags */
+ UInt rS = INSN0(5,3);
+ UInt rD = INSN0(2,0);
+ IRTemp oldV = newTemp(Ity_I32);
+ IRTemp rDt = newTemp(Ity_I32);
+ IRTemp rSt = newTemp(Ity_I32);
+ IRTemp res = newTemp(Ity_I32);
+ IRTemp resC = newTemp(Ity_I32);
+ HChar* wot = "???";
+ assign(rSt, getIRegT(rS));
+ assign(rDt, getIRegT(rD));
+ assign(oldV, mk_armg_calculate_flag_v());
+ /* Does not appear to be the standard 'how' encoding. */
+ switch (INSN0(15,6)) {
+ case 0x102:
+ compute_result_and_C_after_LSL_by_reg(
+ dis_buf, &res, &resC, rDt, rSt, rD, rS
+ );
+ wot = "lsl";
+ break;
+ case 0x103:
+ compute_result_and_C_after_LSR_by_reg(
+ dis_buf, &res, &resC, rDt, rSt, rD, rS
+ );
+ wot = "lsr";
+ break;
+ case 0x104:
+ compute_result_and_C_after_ASR_by_reg(
+ dis_buf, &res, &resC, rDt, rSt, rD, rS
+ );
+ wot = "asr";
+ break;
+ case 0x107:
+ compute_result_and_C_after_ROR_by_reg(
+ dis_buf, &res, &resC, rDt, rSt, rD, rS
+ );
+ wot = "ror";
+ break;
+ default:
+ /*NOTREACHED*/vassert(0);
+ }
+ // not safe to read guest state after this point
+ putIRegT(rD, mkexpr(res), condT);
+ setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, resC, oldV,
+ cond_AND_notInIT_T );
+         DIP("%ss r%u, r%u\n", wot, rD, rS);
+ goto decode_success;
+ }
+
+ case 0x2E8: // REV
+ case 0x2E9: { // REV16
+ /* ---------------- REV Rd, Rm ---------------- */
+ /* ---------------- REV16 Rd, Rm ---------------- */
+ UInt rM = INSN0(5,3);
+ UInt rD = INSN0(2,0);
+ Bool isREV = INSN0(15,6) == 0x2E8;
+ IRTemp arg = newTemp(Ity_I32);
+ assign(arg, getIRegT(rM));
+ IRTemp res = isREV ? gen_REV(arg) : gen_REV16(arg);
+ putIRegT(rD, mkexpr(res), condT);
+ DIP("rev%s r%u, r%u\n", isREV ? "" : "16", rD, rM);
+ goto decode_success;
+ }
+
+ default:
+ break; /* examine the next shortest prefix */
+
+ }
+
+
+ /* ================ 16-bit 15:7 cases ================ */
+
+ switch (INSN0(15,7)) {
+
+ case BITS9(1,0,1,1,0,0,0,0,0): {
+ /* ------------ ADD SP, #imm7 * 4 ------------ */
+ UInt uimm7 = INSN0(6,0);
+ putIRegT(13, binop(Iop_Add32, getIRegT(13), mkU32(uimm7 * 4)),
+ condT);
+ DIP("add sp, #%u\n", uimm7 * 4);
+ goto decode_success;
+ }
+
+ case BITS9(1,0,1,1,0,0,0,0,1): {
+ /* ------------ SUB SP, #imm7 * 4 ------------ */
+ UInt uimm7 = INSN0(6,0);
+ putIRegT(13, binop(Iop_Sub32, getIRegT(13), mkU32(uimm7 * 4)),
+ condT);
+ DIP("sub sp, #%u\n", uimm7 * 4);
+ goto decode_success;
+ }
+
+ case BITS9(0,1,0,0,0,1,1,1,0): {
+ /* ---------------- BX rM ---------------- */
+         /* Branch to reg, and optionally switch modes. The reg holds
+            a suitably encoded address (with CPSR.T in the bottom bit).
+ Have to special-case r15, as usual. */
+ UInt rM = (INSN0(6,6) << 3) | INSN0(5,3);
+ if (BITS3(0,0,0) == INSN0(2,0)) {
+ IRTemp dst = newTemp(Ity_I32);
+ gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
+ mk_skip_over_T16_if_cond_is_false(condT);
+ condT = IRTemp_INVALID;
+ // now uncond
+ if (rM <= 14) {
+ assign( dst, getIRegT(rM) );
+ } else {
+ vassert(rM == 15);
+ assign( dst, mkU32(guest_R15_curr_instr_notENC + 4) );
+ }
+ irsb->next = mkexpr(dst);
+ irsb->jumpkind = Ijk_Boring;
+ dres.whatNext = Dis_StopHere;
+ DIP("bx r%u (possibly switch to ARM mode)\n", rM);
+ goto decode_success;
+ }
+ break;
+ }
+
+ /* ---------------- BLX rM ---------------- */
+ /* Branch and link to interworking address in rM. */
+ case BITS9(0,1,0,0,0,1,1,1,1): {
+ if (BITS3(0,0,0) == INSN0(2,0)) {
+ UInt rM = (INSN0(6,6) << 3) | INSN0(5,3);
+ IRTemp dst = newTemp(Ity_I32);
+ if (rM <= 14) {
+ gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
+ mk_skip_over_T16_if_cond_is_false(condT);
+ condT = IRTemp_INVALID;
+ // now uncond
+ /* We're returning to Thumb code, hence "| 1" */
+ assign( dst, getIRegT(rM) );
+ putIRegT( 14, mkU32( (guest_R15_curr_instr_notENC + 2) | 1 ),
+ IRTemp_INVALID );
+ irsb->next = mkexpr(dst);
+ irsb->jumpkind = Ijk_Boring;
+ dres.whatNext = Dis_StopHere;
+ DIP("blx r%u (possibly switch to ARM mode)\n", rM);
+ goto decode_success;
+ }
+ /* else unpredictable, fall through */
+ }
+ break;
+ }
+
+ default:
+ break; /* examine the next shortest prefix */
+
+ }
+
+
+ /* ================ 16-bit 15:8 cases ================ */
+
+ switch (INSN0(15,8)) {
+
+ case BITS8(1,1,0,1,1,1,1,1): {
+ /* ---------------- SVC ---------------- */
+ UInt imm8 = INSN0(7,0);
+ if (imm8 == 0) {
+ /* A syscall. We can't do this conditionally, hence: */
+ mk_skip_over_T16_if_cond_is_false( condT );
+ // FIXME: what if we have to back up and restart this insn?
+ // then ITSTATE will be wrong (we'll have it as "used")
+ // when it isn't. Correct is to save ITSTATE in a
+ // stash pseudo-reg, and back up from that if we have to
+ // restart.
+ // uncond after here
+ irsb->next = mkU32( (guest_R15_curr_instr_notENC + 2) | 1 );
+ irsb->jumpkind = Ijk_Sys_syscall;
+ dres.whatNext = Dis_StopHere;
+ DIP("svc #0x%08x\n", imm8);
+ goto decode_success;
+ }
+ /* else fall through */
+ break;
+ }
+
+ case BITS8(0,1,0,0,0,1,0,0): {
+ /* ---------------- ADD(HI) Rd, Rm ---------------- */
+ UInt h1 = INSN0(7,7);
+ UInt h2 = INSN0(6,6);
+ UInt rM = (h2 << 3) | INSN0(5,3);
+ UInt rD = (h1 << 3) | INSN0(2,0);
+ //if (h1 == 0 && h2 == 0) { // Original T1 was more restrictive
+ if (rD == 15 && rM == 15) {
+ // then it's invalid
+ } else {
+ IRTemp res = newTemp(Ity_I32);
+ assign( res, binop(Iop_Add32, getIRegT(rD), getIRegT(rM) ));
+ if (rD != 15) {
+ putIRegT( rD, mkexpr(res), condT );
+ } else {
+ /* Only allowed outside or last-in IT block; SIGILL if not so. */
+ gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
+ /* jump over insn if not selected */
+ mk_skip_over_T16_if_cond_is_false(condT);
+ condT = IRTemp_INVALID;
+ // now uncond
+ /* non-interworking branch */
+ irsb->next = binop(Iop_Or32, mkexpr(res), mkU32(1));
+ irsb->jumpkind = Ijk_Boring;
+ dres.whatNext = Dis_StopHere;
+ }
+ DIP("add(hi) r%u, r%u\n", rD, rM);
+ goto decode_success;
+ }
+ break;
+ }
+
+ case BITS8(0,1,0,0,0,1,0,1): {
+ /* ---------------- CMP(HI) Rd, Rm ---------------- */
+ UInt h1 = INSN0(7,7);
+ UInt h2 = INSN0(6,6);
+ UInt rM = (h2 << 3) | INSN0(5,3);
+ UInt rN = (h1 << 3) | INSN0(2,0);
+ if (h1 != 0 || h2 != 0) {
+ IRTemp argL = newTemp(Ity_I32);
+ IRTemp argR = newTemp(Ity_I32);
+ assign( argL, getIRegT(rN) );
+ assign( argR, getIRegT(rM) );
+ /* Update flags regardless of whether in an IT block or not. */
+ setFlags_D1_D2( ARMG_CC_OP_SUB, argL, argR, condT );
+ DIP("cmphi r%u, r%u\n", rN, rM);
+ goto decode_success;
+ }
+ break;
+ }
+
+ case BITS8(0,1,0,0,0,1,1,0): {
+ /* ---------------- MOV(HI) Rd, Rm ---------------- */
+ UInt h1 = INSN0(7,7);
+ UInt h2 = INSN0(6,6);
+ UInt rM = (h2 << 3) | INSN0(5,3);
+ UInt rD = (h1 << 3) | INSN0(2,0);
+ /* The old ARM ARM seems to disallow the case where both Rd and
+ Rm are "low" registers, but newer versions allow it. */
+ if (1 /*h1 != 0 || h2 != 0*/) {
+ IRTemp val = newTemp(Ity_I32);
+ assign( val, getIRegT(rM) );
+ if (rD != 15) {
+ putIRegT( rD, mkexpr(val), condT );
+ } else {
+ /* Only allowed outside or last-in IT block; SIGILL if not so. */
+ gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
+ /* jump over insn if not selected */
+ mk_skip_over_T16_if_cond_is_false(condT);
+ condT = IRTemp_INVALID;
+ // now uncond
+ /* non-interworking branch */
+ irsb->next = binop(Iop_Or32, mkexpr(val), mkU32(1));
+ irsb->jumpkind = Ijk_Boring;
+ dres.whatNext = Dis_StopHere;
+ }
+ DIP("mov r%u, r%u\n", rD, rM);
+ goto decode_success;
+ }
+ break;
+ }
+
+ case BITS8(1,0,1,1,1,1,1,1): {
+ /* ---------------- IT (if-then) ---------------- */
+ UInt firstcond = INSN0(7,4);
+ UInt mask = INSN0(3,0);
+ UInt newITSTATE = 0;
+ /* This is the ITSTATE represented as described in
+ libvex_guest_arm.h. It is not the ARM ARM representation. */
+ UChar c1 = '.';
+ UChar c2 = '.';
+ UChar c3 = '.';
+ Bool valid = compute_ITSTATE( &newITSTATE, &c1, &c2, &c3,
+ firstcond, mask );
+ if (valid && firstcond != 0xF/*NV*/) {
+ /* Not allowed in an IT block; SIGILL if so. */
+ gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
+
+ IRTemp t = newTemp(Ity_I32);
+ assign(t, mkU32(newITSTATE));
+ put_ITSTATE(t);
+
+ DIP("it%c%c%c %s\n", c1, c2, c3, nCC(firstcond));
+ goto decode_success;
+ }
+ break;
+ }
+
+ case BITS8(1,0,1,1,0,0,0,1):
+ case BITS8(1,0,1,1,0,0,1,1):
+ case BITS8(1,0,1,1,1,0,0,1):
+ case BITS8(1,0,1,1,1,0,1,1): {
+ /* ---------------- CB{N}Z ---------------- */
+ UInt rN = INSN0(2,0);
+ UInt bOP = INSN0(11,11);
+ UInt imm32 = (INSN0(9,9) << 6) | (INSN0(7,3) << 1);
+ gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
+ /* It's a conditional branch forward. */
+ IRTemp kond = newTemp(Ity_I1);
+ assign( kond, binop(bOP ? Iop_CmpNE32 : Iop_CmpEQ32,
+ getIRegT(rN), mkU32(0)) );
+
+ vassert(0 == (guest_R15_curr_instr_notENC & 1));
+ /* Looks like the nearest insn we can branch to is the one after
+ next. That makes sense, as there's no point in being able to
+ encode a conditional branch to the next instruction. */
+ UInt dst = (guest_R15_curr_instr_notENC + 4 + imm32) | 1;
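+         /* For example (illustrative): encoding 0xB108 has bOP = 0,
+            i = 0, imm5 = 1, Rn = r0, so imm32 = 2 and the insn is
+            "cbz r0, .+6" relative to the insn's own address. */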
+ stmt(IRStmt_Exit( mkexpr(kond),
+ Ijk_Boring,
+ IRConst_U32(toUInt(dst)) ));
+ DIP("cb%s r%u, 0x%x\n", bOP ? "nz" : "z", rN, dst - 1);
+ goto decode_success;
+ }
+
+ default:
+ break; /* examine the next shortest prefix */
+
+ }
+
+
+ /* ================ 16-bit 15:9 cases ================ */
+
+ switch (INSN0(15,9)) {
+
+ case BITS7(1,0,1,1,0,1,0): {
+ /* ---------------- PUSH ---------------- */
+ /* This is a bit like STMxx, but way simpler. Complications we
+ don't have to deal with:
+ * SP being one of the transferred registers
+ * direction (increment vs decrement)
+ * before-vs-after-ness
+ */
+ Int i, nRegs;
+ UInt bitR = INSN0(8,8);
+ UInt regList = INSN0(7,0);
+ if (bitR) regList |= (1 << 14);
+
+ if (regList != 0) {
+ /* Since we can't generate a guaranteed non-trapping IR
+ sequence, (1) jump over the insn if it is gated false, and
+ (2) back out the ITSTATE update. */
+ mk_skip_over_T16_if_cond_is_false(condT);
+ condT = IRTemp_INVALID;
+ put_ITSTATE(old_itstate);
+ // now uncond
+
+ nRegs = 0;
+ for (i = 0; i < 16; i++) {
+ if ((regList & (1 << i)) != 0)
+ nRegs++;
+ }
+            vassert(nRegs >= 1 && nRegs <= 9); /* r0..r7, and lr if bitR */
+
+            /* Move SP down first of all, so we're "covered" -- that
+               is, the area about to be written then lies within the
+               live stack.  And don't mess with SP's alignment. */
+ IRTemp newSP = newTemp(Ity_I32);
+ assign(newSP, binop(Iop_Sub32, getIRegT(13), mkU32(4 * nRegs)));
+ putIRegT(13, mkexpr(newSP), IRTemp_INVALID);
+
+ /* Generate a transfer base address as a forced-aligned
+ version of the final SP value. */
+ IRTemp base = newTemp(Ity_I32);
+ assign(base, binop(Iop_And32, mkexpr(newSP), mkU32(~3)));
+
+ /* Now the transfers */
+ nRegs = 0;
+ for (i = 0; i < 16; i++) {
+ if ((regList & (1 << i)) != 0) {
+ storeLE( binop(Iop_Add32, mkexpr(base), mkU32(4 * nRegs)),
+ getIRegT(i) );
+ nRegs++;
+ }
+ }
+
+ /* Reinstate the ITSTATE update. */
+ put_ITSTATE(new_itstate);
+
+ DIP("push {%s0x%04x}\n", bitR ? "lr," : "", regList & 0xFF);
+ goto decode_success;
+ }
+ break;
+ }
+
+ case BITS7(1,0,1,1,1,1,0): {
+ /* ---------------- POP ---------------- */
+ Int i, nRegs;
+ UInt bitR = INSN0(8,8);
+ UInt regList = INSN0(7,0);
+
+ if (regList != 0 || bitR) {
+ /* Since we can't generate a guaranteed non-trapping IR
+ sequence, (1) jump over the insn if it is gated false, and
+ (2) back out the ITSTATE update. */
+ mk_skip_over_T16_if_cond_is_false(condT);
+ condT = IRTemp_INVALID;
+ put_ITSTATE(old_itstate);
+ // now uncond
+
+ nRegs = 0;
+ for (i = 0; i < 8; i++) {
+ if ((regList & (1 << i)) != 0)
+ nRegs++;
+ }
+            vassert(nRegs >= 0 && nRegs <= 8); /* r0..r7; pc is bitR */
+ vassert(bitR == 0 || bitR == 1);
+
+ IRTemp oldSP = newTemp(Ity_I32);
+ assign(oldSP, getIRegT(13));
+
+ /* Generate a transfer base address as a forced-aligned
+ version of the original SP value. */
+ IRTemp base = newTemp(Ity_I32);
+ assign(base, binop(Iop_And32, mkexpr(oldSP), mkU32(~3)));
+
+ /* Compute a new value for SP, but don't install it yet, so
+ that we're "covered" until all the transfers are done.
+ And don't mess with its alignment. */
+ IRTemp newSP = newTemp(Ity_I32);
+ assign(newSP, binop(Iop_Add32, mkexpr(oldSP),
+ mkU32(4 * (nRegs + bitR))));
+
+ /* Now the transfers, not including PC */
+ nRegs = 0;
+ for (i = 0; i < 8; i++) {
+ if ((regList & (1 << i)) != 0) {
+ putIRegT(i, loadLE( Ity_I32,
+ binop(Iop_Add32, mkexpr(base),
+ mkU32(4 * nRegs))),
+ IRTemp_INVALID );
+ nRegs++;
+ }
+ }
+
+ IRTemp newPC = IRTemp_INVALID;
+ if (bitR) {
+ newPC = newTemp(Ity_I32);
+ assign( newPC, loadLE( Ity_I32,
+ binop(Iop_Add32, mkexpr(base),
+ mkU32(4 * nRegs))));
+ }
+
+ /* Now we can safely install the new SP value */
+ putIRegT(13, mkexpr(newSP), IRTemp_INVALID);
+
+ /* Reinstate the ITSTATE update. */
+ put_ITSTATE(new_itstate);
+
+ /* now, do we also have to do a branch? If so, it turns out
+ that the new PC value is encoded exactly as we need it to
+ be -- with CPSR.T in the bottom bit. So we can simply use
+ it as is, no need to mess with it. Note, therefore, this
+ is an interworking return. */
+ if (bitR) {
+ irsb->next = mkexpr(newPC);
+ irsb->jumpkind = Ijk_Ret;
+ dres.whatNext = Dis_StopHere;
+ }
+
+ DIP("pop {%s0x%04x}\n", bitR ? "pc," : "", regList & 0xFF);
+ goto decode_success;
+ }
+ break;
+ }
+
+ case BITS7(0,0,0,1,1,1,0): /* ADDS */
+ case BITS7(0,0,0,1,1,1,1): { /* SUBS */
+ /* ---------------- ADDS Rd, Rn, #uimm3 ---------------- */
+ /* ---------------- SUBS Rd, Rn, #uimm3 ---------------- */
+ UInt uimm3 = INSN0(8,6);
+ UInt rN = INSN0(5,3);
+ UInt rD = INSN0(2,0);
+ UInt isSub = INSN0(9,9);
+ IRTemp argL = newTemp(Ity_I32);
+ IRTemp argR = newTemp(Ity_I32);
+ assign( argL, getIRegT(rN) );
+ assign( argR, mkU32(uimm3) );
+ putIRegT(rD, binop(isSub ? Iop_Sub32 : Iop_Add32,
+ mkexpr(argL), mkexpr(argR)),
+ condT);
+ setFlags_D1_D2( isSub ? ARMG_CC_OP_SUB : ARMG_CC_OP_ADD,
+ argL, argR, cond_AND_notInIT_T );
+ DIP("%s r%u, r%u, #%u\n", isSub ? "subs" : "adds", rD, rN, uimm3);
+ goto decode_success;
+ }
+
+ case BITS7(0,0,0,1,1,0,0): /* ADDS */
+ case BITS7(0,0,0,1,1,0,1): { /* SUBS */
+ /* ---------------- ADDS Rd, Rn, Rm ---------------- */
+ /* ---------------- SUBS Rd, Rn, Rm ---------------- */
+ UInt rM = INSN0(8,6);
+ UInt rN = INSN0(5,3);
+ UInt rD = INSN0(2,0);
+ UInt isSub = INSN0(9,9);
+ IRTemp argL = newTemp(Ity_I32);
+ IRTemp argR = newTemp(Ity_I32);
+ assign( argL, getIRegT(rN) );
+ assign( argR, getIRegT(rM) );
+ putIRegT( rD, binop(isSub ? Iop_Sub32 : Iop_Add32,
+ mkexpr(argL), mkexpr(argR)),
+ condT );
+ setFlags_D1_D2( isSub ? ARMG_CC_OP_SUB : ARMG_CC_OP_ADD,
+ argL, argR, cond_AND_notInIT_T );
+ DIP("%s r%u, r%u, r%u\n", isSub ? "subs" : "adds", rD, rN, rM);
+ goto decode_success;
+ }
+
+ case BITS7(0,1,0,1,0,0,0): /* STR */
+ case BITS7(0,1,0,1,1,0,0): { /* LDR */
+ /* ------------- LDR Rd, [Rn, Rm] ------------- */
+ /* ------------- STR Rd, [Rn, Rm] ------------- */
+ /* LDR/STR Rd, [Rn + Rm] */
+ UInt rD = INSN0(2,0);
+ UInt rN = INSN0(5,3);
+ UInt rM = INSN0(8,6);
+ UInt isLD = INSN0(11,11);
+
+ mk_skip_over_T16_if_cond_is_false(condT);
+ condT = IRTemp_INVALID;
+ // now uncond
+
+ IRExpr* ea = binop(Iop_Add32, getIRegT(rN), getIRegT(rM));
+ put_ITSTATE(old_itstate); // backout
+ if (isLD) {
+ putIRegT(rD, loadLE(Ity_I32, ea), IRTemp_INVALID);
+ } else {
+ storeLE(ea, getIRegT(rD));
+ }
+ put_ITSTATE(new_itstate); // restore
+
+ DIP("%s r%u, [r%u, r%u]\n", isLD ? "ldr" : "str", rD, rN, rM);
+ goto decode_success;
+ }
+
+ case BITS7(0,1,0,1,0,0,1):
+ case BITS7(0,1,0,1,1,0,1): {
+ /* ------------- LDRH Rd, [Rn, Rm] ------------- */
+ /* ------------- STRH Rd, [Rn, Rm] ------------- */
+ /* LDRH/STRH Rd, [Rn + Rm] */
+ UInt rD = INSN0(2,0);
+ UInt rN = INSN0(5,3);
+ UInt rM = INSN0(8,6);
+ UInt isLD = INSN0(11,11);
+
+ mk_skip_over_T16_if_cond_is_false(condT);
+ condT = IRTemp_INVALID;
+ // now uncond
+
+ IRExpr* ea = binop(Iop_Add32, getIRegT(rN), getIRegT(rM));
+ put_ITSTATE(old_itstate); // backout
+ if (isLD) {
+ putIRegT(rD, unop(Iop_16Uto32, loadLE(Ity_I16, ea)),
+ IRTemp_INVALID);
+ } else {
+ storeLE( ea, unop(Iop_32to16, getIRegT(rD)) );
+ }
+ put_ITSTATE(new_itstate); // restore
+
+ DIP("%sh r%u, [r%u, r%u]\n", isLD ? "ldr" : "str", rD, rN, rM);
+ goto decode_success;
+ }
+
+ case BITS7(0,1,0,1,1,1,1): {
+ /* ------------- LDRSH Rd, [Rn, Rm] ------------- */
+ /* LDRSH Rd, [Rn + Rm] */
+ UInt rD = INSN0(2,0);
+ UInt rN = INSN0(5,3);
+ UInt rM = INSN0(8,6);
+
+ mk_skip_over_T16_if_cond_is_false(condT);
+ condT = IRTemp_INVALID;
+ // now uncond
+
+ IRExpr* ea = binop(Iop_Add32, getIRegT(rN), getIRegT(rM));
+ put_ITSTATE(old_itstate); // backout
+ putIRegT(rD, unop(Iop_16Sto32, loadLE(Ity_I16, ea)),
+ IRTemp_INVALID);
+ put_ITSTATE(new_itstate); // restore
+
+ DIP("ldrsh r%u, [r%u, r%u]\n", rD, rN, rM);
+ goto decode_success;
+ }
+
+ case BITS7(0,1,0,1,0,1,1): {
+ /* ------------- LDRSB Rd, [Rn, Rm] ------------- */
+ /* LDRSB Rd, [Rn + Rm] */
+ UInt rD = INSN0(2,0);
+ UInt rN = INSN0(5,3);
+ UInt rM = INSN0(8,6);
+
+ mk_skip_over_T16_if_cond_is_false(condT);
+ condT = IRTemp_INVALID;
+ // now uncond
+
+ IRExpr* ea = binop(Iop_Add32, getIRegT(rN), getIRegT(rM));
+ put_ITSTATE(old_itstate); // backout
+ putIRegT(rD, unop(Iop_8Sto32, loadLE(Ity_I8, ea)),
+ IRTemp_INVALID);
+ put_ITSTATE(new_itstate); // restore
+
+ DIP("ldrsb r%u, [r%u, r%u]\n", rD, rN, rM);
+ goto decode_success;
+ }
+
+ case BITS7(0,1,0,1,0,1,0):
+ case BITS7(0,1,0,1,1,1,0): {
+ /* ------------- LDRB Rd, [Rn, Rm] ------------- */
+ /* ------------- STRB Rd, [Rn, Rm] ------------- */
+ /* LDRB/STRB Rd, [Rn + Rm] */
+ UInt rD = INSN0(2,0);
+ UInt rN = INSN0(5,3);
+ UInt rM = INSN0(8,6);
+ UInt isLD = INSN0(11,11);
+
+ mk_skip_over_T16_if_cond_is_false(condT);
+ condT = IRTemp_INVALID;
+ // now uncond
+
+ IRExpr* ea = binop(Iop_Add32, getIRegT(rN), getIRegT(rM));
+ put_ITSTATE(old_itstate); // backout
+ if (isLD) {
+ putIRegT(rD, unop(Iop_8Uto32, loadLE(Ity_I8, ea)),
+ IRTemp_INVALID);
+ } else {
+ storeLE( ea, unop(Iop_32to8, getIRegT(rD)) );
+ }
+ put_ITSTATE(new_itstate); // restore
+
+ DIP("%sb r%u, [r%u, r%u]\n", isLD ? "ldr" : "str", rD, rN, rM);
+ goto decode_success;
+ }
+
+ default:
+ break; /* examine the next shortest prefix */
+
+ }
+
+
+ /* ================ 16-bit 15:11 cases ================ */
+
+ switch (INSN0(15,11)) {
+
+ case BITS5(0,0,1,1,0):
+ case BITS5(0,0,1,1,1): {
+ /* ---------------- ADDS Rn, #uimm8 ---------------- */
+ /* ---------------- SUBS Rn, #uimm8 ---------------- */
+ UInt isSub = INSN0(11,11);
+ UInt rN = INSN0(10,8);
+ UInt uimm8 = INSN0(7,0);
+ IRTemp argL = newTemp(Ity_I32);
+ IRTemp argR = newTemp(Ity_I32);
+ assign( argL, getIRegT(rN) );
+ assign( argR, mkU32(uimm8) );
+ putIRegT( rN, binop(isSub ? Iop_Sub32 : Iop_Add32,
+ mkexpr(argL), mkexpr(argR)), condT );
+ setFlags_D1_D2( isSub ? ARMG_CC_OP_SUB : ARMG_CC_OP_ADD,
+ argL, argR, cond_AND_notInIT_T );
+ DIP("%s r%u, #%u\n", isSub ? "subs" : "adds", rN, uimm8);
+ goto decode_success;
+ }
+
+ case BITS5(1,0,1,0,0): {
+ /* ---------------- ADD rD, PC, #imm8 * 4 ---------------- */
+ /* a.k.a. ADR */
+ /* rD = align4(PC) + imm8 * 4 */
+ UInt rD = INSN0(10,8);
+ UInt imm8 = INSN0(7,0);
+ putIRegT(rD, binop(Iop_Add32,
+ binop(Iop_And32, getIRegT(15), mkU32(~3U)),
+ mkU32(imm8 * 4)),
+ condT);
+ DIP("add r%u, pc, #%u\n", rD, imm8 * 4);
+ goto decode_success;
+ }
+
+ case BITS5(1,0,1,0,1): {
+ /* ---------------- ADD rD, SP, #imm8 * 4 ---------------- */
+ UInt rD = INSN0(10,8);
+ UInt imm8 = INSN0(7,0);
+ putIRegT(rD, binop(Iop_Add32, getIRegT(13), mkU32(imm8 * 4)),
+ condT);
+ DIP("add r%u, r13, #%u\n", rD, imm8 * 4);
+ goto decode_success;
+ }
+
+ case BITS5(0,0,1,0,1): {
+ /* ---------------- CMP Rn, #uimm8 ---------------- */
+ UInt rN = INSN0(10,8);
+ UInt uimm8 = INSN0(7,0);
+ IRTemp argL = newTemp(Ity_I32);
+ IRTemp argR = newTemp(Ity_I32);
+ assign( argL, getIRegT(rN) );
+ assign( argR, mkU32(uimm8) );
+ /* Update flags regardless of whether in an IT block or not. */
+ setFlags_D1_D2( ARMG_CC_OP_SUB, argL, argR, condT );
+ DIP("cmp r%u, #%u\n", rN, uimm8);
+ goto decode_success;
+ }
+
+ case BITS5(0,0,1,0,0): {
+ /* -------------- (T1) MOVS Rn, #uimm8 -------------- */
+ UInt rD = INSN0(10,8);
+ UInt uimm8 = INSN0(7,0);
+ IRTemp oldV = newTemp(Ity_I32);
+ IRTemp oldC = newTemp(Ity_I32);
+ IRTemp res = newTemp(Ity_I32);
+ assign( oldV, mk_armg_calculate_flag_v() );
+ assign( oldC, mk_armg_calculate_flag_c() );
+ assign( res, mkU32(uimm8) );
+ putIRegT(rD, mkexpr(res), condT);
+ setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
+ cond_AND_notInIT_T );
+ DIP("movs r%u, #%u\n", rD, uimm8);
+ goto decode_success;
+ }
+
+ case BITS5(0,1,0,0,1): {
+ /* ------------- LDR Rd, [PC, #imm8 * 4] ------------- */
+ /* LDR Rd, [align4(PC) + imm8 * 4] */
+ UInt rD = INSN0(10,8);
+ UInt imm8 = INSN0(7,0);
+ IRTemp ea = newTemp(Ity_I32);
+
+ mk_skip_over_T16_if_cond_is_false(condT);
+ condT = IRTemp_INVALID;
+ // now uncond
+
+ assign(ea, binop(Iop_Add32,
+ binop(Iop_And32, getIRegT(15), mkU32(~3U)),
+ mkU32(imm8 * 4)));
+ put_ITSTATE(old_itstate); // backout
+ putIRegT(rD, loadLE(Ity_I32, mkexpr(ea)),
+ IRTemp_INVALID);
+ put_ITSTATE(new_itstate); // restore
+
+ DIP("ldr r%u, [pc, #%u]\n", rD, imm8 * 4);
+ goto decode_success;
+ }
+
+ case BITS5(0,1,1,0,0): /* STR */
+ case BITS5(0,1,1,0,1): { /* LDR */
+ /* ------------- LDR Rd, [Rn, #imm5 * 4] ------------- */
+ /* ------------- STR Rd, [Rn, #imm5 * 4] ------------- */
+ /* LDR/STR Rd, [Rn + imm5 * 4] */
+ UInt rD = INSN0(2,0);
+ UInt rN = INSN0(5,3);
+ UInt imm5 = INSN0(10,6);
+ UInt isLD = INSN0(11,11);
+
+ mk_skip_over_T16_if_cond_is_false(condT);
+ condT = IRTemp_INVALID;
+ // now uncond
+
+ IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm5 * 4));
+ put_ITSTATE(old_itstate); // backout
+ if (isLD) {
+ putIRegT(rD, loadLE(Ity_I32, ea), IRTemp_INVALID);
+ } else {
+ storeLE( ea, getIRegT(rD) );
+ }
+ put_ITSTATE(new_itstate); // restore
+
+ DIP("%s r%u, [r%u, #%u]\n", isLD ? "ldr" : "str", rD, rN, imm5 * 4);
+ goto decode_success;
+ }
+
+ case BITS5(1,0,0,0,0): /* STRH */
+ case BITS5(1,0,0,0,1): { /* LDRH */
+ /* ------------- LDRH Rd, [Rn, #imm5 * 2] ------------- */
+ /* ------------- STRH Rd, [Rn, #imm5 * 2] ------------- */
+ /* LDRH/STRH Rd, [Rn + imm5 * 2] */
+ UInt rD = INSN0(2,0);
+ UInt rN = INSN0(5,3);
+ UInt imm5 = INSN0(10,6);
+ UInt isLD = INSN0(11,11);
+
+ mk_skip_over_T16_if_cond_is_false(condT);
+ condT = IRTemp_INVALID;
+ // now uncond
+
+ IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm5 * 2));
+ put_ITSTATE(old_itstate); // backout
+ if (isLD) {
+ putIRegT(rD, unop(Iop_16Uto32, loadLE(Ity_I16, ea)),
+ IRTemp_INVALID);
+ } else {
+ storeLE( ea, unop(Iop_32to16, getIRegT(rD)) );
+ }
+ put_ITSTATE(new_itstate); // restore
+
+ DIP("%sh r%u, [r%u, #%u]\n", isLD ? "ldr" : "str", rD, rN, imm5 * 2);
+ goto decode_success;
+ }
+
+ case BITS5(0,1,1,1,0): /* STRB */
+ case BITS5(0,1,1,1,1): { /* LDRB */
+ /* ------------- LDRB Rd, [Rn, #imm5] ------------- */
+ /* ------------- STRB Rd, [Rn, #imm5] ------------- */
+ /* LDRB/STRB Rd, [Rn + imm5] */
+ UInt rD = INSN0(2,0);
+ UInt rN = INSN0(5,3);
+ UInt imm5 = INSN0(10,6);
+ UInt isLD = INSN0(11,11);
+
+ mk_skip_over_T16_if_cond_is_false(condT);
+ condT = IRTemp_INVALID;
+ // now uncond
+
+ IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm5));
+ put_ITSTATE(old_itstate); // backout
+ if (isLD) {
+ putIRegT(rD, unop(Iop_8Uto32, loadLE(Ity_I8, ea)),
+ IRTemp_INVALID);
+ } else {
+ storeLE( ea, unop(Iop_32to8, getIRegT(rD)) );
+ }
+ put_ITSTATE(new_itstate); // restore
+
+ DIP("%sb r%u, [r%u, #%u]\n", isLD ? "ldr" : "str", rD, rN, imm5);
+ goto decode_success;
+ }
+
+ case BITS5(1,0,0,1,0): /* STR */
+ case BITS5(1,0,0,1,1): { /* LDR */
+ /* ------------- LDR Rd, [SP, #imm8 * 4] ------------- */
+ /* ------------- STR Rd, [SP, #imm8 * 4] ------------- */
+ /* LDR/STR Rd, [SP + imm8 * 4] */
+ UInt rD = INSN0(10,8);
+ UInt imm8 = INSN0(7,0);
+ UInt isLD = INSN0(11,11);
+
+ mk_skip_over_T16_if_cond_is_false(condT);
+ condT = IRTemp_INVALID;
+ // now uncond
+
+ IRExpr* ea = binop(Iop_Add32, getIRegT(13), mkU32(imm8 * 4));
+ put_ITSTATE(old_itstate); // backout
+ if (isLD) {
+ putIRegT(rD, loadLE(Ity_I32, ea), IRTemp_INVALID);
+ } else {
+ storeLE(ea, getIRegT(rD));
+ }
+ put_ITSTATE(new_itstate); // restore
+
+ DIP("%s r%u, [sp, #%u]\n", isLD ? "ldr" : "str", rD, imm8 * 4);
+ goto decode_success;
+ }
+
+ case BITS5(1,1,0,0,1): {
+ /* ------------- LDMIA Rn!, {reglist} ------------- */
+ Int i, nRegs = 0;
+ UInt rN = INSN0(10,8);
+ UInt list = INSN0(7,0);
+ /* Empty lists aren't allowed. */
+ if (list != 0) {
+ mk_skip_over_T16_if_cond_is_false(condT);
+ condT = IRTemp_INVALID;
+ put_ITSTATE(old_itstate);
+ // now uncond
+
+ IRTemp oldRn = newTemp(Ity_I32);
+ IRTemp base = newTemp(Ity_I32);
+ assign(oldRn, getIRegT(rN));
+ assign(base, binop(Iop_And32, mkexpr(oldRn), mkU32(~3U)));
+ for (i = 0; i < 8; i++) {
+ if (0 == (list & (1 << i)))
+ continue;
+ nRegs++;
+ putIRegT(
+ i, loadLE(Ity_I32,
+ binop(Iop_Add32, mkexpr(base),
+ mkU32(nRegs * 4 - 4))),
+ IRTemp_INVALID
+ );
+ }
+ /* Only do the writeback for rN if it isn't in the list of
+ registers to be transferred. */
+ if (0 == (list & (1 << rN))) {
+ putIRegT(rN,
+ binop(Iop_Add32, mkexpr(oldRn),
+ mkU32(nRegs * 4)),
+ IRTemp_INVALID
+ );
+ }
+
+ /* Reinstate the ITSTATE update. */
+ put_ITSTATE(new_itstate);
+
+ DIP("ldmia r%u!, {0x%04x}\n", rN, list);
+ goto decode_success;
+ }
+ break;
+ }
+
+ case BITS5(1,1,0,0,0): {
+ /* ------------- STMIA Rn!, {reglist} ------------- */
+ Int i, nRegs = 0;
+ UInt rN = INSN0(10,8);
+ UInt list = INSN0(7,0);
+ /* Empty lists aren't allowed. Also, if rN is in the list then
+ it must be the lowest numbered register in the list. */
+ Bool valid = list != 0;
+ if (valid && 0 != (list & (1 << rN))) {
+ for (i = 0; i < rN; i++) {
+ if (0 != (list & (1 << i)))
+ valid = False;
+ }
+ }
+ if (valid) {
+ mk_skip_over_T16_if_cond_is_false(condT);
+ condT = IRTemp_INVALID;
+ put_ITSTATE(old_itstate);
+ // now uncond
+
+ IRTemp oldRn = newTemp(Ity_I32);
+ IRTemp base = newTemp(Ity_I32);
+ assign(oldRn, getIRegT(rN));
+ assign(base, binop(Iop_And32, mkexpr(oldRn), mkU32(~3U)));
+ for (i = 0; i < 8; i++) {
+ if (0 == (list & (1 << i)))
+ continue;
+ nRegs++;
+ storeLE( binop(Iop_Add32, mkexpr(base), mkU32(nRegs * 4 - 4)),
+ getIRegT(i) );
+ }
+ /* Always do the writeback. */
+ putIRegT(rN,
+ binop(Iop_Add32, mkexpr(oldRn),
+ mkU32(nRegs * 4)),
+ IRTemp_INVALID);
+
+ /* Reinstate the ITSTATE update. */
+ put_ITSTATE(new_itstate);
+
+ DIP("stmia r%u!, {0x%04x}\n", rN, list);
+ goto decode_success;
+ }
+ break;
+ }
+
+ case BITS5(0,0,0,0,0): /* LSLS */
+ case BITS5(0,0,0,0,1): /* LSRS */
+ case BITS5(0,0,0,1,0): { /* ASRS */
+ /* ---------------- LSLS Rd, Rm, #imm5 ---------------- */
+ /* ---------------- LSRS Rd, Rm, #imm5 ---------------- */
+ /* ---------------- ASRS Rd, Rm, #imm5 ---------------- */
+ UInt rD = INSN0(2,0);
+ UInt rM = INSN0(5,3);
+ UInt imm5 = INSN0(10,6);
+ IRTemp res = newTemp(Ity_I32);
+ IRTemp resC = newTemp(Ity_I32);
+ IRTemp rMt = newTemp(Ity_I32);
+ IRTemp oldV = newTemp(Ity_I32);
+ HChar* wot = "???";
+ assign(rMt, getIRegT(rM));
+ assign(oldV, mk_armg_calculate_flag_v());
+ /* Looks like INSN0(12,11) are the standard 'how' encoding.
+ Could compactify if the ROR case later appears. */
+ switch (INSN0(15,11)) {
+ case BITS5(0,0,0,0,0):
+ compute_result_and_C_after_LSL_by_imm5(
+ dis_buf, &res, &resC, rMt, imm5, rM
+ );
+ wot = "lsl";
+ break;
+ case BITS5(0,0,0,0,1):
+ compute_result_and_C_after_LSR_by_imm5(
+ dis_buf, &res, &resC, rMt, imm5, rM
+ );
+ wot = "lsr";
+ break;
+ case BITS5(0,0,0,1,0):
+ compute_result_and_C_after_ASR_by_imm5(
+ dis_buf, &res, &resC, rMt, imm5, rM
+ );
+ wot = "asr";
+ break;
+ default:
+ /*NOTREACHED*/vassert(0);
+ }
+ // not safe to read guest state after this point
+ putIRegT(rD, mkexpr(res), condT);
+ setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, resC, oldV,
+ cond_AND_notInIT_T );
+ /* ignore buf and roll our own output */
+ DIP("%ss r%u, r%u, #%u\n", wot, rD, rM, imm5);
+ goto decode_success;
+ }
+
+ case BITS5(1,1,1,0,0): {
+ /* ---------------- B #simm11 ---------------- */
+ Int simm11 = INSN0(10,0);
+ simm11 = (simm11 << 21) >> 20;
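+         /* Shifting left by 21 places the sign bit (bit 10) at bit
+            31; the arithmetic shift right by one less then
+            sign-extends and doubles the offset in a single step,
+            e.g. 0x7FF -> -2 and 0x001 -> +2. */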
+ UInt dst = simm11 + guest_R15_curr_instr_notENC + 4;
+ /* Only allowed outside or last-in IT block; SIGILL if not so. */
+ gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
+ // and skip this insn if not selected; being cleverer is too
+ // difficult
+ mk_skip_over_T16_if_cond_is_false(condT);
+ condT = IRTemp_INVALID;
+ // now uncond
+ irsb->next = mkU32( dst | 1 /*CPSR.T*/ );
+ irsb->jumpkind = Ijk_Boring;
+ dres.whatNext = Dis_StopHere;
+ DIP("b 0x%x\n", dst);
+ goto decode_success;
+ }
+
+ default:
+ break; /* examine the next shortest prefix */
+
+ }
+
+
+ /* ================ 16-bit 15:12 cases ================ */
+
+ switch (INSN0(15,12)) {
+
+ case BITS4(1,1,0,1): {
+ /* ---------------- Bcond #simm8 ---------------- */
+ UInt cond = INSN0(11,8);
+ Int simm8 = INSN0(7,0);
+ simm8 = (simm8 << 24) >> 23;
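+         /* Same sign-extend-and-double trick as in the B #simm11
+            case above, this time with bit 7 as the sign bit. */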
+ UInt dst = simm8 + guest_R15_curr_instr_notENC + 4;
+ if (cond != ARMCondAL && cond != ARMCondNV) {
+ /* Not allowed in an IT block; SIGILL if so. */
+ gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
+
+ IRTemp kondT = newTemp(Ity_I32);
+ assign( kondT, mk_armg_calculate_condition(cond) );
+ stmt( IRStmt_Exit( unop(Iop_32to1, mkexpr(kondT)),
+ Ijk_Boring,
+ IRConst_U32(dst | 1/*CPSR.T*/) ));
+ irsb->next = mkU32( (guest_R15_curr_instr_notENC + 2)
+ | 1 /*CPSR.T*/ );
+ irsb->jumpkind = Ijk_Boring;
+ dres.whatNext = Dis_StopHere;
+ DIP("b%s 0x%x\n", nCC(cond), dst);
+ goto decode_success;
+ }
+ break;
+ }
+
+ default:
+ break; /* hmm, nothing matched */
+
+ }
+
+ /* ================ 16-bit misc cases ================ */
+
+ /* ------ NOP ------ */
+ if (INSN0(15,0) == 0xBF00) {
+      DIP("nop\n");
+ goto decode_success;
+ }
+
+ /* ----------------------------------------------------------- */
+ /* -- -- */
+ /* -- Thumb 32-bit integer instructions -- */
+ /* -- -- */
+ /* ----------------------------------------------------------- */
+
+# define INSN1(_bMax,_bMin) SLICE_UInt(((UInt)insn1), (_bMax), (_bMin))
+
+ /* second 16 bits of the instruction, if any */
+ UShort insn1 = getUShortLittleEndianly( guest_instr+2 );
+
+ anOp = Iop_INVALID; /* paranoia */
+ anOpNm = NULL; /* paranoia */
+
+ /* Change result defaults to suit 32-bit insns. */
+ vassert(dres.whatNext == Dis_Continue);
+ vassert(dres.len == 2);
+ vassert(dres.continueAt == 0);
+ dres.len = 4;
+
+ /* ---------------- BL/BLX simm26 ---------------- */
+ if (BITS5(1,1,1,1,0) == INSN0(15,11) && BITS2(1,1) == INSN1(15,14)) {
+ UInt isBL = INSN1(12,12);
+ UInt bS = INSN0(10,10);
+ UInt bJ1 = INSN1(13,13);
+ UInt bJ2 = INSN1(11,11);
+ UInt bI1 = 1 ^ (bJ1 ^ bS);
+ UInt bI2 = 1 ^ (bJ2 ^ bS);
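+      /* Per the ARM ARM, I1 = NOT(J1 EOR S) and I2 = NOT(J2 EOR S);
+         this undoes the J1/J2 munging and recovers the two high
+         offset bits. */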
+ Int simm25
+ = (bS << (1 + 1 + 10 + 11 + 1))
+ | (bI1 << (1 + 10 + 11 + 1))
+ | (bI2 << (10 + 11 + 1))
+ | (INSN0(9,0) << (11 + 1))
+ | (INSN1(10,0) << 1);
+ simm25 = (simm25 << 7) >> 7;
+
+ vassert(0 == (guest_R15_curr_instr_notENC & 1));
+ UInt dst = simm25 + guest_R15_curr_instr_notENC + 4;
+
+ /* One further validity case to check: in the case of BLX
+ (not-BL), that insn1[0] must be zero. */
+ Bool valid = True;
+ if (isBL == 0 && INSN1(0,0) == 1) valid = False;
+ if (valid) {
+ /* Only allowed outside or last-in IT block; SIGILL if not so. */
+ gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
+ // and skip this insn if not selected; being cleverer is too
+ // difficult
+ mk_skip_over_T32_if_cond_is_false(condT);
+ condT = IRTemp_INVALID;
+ // now uncond
+
+ /* We're returning to Thumb code, hence "| 1" */
+ putIRegT( 14, mkU32( (guest_R15_curr_instr_notENC + 4) | 1 ),
+ IRTemp_INVALID);
+ if (isBL) {
+ /* BL: unconditional T -> T call */
+ /* we're calling Thumb code, hence "| 1" */
+ irsb->next = mkU32( dst | 1 );
+ DIP("bl 0x%x (stay in Thumb mode)\n", dst);
+ } else {
+ /* BLX: unconditional T -> A call */
+         /* we're calling ARM code, hence "& ~3" to align to a
+            valid ARM insn address */
+ irsb->next = mkU32( dst & ~3 );
+ DIP("blx 0x%x (switch to ARM mode)\n", dst & ~3);
+ }
+ irsb->jumpkind = Ijk_Call;
+ dres.whatNext = Dis_StopHere;
+ goto decode_success;
+ }
+ }
+
+ /* ---------------- {LD,ST}M{IA,DB} ---------------- */
+ if (0x3a2 == INSN0(15,6) // {LD,ST}MIA
+ || 0x3a4 == INSN0(15,6)) { // {LD,ST}MDB
+ UInt bW = INSN0(5,5); /* writeback Rn ? */
+ UInt bL = INSN0(4,4);
+ UInt rN = INSN0(3,0);
+ UInt bP = INSN1(15,15); /* reglist entry for r15 */
+ UInt bM = INSN1(14,14); /* reglist entry for r14 */
+ UInt rLmost = INSN1(12,0); /* reglist entry for r0 .. 12 */
+ UInt rL13 = INSN1(13,13); /* must be zero */
+ UInt regList = 0;
+ Bool valid = True;
+
+ UInt bINC = 1;
+ UInt bBEFORE = 0;
+ if (INSN0(15,6) == 0x3a4) {
+ bINC = 0;
+ bBEFORE = 1;
+ }
+
+ /* detect statically invalid cases, and construct the final
+ reglist */
+ if (rL13 == 1)
+ valid = False;
+
+ if (bL == 1) {
+ regList = (bP << 15) | (bM << 14) | rLmost;
+ if (rN == 15) valid = False;
+ if (popcount32(regList) < 2) valid = False;
+ if (bP == 1 && bM == 1) valid = False;
+ if (bW == 1 && (regList & (1<<rN))) valid = False;
+ } else {
+ regList = (bM << 14) | rLmost;
+ if (bP == 1) valid = False;
+ if (rN == 15) valid = False;
+ if (popcount32(regList) < 2) valid = False;
+ if (bW == 1 && (regList & (1<<rN))) valid = False;
+ if (regList & (1<<rN)) {
+ UInt i;
+ /* if Rn is in the list, then it must be the
+ lowest numbered entry */
+ for (i = 0; i < rN; i++) {
+ if (regList & (1<<i))
+ valid = False;
+ }
+ }
+ }
+
+ if (valid) {
+ if (bL == 1 && bP == 1) {
+ // We'll be writing the PC. Hence:
+ /* Only allowed outside or last-in IT block; SIGILL if not so. */
+ gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
+ }
+
+ /* Go uncond: */
+ mk_skip_over_T32_if_cond_is_false(condT);
+ condT = IRTemp_INVALID;
+ // now uncond
+
+         /* Generate the IR. This might generate a write to R15. */
+ mk_ldm_stm(False/*!arm*/, rN, bINC, bBEFORE, bW, bL, regList);
+
+ if (bL == 1 && (regList & (1<<15))) {
+ // If we wrote to R15, we have an interworking return to
+ // deal with.
+ irsb->next = llGetIReg(15);
+ irsb->jumpkind = Ijk_Ret;
+ dres.whatNext = Dis_StopHere;
+ }
+
+ DIP("%sm%c%c r%u%s, {0x%04x}\n",
+ bL == 1 ? "ld" : "st", bINC ? 'i' : 'd', bBEFORE ? 'b' : 'a',
+ rN, bW ? "!" : "", regList);
+
+ goto decode_success;
+ }
+ }
+
+ /* -------------- (T3) ADD{S}.W Rd, Rn, #constT -------------- */
+ if (INSN0(15,11) == BITS5(1,1,1,1,0)
+ && INSN0(9,5) == BITS5(0,1,0,0,0)
+ && INSN1(15,15) == 0) {
+ UInt bS = INSN0(4,4);
+ UInt rN = INSN0(3,0);
+ UInt rD = INSN1(11,8);
+ Bool valid = !isBadRegT(rN) && !isBadRegT(rD);
+ /* but allow "add.w reg, sp, #constT" */
+ if (!valid && rN == 13)
+ valid = True;
+ if (valid) {
+ IRTemp argL = newTemp(Ity_I32);
+ IRTemp argR = newTemp(Ity_I32);
+ IRTemp res = newTemp(Ity_I32);
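+         /* The i:imm3:imm8 immediate field is expanded per the ARM
+            ARM "ThumbExpandImm" rules: a (possibly replicated) 8-bit
+            pattern, or an 8-bit value rotated into position. */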
+ UInt imm32 = thumbExpandImm_from_I0_I1(NULL, insn0, insn1);
+ assign(argL, getIRegT(rN));
+ assign(argR, mkU32(imm32));
+ assign(res, binop(Iop_Add32, mkexpr(argL), mkexpr(argR)));
+ putIRegT(rD, mkexpr(res), condT);
+ if (bS == 1)
+ setFlags_D1_D2( ARMG_CC_OP_ADD, argL, argR, condT );
+ DIP("add%s.w r%u, r%u, #%u\n",
+ bS == 1 ? "s" : "", rD, rN, imm32);
+ goto decode_success;
+ }
+ }
+
+ /* ---------------- (T2) CMP.W Rn, #constT ---------------- */
+ /* ---------------- (T2) CMN.W Rn, #constT ---------------- */
+ if (INSN0(15,11) == BITS5(1,1,1,1,0)
+ && ( INSN0(9,4) == BITS6(0,1,1,0,1,1) // CMP
+ || INSN0(9,4) == BITS6(0,1,0,0,0,1)) // CMN
+ && INSN1(15,15) == 0
+ && INSN1(11,8) == BITS4(1,1,1,1)) {
+ UInt rN = INSN0(3,0);
+ if (rN != 15) {
+ IRTemp argL = newTemp(Ity_I32);
+ IRTemp argR = newTemp(Ity_I32);
+ Bool isCMN = INSN0(9,4) == BITS6(0,1,0,0,0,1);
+ UInt imm32 = thumbExpandImm_from_I0_I1(NULL, insn0, insn1);
+ assign(argL, getIRegT(rN));
+ assign(argR, mkU32(imm32));
+ setFlags_D1_D2( isCMN ? ARMG_CC_OP_ADD : ARMG_CC_OP_SUB,
+ argL, argR, condT );
+ DIP("%s.w r%u, #%u\n", isCMN ? "cmn" : "cmp", rN, imm32);
+ goto decode_success;
+ }
+ }
+
+ /* -------------- (T1) TST.W Rn, #constT -------------- */
+ /* -------------- (T1) TEQ.W Rn, #constT -------------- */
+ if (INSN0(15,11) == BITS5(1,1,1,1,0)
+ && ( INSN0(9,4) == BITS6(0,0,0,0,0,1) // TST
+ || INSN0(9,4) == BITS6(0,0,1,0,0,1)) // TEQ
+ && INSN1(15,15) == 0
+ && INSN1(11,8) == BITS4(1,1,1,1)) {
+ UInt rN = INSN0(3,0);
+ if (!isBadRegT(rN)) { // yes, really, it's inconsistent with CMP.W
+ Bool isTST = INSN0(9,4) == BITS6(0,0,0,0,0,1);
+ IRTemp argL = newTemp(Ity_I32);
+ IRTemp argR = newTemp(Ity_I32);
+ IRTemp res = newTemp(Ity_I32);
+ IRTemp oldV = newTemp(Ity_I32);
+ IRTemp oldC = newTemp(Ity_I32);
+ Bool updC = False;
+ UInt imm32 = thumbExpandImm_from_I0_I1(&updC, insn0, insn1);
+ assign(argL, getIRegT(rN));
+ assign(argR, mkU32(imm32));
+ assign(res, binop(isTST ? Iop_And32 : Iop_Xor32,
+ mkexpr(argL), mkexpr(argR)));
+ assign( oldV, mk_armg_calculate_flag_v() );
+ assign( oldC, updC
+ ? mkU32((imm32 >> 31) & 1)
+ : mk_armg_calculate_flag_c() );
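+         /* If the immediate expansion involved a rotation (updC set),
+            the shifter carry-out is bit 31 of the expanded value;
+            otherwise C is left unchanged. */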
+ setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV, condT );
+ DIP("%s.w r%u, #%u\n", isTST ? "tst" : "teq", rN, imm32);
+ goto decode_success;
+ }
+ }
+
+ /* -------------- (T3) SUB{S}.W Rd, Rn, #constT -------------- */
+ /* -------------- (T3) RSB{S}.W Rd, Rn, #constT -------------- */
+ if (INSN0(15,11) == BITS5(1,1,1,1,0)
+ && (INSN0(9,5) == BITS5(0,1,1,0,1) // SUB
+ || INSN0(9,5) == BITS5(0,1,1,1,0)) // RSB
+ && INSN1(15,15) == 0) {
+ Bool isRSB = INSN0(9,5) == BITS5(0,1,1,1,0);
+ UInt bS = INSN0(4,4);
+ UInt rN = INSN0(3,0);
+ UInt rD = INSN1(11,8);
+ Bool valid = !isBadRegT(rN) && !isBadRegT(rD);
+ /* but allow "sub.w sp, sp, #constT" */
+ if (!valid && !isRSB && rN == 13 && rD == 13)
+ valid = True;
+ if (valid) {
+ IRTemp argL = newTemp(Ity_I32);
+ IRTemp argR = newTemp(Ity_I32);
+ IRTemp res = newTemp(Ity_I32);
+ UInt imm32 = thumbExpandImm_from_I0_I1(NULL, insn0, insn1);
+ assign(argL, getIRegT(rN));
+ assign(argR, mkU32(imm32));
+ assign(res, isRSB
+ ? binop(Iop_Sub32, mkexpr(argR), mkexpr(argL))
+ : binop(Iop_Sub32, mkexpr(argL), mkexpr(argR)));
+ putIRegT(rD, mkexpr(res), condT);
+ if (bS == 1) {
+ if (isRSB)
+ setFlags_D1_D2( ARMG_CC_OP_SUB, argR, argL, condT );
+ else
+ setFlags_D1_D2( ARMG_CC_OP_SUB, argL, argR, condT );
+ }
+ DIP("%s%s.w r%u, r%u, #%u\n",
+ isRSB ? "rsb" : "sub", bS == 1 ? "s" : "", rD, rN, imm32);
+ goto decode_success;
+ }
+ }
+
+ /* -------------- (T1) ADC{S}.W Rd, Rn, #constT -------------- */
+ /* -------------- (T1) SBC{S}.W Rd, Rn, #constT -------------- */
+ if (INSN0(15,11) == BITS5(1,1,1,1,0)
+ && ( INSN0(9,5) == BITS5(0,1,0,1,0) // ADC
+ || INSN0(9,5) == BITS5(0,1,0,1,1)) // SBC
+ && INSN1(15,15) == 0) {
+ /* ADC: Rd = Rn + constT + oldC */
+ /* SBC: Rd = Rn - constT - (oldC ^ 1) */
+ UInt bS = INSN0(4,4);
+ UInt rN = INSN0(3,0);
+ UInt rD = INSN1(11,8);
+ if (!isBadRegT(rN) && !isBadRegT(rD)) {
+ IRTemp argL = newTemp(Ity_I32);
+ IRTemp argR = newTemp(Ity_I32);
+ IRTemp res = newTemp(Ity_I32);
+ IRTemp oldC = newTemp(Ity_I32);
+ UInt imm32 = thumbExpandImm_from_I0_I1(NULL, insn0, insn1);
+ assign(argL, getIRegT(rN));
+ assign(argR, mkU32(imm32));
+ assign(oldC, mk_armg_calculate_flag_c() );
+ HChar* nm = "???";
+ switch (INSN0(9,5)) {
+ case BITS5(0,1,0,1,0): // ADC
+ nm = "adc";
+ assign(res,
+ binop(Iop_Add32,
+ binop(Iop_Add32, mkexpr(argL), mkexpr(argR)),
+ mkexpr(oldC) ));
+ putIRegT(rD, mkexpr(res), condT);
+ if (bS)
+ setFlags_D1_D2_ND( ARMG_CC_OP_ADC,
+ argL, argR, oldC, condT );
+ break;
+ case BITS5(0,1,0,1,1): // SBC
+ nm = "sbc";
+ assign(res,
+ binop(Iop_Sub32,
+ binop(Iop_Sub32, mkexpr(argL), mkexpr(argR)),
+ binop(Iop_Xor32, mkexpr(oldC), mkU32(1)) ));
+ putIRegT(rD, mkexpr(res), condT);
+ if (bS)
+ setFlags_D1_D2_ND( ARMG_CC_OP_SBB,
+ argL, argR, oldC, condT );
+ break;
+ default:
+ vassert(0);
+ }
+ DIP("%s%s.w r%u, r%u, #%u\n",
+ nm, bS == 1 ? "s" : "", rD, rN, imm32);
+ goto decode_success;
+ }
+ }
+
+ /* -------------- (T1) ORR{S}.W Rd, Rn, #constT -------------- */
+ /* -------------- (T1) AND{S}.W Rd, Rn, #constT -------------- */
+ /* -------------- (T1) BIC{S}.W Rd, Rn, #constT -------------- */
+ /* -------------- (T1) EOR{S}.W Rd, Rn, #constT -------------- */
+ if (INSN0(15,11) == BITS5(1,1,1,1,0)
+ && ( INSN0(9,5) == BITS5(0,0,0,1,0) // ORR
+ || INSN0(9,5) == BITS5(0,0,0,0,0) // AND
+ || INSN0(9,5) == BITS5(0,0,0,0,1) // BIC
+ || INSN0(9,5) == BITS5(0,0,1,0,0) // EOR
+ || INSN0(9,5) == BITS5(0,0,0,1,1)) // ORN
+ && INSN1(15,15) == 0) {
+ UInt bS = INSN0(4,4);
+ UInt rN = INSN0(3,0);
+ UInt rD = INSN1(11,8);
+ if (!isBadRegT(rN) && !isBadRegT(rD)) {
+ Bool notArgR = False;
+ IROp op = Iop_INVALID;
+ HChar* nm = "???";
+ switch (INSN0(9,5)) {
+ case BITS5(0,0,0,1,0): op = Iop_Or32; nm = "orr"; break;
+ case BITS5(0,0,0,0,0): op = Iop_And32; nm = "and"; break;
+ case BITS5(0,0,0,0,1): op = Iop_And32; nm = "bic";
+ notArgR = True; break;
+ case BITS5(0,0,1,0,0): op = Iop_Xor32; nm = "eor"; break;
+ case BITS5(0,0,0,1,1): op = Iop_Or32; nm = "orn";
+ notArgR = True; break;
+ default: vassert(0);
+ }
+ IRTemp argL = newTemp(Ity_I32);
+ IRTemp argR = newTemp(Ity_I32);
+ IRTemp res = newTemp(Ity_I32);
+ Bool updC = False;
+ UInt imm32 = thumbExpandImm_from_I0_I1(&updC, insn0, insn1);
+ assign(argL, getIRegT(rN));
+ assign(argR, mkU32(notArgR ? ~imm32 : imm32));
+ assign(res, binop(op, mkexpr(argL), mkexpr(argR)));
+ putIRegT(rD, mkexpr(res), condT);
+ if (bS) {
+ IRTemp oldV = newTemp(Ity_I32);
+ IRTemp oldC = newTemp(Ity_I32);
+ assign( oldV, mk_armg_calculate_flag_v() );
+ assign( oldC, updC
+ ? mkU32((imm32 >> 31) & 1)
+ : mk_armg_calculate_flag_c() );
+ setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
+ condT );
+ }
+ DIP("%s%s.w r%u, r%u, #%u\n",
+ nm, bS == 1 ? "s" : "", rD, rN, imm32);
+ goto decode_success;
+ }
+ }
+
+ /* ---------- (T3) ADD{S}.W Rd, Rn, Rm, {shift} ---------- */
+ /* ---------- (T3) SUB{S}.W Rd, Rn, Rm, {shift} ---------- */
+ /* ---------- (T3) RSB{S}.W Rd, Rn, Rm, {shift} ---------- */
+ if (INSN0(15,9) == BITS7(1,1,1,0,1,0,1)
+ && ( INSN0(8,5) == BITS4(1,0,0,0) // add subopc
+ || INSN0(8,5) == BITS4(1,1,0,1) // sub subopc
+ || INSN0(8,5) == BITS4(1,1,1,0)) // rsb subopc
+ && INSN1(15,15) == 0) {
+ UInt rN = INSN0(3,0);
+ UInt rD = INSN1(11,8);
+ UInt rM = INSN1(3,0);
+ UInt bS = INSN0(4,4);
+ UInt imm5 = (INSN1(14,12) << 2) | INSN1(7,6);
+ UInt how = INSN1(5,4);
+
+ Bool valid = !isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM);
+      /* but allow "add.w reg, sp, reg" with no shift */
+ if (!valid && INSN0(8,5) == BITS4(1,0,0,0) // add
+ && rN == 13 && imm5 == 0 && how == 0) {
+ valid = True;
+ }
+      /* also allow "sub.w sp, sp, reg" with no shift */
+      if (!valid && INSN0(8,5) == BITS4(1,1,0,1) // sub
+ && rD == 13 && rN == 13 && imm5 == 0 && how == 0) {
+ valid = True;
+ }
+ if (valid) {
+ Bool swap = False;
+ IROp op = Iop_INVALID;
+ HChar* nm = "???";
+ switch (INSN0(8,5)) {
+ case BITS4(1,0,0,0): op = Iop_Add32; nm = "add"; break;
+ case BITS4(1,1,0,1): op = Iop_Sub32; nm = "sub"; break;
+ case BITS4(1,1,1,0): op = Iop_Sub32; nm = "rsb";
+ swap = True; break;
+ default: vassert(0);
+ }
+
+ IRTemp argL = newTemp(Ity_I32);
+ assign(argL, getIRegT(rN));
+
+ IRTemp rMt = newTemp(Ity_I32);
+ assign(rMt, getIRegT(rM));
+
+ IRTemp argR = newTemp(Ity_I32);
+ compute_result_and_C_after_shift_by_imm5(
+ dis_buf, &argR, NULL, rMt, how, imm5, rM
+ );
+
+ IRTemp res = newTemp(Ity_I32);
+ assign(res, swap
+ ? binop(op, mkexpr(argR), mkexpr(argL))
+ : binop(op, mkexpr(argL), mkexpr(argR)));
+
+ putIRegT(rD, mkexpr(res), condT);
+ if (bS) {
+ switch (op) {
+ case Iop_Add32:
+ setFlags_D1_D2( ARMG_CC_OP_ADD, argL, argR, condT );
+ break;
+ case Iop_Sub32:
+ if (swap)
+ setFlags_D1_D2( ARMG_CC_OP_SUB, argR, argL, condT );
+ else
+ setFlags_D1_D2( ARMG_CC_OP_SUB, argL, argR, condT );
+ break;
+ default:
+ vassert(0);
+ }
+ }
+
+ DIP("%s%s.w r%u, r%u, %s\n",
+ nm, bS ? "s" : "", rD, rN, dis_buf);
+ goto decode_success;
+ }
+ }
+
+ /* ---------- (T3) ADC{S}.W Rd, Rn, Rm, {shift} ---------- */
+ /* ---------- (T2) SBC{S}.W Rd, Rn, Rm, {shift} ---------- */
+ if (INSN0(15,9) == BITS7(1,1,1,0,1,0,1)
+ && ( INSN0(8,5) == BITS4(1,0,1,0) // adc subopc
+ || INSN0(8,5) == BITS4(1,0,1,1)) // sbc subopc
+ && INSN1(15,15) == 0) {
+ /* ADC: Rd = Rn + shifter_operand + oldC */
+ /* SBC: Rd = Rn - shifter_operand - (oldC ^ 1) */
+ UInt rN = INSN0(3,0);
+ UInt rD = INSN1(11,8);
+ UInt rM = INSN1(3,0);
+ if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
+ UInt bS = INSN0(4,4);
+ UInt imm5 = (INSN1(14,12) << 2) | INSN1(7,6);
+ UInt how = INSN1(5,4);
+
+ IRTemp argL = newTemp(Ity_I32);
+ assign(argL, getIRegT(rN));
+
+ IRTemp rMt = newTemp(Ity_I32);
+ assign(rMt, getIRegT(rM));
+
+ IRTemp oldC = newTemp(Ity_I32);
+ assign(oldC, mk_armg_calculate_flag_c());
+
+ IRTemp argR = newTemp(Ity_I32);
+ compute_result_and_C_after_shift_by_imm5(
+ dis_buf, &argR, NULL, rMt, how, imm5, rM
+ );
+
+ HChar* nm = "???";
+ IRTemp res = newTemp(Ity_I32);
+ switch (INSN0(8,5)) {
+ case BITS4(1,0,1,0): // ADC
+ nm = "adc";
+ assign(res,
+ binop(Iop_Add32,
+ binop(Iop_Add32, mkexpr(argL), mkexpr(argR)),
+ mkexpr(oldC) ));
+ putIRegT(rD, mkexpr(res), condT);
+ if (bS)
+ setFlags_D1_D2_ND( ARMG_CC_OP_ADC,
+ argL, argR, oldC, condT );
+ break;
+ case BITS4(1,0,1,1): // SBC
+ nm = "sbc";
+ assign(res,
+ binop(Iop_Sub32,
+ binop(Iop_Sub32, mkexpr(argL), mkexpr(argR)),
+ binop(Iop_Xor32, mkexpr(oldC), mkU32(1)) ));
+ putIRegT(rD, mkexpr(res), condT);
+ if (bS)
+ setFlags_D1_D2_ND( ARMG_CC_OP_SBB,
+ argL, argR, oldC, condT );
+ break;
+ default:
+ vassert(0);
+ }
+
+ DIP("%s%s.w r%u, r%u, %s\n",
+ nm, bS ? "s" : "", rD, rN, dis_buf);
+ goto decode_success;
+ }
+ }
+
+ /* ---------- (T3) AND{S}.W Rd, Rn, Rm, {shift} ---------- */
+ /* ---------- (T3) ORR{S}.W Rd, Rn, Rm, {shift} ---------- */
+ /* ---------- (T3) EOR{S}.W Rd, Rn, Rm, {shift} ---------- */
+ /* ---------- (T3) BIC{S}.W Rd, Rn, Rm, {shift} ---------- */
+ /* ---------- (T1) ORN{S}.W Rd, Rn, Rm, {shift} ---------- */
+ if (INSN0(15,9) == BITS7(1,1,1,0,1,0,1)
+ && ( INSN0(8,5) == BITS4(0,0,0,0) // and subopc
+ || INSN0(8,5) == BITS4(0,0,1,0) // orr subopc
+ || INSN0(8,5) == BITS4(0,1,0,0) // eor subopc
+ || INSN0(8,5) == BITS4(0,0,0,1) // bic subopc
+ || INSN0(8,5) == BITS4(0,0,1,1)) // orn subopc
+ && INSN1(15,15) == 0) {
+ UInt rN = INSN0(3,0);
+ UInt rD = INSN1(11,8);
+ UInt rM = INSN1(3,0);
+ if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
+ Bool notArgR = False;
+ IROp op = Iop_INVALID;
+ HChar* nm = "???";
+ switch (INSN0(8,5)) {
+ case BITS4(0,0,0,0): op = Iop_And32; nm = "and"; break;
+ case BITS4(0,0,1,0): op = Iop_Or32; nm = "orr"; break;
+ case BITS4(0,1,0,0): op = Iop_Xor32; nm = "eor"; break;
+ case BITS4(0,0,0,1): op = Iop_And32; nm = "bic";
+ notArgR = True; break;
+ case BITS4(0,0,1,1): op = Iop_Or32; nm = "orn";
+ notArgR = True; break;
+ default: vassert(0);
+ }
+ UInt bS = INSN0(4,4);
+ UInt imm5 = (INSN1(14,12) << 2) | INSN1(7,6);
+ UInt how = INSN1(5,4);
+
+ IRTemp rNt = newTemp(Ity_I32);
+ assign(rNt, getIRegT(rN));
+
+ IRTemp rMt = newTemp(Ity_I32);
+ assign(rMt, getIRegT(rM));
+
+ IRTemp argR = newTemp(Ity_I32);
+ IRTemp oldC = bS ? newTemp(Ity_I32) : IRTemp_INVALID;
+
+ compute_result_and_C_after_shift_by_imm5(
+ dis_buf, &argR, bS ? &oldC : NULL, rMt, how, imm5, rM
+ );
+
+ IRTemp res = newTemp(Ity_I32);
+ if (notArgR) {
+ vassert(op == Iop_And32 || op == Iop_Or32);
+ assign(res, binop(op, mkexpr(rNt),
+ unop(Iop_Not32, mkexpr(argR))));
+ } else {
+ assign(res, binop(op, mkexpr(rNt), mkexpr(argR)));
+ }
+
+ putIRegT(rD, mkexpr(res), condT);
+ if (bS) {
+ IRTemp oldV = newTemp(Ity_I32);
+ assign( oldV, mk_armg_calculate_flag_v() );
+ setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
+ condT );
+ }
+
+ DIP("%s%s.w r%u, r%u, %s\n",
+ nm, bS ? "s" : "", rD, rN, dis_buf);
+ goto decode_success;
+ }
+ }
+
+ /* -------------- (T?) LSL{S}.W Rd, Rn, Rm -------------- */
+ /* -------------- (T?) LSR{S}.W Rd, Rn, Rm -------------- */
+ /* -------------- (T?) ASR{S}.W Rd, Rn, Rm -------------- */
+ /* -------------- (T?) ROR{S}.W Rd, Rn, Rm -------------- */
+ if (INSN0(15,7) == BITS9(1,1,1,1,1,0,1,0,0)
+ && INSN1(15,12) == BITS4(1,1,1,1)
+ && INSN1(7,4) == BITS4(0,0,0,0)) {
+ UInt how = INSN0(6,5); // standard encoding
+ UInt rN = INSN0(3,0);
+ UInt rD = INSN1(11,8);
+ UInt rM = INSN1(3,0);
+ UInt bS = INSN0(4,4);
+ Bool valid = !isBadRegT(rN) && !isBadRegT(rM) && !isBadRegT(rD);
+ if (how == 3) valid = False; //ATC
+ if (valid) {
+ IRTemp rNt = newTemp(Ity_I32);
+ IRTemp rMt = newTemp(Ity_I32);
+ IRTemp res = newTemp(Ity_I32);
+ IRTemp oldC = bS ? newTemp(Ity_I32) : IRTemp_INVALID;
+ IRTemp oldV = bS ? newTemp(Ity_I32) : IRTemp_INVALID;
+ HChar* nms[4] = { "lsl", "lsr", "asr", "ror" };
+ HChar* nm = nms[how];
+ assign(rNt, getIRegT(rN));
+ assign(rMt, getIRegT(rM));
+ compute_result_and_C_after_shift_by_reg(
+ dis_buf, &res, bS ? &oldC : NULL,
+ rNt, how, rMt, rN, rM
+ );
+ if (bS)
+ assign(oldV, mk_armg_calculate_flag_v());
+ putIRegT(rD, mkexpr(res), condT);
+ if (bS) {
+ setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
+ condT );
+ }
+ DIP("%s%s.w r%u, r%u, r%u\n",
+ nm, bS ? "s" : "", rD, rN, rM);
+ goto decode_success;
+ }
+ }
+
+ /* ------------ (T?) MOV{S}.W Rd, Rn, {shift} ------------ */
+ /* ------------ (T?) MVN{S}.W Rd, Rn, {shift} ------------ */
+ if ((INSN0(15,0) & 0xFFCF) == 0xEA4F
+ && INSN1(15,15) == 0) {
+ UInt rD = INSN1(11,8);
+ UInt rN = INSN1(3,0);
+ if (!isBadRegT(rD) && !isBadRegT(rN)) {
+ UInt bS = INSN0(4,4);
+ UInt isMVN = INSN0(5,5);
+ UInt imm5 = (INSN1(14,12) << 2) | INSN1(7,6);
+ UInt how = INSN1(5,4);
+
+ IRTemp rNt = newTemp(Ity_I32);
+ assign(rNt, getIRegT(rN));
+
+ IRTemp oldRn = newTemp(Ity_I32);
+ IRTemp oldC = bS ? newTemp(Ity_I32) : IRTemp_INVALID;
+ compute_result_and_C_after_shift_by_imm5(
+ dis_buf, &oldRn, bS ? &oldC : NULL, rNt, how, imm5, rN
+ );
+
+ IRTemp res = newTemp(Ity_I32);
+ assign(res, isMVN ? unop(Iop_Not32, mkexpr(oldRn))
+ : mkexpr(oldRn));
+
+ putIRegT(rD, mkexpr(res), condT);
+ if (bS) {
+ IRTemp oldV = newTemp(Ity_I32);
+ assign( oldV, mk_armg_calculate_flag_v() );
+ setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV, condT);
+ }
+ DIP("%s%s.w r%u, %s\n",
+ isMVN ? "mvn" : "mov", bS ? "s" : "", rD, dis_buf);
+ goto decode_success;
+ }
+ }
+
+ /* -------------- (T?) TST.W Rn, Rm, {shift} -------------- */
+ /* -------------- (T?) TEQ.W Rn, Rm, {shift} -------------- */
+ if (INSN0(15,9) == BITS7(1,1,1,0,1,0,1)
+ && ( INSN0(8,4) == BITS5(0,0,0,0,1) // TST
+ || INSN0(8,4) == BITS5(0,1,0,0,1)) // TEQ
+ && INSN1(15,15) == 0
+ && INSN1(11,8) == BITS4(1,1,1,1)) {
+ UInt rN = INSN0(3,0);
+ UInt rM = INSN1(3,0);
+ if (!isBadRegT(rN) && !isBadRegT(rM)) {
+ Bool isTST = INSN0(8,4) == BITS5(0,0,0,0,1);
+
+ UInt how = INSN1(5,4);
+ UInt imm5 = (INSN1(14,12) << 2) | INSN1(7,6);
+
+ IRTemp argL = newTemp(Ity_I32);
+ assign(argL, getIRegT(rN));
+
+ IRTemp rMt = newTemp(Ity_I32);
+ assign(rMt, getIRegT(rM));
+
+ IRTemp argR = newTemp(Ity_I32);
+ IRTemp oldC = newTemp(Ity_I32);
+ compute_result_and_C_after_shift_by_imm5(
+ dis_buf, &argR, &oldC, rMt, how, imm5, rM
+ );
+
+ IRTemp oldV = newTemp(Ity_I32);
+ assign( oldV, mk_armg_calculate_flag_v() );
+
+ IRTemp res = newTemp(Ity_I32);
+ assign(res, binop(isTST ? Iop_And32 : Iop_Xor32,
+ mkexpr(argL), mkexpr(argR)));
+
+ setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
+ condT );
+ DIP("%s.w r%u, %s\n", isTST ? "tst" : "teq", rN, dis_buf);
+ goto decode_success;
+ }
+ }
+
+ /* -------------- (T3) CMP.W Rn, Rm, {shift} -------------- */
+ /* -------------- (T2) CMN.W Rn, Rm, {shift} -------------- */
+ if (INSN0(15,9) == BITS7(1,1,1,0,1,0,1)
+ && ( INSN0(8,4) == BITS5(1,1,0,1,1) // CMP
+ || INSN0(8,4) == BITS5(1,0,0,0,1)) // CMN
+ && INSN1(15,15) == 0
+ && INSN1(11,8) == BITS4(1,1,1,1)) {
+ UInt rN = INSN0(3,0);
+ UInt rM = INSN1(3,0);
+ if (!isBadRegT(rN) && !isBadRegT(rM)) {
+ Bool isCMN = INSN0(8,4) == BITS5(1,0,0,0,1);
+ UInt how = INSN1(5,4);
+ UInt imm5 = (INSN1(14,12) << 2) | INSN1(7,6);
+
+ IRTemp argL = newTemp(Ity_I32);
+ assign(argL, getIRegT(rN));
+
+ IRTemp rMt = newTemp(Ity_I32);
+ assign(rMt, getIRegT(rM));
+
+ IRTemp argR = newTemp(Ity_I32);
+ compute_result_and_C_after_shift_by_imm5(
+ dis_buf, &argR, NULL, rMt, how, imm5, rM
+ );
+
+ setFlags_D1_D2( isCMN ? ARMG_CC_OP_ADD : ARMG_CC_OP_SUB,
+ argL, argR, condT );
+
+ DIP("%s.w r%u, %s\n", isCMN ? "cmn" : "cmp", rN, dis_buf);
+ goto decode_success;
+ }
+ }
+
+ /* -------------- (T2) MOV{S}.W Rd, #constT -------------- */
+ /* -------------- (T2) MVN{S}.W Rd, #constT -------------- */
+ if (INSN0(15,11) == BITS5(1,1,1,1,0)
+ && ( INSN0(9,5) == BITS5(0,0,0,1,0) // MOV
+ || INSN0(9,5) == BITS5(0,0,0,1,1)) // MVN
+ && INSN0(3,0) == BITS4(1,1,1,1)
+ && INSN1(15,15) == 0) {
+ UInt rD = INSN1(11,8);
+ if (!isBadRegT(rD)) {
+ Bool updC = False;
+ UInt bS = INSN0(4,4);
+ Bool isMVN = INSN0(5,5) == 1;
+ UInt imm32 = thumbExpandImm_from_I0_I1(&updC, insn0, insn1);
+ IRTemp res = newTemp(Ity_I32);
+ assign(res, mkU32(isMVN ? ~imm32 : imm32));
+ putIRegT(rD, mkexpr(res), condT);
+ if (bS) {
+ IRTemp oldV = newTemp(Ity_I32);
+ IRTemp oldC = newTemp(Ity_I32);
+ assign( oldV, mk_armg_calculate_flag_v() );
+ assign( oldC, updC
+ ? mkU32((imm32 >> 31) & 1)
+ : mk_armg_calculate_flag_c() );
+ setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
+ condT );
+ }
+ DIP("%s%s.w r%u, #%u\n",
+ isMVN ? "mvn" : "mov", bS ? "s" : "", rD, imm32);
+ goto decode_success;
+ }
+ }
+
+ /* -------------- (T3) MOVW Rd, #imm16 -------------- */
+ if (INSN0(15,11) == BITS5(1,1,1,1,0)
+ && INSN0(9,4) == BITS6(1,0,0,1,0,0)
+ && INSN1(15,15) == 0) {
+ UInt rD = INSN1(11,8);
+ if (!isBadRegT(rD)) {
+ UInt imm16 = (INSN0(3,0) << 12) | (INSN0(10,10) << 11)
+ | (INSN1(14,12) << 8) | INSN1(7,0);
+ putIRegT(rD, mkU32(imm16), condT);
+ DIP("movw r%u, #%u\n", rD, imm16);
+ goto decode_success;
+ }
+ }
+
+ /* ---------------- MOVT Rd, #imm16 ---------------- */
+ if (INSN0(15,11) == BITS5(1,1,1,1,0)
+ && INSN0(9,4) == BITS6(1,0,1,1,0,0)
+ && INSN1(15,15) == 0) {
+ UInt rD = INSN1(11,8);
+ if (!isBadRegT(rD)) {
+ UInt imm16 = (INSN0(3,0) << 12) | (INSN0(10,10) << 11)
+ | (INSN1(14,12) << 8) | INSN1(7,0);
+ IRTemp res = newTemp(Ity_I32);
+ assign(res,
+ binop(Iop_Or32,
+ binop(Iop_And32, getIRegT(rD), mkU32(0xFFFF)),
+ mkU32(imm16 << 16)));
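+         /* Only the top halfword of Rd changes; the bottom halfword
+            is preserved. */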
+ putIRegT(rD, mkexpr(res), condT);
+ DIP("movt r%u, #%u\n", rD, imm16);
+ goto decode_success;
+ }
+ }
+
+ /* ---------------- LD/ST reg+/-#imm8 ---------------- */
+ /* Loads and stores of the form:
+ op Rt, [Rn, #-imm8] or
+ op Rt, [Rn], #+/-imm8 or
+ op Rt, [Rn, #+/-imm8]!
+ where op is one of
+ ldrb ldrh ldr ldrsb ldrsh
+ strb strh str
+ */
+ if (INSN0(15,9) == BITS7(1,1,1,1,1,0,0) && INSN1(11,11) == 1) {
+ Bool valid = True;
+ Bool syned = False;
+ Bool isST = False;
+ IRType ty = Ity_I8;
+ HChar* nm = "???";
+
+ switch (INSN0(8,4)) {
+ case BITS5(0,0,0,0,0): // strb
+ nm = "strb"; isST = True; break;
+ case BITS5(0,0,0,0,1): // ldrb
+ nm = "ldrb"; break;
+ case BITS5(1,0,0,0,1): // ldrsb
+ nm = "ldrsb"; syned = True; break;
+ case BITS5(0,0,0,1,0): // strh
+ nm = "strh"; ty = Ity_I16; isST = True; break;
+ case BITS5(0,0,0,1,1): // ldrh
+ nm = "ldrh"; ty = Ity_I16; break;
+ case BITS5(1,0,0,1,1): // ldrsh
+ nm = "ldrsh"; ty = Ity_I16; syned = True; break;
+ case BITS5(0,0,1,0,0): // str
+ nm = "str"; ty = Ity_I32; isST = True; break;
+         case BITS5(0,0,1,0,1): // ldr
+            nm = "ldr"; ty = Ity_I32; break;
+ default:
+ valid = False; break;
+ }
+
+ UInt rN = INSN0(3,0);
+ UInt rT = INSN1(15,12);
+ UInt bP = INSN1(10,10);
+ UInt bU = INSN1(9,9);
+ UInt bW = INSN1(8,8);
+ UInt imm8 = INSN1(7,0);
+ Bool loadsPC = False;
+
+ if (valid) {
+ if (bP == 1 && bU == 1 && bW == 0)
+ valid = False;
+ if (bP == 0 && bW == 0)
+ valid = False;
+ if (rN == 15)
+ valid = False;
+ if (bW == 1 && rN == rT)
+ valid = False;
+ if (ty == Ity_I8 || ty == Ity_I16) {
+ if (isBadRegT(rT))
+ valid = False;
+ } else {
+ /* ty == Ity_I32 */
+ if (isST && rT == 15)
+ valid = False;
+ if (!isST && rT == 15)
+ loadsPC = True;
+ }
+ }
+
+ if (valid) {
+ // if it's a branch, it can't happen in the middle of an IT block
+ if (loadsPC)
+ gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
+ // go uncond
+ mk_skip_over_T32_if_cond_is_false(condT);
+ condT = IRTemp_INVALID;
+ // now uncond
+
+ IRTemp preAddr = newTemp(Ity_I32);
+ assign(preAddr, getIRegT(rN));
+
+ IRTemp postAddr = newTemp(Ity_I32);
+ assign(postAddr, binop(bU == 1 ? Iop_Add32 : Iop_Sub32,
+ mkexpr(preAddr), mkU32(imm8)));
+
+ IRTemp transAddr = bP == 1 ? postAddr : preAddr;
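+         /* bP selects pre- vs post-indexing: with bP == 1 the
+            transfer uses the offset address (the offset and
+            pre-indexed forms); with bP == 0 it uses the original Rn,
+            and the writeback supplies the new value afterwards
+            (post-indexed form). */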
+
+ if (isST) {
+
+ /* Store. If necessary, update the base register before
+ the store itself, so that the common idiom of "str rX,
+ [sp, #-4]!" (store rX at sp-4, then do new sp = sp-4,
+ a.k.a "push rX") doesn't cause Memcheck to complain
+ that the access is below the stack pointer. Also, not
+ updating sp before the store confuses Valgrind's
+ dynamic stack-extending logic. So do it before the
+ store. Hence we need to snarf the store data before
+ doing the basereg update. */
+
+ /* get hold of the data to be stored */
+ IRTemp oldRt = newTemp(Ity_I32);
+ assign(oldRt, getIRegT(rT));
+
+ /* Update Rn if necessary. */
+ if (bW == 1) {
+ vassert(rN != rT); // assured by validity check above
+ putIRegT(rN, mkexpr(postAddr), IRTemp_INVALID);
+ }
+
+ /* generate the transfer */
+ switch (ty) {
+ case Ity_I8:
+ storeLE(mkexpr(transAddr),
+ unop(Iop_32to8, mkexpr(oldRt)));
+ break;
+ case Ity_I16:
+ storeLE(mkexpr(transAddr),
+ unop(Iop_32to16, mkexpr(oldRt)));
+ break;
+ case Ity_I32:
+ storeLE(mkexpr(transAddr), mkexpr(oldRt));
+ break;
+ default:
+ vassert(0);
+ }
+
+ } else {
+
+ /* Load. */
+
+ /* generate the transfer */
+ IRTemp newRt = newTemp(Ity_I32);
+ IROp widen = Iop_INVALID;
+ switch (ty) {
+ case Ity_I8:
+ widen = syned ? Iop_8Sto32 : Iop_8Uto32; break;
+ case Ity_I16:
+ widen = syned ? Iop_16Sto32 : Iop_16Uto32; break;
+ case Ity_I32:
+ break;
+ default:
+ vassert(0);
+ }
+ if (widen == Iop_INVALID) {
+ assign(newRt, loadLE(ty, mkexpr(transAddr)));
+ } else {
+ assign(newRt, unop(widen, loadLE(ty, mkexpr(transAddr))));
+ }
+ if (loadsPC) {
+ vassert(rT == 15);
+ llPutIReg(rT, mkexpr(newRt));
+ } else {
+ putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
+ }
+
+ if (loadsPC) {
+ /* Presumably this is an interworking branch. */
+ irsb->next = mkexpr(newRt);
+ irsb->jumpkind = Ijk_Boring; /* or _Ret ? */
+ dres.whatNext = Dis_StopHere;
+ }
+
+ /* Update Rn if necessary. */
+ if (bW == 1) {
+ vassert(rN != rT); // assured by validity check above
+ putIRegT(rN, mkexpr(postAddr), IRTemp_INVALID);
+ }
+ }
+
+ if (bP == 1 && bW == 0) {
+ DIP("%s.w r%u, [r%u, #%c%u]\n",
+ nm, rT, rN, bU ? '+' : '-', imm8);
+ }
+ else if (bP == 1 && bW == 1) {
+ DIP("%s.w r%u, [r%u, #%c%u]!\n",
+ nm, rT, rN, bU ? '+' : '-', imm8);
+ }
+ else {
+ vassert(bP == 0 && bW == 1);
+ DIP("%s.w r%u, [r%u], #%c%u\n",
+ nm, rT, rN, bU ? '+' : '-', imm8);
+ }
+
+ goto decode_success;
+ }
+ }
+
+ /* ------------- LD/ST reg+(reg<<imm2) ------------- */
+ /* Loads and stores of the form:
+        op Rt, [Rn, Rm, LSL #imm2]
+ where op is one of
+ ldrb ldrh ldr ldrsb ldrsh
+ strb strh str
+ */
+ if (INSN0(15,9) == BITS7(1,1,1,1,1,0,0)
+ && INSN1(11,6) == BITS6(0,0,0,0,0,0)) {
+ Bool valid = True;
+ Bool syned = False;
+ Bool isST = False;
+ IRType ty = Ity_I8;
+ HChar* nm = "???";
+
+ switch (INSN0(8,4)) {
+ case BITS5(0,0,0,0,0): // strb
+ nm = "strb"; isST = True; break;
+ case BITS5(0,0,0,0,1): // ldrb
+ nm = "ldrb"; break;
+ case BITS5(1,0,0,0,1): // ldrsb
+ nm = "ldrsb"; syned = True; break;
+ case BITS5(0,0,0,1,0): // strh
+ nm = "strh"; ty = Ity_I16; isST = True; break;
+ case BITS5(0,0,0,1,1): // ldrh
+ nm = "ldrh"; ty = Ity_I16; break;
+ case BITS5(1,0,0,1,1): // ldrsh
+ nm = "ldrsh"; ty = Ity_I16; syned = True; break;
+ case BITS5(0,0,1,0,0): // str
+ nm = "str"; ty = Ity_I32; isST = True; break;
+         case BITS5(0,0,1,0,1): // ldr
+            nm = "ldr"; ty = Ity_I32; break;
+ default:
+ valid = False; break;
+ }
+
+ UInt rN = INSN0(3,0);
+ UInt rM = INSN1(3,0);
+ UInt rT = INSN1(15,12);
+ UInt imm2 = INSN1(5,4);
+ Bool loadsPC = False;
+
+ if (ty == Ity_I8 || ty == Ity_I16) {
+ /* all 8- and 16-bit load and store cases have the
+ same exclusion set. */
+ if (rN == 15 || isBadRegT(rT) || isBadRegT(rM))
+ valid = False;
+ } else {
+ vassert(ty == Ity_I32);
+ if (rN == 15 || isBadRegT(rM))
+ valid = False;
+ if (isST && rT == 15)
+ valid = False;
+         /* If it is a load and rT is 15, that's only allowable if we
+            are not in an IT block, or are the last insn in it. We
+            need to insert a dynamic check for that. */
+ if (!isST && rT == 15)
+ loadsPC = True;
+ }
+
+ if (valid) {
+ // if it's a branch, it can't happen in the middle of an IT block
+ if (loadsPC)
+ gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
+ // go uncond
+ mk_skip_over_T32_if_cond_is_false(condT);
+ condT = IRTemp_INVALID;
+ // now uncond
+
+ IRTemp transAddr = newTemp(Ity_I32);
+ assign(transAddr,
+ binop( Iop_Add32,
+ getIRegT(rN),
+ binop(Iop_Shl32, getIRegT(rM), mkU8(imm2)) ));
+
+ if (isST) {
+ IRTemp oldRt = newTemp(Ity_I32);
+ assign(oldRt, getIRegT(rT));
+ switch (ty) {
+ case Ity_I8:
+ storeLE(mkexpr(transAddr),
+ unop(Iop_32to8, mkexpr(oldRt)));
+ break;
+ case Ity_I16:
+ storeLE(mkexpr(transAddr),
+ unop(Iop_32to16, mkexpr(oldRt)));
+ break;
+ case Ity_I32:
+ storeLE(mkexpr(transAddr), mkexpr(oldRt));
+ break;
+ default:
+ vassert(0);
+ }
+ } else {
+ IRTemp newRt = newTemp(Ity_I32);
+ IROp widen = Iop_INVALID;
+ switch (ty) {
+ case Ity_I8:
+ widen = syned ? Iop_8Sto32 : Iop_8Uto32; break;
+ case Ity_I16:
+ widen = syned ? Iop_16Sto32 : Iop_16Uto32; break;
+ case Ity_I32:
+ break;
+ default:
+ vassert(0);
+ }
+ if (widen == Iop_INVALID) {
+ assign(newRt, loadLE(ty, mkexpr(transAddr)));
+ } else {
+ assign(newRt, unop(widen, loadLE(ty, mkexpr(transAddr))));
+ }
+
+ /* If we're loading the PC, putIRegT will assert. So go
+ direct via llPutIReg. In all other cases use putIRegT
+ as it is safer (although could simply use llPutIReg for
+ _all_ cases here.) */
+ if (loadsPC) {
+ vassert(rT == 15);
+ llPutIReg(rT, mkexpr(newRt));
+ } else {
+ putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
+ }
+
+ if (loadsPC) {
+ /* Presumably this is an interworking branch. */
+ irsb->next = mkexpr(newRt);
+ irsb->jumpkind = Ijk_Boring; /* or _Ret ? */
+ dres.whatNext = Dis_StopHere;
+ }
+ }
+
+ DIP("%s.w r%u, [r%u, r%u, LSL #%u]\n",
+ nm, rT, rN, rM, imm2);
+
+ goto decode_success;
+ }
+ }
+
+ /* --------------- LD/ST reg+imm12 --------------- */
+ /* Loads and stores of the form:
+ op Rt, [Rn, +#imm12]
+ where op is one of
+ ldrb ldrh ldr ldrsb ldrsh
+ strb strh str
+ */
+ if (INSN0(15,9) == BITS7(1,1,1,1,1,0,0)) {
+ Bool valid = True;
+ Bool syned = False;
+ Bool isST = False;
+ IRType ty = Ity_I8;
+ HChar* nm = "???";
+
+ switch (INSN0(8,4)) {
+ case BITS5(0,1,0,0,0): // strb
+ nm = "strb"; isST = True; break;
+ case BITS5(0,1,0,0,1): // ldrb
+ nm = "ldrb"; break;
+ case BITS5(1,1,0,0,1): // ldrsb
+ nm = "ldrsb"; syned = True; break;
+ case BITS5(0,1,0,1,0): // strh
+ nm = "strh"; ty = Ity_I16; isST = True; break;
+ case BITS5(0,1,0,1,1): // ldrh
+ nm = "ldrh"; ty = Ity_I16; break;
+ case BITS5(1,1,0,1,1): // ldrsh
+ nm = "ldrsh"; ty = Ity_I16; syned = True; break;
+ case BITS5(0,1,1,0,0): // str
+ nm = "str"; ty = Ity_I32; isST = True; break;
+         case BITS5(0,1,1,0,1): // ldr
+            nm = "ldr"; ty = Ity_I32; break;
+ default:
+ valid = False; break;
+ }
+
+ UInt rN = INSN0(3,0);
+ UInt rT = INSN1(15,12);
+ UInt imm12 = INSN1(11,0);
+ Bool loadsPC = False;
+
+ if (ty == Ity_I8 || ty == Ity_I16) {
+ /* all 8- and 16-bit load and store cases have the
+ same exclusion set. */
+ if (rN == 15 || isBadRegT(rT))
+ valid = False;
+ } else {
+ vassert(ty == Ity_I32);
+ if (isST) {
+ if (rN == 15 || rT == 15)
+ valid = False;
+ } else {
+            /* For a 32-bit load, rT == 15 is only allowable if we
+               are not in an IT block, or are the last insn in it.
+               We need to insert a dynamic check for that. Also, in
+               this particular case, rN == 15 is allowable; the value
+               obtained for rN is then (apparently)
+               "word-align(address of current insn + 4)". */
+ if (rT == 15)
+ loadsPC = True;
+ }
+ }
+
+ if (valid) {
+ // if it's a branch, it can't happen in the middle of an IT block
+ if (loadsPC)
+ gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
+ // go uncond
+ mk_skip_over_T32_if_cond_is_false(condT);
+ condT = IRTemp_INVALID;
+ // now uncond
+
+ IRTemp rNt = newTemp(Ity_I32);
+ if (rN == 15) {
+ vassert(ty == Ity_I32 && !isST);
+ assign(rNt, binop(Iop_And32, getIRegT(rN), mkU32(~3)));
+ } else {
+ assign(rNt, getIRegT(rN));
+ }
+
+ IRTemp transAddr = newTemp(Ity_I32);
+ assign(transAddr,
+ binop( Iop_Add32, mkexpr(rNt), mkU32(imm12) ));
+
+ if (isST) {
+ IRTemp oldRt = newTemp(Ity_I32);
+ assign(oldRt, getIRegT(rT));
+ switch (ty) {
+ case Ity_I8:
+ storeLE(mkexpr(transAddr),
+ unop(Iop_32to8, mkexpr(oldRt)));
+ break;
+ case Ity_I16:
+ storeLE(mkexpr(transAddr),
+ unop(Iop_32to16, mkexpr(oldRt)));
+ break;
+ case Ity_I32:
+ storeLE(mkexpr(transAddr), mkexpr(oldRt));
+ break;
+ default:
+ vassert(0);
+ }
+ } else {
+ IRTemp newRt = newTemp(Ity_I32);
+ IROp widen = Iop_INVALID;
+ switch (ty) {
+ case Ity_I8:
+ widen = syned ? Iop_8Sto32 : Iop_8Uto32; break;
+ case Ity_I16:
+ widen = syned ? Iop_16Sto32 : Iop_16Uto32; break;
+ case Ity_I32:
+ break;
+ default:
+ vassert(0);
+ }
+ if (widen == Iop_INVALID) {
+ assign(newRt, loadLE(ty, mkexpr(transAddr)));
+ } else {
+ assign(newRt, unop(widen, loadLE(ty, mkexpr(transAddr))));
+ }
+ putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
+
+ if (loadsPC) {
+ /* Presumably this is an interworking branch. */
+ irsb->next = mkexpr(newRt);
+ irsb->jumpkind = Ijk_Boring; /* or _Ret ? */
+ dres.whatNext = Dis_StopHere;
+ }
+ }
+
+ DIP("%s.w r%u, [r%u, +#%u]\n", nm, rT, rN, imm12);
+
+ goto decode_success;
+ }
+ }
+
+ /* -------------- LDRD/STRD reg+/-#imm8 -------------- */
+ /* Doubleword loads and stores of the form:
+      ldrd/strd Rt, Rt2, [Rn, #+/-imm8] or
+ ldrd/strd Rt, Rt2, [Rn], #+/-imm8 or
+ ldrd/strd Rt, Rt2, [Rn, #+/-imm8]!
+ */
+ if (INSN0(15,9) == BITS7(1,1,1,0,1,0,0) && INSN0(6,6) == 1) {
+ UInt bP = INSN0(8,8);
+ UInt bU = INSN0(7,7);
+ UInt bW = INSN0(5,5);
+ UInt bL = INSN0(4,4); // 1: load 0: store
+ UInt rN = INSN0(3,0);
+ UInt rT = INSN1(15,12);
+ UInt rT2 = INSN1(11,8);
+ UInt imm8 = INSN1(7,0);
+
+ Bool valid = True;
+ if (bP == 0 && bW == 0) valid = False;
+ if (bW == 1 && (rN == rT || rN == rT2)) valid = False;
+ if (isBadRegT(rT) || isBadRegT(rT2)) valid = False;
+ if (rN == 15) valid = False;
+ if (bL == 1 && rT == rT2) valid = False;
+
+ if (valid) {
+ // go uncond
+ mk_skip_over_T32_if_cond_is_false(condT);
+ condT = IRTemp_INVALID;
+ // now uncond
+
+ IRTemp preAddr = newTemp(Ity_I32);
+ assign(preAddr, getIRegT(rN));
+
+ IRTemp postAddr = newTemp(Ity_I32);
+ assign(postAddr, binop(bU == 1 ? Iop_Add32 : Iop_Sub32,
+ mkexpr(preAddr), mkU32(imm8 << 2)));
+
+ IRTemp transAddr = bP == 1 ? postAddr : preAddr;
+
+ if (bL == 0) {
+ IRTemp oldRt = newTemp(Ity_I32);
+ IRTemp oldRt2 = newTemp(Ity_I32);
+ assign(oldRt, getIRegT(rT));
+ assign(oldRt2, getIRegT(rT2));
+ storeLE(mkexpr(transAddr),
+ mkexpr(oldRt));
+ storeLE(binop(Iop_Add32, mkexpr(transAddr), mkU32(4)),
+ mkexpr(oldRt2));
+ } else {
+ IRTemp newRt = newTemp(Ity_I32);
+ IRTemp newRt2 = newTemp(Ity_I32);
+ assign(newRt,
+ loadLE(Ity_I32,
+ mkexpr(transAddr)));
+ assign(newRt2,
+ loadLE(Ity_I32,
+ binop(Iop_Add32, mkexpr(transAddr), mkU32(4))));
+ putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
+ putIRegT(rT2, mkexpr(newRt2), IRTemp_INVALID);
+ }
+
+ if (bW == 1) {
+ putIRegT(rN, mkexpr(postAddr), IRTemp_INVALID);
+ }
+
+ HChar* nm = bL ? "ldrd" : "strd";
+
+ if (bP == 1 && bW == 0) {
+ DIP("%s.w r%u, r%u, [r%u, #%c%u]\n",
+ nm, rT, rT2, rN, bU ? '+' : '-', imm8 << 2);
+ }
+ else if (bP == 1 && bW == 1) {
+ DIP("%s.w r%u, r%u, [r%u, #%c%u]!\n",
+ nm, rT, rT2, rN, bU ? '+' : '-', imm8 << 2);
+ }
+ else {
+ vassert(bP == 0 && bW == 1);
+ DIP("%s.w r%u, r%u, [r%u], #%c%u\n",
+ nm, rT, rT2, rN, bU ? '+' : '-', imm8 << 2);
+ }
+
+ goto decode_success;
+ }
+ }
+
+ /* -------------- (T3) Bcond.W label -------------- */
+ /* This variant carries its own condition, so can't be part of an
+ IT block ... */
+ if (INSN0(15,11) == BITS5(1,1,1,1,0)
+ && INSN1(15,14) == BITS2(1,0)
+ && INSN1(12,12) == 0) {
+ UInt cond = INSN0(9,6);
+ if (cond != ARMCondAL && cond != ARMCondNV) {
+ Int simm21
+ = (INSN0(10,10) << (1 + 1 + 6 + 11 + 1))
+ | (INSN1(11,11) << (1 + 6 + 11 + 1))
+ | (INSN1(13,13) << (6 + 11 + 1))
+ | (INSN0(5,0) << (11 + 1))
+ | (INSN1(10,0) << 1);
+ simm21 = (simm21 << 11) >> 11;
+
+ vassert(0 == (guest_R15_curr_instr_notENC & 1));
+ UInt dst = simm21 + guest_R15_curr_instr_notENC + 4;
+
+ /* Not allowed in an IT block; SIGILL if so. */
+ gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
+
+ IRTemp kondT = newTemp(Ity_I32);
+ assign( kondT, mk_armg_calculate_condition(cond) );
+ stmt( IRStmt_Exit( unop(Iop_32to1, mkexpr(kondT)),
+ Ijk_Boring,
+ IRConst_U32(dst | 1/*CPSR.T*/) ));
+ irsb->next = mkU32( (guest_R15_curr_instr_notENC + 4)
+ | 1 /*CPSR.T*/ );
+ irsb->jumpkind = Ijk_Boring;
+ dres.whatNext = Dis_StopHere;
+ DIP("b%s.w 0x%x\n", nCC(cond), dst);
+ goto decode_success;
+ }
+ }
+
+ /* ---------------- (T4) B.W label ---------------- */
+   /* ... whereas this variant doesn't carry its own condition, so it
+      has to be either unconditional, or conditional by virtue of
+      being the last in an IT block. The upside is that there are 4
+      more bits available for the jump offset, so it has a 16-times
+      greater branch range than the T3 variant. */
+ if (INSN0(15,11) == BITS5(1,1,1,1,0)
+ && INSN1(15,14) == BITS2(1,0)
+ && INSN1(12,12) == 1) {
+ if (1) {
+ UInt bS = INSN0(10,10);
+ UInt bJ1 = INSN1(13,13);
+ UInt bJ2 = INSN1(11,11);
+ UInt bI1 = 1 ^ (bJ1 ^ bS);
+ UInt bI2 = 1 ^ (bJ2 ^ bS);
+ Int simm25
+ = (bS << (1 + 1 + 10 + 11 + 1))
+ | (bI1 << (1 + 10 + 11 + 1))
+ | (bI2 << (10 + 11 + 1))
+ | (INSN0(9,0) << (11 + 1))
+ | (INSN1(10,0) << 1);
+ simm25 = (simm25 << 7) >> 7;
+
+ vassert(0 == (guest_R15_curr_instr_notENC & 1));
+ UInt dst = simm25 + guest_R15_curr_instr_notENC + 4;
+
+ /* If in an IT block, must be the last insn. */
+ gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
+
+ // go uncond
+ mk_skip_over_T32_if_cond_is_false(condT);
+ condT = IRTemp_INVALID;
+ // now uncond
+
+ // branch to dst
+ irsb->next = mkU32( dst | 1 /*CPSR.T*/ );
+ irsb->jumpkind = Ijk_Boring;
+ dres.whatNext = Dis_StopHere;
+ DIP("b.w 0x%x\n", dst);
+ goto decode_success;
+ }
+ }
+
+ /* ------------------ TBB, TBH ------------------ */
+ if (INSN0(15,4) == 0xE8D && INSN1(15,5) == 0x780) {
+ UInt rN = INSN0(3,0);
+ UInt rM = INSN1(3,0);
+ UInt bH = INSN1(4,4);
+ if (bH/*ATC*/ || (rN != 13 && !isBadRegT(rM))) {
+         /* Must be outside an IT block, or the last insn in one */
+ gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
+ /* Go uncond */
+ mk_skip_over_T32_if_cond_is_false(condT);
+ condT = IRTemp_INVALID;
+
+ IRExpr* ea
+ = binop(Iop_Add32,
+ getIRegT(rN),
+ bH ? binop(Iop_Shl32, getIRegT(rM), mkU8(1))
+ : getIRegT(rM));
+
+ IRTemp delta = newTemp(Ity_I32);
+ if (bH) {
+ assign(delta, unop(Iop_16Uto32, loadLE(Ity_I16, ea)));
+ } else {
+ assign(delta, unop(Iop_8Uto32, loadLE(Ity_I8, ea)));
+ }
+
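+         /* The loaded table entry is a halfword count: the branch
+            target is PC + 2 * entry, with bit 0 set so as to remain
+            in Thumb mode. */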
+ irsb->next
+ = binop(Iop_Or32,
+ binop(Iop_Add32,
+ getIRegT(15),
+ binop(Iop_Shl32, mkexpr(delta), mkU8(1))
+ ),
+ mkU32(1)
+ );
+ irsb->jumpkind = Ijk_Boring;
+ dres.whatNext = Dis_StopHere;
+ DIP("tb%c [r%u, r%u%s]\n",
+ bH ? 'h' : 'b', rN, rM, bH ? ", LSL #1" : "");
+ goto decode_success;
+ }
+ }
+
+ /* ------------------ UBFX ------------------ */
+ /* ------------------ SBFX ------------------ */
+   /* There are also ARM versions of the same, but it doesn't seem
+      worth the hassle to common up the handling (it's only a couple
+      of C statements). */
+ if ((INSN0(15,4) == 0xF3C // UBFX
+ || INSN0(15,4) == 0xF34) // SBFX
+ && INSN1(15,15) == 0 && INSN1(5,5) == 0) {
+ UInt rN = INSN0(3,0);
+ UInt rD = INSN1(11,8);
+ UInt lsb = (INSN1(14,12) << 2) | INSN1(7,6);
+ UInt wm1 = INSN1(4,0);
+ UInt msb = lsb + wm1;
+ if (!isBadRegT(rD) && !isBadRegT(rN) && msb <= 31) {
+ Bool isU = INSN0(15,4) == 0xF3C;
+ IRTemp src = newTemp(Ity_I32);
+ IRTemp tmp = newTemp(Ity_I32);
+ IRTemp res = newTemp(Ity_I32);
+ UInt mask = ((1 << wm1) - 1) + (1 << wm1);
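+         /* mask == 2^(wm1+1) - 1, computed this way so that wm1 == 31
+            doesn't require an (undefined) shift by 32. */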
+         vassert(msb <= 31);
+ vassert(mask != 0); // guaranteed by msb being in 0 .. 31 inclusive
+
+ assign(src, getIRegT(rN));
+ assign(tmp, binop(Iop_And32,
+ binop(Iop_Shr32, mkexpr(src), mkU8(lsb)),
+ mkU32(mask)));
+ assign(res, binop(isU ? Iop_Shr32 : Iop_Sar32,
+ binop(Iop_Shl32, mkexpr(tmp), mkU8(31-wm1)),
+ mkU8(31-wm1)));
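+         /* Shifting left and then right by (31 - wm1) propagates bit
+            wm1 of the extracted field: Shr32 zero-extends it (UBFX),
+            Sar32 sign-extends it (SBFX). */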
+
+ putIRegT(rD, mkexpr(res), condT);
+
+ DIP("%s r%u, r%u, #%u, #%u\n",
+ isU ? "ubfx" : "sbfx", rD, rN, lsb, wm1 + 1);
+ goto decode_success;
+ }
+ }
+
+ /* ------------------ UXTB ------------------ */
+ /* ------------------ UXTH ------------------ */
+ /* ------------------ SXTB ------------------ */
+ /* ------------------ SXTH ------------------ */
+ /* ----------------- UXTB16 ----------------- */
+ /* ----------------- SXTB16 ----------------- */
+ /* FIXME: this is an exact duplicate of the ARM version. They
+ should be commoned up. */
+ if ((INSN0(15,0) == 0xFA5F // UXTB
+ || INSN0(15,0) == 0xFA1F // UXTH
+ || INSN0(15,0) == 0xFA4F // SXTB
+ || INSN0(15,0) == 0xFA0F // SXTH
+ || INSN0(15,0) == 0xFA3F // UXTB16
+ || INSN0(15,0) == 0xFA2F) // SXTB16
+ && INSN1(15,12) == BITS4(1,1,1,1)
+ && INSN1(7,6) == BITS2(1,0)) {
+ UInt rD = INSN1(11,8);
+ UInt rM = INSN1(3,0);
+ UInt rot = INSN1(5,4);
+ if (!isBadRegT(rD) && !isBadRegT(rM)) {
+ HChar* nm = "???";
+ IRTemp srcT = newTemp(Ity_I32);
+ IRTemp rotT = newTemp(Ity_I32);
+ IRTemp dstT = newTemp(Ity_I32);
+ assign(srcT, getIRegT(rM));
+ assign(rotT, genROR32(srcT, 8 * rot));
+ switch (INSN0(15,0)) {
+ case 0xFA5F: // UXTB
+ nm = "uxtb";
+ assign(dstT, unop(Iop_8Uto32,
+ unop(Iop_32to8, mkexpr(rotT))));
+ break;
+ case 0xFA1F: // UXTH
+ nm = "uxth";
+ assign(dstT, unop(Iop_16Uto32,
+ unop(Iop_32to16, mkexpr(rotT))));
+ break;
+ case 0xFA4F: // SXTB
+ nm = "sxtb";
+ assign(dstT, unop(Iop_8Sto32,
+ unop(Iop_32to8, mkexpr(rotT))));
+ break;
+ case 0xFA0F: // SXTH
+ nm = "sxth";
+ assign(dstT, unop(Iop_16Sto32,
+ unop(Iop_32to16, mkexpr(rotT))));
+ break;
+ case 0xFA3F: // UXTB16
+ nm = "uxtb16";
+ assign(dstT, binop(Iop_And32, mkexpr(rotT),
+ mkU32(0x00FF00FF)));
+ break;
+ case 0xFA2F: { // SXTB16
+ nm = "sxtb16";
+ IRTemp lo32 = newTemp(Ity_I32);
+ IRTemp hi32 = newTemp(Ity_I32);
+ assign(lo32, binop(Iop_And32, mkexpr(rotT), mkU32(0xFF)));
+ assign(hi32, binop(Iop_Shr32, mkexpr(rotT), mkU8(16)));
+ assign(
+ dstT,
+ binop(Iop_Or32,
+ binop(Iop_And32,
+ unop(Iop_8Sto32,
+ unop(Iop_32to8, mkexpr(lo32))),
+ mkU32(0xFFFF)),
+ binop(Iop_Shl32,
+ unop(Iop_8Sto32,
+ unop(Iop_32to8, mkexpr(hi32))),
+ mkU8(16))
+ ));
+ break;
+ }
+ default:
+ vassert(0);
+ }
+ putIRegT(rD, mkexpr(dstT), condT);
+ DIP("%s r%u, r%u, ror #%u\n", nm, rD, rM, 8 * rot);
+ goto decode_success;
+ }
+ }
+
+ /* -------------- MUL.W Rd, Rn, Rm -------------- */
+ if (INSN0(15,4) == 0xFB0
+ && (INSN1(15,0) & 0xF0F0) == 0xF000) {
+ UInt rN = INSN0(3,0);
+ UInt rD = INSN1(11,8);
+ UInt rM = INSN1(3,0);
+ if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
+ IRTemp res = newTemp(Ity_I32);
+ assign(res, binop(Iop_Mul32, getIRegT(rN), getIRegT(rM)));
+ putIRegT(rD, mkexpr(res), condT);
+ DIP("mul.w r%u, r%u, r%u\n", rD, rN, rM);
+ goto decode_success;
+ }
+ }
+
+ /* ------------------ {U,S}MULL ------------------ */
+ if ((INSN0(15,4) == 0xFB8 || INSN0(15,4) == 0xFBA)
+ && INSN1(7,4) == BITS4(0,0,0,0)) {
+ UInt isU = INSN0(5,5);
+ UInt rN = INSN0(3,0);
+ UInt rDlo = INSN1(15,12);
+ UInt rDhi = INSN1(11,8);
+ UInt rM = INSN1(3,0);
+ if (!isBadRegT(rDhi) && !isBadRegT(rDlo)
+ && !isBadRegT(rN) && !isBadRegT(rM) && rDlo != rDhi) {
+ IRTemp res = newTemp(Ity_I64);
+ assign(res, binop(isU ? Iop_MullU32 : Iop_MullS32,
+ getIRegT(rN), getIRegT(rM)));
+ putIRegT( rDhi, unop(Iop_64HIto32, mkexpr(res)), condT );
+ putIRegT( rDlo, unop(Iop_64to32, mkexpr(res)), condT );
+ DIP("%cmull r%u, r%u, r%u, r%u\n",
+ isU ? 'u' : 's', rDlo, rDhi, rN, rM);
+ goto decode_success;
+ }
+ }
+
+ /* ------------------ ML{A,S} ------------------ */
+ if (INSN0(15,4) == 0xFB0
+ && ( INSN1(7,4) == BITS4(0,0,0,0) // MLA
+ || INSN1(7,4) == BITS4(0,0,0,1))) { // MLS
+ UInt rN = INSN0(3,0);
+ UInt rA = INSN1(15,12);
+ UInt rD = INSN1(11,8);
+ UInt rM = INSN1(3,0);
+ if (!isBadRegT(rD) && !isBadRegT(rN)
+ && !isBadRegT(rM) && !isBadRegT(rA)) {
+ Bool isMLA = INSN1(7,4) == BITS4(0,0,0,0);
+ IRTemp res = newTemp(Ity_I32);
+ assign(res,
+ binop(isMLA ? Iop_Add32 : Iop_Sub32,
+ getIRegT(rA),
+ binop(Iop_Mul32, getIRegT(rN), getIRegT(rM))));
+ putIRegT(rD, mkexpr(res), condT);
+ DIP("%s r%u, r%u, r%u, r%u\n",
+ isMLA ? "mla" : "mls", rD, rN, rM, rA);
+ goto decode_success;
+ }
+ }
+
+ /* ------------------ (T3) ADR ------------------ */
+ if ((INSN0(15,0) == 0xF20F || INSN0(15,0) == 0xF60F)
+ && INSN1(15,15) == 0) {
+ /* rD = align4(PC) + imm32 */
+ UInt rD = INSN1(11,8);
+ if (!isBadRegT(rD)) {
+ UInt imm32 = (INSN0(10,10) << 11)
+ | (INSN1(14,12) << 8) | INSN1(7,0);
+ putIRegT(rD, binop(Iop_Add32,
+ binop(Iop_And32, getIRegT(15), mkU32(~3U)),
+ mkU32(imm32)),
+ condT);
+ DIP("add r%u, pc, #%u\n", rD, imm32);
+ goto decode_success;
+ }
+ }
+
+ /* ----------------- (T1) UMLAL ----------------- */
+ /* ----------------- (T1) SMLAL ----------------- */
+ if ((INSN0(15,4) == 0xFBE // UMLAL
+ || INSN0(15,4) == 0xFBC) // SMLAL
+ && INSN1(7,4) == BITS4(0,0,0,0)) {
+ UInt rN = INSN0(3,0);
+ UInt rDlo = INSN1(15,12);
+ UInt rDhi = INSN1(11,8);
+ UInt rM = INSN1(3,0);
+ if (!isBadRegT(rDlo) && !isBadRegT(rDhi) && !isBadRegT(rN)
+ && !isBadRegT(rM) && rDhi != rDlo) {
+ Bool isS = INSN0(15,4) == 0xFBC;
+ IRTemp argL = newTemp(Ity_I32);
+ IRTemp argR = newTemp(Ity_I32);
+ IRTemp old = newTemp(Ity_I64);
+ IRTemp res = newTemp(Ity_I64);
+ IRTemp resHi = newTemp(Ity_I32);
+ IRTemp resLo = newTemp(Ity_I32);
+ IROp mulOp = isS ? Iop_MullS32 : Iop_MullU32;
+ assign( argL, getIRegT(rM));
+ assign( argR, getIRegT(rN));
+ assign( old, binop(Iop_32HLto64, getIRegT(rDhi), getIRegT(rDlo)) );
+ assign( res, binop(Iop_Add64,
+ mkexpr(old),
+ binop(mulOp, mkexpr(argL), mkexpr(argR))) );
+ assign( resHi, unop(Iop_64HIto32, mkexpr(res)) );
+ assign( resLo, unop(Iop_64to32, mkexpr(res)) );
+ putIRegT( rDhi, mkexpr(resHi), condT );
+ putIRegT( rDlo, mkexpr(resLo), condT );
+ DIP("%cmlal r%u, r%u, r%u, r%u\n",
+ isS ? 's' : 'u', rDlo, rDhi, rN, rM);
+ goto decode_success;
+ }
+ }
+
+ /* ------------------ (T2) ADR ------------------ */
+ if ((INSN0(15,0) == 0xF2AF || INSN0(15,0) == 0xF6AF)
+ && INSN1(15,15) == 0) {
+ /* rD = align4(PC) - imm32 */
+ UInt rD = INSN1(11,8);
+ if (!isBadRegT(rD)) {
+ UInt imm32 = (INSN0(10,10) << 11)
+ | (INSN1(14,12) << 8) | INSN1(7,0);
+ putIRegT(rD, binop(Iop_Sub32,
+ binop(Iop_And32, getIRegT(15), mkU32(~3U)),
+ mkU32(imm32)),
+ condT);
+ DIP("sub r%u, pc, #%u\n", rD, imm32);
+ goto decode_success;
+ }
+ }
+
+ /* ------------------- (T1) BFI ------------------- */
+ /* ------------------- (T1) BFC ------------------- */
+ if (INSN0(15,4) == 0xF36 && INSN1(15,15) == 0 && INSN1(5,5) == 0) {
+ UInt rD = INSN1(11,8);
+ UInt rN = INSN0(3,0);
+ UInt msb = INSN1(4,0);
+ UInt lsb = (INSN1(14,12) << 2) | INSN1(7,6);
+ if (isBadRegT(rD) || rN == 13 || msb < lsb) {
+ /* undecodable; fall through */
+ } else {
+ IRTemp src = newTemp(Ity_I32);
+ IRTemp olddst = newTemp(Ity_I32);
+ IRTemp newdst = newTemp(Ity_I32);
+ UInt mask = 1 << (msb - lsb);
+ mask = (mask - 1) + mask;
+         vassert(mask != 0); // guaranteed since msb >= lsb, checked above
+ mask <<= lsb;
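+         /* mask now covers exactly bits msb .. lsb inclusive. */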
+
+ assign(src, rN == 15 ? mkU32(0) : getIRegT(rN));
+ assign(olddst, getIRegT(rD));
+ assign(newdst,
+ binop(Iop_Or32,
+ binop(Iop_And32,
+ binop(Iop_Shl32, mkexpr(src), mkU8(lsb)),
+ mkU32(mask)),
+ binop(Iop_And32,
+ mkexpr(olddst),
+ mkU32(~mask)))
+ );
+
+ putIRegT(rD, mkexpr(newdst), condT);
+
+ if (rN == 15) {
+ DIP("bfc r%u, #%u, #%u\n",
+ rD, lsb, msb-lsb+1);
+ } else {
+ DIP("bfi r%u, r%u, #%u, #%u\n",
+ rD, rN, lsb, msb-lsb+1);
+ }
+ goto decode_success;
+ }
+ }
+
+ /* ------------------- (T1) SXTAH ------------------- */
+ /* ------------------- (T1) UXTAH ------------------- */
+ if ((INSN0(15,4) == 0xFA1 // UXTAH
+ || INSN0(15,4) == 0xFA0) // SXTAH
+ && INSN1(15,12) == BITS4(1,1,1,1)
+ && INSN1(7,6) == BITS2(1,0)) {
+ Bool isU = INSN0(15,4) == 0xFA1;
+ UInt rN = INSN0(3,0);
+ UInt rD = INSN1(11,8);
+ UInt rM = INSN1(3,0);
+ UInt rot = INSN1(5,4);
+ if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
+ IRTemp srcL = newTemp(Ity_I32);
+ IRTemp srcR = newTemp(Ity_I32);
+ IRTemp res = newTemp(Ity_I32);
+ assign(srcR, getIRegT(rM));
+ assign(srcL, getIRegT(rN));
+ assign(res, binop(Iop_Add32,
+ mkexpr(srcL),
+ unop(isU ? Iop_16Uto32 : Iop_16Sto32,
+ unop(Iop_32to16,
+ genROR32(srcR, 8 * rot)))));
+ putIRegT(rD, mkexpr(res), condT);
+ DIP("%cxtah r%u, r%u, r%u, ror #%u\n",
+ isU ? 'u' : 's', rD, rN, rM, rot);
+ goto decode_success;
+ }
+ }
+
+ /* ------------------- (T1) SXTAB ------------------- */
+ /* ------------------- (T1) UXTAB ------------------- */
+ if ((INSN0(15,4) == 0xFA5 // UXTAB
+ || INSN0(15,4) == 0xFA4) // SXTAB
+ && INSN1(15,12) == BITS4(1,1,1,1)
+ && INSN1(7,6) == BITS2(1,0)) {
+ Bool isU = INSN0(15,4) == 0xFA5;
+ UInt rN = INSN0(3,0);
+ UInt rD = INSN1(11,8);
+ UInt rM = INSN1(3,0);
+ UInt rot = INSN1(5,4);
+ if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
+ IRTemp srcL = newTemp(Ity_I32);
+ IRTemp srcR = newTemp(Ity_I32);
+ IRTemp res = newTemp(Ity_I32);
+ assign(srcR, getIRegT(rM));
+ assign(srcL, getIRegT(rN));
+ assign(res, binop(Iop_Add32,
+ mkexpr(srcL),
+ unop(isU ? Iop_8Uto32 : Iop_8Sto32,
+ unop(Iop_32to8,
+ genROR32(srcR, 8 * rot)))));
+ putIRegT(rD, mkexpr(res), condT);
+ DIP("%cxtab r%u, r%u, r%u, ror #%u\n",
+ isU ? 'u' : 's', rD, rN, rM, rot);
+ goto decode_success;
+ }
+ }
+
+ /* ------------------- (T1) CLZ ------------------- */
+ if (INSN0(15,4) == 0xFAB
+ && INSN1(15,12) == BITS4(1,1,1,1)
+ && INSN1(7,4) == BITS4(1,0,0,0)) {
+ UInt rM1 = INSN0(3,0);
+ UInt rD = INSN1(11,8);
+ UInt rM2 = INSN1(3,0);
+ if (!isBadRegT(rD) && !isBadRegT(rM1) && rM1 == rM2) {
+ IRTemp arg = newTemp(Ity_I32);
+ IRTemp res = newTemp(Ity_I32);
+ assign(arg, getIRegT(rM1));
+ assign(res, IRExpr_Mux0X(
+ unop(Iop_1Uto8,binop(Iop_CmpEQ32,
+ mkexpr(arg),
+ mkU32(0))),
+ unop(Iop_Clz32, mkexpr(arg)),
+ mkU32(32)
+ ));
+ putIRegT(rD, mkexpr(res), condT);
+ DIP("clz r%u, r%u\n", rD, rM1);
+ goto decode_success;
+ }
+ }
+
+ /* ------------------- (T1) RBIT ------------------- */
+ if (INSN0(15,4) == 0xFA9
+ && INSN1(15,12) == BITS4(1,1,1,1)
+ && INSN1(7,4) == BITS4(1,0,1,0)) {
+ UInt rM1 = INSN0(3,0);
+ UInt rD = INSN1(11,8);
+ UInt rM2 = INSN1(3,0);
+ if (!isBadRegT(rD) && !isBadRegT(rM1) && rM1 == rM2) {
+ IRTemp arg = newTemp(Ity_I32);
+ assign(arg, getIRegT(rM1));
+ IRTemp res = gen_BITREV(arg);
+ putIRegT(rD, mkexpr(res), condT);
+ DIP("rbit r%u, r%u\n", rD, rM1);
+ goto decode_success;
+ }
+ }
+
+ /* ------------------- (T2) REV ------------------- */
+ /* ------------------- (T2) REV16 ------------------- */
+ if (INSN0(15,4) == 0xFA9
+ && INSN1(15,12) == BITS4(1,1,1,1)
+ && ( INSN1(7,4) == BITS4(1,0,0,0) // REV
+ || INSN1(7,4) == BITS4(1,0,0,1))) { // REV16
+ UInt rM1 = INSN0(3,0);
+ UInt rD = INSN1(11,8);
+ UInt rM2 = INSN1(3,0);
+ Bool isREV = INSN1(7,4) == BITS4(1,0,0,0);
+ if (!isBadRegT(rD) && !isBadRegT(rM1) && rM1 == rM2) {
+ IRTemp arg = newTemp(Ity_I32);
+ assign(arg, getIRegT(rM1));
+ IRTemp res = isREV ? gen_REV(arg) : gen_REV16(arg);
+ putIRegT(rD, mkexpr(res), condT);
+ DIP("rev%s r%u, r%u\n", isREV ? "" : "16", rD, rM1);
+ goto decode_success;
+ }
+ }
+
+ /* -------------- (T1) MSR apsr, reg -------------- */
+ if (INSN0(15,4) == 0xF38
+ && INSN1(15,12) == BITS4(1,0,0,0) && INSN1(9,0) == 0x000) {
+ UInt rN = INSN0(3,0);
+ UInt write_ge = INSN1(10,10);
+ UInt write_nzcvq = INSN1(11,11);
+ if (!isBadRegT(rN) && (write_nzcvq || write_ge)) {
+ IRTemp rNt = newTemp(Ity_I32);
+ assign(rNt, getIRegT(rN));
+ desynthesise_APSR( write_nzcvq, write_ge, rNt, condT );
+ DIP("msr cpsr_%s%s, r%u\n",
+ write_nzcvq ? "f" : "", write_ge ? "g" : "", rN);
+ goto decode_success;
+ }
+ }
+
+ /* -------------- (T1) MRS reg, apsr -------------- */
+ if (INSN0(15,0) == 0xF3EF
+ && INSN1(15,12) == BITS4(1,0,0,0) && INSN1(7,0) == 0x00) {
+ UInt rD = INSN1(11,8);
+ if (!isBadRegT(rD)) {
+ IRTemp apsr = synthesise_APSR();
+ putIRegT( rD, mkexpr(apsr), condT );
+ DIP("mrs r%u, cpsr\n", rD);
+ goto decode_success;
+ }
+ }
+
+ /* ----------------- (T1) LDREX ----------------- */
+ if (INSN0(15,4) == 0xE85 && INSN1(11,8) == BITS4(1,1,1,1)) {
+ UInt rN = INSN0(3,0);
+ UInt rT = INSN1(15,12);
+ UInt imm8 = INSN1(7,0);
+ if (!isBadRegT(rT) && rN != 15) {
+ IRTemp res;
+ // go uncond
+ mk_skip_over_T32_if_cond_is_false( condT );
+ // now uncond
+ res = newTemp(Ity_I32);
+ stmt( IRStmt_LLSC(Iend_LE,
+ res,
+ binop(Iop_Add32, getIRegT(rN), mkU32(imm8 * 4)),
+ NULL/*this is a load*/ ));
+ putIRegT(rT, mkexpr(res), IRTemp_INVALID);
+ DIP("ldrex r%u, [r%u, #+%u]\n", rT, rN, imm8 * 4);
+ goto decode_success;
+ }
+ }
+
+ /* ----------------- (T1) STREX ----------------- */
+ if (INSN0(15,4) == 0xE84) {
+ UInt rN = INSN0(3,0);
+ UInt rT = INSN1(15,12);
+ UInt rD = INSN1(11,8);
+ UInt imm8 = INSN1(7,0);
+ if (!isBadRegT(rD) && !isBadRegT(rT) && rN != 15
+ && rD != rN && rD != rT) {
+ IRTemp resSC1, resSC32;
+
+ // go uncond
+ mk_skip_over_T32_if_cond_is_false( condT );
+ // now uncond
+
+ /* Ok, now we're unconditional. Do the store. */
+ resSC1 = newTemp(Ity_I1);
+ stmt( IRStmt_LLSC(Iend_LE,
+ resSC1,
+ binop(Iop_Add32, getIRegT(rN), mkU32(imm8 * 4)),
+ getIRegT(rT)) );
+
+ /* Set rD to 1 on failure, 0 on success. Currently we have
+ resSC1 == 0 on failure, 1 on success. */
+ resSC32 = newTemp(Ity_I32);
+ assign(resSC32,
+ unop(Iop_1Uto32, unop(Iop_Not1, mkexpr(resSC1))));
+
+ putIRegT(rD, mkexpr(resSC32), IRTemp_INVALID);
+ DIP("strex r%u, r%u, [r%u, #+%u]\n", rD, rT, rN, imm8 * 4);
+ goto decode_success;
+ }
+ }
+
+ /* -------------- v7 barrier insns -------------- */
+ if (INSN0(15,0) == 0xF3BF && (INSN1(15,0) & 0xFF0F) == 0x8F0F) {
+ /* XXX this isn't really right, is it? The generated IR does
+ them unconditionally. I guess it doesn't matter since it
+ doesn't do any harm to do them even when the guarding
+ condition is false -- it's just a performance loss. */
+ switch (INSN1(7,4)) {
+ case 0x4: /* DSB */
+ stmt( IRStmt_MBE(Imbe_Fence) );
+ DIP("DSB\n");
+ goto decode_success;
+ case 0x5: /* DMB */
+ stmt( IRStmt_MBE(Imbe_Fence) );
+ DIP("DMB\n");
+ goto decode_success;
+ case 0x6: /* ISB */
+ stmt( IRStmt_MBE(Imbe_Fence) );
+ DIP("ISB\n");
+ goto decode_success;
+ default:
+ break;
+ }
+ }
+
+ /* ------------------- NOP ------------------ */
+ if (INSN0(15,0) == 0xF3AF && INSN1(15,0) == 0x8000) {
+ DIP("nop\n");
+ goto decode_success;
+ }
+
+ /* ----------------------------------------------------------- */
+ /* -- VFP (CP 10, CP 11) instructions (in Thumb mode) -- */
+ /* ----------------------------------------------------------- */
+
+ if (INSN0(15,12) == BITS4(1,1,1,0)) {
+ UInt insn28 = (INSN0(11,0) << 16) | INSN1(15,0);
+ Bool ok_vfp = decode_CP10_CP11_instruction (
+ &dres, insn28, condT, ARMCondAL/*bogus*/,
+ True/*isT*/
+ );
+ if (ok_vfp)
+ goto decode_success;
+ }
+
+ /* ----------------------------------------------------------- */
+ /* -- NEON instructions (in Thumb mode) -- */
+ /* ----------------------------------------------------------- */
+
+ if (archinfo->hwcaps & VEX_HWCAPS_ARM_NEON) {
+ UInt insn32 = (INSN0(15,0) << 16) | INSN1(15,0);
+ Bool ok_neon = decode_NEON_instruction(
+ &dres, insn32, condT, True/*isT*/
+ );
+ if (ok_neon)
+ goto decode_success;
+ }
+
+ /* ----------------------------------------------------------- */
+ /* -- v6 media instructions (in Thumb mode) -- */
+ /* ----------------------------------------------------------- */
+
+ { UInt insn32 = (INSN0(15,0) << 16) | INSN1(15,0);
+ Bool ok_v6m = decode_V6MEDIA_instruction(
+ &dres, insn32, condT, ARMCondAL/*bogus*/,
+ True/*isT*/
+ );
+ if (ok_v6m)
+ goto decode_success;
+ }
+
+ /* ----------------------------------------------------------- */
+ /* -- Undecodable -- */
+ /* ----------------------------------------------------------- */
+
+ goto decode_failure;
+ /*NOTREACHED*/
+
+ decode_failure:
+ /* All decode failures end up here. */
+ vex_printf("disInstr(thumb): unhandled instruction: "
+ "0x%04x 0x%04x\n", (UInt)insn0, (UInt)insn1);
+
+ /* Back up ITSTATE to the initial value for this instruction.
+ If we don't do that, any subsequent restart of the instruction
+ will restart with the wrong value. */
+ put_ITSTATE(old_itstate);
+ /* Tell the dispatcher that this insn cannot be decoded, and so has
+ not been executed, and (is currently) the next to be executed.
+     R15 should be up-to-date since it was made so at the start of
+     each insn, but nevertheless be paranoid and update it again
+     right now. */
+ vassert(0 == (guest_R15_curr_instr_notENC & 1));
+ llPutIReg( 15, mkU32(guest_R15_curr_instr_notENC | 1) );
+ irsb->next = mkU32(guest_R15_curr_instr_notENC | 1 /* CPSR.T */);
+ irsb->jumpkind = Ijk_NoDecode;
+ dres.whatNext = Dis_StopHere;
+ dres.len = 0;
+ return dres;
+
+ decode_success:
+ /* All decode successes end up here. */
+ DIP("\n");
+
+ vassert(dres.len == 2 || dres.len == 4 || dres.len == 20);
+
+#if 0
+ // XXX is this necessary on Thumb?
+ /* Now then. Do we have an implicit jump to r15 to deal with? */
+ if (r15written) {
+ /* If we get jump to deal with, we assume that there's been no
+ other competing branch stuff previously generated for this
+ insn. That's reasonable, in the sense that the ARM insn set
+ appears to declare as "Unpredictable" any instruction which
+ generates more than one possible new value for r15. Hence
+ just assert. The decoders themselves should check against
+ all such instructions which are thusly Unpredictable, and
+ decline to decode them. Hence we should never get here if we
+ have competing new values for r15, and hence it is safe to
+ assert here. */
+ vassert(dres.whatNext == Dis_Continue);
+ vassert(irsb->next == NULL);
+      vassert(irsb->jumpkind == Ijk_Boring);
+ /* If r15 is unconditionally written, terminate the block by
+ jumping to it. If it's conditionally written, still
+ terminate the block (a shame, but we can't do side exits to
+ arbitrary destinations), but first jump to the next
+ instruction if the condition doesn't hold. */
+ /* We can't use getIRegT(15) to get the destination, since that
+ will produce r15+4, which isn't what we want. Must use
+ llGetIReg(15) instead. */
+ if (r15guard == IRTemp_INVALID) {
+ /* unconditional */
+ } else {
+ /* conditional */
+ stmt( IRStmt_Exit(
+ unop(Iop_32to1,
+ binop(Iop_Xor32,
+ mkexpr(r15guard), mkU32(1))),
+ r15kind,
+ IRConst_U32(guest_R15_curr_instr_notENC + 4)
+ ));
+ }
+ irsb->next = llGetIReg(15);
+ irsb->jumpkind = r15kind;
+ dres.whatNext = Dis_StopHere;
+ }
+#endif
+
+ return dres;
+
+# undef INSN0
+# undef INSN1
+}
+
+#undef DIP
+#undef DIS
+
+
+/*------------------------------------------------------------*/
+/*--- Top-level fn ---*/
+/*------------------------------------------------------------*/
+
+/* Disassemble a single instruction into IR. The instruction
+ is located in host memory at &guest_code[delta]. */
+
+DisResult disInstr_ARM ( IRSB* irsb_IN,
+ Bool put_IP,
+ Bool (*resteerOkFn) ( void*, Addr64 ),
+ Bool resteerCisOk,
+ void* callback_opaque,
+ UChar* guest_code_IN,
+ Long delta_ENCODED,
+ Addr64 guest_IP_ENCODED,
+ VexArch guest_arch,
+ VexArchInfo* archinfo,
+ VexAbiInfo* abiinfo,
+ Bool host_bigendian_IN )
+{
+ DisResult dres;
+ Bool isThumb = (Bool)(guest_IP_ENCODED & 1);
+
+ /* Set globals (see top of this file) */
+ vassert(guest_arch == VexArchARM);
+
+ irsb = irsb_IN;
+ host_is_bigendian = host_bigendian_IN;
+ __curr_is_Thumb = isThumb;
+
+ if (isThumb) {
+ guest_R15_curr_instr_notENC = (Addr32)guest_IP_ENCODED - 1;
+ } else {
+ guest_R15_curr_instr_notENC = (Addr32)guest_IP_ENCODED;
+ }
+
+ if (isThumb) {
+ dres = disInstr_THUMB_WRK ( put_IP, resteerOkFn,
+ resteerCisOk, callback_opaque,
+ &guest_code_IN[delta_ENCODED - 1],
+ archinfo, abiinfo );
+ } else {
+ dres = disInstr_ARM_WRK ( put_IP, resteerOkFn,
+ resteerCisOk, callback_opaque,
+ &guest_code_IN[delta_ENCODED],
+ archinfo, abiinfo );
+ }
+
+ return dres;
+}
+
+/* Test program for the conversion of IRCmpF64Result values to VFP
+ nzcv values. See handling of FCMPD et al above. */
+/*
+UInt foo ( UInt x )
+{
+ UInt ix = ((x >> 5) & 3) | (x & 1);
+ UInt termL = (((((ix ^ 1) << 30) - 1) >> 29) + 1);
+ UInt termR = (ix & (ix >> 1) & 1);
+ return termL - termR;
+}
+
+void try ( char* s, UInt ir, UInt req )
+{
+ UInt act = foo(ir);
+ printf("%s 0x%02x -> req %d%d%d%d act %d%d%d%d (0x%x)\n",
+ s, ir, (req >> 3) & 1, (req >> 2) & 1,
+ (req >> 1) & 1, (req >> 0) & 1,
+ (act >> 3) & 1, (act >> 2) & 1,
+ (act >> 1) & 1, (act >> 0) & 1, act);
+
+}
+
+int main ( void )
+{
+ printf("\n");
+ try("UN", 0x45, 0b0011);
+ try("LT", 0x01, 0b1000);
+ try("GT", 0x00, 0b0010);
+ try("EQ", 0x40, 0b0110);
+ printf("\n");
+ return 0;
+}
+*/
+
+/*--------------------------------------------------------------------*/
+/*--- end guest_arm_toIR.c ---*/
+/*--------------------------------------------------------------------*/
diff --git a/VEX/priv/guest_generic_bb_to_IR.c b/VEX/priv/guest_generic_bb_to_IR.c
new file mode 100644
index 0000000..f7dc020
--- /dev/null
+++ b/VEX/priv/guest_generic_bb_to_IR.c
@@ -0,0 +1,822 @@
+
+/*--------------------------------------------------------------------*/
+/*--- begin guest_generic_bb_to_IR.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+ This file is part of Valgrind, a dynamic binary instrumentation
+ framework.
+
+ Copyright (C) 2004-2010 OpenWorks LLP
+ info@open-works.net
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA.
+
+ The GNU General Public License is contained in the file COPYING.
+
+ Neither the names of the U.S. Department of Energy nor the
+ University of California nor the names of its contributors may be
+ used to endorse or promote products derived from this software
+ without prior written permission.
+*/
+
+#include "libvex_basictypes.h"
+#include "libvex_ir.h"
+#include "libvex.h"
+#include "main_util.h"
+#include "main_globals.h"
+#include "guest_generic_bb_to_IR.h"
+
+
+/* Forwards .. */
+__attribute__((regparm(2)))
+static UInt genericg_compute_checksum_4al ( HWord first_w32, HWord n_w32s );
+__attribute__((regparm(1)))
+static UInt genericg_compute_checksum_4al_1 ( HWord first_w32 );
+__attribute__((regparm(1)))
+static UInt genericg_compute_checksum_4al_2 ( HWord first_w32 );
+__attribute__((regparm(1)))
+static UInt genericg_compute_checksum_4al_3 ( HWord first_w32 );
+__attribute__((regparm(1)))
+static UInt genericg_compute_checksum_4al_4 ( HWord first_w32 );
+__attribute__((regparm(1)))
+static UInt genericg_compute_checksum_4al_5 ( HWord first_w32 );
+__attribute__((regparm(1)))
+static UInt genericg_compute_checksum_4al_6 ( HWord first_w32 );
+__attribute__((regparm(1)))
+static UInt genericg_compute_checksum_4al_7 ( HWord first_w32 );
+__attribute__((regparm(1)))
+static UInt genericg_compute_checksum_4al_8 ( HWord first_w32 );
+__attribute__((regparm(1)))
+static UInt genericg_compute_checksum_4al_9 ( HWord first_w32 );
+__attribute__((regparm(1)))
+static UInt genericg_compute_checksum_4al_10 ( HWord first_w32 );
+__attribute__((regparm(1)))
+static UInt genericg_compute_checksum_4al_11 ( HWord first_w32 );
+__attribute__((regparm(1)))
+static UInt genericg_compute_checksum_4al_12 ( HWord first_w32 );
+
+/* Small helpers */
+static Bool const_False ( void* callback_opaque, Addr64 a ) {
+ return False;
+}
+
+/* Disassemble a complete basic block, starting at guest_IP_start,
+ returning a new IRSB. The disassembler may chase across basic
+ block boundaries if it wishes and if chase_into_ok allows it.
+ The precise guest address ranges from which code has been taken
+ are written into vge. guest_IP_bbstart is taken to be the IP in
+ the guest's address space corresponding to the instruction at
+ &guest_code[0].
+
+   dis_instr_fn is the arch-specific fn to disassemble one
+   instruction; it is this that does the real work.
+
+ do_self_check indicates that the caller needs a self-checking
+ translation.
+
+ preamble_function is a callback which allows the caller to add
+ its own IR preamble (following the self-check, if any). May be
+ NULL. If non-NULL, the IRSB under construction is handed to
+ this function, which presumably adds IR statements to it. The
+ callback may optionally complete the block and direct bb_to_IR
+ not to disassemble any instructions into it; this is indicated
+ by the callback returning True.
+
+   offB_TISTART and offB_TILEN are the offsets of guest_TISTART and
+   guest_TILEN.  Since this routine has to work for any guest state,
+   without knowing what it is, those offsets have to be passed in.
+
+ callback_opaque is a caller-supplied pointer to data which the
+ callbacks may want to see. Vex has no idea what it is.
+ (In fact it's a VgInstrumentClosure.)
+*/
+
+IRSB* bb_to_IR ( /*OUT*/VexGuestExtents* vge,
+ /*IN*/ void* callback_opaque,
+ /*IN*/ DisOneInstrFn dis_instr_fn,
+ /*IN*/ UChar* guest_code,
+ /*IN*/ Addr64 guest_IP_bbstart,
+ /*IN*/ Bool (*chase_into_ok)(void*,Addr64),
+ /*IN*/ Bool host_bigendian,
+ /*IN*/ VexArch arch_guest,
+ /*IN*/ VexArchInfo* archinfo_guest,
+ /*IN*/ VexAbiInfo* abiinfo_both,
+ /*IN*/ IRType guest_word_type,
+ /*IN*/ Bool do_self_check,
+ /*IN*/ Bool (*preamble_function)(void*,IRSB*),
+ /*IN*/ Int offB_TISTART,
+ /*IN*/ Int offB_TILEN )
+{
+ Long delta;
+ Int i, n_instrs, first_stmt_idx;
+ Bool resteerOK, need_to_put_IP, debug_print;
+ DisResult dres;
+ IRStmt* imark;
+ static Int n_resteers = 0;
+ Int d_resteers = 0;
+ Int selfcheck_idx = 0;
+ IRSB* irsb;
+ Addr64 guest_IP_curr_instr;
+ IRConst* guest_IP_bbstart_IRConst = NULL;
+ Int n_cond_resteers_allowed = 2;
+
+ Bool (*resteerOKfn)(void*,Addr64) = NULL;
+
+ debug_print = toBool(vex_traceflags & VEX_TRACE_FE);
+
+ /* Note: for adler32 to work without % operation for the self
+ check, need to limit length of stuff it scans to 5552 bytes.
+ Therefore limiting the max bb len to 100 insns seems generously
+ conservative. */
+
+ /* check sanity .. */
+ vassert(sizeof(HWord) == sizeof(void*));
+ vassert(vex_control.guest_max_insns >= 1);
+ vassert(vex_control.guest_max_insns < 100);
+ vassert(vex_control.guest_chase_thresh >= 0);
+ vassert(vex_control.guest_chase_thresh < vex_control.guest_max_insns);
+ vassert(guest_word_type == Ity_I32 || guest_word_type == Ity_I64);
+
+ /* Start a new, empty extent. */
+ vge->n_used = 1;
+ vge->base[0] = guest_IP_bbstart;
+ vge->len[0] = 0;
+
+ /* And a new IR superblock to dump the result into. */
+ irsb = emptyIRSB();
+
+ /* Delta keeps track of how far along the guest_code array we have
+ so far gone. */
+ delta = 0;
+ n_instrs = 0;
+
+ /* Guest addresses as IRConsts. Used in the two self-checks
+ generated. */
+ if (do_self_check) {
+ guest_IP_bbstart_IRConst
+ = guest_word_type==Ity_I32
+ ? IRConst_U32(toUInt(guest_IP_bbstart))
+ : IRConst_U64(guest_IP_bbstart);
+ }
+
+ /* If asked to make a self-checking translation, leave 5 spaces
+ in which to put the check statements. We'll fill them in later
+ when we know the length and adler32 of the area to check. */
+ if (do_self_check) {
+ selfcheck_idx = irsb->stmts_used;
+ addStmtToIRSB( irsb, IRStmt_NoOp() );
+ addStmtToIRSB( irsb, IRStmt_NoOp() );
+ addStmtToIRSB( irsb, IRStmt_NoOp() );
+ addStmtToIRSB( irsb, IRStmt_NoOp() );
+ addStmtToIRSB( irsb, IRStmt_NoOp() );
+ }
+
+ /* If the caller supplied a function to add its own preamble, use
+ it now. */
+ if (preamble_function) {
+ Bool stopNow = preamble_function( callback_opaque, irsb );
+ if (stopNow) {
+ /* The callback has completed the IR block without any guest
+ insns being disassembled into it, so just return it at
+ this point, even if a self-check was requested - as there
+ is nothing to self-check. The five self-check no-ops will
+ still be in place, but they are harmless. */
+ return irsb;
+ }
+ }
+
+ /* Process instructions. */
+ while (True) {
+ vassert(n_instrs < vex_control.guest_max_insns);
+
+ /* Regardless of what chase_into_ok says, is chasing permissible
+ at all right now? Set resteerOKfn accordingly. */
+ resteerOK
+ = toBool(
+ n_instrs < vex_control.guest_chase_thresh
+ /* If making self-checking translations, don't chase
+ .. it makes the checks too complicated. We only want
+ to scan just one sequence of bytes in the check, not
+ a whole bunch. */
+ && !do_self_check
+ /* we can't afford to have a resteer once we're on the
+ last extent slot. */
+ && vge->n_used < 3
+ );
+
+ resteerOKfn
+ = resteerOK ? chase_into_ok : const_False;
+
+      /* n_cond_resteers_allowed keeps track of whether we're still
+         allowing dis_instr_fn to chase conditional branches.  It
+         starts at 2 and is decremented each time dis_instr_fn tells
+         us it has chased a conditional branch; whether it is still
+         positive is used to tell later calls to dis_instr_fn whether
+         they are allowed to chase conditional branches. */
+ vassert(n_cond_resteers_allowed >= 0 && n_cond_resteers_allowed <= 2);
+
+ /* This is the IP of the instruction we're just about to deal
+ with. */
+ guest_IP_curr_instr = guest_IP_bbstart + delta;
+
+ /* This is the irsb statement array index of the first stmt in
+ this insn. That will always be the instruction-mark
+ descriptor. */
+ first_stmt_idx = irsb->stmts_used;
+
+      /* Add an instruction-mark statement.  We won't know until after
+         disassembling the instruction how long it is, so just put in
+         a zero length and we'll fix it up later. */
+ addStmtToIRSB( irsb, IRStmt_IMark( guest_IP_curr_instr, 0 ));
+
+ /* for the first insn, the dispatch loop will have set
+ %IP, but for all the others we have to do it ourselves. */
+ need_to_put_IP = toBool(n_instrs > 0);
+
+ /* Finally, actually disassemble an instruction. */
+ dres = dis_instr_fn ( irsb,
+ need_to_put_IP,
+ resteerOKfn,
+ toBool(n_cond_resteers_allowed > 0),
+ callback_opaque,
+ guest_code,
+ delta,
+ guest_IP_curr_instr,
+ arch_guest,
+ archinfo_guest,
+ abiinfo_both,
+ host_bigendian );
+
+ /* stay sane ... */
+ vassert(dres.whatNext == Dis_StopHere
+ || dres.whatNext == Dis_Continue
+ || dres.whatNext == Dis_ResteerU
+ || dres.whatNext == Dis_ResteerC);
+ /* ... disassembled insn length is sane ... */
+ vassert(dres.len >= 0 && dres.len <= 20);
+ /* ... continueAt is zero if no resteer requested ... */
+ if (dres.whatNext != Dis_ResteerU && dres.whatNext != Dis_ResteerC)
+ vassert(dres.continueAt == 0);
+ /* ... if we disallowed conditional resteers, check that one
+ didn't actually happen anyway ... */
+ if (n_cond_resteers_allowed == 0)
+ vassert(dres.whatNext != Dis_ResteerC);
+
+ /* Fill in the insn-mark length field. */
+ vassert(first_stmt_idx >= 0 && first_stmt_idx < irsb->stmts_used);
+ imark = irsb->stmts[first_stmt_idx];
+ vassert(imark);
+ vassert(imark->tag == Ist_IMark);
+ vassert(imark->Ist.IMark.len == 0);
+ imark->Ist.IMark.len = toUInt(dres.len);
+
+ /* Print the resulting IR, if needed. */
+ if (vex_traceflags & VEX_TRACE_FE) {
+ for (i = first_stmt_idx; i < irsb->stmts_used; i++) {
+ vex_printf(" ");
+ ppIRStmt(irsb->stmts[i]);
+ vex_printf("\n");
+ }
+ }
+
+ /* If dis_instr_fn terminated the BB at this point, check it
+ also filled in the irsb->next field. */
+ if (dres.whatNext == Dis_StopHere) {
+ vassert(irsb->next != NULL);
+ if (debug_print) {
+ vex_printf(" ");
+ vex_printf( "goto {");
+ ppIRJumpKind(irsb->jumpkind);
+ vex_printf( "} ");
+ ppIRExpr( irsb->next );
+ vex_printf( "\n");
+ }
+ }
+
+ /* Update the VexGuestExtents we are constructing. */
+      /* Since vex_control.guest_max_insns is required to be < 100 and
+         each insn is at most 20 bytes long, the max possible extent
+         length is 100 * 20 == 2000, so this limit of 5000 seems
+         reasonable. */
+ vassert(vge->len[vge->n_used-1] < 5000);
+ vge->len[vge->n_used-1]
+ = toUShort(toUInt( vge->len[vge->n_used-1] + dres.len ));
+ n_instrs++;
+ if (debug_print)
+ vex_printf("\n");
+
+ /* Advance delta (inconspicuous but very important :-) */
+ delta += (Long)dres.len;
+
+ switch (dres.whatNext) {
+ case Dis_Continue:
+ vassert(irsb->next == NULL);
+ if (n_instrs < vex_control.guest_max_insns) {
+ /* keep going */
+ } else {
+ /* We have to stop. */
+ irsb->next
+ = IRExpr_Const(
+ guest_word_type == Ity_I32
+ ? IRConst_U32(toUInt(guest_IP_bbstart+delta))
+ : IRConst_U64(guest_IP_bbstart+delta)
+ );
+ goto done;
+ }
+ break;
+ case Dis_StopHere:
+ vassert(irsb->next != NULL);
+ goto done;
+ case Dis_ResteerU:
+ case Dis_ResteerC:
+ /* Check that we actually allowed a resteer .. */
+ vassert(resteerOK);
+ vassert(irsb->next == NULL);
+ if (dres.whatNext == Dis_ResteerC) {
+ vassert(n_cond_resteers_allowed > 0);
+ n_cond_resteers_allowed--;
+ }
+ /* figure out a new delta to continue at. */
+ vassert(resteerOKfn(callback_opaque,dres.continueAt));
+ delta = dres.continueAt - guest_IP_bbstart;
+ /* we now have to start a new extent slot. */
+ vge->n_used++;
+ vassert(vge->n_used <= 3);
+ vge->base[vge->n_used-1] = dres.continueAt;
+ vge->len[vge->n_used-1] = 0;
+ n_resteers++;
+ d_resteers++;
+ if (0 && (n_resteers & 0xFF) == 0)
+ vex_printf("resteer[%d,%d] to 0x%llx (delta = %lld)\n",
+ n_resteers, d_resteers,
+ dres.continueAt, delta);
+ break;
+ default:
+ vpanic("bb_to_IR");
+ }
+ }
+ /*NOTREACHED*/
+ vassert(0);
+
+ done:
+ /* We're done. The only thing that might need attending to is that
+ a self-checking preamble may need to be created.
+
+ The scheme is to compute a rather crude checksum of the code
+ we're making a translation of, and add to the IR a call to a
+ helper routine which recomputes the checksum every time the
+ translation is run, and requests a retranslation if it doesn't
+ match. This is obviously very expensive and considerable
+ efforts are made to speed it up:
+
+ * the checksum is computed from all the 32-bit words that
+ overlap the translated code. That means it could depend on up
+ to 3 bytes before and 3 bytes after which aren't part of the
+ translated area, and so if those change then we'll
+       unnecessarily have to discard and retranslate.  This seems
+       like a pretty remote possibility, and the benefit of not
+       having to deal with the ends of the range at byte precision
+       far outweighs the cost of any extra retranslations needed.
+
+ * there's a generic routine and 12 specialised cases, which
+ handle the cases of 1 through 12-word lengths respectively.
+ They seem to cover about 90% of the cases that occur in
+ practice.
+ */
+ if (do_self_check) {
+
+ UInt len2check, expected32;
+ IRTemp tistart_tmp, tilen_tmp;
+ UInt (*fn_generic)(HWord, HWord) __attribute__((regparm(2)));
+ UInt (*fn_spec)(HWord) __attribute__((regparm(1)));
+ HChar* nm_generic;
+ HChar* nm_spec;
+ HWord fn_generic_entry = 0;
+ HWord fn_spec_entry = 0;
+
+ vassert(vge->n_used == 1);
+ len2check = vge->len[0];
+
+ /* stay sane */
+ vassert(len2check >= 0 && len2check < 1000/*arbitrary*/);
+
+ /* Skip the check if the translation involved zero bytes */
+ if (len2check > 0) {
+ HWord first_w32 = ((HWord)guest_code) & ~(HWord)3;
+ HWord last_w32 = (((HWord)guest_code) + len2check - 1) & ~(HWord)3;
+ vassert(first_w32 <= last_w32);
+ HWord w32_diff = last_w32 - first_w32;
+ vassert(0 == (w32_diff & 3));
+ HWord w32s_to_check = (w32_diff + 4) / 4;
+         vassert(w32s_to_check > 0 && w32s_to_check < 1004/4 /*arbitrary*/);
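+         /* Worked example: guest_code == 0x1003 and len2check == 6
+            give first_w32 == 0x1000 and last_w32 == 0x1008, hence
+            w32_diff == 8 and w32s_to_check == 3: the three words
+            overlapping the bytes [0x1003 .. 0x1008]. */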
+
+ /* vex_printf("%lx %lx %ld\n", first_w32, last_w32, w32s_to_check); */
+
+ fn_generic = genericg_compute_checksum_4al;
+ nm_generic = "genericg_compute_checksum_4al";
+ fn_spec = NULL;
+ nm_spec = NULL;
+
+ switch (w32s_to_check) {
+ case 1: fn_spec = genericg_compute_checksum_4al_1;
+ nm_spec = "genericg_compute_checksum_4al_1"; break;
+ case 2: fn_spec = genericg_compute_checksum_4al_2;
+ nm_spec = "genericg_compute_checksum_4al_2"; break;
+ case 3: fn_spec = genericg_compute_checksum_4al_3;
+ nm_spec = "genericg_compute_checksum_4al_3"; break;
+ case 4: fn_spec = genericg_compute_checksum_4al_4;
+ nm_spec = "genericg_compute_checksum_4al_4"; break;
+ case 5: fn_spec = genericg_compute_checksum_4al_5;
+ nm_spec = "genericg_compute_checksum_4al_5"; break;
+ case 6: fn_spec = genericg_compute_checksum_4al_6;
+ nm_spec = "genericg_compute_checksum_4al_6"; break;
+ case 7: fn_spec = genericg_compute_checksum_4al_7;
+ nm_spec = "genericg_compute_checksum_4al_7"; break;
+ case 8: fn_spec = genericg_compute_checksum_4al_8;
+ nm_spec = "genericg_compute_checksum_4al_8"; break;
+ case 9: fn_spec = genericg_compute_checksum_4al_9;
+ nm_spec = "genericg_compute_checksum_4al_9"; break;
+ case 10: fn_spec = genericg_compute_checksum_4al_10;
+ nm_spec = "genericg_compute_checksum_4al_10"; break;
+ case 11: fn_spec = genericg_compute_checksum_4al_11;
+ nm_spec = "genericg_compute_checksum_4al_11"; break;
+ case 12: fn_spec = genericg_compute_checksum_4al_12;
+ nm_spec = "genericg_compute_checksum_4al_12"; break;
+ default: break;
+ }
+
+ expected32 = fn_generic( first_w32, w32s_to_check );
+ /* If we got a specialised version, check it produces the same
+ result as the generic version! */
+ if (fn_spec) {
+ vassert(nm_spec);
+ vassert(expected32 == fn_spec( first_w32 ));
+ } else {
+ vassert(!nm_spec);
+ }
+
+         /* Set TISTART and TILEN.  These will describe to the dispatcher
+ the area of guest code to invalidate should we exit with a
+ self-check failure. */
+
+ tistart_tmp = newIRTemp(irsb->tyenv, guest_word_type);
+ tilen_tmp = newIRTemp(irsb->tyenv, guest_word_type);
+
+ irsb->stmts[selfcheck_idx+0]
+ = IRStmt_WrTmp(tistart_tmp, IRExpr_Const(guest_IP_bbstart_IRConst) );
+
+ irsb->stmts[selfcheck_idx+1]
+ = IRStmt_WrTmp(tilen_tmp,
+ guest_word_type==Ity_I32
+ ? IRExpr_Const(IRConst_U32(len2check))
+ : IRExpr_Const(IRConst_U64(len2check))
+ );
+
+ irsb->stmts[selfcheck_idx+2]
+ = IRStmt_Put( offB_TISTART, IRExpr_RdTmp(tistart_tmp) );
+
+ irsb->stmts[selfcheck_idx+3]
+ = IRStmt_Put( offB_TILEN, IRExpr_RdTmp(tilen_tmp) );
+
+ /* Generate the entry point descriptors */
+ if (abiinfo_both->host_ppc_calls_use_fndescrs) {
+ HWord* descr = (HWord*)fn_generic;
+ fn_generic_entry = descr[0];
+ if (fn_spec) {
+ descr = (HWord*)fn_spec;
+ fn_spec_entry = descr[0];
+ } else {
+ fn_spec_entry = (HWord)NULL;
+ }
+ } else {
+ fn_generic_entry = (HWord)fn_generic;
+ if (fn_spec) {
+ fn_spec_entry = (HWord)fn_spec;
+ } else {
+ fn_spec_entry = (HWord)NULL;
+ }
+ }
+
+ IRExpr* callexpr = NULL;
+ if (fn_spec) {
+ callexpr = mkIRExprCCall(
+ Ity_I32, 1/*regparms*/,
+ nm_spec, (void*)fn_spec_entry,
+ mkIRExprVec_1(
+ mkIRExpr_HWord( (HWord)first_w32 )
+ )
+ );
+ } else {
+ callexpr = mkIRExprCCall(
+ Ity_I32, 2/*regparms*/,
+ nm_generic, (void*)fn_generic_entry,
+ mkIRExprVec_2(
+ mkIRExpr_HWord( (HWord)first_w32 ),
+ mkIRExpr_HWord( (HWord)w32s_to_check )
+ )
+ );
+ }
+
+ irsb->stmts[selfcheck_idx+4]
+ = IRStmt_Exit(
+ IRExpr_Binop(
+ Iop_CmpNE32,
+ callexpr,
+ IRExpr_Const(IRConst_U32(expected32))
+ ),
+ Ijk_TInval,
+ guest_IP_bbstart_IRConst
+ );
+ }
+ }
+
+ return irsb;
+}
+
+
+/*-------------------------------------------------------------
+ A support routine for doing self-checking translations.
+ -------------------------------------------------------------*/
+
+/* CLEAN HELPER */
+/* CALLED FROM GENERATED CODE */
+
+/* Compute a checksum of host memory starting at the 4-aligned
+   address first_w32, as fast as possible.  The _4al_N variants are
+   specialised for exactly N 32-bit words; the generic version
+   handles any word count, down to zero.  These fns are called once
+   for every use of a self-checking translation, so they need to be
+   as fast as possible. */
+
+static inline UInt ROL32 ( UInt w, Int n ) {
+ w = (w << n) | (w >> (32-n));
+ return w;
+}
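+
+/* Note: every call below uses n == 31; a rotate amount of zero would
+   be undefined behaviour (w >> 32).  Example: ROL32(0x80000001, 31)
+   == 0xC0000000. */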
+
+__attribute__((regparm(2)))
+static UInt genericg_compute_checksum_4al ( HWord first_w32, HWord n_w32s )
+{
+ UInt sum1 = 0, sum2 = 0;
+ UInt* p = (UInt*)first_w32;
+ /* unrolled */
+ while (n_w32s >= 4) {
+ UInt w;
+ w = p[0]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+ w = p[1]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+ w = p[2]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+ w = p[3]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+ p += 4;
+ n_w32s -= 4;
+ sum1 ^= sum2;
+ }
+ while (n_w32s >= 1) {
+ UInt w;
+ w = p[0]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+ p += 1;
+ n_w32s -= 1;
+ sum1 ^= sum2;
+ }
+ return sum1 + sum2;
+}
+
+/* Specialised versions of the above function */
+
+__attribute__((regparm(1)))
+static UInt genericg_compute_checksum_4al_1 ( HWord first_w32 )
+{
+ UInt sum1 = 0, sum2 = 0;
+ UInt* p = (UInt*)first_w32;
+ UInt w;
+ w = p[0]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+ sum1 ^= sum2;
+ return sum1 + sum2;
+}
+
+__attribute__((regparm(1)))
+static UInt genericg_compute_checksum_4al_2 ( HWord first_w32 )
+{
+ UInt sum1 = 0, sum2 = 0;
+ UInt* p = (UInt*)first_w32;
+ UInt w;
+ w = p[0]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+ sum1 ^= sum2;
+ w = p[1]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+ sum1 ^= sum2;
+ return sum1 + sum2;
+}
+
+__attribute__((regparm(1)))
+static UInt genericg_compute_checksum_4al_3 ( HWord first_w32 )
+{
+ UInt sum1 = 0, sum2 = 0;
+ UInt* p = (UInt*)first_w32;
+ UInt w;
+ w = p[0]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+ sum1 ^= sum2;
+ w = p[1]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+ sum1 ^= sum2;
+ w = p[2]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+ sum1 ^= sum2;
+ return sum1 + sum2;
+}
+
+__attribute__((regparm(1)))
+static UInt genericg_compute_checksum_4al_4 ( HWord first_w32 )
+{
+ UInt sum1 = 0, sum2 = 0;
+ UInt* p = (UInt*)first_w32;
+ UInt w;
+ w = p[0]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+ w = p[1]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+ w = p[2]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+ w = p[3]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+ sum1 ^= sum2;
+ return sum1 + sum2;
+}
+
+__attribute__((regparm(1)))
+static UInt genericg_compute_checksum_4al_5 ( HWord first_w32 )
+{
+ UInt sum1 = 0, sum2 = 0;
+ UInt* p = (UInt*)first_w32;
+ UInt w;
+ w = p[0]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+ w = p[1]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+ w = p[2]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+ w = p[3]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+ sum1 ^= sum2;
+ w = p[4]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+ sum1 ^= sum2;
+ return sum1 + sum2;
+}
+
+__attribute__((regparm(1)))
+static UInt genericg_compute_checksum_4al_6 ( HWord first_w32 )
+{
+ UInt sum1 = 0, sum2 = 0;
+ UInt* p = (UInt*)first_w32;
+ UInt w;
+ w = p[0]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+ w = p[1]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+ w = p[2]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+ w = p[3]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+ sum1 ^= sum2;
+ w = p[4]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+ sum1 ^= sum2;
+ w = p[5]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+ sum1 ^= sum2;
+ return sum1 + sum2;
+}
+
+__attribute__((regparm(1)))
+static UInt genericg_compute_checksum_4al_7 ( HWord first_w32 )
+{
+ UInt sum1 = 0, sum2 = 0;
+ UInt* p = (UInt*)first_w32;
+ UInt w;
+ w = p[0]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+ w = p[1]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+ w = p[2]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+ w = p[3]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+ sum1 ^= sum2;
+ w = p[4]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+ sum1 ^= sum2;
+ w = p[5]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+ sum1 ^= sum2;
+ w = p[6]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+ sum1 ^= sum2;
+ return sum1 + sum2;
+}
+
+__attribute__((regparm(1)))
+static UInt genericg_compute_checksum_4al_8 ( HWord first_w32 )
+{
+ UInt sum1 = 0, sum2 = 0;
+ UInt* p = (UInt*)first_w32;
+ UInt w;
+ w = p[0]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+ w = p[1]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+ w = p[2]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+ w = p[3]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+ sum1 ^= sum2;
+ w = p[4]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+ w = p[5]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+ w = p[6]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+ w = p[7]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+ sum1 ^= sum2;
+ return sum1 + sum2;
+}
+
+__attribute__((regparm(1)))
+static UInt genericg_compute_checksum_4al_9 ( HWord first_w32 )
+{
+ UInt sum1 = 0, sum2 = 0;
+ UInt* p = (UInt*)first_w32;
+ UInt w;
+ w = p[0]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+ w = p[1]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+ w = p[2]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+ w = p[3]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+ sum1 ^= sum2;
+ w = p[4]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+ w = p[5]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+ w = p[6]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+ w = p[7]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+ sum1 ^= sum2;
+ w = p[8]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+ sum1 ^= sum2;
+ return sum1 + sum2;
+}
+
+__attribute__((regparm(1)))
+static UInt genericg_compute_checksum_4al_10 ( HWord first_w32 )
+{
+ UInt sum1 = 0, sum2 = 0;
+ UInt* p = (UInt*)first_w32;
+ UInt w;
+ w = p[0]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+ w = p[1]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+ w = p[2]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+ w = p[3]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+ sum1 ^= sum2;
+ w = p[4]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+ w = p[5]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+ w = p[6]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+ w = p[7]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+ sum1 ^= sum2;
+ w = p[8]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+ sum1 ^= sum2;
+ w = p[9]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+ sum1 ^= sum2;
+ return sum1 + sum2;
+}
+
+__attribute__((regparm(1)))
+static UInt genericg_compute_checksum_4al_11 ( HWord first_w32 )
+{
+ UInt sum1 = 0, sum2 = 0;
+ UInt* p = (UInt*)first_w32;
+ UInt w;
+ w = p[0]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+ w = p[1]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+ w = p[2]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+ w = p[3]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+ sum1 ^= sum2;
+ w = p[4]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+ w = p[5]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+ w = p[6]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+ w = p[7]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+ sum1 ^= sum2;
+ w = p[8]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+ sum1 ^= sum2;
+ w = p[9]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+ sum1 ^= sum2;
+ w = p[10]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+ sum1 ^= sum2;
+ return sum1 + sum2;
+}
+
+__attribute__((regparm(1)))
+static UInt genericg_compute_checksum_4al_12 ( HWord first_w32 )
+{
+ UInt sum1 = 0, sum2 = 0;
+ UInt* p = (UInt*)first_w32;
+ UInt w;
+ w = p[0]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+ w = p[1]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+ w = p[2]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+ w = p[3]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+ sum1 ^= sum2;
+ w = p[4]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+ w = p[5]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+ w = p[6]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+ w = p[7]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+ sum1 ^= sum2;
+ w = p[8]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+ w = p[9]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+ w = p[10]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+ w = p[11]; sum1 = ROL32(sum1 ^ w, 31); sum2 += w;
+ sum1 ^= sum2;
+ return sum1 + sum2;
+}
+
+/*--------------------------------------------------------------------*/
+/*--- end guest_generic_bb_to_IR.c ---*/
+/*--------------------------------------------------------------------*/
diff --git a/VEX/priv/guest_generic_bb_to_IR.h b/VEX/priv/guest_generic_bb_to_IR.h
new file mode 100644
index 0000000..9ea10cb
--- /dev/null
+++ b/VEX/priv/guest_generic_bb_to_IR.h
@@ -0,0 +1,182 @@
+
+/*--------------------------------------------------------------------*/
+/*--- begin guest_generic_bb_to_IR.h ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+ This file is part of Valgrind, a dynamic binary instrumentation
+ framework.
+
+ Copyright (C) 2004-2010 OpenWorks LLP
+ info@open-works.net
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA.
+
+ The GNU General Public License is contained in the file COPYING.
+
+ Neither the names of the U.S. Department of Energy nor the
+ University of California nor the names of its contributors may be
+ used to endorse or promote products derived from this software
+ without prior written permission.
+*/
+
+#ifndef __VEX_GUEST_GENERIC_BB_TO_IR_H
+#define __VEX_GUEST_GENERIC_BB_TO_IR_H
+
+
+/* This defines stuff needed by the guest insn disassemblers.
+   It's a bit circular; it is imported by
+ - the guest-specific toIR.c files (guest-{x86,amd64,ppc,arm}/toIR.c)
+ - the generic disassembly driver (bb_to_IR.c)
+ - vex_main.c
+*/
+
+
+/* ---------------------------------------------------------------
+ Result of disassembling an instruction
+ --------------------------------------------------------------- */
+
+/* The results of disassembling an instruction.  There are four
+   possible outcomes.  For Dis_ResteerU and Dis_ResteerC, the
+   disassembler _must_ continue at the specified address.  For
+   Dis_StopHere, the disassembler _must_ terminate the BB.  For
+   Dis_Continue, we may at
+ our option either disassemble the next insn, or terminate the BB;
+ but in the latter case we must set the bb's ->next field to point
+ to the next instruction. */
+
+typedef
+
+ struct {
+
+ /* The disassembled insn has this length. Must always be
+ set. */
+ Int len;
+
+ /* What happens next?
+ Dis_StopHere: this insn terminates the BB; we must stop.
+ Dis_Continue: we can optionally continue into the next insn
+ Dis_ResteerU: followed an unconditional branch; continue at
+ 'continueAt'
+ Dis_ResteerC: (speculatively, of course) followed a
+ conditional branch; continue at 'continueAt'
+ */
+ enum { Dis_StopHere, Dis_Continue,
+ Dis_ResteerU, Dis_ResteerC } whatNext;
+
+ /* For Dis_Resteer, this is the guest address we should continue
+ at. Otherwise ignored (should be zero). */
+ Addr64 continueAt;
+
+ }
+
+ DisResult;
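+
+/* For example, a decoder which consumed a 4-byte insn and can fall
+   through to the next one would return { len = 4, whatNext =
+   Dis_Continue, continueAt = 0 }. */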
+
+
+/* ---------------------------------------------------------------
+ The type of a function which disassembles one instruction.
+   C's function-type syntax is really astonishingly bizarre.
+ --------------------------------------------------------------- */
+
+/* A function of this type (DisOneInstrFn) disassembles an instruction
+ located at host address &guest_code[delta], whose guest IP is
+ guest_IP (this may be entirely unrelated to where the insn is
+   actually located in the host's address space).  The returned
+ DisResult.len field carries its size. If the returned
+   DisResult.whatNext field is Dis_ResteerU or Dis_ResteerC then
+   DisResult.continueAt should hold the guest IP of the next insn to
+   disassemble.
+
+   disInstr is not permitted to return Dis_ResteerU or Dis_ResteerC
+   if resteerOkFn, when applied to the address which it wishes to
+   resteer into, returns False.
+
+ The resulting IR is added to the end of irbb.
+*/
+
+typedef
+
+ DisResult (*DisOneInstrFn) (
+
+ /* This is the IRSB to which the resulting IR is to be appended. */
+ /*OUT*/ IRSB* irbb,
+
+ /* Do we need to generate IR to set the guest IP for this insn,
+ or not? */
+ /*IN*/ Bool put_IP,
+
+ /* Return True iff resteering to the given addr is allowed (for
+ branches/calls to destinations that are known at JIT-time) */
+ /*IN*/ Bool (*resteerOkFn) ( /*opaque*/void*, Addr64 ),
+
+ /* Should we speculatively resteer across conditional branches?
+ (Experimental and not enabled by default). The strategy is
+ to assume that backward branches are taken and forward
+ branches are not taken. */
+ /*IN*/ Bool resteerCisOk,
+
+ /* Vex-opaque data passed to all caller (valgrind) supplied
+ callbacks. */
+ /*IN*/ void* callback_opaque,
+
+ /* Where is the guest code? */
+ /*IN*/ UChar* guest_code,
+
+ /* Where is the actual insn? Note: it's at &guest_code[delta] */
+ /*IN*/ Long delta,
+
+ /* What is the guest IP of the insn? */
+ /*IN*/ Addr64 guest_IP,
+
+ /* Info about the guest architecture */
+ /*IN*/ VexArch guest_arch,
+ /*IN*/ VexArchInfo* archinfo,
+
+ /* ABI info for both guest and host */
+ /*IN*/ VexAbiInfo* abiinfo,
+
+ /* Is the host bigendian? */
+ /*IN*/ Bool host_bigendian
+
+ );
+
+
+/* ---------------------------------------------------------------
+ Top-level BB to IR conversion fn.
+ --------------------------------------------------------------- */
+
+/* See detailed comment in bb_to_IR.c. */
+extern
+IRSB* bb_to_IR ( /*OUT*/VexGuestExtents* vge,
+ /*IN*/ void* closure_opaque,
+ /*IN*/ DisOneInstrFn dis_instr_fn,
+ /*IN*/ UChar* guest_code,
+ /*IN*/ Addr64 guest_IP_bbstart,
+ /*IN*/ Bool (*chase_into_ok)(void*,Addr64),
+ /*IN*/ Bool host_bigendian,
+ /*IN*/ VexArch arch_guest,
+ /*IN*/ VexArchInfo* archinfo_guest,
+ /*IN*/ VexAbiInfo* abiinfo_both,
+ /*IN*/ IRType guest_word_type,
+ /*IN*/ Bool do_self_check,
+ /*IN*/ Bool (*preamble_function)(void*,IRSB*),
+ /*IN*/ Int offB_TISTART,
+ /*IN*/ Int offB_TILEN );
+
+
+#endif /* ndef __VEX_GUEST_GENERIC_BB_TO_IR_H */
+
+/*--------------------------------------------------------------------*/
+/*--- end guest_generic_bb_to_IR.h ---*/
+/*--------------------------------------------------------------------*/
diff --git a/VEX/priv/guest_generic_x87.c b/VEX/priv/guest_generic_x87.c
new file mode 100644
index 0000000..4204893
--- /dev/null
+++ b/VEX/priv/guest_generic_x87.c
@@ -0,0 +1,888 @@
+
+/*---------------------------------------------------------------*/
+/*--- begin guest_generic_x87.c ---*/
+/*---------------------------------------------------------------*/
+
+/*
+ This file is part of Valgrind, a dynamic binary instrumentation
+ framework.
+
+ Copyright (C) 2004-2010 OpenWorks LLP
+ info@open-works.net
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA.
+
+ The GNU General Public License is contained in the file COPYING.
+
+ Neither the names of the U.S. Department of Energy nor the
+ University of California nor the names of its contributors may be
+ used to endorse or promote products derived from this software
+ without prior written permission.
+*/
+
+/* This file contains functions for doing some x87-specific
+ operations. Both the amd64 and x86 front ends (guests) indirectly
+ call these functions via guest helper calls. By putting them here,
+ code duplication is avoided. Some of these functions are tricky
+ and hard to verify, so there is much to be said for only having one
+ copy thereof.
+*/
+
+#include "libvex_basictypes.h"
+
+#include "main_util.h"
+#include "guest_generic_x87.h"
+
+
+/* 80 and 64-bit floating point formats:
+
+ 80-bit:
+
+ S 0 0-------0 zero
+ S 0 0X------X denormals
+ S 1-7FFE 1X------X normals (all normals have leading 1)
+ S 7FFF 10------0 infinity
+ S 7FFF 10X-----X snan
+ S 7FFF 11X-----X qnan
+
+ S is the sign bit. For runs X----X, at least one of the Xs must be
+ nonzero. Exponent is 15 bits, fractional part is 63 bits, and
+ there is an explicitly represented leading 1, and a sign bit,
+ giving 80 in total.
+
+ 64-bit avoids the confusion of an explicitly represented leading 1
+ and so is simpler:
+
+ S 0 0------0 zero
+ S 0 X------X denormals
+ S 1-7FE any normals
+ S 7FF 0------0 infinity
+ S 7FF 0X-----X snan
+ S 7FF 1X-----X qnan
+
+ Exponent is 11 bits, fractional part is 52 bits, and there is a
+ sign bit, giving 64 in total.
+*/
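+
+/* Worked example: 1.0 in the two formats, shown little-endian with
+   the lowest-addressed byte first:
+
+   64-bit: sign 0, exp 0x3FF, frac 0
+           -> 00 00 00 00 00 00 F0 3F
+   80-bit: sign 0, exp 0x3FFF, explicit leading 1, frac 0
+           -> 00 00 00 00 00 00 00 80 FF 3F
+*/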
+
+
+static inline UInt read_bit_array ( UChar* arr, UInt n )
+{
+ UChar c = arr[n >> 3];
+ c >>= (n&7);
+ return c & 1;
+}
+
+static inline void write_bit_array ( UChar* arr, UInt n, UInt b )
+{
+ UChar c = arr[n >> 3];
+ c = toUChar( c & ~(1 << (n&7)) );
+ c = toUChar( c | ((b&1) << (n&7)) );
+ arr[n >> 3] = c;
+}
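+
+/* For example, read_bit_array(f64, 51) extracts bit 3 of f64[6],
+   the topmost bit of a double's stored mantissa. */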
+
+/* Convert an IEEE754 double (64-bit) into an x87 extended double
+   (80-bit), mimicking the hardware fairly closely.  Both numbers are
+ stored little-endian. Limitations, all of which could be fixed,
+ given some level of hassle:
+
+ * Identity of NaNs is not preserved.
+
+ See comments in the code for more details.
+*/
+void convert_f64le_to_f80le ( /*IN*/UChar* f64, /*OUT*/UChar* f80 )
+{
+ Bool mantissaIsZero;
+ Int bexp, i, j, shift;
+ UChar sign;
+
+ sign = toUChar( (f64[7] >> 7) & 1 );
+ bexp = (f64[7] << 4) | ((f64[6] >> 4) & 0x0F);
+ bexp &= 0x7FF;
+
+ mantissaIsZero = False;
+ if (bexp == 0 || bexp == 0x7FF) {
+ /* We'll need to know whether or not the mantissa (bits 51:0) is
+ all zeroes in order to handle these cases. So figure it
+ out. */
+ mantissaIsZero
+ = toBool(
+ (f64[6] & 0x0F) == 0
+ && f64[5] == 0 && f64[4] == 0 && f64[3] == 0
+ && f64[2] == 0 && f64[1] == 0 && f64[0] == 0
+ );
+ }
+
+ /* If the exponent is zero, either we have a zero or a denormal.
+ Produce a zero. This is a hack in that it forces denormals to
+ zero. Could do better. */
+ if (bexp == 0) {
+ f80[9] = toUChar( sign << 7 );
+ f80[8] = f80[7] = f80[6] = f80[5] = f80[4]
+ = f80[3] = f80[2] = f80[1] = f80[0] = 0;
+
+ if (mantissaIsZero)
+ /* It really is zero, so that's all we can do. */
+ return;
+
+ /* There is at least one 1-bit in the mantissa. So it's a
+ potentially denormalised double -- but we can produce a
+ normalised long double. Count the leading zeroes in the
+ mantissa so as to decide how much to bump the exponent down
+ by. Note, this is SLOW. */
+ shift = 0;
+ for (i = 51; i >= 0; i--) {
+ if (read_bit_array(f64, i))
+ break;
+ shift++;
+ }
+
+ /* and copy into place as many bits as we can get our hands on. */
+ j = 63;
+ for (i = 51 - shift; i >= 0; i--) {
+ write_bit_array( f80, j,
+ read_bit_array( f64, i ) );
+ j--;
+ }
+
+ /* Set the exponent appropriately, and we're done. */
+ bexp -= shift;
+ bexp += (16383 - 1023);
+ f80[9] = toUChar( (sign << 7) | ((bexp >> 8) & 0xFF) );
+ f80[8] = toUChar( bexp & 0xFF );
+ return;
+ }
+
+ /* If the exponent is 7FF, this is either an Infinity, a SNaN or
+ QNaN, as determined by examining bits 51:0, thus:
+ 0 ... 0 Inf
+ 0X ... X SNaN
+ 1X ... X QNaN
+ where at least one of the Xs is not zero.
+ */
+ if (bexp == 0x7FF) {
+ if (mantissaIsZero) {
+ /* Produce an appropriately signed infinity:
+ S 1--1 (15) 1 0--0 (63)
+ */
+ f80[9] = toUChar( (sign << 7) | 0x7F );
+ f80[8] = 0xFF;
+ f80[7] = 0x80;
+ f80[6] = f80[5] = f80[4] = f80[3]
+ = f80[2] = f80[1] = f80[0] = 0;
+ return;
+ }
+ /* So it's either a QNaN or SNaN. Distinguish by considering
+ bit 51. Note, this destroys all the trailing bits
+ (identity?) of the NaN. IEEE754 doesn't require preserving
+ these (it only requires that there be one QNaN value and one
+ SNaN value), but x87 does seem to have some ability to
+ preserve them. Anyway, here, the NaN's identity is
+ destroyed. Could be improved. */
+ if (f64[6] & 8) {
+ /* QNaN. Make a QNaN:
+ S 1--1 (15) 1 1--1 (63)
+ */
+ f80[9] = toUChar( (sign << 7) | 0x7F );
+ f80[8] = 0xFF;
+ f80[7] = 0xFF;
+ f80[6] = f80[5] = f80[4] = f80[3]
+ = f80[2] = f80[1] = f80[0] = 0xFF;
+ } else {
+ /* SNaN. Make a SNaN:
+ S 1--1 (15) 0 1--1 (63)
+ */
+ f80[9] = toUChar( (sign << 7) | 0x7F );
+ f80[8] = 0xFF;
+ f80[7] = 0x7F;
+ f80[6] = f80[5] = f80[4] = f80[3]
+ = f80[2] = f80[1] = f80[0] = 0xFF;
+ }
+ return;
+ }
+
+ /* It's not a zero, denormal, infinity or nan. So it must be a
+ normalised number. Rebias the exponent and build the new
+ number. */
+ bexp += (16383 - 1023);
+
+ f80[9] = toUChar( (sign << 7) | ((bexp >> 8) & 0xFF) );
+ f80[8] = toUChar( bexp & 0xFF );
+ f80[7] = toUChar( (1 << 7) | ((f64[6] << 3) & 0x78)
+ | ((f64[5] >> 5) & 7) );
+ f80[6] = toUChar( ((f64[5] << 3) & 0xF8) | ((f64[4] >> 5) & 7) );
+ f80[5] = toUChar( ((f64[4] << 3) & 0xF8) | ((f64[3] >> 5) & 7) );
+ f80[4] = toUChar( ((f64[3] << 3) & 0xF8) | ((f64[2] >> 5) & 7) );
+ f80[3] = toUChar( ((f64[2] << 3) & 0xF8) | ((f64[1] >> 5) & 7) );
+ f80[2] = toUChar( ((f64[1] << 3) & 0xF8) | ((f64[0] >> 5) & 7) );
+ f80[1] = toUChar( ((f64[0] << 3) & 0xF8) );
+ f80[0] = toUChar( 0 );
+}
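+
+/* Usage sketch, assuming a little-endian host (so that the in-memory
+   byte image of a double really is the f64le format expected here):
+
+      double d = 1.0;
+      UChar f80[10], f64img[8];
+      convert_f64le_to_f80le( (UChar*)&d, f80 );
+      convert_f80le_to_f64le( f80, f64img );  // recovers the image of 1.0
+*/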
+
+
+/* Convert an x87 extended double (80-bit) into an IEEE 754 double
+ (64-bit), mimicking the hardware fairly closely. Both numbers are
+ stored little-endian. Limitations, both of which could be fixed,
+ given some level of hassle:
+
+ * Rounding following truncation could be a bit better.
+
+ * Identity of NaNs is not preserved.
+
+ See comments in the code for more details.
+*/
+void convert_f80le_to_f64le ( /*IN*/UChar* f80, /*OUT*/UChar* f64 )
+{
+ Bool isInf;
+ Int bexp, i, j;
+ UChar sign;
+
+ sign = toUChar((f80[9] >> 7) & 1);
+ bexp = (((UInt)f80[9]) << 8) | (UInt)f80[8];
+ bexp &= 0x7FFF;
+
+ /* If the exponent is zero, either we have a zero or a denormal.
+ But an extended precision denormal becomes a double precision
+ zero, so in either case, just produce the appropriately signed
+ zero. */
+ if (bexp == 0) {
+ f64[7] = toUChar(sign << 7);
+ f64[6] = f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0;
+ return;
+ }
+
+ /* If the exponent is 7FFF, this is either an Infinity, a SNaN or
+ QNaN, as determined by examining bits 62:0, thus:
+ 0 ... 0 Inf
+ 0X ... X SNaN
+ 1X ... X QNaN
+ where at least one of the Xs is not zero.
+ */
+ if (bexp == 0x7FFF) {
+ isInf = toBool(
+ (f80[7] & 0x7F) == 0
+ && f80[6] == 0 && f80[5] == 0 && f80[4] == 0
+ && f80[3] == 0 && f80[2] == 0 && f80[1] == 0
+ && f80[0] == 0
+ );
+ if (isInf) {
+ if (0 == (f80[7] & 0x80))
+            goto weird_NaN;
+ /* Produce an appropriately signed infinity:
+ S 1--1 (11) 0--0 (52)
+ */
+ f64[7] = toUChar((sign << 7) | 0x7F);
+ f64[6] = 0xF0;
+ f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0;
+ return;
+ }
+ /* So it's either a QNaN or SNaN. Distinguish by considering
+ bit 62. Note, this destroys all the trailing bits
+ (identity?) of the NaN. IEEE754 doesn't require preserving
+ these (it only requires that there be one QNaN value and one
+ SNaN value), but x87 does seem to have some ability to
+ preserve them. Anyway, here, the NaN's identity is
+ destroyed. Could be improved. */
+      if (f80[7] & 0x40) {
+ /* QNaN. Make a QNaN:
+ S 1--1 (11) 1 1--1 (51)
+ */
+ f64[7] = toUChar((sign << 7) | 0x7F);
+ f64[6] = 0xFF;
+ f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0xFF;
+ } else {
+ /* SNaN. Make a SNaN:
+ S 1--1 (11) 0 1--1 (51)
+ */
+ f64[7] = toUChar((sign << 7) | 0x7F);
+ f64[6] = 0xF7;
+ f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0xFF;
+ }
+ return;
+ }
+
+ /* If it's not a Zero, NaN or Inf, and the integer part (bit 62) is
+ zero, the x87 FPU appears to consider the number denormalised
+ and converts it to a QNaN. */
+ if (0 == (f80[7] & 0x80)) {
+   weird_NaN:
+ /* Strange hardware QNaN:
+ S 1--1 (11) 1 0--0 (51)
+ */
+ /* On a PIII, these QNaNs always appear with sign==1. I have
+ no idea why. */
+ f64[7] = (1 /*sign*/ << 7) | 0x7F;
+ f64[6] = 0xF8;
+ f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0;
+ return;
+ }
+
+ /* It's not a zero, denormal, infinity or nan. So it must be a
+ normalised number. Rebias the exponent and consider. */
+ bexp -= (16383 - 1023);
+ if (bexp >= 0x7FF) {
+ /* It's too big for a double. Construct an infinity. */
+ f64[7] = toUChar((sign << 7) | 0x7F);
+ f64[6] = 0xF0;
+ f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0;
+ return;
+ }
+
+ if (bexp <= 0) {
+ /* It's too small for a normalised double. First construct a
+ zero and then see if it can be improved into a denormal. */
+ f64[7] = toUChar(sign << 7);
+ f64[6] = f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0;
+
+ if (bexp < -52)
+ /* Too small even for a denormal. */
+ return;
+
+ /* Ok, let's make a denormal. Note, this is SLOW. */
+      /* Copy bits 63, 62, 61, etc of the src mantissa into the dst
+         at indexes 51+bexp, 50+bexp, etc, stopping when the dst
+         index goes negative. */
+ /* bexp is in range -52 .. 0 inclusive */
+ for (i = 63; i >= 0; i--) {
+ j = i - 12 + bexp;
+ if (j < 0) break;
+ /* We shouldn't really call vassert from generated code. */
+ vassert(j >= 0 && j < 52);
+ write_bit_array ( f64,
+ j,
+ read_bit_array ( f80, i ) );
+ }
+ /* and now we might have to round ... */
+ if (read_bit_array(f80, 10+1 - bexp) == 1)
+ goto do_rounding;
+
+ return;
+ }
+
+ /* Ok, it's a normalised number which is representable as a double.
+ Copy the exponent and mantissa into place. */
+ /*
+ for (i = 0; i < 52; i++)
+ write_bit_array ( f64,
+ i,
+ read_bit_array ( f80, i+11 ) );
+ */
+ f64[0] = toUChar( (f80[1] >> 3) | (f80[2] << 5) );
+ f64[1] = toUChar( (f80[2] >> 3) | (f80[3] << 5) );
+ f64[2] = toUChar( (f80[3] >> 3) | (f80[4] << 5) );
+ f64[3] = toUChar( (f80[4] >> 3) | (f80[5] << 5) );
+ f64[4] = toUChar( (f80[5] >> 3) | (f80[6] << 5) );
+ f64[5] = toUChar( (f80[6] >> 3) | (f80[7] << 5) );
+
+ f64[6] = toUChar( ((bexp << 4) & 0xF0) | ((f80[7] >> 3) & 0x0F) );
+
+ f64[7] = toUChar( (sign << 7) | ((bexp >> 4) & 0x7F) );
+
+ /* Now consider any rounding that needs to happen as a result of
+ truncating the mantissa. */
+   if (f80[1] & 4) /* read_bit_array(f80, 10) == 1 */ {
+
+ /* If the bottom bits of f80 are "100 0000 0000", then the
+ infinitely precise value is deemed to be mid-way between the
+ two closest representable values. Since we're doing
+ round-to-nearest (the default mode), in that case it is the
+ bit immediately above which indicates whether we should round
+ upwards or not -- if 0, we don't. All that is encapsulated
+ in the following simple test. */
+ if ((f80[1] & 0xF) == 4/*0100b*/ && f80[0] == 0)
+ return;
+
+ do_rounding:
+ /* Round upwards. This is a kludge. Once in every 2^24
+ roundings (statistically) the bottom three bytes are all 0xFF
+ and so we don't round at all. Could be improved. */
+      if (f64[0] != 0xFF) {
+         f64[0]++;
+      }
+      else if (f64[0] == 0xFF && f64[1] != 0xFF) {
+         f64[0] = 0;
+         f64[1]++;
+      }
+      else if (f64[0] == 0xFF && f64[1] == 0xFF && f64[2] != 0xFF) {
+         f64[0] = 0;
+         f64[1] = 0;
+         f64[2]++;
+      }
+      /* else we don't round, but we should. */
+ }
+}
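+
+/* Worked example (illustrative only, not from the original source):
+   the 80-bit encoding of 1.0 -- explicit integer bit set, biased
+   exponent 16383, mantissa 0x8000000000000000 -- takes the
+   normalised-number path above and yields the IEEE754 double
+   0x3FF0000000000000, with no rounding needed. */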
+
+
+/* CALLED FROM GENERATED CODE: CLEAN HELPER */
+/* Extract the signed significand or exponent component as per
+ fxtract. Arg and result are doubles travelling under the guise of
+ ULongs. Returns significand when getExp is zero and exponent
+ otherwise. */
+ULong x86amd64g_calculate_FXTRACT ( ULong arg, HWord getExp )
+{
+ ULong uSig, uExp;
+ /* Long sSig; */
+ Int sExp, i;
+ UInt sign, expExp;
+
+ /*
+ S 7FF 0------0 infinity
+ S 7FF 0X-----X snan
+ S 7FF 1X-----X qnan
+ */
+ const ULong posInf = 0x7FF0000000000000ULL;
+ const ULong negInf = 0xFFF0000000000000ULL;
+ const ULong nanMask = 0x7FF0000000000000ULL;
+ const ULong qNan = 0x7FF8000000000000ULL;
+ const ULong posZero = 0x0000000000000000ULL;
+ const ULong negZero = 0x8000000000000000ULL;
+ const ULong bit51 = 1ULL << 51;
+ const ULong bit52 = 1ULL << 52;
+ const ULong sigMask = bit52 - 1;
+
+ /* Mimic Core i5 behaviour for special cases. */
+ if (arg == posInf)
+      return posInf; /* significand and exponent of +inf are both +inf */
+ if (arg == negInf)
+ return getExp ? posInf : negInf;
+ if ((arg & nanMask) == nanMask)
+ return qNan | (arg & (1ULL << 63));
+ if (arg == posZero)
+ return getExp ? negInf : posZero;
+ if (arg == negZero)
+ return getExp ? negInf : negZero;
+
+ /* Split into sign, exponent and significand. */
+ sign = ((UInt)(arg >> 63)) & 1;
+
+ /* Mask off exponent & sign. uSig is in range 0 .. 2^52-1. */
+ uSig = arg & sigMask;
+
+ /* Get the exponent. */
+ sExp = ((Int)(arg >> 52)) & 0x7FF;
+
+ /* Deal with denormals: if the exponent is zero, then the
+ significand cannot possibly be zero (negZero/posZero are handled
+ above). Shift the significand left until bit 51 of it becomes
+ 1, and decrease the exponent accordingly.
+ */
+ if (sExp == 0) {
+ for (i = 0; i < 52; i++) {
+ if (uSig & bit51)
+ break;
+ uSig <<= 1;
+ sExp--;
+ }
+ uSig <<= 1;
+ } else {
+ /* Add the implied leading-1 in the significand. */
+ uSig |= bit52;
+ }
+
+ /* Roll in the sign. */
+   /* sSig = uSig; */
+   /* if (sign) sSig = -sSig; */
+
+ /* Convert sig into a double. This should be an exact conversion.
+ Then divide by 2^52, which should give a value in the range 1.0
+ to 2.0-epsilon, at least for normalised args. */
+ /* dSig = (Double)sSig; */
+ /* dSig /= 67108864.0; */ /* 2^26 */
+ /* dSig /= 67108864.0; */ /* 2^26 */
+ uSig &= sigMask;
+ uSig |= 0x3FF0000000000000ULL;
+ if (sign)
+ uSig ^= negZero;
+
+ /* Convert exp into a double. Also an exact conversion. */
+ /* dExp = (Double)(sExp - 1023); */
+ sExp -= 1023;
+ if (sExp == 0) {
+ uExp = 0;
+ } else {
+ uExp = sExp < 0 ? -sExp : sExp;
+      expExp = 0x3FF + 52;
+ /* 1 <= uExp <= 1074 */
+ /* Skip first 42 iterations of normalisation loop as we know they
+ will always happen */
+ uExp <<= 42;
+ expExp -= 42;
+ for (i = 0; i < 52-42; i++) {
+ if (uExp & bit52)
+ break;
+ uExp <<= 1;
+ expExp--;
+ }
+ uExp &= sigMask;
+ uExp |= ((ULong)expExp) << 52;
+ if (sExp < 0) uExp ^= negZero;
+ }
+
+ return getExp ? uExp : uSig;
+}
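+
+/* Worked example (illustrative only, not from the original source):
+   for arg == 0x4008000000000000 (3.0), the above returns
+   0x3FF8000000000000 (1.5) when getExp is zero and
+   0x3FF0000000000000 (1.0) otherwise, consistent with
+   3.0 == 1.5 * 2^1. */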
+
+
+
+/*---------------------------------------------------------*/
+/*--- SSE4.2 PCMP{E,I}STR{I,M} helpers ---*/
+/*---------------------------------------------------------*/
+
+/* We need the definitions for OSZACP eflags/rflags offsets.
+ #including guest_{amd64,x86}_defs.h causes chaos, so just copy the
+ required values directly. They are not going to change in the
+ foreseeable future :-)
+*/
+
+#define SHIFT_O 11
+#define SHIFT_S 7
+#define SHIFT_Z 6
+#define SHIFT_A 4
+#define SHIFT_C 0
+#define SHIFT_P 2
+
+#define MASK_O (1 << SHIFT_O)
+#define MASK_S (1 << SHIFT_S)
+#define MASK_Z (1 << SHIFT_Z)
+#define MASK_A (1 << SHIFT_A)
+#define MASK_C (1 << SHIFT_C)
+#define MASK_P (1 << SHIFT_P)
+
+
+/* Count leading zeroes, w/ 0-produces-32 semantics, a la Hacker's
+ Delight. */
+static UInt clz32 ( UInt x )
+{
+ Int y, m, n;
+ y = -(x >> 16);
+ m = (y >> 16) & 16;
+ n = 16 - m;
+ x = x >> m;
+ y = x - 0x100;
+ m = (y >> 16) & 8;
+ n = n + m;
+ x = x << m;
+ y = x - 0x1000;
+ m = (y >> 16) & 4;
+ n = n + m;
+ x = x << m;
+ y = x - 0x4000;
+ m = (y >> 16) & 2;
+ n = n + m;
+ x = x << m;
+ y = x >> 14;
+ m = y & ~(y >> 1);
+ return n + 2 - m;
+}
+
+static UInt ctz32 ( UInt x )
+{
+ return 32 - clz32((~x) & (x-1));
+}
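+
+/* Quick sanity values for the two helpers above (illustrative only):
+   clz32(0) == 32, clz32(1) == 31, clz32(0x80000000) == 0; hence
+   ctz32(0) == 32 and ctz32(8) == 3.  The ctz32 identity works
+   because (~x) & (x-1) isolates exactly the bits below the lowest
+   set bit of x. */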
+
+/* Convert a 4-bit value to a 32-bit value by cloning each bit 8
+ times. There's surely a better way to do this, but I don't know
+ what it is. */
+static UInt bits4_to_bytes4 ( UInt bits4 )
+{
+ UInt r = 0;
+ r |= (bits4 & 1) ? 0x000000FF : 0;
+ r |= (bits4 & 2) ? 0x0000FF00 : 0;
+ r |= (bits4 & 4) ? 0x00FF0000 : 0;
+ r |= (bits4 & 8) ? 0xFF000000 : 0;
+ return r;
+}
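+
+/* For example, bits4_to_bytes4(0x5) == 0x00FF00FF and
+   bits4_to_bytes4(0xA) == 0xFF00FF00. */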
+
+
+/* Given partial results from a pcmpXstrX operation (intRes1,
+ basically), generate an I- or M-format output value, also the new
+ OSZACP flags. */
+static
+void compute_PCMPxSTRx_gen_output (/*OUT*/V128* resV,
+ /*OUT*/UInt* resOSZACP,
+ UInt intRes1,
+ UInt zmaskL, UInt zmaskR,
+ UInt validL,
+ UInt pol, UInt idx,
+ Bool isxSTRM )
+{
+ vassert((pol >> 2) == 0);
+ vassert((idx >> 1) == 0);
+
+ UInt intRes2 = 0;
+ switch (pol) {
+ case 0: intRes2 = intRes1; break; // pol +
+ case 1: intRes2 = ~intRes1; break; // pol -
+ case 2: intRes2 = intRes1; break; // pol m+
+ case 3: intRes2 = intRes1 ^ validL; break; // pol m-
+ }
+ intRes2 &= 0xFFFF;
+
+ if (isxSTRM) {
+
+ // generate M-format output (a bit or byte mask in XMM0)
+ if (idx) {
+ resV->w32[0] = bits4_to_bytes4( (intRes2 >> 0) & 0xF );
+ resV->w32[1] = bits4_to_bytes4( (intRes2 >> 4) & 0xF );
+ resV->w32[2] = bits4_to_bytes4( (intRes2 >> 8) & 0xF );
+ resV->w32[3] = bits4_to_bytes4( (intRes2 >> 12) & 0xF );
+ } else {
+ resV->w32[0] = intRes2 & 0xFFFF;
+ resV->w32[1] = 0;
+ resV->w32[2] = 0;
+ resV->w32[3] = 0;
+ }
+
+ } else {
+
+ // generate I-format output (an index in ECX)
+ // generate ecx value
+ UInt newECX = 0;
+ if (idx) {
+ // index of ms-1-bit
+ newECX = intRes2 == 0 ? 16 : (31 - clz32(intRes2));
+ } else {
+ // index of ls-1-bit
+ newECX = intRes2 == 0 ? 16 : ctz32(intRes2);
+ }
+
+ resV->w32[0] = newECX;
+ resV->w32[1] = 0;
+ resV->w32[2] = 0;
+ resV->w32[3] = 0;
+
+ }
+
+ // generate new flags, common to all ISTRI and ISTRM cases
+ *resOSZACP // A, P are zero
+ = ((intRes2 == 0) ? 0 : MASK_C) // C == 0 iff intRes2 == 0
+ | ((zmaskL == 0) ? 0 : MASK_Z) // Z == 1 iff any in argL is 0
+ | ((zmaskR == 0) ? 0 : MASK_S) // S == 1 iff any in argR is 0
+ | ((intRes2 & 1) << SHIFT_O); // O == IntRes2[0]
+}
+
+
+/* Compute result and new OSZACP flags for all PCMP{E,I}STR{I,M}
+ variants.
+
+ For xSTRI variants, the new ECX value is placed in the 32 bits
+ pointed to by *resV, and the top 96 bits are zeroed. For xSTRM
+ variants, the result is a 128 bit value and is placed at *resV in
+ the obvious way.
+
+ For all variants, the new OSZACP value is placed at *resOSZACP.
+
+ argLV and argRV are the vector args. The caller must prepare a
+ 16-bit mask for each, zmaskL and zmaskR. For ISTRx variants this
+   must be 1 for each zero byte of the respective arg.  For ESTRx
+ variants this is derived from the explicit length indication, and
+ must be 0 in all places except at the bit index corresponding to
+ the valid length (0 .. 16). If the valid length is 16 then the
+ mask must be all zeroes. In all cases, bits 31:16 must be zero.
+
+   imm8 is the original immediate from the instruction.  isxSTRM
+   indicates whether this is an xSTRM or xSTRI variant, which controls
+   how much of *resV is written.
+
+ If the given imm8 case can be handled, the return value is True.
+   If not, False is returned, and neither *resV nor *resOSZACP is
+   altered.
+*/
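+
+/* Worked examples for the zmask convention (illustrative only): for
+   an ISTRx variant with argL holding "ab" followed by zero bytes,
+   bytes 2..15 are zero, so zmaskL == 0xFFFC; for an ESTRx variant
+   with an explicit length of 5, zmaskL == (1 << 5) == 0x20, and with
+   length 16, zmaskL == 0. */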
+
+Bool compute_PCMPxSTRx ( /*OUT*/V128* resV,
+ /*OUT*/UInt* resOSZACP,
+ V128* argLV, V128* argRV,
+ UInt zmaskL, UInt zmaskR,
+ UInt imm8, Bool isxSTRM )
+{
+ vassert(imm8 < 0x80);
+ vassert((zmaskL >> 16) == 0);
+ vassert((zmaskR >> 16) == 0);
+
+ /* Explicitly reject any imm8 values that haven't been validated,
+ even if they would probably work. Life is too short to have
+ unvalidated cases in the code base. */
+ switch (imm8) {
+ case 0x02: case 0x08: case 0x0A: case 0x0C: case 0x12:
+ case 0x1A: case 0x3A: case 0x44: case 0x4A:
+ break;
+ default:
+ return False;
+ }
+
+ UInt fmt = (imm8 >> 0) & 3; // imm8[1:0] data format
+ UInt agg = (imm8 >> 2) & 3; // imm8[3:2] aggregation fn
+ UInt pol = (imm8 >> 4) & 3; // imm8[5:4] polarity
+ UInt idx = (imm8 >> 6) & 1; // imm8[6] 1==msb/bytemask
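+   /* For instance, imm8 == 0x0C (one of the validated cases above)
+      decodes as fmt == 0 (unsigned bytes), agg == 3 (equal ordered,
+      ie substring search), pol == 0 (+) and idx == 0 (index of least
+      significant set bit). */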
+
+ /*----------------------------------------*/
+ /*-- strcmp on byte data --*/
+ /*----------------------------------------*/
+
+ if (agg == 2/*equal each, aka strcmp*/
+ && (fmt == 0/*ub*/ || fmt == 2/*sb*/)) {
+ Int i;
+ UChar* argL = (UChar*)argLV;
+ UChar* argR = (UChar*)argRV;
+ UInt boolResII = 0;
+ for (i = 15; i >= 0; i--) {
+ UChar cL = argL[i];
+ UChar cR = argR[i];
+ boolResII = (boolResII << 1) | (cL == cR ? 1 : 0);
+ }
+ UInt validL = ~(zmaskL | -zmaskL); // not(left(zmaskL))
+ UInt validR = ~(zmaskR | -zmaskR); // not(left(zmaskR))
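+      /* (zmask | -zmask) smears the lowest set bit of zmask up to
+         bit 31, so its complement keeps exactly the lanes below the
+         first zero byte; eg zmaskL == 0xFFFC gives validL == 0x0003. */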
+
+ // do invalidation, common to all equal-each cases
+ UInt intRes1
+ = (boolResII & validL & validR) // if both valid, use cmpres
+ | (~ (validL | validR)); // if both invalid, force 1
+ // else force 0
+ intRes1 &= 0xFFFF;
+
+ // generate I-format output
+ compute_PCMPxSTRx_gen_output(
+ resV, resOSZACP,
+ intRes1, zmaskL, zmaskR, validL, pol, idx, isxSTRM
+ );
+
+ return True;
+ }
+
+ /*----------------------------------------*/
+ /*-- set membership on byte data --*/
+ /*----------------------------------------*/
+
+ if (agg == 0/*equal any, aka find chars in a set*/
+ && (fmt == 0/*ub*/ || fmt == 2/*sb*/)) {
+ /* argL: the string, argR: charset */
+ UInt si, ci;
+ UChar* argL = (UChar*)argLV;
+ UChar* argR = (UChar*)argRV;
+ UInt boolRes = 0;
+ UInt validL = ~(zmaskL | -zmaskL); // not(left(zmaskL))
+ UInt validR = ~(zmaskR | -zmaskR); // not(left(zmaskR))
+
+ for (si = 0; si < 16; si++) {
+ if ((validL & (1 << si)) == 0)
+ // run off the end of the string.
+ break;
+ UInt m = 0;
+ for (ci = 0; ci < 16; ci++) {
+ if ((validR & (1 << ci)) == 0) break;
+ if (argR[ci] == argL[si]) { m = 1; break; }
+ }
+ boolRes |= (m << si);
+ }
+
+ // boolRes is "pre-invalidated"
+ UInt intRes1 = boolRes & 0xFFFF;
+
+ // generate I-format output
+ compute_PCMPxSTRx_gen_output(
+ resV, resOSZACP,
+ intRes1, zmaskL, zmaskR, validL, pol, idx, isxSTRM
+ );
+
+ return True;
+ }
+
+ /*----------------------------------------*/
+ /*-- substring search on byte data --*/
+ /*----------------------------------------*/
+
+ if (agg == 3/*equal ordered, aka substring search*/
+ && (fmt == 0/*ub*/ || fmt == 2/*sb*/)) {
+
+ /* argL: haystack, argR: needle */
+ UInt ni, hi;
+ UChar* argL = (UChar*)argLV;
+ UChar* argR = (UChar*)argRV;
+ UInt boolRes = 0;
+ UInt validL = ~(zmaskL | -zmaskL); // not(left(zmaskL))
+ UInt validR = ~(zmaskR | -zmaskR); // not(left(zmaskR))
+ for (hi = 0; hi < 16; hi++) {
+ if ((validL & (1 << hi)) == 0)
+ // run off the end of the haystack
+ break;
+ UInt m = 1;
+ for (ni = 0; ni < 16; ni++) {
+ if ((validR & (1 << ni)) == 0) break;
+ UInt i = ni + hi;
+ if (i >= 16) break;
+ if (argL[i] != argR[ni]) { m = 0; break; }
+ }
+ boolRes |= (m << hi);
+ }
+
+ // boolRes is "pre-invalidated"
+ UInt intRes1 = boolRes & 0xFFFF;
+
+ // generate I-format output
+ compute_PCMPxSTRx_gen_output(
+ resV, resOSZACP,
+ intRes1, zmaskL, zmaskR, validL, pol, idx, isxSTRM
+ );
+
+ return True;
+ }
+
+ /*----------------------------------------*/
+ /*-- ranges, unsigned byte data --*/
+ /*----------------------------------------*/
+
+ if (agg == 1/*ranges*/
+ && fmt == 0/*ub*/) {
+
+ /* argL: string, argR: range-pairs */
+ UInt ri, si;
+ UChar* argL = (UChar*)argLV;
+ UChar* argR = (UChar*)argRV;
+ UInt boolRes = 0;
+ UInt validL = ~(zmaskL | -zmaskL); // not(left(zmaskL))
+ UInt validR = ~(zmaskR | -zmaskR); // not(left(zmaskR))
+ for (si = 0; si < 16; si++) {
+ if ((validL & (1 << si)) == 0)
+ // run off the end of the string
+ break;
+ UInt m = 0;
+ for (ri = 0; ri < 16; ri += 2) {
+ if ((validR & (3 << ri)) != (3 << ri)) break;
+ if (argR[ri] <= argL[si] && argL[si] <= argR[ri+1]) {
+ m = 1; break;
+ }
+ }
+ boolRes |= (m << si);
+ }
+
+ // boolRes is "pre-invalidated"
+ UInt intRes1 = boolRes & 0xFFFF;
+
+ // generate I-format output
+ compute_PCMPxSTRx_gen_output(
+ resV, resOSZACP,
+ intRes1, zmaskL, zmaskR, validL, pol, idx, isxSTRM
+ );
+
+ return True;
+ }
+
+ return False;
+}
+
+
+/*---------------------------------------------------------------*/
+/*--- end guest_generic_x87.c ---*/
+/*---------------------------------------------------------------*/
diff --git a/VEX/priv/guest_generic_x87.h b/VEX/priv/guest_generic_x87.h
new file mode 100644
index 0000000..9cbe23b
--- /dev/null
+++ b/VEX/priv/guest_generic_x87.h
@@ -0,0 +1,114 @@
+
+/*---------------------------------------------------------------*/
+/*--- begin guest_generic_x87.h ---*/
+/*---------------------------------------------------------------*/
+
+/*
+ This file is part of Valgrind, a dynamic binary instrumentation
+ framework.
+
+ Copyright (C) 2004-2010 OpenWorks LLP
+ info@open-works.net
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA.
+
+ The GNU General Public License is contained in the file COPYING.
+
+ Neither the names of the U.S. Department of Energy nor the
+ University of California nor the names of its contributors may be
+ used to endorse or promote products derived from this software
+ without prior written permission.
+*/
+
+/* This file contains functions for doing some x87-specific
+ operations. Both the amd64 and x86 front ends (guests) indirectly
+ call these functions via guest helper calls. By putting them here,
+ code duplication is avoided. Some of these functions are tricky
+ and hard to verify, so there is much to be said for only having one
+ copy thereof.
+*/
+
+#ifndef __VEX_GUEST_GENERIC_X87_H
+#define __VEX_GUEST_GENERIC_X87_H
+
+#include "libvex_basictypes.h"
+
+
+/* Convert an IEEE754 double (64-bit) into an x87 extended double
+   (80-bit), mimicking the hardware fairly closely.  Both numbers are
+ stored little-endian. Limitations, all of which could be fixed,
+ given some level of hassle:
+
+ * Identity of NaNs is not preserved.
+
+ See comments in the code for more details.
+*/
+extern
+void convert_f64le_to_f80le ( /*IN*/UChar* f64, /*OUT*/UChar* f80 );
+
+
+/* Convert an x87 extended double (80-bit) into an IEEE 754 double
+ (64-bit), mimicking the hardware fairly closely. Both numbers are
+ stored little-endian. Limitations, both of which could be fixed,
+ given some level of hassle:
+
+ * Rounding following truncation could be a bit better.
+
+ * Identity of NaNs is not preserved.
+
+ See comments in the code for more details.
+*/
+extern
+void convert_f80le_to_f64le ( /*IN*/UChar* f80, /*OUT*/UChar* f64 );
+
+
+/* Layout of the real x87 state. */
+typedef
+ struct {
+ UShort env[14];
+ UChar reg[80];
+ }
+ Fpu_State;
+
+/* Offsets, in 16-bit ints, into the FPU environment (env) area. */
+#define FP_ENV_CTRL 0
+#define FP_ENV_STAT 2
+#define FP_ENV_TAG 4
+#define FP_ENV_IP 6 /* and 7 */
+#define FP_ENV_CS 8
+#define FP_ENV_OPOFF 10 /* and 11 */
+#define FP_ENV_OPSEL 12
+#define FP_REG(ii) (10*(7-(ii)))
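+
+/* Note the reversed layout implied by FP_REG: each register takes 10
+   bytes of reg[], with FP_REG(7) == 0 and FP_REG(0) == 70, so
+   physical register 0 occupies reg[70] .. reg[79]. */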
+
+
+/* Do the computations for x86/amd64 FXTRACT. Called directly from
+ generated code. CLEAN HELPER. */
+extern ULong x86amd64g_calculate_FXTRACT ( ULong arg, HWord getExp );
+
+/* Compute result and new OSZACP flags for all PCMP{E,I}STR{I,M}
+ variants. See bigger comment on implementation of this function
+ for details on call/return conventions. */
+extern Bool compute_PCMPxSTRx ( /*OUT*/V128* resV,
+ /*OUT*/UInt* resOSZACP,
+ V128* argLV, V128* argRV,
+ UInt zmaskL, UInt zmaskR,
+ UInt imm8, Bool isxSTRM );
+
+#endif /* ndef __VEX_GUEST_GENERIC_X87_H */
+
+/*---------------------------------------------------------------*/
+/*--- end guest_generic_x87.h ---*/
+/*---------------------------------------------------------------*/
diff --git a/VEX/priv/guest_ppc_defs.h b/VEX/priv/guest_ppc_defs.h
new file mode 100644
index 0000000..dd3c62e
--- /dev/null
+++ b/VEX/priv/guest_ppc_defs.h
@@ -0,0 +1,161 @@
+
+/*---------------------------------------------------------------*/
+/*--- begin guest_ppc_defs.h ---*/
+/*---------------------------------------------------------------*/
+
+/*
+ This file is part of Valgrind, a dynamic binary instrumentation
+ framework.
+
+ Copyright (C) 2004-2010 OpenWorks LLP
+ info@open-works.net
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA.
+
+ The GNU General Public License is contained in the file COPYING.
+
+ Neither the names of the U.S. Department of Energy nor the
+ University of California nor the names of its contributors may be
+ used to endorse or promote products derived from this software
+ without prior written permission.
+*/
+
+/* Only to be used within the guest-ppc directory. */
+
+
+#ifndef __VEX_GUEST_PPC_DEFS_H
+#define __VEX_GUEST_PPC_DEFS_H
+
+
+/*---------------------------------------------------------*/
+/*--- ppc to IR conversion ---*/
+/*---------------------------------------------------------*/
+
+/* Convert one ppc insn to IR. See the type DisOneInstrFn in
+ bb_to_IR.h. */
+extern
+DisResult disInstr_PPC ( IRSB* irbb,
+ Bool put_IP,
+ Bool (*resteerOkFn) ( void*, Addr64 ),
+ Bool resteerCisOk,
+ void* callback_opaque,
+ UChar* guest_code,
+ Long delta,
+ Addr64 guest_IP,
+ VexArch guest_arch,
+ VexArchInfo* archinfo,
+ VexAbiInfo* abiinfo,
+ Bool host_bigendian );
+
+/* Used by the optimiser to specialise calls to helpers. */
+extern
+IRExpr* guest_ppc32_spechelper ( HChar* function_name,
+ IRExpr** args,
+ IRStmt** precedingStmts,
+ Int n_precedingStmts );
+
+extern
+IRExpr* guest_ppc64_spechelper ( HChar* function_name,
+ IRExpr** args,
+ IRStmt** precedingStmts,
+ Int n_precedingStmts );
+
+/* Describes to the optimiser which parts of the guest state require
+ precise memory exceptions. This is logically part of the guest
+ state description. */
+extern
+Bool guest_ppc32_state_requires_precise_mem_exns ( Int, Int );
+
+extern
+Bool guest_ppc64_state_requires_precise_mem_exns ( Int, Int );
+
+extern
+VexGuestLayout ppc32Guest_layout;
+
+extern
+VexGuestLayout ppc64Guest_layout;
+
+
+/* FP Rounding mode - different encoding to IR */
+typedef
+ enum {
+ PPCrm_NEAREST = 0,
+ PPCrm_NegINF = 1,
+ PPCrm_PosINF = 2,
+ PPCrm_ZERO = 3
+ } PPCRoundingMode;
+
+/* Floating point comparison values - different encoding to IR */
+typedef
+ enum {
+ PPCcr_LT = 0x8,
+ PPCcr_GT = 0x4,
+ PPCcr_EQ = 0x2,
+ PPCcr_UN = 0x1
+ }
+ PPCCmpF64Result;
+
+/*
+ Enumeration for xer_ca/ov calculation helper functions
+*/
+enum {
+ /* 0 */ PPCG_FLAG_OP_ADD=0, // addc[o], addic
+ /* 1 */ PPCG_FLAG_OP_ADDE, // adde[o], addme[o], addze[o]
+ /* 2 */ PPCG_FLAG_OP_DIVW, // divwo
+ /* 3 */ PPCG_FLAG_OP_DIVWU, // divwuo
+ /* 4 */ PPCG_FLAG_OP_MULLW, // mullwo
+ /* 5 */ PPCG_FLAG_OP_NEG, // nego
+ /* 6 */ PPCG_FLAG_OP_SUBF, // subfo
+ /* 7 */ PPCG_FLAG_OP_SUBFC, // subfc[o]
+ /* 8 */ PPCG_FLAG_OP_SUBFE, // subfe[o], subfme[o], subfze[o]
+ /* 9 */ PPCG_FLAG_OP_SUBFI, // subfic
+ /* 10 */ PPCG_FLAG_OP_SRAW, // sraw
+ /* 11 */ PPCG_FLAG_OP_SRAWI, // srawi
+ /* 12 */ PPCG_FLAG_OP_SRAD, // srad
+ /* 13 */ PPCG_FLAG_OP_SRADI, // sradi
+ PPCG_FLAG_OP_NUMBER
+};
+
+
+/*---------------------------------------------------------*/
+/*--- ppc guest helpers ---*/
+/*---------------------------------------------------------*/
+
+/* --- CLEAN HELPERS --- */
+
+/* none, right now */
+
+/* --- DIRTY HELPERS --- */
+
+extern ULong ppcg_dirtyhelper_MFTB ( void );
+
+extern UInt ppc32g_dirtyhelper_MFSPR_268_269 ( UInt );
+
+extern UInt ppc32g_dirtyhelper_MFSPR_287 ( void );
+
+extern void ppc32g_dirtyhelper_LVS ( VexGuestPPC32State* gst,
+ UInt vD_idx, UInt sh,
+ UInt shift_right );
+
+extern void ppc64g_dirtyhelper_LVS ( VexGuestPPC64State* gst,
+ UInt vD_idx, UInt sh,
+ UInt shift_right );
+
+#endif /* ndef __VEX_GUEST_PPC_DEFS_H */
+
+/*---------------------------------------------------------------*/
+/*--- end guest_ppc_defs.h ---*/
+/*---------------------------------------------------------------*/
diff --git a/VEX/priv/guest_ppc_helpers.c b/VEX/priv/guest_ppc_helpers.c
new file mode 100644
index 0000000..e056a65
--- /dev/null
+++ b/VEX/priv/guest_ppc_helpers.c
@@ -0,0 +1,837 @@
+
+/*---------------------------------------------------------------*/
+/*--- begin guest_ppc_helpers.c ---*/
+/*---------------------------------------------------------------*/
+
+/*
+ This file is part of Valgrind, a dynamic binary instrumentation
+ framework.
+
+ Copyright (C) 2004-2010 OpenWorks LLP
+ info@open-works.net
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA.
+
+ The GNU General Public License is contained in the file COPYING.
+
+ Neither the names of the U.S. Department of Energy nor the
+ University of California nor the names of its contributors may be
+ used to endorse or promote products derived from this software
+ without prior written permission.
+*/
+
+#include "libvex_basictypes.h"
+#include "libvex_emwarn.h"
+#include "libvex_guest_ppc32.h"
+#include "libvex_guest_ppc64.h"
+#include "libvex_ir.h"
+#include "libvex.h"
+
+#include "main_util.h"
+#include "guest_generic_bb_to_IR.h"
+#include "guest_ppc_defs.h"
+
+
+/* This file contains helper functions for ppc32 and ppc64 guest code.
+ Calls to these functions are generated by the back end. These
+ calls are of course in the host machine code and this file will be
+ compiled to host machine code, so that all makes sense.
+
+ Only change the signatures of these helper functions very
+ carefully. If you change the signature here, you'll have to change
+ the parameters passed to it in the IR calls constructed by
+ guest-ppc/toIR.c.
+*/
+
+
+/*---------------------------------------------------------------*/
+/*--- Misc integer helpers. ---*/
+/*---------------------------------------------------------------*/
+
+/* CALLED FROM GENERATED CODE */
+/* DIRTY HELPER (non-referentially-transparent) */
+/* Horrible hack. On non-ppc platforms, return 1. */
+/* Reads a complete, consistent 64-bit TB value. */
+ULong ppcg_dirtyhelper_MFTB ( void )
+{
+# if defined(__powerpc__) || defined(_AIX)
+ ULong res;
+ UInt lo, hi1, hi2;
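+   /* Read the upper half, the lower half, then the upper half again;
+      if the two upper reads differ, the lower half wrapped in between
+      and the whole sequence must be retried. */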
+ while (1) {
+ __asm__ __volatile__ ("\n"
+ "\tmftbu %0\n"
+ "\tmftb %1\n"
+ "\tmftbu %2\n"
+ : "=r" (hi1), "=r" (lo), "=r" (hi2)
+ );
+ if (hi1 == hi2) break;
+ }
+ res = ((ULong)hi1) << 32;
+ res |= (ULong)lo;
+ return res;
+# else
+ return 1ULL;
+# endif
+}
+
+
+/* CALLED FROM GENERATED CODE */
+/* DIRTY HELPER (non-referentially transparent) */
+UInt ppc32g_dirtyhelper_MFSPR_268_269 ( UInt r269 )
+{
+# if defined(__powerpc__) || defined(_AIX)
+ UInt spr;
+ if (r269) {
+ __asm__ __volatile__("mfspr %0,269" : "=b"(spr));
+ } else {
+ __asm__ __volatile__("mfspr %0,268" : "=b"(spr));
+ }
+ return spr;
+# else
+ return 0;
+# endif
+}
+
+
+/* CALLED FROM GENERATED CODE */
+/* DIRTY HELPER (I'm not really sure what the side effects are) */
+UInt ppc32g_dirtyhelper_MFSPR_287 ( void )
+{
+# if defined(__powerpc__) || defined(_AIX)
+ UInt spr;
+ __asm__ __volatile__("mfspr %0,287" : "=b"(spr));
+ return spr;
+# else
+ return 0;
+# endif
+}
+
+
+/* CALLED FROM GENERATED CODE */
+/* DIRTY HELPER (writes guest state) */
+void ppc32g_dirtyhelper_LVS ( VexGuestPPC32State* gst,
+ UInt vD_off, UInt sh, UInt shift_right )
+{
+ static
+ UChar ref[32] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
+ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
+ 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F };
+ U128* pU128_src;
+ U128* pU128_dst;
+
+ vassert( vD_off <= sizeof(VexGuestPPC32State)-8 );
+ vassert( sh <= 15 );
+ vassert( shift_right <= 1 );
+ if (shift_right)
+ sh = 16-sh;
+ /* else shift left */
+
+ pU128_src = (U128*)&ref[sh];
+ pU128_dst = (U128*)( ((UChar*)gst) + vD_off );
+
+ (*pU128_dst)[0] = (*pU128_src)[0];
+ (*pU128_dst)[1] = (*pU128_src)[1];
+ (*pU128_dst)[2] = (*pU128_src)[2];
+ (*pU128_dst)[3] = (*pU128_src)[3];
+}
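+
+/* For example, sh == 3 with shift_right == 0 copies ref[3..18], ie
+   the byte sequence 0x03,0x04,...,0x12, into the vD slot -- the
+   permute control vector that lvsl defines for an address with low
+   nibble 3. */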
+
+/* CALLED FROM GENERATED CODE */
+/* DIRTY HELPER (writes guest state) */
+void ppc64g_dirtyhelper_LVS ( VexGuestPPC64State* gst,
+ UInt vD_off, UInt sh, UInt shift_right )
+{
+ static
+ UChar ref[32] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
+ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
+ 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F };
+ U128* pU128_src;
+ U128* pU128_dst;
+
+ vassert( vD_off <= sizeof(VexGuestPPC64State)-8 );
+ vassert( sh <= 15 );
+ vassert( shift_right <= 1 );
+ if (shift_right)
+ sh = 16-sh;
+ /* else shift left */
+
+ pU128_src = (U128*)&ref[sh];
+ pU128_dst = (U128*)( ((UChar*)gst) + vD_off );
+
+ (*pU128_dst)[0] = (*pU128_src)[0];
+ (*pU128_dst)[1] = (*pU128_src)[1];
+ (*pU128_dst)[2] = (*pU128_src)[2];
+ (*pU128_dst)[3] = (*pU128_src)[3];
+}
+
+
+/* Helper-function specialiser. */
+
+IRExpr* guest_ppc32_spechelper ( HChar* function_name,
+ IRExpr** args,
+ IRStmt** precedingStmts,
+ Int n_precedingStmts )
+{
+ return NULL;
+}
+
+IRExpr* guest_ppc64_spechelper ( HChar* function_name,
+ IRExpr** args,
+ IRStmt** precedingStmts,
+ Int n_precedingStmts )
+{
+ return NULL;
+}
+
+
+/*----------------------------------------------*/
+/*--- The exported fns .. ---*/
+/*----------------------------------------------*/
+
+/* VISIBLE TO LIBVEX CLIENT */
+UInt LibVEX_GuestPPC32_get_CR ( /*IN*/VexGuestPPC32State* vex_state )
+{
+# define FIELD(_n) \
+ ( ( (UInt) \
+ ( (vex_state->guest_CR##_n##_321 & (7<<1)) \
+ | (vex_state->guest_CR##_n##_0 & 1) \
+ ) \
+ ) \
+ << (4 * (7-(_n))) \
+ )
+
+ return
+ FIELD(0) | FIELD(1) | FIELD(2) | FIELD(3)
+ | FIELD(4) | FIELD(5) | FIELD(6) | FIELD(7);
+
+# undef FIELD
+}
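+
+/* Example: with guest_CR0_321 == 0x8 (the LT bit) and everything
+   else zero, this returns 0x80000000 -- CR field 0 occupies the top
+   nibble of the architected 32-bit CR. */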
+
+
+/* VISIBLE TO LIBVEX CLIENT */
+/* Note: %CR is 32 bits even for ppc64 */
+UInt LibVEX_GuestPPC64_get_CR ( /*IN*/VexGuestPPC64State* vex_state )
+{
+# define FIELD(_n) \
+ ( ( (UInt) \
+ ( (vex_state->guest_CR##_n##_321 & (7<<1)) \
+ | (vex_state->guest_CR##_n##_0 & 1) \
+ ) \
+ ) \
+ << (4 * (7-(_n))) \
+ )
+
+ return
+ FIELD(0) | FIELD(1) | FIELD(2) | FIELD(3)
+ | FIELD(4) | FIELD(5) | FIELD(6) | FIELD(7);
+
+# undef FIELD
+}
+
+
+/* VISIBLE TO LIBVEX CLIENT */
+void LibVEX_GuestPPC32_put_CR ( UInt cr_native,
+ /*OUT*/VexGuestPPC32State* vex_state )
+{
+ UInt t;
+
+# define FIELD(_n) \
+ do { \
+ t = cr_native >> (4*(7-(_n))); \
+ vex_state->guest_CR##_n##_0 = toUChar(t & 1); \
+ vex_state->guest_CR##_n##_321 = toUChar(t & (7<<1)); \
+ } while (0)
+
+ FIELD(0);
+ FIELD(1);
+ FIELD(2);
+ FIELD(3);
+ FIELD(4);
+ FIELD(5);
+ FIELD(6);
+ FIELD(7);
+
+# undef FIELD
+}
+
+
+/* VISIBLE TO LIBVEX CLIENT */
+/* Note: %CR is 32 bits even for ppc64 */
+void LibVEX_GuestPPC64_put_CR ( UInt cr_native,
+ /*OUT*/VexGuestPPC64State* vex_state )
+{
+ UInt t;
+
+# define FIELD(_n) \
+ do { \
+ t = cr_native >> (4*(7-(_n))); \
+ vex_state->guest_CR##_n##_0 = toUChar(t & 1); \
+ vex_state->guest_CR##_n##_321 = toUChar(t & (7<<1)); \
+ } while (0)
+
+ FIELD(0);
+ FIELD(1);
+ FIELD(2);
+ FIELD(3);
+ FIELD(4);
+ FIELD(5);
+ FIELD(6);
+ FIELD(7);
+
+# undef FIELD
+}
+
+
+/* VISIBLE TO LIBVEX CLIENT */
+UInt LibVEX_GuestPPC32_get_XER ( /*IN*/VexGuestPPC32State* vex_state )
+{
+ UInt w = 0;
+ w |= ( ((UInt)vex_state->guest_XER_BC) & 0xFF );
+ w |= ( (((UInt)vex_state->guest_XER_SO) & 0x1) << 31 );
+ w |= ( (((UInt)vex_state->guest_XER_OV) & 0x1) << 30 );
+ w |= ( (((UInt)vex_state->guest_XER_CA) & 0x1) << 29 );
+ return w;
+}
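+
+/* The architected placement reproduced here: SO, OV and CA live in
+   bits 31, 30 and 29 respectively, and the lswx/stswx byte count in
+   the low byte.  Eg guest_XER_SO == 1 alone yields 0x80000000. */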
+
+
+/* VISIBLE TO LIBVEX CLIENT */
+/* Note: %XER is 32 bits even for ppc64 */
+UInt LibVEX_GuestPPC64_get_XER ( /*IN*/VexGuestPPC64State* vex_state )
+{
+ UInt w = 0;
+ w |= ( ((UInt)vex_state->guest_XER_BC) & 0xFF );
+ w |= ( (((UInt)vex_state->guest_XER_SO) & 0x1) << 31 );
+ w |= ( (((UInt)vex_state->guest_XER_OV) & 0x1) << 30 );
+ w |= ( (((UInt)vex_state->guest_XER_CA) & 0x1) << 29 );
+ return w;
+}
+
+
+/* VISIBLE TO LIBVEX CLIENT */
+void LibVEX_GuestPPC32_put_XER ( UInt xer_native,
+ /*OUT*/VexGuestPPC32State* vex_state )
+{
+ vex_state->guest_XER_BC = toUChar(xer_native & 0xFF);
+ vex_state->guest_XER_SO = toUChar((xer_native >> 31) & 0x1);
+ vex_state->guest_XER_OV = toUChar((xer_native >> 30) & 0x1);
+ vex_state->guest_XER_CA = toUChar((xer_native >> 29) & 0x1);
+}
+
+/* VISIBLE TO LIBVEX CLIENT */
+/* Note: %XER is 32 bits even for ppc64 */
+void LibVEX_GuestPPC64_put_XER ( UInt xer_native,
+ /*OUT*/VexGuestPPC64State* vex_state )
+{
+ vex_state->guest_XER_BC = toUChar(xer_native & 0xFF);
+ vex_state->guest_XER_SO = toUChar((xer_native >> 31) & 0x1);
+ vex_state->guest_XER_OV = toUChar((xer_native >> 30) & 0x1);
+ vex_state->guest_XER_CA = toUChar((xer_native >> 29) & 0x1);
+}
+
+/* VISIBLE TO LIBVEX CLIENT */
+void LibVEX_GuestPPC32_initialise ( /*OUT*/VexGuestPPC32State* vex_state )
+{
+ Int i;
+ vex_state->guest_GPR0 = 0;
+ vex_state->guest_GPR1 = 0;
+ vex_state->guest_GPR2 = 0;
+ vex_state->guest_GPR3 = 0;
+ vex_state->guest_GPR4 = 0;
+ vex_state->guest_GPR5 = 0;
+ vex_state->guest_GPR6 = 0;
+ vex_state->guest_GPR7 = 0;
+ vex_state->guest_GPR8 = 0;
+ vex_state->guest_GPR9 = 0;
+ vex_state->guest_GPR10 = 0;
+ vex_state->guest_GPR11 = 0;
+ vex_state->guest_GPR12 = 0;
+ vex_state->guest_GPR13 = 0;
+ vex_state->guest_GPR14 = 0;
+ vex_state->guest_GPR15 = 0;
+ vex_state->guest_GPR16 = 0;
+ vex_state->guest_GPR17 = 0;
+ vex_state->guest_GPR18 = 0;
+ vex_state->guest_GPR19 = 0;
+ vex_state->guest_GPR20 = 0;
+ vex_state->guest_GPR21 = 0;
+ vex_state->guest_GPR22 = 0;
+ vex_state->guest_GPR23 = 0;
+ vex_state->guest_GPR24 = 0;
+ vex_state->guest_GPR25 = 0;
+ vex_state->guest_GPR26 = 0;
+ vex_state->guest_GPR27 = 0;
+ vex_state->guest_GPR28 = 0;
+ vex_state->guest_GPR29 = 0;
+ vex_state->guest_GPR30 = 0;
+ vex_state->guest_GPR31 = 0;
+
+ vex_state->guest_FPR0 = 0;
+ vex_state->guest_FPR1 = 0;
+ vex_state->guest_FPR2 = 0;
+ vex_state->guest_FPR3 = 0;
+ vex_state->guest_FPR4 = 0;
+ vex_state->guest_FPR5 = 0;
+ vex_state->guest_FPR6 = 0;
+ vex_state->guest_FPR7 = 0;
+ vex_state->guest_FPR8 = 0;
+ vex_state->guest_FPR9 = 0;
+ vex_state->guest_FPR10 = 0;
+ vex_state->guest_FPR11 = 0;
+ vex_state->guest_FPR12 = 0;
+ vex_state->guest_FPR13 = 0;
+ vex_state->guest_FPR14 = 0;
+ vex_state->guest_FPR15 = 0;
+ vex_state->guest_FPR16 = 0;
+ vex_state->guest_FPR17 = 0;
+ vex_state->guest_FPR18 = 0;
+ vex_state->guest_FPR19 = 0;
+ vex_state->guest_FPR20 = 0;
+ vex_state->guest_FPR21 = 0;
+ vex_state->guest_FPR22 = 0;
+ vex_state->guest_FPR23 = 0;
+ vex_state->guest_FPR24 = 0;
+ vex_state->guest_FPR25 = 0;
+ vex_state->guest_FPR26 = 0;
+ vex_state->guest_FPR27 = 0;
+ vex_state->guest_FPR28 = 0;
+ vex_state->guest_FPR29 = 0;
+ vex_state->guest_FPR30 = 0;
+ vex_state->guest_FPR31 = 0;
+
+ /* Initialise the vector state. */
+# define VECZERO(_vr) _vr[0]=_vr[1]=_vr[2]=_vr[3] = 0;
+
+ VECZERO(vex_state->guest_VR0 );
+ VECZERO(vex_state->guest_VR1 );
+ VECZERO(vex_state->guest_VR2 );
+ VECZERO(vex_state->guest_VR3 );
+ VECZERO(vex_state->guest_VR4 );
+ VECZERO(vex_state->guest_VR5 );
+ VECZERO(vex_state->guest_VR6 );
+ VECZERO(vex_state->guest_VR7 );
+ VECZERO(vex_state->guest_VR8 );
+ VECZERO(vex_state->guest_VR9 );
+ VECZERO(vex_state->guest_VR10);
+ VECZERO(vex_state->guest_VR11);
+ VECZERO(vex_state->guest_VR12);
+ VECZERO(vex_state->guest_VR13);
+ VECZERO(vex_state->guest_VR14);
+ VECZERO(vex_state->guest_VR15);
+ VECZERO(vex_state->guest_VR16);
+ VECZERO(vex_state->guest_VR17);
+ VECZERO(vex_state->guest_VR18);
+ VECZERO(vex_state->guest_VR19);
+ VECZERO(vex_state->guest_VR20);
+ VECZERO(vex_state->guest_VR21);
+ VECZERO(vex_state->guest_VR22);
+ VECZERO(vex_state->guest_VR23);
+ VECZERO(vex_state->guest_VR24);
+ VECZERO(vex_state->guest_VR25);
+ VECZERO(vex_state->guest_VR26);
+ VECZERO(vex_state->guest_VR27);
+ VECZERO(vex_state->guest_VR28);
+ VECZERO(vex_state->guest_VR29);
+ VECZERO(vex_state->guest_VR30);
+ VECZERO(vex_state->guest_VR31);
+
+# undef VECZERO
+
+ vex_state->guest_CIA = 0;
+ vex_state->guest_LR = 0;
+ vex_state->guest_CTR = 0;
+
+ vex_state->guest_XER_SO = 0;
+ vex_state->guest_XER_OV = 0;
+ vex_state->guest_XER_CA = 0;
+ vex_state->guest_XER_BC = 0;
+
+ vex_state->guest_CR0_321 = 0;
+ vex_state->guest_CR0_0 = 0;
+ vex_state->guest_CR1_321 = 0;
+ vex_state->guest_CR1_0 = 0;
+ vex_state->guest_CR2_321 = 0;
+ vex_state->guest_CR2_0 = 0;
+ vex_state->guest_CR3_321 = 0;
+ vex_state->guest_CR3_0 = 0;
+ vex_state->guest_CR4_321 = 0;
+ vex_state->guest_CR4_0 = 0;
+ vex_state->guest_CR5_321 = 0;
+ vex_state->guest_CR5_0 = 0;
+ vex_state->guest_CR6_321 = 0;
+ vex_state->guest_CR6_0 = 0;
+ vex_state->guest_CR7_321 = 0;
+ vex_state->guest_CR7_0 = 0;
+
+ vex_state->guest_FPROUND = (UInt)PPCrm_NEAREST;
+
+ vex_state->guest_VRSAVE = 0;
+
+ vex_state->guest_VSCR = 0x0; // Non-Java mode = 0
+
+ vex_state->guest_EMWARN = EmWarn_NONE;
+
+ vex_state->guest_TISTART = 0;
+ vex_state->guest_TILEN = 0;
+
+ vex_state->guest_NRADDR = 0;
+ vex_state->guest_NRADDR_GPR2 = 0;
+
+ vex_state->guest_REDIR_SP = -1;
+ for (i = 0; i < VEX_GUEST_PPC32_REDIR_STACK_SIZE; i++)
+ vex_state->guest_REDIR_STACK[i] = 0;
+
+ vex_state->guest_IP_AT_SYSCALL = 0;
+ vex_state->guest_SPRG3_RO = 0;
+}
+
+
+/* VISIBLE TO LIBVEX CLIENT */
+void LibVEX_GuestPPC64_initialise ( /*OUT*/VexGuestPPC64State* vex_state )
+{
+ Int i;
+ vex_state->guest_GPR0 = 0;
+ vex_state->guest_GPR1 = 0;
+ vex_state->guest_GPR2 = 0;
+ vex_state->guest_GPR3 = 0;
+ vex_state->guest_GPR4 = 0;
+ vex_state->guest_GPR5 = 0;
+ vex_state->guest_GPR6 = 0;
+ vex_state->guest_GPR7 = 0;
+ vex_state->guest_GPR8 = 0;
+ vex_state->guest_GPR9 = 0;
+ vex_state->guest_GPR10 = 0;
+ vex_state->guest_GPR11 = 0;
+ vex_state->guest_GPR12 = 0;
+ vex_state->guest_GPR13 = 0;
+ vex_state->guest_GPR14 = 0;
+ vex_state->guest_GPR15 = 0;
+ vex_state->guest_GPR16 = 0;
+ vex_state->guest_GPR17 = 0;
+ vex_state->guest_GPR18 = 0;
+ vex_state->guest_GPR19 = 0;
+ vex_state->guest_GPR20 = 0;
+ vex_state->guest_GPR21 = 0;
+ vex_state->guest_GPR22 = 0;
+ vex_state->guest_GPR23 = 0;
+ vex_state->guest_GPR24 = 0;
+ vex_state->guest_GPR25 = 0;
+ vex_state->guest_GPR26 = 0;
+ vex_state->guest_GPR27 = 0;
+ vex_state->guest_GPR28 = 0;
+ vex_state->guest_GPR29 = 0;
+ vex_state->guest_GPR30 = 0;
+ vex_state->guest_GPR31 = 0;
+
+ vex_state->guest_FPR0 = 0;
+ vex_state->guest_FPR1 = 0;
+ vex_state->guest_FPR2 = 0;
+ vex_state->guest_FPR3 = 0;
+ vex_state->guest_FPR4 = 0;
+ vex_state->guest_FPR5 = 0;
+ vex_state->guest_FPR6 = 0;
+ vex_state->guest_FPR7 = 0;
+ vex_state->guest_FPR8 = 0;
+ vex_state->guest_FPR9 = 0;
+ vex_state->guest_FPR10 = 0;
+ vex_state->guest_FPR11 = 0;
+ vex_state->guest_FPR12 = 0;
+ vex_state->guest_FPR13 = 0;
+ vex_state->guest_FPR14 = 0;
+ vex_state->guest_FPR15 = 0;
+ vex_state->guest_FPR16 = 0;
+ vex_state->guest_FPR17 = 0;
+ vex_state->guest_FPR18 = 0;
+ vex_state->guest_FPR19 = 0;
+ vex_state->guest_FPR20 = 0;
+ vex_state->guest_FPR21 = 0;
+ vex_state->guest_FPR22 = 0;
+ vex_state->guest_FPR23 = 0;
+ vex_state->guest_FPR24 = 0;
+ vex_state->guest_FPR25 = 0;
+ vex_state->guest_FPR26 = 0;
+ vex_state->guest_FPR27 = 0;
+ vex_state->guest_FPR28 = 0;
+ vex_state->guest_FPR29 = 0;
+ vex_state->guest_FPR30 = 0;
+ vex_state->guest_FPR31 = 0;
+
+ /* Initialise the vector state. */
+# define VECZERO(_vr) _vr[0]=_vr[1]=_vr[2]=_vr[3] = 0;
+
+ VECZERO(vex_state->guest_VR0 );
+ VECZERO(vex_state->guest_VR1 );
+ VECZERO(vex_state->guest_VR2 );
+ VECZERO(vex_state->guest_VR3 );
+ VECZERO(vex_state->guest_VR4 );
+ VECZERO(vex_state->guest_VR5 );
+ VECZERO(vex_state->guest_VR6 );
+ VECZERO(vex_state->guest_VR7 );
+ VECZERO(vex_state->guest_VR8 );
+ VECZERO(vex_state->guest_VR9 );
+ VECZERO(vex_state->guest_VR10);
+ VECZERO(vex_state->guest_VR11);
+ VECZERO(vex_state->guest_VR12);
+ VECZERO(vex_state->guest_VR13);
+ VECZERO(vex_state->guest_VR14);
+ VECZERO(vex_state->guest_VR15);
+ VECZERO(vex_state->guest_VR16);
+ VECZERO(vex_state->guest_VR17);
+ VECZERO(vex_state->guest_VR18);
+ VECZERO(vex_state->guest_VR19);
+ VECZERO(vex_state->guest_VR20);
+ VECZERO(vex_state->guest_VR21);
+ VECZERO(vex_state->guest_VR22);
+ VECZERO(vex_state->guest_VR23);
+ VECZERO(vex_state->guest_VR24);
+ VECZERO(vex_state->guest_VR25);
+ VECZERO(vex_state->guest_VR26);
+ VECZERO(vex_state->guest_VR27);
+ VECZERO(vex_state->guest_VR28);
+ VECZERO(vex_state->guest_VR29);
+ VECZERO(vex_state->guest_VR30);
+ VECZERO(vex_state->guest_VR31);
+
+# undef VECZERO
+
+ vex_state->guest_CIA = 0;
+ vex_state->guest_LR = 0;
+ vex_state->guest_CTR = 0;
+
+ vex_state->guest_XER_SO = 0;
+ vex_state->guest_XER_OV = 0;
+ vex_state->guest_XER_CA = 0;
+ vex_state->guest_XER_BC = 0;
+
+ vex_state->guest_CR0_321 = 0;
+ vex_state->guest_CR0_0 = 0;
+ vex_state->guest_CR1_321 = 0;
+ vex_state->guest_CR1_0 = 0;
+ vex_state->guest_CR2_321 = 0;
+ vex_state->guest_CR2_0 = 0;
+ vex_state->guest_CR3_321 = 0;
+ vex_state->guest_CR3_0 = 0;
+ vex_state->guest_CR4_321 = 0;
+ vex_state->guest_CR4_0 = 0;
+ vex_state->guest_CR5_321 = 0;
+ vex_state->guest_CR5_0 = 0;
+ vex_state->guest_CR6_321 = 0;
+ vex_state->guest_CR6_0 = 0;
+ vex_state->guest_CR7_321 = 0;
+ vex_state->guest_CR7_0 = 0;
+
+ vex_state->guest_FPROUND = (UInt)PPCrm_NEAREST;
+
+ vex_state->guest_VRSAVE = 0;
+
+ vex_state->guest_VSCR = 0x0; // Non-Java mode = 0
+
+ vex_state->guest_EMWARN = EmWarn_NONE;
+
+ vex_state->padding = 0;
+
+ vex_state->guest_TISTART = 0;
+ vex_state->guest_TILEN = 0;
+
+ vex_state->guest_NRADDR = 0;
+ vex_state->guest_NRADDR_GPR2 = 0;
+
+ vex_state->guest_REDIR_SP = -1;
+ for (i = 0; i < VEX_GUEST_PPC64_REDIR_STACK_SIZE; i++)
+ vex_state->guest_REDIR_STACK[i] = 0;
+
+ vex_state->guest_IP_AT_SYSCALL = 0;
+ vex_state->guest_SPRG3_RO = 0;
+
+ vex_state->padding2 = 0;
+}
+
+
+/*-----------------------------------------------------------*/
+/*--- Describing the ppc guest state, for the benefit ---*/
+/*--- of iropt and instrumenters. ---*/
+/*-----------------------------------------------------------*/
+
+/* Figure out if any part of the guest state contained in minoff
+ .. maxoff requires precise memory exceptions. If in doubt return
+   True (but this generates significantly slower code).
+
+ By default we enforce precise exns for guest R1 (stack pointer),
+ CIA (current insn address) and LR (link register). These are the
+ minimum needed to extract correct stack backtraces from ppc
+ code. [[NB: not sure if keeping LR up to date is actually
+ necessary.]]
+*/
+Bool guest_ppc32_state_requires_precise_mem_exns ( Int minoff,
+ Int maxoff )
+{
+ Int lr_min = offsetof(VexGuestPPC32State, guest_LR);
+ Int lr_max = lr_min + 4 - 1;
+ Int r1_min = offsetof(VexGuestPPC32State, guest_GPR1);
+ Int r1_max = r1_min + 4 - 1;
+ Int cia_min = offsetof(VexGuestPPC32State, guest_CIA);
+ Int cia_max = cia_min + 4 - 1;
+
+ if (maxoff < lr_min || minoff > lr_max) {
+ /* no overlap with LR */
+ } else {
+ return True;
+ }
+
+ if (maxoff < r1_min || minoff > r1_max) {
+ /* no overlap with R1 */
+ } else {
+ return True;
+ }
+
+ if (maxoff < cia_min || minoff > cia_max) {
+ /* no overlap with CIA */
+ } else {
+ return True;
+ }
+
+ return False;
+}
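+
+/* Example: an access covering only guest_GPR0 overlaps none of LR,
+   R1 or CIA, so the function returns False and iropt is free to
+   update that state lazily across memory operations. */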
+
+Bool guest_ppc64_state_requires_precise_mem_exns ( Int minoff,
+ Int maxoff )
+{
+ /* Given that R2 is a Big Deal in the ELF ppc64 ABI, it seems
+ prudent to be conservative with it, even though thus far there
+ is no evidence to suggest that it actually needs to be kept up
+ to date wrt possible exceptions. */
+ Int lr_min = offsetof(VexGuestPPC64State, guest_LR);
+ Int lr_max = lr_min + 8 - 1;
+ Int r1_min = offsetof(VexGuestPPC64State, guest_GPR1);
+ Int r1_max = r1_min + 8 - 1;
+ Int r2_min = offsetof(VexGuestPPC64State, guest_GPR2);
+ Int r2_max = r2_min + 8 - 1;
+ Int cia_min = offsetof(VexGuestPPC64State, guest_CIA);
+ Int cia_max = cia_min + 8 - 1;
+
+ if (maxoff < lr_min || minoff > lr_max) {
+ /* no overlap with LR */
+ } else {
+ return True;
+ }
+
+ if (maxoff < r1_min || minoff > r1_max) {
+ /* no overlap with R1 */
+ } else {
+ return True;
+ }
+
+ if (maxoff < r2_min || minoff > r2_max) {
+ /* no overlap with R2 */
+ } else {
+ return True;
+ }
+
+ if (maxoff < cia_min || minoff > cia_max) {
+ /* no overlap with CIA */
+ } else {
+ return True;
+ }
+
+ return False;
+}
+
+
+#define ALWAYSDEFD32(field) \
+ { offsetof(VexGuestPPC32State, field), \
+ (sizeof ((VexGuestPPC32State*)0)->field) }
+
+VexGuestLayout
+ ppc32Guest_layout
+ = {
+ /* Total size of the guest state, in bytes. */
+ .total_sizeB = sizeof(VexGuestPPC32State),
+
+ /* Describe the stack pointer. */
+ .offset_SP = offsetof(VexGuestPPC32State,guest_GPR1),
+ .sizeof_SP = 4,
+
+ /* Describe the frame pointer. */
+ .offset_FP = offsetof(VexGuestPPC32State,guest_GPR1),
+ .sizeof_FP = 4,
+
+ /* Describe the instruction pointer. */
+ .offset_IP = offsetof(VexGuestPPC32State,guest_CIA),
+ .sizeof_IP = 4,
+
+ /* Describe any sections to be regarded by Memcheck as
+ 'always-defined'. */
+ .n_alwaysDefd = 11,
+
+ .alwaysDefd
+ = { /* 0 */ ALWAYSDEFD32(guest_CIA),
+ /* 1 */ ALWAYSDEFD32(guest_EMWARN),
+ /* 2 */ ALWAYSDEFD32(guest_TISTART),
+ /* 3 */ ALWAYSDEFD32(guest_TILEN),
+ /* 4 */ ALWAYSDEFD32(guest_VSCR),
+ /* 5 */ ALWAYSDEFD32(guest_FPROUND),
+ /* 6 */ ALWAYSDEFD32(guest_NRADDR),
+ /* 7 */ ALWAYSDEFD32(guest_NRADDR_GPR2),
+ /* 8 */ ALWAYSDEFD32(guest_REDIR_SP),
+ /* 9 */ ALWAYSDEFD32(guest_REDIR_STACK),
+ /* 10 */ ALWAYSDEFD32(guest_IP_AT_SYSCALL)
+ }
+ };
+
+#define ALWAYSDEFD64(field) \
+ { offsetof(VexGuestPPC64State, field), \
+ (sizeof ((VexGuestPPC64State*)0)->field) }
+
+VexGuestLayout
+ ppc64Guest_layout
+ = {
+ /* Total size of the guest state, in bytes. */
+ .total_sizeB = sizeof(VexGuestPPC64State),
+
+ /* Describe the stack pointer. */
+ .offset_SP = offsetof(VexGuestPPC64State,guest_GPR1),
+ .sizeof_SP = 8,
+
+ /* Describe the frame pointer. */
+ .offset_FP = offsetof(VexGuestPPC64State,guest_GPR1),
+ .sizeof_FP = 8,
+
+ /* Describe the instruction pointer. */
+ .offset_IP = offsetof(VexGuestPPC64State,guest_CIA),
+ .sizeof_IP = 8,
+
+ /* Describe any sections to be regarded by Memcheck as
+ 'always-defined'. */
+ .n_alwaysDefd = 11,
+
+ .alwaysDefd
+ = { /* 0 */ ALWAYSDEFD64(guest_CIA),
+ /* 1 */ ALWAYSDEFD64(guest_EMWARN),
+ /* 2 */ ALWAYSDEFD64(guest_TISTART),
+ /* 3 */ ALWAYSDEFD64(guest_TILEN),
+ /* 4 */ ALWAYSDEFD64(guest_VSCR),
+ /* 5 */ ALWAYSDEFD64(guest_FPROUND),
+ /* 6 */ ALWAYSDEFD64(guest_NRADDR),
+ /* 7 */ ALWAYSDEFD64(guest_NRADDR_GPR2),
+ /* 8 */ ALWAYSDEFD64(guest_REDIR_SP),
+ /* 9 */ ALWAYSDEFD64(guest_REDIR_STACK),
+ /* 10 */ ALWAYSDEFD64(guest_IP_AT_SYSCALL)
+ }
+ };
+
+/*---------------------------------------------------------------*/
+/*--- end guest_ppc_helpers.c ---*/
+/*---------------------------------------------------------------*/
diff --git a/VEX/priv/guest_ppc_toIR.c b/VEX/priv/guest_ppc_toIR.c
new file mode 100644
index 0000000..f8d220d
--- /dev/null
+++ b/VEX/priv/guest_ppc_toIR.c
@@ -0,0 +1,10224 @@
+
+/*--------------------------------------------------------------------*/
+/*--- begin guest_ppc_toIR.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+ This file is part of Valgrind, a dynamic binary instrumentation
+ framework.
+
+ Copyright (C) 2004-2010 OpenWorks LLP
+ info@open-works.net
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA.
+
+ The GNU General Public License is contained in the file COPYING.
+
+ Neither the names of the U.S. Department of Energy nor the
+ University of California nor the names of its contributors may be
+ used to endorse or promote products derived from this software
+ without prior written permission.
+*/
+
+/* TODO 18/Nov/05:
+
+ Spot rld... cases which are simply left/right shifts and emit
+ Shl64/Shr64 accordingly.
+
+ Altivec
+ - datastream insns
+ - lvxl,stvxl: load/store with 'least recently used' hint
+ - vexptefp, vlogefp
+
+ LIMITATIONS:
+
+ Various, including:
+
+ - Some invalid forms of lswi and lswx are accepted when they should
+ not be.
+
+ - Floating Point:
+ - All exceptions disabled in FPSCR
+ - condition codes not set in FPSCR
+
+ - Altivec floating point:
+ - vmaddfp, vnmsubfp
+ Because we're using Java/IEEE mode (FPSCR[NJ]), rather than the
+ system default of Non-Java mode, we get some small errors
+ (lowest bit only).
+ This is because Non-Java mode brutally hacks denormalised results
+ to zero, whereas we keep maximum accuracy. However, using
+ Non-Java mode would give us more inaccuracy, as our intermediate
+ results would then be zeroed, too.
+
+ - AbiHints for the stack red zone are only emitted for
+ unconditional calls and returns (bl, blr). They should also be
+ emitted for conditional calls and returns, but we don't have a
+ way to express that right now. Ah well.
+*/
+
+/* "Special" instructions.
+
+ This instruction decoder can decode four special instructions
+ which mean nothing natively (are no-ops as far as regs/mem are
+ concerned) but have meaning for supporting Valgrind. A special
+ instruction is flagged by a 16-byte preamble:
+
+ 32-bit mode: 54001800 54006800 5400E800 54009800
+ (rlwinm 0,0,3,0,0; rlwinm 0,0,13,0,0;
+ rlwinm 0,0,29,0,0; rlwinm 0,0,19,0,0)
+
+ 64-bit mode: 78001800 78006800 7800E802 78009802
+ (rotldi 0,0,3; rotldi 0,0,13;
+ rotldi 0,0,61; rotldi 0,0,51)
+
+   Following that, one of the following 4 is allowed
+ (standard interpretation in parentheses):
+
+ 7C210B78 (or 1,1,1) %R3 = client_request ( %R4 )
+ 7C421378 (or 2,2,2) %R3 = guest_NRADDR
+ 7C631B78 (or 3,3,3) branch-and-link-to-noredir %R11
+ 7C842378 (or 4,4,4) %R3 = guest_NRADDR_GPR2
+
+ Any other bytes following the 16-byte preamble are illegal and
+ constitute a failure in instruction decoding. This all assumes
+ that the preamble will never occur except in specific code
+ fragments designed for Valgrind to catch.
+*/
+
+
+/* Translates PPC32/64 code to IR. */
+
+/* References
+
+#define PPC32
+ "PowerPC Microprocessor Family:
+ The Programming Environments Manual for 32-Bit Microprocessors"
+ 02/21/2000
+ http://www-3.ibm.com/chips/techlib/techlib.nsf/techdocs/852569B20050FF778525699600719DF2
+
+#define PPC64
+ "PowerPC Microprocessor Family:
+ Programming Environments Manual for 64-Bit Microprocessors"
+ 06/10/2003
+ http://www-3.ibm.com/chips/techlib/techlib.nsf/techdocs/F7E732FF811F783187256FDD004D3797
+
+#define AV
+ "PowerPC Microprocessor Family:
+ AltiVec(TM) Technology Programming Environments Manual"
+ 07/10/2003
+ http://www-3.ibm.com/chips/techlib/techlib.nsf/techdocs/FBFA164F824370F987256D6A006F424D
+*/
+
+#include "libvex_basictypes.h"
+#include "libvex_ir.h"
+#include "libvex.h"
+#include "libvex_guest_ppc32.h"
+#include "libvex_guest_ppc64.h"
+
+#include "main_util.h"
+#include "main_globals.h"
+#include "guest_generic_bb_to_IR.h"
+#include "guest_ppc_defs.h"
+
+
+/*------------------------------------------------------------*/
+/*--- Globals ---*/
+/*------------------------------------------------------------*/
+
+/* These are set at the start of the translation of an insn, right
+ down in disInstr_PPC, so that we don't have to pass them around
+ endlessly. They are all constant during the translation of any
+ given insn. */
+
+/* We need to know this to do sub-register accesses correctly. */
+static Bool host_is_bigendian;
+
+/* Pointer to the guest code area. */
+static UChar* guest_code;
+
+/* The guest address corresponding to guest_code[0]. */
+static Addr64 guest_CIA_bbstart;
+
+/* The guest address for the instruction currently being
+ translated. */
+static Addr64 guest_CIA_curr_instr;
+
+/* The IRSB* into which we're generating code. */
+static IRSB* irsb;
+
+/* Is our guest binary 32- or 64-bit?  Set at each call to
+ disInstr_PPC below. */
+static Bool mode64 = False;
+
+// Given a pointer to a function as obtained by "& functionname" in C,
+// produce a pointer to the actual entry point for the function. For
+// most platforms it's the identity function. Unfortunately, on
+// ppc64-linux it isn't (sigh) and ditto for ppc32-aix5 and
+// ppc64-aix5.
+static void* fnptr_to_fnentry( VexAbiInfo* vbi, void* f )
+{
+ if (vbi->host_ppc_calls_use_fndescrs) {
+ /* f is a pointer to a 3-word function descriptor, of which the
+ first word is the entry address. */
+ /* note, this is correct even with cross-jitting, since this is
+ purely a host issue, not a guest one. */
+ HWord* fdescr = (HWord*)f;
+ return (void*)(fdescr[0]);
+ } else {
+ /* Simple; "& f" points directly at the code for f. */
+ return f;
+ }
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Debugging output ---*/
+/*------------------------------------------------------------*/
+
+#define DIP(format, args...) \
+ if (vex_traceflags & VEX_TRACE_FE) \
+ vex_printf(format, ## args)
+
+#define DIS(buf, format, args...) \
+ if (vex_traceflags & VEX_TRACE_FE) \
+ vex_sprintf(buf, format, ## args)
+
+
+/*------------------------------------------------------------*/
+/*--- Offsets of various parts of the ppc32/64 guest state ---*/
+/*------------------------------------------------------------*/
+
+#define offsetofPPCGuestState(_x) \
+ (mode64 ? offsetof(VexGuestPPC64State, _x) : \
+ offsetof(VexGuestPPC32State, _x))
+
+#define OFFB_CIA offsetofPPCGuestState(guest_CIA)
+#define OFFB_IP_AT_SYSCALL offsetofPPCGuestState(guest_IP_AT_SYSCALL)
+#define OFFB_SPRG3_RO offsetofPPCGuestState(guest_SPRG3_RO)
+#define OFFB_LR offsetofPPCGuestState(guest_LR)
+#define OFFB_CTR offsetofPPCGuestState(guest_CTR)
+#define OFFB_XER_SO offsetofPPCGuestState(guest_XER_SO)
+#define OFFB_XER_OV offsetofPPCGuestState(guest_XER_OV)
+#define OFFB_XER_CA offsetofPPCGuestState(guest_XER_CA)
+#define OFFB_XER_BC offsetofPPCGuestState(guest_XER_BC)
+#define OFFB_FPROUND offsetofPPCGuestState(guest_FPROUND)
+#define OFFB_VRSAVE offsetofPPCGuestState(guest_VRSAVE)
+#define OFFB_VSCR offsetofPPCGuestState(guest_VSCR)
+#define OFFB_EMWARN offsetofPPCGuestState(guest_EMWARN)
+#define OFFB_TISTART offsetofPPCGuestState(guest_TISTART)
+#define OFFB_TILEN offsetofPPCGuestState(guest_TILEN)
+#define OFFB_NRADDR offsetofPPCGuestState(guest_NRADDR)
+#define OFFB_NRADDR_GPR2 offsetofPPCGuestState(guest_NRADDR_GPR2)
+
+
+/*------------------------------------------------------------*/
+/*--- Extract instruction fields ---*/
+/*------------------------------------------------------------*/
+
+/* Extract field from insn, given idx (zero = lsb) and field length */
+#define IFIELD( insn, idx, len ) ((insn >> idx) & ((1<<len)-1))
+
+/* Extract primary opcode, instr[31:26] */
+static UChar ifieldOPC( UInt instr ) {
+ return toUChar( IFIELD( instr, 26, 6 ) );
+}
+
+/* Extract 10-bit secondary opcode, instr[10:1] */
+static UInt ifieldOPClo10 ( UInt instr) {
+ return IFIELD( instr, 1, 10 );
+}
+
+/* Extract 9-bit secondary opcode, instr[9:1] */
+static UInt ifieldOPClo9 ( UInt instr) {
+ return IFIELD( instr, 1, 9 );
+}
+
+/* Extract 5-bit secondary opcode, instr[5:1] */
+static UInt ifieldOPClo5 ( UInt instr) {
+ return IFIELD( instr, 1, 5 );
+}
+
+/* Extract RD (destination register) field, instr[25:21] */
+static UChar ifieldRegDS( UInt instr ) {
+ return toUChar( IFIELD( instr, 21, 5 ) );
+}
+
+/* Extract RA (1st source register) field, instr[20:16] */
+static UChar ifieldRegA ( UInt instr ) {
+ return toUChar( IFIELD( instr, 16, 5 ) );
+}
+
+/* Extract RB (2nd source register) field, instr[15:11] */
+static UChar ifieldRegB ( UInt instr ) {
+ return toUChar( IFIELD( instr, 11, 5 ) );
+}
+
+/* Extract RC (3rd source register) field, instr[10:6] */
+static UChar ifieldRegC ( UInt instr ) {
+ return toUChar( IFIELD( instr, 6, 5 ) );
+}
+
+/* Extract bit 10, instr[10] */
+static UChar ifieldBIT10 ( UInt instr ) {
+ return toUChar( IFIELD( instr, 10, 1 ) );
+}
+
+/* Extract 2nd lowest bit, instr[1] */
+static UChar ifieldBIT1 ( UInt instr ) {
+ return toUChar( IFIELD( instr, 1, 1 ) );
+}
+
+/* Extract lowest bit, instr[0] */
+static UChar ifieldBIT0 ( UInt instr ) {
+ return toUChar( instr & 0x1 );
+}
+
+/* Extract unsigned bottom half, instr[15:0] */
+static UInt ifieldUIMM16 ( UInt instr ) {
+ return instr & 0xFFFF;
+}
+
+/* Extract unsigned bottom 26 bits, instr[25:0] */
+static UInt ifieldUIMM26 ( UInt instr ) {
+ return instr & 0x3FFFFFF;
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Guest-state identifiers ---*/
+/*------------------------------------------------------------*/
+
+typedef enum {
+ PPC_GST_CIA, // Current Instruction Address
+ PPC_GST_LR, // Link Register
+ PPC_GST_CTR, // Count Register
+ PPC_GST_XER, // Overflow, carry flags, byte count
+ PPC_GST_CR, // Condition Register
+ PPC_GST_FPSCR, // Floating Point Status/Control Register
+ PPC_GST_VRSAVE, // Vector Save/Restore Register
+ PPC_GST_VSCR, // Vector Status and Control Register
+ PPC_GST_EMWARN, // Emulation warnings
+      PPC_GST_TISTART, // For icbi: start of area to invalidate
+ PPC_GST_TILEN, // For icbi: length of area to invalidate
+ PPC_GST_IP_AT_SYSCALL, // the CIA of the most recently executed SC insn
+ PPC_GST_SPRG3_RO, // SPRG3
+ PPC_GST_MAX
+} PPC_GST;
+
+#define MASK_FPSCR_RN 0x3
+#define MASK_FPSCR_FPRF 0x1F000
+#define MASK_VSCR_VALID 0x00010001
+
+
+/*------------------------------------------------------------*/
+/*--- FP Helpers ---*/
+/*------------------------------------------------------------*/
+
+/* Produce the 32-bit pattern corresponding to the supplied
+ float. */
+static UInt float_to_bits ( Float f )
+{
+ union { UInt i; Float f; } u;
+ vassert(4 == sizeof(UInt));
+ vassert(4 == sizeof(Float));
+ vassert(4 == sizeof(u));
+ u.f = f;
+ return u.i;
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Misc Helpers ---*/
+/*------------------------------------------------------------*/
+
+/* Generate mask with 1's from 'begin' through 'end',
+ wrapping if begin > end.
+ begin->end works from right to left, 0=lsb
+*/
+static UInt MASK32( UInt begin, UInt end )
+{
+ UInt m1, m2, mask;
+ vassert(begin < 32);
+ vassert(end < 32);
+ m1 = ((UInt)(-1)) << begin;
+ m2 = ((UInt)(-1)) << end << 1;
+ mask = m1 ^ m2;
+ if (begin > end) mask = ~mask; // wrap mask
+ return mask;
+}
+
+/* ditto for 64bit mask */
+static ULong MASK64( UInt begin, UInt end )
+{
+ ULong m1, m2, mask;
+ vassert(begin < 64);
+ vassert(end < 64);
+ m1 = ((ULong)(-1)) << begin;
+ m2 = ((ULong)(-1)) << end << 1;
+ mask = m1 ^ m2;
+ if (begin > end) mask = ~mask; // wrap mask
+ return mask;
+}
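+/* Worked examples (illustrative): MASK32(0,15) == 0x0000FFFF;
+   MASK32(16,31) == 0xFFFF0000; and the wrapping case MASK32(28,3)
+   == 0xF000000F, i.e. bits 31..28 and 3..0 set. */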
+
+static Addr64 nextInsnAddr( void )
+{
+ return guest_CIA_curr_instr + 4;
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Helper bits and pieces for deconstructing the ---*/
+/*--- ppc32/64 insn stream. ---*/
+/*------------------------------------------------------------*/
+
+/* Add a statement to the list held by "irsb". */
+static void stmt ( IRStmt* st )
+{
+ addStmtToIRSB( irsb, st );
+}
+
+/* Generate a new temporary of the given type. */
+static IRTemp newTemp ( IRType ty )
+{
+ vassert(isPlausibleIRType(ty));
+ return newIRTemp( irsb->tyenv, ty );
+}
+
+/* Various simple conversions */
+
+static UChar extend_s_5to8 ( UChar x )
+{
+ return toUChar((((Int)x) << 27) >> 27);
+}
+
+static UInt extend_s_8to32( UChar x )
+{
+ return (UInt)((((Int)x) << 24) >> 24);
+}
+
+static UInt extend_s_16to32 ( UInt x )
+{
+ return (UInt)((((Int)x) << 16) >> 16);
+}
+
+static ULong extend_s_16to64 ( UInt x )
+{
+ return (ULong)((((Long)x) << 48) >> 48);
+}
+
+static ULong extend_s_26to64 ( UInt x )
+{
+ return (ULong)((((Long)x) << 38) >> 38);
+}
+
+static ULong extend_s_32to64 ( UInt x )
+{
+ return (ULong)((((Long)x) << 32) >> 32);
+}
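+/* For instance (illustrative), extend_s_16to32(0x8000) yields
+   0xFFFF8000 while extend_s_16to32(0x7FFF) yields 0x00007FFF; the
+   other variants behave analogously at their respective widths. */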
+
+/* Do a big-endian load of a 32-bit word, regardless of the endianness
+ of the underlying host. */
+static UInt getUIntBigendianly ( UChar* p )
+{
+ UInt w = 0;
+ w = (w << 8) | p[0];
+ w = (w << 8) | p[1];
+ w = (w << 8) | p[2];
+ w = (w << 8) | p[3];
+ return w;
+}
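+/* E.g. (illustrative) for the byte sequence { 0xDE, 0xAD, 0xBE, 0xEF }
+   this returns 0xDEADBEEF on both big- and little-endian hosts. */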
+
+
+/*------------------------------------------------------------*/
+/*--- Helpers for constructing IR. ---*/
+/*------------------------------------------------------------*/
+
+static void assign ( IRTemp dst, IRExpr* e )
+{
+ stmt( IRStmt_WrTmp(dst, e) );
+}
+
+/* This generates a normal (non store-conditional) store. */
+static void storeBE ( IRExpr* addr, IRExpr* data )
+{
+ IRType tyA = typeOfIRExpr(irsb->tyenv, addr);
+ vassert(tyA == Ity_I32 || tyA == Ity_I64);
+ stmt( IRStmt_Store(Iend_BE, addr, data) );
+}
+
+static IRExpr* unop ( IROp op, IRExpr* a )
+{
+ return IRExpr_Unop(op, a);
+}
+
+static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
+{
+ return IRExpr_Binop(op, a1, a2);
+}
+
+static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 )
+{
+ return IRExpr_Triop(op, a1, a2, a3);
+}
+
+static IRExpr* qop ( IROp op, IRExpr* a1, IRExpr* a2,
+ IRExpr* a3, IRExpr* a4 )
+{
+ return IRExpr_Qop(op, a1, a2, a3, a4);
+}
+
+static IRExpr* mkexpr ( IRTemp tmp )
+{
+ return IRExpr_RdTmp(tmp);
+}
+
+static IRExpr* mkU8 ( UChar i )
+{
+ return IRExpr_Const(IRConst_U8(i));
+}
+
+static IRExpr* mkU16 ( UInt i )
+{
+ return IRExpr_Const(IRConst_U16(i));
+}
+
+static IRExpr* mkU32 ( UInt i )
+{
+ return IRExpr_Const(IRConst_U32(i));
+}
+
+static IRExpr* mkU64 ( ULong i )
+{
+ return IRExpr_Const(IRConst_U64(i));
+}
+
+/* This generates a normal (non load-linked) load. */
+static IRExpr* loadBE ( IRType ty, IRExpr* addr )
+{
+ return IRExpr_Load(Iend_BE, ty, addr);
+}
+
+static IRExpr* mkOR1 ( IRExpr* arg1, IRExpr* arg2 )
+{
+ vassert(typeOfIRExpr(irsb->tyenv, arg1) == Ity_I1);
+ vassert(typeOfIRExpr(irsb->tyenv, arg2) == Ity_I1);
+ return unop(Iop_32to1, binop(Iop_Or32, unop(Iop_1Uto32, arg1),
+ unop(Iop_1Uto32, arg2)));
+}
+
+static IRExpr* mkAND1 ( IRExpr* arg1, IRExpr* arg2 )
+{
+ vassert(typeOfIRExpr(irsb->tyenv, arg1) == Ity_I1);
+ vassert(typeOfIRExpr(irsb->tyenv, arg2) == Ity_I1);
+ return unop(Iop_32to1, binop(Iop_And32, unop(Iop_1Uto32, arg1),
+ unop(Iop_1Uto32, arg2)));
+}
+
+/* expand V128_8Ux16 to 2x V128_16Ux8's */
+static void expand8Ux16( IRExpr* vIn,
+ /*OUTs*/ IRTemp* vEvn, IRTemp* vOdd )
+{
+ IRTemp ones8x16 = newTemp(Ity_V128);
+
+ vassert(typeOfIRExpr(irsb->tyenv, vIn) == Ity_V128);
+ vassert(vEvn && *vEvn == IRTemp_INVALID);
+ vassert(vOdd && *vOdd == IRTemp_INVALID);
+ *vEvn = newTemp(Ity_V128);
+ *vOdd = newTemp(Ity_V128);
+
+ assign( ones8x16, unop(Iop_Dup8x16, mkU8(0x1)) );
+ assign( *vOdd, binop(Iop_MullEven8Ux16, mkexpr(ones8x16), vIn) );
+ assign( *vEvn, binop(Iop_MullEven8Ux16, mkexpr(ones8x16),
+ binop(Iop_ShrV128, vIn, mkU8(8))) );
+}
+
+/* expand V128_8Sx16 to 2x V128_16Sx8's */
+static void expand8Sx16( IRExpr* vIn,
+ /*OUTs*/ IRTemp* vEvn, IRTemp* vOdd )
+{
+ IRTemp ones8x16 = newTemp(Ity_V128);
+
+ vassert(typeOfIRExpr(irsb->tyenv, vIn) == Ity_V128);
+ vassert(vEvn && *vEvn == IRTemp_INVALID);
+ vassert(vOdd && *vOdd == IRTemp_INVALID);
+ *vEvn = newTemp(Ity_V128);
+ *vOdd = newTemp(Ity_V128);
+
+ assign( ones8x16, unop(Iop_Dup8x16, mkU8(0x1)) );
+ assign( *vOdd, binop(Iop_MullEven8Sx16, mkexpr(ones8x16), vIn) );
+ assign( *vEvn, binop(Iop_MullEven8Sx16, mkexpr(ones8x16),
+ binop(Iop_ShrV128, vIn, mkU8(8))) );
+}
+
+/* expand V128_16Ux8 to 2x V128_32Ux4's */
+static void expand16Ux8( IRExpr* vIn,
+ /*OUTs*/ IRTemp* vEvn, IRTemp* vOdd )
+{
+ IRTemp ones16x8 = newTemp(Ity_V128);
+
+ vassert(typeOfIRExpr(irsb->tyenv, vIn) == Ity_V128);
+ vassert(vEvn && *vEvn == IRTemp_INVALID);
+ vassert(vOdd && *vOdd == IRTemp_INVALID);
+ *vEvn = newTemp(Ity_V128);
+ *vOdd = newTemp(Ity_V128);
+
+ assign( ones16x8, unop(Iop_Dup16x8, mkU16(0x1)) );
+ assign( *vOdd, binop(Iop_MullEven16Ux8, mkexpr(ones16x8), vIn) );
+ assign( *vEvn, binop(Iop_MullEven16Ux8, mkexpr(ones16x8),
+ binop(Iop_ShrV128, vIn, mkU8(16))) );
+}
+
+/* expand V128_16Sx8 to 2x V128_32Sx4's */
+static void expand16Sx8( IRExpr* vIn,
+ /*OUTs*/ IRTemp* vEvn, IRTemp* vOdd )
+{
+ IRTemp ones16x8 = newTemp(Ity_V128);
+
+ vassert(typeOfIRExpr(irsb->tyenv, vIn) == Ity_V128);
+ vassert(vEvn && *vEvn == IRTemp_INVALID);
+ vassert(vOdd && *vOdd == IRTemp_INVALID);
+ *vEvn = newTemp(Ity_V128);
+ *vOdd = newTemp(Ity_V128);
+
+ assign( ones16x8, unop(Iop_Dup16x8, mkU16(0x1)) );
+ assign( *vOdd, binop(Iop_MullEven16Sx8, mkexpr(ones16x8), vIn) );
+ assign( *vEvn, binop(Iop_MullEven16Sx8, mkexpr(ones16x8),
+ binop(Iop_ShrV128, vIn, mkU8(16))) );
+}
+
+/* break V128 to 4xI32's, then sign-extend to I64's */
+static void breakV128to4x64S( IRExpr* t128,
+ /*OUTs*/
+ IRTemp* t3, IRTemp* t2,
+ IRTemp* t1, IRTemp* t0 )
+{
+ IRTemp hi64 = newTemp(Ity_I64);
+ IRTemp lo64 = newTemp(Ity_I64);
+
+ vassert(typeOfIRExpr(irsb->tyenv, t128) == Ity_V128);
+ vassert(t0 && *t0 == IRTemp_INVALID);
+ vassert(t1 && *t1 == IRTemp_INVALID);
+ vassert(t2 && *t2 == IRTemp_INVALID);
+ vassert(t3 && *t3 == IRTemp_INVALID);
+ *t0 = newTemp(Ity_I64);
+ *t1 = newTemp(Ity_I64);
+ *t2 = newTemp(Ity_I64);
+ *t3 = newTemp(Ity_I64);
+
+ assign( hi64, unop(Iop_V128HIto64, t128) );
+ assign( lo64, unop(Iop_V128to64, t128) );
+ assign( *t3, unop(Iop_32Sto64, unop(Iop_64HIto32, mkexpr(hi64))) );
+ assign( *t2, unop(Iop_32Sto64, unop(Iop_64to32, mkexpr(hi64))) );
+ assign( *t1, unop(Iop_32Sto64, unop(Iop_64HIto32, mkexpr(lo64))) );
+ assign( *t0, unop(Iop_32Sto64, unop(Iop_64to32, mkexpr(lo64))) );
+}
+
+/* break V128 to 4xI32's, then zero-extend to I64's */
+static void breakV128to4x64U ( IRExpr* t128,
+ /*OUTs*/
+ IRTemp* t3, IRTemp* t2,
+ IRTemp* t1, IRTemp* t0 )
+{
+ IRTemp hi64 = newTemp(Ity_I64);
+ IRTemp lo64 = newTemp(Ity_I64);
+
+ vassert(typeOfIRExpr(irsb->tyenv, t128) == Ity_V128);
+ vassert(t0 && *t0 == IRTemp_INVALID);
+ vassert(t1 && *t1 == IRTemp_INVALID);
+ vassert(t2 && *t2 == IRTemp_INVALID);
+ vassert(t3 && *t3 == IRTemp_INVALID);
+ *t0 = newTemp(Ity_I64);
+ *t1 = newTemp(Ity_I64);
+ *t2 = newTemp(Ity_I64);
+ *t3 = newTemp(Ity_I64);
+
+ assign( hi64, unop(Iop_V128HIto64, t128) );
+ assign( lo64, unop(Iop_V128to64, t128) );
+ assign( *t3, unop(Iop_32Uto64, unop(Iop_64HIto32, mkexpr(hi64))) );
+ assign( *t2, unop(Iop_32Uto64, unop(Iop_64to32, mkexpr(hi64))) );
+ assign( *t1, unop(Iop_32Uto64, unop(Iop_64HIto32, mkexpr(lo64))) );
+ assign( *t0, unop(Iop_32Uto64, unop(Iop_64to32, mkexpr(lo64))) );
+}
+
+/* Signed saturating narrow 64S to 32 */
+static IRExpr* mkQNarrow64Sto32 ( IRExpr* t64 )
+{
+ IRTemp hi32 = newTemp(Ity_I32);
+ IRTemp lo32 = newTemp(Ity_I32);
+
+ vassert(typeOfIRExpr(irsb->tyenv, t64) == Ity_I64);
+
+ assign( hi32, unop(Iop_64HIto32, t64));
+ assign( lo32, unop(Iop_64to32, t64));
+
+ return IRExpr_Mux0X(
+ /* if (hi32 == (lo32 >>s 31)) */
+ unop(Iop_1Uto8,
+ binop(Iop_CmpEQ32, mkexpr(hi32),
+ binop( Iop_Sar32, mkexpr(lo32), mkU8(31)))),
+ /* else: sign dep saturate: 1->0x80000000, 0->0x7FFFFFFF */
+ binop(Iop_Add32, mkU32(0x7FFFFFFF),
+ binop(Iop_Shr32, mkexpr(hi32), mkU8(31))),
+ /* then: within signed-32 range: lo half good enough */
+ mkexpr(lo32) );
+}
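+/* Illustrative values: 0x0000000000000005 narrows to 0x00000005;
+   0x0000000100000000 (too big) saturates to 0x7FFFFFFF; and
+   0xFFFFFFFE00000000 (too negative) saturates to 0x80000000, since
+   0x7FFFFFFF + (hi32 >>u 31) == 0x7FFFFFFF + 1. */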
+
+/* Unsigned saturating narrow 64U to 32 */
+static IRExpr* mkQNarrow64Uto32 ( IRExpr* t64 )
+{
+ IRTemp hi32 = newTemp(Ity_I32);
+ IRTemp lo32 = newTemp(Ity_I32);
+
+ vassert(typeOfIRExpr(irsb->tyenv, t64) == Ity_I64);
+
+ assign( hi32, unop(Iop_64HIto32, t64));
+ assign( lo32, unop(Iop_64to32, t64));
+
+ return IRExpr_Mux0X(
+ /* if (top 32 bits of t64 are 0) */
+ unop(Iop_1Uto8, binop(Iop_CmpEQ32, mkexpr(hi32), mkU32(0))),
+ /* else: positive saturate -> 0xFFFFFFFF */
+ mkU32(0xFFFFFFFF),
+ /* then: within unsigned-32 range: lo half good enough */
+ mkexpr(lo32) );
+}
+
+/* Signed saturate narrow 64->32, combining to V128 */
+static IRExpr* mkV128from4x64S ( IRExpr* t3, IRExpr* t2,
+ IRExpr* t1, IRExpr* t0 )
+{
+ vassert(typeOfIRExpr(irsb->tyenv, t3) == Ity_I64);
+ vassert(typeOfIRExpr(irsb->tyenv, t2) == Ity_I64);
+ vassert(typeOfIRExpr(irsb->tyenv, t1) == Ity_I64);
+ vassert(typeOfIRExpr(irsb->tyenv, t0) == Ity_I64);
+ return binop(Iop_64HLtoV128,
+ binop(Iop_32HLto64,
+ mkQNarrow64Sto32( t3 ),
+ mkQNarrow64Sto32( t2 )),
+ binop(Iop_32HLto64,
+ mkQNarrow64Sto32( t1 ),
+ mkQNarrow64Sto32( t0 )));
+}
+
+/* Unsigned saturate narrow 64->32, combining to V128 */
+static IRExpr* mkV128from4x64U ( IRExpr* t3, IRExpr* t2,
+ IRExpr* t1, IRExpr* t0 )
+{
+ vassert(typeOfIRExpr(irsb->tyenv, t3) == Ity_I64);
+ vassert(typeOfIRExpr(irsb->tyenv, t2) == Ity_I64);
+ vassert(typeOfIRExpr(irsb->tyenv, t1) == Ity_I64);
+ vassert(typeOfIRExpr(irsb->tyenv, t0) == Ity_I64);
+ return binop(Iop_64HLtoV128,
+ binop(Iop_32HLto64,
+ mkQNarrow64Uto32( t3 ),
+ mkQNarrow64Uto32( t2 )),
+ binop(Iop_32HLto64,
+ mkQNarrow64Uto32( t1 ),
+ mkQNarrow64Uto32( t0 )));
+}
+
+/* Simulate irops Iop_MullOdd*, since we don't have them */
+#define MK_Iop_MullOdd8Ux16( expr_vA, expr_vB ) \
+ binop(Iop_MullEven8Ux16, \
+ binop(Iop_ShrV128, expr_vA, mkU8(8)), \
+ binop(Iop_ShrV128, expr_vB, mkU8(8)))
+
+#define MK_Iop_MullOdd8Sx16( expr_vA, expr_vB ) \
+ binop(Iop_MullEven8Sx16, \
+ binop(Iop_ShrV128, expr_vA, mkU8(8)), \
+ binop(Iop_ShrV128, expr_vB, mkU8(8)))
+
+#define MK_Iop_MullOdd16Ux8( expr_vA, expr_vB ) \
+ binop(Iop_MullEven16Ux8, \
+ binop(Iop_ShrV128, expr_vA, mkU8(16)), \
+ binop(Iop_ShrV128, expr_vB, mkU8(16)))
+
+#define MK_Iop_MullOdd16Sx8( expr_vA, expr_vB ) \
+ binop(Iop_MullEven16Sx8, \
+ binop(Iop_ShrV128, expr_vA, mkU8(16)), \
+ binop(Iop_ShrV128, expr_vB, mkU8(16)))
+
+static IRExpr* /* :: Ity_I64 */ mk64lo32Sto64 ( IRExpr* src )
+{
+ vassert(typeOfIRExpr(irsb->tyenv, src) == Ity_I64);
+ return unop(Iop_32Sto64, unop(Iop_64to32, src));
+}
+
+static IRExpr* /* :: Ity_I64 */ mk64lo32Uto64 ( IRExpr* src )
+{
+ vassert(typeOfIRExpr(irsb->tyenv, src) == Ity_I64);
+ return unop(Iop_32Uto64, unop(Iop_64to32, src));
+}
+
+static IROp mkSzOp ( IRType ty, IROp op8 )
+{
+ Int adj;
+ vassert(ty == Ity_I8 || ty == Ity_I16 ||
+ ty == Ity_I32 || ty == Ity_I64);
+ vassert(op8 == Iop_Add8 || op8 == Iop_Sub8 || op8 == Iop_Mul8 ||
+ op8 == Iop_Or8 || op8 == Iop_And8 || op8 == Iop_Xor8 ||
+ op8 == Iop_Shl8 || op8 == Iop_Shr8 || op8 == Iop_Sar8 ||
+ op8 == Iop_CmpEQ8 || op8 == Iop_CmpNE8 ||
+ op8 == Iop_Not8 );
+ adj = ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 1 : (ty==Ity_I32 ? 2 : 3));
+ return adj + op8;
+}
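+/* This relies on the IROp enumeration placing the 8/16/32/64-bit
+   variants of each listed op consecutively; e.g.
+   mkSzOp(Ity_I32, Iop_Add8) == Iop_Add8 + 2 == Iop_Add32. */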
+
+/* Make sure we get valid 32- and 64-bit addresses */
+static Addr64 mkSzAddr ( IRType ty, Addr64 addr )
+{
+ vassert(ty == Ity_I32 || ty == Ity_I64);
+ return ( ty == Ity_I64 ?
+ (Addr64)addr :
+ (Addr64)extend_s_32to64( toUInt(addr) ) );
+}
+
+/* sz, ULong -> IRExpr */
+static IRExpr* mkSzImm ( IRType ty, ULong imm64 )
+{
+ vassert(ty == Ity_I32 || ty == Ity_I64);
+ return ty == Ity_I64 ? mkU64(imm64) : mkU32((UInt)imm64);
+}
+
+/* sz, ULong -> IRConst */
+static IRConst* mkSzConst ( IRType ty, ULong imm64 )
+{
+ vassert(ty == Ity_I32 || ty == Ity_I64);
+ return ( ty == Ity_I64 ?
+ IRConst_U64(imm64) :
+ IRConst_U32((UInt)imm64) );
+}
+
+/* Sign extend imm16 -> IRExpr* */
+static IRExpr* mkSzExtendS16 ( IRType ty, UInt imm16 )
+{
+ vassert(ty == Ity_I32 || ty == Ity_I64);
+ return ( ty == Ity_I64 ?
+ mkU64(extend_s_16to64(imm16)) :
+ mkU32(extend_s_16to32(imm16)) );
+}
+
+/* Sign extend imm32 -> IRExpr* */
+static IRExpr* mkSzExtendS32 ( IRType ty, UInt imm32 )
+{
+ vassert(ty == Ity_I32 || ty == Ity_I64);
+ return ( ty == Ity_I64 ?
+ mkU64(extend_s_32to64(imm32)) :
+ mkU32(imm32) );
+}
+
+/* IR narrows I32/I64 -> I8/I16/I32 */
+static IRExpr* mkNarrowTo8 ( IRType ty, IRExpr* src )
+{
+ vassert(ty == Ity_I32 || ty == Ity_I64);
+ return ty == Ity_I64 ? unop(Iop_64to8, src) : unop(Iop_32to8, src);
+}
+
+static IRExpr* mkNarrowTo16 ( IRType ty, IRExpr* src )
+{
+ vassert(ty == Ity_I32 || ty == Ity_I64);
+ return ty == Ity_I64 ? unop(Iop_64to16, src) : unop(Iop_32to16, src);
+}
+
+static IRExpr* mkNarrowTo32 ( IRType ty, IRExpr* src )
+{
+ vassert(ty == Ity_I32 || ty == Ity_I64);
+ return ty == Ity_I64 ? unop(Iop_64to32, src) : src;
+}
+
+/* Signed/Unsigned IR widens I8/I16/I32 -> I32/I64 */
+static IRExpr* mkWidenFrom8 ( IRType ty, IRExpr* src, Bool sined )
+{
+ IROp op;
+ vassert(ty == Ity_I32 || ty == Ity_I64);
+ if (sined) op = (ty==Ity_I32) ? Iop_8Sto32 : Iop_8Sto64;
+ else op = (ty==Ity_I32) ? Iop_8Uto32 : Iop_8Uto64;
+ return unop(op, src);
+}
+
+static IRExpr* mkWidenFrom16 ( IRType ty, IRExpr* src, Bool sined )
+{
+ IROp op;
+ vassert(ty == Ity_I32 || ty == Ity_I64);
+ if (sined) op = (ty==Ity_I32) ? Iop_16Sto32 : Iop_16Sto64;
+ else op = (ty==Ity_I32) ? Iop_16Uto32 : Iop_16Uto64;
+ return unop(op, src);
+}
+
+static IRExpr* mkWidenFrom32 ( IRType ty, IRExpr* src, Bool sined )
+{
+ vassert(ty == Ity_I32 || ty == Ity_I64);
+ if (ty == Ity_I32)
+ return src;
+ return (sined) ? unop(Iop_32Sto64, src) : unop(Iop_32Uto64, src);
+}
+
+
+static Int integerGuestRegOffset ( UInt archreg )
+{
+ vassert(archreg < 32);
+
+ // jrs: probably not necessary; only matters if we reference sub-parts
+ // of the ppc registers, but that isn't the case
+ // later: this might affect Altivec though?
+ vassert(host_is_bigendian);
+
+ switch (archreg) {
+ case 0: return offsetofPPCGuestState(guest_GPR0);
+ case 1: return offsetofPPCGuestState(guest_GPR1);
+ case 2: return offsetofPPCGuestState(guest_GPR2);
+ case 3: return offsetofPPCGuestState(guest_GPR3);
+ case 4: return offsetofPPCGuestState(guest_GPR4);
+ case 5: return offsetofPPCGuestState(guest_GPR5);
+ case 6: return offsetofPPCGuestState(guest_GPR6);
+ case 7: return offsetofPPCGuestState(guest_GPR7);
+ case 8: return offsetofPPCGuestState(guest_GPR8);
+ case 9: return offsetofPPCGuestState(guest_GPR9);
+ case 10: return offsetofPPCGuestState(guest_GPR10);
+ case 11: return offsetofPPCGuestState(guest_GPR11);
+ case 12: return offsetofPPCGuestState(guest_GPR12);
+ case 13: return offsetofPPCGuestState(guest_GPR13);
+ case 14: return offsetofPPCGuestState(guest_GPR14);
+ case 15: return offsetofPPCGuestState(guest_GPR15);
+ case 16: return offsetofPPCGuestState(guest_GPR16);
+ case 17: return offsetofPPCGuestState(guest_GPR17);
+ case 18: return offsetofPPCGuestState(guest_GPR18);
+ case 19: return offsetofPPCGuestState(guest_GPR19);
+ case 20: return offsetofPPCGuestState(guest_GPR20);
+ case 21: return offsetofPPCGuestState(guest_GPR21);
+ case 22: return offsetofPPCGuestState(guest_GPR22);
+ case 23: return offsetofPPCGuestState(guest_GPR23);
+ case 24: return offsetofPPCGuestState(guest_GPR24);
+ case 25: return offsetofPPCGuestState(guest_GPR25);
+ case 26: return offsetofPPCGuestState(guest_GPR26);
+ case 27: return offsetofPPCGuestState(guest_GPR27);
+ case 28: return offsetofPPCGuestState(guest_GPR28);
+ case 29: return offsetofPPCGuestState(guest_GPR29);
+ case 30: return offsetofPPCGuestState(guest_GPR30);
+ case 31: return offsetofPPCGuestState(guest_GPR31);
+ default: break;
+ }
+ vpanic("integerGuestRegOffset(ppc,be)"); /*notreached*/
+}
+
+static IRExpr* getIReg ( UInt archreg )
+{
+ IRType ty = mode64 ? Ity_I64 : Ity_I32;
+ vassert(archreg < 32);
+ return IRExpr_Get( integerGuestRegOffset(archreg), ty );
+}
+
+/* Ditto, but write to a reg instead. */
+static void putIReg ( UInt archreg, IRExpr* e )
+{
+ IRType ty = mode64 ? Ity_I64 : Ity_I32;
+ vassert(archreg < 32);
+ vassert(typeOfIRExpr(irsb->tyenv, e) == ty );
+ stmt( IRStmt_Put(integerGuestRegOffset(archreg), e) );
+}
+
+
+static Int floatGuestRegOffset ( UInt archreg )
+{
+ vassert(archreg < 32);
+
+ switch (archreg) {
+ case 0: return offsetofPPCGuestState(guest_FPR0);
+ case 1: return offsetofPPCGuestState(guest_FPR1);
+ case 2: return offsetofPPCGuestState(guest_FPR2);
+ case 3: return offsetofPPCGuestState(guest_FPR3);
+ case 4: return offsetofPPCGuestState(guest_FPR4);
+ case 5: return offsetofPPCGuestState(guest_FPR5);
+ case 6: return offsetofPPCGuestState(guest_FPR6);
+ case 7: return offsetofPPCGuestState(guest_FPR7);
+ case 8: return offsetofPPCGuestState(guest_FPR8);
+ case 9: return offsetofPPCGuestState(guest_FPR9);
+ case 10: return offsetofPPCGuestState(guest_FPR10);
+ case 11: return offsetofPPCGuestState(guest_FPR11);
+ case 12: return offsetofPPCGuestState(guest_FPR12);
+ case 13: return offsetofPPCGuestState(guest_FPR13);
+ case 14: return offsetofPPCGuestState(guest_FPR14);
+ case 15: return offsetofPPCGuestState(guest_FPR15);
+ case 16: return offsetofPPCGuestState(guest_FPR16);
+ case 17: return offsetofPPCGuestState(guest_FPR17);
+ case 18: return offsetofPPCGuestState(guest_FPR18);
+ case 19: return offsetofPPCGuestState(guest_FPR19);
+ case 20: return offsetofPPCGuestState(guest_FPR20);
+ case 21: return offsetofPPCGuestState(guest_FPR21);
+ case 22: return offsetofPPCGuestState(guest_FPR22);
+ case 23: return offsetofPPCGuestState(guest_FPR23);
+ case 24: return offsetofPPCGuestState(guest_FPR24);
+ case 25: return offsetofPPCGuestState(guest_FPR25);
+ case 26: return offsetofPPCGuestState(guest_FPR26);
+ case 27: return offsetofPPCGuestState(guest_FPR27);
+ case 28: return offsetofPPCGuestState(guest_FPR28);
+ case 29: return offsetofPPCGuestState(guest_FPR29);
+ case 30: return offsetofPPCGuestState(guest_FPR30);
+ case 31: return offsetofPPCGuestState(guest_FPR31);
+ default: break;
+ }
+ vpanic("floatGuestRegOffset(ppc)"); /*notreached*/
+}
+
+static IRExpr* getFReg ( UInt archreg )
+{
+ vassert(archreg < 32);
+ return IRExpr_Get( floatGuestRegOffset(archreg), Ity_F64 );
+}
+
+/* Ditto, but write to a reg instead. */
+static void putFReg ( UInt archreg, IRExpr* e )
+{
+ vassert(archreg < 32);
+ vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_F64);
+ stmt( IRStmt_Put(floatGuestRegOffset(archreg), e) );
+}
+
+
+static Int vectorGuestRegOffset ( UInt archreg )
+{
+ vassert(archreg < 32);
+
+ switch (archreg) {
+ case 0: return offsetofPPCGuestState(guest_VR0);
+ case 1: return offsetofPPCGuestState(guest_VR1);
+ case 2: return offsetofPPCGuestState(guest_VR2);
+ case 3: return offsetofPPCGuestState(guest_VR3);
+ case 4: return offsetofPPCGuestState(guest_VR4);
+ case 5: return offsetofPPCGuestState(guest_VR5);
+ case 6: return offsetofPPCGuestState(guest_VR6);
+ case 7: return offsetofPPCGuestState(guest_VR7);
+ case 8: return offsetofPPCGuestState(guest_VR8);
+ case 9: return offsetofPPCGuestState(guest_VR9);
+ case 10: return offsetofPPCGuestState(guest_VR10);
+ case 11: return offsetofPPCGuestState(guest_VR11);
+ case 12: return offsetofPPCGuestState(guest_VR12);
+ case 13: return offsetofPPCGuestState(guest_VR13);
+ case 14: return offsetofPPCGuestState(guest_VR14);
+ case 15: return offsetofPPCGuestState(guest_VR15);
+ case 16: return offsetofPPCGuestState(guest_VR16);
+ case 17: return offsetofPPCGuestState(guest_VR17);
+ case 18: return offsetofPPCGuestState(guest_VR18);
+ case 19: return offsetofPPCGuestState(guest_VR19);
+ case 20: return offsetofPPCGuestState(guest_VR20);
+ case 21: return offsetofPPCGuestState(guest_VR21);
+ case 22: return offsetofPPCGuestState(guest_VR22);
+ case 23: return offsetofPPCGuestState(guest_VR23);
+ case 24: return offsetofPPCGuestState(guest_VR24);
+ case 25: return offsetofPPCGuestState(guest_VR25);
+ case 26: return offsetofPPCGuestState(guest_VR26);
+ case 27: return offsetofPPCGuestState(guest_VR27);
+ case 28: return offsetofPPCGuestState(guest_VR28);
+ case 29: return offsetofPPCGuestState(guest_VR29);
+ case 30: return offsetofPPCGuestState(guest_VR30);
+ case 31: return offsetofPPCGuestState(guest_VR31);
+ default: break;
+ }
+ vpanic("vextorGuestRegOffset(ppc)"); /*notreached*/
+}
+
+static IRExpr* getVReg ( UInt archreg )
+{
+ vassert(archreg < 32);
+ return IRExpr_Get( vectorGuestRegOffset(archreg), Ity_V128 );
+}
+
+/* Ditto, but write to a reg instead. */
+static void putVReg ( UInt archreg, IRExpr* e )
+{
+ vassert(archreg < 32);
+ vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_V128);
+ stmt( IRStmt_Put(vectorGuestRegOffset(archreg), e) );
+}
+
+static Int guestCR321offset ( UInt cr )
+{
+ switch (cr) {
+ case 0: return offsetofPPCGuestState(guest_CR0_321 );
+ case 1: return offsetofPPCGuestState(guest_CR1_321 );
+ case 2: return offsetofPPCGuestState(guest_CR2_321 );
+ case 3: return offsetofPPCGuestState(guest_CR3_321 );
+ case 4: return offsetofPPCGuestState(guest_CR4_321 );
+ case 5: return offsetofPPCGuestState(guest_CR5_321 );
+ case 6: return offsetofPPCGuestState(guest_CR6_321 );
+ case 7: return offsetofPPCGuestState(guest_CR7_321 );
+ default: vpanic("guestCR321offset(ppc)");
+ }
+}
+
+static Int guestCR0offset ( UInt cr )
+{
+ switch (cr) {
+ case 0: return offsetofPPCGuestState(guest_CR0_0 );
+ case 1: return offsetofPPCGuestState(guest_CR1_0 );
+ case 2: return offsetofPPCGuestState(guest_CR2_0 );
+ case 3: return offsetofPPCGuestState(guest_CR3_0 );
+ case 4: return offsetofPPCGuestState(guest_CR4_0 );
+ case 5: return offsetofPPCGuestState(guest_CR5_0 );
+ case 6: return offsetofPPCGuestState(guest_CR6_0 );
+ case 7: return offsetofPPCGuestState(guest_CR7_0 );
+ default: vpanic("guestCR3offset(ppc)");
+ }
+}
+
+// ROTL(src32/64, rot_amt5/6)
+static IRExpr* /* :: Ity_I32/64 */ ROTL ( IRExpr* src,
+ IRExpr* rot_amt )
+{
+ IRExpr *mask, *rot;
+ vassert(typeOfIRExpr(irsb->tyenv,rot_amt) == Ity_I8);
+
+ if (typeOfIRExpr(irsb->tyenv,src) == Ity_I64) {
+ // rot = (src << rot_amt) | (src >> (64-rot_amt))
+ mask = binop(Iop_And8, rot_amt, mkU8(63));
+ rot = binop(Iop_Or64,
+ binop(Iop_Shl64, src, mask),
+ binop(Iop_Shr64, src, binop(Iop_Sub8, mkU8(64), mask)));
+ } else {
+ // rot = (src << rot_amt) | (src >> (32-rot_amt))
+ mask = binop(Iop_And8, rot_amt, mkU8(31));
+ rot = binop(Iop_Or32,
+ binop(Iop_Shl32, src, mask),
+ binop(Iop_Shr32, src, binop(Iop_Sub8, mkU8(32), mask)));
+ }
+   /* Note: the Mux0X is not merely an optimisation; it's needed
+ because otherwise the Shr is a shift by the word size when
+ mask denotes zero. For rotates by immediates, a lot of
+ this junk gets folded out. */
+ return IRExpr_Mux0X( mask, /* zero rotate */ src,
+ /* non-zero rotate */ rot );
+}
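+/* E.g. (illustrative) a 32-bit ROTL of 0x80000001 by 1 yields
+   0x00000003: the top bit re-enters at the bottom via the Shr term,
+   and the Mux0X passes the source through unchanged for a rotate
+   amount of zero. */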
+
+/* Standard effective address calc: (rA + rB) */
+static IRExpr* ea_rA_idxd ( UInt rA, UInt rB )
+{
+ IRType ty = mode64 ? Ity_I64 : Ity_I32;
+ vassert(rA < 32);
+ vassert(rB < 32);
+ return binop(mkSzOp(ty, Iop_Add8), getIReg(rA), getIReg(rB));
+}
+
+/* Standard effective address calc: (rA + simm) */
+static IRExpr* ea_rA_simm ( UInt rA, UInt simm16 )
+{
+ IRType ty = mode64 ? Ity_I64 : Ity_I32;
+ vassert(rA < 32);
+ return binop(mkSzOp(ty, Iop_Add8), getIReg(rA),
+ mkSzExtendS16(ty, simm16));
+}
+
+/* Standard effective address calc: (rA|0) */
+static IRExpr* ea_rAor0 ( UInt rA )
+{
+ IRType ty = mode64 ? Ity_I64 : Ity_I32;
+ vassert(rA < 32);
+ if (rA == 0) {
+ return mkSzImm(ty, 0);
+ } else {
+ return getIReg(rA);
+ }
+}
+
+/* Standard effective address calc: (rA|0) + rB */
+static IRExpr* ea_rAor0_idxd ( UInt rA, UInt rB )
+{
+ vassert(rA < 32);
+ vassert(rB < 32);
+ return (rA == 0) ? getIReg(rB) : ea_rA_idxd( rA, rB );
+}
+
+/* Standard effective address calc: (rA|0) + simm16 */
+static IRExpr* ea_rAor0_simm ( UInt rA, UInt simm16 )
+{
+ IRType ty = mode64 ? Ity_I64 : Ity_I32;
+ vassert(rA < 32);
+ if (rA == 0) {
+ return mkSzExtendS16(ty, simm16);
+ } else {
+ return ea_rA_simm( rA, simm16 );
+ }
+}
+
+
+/* Align effective address */
+static IRExpr* addr_align( IRExpr* addr, UChar align )
+{
+ IRType ty = mode64 ? Ity_I64 : Ity_I32;
+ Long mask;
+ switch (align) {
+ case 1: return addr; // byte aligned
+ case 2: mask = ((Long)-1) << 1; break; // half-word aligned
+ case 4: mask = ((Long)-1) << 2; break; // word aligned
+ case 16: mask = ((Long)-1) << 4; break; // quad-word aligned
+ default:
+ vex_printf("addr_align: align = %u\n", align);
+ vpanic("addr_align(ppc)");
+ }
+
+ vassert(typeOfIRExpr(irsb->tyenv,addr) == ty);
+ return binop( mkSzOp(ty, Iop_And8), addr, mkSzImm(ty, mask) );
+}
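+/* E.g. (illustrative) addr_align(addr, 16) clears the low 4 address
+   bits, so an address of 0x1003 becomes 0x1000. */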
+
+
+/* Exit the trace if ADDR (intended to be a guest memory address) is
+ not ALIGN-aligned, generating a request for a SIGBUS followed by a
+ restart of the current insn. */
+static void gen_SIGBUS_if_misaligned ( IRTemp addr, UChar align )
+{
+ vassert(align == 4 || align == 8);
+ if (mode64) {
+ vassert(typeOfIRTemp(irsb->tyenv, addr) == Ity_I64);
+ stmt(
+ IRStmt_Exit(
+ binop(Iop_CmpNE64,
+ binop(Iop_And64, mkexpr(addr), mkU64(align-1)),
+ mkU64(0)),
+ Ijk_SigBUS,
+ IRConst_U64( guest_CIA_curr_instr )
+ )
+ );
+ } else {
+ vassert(typeOfIRTemp(irsb->tyenv, addr) == Ity_I32);
+ stmt(
+ IRStmt_Exit(
+ binop(Iop_CmpNE32,
+ binop(Iop_And32, mkexpr(addr), mkU32(align-1)),
+ mkU32(0)),
+ Ijk_SigBUS,
+ IRConst_U32( guest_CIA_curr_instr )
+ )
+ );
+ }
+}
+
+
+/* Generate AbiHints which mark points at which the ELF or PowerOpen
+ ABIs say that the stack red zone (viz, -N(r1) .. -1(r1), for some
+ N) becomes undefined. That is at function calls and returns. ELF
+ ppc32 doesn't have this "feature" (how fortunate for it). nia is
+ the address of the next instruction to be executed.
+*/
+static void make_redzone_AbiHint ( VexAbiInfo* vbi,
+ IRTemp nia, HChar* who )
+{
+ Int szB = vbi->guest_stack_redzone_size;
+ if (0) vex_printf("AbiHint: %s\n", who);
+ vassert(szB >= 0);
+ if (szB > 0) {
+ if (mode64) {
+ vassert(typeOfIRTemp(irsb->tyenv, nia) == Ity_I64);
+ stmt( IRStmt_AbiHint(
+ binop(Iop_Sub64, getIReg(1), mkU64(szB)),
+ szB,
+ mkexpr(nia)
+ ));
+ } else {
+ vassert(typeOfIRTemp(irsb->tyenv, nia) == Ity_I32);
+ stmt( IRStmt_AbiHint(
+ binop(Iop_Sub32, getIReg(1), mkU32(szB)),
+ szB,
+ mkexpr(nia)
+ ));
+ }
+ }
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Helpers for condition codes. ---*/
+/*------------------------------------------------------------*/
+
+/* Condition register layout.
+
+ In the hardware, CR is laid out like this. The leftmost end is the
+ most significant bit in the register; however the IBM documentation
+ numbers the bits backwards for some reason.
+
+ CR0 CR1 .......... CR6 CR7
+ 0 .. 3 ....................... 28 .. 31 (IBM bit numbering)
+ 31 28 3 0 (normal bit numbering)
+
+ Each CR field is 4 bits: [<,>,==,SO]
+
+   Hence in IBM's notation, BI=0 is CR0[<], BI=1 is CR0[>], BI=3 is
+   CR0[SO], and BI=31 is CR7[SO].
+
+ Indexing from BI to guest state:
+
+ let n = BI / 4
+ off = BI % 4
+ this references CR n:
+
+ off==0 -> guest_CRn_321 >> 3
+ off==1 -> guest_CRn_321 >> 2
+ off==2 -> guest_CRn_321 >> 1
+ off==3 -> guest_CRn_SO
+
+ Bear in mind the only significant bit in guest_CRn_SO is bit 0
+ (normal notation) and in guest_CRn_321 the significant bits are
+ 3, 2 and 1 (normal notation).
+*/
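+/* Worked example (illustrative): BI=6 gives n=1, off=2, i.e. the EQ
+   bit of CR1, which getCRbit below fetches as
+   (guest_CR1_321 >> 1) & 1. */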
+
+static void putCR321 ( UInt cr, IRExpr* e )
+{
+ vassert(cr < 8);
+ vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I8);
+ stmt( IRStmt_Put(guestCR321offset(cr), e) );
+}
+
+static void putCR0 ( UInt cr, IRExpr* e )
+{
+ vassert(cr < 8);
+ vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I8);
+ stmt( IRStmt_Put(guestCR0offset(cr), e) );
+}
+
+static IRExpr* /* :: Ity_I8 */ getCR0 ( UInt cr )
+{
+ vassert(cr < 8);
+ return IRExpr_Get(guestCR0offset(cr), Ity_I8);
+}
+
+static IRExpr* /* :: Ity_I8 */ getCR321 ( UInt cr )
+{
+ vassert(cr < 8);
+ return IRExpr_Get(guestCR321offset(cr), Ity_I8);
+}
+
+/* Fetch the specified CR bit (as per IBM/hardware notation) and
+ return it at the bottom of an I32; the top 31 bits are guaranteed
+ to be zero. */
+static IRExpr* /* :: Ity_I32 */ getCRbit ( UInt bi )
+{
+ UInt n = bi / 4;
+ UInt off = bi % 4;
+ vassert(bi < 32);
+ if (off == 3) {
+ /* Fetch the SO bit for this CR field */
+ /* Note: And32 is redundant paranoia iff guest state only has 0
+ or 1 in that slot. */
+ return binop(Iop_And32, unop(Iop_8Uto32, getCR0(n)), mkU32(1));
+ } else {
+ /* Fetch the <, > or == bit for this CR field */
+ return binop( Iop_And32,
+ binop( Iop_Shr32,
+ unop(Iop_8Uto32, getCR321(n)),
+ mkU8(toUChar(3-off)) ),
+ mkU32(1) );
+ }
+}
+
+/* Dually, write the least significant bit of BIT to the specified CR
+ bit. Indexing as per getCRbit. */
+static void putCRbit ( UInt bi, IRExpr* bit )
+{
+ UInt n, off;
+ IRExpr* safe;
+ vassert(typeOfIRExpr(irsb->tyenv,bit) == Ity_I32);
+ safe = binop(Iop_And32, bit, mkU32(1));
+ n = bi / 4;
+ off = bi % 4;
+ vassert(bi < 32);
+ if (off == 3) {
+ /* This is the SO bit for this CR field */
+ putCR0(n, unop(Iop_32to8, safe));
+ } else {
+ off = 3 - off;
+ vassert(off == 1 || off == 2 || off == 3);
+ putCR321(
+ n,
+ unop( Iop_32to8,
+ binop( Iop_Or32,
+ /* old value with field masked out */
+ binop(Iop_And32, unop(Iop_8Uto32, getCR321(n)),
+ mkU32(~(1 << off))),
+ /* new value in the right place */
+ binop(Iop_Shl32, safe, mkU8(toUChar(off)))
+ )
+ )
+ );
+ }
+}
+
+/* Fetch the specified CR bit (as per IBM/hardware notation) and
+ return it somewhere in an I32; it does not matter where, but
+ whichever bit it is, all other bits are guaranteed to be zero. In
+ other words, the I32-typed expression will be zero if the bit is
+   zero and nonzero if the bit is 1.  The index at which the bit
+   lands is written into *where. */
+
+static
+IRExpr* /* :: Ity_I32 */ getCRbit_anywhere ( UInt bi, Int* where )
+{
+ UInt n = bi / 4;
+ UInt off = bi % 4;
+ vassert(bi < 32);
+ if (off == 3) {
+ /* Fetch the SO bit for this CR field */
+ /* Note: And32 is redundant paranoia iff guest state only has 0
+ or 1 in that slot. */
+ *where = 0;
+ return binop(Iop_And32, unop(Iop_8Uto32, getCR0(n)), mkU32(1));
+ } else {
+ /* Fetch the <, > or == bit for this CR field */
+ *where = 3-off;
+ return binop( Iop_And32,
+ unop(Iop_8Uto32, getCR321(n)),
+ mkU32(1 << (3-off)) );
+ }
+}
+
+/* Set the CR0 flags following an arithmetic operation.
+ (Condition Register CR0 Field Definition, PPC32 p60)
+*/
+static IRExpr* getXER_SO ( void );
+static void set_CR0 ( IRExpr* result )
+{
+ vassert(typeOfIRExpr(irsb->tyenv,result) == Ity_I32 ||
+ typeOfIRExpr(irsb->tyenv,result) == Ity_I64);
+ if (mode64) {
+ putCR321( 0, unop(Iop_64to8,
+ binop(Iop_CmpORD64S, result, mkU64(0))) );
+ } else {
+ putCR321( 0, unop(Iop_32to8,
+ binop(Iop_CmpORD32S, result, mkU32(0))) );
+ }
+ putCR0( 0, getXER_SO() );
+}
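+/* Note that CmpORD32S(x,0) evaluates to 8 (x < 0), 4 (x > 0) or
+   2 (x == 0), so its value already carries the LT/GT/EQ flags in bit
+   positions 3..1, exactly as putCR321 expects; getXER_SO supplies
+   the fourth (SO) bit separately. */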
+
+
+/* Set the CR6 flags following an AltiVec compare operation. */
+static void set_AV_CR6 ( IRExpr* result, Bool test_all_ones )
+{
+ /* CR6[0:3] = {all_ones, 0, all_zeros, 0}
+ all_ones = (v[0] && v[1] && v[2] && v[3])
+ all_zeros = ~(v[0] || v[1] || v[2] || v[3])
+ */
+ IRTemp v0 = newTemp(Ity_V128);
+ IRTemp v1 = newTemp(Ity_V128);
+ IRTemp v2 = newTemp(Ity_V128);
+ IRTemp v3 = newTemp(Ity_V128);
+ IRTemp rOnes = newTemp(Ity_I8);
+ IRTemp rZeros = newTemp(Ity_I8);
+
+ vassert(typeOfIRExpr(irsb->tyenv,result) == Ity_V128);
+
+ assign( v0, result );
+ assign( v1, binop(Iop_ShrV128, result, mkU8(32)) );
+ assign( v2, binop(Iop_ShrV128, result, mkU8(64)) );
+ assign( v3, binop(Iop_ShrV128, result, mkU8(96)) );
+
+ assign( rZeros, unop(Iop_1Uto8,
+ binop(Iop_CmpEQ32, mkU32(0xFFFFFFFF),
+ unop(Iop_Not32,
+ unop(Iop_V128to32,
+ binop(Iop_OrV128,
+ binop(Iop_OrV128, mkexpr(v0), mkexpr(v1)),
+ binop(Iop_OrV128, mkexpr(v2), mkexpr(v3))))
+ ))) );
+
+ if (test_all_ones) {
+ assign( rOnes, unop(Iop_1Uto8,
+ binop(Iop_CmpEQ32, mkU32(0xFFFFFFFF),
+ unop(Iop_V128to32,
+ binop(Iop_AndV128,
+ binop(Iop_AndV128, mkexpr(v0), mkexpr(v1)),
+ binop(Iop_AndV128, mkexpr(v2), mkexpr(v3)))
+ ))) );
+ putCR321( 6, binop(Iop_Or8,
+ binop(Iop_Shl8, mkexpr(rOnes), mkU8(3)),
+ binop(Iop_Shl8, mkexpr(rZeros), mkU8(1))) );
+ } else {
+ putCR321( 6, binop(Iop_Shl8, mkexpr(rZeros), mkU8(1)) );
+ }
+ putCR0( 6, mkU8(0) );
+}
+
+
+
+/*------------------------------------------------------------*/
+/*--- Helpers for XER flags. ---*/
+/*------------------------------------------------------------*/
+
+static void putXER_SO ( IRExpr* e )
+{
+ IRExpr* so;
+ vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I8);
+ so = binop(Iop_And8, e, mkU8(1));
+ stmt( IRStmt_Put( OFFB_XER_SO, so ) );
+}
+
+static void putXER_OV ( IRExpr* e )
+{
+ IRExpr* ov;
+ vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I8);
+ ov = binop(Iop_And8, e, mkU8(1));
+ stmt( IRStmt_Put( OFFB_XER_OV, ov ) );
+}
+
+static void putXER_CA ( IRExpr* e )
+{
+ IRExpr* ca;
+ vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I8);
+ ca = binop(Iop_And8, e, mkU8(1));
+ stmt( IRStmt_Put( OFFB_XER_CA, ca ) );
+}
+
+static void putXER_BC ( IRExpr* e )
+{
+ IRExpr* bc;
+ vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I8);
+ bc = binop(Iop_And8, e, mkU8(0x7F));
+ stmt( IRStmt_Put( OFFB_XER_BC, bc ) );
+}
+
+static IRExpr* /* :: Ity_I8 */ getXER_SO ( void )
+{
+ return IRExpr_Get( OFFB_XER_SO, Ity_I8 );
+}
+
+static IRExpr* /* :: Ity_I32 */ getXER_SO32 ( void )
+{
+ return binop( Iop_And32, unop(Iop_8Uto32, getXER_SO()), mkU32(1) );
+}
+
+static IRExpr* /* :: Ity_I8 */ getXER_OV ( void )
+{
+ return IRExpr_Get( OFFB_XER_OV, Ity_I8 );
+}
+
+static IRExpr* /* :: Ity_I32 */ getXER_OV32 ( void )
+{
+ return binop( Iop_And32, unop(Iop_8Uto32, getXER_OV()), mkU32(1) );
+}
+
+static IRExpr* /* :: Ity_I32 */ getXER_CA32 ( void )
+{
+ IRExpr* ca = IRExpr_Get( OFFB_XER_CA, Ity_I8 );
+ return binop( Iop_And32, unop(Iop_8Uto32, ca ), mkU32(1) );
+}
+
+static IRExpr* /* :: Ity_I8 */ getXER_BC ( void )
+{
+ return IRExpr_Get( OFFB_XER_BC, Ity_I8 );
+}
+
+static IRExpr* /* :: Ity_I32 */ getXER_BC32 ( void )
+{
+ IRExpr* bc = IRExpr_Get( OFFB_XER_BC, Ity_I8 );
+ return binop( Iop_And32, unop(Iop_8Uto32, bc), mkU32(0x7F) );
+}
+
+
+/* RES is the result of doing OP on ARGL and ARGR. Set %XER.OV and
+ %XER.SO accordingly. */
+
+static void set_XER_OV_32( UInt op, IRExpr* res,
+ IRExpr* argL, IRExpr* argR )
+{
+ IRTemp t64;
+ IRExpr* xer_ov;
+ vassert(op < PPCG_FLAG_OP_NUMBER);
+ vassert(typeOfIRExpr(irsb->tyenv,res) == Ity_I32);
+ vassert(typeOfIRExpr(irsb->tyenv,argL) == Ity_I32);
+ vassert(typeOfIRExpr(irsb->tyenv,argR) == Ity_I32);
+
+# define INT32_MIN 0x80000000
+
+# define XOR2(_aa,_bb) \
+ binop(Iop_Xor32,(_aa),(_bb))
+
+# define XOR3(_cc,_dd,_ee) \
+ binop(Iop_Xor32,binop(Iop_Xor32,(_cc),(_dd)),(_ee))
+
+# define AND3(_ff,_gg,_hh) \
+ binop(Iop_And32,binop(Iop_And32,(_ff),(_gg)),(_hh))
+
+# define NOT(_jj) \
+ unop(Iop_Not32, (_jj))
+
+ switch (op) {
+ case /* 0 */ PPCG_FLAG_OP_ADD:
+ case /* 1 */ PPCG_FLAG_OP_ADDE:
+ /* (argL^argR^-1) & (argL^res) & (1<<31) ?1:0 */
+ // i.e. ((both_same_sign) & (sign_changed) & (sign_mask))
+ xer_ov
+ = AND3( XOR3(argL,argR,mkU32(-1)),
+ XOR2(argL,res),
+ mkU32(INT32_MIN) );
+ /* xer_ov can only be 0 or 1<<31 */
+ xer_ov
+ = binop(Iop_Shr32, xer_ov, mkU8(31) );
+ break;
+
+ case /* 2 */ PPCG_FLAG_OP_DIVW:
+ /* (argL == INT32_MIN && argR == -1) || argR == 0 */
+ xer_ov
+ = mkOR1(
+ mkAND1(
+ binop(Iop_CmpEQ32, argL, mkU32(INT32_MIN)),
+ binop(Iop_CmpEQ32, argR, mkU32(-1))
+ ),
+ binop(Iop_CmpEQ32, argR, mkU32(0) )
+ );
+ xer_ov
+ = unop(Iop_1Uto32, xer_ov);
+ break;
+
+ case /* 3 */ PPCG_FLAG_OP_DIVWU:
+ /* argR == 0 */
+ xer_ov
+ = unop(Iop_1Uto32, binop(Iop_CmpEQ32, argR, mkU32(0)));
+ break;
+
+ case /* 4 */ PPCG_FLAG_OP_MULLW:
+         /* OV true if result can't be represented in 32 bits,
+            i.e. sHi != sign extension of sLo */
+ t64 = newTemp(Ity_I64);
+ assign( t64, binop(Iop_MullS32, argL, argR) );
+ xer_ov
+ = binop( Iop_CmpNE32,
+ unop(Iop_64HIto32, mkexpr(t64)),
+ binop( Iop_Sar32,
+ unop(Iop_64to32, mkexpr(t64)),
+ mkU8(31))
+ );
+ xer_ov
+ = unop(Iop_1Uto32, xer_ov);
+ break;
+
+ case /* 5 */ PPCG_FLAG_OP_NEG:
+ /* argL == INT32_MIN */
+ xer_ov
+ = unop( Iop_1Uto32,
+ binop(Iop_CmpEQ32, argL, mkU32(INT32_MIN)) );
+ break;
+
+ case /* 6 */ PPCG_FLAG_OP_SUBF:
+ case /* 7 */ PPCG_FLAG_OP_SUBFC:
+ case /* 8 */ PPCG_FLAG_OP_SUBFE:
+ /* ((~argL)^argR^-1) & ((~argL)^res) & (1<<31) ?1:0; */
+ xer_ov
+ = AND3( XOR3(NOT(argL),argR,mkU32(-1)),
+ XOR2(NOT(argL),res),
+ mkU32(INT32_MIN) );
+ /* xer_ov can only be 0 or 1<<31 */
+ xer_ov
+ = binop(Iop_Shr32, xer_ov, mkU8(31) );
+ break;
+
+ default:
+ vex_printf("set_XER_OV: op = %u\n", op);
+ vpanic("set_XER_OV(ppc)");
+ }
+
+ /* xer_ov MUST denote either 0 or 1, no other value allowed */
+ putXER_OV( unop(Iop_32to8, xer_ov) );
+
+ /* Update the summary overflow */
+ putXER_SO( binop(Iop_Or8, getXER_SO(), getXER_OV()) );
+
+# undef INT32_MIN
+# undef AND3
+# undef XOR3
+# undef XOR2
+# undef NOT
+}
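+/* Worked example for the ADD rule (illustrative): for
+   0x7FFFFFFF + 1 = 0x80000000, argL and argR have equal sign bits
+   (so XOR3(argL,argR,-1) has bit 31 set) and the result's sign
+   differs from argL's (so XOR2(argL,res) has bit 31 set), hence
+   xer_ov = 1; for 1 + 1 = 2 the second term is 0 and xer_ov = 0. */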
+
+static void set_XER_OV_64( UInt op, IRExpr* res,
+ IRExpr* argL, IRExpr* argR )
+{
+ IRExpr* xer_ov;
+ vassert(op < PPCG_FLAG_OP_NUMBER);
+ vassert(typeOfIRExpr(irsb->tyenv,res) == Ity_I64);
+ vassert(typeOfIRExpr(irsb->tyenv,argL) == Ity_I64);
+ vassert(typeOfIRExpr(irsb->tyenv,argR) == Ity_I64);
+
+# define INT64_MIN 0x8000000000000000ULL
+
+# define XOR2(_aa,_bb) \
+ binop(Iop_Xor64,(_aa),(_bb))
+
+# define XOR3(_cc,_dd,_ee) \
+ binop(Iop_Xor64,binop(Iop_Xor64,(_cc),(_dd)),(_ee))
+
+# define AND3(_ff,_gg,_hh) \
+ binop(Iop_And64,binop(Iop_And64,(_ff),(_gg)),(_hh))
+
+# define NOT(_jj) \
+ unop(Iop_Not64, (_jj))
+
+ switch (op) {
+ case /* 0 */ PPCG_FLAG_OP_ADD:
+ case /* 1 */ PPCG_FLAG_OP_ADDE:
+ /* (argL^argR^-1) & (argL^res) & (1<<63) ? 1:0 */
+ // i.e. ((both_same_sign) & (sign_changed) & (sign_mask))
+ xer_ov
+ = AND3( XOR3(argL,argR,mkU64(-1)),
+ XOR2(argL,res),
+ mkU64(INT64_MIN) );
+ /* xer_ov can only be 0 or 1<<63 */
+ xer_ov
+ = unop(Iop_64to1, binop(Iop_Shr64, xer_ov, mkU8(63)));
+ break;
+
+ case /* 2 */ PPCG_FLAG_OP_DIVW:
+ /* (argL == INT64_MIN && argR == -1) || argR == 0 */
+ xer_ov
+ = mkOR1(
+ mkAND1(
+ binop(Iop_CmpEQ64, argL, mkU64(INT64_MIN)),
+ binop(Iop_CmpEQ64, argR, mkU64(-1))
+ ),
+ binop(Iop_CmpEQ64, argR, mkU64(0) )
+ );
+ break;
+
+ case /* 3 */ PPCG_FLAG_OP_DIVWU:
+ /* argR == 0 */
+ xer_ov
+ = binop(Iop_CmpEQ64, argR, mkU64(0));
+ break;
+
+ case /* 4 */ PPCG_FLAG_OP_MULLW: {
+         /* OV true if the 32x32-bit product can't be represented in
+            32 bits, i.e. sHi != sign extension of sLo */
+ xer_ov
+ = binop( Iop_CmpNE32,
+ unop(Iop_64HIto32, res),
+ binop( Iop_Sar32,
+ unop(Iop_64to32, res),
+ mkU8(31))
+ );
+ break;
+ }
+
+ case /* 5 */ PPCG_FLAG_OP_NEG:
+ /* argL == INT64_MIN */
+ xer_ov
+ = binop(Iop_CmpEQ64, argL, mkU64(INT64_MIN));
+ break;
+
+ case /* 6 */ PPCG_FLAG_OP_SUBF:
+ case /* 7 */ PPCG_FLAG_OP_SUBFC:
+ case /* 8 */ PPCG_FLAG_OP_SUBFE:
+ /* ((~argL)^argR^-1) & ((~argL)^res) & (1<<63) ?1:0; */
+ xer_ov
+ = AND3( XOR3(NOT(argL),argR,mkU64(-1)),
+ XOR2(NOT(argL),res),
+ mkU64(INT64_MIN) );
+ /* xer_ov can only be 0 or 1<<63 */
+ xer_ov
+ = unop(Iop_64to1, binop(Iop_Shr64, xer_ov, mkU8(63)));
+ break;
+
+ default:
+ vex_printf("set_XER_OV: op = %u\n", op);
+ vpanic("set_XER_OV(ppc64)");
+ }
+
+ /* xer_ov MUST denote either 0 or 1, no other value allowed */
+ putXER_OV( unop(Iop_1Uto8, xer_ov) );
+
+ /* Update the summary overflow */
+ putXER_SO( binop(Iop_Or8, getXER_SO(), getXER_OV()) );
+
+# undef INT64_MIN
+# undef AND3
+# undef XOR3
+# undef XOR2
+# undef NOT
+}
+
+static void set_XER_OV ( IRType ty, UInt op, IRExpr* res,
+ IRExpr* argL, IRExpr* argR )
+{
+ if (ty == Ity_I32)
+ set_XER_OV_32( op, res, argL, argR );
+ else
+ set_XER_OV_64( op, res, argL, argR );
+}
+
+
+
+/* RES is the result of doing OP on ARGL and ARGR with the old %XER.CA
+ value being OLDCA. Set %XER.CA accordingly. */
+
+static void set_XER_CA_32 ( UInt op, IRExpr* res,
+ IRExpr* argL, IRExpr* argR, IRExpr* oldca )
+{
+ IRExpr* xer_ca;
+ vassert(op < PPCG_FLAG_OP_NUMBER);
+ vassert(typeOfIRExpr(irsb->tyenv,res) == Ity_I32);
+ vassert(typeOfIRExpr(irsb->tyenv,argL) == Ity_I32);
+ vassert(typeOfIRExpr(irsb->tyenv,argR) == Ity_I32);
+ vassert(typeOfIRExpr(irsb->tyenv,oldca) == Ity_I32);
+
+ /* Incoming oldca is assumed to hold the values 0 or 1 only. This
+ seems reasonable given that it's always generated by
+ getXER_CA32(), which masks it accordingly. In any case it being
+ 0 or 1 is an invariant of the ppc guest state representation;
+ if it has any other value, that invariant has been violated. */
+
+ switch (op) {
+ case /* 0 */ PPCG_FLAG_OP_ADD:
+ /* res <u argL */
+ xer_ca
+ = unop(Iop_1Uto32, binop(Iop_CmpLT32U, res, argL));
+ break;
+
+ case /* 1 */ PPCG_FLAG_OP_ADDE:
+ /* res <u argL || (old_ca==1 && res==argL) */
+ xer_ca
+ = mkOR1(
+ binop(Iop_CmpLT32U, res, argL),
+ mkAND1(
+ binop(Iop_CmpEQ32, oldca, mkU32(1)),
+ binop(Iop_CmpEQ32, res, argL)
+ )
+ );
+ xer_ca
+ = unop(Iop_1Uto32, xer_ca);
+ break;
+
+ case /* 8 */ PPCG_FLAG_OP_SUBFE:
+ /* res <u argR || (old_ca==1 && res==argR) */
+ xer_ca
+ = mkOR1(
+ binop(Iop_CmpLT32U, res, argR),
+ mkAND1(
+ binop(Iop_CmpEQ32, oldca, mkU32(1)),
+ binop(Iop_CmpEQ32, res, argR)
+ )
+ );
+ xer_ca
+ = unop(Iop_1Uto32, xer_ca);
+ break;
+
+ case /* 7 */ PPCG_FLAG_OP_SUBFC:
+ case /* 9 */ PPCG_FLAG_OP_SUBFI:
+ /* res <=u argR */
+ xer_ca
+ = unop(Iop_1Uto32, binop(Iop_CmpLE32U, res, argR));
+ break;
+
+ case /* 10 */ PPCG_FLAG_OP_SRAW:
+ /* The shift amount is guaranteed to be in 0 .. 63 inclusive.
+ If it is <= 31, behave like SRAWI; else XER.CA is the sign
+ bit of argL. */
+ /* This term valid for shift amount < 32 only */
+ xer_ca
+ = binop(
+ Iop_And32,
+ binop(Iop_Sar32, argL, mkU8(31)),
+ binop( Iop_And32,
+ argL,
+ binop( Iop_Sub32,
+ binop(Iop_Shl32, mkU32(1),
+ unop(Iop_32to8,argR)),
+ mkU32(1) )
+ )
+ );
+ xer_ca
+ = IRExpr_Mux0X(
+ /* shift amt > 31 ? */
+ unop(Iop_1Uto8, binop(Iop_CmpLT32U, mkU32(31), argR)),
+ /* no -- be like srawi */
+ unop(Iop_1Uto32, binop(Iop_CmpNE32, xer_ca, mkU32(0))),
+ /* yes -- get sign bit of argL */
+ binop(Iop_Shr32, argL, mkU8(31))
+ );
+ break;
+
+ case /* 11 */ PPCG_FLAG_OP_SRAWI:
+ /* xer_ca is 1 iff src was negative and bits_shifted_out !=
+ 0. Since the shift amount is known to be in the range
+ 0 .. 31 inclusive the following seems viable:
+ xer.ca == 1 iff the following is nonzero:
+ (argL >>s 31) -- either all 0s or all 1s
+ & (argL & (1<<argR)-1) -- the stuff shifted out */
+ xer_ca
+ = binop(
+ Iop_And32,
+ binop(Iop_Sar32, argL, mkU8(31)),
+ binop( Iop_And32,
+ argL,
+ binop( Iop_Sub32,
+ binop(Iop_Shl32, mkU32(1),
+ unop(Iop_32to8,argR)),
+ mkU32(1) )
+ )
+ );
+ xer_ca
+ = unop(Iop_1Uto32, binop(Iop_CmpNE32, xer_ca, mkU32(0)));
+ break;
+
+ default:
+ vex_printf("set_XER_CA: op = %u\n", op);
+ vpanic("set_XER_CA(ppc)");
+ }
+
+ /* xer_ca MUST denote either 0 or 1, no other value allowed */
+ putXER_CA( unop(Iop_32to8, xer_ca) );
+}
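+/* Worked example for the ADD rule (illustrative): 0xFFFFFFFF + 1
+   wraps to 0, and 0 <u 0xFFFFFFFF, so CA = 1; whereas 1 + 1 = 2 is
+   not <u 1, so CA = 0. */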
+
+static void set_XER_CA_64 ( UInt op, IRExpr* res,
+ IRExpr* argL, IRExpr* argR, IRExpr* oldca )
+{
+ IRExpr* xer_ca;
+ vassert(op < PPCG_FLAG_OP_NUMBER);
+ vassert(typeOfIRExpr(irsb->tyenv,res) == Ity_I64);
+ vassert(typeOfIRExpr(irsb->tyenv,argL) == Ity_I64);
+ vassert(typeOfIRExpr(irsb->tyenv,argR) == Ity_I64);
+ vassert(typeOfIRExpr(irsb->tyenv,oldca) == Ity_I64);
+
+ /* Incoming oldca is assumed to hold the values 0 or 1 only. This
+ seems reasonable given that it's always generated by
+ getXER_CA32(), which masks it accordingly. In any case it being
+ 0 or 1 is an invariant of the ppc guest state representation;
+ if it has any other value, that invariant has been violated. */
+
+ switch (op) {
+ case /* 0 */ PPCG_FLAG_OP_ADD:
+ /* res <u argL */
+ xer_ca
+ = unop(Iop_1Uto32, binop(Iop_CmpLT64U, res, argL));
+ break;
+
+ case /* 1 */ PPCG_FLAG_OP_ADDE:
+ /* res <u argL || (old_ca==1 && res==argL) */
+ xer_ca
+ = mkOR1(
+ binop(Iop_CmpLT64U, res, argL),
+ mkAND1(
+ binop(Iop_CmpEQ64, oldca, mkU64(1)),
+ binop(Iop_CmpEQ64, res, argL)
+ )
+ );
+ xer_ca
+ = unop(Iop_1Uto32, xer_ca);
+ break;
+
+ case /* 8 */ PPCG_FLAG_OP_SUBFE:
+ /* res <u argR || (old_ca==1 && res==argR) */
+ xer_ca
+ = mkOR1(
+ binop(Iop_CmpLT64U, res, argR),
+ mkAND1(
+ binop(Iop_CmpEQ64, oldca, mkU64(1)),
+ binop(Iop_CmpEQ64, res, argR)
+ )
+ );
+ xer_ca
+ = unop(Iop_1Uto32, xer_ca);
+ break;
+
+ case /* 7 */ PPCG_FLAG_OP_SUBFC:
+ case /* 9 */ PPCG_FLAG_OP_SUBFI:
+ /* res <=u argR */
+ xer_ca
+ = unop(Iop_1Uto32, binop(Iop_CmpLE64U, res, argR));
+ break;
+
+
+ case /* 10 */ PPCG_FLAG_OP_SRAW:
+         /* The shift amount is guaranteed to be in 0 .. 63 inclusive.
+            If it is <= 31, behave like SRAWI; else XER.CA is the sign
+            bit of argL. */
+         /* This term valid for shift amount < 32 only */
+
+ xer_ca
+ = binop(
+ Iop_And64,
+ binop(Iop_Sar64, argL, mkU8(31)),
+ binop( Iop_And64,
+ argL,
+ binop( Iop_Sub64,
+ binop(Iop_Shl64, mkU64(1),
+ unop(Iop_64to8,argR)),
+ mkU64(1) )
+ )
+ );
+ xer_ca
+ = IRExpr_Mux0X(
+ /* shift amt > 31 ? */
+ unop(Iop_1Uto8, binop(Iop_CmpLT64U, mkU64(31), argR)),
+ /* no -- be like srawi */
+ unop(Iop_1Uto32, binop(Iop_CmpNE64, xer_ca, mkU64(0))),
+ /* yes -- get sign bit of argL */
+ unop(Iop_64to32, binop(Iop_Shr64, argL, mkU8(63)))
+ );
+ break;
+
+ case /* 11 */ PPCG_FLAG_OP_SRAWI:
+ /* xer_ca is 1 iff src was negative and bits_shifted_out != 0.
+ Since the shift amount is known to be in the range 0 .. 31
+ inclusive the following seems viable:
+ xer.ca == 1 iff the following is nonzero:
+ (argL >>s 31) -- either all 0s or all 1s
+ & (argL & (1<<argR)-1) -- the stuff shifted out */
+
+ xer_ca
+ = binop(
+ Iop_And64,
+ binop(Iop_Sar64, argL, mkU8(31)),
+ binop( Iop_And64,
+ argL,
+ binop( Iop_Sub64,
+ binop(Iop_Shl64, mkU64(1),
+ unop(Iop_64to8,argR)),
+ mkU64(1) )
+ )
+ );
+ xer_ca
+ = unop(Iop_1Uto32, binop(Iop_CmpNE64, xer_ca, mkU64(0)));
+ break;
+
+
+ case /* 12 */ PPCG_FLAG_OP_SRAD:
+         /* The shift amount is guaranteed to be in 0 .. 127 inclusive.
+            If it is <= 63, behave like SRADI; else XER.CA is the sign
+            bit of argL. */
+         /* This term valid for shift amount < 64 only */
+
+ xer_ca
+ = binop(
+ Iop_And64,
+ binop(Iop_Sar64, argL, mkU8(63)),
+ binop( Iop_And64,
+ argL,
+ binop( Iop_Sub64,
+ binop(Iop_Shl64, mkU64(1),
+ unop(Iop_64to8,argR)),
+ mkU64(1) )
+ )
+ );
+ xer_ca
+ = IRExpr_Mux0X(
+ /* shift amt > 63 ? */
+ unop(Iop_1Uto8, binop(Iop_CmpLT64U, mkU64(63), argR)),
+ /* no -- be like sradi */
+ unop(Iop_1Uto32, binop(Iop_CmpNE64, xer_ca, mkU64(0))),
+ /* yes -- get sign bit of argL */
+ unop(Iop_64to32, binop(Iop_Shr64, argL, mkU8(63)))
+ );
+ break;
+
+
+ case /* 13 */ PPCG_FLAG_OP_SRADI:
+ /* xer_ca is 1 iff src was negative and bits_shifted_out != 0.
+ Since the shift amount is known to be in the range 0 .. 63
+ inclusive, the following seems viable:
+ xer.ca == 1 iff the following is nonzero:
+ (argL >>s 63) -- either all 0s or all 1s
+ & (argL & (1<<argR)-1) -- the stuff shifted out */
+
+ xer_ca
+ = binop(
+ Iop_And64,
+ binop(Iop_Sar64, argL, mkU8(63)),
+ binop( Iop_And64,
+ argL,
+ binop( Iop_Sub64,
+ binop(Iop_Shl64, mkU64(1),
+ unop(Iop_64to8,argR)),
+ mkU64(1) )
+ )
+ );
+ xer_ca
+ = unop(Iop_1Uto32, binop(Iop_CmpNE64, xer_ca, mkU64(0)));
+ break;
+
+ default:
+ vex_printf("set_XER_CA: op = %u\n", op);
+ vpanic("set_XER_CA(ppc64)");
+ }
+
+ /* xer_ca MUST denote either 0 or 1, no other value allowed */
+ putXER_CA( unop(Iop_32to8, xer_ca) );
+}
+
+static void set_XER_CA ( IRType ty, UInt op, IRExpr* res,
+ IRExpr* argL, IRExpr* argR, IRExpr* oldca )
+{
+ if (ty == Ity_I32)
+ set_XER_CA_32( op, res, argL, argR, oldca );
+ else
+ set_XER_CA_64( op, res, argL, argR, oldca );
+}
+
+
+
+/*------------------------------------------------------------*/
+/*--- Read/write to guest-state ---*/
+/*------------------------------------------------------------*/
+
+static IRExpr* /* :: Ity_I32/64 */ getGST ( PPC_GST reg )
+{
+ IRType ty = mode64 ? Ity_I64 : Ity_I32;
+ switch (reg) {
+ case PPC_GST_SPRG3_RO:
+ return IRExpr_Get( OFFB_SPRG3_RO, ty );
+
+ case PPC_GST_CIA:
+ return IRExpr_Get( OFFB_CIA, ty );
+
+ case PPC_GST_LR:
+ return IRExpr_Get( OFFB_LR, ty );
+
+ case PPC_GST_CTR:
+ return IRExpr_Get( OFFB_CTR, ty );
+
+ case PPC_GST_VRSAVE:
+ return IRExpr_Get( OFFB_VRSAVE, Ity_I32 );
+
+ case PPC_GST_VSCR:
+ return binop(Iop_And32, IRExpr_Get( OFFB_VSCR,Ity_I32 ),
+ mkU32(MASK_VSCR_VALID));
+
+ case PPC_GST_CR: {
+ /* Synthesise the entire CR into a single word. Expensive. */
+# define FIELD(_n) \
+ binop(Iop_Shl32, \
+ unop(Iop_8Uto32, \
+ binop(Iop_Or8, \
+ binop(Iop_And8, getCR321(_n), mkU8(7<<1)), \
+ binop(Iop_And8, getCR0(_n), mkU8(1)) \
+ ) \
+ ), \
+ mkU8(4 * (7-(_n))) \
+ )
+ return binop(Iop_Or32,
+ binop(Iop_Or32,
+ binop(Iop_Or32, FIELD(0), FIELD(1)),
+ binop(Iop_Or32, FIELD(2), FIELD(3))
+ ),
+ binop(Iop_Or32,
+ binop(Iop_Or32, FIELD(4), FIELD(5)),
+ binop(Iop_Or32, FIELD(6), FIELD(7))
+ )
+ );
+# undef FIELD
+ }
+
+ case PPC_GST_XER:
+ return binop(Iop_Or32,
+ binop(Iop_Or32,
+ binop( Iop_Shl32, getXER_SO32(), mkU8(31)),
+ binop( Iop_Shl32, getXER_OV32(), mkU8(30))),
+ binop(Iop_Or32,
+ binop( Iop_Shl32, getXER_CA32(), mkU8(29)),
+ getXER_BC32()));
+
+ default:
+ vex_printf("getGST(ppc): reg = %u", reg);
+ vpanic("getGST(ppc)");
+ }
+}
+
+/* Get a masked word from the given reg */
+static IRExpr* /* ::Ity_I32 */ getGST_masked ( PPC_GST reg, UInt mask )
+{
+ IRTemp val = newTemp(Ity_I32);
+ vassert( reg < PPC_GST_MAX );
+
+ switch (reg) {
+
+ case PPC_GST_FPSCR: {
+ /* Vex-generated code expects the FPSCR to be set as follows:
+ all exceptions masked, round-to-nearest.
+ This corresponds to a FPSCR value of 0x0. */
+
+ /* We're only keeping track of the rounding mode,
+ so if the mask isn't asking for this, just return 0x0 */
+ if (mask & (MASK_FPSCR_RN|MASK_FPSCR_FPRF)) {
+ assign( val, IRExpr_Get( OFFB_FPROUND, Ity_I32 ) );
+ } else {
+ assign( val, mkU32(0x0) );
+ }
+ break;
+ }
+
+ default:
+ vex_printf("getGST_masked(ppc): reg = %u", reg);
+ vpanic("getGST_masked(ppc)");
+ }
+
+ if (mask != 0xFFFFFFFF) {
+ return binop(Iop_And32, mkexpr(val), mkU32(mask));
+ } else {
+ return mkexpr(val);
+ }
+}
+
+/* Fetch the specified REG[FLD] nibble (as per IBM/hardware notation)
+   and return it at the bottom of an I32; the top 28 bits are
+   guaranteed to be zero. */
+static IRExpr* /* ::Ity_I32 */ getGST_field ( PPC_GST reg, UInt fld )
+{
+ UInt shft, mask;
+
+ vassert( fld < 8 );
+ vassert( reg < PPC_GST_MAX );
+
+ shft = 4*(7-fld);
+ mask = 0xF<<shft;
+
+ switch (reg) {
+ case PPC_GST_XER:
+      vassert(fld == 7);
+ return binop(Iop_Or32,
+ binop(Iop_Or32,
+ binop(Iop_Shl32, getXER_SO32(), mkU8(3)),
+ binop(Iop_Shl32, getXER_OV32(), mkU8(2))),
+ binop( Iop_Shl32, getXER_CA32(), mkU8(1)));
+ break;
+
+ default:
+ if (shft == 0)
+ return getGST_masked( reg, mask );
+ else
+ return binop(Iop_Shr32,
+ getGST_masked( reg, mask ),
+ mkU8(toUChar( shft )));
+ }
+}
+
+static void putGST ( PPC_GST reg, IRExpr* src )
+{
+ IRType ty = mode64 ? Ity_I64 : Ity_I32;
+ IRType ty_src = typeOfIRExpr(irsb->tyenv,src );
+ vassert( reg < PPC_GST_MAX );
+ switch (reg) {
+ case PPC_GST_IP_AT_SYSCALL:
+ vassert( ty_src == ty );
+ stmt( IRStmt_Put( OFFB_IP_AT_SYSCALL, src ) );
+ break;
+ case PPC_GST_CIA:
+ vassert( ty_src == ty );
+ stmt( IRStmt_Put( OFFB_CIA, src ) );
+ break;
+ case PPC_GST_LR:
+ vassert( ty_src == ty );
+ stmt( IRStmt_Put( OFFB_LR, src ) );
+ break;
+ case PPC_GST_CTR:
+ vassert( ty_src == ty );
+ stmt( IRStmt_Put( OFFB_CTR, src ) );
+ break;
+ case PPC_GST_VRSAVE:
+ vassert( ty_src == Ity_I32 );
+ stmt( IRStmt_Put( OFFB_VRSAVE,src));
+ break;
+ case PPC_GST_VSCR:
+ vassert( ty_src == Ity_I32 );
+ stmt( IRStmt_Put( OFFB_VSCR,
+ binop(Iop_And32, src,
+ mkU32(MASK_VSCR_VALID)) ) );
+ break;
+ case PPC_GST_XER:
+ vassert( ty_src == Ity_I32 );
+ putXER_SO( unop(Iop_32to8, binop(Iop_Shr32, src, mkU8(31))) );
+ putXER_OV( unop(Iop_32to8, binop(Iop_Shr32, src, mkU8(30))) );
+ putXER_CA( unop(Iop_32to8, binop(Iop_Shr32, src, mkU8(29))) );
+ putXER_BC( unop(Iop_32to8, src) );
+ break;
+
+ case PPC_GST_EMWARN:
+ vassert( ty_src == Ity_I32 );
+ stmt( IRStmt_Put( OFFB_EMWARN,src) );
+ break;
+
+ case PPC_GST_TISTART:
+ vassert( ty_src == ty );
+ stmt( IRStmt_Put( OFFB_TISTART, src) );
+ break;
+
+ case PPC_GST_TILEN:
+ vassert( ty_src == ty );
+ stmt( IRStmt_Put( OFFB_TILEN, src) );
+ break;
+
+ default:
+ vex_printf("putGST(ppc): reg = %u", reg);
+ vpanic("putGST(ppc)");
+ }
+}
+
+/* Write masked src to the given reg */
+static void putGST_masked ( PPC_GST reg, IRExpr* src, UInt mask )
+{
+ IRType ty = mode64 ? Ity_I64 : Ity_I32;
+ vassert( reg < PPC_GST_MAX );
+ vassert( typeOfIRExpr(irsb->tyenv,src ) == Ity_I32 );
+
+ switch (reg) {
+ case PPC_GST_FPSCR: {
+ /* Allow writes to Rounding Mode */
+ if (mask & (MASK_FPSCR_RN|MASK_FPSCR_FPRF)) {
+ /* construct new fpround from new and old values as per mask:
+            new fpround = (src & RN_FPRF & mask) | (fpround & RN_FPRF & ~mask),
+            where RN_FPRF is MASK_FPSCR_RN|MASK_FPSCR_FPRF */
+ stmt(
+ IRStmt_Put(
+ OFFB_FPROUND,
+ binop(
+ Iop_Or32,
+ binop(Iop_And32, src, mkU32((MASK_FPSCR_RN|MASK_FPSCR_FPRF) & mask)),
+ binop(
+ Iop_And32,
+ IRExpr_Get(OFFB_FPROUND,Ity_I32),
+ mkU32((MASK_FPSCR_RN|MASK_FPSCR_FPRF) & ~mask)
+ )
+ )
+ )
+ );
+ }
+
+ /* Give EmWarn for attempted writes to:
+ - Exception Controls
+ - Non-IEEE Mode
+ */
+      if (mask & 0xFC) { // Exception Control, Non-IEEE mode
+ VexEmWarn ew = EmWarn_PPCexns;
+
+ /* If any of the src::exception_control bits are actually set,
+ side-exit to the next insn, reporting the warning,
+ so that Valgrind's dispatcher sees the warning. */
+ putGST( PPC_GST_EMWARN, mkU32(ew) );
+ stmt(
+ IRStmt_Exit(
+ binop(Iop_CmpNE32, mkU32(ew), mkU32(EmWarn_NONE)),
+ Ijk_EmWarn,
+ mkSzConst( ty, nextInsnAddr()) ));
+ }
+
+ /* Ignore all other writes */
+ break;
+ }
+
+ default:
+ vex_printf("putGST_masked(ppc): reg = %u", reg);
+ vpanic("putGST_masked(ppc)");
+ }
+}
+
+/* Write the least significant nibble of src to the specified
+ REG[FLD] (as per IBM/hardware notation). */
+static void putGST_field ( PPC_GST reg, IRExpr* src, UInt fld )
+{
+ UInt shft, mask;
+
+ vassert( typeOfIRExpr(irsb->tyenv,src ) == Ity_I32 );
+ vassert( fld < 8 );
+ vassert( reg < PPC_GST_MAX );
+
+ shft = 4*(7-fld);
+ mask = 0xF<<shft;
+
+ switch (reg) {
+ case PPC_GST_CR:
+ putCR0 (fld, binop(Iop_And8, mkU8(1 ), unop(Iop_32to8, src)));
+ putCR321(fld, binop(Iop_And8, mkU8(7<<1), unop(Iop_32to8, src)));
+ break;
+
+ default:
+ if (shft == 0) {
+ putGST_masked( reg, src, mask );
+ } else {
+ putGST_masked( reg,
+ binop(Iop_Shl32, src, mkU8(toUChar(shft))),
+ mask );
+ }
+ }
+}
+
+
+
+/*------------------------------------------------------------*/
+/*--- Integer Instruction Translation ---*/
+/*------------------------------------------------------------*/
+
+/*
+ Integer Arithmetic Instructions
+*/
+static Bool dis_int_arith ( UInt theInstr )
+{
+ /* D-Form, XO-Form */
+ UChar opc1 = ifieldOPC(theInstr);
+ UChar rD_addr = ifieldRegDS(theInstr);
+ UChar rA_addr = ifieldRegA(theInstr);
+ UInt uimm16 = ifieldUIMM16(theInstr);
+ UChar rB_addr = ifieldRegB(theInstr);
+ UChar flag_OE = ifieldBIT10(theInstr);
+ UInt opc2 = ifieldOPClo9(theInstr);
+ UChar flag_rC = ifieldBIT0(theInstr);
+
+ Long simm16 = extend_s_16to64(uimm16);
+ IRType ty = mode64 ? Ity_I64 : Ity_I32;
+ IRTemp rA = newTemp(ty);
+ IRTemp rB = newTemp(ty);
+ IRTemp rD = newTemp(ty);
+
+ Bool do_rc = False;
+
+ assign( rA, getIReg(rA_addr) );
+ assign( rB, getIReg(rB_addr) ); // XO-Form: rD, rA, rB
+
+ switch (opc1) {
+ /* D-Form */
+   case 0x0C: // addic  (Add Immediate Carrying, PPC32 p351)
+ DIP("addic r%u,r%u,%d\n", rD_addr, rA_addr, (Int)simm16);
+ assign( rD, binop( mkSzOp(ty, Iop_Add8), mkexpr(rA),
+ mkSzExtendS16(ty, uimm16) ) );
+ set_XER_CA( ty, PPCG_FLAG_OP_ADD,
+ mkexpr(rD), mkexpr(rA), mkSzExtendS16(ty, uimm16),
+ mkSzImm(ty, 0)/*old xer.ca, which is ignored*/ );
+ break;
+
+ case 0x0D: // addic. (Add Immediate Carrying and Record, PPC32 p352)
+ DIP("addic. r%u,r%u,%d\n", rD_addr, rA_addr, (Int)simm16);
+ assign( rD, binop( mkSzOp(ty, Iop_Add8), mkexpr(rA),
+ mkSzExtendS16(ty, uimm16) ) );
+ set_XER_CA( ty, PPCG_FLAG_OP_ADD,
+ mkexpr(rD), mkexpr(rA), mkSzExtendS16(ty, uimm16),
+ mkSzImm(ty, 0)/*old xer.ca, which is ignored*/ );
+ do_rc = True; // Always record to CR
+ flag_rC = 1;
+ break;
+
+ case 0x0E: // addi (Add Immediate, PPC32 p350)
+ // li rD,val == addi rD,0,val
+      // la rD,disp(rA) == addi rD,rA,disp
+ if ( rA_addr == 0 ) {
+ DIP("li r%u,%d\n", rD_addr, (Int)simm16);
+ assign( rD, mkSzExtendS16(ty, uimm16) );
+ } else {
+ DIP("addi r%u,r%u,%d\n", rD_addr, rA_addr, (Int)simm16);
+ assign( rD, binop( mkSzOp(ty, Iop_Add8), mkexpr(rA),
+ mkSzExtendS16(ty, uimm16) ) );
+ }
+ break;
+
+ case 0x0F: // addis (Add Immediate Shifted, PPC32 p353)
+ // lis rD,val == addis rD,0,val
+ if ( rA_addr == 0 ) {
+ DIP("lis r%u,%d\n", rD_addr, (Int)simm16);
+ assign( rD, mkSzExtendS32(ty, uimm16 << 16) );
+ } else {
+ DIP("addis r%u,r%u,0x%x\n", rD_addr, rA_addr, (Int)simm16);
+ assign( rD, binop( mkSzOp(ty, Iop_Add8), mkexpr(rA),
+ mkSzExtendS32(ty, uimm16 << 16) ) );
+ }
+ break;
+
+ case 0x07: // mulli (Multiply Low Immediate, PPC32 p490)
+ DIP("mulli r%u,r%u,%d\n", rD_addr, rA_addr, (Int)simm16);
+ if (mode64)
+ assign( rD, unop(Iop_128to64,
+ binop(Iop_MullS64, mkexpr(rA),
+ mkSzExtendS16(ty, uimm16))) );
+ else
+ assign( rD, unop(Iop_64to32,
+ binop(Iop_MullS32, mkexpr(rA),
+ mkSzExtendS16(ty, uimm16))) );
+ break;
+
+ case 0x08: // subfic (Subtract from Immediate Carrying, PPC32 p540)
+ DIP("subfic r%u,r%u,%d\n", rD_addr, rA_addr, (Int)simm16);
+ // rD = simm16 - rA
+ assign( rD, binop( mkSzOp(ty, Iop_Sub8),
+ mkSzExtendS16(ty, uimm16),
+ mkexpr(rA)) );
+ set_XER_CA( ty, PPCG_FLAG_OP_SUBFI,
+ mkexpr(rD), mkexpr(rA), mkSzExtendS16(ty, uimm16),
+ mkSzImm(ty, 0)/*old xer.ca, which is ignored*/ );
+ break;
+
+ /* XO-Form */
+ case 0x1F:
+ do_rc = True; // All below record to CR
+
+ switch (opc2) {
+ case 0x10A: // add (Add, PPC32 p347)
+ DIP("add%s%s r%u,r%u,r%u\n",
+ flag_OE ? "o" : "", flag_rC ? ".":"",
+ rD_addr, rA_addr, rB_addr);
+ assign( rD, binop( mkSzOp(ty, Iop_Add8),
+ mkexpr(rA), mkexpr(rB) ) );
+ if (flag_OE) {
+ set_XER_OV( ty, PPCG_FLAG_OP_ADD,
+ mkexpr(rD), mkexpr(rA), mkexpr(rB) );
+ }
+ break;
+
+ case 0x00A: // addc (Add Carrying, PPC32 p348)
+ DIP("addc%s%s r%u,r%u,r%u\n",
+ flag_OE ? "o" : "", flag_rC ? ".":"",
+ rD_addr, rA_addr, rB_addr);
+ assign( rD, binop( mkSzOp(ty, Iop_Add8),
+ mkexpr(rA), mkexpr(rB)) );
+ set_XER_CA( ty, PPCG_FLAG_OP_ADD,
+ mkexpr(rD), mkexpr(rA), mkexpr(rB),
+ mkSzImm(ty, 0)/*old xer.ca, which is ignored*/ );
+ if (flag_OE) {
+ set_XER_OV( ty, PPCG_FLAG_OP_ADD,
+ mkexpr(rD), mkexpr(rA), mkexpr(rB) );
+ }
+ break;
+
+ case 0x08A: { // adde (Add Extended, PPC32 p349)
+ IRTemp old_xer_ca = newTemp(ty);
+ DIP("adde%s%s r%u,r%u,r%u\n",
+ flag_OE ? "o" : "", flag_rC ? ".":"",
+ rD_addr, rA_addr, rB_addr);
+ // rD = rA + rB + XER[CA]
+ assign( old_xer_ca, mkWidenFrom32(ty, getXER_CA32(), False) );
+ assign( rD, binop( mkSzOp(ty, Iop_Add8), mkexpr(rA),
+ binop( mkSzOp(ty, Iop_Add8),
+ mkexpr(rB), mkexpr(old_xer_ca))) );
+ set_XER_CA( ty, PPCG_FLAG_OP_ADDE,
+ mkexpr(rD), mkexpr(rA), mkexpr(rB),
+ mkexpr(old_xer_ca) );
+ if (flag_OE) {
+ set_XER_OV( ty, PPCG_FLAG_OP_ADDE,
+ mkexpr(rD), mkexpr(rA), mkexpr(rB) );
+ }
+ break;
+ }
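+
+      /* Note (editor's addition): addc/adde form the usual multi-word
+         addition idiom -- addc produces the low word and sets XER.CA,
+         and adde then folds that carry into the next-higher word. */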
+
+ case 0x0EA: { // addme (Add to Minus One Extended, PPC32 p354)
+ IRTemp old_xer_ca = newTemp(ty);
+ IRExpr *min_one;
+ if (rB_addr != 0) {
+ vex_printf("dis_int_arith(ppc)(addme,rB_addr)\n");
+ return False;
+ }
+ DIP("addme%s%s r%u,r%u,r%u\n",
+ flag_OE ? "o" : "", flag_rC ? ".":"",
+ rD_addr, rA_addr, rB_addr);
+ // rD = rA + (-1) + XER[CA]
+ // => Just another form of adde
+ assign( old_xer_ca, mkWidenFrom32(ty, getXER_CA32(), False) );
+ min_one = mkSzImm(ty, (Long)-1);
+ assign( rD, binop( mkSzOp(ty, Iop_Add8), mkexpr(rA),
+ binop( mkSzOp(ty, Iop_Add8),
+ min_one, mkexpr(old_xer_ca)) ));
+ set_XER_CA( ty, PPCG_FLAG_OP_ADDE,
+ mkexpr(rD), mkexpr(rA), min_one,
+ mkexpr(old_xer_ca) );
+ if (flag_OE) {
+ set_XER_OV( ty, PPCG_FLAG_OP_ADDE,
+ mkexpr(rD), mkexpr(rA), min_one );
+ }
+ break;
+ }
+
+ case 0x0CA: { // addze (Add to Zero Extended, PPC32 p355)
+ IRTemp old_xer_ca = newTemp(ty);
+ if (rB_addr != 0) {
+ vex_printf("dis_int_arith(ppc)(addze,rB_addr)\n");
+ return False;
+ }
+ DIP("addze%s%s r%u,r%u,r%u\n",
+ flag_OE ? "o" : "", flag_rC ? ".":"",
+ rD_addr, rA_addr, rB_addr);
+ // rD = rA + (0) + XER[CA]
+ // => Just another form of adde
+ assign( old_xer_ca, mkWidenFrom32(ty, getXER_CA32(), False) );
+ assign( rD, binop( mkSzOp(ty, Iop_Add8),
+ mkexpr(rA), mkexpr(old_xer_ca)) );
+ set_XER_CA( ty, PPCG_FLAG_OP_ADDE,
+ mkexpr(rD), mkexpr(rA), mkSzImm(ty, 0),
+ mkexpr(old_xer_ca) );
+ if (flag_OE) {
+ set_XER_OV( ty, PPCG_FLAG_OP_ADDE,
+ mkexpr(rD), mkexpr(rA), mkSzImm(ty, 0) );
+ }
+ break;
+ }
+
+ case 0x1EB: // divw (Divide Word, PPC32 p388)
+ DIP("divw%s%s r%u,r%u,r%u\n",
+ flag_OE ? "o" : "", flag_rC ? ".":"",
+ rD_addr, rA_addr, rB_addr);
+ if (mode64) {
+ /* Note:
+ XER settings are mode independent, and reflect the
+ overflow of the low-order 32bit result
+ CR0[LT|GT|EQ] are undefined if flag_rC && mode64
+ */
+ /* rD[hi32] are undefined: setting them to sign of lo32
+ - makes set_CR0 happy */
+ IRExpr* dividend = mk64lo32Sto64( mkexpr(rA) );
+ IRExpr* divisor = mk64lo32Sto64( mkexpr(rB) );
+ assign( rD, mk64lo32Uto64( binop(Iop_DivS64, dividend,
+ divisor) ) );
+ if (flag_OE) {
+ set_XER_OV( ty, PPCG_FLAG_OP_DIVW,
+ mkexpr(rD), dividend, divisor );
+ }
+ } else {
+ assign( rD, binop(Iop_DivS32, mkexpr(rA), mkexpr(rB)) );
+ if (flag_OE) {
+ set_XER_OV( ty, PPCG_FLAG_OP_DIVW,
+ mkexpr(rD), mkexpr(rA), mkexpr(rB) );
+ }
+ }
+ /* Note:
+ if (0x8000_0000 / -1) or (x / 0)
+ => rD=undef, if(flag_rC) CR7=undef, if(flag_OE) XER_OV=1
+ => But _no_ exception raised. */
+ break;
+
+ case 0x1CB: // divwu (Divide Word Unsigned, PPC32 p389)
+ DIP("divwu%s%s r%u,r%u,r%u\n",
+ flag_OE ? "o" : "", flag_rC ? ".":"",
+ rD_addr, rA_addr, rB_addr);
+ if (mode64) {
+ /* Note:
+ XER settings are mode independent, and reflect the
+ overflow of the low-order 32bit result
+ CR0[LT|GT|EQ] are undefined if flag_rC && mode64
+ */
+ IRExpr* dividend = mk64lo32Uto64( mkexpr(rA) );
+ IRExpr* divisor = mk64lo32Uto64( mkexpr(rB) );
+ assign( rD, mk64lo32Uto64( binop(Iop_DivU64, dividend,
+ divisor) ) );
+ if (flag_OE) {
+ set_XER_OV( ty, PPCG_FLAG_OP_DIVWU,
+ mkexpr(rD), dividend, divisor );
+ }
+ } else {
+ assign( rD, binop(Iop_DivU32, mkexpr(rA), mkexpr(rB)) );
+ if (flag_OE) {
+ set_XER_OV( ty, PPCG_FLAG_OP_DIVWU,
+ mkexpr(rD), mkexpr(rA), mkexpr(rB) );
+ }
+ }
+ /* Note: ditto comment divw, for (x / 0) */
+ break;
+
+ case 0x04B: // mulhw (Multiply High Word, PPC32 p488)
+ if (flag_OE != 0) {
+ vex_printf("dis_int_arith(ppc)(mulhw,flag_OE)\n");
+ return False;
+ }
+ DIP("mulhw%s r%u,r%u,r%u\n", flag_rC ? ".":"",
+ rD_addr, rA_addr, rB_addr);
+ if (mode64) {
+ /* rD[hi32] are undefined: setting them to sign of lo32
+ - makes set_CR0 happy */
+ assign( rD, binop(Iop_Sar64,
+ binop(Iop_Mul64,
+ mk64lo32Sto64( mkexpr(rA) ),
+ mk64lo32Sto64( mkexpr(rB) )),
+ mkU8(32)) );
+ } else {
+ assign( rD, unop(Iop_64HIto32,
+ binop(Iop_MullS32,
+ mkexpr(rA), mkexpr(rB))) );
+ }
+ break;
+
+ case 0x00B: // mulhwu (Multiply High Word Unsigned, PPC32 p489)
+ if (flag_OE != 0) {
+ vex_printf("dis_int_arith(ppc)(mulhwu,flag_OE)\n");
+ return False;
+ }
+ DIP("mulhwu%s r%u,r%u,r%u\n", flag_rC ? ".":"",
+ rD_addr, rA_addr, rB_addr);
+ if (mode64) {
+ /* rD[hi32] are undefined: setting them to sign of lo32
+ - makes set_CR0 happy */
+ assign( rD, binop(Iop_Sar64,
+ binop(Iop_Mul64,
+ mk64lo32Uto64( mkexpr(rA) ),
+ mk64lo32Uto64( mkexpr(rB) ) ),
+ mkU8(32)) );
+ } else {
+ assign( rD, unop(Iop_64HIto32,
+ binop(Iop_MullU32,
+ mkexpr(rA), mkexpr(rB))) );
+ }
+ break;
+
+ case 0x0EB: // mullw (Multiply Low Word, PPC32 p491)
+ DIP("mullw%s%s r%u,r%u,r%u\n",
+ flag_OE ? "o" : "", flag_rC ? ".":"",
+ rD_addr, rA_addr, rB_addr);
+ if (mode64) {
+ /* rD[hi32] are undefined: setting them to sign of lo32
+ - set_XER_OV() and set_CR0() depend on this */
+ IRExpr *a = unop(Iop_64to32, mkexpr(rA) );
+ IRExpr *b = unop(Iop_64to32, mkexpr(rB) );
+ assign( rD, binop(Iop_MullS32, a, b) );
+ if (flag_OE) {
+ set_XER_OV( ty, PPCG_FLAG_OP_MULLW,
+ mkexpr(rD),
+ unop(Iop_32Uto64, a), unop(Iop_32Uto64, b) );
+ }
+ } else {
+ assign( rD, unop(Iop_64to32,
+ binop(Iop_MullU32,
+ mkexpr(rA), mkexpr(rB))) );
+ if (flag_OE) {
+ set_XER_OV( ty, PPCG_FLAG_OP_MULLW,
+ mkexpr(rD), mkexpr(rA), mkexpr(rB) );
+ }
+ }
+ break;
+
+ case 0x068: // neg (Negate, PPC32 p493)
+ if (rB_addr != 0) {
+ vex_printf("dis_int_arith(ppc)(neg,rB_addr)\n");
+ return False;
+ }
+ DIP("neg%s%s r%u,r%u\n",
+ flag_OE ? "o" : "", flag_rC ? ".":"",
+ rD_addr, rA_addr);
+ // rD = (~rA) + 1
+ assign( rD, binop( mkSzOp(ty, Iop_Add8),
+ unop( mkSzOp(ty, Iop_Not8), mkexpr(rA) ),
+ mkSzImm(ty, 1)) );
+ if (flag_OE) {
+ set_XER_OV( ty, PPCG_FLAG_OP_NEG,
+ mkexpr(rD), mkexpr(rA), mkexpr(rB) );
+ }
+ break;
+
+ case 0x028: // subf (Subtract From, PPC32 p537)
+ DIP("subf%s%s r%u,r%u,r%u\n",
+ flag_OE ? "o" : "", flag_rC ? ".":"",
+ rD_addr, rA_addr, rB_addr);
+ // rD = rB - rA
+ assign( rD, binop( mkSzOp(ty, Iop_Sub8),
+ mkexpr(rB), mkexpr(rA)) );
+ if (flag_OE) {
+ set_XER_OV( ty, PPCG_FLAG_OP_SUBF,
+ mkexpr(rD), mkexpr(rA), mkexpr(rB) );
+ }
+ break;
+
+ case 0x008: // subfc (Subtract from Carrying, PPC32 p538)
+ DIP("subfc%s%s r%u,r%u,r%u\n",
+ flag_OE ? "o" : "", flag_rC ? ".":"",
+ rD_addr, rA_addr, rB_addr);
+ // rD = rB - rA
+ assign( rD, binop( mkSzOp(ty, Iop_Sub8),
+ mkexpr(rB), mkexpr(rA)) );
+ set_XER_CA( ty, PPCG_FLAG_OP_SUBFC,
+ mkexpr(rD), mkexpr(rA), mkexpr(rB),
+ mkSzImm(ty, 0)/*old xer.ca, which is ignored*/ );
+ if (flag_OE) {
+ set_XER_OV( ty, PPCG_FLAG_OP_SUBFC,
+ mkexpr(rD), mkexpr(rA), mkexpr(rB) );
+ }
+ break;
+
+ case 0x088: {// subfe (Subtract from Extended, PPC32 p539)
+ IRTemp old_xer_ca = newTemp(ty);
+ DIP("subfe%s%s r%u,r%u,r%u\n",
+ flag_OE ? "o" : "", flag_rC ? ".":"",
+ rD_addr, rA_addr, rB_addr);
+      // rD = ~rA + rB + XER[CA]
+ assign( old_xer_ca, mkWidenFrom32(ty, getXER_CA32(), False) );
+ assign( rD, binop( mkSzOp(ty, Iop_Add8),
+ unop( mkSzOp(ty, Iop_Not8), mkexpr(rA)),
+ binop( mkSzOp(ty, Iop_Add8),
+ mkexpr(rB), mkexpr(old_xer_ca))) );
+ set_XER_CA( ty, PPCG_FLAG_OP_SUBFE,
+ mkexpr(rD), mkexpr(rA), mkexpr(rB),
+ mkexpr(old_xer_ca) );
+ if (flag_OE) {
+ set_XER_OV( ty, PPCG_FLAG_OP_SUBFE,
+ mkexpr(rD), mkexpr(rA), mkexpr(rB) );
+ }
+ break;
+ }
+
+ case 0x0E8: { // subfme (Subtract from -1 Extended, PPC32 p541)
+ IRTemp old_xer_ca = newTemp(ty);
+ IRExpr *min_one;
+ if (rB_addr != 0) {
+ vex_printf("dis_int_arith(ppc)(subfme,rB_addr)\n");
+ return False;
+ }
+ DIP("subfme%s%s r%u,r%u\n",
+ flag_OE ? "o" : "", flag_rC ? ".":"",
+ rD_addr, rA_addr);
+      // rD = ~rA + (-1) + XER[CA]
+ // => Just another form of subfe
+ assign( old_xer_ca, mkWidenFrom32(ty, getXER_CA32(), False) );
+ min_one = mkSzImm(ty, (Long)-1);
+ assign( rD, binop( mkSzOp(ty, Iop_Add8),
+ unop( mkSzOp(ty, Iop_Not8), mkexpr(rA)),
+ binop( mkSzOp(ty, Iop_Add8),
+ min_one, mkexpr(old_xer_ca))) );
+ set_XER_CA( ty, PPCG_FLAG_OP_SUBFE,
+ mkexpr(rD), mkexpr(rA), min_one,
+ mkexpr(old_xer_ca) );
+ if (flag_OE) {
+ set_XER_OV( ty, PPCG_FLAG_OP_SUBFE,
+ mkexpr(rD), mkexpr(rA), min_one );
+ }
+ break;
+ }
+
+ case 0x0C8: { // subfze (Subtract from Zero Extended, PPC32 p542)
+ IRTemp old_xer_ca = newTemp(ty);
+ if (rB_addr != 0) {
+ vex_printf("dis_int_arith(ppc)(subfze,rB_addr)\n");
+ return False;
+ }
+ DIP("subfze%s%s r%u,r%u\n",
+ flag_OE ? "o" : "", flag_rC ? ".":"",
+ rD_addr, rA_addr);
+      // rD = ~rA + (0) + XER[CA]
+ // => Just another form of subfe
+ assign( old_xer_ca, mkWidenFrom32(ty, getXER_CA32(), False) );
+ assign( rD, binop( mkSzOp(ty, Iop_Add8),
+ unop( mkSzOp(ty, Iop_Not8),
+ mkexpr(rA)), mkexpr(old_xer_ca)) );
+ set_XER_CA( ty, PPCG_FLAG_OP_SUBFE,
+ mkexpr(rD), mkexpr(rA), mkSzImm(ty, 0),
+ mkexpr(old_xer_ca) );
+ if (flag_OE) {
+ set_XER_OV( ty, PPCG_FLAG_OP_SUBFE,
+ mkexpr(rD), mkexpr(rA), mkSzImm(ty, 0) );
+ }
+ break;
+ }
+
+
+ /* 64bit Arithmetic */
+ case 0x49: // mulhd (Multiply High DWord, PPC64 p539)
+ if (flag_OE != 0) {
+ vex_printf("dis_int_arith(ppc)(mulhd,flagOE)\n");
+ return False;
+ }
+ DIP("mulhd%s r%u,r%u,r%u\n", flag_rC ? ".":"",
+ rD_addr, rA_addr, rB_addr);
+ assign( rD, unop(Iop_128HIto64,
+ binop(Iop_MullS64,
+ mkexpr(rA), mkexpr(rB))) );
+         break;
+
+ case 0x9: // mulhdu (Multiply High DWord Unsigned, PPC64 p540)
+ if (flag_OE != 0) {
+ vex_printf("dis_int_arith(ppc)(mulhdu,flagOE)\n");
+ return False;
+ }
+ DIP("mulhdu%s r%u,r%u,r%u\n", flag_rC ? ".":"",
+ rD_addr, rA_addr, rB_addr);
+ assign( rD, unop(Iop_128HIto64,
+ binop(Iop_MullU64,
+ mkexpr(rA), mkexpr(rB))) );
+ break;
+
+ case 0xE9: // mulld (Multiply Low DWord, PPC64 p543)
+ DIP("mulld%s%s r%u,r%u,r%u\n",
+ flag_OE ? "o" : "", flag_rC ? ".":"",
+ rD_addr, rA_addr, rB_addr);
+ assign( rD, binop(Iop_Mul64, mkexpr(rA), mkexpr(rB)) );
+ if (flag_OE) {
+ set_XER_OV( ty, PPCG_FLAG_OP_MULLW,
+ mkexpr(rD), mkexpr(rA), mkexpr(rB) );
+ }
+ break;
+
+ case 0x1E9: // divd (Divide DWord, PPC64 p419)
+ DIP("divd%s%s r%u,r%u,r%u\n",
+ flag_OE ? "o" : "", flag_rC ? ".":"",
+ rD_addr, rA_addr, rB_addr);
+ assign( rD, binop(Iop_DivS64, mkexpr(rA), mkexpr(rB)) );
+ if (flag_OE) {
+ set_XER_OV( ty, PPCG_FLAG_OP_DIVW,
+ mkexpr(rD), mkexpr(rA), mkexpr(rB) );
+ }
+         /* Note:
+            if (0x8000_0000_0000_0000 / -1) or (x / 0)
+            => rD=undef, if(flag_rC) CR7=undef, if(flag_OE) XER_OV=1
+            => But _no_ exception raised. */
+         break;
+
+ case 0x1C9: // divdu (Divide DWord Unsigned, PPC64 p420)
+ DIP("divdu%s%s r%u,r%u,r%u\n",
+ flag_OE ? "o" : "", flag_rC ? ".":"",
+ rD_addr, rA_addr, rB_addr);
+ assign( rD, binop(Iop_DivU64, mkexpr(rA), mkexpr(rB)) );
+ if (flag_OE) {
+ set_XER_OV( ty, PPCG_FLAG_OP_DIVWU,
+ mkexpr(rD), mkexpr(rA), mkexpr(rB) );
+ }
+         /* Note: ditto comment divd, for (x / 0) */
+         break;
+
+ default:
+ vex_printf("dis_int_arith(ppc)(opc2)\n");
+ return False;
+ }
+ break;
+
+ default:
+ vex_printf("dis_int_arith(ppc)(opc1)\n");
+ return False;
+ }
+
+ putIReg( rD_addr, mkexpr(rD) );
+
+ if (do_rc && flag_rC) {
+ set_CR0( mkexpr(rD) );
+ }
+ return True;
+}
+
+
+
+/*
+ Integer Compare Instructions
+*/
+static Bool dis_int_cmp ( UInt theInstr )
+{
+ /* D-Form, X-Form */
+ UChar opc1 = ifieldOPC(theInstr);
+ UChar crfD = toUChar( IFIELD( theInstr, 23, 3 ) );
+ UChar b22 = toUChar( IFIELD( theInstr, 22, 1 ) );
+ UChar flag_L = toUChar( IFIELD( theInstr, 21, 1 ) );
+ UChar rA_addr = ifieldRegA(theInstr);
+ UInt uimm16 = ifieldUIMM16(theInstr);
+ UChar rB_addr = ifieldRegB(theInstr);
+ UInt opc2 = ifieldOPClo10(theInstr);
+ UChar b0 = ifieldBIT0(theInstr);
+
+ IRType ty = mode64 ? Ity_I64 : Ity_I32;
+ IRExpr *a = getIReg(rA_addr);
+ IRExpr *b;
+
+ if (!mode64 && flag_L==1) { // L==1 invalid for 32 bit.
+ vex_printf("dis_int_cmp(ppc)(flag_L)\n");
+ return False;
+ }
+
+ if (b22 != 0) {
+ vex_printf("dis_int_cmp(ppc)(b22)\n");
+ return False;
+ }
+
+ switch (opc1) {
+ case 0x0B: // cmpi (Compare Immediate, PPC32 p368)
+ DIP("cmpi cr%u,%u,r%u,%d\n", crfD, flag_L, rA_addr,
+ (Int)extend_s_16to32(uimm16));
+ b = mkSzExtendS16( ty, uimm16 );
+ if (flag_L == 1) {
+ putCR321(crfD, unop(Iop_64to8, binop(Iop_CmpORD64S, a, b)));
+ } else {
+ a = mkNarrowTo32( ty, a );
+ b = mkNarrowTo32( ty, b );
+ putCR321(crfD, unop(Iop_32to8, binop(Iop_CmpORD32S, a, b)));
+ }
+ putCR0( crfD, getXER_SO() );
+ break;
+
+ case 0x0A: // cmpli (Compare Logical Immediate, PPC32 p370)
+ DIP("cmpli cr%u,%u,r%u,0x%x\n", crfD, flag_L, rA_addr, uimm16);
+ b = mkSzImm( ty, uimm16 );
+ if (flag_L == 1) {
+ putCR321(crfD, unop(Iop_64to8, binop(Iop_CmpORD64U, a, b)));
+ } else {
+ a = mkNarrowTo32( ty, a );
+ b = mkNarrowTo32( ty, b );
+ putCR321(crfD, unop(Iop_32to8, binop(Iop_CmpORD32U, a, b)));
+ }
+ putCR0( crfD, getXER_SO() );
+ break;
+
+ /* X Form */
+ case 0x1F:
+ if (b0 != 0) {
+ vex_printf("dis_int_cmp(ppc)(0x1F,b0)\n");
+ return False;
+ }
+ b = getIReg(rB_addr);
+
+ switch (opc2) {
+ case 0x000: // cmp (Compare, PPC32 p367)
+ DIP("cmp cr%u,%u,r%u,r%u\n", crfD, flag_L, rA_addr, rB_addr);
+ /* Comparing a reg with itself produces a result which
+ doesn't depend on the contents of the reg. Therefore
+ remove the false dependency, which has been known to cause
+ memcheck to produce false errors. */
+ if (rA_addr == rB_addr)
+ a = b = typeOfIRExpr(irsb->tyenv,a) == Ity_I64
+ ? mkU64(0) : mkU32(0);
+ if (flag_L == 1) {
+ putCR321(crfD, unop(Iop_64to8, binop(Iop_CmpORD64S, a, b)));
+ } else {
+ a = mkNarrowTo32( ty, a );
+ b = mkNarrowTo32( ty, b );
+ putCR321(crfD, unop(Iop_32to8,binop(Iop_CmpORD32S, a, b)));
+ }
+ putCR0( crfD, getXER_SO() );
+ break;
+
+ case 0x020: // cmpl (Compare Logical, PPC32 p369)
+ DIP("cmpl cr%u,%u,r%u,r%u\n", crfD, flag_L, rA_addr, rB_addr);
+ /* Comparing a reg with itself produces a result which
+ doesn't depend on the contents of the reg. Therefore
+ remove the false dependency, which has been known to cause
+ memcheck to produce false errors. */
+ if (rA_addr == rB_addr)
+ a = b = typeOfIRExpr(irsb->tyenv,a) == Ity_I64
+ ? mkU64(0) : mkU32(0);
+ if (flag_L == 1) {
+ putCR321(crfD, unop(Iop_64to8, binop(Iop_CmpORD64U, a, b)));
+ } else {
+ a = mkNarrowTo32( ty, a );
+ b = mkNarrowTo32( ty, b );
+ putCR321(crfD, unop(Iop_32to8, binop(Iop_CmpORD32U, a, b)));
+ }
+ putCR0( crfD, getXER_SO() );
+ break;
+
+ default:
+ vex_printf("dis_int_cmp(ppc)(opc2)\n");
+ return False;
+ }
+ break;
+
+ default:
+ vex_printf("dis_int_cmp(ppc)(opc1)\n");
+ return False;
+ }
+
+ return True;
+}
+
+
+/*
+ Integer Logical Instructions
+*/
+static Bool dis_int_logic ( UInt theInstr )
+{
+ /* D-Form, X-Form */
+ UChar opc1 = ifieldOPC(theInstr);
+ UChar rS_addr = ifieldRegDS(theInstr);
+ UChar rA_addr = ifieldRegA(theInstr);
+ UInt uimm16 = ifieldUIMM16(theInstr);
+ UChar rB_addr = ifieldRegB(theInstr);
+ UInt opc2 = ifieldOPClo10(theInstr);
+ UChar flag_rC = ifieldBIT0(theInstr);
+
+ IRType ty = mode64 ? Ity_I64 : Ity_I32;
+ IRTemp rS = newTemp(ty);
+ IRTemp rA = newTemp(ty);
+ IRTemp rB = newTemp(ty);
+ IRExpr* irx;
+ Bool do_rc = False;
+
+ assign( rS, getIReg(rS_addr) );
+ assign( rB, getIReg(rB_addr) );
+
+ switch (opc1) {
+ case 0x1C: // andi. (AND Immediate, PPC32 p358)
+ DIP("andi. r%u,r%u,0x%x\n", rA_addr, rS_addr, uimm16);
+ assign( rA, binop( mkSzOp(ty, Iop_And8), mkexpr(rS),
+ mkSzImm(ty, uimm16)) );
+ do_rc = True; // Always record to CR
+ flag_rC = 1;
+ break;
+
+ case 0x1D: // andis. (AND Immediate Shifted, PPC32 p359)
+ DIP("andis r%u,r%u,0x%x\n", rA_addr, rS_addr, uimm16);
+ assign( rA, binop( mkSzOp(ty, Iop_And8), mkexpr(rS),
+ mkSzImm(ty, uimm16 << 16)) );
+ do_rc = True; // Always record to CR
+ flag_rC = 1;
+ break;
+
+ case 0x18: // ori (OR Immediate, PPC32 p497)
+ DIP("ori r%u,r%u,0x%x\n", rA_addr, rS_addr, uimm16);
+ assign( rA, binop( mkSzOp(ty, Iop_Or8), mkexpr(rS),
+ mkSzImm(ty, uimm16)) );
+ break;
+
+ case 0x19: // oris (OR Immediate Shifted, PPC32 p498)
+ DIP("oris r%u,r%u,0x%x\n", rA_addr, rS_addr, uimm16);
+ assign( rA, binop( mkSzOp(ty, Iop_Or8), mkexpr(rS),
+ mkSzImm(ty, uimm16 << 16)) );
+ break;
+
+ case 0x1A: // xori (XOR Immediate, PPC32 p550)
+ DIP("xori r%u,r%u,0x%x\n", rA_addr, rS_addr, uimm16);
+ assign( rA, binop( mkSzOp(ty, Iop_Xor8), mkexpr(rS),
+ mkSzImm(ty, uimm16)) );
+ break;
+
+ case 0x1B: // xoris (XOR Immediate Shifted, PPC32 p551)
+ DIP("xoris r%u,r%u,0x%x\n", rA_addr, rS_addr, uimm16);
+ assign( rA, binop( mkSzOp(ty, Iop_Xor8), mkexpr(rS),
+ mkSzImm(ty, uimm16 << 16)) );
+ break;
+
+ /* X Form */
+ case 0x1F:
+ do_rc = True; // All below record to CR
+
+ switch (opc2) {
+ case 0x01C: // and (AND, PPC32 p356)
+ DIP("and%s r%u,r%u,r%u\n",
+ flag_rC ? ".":"", rA_addr, rS_addr, rB_addr);
+ assign(rA, binop( mkSzOp(ty, Iop_And8),
+ mkexpr(rS), mkexpr(rB)));
+ break;
+
+ case 0x03C: // andc (AND with Complement, PPC32 p357)
+ DIP("andc%s r%u,r%u,r%u\n",
+ flag_rC ? ".":"", rA_addr, rS_addr, rB_addr);
+ assign(rA, binop( mkSzOp(ty, Iop_And8), mkexpr(rS),
+ unop( mkSzOp(ty, Iop_Not8),
+ mkexpr(rB))));
+ break;
+
+ case 0x01A: { // cntlzw (Count Leading Zeros Word, PPC32 p371)
+ IRExpr* lo32;
+ if (rB_addr!=0) {
+ vex_printf("dis_int_logic(ppc)(cntlzw,rB_addr)\n");
+ return False;
+ }
+ DIP("cntlzw%s r%u,r%u\n",
+ flag_rC ? ".":"", rA_addr, rS_addr);
+
+ // mode64: count in low word only
+ lo32 = mode64 ? unop(Iop_64to32, mkexpr(rS)) : mkexpr(rS);
+
+ // Iop_Clz32 undefined for arg==0, so deal with that case:
+ irx = binop(Iop_CmpNE32, lo32, mkU32(0));
+ assign(rA, mkWidenFrom32(ty,
+ IRExpr_Mux0X( unop(Iop_1Uto8, irx),
+ mkU32(32),
+ unop(Iop_Clz32, lo32)),
+ False));
+
+ // TODO: alternatively: assign(rA, verbose_Clz32(rS));
+ break;
+ }
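+
+      /* Note (editor's addition): IRExpr_Mux0X(cond, expr0, exprX)
+         yields expr0 when the condition byte is zero.  Here the
+         CmpNE32 is false exactly when lo32 == 0, so a zero input
+         produces the architected cntlzw result of 32. */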
+
+ case 0x11C: // eqv (Equivalent, PPC32 p396)
+ DIP("eqv%s r%u,r%u,r%u\n",
+ flag_rC ? ".":"", rA_addr, rS_addr, rB_addr);
+ assign( rA, unop( mkSzOp(ty, Iop_Not8),
+ binop( mkSzOp(ty, Iop_Xor8),
+ mkexpr(rS), mkexpr(rB))) );
+ break;
+
+      case 0x3BA: // extsb (Extend Sign Byte, PPC32 p397)
+ if (rB_addr!=0) {
+ vex_printf("dis_int_logic(ppc)(extsb,rB_addr)\n");
+ return False;
+ }
+ DIP("extsb%s r%u,r%u\n",
+ flag_rC ? ".":"", rA_addr, rS_addr);
+ if (mode64)
+ assign( rA, unop(Iop_8Sto64, unop(Iop_64to8, mkexpr(rS))) );
+ else
+ assign( rA, unop(Iop_8Sto32, unop(Iop_32to8, mkexpr(rS))) );
+ break;
+
+ case 0x39A: // extsh (Extend Sign Half Word, PPC32 p398)
+ if (rB_addr!=0) {
+ vex_printf("dis_int_logic(ppc)(extsh,rB_addr)\n");
+ return False;
+ }
+ DIP("extsh%s r%u,r%u\n",
+ flag_rC ? ".":"", rA_addr, rS_addr);
+ if (mode64)
+ assign( rA, unop(Iop_16Sto64,
+ unop(Iop_64to16, mkexpr(rS))) );
+ else
+ assign( rA, unop(Iop_16Sto32,
+ unop(Iop_32to16, mkexpr(rS))) );
+ break;
+
+ case 0x1DC: // nand (NAND, PPC32 p492)
+ DIP("nand%s r%u,r%u,r%u\n",
+ flag_rC ? ".":"", rA_addr, rS_addr, rB_addr);
+ assign( rA, unop( mkSzOp(ty, Iop_Not8),
+ binop( mkSzOp(ty, Iop_And8),
+ mkexpr(rS), mkexpr(rB))) );
+ break;
+
+ case 0x07C: // nor (NOR, PPC32 p494)
+ DIP("nor%s r%u,r%u,r%u\n",
+ flag_rC ? ".":"", rA_addr, rS_addr, rB_addr);
+ assign( rA, unop( mkSzOp(ty, Iop_Not8),
+ binop( mkSzOp(ty, Iop_Or8),
+ mkexpr(rS), mkexpr(rB))) );
+ break;
+
+ case 0x1BC: // or (OR, PPC32 p495)
+ if ((!flag_rC) && rS_addr == rB_addr) {
+ DIP("mr r%u,r%u\n", rA_addr, rS_addr);
+ assign( rA, mkexpr(rS) );
+ } else {
+ DIP("or%s r%u,r%u,r%u\n",
+ flag_rC ? ".":"", rA_addr, rS_addr, rB_addr);
+ assign( rA, binop( mkSzOp(ty, Iop_Or8),
+ mkexpr(rS), mkexpr(rB)) );
+ }
+ break;
+
+ case 0x19C: // orc (OR with Complement, PPC32 p496)
+ DIP("orc%s r%u,r%u,r%u\n",
+ flag_rC ? ".":"", rA_addr, rS_addr, rB_addr);
+ assign( rA, binop( mkSzOp(ty, Iop_Or8), mkexpr(rS),
+ unop(mkSzOp(ty, Iop_Not8), mkexpr(rB))));
+ break;
+
+ case 0x13C: // xor (XOR, PPC32 p549)
+ DIP("xor%s r%u,r%u,r%u\n",
+ flag_rC ? ".":"", rA_addr, rS_addr, rB_addr);
+ assign( rA, binop( mkSzOp(ty, Iop_Xor8),
+ mkexpr(rS), mkexpr(rB)) );
+ break;
+
+
+ /* 64bit Integer Logical Instructions */
+ case 0x3DA: // extsw (Extend Sign Word, PPC64 p430)
+ if (rB_addr!=0) {
+ vex_printf("dis_int_logic(ppc)(extsw,rB_addr)\n");
+ return False;
+ }
+ DIP("extsw%s r%u,r%u\n", flag_rC ? ".":"", rA_addr, rS_addr);
+ assign(rA, unop(Iop_32Sto64, unop(Iop_64to32, mkexpr(rS))));
+ break;
+
+ case 0x03A: // cntlzd (Count Leading Zeros DWord, PPC64 p401)
+ if (rB_addr!=0) {
+ vex_printf("dis_int_logic(ppc)(cntlzd,rB_addr)\n");
+ return False;
+ }
+ DIP("cntlzd%s r%u,r%u\n",
+ flag_rC ? ".":"", rA_addr, rS_addr);
+ // Iop_Clz64 undefined for arg==0, so deal with that case:
+ irx = binop(Iop_CmpNE64, mkexpr(rS), mkU64(0));
+ assign(rA, IRExpr_Mux0X( unop(Iop_1Uto8, irx),
+ mkU64(64),
+ unop(Iop_Clz64, mkexpr(rS)) ));
+ // TODO: alternatively: assign(rA, verbose_Clz64(rS));
+ break;
+
+ case 0x1FC: // cmpb (Power6: compare bytes)
+ DIP("cmpb r%u,r%u,r%u\n", rA_addr, rS_addr, rB_addr);
+
+ if (mode64)
+ assign( rA, unop( Iop_V128to64,
+ binop( Iop_CmpEQ8x16,
+ binop( Iop_64HLtoV128, mkU64(0), mkexpr(rS) ),
+ binop( Iop_64HLtoV128, mkU64(0), mkexpr(rB) )
+ )) );
+ else
+ assign( rA, unop( Iop_V128to32,
+ binop( Iop_CmpEQ8x16,
+ unop( Iop_32UtoV128, mkexpr(rS) ),
+ unop( Iop_32UtoV128, mkexpr(rB) )
+ )) );
+ break;
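+
+         /* Note (editor's addition): cmpb sets each byte of rA to 0xFF
+            where the corresponding bytes of rS and rB are equal, and
+            to 0x00 where they differ; the byte-wise compare is done
+            with the 128-bit Iop_CmpEQ8x16 on zero-padded operands,
+            after which the relevant low lanes are extracted. */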
+
+ case 0x2DF: { // mftgpr (move floating-point to general purpose register)
+ IRTemp frB = newTemp(Ity_F64);
+ DIP("mftgpr r%u,fr%u\n", rS_addr, rB_addr);
+
+ assign( frB, getFReg(rB_addr)); // always F64
+ if (mode64)
+ assign( rA, unop( Iop_ReinterpF64asI64, mkexpr(frB)) );
+ else
+ assign( rA, unop( Iop_64to32, unop( Iop_ReinterpF64asI64, mkexpr(frB))) );
+
+ putIReg( rS_addr, mkexpr(rA));
+ return True;
+ }
+
+ case 0x25F: { // mffgpr (move floating-point from general purpose register)
+ IRTemp frA = newTemp(Ity_F64);
+ DIP("mffgpr fr%u,r%u\n", rS_addr, rB_addr);
+
+ if (mode64)
+ assign( frA, unop( Iop_ReinterpI64asF64, mkexpr(rB)) );
+ else
+ assign( frA, unop( Iop_ReinterpI64asF64, unop( Iop_32Uto64, mkexpr(rB))) );
+
+ putFReg( rS_addr, mkexpr(frA));
+ return True;
+ }
+
+ default:
+ vex_printf("dis_int_logic(ppc)(opc2)\n");
+ return False;
+ }
+ break;
+
+ default:
+ vex_printf("dis_int_logic(ppc)(opc1)\n");
+ return False;
+ }
+
+ putIReg( rA_addr, mkexpr(rA) );
+
+ if (do_rc && flag_rC) {
+ set_CR0( mkexpr(rA) );
+ }
+ return True;
+}
+
+/*
+ Integer Parity Instructions
+*/
+static Bool dis_int_parity ( UInt theInstr )
+{
+ /* X-Form */
+ UChar opc1 = ifieldOPC(theInstr);
+ UChar rS_addr = ifieldRegDS(theInstr);
+ UChar rA_addr = ifieldRegA(theInstr);
+ UChar rB_addr = ifieldRegB(theInstr);
+ UInt opc2 = ifieldOPClo10(theInstr);
+ UChar b0 = ifieldBIT0(theInstr);
+ IRType ty = mode64 ? Ity_I64 : Ity_I32;
+
+ IRTemp rS = newTemp(ty);
+ IRTemp rA = newTemp(ty);
+ IRTemp iTot1 = newTemp(Ity_I32);
+ IRTemp iTot2 = newTemp(Ity_I32);
+ IRTemp iTot3 = newTemp(Ity_I32);
+ IRTemp iTot4 = newTemp(Ity_I32);
+ IRTemp iTot5 = newTemp(Ity_I32);
+ IRTemp iTot6 = newTemp(Ity_I32);
+ IRTemp iTot7 = newTemp(Ity_I32);
+ IRTemp iTot8 = newTemp(Ity_I32);
+ IRTemp rS1 = newTemp(ty);
+ IRTemp rS2 = newTemp(ty);
+ IRTemp rS3 = newTemp(ty);
+ IRTemp rS4 = newTemp(ty);
+ IRTemp rS5 = newTemp(ty);
+ IRTemp rS6 = newTemp(ty);
+ IRTemp rS7 = newTemp(ty);
+ IRTemp iHi = newTemp(Ity_I32);
+ IRTemp iLo = newTemp(Ity_I32);
+ IROp to_bit = (mode64 ? Iop_64to1 : Iop_32to1);
+ IROp shr_op = (mode64 ? Iop_Shr64 : Iop_Shr32);
+
+ if (opc1 != 0x1f || rB_addr || b0) {
+ vex_printf("dis_int_parity(ppc)(0x1F,opc1:rB|b0)\n");
+ return False;
+ }
+
+ assign( rS, getIReg(rS_addr) );
+
+ switch (opc2) {
+ case 0xba: // prtyd (Parity Doubleword, ISA 2.05 p320)
+ DIP("prtyd r%u,r%u\n", rA_addr, rS_addr);
+ assign( iTot1, unop(Iop_1Uto32, unop(to_bit, mkexpr(rS))) );
+ assign( rS1, binop(shr_op, mkexpr(rS), mkU8(8)) );
+ assign( iTot2, binop(Iop_Add32,
+ unop(Iop_1Uto32, unop(to_bit, mkexpr(rS1))),
+ mkexpr(iTot1)) );
+ assign( rS2, binop(shr_op, mkexpr(rS1), mkU8(8)) );
+ assign( iTot3, binop(Iop_Add32,
+ unop(Iop_1Uto32, unop(to_bit, mkexpr(rS2))),
+ mkexpr(iTot2)) );
+ assign( rS3, binop(shr_op, mkexpr(rS2), mkU8(8)) );
+ assign( iTot4, binop(Iop_Add32,
+ unop(Iop_1Uto32, unop(to_bit, mkexpr(rS3))),
+ mkexpr(iTot3)) );
+ if (mode64) {
+ assign( rS4, binop(shr_op, mkexpr(rS3), mkU8(8)) );
+ assign( iTot5, binop(Iop_Add32,
+ unop(Iop_1Uto32, unop(to_bit, mkexpr(rS4))),
+ mkexpr(iTot4)) );
+ assign( rS5, binop(shr_op, mkexpr(rS4), mkU8(8)) );
+ assign( iTot6, binop(Iop_Add32,
+ unop(Iop_1Uto32, unop(to_bit, mkexpr(rS5))),
+ mkexpr(iTot5)) );
+ assign( rS6, binop(shr_op, mkexpr(rS5), mkU8(8)) );
+ assign( iTot7, binop(Iop_Add32,
+ unop(Iop_1Uto32, unop(to_bit, mkexpr(rS6))),
+ mkexpr(iTot6)) );
+ assign( rS7, binop(shr_op, mkexpr(rS6), mkU8(8)) );
+ assign( iTot8, binop(Iop_Add32,
+ unop(Iop_1Uto32, unop(to_bit, mkexpr(rS7))),
+ mkexpr(iTot7)) );
+ assign( rA, unop(Iop_32Uto64,
+ binop(Iop_And32, mkexpr(iTot8), mkU32(1))) );
+ } else
+ assign( rA, mkexpr(iTot4) );
+
+ break;
+   case 0x9a: // prtyw (Parity Word, ISA 2.05 p320)
+      DIP("prtyw r%u,r%u\n", rA_addr, rS_addr);
+      assign( iTot1, unop(Iop_1Uto32, unop(to_bit, mkexpr(rS))) );
+ assign( rS1, binop(shr_op, mkexpr(rS), mkU8(8)) );
+ assign( iTot2, binop(Iop_Add32,
+ unop(Iop_1Uto32, unop(to_bit, mkexpr(rS1))),
+ mkexpr(iTot1)) );
+ assign( rS2, binop(shr_op, mkexpr(rS1), mkU8(8)) );
+ assign( iTot3, binop(Iop_Add32,
+ unop(Iop_1Uto32, unop(to_bit, mkexpr(rS2))),
+ mkexpr(iTot2)) );
+ assign( rS3, binop(shr_op, mkexpr(rS2), mkU8(8)) );
+ assign( iTot4, binop(Iop_Add32,
+ unop(Iop_1Uto32, unop(to_bit, mkexpr(rS3))),
+ mkexpr(iTot3)) );
+ assign( iLo, unop(Iop_1Uto32, unop(Iop_32to1, mkexpr(iTot4) )) );
+
+ if (mode64) {
+ assign( rS4, binop(shr_op, mkexpr(rS3), mkU8(8)) );
+ assign( iTot5, unop(Iop_1Uto32, unop(to_bit, mkexpr(rS4))) );
+ assign( rS5, binop(shr_op, mkexpr(rS4), mkU8(8)) );
+ assign( iTot6, binop(Iop_Add32,
+ unop(Iop_1Uto32, unop(to_bit, mkexpr(rS5))),
+ mkexpr(iTot5)) );
+ assign( rS6, binop(shr_op, mkexpr(rS5), mkU8(8)) );
+ assign( iTot7, binop(Iop_Add32,
+ unop(Iop_1Uto32, unop(to_bit, mkexpr(rS6))),
+ mkexpr(iTot6)) );
+ assign( rS7, binop(shr_op, mkexpr(rS6), mkU8(8)));
+ assign( iTot8, binop(Iop_Add32,
+ unop(Iop_1Uto32, unop(to_bit, mkexpr(rS7))),
+ mkexpr(iTot7)) );
+         assign( iHi, binop(Iop_And32, mkU32(1), mkexpr(iTot8)) );
+ assign( rA, binop(Iop_32HLto64, mkexpr(iHi), mkexpr(iLo)) );
+ } else
+ assign( rA, binop(Iop_Or32, mkU32(0), mkexpr(iLo)) );
+ break;
+ default:
+ vex_printf("dis_int_parity(ppc)(opc2)\n");
+ return False;
+ }
+
+ putIReg( rA_addr, mkexpr(rA) );
+
+ return True;
+}
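+
+/* Note (editor's addition): both prtyd and prtyw above compute parity
+   by repeatedly shifting rS right by 8 and summing the least
+   significant bit of each byte; bit 0 of the running total is then
+   the desired parity bit. */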
+
+
+/*
+ Integer Rotate Instructions
+*/
+static Bool dis_int_rot ( UInt theInstr )
+{
+ /* M-Form, MDS-Form */
+ UChar opc1 = ifieldOPC(theInstr);
+ UChar rS_addr = ifieldRegDS(theInstr);
+ UChar rA_addr = ifieldRegA(theInstr);
+ UChar rB_addr = ifieldRegB(theInstr);
+ UChar sh_imm = rB_addr;
+ UChar MaskBeg = toUChar( IFIELD( theInstr, 6, 5 ) );
+ UChar MaskEnd = toUChar( IFIELD( theInstr, 1, 5 ) );
+ UChar msk_imm = toUChar( IFIELD( theInstr, 5, 6 ) );
+ UChar opc2 = toUChar( IFIELD( theInstr, 2, 3 ) );
+ UChar b1 = ifieldBIT1(theInstr);
+ UChar flag_rC = ifieldBIT0(theInstr);
+
+ IRType ty = mode64 ? Ity_I64 : Ity_I32;
+ IRTemp rS = newTemp(ty);
+ IRTemp rA = newTemp(ty);
+ IRTemp rB = newTemp(ty);
+ IRTemp rot = newTemp(ty);
+ IRExpr *r;
+ UInt mask32;
+ ULong mask64;
+
+ assign( rS, getIReg(rS_addr) );
+ assign( rB, getIReg(rB_addr) );
+
+ switch (opc1) {
+ case 0x14: {
+ // rlwimi (Rotate Left Word Imm then Mask Insert, PPC32 p500)
+ DIP("rlwimi%s r%u,r%u,%d,%d,%d\n", flag_rC ? ".":"",
+ rA_addr, rS_addr, sh_imm, MaskBeg, MaskEnd);
+ if (mode64) {
+ // tmp32 = (ROTL(rS_Lo32, Imm)
+ // rA = ((tmp32 || tmp32) & mask64) | (rA & ~mask64)
+ mask64 = MASK64(31-MaskEnd, 31-MaskBeg);
+ r = ROTL( unop(Iop_64to32, mkexpr(rS) ), mkU8(sh_imm) );
+ r = unop(Iop_32Uto64, r);
+ assign( rot, binop(Iop_Or64, r,
+ binop(Iop_Shl64, r, mkU8(32))) );
+ assign( rA,
+ binop(Iop_Or64,
+ binop(Iop_And64, mkexpr(rot), mkU64(mask64)),
+ binop(Iop_And64, getIReg(rA_addr), mkU64(~mask64))) );
+ }
+ else {
+ // rA = (ROTL(rS, Imm) & mask) | (rA & ~mask);
+ mask32 = MASK32(31-MaskEnd, 31-MaskBeg);
+ r = ROTL(mkexpr(rS), mkU8(sh_imm));
+ assign( rA,
+ binop(Iop_Or32,
+ binop(Iop_And32, mkU32(mask32), r),
+ binop(Iop_And32, getIReg(rA_addr), mkU32(~mask32))) );
+ }
+ break;
+ }
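+
+   /* Worked example (editor's illustration): in 32-bit mode,
+      "rlwimi rA,rS,16,0,15" gives mask32 == 0xFFFF0000, so the low
+      halfword of rS is rotated up and inserted into the high halfword
+      of rA, while the low halfword of rA is preserved. */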
+
+ case 0x15: {
+ // rlwinm (Rotate Left Word Imm then AND with Mask, PPC32 p501)
+ vassert(MaskBeg < 32);
+ vassert(MaskEnd < 32);
+ vassert(sh_imm < 32);
+
+ if (mode64) {
+ IRTemp rTmp = newTemp(Ity_I64);
+ mask64 = MASK64(31-MaskEnd, 31-MaskBeg);
+ DIP("rlwinm%s r%u,r%u,%d,%d,%d\n", flag_rC ? ".":"",
+ rA_addr, rS_addr, sh_imm, MaskBeg, MaskEnd);
+ // tmp32 = (ROTL(rS_Lo32, Imm)
+ // rA = ((tmp32 || tmp32) & mask64)
+ r = ROTL( unop(Iop_64to32, mkexpr(rS) ), mkU8(sh_imm) );
+ r = unop(Iop_32Uto64, r);
+ assign( rTmp, r );
+ r = NULL;
+ assign( rot, binop(Iop_Or64, mkexpr(rTmp),
+ binop(Iop_Shl64, mkexpr(rTmp), mkU8(32))) );
+ assign( rA, binop(Iop_And64, mkexpr(rot), mkU64(mask64)) );
+ }
+ else {
+ if (MaskBeg == 0 && sh_imm+MaskEnd == 31) {
+ /* Special-case the ,n,0,31-n form as that is just n-bit
+ shift left, PPC32 p501 */
+ DIP("slwi%s r%u,r%u,%d\n", flag_rC ? ".":"",
+ rA_addr, rS_addr, sh_imm);
+ assign( rA, binop(Iop_Shl32, mkexpr(rS), mkU8(sh_imm)) );
+ }
+ else if (MaskEnd == 31 && sh_imm+MaskBeg == 32) {
+ /* Special-case the ,32-n,n,31 form as that is just n-bit
+ unsigned shift right, PPC32 p501 */
+ DIP("srwi%s r%u,r%u,%d\n", flag_rC ? ".":"",
+ rA_addr, rS_addr, MaskBeg);
+ assign( rA, binop(Iop_Shr32, mkexpr(rS), mkU8(MaskBeg)) );
+ }
+ else {
+ /* General case. */
+ mask32 = MASK32(31-MaskEnd, 31-MaskBeg);
+ DIP("rlwinm%s r%u,r%u,%d,%d,%d\n", flag_rC ? ".":"",
+ rA_addr, rS_addr, sh_imm, MaskBeg, MaskEnd);
+ // rA = ROTL(rS, Imm) & mask
+ assign( rA, binop(Iop_And32,
+ ROTL(mkexpr(rS), mkU8(sh_imm)),
+ mkU32(mask32)) );
+ }
+ }
+ break;
+ }
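+
+   /* Worked examples (editor's illustration): "rlwinm rA,rS,8,0,23"
+      satisfies MaskBeg == 0 && sh_imm+MaskEnd == 31 and so is decoded
+      as "slwi rA,rS,8"; likewise "rlwinm rA,rS,24,8,31" satisfies the
+      second condition and becomes "srwi rA,rS,8". */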
+
+ case 0x17: {
+      // rlwnm (Rotate Left Word then AND with Mask, PPC32 p503)
+ DIP("rlwnm%s r%u,r%u,r%u,%d,%d\n", flag_rC ? ".":"",
+ rA_addr, rS_addr, rB_addr, MaskBeg, MaskEnd);
+ if (mode64) {
+ mask64 = MASK64(31-MaskEnd, 31-MaskBeg);
+ /* weird insn alert!
+ tmp32 = (ROTL(rS_Lo32, rB[0-4])
+ rA = ((tmp32 || tmp32) & mask64)
+ */
+ // note, ROTL does the masking, so we don't do it here
+ r = ROTL( unop(Iop_64to32, mkexpr(rS)),
+ unop(Iop_64to8, mkexpr(rB)) );
+ r = unop(Iop_32Uto64, r);
+ assign(rot, binop(Iop_Or64, r, binop(Iop_Shl64, r, mkU8(32))));
+ assign( rA, binop(Iop_And64, mkexpr(rot), mkU64(mask64)) );
+ } else {
+ mask32 = MASK32(31-MaskEnd, 31-MaskBeg);
+ // rA = ROTL(rS, rB[0-4]) & mask
+ // note, ROTL does the masking, so we don't do it here
+ assign( rA, binop(Iop_And32,
+ ROTL(mkexpr(rS),
+ unop(Iop_32to8, mkexpr(rB))),
+ mkU32(mask32)) );
+ }
+ break;
+ }
+
+ /* 64bit Integer Rotates */
+ case 0x1E: {
+ msk_imm = ((msk_imm & 1) << 5) | (msk_imm >> 1);
+ sh_imm |= b1 << 5;
+
+ vassert( msk_imm < 64 );
+ vassert( sh_imm < 64 );
+
+ switch (opc2) {
+ case 0x4: {
+ /* r = ROTL64( rS, rB_lo6) */
+ r = ROTL( mkexpr(rS), unop(Iop_64to8, mkexpr(rB)) );
+
+ if (b1 == 0) { // rldcl (Rotl DWord, Clear Left, PPC64 p555)
+ DIP("rldcl%s r%u,r%u,r%u,%u\n", flag_rC ? ".":"",
+ rA_addr, rS_addr, rB_addr, msk_imm);
+ // note, ROTL does the masking, so we don't do it here
+ mask64 = MASK64(0, 63-msk_imm);
+ assign( rA, binop(Iop_And64, r, mkU64(mask64)) );
+ break;
+ } else { // rldcr (Rotl DWord, Clear Right, PPC64 p556)
+ DIP("rldcr%s r%u,r%u,r%u,%u\n", flag_rC ? ".":"",
+ rA_addr, rS_addr, rB_addr, msk_imm);
+ mask64 = MASK64(63-msk_imm, 63);
+ assign( rA, binop(Iop_And64, r, mkU64(mask64)) );
+ break;
+ }
+         }
+ case 0x2: // rldic (Rotl DWord Imm, Clear, PPC64 p557)
+ DIP("rldic%s r%u,r%u,%u,%u\n", flag_rC ? ".":"",
+ rA_addr, rS_addr, sh_imm, msk_imm);
+ r = ROTL(mkexpr(rS), mkU8(sh_imm));
+ mask64 = MASK64(sh_imm, 63-msk_imm);
+ assign( rA, binop(Iop_And64, r, mkU64(mask64)) );
+ break;
+ // later: deal with special case: (msk_imm==0) => SHL(sh_imm)
+ /*
+ Hmm... looks like this'll do the job more simply:
+ r = SHL(rS, sh_imm)
+ m = ~(1 << (63-msk_imm))
+ assign(rA, r & m);
+ */
+
+ case 0x0: // rldicl (Rotl DWord Imm, Clear Left, PPC64 p558)
+ if (mode64
+ && sh_imm + msk_imm == 64 && msk_imm >= 1 && msk_imm <= 63) {
+ /* special-case the ,64-n,n form as that is just
+ unsigned shift-right by n */
+ DIP("srdi%s r%u,r%u,%u\n",
+ flag_rC ? ".":"", rA_addr, rS_addr, msk_imm);
+ assign( rA, binop(Iop_Shr64, mkexpr(rS), mkU8(msk_imm)) );
+ } else {
+ DIP("rldicl%s r%u,r%u,%u,%u\n", flag_rC ? ".":"",
+ rA_addr, rS_addr, sh_imm, msk_imm);
+ r = ROTL(mkexpr(rS), mkU8(sh_imm));
+ mask64 = MASK64(0, 63-msk_imm);
+ assign( rA, binop(Iop_And64, r, mkU64(mask64)) );
+ }
+ break;
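+
+      /* Worked example (editor's illustration): "rldicl rA,rS,56,8"
+         satisfies sh_imm + msk_imm == 64 and so is decoded as
+         "srdi rA,rS,8"; the general path instead rotates and ANDs
+         with MASK64(0, 63-msk_imm). */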
+
+ case 0x1: // rldicr (Rotl DWord Imm, Clear Right, PPC64 p559)
+ if (mode64
+ && sh_imm + msk_imm == 63 && sh_imm >= 1 && sh_imm <= 63) {
+ /* special-case the ,n,63-n form as that is just
+ shift-left by n */
+ DIP("sldi%s r%u,r%u,%u\n",
+ flag_rC ? ".":"", rA_addr, rS_addr, sh_imm);
+ assign( rA, binop(Iop_Shl64, mkexpr(rS), mkU8(sh_imm)) );
+ } else {
+ DIP("rldicr%s r%u,r%u,%u,%u\n", flag_rC ? ".":"",
+ rA_addr, rS_addr, sh_imm, msk_imm);
+ r = ROTL(mkexpr(rS), mkU8(sh_imm));
+ mask64 = MASK64(63-msk_imm, 63);
+ assign( rA, binop(Iop_And64, r, mkU64(mask64)) );
+ }
+ break;
+
+ case 0x3: { // rldimi (Rotl DWord Imm, Mask Insert, PPC64 p560)
+ IRTemp rA_orig = newTemp(ty);
+ DIP("rldimi%s r%u,r%u,%u,%u\n", flag_rC ? ".":"",
+ rA_addr, rS_addr, sh_imm, msk_imm);
+ r = ROTL(mkexpr(rS), mkU8(sh_imm));
+ mask64 = MASK64(sh_imm, 63-msk_imm);
+ assign( rA_orig, getIReg(rA_addr) );
+ assign( rA, binop(Iop_Or64,
+ binop(Iop_And64, mkU64(mask64), r),
+ binop(Iop_And64, mkU64(~mask64),
+ mkexpr(rA_orig))) );
+ break;
+ }
+ default:
+ vex_printf("dis_int_rot(ppc)(opc2)\n");
+ return False;
+ }
+ break;
+ }
+
+ default:
+ vex_printf("dis_int_rot(ppc)(opc1)\n");
+ return False;
+ }
+
+ putIReg( rA_addr, mkexpr(rA) );
+
+ if (flag_rC) {
+ set_CR0( mkexpr(rA) );
+ }
+ return True;
+}
+
+
+/*
+ Integer Load Instructions
+*/
+static Bool dis_int_load ( UInt theInstr )
+{
+ /* D-Form, X-Form, DS-Form */
+ UChar opc1 = ifieldOPC(theInstr);
+ UChar rD_addr = ifieldRegDS(theInstr);
+ UChar rA_addr = ifieldRegA(theInstr);
+ UInt uimm16 = ifieldUIMM16(theInstr);
+ UChar rB_addr = ifieldRegB(theInstr);
+ UInt opc2 = ifieldOPClo10(theInstr);
+ UChar b1 = ifieldBIT1(theInstr);
+ UChar b0 = ifieldBIT0(theInstr);
+
+ Int simm16 = extend_s_16to32(uimm16);
+ IRType ty = mode64 ? Ity_I64 : Ity_I32;
+ IRTemp EA = newTemp(ty);
+ IRExpr* val;
+
+ switch (opc1) {
+ case 0x1F: // register offset
+ assign( EA, ea_rAor0_idxd( rA_addr, rB_addr ) );
+ break;
+ case 0x3A: // immediate offset: 64bit: ld/ldu/lwa: mask off
+ // lowest 2 bits of immediate before forming EA
+ simm16 = simm16 & 0xFFFFFFFC;
+ default: // immediate offset
+ assign( EA, ea_rAor0_simm( rA_addr, simm16 ) );
+ break;
+ }
+
+ switch (opc1) {
+ case 0x22: // lbz (Load B & Zero, PPC32 p433)
+ DIP("lbz r%u,%d(r%u)\n", rD_addr, (Int)simm16, rA_addr);
+ val = loadBE(Ity_I8, mkexpr(EA));
+ putIReg( rD_addr, mkWidenFrom8(ty, val, False) );
+ break;
+
+ case 0x23: // lbzu (Load B & Zero, Update, PPC32 p434)
+ if (rA_addr == 0 || rA_addr == rD_addr) {
+ vex_printf("dis_int_load(ppc)(lbzu,rA_addr|rD_addr)\n");
+ return False;
+ }
+ DIP("lbzu r%u,%d(r%u)\n", rD_addr, (Int)simm16, rA_addr);
+ val = loadBE(Ity_I8, mkexpr(EA));
+ putIReg( rD_addr, mkWidenFrom8(ty, val, False) );
+ putIReg( rA_addr, mkexpr(EA) );
+ break;
+
+ case 0x2A: // lha (Load HW Alg, PPC32 p445)
+ DIP("lha r%u,%d(r%u)\n", rD_addr, (Int)simm16, rA_addr);
+ val = loadBE(Ity_I16, mkexpr(EA));
+ putIReg( rD_addr, mkWidenFrom16(ty, val, True) );
+ break;
+
+ case 0x2B: // lhau (Load HW Alg, Update, PPC32 p446)
+ if (rA_addr == 0 || rA_addr == rD_addr) {
+ vex_printf("dis_int_load(ppc)(lhau,rA_addr|rD_addr)\n");
+ return False;
+ }
+ DIP("lhau r%u,%d(r%u)\n", rD_addr, (Int)simm16, rA_addr);
+ val = loadBE(Ity_I16, mkexpr(EA));
+ putIReg( rD_addr, mkWidenFrom16(ty, val, True) );
+ putIReg( rA_addr, mkexpr(EA) );
+ break;
+
+ case 0x28: // lhz (Load HW & Zero, PPC32 p450)
+ DIP("lhz r%u,%d(r%u)\n", rD_addr, (Int)simm16, rA_addr);
+ val = loadBE(Ity_I16, mkexpr(EA));
+ putIReg( rD_addr, mkWidenFrom16(ty, val, False) );
+ break;
+
+   case 0x29: // lhzu (Load HW & Zero, Update, PPC32 p451)
+ if (rA_addr == 0 || rA_addr == rD_addr) {
+ vex_printf("dis_int_load(ppc)(lhzu,rA_addr|rD_addr)\n");
+ return False;
+ }
+ DIP("lhzu r%u,%d(r%u)\n", rD_addr, (Int)simm16, rA_addr);
+ val = loadBE(Ity_I16, mkexpr(EA));
+ putIReg( rD_addr, mkWidenFrom16(ty, val, False) );
+ putIReg( rA_addr, mkexpr(EA) );
+ break;
+
+ case 0x20: // lwz (Load W & Zero, PPC32 p460)
+ DIP("lwz r%u,%d(r%u)\n", rD_addr, (Int)simm16, rA_addr);
+ val = loadBE(Ity_I32, mkexpr(EA));
+ putIReg( rD_addr, mkWidenFrom32(ty, val, False) );
+ break;
+
+   case 0x21: // lwzu (Load W & Zero, Update, PPC32 p461)
+ if (rA_addr == 0 || rA_addr == rD_addr) {
+ vex_printf("dis_int_load(ppc)(lwzu,rA_addr|rD_addr)\n");
+ return False;
+ }
+ DIP("lwzu r%u,%d(r%u)\n", rD_addr, (Int)simm16, rA_addr);
+ val = loadBE(Ity_I32, mkexpr(EA));
+ putIReg( rD_addr, mkWidenFrom32(ty, val, False) );
+ putIReg( rA_addr, mkexpr(EA) );
+ break;
+
+ /* X Form */
+ case 0x1F:
+ if (b0 != 0) {
+ vex_printf("dis_int_load(ppc)(Ox1F,b0)\n");
+ return False;
+ }
+
+ switch (opc2) {
+         case 0x077: // lbzux (Load B & Zero, Update Indexed, PPC32 p435)
+            if (rA_addr == 0 || rA_addr == rD_addr) {
+               vex_printf("dis_int_load(ppc)(lbzux,rA_addr|rD_addr)\n");
+               return False;
+            }
+            DIP("lbzux r%u,r%u,r%u\n", rD_addr, rA_addr, rB_addr);
+ val = loadBE(Ity_I8, mkexpr(EA));
+ putIReg( rD_addr, mkWidenFrom8(ty, val, False) );
+ putIReg( rA_addr, mkexpr(EA) );
+ break;
+
+ case 0x057: // lbzx (Load B & Zero, Indexed, PPC32 p436)
+ DIP("lbzx r%u,r%u,r%u\n", rD_addr, rA_addr, rB_addr);
+ val = loadBE(Ity_I8, mkexpr(EA));
+ putIReg( rD_addr, mkWidenFrom8(ty, val, False) );
+ break;
+
+ case 0x177: // lhaux (Load HW Alg, Update Indexed, PPC32 p447)
+ if (rA_addr == 0 || rA_addr == rD_addr) {
+ vex_printf("dis_int_load(ppc)(lhaux,rA_addr|rD_addr)\n");
+ return False;
+ }
+ DIP("lhaux r%u,r%u,r%u\n", rD_addr, rA_addr, rB_addr);
+ val = loadBE(Ity_I16, mkexpr(EA));
+ putIReg( rD_addr, mkWidenFrom16(ty, val, True) );
+ putIReg( rA_addr, mkexpr(EA) );
+ break;
+
+ case 0x157: // lhax (Load HW Alg, Indexed, PPC32 p448)
+ DIP("lhax r%u,r%u,r%u\n", rD_addr, rA_addr, rB_addr);
+ val = loadBE(Ity_I16, mkexpr(EA));
+ putIReg( rD_addr, mkWidenFrom16(ty, val, True) );
+ break;
+
+ case 0x137: // lhzux (Load HW & Zero, Update Indexed, PPC32 p452)
+ if (rA_addr == 0 || rA_addr == rD_addr) {
+ vex_printf("dis_int_load(ppc)(lhzux,rA_addr|rD_addr)\n");
+ return False;
+ }
+ DIP("lhzux r%u,r%u,r%u\n", rD_addr, rA_addr, rB_addr);
+ val = loadBE(Ity_I16, mkexpr(EA));
+ putIReg( rD_addr, mkWidenFrom16(ty, val, False) );
+ putIReg( rA_addr, mkexpr(EA) );
+ break;
+
+ case 0x117: // lhzx (Load HW & Zero, Indexed, PPC32 p453)
+ DIP("lhzx r%u,r%u,r%u\n", rD_addr, rA_addr, rB_addr);
+ val = loadBE(Ity_I16, mkexpr(EA));
+ putIReg( rD_addr, mkWidenFrom16(ty, val, False) );
+ break;
+
+ case 0x037: // lwzux (Load W & Zero, Update Indexed, PPC32 p462)
+ if (rA_addr == 0 || rA_addr == rD_addr) {
+ vex_printf("dis_int_load(ppc)(lwzux,rA_addr|rD_addr)\n");
+ return False;
+ }
+ DIP("lwzux r%u,r%u,r%u\n", rD_addr, rA_addr, rB_addr);
+ val = loadBE(Ity_I32, mkexpr(EA));
+ putIReg( rD_addr, mkWidenFrom32(ty, val, False) );
+ putIReg( rA_addr, mkexpr(EA) );
+ break;
+
+ case 0x017: // lwzx (Load W & Zero, Indexed, PPC32 p463)
+ DIP("lwzx r%u,r%u,r%u\n", rD_addr, rA_addr, rB_addr);
+ val = loadBE(Ity_I32, mkexpr(EA));
+ putIReg( rD_addr, mkWidenFrom32(ty, val, False) );
+ break;
+
+
+ /* 64bit Loads */
+ case 0x035: // ldux (Load DWord, Update Indexed, PPC64 p475)
+ if (rA_addr == 0 || rA_addr == rD_addr) {
+ vex_printf("dis_int_load(ppc)(ldux,rA_addr|rD_addr)\n");
+ return False;
+ }
+ DIP("ldux r%u,r%u,r%u\n", rD_addr, rA_addr, rB_addr);
+ putIReg( rD_addr, loadBE(Ity_I64, mkexpr(EA)) );
+ putIReg( rA_addr, mkexpr(EA) );
+ break;
+
+ case 0x015: // ldx (Load DWord, Indexed, PPC64 p476)
+ DIP("ldx r%u,r%u,r%u\n", rD_addr, rA_addr, rB_addr);
+ putIReg( rD_addr, loadBE(Ity_I64, mkexpr(EA)) );
+ break;
+
+ case 0x175: // lwaux (Load W Alg, Update Indexed, PPC64 p501)
+ if (rA_addr == 0 || rA_addr == rD_addr) {
+ vex_printf("dis_int_load(ppc)(lwaux,rA_addr|rD_addr)\n");
+ return False;
+ }
+ DIP("lwaux r%u,r%u,r%u\n", rD_addr, rA_addr, rB_addr);
+ putIReg( rD_addr,
+ unop(Iop_32Sto64, loadBE(Ity_I32, mkexpr(EA))) );
+ putIReg( rA_addr, mkexpr(EA) );
+ break;
+
+ case 0x155: // lwax (Load W Alg, Indexed, PPC64 p502)
+ DIP("lwax r%u,r%u,r%u\n", rD_addr, rA_addr, rB_addr);
+ putIReg( rD_addr,
+ unop(Iop_32Sto64, loadBE(Ity_I32, mkexpr(EA))) );
+ break;
+
+ default:
+ vex_printf("dis_int_load(ppc)(opc2)\n");
+ return False;
+ }
+ break;
+
+ /* DS Form - 64bit Loads. In each case EA will have been formed
+ with the lowest 2 bits masked off the immediate offset. */
+ case 0x3A:
+ switch ((b1<<1) | b0) {
+ case 0x0: // ld (Load DWord, PPC64 p472)
+ DIP("ld r%u,%d(r%u)\n", rD_addr, simm16, rA_addr);
+ putIReg( rD_addr, loadBE(Ity_I64, mkexpr(EA)) );
+ break;
+
+ case 0x1: // ldu (Load DWord, Update, PPC64 p474)
+ if (rA_addr == 0 || rA_addr == rD_addr) {
+ vex_printf("dis_int_load(ppc)(ldu,rA_addr|rD_addr)\n");
+ return False;
+ }
+ DIP("ldu r%u,%d(r%u)\n", rD_addr, simm16, rA_addr);
+ putIReg( rD_addr, loadBE(Ity_I64, mkexpr(EA)) );
+ putIReg( rA_addr, mkexpr(EA) );
+ break;
+
+ case 0x2: // lwa (Load Word Alg, PPC64 p499)
+ DIP("lwa r%u,%d(r%u)\n", rD_addr, simm16, rA_addr);
+ putIReg( rD_addr,
+ unop(Iop_32Sto64, loadBE(Ity_I32, mkexpr(EA))) );
+ break;
+
+ default:
+ vex_printf("dis_int_load(ppc)(0x3A, opc2)\n");
+ return False;
+ }
+ break;
+
+ default:
+ vex_printf("dis_int_load(ppc)(opc1)\n");
+ return False;
+ }
+ return True;
+}
+
+
+
+/*
+ Integer Store Instructions
+*/
+static Bool dis_int_store ( UInt theInstr, VexAbiInfo* vbi )
+{
+ /* D-Form, X-Form, DS-Form */
+ UChar opc1 = ifieldOPC(theInstr);
+ UInt rS_addr = ifieldRegDS(theInstr);
+ UInt rA_addr = ifieldRegA(theInstr);
+ UInt uimm16 = ifieldUIMM16(theInstr);
+ UInt rB_addr = ifieldRegB(theInstr);
+ UInt opc2 = ifieldOPClo10(theInstr);
+ UChar b1 = ifieldBIT1(theInstr);
+ UChar b0 = ifieldBIT0(theInstr);
+
+ Int simm16 = extend_s_16to32(uimm16);
+ IRType ty = mode64 ? Ity_I64 : Ity_I32;
+ IRTemp rS = newTemp(ty);
+ IRTemp rB = newTemp(ty);
+ IRTemp EA = newTemp(ty);
+
+ assign( rB, getIReg(rB_addr) );
+ assign( rS, getIReg(rS_addr) );
+
+ switch (opc1) {
+ case 0x1F: // register offset
+ assign( EA, ea_rAor0_idxd( rA_addr, rB_addr ) );
+ break;
+ case 0x3E: // immediate offset: 64bit: std/stdu: mask off
+ // lowest 2 bits of immediate before forming EA
+ simm16 = simm16 & 0xFFFFFFFC;
+ default: // immediate offset
+ assign( EA, ea_rAor0_simm( rA_addr, simm16 ) );
+ break;
+ }
+
+ switch (opc1) {
+ case 0x26: // stb (Store B, PPC32 p509)
+ DIP("stb r%u,%d(r%u)\n", rS_addr, simm16, rA_addr);
+ storeBE( mkexpr(EA), mkNarrowTo8(ty, mkexpr(rS)) );
+ break;
+
+ case 0x27: // stbu (Store B, Update, PPC32 p510)
+ if (rA_addr == 0 ) {
+ vex_printf("dis_int_store(ppc)(stbu,rA_addr)\n");
+ return False;
+ }
+ DIP("stbu r%u,%d(r%u)\n", rS_addr, simm16, rA_addr);
+ putIReg( rA_addr, mkexpr(EA) );
+ storeBE( mkexpr(EA), mkNarrowTo8(ty, mkexpr(rS)) );
+ break;
+
+ case 0x2C: // sth (Store HW, PPC32 p522)
+ DIP("sth r%u,%d(r%u)\n", rS_addr, simm16, rA_addr);
+ storeBE( mkexpr(EA), mkNarrowTo16(ty, mkexpr(rS)) );
+ break;
+
+ case 0x2D: // sthu (Store HW, Update, PPC32 p524)
+ if (rA_addr == 0) {
+ vex_printf("dis_int_store(ppc)(sthu,rA_addr)\n");
+ return False;
+ }
+ DIP("sthu r%u,%d(r%u)\n", rS_addr, simm16, rA_addr);
+ putIReg( rA_addr, mkexpr(EA) );
+ storeBE( mkexpr(EA), mkNarrowTo16(ty, mkexpr(rS)) );
+ break;
+
+ case 0x24: // stw (Store W, PPC32 p530)
+ DIP("stw r%u,%d(r%u)\n", rS_addr, simm16, rA_addr);
+ storeBE( mkexpr(EA), mkNarrowTo32(ty, mkexpr(rS)) );
+ break;
+
+ case 0x25: // stwu (Store W, Update, PPC32 p534)
+ if (rA_addr == 0) {
+ vex_printf("dis_int_store(ppc)(stwu,rA_addr)\n");
+ return False;
+ }
+ DIP("stwu r%u,%d(r%u)\n", rS_addr, simm16, rA_addr);
+ putIReg( rA_addr, mkexpr(EA) );
+ storeBE( mkexpr(EA), mkNarrowTo32(ty, mkexpr(rS)) );
+ break;
+
+ /* X Form : all these use EA_indexed */
+ case 0x1F:
+ if (b0 != 0) {
+ vex_printf("dis_int_store(ppc)(0x1F,b0)\n");
+ return False;
+ }
+
+ switch (opc2) {
+ case 0x0F7: // stbux (Store B, Update Indexed, PPC32 p511)
+ if (rA_addr == 0) {
+ vex_printf("dis_int_store(ppc)(stbux,rA_addr)\n");
+ return False;
+ }
+ DIP("stbux r%u,r%u,r%u\n", rS_addr, rA_addr, rB_addr);
+ putIReg( rA_addr, mkexpr(EA) );
+ storeBE( mkexpr(EA), mkNarrowTo8(ty, mkexpr(rS)) );
+ break;
+
+ case 0x0D7: // stbx (Store B Indexed, PPC32 p512)
+ DIP("stbx r%u,r%u,r%u\n", rS_addr, rA_addr, rB_addr);
+ storeBE( mkexpr(EA), mkNarrowTo8(ty, mkexpr(rS)) );
+ break;
+
+ case 0x1B7: // sthux (Store HW, Update Indexed, PPC32 p525)
+ if (rA_addr == 0) {
+ vex_printf("dis_int_store(ppc)(sthux,rA_addr)\n");
+ return False;
+ }
+ DIP("sthux r%u,r%u,r%u\n", rS_addr, rA_addr, rB_addr);
+ putIReg( rA_addr, mkexpr(EA) );
+ storeBE( mkexpr(EA), mkNarrowTo16(ty, mkexpr(rS)) );
+ break;
+
+ case 0x197: // sthx (Store HW Indexed, PPC32 p526)
+ DIP("sthx r%u,r%u,r%u\n", rS_addr, rA_addr, rB_addr);
+ storeBE( mkexpr(EA), mkNarrowTo16(ty, mkexpr(rS)) );
+ break;
+
+ case 0x0B7: // stwux (Store W, Update Indexed, PPC32 p535)
+ if (rA_addr == 0) {
+ vex_printf("dis_int_store(ppc)(stwux,rA_addr)\n");
+ return False;
+ }
+ DIP("stwux r%u,r%u,r%u\n", rS_addr, rA_addr, rB_addr);
+ putIReg( rA_addr, mkexpr(EA) );
+ storeBE( mkexpr(EA), mkNarrowTo32(ty, mkexpr(rS)) );
+ break;
+
+ case 0x097: // stwx (Store W Indexed, PPC32 p536)
+ DIP("stwx r%u,r%u,r%u\n", rS_addr, rA_addr, rB_addr);
+ storeBE( mkexpr(EA), mkNarrowTo32(ty, mkexpr(rS)) );
+ break;
+
+
+ /* 64bit Stores */
+ case 0x0B5: // stdux (Store DWord, Update Indexed, PPC64 p584)
+ if (rA_addr == 0) {
+ vex_printf("dis_int_store(ppc)(stdux,rA_addr)\n");
+ return False;
+ }
+ DIP("stdux r%u,r%u,r%u\n", rS_addr, rA_addr, rB_addr);
+ putIReg( rA_addr, mkexpr(EA) );
+ storeBE( mkexpr(EA), mkexpr(rS) );
+ break;
+
+ case 0x095: // stdx (Store DWord Indexed, PPC64 p585)
+ DIP("stdx r%u,r%u,r%u\n", rS_addr, rA_addr, rB_addr);
+ storeBE( mkexpr(EA), mkexpr(rS) );
+ break;
+
+ default:
+ vex_printf("dis_int_store(ppc)(opc2)\n");
+ return False;
+ }
+ break;
+
+ /* DS Form - 64bit Stores. In each case EA will have been formed
+ with the lowest 2 bits masked off the immediate offset. */
+ case 0x3E:
+ switch ((b1<<1) | b0) {
+ case 0x0: // std (Store DWord, PPC64 p580)
+ DIP("std r%u,%d(r%u)\n", rS_addr, simm16, rA_addr);
+ storeBE( mkexpr(EA), mkexpr(rS) );
+ break;
+
+ case 0x1: // stdu (Store DWord, Update, PPC64 p583)
+ DIP("stdu r%u,%d(r%u)\n", rS_addr, simm16, rA_addr);
+ putIReg( rA_addr, mkexpr(EA) );
+ storeBE( mkexpr(EA), mkexpr(rS) );
+ break;
+
+ default:
+ vex_printf("dis_int_load(ppc)(0x3A, opc2)\n");
+ return False;
+ }
+ break;
+
+ default:
+ vex_printf("dis_int_store(ppc)(opc1)\n");
+ return False;
+ }
+ return True;
+}
+
+
+
+/*
+ Integer Load/Store Multiple Instructions
+*/
+static Bool dis_int_ldst_mult ( UInt theInstr )
+{
+ /* D-Form */
+ UChar opc1 = ifieldOPC(theInstr);
+ UChar rD_addr = ifieldRegDS(theInstr);
+ UChar rS_addr = rD_addr;
+ UChar rA_addr = ifieldRegA(theInstr);
+ UInt uimm16 = ifieldUIMM16(theInstr);
+
+ Int simm16 = extend_s_16to32(uimm16);
+ IRType ty = mode64 ? Ity_I64 : Ity_I32;
+ IRTemp EA = newTemp(ty);
+ UInt r = 0;
+ UInt ea_off = 0;
+ IRExpr* irx_addr;
+
+ assign( EA, ea_rAor0_simm( rA_addr, simm16 ) );
+
+ switch (opc1) {
+ case 0x2E: // lmw (Load Multiple Word, PPC32 p454)
+ if (rA_addr >= rD_addr) {
+ vex_printf("dis_int_ldst_mult(ppc)(lmw,rA_addr)\n");
+ return False;
+ }
+ DIP("lmw r%u,%d(r%u)\n", rD_addr, simm16, rA_addr);
+ for (r = rD_addr; r <= 31; r++) {
+ irx_addr = binop(Iop_Add32, mkexpr(EA), mkU32(ea_off));
+ putIReg( r, mkWidenFrom32(ty, loadBE(Ity_I32, irx_addr ),
+ False) );
+ ea_off += 4;
+ }
+ break;
+
+ case 0x2F: // stmw (Store Multiple Word, PPC32 p527)
+ DIP("stmw r%u,%d(r%u)\n", rS_addr, simm16, rA_addr);
+ for (r = rS_addr; r <= 31; r++) {
+ irx_addr = binop(Iop_Add32, mkexpr(EA), mkU32(ea_off));
+ storeBE( irx_addr, mkNarrowTo32(ty, getIReg(r)) );
+ ea_off += 4;
+ }
+ break;
+
+ default:
+ vex_printf("dis_int_ldst_mult(ppc)(opc1)\n");
+ return False;
+ }
+ return True;
+}
+
+
+
+/*
+ Integer Load/Store String Instructions
+*/
+static
+void generate_lsw_sequence ( IRTemp tNBytes, // # bytes, :: Ity_I32
+ IRTemp EA, // EA
+ Int rD, // first dst register
+ Int maxBytes ) // 32 or 128
+{
+ Int i, shift = 24;
+ IRExpr* e_nbytes = mkexpr(tNBytes);
+ IRExpr* e_EA = mkexpr(EA);
+ IRType ty = mode64 ? Ity_I64 : Ity_I32;
+
+ vassert(rD >= 0 && rD < 32);
+ rD--; if (rD < 0) rD = 31;
+
+ for (i = 0; i < maxBytes; i++) {
+ /* if (nBytes < (i+1)) goto NIA; */
+ stmt( IRStmt_Exit( binop(Iop_CmpLT32U, e_nbytes, mkU32(i+1)),
+ Ijk_Boring,
+ mkSzConst( ty, nextInsnAddr()) ));
+ /* when crossing into a new dest register, set it to zero. */
+ if ((i % 4) == 0) {
+ rD++; if (rD == 32) rD = 0;
+ putIReg(rD, mkSzImm(ty, 0));
+ shift = 24;
+ }
+ /* rD |= (8Uto32(*(EA+i))) << shift */
+ vassert(shift == 0 || shift == 8 || shift == 16 || shift == 24);
+ putIReg(
+ rD,
+ mkWidenFrom32(
+ ty,
+ binop(
+ Iop_Or32,
+ mkNarrowTo32(ty, getIReg(rD)),
+ binop(
+ Iop_Shl32,
+ unop(
+ Iop_8Uto32,
+ loadBE(Ity_I8,
+ binop(mkSzOp(ty,Iop_Add8), e_EA, mkSzImm(ty,i)))
+ ),
+ mkU8(toUChar(shift))
+ )
+ ),
+ /*Signed*/False
+ )
+ );
+ shift -= 8;
+ }
+}
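+
+/* Illustrative trace (editor's sketch): for a 5-byte lswi starting at
+   rD, iteration 0 zeroes rD and ORs bytes 0..3 into it at shifts
+   24,16,8,0 (big-endian order); iteration 4 zeroes the next register
+   and places byte 4 at shift 24.  Each iteration first side-exits to
+   the next instruction once i+1 exceeds nBytes, which also covers the
+   run-time byte counts used by lswx. */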
+
+static
+void generate_stsw_sequence ( IRTemp tNBytes, // # bytes, :: Ity_I32
+ IRTemp EA, // EA
+ Int rS, // first src register
+ Int maxBytes ) // 32 or 128
+{
+ Int i, shift = 24;
+ IRExpr* e_nbytes = mkexpr(tNBytes);
+ IRExpr* e_EA = mkexpr(EA);
+ IRType ty = mode64 ? Ity_I64 : Ity_I32;
+
+ vassert(rS >= 0 && rS < 32);
+ rS--; if (rS < 0) rS = 31;
+
+ for (i = 0; i < maxBytes; i++) {
+ /* if (nBytes < (i+1)) goto NIA; */
+ stmt( IRStmt_Exit( binop(Iop_CmpLT32U, e_nbytes, mkU32(i+1)),
+ Ijk_Boring,
+ mkSzConst( ty, nextInsnAddr() ) ));
+ /* check for crossing into a new src register. */
+ if ((i % 4) == 0) {
+ rS++; if (rS == 32) rS = 0;
+ shift = 24;
+ }
+ /* *(EA+i) = 32to8(rS >> shift) */
+ vassert(shift == 0 || shift == 8 || shift == 16 || shift == 24);
+ storeBE(
+ binop(mkSzOp(ty,Iop_Add8), e_EA, mkSzImm(ty,i)),
+ unop(Iop_32to8,
+ binop(Iop_Shr32,
+ mkNarrowTo32(ty, getIReg(rS)),
+ mkU8(toUChar(shift))))
+ );
+ shift -= 8;
+ }
+}
+
+static Bool dis_int_ldst_str ( UInt theInstr, /*OUT*/Bool* stopHere )
+{
+ /* X-Form */
+ UChar opc1 = ifieldOPC(theInstr);
+ UChar rD_addr = ifieldRegDS(theInstr);
+ UChar rS_addr = rD_addr;
+ UChar rA_addr = ifieldRegA(theInstr);
+ UChar rB_addr = ifieldRegB(theInstr);
+ UChar NumBytes = rB_addr;
+ UInt opc2 = ifieldOPClo10(theInstr);
+ UChar b0 = ifieldBIT0(theInstr);
+
+ IRType ty = mode64 ? Ity_I64 : Ity_I32;
+ IRTemp t_EA = newTemp(ty);
+ IRTemp t_nbytes = IRTemp_INVALID;
+
+ *stopHere = False;
+
+ if (opc1 != 0x1F || b0 != 0) {
+ vex_printf("dis_int_ldst_str(ppc)(opc1)\n");
+ return False;
+ }
+
+ switch (opc2) {
+ case 0x255: // lswi (Load String Word Immediate, PPC32 p455)
+ /* NB: does not reject the case where RA is in the range of
+ registers to be loaded. It should. */
+ DIP("lswi r%u,r%u,%d\n", rD_addr, rA_addr, NumBytes);
+ assign( t_EA, ea_rAor0(rA_addr) );
+ if (NumBytes == 8 && !mode64) {
+ /* Special case hack */
+ /* rD = Mem[EA]; (rD+1)%32 = Mem[EA+4] */
+ putIReg( rD_addr,
+ loadBE(Ity_I32, mkexpr(t_EA)) );
+ putIReg( (rD_addr+1) % 32,
+ loadBE(Ity_I32,
+ binop(Iop_Add32, mkexpr(t_EA), mkU32(4))) );
+ } else {
+ t_nbytes = newTemp(Ity_I32);
+ assign( t_nbytes, mkU32(NumBytes==0 ? 32 : NumBytes) );
+ generate_lsw_sequence( t_nbytes, t_EA, rD_addr, 32 );
+ *stopHere = True;
+ }
+ return True;
+
+ case 0x215: // lswx (Load String Word Indexed, PPC32 p456)
+ /* NB: does not reject the case where RA is in the range of
+         registers to be loaded. It should; but since that can only be
+         detected at run time, it's not easy to do so. */
+ if (rD_addr == rA_addr || rD_addr == rB_addr)
+ return False;
+ if (rD_addr == 0 && rA_addr == 0)
+ return False;
+ DIP("lswx r%u,r%u,r%u\n", rD_addr, rA_addr, rB_addr);
+ t_nbytes = newTemp(Ity_I32);
+ assign( t_EA, ea_rAor0_idxd(rA_addr,rB_addr) );
+ assign( t_nbytes, unop( Iop_8Uto32, getXER_BC() ) );
+ generate_lsw_sequence( t_nbytes, t_EA, rD_addr, 128 );
+ *stopHere = True;
+ return True;
+
+ case 0x2D5: // stswi (Store String Word Immediate, PPC32 p528)
+ DIP("stswi r%u,r%u,%d\n", rS_addr, rA_addr, NumBytes);
+ assign( t_EA, ea_rAor0(rA_addr) );
+ if (NumBytes == 8 && !mode64) {
+ /* Special case hack */
+ /* Mem[EA] = rD; Mem[EA+4] = (rD+1)%32 */
+ storeBE( mkexpr(t_EA),
+ getIReg(rD_addr) );
+ storeBE( binop(Iop_Add32, mkexpr(t_EA), mkU32(4)),
+ getIReg((rD_addr+1) % 32) );
+ } else {
+ t_nbytes = newTemp(Ity_I32);
+ assign( t_nbytes, mkU32(NumBytes==0 ? 32 : NumBytes) );
+ generate_stsw_sequence( t_nbytes, t_EA, rD_addr, 32 );
+ *stopHere = True;
+ }
+ return True;
+
+ case 0x295: // stswx (Store String Word Indexed, PPC32 p529)
+ DIP("stswx r%u,r%u,r%u\n", rS_addr, rA_addr, rB_addr);
+ t_nbytes = newTemp(Ity_I32);
+ assign( t_EA, ea_rAor0_idxd(rA_addr,rB_addr) );
+ assign( t_nbytes, unop( Iop_8Uto32, getXER_BC() ) );
+ generate_stsw_sequence( t_nbytes, t_EA, rS_addr, 128 );
+ *stopHere = True;
+ return True;
+
+ default:
+ vex_printf("dis_int_ldst_str(ppc)(opc2)\n");
+ return False;
+ }
+ return True;
+}
+
+
+/* ------------------------------------------------------------------
+ Integer Branch Instructions
+ ------------------------------------------------------------------ */
+
+/*
+ Branch helper function
+ ok = BO[2] | ((CTR[0] != 0) ^ BO[1])
+ Returns an I32 which is 0x00000000 if the ctr condition failed
+ and 0xFFFFFFFF otherwise.
+*/
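+/* Example: BO = 0b00100 (BO[2] set) ignores CTR entirely;
+   BO = 0b00010 requests a branch when the decremented CTR is zero,
+   and BO = 0b00000 requests one when it is nonzero. */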
+static IRExpr* /* :: Ity_I32 */ branch_ctr_ok( UInt BO )
+{
+ IRType ty = mode64 ? Ity_I64 : Ity_I32;
+ IRTemp ok = newTemp(Ity_I32);
+
+ if ((BO >> 2) & 1) { // independent of ctr
+ assign( ok, mkU32(0xFFFFFFFF) );
+ } else {
+ if ((BO >> 1) & 1) { // ctr == 0 ?
+ assign( ok, unop( Iop_1Sto32,
+ binop( mkSzOp(ty, Iop_CmpEQ8),
+ getGST( PPC_GST_CTR ),
+ mkSzImm(ty,0))) );
+ } else { // ctr != 0 ?
+ assign( ok, unop( Iop_1Sto32,
+ binop( mkSzOp(ty, Iop_CmpNE8),
+ getGST( PPC_GST_CTR ),
+ mkSzImm(ty,0))) );
+ }
+ }
+ return mkexpr(ok);
+}
+
+
+/*
+ Branch helper function cond_ok = BO[4] | (CR[BI] == BO[3])
+  Returns an I32 which is 0 if the condition failed, and
+  some arbitrary nonzero value otherwise. */
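+/* Example: BO = 0b10000 (BO[4] set) makes the branch unconditional
+   as far as CR is concerned; with BO[4] clear, BO[3] gives the value
+   (1 or 0) that CR[BI] must have for the branch to be taken. */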
+
+static IRExpr* /* :: Ity_I32 */ branch_cond_ok( UInt BO, UInt BI )
+{
+ Int where;
+ IRTemp res = newTemp(Ity_I32);
+ IRTemp cr_bi = newTemp(Ity_I32);
+
+ if ((BO >> 4) & 1) {
+ assign( res, mkU32(1) );
+ } else {
+      // ok = (CR[BI] == BO[3]).  Note: the following relies on
+      // getCRbit_anywhere returning a value which is either zero
+      // or has exactly 1 bit set.
+ assign( cr_bi, getCRbit_anywhere( BI, &where ) );
+
+ if ((BO >> 3) & 1) {
+ /* We can use cr_bi as-is. */
+ assign( res, mkexpr(cr_bi) );
+ } else {
+ /* We have to invert the sense of the information held in
+ cr_bi. For that we need to know which bit
+ getCRbit_anywhere regards as significant. */
+ assign( res, binop(Iop_Xor32, mkexpr(cr_bi),
+ mkU32(1<<where)) );
+ }
+ }
+ return mkexpr(res);
+}
+
+
+/*
+ Integer Branch Instructions
+*/
+static Bool dis_branch ( UInt theInstr,
+ VexAbiInfo* vbi,
+ /*OUT*/DisResult* dres,
+ Bool (*resteerOkFn)(void*,Addr64),
+ void* callback_opaque )
+{
+ UChar opc1 = ifieldOPC(theInstr);
+ UChar BO = ifieldRegDS(theInstr);
+ UChar BI = ifieldRegA(theInstr);
+   UInt      BD_u16   = ifieldUIMM16(theInstr) & 0xFFFFFFFC; /* mask off AA, LK */
+ UChar b11to15 = ifieldRegB(theInstr);
+ UInt opc2 = ifieldOPClo10(theInstr);
+   UInt      LI_u26   = ifieldUIMM26(theInstr) & 0xFFFFFFFC; /* mask off AA, LK */
+ UChar flag_AA = ifieldBIT1(theInstr);
+ UChar flag_LK = ifieldBIT0(theInstr);
+
+ IRType ty = mode64 ? Ity_I64 : Ity_I32;
+ Addr64 tgt = 0;
+ Int BD = extend_s_16to32(BD_u16);
+ IRTemp do_branch = newTemp(Ity_I32);
+ IRTemp ctr_ok = newTemp(Ity_I32);
+ IRTemp cond_ok = newTemp(Ity_I32);
+ IRExpr* e_nia = mkSzImm(ty, nextInsnAddr());
+ IRConst* c_nia = mkSzConst(ty, nextInsnAddr());
+ IRTemp lr_old = newTemp(ty);
+
+ /* Hack to pass through code that just wants to read the PC */
+ if (theInstr == 0x429F0005) {
+ DIP("bcl 0x%x, 0x%x (a.k.a mr lr,cia+4)\n", BO, BI);
+ putGST( PPC_GST_LR, e_nia );
+ return True;
+ }
+
+ /* The default what-next. Individual cases can override it. */
+ dres->whatNext = Dis_StopHere;
+
+ switch (opc1) {
+ case 0x12: // b (Branch, PPC32 p360)
+ if (flag_AA) {
+ tgt = mkSzAddr( ty, extend_s_26to64(LI_u26) );
+ } else {
+ tgt = mkSzAddr( ty, guest_CIA_curr_instr +
+ (Long)extend_s_26to64(LI_u26) );
+ }
+ if (mode64) {
+ DIP("b%s%s 0x%llx\n",
+ flag_LK ? "l" : "", flag_AA ? "a" : "", tgt);
+ } else {
+ DIP("b%s%s 0x%x\n",
+ flag_LK ? "l" : "", flag_AA ? "a" : "", (Addr32)tgt);
+ }
+
+ if (flag_LK) {
+ putGST( PPC_GST_LR, e_nia );
+ if (vbi->guest_ppc_zap_RZ_at_bl
+ && vbi->guest_ppc_zap_RZ_at_bl( (ULong)tgt) ) {
+ IRTemp t_tgt = newTemp(ty);
+ assign(t_tgt, mode64 ? mkU64(tgt) : mkU32(tgt) );
+ make_redzone_AbiHint( vbi, t_tgt,
+ "branch-and-link (unconditional call)" );
+ }
+ }
+
+ if (resteerOkFn( callback_opaque, tgt )) {
+ dres->whatNext = Dis_ResteerU;
+ dres->continueAt = tgt;
+ } else {
+ irsb->jumpkind = flag_LK ? Ijk_Call : Ijk_Boring;
+ irsb->next = mkSzImm(ty, tgt);
+ }
+ break;
+
+ case 0x10: // bc (Branch Conditional, PPC32 p361)
+ DIP("bc%s%s 0x%x, 0x%x, 0x%x\n",
+ flag_LK ? "l" : "", flag_AA ? "a" : "", BO, BI, BD);
+
+ if (!(BO & 0x4)) {
+ putGST( PPC_GST_CTR,
+ binop(mkSzOp(ty, Iop_Sub8),
+ getGST( PPC_GST_CTR ), mkSzImm(ty, 1)) );
+ }
+
+      /* This is a bit subtle. ctr_ok is either all 0s or all 1s.
+         cond_ok is either zero or nonzero, since that's the cheapest
+         way to compute it. Anding them together gives a value which
+         is nonzero iff both conditions hold, and so that's what we
+         must test for in the IRStmt_Exit. */
+ assign( ctr_ok, branch_ctr_ok( BO ) );
+ assign( cond_ok, branch_cond_ok( BO, BI ) );
+ assign( do_branch,
+ binop(Iop_And32, mkexpr(cond_ok), mkexpr(ctr_ok)) );
+
+ if (flag_AA) {
+ tgt = mkSzAddr(ty, extend_s_16to64(BD_u16));
+ } else {
+ tgt = mkSzAddr(ty, guest_CIA_curr_instr +
+ (Long)extend_s_16to64(BD_u16));
+ }
+ if (flag_LK)
+ putGST( PPC_GST_LR, e_nia );
+
+ stmt( IRStmt_Exit(
+ binop(Iop_CmpNE32, mkexpr(do_branch), mkU32(0)),
+ flag_LK ? Ijk_Call : Ijk_Boring,
+ mkSzConst(ty, tgt) ) );
+
+ irsb->jumpkind = Ijk_Boring;
+ irsb->next = e_nia;
+ break;
+
+ case 0x13:
+ /* For bclr and bcctr, it appears that the lowest two bits of
+ b11to15 are a branch hint, and so we only need to ensure it's
+ of the form 000XX. */
+ if ((b11to15 & ~3) != 0) {
+ vex_printf("dis_int_branch(ppc)(0x13,b11to15)(%d)\n", (Int)b11to15);
+ return False;
+ }
+
+ switch (opc2) {
+ case 0x210: // bcctr (Branch Cond. to Count Register, PPC32 p363)
+ if ((BO & 0x4) == 0) { // "decr and test CTR" option invalid
+ vex_printf("dis_int_branch(ppc)(bcctr,BO)\n");
+ return False;
+ }
+ DIP("bcctr%s 0x%x, 0x%x\n", flag_LK ? "l" : "", BO, BI);
+
+ assign( cond_ok, branch_cond_ok( BO, BI ) );
+
+ /* FIXME: this is confusing. lr_old holds the old value
+ of ctr, not lr :-) */
+ assign( lr_old, addr_align( getGST( PPC_GST_CTR ), 4 ));
+
+ if (flag_LK)
+ putGST( PPC_GST_LR, e_nia );
+
+ stmt( IRStmt_Exit(
+ binop(Iop_CmpEQ32, mkexpr(cond_ok), mkU32(0)),
+ Ijk_Boring,
+ c_nia ));
+
+ if (flag_LK && vbi->guest_ppc_zap_RZ_at_bl) {
+ make_redzone_AbiHint( vbi, lr_old,
+ "b-ctr-l (indirect call)" );
+ }
+
+ irsb->jumpkind = flag_LK ? Ijk_Call : Ijk_Boring;
+ irsb->next = mkexpr(lr_old);
+ break;
+
+ case 0x010: { // bclr (Branch Cond. to Link Register, PPC32 p365)
+ Bool vanilla_return = False;
+ if ((BO & 0x14 /* 1z1zz */) == 0x14 && flag_LK == 0) {
+ DIP("blr\n");
+ vanilla_return = True;
+ } else {
+ DIP("bclr%s 0x%x, 0x%x\n", flag_LK ? "l" : "", BO, BI);
+ }
+
+ if (!(BO & 0x4)) {
+ putGST( PPC_GST_CTR,
+ binop(mkSzOp(ty, Iop_Sub8),
+ getGST( PPC_GST_CTR ), mkSzImm(ty, 1)) );
+ }
+
+ /* See comments above for 'bc' about this */
+ assign( ctr_ok, branch_ctr_ok( BO ) );
+ assign( cond_ok, branch_cond_ok( BO, BI ) );
+ assign( do_branch,
+ binop(Iop_And32, mkexpr(cond_ok), mkexpr(ctr_ok)) );
+
+ assign( lr_old, addr_align( getGST( PPC_GST_LR ), 4 ));
+
+ if (flag_LK)
+ putGST( PPC_GST_LR, e_nia );
+
+ stmt( IRStmt_Exit(
+ binop(Iop_CmpEQ32, mkexpr(do_branch), mkU32(0)),
+ Ijk_Boring,
+ c_nia ));
+
+ if (vanilla_return && vbi->guest_ppc_zap_RZ_at_blr) {
+ make_redzone_AbiHint( vbi, lr_old,
+ "branch-to-lr (unconditional return)" );
+ }
+
+ /* blrl is pretty strange; it's like a return that sets the
+ return address of its caller to the insn following this
+ one. Mark it as a return. */
+ irsb->jumpkind = Ijk_Ret; /* was flag_LK ? Ijk_Call : Ijk_Ret; */
+ irsb->next = mkexpr(lr_old);
+ break;
+ }
+ default:
+ vex_printf("dis_int_branch(ppc)(opc2)\n");
+ return False;
+ }
+ break;
+
+ default:
+ vex_printf("dis_int_branch(ppc)(opc1)\n");
+ return False;
+ }
+
+ return True;
+}
+
+
+
+/*
+ Condition Register Logical Instructions
+*/
+static Bool dis_cond_logic ( UInt theInstr )
+{
+ /* XL-Form */
+ UChar opc1 = ifieldOPC(theInstr);
+ UChar crbD_addr = ifieldRegDS(theInstr);
+ UChar crfD_addr = toUChar( IFIELD(theInstr, 23, 3) );
+ UChar crbA_addr = ifieldRegA(theInstr);
+ UChar crfS_addr = toUChar( IFIELD(theInstr, 18, 3) );
+ UChar crbB_addr = ifieldRegB(theInstr);
+ UInt opc2 = ifieldOPClo10(theInstr);
+ UChar b0 = ifieldBIT0(theInstr);
+
+ IRTemp crbD = newTemp(Ity_I32);
+ IRTemp crbA = newTemp(Ity_I32);
+ IRTemp crbB = newTemp(Ity_I32);
+
+ if (opc1 != 19 || b0 != 0) {
+ vex_printf("dis_cond_logic(ppc)(opc1)\n");
+ return False;
+ }
+
+ if (opc2 == 0) { // mcrf (Move Cond Reg Field, PPC32 p464)
+ if (((crbD_addr & 0x3) != 0) ||
+ ((crbA_addr & 0x3) != 0) || (crbB_addr != 0)) {
+ vex_printf("dis_cond_logic(ppc)(crbD|crbA|crbB != 0)\n");
+ return False;
+ }
+ DIP("mcrf cr%u,cr%u\n", crfD_addr, crfS_addr);
+ putCR0( crfD_addr, getCR0( crfS_addr) );
+ putCR321( crfD_addr, getCR321(crfS_addr) );
+ } else {
+ assign( crbA, getCRbit(crbA_addr) );
+ if (crbA_addr == crbB_addr)
+ crbB = crbA;
+ else
+ assign( crbB, getCRbit(crbB_addr) );
+
+ switch (opc2) {
+ case 0x101: // crand (Cond Reg AND, PPC32 p372)
+ DIP("crand crb%d,crb%d,crb%d\n", crbD_addr, crbA_addr, crbB_addr);
+ assign( crbD, binop(Iop_And32, mkexpr(crbA), mkexpr(crbB)) );
+ break;
+ case 0x081: // crandc (Cond Reg AND w. Complement, PPC32 p373)
+ DIP("crandc crb%d,crb%d,crb%d\n", crbD_addr, crbA_addr, crbB_addr);
+ assign( crbD, binop(Iop_And32,
+ mkexpr(crbA),
+ unop(Iop_Not32, mkexpr(crbB))) );
+ break;
+ case 0x121: // creqv (Cond Reg Equivalent, PPC32 p374)
+ DIP("creqv crb%d,crb%d,crb%d\n", crbD_addr, crbA_addr, crbB_addr);
+ assign( crbD, unop(Iop_Not32,
+ binop(Iop_Xor32, mkexpr(crbA), mkexpr(crbB))) );
+ break;
+ case 0x0E1: // crnand (Cond Reg NAND, PPC32 p375)
+ DIP("crnand crb%d,crb%d,crb%d\n", crbD_addr, crbA_addr, crbB_addr);
+ assign( crbD, unop(Iop_Not32,
+ binop(Iop_And32, mkexpr(crbA), mkexpr(crbB))) );
+ break;
+ case 0x021: // crnor (Cond Reg NOR, PPC32 p376)
+ DIP("crnor crb%d,crb%d,crb%d\n", crbD_addr, crbA_addr, crbB_addr);
+ assign( crbD, unop(Iop_Not32,
+ binop(Iop_Or32, mkexpr(crbA), mkexpr(crbB))) );
+ break;
+ case 0x1C1: // cror (Cond Reg OR, PPC32 p377)
+ DIP("cror crb%d,crb%d,crb%d\n", crbD_addr, crbA_addr, crbB_addr);
+ assign( crbD, binop(Iop_Or32, mkexpr(crbA), mkexpr(crbB)) );
+ break;
+ case 0x1A1: // crorc (Cond Reg OR w. Complement, PPC32 p378)
+ DIP("crorc crb%d,crb%d,crb%d\n", crbD_addr, crbA_addr, crbB_addr);
+ assign( crbD, binop(Iop_Or32,
+ mkexpr(crbA),
+ unop(Iop_Not32, mkexpr(crbB))) );
+ break;
+ case 0x0C1: // crxor (Cond Reg XOR, PPC32 p379)
+ DIP("crxor crb%d,crb%d,crb%d\n", crbD_addr, crbA_addr, crbB_addr);
+ assign( crbD, binop(Iop_Xor32, mkexpr(crbA), mkexpr(crbB)) );
+ break;
+ default:
+ vex_printf("dis_cond_logic(ppc)(opc2)\n");
+ return False;
+ }
+
+ putCRbit( crbD_addr, mkexpr(crbD) );
+ }
+ return True;
+}
+
+
+/*
+ Trap instructions
+*/
+
+/* Do the code generation for a trap. Returned Bool is true iff
+   this is an unconditional trap. If the two arg IRExpr*s are
+   Ity_I32s then the comparison is 32-bit. If they are Ity_I64s
+   then the comparison is 64-bit, and we must be disassembling
+   64-bit instructions. */
+static Bool do_trap ( UChar TO,
+ IRExpr* argL0, IRExpr* argR0, Addr64 cia )
+{
+ IRTemp argL, argR;
+ IRExpr *argLe, *argRe, *cond, *tmp;
+
+ Bool is32bit = typeOfIRExpr(irsb->tyenv, argL0 ) == Ity_I32;
+
+ IROp opAND = is32bit ? Iop_And32 : Iop_And64;
+ IROp opOR = is32bit ? Iop_Or32 : Iop_Or64;
+ IROp opCMPORDS = is32bit ? Iop_CmpORD32S : Iop_CmpORD64S;
+ IROp opCMPORDU = is32bit ? Iop_CmpORD32U : Iop_CmpORD64U;
+ IROp opCMPNE = is32bit ? Iop_CmpNE32 : Iop_CmpNE64;
+ IROp opCMPEQ = is32bit ? Iop_CmpEQ32 : Iop_CmpEQ64;
+ IRExpr* const0 = is32bit ? mkU32(0) : mkU64(0);
+ IRExpr* const2 = is32bit ? mkU32(2) : mkU64(2);
+ IRExpr* const4 = is32bit ? mkU32(4) : mkU64(4);
+ IRExpr* const8 = is32bit ? mkU32(8) : mkU64(8);
+
+ const UChar b11100 = 0x1C;
+ const UChar b00111 = 0x07;
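+   /* TO bit meanings: 16 = "L <s R", 8 = "L >s R", 4 = "L == R",
+      2 = "L <u R", 1 = "L >u R". If all three signed tests (b11100)
+      or the equality test plus both unsigned tests (b00111) are
+      requested, one of them must hold for any pair of values, so the
+      trap is unconditional. */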
+
+ if (is32bit) {
+ vassert( typeOfIRExpr(irsb->tyenv, argL0) == Ity_I32 );
+ vassert( typeOfIRExpr(irsb->tyenv, argR0) == Ity_I32 );
+ } else {
+ vassert( typeOfIRExpr(irsb->tyenv, argL0) == Ity_I64 );
+ vassert( typeOfIRExpr(irsb->tyenv, argR0) == Ity_I64 );
+ vassert( mode64 );
+ }
+
+ if ((TO & b11100) == b11100 || (TO & b00111) == b00111) {
+ /* Unconditional trap. Just do the exit without
+ testing the arguments. */
+ stmt( IRStmt_Exit(
+ binop(opCMPEQ, const0, const0),
+ Ijk_SigTRAP,
+ mode64 ? IRConst_U64(cia) : IRConst_U32((UInt)cia)
+ ));
+ return True; /* unconditional trap */
+ }
+
+ if (is32bit) {
+ argL = newTemp(Ity_I32);
+ argR = newTemp(Ity_I32);
+ } else {
+ argL = newTemp(Ity_I64);
+ argR = newTemp(Ity_I64);
+ }
+
+ assign( argL, argL0 );
+ assign( argR, argR0 );
+
+ argLe = mkexpr(argL);
+ argRe = mkexpr(argR);
+
+ cond = const0;
+ if (TO & 16) { // L <s R
+ tmp = binop(opAND, binop(opCMPORDS, argLe, argRe), const8);
+ cond = binop(opOR, tmp, cond);
+ }
+ if (TO & 8) { // L >s R
+ tmp = binop(opAND, binop(opCMPORDS, argLe, argRe), const4);
+ cond = binop(opOR, tmp, cond);
+ }
+ if (TO & 4) { // L == R
+ tmp = binop(opAND, binop(opCMPORDS, argLe, argRe), const2);
+ cond = binop(opOR, tmp, cond);
+ }
+ if (TO & 2) { // L <u R
+ tmp = binop(opAND, binop(opCMPORDU, argLe, argRe), const8);
+ cond = binop(opOR, tmp, cond);
+ }
+ if (TO & 1) { // L >u R
+ tmp = binop(opAND, binop(opCMPORDU, argLe, argRe), const4);
+ cond = binop(opOR, tmp, cond);
+ }
+ stmt( IRStmt_Exit(
+ binop(opCMPNE, cond, const0),
+ Ijk_SigTRAP,
+ mode64 ? IRConst_U64(cia) : IRConst_U32((UInt)cia)
+ ));
+ return False; /* not an unconditional trap */
+}
+
+static Bool dis_trapi ( UInt theInstr,
+ /*OUT*/DisResult* dres )
+{
+ /* D-Form */
+ UChar opc1 = ifieldOPC(theInstr);
+ UChar TO = ifieldRegDS(theInstr);
+ UChar rA_addr = ifieldRegA(theInstr);
+ UInt uimm16 = ifieldUIMM16(theInstr);
+ ULong simm16 = extend_s_16to64(uimm16);
+ Addr64 cia = guest_CIA_curr_instr;
+ IRType ty = mode64 ? Ity_I64 : Ity_I32;
+ Bool uncond = False;
+
+ switch (opc1) {
+ case 0x03: // twi (Trap Word Immediate, PPC32 p548)
+ uncond = do_trap( TO,
+ mode64 ? unop(Iop_64to32, getIReg(rA_addr))
+ : getIReg(rA_addr),
+ mkU32( (UInt)simm16 ),
+ cia );
+ if (TO == 4) {
+ DIP("tweqi r%u,%d\n", (UInt)rA_addr, (Int)simm16);
+ } else {
+ DIP("tw%di r%u,%d\n", (Int)TO, (UInt)rA_addr, (Int)simm16);
+ }
+ break;
+ case 0x02: // tdi
+ if (!mode64)
+ return False;
+ uncond = do_trap( TO, getIReg(rA_addr), mkU64( (ULong)simm16 ), cia );
+ if (TO == 4) {
+ DIP("tdeqi r%u,%d\n", (UInt)rA_addr, (Int)simm16);
+ } else {
+ DIP("td%di r%u,%d\n", (Int)TO, (UInt)rA_addr, (Int)simm16);
+ }
+ break;
+ default:
+ return False;
+ }
+
+ if (uncond) {
+ /* If the trap shows signs of being unconditional, don't
+ continue decoding past it. */
+ irsb->next = mkSzImm( ty, nextInsnAddr() );
+ irsb->jumpkind = Ijk_Boring;
+ dres->whatNext = Dis_StopHere;
+ }
+
+ return True;
+}
+
+static Bool dis_trap ( UInt theInstr,
+ /*OUT*/DisResult* dres )
+{
+ /* X-Form */
+ UInt opc2 = ifieldOPClo10(theInstr);
+ UChar TO = ifieldRegDS(theInstr);
+ UChar rA_addr = ifieldRegA(theInstr);
+ UChar rB_addr = ifieldRegB(theInstr);
+ Addr64 cia = guest_CIA_curr_instr;
+ IRType ty = mode64 ? Ity_I64 : Ity_I32;
+ Bool uncond = False;
+
+ if (ifieldBIT0(theInstr) != 0)
+ return False;
+
+ switch (opc2) {
+ case 0x004: // tw (Trap Word, PPC64 p540)
+ uncond = do_trap( TO,
+ mode64 ? unop(Iop_64to32, getIReg(rA_addr))
+ : getIReg(rA_addr),
+ mode64 ? unop(Iop_64to32, getIReg(rB_addr))
+ : getIReg(rB_addr),
+ cia );
+ if (TO == 4) {
+ DIP("tweq r%u,r%u\n", (UInt)rA_addr, (UInt)rB_addr);
+ } else {
+ DIP("tw%d r%u,r%u\n", (Int)TO, (UInt)rA_addr, (UInt)rB_addr);
+ }
+ break;
+ case 0x044: // td (Trap Doubleword, PPC64 p534)
+ if (!mode64)
+ return False;
+ uncond = do_trap( TO, getIReg(rA_addr), getIReg(rB_addr), cia );
+ if (TO == 4) {
+ DIP("tdeq r%u,r%u\n", (UInt)rA_addr, (UInt)rB_addr);
+ } else {
+ DIP("td%d r%u,r%u\n", (Int)TO, (UInt)rA_addr, (UInt)rB_addr);
+ }
+ break;
+ default:
+ return False;
+ }
+
+ if (uncond) {
+ /* If the trap shows signs of being unconditional, don't
+ continue decoding past it. */
+ irsb->next = mkSzImm( ty, nextInsnAddr() );
+ irsb->jumpkind = Ijk_Boring;
+ dres->whatNext = Dis_StopHere;
+ }
+
+ return True;
+}
+
+
+/*
+ System Linkage Instructions
+*/
+static Bool dis_syslink ( UInt theInstr,
+ VexAbiInfo* abiinfo, DisResult* dres )
+{
+ IRType ty = mode64 ? Ity_I64 : Ity_I32;
+
+ if (theInstr != 0x44000002) {
+ vex_printf("dis_syslink(ppc)(theInstr)\n");
+ return False;
+ }
+
+ // sc (System Call, PPC32 p504)
+ DIP("sc\n");
+
+ /* Copy CIA into the IP_AT_SYSCALL pseudo-register, so that on AIX
+ Valgrind can back the guest up to this instruction if it needs
+ to restart the syscall. */
+ putGST( PPC_GST_IP_AT_SYSCALL, getGST( PPC_GST_CIA ) );
+
+ /* It's important that all ArchRegs carry their up-to-date value
+ at this point. So we declare an end-of-block here, which
+ forces any TempRegs caching ArchRegs to be flushed. */
+ irsb->next = abiinfo->guest_ppc_sc_continues_at_LR
+ ? getGST( PPC_GST_LR )
+ : mkSzImm( ty, nextInsnAddr() );
+ irsb->jumpkind = Ijk_Sys_syscall;
+
+ dres->whatNext = Dis_StopHere;
+ return True;
+}
+
+
+/*
+ Memory Synchronization Instructions
+
+ Note on Reservations:
+   We rely on the assumption that V will in fact only allow one thread
+   to run at a time. In effect, a thread can make a reservation, but we
+   don't check any stores it does. Instead, the reservation is cancelled
+   when the scheduler switches to another thread (run_thread_for_a_while()).
+*/
+static Bool dis_memsync ( UInt theInstr )
+{
+ /* X-Form, XL-Form */
+ UChar opc1 = ifieldOPC(theInstr);
+ UInt b11to25 = IFIELD(theInstr, 11, 15);
+ UChar flag_L = ifieldRegDS(theInstr);
+ UInt b11to20 = IFIELD(theInstr, 11, 10);
+ UChar rD_addr = ifieldRegDS(theInstr);
+ UChar rS_addr = rD_addr;
+ UChar rA_addr = ifieldRegA(theInstr);
+ UChar rB_addr = ifieldRegB(theInstr);
+ UInt opc2 = ifieldOPClo10(theInstr);
+ UChar b0 = ifieldBIT0(theInstr);
+
+ IRType ty = mode64 ? Ity_I64 : Ity_I32;
+ IRTemp EA = newTemp(ty);
+
+ assign( EA, ea_rAor0_idxd( rA_addr, rB_addr ) );
+
+ switch (opc1) {
+ /* XL-Form */
+ case 0x13: // isync (Instruction Synchronize, PPC32 p432)
+ if (opc2 != 0x096) {
+ vex_printf("dis_memsync(ppc)(0x13,opc2)\n");
+ return False;
+ }
+ if (b11to25 != 0 || b0 != 0) {
+ vex_printf("dis_memsync(ppc)(0x13,b11to25|b0)\n");
+ return False;
+ }
+ DIP("isync\n");
+ stmt( IRStmt_MBE(Imbe_Fence) );
+ break;
+
+ /* X-Form */
+ case 0x1F:
+ switch (opc2) {
+ case 0x356: // eieio (Enforce In-Order Exec of I/O, PPC32 p394)
+ if (b11to25 != 0 || b0 != 0) {
+ vex_printf("dis_memsync(ppc)(eiei0,b11to25|b0)\n");
+ return False;
+ }
+ DIP("eieio\n");
+ /* Insert a memory fence, just to be on the safe side. */
+ stmt( IRStmt_MBE(Imbe_Fence) );
+ break;
+
+ case 0x014: { // lwarx (Load Word and Reserve Indexed, PPC32 p458)
+ IRTemp res;
+ /* According to the PowerPC ISA version 2.05, b0 (called EH
+ in the documentation) is merely a hint bit to the
+ hardware, I think as to whether or not contention is
+ likely. So we can just ignore it. */
+ DIP("lwarx r%u,r%u,r%u,EH=%u\n", rD_addr, rA_addr, rB_addr, (UInt)b0);
+
+ // trap if misaligned
+ gen_SIGBUS_if_misaligned( EA, 4 );
+
+ // and actually do the load
+ res = newTemp(Ity_I32);
+ stmt( IRStmt_LLSC(Iend_BE, res, mkexpr(EA), NULL/*this is a load*/) );
+
+ putIReg( rD_addr, mkWidenFrom32(ty, mkexpr(res), False) );
+ break;
+ }
+
+ case 0x096: {
+ // stwcx. (Store Word Conditional Indexed, PPC32 p532)
+ // Note this has to handle stwcx. in both 32- and 64-bit modes,
+ // so isn't quite as straightforward as it might otherwise be.
+ IRTemp rS = newTemp(Ity_I32);
+ IRTemp resSC;
+ if (b0 != 1) {
+ vex_printf("dis_memsync(ppc)(stwcx.,b0)\n");
+ return False;
+ }
+ DIP("stwcx. r%u,r%u,r%u\n", rS_addr, rA_addr, rB_addr);
+
+ // trap if misaligned
+ gen_SIGBUS_if_misaligned( EA, 4 );
+
+ // Get the data to be stored, and narrow to 32 bits if necessary
+ assign( rS, mkNarrowTo32(ty, getIReg(rS_addr)) );
+
+ // Do the store, and get success/failure bit into resSC
+ resSC = newTemp(Ity_I1);
+ stmt( IRStmt_LLSC(Iend_BE, resSC, mkexpr(EA), mkexpr(rS)) );
+
+         // Set CR0[LT GT EQ SO] = 0b000 || XER[SO] on failure
+         // Set CR0[LT GT EQ SO] = 0b001 || XER[SO] on success
+ putCR321(0, binop(Iop_Shl8, unop(Iop_1Uto8, mkexpr(resSC)), mkU8(1)));
+ putCR0(0, getXER_SO());
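+         /* i.e. CR0.LT = CR0.GT = 0, CR0.EQ = the success bit, and
+            CR0.SO = XER[SO]. */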
+
+ /* Note:
+ If resaddr != lwarx_resaddr, CR0[EQ] is undefined, and
+ whether rS is stored is dependent on that value. */
+ /* So I guess we can just ignore this case? */
+ break;
+ }
+
+ case 0x256: // sync (Synchronize, PPC32 p543),
+ // also lwsync (L==1), ptesync (L==2)
+ /* http://sources.redhat.com/ml/binutils/2000-12/msg00311.html
+
+ The PowerPC architecture used in IBM chips has expanded
+ the sync instruction into two variants: lightweight sync
+ and heavyweight sync. The original sync instruction is
+ the new heavyweight sync and lightweight sync is a strict
+ subset of the heavyweight sync functionality. This allows
+ the programmer to specify a less expensive operation on
+ high-end systems when the full sync functionality is not
+ necessary.
+
+ The basic "sync" mnemonic now utilizes an operand. "sync"
+           without an operand now becomes an extended mnemonic for
+ heavyweight sync. Processors without the lwsync
+ instruction will not decode the L field and will perform a
+ heavyweight sync. Everything is backward compatible.
+
+ sync = sync 0
+ lwsync = sync 1
+ ptesync = sync 2 *** TODO - not implemented ***
+ */
+ if (b11to20 != 0 || b0 != 0) {
+ vex_printf("dis_memsync(ppc)(sync/lwsync,b11to20|b0)\n");
+ return False;
+ }
+ if (flag_L != 0/*sync*/ && flag_L != 1/*lwsync*/) {
+ vex_printf("dis_memsync(ppc)(sync/lwsync,flag_L)\n");
+ return False;
+ }
+ DIP("%ssync\n", flag_L == 1 ? "lw" : "");
+ /* Insert a memory fence. It's sometimes important that these
+ are carried through to the generated code. */
+ stmt( IRStmt_MBE(Imbe_Fence) );
+ break;
+
+ /* 64bit Memsync */
+ case 0x054: { // ldarx (Load DWord and Reserve Indexed, PPC64 p473)
+ IRTemp res;
+ /* According to the PowerPC ISA version 2.05, b0 (called EH
+ in the documentation) is merely a hint bit to the
+ hardware, I think as to whether or not contention is
+ likely. So we can just ignore it. */
+ if (!mode64)
+ return False;
+ DIP("ldarx r%u,r%u,r%u,EH=%u\n", rD_addr, rA_addr, rB_addr, (UInt)b0);
+
+ // trap if misaligned
+ gen_SIGBUS_if_misaligned( EA, 8 );
+
+ // and actually do the load
+ res = newTemp(Ity_I64);
+ stmt( IRStmt_LLSC(Iend_BE, res, mkexpr(EA), NULL/*this is a load*/) );
+
+ putIReg( rD_addr, mkexpr(res) );
+ break;
+ }
+
+      case 0x0D6: { // stdcx. (Store DWord Conditional Indexed, PPC64 p581)
+ // A marginally simplified version of the stwcx. case
+ IRTemp rS = newTemp(Ity_I64);
+ IRTemp resSC;
+ if (b0 != 1) {
+ vex_printf("dis_memsync(ppc)(stdcx.,b0)\n");
+ return False;
+ }
+ if (!mode64)
+ return False;
+ DIP("stdcx. r%u,r%u,r%u\n", rS_addr, rA_addr, rB_addr);
+
+ // trap if misaligned
+ gen_SIGBUS_if_misaligned( EA, 8 );
+
+ // Get the data to be stored
+ assign( rS, getIReg(rS_addr) );
+
+ // Do the store, and get success/failure bit into resSC
+ resSC = newTemp(Ity_I1);
+ stmt( IRStmt_LLSC(Iend_BE, resSC, mkexpr(EA), mkexpr(rS)) );
+
+         // Set CR0[LT GT EQ SO] = 0b000 || XER[SO] on failure
+         // Set CR0[LT GT EQ SO] = 0b001 || XER[SO] on success
+ putCR321(0, binop(Iop_Shl8, unop(Iop_1Uto8, mkexpr(resSC)), mkU8(1)));
+ putCR0(0, getXER_SO());
+
+ /* Note:
+ If resaddr != lwarx_resaddr, CR0[EQ] is undefined, and
+ whether rS is stored is dependent on that value. */
+ /* So I guess we can just ignore this case? */
+ break;
+ }
+
+ default:
+ vex_printf("dis_memsync(ppc)(opc2)\n");
+ return False;
+ }
+ break;
+
+ default:
+ vex_printf("dis_memsync(ppc)(opc1)\n");
+ return False;
+ }
+ return True;
+}
+
+
+
+/*
+ Integer Shift Instructions
+*/
+static Bool dis_int_shift ( UInt theInstr )
+{
+ /* X-Form, XS-Form */
+ UChar opc1 = ifieldOPC(theInstr);
+ UChar rS_addr = ifieldRegDS(theInstr);
+ UChar rA_addr = ifieldRegA(theInstr);
+ UChar rB_addr = ifieldRegB(theInstr);
+ UChar sh_imm = rB_addr;
+ UInt opc2 = ifieldOPClo10(theInstr);
+ UChar b1 = ifieldBIT1(theInstr);
+ UChar flag_rC = ifieldBIT0(theInstr);
+
+ IRType ty = mode64 ? Ity_I64 : Ity_I32;
+ IRTemp rA = newTemp(ty);
+ IRTemp rS = newTemp(ty);
+ IRTemp rB = newTemp(ty);
+ IRTemp outofrange = newTemp(Ity_I8);
+ IRTemp rS_lo32 = newTemp(Ity_I32);
+ IRTemp rB_lo32 = newTemp(Ity_I32);
+ IRExpr* e_tmp;
+
+ assign( rS, getIReg(rS_addr) );
+ assign( rB, getIReg(rB_addr) );
+ assign( rS_lo32, mkNarrowTo32(ty, mkexpr(rS)) );
+ assign( rB_lo32, mkNarrowTo32(ty, mkexpr(rB)) );
+
+ if (opc1 == 0x1F) {
+ switch (opc2) {
+ case 0x018: { // slw (Shift Left Word, PPC32 p505)
+ DIP("slw%s r%u,r%u,r%u\n", flag_rC ? ".":"",
+ rA_addr, rS_addr, rB_addr);
+ /* rA = rS << rB */
+ /* ppc32 semantics are:
+ slw(x,y) = (x << (y & 31)) -- primary result
+ & ~((y << 26) >>s 31) -- make result 0
+ for y in 32 .. 63
+ */
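+         /* Worked example: for y = 33 (0b100001), y << 26 has bit 31
+            set, so (y << 26) >>s 31 is all ones and the And mask
+            forces the result to 0; for y <= 31, bit 31 is clear and
+            the mask is all ones, so the shifted value passes through
+            unchanged. */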
+ e_tmp =
+ binop( Iop_And32,
+ binop( Iop_Shl32,
+ mkexpr(rS_lo32),
+ unop( Iop_32to8,
+ binop(Iop_And32,
+ mkexpr(rB_lo32), mkU32(31)))),
+ unop( Iop_Not32,
+ binop( Iop_Sar32,
+ binop(Iop_Shl32, mkexpr(rB_lo32), mkU8(26)),
+ mkU8(31))) );
+ assign( rA, mkWidenFrom32(ty, e_tmp, /* Signed */False) );
+ break;
+ }
+
+ case 0x318: { // sraw (Shift Right Alg Word, PPC32 p506)
+ IRTemp sh_amt = newTemp(Ity_I32);
+ DIP("sraw%s r%u,r%u,r%u\n", flag_rC ? ".":"",
+ rA_addr, rS_addr, rB_addr);
+ /* JRS: my reading of the (poorly worded) PPC32 doc p506 is:
+ amt = rB & 63
+ rA = Sar32( rS, amt > 31 ? 31 : amt )
+ XER.CA = amt > 31 ? sign-of-rS : (computation as per srawi)
+ */
+ assign( sh_amt, binop(Iop_And32, mkU32(0x3F),
+ mkexpr(rB_lo32)) );
+ assign( outofrange,
+ unop( Iop_1Uto8,
+ binop(Iop_CmpLT32U, mkU32(31),
+ mkexpr(sh_amt)) ));
+ e_tmp = binop( Iop_Sar32,
+ mkexpr(rS_lo32),
+ unop( Iop_32to8,
+ IRExpr_Mux0X( mkexpr(outofrange),
+ mkexpr(sh_amt),
+ mkU32(31)) ) );
+ assign( rA, mkWidenFrom32(ty, e_tmp, /* Signed */True) );
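+         /* (IRExpr_Mux0X(b, e0, eX) yields e0 when b is zero and eX
+            otherwise, so the Sar32 shift amount above is clamped to
+            31 when out of range, as per the pseudo-code.) */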
+
+ set_XER_CA( ty, PPCG_FLAG_OP_SRAW,
+ mkexpr(rA),
+ mkWidenFrom32(ty, mkexpr(rS_lo32), True),
+ mkWidenFrom32(ty, mkexpr(sh_amt), True ),
+ mkWidenFrom32(ty, getXER_CA32(), True) );
+ break;
+ }
+
+ case 0x338: // srawi (Shift Right Alg Word Immediate, PPC32 p507)
+ DIP("srawi%s r%u,r%u,%d\n", flag_rC ? ".":"",
+ rA_addr, rS_addr, sh_imm);
+ vassert(sh_imm < 32);
+ if (mode64) {
+ assign( rA, binop(Iop_Sar64,
+ binop(Iop_Shl64, getIReg(rS_addr),
+ mkU8(32)),
+ mkU8(32 + sh_imm)) );
+ } else {
+ assign( rA, binop(Iop_Sar32, mkexpr(rS_lo32),
+ mkU8(sh_imm)) );
+ }
+
+         set_XER_CA( ty, PPCG_FLAG_OP_SRAWI,
+                     mkexpr(rA),
+                     mkWidenFrom32(ty, mkexpr(rS_lo32), /* Signed */True),
+                     mkSzImm(ty, sh_imm),
+                     mkWidenFrom32(ty, getXER_CA32(), /* Signed */False) );
+ break;
+
+ case 0x218: // srw (Shift Right Word, PPC32 p508)
+ DIP("srw%s r%u,r%u,r%u\n", flag_rC ? ".":"",
+ rA_addr, rS_addr, rB_addr);
+ /* rA = rS >>u rB */
+ /* ppc32 semantics are:
+ srw(x,y) = (x >>u (y & 31)) -- primary result
+ & ~((y << 26) >>s 31) -- make result 0
+ for y in 32 .. 63
+ */
+ e_tmp =
+ binop(
+ Iop_And32,
+ binop( Iop_Shr32,
+ mkexpr(rS_lo32),
+ unop( Iop_32to8,
+ binop(Iop_And32, mkexpr(rB_lo32),
+ mkU32(31)))),
+ unop( Iop_Not32,
+ binop( Iop_Sar32,
+ binop(Iop_Shl32, mkexpr(rB_lo32),
+ mkU8(26)),
+ mkU8(31))));
+ assign( rA, mkWidenFrom32(ty, e_tmp, /* Signed */False) );
+ break;
+
+
+ /* 64bit Shifts */
+ case 0x01B: // sld (Shift Left DWord, PPC64 p568)
+ DIP("sld%s r%u,r%u,r%u\n",
+ flag_rC ? ".":"", rA_addr, rS_addr, rB_addr);
+ /* rA = rS << rB */
+ /* ppc64 semantics are:
+            sld(x,y) = (x << (y & 63))         -- primary result
+                       & ~((y << 57) >>s 63)   -- make result 0
+                       for y in 64 .. 127
+ */
+ assign( rA,
+ binop(
+ Iop_And64,
+ binop( Iop_Shl64,
+ mkexpr(rS),
+ unop( Iop_64to8,
+ binop(Iop_And64, mkexpr(rB), mkU64(63)))),
+ unop( Iop_Not64,
+ binop( Iop_Sar64,
+ binop(Iop_Shl64, mkexpr(rB), mkU8(57)),
+ mkU8(63)))) );
+ break;
+
+ case 0x31A: { // srad (Shift Right Alg DWord, PPC64 p570)
+ IRTemp sh_amt = newTemp(Ity_I64);
+ DIP("srad%s r%u,r%u,r%u\n",
+ flag_rC ? ".":"", rA_addr, rS_addr, rB_addr);
+ /* amt = rB & 127
+ rA = Sar64( rS, amt > 63 ? 63 : amt )
+ XER.CA = amt > 63 ? sign-of-rS : (computation as per srawi)
+ */
+ assign( sh_amt, binop(Iop_And64, mkU64(0x7F), mkexpr(rB)) );
+ assign( outofrange,
+ unop( Iop_1Uto8,
+ binop(Iop_CmpLT64U, mkU64(63),
+ mkexpr(sh_amt)) ));
+ assign( rA,
+ binop( Iop_Sar64,
+ mkexpr(rS),
+ unop( Iop_64to8,
+ IRExpr_Mux0X( mkexpr(outofrange),
+ mkexpr(sh_amt),
+ mkU64(63)) ))
+ );
+         set_XER_CA( ty, PPCG_FLAG_OP_SRAD,
+                     mkexpr(rA), mkexpr(rS), mkexpr(sh_amt),
+                     mkWidenFrom32(ty, getXER_CA32(), /* Signed */False) );
+ break;
+ }
+
+ case 0x33A: case 0x33B: // sradi (Shr Alg DWord Imm, PPC64 p571)
+ sh_imm |= b1<<5;
+ vassert(sh_imm < 64);
+ DIP("sradi%s r%u,r%u,%u\n",
+ flag_rC ? ".":"", rA_addr, rS_addr, sh_imm);
+ assign( rA, binop(Iop_Sar64, getIReg(rS_addr), mkU8(sh_imm)) );
+
+         set_XER_CA( ty, PPCG_FLAG_OP_SRADI,
+                     mkexpr(rA),
+                     getIReg(rS_addr),
+                     mkU64(sh_imm),
+                     mkWidenFrom32(ty, getXER_CA32(), /* Signed */False) );
+ break;
+
+ case 0x21B: // srd (Shift Right DWord, PPC64 p574)
+ DIP("srd%s r%u,r%u,r%u\n",
+ flag_rC ? ".":"", rA_addr, rS_addr, rB_addr);
+ /* rA = rS >>u rB */
+ /* ppc semantics are:
+            srd(x,y) = (x >>u (y & 63))        -- primary result
+ & ~((y << 57) >>s 63) -- make result 0
+ for y in 64 .. 127
+ */
+ assign( rA,
+ binop(
+ Iop_And64,
+ binop( Iop_Shr64,
+ mkexpr(rS),
+ unop( Iop_64to8,
+ binop(Iop_And64, mkexpr(rB), mkU64(63)))),
+ unop( Iop_Not64,
+ binop( Iop_Sar64,
+ binop(Iop_Shl64, mkexpr(rB), mkU8(57)),
+ mkU8(63)))) );
+ break;
+
+ default:
+ vex_printf("dis_int_shift(ppc)(opc2)\n");
+ return False;
+ }
+ } else {
+ vex_printf("dis_int_shift(ppc)(opc1)\n");
+ return False;
+ }
+
+ putIReg( rA_addr, mkexpr(rA) );
+
+ if (flag_rC) {
+ set_CR0( mkexpr(rA) );
+ }
+ return True;
+}
+
+
+
+/*
+ Integer Load/Store Reverse Instructions
+*/
+/* Generates code to swap the byte order in an Ity_I32. */
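+/* (e.g. 0x11223344 becomes 0x44332211.) */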
+static IRExpr* /* :: Ity_I32 */ gen_byterev32 ( IRTemp t )
+{
+ vassert(typeOfIRTemp(irsb->tyenv, t) == Ity_I32);
+ return
+ binop(Iop_Or32,
+ binop(Iop_Shl32, mkexpr(t), mkU8(24)),
+ binop(Iop_Or32,
+ binop(Iop_And32, binop(Iop_Shl32, mkexpr(t), mkU8(8)),
+ mkU32(0x00FF0000)),
+ binop(Iop_Or32,
+ binop(Iop_And32, binop(Iop_Shr32, mkexpr(t), mkU8(8)),
+ mkU32(0x0000FF00)),
+ binop(Iop_And32, binop(Iop_Shr32, mkexpr(t), mkU8(24)),
+ mkU32(0x000000FF) )
+ )));
+}
+
+/* Generates code to swap the byte order in the lower half of an Ity_I32,
+ and zeroes the upper half. */
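+/* (e.g. 0x55661122 becomes 0x00002211.) */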
+static IRExpr* /* :: Ity_I32 */ gen_byterev16 ( IRTemp t )
+{
+ vassert(typeOfIRTemp(irsb->tyenv, t) == Ity_I32);
+ return
+ binop(Iop_Or32,
+ binop(Iop_And32, binop(Iop_Shl32, mkexpr(t), mkU8(8)),
+ mkU32(0x0000FF00)),
+ binop(Iop_And32, binop(Iop_Shr32, mkexpr(t), mkU8(8)),
+ mkU32(0x000000FF))
+ );
+}
+
+static Bool dis_int_ldst_rev ( UInt theInstr )
+{
+ /* X-Form */
+ UChar opc1 = ifieldOPC(theInstr);
+ UChar rD_addr = ifieldRegDS(theInstr);
+ UChar rS_addr = rD_addr;
+ UChar rA_addr = ifieldRegA(theInstr);
+ UChar rB_addr = ifieldRegB(theInstr);
+ UInt opc2 = ifieldOPClo10(theInstr);
+ UChar b0 = ifieldBIT0(theInstr);
+
+ IRType ty = mode64 ? Ity_I64 : Ity_I32;
+ IRTemp EA = newTemp(ty);
+ IRTemp w1 = newTemp(Ity_I32);
+ IRTemp w2 = newTemp(Ity_I32);
+
+ if (opc1 != 0x1F || b0 != 0) {
+ vex_printf("dis_int_ldst_rev(ppc)(opc1|b0)\n");
+ return False;
+ }
+
+ assign( EA, ea_rAor0_idxd( rA_addr, rB_addr ) );
+
+ switch (opc2) {
+
+ case 0x316: // lhbrx (Load Halfword Byte-Reverse Indexed, PPC32 p449)
+ DIP("lhbrx r%u,r%u,r%u\n", rD_addr, rA_addr, rB_addr);
+ assign( w1, unop(Iop_16Uto32, loadBE(Ity_I16, mkexpr(EA))) );
+ assign( w2, gen_byterev16(w1) );
+ putIReg( rD_addr, mkWidenFrom32(ty, mkexpr(w2),
+ /* Signed */False) );
+ break;
+
+ case 0x216: // lwbrx (Load Word Byte-Reverse Indexed, PPC32 p459)
+ DIP("lwbrx r%u,r%u,r%u\n", rD_addr, rA_addr, rB_addr);
+ assign( w1, loadBE(Ity_I32, mkexpr(EA)) );
+ assign( w2, gen_byterev32(w1) );
+ putIReg( rD_addr, mkWidenFrom32(ty, mkexpr(w2),
+ /* Signed */False) );
+ break;
+
+ case 0x396: // sthbrx (Store Half Word Byte-Reverse Indexed, PPC32 p523)
+ DIP("sthbrx r%u,r%u,r%u\n", rS_addr, rA_addr, rB_addr);
+ assign( w1, mkNarrowTo32(ty, getIReg(rS_addr)) );
+ storeBE( mkexpr(EA), unop(Iop_32to16, gen_byterev16(w1)) );
+ break;
+
+ case 0x296: // stwbrx (Store Word Byte-Reverse Indxd, PPC32 p531)
+ DIP("stwbrx r%u,r%u,r%u\n", rS_addr, rA_addr, rB_addr);
+ assign( w1, mkNarrowTo32(ty, getIReg(rS_addr)) );
+ storeBE( mkexpr(EA), gen_byterev32(w1) );
+ break;
+
+ default:
+ vex_printf("dis_int_ldst_rev(ppc)(opc2)\n");
+ return False;
+ }
+ return True;
+}
+
+
+
+/*
+ Processor Control Instructions
+*/
+static Bool dis_proc_ctl ( VexAbiInfo* vbi, UInt theInstr )
+{
+ UChar opc1 = ifieldOPC(theInstr);
+
+ /* X-Form */
+ UChar crfD = toUChar( IFIELD( theInstr, 23, 3 ) );
+ UChar b21to22 = toUChar( IFIELD( theInstr, 21, 2 ) );
+ UChar rD_addr = ifieldRegDS(theInstr);
+ UInt b11to20 = IFIELD( theInstr, 11, 10 );
+
+ /* XFX-Form */
+ UChar rS_addr = rD_addr;
+ UInt SPR = b11to20;
+ UInt TBR = b11to20;
+ UChar b20 = toUChar( IFIELD( theInstr, 20, 1 ) );
+ UInt CRM = IFIELD( theInstr, 12, 8 );
+ UChar b11 = toUChar( IFIELD( theInstr, 11, 1 ) );
+
+ UInt opc2 = ifieldOPClo10(theInstr);
+ UChar b0 = ifieldBIT0(theInstr);
+
+ IRType ty = mode64 ? Ity_I64 : Ity_I32;
+ IRTemp rS = newTemp(ty);
+ assign( rS, getIReg(rS_addr) );
+
+ /* Reorder SPR field as per PPC32 p470 */
+ SPR = ((SPR & 0x1F) << 5) | ((SPR >> 5) & 0x1F);
+ /* Reorder TBR field as per PPC32 p475 */
+ TBR = ((TBR & 31) << 5) | ((TBR >> 5) & 31);
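+   /* e.g. a raw SPR field of 0x100 has its two 5-bit halves swapped
+      to give SPR 8, the LR (handled as case 0x8 below). */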
+
+ if (opc1 != 0x1F || b0 != 0) {
+ vex_printf("dis_proc_ctl(ppc)(opc1|b0)\n");
+ return False;
+ }
+
+ switch (opc2) {
+ /* X-Form */
+ case 0x200: { // mcrxr (Move to Cond Register from XER, PPC32 p466)
+ if (b21to22 != 0 || b11to20 != 0) {
+ vex_printf("dis_proc_ctl(ppc)(mcrxr,b21to22|b11to20)\n");
+ return False;
+ }
+ DIP("mcrxr crf%d\n", crfD);
+ /* Move XER[0-3] (the top 4 bits of XER) to CR[crfD] */
+ putGST_field( PPC_GST_CR,
+ getGST_field( PPC_GST_XER, 7 ),
+ crfD );
+
+ // Clear XER[0-3]
+ putXER_SO( mkU8(0) );
+ putXER_OV( mkU8(0) );
+ putXER_CA( mkU8(0) );
+ break;
+ }
+
+ case 0x013:
+ // b11to20==0: mfcr (Move from Cond Register, PPC32 p467)
+ // b20==1 & b11==0: mfocrf (Move from One CR Field)
+      // However, it seems that the 'mfcr' behaviour is an acceptable
+      // implementation of mfocrf (from the 2.02 arch spec)
+ if (b11to20 == 0) {
+ DIP("mfcr r%u\n", rD_addr);
+ putIReg( rD_addr, mkWidenFrom32(ty, getGST( PPC_GST_CR ),
+ /* Signed */False) );
+ break;
+ }
+ if (b20 == 1 && b11 == 0) {
+ DIP("mfocrf r%u,%u\n", rD_addr, CRM);
+ putIReg( rD_addr, mkWidenFrom32(ty, getGST( PPC_GST_CR ),
+ /* Signed */False) );
+ break;
+ }
+ /* not decodable */
+ return False;
+
+ /* XFX-Form */
+ case 0x153: // mfspr (Move from Special-Purpose Register, PPC32 p470)
+
+ switch (SPR) { // Choose a register...
+ case 0x1:
+ DIP("mfxer r%u\n", rD_addr);
+ putIReg( rD_addr, mkWidenFrom32(ty, getGST( PPC_GST_XER ),
+ /* Signed */False) );
+ break;
+ case 0x8:
+ DIP("mflr r%u\n", rD_addr);
+ putIReg( rD_addr, getGST( PPC_GST_LR ) );
+ break;
+ case 0x9:
+ DIP("mfctr r%u\n", rD_addr);
+ putIReg( rD_addr, getGST( PPC_GST_CTR ) );
+ break;
+ case 0x100:
+ DIP("mfvrsave r%u\n", rD_addr);
+ putIReg( rD_addr, mkWidenFrom32(ty, getGST( PPC_GST_VRSAVE ),
+ /* Signed */False) );
+ break;
+
+ case 0x103:
+ DIP("mfspr r%u, SPRG3(readonly)\n", rD_addr);
+ putIReg( rD_addr, getGST( PPC_GST_SPRG3_RO ) );
+ break;
+
+ /* Even a lowly PPC7400 can run the associated helper, so no
+ obvious need for feature testing at this point. */
+ case 268 /* 0x10C */:
+ case 269 /* 0x10D */: {
+ UInt arg = SPR==268 ? 0 : 1;
+ IRTemp val = newTemp(Ity_I32);
+ IRExpr** args = mkIRExprVec_1( mkU32(arg) );
+ IRDirty* d = unsafeIRDirty_1_N(
+ val,
+ 0/*regparms*/,
+ "ppc32g_dirtyhelper_MFSPR_268_269",
+ fnptr_to_fnentry
+ (vbi, &ppc32g_dirtyhelper_MFSPR_268_269),
+ args
+ );
+ /* execute the dirty call, dumping the result in val. */
+ stmt( IRStmt_Dirty(d) );
+ putIReg( rD_addr,
+ mkWidenFrom32(ty, mkexpr(val), False/*unsigned*/) );
+ DIP("mfspr r%u,%u", rD_addr, (UInt)SPR);
+ break;
+ }
+
+ /* Again, runs natively on PPC7400 (7447, really). Not
+ bothering with a feature test. */
+ case 287: /* 0x11F */ {
+ IRTemp val = newTemp(Ity_I32);
+ IRExpr** args = mkIRExprVec_0();
+ IRDirty* d = unsafeIRDirty_1_N(
+ val,
+ 0/*regparms*/,
+ "ppc32g_dirtyhelper_MFSPR_287",
+ fnptr_to_fnentry
+ (vbi, &ppc32g_dirtyhelper_MFSPR_287),
+ args
+ );
+ /* execute the dirty call, dumping the result in val. */
+ stmt( IRStmt_Dirty(d) );
+ putIReg( rD_addr,
+ mkWidenFrom32(ty, mkexpr(val), False/*unsigned*/) );
+ DIP("mfspr r%u,%u", rD_addr, (UInt)SPR);
+ break;
+ }
+
+ default:
+ vex_printf("dis_proc_ctl(ppc)(mfspr,SPR)(0x%x)\n", SPR);
+ return False;
+ }
+ break;
+
+ case 0x173: { // mftb (Move from Time Base, PPC32 p475)
+ IRTemp val = newTemp(Ity_I64);
+ IRExpr** args = mkIRExprVec_0();
+ IRDirty* d = unsafeIRDirty_1_N(
+ val,
+ 0/*regparms*/,
+ "ppcg_dirtyhelper_MFTB",
+ fnptr_to_fnentry(vbi, &ppcg_dirtyhelper_MFTB),
+ args );
+ /* execute the dirty call, dumping the result in val. */
+ stmt( IRStmt_Dirty(d) );
+
+ switch (TBR) {
+ case 269:
+ DIP("mftbu r%u", rD_addr);
+ putIReg( rD_addr,
+ mkWidenFrom32(ty, unop(Iop_64HIto32, mkexpr(val)),
+ /* Signed */False) );
+ break;
+ case 268:
+ DIP("mftb r%u", rD_addr);
+ putIReg( rD_addr, (mode64) ? mkexpr(val) :
+ unop(Iop_64to32, mkexpr(val)) );
+ break;
+ default:
+ return False; /* illegal instruction */
+ }
+ break;
+ }
+
+ case 0x090: {
+ // b20==0: mtcrf (Move to Cond Register Fields, PPC32 p477)
+ // b20==1: mtocrf (Move to One Cond Reg Field)
+ Int cr;
+ UChar shft;
+ if (b11 != 0)
+ return False;
+ if (b20 == 1) {
+ /* ppc64 v2.02 spec says mtocrf gives undefined outcome if >
+ 1 field is written. It seems more robust to decline to
+ decode the insn if so. */
+ switch (CRM) {
+ case 0x01: case 0x02: case 0x04: case 0x08:
+ case 0x10: case 0x20: case 0x40: case 0x80:
+ break;
+ default:
+ return False;
+ }
+ }
+ DIP("%s 0x%x,r%u\n", b20==1 ? "mtocrf" : "mtcrf",
+ CRM, rS_addr);
+ /* Write to each field specified by CRM */
+ for (cr = 0; cr < 8; cr++) {
+ if ((CRM & (1 << (7-cr))) == 0)
+ continue;
+ shft = 4*(7-cr);
+ putGST_field( PPC_GST_CR,
+ binop(Iop_Shr32,
+ mkNarrowTo32(ty, mkexpr(rS)),
+ mkU8(shft)), cr );
+ }
+ break;
+ }
+
+ case 0x1D3: // mtspr (Move to Special-Purpose Register, PPC32 p483)
+
+ switch (SPR) { // Choose a register...
+ case 0x1:
+ DIP("mtxer r%u\n", rS_addr);
+ putGST( PPC_GST_XER, mkNarrowTo32(ty, mkexpr(rS)) );
+ break;
+ case 0x8:
+ DIP("mtlr r%u\n", rS_addr);
+ putGST( PPC_GST_LR, mkexpr(rS) );
+ break;
+ case 0x9:
+ DIP("mtctr r%u\n", rS_addr);
+ putGST( PPC_GST_CTR, mkexpr(rS) );
+ break;
+ case 0x100:
+ DIP("mtvrsave r%u\n", rS_addr);
+ putGST( PPC_GST_VRSAVE, mkNarrowTo32(ty, mkexpr(rS)) );
+ break;
+
+ default:
+ vex_printf("dis_proc_ctl(ppc)(mtspr,SPR)(%u)\n", SPR);
+ return False;
+ }
+ break;
+
+ default:
+ vex_printf("dis_proc_ctl(ppc)(opc2)\n");
+ return False;
+ }
+ return True;
+}
+
+
+/*
+ Cache Management Instructions
+*/
+static Bool dis_cache_manage ( UInt theInstr,
+ DisResult* dres,
+ VexArchInfo* guest_archinfo )
+{
+ /* X-Form */
+ UChar opc1 = ifieldOPC(theInstr);
+ UChar b21to25 = ifieldRegDS(theInstr);
+ UChar rA_addr = ifieldRegA(theInstr);
+ UChar rB_addr = ifieldRegB(theInstr);
+ UInt opc2 = ifieldOPClo10(theInstr);
+ UChar b0 = ifieldBIT0(theInstr);
+ UInt lineszB = guest_archinfo->ppc_cache_line_szB;
+ Bool is_dcbzl = False;
+
+ IRType ty = mode64 ? Ity_I64 : Ity_I32;
+
+ /* For dcbt, the lowest two bits of b21to25 encode an
+ access-direction hint (TH field) which we ignore. Well, that's
+ what the PowerPC documentation says. In fact xlc -O4 on POWER5
+ seems to generate values of 8 and 10 for b21to25. */
+ if (opc1 == 0x1F && opc2 == 0x116) {
+ /* b21to25 &= ~3; */ /* if the docs were true */
+ b21to25 = 0; /* blunt instrument */
+ }
+ if (opc1 == 0x1F && opc2 == 0x3F6) { // dcbz
+ if (b21to25 == 1) {
+ is_dcbzl = True;
+ b21to25 = 0;
+ if (!(guest_archinfo->ppc_dcbzl_szB)) {
+ vex_printf("dis_cache_manage(ppc)(dcbzl not supported by host)\n");
+ return False;
+ }
+ }
+ }
+
+ if (opc1 != 0x1F || b21to25 != 0 || b0 != 0) {
+ if (0) vex_printf("dis_cache_manage %d %d %d\n",
+ (Int)opc1, (Int)b21to25, (Int)b0);
+ vex_printf("dis_cache_manage(ppc)(opc1|b21to25|b0)\n");
+ return False;
+ }
+
+ /* stay sane .. */
+ vassert(lineszB == 32 || lineszB == 64 || lineszB == 128);
+
+ switch (opc2) {
+//zz case 0x2F6: // dcba (Data Cache Block Allocate, PPC32 p380)
+//zz vassert(0); /* AWAITING TEST CASE */
+//zz DIP("dcba r%u,r%u\n", rA_addr, rB_addr);
+//zz if (0) vex_printf("vex ppc->IR: kludged dcba\n");
+//zz break;
+
+ case 0x056: // dcbf (Data Cache Block Flush, PPC32 p382)
+ DIP("dcbf r%u,r%u\n", rA_addr, rB_addr);
+ /* nop as far as vex is concerned */
+ break;
+
+ case 0x036: // dcbst (Data Cache Block Store, PPC32 p384)
+ DIP("dcbst r%u,r%u\n", rA_addr, rB_addr);
+ /* nop as far as vex is concerned */
+ break;
+
+ case 0x116: // dcbt (Data Cache Block Touch, PPC32 p385)
+ DIP("dcbt r%u,r%u\n", rA_addr, rB_addr);
+ /* nop as far as vex is concerned */
+ break;
+
+ case 0x0F6: // dcbtst (Data Cache Block Touch for Store, PPC32 p386)
+ DIP("dcbtst r%u,r%u\n", rA_addr, rB_addr);
+ /* nop as far as vex is concerned */
+ break;
+
+ case 0x3F6: { // dcbz (Data Cache Block Clear to Zero, PPC32 p387)
+ // dcbzl (Data Cache Block Clear to Zero Long, bug#135264)
+ /* Clear all bytes in cache block at (rA|0) + rB. */
+ IRTemp EA = newTemp(ty);
+ IRTemp addr = newTemp(ty);
+ IRExpr* irx_addr;
+ UInt i;
+ UInt clearszB;
+ if (is_dcbzl) {
+ clearszB = guest_archinfo->ppc_dcbzl_szB;
+ DIP("dcbzl r%u,r%u\n", rA_addr, rB_addr);
+ }
+ else {
+ clearszB = guest_archinfo->ppc_dcbz_szB;
+ DIP("dcbz r%u,r%u\n", rA_addr, rB_addr);
+ }
+
+ assign( EA, ea_rAor0_idxd(rA_addr, rB_addr) );
+
+ if (mode64) {
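+         /* e.g. with a 128-byte block, the mask below is ~127ULL and
+            the loop issues 16 eight-byte zero stores. */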
+ /* Round EA down to the start of the containing block. */
+ assign( addr, binop( Iop_And64,
+ mkexpr(EA),
+ mkU64( ~((ULong)clearszB-1) )) );
+
+ for (i = 0; i < clearszB / 8; i++) {
+ irx_addr = binop( Iop_Add64, mkexpr(addr), mkU64(i*8) );
+ storeBE( irx_addr, mkU64(0) );
+ }
+ } else {
+ /* Round EA down to the start of the containing block. */
+ assign( addr, binop( Iop_And32,
+ mkexpr(EA),
+ mkU32( ~(clearszB-1) )) );
+
+ for (i = 0; i < clearszB / 4; i++) {
+ irx_addr = binop( Iop_Add32, mkexpr(addr), mkU32(i*4) );
+ storeBE( irx_addr, mkU32(0) );
+ }
+ }
+ break;
+ }
+
+ case 0x3D6: {
+ // icbi (Instruction Cache Block Invalidate, PPC32 p431)
+ /* Invalidate all translations containing code from the cache
+ block at (rA|0) + rB. */
+ IRTemp EA = newTemp(ty);
+ IRTemp addr = newTemp(ty);
+ DIP("icbi r%u,r%u\n", rA_addr, rB_addr);
+ assign( EA, ea_rAor0_idxd(rA_addr, rB_addr) );
+
+ /* Round EA down to the start of the containing block. */
+ assign( addr, binop( mkSzOp(ty, Iop_And8),
+ mkexpr(EA),
+ mkSzImm(ty, ~(((ULong)lineszB)-1) )) );
+ putGST( PPC_GST_TISTART, mkexpr(addr) );
+ putGST( PPC_GST_TILEN, mkSzImm(ty, lineszB) );
+
+ /* be paranoid ... */
+ stmt( IRStmt_MBE(Imbe_Fence) );
+
+ irsb->jumpkind = Ijk_TInval;
+ irsb->next = mkSzImm(ty, nextInsnAddr());
+ dres->whatNext = Dis_StopHere;
+ break;
+ }
+
+ default:
+ vex_printf("dis_cache_manage(ppc)(opc2)\n");
+ return False;
+ }
+ return True;
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Floating Point Helpers ---*/
+/*------------------------------------------------------------*/
+
+/* --------- Synthesise a 2-bit FPU rounding mode. --------- */
+/* Produces a value in 0 .. 3, which is encoded as per the type
+   IRRoundingMode. The PPC rounding-mode encoding differs from
+   IRRoundingMode, so we need to map it.
+*/
+static IRExpr* /* :: Ity_I32 */ get_IR_roundingmode ( void )
+{
+/*
+ rounding mode | PPC | IR
+ ------------------------
+ to nearest | 00 | 00
+ to zero | 01 | 11
+ to +infinity | 10 | 10
+ to -infinity | 11 | 01
+*/
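+/* Checking the formula below against the table: for PPC 01 (to zero),
+   (01 << 1) & 2 = 10, and 01 ^ 10 = 11, the IR to-zero encoding; for
+   PPC 11 (to -infinity), 11 ^ 10 = 01, the IR to-minus-infinity
+   encoding. 00 and 10 map to themselves. */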
+ IRTemp rm_PPC32 = newTemp(Ity_I32);
+ assign( rm_PPC32, getGST_masked( PPC_GST_FPSCR, MASK_FPSCR_RN ) );
+
+ // rm_IR = XOR( rm_PPC32, (rm_PPC32 << 1) & 2)
+ return binop( Iop_Xor32,
+ mkexpr(rm_PPC32),
+ binop( Iop_And32,
+ binop(Iop_Shl32, mkexpr(rm_PPC32), mkU8(1)),
+ mkU32(2) ));
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Floating Point Instruction Translation ---*/
+/*------------------------------------------------------------*/
+
+/*
+ Floating Point Load Instructions
+*/
+static Bool dis_fp_load ( UInt theInstr )
+{
+ /* X-Form, D-Form */
+ UChar opc1 = ifieldOPC(theInstr);
+ UChar frD_addr = ifieldRegDS(theInstr);
+ UChar rA_addr = ifieldRegA(theInstr);
+ UChar rB_addr = ifieldRegB(theInstr);
+ UInt opc2 = ifieldOPClo10(theInstr);
+ UChar b0 = ifieldBIT0(theInstr);
+ UInt uimm16 = ifieldUIMM16(theInstr);
+
+ Int simm16 = extend_s_16to32(uimm16);
+ IRType ty = mode64 ? Ity_I64 : Ity_I32;
+ IRTemp EA = newTemp(ty);
+ IRTemp rA = newTemp(ty);
+ IRTemp rB = newTemp(ty);
+ IRTemp iHi = newTemp(Ity_I32);
+ IRTemp iLo = newTemp(Ity_I32);
+
+ assign( rA, getIReg(rA_addr) );
+ assign( rB, getIReg(rB_addr) );
+
+ /* These are completely straightforward from a rounding and status
+ bits perspective: no rounding involved and no funny status or CR
+ bits affected. */
+
+ switch (opc1) {
+ case 0x30: // lfs (Load Float Single, PPC32 p441)
+ DIP("lfs fr%u,%d(r%u)\n", frD_addr, simm16, rA_addr);
+ assign( EA, ea_rAor0_simm(rA_addr, simm16) );
+ putFReg( frD_addr,
+ unop(Iop_F32toF64, loadBE(Ity_F32, mkexpr(EA))) );
+ break;
+
+ case 0x31: // lfsu (Load Float Single, Update, PPC32 p442)
+ if (rA_addr == 0)
+ return False;
+ DIP("lfsu fr%u,%d(r%u)\n", frD_addr, simm16, rA_addr);
+ assign( EA, ea_rA_simm(rA_addr, simm16) );
+ putFReg( frD_addr,
+ unop(Iop_F32toF64, loadBE(Ity_F32, mkexpr(EA))) );
+ putIReg( rA_addr, mkexpr(EA) );
+ break;
+
+ case 0x32: // lfd (Load Float Double, PPC32 p437)
+ DIP("lfd fr%u,%d(r%u)\n", frD_addr, simm16, rA_addr);
+ assign( EA, ea_rAor0_simm(rA_addr, simm16) );
+ putFReg( frD_addr, loadBE(Ity_F64, mkexpr(EA)) );
+ break;
+
+ case 0x33: // lfdu (Load Float Double, Update, PPC32 p438)
+ if (rA_addr == 0)
+ return False;
+ DIP("lfdu fr%u,%d(r%u)\n", frD_addr, simm16, rA_addr);
+ assign( EA, ea_rA_simm(rA_addr, simm16) );
+ putFReg( frD_addr, loadBE(Ity_F64, mkexpr(EA)) );
+ putIReg( rA_addr, mkexpr(EA) );
+ break;
+
+ case 0x1F:
+ if (b0 != 0) {
+ vex_printf("dis_fp_load(ppc)(instr,b0)\n");
+ return False;
+ }
+
+ switch(opc2) {
+ case 0x217: // lfsx (Load Float Single Indexed, PPC32 p444)
+ DIP("lfsx fr%u,r%u,r%u\n", frD_addr, rA_addr, rB_addr);
+ assign( EA, ea_rAor0_idxd(rA_addr, rB_addr) );
+ putFReg( frD_addr, unop( Iop_F32toF64,
+ loadBE(Ity_F32, mkexpr(EA))) );
+ break;
+
+ case 0x237: // lfsux (Load Float Single, Update Indxd, PPC32 p443)
+ if (rA_addr == 0)
+ return False;
+ DIP("lfsux fr%u,r%u,r%u\n", frD_addr, rA_addr, rB_addr);
+ assign( EA, ea_rA_idxd(rA_addr, rB_addr) );
+ putFReg( frD_addr,
+ unop(Iop_F32toF64, loadBE(Ity_F32, mkexpr(EA))) );
+ putIReg( rA_addr, mkexpr(EA) );
+ break;
+
+ case 0x257: // lfdx (Load Float Double Indexed, PPC32 p440)
+ DIP("lfdx fr%u,r%u,r%u\n", frD_addr, rA_addr, rB_addr);
+ assign( EA, ea_rAor0_idxd(rA_addr, rB_addr) );
+ putFReg( frD_addr, loadBE(Ity_F64, mkexpr(EA)) );
+ break;
+
+ case 0x277: // lfdux (Load Float Double, Update Indxd, PPC32 p439)
+ if (rA_addr == 0)
+ return False;
+ DIP("lfdux fr%u,r%u,r%u\n", frD_addr, rA_addr, rB_addr);
+ assign( EA, ea_rA_idxd(rA_addr, rB_addr) );
+ putFReg( frD_addr, loadBE(Ity_F64, mkexpr(EA)) );
+ putIReg( rA_addr, mkexpr(EA) );
+ break;
+
+ case 0x357: // lfiwax (Load Float As Integer, Indxd, ISA 2.05 p120)
+ DIP("lfiwax fr%u,r%u,r%u\n", frD_addr, rA_addr, rB_addr);
+ assign( EA, ea_rAor0_idxd( rA_addr, rB_addr ) );
+ assign( iLo, loadBE(Ity_I32, mkexpr(EA)) );
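+         /* (The next assignment computes iHi = 0 - (iLo >>u 31),
+            i.e. 32 copies of iLo's sign bit, so frD below receives
+            the 64-bit sign-extension of the loaded word.) */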
+ assign( iHi, binop(Iop_Sub32,
+ mkU32(0),
+ binop(Iop_Shr32, mkexpr(iLo), mkU8(31))) );
+ putFReg( frD_addr, unop(Iop_ReinterpI64asF64,
+ binop(Iop_32HLto64, mkexpr(iHi), mkexpr(iLo))) );
+ break;
+
+ default:
+ vex_printf("dis_fp_load(ppc)(opc2)\n");
+ return False;
+ }
+ break;
+
+ default:
+ vex_printf("dis_fp_load(ppc)(opc1)\n");
+ return False;
+ }
+ return True;
+}
+
+
+
+/*
+ Floating Point Store Instructions
+*/
+static Bool dis_fp_store ( UInt theInstr )
+{
+ /* X-Form, D-Form */
+ UChar opc1 = ifieldOPC(theInstr);
+ UChar frS_addr = ifieldRegDS(theInstr);
+ UChar rA_addr = ifieldRegA(theInstr);
+ UChar rB_addr = ifieldRegB(theInstr);
+ UInt opc2 = ifieldOPClo10(theInstr);
+ UChar b0 = ifieldBIT0(theInstr);
+ Int uimm16 = ifieldUIMM16(theInstr);
+
+ Int simm16 = extend_s_16to32(uimm16);
+ IRTemp frS = newTemp(Ity_F64);
+ IRType ty = mode64 ? Ity_I64 : Ity_I32;
+ IRTemp EA = newTemp(ty);
+ IRTemp rA = newTemp(ty);
+ IRTemp rB = newTemp(ty);
+
+ assign( frS, getFReg(frS_addr) );
+ assign( rA, getIReg(rA_addr) );
+ assign( rB, getIReg(rB_addr) );
+
+ /* These are straightforward from a status bits perspective: no
+ funny status or CR bits affected. For single precision stores,
+ the values are truncated and denormalised (not rounded) to turn
+ them into single precision values. */
+
+ switch (opc1) {
+
+ case 0x34: // stfs (Store Float Single, PPC32 p518)
+ DIP("stfs fr%u,%d(r%u)\n", frS_addr, simm16, rA_addr);
+ assign( EA, ea_rAor0_simm(rA_addr, simm16) );
+      /* Use Iop_TruncF64asF32 to truncate and possibly denormalise
+ the value to be stored in the correct way, without any
+ rounding. */
+ storeBE( mkexpr(EA),
+ unop(Iop_TruncF64asF32, mkexpr(frS)) );
+ break;
+
+ case 0x35: // stfsu (Store Float Single, Update, PPC32 p519)
+ if (rA_addr == 0)
+ return False;
+ DIP("stfsu fr%u,%d(r%u)\n", frS_addr, simm16, rA_addr);
+ assign( EA, ea_rA_simm(rA_addr, simm16) );
+ /* See comment for stfs */
+ storeBE( mkexpr(EA),
+ unop(Iop_TruncF64asF32, mkexpr(frS)) );
+ putIReg( rA_addr, mkexpr(EA) );
+ break;
+
+ case 0x36: // stfd (Store Float Double, PPC32 p513)
+ DIP("stfd fr%u,%d(r%u)\n", frS_addr, simm16, rA_addr);
+ assign( EA, ea_rAor0_simm(rA_addr, simm16) );
+ storeBE( mkexpr(EA), mkexpr(frS) );
+ break;
+
+ case 0x37: // stfdu (Store Float Double, Update, PPC32 p514)
+ if (rA_addr == 0)
+ return False;
+ DIP("stfdu fr%u,%d(r%u)\n", frS_addr, simm16, rA_addr);
+ assign( EA, ea_rA_simm(rA_addr, simm16) );
+ storeBE( mkexpr(EA), mkexpr(frS) );
+ putIReg( rA_addr, mkexpr(EA) );
+ break;
+
+ case 0x1F:
+ if (b0 != 0) {
+ vex_printf("dis_fp_store(ppc)(instr,b0)\n");
+ return False;
+ }
+ switch(opc2) {
+ case 0x297: // stfsx (Store Float Single Indexed, PPC32 p521)
+ DIP("stfsx fr%u,r%u,r%u\n", frS_addr, rA_addr, rB_addr);
+ assign( EA, ea_rAor0_idxd(rA_addr, rB_addr) );
+ /* See note for stfs */
+ storeBE( mkexpr(EA),
+ unop(Iop_TruncF64asF32, mkexpr(frS)) );
+ break;
+
+ case 0x2B7: // stfsux (Store Float Sgl, Update Indxd, PPC32 p520)
+ if (rA_addr == 0)
+ return False;
+ DIP("stfsux fr%u,r%u,r%u\n", frS_addr, rA_addr, rB_addr);
+ assign( EA, ea_rA_idxd(rA_addr, rB_addr) );
+ /* See note for stfs */
+ storeBE( mkexpr(EA),
+ unop(Iop_TruncF64asF32, mkexpr(frS)) );
+ putIReg( rA_addr, mkexpr(EA) );
+ break;
+
+ case 0x2D7: // stfdx (Store Float Double Indexed, PPC32 p516)
+ DIP("stfdx fr%u,r%u,r%u\n", frS_addr, rA_addr, rB_addr);
+ assign( EA, ea_rAor0_idxd(rA_addr, rB_addr) );
+ storeBE( mkexpr(EA), mkexpr(frS) );
+ break;
+
+ case 0x2F7: // stfdux (Store Float Dbl, Update Indxd, PPC32 p515)
+ if (rA_addr == 0)
+ return False;
+ DIP("stfdux fr%u,r%u,r%u\n", frS_addr, rA_addr, rB_addr);
+ assign( EA, ea_rA_idxd(rA_addr, rB_addr) );
+ storeBE( mkexpr(EA), mkexpr(frS) );
+ putIReg( rA_addr, mkexpr(EA) );
+ break;
+
+ case 0x3D7: // stfiwx (Store Float as Int, Indexed, PPC32 p517)
+ // NOTE: POWERPC OPTIONAL, "Graphics Group" (PPC32_GX)
+ DIP("stfiwx fr%u,r%u,r%u\n", frS_addr, rA_addr, rB_addr);
+ assign( EA, ea_rAor0_idxd(rA_addr, rB_addr) );
+ storeBE( mkexpr(EA),
+ unop(Iop_64to32, unop(Iop_ReinterpF64asI64, mkexpr(frS))) );
+ break;
+
+ default:
+ vex_printf("dis_fp_store(ppc)(opc2)\n");
+ return False;
+ }
+ break;
+
+ default:
+ vex_printf("dis_fp_store(ppc)(opc1)\n");
+ return False;
+ }
+ return True;
+}
+
+
+
+/*
+ Floating Point Arith Instructions
+*/
+static Bool dis_fp_arith ( UInt theInstr )
+{
+ /* A-Form */
+ UChar opc1 = ifieldOPC(theInstr);
+ UChar frD_addr = ifieldRegDS(theInstr);
+ UChar frA_addr = ifieldRegA(theInstr);
+ UChar frB_addr = ifieldRegB(theInstr);
+ UChar frC_addr = ifieldRegC(theInstr);
+ UChar opc2 = ifieldOPClo5(theInstr);
+ UChar flag_rC = ifieldBIT0(theInstr);
+
+ IRTemp frD = newTemp(Ity_F64);
+ IRTemp frA = newTemp(Ity_F64);
+ IRTemp frB = newTemp(Ity_F64);
+ IRTemp frC = newTemp(Ity_F64);
+ IRExpr* rm = get_IR_roundingmode();
+
+ /* By default, we will examine the results of the operation and set
+ fpscr[FPRF] accordingly. */
+ Bool set_FPRF = True;
+
+ /* By default, if flag_RC is set, we will clear cr1 after the
+ operation. In reality we should set cr1 to indicate the
+ exception status of the operation, but since we're not
+ simulating exceptions, the exception status will appear to be
+ zero. Hence cr1 should be cleared if this is a . form insn. */
+ Bool clear_CR1 = True;
+
+ assign( frA, getFReg(frA_addr));
+ assign( frB, getFReg(frB_addr));
+ assign( frC, getFReg(frC_addr));
+
+ switch (opc1) {
+ case 0x3B:
+ switch (opc2) {
+ case 0x12: // fdivs (Floating Divide Single, PPC32 p407)
+ if (frC_addr != 0)
+ return False;
+ DIP("fdivs%s fr%u,fr%u,fr%u\n", flag_rC ? ".":"",
+ frD_addr, frA_addr, frB_addr);
+ assign( frD, triop( Iop_DivF64r32,
+ rm, mkexpr(frA), mkexpr(frB) ));
+ break;
+
+ case 0x14: // fsubs (Floating Subtract Single, PPC32 p430)
+ if (frC_addr != 0)
+ return False;
+ DIP("fsubs%s fr%u,fr%u,fr%u\n", flag_rC ? ".":"",
+ frD_addr, frA_addr, frB_addr);
+ assign( frD, triop( Iop_SubF64r32,
+ rm, mkexpr(frA), mkexpr(frB) ));
+ break;
+
+ case 0x15: // fadds (Floating Add Single, PPC32 p401)
+ if (frC_addr != 0)
+ return False;
+ DIP("fadds%s fr%u,fr%u,fr%u\n", flag_rC ? ".":"",
+ frD_addr, frA_addr, frB_addr);
+ assign( frD, triop( Iop_AddF64r32,
+ rm, mkexpr(frA), mkexpr(frB) ));
+ break;
+
+ case 0x16: // fsqrts (Floating SqRt (Single-Precision), PPC32 p428)
+ // NOTE: POWERPC OPTIONAL, "General-Purpose Group" (PPC32_FX)
+ if (frA_addr != 0 || frC_addr != 0)
+ return False;
+ DIP("fsqrts%s fr%u,fr%u\n", flag_rC ? ".":"",
+ frD_addr, frB_addr);
+ // however illogically, on ppc970 this insn behaves identically
+ // to fsqrt (double-precision). So use SqrtF64, not SqrtF64r32.
+ assign( frD, binop( Iop_SqrtF64, rm, mkexpr(frB) ));
+ break;
+
+ case 0x18: // fres (Floating Reciprocal Estimate Single, PPC32 p421)
+ // NOTE: POWERPC OPTIONAL, "Graphics Group" (PPC32_GX)
+ if (frA_addr != 0 || frC_addr != 0)
+ return False;
+ DIP("fres%s fr%u,fr%u\n", flag_rC ? ".":"",
+ frD_addr, frB_addr);
+ { IRExpr* ieee_one
+ = IRExpr_Const(IRConst_F64i(0x3ff0000000000000ULL));
+ assign( frD, triop( Iop_DivF64r32,
+ rm,
+ ieee_one, mkexpr(frB) ));
+ }
+ break;
+
+ case 0x19: // fmuls (Floating Multiply Single, PPC32 p414)
+ if (frB_addr != 0)
+ return False;
+ DIP("fmuls%s fr%u,fr%u,fr%u\n", flag_rC ? ".":"",
+ frD_addr, frA_addr, frC_addr);
+ assign( frD, triop( Iop_MulF64r32,
+ rm, mkexpr(frA), mkexpr(frC) ));
+ break;
+
+ case 0x1A: // frsqrtes (Floating Recip SqRt Est Single)
+ // NOTE: POWERPC OPTIONAL, "Graphics Group" (PPC32_GX)
+ // Undocumented instruction?
+ if (frA_addr != 0 || frC_addr != 0)
+ return False;
+ DIP("frsqrtes%s fr%u,fr%u\n", flag_rC ? ".":"",
+ frD_addr, frB_addr);
+ assign( frD, unop(Iop_Est5FRSqrt, mkexpr(frB)) );
+ break;
+
+ default:
+ vex_printf("dis_fp_arith(ppc)(3B: opc2)\n");
+ return False;
+ }
+ break;
+
+ case 0x3F:
+ switch (opc2) {
+ case 0x12: // fdiv (Floating Div (Double-Precision), PPC32 p406)
+ if (frC_addr != 0)
+ return False;
+ DIP("fdiv%s fr%u,fr%u,fr%u\n", flag_rC ? ".":"",
+ frD_addr, frA_addr, frB_addr);
+ assign( frD, triop(Iop_DivF64, rm, mkexpr(frA), mkexpr(frB)) );
+ break;
+
+ case 0x14: // fsub (Floating Sub (Double-Precision), PPC32 p429)
+ if (frC_addr != 0)
+ return False;
+ DIP("fsub%s fr%u,fr%u,fr%u\n", flag_rC ? ".":"",
+ frD_addr, frA_addr, frB_addr);
+ assign( frD, triop(Iop_SubF64, rm, mkexpr(frA), mkexpr(frB)) );
+ break;
+
+ case 0x15: // fadd (Floating Add (Double-Precision), PPC32 p400)
+ if (frC_addr != 0)
+ return False;
+ DIP("fadd%s fr%u,fr%u,fr%u\n", flag_rC ? ".":"",
+ frD_addr, frA_addr, frB_addr);
+ assign( frD, triop(Iop_AddF64, rm, mkexpr(frA), mkexpr(frB)) );
+ break;
+
+ case 0x16: // fsqrt (Floating SqRt (Double-Precision), PPC32 p427)
+ // NOTE: POWERPC OPTIONAL, "General-Purpose Group" (PPC32_FX)
+ if (frA_addr != 0 || frC_addr != 0)
+ return False;
+ DIP("fsqrt%s fr%u,fr%u\n", flag_rC ? ".":"",
+ frD_addr, frB_addr);
+ assign( frD, binop(Iop_SqrtF64, rm, mkexpr(frB)) );
+ break;
+
+ case 0x17: { // fsel (Floating Select, PPC32 p426)
+ // NOTE: POWERPC OPTIONAL, "Graphics Group" (PPC32_GX)
+ IRTemp cc = newTemp(Ity_I32);
+ IRTemp cc_b0 = newTemp(Ity_I32);
+
+ DIP("fsel%s fr%u,fr%u,fr%u,fr%u\n", flag_rC ? ".":"",
+ frD_addr, frA_addr, frC_addr, frB_addr);
+
+         // cc: UN == 0x45, LT == 0x01, GT == 0x00, EQ == 0x40
+         // => GT|EQ == ((cc & 0x1) == 0)
+ assign( cc, binop(Iop_CmpF64, mkexpr(frA),
+ IRExpr_Const(IRConst_F64(0))) );
+ assign( cc_b0, binop(Iop_And32, mkexpr(cc), mkU32(1)) );
+
+ // frD = (frA >= 0.0) ? frC : frB
+ // = (cc_b0 == 0) ? frC : frB
+ assign( frD,
+ IRExpr_Mux0X(
+ unop(Iop_1Uto8,
+ binop(Iop_CmpEQ32, mkexpr(cc_b0), mkU32(0))),
+ mkexpr(frB),
+ mkexpr(frC) ));
+
+ /* One of the rare ones which don't mess with FPRF */
+ set_FPRF = False;
+ break;
+ }
+
+ case 0x18: // fre (Floating Reciprocal Estimate)
+ // NOTE: POWERPC OPTIONAL, "Graphics Group" (PPC32_GX)
+         // Note: it is unclear whether this insn really exists;
+         // the ppc970 doesn't have it, but POWER5 does.
+ if (frA_addr != 0 || frC_addr != 0)
+ return False;
+ DIP("fre%s fr%u,fr%u\n", flag_rC ? ".":"",
+ frD_addr, frB_addr);
+ { IRExpr* ieee_one
+ = IRExpr_Const(IRConst_F64i(0x3ff0000000000000ULL));
+ assign( frD, triop( Iop_DivF64,
+ rm,
+ ieee_one, mkexpr(frB) ));
+ }
+ break;
+
+ case 0x19: // fmul (Floating Mult (Double Precision), PPC32 p413)
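+         /* frB is expected to be 0 here; warn but accept rather
+            than failing the whole translation (contrast fmuls
+            above, which rejects the encoding). */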
+ if (frB_addr != 0)
+ vex_printf("dis_fp_arith(ppc)(instr,fmul)\n");
+ DIP("fmul%s fr%u,fr%u,fr%u\n", flag_rC ? ".":"",
+ frD_addr, frA_addr, frC_addr);
+ assign( frD, triop(Iop_MulF64, rm, mkexpr(frA), mkexpr(frC)) );
+ break;
+
+ case 0x1A: // frsqrte (Floating Recip SqRt Est., PPC32 p424)
+ // NOTE: POWERPC OPTIONAL, "Graphics Group" (PPC32_GX)
+ if (frA_addr != 0 || frC_addr != 0)
+ return False;
+ DIP("frsqrte%s fr%u,fr%u\n", flag_rC ? ".":"",
+ frD_addr, frB_addr);
+ assign( frD, unop(Iop_Est5FRSqrt, mkexpr(frB)) );
+ break;
+
+ default:
+ vex_printf("dis_fp_arith(ppc)(3F: opc2)\n");
+ return False;
+ }
+ break;
+
+ default:
+ vex_printf("dis_fp_arith(ppc)(opc1)\n");
+ return False;
+ }
+
+ putFReg( frD_addr, mkexpr(frD) );
+
+ if (set_FPRF) {
+ // XXX XXX XXX FIXME
+ // set FPRF from frD
+ }
+
+ if (flag_rC && clear_CR1) {
+ putCR321( 1, mkU8(0) );
+ putCR0( 1, mkU8(0) );
+ }
+
+ return True;
+}
+
+
+
+/*
+ Floating Point Mult-Add Instructions
+*/
+static Bool dis_fp_multadd ( UInt theInstr )
+{
+ /* A-Form */
+ UChar opc1 = ifieldOPC(theInstr);
+ UChar frD_addr = ifieldRegDS(theInstr);
+ UChar frA_addr = ifieldRegA(theInstr);
+ UChar frB_addr = ifieldRegB(theInstr);
+ UChar frC_addr = ifieldRegC(theInstr);
+ UChar opc2 = ifieldOPClo5(theInstr);
+ UChar flag_rC = ifieldBIT0(theInstr);
+
+ IRTemp frD = newTemp(Ity_F64);
+ IRTemp frA = newTemp(Ity_F64);
+ IRTemp frB = newTemp(Ity_F64);
+ IRTemp frC = newTemp(Ity_F64);
+ IRTemp rmt = newTemp(Ity_I32);
+ IRExpr* rm;
+
+ /* By default, we will examine the results of the operation and set
+ fpscr[FPRF] accordingly. */
+ Bool set_FPRF = True;
+
+ /* By default, if flag_RC is set, we will clear cr1 after the
+ operation. In reality we should set cr1 to indicate the
+ exception status of the operation, but since we're not
+ simulating exceptions, the exception status will appear to be
+ zero. Hence cr1 should be cleared if this is a . form insn. */
+ Bool clear_CR1 = True;
+
+ /* Bind the rounding mode expression to a temp; there's no
+ point in creating gratuitous CSEs, as we know we'll need
+ to use it twice. */
+ assign( rmt, get_IR_roundingmode() );
+ rm = mkexpr(rmt);
+
+ assign( frA, getFReg(frA_addr));
+ assign( frB, getFReg(frB_addr));
+ assign( frC, getFReg(frC_addr));
+
+ /* The rounding in this is all a bit dodgy. The idea is to only do
+      one rounding. That clearly isn't achievable without dedicated
+ four-input IR primops, although in the single precision case we
+ can sort-of simulate it by doing the inner multiply in double
+ precision.
+
+ In the negated cases, the negation happens after rounding. */
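+
+   /* Illustrative sketch of the issue: the ideal fmadds is
+      round32(frA*frC + frB), rounded once, whereas composing
+      single-precision ops gives round32(round32(frA*frC) + frB),
+      which can differ in the last ulp.  Since any product of two F32
+      values is exactly representable in F64 (24+24 <= 53 significand
+      bits), doing the multiply at F64 and rounding only at the end
+      gets close to the single-rounding answer. */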
+
+ switch (opc1) {
+ case 0x3B:
+ switch (opc2) {
+ case 0x1C: // fmsubs (Floating Mult-Subtr Single, PPC32 p412)
+ DIP("fmsubs%s fr%u,fr%u,fr%u,fr%u\n", flag_rC ? ".":"",
+ frD_addr, frA_addr, frC_addr, frB_addr);
+ assign( frD, qop( Iop_MSubF64r32, rm,
+ mkexpr(frA), mkexpr(frC), mkexpr(frB) ));
+ break;
+
+ case 0x1D: // fmadds (Floating Mult-Add Single, PPC32 p409)
+ DIP("fmadds%s fr%u,fr%u,fr%u,fr%u\n", flag_rC ? ".":"",
+ frD_addr, frA_addr, frC_addr, frB_addr);
+ assign( frD, qop( Iop_MAddF64r32, rm,
+ mkexpr(frA), mkexpr(frC), mkexpr(frB) ));
+ break;
+
+ case 0x1E: // fnmsubs (Float Neg Mult-Subtr Single, PPC32 p420)
+ DIP("fnmsubs%s fr%u,fr%u,fr%u,fr%u\n", flag_rC ? ".":"",
+ frD_addr, frA_addr, frC_addr, frB_addr);
+ assign( frD, unop( Iop_NegF64,
+ qop( Iop_MSubF64r32, rm,
+ mkexpr(frA), mkexpr(frC), mkexpr(frB) )));
+ break;
+
+ case 0x1F: // fnmadds (Floating Negative Multiply-Add Single, PPC32 p418)
+ DIP("fnmadds%s fr%u,fr%u,fr%u,fr%u\n", flag_rC ? ".":"",
+ frD_addr, frA_addr, frC_addr, frB_addr);
+ assign( frD, unop( Iop_NegF64,
+ qop( Iop_MAddF64r32, rm,
+ mkexpr(frA), mkexpr(frC), mkexpr(frB) )));
+ break;
+
+ default:
+ vex_printf("dis_fp_multadd(ppc)(3B: opc2)\n");
+ return False;
+ }
+ break;
+
+ case 0x3F:
+ switch (opc2) {
+ case 0x1C: // fmsub (Float Mult-Sub (Dbl Precision), PPC32 p411)
+ DIP("fmsub%s fr%u,fr%u,fr%u,fr%u\n", flag_rC ? ".":"",
+ frD_addr, frA_addr, frC_addr, frB_addr);
+ assign( frD, qop( Iop_MSubF64, rm,
+ mkexpr(frA), mkexpr(frC), mkexpr(frB) ));
+ break;
+
+ case 0x1D: // fmadd (Float Mult-Add (Dbl Precision), PPC32 p408)
+ DIP("fmadd%s fr%u,fr%u,fr%u,fr%u\n", flag_rC ? ".":"",
+ frD_addr, frA_addr, frC_addr, frB_addr);
+ assign( frD, qop( Iop_MAddF64, rm,
+ mkexpr(frA), mkexpr(frC), mkexpr(frB) ));
+ break;
+
+ case 0x1E: // fnmsub (Float Neg Mult-Subtr (Dbl Precision), PPC32 p419)
+ DIP("fnmsub%s fr%u,fr%u,fr%u,fr%u\n", flag_rC ? ".":"",
+ frD_addr, frA_addr, frC_addr, frB_addr);
+ assign( frD, unop( Iop_NegF64,
+ qop( Iop_MSubF64, rm,
+ mkexpr(frA), mkexpr(frC), mkexpr(frB) )));
+ break;
+
+ case 0x1F: // fnmadd (Float Neg Mult-Add (Dbl Precision), PPC32 p417)
+ DIP("fnmadd%s fr%u,fr%u,fr%u,fr%u\n", flag_rC ? ".":"",
+ frD_addr, frA_addr, frC_addr, frB_addr);
+ assign( frD, unop( Iop_NegF64,
+ qop( Iop_MAddF64, rm,
+ mkexpr(frA), mkexpr(frC), mkexpr(frB) )));
+ break;
+
+ default:
+ vex_printf("dis_fp_multadd(ppc)(3F: opc2)\n");
+ return False;
+ }
+ break;
+
+ default:
+ vex_printf("dis_fp_multadd(ppc)(opc1)\n");
+ return False;
+ }
+
+ putFReg( frD_addr, mkexpr(frD) );
+
+ if (set_FPRF) {
+ // XXX XXX XXX FIXME
+ // set FPRF from frD
+ }
+
+ if (flag_rC && clear_CR1) {
+ putCR321( 1, mkU8(0) );
+ putCR0( 1, mkU8(0) );
+ }
+
+ return True;
+}
+
+
+
+/*
+ Floating Point Compare Instructions
+*/
+static Bool dis_fp_cmp ( UInt theInstr )
+{
+ /* X-Form */
+ UChar opc1 = ifieldOPC(theInstr);
+ UChar crfD = toUChar( IFIELD( theInstr, 23, 3 ) );
+ UChar b21to22 = toUChar( IFIELD( theInstr, 21, 2 ) );
+ UChar frA_addr = ifieldRegA(theInstr);
+ UChar frB_addr = ifieldRegB(theInstr);
+ UInt opc2 = ifieldOPClo10(theInstr);
+ UChar b0 = ifieldBIT0(theInstr);
+
+ IRTemp ccIR = newTemp(Ity_I32);
+ IRTemp ccPPC32 = newTemp(Ity_I32);
+
+ IRTemp frA = newTemp(Ity_F64);
+ IRTemp frB = newTemp(Ity_F64);
+
+ if (opc1 != 0x3F || b21to22 != 0 || b0 != 0) {
+ vex_printf("dis_fp_cmp(ppc)(instr)\n");
+ return False;
+ }
+
+ assign( frA, getFReg(frA_addr));
+ assign( frB, getFReg(frB_addr));
+
+ assign( ccIR, binop(Iop_CmpF64, mkexpr(frA), mkexpr(frB)) );
+
+ /* Map compare result from IR to PPC32 */
+ /*
+ FP cmp result | PPC | IR
+ --------------------------
+ UN | 0x1 | 0x45
+ EQ | 0x2 | 0x40
+ GT | 0x4 | 0x00
+ LT | 0x8 | 0x01
+ */
+
+   // ccPPC32 = Shl(1, (~(ccIR>>5) & 2)
+   //                  | ((ccIR ^ (ccIR>>6)) & 1))
+ assign(
+ ccPPC32,
+ binop(
+ Iop_Shl32,
+ mkU32(1),
+ unop(
+ Iop_32to8,
+ binop(
+ Iop_Or32,
+ binop(
+ Iop_And32,
+ unop(
+ Iop_Not32,
+ binop(Iop_Shr32, mkexpr(ccIR), mkU8(5))
+ ),
+ mkU32(2)
+ ),
+ binop(
+ Iop_And32,
+ binop(
+ Iop_Xor32,
+ mkexpr(ccIR),
+ binop(Iop_Shr32, mkexpr(ccIR), mkU8(6))
+ ),
+ mkU32(1)
+ )
+ )
+ )
+ )
+ );
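+
+   /* Sanity check of the mapping, for the four possible ccIR values:
+        UN: 0x45 -> shift (0|0) = 0 -> 0x1
+        EQ: 0x40 -> shift (0|1) = 1 -> 0x2
+        GT: 0x00 -> shift (2|0) = 2 -> 0x4
+        LT: 0x01 -> shift (2|1) = 3 -> 0x8
+      matching the PPC encoding in the table above. */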
+
+ putGST_field( PPC_GST_CR, mkexpr(ccPPC32), crfD );
+
+ /* CAB: TODO?: Support writing cc to FPSCR->FPCC ?
+ putGST_field( PPC_GST_FPSCR, mkexpr(ccPPC32), 4 );
+ */
+ // XXX XXX XXX FIXME
+ // Also write the result into FPRF (it's not entirely clear how)
+
+ /* Note: Differences between fcmpu and fcmpo are only in exception
+ flag settings, which aren't supported anyway. */
+ switch (opc2) {
+ case 0x000: // fcmpu (Floating Compare Unordered, PPC32 p403)
+ DIP("fcmpu crf%d,fr%u,fr%u\n", crfD, frA_addr, frB_addr);
+ break;
+ case 0x020: // fcmpo (Floating Compare Ordered, PPC32 p402)
+ DIP("fcmpo crf%d,fr%u,fr%u\n", crfD, frA_addr, frB_addr);
+ break;
+ default:
+ vex_printf("dis_fp_cmp(ppc)(opc2)\n");
+ return False;
+ }
+ return True;
+}
+
+
+
+/*
+ Floating Point Rounding/Conversion Instructions
+*/
+static Bool dis_fp_round ( UInt theInstr )
+{
+ /* X-Form */
+ UChar opc1 = ifieldOPC(theInstr);
+ UChar b16to20 = ifieldRegA(theInstr);
+ UChar frD_addr = ifieldRegDS(theInstr);
+ UChar frB_addr = ifieldRegB(theInstr);
+ UInt opc2 = ifieldOPClo10(theInstr);
+ UChar flag_rC = ifieldBIT0(theInstr);
+
+ IRTemp frD = newTemp(Ity_F64);
+ IRTemp frB = newTemp(Ity_F64);
+ IRTemp r_tmp32 = newTemp(Ity_I32);
+ IRTemp r_tmp64 = newTemp(Ity_I64);
+ IRExpr* rm = get_IR_roundingmode();
+
+ /* By default, we will examine the results of the operation and set
+ fpscr[FPRF] accordingly. */
+ Bool set_FPRF = True;
+
+ /* By default, if flag_RC is set, we will clear cr1 after the
+ operation. In reality we should set cr1 to indicate the
+ exception status of the operation, but since we're not
+ simulating exceptions, the exception status will appear to be
+ zero. Hence cr1 should be cleared if this is a . form insn. */
+ Bool clear_CR1 = True;
+
+ if (opc1 != 0x3F || b16to20 != 0) {
+ vex_printf("dis_fp_round(ppc)(instr)\n");
+ return False;
+ }
+
+ assign( frB, getFReg(frB_addr));
+
+ switch (opc2) {
+ case 0x00C: // frsp (Float Round to Single, PPC32 p423)
+ DIP("frsp%s fr%u,fr%u\n", flag_rC ? ".":"", frD_addr, frB_addr);
+ assign( frD, binop( Iop_RoundF64toF32, rm, mkexpr(frB) ));
+ break;
+
+ case 0x00E: // fctiw (Float Conv to Int, PPC32 p404)
+ DIP("fctiw%s fr%u,fr%u\n", flag_rC ? ".":"", frD_addr, frB_addr);
+ assign( r_tmp32,
+ binop(Iop_F64toI32S, rm, mkexpr(frB)) );
+ assign( frD, unop( Iop_ReinterpI64asF64,
+ unop( Iop_32Uto64, mkexpr(r_tmp32))));
+ /* FPRF is undefined after fctiw. Leave unchanged. */
+ set_FPRF = False;
+ break;
+
+ case 0x00F: // fctiwz (Float Conv to Int, Round to Zero, PPC32 p405)
+ DIP("fctiwz%s fr%u,fr%u\n", flag_rC ? ".":"", frD_addr, frB_addr);
+ assign( r_tmp32,
+ binop(Iop_F64toI32S, mkU32(Irrm_ZERO), mkexpr(frB) ));
+ assign( frD, unop( Iop_ReinterpI64asF64,
+ unop( Iop_32Uto64, mkexpr(r_tmp32))));
+ /* FPRF is undefined after fctiwz. Leave unchanged. */
+ set_FPRF = False;
+ break;
+
+ case 0x32E: // fctid (Float Conv to Int DWord, PPC64 p437)
+ DIP("fctid%s fr%u,fr%u\n", flag_rC ? ".":"", frD_addr, frB_addr);
+ assign( r_tmp64,
+ binop(Iop_F64toI64S, rm, mkexpr(frB)) );
+ assign( frD, unop( Iop_ReinterpI64asF64, mkexpr(r_tmp64)) );
+ /* FPRF is undefined after fctid. Leave unchanged. */
+ set_FPRF = False;
+ break;
+
+ case 0x32F: // fctidz (Float Conv to Int DWord, Round to Zero, PPC64 p437)
+ DIP("fctidz%s fr%u,fr%u\n", flag_rC ? ".":"", frD_addr, frB_addr);
+ assign( r_tmp64,
+ binop(Iop_F64toI64S, mkU32(Irrm_ZERO), mkexpr(frB)) );
+ assign( frD, unop( Iop_ReinterpI64asF64, mkexpr(r_tmp64)) );
+ /* FPRF is undefined after fctidz. Leave unchanged. */
+ set_FPRF = False;
+ break;
+
+ case 0x34E: // fcfid (Float Conv from Int DWord, PPC64 p434)
+ DIP("fcfid%s fr%u,fr%u\n", flag_rC ? ".":"", frD_addr, frB_addr);
+ assign( r_tmp64, unop( Iop_ReinterpF64asI64, mkexpr(frB)) );
+ assign( frD,
+ binop(Iop_I64StoF64, rm, mkexpr(r_tmp64)) );
+ break;
+
+ case 0x188: case 0x1A8: case 0x1C8: case 0x1E8: // frin, friz, frip, frim
+ switch(opc2) {
+ case 0x188: // frin (Floating Round to Integer Nearest)
+ DIP("frin%s fr%u,fr%u\n", flag_rC ? ".":"", frD_addr, frB_addr);
+ assign( r_tmp64,
+ binop(Iop_F64toI64S, mkU32(Irrm_NEAREST), mkexpr(frB)) );
+ break;
+ case 0x1A8: // friz (Floating Round to Integer Toward Zero)
+ DIP("friz%s fr%u,fr%u\n", flag_rC ? ".":"", frD_addr, frB_addr);
+ assign( r_tmp64,
+ binop(Iop_F64toI64S, mkU32(Irrm_ZERO), mkexpr(frB)) );
+ break;
+ case 0x1C8: // frip (Floating Round to Integer Plus)
+ DIP("frip%s fr%u,fr%u\n", flag_rC ? ".":"", frD_addr, frB_addr);
+ assign( r_tmp64,
+ binop(Iop_F64toI64S, mkU32(Irrm_PosINF), mkexpr(frB)) );
+ break;
+ case 0x1E8: // frim (Floating Round to Integer Minus)
+ DIP("frim%s fr%u,fr%u\n", flag_rC ? ".":"", frD_addr, frB_addr);
+ assign( r_tmp64,
+ binop(Iop_F64toI64S, mkU32(Irrm_NegINF), mkexpr(frB)) );
+ break;
+ }
+
+ /* don't use the rounded integer if frB is outside -9e18..9e18 */
+ /* F64 has only log10(2**52) significant digits anyway */
+ /* need to preserve sign of zero */
+ /* frD = (fabs(frB) > 9e18) ? frB :
+ (sign(frB)) ? -fabs((double)r_tmp64) : (double)r_tmp64 */
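+      /* Wiring of the guard (a reading aid): CmpF64(9e18, |frB|)
+         yields GT (0x00) iff |frB| < 9e18, and Mux0X takes its first
+         arm when the condition byte is zero; so the in-range case
+         uses the rounded r_tmp64, while |frB| >= 9e18 and NaNs
+         (UN, 0x45) fall through to frB unchanged.  The inner Mux0X
+         tests frB's sign bit so that values rounding to zero keep
+         their sign. */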
+ assign(frD, IRExpr_Mux0X( unop(Iop_32to8,
+ binop(Iop_CmpF64,
+ IRExpr_Const(IRConst_F64(9e18)),
+ unop(Iop_AbsF64, mkexpr(frB)))),
+ IRExpr_Mux0X(unop(Iop_32to8,
+ binop(Iop_Shr32,
+ unop(Iop_64HIto32,
+ unop(Iop_ReinterpF64asI64,
+ mkexpr(frB))), mkU8(31))),
+ binop(Iop_I64StoF64, mkU32(0), mkexpr(r_tmp64) ),
+ unop(Iop_NegF64,
+ unop( Iop_AbsF64,
+ binop(Iop_I64StoF64, mkU32(0),
+ mkexpr(r_tmp64)) )) ),
+ mkexpr(frB)));
+ break;
+
+ default:
+ vex_printf("dis_fp_round(ppc)(opc2)\n");
+ return False;
+ }
+
+ putFReg( frD_addr, mkexpr(frD) );
+
+ if (set_FPRF) {
+ // XXX XXX XXX FIXME
+ // set FPRF from frD
+ }
+
+ if (flag_rC && clear_CR1) {
+ putCR321( 1, mkU8(0) );
+ putCR0( 1, mkU8(0) );
+ }
+
+ return True;
+}
+
+/*
+ Floating Point Pair Instructions
+*/
+static Bool dis_fp_pair ( UInt theInstr )
+{
+ /* X-Form/DS-Form */
+ UChar opc1 = ifieldOPC(theInstr);
+ UChar frT_hi_addr = ifieldRegDS(theInstr);
+ UChar frT_lo_addr = frT_hi_addr + 1;
+ UChar rA_addr = ifieldRegA(theInstr);
+ UChar rB_addr = ifieldRegB(theInstr);
+ UInt uimm16 = ifieldUIMM16(theInstr);
+ Int simm16 = extend_s_16to32(uimm16);
+ UInt opc2 = ifieldOPClo10(theInstr);
+ IRType ty = mode64 ? Ity_I64 : Ity_I32;
+ IRTemp EA_hi = newTemp(ty);
+ IRTemp EA_lo = newTemp(ty);
+ IRTemp frT_hi = newTemp(Ity_F64);
+ IRTemp frT_lo = newTemp(Ity_F64);
+ UChar b0 = ifieldBIT0(theInstr);
+   Bool is_load = False;
+
+   if ((frT_hi_addr % 2) != 0) {
+ vex_printf("dis_fp_pair(ppc) : odd frT register\n");
+ return False;
+ }
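+   /* ISA 2.05: the pair is FRTp and FRTp+1, and FRTp must be even;
+      odd encodings are invalid forms, hence the bail-out above. */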
+
+ switch (opc1) {
+ case 0x1F: // register offset
+ switch(opc2) {
+ case 0x317: // lfdpx (FP Load Double Pair X-form, ISA 2.05 p125)
+ DIP("ldpx fr%u,r%u,r%u\n", frT_hi_addr, rA_addr, rB_addr);
+ is_load = 1;
+ break;
+      case 0x397: // stfdpx (FP Store Double Pair X-form, ISA 2.05 p125)
+         DIP("stfdpx fr%u,r%u,r%u\n", frT_hi_addr, rA_addr, rB_addr);
+ break;
+ default:
+ vex_printf("dis_fp_pair(ppc) : X-form wrong opc2\n");
+ return False;
+ }
+
+ if (b0 != 0) {
+ vex_printf("dis_fp_pair(ppc)(0x1F,b0)\n");
+ return False;
+ }
+ assign( EA_hi, ea_rAor0_idxd( rA_addr, rB_addr ) );
+ break;
+ case 0x39: // lfdp (FP Load Double Pair DS-form, ISA 2.05 p125)
+ DIP("lfdp fr%u,%d(r%u)\n", frT_hi_addr, simm16, rA_addr);
+ assign( EA_hi, ea_rAor0_simm( rA_addr, simm16 ) );
+      is_load = True;
+ break;
+ case 0x3d: // stfdp (FP Store Double Pair DS-form, ISA 2.05 p125)
+ DIP("stfdp fr%u,%d(r%u)\n", frT_hi_addr, simm16, rA_addr);
+ assign( EA_hi, ea_rAor0_simm( rA_addr, simm16 ) );
+ break;
+   default:
+ vex_printf("dis_fp_pair(ppc)(instr)\n");
+ return False;
+ }
+
+ if (mode64)
+ assign( EA_lo, binop(Iop_Add64, mkexpr(EA_hi), mkU64(8)) );
+ else
+ assign( EA_lo, binop(Iop_Add32, mkexpr(EA_hi), mkU32(8)) );
+
+ assign( frT_hi, getFReg(frT_hi_addr) );
+ assign( frT_lo, getFReg(frT_lo_addr) );
+
+ if (is_load) {
+ putFReg( frT_hi_addr, loadBE(Ity_F64, mkexpr(EA_hi)) );
+ putFReg( frT_lo_addr, loadBE(Ity_F64, mkexpr(EA_lo)) );
+ } else {
+ storeBE( mkexpr(EA_hi), mkexpr(frT_hi) );
+ storeBE( mkexpr(EA_lo), mkexpr(frT_lo) );
+ }
+
+ return True;
+}
+
+
+/*
+ Floating Point Move Instructions
+*/
+static Bool dis_fp_move ( UInt theInstr )
+{
+ /* X-Form */
+ UChar opc1 = ifieldOPC(theInstr);
+ UChar frD_addr = ifieldRegDS(theInstr);
+ UChar frA_addr = ifieldRegA(theInstr);
+ UChar frB_addr = ifieldRegB(theInstr);
+ UInt opc2 = ifieldOPClo10(theInstr);
+ UChar flag_rC = ifieldBIT0(theInstr);
+
+ IRTemp frD = newTemp(Ity_F64);
+ IRTemp frB = newTemp(Ity_F64);
+ IRTemp itmpB = newTemp(Ity_F64);
+ IRTemp frA;
+ IRTemp signA;
+ IRTemp hiD;
+
+ if (opc1 != 0x3F || (frA_addr != 0 && opc2 != 0x008)) {
+ vex_printf("dis_fp_move(ppc)(instr)\n");
+ return False;
+ }
+
+ assign( frB, getFReg(frB_addr));
+
+ switch (opc2) {
+ case 0x008: // fcpsgn (Floating Copy Sign, ISA_V2.05 p126)
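+      /* Sign of frA, magnitude of frB: e.g. frA = -3.0, frB = 5.0
+         gives frD = -5.0. */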
+ DIP("fcpsgn%s fr%u,fr%u,fr%u\n", flag_rC ? ".":"", frD_addr, frA_addr,
+ frB_addr);
+ signA = newTemp(Ity_I32);
+ hiD = newTemp(Ity_I32);
+ itmpB = newTemp(Ity_I64);
+ frA = newTemp(Ity_F64);
+ assign( frA, getFReg(frA_addr) );
+
+ /* get A's sign bit */
+ assign(signA, binop(Iop_And32,
+ unop(Iop_64HIto32, unop(Iop_ReinterpF64asI64,
+ mkexpr(frA))),
+ mkU32(0x80000000)) );
+
+ assign( itmpB, unop(Iop_ReinterpF64asI64, mkexpr(frB)) );
+
+ /* mask off B's sign bit and or in A's sign bit */
+ assign(hiD, binop(Iop_Or32,
+ binop(Iop_And32,
+ unop(Iop_64HIto32,
+ mkexpr(itmpB)), /* frB's high 32 bits */
+ mkU32(0x7fffffff)),
+ mkexpr(signA)) );
+
+ /* combine hiD/loB into frD */
+ assign( frD, unop(Iop_ReinterpI64asF64,
+ binop(Iop_32HLto64,
+ mkexpr(hiD),
+ unop(Iop_64to32,
+ mkexpr(itmpB)))) ); /* frB's low 32 bits */
+ break;
+
+ case 0x028: // fneg (Floating Negate, PPC32 p416)
+ DIP("fneg%s fr%u,fr%u\n", flag_rC ? ".":"", frD_addr, frB_addr);
+ assign( frD, unop( Iop_NegF64, mkexpr(frB) ));
+ break;
+
+ case 0x048: // fmr (Floating Move Register, PPC32 p410)
+ DIP("fmr%s fr%u,fr%u\n", flag_rC ? ".":"", frD_addr, frB_addr);
+ assign( frD, mkexpr(frB) );
+ break;
+
+ case 0x088: // fnabs (Floating Negative Absolute Value, PPC32 p415)
+ DIP("fnabs%s fr%u,fr%u\n", flag_rC ? ".":"", frD_addr, frB_addr);
+ assign( frD, unop( Iop_NegF64, unop( Iop_AbsF64, mkexpr(frB) )));
+ break;
+
+ case 0x108: // fabs (Floating Absolute Value, PPC32 p399)
+ DIP("fabs%s fr%u,fr%u\n", flag_rC ? ".":"", frD_addr, frB_addr);
+ assign( frD, unop( Iop_AbsF64, mkexpr(frB) ));
+ break;
+
+ default:
+ vex_printf("dis_fp_move(ppc)(opc2)\n");
+ return False;
+ }
+
+ putFReg( frD_addr, mkexpr(frD) );
+
+ /* None of these change FPRF. cr1 is set in the usual way though,
+ if flag_rC is set. */
+
+ if (flag_rC) {
+ putCR321( 1, mkU8(0) );
+ putCR0( 1, mkU8(0) );
+ }
+
+ return True;
+}
+
+
+
+/*
+ Floating Point Status/Control Register Instructions
+*/
+static Bool dis_fp_scr ( UInt theInstr )
+{
+ /* Many forms - see each switch case */
+ UChar opc1 = ifieldOPC(theInstr);
+ UInt opc2 = ifieldOPClo10(theInstr);
+ UChar flag_rC = ifieldBIT0(theInstr);
+
+ if (opc1 != 0x3F) {
+ vex_printf("dis_fp_scr(ppc)(instr)\n");
+ return False;
+ }
+
+ switch (opc2) {
+ case 0x026: { // mtfsb1 (Move to FPSCR Bit 1, PPC32 p479)
+ // Bit crbD of the FPSCR is set.
+ UChar crbD = ifieldRegDS(theInstr);
+ UInt b11to20 = IFIELD(theInstr, 11, 10);
+
+ if (b11to20 != 0) {
+ vex_printf("dis_fp_scr(ppc)(instr,mtfsb1)\n");
+ return False;
+ }
+ DIP("mtfsb1%s crb%d \n", flag_rC ? ".":"", crbD);
+ putGST_masked( PPC_GST_FPSCR, mkU32(1<<(31-crbD)), 1<<(31-crbD) );
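+      /* FPSCR bits are numbered big-endian (bit 0 is the MSB of the
+         32-bit word), hence the 1<<(31-crbD) mask; e.g. crbD == 0
+         (FX) selects mask 0x80000000. */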
+ break;
+ }
+
+ case 0x040: { // mcrfs (Move to Condition Register from FPSCR, PPC32 p465)
+ UChar crfD = toUChar( IFIELD( theInstr, 23, 3 ) );
+ UChar b21to22 = toUChar( IFIELD( theInstr, 21, 2 ) );
+ UChar crfS = toUChar( IFIELD( theInstr, 18, 3 ) );
+ UChar b11to17 = toUChar( IFIELD( theInstr, 11, 7 ) );
+ IRTemp tmp = newTemp(Ity_I32);
+ IRExpr* fpscr_all;
+ if (b21to22 != 0 || b11to17 != 0 || flag_rC != 0) {
+ vex_printf("dis_fp_scr(ppc)(instr,mcrfs)\n");
+ return False;
+ }
+ DIP("mcrfs crf%d,crf%d\n", crfD, crfS);
+ vassert(crfD < 8);
+ vassert(crfS < 8);
+ fpscr_all = getGST_masked( PPC_GST_FPSCR, MASK_FPSCR_RN );
+ assign( tmp, binop(Iop_And32,
+ binop(Iop_Shr32,fpscr_all,mkU8(4 * (7-crfS))),
+ mkU32(0xF)) );
+ putGST_field( PPC_GST_CR, mkexpr(tmp), crfD );
+ break;
+ }
+
+ case 0x046: { // mtfsb0 (Move to FPSCR Bit 0, PPC32 p478)
+ // Bit crbD of the FPSCR is cleared.
+ UChar crbD = ifieldRegDS(theInstr);
+ UInt b11to20 = IFIELD(theInstr, 11, 10);
+
+ if (b11to20 != 0) {
+ vex_printf("dis_fp_scr(ppc)(instr,mtfsb0)\n");
+ return False;
+ }
+ DIP("mtfsb0%s crb%d\n", flag_rC ? ".":"", crbD);
+ putGST_masked( PPC_GST_FPSCR, mkU32(0), 1<<(31-crbD) );
+ break;
+ }
+
+ case 0x086: { // mtfsfi (Move to FPSCR Field Immediate, PPC32 p481)
+ UChar crfD = toUChar( IFIELD( theInstr, 23, 3 ) );
+ UChar b16to22 = toUChar( IFIELD( theInstr, 16, 7 ) );
+ UChar IMM = toUChar( IFIELD( theInstr, 12, 4 ) );
+ UChar b11 = toUChar( IFIELD( theInstr, 11, 1 ) );
+
+ if (b16to22 != 0 || b11 != 0) {
+ vex_printf("dis_fp_scr(ppc)(instr,mtfsfi)\n");
+ return False;
+ }
+ DIP("mtfsfi%s crf%d,%d\n", flag_rC ? ".":"", crfD, IMM);
+ putGST_field( PPC_GST_FPSCR, mkU32(IMM), crfD );
+ break;
+ }
+
+ case 0x247: { // mffs (Move from FPSCR, PPC32 p468)
+ UChar frD_addr = ifieldRegDS(theInstr);
+ UInt b11to20 = IFIELD(theInstr, 11, 10);
+ IRExpr* fpscr_all = getGST_masked( PPC_GST_FPSCR, MASK_FPSCR_RN );
+
+ if (b11to20 != 0) {
+ vex_printf("dis_fp_scr(ppc)(instr,mffs)\n");
+ return False;
+ }
+ DIP("mffs%s fr%u\n", flag_rC ? ".":"", frD_addr);
+ putFReg( frD_addr,
+ unop( Iop_ReinterpI64asF64,
+ unop( Iop_32Uto64, fpscr_all )));
+ break;
+ }
+
+ case 0x2C7: { // mtfsf (Move to FPSCR Fields, PPC32 p480)
+ UChar b25 = toUChar( IFIELD(theInstr, 25, 1) );
+ UChar FM = toUChar( IFIELD(theInstr, 17, 8) );
+ UChar frB_addr = ifieldRegB(theInstr);
+ IRTemp frB = newTemp(Ity_F64);
+ IRTemp rB_32 = newTemp(Ity_I32);
+ Int i, mask;
+
+ if (b25 == 1) {
+         /* New 64-bit move variant for POWER6: if the L field (bit 25)
+          * is one, do a full 64-bit move.  Note that the FPSCR is not
+          * really properly modelled; this instruction only changes the
+          * value of the rounding mode.  The HW exception bits do not
+          * get set in the simulator.  1/12/09
+          */
+ DIP("mtfsf%s %d,fr%u (L=1)\n", flag_rC ? ".":"", FM, frB_addr);
+ mask = 0xFF;
+
+ } else {
+ DIP("mtfsf%s %d,fr%u\n", flag_rC ? ".":"", FM, frB_addr);
+         // Build 32-bit mask from FM: FM bit i selects FPSCR field i,
+         // the 4-bit nibble at bits 4*(7-i)..4*(7-i)+3 (cf. the field
+         // extraction in mcrfs above).
+         mask = 0;
+         for (i=0; i<8; i++) {
+            if ((FM & (1<<(7-i))) != 0) {
+               mask |= 0xF << (4*(7-i));
+            }
+         }
+ }
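+      /* Worked example under that layout: FM = 0x90 selects fields
+         0 and 3, giving mask 0xF00F0000. */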
+ assign( frB, getFReg(frB_addr));
+ assign( rB_32, unop( Iop_64to32,
+ unop( Iop_ReinterpF64asI64, mkexpr(frB) )));
+ putGST_masked( PPC_GST_FPSCR, mkexpr(rB_32), mask );
+ break;
+ }
+
+ default:
+ vex_printf("dis_fp_scr(ppc)(opc2)\n");
+ return False;
+ }
+ return True;
+}
+
+
+
+/*------------------------------------------------------------*/
+/*--- AltiVec Instruction Translation ---*/
+/*------------------------------------------------------------*/
+
+/*
+ Altivec Cache Control Instructions (Data Streams)
+*/
+static Bool dis_av_datastream ( UInt theInstr )
+{
+ /* X-Form */
+ UChar opc1 = ifieldOPC(theInstr);
+ UChar flag_T = toUChar( IFIELD( theInstr, 25, 1 ) );
+ UChar flag_A = flag_T;
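+   /* bit 25 is the T hint for dst/dstst but the A (all-streams)
+      flag for dss, hence the aliasing above */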
+ UChar b23to24 = toUChar( IFIELD( theInstr, 23, 2 ) );
+ UChar STRM = toUChar( IFIELD( theInstr, 21, 2 ) );
+ UChar rA_addr = ifieldRegA(theInstr);
+ UChar rB_addr = ifieldRegB(theInstr);
+ UInt opc2 = ifieldOPClo10(theInstr);
+ UChar b0 = ifieldBIT0(theInstr);
+
+ if (opc1 != 0x1F || b23to24 != 0 || b0 != 0) {
+ vex_printf("dis_av_datastream(ppc)(instr)\n");
+ return False;
+ }
+
+ switch (opc2) {
+ case 0x156: // dst (Data Stream Touch, AV p115)
+ DIP("dst%s r%u,r%u,%d\n", flag_T ? "t" : "",
+ rA_addr, rB_addr, STRM);
+ break;
+
+ case 0x176: // dstst (Data Stream Touch for Store, AV p117)
+ DIP("dstst%s r%u,r%u,%d\n", flag_T ? "t" : "",
+ rA_addr, rB_addr, STRM);
+ break;
+
+ case 0x336: // dss (Data Stream Stop, AV p114)
+ if (rA_addr != 0 || rB_addr != 0) {
+ vex_printf("dis_av_datastream(ppc)(opc2,dst)\n");
+ return False;
+ }
+ if (flag_A == 0) {
+ DIP("dss %d\n", STRM);
+ } else {
+ DIP("dssall\n");
+ }
+ break;
+
+ default:
+ vex_printf("dis_av_datastream(ppc)(opc2)\n");
+ return False;
+ }
+ return True;
+}
+
+/*
+ AltiVec Processor Control Instructions
+*/
+static Bool dis_av_procctl ( UInt theInstr )
+{
+ /* VX-Form */
+ UChar opc1 = ifieldOPC(theInstr);
+ UChar vD_addr = ifieldRegDS(theInstr);
+ UChar vA_addr = ifieldRegA(theInstr);
+ UChar vB_addr = ifieldRegB(theInstr);
+ UInt opc2 = IFIELD( theInstr, 0, 11 );
+
+ if (opc1 != 0x4) {
+ vex_printf("dis_av_procctl(ppc)(instr)\n");
+ return False;
+ }
+
+ switch (opc2) {
+ case 0x604: // mfvscr (Move from VSCR, AV p129)
+ if (vA_addr != 0 || vB_addr != 0) {
+ vex_printf("dis_av_procctl(ppc)(opc2,dst)\n");
+ return False;
+ }
+ DIP("mfvscr v%d\n", vD_addr);
+ putVReg( vD_addr, unop(Iop_32UtoV128, getGST( PPC_GST_VSCR )) );
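+      /* mfvscr defines vD as 96 zero bits followed by the 32-bit
+         VSCR; 32UtoV128 produces exactly that layout. */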
+ break;
+
+ case 0x644: { // mtvscr (Move to VSCR, AV p130)
+ IRTemp vB = newTemp(Ity_V128);
+ if (vD_addr != 0 || vA_addr != 0) {
+ vex_printf("dis_av_procctl(ppc)(opc2,dst)\n");
+ return False;
+ }
+ DIP("mtvscr v%d\n", vB_addr);
+ assign( vB, getVReg(vB_addr));
+ putGST( PPC_GST_VSCR, unop(Iop_V128to32, mkexpr(vB)) );
+ break;
+ }
+ default:
+ vex_printf("dis_av_procctl(ppc)(opc2)\n");
+ return False;
+ }
+ return True;
+}
+
+/*
+ AltiVec Load Instructions
+*/
+static Bool dis_av_load ( VexAbiInfo* vbi, UInt theInstr )
+{
+ /* X-Form */
+ UChar opc1 = ifieldOPC(theInstr);
+ UChar vD_addr = ifieldRegDS(theInstr);
+ UChar rA_addr = ifieldRegA(theInstr);
+ UChar rB_addr = ifieldRegB(theInstr);
+ UInt opc2 = ifieldOPClo10(theInstr);
+ UChar b0 = ifieldBIT0(theInstr);
+
+ IRType ty = mode64 ? Ity_I64 : Ity_I32;
+ IRTemp EA = newTemp(ty);
+ IRTemp EA_align16 = newTemp(ty);
+
+ if (opc1 != 0x1F || b0 != 0) {
+ vex_printf("dis_av_load(ppc)(instr)\n");
+ return False;
+ }
+
+ assign( EA, ea_rAor0_idxd(rA_addr, rB_addr) );
+ assign( EA_align16, addr_align( mkexpr(EA), 16 ) );
+
+ switch (opc2) {
+
+ case 0x006: { // lvsl (Load Vector for Shift Left, AV p123)
+ IRDirty* d;
+ UInt vD_off = vectorGuestRegOffset(vD_addr);
+ IRExpr** args = mkIRExprVec_3(
+ mkU32(vD_off),
+ binop(Iop_And32, mkNarrowTo32(ty, mkexpr(EA)),
+ mkU32(0xF)),
+ mkU32(0)/*left*/ );
+ if (!mode64) {
+ d = unsafeIRDirty_0_N (
+ 0/*regparms*/,
+ "ppc32g_dirtyhelper_LVS",
+ fnptr_to_fnentry(vbi, &ppc32g_dirtyhelper_LVS),
+ args );
+ } else {
+ d = unsafeIRDirty_0_N (
+ 0/*regparms*/,
+ "ppc64g_dirtyhelper_LVS",
+ fnptr_to_fnentry(vbi, &ppc64g_dirtyhelper_LVS),
+ args );
+ }
+ DIP("lvsl v%d,r%u,r%u\n", vD_addr, rA_addr, rB_addr);
+ /* declare guest state effects */
+ d->needsBBP = True;
+ d->nFxState = 1;
+ d->fxState[0].fx = Ifx_Write;
+ d->fxState[0].offset = vD_off;
+ d->fxState[0].size = sizeof(U128);
+
+ /* execute the dirty call, side-effecting guest state */
+ stmt( IRStmt_Dirty(d) );
+ break;
+ }
+ case 0x026: { // lvsr (Load Vector for Shift Right, AV p125)
+ IRDirty* d;
+ UInt vD_off = vectorGuestRegOffset(vD_addr);
+ IRExpr** args = mkIRExprVec_3(
+ mkU32(vD_off),
+ binop(Iop_And32, mkNarrowTo32(ty, mkexpr(EA)),
+ mkU32(0xF)),
+ mkU32(1)/*right*/ );
+ if (!mode64) {
+ d = unsafeIRDirty_0_N (
+ 0/*regparms*/,
+ "ppc32g_dirtyhelper_LVS",
+ fnptr_to_fnentry(vbi, &ppc32g_dirtyhelper_LVS),
+ args );
+ } else {
+ d = unsafeIRDirty_0_N (
+ 0/*regparms*/,
+ "ppc64g_dirtyhelper_LVS",
+ fnptr_to_fnentry(vbi, &ppc64g_dirtyhelper_LVS),
+ args );
+ }
+ DIP("lvsr v%d,r%u,r%u\n", vD_addr, rA_addr, rB_addr);
+ /* declare guest state effects */
+ d->needsBBP = True;
+ d->nFxState = 1;
+ d->fxState[0].fx = Ifx_Write;
+ d->fxState[0].offset = vD_off;
+ d->fxState[0].size = sizeof(U128);
+
+ /* execute the dirty call, side-effecting guest state */
+ stmt( IRStmt_Dirty(d) );
+ break;
+ }
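+   /* For reference (AV ISA): with sh = EA & 0xF, lvsl yields the
+      byte sequence (sh, sh+1, .., sh+15) and lvsr yields
+      (16-sh, .., 31-sh); the dirty helpers above write that
+      permute-control vector straight into the guest vD. */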
+ case 0x007: // lvebx (Load Vector Element Byte Indexed, AV p119)
+ DIP("lvebx v%d,r%u,r%u\n", vD_addr, rA_addr, rB_addr);
+      /* loads the addressed byte into the vector element selected by
+         the low 4 bits of EA; since all other destination bytes are
+         undefined, we can simply load the entire vector from the
+         16-aligned EA */
+ putVReg( vD_addr, loadBE(Ity_V128, mkexpr(EA_align16)) );
+ break;
+
+ case 0x027: // lvehx (Load Vector Element Half Word Indexed, AV p121)
+ DIP("lvehx v%d,r%u,r%u\n", vD_addr, rA_addr, rB_addr);
+ /* see note for lvebx */
+ putVReg( vD_addr, loadBE(Ity_V128, mkexpr(EA_align16)) );
+ break;
+
+ case 0x047: // lvewx (Load Vector Element Word Indexed, AV p122)
+ DIP("lvewx v%d,r%u,r%u\n", vD_addr, rA_addr, rB_addr);
+ /* see note for lvebx */
+ putVReg( vD_addr, loadBE(Ity_V128, mkexpr(EA_align16)) );
+ break;
+
+ case 0x067: // lvx (Load Vector Indexed, AV p127)
+ DIP("lvx v%d,r%u,r%u\n", vD_addr, rA_addr, rB_addr);
+ putVReg( vD_addr, loadBE(Ity_V128, mkexpr(EA_align16)) );
+ break;
+
+ case 0x167: // lvxl (Load Vector Indexed LRU, AV p128)
+ DIP("lvxl v%d,r%u,r%u\n", vD_addr, rA_addr, rB_addr);
+ putVReg( vD_addr, loadBE(Ity_V128, mkexpr(EA_align16)) );
+ break;
+
+ default:
+ vex_printf("dis_av_load(ppc)(opc2)\n");
+ return False;
+ }
+ return True;
+}
+
+
+/*
+ AltiVec Store Instructions
+*/
+static Bool dis_av_store ( UInt theInstr )
+{
+ /* X-Form */
+ UChar opc1 = ifieldOPC(theInstr);
+ UChar vS_addr = ifieldRegDS(theInstr);
+ UChar rA_addr = ifieldRegA(theInstr);
+ UChar rB_addr = ifieldRegB(theInstr);
+ UInt opc2 = ifieldOPClo10(theInstr);
+ UChar b0 = ifieldBIT0(theInstr);
+
+ IRType ty = mode64 ? Ity_I64 : Ity_I32;
+ IRTemp EA = newTemp(ty);
+ IRTemp addr_aligned = newTemp(ty);
+ IRTemp vS = newTemp(Ity_V128);
+ IRTemp eb = newTemp(Ity_I8);
+ IRTemp idx = newTemp(Ity_I8);
+
+ if (opc1 != 0x1F || b0 != 0) {
+ vex_printf("dis_av_store(ppc)(instr)\n");
+ return False;
+ }
+
+ assign( vS, getVReg(vS_addr));
+ assign( EA, ea_rAor0_idxd(rA_addr, rB_addr) );
+
+ switch (opc2) {
+ case 0x087: { // stvebx (Store Vector Byte Indexed, AV p131)
+ DIP("stvebx v%d,r%u,r%u\n", vS_addr, rA_addr, rB_addr);
+ assign( eb, binop(Iop_And8, mkU8(0xF),
+ unop(Iop_32to8,
+ mkNarrowTo32(ty, mkexpr(EA)) )) );
+ assign( idx, binop(Iop_Shl8,
+ binop(Iop_Sub8, mkU8(15), mkexpr(eb)),
+ mkU8(3)) );
+ storeBE( mkexpr(EA),
+ unop(Iop_32to8, unop(Iop_V128to32,
+ binop(Iop_ShrV128, mkexpr(vS), mkexpr(idx)))) );
+ break;
+ }
+ case 0x0A7: { // stvehx (Store Vector Half Word Indexed, AV p132)
+ DIP("stvehx v%d,r%u,r%u\n", vS_addr, rA_addr, rB_addr);
+ assign( addr_aligned, addr_align(mkexpr(EA), 2) );
+ assign( eb, binop(Iop_And8, mkU8(0xF),
+ mkNarrowTo8(ty, mkexpr(addr_aligned) )) );
+ assign( idx, binop(Iop_Shl8,
+ binop(Iop_Sub8, mkU8(14), mkexpr(eb)),
+ mkU8(3)) );
+ storeBE( mkexpr(addr_aligned),
+ unop(Iop_32to16, unop(Iop_V128to32,
+ binop(Iop_ShrV128, mkexpr(vS), mkexpr(idx)))) );
+ break;
+ }
+ case 0x0C7: { // stvewx (Store Vector Word Indexed, AV p133)
+ DIP("stvewx v%d,r%u,r%u\n", vS_addr, rA_addr, rB_addr);
+ assign( addr_aligned, addr_align(mkexpr(EA), 4) );
+ assign( eb, binop(Iop_And8, mkU8(0xF),
+ mkNarrowTo8(ty, mkexpr(addr_aligned) )) );
+ assign( idx, binop(Iop_Shl8,
+ binop(Iop_Sub8, mkU8(12), mkexpr(eb)),
+ mkU8(3)) );
+ storeBE( mkexpr(addr_aligned),
+ unop(Iop_V128to32,
+ binop(Iop_ShrV128, mkexpr(vS), mkexpr(idx))) );
+ break;
+ }
+
+ case 0x0E7: // stvx (Store Vector Indexed, AV p134)
+ DIP("stvx v%d,r%u,r%u\n", vS_addr, rA_addr, rB_addr);
+ storeBE( addr_align( mkexpr(EA), 16 ), mkexpr(vS) );
+ break;
+
+ case 0x1E7: // stvxl (Store Vector Indexed LRU, AV p135)
+ DIP("stvxl v%d,r%u,r%u\n", vS_addr, rA_addr, rB_addr);
+ storeBE( addr_align( mkexpr(EA), 16 ), mkexpr(vS) );
+ break;
+
+ default:
+ vex_printf("dis_av_store(ppc)(opc2)\n");
+ return False;
+ }
+ return True;
+}
+
+/*
+ AltiVec Arithmetic Instructions
+*/
+static Bool dis_av_arith ( UInt theInstr )
+{
+ /* VX-Form */
+ UChar opc1 = ifieldOPC(theInstr);
+ UChar vD_addr = ifieldRegDS(theInstr);
+ UChar vA_addr = ifieldRegA(theInstr);
+ UChar vB_addr = ifieldRegB(theInstr);
+ UInt opc2 = IFIELD( theInstr, 0, 11 );
+
+ IRTemp vA = newTemp(Ity_V128);
+ IRTemp vB = newTemp(Ity_V128);
+ IRTemp z3 = newTemp(Ity_I64);
+ IRTemp z2 = newTemp(Ity_I64);
+ IRTemp z1 = newTemp(Ity_I64);
+ IRTemp z0 = newTemp(Ity_I64);
+ IRTemp aEvn, aOdd;
+ IRTemp a15, a14, a13, a12, a11, a10, a9, a8;
+ IRTemp a7, a6, a5, a4, a3, a2, a1, a0;
+ IRTemp b3, b2, b1, b0;
+
+ aEvn = aOdd = IRTemp_INVALID;
+ a15 = a14 = a13 = a12 = a11 = a10 = a9 = a8 = IRTemp_INVALID;
+ a7 = a6 = a5 = a4 = a3 = a2 = a1 = a0 = IRTemp_INVALID;
+ b3 = b2 = b1 = b0 = IRTemp_INVALID;
+
+ assign( vA, getVReg(vA_addr));
+ assign( vB, getVReg(vB_addr));
+
+ if (opc1 != 0x4) {
+ vex_printf("dis_av_arith(ppc)(opc1 != 0x4)\n");
+ return False;
+ }
+
+ switch (opc2) {
+ /* Add */
+ case 0x180: { // vaddcuw (Add Carryout Unsigned Word, AV p136)
+ DIP("vaddcuw v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ /* unsigned_ov(x+y) = (y >u not(x)) */
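+      /* (x+y carries out of 32 bits iff y > 0xFFFFFFFF - x = ~x;
+         the compare gives all-ones lanes for true, and >>31 reduces
+         that to the architected 0/1 per word.) */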
+ putVReg( vD_addr, binop(Iop_ShrN32x4,
+ binop(Iop_CmpGT32Ux4, mkexpr(vB),
+ unop(Iop_NotV128, mkexpr(vA))),
+ mkU8(31)) );
+ break;
+ }
+ case 0x000: // vaddubm (Add Unsigned Byte Modulo, AV p141)
+ DIP("vaddubm v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ putVReg( vD_addr, binop(Iop_Add8x16, mkexpr(vA), mkexpr(vB)) );
+ break;
+
+ case 0x040: // vadduhm (Add Unsigned Half Word Modulo, AV p143)
+ DIP("vadduhm v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ putVReg( vD_addr, binop(Iop_Add16x8, mkexpr(vA), mkexpr(vB)) );
+ break;
+
+ case 0x080: // vadduwm (Add Unsigned Word Modulo, AV p145)
+ DIP("vadduwm v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ putVReg( vD_addr, binop(Iop_Add32x4, mkexpr(vA), mkexpr(vB)) );
+ break;
+
+ case 0x200: // vaddubs (Add Unsigned Byte Saturate, AV p142)
+ DIP("vaddubs v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ putVReg( vD_addr, binop(Iop_QAdd8Ux16, mkexpr(vA), mkexpr(vB)) );
+ // TODO: set VSCR[SAT], perhaps via new primop: Iop_SatOfQAdd8Ux16
+ break;
+
+ case 0x240: // vadduhs (Add Unsigned Half Word Saturate, AV p144)
+ DIP("vadduhs v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ putVReg( vD_addr, binop(Iop_QAdd16Ux8, mkexpr(vA), mkexpr(vB)) );
+ // TODO: set VSCR[SAT]
+ break;
+
+ case 0x280: // vadduws (Add Unsigned Word Saturate, AV p146)
+ DIP("vadduws v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ putVReg( vD_addr, binop(Iop_QAdd32Ux4, mkexpr(vA), mkexpr(vB)) );
+ // TODO: set VSCR[SAT]
+ break;
+
+ case 0x300: // vaddsbs (Add Signed Byte Saturate, AV p138)
+ DIP("vaddsbs v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ putVReg( vD_addr, binop(Iop_QAdd8Sx16, mkexpr(vA), mkexpr(vB)) );
+ // TODO: set VSCR[SAT]
+ break;
+
+ case 0x340: // vaddshs (Add Signed Half Word Saturate, AV p139)
+ DIP("vaddshs v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ putVReg( vD_addr, binop(Iop_QAdd16Sx8, mkexpr(vA), mkexpr(vB)) );
+ // TODO: set VSCR[SAT]
+ break;
+
+ case 0x380: // vaddsws (Add Signed Word Saturate, AV p140)
+ DIP("vaddsws v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ putVReg( vD_addr, binop(Iop_QAdd32Sx4, mkexpr(vA), mkexpr(vB)) );
+ // TODO: set VSCR[SAT]
+ break;
+
+
+ /* Subtract */
+ case 0x580: { // vsubcuw (Subtract Carryout Unsigned Word, AV p260)
+ DIP("vsubcuw v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ /* unsigned_ov(x-y) = (y >u x) */
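+      /* (vD gets 1 exactly when no borrow occurs, i.e. x >=u y,
+         hence the NotV128 around the compare before the >>31
+         reduction.) */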
+ putVReg( vD_addr, binop(Iop_ShrN32x4,
+ unop(Iop_NotV128,
+ binop(Iop_CmpGT32Ux4, mkexpr(vB),
+ mkexpr(vA))),
+ mkU8(31)) );
+ break;
+ }
+ case 0x400: // vsububm (Subtract Unsigned Byte Modulo, AV p265)
+ DIP("vsububm v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ putVReg( vD_addr, binop(Iop_Sub8x16, mkexpr(vA), mkexpr(vB)) );
+ break;
+
+ case 0x440: // vsubuhm (Subtract Unsigned Half Word Modulo, AV p267)
+ DIP("vsubuhm v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ putVReg( vD_addr, binop(Iop_Sub16x8, mkexpr(vA), mkexpr(vB)) );
+ break;
+
+ case 0x480: // vsubuwm (Subtract Unsigned Word Modulo, AV p269)
+ DIP("vsubuwm v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ putVReg( vD_addr, binop(Iop_Sub32x4, mkexpr(vA), mkexpr(vB)) );
+ break;
+
+ case 0x600: // vsububs (Subtract Unsigned Byte Saturate, AV p266)
+ DIP("vsububs v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ putVReg( vD_addr, binop(Iop_QSub8Ux16, mkexpr(vA), mkexpr(vB)) );
+ // TODO: set VSCR[SAT]
+ break;
+
+ case 0x640: // vsubuhs (Subtract Unsigned HWord Saturate, AV p268)
+ DIP("vsubuhs v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ putVReg( vD_addr, binop(Iop_QSub16Ux8, mkexpr(vA), mkexpr(vB)) );
+ // TODO: set VSCR[SAT]
+ break;
+
+ case 0x680: // vsubuws (Subtract Unsigned Word Saturate, AV p270)
+ DIP("vsubuws v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ putVReg( vD_addr, binop(Iop_QSub32Ux4, mkexpr(vA), mkexpr(vB)) );
+ // TODO: set VSCR[SAT]
+ break;
+
+ case 0x700: // vsubsbs (Subtract Signed Byte Saturate, AV p262)
+ DIP("vsubsbs v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ putVReg( vD_addr, binop(Iop_QSub8Sx16, mkexpr(vA), mkexpr(vB)) );
+ // TODO: set VSCR[SAT]
+ break;
+
+ case 0x740: // vsubshs (Subtract Signed Half Word Saturate, AV p263)
+ DIP("vsubshs v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ putVReg( vD_addr, binop(Iop_QSub16Sx8, mkexpr(vA), mkexpr(vB)) );
+ // TODO: set VSCR[SAT]
+ break;
+
+ case 0x780: // vsubsws (Subtract Signed Word Saturate, AV p264)
+ DIP("vsubsws v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ putVReg( vD_addr, binop(Iop_QSub32Sx4, mkexpr(vA), mkexpr(vB)) );
+ // TODO: set VSCR[SAT]
+ break;
+
+
+ /* Maximum */
+ case 0x002: // vmaxub (Maximum Unsigned Byte, AV p182)
+ DIP("vmaxub v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ putVReg( vD_addr, binop(Iop_Max8Ux16, mkexpr(vA), mkexpr(vB)) );
+ break;
+
+ case 0x042: // vmaxuh (Maximum Unsigned Half Word, AV p183)
+ DIP("vmaxuh v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ putVReg( vD_addr, binop(Iop_Max16Ux8, mkexpr(vA), mkexpr(vB)) );
+ break;
+
+ case 0x082: // vmaxuw (Maximum Unsigned Word, AV p184)
+ DIP("vmaxuw v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ putVReg( vD_addr, binop(Iop_Max32Ux4, mkexpr(vA), mkexpr(vB)) );
+ break;
+
+ case 0x102: // vmaxsb (Maximum Signed Byte, AV p179)
+ DIP("vmaxsb v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ putVReg( vD_addr, binop(Iop_Max8Sx16, mkexpr(vA), mkexpr(vB)) );
+ break;
+
+ case 0x142: // vmaxsh (Maximum Signed Half Word, AV p180)
+ DIP("vmaxsh v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ putVReg( vD_addr, binop(Iop_Max16Sx8, mkexpr(vA), mkexpr(vB)) );
+ break;
+
+ case 0x182: // vmaxsw (Maximum Signed Word, AV p181)
+ DIP("vmaxsw v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ putVReg( vD_addr, binop(Iop_Max32Sx4, mkexpr(vA), mkexpr(vB)) );
+ break;
+
+
+ /* Minimum */
+ case 0x202: // vminub (Minimum Unsigned Byte, AV p191)
+ DIP("vminub v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ putVReg( vD_addr, binop(Iop_Min8Ux16, mkexpr(vA), mkexpr(vB)) );
+ break;
+
+ case 0x242: // vminuh (Minimum Unsigned Half Word, AV p192)
+ DIP("vminuh v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ putVReg( vD_addr, binop(Iop_Min16Ux8, mkexpr(vA), mkexpr(vB)) );
+ break;
+
+ case 0x282: // vminuw (Minimum Unsigned Word, AV p193)
+ DIP("vminuw v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ putVReg( vD_addr, binop(Iop_Min32Ux4, mkexpr(vA), mkexpr(vB)) );
+ break;
+
+ case 0x302: // vminsb (Minimum Signed Byte, AV p188)
+ DIP("vminsb v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ putVReg( vD_addr, binop(Iop_Min8Sx16, mkexpr(vA), mkexpr(vB)) );
+ break;
+
+ case 0x342: // vminsh (Minimum Signed Half Word, AV p189)
+ DIP("vminsh v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ putVReg( vD_addr, binop(Iop_Min16Sx8, mkexpr(vA), mkexpr(vB)) );
+ break;
+
+ case 0x382: // vminsw (Minimum Signed Word, AV p190)
+ DIP("vminsw v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ putVReg( vD_addr, binop(Iop_Min32Sx4, mkexpr(vA), mkexpr(vB)) );
+ break;
+
+
+ /* Average */
+ case 0x402: // vavgub (Average Unsigned Byte, AV p152)
+ DIP("vavgub v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ putVReg( vD_addr, binop(Iop_Avg8Ux16, mkexpr(vA), mkexpr(vB)) );
+ break;
+
+ case 0x442: // vavguh (Average Unsigned Half Word, AV p153)
+ DIP("vavguh v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ putVReg( vD_addr, binop(Iop_Avg16Ux8, mkexpr(vA), mkexpr(vB)) );
+ break;
+
+ case 0x482: // vavguw (Average Unsigned Word, AV p154)
+ DIP("vavguw v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ putVReg( vD_addr, binop(Iop_Avg32Ux4, mkexpr(vA), mkexpr(vB)) );
+ break;
+
+ case 0x502: // vavgsb (Average Signed Byte, AV p149)
+ DIP("vavgsb v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ putVReg( vD_addr, binop(Iop_Avg8Sx16, mkexpr(vA), mkexpr(vB)) );
+ break;
+
+ case 0x542: // vavgsh (Average Signed Half Word, AV p150)
+ DIP("vavgsh v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ putVReg( vD_addr, binop(Iop_Avg16Sx8, mkexpr(vA), mkexpr(vB)) );
+ break;
+
+ case 0x582: // vavgsw (Average Signed Word, AV p151)
+ DIP("vavgsw v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ putVReg( vD_addr, binop(Iop_Avg32Sx4, mkexpr(vA), mkexpr(vB)) );
+ break;
+
+
+ /* Multiply */
+ case 0x008: // vmuloub (Multiply Odd Unsigned Byte, AV p213)
+ DIP("vmuloub v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ putVReg( vD_addr,
+ binop(Iop_MullEven8Ux16, mkexpr(vA), mkexpr(vB)));
+ break;
+
+ case 0x048: // vmulouh (Multiply Odd Unsigned Half Word, AV p214)
+ DIP("vmulouh v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ putVReg( vD_addr,
+ binop(Iop_MullEven16Ux8, mkexpr(vA), mkexpr(vB)));
+ break;
+
+ case 0x108: // vmulosb (Multiply Odd Signed Byte, AV p211)
+ DIP("vmulosb v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ putVReg( vD_addr,
+ binop(Iop_MullEven8Sx16, mkexpr(vA), mkexpr(vB)));
+ break;
+
+ case 0x148: // vmulosh (Multiply Odd Signed Half Word, AV p212)
+ DIP("vmulosh v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ putVReg( vD_addr,
+ binop(Iop_MullEven16Sx8, mkexpr(vA), mkexpr(vB)));
+ break;
+
+ case 0x208: // vmuleub (Multiply Even Unsigned Byte, AV p209)
+ DIP("vmuleub v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ putVReg( vD_addr, MK_Iop_MullOdd8Ux16( mkexpr(vA), mkexpr(vB) ));
+ break;
+
+ case 0x248: // vmuleuh (Multiply Even Unsigned Half Word, AV p210)
+ DIP("vmuleuh v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ putVReg( vD_addr, MK_Iop_MullOdd16Ux8( mkexpr(vA), mkexpr(vB) ));
+ break;
+
+ case 0x308: // vmulesb (Multiply Even Signed Byte, AV p207)
+ DIP("vmulesb v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ putVReg( vD_addr, MK_Iop_MullOdd8Sx16( mkexpr(vA), mkexpr(vB) ));
+ break;
+
+ case 0x348: // vmulesh (Multiply Even Signed Half Word, AV p208)
+ DIP("vmulesh v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ putVReg( vD_addr, MK_Iop_MullOdd16Sx8( mkexpr(vA), mkexpr(vB) ));
+ break;
+
+
+ /* Sum Across Partial */
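+   /* Strategy for the vsum* cases below: widen the lanes out to
+      I64s so intermediate sums cannot overflow, add with plain
+      Iop_Add64, then saturate exactly once at the end via
+      mkV128from4x64S/U -- mirroring the single final saturation
+      the insns specify. */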
+ case 0x608: { // vsum4ubs (Sum Partial (1/4) UB Saturate, AV p275)
+ IRTemp aEE, aEO, aOE, aOO;
+ aEE = aEO = aOE = aOO = IRTemp_INVALID;
+ DIP("vsum4ubs v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+
+      /* vA: V128_8Ux16 -> 4 x V128_32Ux4, zero-extended */
+ expand8Ux16( mkexpr(vA), &aEvn, &aOdd ); // (15,13...),(14,12...)
+ expand16Ux8( mkexpr(aEvn), &aEE, &aEO ); // (15,11...),(13, 9...)
+ expand16Ux8( mkexpr(aOdd), &aOE, &aOO ); // (14,10...),(12, 8...)
+
+ /* break V128 to 4xI32's, zero-extending to I64's */
+ breakV128to4x64U( mkexpr(aEE), &a15, &a11, &a7, &a3 );
+ breakV128to4x64U( mkexpr(aOE), &a14, &a10, &a6, &a2 );
+ breakV128to4x64U( mkexpr(aEO), &a13, &a9, &a5, &a1 );
+ breakV128to4x64U( mkexpr(aOO), &a12, &a8, &a4, &a0 );
+ breakV128to4x64U( mkexpr(vB), &b3, &b2, &b1, &b0 );
+
+ /* add lanes */
+ assign( z3, binop(Iop_Add64, mkexpr(b3),
+ binop(Iop_Add64,
+ binop(Iop_Add64, mkexpr(a15), mkexpr(a14)),
+ binop(Iop_Add64, mkexpr(a13), mkexpr(a12)))) );
+ assign( z2, binop(Iop_Add64, mkexpr(b2),
+ binop(Iop_Add64,
+ binop(Iop_Add64, mkexpr(a11), mkexpr(a10)),
+ binop(Iop_Add64, mkexpr(a9), mkexpr(a8)))) );
+ assign( z1, binop(Iop_Add64, mkexpr(b1),
+ binop(Iop_Add64,
+ binop(Iop_Add64, mkexpr(a7), mkexpr(a6)),
+ binop(Iop_Add64, mkexpr(a5), mkexpr(a4)))) );
+ assign( z0, binop(Iop_Add64, mkexpr(b0),
+ binop(Iop_Add64,
+ binop(Iop_Add64, mkexpr(a3), mkexpr(a2)),
+ binop(Iop_Add64, mkexpr(a1), mkexpr(a0)))) );
+
+ /* saturate-narrow to 32bit, and combine to V128 */
+ putVReg( vD_addr, mkV128from4x64U( mkexpr(z3), mkexpr(z2),
+ mkexpr(z1), mkexpr(z0)) );
+ break;
+ }
+ case 0x708: { // vsum4sbs (Sum Partial (1/4) SB Saturate, AV p273)
+ IRTemp aEE, aEO, aOE, aOO;
+ aEE = aEO = aOE = aOO = IRTemp_INVALID;
+ DIP("vsum4sbs v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+
+ /* vA: V128_8Sx16 -> 4 x V128_32Sx4, sign-extended */
+ expand8Sx16( mkexpr(vA), &aEvn, &aOdd ); // (15,13...),(14,12...)
+ expand16Sx8( mkexpr(aEvn), &aEE, &aEO ); // (15,11...),(13, 9...)
+ expand16Sx8( mkexpr(aOdd), &aOE, &aOO ); // (14,10...),(12, 8...)
+
+ /* break V128 to 4xI32's, sign-extending to I64's */
+ breakV128to4x64S( mkexpr(aEE), &a15, &a11, &a7, &a3 );
+ breakV128to4x64S( mkexpr(aOE), &a14, &a10, &a6, &a2 );
+ breakV128to4x64S( mkexpr(aEO), &a13, &a9, &a5, &a1 );
+ breakV128to4x64S( mkexpr(aOO), &a12, &a8, &a4, &a0 );
+ breakV128to4x64S( mkexpr(vB), &b3, &b2, &b1, &b0 );
+
+ /* add lanes */
+ assign( z3, binop(Iop_Add64, mkexpr(b3),
+ binop(Iop_Add64,
+ binop(Iop_Add64, mkexpr(a15), mkexpr(a14)),
+ binop(Iop_Add64, mkexpr(a13), mkexpr(a12)))) );
+ assign( z2, binop(Iop_Add64, mkexpr(b2),
+ binop(Iop_Add64,
+ binop(Iop_Add64, mkexpr(a11), mkexpr(a10)),
+ binop(Iop_Add64, mkexpr(a9), mkexpr(a8)))) );
+ assign( z1, binop(Iop_Add64, mkexpr(b1),
+ binop(Iop_Add64,
+ binop(Iop_Add64, mkexpr(a7), mkexpr(a6)),
+ binop(Iop_Add64, mkexpr(a5), mkexpr(a4)))) );
+ assign( z0, binop(Iop_Add64, mkexpr(b0),
+ binop(Iop_Add64,
+ binop(Iop_Add64, mkexpr(a3), mkexpr(a2)),
+ binop(Iop_Add64, mkexpr(a1), mkexpr(a0)))) );
+
+ /* saturate-narrow to 32bit, and combine to V128 */
+ putVReg( vD_addr, mkV128from4x64S( mkexpr(z3), mkexpr(z2),
+ mkexpr(z1), mkexpr(z0)) );
+ break;
+ }
+ case 0x648: { // vsum4shs (Sum Partial (1/4) SHW Saturate, AV p274)
+ DIP("vsum4shs v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+
+ /* vA: V128_16Sx8 -> 2 x V128_32Sx4, sign-extended */
+ expand16Sx8( mkexpr(vA), &aEvn, &aOdd ); // (7,5...),(6,4...)
+
+ /* break V128 to 4xI32's, sign-extending to I64's */
+ breakV128to4x64S( mkexpr(aEvn), &a7, &a5, &a3, &a1 );
+ breakV128to4x64S( mkexpr(aOdd), &a6, &a4, &a2, &a0 );
+ breakV128to4x64S( mkexpr(vB), &b3, &b2, &b1, &b0 );
+
+ /* add lanes */
+ assign( z3, binop(Iop_Add64, mkexpr(b3),
+ binop(Iop_Add64, mkexpr(a7), mkexpr(a6))));
+ assign( z2, binop(Iop_Add64, mkexpr(b2),
+ binop(Iop_Add64, mkexpr(a5), mkexpr(a4))));
+ assign( z1, binop(Iop_Add64, mkexpr(b1),
+ binop(Iop_Add64, mkexpr(a3), mkexpr(a2))));
+ assign( z0, binop(Iop_Add64, mkexpr(b0),
+ binop(Iop_Add64, mkexpr(a1), mkexpr(a0))));
+
+ /* saturate-narrow to 32bit, and combine to V128 */
+ putVReg( vD_addr, mkV128from4x64S( mkexpr(z3), mkexpr(z2),
+ mkexpr(z1), mkexpr(z0)) );
+ break;
+ }
+ case 0x688: { // vsum2sws (Sum Partial (1/2) SW Saturate, AV p272)
+ DIP("vsum2sws v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+
+ /* break V128 to 4xI32's, sign-extending to I64's */
+ breakV128to4x64S( mkexpr(vA), &a3, &a2, &a1, &a0 );
+ breakV128to4x64S( mkexpr(vB), &b3, &b2, &b1, &b0 );
+
+ /* add lanes */
+ assign( z2, binop(Iop_Add64, mkexpr(b2),
+ binop(Iop_Add64, mkexpr(a3), mkexpr(a2))) );
+ assign( z0, binop(Iop_Add64, mkexpr(b0),
+ binop(Iop_Add64, mkexpr(a1), mkexpr(a0))) );
+
+ /* saturate-narrow to 32bit, and combine to V128 */
+ putVReg( vD_addr, mkV128from4x64S( mkU64(0), mkexpr(z2),
+ mkU64(0), mkexpr(z0)) );
+ break;
+ }
+ case 0x788: { // vsumsws (Sum SW Saturate, AV p271)
+ DIP("vsumsws v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+
+ /* break V128 to 4xI32's, sign-extending to I64's */
+ breakV128to4x64S( mkexpr(vA), &a3, &a2, &a1, &a0 );
+ breakV128to4x64S( mkexpr(vB), &b3, &b2, &b1, &b0 );
+
+ /* add lanes */
+ assign( z0, binop(Iop_Add64, mkexpr(b0),
+ binop(Iop_Add64,
+ binop(Iop_Add64, mkexpr(a3), mkexpr(a2)),
+ binop(Iop_Add64, mkexpr(a1), mkexpr(a0)))) );
+
+ /* saturate-narrow to 32bit, and combine to V128 */
+ putVReg( vD_addr, mkV128from4x64S( mkU64(0), mkU64(0),
+ mkU64(0), mkexpr(z0)) );
+ break;
+ }
+ default:
+ vex_printf("dis_av_arith(ppc)(opc2=0x%x)\n", opc2);
+ return False;
+ }
+ return True;
+}
+
+/*
+ AltiVec Logic Instructions
+*/
+static Bool dis_av_logic ( UInt theInstr )
+{
+ /* VX-Form */
+ UChar opc1 = ifieldOPC(theInstr);
+ UChar vD_addr = ifieldRegDS(theInstr);
+ UChar vA_addr = ifieldRegA(theInstr);
+ UChar vB_addr = ifieldRegB(theInstr);
+ UInt opc2 = IFIELD( theInstr, 0, 11 );
+
+ IRTemp vA = newTemp(Ity_V128);
+ IRTemp vB = newTemp(Ity_V128);
+ assign( vA, getVReg(vA_addr));
+ assign( vB, getVReg(vB_addr));
+
+ if (opc1 != 0x4) {
+ vex_printf("dis_av_logic(ppc)(opc1 != 0x4)\n");
+ return False;
+ }
+
+ switch (opc2) {
+ case 0x404: // vand (And, AV p147)
+ DIP("vand v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ putVReg( vD_addr, binop(Iop_AndV128, mkexpr(vA), mkexpr(vB)) );
+ break;
+
+ case 0x444: // vandc (And, AV p148)
+ DIP("vandc v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ putVReg( vD_addr, binop(Iop_AndV128, mkexpr(vA),
+ unop(Iop_NotV128, mkexpr(vB))) );
+ break;
+
+ case 0x484: // vor (Or, AV p217)
+ DIP("vor v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ putVReg( vD_addr, binop(Iop_OrV128, mkexpr(vA), mkexpr(vB)) );
+ break;
+
+ case 0x4C4: // vxor (Xor, AV p282)
+ DIP("vxor v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ putVReg( vD_addr, binop(Iop_XorV128, mkexpr(vA), mkexpr(vB)) );
+ break;
+
+ case 0x504: // vnor (Nor, AV p216)
+ DIP("vnor v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ putVReg( vD_addr,
+ unop(Iop_NotV128, binop(Iop_OrV128, mkexpr(vA), mkexpr(vB))) );
+ break;
+
+ default:
+ vex_printf("dis_av_logic(ppc)(opc2=0x%x)\n", opc2);
+ return False;
+ }
+ return True;
+}
+
+/*
+ AltiVec Compare Instructions
+*/
+static Bool dis_av_cmp ( UInt theInstr )
+{
+ /* VXR-Form */
+ UChar opc1 = ifieldOPC(theInstr);
+ UChar vD_addr = ifieldRegDS(theInstr);
+ UChar vA_addr = ifieldRegA(theInstr);
+ UChar vB_addr = ifieldRegB(theInstr);
+ UChar flag_rC = ifieldBIT10(theInstr);
+ UInt opc2 = IFIELD( theInstr, 0, 10 );
+
+ IRTemp vA = newTemp(Ity_V128);
+ IRTemp vB = newTemp(Ity_V128);
+ IRTemp vD = newTemp(Ity_V128);
+ assign( vA, getVReg(vA_addr));
+ assign( vB, getVReg(vB_addr));
+
+ if (opc1 != 0x4) {
+ vex_printf("dis_av_cmp(ppc)(instr)\n");
+ return False;
+ }
+
+ switch (opc2) {
+ case 0x006: // vcmpequb (Compare Equal-to Unsigned B, AV p160)
+ DIP("vcmpequb%s v%d,v%d,v%d\n", (flag_rC ? ".":""),
+ vD_addr, vA_addr, vB_addr);
+ assign( vD, binop(Iop_CmpEQ8x16, mkexpr(vA), mkexpr(vB)) );
+ break;
+
+ case 0x046: // vcmpequh (Compare Equal-to Unsigned HW, AV p161)
+ DIP("vcmpequh%s v%d,v%d,v%d\n", (flag_rC ? ".":""),
+ vD_addr, vA_addr, vB_addr);
+ assign( vD, binop(Iop_CmpEQ16x8, mkexpr(vA), mkexpr(vB)) );
+ break;
+
+ case 0x086: // vcmpequw (Compare Equal-to Unsigned W, AV p162)
+ DIP("vcmpequw%s v%d,v%d,v%d\n", (flag_rC ? ".":""),
+ vD_addr, vA_addr, vB_addr);
+ assign( vD, binop(Iop_CmpEQ32x4, mkexpr(vA), mkexpr(vB)) );
+ break;
+
+ case 0x206: // vcmpgtub (Compare Greater-than Unsigned B, AV p168)
+ DIP("vcmpgtub%s v%d,v%d,v%d\n", (flag_rC ? ".":""),
+ vD_addr, vA_addr, vB_addr);
+ assign( vD, binop(Iop_CmpGT8Ux16, mkexpr(vA), mkexpr(vB)) );
+ break;
+
+ case 0x246: // vcmpgtuh (Compare Greater-than Unsigned HW, AV p169)
+ DIP("vcmpgtuh%s v%d,v%d,v%d\n", (flag_rC ? ".":""),
+ vD_addr, vA_addr, vB_addr);
+ assign( vD, binop(Iop_CmpGT16Ux8, mkexpr(vA), mkexpr(vB)) );
+ break;
+
+ case 0x286: // vcmpgtuw (Compare Greater-than Unsigned W, AV p170)
+ DIP("vcmpgtuw%s v%d,v%d,v%d\n", (flag_rC ? ".":""),
+ vD_addr, vA_addr, vB_addr);
+ assign( vD, binop(Iop_CmpGT32Ux4, mkexpr(vA), mkexpr(vB)) );
+ break;
+
+ case 0x306: // vcmpgtsb (Compare Greater-than Signed B, AV p165)
+ DIP("vcmpgtsb%s v%d,v%d,v%d\n", (flag_rC ? ".":""),
+ vD_addr, vA_addr, vB_addr);
+ assign( vD, binop(Iop_CmpGT8Sx16, mkexpr(vA), mkexpr(vB)) );
+ break;
+
+ case 0x346: // vcmpgtsh (Compare Greater-than Signed HW, AV p166)
+ DIP("vcmpgtsh%s v%d,v%d,v%d\n", (flag_rC ? ".":""),
+ vD_addr, vA_addr, vB_addr);
+ assign( vD, binop(Iop_CmpGT16Sx8, mkexpr(vA), mkexpr(vB)) );
+ break;
+
+ case 0x386: // vcmpgtsw (Compare Greater-than Signed W, AV p167)
+ DIP("vcmpgtsw%s v%d,v%d,v%d\n", (flag_rC ? ".":""),
+ vD_addr, vA_addr, vB_addr);
+ assign( vD, binop(Iop_CmpGT32Sx4, mkexpr(vA), mkexpr(vB)) );
+ break;
+
+ default:
+ vex_printf("dis_av_cmp(ppc)(opc2)\n");
+ return False;
+ }
+
+ putVReg( vD_addr, mkexpr(vD) );
+
+ if (flag_rC) {
+ set_AV_CR6( mkexpr(vD), True );
+ }
+ return True;
+}
+
+/*
+ AltiVec Multiply-Sum Instructions
+*/
+static Bool dis_av_multarith ( UInt theInstr )
+{
+ /* VA-Form */
+ UChar opc1 = ifieldOPC(theInstr);
+ UChar vD_addr = ifieldRegDS(theInstr);
+ UChar vA_addr = ifieldRegA(theInstr);
+ UChar vB_addr = ifieldRegB(theInstr);
+ UChar vC_addr = ifieldRegC(theInstr);
+ UChar opc2 = toUChar( IFIELD( theInstr, 0, 6 ) );
+
+ IRTemp vA = newTemp(Ity_V128);
+ IRTemp vB = newTemp(Ity_V128);
+ IRTemp vC = newTemp(Ity_V128);
+ IRTemp zeros = newTemp(Ity_V128);
+ IRTemp aLo = newTemp(Ity_V128);
+ IRTemp bLo = newTemp(Ity_V128);
+ IRTemp cLo = newTemp(Ity_V128);
+ IRTemp zLo = newTemp(Ity_V128);
+ IRTemp aHi = newTemp(Ity_V128);
+ IRTemp bHi = newTemp(Ity_V128);
+ IRTemp cHi = newTemp(Ity_V128);
+ IRTemp zHi = newTemp(Ity_V128);
+ IRTemp abEvn = newTemp(Ity_V128);
+ IRTemp abOdd = newTemp(Ity_V128);
+ IRTemp z3 = newTemp(Ity_I64);
+ IRTemp z2 = newTemp(Ity_I64);
+ IRTemp z1 = newTemp(Ity_I64);
+ IRTemp z0 = newTemp(Ity_I64);
+ IRTemp ab7, ab6, ab5, ab4, ab3, ab2, ab1, ab0;
+ IRTemp c3, c2, c1, c0;
+
+ ab7 = ab6 = ab5 = ab4 = ab3 = ab2 = ab1 = ab0 = IRTemp_INVALID;
+ c3 = c2 = c1 = c0 = IRTemp_INVALID;
+
+ assign( vA, getVReg(vA_addr));
+ assign( vB, getVReg(vB_addr));
+ assign( vC, getVReg(vC_addr));
+ assign( zeros, unop(Iop_Dup32x4, mkU32(0)) );
+
+ if (opc1 != 0x4) {
+ vex_printf("dis_av_multarith(ppc)(instr)\n");
+ return False;
+ }
+
+ switch (opc2) {
+ /* Multiply-Add */
+ case 0x20: { // vmhaddshs (Mult Hi, Add Signed HW Saturate, AV p185)
+ IRTemp cSigns = newTemp(Ity_V128);
+ DIP("vmhaddshs v%d,v%d,v%d,v%d\n",
+ vD_addr, vA_addr, vB_addr, vC_addr);
+ assign(cSigns, binop(Iop_CmpGT16Sx8, mkexpr(zeros), mkexpr(vC)));
+ assign(aLo, binop(Iop_InterleaveLO16x8, mkexpr(zeros), mkexpr(vA)));
+ assign(bLo, binop(Iop_InterleaveLO16x8, mkexpr(zeros), mkexpr(vB)));
+ assign(cLo, binop(Iop_InterleaveLO16x8, mkexpr(cSigns),mkexpr(vC)));
+ assign(aHi, binop(Iop_InterleaveHI16x8, mkexpr(zeros), mkexpr(vA)));
+ assign(bHi, binop(Iop_InterleaveHI16x8, mkexpr(zeros), mkexpr(vB)));
+ assign(cHi, binop(Iop_InterleaveHI16x8, mkexpr(cSigns),mkexpr(vC)));
+
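+      /* The interleaves give each 16-bit element its own 32-bit lane
+         (vA and vB padded with zeros -- the padding occupies lanes
+         MullEven16Sx8 does not read -- and vC genuinely sign-extended,
+         since it is added as a 32-bit value).  MullEven16Sx8 then
+         yields full 32-bit products; the >>15 below realigns them and
+         QNarrow32Sx4 saturates back to halfwords. */
+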
+ assign( zLo, binop(Iop_Add32x4, mkexpr(cLo),
+ binop(Iop_SarN32x4,
+ binop(Iop_MullEven16Sx8,
+ mkexpr(aLo), mkexpr(bLo)),
+ mkU8(15))) );
+
+ assign( zHi, binop(Iop_Add32x4, mkexpr(cHi),
+ binop(Iop_SarN32x4,
+ binop(Iop_MullEven16Sx8,
+ mkexpr(aHi), mkexpr(bHi)),
+ mkU8(15))) );
+
+ putVReg( vD_addr,
+ binop(Iop_QNarrow32Sx4, mkexpr(zHi), mkexpr(zLo)) );
+ break;
+ }
+ case 0x21: { // vmhraddshs (Mult High Round, Add Signed HW Saturate, AV p186)
+ IRTemp zKonst = newTemp(Ity_V128);
+ IRTemp cSigns = newTemp(Ity_V128);
+ DIP("vmhraddshs v%d,v%d,v%d,v%d\n",
+ vD_addr, vA_addr, vB_addr, vC_addr);
+ assign(cSigns, binop(Iop_CmpGT16Sx8, mkexpr(zeros), mkexpr(vC)) );
+ assign(aLo, binop(Iop_InterleaveLO16x8, mkexpr(zeros), mkexpr(vA)));
+ assign(bLo, binop(Iop_InterleaveLO16x8, mkexpr(zeros), mkexpr(vB)));
+ assign(cLo, binop(Iop_InterleaveLO16x8, mkexpr(cSigns),mkexpr(vC)));
+ assign(aHi, binop(Iop_InterleaveHI16x8, mkexpr(zeros), mkexpr(vA)));
+ assign(bHi, binop(Iop_InterleaveHI16x8, mkexpr(zeros), mkexpr(vB)));
+ assign(cHi, binop(Iop_InterleaveHI16x8, mkexpr(cSigns),mkexpr(vC)));
+
+ /* shifting our const avoids store/load version of Dup */
+ assign( zKonst, binop(Iop_ShlN32x4, unop(Iop_Dup32x4, mkU32(0x1)),
+ mkU8(14)) );
+
+ assign( zLo, binop(Iop_Add32x4, mkexpr(cLo),
+ binop(Iop_SarN32x4,
+ binop(Iop_Add32x4, mkexpr(zKonst),
+ binop(Iop_MullEven16Sx8,
+ mkexpr(aLo), mkexpr(bLo))),
+ mkU8(15))) );
+
+ assign( zHi, binop(Iop_Add32x4, mkexpr(cHi),
+ binop(Iop_SarN32x4,
+ binop(Iop_Add32x4, mkexpr(zKonst),
+ binop(Iop_MullEven16Sx8,
+ mkexpr(aHi), mkexpr(bHi))),
+ mkU8(15))) );
+
+ putVReg( vD_addr, binop(Iop_QNarrow32Sx4, mkexpr(zHi), mkexpr(zLo)) );
+ break;
+ }
+ case 0x22: { // vmladduhm (Mult Low, Add Unsigned HW Modulo, AV p194)
+ DIP("vmladduhm v%d,v%d,v%d,v%d\n",
+ vD_addr, vA_addr, vB_addr, vC_addr);
+ assign(aLo, binop(Iop_InterleaveLO16x8, mkexpr(zeros), mkexpr(vA)));
+ assign(bLo, binop(Iop_InterleaveLO16x8, mkexpr(zeros), mkexpr(vB)));
+ assign(cLo, binop(Iop_InterleaveLO16x8, mkexpr(zeros), mkexpr(vC)));
+ assign(aHi, binop(Iop_InterleaveHI16x8, mkexpr(zeros), mkexpr(vA)));
+ assign(bHi, binop(Iop_InterleaveHI16x8, mkexpr(zeros), mkexpr(vB)));
+ assign(cHi, binop(Iop_InterleaveHI16x8, mkexpr(zeros), mkexpr(vC)));
+ assign(zLo, binop(Iop_Add32x4,
+ binop(Iop_MullEven16Ux8, mkexpr(aLo), mkexpr(bLo)),
+ mkexpr(cLo)) );
+ assign(zHi, binop(Iop_Add32x4,
+ binop(Iop_MullEven16Ux8, mkexpr(aHi), mkexpr(bHi)),
+ mkexpr(cHi)));
+ putVReg(vD_addr, binop(Iop_Narrow32x4, mkexpr(zHi), mkexpr(zLo)));
+ break;
+ }
+
+
+ /* Multiply-Sum */
+ case 0x24: { // vmsumubm (Multiply Sum Unsigned B Modulo, AV p204)
+ IRTemp abEE, abEO, abOE, abOO;
+ abEE = abEO = abOE = abOO = IRTemp_INVALID;
+ DIP("vmsumubm v%d,v%d,v%d,v%d\n",
+ vD_addr, vA_addr, vB_addr, vC_addr);
+
+ /* multiply vA,vB (unsigned, widening) */
+ assign( abEvn, MK_Iop_MullOdd8Ux16( mkexpr(vA), mkexpr(vB) ));
+ assign( abOdd, binop(Iop_MullEven8Ux16, mkexpr(vA), mkexpr(vB)) );
+
+ /* evn,odd: V128_16Ux8 -> 2 x V128_32Ux4, zero-extended */
+ expand16Ux8( mkexpr(abEvn), &abEE, &abEO );
+ expand16Ux8( mkexpr(abOdd), &abOE, &abOO );
+
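+      /* Per 32-bit lane, the sum below is
+         c + a0*b0 + a1*b1 + a2*b2 + a3*b3 over the four byte pairs
+         covered by that lane. */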
+ putVReg( vD_addr,
+ binop(Iop_Add32x4, mkexpr(vC),
+ binop(Iop_Add32x4,
+ binop(Iop_Add32x4, mkexpr(abEE), mkexpr(abEO)),
+ binop(Iop_Add32x4, mkexpr(abOE), mkexpr(abOO)))) );
+ break;
+ }
+ case 0x25: { // vmsummbm (Multiply Sum Mixed-Sign B Modulo, AV p201)
+ IRTemp aEvn, aOdd, bEvn, bOdd;
+ IRTemp abEE = newTemp(Ity_V128);
+ IRTemp abEO = newTemp(Ity_V128);
+ IRTemp abOE = newTemp(Ity_V128);
+ IRTemp abOO = newTemp(Ity_V128);
+ aEvn = aOdd = bEvn = bOdd = IRTemp_INVALID;
+ DIP("vmsummbm v%d,v%d,v%d,v%d\n",
+ vD_addr, vA_addr, vB_addr, vC_addr);
+
+ /* sign-extend vA, zero-extend vB, for mixed-sign multiply
+ (separating out adjacent lanes to different vectors) */
+ expand8Sx16( mkexpr(vA), &aEvn, &aOdd );
+ expand8Ux16( mkexpr(vB), &bEvn, &bOdd );
+
+ /* multiply vA, vB, again separating adjacent lanes */
+ assign( abEE, MK_Iop_MullOdd16Sx8( mkexpr(aEvn), mkexpr(bEvn) ));
+ assign( abEO, binop(Iop_MullEven16Sx8, mkexpr(aEvn), mkexpr(bEvn)) );
+ assign( abOE, MK_Iop_MullOdd16Sx8( mkexpr(aOdd), mkexpr(bOdd) ));
+ assign( abOO, binop(Iop_MullEven16Sx8, mkexpr(aOdd), mkexpr(bOdd)) );
+
+ /* add results together, + vC */
+ putVReg( vD_addr,
+ binop(Iop_QAdd32Sx4, mkexpr(vC),
+ binop(Iop_QAdd32Sx4,
+ binop(Iop_QAdd32Sx4, mkexpr(abEE), mkexpr(abEO)),
+ binop(Iop_QAdd32Sx4, mkexpr(abOE), mkexpr(abOO)))) );
+ break;
+ }
+ case 0x26: { // vmsumuhm (Multiply Sum Unsigned HW Modulo, AV p205)
+ DIP("vmsumuhm v%d,v%d,v%d,v%d\n",
+ vD_addr, vA_addr, vB_addr, vC_addr);
+ assign( abEvn, MK_Iop_MullOdd16Ux8( mkexpr(vA), mkexpr(vB) ));
+ assign( abOdd, binop(Iop_MullEven16Ux8, mkexpr(vA), mkexpr(vB)) );
+ putVReg( vD_addr,
+ binop(Iop_Add32x4, mkexpr(vC),
+ binop(Iop_Add32x4, mkexpr(abEvn), mkexpr(abOdd))) );
+ break;
+ }
+ case 0x27: { // vmsumuhs (Multiply Sum Unsigned HW Saturate, AV p206)
+ DIP("vmsumuhs v%d,v%d,v%d,v%d\n",
+ vD_addr, vA_addr, vB_addr, vC_addr);
+ /* widening multiply, separating lanes */
+ assign( abEvn, MK_Iop_MullOdd16Ux8(mkexpr(vA), mkexpr(vB) ));
+ assign( abOdd, binop(Iop_MullEven16Ux8, mkexpr(vA), mkexpr(vB)) );
+
+ /* break V128 to 4xI32's, zero-extending to I64's */
+ breakV128to4x64U( mkexpr(abEvn), &ab7, &ab5, &ab3, &ab1 );
+ breakV128to4x64U( mkexpr(abOdd), &ab6, &ab4, &ab2, &ab0 );
+ breakV128to4x64U( mkexpr(vC), &c3, &c2, &c1, &c0 );
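+      /* 64-bit lanes are needed here: c + ab_even + ab_odd can
+         overflow 32 bits, and the saturating narrow below must see
+         the true (unclipped) sum. */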
+
+ /* add lanes */
+ assign( z3, binop(Iop_Add64, mkexpr(c3),
+ binop(Iop_Add64, mkexpr(ab7), mkexpr(ab6))));
+ assign( z2, binop(Iop_Add64, mkexpr(c2),
+ binop(Iop_Add64, mkexpr(ab5), mkexpr(ab4))));
+ assign( z1, binop(Iop_Add64, mkexpr(c1),
+ binop(Iop_Add64, mkexpr(ab3), mkexpr(ab2))));
+ assign( z0, binop(Iop_Add64, mkexpr(c0),
+ binop(Iop_Add64, mkexpr(ab1), mkexpr(ab0))));
+
+ /* saturate-narrow to 32bit, and combine to V128 */
+ putVReg( vD_addr, mkV128from4x64U( mkexpr(z3), mkexpr(z2),
+ mkexpr(z1), mkexpr(z0)) );
+
+ break;
+ }
+ case 0x28: { // vmsumshm (Multiply Sum Signed HW Modulo, AV p202)
+ DIP("vmsumshm v%d,v%d,v%d,v%d\n",
+ vD_addr, vA_addr, vB_addr, vC_addr);
+ assign( abEvn, MK_Iop_MullOdd16Sx8( mkexpr(vA), mkexpr(vB) ));
+ assign( abOdd, binop(Iop_MullEven16Sx8, mkexpr(vA), mkexpr(vB)) );
+ putVReg( vD_addr,
+ binop(Iop_Add32x4, mkexpr(vC),
+ binop(Iop_Add32x4, mkexpr(abOdd), mkexpr(abEvn))) );
+ break;
+ }
+ case 0x29: { // vmsumshs (Multiply Sum Signed HW Saturate, AV p203)
+ DIP("vmsumshs v%d,v%d,v%d,v%d\n",
+ vD_addr, vA_addr, vB_addr, vC_addr);
+ /* widening multiply, separating lanes */
+ assign( abEvn, MK_Iop_MullOdd16Sx8( mkexpr(vA), mkexpr(vB) ));
+ assign( abOdd, binop(Iop_MullEven16Sx8, mkexpr(vA), mkexpr(vB)) );
+
+ /* break V128 to 4xI32's, sign-extending to I64's */
+ breakV128to4x64S( mkexpr(abEvn), &ab7, &ab5, &ab3, &ab1 );
+ breakV128to4x64S( mkexpr(abOdd), &ab6, &ab4, &ab2, &ab0 );
+ breakV128to4x64S( mkexpr(vC), &c3, &c2, &c1, &c0 );
+
+ /* add lanes */
+ assign( z3, binop(Iop_Add64, mkexpr(c3),
+ binop(Iop_Add64, mkexpr(ab7), mkexpr(ab6))));
+ assign( z2, binop(Iop_Add64, mkexpr(c2),
+ binop(Iop_Add64, mkexpr(ab5), mkexpr(ab4))));
+ assign( z1, binop(Iop_Add64, mkexpr(c1),
+ binop(Iop_Add64, mkexpr(ab3), mkexpr(ab2))));
+ assign( z0, binop(Iop_Add64, mkexpr(c0),
+ binop(Iop_Add64, mkexpr(ab1), mkexpr(ab0))));
+
+ /* saturate-narrow to 32bit, and combine to V128 */
+ putVReg( vD_addr, mkV128from4x64S( mkexpr(z3), mkexpr(z2),
+ mkexpr(z1), mkexpr(z0)) );
+ break;
+ }
+ default:
+ vex_printf("dis_av_multarith(ppc)(opc2)\n");
+ return False;
+ }
+ return True;
+}
+
+/*
+ AltiVec Shift/Rotate Instructions
+*/
+static Bool dis_av_shift ( UInt theInstr )
+{
+ /* VX-Form */
+ UChar opc1 = ifieldOPC(theInstr);
+ UChar vD_addr = ifieldRegDS(theInstr);
+ UChar vA_addr = ifieldRegA(theInstr);
+ UChar vB_addr = ifieldRegB(theInstr);
+ UInt opc2 = IFIELD( theInstr, 0, 11 );
+
+ IRTemp vA = newTemp(Ity_V128);
+ IRTemp vB = newTemp(Ity_V128);
+ assign( vA, getVReg(vA_addr));
+ assign( vB, getVReg(vB_addr));
+
+   if (opc1 != 0x4) {
+ vex_printf("dis_av_shift(ppc)(instr)\n");
+ return False;
+ }
+
+ switch (opc2) {
+ /* Rotate */
+ case 0x004: // vrlb (Rotate Left Integer B, AV p234)
+ DIP("vrlb v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ putVReg( vD_addr, binop(Iop_Rol8x16, mkexpr(vA), mkexpr(vB)) );
+ break;
+
+ case 0x044: // vrlh (Rotate Left Integer HW, AV p235)
+ DIP("vrlh v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ putVReg( vD_addr, binop(Iop_Rol16x8, mkexpr(vA), mkexpr(vB)) );
+ break;
+
+ case 0x084: // vrlw (Rotate Left Integer W, AV p236)
+ DIP("vrlw v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ putVReg( vD_addr, binop(Iop_Rol32x4, mkexpr(vA), mkexpr(vB)) );
+ break;
+
+
+ /* Shift Left */
+ case 0x104: // vslb (Shift Left Integer B, AV p240)
+ DIP("vslb v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ putVReg( vD_addr, binop(Iop_Shl8x16, mkexpr(vA), mkexpr(vB)) );
+ break;
+
+ case 0x144: // vslh (Shift Left Integer HW, AV p242)
+ DIP("vslh v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ putVReg( vD_addr, binop(Iop_Shl16x8, mkexpr(vA), mkexpr(vB)) );
+ break;
+
+ case 0x184: // vslw (Shift Left Integer W, AV p244)
+ DIP("vslw v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ putVReg( vD_addr, binop(Iop_Shl32x4, mkexpr(vA), mkexpr(vB)) );
+ break;
+
+ case 0x1C4: { // vsl (Shift Left, AV p239)
+ IRTemp sh = newTemp(Ity_I8);
+ DIP("vsl v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ assign( sh, binop(Iop_And8, mkU8(0x7),
+ unop(Iop_32to8,
+ unop(Iop_V128to32, mkexpr(vB)))) );
+ putVReg( vD_addr,
+ binop(Iop_ShlV128, mkexpr(vA), mkexpr(sh)) );
+ break;
+ }
+ case 0x40C: { // vslo (Shift Left by Octet, AV p243)
+ IRTemp sh = newTemp(Ity_I8);
+ DIP("vslo v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ assign( sh, binop(Iop_And8, mkU8(0x78),
+ unop(Iop_32to8,
+ unop(Iop_V128to32, mkexpr(vB)))) );
+ putVReg( vD_addr,
+ binop(Iop_ShlV128, mkexpr(vA), mkexpr(sh)) );
+ break;
+ }
+
+
+ /* Shift Right */
+ case 0x204: // vsrb (Shift Right B, AV p256)
+ DIP("vsrb v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ putVReg( vD_addr, binop(Iop_Shr8x16, mkexpr(vA), mkexpr(vB)) );
+ break;
+
+ case 0x244: // vsrh (Shift Right HW, AV p257)
+ DIP("vsrh v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ putVReg( vD_addr, binop(Iop_Shr16x8, mkexpr(vA), mkexpr(vB)) );
+ break;
+
+ case 0x284: // vsrw (Shift Right W, AV p259)
+ DIP("vsrw v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ putVReg( vD_addr, binop(Iop_Shr32x4, mkexpr(vA), mkexpr(vB)) );
+ break;
+
+ case 0x2C4: { // vsr (Shift Right, AV p251)
+ IRTemp sh = newTemp(Ity_I8);
+ DIP("vsr v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ assign( sh, binop(Iop_And8, mkU8(0x7),
+ unop(Iop_32to8,
+ unop(Iop_V128to32, mkexpr(vB)))) );
+ putVReg( vD_addr,
+ binop(Iop_ShrV128, mkexpr(vA), mkexpr(sh)) );
+ break;
+ }
+ case 0x304: // vsrab (Shift Right Alg B, AV p253)
+ DIP("vsrab v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ putVReg( vD_addr, binop(Iop_Sar8x16, mkexpr(vA), mkexpr(vB)) );
+ break;
+
+ case 0x344: // vsrah (Shift Right Alg HW, AV p254)
+ DIP("vsrah v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ putVReg( vD_addr, binop(Iop_Sar16x8, mkexpr(vA), mkexpr(vB)) );
+ break;
+
+ case 0x384: // vsraw (Shift Right Alg W, AV p255)
+ DIP("vsraw v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ putVReg( vD_addr, binop(Iop_Sar32x4, mkexpr(vA), mkexpr(vB)) );
+ break;
+
+ case 0x44C: { // vsro (Shift Right by Octet, AV p258)
+ IRTemp sh = newTemp(Ity_I8);
+ DIP("vsro v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ assign( sh, binop(Iop_And8, mkU8(0x78),
+ unop(Iop_32to8,
+ unop(Iop_V128to32, mkexpr(vB)))) );
+ putVReg( vD_addr,
+ binop(Iop_ShrV128, mkexpr(vA), mkexpr(sh)) );
+ break;
+ }
+
+ default:
+ vex_printf("dis_av_shift(ppc)(opc2)\n");
+ return False;
+ }
+ return True;
+}
+
+/*
+ AltiVec Permute Instructions
+*/
+static Bool dis_av_permute ( UInt theInstr )
+{
+ /* VA-Form, VX-Form */
+ UChar opc1 = ifieldOPC(theInstr);
+ UChar vD_addr = ifieldRegDS(theInstr);
+ UChar vA_addr = ifieldRegA(theInstr);
+ UChar UIMM_5 = vA_addr;
+ UChar vB_addr = ifieldRegB(theInstr);
+ UChar vC_addr = ifieldRegC(theInstr);
+ UChar b10 = ifieldBIT10(theInstr);
+ UChar SHB_uimm4 = toUChar( IFIELD( theInstr, 6, 4 ) );
+ UInt opc2 = toUChar( IFIELD( theInstr, 0, 6 ) );
+
+ UChar SIMM_8 = extend_s_5to8(UIMM_5);
+
+ IRTemp vA = newTemp(Ity_V128);
+ IRTemp vB = newTemp(Ity_V128);
+ IRTemp vC = newTemp(Ity_V128);
+ assign( vA, getVReg(vA_addr));
+ assign( vB, getVReg(vB_addr));
+ assign( vC, getVReg(vC_addr));
+
+ if (opc1 != 0x4) {
+ vex_printf("dis_av_permute(ppc)(instr)\n");
+ return False;
+ }
+
+ switch (opc2) {
+ case 0x2A: // vsel (Conditional Select, AV p238)
+ DIP("vsel v%d,v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr, vC_addr);
+ /* vD = (vA & ~vC) | (vB & vC) */
+ putVReg( vD_addr, binop(Iop_OrV128,
+ binop(Iop_AndV128, mkexpr(vA), unop(Iop_NotV128, mkexpr(vC))),
+ binop(Iop_AndV128, mkexpr(vB), mkexpr(vC))) );
+ return True;
+
+ case 0x2B: { // vperm (Permute, AV p218)
+      /* IR ops are limited to two args, so we have to play games... */
+ IRTemp a_perm = newTemp(Ity_V128);
+ IRTemp b_perm = newTemp(Ity_V128);
+ IRTemp mask = newTemp(Ity_V128);
+ IRTemp vC_andF = newTemp(Ity_V128);
+ DIP("vperm v%d,v%d,v%d,v%d\n",
+ vD_addr, vA_addr, vB_addr, vC_addr);
+ /* Limit the Perm8x16 steering values to 0 .. 15 as that is what
+ IR specifies, and also to hide irrelevant bits from
+ memcheck */
+ assign( vC_andF,
+ binop(Iop_AndV128, mkexpr(vC),
+ unop(Iop_Dup8x16, mkU8(0xF))) );
+ assign( a_perm,
+ binop(Iop_Perm8x16, mkexpr(vA), mkexpr(vC_andF)) );
+ assign( b_perm,
+ binop(Iop_Perm8x16, mkexpr(vB), mkexpr(vC_andF)) );
+ // mask[i8] = (vC[i8]_4 == 1) ? 0xFF : 0x0
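+      // (the Shl by 3 moves bit 4 of each byte into the byte's sign
+      //  position; the arithmetic Sar by 7 then replicates it across
+      //  all 8 bits, giving the 0xFF/0x00 select mask)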
+ assign( mask, binop(Iop_SarN8x16,
+ binop(Iop_ShlN8x16, mkexpr(vC), mkU8(3)),
+ mkU8(7)) );
+ // dst = (a & ~mask) | (b & mask)
+ putVReg( vD_addr, binop(Iop_OrV128,
+ binop(Iop_AndV128, mkexpr(a_perm),
+ unop(Iop_NotV128, mkexpr(mask))),
+ binop(Iop_AndV128, mkexpr(b_perm),
+ mkexpr(mask))) );
+ return True;
+ }
+ case 0x2C: // vsldoi (Shift Left Double by Octet Imm, AV p241)
+ if (b10 != 0) {
+ vex_printf("dis_av_permute(ppc)(vsldoi)\n");
+ return False;
+ }
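+      /* SHB == 0 must be special-cased: the combined form below would
+         otherwise require ShrV128 by (16-0)*8 == 128 bits, which is
+         not a valid shift amount for a 128-bit value. */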
+ DIP("vsldoi v%d,v%d,v%d,%d\n",
+ vD_addr, vA_addr, vB_addr, SHB_uimm4);
+ if (SHB_uimm4 == 0)
+ putVReg( vD_addr, mkexpr(vA) );
+ else
+ putVReg( vD_addr,
+ binop(Iop_OrV128,
+ binop(Iop_ShlV128, mkexpr(vA), mkU8(SHB_uimm4*8)),
+ binop(Iop_ShrV128, mkexpr(vB), mkU8((16-SHB_uimm4)*8))) );
+ return True;
+
+ default:
+ break; // Fall through...
+ }
+
+ opc2 = IFIELD( theInstr, 0, 11 );
+ switch (opc2) {
+
+ /* Merge */
+ case 0x00C: // vmrghb (Merge High B, AV p195)
+ DIP("vmrghb v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ putVReg( vD_addr,
+ binop(Iop_InterleaveHI8x16, mkexpr(vA), mkexpr(vB)) );
+ break;
+
+ case 0x04C: // vmrghh (Merge High HW, AV p196)
+ DIP("vmrghh v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ putVReg( vD_addr,
+ binop(Iop_InterleaveHI16x8, mkexpr(vA), mkexpr(vB)) );
+ break;
+
+ case 0x08C: // vmrghw (Merge High W, AV p197)
+ DIP("vmrghw v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ putVReg( vD_addr,
+ binop(Iop_InterleaveHI32x4, mkexpr(vA), mkexpr(vB)) );
+ break;
+
+ case 0x10C: // vmrglb (Merge Low B, AV p198)
+ DIP("vmrglb v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ putVReg( vD_addr,
+ binop(Iop_InterleaveLO8x16, mkexpr(vA), mkexpr(vB)) );
+ break;
+
+ case 0x14C: // vmrglh (Merge Low HW, AV p199)
+ DIP("vmrglh v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ putVReg( vD_addr,
+ binop(Iop_InterleaveLO16x8, mkexpr(vA), mkexpr(vB)) );
+ break;
+
+ case 0x18C: // vmrglw (Merge Low W, AV p200)
+ DIP("vmrglw v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ putVReg( vD_addr,
+ binop(Iop_InterleaveLO32x4, mkexpr(vA), mkexpr(vB)) );
+ break;
+
+
+ /* Splat */
+ case 0x20C: { // vspltb (Splat Byte, AV p245)
+ /* vD = Dup8x16( vB[UIMM_5] ) */
+ UChar sh_uimm = (15 - (UIMM_5 & 15)) * 8;
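+      /* AltiVec numbers elements from the most significant end, so
+         e.g. UIMM_5 == 0 gives sh_uimm == 120, bringing the top byte
+         down to the low byte extracted below. */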
+ DIP("vspltb v%d,v%d,%d\n", vD_addr, vB_addr, UIMM_5);
+ putVReg( vD_addr, unop(Iop_Dup8x16,
+ unop(Iop_32to8, unop(Iop_V128to32,
+ binop(Iop_ShrV128, mkexpr(vB), mkU8(sh_uimm))))) );
+ break;
+ }
+ case 0x24C: { // vsplth (Splat Half Word, AV p246)
+ UChar sh_uimm = (7 - (UIMM_5 & 7)) * 16;
+ DIP("vsplth v%d,v%d,%d\n", vD_addr, vB_addr, UIMM_5);
+ putVReg( vD_addr, unop(Iop_Dup16x8,
+ unop(Iop_32to16, unop(Iop_V128to32,
+ binop(Iop_ShrV128, mkexpr(vB), mkU8(sh_uimm))))) );
+ break;
+ }
+ case 0x28C: { // vspltw (Splat Word, AV p250)
+ /* vD = Dup32x4( vB[UIMM_5] ) */
+ UChar sh_uimm = (3 - (UIMM_5 & 3)) * 32;
+ DIP("vspltw v%d,v%d,%d\n", vD_addr, vB_addr, UIMM_5);
+ putVReg( vD_addr, unop(Iop_Dup32x4,
+ unop(Iop_V128to32,
+ binop(Iop_ShrV128, mkexpr(vB), mkU8(sh_uimm)))) );
+ break;
+ }
+ case 0x30C: // vspltisb (Splat Immediate Signed B, AV p247)
+ DIP("vspltisb v%d,%d\n", vD_addr, (Char)SIMM_8);
+ putVReg( vD_addr, unop(Iop_Dup8x16, mkU8(SIMM_8)) );
+ break;
+
+ case 0x34C: // vspltish (Splat Immediate Signed HW, AV p248)
+ DIP("vspltish v%d,%d\n", vD_addr, (Char)SIMM_8);
+ putVReg( vD_addr,
+ unop(Iop_Dup16x8, mkU16(extend_s_8to32(SIMM_8))) );
+ break;
+
+ case 0x38C: // vspltisw (Splat Immediate Signed W, AV p249)
+ DIP("vspltisw v%d,%d\n", vD_addr, (Char)SIMM_8);
+ putVReg( vD_addr,
+ unop(Iop_Dup32x4, mkU32(extend_s_8to32(SIMM_8))) );
+ break;
+
+ default:
+ vex_printf("dis_av_permute(ppc)(opc2)\n");
+ return False;
+ }
+ return True;
+}
+
+/*
+ AltiVec Pack/Unpack Instructions
+*/
+static Bool dis_av_pack ( UInt theInstr )
+{
+ /* VX-Form */
+ UChar opc1 = ifieldOPC(theInstr);
+ UChar vD_addr = ifieldRegDS(theInstr);
+ UChar vA_addr = ifieldRegA(theInstr);
+ UChar vB_addr = ifieldRegB(theInstr);
+ UInt opc2 = IFIELD( theInstr, 0, 11 );
+
+ IRTemp signs = IRTemp_INVALID;
+ IRTemp zeros = IRTemp_INVALID;
+ IRTemp vA = newTemp(Ity_V128);
+ IRTemp vB = newTemp(Ity_V128);
+ assign( vA, getVReg(vA_addr));
+ assign( vB, getVReg(vB_addr));
+
+ if (opc1 != 0x4) {
+ vex_printf("dis_av_pack(ppc)(instr)\n");
+ return False;
+ }
+
+ switch (opc2) {
+ /* Packing */
+ case 0x00E: // vpkuhum (Pack Unsigned HW Unsigned Modulo, AV p224)
+ DIP("vpkuhum v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ putVReg( vD_addr, binop(Iop_Narrow16x8, mkexpr(vA), mkexpr(vB)) );
+ return True;
+
+ case 0x04E: // vpkuwum (Pack Unsigned W Unsigned Modulo, AV p226)
+ DIP("vpkuwum v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ putVReg( vD_addr, binop(Iop_Narrow32x4, mkexpr(vA), mkexpr(vB)) );
+ return True;
+
+ case 0x08E: // vpkuhus (Pack Unsigned HW Unsigned Saturate, AV p225)
+ DIP("vpkuhus v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ putVReg( vD_addr,
+ binop(Iop_QNarrow16Ux8, mkexpr(vA), mkexpr(vB)) );
+ // TODO: set VSCR[SAT]
+ return True;
+
+ case 0x0CE: // vpkuwus (Pack Unsigned W Unsigned Saturate, AV p227)
+ DIP("vpkuwus v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ putVReg( vD_addr,
+ binop(Iop_QNarrow32Ux4, mkexpr(vA), mkexpr(vB)) );
+ // TODO: set VSCR[SAT]
+ return True;
+
+ case 0x10E: { // vpkshus (Pack Signed HW Unsigned Saturate, AV p221)
+ // This insn does a signed->unsigned saturating conversion.
+ // Conversion done here, then uses unsigned->unsigned vpk insn:
+ // => UnsignedSaturatingNarrow( x & ~ (x >>s 15) )
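+      // E.g. for negative x, (x >>s 15) == 0xFFFF and x & ~0xFFFF == 0,
+      // clamping to zero; for non-negative x the mask is all ones and
+      // x passes through unchanged.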
+ IRTemp vA_tmp = newTemp(Ity_V128);
+ IRTemp vB_tmp = newTemp(Ity_V128);
+ DIP("vpkshus v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ assign( vA_tmp, binop(Iop_AndV128, mkexpr(vA),
+ unop(Iop_NotV128,
+ binop(Iop_SarN16x8,
+ mkexpr(vA), mkU8(15)))) );
+ assign( vB_tmp, binop(Iop_AndV128, mkexpr(vB),
+ unop(Iop_NotV128,
+ binop(Iop_SarN16x8,
+ mkexpr(vB), mkU8(15)))) );
+ putVReg( vD_addr, binop(Iop_QNarrow16Ux8,
+ mkexpr(vA_tmp), mkexpr(vB_tmp)) );
+ // TODO: set VSCR[SAT]
+ return True;
+ }
+ case 0x14E: { // vpkswus (Pack Signed W Unsigned Saturate, AV p223)
+ // This insn does a signed->unsigned saturating conversion.
+ // Conversion done here, then uses unsigned->unsigned vpk insn:
+ // => UnsignedSaturatingNarrow( x & ~ (x >>s 31) )
+ IRTemp vA_tmp = newTemp(Ity_V128);
+ IRTemp vB_tmp = newTemp(Ity_V128);
+ DIP("vpkswus v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ assign( vA_tmp, binop(Iop_AndV128, mkexpr(vA),
+ unop(Iop_NotV128,
+ binop(Iop_SarN32x4,
+ mkexpr(vA), mkU8(31)))) );
+ assign( vB_tmp, binop(Iop_AndV128, mkexpr(vB),
+ unop(Iop_NotV128,
+ binop(Iop_SarN32x4,
+ mkexpr(vB), mkU8(31)))) );
+ putVReg( vD_addr, binop(Iop_QNarrow32Ux4,
+ mkexpr(vA_tmp), mkexpr(vB_tmp)) );
+ // TODO: set VSCR[SAT]
+ return True;
+ }
+ case 0x18E: // vpkshss (Pack Signed HW Signed Saturate, AV p220)
+ DIP("vpkshss v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ putVReg( vD_addr,
+ binop(Iop_QNarrow16Sx8, mkexpr(vA), mkexpr(vB)) );
+ // TODO: set VSCR[SAT]
+ return True;
+
+ case 0x1CE: // vpkswss (Pack Signed W Signed Saturate, AV p222)
+ DIP("vpkswss v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ putVReg( vD_addr,
+ binop(Iop_QNarrow32Sx4, mkexpr(vA), mkexpr(vB)) );
+ // TODO: set VSCR[SAT]
+ return True;
+
+ case 0x30E: { // vpkpx (Pack Pixel, AV p219)
+ /* CAB: Worth a new primop? */
+ /* Using shifts to compact pixel elements, then packing them */
+ IRTemp a1 = newTemp(Ity_V128);
+ IRTemp a2 = newTemp(Ity_V128);
+ IRTemp a3 = newTemp(Ity_V128);
+ IRTemp a_tmp = newTemp(Ity_V128);
+ IRTemp b1 = newTemp(Ity_V128);
+ IRTemp b2 = newTemp(Ity_V128);
+ IRTemp b3 = newTemp(Ity_V128);
+ IRTemp b_tmp = newTemp(Ity_V128);
+ DIP("vpkpx v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ assign( a1, binop(Iop_ShlN16x8,
+ binop(Iop_ShrN32x4, mkexpr(vA), mkU8(19)),
+ mkU8(10)) );
+ assign( a2, binop(Iop_ShlN16x8,
+ binop(Iop_ShrN16x8, mkexpr(vA), mkU8(11)),
+ mkU8(5)) );
+ assign( a3, binop(Iop_ShrN16x8,
+ binop(Iop_ShlN16x8, mkexpr(vA), mkU8(8)),
+ mkU8(11)) );
+ assign( a_tmp, binop(Iop_OrV128, mkexpr(a1),
+ binop(Iop_OrV128, mkexpr(a2), mkexpr(a3))) );
+
+ assign( b1, binop(Iop_ShlN16x8,
+ binop(Iop_ShrN32x4, mkexpr(vB), mkU8(19)),
+ mkU8(10)) );
+ assign( b2, binop(Iop_ShlN16x8,
+ binop(Iop_ShrN16x8, mkexpr(vB), mkU8(11)),
+ mkU8(5)) );
+ assign( b3, binop(Iop_ShrN16x8,
+ binop(Iop_ShlN16x8, mkexpr(vB), mkU8(8)),
+ mkU8(11)) );
+ assign( b_tmp, binop(Iop_OrV128, mkexpr(b1),
+ binop(Iop_OrV128, mkexpr(b2), mkexpr(b3))) );
+
+ putVReg( vD_addr, binop(Iop_Narrow32x4,
+ mkexpr(a_tmp), mkexpr(b_tmp)) );
+ return True;
+ }
+
+ default:
+ break; // Fall through...
+ }
+
+
+ if (vA_addr != 0) {
+ vex_printf("dis_av_pack(ppc)(vA_addr)\n");
+ return False;
+ }
+
+ signs = newTemp(Ity_V128);
+ zeros = newTemp(Ity_V128);
+ assign( zeros, unop(Iop_Dup32x4, mkU32(0)) );
+
+ switch (opc2) {
+ /* Unpacking */
+ case 0x20E: { // vupkhsb (Unpack High Signed B, AV p277)
+ DIP("vupkhsb v%d,v%d\n", vD_addr, vB_addr);
+ assign( signs, binop(Iop_CmpGT8Sx16, mkexpr(zeros), mkexpr(vB)) );
+ putVReg( vD_addr,
+ binop(Iop_InterleaveHI8x16, mkexpr(signs), mkexpr(vB)) );
+ break;
+ }
+ case 0x24E: { // vupkhsh (Unpack High Signed HW, AV p278)
+ DIP("vupkhsh v%d,v%d\n", vD_addr, vB_addr);
+ assign( signs, binop(Iop_CmpGT16Sx8, mkexpr(zeros), mkexpr(vB)) );
+ putVReg( vD_addr,
+ binop(Iop_InterleaveHI16x8, mkexpr(signs), mkexpr(vB)) );
+ break;
+ }
+ case 0x28E: { // vupklsb (Unpack Low Signed B, AV p280)
+ DIP("vupklsb v%d,v%d\n", vD_addr, vB_addr);
+ assign( signs, binop(Iop_CmpGT8Sx16, mkexpr(zeros), mkexpr(vB)) );
+ putVReg( vD_addr,
+ binop(Iop_InterleaveLO8x16, mkexpr(signs), mkexpr(vB)) );
+ break;
+ }
+ case 0x2CE: { // vupklsh (Unpack Low Signed HW, AV p281)
+ DIP("vupklsh v%d,v%d\n", vD_addr, vB_addr);
+ assign( signs, binop(Iop_CmpGT16Sx8, mkexpr(zeros), mkexpr(vB)) );
+ putVReg( vD_addr,
+ binop(Iop_InterleaveLO16x8, mkexpr(signs), mkexpr(vB)) );
+ break;
+ }
+ case 0x34E: { // vupkhpx (Unpack High Pixel16, AV p276)
+ /* CAB: Worth a new primop? */
+ /* Using shifts to isolate pixel elements, then expanding them */
+ IRTemp z0 = newTemp(Ity_V128);
+ IRTemp z1 = newTemp(Ity_V128);
+ IRTemp z01 = newTemp(Ity_V128);
+ IRTemp z2 = newTemp(Ity_V128);
+ IRTemp z3 = newTemp(Ity_V128);
+ IRTemp z23 = newTemp(Ity_V128);
+ DIP("vupkhpx v%d,v%d\n", vD_addr, vB_addr);
+ assign( z0, binop(Iop_ShlN16x8,
+ binop(Iop_SarN16x8, mkexpr(vB), mkU8(15)),
+ mkU8(8)) );
+ assign( z1, binop(Iop_ShrN16x8,
+ binop(Iop_ShlN16x8, mkexpr(vB), mkU8(1)),
+ mkU8(11)) );
+ assign( z01, binop(Iop_InterleaveHI16x8, mkexpr(zeros),
+ binop(Iop_OrV128, mkexpr(z0), mkexpr(z1))) );
+ assign( z2, binop(Iop_ShrN16x8,
+ binop(Iop_ShlN16x8,
+ binop(Iop_ShrN16x8, mkexpr(vB), mkU8(5)),
+ mkU8(11)),
+ mkU8(3)) );
+ assign( z3, binop(Iop_ShrN16x8,
+ binop(Iop_ShlN16x8, mkexpr(vB), mkU8(11)),
+ mkU8(11)) );
+ assign( z23, binop(Iop_InterleaveHI16x8, mkexpr(zeros),
+ binop(Iop_OrV128, mkexpr(z2), mkexpr(z3))) );
+ putVReg( vD_addr,
+ binop(Iop_OrV128,
+ binop(Iop_ShlN32x4, mkexpr(z01), mkU8(16)),
+ mkexpr(z23)) );
+ break;
+ }
+ case 0x3CE: { // vupklpx (Unpack Low Pixel16, AV p279)
+ /* identical to vupkhpx, except interleaving LO */
+ IRTemp z0 = newTemp(Ity_V128);
+ IRTemp z1 = newTemp(Ity_V128);
+ IRTemp z01 = newTemp(Ity_V128);
+ IRTemp z2 = newTemp(Ity_V128);
+ IRTemp z3 = newTemp(Ity_V128);
+ IRTemp z23 = newTemp(Ity_V128);
+ DIP("vupklpx v%d,v%d\n", vD_addr, vB_addr);
+ assign( z0, binop(Iop_ShlN16x8,
+ binop(Iop_SarN16x8, mkexpr(vB), mkU8(15)),
+ mkU8(8)) );
+ assign( z1, binop(Iop_ShrN16x8,
+ binop(Iop_ShlN16x8, mkexpr(vB), mkU8(1)),
+ mkU8(11)) );
+ assign( z01, binop(Iop_InterleaveLO16x8, mkexpr(zeros),
+ binop(Iop_OrV128, mkexpr(z0), mkexpr(z1))) );
+ assign( z2, binop(Iop_ShrN16x8,
+ binop(Iop_ShlN16x8,
+ binop(Iop_ShrN16x8, mkexpr(vB), mkU8(5)),
+ mkU8(11)),
+ mkU8(3)) );
+ assign( z3, binop(Iop_ShrN16x8,
+ binop(Iop_ShlN16x8, mkexpr(vB), mkU8(11)),
+ mkU8(11)) );
+ assign( z23, binop(Iop_InterleaveLO16x8, mkexpr(zeros),
+ binop(Iop_OrV128, mkexpr(z2), mkexpr(z3))) );
+ putVReg( vD_addr,
+ binop(Iop_OrV128,
+ binop(Iop_ShlN32x4, mkexpr(z01), mkU8(16)),
+ mkexpr(z23)) );
+ break;
+ }
+ default:
+ vex_printf("dis_av_pack(ppc)(opc2)\n");
+ return False;
+ }
+ return True;
+}
+
+
+/*
+ AltiVec Floating Point Arithmetic Instructions
+*/
+static Bool dis_av_fp_arith ( UInt theInstr )
+{
+   /* VA-Form, VX-Form */
+ UChar opc1 = ifieldOPC(theInstr);
+ UChar vD_addr = ifieldRegDS(theInstr);
+ UChar vA_addr = ifieldRegA(theInstr);
+ UChar vB_addr = ifieldRegB(theInstr);
+ UChar vC_addr = ifieldRegC(theInstr);
+ UInt opc2=0;
+
+ IRTemp vA = newTemp(Ity_V128);
+ IRTemp vB = newTemp(Ity_V128);
+ IRTemp vC = newTemp(Ity_V128);
+ assign( vA, getVReg(vA_addr));
+ assign( vB, getVReg(vB_addr));
+ assign( vC, getVReg(vC_addr));
+
+ if (opc1 != 0x4) {
+ vex_printf("dis_av_fp_arith(ppc)(instr)\n");
+ return False;
+ }
+
+ opc2 = IFIELD( theInstr, 0, 6 );
+ switch (opc2) {
+ case 0x2E: // vmaddfp (Multiply Add FP, AV p177)
+ DIP("vmaddfp v%d,v%d,v%d,v%d\n",
+ vD_addr, vA_addr, vC_addr, vB_addr);
+ putVReg( vD_addr,
+ binop(Iop_Add32Fx4, mkexpr(vB),
+ binop(Iop_Mul32Fx4, mkexpr(vA), mkexpr(vC))) );
+ return True;
+
+ case 0x2F: { // vnmsubfp (Negative Multiply-Subtract FP, AV p215)
+ DIP("vnmsubfp v%d,v%d,v%d,v%d\n",
+ vD_addr, vA_addr, vC_addr, vB_addr);
+ putVReg( vD_addr,
+ binop(Iop_Sub32Fx4,
+ mkexpr(vB),
+ binop(Iop_Mul32Fx4, mkexpr(vA), mkexpr(vC))) );
+ return True;
+ }
+
+ default:
+ break; // Fall through...
+ }
+
+ opc2 = IFIELD( theInstr, 0, 11 );
+ switch (opc2) {
+ case 0x00A: // vaddfp (Add FP, AV p137)
+ DIP("vaddfp v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ putVReg( vD_addr, binop(Iop_Add32Fx4, mkexpr(vA), mkexpr(vB)) );
+ return True;
+
+ case 0x04A: // vsubfp (Subtract FP, AV p261)
+ DIP("vsubfp v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ putVReg( vD_addr, binop(Iop_Sub32Fx4, mkexpr(vA), mkexpr(vB)) );
+ return True;
+
+ case 0x40A: // vmaxfp (Maximum FP, AV p178)
+ DIP("vmaxfp v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ putVReg( vD_addr, binop(Iop_Max32Fx4, mkexpr(vA), mkexpr(vB)) );
+ return True;
+
+ case 0x44A: // vminfp (Minimum FP, AV p187)
+ DIP("vminfp v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
+ putVReg( vD_addr, binop(Iop_Min32Fx4, mkexpr(vA), mkexpr(vB)) );
+ return True;
+
+ default:
+ break; // Fall through...
+ }
+
+
+ if (vA_addr != 0) {
+ vex_printf("dis_av_fp_arith(ppc)(vA_addr)\n");
+ return False;
+ }
+
+ switch (opc2) {
+      case 0x10A: // vrefp (Reciprocal Estimate FP, AV p228)
+ DIP("vrefp v%d,v%d\n", vD_addr, vB_addr);
+ putVReg( vD_addr, unop(Iop_Recip32Fx4, mkexpr(vB)) );
+ return True;
+
+ case 0x14A: // vrsqrtefp (Reciprocal Sqrt Estimate FP, AV p237)
+ DIP("vrsqrtefp v%d,v%d\n", vD_addr, vB_addr);
+ putVReg( vD_addr, unop(Iop_RSqrt32Fx4, mkexpr(vB)) );
+ return True;
+
+ case 0x18A: // vexptefp (2 Raised to the Exp Est FP, AV p173)
+ DIP("vexptefp v%d,v%d\n", vD_addr, vB_addr);
+ DIP(" => not implemented\n");
+ return False;
+
+ case 0x1CA: // vlogefp (Log2 Estimate FP, AV p175)
+ DIP("vlogefp v%d,v%d\n", vD_addr, vB_addr);
+ DIP(" => not implemented\n");
+ return False;
+
+ default:
+ vex_printf("dis_av_fp_arith(ppc)(opc2=0x%x)\n",opc2);
+ return False;
+ }
+ return True;
+}
+
+/*
+ AltiVec Floating Point Compare Instructions
+*/
+static Bool dis_av_fp_cmp ( UInt theInstr )
+{
+ /* VXR-Form */
+ UChar opc1 = ifieldOPC(theInstr);
+ UChar vD_addr = ifieldRegDS(theInstr);
+ UChar vA_addr = ifieldRegA(theInstr);
+ UChar vB_addr = ifieldRegB(theInstr);
+ UChar flag_rC = ifieldBIT10(theInstr);
+ UInt opc2 = IFIELD( theInstr, 0, 10 );
+
+ Bool cmp_bounds = False;
+
+ IRTemp vA = newTemp(Ity_V128);
+ IRTemp vB = newTemp(Ity_V128);
+ IRTemp vD = newTemp(Ity_V128);
+ assign( vA, getVReg(vA_addr));
+ assign( vB, getVReg(vB_addr));
+
+ if (opc1 != 0x4) {
+ vex_printf("dis_av_fp_cmp(ppc)(instr)\n");
+ return False;
+ }
+
+ switch (opc2) {
+ case 0x0C6: // vcmpeqfp (Compare Equal-to FP, AV p159)
+ DIP("vcmpeqfp%s v%d,v%d,v%d\n", (flag_rC ? ".":""),
+ vD_addr, vA_addr, vB_addr);
+ assign( vD, binop(Iop_CmpEQ32Fx4, mkexpr(vA), mkexpr(vB)) );
+ break;
+
+ case 0x1C6: // vcmpgefp (Compare Greater-than-or-Equal-to, AV p163)
+ DIP("vcmpgefp%s v%d,v%d,v%d\n", (flag_rC ? ".":""),
+ vD_addr, vA_addr, vB_addr);
+ assign( vD, binop(Iop_CmpGE32Fx4, mkexpr(vA), mkexpr(vB)) );
+ break;
+
+ case 0x2C6: // vcmpgtfp (Compare Greater-than FP, AV p164)
+ DIP("vcmpgtfp%s v%d,v%d,v%d\n", (flag_rC ? ".":""),
+ vD_addr, vA_addr, vB_addr);
+ assign( vD, binop(Iop_CmpGT32Fx4, mkexpr(vA), mkexpr(vB)) );
+ break;
+
+ case 0x3C6: { // vcmpbfp (Compare Bounds FP, AV p157)
+ IRTemp gt = newTemp(Ity_V128);
+ IRTemp lt = newTemp(Ity_V128);
+ IRTemp zeros = newTemp(Ity_V128);
+ DIP("vcmpbfp%s v%d,v%d,v%d\n", (flag_rC ? ".":""),
+ vD_addr, vA_addr, vB_addr);
+ cmp_bounds = True;
+ assign( zeros, unop(Iop_Dup32x4, mkU32(0)) );
+
+      /* Note: this makes use of the fact that the ppc backend for
+         compare insns returns zeroed lanes if either of the
+         corresponding arg lanes is a NaN.
+
+         Perhaps better to have an irop Iop_isNan32Fx4, but then we'd
+         need this for the other compares too (vcmpeqfp etc)...
+         Better still, tighten down the spec for compare irops.
+      */
+ assign( gt, unop(Iop_NotV128,
+ binop(Iop_CmpLE32Fx4, mkexpr(vA), mkexpr(vB))) );
+ assign( lt, unop(Iop_NotV128,
+ binop(Iop_CmpGE32Fx4, mkexpr(vA),
+ binop(Iop_Sub32Fx4, mkexpr(zeros),
+ mkexpr(vB)))) );
+
+ // finally, just shift gt,lt to correct position
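+      // (weights 2 and 1, shifted left 30: per lane the result is
+      //  (gt ? 0x80000000 : 0) | (lt ? 0x40000000 : 0); both bits
+      //  clear means vA lies within [-vB, vB])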
+ assign( vD, binop(Iop_ShlN32x4,
+ binop(Iop_OrV128,
+ binop(Iop_AndV128, mkexpr(gt),
+ unop(Iop_Dup32x4, mkU32(0x2))),
+ binop(Iop_AndV128, mkexpr(lt),
+ unop(Iop_Dup32x4, mkU32(0x1)))),
+ mkU8(30)) );
+ break;
+ }
+
+ default:
+ vex_printf("dis_av_fp_cmp(ppc)(opc2)\n");
+ return False;
+ }
+
+ putVReg( vD_addr, mkexpr(vD) );
+
+ if (flag_rC) {
+ set_AV_CR6( mkexpr(vD), !cmp_bounds );
+ }
+ return True;
+}
+
+/*
+ AltiVec Floating Point Convert/Round Instructions
+*/
+static Bool dis_av_fp_convert ( UInt theInstr )
+{
+ /* VX-Form */
+ UChar opc1 = ifieldOPC(theInstr);
+ UChar vD_addr = ifieldRegDS(theInstr);
+ UChar UIMM_5 = ifieldRegA(theInstr);
+ UChar vB_addr = ifieldRegB(theInstr);
+ UInt opc2 = IFIELD( theInstr, 0, 11 );
+
+ IRTemp vB = newTemp(Ity_V128);
+ IRTemp vScale = newTemp(Ity_V128);
+ IRTemp vInvScale = newTemp(Ity_V128);
+
+ float scale, inv_scale;
+
+ assign( vB, getVReg(vB_addr));
+
+ /* scale = 2^UIMM, cast to float, reinterpreted as uint */
+ scale = (float)( (unsigned int) 1<<UIMM_5 );
+ assign( vScale, unop(Iop_Dup32x4, mkU32( float_to_bits(scale) )) );
+ inv_scale = 1/scale;
+ assign( vInvScale,
+ unop(Iop_Dup32x4, mkU32( float_to_bits(inv_scale) )) );
+
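+   /* E.g. UIMM_5 == 3 gives scale == 8.0: vcfux/vcfsx multiply the
+      int->float result by 1/8, while vctuxs/vctsxs multiply by 8
+      before the saturating float->int conversion. */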
+ if (opc1 != 0x4) {
+ vex_printf("dis_av_fp_convert(ppc)(instr)\n");
+ return False;
+ }
+
+ switch (opc2) {
+ case 0x30A: // vcfux (Convert from Unsigned Fixed-Point W, AV p156)
+ DIP("vcfux v%d,v%d,%d\n", vD_addr, vB_addr, UIMM_5);
+ putVReg( vD_addr, binop(Iop_Mul32Fx4,
+ unop(Iop_I32UtoFx4, mkexpr(vB)),
+ mkexpr(vInvScale)) );
+ return True;
+
+ case 0x34A: // vcfsx (Convert from Signed Fixed-Point W, AV p155)
+ DIP("vcfsx v%d,v%d,%d\n", vD_addr, vB_addr, UIMM_5);
+
+ putVReg( vD_addr, binop(Iop_Mul32Fx4,
+ unop(Iop_I32StoFx4, mkexpr(vB)),
+ mkexpr(vInvScale)) );
+ return True;
+
+ case 0x38A: // vctuxs (Convert to Unsigned Fixed-Point W Saturate, AV p172)
+ DIP("vctuxs v%d,v%d,%d\n", vD_addr, vB_addr, UIMM_5);
+ putVReg( vD_addr,
+ unop(Iop_QFtoI32Ux4_RZ,
+ binop(Iop_Mul32Fx4, mkexpr(vB), mkexpr(vScale))) );
+ return True;
+
+ case 0x3CA: // vctsxs (Convert to Signed Fixed-Point W Saturate, AV p171)
+ DIP("vctsxs v%d,v%d,%d\n", vD_addr, vB_addr, UIMM_5);
+ putVReg( vD_addr,
+ unop(Iop_QFtoI32Sx4_RZ,
+ binop(Iop_Mul32Fx4, mkexpr(vB), mkexpr(vScale))) );
+ return True;
+
+ default:
+ break; // Fall through...
+ }
+
+ if (UIMM_5 != 0) {
+ vex_printf("dis_av_fp_convert(ppc)(UIMM_5)\n");
+ return False;
+ }
+
+ switch (opc2) {
+ case 0x20A: // vrfin (Round to FP Integer Nearest, AV p231)
+ DIP("vrfin v%d,v%d\n", vD_addr, vB_addr);
+ putVReg( vD_addr, unop(Iop_RoundF32x4_RN, mkexpr(vB)) );
+ break;
+
+ case 0x24A: // vrfiz (Round to FP Integer toward zero, AV p233)
+ DIP("vrfiz v%d,v%d\n", vD_addr, vB_addr);
+ putVReg( vD_addr, unop(Iop_RoundF32x4_RZ, mkexpr(vB)) );
+ break;
+
+ case 0x28A: // vrfip (Round to FP Integer toward +inf, AV p232)
+ DIP("vrfip v%d,v%d\n", vD_addr, vB_addr);
+ putVReg( vD_addr, unop(Iop_RoundF32x4_RP, mkexpr(vB)) );
+ break;
+
+ case 0x2CA: // vrfim (Round to FP Integer toward -inf, AV p230)
+ DIP("vrfim v%d,v%d\n", vD_addr, vB_addr);
+ putVReg( vD_addr, unop(Iop_RoundF32x4_RM, mkexpr(vB)) );
+ break;
+
+ default:
+ vex_printf("dis_av_fp_convert(ppc)(opc2)\n");
+ return False;
+ }
+ return True;
+}
+
+
+
+
+
+/*------------------------------------------------------------*/
+/*--- Disassemble a single instruction ---*/
+/*------------------------------------------------------------*/
+
+/* Disassemble a single instruction into IR. The instruction
+ is located in host memory at &guest_code[delta]. */
+
+static
+DisResult disInstr_PPC_WRK (
+ Bool put_IP,
+ Bool (*resteerOkFn) ( /*opaque*/void*, Addr64 ),
+ Bool resteerCisOk,
+ void* callback_opaque,
+ Long delta64,
+ VexArchInfo* archinfo,
+ VexAbiInfo* abiinfo
+ )
+{
+ UChar opc1;
+ UInt opc2;
+ DisResult dres;
+ UInt theInstr;
+ IRType ty = mode64 ? Ity_I64 : Ity_I32;
+ Bool allow_F = False;
+ Bool allow_V = False;
+ Bool allow_FX = False;
+ Bool allow_GX = False;
+ UInt hwcaps = archinfo->hwcaps;
+ Long delta;
+
+ /* What insn variants are we supporting today? */
+ if (mode64) {
+ allow_F = True;
+ allow_V = (0 != (hwcaps & VEX_HWCAPS_PPC64_V));
+ allow_FX = (0 != (hwcaps & VEX_HWCAPS_PPC64_FX));
+ allow_GX = (0 != (hwcaps & VEX_HWCAPS_PPC64_GX));
+ } else {
+ allow_F = (0 != (hwcaps & VEX_HWCAPS_PPC32_F));
+ allow_V = (0 != (hwcaps & VEX_HWCAPS_PPC32_V));
+ allow_FX = (0 != (hwcaps & VEX_HWCAPS_PPC32_FX));
+ allow_GX = (0 != (hwcaps & VEX_HWCAPS_PPC32_GX));
+ }
+
+ /* The running delta */
+ delta = (Long)mkSzAddr(ty, (ULong)delta64);
+
+ /* Set result defaults. */
+ dres.whatNext = Dis_Continue;
+ dres.len = 0;
+ dres.continueAt = 0;
+
+ /* At least this is simple on PPC32: insns are all 4 bytes long, and
+ 4-aligned. So just fish the whole thing out of memory right now
+ and have done. */
+ theInstr = getUIntBigendianly( (UChar*)(&guest_code[delta]) );
+
+ if (0) vex_printf("insn: 0x%x\n", theInstr);
+
+ DIP("\t0x%llx: ", (ULong)guest_CIA_curr_instr);
+
+ /* We may be asked to update the guest CIA before going further. */
+ if (put_IP)
+ putGST( PPC_GST_CIA, mkSzImm(ty, guest_CIA_curr_instr) );
+
+ /* Spot "Special" instructions (see comment at top of file). */
+ {
+ UChar* code = (UChar*)(guest_code + delta);
+ /* Spot the 16-byte preamble:
+ 32-bit mode:
+ 54001800 rlwinm 0,0,3,0,0
+ 54006800 rlwinm 0,0,13,0,0
+ 5400E800 rlwinm 0,0,29,0,0
+ 54009800 rlwinm 0,0,19,0,0
+ 64-bit mode:
+ 78001800 rotldi 0,0,3
+ 78006800 rotldi 0,0,13
+ 7800E802 rotldi 0,0,61
+ 78009802 rotldi 0,0,51
+ */
+ UInt word1 = mode64 ? 0x78001800 : 0x54001800;
+ UInt word2 = mode64 ? 0x78006800 : 0x54006800;
+ UInt word3 = mode64 ? 0x7800E802 : 0x5400E800;
+ UInt word4 = mode64 ? 0x78009802 : 0x54009800;
+ if (getUIntBigendianly(code+ 0) == word1 &&
+ getUIntBigendianly(code+ 4) == word2 &&
+ getUIntBigendianly(code+ 8) == word3 &&
+ getUIntBigendianly(code+12) == word4) {
+ /* Got a "Special" instruction preamble. Which one is it? */
+ if (getUIntBigendianly(code+16) == 0x7C210B78 /* or 1,1,1 */) {
+ /* %R3 = client_request ( %R4 ) */
+ DIP("r3 = client_request ( %%r4 )\n");
+ delta += 20;
+ irsb->next = mkSzImm( ty, guest_CIA_bbstart + delta );
+ irsb->jumpkind = Ijk_ClientReq;
+ dres.whatNext = Dis_StopHere;
+ goto decode_success;
+ }
+ else
+ if (getUIntBigendianly(code+16) == 0x7C421378 /* or 2,2,2 */) {
+ /* %R3 = guest_NRADDR */
+ DIP("r3 = guest_NRADDR\n");
+ delta += 20;
+ dres.len = 20;
+ putIReg(3, IRExpr_Get( OFFB_NRADDR, ty ));
+ goto decode_success;
+ }
+ else
+ if (getUIntBigendianly(code+16) == 0x7C631B78 /* or 3,3,3 */) {
+ /* branch-and-link-to-noredir %R11 */
+ DIP("branch-and-link-to-noredir r11\n");
+ delta += 20;
+ putGST( PPC_GST_LR, mkSzImm(ty, guest_CIA_bbstart + (Long)delta) );
+ irsb->next = getIReg(11);
+ irsb->jumpkind = Ijk_NoRedir;
+ dres.whatNext = Dis_StopHere;
+ goto decode_success;
+ }
+ else
+ if (getUIntBigendianly(code+16) == 0x7C842378 /* or 4,4,4 */) {
+ /* %R3 = guest_NRADDR_GPR2 */
+ DIP("r3 = guest_NRADDR_GPR2\n");
+ delta += 20;
+ dres.len = 20;
+ putIReg(3, IRExpr_Get( OFFB_NRADDR_GPR2, ty ));
+ goto decode_success;
+ }
+ /* We don't know what it is. Set opc1/opc2 so decode_failure
+ can print the insn following the Special-insn preamble. */
+ theInstr = getUIntBigendianly(code+16);
+ opc1 = ifieldOPC(theInstr);
+ opc2 = ifieldOPClo10(theInstr);
+ goto decode_failure;
+ /*NOTREACHED*/
+ }
+ }
+
+ opc1 = ifieldOPC(theInstr);
+ opc2 = ifieldOPClo10(theInstr);
+
+ // Note: all 'reserved' bits must be cleared, else invalid
+ switch (opc1) {
+
+ /* Integer Arithmetic Instructions */
+ case 0x0C: case 0x0D: case 0x0E: // addic, addic., addi
+ case 0x0F: case 0x07: case 0x08: // addis, mulli, subfic
+ if (dis_int_arith( theInstr )) goto decode_success;
+ goto decode_failure;
+
+ /* Integer Compare Instructions */
+ case 0x0B: case 0x0A: // cmpi, cmpli
+ if (dis_int_cmp( theInstr )) goto decode_success;
+ goto decode_failure;
+
+ /* Integer Logical Instructions */
+ case 0x1C: case 0x1D: case 0x18: // andi., andis., ori
+ case 0x19: case 0x1A: case 0x1B: // oris, xori, xoris
+ if (dis_int_logic( theInstr )) goto decode_success;
+ goto decode_failure;
+
+ /* Integer Rotate Instructions */
+ case 0x14: case 0x15: case 0x17: // rlwimi, rlwinm, rlwnm
+ if (dis_int_rot( theInstr )) goto decode_success;
+ goto decode_failure;
+
+ /* 64bit Integer Rotate Instructions */
+ case 0x1E: // rldcl, rldcr, rldic, rldicl, rldicr, rldimi
+ if (dis_int_rot( theInstr )) goto decode_success;
+ goto decode_failure;
+
+ /* Integer Load Instructions */
+ case 0x22: case 0x23: case 0x2A: // lbz, lbzu, lha
+ case 0x2B: case 0x28: case 0x29: // lhau, lhz, lhzu
+ case 0x20: case 0x21: // lwz, lwzu
+ if (dis_int_load( theInstr )) goto decode_success;
+ goto decode_failure;
+
+ /* Integer Store Instructions */
+ case 0x26: case 0x27: case 0x2C: // stb, stbu, sth
+ case 0x2D: case 0x24: case 0x25: // sthu, stw, stwu
+ if (dis_int_store( theInstr, abiinfo )) goto decode_success;
+ goto decode_failure;
+
+ /* Integer Load and Store Multiple Instructions */
+ case 0x2E: case 0x2F: // lmw, stmw
+ if (dis_int_ldst_mult( theInstr )) goto decode_success;
+ goto decode_failure;
+
+ /* Branch Instructions */
+ case 0x12: case 0x10: // b, bc
+ if (dis_branch(theInstr, abiinfo, &dres,
+ resteerOkFn, callback_opaque))
+ goto decode_success;
+ goto decode_failure;
+
+ /* System Linkage Instructions */
+ case 0x11: // sc
+ if (dis_syslink(theInstr, abiinfo, &dres)) goto decode_success;
+ goto decode_failure;
+
+ /* Trap Instructions */
+ case 0x02: case 0x03: // tdi, twi
+ if (dis_trapi(theInstr, &dres)) goto decode_success;
+ goto decode_failure;
+
+ /* Floating Point Load Instructions */
+ case 0x30: case 0x31: case 0x32: // lfs, lfsu, lfd
+ case 0x33: // lfdu
+ if (!allow_F) goto decode_noF;
+ if (dis_fp_load( theInstr )) goto decode_success;
+ goto decode_failure;
+
+ /* Floating Point Store Instructions */
+   case 0x34: case 0x35: case 0x36: // stfs, stfsu, stfd
+   case 0x37:                       // stfdu
+ if (!allow_F) goto decode_noF;
+ if (dis_fp_store( theInstr )) goto decode_success;
+ goto decode_failure;
+
+ /* Floating Point Load Double Pair Instructions */
+ case 0x39: case 0x3D:
+ if (!allow_F) goto decode_noF;
+ if (dis_fp_pair( theInstr )) goto decode_success;
+ goto decode_failure;
+
+ /* 64bit Integer Loads */
+ case 0x3A: // ld, ldu, lwa
+ if (!mode64) goto decode_failure;
+ if (dis_int_load( theInstr )) goto decode_success;
+ goto decode_failure;
+
+ case 0x3B:
+ if (!allow_F) goto decode_noF;
+ opc2 = IFIELD(theInstr, 1, 5);
+ switch (opc2) {
+ /* Floating Point Arith Instructions */
+ case 0x12: case 0x14: case 0x15: // fdivs, fsubs, fadds
+ case 0x19: // fmuls
+ if (dis_fp_arith(theInstr)) goto decode_success;
+ goto decode_failure;
+ case 0x16: // fsqrts
+ if (!allow_FX) goto decode_noFX;
+ if (dis_fp_arith(theInstr)) goto decode_success;
+ goto decode_failure;
+ case 0x18: // fres
+ if (!allow_GX) goto decode_noGX;
+ if (dis_fp_arith(theInstr)) goto decode_success;
+ goto decode_failure;
+
+ /* Floating Point Mult-Add Instructions */
+ case 0x1C: case 0x1D: case 0x1E: // fmsubs, fmadds, fnmsubs
+ case 0x1F: // fnmadds
+ if (dis_fp_multadd(theInstr)) goto decode_success;
+ goto decode_failure;
+
+ case 0x1A: // frsqrtes
+ if (!allow_GX) goto decode_noGX;
+ if (dis_fp_arith(theInstr)) goto decode_success;
+ goto decode_failure;
+
+ default:
+ goto decode_failure;
+ }
+ break;
+
+ /* 64bit Integer Stores */
+ case 0x3E: // std, stdu
+ if (!mode64) goto decode_failure;
+ if (dis_int_store( theInstr, abiinfo )) goto decode_success;
+ goto decode_failure;
+
+ case 0x3F:
+ if (!allow_F) goto decode_noF;
+ /* Instrs using opc[1:5] never overlap instrs using opc[1:10],
+ so we can simply fall through the first switch statement */
+
+ opc2 = IFIELD(theInstr, 1, 5);
+ switch (opc2) {
+ /* Floating Point Arith Instructions */
+ case 0x12: case 0x14: case 0x15: // fdiv, fsub, fadd
+ case 0x19: // fmul
+ if (dis_fp_arith(theInstr)) goto decode_success;
+ goto decode_failure;
+ case 0x16: // fsqrt
+ if (!allow_FX) goto decode_noFX;
+ if (dis_fp_arith(theInstr)) goto decode_success;
+ goto decode_failure;
+ case 0x17: case 0x1A: // fsel, frsqrte
+ if (!allow_GX) goto decode_noGX;
+ if (dis_fp_arith(theInstr)) goto decode_success;
+ goto decode_failure;
+
+ /* Floating Point Mult-Add Instructions */
+ case 0x1C: case 0x1D: case 0x1E: // fmsub, fmadd, fnmsub
+ case 0x1F: // fnmadd
+ if (dis_fp_multadd(theInstr)) goto decode_success;
+ goto decode_failure;
+
+ case 0x18: // fre
+ if (!allow_GX) goto decode_noGX;
+ if (dis_fp_arith(theInstr)) goto decode_success;
+ goto decode_failure;
+
+ default:
+ break; // Fall through
+ }
+
+ opc2 = IFIELD(theInstr, 1, 10);
+ switch (opc2) {
+ /* Floating Point Compare Instructions */
+ case 0x000: // fcmpu
+ case 0x020: // fcmpo
+ if (dis_fp_cmp(theInstr)) goto decode_success;
+ goto decode_failure;
+
+ /* Floating Point Rounding/Conversion Instructions */
+ case 0x00C: // frsp
+ case 0x00E: // fctiw
+ case 0x00F: // fctiwz
+ case 0x32E: // fctid
+ case 0x32F: // fctidz
+ case 0x34E: // fcfid
+ if (dis_fp_round(theInstr)) goto decode_success;
+ goto decode_failure;
+
+ /* Power6 rounding stuff */
+ case 0x1E8: // frim
+ case 0x1C8: // frip
+ case 0x188: // frin
+ case 0x1A8: // friz
+         /* A hack to check for Power6 capability... */
+ if ((allow_F && allow_V && allow_FX && allow_GX) &&
+ (dis_fp_round(theInstr)))
+ goto decode_success;
+ goto decode_failure;
+
+ /* Floating Point Move Instructions */
+ case 0x008: // fcpsgn
+ case 0x028: // fneg
+ case 0x048: // fmr
+ case 0x088: // fnabs
+ case 0x108: // fabs
+ if (dis_fp_move( theInstr )) goto decode_success;
+ goto decode_failure;
+
+ /* Floating Point Status/Control Register Instructions */
+ case 0x026: // mtfsb1
+ case 0x040: // mcrfs
+ case 0x046: // mtfsb0
+ case 0x086: // mtfsfi
+ case 0x247: // mffs
+ case 0x2C7: // mtfsf
+ if (dis_fp_scr( theInstr )) goto decode_success;
+ goto decode_failure;
+
+ default:
+ goto decode_failure;
+ }
+ break;
+
+ case 0x13:
+ switch (opc2) {
+
+ /* Condition Register Logical Instructions */
+ case 0x101: case 0x081: case 0x121: // crand, crandc, creqv
+ case 0x0E1: case 0x021: case 0x1C1: // crnand, crnor, cror
+ case 0x1A1: case 0x0C1: case 0x000: // crorc, crxor, mcrf
+ if (dis_cond_logic( theInstr )) goto decode_success;
+ goto decode_failure;
+
+ /* Branch Instructions */
+ case 0x210: case 0x010: // bcctr, bclr
+ if (dis_branch(theInstr, abiinfo, &dres,
+ resteerOkFn, callback_opaque))
+ goto decode_success;
+ goto decode_failure;
+
+ /* Memory Synchronization Instructions */
+ case 0x096: // isync
+ if (dis_memsync( theInstr )) goto decode_success;
+ goto decode_failure;
+
+ default:
+ goto decode_failure;
+ }
+ break;
+
+
+ case 0x1F:
+
+      /* For arith insns, bit10 is the OE flag (overflow enable) */
+
+ opc2 = IFIELD(theInstr, 1, 9);
+ switch (opc2) {
+ /* Integer Arithmetic Instructions */
+ case 0x10A: case 0x00A: case 0x08A: // add, addc, adde
+ case 0x0EA: case 0x0CA: case 0x1EB: // addme, addze, divw
+ case 0x1CB: case 0x04B: case 0x00B: // divwu, mulhw, mulhwu
+ case 0x0EB: case 0x068: case 0x028: // mullw, neg, subf
+ case 0x008: case 0x088: case 0x0E8: // subfc, subfe, subfme
+ case 0x0C8: // subfze
+ if (dis_int_arith( theInstr )) goto decode_success;
+ goto decode_failure;
+
+ /* 64bit Integer Arithmetic */
+ case 0x009: case 0x049: case 0x0E9: // mulhdu, mulhd, mulld
+ case 0x1C9: case 0x1E9: // divdu, divd
+ if (!mode64) goto decode_failure;
+ if (dis_int_arith( theInstr )) goto decode_success;
+ goto decode_failure;
+
+ case 0x1FC: // cmpb
+ if (dis_int_logic( theInstr )) goto decode_success;
+ goto decode_failure;
+
+ default:
+ break; // Fall through...
+ }
+
+ /* All remaining opcodes use full 10 bits. */
+
+ opc2 = IFIELD(theInstr, 1, 10);
+ switch (opc2) {
+ /* Integer Compare Instructions */
+ case 0x000: case 0x020: // cmp, cmpl
+ if (dis_int_cmp( theInstr )) goto decode_success;
+ goto decode_failure;
+
+ /* Integer Logical Instructions */
+ case 0x01C: case 0x03C: case 0x01A: // and, andc, cntlzw
+ case 0x11C: case 0x3BA: case 0x39A: // eqv, extsb, extsh
+ case 0x1DC: case 0x07C: case 0x1BC: // nand, nor, or
+ case 0x19C: case 0x13C: // orc, xor
+ case 0x2DF: case 0x25F: // mftgpr, mffgpr
+ if (dis_int_logic( theInstr )) goto decode_success;
+ goto decode_failure;
+
+ /* 64bit Integer Logical Instructions */
+ case 0x3DA: case 0x03A: // extsw, cntlzd
+ if (!mode64) goto decode_failure;
+ if (dis_int_logic( theInstr )) goto decode_success;
+ goto decode_failure;
+
+ /* 64bit Integer Parity Instructions */
+ case 0xba: case 0x9a: // prtyd, prtyw
+ if (dis_int_parity( theInstr )) goto decode_success;
+ goto decode_failure;
+
+ /* Integer Shift Instructions */
+ case 0x018: case 0x318: case 0x338: // slw, sraw, srawi
+ case 0x218: // srw
+ if (dis_int_shift( theInstr )) goto decode_success;
+ goto decode_failure;
+
+ /* 64bit Integer Shift Instructions */
+ case 0x01B: case 0x31A: // sld, srad
+ case 0x33A: case 0x33B: // sradi
+ case 0x21B: // srd
+ if (!mode64) goto decode_failure;
+ if (dis_int_shift( theInstr )) goto decode_success;
+ goto decode_failure;
+
+ /* Integer Load Instructions */
+ case 0x057: case 0x077: case 0x157: // lbzx, lbzux, lhax
+ case 0x177: case 0x117: case 0x137: // lhaux, lhzx, lhzux
+ case 0x017: case 0x037: // lwzx, lwzux
+ if (dis_int_load( theInstr )) goto decode_success;
+ goto decode_failure;
+
+ /* 64bit Integer Load Instructions */
+ case 0x035: case 0x015: // ldux, ldx
+ case 0x175: case 0x155: // lwaux, lwax
+ if (!mode64) goto decode_failure;
+ if (dis_int_load( theInstr )) goto decode_success;
+ goto decode_failure;
+
+ /* Integer Store Instructions */
+ case 0x0F7: case 0x0D7: case 0x1B7: // stbux, stbx, sthux
+ case 0x197: case 0x0B7: case 0x097: // sthx, stwux, stwx
+ if (dis_int_store( theInstr, abiinfo )) goto decode_success;
+ goto decode_failure;
+
+ /* 64bit Integer Store Instructions */
+ case 0x0B5: case 0x095: // stdux, stdx
+ if (!mode64) goto decode_failure;
+ if (dis_int_store( theInstr, abiinfo )) goto decode_success;
+ goto decode_failure;
+
+ /* Integer Load and Store with Byte Reverse Instructions */
+ case 0x316: case 0x216: case 0x396: // lhbrx, lwbrx, sthbrx
+ case 0x296: // stwbrx
+ if (dis_int_ldst_rev( theInstr )) goto decode_success;
+ goto decode_failure;
+
+ /* Integer Load and Store String Instructions */
+ case 0x255: case 0x215: case 0x2D5: // lswi, lswx, stswi
+ case 0x295: { // stswx
+ Bool stopHere = False;
+ Bool ok = dis_int_ldst_str( theInstr, &stopHere );
+ if (!ok) goto decode_failure;
+ if (stopHere) {
+ irsb->next = mkSzImm(ty, nextInsnAddr());
+ irsb->jumpkind = Ijk_Boring;
+ dres.whatNext = Dis_StopHere;
+ }
+ goto decode_success;
+ }
+
+ /* Memory Synchronization Instructions */
+ case 0x356: case 0x014: case 0x096: // eieio, lwarx, stwcx.
+ case 0x256: // sync
+ if (dis_memsync( theInstr )) goto decode_success;
+ goto decode_failure;
+
+ /* 64bit Memory Synchronization Instructions */
+ case 0x054: case 0x0D6: // ldarx, stdcx.
+ if (!mode64) goto decode_failure;
+ if (dis_memsync( theInstr )) goto decode_success;
+ goto decode_failure;
+
+ /* Processor Control Instructions */
+ case 0x200: case 0x013: case 0x153: // mcrxr, mfcr, mfspr
+ case 0x173: case 0x090: case 0x1D3: // mftb, mtcrf, mtspr
+ if (dis_proc_ctl( abiinfo, theInstr )) goto decode_success;
+ goto decode_failure;
+
+ /* Cache Management Instructions */
+ case 0x2F6: case 0x056: case 0x036: // dcba, dcbf, dcbst
+ case 0x116: case 0x0F6: case 0x3F6: // dcbt, dcbtst, dcbz
+ case 0x3D6: // icbi
+ if (dis_cache_manage( theInstr, &dres, archinfo ))
+ goto decode_success;
+ goto decode_failure;
+
+//zz /* External Control Instructions */
+//zz case 0x136: case 0x1B6: // eciwx, ecowx
+//zz DIP("external control op => not implemented\n");
+//zz goto decode_failure;
+
+ /* Trap Instructions */
+ case 0x004: case 0x044: // tw, td
+ if (dis_trap(theInstr, &dres)) goto decode_success;
+ goto decode_failure;
+
+ /* Floating Point Load Instructions */
+ case 0x217: case 0x237: case 0x257: // lfsx, lfsux, lfdx
+ case 0x277: // lfdux
+ if (!allow_F) goto decode_noF;
+ if (dis_fp_load( theInstr )) goto decode_success;
+ goto decode_failure;
+
+ /* Floating Point Store Instructions */
+      case 0x297: case 0x2B7: case 0x2D7: // stfsx, stfsux, stfdx
+      case 0x2F7:                         // stfdux
+ if (!allow_F) goto decode_noF;
+ if (dis_fp_store( theInstr )) goto decode_success;
+ goto decode_failure;
+ case 0x3D7: // stfiwx
+ if (!allow_F) goto decode_noF;
+ if (!allow_GX) goto decode_noGX;
+ if (dis_fp_store( theInstr )) goto decode_success;
+ goto decode_failure;
+
+ /* Floating Point Double Pair Indexed Instructions */
+ case 0x317: // lfdpx (Power6)
+ case 0x397: // stfdpx (Power6)
+ if (!allow_F) goto decode_noF;
+ if (dis_fp_pair(theInstr)) goto decode_success;
+ goto decode_failure;
+
+ case 0x357: // lfiwax
+ if (!allow_F) goto decode_noF;
+ if (dis_fp_load( theInstr )) goto decode_success;
+ goto decode_failure;
+
+ /* AltiVec instructions */
+
+ /* AV Cache Control - Data streams */
+ case 0x156: case 0x176: case 0x336: // dst, dstst, dss
+ if (!allow_V) goto decode_noV;
+ if (dis_av_datastream( theInstr )) goto decode_success;
+ goto decode_failure;
+
+ /* AV Load */
+ case 0x006: case 0x026: // lvsl, lvsr
+ case 0x007: case 0x027: case 0x047: // lvebx, lvehx, lvewx
+ case 0x067: case 0x167: // lvx, lvxl
+ if (!allow_V) goto decode_noV;
+ if (dis_av_load( abiinfo, theInstr )) goto decode_success;
+ goto decode_failure;
+
+ /* AV Store */
+ case 0x087: case 0x0A7: case 0x0C7: // stvebx, stvehx, stvewx
+ case 0x0E7: case 0x1E7: // stvx, stvxl
+ if (!allow_V) goto decode_noV;
+ if (dis_av_store( theInstr )) goto decode_success;
+ goto decode_failure;
+
+ default:
+ /* Deal with some other cases that we would otherwise have
+ punted on. */
+ /* --- ISEL (PowerISA_V2.05.pdf, p74) --- */
+ /* only decode this insn when reserved bit 0 (31 in IBM's
+ notation) is zero */
+ if (IFIELD(theInstr, 0, 6) == (15<<1)) {
+ UInt rT = ifieldRegDS( theInstr );
+ UInt rA = ifieldRegA( theInstr );
+ UInt rB = ifieldRegB( theInstr );
+ UInt bi = ifieldRegC( theInstr );
+ putIReg(
+ rT,
+ IRExpr_Mux0X( unop(Iop_32to8,getCRbit( bi )),
+ getIReg(rB),
+ rA == 0 ? (mode64 ? mkU64(0) : mkU32(0))
+ : getIReg(rA) )
+ );
+ DIP("isel r%u,r%u,r%u,crb%u\n", rT,rA,rB,bi);
+ goto decode_success;
+ }
+ goto decode_failure;
+ }
+ break;
+
+
+ case 0x04:
+ /* AltiVec instructions */
+
+ opc2 = IFIELD(theInstr, 0, 6);
+ switch (opc2) {
+ /* AV Mult-Add, Mult-Sum */
+ case 0x20: case 0x21: case 0x22: // vmhaddshs, vmhraddshs, vmladduhm
+ case 0x24: case 0x25: case 0x26: // vmsumubm, vmsummbm, vmsumuhm
+ case 0x27: case 0x28: case 0x29: // vmsumuhs, vmsumshm, vmsumshs
+ if (!allow_V) goto decode_noV;
+ if (dis_av_multarith( theInstr )) goto decode_success;
+ goto decode_failure;
+
+ /* AV Permutations */
+ case 0x2A: // vsel
+ case 0x2B: // vperm
+ case 0x2C: // vsldoi
+ if (!allow_V) goto decode_noV;
+ if (dis_av_permute( theInstr )) goto decode_success;
+ goto decode_failure;
+
+ /* AV Floating Point Mult-Add/Sub */
+ case 0x2E: case 0x2F: // vmaddfp, vnmsubfp
+ if (!allow_V) goto decode_noV;
+ if (dis_av_fp_arith( theInstr )) goto decode_success;
+ goto decode_failure;
+
+ default:
+ break; // Fall through...
+ }
+
+ opc2 = IFIELD(theInstr, 0, 11);
+ switch (opc2) {
+ /* AV Arithmetic */
+ case 0x180: // vaddcuw
+ case 0x000: case 0x040: case 0x080: // vaddubm, vadduhm, vadduwm
+ case 0x200: case 0x240: case 0x280: // vaddubs, vadduhs, vadduws
+ case 0x300: case 0x340: case 0x380: // vaddsbs, vaddshs, vaddsws
+ case 0x580: // vsubcuw
+ case 0x400: case 0x440: case 0x480: // vsububm, vsubuhm, vsubuwm
+ case 0x600: case 0x640: case 0x680: // vsububs, vsubuhs, vsubuws
+ case 0x700: case 0x740: case 0x780: // vsubsbs, vsubshs, vsubsws
+ case 0x402: case 0x442: case 0x482: // vavgub, vavguh, vavguw
+ case 0x502: case 0x542: case 0x582: // vavgsb, vavgsh, vavgsw
+ case 0x002: case 0x042: case 0x082: // vmaxub, vmaxuh, vmaxuw
+ case 0x102: case 0x142: case 0x182: // vmaxsb, vmaxsh, vmaxsw
+ case 0x202: case 0x242: case 0x282: // vminub, vminuh, vminuw
+ case 0x302: case 0x342: case 0x382: // vminsb, vminsh, vminsw
+ case 0x008: case 0x048: // vmuloub, vmulouh
+ case 0x108: case 0x148: // vmulosb, vmulosh
+ case 0x208: case 0x248: // vmuleub, vmuleuh
+ case 0x308: case 0x348: // vmulesb, vmulesh
+ case 0x608: case 0x708: case 0x648: // vsum4ubs, vsum4sbs, vsum4shs
+ case 0x688: case 0x788: // vsum2sws, vsumsws
+ if (!allow_V) goto decode_noV;
+ if (dis_av_arith( theInstr )) goto decode_success;
+ goto decode_failure;
+
+ /* AV Rotate, Shift */
+ case 0x004: case 0x044: case 0x084: // vrlb, vrlh, vrlw
+ case 0x104: case 0x144: case 0x184: // vslb, vslh, vslw
+ case 0x204: case 0x244: case 0x284: // vsrb, vsrh, vsrw
+ case 0x304: case 0x344: case 0x384: // vsrab, vsrah, vsraw
+ case 0x1C4: case 0x2C4: // vsl, vsr
+ case 0x40C: case 0x44C: // vslo, vsro
+ if (!allow_V) goto decode_noV;
+ if (dis_av_shift( theInstr )) goto decode_success;
+ goto decode_failure;
+
+ /* AV Logic */
+ case 0x404: case 0x444: case 0x484: // vand, vandc, vor
+ case 0x4C4: case 0x504: // vxor, vnor
+ if (!allow_V) goto decode_noV;
+ if (dis_av_logic( theInstr )) goto decode_success;
+ goto decode_failure;
+
+ /* AV Processor Control */
+ case 0x604: case 0x644: // mfvscr, mtvscr
+ if (!allow_V) goto decode_noV;
+ if (dis_av_procctl( theInstr )) goto decode_success;
+ goto decode_failure;
+
+ /* AV Floating Point Arithmetic */
+ case 0x00A: case 0x04A: // vaddfp, vsubfp
+ case 0x10A: case 0x14A: case 0x18A: // vrefp, vrsqrtefp, vexptefp
+ case 0x1CA: // vlogefp
+ case 0x40A: case 0x44A: // vmaxfp, vminfp
+ if (!allow_V) goto decode_noV;
+ if (dis_av_fp_arith( theInstr )) goto decode_success;
+ goto decode_failure;
+
+ /* AV Floating Point Round/Convert */
+ case 0x20A: case 0x24A: case 0x28A: // vrfin, vrfiz, vrfip
+ case 0x2CA: // vrfim
+ case 0x30A: case 0x34A: case 0x38A: // vcfux, vcfsx, vctuxs
+ case 0x3CA: // vctsxs
+ if (!allow_V) goto decode_noV;
+ if (dis_av_fp_convert( theInstr )) goto decode_success;
+ goto decode_failure;
+
+ /* AV Merge, Splat */
+ case 0x00C: case 0x04C: case 0x08C: // vmrghb, vmrghh, vmrghw
+ case 0x10C: case 0x14C: case 0x18C: // vmrglb, vmrglh, vmrglw
+ case 0x20C: case 0x24C: case 0x28C: // vspltb, vsplth, vspltw
+ case 0x30C: case 0x34C: case 0x38C: // vspltisb, vspltish, vspltisw
+ if (!allow_V) goto decode_noV;
+ if (dis_av_permute( theInstr )) goto decode_success;
+ goto decode_failure;
+
+ /* AV Pack, Unpack */
+ case 0x00E: case 0x04E: case 0x08E: // vpkuhum, vpkuwum, vpkuhus
+ case 0x0CE: // vpkuwus
+ case 0x10E: case 0x14E: case 0x18E: // vpkshus, vpkswus, vpkshss
+ case 0x1CE: // vpkswss
+ case 0x20E: case 0x24E: case 0x28E: // vupkhsb, vupkhsh, vupklsb
+ case 0x2CE: // vupklsh
+ case 0x30E: case 0x34E: case 0x3CE: // vpkpx, vupkhpx, vupklpx
+ if (!allow_V) goto decode_noV;
+ if (dis_av_pack( theInstr )) goto decode_success;
+ goto decode_failure;
+
+ default:
+ break; // Fall through...
+ }
+
+ opc2 = IFIELD(theInstr, 0, 10);
+ switch (opc2) {
+
+ /* AV Compare */
+ case 0x006: case 0x046: case 0x086: // vcmpequb, vcmpequh, vcmpequw
+ case 0x206: case 0x246: case 0x286: // vcmpgtub, vcmpgtuh, vcmpgtuw
+ case 0x306: case 0x346: case 0x386: // vcmpgtsb, vcmpgtsh, vcmpgtsw
+ if (!allow_V) goto decode_noV;
+ if (dis_av_cmp( theInstr )) goto decode_success;
+ goto decode_failure;
+
+ /* AV Floating Point Compare */
+ case 0x0C6: case 0x1C6: case 0x2C6: // vcmpeqfp, vcmpgefp, vcmpgtfp
+ case 0x3C6: // vcmpbfp
+ if (!allow_V) goto decode_noV;
+ if (dis_av_fp_cmp( theInstr )) goto decode_success;
+ goto decode_failure;
+
+ default:
+ goto decode_failure;
+ }
+ break;
+
+ default:
+ goto decode_failure;
+
+ decode_noF:
+ vassert(!allow_F);
+ vex_printf("disInstr(ppc): declined to decode an FP insn.\n");
+ goto decode_failure;
+ decode_noV:
+ vassert(!allow_V);
+ vex_printf("disInstr(ppc): declined to decode an AltiVec insn.\n");
+ goto decode_failure;
+ decode_noFX:
+ vassert(!allow_FX);
+ vex_printf("disInstr(ppc): "
+ "declined to decode a GeneralPurpose-Optional insn.\n");
+ goto decode_failure;
+ decode_noGX:
+ vassert(!allow_GX);
+ vex_printf("disInstr(ppc): "
+ "declined to decode a Graphics-Optional insn.\n");
+ goto decode_failure;
+
+ decode_failure:
+ /* All decode failures end up here. */
+ opc2 = (theInstr) & 0x7FF;
+ vex_printf("disInstr(ppc): unhandled instruction: "
+ "0x%x\n", theInstr);
+ vex_printf(" primary %d(0x%x), secondary %u(0x%x)\n",
+ opc1, opc1, opc2, opc2);
+
+ /* Tell the dispatcher that this insn cannot be decoded, and so has
+ not been executed, and (is currently) the next to be executed.
+      CIA should be up-to-date since it was made so at the start of
+      each insn, but nevertheless be paranoid and update it again
+      right now. */
+ putGST( PPC_GST_CIA, mkSzImm(ty, guest_CIA_curr_instr) );
+ irsb->next = mkSzImm(ty, guest_CIA_curr_instr);
+ irsb->jumpkind = Ijk_NoDecode;
+ dres.whatNext = Dis_StopHere;
+ dres.len = 0;
+ return dres;
+
+ } /* switch (opc) for the main (primary) opcode switch. */
+
+ decode_success:
+ /* All decode successes end up here. */
+ DIP("\n");
+
+ if (dres.len == 0) {
+ dres.len = 4;
+ } else {
+ vassert(dres.len == 20);
+ }
+ return dres;
+}
+
+#undef DIP
+#undef DIS
+
+
+/*------------------------------------------------------------*/
+/*--- Top-level fn ---*/
+/*------------------------------------------------------------*/
+
+/* Disassemble a single instruction into IR. The instruction
+ is located in host memory at &guest_code[delta]. */
+
+DisResult disInstr_PPC ( IRSB* irsb_IN,
+ Bool put_IP,
+ Bool (*resteerOkFn) ( void*, Addr64 ),
+ Bool resteerCisOk,
+ void* callback_opaque,
+ UChar* guest_code_IN,
+ Long delta,
+ Addr64 guest_IP,
+ VexArch guest_arch,
+ VexArchInfo* archinfo,
+ VexAbiInfo* abiinfo,
+ Bool host_bigendian_IN )
+{
+ IRType ty;
+ DisResult dres;
+ UInt mask32, mask64;
+ UInt hwcaps_guest = archinfo->hwcaps;
+
+ vassert(guest_arch == VexArchPPC32 || guest_arch == VexArchPPC64);
+
+ /* global -- ick */
+ mode64 = guest_arch == VexArchPPC64;
+ ty = mode64 ? Ity_I64 : Ity_I32;
+
+ /* do some sanity checks */
+ mask32 = VEX_HWCAPS_PPC32_F | VEX_HWCAPS_PPC32_V
+ | VEX_HWCAPS_PPC32_FX | VEX_HWCAPS_PPC32_GX;
+
+ mask64 = VEX_HWCAPS_PPC64_V
+ | VEX_HWCAPS_PPC64_FX | VEX_HWCAPS_PPC64_GX;
+
+ if (mode64) {
+ vassert((hwcaps_guest & mask32) == 0);
+ } else {
+ vassert((hwcaps_guest & mask64) == 0);
+ }
+
+ /* Set globals (see top of this file) */
+ guest_code = guest_code_IN;
+ irsb = irsb_IN;
+ host_is_bigendian = host_bigendian_IN;
+
+ guest_CIA_curr_instr = mkSzAddr(ty, guest_IP);
+ guest_CIA_bbstart = mkSzAddr(ty, guest_IP - delta);
+
+ dres = disInstr_PPC_WRK ( put_IP,
+ resteerOkFn, resteerCisOk, callback_opaque,
+ delta, archinfo, abiinfo );
+
+ return dres;
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Unused stuff ---*/
+/*------------------------------------------------------------*/
+
+///* A potentially more memcheck-friendly implementation of Clz32, with
+// the boundary case Clz32(0) = 32, which is what ppc requires. */
+//
+//static IRExpr* /* :: Ity_I32 */ verbose_Clz32 ( IRTemp arg )
+//{
+// /* Welcome ... to SSA R Us. */
+// IRTemp n1 = newTemp(Ity_I32);
+// IRTemp n2 = newTemp(Ity_I32);
+// IRTemp n3 = newTemp(Ity_I32);
+// IRTemp n4 = newTemp(Ity_I32);
+// IRTemp n5 = newTemp(Ity_I32);
+// IRTemp n6 = newTemp(Ity_I32);
+// IRTemp n7 = newTemp(Ity_I32);
+// IRTemp n8 = newTemp(Ity_I32);
+// IRTemp n9 = newTemp(Ity_I32);
+// IRTemp n10 = newTemp(Ity_I32);
+// IRTemp n11 = newTemp(Ity_I32);
+// IRTemp n12 = newTemp(Ity_I32);
+//
+// /* First, propagate the most significant 1-bit into all lower
+// positions in the word. */
+// /* unsigned int clz ( unsigned int n )
+// {
+// n |= (n >> 1);
+// n |= (n >> 2);
+// n |= (n >> 4);
+// n |= (n >> 8);
+// n |= (n >> 16);
+// return bitcount(~n);
+// }
+// */
+// assign(n1, mkexpr(arg));
+// assign(n2, binop(Iop_Or32, mkexpr(n1), binop(Iop_Shr32, mkexpr(n1), mkU8(1))));
+// assign(n3, binop(Iop_Or32, mkexpr(n2), binop(Iop_Shr32, mkexpr(n2), mkU8(2))));
+// assign(n4, binop(Iop_Or32, mkexpr(n3), binop(Iop_Shr32, mkexpr(n3), mkU8(4))));
+// assign(n5, binop(Iop_Or32, mkexpr(n4), binop(Iop_Shr32, mkexpr(n4), mkU8(8))));
+// assign(n6, binop(Iop_Or32, mkexpr(n5), binop(Iop_Shr32, mkexpr(n5), mkU8(16))));
+// /* This gives a word of the form 0---01---1. Now invert it, giving
+// a word of the form 1---10---0, then do a population-count idiom
+// (to count the 1s, which is the number of leading zeroes, or 32
+//    if the original word was 0). */
+// assign(n7, unop(Iop_Not32, mkexpr(n6)));
+//
+// /* unsigned int bitcount ( unsigned int n )
+// {
+// n = n - ((n >> 1) & 0x55555555);
+// n = (n & 0x33333333) + ((n >> 2) & 0x33333333);
+// n = (n + (n >> 4)) & 0x0F0F0F0F;
+// n = n + (n >> 8);
+// n = (n + (n >> 16)) & 0x3F;
+// return n;
+// }
+// */
+// assign(n8,
+// binop(Iop_Sub32,
+// mkexpr(n7),
+// binop(Iop_And32,
+// binop(Iop_Shr32, mkexpr(n7), mkU8(1)),
+// mkU32(0x55555555))));
+// assign(n9,
+// binop(Iop_Add32,
+// binop(Iop_And32, mkexpr(n8), mkU32(0x33333333)),
+// binop(Iop_And32,
+// binop(Iop_Shr32, mkexpr(n8), mkU8(2)),
+// mkU32(0x33333333))));
+// assign(n10,
+// binop(Iop_And32,
+// binop(Iop_Add32,
+// mkexpr(n9),
+// binop(Iop_Shr32, mkexpr(n9), mkU8(4))),
+// mkU32(0x0F0F0F0F)));
+// assign(n11,
+// binop(Iop_Add32,
+// mkexpr(n10),
+// binop(Iop_Shr32, mkexpr(n10), mkU8(8))));
+// assign(n12,
+// binop(Iop_Add32,
+// mkexpr(n11),
+// binop(Iop_Shr32, mkexpr(n11), mkU8(16))));
+// return
+// binop(Iop_And32, mkexpr(n12), mkU32(0x3F));
+//}
+
+/*--------------------------------------------------------------------*/
+/*--- end guest_ppc_toIR.c ---*/
+/*--------------------------------------------------------------------*/
diff --git a/VEX/priv/guest_x86_defs.h b/VEX/priv/guest_x86_defs.h
new file mode 100644
index 0000000..09d647a
--- /dev/null
+++ b/VEX/priv/guest_x86_defs.h
@@ -0,0 +1,412 @@
+
+/*---------------------------------------------------------------*/
+/*--- begin guest_x86_defs.h ---*/
+/*---------------------------------------------------------------*/
+
+/*
+ This file is part of Valgrind, a dynamic binary instrumentation
+ framework.
+
+ Copyright (C) 2004-2010 OpenWorks LLP
+ info@open-works.net
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA.
+
+ The GNU General Public License is contained in the file COPYING.
+
+ Neither the names of the U.S. Department of Energy nor the
+ University of California nor the names of its contributors may be
+ used to endorse or promote products derived from this software
+ without prior written permission.
+*/
+
+/* Only to be used within the guest-x86 directory. */
+
+#ifndef __VEX_GUEST_X86_DEFS_H
+#define __VEX_GUEST_X86_DEFS_H
+
+
+/*---------------------------------------------------------*/
+/*--- x86 to IR conversion ---*/
+/*---------------------------------------------------------*/
+
+/* Convert one x86 insn to IR. See the type DisOneInstrFn in
+ bb_to_IR.h. */
+extern
+DisResult disInstr_X86 ( IRSB* irbb,
+ Bool put_IP,
+ Bool (*resteerOkFn) ( void*, Addr64 ),
+ Bool resteerCisOk,
+ void* callback_opaque,
+ UChar* guest_code,
+ Long delta,
+ Addr64 guest_IP,
+ VexArch guest_arch,
+ VexArchInfo* archinfo,
+ VexAbiInfo* abiinfo,
+ Bool host_bigendian );
+
+/* Used by the optimiser to specialise calls to helpers. */
+extern
+IRExpr* guest_x86_spechelper ( HChar* function_name,
+ IRExpr** args,
+ IRStmt** precedingStmts,
+ Int n_precedingStmts );
+
+/* Describes to the optimiser which parts of the guest state require
+ precise memory exceptions. This is logically part of the guest
+ state description. */
+extern
+Bool guest_x86_state_requires_precise_mem_exns ( Int, Int );
+
+extern
+VexGuestLayout x86guest_layout;
+
+
+/*---------------------------------------------------------*/
+/*--- x86 guest helpers ---*/
+/*---------------------------------------------------------*/
+
+/* --- CLEAN HELPERS --- */
+
+extern UInt x86g_calculate_eflags_all (
+ UInt cc_op, UInt cc_dep1, UInt cc_dep2, UInt cc_ndep
+ );
+
+__attribute((regparm(3)))
+extern UInt x86g_calculate_eflags_c (
+ UInt cc_op, UInt cc_dep1, UInt cc_dep2, UInt cc_ndep
+ );
+
+extern UInt x86g_calculate_condition (
+ UInt/*X86Condcode*/ cond,
+ UInt cc_op,
+ UInt cc_dep1, UInt cc_dep2, UInt cc_ndep
+ );
+
+extern UInt x86g_calculate_FXAM ( UInt tag, ULong dbl );
+
+extern ULong x86g_calculate_RCR (
+ UInt arg, UInt rot_amt, UInt eflags_in, UInt sz
+ );
+extern ULong x86g_calculate_RCL (
+ UInt arg, UInt rot_amt, UInt eflags_in, UInt sz
+ );
+
+extern UInt x86g_calculate_daa_das_aaa_aas ( UInt AX_and_flags, UInt opcode );
+
+extern ULong x86g_check_fldcw ( UInt fpucw );
+
+extern UInt x86g_create_fpucw ( UInt fpround );
+
+extern ULong x86g_check_ldmxcsr ( UInt mxcsr );
+
+extern UInt x86g_create_mxcsr ( UInt sseround );
+
+
+/* Translate a guest virtual_addr into a guest linear address by
+ consulting the supplied LDT/GDT structures. Their representation
+ must be as specified in pub/libvex_guest_x86.h. To indicate a
+ translation failure, 1<<32 is returned. On success, the lower 32
+ bits of the returned result indicate the linear address.
+*/
+extern
+ULong x86g_use_seg_selector ( HWord ldt, HWord gdt,
+ UInt seg_selector, UInt virtual_addr );
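+
+/* Illustrative sketch (not part of the original header) of how a
+   caller would consume the result; 'sel' and 'vaddr' are hypothetical
+   names:
+
+      ULong res = x86g_use_seg_selector(ldt, gdt, sel, vaddr);
+      if ((res >> 32) != 0)
+         goto translation_failed;    // bit 32 set: failure
+      UInt linear = (UInt)res;       // low 32 bits: linear address
+*/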
+
+extern ULong x86g_calculate_mmx_pmaddwd ( ULong, ULong );
+extern ULong x86g_calculate_mmx_psadbw ( ULong, ULong );
+extern UInt x86g_calculate_mmx_pmovmskb ( ULong );
+extern UInt x86g_calculate_sse_pmovmskb ( ULong w64hi, ULong w64lo );
+
+
+/* --- DIRTY HELPERS --- */
+
+extern ULong x86g_dirtyhelper_loadF80le ( UInt );
+
+extern void x86g_dirtyhelper_storeF80le ( UInt, ULong );
+
+extern void x86g_dirtyhelper_CPUID_sse0 ( VexGuestX86State* );
+extern void x86g_dirtyhelper_CPUID_sse1 ( VexGuestX86State* );
+extern void x86g_dirtyhelper_CPUID_sse2 ( VexGuestX86State* );
+
+extern void x86g_dirtyhelper_FINIT ( VexGuestX86State* );
+
+extern void x86g_dirtyhelper_FXSAVE ( VexGuestX86State*, HWord );
+extern void x86g_dirtyhelper_FSAVE ( VexGuestX86State*, HWord );
+extern void x86g_dirtyhelper_FSTENV ( VexGuestX86State*, HWord );
+
+extern ULong x86g_dirtyhelper_RDTSC ( void );
+
+extern UInt x86g_dirtyhelper_IN ( UInt portno, UInt sz/*1,2 or 4*/ );
+extern void x86g_dirtyhelper_OUT ( UInt portno, UInt data,
+ UInt sz/*1,2 or 4*/ );
+
+extern void x86g_dirtyhelper_SxDT ( void* address,
+ UInt op /* 0 or 1 */ );
+
+extern VexEmWarn
+ x86g_dirtyhelper_FXRSTOR ( VexGuestX86State*, HWord );
+
+extern VexEmWarn
+ x86g_dirtyhelper_FRSTOR ( VexGuestX86State*, HWord );
+
+extern VexEmWarn
+ x86g_dirtyhelper_FLDENV ( VexGuestX86State*, HWord );
+
+
+/*---------------------------------------------------------*/
+/*--- Condition code stuff ---*/
+/*---------------------------------------------------------*/
+
+/* eflags masks */
+#define X86G_CC_SHIFT_O 11
+#define X86G_CC_SHIFT_S 7
+#define X86G_CC_SHIFT_Z 6
+#define X86G_CC_SHIFT_A 4
+#define X86G_CC_SHIFT_C 0
+#define X86G_CC_SHIFT_P 2
+
+#define X86G_CC_MASK_O (1 << X86G_CC_SHIFT_O)
+#define X86G_CC_MASK_S (1 << X86G_CC_SHIFT_S)
+#define X86G_CC_MASK_Z (1 << X86G_CC_SHIFT_Z)
+#define X86G_CC_MASK_A (1 << X86G_CC_SHIFT_A)
+#define X86G_CC_MASK_C (1 << X86G_CC_SHIFT_C)
+#define X86G_CC_MASK_P (1 << X86G_CC_SHIFT_P)
+
+/* FPU flag masks */
+#define X86G_FC_SHIFT_C3 14
+#define X86G_FC_SHIFT_C2 10
+#define X86G_FC_SHIFT_C1 9
+#define X86G_FC_SHIFT_C0 8
+
+#define X86G_FC_MASK_C3 (1 << X86G_FC_SHIFT_C3)
+#define X86G_FC_MASK_C2 (1 << X86G_FC_SHIFT_C2)
+#define X86G_FC_MASK_C1 (1 << X86G_FC_SHIFT_C1)
+#define X86G_FC_MASK_C0 (1 << X86G_FC_SHIFT_C0)
+
+
+/* %EFLAGS thunk descriptors. A four-word thunk is used to record
+ details of the most recent flag-setting operation, so the flags can
+ be computed later if needed. It is possible to do this a little
+ more efficiently using a 3-word thunk, but that makes it impossible
+ to describe the flag data dependencies sufficiently accurately for
+ Memcheck. Hence 4 words are used, with minimal loss of efficiency.
+
+ The four words are:
+
+ CC_OP, which describes the operation.
+
+ CC_DEP1 and CC_DEP2. These are arguments to the operation.
+ We want Memcheck to believe that the resulting flags are
+ data-dependent on both CC_DEP1 and CC_DEP2, hence the
+ name DEP.
+
+ CC_NDEP. This is a 3rd argument to the operation which is
+ sometimes needed. We arrange things so that Memcheck does
+ not believe the resulting flags are data-dependent on CC_NDEP
+ ("not dependent").
+
+ To make Memcheck believe that (the definedness of) the encoded
+ flags depends only on (the definedness of) CC_DEP1 and CC_DEP2
+ requires two things:
+
+ (1) In the guest state layout info (x86guest_layout), CC_OP and
+ CC_NDEP are marked as always defined.
+
+ (2) When passing the thunk components to an evaluation function
+ (calculate_condition, calculate_eflags, calculate_eflags_c) the
+ IRCallee's mcx_mask must be set so as to exclude from
+ consideration all passed args except CC_DEP1 and CC_DEP2.
+
+ Strictly speaking only (2) is necessary for correctness. However,
+ (1) helps efficiency in that since (2) means we never ask about the
+ definedness of CC_OP or CC_NDEP, we may as well not even bother to
+ track their definedness.
+
+ When building the thunk, it is always necessary to write words into
+ CC_DEP1 and CC_DEP2, even if those args are not used given the
+   CC_OP field (eg, CC_DEP2 is not used if CC_OP is X86G_CC_OP_LOGICB/W/L).
+ This is important because otherwise Memcheck could give false
+ positives as it does not understand the relationship between the
+ CC_OP field and CC_DEP1 and CC_DEP2, and so believes that the
+ definedness of the stored flags always depends on both CC_DEP1 and
+ CC_DEP2.
+
+ However, it is only necessary to set CC_NDEP when the CC_OP value
+ requires it, because Memcheck ignores CC_NDEP, and the evaluation
+ functions do understand the CC_OP fields and will only examine
+ CC_NDEP for suitable values of CC_OP.
+
+ A summary of the field usages is:
+
+ Operation DEP1 DEP2 NDEP
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ add/sub/mul first arg second arg unused
+
+ adc/sbb first arg (second arg)
+ XOR old_carry old_carry
+
+ and/or/xor result zero unused
+
+ inc/dec result zero old_carry
+
+ shl/shr/sar result subshifted- unused
+ result
+
+ rol/ror result zero old_flags
+
+ copy old_flags zero unused.
+
+
+ Therefore Memcheck will believe the following:
+
+ * add/sub/mul -- definedness of result flags depends on definedness
+ of both args.
+
+ * adc/sbb -- definedness of result flags depends on definedness of
+ both args and definedness of the old C flag. Because only two
+ DEP fields are available, the old C flag is XOR'd into the second
+ arg so that Memcheck sees the data dependency on it. That means
+ the NDEP field must contain a second copy of the old C flag
+ so that the evaluation functions can correctly recover the second
+ arg.
+
+ * and/or/xor are straightforward -- definedness of result flags
+ depends on definedness of result value.
+
+ * inc/dec -- definedness of result flags depends only on
+ definedness of result. This isn't really true -- it also depends
+ on the old C flag. However, we don't want Memcheck to see that,
+ and so the old C flag must be passed in NDEP and not in DEP2.
+ It's inconceivable that a compiler would generate code that puts
+ the C flag in an undefined state, then does an inc/dec, which
+ leaves C unchanged, and then makes a conditional jump/move based
+ on C. So our fiction seems a good approximation.
+
+ * shl/shr/sar -- straightforward, again, definedness of result
+ flags depends on definedness of result value. The subshifted
+ value (value shifted one less) is also needed, but its
+ definedness is the same as the definedness of the shifted value.
+
+   * rol/ror -- these only set O and C, and leave A Z S P alone.
+ However it seems prudent (as per inc/dec) to say the definedness
+ of all resulting flags depends on the definedness of the result,
+ hence the old flags must go in as NDEP and not DEP2.
+
+ * rcl/rcr are too difficult to do in-line, and so are done by a
+ helper function. They are not part of this scheme. The helper
+ function takes the value to be rotated, the rotate amount and the
+ old flags, and returns the new flags and the rotated value.
+ Since the helper's mcx_mask does not have any set bits, Memcheck
+ will lazily propagate undefinedness from any of the 3 args into
+ both results (flags and actual value).
+*/
+enum {
+ X86G_CC_OP_COPY=0, /* DEP1 = current flags, DEP2 = 0, NDEP = unused */
+ /* just copy DEP1 to output */
+
+ X86G_CC_OP_ADDB, /* 1 */
+ X86G_CC_OP_ADDW, /* 2 DEP1 = argL, DEP2 = argR, NDEP = unused */
+ X86G_CC_OP_ADDL, /* 3 */
+
+ X86G_CC_OP_SUBB, /* 4 */
+ X86G_CC_OP_SUBW, /* 5 DEP1 = argL, DEP2 = argR, NDEP = unused */
+ X86G_CC_OP_SUBL, /* 6 */
+
+ X86G_CC_OP_ADCB, /* 7 */
+ X86G_CC_OP_ADCW, /* 8 DEP1 = argL, DEP2 = argR ^ oldCarry, NDEP = oldCarry */
+ X86G_CC_OP_ADCL, /* 9 */
+
+ X86G_CC_OP_SBBB, /* 10 */
+ X86G_CC_OP_SBBW, /* 11 DEP1 = argL, DEP2 = argR ^ oldCarry, NDEP = oldCarry */
+ X86G_CC_OP_SBBL, /* 12 */
+
+ X86G_CC_OP_LOGICB, /* 13 */
+ X86G_CC_OP_LOGICW, /* 14 DEP1 = result, DEP2 = 0, NDEP = unused */
+ X86G_CC_OP_LOGICL, /* 15 */
+
+ X86G_CC_OP_INCB, /* 16 */
+ X86G_CC_OP_INCW, /* 17 DEP1 = result, DEP2 = 0, NDEP = oldCarry (0 or 1) */
+ X86G_CC_OP_INCL, /* 18 */
+
+ X86G_CC_OP_DECB, /* 19 */
+ X86G_CC_OP_DECW, /* 20 DEP1 = result, DEP2 = 0, NDEP = oldCarry (0 or 1) */
+ X86G_CC_OP_DECL, /* 21 */
+
+ X86G_CC_OP_SHLB, /* 22 DEP1 = res, DEP2 = res', NDEP = unused */
+ X86G_CC_OP_SHLW, /* 23 where res' is like res but shifted one bit less */
+ X86G_CC_OP_SHLL, /* 24 */
+
+ X86G_CC_OP_SHRB, /* 25 DEP1 = res, DEP2 = res', NDEP = unused */
+ X86G_CC_OP_SHRW, /* 26 where res' is like res but shifted one bit less */
+ X86G_CC_OP_SHRL, /* 27 */
+
+ X86G_CC_OP_ROLB, /* 28 */
+ X86G_CC_OP_ROLW, /* 29 DEP1 = res, DEP2 = 0, NDEP = old flags */
+ X86G_CC_OP_ROLL, /* 30 */
+
+ X86G_CC_OP_RORB, /* 31 */
+ X86G_CC_OP_RORW, /* 32 DEP1 = res, DEP2 = 0, NDEP = old flags */
+ X86G_CC_OP_RORL, /* 33 */
+
+ X86G_CC_OP_UMULB, /* 34 */
+ X86G_CC_OP_UMULW, /* 35 DEP1 = argL, DEP2 = argR, NDEP = unused */
+ X86G_CC_OP_UMULL, /* 36 */
+
+ X86G_CC_OP_SMULB, /* 37 */
+ X86G_CC_OP_SMULW, /* 38 DEP1 = argL, DEP2 = argR, NDEP = unused */
+ X86G_CC_OP_SMULL, /* 39 */
+
+ X86G_CC_OP_NUMBER
+};
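+
+/* Illustrative sketch (an assumption about typical front-end usage,
+   not code from this file): after an "addl %ebx,%eax" the thunk is
+   built as
+
+      CC_OP   = X86G_CC_OP_ADDL
+      CC_DEP1 = argL (the old value of %eax)
+      CC_DEP2 = argR (%ebx)
+      CC_NDEP = unused (need not be set for add)
+
+   and a subsequent "jz" is then evaluated as
+
+      x86g_calculate_condition(X86CondZ, cc_op, cc_dep1, cc_dep2, cc_ndep)
+
+   For adc/sbb, DEP2 holds argR ^ oldCarry, so the evaluation functions
+   recover argR as CC_DEP2 ^ (CC_NDEP & X86G_CC_MASK_C); see
+   ACTIONS_ADC/ACTIONS_SBB in guest_x86_helpers.c. */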
+
+typedef
+ enum {
+ X86CondO = 0, /* overflow */
+ X86CondNO = 1, /* no overflow */
+
+ X86CondB = 2, /* below */
+ X86CondNB = 3, /* not below */
+
+ X86CondZ = 4, /* zero */
+ X86CondNZ = 5, /* not zero */
+
+ X86CondBE = 6, /* below or equal */
+ X86CondNBE = 7, /* not below or equal */
+
+ X86CondS = 8, /* negative */
+ X86CondNS = 9, /* not negative */
+
+ X86CondP = 10, /* parity even */
+ X86CondNP = 11, /* not parity even */
+
+ X86CondL = 12, /* jump less */
+ X86CondNL = 13, /* not less */
+
+ X86CondLE = 14, /* less or equal */
+ X86CondNLE = 15, /* not less or equal */
+
+ X86CondAlways = 16 /* HACK */
+ }
+ X86Condcode;
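+
+/* Note that each condition and its negation differ only in bit 0 of
+   the encoding.  The evaluation helper exploits this; a sketch of the
+   idiom (as used by x86g_calculate_condition):
+
+      UInt inv = cond & 1;      // 1 selects the negated form
+      ...
+      return 1 & (inv ^ zf);    // eg for X86CondZ / X86CondNZ
+*/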
+
+#endif /* ndef __VEX_GUEST_X86_DEFS_H */
+
+/*---------------------------------------------------------------*/
+/*--- end guest_x86_defs.h ---*/
+/*---------------------------------------------------------------*/
diff --git a/VEX/priv/guest_x86_helpers.c b/VEX/priv/guest_x86_helpers.c
new file mode 100644
index 0000000..7aa7a33
--- /dev/null
+++ b/VEX/priv/guest_x86_helpers.c
@@ -0,0 +1,2777 @@
+
+/*---------------------------------------------------------------*/
+/*--- begin guest_x86_helpers.c ---*/
+/*---------------------------------------------------------------*/
+
+/*
+ This file is part of Valgrind, a dynamic binary instrumentation
+ framework.
+
+ Copyright (C) 2004-2010 OpenWorks LLP
+ info@open-works.net
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA.
+
+ The GNU General Public License is contained in the file COPYING.
+
+ Neither the names of the U.S. Department of Energy nor the
+ University of California nor the names of its contributors may be
+ used to endorse or promote products derived from this software
+ without prior written permission.
+*/
+
+#include "libvex_basictypes.h"
+#include "libvex_emwarn.h"
+#include "libvex_guest_x86.h"
+#include "libvex_ir.h"
+#include "libvex.h"
+
+#include "main_util.h"
+#include "guest_generic_bb_to_IR.h"
+#include "guest_x86_defs.h"
+#include "guest_generic_x87.h"
+
+
+/* This file contains helper functions for x86 guest code.
+ Calls to these functions are generated by the back end.
+ These calls are of course in the host machine code and
+ this file will be compiled to host machine code, so that
+ all makes sense.
+
+ Only change the signatures of these helper functions very
+ carefully. If you change the signature here, you'll have to change
+ the parameters passed to it in the IR calls constructed by
+ guest-x86/toIR.c.
+
+ The convention used is that all functions called from generated
+ code are named x86g_<something>, and any function whose name lacks
+ that prefix is not called from generated code. Note that some
+ LibVEX_* functions can however be called by VEX's client, but that
+ is not the same as calling them from VEX-generated code.
+*/
+
+
+/* Set to 1 to get detailed profiling info about use of the flag
+ machinery. */
+#define PROFILE_EFLAGS 0
+
+
+/*---------------------------------------------------------------*/
+/*--- %eflags run-time helpers. ---*/
+/*---------------------------------------------------------------*/
+
+static const UChar parity_table[256] = {
+ X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
+ 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
+ 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
+ X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
+ 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
+ X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
+ X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
+ 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
+ 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
+ X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
+ X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
+ 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
+ X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
+ 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
+ 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
+ X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
+ 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
+ X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
+ X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
+ 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
+ X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
+ 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
+ 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
+ X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
+ X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
+ 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
+ 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
+ X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
+ 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
+ X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
+ X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0,
+ 0, X86G_CC_MASK_P, X86G_CC_MASK_P, 0, X86G_CC_MASK_P, 0, 0, X86G_CC_MASK_P,
+};
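+
+/* A sketch (an illustration, not code used in the build) of how such
+   a table can be generated: x86 PF is set iff the result's low byte
+   has an even number of 1 bits.
+
+      Int i, j, nSet;
+      for (i = 0; i < 256; i++) {
+         for (nSet = 0, j = 0; j < 8; j++)
+            nSet += (i >> j) & 1;
+         table[i] = (nSet & 1) ? 0 : X86G_CC_MASK_P;
+      }
+*/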
+
+/* generalised left-shifter */
+inline static Int lshift ( Int x, Int n )
+{
+ if (n >= 0)
+ return x << n;
+ else
+ return x >> (-n);
+}
+
+/* identity on ULong */
+static inline ULong idULong ( ULong x )
+{
+ return x;
+}
+
+
+#define PREAMBLE(__data_bits) \
+ /* const */ UInt DATA_MASK \
+ = __data_bits==8 ? 0xFF \
+ : (__data_bits==16 ? 0xFFFF \
+ : 0xFFFFFFFF); \
+ /* const */ UInt SIGN_MASK = 1 << (__data_bits - 1); \
+ /* const */ UInt CC_DEP1 = cc_dep1_formal; \
+ /* const */ UInt CC_DEP2 = cc_dep2_formal; \
+ /* const */ UInt CC_NDEP = cc_ndep_formal; \
+ /* Four bogus assignments, which hopefully gcc can */ \
+ /* optimise away, and which stop it complaining about */ \
+ /* unused variables. */ \
+ SIGN_MASK = SIGN_MASK; \
+ DATA_MASK = DATA_MASK; \
+ CC_DEP2 = CC_DEP2; \
+ CC_NDEP = CC_NDEP;
+
+
+/*-------------------------------------------------------------*/
+
+#define ACTIONS_ADD(DATA_BITS,DATA_UTYPE) \
+{ \
+ PREAMBLE(DATA_BITS); \
+ { Int cf, pf, af, zf, sf, of; \
+ Int argL, argR, res; \
+ argL = CC_DEP1; \
+ argR = CC_DEP2; \
+ res = argL + argR; \
+ cf = (DATA_UTYPE)res < (DATA_UTYPE)argL; \
+ pf = parity_table[(UChar)res]; \
+ af = (res ^ argL ^ argR) & 0x10; \
+ zf = ((DATA_UTYPE)res == 0) << 6; \
+ sf = lshift(res, 8 - DATA_BITS) & 0x80; \
+ of = lshift((argL ^ argR ^ -1) & (argL ^ res), \
+ 12 - DATA_BITS) & X86G_CC_MASK_O; \
+ return cf | pf | af | zf | sf | of; \
+ } \
+}
+
+/*-------------------------------------------------------------*/
+
+#define ACTIONS_SUB(DATA_BITS,DATA_UTYPE) \
+{ \
+ PREAMBLE(DATA_BITS); \
+ { Int cf, pf, af, zf, sf, of; \
+ Int argL, argR, res; \
+ argL = CC_DEP1; \
+ argR = CC_DEP2; \
+ res = argL - argR; \
+ cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR; \
+ pf = parity_table[(UChar)res]; \
+ af = (res ^ argL ^ argR) & 0x10; \
+ zf = ((DATA_UTYPE)res == 0) << 6; \
+ sf = lshift(res, 8 - DATA_BITS) & 0x80; \
+ of = lshift((argL ^ argR) & (argL ^ res), \
+ 12 - DATA_BITS) & X86G_CC_MASK_O; \
+ return cf | pf | af | zf | sf | of; \
+ } \
+}
+
+/*-------------------------------------------------------------*/
+
+#define ACTIONS_ADC(DATA_BITS,DATA_UTYPE) \
+{ \
+ PREAMBLE(DATA_BITS); \
+ { Int cf, pf, af, zf, sf, of; \
+ Int argL, argR, oldC, res; \
+ oldC = CC_NDEP & X86G_CC_MASK_C; \
+ argL = CC_DEP1; \
+ argR = CC_DEP2 ^ oldC; \
+ res = (argL + argR) + oldC; \
+ if (oldC) \
+ cf = (DATA_UTYPE)res <= (DATA_UTYPE)argL; \
+ else \
+ cf = (DATA_UTYPE)res < (DATA_UTYPE)argL; \
+ pf = parity_table[(UChar)res]; \
+ af = (res ^ argL ^ argR) & 0x10; \
+ zf = ((DATA_UTYPE)res == 0) << 6; \
+ sf = lshift(res, 8 - DATA_BITS) & 0x80; \
+ of = lshift((argL ^ argR ^ -1) & (argL ^ res), \
+ 12 - DATA_BITS) & X86G_CC_MASK_O; \
+ return cf | pf | af | zf | sf | of; \
+ } \
+}
+
+/*-------------------------------------------------------------*/
+
+#define ACTIONS_SBB(DATA_BITS,DATA_UTYPE) \
+{ \
+ PREAMBLE(DATA_BITS); \
+ { Int cf, pf, af, zf, sf, of; \
+ Int argL, argR, oldC, res; \
+ oldC = CC_NDEP & X86G_CC_MASK_C; \
+ argL = CC_DEP1; \
+ argR = CC_DEP2 ^ oldC; \
+ res = (argL - argR) - oldC; \
+ if (oldC) \
+ cf = (DATA_UTYPE)argL <= (DATA_UTYPE)argR; \
+ else \
+ cf = (DATA_UTYPE)argL < (DATA_UTYPE)argR; \
+ pf = parity_table[(UChar)res]; \
+ af = (res ^ argL ^ argR) & 0x10; \
+ zf = ((DATA_UTYPE)res == 0) << 6; \
+ sf = lshift(res, 8 - DATA_BITS) & 0x80; \
+ of = lshift((argL ^ argR) & (argL ^ res), \
+ 12 - DATA_BITS) & X86G_CC_MASK_O; \
+ return cf | pf | af | zf | sf | of; \
+ } \
+}
+
+/*-------------------------------------------------------------*/
+
+#define ACTIONS_LOGIC(DATA_BITS,DATA_UTYPE) \
+{ \
+ PREAMBLE(DATA_BITS); \
+ { Int cf, pf, af, zf, sf, of; \
+ cf = 0; \
+ pf = parity_table[(UChar)CC_DEP1]; \
+ af = 0; \
+ zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6; \
+ sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80; \
+ of = 0; \
+ return cf | pf | af | zf | sf | of; \
+ } \
+}
+
+/*-------------------------------------------------------------*/
+
+#define ACTIONS_INC(DATA_BITS,DATA_UTYPE) \
+{ \
+ PREAMBLE(DATA_BITS); \
+ { Int cf, pf, af, zf, sf, of; \
+ Int argL, argR, res; \
+ res = CC_DEP1; \
+ argL = res - 1; \
+ argR = 1; \
+ cf = CC_NDEP & X86G_CC_MASK_C; \
+ pf = parity_table[(UChar)res]; \
+ af = (res ^ argL ^ argR) & 0x10; \
+ zf = ((DATA_UTYPE)res == 0) << 6; \
+ sf = lshift(res, 8 - DATA_BITS) & 0x80; \
+ of = ((res & DATA_MASK) == SIGN_MASK) << 11; \
+ return cf | pf | af | zf | sf | of; \
+ } \
+}
+
+/*-------------------------------------------------------------*/
+
+#define ACTIONS_DEC(DATA_BITS,DATA_UTYPE) \
+{ \
+ PREAMBLE(DATA_BITS); \
+ { Int cf, pf, af, zf, sf, of; \
+ Int argL, argR, res; \
+ res = CC_DEP1; \
+ argL = res + 1; \
+ argR = 1; \
+ cf = CC_NDEP & X86G_CC_MASK_C; \
+ pf = parity_table[(UChar)res]; \
+ af = (res ^ argL ^ argR) & 0x10; \
+ zf = ((DATA_UTYPE)res == 0) << 6; \
+ sf = lshift(res, 8 - DATA_BITS) & 0x80; \
+ of = ((res & DATA_MASK) \
+ == ((UInt)SIGN_MASK - 1)) << 11; \
+ return cf | pf | af | zf | sf | of; \
+ } \
+}
+
+/*-------------------------------------------------------------*/
+
+#define ACTIONS_SHL(DATA_BITS,DATA_UTYPE) \
+{ \
+ PREAMBLE(DATA_BITS); \
+ { Int cf, pf, af, zf, sf, of; \
+ cf = (CC_DEP2 >> (DATA_BITS - 1)) & X86G_CC_MASK_C; \
+ pf = parity_table[(UChar)CC_DEP1]; \
+ af = 0; /* undefined */ \
+ zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6; \
+ sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80; \
+ /* of is defined if shift count == 1 */ \
+ of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS) \
+ & X86G_CC_MASK_O; \
+ return cf | pf | af | zf | sf | of; \
+ } \
+}
+
+/*-------------------------------------------------------------*/
+
+#define ACTIONS_SHR(DATA_BITS,DATA_UTYPE) \
+{ \
+ PREAMBLE(DATA_BITS); \
+ { Int cf, pf, af, zf, sf, of; \
+ cf = CC_DEP2 & 1; \
+ pf = parity_table[(UChar)CC_DEP1]; \
+ af = 0; /* undefined */ \
+ zf = ((DATA_UTYPE)CC_DEP1 == 0) << 6; \
+ sf = lshift(CC_DEP1, 8 - DATA_BITS) & 0x80; \
+ /* of is defined if shift count == 1 */ \
+ of = lshift(CC_DEP2 ^ CC_DEP1, 12 - DATA_BITS) \
+ & X86G_CC_MASK_O; \
+ return cf | pf | af | zf | sf | of; \
+ } \
+}
+
+/*-------------------------------------------------------------*/
+
+/* ROL: cf' = lsb(result). of' = msb(result) ^ lsb(result). */
+/* DEP1 = result, NDEP = old flags */
+#define ACTIONS_ROL(DATA_BITS,DATA_UTYPE) \
+{ \
+ PREAMBLE(DATA_BITS); \
+ { Int fl \
+ = (CC_NDEP & ~(X86G_CC_MASK_O | X86G_CC_MASK_C)) \
+ | (X86G_CC_MASK_C & CC_DEP1) \
+ | (X86G_CC_MASK_O & (lshift(CC_DEP1, \
+ 11-(DATA_BITS-1)) \
+ ^ lshift(CC_DEP1, 11))); \
+ return fl; \
+ } \
+}
+
+/*-------------------------------------------------------------*/
+
+/* ROR: cf' = msb(result). of' = msb(result) ^ msb-1(result). */
+/* DEP1 = result, NDEP = old flags */
+#define ACTIONS_ROR(DATA_BITS,DATA_UTYPE) \
+{ \
+ PREAMBLE(DATA_BITS); \
+ { Int fl \
+ = (CC_NDEP & ~(X86G_CC_MASK_O | X86G_CC_MASK_C)) \
+ | (X86G_CC_MASK_C & (CC_DEP1 >> (DATA_BITS-1))) \
+ | (X86G_CC_MASK_O & (lshift(CC_DEP1, \
+ 11-(DATA_BITS-1)) \
+ ^ lshift(CC_DEP1, 11-(DATA_BITS-1)+1))); \
+ return fl; \
+ } \
+}
+
+/*-------------------------------------------------------------*/
+
+#define ACTIONS_UMUL(DATA_BITS, DATA_UTYPE, NARROWtoU, \
+ DATA_U2TYPE, NARROWto2U) \
+{ \
+ PREAMBLE(DATA_BITS); \
+ { Int cf, pf, af, zf, sf, of; \
+ DATA_UTYPE hi; \
+ DATA_UTYPE lo \
+ = NARROWtoU( ((DATA_UTYPE)CC_DEP1) \
+ * ((DATA_UTYPE)CC_DEP2) ); \
+ DATA_U2TYPE rr \
+ = NARROWto2U( \
+ ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP1)) \
+ * ((DATA_U2TYPE)((DATA_UTYPE)CC_DEP2)) ); \
+ hi = NARROWtoU(rr >>/*u*/ DATA_BITS); \
+ cf = (hi != 0); \
+ pf = parity_table[(UChar)lo]; \
+ af = 0; /* undefined */ \
+ zf = (lo == 0) << 6; \
+ sf = lshift(lo, 8 - DATA_BITS) & 0x80; \
+ of = cf << 11; \
+ return cf | pf | af | zf | sf | of; \
+ } \
+}
+
+/*-------------------------------------------------------------*/
+
+#define ACTIONS_SMUL(DATA_BITS, DATA_STYPE, NARROWtoS, \
+ DATA_S2TYPE, NARROWto2S) \
+{ \
+ PREAMBLE(DATA_BITS); \
+ { Int cf, pf, af, zf, sf, of; \
+ DATA_STYPE hi; \
+ DATA_STYPE lo \
+ = NARROWtoS( ((DATA_STYPE)CC_DEP1) \
+ * ((DATA_STYPE)CC_DEP2) ); \
+ DATA_S2TYPE rr \
+ = NARROWto2S( \
+ ((DATA_S2TYPE)((DATA_STYPE)CC_DEP1)) \
+ * ((DATA_S2TYPE)((DATA_STYPE)CC_DEP2)) ); \
+ hi = NARROWtoS(rr >>/*s*/ DATA_BITS); \
+ cf = (hi != (lo >>/*s*/ (DATA_BITS-1))); \
+ pf = parity_table[(UChar)lo]; \
+ af = 0; /* undefined */ \
+ zf = (lo == 0) << 6; \
+ sf = lshift(lo, 8 - DATA_BITS) & 0x80; \
+ of = cf << 11; \
+ return cf | pf | af | zf | sf | of; \
+ } \
+}
+
+
+#if PROFILE_EFLAGS
+
+static Bool initted = False;
+
+/* C flag, fast route */
+static UInt tabc_fast[X86G_CC_OP_NUMBER];
+/* C flag, slow route */
+static UInt tabc_slow[X86G_CC_OP_NUMBER];
+/* table for calculate_cond */
+static UInt tab_cond[X86G_CC_OP_NUMBER][16];
+/* total entry counts for calc_all, calc_c, calc_cond. */
+static UInt n_calc_all = 0;
+static UInt n_calc_c = 0;
+static UInt n_calc_cond = 0;
+
+#define SHOW_COUNTS_NOW (0 == (0x3FFFFF & (n_calc_all+n_calc_c+n_calc_cond)))
+
+
+static void showCounts ( void )
+{
+ Int op, co;
+ Char ch;
+ vex_printf("\nTotal calls: calc_all=%u calc_cond=%u calc_c=%u\n",
+ n_calc_all, n_calc_cond, n_calc_c);
+
+ vex_printf(" cSLOW cFAST O NO B NB Z NZ BE NBE"
+ " S NS P NP L NL LE NLE\n");
+ vex_printf(" -----------------------------------------------------"
+ "----------------------------------------\n");
+ for (op = 0; op < X86G_CC_OP_NUMBER; op++) {
+
+ ch = ' ';
+ if (op > 0 && (op-1) % 3 == 0)
+ ch = 'B';
+ if (op > 0 && (op-1) % 3 == 1)
+ ch = 'W';
+ if (op > 0 && (op-1) % 3 == 2)
+ ch = 'L';
+
+ vex_printf("%2d%c: ", op, ch);
+ vex_printf("%6u ", tabc_slow[op]);
+ vex_printf("%6u ", tabc_fast[op]);
+ for (co = 0; co < 16; co++) {
+ Int n = tab_cond[op][co];
+ if (n >= 1000) {
+ vex_printf(" %3dK", n / 1000);
+ } else
+ if (n >= 0) {
+ vex_printf(" %3d ", n );
+ } else {
+ vex_printf(" ");
+ }
+ }
+ vex_printf("\n");
+ }
+ vex_printf("\n");
+}
+
+static void initCounts ( void )
+{
+ Int op, co;
+ initted = True;
+ for (op = 0; op < X86G_CC_OP_NUMBER; op++) {
+ tabc_fast[op] = tabc_slow[op] = 0;
+ for (co = 0; co < 16; co++)
+ tab_cond[op][co] = 0;
+ }
+}
+
+#endif /* PROFILE_EFLAGS */
+
+
+/* Calculate all 6 flags from the supplied thunk parameters.
+   Worker function, not called directly from generated code. */
+static
+UInt x86g_calculate_eflags_all_WRK ( UInt cc_op,
+ UInt cc_dep1_formal,
+ UInt cc_dep2_formal,
+ UInt cc_ndep_formal )
+{
+ switch (cc_op) {
+ case X86G_CC_OP_COPY:
+ return cc_dep1_formal
+ & (X86G_CC_MASK_O | X86G_CC_MASK_S | X86G_CC_MASK_Z
+ | X86G_CC_MASK_A | X86G_CC_MASK_C | X86G_CC_MASK_P);
+
+ case X86G_CC_OP_ADDB: ACTIONS_ADD( 8, UChar );
+ case X86G_CC_OP_ADDW: ACTIONS_ADD( 16, UShort );
+ case X86G_CC_OP_ADDL: ACTIONS_ADD( 32, UInt );
+
+ case X86G_CC_OP_ADCB: ACTIONS_ADC( 8, UChar );
+ case X86G_CC_OP_ADCW: ACTIONS_ADC( 16, UShort );
+ case X86G_CC_OP_ADCL: ACTIONS_ADC( 32, UInt );
+
+ case X86G_CC_OP_SUBB: ACTIONS_SUB( 8, UChar );
+ case X86G_CC_OP_SUBW: ACTIONS_SUB( 16, UShort );
+ case X86G_CC_OP_SUBL: ACTIONS_SUB( 32, UInt );
+
+ case X86G_CC_OP_SBBB: ACTIONS_SBB( 8, UChar );
+ case X86G_CC_OP_SBBW: ACTIONS_SBB( 16, UShort );
+ case X86G_CC_OP_SBBL: ACTIONS_SBB( 32, UInt );
+
+ case X86G_CC_OP_LOGICB: ACTIONS_LOGIC( 8, UChar );
+ case X86G_CC_OP_LOGICW: ACTIONS_LOGIC( 16, UShort );
+ case X86G_CC_OP_LOGICL: ACTIONS_LOGIC( 32, UInt );
+
+ case X86G_CC_OP_INCB: ACTIONS_INC( 8, UChar );
+ case X86G_CC_OP_INCW: ACTIONS_INC( 16, UShort );
+ case X86G_CC_OP_INCL: ACTIONS_INC( 32, UInt );
+
+ case X86G_CC_OP_DECB: ACTIONS_DEC( 8, UChar );
+ case X86G_CC_OP_DECW: ACTIONS_DEC( 16, UShort );
+ case X86G_CC_OP_DECL: ACTIONS_DEC( 32, UInt );
+
+ case X86G_CC_OP_SHLB: ACTIONS_SHL( 8, UChar );
+ case X86G_CC_OP_SHLW: ACTIONS_SHL( 16, UShort );
+ case X86G_CC_OP_SHLL: ACTIONS_SHL( 32, UInt );
+
+ case X86G_CC_OP_SHRB: ACTIONS_SHR( 8, UChar );
+ case X86G_CC_OP_SHRW: ACTIONS_SHR( 16, UShort );
+ case X86G_CC_OP_SHRL: ACTIONS_SHR( 32, UInt );
+
+ case X86G_CC_OP_ROLB: ACTIONS_ROL( 8, UChar );
+ case X86G_CC_OP_ROLW: ACTIONS_ROL( 16, UShort );
+ case X86G_CC_OP_ROLL: ACTIONS_ROL( 32, UInt );
+
+ case X86G_CC_OP_RORB: ACTIONS_ROR( 8, UChar );
+ case X86G_CC_OP_RORW: ACTIONS_ROR( 16, UShort );
+ case X86G_CC_OP_RORL: ACTIONS_ROR( 32, UInt );
+
+ case X86G_CC_OP_UMULB: ACTIONS_UMUL( 8, UChar, toUChar,
+ UShort, toUShort );
+ case X86G_CC_OP_UMULW: ACTIONS_UMUL( 16, UShort, toUShort,
+ UInt, toUInt );
+ case X86G_CC_OP_UMULL: ACTIONS_UMUL( 32, UInt, toUInt,
+ ULong, idULong );
+
+ case X86G_CC_OP_SMULB: ACTIONS_SMUL( 8, Char, toUChar,
+ Short, toUShort );
+ case X86G_CC_OP_SMULW: ACTIONS_SMUL( 16, Short, toUShort,
+ Int, toUInt );
+ case X86G_CC_OP_SMULL: ACTIONS_SMUL( 32, Int, toUInt,
+ Long, idULong );
+
+ default:
+ /* shouldn't really make these calls from generated code */
+ vex_printf("x86g_calculate_eflags_all_WRK(X86)"
+ "( %u, 0x%x, 0x%x, 0x%x )\n",
+ cc_op, cc_dep1_formal, cc_dep2_formal, cc_ndep_formal );
+ vpanic("x86g_calculate_eflags_all_WRK(X86)");
+ }
+}
+
+
+/* CALLED FROM GENERATED CODE: CLEAN HELPER */
+/* Calculate all 6 flags from the supplied thunk parameters. */
+UInt x86g_calculate_eflags_all ( UInt cc_op,
+ UInt cc_dep1,
+ UInt cc_dep2,
+ UInt cc_ndep )
+{
+# if PROFILE_EFLAGS
+ if (!initted) initCounts();
+ n_calc_all++;
+ if (SHOW_COUNTS_NOW) showCounts();
+# endif
+ return
+ x86g_calculate_eflags_all_WRK ( cc_op, cc_dep1, cc_dep2, cc_ndep );
+}
+
+
+/* CALLED FROM GENERATED CODE: CLEAN HELPER */
+/* Calculate just the carry flag from the supplied thunk parameters. */
+__attribute((regparm(3)))
+UInt x86g_calculate_eflags_c ( UInt cc_op,
+ UInt cc_dep1,
+ UInt cc_dep2,
+ UInt cc_ndep )
+{
+# if PROFILE_EFLAGS
+ if (!initted) initCounts();
+ n_calc_c++;
+ tabc_fast[cc_op]++;
+ if (SHOW_COUNTS_NOW) showCounts();
+# endif
+
+ /* Fast-case some common ones. */
+ switch (cc_op) {
+ case X86G_CC_OP_LOGICL:
+ case X86G_CC_OP_LOGICW:
+ case X86G_CC_OP_LOGICB:
+ return 0;
+ case X86G_CC_OP_SUBL:
+ return ((UInt)cc_dep1) < ((UInt)cc_dep2)
+ ? X86G_CC_MASK_C : 0;
+ case X86G_CC_OP_SUBW:
+ return ((UInt)(cc_dep1 & 0xFFFF)) < ((UInt)(cc_dep2 & 0xFFFF))
+ ? X86G_CC_MASK_C : 0;
+ case X86G_CC_OP_SUBB:
+ return ((UInt)(cc_dep1 & 0xFF)) < ((UInt)(cc_dep2 & 0xFF))
+ ? X86G_CC_MASK_C : 0;
+ case X86G_CC_OP_INCL:
+ case X86G_CC_OP_DECL:
+ return cc_ndep & X86G_CC_MASK_C;
+ default:
+ break;
+ }
+
+# if PROFILE_EFLAGS
+ tabc_fast[cc_op]--;
+ tabc_slow[cc_op]++;
+# endif
+
+ return x86g_calculate_eflags_all_WRK(cc_op,cc_dep1,cc_dep2,cc_ndep)
+ & X86G_CC_MASK_C;
+}
+
+
+/* CALLED FROM GENERATED CODE: CLEAN HELPER */
+/* returns 1 or 0 */
+UInt x86g_calculate_condition ( UInt/*X86Condcode*/ cond,
+ UInt cc_op,
+ UInt cc_dep1,
+ UInt cc_dep2,
+ UInt cc_ndep )
+{
+ UInt eflags = x86g_calculate_eflags_all_WRK(cc_op, cc_dep1,
+ cc_dep2, cc_ndep);
+ UInt of,sf,zf,cf,pf;
+ UInt inv = cond & 1;
+
+# if PROFILE_EFLAGS
+ if (!initted) initCounts();
+ tab_cond[cc_op][cond]++;
+ n_calc_cond++;
+ if (SHOW_COUNTS_NOW) showCounts();
+# endif
+
+ switch (cond) {
+ case X86CondNO:
+ case X86CondO: /* OF == 1 */
+ of = eflags >> X86G_CC_SHIFT_O;
+ return 1 & (inv ^ of);
+
+ case X86CondNZ:
+ case X86CondZ: /* ZF == 1 */
+ zf = eflags >> X86G_CC_SHIFT_Z;
+ return 1 & (inv ^ zf);
+
+ case X86CondNB:
+ case X86CondB: /* CF == 1 */
+ cf = eflags >> X86G_CC_SHIFT_C;
+ return 1 & (inv ^ cf);
+
+ case X86CondNBE:
+ case X86CondBE: /* (CF or ZF) == 1 */
+ cf = eflags >> X86G_CC_SHIFT_C;
+ zf = eflags >> X86G_CC_SHIFT_Z;
+ return 1 & (inv ^ (cf | zf));
+
+ case X86CondNS:
+ case X86CondS: /* SF == 1 */
+ sf = eflags >> X86G_CC_SHIFT_S;
+ return 1 & (inv ^ sf);
+
+ case X86CondNP:
+ case X86CondP: /* PF == 1 */
+ pf = eflags >> X86G_CC_SHIFT_P;
+ return 1 & (inv ^ pf);
+
+ case X86CondNL:
+ case X86CondL: /* (SF xor OF) == 1 */
+ sf = eflags >> X86G_CC_SHIFT_S;
+ of = eflags >> X86G_CC_SHIFT_O;
+ return 1 & (inv ^ (sf ^ of));
+
+ case X86CondNLE:
+ case X86CondLE: /* ((SF xor OF) or ZF) == 1 */
+ sf = eflags >> X86G_CC_SHIFT_S;
+ of = eflags >> X86G_CC_SHIFT_O;
+ zf = eflags >> X86G_CC_SHIFT_Z;
+ return 1 & (inv ^ ((sf ^ of) | zf));
+
+ default:
+ /* shouldn't really make these calls from generated code */
+ vex_printf("x86g_calculate_condition( %u, %u, 0x%x, 0x%x, 0x%x )\n",
+ cond, cc_op, cc_dep1, cc_dep2, cc_ndep );
+ vpanic("x86g_calculate_condition");
+ }
+}
+
+
+/* VISIBLE TO LIBVEX CLIENT */
+UInt LibVEX_GuestX86_get_eflags ( /*IN*/VexGuestX86State* vex_state )
+{
+ UInt eflags = x86g_calculate_eflags_all_WRK(
+ vex_state->guest_CC_OP,
+ vex_state->guest_CC_DEP1,
+ vex_state->guest_CC_DEP2,
+ vex_state->guest_CC_NDEP
+ );
+ UInt dflag = vex_state->guest_DFLAG;
+ vassert(dflag == 1 || dflag == 0xFFFFFFFF);
+ if (dflag == 0xFFFFFFFF)
+ eflags |= (1<<10);
+ if (vex_state->guest_IDFLAG == 1)
+ eflags |= (1<<21);
+ if (vex_state->guest_ACFLAG == 1)
+ eflags |= (1<<18);
+
+ return eflags;
+}
+
+/* VISIBLE TO LIBVEX CLIENT */
+void
+LibVEX_GuestX86_put_eflag_c ( UInt new_carry_flag,
+ /*MOD*/VexGuestX86State* vex_state )
+{
+ UInt oszacp = x86g_calculate_eflags_all_WRK(
+ vex_state->guest_CC_OP,
+ vex_state->guest_CC_DEP1,
+ vex_state->guest_CC_DEP2,
+ vex_state->guest_CC_NDEP
+ );
+ if (new_carry_flag & 1) {
+ oszacp |= X86G_CC_MASK_C;
+ } else {
+ oszacp &= ~X86G_CC_MASK_C;
+ }
+ vex_state->guest_CC_OP = X86G_CC_OP_COPY;
+ vex_state->guest_CC_DEP1 = oszacp;
+ vex_state->guest_CC_DEP2 = 0;
+ vex_state->guest_CC_NDEP = 0;
+}
+
+
+/*---------------------------------------------------------------*/
+/*--- %eflags translation-time function specialisers. ---*/
+/*--- These help iropt specialise calls the above run-time ---*/
+/*--- %eflags functions. ---*/
+/*---------------------------------------------------------------*/
+
+/* Used by the optimiser to try specialisations. Returns an
+ equivalent expression, or NULL if none. */
+
+static inline Bool isU32 ( IRExpr* e, UInt n )
+{
+ return
+ toBool( e->tag == Iex_Const
+ && e->Iex.Const.con->tag == Ico_U32
+ && e->Iex.Const.con->Ico.U32 == n );
+}
+
+IRExpr* guest_x86_spechelper ( HChar* function_name,
+ IRExpr** args,
+ IRStmt** precedingStmts,
+ Int n_precedingStmts )
+{
+# define unop(_op,_a1) IRExpr_Unop((_op),(_a1))
+# define binop(_op,_a1,_a2) IRExpr_Binop((_op),(_a1),(_a2))
+# define mkU32(_n) IRExpr_Const(IRConst_U32(_n))
+# define mkU8(_n) IRExpr_Const(IRConst_U8(_n))
+
+ Int i, arity = 0;
+ for (i = 0; args[i]; i++)
+ arity++;
+# if 0
+ vex_printf("spec request:\n");
+ vex_printf(" %s ", function_name);
+ for (i = 0; i < arity; i++) {
+ vex_printf(" ");
+ ppIRExpr(args[i]);
+ }
+ vex_printf("\n");
+# endif
+
+ /* --------- specialising "x86g_calculate_condition" --------- */
+
+ if (vex_streq(function_name, "x86g_calculate_condition")) {
+ /* specialise calls to above "calculate condition" function */
+ IRExpr *cond, *cc_op, *cc_dep1, *cc_dep2;
+ vassert(arity == 5);
+ cond = args[0];
+ cc_op = args[1];
+ cc_dep1 = args[2];
+ cc_dep2 = args[3];
+
+ /*---------------- ADDL ----------------*/
+
+ if (isU32(cc_op, X86G_CC_OP_ADDL) && isU32(cond, X86CondZ)) {
+ /* long add, then Z --> test (dst+src == 0) */
+ return unop(Iop_1Uto32,
+ binop(Iop_CmpEQ32,
+ binop(Iop_Add32, cc_dep1, cc_dep2),
+ mkU32(0)));
+ }
+
+ /*---------------- SUBL ----------------*/
+
+ if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondZ)) {
+ /* long sub/cmp, then Z --> test dst==src */
+ return unop(Iop_1Uto32,
+ binop(Iop_CmpEQ32, cc_dep1, cc_dep2));
+ }
+ if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNZ)) {
+ /* long sub/cmp, then NZ --> test dst!=src */
+ return unop(Iop_1Uto32,
+ binop(Iop_CmpNE32, cc_dep1, cc_dep2));
+ }
+
+ if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondL)) {
+ /* long sub/cmp, then L (signed less than)
+ --> test dst <s src */
+ return unop(Iop_1Uto32,
+ binop(Iop_CmpLT32S, cc_dep1, cc_dep2));
+ }
+ if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNL)) {
+ /* long sub/cmp, then NL (signed greater than or equal)
+ --> test !(dst <s src) */
+ return binop(Iop_Xor32,
+ unop(Iop_1Uto32,
+ binop(Iop_CmpLT32S, cc_dep1, cc_dep2)),
+ mkU32(1));
+ }
+
+ if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondLE)) {
+ /* long sub/cmp, then LE (signed less than or equal)
+ --> test dst <=s src */
+ return unop(Iop_1Uto32,
+ binop(Iop_CmpLE32S, cc_dep1, cc_dep2));
+ }
+ if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNLE)) {
+ /* long sub/cmp, then NLE (signed not less than or equal)
+ --> test dst >s src
+ --> test !(dst <=s src) */
+ return binop(Iop_Xor32,
+ unop(Iop_1Uto32,
+ binop(Iop_CmpLE32S, cc_dep1, cc_dep2)),
+ mkU32(1));
+ }
+
+ if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondBE)) {
+ /* long sub/cmp, then BE (unsigned less than or equal)
+ --> test dst <=u src */
+ return unop(Iop_1Uto32,
+ binop(Iop_CmpLE32U, cc_dep1, cc_dep2));
+ }
+ if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNBE)) {
+         /* long sub/cmp, then NBE (unsigned greater than)
+ --> test !(dst <=u src) */
+ return binop(Iop_Xor32,
+ unop(Iop_1Uto32,
+ binop(Iop_CmpLE32U, cc_dep1, cc_dep2)),
+ mkU32(1));
+ }
+
+ if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondB)) {
+ /* long sub/cmp, then B (unsigned less than)
+ --> test dst <u src */
+ return unop(Iop_1Uto32,
+ binop(Iop_CmpLT32U, cc_dep1, cc_dep2));
+ }
+ if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNB)) {
+ /* long sub/cmp, then NB (unsigned greater than or equal)
+ --> test !(dst <u src) */
+ return binop(Iop_Xor32,
+ unop(Iop_1Uto32,
+ binop(Iop_CmpLT32U, cc_dep1, cc_dep2)),
+ mkU32(1));
+ }
+
+ if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondS)) {
+ /* long sub/cmp, then S (negative) --> test (dst-src <s 0) */
+ return unop(Iop_1Uto32,
+ binop(Iop_CmpLT32S,
+ binop(Iop_Sub32, cc_dep1, cc_dep2),
+ mkU32(0)));
+ }
+ if (isU32(cc_op, X86G_CC_OP_SUBL) && isU32(cond, X86CondNS)) {
+ /* long sub/cmp, then NS (not negative) --> test !(dst-src <s 0) */
+ return binop(Iop_Xor32,
+ unop(Iop_1Uto32,
+ binop(Iop_CmpLT32S,
+ binop(Iop_Sub32, cc_dep1, cc_dep2),
+ mkU32(0))),
+ mkU32(1));
+ }
+
+ /*---------------- SUBW ----------------*/
+
+ if (isU32(cc_op, X86G_CC_OP_SUBW) && isU32(cond, X86CondZ)) {
+ /* word sub/cmp, then Z --> test dst==src */
+ return unop(Iop_1Uto32,
+ binop(Iop_CmpEQ16,
+ unop(Iop_32to16,cc_dep1),
+ unop(Iop_32to16,cc_dep2)));
+ }
+ if (isU32(cc_op, X86G_CC_OP_SUBW) && isU32(cond, X86CondNZ)) {
+ /* word sub/cmp, then NZ --> test dst!=src */
+ return unop(Iop_1Uto32,
+ binop(Iop_CmpNE16,
+ unop(Iop_32to16,cc_dep1),
+ unop(Iop_32to16,cc_dep2)));
+ }
+
+ /*---------------- SUBB ----------------*/
+
+ if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondZ)) {
+ /* byte sub/cmp, then Z --> test dst==src */
+ return unop(Iop_1Uto32,
+ binop(Iop_CmpEQ8,
+ unop(Iop_32to8,cc_dep1),
+ unop(Iop_32to8,cc_dep2)));
+ }
+ if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondNZ)) {
+ /* byte sub/cmp, then NZ --> test dst!=src */
+ return unop(Iop_1Uto32,
+ binop(Iop_CmpNE8,
+ unop(Iop_32to8,cc_dep1),
+ unop(Iop_32to8,cc_dep2)));
+ }
+
+ if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondNBE)) {
+ /* byte sub/cmp, then NBE (unsigned greater than)
+ --> test src <u dst */
+ /* Note, args are opposite way round from the usual */
+ return unop(Iop_1Uto32,
+ binop(Iop_CmpLT32U,
+ binop(Iop_And32,cc_dep2,mkU32(0xFF)),
+ binop(Iop_And32,cc_dep1,mkU32(0xFF))));
+ }
+
+ if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondS)
+ && isU32(cc_dep2, 0)) {
+ /* byte sub/cmp of zero, then S --> test (dst-0 <s 0)
+ --> test dst <s 0
+ --> (UInt)dst[7]
+ This is yet another scheme by which gcc figures out if the
+ top bit of a byte is 1 or 0. See also LOGICB/CondS below. */
+ /* Note: isU32(cc_dep2, 0) is correct, even though this is
+ for an 8-bit comparison, since the args to the helper
+ function are always U32s. */
+ return binop(Iop_And32,
+ binop(Iop_Shr32,cc_dep1,mkU8(7)),
+ mkU32(1));
+ }
+ if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondNS)
+ && isU32(cc_dep2, 0)) {
+ /* byte sub/cmp of zero, then NS --> test !(dst-0 <s 0)
+ --> test !(dst <s 0)
+ --> (UInt) !dst[7]
+ */
+ return binop(Iop_Xor32,
+ binop(Iop_And32,
+ binop(Iop_Shr32,cc_dep1,mkU8(7)),
+ mkU32(1)),
+ mkU32(1));
+ }
+
+ /*---------------- LOGICL ----------------*/
+
+ if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondZ)) {
+ /* long and/or/xor, then Z --> test dst==0 */
+ return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
+ }
+ if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondNZ)) {
+ /* long and/or/xor, then NZ --> test dst!=0 */
+ return unop(Iop_1Uto32,binop(Iop_CmpNE32, cc_dep1, mkU32(0)));
+ }
+
+ if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondLE)) {
+ /* long and/or/xor, then LE
+ This is pretty subtle. LOGIC sets SF and ZF according to the
+ result and makes OF be zero. LE computes (SZ ^ OF) | ZF, but
+            result and makes OF be zero. LE computes (SF ^ OF) | ZF, but
+            OF is zero, so this reduces to SF | ZF -- which will be 1 iff
+ */
+ return unop(Iop_1Uto32,binop(Iop_CmpLE32S, cc_dep1, mkU32(0)));
+ }
+
+ if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondBE)) {
+ /* long and/or/xor, then BE
+ LOGIC sets ZF according to the result and makes CF be zero.
+            BE computes (CF | ZF), but CF is zero, so this reduces to ZF
+ -- which will be 1 iff the result is zero. Hence ...
+ */
+ return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
+ }
+
+ if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondS)) {
+ /* see comment below for (LOGICB, CondS) */
+ /* long and/or/xor, then S --> (UInt)result[31] */
+ return binop(Iop_And32,
+ binop(Iop_Shr32,cc_dep1,mkU8(31)),
+ mkU32(1));
+ }
+ if (isU32(cc_op, X86G_CC_OP_LOGICL) && isU32(cond, X86CondNS)) {
+ /* see comment below for (LOGICB, CondNS) */
+         /* long and/or/xor, then NS --> (UInt) ~ result[31] */
+ return binop(Iop_Xor32,
+ binop(Iop_And32,
+ binop(Iop_Shr32,cc_dep1,mkU8(31)),
+ mkU32(1)),
+ mkU32(1));
+ }
+
+ /*---------------- LOGICW ----------------*/
+
+ if (isU32(cc_op, X86G_CC_OP_LOGICW) && isU32(cond, X86CondZ)) {
+ /* word and/or/xor, then Z --> test dst==0 */
+ return unop(Iop_1Uto32,
+ binop(Iop_CmpEQ32, binop(Iop_And32,cc_dep1,mkU32(0xFFFF)),
+ mkU32(0)));
+ }
+
+ if (isU32(cc_op, X86G_CC_OP_LOGICW) && isU32(cond, X86CondS)) {
+ /* see comment below for (LOGICB, CondS) */
+ /* word and/or/xor, then S --> (UInt)result[15] */
+ return binop(Iop_And32,
+ binop(Iop_Shr32,cc_dep1,mkU8(15)),
+ mkU32(1));
+ }
+
+ /*---------------- LOGICB ----------------*/
+
+ if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondZ)) {
+ /* byte and/or/xor, then Z --> test dst==0 */
+ return unop(Iop_1Uto32,
+ binop(Iop_CmpEQ32, binop(Iop_And32,cc_dep1,mkU32(255)),
+ mkU32(0)));
+ }
+ if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondNZ)) {
+         /* byte and/or/xor, then NZ --> test dst!=0 */
+ /* b9ac9: 84 c0 test %al,%al
+ b9acb: 75 0d jne b9ada */
+ return unop(Iop_1Uto32,
+ binop(Iop_CmpNE32, binop(Iop_And32,cc_dep1,mkU32(255)),
+ mkU32(0)));
+ }
+
+ if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondS)) {
+ /* this is an idiom gcc sometimes uses to find out if the top
+ bit of a byte register is set: eg testb %al,%al; js ..
+ Since it just depends on the top bit of the byte, extract
+ that bit and explicitly get rid of all the rest. This
+ helps memcheck avoid false positives in the case where any
+ of the other bits in the byte are undefined. */
+ /* byte and/or/xor, then S --> (UInt)result[7] */
+ return binop(Iop_And32,
+ binop(Iop_Shr32,cc_dep1,mkU8(7)),
+ mkU32(1));
+ }
+ if (isU32(cc_op, X86G_CC_OP_LOGICB) && isU32(cond, X86CondNS)) {
+ /* ditto, for negation-of-S. */
+         /* byte and/or/xor, then NS --> (UInt) ~ result[7] */
+ return binop(Iop_Xor32,
+ binop(Iop_And32,
+ binop(Iop_Shr32,cc_dep1,mkU8(7)),
+ mkU32(1)),
+ mkU32(1));
+ }
+
+ /*---------------- DECL ----------------*/
+
+ if (isU32(cc_op, X86G_CC_OP_DECL) && isU32(cond, X86CondZ)) {
+ /* dec L, then Z --> test dst == 0 */
+ return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
+ }
+
+ if (isU32(cc_op, X86G_CC_OP_DECL) && isU32(cond, X86CondS)) {
+ /* dec L, then S --> compare DST <s 0 */
+ return unop(Iop_1Uto32,binop(Iop_CmpLT32S, cc_dep1, mkU32(0)));
+ }
+
+ /*---------------- DECW ----------------*/
+
+ if (isU32(cc_op, X86G_CC_OP_DECW) && isU32(cond, X86CondZ)) {
+ /* dec W, then Z --> test dst == 0 */
+ return unop(Iop_1Uto32,
+ binop(Iop_CmpEQ32,
+ binop(Iop_Shl32,cc_dep1,mkU8(16)),
+ mkU32(0)));
+ }
+
+ /*---------------- INCW ----------------*/
+
+ if (isU32(cc_op, X86G_CC_OP_INCW) && isU32(cond, X86CondZ)) {
+ /* This rewrite helps memcheck on 'incw %ax ; je ...'. */
+ /* inc W, then Z --> test dst == 0 */
+ return unop(Iop_1Uto32,
+ binop(Iop_CmpEQ32,
+ binop(Iop_Shl32,cc_dep1,mkU8(16)),
+ mkU32(0)));
+ }
+
+ /*---------------- SHRL ----------------*/
+
+ if (isU32(cc_op, X86G_CC_OP_SHRL) && isU32(cond, X86CondZ)) {
+ /* SHRL, then Z --> test dep1 == 0 */
+ return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0)));
+ }
+
+ /*---------------- COPY ----------------*/
+ /* This can happen, as a result of x87 FP compares: "fcom ... ;
+ fnstsw %ax ; sahf ; jbe" for example. */
+
+ if (isU32(cc_op, X86G_CC_OP_COPY) &&
+ (isU32(cond, X86CondBE) || isU32(cond, X86CondNBE))) {
+ /* COPY, then BE --> extract C and Z from dep1, and test
+ (C or Z) == 1. */
+ /* COPY, then NBE --> extract C and Z from dep1, and test
+ (C or Z) == 0. */
+ UInt nnn = isU32(cond, X86CondBE) ? 1 : 0;
+ return
+ unop(
+ Iop_1Uto32,
+ binop(
+ Iop_CmpEQ32,
+ binop(
+ Iop_And32,
+ binop(
+ Iop_Or32,
+ binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_C)),
+ binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_Z))
+ ),
+ mkU32(1)
+ ),
+ mkU32(nnn)
+ )
+ );
+ }
+
+ if (isU32(cc_op, X86G_CC_OP_COPY)
+ && (isU32(cond, X86CondB) || isU32(cond, X86CondNB))) {
+ /* COPY, then B --> extract C from dep1, and test (C == 1). */
+ /* COPY, then NB --> extract C from dep1, and test (C == 0). */
+ UInt nnn = isU32(cond, X86CondB) ? 1 : 0;
+ return
+ unop(
+ Iop_1Uto32,
+ binop(
+ Iop_CmpEQ32,
+ binop(
+ Iop_And32,
+ binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_C)),
+ mkU32(1)
+ ),
+ mkU32(nnn)
+ )
+ );
+ }
+
+ if (isU32(cc_op, X86G_CC_OP_COPY)
+ && (isU32(cond, X86CondZ) || isU32(cond, X86CondNZ))) {
+ /* COPY, then Z --> extract Z from dep1, and test (Z == 1). */
+ /* COPY, then NZ --> extract Z from dep1, and test (Z == 0). */
+ UInt nnn = isU32(cond, X86CondZ) ? 1 : 0;
+ return
+ unop(
+ Iop_1Uto32,
+ binop(
+ Iop_CmpEQ32,
+ binop(
+ Iop_And32,
+ binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_Z)),
+ mkU32(1)
+ ),
+ mkU32(nnn)
+ )
+ );
+ }
+
+ if (isU32(cc_op, X86G_CC_OP_COPY)
+ && (isU32(cond, X86CondP) || isU32(cond, X86CondNP))) {
+ /* COPY, then P --> extract P from dep1, and test (P == 1). */
+ /* COPY, then NP --> extract P from dep1, and test (P == 0). */
+ UInt nnn = isU32(cond, X86CondP) ? 1 : 0;
+ return
+ unop(
+ Iop_1Uto32,
+ binop(
+ Iop_CmpEQ32,
+ binop(
+ Iop_And32,
+ binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_P)),
+ mkU32(1)
+ ),
+ mkU32(nnn)
+ )
+ );
+ }
+
+ return NULL;
+ }
+
+ /* --------- specialising "x86g_calculate_eflags_c" --------- */
+
+ if (vex_streq(function_name, "x86g_calculate_eflags_c")) {
+ /* specialise calls to above "calculate_eflags_c" function */
+ IRExpr *cc_op, *cc_dep1, *cc_dep2, *cc_ndep;
+ vassert(arity == 4);
+ cc_op = args[0];
+ cc_dep1 = args[1];
+ cc_dep2 = args[2];
+ cc_ndep = args[3];
+
+ if (isU32(cc_op, X86G_CC_OP_SUBL)) {
+ /* C after sub denotes unsigned less than */
+ return unop(Iop_1Uto32,
+ binop(Iop_CmpLT32U, cc_dep1, cc_dep2));
+ }
+ if (isU32(cc_op, X86G_CC_OP_SUBB)) {
+ /* C after sub denotes unsigned less than */
+ return unop(Iop_1Uto32,
+ binop(Iop_CmpLT32U,
+ binop(Iop_And32,cc_dep1,mkU32(0xFF)),
+ binop(Iop_And32,cc_dep2,mkU32(0xFF))));
+ }
+ if (isU32(cc_op, X86G_CC_OP_LOGICL)
+ || isU32(cc_op, X86G_CC_OP_LOGICW)
+ || isU32(cc_op, X86G_CC_OP_LOGICB)) {
+ /* cflag after logic is zero */
+ return mkU32(0);
+ }
+ if (isU32(cc_op, X86G_CC_OP_DECL) || isU32(cc_op, X86G_CC_OP_INCL)) {
+ /* If the thunk is dec or inc, the cflag is supplied as CC_NDEP. */
+ return cc_ndep;
+ }
+ if (isU32(cc_op, X86G_CC_OP_COPY)) {
+ /* cflag after COPY is stored in DEP1. */
+ return
+ binop(
+ Iop_And32,
+ binop(Iop_Shr32, cc_dep1, mkU8(X86G_CC_SHIFT_C)),
+ mkU32(1)
+ );
+ }
+ if (isU32(cc_op, X86G_CC_OP_ADDL)) {
+ /* C after add denotes sum <u either arg */
+ return unop(Iop_1Uto32,
+ binop(Iop_CmpLT32U,
+ binop(Iop_Add32, cc_dep1, cc_dep2),
+ cc_dep1));
+ }
+ // ATC, requires verification, no test case known
+ //if (isU32(cc_op, X86G_CC_OP_SMULL)) {
+ // /* C after signed widening multiply denotes the case where
+ // the top half of the result isn't simply the sign extension
+ // of the bottom half (iow the result doesn't fit completely
+ // in the bottom half). Hence:
+ // C = hi-half(dep1 x dep2) != lo-half(dep1 x dep2) >>s 31
+ // where 'x' denotes signed widening multiply.*/
+ // return
+ // unop(Iop_1Uto32,
+ // binop(Iop_CmpNE32,
+ // unop(Iop_64HIto32,
+ // binop(Iop_MullS32, cc_dep1, cc_dep2)),
+ // binop(Iop_Sar32,
+ // binop(Iop_Mul32, cc_dep1, cc_dep2), mkU8(31)) ));
+ //}
+# if 0
+ if (cc_op->tag == Iex_Const) {
+ vex_printf("CFLAG "); ppIRExpr(cc_op); vex_printf("\n");
+ }
+# endif
+
+ return NULL;
+ }
+
+ /* --------- specialising "x86g_calculate_eflags_all" --------- */
+
+ if (vex_streq(function_name, "x86g_calculate_eflags_all")) {
+ /* specialise calls to above "calculate_eflags_all" function */
+ IRExpr *cc_op, *cc_dep1; /*, *cc_dep2, *cc_ndep; */
+ vassert(arity == 4);
+ cc_op = args[0];
+ cc_dep1 = args[1];
+ /* cc_dep2 = args[2]; */
+ /* cc_ndep = args[3]; */
+
+ if (isU32(cc_op, X86G_CC_OP_COPY)) {
+ /* eflags after COPY are stored in DEP1. */
+ return
+ binop(
+ Iop_And32,
+ cc_dep1,
+ mkU32(X86G_CC_MASK_O | X86G_CC_MASK_S | X86G_CC_MASK_Z
+ | X86G_CC_MASK_A | X86G_CC_MASK_C | X86G_CC_MASK_P)
+ );
+ }
+ return NULL;
+ }
+
+# undef unop
+# undef binop
+# undef mkU32
+# undef mkU8
+
+ return NULL;
+}
+
+
+/*---------------------------------------------------------------*/
+/*--- Supporting functions for x87 FPU activities. ---*/
+/*---------------------------------------------------------------*/
+
+static inline Bool host_is_little_endian ( void )
+{
+ UInt x = 0x76543210;
+ UChar* p = (UChar*)(&x);
+ return toBool(*p == 0x10);
+}
+
+/* 80 and 64-bit floating point formats:
+
+ 80-bit:
+
+ S 0 0-------0 zero
+ S 0 0X------X denormals
+ S 1-7FFE 1X------X normals (all normals have leading 1)
+ S 7FFF 10------0 infinity
+ S 7FFF 10X-----X snan
+ S 7FFF 11X-----X qnan
+
+   S is the sign bit.  For runs X----X, at least one of the Xs must
+   be nonzero.  The exponent is 15 bits and the fractional part is
+   63 bits; together with the explicitly represented leading 1 and
+   the sign bit, that gives 80 bits in total.
+
+ 64-bit avoids the confusion of an explicitly represented leading 1
+ and so is simpler:
+
+ S 0 0------0 zero
+ S 0 X------X denormals
+ S 1-7FE any normals
+ S 7FF 0------0 infinity
+ S 7FF 0X-----X snan
+ S 7FF 1X-----X qnan
+
+ Exponent is 11 bits, fractional part is 52 bits, and there is a
+ sign bit, giving 64 in total.
+*/
+
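+/* Worked example (an illustrative sketch, not part of the build;
+   the function name is hypothetical): extracting these fields from
+   a 64-bit little-endian image, in the same way
+   x86g_calculate_FXAM below does. */
+#if 0
+static void classify_f64le_example ( void )
+{
+   ULong  dbl  = 0x3FF0000000000000ULL;       /* the double 1.0 */
+   UChar* f64  = (UChar*)&dbl;                /* little-endian bytes */
+   UChar  sign = toUChar((f64[7] >> 7) & 1);  /* 0 */
+   Int    bexp = ((f64[7] << 4) | ((f64[6] >> 4) & 0x0F)) & 0x7FF;
+   /* bexp == 0x3FF; the mantissa (f64[0..5] and the low nibble of
+      f64[6]) is all zeroes.  Since bexp is neither 0 nor 0x7FF,
+      this is a normal finite number. */
+}
+#endif
+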
+/* Inspect a value and its tag, as per the x87 'FXAM' instruction. */
+/* CALLED FROM GENERATED CODE: CLEAN HELPER */
+UInt x86g_calculate_FXAM ( UInt tag, ULong dbl )
+{
+ Bool mantissaIsZero;
+ Int bexp;
+ UChar sign;
+ UChar* f64;
+
+ vassert(host_is_little_endian());
+
+ /* vex_printf("calculate_FXAM ( %d, %llx ) .. ", tag, dbl ); */
+
+ f64 = (UChar*)(&dbl);
+ sign = toUChar( (f64[7] >> 7) & 1 );
+
+ /* First off, if the tag indicates the register was empty,
+ return 1,0,sign,1 */
+ if (tag == 0) {
+ /* vex_printf("Empty\n"); */
+ return X86G_FC_MASK_C3 | 0 | (sign << X86G_FC_SHIFT_C1)
+ | X86G_FC_MASK_C0;
+ }
+
+ bexp = (f64[7] << 4) | ((f64[6] >> 4) & 0x0F);
+ bexp &= 0x7FF;
+
+ mantissaIsZero
+ = toBool(
+ (f64[6] & 0x0F) == 0
+ && (f64[5] | f64[4] | f64[3] | f64[2] | f64[1] | f64[0]) == 0
+ );
+
+ /* If both exponent and mantissa are zero, the value is zero.
+ Return 1,0,sign,0. */
+ if (bexp == 0 && mantissaIsZero) {
+ /* vex_printf("Zero\n"); */
+ return X86G_FC_MASK_C3 | 0
+ | (sign << X86G_FC_SHIFT_C1) | 0;
+ }
+
+ /* If exponent is zero but mantissa isn't, it's a denormal.
+ Return 1,1,sign,0. */
+ if (bexp == 0 && !mantissaIsZero) {
+ /* vex_printf("Denormal\n"); */
+ return X86G_FC_MASK_C3 | X86G_FC_MASK_C2
+ | (sign << X86G_FC_SHIFT_C1) | 0;
+ }
+
+ /* If the exponent is 7FF and the mantissa is zero, this is an infinity.
+ Return 0,1,sign,1. */
+ if (bexp == 0x7FF && mantissaIsZero) {
+ /* vex_printf("Inf\n"); */
+ return 0 | X86G_FC_MASK_C2 | (sign << X86G_FC_SHIFT_C1)
+ | X86G_FC_MASK_C0;
+ }
+
+ /* If the exponent is 7FF and the mantissa isn't zero, this is a NaN.
+ Return 0,0,sign,1. */
+ if (bexp == 0x7FF && !mantissaIsZero) {
+ /* vex_printf("NaN\n"); */
+ return 0 | 0 | (sign << X86G_FC_SHIFT_C1) | X86G_FC_MASK_C0;
+ }
+
+ /* Uh, ok, we give up. It must be a normal finite number.
+ Return 0,1,sign,0.
+ */
+ /* vex_printf("normal\n"); */
+ return 0 | X86G_FC_MASK_C2 | (sign << X86G_FC_SHIFT_C1) | 0;
+}
+
+
+/* CALLED FROM GENERATED CODE */
+/* DIRTY HELPER (reads guest memory) */
+ULong x86g_dirtyhelper_loadF80le ( UInt addrU )
+{
+ ULong f64;
+ convert_f80le_to_f64le ( (UChar*)ULong_to_Ptr(addrU), (UChar*)&f64 );
+ return f64;
+}
+
+/* CALLED FROM GENERATED CODE */
+/* DIRTY HELPER (writes guest memory) */
+void x86g_dirtyhelper_storeF80le ( UInt addrU, ULong f64 )
+{
+ convert_f64le_to_f80le( (UChar*)&f64, (UChar*)ULong_to_Ptr(addrU) );
+}
+
+
+/*----------------------------------------------*/
+/*--- The exported fns .. ---*/
+/*----------------------------------------------*/
+
+/* Layout of the real x87 state. */
+/* 13 June 05: Fpu_State and auxiliary constants were moved to
+   guest_generic_x87.h */
+
+
+/* CLEAN HELPER */
+/* fpucw[15:0] contains an x87 native format FPU control word.
+ Extract from it the required FPROUND value and any resulting
+ emulation warning, and return (warn << 32) | fpround value.
+*/
+ULong x86g_check_fldcw ( UInt fpucw )
+{
+ /* Decide on a rounding mode. fpucw[11:10] holds it. */
+ /* NOTE, encoded exactly as per enum IRRoundingMode. */
+ UInt rmode = (fpucw >> 10) & 3;
+
+ /* Detect any required emulation warnings. */
+ VexEmWarn ew = EmWarn_NONE;
+
+ if ((fpucw & 0x3F) != 0x3F) {
+ /* unmasked exceptions! */
+ ew = EmWarn_X86_x87exns;
+ }
+ else
+ if (((fpucw >> 8) & 3) != 3) {
+ /* unsupported precision */
+ ew = EmWarn_X86_x87precision;
+ }
+
+ return (((ULong)ew) << 32) | ((ULong)rmode);
+}
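+
+/* For illustration (a worked example, derived from the code above):
+   with the power-on control word 0x037F all six exception bits are
+   masked and precision control is 11b, so no warning results and
+   the rounding mode is (0x037F >> 10) & 3 == 0 (round to nearest):
+      x86g_check_fldcw(0x037F) == ((ULong)EmWarn_NONE << 32) | 0ULL
+   whereas 0x0340, which leaves exceptions unmasked, yields
+   EmWarn_X86_x87exns in the upper 32 bits. */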
+
+/* CLEAN HELPER */
+/* Given fpround as an IRRoundingMode value, create a suitable x87
+ native format FPU control word. */
+UInt x86g_create_fpucw ( UInt fpround )
+{
+ fpround &= 3;
+ return 0x037F | (fpround << 10);
+}
+
+
+/* CLEAN HELPER */
+/* mxcsr[15:0] contains an SSE native format MXCSR value.
+ Extract from it the required SSEROUND value and any resulting
+ emulation warning, and return (warn << 32) | sseround value.
+*/
+ULong x86g_check_ldmxcsr ( UInt mxcsr )
+{
+ /* Decide on a rounding mode. mxcsr[14:13] holds it. */
+ /* NOTE, encoded exactly as per enum IRRoundingMode. */
+ UInt rmode = (mxcsr >> 13) & 3;
+
+ /* Detect any required emulation warnings. */
+ VexEmWarn ew = EmWarn_NONE;
+
+ if ((mxcsr & 0x1F80) != 0x1F80) {
+ /* unmasked exceptions! */
+ ew = EmWarn_X86_sseExns;
+ }
+ else
+ if (mxcsr & (1<<15)) {
+ /* FZ is set */
+ ew = EmWarn_X86_fz;
+ }
+ else
+ if (mxcsr & (1<<6)) {
+ /* DAZ is set */
+ ew = EmWarn_X86_daz;
+ }
+
+ return (((ULong)ew) << 32) | ((ULong)rmode);
+}
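+
+/* For illustration (a worked example): the reset MXCSR value 0x1F80
+   has all six exception mask bits set and FZ/DAZ clear, so
+      x86g_check_ldmxcsr(0x1F80) == ((ULong)EmWarn_NONE << 32) | 0ULL
+   (no warning, round to nearest), while 0x9F80 -- the same value
+   with bit 15 (FZ) set -- returns EmWarn_X86_fz in the upper half. */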
+
+
+/* CLEAN HELPER */
+/* Given sseround as an IRRoundingMode value, create a suitable SSE
+ native format MXCSR value. */
+UInt x86g_create_mxcsr ( UInt sseround )
+{
+ sseround &= 3;
+ return 0x1F80 | (sseround << 13);
+}
+
+
+/* CALLED FROM GENERATED CODE */
+/* DIRTY HELPER (writes guest state) */
+/* Initialise the x87 FPU state as per 'finit'. */
+void x86g_dirtyhelper_FINIT ( VexGuestX86State* gst )
+{
+ Int i;
+ gst->guest_FTOP = 0;
+ for (i = 0; i < 8; i++) {
+ gst->guest_FPTAG[i] = 0; /* empty */
+ gst->guest_FPREG[i] = 0; /* IEEE754 64-bit zero */
+ }
+ gst->guest_FPROUND = (UInt)Irrm_NEAREST;
+ gst->guest_FC3210 = 0;
+}
+
+
+/* This is used to implement both 'frstor' and 'fldenv'. The latter
+ appears to differ from the former only in that the 8 FP registers
+ themselves are not transferred into the guest state. */
+static
+VexEmWarn do_put_x87 ( Bool moveRegs,
+ /*IN*/UChar* x87_state,
+ /*OUT*/VexGuestX86State* vex_state )
+{
+ Int stno, preg;
+ UInt tag;
+ ULong* vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
+ UChar* vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
+ Fpu_State* x87 = (Fpu_State*)x87_state;
+ UInt ftop = (x87->env[FP_ENV_STAT] >> 11) & 7;
+ UInt tagw = x87->env[FP_ENV_TAG];
+ UInt fpucw = x87->env[FP_ENV_CTRL];
+ UInt c3210 = x87->env[FP_ENV_STAT] & 0x4700;
+ VexEmWarn ew;
+ UInt fpround;
+ ULong pair;
+
+ /* Copy registers and tags */
+ for (stno = 0; stno < 8; stno++) {
+ preg = (stno + ftop) & 7;
+ tag = (tagw >> (2*preg)) & 3;
+ if (tag == 3) {
+ /* register is empty */
+ /* hmm, if it's empty, does it still get written? Probably
+ safer to say it does. If we don't, memcheck could get out
+ of sync, in that it thinks all FP registers are defined by
+ this helper, but in reality some have not been updated. */
+ if (moveRegs)
+ vexRegs[preg] = 0; /* IEEE754 64-bit zero */
+ vexTags[preg] = 0;
+ } else {
+ /* register is non-empty */
+ if (moveRegs)
+ convert_f80le_to_f64le( &x87->reg[10*stno],
+ (UChar*)&vexRegs[preg] );
+ vexTags[preg] = 1;
+ }
+ }
+
+ /* stack pointer */
+ vex_state->guest_FTOP = ftop;
+
+ /* status word */
+ vex_state->guest_FC3210 = c3210;
+
+ /* handle the control word, setting FPROUND and detecting any
+ emulation warnings. */
+ pair = x86g_check_fldcw ( (UInt)fpucw );
+ fpround = (UInt)pair;
+ ew = (VexEmWarn)(pair >> 32);
+
+ vex_state->guest_FPROUND = fpround & 3;
+
+ /* emulation warnings --> caller */
+ return ew;
+}
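+
+/* Worked example (illustrative): with ftop == 7 and an image tag
+   word of 0x3FFF, stno 0 maps to preg 7, whose 2-bit tag
+   (0x3FFF >> 14) & 3 is 0 (non-empty); so vexTags[7] is set to 1
+   and the 80-bit image of ST(0) is converted into vexRegs[7].  The
+   other seven physical registers all carry tag 3 and are marked
+   empty. */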
+
+
+/* Create an x87 FPU state from the guest state, as close as
+ we can approximate it. */
+static
+void do_get_x87 ( /*IN*/VexGuestX86State* vex_state,
+ /*OUT*/UChar* x87_state )
+{
+ Int i, stno, preg;
+ UInt tagw;
+ ULong* vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
+ UChar* vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
+ Fpu_State* x87 = (Fpu_State*)x87_state;
+ UInt ftop = vex_state->guest_FTOP;
+ UInt c3210 = vex_state->guest_FC3210;
+
+ for (i = 0; i < 14; i++)
+ x87->env[i] = 0;
+
+ x87->env[1] = x87->env[3] = x87->env[5] = x87->env[13] = 0xFFFF;
+ x87->env[FP_ENV_STAT]
+ = toUShort(((ftop & 7) << 11) | (c3210 & 0x4700));
+ x87->env[FP_ENV_CTRL]
+ = toUShort(x86g_create_fpucw( vex_state->guest_FPROUND ));
+
+ /* Dump the register stack in ST order. */
+ tagw = 0;
+ for (stno = 0; stno < 8; stno++) {
+ preg = (stno + ftop) & 7;
+ if (vexTags[preg] == 0) {
+ /* register is empty */
+ tagw |= (3 << (2*preg));
+ convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
+ &x87->reg[10*stno] );
+ } else {
+ /* register is full. */
+ tagw |= (0 << (2*preg));
+ convert_f64le_to_f80le( (UChar*)&vexRegs[preg],
+ &x87->reg[10*stno] );
+ }
+ }
+ x87->env[FP_ENV_TAG] = toUShort(tagw);
+}
+
+
+/* CALLED FROM GENERATED CODE */
+/* DIRTY HELPER (reads guest state, writes guest mem) */
+void x86g_dirtyhelper_FXSAVE ( VexGuestX86State* gst, HWord addr )
+{
+ /* Somewhat roundabout, but at least it's simple. */
+ Fpu_State tmp;
+ UShort* addrS = (UShort*)addr;
+ UChar* addrC = (UChar*)addr;
+ U128* xmm = (U128*)(addr + 160);
+ UInt mxcsr;
+ UShort fp_tags;
+ UInt summary_tags;
+ Int r, stno;
+ UShort *srcS, *dstS;
+
+ do_get_x87( gst, (UChar*)&tmp );
+ mxcsr = x86g_create_mxcsr( gst->guest_SSEROUND );
+
+ /* Now build the proper fxsave image from the x87 image we just
+ made. */
+
+ addrS[0] = tmp.env[FP_ENV_CTRL]; /* FCW: fpu control word */
+   addrS[1]  = tmp.env[FP_ENV_STAT]; /* FSW: fpu status word */
+
+ /* set addrS[2] in an endian-independent way */
+ summary_tags = 0;
+ fp_tags = tmp.env[FP_ENV_TAG];
+ for (r = 0; r < 8; r++) {
+ if ( ((fp_tags >> (2*r)) & 3) != 3 )
+ summary_tags |= (1 << r);
+ }
+ addrC[4] = toUChar(summary_tags); /* FTW: tag summary byte */
+ addrC[5] = 0; /* pad */
+
+ addrS[3] = 0; /* FOP: fpu opcode (bogus) */
+ addrS[4] = 0;
+ addrS[5] = 0; /* FPU IP (bogus) */
+ addrS[6] = 0; /* FPU IP's segment selector (bogus) (although we
+ could conceivably dump %CS here) */
+
+ addrS[7] = 0; /* Intel reserved */
+
+ addrS[8] = 0; /* FPU DP (operand pointer) (bogus) */
+ addrS[9] = 0; /* FPU DP (operand pointer) (bogus) */
+ addrS[10] = 0; /* segment selector for above operand pointer; %DS
+ perhaps? */
+ addrS[11] = 0; /* Intel reserved */
+
+ addrS[12] = toUShort(mxcsr); /* MXCSR */
+ addrS[13] = toUShort(mxcsr >> 16);
+
+ addrS[14] = 0xFFFF; /* MXCSR mask (lo16); who knows what for */
+ addrS[15] = 0xFFFF; /* MXCSR mask (hi16); who knows what for */
+
+ /* Copy in the FP registers, in ST order. */
+ for (stno = 0; stno < 8; stno++) {
+ srcS = (UShort*)(&tmp.reg[10*stno]);
+ dstS = (UShort*)(&addrS[16 + 8*stno]);
+ dstS[0] = srcS[0];
+ dstS[1] = srcS[1];
+ dstS[2] = srcS[2];
+ dstS[3] = srcS[3];
+ dstS[4] = srcS[4];
+ dstS[5] = 0;
+ dstS[6] = 0;
+ dstS[7] = 0;
+ }
+
+ /* That's the first 160 bytes of the image done. Now only %xmm0
+ .. %xmm7 remain to be copied. If the host is big-endian, these
+ need to be byte-swapped. */
+ vassert(host_is_little_endian());
+
+# define COPY_U128(_dst,_src) \
+ do { _dst[0] = _src[0]; _dst[1] = _src[1]; \
+ _dst[2] = _src[2]; _dst[3] = _src[3]; } \
+ while (0)
+
+ COPY_U128( xmm[0], gst->guest_XMM0 );
+ COPY_U128( xmm[1], gst->guest_XMM1 );
+ COPY_U128( xmm[2], gst->guest_XMM2 );
+ COPY_U128( xmm[3], gst->guest_XMM3 );
+ COPY_U128( xmm[4], gst->guest_XMM4 );
+ COPY_U128( xmm[5], gst->guest_XMM5 );
+ COPY_U128( xmm[6], gst->guest_XMM6 );
+ COPY_U128( xmm[7], gst->guest_XMM7 );
+
+# undef COPY_U128
+}
+
+
+/* CALLED FROM GENERATED CODE */
+/* DIRTY HELPER (writes guest state, reads guest mem) */
+VexEmWarn x86g_dirtyhelper_FXRSTOR ( VexGuestX86State* gst, HWord addr )
+{
+ Fpu_State tmp;
+ VexEmWarn warnX87 = EmWarn_NONE;
+ VexEmWarn warnXMM = EmWarn_NONE;
+ UShort* addrS = (UShort*)addr;
+ UChar* addrC = (UChar*)addr;
+ U128* xmm = (U128*)(addr + 160);
+ UShort fp_tags;
+ Int r, stno, i;
+
+ /* Restore %xmm0 .. %xmm7. If the host is big-endian, these need
+ to be byte-swapped. */
+ vassert(host_is_little_endian());
+
+# define COPY_U128(_dst,_src) \
+ do { _dst[0] = _src[0]; _dst[1] = _src[1]; \
+ _dst[2] = _src[2]; _dst[3] = _src[3]; } \
+ while (0)
+
+ COPY_U128( gst->guest_XMM0, xmm[0] );
+ COPY_U128( gst->guest_XMM1, xmm[1] );
+ COPY_U128( gst->guest_XMM2, xmm[2] );
+ COPY_U128( gst->guest_XMM3, xmm[3] );
+ COPY_U128( gst->guest_XMM4, xmm[4] );
+ COPY_U128( gst->guest_XMM5, xmm[5] );
+ COPY_U128( gst->guest_XMM6, xmm[6] );
+ COPY_U128( gst->guest_XMM7, xmm[7] );
+
+# undef COPY_U128
+
+ /* Copy the x87 registers out of the image, into a temporary
+ Fpu_State struct. */
+ for (i = 0; i < 14; i++) tmp.env[i] = 0;
+ for (i = 0; i < 80; i++) tmp.reg[i] = 0;
+ /* fill in tmp.reg[0..7] */
+ for (stno = 0; stno < 8; stno++) {
+ UShort* dstS = (UShort*)(&tmp.reg[10*stno]);
+ UShort* srcS = (UShort*)(&addrS[16 + 8*stno]);
+ dstS[0] = srcS[0];
+ dstS[1] = srcS[1];
+ dstS[2] = srcS[2];
+ dstS[3] = srcS[3];
+ dstS[4] = srcS[4];
+ }
+ /* fill in tmp.env[0..13] */
+ tmp.env[FP_ENV_CTRL] = addrS[0]; /* FCW: fpu control word */
+   tmp.env[FP_ENV_STAT] = addrS[1]; /* FSW: fpu status word */
+
+ fp_tags = 0;
+ for (r = 0; r < 8; r++) {
+ if (addrC[4] & (1<<r))
+         fp_tags |= (0 << (2*r)); /* VALID -- not really precise enough. */
+      else
+         fp_tags |= (3 << (2*r)); /* EMPTY */
+ }
+ tmp.env[FP_ENV_TAG] = fp_tags;
+
+ /* Now write 'tmp' into the guest state. */
+ warnX87 = do_put_x87( True/*moveRegs*/, (UChar*)&tmp, gst );
+
+ { UInt w32 = (((UInt)addrS[12]) & 0xFFFF)
+ | ((((UInt)addrS[13]) & 0xFFFF) << 16);
+ ULong w64 = x86g_check_ldmxcsr( w32 );
+
+ warnXMM = (VexEmWarn)(w64 >> 32);
+
+ gst->guest_SSEROUND = (UInt)w64;
+ }
+
+ /* Prefer an X87 emwarn over an XMM one, if both exist. */
+ if (warnX87 != EmWarn_NONE)
+ return warnX87;
+ else
+ return warnXMM;
+}
+
+
+/* CALLED FROM GENERATED CODE */
+/* DIRTY HELPER (reads guest state, writes guest mem) */
+void x86g_dirtyhelper_FSAVE ( VexGuestX86State* gst, HWord addr )
+{
+ do_get_x87( gst, (UChar*)addr );
+}
+
+/* CALLED FROM GENERATED CODE */
+/* DIRTY HELPER (writes guest state, reads guest mem) */
+VexEmWarn x86g_dirtyhelper_FRSTOR ( VexGuestX86State* gst, HWord addr )
+{
+ return do_put_x87( True/*regs too*/, (UChar*)addr, gst );
+}
+
+/* CALLED FROM GENERATED CODE */
+/* DIRTY HELPER (reads guest state, writes guest mem) */
+void x86g_dirtyhelper_FSTENV ( VexGuestX86State* gst, HWord addr )
+{
+ /* Somewhat roundabout, but at least it's simple. */
+ Int i;
+ UShort* addrP = (UShort*)addr;
+ Fpu_State tmp;
+ do_get_x87( gst, (UChar*)&tmp );
+ for (i = 0; i < 14; i++)
+ addrP[i] = tmp.env[i];
+}
+
+/* CALLED FROM GENERATED CODE */
+/* DIRTY HELPER (writes guest state, reads guest mem) */
+VexEmWarn x86g_dirtyhelper_FLDENV ( VexGuestX86State* gst, HWord addr )
+{
+ return do_put_x87( False/*don't move regs*/, (UChar*)addr, gst);
+}
+
+
+/*---------------------------------------------------------------*/
+/*--- Misc integer helpers, including rotates and CPUID. ---*/
+/*---------------------------------------------------------------*/
+
+/* CALLED FROM GENERATED CODE: CLEAN HELPER */
+/* Calculate both flags and value result for rotate right
+ through the carry bit. Result in low 32 bits,
+ new flags (OSZACP) in high 32 bits.
+*/
+ULong x86g_calculate_RCR ( UInt arg, UInt rot_amt, UInt eflags_in, UInt sz )
+{
+ UInt tempCOUNT = rot_amt & 0x1F, cf=0, of=0, tempcf;
+
+ switch (sz) {
+ case 4:
+ cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
+ of = ((arg >> 31) ^ cf) & 1;
+ while (tempCOUNT > 0) {
+ tempcf = arg & 1;
+ arg = (arg >> 1) | (cf << 31);
+ cf = tempcf;
+ tempCOUNT--;
+ }
+ break;
+ case 2:
+ while (tempCOUNT >= 17) tempCOUNT -= 17;
+ cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
+ of = ((arg >> 15) ^ cf) & 1;
+ while (tempCOUNT > 0) {
+ tempcf = arg & 1;
+ arg = ((arg >> 1) & 0x7FFF) | (cf << 15);
+ cf = tempcf;
+ tempCOUNT--;
+ }
+ break;
+ case 1:
+ while (tempCOUNT >= 9) tempCOUNT -= 9;
+ cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
+ of = ((arg >> 7) ^ cf) & 1;
+ while (tempCOUNT > 0) {
+ tempcf = arg & 1;
+ arg = ((arg >> 1) & 0x7F) | (cf << 7);
+ cf = tempcf;
+ tempCOUNT--;
+ }
+ break;
+ default:
+ vpanic("calculate_RCR: invalid size");
+ }
+
+ cf &= 1;
+ of &= 1;
+ eflags_in &= ~(X86G_CC_MASK_C | X86G_CC_MASK_O);
+ eflags_in |= (cf << X86G_CC_SHIFT_C) | (of << X86G_CC_SHIFT_O);
+
+ return (((ULong)eflags_in) << 32) | ((ULong)arg);
+}
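+
+/* Worked example (illustrative): an 8-bit RCR of arg 0x01 by one
+   place with CF == 0 rotates the old CF into bit 7 and the old bit
+   0 out into CF, giving arg == 0x00 and CF == 1.  OF is computed
+   before the rotation as (original bit 7) XOR (incoming CF), here
+   0 ^ 0 == 0. */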
+
+
+/* CALLED FROM GENERATED CODE: CLEAN HELPER */
+/* Calculate both flags and value result for rotate left
+ through the carry bit. Result in low 32 bits,
+ new flags (OSZACP) in high 32 bits.
+*/
+ULong x86g_calculate_RCL ( UInt arg, UInt rot_amt, UInt eflags_in, UInt sz )
+{
+ UInt tempCOUNT = rot_amt & 0x1F, cf=0, of=0, tempcf;
+
+ switch (sz) {
+ case 4:
+ cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
+ while (tempCOUNT > 0) {
+ tempcf = (arg >> 31) & 1;
+ arg = (arg << 1) | (cf & 1);
+ cf = tempcf;
+ tempCOUNT--;
+ }
+ of = ((arg >> 31) ^ cf) & 1;
+ break;
+ case 2:
+ while (tempCOUNT >= 17) tempCOUNT -= 17;
+ cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
+ while (tempCOUNT > 0) {
+ tempcf = (arg >> 15) & 1;
+ arg = 0xFFFF & ((arg << 1) | (cf & 1));
+ cf = tempcf;
+ tempCOUNT--;
+ }
+ of = ((arg >> 15) ^ cf) & 1;
+ break;
+ case 1:
+ while (tempCOUNT >= 9) tempCOUNT -= 9;
+ cf = (eflags_in >> X86G_CC_SHIFT_C) & 1;
+ while (tempCOUNT > 0) {
+ tempcf = (arg >> 7) & 1;
+ arg = 0xFF & ((arg << 1) | (cf & 1));
+ cf = tempcf;
+ tempCOUNT--;
+ }
+ of = ((arg >> 7) ^ cf) & 1;
+ break;
+ default:
+ vpanic("calculate_RCL: invalid size");
+ }
+
+ cf &= 1;
+ of &= 1;
+ eflags_in &= ~(X86G_CC_MASK_C | X86G_CC_MASK_O);
+ eflags_in |= (cf << X86G_CC_SHIFT_C) | (of << X86G_CC_SHIFT_O);
+
+ return (((ULong)eflags_in) << 32) | ((ULong)arg);
+}
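+
+/* Worked example (illustrative): an 8-bit RCL of arg 0x80 by one
+   place with CF == 0 moves the old bit 7 out into CF and the old CF
+   into bit 0, giving arg == 0x00 and CF == 1.  OF is computed after
+   the rotation as (result bit 7) XOR (new CF), here 0 ^ 1 == 1. */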
+
+
+/* CALLED FROM GENERATED CODE: CLEAN HELPER */
+/* Calculate both flags and value result for DAA/DAS/AAA/AAS.
+ AX value in low half of arg, OSZACP in upper half.
+ See guest-x86/toIR.c usage point for details.
+*/
+static UInt calc_parity_8bit ( UInt w32 ) {
+ UInt i;
+ UInt p = 1;
+ for (i = 0; i < 8; i++)
+ p ^= (1 & (w32 >> i));
+ return p;
+}
+UInt x86g_calculate_daa_das_aaa_aas ( UInt flags_and_AX, UInt opcode )
+{
+ UInt r_AL = (flags_and_AX >> 0) & 0xFF;
+ UInt r_AH = (flags_and_AX >> 8) & 0xFF;
+ UInt r_O = (flags_and_AX >> (16 + X86G_CC_SHIFT_O)) & 1;
+ UInt r_S = (flags_and_AX >> (16 + X86G_CC_SHIFT_S)) & 1;
+ UInt r_Z = (flags_and_AX >> (16 + X86G_CC_SHIFT_Z)) & 1;
+ UInt r_A = (flags_and_AX >> (16 + X86G_CC_SHIFT_A)) & 1;
+ UInt r_C = (flags_and_AX >> (16 + X86G_CC_SHIFT_C)) & 1;
+ UInt r_P = (flags_and_AX >> (16 + X86G_CC_SHIFT_P)) & 1;
+ UInt result = 0;
+
+ switch (opcode) {
+ case 0x27: { /* DAA */
+ UInt old_AL = r_AL;
+ UInt old_C = r_C;
+ r_C = 0;
+ if ((r_AL & 0xF) > 9 || r_A == 1) {
+ r_AL = r_AL + 6;
+ r_C = old_C;
+ if (r_AL >= 0x100) r_C = 1;
+ r_A = 1;
+ } else {
+ r_A = 0;
+ }
+ if (old_AL > 0x99 || old_C == 1) {
+ r_AL = r_AL + 0x60;
+ r_C = 1;
+ } else {
+ r_C = 0;
+ }
+ /* O is undefined. S Z and P are set according to the
+ result. */
+ r_AL &= 0xFF;
+ r_O = 0; /* let's say */
+ r_S = (r_AL & 0x80) ? 1 : 0;
+ r_Z = (r_AL == 0) ? 1 : 0;
+ r_P = calc_parity_8bit( r_AL );
+ break;
+ }
+ case 0x2F: { /* DAS */
+ UInt old_AL = r_AL;
+ UInt old_C = r_C;
+ r_C = 0;
+ if ((r_AL & 0xF) > 9 || r_A == 1) {
+ Bool borrow = r_AL < 6;
+ r_AL = r_AL - 6;
+ r_C = old_C;
+ if (borrow) r_C = 1;
+ r_A = 1;
+ } else {
+ r_A = 0;
+ }
+ if (old_AL > 0x99 || old_C == 1) {
+ r_AL = r_AL - 0x60;
+ r_C = 1;
+ } else {
+ /* Intel docs are wrong: r_C = 0; */
+ }
+ /* O is undefined. S Z and P are set according to the
+ result. */
+ r_AL &= 0xFF;
+ r_O = 0; /* let's say */
+ r_S = (r_AL & 0x80) ? 1 : 0;
+ r_Z = (r_AL == 0) ? 1 : 0;
+ r_P = calc_parity_8bit( r_AL );
+ break;
+ }
+ case 0x37: { /* AAA */
+ Bool nudge = r_AL > 0xF9;
+ if ((r_AL & 0xF) > 9 || r_A == 1) {
+ r_AL = r_AL + 6;
+ r_AH = r_AH + 1 + (nudge ? 1 : 0);
+ r_A = 1;
+ r_C = 1;
+ r_AL = r_AL & 0xF;
+ } else {
+ r_A = 0;
+ r_C = 0;
+ r_AL = r_AL & 0xF;
+ }
+ /* O S Z and P are undefined. */
+ r_O = r_S = r_Z = r_P = 0; /* let's say */
+ break;
+ }
+ case 0x3F: { /* AAS */
+ Bool nudge = r_AL < 0x06;
+ if ((r_AL & 0xF) > 9 || r_A == 1) {
+ r_AL = r_AL - 6;
+ r_AH = r_AH - 1 - (nudge ? 1 : 0);
+ r_A = 1;
+ r_C = 1;
+ r_AL = r_AL & 0xF;
+ } else {
+ r_A = 0;
+ r_C = 0;
+ r_AL = r_AL & 0xF;
+ }
+ /* O S Z and P are undefined. */
+ r_O = r_S = r_Z = r_P = 0; /* let's say */
+ break;
+ }
+ default:
+ vassert(0);
+ }
+ result = ( (r_O & 1) << (16 + X86G_CC_SHIFT_O) )
+ | ( (r_S & 1) << (16 + X86G_CC_SHIFT_S) )
+ | ( (r_Z & 1) << (16 + X86G_CC_SHIFT_Z) )
+ | ( (r_A & 1) << (16 + X86G_CC_SHIFT_A) )
+ | ( (r_C & 1) << (16 + X86G_CC_SHIFT_C) )
+ | ( (r_P & 1) << (16 + X86G_CC_SHIFT_P) )
+ | ( (r_AH & 0xFF) << 8 )
+ | ( (r_AL & 0xFF) << 0 );
+ return result;
+}
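+
+/* Worked example (illustrative): DAA (opcode 0x27) on AL == 0x9C
+   with A == C == 0.  The low nibble 0xC exceeds 9, so 6 is added
+   (AL == 0xA2, A == 1); the original AL 0x9C exceeds 0x99, so 0x60
+   is added as well and C is set.  The result is AL == 0x02 with
+   C == 1, A == 1, S == Z == 0 and P recomputed over the 8-bit
+   result. */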
+
+
+/* CALLED FROM GENERATED CODE */
+/* DIRTY HELPER (non-referentially-transparent) */
+/* Horrible hack. On non-x86 platforms, return 1. */
+ULong x86g_dirtyhelper_RDTSC ( void )
+{
+# if defined(__i386__)
+ ULong res;
+ __asm__ __volatile__("rdtsc" : "=A" (res));
+ return res;
+# else
+ return 1ULL;
+# endif
+}
+
+
+/* CALLED FROM GENERATED CODE */
+/* DIRTY HELPER (modifies guest state) */
+/* Claim to be a P55C (Intel Pentium/MMX) */
+void x86g_dirtyhelper_CPUID_sse0 ( VexGuestX86State* st )
+{
+ switch (st->guest_EAX) {
+ case 0:
+ st->guest_EAX = 0x1;
+ st->guest_EBX = 0x756e6547;
+ st->guest_ECX = 0x6c65746e;
+ st->guest_EDX = 0x49656e69;
+ break;
+ default:
+ st->guest_EAX = 0x543;
+ st->guest_EBX = 0x0;
+ st->guest_ECX = 0x0;
+ st->guest_EDX = 0x8001bf;
+ break;
+ }
+}
+
+/* CALLED FROM GENERATED CODE */
+/* DIRTY HELPER (modifies guest state) */
+/* Claim to be the following SSE1-capable CPU:
+ vendor_id : GenuineIntel
+ cpu family : 6
+ model : 11
+ model name : Intel(R) Pentium(R) III CPU family 1133MHz
+ stepping : 1
+ cpu MHz : 1131.013
+ cache size : 512 KB
+*/
+void x86g_dirtyhelper_CPUID_sse1 ( VexGuestX86State* st )
+{
+ switch (st->guest_EAX) {
+ case 0:
+ st->guest_EAX = 0x00000002;
+ st->guest_EBX = 0x756e6547;
+ st->guest_ECX = 0x6c65746e;
+ st->guest_EDX = 0x49656e69;
+ break;
+ case 1:
+ st->guest_EAX = 0x000006b1;
+ st->guest_EBX = 0x00000004;
+ st->guest_ECX = 0x00000000;
+ st->guest_EDX = 0x0383fbff;
+ break;
+ default:
+ st->guest_EAX = 0x03020101;
+ st->guest_EBX = 0x00000000;
+ st->guest_ECX = 0x00000000;
+ st->guest_EDX = 0x0c040883;
+ break;
+ }
+}
+
+/* Claim to be the following SSSE3-capable CPU (2 x ...):
+ vendor_id : GenuineIntel
+ cpu family : 6
+ model : 15
+ model name : Intel(R) Core(TM)2 CPU 6600 @ 2.40GHz
+ stepping : 6
+ cpu MHz : 2394.000
+ cache size : 4096 KB
+ physical id : 0
+ siblings : 2
+ core id : 0
+ cpu cores : 2
+ fpu : yes
+ fpu_exception : yes
+ cpuid level : 10
+ wp : yes
+ flags : fpu vme de pse tsc msr pae mce cx8 apic sep
+ mtrr pge mca cmov pat pse36 clflush dts acpi
+ mmx fxsr sse sse2 ss ht tm syscall nx lm
+ constant_tsc pni monitor ds_cpl vmx est tm2
+ cx16 xtpr lahf_lm
+ bogomips : 4798.78
+ clflush size : 64
+ cache_alignment : 64
+ address sizes : 36 bits physical, 48 bits virtual
+ power management:
+*/
+void x86g_dirtyhelper_CPUID_sse2 ( VexGuestX86State* st )
+{
+# define SET_ABCD(_a,_b,_c,_d) \
+ do { st->guest_EAX = (UInt)(_a); \
+ st->guest_EBX = (UInt)(_b); \
+ st->guest_ECX = (UInt)(_c); \
+ st->guest_EDX = (UInt)(_d); \
+ } while (0)
+
+ switch (st->guest_EAX) {
+ case 0x00000000:
+ SET_ABCD(0x0000000a, 0x756e6547, 0x6c65746e, 0x49656e69);
+ break;
+ case 0x00000001:
+ SET_ABCD(0x000006f6, 0x00020800, 0x0000e3bd, 0xbfebfbff);
+ break;
+ case 0x00000002:
+ SET_ABCD(0x05b0b101, 0x005657f0, 0x00000000, 0x2cb43049);
+ break;
+ case 0x00000003:
+ SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
+ break;
+ case 0x00000004: {
+ switch (st->guest_ECX) {
+ case 0x00000000: SET_ABCD(0x04000121, 0x01c0003f,
+ 0x0000003f, 0x00000001); break;
+ case 0x00000001: SET_ABCD(0x04000122, 0x01c0003f,
+ 0x0000003f, 0x00000001); break;
+ case 0x00000002: SET_ABCD(0x04004143, 0x03c0003f,
+ 0x00000fff, 0x00000001); break;
+ default: SET_ABCD(0x00000000, 0x00000000,
+ 0x00000000, 0x00000000); break;
+ }
+ break;
+ }
+ case 0x00000005:
+ SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00000020);
+ break;
+ case 0x00000006:
+ SET_ABCD(0x00000001, 0x00000002, 0x00000001, 0x00000000);
+ break;
+ case 0x00000007:
+ SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
+ break;
+ case 0x00000008:
+ SET_ABCD(0x00000400, 0x00000000, 0x00000000, 0x00000000);
+ break;
+ case 0x00000009:
+ SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
+ break;
+ case 0x0000000a:
+ unhandled_eax_value:
+ SET_ABCD(0x07280202, 0x00000000, 0x00000000, 0x00000000);
+ break;
+ case 0x80000000:
+ SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000);
+ break;
+ case 0x80000001:
+ SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x20100000);
+ break;
+ case 0x80000002:
+ SET_ABCD(0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865);
+ break;
+ case 0x80000003:
+ SET_ABCD(0x43203229, 0x20205550, 0x20202020, 0x20202020);
+ break;
+ case 0x80000004:
+ SET_ABCD(0x30303636, 0x20402020, 0x30342e32, 0x007a4847);
+ break;
+ case 0x80000005:
+ SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
+ break;
+ case 0x80000006:
+ SET_ABCD(0x00000000, 0x00000000, 0x10008040, 0x00000000);
+ break;
+ case 0x80000007:
+ SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
+ break;
+ case 0x80000008:
+ SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000);
+ break;
+ default:
+ goto unhandled_eax_value;
+ }
+# undef SET_ABCD
+}
+
+
+/* CALLED FROM GENERATED CODE */
+/* DIRTY HELPER (non-referentially-transparent) */
+/* Horrible hack. On non-x86 platforms, return 0. */
+UInt x86g_dirtyhelper_IN ( UInt portno, UInt sz/*1,2 or 4*/ )
+{
+# if defined(__i386__)
+ UInt r = 0;
+ portno &= 0xFFFF;
+ switch (sz) {
+ case 4:
+ __asm__ __volatile__("movl $0,%%eax; inl %w1,%0"
+ : "=a" (r) : "Nd" (portno));
+ break;
+ case 2:
+ __asm__ __volatile__("movl $0,%%eax; inw %w1,%w0"
+ : "=a" (r) : "Nd" (portno));
+ break;
+ case 1:
+ __asm__ __volatile__("movl $0,%%eax; inb %w1,%b0"
+ : "=a" (r) : "Nd" (portno));
+ break;
+ default:
+ break;
+ }
+ return r;
+# else
+ return 0;
+# endif
+}
+
+
+/* CALLED FROM GENERATED CODE */
+/* DIRTY HELPER (non-referentially-transparent) */
+/* Horrible hack. On non-x86 platforms, do nothing. */
+void x86g_dirtyhelper_OUT ( UInt portno, UInt data, UInt sz/*1,2 or 4*/ )
+{
+# if defined(__i386__)
+ portno &= 0xFFFF;
+ switch (sz) {
+ case 4:
+ __asm__ __volatile__("outl %0, %w1"
+ : : "a" (data), "Nd" (portno));
+ break;
+ case 2:
+ __asm__ __volatile__("outw %w0, %w1"
+ : : "a" (data), "Nd" (portno));
+ break;
+ case 1:
+ __asm__ __volatile__("outb %b0, %w1"
+ : : "a" (data), "Nd" (portno));
+ break;
+ default:
+ break;
+ }
+# else
+ /* do nothing */
+# endif
+}
+
+/* CALLED FROM GENERATED CODE */
+/* DIRTY HELPER (non-referentially-transparent) */
+/* Horrible hack. On non-x86 platforms, do nothing. */
+/* op = 0: call the native SGDT instruction.
+ op = 1: call the native SIDT instruction.
+*/
+void x86g_dirtyhelper_SxDT ( void *address, UInt op ) {
+# if defined(__i386__)
+ switch (op) {
+ case 0:
+ __asm__ __volatile__("sgdt (%0)" : : "r" (address) : "memory");
+ break;
+ case 1:
+ __asm__ __volatile__("sidt (%0)" : : "r" (address) : "memory");
+ break;
+ default:
+ vpanic("x86g_dirtyhelper_SxDT");
+ }
+# else
+   /* No native SGDT/SIDT to call; instead zero out the 6 bytes the
+      caller expects, so the result is at least defined. */
+ UChar* p = (UChar*)address;
+ p[0] = p[1] = p[2] = p[3] = p[4] = p[5] = 0;
+# endif
+}
+
+/*---------------------------------------------------------------*/
+/*--- Helpers for MMX/SSE/SSE2. ---*/
+/*---------------------------------------------------------------*/
+
+static inline UChar abdU8 ( UChar xx, UChar yy ) {
+ return toUChar(xx>yy ? xx-yy : yy-xx);
+}
+
+static inline ULong mk32x2 ( UInt w1, UInt w0 ) {
+ return (((ULong)w1) << 32) | ((ULong)w0);
+}
+
+static inline UShort sel16x4_3 ( ULong w64 ) {
+ UInt hi32 = toUInt(w64 >> 32);
+ return toUShort(hi32 >> 16);
+}
+static inline UShort sel16x4_2 ( ULong w64 ) {
+ UInt hi32 = toUInt(w64 >> 32);
+ return toUShort(hi32);
+}
+static inline UShort sel16x4_1 ( ULong w64 ) {
+ UInt lo32 = toUInt(w64);
+ return toUShort(lo32 >> 16);
+}
+static inline UShort sel16x4_0 ( ULong w64 ) {
+ UInt lo32 = toUInt(w64);
+ return toUShort(lo32);
+}
+
+static inline UChar sel8x8_7 ( ULong w64 ) {
+ UInt hi32 = toUInt(w64 >> 32);
+ return toUChar(hi32 >> 24);
+}
+static inline UChar sel8x8_6 ( ULong w64 ) {
+ UInt hi32 = toUInt(w64 >> 32);
+ return toUChar(hi32 >> 16);
+}
+static inline UChar sel8x8_5 ( ULong w64 ) {
+ UInt hi32 = toUInt(w64 >> 32);
+ return toUChar(hi32 >> 8);
+}
+static inline UChar sel8x8_4 ( ULong w64 ) {
+ UInt hi32 = toUInt(w64 >> 32);
+ return toUChar(hi32 >> 0);
+}
+static inline UChar sel8x8_3 ( ULong w64 ) {
+ UInt lo32 = toUInt(w64);
+ return toUChar(lo32 >> 24);
+}
+static inline UChar sel8x8_2 ( ULong w64 ) {
+ UInt lo32 = toUInt(w64);
+ return toUChar(lo32 >> 16);
+}
+static inline UChar sel8x8_1 ( ULong w64 ) {
+ UInt lo32 = toUInt(w64);
+ return toUChar(lo32 >> 8);
+}
+static inline UChar sel8x8_0 ( ULong w64 ) {
+ UInt lo32 = toUInt(w64);
+ return toUChar(lo32 >> 0);
+}
+
+/* CALLED FROM GENERATED CODE: CLEAN HELPER */
+ULong x86g_calculate_mmx_pmaddwd ( ULong xx, ULong yy )
+{
+ return
+ mk32x2(
+ (((Int)(Short)sel16x4_3(xx)) * ((Int)(Short)sel16x4_3(yy)))
+ + (((Int)(Short)sel16x4_2(xx)) * ((Int)(Short)sel16x4_2(yy))),
+ (((Int)(Short)sel16x4_1(xx)) * ((Int)(Short)sel16x4_1(yy)))
+ + (((Int)(Short)sel16x4_0(xx)) * ((Int)(Short)sel16x4_0(yy)))
+ );
+}
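+
+/* For example (illustrative): if every 16-bit lane of xx is 2 and
+   every lane of yy is 3, each lane product is 6, so both 32-bit
+   halves of the result are 12 and the helper returns
+   0x0000000C0000000CULL. */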
+
+/* CALLED FROM GENERATED CODE: CLEAN HELPER */
+UInt x86g_calculate_mmx_pmovmskb ( ULong xx )
+{
+ UInt r = 0;
+ if (xx & (1ULL << (64-1))) r |= (1<<7);
+ if (xx & (1ULL << (56-1))) r |= (1<<6);
+ if (xx & (1ULL << (48-1))) r |= (1<<5);
+ if (xx & (1ULL << (40-1))) r |= (1<<4);
+ if (xx & (1ULL << (32-1))) r |= (1<<3);
+ if (xx & (1ULL << (24-1))) r |= (1<<2);
+ if (xx & (1ULL << (16-1))) r |= (1<<1);
+ if (xx & (1ULL << ( 8-1))) r |= (1<<0);
+ return r;
+}
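+
+/* For example (illustrative): xx == 0x8000000000000080ULL has the
+   top bits of bytes 7 and 0 set, so the result is
+   (1<<7) | (1<<0) == 0x81. */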
+
+/* CALLED FROM GENERATED CODE: CLEAN HELPER */
+ULong x86g_calculate_mmx_psadbw ( ULong xx, ULong yy )
+{
+ UInt t = 0;
+ t += (UInt)abdU8( sel8x8_7(xx), sel8x8_7(yy) );
+ t += (UInt)abdU8( sel8x8_6(xx), sel8x8_6(yy) );
+ t += (UInt)abdU8( sel8x8_5(xx), sel8x8_5(yy) );
+ t += (UInt)abdU8( sel8x8_4(xx), sel8x8_4(yy) );
+ t += (UInt)abdU8( sel8x8_3(xx), sel8x8_3(yy) );
+ t += (UInt)abdU8( sel8x8_2(xx), sel8x8_2(yy) );
+ t += (UInt)abdU8( sel8x8_1(xx), sel8x8_1(yy) );
+ t += (UInt)abdU8( sel8x8_0(xx), sel8x8_0(yy) );
+ t &= 0xFFFF;
+ return (ULong)t;
+}
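+
+/* For example (illustrative): with xx == 0x0807060504030201ULL and
+   yy == 0, the absolute byte differences are 1..8, which sum to 36,
+   so the helper returns 0x24. */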
+
+/* CALLED FROM GENERATED CODE: CLEAN HELPER */
+UInt x86g_calculate_sse_pmovmskb ( ULong w64hi, ULong w64lo )
+{
+ UInt rHi8 = x86g_calculate_mmx_pmovmskb ( w64hi );
+ UInt rLo8 = x86g_calculate_mmx_pmovmskb ( w64lo );
+ return ((rHi8 & 0xFF) << 8) | (rLo8 & 0xFF);
+}
+
+
+/*---------------------------------------------------------------*/
+/*--- Helpers for dealing with segment overrides. ---*/
+/*---------------------------------------------------------------*/
+
+static inline
+UInt get_segdescr_base ( VexGuestX86SegDescr* ent )
+{
+ UInt lo = 0xFFFF & (UInt)ent->LdtEnt.Bits.BaseLow;
+ UInt mid = 0xFF & (UInt)ent->LdtEnt.Bits.BaseMid;
+ UInt hi = 0xFF & (UInt)ent->LdtEnt.Bits.BaseHi;
+ return (hi << 24) | (mid << 16) | lo;
+}
+
+static inline
+UInt get_segdescr_limit ( VexGuestX86SegDescr* ent )
+{
+ UInt lo = 0xFFFF & (UInt)ent->LdtEnt.Bits.LimitLow;
+ UInt hi = 0xF & (UInt)ent->LdtEnt.Bits.LimitHi;
+ UInt limit = (hi << 16) | lo;
+ if (ent->LdtEnt.Bits.Granularity)
+ limit = (limit << 12) | 0xFFF;
+ return limit;
+}
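+
+/* For example (illustrative): a descriptor with LimitLow == 0xFFFF,
+   LimitHi == 0xF and the granularity bit set describes a flat 4GB
+   segment: the 20-bit limit 0xFFFFF is scaled to
+   (0xFFFFF << 12) | 0xFFF == 0xFFFFFFFF. */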
+
+/* CALLED FROM GENERATED CODE: CLEAN HELPER */
+ULong x86g_use_seg_selector ( HWord ldt, HWord gdt,
+ UInt seg_selector, UInt virtual_addr )
+{
+ UInt tiBit, base, limit;
+ VexGuestX86SegDescr* the_descrs;
+
+ Bool verboze = False;
+
+ /* If this isn't true, we're in Big Trouble. */
+ vassert(8 == sizeof(VexGuestX86SegDescr));
+
+ if (verboze)
+      vex_printf("x86g_use_seg_selector: "
+ "seg_selector = 0x%x, vaddr = 0x%x\n",
+ seg_selector, virtual_addr);
+
+ /* Check for wildly invalid selector. */
+ if (seg_selector & ~0xFFFF)
+ goto bad;
+
+ seg_selector &= 0x0000FFFF;
+
+ /* Sanity check the segment selector. Ensure that RPL=11b (least
+ privilege). This forms the bottom 2 bits of the selector. */
+ if ((seg_selector & 3) != 3)
+ goto bad;
+
+ /* Extract the TI bit (0 means GDT, 1 means LDT) */
+ tiBit = (seg_selector >> 2) & 1;
+
+ /* Convert the segment selector onto a table index */
+ seg_selector >>= 3;
+   vassert(seg_selector < 8192); /* it is unsigned, so >= 0 trivially */
+
+ if (tiBit == 0) {
+
+ /* GDT access. */
+ /* Do we actually have a GDT to look at? */
+ if (gdt == 0)
+ goto bad;
+
+ /* Check for access to non-existent entry. */
+ if (seg_selector >= VEX_GUEST_X86_GDT_NENT)
+ goto bad;
+
+ the_descrs = (VexGuestX86SegDescr*)gdt;
+ base = get_segdescr_base (&the_descrs[seg_selector]);
+ limit = get_segdescr_limit(&the_descrs[seg_selector]);
+
+ } else {
+
+ /* All the same stuff, except for the LDT. */
+ if (ldt == 0)
+ goto bad;
+
+ if (seg_selector >= VEX_GUEST_X86_LDT_NENT)
+ goto bad;
+
+ the_descrs = (VexGuestX86SegDescr*)ldt;
+ base = get_segdescr_base (&the_descrs[seg_selector]);
+ limit = get_segdescr_limit(&the_descrs[seg_selector]);
+
+ }
+
+ /* Do the limit check. Note, this check is just slightly too
+ slack. Really it should be "if (virtual_addr + size - 1 >=
+ limit)," but we don't have the size info to hand. Getting it
+ could be significantly complex. */
+ if (virtual_addr >= limit)
+ goto bad;
+
+ if (verboze)
+      vex_printf("x86g_use_seg_selector: "
+ "base = 0x%x, addr = 0x%x\n",
+ base, base + virtual_addr);
+
+ /* High 32 bits are zero, indicating success. */
+ return (ULong)( ((UInt)virtual_addr) + base );
+
+ bad:
+ return 1ULL << 32;
+}
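+
+/* Usage sketch (illustrative, not compiled; 'ldt' is assumed to
+   point at a valid LDT array).  The selector 0x000F has RPL == 3
+   (bits 1:0), TI == 1 (bit 2, so the LDT is used) and table
+   index 1: */
+#if 0
+   ULong r = x86g_use_seg_selector(ldt, 0/*no GDT*/, 0x000F, 0x1234);
+   if (r >> 32) {
+      /* invalid selector, or 0x1234 is beyond the segment limit */
+   } else {
+      UInt linear = (UInt)r;  /* base of LDT entry 1, plus 0x1234 */
+   }
+#endif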
+
+
+/*---------------------------------------------------------------*/
+/*--- Helpers for dealing with, and describing, ---*/
+/*--- guest state as a whole. ---*/
+/*---------------------------------------------------------------*/
+
+/* Initialise the entire x86 guest state. */
+/* VISIBLE TO LIBVEX CLIENT */
+void LibVEX_GuestX86_initialise ( /*OUT*/VexGuestX86State* vex_state )
+{
+ vex_state->guest_EAX = 0;
+ vex_state->guest_ECX = 0;
+ vex_state->guest_EDX = 0;
+ vex_state->guest_EBX = 0;
+ vex_state->guest_ESP = 0;
+ vex_state->guest_EBP = 0;
+ vex_state->guest_ESI = 0;
+ vex_state->guest_EDI = 0;
+
+ vex_state->guest_CC_OP = X86G_CC_OP_COPY;
+ vex_state->guest_CC_DEP1 = 0;
+ vex_state->guest_CC_DEP2 = 0;
+ vex_state->guest_CC_NDEP = 0;
+ vex_state->guest_DFLAG = 1; /* forwards */
+ vex_state->guest_IDFLAG = 0;
+ vex_state->guest_ACFLAG = 0;
+
+ vex_state->guest_EIP = 0;
+
+ /* Initialise the simulated FPU */
+ x86g_dirtyhelper_FINIT( vex_state );
+
+   /* Initialise the SSE state. */
+# define SSEZERO(_xmm) _xmm[0]=_xmm[1]=_xmm[2]=_xmm[3] = 0;
+
+ vex_state->guest_SSEROUND = (UInt)Irrm_NEAREST;
+ SSEZERO(vex_state->guest_XMM0);
+ SSEZERO(vex_state->guest_XMM1);
+ SSEZERO(vex_state->guest_XMM2);
+ SSEZERO(vex_state->guest_XMM3);
+ SSEZERO(vex_state->guest_XMM4);
+ SSEZERO(vex_state->guest_XMM5);
+ SSEZERO(vex_state->guest_XMM6);
+ SSEZERO(vex_state->guest_XMM7);
+
+# undef SSEZERO
+
+ vex_state->guest_CS = 0;
+ vex_state->guest_DS = 0;
+ vex_state->guest_ES = 0;
+ vex_state->guest_FS = 0;
+ vex_state->guest_GS = 0;
+ vex_state->guest_SS = 0;
+ vex_state->guest_LDT = 0;
+ vex_state->guest_GDT = 0;
+
+ vex_state->guest_EMWARN = EmWarn_NONE;
+
+ /* SSE2 has a 'clflush' cache-line-invalidator which uses these. */
+ vex_state->guest_TISTART = 0;
+ vex_state->guest_TILEN = 0;
+
+ vex_state->guest_NRADDR = 0;
+ vex_state->guest_SC_CLASS = 0;
+ vex_state->guest_IP_AT_SYSCALL = 0;
+
+ vex_state->padding1 = 0;
+ vex_state->padding2 = 0;
+ vex_state->padding3 = 0;
+}
+
+
+/* Figure out if any part of the guest state contained in minoff
+ .. maxoff requires precise memory exceptions. If in doubt return
+   True (but this generates significantly slower code).
+
+ By default we enforce precise exns for guest %ESP, %EBP and %EIP
+ only. These are the minimum needed to extract correct stack
+ backtraces from x86 code.
+*/
+Bool guest_x86_state_requires_precise_mem_exns ( Int minoff,
+ Int maxoff)
+{
+ Int ebp_min = offsetof(VexGuestX86State, guest_EBP);
+ Int ebp_max = ebp_min + 4 - 1;
+ Int esp_min = offsetof(VexGuestX86State, guest_ESP);
+ Int esp_max = esp_min + 4 - 1;
+ Int eip_min = offsetof(VexGuestX86State, guest_EIP);
+ Int eip_max = eip_min + 4 - 1;
+
+ if (maxoff < ebp_min || minoff > ebp_max) {
+ /* no overlap with ebp */
+ } else {
+ return True;
+ }
+
+ if (maxoff < esp_min || minoff > esp_max) {
+ /* no overlap with esp */
+ } else {
+ return True;
+ }
+
+ if (maxoff < eip_min || minoff > eip_max) {
+ /* no overlap with eip */
+ } else {
+ return True;
+ }
+
+ return False;
+}
+
+
+#define ALWAYSDEFD(field) \
+ { offsetof(VexGuestX86State, field), \
+ (sizeof ((VexGuestX86State*)0)->field) }
+
+VexGuestLayout
+ x86guest_layout
+ = {
+ /* Total size of the guest state, in bytes. */
+ .total_sizeB = sizeof(VexGuestX86State),
+
+ /* Describe the stack pointer. */
+ .offset_SP = offsetof(VexGuestX86State,guest_ESP),
+ .sizeof_SP = 4,
+
+ /* Describe the frame pointer. */
+ .offset_FP = offsetof(VexGuestX86State,guest_EBP),
+ .sizeof_FP = 4,
+
+ /* Describe the instruction pointer. */
+ .offset_IP = offsetof(VexGuestX86State,guest_EIP),
+ .sizeof_IP = 4,
+
+ /* Describe any sections to be regarded by Memcheck as
+ 'always-defined'. */
+ .n_alwaysDefd = 24,
+
+ /* flags thunk: OP and NDEP are always defd, whereas DEP1
+ and DEP2 have to be tracked. See detailed comment in
+ gdefs.h on meaning of thunk fields. */
+ .alwaysDefd
+ = { /* 0 */ ALWAYSDEFD(guest_CC_OP),
+ /* 1 */ ALWAYSDEFD(guest_CC_NDEP),
+ /* 2 */ ALWAYSDEFD(guest_DFLAG),
+ /* 3 */ ALWAYSDEFD(guest_IDFLAG),
+ /* 4 */ ALWAYSDEFD(guest_ACFLAG),
+ /* 5 */ ALWAYSDEFD(guest_EIP),
+ /* 6 */ ALWAYSDEFD(guest_FTOP),
+ /* 7 */ ALWAYSDEFD(guest_FPTAG),
+ /* 8 */ ALWAYSDEFD(guest_FPROUND),
+ /* 9 */ ALWAYSDEFD(guest_FC3210),
+ /* 10 */ ALWAYSDEFD(guest_CS),
+ /* 11 */ ALWAYSDEFD(guest_DS),
+ /* 12 */ ALWAYSDEFD(guest_ES),
+ /* 13 */ ALWAYSDEFD(guest_FS),
+ /* 14 */ ALWAYSDEFD(guest_GS),
+ /* 15 */ ALWAYSDEFD(guest_SS),
+ /* 16 */ ALWAYSDEFD(guest_LDT),
+ /* 17 */ ALWAYSDEFD(guest_GDT),
+ /* 18 */ ALWAYSDEFD(guest_EMWARN),
+ /* 19 */ ALWAYSDEFD(guest_SSEROUND),
+ /* 20 */ ALWAYSDEFD(guest_TISTART),
+ /* 21 */ ALWAYSDEFD(guest_TILEN),
+ /* 22 */ ALWAYSDEFD(guest_SC_CLASS),
+ /* 23 */ ALWAYSDEFD(guest_IP_AT_SYSCALL)
+ }
+ };
+
+
+/*---------------------------------------------------------------*/
+/*--- end guest_x86_helpers.c ---*/
+/*---------------------------------------------------------------*/
diff --git a/VEX/priv/guest_x86_toIR.c b/VEX/priv/guest_x86_toIR.c
new file mode 100644
index 0000000..d03b6f1
--- /dev/null
+++ b/VEX/priv/guest_x86_toIR.c
@@ -0,0 +1,15138 @@
+
+/*--------------------------------------------------------------------*/
+/*--- begin guest_x86_toIR.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+ This file is part of Valgrind, a dynamic binary instrumentation
+ framework.
+
+ Copyright (C) 2004-2010 OpenWorks LLP
+ info@open-works.net
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA.
+
+ The GNU General Public License is contained in the file COPYING.
+
+ Neither the names of the U.S. Department of Energy nor the
+ University of California nor the names of its contributors may be
+ used to endorse or promote products derived from this software
+ without prior written permission.
+*/
+
+/* Translates x86 code to IR. */
+
+/* TODO:
+
+ All Puts to CC_OP/CC_DEP1/CC_DEP2/CC_NDEP should really be checked
+ to ensure a 32-bit value is being written.
+
+   FUCOMI(P): what happens to A and S flags?  Currently they are
+   forced to zero.
+
+ x87 FP Limitations:
+
+ * all arithmetic done at 64 bits
+
+ * no FP exceptions, except for handling stack over/underflow
+
+ * FP rounding mode observed only for float->int conversions
+ and int->float conversions which could lose accuracy, and
+ for float-to-float rounding. For all other operations,
+ round-to-nearest is used, regardless.
+
+ * FP sin/cos/tan/sincos: C2 flag is always cleared. IOW the
+ simulation claims the argument is in-range (-2^63 <= arg <= 2^63)
+ even when it isn't.
+
+ * some of the FCOM cases could do with testing -- not convinced
+ that the args are the right way round.
+
+ * FSAVE does not re-initialise the FPU; it should do
+
+ * FINIT not only initialises the FPU environment, it also
+ zeroes all the FP registers. It should leave the registers
+ unchanged.
+
+ SAHF should cause eflags[1] == 1, and in fact it produces 0. As
+ per Intel docs this bit has no meaning anyway. Since PUSHF is the
+ only way to observe eflags[1], a proper fix would be to make that
+ bit be set by PUSHF.
+
+ The state of %eflags.AC (alignment check, bit 18) is recorded by
+ the simulation (viz, if you set it with popf then a pushf produces
+ the value you set it to), but it is otherwise ignored. In
+ particular, setting it to 1 does NOT cause alignment checking to
+ happen. Programs that set it to 1 and then rely on the resulting
+ SIGBUSs to inform them of misaligned accesses will not work.
+
+ Implementation of sysenter is necessarily partial. sysenter is a
+ kind of system call entry. When doing a sysenter, the return
+ address is not known -- that is something that is beyond Vex's
+ knowledge. So the generated IR forces a return to the scheduler,
+   which can do what it likes to simulate the sysenter, but it MUST
+ set this thread's guest_EIP field with the continuation address
+ before resuming execution. If that doesn't happen, the thread will
+ jump to address zero, which is probably fatal.
+
+ This module uses global variables and so is not MT-safe (if that
+ should ever become relevant).
+
+ The delta values are 32-bit ints, not 64-bit ints. That means
+ this module may not work right if run on a 64-bit host. That should
+ be fixed properly, really -- if anyone ever wants to use Vex to
+ translate x86 code for execution on a 64-bit host.
+
+ casLE (implementation of lock-prefixed insns) and rep-prefixed
+ insns: the side-exit back to the start of the insn is done with
+ Ijk_Boring. This is quite wrong, it should be done with
+ Ijk_NoRedir, since otherwise the side exit, which is intended to
+ restart the instruction for whatever reason, could go somewhere
+ entirely else. Doing it right (with Ijk_NoRedir jumps) would make
+ no-redir jumps performance critical, at least for rep-prefixed
+ instructions, since all iterations thereof would involve such a
+ jump. It's not such a big deal with casLE since the side exit is
+ only taken if the CAS fails, that is, the location is contended,
+ which is relatively unlikely.
+
+ XXXX: Nov 2009: handling of SWP on ARM suffers from the same
+ problem.
+
+ Note also, the test for CAS success vs failure is done using
+ Iop_CasCmp{EQ,NE}{8,16,32,64} rather than the ordinary
+ Iop_Cmp{EQ,NE} equivalents. This is so as to tell Memcheck that it
+ shouldn't definedness-check these comparisons. See
+ COMMENT_ON_CasCmpEQ in memcheck/mc_translate.c for
+ background/rationale.
+*/
+
+/* Performance holes:
+
+ - fcom ; fstsw %ax ; sahf
+ sahf does not update the O flag (sigh) and so O needs to
+ be computed. This is done expensively; it would be better
+ to have a calculate_eflags_o helper.
+
+ - emwarns; some FP codes can generate huge numbers of these
+ if the fpucw is changed in an inner loop. It would be
+ better for the guest state to have an emwarn-enable reg
+ which can be set zero or nonzero. If it is zero, emwarns
+ are not flagged, and instead control just flows all the
+ way through bbs as usual.
+*/
+
+/* "Special" instructions.
+
+ This instruction decoder can decode three special instructions
+ which mean nothing natively (are no-ops as far as regs/mem are
+ concerned) but have meaning for supporting Valgrind. A special
+ instruction is flagged by the 12-byte preamble C1C703 C1C70D C1C71D
+ C1C713 (in the standard interpretation, that means: roll $3, %edi;
+ roll $13, %edi; roll $29, %edi; roll $19, %edi). Following that,
+ one of the following 3 are allowed (standard interpretation in
+ parentheses):
+
+ 87DB (xchgl %ebx,%ebx) %EDX = client_request ( %EAX )
+ 87C9 (xchgl %ecx,%ecx) %EAX = guest_NRADDR
+ 87D2 (xchgl %edx,%edx) call-noredir *%EAX
+
+ Any other bytes following the 12-byte preamble are illegal and
+ constitute a failure in instruction decoding. This all assumes
+ that the preamble will never occur except in specific code
+ fragments designed for Valgrind to catch.
+
+ No prefixes may precede a "Special" instruction.
+*/
+
+/* LOCK prefixed instructions. These are translated using IR-level
+ CAS statements (IRCAS) and are believed to preserve atomicity, even
+ from the point of view of some other process racing against a
+ simulated one (presumably they communicate via a shared memory
+ segment).
+
+ Handlers which are aware of LOCK prefixes are:
+ dis_op2_G_E (add, or, adc, sbb, and, sub, xor)
+ dis_cmpxchg_G_E (cmpxchg)
+ dis_Grp1 (add, or, adc, sbb, and, sub, xor)
+ dis_Grp3 (not, neg)
+ dis_Grp4 (inc, dec)
+ dis_Grp5 (inc, dec)
+ dis_Grp8_Imm (bts, btc, btr)
+ dis_bt_G_E (bts, btc, btr)
+ dis_xadd_G_E (xadd)
+*/
+
+
+#include "libvex_basictypes.h"
+#include "libvex_ir.h"
+#include "libvex.h"
+#include "libvex_guest_x86.h"
+
+#include "main_util.h"
+#include "main_globals.h"
+#include "guest_generic_bb_to_IR.h"
+#include "guest_generic_x87.h"
+#include "guest_x86_defs.h"
+
+
+/*------------------------------------------------------------*/
+/*--- Globals ---*/
+/*------------------------------------------------------------*/
+
+/* These are set at the start of the translation of an insn, right
+ down in disInstr_X86, so that we don't have to pass them around
+ endlessly. They are all constant during the translation of any
+ given insn. */
+
+/* We need to know this to do sub-register accesses correctly. */
+static Bool host_is_bigendian;
+
+/* Pointer to the guest code area (points to start of BB, not to the
+ insn being processed). */
+static UChar* guest_code;
+
+/* The guest address corresponding to guest_code[0]. */
+static Addr32 guest_EIP_bbstart;
+
+/* The guest address for the instruction currently being
+ translated. */
+static Addr32 guest_EIP_curr_instr;
+
+/* The IRSB* into which we're generating code. */
+static IRSB* irsb;
+
+
+/*------------------------------------------------------------*/
+/*--- Debugging output ---*/
+/*------------------------------------------------------------*/
+
+#define DIP(format, args...) \
+ if (vex_traceflags & VEX_TRACE_FE) \
+ vex_printf(format, ## args)
+
+#define DIS(buf, format, args...) \
+ if (vex_traceflags & VEX_TRACE_FE) \
+ vex_sprintf(buf, format, ## args)
+
+
+/*------------------------------------------------------------*/
+/*--- Offsets of various parts of the x86 guest state. ---*/
+/*------------------------------------------------------------*/
+
+#define OFFB_EAX offsetof(VexGuestX86State,guest_EAX)
+#define OFFB_EBX offsetof(VexGuestX86State,guest_EBX)
+#define OFFB_ECX offsetof(VexGuestX86State,guest_ECX)
+#define OFFB_EDX offsetof(VexGuestX86State,guest_EDX)
+#define OFFB_ESP offsetof(VexGuestX86State,guest_ESP)
+#define OFFB_EBP offsetof(VexGuestX86State,guest_EBP)
+#define OFFB_ESI offsetof(VexGuestX86State,guest_ESI)
+#define OFFB_EDI offsetof(VexGuestX86State,guest_EDI)
+
+#define OFFB_EIP offsetof(VexGuestX86State,guest_EIP)
+
+#define OFFB_CC_OP offsetof(VexGuestX86State,guest_CC_OP)
+#define OFFB_CC_DEP1 offsetof(VexGuestX86State,guest_CC_DEP1)
+#define OFFB_CC_DEP2 offsetof(VexGuestX86State,guest_CC_DEP2)
+#define OFFB_CC_NDEP offsetof(VexGuestX86State,guest_CC_NDEP)
+
+#define OFFB_FPREGS offsetof(VexGuestX86State,guest_FPREG[0])
+#define OFFB_FPTAGS offsetof(VexGuestX86State,guest_FPTAG[0])
+#define OFFB_DFLAG offsetof(VexGuestX86State,guest_DFLAG)
+#define OFFB_IDFLAG offsetof(VexGuestX86State,guest_IDFLAG)
+#define OFFB_ACFLAG offsetof(VexGuestX86State,guest_ACFLAG)
+#define OFFB_FTOP offsetof(VexGuestX86State,guest_FTOP)
+#define OFFB_FC3210 offsetof(VexGuestX86State,guest_FC3210)
+#define OFFB_FPROUND offsetof(VexGuestX86State,guest_FPROUND)
+
+#define OFFB_CS offsetof(VexGuestX86State,guest_CS)
+#define OFFB_DS offsetof(VexGuestX86State,guest_DS)
+#define OFFB_ES offsetof(VexGuestX86State,guest_ES)
+#define OFFB_FS offsetof(VexGuestX86State,guest_FS)
+#define OFFB_GS offsetof(VexGuestX86State,guest_GS)
+#define OFFB_SS offsetof(VexGuestX86State,guest_SS)
+#define OFFB_LDT offsetof(VexGuestX86State,guest_LDT)
+#define OFFB_GDT offsetof(VexGuestX86State,guest_GDT)
+
+#define OFFB_SSEROUND offsetof(VexGuestX86State,guest_SSEROUND)
+#define OFFB_XMM0 offsetof(VexGuestX86State,guest_XMM0)
+#define OFFB_XMM1 offsetof(VexGuestX86State,guest_XMM1)
+#define OFFB_XMM2 offsetof(VexGuestX86State,guest_XMM2)
+#define OFFB_XMM3 offsetof(VexGuestX86State,guest_XMM3)
+#define OFFB_XMM4 offsetof(VexGuestX86State,guest_XMM4)
+#define OFFB_XMM5 offsetof(VexGuestX86State,guest_XMM5)
+#define OFFB_XMM6 offsetof(VexGuestX86State,guest_XMM6)
+#define OFFB_XMM7 offsetof(VexGuestX86State,guest_XMM7)
+
+#define OFFB_EMWARN offsetof(VexGuestX86State,guest_EMWARN)
+
+#define OFFB_TISTART offsetof(VexGuestX86State,guest_TISTART)
+#define OFFB_TILEN offsetof(VexGuestX86State,guest_TILEN)
+#define OFFB_NRADDR offsetof(VexGuestX86State,guest_NRADDR)
+
+#define OFFB_IP_AT_SYSCALL offsetof(VexGuestX86State,guest_IP_AT_SYSCALL)
+
+
+/*------------------------------------------------------------*/
+/*--- Helper bits and pieces for deconstructing the ---*/
+/*--- x86 insn stream. ---*/
+/*------------------------------------------------------------*/
+
+/* This is the Intel register encoding -- integer regs. */
+#define R_EAX 0
+#define R_ECX 1
+#define R_EDX 2
+#define R_EBX 3
+#define R_ESP 4
+#define R_EBP 5
+#define R_ESI 6
+#define R_EDI 7
+
+#define R_AL (0+R_EAX)
+#define R_AH (4+R_EAX)
+
+/* This is the Intel register encoding -- segment regs. */
+#define R_ES 0
+#define R_CS 1
+#define R_SS 2
+#define R_DS 3
+#define R_FS 4
+#define R_GS 5
+
+
+/* Add a statement to the list held by "irsb". */
+static void stmt ( IRStmt* st )
+{
+ addStmtToIRSB( irsb, st );
+}
+
+/* Generate a new temporary of the given type. */
+static IRTemp newTemp ( IRType ty )
+{
+ vassert(isPlausibleIRType(ty));
+ return newIRTemp( irsb->tyenv, ty );
+}
+
+/* Various simple conversions */
+
+static UInt extend_s_8to32( UInt x )
+{
+ return (UInt)((((Int)x) << 24) >> 24);
+}
+
+static UInt extend_s_16to32 ( UInt x )
+{
+ return (UInt)((((Int)x) << 16) >> 16);
+}
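+
+/* E.g. extend_s_8to32(0x80) == 0xFFFFFF80 and
+   extend_s_16to32(0x8000) == 0xFFFF8000. */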
+
+/* Fetch a byte from the guest insn stream. */
+static UChar getIByte ( Int delta )
+{
+ return guest_code[delta];
+}
+
+/* Extract the reg field from a modRM byte. */
+static Int gregOfRM ( UChar mod_reg_rm )
+{
+ return (Int)( (mod_reg_rm >> 3) & 7 );
+}
+
+/* Figure out whether the mod and rm parts of a modRM byte refer to a
+ register or memory. If so, the byte will have the form 11XXXYYY,
+ where YYY is the register number. */
+static Bool epartIsReg ( UChar mod_reg_rm )
+{
+ return toBool(0xC0 == (mod_reg_rm & 0xC0));
+}
+
+/* ... and extract the register number ... */
+static Int eregOfRM ( UChar mod_reg_rm )
+{
+ return (Int)(mod_reg_rm & 0x7);
+}
+
+/* Get an 8/16/32-bit unsigned value out of the insn stream. */
+
+static UChar getUChar ( Int delta )
+{
+ UChar v = guest_code[delta+0];
+ return toUChar(v);
+}
+
+static UInt getUDisp16 ( Int delta )
+{
+ UInt v = guest_code[delta+1]; v <<= 8;
+ v |= guest_code[delta+0];
+ return v & 0xFFFF;
+}
+
+static UInt getUDisp32 ( Int delta )
+{
+ UInt v = guest_code[delta+3]; v <<= 8;
+ v |= guest_code[delta+2]; v <<= 8;
+ v |= guest_code[delta+1]; v <<= 8;
+ v |= guest_code[delta+0];
+ return v;
+}
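+
+/* E.g. the byte sequence 78 56 34 12 at 'delta' is read,
+   little-endian, as 0x12345678. */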
+
+static UInt getUDisp ( Int size, Int delta )
+{
+ switch (size) {
+ case 4: return getUDisp32(delta);
+ case 2: return getUDisp16(delta);
+ case 1: return (UInt)getUChar(delta);
+ default: vpanic("getUDisp(x86)");
+ }
+ return 0; /*notreached*/
+}
+
+
+/* Get a byte value out of the insn stream and sign-extend to 32
+ bits. */
+static UInt getSDisp8 ( Int delta )
+{
+ return extend_s_8to32( (UInt) (guest_code[delta]) );
+}
+
+static UInt getSDisp16 ( Int delta0 )
+{
+ UChar* eip = (UChar*)(&guest_code[delta0]);
+ UInt d = *eip++;
+ d |= ((*eip++) << 8);
+ return extend_s_16to32(d);
+}
+
+static UInt getSDisp ( Int size, Int delta )
+{
+ switch (size) {
+ case 4: return getUDisp32(delta);
+ case 2: return getSDisp16(delta);
+ case 1: return getSDisp8(delta);
+ default: vpanic("getSDisp(x86)");
+ }
+ return 0; /*notreached*/
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Helpers for constructing IR. ---*/
+/*------------------------------------------------------------*/
+
+/* Create a 1/2/4 byte read of an x86 integer register.  For 16/8 bit
+ register references, we need to take the host endianness into
+ account. Supplied value is 0 .. 7 and in the Intel instruction
+ encoding. */
+
+static IRType szToITy ( Int n )
+{
+ switch (n) {
+ case 1: return Ity_I8;
+ case 2: return Ity_I16;
+ case 4: return Ity_I32;
+ default: vpanic("szToITy(x86)");
+ }
+}
+
+/* On a little-endian host, less significant bits of the guest
+   registers are at lower addresses.  Therefore, a reference to the
+   low half of a register has the same guest state offset as a
+   reference to the full register.
+*/
+static Int integerGuestRegOffset ( Int sz, UInt archreg )
+{
+ vassert(archreg < 8);
+
+ /* Correct for little-endian host only. */
+ vassert(!host_is_bigendian);
+
+ if (sz == 4 || sz == 2 || (sz == 1 && archreg < 4)) {
+ switch (archreg) {
+ case R_EAX: return OFFB_EAX;
+ case R_EBX: return OFFB_EBX;
+ case R_ECX: return OFFB_ECX;
+ case R_EDX: return OFFB_EDX;
+ case R_ESI: return OFFB_ESI;
+ case R_EDI: return OFFB_EDI;
+ case R_ESP: return OFFB_ESP;
+ case R_EBP: return OFFB_EBP;
+ default: vpanic("integerGuestRegOffset(x86,le)(4,2)");
+ }
+ }
+
+ vassert(archreg >= 4 && archreg < 8 && sz == 1);
+ switch (archreg-4) {
+ case R_EAX: return 1+ OFFB_EAX;
+ case R_EBX: return 1+ OFFB_EBX;
+ case R_ECX: return 1+ OFFB_ECX;
+ case R_EDX: return 1+ OFFB_EDX;
+ default: vpanic("integerGuestRegOffset(x86,le)(1h)");
+ }
+
+ /* NOTREACHED */
+ vpanic("integerGuestRegOffset(x86,le)");
+}
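+
+/* Illustrative consequence of the layout above: on a little-endian
+ host, getIReg(2,R_EAX) (%ax) and getIReg(4,R_EAX) (%eax) both read
+ from OFFB_EAX, while the 8-bit high-half registers (archreg 4..7
+ with sz == 1) read one byte further on, e.g. %ah at 1 + OFFB_EAX. */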
+
+static Int segmentGuestRegOffset ( UInt sreg )
+{
+ switch (sreg) {
+ case R_ES: return OFFB_ES;
+ case R_CS: return OFFB_CS;
+ case R_SS: return OFFB_SS;
+ case R_DS: return OFFB_DS;
+ case R_FS: return OFFB_FS;
+ case R_GS: return OFFB_GS;
+ default: vpanic("segmentGuestRegOffset(x86)");
+ }
+}
+
+static Int xmmGuestRegOffset ( UInt xmmreg )
+{
+ switch (xmmreg) {
+ case 0: return OFFB_XMM0;
+ case 1: return OFFB_XMM1;
+ case 2: return OFFB_XMM2;
+ case 3: return OFFB_XMM3;
+ case 4: return OFFB_XMM4;
+ case 5: return OFFB_XMM5;
+ case 6: return OFFB_XMM6;
+ case 7: return OFFB_XMM7;
+ default: vpanic("xmmGuestRegOffset");
+ }
+}
+
+/* Lanes of vector registers are always numbered from zero, zero
+ being the least significant lane (rightmost in the register). */
+
+static Int xmmGuestRegLane16offset ( UInt xmmreg, Int laneno )
+{
+ /* Correct for little-endian host only. */
+ vassert(!host_is_bigendian);
+ vassert(laneno >= 0 && laneno < 8);
+ return xmmGuestRegOffset( xmmreg ) + 2 * laneno;
+}
+
+static Int xmmGuestRegLane32offset ( UInt xmmreg, Int laneno )
+{
+ /* Correct for little-endian host only. */
+ vassert(!host_is_bigendian);
+ vassert(laneno >= 0 && laneno < 4);
+ return xmmGuestRegOffset( xmmreg ) + 4 * laneno;
+}
+
+static Int xmmGuestRegLane64offset ( UInt xmmreg, Int laneno )
+{
+ /* Correct for little-endian host only. */
+ vassert(!host_is_bigendian);
+ vassert(laneno >= 0 && laneno < 2);
+ return xmmGuestRegOffset( xmmreg ) + 8 * laneno;
+}
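+
+/* Example (illustrative): xmmGuestRegLane32offset(2,1) is
+ OFFB_XMM2 + 4, i.e. bits 63:32 of %xmm2, and
+ xmmGuestRegLane64offset(2,1) is OFFB_XMM2 + 8, i.e. bits 127:64. */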
+
+static IRExpr* getIReg ( Int sz, UInt archreg )
+{
+ vassert(sz == 1 || sz == 2 || sz == 4);
+ vassert(archreg < 8);
+ return IRExpr_Get( integerGuestRegOffset(sz,archreg),
+ szToITy(sz) );
+}
+
+/* Ditto, but write to a reg instead. */
+static void putIReg ( Int sz, UInt archreg, IRExpr* e )
+{
+ IRType ty = typeOfIRExpr(irsb->tyenv, e);
+ switch (sz) {
+ case 1: vassert(ty == Ity_I8); break;
+ case 2: vassert(ty == Ity_I16); break;
+ case 4: vassert(ty == Ity_I32); break;
+ default: vpanic("putIReg(x86)");
+ }
+ vassert(archreg < 8);
+ stmt( IRStmt_Put(integerGuestRegOffset(sz,archreg), e) );
+}
+
+static IRExpr* getSReg ( UInt sreg )
+{
+ return IRExpr_Get( segmentGuestRegOffset(sreg), Ity_I16 );
+}
+
+static void putSReg ( UInt sreg, IRExpr* e )
+{
+ vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I16);
+ stmt( IRStmt_Put( segmentGuestRegOffset(sreg), e ) );
+}
+
+static IRExpr* getXMMReg ( UInt xmmreg )
+{
+ return IRExpr_Get( xmmGuestRegOffset(xmmreg), Ity_V128 );
+}
+
+static IRExpr* getXMMRegLane64 ( UInt xmmreg, Int laneno )
+{
+ return IRExpr_Get( xmmGuestRegLane64offset(xmmreg,laneno), Ity_I64 );
+}
+
+static IRExpr* getXMMRegLane64F ( UInt xmmreg, Int laneno )
+{
+ return IRExpr_Get( xmmGuestRegLane64offset(xmmreg,laneno), Ity_F64 );
+}
+
+static IRExpr* getXMMRegLane32 ( UInt xmmreg, Int laneno )
+{
+ return IRExpr_Get( xmmGuestRegLane32offset(xmmreg,laneno), Ity_I32 );
+}
+
+static IRExpr* getXMMRegLane32F ( UInt xmmreg, Int laneno )
+{
+ return IRExpr_Get( xmmGuestRegLane32offset(xmmreg,laneno), Ity_F32 );
+}
+
+static void putXMMReg ( UInt xmmreg, IRExpr* e )
+{
+ vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_V128);
+ stmt( IRStmt_Put( xmmGuestRegOffset(xmmreg), e ) );
+}
+
+static void putXMMRegLane64 ( UInt xmmreg, Int laneno, IRExpr* e )
+{
+ vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
+ stmt( IRStmt_Put( xmmGuestRegLane64offset(xmmreg,laneno), e ) );
+}
+
+static void putXMMRegLane64F ( UInt xmmreg, Int laneno, IRExpr* e )
+{
+ vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F64);
+ stmt( IRStmt_Put( xmmGuestRegLane64offset(xmmreg,laneno), e ) );
+}
+
+static void putXMMRegLane32F ( UInt xmmreg, Int laneno, IRExpr* e )
+{
+ vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F32);
+ stmt( IRStmt_Put( xmmGuestRegLane32offset(xmmreg,laneno), e ) );
+}
+
+static void putXMMRegLane32 ( UInt xmmreg, Int laneno, IRExpr* e )
+{
+ vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I32);
+ stmt( IRStmt_Put( xmmGuestRegLane32offset(xmmreg,laneno), e ) );
+}
+
+static void putXMMRegLane16 ( UInt xmmreg, Int laneno, IRExpr* e )
+{
+ vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I16);
+ stmt( IRStmt_Put( xmmGuestRegLane16offset(xmmreg,laneno), e ) );
+}
+
+static void assign ( IRTemp dst, IRExpr* e )
+{
+ stmt( IRStmt_WrTmp(dst, e) );
+}
+
+static void storeLE ( IRExpr* addr, IRExpr* data )
+{
+ stmt( IRStmt_Store(Iend_LE, addr, data) );
+}
+
+static IRExpr* unop ( IROp op, IRExpr* a )
+{
+ return IRExpr_Unop(op, a);
+}
+
+static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
+{
+ return IRExpr_Binop(op, a1, a2);
+}
+
+static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 )
+{
+ return IRExpr_Triop(op, a1, a2, a3);
+}
+
+static IRExpr* mkexpr ( IRTemp tmp )
+{
+ return IRExpr_RdTmp(tmp);
+}
+
+static IRExpr* mkU8 ( UInt i )
+{
+ vassert(i < 256);
+ return IRExpr_Const(IRConst_U8( (UChar)i ));
+}
+
+static IRExpr* mkU16 ( UInt i )
+{
+ vassert(i < 65536);
+ return IRExpr_Const(IRConst_U16( (UShort)i ));
+}
+
+static IRExpr* mkU32 ( UInt i )
+{
+ return IRExpr_Const(IRConst_U32(i));
+}
+
+static IRExpr* mkU64 ( ULong i )
+{
+ return IRExpr_Const(IRConst_U64(i));
+}
+
+static IRExpr* mkU ( IRType ty, UInt i )
+{
+ if (ty == Ity_I8) return mkU8(i);
+ if (ty == Ity_I16) return mkU16(i);
+ if (ty == Ity_I32) return mkU32(i);
+ /* If this panics, it usually means you passed a size (1,2,4)
+ value as the IRType, rather than a real IRType. */
+ vpanic("mkU(x86)");
+}
+
+static IRExpr* mkV128 ( UShort mask )
+{
+ return IRExpr_Const(IRConst_V128(mask));
+}
+
+static IRExpr* loadLE ( IRType ty, IRExpr* addr )
+{
+ return IRExpr_Load(Iend_LE, ty, addr);
+}
+
+static IROp mkSizedOp ( IRType ty, IROp op8 )
+{
+ Int adj;
+ vassert(ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32);
+ vassert(op8 == Iop_Add8 || op8 == Iop_Sub8
+ || op8 == Iop_Mul8
+ || op8 == Iop_Or8 || op8 == Iop_And8 || op8 == Iop_Xor8
+ || op8 == Iop_Shl8 || op8 == Iop_Shr8 || op8 == Iop_Sar8
+ || op8 == Iop_CmpEQ8 || op8 == Iop_CmpNE8
+ || op8 == Iop_CasCmpNE8
+ || op8 == Iop_Not8);
+ adj = ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 1 : 2);
+ return adj + op8;
+}
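+
+/* mkSizedOp relies on the 8/16/32-bit variants of each operation
+ being declared consecutively in the IROp enumeration (which
+ libvex_ir.h arranges), so that, as a sketch of the intended
+ algebra:
+
+ mkSizedOp(Ity_I8, Iop_Xor8) == Iop_Xor8
+ mkSizedOp(Ity_I16, Iop_Xor8) == Iop_Xor16
+ mkSizedOp(Ity_I32, Iop_Xor8) == Iop_Xor32
+*/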
+
+static IROp mkWidenOp ( Int szSmall, Int szBig, Bool signd )
+{
+ if (szSmall == 1 && szBig == 4) {
+ return signd ? Iop_8Sto32 : Iop_8Uto32;
+ }
+ if (szSmall == 1 && szBig == 2) {
+ return signd ? Iop_8Sto16 : Iop_8Uto16;
+ }
+ if (szSmall == 2 && szBig == 4) {
+ return signd ? Iop_16Sto32 : Iop_16Uto32;
+ }
+ vpanic("mkWidenOp(x86,guest)");
+}
+
+static IRExpr* mkAnd1 ( IRExpr* x, IRExpr* y )
+{
+ vassert(typeOfIRExpr(irsb->tyenv,x) == Ity_I1);
+ vassert(typeOfIRExpr(irsb->tyenv,y) == Ity_I1);
+ return unop(Iop_32to1,
+ binop(Iop_And32,
+ unop(Iop_1Uto32,x),
+ unop(Iop_1Uto32,y)));
+}
+
+/* Generate a compare-and-swap operation, operating on memory at
+ 'addr'. The expected value is 'expVal' and the new value is
+ 'newVal'. If the operation fails, then transfer control (with a
+ no-redir jump (XXX no -- see comment at top of this file)) to
+ 'restart_point', which is presumably the address of the guest
+ instruction again -- retrying, essentially. */
+static void casLE ( IRExpr* addr, IRExpr* expVal, IRExpr* newVal,
+ Addr32 restart_point )
+{
+ IRCAS* cas;
+ IRType tyE = typeOfIRExpr(irsb->tyenv, expVal);
+ IRType tyN = typeOfIRExpr(irsb->tyenv, newVal);
+ IRTemp oldTmp = newTemp(tyE);
+ IRTemp expTmp = newTemp(tyE);
+ vassert(tyE == tyN);
+ vassert(tyE == Ity_I32 || tyE == Ity_I16 || tyE == Ity_I8);
+ assign(expTmp, expVal);
+ cas = mkIRCAS( IRTemp_INVALID, oldTmp, Iend_LE, addr,
+ NULL, mkexpr(expTmp), NULL, newVal );
+ stmt( IRStmt_CAS(cas) );
+ stmt( IRStmt_Exit(
+ binop( mkSizedOp(tyE,Iop_CasCmpNE8),
+ mkexpr(oldTmp), mkexpr(expTmp) ),
+ Ijk_Boring, /*Ijk_NoRedir*/
+ IRConst_U32( restart_point )
+ ));
+}
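+
+/* Usage sketch (illustrative; 'addr', 'old' and 'new' are assumed
+ names, with 'old' and 'new' same-typed temporaries): a locked
+ read-modify-write such as "lock addl" loads into 'old', computes
+ 'new', and then calls
+
+ casLE( mkexpr(addr), mkexpr(old), mkexpr(new),
+ guest_EIP_curr_instr );
+
+ so that if another thread changed the location in between, the
+ whole instruction is retried from its own address. */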
+
+
+/*------------------------------------------------------------*/
+/*--- Helpers for %eflags. ---*/
+/*------------------------------------------------------------*/
+
+/* -------------- Evaluating the flags-thunk. -------------- */
+
+/* Build IR to calculate all the eflags from stored
+ CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
+ Ity_I32. */
+static IRExpr* mk_x86g_calculate_eflags_all ( void )
+{
+ IRExpr** args
+ = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I32),
+ IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
+ IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
+ IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
+ IRExpr* call
+ = mkIRExprCCall(
+ Ity_I32,
+ 0/*regparm*/,
+ "x86g_calculate_eflags_all", &x86g_calculate_eflags_all,
+ args
+ );
+ /* Exclude OP and NDEP from definedness checking. We're only
+ interested in DEP1 and DEP2. */
+ call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
+ return call;
+}
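+
+/* To make the lazy-flags scheme concrete (an illustrative note, not
+ extra functionality): after "addl %ebx,%eax" the thunk holds
+ CC_OP = X86G_CC_OP_ADDL, CC_DEP1 = the first argument and
+ CC_DEP2 = the second. No flags are computed at that point; only
+ when a later instruction such as "jz" needs them does the IR call
+ x86g_calculate_eflags_all/_c/_condition to rederive them from the
+ thunk fields. */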
+
+/* Build IR to calculate some particular condition from stored
+ CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
+ Ity_Bit. */
+static IRExpr* mk_x86g_calculate_condition ( X86Condcode cond )
+{
+ IRExpr** args
+ = mkIRExprVec_5( mkU32(cond),
+ IRExpr_Get(OFFB_CC_OP, Ity_I32),
+ IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
+ IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
+ IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
+ IRExpr* call
+ = mkIRExprCCall(
+ Ity_I32,
+ 0/*regparm*/,
+ "x86g_calculate_condition", &x86g_calculate_condition,
+ args
+ );
+ /* Exclude the requested condition, OP and NDEP from definedness
+ checking. We're only interested in DEP1 and DEP2. */
+ call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<1) | (1<<4);
+ return unop(Iop_32to1, call);
+}
+
+/* Build IR to calculate just the carry flag from stored
+ CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression :: Ity_I32. */
+static IRExpr* mk_x86g_calculate_eflags_c ( void )
+{
+ IRExpr** args
+ = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I32),
+ IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
+ IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
+ IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
+ IRExpr* call
+ = mkIRExprCCall(
+ Ity_I32,
+ 3/*regparm*/,
+ "x86g_calculate_eflags_c", &x86g_calculate_eflags_c,
+ args
+ );
+ /* Exclude OP and NDEP from definedness checking. We're only
+ interested in DEP1 and DEP2. */
+ call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
+ return call;
+}
+
+
+/* -------------- Building the flags-thunk. -------------- */
+
+/* The machinery in this section builds the flag-thunk following a
+ flag-setting operation. Hence the various setFlags_* functions.
+*/
+
+static Bool isAddSub ( IROp op8 )
+{
+ return toBool(op8 == Iop_Add8 || op8 == Iop_Sub8);
+}
+
+static Bool isLogic ( IROp op8 )
+{
+ return toBool(op8 == Iop_And8 || op8 == Iop_Or8 || op8 == Iop_Xor8);
+}
+
+/* U-widen 8/16/32 bit int expr to 32. */
+static IRExpr* widenUto32 ( IRExpr* e )
+{
+ switch (typeOfIRExpr(irsb->tyenv,e)) {
+ case Ity_I32: return e;
+ case Ity_I16: return unop(Iop_16Uto32,e);
+ case Ity_I8: return unop(Iop_8Uto32,e);
+ default: vpanic("widenUto32");
+ }
+}
+
+/* S-widen 8/16/32 bit int expr to 32. */
+static IRExpr* widenSto32 ( IRExpr* e )
+{
+ switch (typeOfIRExpr(irsb->tyenv,e)) {
+ case Ity_I32: return e;
+ case Ity_I16: return unop(Iop_16Sto32,e);
+ case Ity_I8: return unop(Iop_8Sto32,e);
+ default: vpanic("widenSto32");
+ }
+}
+
+/* Narrow 8/16/32 bit int expr to 8/16/32. Clearly only some
+ of these combinations make sense. */
+static IRExpr* narrowTo ( IRType dst_ty, IRExpr* e )
+{
+ IRType src_ty = typeOfIRExpr(irsb->tyenv,e);
+ if (src_ty == dst_ty)
+ return e;
+ if (src_ty == Ity_I32 && dst_ty == Ity_I16)
+ return unop(Iop_32to16, e);
+ if (src_ty == Ity_I32 && dst_ty == Ity_I8)
+ return unop(Iop_32to8, e);
+
+ vex_printf("\nsrc, dst tys are: ");
+ ppIRType(src_ty);
+ vex_printf(", ");
+ ppIRType(dst_ty);
+ vex_printf("\n");
+ vpanic("narrowTo(x86)");
+}
+
+
+/* Set the flags thunk OP, DEP1 and DEP2 fields. The supplied op is
+ auto-sized up to the real op. */
+
+static
+void setFlags_DEP1_DEP2 ( IROp op8, IRTemp dep1, IRTemp dep2, IRType ty )
+{
+ Int ccOp = ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 1 : 2);
+
+ vassert(ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32);
+
+ switch (op8) {
+ case Iop_Add8: ccOp += X86G_CC_OP_ADDB; break;
+ case Iop_Sub8: ccOp += X86G_CC_OP_SUBB; break;
+ default: ppIROp(op8);
+ vpanic("setFlags_DEP1_DEP2(x86)");
+ }
+ stmt( IRStmt_Put( OFFB_CC_OP, mkU32(ccOp)) );
+ stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto32(mkexpr(dep1))) );
+ stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto32(mkexpr(dep2))) );
+ /* Set NDEP even though it isn't used. This makes redundant-PUT
+ elimination of previous stores to this field work better. */
+ stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
+}
+
+
+/* Set the OP and DEP1 fields only, and write zero to DEP2. */
+
+static
+void setFlags_DEP1 ( IROp op8, IRTemp dep1, IRType ty )
+{
+ Int ccOp = ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 1 : 2);
+
+ vassert(ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32);
+
+ switch (op8) {
+ case Iop_Or8:
+ case Iop_And8:
+ case Iop_Xor8: ccOp += X86G_CC_OP_LOGICB; break;
+ default: ppIROp(op8);
+ vpanic("setFlags_DEP1(x86)");
+ }
+ stmt( IRStmt_Put( OFFB_CC_OP, mkU32(ccOp)) );
+ stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto32(mkexpr(dep1))) );
+ stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0)) );
+ /* Set NDEP even though it isn't used. This makes redundant-PUT
+ elimination of previous stores to this field work better. */
+ stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
+}
+
+
+/* For shift operations, we put in the result and the undershifted
+ result. If the shift amount is zero, however, the thunk is left
+ unchanged. */
+
+static void setFlags_DEP1_DEP2_shift ( IROp op32,
+ IRTemp res,
+ IRTemp resUS,
+ IRType ty,
+ IRTemp guard )
+{
+ Int ccOp = ty==Ity_I8 ? 2 : (ty==Ity_I16 ? 1 : 0);
+
+ vassert(ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32);
+ vassert(guard);
+
+ /* Both kinds of right shifts are handled by the same thunk
+ operation. */
+ switch (op32) {
+ case Iop_Shr32:
+ case Iop_Sar32: ccOp = X86G_CC_OP_SHRL - ccOp; break;
+ case Iop_Shl32: ccOp = X86G_CC_OP_SHLL - ccOp; break;
+ default: ppIROp(op32);
+ vpanic("setFlags_DEP1_DEP2_shift(x86)");
+ }
+
+ /* DEP1 contains the result, DEP2 contains the undershifted value. */
+ stmt( IRStmt_Put( OFFB_CC_OP,
+ IRExpr_Mux0X( mkexpr(guard),
+ IRExpr_Get(OFFB_CC_OP,Ity_I32),
+ mkU32(ccOp))) );
+ stmt( IRStmt_Put( OFFB_CC_DEP1,
+ IRExpr_Mux0X( mkexpr(guard),
+ IRExpr_Get(OFFB_CC_DEP1,Ity_I32),
+ widenUto32(mkexpr(res)))) );
+ stmt( IRStmt_Put( OFFB_CC_DEP2,
+ IRExpr_Mux0X( mkexpr(guard),
+ IRExpr_Get(OFFB_CC_DEP2,Ity_I32),
+ widenUto32(mkexpr(resUS)))) );
+ /* Set NDEP even though it isn't used. This makes redundant-PUT
+ elimination of previous stores to this field work better. */
+ stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
+}
+
+
+/* For the inc/dec case, we store in DEP1 the result value and in NDEP
+ the former value of the carry flag, which unfortunately we have to
+ compute. */
+
+static void setFlags_INC_DEC ( Bool inc, IRTemp res, IRType ty )
+{
+ Int ccOp = inc ? X86G_CC_OP_INCB : X86G_CC_OP_DECB;
+
+ ccOp += ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 1 : 2);
+ vassert(ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32);
+
+ /* This has to come first, because calculating the C flag
+ may require reading all four thunk fields. */
+ stmt( IRStmt_Put( OFFB_CC_NDEP, mk_x86g_calculate_eflags_c()) );
+ stmt( IRStmt_Put( OFFB_CC_OP, mkU32(ccOp)) );
+ stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto32(mkexpr(res))) );
+ stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0)) );
+}
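+
+/* Example of why the carry is parked in NDEP (illustrative): x86
+ INC/DEC update O/S/Z/A/P but leave C unchanged, so after
+ "stc ; incl %eax" the thunk holds OP = X86G_CC_OP_INCL,
+ DEP1 = the result and NDEP = 1, from which the eflags helpers can
+ reinstate C = 1 while recomputing the other flags. */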
+
+
+/* Multiplies are pretty much like add and sub: DEP1 and DEP2 hold the
+ two arguments. */
+
+static
+void setFlags_MUL ( IRType ty, IRTemp arg1, IRTemp arg2, UInt base_op )
+{
+ switch (ty) {
+ case Ity_I8:
+ stmt( IRStmt_Put( OFFB_CC_OP, mkU32(base_op+0) ) );
+ break;
+ case Ity_I16:
+ stmt( IRStmt_Put( OFFB_CC_OP, mkU32(base_op+1) ) );
+ break;
+ case Ity_I32:
+ stmt( IRStmt_Put( OFFB_CC_OP, mkU32(base_op+2) ) );
+ break;
+ default:
+ vpanic("setFlags_MUL(x86)");
+ }
+ stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto32(mkexpr(arg1)) ));
+ stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto32(mkexpr(arg2)) ));
+ /* Set NDEP even though it isn't used. This makes redundant-PUT
+ elimination of previous stores to this field work better. */
+ stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
+}
+
+
+/* -------------- Condition codes. -------------- */
+
+/* Condition codes, using the Intel encoding. */
+
+static HChar* name_X86Condcode ( X86Condcode cond )
+{
+ switch (cond) {
+ case X86CondO: return "o";
+ case X86CondNO: return "no";
+ case X86CondB: return "b";
+ case X86CondNB: return "nb";
+ case X86CondZ: return "z";
+ case X86CondNZ: return "nz";
+ case X86CondBE: return "be";
+ case X86CondNBE: return "nbe";
+ case X86CondS: return "s";
+ case X86CondNS: return "ns";
+ case X86CondP: return "p";
+ case X86CondNP: return "np";
+ case X86CondL: return "l";
+ case X86CondNL: return "nl";
+ case X86CondLE: return "le";
+ case X86CondNLE: return "nle";
+ case X86CondAlways: return "ALWAYS";
+ default: vpanic("name_X86Condcode");
+ }
+}
+
+static
+X86Condcode positiveIse_X86Condcode ( X86Condcode cond,
+ Bool* needInvert )
+{
+ vassert(cond >= X86CondO && cond <= X86CondNLE);
+ if (cond & 1) {
+ *needInvert = True;
+ return cond-1;
+ } else {
+ *needInvert = False;
+ return cond;
+ }
+}
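+
+/* This works because the Intel encoding pairs each condition with
+ its negation at (cond ^ 1): for example (illustratively),
+ positiveIse_X86Condcode(X86CondNZ, &inv) returns X86CondZ with
+ inv == True, while X86CondZ itself comes back unchanged with
+ inv == False. */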
+
+
+/* -------------- Helpers for ADD/SUB with carry. -------------- */
+
+/* Given ta1, ta2 and tres, compute tres = ADC(ta1,ta2) and set flags
+ appropriately.
+
+ Optionally, generate a store for the 'tres' value. This can either
+ be a normal store, or it can be a cas-with-possible-failure style
+ store:
+
+ if taddr is IRTemp_INVALID, then no store is generated.
+
+ if taddr is not IRTemp_INVALID, then a store (using taddr as
+ the address) is generated:
+
+ if texpVal is IRTemp_INVALID then a normal store is
+ generated, and restart_point must be zero (it is irrelevant).
+
+ if texpVal is not IRTemp_INVALID then a cas-style store is
+ generated. texpVal is the expected value, restart_point
+ is the restart point if the store fails, and texpVal must
+ have the same type as tres.
+*/
+static void helper_ADC ( Int sz,
+ IRTemp tres, IRTemp ta1, IRTemp ta2,
+ /* info about optional store: */
+ IRTemp taddr, IRTemp texpVal, Addr32 restart_point )
+{
+ UInt thunkOp;
+ IRType ty = szToITy(sz);
+ IRTemp oldc = newTemp(Ity_I32);
+ IRTemp oldcn = newTemp(ty);
+ IROp plus = mkSizedOp(ty, Iop_Add8);
+ IROp xor = mkSizedOp(ty, Iop_Xor8);
+
+ vassert(typeOfIRTemp(irsb->tyenv, tres) == ty);
+ vassert(sz == 1 || sz == 2 || sz == 4);
+ thunkOp = sz==4 ? X86G_CC_OP_ADCL
+ : (sz==2 ? X86G_CC_OP_ADCW : X86G_CC_OP_ADCB);
+
+ /* oldc = old carry flag, 0 or 1 */
+ assign( oldc, binop(Iop_And32,
+ mk_x86g_calculate_eflags_c(),
+ mkU32(1)) );
+
+ assign( oldcn, narrowTo(ty, mkexpr(oldc)) );
+
+ assign( tres, binop(plus,
+ binop(plus,mkexpr(ta1),mkexpr(ta2)),
+ mkexpr(oldcn)) );
+
+ /* Possibly generate a store of 'tres' to 'taddr'. See comment at
+ start of this function. */
+ if (taddr != IRTemp_INVALID) {
+ if (texpVal == IRTemp_INVALID) {
+ vassert(restart_point == 0);
+ storeLE( mkexpr(taddr), mkexpr(tres) );
+ } else {
+ vassert(typeOfIRTemp(irsb->tyenv, texpVal) == ty);
+ /* .. and hence 'texpVal' has the same type as 'tres'. */
+ casLE( mkexpr(taddr),
+ mkexpr(texpVal), mkexpr(tres), restart_point );
+ }
+ }
+
+ stmt( IRStmt_Put( OFFB_CC_OP, mkU32(thunkOp) ) );
+ stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto32(mkexpr(ta1)) ));
+ stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto32(binop(xor, mkexpr(ta2),
+ mkexpr(oldcn)) )) );
+ stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(oldc) ) );
+}
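+
+/* Note on the thunk layout chosen above (illustrative): DEP2 holds
+ ta2 ^ oldC rather than ta2 itself, and NDEP holds oldC, so the
+ eflags helpers can recover the real second argument as
+ DEP2 ^ NDEP. E.g. with ta2 == 0x10 and an incoming carry of 1,
+ DEP2 == 0x11 and NDEP == 1, giving back 0x10. */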
+
+
+/* Given ta1, ta2 and tres, compute tres = SBB(ta1,ta2) and set flags
+ appropriately. As with helper_ADC, possibly generate a store of
+ the result -- see comments on helper_ADC for details.
+*/
+static void helper_SBB ( Int sz,
+ IRTemp tres, IRTemp ta1, IRTemp ta2,
+ /* info about optional store: */
+ IRTemp taddr, IRTemp texpVal, Addr32 restart_point )
+{
+ UInt thunkOp;
+ IRType ty = szToITy(sz);
+ IRTemp oldc = newTemp(Ity_I32);
+ IRTemp oldcn = newTemp(ty);
+ IROp minus = mkSizedOp(ty, Iop_Sub8);
+ IROp xor = mkSizedOp(ty, Iop_Xor8);
+
+ vassert(typeOfIRTemp(irsb->tyenv, tres) == ty);
+ vassert(sz == 1 || sz == 2 || sz == 4);
+ thunkOp = sz==4 ? X86G_CC_OP_SBBL
+ : (sz==2 ? X86G_CC_OP_SBBW : X86G_CC_OP_SBBB);
+
+ /* oldc = old carry flag, 0 or 1 */
+ assign( oldc, binop(Iop_And32,
+ mk_x86g_calculate_eflags_c(),
+ mkU32(1)) );
+
+ assign( oldcn, narrowTo(ty, mkexpr(oldc)) );
+
+ assign( tres, binop(minus,
+ binop(minus,mkexpr(ta1),mkexpr(ta2)),
+ mkexpr(oldcn)) );
+
+ /* Possibly generate a store of 'tres' to 'taddr'. See comment at
+ start of this function. */
+ if (taddr != IRTemp_INVALID) {
+ if (texpVal == IRTemp_INVALID) {
+ vassert(restart_point == 0);
+ storeLE( mkexpr(taddr), mkexpr(tres) );
+ } else {
+ vassert(typeOfIRTemp(irsb->tyenv, texpVal) == ty);
+ /* .. and hence 'texpVal' has the same type as 'tres'. */
+ casLE( mkexpr(taddr),
+ mkexpr(texpVal), mkexpr(tres), restart_point );
+ }
+ }
+
+ stmt( IRStmt_Put( OFFB_CC_OP, mkU32(thunkOp) ) );
+ stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto32(mkexpr(ta1)) ));
+ stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto32(binop(xor, mkexpr(ta2),
+ mkexpr(oldcn)) )) );
+ stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(oldc) ) );
+}
+
+
+/* -------------- Helpers for disassembly printing. -------------- */
+
+static HChar* nameGrp1 ( Int opc_aux )
+{
+ static HChar* grp1_names[8]
+ = { "add", "or", "adc", "sbb", "and", "sub", "xor", "cmp" };
+ if (opc_aux < 0 || opc_aux > 7) vpanic("nameGrp1(x86)");
+ return grp1_names[opc_aux];
+}
+
+static HChar* nameGrp2 ( Int opc_aux )
+{
+ static HChar* grp2_names[8]
+ = { "rol", "ror", "rcl", "rcr", "shl", "shr", "shl", "sar" };
+ if (opc_aux < 0 || opc_aux > 7) vpanic("nameGrp2(x86)");
+ return grp2_names[opc_aux];
+}
+
+static HChar* nameGrp4 ( Int opc_aux )
+{
+ static HChar* grp4_names[8]
+ = { "inc", "dec", "???", "???", "???", "???", "???", "???" };
+ if (opc_aux < 0 || opc_aux > 1) vpanic("nameGrp4(x86)");
+ return grp4_names[opc_aux];
+}
+
+static HChar* nameGrp5 ( Int opc_aux )
+{
+ static HChar* grp5_names[8]
+ = { "inc", "dec", "call*", "call*", "jmp*", "jmp*", "push", "???" };
+ if (opc_aux < 0 || opc_aux > 6) vpanic("nameGrp5(x86)");
+ return grp5_names[opc_aux];
+}
+
+static HChar* nameGrp8 ( Int opc_aux )
+{
+ static HChar* grp8_names[8]
+ = { "???", "???", "???", "???", "bt", "bts", "btr", "btc" };
+ if (opc_aux < 4 || opc_aux > 7) vpanic("nameGrp8(x86)");
+ return grp8_names[opc_aux];
+}
+
+static HChar* nameIReg ( Int size, Int reg )
+{
+ static HChar* ireg32_names[8]
+ = { "%eax", "%ecx", "%edx", "%ebx",
+ "%esp", "%ebp", "%esi", "%edi" };
+ static HChar* ireg16_names[8]
+ = { "%ax", "%cx", "%dx", "%bx", "%sp", "%bp", "%si", "%di" };
+ static HChar* ireg8_names[8]
+ = { "%al", "%cl", "%dl", "%bl",
+ "%ah{sp}", "%ch{bp}", "%dh{si}", "%bh{di}" };
+ if (reg < 0 || reg > 7) goto bad;
+ switch (size) {
+ case 4: return ireg32_names[reg];
+ case 2: return ireg16_names[reg];
+ case 1: return ireg8_names[reg];
+ }
+ bad:
+ vpanic("nameIReg(X86)");
+ return NULL; /*notreached*/
+}
+
+static HChar* nameSReg ( UInt sreg )
+{
+ switch (sreg) {
+ case R_ES: return "%es";
+ case R_CS: return "%cs";
+ case R_SS: return "%ss";
+ case R_DS: return "%ds";
+ case R_FS: return "%fs";
+ case R_GS: return "%gs";
+ default: vpanic("nameSReg(x86)");
+ }
+}
+
+static HChar* nameMMXReg ( Int mmxreg )
+{
+ static HChar* mmx_names[8]
+ = { "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7" };
+ if (mmxreg < 0 || mmxreg > 7) vpanic("nameMMXReg(x86,guest)");
+ return mmx_names[mmxreg];
+}
+
+static HChar* nameXMMReg ( Int xmmreg )
+{
+ static HChar* xmm_names[8]
+ = { "%xmm0", "%xmm1", "%xmm2", "%xmm3",
+ "%xmm4", "%xmm5", "%xmm6", "%xmm7" };
+ if (xmmreg < 0 || xmmreg > 7) vpanic("name_of_xmm_reg");
+ return xmm_names[xmmreg];
+}
+
+static HChar* nameMMXGran ( Int gran )
+{
+ switch (gran) {
+ case 0: return "b";
+ case 1: return "w";
+ case 2: return "d";
+ case 3: return "q";
+ default: vpanic("nameMMXGran(x86,guest)");
+ }
+}
+
+static HChar nameISize ( Int size )
+{
+ switch (size) {
+ case 4: return 'l';
+ case 2: return 'w';
+ case 1: return 'b';
+ default: vpanic("nameISize(x86)");
+ }
+}
+
+
+/*------------------------------------------------------------*/
+/*--- JMP helpers ---*/
+/*------------------------------------------------------------*/
+
+static void jmp_lit( IRJumpKind kind, Addr32 d32 )
+{
+ irsb->next = mkU32(d32);
+ irsb->jumpkind = kind;
+}
+
+static void jmp_treg( IRJumpKind kind, IRTemp t )
+{
+ irsb->next = mkexpr(t);
+ irsb->jumpkind = kind;
+}
+
+static
+void jcc_01( X86Condcode cond, Addr32 d32_false, Addr32 d32_true )
+{
+ Bool invert;
+ X86Condcode condPos;
+ condPos = positiveIse_X86Condcode ( cond, &invert );
+ if (invert) {
+ stmt( IRStmt_Exit( mk_x86g_calculate_condition(condPos),
+ Ijk_Boring,
+ IRConst_U32(d32_false) ) );
+ irsb->next = mkU32(d32_true);
+ irsb->jumpkind = Ijk_Boring;
+ } else {
+ stmt( IRStmt_Exit( mk_x86g_calculate_condition(condPos),
+ Ijk_Boring,
+ IRConst_U32(d32_true) ) );
+ irsb->next = mkU32(d32_false);
+ irsb->jumpkind = Ijk_Boring;
+ }
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Disassembling addressing modes ---*/
+/*------------------------------------------------------------*/
+
+static
+HChar* sorbTxt ( UChar sorb )
+{
+ switch (sorb) {
+ case 0: return ""; /* no override */
+ case 0x3E: return "%ds:";
+ case 0x26: return "%es:";
+ case 0x64: return "%fs:";
+ case 0x65: return "%gs:";
+ default: vpanic("sorbTxt(x86,guest)");
+ }
+}
+
+
+/* 'virtual' is an IRExpr* holding a virtual address. Convert it to a
+ linear address by adding any required segment override as indicated
+ by sorb. */
+static
+IRExpr* handleSegOverride ( UChar sorb, IRExpr* virtual )
+{
+ Int sreg;
+ IRType hWordTy;
+ IRTemp ldt_ptr, gdt_ptr, seg_selector, r64;
+
+ if (sorb == 0)
+ /* the common case - no override */
+ return virtual;
+
+ switch (sorb) {
+ case 0x3E: sreg = R_DS; break;
+ case 0x26: sreg = R_ES; break;
+ case 0x64: sreg = R_FS; break;
+ case 0x65: sreg = R_GS; break;
+ default: vpanic("handleSegOverride(x86,guest)");
+ }
+
+ hWordTy = sizeof(HWord)==4 ? Ity_I32 : Ity_I64;
+
+ seg_selector = newTemp(Ity_I32);
+ ldt_ptr = newTemp(hWordTy);
+ gdt_ptr = newTemp(hWordTy);
+ r64 = newTemp(Ity_I64);
+
+ assign( seg_selector, unop(Iop_16Uto32, getSReg(sreg)) );
+ assign( ldt_ptr, IRExpr_Get( OFFB_LDT, hWordTy ));
+ assign( gdt_ptr, IRExpr_Get( OFFB_GDT, hWordTy ));
+
+ /*
+ Call this to do the translation and limit checks:
+ ULong x86g_use_seg_selector ( HWord ldt, HWord gdt,
+ UInt seg_selector, UInt virtual_addr )
+ */
+ assign(
+ r64,
+ mkIRExprCCall(
+ Ity_I64,
+ 0/*regparms*/,
+ "x86g_use_seg_selector",
+ &x86g_use_seg_selector,
+ mkIRExprVec_4( mkexpr(ldt_ptr), mkexpr(gdt_ptr),
+ mkexpr(seg_selector), virtual)
+ )
+ );
+
+ /* If the high 32 bits of the result are non-zero, there was a
+ failure in address translation. In which case, make a
+ quick exit.
+ */
+ stmt(
+ IRStmt_Exit(
+ binop(Iop_CmpNE32, unop(Iop_64HIto32, mkexpr(r64)), mkU32(0)),
+ Ijk_MapFail,
+ IRConst_U32( guest_EIP_curr_instr )
+ )
+ );
+
+ /* otherwise, here's the translated result. */
+ return unop(Iop_64to32, mkexpr(r64));
+}
+
+
+/* Generate IR to calculate an address indicated by a ModRM and
+ following SIB bytes. The expression, and the number of bytes in
+ the address mode, are returned. Note that this fn should not be
+ called if the R/M part of the address denotes a register instead of
+ memory. Text of the addressing mode is placed in buf.
+
+ The computed address is stored in a new tempreg, and the
+ identity of the tempreg is returned. */
+
+static IRTemp disAMode_copy2tmp ( IRExpr* addr32 )
+{
+ IRTemp tmp = newTemp(Ity_I32);
+ assign( tmp, addr32 );
+ return tmp;
+}
+
+static
+IRTemp disAMode ( Int* len, UChar sorb, Int delta, HChar* buf )
+{
+ UChar mod_reg_rm = getIByte(delta);
+ delta++;
+
+ buf[0] = (UChar)0;
+
+ /* squeeze out the reg field from mod_reg_rm, since a 256-entry
+ jump table seems a bit excessive.
+ */
+ mod_reg_rm &= 0xC7; /* is now XX000YYY */
+ mod_reg_rm = toUChar(mod_reg_rm | (mod_reg_rm >> 3));
+ /* is now XX0XXYYY */
+ mod_reg_rm &= 0x1F; /* is now 000XXYYY */
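+ /* Worked example (illustrative): mod_reg_rm == 0x95, i.e.
+ mod=10 reg=010 rm=101, goes 0x95 & 0xC7 == 0x85, then
+ 0x85 | (0x85 >> 3) == 0x95, then 0x95 & 0x1F == 0x15 --
+ which lands in the d32(%ebp) group of cases below. */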
+ switch (mod_reg_rm) {
+
+ /* (%eax) .. (%edi), not including (%esp) or (%ebp).
+ --> GET %reg, t
+ */
+ case 0x00: case 0x01: case 0x02: case 0x03:
+ /* ! 04 */ /* ! 05 */ case 0x06: case 0x07:
+ { UChar rm = mod_reg_rm;
+ DIS(buf, "%s(%s)", sorbTxt(sorb), nameIReg(4,rm));
+ *len = 1;
+ return disAMode_copy2tmp(
+ handleSegOverride(sorb, getIReg(4,rm)));
+ }
+
+ /* d8(%eax) ... d8(%edi), not including d8(%esp)
+ --> GET %reg, t ; ADDL d8, t
+ */
+ case 0x08: case 0x09: case 0x0A: case 0x0B:
+ /* ! 0C */ case 0x0D: case 0x0E: case 0x0F:
+ { UChar rm = toUChar(mod_reg_rm & 7);
+ UInt d = getSDisp8(delta);
+ DIS(buf, "%s%d(%s)", sorbTxt(sorb), (Int)d, nameIReg(4,rm));
+ *len = 2;
+ return disAMode_copy2tmp(
+ handleSegOverride(sorb,
+ binop(Iop_Add32,getIReg(4,rm),mkU32(d))));
+ }
+
+ /* d32(%eax) ... d32(%edi), not including d32(%esp)
+ --> GET %reg, t ; ADDL d32, t
+ */
+ case 0x10: case 0x11: case 0x12: case 0x13:
+ /* ! 14 */ case 0x15: case 0x16: case 0x17:
+ { UChar rm = toUChar(mod_reg_rm & 7);
+ UInt d = getUDisp32(delta);
+ DIS(buf, "%s0x%x(%s)", sorbTxt(sorb), (Int)d, nameIReg(4,rm));
+ *len = 5;
+ return disAMode_copy2tmp(
+ handleSegOverride(sorb,
+ binop(Iop_Add32,getIReg(4,rm),mkU32(d))));
+ }
+
+ /* a register, %eax .. %edi. This shouldn't happen. */
+ case 0x18: case 0x19: case 0x1A: case 0x1B:
+ case 0x1C: case 0x1D: case 0x1E: case 0x1F:
+ vpanic("disAMode(x86): not an addr!");
+
+ /* a 32-bit literal address
+ --> MOV d32, tmp
+ */
+ case 0x05:
+ { UInt d = getUDisp32(delta);
+ *len = 5;
+ DIS(buf, "%s(0x%x)", sorbTxt(sorb), d);
+ return disAMode_copy2tmp(
+ handleSegOverride(sorb, mkU32(d)));
+ }
+
+ case 0x04: {
+ /* SIB, with no displacement. Special cases:
+ -- %esp cannot act as an index value.
+ If index_r indicates %esp, zero is used for the index.
+ -- when mod is zero and base indicates EBP, base is instead
+ a 32-bit literal.
+ It's all madness, I tell you. Extract %index, %base and
+ scale from the SIB byte. The value denoted is then:
+ | %index == %ESP && %base == %EBP
+ = d32 following SIB byte
+ | %index == %ESP && %base != %EBP
+ = %base
+ | %index != %ESP && %base == %EBP
+ = d32 following SIB byte + (%index << scale)
+ | %index != %ESP && %base != %EBP
+ = %base + (%index << scale)
+
+ What happens to the souls of CPU architects who dream up such
+ horrendous schemes, do you suppose?
+ */
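+ /* Worked example (illustrative): SIB byte 0x98 splits as
+ scale=10, index=011, base=000, i.e. (%eax,%ebx,4), so the
+ denoted address is %eax + (%ebx << 2). */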
+ UChar sib = getIByte(delta);
+ UChar scale = toUChar((sib >> 6) & 3);
+ UChar index_r = toUChar((sib >> 3) & 7);
+ UChar base_r = toUChar(sib & 7);
+ delta++;
+
+ if (index_r != R_ESP && base_r != R_EBP) {
+ DIS(buf, "%s(%s,%s,%d)", sorbTxt(sorb),
+ nameIReg(4,base_r), nameIReg(4,index_r), 1<<scale);
+ *len = 2;
+ return
+ disAMode_copy2tmp(
+ handleSegOverride(sorb,
+ binop(Iop_Add32,
+ getIReg(4,base_r),
+ binop(Iop_Shl32, getIReg(4,index_r),
+ mkU8(scale)))));
+ }
+
+ if (index_r != R_ESP && base_r == R_EBP) {
+ UInt d = getUDisp32(delta);
+ DIS(buf, "%s0x%x(,%s,%d)", sorbTxt(sorb), d,
+ nameIReg(4,index_r), 1<<scale);
+ *len = 6;
+ return
+ disAMode_copy2tmp(
+ handleSegOverride(sorb,
+ binop(Iop_Add32,
+ binop(Iop_Shl32, getIReg(4,index_r), mkU8(scale)),
+ mkU32(d))));
+ }
+
+ if (index_r == R_ESP && base_r != R_EBP) {
+ DIS(buf, "%s(%s,,)", sorbTxt(sorb), nameIReg(4,base_r));
+ *len = 2;
+ return disAMode_copy2tmp(
+ handleSegOverride(sorb, getIReg(4,base_r)));
+ }
+
+ if (index_r == R_ESP && base_r == R_EBP) {
+ UInt d = getUDisp32(delta);
+ DIS(buf, "%s0x%x(,,)", sorbTxt(sorb), d);
+ *len = 6;
+ return disAMode_copy2tmp(
+ handleSegOverride(sorb, mkU32(d)));
+ }
+ /*NOTREACHED*/
+ vassert(0);
+ }
+
+ /* SIB, with 8-bit displacement. Special cases:
+ -- %esp cannot act as an index value.
+ If index_r indicates %esp, zero is used for the index.
+ Denoted value is:
+ | %index == %ESP
+ = d8 + %base
+ | %index != %ESP
+ = d8 + %base + (%index << scale)
+ */
+ case 0x0C: {
+ UChar sib = getIByte(delta);
+ UChar scale = toUChar((sib >> 6) & 3);
+ UChar index_r = toUChar((sib >> 3) & 7);
+ UChar base_r = toUChar(sib & 7);
+ UInt d = getSDisp8(delta+1);
+
+ if (index_r == R_ESP) {
+ DIS(buf, "%s%d(%s,,)", sorbTxt(sorb),
+ (Int)d, nameIReg(4,base_r));
+ *len = 3;
+ return disAMode_copy2tmp(
+ handleSegOverride(sorb,
+ binop(Iop_Add32, getIReg(4,base_r), mkU32(d)) ));
+ } else {
+ DIS(buf, "%s%d(%s,%s,%d)", sorbTxt(sorb), (Int)d,
+ nameIReg(4,base_r), nameIReg(4,index_r), 1<<scale);
+ *len = 3;
+ return
+ disAMode_copy2tmp(
+ handleSegOverride(sorb,
+ binop(Iop_Add32,
+ binop(Iop_Add32,
+ getIReg(4,base_r),
+ binop(Iop_Shl32,
+ getIReg(4,index_r), mkU8(scale))),
+ mkU32(d))));
+ }
+ /*NOTREACHED*/
+ vassert(0);
+ }
+
+ /* SIB, with 32-bit displacement. Special cases:
+ -- %esp cannot act as an index value.
+ If index_r indicates %esp, zero is used for the index.
+ Denoted value is:
+ | %index == %ESP
+ = d32 + %base
+ | %index != %ESP
+ = d32 + %base + (%index << scale)
+ */
+ case 0x14: {
+ UChar sib = getIByte(delta);
+ UChar scale = toUChar((sib >> 6) & 3);
+ UChar index_r = toUChar((sib >> 3) & 7);
+ UChar base_r = toUChar(sib & 7);
+ UInt d = getUDisp32(delta+1);
+
+ if (index_r == R_ESP) {
+ DIS(buf, "%s%d(%s,,)", sorbTxt(sorb),
+ (Int)d, nameIReg(4,base_r));
+ *len = 6;
+ return disAMode_copy2tmp(
+ handleSegOverride(sorb,
+ binop(Iop_Add32, getIReg(4,base_r), mkU32(d)) ));
+ } else {
+ DIS(buf, "%s%d(%s,%s,%d)", sorbTxt(sorb), (Int)d,
+ nameIReg(4,base_r), nameIReg(4,index_r), 1<<scale);
+ *len = 6;
+ return
+ disAMode_copy2tmp(
+ handleSegOverride(sorb,
+ binop(Iop_Add32,
+ binop(Iop_Add32,
+ getIReg(4,base_r),
+ binop(Iop_Shl32,
+ getIReg(4,index_r), mkU8(scale))),
+ mkU32(d))));
+ }
+ /*NOTREACHED*/
+ vassert(0);
+ }
+
+ default:
+ vpanic("disAMode(x86)");
+ return 0; /*notreached*/
+ }
+}
+
+
+/* Figure out the number of (insn-stream) bytes constituting the amode
+ beginning at delta. This is useful for getting hold of literals beyond
+ the end of the amode before it has been disassembled. */
+
+static UInt lengthAMode ( Int delta )
+{
+ UChar mod_reg_rm = getIByte(delta); delta++;
+
+ /* squeeze out the reg field from mod_reg_rm, since a 256-entry
+ jump table seems a bit excessive.
+ */
+ mod_reg_rm &= 0xC7; /* is now XX000YYY */
+ mod_reg_rm = toUChar(mod_reg_rm | (mod_reg_rm >> 3));
+ /* is now XX0XXYYY */
+ mod_reg_rm &= 0x1F; /* is now 000XXYYY */
+ switch (mod_reg_rm) {
+
+ /* (%eax) .. (%edi), not including (%esp) or (%ebp). */
+ case 0x00: case 0x01: case 0x02: case 0x03:
+ /* ! 04 */ /* ! 05 */ case 0x06: case 0x07:
+ return 1;
+
+ /* d8(%eax) ... d8(%edi), not including d8(%esp). */
+ case 0x08: case 0x09: case 0x0A: case 0x0B:
+ /* ! 0C */ case 0x0D: case 0x0E: case 0x0F:
+ return 2;
+
+ /* d32(%eax) ... d32(%edi), not including d32(%esp). */
+ case 0x10: case 0x11: case 0x12: case 0x13:
+ /* ! 14 */ case 0x15: case 0x16: case 0x17:
+ return 5;
+
+ /* a register, %eax .. %edi. (Not an addr, but still handled.) */
+ case 0x18: case 0x19: case 0x1A: case 0x1B:
+ case 0x1C: case 0x1D: case 0x1E: case 0x1F:
+ return 1;
+
+ /* a 32-bit literal address. */
+ case 0x05: return 5;
+
+ /* SIB, no displacement. */
+ case 0x04: {
+ UChar sib = getIByte(delta);
+ UChar base_r = toUChar(sib & 7);
+ if (base_r == R_EBP) return 6; else return 2;
+ }
+ /* SIB, with 8-bit displacement. */
+ case 0x0C: return 3;
+
+ /* SIB, with 32-bit displacement. */
+ case 0x14: return 6;
+
+ default:
+ vpanic("lengthAMode");
+ return 0; /*notreached*/
+ }
+}
+
+/*------------------------------------------------------------*/
+/*--- Disassembling common idioms ---*/
+/*------------------------------------------------------------*/
+
+/* Handle binary integer instructions of the form
+ op E, G meaning
+ op reg-or-mem, reg
+ Is passed a pointer to the modRM byte, the actual operation, and the
+ data size. Returns the address advanced completely over this
+ instruction.
+
+ E(src) is reg-or-mem
+ G(dst) is reg.
+
+ If E is reg, --> GET %G, tmp
+ OP %E, tmp
+ PUT tmp, %G
+
+ If E is mem and OP is not reversible,
+ --> (getAddr E) -> tmpa
+ LD (tmpa), tmpa
+ GET %G, tmp2
+ OP tmpa, tmp2
+ PUT tmp2, %G
+
+ If E is mem and OP is reversible
+ --> (getAddr E) -> tmpa
+ LD (tmpa), tmpa
+ OP %G, tmpa
+ PUT tmpa, %G
+*/
+static
+UInt dis_op2_E_G ( UChar sorb,
+ Bool addSubCarry,
+ IROp op8,
+ Bool keep,
+ Int size,
+ Int delta0,
+ HChar* t_x86opc )
+{
+ HChar dis_buf[50];
+ Int len;
+ IRType ty = szToITy(size);
+ IRTemp dst1 = newTemp(ty);
+ IRTemp src = newTemp(ty);
+ IRTemp dst0 = newTemp(ty);
+ UChar rm = getUChar(delta0);
+ IRTemp addr = IRTemp_INVALID;
+
+ /* addSubCarry == True indicates the intended operation is
+ add-with-carry or subtract-with-borrow. */
+ if (addSubCarry) {
+ vassert(op8 == Iop_Add8 || op8 == Iop_Sub8);
+ vassert(keep);
+ }
+
+ if (epartIsReg(rm)) {
+ /* Specially handle XOR reg,reg, because that doesn't really
+ depend on reg, and doing the obvious thing potentially
+ generates a spurious value check failure due to the bogus
+ dependency. Ditto SBB reg,reg. */
+ if ((op8 == Iop_Xor8 || (op8 == Iop_Sub8 && addSubCarry))
+ && gregOfRM(rm) == eregOfRM(rm)) {
+ putIReg(size, gregOfRM(rm), mkU(ty,0));
+ }
+ assign( dst0, getIReg(size,gregOfRM(rm)) );
+ assign( src, getIReg(size,eregOfRM(rm)) );
+
+ if (addSubCarry && op8 == Iop_Add8) {
+ helper_ADC( size, dst1, dst0, src,
+ /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
+ putIReg(size, gregOfRM(rm), mkexpr(dst1));
+ } else
+ if (addSubCarry && op8 == Iop_Sub8) {
+ helper_SBB( size, dst1, dst0, src,
+ /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
+ putIReg(size, gregOfRM(rm), mkexpr(dst1));
+ } else {
+ assign( dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
+ if (isAddSub(op8))
+ setFlags_DEP1_DEP2(op8, dst0, src, ty);
+ else
+ setFlags_DEP1(op8, dst1, ty);
+ if (keep)
+ putIReg(size, gregOfRM(rm), mkexpr(dst1));
+ }
+
+ DIP("%s%c %s,%s\n", t_x86opc, nameISize(size),
+ nameIReg(size,eregOfRM(rm)),
+ nameIReg(size,gregOfRM(rm)));
+ return 1+delta0;
+ } else {
+ /* E refers to memory */
+ addr = disAMode ( &len, sorb, delta0, dis_buf);
+ assign( dst0, getIReg(size,gregOfRM(rm)) );
+ assign( src, loadLE(szToITy(size), mkexpr(addr)) );
+
+ if (addSubCarry && op8 == Iop_Add8) {
+ helper_ADC( size, dst1, dst0, src,
+ /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
+ putIReg(size, gregOfRM(rm), mkexpr(dst1));
+ } else
+ if (addSubCarry && op8 == Iop_Sub8) {
+ helper_SBB( size, dst1, dst0, src,
+ /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
+ putIReg(size, gregOfRM(rm), mkexpr(dst1));
+ } else {
+ assign( dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
+ if (isAddSub(op8))
+ setFlags_DEP1_DEP2(op8, dst0, src, ty);
+ else
+ setFlags_DEP1(op8, dst1, ty);
+ if (keep)
+ putIReg(size, gregOfRM(rm), mkexpr(dst1));
+ }
+
+ DIP("%s%c %s,%s\n", t_x86opc, nameISize(size),
+ dis_buf,nameIReg(size,gregOfRM(rm)));
+ return len+delta0;
+ }
+}
+
+
+
+/* Handle binary integer instructions of the form
+ op G, E meaning
+ op reg, reg-or-mem
+ Is passed a pointer to the modRM byte, the actual operation, and the
+ data size. Returns the address advanced completely over this
+ instruction.
+
+ G(src) is reg.
+ E(dst) is reg-or-mem
+
+ If E is reg, --> GET %E, tmp
+ OP %G, tmp
+ PUT tmp, %E
+
+ If E is mem, --> (getAddr E) -> tmpa
+ LD (tmpa), tmpv
+ OP %G, tmpv
+ ST tmpv, (tmpa)
+*/
+static
+UInt dis_op2_G_E ( UChar sorb,
+ Bool locked,
+ Bool addSubCarry,
+ IROp op8,
+ Bool keep,
+ Int size,
+ Int delta0,
+ HChar* t_x86opc )
+{
+ HChar dis_buf[50];
+ Int len;
+ IRType ty = szToITy(size);
+ IRTemp dst1 = newTemp(ty);
+ IRTemp src = newTemp(ty);
+ IRTemp dst0 = newTemp(ty);
+ UChar rm = getIByte(delta0);
+ IRTemp addr = IRTemp_INVALID;
+
+ /* addSubCarry == True indicates the intended operation is
+ add-with-carry or subtract-with-borrow. */
+ if (addSubCarry) {
+ vassert(op8 == Iop_Add8 || op8 == Iop_Sub8);
+ vassert(keep);
+ }
+
+ if (epartIsReg(rm)) {
+ /* Specially handle XOR reg,reg, because that doesn't really
+ depend on reg, and doing the obvious thing potentially
+ generates a spurious value check failure due to the bogus
+ dependency. Ditto SBB reg,reg.*/
+ if ((op8 == Iop_Xor8 || (op8 == Iop_Sub8 && addSubCarry))
+ && gregOfRM(rm) == eregOfRM(rm)) {
+ putIReg(size, eregOfRM(rm), mkU(ty,0));
+ }
+ assign(dst0, getIReg(size,eregOfRM(rm)));
+ assign(src, getIReg(size,gregOfRM(rm)));
+
+ if (addSubCarry && op8 == Iop_Add8) {
+ helper_ADC( size, dst1, dst0, src,
+ /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
+ putIReg(size, eregOfRM(rm), mkexpr(dst1));
+ } else
+ if (addSubCarry && op8 == Iop_Sub8) {
+ helper_SBB( size, dst1, dst0, src,
+ /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
+ putIReg(size, eregOfRM(rm), mkexpr(dst1));
+ } else {
+ assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
+ if (isAddSub(op8))
+ setFlags_DEP1_DEP2(op8, dst0, src, ty);
+ else
+ setFlags_DEP1(op8, dst1, ty);
+ if (keep)
+ putIReg(size, eregOfRM(rm), mkexpr(dst1));
+ }
+
+ DIP("%s%c %s,%s\n", t_x86opc, nameISize(size),
+ nameIReg(size,gregOfRM(rm)),
+ nameIReg(size,eregOfRM(rm)));
+ return 1+delta0;
+ }
+
+ /* E refers to memory */
+ {
+ addr = disAMode ( &len, sorb, delta0, dis_buf);
+ assign(dst0, loadLE(ty,mkexpr(addr)));
+ assign(src, getIReg(size,gregOfRM(rm)));
+
+ if (addSubCarry && op8 == Iop_Add8) {
+ if (locked) {
+ /* cas-style store */
+ helper_ADC( size, dst1, dst0, src,
+ /*store*/addr, dst0/*expVal*/, guest_EIP_curr_instr );
+ } else {
+ /* normal store */
+ helper_ADC( size, dst1, dst0, src,
+ /*store*/addr, IRTemp_INVALID, 0 );
+ }
+ } else
+ if (addSubCarry && op8 == Iop_Sub8) {
+ if (locked) {
+ /* cas-style store */
+ helper_SBB( size, dst1, dst0, src,
+ /*store*/addr, dst0/*expVal*/, guest_EIP_curr_instr );
+ } else {
+ /* normal store */
+ helper_SBB( size, dst1, dst0, src,
+ /*store*/addr, IRTemp_INVALID, 0 );
+ }
+ } else {
+ assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
+ if (keep) {
+ if (locked) {
+ if (0) vex_printf("locked case\n" );
+ casLE( mkexpr(addr),
+ mkexpr(dst0)/*expval*/,
+ mkexpr(dst1)/*newval*/, guest_EIP_curr_instr );
+ } else {
+ if (0) vex_printf("nonlocked case\n");
+ storeLE(mkexpr(addr), mkexpr(dst1));
+ }
+ }
+ if (isAddSub(op8))
+ setFlags_DEP1_DEP2(op8, dst0, src, ty);
+ else
+ setFlags_DEP1(op8, dst1, ty);
+ }
+
+ DIP("%s%c %s,%s\n", t_x86opc, nameISize(size),
+ nameIReg(size,gregOfRM(rm)), dis_buf);
+ return len+delta0;
+ }
+}
+
+
+/* Handle move instructions of the form
+ mov E, G meaning
+ mov reg-or-mem, reg
+ Is passed a pointer to the modRM byte, and the data size. Returns
+ the address advanced completely over this instruction.
+
+ E(src) is reg-or-mem
+ G(dst) is reg.
+
+ If E is reg, --> GET %E, tmpv
+ PUT tmpv, %G
+
+ If E is mem --> (getAddr E) -> tmpa
+ LD (tmpa), tmpb
+ PUT tmpb, %G
+*/
+static
+UInt dis_mov_E_G ( UChar sorb,
+ Int size,
+ Int delta0 )
+{
+ Int len;
+ UChar rm = getIByte(delta0);
+ HChar dis_buf[50];
+
+ if (epartIsReg(rm)) {
+ putIReg(size, gregOfRM(rm), getIReg(size, eregOfRM(rm)));
+ DIP("mov%c %s,%s\n", nameISize(size),
+ nameIReg(size,eregOfRM(rm)),
+ nameIReg(size,gregOfRM(rm)));
+ return 1+delta0;
+ }
+
+ /* E refers to memory */
+ {
+ IRTemp addr = disAMode ( &len, sorb, delta0, dis_buf );
+ putIReg(size, gregOfRM(rm), loadLE(szToITy(size), mkexpr(addr)));
+ DIP("mov%c %s,%s\n", nameISize(size),
+ dis_buf,nameIReg(size,gregOfRM(rm)));
+ return delta0+len;
+ }
+}
+
+
+/* Handle move instructions of the form
+ mov G, E meaning
+ mov reg, reg-or-mem
+ Is passed a pointer to the modRM byte, and the data size. Returns
+ the address advanced completely over this instruction.
+
+ G(src) is reg.
+ E(dst) is reg-or-mem
+
+ If E is reg, --> GET %G, tmp
+ PUT tmp, %E
+
+ If E is mem, --> (getAddr E) -> tmpa
+ GET %G, tmpv
+ ST tmpv, (tmpa)
+*/
+static
+UInt dis_mov_G_E ( UChar sorb,
+ Int size,
+ Int delta0 )
+{
+ Int len;
+ UChar rm = getIByte(delta0);
+ HChar dis_buf[50];
+
+ if (epartIsReg(rm)) {
+ putIReg(size, eregOfRM(rm), getIReg(size, gregOfRM(rm)));
+ DIP("mov%c %s,%s\n", nameISize(size),
+ nameIReg(size,gregOfRM(rm)),
+ nameIReg(size,eregOfRM(rm)));
+ return 1+delta0;
+ }
+
+ /* E refers to memory */
+ {
+ IRTemp addr = disAMode ( &len, sorb, delta0, dis_buf);
+ storeLE( mkexpr(addr), getIReg(size, gregOfRM(rm)) );
+ DIP("mov%c %s,%s\n", nameISize(size),
+ nameIReg(size,gregOfRM(rm)), dis_buf);
+ return len+delta0;
+ }
+}
+
+
+/* op $immediate, AL/AX/EAX. */
+static
+UInt dis_op_imm_A ( Int size,
+ Bool carrying,
+ IROp op8,
+ Bool keep,
+ Int delta,
+ HChar* t_x86opc )
+{
+ IRType ty = szToITy(size);
+ IRTemp dst0 = newTemp(ty);
+ IRTemp src = newTemp(ty);
+ IRTemp dst1 = newTemp(ty);
+ UInt lit = getUDisp(size,delta);
+ assign(dst0, getIReg(size,R_EAX));
+ assign(src, mkU(ty,lit));
+
+ if (isAddSub(op8) && !carrying) {
+ assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
+ setFlags_DEP1_DEP2(op8, dst0, src, ty);
+ }
+ else
+ if (isLogic(op8)) {
+ vassert(!carrying);
+ assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
+ setFlags_DEP1(op8, dst1, ty);
+ }
+ else
+ if (op8 == Iop_Add8 && carrying) {
+ helper_ADC( size, dst1, dst0, src,
+ /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
+ }
+ else
+ if (op8 == Iop_Sub8 && carrying) {
+ helper_SBB( size, dst1, dst0, src,
+ /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
+ }
+ else
+ vpanic("dis_op_imm_A(x86,guest)");
+
+ if (keep)
+ putIReg(size, R_EAX, mkexpr(dst1));
+
+ DIP("%s%c $0x%x, %s\n", t_x86opc, nameISize(size),
+ lit, nameIReg(size,R_EAX));
+ return delta+size;
+}
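+
+/* Decode example (illustrative): the byte pair "0x04 0x2A" is
+ "add $0x2a,%al"; disassembly reaches here with delta pointing at
+ the immediate, size == 1, op8 == Iop_Add8, carrying == False, and
+ the function returns delta+1. */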
+
+
+/* Sign- and Zero-extending moves. */
+static
+UInt dis_movx_E_G ( UChar sorb,
+ Int delta, Int szs, Int szd, Bool sign_extend )
+{
+ UChar rm = getIByte(delta);
+ if (epartIsReg(rm)) {
+ if (szd == szs) {
+ // mutant case. See #250799
+ putIReg(szd, gregOfRM(rm),
+ getIReg(szs,eregOfRM(rm)));
+ } else {
+ // normal case
+ putIReg(szd, gregOfRM(rm),
+ unop(mkWidenOp(szs,szd,sign_extend),
+ getIReg(szs,eregOfRM(rm))));
+ }
+ DIP("mov%c%c%c %s,%s\n", sign_extend ? 's' : 'z',
+ nameISize(szs), nameISize(szd),
+ nameIReg(szs,eregOfRM(rm)),
+ nameIReg(szd,gregOfRM(rm)));
+ return 1+delta;
+ }
+
+ /* E refers to memory */
+ {
+ Int len;
+ HChar dis_buf[50];
+ IRTemp addr = disAMode ( &len, sorb, delta, dis_buf );
+ if (szd == szs) {
+ // mutant case. See #250799
+ putIReg(szd, gregOfRM(rm),
+ loadLE(szToITy(szs),mkexpr(addr)));
+ } else {
+ // normal case
+ putIReg(szd, gregOfRM(rm),
+ unop(mkWidenOp(szs,szd,sign_extend),
+ loadLE(szToITy(szs),mkexpr(addr))));
+ }
+ DIP("mov%c%c%c %s,%s\n", sign_extend ? 's' : 'z',
+ nameISize(szs), nameISize(szd),
+ dis_buf, nameIReg(szd,gregOfRM(rm)));
+ return len+delta;
+ }
+}
+
+
+/* Generate code to divide ArchRegs EDX:EAX / DX:AX / AX by the 32 /
+ 16 / 8 bit quantity in the given IRTemp. */
+static
+void codegen_div ( Int sz, IRTemp t, Bool signed_divide )
+{
+ IROp op = signed_divide ? Iop_DivModS64to32 : Iop_DivModU64to32;
+ IRTemp src64 = newTemp(Ity_I64);
+ IRTemp dst64 = newTemp(Ity_I64);
+ switch (sz) {
+ case 4:
+ assign( src64, binop(Iop_32HLto64,
+ getIReg(4,R_EDX), getIReg(4,R_EAX)) );
+ assign( dst64, binop(op, mkexpr(src64), mkexpr(t)) );
+ putIReg( 4, R_EAX, unop(Iop_64to32,mkexpr(dst64)) );
+ putIReg( 4, R_EDX, unop(Iop_64HIto32,mkexpr(dst64)) );
+ break;
+ case 2: {
+ IROp widen3264 = signed_divide ? Iop_32Sto64 : Iop_32Uto64;
+ IROp widen1632 = signed_divide ? Iop_16Sto32 : Iop_16Uto32;
+ assign( src64, unop(widen3264,
+ binop(Iop_16HLto32,
+ getIReg(2,R_EDX), getIReg(2,R_EAX))) );
+ assign( dst64, binop(op, mkexpr(src64), unop(widen1632,mkexpr(t))) );
+ putIReg( 2, R_EAX, unop(Iop_32to16,unop(Iop_64to32,mkexpr(dst64))) );
+ putIReg( 2, R_EDX, unop(Iop_32to16,unop(Iop_64HIto32,mkexpr(dst64))) );
+ break;
+ }
+ case 1: {
+ IROp widen3264 = signed_divide ? Iop_32Sto64 : Iop_32Uto64;
+ IROp widen1632 = signed_divide ? Iop_16Sto32 : Iop_16Uto32;
+ IROp widen816 = signed_divide ? Iop_8Sto16 : Iop_8Uto16;
+ assign( src64, unop(widen3264, unop(widen1632, getIReg(2,R_EAX))) );
+ assign( dst64,
+ binop(op, mkexpr(src64),
+ unop(widen1632, unop(widen816, mkexpr(t)))) );
+ putIReg( 1, R_AL, unop(Iop_16to8, unop(Iop_32to16,
+ unop(Iop_64to32,mkexpr(dst64)))) );
+ putIReg( 1, R_AH, unop(Iop_16to8, unop(Iop_32to16,
+ unop(Iop_64HIto32,mkexpr(dst64)))) );
+ break;
+ }
+ default: vpanic("codegen_div(x86)");
+ }
+}
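+
+/* Worked example for the 32-bit case (illustrative): Iop_DivModU64to32
+ divides the 64-bit value by the 32-bit divisor and returns the
+ quotient in the low 32 bits and the remainder in the high 32 bits.
+ So for "divl" with EDX:EAX == 10 and divisor 3, src64 == 10,
+ dst64 == (1 << 32) | 3, and the code above writes EAX = 3 and
+ EDX = 1, matching the x86 semantics. */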
+
+
+static
+UInt dis_Grp1 ( UChar sorb, Bool locked,
+ Int delta, UChar modrm,
+ Int am_sz, Int d_sz, Int sz, UInt d32 )
+{
+ Int len;
+ HChar dis_buf[50];
+ IRType ty = szToITy(sz);
+ IRTemp dst1 = newTemp(ty);
+ IRTemp src = newTemp(ty);
+ IRTemp dst0 = newTemp(ty);
+ IRTemp addr = IRTemp_INVALID;
+ IROp op8 = Iop_INVALID;
+ UInt mask = sz==1 ? 0xFF : (sz==2 ? 0xFFFF : 0xFFFFFFFF);
+
+ switch (gregOfRM(modrm)) {
+ case 0: op8 = Iop_Add8; break; case 1: op8 = Iop_Or8; break;
+ case 2: break; // ADC
+ case 3: break; // SBB
+ case 4: op8 = Iop_And8; break; case 5: op8 = Iop_Sub8; break;
+ case 6: op8 = Iop_Xor8; break; case 7: op8 = Iop_Sub8; break;
+ /*NOTREACHED*/
+ default: vpanic("dis_Grp1: unhandled case");
+ }
+
+ if (epartIsReg(modrm)) {
+ vassert(am_sz == 1);
+
+ assign(dst0, getIReg(sz,eregOfRM(modrm)));
+ assign(src, mkU(ty,d32 & mask));
+
+ if (gregOfRM(modrm) == 2 /* ADC */) {
+ helper_ADC( sz, dst1, dst0, src,
+ /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
+ } else
+ if (gregOfRM(modrm) == 3 /* SBB */) {
+ helper_SBB( sz, dst1, dst0, src,
+ /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
+ } else {
+ assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
+ if (isAddSub(op8))
+ setFlags_DEP1_DEP2(op8, dst0, src, ty);
+ else
+ setFlags_DEP1(op8, dst1, ty);
+ }
+
+ if (gregOfRM(modrm) < 7)
+ putIReg(sz, eregOfRM(modrm), mkexpr(dst1));
+
+ delta += (am_sz + d_sz);
+ DIP("%s%c $0x%x, %s\n", nameGrp1(gregOfRM(modrm)), nameISize(sz), d32,
+ nameIReg(sz,eregOfRM(modrm)));
+ } else {
+ addr = disAMode ( &len, sorb, delta, dis_buf);
+
+ assign(dst0, loadLE(ty,mkexpr(addr)));
+ assign(src, mkU(ty,d32 & mask));
+
+ if (gregOfRM(modrm) == 2 /* ADC */) {
+ if (locked) {
+ /* cas-style store */
+ helper_ADC( sz, dst1, dst0, src,
+ /*store*/addr, dst0/*expVal*/, guest_EIP_curr_instr );
+ } else {
+ /* normal store */
+ helper_ADC( sz, dst1, dst0, src,
+ /*store*/addr, IRTemp_INVALID, 0 );
+ }
+ } else
+ if (gregOfRM(modrm) == 3 /* SBB */) {
+ if (locked) {
+ /* cas-style store */
+ helper_SBB( sz, dst1, dst0, src,
+ /*store*/addr, dst0/*expVal*/, guest_EIP_curr_instr );
+ } else {
+ /* normal store */
+ helper_SBB( sz, dst1, dst0, src,
+ /*store*/addr, IRTemp_INVALID, 0 );
+ }
+ } else {
+ assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
+ if (gregOfRM(modrm) < 7) {
+ if (locked) {
+ casLE( mkexpr(addr), mkexpr(dst0)/*expVal*/,
+ mkexpr(dst1)/*newVal*/,
+ guest_EIP_curr_instr );
+ } else {
+ storeLE(mkexpr(addr), mkexpr(dst1));
+ }
+ }
+ if (isAddSub(op8))
+ setFlags_DEP1_DEP2(op8, dst0, src, ty);
+ else
+ setFlags_DEP1(op8, dst1, ty);
+ }
+
+ delta += (len+d_sz);
+ DIP("%s%c $0x%x, %s\n", nameGrp1(gregOfRM(modrm)), nameISize(sz),
+ d32, dis_buf);
+ }
+ return delta;
+}
+
+
+/* Group 2 extended opcodes. shift_expr must be an 8-bit typed
+ expression. */
+
+static
+UInt dis_Grp2 ( UChar sorb,
+ Int delta, UChar modrm,
+ Int am_sz, Int d_sz, Int sz, IRExpr* shift_expr,
+ HChar* shift_expr_txt, Bool* decode_OK )
+{
+ /* delta on entry points at the modrm byte. */
+ HChar dis_buf[50];
+ Int len;
+ Bool isShift, isRotate, isRotateC;
+ IRType ty = szToITy(sz);
+ IRTemp dst0 = newTemp(ty);
+ IRTemp dst1 = newTemp(ty);
+ IRTemp addr = IRTemp_INVALID;
+
+ *decode_OK = True;
+
+ vassert(sz == 1 || sz == 2 || sz == 4);
+
+ /* Put value to shift/rotate in dst0. */
+ if (epartIsReg(modrm)) {
+ assign(dst0, getIReg(sz, eregOfRM(modrm)));
+ delta += (am_sz + d_sz);
+ } else {
+ addr = disAMode ( &len, sorb, delta, dis_buf);
+ assign(dst0, loadLE(ty,mkexpr(addr)));
+ delta += len + d_sz;
+ }
+
+ isShift = False;
+ switch (gregOfRM(modrm)) { case 4: case 5: case 7: isShift = True; }
+
+ isRotate = False;
+ switch (gregOfRM(modrm)) { case 0: case 1: isRotate = True; }
+
+ isRotateC = False;
+ switch (gregOfRM(modrm)) { case 2: case 3: isRotateC = True; }
+
+ if (gregOfRM(modrm) == 6) {
+ *decode_OK = False;
+ return delta;
+ }
+
+ if (!isShift && !isRotate && !isRotateC) {
+ /*NOTREACHED*/
+ vpanic("dis_Grp2(Reg): unhandled case(x86)");
+ }
+
+ if (isRotateC) {
+ /* call a helper; these insns are so ridiculous they do not
+ deserve better */
+ Bool left = toBool(gregOfRM(modrm) == 2);
+ IRTemp r64 = newTemp(Ity_I64);
+ IRExpr** args
+ = mkIRExprVec_4( widenUto32(mkexpr(dst0)), /* thing to rotate */
+ widenUto32(shift_expr), /* rotate amount */
+ widenUto32(mk_x86g_calculate_eflags_all()),
+ mkU32(sz) );
+ assign( r64, mkIRExprCCall(
+ Ity_I64,
+ 0/*regparm*/,
+ left ? "x86g_calculate_RCL" : "x86g_calculate_RCR",
+ left ? &x86g_calculate_RCL : &x86g_calculate_RCR,
+ args
+ )
+ );
+ /* new eflags in hi half r64; new value in lo half r64 */
+ assign( dst1, narrowTo(ty, unop(Iop_64to32, mkexpr(r64))) );
+ stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) ));
+ stmt( IRStmt_Put( OFFB_CC_DEP1, unop(Iop_64HIto32, mkexpr(r64)) ));
+ stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
+ /* Set NDEP even though it isn't used. This makes redundant-PUT
+ elimination of previous stores to this field work better. */
+ stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
+ }
+
+ if (isShift) {
+
+ IRTemp pre32 = newTemp(Ity_I32);
+ IRTemp res32 = newTemp(Ity_I32);
+ IRTemp res32ss = newTemp(Ity_I32);
+ IRTemp shift_amt = newTemp(Ity_I8);
+ IROp op32;
+
+ switch (gregOfRM(modrm)) {
+ case 4: op32 = Iop_Shl32; break;
+ case 5: op32 = Iop_Shr32; break;
+ case 7: op32 = Iop_Sar32; break;
+ /*NOTREACHED*/
+ default: vpanic("dis_Grp2:shift"); break;
+ }
+
+ /* Widen the value to be shifted to 32 bits, do the shift, and
+ narrow back down. This seems surprisingly long-winded, but
+ unfortunately the Intel semantics requires that 8/16-bit
+ shifts give defined results for shift values all the way up
+ to 31, and this seems the simplest way to do it. It has the
+ advantage that the only IR level shifts generated are of 32
+ bit values, and the shift amount is guaranteed to be in the
+ range 0 .. 31, thereby observing the IR semantics requiring
+ all shift values to be in the range 0 .. 2^word_size-1. */
+
+ /* shift_amt = shift_expr & 31, regardless of operation size */
+ assign( shift_amt, binop(Iop_And8, shift_expr, mkU8(31)) );
+
+ /* suitably widen the value to be shifted to 32 bits. */
+ assign( pre32, op32==Iop_Sar32 ? widenSto32(mkexpr(dst0))
+ : widenUto32(mkexpr(dst0)) );
+
+ /* res32 = pre32 `shift` shift_amt */
+ assign( res32, binop(op32, mkexpr(pre32), mkexpr(shift_amt)) );
+
+ /* res32ss = pre32 `shift` ((shift_amt - 1) & 31) */
+ assign( res32ss,
+ binop(op32,
+ mkexpr(pre32),
+ binop(Iop_And8,
+ binop(Iop_Sub8,
+ mkexpr(shift_amt), mkU8(1)),
+ mkU8(31))) );
+
+ /* Build the flags thunk. */
+ setFlags_DEP1_DEP2_shift(op32, res32, res32ss, ty, shift_amt);
+
+ /* Narrow the result back down. */
+ assign( dst1, narrowTo(ty, mkexpr(res32)) );
+
+ } /* if (isShift) */
+
+ else
+ if (isRotate) {
+ Int ccOp = ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 1 : 2);
+ Bool left = toBool(gregOfRM(modrm) == 0);
+ IRTemp rot_amt = newTemp(Ity_I8);
+ IRTemp rot_amt32 = newTemp(Ity_I8);
+ IRTemp oldFlags = newTemp(Ity_I32);
+
+ /* rot_amt = shift_expr & mask */
+ /* By masking the rotate amount thusly, the IR-level Shl/Shr
+ expressions never shift beyond the word size and thus remain
+ well defined. */
+ assign(rot_amt32, binop(Iop_And8, shift_expr, mkU8(31)));
+
+ if (ty == Ity_I32)
+ assign(rot_amt, mkexpr(rot_amt32));
+ else
+ assign(rot_amt, binop(Iop_And8, mkexpr(rot_amt32), mkU8(8*sz-1)));
+
+ if (left) {
+
+ /* dst1 = (dst0 << rot_amt) | (dst0 >>u (wordsize-rot_amt)) */
+ assign(dst1,
+ binop( mkSizedOp(ty,Iop_Or8),
+ binop( mkSizedOp(ty,Iop_Shl8),
+ mkexpr(dst0),
+ mkexpr(rot_amt)
+ ),
+ binop( mkSizedOp(ty,Iop_Shr8),
+ mkexpr(dst0),
+ binop(Iop_Sub8,mkU8(8*sz), mkexpr(rot_amt))
+ )
+ )
+ );
+ ccOp += X86G_CC_OP_ROLB;
+
+ } else { /* right */
+
+ /* dst1 = (dst0 >>u rot_amt) | (dst0 << (wordsize-rot_amt)) */
+ assign(dst1,
+ binop( mkSizedOp(ty,Iop_Or8),
+ binop( mkSizedOp(ty,Iop_Shr8),
+ mkexpr(dst0),
+ mkexpr(rot_amt)
+ ),
+ binop( mkSizedOp(ty,Iop_Shl8),
+ mkexpr(dst0),
+ binop(Iop_Sub8,mkU8(8*sz), mkexpr(rot_amt))
+ )
+ )
+ );
+ ccOp += X86G_CC_OP_RORB;
+
+ }
+
+ /* dst1 now holds the rotated value. Build flag thunk. We
+ need the resulting value for this, and the previous flags.
+ Except don't set it if the rotate count is zero. */
+
+ assign(oldFlags, mk_x86g_calculate_eflags_all());
+
+ /* CC_DEP1 is the rotated value. CC_NDEP is flags before. */
+ stmt( IRStmt_Put( OFFB_CC_OP,
+ IRExpr_Mux0X( mkexpr(rot_amt32),
+ IRExpr_Get(OFFB_CC_OP,Ity_I32),
+ mkU32(ccOp))) );
+ stmt( IRStmt_Put( OFFB_CC_DEP1,
+ IRExpr_Mux0X( mkexpr(rot_amt32),
+ IRExpr_Get(OFFB_CC_DEP1,Ity_I32),
+ widenUto32(mkexpr(dst1)))) );
+ stmt( IRStmt_Put( OFFB_CC_DEP2,
+ IRExpr_Mux0X( mkexpr(rot_amt32),
+ IRExpr_Get(OFFB_CC_DEP2,Ity_I32),
+ mkU32(0))) );
+ stmt( IRStmt_Put( OFFB_CC_NDEP,
+ IRExpr_Mux0X( mkexpr(rot_amt32),
+ IRExpr_Get(OFFB_CC_NDEP,Ity_I32),
+ mkexpr(oldFlags))) );
+ } /* if (isRotate) */
+
+ /* Save result, and finish up. */
+ if (epartIsReg(modrm)) {
+ putIReg(sz, eregOfRM(modrm), mkexpr(dst1));
+ if (vex_traceflags & VEX_TRACE_FE) {
+ vex_printf("%s%c ",
+ nameGrp2(gregOfRM(modrm)), nameISize(sz) );
+ if (shift_expr_txt)
+ vex_printf("%s", shift_expr_txt);
+ else
+ ppIRExpr(shift_expr);
+ vex_printf(", %s\n", nameIReg(sz,eregOfRM(modrm)));
+ }
+ } else {
+ storeLE(mkexpr(addr), mkexpr(dst1));
+ if (vex_traceflags & VEX_TRACE_FE) {
+ vex_printf("%s%c ",
+ nameGrp2(gregOfRM(modrm)), nameISize(sz) );
+ if (shift_expr_txt)
+ vex_printf("%s", shift_expr_txt);
+ else
+ ppIRExpr(shift_expr);
+ vex_printf(", %s\n", dis_buf);
+ }
+ }
+ return delta;
+}
+
+
+/* Group 8 extended opcodes (but BT/BTS/BTC/BTR only). */
+static
+UInt dis_Grp8_Imm ( UChar sorb,
+ Bool locked,
+ Int delta, UChar modrm,
+ Int am_sz, Int sz, UInt src_val,
+ Bool* decode_OK )
+{
+   /* src_val holds the d8 immediate (the literal bit offset);
+      on entry, delta points at the modrm byte. */
+
+ IRType ty = szToITy(sz);
+ IRTemp t2 = newTemp(Ity_I32);
+ IRTemp t2m = newTemp(Ity_I32);
+ IRTemp t_addr = IRTemp_INVALID;
+ HChar dis_buf[50];
+ UInt mask;
+
+ /* we're optimists :-) */
+ *decode_OK = True;
+
+ /* Limit src_val -- the bit offset -- to something within a word.
+ The Intel docs say that literal offsets larger than a word are
+ masked in this way. */
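+   /* For example, "btw $17, %ax" behaves like "btw $1, %ax",
+      since 17 & 15 == 1. */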
+ switch (sz) {
+ case 2: src_val &= 15; break;
+ case 4: src_val &= 31; break;
+ default: *decode_OK = False; return delta;
+ }
+
+ /* Invent a mask suitable for the operation. */
+ switch (gregOfRM(modrm)) {
+ case 4: /* BT */ mask = 0; break;
+ case 5: /* BTS */ mask = 1 << src_val; break;
+ case 6: /* BTR */ mask = ~(1 << src_val); break;
+ case 7: /* BTC */ mask = 1 << src_val; break;
+ /* If this needs to be extended, probably simplest to make a
+ new function to handle the other cases (0 .. 3). The
+         Intel docs, however, do not indicate any use for 0 .. 3, so
+ we don't expect this to happen. */
+ default: *decode_OK = False; return delta;
+ }
+
+ /* Fetch the value to be tested and modified into t2, which is
+ 32-bits wide regardless of sz. */
+ if (epartIsReg(modrm)) {
+ vassert(am_sz == 1);
+ assign( t2, widenUto32(getIReg(sz, eregOfRM(modrm))) );
+ delta += (am_sz + 1);
+ DIP("%s%c $0x%x, %s\n", nameGrp8(gregOfRM(modrm)), nameISize(sz),
+ src_val, nameIReg(sz,eregOfRM(modrm)));
+ } else {
+ Int len;
+ t_addr = disAMode ( &len, sorb, delta, dis_buf);
+ delta += (len+1);
+ assign( t2, widenUto32(loadLE(ty, mkexpr(t_addr))) );
+ DIP("%s%c $0x%x, %s\n", nameGrp8(gregOfRM(modrm)), nameISize(sz),
+ src_val, dis_buf);
+ }
+
+ /* Compute the new value into t2m, if non-BT. */
+ switch (gregOfRM(modrm)) {
+ case 4: /* BT */
+ break;
+ case 5: /* BTS */
+ assign( t2m, binop(Iop_Or32, mkU32(mask), mkexpr(t2)) );
+ break;
+ case 6: /* BTR */
+ assign( t2m, binop(Iop_And32, mkU32(mask), mkexpr(t2)) );
+ break;
+ case 7: /* BTC */
+ assign( t2m, binop(Iop_Xor32, mkU32(mask), mkexpr(t2)) );
+ break;
+ default:
+ /*NOTREACHED*/ /*the previous switch guards this*/
+ vassert(0);
+ }
+
+ /* Write the result back, if non-BT. If the CAS fails then we
+ side-exit from the trace at this point, and so the flag state is
+ not affected. This is of course as required. */
+ if (gregOfRM(modrm) != 4 /* BT */) {
+ if (epartIsReg(modrm)) {
+ putIReg(sz, eregOfRM(modrm), narrowTo(ty, mkexpr(t2m)));
+ } else {
+ if (locked) {
+ casLE( mkexpr(t_addr),
+ narrowTo(ty, mkexpr(t2))/*expd*/,
+ narrowTo(ty, mkexpr(t2m))/*new*/,
+ guest_EIP_curr_instr );
+ } else {
+ storeLE(mkexpr(t_addr), narrowTo(ty, mkexpr(t2m)));
+ }
+ }
+ }
+
+ /* Copy relevant bit from t2 into the carry flag. */
+ /* Flags: C=selected bit, O,S,Z,A,P undefined, so are set to zero. */
+ stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) ));
+ stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
+ stmt( IRStmt_Put(
+ OFFB_CC_DEP1,
+ binop(Iop_And32,
+ binop(Iop_Shr32, mkexpr(t2), mkU8(src_val)),
+ mkU32(1))
+ ));
+ /* Set NDEP even though it isn't used. This makes redundant-PUT
+ elimination of previous stores to this field work better. */
+ stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
+
+ return delta;
+}
+
+
+/* Signed/unsigned widening multiply. Generate IR to multiply the
+ value in EAX/AX/AL by the given IRTemp, and park the result in
+ EDX:EAX/DX:AX/AX.
+*/
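+/* E.g. (illustrative) "mulb %bl" with AL == 0x80, BL == 0x03 puts
+   0x0180 in AX, whereas "imulb %bl" with the same bits computes
+   -128 * 3 == -384, putting 0xFE80 in AX. */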
+static void codegen_mulL_A_D ( Int sz, Bool syned,
+ IRTemp tmp, HChar* tmp_txt )
+{
+ IRType ty = szToITy(sz);
+ IRTemp t1 = newTemp(ty);
+
+ assign( t1, getIReg(sz, R_EAX) );
+
+ switch (ty) {
+ case Ity_I32: {
+ IRTemp res64 = newTemp(Ity_I64);
+ IRTemp resHi = newTemp(Ity_I32);
+ IRTemp resLo = newTemp(Ity_I32);
+ IROp mulOp = syned ? Iop_MullS32 : Iop_MullU32;
+ UInt tBaseOp = syned ? X86G_CC_OP_SMULB : X86G_CC_OP_UMULB;
+ setFlags_MUL ( Ity_I32, t1, tmp, tBaseOp );
+ assign( res64, binop(mulOp, mkexpr(t1), mkexpr(tmp)) );
+ assign( resHi, unop(Iop_64HIto32,mkexpr(res64)));
+ assign( resLo, unop(Iop_64to32,mkexpr(res64)));
+ putIReg(4, R_EDX, mkexpr(resHi));
+ putIReg(4, R_EAX, mkexpr(resLo));
+ break;
+ }
+ case Ity_I16: {
+ IRTemp res32 = newTemp(Ity_I32);
+ IRTemp resHi = newTemp(Ity_I16);
+ IRTemp resLo = newTemp(Ity_I16);
+ IROp mulOp = syned ? Iop_MullS16 : Iop_MullU16;
+ UInt tBaseOp = syned ? X86G_CC_OP_SMULB : X86G_CC_OP_UMULB;
+ setFlags_MUL ( Ity_I16, t1, tmp, tBaseOp );
+ assign( res32, binop(mulOp, mkexpr(t1), mkexpr(tmp)) );
+ assign( resHi, unop(Iop_32HIto16,mkexpr(res32)));
+ assign( resLo, unop(Iop_32to16,mkexpr(res32)));
+ putIReg(2, R_EDX, mkexpr(resHi));
+ putIReg(2, R_EAX, mkexpr(resLo));
+ break;
+ }
+ case Ity_I8: {
+ IRTemp res16 = newTemp(Ity_I16);
+ IRTemp resHi = newTemp(Ity_I8);
+ IRTemp resLo = newTemp(Ity_I8);
+ IROp mulOp = syned ? Iop_MullS8 : Iop_MullU8;
+ UInt tBaseOp = syned ? X86G_CC_OP_SMULB : X86G_CC_OP_UMULB;
+ setFlags_MUL ( Ity_I8, t1, tmp, tBaseOp );
+ assign( res16, binop(mulOp, mkexpr(t1), mkexpr(tmp)) );
+ assign( resHi, unop(Iop_16HIto8,mkexpr(res16)));
+ assign( resLo, unop(Iop_16to8,mkexpr(res16)));
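+         /* resHi/resLo are computed but unused here: the full
+            16-bit product goes to %ax in one go. */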
+ putIReg(2, R_EAX, mkexpr(res16));
+ break;
+ }
+ default:
+ vpanic("codegen_mulL_A_D(x86)");
+ }
+ DIP("%s%c %s\n", syned ? "imul" : "mul", nameISize(sz), tmp_txt);
+}
+
+
+/* Group 3 extended opcodes. */
+static
+UInt dis_Grp3 ( UChar sorb, Bool locked, Int sz, Int delta, Bool* decode_OK )
+{
+ UInt d32;
+ UChar modrm;
+ HChar dis_buf[50];
+ Int len;
+ IRTemp addr;
+ IRType ty = szToITy(sz);
+ IRTemp t1 = newTemp(ty);
+ IRTemp dst1, src, dst0;
+
+ *decode_OK = True; /* may change this later */
+
+ modrm = getIByte(delta);
+
+ if (locked && (gregOfRM(modrm) != 2 && gregOfRM(modrm) != 3)) {
+ /* LOCK prefix only allowed with not and neg subopcodes */
+ *decode_OK = False;
+ return delta;
+ }
+
+ if (epartIsReg(modrm)) {
+ switch (gregOfRM(modrm)) {
+ case 0: { /* TEST */
+ delta++; d32 = getUDisp(sz, delta); delta += sz;
+ dst1 = newTemp(ty);
+ assign(dst1, binop(mkSizedOp(ty,Iop_And8),
+ getIReg(sz,eregOfRM(modrm)),
+ mkU(ty,d32)));
+ setFlags_DEP1( Iop_And8, dst1, ty );
+ DIP("test%c $0x%x, %s\n", nameISize(sz), d32,
+ nameIReg(sz, eregOfRM(modrm)));
+ break;
+ }
+ case 1: /* UNDEFINED */
+ /* The Intel docs imply this insn is undefined and binutils
+ agrees. Unfortunately Core 2 will run it (with who
+            knows what result?).  sandpile.org reckons it's an alias
+ for case 0. We play safe. */
+ *decode_OK = False;
+ break;
+ case 2: /* NOT */
+ delta++;
+ putIReg(sz, eregOfRM(modrm),
+ unop(mkSizedOp(ty,Iop_Not8),
+ getIReg(sz, eregOfRM(modrm))));
+ DIP("not%c %s\n", nameISize(sz), nameIReg(sz, eregOfRM(modrm)));
+ break;
+ case 3: /* NEG */
+ delta++;
+ dst0 = newTemp(ty);
+ src = newTemp(ty);
+ dst1 = newTemp(ty);
+ assign(dst0, mkU(ty,0));
+ assign(src, getIReg(sz,eregOfRM(modrm)));
+ assign(dst1, binop(mkSizedOp(ty,Iop_Sub8), mkexpr(dst0), mkexpr(src)));
+ setFlags_DEP1_DEP2(Iop_Sub8, dst0, src, ty);
+ putIReg(sz, eregOfRM(modrm), mkexpr(dst1));
+ DIP("neg%c %s\n", nameISize(sz), nameIReg(sz, eregOfRM(modrm)));
+ break;
+ case 4: /* MUL (unsigned widening) */
+ delta++;
+ src = newTemp(ty);
+ assign(src, getIReg(sz,eregOfRM(modrm)));
+ codegen_mulL_A_D ( sz, False, src, nameIReg(sz,eregOfRM(modrm)) );
+ break;
+ case 5: /* IMUL (signed widening) */
+ delta++;
+ src = newTemp(ty);
+ assign(src, getIReg(sz,eregOfRM(modrm)));
+ codegen_mulL_A_D ( sz, True, src, nameIReg(sz,eregOfRM(modrm)) );
+ break;
+ case 6: /* DIV */
+ delta++;
+ assign( t1, getIReg(sz, eregOfRM(modrm)) );
+ codegen_div ( sz, t1, False );
+ DIP("div%c %s\n", nameISize(sz), nameIReg(sz, eregOfRM(modrm)));
+ break;
+ case 7: /* IDIV */
+ delta++;
+ assign( t1, getIReg(sz, eregOfRM(modrm)) );
+ codegen_div ( sz, t1, True );
+ DIP("idiv%c %s\n", nameISize(sz), nameIReg(sz, eregOfRM(modrm)));
+ break;
+ default:
+ /* This can't happen - gregOfRM should return 0 .. 7 only */
+ vpanic("Grp3(x86)");
+ }
+ } else {
+ addr = disAMode ( &len, sorb, delta, dis_buf );
+ t1 = newTemp(ty);
+ delta += len;
+ assign(t1, loadLE(ty,mkexpr(addr)));
+ switch (gregOfRM(modrm)) {
+ case 0: { /* TEST */
+ d32 = getUDisp(sz, delta); delta += sz;
+ dst1 = newTemp(ty);
+ assign(dst1, binop(mkSizedOp(ty,Iop_And8),
+ mkexpr(t1), mkU(ty,d32)));
+ setFlags_DEP1( Iop_And8, dst1, ty );
+ DIP("test%c $0x%x, %s\n", nameISize(sz), d32, dis_buf);
+ break;
+ }
+ case 1: /* UNDEFINED */
+ /* See comment above on R case */
+ *decode_OK = False;
+ break;
+ case 2: /* NOT */
+ dst1 = newTemp(ty);
+ assign(dst1, unop(mkSizedOp(ty,Iop_Not8), mkexpr(t1)));
+ if (locked) {
+ casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(dst1)/*new*/,
+ guest_EIP_curr_instr );
+ } else {
+ storeLE( mkexpr(addr), mkexpr(dst1) );
+ }
+ DIP("not%c %s\n", nameISize(sz), dis_buf);
+ break;
+ case 3: /* NEG */
+ dst0 = newTemp(ty);
+ src = newTemp(ty);
+ dst1 = newTemp(ty);
+ assign(dst0, mkU(ty,0));
+ assign(src, mkexpr(t1));
+ assign(dst1, binop(mkSizedOp(ty,Iop_Sub8),
+ mkexpr(dst0), mkexpr(src)));
+ if (locked) {
+ casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(dst1)/*new*/,
+ guest_EIP_curr_instr );
+ } else {
+ storeLE( mkexpr(addr), mkexpr(dst1) );
+ }
+ setFlags_DEP1_DEP2(Iop_Sub8, dst0, src, ty);
+ DIP("neg%c %s\n", nameISize(sz), dis_buf);
+ break;
+ case 4: /* MUL */
+ codegen_mulL_A_D ( sz, False, t1, dis_buf );
+ break;
+ case 5: /* IMUL */
+ codegen_mulL_A_D ( sz, True, t1, dis_buf );
+ break;
+ case 6: /* DIV */
+ codegen_div ( sz, t1, False );
+ DIP("div%c %s\n", nameISize(sz), dis_buf);
+ break;
+ case 7: /* IDIV */
+ codegen_div ( sz, t1, True );
+ DIP("idiv%c %s\n", nameISize(sz), dis_buf);
+ break;
+ default:
+ /* This can't happen - gregOfRM should return 0 .. 7 only */
+ vpanic("Grp3(x86)");
+ }
+ }
+ return delta;
+}
+
+
+/* Group 4 extended opcodes. */
+static
+UInt dis_Grp4 ( UChar sorb, Bool locked, Int delta, Bool* decode_OK )
+{
+ Int alen;
+ UChar modrm;
+ HChar dis_buf[50];
+ IRType ty = Ity_I8;
+ IRTemp t1 = newTemp(ty);
+ IRTemp t2 = newTemp(ty);
+
+ *decode_OK = True;
+
+ modrm = getIByte(delta);
+
+ if (locked && (gregOfRM(modrm) != 0 && gregOfRM(modrm) != 1)) {
+ /* LOCK prefix only allowed with inc and dec subopcodes */
+ *decode_OK = False;
+ return delta;
+ }
+
+ if (epartIsReg(modrm)) {
+ assign(t1, getIReg(1, eregOfRM(modrm)));
+ switch (gregOfRM(modrm)) {
+ case 0: /* INC */
+ assign(t2, binop(Iop_Add8, mkexpr(t1), mkU8(1)));
+ putIReg(1, eregOfRM(modrm), mkexpr(t2));
+ setFlags_INC_DEC( True, t2, ty );
+ break;
+ case 1: /* DEC */
+ assign(t2, binop(Iop_Sub8, mkexpr(t1), mkU8(1)));
+ putIReg(1, eregOfRM(modrm), mkexpr(t2));
+ setFlags_INC_DEC( False, t2, ty );
+ break;
+ default:
+ *decode_OK = False;
+ return delta;
+ }
+ delta++;
+ DIP("%sb %s\n", nameGrp4(gregOfRM(modrm)),
+ nameIReg(1, eregOfRM(modrm)));
+ } else {
+ IRTemp addr = disAMode ( &alen, sorb, delta, dis_buf );
+ assign( t1, loadLE(ty, mkexpr(addr)) );
+ switch (gregOfRM(modrm)) {
+ case 0: /* INC */
+ assign(t2, binop(Iop_Add8, mkexpr(t1), mkU8(1)));
+ if (locked) {
+ casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(t2)/*new*/,
+ guest_EIP_curr_instr );
+ } else {
+ storeLE( mkexpr(addr), mkexpr(t2) );
+ }
+ setFlags_INC_DEC( True, t2, ty );
+ break;
+ case 1: /* DEC */
+ assign(t2, binop(Iop_Sub8, mkexpr(t1), mkU8(1)));
+ if (locked) {
+ casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(t2)/*new*/,
+ guest_EIP_curr_instr );
+ } else {
+ storeLE( mkexpr(addr), mkexpr(t2) );
+ }
+ setFlags_INC_DEC( False, t2, ty );
+ break;
+ default:
+ *decode_OK = False;
+ return delta;
+ }
+ delta += alen;
+ DIP("%sb %s\n", nameGrp4(gregOfRM(modrm)), dis_buf);
+ }
+ return delta;
+}
+
+
+/* Group 5 extended opcodes. */
+static
+UInt dis_Grp5 ( UChar sorb, Bool locked, Int sz, Int delta,
+ DisResult* dres, Bool* decode_OK )
+{
+ Int len;
+ UChar modrm;
+ HChar dis_buf[50];
+ IRTemp addr = IRTemp_INVALID;
+ IRType ty = szToITy(sz);
+ IRTemp t1 = newTemp(ty);
+ IRTemp t2 = IRTemp_INVALID;
+
+ *decode_OK = True;
+
+ modrm = getIByte(delta);
+
+ if (locked && (gregOfRM(modrm) != 0 && gregOfRM(modrm) != 1)) {
+ /* LOCK prefix only allowed with inc and dec subopcodes */
+ *decode_OK = False;
+ return delta;
+ }
+
+ if (epartIsReg(modrm)) {
+ assign(t1, getIReg(sz,eregOfRM(modrm)));
+ switch (gregOfRM(modrm)) {
+ case 0: /* INC */
+ vassert(sz == 2 || sz == 4);
+ t2 = newTemp(ty);
+ assign(t2, binop(mkSizedOp(ty,Iop_Add8),
+ mkexpr(t1), mkU(ty,1)));
+ setFlags_INC_DEC( True, t2, ty );
+ putIReg(sz,eregOfRM(modrm),mkexpr(t2));
+ break;
+ case 1: /* DEC */
+ vassert(sz == 2 || sz == 4);
+ t2 = newTemp(ty);
+ assign(t2, binop(mkSizedOp(ty,Iop_Sub8),
+ mkexpr(t1), mkU(ty,1)));
+ setFlags_INC_DEC( False, t2, ty );
+ putIReg(sz,eregOfRM(modrm),mkexpr(t2));
+ break;
+ case 2: /* call Ev */
+ vassert(sz == 4);
+ t2 = newTemp(Ity_I32);
+ assign(t2, binop(Iop_Sub32, getIReg(4,R_ESP), mkU32(4)));
+ putIReg(4, R_ESP, mkexpr(t2));
+ storeLE( mkexpr(t2), mkU32(guest_EIP_bbstart+delta+1));
+ jmp_treg(Ijk_Call,t1);
+ dres->whatNext = Dis_StopHere;
+ break;
+ case 4: /* jmp Ev */
+ vassert(sz == 4);
+ jmp_treg(Ijk_Boring,t1);
+ dres->whatNext = Dis_StopHere;
+ break;
+ case 6: /* PUSH Ev */
+ vassert(sz == 4 || sz == 2);
+ t2 = newTemp(Ity_I32);
+ assign( t2, binop(Iop_Sub32,getIReg(4,R_ESP),mkU32(sz)) );
+ putIReg(4, R_ESP, mkexpr(t2) );
+ storeLE( mkexpr(t2), mkexpr(t1) );
+ break;
+ default:
+ *decode_OK = False;
+ return delta;
+ }
+ delta++;
+ DIP("%s%c %s\n", nameGrp5(gregOfRM(modrm)),
+ nameISize(sz), nameIReg(sz, eregOfRM(modrm)));
+ } else {
+ addr = disAMode ( &len, sorb, delta, dis_buf );
+ assign(t1, loadLE(ty,mkexpr(addr)));
+ switch (gregOfRM(modrm)) {
+ case 0: /* INC */
+ t2 = newTemp(ty);
+ assign(t2, binop(mkSizedOp(ty,Iop_Add8),
+ mkexpr(t1), mkU(ty,1)));
+ if (locked) {
+ casLE( mkexpr(addr),
+ mkexpr(t1), mkexpr(t2), guest_EIP_curr_instr );
+ } else {
+ storeLE(mkexpr(addr),mkexpr(t2));
+ }
+ setFlags_INC_DEC( True, t2, ty );
+ break;
+ case 1: /* DEC */
+ t2 = newTemp(ty);
+ assign(t2, binop(mkSizedOp(ty,Iop_Sub8),
+ mkexpr(t1), mkU(ty,1)));
+ if (locked) {
+ casLE( mkexpr(addr),
+ mkexpr(t1), mkexpr(t2), guest_EIP_curr_instr );
+ } else {
+ storeLE(mkexpr(addr),mkexpr(t2));
+ }
+ setFlags_INC_DEC( False, t2, ty );
+ break;
+ case 2: /* call Ev */
+ vassert(sz == 4);
+ t2 = newTemp(Ity_I32);
+ assign(t2, binop(Iop_Sub32, getIReg(4,R_ESP), mkU32(4)));
+ putIReg(4, R_ESP, mkexpr(t2));
+ storeLE( mkexpr(t2), mkU32(guest_EIP_bbstart+delta+len));
+ jmp_treg(Ijk_Call,t1);
+ dres->whatNext = Dis_StopHere;
+ break;
+ case 4: /* JMP Ev */
+ vassert(sz == 4);
+ jmp_treg(Ijk_Boring,t1);
+ dres->whatNext = Dis_StopHere;
+ break;
+ case 6: /* PUSH Ev */
+ vassert(sz == 4 || sz == 2);
+ t2 = newTemp(Ity_I32);
+ assign( t2, binop(Iop_Sub32,getIReg(4,R_ESP),mkU32(sz)) );
+ putIReg(4, R_ESP, mkexpr(t2) );
+ storeLE( mkexpr(t2), mkexpr(t1) );
+ break;
+ default:
+ *decode_OK = False;
+ return delta;
+ }
+ delta += len;
+ DIP("%s%c %s\n", nameGrp5(gregOfRM(modrm)),
+ nameISize(sz), dis_buf);
+ }
+ return delta;
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Disassembling string ops (including REP prefixes) ---*/
+/*------------------------------------------------------------*/
+
+/* Code shared by all the string ops */
+static
+void dis_string_op_increment(Int sz, IRTemp t_inc)
+{
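+   /* The D flag is kept as +1 or -1.  Shifting it left by sz/2
+      (by 1 for sz==2, by 2 for sz==4) scales it to +/-sz, the
+      per-iteration increment. */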
+ if (sz == 4 || sz == 2) {
+ assign( t_inc,
+ binop(Iop_Shl32, IRExpr_Get( OFFB_DFLAG, Ity_I32 ),
+ mkU8(sz/2) ) );
+ } else {
+ assign( t_inc,
+ IRExpr_Get( OFFB_DFLAG, Ity_I32 ) );
+ }
+}
+
+static
+void dis_string_op( void (*dis_OP)( Int, IRTemp ),
+ Int sz, HChar* name, UChar sorb )
+{
+ IRTemp t_inc = newTemp(Ity_I32);
+ vassert(sorb == 0); /* hmm. so what was the point of passing it in? */
+ dis_string_op_increment(sz, t_inc);
+ dis_OP( sz, t_inc );
+ DIP("%s%c\n", name, nameISize(sz));
+}
+
+static
+void dis_MOVS ( Int sz, IRTemp t_inc )
+{
+ IRType ty = szToITy(sz);
+ IRTemp td = newTemp(Ity_I32); /* EDI */
+ IRTemp ts = newTemp(Ity_I32); /* ESI */
+
+ assign( td, getIReg(4, R_EDI) );
+ assign( ts, getIReg(4, R_ESI) );
+
+ storeLE( mkexpr(td), loadLE(ty,mkexpr(ts)) );
+
+ putIReg( 4, R_EDI, binop(Iop_Add32, mkexpr(td), mkexpr(t_inc)) );
+ putIReg( 4, R_ESI, binop(Iop_Add32, mkexpr(ts), mkexpr(t_inc)) );
+}
+
+static
+void dis_LODS ( Int sz, IRTemp t_inc )
+{
+ IRType ty = szToITy(sz);
+ IRTemp ts = newTemp(Ity_I32); /* ESI */
+
+ assign( ts, getIReg(4, R_ESI) );
+
+ putIReg( sz, R_EAX, loadLE(ty, mkexpr(ts)) );
+
+ putIReg( 4, R_ESI, binop(Iop_Add32, mkexpr(ts), mkexpr(t_inc)) );
+}
+
+static
+void dis_STOS ( Int sz, IRTemp t_inc )
+{
+ IRType ty = szToITy(sz);
+ IRTemp ta = newTemp(ty); /* EAX */
+ IRTemp td = newTemp(Ity_I32); /* EDI */
+
+ assign( ta, getIReg(sz, R_EAX) );
+ assign( td, getIReg(4, R_EDI) );
+
+ storeLE( mkexpr(td), mkexpr(ta) );
+
+ putIReg( 4, R_EDI, binop(Iop_Add32, mkexpr(td), mkexpr(t_inc)) );
+}
+
+static
+void dis_CMPS ( Int sz, IRTemp t_inc )
+{
+ IRType ty = szToITy(sz);
+ IRTemp tdv = newTemp(ty); /* (EDI) */
+ IRTemp tsv = newTemp(ty); /* (ESI) */
+ IRTemp td = newTemp(Ity_I32); /* EDI */
+ IRTemp ts = newTemp(Ity_I32); /* ESI */
+
+ assign( td, getIReg(4, R_EDI) );
+ assign( ts, getIReg(4, R_ESI) );
+
+ assign( tdv, loadLE(ty,mkexpr(td)) );
+ assign( tsv, loadLE(ty,mkexpr(ts)) );
+
+ setFlags_DEP1_DEP2 ( Iop_Sub8, tsv, tdv, ty );
+
+ putIReg(4, R_EDI, binop(Iop_Add32, mkexpr(td), mkexpr(t_inc)) );
+ putIReg(4, R_ESI, binop(Iop_Add32, mkexpr(ts), mkexpr(t_inc)) );
+}
+
+static
+void dis_SCAS ( Int sz, IRTemp t_inc )
+{
+ IRType ty = szToITy(sz);
+ IRTemp ta = newTemp(ty); /* EAX */
+ IRTemp td = newTemp(Ity_I32); /* EDI */
+ IRTemp tdv = newTemp(ty); /* (EDI) */
+
+ assign( ta, getIReg(sz, R_EAX) );
+ assign( td, getIReg(4, R_EDI) );
+
+ assign( tdv, loadLE(ty,mkexpr(td)) );
+ setFlags_DEP1_DEP2 ( Iop_Sub8, ta, tdv, ty );
+
+ putIReg(4, R_EDI, binop(Iop_Add32, mkexpr(td), mkexpr(t_inc)) );
+}
+
+
+/* Wrap the appropriate string op inside a REP/REPE/REPNE.
+ We assume the insn is the last one in the basic block, and so emit a jump
+ to the next insn, rather than just falling through. */
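+/* Illustrative sketch of the IR this generates, e.g. for "repe cmpsb"
+   (cond == X86CondZ):
+
+      if (ECX == 0) goto eip_next;   -- count exhausted
+      ECX := ECX - 1;
+      <one CMPSB iteration>;
+      if (ZF) goto eip;              -- re-dispatch this insn
+      goto eip_next;
+
+   For plain REP (cond == X86CondAlways) the conditional re-dispatch
+   becomes an unconditional jump back to eip. */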
+static
+void dis_REP_op ( X86Condcode cond,
+ void (*dis_OP)(Int, IRTemp),
+ Int sz, Addr32 eip, Addr32 eip_next, HChar* name )
+{
+ IRTemp t_inc = newTemp(Ity_I32);
+ IRTemp tc = newTemp(Ity_I32); /* ECX */
+
+ assign( tc, getIReg(4,R_ECX) );
+
+ stmt( IRStmt_Exit( binop(Iop_CmpEQ32,mkexpr(tc),mkU32(0)),
+ Ijk_Boring,
+ IRConst_U32(eip_next) ) );
+
+ putIReg(4, R_ECX, binop(Iop_Sub32, mkexpr(tc), mkU32(1)) );
+
+ dis_string_op_increment(sz, t_inc);
+ dis_OP (sz, t_inc);
+
+ if (cond == X86CondAlways) {
+ jmp_lit(Ijk_Boring,eip);
+ } else {
+ stmt( IRStmt_Exit( mk_x86g_calculate_condition(cond),
+ Ijk_Boring,
+ IRConst_U32(eip) ) );
+ jmp_lit(Ijk_Boring,eip_next);
+ }
+ DIP("%s%c\n", name, nameISize(sz));
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Arithmetic, etc. ---*/
+/*------------------------------------------------------------*/
+
+/* IMUL E, G.  On entry, delta0 points at the modR/M byte. */
+static
+UInt dis_mul_E_G ( UChar sorb,
+ Int size,
+ Int delta0 )
+{
+ Int alen;
+ HChar dis_buf[50];
+ UChar rm = getIByte(delta0);
+ IRType ty = szToITy(size);
+ IRTemp te = newTemp(ty);
+ IRTemp tg = newTemp(ty);
+ IRTemp resLo = newTemp(ty);
+
+ assign( tg, getIReg(size, gregOfRM(rm)) );
+ if (epartIsReg(rm)) {
+ assign( te, getIReg(size, eregOfRM(rm)) );
+ } else {
+ IRTemp addr = disAMode( &alen, sorb, delta0, dis_buf );
+ assign( te, loadLE(ty,mkexpr(addr)) );
+ }
+
+ setFlags_MUL ( ty, te, tg, X86G_CC_OP_SMULB );
+
+ assign( resLo, binop( mkSizedOp(ty, Iop_Mul8), mkexpr(te), mkexpr(tg) ) );
+
+ putIReg(size, gregOfRM(rm), mkexpr(resLo) );
+
+ if (epartIsReg(rm)) {
+ DIP("imul%c %s, %s\n", nameISize(size),
+ nameIReg(size,eregOfRM(rm)),
+ nameIReg(size,gregOfRM(rm)));
+ return 1+delta0;
+ } else {
+ DIP("imul%c %s, %s\n", nameISize(size),
+ dis_buf, nameIReg(size,gregOfRM(rm)));
+ return alen+delta0;
+ }
+}
+
+
+/* IMUL I * E -> G.  On entry, delta points at the modR/M byte. */
+static
+UInt dis_imul_I_E_G ( UChar sorb,
+ Int size,
+ Int delta,
+ Int litsize )
+{
+ Int d32, alen;
+ HChar dis_buf[50];
+ UChar rm = getIByte(delta);
+ IRType ty = szToITy(size);
+ IRTemp te = newTemp(ty);
+ IRTemp tl = newTemp(ty);
+ IRTemp resLo = newTemp(ty);
+
+ vassert(size == 1 || size == 2 || size == 4);
+
+ if (epartIsReg(rm)) {
+ assign(te, getIReg(size, eregOfRM(rm)));
+ delta++;
+ } else {
+ IRTemp addr = disAMode( &alen, sorb, delta, dis_buf );
+ assign(te, loadLE(ty, mkexpr(addr)));
+ delta += alen;
+ }
+ d32 = getSDisp(litsize,delta);
+ delta += litsize;
+
+ if (size == 1) d32 &= 0xFF;
+ if (size == 2) d32 &= 0xFFFF;
+
+ assign(tl, mkU(ty,d32));
+
+ assign( resLo, binop( mkSizedOp(ty, Iop_Mul8), mkexpr(te), mkexpr(tl) ));
+
+ setFlags_MUL ( ty, te, tl, X86G_CC_OP_SMULB );
+
+ putIReg(size, gregOfRM(rm), mkexpr(resLo));
+
+ DIP("imul %d, %s, %s\n", d32,
+ ( epartIsReg(rm) ? nameIReg(size,eregOfRM(rm)) : dis_buf ),
+ nameIReg(size,gregOfRM(rm)) );
+ return delta;
+}
+
+
+/* Generate an IR sequence to do a count-leading-zeroes operation on
+ the supplied IRTemp, and return a new IRTemp holding the result.
+ 'ty' may be Ity_I16 or Ity_I32 only. In the case where the
+ argument is zero, return the number of bits in the word (the
+ natural semantics). */
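+/* E.g. for ty == Ity_I16 and src == 0x0001: src32x == 0x0001 << 16
+   == 0x00010000, so Clz32 gives 15, the expected LZCNT result; a
+   zero source is special-cased below to yield 16, the operand
+   width. */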
+static IRTemp gen_LZCNT ( IRType ty, IRTemp src )
+{
+ vassert(ty == Ity_I32 || ty == Ity_I16);
+
+ IRTemp src32 = newTemp(Ity_I32);
+ assign(src32, widenUto32( mkexpr(src) ));
+
+ IRTemp src32x = newTemp(Ity_I32);
+ assign(src32x,
+ binop(Iop_Shl32, mkexpr(src32),
+ mkU8(32 - 8 * sizeofIRType(ty))));
+
+ // Clz32 has undefined semantics when its input is zero, so
+ // special-case around that.
+ IRTemp res32 = newTemp(Ity_I32);
+ assign(res32,
+ IRExpr_Mux0X(
+ unop(Iop_1Uto8,
+ binop(Iop_CmpEQ32, mkexpr(src32x), mkU32(0))),
+ unop(Iop_Clz32, mkexpr(src32x)),
+ mkU32(8 * sizeofIRType(ty))
+ ));
+
+ IRTemp res = newTemp(ty);
+ assign(res, narrowTo(ty, mkexpr(res32)));
+ return res;
+}
+
+
+/*------------------------------------------------------------*/
+/*--- ---*/
+/*--- x87 FLOATING POINT INSTRUCTIONS ---*/
+/*--- ---*/
+/*------------------------------------------------------------*/
+
+/* --- Helper functions for dealing with the register stack. --- */
+
+/* --- Set the emulation-warning pseudo-register. --- */
+
+static void put_emwarn ( IRExpr* e /* :: Ity_I32 */ )
+{
+ vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
+ stmt( IRStmt_Put( OFFB_EMWARN, e ) );
+}
+
+/* --- Produce an IRExpr* denoting a 64-bit QNaN. --- */
+
+static IRExpr* mkQNaN64 ( void )
+{
+ /* QNaN is 0 2047 1 0(51times)
+ == 0b 11111111111b 1 0(51times)
+ == 0x7FF8 0000 0000 0000
+ */
+ return IRExpr_Const(IRConst_F64i(0x7FF8000000000000ULL));
+}
+
+/* --------- Get/put the top-of-stack pointer. --------- */
+
+static IRExpr* get_ftop ( void )
+{
+ return IRExpr_Get( OFFB_FTOP, Ity_I32 );
+}
+
+static void put_ftop ( IRExpr* e )
+{
+ vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
+ stmt( IRStmt_Put( OFFB_FTOP, e ) );
+}
+
+/* --------- Get/put the C3210 bits. --------- */
+
+static IRExpr* get_C3210 ( void )
+{
+ return IRExpr_Get( OFFB_FC3210, Ity_I32 );
+}
+
+static void put_C3210 ( IRExpr* e )
+{
+ stmt( IRStmt_Put( OFFB_FC3210, e ) );
+}
+
+/* --------- Get/put the FPU rounding mode. --------- */
+static IRExpr* /* :: Ity_I32 */ get_fpround ( void )
+{
+ return IRExpr_Get( OFFB_FPROUND, Ity_I32 );
+}
+
+static void put_fpround ( IRExpr* /* :: Ity_I32 */ e )
+{
+ stmt( IRStmt_Put( OFFB_FPROUND, e ) );
+}
+
+
+/* --------- Synthesise a 2-bit FPU rounding mode. --------- */
+/* Produces a value in 0 .. 3, which is encoded as per the type
+ IRRoundingMode. Since the guest_FPROUND value is also encoded as
+ per IRRoundingMode, we merely need to get it and mask it for
+ safety.
+*/
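+/* For reference, the IRRoundingMode encoding (which matches the x87
+   RC field): 0 == Irrm_NEAREST, 1 == Irrm_NegINF (round down),
+   2 == Irrm_PosINF (round up), 3 == Irrm_ZERO (truncate). */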
+static IRExpr* /* :: Ity_I32 */ get_roundingmode ( void )
+{
+ return binop( Iop_And32, get_fpround(), mkU32(3) );
+}
+
+static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void )
+{
+ return mkU32(Irrm_NEAREST);
+}
+
+
+/* --------- Get/set FP register tag bytes. --------- */
+
+/* Given i, and some expression e, generate 'ST_TAG(i) = e'. */
+
+static void put_ST_TAG ( Int i, IRExpr* value )
+{
+ IRRegArray* descr;
+ vassert(typeOfIRExpr(irsb->tyenv, value) == Ity_I8);
+ descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
+ stmt( IRStmt_PutI( descr, get_ftop(), i, value ) );
+}
+
+/* Given i, generate an expression yielding 'ST_TAG(i)'. This will be
+ zero to indicate "Empty" and nonzero to indicate "NonEmpty". */
+
+static IRExpr* get_ST_TAG ( Int i )
+{
+ IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
+ return IRExpr_GetI( descr, get_ftop(), i );
+}
+
+
+/* --------- Get/set FP registers. --------- */
+
+/* Given i, and some expression e, emit 'ST(i) = e' and set the
+ register's tag to indicate the register is full. The previous
+ state of the register is not checked. */
+
+static void put_ST_UNCHECKED ( Int i, IRExpr* value )
+{
+ IRRegArray* descr;
+ vassert(typeOfIRExpr(irsb->tyenv, value) == Ity_F64);
+ descr = mkIRRegArray( OFFB_FPREGS, Ity_F64, 8 );
+ stmt( IRStmt_PutI( descr, get_ftop(), i, value ) );
+ /* Mark the register as in-use. */
+ put_ST_TAG(i, mkU8(1));
+}
+
+/* Given i, and some expression e, emit
+ ST(i) = is_full(i) ? NaN : e
+ and set the tag accordingly.
+*/
+
+static void put_ST ( Int i, IRExpr* value )
+{
+ put_ST_UNCHECKED( i,
+ IRExpr_Mux0X( get_ST_TAG(i),
+ /* 0 means empty */
+ value,
+ /* non-0 means full */
+ mkQNaN64()
+ )
+ );
+}
+
+
+/* Given i, generate an expression yielding 'ST(i)'. */
+
+static IRExpr* get_ST_UNCHECKED ( Int i )
+{
+ IRRegArray* descr = mkIRRegArray( OFFB_FPREGS, Ity_F64, 8 );
+ return IRExpr_GetI( descr, get_ftop(), i );
+}
+
+
+/* Given i, generate an expression yielding
+ is_full(i) ? ST(i) : NaN
+*/
+
+static IRExpr* get_ST ( Int i )
+{
+ return
+ IRExpr_Mux0X( get_ST_TAG(i),
+ /* 0 means empty */
+ mkQNaN64(),
+ /* non-0 means full */
+ get_ST_UNCHECKED(i));
+}
+
+
+/* Adjust FTOP downwards by one register. */
+
+static void fp_push ( void )
+{
+ put_ftop( binop(Iop_Sub32, get_ftop(), mkU32(1)) );
+}
+
+/* Adjust FTOP upwards by one register, and mark the vacated register
+ as empty. */
+
+static void fp_pop ( void )
+{
+ put_ST_TAG(0, mkU8(0));
+ put_ftop( binop(Iop_Add32, get_ftop(), mkU32(1)) );
+}
+
+/* Clear the C2 bit of the FPU status register, for
+ sin/cos/tan/sincos. */
+
+static void clear_C2 ( void )
+{
+ put_C3210( binop(Iop_And32, get_C3210(), mkU32(~X86G_FC_MASK_C2)) );
+}
+
+/* Invent a plausible-looking FPU status word value:
+ ((ftop & 7) << 11) | (c3210 & 0x4700)
+ */
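+/* E.g. ftop == 5 with just C3 set (c3210 == 0x4000) gives
+   (5 << 11) | 0x4000 == 0x6800, narrowed to 16 bits below. */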
+static IRExpr* get_FPU_sw ( void )
+{
+ return
+ unop(Iop_32to16,
+ binop(Iop_Or32,
+ binop(Iop_Shl32,
+ binop(Iop_And32, get_ftop(), mkU32(7)),
+ mkU8(11)),
+ binop(Iop_And32, get_C3210(), mkU32(0x4700))
+ ));
+}
+
+
+/* ------------------------------------------------------- */
+/* Given all that stack-mangling junk, we can now go ahead
+ and describe FP instructions.
+*/
+
+/* ST(0) = ST(0) `op` mem64/32(addr)
+ Need to check ST(0)'s tag on read, but not on write.
+*/
+static
+void fp_do_op_mem_ST_0 ( IRTemp addr, HChar* op_txt, HChar* dis_buf,
+ IROp op, Bool dbl )
+{
+ DIP("f%s%c %s\n", op_txt, dbl?'l':'s', dis_buf);
+ if (dbl) {
+ put_ST_UNCHECKED(0,
+ triop( op,
+ get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
+ get_ST(0),
+ loadLE(Ity_F64,mkexpr(addr))
+ ));
+ } else {
+ put_ST_UNCHECKED(0,
+ triop( op,
+ get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
+ get_ST(0),
+ unop(Iop_F32toF64, loadLE(Ity_F32,mkexpr(addr)))
+ ));
+ }
+}
+
+
+/* ST(0) = mem64/32(addr) `op` ST(0)
+ Need to check ST(0)'s tag on read, but not on write.
+*/
+static
+void fp_do_oprev_mem_ST_0 ( IRTemp addr, HChar* op_txt, HChar* dis_buf,
+ IROp op, Bool dbl )
+{
+ DIP("f%s%c %s\n", op_txt, dbl?'l':'s', dis_buf);
+ if (dbl) {
+ put_ST_UNCHECKED(0,
+ triop( op,
+ get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
+ loadLE(Ity_F64,mkexpr(addr)),
+ get_ST(0)
+ ));
+ } else {
+ put_ST_UNCHECKED(0,
+ triop( op,
+ get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
+ unop(Iop_F32toF64, loadLE(Ity_F32,mkexpr(addr))),
+ get_ST(0)
+ ));
+ }
+}
+
+
+/* ST(dst) = ST(dst) `op` ST(src).
+   Check dst and src tags on read, but not on write.
+*/
+static
+void fp_do_op_ST_ST ( HChar* op_txt, IROp op, UInt st_src, UInt st_dst,
+ Bool pop_after )
+{
+ DIP("f%s%s st(%d), st(%d)\n", op_txt, pop_after?"p":"",
+ (Int)st_src, (Int)st_dst );
+ put_ST_UNCHECKED(
+ st_dst,
+ triop( op,
+ get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
+ get_ST(st_dst),
+ get_ST(st_src) )
+ );
+ if (pop_after)
+ fp_pop();
+}
+
+/* ST(dst) = ST(src) `op` ST(dst).
+   Check dst and src tags on read, but not on write.
+*/
+static
+void fp_do_oprev_ST_ST ( HChar* op_txt, IROp op, UInt st_src, UInt st_dst,
+ Bool pop_after )
+{
+ DIP("f%s%s st(%d), st(%d)\n", op_txt, pop_after?"p":"",
+ (Int)st_src, (Int)st_dst );
+ put_ST_UNCHECKED(
+ st_dst,
+ triop( op,
+ get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
+ get_ST(st_src),
+ get_ST(st_dst) )
+ );
+ if (pop_after)
+ fp_pop();
+}
+
+/* %eflags(Z,P,C) = UCOMI( st(0), st(i) ) */
+static void fp_do_ucomi_ST0_STi ( UInt i, Bool pop_after )
+{
+ DIP("fucomi%s %%st(0),%%st(%d)\n", pop_after ? "p" : "", (Int)i );
+ /* This is a bit of a hack (and isn't really right). It sets
+ Z,P,C,O correctly, but forces A and S to zero, whereas the Intel
+ documentation implies A and S are unchanged.
+ */
+ /* It's also fishy in that it is used both for COMIP and
+ UCOMIP, and they aren't the same (although similar). */
+ stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) ));
+ stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
+ stmt( IRStmt_Put( OFFB_CC_DEP1,
+ binop( Iop_And32,
+ binop(Iop_CmpF64, get_ST(0), get_ST(i)),
+ mkU32(0x45)
+ )));
+ /* Set NDEP even though it isn't used. This makes redundant-PUT
+ elimination of previous stores to this field work better. */
+ stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
+ if (pop_after)
+ fp_pop();
+}
+
+
+static
+UInt dis_FPU ( Bool* decode_ok, UChar sorb, Int delta )
+{
+ Int len;
+ UInt r_src, r_dst;
+ HChar dis_buf[50];
+ IRTemp t1, t2;
+
+ /* On entry, delta points at the second byte of the insn (the modrm
+ byte).*/
+ UChar first_opcode = getIByte(delta-1);
+ UChar modrm = getIByte(delta+0);
+
+ /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xD8 opcodes +-+-+-+-+-+-+-+ */
+
+ if (first_opcode == 0xD8) {
+ if (modrm < 0xC0) {
+
+ /* bits 5,4,3 are an opcode extension, and the modRM also
+ specifies an address. */
+ IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
+ delta += len;
+
+ switch (gregOfRM(modrm)) {
+
+ case 0: /* FADD single-real */
+ fp_do_op_mem_ST_0 ( addr, "add", dis_buf, Iop_AddF64, False );
+ break;
+
+ case 1: /* FMUL single-real */
+ fp_do_op_mem_ST_0 ( addr, "mul", dis_buf, Iop_MulF64, False );
+ break;
+
+ case 2: /* FCOM single-real */
+ DIP("fcoms %s\n", dis_buf);
+ /* This forces C1 to zero, which isn't right. */
+ put_C3210(
+ binop( Iop_And32,
+ binop(Iop_Shl32,
+ binop(Iop_CmpF64,
+ get_ST(0),
+ unop(Iop_F32toF64,
+ loadLE(Ity_F32,mkexpr(addr)))),
+ mkU8(8)),
+ mkU32(0x4500)
+ ));
+ break;
+
+ case 3: /* FCOMP single-real */
+ DIP("fcomps %s\n", dis_buf);
+ /* This forces C1 to zero, which isn't right. */
+ put_C3210(
+ binop( Iop_And32,
+ binop(Iop_Shl32,
+ binop(Iop_CmpF64,
+ get_ST(0),
+ unop(Iop_F32toF64,
+ loadLE(Ity_F32,mkexpr(addr)))),
+ mkU8(8)),
+ mkU32(0x4500)
+ ));
+ fp_pop();
+ break;
+
+ case 4: /* FSUB single-real */
+ fp_do_op_mem_ST_0 ( addr, "sub", dis_buf, Iop_SubF64, False );
+ break;
+
+ case 5: /* FSUBR single-real */
+ fp_do_oprev_mem_ST_0 ( addr, "subr", dis_buf, Iop_SubF64, False );
+ break;
+
+ case 6: /* FDIV single-real */
+ fp_do_op_mem_ST_0 ( addr, "div", dis_buf, Iop_DivF64, False );
+ break;
+
+ case 7: /* FDIVR single-real */
+ fp_do_oprev_mem_ST_0 ( addr, "divr", dis_buf, Iop_DivF64, False );
+ break;
+
+ default:
+ vex_printf("unhandled opc_aux = 0x%2x\n", gregOfRM(modrm));
+ vex_printf("first_opcode == 0xD8\n");
+ goto decode_fail;
+ }
+ } else {
+ delta++;
+ switch (modrm) {
+
+ case 0xC0 ... 0xC7: /* FADD %st(?),%st(0) */
+ fp_do_op_ST_ST ( "add", Iop_AddF64, modrm - 0xC0, 0, False );
+ break;
+
+ case 0xC8 ... 0xCF: /* FMUL %st(?),%st(0) */
+ fp_do_op_ST_ST ( "mul", Iop_MulF64, modrm - 0xC8, 0, False );
+ break;
+
+ /* Dunno if this is right */
+ case 0xD0 ... 0xD7: /* FCOM %st(?),%st(0) */
+ r_dst = (UInt)modrm - 0xD0;
+ DIP("fcom %%st(0),%%st(%d)\n", (Int)r_dst);
+ /* This forces C1 to zero, which isn't right. */
+ put_C3210(
+ binop( Iop_And32,
+ binop(Iop_Shl32,
+ binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
+ mkU8(8)),
+ mkU32(0x4500)
+ ));
+ break;
+
+ /* Dunno if this is right */
+ case 0xD8 ... 0xDF: /* FCOMP %st(?),%st(0) */
+ r_dst = (UInt)modrm - 0xD8;
+ DIP("fcomp %%st(0),%%st(%d)\n", (Int)r_dst);
+ /* This forces C1 to zero, which isn't right. */
+ put_C3210(
+ binop( Iop_And32,
+ binop(Iop_Shl32,
+ binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
+ mkU8(8)),
+ mkU32(0x4500)
+ ));
+ fp_pop();
+ break;
+
+ case 0xE0 ... 0xE7: /* FSUB %st(?),%st(0) */
+ fp_do_op_ST_ST ( "sub", Iop_SubF64, modrm - 0xE0, 0, False );
+ break;
+
+ case 0xE8 ... 0xEF: /* FSUBR %st(?),%st(0) */
+ fp_do_oprev_ST_ST ( "subr", Iop_SubF64, modrm - 0xE8, 0, False );
+ break;
+
+ case 0xF0 ... 0xF7: /* FDIV %st(?),%st(0) */
+ fp_do_op_ST_ST ( "div", Iop_DivF64, modrm - 0xF0, 0, False );
+ break;
+
+ case 0xF8 ... 0xFF: /* FDIVR %st(?),%st(0) */
+ fp_do_oprev_ST_ST ( "divr", Iop_DivF64, modrm - 0xF8, 0, False );
+ break;
+
+ default:
+ goto decode_fail;
+ }
+ }
+ }
+
+ /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xD9 opcodes +-+-+-+-+-+-+-+ */
+ else
+ if (first_opcode == 0xD9) {
+ if (modrm < 0xC0) {
+
+ /* bits 5,4,3 are an opcode extension, and the modRM also
+ specifies an address. */
+ IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
+ delta += len;
+
+ switch (gregOfRM(modrm)) {
+
+ case 0: /* FLD single-real */
+ DIP("flds %s\n", dis_buf);
+ fp_push();
+ put_ST(0, unop(Iop_F32toF64,
+ loadLE(Ity_F32, mkexpr(addr))));
+ break;
+
+ case 2: /* FST single-real */
+ DIP("fsts %s\n", dis_buf);
+ storeLE(mkexpr(addr),
+ binop(Iop_F64toF32, get_roundingmode(), get_ST(0)));
+ break;
+
+ case 3: /* FSTP single-real */
+ DIP("fstps %s\n", dis_buf);
+ storeLE(mkexpr(addr),
+ binop(Iop_F64toF32, get_roundingmode(), get_ST(0)));
+ fp_pop();
+ break;
+
+ case 4: { /* FLDENV m28 */
+ /* Uses dirty helper:
+                 VexEmWarn x86g_dirtyhelper_FLDENV ( VexGuestX86State*, HWord ) */
+ IRTemp ew = newTemp(Ity_I32);
+ IRDirty* d = unsafeIRDirty_0_N (
+ 0/*regparms*/,
+ "x86g_dirtyhelper_FLDENV",
+ &x86g_dirtyhelper_FLDENV,
+ mkIRExprVec_1( mkexpr(addr) )
+ );
+ d->needsBBP = True;
+ d->tmp = ew;
+ /* declare we're reading memory */
+ d->mFx = Ifx_Read;
+ d->mAddr = mkexpr(addr);
+ d->mSize = 28;
+
+ /* declare we're writing guest state */
+ d->nFxState = 4;
+
+ d->fxState[0].fx = Ifx_Write;
+ d->fxState[0].offset = OFFB_FTOP;
+ d->fxState[0].size = sizeof(UInt);
+
+ d->fxState[1].fx = Ifx_Write;
+ d->fxState[1].offset = OFFB_FPTAGS;
+ d->fxState[1].size = 8 * sizeof(UChar);
+
+ d->fxState[2].fx = Ifx_Write;
+ d->fxState[2].offset = OFFB_FPROUND;
+ d->fxState[2].size = sizeof(UInt);
+
+ d->fxState[3].fx = Ifx_Write;
+ d->fxState[3].offset = OFFB_FC3210;
+ d->fxState[3].size = sizeof(UInt);
+
+ stmt( IRStmt_Dirty(d) );
+
+ /* ew contains any emulation warning we may need to
+ issue. If needed, side-exit to the next insn,
+ reporting the warning, so that Valgrind's dispatcher
+ sees the warning. */
+ put_emwarn( mkexpr(ew) );
+ stmt(
+ IRStmt_Exit(
+ binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
+ Ijk_EmWarn,
+ IRConst_U32( ((Addr32)guest_EIP_bbstart)+delta)
+ )
+ );
+
+ DIP("fldenv %s\n", dis_buf);
+ break;
+ }
+
+         case 5: { /* FLDCW */
+ /* The only thing we observe in the control word is the
+ rounding mode. Therefore, pass the 16-bit value
+ (x87 native-format control word) to a clean helper,
+ getting back a 64-bit value, the lower half of which
+ is the FPROUND value to store, and the upper half of
+ which is the emulation-warning token which may be
+ generated.
+ */
+            /* ULong x86g_check_fldcw ( UInt ); */
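+            /* E.g. for the power-up control word 0x037F the RC
+               field (bits 11:10) is zero, so the helper should
+               return FPROUND == Irrm_NEAREST in the low half and a
+               zero emulation-warning token in the high half. */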
+ IRTemp t64 = newTemp(Ity_I64);
+ IRTemp ew = newTemp(Ity_I32);
+ DIP("fldcw %s\n", dis_buf);
+ assign( t64, mkIRExprCCall(
+ Ity_I64, 0/*regparms*/,
+ "x86g_check_fldcw",
+ &x86g_check_fldcw,
+ mkIRExprVec_1(
+ unop( Iop_16Uto32,
+ loadLE(Ity_I16, mkexpr(addr)))
+ )
+ )
+ );
+
+ put_fpround( unop(Iop_64to32, mkexpr(t64)) );
+ assign( ew, unop(Iop_64HIto32, mkexpr(t64) ) );
+ put_emwarn( mkexpr(ew) );
+ /* Finally, if an emulation warning was reported,
+ side-exit to the next insn, reporting the warning,
+ so that Valgrind's dispatcher sees the warning. */
+ stmt(
+ IRStmt_Exit(
+ binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
+ Ijk_EmWarn,
+ IRConst_U32( ((Addr32)guest_EIP_bbstart)+delta)
+ )
+ );
+ break;
+ }
+
+ case 6: { /* FNSTENV m28 */
+ /* Uses dirty helper:
+               void x86g_dirtyhelper_FSTENV ( VexGuestX86State*, HWord ) */
+ IRDirty* d = unsafeIRDirty_0_N (
+ 0/*regparms*/,
+ "x86g_dirtyhelper_FSTENV",
+ &x86g_dirtyhelper_FSTENV,
+ mkIRExprVec_1( mkexpr(addr) )
+ );
+ d->needsBBP = True;
+ /* declare we're writing memory */
+ d->mFx = Ifx_Write;
+ d->mAddr = mkexpr(addr);
+ d->mSize = 28;
+
+ /* declare we're reading guest state */
+ d->nFxState = 4;
+
+ d->fxState[0].fx = Ifx_Read;
+ d->fxState[0].offset = OFFB_FTOP;
+ d->fxState[0].size = sizeof(UInt);
+
+ d->fxState[1].fx = Ifx_Read;
+ d->fxState[1].offset = OFFB_FPTAGS;
+ d->fxState[1].size = 8 * sizeof(UChar);
+
+ d->fxState[2].fx = Ifx_Read;
+ d->fxState[2].offset = OFFB_FPROUND;
+ d->fxState[2].size = sizeof(UInt);
+
+ d->fxState[3].fx = Ifx_Read;
+ d->fxState[3].offset = OFFB_FC3210;
+ d->fxState[3].size = sizeof(UInt);
+
+ stmt( IRStmt_Dirty(d) );
+
+ DIP("fnstenv %s\n", dis_buf);
+ break;
+ }
+
+ case 7: /* FNSTCW */
+ /* Fake up a native x87 FPU control word. The only
+ thing it depends on is FPROUND[1:0], so call a clean
+ helper to cook it up. */
+            /* UInt x86g_create_fpucw ( UInt fpround ) */
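+            /* Presumably this just plants FPROUND[1:0] into the RC
+               field of an otherwise-default control word, e.g.
+               Irrm_ZERO (3) -> 0x037F | (3 << 10) == 0x0F7F. */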
+ DIP("fnstcw %s\n", dis_buf);
+ storeLE(
+ mkexpr(addr),
+ unop( Iop_32to16,
+ mkIRExprCCall(
+ Ity_I32, 0/*regp*/,
+ "x86g_create_fpucw", &x86g_create_fpucw,
+ mkIRExprVec_1( get_fpround() )
+ )
+ )
+ );
+ break;
+
+ default:
+ vex_printf("unhandled opc_aux = 0x%2x\n", gregOfRM(modrm));
+ vex_printf("first_opcode == 0xD9\n");
+ goto decode_fail;
+ }
+
+ } else {
+ delta++;
+ switch (modrm) {
+
+ case 0xC0 ... 0xC7: /* FLD %st(?) */
+ r_src = (UInt)modrm - 0xC0;
+ DIP("fld %%st(%d)\n", (Int)r_src);
+ t1 = newTemp(Ity_F64);
+ assign(t1, get_ST(r_src));
+ fp_push();
+ put_ST(0, mkexpr(t1));
+ break;
+
+ case 0xC8 ... 0xCF: /* FXCH %st(?) */
+ r_src = (UInt)modrm - 0xC8;
+ DIP("fxch %%st(%d)\n", (Int)r_src);
+ t1 = newTemp(Ity_F64);
+ t2 = newTemp(Ity_F64);
+ assign(t1, get_ST(0));
+ assign(t2, get_ST(r_src));
+ put_ST_UNCHECKED(0, mkexpr(t2));
+ put_ST_UNCHECKED(r_src, mkexpr(t1));
+ break;
+
+ case 0xE0: /* FCHS */
+ DIP("fchs\n");
+ put_ST_UNCHECKED(0, unop(Iop_NegF64, get_ST(0)));
+ break;
+
+ case 0xE1: /* FABS */
+ DIP("fabs\n");
+ put_ST_UNCHECKED(0, unop(Iop_AbsF64, get_ST(0)));
+ break;
+
+ case 0xE4: /* FTST */
+ DIP("ftst\n");
+ /* This forces C1 to zero, which isn't right. */
+ /* Well, in fact the Intel docs say (bizarrely): "C1 is
+ set to 0 if stack underflow occurred; otherwise, set
+ to 0" which is pretty nonsensical. I guess it's a
+ typo. */
+ put_C3210(
+ binop( Iop_And32,
+ binop(Iop_Shl32,
+ binop(Iop_CmpF64,
+ get_ST(0),
+ IRExpr_Const(IRConst_F64i(0x0ULL))),
+ mkU8(8)),
+ mkU32(0x4500)
+ ));
+ break;
+
+ case 0xE5: { /* FXAM */
+ /* This is an interesting one. It examines %st(0),
+ regardless of whether the tag says it's empty or not.
+ Here, just pass both the tag (in our format) and the
+ value (as a double, actually a ULong) to a helper
+ function. */
+ IRExpr** args
+ = mkIRExprVec_2( unop(Iop_8Uto32, get_ST_TAG(0)),
+ unop(Iop_ReinterpF64asI64,
+ get_ST_UNCHECKED(0)) );
+ put_C3210(mkIRExprCCall(
+ Ity_I32,
+ 0/*regparm*/,
+ "x86g_calculate_FXAM", &x86g_calculate_FXAM,
+ args
+ ));
+ DIP("fxam\n");
+ break;
+ }
+
+ case 0xE8: /* FLD1 */
+ DIP("fld1\n");
+ fp_push();
+ /* put_ST(0, IRExpr_Const(IRConst_F64(1.0))); */
+ put_ST(0, IRExpr_Const(IRConst_F64i(0x3ff0000000000000ULL)));
+ break;
+
+ case 0xE9: /* FLDL2T */
+ DIP("fldl2t\n");
+ fp_push();
+ /* put_ST(0, IRExpr_Const(IRConst_F64(3.32192809488736234781))); */
+ put_ST(0, IRExpr_Const(IRConst_F64i(0x400a934f0979a371ULL)));
+ break;
+
+ case 0xEA: /* FLDL2E */
+ DIP("fldl2e\n");
+ fp_push();
+ /* put_ST(0, IRExpr_Const(IRConst_F64(1.44269504088896340739))); */
+ put_ST(0, IRExpr_Const(IRConst_F64i(0x3ff71547652b82feULL)));
+ break;
+
+ case 0xEB: /* FLDPI */
+ DIP("fldpi\n");
+ fp_push();
+ /* put_ST(0, IRExpr_Const(IRConst_F64(3.14159265358979323851))); */
+ put_ST(0, IRExpr_Const(IRConst_F64i(0x400921fb54442d18ULL)));
+ break;
+
+ case 0xEC: /* FLDLG2 */
+ DIP("fldlg2\n");
+ fp_push();
+ /* put_ST(0, IRExpr_Const(IRConst_F64(0.301029995663981143))); */
+ put_ST(0, IRExpr_Const(IRConst_F64i(0x3fd34413509f79ffULL)));
+ break;
+
+ case 0xED: /* FLDLN2 */
+ DIP("fldln2\n");
+ fp_push();
+ /* put_ST(0, IRExpr_Const(IRConst_F64(0.69314718055994530942))); */
+ put_ST(0, IRExpr_Const(IRConst_F64i(0x3fe62e42fefa39efULL)));
+ break;
+
+ case 0xEE: /* FLDZ */
+ DIP("fldz\n");
+ fp_push();
+ /* put_ST(0, IRExpr_Const(IRConst_F64(0.0))); */
+ put_ST(0, IRExpr_Const(IRConst_F64i(0x0000000000000000ULL)));
+ break;
+
+ case 0xF0: /* F2XM1 */
+ DIP("f2xm1\n");
+ put_ST_UNCHECKED(0,
+ binop(Iop_2xm1F64,
+ get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
+ get_ST(0)));
+ break;
+
+ case 0xF1: /* FYL2X */
+ DIP("fyl2x\n");
+ put_ST_UNCHECKED(1,
+ triop(Iop_Yl2xF64,
+ get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
+ get_ST(1),
+ get_ST(0)));
+ fp_pop();
+ break;
+
+ case 0xF2: /* FPTAN */
+ DIP("ftan\n");
+ put_ST_UNCHECKED(0,
+ binop(Iop_TanF64,
+ get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
+ get_ST(0)));
+ fp_push();
+ put_ST(0, IRExpr_Const(IRConst_F64(1.0)));
+ clear_C2(); /* HACK */
+ break;
+
+ case 0xF3: /* FPATAN */
+ DIP("fpatan\n");
+ put_ST_UNCHECKED(1,
+ triop(Iop_AtanF64,
+ get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
+ get_ST(1),
+ get_ST(0)));
+ fp_pop();
+ break;
+
+ case 0xF4: { /* FXTRACT */
+ IRTemp argF = newTemp(Ity_F64);
+ IRTemp sigF = newTemp(Ity_F64);
+ IRTemp expF = newTemp(Ity_F64);
+ IRTemp argI = newTemp(Ity_I64);
+ IRTemp sigI = newTemp(Ity_I64);
+ IRTemp expI = newTemp(Ity_I64);
+ DIP("fxtract\n");
+ assign( argF, get_ST(0) );
+ assign( argI, unop(Iop_ReinterpF64asI64, mkexpr(argF)));
+ assign( sigI,
+ mkIRExprCCall(
+ Ity_I64, 0/*regparms*/,
+ "x86amd64g_calculate_FXTRACT",
+ &x86amd64g_calculate_FXTRACT,
+ mkIRExprVec_2( mkexpr(argI),
+ mkIRExpr_HWord(0)/*sig*/ ))
+ );
+ assign( expI,
+ mkIRExprCCall(
+ Ity_I64, 0/*regparms*/,
+ "x86amd64g_calculate_FXTRACT",
+ &x86amd64g_calculate_FXTRACT,
+ mkIRExprVec_2( mkexpr(argI),
+ mkIRExpr_HWord(1)/*exp*/ ))
+ );
+ assign( sigF, unop(Iop_ReinterpI64asF64, mkexpr(sigI)) );
+ assign( expF, unop(Iop_ReinterpI64asF64, mkexpr(expI)) );
+ /* exponent */
+ put_ST_UNCHECKED(0, mkexpr(expF) );
+ fp_push();
+ /* significand */
+ put_ST(0, mkexpr(sigF) );
+ break;
+ }
+
+ case 0xF5: { /* FPREM1 -- IEEE compliant */
+ IRTemp a1 = newTemp(Ity_F64);
+ IRTemp a2 = newTemp(Ity_F64);
+ DIP("fprem1\n");
+ /* Do FPREM1 twice, once to get the remainder, and once
+ to get the C3210 flag values. */
+ assign( a1, get_ST(0) );
+ assign( a2, get_ST(1) );
+ put_ST_UNCHECKED(0,
+ triop(Iop_PRem1F64,
+ get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
+ mkexpr(a1),
+ mkexpr(a2)));
+ put_C3210(
+ triop(Iop_PRem1C3210F64,
+ get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
+ mkexpr(a1),
+ mkexpr(a2)) );
+ break;
+ }
+
+ case 0xF7: /* FINCSTP */
+ DIP("fprem\n");
+ put_ftop( binop(Iop_Add32, get_ftop(), mkU32(1)) );
+ break;
+
+ case 0xF8: { /* FPREM -- not IEEE compliant */
+ IRTemp a1 = newTemp(Ity_F64);
+ IRTemp a2 = newTemp(Ity_F64);
+ DIP("fprem\n");
+ /* Do FPREM twice, once to get the remainder, and once
+ to get the C3210 flag values. */
+ assign( a1, get_ST(0) );
+ assign( a2, get_ST(1) );
+ put_ST_UNCHECKED(0,
+ triop(Iop_PRemF64,
+ get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
+ mkexpr(a1),
+ mkexpr(a2)));
+ put_C3210(
+ triop(Iop_PRemC3210F64,
+ get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
+ mkexpr(a1),
+ mkexpr(a2)) );
+ break;
+ }
+
+ case 0xF9: /* FYL2XP1 */
+ DIP("fyl2xp1\n");
+ put_ST_UNCHECKED(1,
+ triop(Iop_Yl2xp1F64,
+ get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
+ get_ST(1),
+ get_ST(0)));
+ fp_pop();
+ break;
+
+ case 0xFA: /* FSQRT */
+ DIP("fsqrt\n");
+ put_ST_UNCHECKED(0,
+ binop(Iop_SqrtF64,
+ get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
+ get_ST(0)));
+ break;
+
+ case 0xFB: { /* FSINCOS */
+ IRTemp a1 = newTemp(Ity_F64);
+ assign( a1, get_ST(0) );
+ DIP("fsincos\n");
+ put_ST_UNCHECKED(0,
+ binop(Iop_SinF64,
+ get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
+ mkexpr(a1)));
+ fp_push();
+ put_ST(0,
+ binop(Iop_CosF64,
+ get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
+ mkexpr(a1)));
+ clear_C2(); /* HACK */
+ break;
+ }
+
+ case 0xFC: /* FRNDINT */
+ DIP("frndint\n");
+ put_ST_UNCHECKED(0,
+ binop(Iop_RoundF64toInt, get_roundingmode(), get_ST(0)) );
+ break;
+
+ case 0xFD: /* FSCALE */
+ DIP("fscale\n");
+ put_ST_UNCHECKED(0,
+ triop(Iop_ScaleF64,
+ get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
+ get_ST(0),
+ get_ST(1)));
+ break;
+
+ case 0xFE: /* FSIN */
+ DIP("fsin\n");
+ put_ST_UNCHECKED(0,
+ binop(Iop_SinF64,
+ get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
+ get_ST(0)));
+ clear_C2(); /* HACK */
+ break;
+
+ case 0xFF: /* FCOS */
+ DIP("fcos\n");
+ put_ST_UNCHECKED(0,
+ binop(Iop_CosF64,
+ get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
+ get_ST(0)));
+ clear_C2(); /* HACK */
+ break;
+
+ default:
+ goto decode_fail;
+ }
+ }
+ }
+
+ /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDA opcodes +-+-+-+-+-+-+-+ */
+ else
+ if (first_opcode == 0xDA) {
+
+ if (modrm < 0xC0) {
+
+ /* bits 5,4,3 are an opcode extension, and the modRM also
+ specifies an address. */
+ IROp fop;
+ IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
+ delta += len;
+ switch (gregOfRM(modrm)) {
+
+ case 0: /* FIADD m32int */ /* ST(0) += m32int */
+ DIP("fiaddl %s\n", dis_buf);
+ fop = Iop_AddF64;
+ goto do_fop_m32;
+
+ case 1: /* FIMUL m32int */ /* ST(0) *= m32int */
+ DIP("fimull %s\n", dis_buf);
+ fop = Iop_MulF64;
+ goto do_fop_m32;
+
+ case 2: /* FICOM m32int */
+ DIP("ficoml %s\n", dis_buf);
+ /* This forces C1 to zero, which isn't right. */
+ put_C3210(
+ binop( Iop_And32,
+ binop(Iop_Shl32,
+ binop(Iop_CmpF64,
+ get_ST(0),
+ unop(Iop_I32StoF64,
+ loadLE(Ity_I32,mkexpr(addr)))),
+ mkU8(8)),
+ mkU32(0x4500)
+ ));
+ break;
+
+ case 3: /* FICOMP m32int */
+ DIP("ficompl %s\n", dis_buf);
+ /* This forces C1 to zero, which isn't right. */
+ put_C3210(
+ binop( Iop_And32,
+ binop(Iop_Shl32,
+ binop(Iop_CmpF64,
+ get_ST(0),
+ unop(Iop_I32StoF64,
+ loadLE(Ity_I32,mkexpr(addr)))),
+ mkU8(8)),
+ mkU32(0x4500)
+ ));
+ fp_pop();
+ break;
+
+ case 4: /* FISUB m32int */ /* ST(0) -= m32int */
+ DIP("fisubl %s\n", dis_buf);
+ fop = Iop_SubF64;
+ goto do_fop_m32;
+
+ case 5: /* FISUBR m32int */ /* ST(0) = m32int - ST(0) */
+ DIP("fisubrl %s\n", dis_buf);
+ fop = Iop_SubF64;
+ goto do_foprev_m32;
+
+ case 6: /* FIDIV m32int */ /* ST(0) /= m32int */
+ DIP("fidivl %s\n", dis_buf);
+ fop = Iop_DivF64;
+ goto do_fop_m32;
+
+ case 7: /* FIDIVR m32int */ /* ST(0) = m32int / ST(0) */
+ DIP("fidivrl %s\n", dis_buf);
+ fop = Iop_DivF64;
+ goto do_foprev_m32;
+
+ do_fop_m32:
+ put_ST_UNCHECKED(0,
+ triop(fop,
+ get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
+ get_ST(0),
+ unop(Iop_I32StoF64,
+ loadLE(Ity_I32, mkexpr(addr)))));
+ break;
+
+ do_foprev_m32:
+ put_ST_UNCHECKED(0,
+ triop(fop,
+ get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
+ unop(Iop_I32StoF64,
+ loadLE(Ity_I32, mkexpr(addr))),
+ get_ST(0)));
+ break;
+
+ default:
+ vex_printf("unhandled opc_aux = 0x%2x\n", gregOfRM(modrm));
+ vex_printf("first_opcode == 0xDA\n");
+ goto decode_fail;
+ }
+
+ } else {
+
+ delta++;
+ switch (modrm) {
+
+ case 0xC0 ... 0xC7: /* FCMOVB ST(i), ST(0) */
+ r_src = (UInt)modrm - 0xC0;
+ DIP("fcmovb %%st(%d), %%st(0)\n", (Int)r_src);
+ put_ST_UNCHECKED(0,
+ IRExpr_Mux0X(
+ unop(Iop_1Uto8,
+ mk_x86g_calculate_condition(X86CondB)),
+ get_ST(0), get_ST(r_src)) );
+ break;
+
+ case 0xC8 ... 0xCF: /* FCMOVE(Z) ST(i), ST(0) */
+ r_src = (UInt)modrm - 0xC8;
+ DIP("fcmovz %%st(%d), %%st(0)\n", (Int)r_src);
+ put_ST_UNCHECKED(0,
+ IRExpr_Mux0X(
+ unop(Iop_1Uto8,
+ mk_x86g_calculate_condition(X86CondZ)),
+ get_ST(0), get_ST(r_src)) );
+ break;
+
+ case 0xD0 ... 0xD7: /* FCMOVBE ST(i), ST(0) */
+ r_src = (UInt)modrm - 0xD0;
+ DIP("fcmovbe %%st(%d), %%st(0)\n", (Int)r_src);
+ put_ST_UNCHECKED(0,
+ IRExpr_Mux0X(
+ unop(Iop_1Uto8,
+ mk_x86g_calculate_condition(X86CondBE)),
+ get_ST(0), get_ST(r_src)) );
+ break;
+
+ case 0xD8 ... 0xDF: /* FCMOVU ST(i), ST(0) */
+ r_src = (UInt)modrm - 0xD8;
+ DIP("fcmovu %%st(%d), %%st(0)\n", (Int)r_src);
+ put_ST_UNCHECKED(0,
+ IRExpr_Mux0X(
+ unop(Iop_1Uto8,
+ mk_x86g_calculate_condition(X86CondP)),
+ get_ST(0), get_ST(r_src)) );
+ break;
+
+ case 0xE9: /* FUCOMPP %st(0),%st(1) */
+ DIP("fucompp %%st(0),%%st(1)\n");
+ /* This forces C1 to zero, which isn't right. */
+ put_C3210(
+ binop( Iop_And32,
+ binop(Iop_Shl32,
+ binop(Iop_CmpF64, get_ST(0), get_ST(1)),
+ mkU8(8)),
+ mkU32(0x4500)
+ ));
+ fp_pop();
+ fp_pop();
+ break;
+
+ default:
+ goto decode_fail;
+ }
+
+ }
+ }
+
+ /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDB opcodes +-+-+-+-+-+-+-+ */
+ else
+ if (first_opcode == 0xDB) {
+ if (modrm < 0xC0) {
+
+ /* bits 5,4,3 are an opcode extension, and the modRM also
+ specifies an address. */
+ IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
+ delta += len;
+
+ switch (gregOfRM(modrm)) {
+
+ case 0: /* FILD m32int */
+ DIP("fildl %s\n", dis_buf);
+ fp_push();
+ put_ST(0, unop(Iop_I32StoF64,
+ loadLE(Ity_I32, mkexpr(addr))));
+ break;
+
+ case 1: /* FISTTPL m32 (SSE3) */
+ DIP("fisttpl %s\n", dis_buf);
+ storeLE( mkexpr(addr),
+ binop(Iop_F64toI32S, mkU32(Irrm_ZERO), get_ST(0)) );
+ fp_pop();
+ break;
+
+ case 2: /* FIST m32 */
+ DIP("fistl %s\n", dis_buf);
+ storeLE( mkexpr(addr),
+ binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) );
+ break;
+
+ case 3: /* FISTP m32 */
+ DIP("fistpl %s\n", dis_buf);
+ storeLE( mkexpr(addr),
+ binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) );
+ fp_pop();
+ break;
+
+ case 5: { /* FLD extended-real */
+ /* Uses dirty helper:
+                  ULong x86g_dirtyhelper_loadF80le ( UInt )
+ addr holds the address. First, do a dirty call to
+ get hold of the data. */
+ IRTemp val = newTemp(Ity_I64);
+ IRExpr** args = mkIRExprVec_1 ( mkexpr(addr) );
+
+ IRDirty* d = unsafeIRDirty_1_N (
+ val,
+ 0/*regparms*/,
+ "x86g_dirtyhelper_loadF80le",
+ &x86g_dirtyhelper_loadF80le,
+ args
+ );
+ /* declare that we're reading memory */
+ d->mFx = Ifx_Read;
+ d->mAddr = mkexpr(addr);
+ d->mSize = 10;
+
+ /* execute the dirty call, dumping the result in val. */
+ stmt( IRStmt_Dirty(d) );
+ fp_push();
+ put_ST(0, unop(Iop_ReinterpI64asF64, mkexpr(val)));
+
+ DIP("fldt %s\n", dis_buf);
+ break;
+ }
+
+ case 7: { /* FSTP extended-real */
+            /* Uses dirty helper: void x86g_dirtyhelper_storeF80le ( UInt, ULong ) */
+ IRExpr** args
+ = mkIRExprVec_2( mkexpr(addr),
+ unop(Iop_ReinterpF64asI64, get_ST(0)) );
+
+ IRDirty* d = unsafeIRDirty_0_N (
+ 0/*regparms*/,
+ "x86g_dirtyhelper_storeF80le",
+ &x86g_dirtyhelper_storeF80le,
+ args
+ );
+ /* declare we're writing memory */
+ d->mFx = Ifx_Write;
+ d->mAddr = mkexpr(addr);
+ d->mSize = 10;
+
+ /* execute the dirty call. */
+ stmt( IRStmt_Dirty(d) );
+ fp_pop();
+
+ DIP("fstpt\n %s", dis_buf);
+ break;
+ }
+
+ default:
+ vex_printf("unhandled opc_aux = 0x%2x\n", gregOfRM(modrm));
+ vex_printf("first_opcode == 0xDB\n");
+ goto decode_fail;
+ }
+
+ } else {
+
+ delta++;
+ switch (modrm) {
+
+ case 0xC0 ... 0xC7: /* FCMOVNB ST(i), ST(0) */
+ r_src = (UInt)modrm - 0xC0;
+ DIP("fcmovnb %%st(%d), %%st(0)\n", (Int)r_src);
+ put_ST_UNCHECKED(0,
+ IRExpr_Mux0X(
+ unop(Iop_1Uto8,
+ mk_x86g_calculate_condition(X86CondNB)),
+ get_ST(0), get_ST(r_src)) );
+ break;
+
+ case 0xC8 ... 0xCF: /* FCMOVNE(NZ) ST(i), ST(0) */
+ r_src = (UInt)modrm - 0xC8;
+ DIP("fcmovnz %%st(%d), %%st(0)\n", (Int)r_src);
+ put_ST_UNCHECKED(0,
+ IRExpr_Mux0X(
+ unop(Iop_1Uto8,
+ mk_x86g_calculate_condition(X86CondNZ)),
+ get_ST(0), get_ST(r_src)) );
+ break;
+
+ case 0xD0 ... 0xD7: /* FCMOVNBE ST(i), ST(0) */
+ r_src = (UInt)modrm - 0xD0;
+ DIP("fcmovnbe %%st(%d), %%st(0)\n", (Int)r_src);
+ put_ST_UNCHECKED(0,
+ IRExpr_Mux0X(
+ unop(Iop_1Uto8,
+ mk_x86g_calculate_condition(X86CondNBE)),
+ get_ST(0), get_ST(r_src)) );
+ break;
+
+ case 0xD8 ... 0xDF: /* FCMOVNU ST(i), ST(0) */
+ r_src = (UInt)modrm - 0xD8;
+ DIP("fcmovnu %%st(%d), %%st(0)\n", (Int)r_src);
+ put_ST_UNCHECKED(0,
+ IRExpr_Mux0X(
+ unop(Iop_1Uto8,
+ mk_x86g_calculate_condition(X86CondNP)),
+ get_ST(0), get_ST(r_src)) );
+ break;
+
+         case 0xE2: /* FNCLEX */
+            /* Nothing to do: the FP exception flags aren't modelled. */
+            DIP("fnclex\n");
+            break;
+
+         case 0xE3: { /* FNINIT */
+ /* Uses dirty helper:
+               void x86g_dirtyhelper_FINIT ( VexGuestX86State* ) */
+ IRDirty* d = unsafeIRDirty_0_N (
+ 0/*regparms*/,
+ "x86g_dirtyhelper_FINIT",
+ &x86g_dirtyhelper_FINIT,
+ mkIRExprVec_0()
+ );
+ d->needsBBP = True;
+
+ /* declare we're writing guest state */
+ d->nFxState = 5;
+
+ d->fxState[0].fx = Ifx_Write;
+ d->fxState[0].offset = OFFB_FTOP;
+ d->fxState[0].size = sizeof(UInt);
+
+ d->fxState[1].fx = Ifx_Write;
+ d->fxState[1].offset = OFFB_FPREGS;
+ d->fxState[1].size = 8 * sizeof(ULong);
+
+ d->fxState[2].fx = Ifx_Write;
+ d->fxState[2].offset = OFFB_FPTAGS;
+ d->fxState[2].size = 8 * sizeof(UChar);
+
+ d->fxState[3].fx = Ifx_Write;
+ d->fxState[3].offset = OFFB_FPROUND;
+ d->fxState[3].size = sizeof(UInt);
+
+ d->fxState[4].fx = Ifx_Write;
+ d->fxState[4].offset = OFFB_FC3210;
+ d->fxState[4].size = sizeof(UInt);
+
+ stmt( IRStmt_Dirty(d) );
+
+ DIP("fninit\n");
+ break;
+ }
+
+ case 0xE8 ... 0xEF: /* FUCOMI %st(0),%st(?) */
+ fp_do_ucomi_ST0_STi( (UInt)modrm - 0xE8, False );
+ break;
+
+ case 0xF0 ... 0xF7: /* FCOMI %st(0),%st(?) */
+ fp_do_ucomi_ST0_STi( (UInt)modrm - 0xF0, False );
+ break;
+
+ default:
+ goto decode_fail;
+ }
+ }
+ }
+
+ /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDC opcodes +-+-+-+-+-+-+-+ */
+ else
+ if (first_opcode == 0xDC) {
+ if (modrm < 0xC0) {
+
+ /* bits 5,4,3 are an opcode extension, and the modRM also
+ specifies an address. */
+ IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
+ delta += len;
+
+ switch (gregOfRM(modrm)) {
+
+ case 0: /* FADD double-real */
+ fp_do_op_mem_ST_0 ( addr, "add", dis_buf, Iop_AddF64, True );
+ break;
+
+ case 1: /* FMUL double-real */
+ fp_do_op_mem_ST_0 ( addr, "mul", dis_buf, Iop_MulF64, True );
+ break;
+
+ case 2: /* FCOM double-real */
+ DIP("fcoml %s\n", dis_buf);
+ /* This forces C1 to zero, which isn't right. */
+ put_C3210(
+ binop( Iop_And32,
+ binop(Iop_Shl32,
+ binop(Iop_CmpF64,
+ get_ST(0),
+ loadLE(Ity_F64,mkexpr(addr))),
+ mkU8(8)),
+ mkU32(0x4500)
+ ));
+ break;
+
+ case 3: /* FCOMP double-real */
+ DIP("fcompl %s\n", dis_buf);
+ /* This forces C1 to zero, which isn't right. */
+ put_C3210(
+ binop( Iop_And32,
+ binop(Iop_Shl32,
+ binop(Iop_CmpF64,
+ get_ST(0),
+ loadLE(Ity_F64,mkexpr(addr))),
+ mkU8(8)),
+ mkU32(0x4500)
+ ));
+ fp_pop();
+ break;
+
+ case 4: /* FSUB double-real */
+ fp_do_op_mem_ST_0 ( addr, "sub", dis_buf, Iop_SubF64, True );
+ break;
+
+ case 5: /* FSUBR double-real */
+ fp_do_oprev_mem_ST_0 ( addr, "subr", dis_buf, Iop_SubF64, True );
+ break;
+
+ case 6: /* FDIV double-real */
+ fp_do_op_mem_ST_0 ( addr, "div", dis_buf, Iop_DivF64, True );
+ break;
+
+ case 7: /* FDIVR double-real */
+ fp_do_oprev_mem_ST_0 ( addr, "divr", dis_buf, Iop_DivF64, True );
+ break;
+
+ default:
+ vex_printf("unhandled opc_aux = 0x%2x\n", gregOfRM(modrm));
+ vex_printf("first_opcode == 0xDC\n");
+ goto decode_fail;
+ }
+
+ } else {
+
+ delta++;
+ switch (modrm) {
+
+ case 0xC0 ... 0xC7: /* FADD %st(0),%st(?) */
+ fp_do_op_ST_ST ( "add", Iop_AddF64, 0, modrm - 0xC0, False );
+ break;
+
+ case 0xC8 ... 0xCF: /* FMUL %st(0),%st(?) */
+ fp_do_op_ST_ST ( "mul", Iop_MulF64, 0, modrm - 0xC8, False );
+ break;
+
+ case 0xE0 ... 0xE7: /* FSUBR %st(0),%st(?) */
+ fp_do_oprev_ST_ST ( "subr", Iop_SubF64, 0, modrm - 0xE0, False );
+ break;
+
+ case 0xE8 ... 0xEF: /* FSUB %st(0),%st(?) */
+ fp_do_op_ST_ST ( "sub", Iop_SubF64, 0, modrm - 0xE8, False );
+ break;
+
+ case 0xF0 ... 0xF7: /* FDIVR %st(0),%st(?) */
+ fp_do_oprev_ST_ST ( "divr", Iop_DivF64, 0, modrm - 0xF0, False );
+ break;
+
+ case 0xF8 ... 0xFF: /* FDIV %st(0),%st(?) */
+ fp_do_op_ST_ST ( "div", Iop_DivF64, 0, modrm - 0xF8, False );
+ break;
+
+ default:
+ goto decode_fail;
+ }
+
+ }
+ }
+
+ /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDD opcodes +-+-+-+-+-+-+-+ */
+ else
+ if (first_opcode == 0xDD) {
+
+ if (modrm < 0xC0) {
+
+ /* bits 5,4,3 are an opcode extension, and the modRM also
+ specifies an address. */
+ IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
+ delta += len;
+
+ switch (gregOfRM(modrm)) {
+
+ case 0: /* FLD double-real */
+ DIP("fldl %s\n", dis_buf);
+ fp_push();
+ put_ST(0, loadLE(Ity_F64, mkexpr(addr)));
+ break;
+
+ case 1: /* FISTTPQ m64 (SSE3) */
+ DIP("fistppll %s\n", dis_buf);
+ storeLE( mkexpr(addr),
+ binop(Iop_F64toI64S, mkU32(Irrm_ZERO), get_ST(0)) );
+ fp_pop();
+ break;
+
+ case 2: /* FST double-real */
+ DIP("fstl %s\n", dis_buf);
+ storeLE(mkexpr(addr), get_ST(0));
+ break;
+
+ case 3: /* FSTP double-real */
+ DIP("fstpl %s\n", dis_buf);
+ storeLE(mkexpr(addr), get_ST(0));
+ fp_pop();
+ break;
+
+ case 4: { /* FRSTOR m108 */
+ /* Uses dirty helper:
+ VexEmWarn x86g_dirtyhelper_FRSTOR ( VexGuestX86State*, Addr32 ) */
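+ /* The 108-byte area is the 32-bit protected-mode FSAVE/FRSTOR
+ image: a 28-byte environment block (control, status and tag
+ words plus FIP/FDP) followed by the eight 10-byte ST
+ registers, 28 + 8*10 = 108. */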
+ IRTemp ew = newTemp(Ity_I32);
+ IRDirty* d = unsafeIRDirty_0_N (
+ 0/*regparms*/,
+ "x86g_dirtyhelper_FRSTOR",
+ &x86g_dirtyhelper_FRSTOR,
+ mkIRExprVec_1( mkexpr(addr) )
+ );
+ d->needsBBP = True;
+ d->tmp = ew;
+ /* declare we're reading memory */
+ d->mFx = Ifx_Read;
+ d->mAddr = mkexpr(addr);
+ d->mSize = 108;
+
+ /* declare we're writing guest state */
+ d->nFxState = 5;
+
+ d->fxState[0].fx = Ifx_Write;
+ d->fxState[0].offset = OFFB_FTOP;
+ d->fxState[0].size = sizeof(UInt);
+
+ d->fxState[1].fx = Ifx_Write;
+ d->fxState[1].offset = OFFB_FPREGS;
+ d->fxState[1].size = 8 * sizeof(ULong);
+
+ d->fxState[2].fx = Ifx_Write;
+ d->fxState[2].offset = OFFB_FPTAGS;
+ d->fxState[2].size = 8 * sizeof(UChar);
+
+ d->fxState[3].fx = Ifx_Write;
+ d->fxState[3].offset = OFFB_FPROUND;
+ d->fxState[3].size = sizeof(UInt);
+
+ d->fxState[4].fx = Ifx_Write;
+ d->fxState[4].offset = OFFB_FC3210;
+ d->fxState[4].size = sizeof(UInt);
+
+ stmt( IRStmt_Dirty(d) );
+
+ /* ew contains any emulation warning we may need to
+ issue. If needed, side-exit to the next insn,
+ reporting the warning, so that Valgrind's dispatcher
+ sees the warning. */
+ put_emwarn( mkexpr(ew) );
+ stmt(
+ IRStmt_Exit(
+ binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
+ Ijk_EmWarn,
+ IRConst_U32( ((Addr32)guest_EIP_bbstart)+delta)
+ )
+ );
+
+ DIP("frstor %s\n", dis_buf);
+ break;
+ }
+
+ case 6: { /* FNSAVE m108 */
+ /* Uses dirty helper:
+ void x86g_dirtyhelper_FSAVE ( VexGuestX86State*, UInt ) */
+ IRDirty* d = unsafeIRDirty_0_N (
+ 0/*regparms*/,
+ "x86g_dirtyhelper_FSAVE",
+ &x86g_dirtyhelper_FSAVE,
+ mkIRExprVec_1( mkexpr(addr) )
+ );
+ d->needsBBP = True;
+ /* declare we're writing memory */
+ d->mFx = Ifx_Write;
+ d->mAddr = mkexpr(addr);
+ d->mSize = 108;
+
+ /* declare we're reading guest state */
+ d->nFxState = 5;
+
+ d->fxState[0].fx = Ifx_Read;
+ d->fxState[0].offset = OFFB_FTOP;
+ d->fxState[0].size = sizeof(UInt);
+
+ d->fxState[1].fx = Ifx_Read;
+ d->fxState[1].offset = OFFB_FPREGS;
+ d->fxState[1].size = 8 * sizeof(ULong);
+
+ d->fxState[2].fx = Ifx_Read;
+ d->fxState[2].offset = OFFB_FPTAGS;
+ d->fxState[2].size = 8 * sizeof(UChar);
+
+ d->fxState[3].fx = Ifx_Read;
+ d->fxState[3].offset = OFFB_FPROUND;
+ d->fxState[3].size = sizeof(UInt);
+
+ d->fxState[4].fx = Ifx_Read;
+ d->fxState[4].offset = OFFB_FC3210;
+ d->fxState[4].size = sizeof(UInt);
+
+ stmt( IRStmt_Dirty(d) );
+
+ DIP("fnsave %s\n", dis_buf);
+ break;
+ }
+
+ case 7: { /* FNSTSW m16 */
+ IRExpr* sw = get_FPU_sw();
+ vassert(typeOfIRExpr(irsb->tyenv, sw) == Ity_I16);
+ storeLE( mkexpr(addr), sw );
+ DIP("fnstsw %s\n", dis_buf);
+ break;
+ }
+
+ default:
+ vex_printf("unhandled opc_aux = 0x%2x\n", gregOfRM(modrm));
+ vex_printf("first_opcode == 0xDD\n");
+ goto decode_fail;
+ }
+ } else {
+ delta++;
+ switch (modrm) {
+
+ case 0xC0 ... 0xC7: /* FFREE %st(?) */
+ r_dst = (UInt)modrm - 0xC0;
+ DIP("ffree %%st(%d)\n", (Int)r_dst);
+ put_ST_TAG ( r_dst, mkU8(0) );
+ break;
+
+ case 0xD0 ... 0xD7: /* FST %st(0),%st(?) */
+ r_dst = (UInt)modrm - 0xD0;
+ DIP("fst %%st(0),%%st(%d)\n", (Int)r_dst);
+ /* P4 manual says: "If the destination operand is a
+ non-empty register, the invalid-operation exception
+ is not generated." Hence put_ST_UNCHECKED. */
+ put_ST_UNCHECKED(r_dst, get_ST(0));
+ break;
+
+ case 0xD8 ... 0xDF: /* FSTP %st(0),%st(?) */
+ r_dst = (UInt)modrm - 0xD8;
+ DIP("fstp %%st(0),%%st(%d)\n", (Int)r_dst);
+ /* P4 manual says: "If the destination operand is a
+ non-empty register, the invalid-operation exception
+ is not generated." Hence put_ST_UNCHECKED. */
+ put_ST_UNCHECKED(r_dst, get_ST(0));
+ fp_pop();
+ break;
+
+ case 0xE0 ... 0xE7: /* FUCOM %st(0),%st(?) */
+ r_dst = (UInt)modrm - 0xE0;
+ DIP("fucom %%st(0),%%st(%d)\n", (Int)r_dst);
+ /* This forces C1 to zero, which isn't right. */
+ put_C3210(
+ binop( Iop_And32,
+ binop(Iop_Shl32,
+ binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
+ mkU8(8)),
+ mkU32(0x4500)
+ ));
+ break;
+
+ case 0xE8 ... 0xEF: /* FUCOMP %st(0),%st(?) */
+ r_dst = (UInt)modrm - 0xE8;
+ DIP("fucomp %%st(0),%%st(%d)\n", (Int)r_dst);
+ /* This forces C1 to zero, which isn't right. */
+ put_C3210(
+ binop( Iop_And32,
+ binop(Iop_Shl32,
+ binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
+ mkU8(8)),
+ mkU32(0x4500)
+ ));
+ fp_pop();
+ break;
+
+ default:
+ goto decode_fail;
+ }
+ }
+ }
+
+ /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDE opcodes +-+-+-+-+-+-+-+ */
+ else
+ if (first_opcode == 0xDE) {
+
+ if (modrm < 0xC0) {
+
+ /* bits 5,4,3 are an opcode extension, and the modRM also
+ specifies an address. */
+ IROp fop;
+ IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
+ delta += len;
+
+ switch (gregOfRM(modrm)) {
+
+ case 0: /* FIADD m16int */ /* ST(0) += m16int */
+ DIP("fiaddw %s\n", dis_buf);
+ fop = Iop_AddF64;
+ goto do_fop_m16;
+
+ case 1: /* FIMUL m16int */ /* ST(0) *= m16int */
+ DIP("fimulw %s\n", dis_buf);
+ fop = Iop_MulF64;
+ goto do_fop_m16;
+
+ case 2: /* FICOM m16int */
+ DIP("ficomw %s\n", dis_buf);
+ /* This forces C1 to zero, which isn't right. */
+ put_C3210(
+ binop( Iop_And32,
+ binop(Iop_Shl32,
+ binop(Iop_CmpF64,
+ get_ST(0),
+ unop(Iop_I32StoF64,
+ unop(Iop_16Sto32,
+ loadLE(Ity_I16,mkexpr(addr))))),
+ mkU8(8)),
+ mkU32(0x4500)
+ ));
+ break;
+
+ case 3: /* FICOMP m16int */
+ DIP("ficompw %s\n", dis_buf);
+ /* This forces C1 to zero, which isn't right. */
+ put_C3210(
+ binop( Iop_And32,
+ binop(Iop_Shl32,
+ binop(Iop_CmpF64,
+ get_ST(0),
+ unop(Iop_I32StoF64,
+ unop(Iop_16Sto32,
+ loadLE(Ity_I16,mkexpr(addr))))),
+ mkU8(8)),
+ mkU32(0x4500)
+ ));
+ fp_pop();
+ break;
+
+ case 4: /* FISUB m16int */ /* ST(0) -= m16int */
+ DIP("fisubw %s\n", dis_buf);
+ fop = Iop_SubF64;
+ goto do_fop_m16;
+
+ case 5: /* FISUBR m16int */ /* ST(0) = m16int - ST(0) */
+ DIP("fisubrw %s\n", dis_buf);
+ fop = Iop_SubF64;
+ goto do_foprev_m16;
+
+ case 6: /* FIDIV m16int */ /* ST(0) /= m16int */
+ DIP("fisubw %s\n", dis_buf);
+ fop = Iop_DivF64;
+ goto do_fop_m16;
+
+ case 7: /* FIDIVR m16int */ /* ST(0) = m16int / ST(0) */
+ DIP("fidivrw %s\n", dis_buf);
+ fop = Iop_DivF64;
+ goto do_foprev_m16;
+
+ do_fop_m16:
+ put_ST_UNCHECKED(0,
+ triop(fop,
+ get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
+ get_ST(0),
+ unop(Iop_I32StoF64,
+ unop(Iop_16Sto32,
+ loadLE(Ity_I16, mkexpr(addr))))));
+ break;
+
+ do_foprev_m16:
+ put_ST_UNCHECKED(0,
+ triop(fop,
+ get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
+ unop(Iop_I32StoF64,
+ unop(Iop_16Sto32,
+ loadLE(Ity_I16, mkexpr(addr)))),
+ get_ST(0)));
+ break;
+
+ default:
+ vex_printf("unhandled opc_aux = 0x%2x\n", gregOfRM(modrm));
+ vex_printf("first_opcode == 0xDE\n");
+ goto decode_fail;
+ }
+
+ } else {
+
+ delta++;
+ switch (modrm) {
+
+ case 0xC0 ... 0xC7: /* FADDP %st(0),%st(?) */
+ fp_do_op_ST_ST ( "add", Iop_AddF64, 0, modrm - 0xC0, True );
+ break;
+
+ case 0xC8 ... 0xCF: /* FMULP %st(0),%st(?) */
+ fp_do_op_ST_ST ( "mul", Iop_MulF64, 0, modrm - 0xC8, True );
+ break;
+
+ case 0xD9: /* FCOMPP %st(0),%st(1) */
+ DIP("fuompp %%st(0),%%st(1)\n");
+ /* This forces C1 to zero, which isn't right. */
+ put_C3210(
+ binop( Iop_And32,
+ binop(Iop_Shl32,
+ binop(Iop_CmpF64, get_ST(0), get_ST(1)),
+ mkU8(8)),
+ mkU32(0x4500)
+ ));
+ fp_pop();
+ fp_pop();
+ break;
+
+ case 0xE0 ... 0xE7: /* FSUBRP %st(0),%st(?) */
+ fp_do_oprev_ST_ST ( "subr", Iop_SubF64, 0, modrm - 0xE0, True );
+ break;
+
+ case 0xE8 ... 0xEF: /* FSUBP %st(0),%st(?) */
+ fp_do_op_ST_ST ( "sub", Iop_SubF64, 0, modrm - 0xE8, True );
+ break;
+
+ case 0xF0 ... 0xF7: /* FDIVRP %st(0),%st(?) */
+ fp_do_oprev_ST_ST ( "divr", Iop_DivF64, 0, modrm - 0xF0, True );
+ break;
+
+ case 0xF8 ... 0xFF: /* FDIVP %st(0),%st(?) */
+ fp_do_op_ST_ST ( "div", Iop_DivF64, 0, modrm - 0xF8, True );
+ break;
+
+ default:
+ goto decode_fail;
+ }
+
+ }
+ }
+
+ /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDF opcodes +-+-+-+-+-+-+-+ */
+ else
+ if (first_opcode == 0xDF) {
+
+ if (modrm < 0xC0) {
+
+ /* bits 5,4,3 are an opcode extension, and the modRM also
+ specifies an address. */
+ IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
+ delta += len;
+
+ switch (gregOfRM(modrm)) {
+
+ case 0: /* FILD m16int */
+ DIP("fildw %s\n", dis_buf);
+ fp_push();
+ put_ST(0, unop(Iop_I32StoF64,
+ unop(Iop_16Sto32,
+ loadLE(Ity_I16, mkexpr(addr)))));
+ break;
+
+ case 1: /* FISTTPS m16 (SSE3) */
+ DIP("fisttps %s\n", dis_buf);
+ storeLE( mkexpr(addr),
+ binop(Iop_F64toI16S, mkU32(Irrm_ZERO), get_ST(0)) );
+ fp_pop();
+ break;
+
+ case 2: /* FIST m16 */
+ DIP("fistp %s\n", dis_buf);
+ storeLE( mkexpr(addr),
+ binop(Iop_F64toI16S, get_roundingmode(), get_ST(0)) );
+ break;
+
+ case 3: /* FISTP m16 */
+ DIP("fistps %s\n", dis_buf);
+ storeLE( mkexpr(addr),
+ binop(Iop_F64toI16S, get_roundingmode(), get_ST(0)) );
+ fp_pop();
+ break;
+
+ case 5: /* FILD m64 */
+ DIP("fildll %s\n", dis_buf);
+ fp_push();
+ put_ST(0, binop(Iop_I64StoF64,
+ get_roundingmode(),
+ loadLE(Ity_I64, mkexpr(addr))));
+ break;
+
+ case 7: /* FISTP m64 */
+ DIP("fistpll %s\n", dis_buf);
+ storeLE( mkexpr(addr),
+ binop(Iop_F64toI64S, get_roundingmode(), get_ST(0)) );
+ fp_pop();
+ break;
+
+ default:
+ vex_printf("unhandled opc_aux = 0x%2x\n", gregOfRM(modrm));
+ vex_printf("first_opcode == 0xDF\n");
+ goto decode_fail;
+ }
+
+ } else {
+
+ delta++;
+ switch (modrm) {
+
+ case 0xC0: /* FFREEP %st(0) */
+ DIP("ffreep %%st(%d)\n", 0);
+ put_ST_TAG ( 0, mkU8(0) );
+ fp_pop();
+ break;
+
+ case 0xE0: /* FNSTSW %ax */
+ DIP("fnstsw %%ax\n");
+ /* Get the FPU status word value and dump it in %AX. */
+ if (0) {
+ /* The obvious thing to do is simply dump the 16-bit
+ status word value in %AX. However, due to a
+ limitation in Memcheck's origin tracking
+ machinery, this causes Memcheck not to track the
+ origin of any undefinedness into %AH (only into
+ %AL/%AX/%EAX), which means origins are lost in
+ the sequence "fnstsw %ax; test $M,%ah; jcond .." */
+ putIReg(2, R_EAX, get_FPU_sw());
+ } else {
+ /* So a somewhat lame kludge is to make it very
+ clear to Memcheck that the value is written to
+ both %AH and %AL. This generates marginally
+ worse code, but I don't think it matters much. */
+ IRTemp t16 = newTemp(Ity_I16);
+ assign(t16, get_FPU_sw());
+ putIReg( 1, R_AL, unop(Iop_16to8, mkexpr(t16)) );
+ putIReg( 1, R_AH, unop(Iop_16HIto8, mkexpr(t16)) );
+ }
+ break;
+
+ case 0xE8 ... 0xEF: /* FUCOMIP %st(0),%st(?) */
+ fp_do_ucomi_ST0_STi( (UInt)modrm - 0xE8, True );
+ break;
+
+ case 0xF0 ... 0xF7: /* FCOMIP %st(0),%st(?) */
+ /* not really right since COMIP != UCOMIP */
+ fp_do_ucomi_ST0_STi( (UInt)modrm - 0xF0, True );
+ break;
+
+ default:
+ goto decode_fail;
+ }
+ }
+
+ }
+
+ else
+ vpanic("dis_FPU(x86): invalid primary opcode");
+
+ *decode_ok = True;
+ return delta;
+
+ decode_fail:
+ *decode_ok = False;
+ return delta;
+}
+
+
+/*------------------------------------------------------------*/
+/*--- ---*/
+/*--- MMX INSTRUCTIONS ---*/
+/*--- ---*/
+/*------------------------------------------------------------*/
+
+/* Effect of MMX insns on x87 FPU state (table 11-2 of
+ IA32 arch manual, volume 3):
+
+ Read from, or write to MMX register (viz, any insn except EMMS):
+ * All tags set to Valid (non-empty) -- FPTAGS[i] := nonzero
+ * FP stack pointer set to zero
+
+ EMMS:
+ * All tags set to Invalid (empty) -- FPTAGS[i] := zero
+ * FP stack pointer set to zero
+*/
+
+static void do_MMX_preamble ( void )
+{
+ Int i;
+ IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
+ IRExpr* zero = mkU32(0);
+ IRExpr* tag1 = mkU8(1);
+ put_ftop(zero);
+ for (i = 0; i < 8; i++)
+ stmt( IRStmt_PutI( descr, zero, i, tag1 ) );
+}
+
+static void do_EMMS_preamble ( void )
+{
+ Int i;
+ IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
+ IRExpr* zero = mkU32(0);
+ IRExpr* tag0 = mkU8(0);
+ put_ftop(zero);
+ for (i = 0; i < 8; i++)
+ stmt( IRStmt_PutI( descr, zero, i, tag0 ) );
+}
+
+
+static IRExpr* getMMXReg ( UInt archreg )
+{
+ vassert(archreg < 8);
+ return IRExpr_Get( OFFB_FPREGS + 8 * archreg, Ity_I64 );
+}
+
+
+static void putMMXReg ( UInt archreg, IRExpr* e )
+{
+ vassert(archreg < 8);
+ vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
+ stmt( IRStmt_Put( OFFB_FPREGS + 8 * archreg, e ) );
+}
+
+
+/* Helper for non-shift MMX insns. Note this is incomplete in the
+ sense that it does not first call do_MMX_preamble() -- that is the
+ responsibility of its caller. */
+
+static
+UInt dis_MMXop_regmem_to_reg ( UChar sorb,
+ Int delta,
+ UChar opc,
+ HChar* name,
+ Bool show_granularity )
+{
+ HChar dis_buf[50];
+ UChar modrm = getIByte(delta);
+ Bool isReg = epartIsReg(modrm);
+ IRExpr* argL = NULL;
+ IRExpr* argR = NULL;
+ IRExpr* argG = NULL;
+ IRExpr* argE = NULL;
+ IRTemp res = newTemp(Ity_I64);
+
+ Bool invG = False;
+ IROp op = Iop_INVALID;
+ void* hAddr = NULL;
+ HChar* hName = NULL;
+ Bool eLeft = False;
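+ /* eLeft == True makes the E operand the left (first) argument
+ of the IROp, which matters for the non-commutative pack and
+ unpack operations below. For example psubb (0xF8) leaves
+ eLeft False, so the result is Sub8x8(G, E), i.e.
+ dst = dst - src, as the architecture requires. */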
+
+# define XXX(_name) do { hAddr = &_name; hName = #_name; } while (0)
+
+ switch (opc) {
+ /* Original MMX ones */
+ case 0xFC: op = Iop_Add8x8; break;
+ case 0xFD: op = Iop_Add16x4; break;
+ case 0xFE: op = Iop_Add32x2; break;
+
+ case 0xEC: op = Iop_QAdd8Sx8; break;
+ case 0xED: op = Iop_QAdd16Sx4; break;
+
+ case 0xDC: op = Iop_QAdd8Ux8; break;
+ case 0xDD: op = Iop_QAdd16Ux4; break;
+
+ case 0xF8: op = Iop_Sub8x8; break;
+ case 0xF9: op = Iop_Sub16x4; break;
+ case 0xFA: op = Iop_Sub32x2; break;
+
+ case 0xE8: op = Iop_QSub8Sx8; break;
+ case 0xE9: op = Iop_QSub16Sx4; break;
+
+ case 0xD8: op = Iop_QSub8Ux8; break;
+ case 0xD9: op = Iop_QSub16Ux4; break;
+
+ case 0xE5: op = Iop_MulHi16Sx4; break;
+ case 0xD5: op = Iop_Mul16x4; break;
+ case 0xF5: XXX(x86g_calculate_mmx_pmaddwd); break;
+
+ case 0x74: op = Iop_CmpEQ8x8; break;
+ case 0x75: op = Iop_CmpEQ16x4; break;
+ case 0x76: op = Iop_CmpEQ32x2; break;
+
+ case 0x64: op = Iop_CmpGT8Sx8; break;
+ case 0x65: op = Iop_CmpGT16Sx4; break;
+ case 0x66: op = Iop_CmpGT32Sx2; break;
+
+ case 0x6B: op = Iop_QNarrow32Sx2; eLeft = True; break;
+ case 0x63: op = Iop_QNarrow16Sx4; eLeft = True; break;
+ case 0x67: op = Iop_QNarrow16Ux4; eLeft = True; break;
+
+ case 0x68: op = Iop_InterleaveHI8x8; eLeft = True; break;
+ case 0x69: op = Iop_InterleaveHI16x4; eLeft = True; break;
+ case 0x6A: op = Iop_InterleaveHI32x2; eLeft = True; break;
+
+ case 0x60: op = Iop_InterleaveLO8x8; eLeft = True; break;
+ case 0x61: op = Iop_InterleaveLO16x4; eLeft = True; break;
+ case 0x62: op = Iop_InterleaveLO32x2; eLeft = True; break;
+
+ case 0xDB: op = Iop_And64; break;
+ case 0xDF: op = Iop_And64; invG = True; break;
+ case 0xEB: op = Iop_Or64; break;
+ case 0xEF: /* Possibly do better here if argL and argR are the
+ same reg */
+ op = Iop_Xor64; break;
+
+ /* Introduced in SSE1 */
+ case 0xE0: op = Iop_Avg8Ux8; break;
+ case 0xE3: op = Iop_Avg16Ux4; break;
+ case 0xEE: op = Iop_Max16Sx4; break;
+ case 0xDE: op = Iop_Max8Ux8; break;
+ case 0xEA: op = Iop_Min16Sx4; break;
+ case 0xDA: op = Iop_Min8Ux8; break;
+ case 0xE4: op = Iop_MulHi16Ux4; break;
+ case 0xF6: XXX(x86g_calculate_mmx_psadbw); break;
+
+ /* Introduced in SSE2 */
+ case 0xD4: op = Iop_Add64; break;
+ case 0xFB: op = Iop_Sub64; break;
+
+ default:
+ vex_printf("\n0x%x\n", (Int)opc);
+ vpanic("dis_MMXop_regmem_to_reg");
+ }
+
+# undef XXX
+
+ argG = getMMXReg(gregOfRM(modrm));
+ if (invG)
+ argG = unop(Iop_Not64, argG);
+
+ if (isReg) {
+ delta++;
+ argE = getMMXReg(eregOfRM(modrm));
+ } else {
+ Int len;
+ IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
+ delta += len;
+ argE = loadLE(Ity_I64, mkexpr(addr));
+ }
+
+ if (eLeft) {
+ argL = argE;
+ argR = argG;
+ } else {
+ argL = argG;
+ argR = argE;
+ }
+
+ if (op != Iop_INVALID) {
+ vassert(hName == NULL);
+ vassert(hAddr == NULL);
+ assign(res, binop(op, argL, argR));
+ } else {
+ vassert(hName != NULL);
+ vassert(hAddr != NULL);
+ assign( res,
+ mkIRExprCCall(
+ Ity_I64,
+ 0/*regparms*/, hName, hAddr,
+ mkIRExprVec_2( argL, argR )
+ )
+ );
+ }
+
+ putMMXReg( gregOfRM(modrm), mkexpr(res) );
+
+ DIP("%s%s %s, %s\n",
+ name, show_granularity ? nameMMXGran(opc & 3) : "",
+ ( isReg ? nameMMXReg(eregOfRM(modrm)) : dis_buf ),
+ nameMMXReg(gregOfRM(modrm)) );
+
+ return delta;
+}
+
+
+/* Vector by scalar shift of G by the amount specified at the bottom
+ of E. This is a straight copy of dis_SSE_shiftG_byE. */
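+ /* The Mux0X guards below implement x86's out-of-range shift
+ behaviour: e.g. psllw with a count of 20 (>= the 16-bit lane
+ width) must zero every lane, while psraw with an out-of-range
+ count behaves like a shift by lanewidth-1, filling each lane
+ with its sign bit. */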
+
+static UInt dis_MMX_shiftG_byE ( UChar sorb, Int delta,
+ HChar* opname, IROp op )
+{
+ HChar dis_buf[50];
+ Int alen, size;
+ IRTemp addr;
+ Bool shl, shr, sar;
+ UChar rm = getIByte(delta);
+ IRTemp g0 = newTemp(Ity_I64);
+ IRTemp g1 = newTemp(Ity_I64);
+ IRTemp amt = newTemp(Ity_I32);
+ IRTemp amt8 = newTemp(Ity_I8);
+
+ if (epartIsReg(rm)) {
+ assign( amt, unop(Iop_64to32, getMMXReg(eregOfRM(rm))) );
+ DIP("%s %s,%s\n", opname,
+ nameMMXReg(eregOfRM(rm)),
+ nameMMXReg(gregOfRM(rm)) );
+ delta++;
+ } else {
+ addr = disAMode ( &alen, sorb, delta, dis_buf );
+ assign( amt, loadLE(Ity_I32, mkexpr(addr)) );
+ DIP("%s %s,%s\n", opname,
+ dis_buf,
+ nameMMXReg(gregOfRM(rm)) );
+ delta += alen;
+ }
+ assign( g0, getMMXReg(gregOfRM(rm)) );
+ assign( amt8, unop(Iop_32to8, mkexpr(amt)) );
+
+ shl = shr = sar = False;
+ size = 0;
+ switch (op) {
+ case Iop_ShlN16x4: shl = True; size = 16; break;
+ case Iop_ShlN32x2: shl = True; size = 32; break;
+ case Iop_Shl64: shl = True; size = 64; break;
+ case Iop_ShrN16x4: shr = True; size = 16; break;
+ case Iop_ShrN32x2: shr = True; size = 32; break;
+ case Iop_Shr64: shr = True; size = 64; break;
+ case Iop_SarN16x4: sar = True; size = 16; break;
+ case Iop_SarN32x2: sar = True; size = 32; break;
+ default: vassert(0);
+ }
+
+ if (shl || shr) {
+ assign(
+ g1,
+ IRExpr_Mux0X(
+ unop(Iop_1Uto8,binop(Iop_CmpLT32U,mkexpr(amt),mkU32(size))),
+ mkU64(0),
+ binop(op, mkexpr(g0), mkexpr(amt8))
+ )
+ );
+ } else
+ if (sar) {
+ assign(
+ g1,
+ IRExpr_Mux0X(
+ unop(Iop_1Uto8,binop(Iop_CmpLT32U,mkexpr(amt),mkU32(size))),
+ binop(op, mkexpr(g0), mkU8(size-1)),
+ binop(op, mkexpr(g0), mkexpr(amt8))
+ )
+ );
+ } else {
+ /*NOTREACHED*/
+ vassert(0);
+ }
+
+ putMMXReg( gregOfRM(rm), mkexpr(g1) );
+ return delta;
+}
+
+
+/* Vector by scalar shift of E by an immediate byte. This is a
+ straight copy of dis_SSE_shiftE_imm. */
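+ /* Since the amount is an immediate, the out-of-range case is
+ resolved at decode time: e.g. psrlw $20, %mm3 (20 >= 16)
+ simply assigns zero to every lane, with no run-time guard. */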
+
+static
+UInt dis_MMX_shiftE_imm ( Int delta, HChar* opname, IROp op )
+{
+ Bool shl, shr, sar;
+ UChar rm = getIByte(delta);
+ IRTemp e0 = newTemp(Ity_I64);
+ IRTemp e1 = newTemp(Ity_I64);
+ UChar amt, size;
+ vassert(epartIsReg(rm));
+ vassert(gregOfRM(rm) == 2
+ || gregOfRM(rm) == 4 || gregOfRM(rm) == 6);
+ amt = getIByte(delta+1);
+ delta += 2;
+ DIP("%s $%d,%s\n", opname,
+ (Int)amt,
+ nameMMXReg(eregOfRM(rm)) );
+
+ assign( e0, getMMXReg(eregOfRM(rm)) );
+
+ shl = shr = sar = False;
+ size = 0;
+ switch (op) {
+ case Iop_ShlN16x4: shl = True; size = 16; break;
+ case Iop_ShlN32x2: shl = True; size = 32; break;
+ case Iop_Shl64: shl = True; size = 64; break;
+ case Iop_SarN16x4: sar = True; size = 16; break;
+ case Iop_SarN32x2: sar = True; size = 32; break;
+ case Iop_ShrN16x4: shr = True; size = 16; break;
+ case Iop_ShrN32x2: shr = True; size = 32; break;
+ case Iop_Shr64: shr = True; size = 64; break;
+ default: vassert(0);
+ }
+
+ if (shl || shr) {
+ assign( e1, amt >= size
+ ? mkU64(0)
+ : binop(op, mkexpr(e0), mkU8(amt))
+ );
+ } else
+ if (sar) {
+ assign( e1, amt >= size
+ ? binop(op, mkexpr(e0), mkU8(size-1))
+ : binop(op, mkexpr(e0), mkU8(amt))
+ );
+ } else {
+ /*NOTREACHED*/
+ vassert(0);
+ }
+
+ putMMXReg( eregOfRM(rm), mkexpr(e1) );
+ return delta;
+}
+
+
+/* Completely handle all MMX instructions except emms. */
+
+static
+UInt dis_MMX ( Bool* decode_ok, UChar sorb, Int sz, Int delta )
+{
+ Int len;
+ UChar modrm;
+ HChar dis_buf[50];
+ UChar opc = getIByte(delta);
+ delta++;
+
+ /* dis_MMX handles all insns except emms. */
+ do_MMX_preamble();
+
+ switch (opc) {
+
+ case 0x6E:
+ /* MOVD (src)ireg-or-mem (E), (dst)mmxreg (G)*/
+ if (sz != 4)
+ goto mmx_decode_failure;
+ modrm = getIByte(delta);
+ if (epartIsReg(modrm)) {
+ delta++;
+ putMMXReg(
+ gregOfRM(modrm),
+ binop( Iop_32HLto64,
+ mkU32(0),
+ getIReg(4, eregOfRM(modrm)) ) );
+ DIP("movd %s, %s\n",
+ nameIReg(4,eregOfRM(modrm)), nameMMXReg(gregOfRM(modrm)));
+ } else {
+ IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
+ delta += len;
+ putMMXReg(
+ gregOfRM(modrm),
+ binop( Iop_32HLto64,
+ mkU32(0),
+ loadLE(Ity_I32, mkexpr(addr)) ) );
+ DIP("movd %s, %s\n", dis_buf, nameMMXReg(gregOfRM(modrm)));
+ }
+ break;
+
+ case 0x7E: /* MOVD (src)mmxreg (G), (dst)ireg-or-mem (E) */
+ if (sz != 4)
+ goto mmx_decode_failure;
+ modrm = getIByte(delta);
+ if (epartIsReg(modrm)) {
+ delta++;
+ putIReg( 4, eregOfRM(modrm),
+ unop(Iop_64to32, getMMXReg(gregOfRM(modrm)) ) );
+ DIP("movd %s, %s\n",
+ nameMMXReg(gregOfRM(modrm)), nameIReg(4,eregOfRM(modrm)));
+ } else {
+ IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
+ delta += len;
+ storeLE( mkexpr(addr),
+ unop(Iop_64to32, getMMXReg(gregOfRM(modrm)) ) );
+ DIP("movd %s, %s\n", nameMMXReg(gregOfRM(modrm)), dis_buf);
+ }
+ break;
+
+ case 0x6F:
+ /* MOVQ (src)mmxreg-or-mem, (dst)mmxreg */
+ if (sz != 4)
+ goto mmx_decode_failure;
+ modrm = getIByte(delta);
+ if (epartIsReg(modrm)) {
+ delta++;
+ putMMXReg( gregOfRM(modrm), getMMXReg(eregOfRM(modrm)) );
+ DIP("movq %s, %s\n",
+ nameMMXReg(eregOfRM(modrm)), nameMMXReg(gregOfRM(modrm)));
+ } else {
+ IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
+ delta += len;
+ putMMXReg( gregOfRM(modrm), loadLE(Ity_I64, mkexpr(addr)) );
+ DIP("movq %s, %s\n",
+ dis_buf, nameMMXReg(gregOfRM(modrm)));
+ }
+ break;
+
+ case 0x7F:
+ /* MOVQ (src)mmxreg, (dst)mmxreg-or-mem */
+ if (sz != 4)
+ goto mmx_decode_failure;
+ modrm = getIByte(delta);
+ if (epartIsReg(modrm)) {
+ delta++;
+ putMMXReg( eregOfRM(modrm), getMMXReg(gregOfRM(modrm)) );
+ DIP("movq %s, %s\n",
+ nameMMXReg(gregOfRM(modrm)), nameMMXReg(eregOfRM(modrm)));
+ } else {
+ IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
+ delta += len;
+ storeLE( mkexpr(addr), getMMXReg(gregOfRM(modrm)) );
+ DIP("mov(nt)q %s, %s\n",
+ nameMMXReg(gregOfRM(modrm)), dis_buf);
+ }
+ break;
+
+ case 0xFC:
+ case 0xFD:
+ case 0xFE: /* PADDgg (src)mmxreg-or-mem, (dst)mmxreg */
+ if (sz != 4)
+ goto mmx_decode_failure;
+ delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "padd", True );
+ break;
+
+ case 0xEC:
+ case 0xED: /* PADDSgg (src)mmxreg-or-mem, (dst)mmxreg */
+ if (sz != 4)
+ goto mmx_decode_failure;
+ delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "padds", True );
+ break;
+
+ case 0xDC:
+ case 0xDD: /* PADDUSgg (src)mmxreg-or-mem, (dst)mmxreg */
+ if (sz != 4)
+ goto mmx_decode_failure;
+ delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "paddus", True );
+ break;
+
+ case 0xF8:
+ case 0xF9:
+ case 0xFA: /* PSUBgg (src)mmxreg-or-mem, (dst)mmxreg */
+ if (sz != 4)
+ goto mmx_decode_failure;
+ delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "psub", True );
+ break;
+
+ case 0xE8:
+ case 0xE9: /* PSUBSgg (src)mmxreg-or-mem, (dst)mmxreg */
+ if (sz != 4)
+ goto mmx_decode_failure;
+ delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "psubs", True );
+ break;
+
+ case 0xD8:
+ case 0xD9: /* PSUBUSgg (src)mmxreg-or-mem, (dst)mmxreg */
+ if (sz != 4)
+ goto mmx_decode_failure;
+ delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "psubus", True );
+ break;
+
+ case 0xE5: /* PMULHW (src)mmxreg-or-mem, (dst)mmxreg */
+ if (sz != 4)
+ goto mmx_decode_failure;
+ delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pmulhw", False );
+ break;
+
+ case 0xD5: /* PMULLW (src)mmxreg-or-mem, (dst)mmxreg */
+ if (sz != 4)
+ goto mmx_decode_failure;
+ delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pmullw", False );
+ break;
+
+ case 0xF5: /* PMADDWD (src)mmxreg-or-mem, (dst)mmxreg */
+ vassert(sz == 4);
+ delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pmaddwd", False );
+ break;
+
+ case 0x74:
+ case 0x75:
+ case 0x76: /* PCMPEQgg (src)mmxreg-or-mem, (dst)mmxreg */
+ if (sz != 4)
+ goto mmx_decode_failure;
+ delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pcmpeq", True );
+ break;
+
+ case 0x64:
+ case 0x65:
+ case 0x66: /* PCMPGTgg (src)mmxreg-or-mem, (dst)mmxreg */
+ if (sz != 4)
+ goto mmx_decode_failure;
+ delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pcmpgt", True );
+ break;
+
+ case 0x6B: /* PACKSSDW (src)mmxreg-or-mem, (dst)mmxreg */
+ if (sz != 4)
+ goto mmx_decode_failure;
+ delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "packssdw", False );
+ break;
+
+ case 0x63: /* PACKSSWB (src)mmxreg-or-mem, (dst)mmxreg */
+ if (sz != 4)
+ goto mmx_decode_failure;
+ delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "packsswb", False );
+ break;
+
+ case 0x67: /* PACKUSWB (src)mmxreg-or-mem, (dst)mmxreg */
+ if (sz != 4)
+ goto mmx_decode_failure;
+ delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "packuswb", False );
+ break;
+
+ case 0x68:
+ case 0x69:
+ case 0x6A: /* PUNPCKHgg (src)mmxreg-or-mem, (dst)mmxreg */
+ if (sz != 4)
+ goto mmx_decode_failure;
+ delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "punpckh", True );
+ break;
+
+ case 0x60:
+ case 0x61:
+ case 0x62: /* PUNPCKLgg (src)mmxreg-or-mem, (dst)mmxreg */
+ if (sz != 4)
+ goto mmx_decode_failure;
+ delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "punpckl", True );
+ break;
+
+ case 0xDB: /* PAND (src)mmxreg-or-mem, (dst)mmxreg */
+ if (sz != 4)
+ goto mmx_decode_failure;
+ delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pand", False );
+ break;
+
+ case 0xDF: /* PANDN (src)mmxreg-or-mem, (dst)mmxreg */
+ if (sz != 4)
+ goto mmx_decode_failure;
+ delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pandn", False );
+ break;
+
+ case 0xEB: /* POR (src)mmxreg-or-mem, (dst)mmxreg */
+ if (sz != 4)
+ goto mmx_decode_failure;
+ delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "por", False );
+ break;
+
+ case 0xEF: /* PXOR (src)mmxreg-or-mem, (dst)mmxreg */
+ if (sz != 4)
+ goto mmx_decode_failure;
+ delta = dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pxor", False );
+ break;
+
+# define SHIFT_BY_REG(_name,_op) \
+ delta = dis_MMX_shiftG_byE(sorb, delta, _name, _op); \
+ break;
+
+ /* PSLLgg (src)mmxreg-or-mem, (dst)mmxreg */
+ case 0xF1: SHIFT_BY_REG("psllw", Iop_ShlN16x4);
+ case 0xF2: SHIFT_BY_REG("pslld", Iop_ShlN32x2);
+ case 0xF3: SHIFT_BY_REG("psllq", Iop_Shl64);
+
+ /* PSRLgg (src)mmxreg-or-mem, (dst)mmxreg */
+ case 0xD1: SHIFT_BY_REG("psrlw", Iop_ShrN16x4);
+ case 0xD2: SHIFT_BY_REG("psrld", Iop_ShrN32x2);
+ case 0xD3: SHIFT_BY_REG("psrlq", Iop_Shr64);
+
+ /* PSRAgg (src)mmxreg-or-mem, (dst)mmxreg */
+ case 0xE1: SHIFT_BY_REG("psraw", Iop_SarN16x4);
+ case 0xE2: SHIFT_BY_REG("psrad", Iop_SarN32x2);
+
+# undef SHIFT_BY_REG
+
+ case 0x71:
+ case 0x72:
+ case 0x73: {
+ /* (sz==4): PSLLgg/PSRAgg/PSRLgg mmxreg by imm8 */
+ UChar byte2, subopc;
+ if (sz != 4)
+ goto mmx_decode_failure;
+ byte2 = getIByte(delta); /* amode / sub-opcode */
+ subopc = toUChar( (byte2 >> 3) & 7 );
+
+# define SHIFT_BY_IMM(_name,_op) \
+ do { delta = dis_MMX_shiftE_imm(delta,_name,_op); \
+ } while (0)
+
+ if (subopc == 2 /*SRL*/ && opc == 0x71)
+ SHIFT_BY_IMM("psrlw", Iop_ShrN16x4);
+ else if (subopc == 2 /*SRL*/ && opc == 0x72)
+ SHIFT_BY_IMM("psrld", Iop_ShrN32x2);
+ else if (subopc == 2 /*SRL*/ && opc == 0x73)
+ SHIFT_BY_IMM("psrlq", Iop_Shr64);
+
+ else if (subopc == 4 /*SAR*/ && opc == 0x71)
+ SHIFT_BY_IMM("psraw", Iop_SarN16x4);
+ else if (subopc == 4 /*SAR*/ && opc == 0x72)
+ SHIFT_BY_IMM("psrad", Iop_SarN32x2);
+
+ else if (subopc == 6 /*SHL*/ && opc == 0x71)
+ SHIFT_BY_IMM("psllw", Iop_ShlN16x4);
+ else if (subopc == 6 /*SHL*/ && opc == 0x72)
+ SHIFT_BY_IMM("pslld", Iop_ShlN32x2);
+ else if (subopc == 6 /*SHL*/ && opc == 0x73)
+ SHIFT_BY_IMM("psllq", Iop_Shl64);
+
+ else goto mmx_decode_failure;
+
+# undef SHIFT_BY_IMM
+ break;
+ }
+
+ case 0xF7: {
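+ /* MASKMOVQ: byte-masked store of the G register to [EDI],
+ honouring any segment override. A sketch of the scheme:
+ SarN8x8(E, 7) replicates each byte's sign bit across that
+ byte, giving an all-ones/all-zeroes per-byte mask; bytes of
+ G selected by the mask are merged with the old memory
+ contents and written back, so this is a (non-atomic)
+ read-modify-write rather than a true partial store. */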
+ IRTemp addr = newTemp(Ity_I32);
+ IRTemp regD = newTemp(Ity_I64);
+ IRTemp regM = newTemp(Ity_I64);
+ IRTemp mask = newTemp(Ity_I64);
+ IRTemp olddata = newTemp(Ity_I64);
+ IRTemp newdata = newTemp(Ity_I64);
+
+ modrm = getIByte(delta);
+ if (sz != 4 || (!epartIsReg(modrm)))
+ goto mmx_decode_failure;
+ delta++;
+
+ assign( addr, handleSegOverride( sorb, getIReg(4, R_EDI) ));
+ assign( regM, getMMXReg( eregOfRM(modrm) ));
+ assign( regD, getMMXReg( gregOfRM(modrm) ));
+ assign( mask, binop(Iop_SarN8x8, mkexpr(regM), mkU8(7)) );
+ assign( olddata, loadLE( Ity_I64, mkexpr(addr) ));
+ assign( newdata,
+ binop(Iop_Or64,
+ binop(Iop_And64,
+ mkexpr(regD),
+ mkexpr(mask) ),
+ binop(Iop_And64,
+ mkexpr(olddata),
+ unop(Iop_Not64, mkexpr(mask)))) );
+ storeLE( mkexpr(addr), mkexpr(newdata) );
+ DIP("maskmovq %s,%s\n", nameMMXReg( eregOfRM(modrm) ),
+ nameMMXReg( gregOfRM(modrm) ) );
+ break;
+ }
+
+ /* --- MMX decode failure --- */
+ default:
+ mmx_decode_failure:
+ *decode_ok = False;
+ return delta; /* ignored */
+
+ }
+
+ *decode_ok = True;
+ return delta;
+}
+
+
+/*------------------------------------------------------------*/
+/*--- More misc arithmetic and other obscure insns. ---*/
+/*------------------------------------------------------------*/
+
+/* Double length left and right shifts. Apparently only required in
+ v-size (no b- variant). */
+static
+UInt dis_SHLRD_Gv_Ev ( UChar sorb,
+ Int delta, UChar modrm,
+ Int sz,
+ IRExpr* shift_amt,
+ Bool amt_is_literal,
+ HChar* shift_amt_txt,
+ Bool left_shift )
+{
+ /* shift_amt :: Ity_I8 is the amount to shift. shift_amt_txt is used
+ for printing it. And eip on entry points at the modrm byte. */
+ Int len;
+ HChar dis_buf[50];
+
+ IRType ty = szToITy(sz);
+ IRTemp gsrc = newTemp(ty);
+ IRTemp esrc = newTemp(ty);
+ IRTemp addr = IRTemp_INVALID;
+ IRTemp tmpSH = newTemp(Ity_I8);
+ IRTemp tmpL = IRTemp_INVALID;
+ IRTemp tmpRes = IRTemp_INVALID;
+ IRTemp tmpSubSh = IRTemp_INVALID;
+ IROp mkpair;
+ IROp getres;
+ IROp shift;
+ IRExpr* mask = NULL;
+
+ vassert(sz == 2 || sz == 4);
+
+ /* The E-part is the destination; this is shifted. The G-part
+ supplies bits to be shifted into the E-part, but is not
+ changed.
+
+ If shifting left, form a double-length word with E at the top
+ and G at the bottom, and shift this left. The result is then in
+ the high part.
+
+ If shifting right, form a double-length word with G at the top
+ and E at the bottom, and shift this right. The result is then
+ at the bottom. */
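+ /* Worked example: shldl $8, %ebx, %eax forms the 64-bit value
+ EAX:EBX, shifts it left by 8 and keeps the high half, so EAX
+ becomes (EAX << 8) | (EBX >> 24); shrdl $8, %ebx, %eax forms
+ EBX:EAX, shifts it right by 8 and keeps the low half, giving
+ (EAX >> 8) | (EBX << 24). */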
+
+ /* Fetch the operands. */
+
+ assign( gsrc, getIReg(sz, gregOfRM(modrm)) );
+
+ if (epartIsReg(modrm)) {
+ delta++;
+ assign( esrc, getIReg(sz, eregOfRM(modrm)) );
+ DIP("sh%cd%c %s, %s, %s\n",
+ ( left_shift ? 'l' : 'r' ), nameISize(sz),
+ shift_amt_txt,
+ nameIReg(sz, gregOfRM(modrm)), nameIReg(sz, eregOfRM(modrm)));
+ } else {
+ addr = disAMode ( &len, sorb, delta, dis_buf );
+ delta += len;
+ assign( esrc, loadLE(ty, mkexpr(addr)) );
+ DIP("sh%cd%c %s, %s, %s\n",
+ ( left_shift ? 'l' : 'r' ), nameISize(sz),
+ shift_amt_txt,
+ nameIReg(sz, gregOfRM(modrm)), dis_buf);
+ }
+
+ /* Round up the relevant primops. */
+
+ if (sz == 4) {
+ tmpL = newTemp(Ity_I64);
+ tmpRes = newTemp(Ity_I32);
+ tmpSubSh = newTemp(Ity_I32);
+ mkpair = Iop_32HLto64;
+ getres = left_shift ? Iop_64HIto32 : Iop_64to32;
+ shift = left_shift ? Iop_Shl64 : Iop_Shr64;
+ mask = mkU8(31);
+ } else {
+ /* sz == 2 */
+ tmpL = newTemp(Ity_I32);
+ tmpRes = newTemp(Ity_I16);
+ tmpSubSh = newTemp(Ity_I16);
+ mkpair = Iop_16HLto32;
+ getres = left_shift ? Iop_32HIto16 : Iop_32to16;
+ shift = left_shift ? Iop_Shl32 : Iop_Shr32;
+ mask = mkU8(15);
+ }
+
+ /* Do the shift, calculate the subshift value, and set
+ the flag thunk. */
+
+ assign( tmpSH, binop(Iop_And8, shift_amt, mask) );
+
+ if (left_shift)
+ assign( tmpL, binop(mkpair, mkexpr(esrc), mkexpr(gsrc)) );
+ else
+ assign( tmpL, binop(mkpair, mkexpr(gsrc), mkexpr(esrc)) );
+
+ assign( tmpRes, unop(getres, binop(shift, mkexpr(tmpL), mkexpr(tmpSH)) ) );
+ assign( tmpSubSh,
+ unop(getres,
+ binop(shift,
+ mkexpr(tmpL),
+ binop(Iop_And8,
+ binop(Iop_Sub8, mkexpr(tmpSH), mkU8(1) ),
+ mask))) );
+
+ setFlags_DEP1_DEP2_shift ( left_shift ? Iop_Shl32 : Iop_Sar32,
+ tmpRes, tmpSubSh, ty, tmpSH );
+
+ /* Put result back. */
+
+ if (epartIsReg(modrm)) {
+ putIReg(sz, eregOfRM(modrm), mkexpr(tmpRes));
+ } else {
+ storeLE( mkexpr(addr), mkexpr(tmpRes) );
+ }
+
+ if (amt_is_literal) delta++;
+ return delta;
+}
+
+
+/* Handle BT/BTS/BTR/BTC Gv, Ev. Apparently b-size is not
+ required. */
+
+typedef enum { BtOpNone, BtOpSet, BtOpReset, BtOpComp } BtOp;
+
+static HChar* nameBtOp ( BtOp op )
+{
+ switch (op) {
+ case BtOpNone: return "";
+ case BtOpSet: return "s";
+ case BtOpReset: return "r";
+ case BtOpComp: return "c";
+ default: vpanic("nameBtOp(x86)");
+ }
+}
+
+
+static
+UInt dis_bt_G_E ( VexAbiInfo* vbi,
+ UChar sorb, Bool locked, Int sz, Int delta, BtOp op )
+{
+ HChar dis_buf[50];
+ UChar modrm;
+ Int len;
+ IRTemp t_fetched, t_bitno0, t_bitno1, t_bitno2, t_addr0,
+ t_addr1, t_esp, t_mask, t_new;
+
+ vassert(sz == 2 || sz == 4);
+
+ t_fetched = t_bitno0 = t_bitno1 = t_bitno2
+ = t_addr0 = t_addr1 = t_esp
+ = t_mask = t_new = IRTemp_INVALID;
+
+ t_fetched = newTemp(Ity_I8);
+ t_new = newTemp(Ity_I8);
+ t_bitno0 = newTemp(Ity_I32);
+ t_bitno1 = newTemp(Ity_I32);
+ t_bitno2 = newTemp(Ity_I8);
+ t_addr1 = newTemp(Ity_I32);
+ modrm = getIByte(delta);
+
+ assign( t_bitno0, widenSto32(getIReg(sz, gregOfRM(modrm))) );
+
+ if (epartIsReg(modrm)) {
+ delta++;
+ /* Get it onto the client's stack. */
+ t_esp = newTemp(Ity_I32);
+ t_addr0 = newTemp(Ity_I32);
+
+ /* For the choice of the value 128, see comment in dis_bt_G_E in
+ guest_amd64_toIR.c. We point out here only that 128 is
+ fast-cased in Memcheck and is > 0, so seems like a good
+ choice. */
+ vassert(vbi->guest_stack_redzone_size == 0);
+ assign( t_esp, binop(Iop_Sub32, getIReg(4, R_ESP), mkU32(128)) );
+ putIReg(4, R_ESP, mkexpr(t_esp));
+
+ storeLE( mkexpr(t_esp), getIReg(sz, eregOfRM(modrm)) );
+
+ /* Make t_addr0 point at it. */
+ assign( t_addr0, mkexpr(t_esp) );
+
+ /* Mask out upper bits of the shift amount, since we're doing a
+ reg. */
+ assign( t_bitno1, binop(Iop_And32,
+ mkexpr(t_bitno0),
+ mkU32(sz == 4 ? 31 : 15)) );
+
+ } else {
+ t_addr0 = disAMode ( &len, sorb, delta, dis_buf );
+ delta += len;
+ assign( t_bitno1, mkexpr(t_bitno0) );
+ }
+
+ /* At this point: t_addr0 is the address being operated on. If it
+ was a reg, we will have pushed it onto the client's stack.
+ t_bitno1 is the bit number, suitably masked in the case of a
+ reg. */
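+ /* Worked example: btl %eax, (%ebx) with EAX == 35 gives
+ t_addr1 = EBX + (35 >>s 3) = EBX+4 and t_bitno2 = 35 & 7 = 3,
+ so the carry flag is loaded from bit 3 of the byte at EBX+4.
+ The arithmetic shift (Sar32) matters because a register bit
+ index is signed and may address bytes below the base. */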
+
+ /* Now the main sequence. */
+ assign( t_addr1,
+ binop(Iop_Add32,
+ mkexpr(t_addr0),
+ binop(Iop_Sar32, mkexpr(t_bitno1), mkU8(3))) );
+
+ /* t_addr1 now holds effective address */
+
+ assign( t_bitno2,
+ unop(Iop_32to8,
+ binop(Iop_And32, mkexpr(t_bitno1), mkU32(7))) );
+
+ /* t_bitno2 contains offset of bit within byte */
+
+ if (op != BtOpNone) {
+ t_mask = newTemp(Ity_I8);
+ assign( t_mask, binop(Iop_Shl8, mkU8(1), mkexpr(t_bitno2)) );
+ }
+
+ /* t_mask is now a suitable byte mask */
+
+ assign( t_fetched, loadLE(Ity_I8, mkexpr(t_addr1)) );
+
+ if (op != BtOpNone) {
+ switch (op) {
+ case BtOpSet:
+ assign( t_new,
+ binop(Iop_Or8, mkexpr(t_fetched), mkexpr(t_mask)) );
+ break;
+ case BtOpComp:
+ assign( t_new,
+ binop(Iop_Xor8, mkexpr(t_fetched), mkexpr(t_mask)) );
+ break;
+ case BtOpReset:
+ assign( t_new,
+ binop(Iop_And8, mkexpr(t_fetched),
+ unop(Iop_Not8, mkexpr(t_mask))) );
+ break;
+ default:
+ vpanic("dis_bt_G_E(x86)");
+ }
+ if (locked && !epartIsReg(modrm)) {
+ casLE( mkexpr(t_addr1), mkexpr(t_fetched)/*expd*/,
+ mkexpr(t_new)/*new*/,
+ guest_EIP_curr_instr );
+ } else {
+ storeLE( mkexpr(t_addr1), mkexpr(t_new) );
+ }
+ }
+
+ /* Side effect done; now get selected bit into Carry flag */
+ /* Flags: C=selected bit, O,S,Z,A,P undefined, so are set to zero. */
+ stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) ));
+ stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
+ stmt( IRStmt_Put(
+ OFFB_CC_DEP1,
+ binop(Iop_And32,
+ binop(Iop_Shr32,
+ unop(Iop_8Uto32, mkexpr(t_fetched)),
+ mkexpr(t_bitno2)),
+ mkU32(1)))
+ );
+ /* Set NDEP even though it isn't used. This makes redundant-PUT
+ elimination of previous stores to this field work better. */
+ stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
+
+ /* Move reg operand from stack back to reg */
+ if (epartIsReg(modrm)) {
+ /* t_esp still points at it. */
+ putIReg(sz, eregOfRM(modrm), loadLE(szToITy(sz), mkexpr(t_esp)) );
+ putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(t_esp), mkU32(128)) );
+ }
+
+ DIP("bt%s%c %s, %s\n",
+ nameBtOp(op), nameISize(sz), nameIReg(sz, gregOfRM(modrm)),
+ ( epartIsReg(modrm) ? nameIReg(sz, eregOfRM(modrm)) : dis_buf ) );
+
+ return delta;
+}
+
+
+
+/* Handle BSF/BSR. Only v-size seems necessary. */
+static
+UInt dis_bs_E_G ( UChar sorb, Int sz, Int delta, Bool fwds )
+{
+ Bool isReg;
+ UChar modrm;
+ HChar dis_buf[50];
+
+ IRType ty = szToITy(sz);
+ IRTemp src = newTemp(ty);
+ IRTemp dst = newTemp(ty);
+
+ IRTemp src32 = newTemp(Ity_I32);
+ IRTemp dst32 = newTemp(Ity_I32);
+ IRTemp src8 = newTemp(Ity_I8);
+
+ vassert(sz == 4 || sz == 2);
+
+ modrm = getIByte(delta);
+
+ isReg = epartIsReg(modrm);
+ if (isReg) {
+ delta++;
+ assign( src, getIReg(sz, eregOfRM(modrm)) );
+ } else {
+ Int len;
+ IRTemp addr = disAMode( &len, sorb, delta, dis_buf );
+ delta += len;
+ assign( src, loadLE(ty, mkexpr(addr)) );
+ }
+
+ DIP("bs%c%c %s, %s\n",
+ fwds ? 'f' : 'r', nameISize(sz),
+ ( isReg ? nameIReg(sz, eregOfRM(modrm)) : dis_buf ),
+ nameIReg(sz, gregOfRM(modrm)));
+
+ /* Generate an 8-bit expression which is zero iff the
+ original is zero, and nonzero otherwise */
+ assign( src8,
+ unop(Iop_1Uto8, binop(mkSizedOp(ty,Iop_CmpNE8),
+ mkexpr(src), mkU(ty,0))) );
+
+ /* Flags: Z is 1 iff source value is zero. All others
+ are undefined -- we force them to zero. */
+ stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) ));
+ stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
+ stmt( IRStmt_Put(
+ OFFB_CC_DEP1,
+ IRExpr_Mux0X( mkexpr(src8),
+ /* src==0 */
+ mkU32(X86G_CC_MASK_Z),
+ /* src!=0 */
+ mkU32(0)
+ )
+ ));
+ /* Set NDEP even though it isn't used. This makes redundant-PUT
+ elimination of previous stores to this field work better. */
+ stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
+
+ /* Result: iff source value is zero, we can't use
+ Iop_Clz32/Iop_Ctz32 as they have no defined result in that case.
+ But anyway, Intel x86 semantics say the result is undefined in
+ such situations. Hence handle the zero case specially. */
+
+ /* Bleh. What we compute:
+
+ bsf32: if src == 0 then 0 else Ctz32(src)
+ bsr32: if src == 0 then 0 else 31 - Clz32(src)
+
+ bsf16: if src == 0 then 0 else Ctz32(16Uto32(src))
+ bsr16: if src == 0 then 0 else 31 - Clz32(16Uto32(src))
+
+ First, widen src to 32 bits if it is not already.
+
+ Postscript 15 Oct 04: it seems that at least VIA Nehemiah leaves the
+ dst register unchanged when src == 0. Hence change accordingly.
+ */
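+ /* Worked example: for src32 == 0x28 (binary 101000), bsf
+ computes Ctz32(0x28) = 3 (index of the lowest set bit) and
+ bsr computes 31 - Clz32(0x28) = 31 - 26 = 5 (index of the
+ highest set bit). */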
+ if (sz == 2)
+ assign( src32, unop(Iop_16Uto32, mkexpr(src)) );
+ else
+ assign( src32, mkexpr(src) );
+
+ /* The main computation, guarding against zero. */
+ assign( dst32,
+ IRExpr_Mux0X(
+ mkexpr(src8),
+ /* src == 0 -- leave dst unchanged */
+ widenUto32( getIReg( sz, gregOfRM(modrm) ) ),
+ /* src != 0 */
+ fwds ? unop(Iop_Ctz32, mkexpr(src32))
+ : binop(Iop_Sub32,
+ mkU32(31),
+ unop(Iop_Clz32, mkexpr(src32)))
+ )
+ );
+
+ if (sz == 2)
+ assign( dst, unop(Iop_32to16, mkexpr(dst32)) );
+ else
+ assign( dst, mkexpr(dst32) );
+
+ /* dump result back */
+ putIReg( sz, gregOfRM(modrm), mkexpr(dst) );
+
+ return delta;
+}
+
+
+static
+void codegen_xchg_eAX_Reg ( Int sz, Int reg )
+{
+ IRType ty = szToITy(sz);
+ IRTemp t1 = newTemp(ty);
+ IRTemp t2 = newTemp(ty);
+ vassert(sz == 2 || sz == 4);
+ assign( t1, getIReg(sz, R_EAX) );
+ assign( t2, getIReg(sz, reg) );
+ putIReg( sz, R_EAX, mkexpr(t2) );
+ putIReg( sz, reg, mkexpr(t1) );
+ DIP("xchg%c %s, %s\n",
+ nameISize(sz), nameIReg(sz, R_EAX), nameIReg(sz, reg));
+}
+
+
+static
+void codegen_SAHF ( void )
+{
+ /* Set the flags to:
+ (x86g_calculate_eflags_all() & X86G_CC_MASK_O)
+ -- retain the old O flag
+ | (%AH & (X86G_CC_MASK_S|X86G_CC_MASK_Z|X86G_CC_MASK_A
+ |X86G_CC_MASK_P|X86G_CC_MASK_C))
+ */
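+ /* In the EFLAGS image these five bits sit at S=0x80, Z=0x40,
+ A=0x10, P=0x04 and C=0x01, so mask_SZACP == 0xD5, matching
+ the SF:ZF:0:AF:0:PF:1:CF layout of %AH. */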
+ UInt mask_SZACP = X86G_CC_MASK_S|X86G_CC_MASK_Z|X86G_CC_MASK_A
+ |X86G_CC_MASK_C|X86G_CC_MASK_P;
+ IRTemp oldflags = newTemp(Ity_I32);
+ assign( oldflags, mk_x86g_calculate_eflags_all() );
+ stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) ));
+ stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
+ stmt( IRStmt_Put( OFFB_CC_DEP1,
+ binop(Iop_Or32,
+ binop(Iop_And32, mkexpr(oldflags), mkU32(X86G_CC_MASK_O)),
+ binop(Iop_And32,
+ binop(Iop_Shr32, getIReg(4, R_EAX), mkU8(8)),
+ mkU32(mask_SZACP))
+ )
+ ));
+ /* Set NDEP even though it isn't used. This makes redundant-PUT
+ elimination of previous stores to this field work better. */
+ stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
+}
+
+
+static
+void codegen_LAHF ( void )
+{
+ /* AH <- EFLAGS(SF:ZF:0:AF:0:PF:1:CF) */
+ IRExpr* eax_with_hole;
+ IRExpr* new_byte;
+ IRExpr* new_eax;
+ UInt mask_SZACP = X86G_CC_MASK_S|X86G_CC_MASK_Z|X86G_CC_MASK_A
+ |X86G_CC_MASK_C|X86G_CC_MASK_P;
+
+ IRTemp flags = newTemp(Ity_I32);
+ assign( flags, mk_x86g_calculate_eflags_all() );
+
+ eax_with_hole
+ = binop(Iop_And32, getIReg(4, R_EAX), mkU32(0xFFFF00FF));
+ new_byte
+ = binop(Iop_Or32, binop(Iop_And32, mkexpr(flags), mkU32(mask_SZACP)),
+ mkU32(1<<1)); /* EFLAGS bit 1 always reads as 1 */
+ new_eax
+ = binop(Iop_Or32, eax_with_hole,
+ binop(Iop_Shl32, new_byte, mkU8(8)));
+ putIReg(4, R_EAX, new_eax);
+}
+
+
+static
+UInt dis_cmpxchg_G_E ( UChar sorb,
+ Bool locked,
+ Int size,
+ Int delta0 )
+{
+ HChar dis_buf[50];
+ Int len;
+
+ IRType ty = szToITy(size);
+ IRTemp acc = newTemp(ty);
+ IRTemp src = newTemp(ty);
+ IRTemp dest = newTemp(ty);
+ IRTemp dest2 = newTemp(ty);
+ IRTemp acc2 = newTemp(ty);
+ IRTemp cond8 = newTemp(Ity_I8);
+ IRTemp addr = IRTemp_INVALID;
+ UChar rm = getUChar(delta0);
+
+ /* There are 3 cases to consider:
+
+ reg-reg: ignore any lock prefix, generate sequence based
+ on Mux0X
+
+ reg-mem, not locked: ignore any lock prefix, generate sequence
+ based on Mux0X
+
+ reg-mem, locked: use IRCAS
+ */
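+ /* In all three cases the architected effect is: if %EAX equals
+ dest then dest := src and ZF := 1, else %EAX := dest and
+ ZF := 0. cond8 below is the computed ZF, so the Mux0X pairs
+ steer both the destination and the new %EAX; on success %EAX
+ is deliberately left unchanged. */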
+ if (epartIsReg(rm)) {
+ /* case 1 */
+ assign( dest, getIReg(size, eregOfRM(rm)) );
+ delta0++;
+ assign( src, getIReg(size, gregOfRM(rm)) );
+ assign( acc, getIReg(size, R_EAX) );
+ setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty);
+ assign( cond8, unop(Iop_1Uto8, mk_x86g_calculate_condition(X86CondZ)) );
+ assign( dest2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(src)) );
+ assign( acc2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(acc)) );
+ putIReg(size, R_EAX, mkexpr(acc2));
+ putIReg(size, eregOfRM(rm), mkexpr(dest2));
+ DIP("cmpxchg%c %s,%s\n", nameISize(size),
+ nameIReg(size,gregOfRM(rm)),
+ nameIReg(size,eregOfRM(rm)) );
+ }
+ else if (!epartIsReg(rm) && !locked) {
+ /* case 2 */
+ addr = disAMode ( &len, sorb, delta0, dis_buf );
+ assign( dest, loadLE(ty, mkexpr(addr)) );
+ delta0 += len;
+ assign( src, getIReg(size, gregOfRM(rm)) );
+ assign( acc, getIReg(size, R_EAX) );
+ setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty);
+ assign( cond8, unop(Iop_1Uto8, mk_x86g_calculate_condition(X86CondZ)) );
+ assign( dest2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(src)) );
+ assign( acc2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(acc)) );
+ putIReg(size, R_EAX, mkexpr(acc2));
+ storeLE( mkexpr(addr), mkexpr(dest2) );
+ DIP("cmpxchg%c %s,%s\n", nameISize(size),
+ nameIReg(size,gregOfRM(rm)), dis_buf);
+ }
+ else if (!epartIsReg(rm) && locked) {
+ /* case 3 */
+ /* src is new value. acc is expected value. dest is old value.
+ Compute success from the output of the IRCAS, and steer the
+ new value for EAX accordingly: in case of success, EAX is
+ unchanged. */
+ addr = disAMode ( &len, sorb, delta0, dis_buf );
+ delta0 += len;
+ assign( src, getIReg(size, gregOfRM(rm)) );
+ assign( acc, getIReg(size, R_EAX) );
+ stmt( IRStmt_CAS(
+ mkIRCAS( IRTemp_INVALID, dest, Iend_LE, mkexpr(addr),
+ NULL, mkexpr(acc), NULL, mkexpr(src) )
+ ));
+ setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty);
+ assign( cond8, unop(Iop_1Uto8, mk_x86g_calculate_condition(X86CondZ)) );
+ assign( acc2, IRExpr_Mux0X(mkexpr(cond8), mkexpr(dest), mkexpr(acc)) );
+ putIReg(size, R_EAX, mkexpr(acc2));
+ DIP("cmpxchg%c %s,%s\n", nameISize(size),
+ nameIReg(size,gregOfRM(rm)), dis_buf);
+ }
+ else vassert(0);
+
+ return delta0;
+}
+
+
+/* Handle conditional move instructions of the form
+ cmovcc E(reg-or-mem), G(reg)
+
+ E(src) is reg-or-mem
+ G(dst) is reg.
+
+ If E is reg, --> GET %E, tmps
+ GET %G, tmpd
+ CMOVcc tmps, tmpd
+ PUT tmpd, %G
+
+ If E is mem --> (getAddr E) -> tmpa
+ LD (tmpa), tmps
+ GET %G, tmpd
+ CMOVcc tmps, tmpd
+ PUT tmpd, %G
+*/
+static
+UInt dis_cmov_E_G ( UChar sorb,
+ Int sz,
+ X86Condcode cond,
+ Int delta0 )
+{
+ UChar rm = getIByte(delta0);
+ HChar dis_buf[50];
+ Int len;
+
+ IRType ty = szToITy(sz);
+ IRTemp tmps = newTemp(ty);
+ IRTemp tmpd = newTemp(ty);
+
+ if (epartIsReg(rm)) {
+ assign( tmps, getIReg(sz, eregOfRM(rm)) );
+ assign( tmpd, getIReg(sz, gregOfRM(rm)) );
+
+ putIReg(sz, gregOfRM(rm),
+ IRExpr_Mux0X( unop(Iop_1Uto8,
+ mk_x86g_calculate_condition(cond)),
+ mkexpr(tmpd),
+ mkexpr(tmps) )
+ );
+ DIP("cmov%c%s %s,%s\n", nameISize(sz),
+ name_X86Condcode(cond),
+ nameIReg(sz,eregOfRM(rm)),
+ nameIReg(sz,gregOfRM(rm)));
+ return 1+delta0;
+ }
+
+ /* E refers to memory */
+ {
+ IRTemp addr = disAMode ( &len, sorb, delta0, dis_buf );
+ assign( tmps, loadLE(ty, mkexpr(addr)) );
+ assign( tmpd, getIReg(sz, gregOfRM(rm)) );
+
+ putIReg(sz, gregOfRM(rm),
+ IRExpr_Mux0X( unop(Iop_1Uto8,
+ mk_x86g_calculate_condition(cond)),
+ mkexpr(tmpd),
+ mkexpr(tmps) )
+ );
+
+ DIP("cmov%c%s %s,%s\n", nameISize(sz),
+ name_X86Condcode(cond),
+ dis_buf,
+ nameIReg(sz,gregOfRM(rm)));
+ return len+delta0;
+ }
+}
+
+
+static
+UInt dis_xadd_G_E ( UChar sorb, Bool locked, Int sz, Int delta0,
+ Bool* decodeOK )
+{
+ Int len;
+ UChar rm = getIByte(delta0);
+ HChar dis_buf[50];
+
+ IRType ty = szToITy(sz);
+ IRTemp tmpd = newTemp(ty);
+ IRTemp tmpt0 = newTemp(ty);
+ IRTemp tmpt1 = newTemp(ty);
+
+ /* There are 3 cases to consider:
+
+ reg-reg: ignore any lock prefix,
+ generate 'naive' (non-atomic) sequence
+
+ reg-mem, not locked: ignore any lock prefix, generate 'naive'
+ (non-atomic) sequence
+
+ reg-mem, locked: use IRCAS
+ */
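+ /* In all three cases the architected effect is:
+ tmp := E + G; G := old E; E := tmp, with flags set from
+ the addition. */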
+
+ if (epartIsReg(rm)) {
+ /* case 1 */
+ assign( tmpd, getIReg(sz, eregOfRM(rm)));
+ assign( tmpt0, getIReg(sz, gregOfRM(rm)) );
+ assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8),
+ mkexpr(tmpd), mkexpr(tmpt0)) );
+ setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty );
+ putIReg(sz, eregOfRM(rm), mkexpr(tmpt1));
+ putIReg(sz, gregOfRM(rm), mkexpr(tmpd));
+ DIP("xadd%c %s, %s\n",
+ nameISize(sz), nameIReg(sz,gregOfRM(rm)),
+ nameIReg(sz,eregOfRM(rm)));
+ *decodeOK = True;
+ return 1+delta0;
+ }
+ else if (!epartIsReg(rm) && !locked) {
+ /* case 2 */
+ IRTemp addr = disAMode ( &len, sorb, delta0, dis_buf );
+ assign( tmpd, loadLE(ty, mkexpr(addr)) );
+ assign( tmpt0, getIReg(sz, gregOfRM(rm)) );
+ assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8),
+ mkexpr(tmpd), mkexpr(tmpt0)) );
+ storeLE( mkexpr(addr), mkexpr(tmpt1) );
+ setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty );
+ putIReg(sz, gregOfRM(rm), mkexpr(tmpd));
+ DIP("xadd%c %s, %s\n",
+ nameISize(sz), nameIReg(sz,gregOfRM(rm)), dis_buf);
+ *decodeOK = True;
+ return len+delta0;
+ }
+ else if (!epartIsReg(rm) && locked) {
+ /* case 3 */
+ IRTemp addr = disAMode ( &len, sorb, delta0, dis_buf );
+ assign( tmpd, loadLE(ty, mkexpr(addr)) );
+ assign( tmpt0, getIReg(sz, gregOfRM(rm)) );
+ assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8),
+ mkexpr(tmpd), mkexpr(tmpt0)) );
+ casLE( mkexpr(addr), mkexpr(tmpd)/*expVal*/,
+ mkexpr(tmpt1)/*newVal*/, guest_EIP_curr_instr );
+ setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty );
+ putIReg(sz, gregOfRM(rm), mkexpr(tmpd));
+ DIP("xadd%c %s, %s\n",
+ nameISize(sz), nameIReg(sz,gregOfRM(rm)), dis_buf);
+ *decodeOK = True;
+ return len+delta0;
+ }
+ /*NOTREACHED*/
+ vassert(0);
+}
+
+/* Move 16 bits from Ew (ireg or mem) to G (a segment register). */
+
+static
+UInt dis_mov_Ew_Sw ( UChar sorb, Int delta0 )
+{
+ Int len;
+ IRTemp addr;
+ UChar rm = getIByte(delta0);
+ HChar dis_buf[50];
+
+ if (epartIsReg(rm)) {
+ putSReg( gregOfRM(rm), getIReg(2, eregOfRM(rm)) );
+ DIP("movw %s,%s\n", nameIReg(2,eregOfRM(rm)), nameSReg(gregOfRM(rm)));
+ return 1+delta0;
+ } else {
+ addr = disAMode ( &len, sorb, delta0, dis_buf );
+ putSReg( gregOfRM(rm), loadLE(Ity_I16, mkexpr(addr)) );
+ DIP("movw %s,%s\n", dis_buf, nameSReg(gregOfRM(rm)));
+ return len+delta0;
+ }
+}
+
+/* Move 16 bits from G (a segment register) to Ew (ireg or mem). If
+ dst is ireg and sz==4, zero out top half of it. */
+
+static
+UInt dis_mov_Sw_Ew ( UChar sorb,
+ Int sz,
+ Int delta0 )
+{
+ Int len;
+ IRTemp addr;
+ UChar rm = getIByte(delta0);
+ HChar dis_buf[50];
+
+ vassert(sz == 2 || sz == 4);
+
+ if (epartIsReg(rm)) {
+ if (sz == 4)
+ putIReg(4, eregOfRM(rm), unop(Iop_16Uto32, getSReg(gregOfRM(rm))));
+ else
+ putIReg(2, eregOfRM(rm), getSReg(gregOfRM(rm)));
+
+ DIP("mov %s,%s\n", nameSReg(gregOfRM(rm)), nameIReg(sz,eregOfRM(rm)));
+ return 1+delta0;
+ } else {
+ addr = disAMode ( &len, sorb, delta0, dis_buf );
+ storeLE( mkexpr(addr), getSReg(gregOfRM(rm)) );
+ DIP("mov %s,%s\n", nameSReg(gregOfRM(rm)), dis_buf);
+ return len+delta0;
+ }
+}
+
+
+static
+void dis_push_segreg ( UInt sreg, Int sz )
+{
+ IRTemp t1 = newTemp(Ity_I16);
+ IRTemp ta = newTemp(Ity_I32);
+ vassert(sz == 2 || sz == 4);
+
+ assign( t1, getSReg(sreg) );
+ assign( ta, binop(Iop_Sub32, getIReg(4, R_ESP), mkU32(sz)) );
+ putIReg(4, R_ESP, mkexpr(ta));
+ storeLE( mkexpr(ta), mkexpr(t1) );
+
+ DIP("push%c %s\n", sz==2 ? 'w' : 'l', nameSReg(sreg));
+}
+
+static
+void dis_pop_segreg ( UInt sreg, Int sz )
+{
+ IRTemp t1 = newTemp(Ity_I16);
+ IRTemp ta = newTemp(Ity_I32);
+ vassert(sz == 2 || sz == 4);
+
+ assign( ta, getIReg(4, R_ESP) );
+ assign( t1, loadLE(Ity_I16, mkexpr(ta)) );
+
+ putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(ta), mkU32(sz)) );
+ putSReg( sreg, mkexpr(t1) );
+ DIP("pop%c %s\n", sz==2 ? 'w' : 'l', nameSReg(sreg));
+}
+
+static
+void dis_ret ( UInt d32 )
+{
+ IRTemp t1 = newTemp(Ity_I32), t2 = newTemp(Ity_I32);
+ assign(t1, getIReg(4,R_ESP));
+ assign(t2, loadLE(Ity_I32,mkexpr(t1)));
+ putIReg(4, R_ESP,binop(Iop_Add32, mkexpr(t1), mkU32(4+d32)));
+ jmp_treg(Ijk_Ret,t2);
+}
+
+/*------------------------------------------------------------*/
+/*--- SSE/SSE2/SSE3 helpers ---*/
+/*------------------------------------------------------------*/
+
+/* Worker function; do not call directly.
+ Handles full width G = G `op` E and G = (not G) `op` E.
+*/
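+/* invertG == True computes G = (not G) `op` E, which is the
+ shape andnps/andnpd need (dst := NOT(dst) AND src); see
+ dis_SSE_E_to_G_all_invG below. */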
+
+static UInt dis_SSE_E_to_G_all_wrk (
+ UChar sorb, Int delta,
+ HChar* opname, IROp op,
+ Bool invertG
+ )
+{
+ HChar dis_buf[50];
+ Int alen;
+ IRTemp addr;
+ UChar rm = getIByte(delta);
+ IRExpr* gpart
+ = invertG ? unop(Iop_NotV128, getXMMReg(gregOfRM(rm)))
+ : getXMMReg(gregOfRM(rm));
+ if (epartIsReg(rm)) {
+ putXMMReg( gregOfRM(rm),
+ binop(op, gpart,
+ getXMMReg(eregOfRM(rm))) );
+ DIP("%s %s,%s\n", opname,
+ nameXMMReg(eregOfRM(rm)),
+ nameXMMReg(gregOfRM(rm)) );
+ return delta+1;
+ } else {
+ addr = disAMode ( &alen, sorb, delta, dis_buf );
+ putXMMReg( gregOfRM(rm),
+ binop(op, gpart,
+ loadLE(Ity_V128, mkexpr(addr))) );
+ DIP("%s %s,%s\n", opname,
+ dis_buf,
+ nameXMMReg(gregOfRM(rm)) );
+ return delta+alen;
+ }
+}
+
+
+/* All lanes SSE binary operation, G = G `op` E. */
+
+static
+UInt dis_SSE_E_to_G_all ( UChar sorb, Int delta, HChar* opname, IROp op )
+{
+ return dis_SSE_E_to_G_all_wrk( sorb, delta, opname, op, False );
+}
+
+/* All lanes SSE binary operation, G = (not G) `op` E. */
+
+static
+UInt dis_SSE_E_to_G_all_invG ( UChar sorb, Int delta,
+ HChar* opname, IROp op )
+{
+ return dis_SSE_E_to_G_all_wrk( sorb, delta, opname, op, True );
+}
+
+
+/* Lowest 32-bit lane only SSE binary operation, G = G `op` E. */
+
+static UInt dis_SSE_E_to_G_lo32 ( UChar sorb, Int delta,
+ HChar* opname, IROp op )
+{
+ HChar dis_buf[50];
+ Int alen;
+ IRTemp addr;
+ UChar rm = getIByte(delta);
+ IRExpr* gpart = getXMMReg(gregOfRM(rm));
+ if (epartIsReg(rm)) {
+ putXMMReg( gregOfRM(rm),
+ binop(op, gpart,
+ getXMMReg(eregOfRM(rm))) );
+ DIP("%s %s,%s\n", opname,
+ nameXMMReg(eregOfRM(rm)),
+ nameXMMReg(gregOfRM(rm)) );
+ return delta+1;
+ } else {
+ /* We can only do a 32-bit memory read, so the upper 3/4 of the
+ E operand needs to be made simply of zeroes. */
+ IRTemp epart = newTemp(Ity_V128);
+ addr = disAMode ( &alen, sorb, delta, dis_buf );
+ assign( epart, unop( Iop_32UtoV128,
+ loadLE(Ity_I32, mkexpr(addr))) );
+ putXMMReg( gregOfRM(rm),
+ binop(op, gpart, mkexpr(epart)) );
+ DIP("%s %s,%s\n", opname,
+ dis_buf,
+ nameXMMReg(gregOfRM(rm)) );
+ return delta+alen;
+ }
+}
+
+
+/* Lower 64-bit lane only SSE binary operation, G = G `op` E. */
+
+static UInt dis_SSE_E_to_G_lo64 ( UChar sorb, Int delta,
+ HChar* opname, IROp op )
+{
+ HChar dis_buf[50];
+ Int alen;
+ IRTemp addr;
+ UChar rm = getIByte(delta);
+ IRExpr* gpart = getXMMReg(gregOfRM(rm));
+ if (epartIsReg(rm)) {
+ putXMMReg( gregOfRM(rm),
+ binop(op, gpart,
+ getXMMReg(eregOfRM(rm))) );
+ DIP("%s %s,%s\n", opname,
+ nameXMMReg(eregOfRM(rm)),
+ nameXMMReg(gregOfRM(rm)) );
+ return delta+1;
+ } else {
+ /* We can only do a 64-bit memory read, so the upper half of the
+ E operand needs to be made simply of zeroes. */
+ IRTemp epart = newTemp(Ity_V128);
+ addr = disAMode ( &alen, sorb, delta, dis_buf );
+ assign( epart, unop( Iop_64UtoV128,
+ loadLE(Ity_I64, mkexpr(addr))) );
+ putXMMReg( gregOfRM(rm),
+ binop(op, gpart, mkexpr(epart)) );
+ DIP("%s %s,%s\n", opname,
+ dis_buf,
+ nameXMMReg(gregOfRM(rm)) );
+ return delta+alen;
+ }
+}
+
+
+/* All lanes unary SSE operation, G = op(E). */
+
+static UInt dis_SSE_E_to_G_unary_all (
+ UChar sorb, Int delta,
+ HChar* opname, IROp op
+ )
+{
+ HChar dis_buf[50];
+ Int alen;
+ IRTemp addr;
+ UChar rm = getIByte(delta);
+ if (epartIsReg(rm)) {
+ putXMMReg( gregOfRM(rm),
+ unop(op, getXMMReg(eregOfRM(rm))) );
+ DIP("%s %s,%s\n", opname,
+ nameXMMReg(eregOfRM(rm)),
+ nameXMMReg(gregOfRM(rm)) );
+ return delta+1;
+ } else {
+ addr = disAMode ( &alen, sorb, delta, dis_buf );
+ putXMMReg( gregOfRM(rm),
+ unop(op, loadLE(Ity_V128, mkexpr(addr))) );
+ DIP("%s %s,%s\n", opname,
+ dis_buf,
+ nameXMMReg(gregOfRM(rm)) );
+ return delta+alen;
+ }
+}
+
+
+/* Lowest 32-bit lane only unary SSE operation, G = op(E). */
+
+static UInt dis_SSE_E_to_G_unary_lo32 (
+ UChar sorb, Int delta,
+ HChar* opname, IROp op
+ )
+{
+ /* First we need to get the old G value and patch the low 32 bits
+ of the E operand into it. Then apply op and write back to G. */
+ HChar dis_buf[50];
+ Int alen;
+ IRTemp addr;
+ UChar rm = getIByte(delta);
+ IRTemp oldG0 = newTemp(Ity_V128);
+ IRTemp oldG1 = newTemp(Ity_V128);
+
+ assign( oldG0, getXMMReg(gregOfRM(rm)) );
+
+ if (epartIsReg(rm)) {
+ assign( oldG1,
+ binop( Iop_SetV128lo32,
+ mkexpr(oldG0),
+ getXMMRegLane32(eregOfRM(rm), 0)) );
+ putXMMReg( gregOfRM(rm), unop(op, mkexpr(oldG1)) );
+ DIP("%s %s,%s\n", opname,
+ nameXMMReg(eregOfRM(rm)),
+ nameXMMReg(gregOfRM(rm)) );
+ return delta+1;
+ } else {
+ addr = disAMode ( &alen, sorb, delta, dis_buf );
+ assign( oldG1,
+ binop( Iop_SetV128lo32,
+ mkexpr(oldG0),
+ loadLE(Ity_I32, mkexpr(addr)) ));
+ putXMMReg( gregOfRM(rm), unop(op, mkexpr(oldG1)) );
+ DIP("%s %s,%s\n", opname,
+ dis_buf,
+ nameXMMReg(gregOfRM(rm)) );
+ return delta+alen;
+ }
+}
+
+
+/* Lowest 64-bit lane only unary SSE operation, G = op(E). */
+
+static UInt dis_SSE_E_to_G_unary_lo64 (
+ UChar sorb, Int delta,
+ HChar* opname, IROp op
+ )
+{
+ /* First we need to get the old G value and patch the low 64 bits
+ of the E operand into it. Then apply op and write back to G. */
+ HChar dis_buf[50];
+ Int alen;
+ IRTemp addr;
+ UChar rm = getIByte(delta);
+ IRTemp oldG0 = newTemp(Ity_V128);
+ IRTemp oldG1 = newTemp(Ity_V128);
+
+ assign( oldG0, getXMMReg(gregOfRM(rm)) );
+
+ if (epartIsReg(rm)) {
+ assign( oldG1,
+ binop( Iop_SetV128lo64,
+ mkexpr(oldG0),
+ getXMMRegLane64(eregOfRM(rm), 0)) );
+ putXMMReg( gregOfRM(rm), unop(op, mkexpr(oldG1)) );
+ DIP("%s %s,%s\n", opname,
+ nameXMMReg(eregOfRM(rm)),
+ nameXMMReg(gregOfRM(rm)) );
+ return delta+1;
+ } else {
+ addr = disAMode ( &alen, sorb, delta, dis_buf );
+ assign( oldG1,
+ binop( Iop_SetV128lo64,
+ mkexpr(oldG0),
+ loadLE(Ity_I64, mkexpr(addr)) ));
+ putXMMReg( gregOfRM(rm), unop(op, mkexpr(oldG1)) );
+ DIP("%s %s,%s\n", opname,
+ dis_buf,
+ nameXMMReg(gregOfRM(rm)) );
+ return delta+alen;
+ }
+}
+
+
+/* SSE integer binary operation:
+ G = G `op` E (eLeft == False)
+ G = E `op` G (eLeft == True)
+*/
+static UInt dis_SSEint_E_to_G(
+ UChar sorb, Int delta,
+ HChar* opname, IROp op,
+ Bool eLeft
+ )
+{
+ HChar dis_buf[50];
+ Int alen;
+ IRTemp addr;
+ UChar rm = getIByte(delta);
+ IRExpr* gpart = getXMMReg(gregOfRM(rm));
+ IRExpr* epart = NULL;
+ if (epartIsReg(rm)) {
+ epart = getXMMReg(eregOfRM(rm));
+ DIP("%s %s,%s\n", opname,
+ nameXMMReg(eregOfRM(rm)),
+ nameXMMReg(gregOfRM(rm)) );
+ delta += 1;
+ } else {
+ addr = disAMode ( &alen, sorb, delta, dis_buf );
+ epart = loadLE(Ity_V128, mkexpr(addr));
+ DIP("%s %s,%s\n", opname,
+ dis_buf,
+ nameXMMReg(gregOfRM(rm)) );
+ delta += alen;
+ }
+ putXMMReg( gregOfRM(rm),
+ eLeft ? binop(op, epart, gpart)
+ : binop(op, gpart, epart) );
+ return delta;
+}
+
+
+/* Helper for doing SSE FP comparisons. */
+
+static void findSSECmpOp ( Bool* needNot, IROp* op,
+ Int imm8, Bool all_lanes, Int sz )
+{
+ imm8 &= 7;
+ *needNot = False;
+ *op = Iop_INVALID;
+ if (imm8 >= 4) {
+ *needNot = True;
+ imm8 -= 4;
+ }
+
+ if (sz == 4 && all_lanes) {
+ switch (imm8) {
+ case 0: *op = Iop_CmpEQ32Fx4; return;
+ case 1: *op = Iop_CmpLT32Fx4; return;
+ case 2: *op = Iop_CmpLE32Fx4; return;
+ case 3: *op = Iop_CmpUN32Fx4; return;
+ default: break;
+ }
+ }
+ if (sz == 4 && !all_lanes) {
+ switch (imm8) {
+ case 0: *op = Iop_CmpEQ32F0x4; return;
+ case 1: *op = Iop_CmpLT32F0x4; return;
+ case 2: *op = Iop_CmpLE32F0x4; return;
+ case 3: *op = Iop_CmpUN32F0x4; return;
+ default: break;
+ }
+ }
+ if (sz == 8 && all_lanes) {
+ switch (imm8) {
+ case 0: *op = Iop_CmpEQ64Fx2; return;
+ case 1: *op = Iop_CmpLT64Fx2; return;
+ case 2: *op = Iop_CmpLE64Fx2; return;
+ case 3: *op = Iop_CmpUN64Fx2; return;
+ default: break;
+ }
+ }
+ if (sz == 8 && !all_lanes) {
+ switch (imm8) {
+ case 0: *op = Iop_CmpEQ64F0x2; return;
+ case 1: *op = Iop_CmpLT64F0x2; return;
+ case 2: *op = Iop_CmpLE64F0x2; return;
+ case 3: *op = Iop_CmpUN64F0x2; return;
+ default: break;
+ }
+ }
+ vpanic("findSSECmpOp(x86,guest)");
+}
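+
+/* Worked example of the encoding above: CMPPS with imm8 == 5
+   ("not less-than") has imm8 >= 4, so needNot is set and imm8
+   drops to 1, selecting Iop_CmpLT32Fx4; the caller then inverts
+   the comparison result, giving NLT as the complement of LT. */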
+
+/* Handles SSE 32F/64F comparisons. */
+
+static UInt dis_SSEcmp_E_to_G ( UChar sorb, Int delta,
+ HChar* opname, Bool all_lanes, Int sz )
+{
+ HChar dis_buf[50];
+ Int alen, imm8;
+ IRTemp addr;
+ Bool needNot = False;
+ IROp op = Iop_INVALID;
+ IRTemp plain = newTemp(Ity_V128);
+ UChar rm = getIByte(delta);
+ UShort mask = 0;
+ vassert(sz == 4 || sz == 8);
+ if (epartIsReg(rm)) {
+ imm8 = getIByte(delta+1);
+ findSSECmpOp(&needNot, &op, imm8, all_lanes, sz);
+ assign( plain, binop(op, getXMMReg(gregOfRM(rm)),
+ getXMMReg(eregOfRM(rm))) );
+ delta += 2;
+ DIP("%s $%d,%s,%s\n", opname,
+ (Int)imm8,
+ nameXMMReg(eregOfRM(rm)),
+ nameXMMReg(gregOfRM(rm)) );
+ } else {
+ addr = disAMode ( &alen, sorb, delta, dis_buf );
+ imm8 = getIByte(delta+alen);
+ findSSECmpOp(&needNot, &op, imm8, all_lanes, sz);
+ assign( plain,
+ binop(
+ op,
+ getXMMReg(gregOfRM(rm)),
+ all_lanes ? loadLE(Ity_V128, mkexpr(addr))
+ : sz == 8 ? unop( Iop_64UtoV128, loadLE(Ity_I64, mkexpr(addr)))
+ : /*sz==4*/ unop( Iop_32UtoV128, loadLE(Ity_I32, mkexpr(addr)))
+ )
+ );
+ delta += alen+1;
+ DIP("%s $%d,%s,%s\n", opname,
+ (Int)imm8,
+ dis_buf,
+ nameXMMReg(gregOfRM(rm)) );
+ }
+
+ if (needNot && all_lanes) {
+ putXMMReg( gregOfRM(rm),
+ unop(Iop_NotV128, mkexpr(plain)) );
+ }
+ else
+ if (needNot && !all_lanes) {
+ mask = toUShort( sz==4 ? 0x000F : 0x00FF );
+ putXMMReg( gregOfRM(rm),
+ binop(Iop_XorV128, mkexpr(plain), mkV128(mask)) );
+ }
+ else {
+ putXMMReg( gregOfRM(rm), mkexpr(plain) );
+ }
+
+ return delta;
+}
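+
+/* Note on the lo-lane masks above: mkV128 expands each of its 16
+   mask bits to one byte of the vector, so 0x000F denotes a vector
+   whose low 32 bits are all ones and 0x00FF one whose low 64 bits
+   are all ones -- exactly the lanes whose comparison result needs
+   inverting in the non-all_lanes case. */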
+
+
+/* Vector by scalar shift of G by the amount specified at the bottom
+ of E. */
+
+static UInt dis_SSE_shiftG_byE ( UChar sorb, Int delta,
+ HChar* opname, IROp op )
+{
+ HChar dis_buf[50];
+ Int alen, size;
+ IRTemp addr;
+ Bool shl, shr, sar;
+ UChar rm = getIByte(delta);
+ IRTemp g0 = newTemp(Ity_V128);
+ IRTemp g1 = newTemp(Ity_V128);
+ IRTemp amt = newTemp(Ity_I32);
+ IRTemp amt8 = newTemp(Ity_I8);
+ if (epartIsReg(rm)) {
+ assign( amt, getXMMRegLane32(eregOfRM(rm), 0) );
+ DIP("%s %s,%s\n", opname,
+ nameXMMReg(eregOfRM(rm)),
+ nameXMMReg(gregOfRM(rm)) );
+ delta++;
+ } else {
+ addr = disAMode ( &alen, sorb, delta, dis_buf );
+ assign( amt, loadLE(Ity_I32, mkexpr(addr)) );
+ DIP("%s %s,%s\n", opname,
+ dis_buf,
+ nameXMMReg(gregOfRM(rm)) );
+ delta += alen;
+ }
+ assign( g0, getXMMReg(gregOfRM(rm)) );
+ assign( amt8, unop(Iop_32to8, mkexpr(amt)) );
+
+ shl = shr = sar = False;
+ size = 0;
+ switch (op) {
+ case Iop_ShlN16x8: shl = True; size = 16; break;
+ case Iop_ShlN32x4: shl = True; size = 32; break;
+ case Iop_ShlN64x2: shl = True; size = 64; break;
+ case Iop_SarN16x8: sar = True; size = 16; break;
+ case Iop_SarN32x4: sar = True; size = 32; break;
+ case Iop_ShrN16x8: shr = True; size = 16; break;
+ case Iop_ShrN32x4: shr = True; size = 32; break;
+ case Iop_ShrN64x2: shr = True; size = 64; break;
+ default: vassert(0);
+ }
+
+ if (shl || shr) {
+ assign(
+ g1,
+ IRExpr_Mux0X(
+ unop(Iop_1Uto8,binop(Iop_CmpLT32U,mkexpr(amt),mkU32(size))),
+ mkV128(0x0000),
+ binop(op, mkexpr(g0), mkexpr(amt8))
+ )
+ );
+ } else
+ if (sar) {
+ assign(
+ g1,
+ IRExpr_Mux0X(
+ unop(Iop_1Uto8,binop(Iop_CmpLT32U,mkexpr(amt),mkU32(size))),
+ binop(op, mkexpr(g0), mkU8(size-1)),
+ binop(op, mkexpr(g0), mkexpr(amt8))
+ )
+ );
+ } else {
+ /*NOTREACHED*/
+ vassert(0);
+ }
+
+ putXMMReg( gregOfRM(rm), mkexpr(g1) );
+ return delta;
+}
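+
+/* Behaviour sketch for out-of-range amounts: if amt >= the lane
+   size, logical shifts produce all zeroes, while arithmetic shifts
+   are clamped to size-1 so each lane fills with its sign bit --
+   matching the documented PSLL/PSRL/PSRA semantics. */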
+
+
+/* Vector by scalar shift of E by an immediate byte. */
+
+static
+UInt dis_SSE_shiftE_imm ( Int delta, HChar* opname, IROp op )
+{
+ Bool shl, shr, sar;
+ UChar rm = getIByte(delta);
+ IRTemp e0 = newTemp(Ity_V128);
+ IRTemp e1 = newTemp(Ity_V128);
+ UChar amt, size;
+ vassert(epartIsReg(rm));
+ vassert(gregOfRM(rm) == 2
+ || gregOfRM(rm) == 4 || gregOfRM(rm) == 6);
+ amt = getIByte(delta+1);
+ delta += 2;
+ DIP("%s $%d,%s\n", opname,
+ (Int)amt,
+ nameXMMReg(eregOfRM(rm)) );
+ assign( e0, getXMMReg(eregOfRM(rm)) );
+
+ shl = shr = sar = False;
+ size = 0;
+ switch (op) {
+ case Iop_ShlN16x8: shl = True; size = 16; break;
+ case Iop_ShlN32x4: shl = True; size = 32; break;
+ case Iop_ShlN64x2: shl = True; size = 64; break;
+ case Iop_SarN16x8: sar = True; size = 16; break;
+ case Iop_SarN32x4: sar = True; size = 32; break;
+ case Iop_ShrN16x8: shr = True; size = 16; break;
+ case Iop_ShrN32x4: shr = True; size = 32; break;
+ case Iop_ShrN64x2: shr = True; size = 64; break;
+ default: vassert(0);
+ }
+
+ if (shl || shr) {
+ assign( e1, amt >= size
+ ? mkV128(0x0000)
+ : binop(op, mkexpr(e0), mkU8(amt))
+ );
+ } else
+ if (sar) {
+ assign( e1, amt >= size
+ ? binop(op, mkexpr(e0), mkU8(size-1))
+ : binop(op, mkexpr(e0), mkU8(amt))
+ );
+ } else {
+ /*NOTREACHED*/
+ vassert(0);
+ }
+
+ putXMMReg( eregOfRM(rm), mkexpr(e1) );
+ return delta;
+}
+
+
+/* Get the current SSE rounding mode. */
+
+static IRExpr* /* :: Ity_I32 */ get_sse_roundingmode ( void )
+{
+ return binop( Iop_And32,
+ IRExpr_Get( OFFB_SSEROUND, Ity_I32 ),
+ mkU32(3) );
+}
+
+static void put_sse_roundingmode ( IRExpr* sseround )
+{
+ vassert(typeOfIRExpr(irsb->tyenv, sseround) == Ity_I32);
+ stmt( IRStmt_Put( OFFB_SSEROUND, sseround ) );
+}
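+
+/* Note (an assumption recorded here, not checked by this code): the
+   2-bit value kept at OFFB_SSEROUND uses the IR encoding
+   (Irrm_NEAREST=0, Irrm_NegINF=1, Irrm_PosINF=2, Irrm_ZERO=3),
+   which coincides with the x86 MXCSR.RC encoding, so the masked
+   value can be handed directly to Iop_F64toF32 and friends. */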
+
+/* Break a 128-bit value up into four 32-bit ints. */
+
+static void breakup128to32s ( IRTemp t128,
+ /*OUTs*/
+ IRTemp* t3, IRTemp* t2,
+ IRTemp* t1, IRTemp* t0 )
+{
+ IRTemp hi64 = newTemp(Ity_I64);
+ IRTemp lo64 = newTemp(Ity_I64);
+ assign( hi64, unop(Iop_V128HIto64, mkexpr(t128)) );
+ assign( lo64, unop(Iop_V128to64, mkexpr(t128)) );
+
+ vassert(t0 && *t0 == IRTemp_INVALID);
+ vassert(t1 && *t1 == IRTemp_INVALID);
+ vassert(t2 && *t2 == IRTemp_INVALID);
+ vassert(t3 && *t3 == IRTemp_INVALID);
+
+ *t0 = newTemp(Ity_I32);
+ *t1 = newTemp(Ity_I32);
+ *t2 = newTemp(Ity_I32);
+ *t3 = newTemp(Ity_I32);
+ assign( *t0, unop(Iop_64to32, mkexpr(lo64)) );
+ assign( *t1, unop(Iop_64HIto32, mkexpr(lo64)) );
+ assign( *t2, unop(Iop_64to32, mkexpr(hi64)) );
+ assign( *t3, unop(Iop_64HIto32, mkexpr(hi64)) );
+}
+
+/* Construct a 128-bit value from four 32-bit ints. */
+
+static IRExpr* mk128from32s ( IRTemp t3, IRTemp t2,
+ IRTemp t1, IRTemp t0 )
+{
+ return
+ binop( Iop_64HLtoV128,
+ binop(Iop_32HLto64, mkexpr(t3), mkexpr(t2)),
+ binop(Iop_32HLto64, mkexpr(t1), mkexpr(t0))
+ );
+}
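+
+/* Lane-ordering example: if t128 holds
+   0xDDDDDDDD_CCCCCCCC_BBBBBBBB_AAAAAAAA, breakup128to32s yields
+   t0 = 0xAAAAAAAA (least significant) through t3 = 0xDDDDDDDD, and
+   mk128from32s(t3,t2,t1,t0) reassembles the original value. */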
+
+/* Break a 64-bit value up into four 16-bit ints. */
+
+static void breakup64to16s ( IRTemp t64,
+ /*OUTs*/
+ IRTemp* t3, IRTemp* t2,
+ IRTemp* t1, IRTemp* t0 )
+{
+ IRTemp hi32 = newTemp(Ity_I32);
+ IRTemp lo32 = newTemp(Ity_I32);
+ assign( hi32, unop(Iop_64HIto32, mkexpr(t64)) );
+ assign( lo32, unop(Iop_64to32, mkexpr(t64)) );
+
+ vassert(t0 && *t0 == IRTemp_INVALID);
+ vassert(t1 && *t1 == IRTemp_INVALID);
+ vassert(t2 && *t2 == IRTemp_INVALID);
+ vassert(t3 && *t3 == IRTemp_INVALID);
+
+ *t0 = newTemp(Ity_I16);
+ *t1 = newTemp(Ity_I16);
+ *t2 = newTemp(Ity_I16);
+ *t3 = newTemp(Ity_I16);
+ assign( *t0, unop(Iop_32to16, mkexpr(lo32)) );
+ assign( *t1, unop(Iop_32HIto16, mkexpr(lo32)) );
+ assign( *t2, unop(Iop_32to16, mkexpr(hi32)) );
+ assign( *t3, unop(Iop_32HIto16, mkexpr(hi32)) );
+}
+
+/* Construct a 64-bit value from four 16-bit ints. */
+
+static IRExpr* mk64from16s ( IRTemp t3, IRTemp t2,
+ IRTemp t1, IRTemp t0 )
+{
+ return
+ binop( Iop_32HLto64,
+ binop(Iop_16HLto32, mkexpr(t3), mkexpr(t2)),
+ binop(Iop_16HLto32, mkexpr(t1), mkexpr(t0))
+ );
+}
+
+/* Generate IR to set the guest %EFLAGS from the pushfl-format image
+ in the given 32-bit temporary. The flags that are set are: O S Z A
+ C P D ID AC.
+
+ In all cases, code to set AC is generated. However, VEX actually
+ ignores the AC value and so can optionally emit an emulation
+ warning when it is enabled. In this routine, an emulation warning
+ is only emitted if emit_AC_emwarn is True, in which case
+ next_insn_EIP must be correct (this allows for correct code
+ generation for popfl/popfw). If emit_AC_emwarn is False,
+ next_insn_EIP is unimportant (this allows for easy, if kludgey,
+ code generation for IRET). */
+
+static
+void set_EFLAGS_from_value ( IRTemp t1,
+ Bool emit_AC_emwarn,
+ Addr32 next_insn_EIP )
+{
+ vassert(typeOfIRTemp(irsb->tyenv,t1) == Ity_I32);
+
+ /* t1 is the flag word. Mask out everything except OSZACP and set
+ the flags thunk to X86G_CC_OP_COPY. */
+ stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) ));
+ stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
+ stmt( IRStmt_Put( OFFB_CC_DEP1,
+ binop(Iop_And32,
+ mkexpr(t1),
+ mkU32( X86G_CC_MASK_C | X86G_CC_MASK_P
+ | X86G_CC_MASK_A | X86G_CC_MASK_Z
+ | X86G_CC_MASK_S | X86G_CC_MASK_O )
+ )
+ )
+ );
+ /* Set NDEP even though it isn't used. This makes redundant-PUT
+ elimination of previous stores to this field work better. */
+ stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
+
+ /* Also need to set the D flag, which is held in bit 10 of t1.
+ If zero, put 1 in OFFB_DFLAG, else -1 in OFFB_DFLAG. */
+ stmt( IRStmt_Put(
+ OFFB_DFLAG,
+ IRExpr_Mux0X(
+ unop(Iop_32to8,
+ binop(Iop_And32,
+ binop(Iop_Shr32, mkexpr(t1), mkU8(10)),
+ mkU32(1))),
+ mkU32(1),
+ mkU32(0xFFFFFFFF)))
+ );
+
+ /* Set the ID flag */
+ stmt( IRStmt_Put(
+ OFFB_IDFLAG,
+ IRExpr_Mux0X(
+ unop(Iop_32to8,
+ binop(Iop_And32,
+ binop(Iop_Shr32, mkexpr(t1), mkU8(21)),
+ mkU32(1))),
+ mkU32(0),
+ mkU32(1)))
+ );
+
+ /* And set the AC flag. If setting it to 1, possibly emit an
+ emulation warning. */
+ stmt( IRStmt_Put(
+ OFFB_ACFLAG,
+ IRExpr_Mux0X(
+ unop(Iop_32to8,
+ binop(Iop_And32,
+ binop(Iop_Shr32, mkexpr(t1), mkU8(18)),
+ mkU32(1))),
+ mkU32(0),
+ mkU32(1)))
+ );
+
+ if (emit_AC_emwarn) {
+ put_emwarn( mkU32(EmWarn_X86_acFlag) );
+ stmt(
+ IRStmt_Exit(
+ binop( Iop_CmpNE32,
+ binop(Iop_And32, mkexpr(t1), mkU32(1<<18)),
+ mkU32(0) ),
+ Ijk_EmWarn,
+ IRConst_U32( next_insn_EIP )
+ )
+ );
+ }
+}
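+
+/* For reference, the EFLAGS bit positions assumed by the shifts
+   above: CF=0, PF=2, AF=4, ZF=6, SF=7, DF=10, AC=18, ID=21. The
+   OSZACP group travels through the flags thunk via OP_COPY; D, ID
+   and AC live in separate guest state fields. */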
+
+
+/* Helper for the SSSE3 (not SSE3) PMULHRSW insns. Given two 64-bit
+ values (aa,bb), computes, for each of the 4 16-bit lanes:
+
+ (((aa_lane *s32 bb_lane) >>u 14) + 1) >>u 1
+*/
+static IRExpr* dis_PMULHRSW_helper ( IRExpr* aax, IRExpr* bbx )
+{
+ IRTemp aa = newTemp(Ity_I64);
+ IRTemp bb = newTemp(Ity_I64);
+ IRTemp aahi32s = newTemp(Ity_I64);
+ IRTemp aalo32s = newTemp(Ity_I64);
+ IRTemp bbhi32s = newTemp(Ity_I64);
+ IRTemp bblo32s = newTemp(Ity_I64);
+ IRTemp rHi = newTemp(Ity_I64);
+ IRTemp rLo = newTemp(Ity_I64);
+ IRTemp one32x2 = newTemp(Ity_I64);
+ assign(aa, aax);
+ assign(bb, bbx);
+ assign( aahi32s,
+ binop(Iop_SarN32x2,
+ binop(Iop_InterleaveHI16x4, mkexpr(aa), mkexpr(aa)),
+ mkU8(16) ));
+ assign( aalo32s,
+ binop(Iop_SarN32x2,
+ binop(Iop_InterleaveLO16x4, mkexpr(aa), mkexpr(aa)),
+ mkU8(16) ));
+ assign( bbhi32s,
+ binop(Iop_SarN32x2,
+ binop(Iop_InterleaveHI16x4, mkexpr(bb), mkexpr(bb)),
+ mkU8(16) ));
+ assign( bblo32s,
+ binop(Iop_SarN32x2,
+ binop(Iop_InterleaveLO16x4, mkexpr(bb), mkexpr(bb)),
+ mkU8(16) ));
+ assign(one32x2, mkU64( (1ULL << 32) + 1 ));
+ assign(
+ rHi,
+ binop(
+ Iop_ShrN32x2,
+ binop(
+ Iop_Add32x2,
+ binop(
+ Iop_ShrN32x2,
+ binop(Iop_Mul32x2, mkexpr(aahi32s), mkexpr(bbhi32s)),
+ mkU8(14)
+ ),
+ mkexpr(one32x2)
+ ),
+ mkU8(1)
+ )
+ );
+ assign(
+ rLo,
+ binop(
+ Iop_ShrN32x2,
+ binop(
+ Iop_Add32x2,
+ binop(
+ Iop_ShrN32x2,
+ binop(Iop_Mul32x2, mkexpr(aalo32s), mkexpr(bblo32s)),
+ mkU8(14)
+ ),
+ mkexpr(one32x2)
+ ),
+ mkU8(1)
+ )
+ );
+ return
+ binop(Iop_CatEvenLanes16x4, mkexpr(rHi), mkexpr(rLo));
+}
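+
+/* Worked example in Q15: with aa_lane = bb_lane = 0x4000 (+0.5) the
+   signed product is 0x10000000; >>u 14 gives 0x4000, +1 gives
+   0x4001, and the final >>u 1 gives 0x2000 (+0.25), i.e. a
+   round-to-nearest of the product's top 16 bits. */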
+
+/* Helper for the SSSE3 (not SSE3) PSIGN{B,W,D} insns. Given two 64-bit
+ values (aa,bb), computes, for each lane:
+
+ if aa_lane < 0 then - bb_lane
+ else if aa_lane > 0 then bb_lane
+ else 0
+*/
+static IRExpr* dis_PSIGN_helper ( IRExpr* aax, IRExpr* bbx, Int laneszB )
+{
+ IRTemp aa = newTemp(Ity_I64);
+ IRTemp bb = newTemp(Ity_I64);
+ IRTemp zero = newTemp(Ity_I64);
+ IRTemp bbNeg = newTemp(Ity_I64);
+ IRTemp negMask = newTemp(Ity_I64);
+ IRTemp posMask = newTemp(Ity_I64);
+ IROp opSub = Iop_INVALID;
+ IROp opCmpGTS = Iop_INVALID;
+
+ switch (laneszB) {
+ case 1: opSub = Iop_Sub8x8; opCmpGTS = Iop_CmpGT8Sx8; break;
+ case 2: opSub = Iop_Sub16x4; opCmpGTS = Iop_CmpGT16Sx4; break;
+ case 4: opSub = Iop_Sub32x2; opCmpGTS = Iop_CmpGT32Sx2; break;
+ default: vassert(0);
+ }
+
+ assign( aa, aax );
+ assign( bb, bbx );
+ assign( zero, mkU64(0) );
+ assign( bbNeg, binop(opSub, mkexpr(zero), mkexpr(bb)) );
+ assign( negMask, binop(opCmpGTS, mkexpr(zero), mkexpr(aa)) );
+ assign( posMask, binop(opCmpGTS, mkexpr(aa), mkexpr(zero)) );
+
+ return
+ binop(Iop_Or64,
+ binop(Iop_And64, mkexpr(bb), mkexpr(posMask)),
+ binop(Iop_And64, mkexpr(bbNeg), mkexpr(negMask)) );
+
+}
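+
+/* Example, 16-bit lanes: aa_lane = -3 sets negMask for that lane,
+   so the result is 0 - bb_lane; aa_lane = 0 sets neither mask and
+   the lane comes out as 0, as PSIGN requires. */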
+
+/* Helper for the SSSE3 (not SSE3) PABS{B,W,D} insns. Given a 64-bit
+ value aa, computes, for each lane
+
+ if aa < 0 then -aa else aa
+
+ Note that the result is interpreted as unsigned, so that the
+ absolute value of the most negative signed input can be
+ represented.
+*/
+static IRExpr* dis_PABS_helper ( IRExpr* aax, Int laneszB )
+{
+ IRTemp aa = newTemp(Ity_I64);
+ IRTemp zero = newTemp(Ity_I64);
+ IRTemp aaNeg = newTemp(Ity_I64);
+ IRTemp negMask = newTemp(Ity_I64);
+ IRTemp posMask = newTemp(Ity_I64);
+ IROp opSub = Iop_INVALID;
+ IROp opSarN = Iop_INVALID;
+
+ switch (laneszB) {
+ case 1: opSub = Iop_Sub8x8; opSarN = Iop_SarN8x8; break;
+ case 2: opSub = Iop_Sub16x4; opSarN = Iop_SarN16x4; break;
+ case 4: opSub = Iop_Sub32x2; opSarN = Iop_SarN32x2; break;
+ default: vassert(0);
+ }
+
+ assign( aa, aax );
+ assign( negMask, binop(opSarN, mkexpr(aa), mkU8(8*laneszB-1)) );
+ assign( posMask, unop(Iop_Not64, mkexpr(negMask)) );
+ assign( zero, mkU64(0) );
+ assign( aaNeg, binop(opSub, mkexpr(zero), mkexpr(aa)) );
+ return
+ binop(Iop_Or64,
+ binop(Iop_And64, mkexpr(aa), mkexpr(posMask)),
+ binop(Iop_And64, mkexpr(aaNeg), mkexpr(negMask)) );
+}
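+
+/* Example, 8-bit lanes: aa_lane = 0x80 (-128) sets negMask and
+   aaNeg = 0x80, so the result lane is 0x80, i.e. 128 when read as
+   unsigned -- the case the note above is about. */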
+
+static IRExpr* dis_PALIGNR_XMM_helper ( IRTemp hi64,
+ IRTemp lo64, Int byteShift )
+{
+ vassert(byteShift >= 1 && byteShift <= 7);
+ return
+ binop(Iop_Or64,
+ binop(Iop_Shl64, mkexpr(hi64), mkU8(8*(8-byteShift))),
+ binop(Iop_Shr64, mkexpr(lo64), mkU8(8*byteShift))
+ );
+}
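+
+/* Sketch: with byteShift == 3 this computes
+   (hi64 << 40) | (lo64 >> 24), i.e. bytes 3..10 of the 16-byte
+   concatenation hi64:lo64, which is PALIGNR's sliding window. */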
+
+/* Generate a SIGSEGV followed by a restart of the current instruction
+ if effective_addr is not 16-aligned. This is required behaviour
+ for some SSE3 instructions and all 128-bit SSSE3 instructions.
+ This assumes that guest_RIP_curr_instr is set correctly! */
+static void gen_SEGV_if_not_16_aligned ( IRTemp effective_addr )
+{
+ stmt(
+ IRStmt_Exit(
+ binop(Iop_CmpNE32,
+ binop(Iop_And32,mkexpr(effective_addr),mkU32(0xF)),
+ mkU32(0)),
+ Ijk_SigSEGV,
+ IRConst_U32(guest_EIP_curr_instr)
+ )
+ );
+}
+
+
+/* Helper for deciding whether a given insn (starting at the opcode
+ byte) may validly be used with a LOCK prefix. The following insns
+ may be used with LOCK when their destination operand is in memory.
+ AFAICS this is exactly the same for both 32-bit and 64-bit mode.
+
+ ADD 80 /0, 81 /0, 82 /0, 83 /0, 00, 01
+ OR 80 /1, 81 /1, 82 /x, 83 /1, 08, 09
+ ADC 80 /2, 81 /2, 82 /2, 83 /2, 10, 11
+ SBB 80 /3, 81 /3, 82 /x, 83 /3, 18, 19
+ AND 80 /4, 81 /4, 82 /x, 83 /4, 20, 21
+ SUB 80 /5, 81 /5, 82 /x, 83 /5, 28, 29
+ XOR 80 /6, 81 /6, 82 /x, 83 /6, 30, 31
+
+ DEC FE /1, FF /1
+ INC FE /0, FF /0
+
+ NEG F6 /3, F7 /3
+ NOT F6 /2, F7 /2
+
+ XCHG 86, 87
+
+ BTC 0F BB, 0F BA /7
+ BTR 0F B3, 0F BA /6
+ BTS 0F AB, 0F BA /5
+
+ CMPXCHG 0F B0, 0F B1
+ CMPXCHG8B 0F C7 /1
+
+ XADD 0F C0, 0F C1
+
+ ------------------------------
+
+ 80 /0 = addb $imm8, rm8
+ 81 /0 = addl $imm32, rm32 and addw $imm16, rm16
+ 82 /0 = addb $imm8, rm8
+ 83 /0 = addl $simm8, rm32 and addw $simm8, rm16
+
+ 00 = addb r8, rm8
+ 01 = addl r32, rm32 and addw r16, rm16
+
+ Same for ADD OR ADC SBB AND SUB XOR
+
+ FE /1 = dec rm8
+ FF /1 = dec rm32 and dec rm16
+
+ FE /0 = inc rm8
+ FF /0 = inc rm32 and inc rm16
+
+ F6 /3 = neg rm8
+ F7 /3 = neg rm32 and neg rm16
+
+ F6 /2 = not rm8
+ F7 /2 = not rm32 and not rm16
+
+ 0F BB = btcw r16, rm16 and btcl r32, rm32
+ 0F BA /7 = btcw $imm8, rm16 and btcl $imm8, rm32
+
+ Same for BTS, BTR
+*/
+static Bool can_be_used_with_LOCK_prefix ( UChar* opc )
+{
+ switch (opc[0]) {
+ case 0x00: case 0x01: case 0x08: case 0x09:
+ case 0x10: case 0x11: case 0x18: case 0x19:
+ case 0x20: case 0x21: case 0x28: case 0x29:
+ case 0x30: case 0x31:
+ if (!epartIsReg(opc[1]))
+ return True;
+ break;
+
+ case 0x80: case 0x81: case 0x82: case 0x83:
+ if (gregOfRM(opc[1]) >= 0 && gregOfRM(opc[1]) <= 6
+ && !epartIsReg(opc[1]))
+ return True;
+ break;
+
+ case 0xFE: case 0xFF:
+ if (gregOfRM(opc[1]) >= 0 && gregOfRM(opc[1]) <= 1
+ && !epartIsReg(opc[1]))
+ return True;
+ break;
+
+ case 0xF6: case 0xF7:
+ if (gregOfRM(opc[1]) >= 2 && gregOfRM(opc[1]) <= 3
+ && !epartIsReg(opc[1]))
+ return True;
+ break;
+
+ case 0x86: case 0x87:
+ if (!epartIsReg(opc[1]))
+ return True;
+ break;
+
+ case 0x0F: {
+ switch (opc[1]) {
+ case 0xBB: case 0xB3: case 0xAB:
+ if (!epartIsReg(opc[2]))
+ return True;
+ break;
+ case 0xBA:
+ if (gregOfRM(opc[2]) >= 5 && gregOfRM(opc[2]) <= 7
+ && !epartIsReg(opc[2]))
+ return True;
+ break;
+ case 0xB0: case 0xB1:
+ if (!epartIsReg(opc[2]))
+ return True;
+ break;
+ case 0xC7:
+ if (gregOfRM(opc[2]) == 1 && !epartIsReg(opc[2]) )
+ return True;
+ break;
+ case 0xC0: case 0xC1:
+ if (!epartIsReg(opc[2]))
+ return True;
+ break;
+ default:
+ break;
+ } /* switch (opc[1]) */
+ break;
+ }
+
+ default:
+ break;
+ } /* switch (opc[0]) */
+
+ return False;
+}
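+
+/* Example: "lock incl (%eax)" arrives as opc[0] == 0xFF with
+   gregOfRM(opc[1]) == 0 and a memory E-part, so it is accepted;
+   "lock incl %eax" has a register E-part and is rejected, matching
+   hardware, which #UDs a LOCK with a register destination. */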
+
+
+/*------------------------------------------------------------*/
+/*--- Disassemble a single instruction ---*/
+/*------------------------------------------------------------*/
+
+/* Disassemble a single instruction into IR. The instruction is
+ located in host memory at &guest_code[delta]. *expect_CAS is set
+ to True if the resulting IR is expected to contain an IRCAS
+ statement, and False if it's not expected to. This makes it
+ possible for the caller of disInstr_X86_WRK to check that
+ LOCK-prefixed instructions are at least plausibly translated, in
+ that it becomes possible to check that a (validly) LOCK-prefixed
+ instruction generates a translation containing an IRCAS, and
+ instructions without LOCK prefixes don't generate translations
+ containing an IRCAS.
+*/
+static
+DisResult disInstr_X86_WRK (
+ /*OUT*/Bool* expect_CAS,
+ Bool put_IP,
+ Bool (*resteerOkFn) ( /*opaque*/void*, Addr64 ),
+ Bool resteerCisOk,
+ void* callback_opaque,
+ Long delta64,
+ VexArchInfo* archinfo,
+ VexAbiInfo* vbi
+ )
+{
+ IRType ty;
+ IRTemp addr, t0, t1, t2, t3, t4, t5, t6;
+ Int alen;
+ UChar opc, modrm, abyte, pre;
+ UInt d32;
+ HChar dis_buf[50];
+ Int am_sz, d_sz, n_prefixes;
+ DisResult dres;
+ UChar* insn; /* used in SSE decoders */
+
+ /* The running delta */
+ Int delta = (Int)delta64;
+
+ /* Holds eip at the start of the insn, so that we can print
+ consistent error messages for unimplemented insns. */
+ Int delta_start = delta;
+
+ /* sz denotes the nominal data-op size of the insn; we change it to
+ 2 if an 0x66 prefix is seen */
+ Int sz = 4;
+
+ /* sorb holds the segment-override-prefix byte, if any. Zero if no
+ prefix has been seen, else one of {0x26, 0x3E, 0x64, 0x65}
+ indicating the prefix. */
+ UChar sorb = 0;
+
+ /* Gets set to True if a LOCK prefix is seen. */
+ Bool pfx_lock = False;
+
+ /* Set result defaults. */
+ dres.whatNext = Dis_Continue;
+ dres.len = 0;
+ dres.continueAt = 0;
+
+ *expect_CAS = False;
+
+ addr = t0 = t1 = t2 = t3 = t4 = t5 = t6 = IRTemp_INVALID;
+
+ vassert(guest_EIP_bbstart + delta == guest_EIP_curr_instr);
+ DIP("\t0x%x: ", guest_EIP_bbstart+delta);
+
+ /* We may be asked to update the guest EIP before going further. */
+ if (put_IP)
+ stmt( IRStmt_Put( OFFB_EIP, mkU32(guest_EIP_curr_instr)) );
+
+ /* Spot "Special" instructions (see comment at top of file). */
+ {
+ UChar* code = (UChar*)(guest_code + delta);
+ /* Spot the 12-byte preamble:
+ C1C703 roll $3, %edi
+ C1C70D roll $13, %edi
+ C1C71D roll $29, %edi
+ C1C713 roll $19, %edi
+ */
+ if (code[ 0] == 0xC1 && code[ 1] == 0xC7 && code[ 2] == 0x03 &&
+ code[ 3] == 0xC1 && code[ 4] == 0xC7 && code[ 5] == 0x0D &&
+ code[ 6] == 0xC1 && code[ 7] == 0xC7 && code[ 8] == 0x1D &&
+ code[ 9] == 0xC1 && code[10] == 0xC7 && code[11] == 0x13) {
+ /* Got a "Special" instruction preamble. Which one is it? */
+ if (code[12] == 0x87 && code[13] == 0xDB /* xchgl %ebx,%ebx */) {
+ /* %EDX = client_request ( %EAX ) */
+ DIP("%%edx = client_request ( %%eax )\n");
+ delta += 14;
+ jmp_lit(Ijk_ClientReq, guest_EIP_bbstart+delta);
+ dres.whatNext = Dis_StopHere;
+ goto decode_success;
+ }
+ else
+ if (code[12] == 0x87 && code[13] == 0xC9 /* xchgl %ecx,%ecx */) {
+ /* %EAX = guest_NRADDR */
+ DIP("%%eax = guest_NRADDR\n");
+ delta += 14;
+ putIReg(4, R_EAX, IRExpr_Get( OFFB_NRADDR, Ity_I32 ));
+ goto decode_success;
+ }
+ else
+ if (code[12] == 0x87 && code[13] == 0xD2 /* xchgl %edx,%edx */) {
+ /* call-noredir *%EAX */
+ DIP("call-noredir *%%eax\n");
+ delta += 14;
+ t1 = newTemp(Ity_I32);
+ assign(t1, getIReg(4,R_EAX));
+ t2 = newTemp(Ity_I32);
+ assign(t2, binop(Iop_Sub32, getIReg(4,R_ESP), mkU32(4)));
+ putIReg(4, R_ESP, mkexpr(t2));
+ storeLE( mkexpr(t2), mkU32(guest_EIP_bbstart+delta));
+ jmp_treg(Ijk_NoRedir,t1);
+ dres.whatNext = Dis_StopHere;
+ goto decode_success;
+ }
+ /* We don't know what it is. */
+ goto decode_failure;
+ /*NOTREACHED*/
+ }
+ }
+
+ /* Handle a couple of weird-ass NOPs that have been observed in the
+ wild. */
+ {
+ UChar* code = (UChar*)(guest_code + delta);
+ /* Sun's JVM 1.5.0 uses the following as a NOP:
+ 26 2E 64 65 90 %es:%cs:%fs:%gs:nop */
+ if (code[0] == 0x26 && code[1] == 0x2E && code[2] == 0x64
+ && code[3] == 0x65 && code[4] == 0x90) {
+ DIP("%%es:%%cs:%%fs:%%gs:nop\n");
+ delta += 5;
+ goto decode_success;
+ }
+ /* Don't barf on recent binutils padding,
+ all variants of which are: nopw %cs:0x0(%eax,%eax,1)
+ 66 2e 0f 1f 84 00 00 00 00 00
+ 66 66 2e 0f 1f 84 00 00 00 00 00
+ 66 66 66 2e 0f 1f 84 00 00 00 00 00
+ 66 66 66 66 2e 0f 1f 84 00 00 00 00 00
+ 66 66 66 66 66 2e 0f 1f 84 00 00 00 00 00
+ 66 66 66 66 66 66 2e 0f 1f 84 00 00 00 00 00
+ */
+ if (code[0] == 0x66) {
+ Int data16_cnt;
+ for (data16_cnt = 1; data16_cnt < 6; data16_cnt++)
+ if (code[data16_cnt] != 0x66)
+ break;
+ if (code[data16_cnt] == 0x2E && code[data16_cnt + 1] == 0x0F
+ && code[data16_cnt + 2] == 0x1F && code[data16_cnt + 3] == 0x84
+ && code[data16_cnt + 4] == 0x00 && code[data16_cnt + 5] == 0x00
+ && code[data16_cnt + 6] == 0x00 && code[data16_cnt + 7] == 0x00
+ && code[data16_cnt + 8] == 0x00 ) {
+ DIP("nopw %%cs:0x0(%%eax,%%eax,1)\n");
+ delta += 9 + data16_cnt;
+ goto decode_success;
+ }
+ }
+ }
+
+ /* Normal instruction handling starts here. */
+
+ /* Deal with some but not all prefixes:
+ 66(oso)
+ F0(lock)
+ 2E(cs:) 3E(ds:) 26(es:) 64(fs:) 65(gs:) 36(ss:)
+ Not dealt with (left in place):
+ F2 F3
+ */
+ n_prefixes = 0;
+ while (True) {
+ if (n_prefixes > 7) goto decode_failure;
+ pre = getUChar(delta);
+ switch (pre) {
+ case 0x66:
+ sz = 2;
+ break;
+ case 0xF0:
+ pfx_lock = True;
+ *expect_CAS = True;
+ break;
+ case 0x3E: /* %DS: */
+ case 0x26: /* %ES: */
+ case 0x64: /* %FS: */
+ case 0x65: /* %GS: */
+ if (sorb != 0)
+ goto decode_failure; /* only one seg override allowed */
+ sorb = pre;
+ break;
+ case 0x2E: { /* %CS: */
+ /* 2E prefix on a conditional branch instruction is a
+ branch-prediction hint, which can safely be ignored. */
+ UChar op1 = getIByte(delta+1);
+ UChar op2 = getIByte(delta+2);
+ if ((op1 >= 0x70 && op1 <= 0x7F)
+ || (op1 == 0xE3)
+ || (op1 == 0x0F && op2 >= 0x80 && op2 <= 0x8F)) {
+ if (0) vex_printf("vex x86->IR: ignoring branch hint\n");
+ } else {
+ /* All other CS override cases are not handled */
+ goto decode_failure;
+ }
+ break;
+ }
+ case 0x36: /* %SS: */
+ /* SS override cases are not handled */
+ goto decode_failure;
+ default:
+ goto not_a_prefix;
+ }
+ n_prefixes++;
+ delta++;
+ }
+
+ not_a_prefix:
+
+ /* Now we should be looking at the primary opcode byte or the
+ leading F2 or F3. Check that any LOCK prefix is actually
+ allowed. */
+
+ if (pfx_lock) {
+ if (can_be_used_with_LOCK_prefix( (UChar*)&guest_code[delta] )) {
+ DIP("lock ");
+ } else {
+ *expect_CAS = False;
+ goto decode_failure;
+ }
+ }
+
+
+ /* ---------------------------------------------------- */
+ /* --- The SSE decoder. --- */
+ /* ---------------------------------------------------- */
+
+ /* What did I do to deserve SSE? Perhaps I was really bad in a
+ previous life? */
+
+ /* Note, this doesn't handle SSE2 or SSE3. That is handled in a
+ later section, further on. */
+
+ insn = (UChar*)&guest_code[delta];
+
+ /* Treat fxsave specially. It should be doable even on an SSE0
+ (Pentium-II class) CPU. Hence be prepared to handle it on
+ any subarchitecture variant.
+ */
+
+ /* 0F AE /0 = FXSAVE m512 -- write x87 and SSE state to memory */
+ if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xAE
+ && !epartIsReg(insn[2]) && gregOfRM(insn[2]) == 0) {
+ IRDirty* d;
+ modrm = getIByte(delta+2);
+ vassert(sz == 4);
+ vassert(!epartIsReg(modrm));
+
+ addr = disAMode ( &alen, sorb, delta+2, dis_buf );
+ delta += 2+alen;
+
+ DIP("fxsave %s\n", dis_buf);
+
+ /* Uses dirty helper:
+ void x86g_dirtyhelper_FXSAVE ( VexGuestX86State*, UInt ) */
+ d = unsafeIRDirty_0_N (
+ 0/*regparms*/,
+ "x86g_dirtyhelper_FXSAVE",
+ &x86g_dirtyhelper_FXSAVE,
+ mkIRExprVec_1( mkexpr(addr) )
+ );
+ d->needsBBP = True;
+
+ /* declare we're writing memory */
+ d->mFx = Ifx_Write;
+ d->mAddr = mkexpr(addr);
+ d->mSize = 512;
+
+ /* declare we're reading guest state */
+ d->nFxState = 7;
+
+ d->fxState[0].fx = Ifx_Read;
+ d->fxState[0].offset = OFFB_FTOP;
+ d->fxState[0].size = sizeof(UInt);
+
+ d->fxState[1].fx = Ifx_Read;
+ d->fxState[1].offset = OFFB_FPREGS;
+ d->fxState[1].size = 8 * sizeof(ULong);
+
+ d->fxState[2].fx = Ifx_Read;
+ d->fxState[2].offset = OFFB_FPTAGS;
+ d->fxState[2].size = 8 * sizeof(UChar);
+
+ d->fxState[3].fx = Ifx_Read;
+ d->fxState[3].offset = OFFB_FPROUND;
+ d->fxState[3].size = sizeof(UInt);
+
+ d->fxState[4].fx = Ifx_Read;
+ d->fxState[4].offset = OFFB_FC3210;
+ d->fxState[4].size = sizeof(UInt);
+
+ d->fxState[5].fx = Ifx_Read;
+ d->fxState[5].offset = OFFB_XMM0;
+ d->fxState[5].size = 8 * sizeof(U128);
+
+ d->fxState[6].fx = Ifx_Read;
+ d->fxState[6].offset = OFFB_SSEROUND;
+ d->fxState[6].size = sizeof(UInt);
+
+ /* Be paranoid ... this assertion tries to ensure the 8 %xmm
+ images are packed back-to-back. If not, the value of
+ d->fxState[5].size is wrong. */
+ vassert(16 == sizeof(U128));
+ vassert(OFFB_XMM7 == (OFFB_XMM0 + 7 * 16));
+
+ stmt( IRStmt_Dirty(d) );
+
+ goto decode_success;
+ }
+
+ /* 0F AE /1 = FXRSTOR m512 -- read x87 and SSE state from memory */
+ if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xAE
+ && !epartIsReg(insn[2]) && gregOfRM(insn[2]) == 1) {
+ IRDirty* d;
+ modrm = getIByte(delta+2);
+ vassert(sz == 4);
+ vassert(!epartIsReg(modrm));
+
+ addr = disAMode ( &alen, sorb, delta+2, dis_buf );
+ delta += 2+alen;
+
+ DIP("fxrstor %s\n", dis_buf);
+
+ /* Uses dirty helper:
+ void x86g_dirtyhelper_FXRSTOR ( VexGuestX86State*, UInt ) */
+ d = unsafeIRDirty_0_N (
+ 0/*regparms*/,
+ "x86g_dirtyhelper_FXRSTOR",
+ &x86g_dirtyhelper_FXRSTOR,
+ mkIRExprVec_1( mkexpr(addr) )
+ );
+ d->needsBBP = True;
+
+ /* declare we're reading memory */
+ d->mFx = Ifx_Read;
+ d->mAddr = mkexpr(addr);
+ d->mSize = 512;
+
+ /* declare we're writing guest state */
+ d->nFxState = 7;
+
+ d->fxState[0].fx = Ifx_Write;
+ d->fxState[0].offset = OFFB_FTOP;
+ d->fxState[0].size = sizeof(UInt);
+
+ d->fxState[1].fx = Ifx_Write;
+ d->fxState[1].offset = OFFB_FPREGS;
+ d->fxState[1].size = 8 * sizeof(ULong);
+
+ d->fxState[2].fx = Ifx_Write;
+ d->fxState[2].offset = OFFB_FPTAGS;
+ d->fxState[2].size = 8 * sizeof(UChar);
+
+ d->fxState[3].fx = Ifx_Write;
+ d->fxState[3].offset = OFFB_FPROUND;
+ d->fxState[3].size = sizeof(UInt);
+
+ d->fxState[4].fx = Ifx_Write;
+ d->fxState[4].offset = OFFB_FC3210;
+ d->fxState[4].size = sizeof(UInt);
+
+ d->fxState[5].fx = Ifx_Write;
+ d->fxState[5].offset = OFFB_XMM0;
+ d->fxState[5].size = 8 * sizeof(U128);
+
+ d->fxState[6].fx = Ifx_Write;
+ d->fxState[6].offset = OFFB_SSEROUND;
+ d->fxState[6].size = sizeof(UInt);
+
+ /* Be paranoid ... this assertion tries to ensure the 8 %xmm
+ images are packed back-to-back. If not, the value of
+ d->fxState[5].size is wrong. */
+ vassert(16 == sizeof(U128));
+ vassert(OFFB_XMM7 == (OFFB_XMM0 + 7 * 16));
+
+ stmt( IRStmt_Dirty(d) );
+
+ goto decode_success;
+ }
+
+ /* ------ SSE decoder main ------ */
+
+ /* Skip parts of the decoder which don't apply given the stated
+ guest subarchitecture. */
+ if (archinfo->hwcaps == 0/*baseline, no sse at all*/)
+ goto after_sse_decoders;
+
+ /* Otherwise we must be doing sse1 or sse2, so we can at least try
+ for SSE1 here. */
+
+ /* 0F 58 = ADDPS -- add 32Fx4 from R/M to R */
+ if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x58) {
+ delta = dis_SSE_E_to_G_all( sorb, delta+2, "addps", Iop_Add32Fx4 );
+ goto decode_success;
+ }
+
+ /* F3 0F 58 = ADDSS -- add 32F0x4 from R/M to R */
+ if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x58) {
+ vassert(sz == 4);
+ delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "addss", Iop_Add32F0x4 );
+ goto decode_success;
+ }
+
+ /* 0F 55 = ANDNPS -- G = (not G) and E */
+ if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x55) {
+ delta = dis_SSE_E_to_G_all_invG( sorb, delta+2, "andnps", Iop_AndV128 );
+ goto decode_success;
+ }
+
+ /* 0F 54 = ANDPS -- G = G and E */
+ if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x54) {
+ delta = dis_SSE_E_to_G_all( sorb, delta+2, "andps", Iop_AndV128 );
+ goto decode_success;
+ }
+
+ /* 0F C2 = CMPPS -- 32Fx4 comparison from R/M to R */
+ if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xC2) {
+ delta = dis_SSEcmp_E_to_G( sorb, delta+2, "cmpps", True, 4 );
+ goto decode_success;
+ }
+
+ /* F3 0F C2 = CMPSS -- 32F0x4 comparison from R/M to R */
+ if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0xC2) {
+ vassert(sz == 4);
+ delta = dis_SSEcmp_E_to_G( sorb, delta+3, "cmpss", False, 4 );
+ goto decode_success;
+ }
+
+ /* 0F 2F = COMISS -- 32F0x4 comparison G,E, and set ZCP */
+ /* 0F 2E = UCOMISS -- 32F0x4 comparison G,E, and set ZCP */
+ if (sz == 4 && insn[0] == 0x0F && (insn[1] == 0x2F || insn[1] == 0x2E)) {
+ IRTemp argL = newTemp(Ity_F32);
+ IRTemp argR = newTemp(Ity_F32);
+ modrm = getIByte(delta+2);
+ if (epartIsReg(modrm)) {
+ assign( argR, getXMMRegLane32F( eregOfRM(modrm), 0/*lowest lane*/ ) );
+ delta += 2+1;
+ DIP("[u]comiss %s,%s\n", nameXMMReg(eregOfRM(modrm)),
+ nameXMMReg(gregOfRM(modrm)) );
+ } else {
+ addr = disAMode ( &alen, sorb, delta+2, dis_buf );
+ assign( argR, loadLE(Ity_F32, mkexpr(addr)) );
+ delta += 2+alen;
+ DIP("[u]comiss %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRM(modrm)) );
+ }
+ assign( argL, getXMMRegLane32F( gregOfRM(modrm), 0/*lowest lane*/ ) );
+
+ stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) ));
+ stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
+ stmt( IRStmt_Put(
+ OFFB_CC_DEP1,
+ binop( Iop_And32,
+ binop(Iop_CmpF64,
+ unop(Iop_F32toF64,mkexpr(argL)),
+ unop(Iop_F32toF64,mkexpr(argR))),
+ mkU32(0x45)
+ )));
+ /* Set NDEP even though it isn't used. This makes redundant-PUT
+ elimination of previous stores to this field work better. */
+ stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
+ goto decode_success;
+ }
+
+ /* 0F 2A = CVTPI2PS -- convert 2 x I32 in mem/mmx to 2 x F32 in low
+ half xmm */
+ if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x2A) {
+ IRTemp arg64 = newTemp(Ity_I64);
+ IRTemp rmode = newTemp(Ity_I32);
+ vassert(sz == 4);
+
+ modrm = getIByte(delta+2);
+ do_MMX_preamble();
+ if (epartIsReg(modrm)) {
+ assign( arg64, getMMXReg(eregOfRM(modrm)) );
+ delta += 2+1;
+ DIP("cvtpi2ps %s,%s\n", nameMMXReg(eregOfRM(modrm)),
+ nameXMMReg(gregOfRM(modrm)));
+ } else {
+ addr = disAMode ( &alen, sorb, delta+2, dis_buf );
+ assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
+ delta += 2+alen;
+ DIP("cvtpi2ps %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRM(modrm)) );
+ }
+
+ assign( rmode, get_sse_roundingmode() );
+
+ putXMMRegLane32F(
+ gregOfRM(modrm), 0,
+ binop(Iop_F64toF32,
+ mkexpr(rmode),
+ unop(Iop_I32StoF64,
+ unop(Iop_64to32, mkexpr(arg64)) )) );
+
+ putXMMRegLane32F(
+ gregOfRM(modrm), 1,
+ binop(Iop_F64toF32,
+ mkexpr(rmode),
+ unop(Iop_I32StoF64,
+ unop(Iop_64HIto32, mkexpr(arg64)) )) );
+
+ goto decode_success;
+ }
+
+ /* F3 0F 2A = CVTSI2SS -- convert I32 in mem/ireg to F32 in low
+ quarter xmm */
+ if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x2A) {
+ IRTemp arg32 = newTemp(Ity_I32);
+ IRTemp rmode = newTemp(Ity_I32);
+ vassert(sz == 4);
+
+ modrm = getIByte(delta+3);
+ if (epartIsReg(modrm)) {
+ assign( arg32, getIReg(4, eregOfRM(modrm)) );
+ delta += 3+1;
+ DIP("cvtsi2ss %s,%s\n", nameIReg(4, eregOfRM(modrm)),
+ nameXMMReg(gregOfRM(modrm)));
+ } else {
+ addr = disAMode ( &alen, sorb, delta+3, dis_buf );
+ assign( arg32, loadLE(Ity_I32, mkexpr(addr)) );
+ delta += 3+alen;
+ DIP("cvtsi2ss %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRM(modrm)) );
+ }
+
+ assign( rmode, get_sse_roundingmode() );
+
+ putXMMRegLane32F(
+ gregOfRM(modrm), 0,
+ binop(Iop_F64toF32,
+ mkexpr(rmode),
+ unop(Iop_I32StoF64, mkexpr(arg32)) ) );
+
+ goto decode_success;
+ }
+
+ /* 0F 2D = CVTPS2PI -- convert 2 x F32 in mem/low half xmm to 2 x
+ I32 in mmx, according to prevailing SSE rounding mode */
+ /* 0F 2C = CVTTPS2PI -- convert 2 x F32 in mem/low half xmm to 2 x
+ I32 in mmx, rounding towards zero */
+ if (sz == 4 && insn[0] == 0x0F && (insn[1] == 0x2D || insn[1] == 0x2C)) {
+ IRTemp dst64 = newTemp(Ity_I64);
+ IRTemp rmode = newTemp(Ity_I32);
+ IRTemp f32lo = newTemp(Ity_F32);
+ IRTemp f32hi = newTemp(Ity_F32);
+ Bool r2zero = toBool(insn[1] == 0x2C);
+
+ do_MMX_preamble();
+ modrm = getIByte(delta+2);
+
+ if (epartIsReg(modrm)) {
+ delta += 2+1;
+ assign(f32lo, getXMMRegLane32F(eregOfRM(modrm), 0));
+ assign(f32hi, getXMMRegLane32F(eregOfRM(modrm), 1));
+ DIP("cvt%sps2pi %s,%s\n", r2zero ? "t" : "",
+ nameXMMReg(eregOfRM(modrm)),
+ nameMMXReg(gregOfRM(modrm)));
+ } else {
+ addr = disAMode ( &alen, sorb, delta+2, dis_buf );
+ assign(f32lo, loadLE(Ity_F32, mkexpr(addr)));
+ assign(f32hi, loadLE(Ity_F32, binop( Iop_Add32,
+ mkexpr(addr),
+ mkU32(4) )));
+ delta += 2+alen;
+ DIP("cvt%sps2pi %s,%s\n", r2zero ? "t" : "",
+ dis_buf,
+ nameMMXReg(gregOfRM(modrm)));
+ }
+
+ if (r2zero) {
+ assign(rmode, mkU32((UInt)Irrm_ZERO) );
+ } else {
+ assign( rmode, get_sse_roundingmode() );
+ }
+
+ assign(
+ dst64,
+ binop( Iop_32HLto64,
+ binop( Iop_F64toI32S,
+ mkexpr(rmode),
+ unop( Iop_F32toF64, mkexpr(f32hi) ) ),
+ binop( Iop_F64toI32S,
+ mkexpr(rmode),
+ unop( Iop_F32toF64, mkexpr(f32lo) ) )
+ )
+ );
+
+ putMMXReg(gregOfRM(modrm), mkexpr(dst64));
+ goto decode_success;
+ }
+
+ /* F3 0F 2D = CVTSS2SI -- convert F32 in mem/low quarter xmm to
+ I32 in ireg, according to prevailing SSE rounding mode */
+ /* F3 0F 2C = CVTTSS2SI -- convert F32 in mem/low quarter xmm to
+ I32 in ireg, rounding towards zero */
+ if (insn[0] == 0xF3 && insn[1] == 0x0F
+ && (insn[2] == 0x2D || insn[2] == 0x2C)) {
+ IRTemp rmode = newTemp(Ity_I32);
+ IRTemp f32lo = newTemp(Ity_F32);
+ Bool r2zero = toBool(insn[2] == 0x2C);
+ vassert(sz == 4);
+
+ modrm = getIByte(delta+3);
+ if (epartIsReg(modrm)) {
+ delta += 3+1;
+ assign(f32lo, getXMMRegLane32F(eregOfRM(modrm), 0));
+ DIP("cvt%sss2si %s,%s\n", r2zero ? "t" : "",
+ nameXMMReg(eregOfRM(modrm)),
+ nameIReg(4, gregOfRM(modrm)));
+ } else {
+ addr = disAMode ( &alen, sorb, delta+3, dis_buf );
+ assign(f32lo, loadLE(Ity_F32, mkexpr(addr)));
+ delta += 3+alen;
+ DIP("cvt%sss2si %s,%s\n", r2zero ? "t" : "",
+ dis_buf,
+ nameIReg(4, gregOfRM(modrm)));
+ }
+
+ if (r2zero) {
+ assign( rmode, mkU32((UInt)Irrm_ZERO) );
+ } else {
+ assign( rmode, get_sse_roundingmode() );
+ }
+
+ putIReg(4, gregOfRM(modrm),
+ binop( Iop_F64toI32S,
+ mkexpr(rmode),
+ unop( Iop_F32toF64, mkexpr(f32lo) ) )
+ );
+
+ goto decode_success;
+ }
+
+ /* 0F 5E = DIVPS -- div 32Fx4 from R/M to R */
+ if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5E) {
+ delta = dis_SSE_E_to_G_all( sorb, delta+2, "divps", Iop_Div32Fx4 );
+ goto decode_success;
+ }
+
+ /* F3 0F 5E = DIVSS -- div 32F0x4 from R/M to R */
+ if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5E) {
+ vassert(sz == 4);
+ delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "divss", Iop_Div32F0x4 );
+ goto decode_success;
+ }
+
+ /* 0F AE /2 = LDMXCSR m32 -- load %mxcsr */
+ if (insn[0] == 0x0F && insn[1] == 0xAE
+ && !epartIsReg(insn[2]) && gregOfRM(insn[2]) == 2) {
+
+ IRTemp t64 = newTemp(Ity_I64);
+ IRTemp ew = newTemp(Ity_I32);
+
+ modrm = getIByte(delta+2);
+ vassert(!epartIsReg(modrm));
+ vassert(sz == 4);
+
+ addr = disAMode ( &alen, sorb, delta+2, dis_buf );
+ delta += 2+alen;
+ DIP("ldmxcsr %s\n", dis_buf);
+
+ /* The only thing we observe in %mxcsr is the rounding mode.
+ Therefore, pass the 32-bit value (SSE native-format control
+ word) to a clean helper, getting back a 64-bit value, the
+ lower half of which is the SSEROUND value to store, and the
+ upper half of which is the emulation-warning token which may
+ be generated.
+ */
+ /* ULong x86g_check_ldmxcsr ( UInt ); */
+ assign( t64, mkIRExprCCall(
+ Ity_I64, 0/*regparms*/,
+ "x86g_check_ldmxcsr",
+ &x86g_check_ldmxcsr,
+ mkIRExprVec_1( loadLE(Ity_I32, mkexpr(addr)) )
+ )
+ );
+
+ put_sse_roundingmode( unop(Iop_64to32, mkexpr(t64)) );
+ assign( ew, unop(Iop_64HIto32, mkexpr(t64) ) );
+ put_emwarn( mkexpr(ew) );
+ /* Finally, if an emulation warning was reported, side-exit to
+ the next insn, reporting the warning, so that Valgrind's
+ dispatcher sees the warning. */
+ stmt(
+ IRStmt_Exit(
+ binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
+ Ijk_EmWarn,
+ IRConst_U32( ((Addr32)guest_EIP_bbstart)+delta)
+ )
+ );
+ goto decode_success;
+ }
+
+ /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
+ /* 0F F7 = MASKMOVQ -- 8x8 masked store */
+ if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xF7) {
+ Bool ok = False;
+ delta = dis_MMX( &ok, sorb, sz, delta+1 );
+ if (!ok)
+ goto decode_failure;
+ goto decode_success;
+ }
+
+ /* 0F 5F = MAXPS -- max 32Fx4 from R/M to R */
+ if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5F) {
+ delta = dis_SSE_E_to_G_all( sorb, delta+2, "maxps", Iop_Max32Fx4 );
+ goto decode_success;
+ }
+
+ /* F3 0F 5F = MAXSS -- max 32F0x4 from R/M to R */
+ if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5F) {
+ vassert(sz == 4);
+ delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "maxss", Iop_Max32F0x4 );
+ goto decode_success;
+ }
+
+ /* 0F 5D = MINPS -- min 32Fx4 from R/M to R */
+ if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5D) {
+ delta = dis_SSE_E_to_G_all( sorb, delta+2, "minps", Iop_Min32Fx4 );
+ goto decode_success;
+ }
+
+ /* F3 0F 5D = MINSS -- min 32F0x4 from R/M to R */
+ if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5D) {
+ vassert(sz == 4);
+ delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "minss", Iop_Min32F0x4 );
+ goto decode_success;
+ }
+
+ /* 0F 28 = MOVAPS -- move from E (mem or xmm) to G (xmm). */
+ /* 0F 10 = MOVUPS -- move from E (mem or xmm) to G (xmm). */
+ if (sz == 4 && insn[0] == 0x0F && (insn[1] == 0x28 || insn[1] == 0x10)) {
+ modrm = getIByte(delta+2);
+ if (epartIsReg(modrm)) {
+ putXMMReg( gregOfRM(modrm),
+ getXMMReg( eregOfRM(modrm) ));
+ DIP("mov[ua]ps %s,%s\n", nameXMMReg(eregOfRM(modrm)),
+ nameXMMReg(gregOfRM(modrm)));
+ delta += 2+1;
+ } else {
+ addr = disAMode ( &alen, sorb, delta+2, dis_buf );
+ if (insn[1] == 0x28/*movaps*/)
+ gen_SEGV_if_not_16_aligned( addr );
+ putXMMReg( gregOfRM(modrm),
+ loadLE(Ity_V128, mkexpr(addr)) );
+ DIP("mov[ua]ps %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRM(modrm)));
+ delta += 2+alen;
+ }
+ goto decode_success;
+ }
+
+ /* 0F 29 = MOVAPS -- move from G (xmm) to E (mem or xmm). */
+ /* 0F 11 = MOVUPS -- move from G (xmm) to E (mem or xmm). */
+ if (sz == 4 && insn[0] == 0x0F
+ && (insn[1] == 0x29 || insn[1] == 0x11)) {
+ modrm = getIByte(delta+2);
+ if (epartIsReg(modrm)) {
+ /* fall through; awaiting test case */
+ } else {
+ addr = disAMode ( &alen, sorb, delta+2, dis_buf );
+ if (insn[1] == 0x29/*movaps*/)
+ gen_SEGV_if_not_16_aligned( addr );
+ storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) );
+ DIP("mov[ua]ps %s,%s\n", nameXMMReg(gregOfRM(modrm)),
+ dis_buf );
+ delta += 2+alen;
+ goto decode_success;
+ }
+ }
+
+ /* 0F 16 = MOVHPS -- move from mem to high half of XMM. */
+ /* 0F 16 = MOVLHPS -- move from lo half to hi half of XMM. */
+ if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x16) {
+ modrm = getIByte(delta+2);
+ if (epartIsReg(modrm)) {
+ delta += 2+1;
+ putXMMRegLane64( gregOfRM(modrm), 1/*upper lane*/,
+ getXMMRegLane64( eregOfRM(modrm), 0 ) );
+ DIP("movhps %s,%s\n", nameXMMReg(eregOfRM(modrm)),
+ nameXMMReg(gregOfRM(modrm)));
+ } else {
+ addr = disAMode ( &alen, sorb, delta+2, dis_buf );
+ delta += 2+alen;
+ putXMMRegLane64( gregOfRM(modrm), 1/*upper lane*/,
+ loadLE(Ity_I64, mkexpr(addr)) );
+ DIP("movhps %s,%s\n", dis_buf,
+ nameXMMReg( gregOfRM(modrm) ));
+ }
+ goto decode_success;
+ }
+
+ /* 0F 17 = MOVHPS -- move from high half of XMM to mem. */
+ if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x17) {
+ if (!epartIsReg(insn[2])) {
+ delta += 2;
+ addr = disAMode ( &alen, sorb, delta, dis_buf );
+ delta += alen;
+ storeLE( mkexpr(addr),
+ getXMMRegLane64( gregOfRM(insn[2]),
+ 1/*upper lane*/ ) );
+ DIP("movhps %s,%s\n", nameXMMReg( gregOfRM(insn[2]) ),
+ dis_buf);
+ goto decode_success;
+ }
+ /* else fall through */
+ }
+
+ /* 0F 12 = MOVLPS -- move from mem to low half of XMM. */
+ /* 0F 12 = MOVHLPS -- move from hi half to lo half of XMM. */
+ if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x12) {
+ modrm = getIByte(delta+2);
+ if (epartIsReg(modrm)) {
+ delta += 2+1;
+ putXMMRegLane64( gregOfRM(modrm),
+ 0/*lower lane*/,
+ getXMMRegLane64( eregOfRM(modrm), 1 ));
+ DIP("movhlps %s, %s\n", nameXMMReg(eregOfRM(modrm)),
+ nameXMMReg(gregOfRM(modrm)));
+ } else {
+ addr = disAMode ( &alen, sorb, delta+2, dis_buf );
+ delta += 2+alen;
+ putXMMRegLane64( gregOfRM(modrm), 0/*lower lane*/,
+ loadLE(Ity_I64, mkexpr(addr)) );
+ DIP("movlps %s, %s\n",
+ dis_buf, nameXMMReg( gregOfRM(modrm) ));
+ }
+ goto decode_success;
+ }
+
+ /* 0F 13 = MOVLPS -- move from low half of XMM to mem. */
+ if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x13) {
+ if (!epartIsReg(insn[2])) {
+ delta += 2;
+ addr = disAMode ( &alen, sorb, delta, dis_buf );
+ delta += alen;
+ storeLE( mkexpr(addr),
+ getXMMRegLane64( gregOfRM(insn[2]),
+ 0/*lower lane*/ ) );
+ DIP("movlps %s, %s\n", nameXMMReg( gregOfRM(insn[2]) ),
+ dis_buf);
+ goto decode_success;
+ }
+ /* else fall through */
+ }
+
+ /* 0F 50 = MOVMSKPS - move 4 sign bits from 4 x F32 in xmm(E)
+ to 4 lowest bits of ireg(G) */
+ if (insn[0] == 0x0F && insn[1] == 0x50) {
+ modrm = getIByte(delta+2);
+ if (sz == 4 && epartIsReg(modrm)) {
+ Int src;
+ t0 = newTemp(Ity_I32);
+ t1 = newTemp(Ity_I32);
+ t2 = newTemp(Ity_I32);
+ t3 = newTemp(Ity_I32);
+ delta += 2+1;
+ src = eregOfRM(modrm);
+ assign( t0, binop( Iop_And32,
+ binop(Iop_Shr32, getXMMRegLane32(src,0), mkU8(31)),
+ mkU32(1) ));
+ assign( t1, binop( Iop_And32,
+ binop(Iop_Shr32, getXMMRegLane32(src,1), mkU8(30)),
+ mkU32(2) ));
+ assign( t2, binop( Iop_And32,
+ binop(Iop_Shr32, getXMMRegLane32(src,2), mkU8(29)),
+ mkU32(4) ));
+ assign( t3, binop( Iop_And32,
+ binop(Iop_Shr32, getXMMRegLane32(src,3), mkU8(28)),
+ mkU32(8) ));
+ putIReg(4, gregOfRM(modrm),
+ binop(Iop_Or32,
+ binop(Iop_Or32, mkexpr(t0), mkexpr(t1)),
+ binop(Iop_Or32, mkexpr(t2), mkexpr(t3))
+ )
+ );
+ DIP("movmskps %s,%s\n", nameXMMReg(src),
+ nameIReg(4, gregOfRM(modrm)));
+ goto decode_success;
+ }
+ /* else fall through */
+ }
+
+ /* 0F 2B = MOVNTPS -- for us, just a plain SSE store. */
+ /* 66 0F 2B = MOVNTPD -- for us, just a plain SSE store. */
+ if (insn[0] == 0x0F && insn[1] == 0x2B) {
+ modrm = getIByte(delta+2);
+ if (!epartIsReg(modrm)) {
+ addr = disAMode ( &alen, sorb, delta+2, dis_buf );
+ gen_SEGV_if_not_16_aligned( addr );
+ storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) );
+ DIP("movntp%s %s,%s\n", sz==2 ? "d" : "s",
+ nameXMMReg(gregOfRM(modrm)),
+ dis_buf);
+ delta += 2+alen;
+ goto decode_success;
+ }
+ /* else fall through */
+ }
+
+ /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
+ /* 0F E7 = MOVNTQ -- for us, just a plain MMX store. Note, the
+ Intel manual does not say anything about the usual business of
+ the FP reg tags getting trashed whenever an MMX insn happens.
+ So we just leave them alone.
+ */
+ if (insn[0] == 0x0F && insn[1] == 0xE7) {
+ modrm = getIByte(delta+2);
+ if (sz == 4 && !epartIsReg(modrm)) {
+ /* do_MMX_preamble(); Intel docs don't specify this */
+ addr = disAMode ( &alen, sorb, delta+2, dis_buf );
+ storeLE( mkexpr(addr), getMMXReg(gregOfRM(modrm)) );
+ DIP("movntq %s,%s\n", dis_buf,
+ nameMMXReg(gregOfRM(modrm)));
+ delta += 2+alen;
+ goto decode_success;
+ }
+ /* else fall through */
+ }
+
+ /* F3 0F 10 = MOVSS -- move 32 bits from E (mem or lo 1/4 xmm) to G
+ (lo 1/4 xmm). If E is mem, upper 3/4 of G is zeroed out. */
+ if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x10) {
+ vassert(sz == 4);
+ modrm = getIByte(delta+3);
+ if (epartIsReg(modrm)) {
+ putXMMRegLane32( gregOfRM(modrm), 0,
+ getXMMRegLane32( eregOfRM(modrm), 0 ));
+ DIP("movss %s,%s\n", nameXMMReg(eregOfRM(modrm)),
+ nameXMMReg(gregOfRM(modrm)));
+ delta += 3+1;
+ } else {
+ addr = disAMode ( &alen, sorb, delta+3, dis_buf );
+ /* zero bits 127:64 */
+ putXMMRegLane64( gregOfRM(modrm), 1, mkU64(0) );
+ /* zero bits 63:32 */
+ putXMMRegLane32( gregOfRM(modrm), 1, mkU32(0) );
+ /* write bits 31:0 */
+ putXMMRegLane32( gregOfRM(modrm), 0,
+ loadLE(Ity_I32, mkexpr(addr)) );
+ DIP("movss %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRM(modrm)));
+ delta += 3+alen;
+ }
+ goto decode_success;
+ }
+
+ /* F3 0F 11 = MOVSS -- move 32 bits from G (lo 1/4 xmm) to E (mem
+ or lo 1/4 xmm). */
+ if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x11) {
+ vassert(sz == 4);
+ modrm = getIByte(delta+3);
+ if (epartIsReg(modrm)) {
+ /* fall through, we don't yet have a test case */
+ } else {
+ addr = disAMode ( &alen, sorb, delta+3, dis_buf );
+ storeLE( mkexpr(addr),
+ getXMMRegLane32(gregOfRM(modrm), 0) );
+ DIP("movss %s,%s\n", nameXMMReg(gregOfRM(modrm)),
+ dis_buf);
+ delta += 3+alen;
+ goto decode_success;
+ }
+ }
+
+ /* 0F 59 = MULPS -- mul 32Fx4 from R/M to R */
+ if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x59) {
+ delta = dis_SSE_E_to_G_all( sorb, delta+2, "mulps", Iop_Mul32Fx4 );
+ goto decode_success;
+ }
+
+ /* F3 0F 59 = MULSS -- mul 32F0x4 from R/M to R */
+ if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x59) {
+ vassert(sz == 4);
+ delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "mulss", Iop_Mul32F0x4 );
+ goto decode_success;
+ }
+
+ /* 0F 56 = ORPS -- G = G or E */
+ if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x56) {
+ delta = dis_SSE_E_to_G_all( sorb, delta+2, "orps", Iop_OrV128 );
+ goto decode_success;
+ }
+
+ /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
+ /* 0F E0 = PAVGB -- 8x8 unsigned Packed Average, with rounding */
+ if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xE0) {
+ do_MMX_preamble();
+ delta = dis_MMXop_regmem_to_reg (
+ sorb, delta+2, insn[1], "pavgb", False );
+ goto decode_success;
+ }
+
+ /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
+ /* 0F E3 = PAVGW -- 16x4 unsigned Packed Average, with rounding */
+ if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xE3) {
+ do_MMX_preamble();
+ delta = dis_MMXop_regmem_to_reg (
+ sorb, delta+2, insn[1], "pavgw", False );
+ goto decode_success;
+ }
+
+ /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
+ /* 0F C5 = PEXTRW -- extract 16-bit field from mmx(E) and put
+ zero-extend of it in ireg(G). */
+ if (insn[0] == 0x0F && insn[1] == 0xC5) {
+ modrm = insn[2];
+ if (sz == 4 && epartIsReg(modrm)) {
+ IRTemp sV = newTemp(Ity_I64);
+ t5 = newTemp(Ity_I16);
+ do_MMX_preamble();
+ assign(sV, getMMXReg(eregOfRM(modrm)));
+ breakup64to16s( sV, &t3, &t2, &t1, &t0 );
+ switch (insn[3] & 3) {
+ case 0: assign(t5, mkexpr(t0)); break;
+ case 1: assign(t5, mkexpr(t1)); break;
+ case 2: assign(t5, mkexpr(t2)); break;
+ case 3: assign(t5, mkexpr(t3)); break;
+ default: vassert(0); /*NOTREACHED*/
+ }
+ putIReg(4, gregOfRM(modrm), unop(Iop_16Uto32, mkexpr(t5)));
+ DIP("pextrw $%d,%s,%s\n",
+ (Int)insn[3], nameMMXReg(eregOfRM(modrm)),
+ nameIReg(4,gregOfRM(modrm)));
+ delta += 4;
+ goto decode_success;
+ }
+ /* else fall through */
+ }
+
+ /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
+ /* 0F C4 = PINSRW -- get 16 bits from E(mem or low half ireg) and
+ put it into the specified lane of mmx(G). */
+ if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xC4) {
+ /* Use t0 .. t3 to hold the 4 original 16-bit lanes of the
+ mmx reg. t4 is the new lane value. t5 is the original
+ mmx value. t6 is the new mmx value. */
+ Int lane;
+ t4 = newTemp(Ity_I16);
+ t5 = newTemp(Ity_I64);
+ t6 = newTemp(Ity_I64);
+ modrm = insn[2];
+ do_MMX_preamble();
+
+ assign(t5, getMMXReg(gregOfRM(modrm)));
+ breakup64to16s( t5, &t3, &t2, &t1, &t0 );
+
+ if (epartIsReg(modrm)) {
+ assign(t4, getIReg(2, eregOfRM(modrm)));
+ delta += 3+1;
+ lane = insn[3+1-1];
+ DIP("pinsrw $%d,%s,%s\n", (Int)lane,
+ nameIReg(2,eregOfRM(modrm)),
+ nameMMXReg(gregOfRM(modrm)));
+ } else {
+ addr = disAMode ( &alen, sorb, delta+2, dis_buf );
+ delta += 3+alen;
+ lane = insn[3+alen-1];
+ assign(t4, loadLE(Ity_I16, mkexpr(addr)));
+ DIP("pinsrw $%d,%s,%s\n", (Int)lane,
+ dis_buf,
+ nameMMXReg(gregOfRM(modrm)));
+ }
+
+ switch (lane & 3) {
+ case 0: assign(t6, mk64from16s(t3,t2,t1,t4)); break;
+ case 1: assign(t6, mk64from16s(t3,t2,t4,t0)); break;
+ case 2: assign(t6, mk64from16s(t3,t4,t1,t0)); break;
+ case 3: assign(t6, mk64from16s(t4,t2,t1,t0)); break;
+ default: vassert(0); /*NOTREACHED*/
+ }
+ putMMXReg(gregOfRM(modrm), mkexpr(t6));
+ goto decode_success;
+ }
+
+ /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
+ /* 0F EE = PMAXSW -- 16x4 signed max */
+ if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xEE) {
+ do_MMX_preamble();
+ delta = dis_MMXop_regmem_to_reg (
+ sorb, delta+2, insn[1], "pmaxsw", False );
+ goto decode_success;
+ }
+
+ /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
+ /* 0F DE = PMAXUB -- 8x8 unsigned max */
+ if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xDE) {
+ do_MMX_preamble();
+ delta = dis_MMXop_regmem_to_reg (
+ sorb, delta+2, insn[1], "pmaxub", False );
+ goto decode_success;
+ }
+
+ /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
+ /* 0F EA = PMINSW -- 16x4 signed min */
+ if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xEA) {
+ do_MMX_preamble();
+ delta = dis_MMXop_regmem_to_reg (
+ sorb, delta+2, insn[1], "pminsw", False );
+ goto decode_success;
+ }
+
+ /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
+ /* 0F DA = PMINUB -- 8x8 unsigned min */
+ if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xDA) {
+ do_MMX_preamble();
+ delta = dis_MMXop_regmem_to_reg (
+ sorb, delta+2, insn[1], "pminub", False );
+ goto decode_success;
+ }
+
+ /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
+ /* 0F D7 = PMOVMSKB -- extract sign bits from each of 8 lanes in
+ mmx(E), turn them into a byte, and put zero-extend of it in
+ ireg(G). */
+ if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xD7) {
+ modrm = insn[2];
+ if (epartIsReg(modrm)) {
+ do_MMX_preamble();
+ t0 = newTemp(Ity_I64);
+ t1 = newTemp(Ity_I32);
+ assign(t0, getMMXReg(eregOfRM(modrm)));
+ assign(t1, mkIRExprCCall(
+ Ity_I32, 0/*regparms*/,
+ "x86g_calculate_mmx_pmovmskb",
+ &x86g_calculate_mmx_pmovmskb,
+ mkIRExprVec_1(mkexpr(t0))));
+ putIReg(4, gregOfRM(modrm), mkexpr(t1));
+ DIP("pmovmskb %s,%s\n", nameMMXReg(eregOfRM(modrm)),
+ nameIReg(4,gregOfRM(modrm)));
+ delta += 3;
+ goto decode_success;
+ }
+ /* else fall through */
+ }
+
+ /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
+ /* 0F E4 = PMULHUW -- 16x4 hi-half of unsigned widening multiply */
+ if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xE4) {
+ do_MMX_preamble();
+ delta = dis_MMXop_regmem_to_reg (
+ sorb, delta+2, insn[1], "pmuluh", False );
+ goto decode_success;
+ }
+
+ /* 0F 18 /0 = PREFETCHNTA -- prefetch into caches, */
+ /* 0F 18 /1 = PREFETCHT0 -- with various different hints */
+ /* 0F 18 /2 = PREFETCHT1 */
+ /* 0F 18 /3 = PREFETCHT2 */
+ if (insn[0] == 0x0F && insn[1] == 0x18
+ && !epartIsReg(insn[2])
+ && gregOfRM(insn[2]) >= 0 && gregOfRM(insn[2]) <= 3) {
+ HChar* hintstr = "??";
+
+ modrm = getIByte(delta+2);
+ vassert(!epartIsReg(modrm));
+
+ addr = disAMode ( &alen, sorb, delta+2, dis_buf );
+ delta += 2+alen;
+
+ switch (gregOfRM(modrm)) {
+ case 0: hintstr = "nta"; break;
+ case 1: hintstr = "t0"; break;
+ case 2: hintstr = "t1"; break;
+ case 3: hintstr = "t2"; break;
+ default: vassert(0); /*NOTREACHED*/
+ }
+
+ DIP("prefetch%s %s\n", hintstr, dis_buf);
+ goto decode_success;
+ }
+
+ /* 0F 0D /0 = PREFETCH m8 -- 3DNow! prefetch */
+ /* 0F 0D /1 = PREFETCHW m8 -- ditto, with some other hint */
+ if (insn[0] == 0x0F && insn[1] == 0x0D
+ && !epartIsReg(insn[2])
+ && gregOfRM(insn[2]) >= 0 && gregOfRM(insn[2]) <= 1) {
+ HChar* hintstr = "??";
+
+ modrm = getIByte(delta+2);
+ vassert(!epartIsReg(modrm));
+
+ addr = disAMode ( &alen, sorb, delta+2, dis_buf );
+ delta += 2+alen;
+
+ switch (gregOfRM(modrm)) {
+ case 0: hintstr = ""; break;
+ case 1: hintstr = "w"; break;
+ default: vassert(0); /*NOTREACHED*/
+ }
+
+ DIP("prefetch%s %s\n", hintstr, dis_buf);
+ goto decode_success;
+ }
+
+ /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
+ /* 0F F6 = PSADBW -- sum of 8Ux8 absolute differences */
+ if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xF6) {
+ do_MMX_preamble();
+ delta = dis_MMXop_regmem_to_reg (
+ sorb, delta+2, insn[1], "psadbw", False );
+ goto decode_success;
+ }
+
+ /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
+ /* 0F 70 = PSHUFW -- rearrange 4x16 from E(mmx or mem) to G(mmx) */
+ if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x70) {
+ Int order;
+ IRTemp sV, dV, s3, s2, s1, s0;
+ s3 = s2 = s1 = s0 = IRTemp_INVALID;
+ sV = newTemp(Ity_I64);
+ dV = newTemp(Ity_I64);
+ do_MMX_preamble();
+ modrm = insn[2];
+ if (epartIsReg(modrm)) {
+ assign( sV, getMMXReg(eregOfRM(modrm)) );
+ order = (Int)insn[3];
+ delta += 2+2;
+ DIP("pshufw $%d,%s,%s\n", order,
+ nameMMXReg(eregOfRM(modrm)),
+ nameMMXReg(gregOfRM(modrm)));
+ } else {
+ addr = disAMode ( &alen, sorb, delta+2, dis_buf );
+ assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
+ order = (Int)insn[2+alen];
+ delta += 3+alen;
+ DIP("pshufw $%d,%s,%s\n", order,
+ dis_buf,
+ nameMMXReg(gregOfRM(modrm)));
+ }
+ breakup64to16s( sV, &s3, &s2, &s1, &s0 );
+
+# define SEL(n) \
+ ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
+ assign(dV,
+ mk64from16s( SEL((order>>6)&3), SEL((order>>4)&3),
+ SEL((order>>2)&3), SEL((order>>0)&3) )
+ );
+ putMMXReg(gregOfRM(modrm), mkexpr(dV));
+# undef SEL
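+ /* Worked example: order 0xE4 (binary 11 10 01 00) selects
+ s3,s2,s1,s0 for lanes 3..0 and so is the identity shuffle;
+ order 0x1B reverses the four 16-bit lanes. */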
+ goto decode_success;
+ }
+
+ /* 0F 53 = RCPPS -- approx reciprocal 32Fx4 from R/M to R */
+ if (insn[0] == 0x0F && insn[1] == 0x53) {
+ vassert(sz == 4);
+ delta = dis_SSE_E_to_G_unary_all( sorb, delta+2,
+ "rcpps", Iop_Recip32Fx4 );
+ goto decode_success;
+ }
+
+ /* F3 0F 53 = RCPSS -- approx reciprocal 32F0x4 from R/M to R */
+ if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x53) {
+ vassert(sz == 4);
+ delta = dis_SSE_E_to_G_unary_lo32( sorb, delta+3,
+ "rcpss", Iop_Recip32F0x4 );
+ goto decode_success;
+ }
+
+ /* 0F 52 = RSQRTPS -- approx reciprocal sqrt 32Fx4 from R/M to R */
+ if (insn[0] == 0x0F && insn[1] == 0x52) {
+ vassert(sz == 4);
+ delta = dis_SSE_E_to_G_unary_all( sorb, delta+2,
+ "rsqrtps", Iop_RSqrt32Fx4 );
+ goto decode_success;
+ }
+
+ /* F3 0F 52 = RSQRTSS -- approx reciprocal sqrt 32F0x4 from R/M to R */
+ if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x52) {
+ vassert(sz == 4);
+ delta = dis_SSE_E_to_G_unary_lo32( sorb, delta+3,
+ "rsqrtss", Iop_RSqrt32F0x4 );
+ goto decode_success;
+ }
+
+ /* 0F AE /7 = SFENCE -- flush pending operations to memory */
+ if (insn[0] == 0x0F && insn[1] == 0xAE
+ && epartIsReg(insn[2]) && gregOfRM(insn[2]) == 7) {
+ vassert(sz == 4);
+ delta += 3;
+ /* Insert a memory fence. It's sometimes important that these
+ are carried through to the generated code. */
+ stmt( IRStmt_MBE(Imbe_Fence) );
+ DIP("sfence\n");
+ goto decode_success;
+ }
+
+ /* 0F C6 /r ib = SHUFPS -- shuffle packed F32s */
+ if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xC6) {
+ Int select;
+ IRTemp sV, dV;
+ IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
+ sV = newTemp(Ity_V128);
+ dV = newTemp(Ity_V128);
+ s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
+ modrm = insn[2];
+ assign( dV, getXMMReg(gregOfRM(modrm)) );
+
+ if (epartIsReg(modrm)) {
+ assign( sV, getXMMReg(eregOfRM(modrm)) );
+ select = (Int)insn[3];
+ delta += 2+2;
+ DIP("shufps $%d,%s,%s\n", select,
+ nameXMMReg(eregOfRM(modrm)),
+ nameXMMReg(gregOfRM(modrm)));
+ } else {
+ addr = disAMode ( &alen, sorb, delta+2, dis_buf );
+ assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
+ select = (Int)insn[2+alen];
+ delta += 3+alen;
+ DIP("shufps $%d,%s,%s\n", select,
+ dis_buf,
+ nameXMMReg(gregOfRM(modrm)));
+ }
+
+ breakup128to32s( dV, &d3, &d2, &d1, &d0 );
+ breakup128to32s( sV, &s3, &s2, &s1, &s0 );
+
+# define SELD(n) ((n)==0 ? d0 : ((n)==1 ? d1 : ((n)==2 ? d2 : d3)))
+# define SELS(n) ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
+
+ putXMMReg(
+ gregOfRM(modrm),
+ mk128from32s( SELS((select>>6)&3), SELS((select>>4)&3),
+ SELD((select>>2)&3), SELD((select>>0)&3) )
+ );
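+ /* The low two result lanes are selected from dV and the high
+ two from sV; e.g. select 0xE4 yields lanes (3..0) =
+ s3,s2,d1,d0. */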
+
+# undef SELD
+# undef SELS
+
+ goto decode_success;
+ }
+
+ /* 0F 51 = SQRTPS -- sqrt 32Fx4 from R/M to R */
+ if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x51) {
+ delta = dis_SSE_E_to_G_unary_all( sorb, delta+2,
+ "sqrtps", Iop_Sqrt32Fx4 );
+ goto decode_success;
+ }
+
+ /* F3 0F 51 = SQRTSS -- sqrt 32F0x4 from R/M to R */
+ if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x51) {
+ vassert(sz == 4);
+ delta = dis_SSE_E_to_G_unary_lo32( sorb, delta+3,
+ "sqrtss", Iop_Sqrt32F0x4 );
+ goto decode_success;
+ }
+
+ /* 0F AE /3 = STMXCSR m32 -- store %mxcsr */
+ if (insn[0] == 0x0F && insn[1] == 0xAE
+ && !epartIsReg(insn[2]) && gregOfRM(insn[2]) == 3) {
+ modrm = getIByte(delta+2);
+ vassert(sz == 4);
+ vassert(!epartIsReg(modrm));
+
+ addr = disAMode ( &alen, sorb, delta+2, dis_buf );
+ delta += 2+alen;
+
+ /* Fake up a native SSE mxcsr word. The only thing it depends
+ on is SSEROUND[1:0], so call a clean helper to cook it up.
+ */
+ /* UInt x86g_create_mxcsr ( UInt sseround ) */
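+ /* A sketch of the expected result, assuming the usual MXCSR
+ reset value 0x1F80 with the rounding control at bits 14:13:
+ return 0x1F80 | ((sseround & 3) << 13); */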
+ DIP("stmxcsr %s\n", dis_buf);
+ storeLE( mkexpr(addr),
+ mkIRExprCCall(
+ Ity_I32, 0/*regp*/,
+ "x86g_create_mxcsr", &x86g_create_mxcsr,
+ mkIRExprVec_1( get_sse_roundingmode() )
+ )
+ );
+ goto decode_success;
+ }
+
+ /* 0F 5C = SUBPS -- sub 32Fx4 from R/M to R */
+ if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5C) {
+ delta = dis_SSE_E_to_G_all( sorb, delta+2, "subps", Iop_Sub32Fx4 );
+ goto decode_success;
+ }
+
+ /* F3 0F 5C = SUBSS -- sub 32F0x4 from R/M to R */
+ if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5C) {
+ vassert(sz == 4);
+ delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "subss", Iop_Sub32F0x4 );
+ goto decode_success;
+ }
+
+ /* 0F 15 = UNPCKHPS -- unpack and interleave high part F32s */
+ /* 0F 14 = UNPCKLPS -- unpack and interleave low part F32s */
+ /* These just appear to be special cases of SHUFPS */
+ if (sz == 4 && insn[0] == 0x0F && (insn[1] == 0x15 || insn[1] == 0x14)) {
+ IRTemp sV, dV;
+ IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
+ Bool hi = toBool(insn[1] == 0x15);
+ sV = newTemp(Ity_V128);
+ dV = newTemp(Ity_V128);
+ s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
+ modrm = insn[2];
+ assign( dV, getXMMReg(gregOfRM(modrm)) );
+
+ if (epartIsReg(modrm)) {
+ assign( sV, getXMMReg(eregOfRM(modrm)) );
+ delta += 2+1;
+ DIP("unpck%sps %s,%s\n", hi ? "h" : "l",
+ nameXMMReg(eregOfRM(modrm)),
+ nameXMMReg(gregOfRM(modrm)));
+ } else {
+ addr = disAMode ( &alen, sorb, delta+2, dis_buf );
+ assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
+ delta += 2+alen;
+ DIP("unpck%sps %s,%s\n", hi ? "h" : "l",
+ dis_buf,
+ nameXMMReg(gregOfRM(modrm)));
+ }
+
+ breakup128to32s( dV, &d3, &d2, &d1, &d0 );
+ breakup128to32s( sV, &s3, &s2, &s1, &s0 );
+
+ if (hi) {
+ putXMMReg( gregOfRM(modrm), mk128from32s( s3, d3, s2, d2 ) );
+ } else {
+ putXMMReg( gregOfRM(modrm), mk128from32s( s1, d1, s0, d0 ) );
+ }
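+ /* i.e. unpcklps interleaves the low halves of the operands,
+ giving lanes (3..0) = s1,d1,s0,d0, and unpckhps does the
+ same with the high halves. */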
+
+ goto decode_success;
+ }
+
+ /* 0F 57 = XORPS -- G = G xor E */
+ if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x57) {
+ delta = dis_SSE_E_to_G_all( sorb, delta+2, "xorps", Iop_XorV128 );
+ goto decode_success;
+ }
+
+ /* ---------------------------------------------------- */
+ /* --- end of the SSE decoder. --- */
+ /* ---------------------------------------------------- */
+
+ /* ---------------------------------------------------- */
+ /* --- start of the SSE2 decoder. --- */
+ /* ---------------------------------------------------- */
+
+ /* Skip parts of the decoder which don't apply given the stated
+ guest subarchitecture. */
+ if (0 == (archinfo->hwcaps & VEX_HWCAPS_X86_SSE2))
+ goto after_sse_decoders; /* no SSE2 capabilities */
+
+ insn = (UChar*)&guest_code[delta];
+
+ /* 66 0F 58 = ADDPD -- add 64Fx2 from R/M to R */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x58) {
+ delta = dis_SSE_E_to_G_all( sorb, delta+2, "addpd", Iop_Add64Fx2 );
+ goto decode_success;
+ }
+
+ /* F2 0F 58 = ADDSD -- add 64F0x2 from R/M to R */
+ if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x58) {
+ vassert(sz == 4);
+ delta = dis_SSE_E_to_G_lo64( sorb, delta+3, "addsd", Iop_Add64F0x2 );
+ goto decode_success;
+ }
+
+ /* 66 0F 55 = ANDNPD -- G = (not G) and E */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x55) {
+ delta = dis_SSE_E_to_G_all_invG( sorb, delta+2, "andnpd", Iop_AndV128 );
+ goto decode_success;
+ }
+
+ /* 66 0F 54 = ANDPD -- G = G and E */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x54) {
+ delta = dis_SSE_E_to_G_all( sorb, delta+2, "andpd", Iop_AndV128 );
+ goto decode_success;
+ }
+
+ /* 66 0F C2 = CMPPD -- 64Fx2 comparison from R/M to R */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xC2) {
+ delta = dis_SSEcmp_E_to_G( sorb, delta+2, "cmppd", True, 8 );
+ goto decode_success;
+ }
+
+ /* F2 0F C2 = CMPSD -- 64F0x2 comparison from R/M to R */
+ if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0xC2) {
+ vassert(sz == 4);
+ delta = dis_SSEcmp_E_to_G( sorb, delta+3, "cmpsd", False, 8 );
+ goto decode_success;
+ }
+
+ /* 66 0F 2F = COMISD -- 64F0x2 comparison G,E, and set ZCP */
+ /* 66 0F 2E = UCOMISD -- 64F0x2 comparison G,E, and set ZCP */
+ if (sz == 2 && insn[0] == 0x0F && (insn[1] == 0x2F || insn[1] == 0x2E)) {
+ IRTemp argL = newTemp(Ity_F64);
+ IRTemp argR = newTemp(Ity_F64);
+ modrm = getIByte(delta+2);
+ if (epartIsReg(modrm)) {
+ assign( argR, getXMMRegLane64F( eregOfRM(modrm), 0/*lowest lane*/ ) );
+ delta += 2+1;
+ DIP("[u]comisd %s,%s\n", nameXMMReg(eregOfRM(modrm)),
+ nameXMMReg(gregOfRM(modrm)) );
+ } else {
+ addr = disAMode ( &alen, sorb, delta+2, dis_buf );
+ assign( argR, loadLE(Ity_F64, mkexpr(addr)) );
+ delta += 2+alen;
+ DIP("[u]comisd %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRM(modrm)) );
+ }
+ assign( argL, getXMMRegLane64F( gregOfRM(modrm), 0/*lowest lane*/ ) );
+
+ stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) ));
+ stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
+ stmt( IRStmt_Put(
+ OFFB_CC_DEP1,
+ binop( Iop_And32,
+ binop(Iop_CmpF64, mkexpr(argL), mkexpr(argR)),
+ mkU32(0x45)
+ )));
+ /* Set NDEP even though it isn't used. This makes redundant-PUT
+ elimination of previous stores to this field work better. */
+ stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
+ goto decode_success;
+ }
+
+ /* F3 0F E6 = CVTDQ2PD -- convert 2 x I32 in mem/lo half xmm to 2 x
+ F64 in xmm(G) */
+ if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0xE6) {
+ IRTemp arg64 = newTemp(Ity_I64);
+ vassert(sz == 4);
+
+ modrm = getIByte(delta+3);
+ if (epartIsReg(modrm)) {
+ assign( arg64, getXMMRegLane64(eregOfRM(modrm), 0) );
+ delta += 3+1;
+ DIP("cvtdq2pd %s,%s\n", nameXMMReg(eregOfRM(modrm)),
+ nameXMMReg(gregOfRM(modrm)));
+ } else {
+ addr = disAMode ( &alen, sorb, delta+3, dis_buf );
+ assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
+ delta += 3+alen;
+ DIP("cvtdq2pd %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRM(modrm)) );
+ }
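+ /* No rounding mode is needed here: every I32 value is exactly
+ representable as an F64, so Iop_I32StoF64 is exact. */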
+
+ putXMMRegLane64F(
+ gregOfRM(modrm), 0,
+ unop(Iop_I32StoF64, unop(Iop_64to32, mkexpr(arg64)))
+ );
+
+ putXMMRegLane64F(
+ gregOfRM(modrm), 1,
+ unop(Iop_I32StoF64, unop(Iop_64HIto32, mkexpr(arg64)))
+ );
+
+ goto decode_success;
+ }
+
+ /* 0F 5B = CVTDQ2PS -- convert 4 x I32 in mem/xmm to 4 x F32 in
+ xmm(G) */
+ if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5B) {
+ IRTemp argV = newTemp(Ity_V128);
+ IRTemp rmode = newTemp(Ity_I32);
+
+ modrm = getIByte(delta+2);
+ if (epartIsReg(modrm)) {
+ assign( argV, getXMMReg(eregOfRM(modrm)) );
+ delta += 2+1;
+ DIP("cvtdq2ps %s,%s\n", nameXMMReg(eregOfRM(modrm)),
+ nameXMMReg(gregOfRM(modrm)));
+ } else {
+ addr = disAMode ( &alen, sorb, delta+2, dis_buf );
+ assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
+ delta += 2+alen;
+ DIP("cvtdq2ps %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRM(modrm)) );
+ }
+
+ assign( rmode, get_sse_roundingmode() );
+ breakup128to32s( argV, &t3, &t2, &t1, &t0 );
+
+# define CVT(_t) binop( Iop_F64toF32, \
+ mkexpr(rmode), \
+ unop(Iop_I32StoF64,mkexpr(_t)))
+
+ putXMMRegLane32F( gregOfRM(modrm), 3, CVT(t3) );
+ putXMMRegLane32F( gregOfRM(modrm), 2, CVT(t2) );
+ putXMMRegLane32F( gregOfRM(modrm), 1, CVT(t1) );
+ putXMMRegLane32F( gregOfRM(modrm), 0, CVT(t0) );
+
+# undef CVT
+
+ goto decode_success;
+ }
+
+ /* F2 0F E6 = CVTPD2DQ -- convert 2 x F64 in mem/xmm to 2 x I32 in
+ lo half xmm(G), and zero upper half */
+ if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0xE6) {
+ IRTemp argV = newTemp(Ity_V128);
+ IRTemp rmode = newTemp(Ity_I32);
+ vassert(sz == 4);
+
+ modrm = getIByte(delta+3);
+ if (epartIsReg(modrm)) {
+ assign( argV, getXMMReg(eregOfRM(modrm)) );
+ delta += 3+1;
+ DIP("cvtpd2dq %s,%s\n", nameXMMReg(eregOfRM(modrm)),
+ nameXMMReg(gregOfRM(modrm)));
+ } else {
+ addr = disAMode ( &alen, sorb, delta+3, dis_buf );
+ assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
+ delta += 3+alen;
+ DIP("cvtpd2dq %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRM(modrm)) );
+ }
+
+ assign( rmode, get_sse_roundingmode() );
+ t0 = newTemp(Ity_F64);
+ t1 = newTemp(Ity_F64);
+ assign( t0, unop(Iop_ReinterpI64asF64,
+ unop(Iop_V128to64, mkexpr(argV))) );
+ assign( t1, unop(Iop_ReinterpI64asF64,
+ unop(Iop_V128HIto64, mkexpr(argV))) );
+
+# define CVT(_t) binop( Iop_F64toI32S, \
+ mkexpr(rmode), \
+ mkexpr(_t) )
+
+ putXMMRegLane32( gregOfRM(modrm), 3, mkU32(0) );
+ putXMMRegLane32( gregOfRM(modrm), 2, mkU32(0) );
+ putXMMRegLane32( gregOfRM(modrm), 1, CVT(t1) );
+ putXMMRegLane32( gregOfRM(modrm), 0, CVT(t0) );
+
+# undef CVT
+
+ goto decode_success;
+ }
+
+ /* 66 0F 2D = CVTPD2PI -- convert 2 x F64 in mem/xmm to 2 x
+ I32 in mmx, according to prevailing SSE rounding mode */
+ /* 66 0F 2C = CVTTPD2PI -- convert 2 x F64 in mem/xmm to 2 x
+ I32 in mmx, rounding towards zero */
+ if (sz == 2 && insn[0] == 0x0F && (insn[1] == 0x2D || insn[1] == 0x2C)) {
+ IRTemp dst64 = newTemp(Ity_I64);
+ IRTemp rmode = newTemp(Ity_I32);
+ IRTemp f64lo = newTemp(Ity_F64);
+ IRTemp f64hi = newTemp(Ity_F64);
+ Bool r2zero = toBool(insn[1] == 0x2C);
+
+ do_MMX_preamble();
+ modrm = getIByte(delta+2);
+
+ if (epartIsReg(modrm)) {
+ delta += 2+1;
+ assign(f64lo, getXMMRegLane64F(eregOfRM(modrm), 0));
+ assign(f64hi, getXMMRegLane64F(eregOfRM(modrm), 1));
+ DIP("cvt%spd2pi %s,%s\n", r2zero ? "t" : "",
+ nameXMMReg(eregOfRM(modrm)),
+ nameMMXReg(gregOfRM(modrm)));
+ } else {
+ addr = disAMode ( &alen, sorb, delta+2, dis_buf );
+ assign(f64lo, loadLE(Ity_F64, mkexpr(addr)));
+ assign(f64hi, loadLE(Ity_F64, binop( Iop_Add32,
+ mkexpr(addr),
+ mkU32(8) )));
+ delta += 2+alen;
+ DIP("cvt%spf2pi %s,%s\n", r2zero ? "t" : "",
+ dis_buf,
+ nameMMXReg(gregOfRM(modrm)));
+ }
+
+ if (r2zero) {
+ assign(rmode, mkU32((UInt)Irrm_ZERO) );
+ } else {
+ assign( rmode, get_sse_roundingmode() );
+ }
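+ /* The truncating ("t") variant always rounds towards zero;
+ the plain variant honours whatever rounding mode MXCSR
+ currently holds. */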
+
+ assign(
+ dst64,
+ binop( Iop_32HLto64,
+ binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64hi) ),
+ binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64lo) )
+ )
+ );
+
+ putMMXReg(gregOfRM(modrm), mkexpr(dst64));
+ goto decode_success;
+ }
+
+ /* 66 0F 5A = CVTPD2PS -- convert 2 x F64 in mem/xmm to 2 x F32 in
+ lo half xmm(G), and zero upper half */
+ /* Note, this is practically identical to CVTPD2DQ. It would have
+ been nicer to merge them together, but the insn[] offsets differ
+ by one. */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x5A) {
+ IRTemp argV = newTemp(Ity_V128);
+ IRTemp rmode = newTemp(Ity_I32);
+
+ modrm = getIByte(delta+2);
+ if (epartIsReg(modrm)) {
+ assign( argV, getXMMReg(eregOfRM(modrm)) );
+ delta += 2+1;
+ DIP("cvtpd2ps %s,%s\n", nameXMMReg(eregOfRM(modrm)),
+ nameXMMReg(gregOfRM(modrm)));
+ } else {
+ addr = disAMode ( &alen, sorb, delta+2, dis_buf );
+ assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
+ delta += 2+alen;
+ DIP("cvtpd2ps %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRM(modrm)) );
+ }
+
+ assign( rmode, get_sse_roundingmode() );
+ t0 = newTemp(Ity_F64);
+ t1 = newTemp(Ity_F64);
+ assign( t0, unop(Iop_ReinterpI64asF64,
+ unop(Iop_V128to64, mkexpr(argV))) );
+ assign( t1, unop(Iop_ReinterpI64asF64,
+ unop(Iop_V128HIto64, mkexpr(argV))) );
+
+# define CVT(_t) binop( Iop_F64toF32, \
+ mkexpr(rmode), \
+ mkexpr(_t) )
+
+ putXMMRegLane32( gregOfRM(modrm), 3, mkU32(0) );
+ putXMMRegLane32( gregOfRM(modrm), 2, mkU32(0) );
+ putXMMRegLane32F( gregOfRM(modrm), 1, CVT(t1) );
+ putXMMRegLane32F( gregOfRM(modrm), 0, CVT(t0) );
+
+# undef CVT
+
+ goto decode_success;
+ }
+
+ /* 66 0F 2A = CVTPI2PD -- convert 2 x I32 in mem/mmx to 2 x F64 in
+ xmm(G) */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x2A) {
+ IRTemp arg64 = newTemp(Ity_I64);
+
+ modrm = getIByte(delta+2);
+ if (epartIsReg(modrm)) {
+ /* Only switch to MMX mode if the source is a MMX register.
+ This is inconsistent with all other instructions which
+ convert between XMM and (M64 or MMX), which always switch
+ to MMX mode even if 64-bit operand is M64 and not MMX. At
+ least, that's what the Intel docs seem to me to say.
+ Fixes #210264. */
+ do_MMX_preamble();
+ assign( arg64, getMMXReg(eregOfRM(modrm)) );
+ delta += 2+1;
+ DIP("cvtpi2pd %s,%s\n", nameMMXReg(eregOfRM(modrm)),
+ nameXMMReg(gregOfRM(modrm)));
+ } else {
+ addr = disAMode ( &alen, sorb, delta+2, dis_buf );
+ assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
+ delta += 2+alen;
+ DIP("cvtpi2pd %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRM(modrm)) );
+ }
+
+ putXMMRegLane64F(
+ gregOfRM(modrm), 0,
+ unop(Iop_I32StoF64, unop(Iop_64to32, mkexpr(arg64)) )
+ );
+
+ putXMMRegLane64F(
+ gregOfRM(modrm), 1,
+ unop(Iop_I32StoF64, unop(Iop_64HIto32, mkexpr(arg64)) )
+ );
+
+ goto decode_success;
+ }
+
+ /* 66 0F 5B = CVTPS2DQ -- convert 4 x F32 in mem/xmm to 4 x I32 in
+ xmm(G) */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x5B) {
+ IRTemp argV = newTemp(Ity_V128);
+ IRTemp rmode = newTemp(Ity_I32);
+
+ modrm = getIByte(delta+2);
+ if (epartIsReg(modrm)) {
+ assign( argV, getXMMReg(eregOfRM(modrm)) );
+ delta += 2+1;
+ DIP("cvtps2dq %s,%s\n", nameXMMReg(eregOfRM(modrm)),
+ nameXMMReg(gregOfRM(modrm)));
+ } else {
+ addr = disAMode ( &alen, sorb, delta+2, dis_buf );
+ assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
+ delta += 2+alen;
+ DIP("cvtps2dq %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRM(modrm)) );
+ }
+
+ assign( rmode, get_sse_roundingmode() );
+ breakup128to32s( argV, &t3, &t2, &t1, &t0 );
+
+ /* This is less than ideal. If it turns out to be a performance
+ bottleneck it can be improved. */
+# define CVT(_t) \
+ binop( Iop_F64toI32S, \
+ mkexpr(rmode), \
+ unop( Iop_F32toF64, \
+ unop( Iop_ReinterpI32asF32, mkexpr(_t))) )
+
+ putXMMRegLane32( gregOfRM(modrm), 3, CVT(t3) );
+ putXMMRegLane32( gregOfRM(modrm), 2, CVT(t2) );
+ putXMMRegLane32( gregOfRM(modrm), 1, CVT(t1) );
+ putXMMRegLane32( gregOfRM(modrm), 0, CVT(t0) );
+
+# undef CVT
+
+ goto decode_success;
+ }
+
+ /* 0F 5A = CVTPS2PD -- convert 2 x F32 in low half mem/xmm to 2 x
+ F64 in xmm(G). */
+ if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5A) {
+ IRTemp f32lo = newTemp(Ity_F32);
+ IRTemp f32hi = newTemp(Ity_F32);
+
+ modrm = getIByte(delta+2);
+ if (epartIsReg(modrm)) {
+ assign( f32lo, getXMMRegLane32F(eregOfRM(modrm), 0) );
+ assign( f32hi, getXMMRegLane32F(eregOfRM(modrm), 1) );
+ delta += 2+1;
+ DIP("cvtps2pd %s,%s\n", nameXMMReg(eregOfRM(modrm)),
+ nameXMMReg(gregOfRM(modrm)));
+ } else {
+ addr = disAMode ( &alen, sorb, delta+2, dis_buf );
+ assign( f32lo, loadLE(Ity_F32, mkexpr(addr)) );
+ assign( f32hi, loadLE(Ity_F32,
+ binop(Iop_Add32,mkexpr(addr),mkU32(4))) );
+ delta += 2+alen;
+ DIP("cvtps2pd %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRM(modrm)) );
+ }
+
+ putXMMRegLane64F( gregOfRM(modrm), 1,
+ unop(Iop_F32toF64, mkexpr(f32hi)) );
+ putXMMRegLane64F( gregOfRM(modrm), 0,
+ unop(Iop_F32toF64, mkexpr(f32lo)) );
+
+ goto decode_success;
+ }
+
+ /* F2 0F 2D = CVTSD2SI -- convert F64 in mem/low half xmm to
+ I32 in ireg, according to prevailing SSE rounding mode */
+ /* F2 0F 2C = CVTTSD2SI -- convert F64 in mem/low half xmm to
+ I32 in ireg, rounding towards zero */
+ if (insn[0] == 0xF2 && insn[1] == 0x0F
+ && (insn[2] == 0x2D || insn[2] == 0x2C)) {
+ IRTemp rmode = newTemp(Ity_I32);
+ IRTemp f64lo = newTemp(Ity_F64);
+ Bool r2zero = toBool(insn[2] == 0x2C);
+ vassert(sz == 4);
+
+ modrm = getIByte(delta+3);
+ if (epartIsReg(modrm)) {
+ delta += 3+1;
+ assign(f64lo, getXMMRegLane64F(eregOfRM(modrm), 0));
+ DIP("cvt%ssd2si %s,%s\n", r2zero ? "t" : "",
+ nameXMMReg(eregOfRM(modrm)),
+ nameIReg(4, gregOfRM(modrm)));
+ } else {
+ addr = disAMode ( &alen, sorb, delta+3, dis_buf );
+ assign(f64lo, loadLE(Ity_F64, mkexpr(addr)));
+ delta += 3+alen;
+ DIP("cvt%ssd2si %s,%s\n", r2zero ? "t" : "",
+ dis_buf,
+ nameIReg(4, gregOfRM(modrm)));
+ }
+
+ if (r2zero) {
+ assign( rmode, mkU32((UInt)Irrm_ZERO) );
+ } else {
+ assign( rmode, get_sse_roundingmode() );
+ }
+
+ putIReg(4, gregOfRM(modrm),
+ binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64lo)) );
+
+ goto decode_success;
+ }
+
+ /* F2 0F 5A = CVTSD2SS -- convert F64 in mem/low half xmm to F32 in
+ low 1/4 xmm(G), according to prevailing SSE rounding mode */
+ if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x5A) {
+ IRTemp rmode = newTemp(Ity_I32);
+ IRTemp f64lo = newTemp(Ity_F64);
+ vassert(sz == 4);
+
+ modrm = getIByte(delta+3);
+ if (epartIsReg(modrm)) {
+ delta += 3+1;
+ assign(f64lo, getXMMRegLane64F(eregOfRM(modrm), 0));
+ DIP("cvtsd2ss %s,%s\n", nameXMMReg(eregOfRM(modrm)),
+ nameXMMReg(gregOfRM(modrm)));
+ } else {
+ addr = disAMode ( &alen, sorb, delta+3, dis_buf );
+ assign(f64lo, loadLE(Ity_F64, mkexpr(addr)));
+ delta += 3+alen;
+ DIP("cvtsd2ss %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRM(modrm)));
+ }
+
+ assign( rmode, get_sse_roundingmode() );
+ putXMMRegLane32F(
+ gregOfRM(modrm), 0,
+ binop( Iop_F64toF32, mkexpr(rmode), mkexpr(f64lo) )
+ );
+
+ goto decode_success;
+ }
+
+ /* F2 0F 2A = CVTSI2SD -- convert I32 in mem/ireg to F64 in low
+ half xmm */
+ if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x2A) {
+ IRTemp arg32 = newTemp(Ity_I32);
+ vassert(sz == 4);
+
+ modrm = getIByte(delta+3);
+ if (epartIsReg(modrm)) {
+ assign( arg32, getIReg(4, eregOfRM(modrm)) );
+ delta += 3+1;
+ DIP("cvtsi2sd %s,%s\n", nameIReg(4, eregOfRM(modrm)),
+ nameXMMReg(gregOfRM(modrm)));
+ } else {
+ addr = disAMode ( &alen, sorb, delta+3, dis_buf );
+ assign( arg32, loadLE(Ity_I32, mkexpr(addr)) );
+ delta += 3+alen;
+ DIP("cvtsi2sd %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRM(modrm)) );
+ }
+
+ putXMMRegLane64F(
+ gregOfRM(modrm), 0,
+ unop(Iop_I32StoF64, mkexpr(arg32)) );
+
+ goto decode_success;
+ }
+
+ /* F3 0F 5A = CVTSS2SD -- convert F32 in mem/low 1/4 xmm to F64 in
+ low half xmm(G) */
+ if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5A) {
+ IRTemp f32lo = newTemp(Ity_F32);
+ vassert(sz == 4);
+
+ modrm = getIByte(delta+3);
+ if (epartIsReg(modrm)) {
+ delta += 3+1;
+ assign(f32lo, getXMMRegLane32F(eregOfRM(modrm), 0));
+ DIP("cvtss2sd %s,%s\n", nameXMMReg(eregOfRM(modrm)),
+ nameXMMReg(gregOfRM(modrm)));
+ } else {
+ addr = disAMode ( &alen, sorb, delta+3, dis_buf );
+ assign(f32lo, loadLE(Ity_F32, mkexpr(addr)));
+ delta += 3+alen;
+ DIP("cvtss2sd %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRM(modrm)));
+ }
+
+ putXMMRegLane64F( gregOfRM(modrm), 0,
+ unop( Iop_F32toF64, mkexpr(f32lo) ) );
+
+ goto decode_success;
+ }
+
+ /* 66 0F E6 = CVTTPD2DQ -- convert 2 x F64 in mem/xmm to 2 x I32 in
+ lo half xmm(G), and zero upper half, rounding towards zero */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE6) {
+ IRTemp argV = newTemp(Ity_V128);
+ IRTemp rmode = newTemp(Ity_I32);
+
+ modrm = getIByte(delta+2);
+ if (epartIsReg(modrm)) {
+ assign( argV, getXMMReg(eregOfRM(modrm)) );
+ delta += 2+1;
+ DIP("cvttpd2dq %s,%s\n", nameXMMReg(eregOfRM(modrm)),
+ nameXMMReg(gregOfRM(modrm)));
+ } else {
+ addr = disAMode ( &alen, sorb, delta+2, dis_buf );
+ assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
+ delta += 2+alen;
+ DIP("cvttpd2dq %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRM(modrm)) );
+ }
+
+ assign( rmode, mkU32((UInt)Irrm_ZERO) );
+
+ t0 = newTemp(Ity_F64);
+ t1 = newTemp(Ity_F64);
+ assign( t0, unop(Iop_ReinterpI64asF64,
+ unop(Iop_V128to64, mkexpr(argV))) );
+ assign( t1, unop(Iop_ReinterpI64asF64,
+ unop(Iop_V128HIto64, mkexpr(argV))) );
+
+# define CVT(_t) binop( Iop_F64toI32S, \
+ mkexpr(rmode), \
+ mkexpr(_t) )
+
+ putXMMRegLane32( gregOfRM(modrm), 3, mkU32(0) );
+ putXMMRegLane32( gregOfRM(modrm), 2, mkU32(0) );
+ putXMMRegLane32( gregOfRM(modrm), 1, CVT(t1) );
+ putXMMRegLane32( gregOfRM(modrm), 0, CVT(t0) );
+
+# undef CVT
+
+ goto decode_success;
+ }
+
+ /* F3 0F 5B = CVTTPS2DQ -- convert 4 x F32 in mem/xmm to 4 x I32 in
+ xmm(G), rounding towards zero */
+ if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5B) {
+ IRTemp argV = newTemp(Ity_V128);
+ IRTemp rmode = newTemp(Ity_I32);
+ vassert(sz == 4);
+
+ modrm = getIByte(delta+3);
+ if (epartIsReg(modrm)) {
+ assign( argV, getXMMReg(eregOfRM(modrm)) );
+ delta += 3+1;
+ DIP("cvttps2dq %s,%s\n", nameXMMReg(eregOfRM(modrm)),
+ nameXMMReg(gregOfRM(modrm)));
+ } else {
+ addr = disAMode ( &alen, sorb, delta+3, dis_buf );
+ assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
+ delta += 3+alen;
+ DIP("cvttps2dq %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRM(modrm)) );
+ }
+
+ assign( rmode, mkU32((UInt)Irrm_ZERO) );
+ breakup128to32s( argV, &t3, &t2, &t1, &t0 );
+
+ /* This is less than ideal. If it turns out to be a performance
+ bottleneck it can be improved. */
+# define CVT(_t) \
+ binop( Iop_F64toI32S, \
+ mkexpr(rmode), \
+ unop( Iop_F32toF64, \
+ unop( Iop_ReinterpI32asF32, mkexpr(_t))) )
+
+ putXMMRegLane32( gregOfRM(modrm), 3, CVT(t3) );
+ putXMMRegLane32( gregOfRM(modrm), 2, CVT(t2) );
+ putXMMRegLane32( gregOfRM(modrm), 1, CVT(t1) );
+ putXMMRegLane32( gregOfRM(modrm), 0, CVT(t0) );
+
+# undef CVT
+
+ goto decode_success;
+ }
+
+ /* 66 0F 5E = DIVPD -- div 64Fx2 from R/M to R */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x5E) {
+ delta = dis_SSE_E_to_G_all( sorb, delta+2, "divpd", Iop_Div64Fx2 );
+ goto decode_success;
+ }
+
+ /* F2 0F 5E = DIVSD -- div 64F0x2 from R/M to R */
+ if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x5E) {
+ vassert(sz == 4);
+ delta = dis_SSE_E_to_G_lo64( sorb, delta+3, "divsd", Iop_Div64F0x2 );
+ goto decode_success;
+ }
+
+ /* 0F AE /5 = LFENCE -- flush pending operations to memory */
+ /* 0F AE /6 = MFENCE -- flush pending operations to memory */
+ if (insn[0] == 0x0F && insn[1] == 0xAE
+ && epartIsReg(insn[2])
+ && (gregOfRM(insn[2]) == 5 || gregOfRM(insn[2]) == 6)) {
+ vassert(sz == 4);
+ delta += 3;
+ /* Insert a memory fence. It's sometimes important that these
+ are carried through to the generated code. */
+ stmt( IRStmt_MBE(Imbe_Fence) );
+ DIP("%sfence\n", gregOfRM(insn[2])==5 ? "l" : "m");
+ goto decode_success;
+ }
+
+ /* 66 0F 5F = MAXPD -- max 64Fx2 from R/M to R */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x5F) {
+ delta = dis_SSE_E_to_G_all( sorb, delta+2, "maxpd", Iop_Max64Fx2 );
+ goto decode_success;
+ }
+
+ /* F2 0F 5F = MAXSD -- max 64F0x2 from R/M to R */
+ if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x5F) {
+ vassert(sz == 4);
+ delta = dis_SSE_E_to_G_lo64( sorb, delta+3, "maxsd", Iop_Max64F0x2 );
+ goto decode_success;
+ }
+
+ /* 66 0F 5D = MINPD -- min 64Fx2 from R/M to R */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x5D) {
+ delta = dis_SSE_E_to_G_all( sorb, delta+2, "minpd", Iop_Min64Fx2 );
+ goto decode_success;
+ }
+
+ /* F2 0F 5D = MINSD -- min 64F0x2 from R/M to R */
+ if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x5D) {
+ vassert(sz == 4);
+ delta = dis_SSE_E_to_G_lo64( sorb, delta+3, "minsd", Iop_Min64F0x2 );
+ goto decode_success;
+ }
+
+ /* 66 0F 28 = MOVAPD -- move from E (mem or xmm) to G (xmm). */
+ /* 66 0F 10 = MOVUPD -- move from E (mem or xmm) to G (xmm). */
+ /* 66 0F 6F = MOVDQA -- move from E (mem or xmm) to G (xmm). */
+ if (sz == 2 && insn[0] == 0x0F
+ && (insn[1] == 0x28 || insn[1] == 0x10 || insn[1] == 0x6F)) {
+ HChar* wot = insn[1]==0x28 ? "apd" :
+ insn[1]==0x10 ? "upd" : "dqa";
+ modrm = getIByte(delta+2);
+ if (epartIsReg(modrm)) {
+ putXMMReg( gregOfRM(modrm),
+ getXMMReg( eregOfRM(modrm) ));
+ DIP("mov%s %s,%s\n", wot, nameXMMReg(eregOfRM(modrm)),
+ nameXMMReg(gregOfRM(modrm)));
+ delta += 2+1;
+ } else {
+ addr = disAMode ( &alen, sorb, delta+2, dis_buf );
+ if (insn[1] == 0x28/*movapd*/ || insn[1] == 0x6F/*movdqa*/)
+ gen_SEGV_if_not_16_aligned( addr );
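+ /* (movapd and movdqa require a 16-aligned address;
+ movupd does not.) */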
+ putXMMReg( gregOfRM(modrm),
+ loadLE(Ity_V128, mkexpr(addr)) );
+ DIP("mov%s %s,%s\n", wot, dis_buf,
+ nameXMMReg(gregOfRM(modrm)));
+ delta += 2+alen;
+ }
+ goto decode_success;
+ }
+
+ /* 66 0F 29 = MOVAPD -- move from G (xmm) to E (mem or xmm). */
+ /* 66 0F 11 = MOVUPD -- move from G (xmm) to E (mem or xmm). */
+ if (sz == 2 && insn[0] == 0x0F
+ && (insn[1] == 0x29 || insn[1] == 0x11)) {
+ HChar* wot = insn[1]==0x29 ? "apd" : "upd";
+ modrm = getIByte(delta+2);
+ if (epartIsReg(modrm)) {
+ /* fall through; awaiting test case */
+ } else {
+ addr = disAMode ( &alen, sorb, delta+2, dis_buf );
+ if (insn[1] == 0x29/*movapd*/)
+ gen_SEGV_if_not_16_aligned( addr );
+ storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) );
+ DIP("mov%s %s,%s\n", wot, nameXMMReg(gregOfRM(modrm)),
+ dis_buf );
+ delta += 2+alen;
+ goto decode_success;
+ }
+ }
+
+ /* 66 0F 6E = MOVD from r/m32 to xmm, zeroing high 3/4 of xmm. */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x6E) {
+ modrm = getIByte(delta+2);
+ if (epartIsReg(modrm)) {
+ delta += 2+1;
+ putXMMReg(
+ gregOfRM(modrm),
+ unop( Iop_32UtoV128, getIReg(4, eregOfRM(modrm)) )
+ );
+ DIP("movd %s, %s\n",
+ nameIReg(4,eregOfRM(modrm)), nameXMMReg(gregOfRM(modrm)));
+ } else {
+ addr = disAMode( &alen, sorb, delta+2, dis_buf );
+ delta += 2+alen;
+ putXMMReg(
+ gregOfRM(modrm),
+ unop( Iop_32UtoV128,loadLE(Ity_I32, mkexpr(addr)) )
+ );
+ DIP("movd %s, %s\n", dis_buf, nameXMMReg(gregOfRM(modrm)));
+ }
+ goto decode_success;
+ }
+
+ /* 66 0F 7E = MOVD from xmm low 1/4 to r/m32. */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x7E) {
+ modrm = getIByte(delta+2);
+ if (epartIsReg(modrm)) {
+ delta += 2+1;
+ putIReg( 4, eregOfRM(modrm),
+ getXMMRegLane32(gregOfRM(modrm), 0) );
+ DIP("movd %s, %s\n",
+ nameXMMReg(gregOfRM(modrm)), nameIReg(4,eregOfRM(modrm)));
+ } else {
+ addr = disAMode( &alen, sorb, delta+2, dis_buf );
+ delta += 2+alen;
+ storeLE( mkexpr(addr),
+ getXMMRegLane32(gregOfRM(modrm), 0) );
+ DIP("movd %s, %s\n", nameXMMReg(gregOfRM(modrm)), dis_buf);
+ }
+ goto decode_success;
+ }
+
+ /* 66 0F 7F = MOVDQA -- move from G (xmm) to E (mem or xmm). */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x7F) {
+ modrm = getIByte(delta+2);
+ if (epartIsReg(modrm)) {
+ delta += 2+1;
+ putXMMReg( eregOfRM(modrm),
+ getXMMReg(gregOfRM(modrm)) );
+ DIP("movdqa %s, %s\n", nameXMMReg(gregOfRM(modrm)),
+ nameXMMReg(eregOfRM(modrm)));
+ } else {
+ addr = disAMode( &alen, sorb, delta+2, dis_buf );
+ delta += 2+alen;
+ gen_SEGV_if_not_16_aligned( addr );
+ storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) );
+ DIP("movdqa %s, %s\n", nameXMMReg(gregOfRM(modrm)), dis_buf);
+ }
+ goto decode_success;
+ }
+
+ /* F3 0F 6F = MOVDQU -- move from E (mem or xmm) to G (xmm). */
+ /* Unfortunately can't simply use the MOVDQA case since the
+ prefix lengths are different (66 vs F3) */
+ if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x6F) {
+ vassert(sz == 4);
+ modrm = getIByte(delta+3);
+ if (epartIsReg(modrm)) {
+ putXMMReg( gregOfRM(modrm),
+ getXMMReg( eregOfRM(modrm) ));
+ DIP("movdqu %s,%s\n", nameXMMReg(eregOfRM(modrm)),
+ nameXMMReg(gregOfRM(modrm)));
+ delta += 3+1;
+ } else {
+ addr = disAMode ( &alen, sorb, delta+3, dis_buf );
+ putXMMReg( gregOfRM(modrm),
+ loadLE(Ity_V128, mkexpr(addr)) );
+ DIP("movdqu %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRM(modrm)));
+ delta += 3+alen;
+ }
+ goto decode_success;
+ }
+
+ /* F3 0F 7F = MOVDQU -- move from G (xmm) to E (mem or xmm). */
+ /* Unfortunately can't simply use the MOVDQA case since the
+ prefix lengths are different (66 vs F3) */
+ if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x7F) {
+ vassert(sz == 4);
+ modrm = getIByte(delta+3);
+ if (epartIsReg(modrm)) {
+ delta += 3+1;
+ putXMMReg( eregOfRM(modrm),
+ getXMMReg(gregOfRM(modrm)) );
+ DIP("movdqu %s, %s\n", nameXMMReg(gregOfRM(modrm)),
+ nameXMMReg(eregOfRM(modrm)));
+ } else {
+ addr = disAMode( &alen, sorb, delta+3, dis_buf );
+ delta += 3+alen;
+ storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) );
+ DIP("movdqu %s, %s\n", nameXMMReg(gregOfRM(modrm)), dis_buf);
+ }
+ goto decode_success;
+ }
+
+ /* F2 0F D6 = MOVDQ2Q -- move from E (lo half xmm, not mem) to G (mmx). */
+ if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0xD6) {
+ vassert(sz == 4);
+ modrm = getIByte(delta+3);
+ if (epartIsReg(modrm)) {
+ do_MMX_preamble();
+ putMMXReg( gregOfRM(modrm),
+ getXMMRegLane64( eregOfRM(modrm), 0 ));
+ DIP("movdq2q %s,%s\n", nameXMMReg(eregOfRM(modrm)),
+ nameMMXReg(gregOfRM(modrm)));
+ delta += 3+1;
+ goto decode_success;
+ } else {
+ /* fall through, apparently no mem case for this insn */
+ }
+ }
+
+ /* 66 0F 16 = MOVHPD -- move from mem to high half of XMM. */
+ /* This seems identical to MOVHPS. This instruction encoding is
+ completely crazy. */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x16) {
+ modrm = getIByte(delta+2);
+ if (epartIsReg(modrm)) {
+ /* fall through; apparently reg-reg is not possible */
+ } else {
+ addr = disAMode ( &alen, sorb, delta+2, dis_buf );
+ delta += 2+alen;
+ putXMMRegLane64( gregOfRM(modrm), 1/*upper lane*/,
+ loadLE(Ity_I64, mkexpr(addr)) );
+ DIP("movhpd %s,%s\n", dis_buf,
+ nameXMMReg( gregOfRM(modrm) ));
+ goto decode_success;
+ }
+ }
+
+ /* 66 0F 17 = MOVHPD -- move from high half of XMM to mem. */
+ /* Again, this seems identical to MOVHPS. */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x17) {
+ if (!epartIsReg(insn[2])) {
+ delta += 2;
+ addr = disAMode ( &alen, sorb, delta, dis_buf );
+ delta += alen;
+ storeLE( mkexpr(addr),
+ getXMMRegLane64( gregOfRM(insn[2]),
+ 1/*upper lane*/ ) );
+ DIP("movhpd %s,%s\n", nameXMMReg( gregOfRM(insn[2]) ),
+ dis_buf);
+ goto decode_success;
+ }
+ /* else fall through */
+ }
+
+ /* 66 0F 12 = MOVLPD -- move from mem to low half of XMM. */
+ /* Identical to MOVLPS ? */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x12) {
+ modrm = getIByte(delta+2);
+ if (epartIsReg(modrm)) {
+ /* fall through; apparently reg-reg is not possible */
+ } else {
+ addr = disAMode ( &alen, sorb, delta+2, dis_buf );
+ delta += 2+alen;
+ putXMMRegLane64( gregOfRM(modrm), 0/*lower lane*/,
+ loadLE(Ity_I64, mkexpr(addr)) );
+ DIP("movlpd %s, %s\n",
+ dis_buf, nameXMMReg( gregOfRM(modrm) ));
+ goto decode_success;
+ }
+ }
+
+ /* 66 0F 13 = MOVLPD -- move from low half of XMM to mem. */
+ /* Identical to MOVLPS ? */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x13) {
+ if (!epartIsReg(insn[2])) {
+ delta += 2;
+ addr = disAMode ( &alen, sorb, delta, dis_buf );
+ delta += alen;
+ storeLE( mkexpr(addr),
+ getXMMRegLane64( gregOfRM(insn[2]),
+ 0/*lower lane*/ ) );
+ DIP("movlpd %s, %s\n", nameXMMReg( gregOfRM(insn[2]) ),
+ dis_buf);
+ goto decode_success;
+ }
+ /* else fall through */
+ }
+
+ /* 66 0F 50 = MOVMSKPD - move 2 sign bits from 2 x F64 in xmm(E) to
+ 2 lowest bits of ireg(G) */
+ if (insn[0] == 0x0F && insn[1] == 0x50) {
+ modrm = getIByte(delta+2);
+ if (sz == 2 && epartIsReg(modrm)) {
+ Int src;
+ t0 = newTemp(Ity_I32);
+ t1 = newTemp(Ity_I32);
+ delta += 2+1;
+ src = eregOfRM(modrm);
+ assign( t0, binop( Iop_And32,
+ binop(Iop_Shr32, getXMMRegLane32(src,1), mkU8(31)),
+ mkU32(1) ));
+ assign( t1, binop( Iop_And32,
+ binop(Iop_Shr32, getXMMRegLane32(src,3), mkU8(30)),
+ mkU32(2) ));
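+ /* Lane 1 holds bits 63:32 of the low F64 and lane 3 bits
+ 63:32 of the high F64, so bit 31 of each lane is that
+ F64's sign bit. */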
+ putIReg(4, gregOfRM(modrm),
+ binop(Iop_Or32, mkexpr(t0), mkexpr(t1))
+ );
+ DIP("movmskpd %s,%s\n", nameXMMReg(src),
+ nameIReg(4, gregOfRM(modrm)));
+ goto decode_success;
+ }
+ /* else fall through */
+ }
+
+ /* 66 0F F7 = MASKMOVDQU -- store selected bytes of double quadword */
+ if (insn[0] == 0x0F && insn[1] == 0xF7) {
+ modrm = getIByte(delta+2);
+ if (sz == 2 && epartIsReg(modrm)) {
+ IRTemp regD = newTemp(Ity_V128);
+ IRTemp mask = newTemp(Ity_V128);
+ IRTemp olddata = newTemp(Ity_V128);
+ IRTemp newdata = newTemp(Ity_V128);
+ addr = newTemp(Ity_I32);
+
+ assign( addr, handleSegOverride( sorb, getIReg(4, R_EDI) ));
+ assign( regD, getXMMReg( gregOfRM(modrm) ));
+
+ /* Unfortunately can't do the obvious thing with SarN8x16
+ here since that can't be re-emitted as SSE2 code - no such
+ insn. */
+ assign(
+ mask,
+ binop(Iop_64HLtoV128,
+ binop(Iop_SarN8x8,
+ getXMMRegLane64( eregOfRM(modrm), 1 ),
+ mkU8(7) ),
+ binop(Iop_SarN8x8,
+ getXMMRegLane64( eregOfRM(modrm), 0 ),
+ mkU8(7) ) ));
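+ /* Shifting each byte right arithmetically by 7 copies its
+ sign bit across the whole byte, giving 0x00 or 0xFF per
+ lane -- exactly the byte-granularity mask needed for the
+ merge below. */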
+ assign( olddata, loadLE( Ity_V128, mkexpr(addr) ));
+ assign( newdata,
+ binop(Iop_OrV128,
+ binop(Iop_AndV128,
+ mkexpr(regD),
+ mkexpr(mask) ),
+ binop(Iop_AndV128,
+ mkexpr(olddata),
+ unop(Iop_NotV128, mkexpr(mask)))) );
+ storeLE( mkexpr(addr), mkexpr(newdata) );
+
+ delta += 2+1;
+ DIP("maskmovdqu %s,%s\n", nameXMMReg( eregOfRM(modrm) ),
+ nameXMMReg( gregOfRM(modrm) ) );
+ goto decode_success;
+ }
+ /* else fall through */
+ }
+
+ /* 66 0F E7 = MOVNTDQ -- for us, just a plain SSE store. */
+ if (insn[0] == 0x0F && insn[1] == 0xE7) {
+ modrm = getIByte(delta+2);
+ if (sz == 2 && !epartIsReg(modrm)) {
+ addr = disAMode ( &alen, sorb, delta+2, dis_buf );
+ gen_SEGV_if_not_16_aligned( addr );
+ storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) );
+ DIP("movntdq %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRM(modrm)));
+ delta += 2+alen;
+ goto decode_success;
+ }
+ /* else fall through */
+ }
+
+ /* 0F C3 = MOVNTI -- for us, just a plain ireg store. */
+ if (insn[0] == 0x0F && insn[1] == 0xC3) {
+ vassert(sz == 4);
+ modrm = getIByte(delta+2);
+ if (!epartIsReg(modrm)) {
+ addr = disAMode ( &alen, sorb, delta+2, dis_buf );
+ storeLE( mkexpr(addr), getIReg(4, gregOfRM(modrm)) );
+ DIP("movnti %s,%s\n", dis_buf,
+ nameIReg(4, gregOfRM(modrm)));
+ delta += 2+alen;
+ goto decode_success;
+ }
+ /* else fall through */
+ }
+
+ /* 66 0F D6 = MOVQ -- move 64 bits from G (lo half xmm) to E (mem
+ or lo half xmm). */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD6) {
+ modrm = getIByte(delta+2);
+ if (epartIsReg(modrm)) {
+ /* fall through, awaiting test case */
+ /* dst: lo half copied, hi half zeroed */
+ } else {
+ addr = disAMode ( &alen, sorb, delta+2, dis_buf );
+ storeLE( mkexpr(addr),
+ getXMMRegLane64( gregOfRM(modrm), 0 ));
+ DIP("movq %s,%s\n", nameXMMReg(gregOfRM(modrm)), dis_buf );
+ delta += 2+alen;
+ goto decode_success;
+ }
+ }
+
+ /* F3 0F D6 = MOVQ2DQ -- move from E (mmx) to G (lo half xmm, zero
+ hi half). */
+ if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0xD6) {
+ vassert(sz == 4);
+ modrm = getIByte(delta+3);
+ if (epartIsReg(modrm)) {
+ do_MMX_preamble();
+ putXMMReg( gregOfRM(modrm),
+ unop(Iop_64UtoV128, getMMXReg( eregOfRM(modrm) )) );
+ DIP("movq2dq %s,%s\n", nameMMXReg(eregOfRM(modrm)),
+ nameXMMReg(gregOfRM(modrm)));
+ delta += 3+1;
+ goto decode_success;
+ } else {
+ /* fall through, apparently no mem case for this insn */
+ }
+ }
+
+ /* F3 0F 7E = MOVQ -- move 64 bits from E (mem or lo half xmm) to
+ G (lo half xmm). Upper half of G is zeroed out. */
+ /* F2 0F 10 = MOVSD -- move 64 bits from E (mem or lo half xmm) to
+ G (lo half xmm). If E is mem, upper half of G is zeroed out.
+ If E is reg, upper half of G is unchanged. */
+ if ((insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x10)
+ || (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x7E)) {
+ vassert(sz == 4);
+ modrm = getIByte(delta+3);
+ if (epartIsReg(modrm)) {
+ putXMMRegLane64( gregOfRM(modrm), 0,
+ getXMMRegLane64( eregOfRM(modrm), 0 ));
+ if (insn[0] == 0xF3/*MOVQ*/) {
+ /* zero bits 127:64 */
+ putXMMRegLane64( gregOfRM(modrm), 1, mkU64(0) );
+ }
+ DIP("movsd %s,%s\n", nameXMMReg(eregOfRM(modrm)),
+ nameXMMReg(gregOfRM(modrm)));
+ delta += 3+1;
+ } else {
+ addr = disAMode ( &alen, sorb, delta+3, dis_buf );
+ /* zero bits 127:64 */
+ putXMMRegLane64( gregOfRM(modrm), 1, mkU64(0) );
+ /* write bits 63:0 */
+ putXMMRegLane64( gregOfRM(modrm), 0,
+ loadLE(Ity_I64, mkexpr(addr)) );
+ DIP("movsd %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRM(modrm)));
+ delta += 3+alen;
+ }
+ goto decode_success;
+ }
+
+ /* F2 0F 11 = MOVSD -- move 64 bits from G (lo half xmm) to E (mem
+ or lo half xmm). */
+ if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x11) {
+ vassert(sz == 4);
+ modrm = getIByte(delta+3);
+ if (epartIsReg(modrm)) {
+ putXMMRegLane64( eregOfRM(modrm), 0,
+ getXMMRegLane64( gregOfRM(modrm), 0 ));
+ DIP("movsd %s,%s\n", nameXMMReg(gregOfRM(modrm)),
+ nameXMMReg(eregOfRM(modrm)));
+ delta += 3+1;
+ } else {
+ addr = disAMode ( &alen, sorb, delta+3, dis_buf );
+ storeLE( mkexpr(addr),
+ getXMMRegLane64(gregOfRM(modrm), 0) );
+ DIP("movsd %s,%s\n", nameXMMReg(gregOfRM(modrm)),
+ dis_buf);
+ delta += 3+alen;
+ }
+ goto decode_success;
+ }
+
+ /* 66 0F 59 = MULPD -- mul 64Fx2 from R/M to R */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x59) {
+ delta = dis_SSE_E_to_G_all( sorb, delta+2, "mulpd", Iop_Mul64Fx2 );
+ goto decode_success;
+ }
+
+ /* F2 0F 59 = MULSD -- mul 64F0x2 from R/M to R */
+ if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x59) {
+ vassert(sz == 4);
+ delta = dis_SSE_E_to_G_lo64( sorb, delta+3, "mulsd", Iop_Mul64F0x2 );
+ goto decode_success;
+ }
+
+ /* 66 0F 56 = ORPD -- G = G or E */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x56) {
+ delta = dis_SSE_E_to_G_all( sorb, delta+2, "orpd", Iop_OrV128 );
+ goto decode_success;
+ }
+
+ /* 66 0F C6 /r ib = SHUFPD -- shuffle packed F64s */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xC6) {
+ Int select;
+ IRTemp sV = newTemp(Ity_V128);
+ IRTemp dV = newTemp(Ity_V128);
+ IRTemp s1 = newTemp(Ity_I64);
+ IRTemp s0 = newTemp(Ity_I64);
+ IRTemp d1 = newTemp(Ity_I64);
+ IRTemp d0 = newTemp(Ity_I64);
+
+ modrm = insn[2];
+ assign( dV, getXMMReg(gregOfRM(modrm)) );
+
+ if (epartIsReg(modrm)) {
+ assign( sV, getXMMReg(eregOfRM(modrm)) );
+ select = (Int)insn[3];
+ delta += 2+2;
+ DIP("shufpd $%d,%s,%s\n", select,
+ nameXMMReg(eregOfRM(modrm)),
+ nameXMMReg(gregOfRM(modrm)));
+ } else {
+ addr = disAMode ( &alen, sorb, delta+2, dis_buf );
+ assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
+ select = (Int)insn[2+alen];
+ delta += 3+alen;
+ DIP("shufpd $%d,%s,%s\n", select,
+ dis_buf,
+ nameXMMReg(gregOfRM(modrm)));
+ }
+
+ assign( d1, unop(Iop_V128HIto64, mkexpr(dV)) );
+ assign( d0, unop(Iop_V128to64, mkexpr(dV)) );
+ assign( s1, unop(Iop_V128HIto64, mkexpr(sV)) );
+ assign( s0, unop(Iop_V128to64, mkexpr(sV)) );
+
+# define SELD(n) mkexpr((n)==0 ? d0 : d1)
+# define SELS(n) mkexpr((n)==0 ? s0 : s1)
+
+ putXMMReg(
+ gregOfRM(modrm),
+ binop(Iop_64HLtoV128, SELS((select>>1)&1), SELD((select>>0)&1) )
+ );
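+ /* Bit 0 of the immediate picks the low result lane from dV
+ and bit 1 the high lane from sV; e.g. select == 2 gives
+ (hi,lo) = (s1,d0). */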
+
+# undef SELD
+# undef SELS
+
+ goto decode_success;
+ }
+
+ /* 66 0F 51 = SQRTPD -- sqrt 64Fx2 from R/M to R */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x51) {
+ delta = dis_SSE_E_to_G_unary_all( sorb, delta+2,
+ "sqrtpd", Iop_Sqrt64Fx2 );
+ goto decode_success;
+ }
+
+ /* F2 0F 51 = SQRTSD -- sqrt 64F0x2 from R/M to R */
+ if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x51) {
+ vassert(sz == 4);
+ delta = dis_SSE_E_to_G_unary_lo64( sorb, delta+3,
+ "sqrtsd", Iop_Sqrt64F0x2 );
+ goto decode_success;
+ }
+
+ /* 66 0F 5C = SUBPD -- sub 64Fx2 from R/M to R */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x5C) {
+ delta = dis_SSE_E_to_G_all( sorb, delta+2, "subpd", Iop_Sub64Fx2 );
+ goto decode_success;
+ }
+
+ /* F2 0F 5C = SUBSD -- sub 64F0x2 from R/M to R */
+ if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x5C) {
+ vassert(sz == 4);
+ delta = dis_SSE_E_to_G_lo64( sorb, delta+3, "subsd", Iop_Sub64F0x2 );
+ goto decode_success;
+ }
+
+ /* 66 0F 15 = UNPCKHPD -- unpack and interleave high part F64s */
+ /* 66 0F 14 = UNPCKLPD -- unpack and interleave low part F64s */
+ /* These just appear to be special cases of SHUFPD */
+ if (sz == 2 && insn[0] == 0x0F && (insn[1] == 0x15 || insn[1] == 0x14)) {
+ IRTemp s1 = newTemp(Ity_I64);
+ IRTemp s0 = newTemp(Ity_I64);
+ IRTemp d1 = newTemp(Ity_I64);
+ IRTemp d0 = newTemp(Ity_I64);
+ IRTemp sV = newTemp(Ity_V128);
+ IRTemp dV = newTemp(Ity_V128);
+ Bool hi = toBool(insn[1] == 0x15);
+
+ modrm = insn[2];
+ assign( dV, getXMMReg(gregOfRM(modrm)) );
+
+ if (epartIsReg(modrm)) {
+ assign( sV, getXMMReg(eregOfRM(modrm)) );
+ delta += 2+1;
+ DIP("unpck%sps %s,%s\n", hi ? "h" : "l",
+ nameXMMReg(eregOfRM(modrm)),
+ nameXMMReg(gregOfRM(modrm)));
+ } else {
+ addr = disAMode ( &alen, sorb, delta+2, dis_buf );
+ assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
+ delta += 2+alen;
+ DIP("unpck%sps %s,%s\n", hi ? "h" : "l",
+ dis_buf,
+ nameXMMReg(gregOfRM(modrm)));
+ }
+
+ assign( d1, unop(Iop_V128HIto64, mkexpr(dV)) );
+ assign( d0, unop(Iop_V128to64, mkexpr(dV)) );
+ assign( s1, unop(Iop_V128HIto64, mkexpr(sV)) );
+ assign( s0, unop(Iop_V128to64, mkexpr(sV)) );
+
+ if (hi) {
+ putXMMReg( gregOfRM(modrm),
+ binop(Iop_64HLtoV128, mkexpr(s1), mkexpr(d1)) );
+ } else {
+ putXMMReg( gregOfRM(modrm),
+ binop(Iop_64HLtoV128, mkexpr(s0), mkexpr(d0)) );
+ }
+
+ goto decode_success;
+ }
+
+ /* 66 0F 57 = XORPD -- G = G xor E */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x57) {
+ delta = dis_SSE_E_to_G_all( sorb, delta+2, "xorpd", Iop_XorV128 );
+ goto decode_success;
+ }
+
+ /* 66 0F 6B = PACKSSDW */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x6B) {
+ delta = dis_SSEint_E_to_G( sorb, delta+2,
+ "packssdw", Iop_QNarrow32Sx4, True );
+ goto decode_success;
+ }
+
+ /* 66 0F 63 = PACKSSWB */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x63) {
+ delta = dis_SSEint_E_to_G( sorb, delta+2,
+ "packsswb", Iop_QNarrow16Sx8, True );
+ goto decode_success;
+ }
+
+ /* 66 0F 67 = PACKUSWB */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x67) {
+ delta = dis_SSEint_E_to_G( sorb, delta+2,
+ "packuswb", Iop_QNarrow16Ux8, True );
+ goto decode_success;
+ }
+
+ /* 66 0F FC = PADDB */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xFC) {
+ delta = dis_SSEint_E_to_G( sorb, delta+2,
+ "paddb", Iop_Add8x16, False );
+ goto decode_success;
+ }
+
+ /* 66 0F FE = PADDD */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xFE) {
+ delta = dis_SSEint_E_to_G( sorb, delta+2,
+ "paddd", Iop_Add32x4, False );
+ goto decode_success;
+ }
+
+ /* ***--- this is an MMX class insn introduced in SSE2 ---*** */
+ /* 0F D4 = PADDQ -- add 64x1 */
+ if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xD4) {
+ do_MMX_preamble();
+ delta = dis_MMXop_regmem_to_reg (
+ sorb, delta+2, insn[1], "paddq", False );
+ goto decode_success;
+ }
+
+ /* 66 0F D4 = PADDQ */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD4) {
+ delta = dis_SSEint_E_to_G( sorb, delta+2,
+ "paddq", Iop_Add64x2, False );
+ goto decode_success;
+ }
+
+ /* 66 0F FD = PADDW */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xFD) {
+ delta = dis_SSEint_E_to_G( sorb, delta+2,
+ "paddw", Iop_Add16x8, False );
+ goto decode_success;
+ }
+
+ /* 66 0F EC = PADDSB */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xEC) {
+ delta = dis_SSEint_E_to_G( sorb, delta+2,
+ "paddsb", Iop_QAdd8Sx16, False );
+ goto decode_success;
+ }
+
+ /* 66 0F ED = PADDSW */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xED) {
+ delta = dis_SSEint_E_to_G( sorb, delta+2,
+ "paddsw", Iop_QAdd16Sx8, False );
+ goto decode_success;
+ }
+
+ /* 66 0F DC = PADDUSB */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xDC) {
+ delta = dis_SSEint_E_to_G( sorb, delta+2,
+ "paddusb", Iop_QAdd8Ux16, False );
+ goto decode_success;
+ }
+
+ /* 66 0F DD = PADDUSW */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xDD) {
+ delta = dis_SSEint_E_to_G( sorb, delta+2,
+ "paddusw", Iop_QAdd16Ux8, False );
+ goto decode_success;
+ }
+
+ /* 66 0F DB = PAND */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xDB) {
+ delta = dis_SSE_E_to_G_all( sorb, delta+2, "pand", Iop_AndV128 );
+ goto decode_success;
+ }
+
+ /* 66 0F DF = PANDN */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xDF) {
+ delta = dis_SSE_E_to_G_all_invG( sorb, delta+2, "pandn", Iop_AndV128 );
+ goto decode_success;
+ }
+
+ /* 66 0F E0 = PAVGB */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE0) {
+ delta = dis_SSEint_E_to_G( sorb, delta+2,
+ "pavgb", Iop_Avg8Ux16, False );
+ goto decode_success;
+ }
+
+ /* 66 0F E3 = PAVGW */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE3) {
+ delta = dis_SSEint_E_to_G( sorb, delta+2,
+ "pavgw", Iop_Avg16Ux8, False );
+ goto decode_success;
+ }
+
+ /* 66 0F 74 = PCMPEQB */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x74) {
+ delta = dis_SSEint_E_to_G( sorb, delta+2,
+ "pcmpeqb", Iop_CmpEQ8x16, False );
+ goto decode_success;
+ }
+
+ /* 66 0F 76 = PCMPEQD */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x76) {
+ delta = dis_SSEint_E_to_G( sorb, delta+2,
+ "pcmpeqd", Iop_CmpEQ32x4, False );
+ goto decode_success;
+ }
+
+ /* 66 0F 75 = PCMPEQW */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x75) {
+ delta = dis_SSEint_E_to_G( sorb, delta+2,
+ "pcmpeqw", Iop_CmpEQ16x8, False );
+ goto decode_success;
+ }
+
+ /* 66 0F 64 = PCMPGTB */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x64) {
+ delta = dis_SSEint_E_to_G( sorb, delta+2,
+ "pcmpgtb", Iop_CmpGT8Sx16, False );
+ goto decode_success;
+ }
+
+ /* 66 0F 66 = PCMPGTD */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x66) {
+ delta = dis_SSEint_E_to_G( sorb, delta+2,
+ "pcmpgtd", Iop_CmpGT32Sx4, False );
+ goto decode_success;
+ }
+
+ /* 66 0F 65 = PCMPGTW */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x65) {
+ delta = dis_SSEint_E_to_G( sorb, delta+2,
+ "pcmpgtw", Iop_CmpGT16Sx8, False );
+ goto decode_success;
+ }
+
+ /* 66 0F C5 = PEXTRW -- extract 16-bit field from xmm(E) and put
+ zero-extend of it in ireg(G). */
+ if (insn[0] == 0x0F && insn[1] == 0xC5) {
+ modrm = insn[2];
+ if (sz == 2 && epartIsReg(modrm)) {
+ t5 = newTemp(Ity_V128);
+ t4 = newTemp(Ity_I16);
+ assign(t5, getXMMReg(eregOfRM(modrm)));
+ breakup128to32s( t5, &t3, &t2, &t1, &t0 );
+ switch (insn[3] & 7) {
+ case 0: assign(t4, unop(Iop_32to16, mkexpr(t0))); break;
+ case 1: assign(t4, unop(Iop_32HIto16, mkexpr(t0))); break;
+ case 2: assign(t4, unop(Iop_32to16, mkexpr(t1))); break;
+ case 3: assign(t4, unop(Iop_32HIto16, mkexpr(t1))); break;
+ case 4: assign(t4, unop(Iop_32to16, mkexpr(t2))); break;
+ case 5: assign(t4, unop(Iop_32HIto16, mkexpr(t2))); break;
+ case 6: assign(t4, unop(Iop_32to16, mkexpr(t3))); break;
+ case 7: assign(t4, unop(Iop_32HIto16, mkexpr(t3))); break;
+ default: vassert(0); /*NOTREACHED*/
+ }
+ putIReg(4, gregOfRM(modrm), unop(Iop_16Uto32, mkexpr(t4)));
+ DIP("pextrw $%d,%s,%s\n",
+ (Int)insn[3], nameXMMReg(eregOfRM(modrm)),
+ nameIReg(4,gregOfRM(modrm)));
+ delta += 4;
+ goto decode_success;
+ }
+ /* else fall through */
+ }
+
+ /* 66 0F C4 = PINSRW -- get 16 bits from E(mem or low half ireg) and
+ put it into the specified lane of xmm(G). */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xC4) {
+ Int lane;
+ t4 = newTemp(Ity_I16);
+ modrm = insn[2];
+
+ if (epartIsReg(modrm)) {
+ assign(t4, getIReg(2, eregOfRM(modrm)));
+ delta += 3+1;
+ lane = insn[3+1-1];
+ DIP("pinsrw $%d,%s,%s\n", (Int)lane,
+ nameIReg(2,eregOfRM(modrm)),
+ nameXMMReg(gregOfRM(modrm)));
+ } else {
+ addr = disAMode ( &alen, sorb, delta+2, dis_buf );
+ delta += 3+alen;
+ lane = insn[3+alen-1];
+ assign(t4, loadLE(Ity_I16, mkexpr(addr)));
+ DIP("pinsrw $%d,%s,%s\n", (Int)lane,
+ dis_buf,
+ nameXMMReg(gregOfRM(modrm)));
+ }
+
+ putXMMRegLane16( gregOfRM(modrm), lane & 7, mkexpr(t4) );
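+ /* Only the low 3 bits of the immediate matter, since an XMM
+ register has just 8 16-bit lanes. */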
+ goto decode_success;
+ }
+
+ /* 66 0F F5 = PMADDWD -- Multiply and add packed integers from
+ E(xmm or mem) to G(xmm) */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF5) {
+ IRTemp s1V = newTemp(Ity_V128);
+ IRTemp s2V = newTemp(Ity_V128);
+ IRTemp dV = newTemp(Ity_V128);
+ IRTemp s1Hi = newTemp(Ity_I64);
+ IRTemp s1Lo = newTemp(Ity_I64);
+ IRTemp s2Hi = newTemp(Ity_I64);
+ IRTemp s2Lo = newTemp(Ity_I64);
+ IRTemp dHi = newTemp(Ity_I64);
+ IRTemp dLo = newTemp(Ity_I64);
+ modrm = insn[2];
+ if (epartIsReg(modrm)) {
+ assign( s1V, getXMMReg(eregOfRM(modrm)) );
+ delta += 2+1;
+ DIP("pmaddwd %s,%s\n", nameXMMReg(eregOfRM(modrm)),
+ nameXMMReg(gregOfRM(modrm)));
+ } else {
+ addr = disAMode ( &alen, sorb, delta+2, dis_buf );
+ assign( s1V, loadLE(Ity_V128, mkexpr(addr)) );
+ delta += 2+alen;
+ DIP("pmaddwd %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRM(modrm)));
+ }
+ assign( s2V, getXMMReg(gregOfRM(modrm)) );
+ assign( s1Hi, unop(Iop_V128HIto64, mkexpr(s1V)) );
+ assign( s1Lo, unop(Iop_V128to64, mkexpr(s1V)) );
+ assign( s2Hi, unop(Iop_V128HIto64, mkexpr(s2V)) );
+ assign( s2Lo, unop(Iop_V128to64, mkexpr(s2V)) );
+ assign( dHi, mkIRExprCCall(
+ Ity_I64, 0/*regparms*/,
+ "x86g_calculate_mmx_pmaddwd",
+ &x86g_calculate_mmx_pmaddwd,
+ mkIRExprVec_2( mkexpr(s1Hi), mkexpr(s2Hi))
+ ));
+ assign( dLo, mkIRExprCCall(
+ Ity_I64, 0/*regparms*/,
+ "x86g_calculate_mmx_pmaddwd",
+ &x86g_calculate_mmx_pmaddwd,
+ mkIRExprVec_2( mkexpr(s1Lo), mkexpr(s2Lo))
+ ));
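+ /* Each helper call is expected to condense four signed
+ 16x16->32 products into two I32 lanes: result lane i is
+ s1[2i]*s2[2i] + s1[2i+1]*s2[2i+1]. */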
+ assign( dV, binop(Iop_64HLtoV128, mkexpr(dHi), mkexpr(dLo)) );
+ putXMMReg(gregOfRM(modrm), mkexpr(dV));
+ goto decode_success;
+ }
+
+ /* 66 0F EE = PMAXSW -- 16x8 signed max */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xEE) {
+ delta = dis_SSEint_E_to_G( sorb, delta+2,
+ "pmaxsw", Iop_Max16Sx8, False );
+ goto decode_success;
+ }
+
+ /* 66 0F DE = PMAXUB -- 8x16 unsigned max */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xDE) {
+ delta = dis_SSEint_E_to_G( sorb, delta+2,
+ "pmaxub", Iop_Max8Ux16, False );
+ goto decode_success;
+ }
+
+ /* 66 0F EA = PMINSW -- 16x8 signed min */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xEA) {
+ delta = dis_SSEint_E_to_G( sorb, delta+2,
+ "pminsw", Iop_Min16Sx8, False );
+ goto decode_success;
+ }
+
+ /* 66 0F DA = PMINUB -- 8x16 unsigned min */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xDA) {
+ delta = dis_SSEint_E_to_G( sorb, delta+2,
+ "pminub", Iop_Min8Ux16, False );
+ goto decode_success;
+ }
+
+ /* 66 0F D7 = PMOVMSKB -- extract sign bits from each of 16 lanes in
+ xmm(E), turn them into a 16-bit value, and put zero-extend of it
+ ireg(G). Doing this directly is just too cumbersome; give up
+ therefore and call a helper. */
+ /* UInt x86g_calculate_sse_pmovmskb ( ULong w64hi, ULong w64lo ); */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD7) {
+ modrm = insn[2];
+ if (epartIsReg(modrm)) {
+ t0 = newTemp(Ity_I64);
+ t1 = newTemp(Ity_I64);
+ assign(t0, getXMMRegLane64(eregOfRM(modrm), 0));
+ assign(t1, getXMMRegLane64(eregOfRM(modrm), 1));
+ t5 = newTemp(Ity_I32);
+ assign(t5, mkIRExprCCall(
+ Ity_I32, 0/*regparms*/,
+ "x86g_calculate_sse_pmovmskb",
+ &x86g_calculate_sse_pmovmskb,
+ mkIRExprVec_2( mkexpr(t1), mkexpr(t0) )));
+ putIReg(4, gregOfRM(modrm), mkexpr(t5));
+ DIP("pmovmskb %s,%s\n", nameXMMReg(eregOfRM(modrm)),
+ nameIReg(4,gregOfRM(modrm)));
+ delta += 3;
+ goto decode_success;
+ }
+ /* else fall through */
+ }
+
+ /* 66 0F E4 = PMULHUW -- 16x8 hi-half of unsigned widening multiply */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE4) {
+ delta = dis_SSEint_E_to_G( sorb, delta+2,
+ "pmulhuw", Iop_MulHi16Ux8, False );
+ goto decode_success;
+ }
+
+ /* 66 0F E5 = PMULHW -- 16x8 hi-half of signed widening multiply */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE5) {
+ delta = dis_SSEint_E_to_G( sorb, delta+2,
+ "pmulhw", Iop_MulHi16Sx8, False );
+ goto decode_success;
+ }
+
+ /* 66 0F D5 = PMULLW -- 16x8 multiply */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD5) {
+ delta = dis_SSEint_E_to_G( sorb, delta+2,
+ "pmullw", Iop_Mul16x8, False );
+ goto decode_success;
+ }
+
+ /* ***--- this is an MMX class insn introduced in SSE2 ---*** */
+ /* 0F F4 = PMULUDQ -- unsigned widening multiply of 32-lanes 0 x
+ 0 to form 64-bit result */
+ if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xF4) {
+ IRTemp sV = newTemp(Ity_I64);
+ IRTemp dV = newTemp(Ity_I64);
+ t1 = newTemp(Ity_I32);
+ t0 = newTemp(Ity_I32);
+ modrm = insn[2];
+
+ do_MMX_preamble();
+ assign( dV, getMMXReg(gregOfRM(modrm)) );
+
+ if (epartIsReg(modrm)) {
+ assign( sV, getMMXReg(eregOfRM(modrm)) );
+ delta += 2+1;
+ DIP("pmuludq %s,%s\n", nameMMXReg(eregOfRM(modrm)),
+ nameMMXReg(gregOfRM(modrm)));
+ } else {
+ addr = disAMode ( &alen, sorb, delta+2, dis_buf );
+ assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
+ delta += 2+alen;
+ DIP("pmuludq %s,%s\n", dis_buf,
+ nameMMXReg(gregOfRM(modrm)));
+ }
+
+ assign( t0, unop(Iop_64to32, mkexpr(dV)) );
+ assign( t1, unop(Iop_64to32, mkexpr(sV)) );
+ putMMXReg( gregOfRM(modrm),
+ binop( Iop_MullU32, mkexpr(t0), mkexpr(t1) ) );
+ goto decode_success;
+ }
+
+ /* 66 0F F4 = PMULUDQ -- unsigned widening multiply of 32-lanes 0 x
+ 0 to form lower 64-bit half and lanes 2 x 2 to form upper 64-bit
+ half */
+ /* This is a really poor translation -- could be improved if
+ performance critical */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF4) {
+ IRTemp sV, dV;
+ IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
+ sV = newTemp(Ity_V128);
+ dV = newTemp(Ity_V128);
+ s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
+ t1 = newTemp(Ity_I64);
+ t0 = newTemp(Ity_I64);
+ modrm = insn[2];
+ assign( dV, getXMMReg(gregOfRM(modrm)) );
+
+ if (epartIsReg(modrm)) {
+ assign( sV, getXMMReg(eregOfRM(modrm)) );
+ delta += 2+1;
+ DIP("pmuludq %s,%s\n", nameXMMReg(eregOfRM(modrm)),
+ nameXMMReg(gregOfRM(modrm)));
+ } else {
+ addr = disAMode ( &alen, sorb, delta+2, dis_buf );
+ assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
+ delta += 2+alen;
+ DIP("pmuludq %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRM(modrm)));
+ }
+
+ breakup128to32s( dV, &d3, &d2, &d1, &d0 );
+ breakup128to32s( sV, &s3, &s2, &s1, &s0 );
+
+ assign( t0, binop( Iop_MullU32, mkexpr(d0), mkexpr(s0)) );
+ putXMMRegLane64( gregOfRM(modrm), 0, mkexpr(t0) );
+ assign( t1, binop( Iop_MullU32, mkexpr(d2), mkexpr(s2)) );
+ putXMMRegLane64( gregOfRM(modrm), 1, mkexpr(t1) );
+ goto decode_success;
+ }
+
+ /* 66 0F EB = POR */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xEB) {
+ delta = dis_SSE_E_to_G_all( sorb, delta+2, "por", Iop_OrV128 );
+ goto decode_success;
+ }
+
+ /* 66 0F F6 = PSADBW -- 2 x (8x8 -> 48 zeroes ++ u16) Sum Abs Diffs
+ from E(xmm or mem) to G(xmm) */
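+   /* Each 64-bit half is computed by a helper; a sketch of the
+      per-half semantics (illustration only, with getByte(x,i) a
+      hypothetical byte accessor):
+         UInt sum = 0;
+         for (Int i = 0; i < 8; i++) {
+            Int d = (Int)getByte(s1, i) - (Int)getByte(s2, i);
+            sum += (d < 0) ? -d : d;
+         }
+         return (ULong)sum;   // fits in 16 bits; upper 48 bits zero
+   */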
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF6) {
+ IRTemp s1V = newTemp(Ity_V128);
+ IRTemp s2V = newTemp(Ity_V128);
+ IRTemp dV = newTemp(Ity_V128);
+ IRTemp s1Hi = newTemp(Ity_I64);
+ IRTemp s1Lo = newTemp(Ity_I64);
+ IRTemp s2Hi = newTemp(Ity_I64);
+ IRTemp s2Lo = newTemp(Ity_I64);
+ IRTemp dHi = newTemp(Ity_I64);
+ IRTemp dLo = newTemp(Ity_I64);
+ modrm = insn[2];
+ if (epartIsReg(modrm)) {
+ assign( s1V, getXMMReg(eregOfRM(modrm)) );
+ delta += 2+1;
+ DIP("psadbw %s,%s\n", nameXMMReg(eregOfRM(modrm)),
+ nameXMMReg(gregOfRM(modrm)));
+ } else {
+ addr = disAMode ( &alen, sorb, delta+2, dis_buf );
+ assign( s1V, loadLE(Ity_V128, mkexpr(addr)) );
+ delta += 2+alen;
+ DIP("psadbw %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRM(modrm)));
+ }
+ assign( s2V, getXMMReg(gregOfRM(modrm)) );
+ assign( s1Hi, unop(Iop_V128HIto64, mkexpr(s1V)) );
+ assign( s1Lo, unop(Iop_V128to64, mkexpr(s1V)) );
+ assign( s2Hi, unop(Iop_V128HIto64, mkexpr(s2V)) );
+ assign( s2Lo, unop(Iop_V128to64, mkexpr(s2V)) );
+ assign( dHi, mkIRExprCCall(
+ Ity_I64, 0/*regparms*/,
+ "x86g_calculate_mmx_psadbw",
+ &x86g_calculate_mmx_psadbw,
+ mkIRExprVec_2( mkexpr(s1Hi), mkexpr(s2Hi))
+ ));
+ assign( dLo, mkIRExprCCall(
+ Ity_I64, 0/*regparms*/,
+ "x86g_calculate_mmx_psadbw",
+ &x86g_calculate_mmx_psadbw,
+ mkIRExprVec_2( mkexpr(s1Lo), mkexpr(s2Lo))
+ ));
+ assign( dV, binop(Iop_64HLtoV128, mkexpr(dHi), mkexpr(dLo))) ;
+ putXMMReg(gregOfRM(modrm), mkexpr(dV));
+ goto decode_success;
+ }
+
+ /* 66 0F 70 = PSHUFD -- rearrange 4x32 from E(xmm or mem) to G(xmm) */
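+   /* The imm8 selects a source lane for each result lane, 2 bits per
+      lane.  E.g. order == 0x1B (binary 00 01 10 11) gives
+         res = [s0, s1, s2, s3] (hi..lo),
+      i.e. the four 32-bit lanes reversed. */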
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x70) {
+ Int order;
+ IRTemp sV, dV, s3, s2, s1, s0;
+ s3 = s2 = s1 = s0 = IRTemp_INVALID;
+ sV = newTemp(Ity_V128);
+ dV = newTemp(Ity_V128);
+ modrm = insn[2];
+ if (epartIsReg(modrm)) {
+ assign( sV, getXMMReg(eregOfRM(modrm)) );
+ order = (Int)insn[3];
+ delta += 2+2;
+ DIP("pshufd $%d,%s,%s\n", order,
+ nameXMMReg(eregOfRM(modrm)),
+ nameXMMReg(gregOfRM(modrm)));
+ } else {
+ addr = disAMode ( &alen, sorb, delta+2, dis_buf );
+ assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
+ order = (Int)insn[2+alen];
+ delta += 3+alen;
+ DIP("pshufd $%d,%s,%s\n", order,
+ dis_buf,
+ nameXMMReg(gregOfRM(modrm)));
+ }
+ breakup128to32s( sV, &s3, &s2, &s1, &s0 );
+
+# define SEL(n) \
+ ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
+ assign(dV,
+ mk128from32s( SEL((order>>6)&3), SEL((order>>4)&3),
+ SEL((order>>2)&3), SEL((order>>0)&3) )
+ );
+ putXMMReg(gregOfRM(modrm), mkexpr(dV));
+# undef SEL
+ goto decode_success;
+ }
+
+ /* F3 0F 70 = PSHUFHW -- rearrange upper half 4x16 from E(xmm or
+ mem) to G(xmm), and copy lower half */
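+   /* Same 2-bits-per-lane imm8 scheme as PSHUFD, applied to the four
+      16-bit lanes of the upper half; the lower 64 bits pass through
+      unchanged. */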
+ if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x70) {
+ Int order;
+ IRTemp sVhi, dVhi, sV, dV, s3, s2, s1, s0;
+ s3 = s2 = s1 = s0 = IRTemp_INVALID;
+ sV = newTemp(Ity_V128);
+ dV = newTemp(Ity_V128);
+ sVhi = newTemp(Ity_I64);
+ dVhi = newTemp(Ity_I64);
+ modrm = insn[3];
+ if (epartIsReg(modrm)) {
+ assign( sV, getXMMReg(eregOfRM(modrm)) );
+ order = (Int)insn[4];
+ delta += 4+1;
+ DIP("pshufhw $%d,%s,%s\n", order,
+ nameXMMReg(eregOfRM(modrm)),
+ nameXMMReg(gregOfRM(modrm)));
+ } else {
+ addr = disAMode ( &alen, sorb, delta+3, dis_buf );
+ assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
+ order = (Int)insn[3+alen];
+ delta += 4+alen;
+ DIP("pshufhw $%d,%s,%s\n", order,
+ dis_buf,
+ nameXMMReg(gregOfRM(modrm)));
+ }
+ assign( sVhi, unop(Iop_V128HIto64, mkexpr(sV)) );
+ breakup64to16s( sVhi, &s3, &s2, &s1, &s0 );
+
+# define SEL(n) \
+ ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
+ assign(dVhi,
+ mk64from16s( SEL((order>>6)&3), SEL((order>>4)&3),
+ SEL((order>>2)&3), SEL((order>>0)&3) )
+ );
+ assign(dV, binop( Iop_64HLtoV128,
+ mkexpr(dVhi),
+ unop(Iop_V128to64, mkexpr(sV))) );
+ putXMMReg(gregOfRM(modrm), mkexpr(dV));
+# undef SEL
+ goto decode_success;
+ }
+
+ /* F2 0F 70 = PSHUFLW -- rearrange lower half 4x16 from E(xmm or
+ mem) to G(xmm), and copy upper half */
+ if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x70) {
+ Int order;
+ IRTemp sVlo, dVlo, sV, dV, s3, s2, s1, s0;
+ s3 = s2 = s1 = s0 = IRTemp_INVALID;
+ sV = newTemp(Ity_V128);
+ dV = newTemp(Ity_V128);
+ sVlo = newTemp(Ity_I64);
+ dVlo = newTemp(Ity_I64);
+ modrm = insn[3];
+ if (epartIsReg(modrm)) {
+ assign( sV, getXMMReg(eregOfRM(modrm)) );
+ order = (Int)insn[4];
+ delta += 4+1;
+ DIP("pshuflw $%d,%s,%s\n", order,
+ nameXMMReg(eregOfRM(modrm)),
+ nameXMMReg(gregOfRM(modrm)));
+ } else {
+ addr = disAMode ( &alen, sorb, delta+3, dis_buf );
+ assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
+ order = (Int)insn[3+alen];
+ delta += 4+alen;
+ DIP("pshuflw $%d,%s,%s\n", order,
+ dis_buf,
+ nameXMMReg(gregOfRM(modrm)));
+ }
+ assign( sVlo, unop(Iop_V128to64, mkexpr(sV)) );
+ breakup64to16s( sVlo, &s3, &s2, &s1, &s0 );
+
+# define SEL(n) \
+ ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
+ assign(dVlo,
+ mk64from16s( SEL((order>>6)&3), SEL((order>>4)&3),
+ SEL((order>>2)&3), SEL((order>>0)&3) )
+ );
+ assign(dV, binop( Iop_64HLtoV128,
+ unop(Iop_V128HIto64, mkexpr(sV)),
+ mkexpr(dVlo) ) );
+ putXMMReg(gregOfRM(modrm), mkexpr(dV));
+# undef SEL
+ goto decode_success;
+ }
+
+ /* 66 0F 72 /6 ib = PSLLD by immediate */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x72
+ && epartIsReg(insn[2])
+ && gregOfRM(insn[2]) == 6) {
+ delta = dis_SSE_shiftE_imm( delta+2, "pslld", Iop_ShlN32x4 );
+ goto decode_success;
+ }
+
+ /* 66 0F F2 = PSLLD by E */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF2) {
+ delta = dis_SSE_shiftG_byE( sorb, delta+2, "pslld", Iop_ShlN32x4 );
+ goto decode_success;
+ }
+
+ /* 66 0F 73 /7 ib = PSLLDQ by immediate */
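+   /* A 128-bit left shift by imm bytes, decomposed into 64-bit ops.
+      Sketch for the general case 0 < imm < 8:
+         lo' = lo << (8*imm);
+         hi' = (hi << (8*imm)) | (lo >> (8*(8-imm)));
+      imm == 0, imm == 8, 8 < imm < 16 and imm >= 16 are special-cased
+      below. */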
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x73
+ && epartIsReg(insn[2])
+ && gregOfRM(insn[2]) == 7) {
+ IRTemp sV, dV, hi64, lo64, hi64r, lo64r;
+ Int imm = (Int)insn[3];
+ Int reg = eregOfRM(insn[2]);
+ DIP("pslldq $%d,%s\n", imm, nameXMMReg(reg));
+ vassert(imm >= 0 && imm <= 255);
+ delta += 4;
+
+ sV = newTemp(Ity_V128);
+ dV = newTemp(Ity_V128);
+ hi64 = newTemp(Ity_I64);
+ lo64 = newTemp(Ity_I64);
+ hi64r = newTemp(Ity_I64);
+ lo64r = newTemp(Ity_I64);
+
+ if (imm >= 16) {
+ putXMMReg(reg, mkV128(0x0000));
+ goto decode_success;
+ }
+
+ assign( sV, getXMMReg(reg) );
+ assign( hi64, unop(Iop_V128HIto64, mkexpr(sV)) );
+ assign( lo64, unop(Iop_V128to64, mkexpr(sV)) );
+
+ if (imm == 0) {
+ assign( lo64r, mkexpr(lo64) );
+ assign( hi64r, mkexpr(hi64) );
+ }
+ else
+ if (imm == 8) {
+ assign( lo64r, mkU64(0) );
+ assign( hi64r, mkexpr(lo64) );
+ }
+ else
+ if (imm > 8) {
+ assign( lo64r, mkU64(0) );
+ assign( hi64r, binop( Iop_Shl64,
+ mkexpr(lo64),
+ mkU8( 8*(imm-8) ) ));
+ } else {
+ assign( lo64r, binop( Iop_Shl64,
+ mkexpr(lo64),
+ mkU8(8 * imm) ));
+ assign( hi64r,
+ binop( Iop_Or64,
+ binop(Iop_Shl64, mkexpr(hi64),
+ mkU8(8 * imm)),
+ binop(Iop_Shr64, mkexpr(lo64),
+ mkU8(8 * (8 - imm)) )
+ )
+ );
+ }
+ assign( dV, binop(Iop_64HLtoV128, mkexpr(hi64r), mkexpr(lo64r)) );
+ putXMMReg(reg, mkexpr(dV));
+ goto decode_success;
+ }
+
+ /* 66 0F 73 /6 ib = PSLLQ by immediate */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x73
+ && epartIsReg(insn[2])
+ && gregOfRM(insn[2]) == 6) {
+ delta = dis_SSE_shiftE_imm( delta+2, "psllq", Iop_ShlN64x2 );
+ goto decode_success;
+ }
+
+ /* 66 0F F3 = PSLLQ by E */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF3) {
+ delta = dis_SSE_shiftG_byE( sorb, delta+2, "psllq", Iop_ShlN64x2 );
+ goto decode_success;
+ }
+
+ /* 66 0F 71 /6 ib = PSLLW by immediate */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x71
+ && epartIsReg(insn[2])
+ && gregOfRM(insn[2]) == 6) {
+ delta = dis_SSE_shiftE_imm( delta+2, "psllw", Iop_ShlN16x8 );
+ goto decode_success;
+ }
+
+ /* 66 0F F1 = PSLLW by E */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF1) {
+ delta = dis_SSE_shiftG_byE( sorb, delta+2, "psllw", Iop_ShlN16x8 );
+ goto decode_success;
+ }
+
+ /* 66 0F 72 /4 ib = PSRAD by immediate */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x72
+ && epartIsReg(insn[2])
+ && gregOfRM(insn[2]) == 4) {
+ delta = dis_SSE_shiftE_imm( delta+2, "psrad", Iop_SarN32x4 );
+ goto decode_success;
+ }
+
+ /* 66 0F E2 = PSRAD by E */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE2) {
+ delta = dis_SSE_shiftG_byE( sorb, delta+2, "psrad", Iop_SarN32x4 );
+ goto decode_success;
+ }
+
+ /* 66 0F 71 /4 ib = PSRAW by immediate */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x71
+ && epartIsReg(insn[2])
+ && gregOfRM(insn[2]) == 4) {
+ delta = dis_SSE_shiftE_imm( delta+2, "psraw", Iop_SarN16x8 );
+ goto decode_success;
+ }
+
+ /* 66 0F E1 = PSRAW by E */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE1) {
+ delta = dis_SSE_shiftG_byE( sorb, delta+2, "psraw", Iop_SarN16x8 );
+ goto decode_success;
+ }
+
+ /* 66 0F 72 /2 ib = PSRLD by immediate */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x72
+ && epartIsReg(insn[2])
+ && gregOfRM(insn[2]) == 2) {
+ delta = dis_SSE_shiftE_imm( delta+2, "psrld", Iop_ShrN32x4 );
+ goto decode_success;
+ }
+
+ /* 66 0F D2 = PSRLD by E */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD2) {
+ delta = dis_SSE_shiftG_byE( sorb, delta+2, "psrld", Iop_ShrN32x4 );
+ goto decode_success;
+ }
+
+ /* 66 0F 73 /3 ib = PSRLDQ by immediate */
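+   /* Mirror image of the PSLLDQ decomposition above.  Sketch for
+      0 < imm < 8:
+         hi' = hi >> (8*imm);
+         lo' = (lo >> (8*imm)) | (hi << (8*(8-imm))); */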
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x73
+ && epartIsReg(insn[2])
+ && gregOfRM(insn[2]) == 3) {
+ IRTemp sV, dV, hi64, lo64, hi64r, lo64r;
+ Int imm = (Int)insn[3];
+ Int reg = eregOfRM(insn[2]);
+ DIP("psrldq $%d,%s\n", imm, nameXMMReg(reg));
+ vassert(imm >= 0 && imm <= 255);
+ delta += 4;
+
+ sV = newTemp(Ity_V128);
+ dV = newTemp(Ity_V128);
+ hi64 = newTemp(Ity_I64);
+ lo64 = newTemp(Ity_I64);
+ hi64r = newTemp(Ity_I64);
+ lo64r = newTemp(Ity_I64);
+
+ if (imm >= 16) {
+ putXMMReg(reg, mkV128(0x0000));
+ goto decode_success;
+ }
+
+ assign( sV, getXMMReg(reg) );
+ assign( hi64, unop(Iop_V128HIto64, mkexpr(sV)) );
+ assign( lo64, unop(Iop_V128to64, mkexpr(sV)) );
+
+ if (imm == 0) {
+ assign( lo64r, mkexpr(lo64) );
+ assign( hi64r, mkexpr(hi64) );
+ }
+ else
+ if (imm == 8) {
+ assign( hi64r, mkU64(0) );
+ assign( lo64r, mkexpr(hi64) );
+ }
+ else
+ if (imm > 8) {
+ assign( hi64r, mkU64(0) );
+ assign( lo64r, binop( Iop_Shr64,
+ mkexpr(hi64),
+ mkU8( 8*(imm-8) ) ));
+ } else {
+ assign( hi64r, binop( Iop_Shr64,
+ mkexpr(hi64),
+ mkU8(8 * imm) ));
+ assign( lo64r,
+ binop( Iop_Or64,
+ binop(Iop_Shr64, mkexpr(lo64),
+ mkU8(8 * imm)),
+ binop(Iop_Shl64, mkexpr(hi64),
+ mkU8(8 * (8 - imm)) )
+ )
+ );
+ }
+
+ assign( dV, binop(Iop_64HLtoV128, mkexpr(hi64r), mkexpr(lo64r)) );
+ putXMMReg(reg, mkexpr(dV));
+ goto decode_success;
+ }
+
+ /* 66 0F 73 /2 ib = PSRLQ by immediate */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x73
+ && epartIsReg(insn[2])
+ && gregOfRM(insn[2]) == 2) {
+ delta = dis_SSE_shiftE_imm( delta+2, "psrlq", Iop_ShrN64x2 );
+ goto decode_success;
+ }
+
+ /* 66 0F D3 = PSRLQ by E */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD3) {
+ delta = dis_SSE_shiftG_byE( sorb, delta+2, "psrlq", Iop_ShrN64x2 );
+ goto decode_success;
+ }
+
+ /* 66 0F 71 /2 ib = PSRLW by immediate */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x71
+ && epartIsReg(insn[2])
+ && gregOfRM(insn[2]) == 2) {
+ delta = dis_SSE_shiftE_imm( delta+2, "psrlw", Iop_ShrN16x8 );
+ goto decode_success;
+ }
+
+ /* 66 0F D1 = PSRLW by E */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD1) {
+ delta = dis_SSE_shiftG_byE( sorb, delta+2, "psrlw", Iop_ShrN16x8 );
+ goto decode_success;
+ }
+
+ /* 66 0F F8 = PSUBB */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF8) {
+ delta = dis_SSEint_E_to_G( sorb, delta+2,
+ "psubb", Iop_Sub8x16, False );
+ goto decode_success;
+ }
+
+ /* 66 0F FA = PSUBD */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xFA) {
+ delta = dis_SSEint_E_to_G( sorb, delta+2,
+ "psubd", Iop_Sub32x4, False );
+ goto decode_success;
+ }
+
+ /* ***--- this is an MMX class insn introduced in SSE2 ---*** */
+ /* 0F FB = PSUBQ -- sub 64x1 */
+ if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xFB) {
+ do_MMX_preamble();
+ delta = dis_MMXop_regmem_to_reg (
+ sorb, delta+2, insn[1], "psubq", False );
+ goto decode_success;
+ }
+
+ /* 66 0F FB = PSUBQ */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xFB) {
+ delta = dis_SSEint_E_to_G( sorb, delta+2,
+ "psubq", Iop_Sub64x2, False );
+ goto decode_success;
+ }
+
+ /* 66 0F F9 = PSUBW */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xF9) {
+ delta = dis_SSEint_E_to_G( sorb, delta+2,
+ "psubw", Iop_Sub16x8, False );
+ goto decode_success;
+ }
+
+ /* 66 0F E8 = PSUBSB */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE8) {
+ delta = dis_SSEint_E_to_G( sorb, delta+2,
+ "psubsb", Iop_QSub8Sx16, False );
+ goto decode_success;
+ }
+
+ /* 66 0F E9 = PSUBSW */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xE9) {
+ delta = dis_SSEint_E_to_G( sorb, delta+2,
+ "psubsw", Iop_QSub16Sx8, False );
+ goto decode_success;
+ }
+
+   /* 66 0F D8 = PSUBUSB */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD8) {
+ delta = dis_SSEint_E_to_G( sorb, delta+2,
+ "psubusb", Iop_QSub8Ux16, False );
+ goto decode_success;
+ }
+
+   /* 66 0F D9 = PSUBUSW */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD9) {
+ delta = dis_SSEint_E_to_G( sorb, delta+2,
+ "psubusw", Iop_QSub16Ux8, False );
+ goto decode_success;
+ }
+
+ /* 66 0F 68 = PUNPCKHBW */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x68) {
+ delta = dis_SSEint_E_to_G( sorb, delta+2,
+ "punpckhbw",
+ Iop_InterleaveHI8x16, True );
+ goto decode_success;
+ }
+
+ /* 66 0F 6A = PUNPCKHDQ */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x6A) {
+ delta = dis_SSEint_E_to_G( sorb, delta+2,
+ "punpckhdq",
+ Iop_InterleaveHI32x4, True );
+ goto decode_success;
+ }
+
+ /* 66 0F 6D = PUNPCKHQDQ */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x6D) {
+ delta = dis_SSEint_E_to_G( sorb, delta+2,
+ "punpckhqdq",
+ Iop_InterleaveHI64x2, True );
+ goto decode_success;
+ }
+
+ /* 66 0F 69 = PUNPCKHWD */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x69) {
+ delta = dis_SSEint_E_to_G( sorb, delta+2,
+ "punpckhwd",
+ Iop_InterleaveHI16x8, True );
+ goto decode_success;
+ }
+
+ /* 66 0F 60 = PUNPCKLBW */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x60) {
+ delta = dis_SSEint_E_to_G( sorb, delta+2,
+ "punpcklbw",
+ Iop_InterleaveLO8x16, True );
+ goto decode_success;
+ }
+
+ /* 66 0F 62 = PUNPCKLDQ */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x62) {
+ delta = dis_SSEint_E_to_G( sorb, delta+2,
+ "punpckldq",
+ Iop_InterleaveLO32x4, True );
+ goto decode_success;
+ }
+
+ /* 66 0F 6C = PUNPCKLQDQ */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x6C) {
+ delta = dis_SSEint_E_to_G( sorb, delta+2,
+ "punpcklqdq",
+ Iop_InterleaveLO64x2, True );
+ goto decode_success;
+ }
+
+ /* 66 0F 61 = PUNPCKLWD */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x61) {
+ delta = dis_SSEint_E_to_G( sorb, delta+2,
+ "punpcklwd",
+ Iop_InterleaveLO16x8, True );
+ goto decode_success;
+ }
+
+ /* 66 0F EF = PXOR */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xEF) {
+ delta = dis_SSE_E_to_G_all( sorb, delta+2, "pxor", Iop_XorV128 );
+ goto decode_success;
+ }
+
+//-- /* FXSAVE/FXRSTOR m32 -- load/store the FPU/MMX/SSE state. */
+//-- if (insn[0] == 0x0F && insn[1] == 0xAE
+//-- && (!epartIsReg(insn[2]))
+//-- && (gregOfRM(insn[2]) == 1 || gregOfRM(insn[2]) == 0) ) {
+//-- Bool store = gregOfRM(insn[2]) == 0;
+//-- vg_assert(sz == 4);
+//-- pair = disAMode ( cb, sorb, eip+2, dis_buf );
+//-- t1 = LOW24(pair);
+//-- eip += 2+HI8(pair);
+//-- uInstr3(cb, store ? SSE2a_MemWr : SSE2a_MemRd, 512,
+//-- Lit16, (((UShort)insn[0]) << 8) | (UShort)insn[1],
+//-- Lit16, (UShort)insn[2],
+//-- TempReg, t1 );
+//-- DIP("fx%s %s\n", store ? "save" : "rstor", dis_buf );
+//-- goto decode_success;
+//-- }
+
+ /* 0F AE /7 = CLFLUSH -- flush cache line */
+ if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xAE
+ && !epartIsReg(insn[2]) && gregOfRM(insn[2]) == 7) {
+
+ /* This is something of a hack. We need to know the size of the
+ cache line containing addr. Since we don't (easily), assume
+ 256 on the basis that no real cache would have a line that
+ big. It's safe to invalidate more stuff than we need, just
+ inefficient. */
+ UInt lineszB = 256;
+
+ addr = disAMode ( &alen, sorb, delta+2, dis_buf );
+ delta += 2+alen;
+
+ /* Round addr down to the start of the containing block. */
+ stmt( IRStmt_Put(
+ OFFB_TISTART,
+ binop( Iop_And32,
+ mkexpr(addr),
+ mkU32( ~(lineszB-1) ))) );
+
+ stmt( IRStmt_Put(OFFB_TILEN, mkU32(lineszB) ) );
+
+ irsb->jumpkind = Ijk_TInval;
+ irsb->next = mkU32(guest_EIP_bbstart+delta);
+ dres.whatNext = Dis_StopHere;
+
+ DIP("clflush %s\n", dis_buf);
+ goto decode_success;
+ }
+
+ /* ---------------------------------------------------- */
+ /* --- end of the SSE2 decoder. --- */
+ /* ---------------------------------------------------- */
+
+ /* ---------------------------------------------------- */
+ /* --- start of the SSE3 decoder. --- */
+ /* ---------------------------------------------------- */
+
+ /* Skip parts of the decoder which don't apply given the stated
+ guest subarchitecture. */
+ /* if (0 == (archinfo->hwcaps & VEX_HWCAPS_X86_SSE3)) */
+   /* In fact this is highly bogus; we accept SSE3 insns even on an
+      SSE2-only guest since they turn into IR which can be re-emitted
+      successfully on an SSE2 host. */
+ if (0 == (archinfo->hwcaps & VEX_HWCAPS_X86_SSE2))
+ goto after_sse_decoders; /* no SSE3 capabilities */
+
+ insn = (UChar*)&guest_code[delta];
+
+ /* F3 0F 12 = MOVSLDUP -- move from E (mem or xmm) to G (xmm),
+ duplicating some lanes (2:2:0:0). */
+ /* F3 0F 16 = MOVSHDUP -- move from E (mem or xmm) to G (xmm),
+ duplicating some lanes (3:3:1:1). */
+ if (sz == 4 && insn[0] == 0xF3 && insn[1] == 0x0F
+ && (insn[2] == 0x12 || insn[2] == 0x16)) {
+ IRTemp s3, s2, s1, s0;
+ IRTemp sV = newTemp(Ity_V128);
+ Bool isH = insn[2] == 0x16;
+ s3 = s2 = s1 = s0 = IRTemp_INVALID;
+
+ modrm = insn[3];
+ if (epartIsReg(modrm)) {
+ assign( sV, getXMMReg( eregOfRM(modrm)) );
+ DIP("movs%cdup %s,%s\n", isH ? 'h' : 'l',
+ nameXMMReg(eregOfRM(modrm)),
+ nameXMMReg(gregOfRM(modrm)));
+ delta += 3+1;
+ } else {
+ addr = disAMode ( &alen, sorb, delta+3, dis_buf );
+ gen_SEGV_if_not_16_aligned( addr );
+ assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
+ DIP("movs%cdup %s,%s\n", isH ? 'h' : 'l',
+ dis_buf,
+ nameXMMReg(gregOfRM(modrm)));
+ delta += 3+alen;
+ }
+
+ breakup128to32s( sV, &s3, &s2, &s1, &s0 );
+ putXMMReg( gregOfRM(modrm),
+ isH ? mk128from32s( s3, s3, s1, s1 )
+ : mk128from32s( s2, s2, s0, s0 ) );
+ goto decode_success;
+ }
+
+ /* F2 0F 12 = MOVDDUP -- move from E (mem or xmm) to G (xmm),
+ duplicating some lanes (0:1:0:1). */
+ if (sz == 4 && insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x12) {
+ IRTemp sV = newTemp(Ity_V128);
+ IRTemp d0 = newTemp(Ity_I64);
+
+ modrm = insn[3];
+ if (epartIsReg(modrm)) {
+ assign( sV, getXMMReg( eregOfRM(modrm)) );
+ DIP("movddup %s,%s\n", nameXMMReg(eregOfRM(modrm)),
+ nameXMMReg(gregOfRM(modrm)));
+ delta += 3+1;
+ assign ( d0, unop(Iop_V128to64, mkexpr(sV)) );
+ } else {
+ addr = disAMode ( &alen, sorb, delta+3, dis_buf );
+ assign( d0, loadLE(Ity_I64, mkexpr(addr)) );
+ DIP("movddup %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRM(modrm)));
+ delta += 3+alen;
+ }
+
+ putXMMReg( gregOfRM(modrm), binop(Iop_64HLtoV128,mkexpr(d0),mkexpr(d0)) );
+ goto decode_success;
+ }
+
+ /* F2 0F D0 = ADDSUBPS -- 32x4 +/-/+/- from E (mem or xmm) to G (xmm). */
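+   /* Lane-wise (hi..lo): res = [g3+e3, g2-e2, g1+e1, g0-e0], i.e.
+      odd lanes add, even lanes subtract. */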
+ if (sz == 4 && insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0xD0) {
+ IRTemp a3, a2, a1, a0, s3, s2, s1, s0;
+ IRTemp eV = newTemp(Ity_V128);
+ IRTemp gV = newTemp(Ity_V128);
+ IRTemp addV = newTemp(Ity_V128);
+ IRTemp subV = newTemp(Ity_V128);
+ a3 = a2 = a1 = a0 = s3 = s2 = s1 = s0 = IRTemp_INVALID;
+
+ modrm = insn[3];
+ if (epartIsReg(modrm)) {
+ assign( eV, getXMMReg( eregOfRM(modrm)) );
+ DIP("addsubps %s,%s\n", nameXMMReg(eregOfRM(modrm)),
+ nameXMMReg(gregOfRM(modrm)));
+ delta += 3+1;
+ } else {
+ addr = disAMode ( &alen, sorb, delta+3, dis_buf );
+ assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
+ DIP("addsubps %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRM(modrm)));
+ delta += 3+alen;
+ }
+
+ assign( gV, getXMMReg(gregOfRM(modrm)) );
+
+ assign( addV, binop(Iop_Add32Fx4, mkexpr(gV), mkexpr(eV)) );
+ assign( subV, binop(Iop_Sub32Fx4, mkexpr(gV), mkexpr(eV)) );
+
+ breakup128to32s( addV, &a3, &a2, &a1, &a0 );
+ breakup128to32s( subV, &s3, &s2, &s1, &s0 );
+
+ putXMMReg( gregOfRM(modrm), mk128from32s( a3, s2, a1, s0 ));
+ goto decode_success;
+ }
+
+   /* 66 0F D0 = ADDSUBPD -- 64x2 +/- from E (mem or xmm) to G (xmm). */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xD0) {
+ IRTemp eV = newTemp(Ity_V128);
+ IRTemp gV = newTemp(Ity_V128);
+ IRTemp addV = newTemp(Ity_V128);
+ IRTemp subV = newTemp(Ity_V128);
+ IRTemp a1 = newTemp(Ity_I64);
+ IRTemp s0 = newTemp(Ity_I64);
+
+ modrm = insn[2];
+ if (epartIsReg(modrm)) {
+ assign( eV, getXMMReg( eregOfRM(modrm)) );
+ DIP("addsubpd %s,%s\n", nameXMMReg(eregOfRM(modrm)),
+ nameXMMReg(gregOfRM(modrm)));
+ delta += 2+1;
+ } else {
+ addr = disAMode ( &alen, sorb, delta+2, dis_buf );
+ assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
+ DIP("addsubpd %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRM(modrm)));
+ delta += 2+alen;
+ }
+
+ assign( gV, getXMMReg(gregOfRM(modrm)) );
+
+ assign( addV, binop(Iop_Add64Fx2, mkexpr(gV), mkexpr(eV)) );
+ assign( subV, binop(Iop_Sub64Fx2, mkexpr(gV), mkexpr(eV)) );
+
+ assign( a1, unop(Iop_V128HIto64, mkexpr(addV) ));
+ assign( s0, unop(Iop_V128to64, mkexpr(subV) ));
+
+ putXMMReg( gregOfRM(modrm),
+ binop(Iop_64HLtoV128, mkexpr(a1), mkexpr(s0)) );
+ goto decode_success;
+ }
+
+ /* F2 0F 7D = HSUBPS -- 32x4 sub across from E (mem or xmm) to G (xmm). */
+ /* F2 0F 7C = HADDPS -- 32x4 add across from E (mem or xmm) to G (xmm). */
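+   /* E.g. for hadd, the result (hi..lo) is
+         [e3+e2, e1+e0, g3+g2, g1+g0]
+      realised below as Add32Fx4(leftV, rightV) with
+         leftV = [e2, e0, g2, g0] and rightV = [e3, e1, g3, g1]. */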
+ if (sz == 4 && insn[0] == 0xF2 && insn[1] == 0x0F
+ && (insn[2] == 0x7C || insn[2] == 0x7D)) {
+ IRTemp e3, e2, e1, e0, g3, g2, g1, g0;
+ IRTemp eV = newTemp(Ity_V128);
+ IRTemp gV = newTemp(Ity_V128);
+ IRTemp leftV = newTemp(Ity_V128);
+ IRTemp rightV = newTemp(Ity_V128);
+ Bool isAdd = insn[2] == 0x7C;
+ HChar* str = isAdd ? "add" : "sub";
+ e3 = e2 = e1 = e0 = g3 = g2 = g1 = g0 = IRTemp_INVALID;
+
+ modrm = insn[3];
+ if (epartIsReg(modrm)) {
+ assign( eV, getXMMReg( eregOfRM(modrm)) );
+ DIP("h%sps %s,%s\n", str, nameXMMReg(eregOfRM(modrm)),
+ nameXMMReg(gregOfRM(modrm)));
+ delta += 3+1;
+ } else {
+ addr = disAMode ( &alen, sorb, delta+3, dis_buf );
+ assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
+ DIP("h%sps %s,%s\n", str, dis_buf,
+ nameXMMReg(gregOfRM(modrm)));
+ delta += 3+alen;
+ }
+
+ assign( gV, getXMMReg(gregOfRM(modrm)) );
+
+ breakup128to32s( eV, &e3, &e2, &e1, &e0 );
+ breakup128to32s( gV, &g3, &g2, &g1, &g0 );
+
+ assign( leftV, mk128from32s( e2, e0, g2, g0 ) );
+ assign( rightV, mk128from32s( e3, e1, g3, g1 ) );
+
+ putXMMReg( gregOfRM(modrm),
+ binop(isAdd ? Iop_Add32Fx4 : Iop_Sub32Fx4,
+ mkexpr(leftV), mkexpr(rightV) ) );
+ goto decode_success;
+ }
+
+ /* 66 0F 7D = HSUBPD -- 64x2 sub across from E (mem or xmm) to G (xmm). */
+ /* 66 0F 7C = HADDPD -- 64x2 add across from E (mem or xmm) to G (xmm). */
+ if (sz == 2 && insn[0] == 0x0F && (insn[1] == 0x7C || insn[1] == 0x7D)) {
+ IRTemp e1 = newTemp(Ity_I64);
+ IRTemp e0 = newTemp(Ity_I64);
+ IRTemp g1 = newTemp(Ity_I64);
+ IRTemp g0 = newTemp(Ity_I64);
+ IRTemp eV = newTemp(Ity_V128);
+ IRTemp gV = newTemp(Ity_V128);
+ IRTemp leftV = newTemp(Ity_V128);
+ IRTemp rightV = newTemp(Ity_V128);
+ Bool isAdd = insn[1] == 0x7C;
+ HChar* str = isAdd ? "add" : "sub";
+
+ modrm = insn[2];
+ if (epartIsReg(modrm)) {
+ assign( eV, getXMMReg( eregOfRM(modrm)) );
+ DIP("h%spd %s,%s\n", str, nameXMMReg(eregOfRM(modrm)),
+ nameXMMReg(gregOfRM(modrm)));
+ delta += 2+1;
+ } else {
+ addr = disAMode ( &alen, sorb, delta+2, dis_buf );
+ assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
+ DIP("h%spd %s,%s\n", str, dis_buf,
+ nameXMMReg(gregOfRM(modrm)));
+ delta += 2+alen;
+ }
+
+ assign( gV, getXMMReg(gregOfRM(modrm)) );
+
+ assign( e1, unop(Iop_V128HIto64, mkexpr(eV) ));
+ assign( e0, unop(Iop_V128to64, mkexpr(eV) ));
+ assign( g1, unop(Iop_V128HIto64, mkexpr(gV) ));
+ assign( g0, unop(Iop_V128to64, mkexpr(gV) ));
+
+ assign( leftV, binop(Iop_64HLtoV128, mkexpr(e0),mkexpr(g0)) );
+ assign( rightV, binop(Iop_64HLtoV128, mkexpr(e1),mkexpr(g1)) );
+
+ putXMMReg( gregOfRM(modrm),
+ binop(isAdd ? Iop_Add64Fx2 : Iop_Sub64Fx2,
+ mkexpr(leftV), mkexpr(rightV) ) );
+ goto decode_success;
+ }
+
+ /* F2 0F F0 = LDDQU -- move from E (mem or xmm) to G (xmm). */
+ if (sz == 4 && insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0xF0) {
+ modrm = getIByte(delta+3);
+ if (epartIsReg(modrm)) {
+ goto decode_failure;
+ } else {
+ addr = disAMode ( &alen, sorb, delta+3, dis_buf );
+ putXMMReg( gregOfRM(modrm),
+ loadLE(Ity_V128, mkexpr(addr)) );
+ DIP("lddqu %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRM(modrm)));
+ delta += 3+alen;
+ }
+ goto decode_success;
+ }
+
+ /* ---------------------------------------------------- */
+ /* --- end of the SSE3 decoder. --- */
+ /* ---------------------------------------------------- */
+
+ /* ---------------------------------------------------- */
+ /* --- start of the SSSE3 decoder. --- */
+ /* ---------------------------------------------------- */
+
+ /* 0F 38 04 = PMADDUBSW -- Multiply and Add Packed Signed and
+ Unsigned Bytes (MMX) */
+ if (sz == 4
+ && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x04) {
+ IRTemp sV = newTemp(Ity_I64);
+ IRTemp dV = newTemp(Ity_I64);
+ IRTemp sVoddsSX = newTemp(Ity_I64);
+ IRTemp sVevensSX = newTemp(Ity_I64);
+ IRTemp dVoddsZX = newTemp(Ity_I64);
+ IRTemp dVevensZX = newTemp(Ity_I64);
+
+ modrm = insn[3];
+ do_MMX_preamble();
+ assign( dV, getMMXReg(gregOfRM(modrm)) );
+
+ if (epartIsReg(modrm)) {
+ assign( sV, getMMXReg(eregOfRM(modrm)) );
+ delta += 3+1;
+ DIP("pmaddubsw %s,%s\n", nameMMXReg(eregOfRM(modrm)),
+ nameMMXReg(gregOfRM(modrm)));
+ } else {
+ addr = disAMode ( &alen, sorb, delta+3, dis_buf );
+ assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
+ delta += 3+alen;
+ DIP("pmaddubsw %s,%s\n", dis_buf,
+ nameMMXReg(gregOfRM(modrm)));
+ }
+
+ /* compute dV unsigned x sV signed */
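+      /* Per-lane sketch: for each 16-bit result lane i,
+            res[i] = SatS16( (UChar)d8[2i]   * (Char)s8[2i]
+                           + (UChar)d8[2i+1] * (Char)s8[2i+1] )
+         realised by sign-/zero-extending the odd and even bytes into
+         16-bit lanes and combining with a signed saturating add. */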
+ assign( sVoddsSX,
+ binop(Iop_SarN16x4, mkexpr(sV), mkU8(8)) );
+ assign( sVevensSX,
+ binop(Iop_SarN16x4,
+ binop(Iop_ShlN16x4, mkexpr(sV), mkU8(8)),
+ mkU8(8)) );
+ assign( dVoddsZX,
+ binop(Iop_ShrN16x4, mkexpr(dV), mkU8(8)) );
+ assign( dVevensZX,
+ binop(Iop_ShrN16x4,
+ binop(Iop_ShlN16x4, mkexpr(dV), mkU8(8)),
+ mkU8(8)) );
+
+ putMMXReg(
+ gregOfRM(modrm),
+ binop(Iop_QAdd16Sx4,
+ binop(Iop_Mul16x4, mkexpr(sVoddsSX), mkexpr(dVoddsZX)),
+ binop(Iop_Mul16x4, mkexpr(sVevensSX), mkexpr(dVevensZX))
+ )
+ );
+ goto decode_success;
+ }
+
+ /* 66 0F 38 04 = PMADDUBSW -- Multiply and Add Packed Signed and
+ Unsigned Bytes (XMM) */
+ if (sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x04) {
+ IRTemp sV = newTemp(Ity_V128);
+ IRTemp dV = newTemp(Ity_V128);
+ IRTemp sVoddsSX = newTemp(Ity_V128);
+ IRTemp sVevensSX = newTemp(Ity_V128);
+ IRTemp dVoddsZX = newTemp(Ity_V128);
+ IRTemp dVevensZX = newTemp(Ity_V128);
+
+ modrm = insn[3];
+ assign( dV, getXMMReg(gregOfRM(modrm)) );
+
+ if (epartIsReg(modrm)) {
+ assign( sV, getXMMReg(eregOfRM(modrm)) );
+ delta += 3+1;
+ DIP("pmaddubsw %s,%s\n", nameXMMReg(eregOfRM(modrm)),
+ nameXMMReg(gregOfRM(modrm)));
+ } else {
+ addr = disAMode ( &alen, sorb, delta+3, dis_buf );
+ gen_SEGV_if_not_16_aligned( addr );
+ assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
+ delta += 3+alen;
+ DIP("pmaddubsw %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRM(modrm)));
+ }
+
+ /* compute dV unsigned x sV signed */
+ assign( sVoddsSX,
+ binop(Iop_SarN16x8, mkexpr(sV), mkU8(8)) );
+ assign( sVevensSX,
+ binop(Iop_SarN16x8,
+ binop(Iop_ShlN16x8, mkexpr(sV), mkU8(8)),
+ mkU8(8)) );
+ assign( dVoddsZX,
+ binop(Iop_ShrN16x8, mkexpr(dV), mkU8(8)) );
+ assign( dVevensZX,
+ binop(Iop_ShrN16x8,
+ binop(Iop_ShlN16x8, mkexpr(dV), mkU8(8)),
+ mkU8(8)) );
+
+ putXMMReg(
+ gregOfRM(modrm),
+ binop(Iop_QAdd16Sx8,
+ binop(Iop_Mul16x8, mkexpr(sVoddsSX), mkexpr(dVoddsZX)),
+ binop(Iop_Mul16x8, mkexpr(sVevensSX), mkexpr(dVevensZX))
+ )
+ );
+ goto decode_success;
+ }
+
+ /* ***--- these are MMX class insns introduced in SSSE3 ---*** */
+ /* 0F 38 03 = PHADDSW -- 16x4 signed qadd across from E (mem or
+ mmx) and G to G (mmx). */
+ /* 0F 38 07 = PHSUBSW -- 16x4 signed qsub across from E (mem or
+ mmx) and G to G (mmx). */
+ /* 0F 38 01 = PHADDW -- 16x4 add across from E (mem or mmx) and G
+ to G (mmx). */
+ /* 0F 38 05 = PHSUBW -- 16x4 sub across from E (mem or mmx) and G
+ to G (mmx). */
+ /* 0F 38 02 = PHADDD -- 32x2 add across from E (mem or mmx) and G
+ to G (mmx). */
+ /* 0F 38 06 = PHSUBD -- 32x2 sub across from E (mem or mmx) and G
+ to G (mmx). */
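+   /* E.g. PHADDW with d = [d3,d2,d1,d0] and s = [s3,s2,s1,s0] gives
+         res = [s3+s2, s1+s0, d3+d2, d1+d0] (hi..lo),
+      realised below as opV64( CatEven(s,d), CatOdd(s,d) ). */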
+
+ if (sz == 4
+ && insn[0] == 0x0F && insn[1] == 0x38
+ && (insn[2] == 0x03 || insn[2] == 0x07 || insn[2] == 0x01
+ || insn[2] == 0x05 || insn[2] == 0x02 || insn[2] == 0x06)) {
+ HChar* str = "???";
+ IROp opV64 = Iop_INVALID;
+ IROp opCatO = Iop_CatOddLanes16x4;
+ IROp opCatE = Iop_CatEvenLanes16x4;
+ IRTemp sV = newTemp(Ity_I64);
+ IRTemp dV = newTemp(Ity_I64);
+
+ modrm = insn[3];
+
+ switch (insn[2]) {
+ case 0x03: opV64 = Iop_QAdd16Sx4; str = "addsw"; break;
+ case 0x07: opV64 = Iop_QSub16Sx4; str = "subsw"; break;
+ case 0x01: opV64 = Iop_Add16x4; str = "addw"; break;
+ case 0x05: opV64 = Iop_Sub16x4; str = "subw"; break;
+ case 0x02: opV64 = Iop_Add32x2; str = "addd"; break;
+ case 0x06: opV64 = Iop_Sub32x2; str = "subd"; break;
+ default: vassert(0);
+ }
+ if (insn[2] == 0x02 || insn[2] == 0x06) {
+ opCatO = Iop_InterleaveHI32x2;
+ opCatE = Iop_InterleaveLO32x2;
+ }
+
+ do_MMX_preamble();
+ assign( dV, getMMXReg(gregOfRM(modrm)) );
+
+ if (epartIsReg(modrm)) {
+ assign( sV, getMMXReg(eregOfRM(modrm)) );
+ delta += 3+1;
+ DIP("ph%s %s,%s\n", str, nameMMXReg(eregOfRM(modrm)),
+ nameMMXReg(gregOfRM(modrm)));
+ } else {
+ addr = disAMode ( &alen, sorb, delta+3, dis_buf );
+ assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
+ delta += 3+alen;
+ DIP("ph%s %s,%s\n", str, dis_buf,
+ nameMMXReg(gregOfRM(modrm)));
+ }
+
+ putMMXReg(
+ gregOfRM(modrm),
+ binop(opV64,
+ binop(opCatE,mkexpr(sV),mkexpr(dV)),
+ binop(opCatO,mkexpr(sV),mkexpr(dV))
+ )
+ );
+ goto decode_success;
+ }
+
+ /* 66 0F 38 03 = PHADDSW -- 16x8 signed qadd across from E (mem or
+ xmm) and G to G (xmm). */
+ /* 66 0F 38 07 = PHSUBSW -- 16x8 signed qsub across from E (mem or
+ xmm) and G to G (xmm). */
+ /* 66 0F 38 01 = PHADDW -- 16x8 add across from E (mem or xmm) and
+ G to G (xmm). */
+ /* 66 0F 38 05 = PHSUBW -- 16x8 sub across from E (mem or xmm) and
+ G to G (xmm). */
+ /* 66 0F 38 02 = PHADDD -- 32x4 add across from E (mem or xmm) and
+ G to G (xmm). */
+ /* 66 0F 38 06 = PHSUBD -- 32x4 sub across from E (mem or xmm) and
+ G to G (xmm). */
+
+ if (sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x38
+ && (insn[2] == 0x03 || insn[2] == 0x07 || insn[2] == 0x01
+ || insn[2] == 0x05 || insn[2] == 0x02 || insn[2] == 0x06)) {
+ HChar* str = "???";
+ IROp opV64 = Iop_INVALID;
+ IROp opCatO = Iop_CatOddLanes16x4;
+ IROp opCatE = Iop_CatEvenLanes16x4;
+ IRTemp sV = newTemp(Ity_V128);
+ IRTemp dV = newTemp(Ity_V128);
+ IRTemp sHi = newTemp(Ity_I64);
+ IRTemp sLo = newTemp(Ity_I64);
+ IRTemp dHi = newTemp(Ity_I64);
+ IRTemp dLo = newTemp(Ity_I64);
+
+ modrm = insn[3];
+
+ switch (insn[2]) {
+ case 0x03: opV64 = Iop_QAdd16Sx4; str = "addsw"; break;
+ case 0x07: opV64 = Iop_QSub16Sx4; str = "subsw"; break;
+ case 0x01: opV64 = Iop_Add16x4; str = "addw"; break;
+ case 0x05: opV64 = Iop_Sub16x4; str = "subw"; break;
+ case 0x02: opV64 = Iop_Add32x2; str = "addd"; break;
+ case 0x06: opV64 = Iop_Sub32x2; str = "subd"; break;
+ default: vassert(0);
+ }
+ if (insn[2] == 0x02 || insn[2] == 0x06) {
+ opCatO = Iop_InterleaveHI32x2;
+ opCatE = Iop_InterleaveLO32x2;
+ }
+
+ assign( dV, getXMMReg(gregOfRM(modrm)) );
+
+ if (epartIsReg(modrm)) {
+ assign( sV, getXMMReg( eregOfRM(modrm)) );
+ DIP("ph%s %s,%s\n", str, nameXMMReg(eregOfRM(modrm)),
+ nameXMMReg(gregOfRM(modrm)));
+ delta += 3+1;
+ } else {
+ addr = disAMode ( &alen, sorb, delta+3, dis_buf );
+ gen_SEGV_if_not_16_aligned( addr );
+ assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
+ DIP("ph%s %s,%s\n", str, dis_buf,
+ nameXMMReg(gregOfRM(modrm)));
+ delta += 3+alen;
+ }
+
+ assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
+ assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
+ assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
+ assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
+
+      /* This isn't a particularly efficient way to compute the
+         result, but at least it avoids a proliferation of IROps,
+         hence avoids complicating all the backends. */
+ putXMMReg(
+ gregOfRM(modrm),
+ binop(Iop_64HLtoV128,
+ binop(opV64,
+ binop(opCatE,mkexpr(sHi),mkexpr(sLo)),
+ binop(opCatO,mkexpr(sHi),mkexpr(sLo))
+ ),
+ binop(opV64,
+ binop(opCatE,mkexpr(dHi),mkexpr(dLo)),
+ binop(opCatO,mkexpr(dHi),mkexpr(dLo))
+ )
+ )
+ );
+ goto decode_success;
+ }
+
+ /* 0F 38 0B = PMULHRSW -- Packed Multiply High with Round and Scale
+ (MMX) */
+ if (sz == 4
+ && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x0B) {
+ IRTemp sV = newTemp(Ity_I64);
+ IRTemp dV = newTemp(Ity_I64);
+
+ modrm = insn[3];
+ do_MMX_preamble();
+ assign( dV, getMMXReg(gregOfRM(modrm)) );
+
+ if (epartIsReg(modrm)) {
+ assign( sV, getMMXReg(eregOfRM(modrm)) );
+ delta += 3+1;
+ DIP("pmulhrsw %s,%s\n", nameMMXReg(eregOfRM(modrm)),
+ nameMMXReg(gregOfRM(modrm)));
+ } else {
+ addr = disAMode ( &alen, sorb, delta+3, dis_buf );
+ assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
+ delta += 3+alen;
+ DIP("pmulhrsw %s,%s\n", dis_buf,
+ nameMMXReg(gregOfRM(modrm)));
+ }
+
+ putMMXReg(
+ gregOfRM(modrm),
+ dis_PMULHRSW_helper( mkexpr(sV), mkexpr(dV) )
+ );
+ goto decode_success;
+ }
+
+ /* 66 0F 38 0B = PMULHRSW -- Packed Multiply High with Round and
+ Scale (XMM) */
+ if (sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x0B) {
+ IRTemp sV = newTemp(Ity_V128);
+ IRTemp dV = newTemp(Ity_V128);
+ IRTemp sHi = newTemp(Ity_I64);
+ IRTemp sLo = newTemp(Ity_I64);
+ IRTemp dHi = newTemp(Ity_I64);
+ IRTemp dLo = newTemp(Ity_I64);
+
+ modrm = insn[3];
+ assign( dV, getXMMReg(gregOfRM(modrm)) );
+
+ if (epartIsReg(modrm)) {
+ assign( sV, getXMMReg(eregOfRM(modrm)) );
+ delta += 3+1;
+ DIP("pmulhrsw %s,%s\n", nameXMMReg(eregOfRM(modrm)),
+ nameXMMReg(gregOfRM(modrm)));
+ } else {
+ addr = disAMode ( &alen, sorb, delta+3, dis_buf );
+ gen_SEGV_if_not_16_aligned( addr );
+ assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
+ delta += 3+alen;
+ DIP("pmulhrsw %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRM(modrm)));
+ }
+
+ assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
+ assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
+ assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
+ assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
+
+ putXMMReg(
+ gregOfRM(modrm),
+ binop(Iop_64HLtoV128,
+ dis_PMULHRSW_helper( mkexpr(sHi), mkexpr(dHi) ),
+ dis_PMULHRSW_helper( mkexpr(sLo), mkexpr(dLo) )
+ )
+ );
+ goto decode_success;
+ }
+
+ /* 0F 38 08 = PSIGNB -- Packed Sign 8x8 (MMX) */
+ /* 0F 38 09 = PSIGNW -- Packed Sign 16x4 (MMX) */
+   /* 0F 38 0A = PSIGND -- Packed Sign 32x2 (MMX) */
+ if (sz == 4
+ && insn[0] == 0x0F && insn[1] == 0x38
+ && (insn[2] == 0x08 || insn[2] == 0x09 || insn[2] == 0x0A)) {
+ IRTemp sV = newTemp(Ity_I64);
+ IRTemp dV = newTemp(Ity_I64);
+ HChar* str = "???";
+ Int laneszB = 0;
+
+ switch (insn[2]) {
+ case 0x08: laneszB = 1; str = "b"; break;
+ case 0x09: laneszB = 2; str = "w"; break;
+ case 0x0A: laneszB = 4; str = "d"; break;
+ default: vassert(0);
+ }
+
+ modrm = insn[3];
+ do_MMX_preamble();
+ assign( dV, getMMXReg(gregOfRM(modrm)) );
+
+ if (epartIsReg(modrm)) {
+ assign( sV, getMMXReg(eregOfRM(modrm)) );
+ delta += 3+1;
+ DIP("psign%s %s,%s\n", str, nameMMXReg(eregOfRM(modrm)),
+ nameMMXReg(gregOfRM(modrm)));
+ } else {
+ addr = disAMode ( &alen, sorb, delta+3, dis_buf );
+ assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
+ delta += 3+alen;
+ DIP("psign%s %s,%s\n", str, dis_buf,
+ nameMMXReg(gregOfRM(modrm)));
+ }
+
+ putMMXReg(
+ gregOfRM(modrm),
+ dis_PSIGN_helper( mkexpr(sV), mkexpr(dV), laneszB )
+ );
+ goto decode_success;
+ }
+
+ /* 66 0F 38 08 = PSIGNB -- Packed Sign 8x16 (XMM) */
+ /* 66 0F 38 09 = PSIGNW -- Packed Sign 16x8 (XMM) */
+   /* 66 0F 38 0A = PSIGND -- Packed Sign 32x4 (XMM) */
+ if (sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x38
+ && (insn[2] == 0x08 || insn[2] == 0x09 || insn[2] == 0x0A)) {
+ IRTemp sV = newTemp(Ity_V128);
+ IRTemp dV = newTemp(Ity_V128);
+ IRTemp sHi = newTemp(Ity_I64);
+ IRTemp sLo = newTemp(Ity_I64);
+ IRTemp dHi = newTemp(Ity_I64);
+ IRTemp dLo = newTemp(Ity_I64);
+ HChar* str = "???";
+ Int laneszB = 0;
+
+ switch (insn[2]) {
+ case 0x08: laneszB = 1; str = "b"; break;
+ case 0x09: laneszB = 2; str = "w"; break;
+ case 0x0A: laneszB = 4; str = "d"; break;
+ default: vassert(0);
+ }
+
+ modrm = insn[3];
+ assign( dV, getXMMReg(gregOfRM(modrm)) );
+
+ if (epartIsReg(modrm)) {
+ assign( sV, getXMMReg(eregOfRM(modrm)) );
+ delta += 3+1;
+ DIP("psign%s %s,%s\n", str, nameXMMReg(eregOfRM(modrm)),
+ nameXMMReg(gregOfRM(modrm)));
+ } else {
+ addr = disAMode ( &alen, sorb, delta+3, dis_buf );
+ gen_SEGV_if_not_16_aligned( addr );
+ assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
+ delta += 3+alen;
+ DIP("psign%s %s,%s\n", str, dis_buf,
+ nameXMMReg(gregOfRM(modrm)));
+ }
+
+ assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
+ assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
+ assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
+ assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
+
+ putXMMReg(
+ gregOfRM(modrm),
+ binop(Iop_64HLtoV128,
+ dis_PSIGN_helper( mkexpr(sHi), mkexpr(dHi), laneszB ),
+ dis_PSIGN_helper( mkexpr(sLo), mkexpr(dLo), laneszB )
+ )
+ );
+ goto decode_success;
+ }
+
+ /* 0F 38 1C = PABSB -- Packed Absolute Value 8x8 (MMX) */
+ /* 0F 38 1D = PABSW -- Packed Absolute Value 16x4 (MMX) */
+ /* 0F 38 1E = PABSD -- Packed Absolute Value 32x2 (MMX) */
+ if (sz == 4
+ && insn[0] == 0x0F && insn[1] == 0x38
+ && (insn[2] == 0x1C || insn[2] == 0x1D || insn[2] == 0x1E)) {
+ IRTemp sV = newTemp(Ity_I64);
+ HChar* str = "???";
+ Int laneszB = 0;
+
+ switch (insn[2]) {
+ case 0x1C: laneszB = 1; str = "b"; break;
+ case 0x1D: laneszB = 2; str = "w"; break;
+ case 0x1E: laneszB = 4; str = "d"; break;
+ default: vassert(0);
+ }
+
+ modrm = insn[3];
+ do_MMX_preamble();
+
+ if (epartIsReg(modrm)) {
+ assign( sV, getMMXReg(eregOfRM(modrm)) );
+ delta += 3+1;
+ DIP("pabs%s %s,%s\n", str, nameMMXReg(eregOfRM(modrm)),
+ nameMMXReg(gregOfRM(modrm)));
+ } else {
+ addr = disAMode ( &alen, sorb, delta+3, dis_buf );
+ assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
+ delta += 3+alen;
+ DIP("pabs%s %s,%s\n", str, dis_buf,
+ nameMMXReg(gregOfRM(modrm)));
+ }
+
+ putMMXReg(
+ gregOfRM(modrm),
+ dis_PABS_helper( mkexpr(sV), laneszB )
+ );
+ goto decode_success;
+ }
+
+ /* 66 0F 38 1C = PABSB -- Packed Absolute Value 8x16 (XMM) */
+ /* 66 0F 38 1D = PABSW -- Packed Absolute Value 16x8 (XMM) */
+ /* 66 0F 38 1E = PABSD -- Packed Absolute Value 32x4 (XMM) */
+ if (sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x38
+ && (insn[2] == 0x1C || insn[2] == 0x1D || insn[2] == 0x1E)) {
+ IRTemp sV = newTemp(Ity_V128);
+ IRTemp sHi = newTemp(Ity_I64);
+ IRTemp sLo = newTemp(Ity_I64);
+ HChar* str = "???";
+ Int laneszB = 0;
+
+ switch (insn[2]) {
+ case 0x1C: laneszB = 1; str = "b"; break;
+ case 0x1D: laneszB = 2; str = "w"; break;
+ case 0x1E: laneszB = 4; str = "d"; break;
+ default: vassert(0);
+ }
+
+ modrm = insn[3];
+
+ if (epartIsReg(modrm)) {
+ assign( sV, getXMMReg(eregOfRM(modrm)) );
+ delta += 3+1;
+ DIP("pabs%s %s,%s\n", str, nameXMMReg(eregOfRM(modrm)),
+ nameXMMReg(gregOfRM(modrm)));
+ } else {
+ addr = disAMode ( &alen, sorb, delta+3, dis_buf );
+ gen_SEGV_if_not_16_aligned( addr );
+ assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
+ delta += 3+alen;
+ DIP("pabs%s %s,%s\n", str, dis_buf,
+ nameXMMReg(gregOfRM(modrm)));
+ }
+
+ assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
+ assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
+
+ putXMMReg(
+ gregOfRM(modrm),
+ binop(Iop_64HLtoV128,
+ dis_PABS_helper( mkexpr(sHi), laneszB ),
+ dis_PABS_helper( mkexpr(sLo), laneszB )
+ )
+ );
+ goto decode_success;
+ }
+
+ /* 0F 3A 0F = PALIGNR -- Packed Align Right (MMX) */
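+   /* Semantics (sketch): form the 128-bit value dV:sV and take the
+      8 bytes starting at byte offset imm, zero-filling once the
+      offset runs past the top:
+         res = 64 low bits of ((dV:sV) >> (8*imm)). */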
+ if (sz == 4
+ && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x0F) {
+ IRTemp sV = newTemp(Ity_I64);
+ IRTemp dV = newTemp(Ity_I64);
+ IRTemp res = newTemp(Ity_I64);
+
+ modrm = insn[3];
+ do_MMX_preamble();
+ assign( dV, getMMXReg(gregOfRM(modrm)) );
+
+ if (epartIsReg(modrm)) {
+ assign( sV, getMMXReg(eregOfRM(modrm)) );
+ d32 = (UInt)insn[3+1];
+ delta += 3+1+1;
+ DIP("palignr $%d,%s,%s\n", (Int)d32,
+ nameMMXReg(eregOfRM(modrm)),
+ nameMMXReg(gregOfRM(modrm)));
+ } else {
+ addr = disAMode ( &alen, sorb, delta+3, dis_buf );
+ assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
+ d32 = (UInt)insn[3+alen];
+ delta += 3+alen+1;
+ DIP("palignr $%d%s,%s\n", (Int)d32,
+ dis_buf,
+ nameMMXReg(gregOfRM(modrm)));
+ }
+
+ if (d32 == 0) {
+ assign( res, mkexpr(sV) );
+ }
+ else if (d32 >= 1 && d32 <= 7) {
+ assign(res,
+ binop(Iop_Or64,
+ binop(Iop_Shr64, mkexpr(sV), mkU8(8*d32)),
+ binop(Iop_Shl64, mkexpr(dV), mkU8(8*(8-d32))
+ )));
+ }
+ else if (d32 == 8) {
+ assign( res, mkexpr(dV) );
+ }
+ else if (d32 >= 9 && d32 <= 15) {
+ assign( res, binop(Iop_Shr64, mkexpr(dV), mkU8(8*(d32-8))) );
+ }
+ else if (d32 >= 16 && d32 <= 255) {
+ assign( res, mkU64(0) );
+ }
+ else
+ vassert(0);
+
+ putMMXReg( gregOfRM(modrm), mkexpr(res) );
+ goto decode_success;
+ }
+
+ /* 66 0F 3A 0F = PALIGNR -- Packed Align Right (XMM) */
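+   /* Same idea as the MMX case above, but on the 256-bit value dV:sV
+      handled as four 64-bit pieces; hence the longer case split. */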
+ if (sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x0F) {
+ IRTemp sV = newTemp(Ity_V128);
+ IRTemp dV = newTemp(Ity_V128);
+ IRTemp sHi = newTemp(Ity_I64);
+ IRTemp sLo = newTemp(Ity_I64);
+ IRTemp dHi = newTemp(Ity_I64);
+ IRTemp dLo = newTemp(Ity_I64);
+ IRTemp rHi = newTemp(Ity_I64);
+ IRTemp rLo = newTemp(Ity_I64);
+
+ modrm = insn[3];
+ assign( dV, getXMMReg(gregOfRM(modrm)) );
+
+ if (epartIsReg(modrm)) {
+ assign( sV, getXMMReg(eregOfRM(modrm)) );
+ d32 = (UInt)insn[3+1];
+ delta += 3+1+1;
+ DIP("palignr $%d,%s,%s\n", (Int)d32,
+ nameXMMReg(eregOfRM(modrm)),
+ nameXMMReg(gregOfRM(modrm)));
+ } else {
+ addr = disAMode ( &alen, sorb, delta+3, dis_buf );
+ gen_SEGV_if_not_16_aligned( addr );
+ assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
+ d32 = (UInt)insn[3+alen];
+ delta += 3+alen+1;
+ DIP("palignr $%d,%s,%s\n", (Int)d32,
+ dis_buf,
+ nameXMMReg(gregOfRM(modrm)));
+ }
+
+ assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
+ assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
+ assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
+ assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
+
+ if (d32 == 0) {
+ assign( rHi, mkexpr(sHi) );
+ assign( rLo, mkexpr(sLo) );
+ }
+ else if (d32 >= 1 && d32 <= 7) {
+ assign( rHi, dis_PALIGNR_XMM_helper(dLo, sHi, d32) );
+ assign( rLo, dis_PALIGNR_XMM_helper(sHi, sLo, d32) );
+ }
+ else if (d32 == 8) {
+ assign( rHi, mkexpr(dLo) );
+ assign( rLo, mkexpr(sHi) );
+ }
+ else if (d32 >= 9 && d32 <= 15) {
+ assign( rHi, dis_PALIGNR_XMM_helper(dHi, dLo, d32-8) );
+ assign( rLo, dis_PALIGNR_XMM_helper(dLo, sHi, d32-8) );
+ }
+ else if (d32 == 16) {
+ assign( rHi, mkexpr(dHi) );
+ assign( rLo, mkexpr(dLo) );
+ }
+ else if (d32 >= 17 && d32 <= 23) {
+ assign( rHi, binop(Iop_Shr64, mkexpr(dHi), mkU8(8*(d32-16))) );
+ assign( rLo, dis_PALIGNR_XMM_helper(dHi, dLo, d32-16) );
+ }
+ else if (d32 == 24) {
+ assign( rHi, mkU64(0) );
+ assign( rLo, mkexpr(dHi) );
+ }
+ else if (d32 >= 25 && d32 <= 31) {
+ assign( rHi, mkU64(0) );
+ assign( rLo, binop(Iop_Shr64, mkexpr(dHi), mkU8(8*(d32-24))) );
+ }
+ else if (d32 >= 32 && d32 <= 255) {
+ assign( rHi, mkU64(0) );
+ assign( rLo, mkU64(0) );
+ }
+ else
+ vassert(0);
+
+ putXMMReg(
+ gregOfRM(modrm),
+ binop(Iop_64HLtoV128, mkexpr(rHi), mkexpr(rLo))
+ );
+ goto decode_success;
+ }
+
+ /* 0F 38 00 = PSHUFB -- Packed Shuffle Bytes 8x8 (MMX) */
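+   /* Per-byte semantics (sketch):
+         res8[i] = (s8[i] & 0x80) ? 0 : d8[ s8[i] & 7 ];
+      realised below with Perm8x8 plus a mask built from the index
+      sign bits. */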
+ if (sz == 4
+ && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x00) {
+ IRTemp sV = newTemp(Ity_I64);
+ IRTemp dV = newTemp(Ity_I64);
+
+ modrm = insn[3];
+ do_MMX_preamble();
+ assign( dV, getMMXReg(gregOfRM(modrm)) );
+
+ if (epartIsReg(modrm)) {
+ assign( sV, getMMXReg(eregOfRM(modrm)) );
+ delta += 3+1;
+ DIP("pshufb %s,%s\n", nameMMXReg(eregOfRM(modrm)),
+ nameMMXReg(gregOfRM(modrm)));
+ } else {
+ addr = disAMode ( &alen, sorb, delta+3, dis_buf );
+ assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
+ delta += 3+alen;
+ DIP("pshufb %s,%s\n", dis_buf,
+ nameMMXReg(gregOfRM(modrm)));
+ }
+
+ putMMXReg(
+ gregOfRM(modrm),
+ binop(
+ Iop_And64,
+ /* permute the lanes */
+ binop(
+ Iop_Perm8x8,
+ mkexpr(dV),
+ binop(Iop_And64, mkexpr(sV), mkU64(0x0707070707070707ULL))
+ ),
+ /* mask off lanes which have (index & 0x80) == 0x80 */
+ unop(Iop_Not64, binop(Iop_SarN8x8, mkexpr(sV), mkU8(7)))
+ )
+ );
+ goto decode_success;
+ }
+
+ /* 66 0F 38 00 = PSHUFB -- Packed Shuffle Bytes 8x16 (XMM) */
+ if (sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x00) {
+ IRTemp sV = newTemp(Ity_V128);
+ IRTemp dV = newTemp(Ity_V128);
+ IRTemp sHi = newTemp(Ity_I64);
+ IRTemp sLo = newTemp(Ity_I64);
+ IRTemp dHi = newTemp(Ity_I64);
+ IRTemp dLo = newTemp(Ity_I64);
+ IRTemp rHi = newTemp(Ity_I64);
+ IRTemp rLo = newTemp(Ity_I64);
+ IRTemp sevens = newTemp(Ity_I64);
+ IRTemp mask0x80hi = newTemp(Ity_I64);
+ IRTemp mask0x80lo = newTemp(Ity_I64);
+ IRTemp maskBit3hi = newTemp(Ity_I64);
+ IRTemp maskBit3lo = newTemp(Ity_I64);
+ IRTemp sAnd7hi = newTemp(Ity_I64);
+ IRTemp sAnd7lo = newTemp(Ity_I64);
+ IRTemp permdHi = newTemp(Ity_I64);
+ IRTemp permdLo = newTemp(Ity_I64);
+
+ modrm = insn[3];
+ assign( dV, getXMMReg(gregOfRM(modrm)) );
+
+ if (epartIsReg(modrm)) {
+ assign( sV, getXMMReg(eregOfRM(modrm)) );
+ delta += 3+1;
+ DIP("pshufb %s,%s\n", nameXMMReg(eregOfRM(modrm)),
+ nameXMMReg(gregOfRM(modrm)));
+ } else {
+ addr = disAMode ( &alen, sorb, delta+3, dis_buf );
+ gen_SEGV_if_not_16_aligned( addr );
+ assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
+ delta += 3+alen;
+ DIP("pshufb %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRM(modrm)));
+ }
+
+ assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
+ assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
+ assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
+ assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
+
+ assign( sevens, mkU64(0x0707070707070707ULL) );
+
+ /*
+ mask0x80hi = Not(SarN8x8(sHi,7))
+ maskBit3hi = SarN8x8(ShlN8x8(sHi,4),7)
+ sAnd7hi = And(sHi,sevens)
+ permdHi = Or( And(Perm8x8(dHi,sAnd7hi),maskBit3hi),
+ And(Perm8x8(dLo,sAnd7hi),Not(maskBit3hi)) )
+ rHi = And(permdHi,mask0x80hi)
+ */
+ assign(
+ mask0x80hi,
+ unop(Iop_Not64, binop(Iop_SarN8x8,mkexpr(sHi),mkU8(7))));
+
+ assign(
+ maskBit3hi,
+ binop(Iop_SarN8x8,
+ binop(Iop_ShlN8x8,mkexpr(sHi),mkU8(4)),
+ mkU8(7)));
+
+ assign(sAnd7hi, binop(Iop_And64,mkexpr(sHi),mkexpr(sevens)));
+
+ assign(
+ permdHi,
+ binop(
+ Iop_Or64,
+ binop(Iop_And64,
+ binop(Iop_Perm8x8,mkexpr(dHi),mkexpr(sAnd7hi)),
+ mkexpr(maskBit3hi)),
+ binop(Iop_And64,
+ binop(Iop_Perm8x8,mkexpr(dLo),mkexpr(sAnd7hi)),
+ unop(Iop_Not64,mkexpr(maskBit3hi))) ));
+
+ assign(rHi, binop(Iop_And64,mkexpr(permdHi),mkexpr(mask0x80hi)) );
+
+ /* And the same for the lower half of the result. What fun. */
+
+ assign(
+ mask0x80lo,
+ unop(Iop_Not64, binop(Iop_SarN8x8,mkexpr(sLo),mkU8(7))));
+
+ assign(
+ maskBit3lo,
+ binop(Iop_SarN8x8,
+ binop(Iop_ShlN8x8,mkexpr(sLo),mkU8(4)),
+ mkU8(7)));
+
+ assign(sAnd7lo, binop(Iop_And64,mkexpr(sLo),mkexpr(sevens)));
+
+ assign(
+ permdLo,
+ binop(
+ Iop_Or64,
+ binop(Iop_And64,
+ binop(Iop_Perm8x8,mkexpr(dHi),mkexpr(sAnd7lo)),
+ mkexpr(maskBit3lo)),
+ binop(Iop_And64,
+ binop(Iop_Perm8x8,mkexpr(dLo),mkexpr(sAnd7lo)),
+ unop(Iop_Not64,mkexpr(maskBit3lo))) ));
+
+ assign(rLo, binop(Iop_And64,mkexpr(permdLo),mkexpr(mask0x80lo)) );
+
+ putXMMReg(
+ gregOfRM(modrm),
+ binop(Iop_64HLtoV128, mkexpr(rHi), mkexpr(rLo))
+ );
+ goto decode_success;
+ }
+
+ /* ---------------------------------------------------- */
+ /* --- end of the SSSE3 decoder. --- */
+ /* ---------------------------------------------------- */
+
+ /* ---------------------------------------------------- */
+ /* --- start of the SSE4 decoder --- */
+ /* ---------------------------------------------------- */
+
+ /* 66 0F 3A 0B /r ib = ROUNDSD imm8, xmm2/m64, xmm1
+ (Partial implementation only -- only deal with cases where
+ the rounding mode is specified directly by the immediate byte.)
+ 66 0F 3A 0A /r ib = ROUNDSS imm8, xmm2/m32, xmm1
+ (Limitations ditto)
+ */
+ if (sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x3A
+ && (/*insn[2] == 0x0B || */insn[2] == 0x0A)) {
+
+ Bool isD = insn[2] == 0x0B;
+ IRTemp src = newTemp(isD ? Ity_F64 : Ity_F32);
+ IRTemp res = newTemp(isD ? Ity_F64 : Ity_F32);
+ Int imm = 0;
+
+ modrm = insn[3];
+
+ if (epartIsReg(modrm)) {
+ assign( src,
+ isD ? getXMMRegLane64F( eregOfRM(modrm), 0 )
+ : getXMMRegLane32F( eregOfRM(modrm), 0 ) );
+ imm = insn[3+1];
+ if (imm & ~3) goto decode_failure;
+ delta += 3+1+1;
+ DIP( "rounds%c $%d,%s,%s\n",
+ isD ? 'd' : 's',
+ imm, nameXMMReg( eregOfRM(modrm) ),
+ nameXMMReg( gregOfRM(modrm) ) );
+ } else {
+ addr = disAMode( &alen, sorb, delta+3, dis_buf );
+ assign( src, loadLE( isD ? Ity_F64 : Ity_F32, mkexpr(addr) ));
+ imm = insn[3+alen];
+ if (imm & ~3) goto decode_failure;
+ delta += 3+alen+1;
+ DIP( "roundsd $%d,%s,%s\n",
+ imm, dis_buf, nameXMMReg( gregOfRM(modrm) ) );
+ }
+
+ /* (imm & 3) contains an Intel-encoded rounding mode. Because
+ that encoding is the same as the encoding for IRRoundingMode,
+ we can use that value directly in the IR as a rounding
+ mode. */
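+      /* That is: 0 -> nearest, 1 -> -infinity, 2 -> +infinity,
+         3 -> towards zero, in both encodings. */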
+ assign(res, binop(isD ? Iop_RoundF64toInt : Iop_RoundF32toInt,
+ mkU32(imm & 3), mkexpr(src)) );
+
+ if (isD)
+ putXMMRegLane64F( gregOfRM(modrm), 0, mkexpr(res) );
+ else
+ putXMMRegLane32F( gregOfRM(modrm), 0, mkexpr(res) );
+
+ goto decode_success;
+ }
+
+   /* F3 0F BD = LZCNT -- count leading zeroes.  An AMD extension,
+      which we can only decode if we're sure this is an AMD cpu that
+      supports LZCNT, since otherwise it's BSR, which behaves
+      differently. */
+ if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0xBD
+ && 0 != (archinfo->hwcaps & VEX_HWCAPS_X86_LZCNT)) {
+ vassert(sz == 2 || sz == 4);
+ /*IRType*/ ty = szToITy(sz);
+ IRTemp src = newTemp(ty);
+ modrm = insn[3];
+ if (epartIsReg(modrm)) {
+ assign(src, getIReg(sz, eregOfRM(modrm)));
+ delta += 3+1;
+ DIP("lzcnt%c %s, %s\n", nameISize(sz),
+ nameIReg(sz, eregOfRM(modrm)),
+ nameIReg(sz, gregOfRM(modrm)));
+ } else {
+ addr = disAMode( &alen, sorb, delta+3, dis_buf );
+ assign(src, loadLE(ty, mkexpr(addr)));
+ delta += 3+alen;
+ DIP("lzcnt%c %s, %s\n", nameISize(sz), dis_buf,
+ nameIReg(sz, gregOfRM(modrm)));
+ }
+
+ IRTemp res = gen_LZCNT(ty, src);
+ putIReg(sz, gregOfRM(modrm), mkexpr(res));
+
+ // Update flags. This is pretty lame .. perhaps can do better
+ // if this turns out to be performance critical.
+ // O S A P are cleared. Z is set if RESULT == 0.
+ // C is set if SRC is zero.
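+      // E.g. for 16-bit operands: lzcnt(0x0010) = 11, and
+      // lzcnt(0) = 16 with C set and Z clear.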
+ IRTemp src32 = newTemp(Ity_I32);
+ IRTemp res32 = newTemp(Ity_I32);
+ assign(src32, widenUto32(mkexpr(src)));
+ assign(res32, widenUto32(mkexpr(res)));
+
+ IRTemp oszacp = newTemp(Ity_I32);
+ assign(
+ oszacp,
+ binop(Iop_Or32,
+ binop(Iop_Shl32,
+ unop(Iop_1Uto32,
+ binop(Iop_CmpEQ32, mkexpr(res32), mkU32(0))),
+ mkU8(X86G_CC_SHIFT_Z)),
+ binop(Iop_Shl32,
+ unop(Iop_1Uto32,
+ binop(Iop_CmpEQ32, mkexpr(src32), mkU32(0))),
+ mkU8(X86G_CC_SHIFT_C))
+ )
+ );
+
+ stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) ));
+ stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
+ stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
+ stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(oszacp) ));
+
+ goto decode_success;
+ }
+
+ /* ---------------------------------------------------- */
+ /* --- end of the SSE4 decoder --- */
+ /* ---------------------------------------------------- */
+
+ after_sse_decoders:
+
+ /* ---------------------------------------------------- */
+ /* --- deal with misc 0x67 pfxs (addr size override) -- */
+ /* ---------------------------------------------------- */
+
+ /* 67 E3 = JCXZ (for JECXZ see below) */
+ if (insn[0] == 0x67 && insn[1] == 0xE3 && sz == 4) {
+ delta += 2;
+ d32 = (((Addr32)guest_EIP_bbstart)+delta+1) + getSDisp8(delta);
+ delta ++;
+ stmt( IRStmt_Exit(
+ binop(Iop_CmpEQ16, getIReg(2,R_ECX), mkU16(0)),
+ Ijk_Boring,
+ IRConst_U32(d32)
+ ));
+ DIP("jcxz 0x%x\n", d32);
+ goto decode_success;
+ }
+
+ /* ---------------------------------------------------- */
+ /* --- start of the baseline insn decoder -- */
+ /* ---------------------------------------------------- */
+
+ /* Get the primary opcode. */
+ opc = getIByte(delta); delta++;
+
+ /* We get here if the current insn isn't SSE, or this CPU doesn't
+ support SSE. */
+
+ switch (opc) {
+
+ /* ------------------------ Control flow --------------- */
+
+ case 0xC2: /* RET imm16 */
+ d32 = getUDisp16(delta);
+ delta += 2;
+ dis_ret(d32);
+ dres.whatNext = Dis_StopHere;
+ DIP("ret %d\n", (Int)d32);
+ break;
+ case 0xC3: /* RET */
+ dis_ret(0);
+ dres.whatNext = Dis_StopHere;
+ DIP("ret\n");
+ break;
+
+ case 0xCF: /* IRET */
+ /* Note, this is an extremely kludgey and limited implementation
+ of iret. All it really does is:
+ popl %EIP; popl %CS; popl %EFLAGS.
+         %CS is set but ignored (as it is in (eg) popw %cs). */
+ t1 = newTemp(Ity_I32); /* ESP */
+ t2 = newTemp(Ity_I32); /* new EIP */
+ t3 = newTemp(Ity_I32); /* new CS */
+ t4 = newTemp(Ity_I32); /* new EFLAGS */
+ assign(t1, getIReg(4,R_ESP));
+ assign(t2, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t1),mkU32(0) )));
+ assign(t3, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t1),mkU32(4) )));
+ assign(t4, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t1),mkU32(8) )));
+ /* Get stuff off stack */
+ putIReg(4, R_ESP,binop(Iop_Add32, mkexpr(t1), mkU32(12)));
+ /* set %CS (which is ignored anyway) */
+ putSReg( R_CS, unop(Iop_32to16, mkexpr(t3)) );
+ /* set %EFLAGS */
+ set_EFLAGS_from_value( t4, False/*!emit_AC_emwarn*/, 0/*unused*/ );
+ /* goto new EIP value */
+ jmp_treg(Ijk_Ret,t2);
+ dres.whatNext = Dis_StopHere;
+ DIP("iret (very kludgey)\n");
+ break;
+
+ case 0xE8: /* CALL J4 */
+ d32 = getUDisp32(delta); delta += 4;
+ d32 += (guest_EIP_bbstart+delta);
+ /* (guest_eip_bbstart+delta) == return-to addr, d32 == call-to addr */
+ if (d32 == guest_EIP_bbstart+delta && getIByte(delta) >= 0x58
+ && getIByte(delta) <= 0x5F) {
+ /* Specially treat the position-independent-code idiom
+ call X
+ X: popl %reg
+ as
+ movl %eip, %reg.
+ since this generates better code, but for no other reason. */
+ Int archReg = getIByte(delta) - 0x58;
+ /* vex_printf("-- fPIC thingy\n"); */
+ putIReg(4, archReg, mkU32(guest_EIP_bbstart+delta));
+ delta++; /* Step over the POP */
+ DIP("call 0x%x ; popl %s\n",d32,nameIReg(4,archReg));
+ } else {
+ /* The normal sequence for a call. */
+ t1 = newTemp(Ity_I32);
+ assign(t1, binop(Iop_Sub32, getIReg(4,R_ESP), mkU32(4)));
+ putIReg(4, R_ESP, mkexpr(t1));
+ storeLE( mkexpr(t1), mkU32(guest_EIP_bbstart+delta));
+ if (resteerOkFn( callback_opaque, (Addr64)(Addr32)d32 )) {
+ /* follow into the call target. */
+ dres.whatNext = Dis_ResteerU;
+ dres.continueAt = (Addr64)(Addr32)d32;
+ } else {
+ jmp_lit(Ijk_Call,d32);
+ dres.whatNext = Dis_StopHere;
+ }
+ DIP("call 0x%x\n",d32);
+ }
+ break;
+
+//-- case 0xC8: /* ENTER */
+//-- d32 = getUDisp16(eip); eip += 2;
+//-- abyte = getIByte(delta); delta++;
+//--
+//-- vg_assert(sz == 4);
+//-- vg_assert(abyte == 0);
+//--
+//-- t1 = newTemp(cb); t2 = newTemp(cb);
+//-- uInstr2(cb, GET, sz, ArchReg, R_EBP, TempReg, t1);
+//-- uInstr2(cb, GET, 4, ArchReg, R_ESP, TempReg, t2);
+//-- uInstr2(cb, SUB, 4, Literal, 0, TempReg, t2);
+//-- uLiteral(cb, sz);
+//-- uInstr2(cb, PUT, 4, TempReg, t2, ArchReg, R_ESP);
+//-- uInstr2(cb, STORE, 4, TempReg, t1, TempReg, t2);
+//-- uInstr2(cb, PUT, 4, TempReg, t2, ArchReg, R_EBP);
+//-- if (d32) {
+//-- uInstr2(cb, SUB, 4, Literal, 0, TempReg, t2);
+//-- uLiteral(cb, d32);
+//-- uInstr2(cb, PUT, 4, TempReg, t2, ArchReg, R_ESP);
+//-- }
+//-- DIP("enter 0x%x, 0x%x", d32, abyte);
+//-- break;
+
+ case 0xC9: /* LEAVE */
+ vassert(sz == 4);
+ t1 = newTemp(Ity_I32); t2 = newTemp(Ity_I32);
+ assign(t1, getIReg(4,R_EBP));
+         /* The first PUT to ESP looks redundant, but we need it
+            because ESP must always be up-to-date for Memcheck to work... */
+ putIReg(4, R_ESP, mkexpr(t1));
+ assign(t2, loadLE(Ity_I32,mkexpr(t1)));
+ putIReg(4, R_EBP, mkexpr(t2));
+ putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(t1), mkU32(4)) );
+ DIP("leave\n");
+ break;
+
+ /* ---------------- Misc weird-ass insns --------------- */
+
+ case 0x27: /* DAA */
+ case 0x2F: /* DAS */
+ case 0x37: /* AAA */
+ case 0x3F: /* AAS */
+ /* An ugly implementation for some ugly instructions. Oh
+ well. */
+ if (sz != 4) goto decode_failure;
+ t1 = newTemp(Ity_I32);
+ t2 = newTemp(Ity_I32);
+ /* Make up a 32-bit value (t1), with the old value of AX in the
+ bottom 16 bits, and the old OSZACP bitmask in the upper 16
+ bits. */
+ assign(t1,
+ binop(Iop_16HLto32,
+ unop(Iop_32to16,
+ mk_x86g_calculate_eflags_all()),
+ getIReg(2, R_EAX)
+ ));
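+         /* e.g. if the OSZACP flags come out as 0x0011 and AX is
+            0x1234, then t1 = 0x00111234. */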
+ /* Call the helper fn, to get a new AX and OSZACP value, and
+ poke both back into the guest state. Also pass the helper
+ the actual opcode so it knows which of the 4 instructions it
+ is doing the computation for. */
+ vassert(opc == 0x27 || opc == 0x2F || opc == 0x37 || opc == 0x3F);
+ assign(t2,
+ mkIRExprCCall(
+ Ity_I32, 0/*regparm*/, "x86g_calculate_daa_das_aaa_aas",
+ &x86g_calculate_daa_das_aaa_aas,
+ mkIRExprVec_2( mkexpr(t1), mkU32( opc & 0xFF) )
+ ));
+ putIReg(2, R_EAX, unop(Iop_32to16, mkexpr(t2) ));
+
+ stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) ));
+ stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
+ stmt( IRStmt_Put( OFFB_CC_DEP1,
+ binop(Iop_And32,
+ binop(Iop_Shr32, mkexpr(t2), mkU8(16)),
+ mkU32( X86G_CC_MASK_C | X86G_CC_MASK_P
+ | X86G_CC_MASK_A | X86G_CC_MASK_Z
+ | X86G_CC_MASK_S| X86G_CC_MASK_O )
+ )
+ )
+ );
+ /* Set NDEP even though it isn't used. This makes redundant-PUT
+ elimination of previous stores to this field work better. */
+ stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
+ switch (opc) {
+ case 0x27: DIP("daa\n"); break;
+ case 0x2F: DIP("das\n"); break;
+ case 0x37: DIP("aaa\n"); break;
+ case 0x3F: DIP("aas\n"); break;
+ default: vassert(0);
+ }
+ break;
+
+//-- case 0xD4: /* AAM */
+//-- case 0xD5: /* AAD */
+//-- d32 = getIByte(delta); delta++;
+//-- if (d32 != 10) VG_(core_panic)("disInstr: AAM/AAD but base not 10 !");
+//-- t1 = newTemp(cb);
+//-- uInstr2(cb, GET, 2, ArchReg, R_EAX, TempReg, t1);
+//-- /* Widen %AX to 32 bits, so it's all defined when we push it. */
+//-- uInstr1(cb, WIDEN, 4, TempReg, t1);
+//-- uWiden(cb, 2, False);
+//-- uInstr0(cb, CALLM_S, 0);
+//-- uInstr1(cb, PUSH, 4, TempReg, t1);
+//-- uInstr1(cb, CALLM, 0, Lit16,
+//-- opc == 0xD4 ? VGOFF_(helper_AAM) : VGOFF_(helper_AAD) );
+//-- uFlagsRWU(cb, FlagsEmpty, FlagsSZP, FlagsEmpty);
+//-- uInstr1(cb, POP, 4, TempReg, t1);
+//-- uInstr0(cb, CALLM_E, 0);
+//-- uInstr2(cb, PUT, 2, TempReg, t1, ArchReg, R_EAX);
+//-- DIP(opc == 0xD4 ? "aam\n" : "aad\n");
+//-- break;
+
+ /* ------------------------ CWD/CDQ -------------------- */
+
+      case 0x98: /* CBW / CWDE */
+ if (sz == 4) {
+ putIReg(4, R_EAX, unop(Iop_16Sto32, getIReg(2, R_EAX)));
+ DIP("cwde\n");
+ } else {
+ vassert(sz == 2);
+ putIReg(2, R_EAX, unop(Iop_8Sto16, getIReg(1, R_EAX)));
+ DIP("cbw\n");
+ }
+ break;
+
+ case 0x99: /* CWD/CDQ */
+ ty = szToITy(sz);
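+         /* Sign-extend eAX into eDX by copying the sign bit across
+            the whole word: an arithmetic shift right by 15 (sz==2) or
+            31 (sz==4) yields all-zeroes or all-ones.  E.g. for
+            EAX = 0x80000000 this leaves EDX = 0xFFFFFFFF. */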
+ putIReg(sz, R_EDX,
+ binop(mkSizedOp(ty,Iop_Sar8),
+ getIReg(sz, R_EAX),
+ mkU8(sz == 2 ? 15 : 31)) );
+         DIP(sz == 2 ? "cwd\n" : "cdq\n");
+ break;
+
+ /* ------------------------ FPU ops -------------------- */
+
+ case 0x9E: /* SAHF */
+ codegen_SAHF();
+ DIP("sahf\n");
+ break;
+
+ case 0x9F: /* LAHF */
+ codegen_LAHF();
+ DIP("lahf\n");
+ break;
+
+ case 0x9B: /* FWAIT */
+         /* Effectively a no-op here: deferred FP exceptions are not
+            modelled, so there is nothing to wait for. */
+ DIP("fwait\n");
+ break;
+
+ case 0xD8:
+ case 0xD9:
+ case 0xDA:
+ case 0xDB:
+ case 0xDC:
+ case 0xDD:
+ case 0xDE:
+ case 0xDF: {
+ Int delta0 = delta;
+ Bool decode_OK = False;
+ delta = dis_FPU ( &decode_OK, sorb, delta );
+ if (!decode_OK) {
+ delta = delta0;
+ goto decode_failure;
+ }
+ break;
+ }
+
+ /* ------------------------ INC & DEC ------------------ */
+
+ case 0x40: /* INC eAX */
+ case 0x41: /* INC eCX */
+ case 0x42: /* INC eDX */
+ case 0x43: /* INC eBX */
+ case 0x44: /* INC eSP */
+ case 0x45: /* INC eBP */
+ case 0x46: /* INC eSI */
+ case 0x47: /* INC eDI */
+ vassert(sz == 2 || sz == 4);
+ ty = szToITy(sz);
+ t1 = newTemp(ty);
+ assign( t1, binop(mkSizedOp(ty,Iop_Add8),
+ getIReg(sz, (UInt)(opc - 0x40)),
+ mkU(ty,1)) );
+ setFlags_INC_DEC( True, t1, ty );
+ putIReg(sz, (UInt)(opc - 0x40), mkexpr(t1));
+ DIP("inc%c %s\n", nameISize(sz), nameIReg(sz,opc-0x40));
+ break;
+
+ case 0x48: /* DEC eAX */
+ case 0x49: /* DEC eCX */
+ case 0x4A: /* DEC eDX */
+ case 0x4B: /* DEC eBX */
+ case 0x4C: /* DEC eSP */
+ case 0x4D: /* DEC eBP */
+ case 0x4E: /* DEC eSI */
+ case 0x4F: /* DEC eDI */
+ vassert(sz == 2 || sz == 4);
+ ty = szToITy(sz);
+ t1 = newTemp(ty);
+ assign( t1, binop(mkSizedOp(ty,Iop_Sub8),
+ getIReg(sz, (UInt)(opc - 0x48)),
+ mkU(ty,1)) );
+ setFlags_INC_DEC( False, t1, ty );
+ putIReg(sz, (UInt)(opc - 0x48), mkexpr(t1));
+ DIP("dec%c %s\n", nameISize(sz), nameIReg(sz,opc-0x48));
+ break;
+
+ /* ------------------------ INT ------------------------ */
+
+ case 0xCC: /* INT 3 */
+ jmp_lit(Ijk_SigTRAP,((Addr32)guest_EIP_bbstart)+delta);
+ dres.whatNext = Dis_StopHere;
+ DIP("int $0x3\n");
+ break;
+
+ case 0xCD: /* INT imm8 */
+ d32 = getIByte(delta); delta++;
+
+ /* For any of the cases where we emit a jump (that is, for all
+ currently handled cases), it's important that all ArchRegs
+ carry their up-to-date value at this point. So we declare an
+ end-of-block here, which forces any TempRegs caching ArchRegs
+ to be flushed. */
+
+         /* Handle int $0x40 .. $0x43 by synthesising a segfault and a
+            restart of this instruction (hence the "-2" two lines below,
+            which makes the restart EIP point at this instruction).
+            This is probably Linux-specific, and it would be more
+            correct to do this only if the VexAbiInfo says that is what
+            we should do. */
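+         /* (The "-2" works because the INT imm8 insn (CD ib) is
+            exactly two bytes long, and delta already points past it.) */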
+ if (d32 >= 0x40 && d32 <= 0x43) {
+ jmp_lit(Ijk_SigSEGV,((Addr32)guest_EIP_bbstart)+delta-2);
+ dres.whatNext = Dis_StopHere;
+ DIP("int $0x%x\n", (Int)d32);
+ break;
+ }
+
+ /* Handle int $0x80 (linux syscalls), int $0x81 and $0x82
+ (darwin syscalls). As part of this, note where we are, so we
+ can back up the guest to this point if the syscall needs to
+ be restarted. */
+ if (d32 == 0x80) {
+ stmt( IRStmt_Put( OFFB_IP_AT_SYSCALL,
+ mkU32(guest_EIP_curr_instr) ) );
+ jmp_lit(Ijk_Sys_int128,((Addr32)guest_EIP_bbstart)+delta);
+ dres.whatNext = Dis_StopHere;
+ DIP("int $0x80\n");
+ break;
+ }
+ if (d32 == 0x81) {
+ stmt( IRStmt_Put( OFFB_IP_AT_SYSCALL,
+ mkU32(guest_EIP_curr_instr) ) );
+ jmp_lit(Ijk_Sys_int129,((Addr32)guest_EIP_bbstart)+delta);
+ dres.whatNext = Dis_StopHere;
+ DIP("int $0x81\n");
+ break;
+ }
+ if (d32 == 0x82) {
+ stmt( IRStmt_Put( OFFB_IP_AT_SYSCALL,
+ mkU32(guest_EIP_curr_instr) ) );
+ jmp_lit(Ijk_Sys_int130,((Addr32)guest_EIP_bbstart)+delta);
+ dres.whatNext = Dis_StopHere;
+ DIP("int $0x82\n");
+ break;
+ }
+
+ /* none of the above */
+ goto decode_failure;
+
+ /* ------------------------ Jcond, byte offset --------- */
+
+ case 0xEB: /* Jb (jump, byte offset) */
+ d32 = (((Addr32)guest_EIP_bbstart)+delta+1) + getSDisp8(delta);
+ delta++;
+ if (resteerOkFn( callback_opaque, (Addr64)(Addr32)d32) ) {
+ dres.whatNext = Dis_ResteerU;
+ dres.continueAt = (Addr64)(Addr32)d32;
+ } else {
+ jmp_lit(Ijk_Boring,d32);
+ dres.whatNext = Dis_StopHere;
+ }
+ DIP("jmp-8 0x%x\n", d32);
+ break;
+
+ case 0xE9: /* Jv (jump, 16/32 offset) */
+ vassert(sz == 4); /* JRS added 2004 July 11 */
+ d32 = (((Addr32)guest_EIP_bbstart)+delta+sz) + getSDisp(sz,delta);
+ delta += sz;
+ if (resteerOkFn( callback_opaque, (Addr64)(Addr32)d32) ) {
+ dres.whatNext = Dis_ResteerU;
+ dres.continueAt = (Addr64)(Addr32)d32;
+ } else {
+ jmp_lit(Ijk_Boring,d32);
+ dres.whatNext = Dis_StopHere;
+ }
+ DIP("jmp 0x%x\n", d32);
+ break;
+
+      case 0x70: /* JOb (jump overflow) */
+      case 0x71: /* JNOb (jump not overflow) */
+      case 0x72: /* JBb/JNAEb (jump below) */
+      case 0x73: /* JNBb/JAEb (jump not below) */
+      case 0x74: /* JZb/JEb (jump zero) */
+      case 0x75: /* JNZb/JNEb (jump not zero) */
+      case 0x76: /* JBEb/JNAb (jump below or equal) */
+      case 0x77: /* JNBEb/JAb (jump not below or equal) */
+      case 0x78: /* JSb (jump negative) */
+      case 0x79: /* JNSb (jump not negative) */
+ case 0x7A: /* JP (jump parity even) */
+ case 0x7B: /* JNP/JPO (jump parity odd) */
+ case 0x7C: /* JLb/JNGEb (jump less) */
+ case 0x7D: /* JGEb/JNLb (jump greater or equal) */
+ case 0x7E: /* JLEb/JNGb (jump less or equal) */
+ case 0x7F: /* JGb/JNLEb (jump greater) */
+ { Int jmpDelta;
+ HChar* comment = "";
+ jmpDelta = (Int)getSDisp8(delta);
+ vassert(-128 <= jmpDelta && jmpDelta < 128);
+ d32 = (((Addr32)guest_EIP_bbstart)+delta+1) + jmpDelta;
+ delta++;
+ if (resteerCisOk
+ && vex_control.guest_chase_cond
+ && (Addr32)d32 != (Addr32)guest_EIP_bbstart
+ && jmpDelta < 0
+ && resteerOkFn( callback_opaque, (Addr64)(Addr32)d32) ) {
+ /* Speculation: assume this backward branch is taken. So we
+ need to emit a side-exit to the insn following this one,
+ on the negation of the condition, and continue at the
+ branch target address (d32). If we wind up back at the
+ first instruction of the trace, just stop; it's better to
+ let the IR loop unroller handle that case. */
+ stmt( IRStmt_Exit(
+ mk_x86g_calculate_condition((X86Condcode)(1 ^ (opc - 0x70))),
+ Ijk_Boring,
+ IRConst_U32(guest_EIP_bbstart+delta) ) );
+ dres.whatNext = Dis_ResteerC;
+ dres.continueAt = (Addr64)(Addr32)d32;
+ comment = "(assumed taken)";
+ }
+ else
+ if (resteerCisOk
+ && vex_control.guest_chase_cond
+ && (Addr32)d32 != (Addr32)guest_EIP_bbstart
+ && jmpDelta >= 0
+ && resteerOkFn( callback_opaque,
+ (Addr64)(Addr32)(guest_EIP_bbstart+delta)) ) {
+ /* Speculation: assume this forward branch is not taken. So
+ we need to emit a side-exit to d32 (the dest) and continue
+ disassembling at the insn immediately following this
+ one. */
+ stmt( IRStmt_Exit(
+ mk_x86g_calculate_condition((X86Condcode)(opc - 0x70)),
+ Ijk_Boring,
+ IRConst_U32(d32) ) );
+ dres.whatNext = Dis_ResteerC;
+ dres.continueAt = (Addr64)(Addr32)(guest_EIP_bbstart+delta);
+ comment = "(assumed not taken)";
+ }
+ else {
+ /* Conservative default translation - end the block at this
+ point. */
+ jcc_01( (X86Condcode)(opc - 0x70),
+ (Addr32)(guest_EIP_bbstart+delta), d32);
+ dres.whatNext = Dis_StopHere;
+ }
+ DIP("j%s-8 0x%x %s\n", name_X86Condcode(opc - 0x70), d32, comment);
+ break;
+ }
+
+ case 0xE3: /* JECXZ (for JCXZ see above) */
+ if (sz != 4) goto decode_failure;
+ d32 = (((Addr32)guest_EIP_bbstart)+delta+1) + getSDisp8(delta);
+ delta ++;
+ stmt( IRStmt_Exit(
+ binop(Iop_CmpEQ32, getIReg(4,R_ECX), mkU32(0)),
+ Ijk_Boring,
+ IRConst_U32(d32)
+ ));
+ DIP("jecxz 0x%x\n", d32);
+ break;
+
+ case 0xE0: /* LOOPNE disp8: decrement count, jump if count != 0 && ZF==0 */
+ case 0xE1: /* LOOPE disp8: decrement count, jump if count != 0 && ZF==1 */
+ case 0xE2: /* LOOP disp8: decrement count, jump if count != 0 */
+ { /* Again, the docs say this uses ECX/CX as a count depending on
+ the address size override, not the operand one. Since we
+ don't handle address size overrides, I guess that means
+ ECX. */
+ IRExpr* zbit = NULL;
+ IRExpr* count = NULL;
+ IRExpr* cond = NULL;
+ HChar* xtra = NULL;
+
+ if (sz != 4) goto decode_failure;
+ d32 = (((Addr32)guest_EIP_bbstart)+delta+1) + getSDisp8(delta);
+ delta++;
+ putIReg(4, R_ECX, binop(Iop_Sub32, getIReg(4,R_ECX), mkU32(1)));
+
+ count = getIReg(4,R_ECX);
+ cond = binop(Iop_CmpNE32, count, mkU32(0));
+ switch (opc) {
+ case 0xE2:
+ xtra = "";
+ break;
+ case 0xE1:
+ xtra = "e";
+ zbit = mk_x86g_calculate_condition( X86CondZ );
+ cond = mkAnd1(cond, zbit);
+ break;
+ case 0xE0:
+ xtra = "ne";
+ zbit = mk_x86g_calculate_condition( X86CondNZ );
+ cond = mkAnd1(cond, zbit);
+ break;
+ default:
+ vassert(0);
+ }
+ stmt( IRStmt_Exit(cond, Ijk_Boring, IRConst_U32(d32)) );
+
+ DIP("loop%s 0x%x\n", xtra, d32);
+ break;
+ }
+
+ /* ------------------------ IMUL ----------------------- */
+
+ case 0x69: /* IMUL Iv, Ev, Gv */
+ delta = dis_imul_I_E_G ( sorb, sz, delta, sz );
+ break;
+ case 0x6B: /* IMUL Ib, Ev, Gv */
+ delta = dis_imul_I_E_G ( sorb, sz, delta, 1 );
+ break;
+
+ /* ------------------------ MOV ------------------------ */
+
+ case 0x88: /* MOV Gb,Eb */
+ delta = dis_mov_G_E(sorb, 1, delta);
+ break;
+
+ case 0x89: /* MOV Gv,Ev */
+ delta = dis_mov_G_E(sorb, sz, delta);
+ break;
+
+ case 0x8A: /* MOV Eb,Gb */
+ delta = dis_mov_E_G(sorb, 1, delta);
+ break;
+
+ case 0x8B: /* MOV Ev,Gv */
+ delta = dis_mov_E_G(sorb, sz, delta);
+ break;
+
+ case 0x8D: /* LEA M,Gv */
+ if (sz != 4)
+ goto decode_failure;
+ modrm = getIByte(delta);
+ if (epartIsReg(modrm))
+ goto decode_failure;
+ /* NOTE! this is the one place where a segment override prefix
+ has no effect on the address calculation. Therefore we pass
+ zero instead of sorb here. */
+ addr = disAMode ( &alen, /*sorb*/ 0, delta, dis_buf );
+ delta += alen;
+ putIReg(sz, gregOfRM(modrm), mkexpr(addr));
+ DIP("lea%c %s, %s\n", nameISize(sz), dis_buf,
+ nameIReg(sz,gregOfRM(modrm)));
+ break;
+
+ case 0x8C: /* MOV Sw,Ew -- MOV from a SEGMENT REGISTER */
+ delta = dis_mov_Sw_Ew(sorb, sz, delta);
+ break;
+
+ case 0x8E: /* MOV Ew,Sw -- MOV to a SEGMENT REGISTER */
+ delta = dis_mov_Ew_Sw(sorb, delta);
+ break;
+
+ case 0xA0: /* MOV Ob,AL */
+ sz = 1;
+ /* Fall through ... */
+ case 0xA1: /* MOV Ov,eAX */
+ d32 = getUDisp32(delta); delta += 4;
+ ty = szToITy(sz);
+ addr = newTemp(Ity_I32);
+ assign( addr, handleSegOverride(sorb, mkU32(d32)) );
+ putIReg(sz, R_EAX, loadLE(ty, mkexpr(addr)));
+ DIP("mov%c %s0x%x, %s\n", nameISize(sz), sorbTxt(sorb),
+ d32, nameIReg(sz,R_EAX));
+ break;
+
+      case 0xA2: /* MOV AL,Ob */
+ sz = 1;
+ /* Fall through ... */
+ case 0xA3: /* MOV eAX,Ov */
+ d32 = getUDisp32(delta); delta += 4;
+ ty = szToITy(sz);
+ addr = newTemp(Ity_I32);
+ assign( addr, handleSegOverride(sorb, mkU32(d32)) );
+ storeLE( mkexpr(addr), getIReg(sz,R_EAX) );
+ DIP("mov%c %s, %s0x%x\n", nameISize(sz), nameIReg(sz,R_EAX),
+ sorbTxt(sorb), d32);
+ break;
+
+ case 0xB0: /* MOV imm,AL */
+ case 0xB1: /* MOV imm,CL */
+ case 0xB2: /* MOV imm,DL */
+ case 0xB3: /* MOV imm,BL */
+ case 0xB4: /* MOV imm,AH */
+ case 0xB5: /* MOV imm,CH */
+ case 0xB6: /* MOV imm,DH */
+ case 0xB7: /* MOV imm,BH */
+ d32 = getIByte(delta); delta += 1;
+ putIReg(1, opc-0xB0, mkU8(d32));
+ DIP("movb $0x%x,%s\n", d32, nameIReg(1,opc-0xB0));
+ break;
+
+ case 0xB8: /* MOV imm,eAX */
+ case 0xB9: /* MOV imm,eCX */
+ case 0xBA: /* MOV imm,eDX */
+ case 0xBB: /* MOV imm,eBX */
+ case 0xBC: /* MOV imm,eSP */
+ case 0xBD: /* MOV imm,eBP */
+ case 0xBE: /* MOV imm,eSI */
+ case 0xBF: /* MOV imm,eDI */
+ d32 = getUDisp(sz,delta); delta += sz;
+ putIReg(sz, opc-0xB8, mkU(szToITy(sz), d32));
+ DIP("mov%c $0x%x,%s\n", nameISize(sz), d32, nameIReg(sz,opc-0xB8));
+ break;
+
+ case 0xC6: /* MOV Ib,Eb */
+ sz = 1;
+ goto do_Mov_I_E;
+ case 0xC7: /* MOV Iv,Ev */
+ goto do_Mov_I_E;
+
+ do_Mov_I_E:
+ modrm = getIByte(delta);
+ if (epartIsReg(modrm)) {
+ delta++; /* mod/rm byte */
+ d32 = getUDisp(sz,delta); delta += sz;
+ putIReg(sz, eregOfRM(modrm), mkU(szToITy(sz), d32));
+ DIP("mov%c $0x%x, %s\n", nameISize(sz), d32,
+ nameIReg(sz,eregOfRM(modrm)));
+ } else {
+ addr = disAMode ( &alen, sorb, delta, dis_buf );
+ delta += alen;
+ d32 = getUDisp(sz,delta); delta += sz;
+ storeLE(mkexpr(addr), mkU(szToITy(sz), d32));
+ DIP("mov%c $0x%x, %s\n", nameISize(sz), d32, dis_buf);
+ }
+ break;
+
+ /* ------------------------ opl imm, A ----------------- */
+
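+      /* Note: the Iop_*8 opcodes passed in the following cases are
+         just size templates; the helper resizes them to match sz
+         (cf. the explicit mkSizedOp uses above).  The same holds for
+         the opl Ev,Gv and opl Gv,Ev cases further below. */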
+ case 0x04: /* ADD Ib, AL */
+ delta = dis_op_imm_A( 1, False, Iop_Add8, True, delta, "add" );
+ break;
+ case 0x05: /* ADD Iv, eAX */
+ delta = dis_op_imm_A( sz, False, Iop_Add8, True, delta, "add" );
+ break;
+
+ case 0x0C: /* OR Ib, AL */
+ delta = dis_op_imm_A( 1, False, Iop_Or8, True, delta, "or" );
+ break;
+ case 0x0D: /* OR Iv, eAX */
+ delta = dis_op_imm_A( sz, False, Iop_Or8, True, delta, "or" );
+ break;
+
+ case 0x14: /* ADC Ib, AL */
+ delta = dis_op_imm_A( 1, True, Iop_Add8, True, delta, "adc" );
+ break;
+ case 0x15: /* ADC Iv, eAX */
+ delta = dis_op_imm_A( sz, True, Iop_Add8, True, delta, "adc" );
+ break;
+
+ case 0x1C: /* SBB Ib, AL */
+ delta = dis_op_imm_A( 1, True, Iop_Sub8, True, delta, "sbb" );
+ break;
+ case 0x1D: /* SBB Iv, eAX */
+ delta = dis_op_imm_A( sz, True, Iop_Sub8, True, delta, "sbb" );
+ break;
+
+ case 0x24: /* AND Ib, AL */
+ delta = dis_op_imm_A( 1, False, Iop_And8, True, delta, "and" );
+ break;
+ case 0x25: /* AND Iv, eAX */
+ delta = dis_op_imm_A( sz, False, Iop_And8, True, delta, "and" );
+ break;
+
+ case 0x2C: /* SUB Ib, AL */
+ delta = dis_op_imm_A( 1, False, Iop_Sub8, True, delta, "sub" );
+ break;
+ case 0x2D: /* SUB Iv, eAX */
+ delta = dis_op_imm_A( sz, False, Iop_Sub8, True, delta, "sub" );
+ break;
+
+ case 0x34: /* XOR Ib, AL */
+ delta = dis_op_imm_A( 1, False, Iop_Xor8, True, delta, "xor" );
+ break;
+ case 0x35: /* XOR Iv, eAX */
+ delta = dis_op_imm_A( sz, False, Iop_Xor8, True, delta, "xor" );
+ break;
+
+ case 0x3C: /* CMP Ib, AL */
+ delta = dis_op_imm_A( 1, False, Iop_Sub8, False, delta, "cmp" );
+ break;
+ case 0x3D: /* CMP Iv, eAX */
+ delta = dis_op_imm_A( sz, False, Iop_Sub8, False, delta, "cmp" );
+ break;
+
+ case 0xA8: /* TEST Ib, AL */
+ delta = dis_op_imm_A( 1, False, Iop_And8, False, delta, "test" );
+ break;
+ case 0xA9: /* TEST Iv, eAX */
+ delta = dis_op_imm_A( sz, False, Iop_And8, False, delta, "test" );
+ break;
+
+ /* ------------------------ opl Ev, Gv ----------------- */
+
+ case 0x02: /* ADD Eb,Gb */
+ delta = dis_op2_E_G ( sorb, False, Iop_Add8, True, 1, delta, "add" );
+ break;
+ case 0x03: /* ADD Ev,Gv */
+ delta = dis_op2_E_G ( sorb, False, Iop_Add8, True, sz, delta, "add" );
+ break;
+
+ case 0x0A: /* OR Eb,Gb */
+ delta = dis_op2_E_G ( sorb, False, Iop_Or8, True, 1, delta, "or" );
+ break;
+ case 0x0B: /* OR Ev,Gv */
+ delta = dis_op2_E_G ( sorb, False, Iop_Or8, True, sz, delta, "or" );
+ break;
+
+ case 0x12: /* ADC Eb,Gb */
+ delta = dis_op2_E_G ( sorb, True, Iop_Add8, True, 1, delta, "adc" );
+ break;
+ case 0x13: /* ADC Ev,Gv */
+ delta = dis_op2_E_G ( sorb, True, Iop_Add8, True, sz, delta, "adc" );
+ break;
+
+ case 0x1A: /* SBB Eb,Gb */
+ delta = dis_op2_E_G ( sorb, True, Iop_Sub8, True, 1, delta, "sbb" );
+ break;
+ case 0x1B: /* SBB Ev,Gv */
+ delta = dis_op2_E_G ( sorb, True, Iop_Sub8, True, sz, delta, "sbb" );
+ break;
+
+ case 0x22: /* AND Eb,Gb */
+ delta = dis_op2_E_G ( sorb, False, Iop_And8, True, 1, delta, "and" );
+ break;
+ case 0x23: /* AND Ev,Gv */
+ delta = dis_op2_E_G ( sorb, False, Iop_And8, True, sz, delta, "and" );
+ break;
+
+ case 0x2A: /* SUB Eb,Gb */
+ delta = dis_op2_E_G ( sorb, False, Iop_Sub8, True, 1, delta, "sub" );
+ break;
+ case 0x2B: /* SUB Ev,Gv */
+ delta = dis_op2_E_G ( sorb, False, Iop_Sub8, True, sz, delta, "sub" );
+ break;
+
+ case 0x32: /* XOR Eb,Gb */
+ delta = dis_op2_E_G ( sorb, False, Iop_Xor8, True, 1, delta, "xor" );
+ break;
+ case 0x33: /* XOR Ev,Gv */
+ delta = dis_op2_E_G ( sorb, False, Iop_Xor8, True, sz, delta, "xor" );
+ break;
+
+ case 0x3A: /* CMP Eb,Gb */
+ delta = dis_op2_E_G ( sorb, False, Iop_Sub8, False, 1, delta, "cmp" );
+ break;
+ case 0x3B: /* CMP Ev,Gv */
+ delta = dis_op2_E_G ( sorb, False, Iop_Sub8, False, sz, delta, "cmp" );
+ break;
+
+ case 0x84: /* TEST Eb,Gb */
+ delta = dis_op2_E_G ( sorb, False, Iop_And8, False, 1, delta, "test" );
+ break;
+ case 0x85: /* TEST Ev,Gv */
+ delta = dis_op2_E_G ( sorb, False, Iop_And8, False, sz, delta, "test" );
+ break;
+
+ /* ------------------------ opl Gv, Ev ----------------- */
+
+ case 0x00: /* ADD Gb,Eb */
+ delta = dis_op2_G_E ( sorb, pfx_lock, False,
+ Iop_Add8, True, 1, delta, "add" );
+ break;
+ case 0x01: /* ADD Gv,Ev */
+ delta = dis_op2_G_E ( sorb, pfx_lock, False,
+ Iop_Add8, True, sz, delta, "add" );
+ break;
+
+ case 0x08: /* OR Gb,Eb */
+ delta = dis_op2_G_E ( sorb, pfx_lock, False,
+ Iop_Or8, True, 1, delta, "or" );
+ break;
+ case 0x09: /* OR Gv,Ev */
+ delta = dis_op2_G_E ( sorb, pfx_lock, False,
+ Iop_Or8, True, sz, delta, "or" );
+ break;
+
+ case 0x10: /* ADC Gb,Eb */
+ delta = dis_op2_G_E ( sorb, pfx_lock, True,
+ Iop_Add8, True, 1, delta, "adc" );
+ break;
+ case 0x11: /* ADC Gv,Ev */
+ delta = dis_op2_G_E ( sorb, pfx_lock, True,
+ Iop_Add8, True, sz, delta, "adc" );
+ break;
+
+ case 0x18: /* SBB Gb,Eb */
+ delta = dis_op2_G_E ( sorb, pfx_lock, True,
+ Iop_Sub8, True, 1, delta, "sbb" );
+ break;
+ case 0x19: /* SBB Gv,Ev */
+ delta = dis_op2_G_E ( sorb, pfx_lock, True,
+ Iop_Sub8, True, sz, delta, "sbb" );
+ break;
+
+ case 0x20: /* AND Gb,Eb */
+ delta = dis_op2_G_E ( sorb, pfx_lock, False,
+ Iop_And8, True, 1, delta, "and" );
+ break;
+ case 0x21: /* AND Gv,Ev */
+ delta = dis_op2_G_E ( sorb, pfx_lock, False,
+ Iop_And8, True, sz, delta, "and" );
+ break;
+
+ case 0x28: /* SUB Gb,Eb */
+ delta = dis_op2_G_E ( sorb, pfx_lock, False,
+ Iop_Sub8, True, 1, delta, "sub" );
+ break;
+ case 0x29: /* SUB Gv,Ev */
+ delta = dis_op2_G_E ( sorb, pfx_lock, False,
+ Iop_Sub8, True, sz, delta, "sub" );
+ break;
+
+ case 0x30: /* XOR Gb,Eb */
+ delta = dis_op2_G_E ( sorb, pfx_lock, False,
+ Iop_Xor8, True, 1, delta, "xor" );
+ break;
+ case 0x31: /* XOR Gv,Ev */
+ delta = dis_op2_G_E ( sorb, pfx_lock, False,
+ Iop_Xor8, True, sz, delta, "xor" );
+ break;
+
+ case 0x38: /* CMP Gb,Eb */
+ delta = dis_op2_G_E ( sorb, pfx_lock, False,
+ Iop_Sub8, False, 1, delta, "cmp" );
+ break;
+ case 0x39: /* CMP Gv,Ev */
+ delta = dis_op2_G_E ( sorb, pfx_lock, False,
+ Iop_Sub8, False, sz, delta, "cmp" );
+ break;
+
+ /* ------------------------ POP ------------------------ */
+
+ case 0x58: /* POP eAX */
+ case 0x59: /* POP eCX */
+ case 0x5A: /* POP eDX */
+ case 0x5B: /* POP eBX */
+ case 0x5D: /* POP eBP */
+ case 0x5E: /* POP eSI */
+ case 0x5F: /* POP eDI */
+ case 0x5C: /* POP eSP */
+ vassert(sz == 2 || sz == 4);
+ t1 = newTemp(szToITy(sz)); t2 = newTemp(Ity_I32);
+ assign(t2, getIReg(4, R_ESP));
+ assign(t1, loadLE(szToITy(sz),mkexpr(t2)));
+ putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(t2), mkU32(sz)));
+ putIReg(sz, opc-0x58, mkexpr(t1));
+ DIP("pop%c %s\n", nameISize(sz), nameIReg(sz,opc-0x58));
+ break;
+
+ case 0x9D: /* POPF */
+ vassert(sz == 2 || sz == 4);
+ t1 = newTemp(Ity_I32); t2 = newTemp(Ity_I32);
+ assign(t2, getIReg(4, R_ESP));
+ assign(t1, widenUto32(loadLE(szToITy(sz),mkexpr(t2))));
+ putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(t2), mkU32(sz)));
+
+ /* Generate IR to set %EFLAGS{O,S,Z,A,C,P,D,ID,AC} from the
+ value in t1. */
+ set_EFLAGS_from_value( t1, True/*emit_AC_emwarn*/,
+ ((Addr32)guest_EIP_bbstart)+delta );
+
+ DIP("popf%c\n", nameISize(sz));
+ break;
+
+ case 0x61: /* POPA */
+ /* This is almost certainly wrong for sz==2. So ... */
+ if (sz != 4) goto decode_failure;
+
+ /* t5 is the old %ESP value. */
+ t5 = newTemp(Ity_I32);
+ assign( t5, getIReg(4, R_ESP) );
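+         /* Stack layout, from t5 upwards, matching what PUSHA wrote:
+            EDI, ESI, EBP, the saved ESP (ignored), EBX, EDX, ECX, EAX. */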
+
+ /* Reload all the registers, except %esp. */
+ putIReg(4,R_EAX, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32(28)) ));
+ putIReg(4,R_ECX, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32(24)) ));
+ putIReg(4,R_EDX, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32(20)) ));
+ putIReg(4,R_EBX, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32(16)) ));
+ /* ignore saved %ESP */
+ putIReg(4,R_EBP, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32( 8)) ));
+ putIReg(4,R_ESI, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32( 4)) ));
+ putIReg(4,R_EDI, loadLE(Ity_I32, binop(Iop_Add32,mkexpr(t5),mkU32( 0)) ));
+
+ /* and move %ESP back up */
+ putIReg( 4, R_ESP, binop(Iop_Add32, mkexpr(t5), mkU32(8*4)) );
+
+ DIP("popa%c\n", nameISize(sz));
+ break;
+
+ case 0x8F: /* POPL/POPW m32 */
+ { Int len;
+ UChar rm = getIByte(delta);
+
+        /* make sure this really is a POP insn (reg field must be 0) */
+ if (epartIsReg(rm) || gregOfRM(rm) != 0)
+ goto decode_failure;
+ /* and has correct size */
+ if (sz != 4 && sz != 2)
+ goto decode_failure;
+ ty = szToITy(sz);
+
+ t1 = newTemp(Ity_I32); /* stack address */
+ t3 = newTemp(ty); /* data */
+ /* set t1 to ESP: t1 = ESP */
+ assign( t1, getIReg(4, R_ESP) );
+ /* load M[ESP] to virtual register t3: t3 = M[t1] */
+ assign( t3, loadLE(ty, mkexpr(t1)) );
+
+ /* increase ESP; must be done before the STORE. Intel manual says:
+ If the ESP register is used as a base register for addressing
+ a destination operand in memory, the POP instruction computes
+ the effective address of the operand after it increments the
+ ESP register.
+ */
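+        /* e.g. for "popl 4(%esp)", the store address is computed from
+           the already-incremented ESP. */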
+ putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(t1), mkU32(sz)) );
+
+ /* resolve MODR/M */
+ addr = disAMode ( &len, sorb, delta, dis_buf);
+ storeLE( mkexpr(addr), mkexpr(t3) );
+
+ DIP("pop%c %s\n", sz==2 ? 'w' : 'l', dis_buf);
+
+ delta += len;
+ break;
+ }
+
+ case 0x1F: /* POP %DS */
+ dis_pop_segreg( R_DS, sz ); break;
+ case 0x07: /* POP %ES */
+ dis_pop_segreg( R_ES, sz ); break;
+ case 0x17: /* POP %SS */
+ dis_pop_segreg( R_SS, sz ); break;
+
+ /* ------------------------ PUSH ----------------------- */
+
+ case 0x50: /* PUSH eAX */
+ case 0x51: /* PUSH eCX */
+ case 0x52: /* PUSH eDX */
+ case 0x53: /* PUSH eBX */
+ case 0x55: /* PUSH eBP */
+ case 0x56: /* PUSH eSI */
+ case 0x57: /* PUSH eDI */
+ case 0x54: /* PUSH eSP */
+ /* This is the Right Way, in that the value to be pushed is
+ established before %esp is changed, so that pushl %esp
+ correctly pushes the old value. */
+ vassert(sz == 2 || sz == 4);
+ ty = sz==2 ? Ity_I16 : Ity_I32;
+ t1 = newTemp(ty); t2 = newTemp(Ity_I32);
+ assign(t1, getIReg(sz, opc-0x50));
+ assign(t2, binop(Iop_Sub32, getIReg(4, R_ESP), mkU32(sz)));
+ putIReg(4, R_ESP, mkexpr(t2) );
+ storeLE(mkexpr(t2),mkexpr(t1));
+ DIP("push%c %s\n", nameISize(sz), nameIReg(sz,opc-0x50));
+ break;
+
+
+ case 0x68: /* PUSH Iv */
+ d32 = getUDisp(sz,delta); delta += sz;
+ goto do_push_I;
+ case 0x6A: /* PUSH Ib, sign-extended to sz */
+ d32 = getSDisp8(delta); delta += 1;
+ goto do_push_I;
+ do_push_I:
+ ty = szToITy(sz);
+ t1 = newTemp(Ity_I32); t2 = newTemp(ty);
+ assign( t1, binop(Iop_Sub32,getIReg(4,R_ESP),mkU32(sz)) );
+ putIReg(4, R_ESP, mkexpr(t1) );
+ /* stop mkU16 asserting if d32 is a negative 16-bit number
+ (bug #132813) */
+ if (ty == Ity_I16)
+ d32 &= 0xFFFF;
+ storeLE( mkexpr(t1), mkU(ty,d32) );
+ DIP("push%c $0x%x\n", nameISize(sz), d32);
+ break;
+
+ case 0x9C: /* PUSHF */ {
+ vassert(sz == 2 || sz == 4);
+
+ t1 = newTemp(Ity_I32);
+ assign( t1, binop(Iop_Sub32,getIReg(4,R_ESP),mkU32(sz)) );
+ putIReg(4, R_ESP, mkexpr(t1) );
+
+ /* Calculate OSZACP, and patch in fixed fields as per
+ Intel docs.
+ - bit 1 is always 1
+ - bit 9 is Interrupt Enable (should always be 1 in user mode?)
+ */
+ t2 = newTemp(Ity_I32);
+ assign( t2, binop(Iop_Or32,
+ mk_x86g_calculate_eflags_all(),
+ mkU32( (1<<1)|(1<<9) ) ));
+
+ /* Patch in the D flag. This can simply be a copy of bit 10 of
+ baseBlock[OFFB_DFLAG]. */
+ t3 = newTemp(Ity_I32);
+ assign( t3, binop(Iop_Or32,
+ mkexpr(t2),
+ binop(Iop_And32,
+ IRExpr_Get(OFFB_DFLAG,Ity_I32),
+ mkU32(1<<10)))
+ );
+
+ /* And patch in the ID flag. */
+ t4 = newTemp(Ity_I32);
+ assign( t4, binop(Iop_Or32,
+ mkexpr(t3),
+ binop(Iop_And32,
+ binop(Iop_Shl32, IRExpr_Get(OFFB_IDFLAG,Ity_I32),
+ mkU8(21)),
+ mkU32(1<<21)))
+ );
+
+ /* And patch in the AC flag. */
+ t5 = newTemp(Ity_I32);
+ assign( t5, binop(Iop_Or32,
+ mkexpr(t4),
+ binop(Iop_And32,
+ binop(Iop_Shl32, IRExpr_Get(OFFB_ACFLAG,Ity_I32),
+ mkU8(18)),
+ mkU32(1<<18)))
+ );
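+         /* (The guest's ID and AC flags are each stored as 0 or 1;
+            shifting left by 21 resp. 18 moves them to their EFLAGS
+            bit positions before masking.) */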
+
+ /* if sz==2, the stored value needs to be narrowed. */
+ if (sz == 2)
+ storeLE( mkexpr(t1), unop(Iop_32to16,mkexpr(t5)) );
+ else
+ storeLE( mkexpr(t1), mkexpr(t5) );
+
+ DIP("pushf%c\n", nameISize(sz));
+ break;
+ }
+
+ case 0x60: /* PUSHA */
+ /* This is almost certainly wrong for sz==2. So ... */
+ if (sz != 4) goto decode_failure;
+
+ /* This is the Right Way, in that the value to be pushed is
+ established before %esp is changed, so that pusha
+ correctly pushes the old %esp value. New value of %esp is
+ pushed at start. */
+ /* t0 is the %ESP value we're going to push. */
+ t0 = newTemp(Ity_I32);
+ assign( t0, getIReg(4, R_ESP) );
+
+ /* t5 will be the new %ESP value. */
+ t5 = newTemp(Ity_I32);
+ assign( t5, binop(Iop_Sub32, mkexpr(t0), mkU32(8*4)) );
+
+ /* Update guest state before prodding memory. */
+ putIReg(4, R_ESP, mkexpr(t5));
+
+ /* Dump all the registers. */
+ storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(28)), getIReg(4,R_EAX) );
+ storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(24)), getIReg(4,R_ECX) );
+ storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(20)), getIReg(4,R_EDX) );
+ storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(16)), getIReg(4,R_EBX) );
+ storeLE( binop(Iop_Add32,mkexpr(t5),mkU32(12)), mkexpr(t0) /*esp*/);
+ storeLE( binop(Iop_Add32,mkexpr(t5),mkU32( 8)), getIReg(4,R_EBP) );
+ storeLE( binop(Iop_Add32,mkexpr(t5),mkU32( 4)), getIReg(4,R_ESI) );
+ storeLE( binop(Iop_Add32,mkexpr(t5),mkU32( 0)), getIReg(4,R_EDI) );
+
+ DIP("pusha%c\n", nameISize(sz));
+ break;
+
+ case 0x0E: /* PUSH %CS */
+ dis_push_segreg( R_CS, sz ); break;
+ case 0x1E: /* PUSH %DS */
+ dis_push_segreg( R_DS, sz ); break;
+ case 0x06: /* PUSH %ES */
+ dis_push_segreg( R_ES, sz ); break;
+ case 0x16: /* PUSH %SS */
+ dis_push_segreg( R_SS, sz ); break;
+
+ /* ------------------------ SCAS et al ----------------- */
+
+ case 0xA4: /* MOVS, no REP prefix */
+ case 0xA5:
+ if (sorb != 0)
+ goto decode_failure; /* else dis_string_op asserts */
+ dis_string_op( dis_MOVS, ( opc == 0xA4 ? 1 : sz ), "movs", sorb );
+ break;
+
+ case 0xA6: /* CMPSb, no REP prefix */
+ case 0xA7:
+ if (sorb != 0)
+ goto decode_failure; /* else dis_string_op asserts */
+ dis_string_op( dis_CMPS, ( opc == 0xA6 ? 1 : sz ), "cmps", sorb );
+ break;
+
+ case 0xAA: /* STOS, no REP prefix */
+ case 0xAB:
+ if (sorb != 0)
+ goto decode_failure; /* else dis_string_op asserts */
+ dis_string_op( dis_STOS, ( opc == 0xAA ? 1 : sz ), "stos", sorb );
+ break;
+
+ case 0xAC: /* LODS, no REP prefix */
+ case 0xAD:
+ if (sorb != 0)
+ goto decode_failure; /* else dis_string_op asserts */
+ dis_string_op( dis_LODS, ( opc == 0xAC ? 1 : sz ), "lods", sorb );
+ break;
+
+ case 0xAE: /* SCAS, no REP prefix */
+ case 0xAF:
+ if (sorb != 0)
+ goto decode_failure; /* else dis_string_op asserts */
+ dis_string_op( dis_SCAS, ( opc == 0xAE ? 1 : sz ), "scas", sorb );
+ break;
+
+
+ case 0xFC: /* CLD */
+ stmt( IRStmt_Put( OFFB_DFLAG, mkU32(1)) );
+ DIP("cld\n");
+ break;
+
+ case 0xFD: /* STD */
+ stmt( IRStmt_Put( OFFB_DFLAG, mkU32(0xFFFFFFFF)) );
+ DIP("std\n");
+ break;
+
+ case 0xF8: /* CLC */
+ case 0xF9: /* STC */
+ case 0xF5: /* CMC */
+ t0 = newTemp(Ity_I32);
+ t1 = newTemp(Ity_I32);
+ assign( t0, mk_x86g_calculate_eflags_all() );
+ switch (opc) {
+ case 0xF8:
+ assign( t1, binop(Iop_And32, mkexpr(t0),
+ mkU32(~X86G_CC_MASK_C)));
+ DIP("clc\n");
+ break;
+ case 0xF9:
+ assign( t1, binop(Iop_Or32, mkexpr(t0),
+ mkU32(X86G_CC_MASK_C)));
+ DIP("stc\n");
+ break;
+ case 0xF5:
+ assign( t1, binop(Iop_Xor32, mkexpr(t0),
+ mkU32(X86G_CC_MASK_C)));
+ DIP("cmc\n");
+ break;
+ default:
+ vpanic("disInstr(x86)(clc/stc/cmc)");
+ }
+ stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) ));
+ stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
+ stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t1) ));
+ /* Set NDEP even though it isn't used. This makes redundant-PUT
+ elimination of previous stores to this field work better. */
+ stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
+ break;
+
+ case 0xD6: /* SALC */
+ t0 = newTemp(Ity_I32);
+ t1 = newTemp(Ity_I32);
+ assign( t0, binop(Iop_And32,
+ mk_x86g_calculate_eflags_c(),
+ mkU32(1)) );
+ assign( t1, binop(Iop_Sar32,
+ binop(Iop_Shl32, mkexpr(t0), mkU8(31)),
+ mkU8(31)) );
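+         /* The Shl/Sar pair replicates bit 0 of t0 across the word,
+            so t1 is 0xFFFFFFFF if CF was set and 0 otherwise, and AL
+            becomes 0xFF or 0x00. */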
+ putIReg(1, R_EAX, unop(Iop_32to8, mkexpr(t1)) );
+ DIP("salc\n");
+ break;
+
+ /* REPNE prefix insn */
+ case 0xF2: {
+ Addr32 eip_orig = guest_EIP_bbstart + delta_start;
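+         /* eip_orig is the address of this insn itself: the REP
+            expansion performs at most one iteration per pass and
+            loops by jumping back here.  (Same scheme for the
+            REP/REPE cases below.) */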
+ if (sorb != 0) goto decode_failure;
+ abyte = getIByte(delta); delta++;
+
+ if (abyte == 0x66) { sz = 2; abyte = getIByte(delta); delta++; }
+ dres.whatNext = Dis_StopHere;
+
+ switch (abyte) {
+ /* According to the Intel manual, "repne movs" should never occur, but
+ * in practice it has happened, so allow for it here... */
+ case 0xA4: sz = 1; /* REPNE MOVS<sz> */
+ case 0xA5:
+ dis_REP_op ( X86CondNZ, dis_MOVS, sz, eip_orig,
+ guest_EIP_bbstart+delta, "repne movs" );
+ break;
+
+ case 0xA6: sz = 1; /* REPNE CMP<sz> */
+ case 0xA7:
+ dis_REP_op ( X86CondNZ, dis_CMPS, sz, eip_orig,
+ guest_EIP_bbstart+delta, "repne cmps" );
+ break;
+
+ case 0xAA: sz = 1; /* REPNE STOS<sz> */
+ case 0xAB:
+ dis_REP_op ( X86CondNZ, dis_STOS, sz, eip_orig,
+ guest_EIP_bbstart+delta, "repne stos" );
+ break;
+
+ case 0xAE: sz = 1; /* REPNE SCAS<sz> */
+ case 0xAF:
+ dis_REP_op ( X86CondNZ, dis_SCAS, sz, eip_orig,
+ guest_EIP_bbstart+delta, "repne scas" );
+ break;
+
+ default:
+ goto decode_failure;
+ }
+ break;
+ }
+
+ /* REP/REPE prefix insn (for SCAS and CMPS, 0xF3 means REPE,
+ for the rest, it means REP) */
+ case 0xF3: {
+ Addr32 eip_orig = guest_EIP_bbstart + delta_start;
+ if (sorb != 0) goto decode_failure;
+ abyte = getIByte(delta); delta++;
+
+ if (abyte == 0x66) { sz = 2; abyte = getIByte(delta); delta++; }
+ dres.whatNext = Dis_StopHere;
+
+ switch (abyte) {
+ case 0xA4: sz = 1; /* REP MOVS<sz> */
+ case 0xA5:
+ dis_REP_op ( X86CondAlways, dis_MOVS, sz, eip_orig,
+ guest_EIP_bbstart+delta, "rep movs" );
+ break;
+
+ case 0xA6: sz = 1; /* REPE CMP<sz> */
+ case 0xA7:
+ dis_REP_op ( X86CondZ, dis_CMPS, sz, eip_orig,
+ guest_EIP_bbstart+delta, "repe cmps" );
+ break;
+
+ case 0xAA: sz = 1; /* REP STOS<sz> */
+ case 0xAB:
+ dis_REP_op ( X86CondAlways, dis_STOS, sz, eip_orig,
+ guest_EIP_bbstart+delta, "rep stos" );
+ break;
+
+ case 0xAC: sz = 1; /* REP LODS<sz> */
+ case 0xAD:
+ dis_REP_op ( X86CondAlways, dis_LODS, sz, eip_orig,
+ guest_EIP_bbstart+delta, "rep lods" );
+ break;
+
+ case 0xAE: sz = 1; /* REPE SCAS<sz> */
+ case 0xAF:
+ dis_REP_op ( X86CondZ, dis_SCAS, sz, eip_orig,
+ guest_EIP_bbstart+delta, "repe scas" );
+ break;
+
+ case 0x90: /* REP NOP (PAUSE) */
+ /* a hint to the P4 re spin-wait loop */
+ DIP("rep nop (P4 pause)\n");
+ /* "observe" the hint. The Vex client needs to be careful not
+ to cause very long delays as a result, though. */
+ jmp_lit(Ijk_Yield, ((Addr32)guest_EIP_bbstart)+delta);
+ dres.whatNext = Dis_StopHere;
+ break;
+
+ case 0xC3: /* REP RET -- same as normal ret? */
+ dis_ret(0);
+ dres.whatNext = Dis_StopHere;
+ DIP("rep ret\n");
+ break;
+
+ default:
+ goto decode_failure;
+ }
+ break;
+ }
+
+ /* ------------------------ XCHG ----------------------- */
+
+ /* XCHG reg,mem automatically asserts LOCK# even without a LOCK
+ prefix; hence it must be translated with an IRCAS (at least, the
+ memory variant). */
+ case 0x86: /* XCHG Gb,Eb */
+ sz = 1;
+ /* Fall through ... */
+ case 0x87: /* XCHG Gv,Ev */
+ modrm = getIByte(delta);
+ ty = szToITy(sz);
+ t1 = newTemp(ty); t2 = newTemp(ty);
+ if (epartIsReg(modrm)) {
+ assign(t1, getIReg(sz, eregOfRM(modrm)));
+ assign(t2, getIReg(sz, gregOfRM(modrm)));
+ putIReg(sz, gregOfRM(modrm), mkexpr(t1));
+ putIReg(sz, eregOfRM(modrm), mkexpr(t2));
+ delta++;
+ DIP("xchg%c %s, %s\n",
+ nameISize(sz), nameIReg(sz,gregOfRM(modrm)),
+ nameIReg(sz,eregOfRM(modrm)));
+ } else {
+ *expect_CAS = True;
+ addr = disAMode ( &alen, sorb, delta, dis_buf );
+ assign( t1, loadLE(ty,mkexpr(addr)) );
+ assign( t2, getIReg(sz,gregOfRM(modrm)) );
+ casLE( mkexpr(addr),
+ mkexpr(t1), mkexpr(t2), guest_EIP_curr_instr );
+ putIReg( sz, gregOfRM(modrm), mkexpr(t1) );
+ delta += alen;
+ DIP("xchg%c %s, %s\n", nameISize(sz),
+ nameIReg(sz,gregOfRM(modrm)), dis_buf);
+ }
+ break;
+
+ case 0x90: /* XCHG eAX,eAX */
+ DIP("nop\n");
+ break;
+ case 0x91: /* XCHG eAX,eCX */
+ case 0x92: /* XCHG eAX,eDX */
+ case 0x93: /* XCHG eAX,eBX */
+ case 0x94: /* XCHG eAX,eSP */
+ case 0x95: /* XCHG eAX,eBP */
+ case 0x96: /* XCHG eAX,eSI */
+ case 0x97: /* XCHG eAX,eDI */
+ codegen_xchg_eAX_Reg ( sz, opc - 0x90 );
+ break;
+
+ /* ------------------------ XLAT ----------------------- */
+
+ case 0xD7: /* XLAT */
+         /* sz == 2 (0x66 prefix) is architecturally allowed too, but
+            is not handled here. */
+         if (sz != 4) goto decode_failure;
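+         /* XLAT: AL := the byte at seg:[EBX + zero-extended AL]. */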
+ putIReg(
+ 1,
+ R_EAX/*AL*/,
+ loadLE(Ity_I8,
+ handleSegOverride(
+ sorb,
+ binop(Iop_Add32,
+ getIReg(4, R_EBX),
+ unop(Iop_8Uto32, getIReg(1, R_EAX/*AL*/))))));
+
+ DIP("xlat%c [ebx]\n", nameISize(sz));
+ break;
+
+ /* ------------------------ IN / OUT ----------------------- */
+
+ case 0xE4: /* IN imm8, AL */
+ sz = 1;
+ t1 = newTemp(Ity_I32);
+ abyte = getIByte(delta); delta++;
+ assign(t1, mkU32( abyte & 0xFF ));
+ DIP("in%c $%d,%s\n", nameISize(sz), (Int)abyte, nameIReg(sz,R_EAX));
+ goto do_IN;
+ case 0xE5: /* IN imm8, eAX */
+ vassert(sz == 2 || sz == 4);
+ t1 = newTemp(Ity_I32);
+ abyte = getIByte(delta); delta++;
+ assign(t1, mkU32( abyte & 0xFF ));
+ DIP("in%c $%d,%s\n", nameISize(sz), (Int)abyte, nameIReg(sz,R_EAX));
+ goto do_IN;
+ case 0xEC: /* IN %DX, AL */
+ sz = 1;
+ t1 = newTemp(Ity_I32);
+ assign(t1, unop(Iop_16Uto32, getIReg(2, R_EDX)));
+ DIP("in%c %s,%s\n", nameISize(sz), nameIReg(2,R_EDX),
+ nameIReg(sz,R_EAX));
+ goto do_IN;
+ case 0xED: /* IN %DX, eAX */
+ vassert(sz == 2 || sz == 4);
+ t1 = newTemp(Ity_I32);
+ assign(t1, unop(Iop_16Uto32, getIReg(2, R_EDX)));
+ DIP("in%c %s,%s\n", nameISize(sz), nameIReg(2,R_EDX),
+ nameIReg(sz,R_EAX));
+ goto do_IN;
+ do_IN: {
+ /* At this point, sz indicates the width, and t1 is a 32-bit
+ value giving port number. */
+ IRDirty* d;
+ vassert(sz == 1 || sz == 2 || sz == 4);
+ ty = szToITy(sz);
+ t2 = newTemp(Ity_I32);
+ d = unsafeIRDirty_1_N(
+ t2,
+ 0/*regparms*/,
+ "x86g_dirtyhelper_IN",
+ &x86g_dirtyhelper_IN,
+ mkIRExprVec_2( mkexpr(t1), mkU32(sz) )
+ );
+ /* do the call, dumping the result in t2. */
+ stmt( IRStmt_Dirty(d) );
+ putIReg(sz, R_EAX, narrowTo( ty, mkexpr(t2) ) );
+ break;
+ }
+
+ case 0xE6: /* OUT AL, imm8 */
+ sz = 1;
+ t1 = newTemp(Ity_I32);
+ abyte = getIByte(delta); delta++;
+ assign( t1, mkU32( abyte & 0xFF ) );
+ DIP("out%c %s,$%d\n", nameISize(sz), nameIReg(sz,R_EAX), (Int)abyte);
+ goto do_OUT;
+ case 0xE7: /* OUT eAX, imm8 */
+ vassert(sz == 2 || sz == 4);
+ t1 = newTemp(Ity_I32);
+ abyte = getIByte(delta); delta++;
+ assign( t1, mkU32( abyte & 0xFF ) );
+ DIP("out%c %s,$%d\n", nameISize(sz), nameIReg(sz,R_EAX), (Int)abyte);
+ goto do_OUT;
+ case 0xEE: /* OUT AL, %DX */
+ sz = 1;
+ t1 = newTemp(Ity_I32);
+ assign( t1, unop(Iop_16Uto32, getIReg(2, R_EDX)) );
+ DIP("out%c %s,%s\n", nameISize(sz), nameIReg(sz,R_EAX),
+ nameIReg(2,R_EDX));
+ goto do_OUT;
+ case 0xEF: /* OUT eAX, %DX */
+ vassert(sz == 2 || sz == 4);
+ t1 = newTemp(Ity_I32);
+ assign( t1, unop(Iop_16Uto32, getIReg(2, R_EDX)) );
+ DIP("out%c %s,%s\n", nameISize(sz), nameIReg(sz,R_EAX),
+ nameIReg(2,R_EDX));
+ goto do_OUT;
+ do_OUT: {
+ /* At this point, sz indicates the width, and t1 is a 32-bit
+ value giving port number. */
+ IRDirty* d;
+ vassert(sz == 1 || sz == 2 || sz == 4);
+ ty = szToITy(sz);
+ d = unsafeIRDirty_0_N(
+ 0/*regparms*/,
+ "x86g_dirtyhelper_OUT",
+ &x86g_dirtyhelper_OUT,
+ mkIRExprVec_3( mkexpr(t1),
+ widenUto32( getIReg(sz, R_EAX) ),
+ mkU32(sz) )
+ );
+ stmt( IRStmt_Dirty(d) );
+ break;
+ }
+
+ /* ------------------------ (Grp1 extensions) ---------- */
+
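+      /* In the Grp1 (and Grp2) cases, am_sz is the length in bytes of
+         the mod/rm and amode, and d_sz the size of the immediate (zero
+         if there is none), so the immediate is fetched from code
+         offset delta + am_sz. */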
+ case 0x82: /* Grp1 Ib,Eb too. Apparently this is the same as
+ case 0x80, but only in 32-bit mode. */
+ /* fallthru */
+ case 0x80: /* Grp1 Ib,Eb */
+ modrm = getIByte(delta);
+ am_sz = lengthAMode(delta);
+ sz = 1;
+ d_sz = 1;
+ d32 = getUChar(delta + am_sz);
+ delta = dis_Grp1 ( sorb, pfx_lock, delta, modrm, am_sz, d_sz, sz, d32 );
+ break;
+
+ case 0x81: /* Grp1 Iv,Ev */
+ modrm = getIByte(delta);
+ am_sz = lengthAMode(delta);
+ d_sz = sz;
+ d32 = getUDisp(d_sz, delta + am_sz);
+ delta = dis_Grp1 ( sorb, pfx_lock, delta, modrm, am_sz, d_sz, sz, d32 );
+ break;
+
+ case 0x83: /* Grp1 Ib,Ev */
+ modrm = getIByte(delta);
+ am_sz = lengthAMode(delta);
+ d_sz = 1;
+ d32 = getSDisp8(delta + am_sz);
+ delta = dis_Grp1 ( sorb, pfx_lock, delta, modrm, am_sz, d_sz, sz, d32 );
+ break;
+
+ /* ------------------------ (Grp2 extensions) ---------- */
+
+ case 0xC0: { /* Grp2 Ib,Eb */
+ Bool decode_OK = True;
+ modrm = getIByte(delta);
+ am_sz = lengthAMode(delta);
+ d_sz = 1;
+ d32 = getUChar(delta + am_sz);
+ sz = 1;
+ delta = dis_Grp2 ( sorb, delta, modrm, am_sz, d_sz, sz,
+ mkU8(d32 & 0xFF), NULL, &decode_OK );
+ if (!decode_OK)
+ goto decode_failure;
+ break;
+ }
+ case 0xC1: { /* Grp2 Ib,Ev */
+ Bool decode_OK = True;
+ modrm = getIByte(delta);
+ am_sz = lengthAMode(delta);
+ d_sz = 1;
+ d32 = getUChar(delta + am_sz);
+ delta = dis_Grp2 ( sorb, delta, modrm, am_sz, d_sz, sz,
+ mkU8(d32 & 0xFF), NULL, &decode_OK );
+ if (!decode_OK)
+ goto decode_failure;
+ break;
+ }
+ case 0xD0: { /* Grp2 1,Eb */
+ Bool decode_OK = True;
+ modrm = getIByte(delta);
+ am_sz = lengthAMode(delta);
+ d_sz = 0;
+ d32 = 1;
+ sz = 1;
+ delta = dis_Grp2 ( sorb, delta, modrm, am_sz, d_sz, sz,
+ mkU8(d32), NULL, &decode_OK );
+ if (!decode_OK)
+ goto decode_failure;
+ break;
+ }
+ case 0xD1: { /* Grp2 1,Ev */
+ Bool decode_OK = True;
+ modrm = getUChar(delta);
+ am_sz = lengthAMode(delta);
+ d_sz = 0;
+ d32 = 1;
+ delta = dis_Grp2 ( sorb, delta, modrm, am_sz, d_sz, sz,
+ mkU8(d32), NULL, &decode_OK );
+ if (!decode_OK)
+ goto decode_failure;
+ break;
+ }
+ case 0xD2: { /* Grp2 CL,Eb */
+ Bool decode_OK = True;
+ modrm = getUChar(delta);
+ am_sz = lengthAMode(delta);
+ d_sz = 0;
+ sz = 1;
+ delta = dis_Grp2 ( sorb, delta, modrm, am_sz, d_sz, sz,
+ getIReg(1,R_ECX), "%cl", &decode_OK );
+ if (!decode_OK)
+ goto decode_failure;
+ break;
+ }
+ case 0xD3: { /* Grp2 CL,Ev */
+ Bool decode_OK = True;
+ modrm = getIByte(delta);
+ am_sz = lengthAMode(delta);
+ d_sz = 0;
+ delta = dis_Grp2 ( sorb, delta, modrm, am_sz, d_sz, sz,
+ getIReg(1,R_ECX), "%cl", &decode_OK );
+ if (!decode_OK)
+ goto decode_failure;
+ break;
+ }
+
+ /* ------------------------ (Grp3 extensions) ---------- */
+
+ case 0xF6: { /* Grp3 Eb */
+ Bool decode_OK = True;
+ delta = dis_Grp3 ( sorb, pfx_lock, 1, delta, &decode_OK );
+ if (!decode_OK)
+ goto decode_failure;
+ break;
+ }
+ case 0xF7: { /* Grp3 Ev */
+ Bool decode_OK = True;
+ delta = dis_Grp3 ( sorb, pfx_lock, sz, delta, &decode_OK );
+ if (!decode_OK)
+ goto decode_failure;
+ break;
+ }
+
+ /* ------------------------ (Grp4 extensions) ---------- */
+
+ case 0xFE: { /* Grp4 Eb */
+ Bool decode_OK = True;
+ delta = dis_Grp4 ( sorb, pfx_lock, delta, &decode_OK );
+ if (!decode_OK)
+ goto decode_failure;
+ break;
+ }
+
+ /* ------------------------ (Grp5 extensions) ---------- */
+
+ case 0xFF: { /* Grp5 Ev */
+ Bool decode_OK = True;
+ delta = dis_Grp5 ( sorb, pfx_lock, sz, delta, &dres, &decode_OK );
+ if (!decode_OK)
+ goto decode_failure;
+ break;
+ }
+
+ /* ------------------------ Escapes to 2-byte opcodes -- */
+
+ case 0x0F: {
+ opc = getIByte(delta); delta++;
+ switch (opc) {
+
+ /* =-=-=-=-=-=-=-=-=- Grp8 =-=-=-=-=-=-=-=-=-=-=-= */
+
+ case 0xBA: { /* Grp8 Ib,Ev */
+ Bool decode_OK = False;
+ modrm = getUChar(delta);
+ am_sz = lengthAMode(delta);
+ d32 = getSDisp8(delta + am_sz);
+ delta = dis_Grp8_Imm ( sorb, pfx_lock, delta, modrm,
+ am_sz, sz, d32, &decode_OK );
+ if (!decode_OK)
+ goto decode_failure;
+ break;
+ }
+
+ /* =-=-=-=-=-=-=-=-=- BSF/BSR -=-=-=-=-=-=-=-=-=-= */
+
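+         /* (the trailing Bool selects BSF when True, BSR when False) */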
+ case 0xBC: /* BSF Gv,Ev */
+ delta = dis_bs_E_G ( sorb, sz, delta, True );
+ break;
+ case 0xBD: /* BSR Gv,Ev */
+ delta = dis_bs_E_G ( sorb, sz, delta, False );
+ break;
+
+ /* =-=-=-=-=-=-=-=-=- BSWAP -=-=-=-=-=-=-=-=-=-=-= */
+
+ case 0xC8: /* BSWAP %eax */
+ case 0xC9:
+ case 0xCA:
+ case 0xCB:
+ case 0xCC:
+ case 0xCD:
+ case 0xCE:
+ case 0xCF: /* BSWAP %edi */
+ /* AFAICS from the Intel docs, this only exists at size 4. */
+ vassert(sz == 4);
+ t1 = newTemp(Ity_I32);
+ t2 = newTemp(Ity_I32);
+ assign( t1, getIReg(4, opc-0xC8) );
+
+ assign( t2,
+ binop(Iop_Or32,
+ binop(Iop_Shl32, mkexpr(t1), mkU8(24)),
+ binop(Iop_Or32,
+ binop(Iop_And32, binop(Iop_Shl32, mkexpr(t1), mkU8(8)),
+ mkU32(0x00FF0000)),
+ binop(Iop_Or32,
+ binop(Iop_And32, binop(Iop_Shr32, mkexpr(t1), mkU8(8)),
+ mkU32(0x0000FF00)),
+ binop(Iop_And32, binop(Iop_Shr32, mkexpr(t1), mkU8(24)),
+ mkU32(0x000000FF) )
+ )))
+ );
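+            /* e.g. t1 = 0x11223344 gives t2 = 0x44332211. */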
+
+ putIReg(4, opc-0xC8, mkexpr(t2));
+ DIP("bswapl %s\n", nameIReg(4, opc-0xC8));
+ break;
+
+ /* =-=-=-=-=-=-=-=-=- BT/BTS/BTR/BTC =-=-=-=-=-=-= */
+
+ case 0xA3: /* BT Gv,Ev */
+ delta = dis_bt_G_E ( vbi, sorb, pfx_lock, sz, delta, BtOpNone );
+ break;
+ case 0xB3: /* BTR Gv,Ev */
+ delta = dis_bt_G_E ( vbi, sorb, pfx_lock, sz, delta, BtOpReset );
+ break;
+ case 0xAB: /* BTS Gv,Ev */
+ delta = dis_bt_G_E ( vbi, sorb, pfx_lock, sz, delta, BtOpSet );
+ break;
+ case 0xBB: /* BTC Gv,Ev */
+ delta = dis_bt_G_E ( vbi, sorb, pfx_lock, sz, delta, BtOpComp );
+ break;
+
+ /* =-=-=-=-=-=-=-=-=- CMOV =-=-=-=-=-=-=-=-=-=-=-= */
+
+         case 0x40: /* CMOVOb (cmov overflow) */
+         case 0x41: /* CMOVNOb (cmov not overflow) */
+         case 0x42: /* CMOVBb/CMOVNAEb (cmov below) */
+         case 0x43: /* CMOVNBb/CMOVAEb (cmov not below) */
+         case 0x44: /* CMOVZb/CMOVEb (cmov zero) */
+         case 0x45: /* CMOVNZb/CMOVNEb (cmov not zero) */
+         case 0x46: /* CMOVBEb/CMOVNAb (cmov below or equal) */
+         case 0x47: /* CMOVNBEb/CMOVAb (cmov not below or equal) */
+         case 0x48: /* CMOVSb (cmov negative) */
+         case 0x49: /* CMOVNSb (cmov not negative) */
+ case 0x4A: /* CMOVP (cmov parity even) */
+ case 0x4B: /* CMOVNP (cmov parity odd) */
+ case 0x4C: /* CMOVLb/CMOVNGEb (cmov less) */
+ case 0x4D: /* CMOVGEb/CMOVNLb (cmov greater or equal) */
+ case 0x4E: /* CMOVLEb/CMOVNGb (cmov less or equal) */
+ case 0x4F: /* CMOVGb/CMOVNLEb (cmov greater) */
+ delta = dis_cmov_E_G(sorb, sz, (X86Condcode)(opc - 0x40), delta);
+ break;
+
+ /* =-=-=-=-=-=-=-=-=- CMPXCHG -=-=-=-=-=-=-=-=-=-= */
+
+ case 0xB0: /* CMPXCHG Gb,Eb */
+ delta = dis_cmpxchg_G_E ( sorb, pfx_lock, 1, delta );
+ break;
+ case 0xB1: /* CMPXCHG Gv,Ev */
+ delta = dis_cmpxchg_G_E ( sorb, pfx_lock, sz, delta );
+ break;
+
+ case 0xC7: { /* CMPXCHG8B Gv (0F C7 /1) */
+ IRTemp expdHi = newTemp(Ity_I32);
+ IRTemp expdLo = newTemp(Ity_I32);
+ IRTemp dataHi = newTemp(Ity_I32);
+ IRTemp dataLo = newTemp(Ity_I32);
+ IRTemp oldHi = newTemp(Ity_I32);
+ IRTemp oldLo = newTemp(Ity_I32);
+ IRTemp flags_old = newTemp(Ity_I32);
+ IRTemp flags_new = newTemp(Ity_I32);
+ IRTemp success = newTemp(Ity_I1);
+
+ /* Translate this using a DCAS, even if there is no LOCK
+ prefix. Life is too short to bother with generating two
+ different translations for the with/without-LOCK-prefix
+ cases. */
+ *expect_CAS = True;
+
+ /* Decode, and generate address. */
+ if (sz != 4) goto decode_failure;
+ modrm = getIByte(delta);
+ if (epartIsReg(modrm)) goto decode_failure;
+ if (gregOfRM(modrm) != 1) goto decode_failure;
+ addr = disAMode ( &alen, sorb, delta, dis_buf );
+ delta += alen;
+
+ /* Get the expected and new values. */
+ assign( expdHi, getIReg(4,R_EDX) );
+ assign( expdLo, getIReg(4,R_EAX) );
+ assign( dataHi, getIReg(4,R_ECX) );
+ assign( dataLo, getIReg(4,R_EBX) );
+
+ /* Do the DCAS */
+ stmt( IRStmt_CAS(
+ mkIRCAS( oldHi, oldLo,
+ Iend_LE, mkexpr(addr),
+ mkexpr(expdHi), mkexpr(expdLo),
+ mkexpr(dataHi), mkexpr(dataLo)
+ )));
+
+ /* success when oldHi:oldLo == expdHi:expdLo */
+ assign( success,
+ binop(Iop_CasCmpEQ32,
+ binop(Iop_Or32,
+ binop(Iop_Xor32, mkexpr(oldHi), mkexpr(expdHi)),
+ binop(Iop_Xor32, mkexpr(oldLo), mkexpr(expdLo))
+ ),
+ mkU32(0)
+ ));
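+            /* (oldHi ^ expdHi) | (oldLo ^ expdLo) is zero exactly
+               when both halves match. */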
+
+ /* If the DCAS is successful, that is to say oldHi:oldLo ==
+ expdHi:expdLo, then put expdHi:expdLo back in EDX:EAX,
+ which is where they came from originally. Both the actual
+ contents of these two regs, and any shadow values, are
+ unchanged. If the DCAS fails then we're putting into
+ EDX:EAX the value seen in memory. */
+ putIReg(4, R_EDX,
+ IRExpr_Mux0X( unop(Iop_1Uto8, mkexpr(success)),
+ mkexpr(oldHi),
+ mkexpr(expdHi)
+ ));
+ putIReg(4, R_EAX,
+ IRExpr_Mux0X( unop(Iop_1Uto8, mkexpr(success)),
+ mkexpr(oldLo),
+ mkexpr(expdLo)
+ ));
+
+ /* Copy the success bit into the Z flag and leave the others
+ unchanged */
+ assign( flags_old, widenUto32(mk_x86g_calculate_eflags_all()));
+ assign(
+ flags_new,
+ binop(Iop_Or32,
+ binop(Iop_And32, mkexpr(flags_old),
+ mkU32(~X86G_CC_MASK_Z)),
+ binop(Iop_Shl32,
+ binop(Iop_And32,
+ unop(Iop_1Uto32, mkexpr(success)), mkU32(1)),
+ mkU8(X86G_CC_SHIFT_Z)) ));
+
+ stmt( IRStmt_Put( OFFB_CC_OP, mkU32(X86G_CC_OP_COPY) ));
+ stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(flags_new) ));
+ stmt( IRStmt_Put( OFFB_CC_DEP2, mkU32(0) ));
+ /* Set NDEP even though it isn't used. This makes
+ redundant-PUT elimination of previous stores to this field
+ work better. */
+ stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
+
+ /* Sheesh. Aren't you glad it was me and not you that had to
+ write and validate all this grunge? */
+
+ DIP("cmpxchg8b %s\n", dis_buf);
+ break;
+ }
+
+ /* =-=-=-=-=-=-=-=-=- CPUID -=-=-=-=-=-=-=-=-=-=-= */
+
+ case 0xA2: { /* CPUID */
+ /* Uses dirty helper:
+ void dirtyhelper_CPUID_sse[012] ( VexGuestX86State* )
+ declared to mod eax, wr ebx, ecx, edx
+ */
+ IRDirty* d = NULL;
+ HChar* fName = NULL;
+ void* fAddr = NULL;
+ if (archinfo->hwcaps & VEX_HWCAPS_X86_SSE2) {
+ fName = "x86g_dirtyhelper_CPUID_sse2";
+ fAddr = &x86g_dirtyhelper_CPUID_sse2;
+ }
+ else
+ if (archinfo->hwcaps & VEX_HWCAPS_X86_SSE1) {
+ fName = "x86g_dirtyhelper_CPUID_sse1";
+ fAddr = &x86g_dirtyhelper_CPUID_sse1;
+ }
+ else
+ if (archinfo->hwcaps == 0/*no SSE*/) {
+ fName = "x86g_dirtyhelper_CPUID_sse0";
+ fAddr = &x86g_dirtyhelper_CPUID_sse0;
+ } else
+ vpanic("disInstr(x86)(cpuid)");
+
+ vassert(fName); vassert(fAddr);
+ d = unsafeIRDirty_0_N ( 0/*regparms*/,
+ fName, fAddr, mkIRExprVec_0() );
+ /* declare guest state effects */
+ d->needsBBP = True;
+ d->nFxState = 4;
+ d->fxState[0].fx = Ifx_Modify;
+ d->fxState[0].offset = OFFB_EAX;
+ d->fxState[0].size = 4;
+ d->fxState[1].fx = Ifx_Write;
+ d->fxState[1].offset = OFFB_EBX;
+ d->fxState[1].size = 4;
+ d->fxState[2].fx = Ifx_Modify;
+ d->fxState[2].offset = OFFB_ECX;
+ d->fxState[2].size = 4;
+ d->fxState[3].fx = Ifx_Write;
+ d->fxState[3].offset = OFFB_EDX;
+ d->fxState[3].size = 4;
+ /* execute the dirty call, side-effecting guest state */
+ stmt( IRStmt_Dirty(d) );
+ /* CPUID is a serialising insn. So, just in case someone is
+ using it as a memory fence ... */
+ stmt( IRStmt_MBE(Imbe_Fence) );
+ DIP("cpuid\n");
+ break;
+ }
+
+//-- if (!VG_(cpu_has_feature)(VG_X86_FEAT_CPUID))
+//-- goto decode_failure;
+//--
+//-- t1 = newTemp(cb);
+//-- t2 = newTemp(cb);
+//-- t3 = newTemp(cb);
+//-- t4 = newTemp(cb);
+//-- uInstr0(cb, CALLM_S, 0);
+//--
+//-- uInstr2(cb, GET, 4, ArchReg, R_EAX, TempReg, t1);
+//-- uInstr1(cb, PUSH, 4, TempReg, t1);
+//--
+//-- uInstr2(cb, MOV, 4, Literal, 0, TempReg, t2);
+//-- uLiteral(cb, 0);
+//-- uInstr1(cb, PUSH, 4, TempReg, t2);
+//--
+//-- uInstr2(cb, MOV, 4, Literal, 0, TempReg, t3);
+//-- uLiteral(cb, 0);
+//-- uInstr1(cb, PUSH, 4, TempReg, t3);
+//--
+//-- uInstr2(cb, MOV, 4, Literal, 0, TempReg, t4);
+//-- uLiteral(cb, 0);
+//-- uInstr1(cb, PUSH, 4, TempReg, t4);
+//--
+//-- uInstr1(cb, CALLM, 0, Lit16, VGOFF_(helper_CPUID));
+//-- uFlagsRWU(cb, FlagsEmpty, FlagsEmpty, FlagsEmpty);
+//--
+//-- uInstr1(cb, POP, 4, TempReg, t4);
+//-- uInstr2(cb, PUT, 4, TempReg, t4, ArchReg, R_EDX);
+//--
+//-- uInstr1(cb, POP, 4, TempReg, t3);
+//-- uInstr2(cb, PUT, 4, TempReg, t3, ArchReg, R_ECX);
+//--
+//-- uInstr1(cb, POP, 4, TempReg, t2);
+//-- uInstr2(cb, PUT, 4, TempReg, t2, ArchReg, R_EBX);
+//--
+//-- uInstr1(cb, POP, 4, TempReg, t1);
+//-- uInstr2(cb, PUT, 4, TempReg, t1, ArchReg, R_EAX);
+//--
+//-- uInstr0(cb, CALLM_E, 0);
+//-- DIP("cpuid\n");
+//-- break;
+//--
+ /* =-=-=-=-=-=-=-=-=- MOVZX, MOVSX =-=-=-=-=-=-=-= */
+
+ case 0xB6: /* MOVZXb Eb,Gv */
+ if (sz != 2 && sz != 4)
+ goto decode_failure;
+ delta = dis_movx_E_G ( sorb, delta, 1, sz, False );
+ break;
+
+ case 0xB7: /* MOVZXw Ew,Gv */
+ if (sz != 4)
+ goto decode_failure;
+ delta = dis_movx_E_G ( sorb, delta, 2, 4, False );
+ break;
+
+ case 0xBE: /* MOVSXb Eb,Gv */
+ if (sz != 2 && sz != 4)
+ goto decode_failure;
+ delta = dis_movx_E_G ( sorb, delta, 1, sz, True );
+ break;
+
+ case 0xBF: /* MOVSXw Ew,Gv */
+            if (sz != 4 && /* accept movsww, sigh, see #250799 */ sz != 2)
+ goto decode_failure;
+ delta = dis_movx_E_G ( sorb, delta, 2, sz, True );
+ break;
+
+//-- /* =-=-=-=-=-=-=-=-=-=-= MOVNTI -=-=-=-=-=-=-=-=-= */
+//--
+//-- case 0xC3: /* MOVNTI Gv,Ev */
+//-- vg_assert(sz == 4);
+//-- modrm = getUChar(eip);
+//-- vg_assert(!epartIsReg(modrm));
+//-- t1 = newTemp(cb);
+//-- uInstr2(cb, GET, 4, ArchReg, gregOfRM(modrm), TempReg, t1);
+//-- pair = disAMode ( cb, sorb, eip, dis_buf );
+//-- t2 = LOW24(pair);
+//-- eip += HI8(pair);
+//-- uInstr2(cb, STORE, 4, TempReg, t1, TempReg, t2);
+//-- DIP("movnti %s,%s\n", nameIReg(4,gregOfRM(modrm)), dis_buf);
+//-- break;
+
+ /* =-=-=-=-=-=-=-=-=- MUL/IMUL =-=-=-=-=-=-=-=-=-= */
+
+ case 0xAF: /* IMUL Ev, Gv */
+ delta = dis_mul_E_G ( sorb, sz, delta );
+ break;
+
+ /* =-=-=-=-=-=-=-=-=- NOPs =-=-=-=-=-=-=-=-=-=-=-= */
+
+         case 0x1F: /* NOP Ev (multi-byte NOP, 0F 1F) */
+ modrm = getUChar(delta);
+ if (epartIsReg(modrm)) goto decode_failure;
+ addr = disAMode ( &alen, sorb, delta, dis_buf );
+ delta += alen;
+ DIP("nop%c %s\n", nameISize(sz), dis_buf);
+ break;
+
+ /* =-=-=-=-=-=-=-=-=- Jcond d32 -=-=-=-=-=-=-=-=-= */
+         case 0x80: /* JOb (jump overflow) */
+         case 0x81: /* JNOb (jump not overflow) */
+         case 0x82: /* JBb/JNAEb (jump below) */
+         case 0x83: /* JNBb/JAEb (jump not below) */
+         case 0x84: /* JZb/JEb (jump zero) */
+         case 0x85: /* JNZb/JNEb (jump not zero) */
+         case 0x86: /* JBEb/JNAb (jump below or equal) */
+         case 0x87: /* JNBEb/JAb (jump not below or equal) */
+         case 0x88: /* JSb (jump negative) */
+         case 0x89: /* JNSb (jump not negative) */
+ case 0x8A: /* JP (jump parity even) */
+ case 0x8B: /* JNP/JPO (jump parity odd) */
+ case 0x8C: /* JLb/JNGEb (jump less) */
+ case 0x8D: /* JGEb/JNLb (jump greater or equal) */
+ case 0x8E: /* JLEb/JNGb (jump less or equal) */
+ case 0x8F: /* JGb/JNLEb (jump greater) */
+ { Int jmpDelta;
+ HChar* comment = "";
+ jmpDelta = (Int)getUDisp32(delta);
+ d32 = (((Addr32)guest_EIP_bbstart)+delta+4) + jmpDelta;
+ delta += 4;
+ if (resteerCisOk
+ && vex_control.guest_chase_cond
+ && (Addr32)d32 != (Addr32)guest_EIP_bbstart
+ && jmpDelta < 0
+ && resteerOkFn( callback_opaque, (Addr64)(Addr32)d32) ) {
+ /* Speculation: assume this backward branch is taken. So
+ we need to emit a side-exit to the insn following this
+ one, on the negation of the condition, and continue at
+ the branch target address (d32). If we wind up back at
+ the first instruction of the trace, just stop; it's
+ better to let the IR loop unroller handle that case.*/
+ stmt( IRStmt_Exit(
+ mk_x86g_calculate_condition((X86Condcode)
+ (1 ^ (opc - 0x80))),
+ Ijk_Boring,
+ IRConst_U32(guest_EIP_bbstart+delta) ) );
+ dres.whatNext = Dis_ResteerC;
+ dres.continueAt = (Addr64)(Addr32)d32;
+ comment = "(assumed taken)";
+ }
+ else
+ if (resteerCisOk
+ && vex_control.guest_chase_cond
+ && (Addr32)d32 != (Addr32)guest_EIP_bbstart
+ && jmpDelta >= 0
+ && resteerOkFn( callback_opaque,
+ (Addr64)(Addr32)(guest_EIP_bbstart+delta)) ) {
+ /* Speculation: assume this forward branch is not taken.
+ So we need to emit a side-exit to d32 (the dest) and
+ continue disassembling at the insn immediately
+ following this one. */
+ stmt( IRStmt_Exit(
+ mk_x86g_calculate_condition((X86Condcode)(opc - 0x80)),
+ Ijk_Boring,
+ IRConst_U32(d32) ) );
+ dres.whatNext = Dis_ResteerC;
+ dres.continueAt = (Addr64)(Addr32)(guest_EIP_bbstart+delta);
+ comment = "(assumed not taken)";
+ }
+ else {
+ /* Conservative default translation - end the block at
+ this point. */
+ jcc_01( (X86Condcode)(opc - 0x80),
+ (Addr32)(guest_EIP_bbstart+delta), d32);
+ dres.whatNext = Dis_StopHere;
+ }
+ DIP("j%s-32 0x%x %s\n", name_X86Condcode(opc - 0x80), d32, comment);
+ break;
+ }
+
+ /* =-=-=-=-=-=-=-=-=- RDTSC -=-=-=-=-=-=-=-=-=-=-= */
+ case 0x31: { /* RDTSC */
+ IRTemp val = newTemp(Ity_I64);
+ IRExpr** args = mkIRExprVec_0();
+ IRDirty* d = unsafeIRDirty_1_N (
+ val,
+ 0/*regparms*/,
+ "x86g_dirtyhelper_RDTSC",
+ &x86g_dirtyhelper_RDTSC,
+ args
+ );
+ /* execute the dirty call, dumping the result in val. */
+ stmt( IRStmt_Dirty(d) );
+ putIReg(4, R_EDX, unop(Iop_64HIto32, mkexpr(val)));
+ putIReg(4, R_EAX, unop(Iop_64to32, mkexpr(val)));
+ DIP("rdtsc\n");
+ break;
+ }
+
+ /* =-=-=-=-=-=-=-=-=- PUSH/POP Sreg =-=-=-=-=-=-=-=-=-= */
+
+ case 0xA1: /* POP %FS */
+ dis_pop_segreg( R_FS, sz ); break;
+ case 0xA9: /* POP %GS */
+ dis_pop_segreg( R_GS, sz ); break;
+
+ case 0xA0: /* PUSH %FS */
+ dis_push_segreg( R_FS, sz ); break;
+ case 0xA8: /* PUSH %GS */
+ dis_push_segreg( R_GS, sz ); break;
+
+ /* =-=-=-=-=-=-=-=-=- SETcc Eb =-=-=-=-=-=-=-=-=-= */
+         case 0x90: /* set-Ob (set overflow) */
+         case 0x91: /* set-NOb (set not overflow) */
+         case 0x92: /* set-Bb/set-NAEb (set below) */
+         case 0x93: /* set-NBb/set-AEb (set not below) */
+         case 0x94: /* set-Zb/set-Eb (set zero) */
+         case 0x95: /* set-NZb/set-NEb (set not zero) */
+         case 0x96: /* set-BEb/set-NAb (set below or equal) */
+         case 0x97: /* set-NBEb/set-Ab (set not below or equal) */
+         case 0x98: /* set-Sb (set negative) */
+         case 0x99: /* set-NSb (set not negative) */
+         case 0x9A: /* set-Pb (set parity even) */
+         case 0x9B: /* set-NPb/set-POb (set parity odd) */
+         case 0x9C: /* set-Lb/set-NGEb (set less) */
+         case 0x9D: /* set-GEb/set-NLb (set greater or equal) */
+         case 0x9E: /* set-LEb/set-NGb (set less or equal) */
+         case 0x9F: /* set-Gb/set-NLEb (set greater) */
+ t1 = newTemp(Ity_I8);
+ assign( t1, unop(Iop_1Uto8,mk_x86g_calculate_condition(opc-0x90)) );
+ modrm = getIByte(delta);
+ if (epartIsReg(modrm)) {
+ delta++;
+ putIReg(1, eregOfRM(modrm), mkexpr(t1));
+ DIP("set%s %s\n", name_X86Condcode(opc-0x90),
+ nameIReg(1,eregOfRM(modrm)));
+ } else {
+ addr = disAMode ( &alen, sorb, delta, dis_buf );
+ delta += alen;
+ storeLE( mkexpr(addr), mkexpr(t1) );
+ DIP("set%s %s\n", name_X86Condcode(opc-0x90), dis_buf);
+ }
+ break;
+
+ /* =-=-=-=-=-=-=-=-=- SHLD/SHRD -=-=-=-=-=-=-=-=-= */
+
+ case 0xA4: /* SHLDv imm8,Gv,Ev */
+ modrm = getIByte(delta);
+ d32 = delta + lengthAMode(delta);
+ vex_sprintf(dis_buf, "$%d", getIByte(d32));
+ delta = dis_SHLRD_Gv_Ev (
+ sorb, delta, modrm, sz,
+ mkU8(getIByte(d32)), True, /* literal */
+ dis_buf, True );
+ break;
+ case 0xA5: /* SHLDv %cl,Gv,Ev */
+ modrm = getIByte(delta);
+ delta = dis_SHLRD_Gv_Ev (
+ sorb, delta, modrm, sz,
+ getIReg(1,R_ECX), False, /* not literal */
+ "%cl", True );
+ break;
+
+ case 0xAC: /* SHRDv imm8,Gv,Ev */
+ modrm = getIByte(delta);
+ d32 = delta + lengthAMode(delta);
+ vex_sprintf(dis_buf, "$%d", getIByte(d32));
+ delta = dis_SHLRD_Gv_Ev (
+ sorb, delta, modrm, sz,
+ mkU8(getIByte(d32)), True, /* literal */
+ dis_buf, False );
+ break;
+ case 0xAD: /* SHRDv %cl,Gv,Ev */
+ modrm = getIByte(delta);
+ delta = dis_SHLRD_Gv_Ev (
+ sorb, delta, modrm, sz,
+ getIReg(1,R_ECX), False, /* not literal */
+ "%cl", False );
+ break;
+
+ /* =-=-=-=-=-=-=-=-=- SYSENTER -=-=-=-=-=-=-=-=-=-= */
+
+ case 0x34:
+         /* Simple implementation needing a long explanation.
+
+ sysenter is a kind of syscall entry. The key thing here
+ is that the return address is not known -- that is
+ something that is beyond Vex's knowledge. So this IR
+ forces a return to the scheduler, which can do what it
+            likes to simulate the sysenter, but it MUST set this
+ thread's guest_EIP field with the continuation address
+ before resuming execution. If that doesn't happen, the
+ thread will jump to address zero, which is probably
+ fatal.
+ */
+
+ /* Note where we are, so we can back up the guest to this
+ point if the syscall needs to be restarted. */
+ stmt( IRStmt_Put( OFFB_IP_AT_SYSCALL,
+ mkU32(guest_EIP_curr_instr) ) );
+ jmp_lit(Ijk_Sys_sysenter, 0/*bogus next EIP value*/);
+ dres.whatNext = Dis_StopHere;
+         DIP("sysenter\n");
+ break;
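+
+      /* A hypothetical sketch of the scheduler-side handling implied
+         by the comment above (the names are illustrative only):
+
+            case sysenter_trc:
+               gst->guest_EIP = continuation_addr;   <-- MUST happen
+               do_syscall(gst);
+               break;
+
+         the essential point being that guest_EIP is written before
+         the thread resumes. */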
+
+ /* =-=-=-=-=-=-=-=-=- XADD -=-=-=-=-=-=-=-=-=-= */
+
+ case 0xC0: { /* XADD Gb,Eb */
+ Bool decodeOK;
+ delta = dis_xadd_G_E ( sorb, pfx_lock, 1, delta, &decodeOK );
+ if (!decodeOK) goto decode_failure;
+ break;
+ }
+ case 0xC1: { /* XADD Gv,Ev */
+ Bool decodeOK;
+ delta = dis_xadd_G_E ( sorb, pfx_lock, sz, delta, &decodeOK );
+ if (!decodeOK) goto decode_failure;
+ break;
+ }
+
+ /* =-=-=-=-=-=-=-=-=- MMXery =-=-=-=-=-=-=-=-=-=-= */
+
+ case 0x71:
+ case 0x72:
+ case 0x73: /* PSLLgg/PSRAgg/PSRLgg mmxreg by imm8 */
+
+ case 0x6E: /* MOVD (src)ireg-or-mem, (dst)mmxreg */
+ case 0x7E: /* MOVD (src)mmxreg, (dst)ireg-or-mem */
+ case 0x7F: /* MOVQ (src)mmxreg, (dst)mmxreg-or-mem */
+ case 0x6F: /* MOVQ (src)mmxreg-or-mem, (dst)mmxreg */
+
+ case 0xFC:
+ case 0xFD:
+ case 0xFE: /* PADDgg (src)mmxreg-or-mem, (dst)mmxreg */
+
+ case 0xEC:
+ case 0xED: /* PADDSgg (src)mmxreg-or-mem, (dst)mmxreg */
+
+ case 0xDC:
+ case 0xDD: /* PADDUSgg (src)mmxreg-or-mem, (dst)mmxreg */
+
+ case 0xF8:
+ case 0xF9:
+ case 0xFA: /* PSUBgg (src)mmxreg-or-mem, (dst)mmxreg */
+
+ case 0xE8:
+ case 0xE9: /* PSUBSgg (src)mmxreg-or-mem, (dst)mmxreg */
+
+ case 0xD8:
+ case 0xD9: /* PSUBUSgg (src)mmxreg-or-mem, (dst)mmxreg */
+
+ case 0xE5: /* PMULHW (src)mmxreg-or-mem, (dst)mmxreg */
+ case 0xD5: /* PMULLW (src)mmxreg-or-mem, (dst)mmxreg */
+
+ case 0xF5: /* PMADDWD (src)mmxreg-or-mem, (dst)mmxreg */
+
+ case 0x74:
+ case 0x75:
+ case 0x76: /* PCMPEQgg (src)mmxreg-or-mem, (dst)mmxreg */
+
+ case 0x64:
+ case 0x65:
+ case 0x66: /* PCMPGTgg (src)mmxreg-or-mem, (dst)mmxreg */
+
+ case 0x6B: /* PACKSSDW (src)mmxreg-or-mem, (dst)mmxreg */
+ case 0x63: /* PACKSSWB (src)mmxreg-or-mem, (dst)mmxreg */
+ case 0x67: /* PACKUSWB (src)mmxreg-or-mem, (dst)mmxreg */
+
+ case 0x68:
+ case 0x69:
+ case 0x6A: /* PUNPCKHgg (src)mmxreg-or-mem, (dst)mmxreg */
+
+ case 0x60:
+ case 0x61:
+ case 0x62: /* PUNPCKLgg (src)mmxreg-or-mem, (dst)mmxreg */
+
+ case 0xDB: /* PAND (src)mmxreg-or-mem, (dst)mmxreg */
+ case 0xDF: /* PANDN (src)mmxreg-or-mem, (dst)mmxreg */
+ case 0xEB: /* POR (src)mmxreg-or-mem, (dst)mmxreg */
+ case 0xEF: /* PXOR (src)mmxreg-or-mem, (dst)mmxreg */
+
+ case 0xF1: /* PSLLgg (src)mmxreg-or-mem, (dst)mmxreg */
+ case 0xF2:
+ case 0xF3:
+
+ case 0xD1: /* PSRLgg (src)mmxreg-or-mem, (dst)mmxreg */
+ case 0xD2:
+ case 0xD3:
+
+ case 0xE1: /* PSRAgg (src)mmxreg-or-mem, (dst)mmxreg */
+ case 0xE2:
+ {
+ Int delta0 = delta-1;
+ Bool decode_OK = False;
+
+         /* If sz==2 this is SSE, and we assume the SSE decoder has
+            already spotted those cases by now. */
+ if (sz != 4)
+ goto decode_failure;
+
+ delta = dis_MMX ( &decode_OK, sorb, sz, delta-1 );
+ if (!decode_OK) {
+ delta = delta0;
+ goto decode_failure;
+ }
+ break;
+ }
+
+ case 0x77: /* EMMS */
+ if (sz != 4)
+ goto decode_failure;
+ do_EMMS_preamble();
+ DIP("emms\n");
+ break;
+
+ /* =-=-=-=-=-=-=-=-=- SGDT and SIDT =-=-=-=-=-=-=-=-=-=-= */
+ case 0x01: /* 0F 01 /0 -- SGDT */
+ /* 0F 01 /1 -- SIDT */
+ {
+ /* This is really revolting, but ... since each processor
+ (core) only has one IDT and one GDT, just let the guest
+ see it (pass-through semantics). I can't see any way to
+ construct a faked-up value, so don't bother to try. */
+ modrm = getUChar(delta);
+ addr = disAMode ( &alen, sorb, delta, dis_buf );
+ delta += alen;
+ if (epartIsReg(modrm)) goto decode_failure;
+ if (gregOfRM(modrm) != 0 && gregOfRM(modrm) != 1)
+ goto decode_failure;
+ switch (gregOfRM(modrm)) {
+ case 0: DIP("sgdt %s\n", dis_buf); break;
+ case 1: DIP("sidt %s\n", dis_buf); break;
+ default: vassert(0); /*NOTREACHED*/
+ }
+
+ IRDirty* d = unsafeIRDirty_0_N (
+ 0/*regparms*/,
+ "x86g_dirtyhelper_SxDT",
+ &x86g_dirtyhelper_SxDT,
+ mkIRExprVec_2( mkexpr(addr),
+ mkU32(gregOfRM(modrm)) )
+ );
+ /* declare we're writing memory */
+ d->mFx = Ifx_Write;
+ d->mAddr = mkexpr(addr);
+ d->mSize = 6;
+ stmt( IRStmt_Dirty(d) );
+ break;
+ }
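+
+      /* For reference: the 6 bytes written here form the usual x86
+         pseudo-descriptor,
+
+            bytes 0..1 : table limit (16 bits)
+            bytes 2..5 : table base  (32 bits)
+
+         which is why d->mSize is set to 6 above. */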
+
+ /* =-=-=-=-=-=-=-=-=- unimp2 =-=-=-=-=-=-=-=-=-=-= */
+
+ default:
+ goto decode_failure;
+ } /* switch (opc) for the 2-byte opcodes */
+ goto decode_success;
+ } /* case 0x0F: of primary opcode */
+
+ /* ------------------------ ??? ------------------------ */
+
+ default:
+ decode_failure:
+ /* All decode failures end up here. */
+ vex_printf("vex x86->IR: unhandled instruction bytes: "
+ "0x%x 0x%x 0x%x 0x%x\n",
+ (Int)getIByte(delta_start+0),
+ (Int)getIByte(delta_start+1),
+ (Int)getIByte(delta_start+2),
+ (Int)getIByte(delta_start+3) );
+
+ /* Tell the dispatcher that this insn cannot be decoded, and so has
+ not been executed, and (is currently) the next to be executed.
+      EIP should be up-to-date since it was made so at the start of each
+ insn, but nevertheless be paranoid and update it again right
+ now. */
+ stmt( IRStmt_Put( OFFB_EIP, mkU32(guest_EIP_curr_instr) ) );
+ jmp_lit(Ijk_NoDecode, guest_EIP_curr_instr);
+ dres.whatNext = Dis_StopHere;
+ dres.len = 0;
+ /* We also need to say that a CAS is not expected now, regardless
+ of what it might have been set to at the start of the function,
+      since the IR that we've emitted just above (to synthesise a
+ SIGILL) does not involve any CAS, and presumably no other IR has
+ been emitted for this (non-decoded) insn. */
+ *expect_CAS = False;
+ return dres;
+
+ } /* switch (opc) for the main (primary) opcode switch. */
+
+ decode_success:
+ /* All decode successes end up here. */
+ DIP("\n");
+ dres.len = delta - delta_start;
+ return dres;
+}
+
+#undef DIP
+#undef DIS
+
+
+/*------------------------------------------------------------*/
+/*--- Top-level fn ---*/
+/*------------------------------------------------------------*/
+
+/* Disassemble a single instruction into IR. The instruction
+ is located in host memory at &guest_code[delta]. */
+
+DisResult disInstr_X86 ( IRSB* irsb_IN,
+ Bool put_IP,
+ Bool (*resteerOkFn) ( void*, Addr64 ),
+ Bool resteerCisOk,
+ void* callback_opaque,
+ UChar* guest_code_IN,
+ Long delta,
+ Addr64 guest_IP,
+ VexArch guest_arch,
+ VexArchInfo* archinfo,
+ VexAbiInfo* abiinfo,
+ Bool host_bigendian_IN )
+{
+ Int i, x1, x2;
+ Bool expect_CAS, has_CAS;
+ DisResult dres;
+
+ /* Set globals (see top of this file) */
+ vassert(guest_arch == VexArchX86);
+ guest_code = guest_code_IN;
+ irsb = irsb_IN;
+ host_is_bigendian = host_bigendian_IN;
+ guest_EIP_curr_instr = (Addr32)guest_IP;
+ guest_EIP_bbstart = (Addr32)toUInt(guest_IP - delta);
+
+ x1 = irsb_IN->stmts_used;
+ expect_CAS = False;
+ dres = disInstr_X86_WRK ( &expect_CAS, put_IP, resteerOkFn,
+ resteerCisOk,
+ callback_opaque,
+ delta, archinfo, abiinfo );
+ x2 = irsb_IN->stmts_used;
+ vassert(x2 >= x1);
+
+ /* See comment at the top of disInstr_X86_WRK for meaning of
+ expect_CAS. Here, we (sanity-)check for the presence/absence of
+ IRCAS as directed by the returned expect_CAS value. */
+ has_CAS = False;
+ for (i = x1; i < x2; i++) {
+ if (irsb_IN->stmts[i]->tag == Ist_CAS)
+ has_CAS = True;
+ }
+
+ if (expect_CAS != has_CAS) {
+      /* Inconsistency detected. Re-disassemble the instruction so as
+ to generate a useful error message; then assert. */
+ vex_traceflags |= VEX_TRACE_FE;
+ dres = disInstr_X86_WRK ( &expect_CAS, put_IP, resteerOkFn,
+ resteerCisOk,
+ callback_opaque,
+ delta, archinfo, abiinfo );
+ for (i = x1; i < x2; i++) {
+ vex_printf("\t\t");
+ ppIRStmt(irsb_IN->stmts[i]);
+ vex_printf("\n");
+ }
+ /* Failure of this assertion is serious and denotes a bug in
+ disInstr. */
+ vpanic("disInstr_X86: inconsistency in LOCK prefix handling");
+ }
+
+ return dres;
+}
+
+
+/*--------------------------------------------------------------------*/
+/*--- end guest_x86_toIR.c ---*/
+/*--------------------------------------------------------------------*/
diff --git a/VEX/priv/host_amd64_defs.c b/VEX/priv/host_amd64_defs.c
new file mode 100644
index 0000000..80bb6d8
--- /dev/null
+++ b/VEX/priv/host_amd64_defs.c
@@ -0,0 +1,3576 @@
+
+/*---------------------------------------------------------------*/
+/*--- begin host_amd64_defs.c ---*/
+/*---------------------------------------------------------------*/
+
+/*
+ This file is part of Valgrind, a dynamic binary instrumentation
+ framework.
+
+ Copyright (C) 2004-2010 OpenWorks LLP
+ info@open-works.net
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA.
+
+ The GNU General Public License is contained in the file COPYING.
+
+ Neither the names of the U.S. Department of Energy nor the
+ University of California nor the names of its contributors may be
+ used to endorse or promote products derived from this software
+ without prior written permission.
+*/
+
+#include "libvex_basictypes.h"
+#include "libvex.h"
+#include "libvex_trc_values.h"
+
+#include "main_util.h"
+#include "host_generic_regs.h"
+#include "host_amd64_defs.h"
+
+
+/* --------- Registers. --------- */
+
+void ppHRegAMD64 ( HReg reg )
+{
+ Int r;
+ static HChar* ireg64_names[16]
+ = { "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
+ "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15" };
+ /* Be generic for all virtual regs. */
+ if (hregIsVirtual(reg)) {
+ ppHReg(reg);
+ return;
+ }
+ /* But specific for real regs. */
+ switch (hregClass(reg)) {
+ case HRcInt64:
+ r = hregNumber(reg);
+ vassert(r >= 0 && r < 16);
+ vex_printf("%s", ireg64_names[r]);
+ return;
+ case HRcFlt64:
+ r = hregNumber(reg);
+ vassert(r >= 0 && r < 6);
+ vex_printf("%%fake%d", r);
+ return;
+ case HRcVec128:
+ r = hregNumber(reg);
+ vassert(r >= 0 && r < 16);
+ vex_printf("%%xmm%d", r);
+ return;
+ default:
+ vpanic("ppHRegAMD64");
+ }
+}
+
+static void ppHRegAMD64_lo32 ( HReg reg )
+{
+ Int r;
+ static HChar* ireg32_names[16]
+ = { "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
+ "%r8d", "%r9d", "%r10d", "%r11d", "%r12d", "%r13d", "%r14d", "%r15d" };
+ /* Be generic for all virtual regs. */
+ if (hregIsVirtual(reg)) {
+ ppHReg(reg);
+ vex_printf("d");
+ return;
+ }
+ /* But specific for real regs. */
+ switch (hregClass(reg)) {
+ case HRcInt64:
+ r = hregNumber(reg);
+ vassert(r >= 0 && r < 16);
+ vex_printf("%s", ireg32_names[r]);
+ return;
+ default:
+ vpanic("ppHRegAMD64_lo32: invalid regclass");
+ }
+}
+
+HReg hregAMD64_RAX ( void ) { return mkHReg( 0, HRcInt64, False); }
+HReg hregAMD64_RCX ( void ) { return mkHReg( 1, HRcInt64, False); }
+HReg hregAMD64_RDX ( void ) { return mkHReg( 2, HRcInt64, False); }
+HReg hregAMD64_RBX ( void ) { return mkHReg( 3, HRcInt64, False); }
+HReg hregAMD64_RSP ( void ) { return mkHReg( 4, HRcInt64, False); }
+HReg hregAMD64_RBP ( void ) { return mkHReg( 5, HRcInt64, False); }
+HReg hregAMD64_RSI ( void ) { return mkHReg( 6, HRcInt64, False); }
+HReg hregAMD64_RDI ( void ) { return mkHReg( 7, HRcInt64, False); }
+HReg hregAMD64_R8 ( void ) { return mkHReg( 8, HRcInt64, False); }
+HReg hregAMD64_R9 ( void ) { return mkHReg( 9, HRcInt64, False); }
+HReg hregAMD64_R10 ( void ) { return mkHReg(10, HRcInt64, False); }
+HReg hregAMD64_R11 ( void ) { return mkHReg(11, HRcInt64, False); }
+HReg hregAMD64_R12 ( void ) { return mkHReg(12, HRcInt64, False); }
+HReg hregAMD64_R13 ( void ) { return mkHReg(13, HRcInt64, False); }
+HReg hregAMD64_R14 ( void ) { return mkHReg(14, HRcInt64, False); }
+HReg hregAMD64_R15 ( void ) { return mkHReg(15, HRcInt64, False); }
+
+//.. HReg hregAMD64_FAKE0 ( void ) { return mkHReg(0, HRcFlt64, False); }
+//.. HReg hregAMD64_FAKE1 ( void ) { return mkHReg(1, HRcFlt64, False); }
+//.. HReg hregAMD64_FAKE2 ( void ) { return mkHReg(2, HRcFlt64, False); }
+//.. HReg hregAMD64_FAKE3 ( void ) { return mkHReg(3, HRcFlt64, False); }
+//.. HReg hregAMD64_FAKE4 ( void ) { return mkHReg(4, HRcFlt64, False); }
+//.. HReg hregAMD64_FAKE5 ( void ) { return mkHReg(5, HRcFlt64, False); }
+//..
+HReg hregAMD64_XMM0 ( void ) { return mkHReg( 0, HRcVec128, False); }
+HReg hregAMD64_XMM1 ( void ) { return mkHReg( 1, HRcVec128, False); }
+HReg hregAMD64_XMM2 ( void ) { return mkHReg( 2, HRcVec128, False); }
+HReg hregAMD64_XMM3 ( void ) { return mkHReg( 3, HRcVec128, False); }
+HReg hregAMD64_XMM4 ( void ) { return mkHReg( 4, HRcVec128, False); }
+HReg hregAMD64_XMM5 ( void ) { return mkHReg( 5, HRcVec128, False); }
+HReg hregAMD64_XMM6 ( void ) { return mkHReg( 6, HRcVec128, False); }
+HReg hregAMD64_XMM7 ( void ) { return mkHReg( 7, HRcVec128, False); }
+HReg hregAMD64_XMM8 ( void ) { return mkHReg( 8, HRcVec128, False); }
+HReg hregAMD64_XMM9 ( void ) { return mkHReg( 9, HRcVec128, False); }
+HReg hregAMD64_XMM10 ( void ) { return mkHReg(10, HRcVec128, False); }
+HReg hregAMD64_XMM11 ( void ) { return mkHReg(11, HRcVec128, False); }
+HReg hregAMD64_XMM12 ( void ) { return mkHReg(12, HRcVec128, False); }
+HReg hregAMD64_XMM13 ( void ) { return mkHReg(13, HRcVec128, False); }
+HReg hregAMD64_XMM14 ( void ) { return mkHReg(14, HRcVec128, False); }
+HReg hregAMD64_XMM15 ( void ) { return mkHReg(15, HRcVec128, False); }
+
+
+void getAllocableRegs_AMD64 ( Int* nregs, HReg** arr )
+{
+#if 0
+ *nregs = 6;
+ *arr = LibVEX_Alloc(*nregs * sizeof(HReg));
+ (*arr)[ 0] = hregAMD64_RSI();
+ (*arr)[ 1] = hregAMD64_RDI();
+ (*arr)[ 2] = hregAMD64_RBX();
+
+ (*arr)[ 3] = hregAMD64_XMM7();
+ (*arr)[ 4] = hregAMD64_XMM8();
+ (*arr)[ 5] = hregAMD64_XMM9();
+#endif
+#if 1
+ *nregs = 20;
+ *arr = LibVEX_Alloc(*nregs * sizeof(HReg));
+ (*arr)[ 0] = hregAMD64_RSI();
+ (*arr)[ 1] = hregAMD64_RDI();
+ (*arr)[ 2] = hregAMD64_R8();
+ (*arr)[ 3] = hregAMD64_R9();
+ (*arr)[ 4] = hregAMD64_R12();
+ (*arr)[ 5] = hregAMD64_R13();
+ (*arr)[ 6] = hregAMD64_R14();
+ (*arr)[ 7] = hregAMD64_R15();
+ (*arr)[ 8] = hregAMD64_RBX();
+
+ (*arr)[ 9] = hregAMD64_XMM3();
+ (*arr)[10] = hregAMD64_XMM4();
+ (*arr)[11] = hregAMD64_XMM5();
+ (*arr)[12] = hregAMD64_XMM6();
+ (*arr)[13] = hregAMD64_XMM7();
+ (*arr)[14] = hregAMD64_XMM8();
+ (*arr)[15] = hregAMD64_XMM9();
+ (*arr)[16] = hregAMD64_XMM10();
+ (*arr)[17] = hregAMD64_XMM11();
+ (*arr)[18] = hregAMD64_XMM12();
+ (*arr)[19] = hregAMD64_R10();
+#endif
+}
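+
+/* A note on the registers excluded from the list above, inferred
+   from their uses later in this file (a sketch, not a normative
+   statement): %rsp is the host stack pointer; %rbp is written by
+   Ain_Goto for non-Boring jump kinds and is kept away from the
+   allocator; %rax and %rdx are trashed by Ain_Goto to carry the
+   next guest address and the dispatcher address; %rcx is needed
+   for variable-amount shifts; and %r11 is the scratch register
+   through which Ain_Call loads its target address. */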
+
+
+/* --------- Condition codes, Intel encoding. --------- */
+
+HChar* showAMD64CondCode ( AMD64CondCode cond )
+{
+ switch (cond) {
+ case Acc_O: return "o";
+ case Acc_NO: return "no";
+ case Acc_B: return "b";
+ case Acc_NB: return "nb";
+ case Acc_Z: return "z";
+ case Acc_NZ: return "nz";
+ case Acc_BE: return "be";
+ case Acc_NBE: return "nbe";
+ case Acc_S: return "s";
+ case Acc_NS: return "ns";
+ case Acc_P: return "p";
+ case Acc_NP: return "np";
+ case Acc_L: return "l";
+ case Acc_NL: return "nl";
+ case Acc_LE: return "le";
+ case Acc_NLE: return "nle";
+ case Acc_ALWAYS: return "ALWAYS";
+      default: vpanic("showAMD64CondCode");
+ }
+}
+
+
+/* --------- AMD64AMode: memory address expressions. --------- */
+
+AMD64AMode* AMD64AMode_IR ( UInt imm32, HReg reg ) {
+ AMD64AMode* am = LibVEX_Alloc(sizeof(AMD64AMode));
+ am->tag = Aam_IR;
+ am->Aam.IR.imm = imm32;
+ am->Aam.IR.reg = reg;
+ return am;
+}
+AMD64AMode* AMD64AMode_IRRS ( UInt imm32, HReg base, HReg indEx, Int shift ) {
+ AMD64AMode* am = LibVEX_Alloc(sizeof(AMD64AMode));
+ am->tag = Aam_IRRS;
+ am->Aam.IRRS.imm = imm32;
+ am->Aam.IRRS.base = base;
+ am->Aam.IRRS.index = indEx;
+ am->Aam.IRRS.shift = shift;
+ vassert(shift >= 0 && shift <= 3);
+ return am;
+}
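+
+/* Worked example: the amode 0x1C(%rbp,%r9,8) -- displacement 0x1C,
+   base %rbp, index %r9, scale 8 -- would be built as
+
+      AMD64AMode_IRRS( 0x1C, hregAMD64_RBP(), hregAMD64_R9(), 3 );
+
+   'shift' holds log2 of the scale, hence the 0..3 assertion above
+   and the "1 << shift" in the printer below. */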
+
+//.. AMD64AMode* dopyAMD64AMode ( AMD64AMode* am ) {
+//.. switch (am->tag) {
+//.. case Xam_IR:
+//.. return AMD64AMode_IR( am->Xam.IR.imm, am->Xam.IR.reg );
+//.. case Xam_IRRS:
+//.. return AMD64AMode_IRRS( am->Xam.IRRS.imm, am->Xam.IRRS.base,
+//.. am->Xam.IRRS.index, am->Xam.IRRS.shift );
+//.. default:
+//.. vpanic("dopyAMD64AMode");
+//.. }
+//.. }
+
+void ppAMD64AMode ( AMD64AMode* am ) {
+ switch (am->tag) {
+ case Aam_IR:
+ if (am->Aam.IR.imm == 0)
+ vex_printf("(");
+ else
+ vex_printf("0x%x(", am->Aam.IR.imm);
+ ppHRegAMD64(am->Aam.IR.reg);
+ vex_printf(")");
+ return;
+ case Aam_IRRS:
+ vex_printf("0x%x(", am->Aam.IRRS.imm);
+ ppHRegAMD64(am->Aam.IRRS.base);
+ vex_printf(",");
+ ppHRegAMD64(am->Aam.IRRS.index);
+ vex_printf(",%d)", 1 << am->Aam.IRRS.shift);
+ return;
+ default:
+ vpanic("ppAMD64AMode");
+ }
+}
+
+static void addRegUsage_AMD64AMode ( HRegUsage* u, AMD64AMode* am ) {
+ switch (am->tag) {
+ case Aam_IR:
+ addHRegUse(u, HRmRead, am->Aam.IR.reg);
+ return;
+ case Aam_IRRS:
+ addHRegUse(u, HRmRead, am->Aam.IRRS.base);
+ addHRegUse(u, HRmRead, am->Aam.IRRS.index);
+ return;
+ default:
+ vpanic("addRegUsage_AMD64AMode");
+ }
+}
+
+static void mapRegs_AMD64AMode ( HRegRemap* m, AMD64AMode* am ) {
+ switch (am->tag) {
+ case Aam_IR:
+ am->Aam.IR.reg = lookupHRegRemap(m, am->Aam.IR.reg);
+ return;
+ case Aam_IRRS:
+ am->Aam.IRRS.base = lookupHRegRemap(m, am->Aam.IRRS.base);
+ am->Aam.IRRS.index = lookupHRegRemap(m, am->Aam.IRRS.index);
+ return;
+ default:
+ vpanic("mapRegs_AMD64AMode");
+ }
+}
+
+/* --------- Operand, which can be reg, immediate or memory. --------- */
+
+AMD64RMI* AMD64RMI_Imm ( UInt imm32 ) {
+ AMD64RMI* op = LibVEX_Alloc(sizeof(AMD64RMI));
+ op->tag = Armi_Imm;
+ op->Armi.Imm.imm32 = imm32;
+ return op;
+}
+AMD64RMI* AMD64RMI_Reg ( HReg reg ) {
+ AMD64RMI* op = LibVEX_Alloc(sizeof(AMD64RMI));
+ op->tag = Armi_Reg;
+ op->Armi.Reg.reg = reg;
+ return op;
+}
+AMD64RMI* AMD64RMI_Mem ( AMD64AMode* am ) {
+ AMD64RMI* op = LibVEX_Alloc(sizeof(AMD64RMI));
+ op->tag = Armi_Mem;
+ op->Armi.Mem.am = am;
+ return op;
+}
+
+void ppAMD64RMI ( AMD64RMI* op ) {
+ switch (op->tag) {
+ case Armi_Imm:
+ vex_printf("$0x%x", op->Armi.Imm.imm32);
+ return;
+ case Armi_Reg:
+ ppHRegAMD64(op->Armi.Reg.reg);
+ return;
+ case Armi_Mem:
+ ppAMD64AMode(op->Armi.Mem.am);
+ return;
+ default:
+ vpanic("ppAMD64RMI");
+ }
+}
+
+/* An AMD64RMI can only be used in a "read" context (what would it mean
+ to write or modify a literal?) and so we enumerate its registers
+ accordingly. */
+static void addRegUsage_AMD64RMI ( HRegUsage* u, AMD64RMI* op ) {
+ switch (op->tag) {
+ case Armi_Imm:
+ return;
+ case Armi_Reg:
+ addHRegUse(u, HRmRead, op->Armi.Reg.reg);
+ return;
+ case Armi_Mem:
+ addRegUsage_AMD64AMode(u, op->Armi.Mem.am);
+ return;
+ default:
+ vpanic("addRegUsage_AMD64RMI");
+ }
+}
+
+static void mapRegs_AMD64RMI ( HRegRemap* m, AMD64RMI* op ) {
+ switch (op->tag) {
+ case Armi_Imm:
+ return;
+ case Armi_Reg:
+ op->Armi.Reg.reg = lookupHRegRemap(m, op->Armi.Reg.reg);
+ return;
+ case Armi_Mem:
+ mapRegs_AMD64AMode(m, op->Armi.Mem.am);
+ return;
+ default:
+ vpanic("mapRegs_AMD64RMI");
+ }
+}
+
+
+/* --------- Operand, which can be reg or immediate only. --------- */
+
+AMD64RI* AMD64RI_Imm ( UInt imm32 ) {
+ AMD64RI* op = LibVEX_Alloc(sizeof(AMD64RI));
+ op->tag = Ari_Imm;
+ op->Ari.Imm.imm32 = imm32;
+ return op;
+}
+AMD64RI* AMD64RI_Reg ( HReg reg ) {
+ AMD64RI* op = LibVEX_Alloc(sizeof(AMD64RI));
+ op->tag = Ari_Reg;
+ op->Ari.Reg.reg = reg;
+ return op;
+}
+
+void ppAMD64RI ( AMD64RI* op ) {
+ switch (op->tag) {
+ case Ari_Imm:
+ vex_printf("$0x%x", op->Ari.Imm.imm32);
+ return;
+ case Ari_Reg:
+ ppHRegAMD64(op->Ari.Reg.reg);
+ return;
+ default:
+ vpanic("ppAMD64RI");
+ }
+}
+
+/* An AMD64RI can only be used in a "read" context (what would it mean
+ to write or modify a literal?) and so we enumerate its registers
+ accordingly. */
+static void addRegUsage_AMD64RI ( HRegUsage* u, AMD64RI* op ) {
+ switch (op->tag) {
+ case Ari_Imm:
+ return;
+ case Ari_Reg:
+ addHRegUse(u, HRmRead, op->Ari.Reg.reg);
+ return;
+ default:
+ vpanic("addRegUsage_AMD64RI");
+ }
+}
+
+static void mapRegs_AMD64RI ( HRegRemap* m, AMD64RI* op ) {
+ switch (op->tag) {
+ case Ari_Imm:
+ return;
+ case Ari_Reg:
+ op->Ari.Reg.reg = lookupHRegRemap(m, op->Ari.Reg.reg);
+ return;
+ default:
+ vpanic("mapRegs_AMD64RI");
+ }
+}
+
+
+/* --------- Operand, which can be reg or memory only. --------- */
+
+AMD64RM* AMD64RM_Reg ( HReg reg ) {
+ AMD64RM* op = LibVEX_Alloc(sizeof(AMD64RM));
+ op->tag = Arm_Reg;
+ op->Arm.Reg.reg = reg;
+ return op;
+}
+AMD64RM* AMD64RM_Mem ( AMD64AMode* am ) {
+ AMD64RM* op = LibVEX_Alloc(sizeof(AMD64RM));
+ op->tag = Arm_Mem;
+ op->Arm.Mem.am = am;
+ return op;
+}
+
+void ppAMD64RM ( AMD64RM* op ) {
+ switch (op->tag) {
+ case Arm_Mem:
+ ppAMD64AMode(op->Arm.Mem.am);
+ return;
+ case Arm_Reg:
+ ppHRegAMD64(op->Arm.Reg.reg);
+ return;
+ default:
+ vpanic("ppAMD64RM");
+ }
+}
+
+/* Because an AMD64RM can be both a source or destination operand, we
+ have to supply a mode -- pertaining to the operand as a whole --
+ indicating how it's being used. */
+static void addRegUsage_AMD64RM ( HRegUsage* u, AMD64RM* op, HRegMode mode ) {
+ switch (op->tag) {
+ case Arm_Mem:
+ /* Memory is read, written or modified. So we just want to
+ know the regs read by the amode. */
+ addRegUsage_AMD64AMode(u, op->Arm.Mem.am);
+ return;
+ case Arm_Reg:
+ /* reg is read, written or modified. Add it in the
+ appropriate way. */
+ addHRegUse(u, mode, op->Arm.Reg.reg);
+ return;
+ default:
+ vpanic("addRegUsage_AMD64RM");
+ }
+}
+
+static void mapRegs_AMD64RM ( HRegRemap* m, AMD64RM* op )
+{
+ switch (op->tag) {
+ case Arm_Mem:
+ mapRegs_AMD64AMode(m, op->Arm.Mem.am);
+ return;
+ case Arm_Reg:
+ op->Arm.Reg.reg = lookupHRegRemap(m, op->Arm.Reg.reg);
+ return;
+ default:
+ vpanic("mapRegs_AMD64RM");
+ }
+}
+
+
+/* --------- Instructions. --------- */
+
+static HChar* showAMD64ScalarSz ( Int sz ) {
+ switch (sz) {
+ case 2: return "w";
+ case 4: return "l";
+ case 8: return "q";
+ default: vpanic("showAMD64ScalarSz");
+ }
+}
+
+HChar* showAMD64UnaryOp ( AMD64UnaryOp op ) {
+ switch (op) {
+ case Aun_NOT: return "not";
+ case Aun_NEG: return "neg";
+ default: vpanic("showAMD64UnaryOp");
+ }
+}
+
+HChar* showAMD64AluOp ( AMD64AluOp op ) {
+ switch (op) {
+ case Aalu_MOV: return "mov";
+ case Aalu_CMP: return "cmp";
+ case Aalu_ADD: return "add";
+ case Aalu_SUB: return "sub";
+ case Aalu_ADC: return "adc";
+ case Aalu_SBB: return "sbb";
+ case Aalu_AND: return "and";
+ case Aalu_OR: return "or";
+ case Aalu_XOR: return "xor";
+ case Aalu_MUL: return "imul";
+ default: vpanic("showAMD64AluOp");
+ }
+}
+
+HChar* showAMD64ShiftOp ( AMD64ShiftOp op ) {
+ switch (op) {
+ case Ash_SHL: return "shl";
+ case Ash_SHR: return "shr";
+ case Ash_SAR: return "sar";
+ default: vpanic("showAMD64ShiftOp");
+ }
+}
+
+HChar* showA87FpOp ( A87FpOp op ) {
+ switch (op) {
+//.. case Xfp_ADD: return "add";
+//.. case Xfp_SUB: return "sub";
+//.. case Xfp_MUL: return "mul";
+//.. case Xfp_DIV: return "div";
+ case Afp_SCALE: return "scale";
+ case Afp_ATAN: return "atan";
+ case Afp_YL2X: return "yl2x";
+ case Afp_YL2XP1: return "yl2xp1";
+ case Afp_PREM: return "prem";
+ case Afp_PREM1: return "prem1";
+ case Afp_SQRT: return "sqrt";
+//.. case Xfp_ABS: return "abs";
+//.. case Xfp_NEG: return "chs";
+//.. case Xfp_MOV: return "mov";
+ case Afp_SIN: return "sin";
+ case Afp_COS: return "cos";
+ case Afp_TAN: return "tan";
+ case Afp_ROUND: return "round";
+ case Afp_2XM1: return "2xm1";
+ default: vpanic("showA87FpOp");
+ }
+}
+
+HChar* showAMD64SseOp ( AMD64SseOp op ) {
+ switch (op) {
+ case Asse_MOV: return "movups";
+ case Asse_ADDF: return "add";
+ case Asse_SUBF: return "sub";
+ case Asse_MULF: return "mul";
+ case Asse_DIVF: return "div";
+ case Asse_MAXF: return "max";
+ case Asse_MINF: return "min";
+ case Asse_CMPEQF: return "cmpFeq";
+ case Asse_CMPLTF: return "cmpFlt";
+ case Asse_CMPLEF: return "cmpFle";
+ case Asse_CMPUNF: return "cmpFun";
+ case Asse_RCPF: return "rcp";
+ case Asse_RSQRTF: return "rsqrt";
+ case Asse_SQRTF: return "sqrt";
+ case Asse_AND: return "and";
+ case Asse_OR: return "or";
+ case Asse_XOR: return "xor";
+ case Asse_ANDN: return "andn";
+ case Asse_ADD8: return "paddb";
+ case Asse_ADD16: return "paddw";
+ case Asse_ADD32: return "paddd";
+ case Asse_ADD64: return "paddq";
+ case Asse_QADD8U: return "paddusb";
+ case Asse_QADD16U: return "paddusw";
+ case Asse_QADD8S: return "paddsb";
+ case Asse_QADD16S: return "paddsw";
+ case Asse_SUB8: return "psubb";
+ case Asse_SUB16: return "psubw";
+ case Asse_SUB32: return "psubd";
+ case Asse_SUB64: return "psubq";
+ case Asse_QSUB8U: return "psubusb";
+ case Asse_QSUB16U: return "psubusw";
+ case Asse_QSUB8S: return "psubsb";
+ case Asse_QSUB16S: return "psubsw";
+ case Asse_MUL16: return "pmullw";
+ case Asse_MULHI16U: return "pmulhuw";
+ case Asse_MULHI16S: return "pmulhw";
+ case Asse_AVG8U: return "pavgb";
+ case Asse_AVG16U: return "pavgw";
+ case Asse_MAX16S: return "pmaxw";
+ case Asse_MAX8U: return "pmaxub";
+ case Asse_MIN16S: return "pminw";
+ case Asse_MIN8U: return "pminub";
+ case Asse_CMPEQ8: return "pcmpeqb";
+ case Asse_CMPEQ16: return "pcmpeqw";
+ case Asse_CMPEQ32: return "pcmpeqd";
+ case Asse_CMPGT8S: return "pcmpgtb";
+ case Asse_CMPGT16S: return "pcmpgtw";
+ case Asse_CMPGT32S: return "pcmpgtd";
+ case Asse_SHL16: return "psllw";
+ case Asse_SHL32: return "pslld";
+ case Asse_SHL64: return "psllq";
+ case Asse_SHR16: return "psrlw";
+ case Asse_SHR32: return "psrld";
+ case Asse_SHR64: return "psrlq";
+ case Asse_SAR16: return "psraw";
+ case Asse_SAR32: return "psrad";
+ case Asse_PACKSSD: return "packssdw";
+ case Asse_PACKSSW: return "packsswb";
+ case Asse_PACKUSW: return "packuswb";
+ case Asse_UNPCKHB: return "punpckhb";
+ case Asse_UNPCKHW: return "punpckhw";
+ case Asse_UNPCKHD: return "punpckhd";
+ case Asse_UNPCKHQ: return "punpckhq";
+ case Asse_UNPCKLB: return "punpcklb";
+ case Asse_UNPCKLW: return "punpcklw";
+ case Asse_UNPCKLD: return "punpckld";
+ case Asse_UNPCKLQ: return "punpcklq";
+ default: vpanic("showAMD64SseOp");
+ }
+}
+
+AMD64Instr* AMD64Instr_Imm64 ( ULong imm64, HReg dst ) {
+ AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
+ i->tag = Ain_Imm64;
+ i->Ain.Imm64.imm64 = imm64;
+ i->Ain.Imm64.dst = dst;
+ return i;
+}
+AMD64Instr* AMD64Instr_Alu64R ( AMD64AluOp op, AMD64RMI* src, HReg dst ) {
+ AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
+ i->tag = Ain_Alu64R;
+ i->Ain.Alu64R.op = op;
+ i->Ain.Alu64R.src = src;
+ i->Ain.Alu64R.dst = dst;
+ return i;
+}
+AMD64Instr* AMD64Instr_Alu64M ( AMD64AluOp op, AMD64RI* src, AMD64AMode* dst ) {
+ AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
+ i->tag = Ain_Alu64M;
+ i->Ain.Alu64M.op = op;
+ i->Ain.Alu64M.src = src;
+ i->Ain.Alu64M.dst = dst;
+ vassert(op != Aalu_MUL);
+ return i;
+}
+AMD64Instr* AMD64Instr_Sh64 ( AMD64ShiftOp op, UInt src, HReg dst ) {
+ AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
+ i->tag = Ain_Sh64;
+ i->Ain.Sh64.op = op;
+ i->Ain.Sh64.src = src;
+ i->Ain.Sh64.dst = dst;
+ return i;
+}
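+/* Note: in Ain_Sh64 a 'src' of zero means "shift by %cl" rather
+   than by an immediate; the printer and getRegUsage_AMD64Instr
+   below both special-case src == 0 accordingly. */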
+AMD64Instr* AMD64Instr_Test64 ( UInt imm32, HReg dst ) {
+ AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
+ i->tag = Ain_Test64;
+ i->Ain.Test64.imm32 = imm32;
+ i->Ain.Test64.dst = dst;
+ return i;
+}
+AMD64Instr* AMD64Instr_Unary64 ( AMD64UnaryOp op, HReg dst ) {
+ AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
+ i->tag = Ain_Unary64;
+ i->Ain.Unary64.op = op;
+ i->Ain.Unary64.dst = dst;
+ return i;
+}
+AMD64Instr* AMD64Instr_Lea64 ( AMD64AMode* am, HReg dst ) {
+ AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
+ i->tag = Ain_Lea64;
+ i->Ain.Lea64.am = am;
+ i->Ain.Lea64.dst = dst;
+ return i;
+}
+AMD64Instr* AMD64Instr_MulL ( Bool syned, AMD64RM* src ) {
+ AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
+ i->tag = Ain_MulL;
+ i->Ain.MulL.syned = syned;
+ i->Ain.MulL.src = src;
+ return i;
+}
+AMD64Instr* AMD64Instr_Div ( Bool syned, Int sz, AMD64RM* src ) {
+ AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
+ i->tag = Ain_Div;
+ i->Ain.Div.syned = syned;
+ i->Ain.Div.sz = sz;
+ i->Ain.Div.src = src;
+ vassert(sz == 4 || sz == 8);
+ return i;
+}
+//.. AMD64Instr* AMD64Instr_Sh3232 ( AMD64ShiftOp op, UInt amt, HReg src, HReg dst ) {
+//.. AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
+//.. i->tag = Xin_Sh3232;
+//.. i->Xin.Sh3232.op = op;
+//.. i->Xin.Sh3232.amt = amt;
+//.. i->Xin.Sh3232.src = src;
+//.. i->Xin.Sh3232.dst = dst;
+//.. vassert(op == Xsh_SHL || op == Xsh_SHR);
+//.. return i;
+//.. }
+AMD64Instr* AMD64Instr_Push( AMD64RMI* src ) {
+ AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
+ i->tag = Ain_Push;
+ i->Ain.Push.src = src;
+ return i;
+}
+AMD64Instr* AMD64Instr_Call ( AMD64CondCode cond, Addr64 target, Int regparms ) {
+ AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
+ i->tag = Ain_Call;
+ i->Ain.Call.cond = cond;
+ i->Ain.Call.target = target;
+ i->Ain.Call.regparms = regparms;
+ vassert(regparms >= 0 && regparms <= 6);
+ return i;
+}
+AMD64Instr* AMD64Instr_Goto ( IRJumpKind jk, AMD64CondCode cond, AMD64RI* dst ) {
+ AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
+ i->tag = Ain_Goto;
+ i->Ain.Goto.cond = cond;
+ i->Ain.Goto.dst = dst;
+ i->Ain.Goto.jk = jk;
+ return i;
+}
+AMD64Instr* AMD64Instr_CMov64 ( AMD64CondCode cond, AMD64RM* src, HReg dst ) {
+ AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
+ i->tag = Ain_CMov64;
+ i->Ain.CMov64.cond = cond;
+ i->Ain.CMov64.src = src;
+ i->Ain.CMov64.dst = dst;
+ vassert(cond != Acc_ALWAYS);
+ return i;
+}
+AMD64Instr* AMD64Instr_MovxLQ ( Bool syned, HReg src, HReg dst ) {
+ AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
+ i->tag = Ain_MovxLQ;
+ i->Ain.MovxLQ.syned = syned;
+ i->Ain.MovxLQ.src = src;
+ i->Ain.MovxLQ.dst = dst;
+ return i;
+}
+AMD64Instr* AMD64Instr_LoadEX ( UChar szSmall, Bool syned,
+ AMD64AMode* src, HReg dst ) {
+ AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
+ i->tag = Ain_LoadEX;
+ i->Ain.LoadEX.szSmall = szSmall;
+ i->Ain.LoadEX.syned = syned;
+ i->Ain.LoadEX.src = src;
+ i->Ain.LoadEX.dst = dst;
+ vassert(szSmall == 1 || szSmall == 2 || szSmall == 4);
+ return i;
+}
+AMD64Instr* AMD64Instr_Store ( UChar sz, HReg src, AMD64AMode* dst ) {
+ AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
+ i->tag = Ain_Store;
+ i->Ain.Store.sz = sz;
+ i->Ain.Store.src = src;
+ i->Ain.Store.dst = dst;
+ vassert(sz == 1 || sz == 2 || sz == 4);
+ return i;
+}
+AMD64Instr* AMD64Instr_Set64 ( AMD64CondCode cond, HReg dst ) {
+ AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
+ i->tag = Ain_Set64;
+ i->Ain.Set64.cond = cond;
+ i->Ain.Set64.dst = dst;
+ return i;
+}
+AMD64Instr* AMD64Instr_Bsfr64 ( Bool isFwds, HReg src, HReg dst ) {
+ AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
+ i->tag = Ain_Bsfr64;
+ i->Ain.Bsfr64.isFwds = isFwds;
+ i->Ain.Bsfr64.src = src;
+ i->Ain.Bsfr64.dst = dst;
+ return i;
+}
+AMD64Instr* AMD64Instr_MFence ( void ) {
+ AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
+ i->tag = Ain_MFence;
+ return i;
+}
+AMD64Instr* AMD64Instr_ACAS ( AMD64AMode* addr, UChar sz ) {
+ AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
+ i->tag = Ain_ACAS;
+ i->Ain.ACAS.addr = addr;
+ i->Ain.ACAS.sz = sz;
+ vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
+ return i;
+}
+AMD64Instr* AMD64Instr_DACAS ( AMD64AMode* addr, UChar sz ) {
+ AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
+ i->tag = Ain_DACAS;
+ i->Ain.DACAS.addr = addr;
+ i->Ain.DACAS.sz = sz;
+ vassert(sz == 8 || sz == 4);
+ return i;
+}
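+/* Register conventions for the two CAS forms, as fixed by the
+   printer and getRegUsage_AMD64Instr below: Ain_ACAS compares the
+   value at 'addr' with %rax and, if equal, writes %rbx there,
+   leaving the observed old value in %rax; Ain_DACAS is the
+   double-width variant, with %rdx:%rax as the expected value and
+   %rcx:%rbx as the new value. */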
+
+AMD64Instr* AMD64Instr_A87Free ( Int nregs )
+{
+ AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
+ i->tag = Ain_A87Free;
+ i->Ain.A87Free.nregs = nregs;
+ vassert(nregs >= 1 && nregs <= 7);
+ return i;
+}
+AMD64Instr* AMD64Instr_A87PushPop ( AMD64AMode* addr, Bool isPush, UChar szB )
+{
+ AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
+ i->tag = Ain_A87PushPop;
+ i->Ain.A87PushPop.addr = addr;
+ i->Ain.A87PushPop.isPush = isPush;
+ i->Ain.A87PushPop.szB = szB;
+ vassert(szB == 8 || szB == 4);
+ return i;
+}
+AMD64Instr* AMD64Instr_A87FpOp ( A87FpOp op )
+{
+ AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
+ i->tag = Ain_A87FpOp;
+ i->Ain.A87FpOp.op = op;
+ return i;
+}
+AMD64Instr* AMD64Instr_A87LdCW ( AMD64AMode* addr )
+{
+ AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
+ i->tag = Ain_A87LdCW;
+ i->Ain.A87LdCW.addr = addr;
+ return i;
+}
+AMD64Instr* AMD64Instr_A87StSW ( AMD64AMode* addr )
+{
+ AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
+ i->tag = Ain_A87StSW;
+ i->Ain.A87StSW.addr = addr;
+ return i;
+}
+
+//.. AMD64Instr* AMD64Instr_FpUnary ( AMD64FpOp op, HReg src, HReg dst ) {
+//.. AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
+//.. i->tag = Xin_FpUnary;
+//.. i->Xin.FpUnary.op = op;
+//.. i->Xin.FpUnary.src = src;
+//.. i->Xin.FpUnary.dst = dst;
+//.. return i;
+//.. }
+//.. AMD64Instr* AMD64Instr_FpBinary ( AMD64FpOp op, HReg srcL, HReg srcR, HReg dst ) {
+//.. AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
+//.. i->tag = Xin_FpBinary;
+//.. i->Xin.FpBinary.op = op;
+//.. i->Xin.FpBinary.srcL = srcL;
+//.. i->Xin.FpBinary.srcR = srcR;
+//.. i->Xin.FpBinary.dst = dst;
+//.. return i;
+//.. }
+//.. AMD64Instr* AMD64Instr_FpLdSt ( Bool isLoad, UChar sz, HReg reg, AMD64AMode* addr ) {
+//.. AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
+//.. i->tag = Xin_FpLdSt;
+//.. i->Xin.FpLdSt.isLoad = isLoad;
+//.. i->Xin.FpLdSt.sz = sz;
+//.. i->Xin.FpLdSt.reg = reg;
+//.. i->Xin.FpLdSt.addr = addr;
+//.. vassert(sz == 4 || sz == 8);
+//.. return i;
+//.. }
+//.. AMD64Instr* AMD64Instr_FpLdStI ( Bool isLoad, UChar sz,
+//.. HReg reg, AMD64AMode* addr ) {
+//.. AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
+//.. i->tag = Xin_FpLdStI;
+//.. i->Xin.FpLdStI.isLoad = isLoad;
+//.. i->Xin.FpLdStI.sz = sz;
+//.. i->Xin.FpLdStI.reg = reg;
+//.. i->Xin.FpLdStI.addr = addr;
+//.. vassert(sz == 2 || sz == 4 || sz == 8);
+//.. return i;
+//.. }
+//.. AMD64Instr* AMD64Instr_Fp64to32 ( HReg src, HReg dst ) {
+//.. AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
+//.. i->tag = Xin_Fp64to32;
+//.. i->Xin.Fp64to32.src = src;
+//.. i->Xin.Fp64to32.dst = dst;
+//.. return i;
+//.. }
+//.. AMD64Instr* AMD64Instr_FpCMov ( AMD64CondCode cond, HReg src, HReg dst ) {
+//.. AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
+//.. i->tag = Xin_FpCMov;
+//.. i->Xin.FpCMov.cond = cond;
+//.. i->Xin.FpCMov.src = src;
+//.. i->Xin.FpCMov.dst = dst;
+//.. vassert(cond != Xcc_ALWAYS);
+//.. return i;
+//.. }
+AMD64Instr* AMD64Instr_LdMXCSR ( AMD64AMode* addr ) {
+ AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
+ i->tag = Ain_LdMXCSR;
+ i->Ain.LdMXCSR.addr = addr;
+ return i;
+}
+//.. AMD64Instr* AMD64Instr_FpStSW_AX ( void ) {
+//.. AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
+//.. i->tag = Xin_FpStSW_AX;
+//.. return i;
+//.. }
+AMD64Instr* AMD64Instr_SseUComIS ( Int sz, HReg srcL, HReg srcR, HReg dst ) {
+ AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
+ i->tag = Ain_SseUComIS;
+ i->Ain.SseUComIS.sz = toUChar(sz);
+ i->Ain.SseUComIS.srcL = srcL;
+ i->Ain.SseUComIS.srcR = srcR;
+ i->Ain.SseUComIS.dst = dst;
+ vassert(sz == 4 || sz == 8);
+ return i;
+}
+AMD64Instr* AMD64Instr_SseSI2SF ( Int szS, Int szD, HReg src, HReg dst ) {
+ AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
+ i->tag = Ain_SseSI2SF;
+ i->Ain.SseSI2SF.szS = toUChar(szS);
+ i->Ain.SseSI2SF.szD = toUChar(szD);
+ i->Ain.SseSI2SF.src = src;
+ i->Ain.SseSI2SF.dst = dst;
+ vassert(szS == 4 || szS == 8);
+ vassert(szD == 4 || szD == 8);
+ return i;
+}
+AMD64Instr* AMD64Instr_SseSF2SI ( Int szS, Int szD, HReg src, HReg dst ) {
+ AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
+ i->tag = Ain_SseSF2SI;
+ i->Ain.SseSF2SI.szS = toUChar(szS);
+ i->Ain.SseSF2SI.szD = toUChar(szD);
+ i->Ain.SseSF2SI.src = src;
+ i->Ain.SseSF2SI.dst = dst;
+ vassert(szS == 4 || szS == 8);
+ vassert(szD == 4 || szD == 8);
+ return i;
+}
+AMD64Instr* AMD64Instr_SseSDSS ( Bool from64, HReg src, HReg dst )
+{
+ AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
+ i->tag = Ain_SseSDSS;
+ i->Ain.SseSDSS.from64 = from64;
+ i->Ain.SseSDSS.src = src;
+ i->Ain.SseSDSS.dst = dst;
+ return i;
+}
+
+//.. AMD64Instr* AMD64Instr_SseConst ( UShort con, HReg dst ) {
+//.. AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
+//.. i->tag = Xin_SseConst;
+//.. i->Xin.SseConst.con = con;
+//.. i->Xin.SseConst.dst = dst;
+//.. vassert(hregClass(dst) == HRcVec128);
+//.. return i;
+//.. }
+AMD64Instr* AMD64Instr_SseLdSt ( Bool isLoad, Int sz,
+ HReg reg, AMD64AMode* addr ) {
+ AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
+ i->tag = Ain_SseLdSt;
+ i->Ain.SseLdSt.isLoad = isLoad;
+ i->Ain.SseLdSt.sz = toUChar(sz);
+ i->Ain.SseLdSt.reg = reg;
+ i->Ain.SseLdSt.addr = addr;
+ vassert(sz == 4 || sz == 8 || sz == 16);
+ return i;
+}
+AMD64Instr* AMD64Instr_SseLdzLO ( Int sz, HReg reg, AMD64AMode* addr )
+{
+ AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
+ i->tag = Ain_SseLdzLO;
+ i->Ain.SseLdzLO.sz = sz;
+ i->Ain.SseLdzLO.reg = reg;
+ i->Ain.SseLdzLO.addr = addr;
+ vassert(sz == 4 || sz == 8);
+ return i;
+}
+AMD64Instr* AMD64Instr_Sse32Fx4 ( AMD64SseOp op, HReg src, HReg dst ) {
+ AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
+ i->tag = Ain_Sse32Fx4;
+ i->Ain.Sse32Fx4.op = op;
+ i->Ain.Sse32Fx4.src = src;
+ i->Ain.Sse32Fx4.dst = dst;
+ vassert(op != Asse_MOV);
+ return i;
+}
+AMD64Instr* AMD64Instr_Sse32FLo ( AMD64SseOp op, HReg src, HReg dst ) {
+ AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
+ i->tag = Ain_Sse32FLo;
+ i->Ain.Sse32FLo.op = op;
+ i->Ain.Sse32FLo.src = src;
+ i->Ain.Sse32FLo.dst = dst;
+ vassert(op != Asse_MOV);
+ return i;
+}
+AMD64Instr* AMD64Instr_Sse64Fx2 ( AMD64SseOp op, HReg src, HReg dst ) {
+ AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
+ i->tag = Ain_Sse64Fx2;
+ i->Ain.Sse64Fx2.op = op;
+ i->Ain.Sse64Fx2.src = src;
+ i->Ain.Sse64Fx2.dst = dst;
+ vassert(op != Asse_MOV);
+ return i;
+}
+AMD64Instr* AMD64Instr_Sse64FLo ( AMD64SseOp op, HReg src, HReg dst ) {
+ AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
+ i->tag = Ain_Sse64FLo;
+ i->Ain.Sse64FLo.op = op;
+ i->Ain.Sse64FLo.src = src;
+ i->Ain.Sse64FLo.dst = dst;
+ vassert(op != Asse_MOV);
+ return i;
+}
+AMD64Instr* AMD64Instr_SseReRg ( AMD64SseOp op, HReg re, HReg rg ) {
+ AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
+ i->tag = Ain_SseReRg;
+ i->Ain.SseReRg.op = op;
+ i->Ain.SseReRg.src = re;
+ i->Ain.SseReRg.dst = rg;
+ return i;
+}
+AMD64Instr* AMD64Instr_SseCMov ( AMD64CondCode cond, HReg src, HReg dst ) {
+ AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
+ i->tag = Ain_SseCMov;
+ i->Ain.SseCMov.cond = cond;
+ i->Ain.SseCMov.src = src;
+ i->Ain.SseCMov.dst = dst;
+ vassert(cond != Acc_ALWAYS);
+ return i;
+}
+AMD64Instr* AMD64Instr_SseShuf ( Int order, HReg src, HReg dst ) {
+ AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
+ i->tag = Ain_SseShuf;
+ i->Ain.SseShuf.order = order;
+ i->Ain.SseShuf.src = src;
+ i->Ain.SseShuf.dst = dst;
+ vassert(order >= 0 && order <= 0xFF);
+ return i;
+}
+
+void ppAMD64Instr ( AMD64Instr* i, Bool mode64 )
+{
+ vassert(mode64 == True);
+ switch (i->tag) {
+ case Ain_Imm64:
+ vex_printf("movabsq $0x%llx,", i->Ain.Imm64.imm64);
+ ppHRegAMD64(i->Ain.Imm64.dst);
+ return;
+ case Ain_Alu64R:
+ vex_printf("%sq ", showAMD64AluOp(i->Ain.Alu64R.op));
+ ppAMD64RMI(i->Ain.Alu64R.src);
+ vex_printf(",");
+ ppHRegAMD64(i->Ain.Alu64R.dst);
+ return;
+ case Ain_Alu64M:
+ vex_printf("%sq ", showAMD64AluOp(i->Ain.Alu64M.op));
+ ppAMD64RI(i->Ain.Alu64M.src);
+ vex_printf(",");
+ ppAMD64AMode(i->Ain.Alu64M.dst);
+ return;
+ case Ain_Sh64:
+ vex_printf("%sq ", showAMD64ShiftOp(i->Ain.Sh64.op));
+ if (i->Ain.Sh64.src == 0)
+ vex_printf("%%cl,");
+ else
+ vex_printf("$%d,", (Int)i->Ain.Sh64.src);
+ ppHRegAMD64(i->Ain.Sh64.dst);
+ return;
+ case Ain_Test64:
+ vex_printf("testq $%d,", (Int)i->Ain.Test64.imm32);
+ ppHRegAMD64(i->Ain.Test64.dst);
+ return;
+ case Ain_Unary64:
+ vex_printf("%sq ", showAMD64UnaryOp(i->Ain.Unary64.op));
+ ppHRegAMD64(i->Ain.Unary64.dst);
+ return;
+ case Ain_Lea64:
+ vex_printf("leaq ");
+ ppAMD64AMode(i->Ain.Lea64.am);
+ vex_printf(",");
+ ppHRegAMD64(i->Ain.Lea64.dst);
+ return;
+ case Ain_MulL:
+ vex_printf("%cmulq ", i->Ain.MulL.syned ? 's' : 'u');
+ ppAMD64RM(i->Ain.MulL.src);
+ return;
+ case Ain_Div:
+ vex_printf("%cdiv%s ",
+ i->Ain.Div.syned ? 's' : 'u',
+ showAMD64ScalarSz(i->Ain.Div.sz));
+ ppAMD64RM(i->Ain.Div.src);
+ return;
+//.. case Xin_Sh3232:
+//.. vex_printf("%sdl ", showAMD64ShiftOp(i->Xin.Sh3232.op));
+//.. if (i->Xin.Sh3232.amt == 0)
+//.. vex_printf(" %%cl,");
+//.. else
+//.. vex_printf(" $%d,", i->Xin.Sh3232.amt);
+//.. ppHRegAMD64(i->Xin.Sh3232.src);
+//.. vex_printf(",");
+//.. ppHRegAMD64(i->Xin.Sh3232.dst);
+//.. return;
+ case Ain_Push:
+ vex_printf("pushq ");
+ ppAMD64RMI(i->Ain.Push.src);
+ return;
+ case Ain_Call:
+ vex_printf("call%s[%d] ",
+ i->Ain.Call.cond==Acc_ALWAYS
+ ? "" : showAMD64CondCode(i->Ain.Call.cond),
+ i->Ain.Call.regparms );
+ vex_printf("0x%llx", i->Ain.Call.target);
+ break;
+ case Ain_Goto:
+ if (i->Ain.Goto.cond != Acc_ALWAYS) {
+ vex_printf("if (%%rflags.%s) { ",
+ showAMD64CondCode(i->Ain.Goto.cond));
+ }
+ if (i->Ain.Goto.jk != Ijk_Boring
+ && i->Ain.Goto.jk != Ijk_Call
+ && i->Ain.Goto.jk != Ijk_Ret) {
+ vex_printf("movl $");
+ ppIRJumpKind(i->Ain.Goto.jk);
+ vex_printf(",%%ebp ; ");
+ }
+ vex_printf("movq ");
+ ppAMD64RI(i->Ain.Goto.dst);
+ vex_printf(",%%rax ; movabsq $dispatcher_addr,%%rdx ; jmp *%%rdx");
+ if (i->Ain.Goto.cond != Acc_ALWAYS) {
+ vex_printf(" }");
+ }
+ return;
+ case Ain_CMov64:
+ vex_printf("cmov%s ", showAMD64CondCode(i->Ain.CMov64.cond));
+ ppAMD64RM(i->Ain.CMov64.src);
+ vex_printf(",");
+ ppHRegAMD64(i->Ain.CMov64.dst);
+ return;
+ case Ain_MovxLQ:
+ vex_printf("mov%clq ", i->Ain.MovxLQ.syned ? 's' : 'z');
+ ppHRegAMD64_lo32(i->Ain.MovxLQ.src);
+ vex_printf(",");
+ ppHRegAMD64(i->Ain.MovxLQ.dst);
+ return;
+ case Ain_LoadEX:
+ if (i->Ain.LoadEX.szSmall==4 && !i->Ain.LoadEX.syned) {
+ vex_printf("movl ");
+ ppAMD64AMode(i->Ain.LoadEX.src);
+ vex_printf(",");
+ ppHRegAMD64_lo32(i->Ain.LoadEX.dst);
+ } else {
+ vex_printf("mov%c%cq ",
+ i->Ain.LoadEX.syned ? 's' : 'z',
+ i->Ain.LoadEX.szSmall==1
+ ? 'b'
+ : (i->Ain.LoadEX.szSmall==2 ? 'w' : 'l'));
+ ppAMD64AMode(i->Ain.LoadEX.src);
+ vex_printf(",");
+ ppHRegAMD64(i->Ain.LoadEX.dst);
+ }
+ return;
+ case Ain_Store:
+ vex_printf("mov%c ", i->Ain.Store.sz==1 ? 'b'
+ : (i->Ain.Store.sz==2 ? 'w' : 'l'));
+ ppHRegAMD64(i->Ain.Store.src);
+ vex_printf(",");
+ ppAMD64AMode(i->Ain.Store.dst);
+ return;
+ case Ain_Set64:
+ vex_printf("setq%s ", showAMD64CondCode(i->Ain.Set64.cond));
+ ppHRegAMD64(i->Ain.Set64.dst);
+ return;
+ case Ain_Bsfr64:
+ vex_printf("bs%cq ", i->Ain.Bsfr64.isFwds ? 'f' : 'r');
+ ppHRegAMD64(i->Ain.Bsfr64.src);
+ vex_printf(",");
+ ppHRegAMD64(i->Ain.Bsfr64.dst);
+ return;
+ case Ain_MFence:
+ vex_printf("mfence" );
+ return;
+ case Ain_ACAS:
+ vex_printf("lock cmpxchg%c ",
+ i->Ain.ACAS.sz==1 ? 'b' : i->Ain.ACAS.sz==2 ? 'w'
+ : i->Ain.ACAS.sz==4 ? 'l' : 'q' );
+ vex_printf("{%%rax->%%rbx},");
+ ppAMD64AMode(i->Ain.ACAS.addr);
+ return;
+ case Ain_DACAS:
+ vex_printf("lock cmpxchg%db {%%rdx:%%rax->%%rcx:%%rbx},",
+ (Int)(2 * i->Ain.DACAS.sz));
+ ppAMD64AMode(i->Ain.DACAS.addr);
+ return;
+ case Ain_A87Free:
+ vex_printf("ffree %%st(7..%d)", 8 - i->Ain.A87Free.nregs );
+ break;
+ case Ain_A87PushPop:
+ vex_printf(i->Ain.A87PushPop.isPush ? "fld%c " : "fstp%c ",
+ i->Ain.A87PushPop.szB == 4 ? 's' : 'l');
+ ppAMD64AMode(i->Ain.A87PushPop.addr);
+ break;
+ case Ain_A87FpOp:
+ vex_printf("f%s", showA87FpOp(i->Ain.A87FpOp.op));
+ break;
+ case Ain_A87LdCW:
+ vex_printf("fldcw ");
+ ppAMD64AMode(i->Ain.A87LdCW.addr);
+ break;
+ case Ain_A87StSW:
+ vex_printf("fstsw ");
+ ppAMD64AMode(i->Ain.A87StSW.addr);
+ break;
+//.. case Xin_FpUnary:
+//.. vex_printf("g%sD ", showAMD64FpOp(i->Xin.FpUnary.op));
+//.. ppHRegAMD64(i->Xin.FpUnary.src);
+//.. vex_printf(",");
+//.. ppHRegAMD64(i->Xin.FpUnary.dst);
+//.. break;
+//.. case Xin_FpBinary:
+//.. vex_printf("g%sD ", showAMD64FpOp(i->Xin.FpBinary.op));
+//.. ppHRegAMD64(i->Xin.FpBinary.srcL);
+//.. vex_printf(",");
+//.. ppHRegAMD64(i->Xin.FpBinary.srcR);
+//.. vex_printf(",");
+//.. ppHRegAMD64(i->Xin.FpBinary.dst);
+//.. break;
+//.. case Xin_FpLdSt:
+//.. if (i->Xin.FpLdSt.isLoad) {
+//.. vex_printf("gld%c " , i->Xin.FpLdSt.sz==8 ? 'D' : 'F');
+//.. ppAMD64AMode(i->Xin.FpLdSt.addr);
+//.. vex_printf(", ");
+//.. ppHRegAMD64(i->Xin.FpLdSt.reg);
+//.. } else {
+//.. vex_printf("gst%c " , i->Xin.FpLdSt.sz==8 ? 'D' : 'F');
+//.. ppHRegAMD64(i->Xin.FpLdSt.reg);
+//.. vex_printf(", ");
+//.. ppAMD64AMode(i->Xin.FpLdSt.addr);
+//.. }
+//.. return;
+//.. case Xin_FpLdStI:
+//.. if (i->Xin.FpLdStI.isLoad) {
+//.. vex_printf("gild%s ", i->Xin.FpLdStI.sz==8 ? "ll" :
+//.. i->Xin.FpLdStI.sz==4 ? "l" : "w");
+//.. ppAMD64AMode(i->Xin.FpLdStI.addr);
+//.. vex_printf(", ");
+//.. ppHRegAMD64(i->Xin.FpLdStI.reg);
+//.. } else {
+//.. vex_printf("gist%s ", i->Xin.FpLdStI.sz==8 ? "ll" :
+//.. i->Xin.FpLdStI.sz==4 ? "l" : "w");
+//.. ppHRegAMD64(i->Xin.FpLdStI.reg);
+//.. vex_printf(", ");
+//.. ppAMD64AMode(i->Xin.FpLdStI.addr);
+//.. }
+//.. return;
+//.. case Xin_Fp64to32:
+//.. vex_printf("gdtof ");
+//.. ppHRegAMD64(i->Xin.Fp64to32.src);
+//.. vex_printf(",");
+//.. ppHRegAMD64(i->Xin.Fp64to32.dst);
+//.. return;
+//.. case Xin_FpCMov:
+//.. vex_printf("gcmov%s ", showAMD64CondCode(i->Xin.FpCMov.cond));
+//.. ppHRegAMD64(i->Xin.FpCMov.src);
+//.. vex_printf(",");
+//.. ppHRegAMD64(i->Xin.FpCMov.dst);
+//.. return;
+//.. case Xin_FpLdStCW:
+//.. vex_printf(i->Xin.FpLdStCW.isLoad ? "fldcw " : "fstcw ");
+//.. ppAMD64AMode(i->Xin.FpLdStCW.addr);
+//.. return;
+//.. case Xin_FpStSW_AX:
+//.. vex_printf("fstsw %%ax");
+//.. return;
+ case Ain_LdMXCSR:
+ vex_printf("ldmxcsr ");
+ ppAMD64AMode(i->Ain.LdMXCSR.addr);
+ break;
+ case Ain_SseUComIS:
+ vex_printf("ucomis%s ", i->Ain.SseUComIS.sz==4 ? "s" : "d");
+ ppHRegAMD64(i->Ain.SseUComIS.srcL);
+ vex_printf(",");
+ ppHRegAMD64(i->Ain.SseUComIS.srcR);
+ vex_printf(" ; pushfq ; popq ");
+ ppHRegAMD64(i->Ain.SseUComIS.dst);
+ break;
+ case Ain_SseSI2SF:
+ vex_printf("cvtsi2s%s ", i->Ain.SseSI2SF.szD==4 ? "s" : "d");
+ (i->Ain.SseSI2SF.szS==4 ? ppHRegAMD64_lo32 : ppHRegAMD64)
+ (i->Ain.SseSI2SF.src);
+ vex_printf(",");
+ ppHRegAMD64(i->Ain.SseSI2SF.dst);
+ break;
+ case Ain_SseSF2SI:
+ vex_printf("cvts%s2si ", i->Ain.SseSF2SI.szS==4 ? "s" : "d");
+ ppHRegAMD64(i->Ain.SseSF2SI.src);
+ vex_printf(",");
+ (i->Ain.SseSF2SI.szD==4 ? ppHRegAMD64_lo32 : ppHRegAMD64)
+ (i->Ain.SseSF2SI.dst);
+ break;
+ case Ain_SseSDSS:
+ vex_printf(i->Ain.SseSDSS.from64 ? "cvtsd2ss " : "cvtss2sd ");
+ ppHRegAMD64(i->Ain.SseSDSS.src);
+ vex_printf(",");
+ ppHRegAMD64(i->Ain.SseSDSS.dst);
+ break;
+//.. case Xin_SseConst:
+//.. vex_printf("const $0x%04x,", (Int)i->Xin.SseConst.con);
+//.. ppHRegAMD64(i->Xin.SseConst.dst);
+//.. break;
+ case Ain_SseLdSt:
+ switch (i->Ain.SseLdSt.sz) {
+ case 4: vex_printf("movss "); break;
+ case 8: vex_printf("movsd "); break;
+ case 16: vex_printf("movups "); break;
+ default: vassert(0);
+ }
+ if (i->Ain.SseLdSt.isLoad) {
+ ppAMD64AMode(i->Ain.SseLdSt.addr);
+ vex_printf(",");
+ ppHRegAMD64(i->Ain.SseLdSt.reg);
+ } else {
+ ppHRegAMD64(i->Ain.SseLdSt.reg);
+ vex_printf(",");
+ ppAMD64AMode(i->Ain.SseLdSt.addr);
+ }
+ return;
+ case Ain_SseLdzLO:
+ vex_printf("movs%s ", i->Ain.SseLdzLO.sz==4 ? "s" : "d");
+ ppAMD64AMode(i->Ain.SseLdzLO.addr);
+ vex_printf(",");
+ ppHRegAMD64(i->Ain.SseLdzLO.reg);
+ return;
+ case Ain_Sse32Fx4:
+ vex_printf("%sps ", showAMD64SseOp(i->Ain.Sse32Fx4.op));
+ ppHRegAMD64(i->Ain.Sse32Fx4.src);
+ vex_printf(",");
+ ppHRegAMD64(i->Ain.Sse32Fx4.dst);
+ return;
+ case Ain_Sse32FLo:
+ vex_printf("%sss ", showAMD64SseOp(i->Ain.Sse32FLo.op));
+ ppHRegAMD64(i->Ain.Sse32FLo.src);
+ vex_printf(",");
+ ppHRegAMD64(i->Ain.Sse32FLo.dst);
+ return;
+ case Ain_Sse64Fx2:
+ vex_printf("%spd ", showAMD64SseOp(i->Ain.Sse64Fx2.op));
+ ppHRegAMD64(i->Ain.Sse64Fx2.src);
+ vex_printf(",");
+ ppHRegAMD64(i->Ain.Sse64Fx2.dst);
+ return;
+ case Ain_Sse64FLo:
+ vex_printf("%ssd ", showAMD64SseOp(i->Ain.Sse64FLo.op));
+ ppHRegAMD64(i->Ain.Sse64FLo.src);
+ vex_printf(",");
+ ppHRegAMD64(i->Ain.Sse64FLo.dst);
+ return;
+ case Ain_SseReRg:
+ vex_printf("%s ", showAMD64SseOp(i->Ain.SseReRg.op));
+ ppHRegAMD64(i->Ain.SseReRg.src);
+ vex_printf(",");
+ ppHRegAMD64(i->Ain.SseReRg.dst);
+ return;
+ case Ain_SseCMov:
+ vex_printf("cmov%s ", showAMD64CondCode(i->Ain.SseCMov.cond));
+ ppHRegAMD64(i->Ain.SseCMov.src);
+ vex_printf(",");
+ ppHRegAMD64(i->Ain.SseCMov.dst);
+ return;
+ case Ain_SseShuf:
+ vex_printf("pshufd $0x%x,", i->Ain.SseShuf.order);
+ ppHRegAMD64(i->Ain.SseShuf.src);
+ vex_printf(",");
+ ppHRegAMD64(i->Ain.SseShuf.dst);
+ return;
+
+ default:
+ vpanic("ppAMD64Instr");
+ }
+}
+
+/* --------- Helpers for register allocation. --------- */
+
+void getRegUsage_AMD64Instr ( HRegUsage* u, AMD64Instr* i, Bool mode64 )
+{
+ Bool unary;
+ vassert(mode64 == True);
+ initHRegUsage(u);
+ switch (i->tag) {
+ case Ain_Imm64:
+ addHRegUse(u, HRmWrite, i->Ain.Imm64.dst);
+ return;
+ case Ain_Alu64R:
+ addRegUsage_AMD64RMI(u, i->Ain.Alu64R.src);
+ if (i->Ain.Alu64R.op == Aalu_MOV) {
+ addHRegUse(u, HRmWrite, i->Ain.Alu64R.dst);
+ return;
+ }
+ if (i->Ain.Alu64R.op == Aalu_CMP) {
+ addHRegUse(u, HRmRead, i->Ain.Alu64R.dst);
+ return;
+ }
+ addHRegUse(u, HRmModify, i->Ain.Alu64R.dst);
+ return;
+ case Ain_Alu64M:
+ addRegUsage_AMD64RI(u, i->Ain.Alu64M.src);
+ addRegUsage_AMD64AMode(u, i->Ain.Alu64M.dst);
+ return;
+ case Ain_Sh64:
+ addHRegUse(u, HRmModify, i->Ain.Sh64.dst);
+ if (i->Ain.Sh64.src == 0)
+ addHRegUse(u, HRmRead, hregAMD64_RCX());
+ return;
+ case Ain_Test64:
+ addHRegUse(u, HRmRead, i->Ain.Test64.dst);
+ return;
+ case Ain_Unary64:
+ addHRegUse(u, HRmModify, i->Ain.Unary64.dst);
+ return;
+ case Ain_Lea64:
+ addRegUsage_AMD64AMode(u, i->Ain.Lea64.am);
+ addHRegUse(u, HRmWrite, i->Ain.Lea64.dst);
+ return;
+ case Ain_MulL:
+ addRegUsage_AMD64RM(u, i->Ain.MulL.src, HRmRead);
+ addHRegUse(u, HRmModify, hregAMD64_RAX());
+ addHRegUse(u, HRmWrite, hregAMD64_RDX());
+ return;
+ case Ain_Div:
+ addRegUsage_AMD64RM(u, i->Ain.Div.src, HRmRead);
+ addHRegUse(u, HRmModify, hregAMD64_RAX());
+ addHRegUse(u, HRmModify, hregAMD64_RDX());
+ return;
+//.. case Xin_Sh3232:
+//.. addHRegUse(u, HRmRead, i->Xin.Sh3232.src);
+//.. addHRegUse(u, HRmModify, i->Xin.Sh3232.dst);
+//.. if (i->Xin.Sh3232.amt == 0)
+//.. addHRegUse(u, HRmRead, hregAMD64_ECX());
+//.. return;
+ case Ain_Push:
+ addRegUsage_AMD64RMI(u, i->Ain.Push.src);
+ addHRegUse(u, HRmModify, hregAMD64_RSP());
+ return;
+ case Ain_Call:
+ /* This is a bit subtle. */
+ /* First off, claim it trashes all the caller-saved regs
+ which fall within the register allocator's jurisdiction.
+ These I believe to be: rax rcx rdx rsi rdi r8 r9 r10 r11
+ and all the xmm registers.
+ */
+ addHRegUse(u, HRmWrite, hregAMD64_RAX());
+ addHRegUse(u, HRmWrite, hregAMD64_RCX());
+ addHRegUse(u, HRmWrite, hregAMD64_RDX());
+ addHRegUse(u, HRmWrite, hregAMD64_RSI());
+ addHRegUse(u, HRmWrite, hregAMD64_RDI());
+ addHRegUse(u, HRmWrite, hregAMD64_R8());
+ addHRegUse(u, HRmWrite, hregAMD64_R9());
+ addHRegUse(u, HRmWrite, hregAMD64_R10());
+ addHRegUse(u, HRmWrite, hregAMD64_R11());
+ addHRegUse(u, HRmWrite, hregAMD64_XMM0());
+ addHRegUse(u, HRmWrite, hregAMD64_XMM1());
+ addHRegUse(u, HRmWrite, hregAMD64_XMM2());
+ addHRegUse(u, HRmWrite, hregAMD64_XMM3());
+ addHRegUse(u, HRmWrite, hregAMD64_XMM4());
+ addHRegUse(u, HRmWrite, hregAMD64_XMM5());
+ addHRegUse(u, HRmWrite, hregAMD64_XMM6());
+ addHRegUse(u, HRmWrite, hregAMD64_XMM7());
+ addHRegUse(u, HRmWrite, hregAMD64_XMM8());
+ addHRegUse(u, HRmWrite, hregAMD64_XMM9());
+ addHRegUse(u, HRmWrite, hregAMD64_XMM10());
+ addHRegUse(u, HRmWrite, hregAMD64_XMM11());
+ addHRegUse(u, HRmWrite, hregAMD64_XMM12());
+ addHRegUse(u, HRmWrite, hregAMD64_XMM13());
+ addHRegUse(u, HRmWrite, hregAMD64_XMM14());
+ addHRegUse(u, HRmWrite, hregAMD64_XMM15());
+
+ /* Now we have to state any parameter-carrying registers
+ which might be read. This depends on the regparmness. */
+ switch (i->Ain.Call.regparms) {
+ case 6: addHRegUse(u, HRmRead, hregAMD64_R9()); /*fallthru*/
+ case 5: addHRegUse(u, HRmRead, hregAMD64_R8()); /*fallthru*/
+ case 4: addHRegUse(u, HRmRead, hregAMD64_RCX()); /*fallthru*/
+ case 3: addHRegUse(u, HRmRead, hregAMD64_RDX()); /*fallthru*/
+ case 2: addHRegUse(u, HRmRead, hregAMD64_RSI()); /*fallthru*/
+ case 1: addHRegUse(u, HRmRead, hregAMD64_RDI()); break;
+ case 0: break;
+ default: vpanic("getRegUsage_AMD64Instr:Call:regparms");
+ }
+ /* Finally, there is the issue that the insn trashes a
+ register because the literal target address has to be
+ loaded into a register. Fortunately, r11 is stated in the
+ ABI as a scratch register, and so seems a suitable victim. */
+ addHRegUse(u, HRmWrite, hregAMD64_R11());
+ /* Upshot of this is that the assembler really must use r11,
+ and no other, as a destination temporary. */
+ return;
+ case Ain_Goto:
+ addRegUsage_AMD64RI(u, i->Ain.Goto.dst);
+ addHRegUse(u, HRmWrite, hregAMD64_RAX()); /* used for next guest addr */
+ addHRegUse(u, HRmWrite, hregAMD64_RDX()); /* used for dispatcher addr */
+ if (i->Ain.Goto.jk != Ijk_Boring
+ && i->Ain.Goto.jk != Ijk_Call
+ && i->Ain.Goto.jk != Ijk_Ret)
+ /* note, this is irrelevant since rbp is not actually
+ available to the allocator. But still .. */
+ addHRegUse(u, HRmWrite, hregAMD64_RBP());
+ return;
+ case Ain_CMov64:
+ addRegUsage_AMD64RM(u, i->Ain.CMov64.src, HRmRead);
+ addHRegUse(u, HRmModify, i->Ain.CMov64.dst);
+ return;
+ case Ain_MovxLQ:
+ addHRegUse(u, HRmRead, i->Ain.MovxLQ.src);
+ addHRegUse(u, HRmWrite, i->Ain.MovxLQ.dst);
+ return;
+ case Ain_LoadEX:
+ addRegUsage_AMD64AMode(u, i->Ain.LoadEX.src);
+ addHRegUse(u, HRmWrite, i->Ain.LoadEX.dst);
+ return;
+ case Ain_Store:
+ addHRegUse(u, HRmRead, i->Ain.Store.src);
+ addRegUsage_AMD64AMode(u, i->Ain.Store.dst);
+ return;
+ case Ain_Set64:
+ addHRegUse(u, HRmWrite, i->Ain.Set64.dst);
+ return;
+ case Ain_Bsfr64:
+ addHRegUse(u, HRmRead, i->Ain.Bsfr64.src);
+ addHRegUse(u, HRmWrite, i->Ain.Bsfr64.dst);
+ return;
+ case Ain_MFence:
+ return;
+ case Ain_ACAS:
+ addRegUsage_AMD64AMode(u, i->Ain.ACAS.addr);
+ addHRegUse(u, HRmRead, hregAMD64_RBX());
+ addHRegUse(u, HRmModify, hregAMD64_RAX());
+ return;
+ case Ain_DACAS:
+ addRegUsage_AMD64AMode(u, i->Ain.DACAS.addr);
+ addHRegUse(u, HRmRead, hregAMD64_RCX());
+ addHRegUse(u, HRmRead, hregAMD64_RBX());
+ addHRegUse(u, HRmModify, hregAMD64_RDX());
+ addHRegUse(u, HRmModify, hregAMD64_RAX());
+ return;
+ case Ain_A87Free:
+ return;
+ case Ain_A87PushPop:
+ addRegUsage_AMD64AMode(u, i->Ain.A87PushPop.addr);
+ return;
+ case Ain_A87FpOp:
+ return;
+ case Ain_A87LdCW:
+ addRegUsage_AMD64AMode(u, i->Ain.A87LdCW.addr);
+ return;
+ case Ain_A87StSW:
+ addRegUsage_AMD64AMode(u, i->Ain.A87StSW.addr);
+ return;
+//.. case Xin_FpUnary:
+//.. addHRegUse(u, HRmRead, i->Xin.FpUnary.src);
+//.. addHRegUse(u, HRmWrite, i->Xin.FpUnary.dst);
+//.. return;
+//.. case Xin_FpBinary:
+//.. addHRegUse(u, HRmRead, i->Xin.FpBinary.srcL);
+//.. addHRegUse(u, HRmRead, i->Xin.FpBinary.srcR);
+//.. addHRegUse(u, HRmWrite, i->Xin.FpBinary.dst);
+//.. return;
+//.. case Xin_FpLdSt:
+//.. addRegUsage_AMD64AMode(u, i->Xin.FpLdSt.addr);
+//.. addHRegUse(u, i->Xin.FpLdSt.isLoad ? HRmWrite : HRmRead,
+//.. i->Xin.FpLdSt.reg);
+//.. return;
+//.. case Xin_FpLdStI:
+//.. addRegUsage_AMD64AMode(u, i->Xin.FpLdStI.addr);
+//.. addHRegUse(u, i->Xin.FpLdStI.isLoad ? HRmWrite : HRmRead,
+//.. i->Xin.FpLdStI.reg);
+//.. return;
+//.. case Xin_Fp64to32:
+//.. addHRegUse(u, HRmRead, i->Xin.Fp64to32.src);
+//.. addHRegUse(u, HRmWrite, i->Xin.Fp64to32.dst);
+//.. return;
+//.. case Xin_FpCMov:
+//.. addHRegUse(u, HRmRead, i->Xin.FpCMov.src);
+//.. addHRegUse(u, HRmModify, i->Xin.FpCMov.dst);
+//.. return;
+ case Ain_LdMXCSR:
+ addRegUsage_AMD64AMode(u, i->Ain.LdMXCSR.addr);
+ return;
+//.. case Xin_FpStSW_AX:
+//.. addHRegUse(u, HRmWrite, hregAMD64_EAX());
+//.. return;
+ case Ain_SseUComIS:
+ addHRegUse(u, HRmRead, i->Ain.SseUComIS.srcL);
+ addHRegUse(u, HRmRead, i->Ain.SseUComIS.srcR);
+ addHRegUse(u, HRmWrite, i->Ain.SseUComIS.dst);
+ return;
+ case Ain_SseSI2SF:
+ addHRegUse(u, HRmRead, i->Ain.SseSI2SF.src);
+ addHRegUse(u, HRmWrite, i->Ain.SseSI2SF.dst);
+ return;
+ case Ain_SseSF2SI:
+ addHRegUse(u, HRmRead, i->Ain.SseSF2SI.src);
+ addHRegUse(u, HRmWrite, i->Ain.SseSF2SI.dst);
+ return;
+ case Ain_SseSDSS:
+ addHRegUse(u, HRmRead, i->Ain.SseSDSS.src);
+ addHRegUse(u, HRmWrite, i->Ain.SseSDSS.dst);
+ return;
+ case Ain_SseLdSt:
+ addRegUsage_AMD64AMode(u, i->Ain.SseLdSt.addr);
+ addHRegUse(u, i->Ain.SseLdSt.isLoad ? HRmWrite : HRmRead,
+ i->Ain.SseLdSt.reg);
+ return;
+ case Ain_SseLdzLO:
+ addRegUsage_AMD64AMode(u, i->Ain.SseLdzLO.addr);
+ addHRegUse(u, HRmWrite, i->Ain.SseLdzLO.reg);
+ return;
+//.. case Xin_SseConst:
+//.. addHRegUse(u, HRmWrite, i->Xin.SseConst.dst);
+//.. return;
+ case Ain_Sse32Fx4:
+ vassert(i->Ain.Sse32Fx4.op != Asse_MOV);
+ unary = toBool( i->Ain.Sse32Fx4.op == Asse_RCPF
+ || i->Ain.Sse32Fx4.op == Asse_RSQRTF
+ || i->Ain.Sse32Fx4.op == Asse_SQRTF );
+ addHRegUse(u, HRmRead, i->Ain.Sse32Fx4.src);
+ addHRegUse(u, unary ? HRmWrite : HRmModify,
+ i->Ain.Sse32Fx4.dst);
+ return;
+ case Ain_Sse32FLo:
+ vassert(i->Ain.Sse32FLo.op != Asse_MOV);
+ unary = toBool( i->Ain.Sse32FLo.op == Asse_RCPF
+ || i->Ain.Sse32FLo.op == Asse_RSQRTF
+ || i->Ain.Sse32FLo.op == Asse_SQRTF );
+ addHRegUse(u, HRmRead, i->Ain.Sse32FLo.src);
+ addHRegUse(u, unary ? HRmWrite : HRmModify,
+ i->Ain.Sse32FLo.dst);
+ return;
+ case Ain_Sse64Fx2:
+ vassert(i->Ain.Sse64Fx2.op != Asse_MOV);
+ unary = toBool( i->Ain.Sse64Fx2.op == Asse_RCPF
+ || i->Ain.Sse64Fx2.op == Asse_RSQRTF
+ || i->Ain.Sse64Fx2.op == Asse_SQRTF );
+ addHRegUse(u, HRmRead, i->Ain.Sse64Fx2.src);
+ addHRegUse(u, unary ? HRmWrite : HRmModify,
+ i->Ain.Sse64Fx2.dst);
+ return;
+ case Ain_Sse64FLo:
+ vassert(i->Ain.Sse64FLo.op != Asse_MOV);
+ unary = toBool( i->Ain.Sse64FLo.op == Asse_RCPF
+ || i->Ain.Sse64FLo.op == Asse_RSQRTF
+ || i->Ain.Sse64FLo.op == Asse_SQRTF );
+ addHRegUse(u, HRmRead, i->Ain.Sse64FLo.src);
+ addHRegUse(u, unary ? HRmWrite : HRmModify,
+ i->Ain.Sse64FLo.dst);
+ return;
+ case Ain_SseReRg:
+ if ( (i->Ain.SseReRg.op == Asse_XOR
+ || i->Ain.SseReRg.op == Asse_CMPEQ32)
+ && i->Ain.SseReRg.src == i->Ain.SseReRg.dst) {
+            /* reg-alloc needs to understand 'xor r,r' and 'cmpeqd
+               r,r' as a write of a value to r, independent of any
+               previous value in r */
+ /* (as opposed to a rite of passage :-) */
+ addHRegUse(u, HRmWrite, i->Ain.SseReRg.dst);
+ } else {
+ addHRegUse(u, HRmRead, i->Ain.SseReRg.src);
+ addHRegUse(u, i->Ain.SseReRg.op == Asse_MOV
+ ? HRmWrite : HRmModify,
+ i->Ain.SseReRg.dst);
+ }
+ return;
+ case Ain_SseCMov:
+ addHRegUse(u, HRmRead, i->Ain.SseCMov.src);
+ addHRegUse(u, HRmModify, i->Ain.SseCMov.dst);
+ return;
+ case Ain_SseShuf:
+ addHRegUse(u, HRmRead, i->Ain.SseShuf.src);
+ addHRegUse(u, HRmWrite, i->Ain.SseShuf.dst);
+ return;
+ default:
+ ppAMD64Instr(i, mode64);
+ vpanic("getRegUsage_AMD64Instr");
+ }
+}
+
+/* local helper */
+static inline void mapReg(HRegRemap* m, HReg* r)
+{
+ *r = lookupHRegRemap(m, *r);
+}
+
+void mapRegs_AMD64Instr ( HRegRemap* m, AMD64Instr* i, Bool mode64 )
+{
+ vassert(mode64 == True);
+ switch (i->tag) {
+ case Ain_Imm64:
+ mapReg(m, &i->Ain.Imm64.dst);
+ return;
+ case Ain_Alu64R:
+ mapRegs_AMD64RMI(m, i->Ain.Alu64R.src);
+ mapReg(m, &i->Ain.Alu64R.dst);
+ return;
+ case Ain_Alu64M:
+ mapRegs_AMD64RI(m, i->Ain.Alu64M.src);
+ mapRegs_AMD64AMode(m, i->Ain.Alu64M.dst);
+ return;
+ case Ain_Sh64:
+ mapReg(m, &i->Ain.Sh64.dst);
+ return;
+ case Ain_Test64:
+ mapReg(m, &i->Ain.Test64.dst);
+ return;
+ case Ain_Unary64:
+ mapReg(m, &i->Ain.Unary64.dst);
+ return;
+ case Ain_Lea64:
+ mapRegs_AMD64AMode(m, i->Ain.Lea64.am);
+ mapReg(m, &i->Ain.Lea64.dst);
+ return;
+ case Ain_MulL:
+ mapRegs_AMD64RM(m, i->Ain.MulL.src);
+ return;
+ case Ain_Div:
+ mapRegs_AMD64RM(m, i->Ain.Div.src);
+ return;
+//.. case Xin_Sh3232:
+//.. mapReg(m, &i->Xin.Sh3232.src);
+//.. mapReg(m, &i->Xin.Sh3232.dst);
+//.. return;
+ case Ain_Push:
+ mapRegs_AMD64RMI(m, i->Ain.Push.src);
+ return;
+ case Ain_Call:
+ return;
+ case Ain_Goto:
+ mapRegs_AMD64RI(m, i->Ain.Goto.dst);
+ return;
+ case Ain_CMov64:
+ mapRegs_AMD64RM(m, i->Ain.CMov64.src);
+ mapReg(m, &i->Ain.CMov64.dst);
+ return;
+ case Ain_MovxLQ:
+ mapReg(m, &i->Ain.MovxLQ.src);
+ mapReg(m, &i->Ain.MovxLQ.dst);
+ return;
+ case Ain_LoadEX:
+ mapRegs_AMD64AMode(m, i->Ain.LoadEX.src);
+ mapReg(m, &i->Ain.LoadEX.dst);
+ return;
+ case Ain_Store:
+ mapReg(m, &i->Ain.Store.src);
+ mapRegs_AMD64AMode(m, i->Ain.Store.dst);
+ return;
+ case Ain_Set64:
+ mapReg(m, &i->Ain.Set64.dst);
+ return;
+ case Ain_Bsfr64:
+ mapReg(m, &i->Ain.Bsfr64.src);
+ mapReg(m, &i->Ain.Bsfr64.dst);
+ return;
+ case Ain_MFence:
+ return;
+ case Ain_ACAS:
+ mapRegs_AMD64AMode(m, i->Ain.ACAS.addr);
+ return;
+ case Ain_DACAS:
+ mapRegs_AMD64AMode(m, i->Ain.DACAS.addr);
+ return;
+ case Ain_A87Free:
+ return;
+ case Ain_A87PushPop:
+ mapRegs_AMD64AMode(m, i->Ain.A87PushPop.addr);
+ return;
+ case Ain_A87FpOp:
+ return;
+ case Ain_A87LdCW:
+ mapRegs_AMD64AMode(m, i->Ain.A87LdCW.addr);
+ return;
+ case Ain_A87StSW:
+ mapRegs_AMD64AMode(m, i->Ain.A87StSW.addr);
+ return;
+//.. case Xin_FpUnary:
+//.. mapReg(m, &i->Xin.FpUnary.src);
+//.. mapReg(m, &i->Xin.FpUnary.dst);
+//.. return;
+//.. case Xin_FpBinary:
+//.. mapReg(m, &i->Xin.FpBinary.srcL);
+//.. mapReg(m, &i->Xin.FpBinary.srcR);
+//.. mapReg(m, &i->Xin.FpBinary.dst);
+//.. return;
+//.. case Xin_FpLdSt:
+//.. mapRegs_AMD64AMode(m, i->Xin.FpLdSt.addr);
+//.. mapReg(m, &i->Xin.FpLdSt.reg);
+//.. return;
+//.. case Xin_FpLdStI:
+//.. mapRegs_AMD64AMode(m, i->Xin.FpLdStI.addr);
+//.. mapReg(m, &i->Xin.FpLdStI.reg);
+//.. return;
+//.. case Xin_Fp64to32:
+//.. mapReg(m, &i->Xin.Fp64to32.src);
+//.. mapReg(m, &i->Xin.Fp64to32.dst);
+//.. return;
+//.. case Xin_FpCMov:
+//.. mapReg(m, &i->Xin.FpCMov.src);
+//.. mapReg(m, &i->Xin.FpCMov.dst);
+//.. return;
+ case Ain_LdMXCSR:
+ mapRegs_AMD64AMode(m, i->Ain.LdMXCSR.addr);
+ return;
+//.. case Xin_FpStSW_AX:
+//.. return;
+ case Ain_SseUComIS:
+ mapReg(m, &i->Ain.SseUComIS.srcL);
+ mapReg(m, &i->Ain.SseUComIS.srcR);
+ mapReg(m, &i->Ain.SseUComIS.dst);
+ return;
+ case Ain_SseSI2SF:
+ mapReg(m, &i->Ain.SseSI2SF.src);
+ mapReg(m, &i->Ain.SseSI2SF.dst);
+ return;
+ case Ain_SseSF2SI:
+ mapReg(m, &i->Ain.SseSF2SI.src);
+ mapReg(m, &i->Ain.SseSF2SI.dst);
+ return;
+ case Ain_SseSDSS:
+ mapReg(m, &i->Ain.SseSDSS.src);
+ mapReg(m, &i->Ain.SseSDSS.dst);
+ return;
+//.. case Xin_SseConst:
+//.. mapReg(m, &i->Xin.SseConst.dst);
+//.. return;
+ case Ain_SseLdSt:
+ mapReg(m, &i->Ain.SseLdSt.reg);
+ mapRegs_AMD64AMode(m, i->Ain.SseLdSt.addr);
+         return;
+ case Ain_SseLdzLO:
+ mapReg(m, &i->Ain.SseLdzLO.reg);
+ mapRegs_AMD64AMode(m, i->Ain.SseLdzLO.addr);
+         return;
+ case Ain_Sse32Fx4:
+ mapReg(m, &i->Ain.Sse32Fx4.src);
+ mapReg(m, &i->Ain.Sse32Fx4.dst);
+ return;
+ case Ain_Sse32FLo:
+ mapReg(m, &i->Ain.Sse32FLo.src);
+ mapReg(m, &i->Ain.Sse32FLo.dst);
+ return;
+ case Ain_Sse64Fx2:
+ mapReg(m, &i->Ain.Sse64Fx2.src);
+ mapReg(m, &i->Ain.Sse64Fx2.dst);
+ return;
+ case Ain_Sse64FLo:
+ mapReg(m, &i->Ain.Sse64FLo.src);
+ mapReg(m, &i->Ain.Sse64FLo.dst);
+ return;
+ case Ain_SseReRg:
+ mapReg(m, &i->Ain.SseReRg.src);
+ mapReg(m, &i->Ain.SseReRg.dst);
+ return;
+ case Ain_SseCMov:
+ mapReg(m, &i->Ain.SseCMov.src);
+ mapReg(m, &i->Ain.SseCMov.dst);
+ return;
+ case Ain_SseShuf:
+ mapReg(m, &i->Ain.SseShuf.src);
+ mapReg(m, &i->Ain.SseShuf.dst);
+ return;
+ default:
+ ppAMD64Instr(i, mode64);
+ vpanic("mapRegs_AMD64Instr");
+ }
+}
+
+/* Figure out if i represents a reg-reg move, and if so assign the
+ source and destination to *src and *dst. If in doubt say No. Used
+ by the register allocator to do move coalescing.
+*/
+Bool isMove_AMD64Instr ( AMD64Instr* i, HReg* src, HReg* dst )
+{
+ /* Moves between integer regs */
+ if (i->tag == Ain_Alu64R) {
+ if (i->Ain.Alu64R.op != Aalu_MOV)
+ return False;
+ if (i->Ain.Alu64R.src->tag != Armi_Reg)
+ return False;
+ *src = i->Ain.Alu64R.src->Armi.Reg.reg;
+ *dst = i->Ain.Alu64R.dst;
+ return True;
+ }
+ /* Moves between vector regs */
+ if (i->tag == Ain_SseReRg) {
+ if (i->Ain.SseReRg.op != Asse_MOV)
+ return False;
+ *src = i->Ain.SseReRg.src;
+ *dst = i->Ain.SseReRg.dst;
+ return True;
+ }
+ return False;
+}
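+
+/* For example (a sketch of how the allocator uses this): "movq
+   %rax, %rbx" -- an Ain_Alu64R with op Aalu_MOV and an Armi_Reg
+   source -- reports src = %rax, dst = %rbx, which lets the two
+   registers be coalesced when their live ranges permit. */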
+
+
+/* Generate amd64 spill/reload instructions under the direction of the
+ register allocator. Note it's critical these don't write the
+ condition codes. */
+
+void genSpill_AMD64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
+ HReg rreg, Int offsetB, Bool mode64 )
+{
+ AMD64AMode* am;
+ vassert(offsetB >= 0);
+ vassert(!hregIsVirtual(rreg));
+ vassert(mode64 == True);
+ *i1 = *i2 = NULL;
+ am = AMD64AMode_IR(offsetB, hregAMD64_RBP());
+ switch (hregClass(rreg)) {
+ case HRcInt64:
+ *i1 = AMD64Instr_Alu64M ( Aalu_MOV, AMD64RI_Reg(rreg), am );
+ return;
+ case HRcVec128:
+ *i1 = AMD64Instr_SseLdSt ( False/*store*/, 16, rreg, am );
+ return;
+ default:
+ ppHRegClass(hregClass(rreg));
+ vpanic("genSpill_AMD64: unimplemented regclass");
+ }
+}
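+
+/* For instance, spilling an HRcInt64 register at offsetB == 24
+   produces "movq %rreg, 24(%rbp)", and the matching reload below
+   produces "movq 24(%rbp), %rreg".  Both are plain MOVs, so the
+   condition codes are left alone, as required. */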
+
+void genReload_AMD64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
+ HReg rreg, Int offsetB, Bool mode64 )
+{
+ AMD64AMode* am;
+ vassert(offsetB >= 0);
+ vassert(!hregIsVirtual(rreg));
+ vassert(mode64 == True);
+ *i1 = *i2 = NULL;
+ am = AMD64AMode_IR(offsetB, hregAMD64_RBP());
+ switch (hregClass(rreg)) {
+ case HRcInt64:
+ *i1 = AMD64Instr_Alu64R ( Aalu_MOV, AMD64RMI_Mem(am), rreg );
+ return;
+ case HRcVec128:
+ *i1 = AMD64Instr_SseLdSt ( True/*load*/, 16, rreg, am );
+ return;
+ default:
+ ppHRegClass(hregClass(rreg));
+ vpanic("genReload_AMD64: unimplemented regclass");
+ }
+}
+
+
+/* --------- The amd64 assembler (bleh.) --------- */
+
+/* Produce the low three bits of an integer register number. */
+static UChar iregBits210 ( HReg r )
+{
+ UInt n;
+ vassert(hregClass(r) == HRcInt64);
+ vassert(!hregIsVirtual(r));
+ n = hregNumber(r);
+ vassert(n <= 15);
+ return toUChar(n & 7);
+}
+
+/* Produce bit 3 of an integer register number. */
+static UChar iregBit3 ( HReg r )
+{
+ UInt n;
+ vassert(hregClass(r) == HRcInt64);
+ vassert(!hregIsVirtual(r));
+ n = hregNumber(r);
+ vassert(n <= 15);
+ return toUChar((n >> 3) & 1);
+}
+
+/* Produce a complete 4-bit integer register number. */
+static UChar iregBits3210 ( HReg r )
+{
+ UInt n;
+ vassert(hregClass(r) == HRcInt64);
+ vassert(!hregIsVirtual(r));
+ n = hregNumber(r);
+ vassert(n <= 15);
+ return toUChar(n);
+}
+
+/* Given an xmm (128-bit V-class) register, produce the equivalently
+   numbered register in the 64-bit I-class.  This is a bit of fakery
+   which allows functions that work on integer register numbers to be
+   reused when assembling SSE instructions too. */
+static HReg vreg2ireg ( HReg r )
+{
+ UInt n;
+ vassert(hregClass(r) == HRcVec128);
+ vassert(!hregIsVirtual(r));
+ n = hregNumber(r);
+ vassert(n <= 15);
+ return mkHReg(n, HRcInt64, False);
+}
+
+static UChar mkModRegRM ( UChar mod, UChar reg, UChar regmem )
+{
+ return toUChar( ((mod & 3) << 6)
+ | ((reg & 7) << 3)
+ | (regmem & 7) );
+}
+
+static UChar mkSIB ( Int shift, Int regindex, Int regbase )
+{
+ return toUChar( ((shift & 3) << 6)
+ | ((regindex & 7) << 3)
+ | (regbase & 7) );
+}
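+
+/* A couple of worked examples: mkModRegRM(1,0,3) == 0x43 (mod=01,
+   reg=000, rm=011), and mkSIB(2,1,3) == 0x8B -- the SIB byte for an
+   (%rbx,%rcx,4) style operand (scale 4 is shift field 2, index
+   %rcx = 1, base %rbx = 3), ignoring REX extension bits, which are
+   dealt with separately. */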
+
+static UChar* emit32 ( UChar* p, UInt w32 )
+{
+ *p++ = toUChar((w32) & 0x000000FF);
+ *p++ = toUChar((w32 >> 8) & 0x000000FF);
+ *p++ = toUChar((w32 >> 16) & 0x000000FF);
+ *p++ = toUChar((w32 >> 24) & 0x000000FF);
+ return p;
+}
+
+static UChar* emit64 ( UChar* p, ULong w64 )
+{
+ p = emit32(p, toUInt(w64 & 0xFFFFFFFF));
+ p = emit32(p, toUInt((w64 >> 32) & 0xFFFFFFFF));
+ return p;
+}
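+
+/* e.g. emit32(p, 0x12345678) lays down 78 56 34 12 -- everything
+   here is little-endian, as the host requires. */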
+
+/* Does a sign-extend of the lowest 8 bits give
+ the original number? */
+static Bool fits8bits ( UInt w32 )
+{
+ Int i32 = (Int)w32;
+ return toBool(i32 == ((i32 << 24) >> 24));
+}
+
+/* Can the lower 32 bits be signedly widened to produce the whole
+ 64-bit value? In other words, are the top 33 bits either all 0 or
+ all 1 ? */
+static Bool fitsIn32Bits ( ULong x )
+{
+ Long y0 = (Long)x;
+ Long y1 = y0;
+ y1 <<= 32;
+ y1 >>=/*s*/ 32;
+ return toBool(x == y1);
+}
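+
+/* Worked examples for the two predicates above:
+      fits8bits(0x7F)       == True    (sext of 0x7F)
+      fits8bits(0x80)       == False   (sext gives 0xFFFFFF80)
+      fits8bits(0xFFFFFF80) == True    (is already sext(-128))
+      fitsIn32Bits(0x7FFFFFFFULL)         == True
+      fitsIn32Bits(0xFFFFFFFF80000000ULL) == True
+      fitsIn32Bits(0x80000000ULL)         == False (top 33 bits differ)
+*/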
+
+
+/* Forming mod-reg-rm bytes and scale-index-base bytes.
+
+ greg, 0(ereg) | ereg is not any of: RSP RBP R12 R13
+ = 00 greg ereg
+
+ greg, d8(ereg) | ereg is neither of: RSP R12
+ = 01 greg ereg, d8
+
+ greg, d32(ereg) | ereg is neither of: RSP R12
+ = 10 greg ereg, d32
+
+ greg, d8(ereg) | ereg is either: RSP R12
+ = 01 greg 100, 0x24, d8
+ (lowest bit of rex distinguishes R12/RSP)
+
+ greg, d32(ereg) | ereg is either: RSP R12
+ = 10 greg 100, 0x24, d32
+ (lowest bit of rex distinguishes R12/RSP)
+
+ -----------------------------------------------
+
+ greg, d8(base,index,scale)
+ | index != RSP
+ = 01 greg 100, scale index base, d8
+
+ greg, d32(base,index,scale)
+ | index != RSP
+ = 10 greg 100, scale index base, d32
+*/
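+/* Worked example of the above (REX is emitted separately): with
+   greg = %rcx (reg field 1) and am = 16(%rsi) -- an Aam_IR whose
+   displacement fits in 8 bits and whose ereg is not RSP/R12 --
+   doAMode_M produces the mod-reg-rm byte 0x4E (01 001 110) followed
+   by the disp8 byte 0x10. */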
+static UChar* doAMode_M ( UChar* p, HReg greg, AMD64AMode* am )
+{
+ if (am->tag == Aam_IR) {
+ if (am->Aam.IR.imm == 0
+ && am->Aam.IR.reg != hregAMD64_RSP()
+ && am->Aam.IR.reg != hregAMD64_RBP()
+ && am->Aam.IR.reg != hregAMD64_R12()
+ && am->Aam.IR.reg != hregAMD64_R13()
+ ) {
+ *p++ = mkModRegRM(0, iregBits210(greg),
+ iregBits210(am->Aam.IR.reg));
+ return p;
+ }
+ if (fits8bits(am->Aam.IR.imm)
+ && am->Aam.IR.reg != hregAMD64_RSP()
+ && am->Aam.IR.reg != hregAMD64_R12()
+ ) {
+ *p++ = mkModRegRM(1, iregBits210(greg),
+ iregBits210(am->Aam.IR.reg));
+ *p++ = toUChar(am->Aam.IR.imm & 0xFF);
+ return p;
+ }
+ if (am->Aam.IR.reg != hregAMD64_RSP()
+ && am->Aam.IR.reg != hregAMD64_R12()
+ ) {
+ *p++ = mkModRegRM(2, iregBits210(greg),
+ iregBits210(am->Aam.IR.reg));
+ p = emit32(p, am->Aam.IR.imm);
+ return p;
+ }
+ if ((am->Aam.IR.reg == hregAMD64_RSP()
+ || am->Aam.IR.reg == hregAMD64_R12())
+ && fits8bits(am->Aam.IR.imm)) {
+ *p++ = mkModRegRM(1, iregBits210(greg), 4);
+ *p++ = 0x24;
+ *p++ = toUChar(am->Aam.IR.imm & 0xFF);
+ return p;
+ }
+      if (/* am->Aam.IR.reg == hregAMD64_RSP()
+            || -- RSP case disabled, awaiting a test case */
+          am->Aam.IR.reg == hregAMD64_R12()) {
+ *p++ = mkModRegRM(2, iregBits210(greg), 4);
+ *p++ = 0x24;
+ p = emit32(p, am->Aam.IR.imm);
+ return p;
+ }
+ ppAMD64AMode(am);
+ vpanic("doAMode_M: can't emit amode IR");
+ /*NOTREACHED*/
+ }
+ if (am->tag == Aam_IRRS) {
+ if (fits8bits(am->Aam.IRRS.imm)
+ && am->Aam.IRRS.index != hregAMD64_RSP()) {
+ *p++ = mkModRegRM(1, iregBits210(greg), 4);
+ *p++ = mkSIB(am->Aam.IRRS.shift, am->Aam.IRRS.index,
+ am->Aam.IRRS.base);
+ *p++ = toUChar(am->Aam.IRRS.imm & 0xFF);
+ return p;
+ }
+ if (am->Aam.IRRS.index != hregAMD64_RSP()) {
+ *p++ = mkModRegRM(2, iregBits210(greg), 4);
+ *p++ = mkSIB(am->Aam.IRRS.shift, am->Aam.IRRS.index,
+ am->Aam.IRRS.base);
+ p = emit32(p, am->Aam.IRRS.imm);
+ return p;
+ }
+ ppAMD64AMode(am);
+ vpanic("doAMode_M: can't emit amode IRRS");
+ /*NOTREACHED*/
+ }
+ vpanic("doAMode_M: unknown amode");
+ /*NOTREACHED*/
+}
+
+
+/* Emit a mod-reg-rm byte when the rm bit denotes a reg. */
+static UChar* doAMode_R ( UChar* p, HReg greg, HReg ereg )
+{
+ *p++ = mkModRegRM(3, iregBits210(greg), iregBits210(ereg));
+ return p;
+}
+
+
+/* Clear the W bit on a REX byte, thereby changing the operand size
+ back to whatever that instruction's default operand size is. */
+static inline UChar clearWBit ( UChar rex )
+{
+ return toUChar(rex & ~(1<<3));
+}
+
+
+/* Make up a REX byte, with W=1 (size=64), for a (greg,amode) pair. */
+static UChar rexAMode_M ( HReg greg, AMD64AMode* am )
+{
+ if (am->tag == Aam_IR) {
+ UChar W = 1; /* we want 64-bit mode */
+ UChar R = iregBit3(greg);
+ UChar X = 0; /* not relevant */
+ UChar B = iregBit3(am->Aam.IR.reg);
+ return toUChar(0x40 + ((W << 3) | (R << 2) | (X << 1) | (B << 0)));
+ }
+ if (am->tag == Aam_IRRS) {
+ UChar W = 1; /* we want 64-bit mode */
+ UChar R = iregBit3(greg);
+ UChar X = iregBit3(am->Aam.IRRS.index);
+ UChar B = iregBit3(am->Aam.IRRS.base);
+ return toUChar(0x40 + ((W << 3) | (R << 2) | (X << 1) | (B << 0)));
+ }
+ vassert(0);
+ return 0; /*NOTREACHED*/
+}
+
+/* Make up a REX byte, with W=1 (size=64), for a (greg,ereg) pair. */
+static UChar rexAMode_R ( HReg greg, HReg ereg )
+{
+ UChar W = 1; /* we want 64-bit mode */
+ UChar R = iregBit3(greg);
+ UChar X = 0; /* not relevant */
+ UChar B = iregBit3(ereg);
+ return toUChar(0x40 + ((W << 3) | (R << 2) | (X << 1) | (B << 0)));
+}
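+
+/* For instance, rexAMode_R(%r9, %rax) gives W=1, R=1 (%r9 needs the
+   high bit), X=0, B=0, hence 0x40 + 0xC = 0x4C, the familiar REX.WR
+   prefix. */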
+
+
+/* Emit ffree %st(N) */
+static UChar* do_ffree_st ( UChar* p, Int n )
+{
+ vassert(n >= 0 && n <= 7);
+ *p++ = 0xDD;
+ *p++ = toUChar(0xC0 + n);
+ return p;
+}
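+
+/* e.g. do_ffree_st(p, 7) emits DD C7, the "ffree %st(7)" idiom that
+   Ain_A87Free below relies on. */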
+
+//.. /* Emit fstp %st(i), 1 <= i <= 7 */
+//.. static UChar* do_fstp_st ( UChar* p, Int i )
+//.. {
+//.. vassert(1 <= i && i <= 7);
+//.. *p++ = 0xDD;
+//.. *p++ = 0xD8+i;
+//.. return p;
+//.. }
+//..
+//.. /* Emit fld %st(i), 0 <= i <= 6 */
+//.. static UChar* do_fld_st ( UChar* p, Int i )
+//.. {
+//.. vassert(0 <= i && i <= 6);
+//.. *p++ = 0xD9;
+//.. *p++ = 0xC0+i;
+//.. return p;
+//.. }
+//..
+//.. /* Emit f<op> %st(0) */
+//.. static UChar* do_fop1_st ( UChar* p, AMD64FpOp op )
+//.. {
+//.. switch (op) {
+//.. case Xfp_NEG: *p++ = 0xD9; *p++ = 0xE0; break;
+//.. case Xfp_ABS: *p++ = 0xD9; *p++ = 0xE1; break;
+//.. case Xfp_SQRT: *p++ = 0xD9; *p++ = 0xFA; break;
+//.. case Xfp_ROUND: *p++ = 0xD9; *p++ = 0xFC; break;
+//.. case Xfp_SIN: *p++ = 0xD9; *p++ = 0xFE; break;
+//.. case Xfp_COS: *p++ = 0xD9; *p++ = 0xFF; break;
+//.. case Xfp_2XM1: *p++ = 0xD9; *p++ = 0xF0; break;
+//.. case Xfp_MOV: break;
+//.. case Xfp_TAN: p = do_ffree_st7(p); /* since fptan pushes 1.0 */
+//.. *p++ = 0xD9; *p++ = 0xF2; /* fptan */
+//.. *p++ = 0xD9; *p++ = 0xF7; /* fincstp */
+//.. break;
+//.. default: vpanic("do_fop1_st: unknown op");
+//.. }
+//.. return p;
+//.. }
+//..
+//.. /* Emit f<op> %st(i), 1 <= i <= 5 */
+//.. static UChar* do_fop2_st ( UChar* p, AMD64FpOp op, Int i )
+//.. {
+//.. # define fake(_n) mkHReg((_n), HRcInt32, False)
+//.. Int subopc;
+//.. switch (op) {
+//.. case Xfp_ADD: subopc = 0; break;
+//.. case Xfp_SUB: subopc = 4; break;
+//.. case Xfp_MUL: subopc = 1; break;
+//.. case Xfp_DIV: subopc = 6; break;
+//.. default: vpanic("do_fop2_st: unknown op");
+//.. }
+//.. *p++ = 0xD8;
+//.. p = doAMode_R(p, fake(subopc), fake(i));
+//.. return p;
+//.. # undef fake
+//.. }
+//..
+//.. /* Push a 32-bit word on the stack. The word depends on tags[3:0];
+//.. each byte is either 0x00 or 0xFF depending on the corresponding bit in tags[].
+//.. */
+//.. static UChar* push_word_from_tags ( UChar* p, UShort tags )
+//.. {
+//.. UInt w;
+//.. vassert(0 == (tags & ~0xF));
+//.. if (tags == 0) {
+//.. /* pushl $0x00000000 */
+//.. *p++ = 0x6A;
+//.. *p++ = 0x00;
+//.. }
+//.. else
+//.. /* pushl $0xFFFFFFFF */
+//.. if (tags == 0xF) {
+//.. *p++ = 0x6A;
+//.. *p++ = 0xFF;
+//.. } else {
+//.. vassert(0); /* awaiting test case */
+//.. w = 0;
+//.. if (tags & 1) w |= 0x000000FF;
+//.. if (tags & 2) w |= 0x0000FF00;
+//.. if (tags & 4) w |= 0x00FF0000;
+//.. if (tags & 8) w |= 0xFF000000;
+//.. *p++ = 0x68;
+//.. p = emit32(p, w);
+//.. }
+//.. return p;
+//.. }
+
+/* Emit an instruction into buf and return the number of bytes used.
+ Note that buf is not the insn's final place, and therefore it is
+ imperative to emit position-independent code. */
+
+Int emit_AMD64Instr ( UChar* buf, Int nbuf, AMD64Instr* i,
+ Bool mode64, void* dispatch )
+{
+ UInt /*irno,*/ opc, opc_rr, subopc_imm, opc_imma, opc_cl, opc_imm, subopc;
+ UInt xtra;
+ UInt reg;
+ UChar rex;
+ UChar* p = &buf[0];
+ UChar* ptmp;
+ Int j;
+ vassert(nbuf >= 32);
+ vassert(mode64 == True);
+
+   /* Wrap an integer as an int register, for use when assembling
+ GrpN insns, in which the greg field is used as a sub-opcode
+ and does not really contain a register. */
+# define fake(_n) mkHReg((_n), HRcInt64, False)
+
+ /* vex_printf("asm "); ppAMD64Instr(i, mode64); vex_printf("\n"); */
+
+ switch (i->tag) {
+
+ case Ain_Imm64:
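+         /* movabsq $imm64, %dst.  For example, with dst = %r10
+            (encoding 10) this comes out as 49 BA <imm64>: a REX.WB
+            prefix, then opcode 0xB8+2. */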
+ *p++ = toUChar(0x48 + (1 & iregBit3(i->Ain.Imm64.dst)));
+ *p++ = toUChar(0xB8 + iregBits210(i->Ain.Imm64.dst));
+ p = emit64(p, i->Ain.Imm64.imm64);
+ goto done;
+
+ case Ain_Alu64R:
+ /* Deal specially with MOV */
+ if (i->Ain.Alu64R.op == Aalu_MOV) {
+ switch (i->Ain.Alu64R.src->tag) {
+ case Armi_Imm:
+ if (0 == (i->Ain.Alu64R.src->Armi.Imm.imm32 & ~0xFFF)) {
+ /* Actually we could use this form for constants in
+ the range 0 through 0x7FFFFFFF inclusive, but
+ limit it to a small range for verifiability
+ purposes. */
+ /* Generate "movl $imm32, 32-bit-register" and let
+ the default zero-extend rule cause the upper half
+ of the dst to be zeroed out too. This saves 1
+ and sometimes 2 bytes compared to the more
+ obvious encoding in the 'else' branch. */
+ if (1 & iregBit3(i->Ain.Alu64R.dst))
+ *p++ = 0x41;
+ *p++ = 0xB8 + iregBits210(i->Ain.Alu64R.dst);
+ p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32);
+ } else {
+ *p++ = toUChar(0x48 + (1 & iregBit3(i->Ain.Alu64R.dst)));
+ *p++ = 0xC7;
+ *p++ = toUChar(0xC0 + iregBits210(i->Ain.Alu64R.dst));
+ p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32);
+ }
+ goto done;
+ case Armi_Reg:
+ *p++ = rexAMode_R( i->Ain.Alu64R.src->Armi.Reg.reg,
+ i->Ain.Alu64R.dst );
+ *p++ = 0x89;
+ p = doAMode_R(p, i->Ain.Alu64R.src->Armi.Reg.reg,
+ i->Ain.Alu64R.dst);
+ goto done;
+ case Armi_Mem:
+ *p++ = rexAMode_M(i->Ain.Alu64R.dst,
+ i->Ain.Alu64R.src->Armi.Mem.am);
+ *p++ = 0x8B;
+ p = doAMode_M(p, i->Ain.Alu64R.dst,
+ i->Ain.Alu64R.src->Armi.Mem.am);
+ goto done;
+ default:
+ goto bad;
+ }
+ }
+ /* MUL */
+ if (i->Ain.Alu64R.op == Aalu_MUL) {
+ switch (i->Ain.Alu64R.src->tag) {
+ case Armi_Reg:
+ *p++ = rexAMode_R( i->Ain.Alu64R.dst,
+ i->Ain.Alu64R.src->Armi.Reg.reg);
+ *p++ = 0x0F;
+ *p++ = 0xAF;
+ p = doAMode_R(p, i->Ain.Alu64R.dst,
+ i->Ain.Alu64R.src->Armi.Reg.reg);
+ goto done;
+ case Armi_Mem:
+ *p++ = rexAMode_M(i->Ain.Alu64R.dst,
+ i->Ain.Alu64R.src->Armi.Mem.am);
+ *p++ = 0x0F;
+ *p++ = 0xAF;
+ p = doAMode_M(p, i->Ain.Alu64R.dst,
+ i->Ain.Alu64R.src->Armi.Mem.am);
+ goto done;
+ case Armi_Imm:
+ if (fits8bits(i->Ain.Alu64R.src->Armi.Imm.imm32)) {
+ *p++ = rexAMode_R(i->Ain.Alu64R.dst, i->Ain.Alu64R.dst);
+ *p++ = 0x6B;
+ p = doAMode_R(p, i->Ain.Alu64R.dst, i->Ain.Alu64R.dst);
+ *p++ = toUChar(0xFF & i->Ain.Alu64R.src->Armi.Imm.imm32);
+ } else {
+ *p++ = rexAMode_R(i->Ain.Alu64R.dst, i->Ain.Alu64R.dst);
+ *p++ = 0x69;
+ p = doAMode_R(p, i->Ain.Alu64R.dst, i->Ain.Alu64R.dst);
+ p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32);
+ }
+ goto done;
+ default:
+ goto bad;
+ }
+ }
+ /* ADD/SUB/ADC/SBB/AND/OR/XOR/CMP */
+ opc = opc_rr = subopc_imm = opc_imma = 0;
+ switch (i->Ain.Alu64R.op) {
+ case Aalu_ADC: opc = 0x13; opc_rr = 0x11;
+ subopc_imm = 2; opc_imma = 0x15; break;
+ case Aalu_ADD: opc = 0x03; opc_rr = 0x01;
+ subopc_imm = 0; opc_imma = 0x05; break;
+ case Aalu_SUB: opc = 0x2B; opc_rr = 0x29;
+ subopc_imm = 5; opc_imma = 0x2D; break;
+ case Aalu_SBB: opc = 0x1B; opc_rr = 0x19;
+ subopc_imm = 3; opc_imma = 0x1D; break;
+ case Aalu_AND: opc = 0x23; opc_rr = 0x21;
+ subopc_imm = 4; opc_imma = 0x25; break;
+ case Aalu_XOR: opc = 0x33; opc_rr = 0x31;
+ subopc_imm = 6; opc_imma = 0x35; break;
+ case Aalu_OR: opc = 0x0B; opc_rr = 0x09;
+ subopc_imm = 1; opc_imma = 0x0D; break;
+ case Aalu_CMP: opc = 0x3B; opc_rr = 0x39;
+ subopc_imm = 7; opc_imma = 0x3D; break;
+ default: goto bad;
+ }
+ switch (i->Ain.Alu64R.src->tag) {
+ case Armi_Imm:
+ if (i->Ain.Alu64R.dst == hregAMD64_RAX()
+ && !fits8bits(i->Ain.Alu64R.src->Armi.Imm.imm32)) {
+ goto bad; /* FIXME: awaiting test case */
+ *p++ = toUChar(opc_imma);
+ p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32);
+ } else
+ if (fits8bits(i->Ain.Alu64R.src->Armi.Imm.imm32)) {
+ *p++ = rexAMode_R( fake(0), i->Ain.Alu64R.dst );
+ *p++ = 0x83;
+ p = doAMode_R(p, fake(subopc_imm), i->Ain.Alu64R.dst);
+ *p++ = toUChar(0xFF & i->Ain.Alu64R.src->Armi.Imm.imm32);
+ } else {
+ *p++ = rexAMode_R( fake(0), i->Ain.Alu64R.dst);
+ *p++ = 0x81;
+ p = doAMode_R(p, fake(subopc_imm), i->Ain.Alu64R.dst);
+ p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32);
+ }
+ goto done;
+ case Armi_Reg:
+ *p++ = rexAMode_R( i->Ain.Alu64R.src->Armi.Reg.reg,
+ i->Ain.Alu64R.dst);
+ *p++ = toUChar(opc_rr);
+ p = doAMode_R(p, i->Ain.Alu64R.src->Armi.Reg.reg,
+ i->Ain.Alu64R.dst);
+ goto done;
+ case Armi_Mem:
+ *p++ = rexAMode_M( i->Ain.Alu64R.dst,
+ i->Ain.Alu64R.src->Armi.Mem.am);
+ *p++ = toUChar(opc);
+ p = doAMode_M(p, i->Ain.Alu64R.dst,
+ i->Ain.Alu64R.src->Armi.Mem.am);
+ goto done;
+ default:
+ goto bad;
+ }
+ break;
+
+ case Ain_Alu64M:
+ /* Deal specially with MOV */
+ if (i->Ain.Alu64M.op == Aalu_MOV) {
+ switch (i->Ain.Alu64M.src->tag) {
+ case Ari_Reg:
+ *p++ = rexAMode_M(i->Ain.Alu64M.src->Ari.Reg.reg,
+ i->Ain.Alu64M.dst);
+ *p++ = 0x89;
+ p = doAMode_M(p, i->Ain.Alu64M.src->Ari.Reg.reg,
+ i->Ain.Alu64M.dst);
+ goto done;
+ case Ari_Imm:
+ *p++ = rexAMode_M(fake(0), i->Ain.Alu64M.dst);
+ *p++ = 0xC7;
+ p = doAMode_M(p, fake(0), i->Ain.Alu64M.dst);
+ p = emit32(p, i->Ain.Alu64M.src->Ari.Imm.imm32);
+ goto done;
+ default:
+ goto bad;
+ }
+ }
+//.. /* ADD/SUB/ADC/SBB/AND/OR/XOR/CMP. MUL is not
+//.. allowed here. */
+//.. opc = subopc_imm = opc_imma = 0;
+//.. switch (i->Xin.Alu32M.op) {
+//.. case Xalu_ADD: opc = 0x01; subopc_imm = 0; break;
+//.. case Xalu_SUB: opc = 0x29; subopc_imm = 5; break;
+//.. default: goto bad;
+//.. }
+//.. switch (i->Xin.Alu32M.src->tag) {
+//.. case Xri_Reg:
+//.. *p++ = opc;
+//.. p = doAMode_M(p, i->Xin.Alu32M.src->Xri.Reg.reg,
+//.. i->Xin.Alu32M.dst);
+//.. goto done;
+//.. case Xri_Imm:
+//.. if (fits8bits(i->Xin.Alu32M.src->Xri.Imm.imm32)) {
+//.. *p++ = 0x83;
+//.. p = doAMode_M(p, fake(subopc_imm), i->Xin.Alu32M.dst);
+//.. *p++ = 0xFF & i->Xin.Alu32M.src->Xri.Imm.imm32;
+//.. goto done;
+//.. } else {
+//.. *p++ = 0x81;
+//.. p = doAMode_M(p, fake(subopc_imm), i->Xin.Alu32M.dst);
+//.. p = emit32(p, i->Xin.Alu32M.src->Xri.Imm.imm32);
+//.. goto done;
+//.. }
+//.. default:
+//.. goto bad;
+//.. }
+ break;
+
+ case Ain_Sh64:
+ opc_cl = opc_imm = subopc = 0;
+ switch (i->Ain.Sh64.op) {
+ case Ash_SHR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 5; break;
+ case Ash_SAR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 7; break;
+ case Ash_SHL: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 4; break;
+ default: goto bad;
+ }
+ if (i->Ain.Sh64.src == 0) {
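+            /* src == 0 encodes "shift by %cl" (hence the opc_cl form). */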
+ *p++ = rexAMode_R(fake(0), i->Ain.Sh64.dst);
+ *p++ = toUChar(opc_cl);
+ p = doAMode_R(p, fake(subopc), i->Ain.Sh64.dst);
+ goto done;
+ } else {
+ *p++ = rexAMode_R(fake(0), i->Ain.Sh64.dst);
+ *p++ = toUChar(opc_imm);
+ p = doAMode_R(p, fake(subopc), i->Ain.Sh64.dst);
+            *p++ = toUChar(i->Ain.Sh64.src);
+ goto done;
+ }
+ break;
+
+ case Ain_Test64:
+ /* testq sign-extend($imm32), %reg */
+ *p++ = rexAMode_R(fake(0), i->Ain.Test64.dst);
+ *p++ = 0xF7;
+ p = doAMode_R(p, fake(0), i->Ain.Test64.dst);
+ p = emit32(p, i->Ain.Test64.imm32);
+ goto done;
+
+ case Ain_Unary64:
+ if (i->Ain.Unary64.op == Aun_NOT) {
+ *p++ = rexAMode_R(fake(0), i->Ain.Unary64.dst);
+ *p++ = 0xF7;
+ p = doAMode_R(p, fake(2), i->Ain.Unary64.dst);
+ goto done;
+ }
+ if (i->Ain.Unary64.op == Aun_NEG) {
+ *p++ = rexAMode_R(fake(0), i->Ain.Unary64.dst);
+ *p++ = 0xF7;
+ p = doAMode_R(p, fake(3), i->Ain.Unary64.dst);
+ goto done;
+ }
+ break;
+
+ case Ain_Lea64:
+ *p++ = rexAMode_M(i->Ain.Lea64.dst, i->Ain.Lea64.am);
+ *p++ = 0x8D;
+ p = doAMode_M(p, i->Ain.Lea64.dst, i->Ain.Lea64.am);
+ goto done;
+
+ case Ain_MulL:
+ subopc = i->Ain.MulL.syned ? 5 : 4;
+ switch (i->Ain.MulL.src->tag) {
+ case Arm_Mem:
+ *p++ = rexAMode_M( fake(0),
+ i->Ain.MulL.src->Arm.Mem.am);
+ *p++ = 0xF7;
+ p = doAMode_M(p, fake(subopc),
+ i->Ain.MulL.src->Arm.Mem.am);
+ goto done;
+ case Arm_Reg:
+ *p++ = rexAMode_R(fake(0),
+ i->Ain.MulL.src->Arm.Reg.reg);
+ *p++ = 0xF7;
+ p = doAMode_R(p, fake(subopc),
+ i->Ain.MulL.src->Arm.Reg.reg);
+ goto done;
+ default:
+ goto bad;
+ }
+ break;
+
+ case Ain_Div:
+ subopc = i->Ain.Div.syned ? 7 : 6;
+ if (i->Ain.Div.sz == 4) {
+ switch (i->Ain.Div.src->tag) {
+ case Arm_Mem:
+ goto bad;
+ /*FIXME*/
+ *p++ = 0xF7;
+ p = doAMode_M(p, fake(subopc),
+ i->Ain.Div.src->Arm.Mem.am);
+ goto done;
+ case Arm_Reg:
+ *p++ = clearWBit(
+ rexAMode_R( fake(0), i->Ain.Div.src->Arm.Reg.reg));
+ *p++ = 0xF7;
+ p = doAMode_R(p, fake(subopc),
+ i->Ain.Div.src->Arm.Reg.reg);
+ goto done;
+ default:
+ goto bad;
+ }
+ }
+ if (i->Ain.Div.sz == 8) {
+ switch (i->Ain.Div.src->tag) {
+ case Arm_Mem:
+ *p++ = rexAMode_M( fake(0),
+ i->Ain.Div.src->Arm.Mem.am);
+ *p++ = 0xF7;
+ p = doAMode_M(p, fake(subopc),
+ i->Ain.Div.src->Arm.Mem.am);
+ goto done;
+ case Arm_Reg:
+ *p++ = rexAMode_R( fake(0),
+ i->Ain.Div.src->Arm.Reg.reg);
+ *p++ = 0xF7;
+ p = doAMode_R(p, fake(subopc),
+ i->Ain.Div.src->Arm.Reg.reg);
+ goto done;
+ default:
+ goto bad;
+ }
+ }
+ break;
+
+//.. case Xin_Sh3232:
+//.. vassert(i->Xin.Sh3232.op == Xsh_SHL || i->Xin.Sh3232.op == Xsh_SHR);
+//.. if (i->Xin.Sh3232.amt == 0) {
+//.. /* shldl/shrdl by %cl */
+//.. *p++ = 0x0F;
+//.. if (i->Xin.Sh3232.op == Xsh_SHL) {
+//.. *p++ = 0xA5;
+//.. } else {
+//.. *p++ = 0xAD;
+//.. }
+//.. p = doAMode_R(p, i->Xin.Sh3232.src, i->Xin.Sh3232.dst);
+//.. goto done;
+//.. }
+//.. break;
+
+ case Ain_Push:
+ switch (i->Ain.Push.src->tag) {
+ case Armi_Mem:
+ *p++ = clearWBit(
+ rexAMode_M(fake(0), i->Ain.Push.src->Armi.Mem.am));
+ *p++ = 0xFF;
+ p = doAMode_M(p, fake(6), i->Ain.Push.src->Armi.Mem.am);
+ goto done;
+ case Armi_Imm:
+ *p++ = 0x68;
+ p = emit32(p, i->Ain.Push.src->Armi.Imm.imm32);
+ goto done;
+ case Armi_Reg:
+ *p++ = toUChar(0x40 + (1 & iregBit3(i->Ain.Push.src->Armi.Reg.reg)));
+ *p++ = toUChar(0x50 + iregBits210(i->Ain.Push.src->Armi.Reg.reg));
+ goto done;
+ default:
+ goto bad;
+ }
+
+ case Ain_Call: {
+ /* As per detailed comment for Ain_Call in
+ getRegUsage_AMD64Instr above, %r11 is used as an address
+ temporary. */
+ /* jump over the following two insns if the condition does not
+ hold */
+ Bool shortImm = fitsIn32Bits(i->Ain.Call.target);
+ if (i->Ain.Call.cond != Acc_ALWAYS) {
+ *p++ = toUChar(0x70 + (0xF & (i->Ain.Call.cond ^ 1)));
+ *p++ = shortImm ? 10 : 13;
+ /* 10 or 13 bytes in the next two insns */
+ }
+ if (shortImm) {
+            /* 7 bytes: movq $sign-extend(imm32), %r11 */
+ *p++ = 0x49;
+ *p++ = 0xC7;
+ *p++ = 0xC3;
+ p = emit32(p, (UInt)i->Ain.Call.target);
+ } else {
+ /* 10 bytes: movabsq $target, %r11 */
+ *p++ = 0x49;
+ *p++ = 0xBB;
+ p = emit64(p, i->Ain.Call.target);
+ }
+ /* 3 bytes: call *%r11 */
+ *p++ = 0x41;
+ *p++ = 0xFF;
+ *p++ = 0xD3;
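+         /* Worked sketch of the whole sequence (assuming Acc_Z
+            encodes as 0x4): cond = Acc_Z, target = 0x1234 gives
+               75 0A                   jnz .+10
+               49 C7 C3 34 12 00 00    movq $0x1234, %r11
+               41 FF D3                call *%r11
+         */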
+ goto done;
+ }
+
+ case Ain_Goto:
+ /* Use ptmp for backpatching conditional jumps. */
+ ptmp = NULL;
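+
+         /* Overall shape of what gets emitted below:
+               j<!cond> skip            (only if cond != Acc_ALWAYS)
+               movl $TRC_VALUE, %ebp    (only for non-boring jk)
+               movq dst, %rax
+               movq $dispatch, %rdx
+               jmp *%rdx
+            skip:
+         */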
+
+ /* First off, if this is conditional, create a conditional
+ jump over the rest of it. */
+ if (i->Ain.Goto.cond != Acc_ALWAYS) {
+ /* jmp fwds if !condition */
+ *p++ = toUChar(0x70 + (i->Ain.Goto.cond ^ 1));
+ ptmp = p; /* fill in this bit later */
+ *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
+ }
+
+         /* If non-boring, set %rbp (the guest state pointer)
+ appropriately. Since these numbers are all small positive
+ integers, we can get away with "movl $N, %ebp" rather than
+ the longer "movq $N, %rbp". */
+ /* movl $magic_number, %ebp */
+ switch (i->Ain.Goto.jk) {
+ case Ijk_ClientReq:
+ *p++ = 0xBD;
+ p = emit32(p, VEX_TRC_JMP_CLIENTREQ); break;
+ case Ijk_Sys_syscall:
+ *p++ = 0xBD;
+ p = emit32(p, VEX_TRC_JMP_SYS_SYSCALL); break;
+ case Ijk_Sys_int32:
+ *p++ = 0xBD;
+ p = emit32(p, VEX_TRC_JMP_SYS_INT32); break;
+ case Ijk_Yield:
+ *p++ = 0xBD;
+ p = emit32(p, VEX_TRC_JMP_YIELD); break;
+ case Ijk_EmWarn:
+ *p++ = 0xBD;
+ p = emit32(p, VEX_TRC_JMP_EMWARN); break;
+ case Ijk_MapFail:
+ *p++ = 0xBD;
+ p = emit32(p, VEX_TRC_JMP_MAPFAIL); break;
+ case Ijk_NoDecode:
+ *p++ = 0xBD;
+ p = emit32(p, VEX_TRC_JMP_NODECODE); break;
+ case Ijk_TInval:
+ *p++ = 0xBD;
+ p = emit32(p, VEX_TRC_JMP_TINVAL); break;
+ case Ijk_NoRedir:
+ *p++ = 0xBD;
+ p = emit32(p, VEX_TRC_JMP_NOREDIR); break;
+ case Ijk_SigTRAP:
+ *p++ = 0xBD;
+ p = emit32(p, VEX_TRC_JMP_SIGTRAP); break;
+ case Ijk_SigSEGV:
+ *p++ = 0xBD;
+ p = emit32(p, VEX_TRC_JMP_SIGSEGV); break;
+ case Ijk_Ret:
+ case Ijk_Call:
+ case Ijk_Boring:
+ break;
+ default:
+ ppIRJumpKind(i->Ain.Goto.jk);
+ vpanic("emit_AMD64Instr.Ain_Goto: unknown jump kind");
+ }
+
+ /* Get the destination address into %rax */
+ if (i->Ain.Goto.dst->tag == Ari_Imm) {
+            /* movq $sign-extend(imm32), %rax */
+ *p++ = 0x48;
+ *p++ = 0xC7;
+ *p++ = 0xC0;
+ p = emit32(p, i->Ain.Goto.dst->Ari.Imm.imm32);
+ } else {
+ vassert(i->Ain.Goto.dst->tag == Ari_Reg);
+            /* movq %reg, %rax */
+ if (i->Ain.Goto.dst->Ari.Reg.reg != hregAMD64_RAX()) {
+ *p++ = rexAMode_R(i->Ain.Goto.dst->Ari.Reg.reg, hregAMD64_RAX());
+ *p++ = 0x89;
+ p = doAMode_R(p, i->Ain.Goto.dst->Ari.Reg.reg, hregAMD64_RAX());
+ }
+ }
+
+ /* Get the dispatcher address into %rdx. This has to happen
+ after the load of %rax since %rdx might be carrying the value
+ destined for %rax immediately prior to this Ain_Goto. */
+ vassert(sizeof(ULong) == sizeof(void*));
+ vassert(dispatch != NULL);
+
+ if (fitsIn32Bits(Ptr_to_ULong(dispatch))) {
+            /* movq $sign-extend(imm32), %rdx */
+ *p++ = 0x48;
+ *p++ = 0xC7;
+ *p++ = 0xC2;
+ p = emit32(p, (UInt)Ptr_to_ULong(dispatch));
+ } else {
+ /* movabsq $imm64, %rdx */
+ *p++ = 0x48;
+ *p++ = 0xBA;
+ p = emit64(p, Ptr_to_ULong(dispatch));
+ }
+ /* jmp *%rdx */
+ *p++ = 0xFF;
+ *p++ = 0xE2;
+
+ /* Fix up the conditional jump, if there was one. */
+ if (i->Ain.Goto.cond != Acc_ALWAYS) {
+ Int delta = p - ptmp;
+ vassert(delta > 0 && delta < 30);
+ *ptmp = toUChar(delta-1);
+ }
+ goto done;
+
+ case Ain_CMov64:
+ vassert(i->Ain.CMov64.cond != Acc_ALWAYS);
+ if (i->Ain.CMov64.src->tag == Arm_Reg) {
+ *p++ = rexAMode_R(i->Ain.CMov64.dst, i->Ain.CMov64.src->Arm.Reg.reg);
+ *p++ = 0x0F;
+ *p++ = toUChar(0x40 + (0xF & i->Ain.CMov64.cond));
+ p = doAMode_R(p, i->Ain.CMov64.dst, i->Ain.CMov64.src->Arm.Reg.reg);
+ goto done;
+ }
+ if (i->Ain.CMov64.src->tag == Arm_Mem) {
+ *p++ = rexAMode_M(i->Ain.CMov64.dst, i->Ain.CMov64.src->Arm.Mem.am);
+ *p++ = 0x0F;
+ *p++ = toUChar(0x40 + (0xF & i->Ain.CMov64.cond));
+ p = doAMode_M(p, i->Ain.CMov64.dst, i->Ain.CMov64.src->Arm.Mem.am);
+ goto done;
+ }
+ break;
+
+ case Ain_MovxLQ:
+ /* No, _don't_ ask me why the sense of the args has to be
+ different in the S vs Z case. I don't know. */
+ if (i->Ain.MovxLQ.syned) {
+ /* Need REX.W = 1 here, but rexAMode_R does that for us. */
+ *p++ = rexAMode_R(i->Ain.MovxLQ.dst, i->Ain.MovxLQ.src);
+ *p++ = 0x63;
+ p = doAMode_R(p, i->Ain.MovxLQ.dst, i->Ain.MovxLQ.src);
+ } else {
+ /* Produce a 32-bit reg-reg move, since the implicit
+ zero-extend does what we want. */
+ *p++ = clearWBit (
+ rexAMode_R(i->Ain.MovxLQ.src, i->Ain.MovxLQ.dst));
+ *p++ = 0x89;
+ p = doAMode_R(p, i->Ain.MovxLQ.src, i->Ain.MovxLQ.dst);
+ }
+ goto done;
+
+ case Ain_LoadEX:
+ if (i->Ain.LoadEX.szSmall == 1 && !i->Ain.LoadEX.syned) {
+ /* movzbq */
+ *p++ = rexAMode_M(i->Ain.LoadEX.dst, i->Ain.LoadEX.src);
+ *p++ = 0x0F;
+ *p++ = 0xB6;
+ p = doAMode_M(p, i->Ain.LoadEX.dst, i->Ain.LoadEX.src);
+ goto done;
+ }
+ if (i->Ain.LoadEX.szSmall == 2 && !i->Ain.LoadEX.syned) {
+ /* movzwq */
+ *p++ = rexAMode_M(i->Ain.LoadEX.dst, i->Ain.LoadEX.src);
+ *p++ = 0x0F;
+ *p++ = 0xB7;
+ p = doAMode_M(p, i->Ain.LoadEX.dst, i->Ain.LoadEX.src);
+ goto done;
+ }
+ if (i->Ain.LoadEX.szSmall == 4 && !i->Ain.LoadEX.syned) {
+ /* movzlq */
+ /* This isn't really an existing AMD64 instruction per se.
+ Rather, we have to do a 32-bit load. Because a 32-bit
+ write implicitly clears the upper 32 bits of the target
+ register, we get what we want. */
+ *p++ = clearWBit(
+ rexAMode_M(i->Ain.LoadEX.dst, i->Ain.LoadEX.src));
+ *p++ = 0x8B;
+ p = doAMode_M(p, i->Ain.LoadEX.dst, i->Ain.LoadEX.src);
+ goto done;
+ }
+ break;
+
+ case Ain_Set64:
+ /* Make the destination register be 1 or 0, depending on whether
+ the relevant condition holds. Complication: the top 56 bits
+ of the destination should be forced to zero, but doing 'xorq
+ %r,%r' kills the flag(s) we are about to read. Sigh. So
+            start off by moving $0 into the dest. */
+ reg = iregBits3210(i->Ain.Set64.dst);
+ vassert(reg < 16);
+
+ /* movq $0, %dst */
+ *p++ = toUChar(reg >= 8 ? 0x49 : 0x48);
+ *p++ = 0xC7;
+ *p++ = toUChar(0xC0 + (reg & 7));
+ p = emit32(p, 0);
+
+ /* setb lo8(%dst) */
+         /* note, 8-bit register REX trickiness.  Be careful here. */
+ *p++ = toUChar(reg >= 8 ? 0x41 : 0x40);
+ *p++ = 0x0F;
+ *p++ = toUChar(0x90 + (0x0F & i->Ain.Set64.cond));
+ *p++ = toUChar(0xC0 + (reg & 7));
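+         /* e.g. cond = Acc_Z (0x4), dst = %r9 (encoding 9):
+               49 C7 C1 00 00 00 00    movq $0, %r9
+               41 0F 94 C1             setz %r9b
+            -- without the 0x41 REX, the final modrm byte would pick
+            out %cl rather than %r9b. */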
+ goto done;
+
+ case Ain_Bsfr64:
+ *p++ = rexAMode_R(i->Ain.Bsfr64.dst, i->Ain.Bsfr64.src);
+ *p++ = 0x0F;
+ if (i->Ain.Bsfr64.isFwds) {
+ *p++ = 0xBC;
+ } else {
+ *p++ = 0xBD;
+ }
+ p = doAMode_R(p, i->Ain.Bsfr64.dst, i->Ain.Bsfr64.src);
+ goto done;
+
+ case Ain_MFence:
+ /* mfence */
+ *p++ = 0x0F; *p++ = 0xAE; *p++ = 0xF0;
+ goto done;
+
+ case Ain_ACAS:
+ /* lock */
+ *p++ = 0xF0;
+ if (i->Ain.ACAS.sz == 2) *p++ = 0x66;
+ /* cmpxchg{b,w,l,q} %rbx,mem. Expected-value in %rax, new value
+ in %rbx. The new-value register is hardwired to be %rbx
+ since dealing with byte integer registers is too much hassle,
+ so we force the register operand to %rbx (could equally be
+ %rcx or %rdx). */
+ rex = rexAMode_M( hregAMD64_RBX(), i->Ain.ACAS.addr );
+ if (i->Ain.ACAS.sz != 8)
+ rex = clearWBit(rex);
+
+ *p++ = rex; /* this can emit 0x40, which is pointless. oh well. */
+ *p++ = 0x0F;
+ if (i->Ain.ACAS.sz == 1) *p++ = 0xB0; else *p++ = 0xB1;
+ p = doAMode_M(p, hregAMD64_RBX(), i->Ain.ACAS.addr);
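+         /* e.g. sz = 8 with addr = 0(%rdi) comes out as
+            F0 48 0F B1 1F, i.e. "lock cmpxchgq %rbx, (%rdi)". */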
+ goto done;
+
+ case Ain_DACAS:
+ /* lock */
+ *p++ = 0xF0;
+ /* cmpxchg{8,16}b m{64,128}. Expected-value in %rdx:%rax, new
+ value in %rcx:%rbx. All 4 regs are hardwired in the ISA, so
+ aren't encoded in the insn. */
+         rex = rexAMode_M( fake(1), i->Ain.DACAS.addr );
+         if (i->Ain.DACAS.sz != 8)
+ rex = clearWBit(rex);
+ *p++ = rex;
+ *p++ = 0x0F;
+ *p++ = 0xC7;
+ p = doAMode_M(p, fake(1), i->Ain.DACAS.addr);
+ goto done;
+
+ case Ain_A87Free:
+ vassert(i->Ain.A87Free.nregs > 0 && i->Ain.A87Free.nregs <= 7);
+ for (j = 0; j < i->Ain.A87Free.nregs; j++) {
+ p = do_ffree_st(p, 7-j);
+ }
+ goto done;
+
+ case Ain_A87PushPop:
+ vassert(i->Ain.A87PushPop.szB == 8 || i->Ain.A87PushPop.szB == 4);
+ if (i->Ain.A87PushPop.isPush) {
+ /* Load from memory into %st(0): flds/fldl amode */
+ *p++ = clearWBit(
+ rexAMode_M(fake(0), i->Ain.A87PushPop.addr) );
+ *p++ = i->Ain.A87PushPop.szB == 4 ? 0xD9 : 0xDD;
+ p = doAMode_M(p, fake(0)/*subopcode*/, i->Ain.A87PushPop.addr);
+ } else {
+ /* Dump %st(0) to memory: fstps/fstpl amode */
+ *p++ = clearWBit(
+ rexAMode_M(fake(3), i->Ain.A87PushPop.addr) );
+ *p++ = i->Ain.A87PushPop.szB == 4 ? 0xD9 : 0xDD;
+ p = doAMode_M(p, fake(3)/*subopcode*/, i->Ain.A87PushPop.addr);
+ goto done;
+ }
+ goto done;
+
+ case Ain_A87FpOp:
+ switch (i->Ain.A87FpOp.op) {
+ case Afp_SQRT: *p++ = 0xD9; *p++ = 0xFA; break;
+ case Afp_SIN: *p++ = 0xD9; *p++ = 0xFE; break;
+ case Afp_COS: *p++ = 0xD9; *p++ = 0xFF; break;
+ case Afp_TAN: *p++ = 0xD9; *p++ = 0xF2; break;
+ case Afp_ROUND: *p++ = 0xD9; *p++ = 0xFC; break;
+ case Afp_2XM1: *p++ = 0xD9; *p++ = 0xF0; break;
+ case Afp_SCALE: *p++ = 0xD9; *p++ = 0xFD; break;
+ case Afp_ATAN: *p++ = 0xD9; *p++ = 0xF3; break;
+ case Afp_YL2X: *p++ = 0xD9; *p++ = 0xF1; break;
+ case Afp_YL2XP1: *p++ = 0xD9; *p++ = 0xF9; break;
+ case Afp_PREM: *p++ = 0xD9; *p++ = 0xF8; break;
+ case Afp_PREM1: *p++ = 0xD9; *p++ = 0xF5; break;
+ default: goto bad;
+ }
+ goto done;
+
+ case Ain_A87LdCW:
+ *p++ = clearWBit(
+ rexAMode_M(fake(5), i->Ain.A87LdCW.addr) );
+ *p++ = 0xD9;
+ p = doAMode_M(p, fake(5)/*subopcode*/, i->Ain.A87LdCW.addr);
+ goto done;
+
+ case Ain_A87StSW:
+ *p++ = clearWBit(
+ rexAMode_M(fake(7), i->Ain.A87StSW.addr) );
+ *p++ = 0xDD;
+ p = doAMode_M(p, fake(7)/*subopcode*/, i->Ain.A87StSW.addr);
+ goto done;
+
+ case Ain_Store:
+ if (i->Ain.Store.sz == 2) {
+            /* This just goes to show the craziness of the instruction
+ set encoding. We have to insert two prefix bytes, but be
+ careful to avoid a conflict in what the size should be, by
+ ensuring that REX.W = 0. */
+ *p++ = 0x66; /* override to 16-bits */
+ *p++ = clearWBit( rexAMode_M( i->Ain.Store.src, i->Ain.Store.dst) );
+ *p++ = 0x89;
+ p = doAMode_M(p, i->Ain.Store.src, i->Ain.Store.dst);
+ goto done;
+ }
+ if (i->Ain.Store.sz == 4) {
+ *p++ = clearWBit( rexAMode_M( i->Ain.Store.src, i->Ain.Store.dst) );
+ *p++ = 0x89;
+ p = doAMode_M(p, i->Ain.Store.src, i->Ain.Store.dst);
+ goto done;
+ }
+ if (i->Ain.Store.sz == 1) {
+ /* This is one place where it would be wrong to skip emitting
+ a rex byte of 0x40, since the mere presence of rex changes
+ the meaning of the byte register access. Be careful. */
+ *p++ = clearWBit( rexAMode_M( i->Ain.Store.src, i->Ain.Store.dst) );
+ *p++ = 0x88;
+ p = doAMode_M(p, i->Ain.Store.src, i->Ain.Store.dst);
+ goto done;
+ }
+ break;
+
+//.. case Xin_FpUnary:
+//.. /* gop %src, %dst
+//.. --> ffree %st7 ; fld %st(src) ; fop %st(0) ; fstp %st(1+dst)
+//.. */
+//.. p = do_ffree_st7(p);
+//.. p = do_fld_st(p, 0+hregNumber(i->Xin.FpUnary.src));
+//.. p = do_fop1_st(p, i->Xin.FpUnary.op);
+//.. p = do_fstp_st(p, 1+hregNumber(i->Xin.FpUnary.dst));
+//.. goto done;
+//..
+//.. case Xin_FpBinary:
+//.. if (i->Xin.FpBinary.op == Xfp_YL2X
+//.. || i->Xin.FpBinary.op == Xfp_YL2XP1) {
+//.. /* Have to do this specially. */
+//.. /* ffree %st7 ; fld %st(srcL) ;
+//.. ffree %st7 ; fld %st(srcR+1) ; fyl2x{p1} ; fstp(1+dst) */
+//.. p = do_ffree_st7(p);
+//.. p = do_fld_st(p, 0+hregNumber(i->Xin.FpBinary.srcL));
+//.. p = do_ffree_st7(p);
+//.. p = do_fld_st(p, 1+hregNumber(i->Xin.FpBinary.srcR));
+//.. *p++ = 0xD9;
+//.. *p++ = i->Xin.FpBinary.op==Xfp_YL2X ? 0xF1 : 0xF9;
+//.. p = do_fstp_st(p, 1+hregNumber(i->Xin.FpBinary.dst));
+//.. goto done;
+//.. }
+//.. if (i->Xin.FpBinary.op == Xfp_ATAN) {
+//.. /* Have to do this specially. */
+//.. /* ffree %st7 ; fld %st(srcL) ;
+//.. ffree %st7 ; fld %st(srcR+1) ; fpatan ; fstp(1+dst) */
+//.. p = do_ffree_st7(p);
+//.. p = do_fld_st(p, 0+hregNumber(i->Xin.FpBinary.srcL));
+//.. p = do_ffree_st7(p);
+//.. p = do_fld_st(p, 1+hregNumber(i->Xin.FpBinary.srcR));
+//.. *p++ = 0xD9; *p++ = 0xF3;
+//.. p = do_fstp_st(p, 1+hregNumber(i->Xin.FpBinary.dst));
+//.. goto done;
+//.. }
+//.. if (i->Xin.FpBinary.op == Xfp_PREM
+//.. || i->Xin.FpBinary.op == Xfp_PREM1
+//.. || i->Xin.FpBinary.op == Xfp_SCALE) {
+//.. /* Have to do this specially. */
+//.. /* ffree %st7 ; fld %st(srcR) ;
+//.. ffree %st7 ; fld %st(srcL+1) ; fprem/fprem1/fscale ; fstp(2+dst) ;
+//.. fincstp ; ffree %st7 */
+//.. p = do_ffree_st7(p);
+//.. p = do_fld_st(p, 0+hregNumber(i->Xin.FpBinary.srcR));
+//.. p = do_ffree_st7(p);
+//.. p = do_fld_st(p, 1+hregNumber(i->Xin.FpBinary.srcL));
+//.. *p++ = 0xD9;
+//.. switch (i->Xin.FpBinary.op) {
+//.. case Xfp_PREM: *p++ = 0xF8; break;
+//.. case Xfp_PREM1: *p++ = 0xF5; break;
+//.. case Xfp_SCALE: *p++ = 0xFD; break;
+//.. default: vpanic("emitAMD64Instr(FpBinary,PREM/PREM1/SCALE)");
+//.. }
+//.. p = do_fstp_st(p, 2+hregNumber(i->Xin.FpBinary.dst));
+//.. *p++ = 0xD9; *p++ = 0xF7;
+//.. p = do_ffree_st7(p);
+//.. goto done;
+//.. }
+//.. /* General case */
+//.. /* gop %srcL, %srcR, %dst
+//.. --> ffree %st7 ; fld %st(srcL) ; fop %st(1+srcR) ; fstp %st(1+dst)
+//.. */
+//.. p = do_ffree_st7(p);
+//.. p = do_fld_st(p, 0+hregNumber(i->Xin.FpBinary.srcL));
+//.. p = do_fop2_st(p, i->Xin.FpBinary.op,
+//.. 1+hregNumber(i->Xin.FpBinary.srcR));
+//.. p = do_fstp_st(p, 1+hregNumber(i->Xin.FpBinary.dst));
+//.. goto done;
+//..
+//.. case Xin_FpLdSt:
+//.. vassert(i->Xin.FpLdSt.sz == 4 || i->Xin.FpLdSt.sz == 8);
+//.. if (i->Xin.FpLdSt.isLoad) {
+//.. /* Load from memory into %fakeN.
+//.. --> ffree %st(7) ; fld{s/l} amode ; fstp st(N+1)
+//.. */
+//.. p = do_ffree_st7(p);
+//.. *p++ = i->Xin.FpLdSt.sz==4 ? 0xD9 : 0xDD;
+//.. p = doAMode_M(p, fake(0)/*subopcode*/, i->Xin.FpLdSt.addr);
+//.. p = do_fstp_st(p, 1+hregNumber(i->Xin.FpLdSt.reg));
+//.. goto done;
+//.. } else {
+//.. /* Store from %fakeN into memory.
+//.. --> ffree %st(7) ; fld st(N) ; fstp{l|s} amode
+//.. */
+//.. p = do_ffree_st7(p);
+//.. p = do_fld_st(p, 0+hregNumber(i->Xin.FpLdSt.reg));
+//.. *p++ = i->Xin.FpLdSt.sz==4 ? 0xD9 : 0xDD;
+//.. p = doAMode_M(p, fake(3)/*subopcode*/, i->Xin.FpLdSt.addr);
+//.. goto done;
+//.. }
+//.. break;
+//..
+//.. case Xin_FpLdStI:
+//.. if (i->Xin.FpLdStI.isLoad) {
+//.. /* Load from memory into %fakeN, converting from an int.
+//.. --> ffree %st(7) ; fild{w/l/ll} amode ; fstp st(N+1)
+//.. */
+//.. switch (i->Xin.FpLdStI.sz) {
+//.. case 8: opc = 0xDF; subopc_imm = 5; break;
+//.. case 4: opc = 0xDB; subopc_imm = 0; break;
+//.. case 2: vassert(0); opc = 0xDF; subopc_imm = 0; break;
+//.. default: vpanic("emitAMD64Instr(Xin_FpLdStI-load)");
+//.. }
+//.. p = do_ffree_st7(p);
+//.. *p++ = opc;
+//.. p = doAMode_M(p, fake(subopc_imm)/*subopcode*/, i->Xin.FpLdStI.addr);
+//.. p = do_fstp_st(p, 1+hregNumber(i->Xin.FpLdStI.reg));
+//.. goto done;
+//.. } else {
+//.. /* Store from %fakeN into memory, converting to an int.
+//.. --> ffree %st(7) ; fld st(N) ; fistp{w/l/ll} amode
+//.. */
+//.. switch (i->Xin.FpLdStI.sz) {
+//.. case 8: opc = 0xDF; subopc_imm = 7; break;
+//.. case 4: opc = 0xDB; subopc_imm = 3; break;
+//.. case 2: opc = 0xDF; subopc_imm = 3; break;
+//.. default: vpanic("emitAMD64Instr(Xin_FpLdStI-store)");
+//.. }
+//.. p = do_ffree_st7(p);
+//.. p = do_fld_st(p, 0+hregNumber(i->Xin.FpLdStI.reg));
+//.. *p++ = opc;
+//.. p = doAMode_M(p, fake(subopc_imm)/*subopcode*/, i->Xin.FpLdStI.addr);
+//.. goto done;
+//.. }
+//.. break;
+//..
+//.. case Xin_Fp64to32:
+//.. /* ffree %st7 ; fld %st(src) */
+//.. p = do_ffree_st7(p);
+//.. p = do_fld_st(p, 0+fregNo(i->Xin.Fp64to32.src));
+//.. /* subl $4, %esp */
+//.. *p++ = 0x83; *p++ = 0xEC; *p++ = 0x04;
+//.. /* fstps (%esp) */
+//.. *p++ = 0xD9; *p++ = 0x1C; *p++ = 0x24;
+//.. /* flds (%esp) */
+//.. *p++ = 0xD9; *p++ = 0x04; *p++ = 0x24;
+//.. /* addl $4, %esp */
+//.. *p++ = 0x83; *p++ = 0xC4; *p++ = 0x04;
+//.. /* fstp %st(1+dst) */
+//.. p = do_fstp_st(p, 1+fregNo(i->Xin.Fp64to32.dst));
+//.. goto done;
+//..
+//.. case Xin_FpCMov:
+//.. /* jmp fwds if !condition */
+//.. *p++ = 0x70 + (i->Xin.FpCMov.cond ^ 1);
+//.. *p++ = 0; /* # of bytes in the next bit, which we don't know yet */
+//.. ptmp = p;
+//..
+//.. /* ffree %st7 ; fld %st(src) ; fstp %st(1+dst) */
+//.. p = do_ffree_st7(p);
+//.. p = do_fld_st(p, 0+fregNo(i->Xin.FpCMov.src));
+//.. p = do_fstp_st(p, 1+fregNo(i->Xin.FpCMov.dst));
+//..
+//.. /* Fill in the jump offset. */
+//.. *(ptmp-1) = p - ptmp;
+//.. goto done;
+
+ case Ain_LdMXCSR:
+ *p++ = clearWBit(rexAMode_M( fake(0), i->Ain.LdMXCSR.addr));
+ *p++ = 0x0F;
+ *p++ = 0xAE;
+ p = doAMode_M(p, fake(2)/*subopcode*/, i->Ain.LdMXCSR.addr);
+ goto done;
+
+//.. case Xin_FpStSW_AX:
+//.. /* note, this emits fnstsw %ax, not fstsw %ax */
+//.. *p++ = 0xDF;
+//.. *p++ = 0xE0;
+//.. goto done;
+
+ case Ain_SseUComIS:
+ /* ucomi[sd] %srcL, %srcR ; pushfq ; popq %dst */
+ /* ucomi[sd] %srcL, %srcR */
+ if (i->Ain.SseUComIS.sz == 8) {
+ *p++ = 0x66;
+ } else {
+ goto bad;
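+            /* the sz == 4 (ucomiss) case is not handled yet --
+               awaiting a test case */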
+ vassert(i->Ain.SseUComIS.sz == 4);
+ }
+ *p++ = clearWBit (
+ rexAMode_R( vreg2ireg(i->Ain.SseUComIS.srcL),
+ vreg2ireg(i->Ain.SseUComIS.srcR) ));
+ *p++ = 0x0F;
+ *p++ = 0x2E;
+ p = doAMode_R(p, vreg2ireg(i->Ain.SseUComIS.srcL),
+ vreg2ireg(i->Ain.SseUComIS.srcR) );
+ /* pushfq */
+ *p++ = 0x9C;
+ /* popq %dst */
+ *p++ = toUChar(0x40 + (1 & iregBit3(i->Ain.SseUComIS.dst)));
+ *p++ = toUChar(0x58 + iregBits210(i->Ain.SseUComIS.dst));
+ goto done;
+
+ case Ain_SseSI2SF:
+         /* cvtsi2s[sd] %src, %dst */
+ rex = rexAMode_R( vreg2ireg(i->Ain.SseSI2SF.dst),
+ i->Ain.SseSI2SF.src );
+ *p++ = toUChar(i->Ain.SseSI2SF.szD==4 ? 0xF3 : 0xF2);
+ *p++ = toUChar(i->Ain.SseSI2SF.szS==4 ? clearWBit(rex) : rex);
+ *p++ = 0x0F;
+ *p++ = 0x2A;
+ p = doAMode_R( p, vreg2ireg(i->Ain.SseSI2SF.dst),
+ i->Ain.SseSI2SF.src );
+ goto done;
+
+ case Ain_SseSF2SI:
+         /* cvts[sd]2si %src, %dst */
+ rex = rexAMode_R( i->Ain.SseSF2SI.dst,
+ vreg2ireg(i->Ain.SseSF2SI.src) );
+ *p++ = toUChar(i->Ain.SseSF2SI.szS==4 ? 0xF3 : 0xF2);
+ *p++ = toUChar(i->Ain.SseSF2SI.szD==4 ? clearWBit(rex) : rex);
+ *p++ = 0x0F;
+ *p++ = 0x2D;
+ p = doAMode_R( p, i->Ain.SseSF2SI.dst,
+ vreg2ireg(i->Ain.SseSF2SI.src) );
+ goto done;
+
+ case Ain_SseSDSS:
+ /* cvtsd2ss/cvtss2sd %src, %dst */
+ *p++ = toUChar(i->Ain.SseSDSS.from64 ? 0xF2 : 0xF3);
+ *p++ = clearWBit(
+ rexAMode_R( vreg2ireg(i->Ain.SseSDSS.dst),
+ vreg2ireg(i->Ain.SseSDSS.src) ));
+ *p++ = 0x0F;
+ *p++ = 0x5A;
+ p = doAMode_R( p, vreg2ireg(i->Ain.SseSDSS.dst),
+ vreg2ireg(i->Ain.SseSDSS.src) );
+ goto done;
+
+//..
+//.. case Xin_FpCmp:
+//.. /* gcmp %fL, %fR, %dst
+//.. -> ffree %st7; fpush %fL ; fucomp %(fR+1) ;
+//.. fnstsw %ax ; movl %eax, %dst
+//.. */
+//.. /* ffree %st7 */
+//.. p = do_ffree_st7(p);
+//.. /* fpush %fL */
+//.. p = do_fld_st(p, 0+fregNo(i->Xin.FpCmp.srcL));
+//.. /* fucomp %(fR+1) */
+//.. *p++ = 0xDD;
+//.. *p++ = 0xE8 + (7 & (1+fregNo(i->Xin.FpCmp.srcR)));
+//.. /* fnstsw %ax */
+//.. *p++ = 0xDF;
+//.. *p++ = 0xE0;
+//.. /* movl %eax, %dst */
+//.. *p++ = 0x89;
+//.. p = doAMode_R(p, hregAMD64_EAX(), i->Xin.FpCmp.dst);
+//.. goto done;
+//..
+//.. case Xin_SseConst: {
+//.. UShort con = i->Xin.SseConst.con;
+//.. p = push_word_from_tags(p, (con >> 12) & 0xF);
+//.. p = push_word_from_tags(p, (con >> 8) & 0xF);
+//.. p = push_word_from_tags(p, (con >> 4) & 0xF);
+//.. p = push_word_from_tags(p, con & 0xF);
+//.. /* movl (%esp), %xmm-dst */
+//.. *p++ = 0x0F;
+//.. *p++ = 0x10;
+//.. *p++ = 0x04 + 8 * (7 & vregNo(i->Xin.SseConst.dst));
+//.. *p++ = 0x24;
+//.. /* addl $16, %esp */
+//.. *p++ = 0x83;
+//.. *p++ = 0xC4;
+//.. *p++ = 0x10;
+//.. goto done;
+//.. }
+
+ case Ain_SseLdSt:
+ if (i->Ain.SseLdSt.sz == 8) {
+ *p++ = 0xF2;
+ } else
+ if (i->Ain.SseLdSt.sz == 4) {
+ *p++ = 0xF3;
+ } else
+ if (i->Ain.SseLdSt.sz != 16) {
+ vassert(0);
+ }
+ *p++ = clearWBit(
+ rexAMode_M( vreg2ireg(i->Ain.SseLdSt.reg), i->Ain.SseLdSt.addr));
+ *p++ = 0x0F;
+ *p++ = toUChar(i->Ain.SseLdSt.isLoad ? 0x10 : 0x11);
+ p = doAMode_M(p, vreg2ireg(i->Ain.SseLdSt.reg), i->Ain.SseLdSt.addr);
+ goto done;
+
+ case Ain_SseLdzLO:
+ vassert(i->Ain.SseLdzLO.sz == 4 || i->Ain.SseLdzLO.sz == 8);
+ /* movs[sd] amode, %xmm-dst */
+ *p++ = toUChar(i->Ain.SseLdzLO.sz==4 ? 0xF3 : 0xF2);
+ *p++ = clearWBit(
+ rexAMode_M(vreg2ireg(i->Ain.SseLdzLO.reg),
+ i->Ain.SseLdzLO.addr));
+ *p++ = 0x0F;
+ *p++ = 0x10;
+ p = doAMode_M(p, vreg2ireg(i->Ain.SseLdzLO.reg),
+ i->Ain.SseLdzLO.addr);
+ goto done;
+
+ case Ain_Sse32Fx4:
+ xtra = 0;
+ *p++ = clearWBit(
+ rexAMode_R( vreg2ireg(i->Ain.Sse32Fx4.dst),
+ vreg2ireg(i->Ain.Sse32Fx4.src) ));
+ *p++ = 0x0F;
+ switch (i->Ain.Sse32Fx4.op) {
+ case Asse_ADDF: *p++ = 0x58; break;
+ case Asse_DIVF: *p++ = 0x5E; break;
+ case Asse_MAXF: *p++ = 0x5F; break;
+ case Asse_MINF: *p++ = 0x5D; break;
+ case Asse_MULF: *p++ = 0x59; break;
+ case Asse_RCPF: *p++ = 0x53; break;
+ case Asse_RSQRTF: *p++ = 0x52; break;
+ case Asse_SQRTF: *p++ = 0x51; break;
+ case Asse_SUBF: *p++ = 0x5C; break;
+ case Asse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
+ case Asse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
+ case Asse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
+ case Asse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
+ default: goto bad;
+ }
+ p = doAMode_R(p, vreg2ireg(i->Ain.Sse32Fx4.dst),
+ vreg2ireg(i->Ain.Sse32Fx4.src) );
+ if (xtra & 0x100)
+ *p++ = toUChar(xtra & 0xFF);
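+         /* The 0x100 bit in xtra merely flags that a comparison-
+            predicate imm8 must follow the mod-reg-rm byte; e.g.
+            Asse_CMPLTF comes out as 0F C2 /r 01, i.e. "cmpltps". */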
+ goto done;
+
+ case Ain_Sse64Fx2:
+ xtra = 0;
+ *p++ = 0x66;
+ *p++ = clearWBit(
+ rexAMode_R( vreg2ireg(i->Ain.Sse64Fx2.dst),
+ vreg2ireg(i->Ain.Sse64Fx2.src) ));
+ *p++ = 0x0F;
+ switch (i->Ain.Sse64Fx2.op) {
+ case Asse_ADDF: *p++ = 0x58; break;
+ case Asse_DIVF: *p++ = 0x5E; break;
+ case Asse_MAXF: *p++ = 0x5F; break;
+ case Asse_MINF: *p++ = 0x5D; break;
+ case Asse_MULF: *p++ = 0x59; break;
+//.. case Xsse_RCPF: *p++ = 0x53; break;
+//.. case Xsse_RSQRTF: *p++ = 0x52; break;
+ case Asse_SQRTF: *p++ = 0x51; break;
+ case Asse_SUBF: *p++ = 0x5C; break;
+ case Asse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
+ case Asse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
+ case Asse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
+ case Asse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
+ default: goto bad;
+ }
+ p = doAMode_R(p, vreg2ireg(i->Ain.Sse64Fx2.dst),
+ vreg2ireg(i->Ain.Sse64Fx2.src) );
+ if (xtra & 0x100)
+ *p++ = toUChar(xtra & 0xFF);
+ goto done;
+
+ case Ain_Sse32FLo:
+ xtra = 0;
+ *p++ = 0xF3;
+ *p++ = clearWBit(
+ rexAMode_R( vreg2ireg(i->Ain.Sse32FLo.dst),
+ vreg2ireg(i->Ain.Sse32FLo.src) ));
+ *p++ = 0x0F;
+ switch (i->Ain.Sse32FLo.op) {
+ case Asse_ADDF: *p++ = 0x58; break;
+ case Asse_DIVF: *p++ = 0x5E; break;
+ case Asse_MAXF: *p++ = 0x5F; break;
+ case Asse_MINF: *p++ = 0x5D; break;
+ case Asse_MULF: *p++ = 0x59; break;
+ case Asse_RCPF: *p++ = 0x53; break;
+ case Asse_RSQRTF: *p++ = 0x52; break;
+ case Asse_SQRTF: *p++ = 0x51; break;
+ case Asse_SUBF: *p++ = 0x5C; break;
+ case Asse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
+ case Asse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
+ case Asse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
+ case Asse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
+ default: goto bad;
+ }
+ p = doAMode_R(p, vreg2ireg(i->Ain.Sse32FLo.dst),
+ vreg2ireg(i->Ain.Sse32FLo.src) );
+ if (xtra & 0x100)
+ *p++ = toUChar(xtra & 0xFF);
+ goto done;
+
+ case Ain_Sse64FLo:
+ xtra = 0;
+ *p++ = 0xF2;
+ *p++ = clearWBit(
+ rexAMode_R( vreg2ireg(i->Ain.Sse64FLo.dst),
+ vreg2ireg(i->Ain.Sse64FLo.src) ));
+ *p++ = 0x0F;
+ switch (i->Ain.Sse64FLo.op) {
+ case Asse_ADDF: *p++ = 0x58; break;
+ case Asse_DIVF: *p++ = 0x5E; break;
+ case Asse_MAXF: *p++ = 0x5F; break;
+ case Asse_MINF: *p++ = 0x5D; break;
+ case Asse_MULF: *p++ = 0x59; break;
+//.. case Xsse_RCPF: *p++ = 0x53; break;
+//.. case Xsse_RSQRTF: *p++ = 0x52; break;
+ case Asse_SQRTF: *p++ = 0x51; break;
+ case Asse_SUBF: *p++ = 0x5C; break;
+ case Asse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
+ case Asse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
+ case Asse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
+ case Asse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
+ default: goto bad;
+ }
+ p = doAMode_R(p, vreg2ireg(i->Ain.Sse64FLo.dst),
+ vreg2ireg(i->Ain.Sse64FLo.src) );
+ if (xtra & 0x100)
+ *p++ = toUChar(xtra & 0xFF);
+ goto done;
+
+ case Ain_SseReRg:
+# define XX(_n) *p++ = (_n)
+
+ rex = clearWBit(
+ rexAMode_R( vreg2ireg(i->Ain.SseReRg.dst),
+ vreg2ireg(i->Ain.SseReRg.src) ));
+
+ switch (i->Ain.SseReRg.op) {
+ case Asse_MOV: /*movups*/ XX(rex); XX(0x0F); XX(0x10); break;
+ case Asse_OR: XX(rex); XX(0x0F); XX(0x56); break;
+ case Asse_XOR: XX(rex); XX(0x0F); XX(0x57); break;
+ case Asse_AND: XX(rex); XX(0x0F); XX(0x54); break;
+ case Asse_ANDN: XX(rex); XX(0x0F); XX(0x55); break;
+ case Asse_PACKSSD: XX(0x66); XX(rex); XX(0x0F); XX(0x6B); break;
+ case Asse_PACKSSW: XX(0x66); XX(rex); XX(0x0F); XX(0x63); break;
+ case Asse_PACKUSW: XX(0x66); XX(rex); XX(0x0F); XX(0x67); break;
+ case Asse_ADD8: XX(0x66); XX(rex); XX(0x0F); XX(0xFC); break;
+ case Asse_ADD16: XX(0x66); XX(rex); XX(0x0F); XX(0xFD); break;
+ case Asse_ADD32: XX(0x66); XX(rex); XX(0x0F); XX(0xFE); break;
+ case Asse_ADD64: XX(0x66); XX(rex); XX(0x0F); XX(0xD4); break;
+ case Asse_QADD8S: XX(0x66); XX(rex); XX(0x0F); XX(0xEC); break;
+ case Asse_QADD16S: XX(0x66); XX(rex); XX(0x0F); XX(0xED); break;
+ case Asse_QADD8U: XX(0x66); XX(rex); XX(0x0F); XX(0xDC); break;
+ case Asse_QADD16U: XX(0x66); XX(rex); XX(0x0F); XX(0xDD); break;
+ case Asse_AVG8U: XX(0x66); XX(rex); XX(0x0F); XX(0xE0); break;
+ case Asse_AVG16U: XX(0x66); XX(rex); XX(0x0F); XX(0xE3); break;
+ case Asse_CMPEQ8: XX(0x66); XX(rex); XX(0x0F); XX(0x74); break;
+ case Asse_CMPEQ16: XX(0x66); XX(rex); XX(0x0F); XX(0x75); break;
+ case Asse_CMPEQ32: XX(0x66); XX(rex); XX(0x0F); XX(0x76); break;
+ case Asse_CMPGT8S: XX(0x66); XX(rex); XX(0x0F); XX(0x64); break;
+ case Asse_CMPGT16S: XX(0x66); XX(rex); XX(0x0F); XX(0x65); break;
+ case Asse_CMPGT32S: XX(0x66); XX(rex); XX(0x0F); XX(0x66); break;
+ case Asse_MAX16S: XX(0x66); XX(rex); XX(0x0F); XX(0xEE); break;
+ case Asse_MAX8U: XX(0x66); XX(rex); XX(0x0F); XX(0xDE); break;
+ case Asse_MIN16S: XX(0x66); XX(rex); XX(0x0F); XX(0xEA); break;
+ case Asse_MIN8U: XX(0x66); XX(rex); XX(0x0F); XX(0xDA); break;
+ case Asse_MULHI16U: XX(0x66); XX(rex); XX(0x0F); XX(0xE4); break;
+ case Asse_MULHI16S: XX(0x66); XX(rex); XX(0x0F); XX(0xE5); break;
+ case Asse_MUL16: XX(0x66); XX(rex); XX(0x0F); XX(0xD5); break;
+ case Asse_SHL16: XX(0x66); XX(rex); XX(0x0F); XX(0xF1); break;
+ case Asse_SHL32: XX(0x66); XX(rex); XX(0x0F); XX(0xF2); break;
+ case Asse_SHL64: XX(0x66); XX(rex); XX(0x0F); XX(0xF3); break;
+ case Asse_SAR16: XX(0x66); XX(rex); XX(0x0F); XX(0xE1); break;
+ case Asse_SAR32: XX(0x66); XX(rex); XX(0x0F); XX(0xE2); break;
+ case Asse_SHR16: XX(0x66); XX(rex); XX(0x0F); XX(0xD1); break;
+ case Asse_SHR32: XX(0x66); XX(rex); XX(0x0F); XX(0xD2); break;
+ case Asse_SHR64: XX(0x66); XX(rex); XX(0x0F); XX(0xD3); break;
+ case Asse_SUB8: XX(0x66); XX(rex); XX(0x0F); XX(0xF8); break;
+ case Asse_SUB16: XX(0x66); XX(rex); XX(0x0F); XX(0xF9); break;
+ case Asse_SUB32: XX(0x66); XX(rex); XX(0x0F); XX(0xFA); break;
+ case Asse_SUB64: XX(0x66); XX(rex); XX(0x0F); XX(0xFB); break;
+ case Asse_QSUB8S: XX(0x66); XX(rex); XX(0x0F); XX(0xE8); break;
+ case Asse_QSUB16S: XX(0x66); XX(rex); XX(0x0F); XX(0xE9); break;
+ case Asse_QSUB8U: XX(0x66); XX(rex); XX(0x0F); XX(0xD8); break;
+ case Asse_QSUB16U: XX(0x66); XX(rex); XX(0x0F); XX(0xD9); break;
+ case Asse_UNPCKHB: XX(0x66); XX(rex); XX(0x0F); XX(0x68); break;
+ case Asse_UNPCKHW: XX(0x66); XX(rex); XX(0x0F); XX(0x69); break;
+ case Asse_UNPCKHD: XX(0x66); XX(rex); XX(0x0F); XX(0x6A); break;
+ case Asse_UNPCKHQ: XX(0x66); XX(rex); XX(0x0F); XX(0x6D); break;
+ case Asse_UNPCKLB: XX(0x66); XX(rex); XX(0x0F); XX(0x60); break;
+ case Asse_UNPCKLW: XX(0x66); XX(rex); XX(0x0F); XX(0x61); break;
+ case Asse_UNPCKLD: XX(0x66); XX(rex); XX(0x0F); XX(0x62); break;
+ case Asse_UNPCKLQ: XX(0x66); XX(rex); XX(0x0F); XX(0x6C); break;
+ default: goto bad;
+ }
+ p = doAMode_R(p, vreg2ireg(i->Ain.SseReRg.dst),
+ vreg2ireg(i->Ain.SseReRg.src) );
+# undef XX
+ goto done;
+
+ case Ain_SseCMov:
+ /* jmp fwds if !condition */
+ *p++ = toUChar(0x70 + (i->Ain.SseCMov.cond ^ 1));
+ *p++ = 0; /* # of bytes in the next bit, which we don't know yet */
+ ptmp = p;
+
+ /* movaps %src, %dst */
+ *p++ = clearWBit(
+ rexAMode_R( vreg2ireg(i->Ain.SseCMov.dst),
+ vreg2ireg(i->Ain.SseCMov.src) ));
+ *p++ = 0x0F;
+ *p++ = 0x28;
+ p = doAMode_R(p, vreg2ireg(i->Ain.SseCMov.dst),
+ vreg2ireg(i->Ain.SseCMov.src) );
+
+ /* Fill in the jump offset. */
+ *(ptmp-1) = toUChar(p - ptmp);
+ goto done;
+
+ case Ain_SseShuf:
+ *p++ = 0x66;
+ *p++ = clearWBit(
+ rexAMode_R( vreg2ireg(i->Ain.SseShuf.dst),
+ vreg2ireg(i->Ain.SseShuf.src) ));
+ *p++ = 0x0F;
+ *p++ = 0x70;
+ p = doAMode_R(p, vreg2ireg(i->Ain.SseShuf.dst),
+ vreg2ireg(i->Ain.SseShuf.src) );
+ *p++ = (UChar)(i->Ain.SseShuf.order);
+ goto done;
+
+ default:
+ goto bad;
+ }
+
+ bad:
+ ppAMD64Instr(i, mode64);
+ vpanic("emit_AMD64Instr");
+ /*NOTREACHED*/
+
+ done:
+ vassert(p - &buf[0] <= 32);
+ return p - &buf[0];
+
+# undef fake
+}
+
+/*---------------------------------------------------------------*/
+/*--- end host_amd64_defs.c ---*/
+/*---------------------------------------------------------------*/
diff --git a/VEX/priv/host_amd64_defs.h b/VEX/priv/host_amd64_defs.h
new file mode 100644
index 0000000..cf19bac
--- /dev/null
+++ b/VEX/priv/host_amd64_defs.h
@@ -0,0 +1,753 @@
+
+/*---------------------------------------------------------------*/
+/*--- begin host_amd64_defs.h ---*/
+/*---------------------------------------------------------------*/
+
+/*
+ This file is part of Valgrind, a dynamic binary instrumentation
+ framework.
+
+ Copyright (C) 2004-2010 OpenWorks LLP
+ info@open-works.net
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA.
+
+ The GNU General Public License is contained in the file COPYING.
+
+ Neither the names of the U.S. Department of Energy nor the
+ University of California nor the names of its contributors may be
+ used to endorse or promote products derived from this software
+ without prior written permission.
+*/
+
+#ifndef __VEX_HOST_AMD64_DEFS_H
+#define __VEX_HOST_AMD64_DEFS_H
+
+
+/* --------- Registers. --------- */
+
+/* The usual HReg abstraction. There are 16 real int regs, 6 real
+ float regs, and 16 real vector regs.
+*/
+
+extern void ppHRegAMD64 ( HReg );
+
+extern HReg hregAMD64_RAX ( void );
+extern HReg hregAMD64_RBX ( void );
+extern HReg hregAMD64_RCX ( void );
+extern HReg hregAMD64_RDX ( void );
+extern HReg hregAMD64_RSP ( void );
+extern HReg hregAMD64_RBP ( void );
+extern HReg hregAMD64_RSI ( void );
+extern HReg hregAMD64_RDI ( void );
+extern HReg hregAMD64_R8 ( void );
+extern HReg hregAMD64_R9 ( void );
+extern HReg hregAMD64_R10 ( void );
+extern HReg hregAMD64_R11 ( void );
+extern HReg hregAMD64_R12 ( void );
+extern HReg hregAMD64_R13 ( void );
+extern HReg hregAMD64_R14 ( void );
+extern HReg hregAMD64_R15 ( void );
+
+extern HReg hregAMD64_FAKE0 ( void );
+extern HReg hregAMD64_FAKE1 ( void );
+extern HReg hregAMD64_FAKE2 ( void );
+extern HReg hregAMD64_FAKE3 ( void );
+extern HReg hregAMD64_FAKE4 ( void );
+extern HReg hregAMD64_FAKE5 ( void );
+
+extern HReg hregAMD64_XMM0 ( void );
+extern HReg hregAMD64_XMM1 ( void );
+extern HReg hregAMD64_XMM2 ( void );
+extern HReg hregAMD64_XMM3 ( void );
+extern HReg hregAMD64_XMM4 ( void );
+extern HReg hregAMD64_XMM5 ( void );
+extern HReg hregAMD64_XMM6 ( void );
+extern HReg hregAMD64_XMM7 ( void );
+extern HReg hregAMD64_XMM8 ( void );
+extern HReg hregAMD64_XMM9 ( void );
+extern HReg hregAMD64_XMM10 ( void );
+extern HReg hregAMD64_XMM11 ( void );
+extern HReg hregAMD64_XMM12 ( void );
+extern HReg hregAMD64_XMM13 ( void );
+extern HReg hregAMD64_XMM14 ( void );
+extern HReg hregAMD64_XMM15 ( void );
+
+
+/* --------- Condition codes, AMD encoding. --------- */
+
+typedef
+ enum {
+ Acc_O = 0, /* overflow */
+ Acc_NO = 1, /* no overflow */
+
+ Acc_B = 2, /* below */
+ Acc_NB = 3, /* not below */
+
+ Acc_Z = 4, /* zero */
+ Acc_NZ = 5, /* not zero */
+
+ Acc_BE = 6, /* below or equal */
+ Acc_NBE = 7, /* not below or equal */
+
+ Acc_S = 8, /* negative */
+ Acc_NS = 9, /* not negative */
+
+ Acc_P = 10, /* parity even */
+ Acc_NP = 11, /* not parity even */
+
+      Acc_L      = 12, /* less */
+ Acc_NL = 13, /* not less */
+
+ Acc_LE = 14, /* less or equal */
+ Acc_NLE = 15, /* not less or equal */
+
+ Acc_ALWAYS = 16 /* the usual hack */
+ }
+ AMD64CondCode;
+
+extern HChar* showAMD64CondCode ( AMD64CondCode );
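+
+/* Illustrative note (not in the original header): these values match
+   the Intel condition-code encoding, so a short conditional jump on
+   cc is emitted as the single opcode byte 0x70+cc, and XORing a code
+   with 1 negates it, e.g. Acc_Z ^ 1 == Acc_NZ (0x74 "jz" vs 0x75
+   "jnz").  This is how the emitter jumps over a move when a
+   condition fails (see the Ain_SseCMov case of emit_AMD64Instr). */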
+
+
+/* --------- Memory address expressions (amodes). --------- */
+
+typedef
+ enum {
+ Aam_IR, /* Immediate + Reg */
+ Aam_IRRS /* Immediate + Reg1 + (Reg2 << Shift) */
+ }
+ AMD64AModeTag;
+
+typedef
+ struct {
+ AMD64AModeTag tag;
+ union {
+ struct {
+ UInt imm;
+ HReg reg;
+ } IR;
+ struct {
+ UInt imm;
+ HReg base;
+ HReg index;
+ Int shift; /* 0, 1, 2 or 3 only */
+ } IRRS;
+ } Aam;
+ }
+ AMD64AMode;
+
+extern AMD64AMode* AMD64AMode_IR ( UInt, HReg );
+extern AMD64AMode* AMD64AMode_IRRS ( UInt, HReg, HReg, Int );
+
+extern AMD64AMode* dopyAMD64AMode ( AMD64AMode* );
+
+extern void ppAMD64AMode ( AMD64AMode* );
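+
+/* Sketch of typical construction (assumed usage, for illustration
+   only): the amode 16(%rbp) would be built as
+
+      AMD64AMode_IR(16, hregAMD64_RBP());
+
+   and 0(%rbp,%r10,8) as
+
+      AMD64AMode_IRRS(0, hregAMD64_RBP(), hregAMD64_R10(), 3);
+
+   The shift field encodes the scale as log2, so 3 means index*8. */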
+
+
+/* --------- Operand, which can be reg, immediate or memory. --------- */
+
+typedef
+ enum {
+ Armi_Imm,
+ Armi_Reg,
+ Armi_Mem
+ }
+ AMD64RMITag;
+
+typedef
+ struct {
+ AMD64RMITag tag;
+ union {
+ struct {
+ UInt imm32;
+ } Imm;
+ struct {
+ HReg reg;
+ } Reg;
+ struct {
+ AMD64AMode* am;
+ } Mem;
+ }
+ Armi;
+ }
+ AMD64RMI;
+
+extern AMD64RMI* AMD64RMI_Imm ( UInt );
+extern AMD64RMI* AMD64RMI_Reg ( HReg );
+extern AMD64RMI* AMD64RMI_Mem ( AMD64AMode* );
+
+extern void ppAMD64RMI ( AMD64RMI* );
+
+
+/* --------- Operand, which can be reg or immediate only. --------- */
+
+typedef
+ enum {
+ Ari_Imm,
+ Ari_Reg
+ }
+ AMD64RITag;
+
+typedef
+ struct {
+ AMD64RITag tag;
+ union {
+ struct {
+ UInt imm32;
+ } Imm;
+ struct {
+ HReg reg;
+ } Reg;
+ }
+ Ari;
+ }
+ AMD64RI;
+
+extern AMD64RI* AMD64RI_Imm ( UInt );
+extern AMD64RI* AMD64RI_Reg ( HReg );
+
+extern void ppAMD64RI ( AMD64RI* );
+
+
+/* --------- Operand, which can be reg or memory only. --------- */
+
+typedef
+ enum {
+ Arm_Reg,
+ Arm_Mem
+ }
+ AMD64RMTag;
+
+typedef
+ struct {
+ AMD64RMTag tag;
+ union {
+ struct {
+ HReg reg;
+ } Reg;
+ struct {
+ AMD64AMode* am;
+ } Mem;
+ }
+ Arm;
+ }
+ AMD64RM;
+
+extern AMD64RM* AMD64RM_Reg ( HReg );
+extern AMD64RM* AMD64RM_Mem ( AMD64AMode* );
+
+extern void ppAMD64RM ( AMD64RM* );
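+
+/* Illustrative note: these three operand kinds mirror the
+   flexibility of the underlying instructions.  For example, an
+   Alu64R's source may be AMD64RMI_Imm(42), AMD64RMI_Reg(r) or
+   AMD64RMI_Mem(am), while its destination is always a register. */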
+
+
+/* --------- Instructions. --------- */
+
+/* --------- */
+typedef
+ enum {
+ Aun_NEG,
+ Aun_NOT
+ }
+ AMD64UnaryOp;
+
+extern HChar* showAMD64UnaryOp ( AMD64UnaryOp );
+
+
+/* --------- */
+typedef
+ enum {
+ Aalu_INVALID,
+ Aalu_MOV,
+ Aalu_CMP,
+ Aalu_ADD, Aalu_SUB, Aalu_ADC, Aalu_SBB,
+ Aalu_AND, Aalu_OR, Aalu_XOR,
+ Aalu_MUL
+ }
+ AMD64AluOp;
+
+extern HChar* showAMD64AluOp ( AMD64AluOp );
+
+
+/* --------- */
+typedef
+ enum {
+ Ash_INVALID,
+ Ash_SHL, Ash_SHR, Ash_SAR
+ }
+ AMD64ShiftOp;
+
+extern HChar* showAMD64ShiftOp ( AMD64ShiftOp );
+
+
+/* --------- */
+typedef
+ enum {
+ Afp_INVALID,
+ /* Binary */
+ Afp_SCALE, Afp_ATAN, Afp_YL2X, Afp_YL2XP1, Afp_PREM, Afp_PREM1,
+ /* Unary */
+ Afp_SQRT,
+ Afp_SIN, Afp_COS, Afp_TAN,
+ Afp_ROUND, Afp_2XM1
+ }
+ A87FpOp;
+
+extern HChar* showA87FpOp ( A87FpOp );
+
+
+/* --------- */
+typedef
+ enum {
+ Asse_INVALID,
+ /* mov */
+ Asse_MOV,
+ /* Floating point binary */
+ Asse_ADDF, Asse_SUBF, Asse_MULF, Asse_DIVF,
+ Asse_MAXF, Asse_MINF,
+ Asse_CMPEQF, Asse_CMPLTF, Asse_CMPLEF, Asse_CMPUNF,
+ /* Floating point unary */
+ Asse_RCPF, Asse_RSQRTF, Asse_SQRTF,
+ /* Bitwise */
+ Asse_AND, Asse_OR, Asse_XOR, Asse_ANDN,
+ Asse_ADD8, Asse_ADD16, Asse_ADD32, Asse_ADD64,
+ Asse_QADD8U, Asse_QADD16U,
+ Asse_QADD8S, Asse_QADD16S,
+ Asse_SUB8, Asse_SUB16, Asse_SUB32, Asse_SUB64,
+ Asse_QSUB8U, Asse_QSUB16U,
+ Asse_QSUB8S, Asse_QSUB16S,
+ Asse_MUL16,
+ Asse_MULHI16U,
+ Asse_MULHI16S,
+ Asse_AVG8U, Asse_AVG16U,
+ Asse_MAX16S,
+ Asse_MAX8U,
+ Asse_MIN16S,
+ Asse_MIN8U,
+ Asse_CMPEQ8, Asse_CMPEQ16, Asse_CMPEQ32,
+ Asse_CMPGT8S, Asse_CMPGT16S, Asse_CMPGT32S,
+ Asse_SHL16, Asse_SHL32, Asse_SHL64,
+ Asse_SHR16, Asse_SHR32, Asse_SHR64,
+ Asse_SAR16, Asse_SAR32,
+ Asse_PACKSSD, Asse_PACKSSW, Asse_PACKUSW,
+ Asse_UNPCKHB, Asse_UNPCKHW, Asse_UNPCKHD, Asse_UNPCKHQ,
+ Asse_UNPCKLB, Asse_UNPCKLW, Asse_UNPCKLD, Asse_UNPCKLQ
+ }
+ AMD64SseOp;
+
+extern HChar* showAMD64SseOp ( AMD64SseOp );
+
+
+/* --------- */
+typedef
+ enum {
+ Ain_Imm64, /* Generate 64-bit literal to register */
+ Ain_Alu64R, /* 64-bit mov/arith/logical, dst=REG */
+ Ain_Alu64M, /* 64-bit mov/arith/logical, dst=MEM */
+ Ain_Sh64, /* 64-bit shift/rotate, dst=REG or MEM */
+ Ain_Test64, /* 64-bit test (AND, set flags, discard result) */
+ Ain_Unary64, /* 64-bit not and neg */
+ Ain_Lea64, /* 64-bit compute EA into a reg */
+ Ain_MulL, /* widening multiply */
+ Ain_Div, /* div and mod */
+//.. Xin_Sh3232, /* shldl or shrdl */
+ Ain_Push, /* push 64-bit value on stack */
+ Ain_Call, /* call to address in register */
+ Ain_Goto, /* conditional/unconditional jmp to dst */
+ Ain_CMov64, /* conditional move */
+ Ain_MovxLQ, /* reg-reg move, zx-ing/sx-ing top half */
+ Ain_LoadEX, /* mov{s,z}{b,w,l}q from mem to reg */
+ Ain_Store, /* store 32/16/8 bit value in memory */
+ Ain_Set64, /* convert condition code to 64-bit value */
+ Ain_Bsfr64, /* 64-bit bsf/bsr */
+ Ain_MFence, /* mem fence */
+ Ain_ACAS, /* 8/16/32/64-bit lock;cmpxchg */
+ Ain_DACAS, /* lock;cmpxchg8b/16b (doubleword ACAS, 2 x
+ 32-bit or 2 x 64-bit only) */
+
+ Ain_A87Free, /* free up x87 registers */
+ Ain_A87PushPop, /* x87 loads/stores */
+ Ain_A87FpOp, /* x87 operations */
+ Ain_A87LdCW, /* load x87 control word */
+ Ain_A87StSW, /* store x87 status word */
+//..
+//.. Xin_FpUnary, /* FP fake unary op */
+//.. Xin_FpBinary, /* FP fake binary op */
+//.. Xin_FpLdSt, /* FP fake load/store */
+//.. Xin_FpLdStI, /* FP fake load/store, converting to/from Int */
+//.. Xin_Fp64to32, /* FP round IEEE754 double to IEEE754 single */
+//.. Xin_FpCMov, /* FP fake floating point conditional move */
+ Ain_LdMXCSR, /* load %mxcsr */
+//.. Xin_FpStSW_AX, /* fstsw %ax */
+ Ain_SseUComIS, /* ucomisd/ucomiss, then get %rflags into int
+ register */
+ Ain_SseSI2SF, /* scalar 32/64 int to 32/64 float conversion */
+ Ain_SseSF2SI, /* scalar 32/64 float to 32/64 int conversion */
+ Ain_SseSDSS, /* scalar float32 to/from float64 */
+//..
+//.. Xin_SseConst, /* Generate restricted SSE literal */
+ Ain_SseLdSt, /* SSE load/store 32/64/128 bits, no alignment
+ constraints, upper 96/64/0 bits arbitrary */
+ Ain_SseLdzLO, /* SSE load low 32/64 bits, zero remainder of reg */
+ Ain_Sse32Fx4, /* SSE binary, 32Fx4 */
+ Ain_Sse32FLo, /* SSE binary, 32F in lowest lane only */
+ Ain_Sse64Fx2, /* SSE binary, 64Fx2 */
+ Ain_Sse64FLo, /* SSE binary, 64F in lowest lane only */
+ Ain_SseReRg, /* SSE binary general reg-reg, Re, Rg */
+ Ain_SseCMov, /* SSE conditional move */
+ Ain_SseShuf /* SSE2 shuffle (pshufd) */
+ }
+ AMD64InstrTag;
+
+/* Destinations are on the RIGHT (second operand) */
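+/* For example (illustrative): AMD64Instr_Alu64R(Aalu_ADD, src, dst)
+   denotes "addq src, dst", i.e. dst := dst + src, following AT&T
+   operand order. */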
+
+typedef
+ struct {
+ AMD64InstrTag tag;
+ union {
+ struct {
+ ULong imm64;
+ HReg dst;
+ } Imm64;
+ struct {
+ AMD64AluOp op;
+ AMD64RMI* src;
+ HReg dst;
+ } Alu64R;
+ struct {
+ AMD64AluOp op;
+ AMD64RI* src;
+ AMD64AMode* dst;
+ } Alu64M;
+ struct {
+ AMD64ShiftOp op;
+ UInt src; /* shift amount, or 0 means %cl */
+ HReg dst;
+ } Sh64;
+ struct {
+ UInt imm32;
+ HReg dst;
+ } Test64;
+ /* Not and Neg */
+ struct {
+ AMD64UnaryOp op;
+ HReg dst;
+ } Unary64;
+ /* 64-bit compute EA into a reg */
+ struct {
+ AMD64AMode* am;
+ HReg dst;
+ } Lea64;
+ /* 64 x 64 -> 128 bit widening multiply: RDX:RAX = RAX *s/u
+ r/m64 */
+ struct {
+ Bool syned;
+ AMD64RM* src;
+ } MulL;
+ /* amd64 div/idiv instruction. Modifies RDX and RAX and
+ reads src. */
+ struct {
+ Bool syned;
+ Int sz; /* 4 or 8 only */
+ AMD64RM* src;
+ } Div;
+//.. /* shld/shrd. op may only be Xsh_SHL or Xsh_SHR */
+//.. struct {
+//.. X86ShiftOp op;
+//.. UInt amt; /* shift amount, or 0 means %cl */
+//.. HReg src;
+//.. HReg dst;
+//.. } Sh3232;
+ struct {
+ AMD64RMI* src;
+ } Push;
+ /* Pseudo-insn. Call target (an absolute address), on given
+            condition (which could be Acc_ALWAYS). */
+ struct {
+ AMD64CondCode cond;
+ Addr64 target;
+ Int regparms; /* 0 .. 6 */
+ } Call;
+ /* Pseudo-insn. Goto dst, on given condition (which could be
+ Acc_ALWAYS). */
+ struct {
+ IRJumpKind jk;
+ AMD64CondCode cond;
+ AMD64RI* dst;
+ } Goto;
+ /* Mov src to dst on the given condition, which may not
+ be the bogus Acc_ALWAYS. */
+ struct {
+ AMD64CondCode cond;
+ AMD64RM* src;
+ HReg dst;
+ } CMov64;
+ /* reg-reg move, sx-ing/zx-ing top half */
+ struct {
+ Bool syned;
+ HReg src;
+ HReg dst;
+ } MovxLQ;
+ /* Sign/Zero extending loads. Dst size is always 64 bits. */
+ struct {
+ UChar szSmall; /* only 1, 2 or 4 */
+ Bool syned;
+ AMD64AMode* src;
+ HReg dst;
+ } LoadEX;
+ /* 32/16/8 bit stores. */
+ struct {
+ UChar sz; /* only 1, 2 or 4 */
+ HReg src;
+ AMD64AMode* dst;
+ } Store;
+ /* Convert an amd64 condition code to a 64-bit value (0 or 1). */
+ struct {
+ AMD64CondCode cond;
+ HReg dst;
+ } Set64;
+ /* 64-bit bsf or bsr. */
+ struct {
+ Bool isFwds;
+ HReg src;
+ HReg dst;
+ } Bsfr64;
+ /* Mem fence. In short, an insn which flushes all preceding
+ loads and stores as much as possible before continuing.
+ On AMD64 we emit a real "mfence". */
+ struct {
+ } MFence;
+ struct {
+ AMD64AMode* addr;
+ UChar sz; /* 1, 2, 4 or 8 */
+ } ACAS;
+ struct {
+ AMD64AMode* addr;
+ UChar sz; /* 4 or 8 only */
+ } DACAS;
+
+ /* --- X87 --- */
+
+ /* A very minimal set of x87 insns, that operate exactly in a
+ stack-like way so no need to think about x87 registers. */
+
+ /* Do 'ffree' on %st(7) .. %st(7-nregs) */
+ struct {
+ Int nregs; /* 1 <= nregs <= 7 */
+ } A87Free;
+
+ /* Push a 32- or 64-bit FP value from memory onto the stack,
+ or move a value from the stack to memory and remove it
+ from the stack. */
+ struct {
+ AMD64AMode* addr;
+ Bool isPush;
+ UChar szB; /* 4 or 8 */
+ } A87PushPop;
+
+ /* Do an operation on the top-of-stack. This can be unary, in
+ which case it is %st0 = OP( %st0 ), or binary: %st0 = OP(
+ %st0, %st1 ). */
+ struct {
+ A87FpOp op;
+ } A87FpOp;
+
+ /* Load the FPU control word. */
+ struct {
+ AMD64AMode* addr;
+ } A87LdCW;
+
+ /* Store the FPU status word (fstsw m16) */
+ struct {
+ AMD64AMode* addr;
+ } A87StSW;
+
+ /* --- SSE --- */
+
+ /* Load 32 bits into %mxcsr. */
+ struct {
+ AMD64AMode* addr;
+ }
+ LdMXCSR;
+//.. /* fstsw %ax */
+//.. struct {
+//.. /* no fields */
+//.. }
+//.. FpStSW_AX;
+ /* ucomisd/ucomiss, then get %rflags into int register */
+ struct {
+ UChar sz; /* 4 or 8 only */
+ HReg srcL; /* xmm */
+ HReg srcR; /* xmm */
+ HReg dst; /* int */
+ } SseUComIS;
+ /* scalar 32/64 int to 32/64 float conversion */
+ struct {
+ UChar szS; /* 4 or 8 */
+ UChar szD; /* 4 or 8 */
+ HReg src; /* i class */
+ HReg dst; /* v class */
+ } SseSI2SF;
+ /* scalar 32/64 float to 32/64 int conversion */
+ struct {
+ UChar szS; /* 4 or 8 */
+ UChar szD; /* 4 or 8 */
+ HReg src; /* v class */
+ HReg dst; /* i class */
+ } SseSF2SI;
+ /* scalar float32 to/from float64 */
+ struct {
+ Bool from64; /* True: 64->32; False: 32->64 */
+ HReg src;
+ HReg dst;
+ } SseSDSS;
+//..
+//.. /* Simplistic SSE[123] */
+//.. struct {
+//.. UShort con;
+//.. HReg dst;
+//.. } SseConst;
+ struct {
+ Bool isLoad;
+ UChar sz; /* 4, 8 or 16 only */
+ HReg reg;
+ AMD64AMode* addr;
+ } SseLdSt;
+ struct {
+ Int sz; /* 4 or 8 only */
+ HReg reg;
+ AMD64AMode* addr;
+ } SseLdzLO;
+ struct {
+ AMD64SseOp op;
+ HReg src;
+ HReg dst;
+ } Sse32Fx4;
+ struct {
+ AMD64SseOp op;
+ HReg src;
+ HReg dst;
+ } Sse32FLo;
+ struct {
+ AMD64SseOp op;
+ HReg src;
+ HReg dst;
+ } Sse64Fx2;
+ struct {
+ AMD64SseOp op;
+ HReg src;
+ HReg dst;
+ } Sse64FLo;
+ struct {
+ AMD64SseOp op;
+ HReg src;
+ HReg dst;
+ } SseReRg;
+ /* Mov src to dst on the given condition, which may not
+            be the bogus Acc_ALWAYS. */
+ struct {
+ AMD64CondCode cond;
+ HReg src;
+ HReg dst;
+ } SseCMov;
+ struct {
+ Int order; /* 0 <= order <= 0xFF */
+ HReg src;
+ HReg dst;
+ } SseShuf;
+
+ } Ain;
+ }
+ AMD64Instr;
+
+extern AMD64Instr* AMD64Instr_Imm64 ( ULong imm64, HReg dst );
+extern AMD64Instr* AMD64Instr_Alu64R ( AMD64AluOp, AMD64RMI*, HReg );
+extern AMD64Instr* AMD64Instr_Alu64M ( AMD64AluOp, AMD64RI*, AMD64AMode* );
+extern AMD64Instr* AMD64Instr_Unary64 ( AMD64UnaryOp op, HReg dst );
+extern AMD64Instr* AMD64Instr_Lea64 ( AMD64AMode* am, HReg dst );
+extern AMD64Instr* AMD64Instr_Sh64 ( AMD64ShiftOp, UInt, HReg );
+extern AMD64Instr* AMD64Instr_Test64 ( UInt imm32, HReg dst );
+extern AMD64Instr* AMD64Instr_MulL ( Bool syned, AMD64RM* );
+extern AMD64Instr* AMD64Instr_Div ( Bool syned, Int sz, AMD64RM* );
+//.. extern AMD64Instr* AMD64Instr_Sh3232 ( AMD64ShiftOp, UInt amt, HReg src, HReg dst );
+extern AMD64Instr* AMD64Instr_Push ( AMD64RMI* );
+extern AMD64Instr* AMD64Instr_Call ( AMD64CondCode, Addr64, Int );
+extern AMD64Instr* AMD64Instr_Goto ( IRJumpKind, AMD64CondCode cond, AMD64RI* dst );
+extern AMD64Instr* AMD64Instr_CMov64 ( AMD64CondCode, AMD64RM* src, HReg dst );
+extern AMD64Instr* AMD64Instr_MovxLQ ( Bool syned, HReg src, HReg dst );
+extern AMD64Instr* AMD64Instr_LoadEX ( UChar szSmall, Bool syned,
+ AMD64AMode* src, HReg dst );
+extern AMD64Instr* AMD64Instr_Store ( UChar sz, HReg src, AMD64AMode* dst );
+extern AMD64Instr* AMD64Instr_Set64 ( AMD64CondCode cond, HReg dst );
+extern AMD64Instr* AMD64Instr_Bsfr64 ( Bool isFwds, HReg src, HReg dst );
+extern AMD64Instr* AMD64Instr_MFence ( void );
+extern AMD64Instr* AMD64Instr_ACAS ( AMD64AMode* addr, UChar sz );
+extern AMD64Instr* AMD64Instr_DACAS ( AMD64AMode* addr, UChar sz );
+
+extern AMD64Instr* AMD64Instr_A87Free ( Int nregs );
+extern AMD64Instr* AMD64Instr_A87PushPop ( AMD64AMode* addr, Bool isPush, UChar szB );
+extern AMD64Instr* AMD64Instr_A87FpOp ( A87FpOp op );
+extern AMD64Instr* AMD64Instr_A87LdCW ( AMD64AMode* addr );
+extern AMD64Instr* AMD64Instr_A87StSW ( AMD64AMode* addr );
+//..
+//.. extern AMD64Instr* AMD64Instr_FpUnary ( AMD64FpOp op, HReg src, HReg dst );
+//.. extern AMD64Instr* AMD64Instr_FpBinary ( AMD64FpOp op, HReg srcL, HReg srcR, HReg dst );
+//.. extern AMD64Instr* AMD64Instr_FpLdSt ( Bool isLoad, UChar sz, HReg reg, AMD64AMode* );
+//.. extern AMD64Instr* AMD64Instr_FpLdStI ( Bool isLoad, UChar sz, HReg reg, AMD64AMode* );
+//.. extern AMD64Instr* AMD64Instr_Fp64to32 ( HReg src, HReg dst );
+//.. extern AMD64Instr* AMD64Instr_FpCMov ( AMD64CondCode, HReg src, HReg dst );
+extern AMD64Instr* AMD64Instr_LdMXCSR ( AMD64AMode* );
+//.. extern AMD64Instr* AMD64Instr_FpStSW_AX ( void );
+extern AMD64Instr* AMD64Instr_SseUComIS ( Int sz, HReg srcL, HReg srcR, HReg dst );
+extern AMD64Instr* AMD64Instr_SseSI2SF ( Int szS, Int szD, HReg src, HReg dst );
+extern AMD64Instr* AMD64Instr_SseSF2SI ( Int szS, Int szD, HReg src, HReg dst );
+extern AMD64Instr* AMD64Instr_SseSDSS ( Bool from64, HReg src, HReg dst );
+//..
+//.. extern AMD64Instr* AMD64Instr_SseConst ( UShort con, HReg dst );
+extern AMD64Instr* AMD64Instr_SseLdSt ( Bool isLoad, Int sz, HReg, AMD64AMode* );
+extern AMD64Instr* AMD64Instr_SseLdzLO ( Int sz, HReg, AMD64AMode* );
+extern AMD64Instr* AMD64Instr_Sse32Fx4 ( AMD64SseOp, HReg, HReg );
+extern AMD64Instr* AMD64Instr_Sse32FLo ( AMD64SseOp, HReg, HReg );
+extern AMD64Instr* AMD64Instr_Sse64Fx2 ( AMD64SseOp, HReg, HReg );
+extern AMD64Instr* AMD64Instr_Sse64FLo ( AMD64SseOp, HReg, HReg );
+extern AMD64Instr* AMD64Instr_SseReRg ( AMD64SseOp, HReg, HReg );
+extern AMD64Instr* AMD64Instr_SseCMov ( AMD64CondCode, HReg src, HReg dst );
+extern AMD64Instr* AMD64Instr_SseShuf ( Int order, HReg src, HReg dst );
+
+
+extern void ppAMD64Instr ( AMD64Instr*, Bool );
+
+/* Some functions that insulate the register allocator from details
+ of the underlying instruction set. */
+extern void getRegUsage_AMD64Instr ( HRegUsage*, AMD64Instr*, Bool );
+extern void mapRegs_AMD64Instr ( HRegRemap*, AMD64Instr*, Bool );
+extern Bool isMove_AMD64Instr ( AMD64Instr*, HReg*, HReg* );
+extern Int emit_AMD64Instr ( UChar* buf, Int nbuf, AMD64Instr*,
+ Bool, void* dispatch );
+
+extern void genSpill_AMD64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
+ HReg rreg, Int offset, Bool );
+extern void genReload_AMD64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
+ HReg rreg, Int offset, Bool );
+
+extern void getAllocableRegs_AMD64 ( Int*, HReg** );
+extern HInstrArray* iselSB_AMD64 ( IRSB*, VexArch,
+ VexArchInfo*,
+ VexAbiInfo* );
+
+#endif /* ndef __VEX_HOST_AMD64_DEFS_H */
+
+/*---------------------------------------------------------------*/
+/*--- end host_amd64_defs.h ---*/
+/*---------------------------------------------------------------*/
diff --git a/VEX/priv/host_amd64_isel.c b/VEX/priv/host_amd64_isel.c
new file mode 100644
index 0000000..a54444a
--- /dev/null
+++ b/VEX/priv/host_amd64_isel.c
@@ -0,0 +1,4140 @@
+
+/*---------------------------------------------------------------*/
+/*--- begin host_amd64_isel.c ---*/
+/*---------------------------------------------------------------*/
+
+/*
+ This file is part of Valgrind, a dynamic binary instrumentation
+ framework.
+
+ Copyright (C) 2004-2010 OpenWorks LLP
+ info@open-works.net
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA.
+
+ The GNU General Public License is contained in the file COPYING.
+
+ Neither the names of the U.S. Department of Energy nor the
+ University of California nor the names of its contributors may be
+ used to endorse or promote products derived from this software
+ without prior written permission.
+*/
+
+#include "libvex_basictypes.h"
+#include "libvex_ir.h"
+#include "libvex.h"
+
+#include "ir_match.h"
+#include "main_util.h"
+#include "main_globals.h"
+#include "host_generic_regs.h"
+#include "host_generic_simd64.h"
+#include "host_generic_simd128.h"
+#include "host_amd64_defs.h"
+
+
+/*---------------------------------------------------------*/
+/*--- x87/SSE control word stuff ---*/
+/*---------------------------------------------------------*/
+
+/* Vex-generated code expects to run with the FPU set as follows: all
+ exceptions masked, round-to-nearest, precision = 53 bits. This
+   corresponds to an FPU control word value of 0x027F.
+
+ Similarly the SSE control word (%mxcsr) should be 0x1F80.
+
+ %fpucw and %mxcsr should have these values on entry to
+   Vex-generated code, and those values should be
+ unchanged at exit.
+*/
+
+#define DEFAULT_FPUCW 0x027F
+
+#define DEFAULT_MXCSR 0x1F80
+
+/* debugging only, do not use */
+/* define DEFAULT_FPUCW 0x037F */
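+
+/* Illustrative decode (my reading of the Intel encodings, not part
+   of the original source): in 0x027F, bits 0..5 mask all six x87
+   exceptions, PC (bits 8..9) = 10b selects 53-bit precision, and RC
+   (bits 10..11) = 00b selects round-to-nearest.  In 0x1F80, bits
+   7..12 mask all six SSE exceptions and RC (bits 13..14) = 00b
+   again selects round-to-nearest. */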
+
+
+/*---------------------------------------------------------*/
+/*--- misc helpers ---*/
+/*---------------------------------------------------------*/
+
+/* These are duplicated in guest-amd64/toIR.c */
+static IRExpr* unop ( IROp op, IRExpr* a )
+{
+ return IRExpr_Unop(op, a);
+}
+
+static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
+{
+ return IRExpr_Binop(op, a1, a2);
+}
+
+static IRExpr* bind ( Int binder )
+{
+ return IRExpr_Binder(binder);
+}
+
+
+/*---------------------------------------------------------*/
+/*--- ISelEnv ---*/
+/*---------------------------------------------------------*/
+
+/* This carries around:
+
+ - A mapping from IRTemp to IRType, giving the type of any IRTemp we
+ might encounter. This is computed before insn selection starts,
+ and does not change.
+
+ - A mapping from IRTemp to HReg. This tells the insn selector
+ which virtual register is associated with each IRTemp
+ temporary. This is computed before insn selection starts, and
+ does not change. We expect this mapping to map precisely the
+ same set of IRTemps as the type mapping does.
+
+ - vregmap holds the primary register for the IRTemp.
+ - vregmapHI is only used for 128-bit integer-typed
+ IRTemps. It holds the identity of a second
+ 64-bit virtual HReg, which holds the high half
+ of the value.
+
+ - The code array, that is, the insns selected so far.
+
+ - A counter, for generating new virtual registers.
+
+ - The host subarchitecture we are selecting insns for.
+ This is set at the start and does not change.
+
+ Note, this is all host-independent. (JRS 20050201: well, kinda
+ ... not completely. Compare with ISelEnv for X86.)
+*/
+
+typedef
+ struct {
+ IRTypeEnv* type_env;
+
+ HReg* vregmap;
+ HReg* vregmapHI;
+ Int n_vregmap;
+
+ HInstrArray* code;
+
+ Int vreg_ctr;
+
+ UInt hwcaps;
+ }
+ ISelEnv;
+
+
+static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp )
+{
+ vassert(tmp >= 0);
+ vassert(tmp < env->n_vregmap);
+ return env->vregmap[tmp];
+}
+
+static void lookupIRTemp128 ( HReg* vrHI, HReg* vrLO,
+ ISelEnv* env, IRTemp tmp )
+{
+ vassert(tmp >= 0);
+ vassert(tmp < env->n_vregmap);
+ vassert(env->vregmapHI[tmp] != INVALID_HREG);
+ *vrLO = env->vregmap[tmp];
+ *vrHI = env->vregmapHI[tmp];
+}
+
+static void addInstr ( ISelEnv* env, AMD64Instr* instr )
+{
+ addHInstr(env->code, instr);
+ if (vex_traceflags & VEX_TRACE_VCODE) {
+ ppAMD64Instr(instr, True);
+ vex_printf("\n");
+ }
+}
+
+static HReg newVRegI ( ISelEnv* env )
+{
+ HReg reg = mkHReg(env->vreg_ctr, HRcInt64, True/*virtual reg*/);
+ env->vreg_ctr++;
+ return reg;
+}
+
+//.. static HReg newVRegF ( ISelEnv* env )
+//.. {
+//.. HReg reg = mkHReg(env->vreg_ctr, HRcFlt64, True/*virtual reg*/);
+//.. env->vreg_ctr++;
+//.. return reg;
+//.. }
+
+static HReg newVRegV ( ISelEnv* env )
+{
+ HReg reg = mkHReg(env->vreg_ctr, HRcVec128, True/*virtual reg*/);
+ env->vreg_ctr++;
+ return reg;
+}
+
+
+/*---------------------------------------------------------*/
+/*--- ISEL: Forward declarations ---*/
+/*---------------------------------------------------------*/
+
+/* These are organised as iselXXX and iselXXX_wrk pairs. The
+   iselXXX_wrk functions do the real work, but are not to be called
+   directly.  For each XXX, iselXXX calls its iselXXX_wrk
+   counterpart, then checks that all returned registers are virtual.
+*/
+static AMD64RMI* iselIntExpr_RMI_wrk ( ISelEnv* env, IRExpr* e );
+static AMD64RMI* iselIntExpr_RMI ( ISelEnv* env, IRExpr* e );
+
+static AMD64RI* iselIntExpr_RI_wrk ( ISelEnv* env, IRExpr* e );
+static AMD64RI* iselIntExpr_RI ( ISelEnv* env, IRExpr* e );
+
+static AMD64RM* iselIntExpr_RM_wrk ( ISelEnv* env, IRExpr* e );
+static AMD64RM* iselIntExpr_RM ( ISelEnv* env, IRExpr* e );
+
+static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e );
+static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e );
+
+static AMD64AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, IRExpr* e );
+static AMD64AMode* iselIntExpr_AMode ( ISelEnv* env, IRExpr* e );
+
+static void iselInt128Expr_wrk ( HReg* rHi, HReg* rLo,
+ ISelEnv* env, IRExpr* e );
+static void iselInt128Expr ( HReg* rHi, HReg* rLo,
+ ISelEnv* env, IRExpr* e );
+
+static AMD64CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e );
+static AMD64CondCode iselCondCode ( ISelEnv* env, IRExpr* e );
+
+static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e );
+static HReg iselDblExpr ( ISelEnv* env, IRExpr* e );
+
+static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e );
+static HReg iselFltExpr ( ISelEnv* env, IRExpr* e );
+
+static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e );
+static HReg iselVecExpr ( ISelEnv* env, IRExpr* e );
+
+
+/*---------------------------------------------------------*/
+/*--- ISEL: Misc helpers ---*/
+/*---------------------------------------------------------*/
+
+static Bool sane_AMode ( AMD64AMode* am )
+{
+ switch (am->tag) {
+ case Aam_IR:
+ return
+ toBool( hregClass(am->Aam.IR.reg) == HRcInt64
+ && (hregIsVirtual(am->Aam.IR.reg)
+ || am->Aam.IR.reg == hregAMD64_RBP()) );
+ case Aam_IRRS:
+ return
+ toBool( hregClass(am->Aam.IRRS.base) == HRcInt64
+ && hregIsVirtual(am->Aam.IRRS.base)
+ && hregClass(am->Aam.IRRS.index) == HRcInt64
+ && hregIsVirtual(am->Aam.IRRS.index) );
+ default:
+ vpanic("sane_AMode: unknown amd64 amode tag");
+ }
+}
+
+
+/* Can the lower 32 bits be signedly widened to produce the whole
+ 64-bit value? In other words, are the top 33 bits either all 0 or
+ all 1 ? */
+static Bool fitsIn32Bits ( ULong x )
+{
+ Long y0 = (Long)x;
+ Long y1 = y0;
+ y1 <<= 32;
+ y1 >>=/*s*/ 32;
+ return toBool(x == y1);
+}
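+
+/* For example: fitsIn32Bits(0xFFFFFFFF80000000ULL) is True, since
+   that is the sign extension of 0x80000000; but
+   fitsIn32Bits(0x0000000080000000ULL) is False, since its top 33
+   bits are neither all 0 nor all 1. */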
+
+/* Is this a 64-bit zero expression? */
+
+static Bool isZeroU64 ( IRExpr* e )
+{
+ return e->tag == Iex_Const
+ && e->Iex.Const.con->tag == Ico_U64
+ && e->Iex.Const.con->Ico.U64 == 0ULL;
+}
+
+static Bool isZeroU32 ( IRExpr* e )
+{
+ return e->tag == Iex_Const
+ && e->Iex.Const.con->tag == Ico_U32
+ && e->Iex.Const.con->Ico.U32 == 0;
+}
+
+/* Make an int reg-reg move. */
+
+static AMD64Instr* mk_iMOVsd_RR ( HReg src, HReg dst )
+{
+ vassert(hregClass(src) == HRcInt64);
+ vassert(hregClass(dst) == HRcInt64);
+ return AMD64Instr_Alu64R(Aalu_MOV, AMD64RMI_Reg(src), dst);
+}
+
+/* Make a vector reg-reg move. */
+
+static AMD64Instr* mk_vMOVsd_RR ( HReg src, HReg dst )
+{
+ vassert(hregClass(src) == HRcVec128);
+ vassert(hregClass(dst) == HRcVec128);
+ return AMD64Instr_SseReRg(Asse_MOV, src, dst);
+}
+
+/* Advance/retreat %rsp by n. */
+
+static void add_to_rsp ( ISelEnv* env, Int n )
+{
+ vassert(n > 0 && n < 256 && (n%8) == 0);
+ addInstr(env,
+ AMD64Instr_Alu64R(Aalu_ADD, AMD64RMI_Imm(n),
+ hregAMD64_RSP()));
+}
+
+static void sub_from_rsp ( ISelEnv* env, Int n )
+{
+ vassert(n > 0 && n < 256 && (n%8) == 0);
+ addInstr(env,
+ AMD64Instr_Alu64R(Aalu_SUB, AMD64RMI_Imm(n),
+ hregAMD64_RSP()));
+}
+
+/* Push 64-bit constants on the stack. */
+static void push_uimm64( ISelEnv* env, ULong uimm64 )
+{
+ /* If uimm64 can be expressed as the sign extension of its
+ lower 32 bits, we can do it the easy way. */
+ Long simm64 = (Long)uimm64;
+ if ( simm64 == ((simm64 << 32) >> 32) ) {
+ addInstr( env, AMD64Instr_Push(AMD64RMI_Imm( (UInt)uimm64 )) );
+ } else {
+ HReg tmp = newVRegI(env);
+ addInstr( env, AMD64Instr_Imm64(uimm64, tmp) );
+ addInstr( env, AMD64Instr_Push(AMD64RMI_Reg(tmp)) );
+ }
+}
+
+//.. /* Given an amode, return one which references 4 bytes further
+//.. along. */
+//..
+//.. static X86AMode* advance4 ( X86AMode* am )
+//.. {
+//.. X86AMode* am4 = dopyX86AMode(am);
+//.. switch (am4->tag) {
+//.. case Xam_IRRS:
+//.. am4->Xam.IRRS.imm += 4; break;
+//.. case Xam_IR:
+//.. am4->Xam.IR.imm += 4; break;
+//.. default:
+//.. vpanic("advance4(x86,host)");
+//.. }
+//.. return am4;
+//.. }
+//..
+//..
+//.. /* Push an arg onto the host stack, in preparation for a call to a
+//.. helper function of some kind. Returns the number of 32-bit words
+//.. pushed. */
+//..
+//.. static Int pushArg ( ISelEnv* env, IRExpr* arg )
+//.. {
+//.. IRType arg_ty = typeOfIRExpr(env->type_env, arg);
+//.. if (arg_ty == Ity_I32) {
+//.. addInstr(env, X86Instr_Push(iselIntExpr_RMI(env, arg)));
+//.. return 1;
+//.. } else
+//.. if (arg_ty == Ity_I64) {
+//.. HReg rHi, rLo;
+//.. iselInt64Expr(&rHi, &rLo, env, arg);
+//.. addInstr(env, X86Instr_Push(X86RMI_Reg(rHi)));
+//.. addInstr(env, X86Instr_Push(X86RMI_Reg(rLo)));
+//.. return 2;
+//.. }
+//.. ppIRExpr(arg);
+//.. vpanic("pushArg(x86): can't handle arg of this type");
+//.. }
+
+
+/* Used only in doHelperCall. If possible, produce a single
+ instruction which computes 'e' into 'dst'. If not possible, return
+ NULL. */
+
+static AMD64Instr* iselIntExpr_single_instruction ( ISelEnv* env,
+ HReg dst,
+ IRExpr* e )
+{
+ vassert(typeOfIRExpr(env->type_env, e) == Ity_I64);
+
+ if (e->tag == Iex_Const) {
+ vassert(e->Iex.Const.con->tag == Ico_U64);
+ if (fitsIn32Bits(e->Iex.Const.con->Ico.U64)) {
+ return AMD64Instr_Alu64R(
+ Aalu_MOV,
+ AMD64RMI_Imm(toUInt(e->Iex.Const.con->Ico.U64)),
+ dst
+ );
+ } else {
+ return AMD64Instr_Imm64(e->Iex.Const.con->Ico.U64, dst);
+ }
+ }
+
+ if (e->tag == Iex_RdTmp) {
+ HReg src = lookupIRTemp(env, e->Iex.RdTmp.tmp);
+ return mk_iMOVsd_RR(src, dst);
+ }
+
+ if (e->tag == Iex_Get) {
+ vassert(e->Iex.Get.ty == Ity_I64);
+ return AMD64Instr_Alu64R(
+ Aalu_MOV,
+ AMD64RMI_Mem(
+ AMD64AMode_IR(e->Iex.Get.offset,
+ hregAMD64_RBP())),
+ dst);
+ }
+
+ if (e->tag == Iex_Unop
+ && e->Iex.Unop.op == Iop_32Uto64
+ && e->Iex.Unop.arg->tag == Iex_RdTmp) {
+ HReg src = lookupIRTemp(env, e->Iex.Unop.arg->Iex.RdTmp.tmp);
+ return AMD64Instr_MovxLQ(False, src, dst);
+ }
+
+ if (0) { ppIRExpr(e); vex_printf("\n"); }
+
+ return NULL;
+}
+
+
+/* Do a complete function call. guard is a Ity_Bit expression
+ indicating whether or not the call happens. If guard==NULL, the
+ call is unconditional. */
+
+static
+void doHelperCall ( ISelEnv* env,
+ Bool passBBP,
+ IRExpr* guard, IRCallee* cee, IRExpr** args )
+{
+ AMD64CondCode cc;
+ HReg argregs[6];
+ HReg tmpregs[6];
+ AMD64Instr* fastinstrs[6];
+ Int n_args, i, argreg;
+
+ /* Marshal args for a call and do the call.
+
+ If passBBP is True, %rbp (the baseblock pointer) is to be passed
+ as the first arg.
+
+ This function only deals with a tiny set of possibilities, which
+ cover all helpers in practice. The restrictions are that only
+ arguments in registers are supported, hence only 6x64 integer
+ bits in total can be passed. In fact the only supported arg
+ type is I64.
+
+ Generating code which is both efficient and correct when
+ parameters are to be passed in registers is difficult, for the
+ reasons elaborated in detail in comments attached to
+ doHelperCall() in priv/host-x86/isel.c. Here, we use a variant
+ of the method described in those comments.
+
+ The problem is split into two cases: the fast scheme and the
+ slow scheme. In the fast scheme, arguments are computed
+ directly into the target (real) registers. This is only safe
+ when we can be sure that computation of each argument will not
+ trash any real registers set by computation of any other
+ argument.
+
+ In the slow scheme, all args are first computed into vregs, and
+ once they are all done, they are moved to the relevant real
+ regs. This always gives correct code, but it also gives a bunch
+ of vreg-to-rreg moves which are usually redundant but are hard
+ for the register allocator to get rid of.
+
+ To decide which scheme to use, all argument expressions are
+ first examined. If they are all so simple that it is clear they
+ will be evaluated without use of any fixed registers, use the
+ fast scheme, else use the slow scheme. Note also that only
+ unconditional calls may use the fast scheme, since having to
+ compute a condition expression could itself trash real
+ registers.
+
+ Note this requires being able to examine an expression and
+ determine whether or not evaluation of it might use a fixed
+ register. That requires knowledge of how the rest of this insn
+ selector works. Currently just the following 3 are regarded as
+ safe -- hopefully they cover the majority of arguments in
+ practice: IRExpr_Tmp IRExpr_Const IRExpr_Get.
+ */
+
+ /* Note that the cee->regparms field is meaningless on AMD64 host
+ (since there is only one calling convention) and so we always
+ ignore it. */
+
+ n_args = 0;
+ for (i = 0; args[i]; i++)
+ n_args++;
+
+ if (6 < n_args + (passBBP ? 1 : 0))
+ vpanic("doHelperCall(AMD64): cannot currently handle > 6 args");
+
+ argregs[0] = hregAMD64_RDI();
+ argregs[1] = hregAMD64_RSI();
+ argregs[2] = hregAMD64_RDX();
+ argregs[3] = hregAMD64_RCX();
+ argregs[4] = hregAMD64_R8();
+ argregs[5] = hregAMD64_R9();
+
+ tmpregs[0] = tmpregs[1] = tmpregs[2] =
+ tmpregs[3] = tmpregs[4] = tmpregs[5] = INVALID_HREG;
+
+ fastinstrs[0] = fastinstrs[1] = fastinstrs[2] =
+ fastinstrs[3] = fastinstrs[4] = fastinstrs[5] = NULL;
+
+ /* First decide which scheme (slow or fast) is to be used. First
+ assume the fast scheme, and select slow if any contraindications
+ (wow) appear. */
+
+ if (guard) {
+ if (guard->tag == Iex_Const
+ && guard->Iex.Const.con->tag == Ico_U1
+ && guard->Iex.Const.con->Ico.U1 == True) {
+ /* unconditional */
+ } else {
+ /* Not manifestly unconditional -- be conservative. */
+ goto slowscheme;
+ }
+ }
+
+ /* Ok, let's try for the fast scheme. If it doesn't pan out, we'll
+ use the slow scheme. Because this is tentative, we can't call
+      addInstr (that is, commit to) any instructions until we've
+ handled all the arguments. So park the resulting instructions
+ in a buffer and emit that if we're successful. */
+
+ /* FAST SCHEME */
+ argreg = 0;
+ if (passBBP) {
+ fastinstrs[argreg] = mk_iMOVsd_RR( hregAMD64_RBP(), argregs[argreg]);
+ argreg++;
+ }
+
+ for (i = 0; i < n_args; i++) {
+ vassert(argreg < 6);
+ vassert(typeOfIRExpr(env->type_env, args[i]) == Ity_I64);
+ fastinstrs[argreg]
+ = iselIntExpr_single_instruction( env, argregs[argreg], args[i] );
+ if (fastinstrs[argreg] == NULL)
+ goto slowscheme;
+ argreg++;
+ }
+
+ /* Looks like we're in luck. Emit the accumulated instructions and
+ move on to doing the call itself. */
+ vassert(argreg <= 6);
+ for (i = 0; i < argreg; i++)
+ addInstr(env, fastinstrs[i]);
+
+ /* Fast scheme only applies for unconditional calls. Hence: */
+ cc = Acc_ALWAYS;
+
+ goto handle_call;
+
+
+ /* SLOW SCHEME; move via temporaries */
+ slowscheme:
+#if 0
+   if (n_args > 0) {
+      for (i = 0; args[i]; i++) {
+         ppIRExpr(args[i]); vex_printf(" ");
+      }
+      vex_printf("\n");
+   }
+#endif
+ argreg = 0;
+
+ if (passBBP) {
+ /* This is pretty stupid; better to move directly to rdi
+ after the rest of the args are done. */
+ tmpregs[argreg] = newVRegI(env);
+ addInstr(env, mk_iMOVsd_RR( hregAMD64_RBP(), tmpregs[argreg]));
+ argreg++;
+ }
+
+ for (i = 0; i < n_args; i++) {
+ vassert(argreg < 6);
+ vassert(typeOfIRExpr(env->type_env, args[i]) == Ity_I64);
+ tmpregs[argreg] = iselIntExpr_R(env, args[i]);
+ argreg++;
+ }
+
+ /* Now we can compute the condition. We can't do it earlier
+ because the argument computations could trash the condition
+ codes. Be a bit clever to handle the common case where the
+ guard is 1:Bit. */
+ cc = Acc_ALWAYS;
+ if (guard) {
+ if (guard->tag == Iex_Const
+ && guard->Iex.Const.con->tag == Ico_U1
+ && guard->Iex.Const.con->Ico.U1 == True) {
+ /* unconditional -- do nothing */
+ } else {
+ cc = iselCondCode( env, guard );
+ }
+ }
+
+ /* Move the args to their final destinations. */
+ for (i = 0; i < argreg; i++) {
+ /* None of these insns, including any spill code that might
+ be generated, may alter the condition codes. */
+ addInstr( env, mk_iMOVsd_RR( tmpregs[i], argregs[i] ) );
+ }
+
+
+ /* Finally, the call itself. */
+ handle_call:
+ addInstr(env, AMD64Instr_Call(
+ cc,
+ Ptr_to_ULong(cee->addr),
+ n_args + (passBBP ? 1 : 0)
+ )
+ );
+}
+
+
+/* Given a guest-state array descriptor, an index expression and a
+ bias, generate an AMD64AMode holding the relevant guest state
+ offset. */
+
+static
+AMD64AMode* genGuestArrayOffset ( ISelEnv* env, IRRegArray* descr,
+ IRExpr* off, Int bias )
+{
+ HReg tmp, roff;
+ Int elemSz = sizeofIRType(descr->elemTy);
+ Int nElems = descr->nElems;
+
+ /* Throw out any cases not generated by an amd64 front end. In
+ theory there might be a day where we need to handle them -- if
+ we ever run non-amd64-guest on amd64 host. */
+
+ if (nElems != 8 || (elemSz != 1 && elemSz != 8))
+ vpanic("genGuestArrayOffset(amd64 host)");
+
+ /* Compute off into a reg, %off. Then return:
+
+ movq %off, %tmp
+ addq $bias, %tmp (if bias != 0)
+         andq $7, %tmp
+ ... base(%rbp, %tmp, shift) ...
+ */
+ tmp = newVRegI(env);
+ roff = iselIntExpr_R(env, off);
+ addInstr(env, mk_iMOVsd_RR(roff, tmp));
+ if (bias != 0) {
+ /* Make sure the bias is sane, in the sense that there are
+ no significant bits above bit 30 in it. */
+ vassert(-10000 < bias && bias < 10000);
+ addInstr(env,
+ AMD64Instr_Alu64R(Aalu_ADD, AMD64RMI_Imm(bias), tmp));
+ }
+ addInstr(env,
+ AMD64Instr_Alu64R(Aalu_AND, AMD64RMI_Imm(7), tmp));
+ vassert(elemSz == 1 || elemSz == 8);
+ return
+ AMD64AMode_IRRS( descr->base, hregAMD64_RBP(), tmp,
+ elemSz==8 ? 3 : 0);
+}
+
+
+/* Set the SSE unit's rounding mode to default (%mxcsr = 0x1F80) */
+static
+void set_SSE_rounding_default ( ISelEnv* env )
+{
+ /* pushq $DEFAULT_MXCSR
+ ldmxcsr 0(%rsp)
+ addq $8, %rsp
+ */
+ AMD64AMode* zero_rsp = AMD64AMode_IR(0, hregAMD64_RSP());
+ addInstr(env, AMD64Instr_Push(AMD64RMI_Imm(DEFAULT_MXCSR)));
+ addInstr(env, AMD64Instr_LdMXCSR(zero_rsp));
+ add_to_rsp(env, 8);
+}
+
+/* Mess with the FPU's rounding mode: set to the default rounding mode
+ (DEFAULT_FPUCW). */
+static
+void set_FPU_rounding_default ( ISelEnv* env )
+{
+ /* movq $DEFAULT_FPUCW, -8(%rsp)
+      fldcw -8(%rsp)
+ */
+ AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
+ addInstr(env, AMD64Instr_Alu64M(
+ Aalu_MOV, AMD64RI_Imm(DEFAULT_FPUCW), m8_rsp));
+ addInstr(env, AMD64Instr_A87LdCW(m8_rsp));
+}
+
+
+/* Mess with the SSE unit's rounding mode: 'mode' is an I32-typed
+ expression denoting a value in the range 0 .. 3, indicating a round
+ mode encoded as per type IRRoundingMode. Set the SSE machinery to
+ have the same rounding.
+*/
+static
+void set_SSE_rounding_mode ( ISelEnv* env, IRExpr* mode )
+{
+ /* Note: this sequence only makes sense because DEFAULT_MXCSR has
+ both rounding bits == 0. If that wasn't the case, we couldn't
+ create a new rounding field simply by ORing the new value into
+ place. */
+
+ /* movq $3, %reg
+ andq [[mode]], %reg -- shouldn't be needed; paranoia
+ shlq $13, %reg
+ orq $DEFAULT_MXCSR, %reg
+ pushq %reg
+      ldmxcsr 0(%rsp)
+ addq $8, %rsp
+ */
+ HReg reg = newVRegI(env);
+ AMD64AMode* zero_rsp = AMD64AMode_IR(0, hregAMD64_RSP());
+ addInstr(env, AMD64Instr_Alu64R(Aalu_MOV, AMD64RMI_Imm(3), reg));
+ addInstr(env, AMD64Instr_Alu64R(Aalu_AND,
+ iselIntExpr_RMI(env, mode), reg));
+ addInstr(env, AMD64Instr_Sh64(Ash_SHL, 13, reg));
+ addInstr(env, AMD64Instr_Alu64R(
+ Aalu_OR, AMD64RMI_Imm(DEFAULT_MXCSR), reg));
+ addInstr(env, AMD64Instr_Push(AMD64RMI_Reg(reg)));
+ addInstr(env, AMD64Instr_LdMXCSR(zero_rsp));
+ add_to_rsp(env, 8);
+}
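+
+/* Worked example (illustrative): for mode == 3 (Irrm_ZERO), the
+   value pushed is (3 << 13) | DEFAULT_MXCSR == 0x7F80, i.e.
+   %mxcsr.RC = 11b (truncate) with all exceptions still masked. */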
+
+
+/* Mess with the FPU's rounding mode: 'mode' is an I32-typed
+ expression denoting a value in the range 0 .. 3, indicating a round
+ mode encoded as per type IRRoundingMode. Set the x87 FPU to have
+ the same rounding.
+*/
+static
+void set_FPU_rounding_mode ( ISelEnv* env, IRExpr* mode )
+{
+ HReg rrm = iselIntExpr_R(env, mode);
+ HReg rrm2 = newVRegI(env);
+ AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
+
+ /* movq %rrm, %rrm2
+ andq $3, %rrm2 -- shouldn't be needed; paranoia
+ shlq $10, %rrm2
+ orq $DEFAULT_FPUCW, %rrm2
+ movq %rrm2, -8(%rsp)
+      fldcw -8(%rsp)
+ */
+ addInstr(env, mk_iMOVsd_RR(rrm, rrm2));
+ addInstr(env, AMD64Instr_Alu64R(Aalu_AND, AMD64RMI_Imm(3), rrm2));
+ addInstr(env, AMD64Instr_Sh64(Ash_SHL, 10, rrm2));
+ addInstr(env, AMD64Instr_Alu64R(Aalu_OR,
+ AMD64RMI_Imm(DEFAULT_FPUCW), rrm2));
+ addInstr(env, AMD64Instr_Alu64M(Aalu_MOV,
+ AMD64RI_Reg(rrm2), m8_rsp));
+ addInstr(env, AMD64Instr_A87LdCW(m8_rsp));
+}
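+
+/* Worked example (illustrative): for mode == 3 (Irrm_ZERO), %rrm2
+   becomes (3 << 10) | DEFAULT_FPUCW == 0x0E7F, i.e. the x87 RC
+   field (bits 10..11) = 11b (truncate), with precision and
+   exception masks unchanged. */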
+
+
+/* Generate all-zeroes into a new vector register.
+*/
+static HReg generate_zeroes_V128 ( ISelEnv* env )
+{
+ HReg dst = newVRegV(env);
+ addInstr(env, AMD64Instr_SseReRg(Asse_XOR, dst, dst));
+ return dst;
+}
+
+/* Generate all-ones into a new vector register.
+*/
+static HReg generate_ones_V128 ( ISelEnv* env )
+{
+ HReg dst = newVRegV(env);
+ addInstr(env, AMD64Instr_SseReRg(Asse_CMPEQ32, dst, dst));
+ return dst;
+}
+
+
+/* Generate !src into a new vector register. Amazing that there isn't
+ a less crappy way to do this.
+*/
+static HReg do_sse_NotV128 ( ISelEnv* env, HReg src )
+{
+ HReg dst = generate_ones_V128(env);
+ addInstr(env, AMD64Instr_SseReRg(Asse_XOR, src, dst));
+ return dst;
+}
+
+
+/* Expand the given byte into a 64-bit word, by cloning each bit
+ 8 times. */
+static ULong bitmask8_to_bytemask64 ( UShort w8 )
+{
+ vassert(w8 == (w8 & 0xFF));
+ ULong w64 = 0;
+ Int i;
+ for (i = 0; i < 8; i++) {
+ if (w8 & (1<<i))
+ w64 |= (0xFFULL << (8 * i));
+ }
+ return w64;
+}
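+
+/* For example: bitmask8_to_bytemask64(0x81) == 0xFF000000000000FFULL
+   -- bits 0 and 7 of the input select bytes 0 and 7 of the result. */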
+
+
+//.. /* Round an x87 FPU value to 53-bit-mantissa precision, to be used
+//.. after most non-simple FPU operations (simple = +, -, *, / and
+//.. sqrt).
+//..
+//.. This could be done a lot more efficiently if needed, by loading
+//.. zero and adding it to the value to be rounded (fldz ; faddp?).
+//.. */
+//.. static void roundToF64 ( ISelEnv* env, HReg reg )
+//.. {
+//.. X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
+//.. sub_from_esp(env, 8);
+//.. addInstr(env, X86Instr_FpLdSt(False/*store*/, 8, reg, zero_esp));
+//.. addInstr(env, X86Instr_FpLdSt(True/*load*/, 8, reg, zero_esp));
+//.. add_to_esp(env, 8);
+//.. }
+
+
+/*---------------------------------------------------------*/
+/*--- ISEL: Integer expressions (64/32/16/8 bit) ---*/
+/*---------------------------------------------------------*/
+
+/* Select insns for an integer-typed expression, and add them to the
+ code list. Return a reg holding the result. This reg will be a
+ virtual register. THE RETURNED REG MUST NOT BE MODIFIED. If you
+ want to modify it, ask for a new vreg, copy it in there, and modify
+ the copy. The register allocator will do its best to map both
+ vregs to the same real register, so the copies will often disappear
+ later in the game.
+
+ This should handle expressions of 64, 32, 16 and 8-bit type. All
+ results are returned in a 64-bit register. For 32-, 16- and 8-bit
+   expressions, the upper 32/48/56 bits are arbitrary, so you should
+ mask or sign extend partial values if necessary.
+*/
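+
+/* Illustrative pattern (assumed usage): to negate a value without
+   modifying the returned vreg, copy it first:
+
+      HReg r   = iselIntExpr_R(env, e);
+      HReg dst = newVRegI(env);
+      addInstr(env, mk_iMOVsd_RR(r, dst));
+      addInstr(env, AMD64Instr_Unary64(Aun_NEG, dst));
+
+   This is exactly the copy-then-modify discipline the Sub64(0,x)
+   case below follows. */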
+
+static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e )
+{
+ HReg r = iselIntExpr_R_wrk(env, e);
+ /* sanity checks ... */
+# if 0
+ vex_printf("\niselIntExpr_R: "); ppIRExpr(e); vex_printf("\n");
+# endif
+ vassert(hregClass(r) == HRcInt64);
+ vassert(hregIsVirtual(r));
+ return r;
+}
+
+/* DO NOT CALL THIS DIRECTLY ! */
+static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
+{
+ /* Used for unary/binary SIMD64 ops. */
+ HWord fn = 0;
+ Bool second_is_UInt;
+
+ MatchInfo mi;
+ DECLARE_PATTERN(p_1Uto8_64to1);
+ DECLARE_PATTERN(p_LDle8_then_8Uto64);
+ DECLARE_PATTERN(p_LDle16_then_16Uto64);
+
+ IRType ty = typeOfIRExpr(env->type_env,e);
+   vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
+
+ switch (e->tag) {
+
+ /* --------- TEMP --------- */
+ case Iex_RdTmp: {
+ return lookupIRTemp(env, e->Iex.RdTmp.tmp);
+ }
+
+ /* --------- LOAD --------- */
+ case Iex_Load: {
+ HReg dst = newVRegI(env);
+ AMD64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr );
+
+ /* We can't handle big-endian loads, nor load-linked. */
+ if (e->Iex.Load.end != Iend_LE)
+ goto irreducible;
+
+ if (ty == Ity_I64) {
+ addInstr(env, AMD64Instr_Alu64R(Aalu_MOV,
+ AMD64RMI_Mem(amode), dst) );
+ return dst;
+ }
+ if (ty == Ity_I32) {
+ addInstr(env, AMD64Instr_LoadEX(4,False,amode,dst));
+ return dst;
+ }
+ if (ty == Ity_I16) {
+ addInstr(env, AMD64Instr_LoadEX(2,False,amode,dst));
+ return dst;
+ }
+ if (ty == Ity_I8) {
+ addInstr(env, AMD64Instr_LoadEX(1,False,amode,dst));
+ return dst;
+ }
+ break;
+ }
+
+ /* --------- BINARY OP --------- */
+ case Iex_Binop: {
+ AMD64AluOp aluOp;
+ AMD64ShiftOp shOp;
+
+ /* Pattern: Sub64(0,x) */
+ /* and: Sub32(0,x) */
+ if ((e->Iex.Binop.op == Iop_Sub64 && isZeroU64(e->Iex.Binop.arg1))
+ || (e->Iex.Binop.op == Iop_Sub32 && isZeroU32(e->Iex.Binop.arg1))) {
+ HReg dst = newVRegI(env);
+ HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg2);
+ addInstr(env, mk_iMOVsd_RR(reg,dst));
+ addInstr(env, AMD64Instr_Unary64(Aun_NEG,dst));
+ return dst;
+ }
+
+ /* Is it an addition or logical style op? */
+ switch (e->Iex.Binop.op) {
+ case Iop_Add8: case Iop_Add16: case Iop_Add32: case Iop_Add64:
+ aluOp = Aalu_ADD; break;
+ case Iop_Sub8: case Iop_Sub16: case Iop_Sub32: case Iop_Sub64:
+ aluOp = Aalu_SUB; break;
+ case Iop_And8: case Iop_And16: case Iop_And32: case Iop_And64:
+ aluOp = Aalu_AND; break;
+ case Iop_Or8: case Iop_Or16: case Iop_Or32: case Iop_Or64:
+ aluOp = Aalu_OR; break;
+ case Iop_Xor8: case Iop_Xor16: case Iop_Xor32: case Iop_Xor64:
+ aluOp = Aalu_XOR; break;
+ case Iop_Mul16: case Iop_Mul32: case Iop_Mul64:
+ aluOp = Aalu_MUL; break;
+ default:
+ aluOp = Aalu_INVALID; break;
+ }
+ /* For commutative ops we assume any literal
+ values are on the second operand. */
+ if (aluOp != Aalu_INVALID) {
+ HReg dst = newVRegI(env);
+ HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
+ AMD64RMI* rmi = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
+ addInstr(env, mk_iMOVsd_RR(reg,dst));
+ addInstr(env, AMD64Instr_Alu64R(aluOp, rmi, dst));
+ return dst;
+ }
+
+ /* Perhaps a shift op? */
+ switch (e->Iex.Binop.op) {
+ case Iop_Shl64: case Iop_Shl32: case Iop_Shl16: case Iop_Shl8:
+ shOp = Ash_SHL; break;
+ case Iop_Shr64: case Iop_Shr32: case Iop_Shr16: case Iop_Shr8:
+ shOp = Ash_SHR; break;
+ case Iop_Sar64: case Iop_Sar32: case Iop_Sar16: case Iop_Sar8:
+ shOp = Ash_SAR; break;
+ default:
+ shOp = Ash_INVALID; break;
+ }
+ if (shOp != Ash_INVALID) {
+ HReg dst = newVRegI(env);
+
+ /* regL = the value to be shifted */
+ HReg regL = iselIntExpr_R(env, e->Iex.Binop.arg1);
+ addInstr(env, mk_iMOVsd_RR(regL,dst));
+
+ /* Do any necessary widening for 32/16/8 bit operands */
+ switch (e->Iex.Binop.op) {
+ case Iop_Shr64: case Iop_Shl64: case Iop_Sar64:
+ break;
+ case Iop_Shl32: case Iop_Shl16: case Iop_Shl8:
+ break;
+ case Iop_Shr8:
+ addInstr(env, AMD64Instr_Alu64R(
+ Aalu_AND, AMD64RMI_Imm(0xFF), dst));
+ break;
+ case Iop_Shr16:
+ addInstr(env, AMD64Instr_Alu64R(
+ Aalu_AND, AMD64RMI_Imm(0xFFFF), dst));
+ break;
+ case Iop_Shr32:
+ addInstr(env, AMD64Instr_MovxLQ(False, dst, dst));
+ break;
+ case Iop_Sar8:
+ addInstr(env, AMD64Instr_Sh64(Ash_SHL, 56, dst));
+ addInstr(env, AMD64Instr_Sh64(Ash_SAR, 56, dst));
+ break;
+ case Iop_Sar16:
+ addInstr(env, AMD64Instr_Sh64(Ash_SHL, 48, dst));
+ addInstr(env, AMD64Instr_Sh64(Ash_SAR, 48, dst));
+ break;
+ case Iop_Sar32:
+ addInstr(env, AMD64Instr_MovxLQ(True, dst, dst));
+ break;
+ default:
+ ppIROp(e->Iex.Binop.op);
+ vassert(0);
+ }
+
+ /* Now consider the shift amount. If it's a literal, we
+ can do a much better job than the general case. */
+ if (e->Iex.Binop.arg2->tag == Iex_Const) {
+ /* assert that the IR is well-typed */
+ Int nshift;
+ vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
+ nshift = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
+ vassert(nshift >= 0);
+ if (nshift > 0)
+ /* Can't allow nshift==0 since that means %cl */
+ addInstr(env, AMD64Instr_Sh64(shOp, nshift, dst));
+ } else {
+ /* General case; we have to force the amount into %cl. */
+ HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2);
+ addInstr(env, mk_iMOVsd_RR(regR,hregAMD64_RCX()));
+ addInstr(env, AMD64Instr_Sh64(shOp, 0/* %cl */, dst));
+ }
+ return dst;
+ }
+
+ /* Deal with 64-bit SIMD binary ops */
+ second_is_UInt = False;
+ switch (e->Iex.Binop.op) {
+ case Iop_Add8x8:
+ fn = (HWord)h_generic_calc_Add8x8; break;
+ case Iop_Add16x4:
+ fn = (HWord)h_generic_calc_Add16x4; break;
+ case Iop_Add32x2:
+ fn = (HWord)h_generic_calc_Add32x2; break;
+
+ case Iop_Avg8Ux8:
+ fn = (HWord)h_generic_calc_Avg8Ux8; break;
+ case Iop_Avg16Ux4:
+ fn = (HWord)h_generic_calc_Avg16Ux4; break;
+
+ case Iop_CmpEQ8x8:
+ fn = (HWord)h_generic_calc_CmpEQ8x8; break;
+ case Iop_CmpEQ16x4:
+ fn = (HWord)h_generic_calc_CmpEQ16x4; break;
+ case Iop_CmpEQ32x2:
+ fn = (HWord)h_generic_calc_CmpEQ32x2; break;
+
+ case Iop_CmpGT8Sx8:
+ fn = (HWord)h_generic_calc_CmpGT8Sx8; break;
+ case Iop_CmpGT16Sx4:
+ fn = (HWord)h_generic_calc_CmpGT16Sx4; break;
+ case Iop_CmpGT32Sx2:
+ fn = (HWord)h_generic_calc_CmpGT32Sx2; break;
+
+ case Iop_InterleaveHI8x8:
+ fn = (HWord)h_generic_calc_InterleaveHI8x8; break;
+ case Iop_InterleaveLO8x8:
+ fn = (HWord)h_generic_calc_InterleaveLO8x8; break;
+ case Iop_InterleaveHI16x4:
+ fn = (HWord)h_generic_calc_InterleaveHI16x4; break;
+ case Iop_InterleaveLO16x4:
+ fn = (HWord)h_generic_calc_InterleaveLO16x4; break;
+ case Iop_InterleaveHI32x2:
+ fn = (HWord)h_generic_calc_InterleaveHI32x2; break;
+ case Iop_InterleaveLO32x2:
+ fn = (HWord)h_generic_calc_InterleaveLO32x2; break;
+ case Iop_CatOddLanes16x4:
+ fn = (HWord)h_generic_calc_CatOddLanes16x4; break;
+ case Iop_CatEvenLanes16x4:
+ fn = (HWord)h_generic_calc_CatEvenLanes16x4; break;
+ case Iop_Perm8x8:
+ fn = (HWord)h_generic_calc_Perm8x8; break;
+
+ case Iop_Max8Ux8:
+ fn = (HWord)h_generic_calc_Max8Ux8; break;
+ case Iop_Max16Sx4:
+ fn = (HWord)h_generic_calc_Max16Sx4; break;
+ case Iop_Min8Ux8:
+ fn = (HWord)h_generic_calc_Min8Ux8; break;
+ case Iop_Min16Sx4:
+ fn = (HWord)h_generic_calc_Min16Sx4; break;
+
+ case Iop_Mul16x4:
+ fn = (HWord)h_generic_calc_Mul16x4; break;
+ case Iop_Mul32x2:
+ fn = (HWord)h_generic_calc_Mul32x2; break;
+ case Iop_MulHi16Sx4:
+ fn = (HWord)h_generic_calc_MulHi16Sx4; break;
+ case Iop_MulHi16Ux4:
+ fn = (HWord)h_generic_calc_MulHi16Ux4; break;
+
+ case Iop_QAdd8Sx8:
+ fn = (HWord)h_generic_calc_QAdd8Sx8; break;
+ case Iop_QAdd16Sx4:
+ fn = (HWord)h_generic_calc_QAdd16Sx4; break;
+ case Iop_QAdd8Ux8:
+ fn = (HWord)h_generic_calc_QAdd8Ux8; break;
+ case Iop_QAdd16Ux4:
+ fn = (HWord)h_generic_calc_QAdd16Ux4; break;
+
+ case Iop_QNarrow32Sx2:
+ fn = (HWord)h_generic_calc_QNarrow32Sx2; break;
+ case Iop_QNarrow16Sx4:
+ fn = (HWord)h_generic_calc_QNarrow16Sx4; break;
+ case Iop_QNarrow16Ux4:
+ fn = (HWord)h_generic_calc_QNarrow16Ux4; break;
+
+ case Iop_QSub8Sx8:
+ fn = (HWord)h_generic_calc_QSub8Sx8; break;
+ case Iop_QSub16Sx4:
+ fn = (HWord)h_generic_calc_QSub16Sx4; break;
+ case Iop_QSub8Ux8:
+ fn = (HWord)h_generic_calc_QSub8Ux8; break;
+ case Iop_QSub16Ux4:
+ fn = (HWord)h_generic_calc_QSub16Ux4; break;
+
+ case Iop_Sub8x8:
+ fn = (HWord)h_generic_calc_Sub8x8; break;
+ case Iop_Sub16x4:
+ fn = (HWord)h_generic_calc_Sub16x4; break;
+ case Iop_Sub32x2:
+ fn = (HWord)h_generic_calc_Sub32x2; break;
+
+ case Iop_ShlN32x2:
+ fn = (HWord)h_generic_calc_ShlN32x2;
+ second_is_UInt = True;
+ break;
+ case Iop_ShlN16x4:
+ fn = (HWord)h_generic_calc_ShlN16x4;
+ second_is_UInt = True;
+ break;
+ case Iop_ShlN8x8:
+ fn = (HWord)h_generic_calc_ShlN8x8;
+ second_is_UInt = True;
+ break;
+ case Iop_ShrN32x2:
+ fn = (HWord)h_generic_calc_ShrN32x2;
+ second_is_UInt = True;
+ break;
+ case Iop_ShrN16x4:
+ fn = (HWord)h_generic_calc_ShrN16x4;
+ second_is_UInt = True;
+ break;
+ case Iop_SarN32x2:
+ fn = (HWord)h_generic_calc_SarN32x2;
+ second_is_UInt = True;
+ break;
+ case Iop_SarN16x4:
+ fn = (HWord)h_generic_calc_SarN16x4;
+ second_is_UInt = True;
+ break;
+ case Iop_SarN8x8:
+ fn = (HWord)h_generic_calc_SarN8x8;
+ second_is_UInt = True;
+ break;
+
+ default:
+ fn = (HWord)0; break;
+ }
+ if (fn != (HWord)0) {
+      /* Note: the following assumes all helpers are of signature
+            ULong fn ( ULong, ULong ),
+         and are not marked as regparm functions. */
+ HReg dst = newVRegI(env);
+ HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
+ HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
+ if (second_is_UInt)
+ addInstr(env, AMD64Instr_MovxLQ(False, argR, argR));
+ addInstr(env, mk_iMOVsd_RR(argL, hregAMD64_RDI()) );
+ addInstr(env, mk_iMOVsd_RR(argR, hregAMD64_RSI()) );
+ addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn, 2 ));
+ addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), dst));
+ return dst;
+ }
+
+ /* Handle misc other ops. */
+
+ if (e->Iex.Binop.op == Iop_Max32U) {
+ /* This generates a truly rotten piece of code. Just as well
+ it doesn't happen very often. */
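+      /* Sketch: shift both operands left by 32 so that an unsigned
+         64-bit compare agrees with the unsigned 32-bit compare of
+         the originals, then CMOVB arg2 into dst when arg1 < arg2:
+            dst   = src1
+            src1L = src1 << 32 ; src2L = src2 << 32
+            cmpq  src2L, src1L
+            cmovb src2, dst
+      */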
+ HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
+ HReg src1L = newVRegI(env);
+ HReg src2 = iselIntExpr_R(env, e->Iex.Binop.arg2);
+ HReg src2L = newVRegI(env);
+ HReg dst = newVRegI(env);
+ addInstr(env, mk_iMOVsd_RR(src1,dst));
+ addInstr(env, mk_iMOVsd_RR(src1,src1L));
+ addInstr(env, AMD64Instr_Sh64(Ash_SHL, 32, src1L));
+ addInstr(env, mk_iMOVsd_RR(src2,src2L));
+ addInstr(env, AMD64Instr_Sh64(Ash_SHL, 32, src2L));
+ addInstr(env, AMD64Instr_Alu64R(Aalu_CMP, AMD64RMI_Reg(src2L), src1L));
+ addInstr(env, AMD64Instr_CMov64(Acc_B, AMD64RM_Reg(src2), dst));
+ return dst;
+ }
+
+ if (e->Iex.Binop.op == Iop_DivModS64to32
+ || e->Iex.Binop.op == Iop_DivModU64to32) {
+ /* 64 x 32 -> (32(rem),32(div)) division */
+ /* Get the 64-bit operand into edx:eax, and the other into
+ any old R/M. */
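+      /* Sketch, for DivModU64to32(a,b) (the signed variant uses
+         idivl instead):
+            rdx = a ; rax = a ; rdx >>= 32    -- edx:eax := a
+            divl b                            -- eax = a/b, edx = a%b
+            zero-extend edx and eax ; rdx <<= 32
+            dst = rdx | rax                   -- (rem << 32) | div
+      */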
+ HReg rax = hregAMD64_RAX();
+ HReg rdx = hregAMD64_RDX();
+ HReg dst = newVRegI(env);
+ Bool syned = toBool(e->Iex.Binop.op == Iop_DivModS64to32);
+ AMD64RM* rmRight = iselIntExpr_RM(env, e->Iex.Binop.arg2);
+ /* Compute the left operand into a reg, and then
+ put the top half in edx and the bottom in eax. */
+ HReg left64 = iselIntExpr_R(env, e->Iex.Binop.arg1);
+ addInstr(env, mk_iMOVsd_RR(left64, rdx));
+ addInstr(env, mk_iMOVsd_RR(left64, rax));
+ addInstr(env, AMD64Instr_Sh64(Ash_SHR, 32, rdx));
+ addInstr(env, AMD64Instr_Div(syned, 4, rmRight));
+ addInstr(env, AMD64Instr_MovxLQ(False, rdx, rdx));
+ addInstr(env, AMD64Instr_MovxLQ(False, rax, rax));
+ addInstr(env, AMD64Instr_Sh64(Ash_SHL, 32, rdx));
+ addInstr(env, mk_iMOVsd_RR(rax, dst));
+ addInstr(env, AMD64Instr_Alu64R(Aalu_OR, AMD64RMI_Reg(rdx), dst));
+ return dst;
+ }
+
+ if (e->Iex.Binop.op == Iop_32HLto64) {
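+      /* 32HLto64(hi,lo) = (hi << 32) | zero-extend-32(lo). */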
+ HReg hi32 = newVRegI(env);
+ HReg lo32 = newVRegI(env);
+ HReg hi32s = iselIntExpr_R(env, e->Iex.Binop.arg1);
+ HReg lo32s = iselIntExpr_R(env, e->Iex.Binop.arg2);
+ addInstr(env, mk_iMOVsd_RR(hi32s, hi32));
+ addInstr(env, mk_iMOVsd_RR(lo32s, lo32));
+ addInstr(env, AMD64Instr_Sh64(Ash_SHL, 32, hi32));
+ addInstr(env, AMD64Instr_MovxLQ(False, lo32, lo32));
+ addInstr(env, AMD64Instr_Alu64R(
+ Aalu_OR, AMD64RMI_Reg(lo32), hi32));
+ return hi32;
+ }
+
+ if (e->Iex.Binop.op == Iop_16HLto32) {
+ HReg hi16 = newVRegI(env);
+ HReg lo16 = newVRegI(env);
+ HReg hi16s = iselIntExpr_R(env, e->Iex.Binop.arg1);
+ HReg lo16s = iselIntExpr_R(env, e->Iex.Binop.arg2);
+ addInstr(env, mk_iMOVsd_RR(hi16s, hi16));
+ addInstr(env, mk_iMOVsd_RR(lo16s, lo16));
+ addInstr(env, AMD64Instr_Sh64(Ash_SHL, 16, hi16));
+ addInstr(env, AMD64Instr_Alu64R(
+ Aalu_AND, AMD64RMI_Imm(0xFFFF), lo16));
+ addInstr(env, AMD64Instr_Alu64R(
+ Aalu_OR, AMD64RMI_Reg(lo16), hi16));
+ return hi16;
+ }
+
+ if (e->Iex.Binop.op == Iop_8HLto16) {
+ HReg hi8 = newVRegI(env);
+ HReg lo8 = newVRegI(env);
+ HReg hi8s = iselIntExpr_R(env, e->Iex.Binop.arg1);
+ HReg lo8s = iselIntExpr_R(env, e->Iex.Binop.arg2);
+ addInstr(env, mk_iMOVsd_RR(hi8s, hi8));
+ addInstr(env, mk_iMOVsd_RR(lo8s, lo8));
+ addInstr(env, AMD64Instr_Sh64(Ash_SHL, 8, hi8));
+ addInstr(env, AMD64Instr_Alu64R(
+ Aalu_AND, AMD64RMI_Imm(0xFF), lo8));
+ addInstr(env, AMD64Instr_Alu64R(
+ Aalu_OR, AMD64RMI_Reg(lo8), hi8));
+ return hi8;
+ }
+
+ if (e->Iex.Binop.op == Iop_MullS32
+ || e->Iex.Binop.op == Iop_MullS16
+ || e->Iex.Binop.op == Iop_MullS8
+ || e->Iex.Binop.op == Iop_MullU32
+ || e->Iex.Binop.op == Iop_MullU16
+ || e->Iex.Binop.op == Iop_MullU8) {
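+      /* Widening multiplies: sign- or zero-extend both N-bit
+         operands to 64 bits (SHL by 64-N, then SAR or SHR back),
+         so that a single 64-bit IMUL computes the exact NxN -> 2N
+         product in the low bits of the result. */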
+ HReg a32 = newVRegI(env);
+ HReg b32 = newVRegI(env);
+ HReg a32s = iselIntExpr_R(env, e->Iex.Binop.arg1);
+ HReg b32s = iselIntExpr_R(env, e->Iex.Binop.arg2);
+ Int shift = 0;
+ AMD64ShiftOp shr_op = Ash_SHR;
+ switch (e->Iex.Binop.op) {
+ case Iop_MullS32: shr_op = Ash_SAR; shift = 32; break;
+ case Iop_MullS16: shr_op = Ash_SAR; shift = 48; break;
+ case Iop_MullS8: shr_op = Ash_SAR; shift = 56; break;
+ case Iop_MullU32: shr_op = Ash_SHR; shift = 32; break;
+ case Iop_MullU16: shr_op = Ash_SHR; shift = 48; break;
+ case Iop_MullU8: shr_op = Ash_SHR; shift = 56; break;
+ default: vassert(0);
+ }
+
+ addInstr(env, mk_iMOVsd_RR(a32s, a32));
+ addInstr(env, mk_iMOVsd_RR(b32s, b32));
+ addInstr(env, AMD64Instr_Sh64(Ash_SHL, shift, a32));
+ addInstr(env, AMD64Instr_Sh64(Ash_SHL, shift, b32));
+ addInstr(env, AMD64Instr_Sh64(shr_op, shift, a32));
+ addInstr(env, AMD64Instr_Sh64(shr_op, shift, b32));
+ addInstr(env, AMD64Instr_Alu64R(Aalu_MUL, AMD64RMI_Reg(a32), b32));
+ return b32;
+ }
+
+ if (e->Iex.Binop.op == Iop_CmpF64) {
+ HReg fL = iselDblExpr(env, e->Iex.Binop.arg1);
+ HReg fR = iselDblExpr(env, e->Iex.Binop.arg2);
+ HReg dst = newVRegI(env);
+ addInstr(env, AMD64Instr_SseUComIS(8,fL,fR,dst));
+      /* Mask out irrelevant parts of the result so as to conform
+         to the CmpF64 definition: 0x45 keeps only the images of
+         ZF, PF and CF. */
+ addInstr(env, AMD64Instr_Alu64R(Aalu_AND, AMD64RMI_Imm(0x45), dst));
+ return dst;
+ }
+
+ if (e->Iex.Binop.op == Iop_F64toI32S
+ || e->Iex.Binop.op == Iop_F64toI64S) {
+ Int szD = e->Iex.Binop.op==Iop_F64toI32S ? 4 : 8;
+ HReg rf = iselDblExpr(env, e->Iex.Binop.arg2);
+ HReg dst = newVRegI(env);
+ set_SSE_rounding_mode( env, e->Iex.Binop.arg1 );
+ addInstr(env, AMD64Instr_SseSF2SI( 8, szD, rf, dst ));
+ set_SSE_rounding_default(env);
+ return dst;
+ }
+
+//.. if (e->Iex.Binop.op == Iop_F64toI32 || e->Iex.Binop.op == Iop_F64toI16) {
+//.. Int sz = e->Iex.Binop.op == Iop_F64toI16 ? 2 : 4;
+//.. HReg rf = iselDblExpr(env, e->Iex.Binop.arg2);
+//.. HReg dst = newVRegI(env);
+//..
+//.. /* Used several times ... */
+//.. X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
+//..
+//.. /* rf now holds the value to be converted, and rrm holds the
+//.. rounding mode value, encoded as per the IRRoundingMode
+//.. enum. The first thing to do is set the FPU's rounding
+//.. mode accordingly. */
+//..
+//.. /* Create a space for the format conversion. */
+//.. /* subl $4, %esp */
+//.. sub_from_esp(env, 4);
+//..
+//.. /* Set host rounding mode */
+//.. set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
+//..
+//.. /* gistw/l %rf, 0(%esp) */
+//.. addInstr(env, X86Instr_FpLdStI(False/*store*/, sz, rf, zero_esp));
+//..
+//.. if (sz == 2) {
+//.. /* movzwl 0(%esp), %dst */
+//.. addInstr(env, X86Instr_LoadEX(2,False,zero_esp,dst));
+//.. } else {
+//.. /* movl 0(%esp), %dst */
+//.. vassert(sz == 4);
+//.. addInstr(env, X86Instr_Alu32R(
+//.. Xalu_MOV, X86RMI_Mem(zero_esp), dst));
+//.. }
+//..
+//.. /* Restore default FPU rounding. */
+//.. set_FPU_rounding_default( env );
+//..
+//.. /* addl $4, %esp */
+//.. add_to_esp(env, 4);
+//.. return dst;
+//.. }
+//..
+//.. /* C3210 flags following FPU partial remainder (fprem), both
+//.. IEEE compliant (PREM1) and non-IEEE compliant (PREM). */
+//.. if (e->Iex.Binop.op == Iop_PRemC3210F64
+//.. || e->Iex.Binop.op == Iop_PRem1C3210F64) {
+//.. HReg junk = newVRegF(env);
+//.. HReg dst = newVRegI(env);
+//.. HReg srcL = iselDblExpr(env, e->Iex.Binop.arg1);
+//.. HReg srcR = iselDblExpr(env, e->Iex.Binop.arg2);
+//.. addInstr(env, X86Instr_FpBinary(
+//.. e->Iex.Binop.op==Iop_PRemC3210F64
+//.. ? Xfp_PREM : Xfp_PREM1,
+//.. srcL,srcR,junk
+//.. ));
+//.. /* The previous pseudo-insn will have left the FPU's C3210
+//.. flags set correctly. So bag them. */
+//.. addInstr(env, X86Instr_FpStSW_AX());
+//.. addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
+//.. addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0x4700), dst));
+//.. return dst;
+//.. }
+
+ break;
+ }
+
+ /* --------- UNARY OP --------- */
+ case Iex_Unop: {
+
+ /* 1Uto8(64to1(expr64)) */
+ {
+ DEFINE_PATTERN( p_1Uto8_64to1,
+ unop(Iop_1Uto8, unop(Iop_64to1, bind(0))) );
+ if (matchIRExpr(&mi,p_1Uto8_64to1,e)) {
+ IRExpr* expr64 = mi.bindee[0];
+ HReg dst = newVRegI(env);
+ HReg src = iselIntExpr_R(env, expr64);
+ addInstr(env, mk_iMOVsd_RR(src,dst) );
+ addInstr(env, AMD64Instr_Alu64R(Aalu_AND,
+ AMD64RMI_Imm(1), dst));
+ return dst;
+ }
+ }
+
+ /* 8Uto64(LDle(expr64)) */
+ {
+ DEFINE_PATTERN(p_LDle8_then_8Uto64,
+ unop(Iop_8Uto64,
+ IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
+ if (matchIRExpr(&mi,p_LDle8_then_8Uto64,e)) {
+ HReg dst = newVRegI(env);
+ AMD64AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
+ addInstr(env, AMD64Instr_LoadEX(1,False,amode,dst));
+ return dst;
+ }
+ }
+
+ /* 16Uto64(LDle(expr64)) */
+ {
+ DEFINE_PATTERN(p_LDle16_then_16Uto64,
+ unop(Iop_16Uto64,
+ IRExpr_Load(Iend_LE,Ity_I16,bind(0))) );
+ if (matchIRExpr(&mi,p_LDle16_then_16Uto64,e)) {
+ HReg dst = newVRegI(env);
+ AMD64AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
+ addInstr(env, AMD64Instr_LoadEX(2,False,amode,dst));
+ return dst;
+ }
+ }
+
+ switch (e->Iex.Unop.op) {
+ case Iop_32Uto64:
+ case Iop_32Sto64: {
+ HReg dst = newVRegI(env);
+ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
+ addInstr(env, AMD64Instr_MovxLQ(e->Iex.Unop.op == Iop_32Sto64,
+ src, dst) );
+ return dst;
+ }
+ case Iop_128HIto64: {
+ HReg rHi, rLo;
+ iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
+ return rHi; /* and abandon rLo */
+ }
+ case Iop_128to64: {
+ HReg rHi, rLo;
+ iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
+ return rLo; /* and abandon rHi */
+ }
+ case Iop_8Uto16:
+ case Iop_8Uto32:
+ case Iop_8Uto64:
+ case Iop_16Uto64:
+ case Iop_16Uto32: {
+ HReg dst = newVRegI(env);
+ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
+ Bool srcIs16 = toBool( e->Iex.Unop.op==Iop_16Uto32
+ || e->Iex.Unop.op==Iop_16Uto64 );
+ UInt mask = srcIs16 ? 0xFFFF : 0xFF;
+ addInstr(env, mk_iMOVsd_RR(src,dst) );
+ addInstr(env, AMD64Instr_Alu64R(Aalu_AND,
+ AMD64RMI_Imm(mask), dst));
+ return dst;
+ }
+ case Iop_8Sto16:
+ case Iop_8Sto64:
+ case Iop_8Sto32:
+ case Iop_16Sto32:
+ case Iop_16Sto64: {
+ HReg dst = newVRegI(env);
+ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
+ Bool srcIs16 = toBool( e->Iex.Unop.op==Iop_16Sto32
+ || e->Iex.Unop.op==Iop_16Sto64 );
+ UInt amt = srcIs16 ? 48 : 56;
+ addInstr(env, mk_iMOVsd_RR(src,dst) );
+ addInstr(env, AMD64Instr_Sh64(Ash_SHL, amt, dst));
+ addInstr(env, AMD64Instr_Sh64(Ash_SAR, amt, dst));
+ return dst;
+ }
+ case Iop_Not8:
+ case Iop_Not16:
+ case Iop_Not32:
+ case Iop_Not64: {
+ HReg dst = newVRegI(env);
+ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
+ addInstr(env, mk_iMOVsd_RR(src,dst) );
+ addInstr(env, AMD64Instr_Unary64(Aun_NOT,dst));
+ return dst;
+ }
+//.. case Iop_64HIto32: {
+//.. HReg rHi, rLo;
+//.. iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
+//.. return rHi; /* and abandon rLo .. poor wee thing :-) */
+//.. }
+//.. case Iop_64to32: {
+//.. HReg rHi, rLo;
+//.. iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
+//.. return rLo; /* similar stupid comment to the above ... */
+//.. }
+ case Iop_16HIto8:
+ case Iop_32HIto16:
+ case Iop_64HIto32: {
+ HReg dst = newVRegI(env);
+ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
+ Int shift = 0;
+ switch (e->Iex.Unop.op) {
+ case Iop_16HIto8: shift = 8; break;
+ case Iop_32HIto16: shift = 16; break;
+ case Iop_64HIto32: shift = 32; break;
+ default: vassert(0);
+ }
+ addInstr(env, mk_iMOVsd_RR(src,dst) );
+ addInstr(env, AMD64Instr_Sh64(Ash_SHR, shift, dst));
+ return dst;
+ }
+ case Iop_1Uto64:
+ case Iop_1Uto32:
+ case Iop_1Uto8: {
+ HReg dst = newVRegI(env);
+ AMD64CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
+ addInstr(env, AMD64Instr_Set64(cond,dst));
+ return dst;
+ }
+ case Iop_1Sto8:
+ case Iop_1Sto16:
+ case Iop_1Sto32:
+ case Iop_1Sto64: {
+ /* could do better than this, but for now ... */
+ HReg dst = newVRegI(env);
+ AMD64CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
+ addInstr(env, AMD64Instr_Set64(cond,dst));
+ addInstr(env, AMD64Instr_Sh64(Ash_SHL, 63, dst));
+ addInstr(env, AMD64Instr_Sh64(Ash_SAR, 63, dst));
+ return dst;
+ }
+ case Iop_Ctz64: {
+ /* Count trailing zeroes, implemented by amd64 'bsfq' */
+ HReg dst = newVRegI(env);
+ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
+ addInstr(env, AMD64Instr_Bsfr64(True,src,dst));
+ return dst;
+ }
+ case Iop_Clz64: {
+ /* Count leading zeroes. Do 'bsrq' to establish the index
+ of the highest set bit, and subtract that value from
+ 63. */
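+         /* For example, src == 1 gives bsrq == 0, hence clz == 63.
+            As with bsrq itself, the result for src == 0 is
+            undefined. */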
+ HReg tmp = newVRegI(env);
+ HReg dst = newVRegI(env);
+ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
+ addInstr(env, AMD64Instr_Bsfr64(False,src,tmp));
+ addInstr(env, AMD64Instr_Alu64R(Aalu_MOV,
+ AMD64RMI_Imm(63), dst));
+ addInstr(env, AMD64Instr_Alu64R(Aalu_SUB,
+ AMD64RMI_Reg(tmp), dst));
+ return dst;
+ }
+
+ case Iop_CmpwNEZ64: {
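+         /* Uses the identity: (src | -src) >>signed 63 is all ones
+            if src != 0, and all zeroes otherwise. */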
+ HReg dst = newVRegI(env);
+ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
+ addInstr(env, mk_iMOVsd_RR(src,dst));
+ addInstr(env, AMD64Instr_Unary64(Aun_NEG,dst));
+ addInstr(env, AMD64Instr_Alu64R(Aalu_OR,
+ AMD64RMI_Reg(src), dst));
+ addInstr(env, AMD64Instr_Sh64(Ash_SAR, 63, dst));
+ return dst;
+ }
+
+ case Iop_CmpwNEZ32: {
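+         /* Same (x | -x) >>signed 63 trick as for CmpwNEZ64, after
+            first zero-extending the 32-bit value to 64 bits. */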
+ HReg src = newVRegI(env);
+ HReg dst = newVRegI(env);
+ HReg pre = iselIntExpr_R(env, e->Iex.Unop.arg);
+ addInstr(env, mk_iMOVsd_RR(pre,src));
+ addInstr(env, AMD64Instr_MovxLQ(False, src, src));
+ addInstr(env, mk_iMOVsd_RR(src,dst));
+ addInstr(env, AMD64Instr_Unary64(Aun_NEG,dst));
+ addInstr(env, AMD64Instr_Alu64R(Aalu_OR,
+ AMD64RMI_Reg(src), dst));
+ addInstr(env, AMD64Instr_Sh64(Ash_SAR, 63, dst));
+ return dst;
+ }
+
+ case Iop_Left8:
+ case Iop_Left16:
+ case Iop_Left32:
+ case Iop_Left64: {
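+         /* Left(x) is defined as x | -x. */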
+ HReg dst = newVRegI(env);
+ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
+ addInstr(env, mk_iMOVsd_RR(src, dst));
+ addInstr(env, AMD64Instr_Unary64(Aun_NEG, dst));
+ addInstr(env, AMD64Instr_Alu64R(Aalu_OR, AMD64RMI_Reg(src), dst));
+ return dst;
+ }
+
+ case Iop_V128to32: {
+ HReg dst = newVRegI(env);
+ HReg vec = iselVecExpr(env, e->Iex.Unop.arg);
+ AMD64AMode* rsp_m16 = AMD64AMode_IR(-16, hregAMD64_RSP());
+ addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, vec, rsp_m16));
+ addInstr(env, AMD64Instr_LoadEX(4, False/*z-widen*/, rsp_m16, dst));
+ return dst;
+ }
+
+ /* V128{HI}to64 */
+ case Iop_V128HIto64:
+ case Iop_V128to64: {
+ Int off = e->Iex.Unop.op==Iop_V128HIto64 ? 8 : 0;
+ HReg dst = newVRegI(env);
+ HReg vec = iselVecExpr(env, e->Iex.Unop.arg);
+ AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP());
+ AMD64AMode* rspN = AMD64AMode_IR(off, hregAMD64_RSP());
+ sub_from_rsp(env, 16);
+ addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, vec, rsp0));
+ addInstr(env, AMD64Instr_Alu64R( Aalu_MOV,
+ AMD64RMI_Mem(rspN), dst ));
+ add_to_rsp(env, 16);
+ return dst;
+ }
+
+ /* ReinterpF64asI64(e) */
+ /* Given an IEEE754 double, produce an I64 with the same bit
+ pattern. */
+ case Iop_ReinterpF64asI64: {
+ AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
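+         /* The value is bounced through -8(%rsp) with no %rsp
+            adjustment, presumably relying on the AMD64 ABI's
+            128-byte red zone below the stack pointer. */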
+ HReg dst = newVRegI(env);
+ HReg src = iselDblExpr(env, e->Iex.Unop.arg);
+ /* paranoia */
+ set_SSE_rounding_default(env);
+ addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, src, m8_rsp));
+ addInstr(env, AMD64Instr_Alu64R(
+ Aalu_MOV, AMD64RMI_Mem(m8_rsp), dst));
+ return dst;
+ }
+
+ /* ReinterpF32asI32(e) */
+ /* Given an IEEE754 single, produce an I64 with the same bit
+ pattern in the lower half. */
+ case Iop_ReinterpF32asI32: {
+ AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
+ HReg dst = newVRegI(env);
+ HReg src = iselFltExpr(env, e->Iex.Unop.arg);
+ /* paranoia */
+ set_SSE_rounding_default(env);
+ addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 4, src, m8_rsp));
+ addInstr(env, AMD64Instr_LoadEX(4, False/*unsigned*/, m8_rsp, dst ));
+ return dst;
+ }
+
+ case Iop_16to8:
+ case Iop_32to8:
+ case Iop_64to8:
+ case Iop_32to16:
+ case Iop_64to16:
+ case Iop_64to32:
+         /* These are no-ops: the narrowing is done simply by
+            ignoring the unwanted upper bits of the register. */
+ return iselIntExpr_R(env, e->Iex.Unop.arg);
+
+ default:
+ break;
+ }
+
+ /* Deal with unary 64-bit SIMD ops. */
+ switch (e->Iex.Unop.op) {
+ case Iop_CmpNEZ32x2:
+ fn = (HWord)h_generic_calc_CmpNEZ32x2; break;
+ case Iop_CmpNEZ16x4:
+ fn = (HWord)h_generic_calc_CmpNEZ16x4; break;
+ case Iop_CmpNEZ8x8:
+ fn = (HWord)h_generic_calc_CmpNEZ8x8; break;
+ default:
+ fn = (HWord)0; break;
+ }
+ if (fn != (HWord)0) {
+         /* Note: the following assumes all helpers are of signature
+               ULong fn ( ULong ),
+            and are not marked as regparm functions. */
+ HReg dst = newVRegI(env);
+ HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
+ addInstr(env, mk_iMOVsd_RR(arg, hregAMD64_RDI()) );
+ addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn, 1 ));
+ addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), dst));
+ return dst;
+ }
+
+ break;
+ }
+
+ /* --------- GET --------- */
+ case Iex_Get: {
+ if (ty == Ity_I64) {
+ HReg dst = newVRegI(env);
+ addInstr(env, AMD64Instr_Alu64R(
+ Aalu_MOV,
+ AMD64RMI_Mem(
+ AMD64AMode_IR(e->Iex.Get.offset,
+ hregAMD64_RBP())),
+ dst));
+ return dst;
+ }
+ if (ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32) {
+ HReg dst = newVRegI(env);
+ addInstr(env, AMD64Instr_LoadEX(
+ toUChar(ty==Ity_I8 ? 1 : (ty==Ity_I16 ? 2 : 4)),
+ False,
+ AMD64AMode_IR(e->Iex.Get.offset,hregAMD64_RBP()),
+ dst));
+ return dst;
+ }
+ break;
+ }
+
+ case Iex_GetI: {
+ AMD64AMode* am
+ = genGuestArrayOffset(
+ env, e->Iex.GetI.descr,
+ e->Iex.GetI.ix, e->Iex.GetI.bias );
+ HReg dst = newVRegI(env);
+ if (ty == Ity_I8) {
+ addInstr(env, AMD64Instr_LoadEX( 1, False, am, dst ));
+ return dst;
+ }
+ if (ty == Ity_I64) {
+ addInstr(env, AMD64Instr_Alu64R( Aalu_MOV, AMD64RMI_Mem(am), dst ));
+ return dst;
+ }
+ break;
+ }
+
+ /* --------- CCALL --------- */
+ case Iex_CCall: {
+ HReg dst = newVRegI(env);
+ vassert(ty == e->Iex.CCall.retty);
+
+      /* Be very restrictive for now: only 64-bit ints allowed for
+         args, and 64 or 32 bits for the return type. */
+ if (e->Iex.CCall.retty != Ity_I64 && e->Iex.CCall.retty != Ity_I32)
+ goto irreducible;
+
+ /* Marshal args, do the call. */
+ doHelperCall( env, False, NULL, e->Iex.CCall.cee, e->Iex.CCall.args );
+
+ /* Move to dst, and zero out the top 32 bits if the result type is
+ Ity_I32. Probably overkill, but still .. */
+ if (e->Iex.CCall.retty == Ity_I64)
+ addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), dst));
+ else
+ addInstr(env, AMD64Instr_MovxLQ(False, hregAMD64_RAX(), dst));
+
+ return dst;
+ }
+
+ /* --------- LITERAL --------- */
+ /* 64/32/16/8-bit literals */
+ case Iex_Const:
+ if (ty == Ity_I64) {
+ HReg r = newVRegI(env);
+ addInstr(env, AMD64Instr_Imm64(e->Iex.Const.con->Ico.U64, r));
+ return r;
+ } else {
+ AMD64RMI* rmi = iselIntExpr_RMI ( env, e );
+ HReg r = newVRegI(env);
+ addInstr(env, AMD64Instr_Alu64R(Aalu_MOV, rmi, r));
+ return r;
+ }
+
+ /* --------- MULTIPLEX --------- */
+ case Iex_Mux0X: {
+ if ((ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8)
+ && typeOfIRExpr(env->type_env,e->Iex.Mux0X.cond) == Ity_I8) {
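+         /* Lowering: dst = exprX; test the low byte of cond; if it
+            is zero, conditionally move expr0 into dst. */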
+ HReg r8;
+ HReg rX = iselIntExpr_R(env, e->Iex.Mux0X.exprX);
+ AMD64RM* r0 = iselIntExpr_RM(env, e->Iex.Mux0X.expr0);
+ HReg dst = newVRegI(env);
+ addInstr(env, mk_iMOVsd_RR(rX,dst));
+ r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond);
+ addInstr(env, AMD64Instr_Test64(0xFF, r8));
+ addInstr(env, AMD64Instr_CMov64(Acc_Z,r0,dst));
+ return dst;
+ }
+ break;
+ }
+
+ /* --------- TERNARY OP --------- */
+ case Iex_Triop: {
+ /* C3210 flags following FPU partial remainder (fprem), both
+ IEEE compliant (PREM1) and non-IEEE compliant (PREM). */
+ if (e->Iex.Triop.op == Iop_PRemC3210F64
+ || e->Iex.Triop.op == Iop_PRem1C3210F64) {
+ AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
+ HReg arg1 = iselDblExpr(env, e->Iex.Triop.arg2);
+ HReg arg2 = iselDblExpr(env, e->Iex.Triop.arg3);
+ HReg dst = newVRegI(env);
+ addInstr(env, AMD64Instr_A87Free(2));
+
+ /* one arg -> top of x87 stack */
+ addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg2, m8_rsp));
+ addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8));
+
+ /* other arg -> top of x87 stack */
+ addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg1, m8_rsp));
+ addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8));
+
+ switch (e->Iex.Triop.op) {
+ case Iop_PRemC3210F64:
+ addInstr(env, AMD64Instr_A87FpOp(Afp_PREM));
+ break;
+ case Iop_PRem1C3210F64:
+ addInstr(env, AMD64Instr_A87FpOp(Afp_PREM1));
+ break;
+ default:
+ vassert(0);
+ }
+ /* Ignore the result, and instead make off with the FPU's
+ C3210 flags (in the status word). */
+ addInstr(env, AMD64Instr_A87StSW(m8_rsp));
+ addInstr(env, AMD64Instr_Alu64R(Aalu_MOV,AMD64RMI_Mem(m8_rsp),dst));
+ addInstr(env, AMD64Instr_Alu64R(Aalu_AND,AMD64RMI_Imm(0x4700),dst));
+ return dst;
+ }
+ break;
+ }
+
+ default:
+ break;
+ } /* switch (e->tag) */
+
+ /* We get here if no pattern matched. */
+ irreducible:
+ ppIRExpr(e);
+ vpanic("iselIntExpr_R(amd64): cannot reduce tree");
+}
+
+
+/*---------------------------------------------------------*/
+/*--- ISEL: Integer expression auxiliaries ---*/
+/*---------------------------------------------------------*/
+
+/* --------------------- AMODEs --------------------- */
+
+/* Return an AMode which computes the value of the specified
+ expression, possibly also adding insns to the code list as a
+   result. The expression may only be a 64-bit one.
+*/
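+/* amd64 amodes come in two forms: reg+disp (AMD64AMode_IR) and
+   disp+base+index*scale with scale in {1,2,4,8} (AMD64AMode_IRRS).
+   The pattern matches below try to fold Add64/Shl64 trees into
+   these forms. */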
+
+static AMD64AMode* iselIntExpr_AMode ( ISelEnv* env, IRExpr* e )
+{
+ AMD64AMode* am = iselIntExpr_AMode_wrk(env, e);
+ vassert(sane_AMode(am));
+ return am;
+}
+
+/* DO NOT CALL THIS DIRECTLY ! */
+static AMD64AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, IRExpr* e )
+{
+ MatchInfo mi;
+ DECLARE_PATTERN(p_complex);
+ IRType ty = typeOfIRExpr(env->type_env,e);
+ vassert(ty == Ity_I64);
+
+ /* Add64( Add64(expr1, Shl64(expr2, imm8)), simm32 ) */
+ /* bind0 bind1 bind2 bind3 */
+ DEFINE_PATTERN(p_complex,
+ binop( Iop_Add64,
+ binop( Iop_Add64,
+ bind(0),
+ binop(Iop_Shl64, bind(1), bind(2))
+ ),
+ bind(3)
+ )
+ );
+ if (matchIRExpr(&mi, p_complex, e)) {
+ IRExpr* expr1 = mi.bindee[0];
+ IRExpr* expr2 = mi.bindee[1];
+ IRExpr* imm8 = mi.bindee[2];
+ IRExpr* simm32 = mi.bindee[3];
+ if (imm8->tag == Iex_Const
+ && imm8->Iex.Const.con->tag == Ico_U8
+ && imm8->Iex.Const.con->Ico.U8 < 4
+ /* imm8 is OK, now check simm32 */
+ && simm32->tag == Iex_Const
+ && simm32->Iex.Const.con->tag == Ico_U64
+ && fitsIn32Bits(simm32->Iex.Const.con->Ico.U64)) {
+ UInt shift = imm8->Iex.Const.con->Ico.U8;
+ UInt offset = toUInt(simm32->Iex.Const.con->Ico.U64);
+ HReg r1 = iselIntExpr_R(env, expr1);
+ HReg r2 = iselIntExpr_R(env, expr2);
+ vassert(shift == 0 || shift == 1 || shift == 2 || shift == 3);
+ return AMD64AMode_IRRS(offset, r1, r2, shift);
+ }
+ }
+
+ /* Add64(expr1, Shl64(expr2, imm)) */
+ if (e->tag == Iex_Binop
+ && e->Iex.Binop.op == Iop_Add64
+ && e->Iex.Binop.arg2->tag == Iex_Binop
+ && e->Iex.Binop.arg2->Iex.Binop.op == Iop_Shl64
+ && e->Iex.Binop.arg2->Iex.Binop.arg2->tag == Iex_Const
+ && e->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8) {
+ UInt shift = e->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
+ if (shift == 1 || shift == 2 || shift == 3) {
+ HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
+ HReg r2 = iselIntExpr_R(env, e->Iex.Binop.arg2->Iex.Binop.arg1 );
+ return AMD64AMode_IRRS(0, r1, r2, shift);
+ }
+ }
+
+ /* Add64(expr,i) */
+ if (e->tag == Iex_Binop
+ && e->Iex.Binop.op == Iop_Add64
+ && e->Iex.Binop.arg2->tag == Iex_Const
+ && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64
+ && fitsIn32Bits(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)) {
+ HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
+ return AMD64AMode_IR(
+ toUInt(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64),
+ r1
+ );
+ }
+
+ /* Doesn't match anything in particular. Generate it into
+ a register and use that. */
+ {
+ HReg r1 = iselIntExpr_R(env, e);
+ return AMD64AMode_IR(0, r1);
+ }
+}
+
+
+/* --------------------- RMIs --------------------- */
+
+/* Similarly, calculate an expression into an AMD64RMI operand. As
+   with iselIntExpr_R, the expression can have type 64, 32, 16 or 8
+   bits. */
+
+static AMD64RMI* iselIntExpr_RMI ( ISelEnv* env, IRExpr* e )
+{
+ AMD64RMI* rmi = iselIntExpr_RMI_wrk(env, e);
+ /* sanity checks ... */
+ switch (rmi->tag) {
+ case Armi_Imm:
+ return rmi;
+ case Armi_Reg:
+ vassert(hregClass(rmi->Armi.Reg.reg) == HRcInt64);
+ vassert(hregIsVirtual(rmi->Armi.Reg.reg));
+ return rmi;
+ case Armi_Mem:
+ vassert(sane_AMode(rmi->Armi.Mem.am));
+ return rmi;
+ default:
+ vpanic("iselIntExpr_RMI: unknown amd64 RMI tag");
+ }
+}
+
+/* DO NOT CALL THIS DIRECTLY ! */
+static AMD64RMI* iselIntExpr_RMI_wrk ( ISelEnv* env, IRExpr* e )
+{
+ IRType ty = typeOfIRExpr(env->type_env,e);
+ vassert(ty == Ity_I64 || ty == Ity_I32
+ || ty == Ity_I16 || ty == Ity_I8);
+
+ /* special case: immediate 64/32/16/8 */
+ if (e->tag == Iex_Const) {
+ switch (e->Iex.Const.con->tag) {
+ case Ico_U64:
+ if (fitsIn32Bits(e->Iex.Const.con->Ico.U64)) {
+ return AMD64RMI_Imm(toUInt(e->Iex.Const.con->Ico.U64));
+ }
+ break;
+ case Ico_U32:
+ return AMD64RMI_Imm(e->Iex.Const.con->Ico.U32); break;
+ case Ico_U16:
+ return AMD64RMI_Imm(0xFFFF & e->Iex.Const.con->Ico.U16); break;
+ case Ico_U8:
+ return AMD64RMI_Imm(0xFF & e->Iex.Const.con->Ico.U8); break;
+ default:
+ vpanic("iselIntExpr_RMI.Iex_Const(amd64)");
+ }
+ }
+
+ /* special case: 64-bit GET */
+ if (e->tag == Iex_Get && ty == Ity_I64) {
+ return AMD64RMI_Mem(AMD64AMode_IR(e->Iex.Get.offset,
+ hregAMD64_RBP()));
+ }
+
+ /* special case: 64-bit load from memory */
+ if (e->tag == Iex_Load && ty == Ity_I64
+ && e->Iex.Load.end == Iend_LE) {
+ AMD64AMode* am = iselIntExpr_AMode(env, e->Iex.Load.addr);
+ return AMD64RMI_Mem(am);
+ }
+
+ /* default case: calculate into a register and return that */
+ {
+ HReg r = iselIntExpr_R ( env, e );
+ return AMD64RMI_Reg(r);
+ }
+}
+
+
+/* --------------------- RIs --------------------- */
+
+/* Calculate an expression into an AMD64RI operand. As with
+ iselIntExpr_R, the expression can have type 64, 32, 16 or 8
+ bits. */
+
+static AMD64RI* iselIntExpr_RI ( ISelEnv* env, IRExpr* e )
+{
+ AMD64RI* ri = iselIntExpr_RI_wrk(env, e);
+ /* sanity checks ... */
+ switch (ri->tag) {
+ case Ari_Imm:
+ return ri;
+ case Ari_Reg:
+ vassert(hregClass(ri->Ari.Reg.reg) == HRcInt64);
+ vassert(hregIsVirtual(ri->Ari.Reg.reg));
+ return ri;
+ default:
+ vpanic("iselIntExpr_RI: unknown amd64 RI tag");
+ }
+}
+
+/* DO NOT CALL THIS DIRECTLY ! */
+static AMD64RI* iselIntExpr_RI_wrk ( ISelEnv* env, IRExpr* e )
+{
+ IRType ty = typeOfIRExpr(env->type_env,e);
+ vassert(ty == Ity_I64 || ty == Ity_I32
+ || ty == Ity_I16 || ty == Ity_I8);
+
+ /* special case: immediate */
+ if (e->tag == Iex_Const) {
+ switch (e->Iex.Const.con->tag) {
+ case Ico_U64:
+ if (fitsIn32Bits(e->Iex.Const.con->Ico.U64)) {
+ return AMD64RI_Imm(toUInt(e->Iex.Const.con->Ico.U64));
+ }
+ break;
+ case Ico_U32:
+ return AMD64RI_Imm(e->Iex.Const.con->Ico.U32);
+ case Ico_U16:
+ return AMD64RI_Imm(0xFFFF & e->Iex.Const.con->Ico.U16);
+ case Ico_U8:
+ return AMD64RI_Imm(0xFF & e->Iex.Const.con->Ico.U8);
+ default:
+ vpanic("iselIntExpr_RMI.Iex_Const(amd64)");
+ }
+ }
+
+ /* default case: calculate into a register and return that */
+ {
+ HReg r = iselIntExpr_R ( env, e );
+ return AMD64RI_Reg(r);
+ }
+}
+
+
+/* --------------------- RMs --------------------- */
+
+/* Similarly, calculate an expression into an AMD64RM operand. As
+ with iselIntExpr_R, the expression can have type 64, 32, 16 or 8
+ bits. */
+
+static AMD64RM* iselIntExpr_RM ( ISelEnv* env, IRExpr* e )
+{
+ AMD64RM* rm = iselIntExpr_RM_wrk(env, e);
+ /* sanity checks ... */
+ switch (rm->tag) {
+ case Arm_Reg:
+ vassert(hregClass(rm->Arm.Reg.reg) == HRcInt64);
+ vassert(hregIsVirtual(rm->Arm.Reg.reg));
+ return rm;
+ case Arm_Mem:
+ vassert(sane_AMode(rm->Arm.Mem.am));
+ return rm;
+ default:
+ vpanic("iselIntExpr_RM: unknown amd64 RM tag");
+ }
+}
+
+/* DO NOT CALL THIS DIRECTLY ! */
+static AMD64RM* iselIntExpr_RM_wrk ( ISelEnv* env, IRExpr* e )
+{
+ IRType ty = typeOfIRExpr(env->type_env,e);
+ vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
+
+ /* special case: 64-bit GET */
+ if (e->tag == Iex_Get && ty == Ity_I64) {
+ return AMD64RM_Mem(AMD64AMode_IR(e->Iex.Get.offset,
+ hregAMD64_RBP()));
+ }
+
+   /* special case: load from memory -- not handled specially here;
+      such expressions fall through to the default case below. */
+
+ /* default case: calculate into a register and return that */
+ {
+ HReg r = iselIntExpr_R ( env, e );
+ return AMD64RM_Reg(r);
+ }
+}
+
+
+/* --------------------- CONDCODE --------------------- */
+
+/* Generate code to evaluate a bit-typed expression, returning the
+   condition code which corresponds to the expression notionally
+   having returned 1. */
+
+static AMD64CondCode iselCondCode ( ISelEnv* env, IRExpr* e )
+{
+ /* Uh, there's nothing we can sanity check here, unfortunately. */
+ return iselCondCode_wrk(env,e);
+}
+
+/* DO NOT CALL THIS DIRECTLY ! */
+static AMD64CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e )
+{
+ MatchInfo mi;
+
+ vassert(e);
+ vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);
+
+ /* var */
+ if (e->tag == Iex_RdTmp) {
+ HReg r64 = lookupIRTemp(env, e->Iex.RdTmp.tmp);
+ HReg dst = newVRegI(env);
+ addInstr(env, mk_iMOVsd_RR(r64,dst));
+ addInstr(env, AMD64Instr_Alu64R(Aalu_AND,AMD64RMI_Imm(1),dst));
+ return Acc_NZ;
+ }
+
+ /* Constant 1:Bit */
+ if (e->tag == Iex_Const) {
+ HReg r;
+ vassert(e->Iex.Const.con->tag == Ico_U1);
+ vassert(e->Iex.Const.con->Ico.U1 == True
+ || e->Iex.Const.con->Ico.U1 == False);
+ r = newVRegI(env);
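+      /* xor r,r sets ZF, so the Z condition now holds; hence return
+         Z for a True constant and NZ for False. */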
+ addInstr(env, AMD64Instr_Alu64R(Aalu_MOV,AMD64RMI_Imm(0),r));
+ addInstr(env, AMD64Instr_Alu64R(Aalu_XOR,AMD64RMI_Reg(r),r));
+ return e->Iex.Const.con->Ico.U1 ? Acc_Z : Acc_NZ;
+ }
+
+ /* Not1(...) */
+ if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) {
+ /* Generate code for the arg, and negate the test condition */
+ return 1 ^ iselCondCode(env, e->Iex.Unop.arg);
+ }
+
+ /* --- patterns rooted at: 64to1 --- */
+
+ /* 64to1 */
+ if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_64to1) {
+ HReg reg = iselIntExpr_R(env, e->Iex.Unop.arg);
+ addInstr(env, AMD64Instr_Test64(1,reg));
+ return Acc_NZ;
+ }
+
+ /* --- patterns rooted at: CmpNEZ8 --- */
+
+ /* CmpNEZ8(x) */
+ if (e->tag == Iex_Unop
+ && e->Iex.Unop.op == Iop_CmpNEZ8) {
+ HReg r = iselIntExpr_R(env, e->Iex.Unop.arg);
+ addInstr(env, AMD64Instr_Test64(0xFF,r));
+ return Acc_NZ;
+ }
+
+ /* --- patterns rooted at: CmpNEZ16 --- */
+
+ /* CmpNEZ16(x) */
+ if (e->tag == Iex_Unop
+ && e->Iex.Unop.op == Iop_CmpNEZ16) {
+ HReg r = iselIntExpr_R(env, e->Iex.Unop.arg);
+ addInstr(env, AMD64Instr_Test64(0xFFFF,r));
+ return Acc_NZ;
+ }
+
+ /* --- patterns rooted at: CmpNEZ32 --- */
+
+ /* CmpNEZ32(x) */
+ if (e->tag == Iex_Unop
+ && e->Iex.Unop.op == Iop_CmpNEZ32) {
+ HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
+ HReg tmp = newVRegI(env);
+ AMD64RMI* rmi2 = AMD64RMI_Imm(0);
+ addInstr(env, AMD64Instr_MovxLQ(False, r1, tmp));
+ addInstr(env, AMD64Instr_Alu64R(Aalu_CMP,rmi2,tmp));
+ return Acc_NZ;
+ }
+
+ /* --- patterns rooted at: CmpNEZ64 --- */
+
+ /* CmpNEZ64(Or64(x,y)) */
+ {
+ DECLARE_PATTERN(p_CmpNEZ64_Or64);
+ DEFINE_PATTERN(p_CmpNEZ64_Or64,
+ unop(Iop_CmpNEZ64, binop(Iop_Or64, bind(0), bind(1))));
+ if (matchIRExpr(&mi, p_CmpNEZ64_Or64, e)) {
+ HReg r0 = iselIntExpr_R(env, mi.bindee[0]);
+ AMD64RMI* rmi1 = iselIntExpr_RMI(env, mi.bindee[1]);
+ HReg tmp = newVRegI(env);
+ addInstr(env, mk_iMOVsd_RR(r0, tmp));
+ addInstr(env, AMD64Instr_Alu64R(Aalu_OR,rmi1,tmp));
+ return Acc_NZ;
+ }
+ }
+
+ /* CmpNEZ64(x) */
+ if (e->tag == Iex_Unop
+ && e->Iex.Unop.op == Iop_CmpNEZ64) {
+ HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
+ AMD64RMI* rmi2 = AMD64RMI_Imm(0);
+ addInstr(env, AMD64Instr_Alu64R(Aalu_CMP,rmi2,r1));
+ return Acc_NZ;
+ }
+
+ /* --- patterns rooted at: Cmp{EQ,NE}{8,16,32} --- */
+
+ /* CmpEQ8 / CmpNE8 */
+ if (e->tag == Iex_Binop
+ && (e->Iex.Binop.op == Iop_CmpEQ8
+ || e->Iex.Binop.op == Iop_CmpNE8
+ || e->Iex.Binop.op == Iop_CasCmpEQ8
+ || e->Iex.Binop.op == Iop_CasCmpNE8)) {
+ HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
+ AMD64RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
+ HReg r = newVRegI(env);
+ addInstr(env, mk_iMOVsd_RR(r1,r));
+ addInstr(env, AMD64Instr_Alu64R(Aalu_XOR,rmi2,r));
+ addInstr(env, AMD64Instr_Alu64R(Aalu_AND,AMD64RMI_Imm(0xFF),r));
+ switch (e->Iex.Binop.op) {
+ case Iop_CmpEQ8: case Iop_CasCmpEQ8: return Acc_Z;
+ case Iop_CmpNE8: case Iop_CasCmpNE8: return Acc_NZ;
+ default: vpanic("iselCondCode(amd64): CmpXX8");
+ }
+ }
+
+ /* CmpEQ16 / CmpNE16 */
+ if (e->tag == Iex_Binop
+ && (e->Iex.Binop.op == Iop_CmpEQ16
+ || e->Iex.Binop.op == Iop_CmpNE16
+ || e->Iex.Binop.op == Iop_CasCmpEQ16
+ || e->Iex.Binop.op == Iop_CasCmpNE16)) {
+ HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
+ AMD64RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
+ HReg r = newVRegI(env);
+ addInstr(env, mk_iMOVsd_RR(r1,r));
+ addInstr(env, AMD64Instr_Alu64R(Aalu_XOR,rmi2,r));
+ addInstr(env, AMD64Instr_Alu64R(Aalu_AND,AMD64RMI_Imm(0xFFFF),r));
+ switch (e->Iex.Binop.op) {
+ case Iop_CmpEQ16: case Iop_CasCmpEQ16: return Acc_Z;
+ case Iop_CmpNE16: case Iop_CasCmpNE16: return Acc_NZ;
+ default: vpanic("iselCondCode(amd64): CmpXX16");
+ }
+ }
+
+ /* CmpEQ32 / CmpNE32 */
+ if (e->tag == Iex_Binop
+ && (e->Iex.Binop.op == Iop_CmpEQ32
+ || e->Iex.Binop.op == Iop_CmpNE32
+ || e->Iex.Binop.op == Iop_CasCmpEQ32
+ || e->Iex.Binop.op == Iop_CasCmpNE32)) {
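+      /* Compute arg1 ^ arg2, then shift left by 32 so that only the
+         low 32 bits of the XOR determine the Z flag. */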
+ HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
+ AMD64RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
+ HReg r = newVRegI(env);
+ addInstr(env, mk_iMOVsd_RR(r1,r));
+ addInstr(env, AMD64Instr_Alu64R(Aalu_XOR,rmi2,r));
+ addInstr(env, AMD64Instr_Sh64(Ash_SHL, 32, r));
+ switch (e->Iex.Binop.op) {
+ case Iop_CmpEQ32: case Iop_CasCmpEQ32: return Acc_Z;
+ case Iop_CmpNE32: case Iop_CasCmpNE32: return Acc_NZ;
+ default: vpanic("iselCondCode(amd64): CmpXX32");
+ }
+ }
+
+ /* Cmp*64*(x,y) */
+ if (e->tag == Iex_Binop
+ && (e->Iex.Binop.op == Iop_CmpEQ64
+ || e->Iex.Binop.op == Iop_CmpNE64
+ || e->Iex.Binop.op == Iop_CmpLT64S
+ || e->Iex.Binop.op == Iop_CmpLT64U
+ || e->Iex.Binop.op == Iop_CmpLE64S
+ || e->Iex.Binop.op == Iop_CmpLE64U
+ || e->Iex.Binop.op == Iop_CasCmpEQ64
+ || e->Iex.Binop.op == Iop_CasCmpNE64)) {
+ HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
+ AMD64RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
+ addInstr(env, AMD64Instr_Alu64R(Aalu_CMP,rmi2,r1));
+ switch (e->Iex.Binop.op) {
+ case Iop_CmpEQ64: case Iop_CasCmpEQ64: return Acc_Z;
+ case Iop_CmpNE64: case Iop_CasCmpNE64: return Acc_NZ;
+ case Iop_CmpLT64S: return Acc_L;
+ case Iop_CmpLT64U: return Acc_B;
+ case Iop_CmpLE64S: return Acc_LE;
+ case Iop_CmpLE64U: return Acc_BE;
+ default: vpanic("iselCondCode(amd64): CmpXX64");
+ }
+ }
+
+ ppIRExpr(e);
+ vpanic("iselCondCode(amd64)");
+}
+
+
+/*---------------------------------------------------------*/
+/*--- ISEL: Integer expressions (128 bit) ---*/
+/*---------------------------------------------------------*/
+
+/* Compute a 128-bit value into a register pair, which is returned as
+ the first two parameters. As with iselIntExpr_R, these may be
+ either real or virtual regs; in any case they must not be changed
+ by subsequent code emitted by the caller. */
+
+static void iselInt128Expr ( HReg* rHi, HReg* rLo,
+ ISelEnv* env, IRExpr* e )
+{
+ iselInt128Expr_wrk(rHi, rLo, env, e);
+# if 0
+ vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
+# endif
+ vassert(hregClass(*rHi) == HRcInt64);
+ vassert(hregIsVirtual(*rHi));
+ vassert(hregClass(*rLo) == HRcInt64);
+ vassert(hregIsVirtual(*rLo));
+}
+
+/* DO NOT CALL THIS DIRECTLY ! */
+static void iselInt128Expr_wrk ( HReg* rHi, HReg* rLo,
+ ISelEnv* env, IRExpr* e )
+{
+//.. HWord fn = 0; /* helper fn for most SIMD64 stuff */
+ vassert(e);
+ vassert(typeOfIRExpr(env->type_env,e) == Ity_I128);
+
+//.. /* 64-bit literal */
+//.. if (e->tag == Iex_Const) {
+//.. ULong w64 = e->Iex.Const.con->Ico.U64;
+//.. UInt wHi = ((UInt)(w64 >> 32)) & 0xFFFFFFFF;
+//.. UInt wLo = ((UInt)w64) & 0xFFFFFFFF;
+//.. HReg tLo = newVRegI(env);
+//.. HReg tHi = newVRegI(env);
+//.. vassert(e->Iex.Const.con->tag == Ico_U64);
+//.. addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wHi), tHi));
+//.. addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wLo), tLo));
+//.. *rHi = tHi;
+//.. *rLo = tLo;
+//.. return;
+//.. }
+
+ /* read 128-bit IRTemp */
+ if (e->tag == Iex_RdTmp) {
+ lookupIRTemp128( rHi, rLo, env, e->Iex.RdTmp.tmp);
+ return;
+ }
+
+//.. /* 64-bit load */
+//.. if (e->tag == Iex_LDle) {
+//.. HReg tLo, tHi;
+//.. X86AMode *am0, *am4;
+//.. vassert(e->Iex.LDle.ty == Ity_I64);
+//.. tLo = newVRegI(env);
+//.. tHi = newVRegI(env);
+//.. am0 = iselIntExpr_AMode(env, e->Iex.LDle.addr);
+//.. am4 = advance4(am0);
+//.. addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am0), tLo ));
+//.. addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi ));
+//.. *rHi = tHi;
+//.. *rLo = tLo;
+//.. return;
+//.. }
+//..
+//.. /* 64-bit GET */
+//.. if (e->tag == Iex_Get) {
+//.. X86AMode* am = X86AMode_IR(e->Iex.Get.offset, hregX86_EBP());
+//.. X86AMode* am4 = advance4(am);
+//.. HReg tLo = newVRegI(env);
+//.. HReg tHi = newVRegI(env);
+//.. addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am), tLo ));
+//.. addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi ));
+//.. *rHi = tHi;
+//.. *rLo = tLo;
+//.. return;
+//.. }
+//..
+//.. /* 64-bit GETI */
+//.. if (e->tag == Iex_GetI) {
+//.. X86AMode* am
+//.. = genGuestArrayOffset( env, e->Iex.GetI.descr,
+//.. e->Iex.GetI.ix, e->Iex.GetI.bias );
+//.. X86AMode* am4 = advance4(am);
+//.. HReg tLo = newVRegI(env);
+//.. HReg tHi = newVRegI(env);
+//.. addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am), tLo ));
+//.. addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi ));
+//.. *rHi = tHi;
+//.. *rLo = tLo;
+//.. return;
+//.. }
+//..
+//.. /* 64-bit Mux0X */
+//.. if (e->tag == Iex_Mux0X) {
+//.. HReg e0Lo, e0Hi, eXLo, eXHi, r8;
+//.. HReg tLo = newVRegI(env);
+//.. HReg tHi = newVRegI(env);
+//.. iselInt64Expr(&e0Hi, &e0Lo, env, e->Iex.Mux0X.expr0);
+//.. iselInt64Expr(&eXHi, &eXLo, env, e->Iex.Mux0X.exprX);
+//.. addInstr(env, mk_iMOVsd_RR(eXHi, tHi));
+//.. addInstr(env, mk_iMOVsd_RR(eXLo, tLo));
+//.. r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond);
+//.. addInstr(env, X86Instr_Test32(X86RI_Imm(0xFF), X86RM_Reg(r8)));
+//.. /* This assumes the first cmov32 doesn't trash the condition
+//.. codes, so they are still available for the second cmov32 */
+//.. addInstr(env, X86Instr_CMov32(Xcc_Z,X86RM_Reg(e0Hi),tHi));
+//.. addInstr(env, X86Instr_CMov32(Xcc_Z,X86RM_Reg(e0Lo),tLo));
+//.. *rHi = tHi;
+//.. *rLo = tLo;
+//.. return;
+//.. }
+
+ /* --------- BINARY ops --------- */
+ if (e->tag == Iex_Binop) {
+ switch (e->Iex.Binop.op) {
+ /* 64 x 64 -> 128 multiply */
+ case Iop_MullU64:
+ case Iop_MullS64: {
+ /* get one operand into %rax, and the other into a R/M.
+            Need to make an educated guess about which operand is
+            better placed in which. */
+ HReg tLo = newVRegI(env);
+ HReg tHi = newVRegI(env);
+ Bool syned = toBool(e->Iex.Binop.op == Iop_MullS64);
+ AMD64RM* rmLeft = iselIntExpr_RM(env, e->Iex.Binop.arg1);
+ HReg rRight = iselIntExpr_R(env, e->Iex.Binop.arg2);
+ addInstr(env, mk_iMOVsd_RR(rRight, hregAMD64_RAX()));
+ addInstr(env, AMD64Instr_MulL(syned, rmLeft));
+ /* Result is now in RDX:RAX. Tell the caller. */
+ addInstr(env, mk_iMOVsd_RR(hregAMD64_RDX(), tHi));
+ addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), tLo));
+ *rHi = tHi;
+ *rLo = tLo;
+ return;
+ }
+
+ /* 128 x 64 -> (64(rem),64(div)) division */
+ case Iop_DivModU128to64:
+ case Iop_DivModS128to64: {
+ /* Get the 128-bit operand into rdx:rax, and the other into
+ any old R/M. */
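+         /* The div leaves the quotient in %rax and the remainder in
+            %rdx; these are handed back as (rHi = remainder,
+            rLo = quotient), matching the (64(rem),64(div)) result
+            convention. */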
+ HReg sHi, sLo;
+ HReg tLo = newVRegI(env);
+ HReg tHi = newVRegI(env);
+ Bool syned = toBool(e->Iex.Binop.op == Iop_DivModS128to64);
+ AMD64RM* rmRight = iselIntExpr_RM(env, e->Iex.Binop.arg2);
+ iselInt128Expr(&sHi,&sLo, env, e->Iex.Binop.arg1);
+ addInstr(env, mk_iMOVsd_RR(sHi, hregAMD64_RDX()));
+ addInstr(env, mk_iMOVsd_RR(sLo, hregAMD64_RAX()));
+ addInstr(env, AMD64Instr_Div(syned, 8, rmRight));
+ addInstr(env, mk_iMOVsd_RR(hregAMD64_RDX(), tHi));
+ addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), tLo));
+ *rHi = tHi;
+ *rLo = tLo;
+ return;
+ }
+
+ /* 64HLto128(e1,e2) */
+ case Iop_64HLto128:
+ *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
+ *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
+ return;
+
+//.. /* Or64/And64/Xor64 */
+//.. case Iop_Or64:
+//.. case Iop_And64:
+//.. case Iop_Xor64: {
+//.. HReg xLo, xHi, yLo, yHi;
+//.. HReg tLo = newVRegI(env);
+//.. HReg tHi = newVRegI(env);
+//.. X86AluOp op = e->Iex.Binop.op==Iop_Or64 ? Xalu_OR
+//.. : e->Iex.Binop.op==Iop_And64 ? Xalu_AND
+//.. : Xalu_XOR;
+//.. iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
+//.. addInstr(env, mk_iMOVsd_RR(xHi, tHi));
+//.. addInstr(env, mk_iMOVsd_RR(xLo, tLo));
+//.. iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
+//.. addInstr(env, X86Instr_Alu32R(op, X86RMI_Reg(yHi), tHi));
+//.. addInstr(env, X86Instr_Alu32R(op, X86RMI_Reg(yLo), tLo));
+//.. *rHi = tHi;
+//.. *rLo = tLo;
+//.. return;
+//.. }
+//..
+//.. /* Add64/Sub64 */
+//.. case Iop_Add64:
+//.. case Iop_Sub64: {
+//.. HReg xLo, xHi, yLo, yHi;
+//.. HReg tLo = newVRegI(env);
+//.. HReg tHi = newVRegI(env);
+//.. iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
+//.. addInstr(env, mk_iMOVsd_RR(xHi, tHi));
+//.. addInstr(env, mk_iMOVsd_RR(xLo, tLo));
+//.. iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
+//.. if (e->Iex.Binop.op==Iop_Add64) {
+//.. addInstr(env, X86Instr_Alu32R(Xalu_ADD, X86RMI_Reg(yLo), tLo));
+//.. addInstr(env, X86Instr_Alu32R(Xalu_ADC, X86RMI_Reg(yHi), tHi));
+//.. } else {
+//.. addInstr(env, X86Instr_Alu32R(Xalu_SUB, X86RMI_Reg(yLo), tLo));
+//.. addInstr(env, X86Instr_Alu32R(Xalu_SBB, X86RMI_Reg(yHi), tHi));
+//.. }
+//.. *rHi = tHi;
+//.. *rLo = tLo;
+//.. return;
+//.. }
+//..
+//.. /* 32HLto64(e1,e2) */
+//.. case Iop_32HLto64:
+//.. *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
+//.. *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
+//.. return;
+//..
+//.. /* 64-bit shifts */
+//.. case Iop_Shl64: {
+//.. /* We use the same ingenious scheme as gcc. Put the value
+//.. to be shifted into %hi:%lo, and the shift amount into
+//.. %cl. Then (dsts on right, a la ATT syntax):
+//..
+//.. shldl %cl, %lo, %hi -- make %hi be right for the
+//.. -- shift amt %cl % 32
+//.. shll %cl, %lo -- make %lo be right for the
+//.. -- shift amt %cl % 32
+//..
+//.. Now, if (shift amount % 64) is in the range 32 .. 63,
+//.. we have to do a fixup, which puts the result low half
+//.. into the result high half, and zeroes the low half:
+//..
+//.. testl $32, %ecx
+//..
+//.. cmovnz %lo, %hi
+//.. movl $0, %tmp -- sigh; need yet another reg
+//.. cmovnz %tmp, %lo
+//.. */
+//.. HReg rAmt, sHi, sLo, tHi, tLo, tTemp;
+//.. tLo = newVRegI(env);
+//.. tHi = newVRegI(env);
+//.. tTemp = newVRegI(env);
+//.. rAmt = iselIntExpr_R(env, e->Iex.Binop.arg2);
+//.. iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1);
+//.. addInstr(env, mk_iMOVsd_RR(rAmt, hregX86_ECX()));
+//.. addInstr(env, mk_iMOVsd_RR(sHi, tHi));
+//.. addInstr(env, mk_iMOVsd_RR(sLo, tLo));
+//.. /* Ok. Now shift amt is in %ecx, and value is in tHi/tLo
+//.. and those regs are legitimately modifiable. */
+//.. addInstr(env, X86Instr_Sh3232(Xsh_SHL, 0/*%cl*/, tLo, tHi));
+//.. addInstr(env, X86Instr_Sh32(Xsh_SHL, 0/*%cl*/, X86RM_Reg(tLo)));
+//.. addInstr(env, X86Instr_Test32(X86RI_Imm(32),
+//.. X86RM_Reg(hregX86_ECX())));
+//.. addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tLo), tHi));
+//.. addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tTemp));
+//.. addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tTemp), tLo));
+//.. *rHi = tHi;
+//.. *rLo = tLo;
+//.. return;
+//.. }
+//..
+//.. case Iop_Shr64: {
+//.. /* We use the same ingenious scheme as gcc. Put the value
+//.. to be shifted into %hi:%lo, and the shift amount into
+//.. %cl. Then:
+//..
+//.. shrdl %cl, %hi, %lo -- make %lo be right for the
+//.. -- shift amt %cl % 32
+//.. shrl %cl, %hi -- make %hi be right for the
+//.. -- shift amt %cl % 32
+//..
+//.. Now, if (shift amount % 64) is in the range 32 .. 63,
+//.. we have to do a fixup, which puts the result high half
+//.. into the result low half, and zeroes the high half:
+//..
+//.. testl $32, %ecx
+//..
+//.. cmovnz %hi, %lo
+//.. movl $0, %tmp -- sigh; need yet another reg
+//.. cmovnz %tmp, %hi
+//.. */
+//.. HReg rAmt, sHi, sLo, tHi, tLo, tTemp;
+//.. tLo = newVRegI(env);
+//.. tHi = newVRegI(env);
+//.. tTemp = newVRegI(env);
+//.. rAmt = iselIntExpr_R(env, e->Iex.Binop.arg2);
+//.. iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1);
+//.. addInstr(env, mk_iMOVsd_RR(rAmt, hregX86_ECX()));
+//.. addInstr(env, mk_iMOVsd_RR(sHi, tHi));
+//.. addInstr(env, mk_iMOVsd_RR(sLo, tLo));
+//.. /* Ok. Now shift amt is in %ecx, and value is in tHi/tLo
+//.. and those regs are legitimately modifiable. */
+//.. addInstr(env, X86Instr_Sh3232(Xsh_SHR, 0/*%cl*/, tHi, tLo));
+//.. addInstr(env, X86Instr_Sh32(Xsh_SHR, 0/*%cl*/, X86RM_Reg(tHi)));
+//.. addInstr(env, X86Instr_Test32(X86RI_Imm(32),
+//.. X86RM_Reg(hregX86_ECX())));
+//.. addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tHi), tLo));
+//.. addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tTemp));
+//.. addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tTemp), tHi));
+//.. *rHi = tHi;
+//.. *rLo = tLo;
+//.. return;
+//.. }
+//..
+//.. /* F64 -> I64 */
+//.. /* Sigh, this is an almost exact copy of the F64 -> I32/I16
+//.. case. Unfortunately I see no easy way to avoid the
+//.. duplication. */
+//.. case Iop_F64toI64: {
+//.. HReg rf = iselDblExpr(env, e->Iex.Binop.arg2);
+//.. HReg tLo = newVRegI(env);
+//.. HReg tHi = newVRegI(env);
+//..
+//.. /* Used several times ... */
+//.. /* Careful ... this sharing is only safe because
+//.. zero_esp/four_esp do not hold any registers which the
+//.. register allocator could attempt to swizzle later. */
+//.. X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
+//.. X86AMode* four_esp = X86AMode_IR(4, hregX86_ESP());
+//..
+//.. /* rf now holds the value to be converted, and rrm holds
+//.. the rounding mode value, encoded as per the
+//.. IRRoundingMode enum. The first thing to do is set the
+//.. FPU's rounding mode accordingly. */
+//..
+//.. /* Create a space for the format conversion. */
+//.. /* subl $8, %esp */
+//.. sub_from_esp(env, 8);
+//..
+//.. /* Set host rounding mode */
+//.. set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
+//..
+//.. /* gistll %rf, 0(%esp) */
+//.. addInstr(env, X86Instr_FpLdStI(False/*store*/, 8, rf, zero_esp));
+//..
+//.. /* movl 0(%esp), %dstLo */
+//.. /* movl 4(%esp), %dstHi */
+//.. addInstr(env, X86Instr_Alu32R(
+//.. Xalu_MOV, X86RMI_Mem(zero_esp), tLo));
+//.. addInstr(env, X86Instr_Alu32R(
+//.. Xalu_MOV, X86RMI_Mem(four_esp), tHi));
+//..
+//.. /* Restore default FPU rounding. */
+//.. set_FPU_rounding_default( env );
+//..
+//.. /* addl $8, %esp */
+//.. add_to_esp(env, 8);
+//..
+//.. *rHi = tHi;
+//.. *rLo = tLo;
+//.. return;
+//.. }
+//..
+ default:
+ break;
+ }
+ } /* if (e->tag == Iex_Binop) */
+
+
+//.. /* --------- UNARY ops --------- */
+//.. if (e->tag == Iex_Unop) {
+//.. switch (e->Iex.Unop.op) {
+//..
+//.. /* 32Sto64(e) */
+//.. case Iop_32Sto64: {
+//.. HReg tLo = newVRegI(env);
+//.. HReg tHi = newVRegI(env);
+//.. HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
+//.. addInstr(env, mk_iMOVsd_RR(src,tHi));
+//.. addInstr(env, mk_iMOVsd_RR(src,tLo));
+//.. addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, X86RM_Reg(tHi)));
+//.. *rHi = tHi;
+//.. *rLo = tLo;
+//.. return;
+//.. }
+//..
+//.. /* 32Uto64(e) */
+//.. case Iop_32Uto64: {
+//.. HReg tLo = newVRegI(env);
+//.. HReg tHi = newVRegI(env);
+//.. HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
+//.. addInstr(env, mk_iMOVsd_RR(src,tLo));
+//.. addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tHi));
+//.. *rHi = tHi;
+//.. *rLo = tLo;
+//.. return;
+//.. }
+
+//.. /* could do better than this, but for now ... */
+//.. case Iop_1Sto64: {
+//.. HReg tLo = newVRegI(env);
+//.. HReg tHi = newVRegI(env);
+//.. X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
+//.. addInstr(env, X86Instr_Set32(cond,tLo));
+//.. addInstr(env, X86Instr_Sh32(Xsh_SHL, 31, X86RM_Reg(tLo)));
+//.. addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, X86RM_Reg(tLo)));
+//.. addInstr(env, mk_iMOVsd_RR(tLo, tHi));
+//.. *rHi = tHi;
+//.. *rLo = tLo;
+//.. return;
+//.. }
+//..
+//.. /* Not64(e) */
+//.. case Iop_Not64: {
+//.. HReg tLo = newVRegI(env);
+//.. HReg tHi = newVRegI(env);
+//.. HReg sHi, sLo;
+//.. iselInt64Expr(&sHi, &sLo, env, e->Iex.Unop.arg);
+//.. addInstr(env, mk_iMOVsd_RR(sHi, tHi));
+//.. addInstr(env, mk_iMOVsd_RR(sLo, tLo));
+//.. addInstr(env, X86Instr_Unary32(Xun_NOT,X86RM_Reg(tHi)));
+//.. addInstr(env, X86Instr_Unary32(Xun_NOT,X86RM_Reg(tLo)));
+//.. *rHi = tHi;
+//.. *rLo = tLo;
+//.. return;
+//.. }
+//..
+//.. default:
+//.. break;
+//.. }
+//.. } /* if (e->tag == Iex_Unop) */
+//..
+//..
+//.. /* --------- CCALL --------- */
+//.. if (e->tag == Iex_CCall) {
+//.. HReg tLo = newVRegI(env);
+//.. HReg tHi = newVRegI(env);
+//..
+//.. /* Marshal args, do the call, clear stack. */
+//.. doHelperCall( env, False, NULL, e->Iex.CCall.cee, e->Iex.CCall.args );
+//..
+//.. addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
+//.. addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
+//.. *rHi = tHi;
+//.. *rLo = tLo;
+//.. return;
+//.. }
+
+ ppIRExpr(e);
+ vpanic("iselInt128Expr");
+}
+
+
+/*---------------------------------------------------------*/
+/*--- ISEL: Floating point expressions (32 bit) ---*/
+/*---------------------------------------------------------*/
+
+/* Nothing interesting here; really just wrappers for
+ 64-bit stuff. */
+
+static HReg iselFltExpr ( ISelEnv* env, IRExpr* e )
+{
+ HReg r = iselFltExpr_wrk( env, e );
+# if 0
+ vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
+# endif
+ vassert(hregClass(r) == HRcVec128);
+ vassert(hregIsVirtual(r));
+ return r;
+}
+
+/* DO NOT CALL THIS DIRECTLY */
+static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e )
+{
+ IRType ty = typeOfIRExpr(env->type_env,e);
+ vassert(ty == Ity_F32);
+
+ if (e->tag == Iex_RdTmp) {
+ return lookupIRTemp(env, e->Iex.RdTmp.tmp);
+ }
+
+ if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
+ AMD64AMode* am;
+ HReg res = newVRegV(env);
+ vassert(e->Iex.Load.ty == Ity_F32);
+ am = iselIntExpr_AMode(env, e->Iex.Load.addr);
+ addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 4, res, am));
+ return res;
+ }
+
+ if (e->tag == Iex_Binop
+ && e->Iex.Binop.op == Iop_F64toF32) {
+ /* Although the result is still held in a standard SSE register,
+ we need to round it to reflect the loss of accuracy/range
+ entailed in casting it to a 32-bit float. */
+ HReg dst = newVRegV(env);
+ HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
+ set_SSE_rounding_mode( env, e->Iex.Binop.arg1 );
+ addInstr(env, AMD64Instr_SseSDSS(True/*D->S*/,src,dst));
+ set_SSE_rounding_default( env );
+ return dst;
+ }
+
+ if (e->tag == Iex_Get) {
+ AMD64AMode* am = AMD64AMode_IR( e->Iex.Get.offset,
+ hregAMD64_RBP() );
+ HReg res = newVRegV(env);
+ addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 4, res, am ));
+ return res;
+ }
+
+ if (e->tag == Iex_Unop
+ && e->Iex.Unop.op == Iop_ReinterpI32asF32) {
+ /* Given an I32, produce an IEEE754 float with the same bit
+ pattern. */
+ HReg dst = newVRegV(env);
+ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
+ AMD64AMode* m4_rsp = AMD64AMode_IR(-4, hregAMD64_RSP());
+ addInstr(env, AMD64Instr_Store(4, src, m4_rsp));
+ addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 4, dst, m4_rsp ));
+ return dst;
+ }
+
+ if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_RoundF32toInt) {
+ AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
+ HReg arg = iselFltExpr(env, e->Iex.Binop.arg2);
+ HReg dst = newVRegV(env);
+
+      /* arg now holds the value to be rounded. The first thing to do
+ is set the FPU's rounding mode accordingly. */
+
+ /* Set host x87 rounding mode */
+ set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
+
+ addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 4, arg, m8_rsp));
+ addInstr(env, AMD64Instr_A87Free(1));
+ addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 4));
+ addInstr(env, AMD64Instr_A87FpOp(Afp_ROUND));
+ addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/, 4));
+ addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 4, dst, m8_rsp));
+
+ /* Restore default x87 rounding. */
+ set_FPU_rounding_default( env );
+
+ return dst;
+ }
+
+ ppIRExpr(e);
+ vpanic("iselFltExpr_wrk");
+}
+
+
+/*---------------------------------------------------------*/
+/*--- ISEL: Floating point expressions (64 bit) ---*/
+/*---------------------------------------------------------*/
+
+/* Compute a 64-bit floating point value into the lower half of an xmm
+ register, the identity of which is returned. As with
+ iselIntExpr_R, the returned reg will be virtual, and it must not be
+ changed by subsequent code emitted by the caller.
+*/
+
+/* IEEE 754 formats. From http://www.freesoft.org/CIE/RFC/1832/32.htm:
+
+ Type S (1 bit) E (11 bits) F (52 bits)
+ ---- --------- ----------- -----------
+ signalling NaN u 2047 (max) .0uuuuu---u
+ (with at least
+ one 1 bit)
+ quiet NaN u 2047 (max) .1uuuuu---u
+
+ negative infinity 1 2047 (max) .000000---0
+
+ positive infinity 0 2047 (max) .000000---0
+
+ negative zero 1 0 .000000---0
+
+ positive zero 0 0 .000000---0
+*/
+
+static HReg iselDblExpr ( ISelEnv* env, IRExpr* e )
+{
+ HReg r = iselDblExpr_wrk( env, e );
+# if 0
+ vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
+# endif
+ vassert(hregClass(r) == HRcVec128);
+ vassert(hregIsVirtual(r));
+ return r;
+}
+
+/* DO NOT CALL THIS DIRECTLY */
+static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
+{
+ IRType ty = typeOfIRExpr(env->type_env,e);
+ vassert(e);
+ vassert(ty == Ity_F64);
+
+ if (e->tag == Iex_RdTmp) {
+ return lookupIRTemp(env, e->Iex.RdTmp.tmp);
+ }
+
+ if (e->tag == Iex_Const) {
+ union { ULong u64; Double f64; } u;
+ HReg res = newVRegV(env);
+ HReg tmp = newVRegI(env);
+ vassert(sizeof(u) == 8);
+ vassert(sizeof(u.u64) == 8);
+ vassert(sizeof(u.f64) == 8);
+
+ if (e->Iex.Const.con->tag == Ico_F64) {
+ u.f64 = e->Iex.Const.con->Ico.F64;
+ }
+ else if (e->Iex.Const.con->tag == Ico_F64i) {
+ u.u64 = e->Iex.Const.con->Ico.F64i;
+ }
+ else
+ vpanic("iselDblExpr(amd64): const");
+
+ addInstr(env, AMD64Instr_Imm64(u.u64, tmp));
+ addInstr(env, AMD64Instr_Push(AMD64RMI_Reg(tmp)));
+ addInstr(env, AMD64Instr_SseLdSt(
+ True/*load*/, 8, res,
+ AMD64AMode_IR(0, hregAMD64_RSP())
+ ));
+ add_to_rsp(env, 8);
+ return res;
+ }
+
+ if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
+ AMD64AMode* am;
+ HReg res = newVRegV(env);
+ vassert(e->Iex.Load.ty == Ity_F64);
+ am = iselIntExpr_AMode(env, e->Iex.Load.addr);
+ addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 8, res, am ));
+ return res;
+ }
+
+ if (e->tag == Iex_Get) {
+ AMD64AMode* am = AMD64AMode_IR( e->Iex.Get.offset,
+ hregAMD64_RBP() );
+ HReg res = newVRegV(env);
+ addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 8, res, am ));
+ return res;
+ }
+
+ if (e->tag == Iex_GetI) {
+ AMD64AMode* am
+ = genGuestArrayOffset(
+ env, e->Iex.GetI.descr,
+ e->Iex.GetI.ix, e->Iex.GetI.bias );
+ HReg res = newVRegV(env);
+ addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 8, res, am ));
+ return res;
+ }
+
+ if (e->tag == Iex_Triop) {
+ AMD64SseOp op = Asse_INVALID;
+ switch (e->Iex.Triop.op) {
+ case Iop_AddF64: op = Asse_ADDF; break;
+ case Iop_SubF64: op = Asse_SUBF; break;
+ case Iop_MulF64: op = Asse_MULF; break;
+ case Iop_DivF64: op = Asse_DIVF; break;
+ default: break;
+ }
+ if (op != Asse_INVALID) {
+ HReg dst = newVRegV(env);
+ HReg argL = iselDblExpr(env, e->Iex.Triop.arg2);
+ HReg argR = iselDblExpr(env, e->Iex.Triop.arg3);
+ addInstr(env, mk_vMOVsd_RR(argL, dst));
+ /* XXXROUNDINGFIXME */
+ /* set roundingmode here */
+ addInstr(env, AMD64Instr_Sse64FLo(op, argR, dst));
+ return dst;
+ }
+ }
+
+ if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_RoundF64toInt) {
+ AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
+ HReg arg = iselDblExpr(env, e->Iex.Binop.arg2);
+ HReg dst = newVRegV(env);
+
+      /* arg now holds the value to be rounded.  The first thing to do
+         is set the FPU's rounding mode accordingly. */
+
+ /* Set host x87 rounding mode */
+ set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
+
+ addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg, m8_rsp));
+ addInstr(env, AMD64Instr_A87Free(1));
+ addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8));
+ addInstr(env, AMD64Instr_A87FpOp(Afp_ROUND));
+ addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/, 8));
+ addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp));
+
+ /* Restore default x87 rounding. */
+ set_FPU_rounding_default( env );
+
+ return dst;
+ }
+
+ if (e->tag == Iex_Triop
+ && (e->Iex.Triop.op == Iop_ScaleF64
+ || e->Iex.Triop.op == Iop_AtanF64
+ || e->Iex.Triop.op == Iop_Yl2xF64
+ || e->Iex.Triop.op == Iop_Yl2xp1F64
+ || e->Iex.Triop.op == Iop_PRemF64
+ || e->Iex.Triop.op == Iop_PRem1F64)
+ ) {
+ AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
+ HReg arg1 = iselDblExpr(env, e->Iex.Triop.arg2);
+ HReg arg2 = iselDblExpr(env, e->Iex.Triop.arg3);
+ HReg dst = newVRegV(env);
+ Bool arg2first = toBool(e->Iex.Triop.op == Iop_ScaleF64
+ || e->Iex.Triop.op == Iop_PRemF64
+ || e->Iex.Triop.op == Iop_PRem1F64);
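+      /* fscale/fprem/fprem1 take their principal operand in %st(0) and
+         the scale factor / divisor in %st(1), so for those ops arg2 is
+         pushed first, leaving arg1 on top of the x87 stack. */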
+ addInstr(env, AMD64Instr_A87Free(2));
+
+ /* one arg -> top of x87 stack */
+ addInstr(env, AMD64Instr_SseLdSt(
+ False/*store*/, 8, arg2first ? arg2 : arg1, m8_rsp));
+ addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8));
+
+ /* other arg -> top of x87 stack */
+ addInstr(env, AMD64Instr_SseLdSt(
+ False/*store*/, 8, arg2first ? arg1 : arg2, m8_rsp));
+ addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8));
+
+ /* do it */
+ /* XXXROUNDINGFIXME */
+ /* set roundingmode here */
+ switch (e->Iex.Triop.op) {
+ case Iop_ScaleF64:
+ addInstr(env, AMD64Instr_A87FpOp(Afp_SCALE));
+ break;
+ case Iop_AtanF64:
+ addInstr(env, AMD64Instr_A87FpOp(Afp_ATAN));
+ break;
+ case Iop_Yl2xF64:
+ addInstr(env, AMD64Instr_A87FpOp(Afp_YL2X));
+ break;
+ case Iop_Yl2xp1F64:
+ addInstr(env, AMD64Instr_A87FpOp(Afp_YL2XP1));
+ break;
+ case Iop_PRemF64:
+ addInstr(env, AMD64Instr_A87FpOp(Afp_PREM));
+ break;
+ case Iop_PRem1F64:
+ addInstr(env, AMD64Instr_A87FpOp(Afp_PREM1));
+ break;
+ default:
+ vassert(0);
+ }
+
+ /* save result */
+ addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/, 8));
+ addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp));
+ return dst;
+ }
+
+ if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_I64StoF64) {
+ HReg dst = newVRegV(env);
+ HReg src = iselIntExpr_R(env, e->Iex.Binop.arg2);
+ set_SSE_rounding_mode( env, e->Iex.Binop.arg1 );
+ addInstr(env, AMD64Instr_SseSI2SF( 8, 8, src, dst ));
+ set_SSE_rounding_default( env );
+ return dst;
+ }
+
+ if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_I32StoF64) {
+ HReg dst = newVRegV(env);
+ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
+ set_SSE_rounding_default( env );
+ addInstr(env, AMD64Instr_SseSI2SF( 4, 8, src, dst ));
+ return dst;
+ }
+
+ if (e->tag == Iex_Unop
+ && (e->Iex.Unop.op == Iop_NegF64
+ || e->Iex.Unop.op == Iop_AbsF64)) {
+ /* Sigh ... very rough code. Could do much better. */
+      /* Get the 128-bit literal 00---0 10---0 (only bit 63 set) into a
+         register, then xor it with the value (to negate) or andn it
+         (to take the absolute value). */
+ HReg r1 = newVRegI(env);
+ HReg dst = newVRegV(env);
+ HReg tmp = newVRegV(env);
+ HReg src = iselDblExpr(env, e->Iex.Unop.arg);
+ AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP());
+ addInstr(env, mk_vMOVsd_RR(src,tmp));
+ addInstr(env, AMD64Instr_Push(AMD64RMI_Imm(0)));
+ addInstr(env, AMD64Instr_Imm64( 1ULL<<63, r1 ));
+ addInstr(env, AMD64Instr_Push(AMD64RMI_Reg(r1)));
+ addInstr(env, AMD64Instr_SseLdSt(True, 16, dst, rsp0));
+
+ if (e->Iex.Unop.op == Iop_NegF64)
+ addInstr(env, AMD64Instr_SseReRg(Asse_XOR, tmp, dst));
+ else
+ addInstr(env, AMD64Instr_SseReRg(Asse_ANDN, tmp, dst));
+
+ add_to_rsp(env, 16);
+ return dst;
+ }
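+   /* Illustration only: a scalar model of the XOR/ANDN trick above,
+      under the assumption (true for amd64) that the IEEE754 sign is
+      bit 63.  Kept under "# if 0"; a sketch, not isel code. */
+# if 0
+   #include <stdint.h>
+   #include <string.h>
+   static double model_NegAbsF64 ( double d, int isAbs )
+   {
+      uint64_t bits, mask = 1ULL << 63;
+      memcpy(&bits, &d, 8);          /* reinterpret, no conversion */
+      bits = isAbs ? (bits & ~mask)  /* ANDN with mask: clear sign (abs) */
+                   : (bits ^ mask);  /* XOR with mask: flip sign (neg)   */
+      memcpy(&d, &bits, 8);
+      return d;
+   }
+# endif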
+
+ if (e->tag == Iex_Binop) {
+ A87FpOp fpop = Afp_INVALID;
+ switch (e->Iex.Binop.op) {
+ case Iop_SqrtF64: fpop = Afp_SQRT; break;
+ case Iop_SinF64: fpop = Afp_SIN; break;
+ case Iop_CosF64: fpop = Afp_COS; break;
+ case Iop_TanF64: fpop = Afp_TAN; break;
+ case Iop_2xm1F64: fpop = Afp_2XM1; break;
+ default: break;
+ }
+ if (fpop != Afp_INVALID) {
+ AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
+ HReg arg = iselDblExpr(env, e->Iex.Binop.arg2);
+ HReg dst = newVRegV(env);
+ Int nNeeded = e->Iex.Binop.op==Iop_TanF64 ? 2 : 1;
+ addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg, m8_rsp));
+ addInstr(env, AMD64Instr_A87Free(nNeeded));
+ addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8));
+ /* XXXROUNDINGFIXME */
+ /* set roundingmode here */
+ addInstr(env, AMD64Instr_A87FpOp(fpop));
+ if (e->Iex.Binop.op==Iop_TanF64) {
+ /* get rid of the extra 1.0 that fptan pushes */
+ addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/, 8));
+ }
+ addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/, 8));
+ addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp));
+ return dst;
+ }
+ }
+
+ if (e->tag == Iex_Unop) {
+ switch (e->Iex.Unop.op) {
+//.. case Iop_I32toF64: {
+//.. HReg dst = newVRegF(env);
+//.. HReg ri = iselIntExpr_R(env, e->Iex.Unop.arg);
+//.. addInstr(env, X86Instr_Push(X86RMI_Reg(ri)));
+//.. set_FPU_rounding_default(env);
+//.. addInstr(env, X86Instr_FpLdStI(
+//.. True/*load*/, 4, dst,
+//.. X86AMode_IR(0, hregX86_ESP())));
+//.. add_to_esp(env, 4);
+//.. return dst;
+//.. }
+ case Iop_ReinterpI64asF64: {
+ /* Given an I64, produce an IEEE754 double with the same
+ bit pattern. */
+ AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
+ HReg dst = newVRegV(env);
+ AMD64RI* src = iselIntExpr_RI(env, e->Iex.Unop.arg);
+ /* paranoia */
+ set_SSE_rounding_default(env);
+ addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, src, m8_rsp));
+ addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp));
+ return dst;
+ }
+ case Iop_F32toF64: {
+ HReg f32;
+ HReg f64 = newVRegV(env);
+ /* this shouldn't be necessary, but be paranoid ... */
+ set_SSE_rounding_default(env);
+ f32 = iselFltExpr(env, e->Iex.Unop.arg);
+ addInstr(env, AMD64Instr_SseSDSS(False/*S->D*/, f32, f64));
+ return f64;
+ }
+ default:
+ break;
+ }
+ }
+
+ /* --------- MULTIPLEX --------- */
+ if (e->tag == Iex_Mux0X) {
+ HReg r8, rX, r0, dst;
+ vassert(ty == Ity_F64);
+ vassert(typeOfIRExpr(env->type_env,e->Iex.Mux0X.cond) == Ity_I8);
+ r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond);
+ rX = iselDblExpr(env, e->Iex.Mux0X.exprX);
+ r0 = iselDblExpr(env, e->Iex.Mux0X.expr0);
+ dst = newVRegV(env);
+ addInstr(env, mk_vMOVsd_RR(rX,dst));
+ addInstr(env, AMD64Instr_Test64(0xFF, r8));
+ addInstr(env, AMD64Instr_SseCMov(Acc_Z,r0,dst));
+ return dst;
+ }
+
+ ppIRExpr(e);
+ vpanic("iselDblExpr_wrk");
+}
+
+
+/*---------------------------------------------------------*/
+/*--- ISEL: SIMD (Vector) expressions, 128 bit. ---*/
+/*---------------------------------------------------------*/
+
+static HReg iselVecExpr ( ISelEnv* env, IRExpr* e )
+{
+ HReg r = iselVecExpr_wrk( env, e );
+# if 0
+ vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
+# endif
+ vassert(hregClass(r) == HRcVec128);
+ vassert(hregIsVirtual(r));
+ return r;
+}
+
+
+/* DO NOT CALL THIS DIRECTLY */
+static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e )
+{
+ HWord fn = 0; /* address of helper fn, if required */
+ Bool arg1isEReg = False;
+ AMD64SseOp op = Asse_INVALID;
+ IRType ty = typeOfIRExpr(env->type_env,e);
+ vassert(e);
+ vassert(ty == Ity_V128);
+
+ if (e->tag == Iex_RdTmp) {
+ return lookupIRTemp(env, e->Iex.RdTmp.tmp);
+ }
+
+ if (e->tag == Iex_Get) {
+ HReg dst = newVRegV(env);
+ addInstr(env, AMD64Instr_SseLdSt(
+ True/*load*/,
+ 16,
+ dst,
+ AMD64AMode_IR(e->Iex.Get.offset, hregAMD64_RBP())
+ )
+ );
+ return dst;
+ }
+
+ if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
+ HReg dst = newVRegV(env);
+ AMD64AMode* am = iselIntExpr_AMode(env, e->Iex.Load.addr);
+ addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 16, dst, am ));
+ return dst;
+ }
+
+ if (e->tag == Iex_Const) {
+ HReg dst = newVRegV(env);
+ vassert(e->Iex.Const.con->tag == Ico_V128);
+ switch (e->Iex.Const.con->Ico.V128) {
+ case 0x0000:
+ dst = generate_zeroes_V128(env);
+ break;
+ case 0xFFFF:
+ dst = generate_ones_V128(env);
+ break;
+ default: {
+ AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP());
+ /* do push_uimm64 twice, first time for the high-order half. */
+ push_uimm64(env, bitmask8_to_bytemask64(
+ (e->Iex.Const.con->Ico.V128 >> 8) & 0xFF
+ ));
+ push_uimm64(env, bitmask8_to_bytemask64(
+ (e->Iex.Const.con->Ico.V128 >> 0) & 0xFF
+ ));
+ addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 16, dst, rsp0 ));
+ add_to_rsp(env, 16);
+ break;
+ }
+ }
+ return dst;
+ }
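+   /* Illustration only: the expansion bitmask8_to_bytemask64 is
+      expected to perform -- Ico_V128 carries one bit per vector byte,
+      so each of the 8 bits becomes a 0x00 or 0xFF byte.  A sketch,
+      assuming exactly that convention: */
+# if 0
+   #include <stdint.h>
+   static uint64_t model_bitmask8_to_bytemask64 ( uint8_t w8 )
+   {
+      uint64_t m = 0;
+      int i;
+      for (i = 0; i < 8; i++)
+         if (w8 & (1 << i))
+            m |= 0xFFULL << (8 * i);
+      return m;
+   }
+# endif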
+
+ if (e->tag == Iex_Unop) {
+ switch (e->Iex.Unop.op) {
+
+ case Iop_NotV128: {
+ HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
+ return do_sse_NotV128(env, arg);
+ }
+
+ case Iop_CmpNEZ64x2: {
+ /* We can use SSE2 instructions for this. */
+ /* Ideally, we want to do a 64Ix2 comparison against zero of
+ the operand. Problem is no such insn exists. Solution
+ therefore is to do a 32Ix4 comparison instead, and bitwise-
+ negate (NOT) the result. Let a,b,c,d be 32-bit lanes, and
+ let the not'd result of this initial comparison be a:b:c:d.
+ What we need to compute is (a|b):(a|b):(c|d):(c|d). So, use
+ pshufd to create a value b:a:d:c, and OR that with a:b:c:d,
+ giving the required result.
+
+ The required selection sequence is 2,3,0,1, which
+ according to Intel's documentation means the pshufd
+ literal value is 0xB1, that is,
+ (2 << 6) | (3 << 4) | (0 << 2) | (1 << 0)
+ */
+ HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
+ HReg tmp = generate_zeroes_V128(env);
+ HReg dst = newVRegV(env);
+ addInstr(env, AMD64Instr_SseReRg(Asse_CMPEQ32, arg, tmp));
+ tmp = do_sse_NotV128(env, tmp);
+ addInstr(env, AMD64Instr_SseShuf(0xB1, tmp, dst));
+ addInstr(env, AMD64Instr_SseReRg(Asse_OR, tmp, dst));
+ return dst;
+ }
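+      /* Illustration only: a scalar model of the sequence above, run
+         on the four 32-bit lanes v[0..3] of the argument.  A sketch,
+         not isel code. */
+# if 0
+      #include <stdint.h>
+      static void model_CmpNEZ64x2 ( uint32_t v[4] )
+      {
+         uint32_t m[4];
+         int i;
+         for (i = 0; i < 4; i++)
+            m[i] = (v[i] == 0) ? 0u : ~0u;   /* not(cmpeq32 vs zero) */
+         /* pshufd 0xB1 produces m[1]:m[0]:m[3]:m[2]; OR-ing that with
+            m gives the per-64-bit-lane nonzero mask. */
+         v[0] = m[0] | m[1];  v[1] = m[1] | m[0];
+         v[2] = m[2] | m[3];  v[3] = m[3] | m[2];
+      }
+# endif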
+
+ case Iop_CmpNEZ32x4: op = Asse_CMPEQ32; goto do_CmpNEZ_vector;
+ case Iop_CmpNEZ16x8: op = Asse_CMPEQ16; goto do_CmpNEZ_vector;
+ case Iop_CmpNEZ8x16: op = Asse_CMPEQ8; goto do_CmpNEZ_vector;
+ do_CmpNEZ_vector:
+ {
+ HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
+ HReg tmp = newVRegV(env);
+ HReg zero = generate_zeroes_V128(env);
+ HReg dst;
+ addInstr(env, mk_vMOVsd_RR(arg, tmp));
+ addInstr(env, AMD64Instr_SseReRg(op, zero, tmp));
+ dst = do_sse_NotV128(env, tmp);
+ return dst;
+ }
+
+ case Iop_Recip32Fx4: op = Asse_RCPF; goto do_32Fx4_unary;
+ case Iop_RSqrt32Fx4: op = Asse_RSQRTF; goto do_32Fx4_unary;
+ case Iop_Sqrt32Fx4: op = Asse_SQRTF; goto do_32Fx4_unary;
+ do_32Fx4_unary:
+ {
+ HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
+ HReg dst = newVRegV(env);
+ addInstr(env, AMD64Instr_Sse32Fx4(op, arg, dst));
+ return dst;
+ }
+
+//.. case Iop_Recip64Fx2: op = Xsse_RCPF; goto do_64Fx2_unary;
+//.. case Iop_RSqrt64Fx2: op = Asse_RSQRTF; goto do_64Fx2_unary;
+ case Iop_Sqrt64Fx2: op = Asse_SQRTF; goto do_64Fx2_unary;
+ do_64Fx2_unary:
+ {
+ HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
+ HReg dst = newVRegV(env);
+ addInstr(env, AMD64Instr_Sse64Fx2(op, arg, dst));
+ return dst;
+ }
+
+ case Iop_Recip32F0x4: op = Asse_RCPF; goto do_32F0x4_unary;
+ case Iop_RSqrt32F0x4: op = Asse_RSQRTF; goto do_32F0x4_unary;
+ case Iop_Sqrt32F0x4: op = Asse_SQRTF; goto do_32F0x4_unary;
+ do_32F0x4_unary:
+ {
+ /* A bit subtle. We have to copy the arg to the result
+ register first, because actually doing the SSE scalar insn
+ leaves the upper 3/4 of the destination register
+ unchanged. Whereas the required semantics of these
+ primops is that the upper 3/4 is simply copied in from the
+ argument. */
+ HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
+ HReg dst = newVRegV(env);
+ addInstr(env, mk_vMOVsd_RR(arg, dst));
+ addInstr(env, AMD64Instr_Sse32FLo(op, arg, dst));
+ return dst;
+ }
+
+//.. case Iop_Recip64F0x2: op = Xsse_RCPF; goto do_64F0x2_unary;
+//.. case Iop_RSqrt64F0x2: op = Xsse_RSQRTF; goto do_64F0x2_unary;
+ case Iop_Sqrt64F0x2: op = Asse_SQRTF; goto do_64F0x2_unary;
+ do_64F0x2_unary:
+ {
+ /* A bit subtle. We have to copy the arg to the result
+ register first, because actually doing the SSE scalar insn
+ leaves the upper half of the destination register
+ unchanged. Whereas the required semantics of these
+ primops is that the upper half is simply copied in from the
+ argument. */
+ HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
+ HReg dst = newVRegV(env);
+ addInstr(env, mk_vMOVsd_RR(arg, dst));
+ addInstr(env, AMD64Instr_Sse64FLo(op, arg, dst));
+ return dst;
+ }
+
+ case Iop_32UtoV128: {
+ HReg dst = newVRegV(env);
+ AMD64AMode* rsp_m32 = AMD64AMode_IR(-32, hregAMD64_RSP());
+ AMD64RI* ri = iselIntExpr_RI(env, e->Iex.Unop.arg);
+ addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, ri, rsp_m32));
+ addInstr(env, AMD64Instr_SseLdzLO(4, dst, rsp_m32));
+ return dst;
+ }
+
+ case Iop_64UtoV128: {
+ HReg dst = newVRegV(env);
+ AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP());
+ AMD64RMI* rmi = iselIntExpr_RMI(env, e->Iex.Unop.arg);
+ addInstr(env, AMD64Instr_Push(rmi));
+ addInstr(env, AMD64Instr_SseLdzLO(8, dst, rsp0));
+ add_to_rsp(env, 8);
+ return dst;
+ }
+
+ default:
+ break;
+ } /* switch (e->Iex.Unop.op) */
+ } /* if (e->tag == Iex_Unop) */
+
+ if (e->tag == Iex_Binop) {
+ switch (e->Iex.Binop.op) {
+
+ case Iop_SetV128lo64: {
+ HReg dst = newVRegV(env);
+ HReg srcV = iselVecExpr(env, e->Iex.Binop.arg1);
+ HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2);
+ AMD64AMode* rsp_m16 = AMD64AMode_IR(-16, hregAMD64_RSP());
+ addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, srcV, rsp_m16));
+ addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, AMD64RI_Reg(srcI), rsp_m16));
+ addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, dst, rsp_m16));
+ return dst;
+ }
+
+ case Iop_SetV128lo32: {
+ HReg dst = newVRegV(env);
+ HReg srcV = iselVecExpr(env, e->Iex.Binop.arg1);
+ HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2);
+ AMD64AMode* rsp_m16 = AMD64AMode_IR(-16, hregAMD64_RSP());
+ addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, srcV, rsp_m16));
+ addInstr(env, AMD64Instr_Store(4, srcI, rsp_m16));
+ addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, dst, rsp_m16));
+ return dst;
+ }
+
+ case Iop_64HLtoV128: {
+ AMD64AMode* rsp = AMD64AMode_IR(0, hregAMD64_RSP());
+ HReg dst = newVRegV(env);
+ /* do this via the stack (easy, convenient, etc) */
+ addInstr(env, AMD64Instr_Push(iselIntExpr_RMI(env, e->Iex.Binop.arg1)));
+ addInstr(env, AMD64Instr_Push(iselIntExpr_RMI(env, e->Iex.Binop.arg2)));
+ addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, dst, rsp));
+ add_to_rsp(env, 16);
+ return dst;
+ }
+
+ case Iop_CmpEQ32Fx4: op = Asse_CMPEQF; goto do_32Fx4;
+ case Iop_CmpLT32Fx4: op = Asse_CMPLTF; goto do_32Fx4;
+ case Iop_CmpLE32Fx4: op = Asse_CMPLEF; goto do_32Fx4;
+ case Iop_CmpUN32Fx4: op = Asse_CMPUNF; goto do_32Fx4;
+ case Iop_Add32Fx4: op = Asse_ADDF; goto do_32Fx4;
+ case Iop_Div32Fx4: op = Asse_DIVF; goto do_32Fx4;
+ case Iop_Max32Fx4: op = Asse_MAXF; goto do_32Fx4;
+ case Iop_Min32Fx4: op = Asse_MINF; goto do_32Fx4;
+ case Iop_Mul32Fx4: op = Asse_MULF; goto do_32Fx4;
+ case Iop_Sub32Fx4: op = Asse_SUBF; goto do_32Fx4;
+ do_32Fx4:
+ {
+ HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
+ HReg dst = newVRegV(env);
+ addInstr(env, mk_vMOVsd_RR(argL, dst));
+ addInstr(env, AMD64Instr_Sse32Fx4(op, argR, dst));
+ return dst;
+ }
+
+ case Iop_CmpEQ64Fx2: op = Asse_CMPEQF; goto do_64Fx2;
+ case Iop_CmpLT64Fx2: op = Asse_CMPLTF; goto do_64Fx2;
+ case Iop_CmpLE64Fx2: op = Asse_CMPLEF; goto do_64Fx2;
+ case Iop_CmpUN64Fx2: op = Asse_CMPUNF; goto do_64Fx2;
+ case Iop_Add64Fx2: op = Asse_ADDF; goto do_64Fx2;
+ case Iop_Div64Fx2: op = Asse_DIVF; goto do_64Fx2;
+ case Iop_Max64Fx2: op = Asse_MAXF; goto do_64Fx2;
+ case Iop_Min64Fx2: op = Asse_MINF; goto do_64Fx2;
+ case Iop_Mul64Fx2: op = Asse_MULF; goto do_64Fx2;
+ case Iop_Sub64Fx2: op = Asse_SUBF; goto do_64Fx2;
+ do_64Fx2:
+ {
+ HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
+ HReg dst = newVRegV(env);
+ addInstr(env, mk_vMOVsd_RR(argL, dst));
+ addInstr(env, AMD64Instr_Sse64Fx2(op, argR, dst));
+ return dst;
+ }
+
+ case Iop_CmpEQ32F0x4: op = Asse_CMPEQF; goto do_32F0x4;
+ case Iop_CmpLT32F0x4: op = Asse_CMPLTF; goto do_32F0x4;
+ case Iop_CmpLE32F0x4: op = Asse_CMPLEF; goto do_32F0x4;
+ case Iop_CmpUN32F0x4: op = Asse_CMPUNF; goto do_32F0x4;
+ case Iop_Add32F0x4: op = Asse_ADDF; goto do_32F0x4;
+ case Iop_Div32F0x4: op = Asse_DIVF; goto do_32F0x4;
+ case Iop_Max32F0x4: op = Asse_MAXF; goto do_32F0x4;
+ case Iop_Min32F0x4: op = Asse_MINF; goto do_32F0x4;
+ case Iop_Mul32F0x4: op = Asse_MULF; goto do_32F0x4;
+ case Iop_Sub32F0x4: op = Asse_SUBF; goto do_32F0x4;
+ do_32F0x4: {
+ HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
+ HReg dst = newVRegV(env);
+ addInstr(env, mk_vMOVsd_RR(argL, dst));
+ addInstr(env, AMD64Instr_Sse32FLo(op, argR, dst));
+ return dst;
+ }
+
+ case Iop_CmpEQ64F0x2: op = Asse_CMPEQF; goto do_64F0x2;
+ case Iop_CmpLT64F0x2: op = Asse_CMPLTF; goto do_64F0x2;
+ case Iop_CmpLE64F0x2: op = Asse_CMPLEF; goto do_64F0x2;
+ case Iop_CmpUN64F0x2: op = Asse_CMPUNF; goto do_64F0x2;
+ case Iop_Add64F0x2: op = Asse_ADDF; goto do_64F0x2;
+ case Iop_Div64F0x2: op = Asse_DIVF; goto do_64F0x2;
+ case Iop_Max64F0x2: op = Asse_MAXF; goto do_64F0x2;
+ case Iop_Min64F0x2: op = Asse_MINF; goto do_64F0x2;
+ case Iop_Mul64F0x2: op = Asse_MULF; goto do_64F0x2;
+ case Iop_Sub64F0x2: op = Asse_SUBF; goto do_64F0x2;
+ do_64F0x2: {
+ HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
+ HReg dst = newVRegV(env);
+ addInstr(env, mk_vMOVsd_RR(argL, dst));
+ addInstr(env, AMD64Instr_Sse64FLo(op, argR, dst));
+ return dst;
+ }
+
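+         /* The pack/unpack cases below are non-commutative: setting
+            arg1isEReg routes IR arg1 into the SSE E (source) operand
+            position and arg2 into the destination, matching the
+            hardware operand order. */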
+ case Iop_QNarrow32Sx4:
+ op = Asse_PACKSSD; arg1isEReg = True; goto do_SseReRg;
+ case Iop_QNarrow16Sx8:
+ op = Asse_PACKSSW; arg1isEReg = True; goto do_SseReRg;
+ case Iop_QNarrow16Ux8:
+ op = Asse_PACKUSW; arg1isEReg = True; goto do_SseReRg;
+
+ case Iop_InterleaveHI8x16:
+ op = Asse_UNPCKHB; arg1isEReg = True; goto do_SseReRg;
+ case Iop_InterleaveHI16x8:
+ op = Asse_UNPCKHW; arg1isEReg = True; goto do_SseReRg;
+ case Iop_InterleaveHI32x4:
+ op = Asse_UNPCKHD; arg1isEReg = True; goto do_SseReRg;
+ case Iop_InterleaveHI64x2:
+ op = Asse_UNPCKHQ; arg1isEReg = True; goto do_SseReRg;
+
+ case Iop_InterleaveLO8x16:
+ op = Asse_UNPCKLB; arg1isEReg = True; goto do_SseReRg;
+ case Iop_InterleaveLO16x8:
+ op = Asse_UNPCKLW; arg1isEReg = True; goto do_SseReRg;
+ case Iop_InterleaveLO32x4:
+ op = Asse_UNPCKLD; arg1isEReg = True; goto do_SseReRg;
+ case Iop_InterleaveLO64x2:
+ op = Asse_UNPCKLQ; arg1isEReg = True; goto do_SseReRg;
+
+ case Iop_AndV128: op = Asse_AND; goto do_SseReRg;
+ case Iop_OrV128: op = Asse_OR; goto do_SseReRg;
+ case Iop_XorV128: op = Asse_XOR; goto do_SseReRg;
+ case Iop_Add8x16: op = Asse_ADD8; goto do_SseReRg;
+ case Iop_Add16x8: op = Asse_ADD16; goto do_SseReRg;
+ case Iop_Add32x4: op = Asse_ADD32; goto do_SseReRg;
+ case Iop_Add64x2: op = Asse_ADD64; goto do_SseReRg;
+ case Iop_QAdd8Sx16: op = Asse_QADD8S; goto do_SseReRg;
+ case Iop_QAdd16Sx8: op = Asse_QADD16S; goto do_SseReRg;
+ case Iop_QAdd8Ux16: op = Asse_QADD8U; goto do_SseReRg;
+ case Iop_QAdd16Ux8: op = Asse_QADD16U; goto do_SseReRg;
+ case Iop_Avg8Ux16: op = Asse_AVG8U; goto do_SseReRg;
+ case Iop_Avg16Ux8: op = Asse_AVG16U; goto do_SseReRg;
+ case Iop_CmpEQ8x16: op = Asse_CMPEQ8; goto do_SseReRg;
+ case Iop_CmpEQ16x8: op = Asse_CMPEQ16; goto do_SseReRg;
+ case Iop_CmpEQ32x4: op = Asse_CMPEQ32; goto do_SseReRg;
+ case Iop_CmpGT8Sx16: op = Asse_CMPGT8S; goto do_SseReRg;
+ case Iop_CmpGT16Sx8: op = Asse_CMPGT16S; goto do_SseReRg;
+ case Iop_CmpGT32Sx4: op = Asse_CMPGT32S; goto do_SseReRg;
+ case Iop_Max16Sx8: op = Asse_MAX16S; goto do_SseReRg;
+ case Iop_Max8Ux16: op = Asse_MAX8U; goto do_SseReRg;
+ case Iop_Min16Sx8: op = Asse_MIN16S; goto do_SseReRg;
+ case Iop_Min8Ux16: op = Asse_MIN8U; goto do_SseReRg;
+ case Iop_MulHi16Ux8: op = Asse_MULHI16U; goto do_SseReRg;
+ case Iop_MulHi16Sx8: op = Asse_MULHI16S; goto do_SseReRg;
+ case Iop_Mul16x8: op = Asse_MUL16; goto do_SseReRg;
+ case Iop_Sub8x16: op = Asse_SUB8; goto do_SseReRg;
+ case Iop_Sub16x8: op = Asse_SUB16; goto do_SseReRg;
+ case Iop_Sub32x4: op = Asse_SUB32; goto do_SseReRg;
+ case Iop_Sub64x2: op = Asse_SUB64; goto do_SseReRg;
+ case Iop_QSub8Sx16: op = Asse_QSUB8S; goto do_SseReRg;
+ case Iop_QSub16Sx8: op = Asse_QSUB16S; goto do_SseReRg;
+ case Iop_QSub8Ux16: op = Asse_QSUB8U; goto do_SseReRg;
+ case Iop_QSub16Ux8: op = Asse_QSUB16U; goto do_SseReRg;
+ do_SseReRg: {
+ HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1);
+ HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2);
+ HReg dst = newVRegV(env);
+ if (arg1isEReg) {
+ addInstr(env, mk_vMOVsd_RR(arg2, dst));
+ addInstr(env, AMD64Instr_SseReRg(op, arg1, dst));
+ } else {
+ addInstr(env, mk_vMOVsd_RR(arg1, dst));
+ addInstr(env, AMD64Instr_SseReRg(op, arg2, dst));
+ }
+ return dst;
+ }
+
+ case Iop_ShlN16x8: op = Asse_SHL16; goto do_SseShift;
+ case Iop_ShlN32x4: op = Asse_SHL32; goto do_SseShift;
+ case Iop_ShlN64x2: op = Asse_SHL64; goto do_SseShift;
+ case Iop_SarN16x8: op = Asse_SAR16; goto do_SseShift;
+ case Iop_SarN32x4: op = Asse_SAR32; goto do_SseShift;
+ case Iop_ShrN16x8: op = Asse_SHR16; goto do_SseShift;
+ case Iop_ShrN32x4: op = Asse_SHR32; goto do_SseShift;
+ case Iop_ShrN64x2: op = Asse_SHR64; goto do_SseShift;
+ do_SseShift: {
+ HReg greg = iselVecExpr(env, e->Iex.Binop.arg1);
+ AMD64RMI* rmi = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
+ AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP());
+ HReg ereg = newVRegV(env);
+ HReg dst = newVRegV(env);
+ addInstr(env, AMD64Instr_Push(AMD64RMI_Imm(0)));
+ addInstr(env, AMD64Instr_Push(rmi));
+ addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, ereg, rsp0));
+ addInstr(env, mk_vMOVsd_RR(greg, dst));
+ addInstr(env, AMD64Instr_SseReRg(op, ereg, dst));
+ add_to_rsp(env, 16);
+ return dst;
+ }
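+         /* Illustration only: the reg-reg SSE2 shift form used above
+            takes its count from the low 64 bits of the E register,
+            which is why the count is pushed beneath a zero qword and
+            reloaded into an xmm.  A sketch via intrinsics: */
+# if 0
+         #include <emmintrin.h>
+         static __m128i model_ShlN16x8 ( __m128i v, unsigned amt )
+         {
+            __m128i count = _mm_cvtsi64_si128((long long)amt);
+            return _mm_sll_epi16(v, count);   /* psllw xmm, xmm */
+         }
+# endif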
+
+ case Iop_Mul32x4: fn = (HWord)h_generic_calc_Mul32x4;
+ goto do_SseAssistedBinary;
+ case Iop_Max32Sx4: fn = (HWord)h_generic_calc_Max32Sx4;
+ goto do_SseAssistedBinary;
+ case Iop_Min32Sx4: fn = (HWord)h_generic_calc_Min32Sx4;
+ goto do_SseAssistedBinary;
+ case Iop_Max32Ux4: fn = (HWord)h_generic_calc_Max32Ux4;
+ goto do_SseAssistedBinary;
+ case Iop_Min32Ux4: fn = (HWord)h_generic_calc_Min32Ux4;
+ goto do_SseAssistedBinary;
+ case Iop_Max16Ux8: fn = (HWord)h_generic_calc_Max16Ux8;
+ goto do_SseAssistedBinary;
+ case Iop_Min16Ux8: fn = (HWord)h_generic_calc_Min16Ux8;
+ goto do_SseAssistedBinary;
+ case Iop_Max8Sx16: fn = (HWord)h_generic_calc_Max8Sx16;
+ goto do_SseAssistedBinary;
+ case Iop_Min8Sx16: fn = (HWord)h_generic_calc_Min8Sx16;
+ goto do_SseAssistedBinary;
+ case Iop_CmpGT64Sx2: fn = (HWord)h_generic_calc_CmpGT64Sx2;
+ goto do_SseAssistedBinary;
+ do_SseAssistedBinary: {
+ /* RRRufff! RRRufff code is what we're generating here. Oh
+ well. */
+ vassert(fn != 0);
+ HReg dst = newVRegV(env);
+ HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
+ HReg argp = newVRegI(env);
+         /* subq $112, %rsp -- make a space */
+ sub_from_rsp(env, 112);
+ /* leaq 48(%rsp), %r_argp -- point into it */
+ addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(48, hregAMD64_RSP()),
+ argp));
+ /* andq $-16, %r_argp -- 16-align the pointer */
+ addInstr(env, AMD64Instr_Alu64R(Aalu_AND,
+ AMD64RMI_Imm( ~(UInt)15 ),
+ argp));
+ /* Prepare 3 arg regs:
+ leaq 0(%r_argp), %rdi
+ leaq 16(%r_argp), %rsi
+ leaq 32(%r_argp), %rdx
+ */
+ addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(0, argp),
+ hregAMD64_RDI()));
+ addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(16, argp),
+ hregAMD64_RSI()));
+ addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(32, argp),
+ hregAMD64_RDX()));
+ /* Store the two args, at (%rsi) and (%rdx):
+ movupd %argL, 0(%rsi)
+ movupd %argR, 0(%rdx)
+ */
+ addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argL,
+ AMD64AMode_IR(0, hregAMD64_RSI())));
+ addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argR,
+ AMD64AMode_IR(0, hregAMD64_RDX())));
+ /* call the helper */
+ addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn, 3 ));
+ /* fetch the result from memory, using %r_argp, which the
+ register allocator will keep alive across the call. */
+ addInstr(env, AMD64Instr_SseLdSt(True/*isLoad*/, 16, dst,
+ AMD64AMode_IR(0, argp)));
+ /* and finally, clear the space */
+ add_to_rsp(env, 112);
+ return dst;
+ }
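+         /* Illustration only: assumed shape of the h_generic_calc_*
+            helpers called above.  The three LEAs pass &res, &argL,
+            &argR in %rdi/%rsi/%rdx per the amd64 ELF ABI, and the
+            helper works lane by lane in C.  A sketch; the V128_model
+            type is local to this illustration: */
+# if 0
+         #include <stdint.h>
+         typedef struct { uint32_t w32[4]; } V128_model;
+         static void model_calc_Mul32x4 ( V128_model* res,
+                                          V128_model* argL,
+                                          V128_model* argR )
+         {
+            int i;
+            for (i = 0; i < 4; i++)
+               res->w32[i] = argL->w32[i] * argR->w32[i];
+         }
+# endif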
+
+ default:
+ break;
+ } /* switch (e->Iex.Binop.op) */
+ } /* if (e->tag == Iex_Binop) */
+
+ if (e->tag == Iex_Mux0X) {
+ HReg r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond);
+ HReg rX = iselVecExpr(env, e->Iex.Mux0X.exprX);
+ HReg r0 = iselVecExpr(env, e->Iex.Mux0X.expr0);
+ HReg dst = newVRegV(env);
+ addInstr(env, mk_vMOVsd_RR(rX,dst));
+ addInstr(env, AMD64Instr_Test64(0xFF, r8));
+ addInstr(env, AMD64Instr_SseCMov(Acc_Z,r0,dst));
+ return dst;
+ }
+
+ //vec_fail:
+ vex_printf("iselVecExpr (amd64, subarch = %s): can't reduce\n",
+ LibVEX_ppVexHwCaps(VexArchAMD64, env->hwcaps));
+ ppIRExpr(e);
+ vpanic("iselVecExpr_wrk");
+}
+
+
+/*---------------------------------------------------------*/
+/*--- ISEL: Statements ---*/
+/*---------------------------------------------------------*/
+
+static void iselStmt ( ISelEnv* env, IRStmt* stmt )
+{
+ if (vex_traceflags & VEX_TRACE_VCODE) {
+ vex_printf("\n-- ");
+ ppIRStmt(stmt);
+ vex_printf("\n");
+ }
+
+ switch (stmt->tag) {
+
+ /* --------- STORE --------- */
+ case Ist_Store: {
+ IRType tya = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
+ IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
+ IREndness end = stmt->Ist.Store.end;
+
+ if (tya != Ity_I64 || end != Iend_LE)
+ goto stmt_fail;
+
+ if (tyd == Ity_I64) {
+ AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
+ AMD64RI* ri = iselIntExpr_RI(env, stmt->Ist.Store.data);
+ addInstr(env, AMD64Instr_Alu64M(Aalu_MOV,ri,am));
+ return;
+ }
+ if (tyd == Ity_I8 || tyd == Ity_I16 || tyd == Ity_I32) {
+ AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
+ HReg r = iselIntExpr_R(env, stmt->Ist.Store.data);
+ addInstr(env, AMD64Instr_Store(
+ toUChar(tyd==Ity_I8 ? 1 : (tyd==Ity_I16 ? 2 : 4)),
+ r,am));
+ return;
+ }
+ if (tyd == Ity_F64) {
+ AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
+ HReg r = iselDblExpr(env, stmt->Ist.Store.data);
+ addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, r, am));
+ return;
+ }
+ if (tyd == Ity_F32) {
+ AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
+ HReg r = iselFltExpr(env, stmt->Ist.Store.data);
+ addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 4, r, am));
+ return;
+ }
+ if (tyd == Ity_V128) {
+ AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
+ HReg r = iselVecExpr(env, stmt->Ist.Store.data);
+ addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, r, am));
+ return;
+ }
+ break;
+ }
+
+ /* --------- PUT --------- */
+ case Ist_Put: {
+ IRType ty = typeOfIRExpr(env->type_env, stmt->Ist.Put.data);
+ if (ty == Ity_I64) {
+ /* We're going to write to memory, so compute the RHS into an
+ AMD64RI. */
+ AMD64RI* ri = iselIntExpr_RI(env, stmt->Ist.Put.data);
+ addInstr(env,
+ AMD64Instr_Alu64M(
+ Aalu_MOV,
+ ri,
+ AMD64AMode_IR(stmt->Ist.Put.offset,
+ hregAMD64_RBP())
+ ));
+ return;
+ }
+ if (ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32) {
+ HReg r = iselIntExpr_R(env, stmt->Ist.Put.data);
+ addInstr(env, AMD64Instr_Store(
+ toUChar(ty==Ity_I8 ? 1 : (ty==Ity_I16 ? 2 : 4)),
+ r,
+ AMD64AMode_IR(stmt->Ist.Put.offset,
+ hregAMD64_RBP())));
+ return;
+ }
+ if (ty == Ity_V128) {
+ HReg vec = iselVecExpr(env, stmt->Ist.Put.data);
+ AMD64AMode* am = AMD64AMode_IR(stmt->Ist.Put.offset,
+ hregAMD64_RBP());
+ addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, vec, am));
+ return;
+ }
+ if (ty == Ity_F32) {
+ HReg f32 = iselFltExpr(env, stmt->Ist.Put.data);
+ AMD64AMode* am = AMD64AMode_IR(stmt->Ist.Put.offset, hregAMD64_RBP());
+ set_SSE_rounding_default(env); /* paranoia */
+ addInstr(env, AMD64Instr_SseLdSt( False/*store*/, 4, f32, am ));
+ return;
+ }
+ if (ty == Ity_F64) {
+ HReg f64 = iselDblExpr(env, stmt->Ist.Put.data);
+ AMD64AMode* am = AMD64AMode_IR( stmt->Ist.Put.offset,
+ hregAMD64_RBP() );
+ addInstr(env, AMD64Instr_SseLdSt( False/*store*/, 8, f64, am ));
+ return;
+ }
+ break;
+ }
+
+ /* --------- Indexed PUT --------- */
+ case Ist_PutI: {
+ AMD64AMode* am
+ = genGuestArrayOffset(
+ env, stmt->Ist.PutI.descr,
+ stmt->Ist.PutI.ix, stmt->Ist.PutI.bias );
+
+ IRType ty = typeOfIRExpr(env->type_env, stmt->Ist.PutI.data);
+ if (ty == Ity_F64) {
+ HReg val = iselDblExpr(env, stmt->Ist.PutI.data);
+ addInstr(env, AMD64Instr_SseLdSt( False/*store*/, 8, val, am ));
+ return;
+ }
+ if (ty == Ity_I8) {
+ HReg r = iselIntExpr_R(env, stmt->Ist.PutI.data);
+ addInstr(env, AMD64Instr_Store( 1, r, am ));
+ return;
+ }
+ if (ty == Ity_I64) {
+ AMD64RI* ri = iselIntExpr_RI(env, stmt->Ist.PutI.data);
+ addInstr(env, AMD64Instr_Alu64M( Aalu_MOV, ri, am ));
+ return;
+ }
+ break;
+ }
+
+ /* --------- TMP --------- */
+ case Ist_WrTmp: {
+ IRTemp tmp = stmt->Ist.WrTmp.tmp;
+ IRType ty = typeOfIRTemp(env->type_env, tmp);
+
+ /* optimisation: if stmt->Ist.WrTmp.data is Add64(..,..),
+ compute it into an AMode and then use LEA. This usually
+ produces fewer instructions, often because (for memcheck
+ created IR) we get t = address-expression, (t is later used
+ twice) and so doing this naturally turns address-expression
+ back into an AMD64 amode. */
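+      /* For instance (illustrative): t = Add64(Add64(r1, Shl64(r2,3)), 16)
+         can come out as a single "leaq 16(r1,r2,8), t". */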
+ if (ty == Ity_I64
+ && stmt->Ist.WrTmp.data->tag == Iex_Binop
+ && stmt->Ist.WrTmp.data->Iex.Binop.op == Iop_Add64) {
+ AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.WrTmp.data);
+ HReg dst = lookupIRTemp(env, tmp);
+ if (am->tag == Aam_IR && am->Aam.IR.imm == 0) {
+ /* Hmm, iselIntExpr_AMode wimped out and just computed the
+ value into a register. Just emit a normal reg-reg move
+ so reg-alloc can coalesce it away in the usual way. */
+ HReg src = am->Aam.IR.reg;
+ addInstr(env, AMD64Instr_Alu64R(Aalu_MOV, AMD64RMI_Reg(src), dst));
+ } else {
+ addInstr(env, AMD64Instr_Lea64(am,dst));
+ }
+ return;
+ }
+
+ if (ty == Ity_I64 || ty == Ity_I32
+ || ty == Ity_I16 || ty == Ity_I8) {
+ AMD64RMI* rmi = iselIntExpr_RMI(env, stmt->Ist.WrTmp.data);
+ HReg dst = lookupIRTemp(env, tmp);
+ addInstr(env, AMD64Instr_Alu64R(Aalu_MOV,rmi,dst));
+ return;
+ }
+ if (ty == Ity_I128) {
+ HReg rHi, rLo, dstHi, dstLo;
+ iselInt128Expr(&rHi,&rLo, env, stmt->Ist.WrTmp.data);
+ lookupIRTemp128( &dstHi, &dstLo, env, tmp);
+ addInstr(env, mk_iMOVsd_RR(rHi,dstHi) );
+ addInstr(env, mk_iMOVsd_RR(rLo,dstLo) );
+ return;
+ }
+ if (ty == Ity_I1) {
+ AMD64CondCode cond = iselCondCode(env, stmt->Ist.WrTmp.data);
+ HReg dst = lookupIRTemp(env, tmp);
+ addInstr(env, AMD64Instr_Set64(cond, dst));
+ return;
+ }
+ if (ty == Ity_F64) {
+ HReg dst = lookupIRTemp(env, tmp);
+ HReg src = iselDblExpr(env, stmt->Ist.WrTmp.data);
+ addInstr(env, mk_vMOVsd_RR(src, dst));
+ return;
+ }
+ if (ty == Ity_F32) {
+ HReg dst = lookupIRTemp(env, tmp);
+ HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data);
+ addInstr(env, mk_vMOVsd_RR(src, dst));
+ return;
+ }
+ if (ty == Ity_V128) {
+ HReg dst = lookupIRTemp(env, tmp);
+ HReg src = iselVecExpr(env, stmt->Ist.WrTmp.data);
+ addInstr(env, mk_vMOVsd_RR(src, dst));
+ return;
+ }
+ break;
+ }
+
+ /* --------- Call to DIRTY helper --------- */
+ case Ist_Dirty: {
+ IRType retty;
+ IRDirty* d = stmt->Ist.Dirty.details;
+ Bool passBBP = False;
+
+ if (d->nFxState == 0)
+ vassert(!d->needsBBP);
+
+ passBBP = toBool(d->nFxState > 0 && d->needsBBP);
+
+ /* Marshal args, do the call, clear stack. */
+ doHelperCall( env, passBBP, d->guard, d->cee, d->args );
+
+ /* Now figure out what to do with the returned value, if any. */
+ if (d->tmp == IRTemp_INVALID)
+ /* No return value. Nothing to do. */
+ return;
+
+ retty = typeOfIRTemp(env->type_env, d->tmp);
+ if (retty == Ity_I64 || retty == Ity_I32
+ || retty == Ity_I16 || retty == Ity_I8) {
+ /* The returned value is in %rax. Park it in the register
+ associated with tmp. */
+ HReg dst = lookupIRTemp(env, d->tmp);
+ addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(),dst) );
+ return;
+ }
+ break;
+ }
+
+ /* --------- MEM FENCE --------- */
+ case Ist_MBE:
+ switch (stmt->Ist.MBE.event) {
+ case Imbe_Fence:
+ addInstr(env, AMD64Instr_MFence());
+ return;
+ default:
+ break;
+ }
+ break;
+
+ /* --------- ACAS --------- */
+ case Ist_CAS:
+ if (stmt->Ist.CAS.details->oldHi == IRTemp_INVALID) {
+ /* "normal" singleton CAS */
+ UChar sz;
+ IRCAS* cas = stmt->Ist.CAS.details;
+ IRType ty = typeOfIRExpr(env->type_env, cas->dataLo);
+         /* get: cas->expdLo into %rax, and cas->dataLo into %rbx */
+ AMD64AMode* am = iselIntExpr_AMode(env, cas->addr);
+ HReg rData = iselIntExpr_R(env, cas->dataLo);
+ HReg rExpd = iselIntExpr_R(env, cas->expdLo);
+ HReg rOld = lookupIRTemp(env, cas->oldLo);
+ vassert(cas->expdHi == NULL);
+ vassert(cas->dataHi == NULL);
+ addInstr(env, mk_iMOVsd_RR(rExpd, rOld));
+ addInstr(env, mk_iMOVsd_RR(rExpd, hregAMD64_RAX()));
+ addInstr(env, mk_iMOVsd_RR(rData, hregAMD64_RBX()));
+ switch (ty) {
+ case Ity_I64: sz = 8; break;
+ case Ity_I32: sz = 4; break;
+ case Ity_I16: sz = 2; break;
+ case Ity_I8: sz = 1; break;
+ default: goto unhandled_cas;
+ }
+ addInstr(env, AMD64Instr_ACAS(am, sz));
+ addInstr(env, AMD64Instr_CMov64(
+ Acc_NZ, AMD64RM_Reg(hregAMD64_RAX()), rOld));
+ return;
+ } else {
+ /* double CAS */
+ UChar sz;
+ IRCAS* cas = stmt->Ist.CAS.details;
+ IRType ty = typeOfIRExpr(env->type_env, cas->dataLo);
+ /* only 32-bit and 64-bit allowed in this case */
+ /* get: cas->expdLo into %rax, and cas->dataLo into %rbx */
+ /* get: cas->expdHi into %rdx, and cas->dataHi into %rcx */
+ AMD64AMode* am = iselIntExpr_AMode(env, cas->addr);
+ HReg rDataHi = iselIntExpr_R(env, cas->dataHi);
+ HReg rDataLo = iselIntExpr_R(env, cas->dataLo);
+ HReg rExpdHi = iselIntExpr_R(env, cas->expdHi);
+ HReg rExpdLo = iselIntExpr_R(env, cas->expdLo);
+ HReg rOldHi = lookupIRTemp(env, cas->oldHi);
+ HReg rOldLo = lookupIRTemp(env, cas->oldLo);
+ switch (ty) {
+ case Ity_I64:
+ if (!(env->hwcaps & VEX_HWCAPS_AMD64_CX16))
+ goto unhandled_cas; /* we'd have to generate
+ cmpxchg16b, but the host
+ doesn't support that */
+ sz = 8;
+ break;
+ case Ity_I32:
+ sz = 4;
+ break;
+ default:
+ goto unhandled_cas;
+ }
+ addInstr(env, mk_iMOVsd_RR(rExpdHi, rOldHi));
+ addInstr(env, mk_iMOVsd_RR(rExpdLo, rOldLo));
+ addInstr(env, mk_iMOVsd_RR(rExpdHi, hregAMD64_RDX()));
+ addInstr(env, mk_iMOVsd_RR(rExpdLo, hregAMD64_RAX()));
+ addInstr(env, mk_iMOVsd_RR(rDataHi, hregAMD64_RCX()));
+ addInstr(env, mk_iMOVsd_RR(rDataLo, hregAMD64_RBX()));
+ addInstr(env, AMD64Instr_DACAS(am, sz));
+ addInstr(env,
+ AMD64Instr_CMov64(
+ Acc_NZ, AMD64RM_Reg(hregAMD64_RDX()), rOldHi));
+ addInstr(env,
+ AMD64Instr_CMov64(
+ Acc_NZ, AMD64RM_Reg(hregAMD64_RAX()), rOldLo));
+ return;
+ }
+ unhandled_cas:
+ break;
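+      /* Illustration only: the net effect of the singleton sequence
+         above, expressed with a GCC builtin.  oldLo always receives
+         the value that was in memory -- equal to expdLo on success,
+         hence the initial rExpd->rOld copy plus the conditional move
+         from %rax on failure.  A sketch: */
+# if 0
+      #include <stdint.h>
+      static uint64_t model_CAS64 ( uint64_t* addr,
+                                    uint64_t expd, uint64_t data )
+      {
+         return __sync_val_compare_and_swap(addr, expd, data);
+      }
+# endif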
+
+ /* --------- INSTR MARK --------- */
+ /* Doesn't generate any executable code ... */
+ case Ist_IMark:
+ return;
+
+ /* --------- ABI HINT --------- */
+ /* These have no meaning (denotation in the IR) and so we ignore
+ them ... if any actually made it this far. */
+ case Ist_AbiHint:
+ return;
+
+ /* --------- NO-OP --------- */
+ case Ist_NoOp:
+ return;
+
+ /* --------- EXIT --------- */
+ case Ist_Exit: {
+ AMD64RI* dst;
+ AMD64CondCode cc;
+ if (stmt->Ist.Exit.dst->tag != Ico_U64)
+ vpanic("iselStmt(amd64): Ist_Exit: dst is not a 64-bit value");
+ dst = iselIntExpr_RI(env, IRExpr_Const(stmt->Ist.Exit.dst));
+ cc = iselCondCode(env,stmt->Ist.Exit.guard);
+ addInstr(env, AMD64Instr_Goto(stmt->Ist.Exit.jk, cc, dst));
+ return;
+ }
+
+ default: break;
+ }
+ stmt_fail:
+ ppIRStmt(stmt);
+ vpanic("iselStmt(amd64)");
+}
+
+
+/*---------------------------------------------------------*/
+/*--- ISEL: Basic block terminators (Nexts) ---*/
+/*---------------------------------------------------------*/
+
+static void iselNext ( ISelEnv* env, IRExpr* next, IRJumpKind jk )
+{
+ AMD64RI* ri;
+ if (vex_traceflags & VEX_TRACE_VCODE) {
+ vex_printf("\n-- goto {");
+ ppIRJumpKind(jk);
+ vex_printf("} ");
+ ppIRExpr(next);
+ vex_printf("\n");
+ }
+ ri = iselIntExpr_RI(env, next);
+ addInstr(env, AMD64Instr_Goto(jk, Acc_ALWAYS,ri));
+}
+
+
+/*---------------------------------------------------------*/
+/*--- Insn selector top-level ---*/
+/*---------------------------------------------------------*/
+
+/* Translate an entire SB to amd64 code. */
+
+HInstrArray* iselSB_AMD64 ( IRSB* bb, VexArch arch_host,
+ VexArchInfo* archinfo_host,
+ VexAbiInfo* vbi/*UNUSED*/ )
+{
+ Int i, j;
+ HReg hreg, hregHI;
+ ISelEnv* env;
+ UInt hwcaps_host = archinfo_host->hwcaps;
+
+ /* sanity ... */
+ vassert(arch_host == VexArchAMD64);
+ vassert(0 == (hwcaps_host
+ & ~(VEX_HWCAPS_AMD64_SSE3
+ | VEX_HWCAPS_AMD64_CX16
+ | VEX_HWCAPS_AMD64_LZCNT)));
+
+ /* Make up an initial environment to use. */
+ env = LibVEX_Alloc(sizeof(ISelEnv));
+ env->vreg_ctr = 0;
+
+ /* Set up output code array. */
+ env->code = newHInstrArray();
+
+ /* Copy BB's type env. */
+ env->type_env = bb->tyenv;
+
+ /* Make up an IRTemp -> virtual HReg mapping. This doesn't
+ change as we go along. */
+ env->n_vregmap = bb->tyenv->types_used;
+ env->vregmap = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
+ env->vregmapHI = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
+
+ /* and finally ... */
+ env->hwcaps = hwcaps_host;
+
+ /* For each IR temporary, allocate a suitably-kinded virtual
+ register. */
+ j = 0;
+ for (i = 0; i < env->n_vregmap; i++) {
+ hregHI = hreg = INVALID_HREG;
+ switch (bb->tyenv->types[i]) {
+ case Ity_I1:
+ case Ity_I8:
+ case Ity_I16:
+ case Ity_I32:
+ case Ity_I64: hreg = mkHReg(j++, HRcInt64, True); break;
+ case Ity_I128: hreg = mkHReg(j++, HRcInt64, True);
+ hregHI = mkHReg(j++, HRcInt64, True); break;
+ case Ity_F32:
+ case Ity_F64:
+ case Ity_V128: hreg = mkHReg(j++, HRcVec128, True); break;
+ default: ppIRType(bb->tyenv->types[i]);
+ vpanic("iselBB(amd64): IRTemp type");
+ }
+ env->vregmap[i] = hreg;
+ env->vregmapHI[i] = hregHI;
+ }
+ env->vreg_ctr = j;
+
+ /* Ok, finally we can iterate over the statements. */
+ for (i = 0; i < bb->stmts_used; i++)
+ if (bb->stmts[i])
+ iselStmt(env,bb->stmts[i]);
+
+ iselNext(env,bb->next,bb->jumpkind);
+
+ /* record the number of vregs we used. */
+ env->code->n_vregs = env->vreg_ctr;
+ return env->code;
+}
+
+
+/*---------------------------------------------------------------*/
+/*--- end host_amd64_isel.c ---*/
+/*---------------------------------------------------------------*/
diff --git a/VEX/priv/host_arm_defs.c b/VEX/priv/host_arm_defs.c
new file mode 100644
index 0000000..122a9f9
--- /dev/null
+++ b/VEX/priv/host_arm_defs.c
@@ -0,0 +1,4097 @@
+
+/*---------------------------------------------------------------*/
+/*--- begin host_arm_defs.c ---*/
+/*---------------------------------------------------------------*/
+
+/*
+ This file is part of Valgrind, a dynamic binary instrumentation
+ framework.
+
+ Copyright (C) 2004-2010 OpenWorks LLP
+ info@open-works.net
+
+ NEON support is
+ Copyright (C) 2010-2010 Samsung Electronics
+ contributed by Dmitry Zhurikhin <zhur@ispras.ru>
+ and Kirill Batuzov <batuzovk@ispras.ru>
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA.
+
+ The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "libvex_basictypes.h"
+#include "libvex.h"
+#include "libvex_trc_values.h"
+
+#include "main_util.h"
+#include "host_generic_regs.h"
+#include "host_arm_defs.h"
+
+UInt arm_hwcaps = 0;
+
+
+/* --------- Registers. --------- */
+
+/* The usual HReg abstraction.
+ There are 16 general purpose regs.
+*/
+
+void ppHRegARM ( HReg reg ) {
+ Int r;
+ /* Be generic for all virtual regs. */
+ if (hregIsVirtual(reg)) {
+ ppHReg(reg);
+ return;
+ }
+ /* But specific for real regs. */
+ switch (hregClass(reg)) {
+ case HRcInt32:
+ r = hregNumber(reg);
+ vassert(r >= 0 && r < 16);
+ vex_printf("r%d", r);
+ return;
+ case HRcFlt64:
+ r = hregNumber(reg);
+ vassert(r >= 0 && r < 32);
+ vex_printf("d%d", r);
+ return;
+ case HRcFlt32:
+ r = hregNumber(reg);
+ vassert(r >= 0 && r < 32);
+ vex_printf("s%d", r);
+ return;
+ case HRcVec128:
+ r = hregNumber(reg);
+ vassert(r >= 0 && r < 16);
+ vex_printf("q%d", r);
+ return;
+ default:
+ vpanic("ppHRegARM");
+ }
+}
+
+HReg hregARM_R0 ( void ) { return mkHReg(0, HRcInt32, False); }
+HReg hregARM_R1 ( void ) { return mkHReg(1, HRcInt32, False); }
+HReg hregARM_R2 ( void ) { return mkHReg(2, HRcInt32, False); }
+HReg hregARM_R3 ( void ) { return mkHReg(3, HRcInt32, False); }
+HReg hregARM_R4 ( void ) { return mkHReg(4, HRcInt32, False); }
+HReg hregARM_R5 ( void ) { return mkHReg(5, HRcInt32, False); }
+HReg hregARM_R6 ( void ) { return mkHReg(6, HRcInt32, False); }
+HReg hregARM_R7 ( void ) { return mkHReg(7, HRcInt32, False); }
+HReg hregARM_R8 ( void ) { return mkHReg(8, HRcInt32, False); }
+HReg hregARM_R9 ( void ) { return mkHReg(9, HRcInt32, False); }
+HReg hregARM_R10 ( void ) { return mkHReg(10, HRcInt32, False); }
+HReg hregARM_R11 ( void ) { return mkHReg(11, HRcInt32, False); }
+HReg hregARM_R12 ( void ) { return mkHReg(12, HRcInt32, False); }
+HReg hregARM_R13 ( void ) { return mkHReg(13, HRcInt32, False); }
+HReg hregARM_R14 ( void ) { return mkHReg(14, HRcInt32, False); }
+HReg hregARM_R15 ( void ) { return mkHReg(15, HRcInt32, False); }
+HReg hregARM_D8 ( void ) { return mkHReg(8, HRcFlt64, False); }
+HReg hregARM_D9 ( void ) { return mkHReg(9, HRcFlt64, False); }
+HReg hregARM_D10 ( void ) { return mkHReg(10, HRcFlt64, False); }
+HReg hregARM_D11 ( void ) { return mkHReg(11, HRcFlt64, False); }
+HReg hregARM_D12 ( void ) { return mkHReg(12, HRcFlt64, False); }
+HReg hregARM_S26 ( void ) { return mkHReg(26, HRcFlt32, False); }
+HReg hregARM_S27 ( void ) { return mkHReg(27, HRcFlt32, False); }
+HReg hregARM_S28 ( void ) { return mkHReg(28, HRcFlt32, False); }
+HReg hregARM_S29 ( void ) { return mkHReg(29, HRcFlt32, False); }
+HReg hregARM_S30 ( void ) { return mkHReg(30, HRcFlt32, False); }
+HReg hregARM_Q8 ( void ) { return mkHReg(8, HRcVec128, False); }
+HReg hregARM_Q9 ( void ) { return mkHReg(9, HRcVec128, False); }
+HReg hregARM_Q10 ( void ) { return mkHReg(10, HRcVec128, False); }
+HReg hregARM_Q11 ( void ) { return mkHReg(11, HRcVec128, False); }
+HReg hregARM_Q12 ( void ) { return mkHReg(12, HRcVec128, False); }
+HReg hregARM_Q13 ( void ) { return mkHReg(13, HRcVec128, False); }
+HReg hregARM_Q14 ( void ) { return mkHReg(14, HRcVec128, False); }
+HReg hregARM_Q15 ( void ) { return mkHReg(15, HRcVec128, False); }
+
+void getAllocableRegs_ARM ( Int* nregs, HReg** arr )
+{
+ Int i = 0;
+ *nregs = 26;
+ *arr = LibVEX_Alloc(*nregs * sizeof(HReg));
+   // callee-saved ones are listed first, since we prefer them
+ // if they're available
+ (*arr)[i++] = hregARM_R4();
+ (*arr)[i++] = hregARM_R5();
+ (*arr)[i++] = hregARM_R6();
+ (*arr)[i++] = hregARM_R7();
+ (*arr)[i++] = hregARM_R10();
+ (*arr)[i++] = hregARM_R11();
+   // otherwise we'll have to slum it out with caller-saved ones
+ (*arr)[i++] = hregARM_R0();
+ (*arr)[i++] = hregARM_R1();
+ (*arr)[i++] = hregARM_R2();
+ (*arr)[i++] = hregARM_R3();
+ (*arr)[i++] = hregARM_R9();
+   // FP registers.  Note: these are all callee-saved.  Yay!
+ // Hence we don't need to mention them as trashed in
+ // getHRegUsage for ARMInstr_Call.
+ (*arr)[i++] = hregARM_D8();
+ (*arr)[i++] = hregARM_D9();
+ (*arr)[i++] = hregARM_D10();
+ (*arr)[i++] = hregARM_D11();
+ (*arr)[i++] = hregARM_D12();
+ (*arr)[i++] = hregARM_S26();
+ (*arr)[i++] = hregARM_S27();
+ (*arr)[i++] = hregARM_S28();
+ (*arr)[i++] = hregARM_S29();
+ (*arr)[i++] = hregARM_S30();
+
+ (*arr)[i++] = hregARM_Q8();
+ (*arr)[i++] = hregARM_Q9();
+ (*arr)[i++] = hregARM_Q10();
+ (*arr)[i++] = hregARM_Q11();
+ (*arr)[i++] = hregARM_Q12();
+
+ //(*arr)[i++] = hregARM_Q13();
+ //(*arr)[i++] = hregARM_Q14();
+ //(*arr)[i++] = hregARM_Q15();
+
+ // unavail: r8 as GSP
+ // r12 is used as a spill/reload temporary
+ // r13 as SP
+ // r14 as LR
+ // r15 as PC
+ //
+ // All in all, we have 11 allocatable integer registers:
+ // 0 1 2 3 4 5 6 7 9 10 11, with r8 dedicated as GSP
+ // and r12 dedicated as a spill temporary.
+ // 13 14 and 15 are not under the allocator's control.
+ //
+ // Hence for the allocatable registers we have:
+ //
+ // callee-saved: 4 5 6 7 (8) 9 10 11
+ // caller-saved: 0 1 2 3
+   // Note 9 is ambiguous: the base EABI does not give it a definite
+   // callee-/caller-saved designation, but the Linux instantiation of
+   // the ABI specifies it as callee-saved.
+   //
+   // If the set of available registers changes, or if their
+   // callee-/caller-saved status changes, be sure to re-check/sync the
+   // definition of getHRegUsage for ARMInstr_Call too.
+ vassert(i == *nregs);
+}
+
+
+
+/* --------- Condition codes, ARM encoding. --------- */
+
+HChar* showARMCondCode ( ARMCondCode cond ) {
+ switch (cond) {
+ case ARMcc_EQ: return "eq";
+ case ARMcc_NE: return "ne";
+ case ARMcc_HS: return "hs";
+ case ARMcc_LO: return "lo";
+ case ARMcc_MI: return "mi";
+ case ARMcc_PL: return "pl";
+ case ARMcc_VS: return "vs";
+ case ARMcc_VC: return "vc";
+ case ARMcc_HI: return "hi";
+ case ARMcc_LS: return "ls";
+ case ARMcc_GE: return "ge";
+ case ARMcc_LT: return "lt";
+ case ARMcc_GT: return "gt";
+ case ARMcc_LE: return "le";
+ case ARMcc_AL: return "al"; // default
+ case ARMcc_NV: return "nv";
+ default: vpanic("showARMCondCode");
+ }
+}
+
+
+/* --------- Mem AModes: Addressing Mode 1 --------- */
+
+ARMAMode1* ARMAMode1_RI ( HReg reg, Int simm13 ) {
+ ARMAMode1* am = LibVEX_Alloc(sizeof(ARMAMode1));
+ am->tag = ARMam1_RI;
+ am->ARMam1.RI.reg = reg;
+ am->ARMam1.RI.simm13 = simm13;
+ vassert(-4095 <= simm13 && simm13 <= 4095);
+ return am;
+}
+ARMAMode1* ARMAMode1_RRS ( HReg base, HReg index, UInt shift ) {
+ ARMAMode1* am = LibVEX_Alloc(sizeof(ARMAMode1));
+ am->tag = ARMam1_RRS;
+ am->ARMam1.RRS.base = base;
+ am->ARMam1.RRS.index = index;
+ am->ARMam1.RRS.shift = shift;
+ vassert(0 <= shift && shift <= 3);
+ return am;
+}
+
+void ppARMAMode1 ( ARMAMode1* am ) {
+ switch (am->tag) {
+ case ARMam1_RI:
+ vex_printf("%d(", am->ARMam1.RI.simm13);
+ ppHRegARM(am->ARMam1.RI.reg);
+ vex_printf(")");
+ break;
+ case ARMam1_RRS:
+ vex_printf("(");
+ ppHRegARM(am->ARMam1.RRS.base);
+ vex_printf(",");
+ ppHRegARM(am->ARMam1.RRS.index);
+ vex_printf(",%u)", am->ARMam1.RRS.shift);
+ break;
+ default:
+ vassert(0);
+ }
+}
+
+static void addRegUsage_ARMAMode1 ( HRegUsage* u, ARMAMode1* am ) {
+ switch (am->tag) {
+ case ARMam1_RI:
+ addHRegUse(u, HRmRead, am->ARMam1.RI.reg);
+ return;
+ case ARMam1_RRS:
+ // addHRegUse(u, HRmRead, am->ARMam1.RRS.base);
+ // addHRegUse(u, HRmRead, am->ARMam1.RRS.index);
+ // return;
+ default:
+ vpanic("addRegUsage_ARMAmode1");
+ }
+}
+
+static void mapRegs_ARMAMode1 ( HRegRemap* m, ARMAMode1* am ) {
+ switch (am->tag) {
+ case ARMam1_RI:
+ am->ARMam1.RI.reg = lookupHRegRemap(m, am->ARMam1.RI.reg);
+ return;
+ case ARMam1_RRS:
+ //am->ARMam1.RR.base =lookupHRegRemap(m, am->ARMam1.RR.base);
+ //am->ARMam1.RR.index = lookupHRegRemap(m, am->ARMam1.RR.index);
+ //return;
+ default:
+ vpanic("mapRegs_ARMAmode1");
+ }
+}
+
+
+/* --------- Mem AModes: Addressing Mode 2 --------- */
+
+ARMAMode2* ARMAMode2_RI ( HReg reg, Int simm9 ) {
+ ARMAMode2* am = LibVEX_Alloc(sizeof(ARMAMode2));
+ am->tag = ARMam2_RI;
+ am->ARMam2.RI.reg = reg;
+ am->ARMam2.RI.simm9 = simm9;
+ vassert(-255 <= simm9 && simm9 <= 255);
+ return am;
+}
+ARMAMode2* ARMAMode2_RR ( HReg base, HReg index ) {
+ ARMAMode2* am = LibVEX_Alloc(sizeof(ARMAMode2));
+ am->tag = ARMam2_RR;
+ am->ARMam2.RR.base = base;
+ am->ARMam2.RR.index = index;
+ return am;
+}
+
+void ppARMAMode2 ( ARMAMode2* am ) {
+ switch (am->tag) {
+ case ARMam2_RI:
+ vex_printf("%d(", am->ARMam2.RI.simm9);
+ ppHRegARM(am->ARMam2.RI.reg);
+ vex_printf(")");
+ break;
+ case ARMam2_RR:
+ vex_printf("(");
+ ppHRegARM(am->ARMam2.RR.base);
+ vex_printf(",");
+ ppHRegARM(am->ARMam2.RR.index);
+ vex_printf(")");
+ break;
+ default:
+ vassert(0);
+ }
+}
+
+static void addRegUsage_ARMAMode2 ( HRegUsage* u, ARMAMode2* am ) {
+ switch (am->tag) {
+ case ARMam2_RI:
+ addHRegUse(u, HRmRead, am->ARMam2.RI.reg);
+ return;
+ case ARMam2_RR:
+ // addHRegUse(u, HRmRead, am->ARMam2.RR.base);
+ // addHRegUse(u, HRmRead, am->ARMam2.RR.index);
+ // return;
+ default:
+ vpanic("addRegUsage_ARMAmode2");
+ }
+}
+
+static void mapRegs_ARMAMode2 ( HRegRemap* m, ARMAMode2* am ) {
+ switch (am->tag) {
+ case ARMam2_RI:
+ am->ARMam2.RI.reg = lookupHRegRemap(m, am->ARMam2.RI.reg);
+ return;
+ case ARMam2_RR:
+ //am->ARMam2.RR.base =lookupHRegRemap(m, am->ARMam2.RR.base);
+ //am->ARMam2.RR.index = lookupHRegRemap(m, am->ARMam2.RR.index);
+ //return;
+ default:
+ vpanic("mapRegs_ARMAmode2");
+ }
+}
+
+
+/* --------- Mem AModes: Addressing Mode VFP --------- */
+
+ARMAModeV* mkARMAModeV ( HReg reg, Int simm11 ) {
+ ARMAModeV* am = LibVEX_Alloc(sizeof(ARMAModeV));
+ vassert(simm11 >= -1020 && simm11 <= 1020);
+ vassert(0 == (simm11 & 3));
+ am->reg = reg;
+ am->simm11 = simm11;
+ return am;
+}
+
+void ppARMAModeV ( ARMAModeV* am ) {
+ vex_printf("%d(", am->simm11);
+ ppHRegARM(am->reg);
+ vex_printf(")");
+}
+
+static void addRegUsage_ARMAModeV ( HRegUsage* u, ARMAModeV* am ) {
+ addHRegUse(u, HRmRead, am->reg);
+}
+
+static void mapRegs_ARMAModeV ( HRegRemap* m, ARMAModeV* am ) {
+ am->reg = lookupHRegRemap(m, am->reg);
+}
+
+
+/* --------- Mem AModes: Addressing Mode Neon ------- */
+
+ARMAModeN *mkARMAModeN_RR ( HReg rN, HReg rM ) {
+ ARMAModeN* am = LibVEX_Alloc(sizeof(ARMAModeN));
+ am->tag = ARMamN_RR;
+ am->ARMamN.RR.rN = rN;
+ am->ARMamN.RR.rM = rM;
+ return am;
+}
+
+ARMAModeN *mkARMAModeN_R ( HReg rN ) {
+ ARMAModeN* am = LibVEX_Alloc(sizeof(ARMAModeN));
+ am->tag = ARMamN_R;
+ am->ARMamN.R.rN = rN;
+ return am;
+}
+
+static void addRegUsage_ARMAModeN ( HRegUsage* u, ARMAModeN* am ) {
+ if (am->tag == ARMamN_R) {
+ addHRegUse(u, HRmRead, am->ARMamN.R.rN);
+ } else {
+ addHRegUse(u, HRmRead, am->ARMamN.RR.rN);
+ addHRegUse(u, HRmRead, am->ARMamN.RR.rM);
+ }
+}
+
+static void mapRegs_ARMAModeN ( HRegRemap* m, ARMAModeN* am ) {
+ if (am->tag == ARMamN_R) {
+ am->ARMamN.R.rN = lookupHRegRemap(m, am->ARMamN.R.rN);
+ } else {
+ am->ARMamN.RR.rN = lookupHRegRemap(m, am->ARMamN.RR.rN);
+ am->ARMamN.RR.rM = lookupHRegRemap(m, am->ARMamN.RR.rM);
+ }
+}
+
+void ppARMAModeN ( ARMAModeN* am ) {
+ vex_printf("[");
+ if (am->tag == ARMamN_R) {
+ ppHRegARM(am->ARMamN.R.rN);
+ } else {
+ ppHRegARM(am->ARMamN.RR.rN);
+ }
+ vex_printf("]");
+ if (am->tag == ARMamN_RR) {
+ vex_printf(", ");
+ ppHRegARM(am->ARMamN.RR.rM);
+ }
+}
+
+
+/* --------- Reg or imm-8x4 operands --------- */
+
+static UInt ROR32 ( UInt x, UInt sh ) {
+ vassert(sh >= 0 && sh < 32);
+ if (sh == 0)
+ return x;
+ else
+ return (x << (32-sh)) | (x >> sh);
+}
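+/* Illustration only: finding an (imm8, imm4) pair for a 32-bit
+   constant by trying the 16 even rotations; this inverts the
+   ROR32-based decoding used in ppARMRI84 below.  A sketch, not part
+   of the backend: */
+# if 0
+#include <stdint.h>
+static int model_encode_I84 ( uint32_t x, uint32_t* imm8, uint32_t* imm4 )
+{
+   uint32_t rot;
+   for (rot = 0; rot < 16; rot++) {
+      /* undo ROR32(imm8, 2*rot) by rotating x left by 2*rot */
+      uint32_t sh = 2 * rot;
+      uint32_t y  = sh == 0 ? x : ((x << sh) | (x >> (32 - sh)));
+      if (y <= 0xFF) { *imm8 = y; *imm4 = rot; return 1; }
+   }
+   return 0; /* not representable */
+}
+# endif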
+
+ARMRI84* ARMRI84_I84 ( UShort imm8, UShort imm4 ) {
+ ARMRI84* ri84 = LibVEX_Alloc(sizeof(ARMRI84));
+ ri84->tag = ARMri84_I84;
+ ri84->ARMri84.I84.imm8 = imm8;
+ ri84->ARMri84.I84.imm4 = imm4;
+ vassert(imm8 >= 0 && imm8 <= 255);
+ vassert(imm4 >= 0 && imm4 <= 15);
+ return ri84;
+}
+ARMRI84* ARMRI84_R ( HReg reg ) {
+ ARMRI84* ri84 = LibVEX_Alloc(sizeof(ARMRI84));
+ ri84->tag = ARMri84_R;
+ ri84->ARMri84.R.reg = reg;
+ return ri84;
+}
+
+void ppARMRI84 ( ARMRI84* ri84 ) {
+ switch (ri84->tag) {
+ case ARMri84_I84:
+ vex_printf("0x%x", ROR32(ri84->ARMri84.I84.imm8,
+ 2 * ri84->ARMri84.I84.imm4));
+ break;
+ case ARMri84_R:
+ ppHRegARM(ri84->ARMri84.R.reg);
+ break;
+ default:
+ vassert(0);
+ }
+}
+
+static void addRegUsage_ARMRI84 ( HRegUsage* u, ARMRI84* ri84 ) {
+ switch (ri84->tag) {
+ case ARMri84_I84:
+ return;
+ case ARMri84_R:
+ addHRegUse(u, HRmRead, ri84->ARMri84.R.reg);
+ return;
+ default:
+ vpanic("addRegUsage_ARMRI84");
+ }
+}
+
+static void mapRegs_ARMRI84 ( HRegRemap* m, ARMRI84* ri84 ) {
+ switch (ri84->tag) {
+ case ARMri84_I84:
+ return;
+ case ARMri84_R:
+ ri84->ARMri84.R.reg = lookupHRegRemap(m, ri84->ARMri84.R.reg);
+ return;
+ default:
+ vpanic("mapRegs_ARMRI84");
+ }
+}
+
+
+/* --------- Reg or imm5 operands --------- */
+
+ARMRI5* ARMRI5_I5 ( UInt imm5 ) {
+ ARMRI5* ri5 = LibVEX_Alloc(sizeof(ARMRI5));
+ ri5->tag = ARMri5_I5;
+ ri5->ARMri5.I5.imm5 = imm5;
+ vassert(imm5 > 0 && imm5 <= 31); // zero is not allowed
+ return ri5;
+}
+ARMRI5* ARMRI5_R ( HReg reg ) {
+ ARMRI5* ri5 = LibVEX_Alloc(sizeof(ARMRI5));
+ ri5->tag = ARMri5_R;
+ ri5->ARMri5.R.reg = reg;
+ return ri5;
+}
+
+void ppARMRI5 ( ARMRI5* ri5 ) {
+ switch (ri5->tag) {
+ case ARMri5_I5:
+ vex_printf("%u", ri5->ARMri5.I5.imm5);
+ break;
+ case ARMri5_R:
+ ppHRegARM(ri5->ARMri5.R.reg);
+ break;
+ default:
+ vassert(0);
+ }
+}
+
+static void addRegUsage_ARMRI5 ( HRegUsage* u, ARMRI5* ri5 ) {
+ switch (ri5->tag) {
+ case ARMri5_I5:
+ return;
+ case ARMri5_R:
+ addHRegUse(u, HRmRead, ri5->ARMri5.R.reg);
+ return;
+ default:
+ vpanic("addRegUsage_ARMRI5");
+ }
+}
+
+static void mapRegs_ARMRI5 ( HRegRemap* m, ARMRI5* ri5 ) {
+ switch (ri5->tag) {
+ case ARMri5_I5:
+ return;
+ case ARMri5_R:
+ ri5->ARMri5.R.reg = lookupHRegRemap(m, ri5->ARMri5.R.reg);
+ return;
+ default:
+ vpanic("mapRegs_ARMRI5");
+ }
+}
+
+/* -------- Neon Immediate operand --------- */
+
+ARMNImm* ARMNImm_TI ( UInt type, UInt imm8 ) {
+ ARMNImm* i = LibVEX_Alloc(sizeof(ARMNImm));
+ i->type = type;
+ i->imm8 = imm8;
+ return i;
+}
+
+ULong ARMNImm_to_Imm64 ( ARMNImm* imm ) {
+ int i, j;
+ ULong y, x = imm->imm8;
+ switch (imm->type) {
+      case 3:
+         x = x << 8;
+         /* fall through */
+      case 2:
+         x = x << 8;
+         /* fall through */
+      case 1:
+         x = x << 8;
+         /* fall through */
+      case 0:
+         return (x << 32) | x;
+      case 5:
+      case 6:
+         if (imm->type == 5)
+            x = x << 8;
+         else
+            x = (x << 8) | x;
+         /* fall through */
+      case 4:
+         x = (x << 16) | x;
+         return (x << 32) | x;
+      case 8:
+         x = (x << 8) | 0xFF;
+         /* fall through */
+      case 7:
+         x = (x << 8) | 0xFF;
+         return (x << 32) | x;
+ case 9:
+ x = 0;
+ for (i = 7; i >= 0; i--) {
+ y = ((ULong)imm->imm8 >> i) & 1;
+ for (j = 0; j < 8; j++) {
+ x = (x << 1) | y;
+ }
+ }
+ return x;
+ case 10:
+ x |= (x & 0x80) << 5;
+ x |= ~(x & 0x40) << 5;
+ x &= 0x187F; /* 0001 1000 0111 1111 */
+ x |= (x & 0x40) << 4;
+ x |= (x & 0x40) << 3;
+ x |= (x & 0x40) << 2;
+ x |= (x & 0x40) << 1;
+ x = x << 19;
+ x = (x << 32) | x;
+ return x;
+ default:
+ vpanic("ARMNImm_to_Imm64");
+ }
+}
+
+ARMNImm* Imm64_to_ARMNImm ( ULong x ) {
+ ARMNImm tmp;
+ if ((x & 0xFFFFFFFF) == (x >> 32)) {
+ if ((x & 0xFFFFFF00) == 0)
+ return ARMNImm_TI(0, x & 0xFF);
+ if ((x & 0xFFFF00FF) == 0)
+ return ARMNImm_TI(1, (x >> 8) & 0xFF);
+ if ((x & 0xFF00FFFF) == 0)
+ return ARMNImm_TI(2, (x >> 16) & 0xFF);
+ if ((x & 0x00FFFFFF) == 0)
+ return ARMNImm_TI(3, (x >> 24) & 0xFF);
+ if ((x & 0xFFFF00FF) == 0xFF)
+ return ARMNImm_TI(7, (x >> 8) & 0xFF);
+ if ((x & 0xFF00FFFF) == 0xFFFF)
+ return ARMNImm_TI(8, (x >> 16) & 0xFF);
+ if ((x & 0xFFFF) == ((x >> 16) & 0xFFFF)) {
+ if ((x & 0xFF00) == 0)
+ return ARMNImm_TI(4, x & 0xFF);
+ if ((x & 0x00FF) == 0)
+ return ARMNImm_TI(5, (x >> 8) & 0xFF);
+ if ((x & 0xFF) == ((x >> 8) & 0xFF))
+ return ARMNImm_TI(6, x & 0xFF);
+ }
+ if ((x & 0x7FFFF) == 0) {
+ tmp.type = 10;
+ tmp.imm8 = ((x >> 19) & 0x7F) | ((x >> 24) & 0x80);
+ if (ARMNImm_to_Imm64(&tmp) == x)
+ return ARMNImm_TI(tmp.type, tmp.imm8);
+ }
+ } else {
+ /* This can only be type 9. */
+ tmp.imm8 = (((x >> 56) & 1) << 7)
+ | (((x >> 48) & 1) << 6)
+ | (((x >> 40) & 1) << 5)
+ | (((x >> 32) & 1) << 4)
+ | (((x >> 24) & 1) << 3)
+ | (((x >> 16) & 1) << 2)
+ | (((x >> 8) & 1) << 1)
+ | (((x >> 0) & 1) << 0);
+ tmp.type = 9;
+ if (ARMNImm_to_Imm64 (&tmp) == x)
+ return ARMNImm_TI(tmp.type, tmp.imm8);
+ }
+ return NULL;
+}
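+
+/* Round-trip sketch: Imm64_to_ARMNImm(0x00AB00AB00AB00AB) yields a
+ type 4 immediate with imm8 = 0xAB, and feeding that back through
+ ARMNImm_to_Imm64 reproduces the original value. Values with no
+ valid encoding come back as NULL, which callers must handle. */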
+
+void ppARMNImm (ARMNImm* i) {
+ ULong x = ARMNImm_to_Imm64(i);
+ /* The 64-bit pattern is printed twice, since the immediate is
+ replicated across both halves of a 128-bit Q register. */
+ vex_printf("0x%llX%llX", x, x);
+}
+
+/* --------- Register or scalar operand --------- */
+
+ARMNRS* mkARMNRS(ARMNRS_tag tag, HReg reg, UInt index)
+{
+ ARMNRS *p = LibVEX_Alloc(sizeof(ARMNRS));
+ p->tag = tag;
+ p->reg = reg;
+ p->index = index;
+ return p;
+}
+
+void ppARMNRS(ARMNRS *p)
+{
+ ppHRegARM(p->reg);
+ if (p->tag == ARMNRS_Scalar) {
+ vex_printf("[%d]", p->index);
+ }
+}
+
+/* --------- Instructions. --------- */
+
+HChar* showARMAluOp ( ARMAluOp op ) {
+ switch (op) {
+ case ARMalu_ADD: return "add";
+ case ARMalu_ADDS: return "adds";
+ case ARMalu_ADC: return "adc";
+ case ARMalu_SUB: return "sub";
+ case ARMalu_SUBS: return "subs";
+ case ARMalu_SBC: return "sbc";
+ case ARMalu_AND: return "and";
+ case ARMalu_BIC: return "bic";
+ case ARMalu_OR: return "orr";
+ case ARMalu_XOR: return "xor";
+ default: vpanic("showARMAluOp");
+ }
+}
+
+HChar* showARMShiftOp ( ARMShiftOp op ) {
+ switch (op) {
+ case ARMsh_SHL: return "shl";
+ case ARMsh_SHR: return "shr";
+ case ARMsh_SAR: return "sar";
+ default: vpanic("showARMShiftOp");
+ }
+}
+
+HChar* showARMUnaryOp ( ARMUnaryOp op ) {
+ switch (op) {
+ case ARMun_NEG: return "neg";
+ case ARMun_NOT: return "not";
+ case ARMun_CLZ: return "clz";
+ default: vpanic("showARMUnaryOp");
+ }
+}
+
+HChar* showARMMulOp ( ARMMulOp op ) {
+ switch (op) {
+ case ARMmul_PLAIN: return "mul";
+ case ARMmul_ZX: return "umull";
+ case ARMmul_SX: return "smull";
+ default: vpanic("showARMMulOp");
+ }
+}
+
+HChar* showARMVfpOp ( ARMVfpOp op ) {
+ switch (op) {
+ case ARMvfp_ADD: return "add";
+ case ARMvfp_SUB: return "sub";
+ case ARMvfp_MUL: return "mul";
+ case ARMvfp_DIV: return "div";
+ default: vpanic("showARMVfpOp");
+ }
+}
+
+HChar* showARMVfpUnaryOp ( ARMVfpUnaryOp op ) {
+ switch (op) {
+ case ARMvfpu_COPY: return "cpy";
+ case ARMvfpu_NEG: return "neg";
+ case ARMvfpu_ABS: return "abs";
+ case ARMvfpu_SQRT: return "sqrt";
+ default: vpanic("showARMVfpUnaryOp");
+ }
+}
+
+HChar* showARMNeonBinOp ( ARMNeonBinOp op ) {
+ switch (op) {
+ case ARMneon_VAND: return "vand";
+ case ARMneon_VORR: return "vorr";
+ case ARMneon_VXOR: return "veor";
+ case ARMneon_VADD: return "vadd";
+ case ARMneon_VRHADDS: return "vrhadd";
+ case ARMneon_VRHADDU: return "vrhadd";
+ case ARMneon_VADDFP: return "vadd";
+ case ARMneon_VPADDFP: return "vpadd";
+ case ARMneon_VABDFP: return "vabd";
+ case ARMneon_VSUB: return "vsub";
+ case ARMneon_VSUBFP: return "vsub";
+ case ARMneon_VMINU: return "vmin";
+ case ARMneon_VMINS: return "vmin";
+ case ARMneon_VMINF: return "vmin";
+ case ARMneon_VMAXU: return "vmax";
+ case ARMneon_VMAXS: return "vmax";
+ case ARMneon_VMAXF: return "vmax";
+ case ARMneon_VQADDU: return "vqadd";
+ case ARMneon_VQADDS: return "vqadd";
+ case ARMneon_VQSUBU: return "vqsub";
+ case ARMneon_VQSUBS: return "vqsub";
+ case ARMneon_VCGTU: return "vcgt";
+ case ARMneon_VCGTS: return "vcgt";
+ case ARMneon_VCGTF: return "vcgt";
+ case ARMneon_VCGEF: return "vcge";
+ case ARMneon_VCGEU: return "vcge";
+ case ARMneon_VCGES: return "vcge";
+ case ARMneon_VCEQ: return "vceq";
+ case ARMneon_VCEQF: return "vceq";
+ case ARMneon_VPADD: return "vpadd";
+ case ARMneon_VPMINU: return "vpmin";
+ case ARMneon_VPMINS: return "vpmin";
+ case ARMneon_VPMINF: return "vpmin";
+ case ARMneon_VPMAXU: return "vpmax";
+ case ARMneon_VPMAXS: return "vpmax";
+ case ARMneon_VPMAXF: return "vpmax";
+ case ARMneon_VEXT: return "vext";
+ case ARMneon_VMUL: return "vmul";
+ case ARMneon_VMULLU: return "vmull";
+ case ARMneon_VMULLS: return "vmull";
+ case ARMneon_VMULP: return "vmul";
+ case ARMneon_VMULFP: return "vmul";
+ case ARMneon_VMULLP: return "vmull";
+ case ARMneon_VQDMULH: return "vqdmulh";
+ case ARMneon_VQRDMULH: return "vqrdmulh";
+ case ARMneon_VQDMULL: return "vqdmull";
+ case ARMneon_VTBL: return "vtbl";
+ case ARMneon_VRECPS: return "vrecps";
+ case ARMneon_VRSQRTS: return "vrsqrts";
+ /* ... */
+ default: vpanic("showARMNeonBinOp");
+ }
+}
+
+HChar* showARMNeonBinOpDataType ( ARMNeonBinOp op ) {
+ switch (op) {
+ case ARMneon_VAND:
+ case ARMneon_VORR:
+ case ARMneon_VXOR:
+ return "";
+ case ARMneon_VADD:
+ case ARMneon_VSUB:
+ case ARMneon_VEXT:
+ case ARMneon_VMUL:
+ case ARMneon_VPADD:
+ case ARMneon_VTBL:
+ case ARMneon_VCEQ:
+ return ".i";
+ case ARMneon_VRHADDU:
+ case ARMneon_VMINU:
+ case ARMneon_VMAXU:
+ case ARMneon_VQADDU:
+ case ARMneon_VQSUBU:
+ case ARMneon_VCGTU:
+ case ARMneon_VCGEU:
+ case ARMneon_VMULLU:
+ case ARMneon_VPMINU:
+ case ARMneon_VPMAXU:
+ return ".u";
+ case ARMneon_VRHADDS:
+ case ARMneon_VMINS:
+ case ARMneon_VMAXS:
+ case ARMneon_VQADDS:
+ case ARMneon_VQSUBS:
+ case ARMneon_VCGTS:
+ case ARMneon_VCGES:
+ case ARMneon_VQDMULL:
+ case ARMneon_VMULLS:
+ case ARMneon_VPMINS:
+ case ARMneon_VPMAXS:
+ case ARMneon_VQDMULH:
+ case ARMneon_VQRDMULH:
+ return ".s";
+ case ARMneon_VMULP:
+ case ARMneon_VMULLP:
+ return ".p";
+ case ARMneon_VADDFP:
+ case ARMneon_VABDFP:
+ case ARMneon_VPADDFP:
+ case ARMneon_VSUBFP:
+ case ARMneon_VMULFP:
+ case ARMneon_VMINF:
+ case ARMneon_VMAXF:
+ case ARMneon_VPMINF:
+ case ARMneon_VPMAXF:
+ case ARMneon_VCGTF:
+ case ARMneon_VCGEF:
+ case ARMneon_VCEQF:
+ case ARMneon_VRECPS:
+ case ARMneon_VRSQRTS:
+ return ".f";
+ /* ... */
+ default: vpanic("showARMNeonBinOpDataType");
+ }
+}
+
+HChar* showARMNeonUnOp ( ARMNeonUnOp op ) {
+ switch (op) {
+ case ARMneon_COPY: return "vmov";
+ case ARMneon_COPYLS: return "vmov";
+ case ARMneon_COPYLU: return "vmov";
+ case ARMneon_COPYN: return "vmov";
+ case ARMneon_COPYQNSS: return "vqmovn";
+ case ARMneon_COPYQNUS: return "vqmovun";
+ case ARMneon_COPYQNUU: return "vqmovn";
+ case ARMneon_NOT: return "vmvn";
+ case ARMneon_EQZ: return "vceq";
+ case ARMneon_CNT: return "vcnt";
+ case ARMneon_CLS: return "vcls";
+ case ARMneon_CLZ: return "vclz";
+ case ARMneon_DUP: return "vdup";
+ case ARMneon_PADDLS: return "vpaddl";
+ case ARMneon_PADDLU: return "vpaddl";
+ case ARMneon_VQSHLNSS: return "vqshl";
+ case ARMneon_VQSHLNUU: return "vqshl";
+ case ARMneon_VQSHLNUS: return "vqshlu";
+ case ARMneon_REV16: return "vrev16";
+ case ARMneon_REV32: return "vrev32";
+ case ARMneon_REV64: return "vrev64";
+ case ARMneon_VCVTFtoU: return "vcvt";
+ case ARMneon_VCVTFtoS: return "vcvt";
+ case ARMneon_VCVTUtoF: return "vcvt";
+ case ARMneon_VCVTStoF: return "vcvt";
+ case ARMneon_VCVTFtoFixedU: return "vcvt";
+ case ARMneon_VCVTFtoFixedS: return "vcvt";
+ case ARMneon_VCVTFixedUtoF: return "vcvt";
+ case ARMneon_VCVTFixedStoF: return "vcvt";
+ case ARMneon_VCVTF32toF16: return "vcvt";
+ case ARMneon_VCVTF16toF32: return "vcvt";
+ case ARMneon_VRECIP: return "vrecip";
+ case ARMneon_VRECIPF: return "vrecipf";
+ case ARMneon_VNEGF: return "vneg";
+ case ARMneon_ABS: return "vabs";
+ case ARMneon_VABSFP: return "vabsfp";
+ case ARMneon_VRSQRTEFP: return "vrsqrtefp";
+ case ARMneon_VRSQRTE: return "vrsqrte";
+ /* ... */
+ default: vpanic("showARMNeonUnOp");
+ }
+}
+
+HChar* showARMNeonUnOpDataType ( ARMNeonUnOp op ) {
+ switch (op) {
+ case ARMneon_COPY:
+ case ARMneon_NOT:
+ return "";
+ case ARMneon_COPYN:
+ case ARMneon_EQZ:
+ case ARMneon_CNT:
+ case ARMneon_DUP:
+ case ARMneon_REV16:
+ case ARMneon_REV32:
+ case ARMneon_REV64:
+ return ".i";
+ case ARMneon_COPYLU:
+ case ARMneon_PADDLU:
+ case ARMneon_COPYQNUU:
+ case ARMneon_VQSHLNUU:
+ case ARMneon_VRECIP:
+ case ARMneon_VRSQRTE:
+ return ".u";
+ case ARMneon_CLS:
+ case ARMneon_CLZ:
+ case ARMneon_COPYLS:
+ case ARMneon_PADDLS:
+ case ARMneon_COPYQNSS:
+ case ARMneon_COPYQNUS:
+ case ARMneon_VQSHLNSS:
+ case ARMneon_VQSHLNUS:
+ case ARMneon_ABS:
+ return ".s";
+ case ARMneon_VRECIPF:
+ case ARMneon_VNEGF:
+ case ARMneon_VABSFP:
+ case ARMneon_VRSQRTEFP:
+ return ".f";
+ case ARMneon_VCVTFtoU: return ".u32.f32";
+ case ARMneon_VCVTFtoS: return ".s32.f32";
+ case ARMneon_VCVTUtoF: return ".f32.u32";
+ case ARMneon_VCVTStoF: return ".f32.s32";
+ case ARMneon_VCVTF16toF32: return ".f32.f16";
+ case ARMneon_VCVTF32toF16: return ".f16.f32";
+ case ARMneon_VCVTFtoFixedU: return ".u32.f32";
+ case ARMneon_VCVTFtoFixedS: return ".s32.f32";
+ case ARMneon_VCVTFixedUtoF: return ".f32.u32";
+ case ARMneon_VCVTFixedStoF: return ".f32.s32";
+ /* ... */
+ default: vpanic("showARMNeonUnOpDataType");
+ }
+}
+
+HChar* showARMNeonUnOpS ( ARMNeonUnOpS op ) {
+ switch (op) {
+ case ARMneon_SETELEM: return "vmov";
+ case ARMneon_GETELEMU: return "vmov";
+ case ARMneon_GETELEMS: return "vmov";
+ case ARMneon_VDUP: return "vdup";
+ /* ... */
+ default: vpanic("showARMNeonUnarySOp");
+ }
+}
+
+HChar* showARMNeonUnOpSDataType ( ARMNeonUnOpS op ) {
+ switch (op) {
+ case ARMneon_SETELEM:
+ case ARMneon_VDUP:
+ return ".i";
+ case ARMneon_GETELEMS:
+ return ".s";
+ case ARMneon_GETELEMU:
+ return ".u";
+ /* ... */
+ default: vpanic("showARMNeonUnarySOp");
+ }
+}
+
+HChar* showARMNeonShiftOp ( ARMNeonShiftOp op ) {
+ switch (op) {
+ case ARMneon_VSHL: return "vshl";
+ case ARMneon_VSAL: return "vshl";
+ case ARMneon_VQSHL: return "vqshl";
+ case ARMneon_VQSAL: return "vqshl";
+ /* ... */
+ default: vpanic("showARMNeonShiftOp");
+ }
+}
+
+HChar* showARMNeonShiftOpDataType ( ARMNeonShiftOp op ) {
+ switch (op) {
+ case ARMneon_VSHL:
+ case ARMneon_VQSHL:
+ return ".u";
+ case ARMneon_VSAL:
+ case ARMneon_VQSAL:
+ return ".s";
+ /* ... */
+ default: vpanic("showARMNeonShiftOpDataType");
+ }
+}
+
+HChar* showARMNeonDualOp ( ARMNeonDualOp op ) {
+ switch (op) {
+ case ARMneon_TRN: return "vtrn";
+ case ARMneon_ZIP: return "vzip";
+ case ARMneon_UZP: return "vuzp";
+ /* ... */
+ default: vpanic("showARMNeonDualOp");
+ }
+}
+
+HChar* showARMNeonDualOpDataType ( ARMNeonDualOp op ) {
+ switch (op) {
+ case ARMneon_TRN:
+ case ARMneon_ZIP:
+ case ARMneon_UZP:
+ return "i";
+ /* ... */
+ default: vpanic("showARMNeonDualOp");
+ }
+}
+
+static HChar* showARMNeonDataSize_wrk ( UInt size )
+{
+ switch (size) {
+ case 0: return "8";
+ case 1: return "16";
+ case 2: return "32";
+ case 3: return "64";
+ default: vpanic("showARMNeonDataSize");
+ }
+}
+
+static HChar* showARMNeonDataSize ( ARMInstr* i )
+{
+ switch (i->tag) {
+ case ARMin_NBinary:
+ if (i->ARMin.NBinary.op == ARMneon_VEXT)
+ return "8";
+ if (i->ARMin.NBinary.op == ARMneon_VAND ||
+ i->ARMin.NBinary.op == ARMneon_VORR ||
+ i->ARMin.NBinary.op == ARMneon_VXOR)
+ return "";
+ return showARMNeonDataSize_wrk(i->ARMin.NBinary.size);
+ case ARMin_NUnary:
+ if (i->ARMin.NUnary.op == ARMneon_COPY ||
+ i->ARMin.NUnary.op == ARMneon_NOT ||
+ i->ARMin.NUnary.op == ARMneon_VCVTF32toF16||
+ i->ARMin.NUnary.op == ARMneon_VCVTF16toF32||
+ i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedS ||
+ i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedU ||
+ i->ARMin.NUnary.op == ARMneon_VCVTFixedStoF ||
+ i->ARMin.NUnary.op == ARMneon_VCVTFixedUtoF ||
+ i->ARMin.NUnary.op == ARMneon_VCVTFtoS ||
+ i->ARMin.NUnary.op == ARMneon_VCVTFtoU ||
+ i->ARMin.NUnary.op == ARMneon_VCVTStoF ||
+ i->ARMin.NUnary.op == ARMneon_VCVTUtoF)
+ return "";
+ if (i->ARMin.NUnary.op == ARMneon_VQSHLNSS ||
+ i->ARMin.NUnary.op == ARMneon_VQSHLNUU ||
+ i->ARMin.NUnary.op == ARMneon_VQSHLNUS) {
+ UInt size;
+ size = i->ARMin.NUnary.size;
+ if (size & 0x40)
+ return "64";
+ if (size & 0x20)
+ return "32";
+ if (size & 0x10)
+ return "16";
+ if (size & 0x08)
+ return "8";
+ vpanic("showARMNeonDataSize");
+ }
+ return showARMNeonDataSize_wrk(i->ARMin.NUnary.size);
+ case ARMin_NUnaryS:
+ if (i->ARMin.NUnaryS.op == ARMneon_VDUP) {
+ UInt size;
+ size = i->ARMin.NUnaryS.size;
+ if ((size & 1) == 1)
+ return "8";
+ if ((size & 3) == 2)
+ return "16";
+ if ((size & 7) == 4)
+ return "32";
+ vpanic("showARMNeonDataSize");
+ }
+ return showARMNeonDataSize_wrk(i->ARMin.NUnaryS.size);
+ case ARMin_NShift:
+ return showARMNeonDataSize_wrk(i->ARMin.NShift.size);
+ case ARMin_NDual:
+ return showARMNeonDataSize_wrk(i->ARMin.NDual.size);
+ default:
+ vpanic("showARMNeonDataSize");
+ }
+}
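+
+/* Note: for the VQSHLN ops the 'size' field packs both the lane width
+ and the shift amount -- the highest set bit among 0x40/0x20/0x10/0x08
+ selects 64/32/16/8-bit lanes, and the bits below it give the shift
+ count. ppARMInstr below decodes the "#imm" operand the same way. */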
+
+ARMInstr* ARMInstr_Alu ( ARMAluOp op,
+ HReg dst, HReg argL, ARMRI84* argR ) {
+ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
+ i->tag = ARMin_Alu;
+ i->ARMin.Alu.op = op;
+ i->ARMin.Alu.dst = dst;
+ i->ARMin.Alu.argL = argL;
+ i->ARMin.Alu.argR = argR;
+ return i;
+}
+ARMInstr* ARMInstr_Shift ( ARMShiftOp op,
+ HReg dst, HReg argL, ARMRI5* argR ) {
+ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
+ i->tag = ARMin_Shift;
+ i->ARMin.Shift.op = op;
+ i->ARMin.Shift.dst = dst;
+ i->ARMin.Shift.argL = argL;
+ i->ARMin.Shift.argR = argR;
+ return i;
+}
+ARMInstr* ARMInstr_Unary ( ARMUnaryOp op, HReg dst, HReg src ) {
+ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
+ i->tag = ARMin_Unary;
+ i->ARMin.Unary.op = op;
+ i->ARMin.Unary.dst = dst;
+ i->ARMin.Unary.src = src;
+ return i;
+}
+ARMInstr* ARMInstr_CmpOrTst ( Bool isCmp, HReg argL, ARMRI84* argR ) {
+ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
+ i->tag = ARMin_CmpOrTst;
+ i->ARMin.CmpOrTst.isCmp = isCmp;
+ i->ARMin.CmpOrTst.argL = argL;
+ i->ARMin.CmpOrTst.argR = argR;
+ return i;
+}
+ARMInstr* ARMInstr_Mov ( HReg dst, ARMRI84* src ) {
+ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
+ i->tag = ARMin_Mov;
+ i->ARMin.Mov.dst = dst;
+ i->ARMin.Mov.src = src;
+ return i;
+}
+ARMInstr* ARMInstr_Imm32 ( HReg dst, UInt imm32 ) {
+ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
+ i->tag = ARMin_Imm32;
+ i->ARMin.Imm32.dst = dst;
+ i->ARMin.Imm32.imm32 = imm32;
+ return i;
+}
+ARMInstr* ARMInstr_LdSt32 ( Bool isLoad, HReg rD, ARMAMode1* amode ) {
+ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
+ i->tag = ARMin_LdSt32;
+ i->ARMin.LdSt32.isLoad = isLoad;
+ i->ARMin.LdSt32.rD = rD;
+ i->ARMin.LdSt32.amode = amode;
+ return i;
+}
+ARMInstr* ARMInstr_LdSt16 ( Bool isLoad, Bool signedLoad,
+ HReg rD, ARMAMode2* amode ) {
+ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
+ i->tag = ARMin_LdSt16;
+ i->ARMin.LdSt16.isLoad = isLoad;
+ i->ARMin.LdSt16.signedLoad = signedLoad;
+ i->ARMin.LdSt16.rD = rD;
+ i->ARMin.LdSt16.amode = amode;
+ return i;
+}
+ARMInstr* ARMInstr_LdSt8U ( Bool isLoad, HReg rD, ARMAMode1* amode ) {
+ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
+ i->tag = ARMin_LdSt8U;
+ i->ARMin.LdSt8U.isLoad = isLoad;
+ i->ARMin.LdSt8U.rD = rD;
+ i->ARMin.LdSt8U.amode = amode;
+ return i;
+}
+//extern ARMInstr* ARMInstr_Ld8S ( HReg, ARMAMode2* );
+ARMInstr* ARMInstr_Goto ( IRJumpKind jk, ARMCondCode cond, HReg gnext ) {
+ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
+ i->tag = ARMin_Goto;
+ i->ARMin.Goto.jk = jk;
+ i->ARMin.Goto.cond = cond;
+ i->ARMin.Goto.gnext = gnext;
+ return i;
+}
+ARMInstr* ARMInstr_CMov ( ARMCondCode cond, HReg dst, ARMRI84* src ) {
+ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
+ i->tag = ARMin_CMov;
+ i->ARMin.CMov.cond = cond;
+ i->ARMin.CMov.dst = dst;
+ i->ARMin.CMov.src = src;
+ vassert(cond != ARMcc_AL);
+ return i;
+}
+ARMInstr* ARMInstr_Call ( ARMCondCode cond, HWord target, Int nArgRegs ) {
+ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
+ i->tag = ARMin_Call;
+ i->ARMin.Call.cond = cond;
+ i->ARMin.Call.target = target;
+ i->ARMin.Call.nArgRegs = nArgRegs;
+ return i;
+}
+ARMInstr* ARMInstr_Mul ( ARMMulOp op ) {
+ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
+ i->tag = ARMin_Mul;
+ i->ARMin.Mul.op = op;
+ return i;
+}
+ARMInstr* ARMInstr_LdrEX ( Int szB ) {
+ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
+ i->tag = ARMin_LdrEX;
+ i->ARMin.LdrEX.szB = szB;
+ vassert(szB == 4 || szB == 1);
+ return i;
+}
+ARMInstr* ARMInstr_StrEX ( Int szB ) {
+ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
+ i->tag = ARMin_StrEX;
+ i->ARMin.StrEX.szB = szB;
+ vassert(szB == 4 || szB == 1);
+ return i;
+}
+ARMInstr* ARMInstr_VLdStD ( Bool isLoad, HReg dD, ARMAModeV* am ) {
+ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
+ i->tag = ARMin_VLdStD;
+ i->ARMin.VLdStD.isLoad = isLoad;
+ i->ARMin.VLdStD.dD = dD;
+ i->ARMin.VLdStD.amode = am;
+ return i;
+}
+ARMInstr* ARMInstr_VLdStS ( Bool isLoad, HReg fD, ARMAModeV* am ) {
+ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
+ i->tag = ARMin_VLdStS;
+ i->ARMin.VLdStS.isLoad = isLoad;
+ i->ARMin.VLdStS.fD = fD;
+ i->ARMin.VLdStS.amode = am;
+ return i;
+}
+ARMInstr* ARMInstr_VAluD ( ARMVfpOp op, HReg dst, HReg argL, HReg argR ) {
+ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
+ i->tag = ARMin_VAluD;
+ i->ARMin.VAluD.op = op;
+ i->ARMin.VAluD.dst = dst;
+ i->ARMin.VAluD.argL = argL;
+ i->ARMin.VAluD.argR = argR;
+ return i;
+}
+ARMInstr* ARMInstr_VAluS ( ARMVfpOp op, HReg dst, HReg argL, HReg argR ) {
+ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
+ i->tag = ARMin_VAluS;
+ i->ARMin.VAluS.op = op;
+ i->ARMin.VAluS.dst = dst;
+ i->ARMin.VAluS.argL = argL;
+ i->ARMin.VAluS.argR = argR;
+ return i;
+}
+ARMInstr* ARMInstr_VUnaryD ( ARMVfpUnaryOp op, HReg dst, HReg src ) {
+ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
+ i->tag = ARMin_VUnaryD;
+ i->ARMin.VUnaryD.op = op;
+ i->ARMin.VUnaryD.dst = dst;
+ i->ARMin.VUnaryD.src = src;
+ return i;
+}
+ARMInstr* ARMInstr_VUnaryS ( ARMVfpUnaryOp op, HReg dst, HReg src ) {
+ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
+ i->tag = ARMin_VUnaryS;
+ i->ARMin.VUnaryS.op = op;
+ i->ARMin.VUnaryS.dst = dst;
+ i->ARMin.VUnaryS.src = src;
+ return i;
+}
+ARMInstr* ARMInstr_VCmpD ( HReg argL, HReg argR ) {
+ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
+ i->tag = ARMin_VCmpD;
+ i->ARMin.VCmpD.argL = argL;
+ i->ARMin.VCmpD.argR = argR;
+ return i;
+}
+ARMInstr* ARMInstr_VCMovD ( ARMCondCode cond, HReg dst, HReg src ) {
+ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
+ i->tag = ARMin_VCMovD;
+ i->ARMin.VCMovD.cond = cond;
+ i->ARMin.VCMovD.dst = dst;
+ i->ARMin.VCMovD.src = src;
+ vassert(cond != ARMcc_AL);
+ return i;
+}
+ARMInstr* ARMInstr_VCMovS ( ARMCondCode cond, HReg dst, HReg src ) {
+ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
+ i->tag = ARMin_VCMovS;
+ i->ARMin.VCMovS.cond = cond;
+ i->ARMin.VCMovS.dst = dst;
+ i->ARMin.VCMovS.src = src;
+ vassert(cond != ARMcc_AL);
+ return i;
+}
+ARMInstr* ARMInstr_VCvtSD ( Bool sToD, HReg dst, HReg src ) {
+ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
+ i->tag = ARMin_VCvtSD;
+ i->ARMin.VCvtSD.sToD = sToD;
+ i->ARMin.VCvtSD.dst = dst;
+ i->ARMin.VCvtSD.src = src;
+ return i;
+}
+ARMInstr* ARMInstr_VXferD ( Bool toD, HReg dD, HReg rHi, HReg rLo ) {
+ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
+ i->tag = ARMin_VXferD;
+ i->ARMin.VXferD.toD = toD;
+ i->ARMin.VXferD.dD = dD;
+ i->ARMin.VXferD.rHi = rHi;
+ i->ARMin.VXferD.rLo = rLo;
+ return i;
+}
+ARMInstr* ARMInstr_VXferS ( Bool toS, HReg fD, HReg rLo ) {
+ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
+ i->tag = ARMin_VXferS;
+ i->ARMin.VXferS.toS = toS;
+ i->ARMin.VXferS.fD = fD;
+ i->ARMin.VXferS.rLo = rLo;
+ return i;
+}
+ARMInstr* ARMInstr_VCvtID ( Bool iToD, Bool syned,
+ HReg dst, HReg src ) {
+ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
+ i->tag = ARMin_VCvtID;
+ i->ARMin.VCvtID.iToD = iToD;
+ i->ARMin.VCvtID.syned = syned;
+ i->ARMin.VCvtID.dst = dst;
+ i->ARMin.VCvtID.src = src;
+ return i;
+}
+ARMInstr* ARMInstr_FPSCR ( Bool toFPSCR, HReg iReg ) {
+ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
+ i->tag = ARMin_FPSCR;
+ i->ARMin.FPSCR.toFPSCR = toFPSCR;
+ i->ARMin.FPSCR.iReg = iReg;
+ return i;
+}
+ARMInstr* ARMInstr_MFence ( void ) {
+ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
+ i->tag = ARMin_MFence;
+ return i;
+}
+
+ARMInstr* ARMInstr_NLdStQ ( Bool isLoad, HReg dQ, ARMAModeN *amode ) {
+ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
+ i->tag = ARMin_NLdStQ;
+ i->ARMin.NLdStQ.isLoad = isLoad;
+ i->ARMin.NLdStQ.dQ = dQ;
+ i->ARMin.NLdStQ.amode = amode;
+ return i;
+}
+
+ARMInstr* ARMInstr_NLdStD ( Bool isLoad, HReg dD, ARMAModeN *amode ) {
+ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
+ i->tag = ARMin_NLdStD;
+ i->ARMin.NLdStD.isLoad = isLoad;
+ i->ARMin.NLdStD.dD = dD;
+ i->ARMin.NLdStD.amode = amode;
+ return i;
+}
+
+ARMInstr* ARMInstr_NUnary ( ARMNeonUnOp op, HReg dQ, HReg nQ,
+ UInt size, Bool Q ) {
+ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
+ i->tag = ARMin_NUnary;
+ i->ARMin.NUnary.op = op;
+ i->ARMin.NUnary.src = nQ;
+ i->ARMin.NUnary.dst = dQ;
+ i->ARMin.NUnary.size = size;
+ i->ARMin.NUnary.Q = Q;
+ return i;
+}
+
+ARMInstr* ARMInstr_NUnaryS ( ARMNeonUnOp op, ARMNRS* dst, ARMNRS* src,
+ UInt size, Bool Q ) {
+ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
+ i->tag = ARMin_NUnaryS;
+ i->ARMin.NUnaryS.op = op;
+ i->ARMin.NUnaryS.src = src;
+ i->ARMin.NUnaryS.dst = dst;
+ i->ARMin.NUnaryS.size = size;
+ i->ARMin.NUnaryS.Q = Q;
+ return i;
+}
+
+ARMInstr* ARMInstr_NDual ( ARMNeonDualOp op, HReg nQ, HReg mQ,
+ UInt size, Bool Q ) {
+ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
+ i->tag = ARMin_NDual;
+ i->ARMin.NDual.op = op;
+ i->ARMin.NDual.arg1 = nQ;
+ i->ARMin.NDual.arg2 = mQ;
+ i->ARMin.NDual.size = size;
+ i->ARMin.NDual.Q = Q;
+ return i;
+}
+
+ARMInstr* ARMInstr_NBinary ( ARMNeonBinOp op,
+ HReg dst, HReg argL, HReg argR,
+ UInt size, Bool Q ) {
+ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
+ i->tag = ARMin_NBinary;
+ i->ARMin.NBinary.op = op;
+ i->ARMin.NBinary.argL = argL;
+ i->ARMin.NBinary.argR = argR;
+ i->ARMin.NBinary.dst = dst;
+ i->ARMin.NBinary.size = size;
+ i->ARMin.NBinary.Q = Q;
+ return i;
+}
+
+ARMInstr* ARMInstr_NeonImm (HReg dst, ARMNImm* imm ) {
+ ARMInstr *i = LibVEX_Alloc(sizeof(ARMInstr));
+ i->tag = ARMin_NeonImm;
+ i->ARMin.NeonImm.dst = dst;
+ i->ARMin.NeonImm.imm = imm;
+ return i;
+}
+
+ARMInstr* ARMInstr_NCMovQ ( ARMCondCode cond, HReg dst, HReg src ) {
+ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
+ i->tag = ARMin_NCMovQ;
+ i->ARMin.NCMovQ.cond = cond;
+ i->ARMin.NCMovQ.dst = dst;
+ i->ARMin.NCMovQ.src = src;
+ vassert(cond != ARMcc_AL);
+ return i;
+}
+
+ARMInstr* ARMInstr_NShift ( ARMNeonShiftOp op,
+ HReg dst, HReg argL, HReg argR,
+ UInt size, Bool Q ) {
+ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
+ i->tag = ARMin_NShift;
+ i->ARMin.NShift.op = op;
+ i->ARMin.NShift.argL = argL;
+ i->ARMin.NShift.argR = argR;
+ i->ARMin.NShift.dst = dst;
+ i->ARMin.NShift.size = size;
+ i->ARMin.NShift.Q = Q;
+ return i;
+}
+
+/* Helper copy-pasted from isel.c */
+static Bool fitsIn8x4 ( UInt* u8, UInt* u4, UInt u )
+{
+ UInt i;
+ for (i = 0; i < 16; i++) {
+ if (0 == (u & 0xFFFFFF00)) {
+ *u8 = u;
+ *u4 = i;
+ return True;
+ }
+ u = ROR32(u, 30);
+ }
+ vassert(i == 16);
+ return False;
+}
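+
+/* For example (illustrative): 0xFF000000 is accepted with u8 = 0xFF,
+ u4 = 4, since 0xFF rotated right by 2*4 = 8 bits gives 0xFF000000.
+ 0x101 is rejected: its set bits span nine positions, so no rotation
+ can fit them into the low 8 bits. */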
+
+ARMInstr* ARMInstr_Add32 ( HReg rD, HReg rN, UInt imm32 ) {
+ UInt u8, u4;
+ ARMInstr *i = LibVEX_Alloc(sizeof(ARMInstr));
+ /* Try to generate single ADD if possible */
+ if (fitsIn8x4(&u8, &u4, imm32)) {
+ i->tag = ARMin_Alu;
+ i->ARMin.Alu.op = ARMalu_ADD;
+ i->ARMin.Alu.dst = rD;
+ i->ARMin.Alu.argL = rN;
+ i->ARMin.Alu.argR = ARMRI84_I84(u8, u4);
+ } else {
+ i->tag = ARMin_Add32;
+ i->ARMin.Add32.rD = rD;
+ i->ARMin.Add32.rN = rN;
+ i->ARMin.Add32.imm32 = imm32;
+ }
+ return i;
+}
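+
+/* Usage sketch: ARMInstr_Add32(rD, rN, 0x1000) folds into a single
+ "add rD, rN, #4096", since 0x1000 is a valid 8x4 immediate, whereas
+ imm32 = 0xFFFF has no such encoding and keeps the Add32 pseudo-insn
+ for the emitter to expand. */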
+
+/* ... */
+
+void ppARMInstr ( ARMInstr* i ) {
+ switch (i->tag) {
+ case ARMin_Alu:
+ vex_printf("%-4s ", showARMAluOp(i->ARMin.Alu.op));
+ ppHRegARM(i->ARMin.Alu.dst);
+ vex_printf(", ");
+ ppHRegARM(i->ARMin.Alu.argL);
+ vex_printf(", ");
+ ppARMRI84(i->ARMin.Alu.argR);
+ return;
+ case ARMin_Shift:
+ vex_printf("%s ", showARMShiftOp(i->ARMin.Shift.op));
+ ppHRegARM(i->ARMin.Shift.dst);
+ vex_printf(", ");
+ ppHRegARM(i->ARMin.Shift.argL);
+ vex_printf(", ");
+ ppARMRI5(i->ARMin.Shift.argR);
+ return;
+ case ARMin_Unary:
+ vex_printf("%s ", showARMUnaryOp(i->ARMin.Unary.op));
+ ppHRegARM(i->ARMin.Unary.dst);
+ vex_printf(", ");
+ ppHRegARM(i->ARMin.Unary.src);
+ return;
+ case ARMin_CmpOrTst:
+ vex_printf("%s ", i->ARMin.CmpOrTst.isCmp ? "cmp" : "tst");
+ ppHRegARM(i->ARMin.CmpOrTst.argL);
+ vex_printf(", ");
+ ppARMRI84(i->ARMin.CmpOrTst.argR);
+ return;
+ case ARMin_Mov:
+ vex_printf("mov ");
+ ppHRegARM(i->ARMin.Mov.dst);
+ vex_printf(", ");
+ ppARMRI84(i->ARMin.Mov.src);
+ return;
+ case ARMin_Imm32:
+ vex_printf("imm ");
+ ppHRegARM(i->ARMin.Imm32.dst);
+ vex_printf(", 0x%x", i->ARMin.Imm32.imm32);
+ return;
+ case ARMin_LdSt32:
+ if (i->ARMin.LdSt32.isLoad) {
+ vex_printf("ldr ");
+ ppHRegARM(i->ARMin.LdSt32.rD);
+ vex_printf(", ");
+ ppARMAMode1(i->ARMin.LdSt32.amode);
+ } else {
+ vex_printf("str ");
+ ppARMAMode1(i->ARMin.LdSt32.amode);
+ vex_printf(", ");
+ ppHRegARM(i->ARMin.LdSt32.rD);
+ }
+ return;
+ case ARMin_LdSt16:
+ if (i->ARMin.LdSt16.isLoad) {
+ vex_printf("%s", i->ARMin.LdSt16.signedLoad
+ ? "ldrsh " : "ldrh " );
+ ppHRegARM(i->ARMin.LdSt16.rD);
+ vex_printf(", ");
+ ppARMAMode2(i->ARMin.LdSt16.amode);
+ } else {
+ vex_printf("strh ");
+ ppARMAMode2(i->ARMin.LdSt16.amode);
+ vex_printf(", ");
+ ppHRegARM(i->ARMin.LdSt16.rD);
+ }
+ return;
+ case ARMin_LdSt8U:
+ if (i->ARMin.LdSt8U.isLoad) {
+ vex_printf("ldrb ");
+ ppHRegARM(i->ARMin.LdSt8U.rD);
+ vex_printf(", ");
+ ppARMAMode1(i->ARMin.LdSt8U.amode);
+ } else {
+ vex_printf("strb ");
+ ppARMAMode1(i->ARMin.LdSt8U.amode);
+ vex_printf(", ");
+ ppHRegARM(i->ARMin.LdSt8U.rD);
+ }
+ return;
+ case ARMin_Ld8S:
+ goto unhandled;
+ case ARMin_Goto:
+ if (i->ARMin.Goto.cond != ARMcc_AL) {
+ vex_printf("if (%%cpsr.%s) { ",
+ showARMCondCode(i->ARMin.Goto.cond));
+ } else {
+ vex_printf("if (1) { ");
+ }
+ if (i->ARMin.Goto.jk != Ijk_Boring
+ && i->ARMin.Goto.jk != Ijk_Call
+ && i->ARMin.Goto.jk != Ijk_Ret) {
+ vex_printf("mov r8, $");
+ ppIRJumpKind(i->ARMin.Goto.jk);
+ vex_printf(" ; ");
+ }
+ vex_printf("mov r0, ");
+ ppHRegARM(i->ARMin.Goto.gnext);
+ vex_printf(" ; bx r14");
+ vex_printf(" }");
+ return;
+ case ARMin_CMov:
+ vex_printf("mov%s ", showARMCondCode(i->ARMin.CMov.cond));
+ ppHRegARM(i->ARMin.CMov.dst);
+ vex_printf(", ");
+ ppARMRI84(i->ARMin.CMov.src);
+ return;
+ case ARMin_Call:
+ vex_printf("call%s ",
+ i->ARMin.Call.cond==ARMcc_AL
+ ? "" : showARMCondCode(i->ARMin.Call.cond));
+ vex_printf("0x%lx [nArgRegs=%d]",
+ i->ARMin.Call.target, i->ARMin.Call.nArgRegs);
+ return;
+ case ARMin_Mul:
+ vex_printf("%-5s ", showARMMulOp(i->ARMin.Mul.op));
+ if (i->ARMin.Mul.op == ARMmul_PLAIN) {
+ vex_printf("r0, r2, r3");
+ } else {
+ vex_printf("r1:r0, r2, r3");
+ }
+ return;
+ case ARMin_LdrEX:
+ vex_printf("ldrex%s ", i->ARMin.LdrEX.szB == 1 ? "b"
+ : i->ARMin.LdrEX.szB == 2 ? "h" : "");
+ vex_printf("r0, [r1]");
+ return;
+ case ARMin_StrEX:
+ vex_printf("strex%s ", i->ARMin.StrEX.szB == 1 ? "b"
+ : i->ARMin.StrEX.szB == 2 ? "h" : "");
+ vex_printf("r0, r1, [r2]");
+ return;
+ case ARMin_VLdStD:
+ if (i->ARMin.VLdStD.isLoad) {
+ vex_printf("fldd ");
+ ppHRegARM(i->ARMin.VLdStD.dD);
+ vex_printf(", ");
+ ppARMAModeV(i->ARMin.VLdStD.amode);
+ } else {
+ vex_printf("fstd ");
+ ppARMAModeV(i->ARMin.VLdStD.amode);
+ vex_printf(", ");
+ ppHRegARM(i->ARMin.VLdStD.dD);
+ }
+ return;
+ case ARMin_VLdStS:
+ if (i->ARMin.VLdStS.isLoad) {
+ vex_printf("flds ");
+ ppHRegARM(i->ARMin.VLdStS.fD);
+ vex_printf(", ");
+ ppARMAModeV(i->ARMin.VLdStS.amode);
+ } else {
+ vex_printf("fsts ");
+ ppARMAModeV(i->ARMin.VLdStS.amode);
+ vex_printf(", ");
+ ppHRegARM(i->ARMin.VLdStS.fD);
+ }
+ return;
+ case ARMin_VAluD:
+ vex_printf("f%-3sd ", showARMVfpOp(i->ARMin.VAluD.op));
+ ppHRegARM(i->ARMin.VAluD.dst);
+ vex_printf(", ");
+ ppHRegARM(i->ARMin.VAluD.argL);
+ vex_printf(", ");
+ ppHRegARM(i->ARMin.VAluD.argR);
+ return;
+ case ARMin_VAluS:
+ vex_printf("f%-3ss ", showARMVfpOp(i->ARMin.VAluS.op));
+ ppHRegARM(i->ARMin.VAluS.dst);
+ vex_printf(", ");
+ ppHRegARM(i->ARMin.VAluS.argL);
+ vex_printf(", ");
+ ppHRegARM(i->ARMin.VAluS.argR);
+ return;
+ case ARMin_VUnaryD:
+ vex_printf("f%-3sd ", showARMVfpUnaryOp(i->ARMin.VUnaryD.op));
+ ppHRegARM(i->ARMin.VUnaryD.dst);
+ vex_printf(", ");
+ ppHRegARM(i->ARMin.VUnaryD.src);
+ return;
+ case ARMin_VUnaryS:
+ vex_printf("f%-3ss ", showARMVfpUnaryOp(i->ARMin.VUnaryS.op));
+ ppHRegARM(i->ARMin.VUnaryS.dst);
+ vex_printf(", ");
+ ppHRegARM(i->ARMin.VUnaryS.src);
+ return;
+ case ARMin_VCmpD:
+ vex_printf("fcmpd ");
+ ppHRegARM(i->ARMin.VCmpD.argL);
+ vex_printf(", ");
+ ppHRegARM(i->ARMin.VCmpD.argR);
+ vex_printf(" ; fmstat");
+ return;
+ case ARMin_VCMovD:
+ vex_printf("fcpyd%s ", showARMCondCode(i->ARMin.VCMovD.cond));
+ ppHRegARM(i->ARMin.VCMovD.dst);
+ vex_printf(", ");
+ ppHRegARM(i->ARMin.VCMovD.src);
+ return;
+ case ARMin_VCMovS:
+ vex_printf("fcpys%s ", showARMCondCode(i->ARMin.VCMovS.cond));
+ ppHRegARM(i->ARMin.VCMovS.dst);
+ vex_printf(", ");
+ ppHRegARM(i->ARMin.VCMovS.src);
+ return;
+ case ARMin_VCvtSD:
+ vex_printf("fcvt%s ", i->ARMin.VCvtSD.sToD ? "ds" : "sd");
+ ppHRegARM(i->ARMin.VCvtSD.dst);
+ vex_printf(", ");
+ ppHRegARM(i->ARMin.VCvtSD.src);
+ return;
+ case ARMin_VXferD:
+ vex_printf("vmov ");
+ if (i->ARMin.VXferD.toD) {
+ ppHRegARM(i->ARMin.VXferD.dD);
+ vex_printf(", ");
+ ppHRegARM(i->ARMin.VXferD.rLo);
+ vex_printf(", ");
+ ppHRegARM(i->ARMin.VXferD.rHi);
+ } else {
+ ppHRegARM(i->ARMin.VXferD.rLo);
+ vex_printf(", ");
+ ppHRegARM(i->ARMin.VXferD.rHi);
+ vex_printf(", ");
+ ppHRegARM(i->ARMin.VXferD.dD);
+ }
+ return;
+ case ARMin_VXferS:
+ vex_printf("vmov ");
+ if (i->ARMin.VXferS.toS) {
+ ppHRegARM(i->ARMin.VXferS.fD);
+ vex_printf(", ");
+ ppHRegARM(i->ARMin.VXferS.rLo);
+ } else {
+ ppHRegARM(i->ARMin.VXferS.rLo);
+ vex_printf(", ");
+ ppHRegARM(i->ARMin.VXferS.fD);
+ }
+ return;
+ case ARMin_VCvtID: {
+ HChar* nm = "?";
+ if (i->ARMin.VCvtID.iToD) {
+ nm = i->ARMin.VCvtID.syned ? "fsitod" : "fuitod";
+ } else {
+ nm = i->ARMin.VCvtID.syned ? "ftosid" : "ftouid";
+ }
+ vex_printf("%s ", nm);
+ ppHRegARM(i->ARMin.VCvtID.dst);
+ vex_printf(", ");
+ ppHRegARM(i->ARMin.VCvtID.src);
+ return;
+ }
+ case ARMin_FPSCR:
+ if (i->ARMin.FPSCR.toFPSCR) {
+ vex_printf("fmxr fpscr, ");
+ ppHRegARM(i->ARMin.FPSCR.iReg);
+ } else {
+ vex_printf("fmrx ");
+ ppHRegARM(i->ARMin.FPSCR.iReg);
+ vex_printf(", fpscr");
+ }
+ return;
+ case ARMin_MFence:
+ vex_printf("mfence (mcr 15,0,r0,c7,c10,4; 15,0,r0,c7,c10,5; "
+ "15,0,r0,c7,c5,4)");
+ return;
+ case ARMin_NLdStQ:
+ if (i->ARMin.NLdStQ.isLoad)
+ vex_printf("vld1.32 {");
+ else
+ vex_printf("vst1.32 {");
+ ppHRegARM(i->ARMin.NLdStQ.dQ);
+ vex_printf("} ");
+ ppARMAModeN(i->ARMin.NLdStQ.amode);
+ return;
+ case ARMin_NLdStD:
+ if (i->ARMin.NLdStD.isLoad)
+ vex_printf("vld1.32 {");
+ else
+ vex_printf("vst1.32 {");
+ ppHRegARM(i->ARMin.NLdStD.dD);
+ vex_printf("} ");
+ ppARMAModeN(i->ARMin.NLdStD.amode);
+ return;
+ case ARMin_NUnary:
+ vex_printf("%s%s%s ",
+ showARMNeonUnOp(i->ARMin.NUnary.op),
+ showARMNeonUnOpDataType(i->ARMin.NUnary.op),
+ showARMNeonDataSize(i));
+ ppHRegARM(i->ARMin.NUnary.dst);
+ vex_printf(", ");
+ ppHRegARM(i->ARMin.NUnary.src);
+ if (i->ARMin.NUnary.op == ARMneon_EQZ)
+ vex_printf(", #0");
+ if (i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedS ||
+ i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedU ||
+ i->ARMin.NUnary.op == ARMneon_VCVTFixedStoF ||
+ i->ARMin.NUnary.op == ARMneon_VCVTFixedUtoF) {
+ vex_printf(", #%d", i->ARMin.NUnary.size);
+ }
+ if (i->ARMin.NUnary.op == ARMneon_VQSHLNSS ||
+ i->ARMin.NUnary.op == ARMneon_VQSHLNUU ||
+ i->ARMin.NUnary.op == ARMneon_VQSHLNUS) {
+ UInt size;
+ size = i->ARMin.NUnary.size;
+ if (size & 0x40) {
+ vex_printf(", #%d", size - 64);
+ } else if (size & 0x20) {
+ vex_printf(", #%d", size - 32);
+ } else if (size & 0x10) {
+ vex_printf(", #%d", size - 16);
+ } else if (size & 0x08) {
+ vex_printf(", #%d", size - 8);
+ }
+ }
+ return;
+ case ARMin_NUnaryS:
+ vex_printf("%s%s%s ",
+ showARMNeonUnOpS(i->ARMin.NUnaryS.op),
+ showARMNeonUnOpSDataType(i->ARMin.NUnaryS.op),
+ showARMNeonDataSize(i));
+ ppARMNRS(i->ARMin.NUnaryS.dst);
+ vex_printf(", ");
+ ppARMNRS(i->ARMin.NUnaryS.src);
+ return;
+ case ARMin_NShift:
+ vex_printf("%s%s%s ",
+ showARMNeonShiftOp(i->ARMin.NShift.op),
+ showARMNeonShiftOpDataType(i->ARMin.NShift.op),
+ showARMNeonDataSize(i));
+ ppHRegARM(i->ARMin.NShift.dst);
+ vex_printf(", ");
+ ppHRegARM(i->ARMin.NShift.argL);
+ vex_printf(", ");
+ ppHRegARM(i->ARMin.NShift.argR);
+ return;
+ case ARMin_NDual:
+ vex_printf("%s%s%s ",
+ showARMNeonDualOp(i->ARMin.NDual.op),
+ showARMNeonDualOpDataType(i->ARMin.NDual.op),
+ showARMNeonDataSize(i));
+ ppHRegARM(i->ARMin.NDual.arg1);
+ vex_printf(", ");
+ ppHRegARM(i->ARMin.NDual.arg2);
+ return;
+ case ARMin_NBinary:
+ vex_printf("%s%s%s",
+ showARMNeonBinOp(i->ARMin.NBinary.op),
+ showARMNeonBinOpDataType(i->ARMin.NBinary.op),
+ showARMNeonDataSize(i));
+ vex_printf(" ");
+ ppHRegARM(i->ARMin.NBinary.dst);
+ vex_printf(", ");
+ ppHRegARM(i->ARMin.NBinary.argL);
+ vex_printf(", ");
+ ppHRegARM(i->ARMin.NBinary.argR);
+ return;
+ case ARMin_NeonImm:
+ vex_printf("vmov ");
+ ppHRegARM(i->ARMin.NeonImm.dst);
+ vex_printf(", ");
+ ppARMNImm(i->ARMin.NeonImm.imm);
+ return;
+ case ARMin_NCMovQ:
+ vex_printf("vmov%s ", showARMCondCode(i->ARMin.NCMovQ.cond));
+ ppHRegARM(i->ARMin.NCMovQ.dst);
+ vex_printf(", ");
+ ppHRegARM(i->ARMin.NCMovQ.src);
+ return;
+ case ARMin_Add32:
+ vex_printf("add32 ");
+ ppHRegARM(i->ARMin.Add32.rD);
+ vex_printf(", ");
+ ppHRegARM(i->ARMin.Add32.rN);
+ vex_printf(", ");
+ vex_printf("%d", i->ARMin.Add32.imm32);
+ return;
+ default:
+ unhandled:
+ vex_printf("ppARMInstr: unhandled case (tag %d)", (Int)i->tag);
+ vpanic("ppARMInstr(1)");
+ return;
+ }
+}
+
+
+/* --------- Helpers for register allocation. --------- */
+
+void getRegUsage_ARMInstr ( HRegUsage* u, ARMInstr* i, Bool mode64 )
+{
+ vassert(mode64 == False);
+ initHRegUsage(u);
+ switch (i->tag) {
+ case ARMin_Alu:
+ addHRegUse(u, HRmWrite, i->ARMin.Alu.dst);
+ addHRegUse(u, HRmRead, i->ARMin.Alu.argL);
+ addRegUsage_ARMRI84(u, i->ARMin.Alu.argR);
+ return;
+ case ARMin_Shift:
+ addHRegUse(u, HRmWrite, i->ARMin.Shift.dst);
+ addHRegUse(u, HRmRead, i->ARMin.Shift.argL);
+ addRegUsage_ARMRI5(u, i->ARMin.Shift.argR);
+ return;
+ case ARMin_Unary:
+ addHRegUse(u, HRmWrite, i->ARMin.Unary.dst);
+ addHRegUse(u, HRmRead, i->ARMin.Unary.src);
+ return;
+ case ARMin_CmpOrTst:
+ addHRegUse(u, HRmRead, i->ARMin.CmpOrTst.argL);
+ addRegUsage_ARMRI84(u, i->ARMin.CmpOrTst.argR);
+ return;
+ case ARMin_Mov:
+ addHRegUse(u, HRmWrite, i->ARMin.Mov.dst);
+ addRegUsage_ARMRI84(u, i->ARMin.Mov.src);
+ return;
+ case ARMin_Imm32:
+ addHRegUse(u, HRmWrite, i->ARMin.Imm32.dst);
+ return;
+ case ARMin_LdSt32:
+ addRegUsage_ARMAMode1(u, i->ARMin.LdSt32.amode);
+ if (i->ARMin.LdSt32.isLoad) {
+ addHRegUse(u, HRmWrite, i->ARMin.LdSt32.rD);
+ } else {
+ addHRegUse(u, HRmRead, i->ARMin.LdSt32.rD);
+ }
+ return;
+ case ARMin_LdSt16:
+ addRegUsage_ARMAMode2(u, i->ARMin.LdSt16.amode);
+ if (i->ARMin.LdSt16.isLoad) {
+ addHRegUse(u, HRmWrite, i->ARMin.LdSt16.rD);
+ } else {
+ addHRegUse(u, HRmRead, i->ARMin.LdSt16.rD);
+ }
+ return;
+ case ARMin_LdSt8U:
+ addRegUsage_ARMAMode1(u, i->ARMin.LdSt8U.amode);
+ if (i->ARMin.LdSt8U.isLoad) {
+ addHRegUse(u, HRmWrite, i->ARMin.LdSt8U.rD);
+ } else {
+ addHRegUse(u, HRmRead, i->ARMin.LdSt8U.rD);
+ }
+ return;
+ case ARMin_Ld8S:
+ goto unhandled;
+ case ARMin_Goto:
+ /* reads the reg holding the next guest addr */
+ addHRegUse(u, HRmRead, i->ARMin.Goto.gnext);
+ /* writes it to the standard integer return register */
+ addHRegUse(u, HRmWrite, hregARM_R0());
+ /* possibly messes with the baseblock pointer */
+ if (i->ARMin.Goto.jk != Ijk_Boring
+ && i->ARMin.Goto.jk != Ijk_Call
+ && i->ARMin.Goto.jk != Ijk_Ret)
+ /* note, this is irrelevant since r8 is not actually
+ available to the allocator. But still .. */
+ addHRegUse(u, HRmWrite, hregARM_R8());
+ return;
+ case ARMin_CMov:
+ addHRegUse(u, HRmWrite, i->ARMin.CMov.dst);
+ addHRegUse(u, HRmRead, i->ARMin.CMov.dst);
+ addRegUsage_ARMRI84(u, i->ARMin.CMov.src);
+ return;
+ case ARMin_Call:
+ /* logic and comments copied/modified from x86 back end */
+ /* This is a bit subtle. */
+ /* First off, claim it trashes all the caller-saved regs
+ which fall within the register allocator's jurisdiction.
+ These I believe to be r0,1,2,3. If it turns out that r9
+ is also caller-saved, then we'll have to add that here
+ too. */
+ addHRegUse(u, HRmWrite, hregARM_R0());
+ addHRegUse(u, HRmWrite, hregARM_R1());
+ addHRegUse(u, HRmWrite, hregARM_R2());
+ addHRegUse(u, HRmWrite, hregARM_R3());
+ /* Now we have to state any parameter-carrying registers
+ which might be read. This depends on nArgRegs. */
+ switch (i->ARMin.Call.nArgRegs) {
+ case 4: addHRegUse(u, HRmRead, hregARM_R3()); /*fallthru*/
+ case 3: addHRegUse(u, HRmRead, hregARM_R2()); /*fallthru*/
+ case 2: addHRegUse(u, HRmRead, hregARM_R1()); /*fallthru*/
+ case 1: addHRegUse(u, HRmRead, hregARM_R0()); break;
+ case 0: break;
+ default: vpanic("getRegUsage_ARM:Call:regparms");
+ }
+ /* Finally, there is the issue that the insn trashes a
+ register because the literal target address has to be
+ loaded into a register. Fortunately, for the nArgRegs=
+ 0/1/2/3 case, we can use r0, r1, r2 or r3 respectively, so
+ this does not cause any further damage. For the
+ nArgRegs=4 case, we'll have to choose another register
+ arbitrarily since all the caller saved regs are used for
+ parameters, and so we might as well choose r11.
+ */
+ if (i->ARMin.Call.nArgRegs == 4)
+ addHRegUse(u, HRmWrite, hregARM_R11());
+ /* Upshot of this is that the assembler really must observe
+ the here-stated convention of which register to use as an
+ address temporary, depending on nArgRegs: 0==r0,
+ 1==r1, 2==r2, 3==r3, 4==r11 */
+ return;
+ case ARMin_Mul:
+ addHRegUse(u, HRmRead, hregARM_R2());
+ addHRegUse(u, HRmRead, hregARM_R3());
+ addHRegUse(u, HRmWrite, hregARM_R0());
+ if (i->ARMin.Mul.op != ARMmul_PLAIN)
+ addHRegUse(u, HRmWrite, hregARM_R1());
+ return;
+ case ARMin_LdrEX:
+ addHRegUse(u, HRmWrite, hregARM_R0());
+ addHRegUse(u, HRmRead, hregARM_R1());
+ return;
+ case ARMin_StrEX:
+ addHRegUse(u, HRmWrite, hregARM_R0());
+ addHRegUse(u, HRmRead, hregARM_R1());
+ addHRegUse(u, HRmRead, hregARM_R2());
+ return;
+ case ARMin_VLdStD:
+ addRegUsage_ARMAModeV(u, i->ARMin.VLdStD.amode);
+ if (i->ARMin.VLdStD.isLoad) {
+ addHRegUse(u, HRmWrite, i->ARMin.VLdStD.dD);
+ } else {
+ addHRegUse(u, HRmRead, i->ARMin.VLdStD.dD);
+ }
+ return;
+ case ARMin_VLdStS:
+ addRegUsage_ARMAModeV(u, i->ARMin.VLdStS.amode);
+ if (i->ARMin.VLdStS.isLoad) {
+ addHRegUse(u, HRmWrite, i->ARMin.VLdStS.fD);
+ } else {
+ addHRegUse(u, HRmRead, i->ARMin.VLdStS.fD);
+ }
+ return;
+ case ARMin_VAluD:
+ addHRegUse(u, HRmWrite, i->ARMin.VAluD.dst);
+ addHRegUse(u, HRmRead, i->ARMin.VAluD.argL);
+ addHRegUse(u, HRmRead, i->ARMin.VAluD.argR);
+ return;
+ case ARMin_VAluS:
+ addHRegUse(u, HRmWrite, i->ARMin.VAluS.dst);
+ addHRegUse(u, HRmRead, i->ARMin.VAluS.argL);
+ addHRegUse(u, HRmRead, i->ARMin.VAluS.argR);
+ return;
+ case ARMin_VUnaryD:
+ addHRegUse(u, HRmWrite, i->ARMin.VUnaryD.dst);
+ addHRegUse(u, HRmRead, i->ARMin.VUnaryD.src);
+ return;
+ case ARMin_VUnaryS:
+ addHRegUse(u, HRmWrite, i->ARMin.VUnaryS.dst);
+ addHRegUse(u, HRmRead, i->ARMin.VUnaryS.src);
+ return;
+ case ARMin_VCmpD:
+ addHRegUse(u, HRmRead, i->ARMin.VCmpD.argL);
+ addHRegUse(u, HRmRead, i->ARMin.VCmpD.argR);
+ return;
+ case ARMin_VCMovD:
+ addHRegUse(u, HRmWrite, i->ARMin.VCMovD.dst);
+ addHRegUse(u, HRmRead, i->ARMin.VCMovD.dst);
+ addHRegUse(u, HRmRead, i->ARMin.VCMovD.src);
+ return;
+ case ARMin_VCMovS:
+ addHRegUse(u, HRmWrite, i->ARMin.VCMovS.dst);
+ addHRegUse(u, HRmRead, i->ARMin.VCMovS.dst);
+ addHRegUse(u, HRmRead, i->ARMin.VCMovS.src);
+ return;
+ case ARMin_VCvtSD:
+ addHRegUse(u, HRmWrite, i->ARMin.VCvtSD.dst);
+ addHRegUse(u, HRmRead, i->ARMin.VCvtSD.src);
+ return;
+ case ARMin_VXferD:
+ if (i->ARMin.VXferD.toD) {
+ addHRegUse(u, HRmWrite, i->ARMin.VXferD.dD);
+ addHRegUse(u, HRmRead, i->ARMin.VXferD.rHi);
+ addHRegUse(u, HRmRead, i->ARMin.VXferD.rLo);
+ } else {
+ addHRegUse(u, HRmRead, i->ARMin.VXferD.dD);
+ addHRegUse(u, HRmWrite, i->ARMin.VXferD.rHi);
+ addHRegUse(u, HRmWrite, i->ARMin.VXferD.rLo);
+ }
+ return;
+ case ARMin_VXferS:
+ if (i->ARMin.VXferS.toS) {
+ addHRegUse(u, HRmWrite, i->ARMin.VXferS.fD);
+ addHRegUse(u, HRmRead, i->ARMin.VXferS.rLo);
+ } else {
+ addHRegUse(u, HRmRead, i->ARMin.VXferS.fD);
+ addHRegUse(u, HRmWrite, i->ARMin.VXferS.rLo);
+ }
+ return;
+ case ARMin_VCvtID:
+ addHRegUse(u, HRmWrite, i->ARMin.VCvtID.dst);
+ addHRegUse(u, HRmRead, i->ARMin.VCvtID.src);
+ return;
+ case ARMin_FPSCR:
+ if (i->ARMin.FPSCR.toFPSCR)
+ addHRegUse(u, HRmRead, i->ARMin.FPSCR.iReg);
+ else
+ addHRegUse(u, HRmWrite, i->ARMin.FPSCR.iReg);
+ return;
+ case ARMin_MFence:
+ return;
+ case ARMin_NLdStQ:
+ if (i->ARMin.NLdStQ.isLoad)
+ addHRegUse(u, HRmWrite, i->ARMin.NLdStQ.dQ);
+ else
+ addHRegUse(u, HRmRead, i->ARMin.NLdStQ.dQ);
+ addRegUsage_ARMAModeN(u, i->ARMin.NLdStQ.amode);
+ return;
+ case ARMin_NLdStD:
+ if (i->ARMin.NLdStD.isLoad)
+ addHRegUse(u, HRmWrite, i->ARMin.NLdStD.dD);
+ else
+ addHRegUse(u, HRmRead, i->ARMin.NLdStD.dD);
+ addRegUsage_ARMAModeN(u, i->ARMin.NLdStD.amode);
+ return;
+ case ARMin_NUnary:
+ addHRegUse(u, HRmWrite, i->ARMin.NUnary.dst);
+ addHRegUse(u, HRmRead, i->ARMin.NUnary.src);
+ return;
+ case ARMin_NUnaryS:
+ addHRegUse(u, HRmWrite, i->ARMin.NUnaryS.dst->reg);
+ addHRegUse(u, HRmRead, i->ARMin.NUnaryS.src->reg);
+ return;
+ case ARMin_NShift:
+ addHRegUse(u, HRmWrite, i->ARMin.NShift.dst);
+ addHRegUse(u, HRmRead, i->ARMin.NShift.argL);
+ addHRegUse(u, HRmRead, i->ARMin.NShift.argR);
+ return;
+ case ARMin_NDual:
+ addHRegUse(u, HRmWrite, i->ARMin.NDual.arg1);
+ addHRegUse(u, HRmWrite, i->ARMin.NDual.arg2);
+ addHRegUse(u, HRmRead, i->ARMin.NDual.arg1);
+ addHRegUse(u, HRmRead, i->ARMin.NDual.arg2);
+ return;
+ case ARMin_NBinary:
+ addHRegUse(u, HRmWrite, i->ARMin.NBinary.dst);
+ /* TODO: sometimes dst is also being read! */
+ // XXX fix this
+ addHRegUse(u, HRmRead, i->ARMin.NBinary.argL);
+ addHRegUse(u, HRmRead, i->ARMin.NBinary.argR);
+ return;
+ case ARMin_NeonImm:
+ addHRegUse(u, HRmWrite, i->ARMin.NeonImm.dst);
+ return;
+ case ARMin_NCMovQ:
+ addHRegUse(u, HRmWrite, i->ARMin.NCMovQ.dst);
+ addHRegUse(u, HRmRead, i->ARMin.NCMovQ.dst);
+ addHRegUse(u, HRmRead, i->ARMin.NCMovQ.src);
+ return;
+ case ARMin_Add32:
+ addHRegUse(u, HRmWrite, i->ARMin.Add32.rD);
+ addHRegUse(u, HRmRead, i->ARMin.Add32.rN);
+ return;
+ unhandled:
+ default:
+ ppARMInstr(i);
+ vpanic("getRegUsage_ARMInstr");
+ }
+}
+
+
+void mapRegs_ARMInstr ( HRegRemap* m, ARMInstr* i, Bool mode64 )
+{
+ vassert(mode64 == False);
+ switch (i->tag) {
+ case ARMin_Alu:
+ i->ARMin.Alu.dst = lookupHRegRemap(m, i->ARMin.Alu.dst);
+ i->ARMin.Alu.argL = lookupHRegRemap(m, i->ARMin.Alu.argL);
+ mapRegs_ARMRI84(m, i->ARMin.Alu.argR);
+ return;
+ case ARMin_Shift:
+ i->ARMin.Shift.dst = lookupHRegRemap(m, i->ARMin.Shift.dst);
+ i->ARMin.Shift.argL = lookupHRegRemap(m, i->ARMin.Shift.argL);
+ mapRegs_ARMRI5(m, i->ARMin.Shift.argR);
+ return;
+ case ARMin_Unary:
+ i->ARMin.Unary.dst = lookupHRegRemap(m, i->ARMin.Unary.dst);
+ i->ARMin.Unary.src = lookupHRegRemap(m, i->ARMin.Unary.src);
+ return;
+ case ARMin_CmpOrTst:
+ i->ARMin.CmpOrTst.argL = lookupHRegRemap(m, i->ARMin.CmpOrTst.argL);
+ mapRegs_ARMRI84(m, i->ARMin.CmpOrTst.argR);
+ return;
+ case ARMin_Mov:
+ i->ARMin.Mov.dst = lookupHRegRemap(m, i->ARMin.Mov.dst);
+ mapRegs_ARMRI84(m, i->ARMin.Mov.src);
+ return;
+ case ARMin_Imm32:
+ i->ARMin.Imm32.dst = lookupHRegRemap(m, i->ARMin.Imm32.dst);
+ return;
+ case ARMin_LdSt32:
+ i->ARMin.LdSt32.rD = lookupHRegRemap(m, i->ARMin.LdSt32.rD);
+ mapRegs_ARMAMode1(m, i->ARMin.LdSt32.amode);
+ return;
+ case ARMin_LdSt16:
+ i->ARMin.LdSt16.rD = lookupHRegRemap(m, i->ARMin.LdSt16.rD);
+ mapRegs_ARMAMode2(m, i->ARMin.LdSt16.amode);
+ return;
+ case ARMin_LdSt8U:
+ i->ARMin.LdSt8U.rD = lookupHRegRemap(m, i->ARMin.LdSt8U.rD);
+ mapRegs_ARMAMode1(m, i->ARMin.LdSt8U.amode);
+ return;
+ case ARMin_Ld8S:
+ goto unhandled;
+ case ARMin_Goto:
+ i->ARMin.Goto.gnext = lookupHRegRemap(m, i->ARMin.Goto.gnext);
+ return;
+ case ARMin_CMov:
+ i->ARMin.CMov.dst = lookupHRegRemap(m, i->ARMin.CMov.dst);
+ mapRegs_ARMRI84(m, i->ARMin.CMov.src);
+ return;
+ case ARMin_Call:
+ return;
+ case ARMin_Mul:
+ return;
+ case ARMin_LdrEX:
+ return;
+ case ARMin_StrEX:
+ return;
+ case ARMin_VLdStD:
+ i->ARMin.VLdStD.dD = lookupHRegRemap(m, i->ARMin.VLdStD.dD);
+ mapRegs_ARMAModeV(m, i->ARMin.VLdStD.amode);
+ return;
+ case ARMin_VLdStS:
+ i->ARMin.VLdStS.fD = lookupHRegRemap(m, i->ARMin.VLdStS.fD);
+ mapRegs_ARMAModeV(m, i->ARMin.VLdStS.amode);
+ return;
+ case ARMin_VAluD:
+ i->ARMin.VAluD.dst = lookupHRegRemap(m, i->ARMin.VAluD.dst);
+ i->ARMin.VAluD.argL = lookupHRegRemap(m, i->ARMin.VAluD.argL);
+ i->ARMin.VAluD.argR = lookupHRegRemap(m, i->ARMin.VAluD.argR);
+ return;
+ case ARMin_VAluS:
+ i->ARMin.VAluS.dst = lookupHRegRemap(m, i->ARMin.VAluS.dst);
+ i->ARMin.VAluS.argL = lookupHRegRemap(m, i->ARMin.VAluS.argL);
+ i->ARMin.VAluS.argR = lookupHRegRemap(m, i->ARMin.VAluS.argR);
+ return;
+ case ARMin_VUnaryD:
+ i->ARMin.VUnaryD.dst = lookupHRegRemap(m, i->ARMin.VUnaryD.dst);
+ i->ARMin.VUnaryD.src = lookupHRegRemap(m, i->ARMin.VUnaryD.src);
+ return;
+ case ARMin_VUnaryS:
+ i->ARMin.VUnaryS.dst = lookupHRegRemap(m, i->ARMin.VUnaryS.dst);
+ i->ARMin.VUnaryS.src = lookupHRegRemap(m, i->ARMin.VUnaryS.src);
+ return;
+ case ARMin_VCmpD:
+ i->ARMin.VCmpD.argL = lookupHRegRemap(m, i->ARMin.VCmpD.argL);
+ i->ARMin.VCmpD.argR = lookupHRegRemap(m, i->ARMin.VCmpD.argR);
+ return;
+ case ARMin_VCMovD:
+ i->ARMin.VCMovD.dst = lookupHRegRemap(m, i->ARMin.VCMovD.dst);
+ i->ARMin.VCMovD.src = lookupHRegRemap(m, i->ARMin.VCMovD.src);
+ return;
+ case ARMin_VCMovS:
+ i->ARMin.VCMovS.dst = lookupHRegRemap(m, i->ARMin.VCMovS.dst);
+ i->ARMin.VCMovS.src = lookupHRegRemap(m, i->ARMin.VCMovS.src);
+ return;
+ case ARMin_VCvtSD:
+ i->ARMin.VCvtSD.dst = lookupHRegRemap(m, i->ARMin.VCvtSD.dst);
+ i->ARMin.VCvtSD.src = lookupHRegRemap(m, i->ARMin.VCvtSD.src);
+ return;
+ case ARMin_VXferD:
+ i->ARMin.VXferD.dD = lookupHRegRemap(m, i->ARMin.VXferD.dD);
+ i->ARMin.VXferD.rHi = lookupHRegRemap(m, i->ARMin.VXferD.rHi);
+ i->ARMin.VXferD.rLo = lookupHRegRemap(m, i->ARMin.VXferD.rLo);
+ return;
+ case ARMin_VXferS:
+ i->ARMin.VXferS.fD = lookupHRegRemap(m, i->ARMin.VXferS.fD);
+ i->ARMin.VXferS.rLo = lookupHRegRemap(m, i->ARMin.VXferS.rLo);
+ return;
+ case ARMin_VCvtID:
+ i->ARMin.VCvtID.dst = lookupHRegRemap(m, i->ARMin.VCvtID.dst);
+ i->ARMin.VCvtID.src = lookupHRegRemap(m, i->ARMin.VCvtID.src);
+ return;
+ case ARMin_FPSCR:
+ i->ARMin.FPSCR.iReg = lookupHRegRemap(m, i->ARMin.FPSCR.iReg);
+ return;
+ case ARMin_MFence:
+ return;
+ case ARMin_NLdStQ:
+ i->ARMin.NLdStQ.dQ = lookupHRegRemap(m, i->ARMin.NLdStQ.dQ);
+ mapRegs_ARMAModeN(m, i->ARMin.NLdStQ.amode);
+ return;
+ case ARMin_NLdStD:
+ i->ARMin.NLdStD.dD = lookupHRegRemap(m, i->ARMin.NLdStD.dD);
+ mapRegs_ARMAModeN(m, i->ARMin.NLdStD.amode);
+ return;
+ case ARMin_NUnary:
+ i->ARMin.NUnary.src = lookupHRegRemap(m, i->ARMin.NUnary.src);
+ i->ARMin.NUnary.dst = lookupHRegRemap(m, i->ARMin.NUnary.dst);
+ return;
+ case ARMin_NUnaryS:
+ i->ARMin.NUnaryS.src->reg
+ = lookupHRegRemap(m, i->ARMin.NUnaryS.src->reg);
+ i->ARMin.NUnaryS.dst->reg
+ = lookupHRegRemap(m, i->ARMin.NUnaryS.dst->reg);
+ return;
+ case ARMin_NShift:
+ i->ARMin.NShift.dst = lookupHRegRemap(m, i->ARMin.NShift.dst);
+ i->ARMin.NShift.argL = lookupHRegRemap(m, i->ARMin.NShift.argL);
+ i->ARMin.NShift.argR = lookupHRegRemap(m, i->ARMin.NShift.argR);
+ return;
+ case ARMin_NDual:
+ i->ARMin.NDual.arg1 = lookupHRegRemap(m, i->ARMin.NDual.arg1);
+ i->ARMin.NDual.arg2 = lookupHRegRemap(m, i->ARMin.NDual.arg2);
+ return;
+ case ARMin_NBinary:
+ i->ARMin.NBinary.argL = lookupHRegRemap(m, i->ARMin.NBinary.argL);
+ i->ARMin.NBinary.argR = lookupHRegRemap(m, i->ARMin.NBinary.argR);
+ i->ARMin.NBinary.dst = lookupHRegRemap(m, i->ARMin.NBinary.dst);
+ return;
+ case ARMin_NeonImm:
+ i->ARMin.NeonImm.dst = lookupHRegRemap(m, i->ARMin.NeonImm.dst);
+ return;
+ case ARMin_NCMovQ:
+ i->ARMin.NCMovQ.dst = lookupHRegRemap(m, i->ARMin.NCMovQ.dst);
+ i->ARMin.NCMovQ.src = lookupHRegRemap(m, i->ARMin.NCMovQ.src);
+ return;
+ case ARMin_Add32:
+ i->ARMin.Add32.rD = lookupHRegRemap(m, i->ARMin.Add32.rD);
+ i->ARMin.Add32.rN = lookupHRegRemap(m, i->ARMin.Add32.rN);
+ return;
+ unhandled:
+ default:
+ ppARMInstr(i);
+ vpanic("mapRegs_ARMInstr");
+ }
+}
+
+/* Figure out if i represents a reg-reg move, and if so assign the
+ source and destination to *src and *dst. If in doubt say No. Used
+ by the register allocator to do move coalescing.
+*/
+Bool isMove_ARMInstr ( ARMInstr* i, HReg* src, HReg* dst )
+{
+ /* Moves between integer regs */
+ switch (i->tag) {
+ case ARMin_Mov:
+ if (i->ARMin.Mov.src->tag == ARMri84_R) {
+ *src = i->ARMin.Mov.src->ARMri84.R.reg;
+ *dst = i->ARMin.Mov.dst;
+ return True;
+ }
+ break;
+ case ARMin_VUnaryD:
+ if (i->ARMin.VUnaryD.op == ARMvfpu_COPY) {
+ *src = i->ARMin.VUnaryD.src;
+ *dst = i->ARMin.VUnaryD.dst;
+ return True;
+ }
+ break;
+ case ARMin_VUnaryS:
+ if (i->ARMin.VUnaryS.op == ARMvfpu_COPY) {
+ *src = i->ARMin.VUnaryS.src;
+ *dst = i->ARMin.VUnaryS.dst;
+ return True;
+ }
+ break;
+ default:
+ break;
+ }
+
+ // todo: float, vector moves
+ return False;
+}
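+
+/* E.g. a "mov dst, src" whose operand is a plain register reports
+ (src, dst) to the allocator, which may then assign both virtual
+ registers to the same real register and delete the move. */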
+
+
+/* Generate arm spill/reload instructions under the direction of the
+ register allocator. Note it's critical these don't write the
+ condition codes. */
+
+void genSpill_ARM ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
+ HReg rreg, Int offsetB, Bool mode64 )
+{
+ HRegClass rclass;
+ vassert(offsetB >= 0);
+ vassert(!hregIsVirtual(rreg));
+ vassert(mode64 == False);
+ *i1 = *i2 = NULL;
+ rclass = hregClass(rreg);
+ switch (rclass) {
+ case HRcInt32:
+ vassert(offsetB <= 4095);
+ *i1 = ARMInstr_LdSt32( False/*!isLoad*/,
+ rreg,
+ ARMAMode1_RI(hregARM_R8(), offsetB) );
+ return;
+ case HRcFlt32:
+ case HRcFlt64: {
+ HReg r8 = hregARM_R8(); /* baseblock */
+ HReg r12 = hregARM_R12(); /* spill temp */
+ HReg base = r8;
+ vassert(0 == (offsetB & 3));
+ if (offsetB >= 1024) {
+ Int offsetKB = offsetB / 1024;
+ /* r12 = r8 + (1024 * offsetKB) */
+ *i1 = ARMInstr_Alu(ARMalu_ADD, r12, r8,
+ ARMRI84_I84(offsetKB, 11));
+ offsetB -= (1024 * offsetKB);
+ base = r12;
+ }
+ vassert(offsetB <= 1020);
+ if (rclass == HRcFlt32) {
+ *i2 = ARMInstr_VLdStS( False/*!isLoad*/,
+ rreg,
+ mkARMAModeV(base, offsetB) );
+ } else {
+ *i2 = ARMInstr_VLdStD( False/*!isLoad*/,
+ rreg,
+ mkARMAModeV(base, offsetB) );
+ }
+ return;
+ }
+ case HRcVec128: {
+ HReg r8 = hregARM_R8();
+ HReg r12 = hregARM_R12();
+ *i1 = ARMInstr_Add32(r12, r8, offsetB);
+ *i2 = ARMInstr_NLdStQ(False, rreg, mkARMAModeN_R(r12));
+ return;
+ }
+ default:
+ ppHRegClass(rclass);
+ vpanic("genSpill_ARM: unimplemented regclass");
+ }
+}
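+
+/* Spill sketch (illustrative): a double spilled at offsetB = 2052
+ yields i1 = "add r12, r8, #2048" and i2 = "fstd dD, [r12, #4]",
+ keeping the VFP offset within its 1020-byte limit. genReload_ARM
+ below mirrors this with loads. */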
+
+void genReload_ARM ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
+ HReg rreg, Int offsetB, Bool mode64 )
+{
+ HRegClass rclass;
+ vassert(offsetB >= 0);
+ vassert(!hregIsVirtual(rreg));
+ vassert(mode64 == False);
+ *i1 = *i2 = NULL;
+ rclass = hregClass(rreg);
+ switch (rclass) {
+ case HRcInt32:
+ vassert(offsetB <= 4095);
+ *i1 = ARMInstr_LdSt32( True/*isLoad*/,
+ rreg,
+ ARMAMode1_RI(hregARM_R8(), offsetB) );
+ return;
+ case HRcFlt32:
+ case HRcFlt64: {
+ HReg r8 = hregARM_R8(); /* baseblock */
+ HReg r12 = hregARM_R12(); /* spill temp */
+ HReg base = r8;
+ vassert(0 == (offsetB & 3));
+ if (offsetB >= 1024) {
+ Int offsetKB = offsetB / 1024;
+ /* r12 = r8 + (1024 * offsetKB) */
+ *i1 = ARMInstr_Alu(ARMalu_ADD, r12, r8,
+ ARMRI84_I84(offsetKB, 11));
+ offsetB -= (1024 * offsetKB);
+ base = r12;
+ }
+ vassert(offsetB <= 1020);
+ if (rclass == HRcFlt32) {
+ *i2 = ARMInstr_VLdStS( True/*isLoad*/,
+ rreg,
+ mkARMAModeV(base, offsetB) );
+ } else {
+ *i2 = ARMInstr_VLdStD( True/*isLoad*/,
+ rreg,
+ mkARMAModeV(base, offsetB) );
+ }
+ return;
+ }
+ case HRcVec128: {
+ HReg r8 = hregARM_R8();
+ HReg r12 = hregARM_R12();
+ *i1 = ARMInstr_Add32(r12, r8, offsetB);
+ *i2 = ARMInstr_NLdStQ(True, rreg, mkARMAModeN_R(r12));
+ return;
+ }
+ default:
+ ppHRegClass(rclass);
+ vpanic("genReload_ARM: unimplemented regclass");
+ }
+}
+
+
+/* Emit an instruction into buf and return the number of bytes used.
+ Note that buf is not the insn's final place, and therefore it is
+ imperative to emit position-independent code. */
+
+static inline UChar iregNo ( HReg r )
+{
+ UInt n;
+ vassert(hregClass(r) == HRcInt32);
+ vassert(!hregIsVirtual(r));
+ n = hregNumber(r);
+ vassert(n <= 15);
+ return toUChar(n);
+}
+
+static inline UChar dregNo ( HReg r )
+{
+ UInt n;
+ if (hregClass(r) != HRcFlt64)
+ ppHRegClass(hregClass(r));
+ vassert(hregClass(r) == HRcFlt64);
+ vassert(!hregIsVirtual(r));
+ n = hregNumber(r);
+ vassert(n <= 31);
+ return toUChar(n);
+}
+
+static inline UChar fregNo ( HReg r )
+{
+ UInt n;
+ vassert(hregClass(r) == HRcFlt32);
+ vassert(!hregIsVirtual(r));
+ n = hregNumber(r);
+ vassert(n <= 31);
+ return toUChar(n);
+}
+
+static inline UChar qregNo ( HReg r )
+{
+ UInt n;
+ vassert(hregClass(r) == HRcVec128);
+ vassert(!hregIsVirtual(r));
+ n = hregNumber(r);
+ vassert(n <= 15);
+ return toUChar(n);
+}
+
+#define BITS4(zzb3,zzb2,zzb1,zzb0) \
+ (((zzb3) << 3) | ((zzb2) << 2) | ((zzb1) << 1) | (zzb0))
+#define X0000 BITS4(0,0,0,0)
+#define X0001 BITS4(0,0,0,1)
+#define X0010 BITS4(0,0,1,0)
+#define X0011 BITS4(0,0,1,1)
+#define X0100 BITS4(0,1,0,0)
+#define X0101 BITS4(0,1,0,1)
+#define X0110 BITS4(0,1,1,0)
+#define X0111 BITS4(0,1,1,1)
+#define X1000 BITS4(1,0,0,0)
+#define X1001 BITS4(1,0,0,1)
+#define X1010 BITS4(1,0,1,0)
+#define X1011 BITS4(1,0,1,1)
+#define X1100 BITS4(1,1,0,0)
+#define X1101 BITS4(1,1,0,1)
+#define X1110 BITS4(1,1,1,0)
+#define X1111 BITS4(1,1,1,1)
+
+#define XXXXX___(zzx7,zzx6,zzx5,zzx4,zzx3) \
+ ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) | \
+ (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) | \
+ (((zzx3) & 0xF) << 12))
+
+#define XXXXXX__(zzx7,zzx6,zzx5,zzx4,zzx3,zzx2) \
+ ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) | \
+ (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) | \
+ (((zzx3) & 0xF) << 12) | (((zzx2) & 0xF) << 8))
+
+#define XXXXX__X(zzx7,zzx6,zzx5,zzx4,zzx3,zzx0) \
+ ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) | \
+ (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) | \
+ (((zzx3) & 0xF) << 12) | (((zzx0) & 0xF) << 0))
+
+#define XXX___XX(zzx7,zzx6,zzx5,zzx1,zzx0) \
+ ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) | \
+ (((zzx5) & 0xF) << 20) | (((zzx1) & 0xF) << 4) | \
+ (((zzx0) & 0xF) << 0))
+
+#define XXXXXXXX(zzx7,zzx6,zzx5,zzx4,zzx3,zzx2,zzx1,zzx0) \
+ ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) | \
+ (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) | \
+ (((zzx3) & 0xF) << 12) | (((zzx2) & 0xF) << 8) | \
+ (((zzx1) & 0xF) << 4) | (((zzx0) & 0xF) << 0))
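+
+/* Example (illustrative): XXXXXXXX(0xE,0x1,0xA,0x0,rD,0x0,0x0,rM)
+ builds 0xE1A00000 | (rD << 12) | rM, which is the encoding of
+ "mov rD, rM" with cond = AL. */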
+
+/* Generate a skeletal insn that involves an RI84 shifter operand.
+ Returns a word which is all zeroes apart from bits 25 and 11..0,
+ since it is those that encode the shifter operand (at least to the
+ extent that we care about it.) */
+static UInt skeletal_RI84 ( ARMRI84* ri )
+{
+ UInt instr;
+ if (ri->tag == ARMri84_I84) {
+ vassert(0 == (ri->ARMri84.I84.imm4 & ~0x0F));
+ vassert(0 == (ri->ARMri84.I84.imm8 & ~0xFF));
+ instr = 1 << 25;
+ instr |= (ri->ARMri84.I84.imm4 << 8);
+ instr |= ri->ARMri84.I84.imm8;
+ } else {
+ instr = 0 << 25;
+ instr |= iregNo(ri->ARMri84.R.reg);
+ }
+ return instr;
+}
+
+/* Ditto for RI5. Resulting word is zeroes apart from bit 4 and bits
+ 11..7. */
+static UInt skeletal_RI5 ( ARMRI5* ri )
+{
+ UInt instr;
+ if (ri->tag == ARMri5_I5) {
+ UInt imm5 = ri->ARMri5.I5.imm5;
+ vassert(imm5 >= 1 && imm5 <= 31);
+ instr = 0 << 4;
+ instr |= imm5 << 7;
+ } else {
+ instr = 1 << 4;
+ instr |= iregNo(ri->ARMri5.R.reg) << 8;
+ }
+ return instr;
+}
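+
+/* Example (illustrative): an ARMRI84_I84(0xFF, 4) operand produces
+ the skeleton 0x020004FF -- bit 25 set for "immediate", imm4 = 4 in
+ bits 11..8, imm8 = 0xFF in bits 7..0 -- denoting 0xFF ROR 8, that
+ is, 0xFF000000. A register-shift ARMRI5 instead sets bit 4 and
+ places the shift-amount register in bits 11..8. */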
+
+
+/* Get an immediate into a register, using only that
+ register. (very lame..) */
+static UInt* imm32_to_iregNo ( UInt* p, Int rD, UInt imm32 )
+{
+ UInt instr;
+ vassert(rD >= 0 && rD <= 14); // r15 not good to mess with!
+#if 0
+ if (0 == (imm32 & ~0xFF)) {
+ /* mov with a immediate shifter operand of (0, imm32) (??) */
+ instr = XXXXXX__(X1110,X0011,X1010,X0000,rD,X0000);
+ instr |= imm32;
+ *p++ = instr;
+ } else {
+ // this is very bad; causes Dcache pollution
+ // ldr rD, [pc]
+ instr = XXXXX___(X1110,X0101,X1001,X1111,rD);
+ *p++ = instr;
+ // b .+8
+ instr = 0xEA000000;
+ *p++ = instr;
+ // .word imm32
+ *p++ = imm32;
+ }
+#else
+ if (VEX_ARM_ARCHLEVEL(arm_hwcaps) > 6) {
+ /* Generate movw rD, #low16. Then, if the high 16 are
+ nonzero, generate movt rD, #high16. */
+ UInt lo16 = imm32 & 0xFFFF;
+ UInt hi16 = (imm32 >> 16) & 0xFFFF;
+ instr = XXXXXXXX(0xE, 0x3, 0x0, (lo16 >> 12) & 0xF, rD,
+ (lo16 >> 8) & 0xF, (lo16 >> 4) & 0xF,
+ lo16 & 0xF);
+ *p++ = instr;
+ if (hi16 != 0) {
+ instr = XXXXXXXX(0xE, 0x3, 0x4, (hi16 >> 12) & 0xF, rD,
+ (hi16 >> 8) & 0xF, (hi16 >> 4) & 0xF,
+ hi16 & 0xF);
+ *p++ = instr;
+ }
+ } else {
+ UInt imm, rot;
+ UInt op = X1010;
+ UInt rN = 0;
+ if ((imm32 & 0xFF) || (imm32 == 0)) {
+ imm = imm32 & 0xFF;
+ rot = 0;
+ instr = XXXXXXXX(0xE, 0x3, op, rN, rD, rot, imm >> 4, imm & 0xF);
+ *p++ = instr;
+ op = X1000;
+ rN = rD;
+ }
+ if (imm32 & 0xFF000000) {
+ imm = (imm32 >> 24) & 0xFF;
+ rot = 4;
+ instr = XXXXXXXX(0xE, 0x3, op, rN, rD, rot, imm >> 4, imm & 0xF);
+ *p++ = instr;
+ op = X1000;
+ rN = rD;
+ }
+ if (imm32 & 0xFF0000) {
+ imm = (imm32 >> 16) & 0xFF;
+ rot = 8;
+ instr = XXXXXXXX(0xE, 0x3, op, rN, rD, rot, imm >> 4, imm & 0xF);
+ *p++ = instr;
+ op = X1000;
+ rN = rD;
+ }
+ if (imm32 & 0xFF00) {
+ imm = (imm32 >> 8) & 0xFF;
+ rot = 12;
+ instr = XXXXXXXX(0xE, 0x3, op, rN, rD, rot, imm >> 4, imm & 0xF);
+ *p++ = instr;
+ op = X1000;
+ rN = rD;
+ }
+ }
+#endif
+ return p;
+}
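+
+/* Worked example (illustrative): on a >= v7 host,
+   imm32_to_iregNo(p, 0/*r0*/, 0x12345678) emits
+      movw r0, #0x5678   (0xE3005678)
+      movt r0, #0x1234   (0xE3401234)
+   whereas a pre-v7 host gets a MOV of the first contributing byte
+   followed by a rotated-immediate ORR for each remaining nonzero
+   byte. */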
+
+
+Int emit_ARMInstr ( UChar* buf, Int nbuf, ARMInstr* i,
+ Bool mode64, void* dispatch )
+{
+ UInt* p = (UInt*)buf;
+ vassert(nbuf >= 32);
+ vassert(mode64 == False);
+ vassert(0 == (((HWord)buf) & 3));
+   /* since we branch to lr (r14) to get back to dispatch: */
+ vassert(dispatch == NULL);
+
+ switch (i->tag) {
+ case ARMin_Alu: {
+ UInt instr, subopc;
+ UInt rD = iregNo(i->ARMin.Alu.dst);
+ UInt rN = iregNo(i->ARMin.Alu.argL);
+ ARMRI84* argR = i->ARMin.Alu.argR;
+ switch (i->ARMin.Alu.op) {
+ case ARMalu_ADDS: /* fallthru */
+ case ARMalu_ADD: subopc = X0100; break;
+ case ARMalu_ADC: subopc = X0101; break;
+ case ARMalu_SUBS: /* fallthru */
+ case ARMalu_SUB: subopc = X0010; break;
+ case ARMalu_SBC: subopc = X0110; break;
+ case ARMalu_AND: subopc = X0000; break;
+ case ARMalu_BIC: subopc = X1110; break;
+ case ARMalu_OR: subopc = X1100; break;
+ case ARMalu_XOR: subopc = X0001; break;
+ default: goto bad;
+ }
+ instr = skeletal_RI84(argR);
+ instr |= XXXXX___(X1110, (1 & (subopc >> 3)),
+ (subopc << 1) & 0xF, rN, rD);
+ if (i->ARMin.Alu.op == ARMalu_ADDS
+ || i->ARMin.Alu.op == ARMalu_SUBS) {
+ instr |= 1<<20; /* set the S bit */
+ }
+ *p++ = instr;
+ goto done;
+ }
+ case ARMin_Shift: {
+ UInt instr, subopc;
+         UInt rD = iregNo(i->ARMin.Shift.dst);
+         UInt rM = iregNo(i->ARMin.Shift.argL);
+ ARMRI5* argR = i->ARMin.Shift.argR;
+ switch (i->ARMin.Shift.op) {
+ case ARMsh_SHL: subopc = X0000; break;
+ case ARMsh_SHR: subopc = X0001; break;
+ case ARMsh_SAR: subopc = X0010; break;
+ default: goto bad;
+ }
+ instr = skeletal_RI5(argR);
+ instr |= XXXXX__X(X1110,X0001,X1010,X0000,rD, /* _ _ */ rM);
+ instr |= (subopc & 3) << 5;
+ *p++ = instr;
+ goto done;
+ }
+ case ARMin_Unary: {
+ UInt instr;
+         UInt rDst = iregNo(i->ARMin.Unary.dst);
+         UInt rSrc = iregNo(i->ARMin.Unary.src);
+ switch (i->ARMin.Unary.op) {
+ case ARMun_CLZ:
+ instr = XXXXXXXX(X1110,X0001,X0110,X1111,
+ rDst,X1111,X0001,rSrc);
+ *p++ = instr;
+ goto done;
+ case ARMun_NEG: /* RSB rD,rS,#0 */
+ instr = XXXXX___(X1110,0x2,0x6,rSrc,rDst);
+ *p++ = instr;
+ goto done;
+ case ARMun_NOT: {
+ UInt subopc = X1111; /* MVN */
+ instr = rSrc;
+ instr |= XXXXX___(X1110, (1 & (subopc >> 3)),
+ (subopc << 1) & 0xF, 0, rDst);
+ *p++ = instr;
+ goto done;
+ }
+ default:
+ break;
+ }
+ goto bad;
+ }
+ case ARMin_CmpOrTst: {
+ UInt instr = skeletal_RI84(i->ARMin.CmpOrTst.argR);
+ UInt subopc = i->ARMin.CmpOrTst.isCmp ? X1010 : X1000;
+ UInt SBZ = 0;
+ instr |= XXXXX___(X1110, (1 & (subopc >> 3)),
+ ((subopc << 1) & 0xF) | 1,
+                           iregNo(i->ARMin.CmpOrTst.argL), SBZ );
+ *p++ = instr;
+ goto done;
+ }
+ case ARMin_Mov: {
+ UInt instr = skeletal_RI84(i->ARMin.Mov.src);
+ UInt subopc = X1101; /* MOV */
+ UInt SBZ = 0;
+ instr |= XXXXX___(X1110, (1 & (subopc >> 3)),
+                           (subopc << 1) & 0xF, SBZ, iregNo(i->ARMin.Mov.dst));
+ *p++ = instr;
+ goto done;
+ }
+ case ARMin_Imm32: {
+ p = imm32_to_iregNo( (UInt*)p, iregNo(i->ARMin.Imm32.dst),
+ i->ARMin.Imm32.imm32 );
+ goto done;
+ }
+ case ARMin_LdSt32:
+ case ARMin_LdSt8U: {
+ UInt bL, bB;
+ HReg rD;
+ ARMAMode1* am;
+ if (i->tag == ARMin_LdSt32) {
+ bB = 0;
+ bL = i->ARMin.LdSt32.isLoad ? 1 : 0;
+ am = i->ARMin.LdSt32.amode;
+ rD = i->ARMin.LdSt32.rD;
+ } else {
+ bB = 1;
+ bL = i->ARMin.LdSt8U.isLoad ? 1 : 0;
+ am = i->ARMin.LdSt8U.amode;
+ rD = i->ARMin.LdSt8U.rD;
+ }
+ if (am->tag == ARMam1_RI) {
+ Int simm12;
+ UInt instr, bP;
+ if (am->ARMam1.RI.simm13 < 0) {
+ bP = 0;
+ simm12 = -am->ARMam1.RI.simm13;
+ } else {
+ bP = 1;
+ simm12 = am->ARMam1.RI.simm13;
+ }
+ vassert(simm12 >= 0 && simm12 <= 4095);
+ instr = XXXXX___(X1110,X0101,BITS4(bP,bB,0,bL),
+ iregNo(am->ARMam1.RI.reg),
+ iregNo(rD));
+ instr |= simm12;
+ *p++ = instr;
+ goto done;
+ } else {
+ // RR case
+ goto bad;
+ }
+ }
+ case ARMin_LdSt16: {
+ HReg rD = i->ARMin.LdSt16.rD;
+ UInt bS = i->ARMin.LdSt16.signedLoad ? 1 : 0;
+ UInt bL = i->ARMin.LdSt16.isLoad ? 1 : 0;
+ ARMAMode2* am = i->ARMin.LdSt16.amode;
+ if (am->tag == ARMam2_RI) {
+ HReg rN = am->ARMam2.RI.reg;
+ Int simm8;
+ UInt bP, imm8hi, imm8lo, instr;
+ if (am->ARMam2.RI.simm9 < 0) {
+ bP = 0;
+ simm8 = -am->ARMam2.RI.simm9;
+ } else {
+ bP = 1;
+ simm8 = am->ARMam2.RI.simm9;
+ }
+ vassert(simm8 >= 0 && simm8 <= 255);
+ imm8hi = (simm8 >> 4) & 0xF;
+ imm8lo = simm8 & 0xF;
+ vassert(!(bL == 0 && bS == 1)); // "! signed store"
+ /**/ if (bL == 0 && bS == 0) {
+ // strh
+ instr = XXXXXXXX(X1110,X0001, BITS4(bP,1,0,0), iregNo(rN),
+ iregNo(rD), imm8hi, X1011, imm8lo);
+ *p++ = instr;
+ goto done;
+ }
+ else if (bL == 1 && bS == 0) {
+ // ldrh
+ instr = XXXXXXXX(X1110,X0001, BITS4(bP,1,0,1), iregNo(rN),
+ iregNo(rD), imm8hi, X1011, imm8lo);
+ *p++ = instr;
+ goto done;
+ }
+ else if (bL == 1 && bS == 1) {
+ goto bad;
+ }
+ else vassert(0); // ill-constructed insn
+ } else {
+ // RR case
+ goto bad;
+ }
+ }
+ case ARMin_Ld8S:
+ goto bad;
+ case ARMin_Goto: {
+ UInt instr;
+ IRJumpKind jk = i->ARMin.Goto.jk;
+ ARMCondCode cond = i->ARMin.Goto.cond;
+ UInt rnext = iregNo(i->ARMin.Goto.gnext);
+ Int trc = -1;
+ switch (jk) {
+ case Ijk_Ret: case Ijk_Call: case Ijk_Boring:
+ break; /* no need to set GST in these common cases */
+ case Ijk_ClientReq:
+ trc = VEX_TRC_JMP_CLIENTREQ; break;
+ case Ijk_Sys_int128:
+ case Ijk_Sys_int129:
+ case Ijk_Sys_int130:
+ case Ijk_Yield:
+ case Ijk_EmWarn:
+ case Ijk_MapFail:
+ goto unhandled_jk;
+ case Ijk_NoDecode:
+ trc = VEX_TRC_JMP_NODECODE; break;
+ case Ijk_TInval:
+ trc = VEX_TRC_JMP_TINVAL; break;
+ case Ijk_NoRedir:
+ trc = VEX_TRC_JMP_NOREDIR; break;
+ case Ijk_Sys_sysenter:
+ case Ijk_SigTRAP:
+ case Ijk_SigSEGV:
+ goto unhandled_jk;
+ case Ijk_Sys_syscall:
+ trc = VEX_TRC_JMP_SYS_SYSCALL; break;
+ unhandled_jk:
+ default:
+ goto bad;
+ }
+ if (trc != -1) {
+ // mov{cond} r8, #trc
+ vassert(trc >= 0 && trc <= 255);
+ instr = (cond << 28) | 0x03A08000 | (0xFF & (UInt)trc);
+ *p++ = instr;
+ }
+ // mov{cond} r0, rnext
+ if (rnext != 0) {
+ instr = (cond << 28) | 0x01A00000 | rnext;
+ *p++ = instr;
+ }
+ // bx{cond} r14
+      instr = (cond << 28) | 0x012FFF1E;
+ *p++ = instr;
+ goto done;
+ }
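+      /* For example (illustrative): an Ijk_Boring goto with cond
+         ARMcc_AL and gnext in r2 emits just
+            mov r0, r2   (0xE1A00002)
+            bx  r14      (0xE12FFF1E)
+         leaving the next guest address in r0 for the dispatcher. */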
+ case ARMin_CMov: {
+ UInt instr = skeletal_RI84(i->ARMin.CMov.src);
+ UInt subopc = X1101; /* MOV */
+ UInt SBZ = 0;
+ instr |= XXXXX___(i->ARMin.CMov.cond, (1 & (subopc >> 3)),
+                           (subopc << 1) & 0xF, SBZ, iregNo(i->ARMin.CMov.dst));
+ *p++ = instr;
+ goto done;
+ }
+ case ARMin_Call: {
+ UInt instr;
+         /* Decide on a scratch reg used to hold the call address.
+            This has to be done as per the comments in getRegUsage. */
+ Int scratchNo;
+ switch (i->ARMin.Call.nArgRegs) {
+ case 0: scratchNo = 0; break;
+ case 1: scratchNo = 1; break;
+ case 2: scratchNo = 2; break;
+ case 3: scratchNo = 3; break;
+ case 4: scratchNo = 11; break;
+ default: vassert(0);
+ }
+ // r"scratchNo" = &target
+ p = imm32_to_iregNo( (UInt*)p,
+ scratchNo, (UInt)i->ARMin.Call.target );
+ // blx{cond} r"scratchNo"
+ instr = XXX___XX(i->ARMin.Call.cond, X0001, X0010, /*___*/
+ X0011, scratchNo);
+ instr |= 0xFFF << 8; // stick in the SBOnes
+ *p++ = instr;
+ goto done;
+ }
+ case ARMin_Mul: {
+ /* E0000392 mul r0, r2, r3
+ E0810392 umull r0(LO), r1(HI), r2, r3
+ E0C10392 smull r0(LO), r1(HI), r2, r3
+ */
+ switch (i->ARMin.Mul.op) {
+ case ARMmul_PLAIN: *p++ = 0xE0000392; goto done;
+ case ARMmul_ZX: *p++ = 0xE0810392; goto done;
+ case ARMmul_SX: *p++ = 0xE0C10392; goto done;
+ default: vassert(0);
+ }
+ goto bad;
+ }
+ case ARMin_LdrEX: {
+ /* E1910F9F ldrex r0, [r1]
+ E1F10F9F ldrexh r0, [r1]
+ E1D10F9F ldrexb r0, [r1]
+ */
+ switch (i->ARMin.LdrEX.szB) {
+ case 4: *p++ = 0xE1910F9F; goto done;
+ //case 2: *p++ = 0xE1F10F9F; goto done;
+ case 1: *p++ = 0xE1D10F9F; goto done;
+ default: break;
+ }
+ goto bad;
+ }
+ case ARMin_StrEX: {
+ /* E1820F91 strex r0, r1, [r2]
+ E1E20F91 strexh r0, r1, [r2]
+ E1C20F91 strexb r0, r1, [r2]
+ */
+ switch (i->ARMin.StrEX.szB) {
+ case 4: *p++ = 0xE1820F91; goto done;
+ //case 2: *p++ = 0xE1E20F91; goto done;
+ case 1: *p++ = 0xE1C20F91; goto done;
+ default: break;
+ }
+ goto bad;
+ }
+ case ARMin_VLdStD: {
+ UInt dD = dregNo(i->ARMin.VLdStD.dD);
+ UInt rN = iregNo(i->ARMin.VLdStD.amode->reg);
+ Int simm11 = i->ARMin.VLdStD.amode->simm11;
+ UInt off8 = simm11 >= 0 ? simm11 : ((UInt)(-simm11));
+ UInt bU = simm11 >= 0 ? 1 : 0;
+ UInt bL = i->ARMin.VLdStD.isLoad ? 1 : 0;
+ UInt insn;
+ vassert(0 == (off8 & 3));
+ off8 >>= 2;
+ vassert(0 == (off8 & 0xFFFFFF00));
+ insn = XXXXXX__(0xE,X1101,BITS4(bU,0,0,bL),rN,dD,X1011);
+ insn |= off8;
+ *p++ = insn;
+ goto done;
+ }
+ case ARMin_VLdStS: {
+ UInt fD = fregNo(i->ARMin.VLdStS.fD);
+ UInt rN = iregNo(i->ARMin.VLdStS.amode->reg);
+ Int simm11 = i->ARMin.VLdStS.amode->simm11;
+ UInt off8 = simm11 >= 0 ? simm11 : ((UInt)(-simm11));
+ UInt bU = simm11 >= 0 ? 1 : 0;
+ UInt bL = i->ARMin.VLdStS.isLoad ? 1 : 0;
+ UInt bD = fD & 1;
+ UInt insn;
+ vassert(0 == (off8 & 3));
+ off8 >>= 2;
+ vassert(0 == (off8 & 0xFFFFFF00));
+ insn = XXXXXX__(0xE,X1101,BITS4(bU,bD,0,bL),rN, (fD >> 1), X1010);
+ insn |= off8;
+ *p++ = insn;
+ goto done;
+ }
+ case ARMin_VAluD: {
+ UInt dN = dregNo(i->ARMin.VAluD.argL);
+ UInt dD = dregNo(i->ARMin.VAluD.dst);
+ UInt dM = dregNo(i->ARMin.VAluD.argR);
+ UInt pqrs = X1111; /* undefined */
+ switch (i->ARMin.VAluD.op) {
+ case ARMvfp_ADD: pqrs = X0110; break;
+ case ARMvfp_SUB: pqrs = X0111; break;
+ case ARMvfp_MUL: pqrs = X0100; break;
+ case ARMvfp_DIV: pqrs = X1000; break;
+ default: goto bad;
+ }
+ vassert(pqrs != X1111);
+ UInt bP = (pqrs >> 3) & 1;
+ UInt bQ = (pqrs >> 2) & 1;
+ UInt bR = (pqrs >> 1) & 1;
+ UInt bS = (pqrs >> 0) & 1;
+ UInt insn = XXXXXXXX(0xE, X1110, BITS4(bP,0,bQ,bR), dN, dD,
+ X1011, BITS4(0,bS,0,0), dM);
+ *p++ = insn;
+ goto done;
+ }
+ case ARMin_VAluS: {
+ UInt dN = fregNo(i->ARMin.VAluS.argL);
+ UInt dD = fregNo(i->ARMin.VAluS.dst);
+ UInt dM = fregNo(i->ARMin.VAluS.argR);
+ UInt bN = dN & 1;
+ UInt bD = dD & 1;
+ UInt bM = dM & 1;
+ UInt pqrs = X1111; /* undefined */
+ switch (i->ARMin.VAluS.op) {
+ case ARMvfp_ADD: pqrs = X0110; break;
+ case ARMvfp_SUB: pqrs = X0111; break;
+ case ARMvfp_MUL: pqrs = X0100; break;
+ case ARMvfp_DIV: pqrs = X1000; break;
+ default: goto bad;
+ }
+ vassert(pqrs != X1111);
+ UInt bP = (pqrs >> 3) & 1;
+ UInt bQ = (pqrs >> 2) & 1;
+ UInt bR = (pqrs >> 1) & 1;
+ UInt bS = (pqrs >> 0) & 1;
+ UInt insn = XXXXXXXX(0xE, X1110, BITS4(bP,bD,bQ,bR),
+ (dN >> 1), (dD >> 1),
+ X1010, BITS4(bN,bS,bM,0), (dM >> 1));
+ *p++ = insn;
+ goto done;
+ }
+ case ARMin_VUnaryD: {
+ UInt dD = dregNo(i->ARMin.VUnaryD.dst);
+ UInt dM = dregNo(i->ARMin.VUnaryD.src);
+ UInt insn = 0;
+ switch (i->ARMin.VUnaryD.op) {
+ case ARMvfpu_COPY:
+ insn = XXXXXXXX(0xE, X1110,X1011,X0000,dD,X1011,X0100,dM);
+ break;
+ case ARMvfpu_ABS:
+ insn = XXXXXXXX(0xE, X1110,X1011,X0000,dD,X1011,X1100,dM);
+ break;
+ case ARMvfpu_NEG:
+ insn = XXXXXXXX(0xE, X1110,X1011,X0001,dD,X1011,X0100,dM);
+ break;
+ case ARMvfpu_SQRT:
+ insn = XXXXXXXX(0xE, X1110,X1011,X0001,dD,X1011,X1100,dM);
+ break;
+ default:
+ goto bad;
+ }
+ *p++ = insn;
+ goto done;
+ }
+ case ARMin_VUnaryS: {
+ UInt fD = fregNo(i->ARMin.VUnaryS.dst);
+ UInt fM = fregNo(i->ARMin.VUnaryS.src);
+ UInt insn = 0;
+ switch (i->ARMin.VUnaryS.op) {
+ case ARMvfpu_COPY:
+ insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1), X0000,
+ (fD >> 1), X1010, BITS4(0,1,(fM & 1),0),
+ (fM >> 1));
+ break;
+ case ARMvfpu_ABS:
+ insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1), X0000,
+ (fD >> 1), X1010, BITS4(1,1,(fM & 1),0),
+ (fM >> 1));
+ break;
+ case ARMvfpu_NEG:
+ insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1), X0001,
+ (fD >> 1), X1010, BITS4(0,1,(fM & 1),0),
+ (fM >> 1));
+ break;
+ case ARMvfpu_SQRT:
+ insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1), X0001,
+ (fD >> 1), X1010, BITS4(1,1,(fM & 1),0),
+ (fM >> 1));
+ break;
+ default:
+ goto bad;
+ }
+ *p++ = insn;
+ goto done;
+ }
+ case ARMin_VCmpD: {
+ UInt dD = dregNo(i->ARMin.VCmpD.argL);
+ UInt dM = dregNo(i->ARMin.VCmpD.argR);
+ UInt insn = XXXXXXXX(0xE, X1110, X1011, X0100, dD, X1011, X0100, dM);
+ *p++ = insn; /* FCMPD dD, dM */
+ *p++ = 0xEEF1FA10; /* FMSTAT */
+ goto done;
+ }
+ case ARMin_VCMovD: {
+ UInt cc = (UInt)i->ARMin.VCMovD.cond;
+ UInt dD = dregNo(i->ARMin.VCMovD.dst);
+ UInt dM = dregNo(i->ARMin.VCMovD.src);
+ vassert(cc < 16 && cc != ARMcc_AL);
+ UInt insn = XXXXXXXX(cc, X1110,X1011,X0000,dD,X1011,X0100,dM);
+ *p++ = insn;
+ goto done;
+ }
+ case ARMin_VCMovS: {
+ UInt cc = (UInt)i->ARMin.VCMovS.cond;
+ UInt fD = fregNo(i->ARMin.VCMovS.dst);
+ UInt fM = fregNo(i->ARMin.VCMovS.src);
+ vassert(cc < 16 && cc != ARMcc_AL);
+ UInt insn = XXXXXXXX(cc, X1110, BITS4(1,(fD & 1),1,1),
+ X0000,(fD >> 1),X1010,
+ BITS4(0,1,(fM & 1),0), (fM >> 1));
+ *p++ = insn;
+ goto done;
+ }
+ case ARMin_VCvtSD: {
+ if (i->ARMin.VCvtSD.sToD) {
+ UInt dD = dregNo(i->ARMin.VCvtSD.dst);
+ UInt fM = fregNo(i->ARMin.VCvtSD.src);
+ UInt insn = XXXXXXXX(0xE, X1110, X1011, X0111, dD, X1010,
+ BITS4(1,1, (fM & 1), 0),
+ (fM >> 1));
+ *p++ = insn;
+ goto done;
+ } else {
+ UInt fD = fregNo(i->ARMin.VCvtSD.dst);
+ UInt dM = dregNo(i->ARMin.VCvtSD.src);
+ UInt insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1),
+ X0111, (fD >> 1),
+ X1011, X1100, dM);
+ *p++ = insn;
+ goto done;
+ }
+ goto bad;
+ }
+ case ARMin_VXferD: {
+ UInt dD = dregNo(i->ARMin.VXferD.dD);
+ UInt rHi = iregNo(i->ARMin.VXferD.rHi);
+ UInt rLo = iregNo(i->ARMin.VXferD.rLo);
+ /* vmov dD, rLo, rHi is
+ E C 4 rHi rLo B (0,0,dD[4],1) dD[3:0]
+ vmov rLo, rHi, dD is
+ E C 5 rHi rLo B (0,0,dD[4],1) dD[3:0]
+ */
+ UInt insn
+ = XXXXXXXX(0xE, 0xC, i->ARMin.VXferD.toD ? 4 : 5,
+ rHi, rLo, 0xB,
+ BITS4(0,0, ((dD >> 4) & 1), 1), (dD & 0xF));
+ *p++ = insn;
+ goto done;
+ }
+ case ARMin_VXferS: {
+ UInt fD = fregNo(i->ARMin.VXferS.fD);
+ UInt rLo = iregNo(i->ARMin.VXferS.rLo);
+ /* vmov fD, rLo is
+ E E 0 fD[4:1] rLo A (fD[0],0,0,1) 0
+ vmov rLo, fD is
+ E E 1 fD[4:1] rLo A (fD[0],0,0,1) 0
+ */
+ UInt insn
+ = XXXXXXXX(0xE, 0xE, i->ARMin.VXferS.toS ? 0 : 1,
+ (fD >> 1) & 0xF, rLo, 0xA,
+ BITS4((fD & 1),0,0,1), 0);
+ *p++ = insn;
+ goto done;
+ }
+ case ARMin_VCvtID: {
+ Bool iToD = i->ARMin.VCvtID.iToD;
+ Bool syned = i->ARMin.VCvtID.syned;
+ if (iToD && syned) {
+ // FSITOD: I32S-in-freg to F64-in-dreg
+ UInt regF = fregNo(i->ARMin.VCvtID.src);
+ UInt regD = dregNo(i->ARMin.VCvtID.dst);
+ UInt insn = XXXXXXXX(0xE, X1110, X1011, X1000, regD,
+ X1011, BITS4(1,1,(regF & 1),0),
+ (regF >> 1) & 0xF);
+ *p++ = insn;
+ goto done;
+ }
+ if (iToD && (!syned)) {
+ // FUITOD: I32U-in-freg to F64-in-dreg
+ UInt regF = fregNo(i->ARMin.VCvtID.src);
+ UInt regD = dregNo(i->ARMin.VCvtID.dst);
+ UInt insn = XXXXXXXX(0xE, X1110, X1011, X1000, regD,
+ X1011, BITS4(0,1,(regF & 1),0),
+ (regF >> 1) & 0xF);
+ *p++ = insn;
+ goto done;
+ }
+ if ((!iToD) && syned) {
+ // FTOSID: F64-in-dreg to I32S-in-freg
+ UInt regD = dregNo(i->ARMin.VCvtID.src);
+ UInt regF = fregNo(i->ARMin.VCvtID.dst);
+ UInt insn = XXXXXXXX(0xE, X1110, BITS4(1,(regF & 1),1,1),
+ X1101, (regF >> 1) & 0xF,
+ X1011, X0100, regD);
+ *p++ = insn;
+ goto done;
+ }
+ if ((!iToD) && (!syned)) {
+ // FTOUID: F64-in-dreg to I32U-in-freg
+ UInt regD = dregNo(i->ARMin.VCvtID.src);
+ UInt regF = fregNo(i->ARMin.VCvtID.dst);
+ UInt insn = XXXXXXXX(0xE, X1110, BITS4(1,(regF & 1),1,1),
+ X1100, (regF >> 1) & 0xF,
+ X1011, X0100, regD);
+ *p++ = insn;
+ goto done;
+ }
+ /*UNREACHED*/
+ vassert(0);
+ }
+ case ARMin_FPSCR: {
+ Bool toFPSCR = i->ARMin.FPSCR.toFPSCR;
+         UInt iReg = iregNo(i->ARMin.FPSCR.iReg);
+ if (toFPSCR) {
+ /* fmxr fpscr, iReg is EEE1 iReg A10 */
+ *p++ = 0xEEE10A10 | ((iReg & 0xF) << 12);
+ goto done;
+ }
+ goto bad; // FPSCR -> iReg case currently ATC
+ }
+ case ARMin_MFence: {
+ *p++ = 0xEE070F9A; /* mcr 15,0,r0,c7,c10,4 (DSB) */
+ *p++ = 0xEE070FBA; /* mcr 15,0,r0,c7,c10,5 (DMB) */
+ *p++ = 0xEE070F95; /* mcr 15,0,r0,c7,c5,4 (ISB) */
+ goto done;
+ }
+ case ARMin_NLdStQ: {
+ UInt regD = qregNo(i->ARMin.NLdStQ.dQ) << 1;
+ UInt regN, regM;
+ UInt D = regD >> 4;
+ UInt bL = i->ARMin.NLdStQ.isLoad ? 1 : 0;
+ UInt insn;
+ vassert(hregClass(i->ARMin.NLdStQ.dQ) == HRcVec128);
+ regD &= 0xF;
+ if (i->ARMin.NLdStQ.amode->tag == ARMamN_RR) {
+ regN = iregNo(i->ARMin.NLdStQ.amode->ARMamN.RR.rN);
+ regM = iregNo(i->ARMin.NLdStQ.amode->ARMamN.RR.rM);
+ } else {
+ regN = iregNo(i->ARMin.NLdStQ.amode->ARMamN.R.rN);
+ regM = 15;
+ }
+ insn = XXXXXXXX(0xF, X0100, BITS4(0, D, bL, 0),
+ regN, regD, X1010, X1000, regM);
+ *p++ = insn;
+ goto done;
+ }
+ case ARMin_NLdStD: {
+ UInt regD = dregNo(i->ARMin.NLdStD.dD);
+ UInt regN, regM;
+ UInt D = regD >> 4;
+ UInt bL = i->ARMin.NLdStD.isLoad ? 1 : 0;
+ UInt insn;
+ vassert(hregClass(i->ARMin.NLdStD.dD) == HRcFlt64);
+ regD &= 0xF;
+ if (i->ARMin.NLdStD.amode->tag == ARMamN_RR) {
+ regN = iregNo(i->ARMin.NLdStD.amode->ARMamN.RR.rN);
+ regM = iregNo(i->ARMin.NLdStD.amode->ARMamN.RR.rM);
+ } else {
+ regN = iregNo(i->ARMin.NLdStD.amode->ARMamN.R.rN);
+ regM = 15;
+ }
+ insn = XXXXXXXX(0xF, X0100, BITS4(0, D, bL, 0),
+ regN, regD, X0111, X1000, regM);
+ *p++ = insn;
+ goto done;
+ }
+ case ARMin_NUnaryS: {
+ UInt Q = i->ARMin.NUnaryS.Q ? 1 : 0;
+ UInt regD, D;
+ UInt regM, M;
+ UInt size = i->ARMin.NUnaryS.size;
+ UInt insn;
+ UInt opc, opc1, opc2;
+ switch (i->ARMin.NUnaryS.op) {
+ case ARMneon_VDUP:
+ if (i->ARMin.NUnaryS.size >= 16)
+ goto bad;
+ if (i->ARMin.NUnaryS.dst->tag != ARMNRS_Reg)
+ goto bad;
+ if (i->ARMin.NUnaryS.src->tag != ARMNRS_Scalar)
+ goto bad;
+ regD = (hregClass(i->ARMin.NUnaryS.dst->reg) == HRcVec128)
+ ? (qregNo(i->ARMin.NUnaryS.dst->reg) << 1)
+ : dregNo(i->ARMin.NUnaryS.dst->reg);
+ regM = (hregClass(i->ARMin.NUnaryS.src->reg) == HRcVec128)
+ ? (qregNo(i->ARMin.NUnaryS.src->reg) << 1)
+ : dregNo(i->ARMin.NUnaryS.src->reg);
+ D = regD >> 4;
+ M = regM >> 4;
+ regD &= 0xf;
+ regM &= 0xf;
+ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1),
+ (i->ARMin.NUnaryS.size & 0xf), regD,
+ X1100, BITS4(0,Q,M,0), regM);
+ *p++ = insn;
+ goto done;
+ case ARMneon_SETELEM:
+ regD = Q ? (qregNo(i->ARMin.NUnaryS.dst->reg) << 1) :
+ dregNo(i->ARMin.NUnaryS.dst->reg);
+ regM = iregNo(i->ARMin.NUnaryS.src->reg);
+ M = regM >> 4;
+ D = regD >> 4;
+ regM &= 0xF;
+ regD &= 0xF;
+ if (i->ARMin.NUnaryS.dst->tag != ARMNRS_Scalar)
+ goto bad;
+ switch (size) {
+ case 0:
+ if (i->ARMin.NUnaryS.dst->index > 7)
+ goto bad;
+ opc = X1000 | i->ARMin.NUnaryS.dst->index;
+ break;
+ case 1:
+ if (i->ARMin.NUnaryS.dst->index > 3)
+ goto bad;
+ opc = X0001 | (i->ARMin.NUnaryS.dst->index << 1);
+ break;
+ case 2:
+ if (i->ARMin.NUnaryS.dst->index > 1)
+ goto bad;
+ opc = X0000 | (i->ARMin.NUnaryS.dst->index << 2);
+ break;
+ default:
+ goto bad;
+ }
+ opc1 = (opc >> 2) & 3;
+ opc2 = opc & 3;
+ insn = XXXXXXXX(0xE, X1110, BITS4(0,(opc1 >> 1),(opc1 & 1),0),
+ regD, regM, X1011,
+ BITS4(D,(opc2 >> 1),(opc2 & 1),1), X0000);
+ *p++ = insn;
+ goto done;
+ case ARMneon_GETELEMU:
+ regM = Q ? (qregNo(i->ARMin.NUnaryS.src->reg) << 1) :
+ dregNo(i->ARMin.NUnaryS.src->reg);
+ regD = iregNo(i->ARMin.NUnaryS.dst->reg);
+ M = regM >> 4;
+ D = regD >> 4;
+ regM &= 0xF;
+ regD &= 0xF;
+ if (i->ARMin.NUnaryS.src->tag != ARMNRS_Scalar)
+ goto bad;
+ switch (size) {
+ case 0:
+ if (Q && i->ARMin.NUnaryS.src->index > 7) {
+ regM++;
+ i->ARMin.NUnaryS.src->index -= 8;
+ }
+ if (i->ARMin.NUnaryS.src->index > 7)
+ goto bad;
+ opc = X1000 | i->ARMin.NUnaryS.src->index;
+ break;
+ case 1:
+ if (Q && i->ARMin.NUnaryS.src->index > 3) {
+ regM++;
+ i->ARMin.NUnaryS.src->index -= 4;
+ }
+ if (i->ARMin.NUnaryS.src->index > 3)
+ goto bad;
+ opc = X0001 | (i->ARMin.NUnaryS.src->index << 1);
+ break;
+ case 2:
+ goto bad;
+ default:
+ goto bad;
+ }
+ opc1 = (opc >> 2) & 3;
+ opc2 = opc & 3;
+ insn = XXXXXXXX(0xE, X1110, BITS4(1,(opc1 >> 1),(opc1 & 1),1),
+ regM, regD, X1011,
+ BITS4(M,(opc2 >> 1),(opc2 & 1),1), X0000);
+ *p++ = insn;
+ goto done;
+ case ARMneon_GETELEMS:
+ regM = Q ? (qregNo(i->ARMin.NUnaryS.src->reg) << 1) :
+ dregNo(i->ARMin.NUnaryS.src->reg);
+ regD = iregNo(i->ARMin.NUnaryS.dst->reg);
+ M = regM >> 4;
+ D = regD >> 4;
+ regM &= 0xF;
+ regD &= 0xF;
+ if (i->ARMin.NUnaryS.src->tag != ARMNRS_Scalar)
+ goto bad;
+ switch (size) {
+ case 0:
+ if (Q && i->ARMin.NUnaryS.src->index > 7) {
+ regM++;
+ i->ARMin.NUnaryS.src->index -= 8;
+ }
+ if (i->ARMin.NUnaryS.src->index > 7)
+ goto bad;
+ opc = X1000 | i->ARMin.NUnaryS.src->index;
+ break;
+ case 1:
+ if (Q && i->ARMin.NUnaryS.src->index > 3) {
+ regM++;
+ i->ARMin.NUnaryS.src->index -= 4;
+ }
+ if (i->ARMin.NUnaryS.src->index > 3)
+ goto bad;
+ opc = X0001 | (i->ARMin.NUnaryS.src->index << 1);
+ break;
+ case 2:
+ if (Q && i->ARMin.NUnaryS.src->index > 1) {
+ regM++;
+ i->ARMin.NUnaryS.src->index -= 2;
+ }
+ if (i->ARMin.NUnaryS.src->index > 1)
+ goto bad;
+ opc = X0000 | (i->ARMin.NUnaryS.src->index << 2);
+ break;
+ default:
+ goto bad;
+ }
+ opc1 = (opc >> 2) & 3;
+ opc2 = opc & 3;
+ insn = XXXXXXXX(0xE, X1110, BITS4(0,(opc1 >> 1),(opc1 & 1),1),
+ regM, regD, X1011,
+ BITS4(M,(opc2 >> 1),(opc2 & 1),1), X0000);
+ *p++ = insn;
+ goto done;
+ default:
+ goto bad;
+ }
+ }
+ case ARMin_NUnary: {
+ UInt Q = i->ARMin.NUnary.Q ? 1 : 0;
+ UInt regD = (hregClass(i->ARMin.NUnary.dst) == HRcVec128)
+ ? (qregNo(i->ARMin.NUnary.dst) << 1)
+ : dregNo(i->ARMin.NUnary.dst);
+ UInt regM, M;
+ UInt D = regD >> 4;
+ UInt sz1 = i->ARMin.NUnary.size >> 1;
+ UInt sz2 = i->ARMin.NUnary.size & 1;
+ UInt sz = i->ARMin.NUnary.size;
+ UInt insn;
+ UInt F = 0; /* TODO: floating point EQZ ??? */
+ if (i->ARMin.NUnary.op != ARMneon_DUP) {
+ regM = (hregClass(i->ARMin.NUnary.src) == HRcVec128)
+ ? (qregNo(i->ARMin.NUnary.src) << 1)
+ : dregNo(i->ARMin.NUnary.src);
+ M = regM >> 4;
+ } else {
+ regM = iregNo(i->ARMin.NUnary.src);
+ M = regM >> 4;
+ }
+ regD &= 0xF;
+ regM &= 0xF;
+ switch (i->ARMin.NUnary.op) {
+ case ARMneon_COPY: /* VMOV reg, reg */
+ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regM, regD, X0001,
+ BITS4(M,Q,M,1), regM);
+ break;
+ case ARMneon_COPYN: /* VMOVN regD, regQ */
+ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
+ regD, X0010, BITS4(0,0,M,0), regM);
+ break;
+ case ARMneon_COPYQNSS: /* VQMOVN regD, regQ */
+ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
+ regD, X0010, BITS4(1,0,M,0), regM);
+ break;
+ case ARMneon_COPYQNUS: /* VQMOVUN regD, regQ */
+ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
+ regD, X0010, BITS4(0,1,M,0), regM);
+ break;
+ case ARMneon_COPYQNUU: /* VQMOVN regD, regQ */
+ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
+ regD, X0010, BITS4(1,1,M,0), regM);
+ break;
+ case ARMneon_COPYLS: /* VMOVL regQ, regD */
+ if (sz >= 3)
+ goto bad;
+ insn = XXXXXXXX(0xF, X0010,
+ BITS4(1,D,(sz == 2) ? 1 : 0,(sz == 1) ? 1 : 0),
+ BITS4((sz == 0) ? 1 : 0,0,0,0),
+ regD, X1010, BITS4(0,0,M,1), regM);
+ break;
+ case ARMneon_COPYLU: /* VMOVL regQ, regD */
+ if (sz >= 3)
+ goto bad;
+ insn = XXXXXXXX(0xF, X0011,
+ BITS4(1,D,(sz == 2) ? 1 : 0,(sz == 1) ? 1 : 0),
+ BITS4((sz == 0) ? 1 : 0,0,0,0),
+ regD, X1010, BITS4(0,0,M,1), regM);
+ break;
+ case ARMneon_NOT: /* VMVN reg, reg*/
+ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0000, regD, X0101,
+ BITS4(1,Q,M,0), regM);
+ break;
+ case ARMneon_EQZ:
+ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,1),
+ regD, BITS4(0,F,0,1), BITS4(0,Q,M,0), regM);
+ break;
+ case ARMneon_CNT:
+ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0000, regD, X0101,
+ BITS4(0,Q,M,0), regM);
+ break;
+ case ARMneon_CLZ:
+ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
+ regD, X0100, BITS4(1,Q,M,0), regM);
+ break;
+ case ARMneon_CLS:
+ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
+ regD, X0100, BITS4(0,Q,M,0), regM);
+ break;
+ case ARMneon_ABS:
+ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,1),
+ regD, X0011, BITS4(0,Q,M,0), regM);
+ break;
+ case ARMneon_DUP:
+ sz1 = i->ARMin.NUnary.size == 0 ? 1 : 0;
+ sz2 = i->ARMin.NUnary.size == 1 ? 1 : 0;
+ vassert(sz1 + sz2 < 2);
+ insn = XXXXXXXX(0xE, X1110, BITS4(1, sz1, Q, 0), regD, regM,
+ X1011, BITS4(D,0,sz2,1), X0000);
+ break;
+ case ARMneon_REV16:
+ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
+ regD, BITS4(0,0,0,1), BITS4(0,Q,M,0), regM);
+ break;
+ case ARMneon_REV32:
+ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
+ regD, BITS4(0,0,0,0), BITS4(1,Q,M,0), regM);
+ break;
+ case ARMneon_REV64:
+ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
+ regD, BITS4(0,0,0,0), BITS4(0,Q,M,0), regM);
+ break;
+ case ARMneon_PADDLU:
+ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
+ regD, X0010, BITS4(1,Q,M,0), regM);
+ break;
+ case ARMneon_PADDLS:
+ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
+ regD, X0010, BITS4(0,Q,M,0), regM);
+ break;
+ case ARMneon_VQSHLNUU:
+ insn = XXXXXXXX(0xF, X0011,
+ (1 << 3) | (D << 2) | ((sz >> 4) & 3),
+ sz & 0xf, regD, X0111,
+ BITS4(sz >> 6,Q,M,1), regM);
+ break;
+ case ARMneon_VQSHLNSS:
+ insn = XXXXXXXX(0xF, X0010,
+ (1 << 3) | (D << 2) | ((sz >> 4) & 3),
+ sz & 0xf, regD, X0111,
+ BITS4(sz >> 6,Q,M,1), regM);
+ break;
+ case ARMneon_VQSHLNUS:
+ insn = XXXXXXXX(0xF, X0011,
+ (1 << 3) | (D << 2) | ((sz >> 4) & 3),
+ sz & 0xf, regD, X0110,
+ BITS4(sz >> 6,Q,M,1), regM);
+ break;
+ case ARMneon_VCVTFtoS:
+ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0111,
+ BITS4(0,Q,M,0), regM);
+ break;
+ case ARMneon_VCVTFtoU:
+ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0111,
+ BITS4(1,Q,M,0), regM);
+ break;
+ case ARMneon_VCVTStoF:
+ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0110,
+ BITS4(0,Q,M,0), regM);
+ break;
+ case ARMneon_VCVTUtoF:
+ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0110,
+ BITS4(1,Q,M,0), regM);
+ break;
+ case ARMneon_VCVTFtoFixedU:
+ sz1 = (sz >> 5) & 1;
+ sz2 = (sz >> 4) & 1;
+ sz &= 0xf;
+ insn = XXXXXXXX(0xF, X0011,
+ BITS4(1,D,sz1,sz2), sz, regD, X1111,
+ BITS4(0,Q,M,1), regM);
+ break;
+ case ARMneon_VCVTFtoFixedS:
+ sz1 = (sz >> 5) & 1;
+ sz2 = (sz >> 4) & 1;
+ sz &= 0xf;
+ insn = XXXXXXXX(0xF, X0010,
+ BITS4(1,D,sz1,sz2), sz, regD, X1111,
+ BITS4(0,Q,M,1), regM);
+ break;
+ case ARMneon_VCVTFixedUtoF:
+ sz1 = (sz >> 5) & 1;
+ sz2 = (sz >> 4) & 1;
+ sz &= 0xf;
+ insn = XXXXXXXX(0xF, X0011,
+ BITS4(1,D,sz1,sz2), sz, regD, X1110,
+ BITS4(0,Q,M,1), regM);
+ break;
+ case ARMneon_VCVTFixedStoF:
+ sz1 = (sz >> 5) & 1;
+ sz2 = (sz >> 4) & 1;
+ sz &= 0xf;
+ insn = XXXXXXXX(0xF, X0010,
+ BITS4(1,D,sz1,sz2), sz, regD, X1110,
+ BITS4(0,Q,M,1), regM);
+ break;
+ case ARMneon_VCVTF32toF16:
+ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0110, regD, X0110,
+ BITS4(0,0,M,0), regM);
+ break;
+ case ARMneon_VCVTF16toF32:
+ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0110, regD, X0111,
+ BITS4(0,0,M,0), regM);
+ break;
+ case ARMneon_VRECIP:
+ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0100,
+ BITS4(0,Q,M,0), regM);
+ break;
+ case ARMneon_VRECIPF:
+ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0101,
+ BITS4(0,Q,M,0), regM);
+ break;
+ case ARMneon_VABSFP:
+ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1001, regD, X0111,
+ BITS4(0,Q,M,0), regM);
+ break;
+ case ARMneon_VRSQRTEFP:
+ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0101,
+ BITS4(1,Q,M,0), regM);
+ break;
+ case ARMneon_VRSQRTE:
+ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0100,
+ BITS4(1,Q,M,0), regM);
+ break;
+ case ARMneon_VNEGF:
+ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1001, regD, X0111,
+ BITS4(1,Q,M,0), regM);
+ break;
+
+ default:
+ goto bad;
+ }
+ *p++ = insn;
+ goto done;
+ }
+ case ARMin_NDual: {
+ UInt Q = i->ARMin.NDual.Q ? 1 : 0;
+ UInt regD = (hregClass(i->ARMin.NDual.arg1) == HRcVec128)
+ ? (qregNo(i->ARMin.NDual.arg1) << 1)
+ : dregNo(i->ARMin.NDual.arg1);
+ UInt regM = (hregClass(i->ARMin.NDual.arg2) == HRcVec128)
+ ? (qregNo(i->ARMin.NDual.arg2) << 1)
+ : dregNo(i->ARMin.NDual.arg2);
+ UInt D = regD >> 4;
+ UInt M = regM >> 4;
+ UInt sz1 = i->ARMin.NDual.size >> 1;
+ UInt sz2 = i->ARMin.NDual.size & 1;
+ UInt insn;
+ regD &= 0xF;
+ regM &= 0xF;
+ switch (i->ARMin.NDual.op) {
+ case ARMneon_TRN: /* VTRN reg, reg */
+ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
+ regD, X0000, BITS4(1,Q,M,0), regM);
+ break;
+ case ARMneon_ZIP: /* VZIP reg, reg */
+ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
+ regD, X0001, BITS4(1,Q,M,0), regM);
+ break;
+ case ARMneon_UZP: /* VUZP reg, reg */
+ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
+ regD, X0001, BITS4(0,Q,M,0), regM);
+ break;
+ default:
+ goto bad;
+ }
+ *p++ = insn;
+ goto done;
+ }
+ case ARMin_NBinary: {
+ UInt Q = i->ARMin.NBinary.Q ? 1 : 0;
+ UInt regD = (hregClass(i->ARMin.NBinary.dst) == HRcVec128)
+ ? (qregNo(i->ARMin.NBinary.dst) << 1)
+ : dregNo(i->ARMin.NBinary.dst);
+ UInt regN = (hregClass(i->ARMin.NBinary.argL) == HRcVec128)
+ ? (qregNo(i->ARMin.NBinary.argL) << 1)
+ : dregNo(i->ARMin.NBinary.argL);
+ UInt regM = (hregClass(i->ARMin.NBinary.argR) == HRcVec128)
+ ? (qregNo(i->ARMin.NBinary.argR) << 1)
+ : dregNo(i->ARMin.NBinary.argR);
+ UInt sz1 = i->ARMin.NBinary.size >> 1;
+ UInt sz2 = i->ARMin.NBinary.size & 1;
+ UInt D = regD >> 4;
+ UInt N = regN >> 4;
+ UInt M = regM >> 4;
+ UInt insn;
+ regD &= 0xF;
+ regM &= 0xF;
+ regN &= 0xF;
+ switch (i->ARMin.NBinary.op) {
+ case ARMneon_VAND: /* VAND reg, reg, reg */
+ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD, X0001,
+ BITS4(N,Q,M,1), regM);
+ break;
+ case ARMneon_VORR: /* VORR reg, reg, reg*/
+ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD, X0001,
+ BITS4(N,Q,M,1), regM);
+ break;
+ case ARMneon_VXOR: /* VEOR reg, reg, reg */
+ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD, X0001,
+ BITS4(N,Q,M,1), regM);
+ break;
+ case ARMneon_VADD: /* VADD reg, reg, reg */
+ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
+ X1000, BITS4(N,Q,M,0), regM);
+ break;
+ case ARMneon_VSUB: /* VSUB reg, reg, reg */
+ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
+ X1000, BITS4(N,Q,M,0), regM);
+ break;
+ case ARMneon_VMINU: /* VMIN.Uxx reg, reg, reg */
+ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
+ X0110, BITS4(N,Q,M,1), regM);
+ break;
+ case ARMneon_VMINS: /* VMIN.Sxx reg, reg, reg */
+ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
+ X0110, BITS4(N,Q,M,1), regM);
+ break;
+ case ARMneon_VMAXU: /* VMAX.Uxx reg, reg, reg */
+ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
+ X0110, BITS4(N,Q,M,0), regM);
+ break;
+ case ARMneon_VMAXS: /* VMAX.Sxx reg, reg, reg */
+ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
+ X0110, BITS4(N,Q,M,0), regM);
+ break;
+ case ARMneon_VRHADDS: /* VRHADD.Sxx reg, reg, reg */
+ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
+ X0001, BITS4(N,Q,M,0), regM);
+ break;
+ case ARMneon_VRHADDU: /* VRHADD.Uxx reg, reg, reg */
+ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
+ X0001, BITS4(N,Q,M,0), regM);
+ break;
+ case ARMneon_VQADDU: /* VQADD unsigned reg, reg, reg */
+ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
+ X0000, BITS4(N,Q,M,1), regM);
+ break;
+ case ARMneon_VQADDS: /* VQADD signed reg, reg, reg */
+ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
+ X0000, BITS4(N,Q,M,1), regM);
+ break;
+ case ARMneon_VQSUBU: /* VQSUB unsigned reg, reg, reg */
+ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
+ X0010, BITS4(N,Q,M,1), regM);
+ break;
+ case ARMneon_VQSUBS: /* VQSUB signed reg, reg, reg */
+ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
+ X0010, BITS4(N,Q,M,1), regM);
+ break;
+ case ARMneon_VCGTU: /* VCGT unsigned reg, reg, reg */
+ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
+ X0011, BITS4(N,Q,M,0), regM);
+ break;
+ case ARMneon_VCGTS: /* VCGT signed reg, reg, reg */
+ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
+ X0011, BITS4(N,Q,M,0), regM);
+ break;
+ case ARMneon_VCGEU: /* VCGE unsigned reg, reg, reg */
+ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
+ X0011, BITS4(N,Q,M,1), regM);
+ break;
+ case ARMneon_VCGES: /* VCGE signed reg, reg, reg */
+ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
+ X0011, BITS4(N,Q,M,1), regM);
+ break;
+ case ARMneon_VCEQ: /* VCEQ reg, reg, reg */
+ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
+ X1000, BITS4(N,Q,M,1), regM);
+ break;
+ case ARMneon_VEXT: /* VEXT.8 reg, reg, #imm4*/
+ if (i->ARMin.NBinary.size >= 16)
+ goto bad;
+ insn = XXXXXXXX(0xF, X0010, BITS4(1,D,1,1), regN, regD,
+ i->ARMin.NBinary.size & 0xf, BITS4(N,Q,M,0),
+ regM);
+ break;
+ case ARMneon_VMUL:
+ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
+ X1001, BITS4(N,Q,M,1), regM);
+ break;
+ case ARMneon_VMULLU:
+ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,sz1,sz2), regN, regD,
+ X1100, BITS4(N,0,M,0), regM);
+ break;
+ case ARMneon_VMULLS:
+ insn = XXXXXXXX(0xF, X0010, BITS4(1,D,sz1,sz2), regN, regD,
+ X1100, BITS4(N,0,M,0), regM);
+ break;
+ case ARMneon_VMULP:
+ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
+ X1001, BITS4(N,Q,M,1), regM);
+ break;
+ case ARMneon_VMULFP:
+ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD,
+ X1101, BITS4(N,Q,M,1), regM);
+ break;
+ case ARMneon_VMULLP:
+ insn = XXXXXXXX(0xF, X0010, BITS4(1,D,sz1,sz2), regN, regD,
+ X1110, BITS4(N,0,M,0), regM);
+ break;
+ case ARMneon_VQDMULH:
+ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
+ X1011, BITS4(N,Q,M,0), regM);
+ break;
+ case ARMneon_VQRDMULH:
+ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
+ X1011, BITS4(N,Q,M,0), regM);
+ break;
+ case ARMneon_VQDMULL:
+ insn = XXXXXXXX(0xF, X0010, BITS4(1,D,sz1,sz2), regN, regD,
+ X1101, BITS4(N,0,M,0), regM);
+ break;
+ case ARMneon_VTBL:
+ insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), regN, regD,
+ X1000, BITS4(N,0,M,0), regM);
+ break;
+ case ARMneon_VPADD:
+ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
+ X1011, BITS4(N,Q,M,1), regM);
+ break;
+ case ARMneon_VPADDFP:
+ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD,
+ X1101, BITS4(N,Q,M,0), regM);
+ break;
+ case ARMneon_VPMINU:
+ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
+ X1010, BITS4(N,Q,M,1), regM);
+ break;
+ case ARMneon_VPMINS:
+ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
+ X1010, BITS4(N,Q,M,1), regM);
+ break;
+ case ARMneon_VPMAXU:
+ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
+ X1010, BITS4(N,Q,M,0), regM);
+ break;
+ case ARMneon_VPMAXS:
+ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
+ X1010, BITS4(N,Q,M,0), regM);
+ break;
+ case ARMneon_VADDFP: /* VADD reg, reg, reg */
+ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD,
+ X1101, BITS4(N,Q,M,0), regM);
+ break;
+            case ARMneon_VSUBFP: /* VSUB reg, reg, reg */
+ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD,
+ X1101, BITS4(N,Q,M,0), regM);
+ break;
+ case ARMneon_VABDFP: /* VABD reg, reg, reg */
+ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,1,0), regN, regD,
+ X1101, BITS4(N,Q,M,0), regM);
+ break;
+ case ARMneon_VMINF:
+ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD,
+ X1111, BITS4(N,Q,M,0), regM);
+ break;
+ case ARMneon_VMAXF:
+ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD,
+ X1111, BITS4(N,Q,M,0), regM);
+ break;
+ case ARMneon_VPMINF:
+ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,1,0), regN, regD,
+ X1111, BITS4(N,Q,M,0), regM);
+ break;
+ case ARMneon_VPMAXF:
+ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD,
+ X1111, BITS4(N,Q,M,0), regM);
+ break;
+ case ARMneon_VRECPS:
+ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD, X1111,
+ BITS4(N,Q,M,1), regM);
+ break;
+ case ARMneon_VCGTF:
+ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,1,0), regN, regD, X1110,
+ BITS4(N,Q,M,0), regM);
+ break;
+ case ARMneon_VCGEF:
+ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD, X1110,
+ BITS4(N,Q,M,0), regM);
+ break;
+ case ARMneon_VCEQF:
+ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD, X1110,
+ BITS4(N,Q,M,0), regM);
+ break;
+ case ARMneon_VRSQRTS:
+ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD, X1111,
+ BITS4(N,Q,M,1), regM);
+ break;
+ default:
+ goto bad;
+ }
+ *p++ = insn;
+ goto done;
+ }
+ case ARMin_NShift: {
+ UInt Q = i->ARMin.NShift.Q ? 1 : 0;
+ UInt regD = (hregClass(i->ARMin.NShift.dst) == HRcVec128)
+ ? (qregNo(i->ARMin.NShift.dst) << 1)
+ : dregNo(i->ARMin.NShift.dst);
+ UInt regM = (hregClass(i->ARMin.NShift.argL) == HRcVec128)
+ ? (qregNo(i->ARMin.NShift.argL) << 1)
+ : dregNo(i->ARMin.NShift.argL);
+ UInt regN = (hregClass(i->ARMin.NShift.argR) == HRcVec128)
+ ? (qregNo(i->ARMin.NShift.argR) << 1)
+ : dregNo(i->ARMin.NShift.argR);
+ UInt sz1 = i->ARMin.NShift.size >> 1;
+ UInt sz2 = i->ARMin.NShift.size & 1;
+ UInt D = regD >> 4;
+ UInt N = regN >> 4;
+ UInt M = regM >> 4;
+ UInt insn;
+ regD &= 0xF;
+ regM &= 0xF;
+ regN &= 0xF;
+ switch (i->ARMin.NShift.op) {
+ case ARMneon_VSHL:
+ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
+ X0100, BITS4(N,Q,M,0), regM);
+ break;
+ case ARMneon_VSAL:
+ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
+ X0100, BITS4(N,Q,M,0), regM);
+ break;
+ case ARMneon_VQSHL:
+ insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
+ X0100, BITS4(N,Q,M,1), regM);
+ break;
+ case ARMneon_VQSAL:
+ insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
+ X0100, BITS4(N,Q,M,1), regM);
+ break;
+ default:
+ goto bad;
+ }
+ *p++ = insn;
+ goto done;
+ }
+ case ARMin_NeonImm: {
+ UInt Q = (hregClass(i->ARMin.NeonImm.dst) == HRcVec128) ? 1 : 0;
+ UInt regD = Q ? (qregNo(i->ARMin.NeonImm.dst) << 1) :
+ dregNo(i->ARMin.NeonImm.dst);
+ UInt D = regD >> 4;
+ UInt imm = i->ARMin.NeonImm.imm->imm8;
+ UInt tp = i->ARMin.NeonImm.imm->type;
+ UInt j = imm >> 7;
+ UInt imm3 = (imm >> 4) & 0x7;
+ UInt imm4 = imm & 0xF;
+ UInt cmode, op;
+ UInt insn;
+ regD &= 0xF;
+ if (tp == 9)
+ op = 1;
+ else
+ op = 0;
+ switch (tp) {
+ case 0:
+ case 1:
+ case 2:
+ case 3:
+ case 4:
+ case 5:
+ cmode = tp << 1;
+ break;
+ case 9:
+ case 6:
+ cmode = 14;
+ break;
+ case 7:
+ cmode = 12;
+ break;
+ case 8:
+ cmode = 13;
+ break;
+ case 10:
+ cmode = 15;
+ break;
+ default:
+ vpanic("ARMin_NeonImm");
+ }
+ insn = XXXXXXXX(0xF, BITS4(0,0,1,j), BITS4(1,D,0,0), imm3, regD,
+ cmode, BITS4(0,Q,op,1), imm4);
+ *p++ = insn;
+ goto done;
+ }
+ case ARMin_NCMovQ: {
+ UInt cc = (UInt)i->ARMin.NCMovQ.cond;
+ UInt qM = qregNo(i->ARMin.NCMovQ.src) << 1;
+ UInt qD = qregNo(i->ARMin.NCMovQ.dst) << 1;
+ UInt vM = qM & 0xF;
+ UInt vD = qD & 0xF;
+ UInt M = (qM >> 4) & 1;
+ UInt D = (qD >> 4) & 1;
+ vassert(cc < 16 && cc != ARMcc_AL && cc != ARMcc_NV);
+ /* b!cc here+8: !cc A00 0000 */
+ UInt insn = XXXXXXXX(cc ^ 1, 0xA, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0);
+ *p++ = insn;
+ /* vmov qD, qM */
+ insn = XXXXXXXX(0xF, 0x2, BITS4(0,D,1,0),
+ vM, vD, BITS4(0,0,0,1), BITS4(M,1,M,1), vM);
+ *p++ = insn;
+ goto done;
+ }
+ case ARMin_Add32: {
+ UInt regD = iregNo(i->ARMin.Add32.rD);
+ UInt regN = iregNo(i->ARMin.Add32.rN);
+ UInt imm32 = i->ARMin.Add32.imm32;
+ vassert(regD != regN);
+ /* MOV regD, imm32 */
+ p = imm32_to_iregNo((UInt *)p, regD, imm32);
+ /* ADD regD, regN, regD */
+ UInt insn = XXXXXXXX(0xE, 0, X1000, regN, regD, 0, 0, regD);
+ *p++ = insn;
+ goto done;
+ }
+ /* ... */
+ default:
+ goto bad;
+ }
+
+ bad:
+ ppARMInstr(i);
+ vpanic("emit_ARMInstr");
+ /*NOTREACHED*/
+
+ done:
+ vassert(((UChar*)p) - &buf[0] <= 32);
+ return ((UChar*)p) - &buf[0];
+}
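+
+/* A usage sketch (illustrative, deliberately compiled out): encoding
+   "add r0, r1, r2" via the public ARMInstr constructors. */
+#if 0
+static void example_emit_ARMInstr ( void )
+{
+   UChar buf[32];
+   ARMInstr* add
+      = ARMInstr_Alu(ARMalu_ADD, hregARM_R0(), hregARM_R1(),
+                     ARMRI84_R(hregARM_R2()));
+   Int n = emit_ARMInstr(buf, 32, add, False/*!mode64*/, NULL);
+   vassert(n == 4);  /* one 32-bit insn: 0xE0810002 */
+}
+#endif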
+
+#undef BITS4
+#undef X0000
+#undef X0001
+#undef X0010
+#undef X0011
+#undef X0100
+#undef X0101
+#undef X0110
+#undef X0111
+#undef X1000
+#undef X1001
+#undef X1010
+#undef X1011
+#undef X1100
+#undef X1101
+#undef X1110
+#undef X1111
+#undef XXXXX___
+#undef XXXXXX__
+#undef XXX___XX
+#undef XXXXX__X
+#undef XXXXXXXX
+
+/*---------------------------------------------------------------*/
+/*--- end host_arm_defs.c ---*/
+/*---------------------------------------------------------------*/
diff --git a/VEX/priv/host_arm_defs.h b/VEX/priv/host_arm_defs.h
new file mode 100644
index 0000000..1901e80
--- /dev/null
+++ b/VEX/priv/host_arm_defs.h
@@ -0,0 +1,978 @@
+
+/*---------------------------------------------------------------*/
+/*--- begin host_arm_defs.h ---*/
+/*---------------------------------------------------------------*/
+
+/*
+ This file is part of Valgrind, a dynamic binary instrumentation
+ framework.
+
+ Copyright (C) 2004-2010 OpenWorks LLP
+ info@open-works.net
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA.
+
+ The GNU General Public License is contained in the file COPYING.
+*/
+
+#ifndef __VEX_HOST_ARM_DEFS_H
+#define __VEX_HOST_ARM_DEFS_H
+
+extern UInt arm_hwcaps;
+
+
+/* --------- Registers. --------- */
+
+/* The usual HReg abstraction.
+ There are 16 general purpose regs.
+*/
+
+extern void ppHRegARM ( HReg );
+
+extern HReg hregARM_R0 ( void );
+extern HReg hregARM_R1 ( void );
+extern HReg hregARM_R2 ( void );
+extern HReg hregARM_R3 ( void );
+extern HReg hregARM_R4 ( void );
+extern HReg hregARM_R5 ( void );
+extern HReg hregARM_R6 ( void );
+extern HReg hregARM_R7 ( void );
+extern HReg hregARM_R8 ( void );
+extern HReg hregARM_R9 ( void );
+extern HReg hregARM_R10 ( void );
+extern HReg hregARM_R11 ( void );
+extern HReg hregARM_R12 ( void );
+extern HReg hregARM_R13 ( void );
+extern HReg hregARM_R14 ( void );
+extern HReg hregARM_R15 ( void );
+extern HReg hregARM_D8 ( void );
+extern HReg hregARM_D9 ( void );
+extern HReg hregARM_D10 ( void );
+extern HReg hregARM_D11 ( void );
+extern HReg hregARM_D12 ( void );
+extern HReg hregARM_S26 ( void );
+extern HReg hregARM_S27 ( void );
+extern HReg hregARM_S28 ( void );
+extern HReg hregARM_S29 ( void );
+extern HReg hregARM_S30 ( void );
+extern HReg hregARM_Q8 ( void );
+extern HReg hregARM_Q9 ( void );
+extern HReg hregARM_Q10 ( void );
+extern HReg hregARM_Q11 ( void );
+extern HReg hregARM_Q12 ( void );
+extern HReg hregARM_Q13 ( void );
+extern HReg hregARM_Q14 ( void );
+extern HReg hregARM_Q15 ( void );
+
+/* Number of registers used for arg passing in function calls */
+#define ARM_N_ARGREGS 4 /* r0, r1, r2, r3 */
+
+
+/* --------- Condition codes. --------- */
+
+typedef
+ enum {
+ ARMcc_EQ = 0, /* equal : Z=1 */
+ ARMcc_NE = 1, /* not equal : Z=0 */
+
+ ARMcc_HS = 2, /* >=u (higher or same) : C=1 */
+ ARMcc_LO = 3, /* <u (lower) : C=0 */
+
+ ARMcc_MI = 4, /* minus (negative) : N=1 */
+ ARMcc_PL = 5, /* plus (zero or +ve) : N=0 */
+
+ ARMcc_VS = 6, /* overflow : V=1 */
+ ARMcc_VC = 7, /* no overflow : V=0 */
+
+ ARMcc_HI = 8, /* >u (higher) : C=1 && Z=0 */
+ ARMcc_LS = 9, /* <=u (lower or same) : C=0 || Z=1 */
+
+ ARMcc_GE = 10, /* >=s (signed greater or equal) : N=V */
+ ARMcc_LT = 11, /* <s (signed less than) : N!=V */
+
+ ARMcc_GT = 12, /* >s (signed greater) : Z=0 && N=V */
+ ARMcc_LE = 13, /* <=s (signed less or equal) : Z=1 || N!=V */
+
+ ARMcc_AL = 14, /* always (unconditional) */
+ ARMcc_NV = 15 /* never (basically undefined meaning), deprecated */
+ }
+ ARMCondCode;
+
+extern HChar* showARMCondCode ( ARMCondCode );
+
+
+
+/* --------- Memory address expressions (amodes). --------- */
+
+/* --- Addressing Mode 1 --- */
+typedef
+ enum {
+ ARMam1_RI=1, /* reg +/- imm12 */
+      ARMam1_RRS /* reg1 + (reg2 << 0, 1, 2 or 3) */
+ }
+ ARMAMode1Tag;
+
+typedef
+ struct {
+ ARMAMode1Tag tag;
+ union {
+ struct {
+ HReg reg;
+ Int simm13; /* -4095 .. +4095 */
+ } RI;
+ struct {
+ HReg base;
+ HReg index;
+            UInt shift; /* 0, 1, 2 or 3 */
+ } RRS;
+ } ARMam1;
+ }
+ ARMAMode1;
+
+extern ARMAMode1* ARMAMode1_RI ( HReg reg, Int simm13 );
+extern ARMAMode1* ARMAMode1_RRS ( HReg base, HReg index, UInt shift );
+
+extern void ppARMAMode1 ( ARMAMode1* );
+
+
+/* --- Addressing Mode 2 --- */
+typedef
+ enum {
+ ARMam2_RI=3, /* reg +/- imm8 */
+ ARMam2_RR /* reg1 + reg2 */
+ }
+ ARMAMode2Tag;
+
+typedef
+ struct {
+ ARMAMode2Tag tag;
+ union {
+ struct {
+ HReg reg;
+ Int simm9; /* -255 .. 255 */
+ } RI;
+ struct {
+ HReg base;
+ HReg index;
+ } RR;
+ } ARMam2;
+ }
+ ARMAMode2;
+
+extern ARMAMode2* ARMAMode2_RI ( HReg reg, Int simm9 );
+extern ARMAMode2* ARMAMode2_RR ( HReg base, HReg index );
+
+extern void ppARMAMode2 ( ARMAMode2* );
+
+
+/* --- Addressing Mode suitable for VFP --- */
+/* The simm11 is encoded as 8 bits + 1 sign bit,
+   so its value can only be a multiple of 4. */
+typedef
+ struct {
+ HReg reg;
+ Int simm11; /* -1020, -1016 .. 1016, 1020 */
+ }
+ ARMAModeV;
+
+extern ARMAModeV* mkARMAModeV ( HReg reg, Int simm11 );
+
+extern void ppARMAModeV ( ARMAModeV* );
+
+/* --- Addressing Mode suitable for Neon --- */
+typedef
+ enum {
+ ARMamN_R=5,
+ ARMamN_RR
+ /* ... */
+ }
+ ARMAModeNTag;
+
+typedef
+ struct {
+ ARMAModeNTag tag;
+ union {
+ struct {
+ HReg rN;
+ HReg rM;
+ } RR;
+ struct {
+ HReg rN;
+ } R;
+ /* ... */
+ } ARMamN;
+ }
+ ARMAModeN;
+
+extern ARMAModeN* mkARMAModeN_RR ( HReg, HReg );
+extern ARMAModeN* mkARMAModeN_R ( HReg );
+extern void ppARMAModeN ( ARMAModeN* );
+
+/* --------- Reg or imm-8x4 operands --------- */
+/* a.k.a. (a very restricted form of) the Shifter Operand,
+   in ARM parlance. */
+
+typedef
+ enum {
+ ARMri84_I84=7, /* imm8 `ror` (2 * imm4) */
+ ARMri84_R /* reg */
+ }
+ ARMRI84Tag;
+
+typedef
+ struct {
+ ARMRI84Tag tag;
+ union {
+ struct {
+ UShort imm8;
+ UShort imm4;
+ } I84;
+ struct {
+ HReg reg;
+ } R;
+ } ARMri84;
+ }
+ ARMRI84;
+
+extern ARMRI84* ARMRI84_I84 ( UShort imm8, UShort imm4 );
+extern ARMRI84* ARMRI84_R ( HReg );
+
+extern void ppARMRI84 ( ARMRI84* );
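+
+/* For instance (illustrative), ARMRI84_I84(0xFF, 12) denotes the
+   immediate 0xFF `ror` 24 == 0x0000FF00, and ARMRI84_R(r) denotes a
+   plain register operand. */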
+
+
+/* --------- Reg or imm5 operands --------- */
+typedef
+ enum {
+ ARMri5_I5=9, /* imm5, 1 .. 31 only (no zero!) */
+ ARMri5_R /* reg */
+ }
+ ARMRI5Tag;
+
+typedef
+ struct {
+ ARMRI5Tag tag;
+ union {
+ struct {
+ UInt imm5;
+ } I5;
+ struct {
+ HReg reg;
+ } R;
+ } ARMri5;
+ }
+ ARMRI5;
+
+extern ARMRI5* ARMRI5_I5 ( UInt imm5 );
+extern ARMRI5* ARMRI5_R ( HReg );
+
+extern void ppARMRI5 ( ARMRI5* );
+
+/* -------- Neon Immediate operand -------- */
+
+/* imm8 = abcdefgh, B = NOT(b);
+
+type | value (64bit binary)
+-----+-------------------------------------------------------------------------
+ 0 | 00000000 00000000 00000000 abcdefgh 00000000 00000000 00000000 abcdefgh
+ 1 | 00000000 00000000 abcdefgh 00000000 00000000 00000000 abcdefgh 00000000
+ 2 | 00000000 abcdefgh 00000000 00000000 00000000 abcdefgh 00000000 00000000
+ 3 | abcdefgh 00000000 00000000 00000000 abcdefgh 00000000 00000000 00000000
+ 4 | 00000000 abcdefgh 00000000 abcdefgh 00000000 abcdefgh 00000000 abcdefgh
+ 5 | abcdefgh 00000000 abcdefgh 00000000 abcdefgh 00000000 abcdefgh 00000000
+ 6 | abcdefgh abcdefgh abcdefgh abcdefgh abcdefgh abcdefgh abcdefgh abcdefgh
+ 7 | 00000000 00000000 abcdefgh 11111111 00000000 00000000 abcdefgh 11111111
+ 8 | 00000000 abcdefgh 11111111 11111111 00000000 abcdefgh 11111111 11111111
+ 9 | aaaaaaaa bbbbbbbb cccccccc dddddddd eeeeeeee ffffffff gggggggg hhhhhhhh
+ 10 | aBbbbbbc defgh000 00000000 00000000 aBbbbbbc defgh000 00000000 00000000
+-----+-------------------------------------------------------------------------
+
+Type 10 is:
+ (-1)^S * 2^exp * mantissa
+where S = a, exp = UInt(B:c:d) - 3, mantissa = (16 + UInt(e:f:g:h)) / 16
+*/
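+
+/* Worked example (illustrative): with type 10, imm8 = 0x70
+   (a=0 b=1 c=1 d=1 efgh=0000) gives S = 0, exp = UInt(0:1:1) - 3 = 0
+   and mantissa = 16/16, i.e. the value +1.0 replicated across the
+   vector. */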
+
+typedef
+ struct {
+ UInt type;
+ UInt imm8;
+ }
+ ARMNImm;
+
+extern ARMNImm* ARMNImm_TI ( UInt type, UInt imm8 );
+extern ULong ARMNImm_to_Imm64 ( ARMNImm* );
+extern ARMNImm* Imm64_to_ARMNImm ( ULong );
+
+extern void ppARMNImm ( ARMNImm* );
+
+/* ------ Neon Register or Scalar Operand ------ */
+
+typedef
+ enum {
+ ARMNRS_Reg=11,
+ ARMNRS_Scalar
+ }
+ ARMNRS_tag;
+
+typedef
+ struct {
+ ARMNRS_tag tag;
+ HReg reg;
+ UInt index;
+ }
+ ARMNRS;
+
+extern ARMNRS* mkARMNRS(ARMNRS_tag, HReg reg, UInt index);
+extern void ppARMNRS ( ARMNRS* );
+
+/* --------- Instructions. --------- */
+
+/* --------- */
+typedef
+ enum {
+ ARMalu_ADD=20, /* plain 32-bit add */
+ ARMalu_ADDS, /* 32-bit add, and set the flags */
+ ARMalu_ADC, /* 32-bit add with carry */
+ ARMalu_SUB, /* plain 32-bit subtract */
+ ARMalu_SUBS, /* 32-bit subtract, and set the flags */
+ ARMalu_SBC, /* 32-bit subtract with carry */
+ ARMalu_AND,
+ ARMalu_BIC,
+ ARMalu_OR,
+ ARMalu_XOR
+ }
+ ARMAluOp;
+
+extern HChar* showARMAluOp ( ARMAluOp op );
+
+
+typedef
+ enum {
+ ARMsh_SHL=40,
+ ARMsh_SHR,
+ ARMsh_SAR
+ }
+ ARMShiftOp;
+
+extern HChar* showARMShiftOp ( ARMShiftOp op );
+
+
+typedef
+ enum {
+ ARMun_NEG=50,
+ ARMun_NOT,
+ ARMun_CLZ
+ }
+ ARMUnaryOp;
+
+extern HChar* showARMUnaryOp ( ARMUnaryOp op );
+
+
+typedef
+ enum {
+ ARMmul_PLAIN=60,
+ ARMmul_ZX,
+ ARMmul_SX
+ }
+ ARMMulOp;
+
+extern HChar* showARMMulOp ( ARMMulOp op );
+
+
+typedef
+ enum {
+ ARMvfp_ADD=70,
+ ARMvfp_SUB,
+ ARMvfp_MUL,
+ ARMvfp_DIV
+ }
+ ARMVfpOp;
+
+extern HChar* showARMVfpOp ( ARMVfpOp op );
+
+
+typedef
+ enum {
+ ARMvfpu_COPY=80,
+ ARMvfpu_NEG,
+ ARMvfpu_ABS,
+ ARMvfpu_SQRT
+ }
+ ARMVfpUnaryOp;
+
+extern HChar* showARMVfpUnaryOp ( ARMVfpUnaryOp op );
+
+typedef
+ enum {
+ ARMneon_VAND=90,
+ ARMneon_VORR,
+ ARMneon_VXOR,
+ ARMneon_VADD,
+ ARMneon_VADDFP,
+ ARMneon_VRHADDS,
+ ARMneon_VRHADDU,
+ ARMneon_VPADDFP,
+ ARMneon_VABDFP,
+ ARMneon_VSUB,
+ ARMneon_VSUBFP,
+ ARMneon_VMAXU,
+ ARMneon_VMAXS,
+ ARMneon_VMAXF,
+ ARMneon_VMINU,
+ ARMneon_VMINS,
+ ARMneon_VMINF,
+ ARMneon_VQADDU,
+ ARMneon_VQADDS,
+ ARMneon_VQSUBU,
+ ARMneon_VQSUBS,
+ ARMneon_VCGTU,
+ ARMneon_VCGTS,
+ ARMneon_VCGEU,
+ ARMneon_VCGES,
+ ARMneon_VCGTF,
+ ARMneon_VCGEF,
+ ARMneon_VCEQ,
+ ARMneon_VCEQF,
+ ARMneon_VEXT,
+ ARMneon_VMUL,
+ ARMneon_VMULFP,
+ ARMneon_VMULLU,
+ ARMneon_VMULLS,
+ ARMneon_VMULP,
+ ARMneon_VMULLP,
+ ARMneon_VQDMULH,
+ ARMneon_VQRDMULH,
+ ARMneon_VPADD,
+ ARMneon_VPMINU,
+ ARMneon_VPMINS,
+ ARMneon_VPMINF,
+ ARMneon_VPMAXU,
+ ARMneon_VPMAXS,
+ ARMneon_VPMAXF,
+ ARMneon_VTBL,
+ ARMneon_VQDMULL,
+ ARMneon_VRECPS,
+ ARMneon_VRSQRTS,
+ /* ... */
+ }
+ ARMNeonBinOp;
+
+typedef
+ enum {
+ ARMneon_VSHL=150,
+ ARMneon_VSAL, /* Yah, not SAR but SAL */
+ ARMneon_VQSHL,
+ ARMneon_VQSAL
+ }
+ ARMNeonShiftOp;
+
+typedef
+ enum {
+ ARMneon_COPY=160,
+ ARMneon_COPYLU,
+ ARMneon_COPYLS,
+ ARMneon_COPYN,
+ ARMneon_COPYQNSS,
+ ARMneon_COPYQNUS,
+ ARMneon_COPYQNUU,
+ ARMneon_NOT,
+ ARMneon_EQZ,
+ ARMneon_DUP,
+ ARMneon_PADDLS,
+ ARMneon_PADDLU,
+ ARMneon_CNT,
+ ARMneon_CLZ,
+ ARMneon_CLS,
+ ARMneon_VCVTxFPxINT,
+ ARMneon_VQSHLNSS,
+ ARMneon_VQSHLNUU,
+ ARMneon_VQSHLNUS,
+ ARMneon_VCVTFtoU,
+ ARMneon_VCVTFtoS,
+ ARMneon_VCVTUtoF,
+ ARMneon_VCVTStoF,
+ ARMneon_VCVTFtoFixedU,
+ ARMneon_VCVTFtoFixedS,
+ ARMneon_VCVTFixedUtoF,
+ ARMneon_VCVTFixedStoF,
+ ARMneon_VCVTF16toF32,
+ ARMneon_VCVTF32toF16,
+ ARMneon_REV16,
+ ARMneon_REV32,
+ ARMneon_REV64,
+ ARMneon_ABS,
+ ARMneon_VNEGF,
+ ARMneon_VRECIP,
+ ARMneon_VRECIPF,
+ ARMneon_VABSFP,
+ ARMneon_VRSQRTEFP,
+ ARMneon_VRSQRTE
+ /* ... */
+ }
+ ARMNeonUnOp;
+
+typedef
+ enum {
+ ARMneon_SETELEM=200,
+ ARMneon_GETELEMU,
+ ARMneon_GETELEMS,
+ ARMneon_VDUP,
+ }
+ ARMNeonUnOpS;
+
+typedef
+ enum {
+ ARMneon_TRN=210,
+ ARMneon_ZIP,
+ ARMneon_UZP
+ /* ... */
+ }
+ ARMNeonDualOp;
+
+extern HChar* showARMNeonBinOp ( ARMNeonBinOp op );
+extern HChar* showARMNeonUnOp ( ARMNeonUnOp op );
+extern HChar* showARMNeonUnOpS ( ARMNeonUnOpS op );
+extern HChar* showARMNeonShiftOp ( ARMNeonShiftOp op );
+extern HChar* showARMNeonDualOp ( ARMNeonDualOp op );
+extern HChar* showARMNeonBinOpDataType ( ARMNeonBinOp op );
+extern HChar* showARMNeonUnOpDataType ( ARMNeonUnOp op );
+extern HChar* showARMNeonUnOpSDataType ( ARMNeonUnOpS op );
+extern HChar* showARMNeonShiftOpDataType ( ARMNeonShiftOp op );
+extern HChar* showARMNeonDualOpDataType ( ARMNeonDualOp op );
+
+typedef
+ enum {
+ /* baseline */
+ ARMin_Alu=220,
+ ARMin_Shift,
+ ARMin_Unary,
+ ARMin_CmpOrTst,
+ ARMin_Mov,
+ ARMin_Imm32,
+ ARMin_LdSt32,
+ ARMin_LdSt16,
+ ARMin_LdSt8U,
+ ARMin_Ld8S,
+ ARMin_Goto,
+ ARMin_CMov,
+ ARMin_Call,
+ ARMin_Mul,
+ ARMin_LdrEX,
+ ARMin_StrEX,
+ /* vfp */
+ ARMin_VLdStD,
+ ARMin_VLdStS,
+ ARMin_VAluD,
+ ARMin_VAluS,
+ ARMin_VUnaryD,
+ ARMin_VUnaryS,
+ ARMin_VCmpD,
+ ARMin_VCMovD,
+ ARMin_VCMovS,
+ ARMin_VCvtSD,
+ ARMin_VXferD,
+ ARMin_VXferS,
+ ARMin_VCvtID,
+ ARMin_FPSCR,
+ ARMin_MFence,
+ /* Neon */
+ ARMin_NLdStQ,
+ ARMin_NLdStD,
+ ARMin_NUnary,
+ ARMin_NUnaryS,
+ ARMin_NDual,
+ ARMin_NBinary,
+ ARMin_NBinaryS,
+ ARMin_NShift,
+ ARMin_NeonImm,
+ ARMin_NCMovQ,
+      /* This is not a NEON instruction.  In fact there is no
+         corresponding instruction in the ARM instruction set at all.
+         We need it to generate spill/reload of 128-bit registers,
+         since the current register allocator requires a spill/reload
+         to consist of no more than two instructions.  We split this
+         instruction into 2 or 3 real ARM instructions during the
+         emitting phase.
+
+         NOTE: source and destination registers must be different! */
+ ARMin_Add32
+ }
+ ARMInstrTag;
+
+/* Destinations are on the LEFT (first operand) */
+
+typedef
+ struct {
+ ARMInstrTag tag;
+ union {
+ /* ADD/SUB/AND/OR/XOR, vanilla ALU op */
+ struct {
+ ARMAluOp op;
+ HReg dst;
+ HReg argL;
+ ARMRI84* argR;
+ } Alu;
+ /* SHL/SHR/SAR, 2nd arg is reg or imm */
+ struct {
+ ARMShiftOp op;
+ HReg dst;
+ HReg argL;
+ ARMRI5* argR;
+ } Shift;
+ /* NOT/NEG/CLZ */
+ struct {
+ ARMUnaryOp op;
+ HReg dst;
+ HReg src;
+ } Unary;
+ /* CMP/TST; subtract/and, discard result, set NZCV */
+ struct {
+ Bool isCmp;
+ HReg argL;
+ ARMRI84* argR;
+ } CmpOrTst;
+ /* MOV dst, src -- reg-reg (or reg-imm8x4) move */
+ struct {
+ HReg dst;
+ ARMRI84* src;
+ } Mov;
+ /* Pseudo-insn; make a 32-bit immediate */
+ struct {
+ HReg dst;
+ UInt imm32;
+ } Imm32;
+ /* 32-bit load or store */
+ struct {
+ Bool isLoad;
+ HReg rD;
+ ARMAMode1* amode;
+ } LdSt32;
+ /* 16-bit load or store */
+ struct {
+ Bool isLoad;
+ Bool signedLoad;
+ HReg rD;
+ ARMAMode2* amode;
+ } LdSt16;
+ /* 8-bit (unsigned) load or store */
+ struct {
+ Bool isLoad;
+ HReg rD;
+ ARMAMode1* amode;
+ } LdSt8U;
+ /* 8-bit signed load */
+ struct {
+ HReg rD;
+ ARMAMode2* amode;
+ } Ld8S;
+ /* Pseudo-insn. Go to guest address gnext, on given
+ condition, which could be ARMcc_AL. */
+ struct {
+ IRJumpKind jk;
+ ARMCondCode cond;
+ HReg gnext;
+ } Goto;
+ /* Mov src to dst on the given condition, which may not
+ be ARMcc_AL. */
+ struct {
+ ARMCondCode cond;
+ HReg dst;
+ ARMRI84* src;
+ } CMov;
+ /* Pseudo-insn. Call target (an absolute address), on given
+ condition (which could be ARMcc_AL). */
+ struct {
+ ARMCondCode cond;
+ HWord target;
+ Int nArgRegs; /* # regs carrying args: 0 .. 4 */
+ } Call;
+ /* (PLAIN) 32 * 32 -> 32: r0 = r2 * r3
+ (ZX) 32 *u 32 -> 64: r1:r0 = r2 *u r3
+ (SX) 32 *s 32 -> 64: r1:r0 = r2 *s r3
+ Why hardwired registers? Because the ARM ARM specifies
+ (eg for straight MUL) the result (Rd) and the left arg (Rm)
+ may not be the same register. That's not a constraint we
+ can enforce in the register allocator (without mucho extra
+ complexity). Hence hardwire it. At least using caller-saves
+ registers, which are less likely to be in use. */
+ struct {
+ ARMMulOp op;
+ } Mul;
+ /* LDREX{,H,B} r0, [r1]
+ Again, hardwired registers since this is not performance
+ critical, and there are possibly constraints on the
+ registers that we can't express in the register allocator.*/
+ struct {
+         Int szB; /* currently only 4 and 1 are allowed */
+ } LdrEX;
+ /* STREX{,H,B} r0, r1, [r2]
+ r0 = SC( [r2] = r1 )
+ Ditto comment re fixed registers. */
+ struct {
+         Int szB; /* currently only 4 and 1 are allowed */
+ } StrEX;
+ /* VFP INSTRUCTIONS */
+ /* 64-bit Fp load/store */
+ struct {
+ Bool isLoad;
+ HReg dD;
+ ARMAModeV* amode;
+ } VLdStD;
+ /* 32-bit Fp load/store */
+ struct {
+ Bool isLoad;
+ HReg fD;
+ ARMAModeV* amode;
+ } VLdStS;
+ /* 64-bit FP binary arithmetic */
+ struct {
+ ARMVfpOp op;
+ HReg dst;
+ HReg argL;
+ HReg argR;
+ } VAluD;
+ /* 32-bit FP binary arithmetic */
+ struct {
+ ARMVfpOp op;
+ HReg dst;
+ HReg argL;
+ HReg argR;
+ } VAluS;
+ /* 64-bit FP unary, also reg-reg move */
+ struct {
+ ARMVfpUnaryOp op;
+ HReg dst;
+ HReg src;
+ } VUnaryD;
+ /* 32-bit FP unary, also reg-reg move */
+ struct {
+ ARMVfpUnaryOp op;
+ HReg dst;
+ HReg src;
+ } VUnaryS;
+ /* 64-bit FP compare and move results to CPSR (FCMPD;FMSTAT) */
+ struct {
+ HReg argL;
+ HReg argR;
+ } VCmpD;
+ /* 64-bit FP mov src to dst on the given condition, which may
+ not be ARMcc_AL. */
+ struct {
+ ARMCondCode cond;
+ HReg dst;
+ HReg src;
+ } VCMovD;
+ /* 32-bit FP mov src to dst on the given condition, which may
+ not be ARMcc_AL. */
+ struct {
+ ARMCondCode cond;
+ HReg dst;
+ HReg src;
+ } VCMovS;
+ /* Convert between 32-bit and 64-bit FP values (both ways).
+ (FCVTSD, FCVTDS) */
+ struct {
+ Bool sToD; /* True: F32->F64. False: F64->F32 */
+ HReg dst;
+ HReg src;
+ } VCvtSD;
+ /* Transfer a VFP D reg to/from two integer registers (VMOV) */
+ struct {
+ Bool toD;
+ HReg dD;
+ HReg rHi;
+ HReg rLo;
+ } VXferD;
+ /* Transfer a VFP S reg to/from an integer register (VMOV) */
+ struct {
+ Bool toS;
+ HReg fD;
+ HReg rLo;
+ } VXferS;
+ /* Convert between 32-bit ints and 64-bit FP values (both ways
+ and both signednesses). (FSITOD, FUITOD, FTOSID, FTOUID) */
+ struct {
+ Bool iToD; /* True: I32->F64. False: F64->I32 */
+ Bool syned; /* True: I32 is signed. False: I32 is unsigned */
+ HReg dst;
+ HReg src;
+ } VCvtID;
+ /* Move a 32-bit value to/from the FPSCR (FMXR, FMRX) */
+ struct {
+ Bool toFPSCR;
+ HReg iReg;
+ } FPSCR;
+ /* Mem fence. An insn which fences all loads and stores as
+ much as possible before continuing. On ARM we emit the
+ sequence
+ mcr 15,0,r0,c7,c10,4 (DSB)
+ mcr 15,0,r0,c7,c10,5 (DMB)
+ mcr 15,0,r0,c7,c5,4 (ISB)
+ which is probably total overkill, but better safe than
+ sorry.
+ */
+ struct {
+ } MFence;
+ /* Neon data processing instruction: 3 registers of the same
+ length */
+ struct {
+ ARMNeonBinOp op;
+ HReg dst;
+ HReg argL;
+ HReg argR;
+ UInt size;
+ Bool Q;
+ } NBinary;
+ struct {
+ ARMNeonBinOp op;
+ ARMNRS* dst;
+ ARMNRS* argL;
+ ARMNRS* argR;
+ UInt size;
+ Bool Q;
+ } NBinaryS;
+ struct {
+ ARMNeonShiftOp op;
+ HReg dst;
+ HReg argL;
+ HReg argR;
+ UInt size;
+ Bool Q;
+ } NShift;
+ struct {
+ Bool isLoad;
+ HReg dQ;
+ ARMAModeN *amode;
+ } NLdStQ;
+ struct {
+ Bool isLoad;
+ HReg dD;
+ ARMAModeN *amode;
+ } NLdStD;
+ struct {
+ ARMNeonUnOpS op;
+ ARMNRS* dst;
+ ARMNRS* src;
+ UInt size;
+ Bool Q;
+ } NUnaryS;
+ struct {
+ ARMNeonUnOp op;
+ HReg dst;
+ HReg src;
+ UInt size;
+ Bool Q;
+ } NUnary;
+ /* Takes two arguments and modifies them both. */
+ struct {
+ ARMNeonDualOp op;
+ HReg arg1;
+ HReg arg2;
+ UInt size;
+ Bool Q;
+ } NDual;
+ struct {
+ HReg dst;
+ ARMNImm* imm;
+ } NeonImm;
+ /* 128-bit Neon move src to dst on the given condition, which
+ may not be ARMcc_AL. */
+ struct {
+ ARMCondCode cond;
+ HReg dst;
+ HReg src;
+ } NCMovQ;
+ struct {
+ /* Note: rD != rN */
+ HReg rD;
+ HReg rN;
+ UInt imm32;
+ } Add32;
+ } ARMin;
+ }
+ ARMInstr;
+
+
+extern ARMInstr* ARMInstr_Alu ( ARMAluOp, HReg, HReg, ARMRI84* );
+extern ARMInstr* ARMInstr_Shift ( ARMShiftOp, HReg, HReg, ARMRI5* );
+extern ARMInstr* ARMInstr_Unary ( ARMUnaryOp, HReg, HReg );
+extern ARMInstr* ARMInstr_CmpOrTst ( Bool isCmp, HReg, ARMRI84* );
+extern ARMInstr* ARMInstr_Mov ( HReg, ARMRI84* );
+extern ARMInstr* ARMInstr_Imm32 ( HReg, UInt );
+extern ARMInstr* ARMInstr_LdSt32 ( Bool isLoad, HReg, ARMAMode1* );
+extern ARMInstr* ARMInstr_LdSt16 ( Bool isLoad, Bool signedLoad,
+ HReg, ARMAMode2* );
+extern ARMInstr* ARMInstr_LdSt8U ( Bool isLoad, HReg, ARMAMode1* );
+extern ARMInstr* ARMInstr_Ld8S ( HReg, ARMAMode2* );
+extern ARMInstr* ARMInstr_Goto ( IRJumpKind, ARMCondCode, HReg gnext );
+extern ARMInstr* ARMInstr_CMov ( ARMCondCode, HReg dst, ARMRI84* src );
+extern ARMInstr* ARMInstr_Call ( ARMCondCode, HWord, Int nArgRegs );
+extern ARMInstr* ARMInstr_Mul ( ARMMulOp op );
+extern ARMInstr* ARMInstr_LdrEX ( Int szB );
+extern ARMInstr* ARMInstr_StrEX ( Int szB );
+extern ARMInstr* ARMInstr_VLdStD ( Bool isLoad, HReg, ARMAModeV* );
+extern ARMInstr* ARMInstr_VLdStS ( Bool isLoad, HReg, ARMAModeV* );
+extern ARMInstr* ARMInstr_VAluD ( ARMVfpOp op, HReg, HReg, HReg );
+extern ARMInstr* ARMInstr_VAluS ( ARMVfpOp op, HReg, HReg, HReg );
+extern ARMInstr* ARMInstr_VUnaryD ( ARMVfpUnaryOp, HReg dst, HReg src );
+extern ARMInstr* ARMInstr_VUnaryS ( ARMVfpUnaryOp, HReg dst, HReg src );
+extern ARMInstr* ARMInstr_VCmpD ( HReg argL, HReg argR );
+extern ARMInstr* ARMInstr_VCMovD ( ARMCondCode, HReg dst, HReg src );
+extern ARMInstr* ARMInstr_VCMovS ( ARMCondCode, HReg dst, HReg src );
+extern ARMInstr* ARMInstr_VCvtSD ( Bool sToD, HReg dst, HReg src );
+extern ARMInstr* ARMInstr_VXferD ( Bool toD, HReg dD, HReg rHi, HReg rLo );
+extern ARMInstr* ARMInstr_VXferS ( Bool toS, HReg fD, HReg rLo );
+extern ARMInstr* ARMInstr_VCvtID ( Bool iToD, Bool syned,
+ HReg dst, HReg src );
+extern ARMInstr* ARMInstr_FPSCR ( Bool toFPSCR, HReg iReg );
+extern ARMInstr* ARMInstr_MFence ( void );
+extern ARMInstr* ARMInstr_NLdStQ ( Bool isLoad, HReg, ARMAModeN* );
+extern ARMInstr* ARMInstr_NLdStD ( Bool isLoad, HReg, ARMAModeN* );
+extern ARMInstr* ARMInstr_NUnary ( ARMNeonUnOp, HReg, HReg, UInt, Bool );
+extern ARMInstr* ARMInstr_NUnaryS ( ARMNeonUnOp, ARMNRS*, ARMNRS*,
+ UInt, Bool );
+extern ARMInstr* ARMInstr_NDual ( ARMNeonDualOp, HReg, HReg, UInt, Bool );
+extern ARMInstr* ARMInstr_NBinary ( ARMNeonBinOp, HReg, HReg, HReg,
+ UInt, Bool );
+extern ARMInstr* ARMInstr_NShift ( ARMNeonShiftOp, HReg, HReg, HReg,
+ UInt, Bool );
+extern ARMInstr* ARMInstr_NeonImm ( HReg, ARMNImm* );
+extern ARMInstr* ARMInstr_NCMovQ ( ARMCondCode, HReg, HReg );
+extern ARMInstr* ARMInstr_Add32 ( HReg rD, HReg rN, UInt imm32 );
+
+extern void ppARMInstr ( ARMInstr* );
+
+
+/* Some functions that insulate the register allocator from details
+ of the underlying instruction set. */
+extern void getRegUsage_ARMInstr ( HRegUsage*, ARMInstr*, Bool );
+extern void mapRegs_ARMInstr ( HRegRemap*, ARMInstr*, Bool );
+extern Bool isMove_ARMInstr ( ARMInstr*, HReg*, HReg* );
+extern Int emit_ARMInstr ( UChar* buf, Int nbuf, ARMInstr*,
+ Bool, void* dispatch );
+
+extern void genSpill_ARM ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
+ HReg rreg, Int offset, Bool );
+extern void genReload_ARM ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
+ HReg rreg, Int offset, Bool );
+
+extern void getAllocableRegs_ARM ( Int*, HReg** );
+extern HInstrArray* iselSB_ARM ( IRSB*, VexArch,
+ VexArchInfo*, VexAbiInfo* );
+
+#endif /* ndef __VEX_HOST_ARM_DEFS_H */
+
+/*---------------------------------------------------------------*/
+/*--- end host_arm_defs.h ---*/
+/*---------------------------------------------------------------*/
diff --git a/VEX/priv/host_arm_isel.c b/VEX/priv/host_arm_isel.c
new file mode 100644
index 0000000..4bba9a3
--- /dev/null
+++ b/VEX/priv/host_arm_isel.c
@@ -0,0 +1,6023 @@
+
+/*---------------------------------------------------------------*/
+/*--- begin host_arm_isel.c ---*/
+/*---------------------------------------------------------------*/
+
+/*
+ This file is part of Valgrind, a dynamic binary instrumentation
+ framework.
+
+ Copyright (C) 2004-2010 OpenWorks LLP
+ info@open-works.net
+
+ NEON support is
+ Copyright (C) 2010-2010 Samsung Electronics
+ contributed by Dmitry Zhurikhin <zhur@ispras.ru>
+ and Kirill Batuzov <batuzovk@ispras.ru>
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA.
+
+ The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "libvex_basictypes.h"
+#include "libvex_ir.h"
+#include "libvex.h"
+#include "ir_match.h"
+
+#include "main_util.h"
+#include "main_globals.h"
+#include "host_generic_regs.h"
+#include "host_generic_simd64.h" // for 32-bit SIMD helpers
+#include "host_arm_defs.h"
+
+
+/*---------------------------------------------------------*/
+/*--- ARMvfp control word stuff ---*/
+/*---------------------------------------------------------*/
+
+/* Vex-generated code expects to run with the FPU set as follows: all
+ exceptions masked, round-to-nearest, non-vector mode, with the NZCV
+ flags cleared, and FZ (flush to zero) disabled. Curiously enough,
+ this corresponds to a FPSCR value of zero.
+
+ fpscr should therefore be zero on entry to Vex-generated code, and
+ should be unchanged at exit. (Or at least the bottom 28 bits
+ should be zero).
+*/
+
+#define DEFAULT_FPSCR 0
+
+
+/*---------------------------------------------------------*/
+/*--- ISelEnv ---*/
+/*---------------------------------------------------------*/
+
+/* This carries around:
+
+ - A mapping from IRTemp to IRType, giving the type of any IRTemp we
+ might encounter. This is computed before insn selection starts,
+ and does not change.
+
+ - A mapping from IRTemp to HReg. This tells the insn selector
+ which virtual register(s) are associated with each IRTemp
+ temporary. This is computed before insn selection starts, and
+ does not change. We expect this mapping to map precisely the
+ same set of IRTemps as the type mapping does.
+
+ - vregmap holds the primary register for the IRTemp.
+ - vregmapHI is only used for 64-bit integer-typed
+ IRTemps. It holds the identity of a second
+ 32-bit virtual HReg, which holds the high half
+ of the value.
+
+ - The name of the vreg in which we stash a copy of the link reg, so
+ helper functions don't kill it.
+
+ - The code array, that is, the insns selected so far.
+
+ - A counter, for generating new virtual registers.
+
+ - The host hardware capabilities word. This is set at the start
+ and does not change.
+
+   Note, this is all (well, mostly) host-independent. */
+
+typedef
+ struct {
+ IRTypeEnv* type_env;
+
+ HReg* vregmap;
+ HReg* vregmapHI;
+ Int n_vregmap;
+
+ HReg savedLR;
+
+ HInstrArray* code;
+
+ Int vreg_ctr;
+
+ UInt hwcaps;
+ }
+ ISelEnv;
+
+static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp )
+{
+ vassert(tmp >= 0);
+ vassert(tmp < env->n_vregmap);
+ return env->vregmap[tmp];
+}
+
+static void lookupIRTemp64 ( HReg* vrHI, HReg* vrLO, ISelEnv* env, IRTemp tmp )
+{
+ vassert(tmp >= 0);
+ vassert(tmp < env->n_vregmap);
+ vassert(env->vregmapHI[tmp] != INVALID_HREG);
+ *vrLO = env->vregmap[tmp];
+ *vrHI = env->vregmapHI[tmp];
+}
+
+static void addInstr ( ISelEnv* env, ARMInstr* instr )
+{
+ addHInstr(env->code, instr);
+ if (vex_traceflags & VEX_TRACE_VCODE) {
+ ppARMInstr(instr);
+ vex_printf("\n");
+ }
+#if 0
+ if (instr->tag == ARMin_NUnary || instr->tag == ARMin_NBinary
+ || instr->tag == ARMin_NUnaryS || instr->tag == ARMin_NBinaryS
+ || instr->tag == ARMin_NDual || instr->tag == ARMin_NShift) {
+ ppARMInstr(instr);
+ vex_printf("\n");
+ }
+#endif
+}
+
+static HReg newVRegI ( ISelEnv* env )
+{
+ HReg reg = mkHReg(env->vreg_ctr, HRcInt32, True/*virtual reg*/);
+ env->vreg_ctr++;
+ return reg;
+}
+
+static HReg newVRegD ( ISelEnv* env )
+{
+ HReg reg = mkHReg(env->vreg_ctr, HRcFlt64, True/*virtual reg*/);
+ env->vreg_ctr++;
+ return reg;
+}
+
+static HReg newVRegF ( ISelEnv* env )
+{
+ HReg reg = mkHReg(env->vreg_ctr, HRcFlt32, True/*virtual reg*/);
+ env->vreg_ctr++;
+ return reg;
+}
+
+static HReg newVRegV ( ISelEnv* env )
+{
+ HReg reg = mkHReg(env->vreg_ctr, HRcVec128, True/*virtual reg*/);
+ env->vreg_ctr++;
+ return reg;
+}
+
+/* These are duplicated in guest_arm_toIR.c */
+static IRExpr* unop ( IROp op, IRExpr* a )
+{
+ return IRExpr_Unop(op, a);
+}
+
+static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
+{
+ return IRExpr_Binop(op, a1, a2);
+}
+
+static IRExpr* bind ( Int binder )
+{
+ return IRExpr_Binder(binder);
+}
+
+
+/*---------------------------------------------------------*/
+/*--- ISEL: Forward declarations ---*/
+/*---------------------------------------------------------*/
+
+/* These are organised as iselXXX and iselXXX_wrk pairs. The
+ iselXXX_wrk do the real work, but are not to be called directly.
+ For each XXX, iselXXX calls its iselXXX_wrk counterpart, then
+   checks that all returned registers are virtual.
+*/
+static ARMAMode1* iselIntExpr_AMode1_wrk ( ISelEnv* env, IRExpr* e );
+static ARMAMode1* iselIntExpr_AMode1 ( ISelEnv* env, IRExpr* e );
+
+static ARMAMode2* iselIntExpr_AMode2_wrk ( ISelEnv* env, IRExpr* e );
+static ARMAMode2* iselIntExpr_AMode2 ( ISelEnv* env, IRExpr* e );
+
+static ARMAModeV* iselIntExpr_AModeV_wrk ( ISelEnv* env, IRExpr* e );
+static ARMAModeV* iselIntExpr_AModeV ( ISelEnv* env, IRExpr* e );
+
+static ARMAModeN* iselIntExpr_AModeN_wrk ( ISelEnv* env, IRExpr* e );
+static ARMAModeN* iselIntExpr_AModeN ( ISelEnv* env, IRExpr* e );
+
+static ARMRI84* iselIntExpr_RI84_wrk
+ ( /*OUT*/Bool* didInv, Bool mayInv, ISelEnv* env, IRExpr* e );
+static ARMRI84* iselIntExpr_RI84
+ ( /*OUT*/Bool* didInv, Bool mayInv, ISelEnv* env, IRExpr* e );
+
+static ARMRI5* iselIntExpr_RI5_wrk ( ISelEnv* env, IRExpr* e );
+static ARMRI5* iselIntExpr_RI5 ( ISelEnv* env, IRExpr* e );
+
+static ARMCondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e );
+static ARMCondCode iselCondCode ( ISelEnv* env, IRExpr* e );
+
+static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e );
+static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e );
+
+static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo,
+ ISelEnv* env, IRExpr* e );
+static void iselInt64Expr ( HReg* rHi, HReg* rLo,
+ ISelEnv* env, IRExpr* e );
+
+static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e );
+static HReg iselDblExpr ( ISelEnv* env, IRExpr* e );
+
+static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e );
+static HReg iselFltExpr ( ISelEnv* env, IRExpr* e );
+
+static HReg iselNeon64Expr_wrk ( ISelEnv* env, IRExpr* e );
+static HReg iselNeon64Expr ( ISelEnv* env, IRExpr* e );
+
+static HReg iselNeonExpr_wrk ( ISelEnv* env, IRExpr* e );
+static HReg iselNeonExpr ( ISelEnv* env, IRExpr* e );
+
+/*---------------------------------------------------------*/
+/*--- ISEL: Misc helpers ---*/
+/*---------------------------------------------------------*/
+
+static UInt ROR32 ( UInt x, UInt sh ) {
+ vassert(sh >= 0 && sh < 32);
+ if (sh == 0)
+ return x;
+ else
+ return (x << (32-sh)) | (x >> sh);
+}
+
+/* Figure out if 'u' fits in the special shifter-operand 8x4 immediate
+ form, and if so return the components. */
+static Bool fitsIn8x4 ( /*OUT*/UInt* u8, /*OUT*/UInt* u4, UInt u )
+{
+ UInt i;
+ for (i = 0; i < 16; i++) {
+ if (0 == (u & 0xFFFFFF00)) {
+ *u8 = u;
+ *u4 = i;
+ return True;
+ }
+ u = ROR32(u, 30);
+ }
+ vassert(i == 16);
+ return False;
+}
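+
+/* Worked example (illustrative): fitsIn8x4(&u8, &u4, 0x0000AB00)
+   succeeds with u8 = 0xAB and u4 = 12, since 0xAB ROR (2*12)
+   == 0x0000AB00.  Each loop iteration rotates u left by 2
+   (ROR32(u,30) == rotate-left-by-2), and after 12 steps 0x0000AB00
+   has become 0xAB, which fits in 8 bits.  By contrast 0x101 always
+   fails: its set bits span 9 positions, so no rotation can bring
+   them within an 8-bit window. */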
+
+/* Make an integer reg-reg move. */
+static ARMInstr* mk_iMOVds_RR ( HReg dst, HReg src )
+{
+ vassert(hregClass(src) == HRcInt32);
+ vassert(hregClass(dst) == HRcInt32);
+ return ARMInstr_Mov(dst, ARMRI84_R(src));
+}
+
+/* Set the VFP unit's rounding mode to default (round to nearest). */
+static void set_VFP_rounding_default ( ISelEnv* env )
+{
+ /* mov rTmp, #DEFAULT_FPSCR
+ fmxr fpscr, rTmp
+ */
+ HReg rTmp = newVRegI(env);
+ addInstr(env, ARMInstr_Imm32(rTmp, DEFAULT_FPSCR));
+ addInstr(env, ARMInstr_FPSCR(True/*toFPSCR*/, rTmp));
+}
+
+/* Mess with the VFP unit's rounding mode: 'mode' is an I32-typed
+ expression denoting a value in the range 0 .. 3, indicating a round
+ mode encoded as per type IRRoundingMode. Set FPSCR to have the
+ same rounding.
+*/
+static
+void set_VFP_rounding_mode ( ISelEnv* env, IRExpr* mode )
+{
+ /* This isn't simple, because 'mode' carries an IR rounding
+ encoding, and we need to translate that to an ARMvfp one:
+ The IR encoding:
+ 00 to nearest (the default)
+ 10 to +infinity
+ 01 to -infinity
+ 11 to zero
+ The ARMvfp encoding:
+ 00 to nearest
+ 01 to +infinity
+ 10 to -infinity
+ 11 to zero
+ Easy enough to do; just swap the two bits.
+ */
+ HReg irrm = iselIntExpr_R(env, mode);
+ HReg tL = newVRegI(env);
+ HReg tR = newVRegI(env);
+ HReg t3 = newVRegI(env);
+ /* tL = irrm << 1;
+ tR = irrm >> 1; if we're lucky, these will issue together
+ tL &= 2;
+ tR &= 1; ditto
+ t3 = tL | tR;
+ t3 <<= 22;
+ fmxr fpscr, t3
+ */
+ addInstr(env, ARMInstr_Shift(ARMsh_SHL, tL, irrm, ARMRI5_I5(1)));
+ addInstr(env, ARMInstr_Shift(ARMsh_SHR, tR, irrm, ARMRI5_I5(1)));
+ addInstr(env, ARMInstr_Alu(ARMalu_AND, tL, tL, ARMRI84_I84(2,0)));
+ addInstr(env, ARMInstr_Alu(ARMalu_AND, tR, tR, ARMRI84_I84(1,0)));
+ addInstr(env, ARMInstr_Alu(ARMalu_OR, t3, tL, ARMRI84_R(tR)));
+ addInstr(env, ARMInstr_Shift(ARMsh_SHL, t3, t3, ARMRI5_I5(22)));
+ addInstr(env, ARMInstr_FPSCR(True/*toFPSCR*/, t3));
+}
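+
+/* Worked trace (illustrative): for IR mode 2 (10b, round to
+   +infinity), tL = (2 << 1) & 2 = 0 and tR = (2 >> 1) & 1 = 1,
+   so t3 = 0 | 1 = 1 (01b), the ARMvfp encoding of
+   round-to-+infinity.  The final shift by 22 places these two
+   bits in FPSCR[23:22], the VFP RMode field. */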
+
+
+/*---------------------------------------------------------*/
+/*--- ISEL: Function call helpers ---*/
+/*---------------------------------------------------------*/
+
+/* Used only in doHelperCall. See big comment in doHelperCall re
+ handling of register-parameter args. This function figures out
+ whether evaluation of an expression might require use of a fixed
+ register. If in doubt return True (safe but suboptimal).
+*/
+static
+Bool mightRequireFixedRegs ( IRExpr* e )
+{
+ switch (e->tag) {
+ case Iex_RdTmp: case Iex_Const: case Iex_Get:
+ return False;
+ default:
+ return True;
+ }
+}
+
+
+/* Do a complete function call. guard is a Ity_Bit expression
+ indicating whether or not the call happens. If guard==NULL, the
+ call is unconditional. Returns True iff it managed to handle this
+ combination of arg/return types, else returns False. */
+
+static
+Bool doHelperCall ( ISelEnv* env,
+ Bool passBBP,
+ IRExpr* guard, IRCallee* cee, IRExpr** args )
+{
+ ARMCondCode cc;
+ HReg argregs[ARM_N_ARGREGS];
+ HReg tmpregs[ARM_N_ARGREGS];
+ Bool go_fast;
+ Int n_args, i, nextArgReg;
+ ULong target;
+
+ vassert(ARM_N_ARGREGS == 4);
+
+ /* Marshal args for a call and do the call.
+
+ If passBBP is True, r8 (the baseblock pointer) is to be passed
+ as the first arg.
+
+ This function only deals with a tiny set of possibilities, which
+ cover all helpers in practice. The restrictions are that only
+      arguments in registers are supported, hence only ARM_N_ARGREGS
+ x 32 integer bits in total can be passed. In fact the only
+ supported arg types are I32 and I64.
+
+ Generating code which is both efficient and correct when
+ parameters are to be passed in registers is difficult, for the
+ reasons elaborated in detail in comments attached to
+ doHelperCall() in priv/host-x86/isel.c. Here, we use a variant
+ of the method described in those comments.
+
+ The problem is split into two cases: the fast scheme and the
+ slow scheme. In the fast scheme, arguments are computed
+ directly into the target (real) registers. This is only safe
+ when we can be sure that computation of each argument will not
+ trash any real registers set by computation of any other
+ argument.
+
+ In the slow scheme, all args are first computed into vregs, and
+ once they are all done, they are moved to the relevant real
+ regs. This always gives correct code, but it also gives a bunch
+ of vreg-to-rreg moves which are usually redundant but are hard
+ for the register allocator to get rid of.
+
+ To decide which scheme to use, all argument expressions are
+ first examined. If they are all so simple that it is clear they
+ will be evaluated without use of any fixed registers, use the
+ fast scheme, else use the slow scheme. Note also that only
+ unconditional calls may use the fast scheme, since having to
+ compute a condition expression could itself trash real
+ registers.
+
+ Note this requires being able to examine an expression and
+ determine whether or not evaluation of it might use a fixed
+ register. That requires knowledge of how the rest of this insn
+ selector works. Currently just the following 3 are regarded as
+ safe -- hopefully they cover the majority of arguments in
+      practice: Iex_RdTmp, Iex_Const and Iex_Get.
+ */
+
+ /* Note that the cee->regparms field is meaningless on ARM hosts
+ (since there is only one calling convention) and so we always
+ ignore it. */
+
+ n_args = 0;
+ for (i = 0; args[i]; i++)
+ n_args++;
+
+ argregs[0] = hregARM_R0();
+ argregs[1] = hregARM_R1();
+ argregs[2] = hregARM_R2();
+ argregs[3] = hregARM_R3();
+
+ tmpregs[0] = tmpregs[1] = tmpregs[2] =
+ tmpregs[3] = INVALID_HREG;
+
+ /* First decide which scheme (slow or fast) is to be used. First
+ assume the fast scheme, and select slow if any contraindications
+ (wow) appear. */
+
+ go_fast = True;
+
+ if (guard) {
+ if (guard->tag == Iex_Const
+ && guard->Iex.Const.con->tag == Ico_U1
+ && guard->Iex.Const.con->Ico.U1 == True) {
+ /* unconditional */
+ } else {
+ /* Not manifestly unconditional -- be conservative. */
+ go_fast = False;
+ }
+ }
+
+ if (go_fast) {
+ for (i = 0; i < n_args; i++) {
+ if (mightRequireFixedRegs(args[i])) {
+ go_fast = False;
+ break;
+ }
+ }
+ }
+ /* At this point the scheme to use has been established. Generate
+ code to get the arg values into the argument rregs. If we run
+ out of arg regs, give up. */
+
+ if (go_fast) {
+
+ /* FAST SCHEME */
+ nextArgReg = 0;
+ if (passBBP) {
+ addInstr(env, mk_iMOVds_RR( argregs[nextArgReg],
+ hregARM_R8() ));
+ nextArgReg++;
+ }
+
+ for (i = 0; i < n_args; i++) {
+ IRType aTy = typeOfIRExpr(env->type_env, args[i]);
+ if (nextArgReg >= ARM_N_ARGREGS)
+ return False; /* out of argregs */
+ if (aTy == Ity_I32) {
+ addInstr(env, mk_iMOVds_RR( argregs[nextArgReg],
+ iselIntExpr_R(env, args[i]) ));
+ nextArgReg++;
+ }
+ else if (aTy == Ity_I64) {
+            /* 64-bit args must be passed in a reg-pair of the form
+ n:n+1, where n is even. Hence either r0:r1 or r2:r3.
+ On a little-endian host, the less significant word is
+ passed in the lower-numbered register. */
+ if (nextArgReg & 1) {
+ if (nextArgReg >= ARM_N_ARGREGS)
+ return False; /* out of argregs */
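+               /* This reg is only skipped to get the following pair
+                  aligned; fill it with recognisable junk, since it
+                  carries no argument. */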
+ addInstr(env, ARMInstr_Imm32( argregs[nextArgReg], 0xAA ));
+ nextArgReg++;
+ }
+ if (nextArgReg >= ARM_N_ARGREGS)
+ return False; /* out of argregs */
+ HReg raHi, raLo;
+ iselInt64Expr(&raHi, &raLo, env, args[i]);
+ addInstr(env, mk_iMOVds_RR( argregs[nextArgReg], raLo ));
+ nextArgReg++;
+ addInstr(env, mk_iMOVds_RR( argregs[nextArgReg], raHi ));
+ nextArgReg++;
+ }
+ else
+ return False; /* unhandled arg type */
+ }
+
+ /* Fast scheme only applies for unconditional calls. Hence: */
+ cc = ARMcc_AL;
+
+ } else {
+
+ /* SLOW SCHEME; move via temporaries */
+ nextArgReg = 0;
+
+ if (passBBP) {
+ /* This is pretty stupid; better to move directly to r0
+ after the rest of the args are done. */
+ tmpregs[nextArgReg] = newVRegI(env);
+ addInstr(env, mk_iMOVds_RR( tmpregs[nextArgReg],
+ hregARM_R8() ));
+ nextArgReg++;
+ }
+
+ for (i = 0; i < n_args; i++) {
+ IRType aTy = typeOfIRExpr(env->type_env, args[i]);
+ if (nextArgReg >= ARM_N_ARGREGS)
+ return False; /* out of argregs */
+ if (aTy == Ity_I32) {
+ tmpregs[nextArgReg] = iselIntExpr_R(env, args[i]);
+ nextArgReg++;
+ }
+ else if (aTy == Ity_I64) {
+ /* Same comment applies as in the Fast-scheme case. */
+ if (nextArgReg & 1)
+ nextArgReg++;
+ if (nextArgReg + 1 >= ARM_N_ARGREGS)
+ return False; /* out of argregs */
+ HReg raHi, raLo;
+ iselInt64Expr(&raHi, &raLo, env, args[i]);
+ tmpregs[nextArgReg] = raLo;
+ nextArgReg++;
+ tmpregs[nextArgReg] = raHi;
+ nextArgReg++;
+ }
+ }
+
+ /* Now we can compute the condition. We can't do it earlier
+ because the argument computations could trash the condition
+ codes. Be a bit clever to handle the common case where the
+ guard is 1:Bit. */
+ cc = ARMcc_AL;
+ if (guard) {
+ if (guard->tag == Iex_Const
+ && guard->Iex.Const.con->tag == Ico_U1
+ && guard->Iex.Const.con->Ico.U1 == True) {
+ /* unconditional -- do nothing */
+ } else {
+ cc = iselCondCode( env, guard );
+ }
+ }
+
+ /* Move the args to their final destinations. */
+ for (i = 0; i < nextArgReg; i++) {
+      if (tmpregs[i] == INVALID_HREG) { // padding reg; fill with junk
+ addInstr(env, ARMInstr_Imm32( argregs[i], 0xAA ));
+ continue;
+ }
+ /* None of these insns, including any spill code that might
+ be generated, may alter the condition codes. */
+ addInstr( env, mk_iMOVds_RR( argregs[i], tmpregs[i] ) );
+ }
+
+ }
+
+ /* Should be assured by checks above */
+ vassert(nextArgReg <= ARM_N_ARGREGS);
+
+ target = (HWord)Ptr_to_ULong(cee->addr);
+
+ /* nextArgReg doles out argument registers. Since these are
+ assigned in the order r0, r1, r2, r3, its numeric value at this
+ point, which must be between 0 and 4 inclusive, is going to be
+ equal to the number of arg regs in use for the call. Hence bake
+ that number into the call (we'll need to know it when doing
+      register allocation, to know what regs the call reads).
+
+ There is a bit of a twist -- harmless but worth recording.
+ Suppose the arg types are (Ity_I32, Ity_I64). Then we will have
+ the first arg in r0 and the second in r3:r2, but r1 isn't used.
+ We nevertheless have nextArgReg==4 and bake that into the call
+      instruction. This will mean the register allocator will believe
+ this insn reads r1 when in fact it doesn't. But that's
+ harmless; it just artificially extends the live range of r1
+ unnecessarily. The best fix would be to put into the
+ instruction, a bitmask indicating which of r0/1/2/3 carry live
+ values. But that's too much hassle. */
+
+ /* Finally, the call itself. */
+ addInstr(env, ARMInstr_Call( cc, target, nextArgReg ));
+
+ return True; /* success */
+}
+
+
+/*---------------------------------------------------------*/
+/*--- ISEL: Integer expressions (32/16/8 bit) ---*/
+/*---------------------------------------------------------*/
+
+/* Select insns for an integer-typed expression, and add them to the
+ code list. Return a reg holding the result. This reg will be a
+ virtual register. THE RETURNED REG MUST NOT BE MODIFIED. If you
+ want to modify it, ask for a new vreg, copy it in there, and modify
+ the copy. The register allocator will do its best to map both
+ vregs to the same real register, so the copies will often disappear
+ later in the game.
+
+ This should handle expressions of 32, 16 and 8-bit type. All
+ results are returned in a 32-bit register. For 16- and 8-bit
+ expressions, the upper 16/24 bits are arbitrary, so you should mask
+ or sign extend partial values if necessary.
+*/
+
+/* --------------------- AMode1 --------------------- */
+
+/* Return an AMode1 which computes the value of the specified
+ expression, possibly also adding insns to the code list as a
+ result. The expression may only be a 32-bit one.
+*/
+
+static Bool sane_AMode1 ( ARMAMode1* am )
+{
+ switch (am->tag) {
+ case ARMam1_RI:
+ return
+ toBool( hregClass(am->ARMam1.RI.reg) == HRcInt32
+ && (hregIsVirtual(am->ARMam1.RI.reg)
+ || am->ARMam1.RI.reg == hregARM_R8())
+ && am->ARMam1.RI.simm13 >= -4095
+ && am->ARMam1.RI.simm13 <= 4095 );
+ case ARMam1_RRS:
+ return
+ toBool( hregClass(am->ARMam1.RRS.base) == HRcInt32
+ && hregIsVirtual(am->ARMam1.RRS.base)
+ && hregClass(am->ARMam1.RRS.index) == HRcInt32
+ && hregIsVirtual(am->ARMam1.RRS.index)
+ && am->ARMam1.RRS.shift >= 0
+ && am->ARMam1.RRS.shift <= 3 );
+ default:
+ vpanic("sane_AMode: unknown ARM AMode1 tag");
+ }
+}
+
+static ARMAMode1* iselIntExpr_AMode1 ( ISelEnv* env, IRExpr* e )
+{
+ ARMAMode1* am = iselIntExpr_AMode1_wrk(env, e);
+ vassert(sane_AMode1(am));
+ return am;
+}
+
+static ARMAMode1* iselIntExpr_AMode1_wrk ( ISelEnv* env, IRExpr* e )
+{
+ IRType ty = typeOfIRExpr(env->type_env,e);
+ vassert(ty == Ity_I32);
+
+ /* FIXME: add RRS matching */
+
+ /* {Add32,Sub32}(expr,simm13) */
+ if (e->tag == Iex_Binop
+ && (e->Iex.Binop.op == Iop_Add32 || e->Iex.Binop.op == Iop_Sub32)
+ && e->Iex.Binop.arg2->tag == Iex_Const
+ && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
+ Int simm = (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
+ if (simm >= -4095 && simm <= 4095) {
+ HReg reg;
+ if (e->Iex.Binop.op == Iop_Sub32)
+ simm = -simm;
+ reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
+ return ARMAMode1_RI(reg, simm);
+ }
+ }
+
+ /* Doesn't match anything in particular. Generate it into
+ a register and use that. */
+ {
+ HReg reg = iselIntExpr_R(env, e);
+ return ARMAMode1_RI(reg, 0);
+ }
+
+}
+
+
+/* --------------------- AMode2 --------------------- */
+
+/* Return an AMode2 which computes the value of the specified
+ expression, possibly also adding insns to the code list as a
+ result. The expression may only be a 32-bit one.
+*/
+
+static Bool sane_AMode2 ( ARMAMode2* am )
+{
+ switch (am->tag) {
+ case ARMam2_RI:
+ return
+ toBool( hregClass(am->ARMam2.RI.reg) == HRcInt32
+ && hregIsVirtual(am->ARMam2.RI.reg)
+ && am->ARMam2.RI.simm9 >= -255
+ && am->ARMam2.RI.simm9 <= 255 );
+ case ARMam2_RR:
+ return
+ toBool( hregClass(am->ARMam2.RR.base) == HRcInt32
+ && hregIsVirtual(am->ARMam2.RR.base)
+ && hregClass(am->ARMam2.RR.index) == HRcInt32
+ && hregIsVirtual(am->ARMam2.RR.index) );
+ default:
+ vpanic("sane_AMode: unknown ARM AMode2 tag");
+ }
+}
+
+static ARMAMode2* iselIntExpr_AMode2 ( ISelEnv* env, IRExpr* e )
+{
+ ARMAMode2* am = iselIntExpr_AMode2_wrk(env, e);
+ vassert(sane_AMode2(am));
+ return am;
+}
+
+static ARMAMode2* iselIntExpr_AMode2_wrk ( ISelEnv* env, IRExpr* e )
+{
+ IRType ty = typeOfIRExpr(env->type_env,e);
+ vassert(ty == Ity_I32);
+
+ /* FIXME: add RR matching */
+
+ /* {Add32,Sub32}(expr,simm8) */
+ if (e->tag == Iex_Binop
+ && (e->Iex.Binop.op == Iop_Add32 || e->Iex.Binop.op == Iop_Sub32)
+ && e->Iex.Binop.arg2->tag == Iex_Const
+ && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
+ Int simm = (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
+ if (simm >= -255 && simm <= 255) {
+ HReg reg;
+ if (e->Iex.Binop.op == Iop_Sub32)
+ simm = -simm;
+ reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
+ return ARMAMode2_RI(reg, simm);
+ }
+ }
+
+ /* Doesn't match anything in particular. Generate it into
+ a register and use that. */
+ {
+ HReg reg = iselIntExpr_R(env, e);
+ return ARMAMode2_RI(reg, 0);
+ }
+
+}
+
+
+/* --------------------- AModeV --------------------- */
+
+/* Return an AModeV which computes the value of the specified
+ expression, possibly also adding insns to the code list as a
+ result. The expression may only be a 32-bit one.
+*/
+
+static Bool sane_AModeV ( ARMAModeV* am )
+{
+ return toBool( hregClass(am->reg) == HRcInt32
+ && hregIsVirtual(am->reg)
+ && am->simm11 >= -1020 && am->simm11 <= 1020
+ && 0 == (am->simm11 & 3) );
+}
+
+static ARMAModeV* iselIntExpr_AModeV ( ISelEnv* env, IRExpr* e )
+{
+ ARMAModeV* am = iselIntExpr_AModeV_wrk(env, e);
+ vassert(sane_AModeV(am));
+ return am;
+}
+
+static ARMAModeV* iselIntExpr_AModeV_wrk ( ISelEnv* env, IRExpr* e )
+{
+ IRType ty = typeOfIRExpr(env->type_env,e);
+ vassert(ty == Ity_I32);
+
+ /* {Add32,Sub32}(expr, simm8 << 2) */
+ if (e->tag == Iex_Binop
+ && (e->Iex.Binop.op == Iop_Add32 || e->Iex.Binop.op == Iop_Sub32)
+ && e->Iex.Binop.arg2->tag == Iex_Const
+ && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
+ Int simm = (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
+ if (simm >= -1020 && simm <= 1020 && 0 == (simm & 3)) {
+ HReg reg;
+ if (e->Iex.Binop.op == Iop_Sub32)
+ simm = -simm;
+ reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
+ return mkARMAModeV(reg, simm);
+ }
+ }
+
+ /* Doesn't match anything in particular. Generate it into
+ a register and use that. */
+ {
+ HReg reg = iselIntExpr_R(env, e);
+ return mkARMAModeV(reg, 0);
+ }
+
+}
+
+/* --------------------- AModeN --------------------- */
+
+static ARMAModeN* iselIntExpr_AModeN ( ISelEnv* env, IRExpr* e )
+{
+ return iselIntExpr_AModeN_wrk(env, e);
+}
+
+static ARMAModeN* iselIntExpr_AModeN_wrk ( ISelEnv* env, IRExpr* e )
+{
+ HReg reg = iselIntExpr_R(env, e);
+ return mkARMAModeN_R(reg);
+}
+
+
+/* --------------------- RI84 --------------------- */
+
+/* Select instructions to generate 'e' into a RI84. If mayInv is
+ true, then the caller will also accept an I84 form that denotes
+   'not e'. In this case didInv may not be NULL, and *didInv is set
+   to True iff the inverted form was produced. This complication is
+   so as to allow generation of an RI84
+ which is suitable for use in either an AND or BIC instruction,
+ without knowing (before this call) which one.
+*/
+static ARMRI84* iselIntExpr_RI84 ( /*OUT*/Bool* didInv, Bool mayInv,
+ ISelEnv* env, IRExpr* e )
+{
+ ARMRI84* ri;
+ if (mayInv)
+ vassert(didInv != NULL);
+ ri = iselIntExpr_RI84_wrk(didInv, mayInv, env, e);
+ /* sanity checks ... */
+ switch (ri->tag) {
+ case ARMri84_I84:
+ return ri;
+ case ARMri84_R:
+ vassert(hregClass(ri->ARMri84.R.reg) == HRcInt32);
+ vassert(hregIsVirtual(ri->ARMri84.R.reg));
+ return ri;
+ default:
+ vpanic("iselIntExpr_RI84: unknown arm RI84 tag");
+ }
+}
+
+/* DO NOT CALL THIS DIRECTLY ! */
+static ARMRI84* iselIntExpr_RI84_wrk ( /*OUT*/Bool* didInv, Bool mayInv,
+ ISelEnv* env, IRExpr* e )
+{
+ IRType ty = typeOfIRExpr(env->type_env,e);
+ vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
+
+ if (didInv) *didInv = False;
+
+ /* special case: immediate */
+ if (e->tag == Iex_Const) {
+ UInt u, u8 = 0x100, u4 = 0x10; /* both invalid */
+ switch (e->Iex.Const.con->tag) {
+ case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
+ case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
+ case Ico_U8: u = 0xFF & (e->Iex.Const.con->Ico.U8); break;
+ default: vpanic("iselIntExpr_RI84.Iex_Const(armh)");
+ }
+ if (fitsIn8x4(&u8, &u4, u)) {
+ return ARMRI84_I84( (UShort)u8, (UShort)u4 );
+ }
+ if (mayInv && fitsIn8x4(&u8, &u4, ~u)) {
+ vassert(didInv);
+ *didInv = True;
+ return ARMRI84_I84( (UShort)u8, (UShort)u4 );
+ }
+ /* else fail, fall through to default case */
+ }
+
+ /* default case: calculate into a register and return that */
+ {
+ HReg r = iselIntExpr_R ( env, e );
+ return ARMRI84_R(r);
+ }
+}
+
+
+/* --------------------- RI5 --------------------- */
+
+/* Select instructions to generate 'e' into a RI5. */
+
+static ARMRI5* iselIntExpr_RI5 ( ISelEnv* env, IRExpr* e )
+{
+ ARMRI5* ri = iselIntExpr_RI5_wrk(env, e);
+ /* sanity checks ... */
+ switch (ri->tag) {
+ case ARMri5_I5:
+ return ri;
+ case ARMri5_R:
+ vassert(hregClass(ri->ARMri5.R.reg) == HRcInt32);
+ vassert(hregIsVirtual(ri->ARMri5.R.reg));
+ return ri;
+ default:
+ vpanic("iselIntExpr_RI5: unknown arm RI5 tag");
+ }
+}
+
+/* DO NOT CALL THIS DIRECTLY ! */
+static ARMRI5* iselIntExpr_RI5_wrk ( ISelEnv* env, IRExpr* e )
+{
+ IRType ty = typeOfIRExpr(env->type_env,e);
+ vassert(ty == Ity_I32 || ty == Ity_I8);
+
+ /* special case: immediate */
+ if (e->tag == Iex_Const) {
+      UInt u;
+ switch (e->Iex.Const.con->tag) {
+ case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
+ case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
+ case Ico_U8: u = 0xFF & (e->Iex.Const.con->Ico.U8); break;
+ default: vpanic("iselIntExpr_RI5.Iex_Const(armh)");
+ }
+ if (u >= 1 && u <= 31) {
+ return ARMRI5_I5(u);
+ }
+ /* else fail, fall through to default case */
+ }
+
+ /* default case: calculate into a register and return that */
+ {
+ HReg r = iselIntExpr_R ( env, e );
+ return ARMRI5_R(r);
+ }
+}
+
+
+/* ------------------- CondCode ------------------- */
+
+/* Generate code to evaluate a bit-typed expression, returning the
+   condition code that corresponds to the expression notionally
+   having returned 1. */
+
+static ARMCondCode iselCondCode ( ISelEnv* env, IRExpr* e )
+{
+ ARMCondCode cc = iselCondCode_wrk(env,e);
+ vassert(cc != ARMcc_NV);
+ return cc;
+}
+
+static ARMCondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e )
+{
+ vassert(e);
+ vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);
+
+ /* var */
+ if (e->tag == Iex_RdTmp) {
+ HReg rTmp = lookupIRTemp(env, e->Iex.RdTmp.tmp);
+ /* CmpOrTst doesn't modify rTmp; so this is OK. */
+ ARMRI84* one = ARMRI84_I84(1,0);
+ addInstr(env, ARMInstr_CmpOrTst(False/*test*/, rTmp, one));
+ return ARMcc_NE;
+ }
+
+ /* Not1(e) */
+ if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) {
+ /* Generate code for the arg, and negate the test condition */
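+      /* ARM condition codes come in complementary pairs differing
+         only in bit 0 (EQ/NE, HS/LO, GT/LE, ...), so flipping the
+         bottom bit negates the condition. */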
+ return 1 ^ iselCondCode(env, e->Iex.Unop.arg);
+ }
+
+ /* --- patterns rooted at: 32to1 --- */
+
+ if (e->tag == Iex_Unop
+ && e->Iex.Unop.op == Iop_32to1) {
+ HReg rTmp = iselIntExpr_R(env, e->Iex.Unop.arg);
+ ARMRI84* one = ARMRI84_I84(1,0);
+ addInstr(env, ARMInstr_CmpOrTst(False/*test*/, rTmp, one));
+ return ARMcc_NE;
+ }
+
+ /* --- patterns rooted at: CmpNEZ8 --- */
+
+ if (e->tag == Iex_Unop
+ && e->Iex.Unop.op == Iop_CmpNEZ8) {
+ HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
+ ARMRI84* xFF = ARMRI84_I84(0xFF,0);
+ addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r1, xFF));
+ return ARMcc_NE;
+ }
+
+ /* --- patterns rooted at: CmpNEZ32 --- */
+
+ if (e->tag == Iex_Unop
+ && e->Iex.Unop.op == Iop_CmpNEZ32) {
+ HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
+ ARMRI84* zero = ARMRI84_I84(0,0);
+ addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, r1, zero));
+ return ARMcc_NE;
+ }
+
+ /* --- patterns rooted at: CmpNEZ64 --- */
+
+ if (e->tag == Iex_Unop
+ && e->Iex.Unop.op == Iop_CmpNEZ64) {
+ HReg tHi, tLo;
+ HReg tmp = newVRegI(env);
+ ARMRI84* zero = ARMRI84_I84(0,0);
+ iselInt64Expr(&tHi, &tLo, env, e->Iex.Unop.arg);
+ addInstr(env, ARMInstr_Alu(ARMalu_OR, tmp, tHi, ARMRI84_R(tLo)));
+ addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, tmp, zero));
+ return ARMcc_NE;
+ }
+
+ /* --- Cmp*32*(x,y) --- */
+ if (e->tag == Iex_Binop
+ && (e->Iex.Binop.op == Iop_CmpEQ32
+ || e->Iex.Binop.op == Iop_CmpNE32
+ || e->Iex.Binop.op == Iop_CmpLT32S
+ || e->Iex.Binop.op == Iop_CmpLT32U
+ || e->Iex.Binop.op == Iop_CmpLE32S
+ || e->Iex.Binop.op == Iop_CmpLE32U)) {
+ HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
+ ARMRI84* argR = iselIntExpr_RI84(NULL,False,
+ env, e->Iex.Binop.arg2);
+ addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, argL, argR));
+ switch (e->Iex.Binop.op) {
+ case Iop_CmpEQ32: return ARMcc_EQ;
+ case Iop_CmpNE32: return ARMcc_NE;
+ case Iop_CmpLT32S: return ARMcc_LT;
+ case Iop_CmpLT32U: return ARMcc_LO;
+ case Iop_CmpLE32S: return ARMcc_LE;
+ case Iop_CmpLE32U: return ARMcc_LS;
+ default: vpanic("iselCondCode(arm): CmpXX32");
+ }
+ }
+
+ /* --- CasCmpEQ* --- */
+ /* Ist_Cas has a dummy argument to compare with, so comparison is
+ always true. */
+ if (e->tag == Iex_Binop
+ && (e->Iex.Binop.op == Iop_CasCmpEQ32
+ || e->Iex.Binop.op == Iop_CasCmpEQ16
+ || e->Iex.Binop.op == Iop_CasCmpEQ8)) {
+ return ARMcc_AL;
+ }
+
+ ppIRExpr(e);
+ vpanic("iselCondCode");
+}
+
+
+/* --------------------- Reg --------------------- */
+
+static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e )
+{
+ HReg r = iselIntExpr_R_wrk(env, e);
+ /* sanity checks ... */
+# if 0
+ vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
+# endif
+ vassert(hregClass(r) == HRcInt32);
+ vassert(hregIsVirtual(r));
+ return r;
+}
+
+/* DO NOT CALL THIS DIRECTLY ! */
+static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
+{
+ IRType ty = typeOfIRExpr(env->type_env,e);
+ vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
+// vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
+
+ switch (e->tag) {
+
+ /* --------- TEMP --------- */
+ case Iex_RdTmp: {
+ return lookupIRTemp(env, e->Iex.RdTmp.tmp);
+ }
+
+ /* --------- LOAD --------- */
+ case Iex_Load: {
+ HReg dst = newVRegI(env);
+
+ if (e->Iex.Load.end != Iend_LE)
+ goto irreducible;
+
+ if (ty == Ity_I32) {
+ ARMAMode1* amode = iselIntExpr_AMode1 ( env, e->Iex.Load.addr );
+ addInstr(env, ARMInstr_LdSt32(True/*isLoad*/, dst, amode));
+ return dst;
+ }
+ if (ty == Ity_I16) {
+ ARMAMode2* amode = iselIntExpr_AMode2 ( env, e->Iex.Load.addr );
+ addInstr(env, ARMInstr_LdSt16(True/*isLoad*/, False/*!signedLoad*/,
+ dst, amode));
+ return dst;
+ }
+ if (ty == Ity_I8) {
+ ARMAMode1* amode = iselIntExpr_AMode1 ( env, e->Iex.Load.addr );
+ addInstr(env, ARMInstr_LdSt8U(True/*isLoad*/, dst, amode));
+ return dst;
+ }
+
+//zz if (ty == Ity_I16) {
+//zz addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
+//zz return dst;
+//zz }
+//zz if (ty == Ity_I8) {
+//zz addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
+//zz return dst;
+//zz }
+ break;
+ }
+
+//zz /* --------- TERNARY OP --------- */
+//zz case Iex_Triop: {
+//zz /* C3210 flags following FPU partial remainder (fprem), both
+//zz IEEE compliant (PREM1) and non-IEEE compliant (PREM). */
+//zz if (e->Iex.Triop.op == Iop_PRemC3210F64
+//zz || e->Iex.Triop.op == Iop_PRem1C3210F64) {
+//zz HReg junk = newVRegF(env);
+//zz HReg dst = newVRegI(env);
+//zz HReg srcL = iselDblExpr(env, e->Iex.Triop.arg2);
+//zz HReg srcR = iselDblExpr(env, e->Iex.Triop.arg3);
+//zz /* XXXROUNDINGFIXME */
+//zz /* set roundingmode here */
+//zz addInstr(env, X86Instr_FpBinary(
+//zz e->Iex.Binop.op==Iop_PRemC3210F64
+//zz ? Xfp_PREM : Xfp_PREM1,
+//zz srcL,srcR,junk
+//zz ));
+//zz /* The previous pseudo-insn will have left the FPU's C3210
+//zz flags set correctly. So bag them. */
+//zz addInstr(env, X86Instr_FpStSW_AX());
+//zz addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
+//zz addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0x4700), dst));
+//zz return dst;
+//zz }
+//zz
+//zz break;
+//zz }
+
+ /* --------- BINARY OP --------- */
+ case Iex_Binop: {
+
+ ARMAluOp aop = 0; /* invalid */
+ ARMShiftOp sop = 0; /* invalid */
+
+ /* ADD/SUB/AND/OR/XOR */
+ switch (e->Iex.Binop.op) {
+ case Iop_And32: {
+ Bool didInv = False;
+ HReg dst = newVRegI(env);
+ HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
+ ARMRI84* argR = iselIntExpr_RI84(&didInv, True/*mayInv*/,
+ env, e->Iex.Binop.arg2);
+ addInstr(env, ARMInstr_Alu(didInv ? ARMalu_BIC : ARMalu_AND,
+ dst, argL, argR));
+ return dst;
+ }
+ case Iop_Or32: aop = ARMalu_OR; goto std_binop;
+ case Iop_Xor32: aop = ARMalu_XOR; goto std_binop;
+ case Iop_Sub32: aop = ARMalu_SUB; goto std_binop;
+ case Iop_Add32: aop = ARMalu_ADD; goto std_binop;
+ std_binop: {
+ HReg dst = newVRegI(env);
+ HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
+ ARMRI84* argR = iselIntExpr_RI84(NULL, False/*mayInv*/,
+ env, e->Iex.Binop.arg2);
+ addInstr(env, ARMInstr_Alu(aop, dst, argL, argR));
+ return dst;
+ }
+ default: break;
+ }
+
+ /* SHL/SHR/SAR */
+ switch (e->Iex.Binop.op) {
+ case Iop_Shl32: sop = ARMsh_SHL; goto sh_binop;
+ case Iop_Shr32: sop = ARMsh_SHR; goto sh_binop;
+ case Iop_Sar32: sop = ARMsh_SAR; goto sh_binop;
+ sh_binop: {
+ HReg dst = newVRegI(env);
+ HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
+ ARMRI5* argR = iselIntExpr_RI5(env, e->Iex.Binop.arg2);
+ addInstr(env, ARMInstr_Shift(sop, dst, argL, argR));
+ vassert(ty == Ity_I32); /* else the IR is ill-typed */
+ return dst;
+ }
+ default: break;
+ }
+
+ /* MUL */
+ if (e->Iex.Binop.op == Iop_Mul32) {
+ HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
+ HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
+ HReg dst = newVRegI(env);
+ addInstr(env, mk_iMOVds_RR(hregARM_R2(), argL));
+ addInstr(env, mk_iMOVds_RR(hregARM_R3(), argR));
+ addInstr(env, ARMInstr_Mul(ARMmul_PLAIN));
+ addInstr(env, mk_iMOVds_RR(dst, hregARM_R0()));
+ return dst;
+ }
+
+ /* Handle misc other ops. */
+
+ if (e->Iex.Binop.op == Iop_Max32U) {
+ HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
+ HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
+ HReg dst = newVRegI(env);
+ addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, argL,
+ ARMRI84_R(argR)));
+ addInstr(env, mk_iMOVds_RR(dst, argL));
+ addInstr(env, ARMInstr_CMov(ARMcc_LO, dst, ARMRI84_R(argR)));
+ return dst;
+ }
+
+ if (e->Iex.Binop.op == Iop_CmpF64) {
+ HReg dL = iselDblExpr(env, e->Iex.Binop.arg1);
+ HReg dR = iselDblExpr(env, e->Iex.Binop.arg2);
+ HReg dst = newVRegI(env);
+ /* Do the compare (FCMPD) and set NZCV in FPSCR. Then also do
+ FMSTAT, so we can examine the results directly. */
+ addInstr(env, ARMInstr_VCmpD(dL, dR));
+ /* Create in dst, the IRCmpF64Result encoded result. */
+ addInstr(env, ARMInstr_Imm32(dst, 0));
+ addInstr(env, ARMInstr_CMov(ARMcc_EQ, dst, ARMRI84_I84(0x40,0))); //EQ
+ addInstr(env, ARMInstr_CMov(ARMcc_MI, dst, ARMRI84_I84(0x01,0))); //LT
+ addInstr(env, ARMInstr_CMov(ARMcc_GT, dst, ARMRI84_I84(0x00,0))); //GT
+ addInstr(env, ARMInstr_CMov(ARMcc_VS, dst, ARMRI84_I84(0x45,0))); //UN
+ return dst;
+ }
+
+ if (e->Iex.Binop.op == Iop_F64toI32S
+ || e->Iex.Binop.op == Iop_F64toI32U) {
+         /* Wretched ugliness all round, due to having to deal
+ with rounding modes. Oh well. */
+ /* FIXME: if arg1 is a constant indicating round-to-zero,
+ then we could skip all this arsing around with FPSCR and
+ simply emit FTO{S,U}IZD. */
+ Bool syned = e->Iex.Binop.op == Iop_F64toI32S;
+ HReg valD = iselDblExpr(env, e->Iex.Binop.arg2);
+ set_VFP_rounding_mode(env, e->Iex.Binop.arg1);
+ /* FTO{S,U}ID valF, valD */
+ HReg valF = newVRegF(env);
+ addInstr(env, ARMInstr_VCvtID(False/*!iToD*/, syned,
+ valF, valD));
+ set_VFP_rounding_default(env);
+ /* VMOV dst, valF */
+ HReg dst = newVRegI(env);
+ addInstr(env, ARMInstr_VXferS(False/*!toS*/, valF, dst));
+ return dst;
+ }
+
+ if (e->Iex.Binop.op == Iop_GetElem8x8
+ || e->Iex.Binop.op == Iop_GetElem16x4
+ || e->Iex.Binop.op == Iop_GetElem32x2) {
+ HReg res = newVRegI(env);
+         HReg arg = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ UInt index, size;
+ if (e->Iex.Binop.arg2->tag != Iex_Const ||
+ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
+ vpanic("ARM target supports GetElem with constant "
+ "second argument only\n");
+ }
+ index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
+ switch (e->Iex.Binop.op) {
+ case Iop_GetElem8x8: vassert(index < 8); size = 0; break;
+ case Iop_GetElem16x4: vassert(index < 4); size = 1; break;
+ case Iop_GetElem32x2: vassert(index < 2); size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NUnaryS(ARMneon_GETELEMS,
+ mkARMNRS(ARMNRS_Reg, res, 0),
+ mkARMNRS(ARMNRS_Scalar, arg, index),
+ size, False));
+ return res;
+ }
+
+ if (e->Iex.Binop.op == Iop_GetElem8x16
+ || e->Iex.Binop.op == Iop_GetElem16x8
+ || e->Iex.Binop.op == Iop_GetElem32x4) {
+ HReg res = newVRegI(env);
+         HReg arg = iselNeonExpr(env, e->Iex.Binop.arg1);
+ UInt index, size;
+ if (e->Iex.Binop.arg2->tag != Iex_Const ||
+ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
+ vpanic("ARM target supports GetElem with constant "
+ "second argument only\n");
+ }
+ index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
+ switch (e->Iex.Binop.op) {
+ case Iop_GetElem8x16: vassert(index < 16); size = 0; break;
+ case Iop_GetElem16x8: vassert(index < 8); size = 1; break;
+ case Iop_GetElem32x4: vassert(index < 4); size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NUnaryS(ARMneon_GETELEMS,
+ mkARMNRS(ARMNRS_Reg, res, 0),
+ mkARMNRS(ARMNRS_Scalar, arg, index),
+ size, True));
+ return res;
+ }
+
+ /* All cases involving host-side helper calls. */
+ void* fn = NULL;
+ switch (e->Iex.Binop.op) {
+ case Iop_Add16x2:
+ fn = &h_generic_calc_Add16x2; break;
+ case Iop_Sub16x2:
+ fn = &h_generic_calc_Sub16x2; break;
+ case Iop_HAdd16Ux2:
+ fn = &h_generic_calc_HAdd16Ux2; break;
+ case Iop_HAdd16Sx2:
+ fn = &h_generic_calc_HAdd16Sx2; break;
+ case Iop_HSub16Ux2:
+ fn = &h_generic_calc_HSub16Ux2; break;
+ case Iop_HSub16Sx2:
+ fn = &h_generic_calc_HSub16Sx2; break;
+ case Iop_QAdd16Sx2:
+ fn = &h_generic_calc_QAdd16Sx2; break;
+ case Iop_QSub16Sx2:
+ fn = &h_generic_calc_QSub16Sx2; break;
+ case Iop_Add8x4:
+ fn = &h_generic_calc_Add8x4; break;
+ case Iop_Sub8x4:
+ fn = &h_generic_calc_Sub8x4; break;
+ case Iop_HAdd8Ux4:
+ fn = &h_generic_calc_HAdd8Ux4; break;
+ case Iop_HAdd8Sx4:
+ fn = &h_generic_calc_HAdd8Sx4; break;
+ case Iop_HSub8Ux4:
+ fn = &h_generic_calc_HSub8Ux4; break;
+ case Iop_HSub8Sx4:
+ fn = &h_generic_calc_HSub8Sx4; break;
+ case Iop_QAdd8Sx4:
+ fn = &h_generic_calc_QAdd8Sx4; break;
+ case Iop_QAdd8Ux4:
+ fn = &h_generic_calc_QAdd8Ux4; break;
+ case Iop_QSub8Sx4:
+ fn = &h_generic_calc_QSub8Sx4; break;
+ case Iop_QSub8Ux4:
+ fn = &h_generic_calc_QSub8Ux4; break;
+ case Iop_Sad8Ux4:
+ fn = &h_generic_calc_Sad8Ux4; break;
+ default:
+ break;
+ }
+
+ if (fn) {
+ HReg regL = iselIntExpr_R(env, e->Iex.Binop.arg1);
+ HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2);
+ HReg res = newVRegI(env);
+ addInstr(env, mk_iMOVds_RR(hregARM_R0(), regL));
+ addInstr(env, mk_iMOVds_RR(hregARM_R1(), regR));
+ addInstr(env, ARMInstr_Call( ARMcc_AL, (HWord)Ptr_to_ULong(fn), 2 ));
+ addInstr(env, mk_iMOVds_RR(res, hregARM_R0()));
+ return res;
+ }
+
+ break;
+ }
+
+ /* --------- UNARY OP --------- */
+ case Iex_Unop: {
+
+//zz /* 1Uto8(32to1(expr32)) */
+//zz if (e->Iex.Unop.op == Iop_1Uto8) {
+//zz DECLARE_PATTERN(p_32to1_then_1Uto8);
+//zz DEFINE_PATTERN(p_32to1_then_1Uto8,
+//zz unop(Iop_1Uto8,unop(Iop_32to1,bind(0))));
+//zz if (matchIRExpr(&mi,p_32to1_then_1Uto8,e)) {
+//zz IRExpr* expr32 = mi.bindee[0];
+//zz HReg dst = newVRegI(env);
+//zz HReg src = iselIntExpr_R(env, expr32);
+//zz addInstr(env, mk_iMOVsd_RR(src,dst) );
+//zz addInstr(env, X86Instr_Alu32R(Xalu_AND,
+//zz X86RMI_Imm(1), dst));
+//zz return dst;
+//zz }
+//zz }
+//zz
+//zz /* 8Uto32(LDle(expr32)) */
+//zz if (e->Iex.Unop.op == Iop_8Uto32) {
+//zz DECLARE_PATTERN(p_LDle8_then_8Uto32);
+//zz DEFINE_PATTERN(p_LDle8_then_8Uto32,
+//zz unop(Iop_8Uto32,
+//zz IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
+//zz if (matchIRExpr(&mi,p_LDle8_then_8Uto32,e)) {
+//zz HReg dst = newVRegI(env);
+//zz X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
+//zz addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
+//zz return dst;
+//zz }
+//zz }
+//zz
+//zz /* 8Sto32(LDle(expr32)) */
+//zz if (e->Iex.Unop.op == Iop_8Sto32) {
+//zz DECLARE_PATTERN(p_LDle8_then_8Sto32);
+//zz DEFINE_PATTERN(p_LDle8_then_8Sto32,
+//zz unop(Iop_8Sto32,
+//zz IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
+//zz if (matchIRExpr(&mi,p_LDle8_then_8Sto32,e)) {
+//zz HReg dst = newVRegI(env);
+//zz X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
+//zz addInstr(env, X86Instr_LoadEX(1,True,amode,dst));
+//zz return dst;
+//zz }
+//zz }
+//zz
+//zz /* 16Uto32(LDle(expr32)) */
+//zz if (e->Iex.Unop.op == Iop_16Uto32) {
+//zz DECLARE_PATTERN(p_LDle16_then_16Uto32);
+//zz DEFINE_PATTERN(p_LDle16_then_16Uto32,
+//zz unop(Iop_16Uto32,
+//zz IRExpr_Load(Iend_LE,Ity_I16,bind(0))) );
+//zz if (matchIRExpr(&mi,p_LDle16_then_16Uto32,e)) {
+//zz HReg dst = newVRegI(env);
+//zz X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
+//zz addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
+//zz return dst;
+//zz }
+//zz }
+//zz
+//zz /* 8Uto32(GET:I8) */
+//zz if (e->Iex.Unop.op == Iop_8Uto32) {
+//zz if (e->Iex.Unop.arg->tag == Iex_Get) {
+//zz HReg dst;
+//zz X86AMode* amode;
+//zz vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I8);
+//zz dst = newVRegI(env);
+//zz amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
+//zz hregX86_EBP());
+//zz addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
+//zz return dst;
+//zz }
+//zz }
+//zz
+//zz /* 16to32(GET:I16) */
+//zz if (e->Iex.Unop.op == Iop_16Uto32) {
+//zz if (e->Iex.Unop.arg->tag == Iex_Get) {
+//zz HReg dst;
+//zz X86AMode* amode;
+//zz vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I16);
+//zz dst = newVRegI(env);
+//zz amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
+//zz hregX86_EBP());
+//zz addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
+//zz return dst;
+//zz }
+//zz }
+
+ switch (e->Iex.Unop.op) {
+ case Iop_8Uto32: {
+ HReg dst = newVRegI(env);
+ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
+ addInstr(env, ARMInstr_Alu(ARMalu_AND,
+ dst, src, ARMRI84_I84(0xFF,0)));
+ return dst;
+ }
+//zz case Iop_8Uto16:
+//zz case Iop_8Uto32:
+//zz case Iop_16Uto32: {
+//zz HReg dst = newVRegI(env);
+//zz HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
+//zz UInt mask = e->Iex.Unop.op==Iop_16Uto32 ? 0xFFFF : 0xFF;
+//zz addInstr(env, mk_iMOVsd_RR(src,dst) );
+//zz addInstr(env, X86Instr_Alu32R(Xalu_AND,
+//zz X86RMI_Imm(mask), dst));
+//zz return dst;
+//zz }
+//zz case Iop_8Sto16:
+//zz case Iop_8Sto32:
+ case Iop_16Uto32: {
+ HReg dst = newVRegI(env);
+ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
+ ARMRI5* amt = ARMRI5_I5(16);
+ addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, src, amt));
+ addInstr(env, ARMInstr_Shift(ARMsh_SHR, dst, dst, amt));
+ return dst;
+ }
+ case Iop_8Sto32:
+ case Iop_16Sto32: {
+ HReg dst = newVRegI(env);
+ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
+ ARMRI5* amt = ARMRI5_I5(e->Iex.Unop.op==Iop_16Sto32 ? 16 : 24);
+ addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, src, amt));
+ addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt));
+ return dst;
+ }
+//zz case Iop_Not8:
+//zz case Iop_Not16:
+ case Iop_Not32: {
+ HReg dst = newVRegI(env);
+ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
+ addInstr(env, ARMInstr_Unary(ARMun_NOT, dst, src));
+ return dst;
+ }
+ case Iop_64HIto32: {
+ HReg rHi, rLo;
+ iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
+ return rHi; /* and abandon rLo .. poor wee thing :-) */
+ }
+ case Iop_64to32: {
+ HReg rHi, rLo;
+ iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
+ return rLo; /* similar stupid comment to the above ... */
+ }
+ case Iop_64to8: {
+ HReg rHi, rLo;
+ if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
+ HReg tHi = newVRegI(env);
+ HReg tLo = newVRegI(env);
+ HReg tmp = iselNeon64Expr(env, e->Iex.Unop.arg);
+ addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo));
+ rHi = tHi;
+ rLo = tLo;
+ } else {
+ iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
+ }
+ return rLo;
+ }
+//zz case Iop_16HIto8:
+//zz case Iop_32HIto16: {
+//zz HReg dst = newVRegI(env);
+//zz HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
+//zz Int shift = e->Iex.Unop.op == Iop_16HIto8 ? 8 : 16;
+//zz addInstr(env, mk_iMOVsd_RR(src,dst) );
+//zz addInstr(env, X86Instr_Sh32(Xsh_SHR, shift, dst));
+//zz return dst;
+//zz }
+ case Iop_1Uto32:
+ case Iop_1Uto8: {
+ HReg dst = newVRegI(env);
+ ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
+ addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
+ addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
+ return dst;
+ }
+
+ case Iop_1Sto32: {
+ HReg dst = newVRegI(env);
+ ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
+ ARMRI5* amt = ARMRI5_I5(31);
+ /* This is really rough. We could do much better here;
+ perhaps mvn{cond} dst, #0 as the second insn?
+ (same applies to 1Sto64) */
+ addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
+ addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
+ addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, dst, amt));
+ addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt));
+ return dst;
+ }
+
+
+//zz case Iop_1Sto8:
+//zz case Iop_1Sto16:
+//zz case Iop_1Sto32: {
+//zz /* could do better than this, but for now ... */
+//zz HReg dst = newVRegI(env);
+//zz X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
+//zz addInstr(env, X86Instr_Set32(cond,dst));
+//zz addInstr(env, X86Instr_Sh32(Xsh_SHL, 31, dst));
+//zz addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, dst));
+//zz return dst;
+//zz }
+//zz case Iop_Ctz32: {
+//zz /* Count trailing zeroes, implemented by x86 'bsfl' */
+//zz HReg dst = newVRegI(env);
+//zz HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
+//zz addInstr(env, X86Instr_Bsfr32(True,src,dst));
+//zz return dst;
+//zz }
+ case Iop_Clz32: {
+ /* Count leading zeroes; easy on ARM. */
+ HReg dst = newVRegI(env);
+ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
+ addInstr(env, ARMInstr_Unary(ARMun_CLZ, dst, src));
+ return dst;
+ }
+
+ case Iop_CmpwNEZ32: {
+ HReg dst = newVRegI(env);
+ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
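+         /* Compute (src | -src) >>s 31: this is all ones iff
+            src != 0, since src | -src has the sign bit set exactly
+            when src is nonzero. */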
+ addInstr(env, ARMInstr_Unary(ARMun_NEG, dst, src));
+ addInstr(env, ARMInstr_Alu(ARMalu_OR, dst, dst, ARMRI84_R(src)));
+ addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, ARMRI5_I5(31)));
+ return dst;
+ }
+
+ case Iop_Left32: {
+ HReg dst = newVRegI(env);
+ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
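+         /* Left32(x) is defined as x | -x, which is exactly what
+            this NEG-then-OR sequence computes. */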
+ addInstr(env, ARMInstr_Unary(ARMun_NEG, dst, src));
+ addInstr(env, ARMInstr_Alu(ARMalu_OR, dst, dst, ARMRI84_R(src)));
+ return dst;
+ }
+
+//zz case Iop_V128to32: {
+//zz HReg dst = newVRegI(env);
+//zz HReg vec = iselVecExpr(env, e->Iex.Unop.arg);
+//zz X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
+//zz sub_from_esp(env, 16);
+//zz addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, esp0));
+//zz addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(esp0), dst ));
+//zz add_to_esp(env, 16);
+//zz return dst;
+//zz }
+//zz
+ case Iop_ReinterpF32asI32: {
+ HReg dst = newVRegI(env);
+ HReg src = iselFltExpr(env, e->Iex.Unop.arg);
+ addInstr(env, ARMInstr_VXferS(False/*!toS*/, src, dst));
+ return dst;
+ }
+
+//zz
+//zz case Iop_16to8:
+ case Iop_32to8:
+ case Iop_32to16:
+ /* These are no-ops. */
+ return iselIntExpr_R(env, e->Iex.Unop.arg);
+
+ default:
+ break;
+ }
+
+ /* All Unop cases involving host-side helper calls. */
+ void* fn = NULL;
+ switch (e->Iex.Unop.op) {
+ case Iop_CmpNEZ16x2:
+ fn = &h_generic_calc_CmpNEZ16x2; break;
+ case Iop_CmpNEZ8x4:
+ fn = &h_generic_calc_CmpNEZ8x4; break;
+ default:
+ break;
+ }
+
+ if (fn) {
+ HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
+ HReg res = newVRegI(env);
+ addInstr(env, mk_iMOVds_RR(hregARM_R0(), arg));
+ addInstr(env, ARMInstr_Call( ARMcc_AL, (HWord)Ptr_to_ULong(fn), 1 ));
+ addInstr(env, mk_iMOVds_RR(res, hregARM_R0()));
+ return res;
+ }
+
+ break;
+ }
+
+ /* --------- GET --------- */
+ case Iex_Get: {
+ if (ty == Ity_I32
+ && 0 == (e->Iex.Get.offset & 3)
+ && e->Iex.Get.offset < 4096-4) {
+ HReg dst = newVRegI(env);
+ addInstr(env, ARMInstr_LdSt32(
+ True/*isLoad*/,
+ dst,
+ ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset)));
+ return dst;
+ }
+//zz if (ty == Ity_I8 || ty == Ity_I16) {
+//zz HReg dst = newVRegI(env);
+//zz addInstr(env, X86Instr_LoadEX(
+//zz toUChar(ty==Ity_I8 ? 1 : 2),
+//zz False,
+//zz X86AMode_IR(e->Iex.Get.offset,hregX86_EBP()),
+//zz dst));
+//zz return dst;
+//zz }
+ break;
+ }
+
+//zz case Iex_GetI: {
+//zz X86AMode* am
+//zz = genGuestArrayOffset(
+//zz env, e->Iex.GetI.descr,
+//zz e->Iex.GetI.ix, e->Iex.GetI.bias );
+//zz HReg dst = newVRegI(env);
+//zz if (ty == Ity_I8) {
+//zz addInstr(env, X86Instr_LoadEX( 1, False, am, dst ));
+//zz return dst;
+//zz }
+//zz if (ty == Ity_I32) {
+//zz addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(am), dst));
+//zz return dst;
+//zz }
+//zz break;
+//zz }
+
+ /* --------- CCALL --------- */
+ case Iex_CCall: {
+ HReg dst = newVRegI(env);
+ vassert(ty == e->Iex.CCall.retty);
+
+ /* be very restrictive for now. Only 32/64-bit ints allowed
+ for args, and 32 bits for return type. */
+ if (e->Iex.CCall.retty != Ity_I32)
+ goto irreducible;
+
+ /* Marshal args, do the call, clear stack. */
+ Bool ok = doHelperCall( env, False,
+ NULL, e->Iex.CCall.cee, e->Iex.CCall.args );
+ if (ok) {
+ addInstr(env, mk_iMOVds_RR(dst, hregARM_R0()));
+ return dst;
+ }
+ /* else fall through; will hit the irreducible: label */
+ }
+
+ /* --------- LITERAL --------- */
+ /* 32-bit literals */
+ case Iex_Const: {
+ UInt u = 0;
+ HReg dst = newVRegI(env);
+ switch (e->Iex.Const.con->tag) {
+ case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
+ case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
+ case Ico_U8: u = 0xFF & (e->Iex.Const.con->Ico.U8); break;
+ default: ppIRExpr(e); vpanic("iselIntExpr_R.Iex_Const(arm)");
+ }
+ addInstr(env, ARMInstr_Imm32(dst, u));
+ return dst;
+ }
+
+ /* --------- MULTIPLEX --------- */
+ case Iex_Mux0X: {
+ IRExpr* cond = e->Iex.Mux0X.cond;
+
+ /* Mux0X( 32to8(1Uto32(ccexpr)), expr0, exprX ) */
+ if (ty == Ity_I32
+ && cond->tag == Iex_Unop
+ && cond->Iex.Unop.op == Iop_32to8
+ && cond->Iex.Unop.arg->tag == Iex_Unop
+ && cond->Iex.Unop.arg->Iex.Unop.op == Iop_1Uto32) {
+ ARMCondCode cc;
+ HReg rX = iselIntExpr_R(env, e->Iex.Mux0X.exprX);
+ ARMRI84* r0 = iselIntExpr_RI84(NULL, False, env, e->Iex.Mux0X.expr0);
+ HReg dst = newVRegI(env);
+ addInstr(env, mk_iMOVds_RR(dst, rX));
+ cc = iselCondCode(env, cond->Iex.Unop.arg->Iex.Unop.arg);
+ addInstr(env, ARMInstr_CMov(cc ^ 1, dst, r0));
+ return dst;
+ }
+
+ /* Mux0X(cond, expr0, exprX) (general case) */
+ if (ty == Ity_I32) {
+ HReg r8;
+ HReg rX = iselIntExpr_R(env, e->Iex.Mux0X.exprX);
+ ARMRI84* r0 = iselIntExpr_RI84(NULL, False, env, e->Iex.Mux0X.expr0);
+ HReg dst = newVRegI(env);
+ addInstr(env, mk_iMOVds_RR(dst, rX));
+ r8 = iselIntExpr_R(env, cond);
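+ /* cond has type Ity_I8; the TST against 0xFF sets Z exactly
+ when its low byte is zero, so the EQ-conditional move
+ selects expr0 in that case. */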
+ addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r8,
+ ARMRI84_I84(0xFF,0)));
+ addInstr(env, ARMInstr_CMov(ARMcc_EQ, dst, r0));
+ return dst;
+ }
+ break;
+ }
+
+ default:
+ break;
+ } /* switch (e->tag) */
+
+ /* We get here if no pattern matched. */
+ irreducible:
+ ppIRExpr(e);
+ vpanic("iselIntExpr_R: cannot reduce tree");
+}
+
+
+/* -------------------- 64-bit -------------------- */
+
+/* Compute a 64-bit value into a register pair, which is returned as
+ the first two parameters. As with iselIntExpr_R, these may be
+ either real or virtual regs; in any case they must not be changed
+ by subsequent code emitted by the caller. */
+
+static void iselInt64Expr ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e )
+{
+ iselInt64Expr_wrk(rHi, rLo, env, e);
+# if 0
+ vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
+# endif
+ vassert(hregClass(*rHi) == HRcInt32);
+ vassert(hregIsVirtual(*rHi));
+ vassert(hregClass(*rLo) == HRcInt32);
+ vassert(hregIsVirtual(*rLo));
+}
+
+/* DO NOT CALL THIS DIRECTLY ! */
+static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e )
+{
+ vassert(e);
+ vassert(typeOfIRExpr(env->type_env,e) == Ity_I64);
+
+ /* 64-bit literal */
+ if (e->tag == Iex_Const) {
+ ULong w64 = e->Iex.Const.con->Ico.U64;
+ UInt wHi = toUInt(w64 >> 32);
+ UInt wLo = toUInt(w64);
+ HReg tHi = newVRegI(env);
+ HReg tLo = newVRegI(env);
+ vassert(e->Iex.Const.con->tag == Ico_U64);
+ addInstr(env, ARMInstr_Imm32(tHi, wHi));
+ addInstr(env, ARMInstr_Imm32(tLo, wLo));
+ *rHi = tHi;
+ *rLo = tLo;
+ return;
+ }
+
+ /* read 64-bit IRTemp */
+ if (e->tag == Iex_RdTmp) {
+ if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
+ HReg tHi = newVRegI(env);
+ HReg tLo = newVRegI(env);
+ HReg tmp = iselNeon64Expr(env, e);
+ addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo));
+ *rHi = tHi;
+ *rLo = tLo;
+ } else {
+ lookupIRTemp64( rHi, rLo, env, e->Iex.RdTmp.tmp);
+ }
+ return;
+ }
+
+ /* 64-bit load */
+ if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
+ HReg tLo, tHi, rA;
+ vassert(e->Iex.Load.ty == Ity_I64);
+ rA = iselIntExpr_R(env, e->Iex.Load.addr);
+ tHi = newVRegI(env);
+ tLo = newVRegI(env);
+ addInstr(env, ARMInstr_LdSt32(True/*isLoad*/, tHi, ARMAMode1_RI(rA, 4)));
+ addInstr(env, ARMInstr_LdSt32(True/*isLoad*/, tLo, ARMAMode1_RI(rA, 0)));
+ *rHi = tHi;
+ *rLo = tLo;
+ return;
+ }
+
+ /* 64-bit GET */
+ if (e->tag == Iex_Get) {
+ ARMAMode1* am0 = ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset + 0);
+ ARMAMode1* am4 = ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset + 4);
+ HReg tHi = newVRegI(env);
+ HReg tLo = newVRegI(env);
+ addInstr(env, ARMInstr_LdSt32(True/*isLoad*/, tHi, am4));
+ addInstr(env, ARMInstr_LdSt32(True/*isLoad*/, tLo, am0));
+ *rHi = tHi;
+ *rLo = tLo;
+ return;
+ }
+
+ /* --------- BINARY ops --------- */
+ if (e->tag == Iex_Binop) {
+ switch (e->Iex.Binop.op) {
+
+ /* 32 x 32 -> 64 multiply */
+ case Iop_MullS32:
+ case Iop_MullU32: {
+ HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
+ HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
+ HReg tHi = newVRegI(env);
+ HReg tLo = newVRegI(env);
+ ARMMulOp mop = e->Iex.Binop.op == Iop_MullS32
+ ? ARMmul_SX : ARMmul_ZX;
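+ /* ARMInstr_Mul implicitly takes its operands in r2 and r3 and
+ writes the 64-bit product to r1:r0, hence the fixed-register
+ moves around it. */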
+ addInstr(env, mk_iMOVds_RR(hregARM_R2(), argL));
+ addInstr(env, mk_iMOVds_RR(hregARM_R3(), argR));
+ addInstr(env, ARMInstr_Mul(mop));
+ addInstr(env, mk_iMOVds_RR(tHi, hregARM_R1()));
+ addInstr(env, mk_iMOVds_RR(tLo, hregARM_R0()));
+ *rHi = tHi;
+ *rLo = tLo;
+ return;
+ }
+
+ case Iop_Or64: {
+ HReg xLo, xHi, yLo, yHi;
+ HReg tHi = newVRegI(env);
+ HReg tLo = newVRegI(env);
+ iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
+ iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
+ addInstr(env, ARMInstr_Alu(ARMalu_OR, tHi, xHi, ARMRI84_R(yHi)));
+ addInstr(env, ARMInstr_Alu(ARMalu_OR, tLo, xLo, ARMRI84_R(yLo)));
+ *rHi = tHi;
+ *rLo = tLo;
+ return;
+ }
+
+ case Iop_Add64: {
+ HReg xLo, xHi, yLo, yHi;
+ HReg tHi = newVRegI(env);
+ HReg tLo = newVRegI(env);
+ iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
+ iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
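+ /* tLo = xLo + yLo, setting the carry flag;
+ tHi = xHi + yHi + carry. */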
+ addInstr(env, ARMInstr_Alu(ARMalu_ADDS, tLo, xLo, ARMRI84_R(yLo)));
+ addInstr(env, ARMInstr_Alu(ARMalu_ADC, tHi, xHi, ARMRI84_R(yHi)));
+ *rHi = tHi;
+ *rLo = tLo;
+ return;
+ }
+
+ /* 32HLto64(e1,e2) */
+ case Iop_32HLto64: {
+ *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
+ *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
+ return;
+ }
+
+ default:
+ break;
+ }
+ }
+
+ /* --------- UNARY ops --------- */
+ if (e->tag == Iex_Unop) {
+ switch (e->Iex.Unop.op) {
+
+ /* ReinterpF64asI64 */
+ case Iop_ReinterpF64asI64: {
+ HReg dstHi = newVRegI(env);
+ HReg dstLo = newVRegI(env);
+ HReg src = iselDblExpr(env, e->Iex.Unop.arg);
+ addInstr(env, ARMInstr_VXferD(False/*!toD*/, src, dstHi, dstLo));
+ *rHi = dstHi;
+ *rLo = dstLo;
+ return;
+ }
+
+ /* Left64(e) */
+ case Iop_Left64: {
+ HReg yLo, yHi;
+ HReg tHi = newVRegI(env);
+ HReg tLo = newVRegI(env);
+ HReg zero = newVRegI(env);
+ /* yHi:yLo = arg */
+ iselInt64Expr(&yHi, &yLo, env, e->Iex.Unop.arg);
+ /* zero = 0 */
+ addInstr(env, ARMInstr_Imm32(zero, 0));
+ /* tLo = 0 - yLo, and set carry */
+ addInstr(env, ARMInstr_Alu(ARMalu_SUBS,
+ tLo, zero, ARMRI84_R(yLo)));
+ /* tHi = 0 - yHi - carry */
+ addInstr(env, ARMInstr_Alu(ARMalu_SBC,
+ tHi, zero, ARMRI84_R(yHi)));
+ /* So now we have tHi:tLo = -arg. To finish off, or 'arg'
+ back in, so as to give the final result
+ tHi:tLo = arg | -arg. */
+ addInstr(env, ARMInstr_Alu(ARMalu_OR, tHi, tHi, ARMRI84_R(yHi)));
+ addInstr(env, ARMInstr_Alu(ARMalu_OR, tLo, tLo, ARMRI84_R(yLo)));
+ *rHi = tHi;
+ *rLo = tLo;
+ return;
+ }
+
+ /* CmpwNEZ64(e) */
+ case Iop_CmpwNEZ64: {
+ HReg srcLo, srcHi;
+ HReg tmp1 = newVRegI(env);
+ HReg tmp2 = newVRegI(env);
+ /* srcHi:srcLo = arg */
+ iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg);
+ /* tmp1 = srcHi | srcLo */
+ addInstr(env, ARMInstr_Alu(ARMalu_OR,
+ tmp1, srcHi, ARMRI84_R(srcLo)));
+ /* tmp2 = (tmp1 | -tmp1) >>s 31 */
+ addInstr(env, ARMInstr_Unary(ARMun_NEG, tmp2, tmp1));
+ addInstr(env, ARMInstr_Alu(ARMalu_OR,
+ tmp2, tmp2, ARMRI84_R(tmp1)));
+ addInstr(env, ARMInstr_Shift(ARMsh_SAR,
+ tmp2, tmp2, ARMRI5_I5(31)));
+ *rHi = tmp2;
+ *rLo = tmp2;
+ return;
+ }
+
+ case Iop_1Sto64: {
+ HReg dst = newVRegI(env);
+ ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
+ ARMRI5* amt = ARMRI5_I5(31);
+ /* This is really rough. We could do much better here;
+ perhaps mvn{cond} dst, #0 as the second insn?
+ (same applies to 1Sto32) */
+ addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
+ addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
+ addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, dst, amt));
+ addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt));
+ *rHi = dst;
+ *rLo = dst;
+ return;
+ }
+
+ default:
+ break;
+ }
+ } /* if (e->tag == Iex_Unop) */
+
+ /* --------- MULTIPLEX --------- */
+ if (e->tag == Iex_Mux0X) {
+ IRType ty8;
+ HReg r8, rXhi, rXlo, r0hi, r0lo, dstHi, dstLo;
+ ty8 = typeOfIRExpr(env->type_env,e->Iex.Mux0X.cond);
+ vassert(ty8 == Ity_I8);
+ iselInt64Expr(&rXhi, &rXlo, env, e->Iex.Mux0X.exprX);
+ iselInt64Expr(&r0hi, &r0lo, env, e->Iex.Mux0X.expr0);
+ dstHi = newVRegI(env);
+ dstLo = newVRegI(env);
+ addInstr(env, mk_iMOVds_RR(dstHi, rXhi));
+ addInstr(env, mk_iMOVds_RR(dstLo, rXlo));
+ r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond);
+ addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r8,
+ ARMRI84_I84(0xFF,0)));
+ addInstr(env, ARMInstr_CMov(ARMcc_EQ, dstHi, ARMRI84_R(r0hi)));
+ addInstr(env, ARMInstr_CMov(ARMcc_EQ, dstLo, ARMRI84_R(r0lo)));
+ *rHi = dstHi;
+ *rLo = dstLo;
+ return;
+ }
+
+ /* It is convenient sometimes to call iselInt64Expr even when we
+ have NEON support (e.g. in doHelperCall we need 64-bit
+ arguments as 2 x 32-bit regs). */
+ if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
+ HReg tHi = newVRegI(env);
+ HReg tLo = newVRegI(env);
+ HReg tmp = iselNeon64Expr(env, e);
+ addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo));
+ *rHi = tHi;
+ *rLo = tLo;
+ return;
+ }
+
+ ppIRExpr(e);
+ vpanic("iselInt64Expr");
+}
+
+
+/*---------------------------------------------------------*/
+/*--- ISEL: Vector (NEON) expressions (64 or 128 bit) ---*/
+/*---------------------------------------------------------*/
+
+static HReg iselNeon64Expr ( ISelEnv* env, IRExpr* e )
+{
+ HReg r = iselNeon64Expr_wrk( env, e );
+ vassert(hregClass(r) == HRcFlt64);
+ vassert(hregIsVirtual(r));
+ return r;
+}
+
+/* DO NOT CALL THIS DIRECTLY */
+static HReg iselNeon64Expr_wrk ( ISelEnv* env, IRExpr* e )
+{
+ IRType ty = typeOfIRExpr(env->type_env, e);
+ MatchInfo mi;
+ vassert(e);
+ vassert(ty == Ity_I64);
+
+ if (e->tag == Iex_RdTmp) {
+ return lookupIRTemp(env, e->Iex.RdTmp.tmp);
+ }
+
+ if (e->tag == Iex_Const) {
+ HReg rLo, rHi;
+ HReg res = newVRegD(env);
+ iselInt64Expr(&rHi, &rLo, env, e);
+ addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
+ return res;
+ }
+
+ /* 64-bit load */
+ if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
+ HReg res = newVRegD(env);
+ ARMAModeN* am = iselIntExpr_AModeN(env, e->Iex.Load.addr);
+ vassert(ty == Ity_I64);
+ addInstr(env, ARMInstr_NLdStD(True, res, am));
+ return res;
+ }
+
+ /* 64-bit GET */
+ if (e->tag == Iex_Get) {
+ HReg addr = newVRegI(env);
+ HReg res = newVRegD(env);
+ vassert(ty == Ity_I64);
+ addInstr(env, ARMInstr_Add32(addr, hregARM_R8(), e->Iex.Get.offset));
+ addInstr(env, ARMInstr_NLdStD(True, res, mkARMAModeN_R(addr)));
+ return res;
+ }
+
+ /* --------- BINARY ops --------- */
+ if (e->tag == Iex_Binop) {
+ switch (e->Iex.Binop.op) {
+
+ /* 32 x 32 -> 64 multiply */
+ case Iop_MullS32:
+ case Iop_MullU32: {
+ HReg rLo, rHi;
+ HReg res = newVRegD(env);
+ iselInt64Expr(&rHi, &rLo, env, e);
+ addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
+ return res;
+ }
+
+ case Iop_And64: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
+ res, argL, argR, 4, False));
+ return res;
+ }
+ case Iop_Or64: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
+ res, argL, argR, 4, False));
+ return res;
+ }
+ case Iop_Xor64: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ addInstr(env, ARMInstr_NBinary(ARMneon_VXOR,
+ res, argL, argR, 4, False));
+ return res;
+ }
+
+ /* 32HLto64(e1,e2) */
+ case Iop_32HLto64: {
+ HReg rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
+ HReg rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
+ HReg res = newVRegD(env);
+ addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
+ return res;
+ }
+
+ case Iop_Add8x8:
+ case Iop_Add16x4:
+ case Iop_Add32x2:
+ case Iop_Add64: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ UInt size;
+ switch (e->Iex.Binop.op) {
+ case Iop_Add8x8: size = 0; break;
+ case Iop_Add16x4: size = 1; break;
+ case Iop_Add32x2: size = 2; break;
+ case Iop_Add64: size = 3; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NBinary(ARMneon_VADD,
+ res, argL, argR, size, False));
+ return res;
+ }
+ case Iop_Add32Fx2: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ UInt size = 0;
+ addInstr(env, ARMInstr_NBinary(ARMneon_VADDFP,
+ res, argL, argR, size, False));
+ return res;
+ }
+ case Iop_Recps32Fx2: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ UInt size = 0;
+ addInstr(env, ARMInstr_NBinary(ARMneon_VRECPS,
+ res, argL, argR, size, False));
+ return res;
+ }
+ case Iop_Rsqrts32Fx2: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ UInt size = 0;
+ addInstr(env, ARMInstr_NBinary(ARMneon_VRSQRTS,
+ res, argL, argR, size, False));
+ return res;
+ }
+ case Iop_InterleaveOddLanes8x8:
+ case Iop_InterleaveOddLanes16x4:
+ case Iop_InterleaveLO32x2:
+ case Iop_InterleaveEvenLanes8x8:
+ case Iop_InterleaveEvenLanes16x4:
+ case Iop_InterleaveHI32x2: {
+ HReg tmp = newVRegD(env);
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ UInt size;
+ UInt is_lo;
+ switch (e->Iex.Binop.op) {
+ case Iop_InterleaveOddLanes8x8: is_lo = 1; size = 0; break;
+ case Iop_InterleaveEvenLanes8x8: is_lo = 0; size = 0; break;
+ case Iop_InterleaveOddLanes16x4: is_lo = 1; size = 1; break;
+ case Iop_InterleaveEvenLanes16x4: is_lo = 0; size = 1; break;
+ case Iop_InterleaveLO32x2: is_lo = 1; size = 2; break;
+ case Iop_InterleaveHI32x2: is_lo = 0; size = 2; break;
+ default: vassert(0);
+ }
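+ /* VTRN exchanges lanes between its two operands in place, so
+ work on copies of the args; afterwards 'res' holds the
+ requested odd/even lanes drawn from both inputs. */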
+ if (is_lo) {
+ addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
+ tmp, argL, 4, False));
+ addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
+ res, argR, 4, False));
+ addInstr(env, ARMInstr_NDual(ARMneon_TRN,
+ res, tmp, size, False));
+ } else {
+ addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
+ tmp, argR, 4, False));
+ addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
+ res, argL, 4, False));
+ addInstr(env, ARMInstr_NDual(ARMneon_TRN,
+ tmp, res, size, False));
+ }
+ return res;
+ }
+ case Iop_InterleaveHI8x8:
+ case Iop_InterleaveHI16x4:
+ case Iop_InterleaveLO8x8:
+ case Iop_InterleaveLO16x4: {
+ HReg tmp = newVRegD(env);
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ UInt size;
+ UInt is_lo;
+ switch (e->Iex.Binop.op) {
+ case Iop_InterleaveHI8x8: is_lo = 1; size = 0; break;
+ case Iop_InterleaveLO8x8: is_lo = 0; size = 0; break;
+ case Iop_InterleaveHI16x4: is_lo = 1; size = 1; break;
+ case Iop_InterleaveLO16x4: is_lo = 0; size = 1; break;
+ default: vassert(0);
+ }
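+ /* VZIP interleaves its two operands in place; again work on
+ copies so the argument registers are left unmodified. */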
+ if (is_lo) {
+ addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
+ tmp, argL, 4, False));
+ addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
+ res, argR, 4, False));
+ addInstr(env, ARMInstr_NDual(ARMneon_ZIP,
+ res, tmp, size, False));
+ } else {
+ addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
+ tmp, argR, 4, False));
+ addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
+ res, argL, 4, False));
+ addInstr(env, ARMInstr_NDual(ARMneon_ZIP,
+ tmp, res, size, False));
+ }
+ return res;
+ }
+ case Iop_CatOddLanes8x8:
+ case Iop_CatOddLanes16x4:
+ case Iop_CatEvenLanes8x8:
+ case Iop_CatEvenLanes16x4: {
+ HReg tmp = newVRegD(env);
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ UInt size;
+ UInt is_lo;
+ switch (e->Iex.Binop.op) {
+ case Iop_CatOddLanes8x8: is_lo = 1; size = 0; break;
+ case Iop_CatEvenLanes8x8: is_lo = 0; size = 0; break;
+ case Iop_CatOddLanes16x4: is_lo = 1; size = 1; break;
+ case Iop_CatEvenLanes16x4: is_lo = 0; size = 1; break;
+ default: vassert(0);
+ }
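+ /* VUZP de-interleaves in place, separating even and odd lanes;
+ as above, copies protect the argument registers. */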
+ if (is_lo) {
+ addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
+ tmp, argL, 4, False));
+ addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
+ res, argR, 4, False));
+ addInstr(env, ARMInstr_NDual(ARMneon_UZP,
+ res, tmp, size, False));
+ } else {
+ addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
+ tmp, argR, 4, False));
+ addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
+ res, argL, 4, False));
+ addInstr(env, ARMInstr_NDual(ARMneon_UZP,
+ tmp, res, size, False));
+ }
+ return res;
+ }
+ case Iop_QAdd8Ux8:
+ case Iop_QAdd16Ux4:
+ case Iop_QAdd32Ux2:
+ case Iop_QAdd64Ux1: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ UInt size;
+ switch (e->Iex.Binop.op) {
+ case Iop_QAdd8Ux8: size = 0; break;
+ case Iop_QAdd16Ux4: size = 1; break;
+ case Iop_QAdd32Ux2: size = 2; break;
+ case Iop_QAdd64Ux1: size = 3; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NBinary(ARMneon_VQADDU,
+ res, argL, argR, size, False));
+ return res;
+ }
+ case Iop_QAdd8Sx8:
+ case Iop_QAdd16Sx4:
+ case Iop_QAdd32Sx2:
+ case Iop_QAdd64Sx1: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ UInt size;
+ switch (e->Iex.Binop.op) {
+ case Iop_QAdd8Sx8: size = 0; break;
+ case Iop_QAdd16Sx4: size = 1; break;
+ case Iop_QAdd32Sx2: size = 2; break;
+ case Iop_QAdd64Sx1: size = 3; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NBinary(ARMneon_VQADDS,
+ res, argL, argR, size, False));
+ return res;
+ }
+ case Iop_Sub8x8:
+ case Iop_Sub16x4:
+ case Iop_Sub32x2:
+ case Iop_Sub64: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ UInt size;
+ switch (e->Iex.Binop.op) {
+ case Iop_Sub8x8: size = 0; break;
+ case Iop_Sub16x4: size = 1; break;
+ case Iop_Sub32x2: size = 2; break;
+ case Iop_Sub64: size = 3; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
+ res, argL, argR, size, False));
+ return res;
+ }
+ case Iop_Sub32Fx2: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ UInt size = 0;
+ addInstr(env, ARMInstr_NBinary(ARMneon_VSUBFP,
+ res, argL, argR, size, False));
+ return res;
+ }
+ case Iop_QSub8Ux8:
+ case Iop_QSub16Ux4:
+ case Iop_QSub32Ux2:
+ case Iop_QSub64Ux1: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ UInt size;
+ switch (e->Iex.Binop.op) {
+ case Iop_QSub8Ux8: size = 0; break;
+ case Iop_QSub16Ux4: size = 1; break;
+ case Iop_QSub32Ux2: size = 2; break;
+ case Iop_QSub64Ux1: size = 3; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBU,
+ res, argL, argR, size, False));
+ return res;
+ }
+ case Iop_QSub8Sx8:
+ case Iop_QSub16Sx4:
+ case Iop_QSub32Sx2:
+ case Iop_QSub64Sx1: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ UInt size;
+ switch (e->Iex.Binop.op) {
+ case Iop_QSub8Sx8: size = 0; break;
+ case Iop_QSub16Sx4: size = 1; break;
+ case Iop_QSub32Sx2: size = 2; break;
+ case Iop_QSub64Sx1: size = 3; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBS,
+ res, argL, argR, size, False));
+ return res;
+ }
+ case Iop_Max8Ux8:
+ case Iop_Max16Ux4:
+ case Iop_Max32Ux2: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ UInt size;
+ switch (e->Iex.Binop.op) {
+ case Iop_Max8Ux8: size = 0; break;
+ case Iop_Max16Ux4: size = 1; break;
+ case Iop_Max32Ux2: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NBinary(ARMneon_VMAXU,
+ res, argL, argR, size, False));
+ return res;
+ }
+ case Iop_Max8Sx8:
+ case Iop_Max16Sx4:
+ case Iop_Max32Sx2: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ UInt size;
+ switch (e->Iex.Binop.op) {
+ case Iop_Max8Sx8: size = 0; break;
+ case Iop_Max16Sx4: size = 1; break;
+ case Iop_Max32Sx2: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NBinary(ARMneon_VMAXS,
+ res, argL, argR, size, False));
+ return res;
+ }
+ case Iop_Min8Ux8:
+ case Iop_Min16Ux4:
+ case Iop_Min32Ux2: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ UInt size;
+ switch (e->Iex.Binop.op) {
+ case Iop_Min8Ux8: size = 0; break;
+ case Iop_Min16Ux4: size = 1; break;
+ case Iop_Min32Ux2: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NBinary(ARMneon_VMINU,
+ res, argL, argR, size, False));
+ return res;
+ }
+ case Iop_Min8Sx8:
+ case Iop_Min16Sx4:
+ case Iop_Min32Sx2: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ UInt size;
+ switch (e->Iex.Binop.op) {
+ case Iop_Min8Sx8: size = 0; break;
+ case Iop_Min16Sx4: size = 1; break;
+ case Iop_Min32Sx2: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NBinary(ARMneon_VMINS,
+ res, argL, argR, size, False));
+ return res;
+ }
+ case Iop_Sar8x8:
+ case Iop_Sar16x4:
+ case Iop_Sar32x2: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ HReg argR2 = newVRegD(env);
+ HReg zero = newVRegD(env);
+ UInt size;
+ switch (e->Iex.Binop.op) {
+ case Iop_Sar8x8: size = 0; break;
+ case Iop_Sar16x4: size = 1; break;
+ case Iop_Sar32x2: size = 2; break;
+ default: vassert(0);
+ }
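+ /* NEON register shifts shift left by a signed per-lane count,
+ with negative counts shifting right; so negate the counts
+ and use the signed (arithmetic) form. */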
+ addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
+ addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
+ argR2, zero, argR, size, False));
+ addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
+ res, argL, argR2, size, False));
+ return res;
+ }
+ case Iop_Sal8x8:
+ case Iop_Sal16x4:
+ case Iop_Sal32x2:
+ case Iop_Sal64x1: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ UInt size;
+ switch (e->Iex.Binop.op) {
+ case Iop_Sal8x8: size = 0; break;
+ case Iop_Sal16x4: size = 1; break;
+ case Iop_Sal32x2: size = 2; break;
+ case Iop_Sal64x1: size = 3; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
+ res, argL, argR, size, False));
+ return res;
+ }
+ case Iop_Shr8x8:
+ case Iop_Shr16x4:
+ case Iop_Shr32x2: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ HReg argR2 = newVRegD(env);
+ HReg zero = newVRegD(env);
+ UInt size;
+ switch (e->Iex.Binop.op) {
+ case Iop_Shr8x8: size = 0; break;
+ case Iop_Shr16x4: size = 1; break;
+ case Iop_Shr32x2: size = 2; break;
+ default: vassert(0);
+ }
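+ /* Same negated-count trick as Sar above, but with the logical
+ shift form, giving a zero-filling right shift. */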
+ addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
+ addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
+ argR2, zero, argR, size, False));
+ addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
+ res, argL, argR2, size, False));
+ return res;
+ }
+ case Iop_Shl8x8:
+ case Iop_Shl16x4:
+ case Iop_Shl32x2: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ UInt size;
+ switch (e->Iex.Binop.op) {
+ case Iop_Shl8x8: size = 0; break;
+ case Iop_Shl16x4: size = 1; break;
+ case Iop_Shl32x2: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
+ res, argL, argR, size, False));
+ return res;
+ }
+ case Iop_QShl8x8:
+ case Iop_QShl16x4:
+ case Iop_QShl32x2:
+ case Iop_QShl64x1: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ UInt size;
+ switch (e->Iex.Binop.op) {
+ case Iop_QShl8x8: size = 0; break;
+ case Iop_QShl16x4: size = 1; break;
+ case Iop_QShl32x2: size = 2; break;
+ case Iop_QShl64x1: size = 3; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NShift(ARMneon_VQSHL,
+ res, argL, argR, size, False));
+ return res;
+ }
+ case Iop_QSal8x8:
+ case Iop_QSal16x4:
+ case Iop_QSal32x2:
+ case Iop_QSal64x1: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ UInt size;
+ switch (e->Iex.Binop.op) {
+ case Iop_QSal8x8: size = 0; break;
+ case Iop_QSal16x4: size = 1; break;
+ case Iop_QSal32x2: size = 2; break;
+ case Iop_QSal64x1: size = 3; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NShift(ARMneon_VQSAL,
+ res, argL, argR, size, False));
+ return res;
+ }
+ case Iop_QShlN8x8:
+ case Iop_QShlN16x4:
+ case Iop_QShlN32x2:
+ case Iop_QShlN64x1: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ UInt size, imm;
+ if (e->Iex.Binop.arg2->tag != Iex_Const ||
+ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
+ vpanic("ARM taget supports Iop_QShlNAxB with constant "
+ "second argument only\n");
+ }
+ imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
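+ /* Pack the lane width and shift amount into the single 'size'
+ operand (width-bit | imm); the instruction emitter
+ presumably unpacks this encoding. */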
+ switch (e->Iex.Binop.op) {
+ case Iop_QShlN8x8: size = 8 | imm; break;
+ case Iop_QShlN16x4: size = 16 | imm; break;
+ case Iop_QShlN32x2: size = 32 | imm; break;
+ case Iop_QShlN64x1: size = 64 | imm; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUU,
+ res, argL, size, False));
+ return res;
+ }
+ case Iop_QShlN8Sx8:
+ case Iop_QShlN16Sx4:
+ case Iop_QShlN32Sx2:
+ case Iop_QShlN64Sx1: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ UInt size, imm;
+ if (e->Iex.Binop.arg2->tag != Iex_Const ||
+ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
+ vpanic("ARM taget supports Iop_QShlNAxB with constant "
+ "second argument only\n");
+ }
+ imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
+ switch (e->Iex.Binop.op) {
+ case Iop_QShlN8Sx8: size = 8 | imm; break;
+ case Iop_QShlN16Sx4: size = 16 | imm; break;
+ case Iop_QShlN32Sx2: size = 32 | imm; break;
+ case Iop_QShlN64Sx1: size = 64 | imm; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUS,
+ res, argL, size, False));
+ return res;
+ }
+ case Iop_QSalN8x8:
+ case Iop_QSalN16x4:
+ case Iop_QSalN32x2:
+ case Iop_QSalN64x1: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ UInt size, imm;
+ if (e->Iex.Binop.arg2->tag != Iex_Const ||
+ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
+ vpanic("ARM taget supports Iop_QShlNAxB with constant "
+ "second argument only\n");
+ }
+ imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
+ switch (e->Iex.Binop.op) {
+ case Iop_QSalN8x8: size = 8 | imm; break;
+ case Iop_QSalN16x4: size = 16 | imm; break;
+ case Iop_QSalN32x2: size = 32 | imm; break;
+ case Iop_QSalN64x1: size = 64 | imm; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNSS,
+ res, argL, size, False));
+ return res;
+ }
+ case Iop_ShrN8x8:
+ case Iop_ShrN16x4:
+ case Iop_ShrN32x2:
+ case Iop_Shr64: {
+ HReg res = newVRegD(env);
+ HReg tmp = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
+ HReg argR2 = newVRegI(env);
+ UInt size;
+ switch (e->Iex.Binop.op) {
+ case Iop_ShrN8x8: size = 0; break;
+ case Iop_ShrN16x4: size = 1; break;
+ case Iop_ShrN32x2: size = 2; break;
+ case Iop_Shr64: size = 3; break;
+ default: vassert(0);
+ }
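+ /* Negate the scalar count and duplicate it across all lanes,
+ so VSHL's negative counts produce the logical right shift. */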
+ addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
+ addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, False));
+ addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
+ res, argL, tmp, size, False));
+ return res;
+ }
+ case Iop_ShlN8x8:
+ case Iop_ShlN16x4:
+ case Iop_ShlN32x2:
+ case Iop_Shl64: {
+ HReg res = newVRegD(env);
+ HReg tmp = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
+ UInt size;
+ switch (e->Iex.Binop.op) {
+ case Iop_ShlN8x8: size = 0; break;
+ case Iop_ShlN16x4: size = 1; break;
+ case Iop_ShlN32x2: size = 2; break;
+ case Iop_Shl64: size = 3; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR, 0, False));
+ addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
+ res, argL, tmp, size, False));
+ return res;
+ }
+ case Iop_SarN8x8:
+ case Iop_SarN16x4:
+ case Iop_SarN32x2:
+ case Iop_Sar64: {
+ HReg res = newVRegD(env);
+ HReg tmp = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
+ HReg argR2 = newVRegI(env);
+ UInt size;
+ switch (e->Iex.Binop.op) {
+ case Iop_SarN8x8: size = 0; break;
+ case Iop_SarN16x4: size = 1; break;
+ case Iop_SarN32x2: size = 2; break;
+ case Iop_Sar64: size = 3; break;
+ default: vassert(0);
+ }
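+ /* As for ShrN above, but with the signed shift so that the
+ right shift is arithmetic. */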
+ addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
+ addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, False));
+ addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
+ res, argL, tmp, size, False));
+ return res;
+ }
+ case Iop_CmpGT8Ux8:
+ case Iop_CmpGT16Ux4:
+ case Iop_CmpGT32Ux2: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ UInt size;
+ switch (e->Iex.Binop.op) {
+ case Iop_CmpGT8Ux8: size = 0; break;
+ case Iop_CmpGT16Ux4: size = 1; break;
+ case Iop_CmpGT32Ux2: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NBinary(ARMneon_VCGTU,
+ res, argL, argR, size, False));
+ return res;
+ }
+ case Iop_CmpGT8Sx8:
+ case Iop_CmpGT16Sx4:
+ case Iop_CmpGT32Sx2: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ UInt size;
+ switch (e->Iex.Binop.op) {
+ case Iop_CmpGT8Sx8: size = 0; break;
+ case Iop_CmpGT16Sx4: size = 1; break;
+ case Iop_CmpGT32Sx2: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NBinary(ARMneon_VCGTS,
+ res, argL, argR, size, False));
+ return res;
+ }
+ case Iop_CmpEQ8x8:
+ case Iop_CmpEQ16x4:
+ case Iop_CmpEQ32x2: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ UInt size;
+ switch (e->Iex.Binop.op) {
+ case Iop_CmpEQ8x8: size = 0; break;
+ case Iop_CmpEQ16x4: size = 1; break;
+ case Iop_CmpEQ32x2: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NBinary(ARMneon_VCEQ,
+ res, argL, argR, size, False));
+ return res;
+ }
+ case Iop_Mul8x8:
+ case Iop_Mul16x4:
+ case Iop_Mul32x2: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ UInt size = 0;
+ switch(e->Iex.Binop.op) {
+ case Iop_Mul8x8: size = 0; break;
+ case Iop_Mul16x4: size = 1; break;
+ case Iop_Mul32x2: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NBinary(ARMneon_VMUL,
+ res, argL, argR, size, False));
+ return res;
+ }
+ case Iop_Mul32Fx2: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ UInt size = 0;
+ addInstr(env, ARMInstr_NBinary(ARMneon_VMULFP,
+ res, argL, argR, size, False));
+ return res;
+ }
+ case Iop_QDMulHi16Sx4:
+ case Iop_QDMulHi32Sx2: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ UInt size = 0;
+ switch(e->Iex.Binop.op) {
+ case Iop_QDMulHi16Sx4: size = 1; break;
+ case Iop_QDMulHi32Sx2: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULH,
+ res, argL, argR, size, False));
+ return res;
+ }
+
+ case Iop_QRDMulHi16Sx4:
+ case Iop_QRDMulHi32Sx2: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ UInt size = 0;
+ switch(e->Iex.Binop.op) {
+ case Iop_QRDMulHi16Sx4: size = 1; break;
+ case Iop_QRDMulHi32Sx2: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NBinary(ARMneon_VQRDMULH,
+ res, argL, argR, size, False));
+ return res;
+ }
+
+ case Iop_PwAdd8x8:
+ case Iop_PwAdd16x4:
+ case Iop_PwAdd32x2: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ UInt size = 0;
+ switch(e->Iex.Binop.op) {
+ case Iop_PwAdd8x8: size = 0; break;
+ case Iop_PwAdd16x4: size = 1; break;
+ case Iop_PwAdd32x2: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NBinary(ARMneon_VPADD,
+ res, argL, argR, size, False));
+ return res;
+ }
+ case Iop_PwAdd32Fx2: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ UInt size = 0;
+ addInstr(env, ARMInstr_NBinary(ARMneon_VPADDFP,
+ res, argL, argR, size, False));
+ return res;
+ }
+ case Iop_PwMin8Ux8:
+ case Iop_PwMin16Ux4:
+ case Iop_PwMin32Ux2: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ UInt size = 0;
+ switch(e->Iex.Binop.op) {
+ case Iop_PwMin8Ux8: size = 0; break;
+ case Iop_PwMin16Ux4: size = 1; break;
+ case Iop_PwMin32Ux2: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NBinary(ARMneon_VPMINU,
+ res, argL, argR, size, False));
+ return res;
+ }
+ case Iop_PwMin8Sx8:
+ case Iop_PwMin16Sx4:
+ case Iop_PwMin32Sx2: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ UInt size = 0;
+ switch(e->Iex.Binop.op) {
+ case Iop_PwMin8Sx8: size = 0; break;
+ case Iop_PwMin16Sx4: size = 1; break;
+ case Iop_PwMin32Sx2: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NBinary(ARMneon_VPMINS,
+ res, argL, argR, size, False));
+ return res;
+ }
+ case Iop_PwMax8Ux8:
+ case Iop_PwMax16Ux4:
+ case Iop_PwMax32Ux2: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ UInt size = 0;
+ switch(e->Iex.Binop.op) {
+ case Iop_PwMax8Ux8: size = 0; break;
+ case Iop_PwMax16Ux4: size = 1; break;
+ case Iop_PwMax32Ux2: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXU,
+ res, argL, argR, size, False));
+ return res;
+ }
+ case Iop_PwMax8Sx8:
+ case Iop_PwMax16Sx4:
+ case Iop_PwMax32Sx2: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ UInt size = 0;
+ switch(e->Iex.Binop.op) {
+ case Iop_PwMax8Sx8: size = 0; break;
+ case Iop_PwMax16Sx4: size = 1; break;
+ case Iop_PwMax32Sx2: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXS,
+ res, argL, argR, size, False));
+ return res;
+ }
+ case Iop_Perm8x8: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ addInstr(env, ARMInstr_NBinary(ARMneon_VTBL,
+ res, argL, argR, 0, False));
+ return res;
+ }
+ case Iop_PolynomialMul8x8: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ UInt size = 0;
+ addInstr(env, ARMInstr_NBinary(ARMneon_VMULP,
+ res, argL, argR, size, False));
+ return res;
+ }
+ case Iop_Max32Fx2: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ addInstr(env, ARMInstr_NBinary(ARMneon_VMAXF,
+ res, argL, argR, 2, False));
+ return res;
+ }
+ case Iop_Min32Fx2: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ addInstr(env, ARMInstr_NBinary(ARMneon_VMINF,
+ res, argL, argR, 2, False));
+ return res;
+ }
+ case Iop_PwMax32Fx2: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXF,
+ res, argL, argR, 2, False));
+ return res;
+ }
+ case Iop_PwMin32Fx2: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ addInstr(env, ARMInstr_NBinary(ARMneon_VPMINF,
+ res, argL, argR, 2, False));
+ return res;
+ }
+ case Iop_CmpGT32Fx2: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ addInstr(env, ARMInstr_NBinary(ARMneon_VCGTF,
+ res, argL, argR, 2, False));
+ return res;
+ }
+ case Iop_CmpGE32Fx2: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ addInstr(env, ARMInstr_NBinary(ARMneon_VCGEF,
+ res, argL, argR, 2, False));
+ return res;
+ }
+ case Iop_CmpEQ32Fx2: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ addInstr(env, ARMInstr_NBinary(ARMneon_VCEQF,
+ res, argL, argR, 2, False));
+ return res;
+ }
+ case Iop_F32ToFixed32Ux2_RZ:
+ case Iop_F32ToFixed32Sx2_RZ:
+ case Iop_Fixed32UToF32x2_RN:
+ case Iop_Fixed32SToF32x2_RN: {
+ HReg res = newVRegD(env);
+ HReg arg = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ ARMNeonUnOp op;
+ UInt imm6;
+ if (e->Iex.Binop.arg2->tag != Iex_Const ||
+ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
+ vpanic("ARM supports FP <-> Fixed conversion with constant "
+ "second argument less than 33 only\n");
+ }
+ imm6 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
+ vassert(imm6 <= 32 && imm6 > 0);
+ imm6 = 64 - imm6;
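+ /* This matches the NEON VCVT fixed-point immediate encoding,
+ in which imm6 = 64 - #fbits for 32-bit lanes. */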
+ switch(e->Iex.Binop.op) {
+ case Iop_F32ToFixed32Ux2_RZ: op = ARMneon_VCVTFtoFixedU; break;
+ case Iop_F32ToFixed32Sx2_RZ: op = ARMneon_VCVTFtoFixedS; break;
+ case Iop_Fixed32UToF32x2_RN: op = ARMneon_VCVTFixedUtoF; break;
+ case Iop_Fixed32SToF32x2_RN: op = ARMneon_VCVTFixedStoF; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NUnary(op, res, arg, imm6, False));
+ return res;
+ }
+ /*
+ FIXME: is this here or not?
+ case Iop_VDup8x8:
+ case Iop_VDup16x4:
+ case Iop_VDup32x2: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ UInt index;
+ UInt imm4;
+ UInt size = 0;
+ if (e->Iex.Binop.arg2->tag != Iex_Const ||
+ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
+ vpanic("ARM supports Iop_VDup with constant "
+ "second argument less than 16 only\n");
+ }
+ index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
+ switch(e->Iex.Binop.op) {
+ case Iop_VDup8x8: imm4 = (index << 1) + 1; break;
+ case Iop_VDup16x4: imm4 = (index << 2) + 2; break;
+ case Iop_VDup32x2: imm4 = (index << 3) + 4; break;
+ default: vassert(0);
+ }
+ if (imm4 >= 16) {
+ vpanic("ARM supports Iop_VDup with constant "
+ "second argument less than 16 only\n");
+ }
+ addInstr(env, ARMInstr_NUnary(ARMneon_VDUP,
+ res, argL, imm4, False));
+ return res;
+ }
+ */
+ default:
+ break;
+ }
+ }
+
+ /* --------- UNARY ops --------- */
+ if (e->tag == Iex_Unop) {
+ switch (e->Iex.Unop.op) {
+
+ /* ReinterpF64asI64 */
+ case Iop_ReinterpF64asI64:
+ /* Left64(e) */
+ case Iop_Left64:
+ /* CmpwNEZ64(e) */
+ //case Iop_CmpwNEZ64:
+ case Iop_1Sto64: {
+ HReg rLo, rHi;
+ HReg res = newVRegD(env);
+ iselInt64Expr(&rHi, &rLo, env, e);
+ addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
+ return res;
+ }
+ case Iop_Not64: {
+ DECLARE_PATTERN(p_veqz_8x8);
+ DECLARE_PATTERN(p_veqz_16x4);
+ DECLARE_PATTERN(p_veqz_32x2);
+ DECLARE_PATTERN(p_vcge_8sx8);
+ DECLARE_PATTERN(p_vcge_16sx4);
+ DECLARE_PATTERN(p_vcge_32sx2);
+ DECLARE_PATTERN(p_vcge_8ux8);
+ DECLARE_PATTERN(p_vcge_16ux4);
+ DECLARE_PATTERN(p_vcge_32ux2);
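+ /* Try to fold Not64 over a comparison into one insn:
+ Not(CmpNEZx) becomes a compare-equal-with-zero, and
+ Not(CmpGT(y,x)) becomes VCGE(x,y); failing that, fall
+ through to a plain bitwise NOT. */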
+ DEFINE_PATTERN(p_veqz_8x8,
+ unop(Iop_Not64, unop(Iop_CmpNEZ8x8, bind(0))));
+ DEFINE_PATTERN(p_veqz_16x4,
+ unop(Iop_Not64, unop(Iop_CmpNEZ16x4, bind(0))));
+ DEFINE_PATTERN(p_veqz_32x2,
+ unop(Iop_Not64, unop(Iop_CmpNEZ32x2, bind(0))));
+ DEFINE_PATTERN(p_vcge_8sx8,
+ unop(Iop_Not64, binop(Iop_CmpGT8Sx8, bind(1), bind(0))));
+ DEFINE_PATTERN(p_vcge_16sx4,
+ unop(Iop_Not64, binop(Iop_CmpGT16Sx4, bind(1), bind(0))));
+ DEFINE_PATTERN(p_vcge_32sx2,
+ unop(Iop_Not64, binop(Iop_CmpGT32Sx2, bind(1), bind(0))));
+ DEFINE_PATTERN(p_vcge_8ux8,
+ unop(Iop_Not64, binop(Iop_CmpGT8Ux8, bind(1), bind(0))));
+ DEFINE_PATTERN(p_vcge_16ux4,
+ unop(Iop_Not64, binop(Iop_CmpGT16Ux4, bind(1), bind(0))));
+ DEFINE_PATTERN(p_vcge_32ux2,
+ unop(Iop_Not64, binop(Iop_CmpGT32Ux2, bind(1), bind(0))));
+ if (matchIRExpr(&mi, p_veqz_8x8, e)) {
+ HReg res = newVRegD(env);
+ HReg arg = iselNeon64Expr(env, mi.bindee[0]);
+ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 0, False));
+ return res;
+ } else if (matchIRExpr(&mi, p_veqz_16x4, e)) {
+ HReg res = newVRegD(env);
+ HReg arg = iselNeon64Expr(env, mi.bindee[0]);
+ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 1, False));
+ return res;
+ } else if (matchIRExpr(&mi, p_veqz_32x2, e)) {
+ HReg res = newVRegD(env);
+ HReg arg = iselNeon64Expr(env, mi.bindee[0]);
+ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 2, False));
+ return res;
+ } else if (matchIRExpr(&mi, p_vcge_8sx8, e)) {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, mi.bindee[0]);
+ HReg argR = iselNeon64Expr(env, mi.bindee[1]);
+ addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
+ res, argL, argR, 0, False));
+ return res;
+ } else if (matchIRExpr(&mi, p_vcge_16sx4, e)) {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, mi.bindee[0]);
+ HReg argR = iselNeon64Expr(env, mi.bindee[1]);
+ addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
+ res, argL, argR, 1, False));
+ return res;
+ } else if (matchIRExpr(&mi, p_vcge_32sx2, e)) {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, mi.bindee[0]);
+ HReg argR = iselNeon64Expr(env, mi.bindee[1]);
+ addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
+ res, argL, argR, 2, False));
+ return res;
+ } else if (matchIRExpr(&mi, p_vcge_8ux8, e)) {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, mi.bindee[0]);
+ HReg argR = iselNeon64Expr(env, mi.bindee[1]);
+ addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
+ res, argL, argR, 0, False));
+ return res;
+ } else if (matchIRExpr(&mi, p_vcge_16ux4, e)) {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, mi.bindee[0]);
+ HReg argR = iselNeon64Expr(env, mi.bindee[1]);
+ addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
+ res, argL, argR, 1, False));
+ return res;
+ } else if (matchIRExpr(&mi, p_vcge_32ux2, e)) {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, mi.bindee[0]);
+ HReg argR = iselNeon64Expr(env, mi.bindee[1]);
+ addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
+ res, argL, argR, 2, False));
+ return res;
+ } else {
+ HReg res = newVRegD(env);
+ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
+ addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, arg, 4, False));
+ return res;
+ }
+ }
+ case Iop_Dup8x8:
+ case Iop_Dup16x4:
+ case Iop_Dup32x2: {
+ HReg res, arg;
+ UInt size;
+ DECLARE_PATTERN(p_vdup_8x8);
+ DECLARE_PATTERN(p_vdup_16x4);
+ DECLARE_PATTERN(p_vdup_32x2);
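+ /* Dup of a GetElem with a constant lane index maps onto the
+ scalar form of VDUP; the imm4 values computed below follow
+ the VDUP scalar encoding for each element size. */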
+ DEFINE_PATTERN(p_vdup_8x8,
+ unop(Iop_Dup8x8, binop(Iop_GetElem8x8, bind(0), bind(1))));
+ DEFINE_PATTERN(p_vdup_16x4,
+ unop(Iop_Dup16x4, binop(Iop_GetElem16x4, bind(0), bind(1))));
+ DEFINE_PATTERN(p_vdup_32x2,
+ unop(Iop_Dup32x2, binop(Iop_GetElem32x2, bind(0), bind(1))));
+ if (matchIRExpr(&mi, p_vdup_8x8, e)) {
+ UInt index;
+ UInt imm4;
+ if (mi.bindee[1]->tag == Iex_Const &&
+ typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
+ index = mi.bindee[1]->Iex.Const.con->Ico.U8;
+ imm4 = (index << 1) + 1;
+ if (index < 8) {
+ res = newVRegD(env);
+ arg = iselNeon64Expr(env, mi.bindee[0]);
+ addInstr(env, ARMInstr_NUnaryS(
+ ARMneon_VDUP,
+ mkARMNRS(ARMNRS_Reg, res, 0),
+ mkARMNRS(ARMNRS_Scalar, arg, index),
+ imm4, False
+ ));
+ return res;
+ }
+ }
+ } else if (matchIRExpr(&mi, p_vdup_16x4, e)) {
+ UInt index;
+ UInt imm4;
+ if (mi.bindee[1]->tag == Iex_Const &&
+ typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
+ index = mi.bindee[1]->Iex.Const.con->Ico.U8;
+ imm4 = (index << 2) + 2;
+ if (index < 4) {
+ res = newVRegD(env);
+ arg = iselNeon64Expr(env, mi.bindee[0]);
+ addInstr(env, ARMInstr_NUnaryS(
+ ARMneon_VDUP,
+ mkARMNRS(ARMNRS_Reg, res, 0),
+ mkARMNRS(ARMNRS_Scalar, arg, index),
+ imm4, False
+ ));
+ return res;
+ }
+ }
+ } else if (matchIRExpr(&mi, p_vdup_32x2, e)) {
+ UInt index;
+ UInt imm4;
+ if (mi.bindee[1]->tag == Iex_Const &&
+ typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
+ index = mi.bindee[1]->Iex.Const.con->Ico.U8;
+ imm4 = (index << 3) + 4;
+ if (index < 2) {
+ res = newVRegD(env);
+ arg = iselNeon64Expr(env, mi.bindee[0]);
+ addInstr(env, ARMInstr_NUnaryS(
+ ARMneon_VDUP,
+ mkARMNRS(ARMNRS_Reg, res, 0),
+ mkARMNRS(ARMNRS_Scalar, arg, index),
+ imm4, False
+ ));
+ return res;
+ }
+ }
+ }
+ arg = iselIntExpr_R(env, e->Iex.Unop.arg);
+ res = newVRegD(env);
+ switch (e->Iex.Unop.op) {
+ case Iop_Dup8x8: size = 0; break;
+ case Iop_Dup16x4: size = 1; break;
+ case Iop_Dup32x2: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NUnary(ARMneon_DUP, res, arg, size, False));
+ return res;
+ }
+ case Iop_Abs8x8:
+ case Iop_Abs16x4:
+ case Iop_Abs32x2: {
+ HReg res = newVRegD(env);
+ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
+ UInt size = 0;
+ switch (e->Iex.Unop.op) {
+ case Iop_Abs8x8: size = 0; break;
+ case Iop_Abs16x4: size = 1; break;
+ case Iop_Abs32x2: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NUnary(ARMneon_ABS, res, arg, size, False));
+ return res;
+ }
+ case Iop_Reverse64_8x8:
+ case Iop_Reverse64_16x4:
+ case Iop_Reverse64_32x2: {
+ HReg res = newVRegD(env);
+ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
+ UInt size = 0;
+ switch (e->Iex.Unop.op) {
+ case Iop_Reverse64_8x8: size = 0; break;
+ case Iop_Reverse64_16x4: size = 1; break;
+ case Iop_Reverse64_32x2: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NUnary(ARMneon_REV64,
+ res, arg, size, False));
+ return res;
+ }
+ case Iop_Reverse32_8x8:
+ case Iop_Reverse32_16x4: {
+ HReg res = newVRegD(env);
+ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
+ UInt size = 0;
+ switch (e->Iex.Unop.op) {
+ case Iop_Reverse32_8x8: size = 0; break;
+ case Iop_Reverse32_16x4: size = 1; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NUnary(ARMneon_REV32,
+ res, arg, size, False));
+ return res;
+ }
+ case Iop_Reverse16_8x8: {
+ HReg res = newVRegD(env);
+ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
+ UInt size = 0;
+ addInstr(env, ARMInstr_NUnary(ARMneon_REV16,
+ res, arg, size, False));
+ return res;
+ }
+ case Iop_CmpwNEZ64: {
+ HReg x_lsh = newVRegD(env);
+ HReg x_rsh = newVRegD(env);
+ HReg lsh_amt = newVRegD(env);
+ HReg rsh_amt = newVRegD(env);
+ HReg zero = newVRegD(env);
+ HReg tmp = newVRegD(env);
+ HReg tmp2 = newVRegD(env);
+ HReg res = newVRegD(env);
+ HReg x = newVRegD(env);
+ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
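+ /* Make a per-32-bit-lane nonzero mask, then OR it with copies
+ of itself shifted 32 bits up and down within each 64-bit
+ lane, so one nonzero half forces the whole lane to
+ all-ones. */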
+ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp2, arg, 2, False));
+ addInstr(env, ARMInstr_NUnary(ARMneon_NOT, x, tmp2, 4, False));
+ addInstr(env, ARMInstr_NeonImm(lsh_amt, ARMNImm_TI(0, 32)));
+ addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0, 0)));
+ addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
+ rsh_amt, zero, lsh_amt, 2, False));
+ addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
+ x_lsh, x, lsh_amt, 3, False));
+ addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
+ x_rsh, x, rsh_amt, 3, False));
+ addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
+ tmp, x_lsh, x_rsh, 0, False));
+ addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
+ res, tmp, x, 0, False));
+ return res;
+ }
+ case Iop_CmpNEZ8x8:
+ case Iop_CmpNEZ16x4:
+ case Iop_CmpNEZ32x2: {
+ HReg res = newVRegD(env);
+ HReg tmp = newVRegD(env);
+ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
+ UInt size;
+ switch (e->Iex.Unop.op) {
+ case Iop_CmpNEZ8x8: size = 0; break;
+ case Iop_CmpNEZ16x4: size = 1; break;
+ case Iop_CmpNEZ32x2: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp, arg, size, False));
+ addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, tmp, 4, False));
+ return res;
+ }
+ case Iop_Shorten16x8:
+ case Iop_Shorten32x4:
+ case Iop_Shorten64x2: {
+ HReg res = newVRegD(env);
+ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
+ UInt size = 0;
+ switch (e->Iex.Unop.op) {
+ case Iop_Shorten16x8: size = 0; break;
+ case Iop_Shorten32x4: size = 1; break;
+ case Iop_Shorten64x2: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NUnary(ARMneon_COPYN,
+ res, arg, size, False));
+ return res;
+ }
+ case Iop_QShortenS16Sx8:
+ case Iop_QShortenS32Sx4:
+ case Iop_QShortenS64Sx2: {
+ HReg res = newVRegD(env);
+ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
+ UInt size = 0;
+ switch (e->Iex.Unop.op) {
+ case Iop_QShortenS16Sx8: size = 0; break;
+ case Iop_QShortenS32Sx4: size = 1; break;
+ case Iop_QShortenS64Sx2: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNSS,
+ res, arg, size, False));
+ return res;
+ }
+ case Iop_QShortenU16Sx8:
+ case Iop_QShortenU32Sx4:
+ case Iop_QShortenU64Sx2: {
+ HReg res = newVRegD(env);
+ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
+ UInt size = 0;
+ switch (e->Iex.Unop.op) {
+ case Iop_QShortenU16Sx8: size = 0; break;
+ case Iop_QShortenU32Sx4: size = 1; break;
+ case Iop_QShortenU64Sx2: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNUS,
+ res, arg, size, False));
+ return res;
+ }
+ case Iop_QShortenU16Ux8:
+ case Iop_QShortenU32Ux4:
+ case Iop_QShortenU64Ux2: {
+ HReg res = newVRegD(env);
+ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
+ UInt size = 0;
+ switch (e->Iex.Unop.op) {
+ case Iop_QShortenU16Ux8: size = 0; break;
+ case Iop_QShortenU32Ux4: size = 1; break;
+ case Iop_QShortenU64Ux2: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNUU,
+ res, arg, size, False));
+ return res;
+ }
+ case Iop_PwAddL8Sx8:
+ case Iop_PwAddL16Sx4:
+ case Iop_PwAddL32Sx2: {
+ HReg res = newVRegD(env);
+ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
+ UInt size = 0;
+ switch (e->Iex.Unop.op) {
+ case Iop_PwAddL8Sx8: size = 0; break;
+ case Iop_PwAddL16Sx4: size = 1; break;
+ case Iop_PwAddL32Sx2: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NUnary(ARMneon_PADDLS,
+ res, arg, size, False));
+ return res;
+ }
+ case Iop_PwAddL8Ux8:
+ case Iop_PwAddL16Ux4:
+ case Iop_PwAddL32Ux2: {
+ HReg res = newVRegD(env);
+ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
+ UInt size = 0;
+ switch (e->Iex.Unop.op) {
+ case Iop_PwAddL8Ux8: size = 0; break;
+ case Iop_PwAddL16Ux4: size = 1; break;
+ case Iop_PwAddL32Ux2: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NUnary(ARMneon_PADDLU,
+ res, arg, size, False));
+ return res;
+ }
+ case Iop_Cnt8x8: {
+ HReg res = newVRegD(env);
+ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
+ UInt size = 0;
+ addInstr(env, ARMInstr_NUnary(ARMneon_CNT,
+ res, arg, size, False));
+ return res;
+ }
+ case Iop_Clz8Sx8:
+ case Iop_Clz16Sx4:
+ case Iop_Clz32Sx2: {
+ HReg res = newVRegD(env);
+ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
+ UInt size = 0;
+ switch (e->Iex.Unop.op) {
+ case Iop_Clz8Sx8: size = 0; break;
+ case Iop_Clz16Sx4: size = 1; break;
+ case Iop_Clz32Sx2: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NUnary(ARMneon_CLZ,
+ res, arg, size, False));
+ return res;
+ }
+ case Iop_Cls8Sx8:
+ case Iop_Cls16Sx4:
+ case Iop_Cls32Sx2: {
+ HReg res = newVRegD(env);
+ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
+ UInt size = 0;
+ switch (e->Iex.Unop.op) {
+ case Iop_Cls8Sx8: size = 0; break;
+ case Iop_Cls16Sx4: size = 1; break;
+ case Iop_Cls32Sx2: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NUnary(ARMneon_CLS,
+ res, arg, size, False));
+ return res;
+ }
+ case Iop_FtoI32Sx2_RZ: {
+ HReg res = newVRegD(env);
+ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
+ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoS,
+ res, arg, 2, False));
+ return res;
+ }
+ case Iop_FtoI32Ux2_RZ: {
+ HReg res = newVRegD(env);
+ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
+ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoU,
+ res, arg, 2, False));
+ return res;
+ }
+ case Iop_I32StoFx2: {
+ HReg res = newVRegD(env);
+ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
+ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTStoF,
+ res, arg, 2, False));
+ return res;
+ }
+ case Iop_I32UtoFx2: {
+ HReg res = newVRegD(env);
+ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
+ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTUtoF,
+ res, arg, 2, False));
+ return res;
+ }
+ case Iop_F32toF16x4: {
+ HReg res = newVRegD(env);
+ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
+ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTF32toF16,
+ res, arg, 2, False));
+ return res;
+ }
+ case Iop_Recip32Fx2: {
+ HReg res = newVRegD(env);
+ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
+ addInstr(env, ARMInstr_NUnary(ARMneon_VRECIPF,
+ res, arg, 0, False));
+ return res;
+ }
+ case Iop_Recip32x2: {
+ HReg res = newVRegD(env);
+ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
+ addInstr(env, ARMInstr_NUnary(ARMneon_VRECIP,
+ res, arg, 0, False));
+ return res;
+ }
+ case Iop_Abs32Fx2: {
+ DECLARE_PATTERN(p_vabd_32fx2);
+ DEFINE_PATTERN(p_vabd_32fx2,
+ unop(Iop_Abs32Fx2,
+ binop(Iop_Sub32Fx2,
+ bind(0),
+ bind(1))));
+ if (matchIRExpr(&mi, p_vabd_32fx2, e)) {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, mi.bindee[0]);
+ HReg argR = iselNeon64Expr(env, mi.bindee[1]);
+ addInstr(env, ARMInstr_NBinary(ARMneon_VABDFP,
+ res, argL, argR, 0, False));
+ return res;
+ } else {
+ HReg res = newVRegD(env);
+ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
+ addInstr(env, ARMInstr_NUnary(ARMneon_VABSFP,
+ res, arg, 0, False));
+ return res;
+ }
+ }
+ case Iop_Rsqrte32Fx2: {
+ HReg res = newVRegD(env);
+ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
+ addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTEFP,
+ res, arg, 0, False));
+ return res;
+ }
+ case Iop_Rsqrte32x2: {
+ HReg res = newVRegD(env);
+ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
+ addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTE,
+ res, arg, 0, False));
+ return res;
+ }
+ case Iop_Neg32Fx2: {
+ HReg res = newVRegD(env);
+ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
+ addInstr(env, ARMInstr_NUnary(ARMneon_VNEGF,
+ res, arg, 0, False));
+ return res;
+ }
+ default:
+ break;
+ }
+ } /* if (e->tag == Iex_Unop) */
+
+ if (e->tag == Iex_Triop) {
+ switch (e->Iex.Triop.op) {
+ case Iop_Extract64: {
+ HReg res = newVRegD(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Triop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Triop.arg2);
+ UInt imm4;
+ if (e->Iex.Triop.arg3->tag != Iex_Const ||
+ typeOfIRExpr(env->type_env, e->Iex.Triop.arg3) != Ity_I8) {
+ vpanic("ARM target supports Iop_Extract64 with constant "
+ "third argument less than 16 only\n");
+ }
+ imm4 = e->Iex.Triop.arg3->Iex.Const.con->Ico.U8;
+ if (imm4 >= 8) {
+ vpanic("ARM target supports Iop_Extract64 with constant "
+ "third argument less than 16 only\n");
+ }
+ addInstr(env, ARMInstr_NBinary(ARMneon_VEXT,
+ res, argL, argR, imm4, False));
+ return res;
+ }
+ case Iop_SetElem8x8:
+ case Iop_SetElem16x4:
+ case Iop_SetElem32x2: {
+ HReg res = newVRegD(env);
+ HReg dreg = iselNeon64Expr(env, e->Iex.Triop.arg1);
+ HReg arg = iselIntExpr_R(env, e->Iex.Triop.arg3);
+ UInt index, size;
+ if (e->Iex.Triop.arg2->tag != Iex_Const ||
+ typeOfIRExpr(env->type_env, e->Iex.Triop.arg2) != Ity_I8) {
+ vpanic("ARM target supports SetElem with constant "
+ "second argument only\n");
+ }
+ index = e->Iex.Triop.arg2->Iex.Const.con->Ico.U8;
+ switch (e->Iex.Triop.op) {
+ case Iop_SetElem8x8: vassert(index < 8); size = 0; break;
+ case Iop_SetElem16x4: vassert(index < 4); size = 1; break;
+ case Iop_SetElem32x2: vassert(index < 2); size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, res, dreg, 4, False));
+ addInstr(env, ARMInstr_NUnaryS(ARMneon_SETELEM,
+ mkARMNRS(ARMNRS_Scalar, res, index),
+ mkARMNRS(ARMNRS_Reg, arg, 0),
+ size, False));
+ return res;
+ }
+ default:
+ break;
+ }
+ }
+
+ /* --------- MULTIPLEX --------- */
+ if (e->tag == Iex_Mux0X) {
+ HReg rLo, rHi;
+ HReg res = newVRegD(env);
+ iselInt64Expr(&rHi, &rLo, env, e);
+ addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
+ return res;
+ }
+
+ ppIRExpr(e);
+ vpanic("iselNeon64Expr");
+}
+
+static HReg iselNeonExpr ( ISelEnv* env, IRExpr* e )
+{
+ HReg r = iselNeonExpr_wrk( env, e );
+ vassert(hregClass(r) == HRcVec128);
+ vassert(hregIsVirtual(r));
+ return r;
+}
+
+/* DO NOT CALL THIS DIRECTLY */
+static HReg iselNeonExpr_wrk ( ISelEnv* env, IRExpr* e )
+{
+ IRType ty = typeOfIRExpr(env->type_env, e);
+ MatchInfo mi;
+ vassert(e);
+ vassert(ty == Ity_V128);
+
+ if (e->tag == Iex_RdTmp) {
+ return lookupIRTemp(env, e->Iex.RdTmp.tmp);
+ }
+
+ if (e->tag == Iex_Const) {
+      /* At the moment there should be no 128-bit constants in IR
+         generated for ARM during disassembly; they are represented as
+         Iop_64HLtoV128 binary operations and handled among the binary
+         ops.  However, a zero constant can be created by Valgrind's
+         internal optimiser. */
+ if (e->Iex.Const.con->Ico.V128 == 0) {
+ HReg res = newVRegV(env);
+ addInstr(env, ARMInstr_NeonImm(res, ARMNImm_TI(0, 0)));
+ return res;
+ }
+ ppIRExpr(e);
+ vpanic("128-bit constant is not implemented");
+ }
+
+ if (e->tag == Iex_Load) {
+ HReg res = newVRegV(env);
+ ARMAModeN* am = iselIntExpr_AModeN(env, e->Iex.Load.addr);
+ vassert(ty == Ity_V128);
+ addInstr(env, ARMInstr_NLdStQ(True, res, am));
+ return res;
+ }
+
+ if (e->tag == Iex_Get) {
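+      /* Q-register loads have no reg+offset addressing mode, so first
+         compute the guest state address (R8 + offset) into a temporary
+         register. */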
+ HReg addr = newVRegI(env);
+ HReg res = newVRegV(env);
+ vassert(ty == Ity_V128);
+ addInstr(env, ARMInstr_Add32(addr, hregARM_R8(), e->Iex.Get.offset));
+ addInstr(env, ARMInstr_NLdStQ(True, res, mkARMAModeN_R(addr)));
+ return res;
+ }
+
+ if (e->tag == Iex_Unop) {
+ switch (e->Iex.Unop.op) {
+ case Iop_NotV128: {
+ DECLARE_PATTERN(p_veqz_8x16);
+ DECLARE_PATTERN(p_veqz_16x8);
+ DECLARE_PATTERN(p_veqz_32x4);
+ DECLARE_PATTERN(p_vcge_8sx16);
+ DECLARE_PATTERN(p_vcge_16sx8);
+ DECLARE_PATTERN(p_vcge_32sx4);
+ DECLARE_PATTERN(p_vcge_8ux16);
+ DECLARE_PATTERN(p_vcge_16ux8);
+ DECLARE_PATTERN(p_vcge_32ux4);
+ DEFINE_PATTERN(p_veqz_8x16,
+ unop(Iop_NotV128, unop(Iop_CmpNEZ8x16, bind(0))));
+ DEFINE_PATTERN(p_veqz_16x8,
+ unop(Iop_NotV128, unop(Iop_CmpNEZ16x8, bind(0))));
+ DEFINE_PATTERN(p_veqz_32x4,
+ unop(Iop_NotV128, unop(Iop_CmpNEZ32x4, bind(0))));
+ DEFINE_PATTERN(p_vcge_8sx16,
+ unop(Iop_NotV128, binop(Iop_CmpGT8Sx16, bind(1), bind(0))));
+ DEFINE_PATTERN(p_vcge_16sx8,
+ unop(Iop_NotV128, binop(Iop_CmpGT16Sx8, bind(1), bind(0))));
+ DEFINE_PATTERN(p_vcge_32sx4,
+ unop(Iop_NotV128, binop(Iop_CmpGT32Sx4, bind(1), bind(0))));
+ DEFINE_PATTERN(p_vcge_8ux16,
+ unop(Iop_NotV128, binop(Iop_CmpGT8Ux16, bind(1), bind(0))));
+ DEFINE_PATTERN(p_vcge_16ux8,
+ unop(Iop_NotV128, binop(Iop_CmpGT16Ux8, bind(1), bind(0))));
+ DEFINE_PATTERN(p_vcge_32ux4,
+ unop(Iop_NotV128, binop(Iop_CmpGT32Ux4, bind(1), bind(0))));
+ if (matchIRExpr(&mi, p_veqz_8x16, e)) {
+ HReg res = newVRegV(env);
+ HReg arg = iselNeonExpr(env, mi.bindee[0]);
+ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 0, True));
+ return res;
+ } else if (matchIRExpr(&mi, p_veqz_16x8, e)) {
+ HReg res = newVRegV(env);
+ HReg arg = iselNeonExpr(env, mi.bindee[0]);
+ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 1, True));
+ return res;
+ } else if (matchIRExpr(&mi, p_veqz_32x4, e)) {
+ HReg res = newVRegV(env);
+ HReg arg = iselNeonExpr(env, mi.bindee[0]);
+ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 2, True));
+ return res;
+ } else if (matchIRExpr(&mi, p_vcge_8sx16, e)) {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, mi.bindee[0]);
+ HReg argR = iselNeonExpr(env, mi.bindee[1]);
+ addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
+ res, argL, argR, 0, True));
+ return res;
+ } else if (matchIRExpr(&mi, p_vcge_16sx8, e)) {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, mi.bindee[0]);
+ HReg argR = iselNeonExpr(env, mi.bindee[1]);
+ addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
+ res, argL, argR, 1, True));
+ return res;
+ } else if (matchIRExpr(&mi, p_vcge_32sx4, e)) {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, mi.bindee[0]);
+ HReg argR = iselNeonExpr(env, mi.bindee[1]);
+ addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
+ res, argL, argR, 2, True));
+ return res;
+ } else if (matchIRExpr(&mi, p_vcge_8ux16, e)) {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, mi.bindee[0]);
+ HReg argR = iselNeonExpr(env, mi.bindee[1]);
+ addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
+ res, argL, argR, 0, True));
+ return res;
+ } else if (matchIRExpr(&mi, p_vcge_16ux8, e)) {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, mi.bindee[0]);
+ HReg argR = iselNeonExpr(env, mi.bindee[1]);
+ addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
+ res, argL, argR, 1, True));
+ return res;
+ } else if (matchIRExpr(&mi, p_vcge_32ux4, e)) {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, mi.bindee[0]);
+ HReg argR = iselNeonExpr(env, mi.bindee[1]);
+ addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
+ res, argL, argR, 2, True));
+ return res;
+ } else {
+ HReg res = newVRegV(env);
+ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
+ addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, arg, 4, True));
+ return res;
+ }
+ }
+ case Iop_Dup8x16:
+ case Iop_Dup16x8:
+ case Iop_Dup32x4: {
+ HReg res, arg;
+ UInt size;
+ DECLARE_PATTERN(p_vdup_8x16);
+ DECLARE_PATTERN(p_vdup_16x8);
+ DECLARE_PATTERN(p_vdup_32x4);
+ DEFINE_PATTERN(p_vdup_8x16,
+ unop(Iop_Dup8x16, binop(Iop_GetElem8x8, bind(0), bind(1))));
+ DEFINE_PATTERN(p_vdup_16x8,
+ unop(Iop_Dup16x8, binop(Iop_GetElem16x4, bind(0), bind(1))));
+ DEFINE_PATTERN(p_vdup_32x4,
+ unop(Iop_Dup32x4, binop(Iop_GetElem32x2, bind(0), bind(1))));
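+         /* A Dup of a GetElem can be done as a single VDUP (scalar).
+            Its 4-bit immediate packs lane size and index together: the
+            position of the lowest set bit gives the element size
+            (xxx1 = 8-bit, xx10 = 16-bit, x100 = 32-bit) and the bits
+            above it give the lane index, hence the shift-and-tag
+            arithmetic below. */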
+ if (matchIRExpr(&mi, p_vdup_8x16, e)) {
+ UInt index;
+ UInt imm4;
+ if (mi.bindee[1]->tag == Iex_Const &&
+ typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
+ index = mi.bindee[1]->Iex.Const.con->Ico.U8;
+ imm4 = (index << 1) + 1;
+ if (index < 8) {
+ res = newVRegV(env);
+ arg = iselNeon64Expr(env, mi.bindee[0]);
+ addInstr(env, ARMInstr_NUnaryS(
+ ARMneon_VDUP,
+ mkARMNRS(ARMNRS_Reg, res, 0),
+ mkARMNRS(ARMNRS_Scalar, arg, index),
+ imm4, True
+ ));
+ return res;
+ }
+ }
+ } else if (matchIRExpr(&mi, p_vdup_16x8, e)) {
+ UInt index;
+ UInt imm4;
+ if (mi.bindee[1]->tag == Iex_Const &&
+ typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
+ index = mi.bindee[1]->Iex.Const.con->Ico.U8;
+ imm4 = (index << 2) + 2;
+ if (index < 4) {
+ res = newVRegV(env);
+ arg = iselNeon64Expr(env, mi.bindee[0]);
+ addInstr(env, ARMInstr_NUnaryS(
+ ARMneon_VDUP,
+ mkARMNRS(ARMNRS_Reg, res, 0),
+ mkARMNRS(ARMNRS_Scalar, arg, index),
+ imm4, True
+ ));
+ return res;
+ }
+ }
+ } else if (matchIRExpr(&mi, p_vdup_32x4, e)) {
+ UInt index;
+ UInt imm4;
+ if (mi.bindee[1]->tag == Iex_Const &&
+ typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
+ index = mi.bindee[1]->Iex.Const.con->Ico.U8;
+ imm4 = (index << 3) + 4;
+ if (index < 2) {
+ res = newVRegV(env);
+ arg = iselNeon64Expr(env, mi.bindee[0]);
+ addInstr(env, ARMInstr_NUnaryS(
+ ARMneon_VDUP,
+ mkARMNRS(ARMNRS_Reg, res, 0),
+ mkARMNRS(ARMNRS_Scalar, arg, index),
+ imm4, True
+ ));
+ return res;
+ }
+ }
+ }
+ arg = iselIntExpr_R(env, e->Iex.Unop.arg);
+ res = newVRegV(env);
+ switch (e->Iex.Unop.op) {
+ case Iop_Dup8x16: size = 0; break;
+ case Iop_Dup16x8: size = 1; break;
+ case Iop_Dup32x4: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NUnary(ARMneon_DUP, res, arg, size, True));
+ return res;
+ }
+ case Iop_Abs8x16:
+ case Iop_Abs16x8:
+ case Iop_Abs32x4: {
+ HReg res = newVRegV(env);
+ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
+ UInt size = 0;
+         switch (e->Iex.Unop.op) {
+ case Iop_Abs8x16: size = 0; break;
+ case Iop_Abs16x8: size = 1; break;
+ case Iop_Abs32x4: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NUnary(ARMneon_ABS, res, arg, size, True));
+ return res;
+ }
+ case Iop_Reverse64_8x16:
+ case Iop_Reverse64_16x8:
+ case Iop_Reverse64_32x4: {
+ HReg res = newVRegV(env);
+ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
+ UInt size = 0;
+         switch (e->Iex.Unop.op) {
+ case Iop_Reverse64_8x16: size = 0; break;
+ case Iop_Reverse64_16x8: size = 1; break;
+ case Iop_Reverse64_32x4: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NUnary(ARMneon_REV64,
+ res, arg, size, True));
+ return res;
+ }
+ case Iop_Reverse32_8x16:
+ case Iop_Reverse32_16x8: {
+ HReg res = newVRegV(env);
+ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
+ UInt size = 0;
+         switch (e->Iex.Unop.op) {
+ case Iop_Reverse32_8x16: size = 0; break;
+ case Iop_Reverse32_16x8: size = 1; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NUnary(ARMneon_REV32,
+ res, arg, size, True));
+ return res;
+ }
+ case Iop_Reverse16_8x16: {
+ HReg res = newVRegV(env);
+ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
+ UInt size = 0;
+ addInstr(env, ARMInstr_NUnary(ARMneon_REV16,
+ res, arg, size, True));
+ return res;
+ }
+ case Iop_CmpNEZ64x2: {
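+         /* NEON has no 64x2 compare against zero, so synthesise one:
+            do a 32x4 compare (EQZ then NOT gives CmpNEZ per 32-bit
+            half), then OR each 64-bit lane with copies of itself
+            shifted up and down by 32 bits, so that a nonzero result in
+            either half smears across the whole lane. */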
+ HReg x_lsh = newVRegV(env);
+ HReg x_rsh = newVRegV(env);
+ HReg lsh_amt = newVRegV(env);
+ HReg rsh_amt = newVRegV(env);
+ HReg zero = newVRegV(env);
+ HReg tmp = newVRegV(env);
+ HReg tmp2 = newVRegV(env);
+ HReg res = newVRegV(env);
+ HReg x = newVRegV(env);
+ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
+ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp2, arg, 2, True));
+ addInstr(env, ARMInstr_NUnary(ARMneon_NOT, x, tmp2, 4, True));
+ addInstr(env, ARMInstr_NeonImm(lsh_amt, ARMNImm_TI(0, 32)));
+ addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0, 0)));
+ addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
+ rsh_amt, zero, lsh_amt, 2, True));
+ addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
+ x_lsh, x, lsh_amt, 3, True));
+ addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
+ x_rsh, x, rsh_amt, 3, True));
+ addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
+ tmp, x_lsh, x_rsh, 0, True));
+ addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
+ res, tmp, x, 0, True));
+ return res;
+ }
+ case Iop_CmpNEZ8x16:
+ case Iop_CmpNEZ16x8:
+ case Iop_CmpNEZ32x4: {
+ HReg res = newVRegV(env);
+ HReg tmp = newVRegV(env);
+ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
+ UInt size;
+ switch (e->Iex.Unop.op) {
+ case Iop_CmpNEZ8x16: size = 0; break;
+ case Iop_CmpNEZ16x8: size = 1; break;
+ case Iop_CmpNEZ32x4: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp, arg, size, True));
+ addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, tmp, 4, True));
+ return res;
+ }
+ case Iop_Longen8Ux8:
+ case Iop_Longen16Ux4:
+ case Iop_Longen32Ux2: {
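+         /* Widening move (VMOVL.U<size>): each unsigned element of
+            the 64-bit source is zero-extended to twice its width in
+            the 128-bit result.  The signed variant below
+            sign-extends. */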
+ HReg res = newVRegV(env);
+ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
+ UInt size;
+ switch (e->Iex.Unop.op) {
+ case Iop_Longen8Ux8: size = 0; break;
+ case Iop_Longen16Ux4: size = 1; break;
+ case Iop_Longen32Ux2: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NUnary(ARMneon_COPYLU,
+ res, arg, size, True));
+ return res;
+ }
+ case Iop_Longen8Sx8:
+ case Iop_Longen16Sx4:
+ case Iop_Longen32Sx2: {
+ HReg res = newVRegV(env);
+ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
+ UInt size;
+ switch (e->Iex.Unop.op) {
+ case Iop_Longen8Sx8: size = 0; break;
+ case Iop_Longen16Sx4: size = 1; break;
+ case Iop_Longen32Sx2: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NUnary(ARMneon_COPYLS,
+ res, arg, size, True));
+ return res;
+ }
+ case Iop_PwAddL8Sx16:
+ case Iop_PwAddL16Sx8:
+ case Iop_PwAddL32Sx4: {
+ HReg res = newVRegV(env);
+ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
+ UInt size = 0;
+         switch (e->Iex.Unop.op) {
+ case Iop_PwAddL8Sx16: size = 0; break;
+ case Iop_PwAddL16Sx8: size = 1; break;
+ case Iop_PwAddL32Sx4: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NUnary(ARMneon_PADDLS,
+ res, arg, size, True));
+ return res;
+ }
+ case Iop_PwAddL8Ux16:
+ case Iop_PwAddL16Ux8:
+ case Iop_PwAddL32Ux4: {
+ HReg res = newVRegV(env);
+ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
+ UInt size = 0;
+         switch (e->Iex.Unop.op) {
+ case Iop_PwAddL8Ux16: size = 0; break;
+ case Iop_PwAddL16Ux8: size = 1; break;
+ case Iop_PwAddL32Ux4: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NUnary(ARMneon_PADDLU,
+ res, arg, size, True));
+ return res;
+ }
+ case Iop_Cnt8x16: {
+ HReg res = newVRegV(env);
+ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
+ UInt size = 0;
+ addInstr(env, ARMInstr_NUnary(ARMneon_CNT, res, arg, size, True));
+ return res;
+ }
+ case Iop_Clz8Sx16:
+ case Iop_Clz16Sx8:
+ case Iop_Clz32Sx4: {
+ HReg res = newVRegV(env);
+ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
+ UInt size = 0;
+         switch (e->Iex.Unop.op) {
+ case Iop_Clz8Sx16: size = 0; break;
+ case Iop_Clz16Sx8: size = 1; break;
+ case Iop_Clz32Sx4: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NUnary(ARMneon_CLZ, res, arg, size, True));
+ return res;
+ }
+ case Iop_Cls8Sx16:
+ case Iop_Cls16Sx8:
+ case Iop_Cls32Sx4: {
+ HReg res = newVRegV(env);
+ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
+ UInt size = 0;
+         switch (e->Iex.Unop.op) {
+ case Iop_Cls8Sx16: size = 0; break;
+ case Iop_Cls16Sx8: size = 1; break;
+ case Iop_Cls32Sx4: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NUnary(ARMneon_CLS, res, arg, size, True));
+ return res;
+ }
+ case Iop_FtoI32Sx4_RZ: {
+ HReg res = newVRegV(env);
+ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
+ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoS,
+ res, arg, 2, True));
+ return res;
+ }
+ case Iop_FtoI32Ux4_RZ: {
+ HReg res = newVRegV(env);
+ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
+ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoU,
+ res, arg, 2, True));
+ return res;
+ }
+ case Iop_I32StoFx4: {
+ HReg res = newVRegV(env);
+ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
+ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTStoF,
+ res, arg, 2, True));
+ return res;
+ }
+ case Iop_I32UtoFx4: {
+ HReg res = newVRegV(env);
+ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
+ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTUtoF,
+ res, arg, 2, True));
+ return res;
+ }
+ case Iop_F16toF32x4: {
+ HReg res = newVRegV(env);
+ HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
+ addInstr(env, ARMInstr_NUnary(ARMneon_VCVTF16toF32,
+ res, arg, 2, True));
+ return res;
+ }
+ case Iop_Recip32Fx4: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
+ addInstr(env, ARMInstr_NUnary(ARMneon_VRECIPF,
+ res, argL, 0, True));
+ return res;
+ }
+ case Iop_Recip32x4: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
+ addInstr(env, ARMInstr_NUnary(ARMneon_VRECIP,
+ res, argL, 0, True));
+ return res;
+ }
+ case Iop_Abs32Fx4: {
+ DECLARE_PATTERN(p_vabd_32fx4);
+ DEFINE_PATTERN(p_vabd_32fx4,
+ unop(Iop_Abs32Fx4,
+ binop(Iop_Sub32Fx4,
+ bind(0),
+ bind(1))));
+ if (matchIRExpr(&mi, p_vabd_32fx4, e)) {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, mi.bindee[0]);
+ HReg argR = iselNeonExpr(env, mi.bindee[1]);
+ addInstr(env, ARMInstr_NBinary(ARMneon_VABDFP,
+ res, argL, argR, 0, True));
+ return res;
+ } else {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
+ addInstr(env, ARMInstr_NUnary(ARMneon_VABSFP,
+ res, argL, 0, True));
+ return res;
+ }
+ }
+ case Iop_Rsqrte32Fx4: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
+ addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTEFP,
+ res, argL, 0, True));
+ return res;
+ }
+ case Iop_Rsqrte32x4: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
+ addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTE,
+ res, argL, 0, True));
+ return res;
+ }
+ case Iop_Neg32Fx4: {
+ HReg res = newVRegV(env);
+ HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
+ addInstr(env, ARMInstr_NUnary(ARMneon_VNEGF,
+ res, arg, 0, True));
+ return res;
+ }
+ /* ... */
+ default:
+ break;
+ }
+ }
+
+ if (e->tag == Iex_Binop) {
+ switch (e->Iex.Binop.op) {
+ case Iop_64HLtoV128:
+         /* Try to match into a single "VMOV reg, imm" instruction. */
+ if (e->Iex.Binop.arg1->tag == Iex_Const &&
+ e->Iex.Binop.arg2->tag == Iex_Const &&
+ typeOfIRExpr(env->type_env, e->Iex.Binop.arg1) == Ity_I64 &&
+ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) == Ity_I64 &&
+ e->Iex.Binop.arg1->Iex.Const.con->Ico.U64 ==
+ e->Iex.Binop.arg2->Iex.Const.con->Ico.U64) {
+ ULong imm64 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
+ ARMNImm *imm = Imm64_to_ARMNImm(imm64);
+ if (imm) {
+ HReg res = newVRegV(env);
+ addInstr(env, ARMInstr_NeonImm(res, imm));
+ return res;
+ }
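+            /* The full 64-bit value is not directly representable.
+               If only one 32-bit half is nonzero, try the value with
+               that half replicated into both halves; if that is
+               encodable, materialise it and clear the unwanted half by
+               ANDing with a per-byte 0x00/0xFF mask immediate. */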
+ if ((imm64 >> 32) == 0LL &&
+ (imm = Imm64_to_ARMNImm(imm64 | (imm64 << 32))) != NULL) {
+ HReg tmp1 = newVRegV(env);
+ HReg tmp2 = newVRegV(env);
+ HReg res = newVRegV(env);
+ if (imm->type < 10) {
+ addInstr(env, ARMInstr_NeonImm(tmp1, ARMNImm_TI(9,0x0f)));
+ addInstr(env, ARMInstr_NeonImm(tmp2, imm));
+ addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
+ res, tmp1, tmp2, 4, True));
+ return res;
+ }
+ }
+ if ((imm64 & 0xFFFFFFFFLL) == 0LL &&
+ (imm = Imm64_to_ARMNImm(imm64 | (imm64 >> 32))) != NULL) {
+ HReg tmp1 = newVRegV(env);
+ HReg tmp2 = newVRegV(env);
+ HReg res = newVRegV(env);
+ if (imm->type < 10) {
+ addInstr(env, ARMInstr_NeonImm(tmp1, ARMNImm_TI(9,0xf0)));
+ addInstr(env, ARMInstr_NeonImm(tmp2, imm));
+ addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
+ res, tmp1, tmp2, 4, True));
+ return res;
+ }
+ }
+ }
+ /* Does not match "VMOV Reg, Imm" form */
+ goto neon_expr_bad;
+ case Iop_AndV128: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
+ addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
+ res, argL, argR, 4, True));
+ return res;
+ }
+ case Iop_OrV128: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
+ addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
+ res, argL, argR, 4, True));
+ return res;
+ }
+ case Iop_XorV128: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
+ addInstr(env, ARMInstr_NBinary(ARMneon_VXOR,
+ res, argL, argR, 4, True));
+ return res;
+ }
+ case Iop_Add8x16:
+ case Iop_Add16x8:
+ case Iop_Add32x4:
+ case Iop_Add64x2: {
+ /*
+ FIXME: remove this if not used
+ DECLARE_PATTERN(p_vrhadd_32sx4);
+ ULong one = (1LL << 32) | 1LL;
+ DEFINE_PATTERN(p_vrhadd_32sx4,
+ binop(Iop_Add32x4,
+ binop(Iop_Add32x4,
+ binop(Iop_SarN32x4,
+ bind(0),
+ mkU8(1)),
+ binop(Iop_SarN32x4,
+ bind(1),
+ mkU8(1))),
+ binop(Iop_SarN32x4,
+ binop(Iop_Add32x4,
+ binop(Iop_Add32x4,
+ binop(Iop_AndV128,
+ bind(0),
+ mkU128(one)),
+ binop(Iop_AndV128,
+ bind(1),
+ mkU128(one))),
+ mkU128(one)),
+ mkU8(1))));
+ */
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
+ UInt size;
+ switch (e->Iex.Binop.op) {
+ case Iop_Add8x16: size = 0; break;
+ case Iop_Add16x8: size = 1; break;
+ case Iop_Add32x4: size = 2; break;
+ case Iop_Add64x2: size = 3; break;
+ default:
+ ppIROp(e->Iex.Binop.op);
+ vpanic("Illegal element size in VADD");
+ }
+ addInstr(env, ARMInstr_NBinary(ARMneon_VADD,
+ res, argL, argR, size, True));
+ return res;
+ }
+ case Iop_Add32Fx4: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
+ UInt size = 0;
+ addInstr(env, ARMInstr_NBinary(ARMneon_VADDFP,
+ res, argL, argR, size, True));
+ return res;
+ }
+ case Iop_Recps32Fx4: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
+ UInt size = 0;
+ addInstr(env, ARMInstr_NBinary(ARMneon_VRECPS,
+ res, argL, argR, size, True));
+ return res;
+ }
+ case Iop_Rsqrts32Fx4: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
+ UInt size = 0;
+ addInstr(env, ARMInstr_NBinary(ARMneon_VRSQRTS,
+ res, argL, argR, size, True));
+ return res;
+ }
+ case Iop_InterleaveEvenLanes8x16:
+ case Iop_InterleaveEvenLanes16x8:
+ case Iop_InterleaveEvenLanes32x4:
+ case Iop_InterleaveOddLanes8x16:
+ case Iop_InterleaveOddLanes16x8:
+ case Iop_InterleaveOddLanes32x4: {
+ HReg tmp = newVRegV(env);
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
+ UInt size;
+ UInt is_lo;
+ switch (e->Iex.Binop.op) {
+ case Iop_InterleaveEvenLanes8x16: is_lo = 0; size = 0; break;
+ case Iop_InterleaveOddLanes8x16: is_lo = 1; size = 0; break;
+ case Iop_InterleaveEvenLanes16x8: is_lo = 0; size = 1; break;
+ case Iop_InterleaveOddLanes16x8: is_lo = 1; size = 1; break;
+ case Iop_InterleaveEvenLanes32x4: is_lo = 0; size = 2; break;
+ case Iop_InterleaveOddLanes32x4: is_lo = 1; size = 2; break;
+ default:
+ ppIROp(e->Iex.Binop.op);
+ vpanic("Illegal element size in VTRN");
+ }
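+         /* VTRN transposes 2x2 element matrices across its operands:
+            one register ends up with the even lanes of both inputs and
+            the other with the odd lanes.  It overwrites both operands,
+            so the inputs are copied into scratch registers first. */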
+ if (is_lo) {
+ addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
+ tmp, argL, 4, True));
+ addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
+ res, argR, 4, True));
+ addInstr(env, ARMInstr_NDual(ARMneon_TRN,
+ res, tmp, size, True));
+ } else {
+ addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
+ tmp, argR, 4, True));
+ addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
+ res, argL, 4, True));
+ addInstr(env, ARMInstr_NDual(ARMneon_TRN,
+ tmp, res, size, True));
+ }
+ return res;
+ }
+ case Iop_InterleaveHI8x16:
+ case Iop_InterleaveHI16x8:
+ case Iop_InterleaveHI32x4:
+ case Iop_InterleaveLO8x16:
+ case Iop_InterleaveLO16x8:
+ case Iop_InterleaveLO32x4: {
+ HReg tmp = newVRegV(env);
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
+ UInt size;
+ UInt is_lo;
+ switch (e->Iex.Binop.op) {
+ case Iop_InterleaveHI8x16: is_lo = 1; size = 0; break;
+ case Iop_InterleaveLO8x16: is_lo = 0; size = 0; break;
+ case Iop_InterleaveHI16x8: is_lo = 1; size = 1; break;
+ case Iop_InterleaveLO16x8: is_lo = 0; size = 1; break;
+ case Iop_InterleaveHI32x4: is_lo = 1; size = 2; break;
+ case Iop_InterleaveLO32x4: is_lo = 0; size = 2; break;
+ default:
+ ppIROp(e->Iex.Binop.op);
+ vpanic("Illegal element size in VZIP");
+ }
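+         /* VZIP interleaves its two operands in place: one register
+            receives the low half of the zipped pair and the other the
+            high half.  As with VTRN, the inputs are copied first. */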
+ if (is_lo) {
+ addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
+ tmp, argL, 4, True));
+ addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
+ res, argR, 4, True));
+ addInstr(env, ARMInstr_NDual(ARMneon_ZIP,
+ res, tmp, size, True));
+ } else {
+ addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
+ tmp, argR, 4, True));
+ addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
+ res, argL, 4, True));
+ addInstr(env, ARMInstr_NDual(ARMneon_ZIP,
+ tmp, res, size, True));
+ }
+ return res;
+ }
+ case Iop_CatOddLanes8x16:
+ case Iop_CatOddLanes16x8:
+ case Iop_CatOddLanes32x4:
+ case Iop_CatEvenLanes8x16:
+ case Iop_CatEvenLanes16x8:
+ case Iop_CatEvenLanes32x4: {
+ HReg tmp = newVRegV(env);
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
+ UInt size;
+ UInt is_lo;
+ switch (e->Iex.Binop.op) {
+ case Iop_CatOddLanes8x16: is_lo = 1; size = 0; break;
+ case Iop_CatEvenLanes8x16: is_lo = 0; size = 0; break;
+ case Iop_CatOddLanes16x8: is_lo = 1; size = 1; break;
+ case Iop_CatEvenLanes16x8: is_lo = 0; size = 1; break;
+ case Iop_CatOddLanes32x4: is_lo = 1; size = 2; break;
+ case Iop_CatEvenLanes32x4: is_lo = 0; size = 2; break;
+ default:
+ ppIROp(e->Iex.Binop.op);
+ vpanic("Illegal element size in VUZP");
+ }
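+         /* VUZP de-interleaves in place: one register collects the
+            even-numbered lanes of the concatenation, the other the
+            odd-numbered ones.  Again the inputs are copied first. */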
+ if (is_lo) {
+ addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
+ tmp, argL, 4, True));
+ addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
+ res, argR, 4, True));
+ addInstr(env, ARMInstr_NDual(ARMneon_UZP,
+ res, tmp, size, True));
+ } else {
+ addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
+ tmp, argR, 4, True));
+ addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
+ res, argL, 4, True));
+ addInstr(env, ARMInstr_NDual(ARMneon_UZP,
+ tmp, res, size, True));
+ }
+ return res;
+ }
+ case Iop_QAdd8Ux16:
+ case Iop_QAdd16Ux8:
+ case Iop_QAdd32Ux4:
+ case Iop_QAdd64Ux2: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
+ UInt size;
+ switch (e->Iex.Binop.op) {
+ case Iop_QAdd8Ux16: size = 0; break;
+ case Iop_QAdd16Ux8: size = 1; break;
+ case Iop_QAdd32Ux4: size = 2; break;
+ case Iop_QAdd64Ux2: size = 3; break;
+ default:
+ ppIROp(e->Iex.Binop.op);
+ vpanic("Illegal element size in VQADDU");
+ }
+ addInstr(env, ARMInstr_NBinary(ARMneon_VQADDU,
+ res, argL, argR, size, True));
+ return res;
+ }
+ case Iop_QAdd8Sx16:
+ case Iop_QAdd16Sx8:
+ case Iop_QAdd32Sx4:
+ case Iop_QAdd64Sx2: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
+ UInt size;
+ switch (e->Iex.Binop.op) {
+ case Iop_QAdd8Sx16: size = 0; break;
+ case Iop_QAdd16Sx8: size = 1; break;
+ case Iop_QAdd32Sx4: size = 2; break;
+ case Iop_QAdd64Sx2: size = 3; break;
+ default:
+ ppIROp(e->Iex.Binop.op);
+ vpanic("Illegal element size in VQADDS");
+ }
+ addInstr(env, ARMInstr_NBinary(ARMneon_VQADDS,
+ res, argL, argR, size, True));
+ return res;
+ }
+ case Iop_Sub8x16:
+ case Iop_Sub16x8:
+ case Iop_Sub32x4:
+ case Iop_Sub64x2: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
+ UInt size;
+ switch (e->Iex.Binop.op) {
+ case Iop_Sub8x16: size = 0; break;
+ case Iop_Sub16x8: size = 1; break;
+ case Iop_Sub32x4: size = 2; break;
+ case Iop_Sub64x2: size = 3; break;
+ default:
+ ppIROp(e->Iex.Binop.op);
+ vpanic("Illegal element size in VSUB");
+ }
+ addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
+ res, argL, argR, size, True));
+ return res;
+ }
+ case Iop_Sub32Fx4: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
+ UInt size = 0;
+ addInstr(env, ARMInstr_NBinary(ARMneon_VSUBFP,
+ res, argL, argR, size, True));
+ return res;
+ }
+ case Iop_QSub8Ux16:
+ case Iop_QSub16Ux8:
+ case Iop_QSub32Ux4:
+ case Iop_QSub64Ux2: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
+ UInt size;
+ switch (e->Iex.Binop.op) {
+ case Iop_QSub8Ux16: size = 0; break;
+ case Iop_QSub16Ux8: size = 1; break;
+ case Iop_QSub32Ux4: size = 2; break;
+ case Iop_QSub64Ux2: size = 3; break;
+ default:
+ ppIROp(e->Iex.Binop.op);
+ vpanic("Illegal element size in VQSUBU");
+ }
+ addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBU,
+ res, argL, argR, size, True));
+ return res;
+ }
+ case Iop_QSub8Sx16:
+ case Iop_QSub16Sx8:
+ case Iop_QSub32Sx4:
+ case Iop_QSub64Sx2: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
+ UInt size;
+ switch (e->Iex.Binop.op) {
+ case Iop_QSub8Sx16: size = 0; break;
+ case Iop_QSub16Sx8: size = 1; break;
+ case Iop_QSub32Sx4: size = 2; break;
+ case Iop_QSub64Sx2: size = 3; break;
+ default:
+ ppIROp(e->Iex.Binop.op);
+ vpanic("Illegal element size in VQSUBS");
+ }
+ addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBS,
+ res, argL, argR, size, True));
+ return res;
+ }
+ case Iop_Max8Ux16:
+ case Iop_Max16Ux8:
+ case Iop_Max32Ux4: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
+ UInt size;
+ switch (e->Iex.Binop.op) {
+ case Iop_Max8Ux16: size = 0; break;
+ case Iop_Max16Ux8: size = 1; break;
+ case Iop_Max32Ux4: size = 2; break;
+ default: vpanic("Illegal element size in VMAXU");
+ }
+ addInstr(env, ARMInstr_NBinary(ARMneon_VMAXU,
+ res, argL, argR, size, True));
+ return res;
+ }
+ case Iop_Max8Sx16:
+ case Iop_Max16Sx8:
+ case Iop_Max32Sx4: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
+ UInt size;
+ switch (e->Iex.Binop.op) {
+ case Iop_Max8Sx16: size = 0; break;
+ case Iop_Max16Sx8: size = 1; break;
+ case Iop_Max32Sx4: size = 2; break;
+ default: vpanic("Illegal element size in VMAXU");
+ }
+ addInstr(env, ARMInstr_NBinary(ARMneon_VMAXS,
+ res, argL, argR, size, True));
+ return res;
+ }
+ case Iop_Min8Ux16:
+ case Iop_Min16Ux8:
+ case Iop_Min32Ux4: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
+ UInt size;
+ switch (e->Iex.Binop.op) {
+ case Iop_Min8Ux16: size = 0; break;
+ case Iop_Min16Ux8: size = 1; break;
+ case Iop_Min32Ux4: size = 2; break;
+ default: vpanic("Illegal element size in VMAXU");
+ }
+ addInstr(env, ARMInstr_NBinary(ARMneon_VMINU,
+ res, argL, argR, size, True));
+ return res;
+ }
+ case Iop_Min8Sx16:
+ case Iop_Min16Sx8:
+ case Iop_Min32Sx4: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
+ UInt size;
+ switch (e->Iex.Binop.op) {
+ case Iop_Min8Sx16: size = 0; break;
+ case Iop_Min16Sx8: size = 1; break;
+ case Iop_Min32Sx4: size = 2; break;
+ default: vpanic("Illegal element size in VMAXU");
+ }
+ addInstr(env, ARMInstr_NBinary(ARMneon_VMINS,
+ res, argL, argR, size, True));
+ return res;
+ }
+ case Iop_Sar8x16:
+ case Iop_Sar16x8:
+ case Iop_Sar32x4:
+ case Iop_Sar64x2: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
+ HReg argR2 = newVRegV(env);
+ HReg zero = newVRegV(env);
+ UInt size;
+ switch (e->Iex.Binop.op) {
+ case Iop_Sar8x16: size = 0; break;
+ case Iop_Sar16x8: size = 1; break;
+ case Iop_Sar32x4: size = 2; break;
+ case Iop_Sar64x2: size = 3; break;
+ default: vassert(0);
+ }
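+         /* NEON shifts by a register only shift left; shifting right
+            is done by shifting left by a negated amount.  So negate
+            each lane of the shift vector (0 - argR) and use the signed
+            variant VSAL, which shifts right arithmetically for
+            negative counts. */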
+ addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
+ addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
+ argR2, zero, argR, size, True));
+ addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
+ res, argL, argR2, size, True));
+ return res;
+ }
+ case Iop_Sal8x16:
+ case Iop_Sal16x8:
+ case Iop_Sal32x4:
+ case Iop_Sal64x2: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
+ UInt size;
+ switch (e->Iex.Binop.op) {
+ case Iop_Sal8x16: size = 0; break;
+ case Iop_Sal16x8: size = 1; break;
+ case Iop_Sal32x4: size = 2; break;
+ case Iop_Sal64x2: size = 3; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
+ res, argL, argR, size, True));
+ return res;
+ }
+ case Iop_Shr8x16:
+ case Iop_Shr16x8:
+ case Iop_Shr32x4:
+ case Iop_Shr64x2: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
+ HReg argR2 = newVRegV(env);
+ HReg zero = newVRegV(env);
+ UInt size;
+ switch (e->Iex.Binop.op) {
+ case Iop_Shr8x16: size = 0; break;
+ case Iop_Shr16x8: size = 1; break;
+ case Iop_Shr32x4: size = 2; break;
+ case Iop_Shr64x2: size = 3; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
+ addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
+ argR2, zero, argR, size, True));
+ addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
+ res, argL, argR2, size, True));
+ return res;
+ }
+ case Iop_Shl8x16:
+ case Iop_Shl16x8:
+ case Iop_Shl32x4:
+ case Iop_Shl64x2: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
+ UInt size;
+ switch (e->Iex.Binop.op) {
+ case Iop_Shl8x16: size = 0; break;
+ case Iop_Shl16x8: size = 1; break;
+ case Iop_Shl32x4: size = 2; break;
+ case Iop_Shl64x2: size = 3; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
+ res, argL, argR, size, True));
+ return res;
+ }
+ case Iop_QShl8x16:
+ case Iop_QShl16x8:
+ case Iop_QShl32x4:
+ case Iop_QShl64x2: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
+ UInt size;
+ switch (e->Iex.Binop.op) {
+ case Iop_QShl8x16: size = 0; break;
+ case Iop_QShl16x8: size = 1; break;
+ case Iop_QShl32x4: size = 2; break;
+ case Iop_QShl64x2: size = 3; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NShift(ARMneon_VQSHL,
+ res, argL, argR, size, True));
+ return res;
+ }
+ case Iop_QSal8x16:
+ case Iop_QSal16x8:
+ case Iop_QSal32x4:
+ case Iop_QSal64x2: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
+ UInt size;
+ switch (e->Iex.Binop.op) {
+ case Iop_QSal8x16: size = 0; break;
+ case Iop_QSal16x8: size = 1; break;
+ case Iop_QSal32x4: size = 2; break;
+ case Iop_QSal64x2: size = 3; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NShift(ARMneon_VQSAL,
+ res, argL, argR, size, True));
+ return res;
+ }
+ case Iop_QShlN8x16:
+ case Iop_QShlN16x8:
+ case Iop_QShlN32x4:
+ case Iop_QShlN64x2: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ UInt size, imm;
+ if (e->Iex.Binop.arg2->tag != Iex_Const ||
+ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
+ vpanic("ARM taget supports Iop_QShlNAxB with constant "
+ "second argument only\n");
+ }
+ imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
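+         /* The element width tag and the shift amount are packed into
+            a single operand: the highest set bit (8/16/32/64) selects
+            the element size and the low bits hold the immediate, which
+            the instruction emitter unpacks later. */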
+ switch (e->Iex.Binop.op) {
+ case Iop_QShlN8x16: size = 8 | imm; break;
+ case Iop_QShlN16x8: size = 16 | imm; break;
+ case Iop_QShlN32x4: size = 32 | imm; break;
+ case Iop_QShlN64x2: size = 64 | imm; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUU,
+ res, argL, size, True));
+ return res;
+ }
+ case Iop_QShlN8Sx16:
+ case Iop_QShlN16Sx8:
+ case Iop_QShlN32Sx4:
+ case Iop_QShlN64Sx2: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ UInt size, imm;
+ if (e->Iex.Binop.arg2->tag != Iex_Const ||
+ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
+ vpanic("ARM taget supports Iop_QShlNASxB with constant "
+ "second argument only\n");
+ }
+ imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
+ switch (e->Iex.Binop.op) {
+ case Iop_QShlN8Sx16: size = 8 | imm; break;
+ case Iop_QShlN16Sx8: size = 16 | imm; break;
+ case Iop_QShlN32Sx4: size = 32 | imm; break;
+ case Iop_QShlN64Sx2: size = 64 | imm; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUS,
+ res, argL, size, True));
+ return res;
+ }
+ case Iop_QSalN8x16:
+ case Iop_QSalN16x8:
+ case Iop_QSalN32x4:
+ case Iop_QSalN64x2: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ UInt size, imm;
+ if (e->Iex.Binop.arg2->tag != Iex_Const ||
+ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
+ vpanic("ARM taget supports Iop_QShlNAxB with constant "
+ "second argument only\n");
+ }
+ imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
+ switch (e->Iex.Binop.op) {
+ case Iop_QSalN8x16: size = 8 | imm; break;
+ case Iop_QSalN16x8: size = 16 | imm; break;
+ case Iop_QSalN32x4: size = 32 | imm; break;
+ case Iop_QSalN64x2: size = 64 | imm; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNSS,
+ res, argL, size, True));
+ return res;
+ }
+ case Iop_ShrN8x16:
+ case Iop_ShrN16x8:
+ case Iop_ShrN32x4:
+ case Iop_ShrN64x2: {
+ HReg res = newVRegV(env);
+ HReg tmp = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
+ HReg argR2 = newVRegI(env);
+ UInt size;
+ switch (e->Iex.Binop.op) {
+ case Iop_ShrN8x16: size = 0; break;
+ case Iop_ShrN16x8: size = 1; break;
+ case Iop_ShrN32x4: size = 2; break;
+ case Iop_ShrN64x2: size = 3; break;
+ default: vassert(0);
+ }
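+         /* Same left-shift-only trick as above, but for a scalar
+            shift count: negate it, broadcast it to every lane with
+            VDUP, and shift left by the (negative) result. */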
+ addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
+ addInstr(env, ARMInstr_NUnary(ARMneon_DUP,
+ tmp, argR2, 0, True));
+ addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
+ res, argL, tmp, size, True));
+ return res;
+ }
+ case Iop_ShlN8x16:
+ case Iop_ShlN16x8:
+ case Iop_ShlN32x4:
+ case Iop_ShlN64x2: {
+ HReg res = newVRegV(env);
+ HReg tmp = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
+ UInt size;
+ switch (e->Iex.Binop.op) {
+ case Iop_ShlN8x16: size = 0; break;
+ case Iop_ShlN16x8: size = 1; break;
+ case Iop_ShlN32x4: size = 2; break;
+ case Iop_ShlN64x2: size = 3; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR, 0, True));
+ addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
+ res, argL, tmp, size, True));
+ return res;
+ }
+ case Iop_SarN8x16:
+ case Iop_SarN16x8:
+ case Iop_SarN32x4:
+ case Iop_SarN64x2: {
+ HReg res = newVRegV(env);
+ HReg tmp = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
+ HReg argR2 = newVRegI(env);
+ UInt size;
+ switch (e->Iex.Binop.op) {
+ case Iop_SarN8x16: size = 0; break;
+ case Iop_SarN16x8: size = 1; break;
+ case Iop_SarN32x4: size = 2; break;
+ case Iop_SarN64x2: size = 3; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
+ addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, True));
+ addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
+ res, argL, tmp, size, True));
+ return res;
+ }
+ case Iop_CmpGT8Ux16:
+ case Iop_CmpGT16Ux8:
+ case Iop_CmpGT32Ux4: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
+ UInt size;
+ switch (e->Iex.Binop.op) {
+ case Iop_CmpGT8Ux16: size = 0; break;
+ case Iop_CmpGT16Ux8: size = 1; break;
+ case Iop_CmpGT32Ux4: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NBinary(ARMneon_VCGTU,
+ res, argL, argR, size, True));
+ return res;
+ }
+ case Iop_CmpGT8Sx16:
+ case Iop_CmpGT16Sx8:
+ case Iop_CmpGT32Sx4: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
+ UInt size;
+ switch (e->Iex.Binop.op) {
+ case Iop_CmpGT8Sx16: size = 0; break;
+ case Iop_CmpGT16Sx8: size = 1; break;
+ case Iop_CmpGT32Sx4: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NBinary(ARMneon_VCGTS,
+ res, argL, argR, size, True));
+ return res;
+ }
+ case Iop_CmpEQ8x16:
+ case Iop_CmpEQ16x8:
+ case Iop_CmpEQ32x4: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
+ UInt size;
+ switch (e->Iex.Binop.op) {
+ case Iop_CmpEQ8x16: size = 0; break;
+ case Iop_CmpEQ16x8: size = 1; break;
+ case Iop_CmpEQ32x4: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NBinary(ARMneon_VCEQ,
+ res, argL, argR, size, True));
+ return res;
+ }
+ case Iop_Mul8x16:
+ case Iop_Mul16x8:
+ case Iop_Mul32x4: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
+ UInt size = 0;
+ switch(e->Iex.Binop.op) {
+ case Iop_Mul8x16: size = 0; break;
+ case Iop_Mul16x8: size = 1; break;
+ case Iop_Mul32x4: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NBinary(ARMneon_VMUL,
+ res, argL, argR, size, True));
+ return res;
+ }
+ case Iop_Mul32Fx4: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
+ UInt size = 0;
+ addInstr(env, ARMInstr_NBinary(ARMneon_VMULFP,
+ res, argL, argR, size, True));
+ return res;
+ }
+ case Iop_Mull8Ux8:
+ case Iop_Mull16Ux4:
+ case Iop_Mull32Ux2: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ UInt size = 0;
+ switch(e->Iex.Binop.op) {
+ case Iop_Mull8Ux8: size = 0; break;
+ case Iop_Mull16Ux4: size = 1; break;
+ case Iop_Mull32Ux2: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NBinary(ARMneon_VMULLU,
+ res, argL, argR, size, True));
+ return res;
+ }
+
+ case Iop_Mull8Sx8:
+ case Iop_Mull16Sx4:
+ case Iop_Mull32Sx2: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ UInt size = 0;
+ switch(e->Iex.Binop.op) {
+ case Iop_Mull8Sx8: size = 0; break;
+ case Iop_Mull16Sx4: size = 1; break;
+ case Iop_Mull32Sx2: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NBinary(ARMneon_VMULLS,
+ res, argL, argR, size, True));
+ return res;
+ }
+
+ case Iop_QDMulHi16Sx8:
+ case Iop_QDMulHi32Sx4: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
+ UInt size = 0;
+ switch(e->Iex.Binop.op) {
+ case Iop_QDMulHi16Sx8: size = 1; break;
+ case Iop_QDMulHi32Sx4: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULH,
+ res, argL, argR, size, True));
+ return res;
+ }
+
+ case Iop_QRDMulHi16Sx8:
+ case Iop_QRDMulHi32Sx4: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
+ UInt size = 0;
+ switch(e->Iex.Binop.op) {
+ case Iop_QRDMulHi16Sx8: size = 1; break;
+ case Iop_QRDMulHi32Sx4: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NBinary(ARMneon_VQRDMULH,
+ res, argL, argR, size, True));
+ return res;
+ }
+
+ case Iop_QDMulLong16Sx4:
+ case Iop_QDMulLong32Sx2: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ UInt size = 0;
+ switch(e->Iex.Binop.op) {
+ case Iop_QDMulLong16Sx4: size = 1; break;
+ case Iop_QDMulLong32Sx2: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULL,
+ res, argL, argR, size, True));
+ return res;
+ }
+ case Iop_PolynomialMul8x16: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
+ UInt size = 0;
+ addInstr(env, ARMInstr_NBinary(ARMneon_VMULP,
+ res, argL, argR, size, True));
+ return res;
+ }
+ case Iop_Max32Fx4: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
+ addInstr(env, ARMInstr_NBinary(ARMneon_VMAXF,
+ res, argL, argR, 2, True));
+ return res;
+ }
+ case Iop_Min32Fx4: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
+ addInstr(env, ARMInstr_NBinary(ARMneon_VMINF,
+ res, argL, argR, 2, True));
+ return res;
+ }
+ case Iop_PwMax32Fx4: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
+ addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXF,
+ res, argL, argR, 2, True));
+ return res;
+ }
+ case Iop_PwMin32Fx4: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
+ addInstr(env, ARMInstr_NBinary(ARMneon_VPMINF,
+ res, argL, argR, 2, True));
+ return res;
+ }
+ case Iop_CmpGT32Fx4: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
+ addInstr(env, ARMInstr_NBinary(ARMneon_VCGTF,
+ res, argL, argR, 2, True));
+ return res;
+ }
+ case Iop_CmpGE32Fx4: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
+ addInstr(env, ARMInstr_NBinary(ARMneon_VCGEF,
+ res, argL, argR, 2, True));
+ return res;
+ }
+ case Iop_CmpEQ32Fx4: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
+ addInstr(env, ARMInstr_NBinary(ARMneon_VCEQF,
+ res, argL, argR, 2, True));
+ return res;
+ }
+
+ case Iop_PolynomialMull8x8: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ UInt size = 0;
+ addInstr(env, ARMInstr_NBinary(ARMneon_VMULLP,
+ res, argL, argR, size, True));
+ return res;
+ }
+ case Iop_F32ToFixed32Ux4_RZ:
+ case Iop_F32ToFixed32Sx4_RZ:
+ case Iop_Fixed32UToF32x4_RN:
+ case Iop_Fixed32SToF32x4_RN: {
+ HReg res = newVRegV(env);
+ HReg arg = iselNeonExpr(env, e->Iex.Binop.arg1);
+ ARMNeonUnOp op;
+ UInt imm6;
+ if (e->Iex.Binop.arg2->tag != Iex_Const ||
+ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
+ vpanic("ARM supports FP <-> Fixed conversion with constant "
+ "second argument less than 33 only\n");
+ }
+ imm6 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
+ vassert(imm6 <= 32 && imm6 > 0);
+ imm6 = 64 - imm6;
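+         /* The NEON encoding holds 64 minus the number of fraction
+            bits in its imm6 field, hence the adjustment above. */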
+ switch(e->Iex.Binop.op) {
+ case Iop_F32ToFixed32Ux4_RZ: op = ARMneon_VCVTFtoFixedU; break;
+ case Iop_F32ToFixed32Sx4_RZ: op = ARMneon_VCVTFtoFixedS; break;
+ case Iop_Fixed32UToF32x4_RN: op = ARMneon_VCVTFixedUtoF; break;
+ case Iop_Fixed32SToF32x4_RN: op = ARMneon_VCVTFixedStoF; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NUnary(op, res, arg, imm6, True));
+ return res;
+ }
+ /*
+ FIXME remove if not used
+ case Iop_VDup8x16:
+ case Iop_VDup16x8:
+ case Iop_VDup32x4: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ UInt imm4;
+ UInt index;
+ if (e->Iex.Binop.arg2->tag != Iex_Const ||
+ typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
+ vpanic("ARM supports Iop_VDup with constant "
+ "second argument less than 16 only\n");
+ }
+ index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
+ switch(e->Iex.Binop.op) {
+ case Iop_VDup8x16: imm4 = (index << 1) + 1; break;
+ case Iop_VDup16x8: imm4 = (index << 2) + 2; break;
+ case Iop_VDup32x4: imm4 = (index << 3) + 4; break;
+ default: vassert(0);
+ }
+ if (imm4 >= 16) {
+ vpanic("ARM supports Iop_VDup with constant "
+ "second argument less than 16 only\n");
+ }
+ addInstr(env, ARMInstr_NUnary(ARMneon_VDUP,
+ res, argL, imm4, True));
+ return res;
+ }
+ */
+ case Iop_PwAdd8x16:
+ case Iop_PwAdd16x8:
+ case Iop_PwAdd32x4: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
+ UInt size = 0;
+ switch(e->Iex.Binop.op) {
+ case Iop_PwAdd8x16: size = 0; break;
+ case Iop_PwAdd16x8: size = 1; break;
+ case Iop_PwAdd32x4: size = 2; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_NBinary(ARMneon_VPADD,
+ res, argL, argR, size, True));
+ return res;
+ }
+ /* ... */
+ default:
+ break;
+ }
+ }
+
+ if (e->tag == Iex_Triop) {
+ switch (e->Iex.Triop.op) {
+ case Iop_ExtractV128: {
+ HReg res = newVRegV(env);
+ HReg argL = iselNeonExpr(env, e->Iex.Triop.arg1);
+ HReg argR = iselNeonExpr(env, e->Iex.Triop.arg2);
+ UInt imm4;
+ if (e->Iex.Triop.arg3->tag != Iex_Const ||
+ typeOfIRExpr(env->type_env, e->Iex.Triop.arg3) != Ity_I8) {
+ vpanic("ARM target supports Iop_ExtractV128 with constant "
+ "third argument less than 16 only\n");
+ }
+ imm4 = e->Iex.Triop.arg3->Iex.Const.con->Ico.U8;
+ if (imm4 >= 16) {
+ vpanic("ARM target supports Iop_ExtractV128 with constant "
+ "third argument less than 16 only\n");
+ }
+ addInstr(env, ARMInstr_NBinary(ARMneon_VEXT,
+ res, argL, argR, imm4, True));
+ return res;
+ }
+ default:
+ break;
+ }
+ }
+
+ if (e->tag == Iex_Mux0X) {
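+      /* Compute exprX into the destination, then conditionally
+         overwrite it with expr0: TST cond, #0xFF sets Z exactly when
+         the condition's low byte is zero, and the NCMovQ fires on
+         EQ. */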
+ HReg r8;
+ HReg rX = iselNeonExpr(env, e->Iex.Mux0X.exprX);
+ HReg r0 = iselNeonExpr(env, e->Iex.Mux0X.expr0);
+ HReg dst = newVRegV(env);
+ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, dst, rX, 4, True));
+ r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond);
+ addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r8,
+ ARMRI84_I84(0xFF,0)));
+ addInstr(env, ARMInstr_NCMovQ(ARMcc_EQ, dst, r0));
+ return dst;
+ }
+
+ neon_expr_bad:
+ ppIRExpr(e);
+ vpanic("iselNeonExpr_wrk");
+}
+
+/*---------------------------------------------------------*/
+/*--- ISEL: Floating point expressions (64 bit) ---*/
+/*---------------------------------------------------------*/
+
+/* Compute a 64-bit floating point value into a register, the identity
+   of which is returned.  As with iselIntExpr_R, the returned reg is
+   always virtual, and it must not be changed by subsequent code
+   emitted by the caller. */
+
+static HReg iselDblExpr ( ISelEnv* env, IRExpr* e )
+{
+ HReg r = iselDblExpr_wrk( env, e );
+# if 0
+ vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
+# endif
+ vassert(hregClass(r) == HRcFlt64);
+ vassert(hregIsVirtual(r));
+ return r;
+}
+
+/* DO NOT CALL THIS DIRECTLY */
+static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
+{
+ IRType ty = typeOfIRExpr(env->type_env,e);
+ vassert(e);
+ vassert(ty == Ity_F64);
+
+ if (e->tag == Iex_RdTmp) {
+ return lookupIRTemp(env, e->Iex.RdTmp.tmp);
+ }
+
+ if (e->tag == Iex_Const) {
+ /* Just handle the zero case. */
+ IRConst* con = e->Iex.Const.con;
+ if (con->tag == Ico_F64i && con->Ico.F64i == 0ULL) {
+ HReg z32 = newVRegI(env);
+ HReg dst = newVRegD(env);
+ addInstr(env, ARMInstr_Imm32(z32, 0));
+ addInstr(env, ARMInstr_VXferD(True/*toD*/, dst, z32, z32));
+ return dst;
+ }
+ }
+
+ if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
+ ARMAModeV* am;
+ HReg res = newVRegD(env);
+ vassert(e->Iex.Load.ty == Ity_F64);
+ am = iselIntExpr_AModeV(env, e->Iex.Load.addr);
+ addInstr(env, ARMInstr_VLdStD(True/*isLoad*/, res, am));
+ return res;
+ }
+
+ if (e->tag == Iex_Get) {
+      // XXX This won't work if offset > 1020 or is not a multiple of 4.
+      // In those cases we'll have to generate more longwinded code.
+ ARMAModeV* am = mkARMAModeV(hregARM_R8(), e->Iex.Get.offset);
+ HReg res = newVRegD(env);
+ addInstr(env, ARMInstr_VLdStD(True/*isLoad*/, res, am));
+ return res;
+ }
+
+ if (e->tag == Iex_Unop) {
+ switch (e->Iex.Unop.op) {
+ case Iop_ReinterpI64asF64: {
+ if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
+ return iselNeon64Expr(env, e->Iex.Unop.arg);
+ } else {
+ HReg srcHi, srcLo;
+ HReg dst = newVRegD(env);
+ iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg);
+ addInstr(env, ARMInstr_VXferD(True/*toD*/, dst, srcHi, srcLo));
+ return dst;
+ }
+ }
+ case Iop_NegF64: {
+ HReg src = iselDblExpr(env, e->Iex.Unop.arg);
+ HReg dst = newVRegD(env);
+ addInstr(env, ARMInstr_VUnaryD(ARMvfpu_NEG, dst, src));
+ return dst;
+ }
+ case Iop_AbsF64: {
+ HReg src = iselDblExpr(env, e->Iex.Unop.arg);
+ HReg dst = newVRegD(env);
+ addInstr(env, ARMInstr_VUnaryD(ARMvfpu_ABS, dst, src));
+ return dst;
+ }
+ case Iop_F32toF64: {
+ HReg src = iselFltExpr(env, e->Iex.Unop.arg);
+ HReg dst = newVRegD(env);
+ addInstr(env, ARMInstr_VCvtSD(True/*sToD*/, dst, src));
+ return dst;
+ }
+ case Iop_I32UtoF64:
+ case Iop_I32StoF64: {
+ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
+ HReg f32 = newVRegF(env);
+ HReg dst = newVRegD(env);
+ Bool syned = e->Iex.Unop.op == Iop_I32StoF64;
+ /* VMOV f32, src */
+ addInstr(env, ARMInstr_VXferS(True/*toS*/, f32, src));
+ /* FSITOD dst, f32 */
+ addInstr(env, ARMInstr_VCvtID(True/*iToD*/, syned,
+ dst, f32));
+ return dst;
+ }
+ default:
+ break;
+ }
+ }
+
+ if (e->tag == Iex_Binop) {
+ switch (e->Iex.Binop.op) {
+ case Iop_SqrtF64: {
+ /* first arg is rounding mode; we ignore it. */
+ HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
+ HReg dst = newVRegD(env);
+ addInstr(env, ARMInstr_VUnaryD(ARMvfpu_SQRT, dst, src));
+ return dst;
+ }
+ default:
+ break;
+ }
+ }
+
+ if (e->tag == Iex_Triop) {
+ switch (e->Iex.Triop.op) {
+ case Iop_DivF64:
+ case Iop_MulF64:
+ case Iop_AddF64:
+ case Iop_SubF64: {
+ ARMVfpOp op = 0; /*INVALID*/
+ HReg argL = iselDblExpr(env, e->Iex.Triop.arg2);
+ HReg argR = iselDblExpr(env, e->Iex.Triop.arg3);
+ HReg dst = newVRegD(env);
+ switch (e->Iex.Triop.op) {
+ case Iop_DivF64: op = ARMvfp_DIV; break;
+ case Iop_MulF64: op = ARMvfp_MUL; break;
+ case Iop_AddF64: op = ARMvfp_ADD; break;
+ case Iop_SubF64: op = ARMvfp_SUB; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_VAluD(op, dst, argL, argR));
+ return dst;
+ }
+ default:
+ break;
+ }
+ }
+
+ if (e->tag == Iex_Mux0X) {
+ if (ty == Ity_F64
+ && typeOfIRExpr(env->type_env,e->Iex.Mux0X.cond) == Ity_I8) {
+ HReg r8;
+ HReg rX = iselDblExpr(env, e->Iex.Mux0X.exprX);
+ HReg r0 = iselDblExpr(env, e->Iex.Mux0X.expr0);
+ HReg dst = newVRegD(env);
+ addInstr(env, ARMInstr_VUnaryD(ARMvfpu_COPY, dst, rX));
+ r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond);
+ addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r8,
+ ARMRI84_I84(0xFF,0)));
+ addInstr(env, ARMInstr_VCMovD(ARMcc_EQ, dst, r0));
+ return dst;
+ }
+ }
+
+ ppIRExpr(e);
+ vpanic("iselDblExpr_wrk");
+}
+
+
+/*---------------------------------------------------------*/
+/*--- ISEL: Floating point expressions (32 bit) ---*/
+/*---------------------------------------------------------*/
+
+/* Compute a 32-bit floating point value into a register, the identity
+   of which is returned.  As with iselIntExpr_R, the returned reg is
+   always virtual, and it must not be changed by subsequent code
+   emitted by the caller. */
+
+static HReg iselFltExpr ( ISelEnv* env, IRExpr* e )
+{
+ HReg r = iselFltExpr_wrk( env, e );
+# if 0
+ vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
+# endif
+ vassert(hregClass(r) == HRcFlt32);
+ vassert(hregIsVirtual(r));
+ return r;
+}
+
+/* DO NOT CALL THIS DIRECTLY */
+static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e )
+{
+ IRType ty = typeOfIRExpr(env->type_env,e);
+ vassert(e);
+ vassert(ty == Ity_F32);
+
+ if (e->tag == Iex_RdTmp) {
+ return lookupIRTemp(env, e->Iex.RdTmp.tmp);
+ }
+
+ if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
+ ARMAModeV* am;
+ HReg res = newVRegF(env);
+ vassert(e->Iex.Load.ty == Ity_F32);
+ am = iselIntExpr_AModeV(env, e->Iex.Load.addr);
+ addInstr(env, ARMInstr_VLdStS(True/*isLoad*/, res, am));
+ return res;
+ }
+
+ if (e->tag == Iex_Get) {
+      // XXX This won't work if offset > 1020 or is not a multiple of 4.
+      // In those cases we'll have to generate more longwinded code.
+ ARMAModeV* am = mkARMAModeV(hregARM_R8(), e->Iex.Get.offset);
+ HReg res = newVRegF(env);
+ addInstr(env, ARMInstr_VLdStS(True/*isLoad*/, res, am));
+ return res;
+ }
+
+ if (e->tag == Iex_Unop) {
+ switch (e->Iex.Unop.op) {
+ case Iop_ReinterpI32asF32: {
+ HReg dst = newVRegF(env);
+ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
+ addInstr(env, ARMInstr_VXferS(True/*toS*/, dst, src));
+ return dst;
+ }
+ case Iop_NegF32: {
+ HReg src = iselFltExpr(env, e->Iex.Unop.arg);
+ HReg dst = newVRegF(env);
+ addInstr(env, ARMInstr_VUnaryS(ARMvfpu_NEG, dst, src));
+ return dst;
+ }
+ case Iop_AbsF32: {
+ HReg src = iselFltExpr(env, e->Iex.Unop.arg);
+ HReg dst = newVRegF(env);
+ addInstr(env, ARMInstr_VUnaryS(ARMvfpu_ABS, dst, src));
+ return dst;
+ }
+ default:
+ break;
+ }
+ }
+
+ if (e->tag == Iex_Binop) {
+ switch (e->Iex.Binop.op) {
+ case Iop_SqrtF32: {
+ /* first arg is rounding mode; we ignore it. */
+ HReg src = iselFltExpr(env, e->Iex.Binop.arg2);
+ HReg dst = newVRegF(env);
+ addInstr(env, ARMInstr_VUnaryS(ARMvfpu_SQRT, dst, src));
+ return dst;
+ }
+ case Iop_F64toF32: {
+ HReg valD = iselDblExpr(env, e->Iex.Binop.arg2);
+ set_VFP_rounding_mode(env, e->Iex.Binop.arg1);
+ HReg valS = newVRegF(env);
+ /* FCVTSD valS, valD */
+ addInstr(env, ARMInstr_VCvtSD(False/*!sToD*/, valS, valD));
+ set_VFP_rounding_default(env);
+ return valS;
+ }
+ default:
+ break;
+ }
+ }
+
+ if (e->tag == Iex_Triop) {
+ switch (e->Iex.Triop.op) {
+ case Iop_DivF32:
+ case Iop_MulF32:
+ case Iop_AddF32:
+ case Iop_SubF32: {
+ ARMVfpOp op = 0; /*INVALID*/
+ HReg argL = iselFltExpr(env, e->Iex.Triop.arg2);
+ HReg argR = iselFltExpr(env, e->Iex.Triop.arg3);
+ HReg dst = newVRegF(env);
+ switch (e->Iex.Triop.op) {
+ case Iop_DivF32: op = ARMvfp_DIV; break;
+ case Iop_MulF32: op = ARMvfp_MUL; break;
+ case Iop_AddF32: op = ARMvfp_ADD; break;
+ case Iop_SubF32: op = ARMvfp_SUB; break;
+ default: vassert(0);
+ }
+ addInstr(env, ARMInstr_VAluS(op, dst, argL, argR));
+ return dst;
+ }
+ default:
+ break;
+ }
+ }
+
+ if (e->tag == Iex_Mux0X) {
+ if (ty == Ity_F32
+ && typeOfIRExpr(env->type_env,e->Iex.Mux0X.cond) == Ity_I8) {
+ HReg r8;
+ HReg rX = iselFltExpr(env, e->Iex.Mux0X.exprX);
+ HReg r0 = iselFltExpr(env, e->Iex.Mux0X.expr0);
+ HReg dst = newVRegF(env);
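+         /* Same scheme as the F64 Mux0X case above: dst = exprX,
+            conditionally overwritten with expr0 when the low 8 bits
+            of the condition are zero. */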
+ addInstr(env, ARMInstr_VUnaryS(ARMvfpu_COPY, dst, rX));
+ r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond);
+ addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r8,
+ ARMRI84_I84(0xFF,0)));
+ addInstr(env, ARMInstr_VCMovS(ARMcc_EQ, dst, r0));
+ return dst;
+ }
+ }
+
+ ppIRExpr(e);
+ vpanic("iselFltExpr_wrk");
+}
+
+
+/*---------------------------------------------------------*/
+/*--- ISEL: Statements ---*/
+/*---------------------------------------------------------*/
+
+static void iselStmt ( ISelEnv* env, IRStmt* stmt )
+{
+ if (vex_traceflags & VEX_TRACE_VCODE) {
+ vex_printf("\n-- ");
+ ppIRStmt(stmt);
+ vex_printf("\n");
+ }
+ switch (stmt->tag) {
+
+ /* --------- STORE --------- */
+ /* little-endian write to memory */
+ case Ist_Store: {
+ IRType tya = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
+ IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
+ IREndness end = stmt->Ist.Store.end;
+
+ if (tya != Ity_I32 || end != Iend_LE)
+ goto stmt_fail;
+
+ if (tyd == Ity_I32) {
+ HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
+ ARMAMode1* am = iselIntExpr_AMode1(env, stmt->Ist.Store.addr);
+ addInstr(env, ARMInstr_LdSt32(False/*!isLoad*/, rD, am));
+ return;
+ }
+ if (tyd == Ity_I16) {
+ HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
+ ARMAMode2* am = iselIntExpr_AMode2(env, stmt->Ist.Store.addr);
+ addInstr(env, ARMInstr_LdSt16(False/*!isLoad*/,
+ False/*!isSignedLoad*/, rD, am));
+ return;
+ }
+ if (tyd == Ity_I8) {
+ HReg rD = iselIntExpr_R(env, stmt->Ist.Store.data);
+ ARMAMode1* am = iselIntExpr_AMode1(env, stmt->Ist.Store.addr);
+ addInstr(env, ARMInstr_LdSt8U(False/*!isLoad*/, rD, am));
+ return;
+ }
+ if (tyd == Ity_I64) {
+ if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
+ HReg dD = iselNeon64Expr(env, stmt->Ist.Store.data);
+ ARMAModeN* am = iselIntExpr_AModeN(env, stmt->Ist.Store.addr);
+ addInstr(env, ARMInstr_NLdStD(False, dD, am));
+ } else {
+ HReg rDhi, rDlo, rA;
+ iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.Store.data);
+ rA = iselIntExpr_R(env, stmt->Ist.Store.addr);
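+            /* Little-endian: the low word goes at [rA+0], the high
+               word at [rA+4]. */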
+ addInstr(env, ARMInstr_LdSt32(False/*!load*/, rDhi,
+ ARMAMode1_RI(rA,4)));
+ addInstr(env, ARMInstr_LdSt32(False/*!load*/, rDlo,
+ ARMAMode1_RI(rA,0)));
+ }
+ return;
+ }
+ if (tyd == Ity_F64) {
+ HReg dD = iselDblExpr(env, stmt->Ist.Store.data);
+ ARMAModeV* am = iselIntExpr_AModeV(env, stmt->Ist.Store.addr);
+ addInstr(env, ARMInstr_VLdStD(False/*!isLoad*/, dD, am));
+ return;
+ }
+ if (tyd == Ity_F32) {
+ HReg fD = iselFltExpr(env, stmt->Ist.Store.data);
+ ARMAModeV* am = iselIntExpr_AModeV(env, stmt->Ist.Store.addr);
+ addInstr(env, ARMInstr_VLdStS(False/*!isLoad*/, fD, am));
+ return;
+ }
+ if (tyd == Ity_V128) {
+ HReg qD = iselNeonExpr(env, stmt->Ist.Store.data);
+ ARMAModeN* am = iselIntExpr_AModeN(env, stmt->Ist.Store.addr);
+ addInstr(env, ARMInstr_NLdStQ(False, qD, am));
+ return;
+ }
+
+ break;
+ }
+
+ /* --------- PUT --------- */
+ /* write guest state, fixed offset */
+ case Ist_Put: {
+ IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Put.data);
+
+ if (tyd == Ity_I32) {
+ HReg rD = iselIntExpr_R(env, stmt->Ist.Put.data);
+ ARMAMode1* am = ARMAMode1_RI(hregARM_R8(), stmt->Ist.Put.offset);
+ addInstr(env, ARMInstr_LdSt32(False/*!isLoad*/, rD, am));
+ return;
+ }
+ if (tyd == Ity_I64) {
+ if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
+ HReg addr = newVRegI(env);
+ HReg qD = iselNeon64Expr(env, stmt->Ist.Put.data);
+ addInstr(env, ARMInstr_Add32(addr, hregARM_R8(),
+ stmt->Ist.Put.offset));
+ addInstr(env, ARMInstr_NLdStD(False, qD, mkARMAModeN_R(addr)));
+ } else {
+ HReg rDhi, rDlo;
+ ARMAMode1* am0 = ARMAMode1_RI(hregARM_R8(),
+ stmt->Ist.Put.offset + 0);
+ ARMAMode1* am4 = ARMAMode1_RI(hregARM_R8(),
+ stmt->Ist.Put.offset + 4);
+ iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.Put.data);
+ addInstr(env, ARMInstr_LdSt32(False/*!isLoad*/, rDhi, am4));
+ addInstr(env, ARMInstr_LdSt32(False/*!isLoad*/, rDlo, am0));
+ }
+ return;
+ }
+ if (tyd == Ity_F64) {
+         // XXX This won't work if the offset is > 1020 or is not a
+         // multiple of 4.  In that case we'll have to generate more
+         // longwinded code.
+ ARMAModeV* am = mkARMAModeV(hregARM_R8(), stmt->Ist.Put.offset);
+ HReg rD = iselDblExpr(env, stmt->Ist.Put.data);
+ addInstr(env, ARMInstr_VLdStD(False/*!isLoad*/, rD, am));
+ return;
+ }
+ if (tyd == Ity_F32) {
+         // XXX This won't work if the offset is > 1020 or is not a
+         // multiple of 4.  In that case we'll have to generate more
+         // longwinded code.
+ ARMAModeV* am = mkARMAModeV(hregARM_R8(), stmt->Ist.Put.offset);
+ HReg rD = iselFltExpr(env, stmt->Ist.Put.data);
+ addInstr(env, ARMInstr_VLdStS(False/*!isLoad*/, rD, am));
+ return;
+ }
+ if (tyd == Ity_V128) {
+ HReg addr = newVRegI(env);
+ HReg qD = iselNeonExpr(env, stmt->Ist.Put.data);
+ addInstr(env, ARMInstr_Add32(addr, hregARM_R8(),
+ stmt->Ist.Put.offset));
+ addInstr(env, ARMInstr_NLdStQ(False, qD, mkARMAModeN_R(addr)));
+ return;
+ }
+ break;
+ }
+
+//zz /* --------- Indexed PUT --------- */
+//zz /* write guest state, run-time offset */
+//zz case Ist_PutI: {
+//zz ARMAMode2* am2
+//zz = genGuestArrayOffset(
+//zz env, stmt->Ist.PutI.descr,
+//zz stmt->Ist.PutI.ix, stmt->Ist.PutI.bias );
+//zz
+//zz IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.PutI.data);
+//zz
+//zz if (tyd == Ity_I8) {
+//zz HReg reg = iselIntExpr_R(env, stmt->Ist.PutI.data);
+//zz addInstr(env, ARMInstr_StoreB(reg, am2));
+//zz return;
+//zz }
+//zz// CAB: Ity_I32, Ity_I16 ?
+//zz break;
+//zz }
+
+ /* --------- TMP --------- */
+ /* assign value to temporary */
+ case Ist_WrTmp: {
+ IRTemp tmp = stmt->Ist.WrTmp.tmp;
+ IRType ty = typeOfIRTemp(env->type_env, tmp);
+
+ if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
+ ARMRI84* ri84 = iselIntExpr_RI84(NULL, False,
+ env, stmt->Ist.WrTmp.data);
+ HReg dst = lookupIRTemp(env, tmp);
+ addInstr(env, ARMInstr_Mov(dst,ri84));
+ return;
+ }
+ if (ty == Ity_I1) {
+ HReg dst = lookupIRTemp(env, tmp);
+ ARMCondCode cond = iselCondCode(env, stmt->Ist.WrTmp.data);
+ addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
+ addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
+ return;
+ }
+ if (ty == Ity_I64) {
+ if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
+ HReg src = iselNeon64Expr(env, stmt->Ist.WrTmp.data);
+ HReg dst = lookupIRTemp(env, tmp);
+ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, dst, src, 4, False));
+ } else {
+ HReg rHi, rLo, dstHi, dstLo;
+ iselInt64Expr(&rHi,&rLo, env, stmt->Ist.WrTmp.data);
+ lookupIRTemp64( &dstHi, &dstLo, env, tmp);
+ addInstr(env, mk_iMOVds_RR(dstHi, rHi) );
+ addInstr(env, mk_iMOVds_RR(dstLo, rLo) );
+ }
+ return;
+ }
+ if (ty == Ity_F64) {
+ HReg src = iselDblExpr(env, stmt->Ist.WrTmp.data);
+ HReg dst = lookupIRTemp(env, tmp);
+ addInstr(env, ARMInstr_VUnaryD(ARMvfpu_COPY, dst, src));
+ return;
+ }
+ if (ty == Ity_F32) {
+ HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data);
+ HReg dst = lookupIRTemp(env, tmp);
+ addInstr(env, ARMInstr_VUnaryS(ARMvfpu_COPY, dst, src));
+ return;
+ }
+ if (ty == Ity_V128) {
+ HReg src = iselNeonExpr(env, stmt->Ist.WrTmp.data);
+ HReg dst = lookupIRTemp(env, tmp);
+ addInstr(env, ARMInstr_NUnary(ARMneon_COPY, dst, src, 4, True));
+ return;
+ }
+ break;
+ }
+
+ /* --------- Call to DIRTY helper --------- */
+ /* call complex ("dirty") helper function */
+ case Ist_Dirty: {
+ IRType retty;
+ IRDirty* d = stmt->Ist.Dirty.details;
+ Bool passBBP = False;
+
+ if (d->nFxState == 0)
+ vassert(!d->needsBBP);
+
+ passBBP = toBool(d->nFxState > 0 && d->needsBBP);
+
+ /* Marshal args, do the call, clear stack. */
+ Bool ok = doHelperCall( env, passBBP, d->guard, d->cee, d->args );
+ if (!ok)
+ break; /* will go to stmt_fail: */
+
+ /* Now figure out what to do with the returned value, if any. */
+ if (d->tmp == IRTemp_INVALID)
+ /* No return value. Nothing to do. */
+ return;
+
+ retty = typeOfIRTemp(env->type_env, d->tmp);
+
+ if (retty == Ity_I64) {
+ if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
+ HReg tmp = lookupIRTemp(env, d->tmp);
+ addInstr(env, ARMInstr_VXferD(True, tmp, hregARM_R1(),
+ hregARM_R0()));
+ } else {
+ HReg dstHi, dstLo;
+ /* The returned value is in r1:r0. Park it in the
+ register-pair associated with tmp. */
+ lookupIRTemp64( &dstHi, &dstLo, env, d->tmp);
+ addInstr(env, mk_iMOVds_RR(dstHi, hregARM_R1()) );
+ addInstr(env, mk_iMOVds_RR(dstLo, hregARM_R0()) );
+ }
+ return;
+ }
+ if (retty == Ity_I32 || retty == Ity_I16 || retty == Ity_I8) {
+ /* The returned value is in r0. Park it in the register
+ associated with tmp. */
+ HReg dst = lookupIRTemp(env, d->tmp);
+ addInstr(env, mk_iMOVds_RR(dst, hregARM_R0()) );
+ return;
+ }
+
+ break;
+ }
+
+ /* --------- Load Linked and Store Conditional --------- */
+ case Ist_LLSC: {
+ if (stmt->Ist.LLSC.storedata == NULL) {
+ /* LL */
+ IRTemp res = stmt->Ist.LLSC.result;
+ IRType ty = typeOfIRTemp(env->type_env, res);
+ if (ty == Ity_I32 || ty == Ity_I8) {
+ Int szB = 0;
+ HReg r_dst = lookupIRTemp(env, res);
+ HReg raddr = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
+ switch (ty) {
+ case Ity_I8: szB = 1; break;
+ case Ity_I32: szB = 4; break;
+ default: vassert(0);
+ }
+ addInstr(env, mk_iMOVds_RR(hregARM_R1(), raddr));
+ addInstr(env, ARMInstr_LdrEX(szB));
+ addInstr(env, mk_iMOVds_RR(r_dst, hregARM_R0()));
+ return;
+ }
+ /* else fall thru; is unhandled */
+ } else {
+ /* SC */
+ IRTemp res = stmt->Ist.LLSC.result;
+ IRType ty = typeOfIRTemp(env->type_env, res);
+ IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.LLSC.storedata);
+ vassert(ty == Ity_I1);
+ if (tyd == Ity_I32 || tyd == Ity_I8) {
+ Int szB = 0;
+ HReg r_res = lookupIRTemp(env, res);
+ HReg rD = iselIntExpr_R(env, stmt->Ist.LLSC.storedata);
+ HReg rA = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
+ ARMRI84* one = ARMRI84_I84(1,0);
+ switch (tyd) {
+ case Ity_I8: szB = 1; break;
+ case Ity_I32: szB = 4; break;
+ default: vassert(0);
+ }
+ addInstr(env, mk_iMOVds_RR(hregARM_R1(), rD));
+ addInstr(env, mk_iMOVds_RR(hregARM_R2(), rA));
+ addInstr(env, ARMInstr_StrEX(szB));
+ /* now r0 is 1 if failed, 0 if success. Change to IR
+ conventions (0 is fail, 1 is success). Also transfer
+ result to r_res. */
+ addInstr(env, ARMInstr_Alu(ARMalu_XOR, r_res, hregARM_R0(), one));
+ /* And be conservative -- mask off all but the lowest bit */
+ addInstr(env, ARMInstr_Alu(ARMalu_AND, r_res, r_res, one));
+ return;
+ }
+ /* else fall thru; is unhandled */
+ }
+ break;
+ }
+
+ /* --------- MEM FENCE --------- */
+ case Ist_MBE:
+ switch (stmt->Ist.MBE.event) {
+ case Imbe_Fence:
+ addInstr(env,ARMInstr_MFence());
+ return;
+ default:
+ break;
+ }
+ break;
+
+ /* --------- INSTR MARK --------- */
+ /* Doesn't generate any executable code ... */
+ case Ist_IMark:
+ return;
+
+ /* --------- NO-OP --------- */
+ case Ist_NoOp:
+ return;
+
+ /* --------- EXIT --------- */
+ case Ist_Exit: {
+ HReg gnext;
+ ARMCondCode cc;
+ if (stmt->Ist.Exit.dst->tag != Ico_U32)
+ vpanic("isel_arm: Ist_Exit: dst is not a 32-bit value");
+ gnext = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
+ cc = iselCondCode(env, stmt->Ist.Exit.guard);
+ addInstr(env, mk_iMOVds_RR(hregARM_R14(), env->savedLR));
+ addInstr(env, ARMInstr_Goto(stmt->Ist.Exit.jk, cc, gnext));
+ return;
+ }
+
+ default: break;
+ }
+ stmt_fail:
+ ppIRStmt(stmt);
+ vpanic("iselStmt");
+}
+
+
+/*---------------------------------------------------------*/
+/*--- ISEL: Basic block terminators (Nexts) ---*/
+/*---------------------------------------------------------*/
+
+static void iselNext ( ISelEnv* env, IRExpr* next, IRJumpKind jk )
+{
+ HReg rDst;
+ if (vex_traceflags & VEX_TRACE_VCODE) {
+ vex_printf("\n-- goto {");
+ ppIRJumpKind(jk);
+ vex_printf("} ");
+ ppIRExpr(next);
+ vex_printf("\n");
+ }
+ rDst = iselIntExpr_R(env, next);
+ addInstr(env, mk_iMOVds_RR(hregARM_R14(), env->savedLR));
+ addInstr(env, ARMInstr_Goto(jk, ARMcc_AL, rDst));
+}
+
+
+/*---------------------------------------------------------*/
+/*--- Insn selector top-level ---*/
+/*---------------------------------------------------------*/
+
+/* Translate an entire SB to arm code. */
+
+HInstrArray* iselSB_ARM ( IRSB* bb, VexArch arch_host,
+ VexArchInfo* archinfo_host,
+ VexAbiInfo* vbi/*UNUSED*/ )
+{
+ Int i, j;
+ HReg hreg, hregHI;
+ ISelEnv* env;
+ UInt hwcaps_host = archinfo_host->hwcaps;
+ Bool neon = False;
+ static UInt counter = 0;
+
+ /* sanity ... */
+ vassert(arch_host == VexArchARM);
+
+ /* hwcaps should not change from one ISEL call to another. */
+ arm_hwcaps = hwcaps_host;
+
+ /* Make up an initial environment to use. */
+ env = LibVEX_Alloc(sizeof(ISelEnv));
+ env->vreg_ctr = 0;
+
+ /* Set up output code array. */
+ env->code = newHInstrArray();
+
+ /* Copy BB's type env. */
+ env->type_env = bb->tyenv;
+
+ /* Make up an IRTemp -> virtual HReg mapping. This doesn't
+ change as we go along. */
+ env->n_vregmap = bb->tyenv->types_used;
+ env->vregmap = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
+ env->vregmapHI = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
+
+ /* For each IR temporary, allocate a suitably-kinded virtual
+ register. */
+ j = 0;
+ for (i = 0; i < env->n_vregmap; i++) {
+ hregHI = hreg = INVALID_HREG;
+ switch (bb->tyenv->types[i]) {
+ case Ity_I1:
+ case Ity_I8:
+ case Ity_I16:
+ case Ity_I32: hreg = mkHReg(j++, HRcInt32, True); break;
+ case Ity_I64:
+ if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
+ hreg = mkHReg(j++, HRcFlt64, True);
+ neon = True;
+ } else {
+ hregHI = mkHReg(j++, HRcInt32, True);
+ hreg = mkHReg(j++, HRcInt32, True);
+ }
+ break;
+ case Ity_F32: hreg = mkHReg(j++, HRcFlt32, True); break;
+ case Ity_F64: hreg = mkHReg(j++, HRcFlt64, True); break;
+ case Ity_V128: hreg = mkHReg(j++, HRcVec128, True);
+ neon = True; break;
+ default: ppIRType(bb->tyenv->types[i]);
+ vpanic("iselBB: IRTemp type");
+ }
+ env->vregmap[i] = hreg;
+ env->vregmapHI[i] = hregHI;
+ }
+ env->vreg_ctr = j;
+
+ /* Keep a copy of the link reg, since any call to a helper function
+ will trash it, and we can't get back to the dispatcher once that
+ happens. */
+ env->savedLR = newVRegI(env);
+ addInstr(env, mk_iMOVds_RR(env->savedLR, hregARM_R14()));
+
+ /* Ok, finally we can iterate over the statements. */
+ for (i = 0; i < bb->stmts_used; i++)
+ iselStmt(env,bb->stmts[i]);
+
+ iselNext(env,bb->next,bb->jumpkind);
+
+ /* record the number of vregs we used. */
+ env->code->n_vregs = env->vreg_ctr;
+ counter++;
+ return env->code;
+}
+
+
+/*---------------------------------------------------------------*/
+/*--- end host_arm_isel.c ---*/
+/*---------------------------------------------------------------*/
diff --git a/VEX/priv/host_generic_reg_alloc2.c b/VEX/priv/host_generic_reg_alloc2.c
new file mode 100644
index 0000000..48303ff
--- /dev/null
+++ b/VEX/priv/host_generic_reg_alloc2.c
@@ -0,0 +1,1549 @@
+
+/*---------------------------------------------------------------*/
+/*--- begin host_reg_alloc2.c ---*/
+/*---------------------------------------------------------------*/
+
+/*
+ This file is part of Valgrind, a dynamic binary instrumentation
+ framework.
+
+ Copyright (C) 2004-2010 OpenWorks LLP
+ info@open-works.net
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA.
+
+ The GNU General Public License is contained in the file COPYING.
+
+ Neither the names of the U.S. Department of Energy nor the
+ University of California nor the names of its contributors may be
+ used to endorse or promote products derived from this software
+ without prior written permission.
+*/
+
+#include "libvex_basictypes.h"
+#include "libvex.h"
+
+#include "main_util.h"
+#include "host_generic_regs.h"
+
+/* Set to 1 for lots of debugging output. */
+#define DEBUG_REGALLOC 0
+
+
+/* TODO 27 Oct 04:
+
+ Better consistency checking from what isMove tells us.
+
+ We can possibly do V-V coalescing even when the src is spilled,
+ providing we can arrange for the dst to have the same spill slot.
+
+ Note that state[].hreg is the same as the available real regs.
+
+ Generally rationalise data structures. */
+
+
+/* Records information on virtual register live ranges. Computed once
+ and remains unchanged after that. */
+typedef
+ struct {
+ /* Becomes live for the first time after this insn ... */
+ Short live_after;
+ /* Becomes dead for the last time before this insn ... */
+ Short dead_before;
+ /* The "home" spill slot, if needed. Never changes. */
+ Short spill_offset;
+ Short spill_size;
+ /* What kind of register this is. */
+ HRegClass reg_class;
+ }
+ VRegLR;
+
+
+/* Records information on real-register live ranges. Computed once
+ and remains unchanged after that. */
+typedef
+ struct {
+ HReg rreg;
+ /* Becomes live after this insn ... */
+ Short live_after;
+ /* Becomes dead before this insn ... */
+ Short dead_before;
+ }
+ RRegLR;
+
+
+/* An array of the following structs (rreg_state) comprises the
+ running state of the allocator. It indicates what the current
+ disposition of each allocatable real register is. The array gets
+ updated as the allocator processes instructions. */
+typedef
+ struct {
+ /* ------ FIELDS WHICH DO NOT CHANGE ------ */
+ /* Which rreg is this for? */
+ HReg rreg;
+ /* Is this involved in any HLRs? (only an optimisation hint) */
+ Bool has_hlrs;
+ /* ------ FIELDS WHICH DO CHANGE ------ */
+ /* 6 May 07: rearranged fields below so the whole struct fits
+ into 16 bytes on both x86 and amd64. */
+ /* Used when .disp == Bound and we are looking for vregs to
+ spill. */
+ Bool is_spill_cand;
+ /* Optimisation: used when .disp == Bound. Indicates when the
+ rreg has the same value as the spill slot for the associated
+ vreg. Is safely left at False, and becomes True after a
+ spill store or reload for this rreg. */
+ Bool eq_spill_slot;
+      /* What's its current disposition? */
+ enum { Free, /* available for use */
+ Unavail, /* in a real-reg live range */
+ Bound /* in use (holding value of some vreg) */
+ }
+ disp;
+ /* If .disp == Bound, what vreg is it bound to? */
+ HReg vreg;
+ }
+ RRegState;
+
+
+/* The allocator also maintains a redundant array of indexes
+ (vreg_state) from vreg numbers back to entries in rreg_state. It
+ is redundant because iff vreg_state[i] == j then
+ hregNumber(rreg_state[j].vreg) == i -- that is, the two entries
+ point at each other. The purpose of this is to speed up activities
+ which involve looking for a particular vreg: there is no need to
+ scan the rreg_state looking for it, just index directly into
+ vreg_state. The FAQ "does this vreg already have an associated
+ rreg" is the main beneficiary.
+
+ To indicate, in vreg_state[i], that a given vreg is not currently
+ associated with any rreg, that entry can be set to INVALID_RREG_NO.
+
+ Because the vreg_state entries are signed Shorts, the max number
+   of vregs that can be handled by regalloc is 32767.
+*/
+
+#define INVALID_RREG_NO ((Short)(-1))
+
+#define IS_VALID_VREGNO(_zz) ((_zz) >= 0 && (_zz) < n_vregs)
+#define IS_VALID_RREGNO(_zz) ((_zz) >= 0 && (_zz) < n_rregs)
+
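+/* Illustrative sketch only (not used by the allocator): the two-way
+   mapping invariant described above, written out as a checker.  The
+   arrays and their sizes are locals of doRegisterAllocation below, so
+   they are passed in here. */
+#if 0
+static void check_mutual_maps ( RRegState* rreg_state, Int n_rregs,
+                                Short* vreg_state, Int n_vregs )
+{
+   Int j, k;
+   for (j = 0; j < n_rregs; j++) {
+      if (rreg_state[j].disp != Bound)
+         continue;
+      k = hregNumber(rreg_state[j].vreg);
+      vassert(IS_VALID_VREGNO(k));
+      vassert(vreg_state[k] == j);
+   }
+   for (j = 0; j < n_vregs; j++) {
+      k = vreg_state[j];
+      if (k == INVALID_RREG_NO)
+         continue;
+      vassert(IS_VALID_RREGNO(k));
+      vassert(rreg_state[k].disp == Bound);
+      vassert(hregNumber(rreg_state[k].vreg) == j);
+   }
+}
+#endif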
+
+/* Does this instruction mention a particular reg? */
+static Bool instrMentionsReg (
+ void (*getRegUsage) (HRegUsage*, HInstr*, Bool),
+ HInstr* instr,
+ HReg r,
+ Bool mode64
+)
+{
+ Int i;
+ HRegUsage reg_usage;
+   (*getRegUsage)(&reg_usage, instr, mode64);
+ for (i = 0; i < reg_usage.n_used; i++)
+ if (reg_usage.hreg[i] == r)
+ return True;
+ return False;
+}
+
+
+/* Search forward from some given point in the incoming instruction
+ sequence. Point is to select a virtual register to spill, by
+ finding the vreg which is mentioned as far ahead as possible, in
+ the hope that this will minimise the number of consequent reloads.
+
+ Only do the search for vregs which are Bound in the running state,
+ and for which the .is_spill_cand field is set. This allows the
+ caller to arbitrarily restrict the set of spill candidates to be
+ considered.
+
+ Returns an index into the state array indicating the (v,r) pair to
+ spill, or -1 if none was found. */
+static
+Int findMostDistantlyMentionedVReg (
+ void (*getRegUsage) (HRegUsage*, HInstr*, Bool),
+ HInstrArray* instrs_in,
+ Int search_from_instr,
+ RRegState* state,
+ Int n_state,
+ Bool mode64
+)
+{
+ Int k, m;
+ Int furthest_k = -1;
+ Int furthest = -1;
+ vassert(search_from_instr >= 0);
+ for (k = 0; k < n_state; k++) {
+ if (!state[k].is_spill_cand)
+ continue;
+ vassert(state[k].disp == Bound);
+ for (m = search_from_instr; m < instrs_in->arr_used; m++) {
+ if (instrMentionsReg(getRegUsage,
+ instrs_in->arr[m], state[k].vreg, mode64))
+ break;
+ }
+ if (m > furthest) {
+ furthest = m;
+ furthest_k = k;
+ }
+ }
+ return furthest_k;
+}
+
+
+/* Check that this vreg has been assigned a sane spill offset. */
+static inline void sanity_check_spill_offset ( VRegLR* vreg )
+{
+ if (vreg->reg_class == HRcVec128 || vreg->reg_class == HRcFlt64) {
+ vassert(0 == ((UShort)vreg->spill_offset % 16));
+ } else {
+ vassert(0 == ((UShort)vreg->spill_offset % 8));
+ }
+}
+
+
+/* Double the size of the real-reg live-range array, if needed. */
+static void ensureRRLRspace ( RRegLR** info, Int* size, Int used )
+{
+ Int k;
+ RRegLR* arr2;
+ if (used < *size) return;
+ if (0)
+ vex_printf("ensureRRISpace: %d -> %d\n", *size, 2 * *size);
+ vassert(used == *size);
+ arr2 = LibVEX_Alloc(2 * *size * sizeof(RRegLR));
+ for (k = 0; k < *size; k++)
+ arr2[k] = (*info)[k];
+ *size *= 2;
+ *info = arr2;
+}
+
+
+/* Sort an array of RRegLR entries by either the .live_after or
+ .dead_before fields. This is performance-critical. */
+static void sortRRLRarray ( RRegLR* arr,
+ Int size, Bool by_live_after )
+{
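+   /* A shell sort, using Knuth's gap sequence h(i+1) = 3*h(i) + 1
+      (1, 4, 13, 40, ...). */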
+ Int incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
+ 9841, 29524, 88573, 265720,
+ 797161, 2391484 };
+ Int lo = 0;
+ Int hi = size-1;
+ Int i, j, h, bigN, hp;
+ RRegLR v;
+
+ vassert(size >= 0);
+ if (size == 0)
+ return;
+
+ bigN = hi - lo + 1; if (bigN < 2) return;
+ hp = 0; while (hp < 14 && incs[hp] < bigN) hp++; hp--;
+
+ if (by_live_after) {
+
+ for ( ; hp >= 0; hp--) {
+ h = incs[hp];
+ for (i = lo + h; i <= hi; i++) {
+ v = arr[i];
+ j = i;
+ while (arr[j-h].live_after > v.live_after) {
+ arr[j] = arr[j-h];
+ j = j - h;
+ if (j <= (lo + h - 1)) break;
+ }
+ arr[j] = v;
+ }
+ }
+
+ } else {
+
+ for ( ; hp >= 0; hp--) {
+ h = incs[hp];
+ for (i = lo + h; i <= hi; i++) {
+ v = arr[i];
+ j = i;
+ while (arr[j-h].dead_before > v.dead_before) {
+ arr[j] = arr[j-h];
+ j = j - h;
+ if (j <= (lo + h - 1)) break;
+ }
+ arr[j] = v;
+ }
+ }
+
+ }
+}
+
+
+/* A target-independent register allocator. Requires various
+ functions which it uses to deal abstractly with instructions and
+ registers, since it cannot have any target-specific knowledge.
+
+ Returns a new list of instructions, which, as a result of the
+ behaviour of mapRegs, will be in-place modifications of the
+ original instructions.
+
+ Requires that the incoming code has been generated using
+ vreg numbers 0, 1 .. n_vregs-1. Appearance of a vreg outside
+ that range is a checked run-time error.
+
+ Takes an expandable array of pointers to unallocated insns.
+ Returns an expandable array of pointers to allocated insns.
+*/
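+
+/* Illustrative only: roughly how the translation pipeline invokes
+   this allocator, with the callbacks supplied by the selected host
+   backend.  Parameter order follows the declaration below; vcode and
+   rcode are placeholder names, and the actual call site may differ
+   in detail.
+
+      HInstrArray* rcode
+         = doRegisterAllocation ( vcode, available_real_regs,
+                                  n_available_real_regs,
+                                  isMove, getRegUsage, mapRegs,
+                                  genSpill, genReload, directReload,
+                                  guest_sizeB, ppInstr, ppReg, mode64 );
+*/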
+HInstrArray* doRegisterAllocation (
+
+ /* Incoming virtual-registerised code. */
+ HInstrArray* instrs_in,
+
+ /* An array listing all the real registers the allocator may use,
+ in no particular order. */
+ HReg* available_real_regs,
+ Int n_available_real_regs,
+
+ /* Return True iff the given insn is a reg-reg move, in which
+ case also return the src and dst regs. */
+ Bool (*isMove) ( HInstr*, HReg*, HReg* ),
+
+ /* Get info about register usage in this insn. */
+ void (*getRegUsage) ( HRegUsage*, HInstr*, Bool ),
+
+ /* Apply a reg-reg mapping to an insn. */
+ void (*mapRegs) ( HRegRemap*, HInstr*, Bool ),
+
+ /* Return one, or, if we're unlucky, two insn(s) to spill/restore a
+ real reg to a spill slot byte offset. The two leading HInstr**
+ args are out parameters, through which the generated insns are
+ returned. Also (optionally) a 'directReload' function, which
+ attempts to replace a given instruction by one which reads
+ directly from a specified spill slot. May be NULL, in which
+ case the optimisation is not attempted. */
+ void (*genSpill) ( HInstr**, HInstr**, HReg, Int, Bool ),
+ void (*genReload) ( HInstr**, HInstr**, HReg, Int, Bool ),
+ HInstr* (*directReload) ( HInstr*, HReg, Short ),
+ Int guest_sizeB,
+
+ /* For debug printing only. */
+ void (*ppInstr) ( HInstr*, Bool ),
+ void (*ppReg) ( HReg ),
+
+ /* 32/64bit mode */
+ Bool mode64
+)
+{
+# define N_SPILL64S (LibVEX_N_SPILL_BYTES / 8)
+
+ const Bool eq_spill_opt = True;
+
+ /* Iterators and temporaries. */
+ Int ii, j, k, m, spillee, k_suboptimal;
+ HReg rreg, vreg, vregS, vregD;
+ HRegUsage reg_usage;
+
+ /* Info on vregs and rregs. Computed once and remains
+ unchanged. */
+ Int n_vregs;
+ VRegLR* vreg_lrs; /* [0 .. n_vregs-1] */
+
+ /* We keep two copies of the real-reg live range info, one sorted
+ by .live_after and the other by .dead_before. First the
+      unsorted info is created in the _la variant and then copied into
+      the _db variant.  Once that's done, both of them are sorted.
+ We also need two integer cursors which record the next
+ location in the two arrays to consider. */
+ RRegLR* rreg_lrs_la;
+ RRegLR* rreg_lrs_db;
+ Int rreg_lrs_size;
+ Int rreg_lrs_used;
+ Int rreg_lrs_la_next;
+ Int rreg_lrs_db_next;
+
+ /* Used when constructing vreg_lrs (for allocating stack
+ slots). */
+ Int ss_busy_until_before[N_SPILL64S];
+
+ /* Used when constructing rreg_lrs. */
+ Int* rreg_live_after;
+ Int* rreg_dead_before;
+
+ /* Running state of the core allocation algorithm. */
+ RRegState* rreg_state; /* [0 .. n_rregs-1] */
+ Int n_rregs;
+
+ /* .. and the redundant backward map */
+ /* Each value is 0 .. n_rregs-1 or is INVALID_RREG_NO.
+      This implies n_rregs must be <= 32768. */
+ Short* vreg_state; /* [0 .. n_vregs-1] */
+
+ /* The vreg -> rreg map constructed and then applied to each
+ instr. */
+ HRegRemap remap;
+
+ /* The output array of instructions. */
+ HInstrArray* instrs_out;
+
+ /* Sanity checks are expensive. They are only done periodically,
+ not at each insn processed. */
+ Bool do_sanity_check;
+
+ vassert(0 == (guest_sizeB % 16));
+ vassert(0 == (LibVEX_N_SPILL_BYTES % 16));
+ vassert(0 == (N_SPILL64S % 2));
+
+ /* The live range numbers are signed shorts, and so limiting the
+ number of insns to 10000 comfortably guards against them
+ overflowing 32k. */
+ vassert(instrs_in->arr_used <= 10000);
+
+# define INVALID_INSTRNO (-2)
+
+# define EMIT_INSTR(_instr) \
+ do { \
+ HInstr* _tmp = (_instr); \
+ if (DEBUG_REGALLOC) { \
+ vex_printf("** "); \
+ (*ppInstr)(_tmp, mode64); \
+ vex_printf("\n\n"); \
+ } \
+ addHInstr ( instrs_out, _tmp ); \
+ } while (0)
+
+# define PRINT_STATE \
+ do { \
+ Int z, q; \
+ for (z = 0; z < n_rregs; z++) { \
+ vex_printf(" rreg_state[%2d] = ", z); \
+ (*ppReg)(rreg_state[z].rreg); \
+ vex_printf(" \t"); \
+ switch (rreg_state[z].disp) { \
+ case Free: vex_printf("Free\n"); break; \
+ case Unavail: vex_printf("Unavail\n"); break; \
+ case Bound: vex_printf("BoundTo "); \
+ (*ppReg)(rreg_state[z].vreg); \
+ vex_printf("\n"); break; \
+ } \
+ } \
+ vex_printf("\n vreg_state[0 .. %d]:\n ", n_vregs-1); \
+ q = 0; \
+ for (z = 0; z < n_vregs; z++) { \
+ if (vreg_state[z] == INVALID_RREG_NO) \
+ continue; \
+ vex_printf("[%d] -> %d ", z, vreg_state[z]); \
+ q++; \
+ if (q > 0 && (q % 6) == 0) \
+ vex_printf("\n "); \
+ } \
+ vex_printf("\n"); \
+ } while (0)
+
+
+ /* --------- Stage 0: set up output array --------- */
+ /* --------- and allocate/initialise running state. --------- */
+
+ instrs_out = newHInstrArray();
+
+ /* ... and initialise running state. */
+ /* n_rregs is no more than a short name for n_available_real_regs. */
+ n_rregs = n_available_real_regs;
+ n_vregs = instrs_in->n_vregs;
+
+ /* If this is not so, vreg_state entries will overflow. */
+ vassert(n_vregs < 32767);
+
+ rreg_state = LibVEX_Alloc(n_rregs * sizeof(RRegState));
+ vreg_state = LibVEX_Alloc(n_vregs * sizeof(Short));
+
+ for (j = 0; j < n_rregs; j++) {
+ rreg_state[j].rreg = available_real_regs[j];
+ rreg_state[j].has_hlrs = False;
+ rreg_state[j].disp = Free;
+ rreg_state[j].vreg = INVALID_HREG;
+ rreg_state[j].is_spill_cand = False;
+ rreg_state[j].eq_spill_slot = False;
+ }
+
+ for (j = 0; j < n_vregs; j++)
+ vreg_state[j] = INVALID_RREG_NO;
+
+
+ /* --------- Stage 1: compute vreg live ranges. --------- */
+ /* --------- Stage 2: compute rreg live ranges. --------- */
+
+ /* ------ start of SET UP TO COMPUTE VREG LIVE RANGES ------ */
+
+ /* This is relatively simple, because (1) we only seek the complete
+ end-to-end live range of each vreg, and are not interested in
+ any holes in it, and (2) the vregs are conveniently numbered 0
+ .. n_vregs-1, so we can just dump the results in a
+ pre-allocated array. */
+
+ vreg_lrs = NULL;
+ if (n_vregs > 0)
+ vreg_lrs = LibVEX_Alloc(sizeof(VRegLR) * n_vregs);
+
+ for (j = 0; j < n_vregs; j++) {
+ vreg_lrs[j].live_after = INVALID_INSTRNO;
+ vreg_lrs[j].dead_before = INVALID_INSTRNO;
+ vreg_lrs[j].spill_offset = 0;
+ vreg_lrs[j].spill_size = 0;
+ vreg_lrs[j].reg_class = HRcINVALID;
+ }
+
+ /* ------ end of SET UP TO COMPUTE VREG LIVE RANGES ------ */
+
+ /* ------ start of SET UP TO COMPUTE RREG LIVE RANGES ------ */
+
+ /* This is more complex than Stage 1, because we need to compute
+ exactly all the live ranges of all the allocatable real regs,
+ and we don't know in advance how many there will be. */
+
+ rreg_lrs_used = 0;
+ rreg_lrs_size = 4;
+ rreg_lrs_la = LibVEX_Alloc(rreg_lrs_size * sizeof(RRegLR));
+ rreg_lrs_db = NULL; /* we'll create this later */
+
+   /* We'll need to track live range start/end points separately for
+ each rreg. Sigh. */
+ vassert(n_available_real_regs > 0);
+ rreg_live_after = LibVEX_Alloc(n_available_real_regs * sizeof(Int));
+ rreg_dead_before = LibVEX_Alloc(n_available_real_regs * sizeof(Int));
+
+ for (j = 0; j < n_available_real_regs; j++) {
+ rreg_live_after[j] =
+ rreg_dead_before[j] = INVALID_INSTRNO;
+ }
+
+ /* ------ end of SET UP TO COMPUTE RREG LIVE RANGES ------ */
+
+ /* ------ start of ITERATE OVER INSNS ------ */
+
+ for (ii = 0; ii < instrs_in->arr_used; ii++) {
+
+      (*getRegUsage)( &reg_usage, instrs_in->arr[ii], mode64 );
+
+# if 0
+ vex_printf("\n%d stage1: ", ii);
+ (*ppInstr)(instrs_in->arr[ii], mode64);
+ vex_printf("\n");
+      ppHRegUsage(&reg_usage);
+# endif
+
+ /* ------ start of DEAL WITH VREG LIVE RANGES ------ */
+
+ /* for each reg mentioned in the insn ... */
+ for (j = 0; j < reg_usage.n_used; j++) {
+
+ vreg = reg_usage.hreg[j];
+ /* only interested in virtual registers right now. */
+ if (!hregIsVirtual(vreg))
+ continue;
+ k = hregNumber(vreg);
+ if (k < 0 || k >= n_vregs) {
+ vex_printf("\n");
+ (*ppInstr)(instrs_in->arr[ii], mode64);
+ vex_printf("\n");
+ vex_printf("vreg %d, n_vregs %d\n", k, n_vregs);
+ vpanic("doRegisterAllocation: out-of-range vreg");
+ }
+
+ /* Take the opportunity to note its regclass. We'll need
+ that when allocating spill slots. */
+ if (vreg_lrs[k].reg_class == HRcINVALID) {
+ /* First mention of this vreg. */
+ vreg_lrs[k].reg_class = hregClass(vreg);
+ } else {
+ /* Seen it before, so check for consistency. */
+ vassert(vreg_lrs[k].reg_class == hregClass(vreg));
+ }
+
+ /* Now consider live ranges. */
+ switch (reg_usage.mode[j]) {
+ case HRmRead:
+ if (vreg_lrs[k].live_after == INVALID_INSTRNO) {
+ vex_printf("\n\nOFFENDING VREG = %d\n", k);
+ vpanic("doRegisterAllocation: "
+ "first event for vreg is Read");
+ }
+ vreg_lrs[k].dead_before = toShort(ii + 1);
+ break;
+ case HRmWrite:
+ if (vreg_lrs[k].live_after == INVALID_INSTRNO)
+ vreg_lrs[k].live_after = toShort(ii);
+ vreg_lrs[k].dead_before = toShort(ii + 1);
+ break;
+ case HRmModify:
+ if (vreg_lrs[k].live_after == INVALID_INSTRNO) {
+ vex_printf("\n\nOFFENDING VREG = %d\n", k);
+ vpanic("doRegisterAllocation: "
+ "first event for vreg is Modify");
+ }
+ vreg_lrs[k].dead_before = toShort(ii + 1);
+ break;
+ default:
+ vpanic("doRegisterAllocation(1)");
+ } /* switch */
+
+ } /* iterate over registers */
+
+ /* ------ end of DEAL WITH VREG LIVE RANGES ------ */
+
+ /* ------ start of DEAL WITH RREG LIVE RANGES ------ */
+
+ /* for each reg mentioned in the insn ... */
+ for (j = 0; j < reg_usage.n_used; j++) {
+
+ /* Dummy initialisations of flush_la and flush_db to avoid
+ possible bogus uninit-var warnings from gcc. */
+ Int flush_la = INVALID_INSTRNO, flush_db = INVALID_INSTRNO;
+ Bool flush;
+
+ rreg = reg_usage.hreg[j];
+
+ /* only interested in real registers right now. */
+ if (hregIsVirtual(rreg))
+ continue;
+
+ /* Furthermore, we're not interested in this rreg unless it's
+ one of the allocatable ones. For example, it could be a
+ stack pointer register, or some other register beyond our
+ control, in which case we should just ignore it. */
+ for (k = 0; k < n_available_real_regs; k++)
+ if (available_real_regs[k] == rreg)
+ break;
+ if (k == n_available_real_regs)
+ continue; /* not found -- ignore. */
+ flush = False;
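+         /* A write to an rreg which is already inside a live range
+            ends that range: it is flushed into the rreg_lrs_la array
+            below, and a new range starts at this insn. */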
+ switch (reg_usage.mode[j]) {
+ case HRmWrite:
+ flush_la = rreg_live_after[k];
+ flush_db = rreg_dead_before[k];
+ if (flush_la != INVALID_INSTRNO
+ && flush_db != INVALID_INSTRNO)
+ flush = True;
+ rreg_live_after[k] = ii;
+ rreg_dead_before[k] = ii+1;
+ break;
+ case HRmRead:
+ if (rreg_live_after[k] == INVALID_INSTRNO) {
+ vex_printf("\nOFFENDING RREG = ");
+ (*ppReg)(available_real_regs[k]);
+ vex_printf("\n");
+ vex_printf("\nOFFENDING instr = ");
+ (*ppInstr)(instrs_in->arr[ii], mode64);
+ vex_printf("\n");
+ vpanic("doRegisterAllocation: "
+ "first event for rreg is Read");
+ }
+ rreg_dead_before[k] = ii+1;
+ break;
+ case HRmModify:
+ if (rreg_live_after[k] == INVALID_INSTRNO) {
+ vex_printf("\nOFFENDING RREG = ");
+ (*ppReg)(available_real_regs[k]);
+ vex_printf("\n");
+ vex_printf("\nOFFENDING instr = ");
+ (*ppInstr)(instrs_in->arr[ii], mode64);
+ vex_printf("\n");
+ vpanic("doRegisterAllocation: "
+ "first event for rreg is Modify");
+ }
+ rreg_dead_before[k] = ii+1;
+ break;
+ default:
+ vpanic("doRegisterAllocation(2)");
+ }
+
+ if (flush) {
+ vassert(flush_la != INVALID_INSTRNO);
+ vassert(flush_db != INVALID_INSTRNO);
+ ensureRRLRspace(&rreg_lrs_la, &rreg_lrs_size, rreg_lrs_used);
+ if (0)
+ vex_printf("FLUSH 1 (%d,%d)\n", flush_la, flush_db);
+ rreg_lrs_la[rreg_lrs_used].rreg = rreg;
+ rreg_lrs_la[rreg_lrs_used].live_after = toShort(flush_la);
+ rreg_lrs_la[rreg_lrs_used].dead_before = toShort(flush_db);
+ rreg_lrs_used++;
+ }
+
+ } /* iterate over regs in the instr */
+
+ /* ------ end of DEAL WITH RREG LIVE RANGES ------ */
+
+ } /* iterate over insns */
+
+ /* ------ end of ITERATE OVER INSNS ------ */
+
+ /* ------ start of FINALISE RREG LIVE RANGES ------ */
+
+ /* Now finish up any live ranges left over. */
+ for (j = 0; j < n_available_real_regs; j++) {
+
+# if 0
+ vex_printf("residual %d: %d %d\n", j, rreg_live_after[j],
+ rreg_dead_before[j]);
+# endif
+ vassert( (rreg_live_after[j] == INVALID_INSTRNO
+ && rreg_dead_before[j] == INVALID_INSTRNO)
+ ||
+ (rreg_live_after[j] != INVALID_INSTRNO
+ && rreg_dead_before[j] != INVALID_INSTRNO)
+ );
+
+ if (rreg_live_after[j] == INVALID_INSTRNO)
+ continue;
+
+ ensureRRLRspace(&rreg_lrs_la, &rreg_lrs_size, rreg_lrs_used);
+ if (0)
+ vex_printf("FLUSH 2 (%d,%d)\n",
+ rreg_live_after[j], rreg_dead_before[j]);
+ rreg_lrs_la[rreg_lrs_used].rreg = available_real_regs[j];
+ rreg_lrs_la[rreg_lrs_used].live_after = toShort(rreg_live_after[j]);
+ rreg_lrs_la[rreg_lrs_used].dead_before = toShort(rreg_dead_before[j]);
+ rreg_lrs_used++;
+ }
+
+ /* Compute summary hints for choosing real regs. If a real reg is
+ involved in a hard live range, record that fact in the fixed
+ part of the running rreg_state. Later, when offered a choice between
+ rregs, it's better to choose one which is not marked as having
+ any HLRs, since ones with HLRs may need to be spilled around
+ their HLRs. Correctness of final assignment is unaffected by
+ this mechanism -- it is only an optimisation. */
+
+ for (j = 0; j < rreg_lrs_used; j++) {
+ rreg = rreg_lrs_la[j].rreg;
+ vassert(!hregIsVirtual(rreg));
+ /* rreg is involved in a HLR. Record this info in the array, if
+ there is space. */
+ for (k = 0; k < n_rregs; k++)
+ if (rreg_state[k].rreg == rreg)
+ break;
+ vassert(k < n_rregs); /* else rreg was not found in rreg_state?! */
+ rreg_state[k].has_hlrs = True;
+ }
+ if (0) {
+ for (j = 0; j < n_rregs; j++) {
+ if (!rreg_state[j].has_hlrs)
+ continue;
+ ppReg(rreg_state[j].rreg);
+ vex_printf(" hinted\n");
+ }
+ }
+
+ /* Finally, copy the _la variant into the _db variant and
+ sort both by their respective fields. */
+ rreg_lrs_db = LibVEX_Alloc(rreg_lrs_used * sizeof(RRegLR));
+ for (j = 0; j < rreg_lrs_used; j++)
+ rreg_lrs_db[j] = rreg_lrs_la[j];
+
+ sortRRLRarray( rreg_lrs_la, rreg_lrs_used, True /* by .live_after*/ );
+ sortRRLRarray( rreg_lrs_db, rreg_lrs_used, False/* by .dead_before*/ );
+
+ /* And set up the cursors. */
+ rreg_lrs_la_next = 0;
+ rreg_lrs_db_next = 0;
+
+ for (j = 1; j < rreg_lrs_used; j++) {
+ vassert(rreg_lrs_la[j-1].live_after <= rreg_lrs_la[j].live_after);
+ vassert(rreg_lrs_db[j-1].dead_before <= rreg_lrs_db[j].dead_before);
+ }
+
+ /* ------ end of FINALISE RREG LIVE RANGES ------ */
+
+# if DEBUG_REGALLOC
+ for (j = 0; j < n_vregs; j++) {
+ vex_printf("vreg %d: la = %d, db = %d\n",
+ j, vreg_lrs[j].live_after, vreg_lrs[j].dead_before );
+ }
+# endif
+
+# if DEBUG_REGALLOC
+ vex_printf("RRegLRs by LA:\n");
+ for (j = 0; j < rreg_lrs_used; j++) {
+ vex_printf(" ");
+ (*ppReg)(rreg_lrs_la[j].rreg);
+ vex_printf(" la = %d, db = %d\n",
+ rreg_lrs_la[j].live_after, rreg_lrs_la[j].dead_before );
+ }
+ vex_printf("RRegLRs by DB:\n");
+ for (j = 0; j < rreg_lrs_used; j++) {
+ vex_printf(" ");
+ (*ppReg)(rreg_lrs_db[j].rreg);
+ vex_printf(" la = %d, db = %d\n",
+ rreg_lrs_db[j].live_after, rreg_lrs_db[j].dead_before );
+ }
+# endif
+
+ /* --------- Stage 3: allocate spill slots. --------- */
+
+ /* Each spill slot is 8 bytes long. For vregs which take more than
+ 64 bits to spill (classes Flt64 and Vec128), we have to allocate
+ two spill slots.
+
+ For Vec128-class on PowerPC, the spill slot's actual address
+ must be 16-byte aligned. Since the spill slot's address is
+ computed as an offset from the guest state pointer, and since
+ the user of the generated code must set that pointer to a
+ 16-aligned value, we have the residual obligation here of
+ choosing a 16-aligned spill slot offset for Vec128-class values.
+ Since each spill slot is 8 bytes long, that means for
+      Vec128-class values we must allocate a spill slot number which
+ is zero mod 2.
+
+ Do a rank-based allocation of vregs to spill slot numbers. We
+ put as few values as possible in spill slots, but nevertheless
+ need to have a spill slot available for all vregs, just in case.
+ */
+ /* max_ss_no = -1; */
+
+ for (j = 0; j < N_SPILL64S; j++)
+ ss_busy_until_before[j] = 0;
+
+ for (j = 0; j < n_vregs; j++) {
+
+ /* True iff this vreg is unused. In which case we also expect
+ that the reg_class field for it has not been set. */
+ if (vreg_lrs[j].live_after == INVALID_INSTRNO) {
+ vassert(vreg_lrs[j].reg_class == HRcINVALID);
+ continue;
+ }
+
+ /* The spill slots are 64 bits in size. As per the comment on
+ definition of HRegClass in host_generic_regs.h, that means, to
+ spill a vreg of class Flt64 or Vec128, we'll need to find two
+         adjacent spill slots to use.  Note, this logic needs to be
+         kept in sync with the size info on the definition of
+         HRegClass. */
+
+ if (vreg_lrs[j].reg_class == HRcVec128
+ || vreg_lrs[j].reg_class == HRcFlt64) {
+
+         /* Find two adjacent free slots which between them provide
+ up to 128 bits in which to spill the vreg. Since we are
+ trying to find an even:odd pair, move along in steps of 2
+ (slots). */
+
+ for (k = 0; k < N_SPILL64S-1; k += 2)
+ if (ss_busy_until_before[k] <= vreg_lrs[j].live_after
+ && ss_busy_until_before[k+1] <= vreg_lrs[j].live_after)
+ break;
+ if (k >= N_SPILL64S-1) {
+ vpanic("LibVEX_N_SPILL_BYTES is too low. "
+ "Increase and recompile.");
+ }
+ if (0) vex_printf("16-byte spill offset in spill slot %d\n", (Int)k);
+ ss_busy_until_before[k+0] = vreg_lrs[j].dead_before;
+ ss_busy_until_before[k+1] = vreg_lrs[j].dead_before;
+
+ } else {
+
+ /* The ordinary case -- just find a single spill slot. */
+
+ /* Find the lowest-numbered spill slot which is available at
+ the start point of this interval, and assign the interval
+ to it. */
+ for (k = 0; k < N_SPILL64S; k++)
+ if (ss_busy_until_before[k] <= vreg_lrs[j].live_after)
+ break;
+ if (k == N_SPILL64S) {
+ vpanic("LibVEX_N_SPILL_BYTES is too low. "
+ "Increase and recompile.");
+ }
+ ss_busy_until_before[k] = vreg_lrs[j].dead_before;
+
+ }
+
+ /* This reflects LibVEX's hard-wired knowledge of the baseBlock
+ layout: the guest state, then two equal sized areas following
+ it for two sets of shadow state, and then the spill area. */
+ vreg_lrs[j].spill_offset = toShort(guest_sizeB * 3 + k * 8);
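+      /* Worked example (illustrative numbers): with guest_sizeB =
+         1728, a Vec128-class vreg given slots k = 4 and 5 gets
+         spill_offset = 1728*3 + 4*8 = 5216, which is 0 mod 16 as
+         required, since guest_sizeB itself is 0 mod 16 (asserted at
+         the top of this function). */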
+
+ /* Independent check that we've made a sane choice of slot */
+ sanity_check_spill_offset( &vreg_lrs[j] );
+ /* if (j > max_ss_no) */
+ /* max_ss_no = j; */
+ }
+
+# if 0
+ vex_printf("\n\n");
+ for (j = 0; j < n_vregs; j++)
+ vex_printf("vreg %d --> spill offset %d\n",
+ j, vreg_lrs[j].spill_offset);
+# endif
+
+ /* --------- Stage 4: establish rreg preferences --------- */
+
+   /* It may be advantageous to allocate certain vregs to specific
+ rregs, as a way of avoiding reg-reg moves later. Here we
+ establish which, if any, rreg each vreg would prefer to be in.
+ Note that this constrains the allocator -- ideally we end up
+ with as few as possible vregs expressing a preference.
+
+ This is an optimisation: if the .preferred_rreg field is never
+ set to anything different from INVALID_HREG, the allocator still
+ works. */
+
+ /* 30 Dec 04: removed this mechanism as it does not seem to
+ help. */
+
+ /* --------- Stage 5: process instructions --------- */
+
+ /* This is the main loop of the allocator. First, we need to
+ correctly set up our running state, which tracks the status of
+ each real register. */
+
+ /* ------ BEGIN: Process each insn in turn. ------ */
+
+ for (ii = 0; ii < instrs_in->arr_used; ii++) {
+
+# if DEBUG_REGALLOC
+ vex_printf("\n====----====---- Insn %d ----====----====\n", ii);
+ vex_printf("---- ");
+ (*ppInstr)(instrs_in->arr[ii], mode64);
+ vex_printf("\n\nInitial state:\n");
+ PRINT_STATE;
+ vex_printf("\n");
+# endif
+
+ /* ------------ Sanity checks ------------ */
+
+ /* Sanity checks are expensive. So they are done only once
+ every 7 instructions, and just before the last
+ instruction. */
+ do_sanity_check
+ = toBool(
+ False /* Set to True for sanity checking of all insns. */
+ || ii == instrs_in->arr_used-1
+ || (ii > 0 && (ii % 7) == 0)
+ );
+
+ if (do_sanity_check) {
+
+ /* Sanity check 1: all rregs with a hard live range crossing
+ this insn must be marked as unavailable in the running
+ state. */
+ for (j = 0; j < rreg_lrs_used; j++) {
+ if (rreg_lrs_la[j].live_after < ii
+ && ii < rreg_lrs_la[j].dead_before) {
+ /* ii is the middle of a hard live range for some real
+ reg. Check it's marked as such in the running
+ state. */
+
+# if 0
+ vex_printf("considering la %d .. db %d reg = ",
+ rreg_lrs[j].live_after,
+ rreg_lrs[j].dead_before);
+ (*ppReg)(rreg_lrs[j].rreg);
+ vex_printf("\n");
+# endif
+
+ /* find the state entry for this rreg */
+ for (k = 0; k < n_rregs; k++)
+ if (rreg_state[k].rreg == rreg_lrs_la[j].rreg)
+ break;
+
+ /* and assert that this rreg is marked as unavailable */
+ vassert(rreg_state[k].disp == Unavail);
+ }
+ }
+
+ /* Sanity check 2: conversely, all rregs marked as
+ unavailable in the running rreg_state must have a
+ corresponding hard live range entry in the rreg_lrs
+ array. */
+ for (j = 0; j < n_available_real_regs; j++) {
+ vassert(rreg_state[j].disp == Bound
+ || rreg_state[j].disp == Free
+ || rreg_state[j].disp == Unavail);
+ if (rreg_state[j].disp != Unavail)
+ continue;
+ for (k = 0; k < rreg_lrs_used; k++)
+ if (rreg_lrs_la[k].rreg == rreg_state[j].rreg
+ && rreg_lrs_la[k].live_after < ii
+ && ii < rreg_lrs_la[k].dead_before)
+ break;
+ /* If this vassertion fails, we couldn't find a
+ corresponding HLR. */
+ vassert(k < rreg_lrs_used);
+ }
+
+ /* Sanity check 3: all vreg-rreg bindings must bind registers
+ of the same class. */
+ for (j = 0; j < n_rregs; j++) {
+ if (rreg_state[j].disp != Bound) {
+ vassert(rreg_state[j].eq_spill_slot == False);
+ continue;
+ }
+ vassert(hregClass(rreg_state[j].rreg)
+ == hregClass(rreg_state[j].vreg));
+ vassert( hregIsVirtual(rreg_state[j].vreg));
+ vassert(!hregIsVirtual(rreg_state[j].rreg));
+ }
+
+ /* Sanity check 4: the vreg_state and rreg_state
+ mutually-redundant mappings are consistent. If
+ rreg_state[j].vreg points at some vreg_state entry then
+ that vreg_state entry should point back at
+ rreg_state[j]. */
+ for (j = 0; j < n_rregs; j++) {
+ if (rreg_state[j].disp != Bound)
+ continue;
+ k = hregNumber(rreg_state[j].vreg);
+ vassert(IS_VALID_VREGNO(k));
+ vassert(vreg_state[k] == j);
+ }
+ for (j = 0; j < n_vregs; j++) {
+ k = vreg_state[j];
+ if (k == INVALID_RREG_NO)
+ continue;
+ vassert(IS_VALID_RREGNO(k));
+ vassert(rreg_state[k].disp == Bound);
+ vassert(hregNumber(rreg_state[k].vreg) == j);
+ }
+
+ } /* if (do_sanity_check) */
+
+ /* ------------ end of Sanity checks ------------ */
+
+ /* Do various optimisations pertaining to register coalescing
+ and preferencing:
+ MOV v <-> v coalescing (done here).
+ MOV v <-> r coalescing (not yet, if ever)
+ */
+ /* If doing a reg-reg move between two vregs, and the src's live
+ range ends here and the dst's live range starts here, bind
+ the dst to the src's rreg, and that's all. */
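+      /* Example (illustrative): for a move v9 -> v5 at insn 17, this
+         fires when v9 has dead_before == 18 (this is its last use)
+         and v5 has live_after == 17 (it is born here).  v5 then
+         simply inherits v9's rreg, and the move insn is never
+         emitted. */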
+ if ( (*isMove)( instrs_in->arr[ii], &vregS, &vregD ) ) {
+ if (!hregIsVirtual(vregS)) goto cannot_coalesce;
+ if (!hregIsVirtual(vregD)) goto cannot_coalesce;
+ /* Check that *isMove is not telling us a bunch of lies ... */
+ vassert(hregClass(vregS) == hregClass(vregD));
+ k = hregNumber(vregS);
+ m = hregNumber(vregD);
+ vassert(IS_VALID_VREGNO(k));
+ vassert(IS_VALID_VREGNO(m));
+ if (vreg_lrs[k].dead_before != ii + 1) goto cannot_coalesce;
+ if (vreg_lrs[m].live_after != ii) goto cannot_coalesce;
+# if DEBUG_REGALLOC
+ vex_printf("COALESCE ");
+ (*ppReg)(vregS);
+ vex_printf(" -> ");
+ (*ppReg)(vregD);
+ vex_printf("\n\n");
+# endif
+ /* Find the state entry for vregS. */
+ for (m = 0; m < n_rregs; m++)
+ if (rreg_state[m].disp == Bound && rreg_state[m].vreg == vregS)
+ break;
+ if (m == n_rregs)
+ /* We failed to find a binding for vregS, which means it's
+ currently not in a register. So we can't do the
+ coalescing. Give up. */
+ goto cannot_coalesce;
+
+ /* Finally, we can do the coalescing. It's trivial -- merely
+ claim vregS's register for vregD. */
+ rreg_state[m].vreg = vregD;
+ vassert(IS_VALID_VREGNO(hregNumber(vregD)));
+ vassert(IS_VALID_VREGNO(hregNumber(vregS)));
+ vreg_state[hregNumber(vregD)] = toShort(m);
+ vreg_state[hregNumber(vregS)] = INVALID_RREG_NO;
+
+ /* This rreg has become associated with a different vreg and
+ hence with a different spill slot. Play safe. */
+ rreg_state[m].eq_spill_slot = False;
+
+ /* Move on to the next insn. We skip the post-insn stuff for
+ fixed registers, since this move should not interact with
+ them in any way. */
+ continue;
+ }
+ cannot_coalesce:
+
+ /* ------ Free up rregs bound to dead vregs ------ */
+
+ /* Look for vregs whose live range has just ended, and
+ mark the associated rreg as free. */
+
+ for (j = 0; j < n_rregs; j++) {
+ if (rreg_state[j].disp != Bound)
+ continue;
+ vreg = hregNumber(rreg_state[j].vreg);
+ vassert(IS_VALID_VREGNO(vreg));
+ if (vreg_lrs[vreg].dead_before <= ii) {
+ rreg_state[j].disp = Free;
+ rreg_state[j].eq_spill_slot = False;
+ m = hregNumber(rreg_state[j].vreg);
+ vassert(IS_VALID_VREGNO(m));
+ vreg_state[m] = INVALID_RREG_NO;
+ if (DEBUG_REGALLOC) {
+ vex_printf("free up ");
+ (*ppReg)(rreg_state[j].rreg);
+ vex_printf("\n");
+ }
+ }
+ }
+
+ /* ------ Pre-instruction actions for fixed rreg uses ------ */
+
+ /* Now we have to deal with rregs which are about to be made
+ live by this instruction -- in other words, are entering into
+ one of their live ranges. If any such rreg holds a vreg, we
+ will have to free up the rreg. The simplest solution which
+ is correct is to spill the rreg.
+
+ Note we could do better:
+ * Could move it into some other free rreg, if one is available
+
+ Do this efficiently, by incrementally stepping along an array
+ of rreg HLRs that are known to be sorted by start point
+ (their .live_after field).
+ */
+ while (True) {
+ vassert(rreg_lrs_la_next >= 0);
+ vassert(rreg_lrs_la_next <= rreg_lrs_used);
+ if (rreg_lrs_la_next == rreg_lrs_used)
+ break; /* no more real reg live ranges to consider */
+ if (ii < rreg_lrs_la[rreg_lrs_la_next].live_after)
+ break; /* next live range does not yet start */
+ vassert(ii == rreg_lrs_la[rreg_lrs_la_next].live_after);
+ /* rreg_lrs_la[rreg_lrs_la_next].rreg needs to be freed up.
+ Find the associated rreg_state entry. */
+ /* Note, re ii == rreg_lrs_la[rreg_lrs_la_next].live_after.
+ Real register live ranges are guaranteed to be well-formed
+ in that they start with a write to the register -- Stage 2
+ rejects any code not satisfying this. So the correct
+ question to ask is whether
+ rreg_lrs_la[rreg_lrs_la_next].live_after == ii, that is,
+ whether the reg becomes live after this insn -- rather
+ than before it. */
+# if DEBUG_REGALLOC
+ vex_printf("need to free up rreg: ");
+ (*ppReg)(rreg_lrs_la[rreg_lrs_la_next].rreg);
+ vex_printf("\n\n");
+# endif
+ for (k = 0; k < n_rregs; k++)
+ if (rreg_state[k].rreg == rreg_lrs_la[rreg_lrs_la_next].rreg)
+ break;
+         /* If this fails, we don't have an entry for this rreg,
+            but we should. */
+ vassert(IS_VALID_RREGNO(k));
+ m = hregNumber(rreg_state[k].vreg);
+ if (rreg_state[k].disp == Bound) {
+ /* Yes, there is an associated vreg. Spill it if it's
+ still live. */
+ vassert(IS_VALID_VREGNO(m));
+ vreg_state[m] = INVALID_RREG_NO;
+ if (vreg_lrs[m].dead_before > ii) {
+ vassert(vreg_lrs[m].reg_class != HRcINVALID);
+ if ((!eq_spill_opt) || !rreg_state[k].eq_spill_slot) {
+ HInstr* spill1 = NULL;
+ HInstr* spill2 = NULL;
+ (*genSpill)( &spill1, &spill2, rreg_state[k].rreg,
+ vreg_lrs[m].spill_offset, mode64 );
+ vassert(spill1 || spill2); /* can't both be NULL */
+ if (spill1)
+ EMIT_INSTR(spill1);
+ if (spill2)
+ EMIT_INSTR(spill2);
+ }
+ rreg_state[k].eq_spill_slot = True;
+ }
+ }
+ rreg_state[k].disp = Unavail;
+ rreg_state[k].vreg = INVALID_HREG;
+ rreg_state[k].eq_spill_slot = False;
+
+ /* check for further rregs entering HLRs at this point */
+ rreg_lrs_la_next++;
+ }
+
+
+# if DEBUG_REGALLOC
+ vex_printf("After pre-insn actions for fixed regs:\n");
+ PRINT_STATE;
+ vex_printf("\n");
+# endif
+
+
+ /* ------ Deal with the current instruction. ------ */
+
+ /* Finally we can begin the processing of this instruction
+ itself. The aim is to free up enough rregs for this insn.
+ This may generate spill stores since we may have to evict
+ some vregs currently in rregs. Also generates spill loads.
+ We also build up the final vreg->rreg mapping to be applied
+ to the insn. */
+
+      (*getRegUsage)( &reg_usage, instrs_in->arr[ii], mode64 );
+
+ initHRegRemap(&remap);
+
+ /* ------------ BEGIN directReload optimisation ----------- */
+
+ /* If the instruction reads exactly one vreg which is currently
+ in a spill slot, and this is last use of that vreg, see if we
+      can convert the instruction into one which reads directly from
+      the spill slot.  This is clearly only possible for x86 and amd64
+      targets, since ppc and arm are load-store architectures.  If
+ successful, replace instrs_in->arr[ii] with this new
+ instruction, and recompute its reg usage, so that the change
+ is invisible to the standard-case handling that follows. */
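+      /* Example (illustrative, x86-flavoured pseudo-asm; %GSP stands
+         for the guest state pointer): if "add %v7, %v3" is the last
+         use of v7, and v7 currently lives only in its spill slot at
+         offset 96, the insn can become "add 96(%GSP), %v3", avoiding
+         an explicit reload. */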
+
+ if (directReload && reg_usage.n_used <= 2) {
+ Bool debug_direct_reload = True && False;
+ HReg cand = INVALID_HREG;
+      Int nreads = 0; /* a counter, not a flag, hence Int not Bool */
+ Short spilloff = 0;
+
+ for (j = 0; j < reg_usage.n_used; j++) {
+
+ vreg = reg_usage.hreg[j];
+
+ if (!hregIsVirtual(vreg))
+ continue;
+
+ if (reg_usage.mode[j] == HRmRead) {
+ nreads++;
+ m = hregNumber(vreg);
+ vassert(IS_VALID_VREGNO(m));
+ k = vreg_state[m];
+ if (!IS_VALID_RREGNO(k)) {
+ /* ok, it is spilled. Now, is this its last use? */
+ vassert(vreg_lrs[m].dead_before >= ii+1);
+ if (vreg_lrs[m].dead_before == ii+1
+ && cand == INVALID_HREG) {
+ spilloff = vreg_lrs[m].spill_offset;
+ cand = vreg;
+ }
+ }
+ }
+ }
+
+ if (nreads == 1 && cand != INVALID_HREG) {
+ HInstr* reloaded;
+ if (reg_usage.n_used == 2)
+ vassert(reg_usage.hreg[0] != reg_usage.hreg[1]);
+
+ reloaded = directReload ( instrs_in->arr[ii], cand, spilloff );
+ if (debug_direct_reload && !reloaded) {
+ vex_printf("[%3d] ", spilloff); ppHReg(cand); vex_printf(" ");
+ ppInstr(instrs_in->arr[ii], mode64);
+ }
+ if (reloaded) {
+ /* Update info about the insn, so it looks as if it had
+ been in this form all along. */
+ instrs_in->arr[ii] = reloaded;
+               (*getRegUsage)( &reg_usage, instrs_in->arr[ii], mode64 );
+               if (debug_direct_reload) {
+ vex_printf(" --> ");
+ ppInstr(reloaded, mode64);
+ }
+ }
+
+ if (debug_direct_reload && !reloaded)
+ vex_printf("\n");
+ }
+
+ }
+
+ /* ------------ END directReload optimisation ------------ */
+
+ /* for each reg mentioned in the insn ... */
+ for (j = 0; j < reg_usage.n_used; j++) {
+
+ vreg = reg_usage.hreg[j];
+
+ /* only interested in virtual registers right now. */
+ if (!hregIsVirtual(vreg))
+ continue;
+
+# if 0
+ vex_printf("considering "); (*ppReg)(vreg); vex_printf("\n");
+# endif
+
+ /* Now we're trying to find a rreg for "vreg". First of all,
+ if it already has an rreg assigned, we don't need to do
+ anything more. Search the current state to find out. */
+ m = hregNumber(vreg);
+ vassert(IS_VALID_VREGNO(m));
+ k = vreg_state[m];
+ if (IS_VALID_RREGNO(k)) {
+ vassert(rreg_state[k].disp == Bound);
+ addToHRegRemap(&remap, vreg, rreg_state[k].rreg);
+ /* If this rreg is written or modified, mark it as different
+ from any spill slot value. */
+ if (reg_usage.mode[j] != HRmRead)
+ rreg_state[k].eq_spill_slot = False;
+ continue;
+ } else {
+ vassert(k == INVALID_RREG_NO);
+ }
+
+ /* No luck. The next thing to do is see if there is a
+ currently free rreg available, of the correct class. If
+ so, bag it. NOTE, we could improve this by selecting an
+ rreg for which the next live-range event is as far ahead
+ as possible. */
+ k_suboptimal = -1;
+ for (k = 0; k < n_rregs; k++) {
+ if (rreg_state[k].disp != Free
+ || hregClass(rreg_state[k].rreg) != hregClass(vreg))
+ continue;
+ if (rreg_state[k].has_hlrs) {
+ /* Well, at least we can use k_suboptimal if we really
+ have to. Keep on looking for a better candidate. */
+ k_suboptimal = k;
+ } else {
+ /* Found a preferable reg. Use it. */
+ k_suboptimal = -1;
+ break;
+ }
+ }
+ if (k_suboptimal >= 0)
+ k = k_suboptimal;
+
+ if (k < n_rregs) {
+ rreg_state[k].disp = Bound;
+ rreg_state[k].vreg = vreg;
+ m = hregNumber(vreg);
+ vassert(IS_VALID_VREGNO(m));
+ vreg_state[m] = toShort(k);
+ addToHRegRemap(&remap, vreg, rreg_state[k].rreg);
+ /* Generate a reload if needed. This only creates needed
+ reloads because the live range builder for vregs will
+ guarantee that the first event for a vreg is a write.
+ Hence, if this reference is not a write, it cannot be
+ the first reference for this vreg, and so a reload is
+ indeed needed. */
+ if (reg_usage.mode[j] != HRmWrite) {
+ vassert(vreg_lrs[m].reg_class != HRcINVALID);
+ HInstr* reload1 = NULL;
+ HInstr* reload2 = NULL;
+ (*genReload)( &reload1, &reload2, rreg_state[k].rreg,
+ vreg_lrs[m].spill_offset, mode64 );
+ vassert(reload1 || reload2); /* can't both be NULL */
+ if (reload1)
+ EMIT_INSTR(reload1);
+ if (reload2)
+ EMIT_INSTR(reload2);
+ /* This rreg is read or modified by the instruction.
+ If it's merely read we can claim it now equals the
+ spill slot, but not so if it is modified. */
+ if (reg_usage.mode[j] == HRmRead) {
+ rreg_state[k].eq_spill_slot = True;
+ } else {
+ vassert(reg_usage.mode[j] == HRmModify);
+ rreg_state[k].eq_spill_slot = False;
+ }
+ } else {
+ rreg_state[k].eq_spill_slot = False;
+ }
+
+ continue;
+ }
+
+ /* Well, now we have no option but to spill a vreg. It's
+ important to make a good choice of vreg to spill, and of
+ course we need to be careful not to spill a vreg which is
+ needed by this insn. */
+
+ /* First, mark in the rreg_state those rregs which are not spill
+ candidates, either because they hold a vreg mentioned by this
+ instruction or because they are of the wrong class. */
+ for (k = 0; k < n_rregs; k++) {
+ rreg_state[k].is_spill_cand = False;
+ if (rreg_state[k].disp != Bound)
+ continue;
+ if (hregClass(rreg_state[k].rreg) != hregClass(vreg))
+ continue;
+ rreg_state[k].is_spill_cand = True;
+ for (m = 0; m < reg_usage.n_used; m++) {
+ if (rreg_state[k].vreg == reg_usage.hreg[m]) {
+ rreg_state[k].is_spill_cand = False;
+ break;
+ }
+ }
+ }
+
+ /* We can choose to spill any rreg satisfying
+ rreg_state[r].is_spill_cand (so to speak). Choose r so that
+ the next use of its associated vreg is as far ahead as
+ possible, in the hope that this will minimise the number
+ of consequent reloads required. */
+ spillee
+ = findMostDistantlyMentionedVReg (
+ getRegUsage, instrs_in, ii+1, rreg_state, n_rregs, mode64 );
+
+ if (spillee == -1) {
+ /* Hmmmmm. There don't appear to be any spill candidates.
+ We're hosed. */
+ vex_printf("reg_alloc: can't find a register in class: ");
+ ppHRegClass(hregClass(vreg));
+ vex_printf("\n");
+ vpanic("reg_alloc: can't create a free register.");
+ }
+
+ /* Right. So we're going to spill rreg_state[spillee]. */
+ vassert(IS_VALID_RREGNO(spillee));
+ vassert(rreg_state[spillee].disp == Bound);
+ /* check it's the right class */
+ vassert(hregClass(rreg_state[spillee].rreg) == hregClass(vreg));
+ /* check we're not ejecting the vreg for which we are trying
+ to free up a register. */
+ vassert(rreg_state[spillee].vreg != vreg);
+
+ m = hregNumber(rreg_state[spillee].vreg);
+ vassert(IS_VALID_VREGNO(m));
+
+ /* So here's the spill store. Assert that we're spilling a
+ live vreg. */
+ vassert(vreg_lrs[m].dead_before > ii);
+ vassert(vreg_lrs[m].reg_class != HRcINVALID);
+ if ((!eq_spill_opt) || !rreg_state[spillee].eq_spill_slot) {
+ HInstr* spill1 = NULL;
+ HInstr* spill2 = NULL;
+ (*genSpill)( &spill1, &spill2, rreg_state[spillee].rreg,
+ vreg_lrs[m].spill_offset, mode64 );
+ vassert(spill1 || spill2); /* can't both be NULL */
+ if (spill1)
+ EMIT_INSTR(spill1);
+ if (spill2)
+ EMIT_INSTR(spill2);
+ }
+
+ /* Update the rreg_state to reflect the new assignment for this
+ rreg. */
+ rreg_state[spillee].vreg = vreg;
+ vreg_state[m] = INVALID_RREG_NO;
+
+ rreg_state[spillee].eq_spill_slot = False; /* be safe */
+
+ m = hregNumber(vreg);
+ vassert(IS_VALID_VREGNO(m));
+ vreg_state[m] = toShort(spillee);
+
+ /* Now, if this vreg is being read or modified (as opposed to
+ written), we have to generate a reload for it. */
+ if (reg_usage.mode[j] != HRmWrite) {
+ vassert(vreg_lrs[m].reg_class != HRcINVALID);
+ HInstr* reload1 = NULL;
+ HInstr* reload2 = NULL;
+ (*genReload)( &reload1, &reload2, rreg_state[spillee].rreg,
+ vreg_lrs[m].spill_offset, mode64 );
+ vassert(reload1 || reload2); /* can't both be NULL */
+ if (reload1)
+ EMIT_INSTR(reload1);
+ if (reload2)
+ EMIT_INSTR(reload2);
+ /* This rreg is read or modified by the instruction.
+ If it's merely read we can claim it now equals the
+ spill slot, but not so if it is modified. */
+ if (reg_usage.mode[j] == HRmRead) {
+ rreg_state[spillee].eq_spill_slot = True;
+ } else {
+ vassert(reg_usage.mode[j] == HRmModify);
+ rreg_state[spillee].eq_spill_slot = False;
+ }
+ }
+
+ /* So after much twisting and turning, we have vreg mapped to
+ rreg_state[spillee].rreg. Note that in the map. */
+ addToHRegRemap(&remap, vreg, rreg_state[spillee].rreg);
+
+ } /* iterate over registers in this instruction. */
+
+ /* We've finished clowning around with registers in this instruction.
+ Three results:
+ - the running rreg_state[] has been updated
+ - a suitable vreg->rreg mapping for this instruction has been
+ constructed
+ - spill and reload instructions may have been emitted.
+
+ The final step is to apply the mapping to the instruction,
+ and emit that.
+ */
+
+ /* NOTE, DESTRUCTIVELY MODIFIES instrs_in->arr[ii]. */
+ (*mapRegs)( &remap, instrs_in->arr[ii], mode64 );
+ EMIT_INSTR( instrs_in->arr[ii] );
+
+# if DEBUG_REGALLOC
+ vex_printf("After dealing with current insn:\n");
+ PRINT_STATE;
+ vex_printf("\n");
+# endif
+
+ /* ------ Post-instruction actions for fixed rreg uses ------ */
+
+ /* Now we need to check for rregs exiting fixed live ranges
+ after this instruction, and if so mark them as free. */
+ while (True) {
+ vassert(rreg_lrs_db_next >= 0);
+ vassert(rreg_lrs_db_next <= rreg_lrs_used);
+ if (rreg_lrs_db_next == rreg_lrs_used)
+ break; /* no more real reg live ranges to consider */
+ if (ii+1 < rreg_lrs_db[rreg_lrs_db_next].dead_before)
+ break; /* next live range does not yet start */
+ vassert(ii+1 == rreg_lrs_db[rreg_lrs_db_next].dead_before);
+ /* rreg_lrs_db[rreg_lrs_db_next].rreg is exiting a hard live
+ range. Mark it as such in the main rreg_state array. */
+ for (k = 0; k < n_rregs; k++)
+ if (rreg_state[k].rreg == rreg_lrs_db[rreg_lrs_db_next].rreg)
+ break;
+ /* If this vassertion fails, we don't have an entry for
+ this rreg. Which we should. */
+ vassert(k < n_rregs);
+ vassert(rreg_state[k].disp == Unavail);
+ rreg_state[k].disp = Free;
+ rreg_state[k].vreg = INVALID_HREG;
+ rreg_state[k].eq_spill_slot = False;
+
+ /* check for further rregs leaving HLRs at this point */
+ rreg_lrs_db_next++;
+ }
+
+# if DEBUG_REGALLOC
+ vex_printf("After post-insn actions for fixed regs:\n");
+ PRINT_STATE;
+ vex_printf("\n");
+# endif
+
+ } /* iterate over insns */
+
+ /* ------ END: Process each insn in turn. ------ */
+
+ /* free(rreg_state); */
+ /* free(rreg_lrs); */
+ /* if (vreg_lrs) free(vreg_lrs); */
+
+ /* Paranoia */
+ for (j = 0; j < n_rregs; j++)
+ vassert(rreg_state[j].rreg == available_real_regs[j]);
+
+ vassert(rreg_lrs_la_next == rreg_lrs_used);
+ vassert(rreg_lrs_db_next == rreg_lrs_used);
+
+ return instrs_out;
+
+# undef INVALID_INSTRNO
+# undef EMIT_INSTR
+# undef PRINT_STATE
+}
+
+
+
+/*---------------------------------------------------------------*/
+/*--- end host_reg_alloc2.c ---*/
+/*---------------------------------------------------------------*/
diff --git a/VEX/priv/host_generic_regs.c b/VEX/priv/host_generic_regs.c
new file mode 100644
index 0000000..e36b4dc
--- /dev/null
+++ b/VEX/priv/host_generic_regs.c
@@ -0,0 +1,223 @@
+
+/*---------------------------------------------------------------*/
+/*--- begin host_generic_regs.c ---*/
+/*---------------------------------------------------------------*/
+
+/*
+ This file is part of Valgrind, a dynamic binary instrumentation
+ framework.
+
+ Copyright (C) 2004-2010 OpenWorks LLP
+ info@open-works.net
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA.
+
+ The GNU General Public License is contained in the file COPYING.
+
+ Neither the names of the U.S. Department of Energy nor the
+ University of California nor the names of its contributors may be
+ used to endorse or promote products derived from this software
+ without prior written permission.
+*/
+
+#include "libvex_basictypes.h"
+#include "libvex.h"
+
+#include "main_util.h"
+#include "host_generic_regs.h"
+
+
+void ppHRegClass ( HRegClass hrc )
+{
+ switch (hrc) {
+ case HRcInt32: vex_printf("HRcInt32"); break;
+ case HRcInt64: vex_printf("HRcInt64"); break;
+ case HRcFlt32: vex_printf("HRcFlt32"); break;
+ case HRcFlt64: vex_printf("HRcFlt64"); break;
+ case HRcVec64: vex_printf("HRcVec64"); break;
+ case HRcVec128: vex_printf("HRcVec128"); break;
+ default: vpanic("ppHRegClass");
+ }
+}
+
+/* Generic printing for registers. */
+void ppHReg ( HReg r )
+{
+ HChar* maybe_v = hregIsVirtual(r) ? "v" : "";
+ Int regNo = hregNumber(r);
+ switch (hregClass(r)) {
+ case HRcInt32: vex_printf("%%%sr%d", maybe_v, regNo); return;
+ case HRcInt64: vex_printf("%%%sR%d", maybe_v, regNo); return;
+ case HRcFlt32: vex_printf("%%%sF%d", maybe_v, regNo); return;
+ case HRcFlt64: vex_printf("%%%sD%d", maybe_v, regNo); return;
+ case HRcVec64: vex_printf("%%%sv%d", maybe_v, regNo); return;
+ case HRcVec128: vex_printf("%%%sV%d", maybe_v, regNo); return;
+ default: vpanic("ppHReg");
+ }
+}
+
+
+/*---------------------------------------------------------*/
+/*--- Helpers for recording reg usage (for reg-alloc) ---*/
+/*---------------------------------------------------------*/
+
+void ppHRegUsage ( HRegUsage* tab )
+{
+ Int i;
+ HChar* str;
+ vex_printf("HRegUsage {\n");
+ for (i = 0; i < tab->n_used; i++) {
+ switch (tab->mode[i]) {
+ case HRmRead: str = "Read "; break;
+ case HRmWrite: str = "Write "; break;
+ case HRmModify: str = "Modify "; break;
+ default: vpanic("ppHRegUsage");
+ }
+ vex_printf(" %s ", str);
+ ppHReg(tab->hreg[i]);
+ vex_printf("\n");
+ }
+ vex_printf("}\n");
+}
+
+
+/* Add a register to a usage table. Combine incoming read uses with
+ existing write uses into a modify use, and vice versa. Do not
+ create duplicate entries -- each reg should only be mentioned once.
+*/
+void addHRegUse ( HRegUsage* tab, HRegMode mode, HReg reg )
+{
+ Int i;
+ /* Find it ... */
+ for (i = 0; i < tab->n_used; i++)
+ if (tab->hreg[i] == reg)
+ break;
+ if (i == tab->n_used) {
+ /* Not found, add new entry. */
+ vassert(tab->n_used < N_HREG_USAGE);
+ tab->hreg[tab->n_used] = reg;
+ tab->mode[tab->n_used] = mode;
+ tab->n_used++;
+ } else {
+ /* Found: combine or ignore. */
+ /* This is a greatest-lower-bound operation in the poset:
+
+ R W
+ \ /
+ M
+
+ Need to do: tab->mode[i] = GLB(tab->mode[i], mode). In this
+ case very simple -- if tab->mode[i] != mode then result must
+ be M.
+ */
+ if (tab->mode[i] == mode) {
+ /* duplicate, ignore */
+ } else {
+ tab->mode[i] = HRmModify;
+ }
+ }
+}
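+
+/* Example of the combining rule above (illustrative): the two calls
+
+ addHRegUse(&tab, HRmRead, r);
+ addHRegUse(&tab, HRmWrite, r);
+
+ leave a single entry for r with mode HRmModify, since
+ GLB(Read, Write) = Modify in the poset shown above. */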
+
+
+/*---------------------------------------------------------*/
+/*--- Indicating register remappings (for reg-alloc) ---*/
+/*---------------------------------------------------------*/
+
+void ppHRegRemap ( HRegRemap* map )
+{
+ Int i;
+ vex_printf("HRegRemap {\n");
+ for (i = 0; i < map->n_used; i++) {
+ vex_printf(" ");
+ ppHReg(map->orig[i]);
+ vex_printf(" --> ");
+ ppHReg(map->replacement[i]);
+ vex_printf("\n");
+ }
+ vex_printf("}\n");
+}
+
+
+void initHRegRemap ( HRegRemap* map )
+{
+ map->n_used = 0;
+}
+
+
+void addToHRegRemap ( HRegRemap* map, HReg orig, HReg replacement )
+{
+ Int i;
+ for (i = 0; i < map->n_used; i++)
+ if (map->orig[i] == orig)
+ vpanic("addToHRegMap: duplicate entry");
+ if (!hregIsVirtual(orig))
+ vpanic("addToHRegMap: orig is not a vreg");
+ if (hregIsVirtual(replacement))
+ vpanic("addToHRegMap: replacement is not a vreg");
+
+ vassert(map->n_used+1 < N_HREG_REMAP);
+ map->orig[map->n_used] = orig;
+ map->replacement[map->n_used] = replacement;
+ map->n_used++;
+}
+
+
+HReg lookupHRegRemap ( HRegRemap* map, HReg orig )
+{
+ Int i;
+ if (!hregIsVirtual(orig))
+ return orig;
+ for (i = 0; i < map->n_used; i++)
+ if (map->orig[i] == orig)
+ return map->replacement[i];
+ vpanic("lookupHRegRemap: not found");
+}
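+
+/* Usage sketch (register names hypothetical): after
+
+ initHRegRemap(&map);
+ addToHRegRemap(&map, v7, rEAX);
+
+ lookupHRegRemap(&map, v7) returns rEAX, while any real reg passed
+ in is returned unchanged, which is exactly what a mapRegs callback
+ needs when applying the allocator's decisions. */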
+
+/*---------------------------------------------------------*/
+/*--- Abstract instructions ---*/
+/*---------------------------------------------------------*/
+
+HInstrArray* newHInstrArray ( void )
+{
+ HInstrArray* ha = LibVEX_Alloc(sizeof(HInstrArray));
+ ha->arr_size = 4;
+ ha->arr_used = 0;
+ ha->arr = LibVEX_Alloc(ha->arr_size * sizeof(HInstr*));
+ ha->n_vregs = 0;
+ return ha;
+}
+
+void addHInstr ( HInstrArray* ha, HInstr* instr )
+{
+ vassert(ha->arr_used <= ha->arr_size);
+ if (ha->arr_used < ha->arr_size) {
+ ha->arr[ha->arr_used] = instr;
+ ha->arr_used++;
+ } else {
+ Int i;
+ HInstr** arr2 = LibVEX_Alloc(ha->arr_size * 2 * sizeof(HInstr*));
+ for (i = 0; i < ha->arr_size; i++)
+ arr2[i] = ha->arr[i];
+ ha->arr_size *= 2;
+ ha->arr = arr2;
+ addHInstr(ha, instr);
+ }
+}
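+
+/* Usage note: newHInstrArray starts with room for 4 insns and
+ addHInstr doubles the array on demand, so appending N insns costs
+ amortised O(1) each. */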
+
+
+/*---------------------------------------------------------------*/
+/*--- end host_generic_regs.c ---*/
+/*---------------------------------------------------------------*/
diff --git a/VEX/priv/host_generic_regs.h b/VEX/priv/host_generic_regs.h
new file mode 100644
index 0000000..1c6826c
--- /dev/null
+++ b/VEX/priv/host_generic_regs.h
@@ -0,0 +1,281 @@
+
+/*---------------------------------------------------------------*/
+/*--- begin host_generic_regs.h ---*/
+/*---------------------------------------------------------------*/
+
+/*
+ This file is part of Valgrind, a dynamic binary instrumentation
+ framework.
+
+ Copyright (C) 2004-2010 OpenWorks LLP
+ info@open-works.net
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA.
+
+ The GNU General Public License is contained in the file COPYING.
+
+ Neither the names of the U.S. Department of Energy nor the
+ University of California nor the names of its contributors may be
+ used to endorse or promote products derived from this software
+ without prior written permission.
+*/
+
+#ifndef __VEX_HOST_GENERIC_REGS_H
+#define __VEX_HOST_GENERIC_REGS_H
+
+#include "libvex_basictypes.h"
+
+
+/*---------------------------------------------------------*/
+/*--- Representing HOST REGISTERS ---*/
+/*---------------------------------------------------------*/
+
+/* Host registers. Stuff to represent:
+
+ - The register number
+ - The register class
+ - Whether or not the register is a virtual reg.
+
+ Registers are a 32-bit UInt, thusly:
+
+ bits 31-28 are the register class.
+ bits 27-24 are 0000b for a real register, 0001b for a virtual register
+ bits 23-0 are the register number
+
+ Note (importantly) that by arranging that the class field is never
+ 0000b, any valid register looks like an extremely large int -- at
+ least 2^28 -- and so there is little chance of confusing it with an
+ integer array index in the register allocator.
+
+ Note further that since the class field is never 1111b, no valid
+ register can have the value INVALID_HREG.
+
+ There are currently 6 register classes:
+
+ int32 int64 float32 float64 simd64 simd128
+*/
+
+typedef UInt HReg;
+
+/* When extending this, do not use any value > 14 or < 0. */
+/* HRegClass describes host register classes which the instruction
+ selectors can speak about. We would not expect all of them to be
+ available on any specific host. For example on x86, the available
+ classes are: Int32, Flt64, Vec128 only.
+
+ IMPORTANT NOTE: host_generic_reg_alloc2.c needs to know how much
+ space is needed to spill each class of register. It allocates the
+ following amount of space:
+
+ HRcInt32 64 bits
+ HRcInt64 64 bits
+ HRcFlt32 64 bits
+ HRcFlt64 128 bits (on x86 these are spilled by fstpt/fldt and
+ so won't fit in a 64-bit slot)
+ HRcVec64 64 bits
+ HRcVec128 128 bits
+
+ If you add another regclass, you must remember to update
+ host_generic_reg_alloc2.c accordingly.
+*/
+typedef
+ enum {
+ HRcINVALID=1, /* NOT A VALID REGISTER CLASS */
+ HRcInt32=3, /* 32-bit int */
+ HRcInt64=4, /* 64-bit int */
+ HRcFlt32=5, /* 32-bit float */
+ HRcFlt64=6, /* 64-bit float */
+ HRcVec64=7, /* 64-bit SIMD */
+ HRcVec128=8 /* 128-bit SIMD */
+ }
+ HRegClass;
+
+extern void ppHRegClass ( HRegClass );
+
+
+/* Print an HReg in a generic (non-target-specific) way. */
+extern void ppHReg ( HReg );
+
+/* Construct/destruct. */
+static inline HReg mkHReg ( UInt regno, HRegClass rc, Bool virtual ) {
+ UInt r24 = regno & 0x00FFFFFF;
+ /* This is critical. The register number field may only
+ occupy 24 bits. */
+ if (r24 != regno)
+ vpanic("mkHReg: regno exceeds 2^24");
+ return regno | (((UInt)rc) << 28) | (virtual ? (1<<24) : 0);
+}
+
+static inline HRegClass hregClass ( HReg r ) {
+ UInt rc = r;
+ rc = (rc >> 28) & 0x0F;
+ vassert(rc >= HRcInt32 && rc <= HRcVec128);
+ return (HRegClass)rc;
+}
+
+static inline UInt hregNumber ( HReg r ) {
+ return ((UInt)r) & 0x00FFFFFF;
+}
+
+static inline Bool hregIsVirtual ( HReg r ) {
+ return toBool(((UInt)r) & (1<<24));
+}
+
+
+
+
+#define INVALID_HREG ((HReg)0xFFFFFFFF)
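+
+/* Worked example of the encoding (the numbers follow directly from
+ mkHReg above): mkHReg(3, HRcInt64, True) gives
+ (4 << 28) | (1 << 24) | 3 = 0x41000003,
+ for which hregClass is HRcInt64, hregNumber is 3 and hregIsVirtual
+ is True. A real register such as mkHReg(5, HRcVec128, False)
+ encodes as 0x80000005. */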
+
+
+/*---------------------------------------------------------*/
+/*--- Recording register usage (for reg-alloc) ---*/
+/*---------------------------------------------------------*/
+
+typedef
+ enum { HRmRead, HRmWrite, HRmModify }
+ HRegMode;
+
+
+/* A struct for recording the usage of registers in instructions.
+ This can get quite large, but we don't expect to allocate them
+ dynamically, so there's no problem.
+*/
+#define N_HREG_USAGE 25
+
+typedef
+ struct {
+ HReg hreg[N_HREG_USAGE];
+ HRegMode mode[N_HREG_USAGE];
+ Int n_used;
+ }
+ HRegUsage;
+
+extern void ppHRegUsage ( HRegUsage* );
+
+static inline void initHRegUsage ( HRegUsage* tab ) {
+ tab->n_used = 0;
+}
+
+/* Add a register to a usage table. Combine incoming read uses with
+ existing write uses into a modify use, and vice versa. Do not
+ create duplicate entries -- each reg should only be mentioned once.
+*/
+extern void addHRegUse ( HRegUsage*, HRegMode, HReg );
+
+
+
+/*---------------------------------------------------------*/
+/*--- Indicating register remappings (for reg-alloc) ---*/
+/*---------------------------------------------------------*/
+
+/* Note that such maps can only map virtual regs to real regs.
+ addToHRegRemap will barf if given a pair not of that form. As a
+ result, no valid HRegRemap will bind a real reg to anything, and so
+ if lookupHRegRemap is given a real reg, it returns it unchanged.
+ This is precisely the behaviour that the register allocator needs
+ to impose its decisions on the instructions it processes. */
+
+#define N_HREG_REMAP 5
+
+typedef
+ struct {
+ HReg orig [N_HREG_REMAP];
+ HReg replacement[N_HREG_REMAP];
+ Int n_used;
+ }
+ HRegRemap;
+
+extern void ppHRegRemap ( HRegRemap* );
+extern void initHRegRemap ( HRegRemap* );
+extern void addToHRegRemap ( HRegRemap*, HReg, HReg );
+extern HReg lookupHRegRemap ( HRegRemap*, HReg );
+
+
+/*---------------------------------------------------------*/
+/*--- Abstract instructions ---*/
+/*---------------------------------------------------------*/
+
+/* A type is needed to refer to pointers to instructions of any
+ target. Defining it like this means that HInstr* can stand in for
+ X86Instr*, ArmInstr*, etc. */
+
+typedef void HInstr;
+
+
+/* An expandable array of HInstr*'s. Handy for insn selection and
+ register allocation. n_vregs indicates the number of virtual
+ registers mentioned in the code, something that reg-alloc needs to
+ know. These are required to be numbered 0 .. n_vregs-1.
+*/
+typedef
+ struct {
+ HInstr** arr;
+ Int arr_size;
+ Int arr_used;
+ Int n_vregs;
+ }
+ HInstrArray;
+
+extern HInstrArray* newHInstrArray ( void );
+extern void addHInstr ( HInstrArray*, HInstr* );
+
+
+/*---------------------------------------------------------*/
+/*--- Reg alloc: TODO: move somewhere else ---*/
+/*---------------------------------------------------------*/
+
+extern
+HInstrArray* doRegisterAllocation (
+
+ /* Incoming virtual-registerised code. */
+ HInstrArray* instrs_in,
+
+ /* An array listing all the real registers the allocator may use,
+ in no particular order. */
+ HReg* available_real_regs,
+ Int n_available_real_regs,
+
+ /* Return True iff the given insn is a reg-reg move, in which
+ case also return the src and dst regs. */
+ Bool (*isMove) (HInstr*, HReg*, HReg*),
+
+ /* Get info about register usage in this insn. */
+ void (*getRegUsage) (HRegUsage*, HInstr*, Bool),
+
+ /* Apply a reg-reg mapping to an insn. */
+ void (*mapRegs) (HRegRemap*, HInstr*, Bool),
+
+ /* Return insn(s) to spill/restore a real reg to a spill slot
+ offset. And optionally a function to do direct reloads. */
+ void (*genSpill) ( HInstr**, HInstr**, HReg, Int, Bool ),
+ void (*genReload) ( HInstr**, HInstr**, HReg, Int, Bool ),
+ HInstr* (*directReload) ( HInstr*, HReg, Short ),
+ Int guest_sizeB,
+
+ /* For debug printing only. */
+ void (*ppInstr) ( HInstr*, Bool ),
+ void (*ppReg) ( HReg ),
+
+ /* 32/64bit mode */
+ Bool mode64
+);
+
+
+#endif /* ndef __VEX_HOST_GENERIC_REGS_H */
+
+/*---------------------------------------------------------------*/
+/*--- end host_generic_regs.h ---*/
+/*---------------------------------------------------------------*/
diff --git a/VEX/priv/host_generic_simd128.c b/VEX/priv/host_generic_simd128.c
new file mode 100644
index 0000000..8ed5166
--- /dev/null
+++ b/VEX/priv/host_generic_simd128.c
@@ -0,0 +1,220 @@
+
+/*---------------------------------------------------------------*/
+/*--- begin host_generic_simd128.c ---*/
+/*---------------------------------------------------------------*/
+
+/*
+ This file is part of Valgrind, a dynamic binary instrumentation
+ framework.
+
+ Copyright (C) 2010-2010 OpenWorks GbR
+ info@open-works.net
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA.
+
+ The GNU General Public License is contained in the file COPYING.
+*/
+
+/* Generic helper functions for doing 128-bit SIMD arithmetic in cases
+ where the instruction selectors cannot generate code in-line.
+ These are purely back-end entities and cannot be seen/referenced
+ from IR. */
+
+#include "libvex_basictypes.h"
+#include "host_generic_simd128.h"
+
+
+/* Primitive helpers always take args of the real type (signed vs
+ unsigned) but return an unsigned result, so there's no conversion
+ weirdness when stuffing results back in the V128 union fields,
+ which are all unsigned. */
+
+static inline UInt mul32 ( Int xx, Int yy )
+{
+ Int t = ((Int)xx) * ((Int)yy);
+ return toUInt(t);
+}
+
+static inline UInt max32S ( Int xx, Int yy )
+{
+ return toUInt((xx > yy) ? xx : yy);
+}
+
+static inline UInt min32S ( Int xx, Int yy )
+{
+ return toUInt((xx < yy) ? xx : yy);
+}
+
+static inline UInt max32U ( UInt xx, UInt yy )
+{
+ return toUInt((xx > yy) ? xx : yy);
+}
+
+static inline UInt min32U ( UInt xx, UInt yy )
+{
+ return toUInt((xx < yy) ? xx : yy);
+}
+
+static inline UShort max16U ( UShort xx, UShort yy )
+{
+ return toUShort((xx > yy) ? xx : yy);
+}
+
+static inline UShort min16U ( UShort xx, UShort yy )
+{
+ return toUShort((xx < yy) ? xx : yy);
+}
+
+static inline UChar max8S ( Char xx, Char yy )
+{
+ return toUChar((xx > yy) ? xx : yy);
+}
+
+static inline UChar min8S ( Char xx, Char yy )
+{
+ return toUChar((xx < yy) ? xx : yy);
+}
+
+static inline ULong cmpGT64S ( Long xx, Long yy )
+{
+ return (((Long)xx) > ((Long)yy))
+ ? 0xFFFFFFFFFFFFFFFFULL : 0ULL;
+}
+
+void h_generic_calc_Mul32x4 ( /*OUT*/V128* res,
+ V128* argL, V128* argR )
+{
+ res->w32[0] = mul32(argL->w32[0], argR->w32[0]);
+ res->w32[1] = mul32(argL->w32[1], argR->w32[1]);
+ res->w32[2] = mul32(argL->w32[2], argR->w32[2]);
+ res->w32[3] = mul32(argL->w32[3], argR->w32[3]);
+}
+
+void h_generic_calc_Max32Sx4 ( /*OUT*/V128* res,
+ V128* argL, V128* argR )
+{
+ res->w32[0] = max32S(argL->w32[0], argR->w32[0]);
+ res->w32[1] = max32S(argL->w32[1], argR->w32[1]);
+ res->w32[2] = max32S(argL->w32[2], argR->w32[2]);
+ res->w32[3] = max32S(argL->w32[3], argR->w32[3]);
+}
+
+void h_generic_calc_Min32Sx4 ( /*OUT*/V128* res,
+ V128* argL, V128* argR )
+{
+ res->w32[0] = min32S(argL->w32[0], argR->w32[0]);
+ res->w32[1] = min32S(argL->w32[1], argR->w32[1]);
+ res->w32[2] = min32S(argL->w32[2], argR->w32[2]);
+ res->w32[3] = min32S(argL->w32[3], argR->w32[3]);
+}
+
+void h_generic_calc_Max32Ux4 ( /*OUT*/V128* res,
+ V128* argL, V128* argR )
+{
+ res->w32[0] = max32U(argL->w32[0], argR->w32[0]);
+ res->w32[1] = max32U(argL->w32[1], argR->w32[1]);
+ res->w32[2] = max32U(argL->w32[2], argR->w32[2]);
+ res->w32[3] = max32U(argL->w32[3], argR->w32[3]);
+}
+
+void h_generic_calc_Min32Ux4 ( /*OUT*/V128* res,
+ V128* argL, V128* argR )
+{
+ res->w32[0] = min32U(argL->w32[0], argR->w32[0]);
+ res->w32[1] = min32U(argL->w32[1], argR->w32[1]);
+ res->w32[2] = min32U(argL->w32[2], argR->w32[2]);
+ res->w32[3] = min32U(argL->w32[3], argR->w32[3]);
+}
+
+void h_generic_calc_Max16Ux8 ( /*OUT*/V128* res,
+ V128* argL, V128* argR )
+{
+ res->w16[0] = max16U(argL->w16[0], argR->w16[0]);
+ res->w16[1] = max16U(argL->w16[1], argR->w16[1]);
+ res->w16[2] = max16U(argL->w16[2], argR->w16[2]);
+ res->w16[3] = max16U(argL->w16[3], argR->w16[3]);
+ res->w16[4] = max16U(argL->w16[4], argR->w16[4]);
+ res->w16[5] = max16U(argL->w16[5], argR->w16[5]);
+ res->w16[6] = max16U(argL->w16[6], argR->w16[6]);
+ res->w16[7] = max16U(argL->w16[7], argR->w16[7]);
+}
+
+void h_generic_calc_Min16Ux8 ( /*OUT*/V128* res,
+ V128* argL, V128* argR )
+{
+ res->w16[0] = min16U(argL->w16[0], argR->w16[0]);
+ res->w16[1] = min16U(argL->w16[1], argR->w16[1]);
+ res->w16[2] = min16U(argL->w16[2], argR->w16[2]);
+ res->w16[3] = min16U(argL->w16[3], argR->w16[3]);
+ res->w16[4] = min16U(argL->w16[4], argR->w16[4]);
+ res->w16[5] = min16U(argL->w16[5], argR->w16[5]);
+ res->w16[6] = min16U(argL->w16[6], argR->w16[6]);
+ res->w16[7] = min16U(argL->w16[7], argR->w16[7]);
+}
+
+void h_generic_calc_Max8Sx16 ( /*OUT*/V128* res,
+ V128* argL, V128* argR )
+{
+ res->w8[ 0] = max8S(argL->w8[ 0], argR->w8[ 0]);
+ res->w8[ 1] = max8S(argL->w8[ 1], argR->w8[ 1]);
+ res->w8[ 2] = max8S(argL->w8[ 2], argR->w8[ 2]);
+ res->w8[ 3] = max8S(argL->w8[ 3], argR->w8[ 3]);
+ res->w8[ 4] = max8S(argL->w8[ 4], argR->w8[ 4]);
+ res->w8[ 5] = max8S(argL->w8[ 5], argR->w8[ 5]);
+ res->w8[ 6] = max8S(argL->w8[ 6], argR->w8[ 6]);
+ res->w8[ 7] = max8S(argL->w8[ 7], argR->w8[ 7]);
+ res->w8[ 8] = max8S(argL->w8[ 8], argR->w8[ 8]);
+ res->w8[ 9] = max8S(argL->w8[ 9], argR->w8[ 9]);
+ res->w8[10] = max8S(argL->w8[10], argR->w8[10]);
+ res->w8[11] = max8S(argL->w8[11], argR->w8[11]);
+ res->w8[12] = max8S(argL->w8[12], argR->w8[12]);
+ res->w8[13] = max8S(argL->w8[13], argR->w8[13]);
+ res->w8[14] = max8S(argL->w8[14], argR->w8[14]);
+ res->w8[15] = max8S(argL->w8[15], argR->w8[15]);
+}
+
+void h_generic_calc_Min8Sx16 ( /*OUT*/V128* res,
+ V128* argL, V128* argR )
+{
+ res->w8[ 0] = min8S(argL->w8[ 0], argR->w8[ 0]);
+ res->w8[ 1] = min8S(argL->w8[ 1], argR->w8[ 1]);
+ res->w8[ 2] = min8S(argL->w8[ 2], argR->w8[ 2]);
+ res->w8[ 3] = min8S(argL->w8[ 3], argR->w8[ 3]);
+ res->w8[ 4] = min8S(argL->w8[ 4], argR->w8[ 4]);
+ res->w8[ 5] = min8S(argL->w8[ 5], argR->w8[ 5]);
+ res->w8[ 6] = min8S(argL->w8[ 6], argR->w8[ 6]);
+ res->w8[ 7] = min8S(argL->w8[ 7], argR->w8[ 7]);
+ res->w8[ 8] = min8S(argL->w8[ 8], argR->w8[ 8]);
+ res->w8[ 9] = min8S(argL->w8[ 9], argR->w8[ 9]);
+ res->w8[10] = min8S(argL->w8[10], argR->w8[10]);
+ res->w8[11] = min8S(argL->w8[11], argR->w8[11]);
+ res->w8[12] = min8S(argL->w8[12], argR->w8[12]);
+ res->w8[13] = min8S(argL->w8[13], argR->w8[13]);
+ res->w8[14] = min8S(argL->w8[14], argR->w8[14]);
+ res->w8[15] = min8S(argL->w8[15], argR->w8[15]);
+}
+
+void h_generic_calc_CmpGT64Sx2 ( /*OUT*/V128* res,
+ V128* argL, V128* argR )
+{
+ res->w64[0] = cmpGT64S(argL->w64[0], argR->w64[0]);
+ res->w64[1] = cmpGT64S(argL->w64[1], argR->w64[1]);
+}
+
+
+/*---------------------------------------------------------------*/
+/*--- end host_generic_simd128.c ---*/
+/*---------------------------------------------------------------*/
diff --git a/VEX/priv/host_generic_simd128.h b/VEX/priv/host_generic_simd128.h
new file mode 100644
index 0000000..53850cb
--- /dev/null
+++ b/VEX/priv/host_generic_simd128.h
@@ -0,0 +1,67 @@
+
+/*---------------------------------------------------------------*/
+/*--- begin host_generic_simd128.h ---*/
+/*---------------------------------------------------------------*/
+
+/*
+ This file is part of Valgrind, a dynamic binary instrumentation
+ framework.
+
+ Copyright (C) 2010-2010 OpenWorks GbR
+ info@open-works.net
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA.
+
+ The GNU General Public License is contained in the file COPYING.
+*/
+
+/* Generic helper functions for doing 128-bit SIMD arithmetic in cases
+ where the instruction selectors cannot generate code in-line.
+ These are purely back-end entities and cannot be seen/referenced
+ as clean helper functions from IR.
+
+ These will get called from generated code and therefore should be
+ well behaved -- no floating point or mmx insns, just straight
+ integer code.
+
+ Each function implements the correspondingly-named IR primop.
+*/
+
+#ifndef __VEX_HOST_GENERIC_SIMD128_H
+#define __VEX_HOST_GENERIC_SIMD128_H
+
+#include "libvex_basictypes.h"
+
+/* DO NOT MAKE THESE INTO REGPARM FNS! THIS WILL BREAK CALLING
+ SEQUENCES GENERATED BY host-x86/isel.c. */
+
+extern void h_generic_calc_Mul32x4 ( /*OUT*/V128*, V128*, V128* );
+extern void h_generic_calc_Max32Sx4 ( /*OUT*/V128*, V128*, V128* );
+extern void h_generic_calc_Min32Sx4 ( /*OUT*/V128*, V128*, V128* );
+extern void h_generic_calc_Max32Ux4 ( /*OUT*/V128*, V128*, V128* );
+extern void h_generic_calc_Min32Ux4 ( /*OUT*/V128*, V128*, V128* );
+extern void h_generic_calc_Max16Ux8 ( /*OUT*/V128*, V128*, V128* );
+extern void h_generic_calc_Min16Ux8 ( /*OUT*/V128*, V128*, V128* );
+extern void h_generic_calc_Max8Sx16 ( /*OUT*/V128*, V128*, V128* );
+extern void h_generic_calc_Min8Sx16 ( /*OUT*/V128*, V128*, V128* );
+extern void h_generic_calc_CmpGT64Sx2 ( /*OUT*/V128*, V128*, V128* );
+
+
+#endif /* ndef __VEX_HOST_GENERIC_SIMD128_H */
+
+/*---------------------------------------------------------------*/
+/*--- end host_generic_simd128.h ---*/
+/*---------------------------------------------------------------*/
diff --git a/VEX/priv/host_generic_simd64.c b/VEX/priv/host_generic_simd64.c
new file mode 100644
index 0000000..03d6d2f
--- /dev/null
+++ b/VEX/priv/host_generic_simd64.c
@@ -0,0 +1,1337 @@
+
+/*---------------------------------------------------------------*/
+/*--- begin host_generic_simd64.c ---*/
+/*---------------------------------------------------------------*/
+
+/*
+ This file is part of Valgrind, a dynamic binary instrumentation
+ framework.
+
+ Copyright (C) 2004-2010 OpenWorks LLP
+ info@open-works.net
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA.
+
+ The GNU General Public License is contained in the file COPYING.
+
+ Neither the names of the U.S. Department of Energy nor the
+ University of California nor the names of its contributors may be
+ used to endorse or promote products derived from this software
+ without prior written permission.
+*/
+
+/* Generic helper functions for doing 64-bit SIMD arithmetic in cases
+ where the instruction selectors cannot generate code in-line.
+ These are purely back-end entities and cannot be seen/referenced
+ from IR. */
+
+#include "libvex_basictypes.h"
+#include "host_generic_simd64.h"
+
+
+
+/* Tuple/select functions for 32x2 vectors. */
+
+static inline ULong mk32x2 ( UInt w1, UInt w0 ) {
+ return (((ULong)w1) << 32) | ((ULong)w0);
+}
+
+static inline UInt sel32x2_1 ( ULong w64 ) {
+ return 0xFFFFFFFF & toUInt(w64 >> 32);
+}
+static inline UInt sel32x2_0 ( ULong w64 ) {
+ return 0xFFFFFFFF & toUInt(w64);
+}
+
+
+/* Tuple/select functions for 16x4 vectors. gcc is pretty hopeless
+ with 64-bit shifts so we give it a hand. */
+
+static inline ULong mk16x4 ( UShort w3, UShort w2,
+ UShort w1, UShort w0 ) {
+ UInt hi32 = (((UInt)w3) << 16) | ((UInt)w2);
+ UInt lo32 = (((UInt)w1) << 16) | ((UInt)w0);
+ return mk32x2(hi32, lo32);
+}
+
+static inline UShort sel16x4_3 ( ULong w64 ) {
+ UInt hi32 = toUInt(w64 >> 32);
+ return toUShort(0xFFFF & (hi32 >> 16));
+}
+static inline UShort sel16x4_2 ( ULong w64 ) {
+ UInt hi32 = toUInt(w64 >> 32);
+ return toUShort(0xFFFF & hi32);
+}
+static inline UShort sel16x4_1 ( ULong w64 ) {
+ UInt lo32 = (UInt)w64;
+ return toUShort(0xFFFF & (lo32 >> 16));
+}
+static inline UShort sel16x4_0 ( ULong w64 ) {
+ UInt lo32 = (UInt)w64;
+ return toUShort(0xFFFF & lo32);
+}
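+
+/* Lane ordering example: mk16x4(0x0123, 0x4567, 0x89AB, 0xCDEF)
+ yields 0x0123456789ABCDEF, so lane 3 occupies the most significant
+ 16 bits, and sel16x4_2 of that value is 0x4567. */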
+
+
+/* Tuple/select functions for 8x8 vectors. */
+
+static inline ULong mk8x8 ( UChar w7, UChar w6,
+ UChar w5, UChar w4,
+ UChar w3, UChar w2,
+ UChar w1, UChar w0 ) {
+ UInt hi32 = (((UInt)w7) << 24) | (((UInt)w6) << 16)
+ | (((UInt)w5) << 8) | (((UInt)w4) << 0);
+ UInt lo32 = (((UInt)w3) << 24) | (((UInt)w2) << 16)
+ | (((UInt)w1) << 8) | (((UInt)w0) << 0);
+ return mk32x2(hi32, lo32);
+}
+
+static inline UChar sel8x8_7 ( ULong w64 ) {
+ UInt hi32 = toUInt(w64 >> 32);
+ return toUChar(0xFF & (hi32 >> 24));
+}
+static inline UChar sel8x8_6 ( ULong w64 ) {
+ UInt hi32 = toUInt(w64 >> 32);
+ return toUChar(0xFF & (hi32 >> 16));
+}
+static inline UChar sel8x8_5 ( ULong w64 ) {
+ UInt hi32 = toUInt(w64 >> 32);
+ return toUChar(0xFF & (hi32 >> 8));
+}
+static inline UChar sel8x8_4 ( ULong w64 ) {
+ UInt hi32 = toUInt(w64 >> 32);
+ return toUChar(0xFF & (hi32 >> 0));
+}
+static inline UChar sel8x8_3 ( ULong w64 ) {
+ UInt lo32 = (UInt)w64;
+ return toUChar(0xFF & (lo32 >> 24));
+}
+static inline UChar sel8x8_2 ( ULong w64 ) {
+ UInt lo32 = (UInt)w64;
+ return toUChar(0xFF & (lo32 >> 16));
+}
+static inline UChar sel8x8_1 ( ULong w64 ) {
+ UInt lo32 = (UInt)w64;
+ return toUChar(0xFF & (lo32 >> 8));
+}
+static inline UChar sel8x8_0 ( ULong w64 ) {
+ UInt lo32 = (UInt)w64;
+ return toUChar(0xFF & (lo32 >> 0));
+}
+
+static inline UChar index8x8 ( ULong w64, UChar ix ) {
+ ix &= 7;
+ return toUChar((w64 >> (8*ix)) & 0xFF);
+}
+
+
+/* Scalar helpers. */
+
+static inline Short qadd16S ( Short xx, Short yy )
+{
+ Int t = ((Int)xx) + ((Int)yy);
+ if (t < -32768) t = -32768;
+ if (t > 32767) t = 32767;
+ return (Short)t;
+}
+
+static inline Char qadd8S ( Char xx, Char yy )
+{
+ Int t = ((Int)xx) + ((Int)yy);
+ if (t < -128) t = -128;
+ if (t > 127) t = 127;
+ return (Char)t;
+}
+
+static inline UShort qadd16U ( UShort xx, UShort yy )
+{
+ UInt t = ((UInt)xx) + ((UInt)yy);
+ if (t > 0xFFFF) t = 0xFFFF;
+ return (UShort)t;
+}
+
+static inline UChar qadd8U ( UChar xx, UChar yy )
+{
+ UInt t = ((UInt)xx) + ((UInt)yy);
+ if (t > 0xFF) t = 0xFF;
+ return (UChar)t;
+}
+
+static inline Short qsub16S ( Short xx, Short yy )
+{
+ Int t = ((Int)xx) - ((Int)yy);
+ if (t < -32768) t = -32768;
+ if (t > 32767) t = 32767;
+ return (Short)t;
+}
+
+static inline Char qsub8S ( Char xx, Char yy )
+{
+ Int t = ((Int)xx) - ((Int)yy);
+ if (t < -128) t = -128;
+ if (t > 127) t = 127;
+ return (Char)t;
+}
+
+static inline UShort qsub16U ( UShort xx, UShort yy )
+{
+ Int t = ((Int)xx) - ((Int)yy);
+ if (t < 0) t = 0;
+ if (t > 0xFFFF) t = 0xFFFF;
+ return (UShort)t;
+}
+
+static inline UChar qsub8U ( UChar xx, UChar yy )
+{
+ Int t = ((Int)xx) - ((Int)yy);
+ if (t < 0) t = 0;
+ if (t > 0xFF) t = 0xFF;
+ return (UChar)t;
+}
+
+static inline Short mul16 ( Short xx, Short yy )
+{
+ Int t = ((Int)xx) * ((Int)yy);
+ return (Short)t;
+}
+
+static inline Int mul32 ( Int xx, Int yy )
+{
+ Int t = ((Int)xx) * ((Int)yy);
+ return (Int)t;
+}
+
+static inline Short mulhi16S ( Short xx, Short yy )
+{
+ Int t = ((Int)xx) * ((Int)yy);
+ t >>=/*s*/ 16;
+ return (Short)t;
+}
+
+static inline UShort mulhi16U ( UShort xx, UShort yy )
+{
+ UInt t = ((UInt)xx) * ((UInt)yy);
+ t >>=/*u*/ 16;
+ return (UShort)t;
+}
+
+static inline UInt cmpeq32 ( UInt xx, UInt yy )
+{
+ return xx==yy ? 0xFFFFFFFF : 0;
+}
+
+static inline UShort cmpeq16 ( UShort xx, UShort yy )
+{
+ return toUShort(xx==yy ? 0xFFFF : 0);
+}
+
+static inline UChar cmpeq8 ( UChar xx, UChar yy )
+{
+ return toUChar(xx==yy ? 0xFF : 0);
+}
+
+static inline UInt cmpgt32S ( Int xx, Int yy )
+{
+ return xx>yy ? 0xFFFFFFFF : 0;
+}
+
+static inline UShort cmpgt16S ( Short xx, Short yy )
+{
+ return toUShort(xx>yy ? 0xFFFF : 0);
+}
+
+static inline UChar cmpgt8S ( Char xx, Char yy )
+{
+ return toUChar(xx>yy ? 0xFF : 0);
+}
+
+static inline UInt cmpnez32 ( UInt xx )
+{
+ return xx==0 ? 0 : 0xFFFFFFFF;
+}
+
+static inline UShort cmpnez16 ( UShort xx )
+{
+ return toUShort(xx==0 ? 0 : 0xFFFF);
+}
+
+static inline UChar cmpnez8 ( UChar xx )
+{
+ return toUChar(xx==0 ? 0 : 0xFF);
+}
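+
+/* Note that the comparison helpers return per-lane masks rather
+ than booleans: all-ones for true, all-zeroes for false, e.g.
+ cmpgt16S(5, 3) = 0xFFFF. This matches the MMX/SSE compare
+ instruction convention. */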
+
+static inline Short qnarrow32Sto16 ( UInt xx0 )
+{
+ Int xx = (Int)xx0;
+ if (xx < -32768) xx = -32768;
+ if (xx > 32767) xx = 32767;
+ return (Short)xx;
+}
+
+static inline Char qnarrow16Sto8 ( UShort xx0 )
+{
+ Short xx = (Short)xx0;
+ if (xx < -128) xx = -128;
+ if (xx > 127) xx = 127;
+ return (Char)xx;
+}
+
+static inline UChar qnarrow16Uto8 ( UShort xx0 )
+{
+ Short xx = (Short)xx0;
+ if (xx < 0) xx = 0;
+ if (xx > 255) xx = 255;
+ return (UChar)xx;
+}
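+
+/* Saturation examples: qnarrow32Sto16(100000) clamps to 32767 and
+ qnarrow32Sto16(-100000) to -32768; qnarrow16Uto8 clamps negative
+ inputs to 0 and anything above 255 to 255. */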
+
+/* shifts: we don't care about out-of-range ones, since
+ that is dealt with at a higher level. */
+
+static inline UChar shl8 ( UChar v, UInt n )
+{
+ return toUChar(v << n);
+}
+
+static inline UChar sar8 ( UChar v, UInt n )
+{
+ return toUChar(((Char)v) >> n);
+}
+
+static inline UShort shl16 ( UShort v, UInt n )
+{
+ return toUShort(v << n);
+}
+
+static inline UShort shr16 ( UShort v, UInt n )
+{
+ return toUShort((((UShort)v) >> n));
+}
+
+static inline UShort sar16 ( UShort v, UInt n )
+{
+ return toUShort(((Short)v) >> n);
+}
+
+static inline UInt shl32 ( UInt v, UInt n )
+{
+ return v << n;
+}
+
+static inline UInt shr32 ( UInt v, UInt n )
+{
+ return (((UInt)v) >> n);
+}
+
+static inline UInt sar32 ( UInt v, UInt n )
+{
+ return ((Int)v) >> n;
+}
+
+static inline UChar avg8U ( UChar xx, UChar yy )
+{
+ UInt xxi = (UInt)xx;
+ UInt yyi = (UInt)yy;
+ UInt r = (xxi + yyi + 1) >> 1;
+ return (UChar)r;
+}
+
+static inline UShort avg16U ( UShort xx, UShort yy )
+{
+ UInt xxi = (UInt)xx;
+ UInt yyi = (UInt)yy;
+ UInt r = (xxi + yyi + 1) >> 1;
+ return (UShort)r;
+}
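+
+/* The "+ 1" makes these round-to-nearest-up averages, matching the
+ x86 PAVGB/PAVGW rounding rule: avg8U(1, 2) = (1 + 2 + 1) >> 1 = 2. */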
+
+static inline Short max16S ( Short xx, Short yy )
+{
+ return toUShort((xx > yy) ? xx : yy);
+}
+
+static inline UChar max8U ( UChar xx, UChar yy )
+{
+ return toUChar((xx > yy) ? xx : yy);
+}
+
+static inline Short min16S ( Short xx, Short yy )
+{
+ return toUShort((xx < yy) ? xx : yy);
+}
+
+static inline UChar min8U ( UChar xx, UChar yy )
+{
+ return toUChar((xx < yy) ? xx : yy);
+}
+
+static inline UShort hadd16U ( UShort xx, UShort yy )
+{
+ UInt xxi = (UInt)xx;
+ UInt yyi = (UInt)yy;
+ UInt r = (xxi + yyi) >> 1;
+ return (UShort)r;
+}
+
+static inline Short hadd16S ( Short xx, Short yy )
+{
+ Int xxi = (Int)xx;
+ Int yyi = (Int)yy;
+ Int r = (xxi + yyi) >> 1;
+ return (Short)r;
+}
+
+static inline UShort hsub16U ( UShort xx, UShort yy )
+{
+ UInt xxi = (UInt)xx;
+ UInt yyi = (UInt)yy;
+ UInt r = (xxi - yyi) >> 1;
+ return (UShort)r;
+}
+
+static inline Short hsub16S ( Short xx, Short yy )
+{
+ Int xxi = (Int)xx;
+ Int yyi = (Int)yy;
+ Int r = (xxi - yyi) >> 1;
+ return (Short)r;
+}
+
+static inline UChar hadd8U ( UChar xx, UChar yy )
+{
+ UInt xxi = (UInt)xx;
+ UInt yyi = (UInt)yy;
+ UInt r = (xxi + yyi) >> 1;
+ return (UChar)r;
+}
+
+static inline Char hadd8S ( Char xx, Char yy )
+{
+ Int xxi = (Int)xx;
+ Int yyi = (Int)yy;
+ Int r = (xxi + yyi) >> 1;
+ return (Char)r;
+}
+
+static inline UChar hsub8U ( UChar xx, UChar yy )
+{
+ UInt xxi = (UInt)xx;
+ UInt yyi = (UInt)yy;
+ UInt r = (xxi - yyi) >> 1;
+ return (UChar)r;
+}
+
+static inline Char hsub8S ( Char xx, Char yy )
+{
+ Int xxi = (Int)xx;
+ Int yyi = (Int)yy;
+ Int r = (xxi - yyi) >> 1;
+ return (Char)r;
+}
+
+static inline UInt absdiff8U ( UChar xx, UChar yy )
+{
+ UInt xxu = (UChar)xx;
+ UInt yyu = (UChar)yy;
+ return xxu >= yyu ? xxu - yyu : yyu - xxu;
+}
+
+/* ----------------------------------------------------- */
+/* Start of the externally visible functions. These simply
+ implement the corresponding IR primops. */
+/* ----------------------------------------------------- */
+
+/* ------------ Normal addition ------------ */
+
+ULong h_generic_calc_Add32x2 ( ULong xx, ULong yy )
+{
+ return mk32x2(
+ sel32x2_1(xx) + sel32x2_1(yy),
+ sel32x2_0(xx) + sel32x2_0(yy)
+ );
+}
+
+ULong h_generic_calc_Add16x4 ( ULong xx, ULong yy )
+{
+ return mk16x4(
+ toUShort( sel16x4_3(xx) + sel16x4_3(yy) ),
+ toUShort( sel16x4_2(xx) + sel16x4_2(yy) ),
+ toUShort( sel16x4_1(xx) + sel16x4_1(yy) ),
+ toUShort( sel16x4_0(xx) + sel16x4_0(yy) )
+ );
+}
+
+ULong h_generic_calc_Add8x8 ( ULong xx, ULong yy )
+{
+ return mk8x8(
+ toUChar( sel8x8_7(xx) + sel8x8_7(yy) ),
+ toUChar( sel8x8_6(xx) + sel8x8_6(yy) ),
+ toUChar( sel8x8_5(xx) + sel8x8_5(yy) ),
+ toUChar( sel8x8_4(xx) + sel8x8_4(yy) ),
+ toUChar( sel8x8_3(xx) + sel8x8_3(yy) ),
+ toUChar( sel8x8_2(xx) + sel8x8_2(yy) ),
+ toUChar( sel8x8_1(xx) + sel8x8_1(yy) ),
+ toUChar( sel8x8_0(xx) + sel8x8_0(yy) )
+ );
+}
+
+/* ------------ Saturating addition ------------ */
+
+ULong h_generic_calc_QAdd16Sx4 ( ULong xx, ULong yy )
+{
+ return mk16x4(
+ qadd16S( sel16x4_3(xx), sel16x4_3(yy) ),
+ qadd16S( sel16x4_2(xx), sel16x4_2(yy) ),
+ qadd16S( sel16x4_1(xx), sel16x4_1(yy) ),
+ qadd16S( sel16x4_0(xx), sel16x4_0(yy) )
+ );
+}
+
+ULong h_generic_calc_QAdd8Sx8 ( ULong xx, ULong yy )
+{
+ return mk8x8(
+ qadd8S( sel8x8_7(xx), sel8x8_7(yy) ),
+ qadd8S( sel8x8_6(xx), sel8x8_6(yy) ),
+ qadd8S( sel8x8_5(xx), sel8x8_5(yy) ),
+ qadd8S( sel8x8_4(xx), sel8x8_4(yy) ),
+ qadd8S( sel8x8_3(xx), sel8x8_3(yy) ),
+ qadd8S( sel8x8_2(xx), sel8x8_2(yy) ),
+ qadd8S( sel8x8_1(xx), sel8x8_1(yy) ),
+ qadd8S( sel8x8_0(xx), sel8x8_0(yy) )
+ );
+}
+
+ULong h_generic_calc_QAdd16Ux4 ( ULong xx, ULong yy )
+{
+ return mk16x4(
+ qadd16U( sel16x4_3(xx), sel16x4_3(yy) ),
+ qadd16U( sel16x4_2(xx), sel16x4_2(yy) ),
+ qadd16U( sel16x4_1(xx), sel16x4_1(yy) ),
+ qadd16U( sel16x4_0(xx), sel16x4_0(yy) )
+ );
+}
+
+ULong h_generic_calc_QAdd8Ux8 ( ULong xx, ULong yy )
+{
+ return mk8x8(
+ qadd8U( sel8x8_7(xx), sel8x8_7(yy) ),
+ qadd8U( sel8x8_6(xx), sel8x8_6(yy) ),
+ qadd8U( sel8x8_5(xx), sel8x8_5(yy) ),
+ qadd8U( sel8x8_4(xx), sel8x8_4(yy) ),
+ qadd8U( sel8x8_3(xx), sel8x8_3(yy) ),
+ qadd8U( sel8x8_2(xx), sel8x8_2(yy) ),
+ qadd8U( sel8x8_1(xx), sel8x8_1(yy) ),
+ qadd8U( sel8x8_0(xx), sel8x8_0(yy) )
+ );
+}
+
+/* ------------ Normal subtraction ------------ */
+
+ULong h_generic_calc_Sub32x2 ( ULong xx, ULong yy )
+{
+ return mk32x2(
+ sel32x2_1(xx) - sel32x2_1(yy),
+ sel32x2_0(xx) - sel32x2_0(yy)
+ );
+}
+
+ULong h_generic_calc_Sub16x4 ( ULong xx, ULong yy )
+{
+ return mk16x4(
+ toUShort( sel16x4_3(xx) - sel16x4_3(yy) ),
+ toUShort( sel16x4_2(xx) - sel16x4_2(yy) ),
+ toUShort( sel16x4_1(xx) - sel16x4_1(yy) ),
+ toUShort( sel16x4_0(xx) - sel16x4_0(yy) )
+ );
+}
+
+ULong h_generic_calc_Sub8x8 ( ULong xx, ULong yy )
+{
+ return mk8x8(
+ toUChar( sel8x8_7(xx) - sel8x8_7(yy) ),
+ toUChar( sel8x8_6(xx) - sel8x8_6(yy) ),
+ toUChar( sel8x8_5(xx) - sel8x8_5(yy) ),
+ toUChar( sel8x8_4(xx) - sel8x8_4(yy) ),
+ toUChar( sel8x8_3(xx) - sel8x8_3(yy) ),
+ toUChar( sel8x8_2(xx) - sel8x8_2(yy) ),
+ toUChar( sel8x8_1(xx) - sel8x8_1(yy) ),
+ toUChar( sel8x8_0(xx) - sel8x8_0(yy) )
+ );
+}
+
+/* ------------ Saturating subtraction ------------ */
+
+ULong h_generic_calc_QSub16Sx4 ( ULong xx, ULong yy )
+{
+ return mk16x4(
+ qsub16S( sel16x4_3(xx), sel16x4_3(yy) ),
+ qsub16S( sel16x4_2(xx), sel16x4_2(yy) ),
+ qsub16S( sel16x4_1(xx), sel16x4_1(yy) ),
+ qsub16S( sel16x4_0(xx), sel16x4_0(yy) )
+ );
+}
+
+ULong h_generic_calc_QSub8Sx8 ( ULong xx, ULong yy )
+{
+ return mk8x8(
+ qsub8S( sel8x8_7(xx), sel8x8_7(yy) ),
+ qsub8S( sel8x8_6(xx), sel8x8_6(yy) ),
+ qsub8S( sel8x8_5(xx), sel8x8_5(yy) ),
+ qsub8S( sel8x8_4(xx), sel8x8_4(yy) ),
+ qsub8S( sel8x8_3(xx), sel8x8_3(yy) ),
+ qsub8S( sel8x8_2(xx), sel8x8_2(yy) ),
+ qsub8S( sel8x8_1(xx), sel8x8_1(yy) ),
+ qsub8S( sel8x8_0(xx), sel8x8_0(yy) )
+ );
+}
+
+ULong h_generic_calc_QSub16Ux4 ( ULong xx, ULong yy )
+{
+ return mk16x4(
+ qsub16U( sel16x4_3(xx), sel16x4_3(yy) ),
+ qsub16U( sel16x4_2(xx), sel16x4_2(yy) ),
+ qsub16U( sel16x4_1(xx), sel16x4_1(yy) ),
+ qsub16U( sel16x4_0(xx), sel16x4_0(yy) )
+ );
+}
+
+ULong h_generic_calc_QSub8Ux8 ( ULong xx, ULong yy )
+{
+ return mk8x8(
+ qsub8U( sel8x8_7(xx), sel8x8_7(yy) ),
+ qsub8U( sel8x8_6(xx), sel8x8_6(yy) ),
+ qsub8U( sel8x8_5(xx), sel8x8_5(yy) ),
+ qsub8U( sel8x8_4(xx), sel8x8_4(yy) ),
+ qsub8U( sel8x8_3(xx), sel8x8_3(yy) ),
+ qsub8U( sel8x8_2(xx), sel8x8_2(yy) ),
+ qsub8U( sel8x8_1(xx), sel8x8_1(yy) ),
+ qsub8U( sel8x8_0(xx), sel8x8_0(yy) )
+ );
+}
+
+/* ------------ Multiplication ------------ */
+
+ULong h_generic_calc_Mul16x4 ( ULong xx, ULong yy )
+{
+ return mk16x4(
+ mul16( sel16x4_3(xx), sel16x4_3(yy) ),
+ mul16( sel16x4_2(xx), sel16x4_2(yy) ),
+ mul16( sel16x4_1(xx), sel16x4_1(yy) ),
+ mul16( sel16x4_0(xx), sel16x4_0(yy) )
+ );
+}
+
+ULong h_generic_calc_Mul32x2 ( ULong xx, ULong yy )
+{
+ return mk32x2(
+ mul32( sel32x2_1(xx), sel32x2_1(yy) ),
+ mul32( sel32x2_0(xx), sel32x2_0(yy) )
+ );
+}
+
+ULong h_generic_calc_MulHi16Sx4 ( ULong xx, ULong yy )
+{
+ return mk16x4(
+ mulhi16S( sel16x4_3(xx), sel16x4_3(yy) ),
+ mulhi16S( sel16x4_2(xx), sel16x4_2(yy) ),
+ mulhi16S( sel16x4_1(xx), sel16x4_1(yy) ),
+ mulhi16S( sel16x4_0(xx), sel16x4_0(yy) )
+ );
+}
+
+ULong h_generic_calc_MulHi16Ux4 ( ULong xx, ULong yy )
+{
+ return mk16x4(
+ mulhi16U( sel16x4_3(xx), sel16x4_3(yy) ),
+ mulhi16U( sel16x4_2(xx), sel16x4_2(yy) ),
+ mulhi16U( sel16x4_1(xx), sel16x4_1(yy) ),
+ mulhi16U( sel16x4_0(xx), sel16x4_0(yy) )
+ );
+}
+
+/* ------------ Comparison ------------ */
+
+ULong h_generic_calc_CmpEQ32x2 ( ULong xx, ULong yy )
+{
+ return mk32x2(
+ cmpeq32( sel32x2_1(xx), sel32x2_1(yy) ),
+ cmpeq32( sel32x2_0(xx), sel32x2_0(yy) )
+ );
+}
+
+ULong h_generic_calc_CmpEQ16x4 ( ULong xx, ULong yy )
+{
+ return mk16x4(
+ cmpeq16( sel16x4_3(xx), sel16x4_3(yy) ),
+ cmpeq16( sel16x4_2(xx), sel16x4_2(yy) ),
+ cmpeq16( sel16x4_1(xx), sel16x4_1(yy) ),
+ cmpeq16( sel16x4_0(xx), sel16x4_0(yy) )
+ );
+}
+
+ULong h_generic_calc_CmpEQ8x8 ( ULong xx, ULong yy )
+{
+ return mk8x8(
+ cmpeq8( sel8x8_7(xx), sel8x8_7(yy) ),
+ cmpeq8( sel8x8_6(xx), sel8x8_6(yy) ),
+ cmpeq8( sel8x8_5(xx), sel8x8_5(yy) ),
+ cmpeq8( sel8x8_4(xx), sel8x8_4(yy) ),
+ cmpeq8( sel8x8_3(xx), sel8x8_3(yy) ),
+ cmpeq8( sel8x8_2(xx), sel8x8_2(yy) ),
+ cmpeq8( sel8x8_1(xx), sel8x8_1(yy) ),
+ cmpeq8( sel8x8_0(xx), sel8x8_0(yy) )
+ );
+}
+
+ULong h_generic_calc_CmpGT32Sx2 ( ULong xx, ULong yy )
+{
+ return mk32x2(
+ cmpgt32S( sel32x2_1(xx), sel32x2_1(yy) ),
+ cmpgt32S( sel32x2_0(xx), sel32x2_0(yy) )
+ );
+}
+
+ULong h_generic_calc_CmpGT16Sx4 ( ULong xx, ULong yy )
+{
+ return mk16x4(
+ cmpgt16S( sel16x4_3(xx), sel16x4_3(yy) ),
+ cmpgt16S( sel16x4_2(xx), sel16x4_2(yy) ),
+ cmpgt16S( sel16x4_1(xx), sel16x4_1(yy) ),
+ cmpgt16S( sel16x4_0(xx), sel16x4_0(yy) )
+ );
+}
+
+ULong h_generic_calc_CmpGT8Sx8 ( ULong xx, ULong yy )
+{
+ return mk8x8(
+ cmpgt8S( sel8x8_7(xx), sel8x8_7(yy) ),
+ cmpgt8S( sel8x8_6(xx), sel8x8_6(yy) ),
+ cmpgt8S( sel8x8_5(xx), sel8x8_5(yy) ),
+ cmpgt8S( sel8x8_4(xx), sel8x8_4(yy) ),
+ cmpgt8S( sel8x8_3(xx), sel8x8_3(yy) ),
+ cmpgt8S( sel8x8_2(xx), sel8x8_2(yy) ),
+ cmpgt8S( sel8x8_1(xx), sel8x8_1(yy) ),
+ cmpgt8S( sel8x8_0(xx), sel8x8_0(yy) )
+ );
+}
+
+ULong h_generic_calc_CmpNEZ32x2 ( ULong xx )
+{
+ return mk32x2(
+ cmpnez32( sel32x2_1(xx) ),
+ cmpnez32( sel32x2_0(xx) )
+ );
+}
+
+ULong h_generic_calc_CmpNEZ16x4 ( ULong xx )
+{
+ return mk16x4(
+ cmpnez16( sel16x4_3(xx) ),
+ cmpnez16( sel16x4_2(xx) ),
+ cmpnez16( sel16x4_1(xx) ),
+ cmpnez16( sel16x4_0(xx) )
+ );
+}
+
+ULong h_generic_calc_CmpNEZ8x8 ( ULong xx )
+{
+ return mk8x8(
+ cmpnez8( sel8x8_7(xx) ),
+ cmpnez8( sel8x8_6(xx) ),
+ cmpnez8( sel8x8_5(xx) ),
+ cmpnez8( sel8x8_4(xx) ),
+ cmpnez8( sel8x8_3(xx) ),
+ cmpnez8( sel8x8_2(xx) ),
+ cmpnez8( sel8x8_1(xx) ),
+ cmpnez8( sel8x8_0(xx) )
+ );
+}
+
+/* ------------ Saturating narrowing ------------ */
+
+ULong h_generic_calc_QNarrow32Sx2 ( ULong aa, ULong bb )
+{
+ UInt d = sel32x2_1(aa);
+ UInt c = sel32x2_0(aa);
+ UInt b = sel32x2_1(bb);
+ UInt a = sel32x2_0(bb);
+ return mk16x4(
+ qnarrow32Sto16(d),
+ qnarrow32Sto16(c),
+ qnarrow32Sto16(b),
+ qnarrow32Sto16(a)
+ );
+}
+
+ULong h_generic_calc_QNarrow16Sx4 ( ULong aa, ULong bb )
+{
+ UShort h = sel16x4_3(aa);
+ UShort g = sel16x4_2(aa);
+ UShort f = sel16x4_1(aa);
+ UShort e = sel16x4_0(aa);
+ UShort d = sel16x4_3(bb);
+ UShort c = sel16x4_2(bb);
+ UShort b = sel16x4_1(bb);
+ UShort a = sel16x4_0(bb);
+ return mk8x8(
+ qnarrow16Sto8(h),
+ qnarrow16Sto8(g),
+ qnarrow16Sto8(f),
+ qnarrow16Sto8(e),
+ qnarrow16Sto8(d),
+ qnarrow16Sto8(c),
+ qnarrow16Sto8(b),
+ qnarrow16Sto8(a)
+ );
+}
+
+ULong h_generic_calc_QNarrow16Ux4 ( ULong aa, ULong bb )
+{
+ UShort h = sel16x4_3(aa);
+ UShort g = sel16x4_2(aa);
+ UShort f = sel16x4_1(aa);
+ UShort e = sel16x4_0(aa);
+ UShort d = sel16x4_3(bb);
+ UShort c = sel16x4_2(bb);
+ UShort b = sel16x4_1(bb);
+ UShort a = sel16x4_0(bb);
+ return mk8x8(
+ qnarrow16Uto8(h),
+ qnarrow16Uto8(g),
+ qnarrow16Uto8(f),
+ qnarrow16Uto8(e),
+ qnarrow16Uto8(d),
+ qnarrow16Uto8(c),
+ qnarrow16Uto8(b),
+ qnarrow16Uto8(a)
+ );
+}
+
+/* ------------ Interleaving ------------ */
+
+ULong h_generic_calc_InterleaveHI8x8 ( ULong aa, ULong bb )
+{
+ return mk8x8(
+ sel8x8_7(aa),
+ sel8x8_7(bb),
+ sel8x8_6(aa),
+ sel8x8_6(bb),
+ sel8x8_5(aa),
+ sel8x8_5(bb),
+ sel8x8_4(aa),
+ sel8x8_4(bb)
+ );
+}
+
+ULong h_generic_calc_InterleaveLO8x8 ( ULong aa, ULong bb )
+{
+ return mk8x8(
+ sel8x8_3(aa),
+ sel8x8_3(bb),
+ sel8x8_2(aa),
+ sel8x8_2(bb),
+ sel8x8_1(aa),
+ sel8x8_1(bb),
+ sel8x8_0(aa),
+ sel8x8_0(bb)
+ );
+}
+
+ULong h_generic_calc_InterleaveHI16x4 ( ULong aa, ULong bb )
+{
+ return mk16x4(
+ sel16x4_3(aa),
+ sel16x4_3(bb),
+ sel16x4_2(aa),
+ sel16x4_2(bb)
+ );
+}
+
+ULong h_generic_calc_InterleaveLO16x4 ( ULong aa, ULong bb )
+{
+ return mk16x4(
+ sel16x4_1(aa),
+ sel16x4_1(bb),
+ sel16x4_0(aa),
+ sel16x4_0(bb)
+ );
+}
+
+ULong h_generic_calc_InterleaveHI32x2 ( ULong aa, ULong bb )
+{
+ return mk32x2(
+ sel32x2_1(aa),
+ sel32x2_1(bb)
+ );
+}
+
+ULong h_generic_calc_InterleaveLO32x2 ( ULong aa, ULong bb )
+{
+ return mk32x2(
+ sel32x2_0(aa),
+ sel32x2_0(bb)
+ );
+}
+
+/* ------------ Concatenation ------------ */
+
+ULong h_generic_calc_CatOddLanes16x4 ( ULong aa, ULong bb )
+{
+ return mk16x4(
+ sel16x4_3(aa),
+ sel16x4_1(aa),
+ sel16x4_3(bb),
+ sel16x4_1(bb)
+ );
+}
+
+ULong h_generic_calc_CatEvenLanes16x4 ( ULong aa, ULong bb )
+{
+ return mk16x4(
+ sel16x4_2(aa),
+ sel16x4_0(aa),
+ sel16x4_2(bb),
+ sel16x4_0(bb)
+ );
+}
+
+/* misc hack looking for a proper home */
+ULong h_generic_calc_Perm8x8 ( ULong aa, ULong bb )
+{
+ return mk8x8(
+ index8x8(aa, sel8x8_7(bb)),
+ index8x8(aa, sel8x8_6(bb)),
+ index8x8(aa, sel8x8_5(bb)),
+ index8x8(aa, sel8x8_4(bb)),
+ index8x8(aa, sel8x8_3(bb)),
+ index8x8(aa, sel8x8_2(bb)),
+ index8x8(aa, sel8x8_1(bb)),
+ index8x8(aa, sel8x8_0(bb))
+ );
+}
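+
+/* Perm8x8 example: each byte of bb selects, by its low 3 bits, a
+ byte of aa. Hence Perm8x8(aa, 0x0001020304050607ULL) reverses the
+ bytes of aa, and Perm8x8(aa, 0) broadcasts byte 0 of aa to all
+ eight lanes. */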
+
+/* ------------ Shifting ------------ */
+/* Note that because these primops are undefined if the shift amount
+ equals or exceeds the lane width, the shift amount is masked so
+ that the scalar shifts are always in range. In fact, given the
+ semantics of these primops (ShlN16x4, etc) it is an error if in
+ fact we are ever given an out-of-range shift amount.
+*/
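+/* For instance ShlN16x4(0x0001000100010001ULL, 4) gives
+ 0x0010001000100010ULL: each 16-bit lane is shifted independently,
+ the amount having been masked to 0..15 beforehand. */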
+ULong h_generic_calc_ShlN32x2 ( ULong xx, UInt nn )
+{
+ /* vassert(nn < 32); */
+ nn &= 31;
+ return mk32x2(
+ shl32( sel32x2_1(xx), nn ),
+ shl32( sel32x2_0(xx), nn )
+ );
+}
+
+ULong h_generic_calc_ShlN16x4 ( ULong xx, UInt nn )
+{
+ /* vassert(nn < 16); */
+ nn &= 15;
+ return mk16x4(
+ shl16( sel16x4_3(xx), nn ),
+ shl16( sel16x4_2(xx), nn ),
+ shl16( sel16x4_1(xx), nn ),
+ shl16( sel16x4_0(xx), nn )
+ );
+}
+
+ULong h_generic_calc_ShlN8x8 ( ULong xx, UInt nn )
+{
+ /* vassert(nn < 8); */
+ nn &= 7;
+ return mk8x8(
+ shl8( sel8x8_7(xx), nn ),
+ shl8( sel8x8_6(xx), nn ),
+ shl8( sel8x8_5(xx), nn ),
+ shl8( sel8x8_4(xx), nn ),
+ shl8( sel8x8_3(xx), nn ),
+ shl8( sel8x8_2(xx), nn ),
+ shl8( sel8x8_1(xx), nn ),
+ shl8( sel8x8_0(xx), nn )
+ );
+}
+
+ULong h_generic_calc_ShrN32x2 ( ULong xx, UInt nn )
+{
+ /* vassert(nn < 32); */
+ nn &= 31;
+ return mk32x2(
+ shr32( sel32x2_1(xx), nn ),
+ shr32( sel32x2_0(xx), nn )
+ );
+}
+
+ULong h_generic_calc_ShrN16x4 ( ULong xx, UInt nn )
+{
+ /* vassert(nn < 16); */
+ nn &= 15;
+ return mk16x4(
+ shr16( sel16x4_3(xx), nn ),
+ shr16( sel16x4_2(xx), nn ),
+ shr16( sel16x4_1(xx), nn ),
+ shr16( sel16x4_0(xx), nn )
+ );
+}
+
+ULong h_generic_calc_SarN32x2 ( ULong xx, UInt nn )
+{
+ /* vassert(nn < 32); */
+ nn &= 31;
+ return mk32x2(
+ sar32( sel32x2_1(xx), nn ),
+ sar32( sel32x2_0(xx), nn )
+ );
+}
+
+ULong h_generic_calc_SarN16x4 ( ULong xx, UInt nn )
+{
+ /* vassert(nn < 16); */
+ nn &= 15;
+ return mk16x4(
+ sar16( sel16x4_3(xx), nn ),
+ sar16( sel16x4_2(xx), nn ),
+ sar16( sel16x4_1(xx), nn ),
+ sar16( sel16x4_0(xx), nn )
+ );
+}
+
+ULong h_generic_calc_SarN8x8 ( ULong xx, UInt nn )
+{
+ /* vassert(nn < 8); */
+ nn &= 7;
+ return mk8x8(
+ sar8( sel8x8_7(xx), nn ),
+ sar8( sel8x8_6(xx), nn ),
+ sar8( sel8x8_5(xx), nn ),
+ sar8( sel8x8_4(xx), nn ),
+ sar8( sel8x8_3(xx), nn ),
+ sar8( sel8x8_2(xx), nn ),
+ sar8( sel8x8_1(xx), nn ),
+ sar8( sel8x8_0(xx), nn )
+ );
+}
+
+/* ------------ Averaging ------------ */
+
+ULong h_generic_calc_Avg8Ux8 ( ULong xx, ULong yy )
+{
+ return mk8x8(
+ avg8U( sel8x8_7(xx), sel8x8_7(yy) ),
+ avg8U( sel8x8_6(xx), sel8x8_6(yy) ),
+ avg8U( sel8x8_5(xx), sel8x8_5(yy) ),
+ avg8U( sel8x8_4(xx), sel8x8_4(yy) ),
+ avg8U( sel8x8_3(xx), sel8x8_3(yy) ),
+ avg8U( sel8x8_2(xx), sel8x8_2(yy) ),
+ avg8U( sel8x8_1(xx), sel8x8_1(yy) ),
+ avg8U( sel8x8_0(xx), sel8x8_0(yy) )
+ );
+}
+
+ULong h_generic_calc_Avg16Ux4 ( ULong xx, ULong yy )
+{
+ return mk16x4(
+ avg16U( sel16x4_3(xx), sel16x4_3(yy) ),
+ avg16U( sel16x4_2(xx), sel16x4_2(yy) ),
+ avg16U( sel16x4_1(xx), sel16x4_1(yy) ),
+ avg16U( sel16x4_0(xx), sel16x4_0(yy) )
+ );
+}
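+
+/* Assuming avg8U/avg16U (defined earlier in this file) compute the
+   usual rounding average (x + y + 1) >> 1 without intermediate
+   overflow, these match e.g. x86's PAVGB/PAVGW:
+   avg8U(0xFF, 0x00) == 0x80. */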
+
+/* ------------ max/min ------------ */
+
+ULong h_generic_calc_Max16Sx4 ( ULong xx, ULong yy )
+{
+ return mk16x4(
+ max16S( sel16x4_3(xx), sel16x4_3(yy) ),
+ max16S( sel16x4_2(xx), sel16x4_2(yy) ),
+ max16S( sel16x4_1(xx), sel16x4_1(yy) ),
+ max16S( sel16x4_0(xx), sel16x4_0(yy) )
+ );
+}
+
+ULong h_generic_calc_Max8Ux8 ( ULong xx, ULong yy )
+{
+ return mk8x8(
+ max8U( sel8x8_7(xx), sel8x8_7(yy) ),
+ max8U( sel8x8_6(xx), sel8x8_6(yy) ),
+ max8U( sel8x8_5(xx), sel8x8_5(yy) ),
+ max8U( sel8x8_4(xx), sel8x8_4(yy) ),
+ max8U( sel8x8_3(xx), sel8x8_3(yy) ),
+ max8U( sel8x8_2(xx), sel8x8_2(yy) ),
+ max8U( sel8x8_1(xx), sel8x8_1(yy) ),
+ max8U( sel8x8_0(xx), sel8x8_0(yy) )
+ );
+}
+
+ULong h_generic_calc_Min16Sx4 ( ULong xx, ULong yy )
+{
+ return mk16x4(
+ min16S( sel16x4_3(xx), sel16x4_3(yy) ),
+ min16S( sel16x4_2(xx), sel16x4_2(yy) ),
+ min16S( sel16x4_1(xx), sel16x4_1(yy) ),
+ min16S( sel16x4_0(xx), sel16x4_0(yy) )
+ );
+}
+
+ULong h_generic_calc_Min8Ux8 ( ULong xx, ULong yy )
+{
+ return mk8x8(
+ min8U( sel8x8_7(xx), sel8x8_7(yy) ),
+ min8U( sel8x8_6(xx), sel8x8_6(yy) ),
+ min8U( sel8x8_5(xx), sel8x8_5(yy) ),
+ min8U( sel8x8_4(xx), sel8x8_4(yy) ),
+ min8U( sel8x8_3(xx), sel8x8_3(yy) ),
+ min8U( sel8x8_2(xx), sel8x8_2(yy) ),
+ min8U( sel8x8_1(xx), sel8x8_1(yy) ),
+ min8U( sel8x8_0(xx), sel8x8_0(yy) )
+ );
+}
+
+/* ------------ SOME 32-bit SIMD HELPERS TOO ------------ */
+
+/* Tuple/select functions for 16x2 vectors. */
+static inline UInt mk16x2 ( UShort w1, UShort w2 ) {
+ return (((UInt)w1) << 16) | ((UInt)w2);
+}
+
+static inline UShort sel16x2_1 ( UInt w32 ) {
+ return 0xFFFF & (UShort)(w32 >> 16);
+}
+static inline UShort sel16x2_0 ( UInt w32 ) {
+ return 0xFFFF & (UShort)(w32);
+}
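+
+/* For instance, mk16x2(0xAAAA, 0xBBBB) == 0xAAAABBBB, and sel16x2_1
+   and sel16x2_0 recover 0xAAAA and 0xBBBB respectively: lane 1 is
+   the more significant half. */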
+
+static inline UInt mk8x4 ( UChar w3, UChar w2,
+ UChar w1, UChar w0 ) {
+ UInt w32 = (((UInt)w3) << 24) | (((UInt)w2) << 16)
+ | (((UInt)w1) << 8) | (((UInt)w0) << 0);
+ return w32;
+}
+
+static inline UChar sel8x4_3 ( UInt w32 ) {
+ return toUChar(0xFF & (w32 >> 24));
+}
+static inline UChar sel8x4_2 ( UInt w32 ) {
+ return toUChar(0xFF & (w32 >> 16));
+}
+static inline UChar sel8x4_1 ( UInt w32 ) {
+ return toUChar(0xFF & (w32 >> 8));
+}
+static inline UChar sel8x4_0 ( UInt w32 ) {
+ return toUChar(0xFF & (w32 >> 0));
+}
+
+
+/* ----------------------------------------------------- */
+/* More externally visible functions. These simply
+ implement the corresponding IR primops. */
+/* ----------------------------------------------------- */
+
+/* ------ 16x2 ------ */
+
+UInt h_generic_calc_Add16x2 ( UInt xx, UInt yy )
+{
+ return mk16x2( sel16x2_1(xx) + sel16x2_1(yy),
+ sel16x2_0(xx) + sel16x2_0(yy) );
+}
+
+UInt h_generic_calc_Sub16x2 ( UInt xx, UInt yy )
+{
+ return mk16x2( sel16x2_1(xx) - sel16x2_1(yy),
+ sel16x2_0(xx) - sel16x2_0(yy) );
+}
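+
+/* The H* (halving) variants below narrow a 17-bit intermediate sum
+   by one place: assuming hadd16U(x,y) is ((UInt)x + (UInt)y) >> 1,
+   hadd16U(0xFFFF, 0xFFFF) == 0xFFFF with no overflow.  The Q*
+   (saturating) variants clamp instead: qadd16U saturates at 0xFFFF,
+   qadd16S at the range -0x8000 .. 0x7FFF. */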
+
+UInt h_generic_calc_HAdd16Ux2 ( UInt xx, UInt yy )
+{
+ return mk16x2( hadd16U( sel16x2_1(xx), sel16x2_1(yy) ),
+ hadd16U( sel16x2_0(xx), sel16x2_0(yy) ) );
+}
+
+UInt h_generic_calc_HAdd16Sx2 ( UInt xx, UInt yy )
+{
+ return mk16x2( hadd16S( sel16x2_1(xx), sel16x2_1(yy) ),
+ hadd16S( sel16x2_0(xx), sel16x2_0(yy) ) );
+}
+
+UInt h_generic_calc_HSub16Ux2 ( UInt xx, UInt yy )
+{
+ return mk16x2( hsub16U( sel16x2_1(xx), sel16x2_1(yy) ),
+ hsub16U( sel16x2_0(xx), sel16x2_0(yy) ) );
+}
+
+UInt h_generic_calc_HSub16Sx2 ( UInt xx, UInt yy )
+{
+ return mk16x2( hsub16S( sel16x2_1(xx), sel16x2_1(yy) ),
+ hsub16S( sel16x2_0(xx), sel16x2_0(yy) ) );
+}
+
+UInt h_generic_calc_QAdd16Ux2 ( UInt xx, UInt yy )
+{
+ return mk16x2( qadd16U( sel16x2_1(xx), sel16x2_1(yy) ),
+ qadd16U( sel16x2_0(xx), sel16x2_0(yy) ) );
+}
+
+UInt h_generic_calc_QAdd16Sx2 ( UInt xx, UInt yy )
+{
+ return mk16x2( qadd16S( sel16x2_1(xx), sel16x2_1(yy) ),
+ qadd16S( sel16x2_0(xx), sel16x2_0(yy) ) );
+}
+
+UInt h_generic_calc_QSub16Ux2 ( UInt xx, UInt yy )
+{
+ return mk16x2( qsub16U( sel16x2_1(xx), sel16x2_1(yy) ),
+ qsub16U( sel16x2_0(xx), sel16x2_0(yy) ) );
+}
+
+UInt h_generic_calc_QSub16Sx2 ( UInt xx, UInt yy )
+{
+ return mk16x2( qsub16S( sel16x2_1(xx), sel16x2_1(yy) ),
+ qsub16S( sel16x2_0(xx), sel16x2_0(yy) ) );
+}
+
+/* ------ 8x4 ------ */
+
+UInt h_generic_calc_Add8x4 ( UInt xx, UInt yy )
+{
+ return mk8x4(
+ sel8x4_3(xx) + sel8x4_3(yy),
+ sel8x4_2(xx) + sel8x4_2(yy),
+ sel8x4_1(xx) + sel8x4_1(yy),
+ sel8x4_0(xx) + sel8x4_0(yy)
+ );
+}
+
+UInt h_generic_calc_Sub8x4 ( UInt xx, UInt yy )
+{
+ return mk8x4(
+ sel8x4_3(xx) - sel8x4_3(yy),
+ sel8x4_2(xx) - sel8x4_2(yy),
+ sel8x4_1(xx) - sel8x4_1(yy),
+ sel8x4_0(xx) - sel8x4_0(yy)
+ );
+}
+
+UInt h_generic_calc_HAdd8Ux4 ( UInt xx, UInt yy )
+{
+ return mk8x4(
+ hadd8U( sel8x4_3(xx), sel8x4_3(yy) ),
+ hadd8U( sel8x4_2(xx), sel8x4_2(yy) ),
+ hadd8U( sel8x4_1(xx), sel8x4_1(yy) ),
+ hadd8U( sel8x4_0(xx), sel8x4_0(yy) )
+ );
+}
+
+UInt h_generic_calc_HAdd8Sx4 ( UInt xx, UInt yy )
+{
+ return mk8x4(
+ hadd8S( sel8x4_3(xx), sel8x4_3(yy) ),
+ hadd8S( sel8x4_2(xx), sel8x4_2(yy) ),
+ hadd8S( sel8x4_1(xx), sel8x4_1(yy) ),
+ hadd8S( sel8x4_0(xx), sel8x4_0(yy) )
+ );
+}
+
+UInt h_generic_calc_HSub8Ux4 ( UInt xx, UInt yy )
+{
+ return mk8x4(
+ hsub8U( sel8x4_3(xx), sel8x4_3(yy) ),
+ hsub8U( sel8x4_2(xx), sel8x4_2(yy) ),
+ hsub8U( sel8x4_1(xx), sel8x4_1(yy) ),
+ hsub8U( sel8x4_0(xx), sel8x4_0(yy) )
+ );
+}
+
+UInt h_generic_calc_HSub8Sx4 ( UInt xx, UInt yy )
+{
+ return mk8x4(
+ hsub8S( sel8x4_3(xx), sel8x4_3(yy) ),
+ hsub8S( sel8x4_2(xx), sel8x4_2(yy) ),
+ hsub8S( sel8x4_1(xx), sel8x4_1(yy) ),
+ hsub8S( sel8x4_0(xx), sel8x4_0(yy) )
+ );
+}
+
+UInt h_generic_calc_QAdd8Ux4 ( UInt xx, UInt yy )
+{
+ return mk8x4(
+ qadd8U( sel8x4_3(xx), sel8x4_3(yy) ),
+ qadd8U( sel8x4_2(xx), sel8x4_2(yy) ),
+ qadd8U( sel8x4_1(xx), sel8x4_1(yy) ),
+ qadd8U( sel8x4_0(xx), sel8x4_0(yy) )
+ );
+}
+
+UInt h_generic_calc_QAdd8Sx4 ( UInt xx, UInt yy )
+{
+ return mk8x4(
+ qadd8S( sel8x4_3(xx), sel8x4_3(yy) ),
+ qadd8S( sel8x4_2(xx), sel8x4_2(yy) ),
+ qadd8S( sel8x4_1(xx), sel8x4_1(yy) ),
+ qadd8S( sel8x4_0(xx), sel8x4_0(yy) )
+ );
+}
+
+UInt h_generic_calc_QSub8Ux4 ( UInt xx, UInt yy )
+{
+ return mk8x4(
+ qsub8U( sel8x4_3(xx), sel8x4_3(yy) ),
+ qsub8U( sel8x4_2(xx), sel8x4_2(yy) ),
+ qsub8U( sel8x4_1(xx), sel8x4_1(yy) ),
+ qsub8U( sel8x4_0(xx), sel8x4_0(yy) )
+ );
+}
+
+UInt h_generic_calc_QSub8Sx4 ( UInt xx, UInt yy )
+{
+ return mk8x4(
+ qsub8S( sel8x4_3(xx), sel8x4_3(yy) ),
+ qsub8S( sel8x4_2(xx), sel8x4_2(yy) ),
+ qsub8S( sel8x4_1(xx), sel8x4_1(yy) ),
+ qsub8S( sel8x4_0(xx), sel8x4_0(yy) )
+ );
+}
+
+UInt h_generic_calc_CmpNEZ16x2 ( UInt xx )
+{
+ return mk16x2(
+ cmpnez16( sel16x2_1(xx) ),
+ cmpnez16( sel16x2_0(xx) )
+ );
+}
+
+UInt h_generic_calc_CmpNEZ8x4 ( UInt xx )
+{
+ return mk8x4(
+ cmpnez8( sel8x4_3(xx) ),
+ cmpnez8( sel8x4_2(xx) ),
+ cmpnez8( sel8x4_1(xx) ),
+ cmpnez8( sel8x4_0(xx) )
+ );
+}
+
+UInt h_generic_calc_Sad8Ux4 ( UInt xx, UInt yy )
+{
+ return absdiff8U( sel8x4_3(xx), sel8x4_3(yy) )
+ + absdiff8U( sel8x4_2(xx), sel8x4_2(yy) )
+ + absdiff8U( sel8x4_1(xx), sel8x4_1(yy) )
+ + absdiff8U( sel8x4_0(xx), sel8x4_0(yy) );
+}
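+
+/* Worked example, assuming absdiff8U(x, y) == |x - y| on unsigned
+   bytes: with xx lanes 3..0 holding (1,2,3,4) and yy holding
+   (4,3,2,1), the sum is |1-4| + |2-3| + |3-2| + |4-1| = 8. */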
+
+
+/*---------------------------------------------------------------*/
+/*--- end host_generic_simd64.c ---*/
+/*---------------------------------------------------------------*/
diff --git a/VEX/priv/host_generic_simd64.h b/VEX/priv/host_generic_simd64.h
new file mode 100644
index 0000000..e854fc7
--- /dev/null
+++ b/VEX/priv/host_generic_simd64.h
@@ -0,0 +1,161 @@
+
+/*---------------------------------------------------------------*/
+/*--- begin host_generic_simd64.h ---*/
+/*---------------------------------------------------------------*/
+
+/*
+ This file is part of Valgrind, a dynamic binary instrumentation
+ framework.
+
+ Copyright (C) 2004-2010 OpenWorks LLP
+ info@open-works.net
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA.
+
+ The GNU General Public License is contained in the file COPYING.
+
+ Neither the names of the U.S. Department of Energy nor the
+ University of California nor the names of its contributors may be
+ used to endorse or promote products derived from this software
+ without prior written permission.
+*/
+
+/* Generic helper functions for doing 64-bit SIMD arithmetic in cases
+ where the instruction selectors cannot generate code in-line.
+ These are purely back-end entities and cannot be seen/referenced
+ as clean helper functions from IR.
+
+ These will get called from generated code and therefore should be
+   well behaved -- no floating-point or MMX insns, just straight
+ integer code.
+
+ Each function implements the correspondingly-named IR primop.
+*/
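+
+/* Illustrative usage: the instruction selectors arrange calls such as
+
+      ULong r = h_generic_calc_Add16x4(0x0001000200030004ULL,
+                                       0x000A000B000C000DULL);
+
+   giving r == 0x000B000D000F0011ULL -- each 16-bit lane is added
+   independently, with wraparound and no cross-lane carries. */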
+
+#ifndef __VEX_HOST_GENERIC_SIMD64_H
+#define __VEX_HOST_GENERIC_SIMD64_H
+
+#include "libvex_basictypes.h"
+
+/* DO NOT MAKE THESE INTO REGPARM FNS! THIS WILL BREAK CALLING
+   SEQUENCES GENERATED BY host_x86_isel.c. */
+
+extern ULong h_generic_calc_Add32x2 ( ULong, ULong );
+extern ULong h_generic_calc_Add16x4 ( ULong, ULong );
+extern ULong h_generic_calc_Add8x8 ( ULong, ULong );
+
+extern ULong h_generic_calc_QAdd16Sx4 ( ULong, ULong );
+extern ULong h_generic_calc_QAdd8Sx8 ( ULong, ULong );
+extern ULong h_generic_calc_QAdd16Ux4 ( ULong, ULong );
+extern ULong h_generic_calc_QAdd8Ux8 ( ULong, ULong );
+
+extern ULong h_generic_calc_Sub32x2 ( ULong, ULong );
+extern ULong h_generic_calc_Sub16x4 ( ULong, ULong );
+extern ULong h_generic_calc_Sub8x8 ( ULong, ULong );
+
+extern ULong h_generic_calc_QSub16Sx4 ( ULong, ULong );
+extern ULong h_generic_calc_QSub8Sx8 ( ULong, ULong );
+extern ULong h_generic_calc_QSub16Ux4 ( ULong, ULong );
+extern ULong h_generic_calc_QSub8Ux8 ( ULong, ULong );
+
+extern ULong h_generic_calc_Mul16x4 ( ULong, ULong );
+extern ULong h_generic_calc_Mul32x2 ( ULong, ULong );
+extern ULong h_generic_calc_MulHi16Sx4 ( ULong, ULong );
+extern ULong h_generic_calc_MulHi16Ux4 ( ULong, ULong );
+
+extern ULong h_generic_calc_CmpEQ32x2 ( ULong, ULong );
+extern ULong h_generic_calc_CmpEQ16x4 ( ULong, ULong );
+extern ULong h_generic_calc_CmpEQ8x8 ( ULong, ULong );
+extern ULong h_generic_calc_CmpGT32Sx2 ( ULong, ULong );
+extern ULong h_generic_calc_CmpGT16Sx4 ( ULong, ULong );
+extern ULong h_generic_calc_CmpGT8Sx8 ( ULong, ULong );
+
+extern ULong h_generic_calc_CmpNEZ32x2 ( ULong );
+extern ULong h_generic_calc_CmpNEZ16x4 ( ULong );
+extern ULong h_generic_calc_CmpNEZ8x8 ( ULong );
+
+extern ULong h_generic_calc_QNarrow32Sx2 ( ULong, ULong );
+extern ULong h_generic_calc_QNarrow16Sx4 ( ULong, ULong );
+extern ULong h_generic_calc_QNarrow16Ux4 ( ULong, ULong );
+
+extern ULong h_generic_calc_InterleaveHI8x8 ( ULong, ULong );
+extern ULong h_generic_calc_InterleaveLO8x8 ( ULong, ULong );
+extern ULong h_generic_calc_InterleaveHI16x4 ( ULong, ULong );
+extern ULong h_generic_calc_InterleaveLO16x4 ( ULong, ULong );
+extern ULong h_generic_calc_InterleaveHI32x2 ( ULong, ULong );
+extern ULong h_generic_calc_InterleaveLO32x2 ( ULong, ULong );
+
+extern ULong h_generic_calc_CatOddLanes16x4 ( ULong, ULong );
+extern ULong h_generic_calc_CatEvenLanes16x4 ( ULong, ULong );
+extern ULong h_generic_calc_Perm8x8 ( ULong, ULong );
+
+extern ULong h_generic_calc_ShlN8x8 ( ULong, UInt );
+extern ULong h_generic_calc_ShlN16x4 ( ULong, UInt );
+extern ULong h_generic_calc_ShlN32x2 ( ULong, UInt );
+
+extern ULong h_generic_calc_ShrN16x4 ( ULong, UInt );
+extern ULong h_generic_calc_ShrN32x2 ( ULong, UInt );
+
+extern ULong h_generic_calc_SarN8x8 ( ULong, UInt );
+extern ULong h_generic_calc_SarN16x4 ( ULong, UInt );
+extern ULong h_generic_calc_SarN32x2 ( ULong, UInt );
+
+extern ULong h_generic_calc_Avg8Ux8 ( ULong, ULong );
+extern ULong h_generic_calc_Avg16Ux4 ( ULong, ULong );
+
+extern ULong h_generic_calc_Max16Sx4 ( ULong, ULong );
+extern ULong h_generic_calc_Max8Ux8 ( ULong, ULong );
+extern ULong h_generic_calc_Min16Sx4 ( ULong, ULong );
+extern ULong h_generic_calc_Min8Ux8 ( ULong, ULong );
+
+/* 32-bit SIMD HELPERS */
+
+extern UInt h_generic_calc_Add16x2 ( UInt, UInt );
+extern UInt h_generic_calc_Sub16x2 ( UInt, UInt );
+
+extern UInt h_generic_calc_HAdd16Ux2 ( UInt, UInt );
+extern UInt h_generic_calc_HAdd16Sx2 ( UInt, UInt );
+extern UInt h_generic_calc_HSub16Ux2 ( UInt, UInt );
+extern UInt h_generic_calc_HSub16Sx2 ( UInt, UInt );
+
+extern UInt h_generic_calc_QAdd16Ux2 ( UInt, UInt );
+extern UInt h_generic_calc_QAdd16Sx2 ( UInt, UInt );
+extern UInt h_generic_calc_QSub16Ux2 ( UInt, UInt );
+extern UInt h_generic_calc_QSub16Sx2 ( UInt, UInt );
+
+extern UInt h_generic_calc_Add8x4 ( UInt, UInt );
+extern UInt h_generic_calc_Sub8x4 ( UInt, UInt );
+
+extern UInt h_generic_calc_HAdd8Ux4 ( UInt, UInt );
+extern UInt h_generic_calc_HAdd8Sx4 ( UInt, UInt );
+extern UInt h_generic_calc_HSub8Ux4 ( UInt, UInt );
+extern UInt h_generic_calc_HSub8Sx4 ( UInt, UInt );
+
+extern UInt h_generic_calc_QAdd8Ux4 ( UInt, UInt );
+extern UInt h_generic_calc_QAdd8Sx4 ( UInt, UInt );
+extern UInt h_generic_calc_QSub8Ux4 ( UInt, UInt );
+extern UInt h_generic_calc_QSub8Sx4 ( UInt, UInt );
+
+extern UInt h_generic_calc_Sad8Ux4 ( UInt, UInt );
+
+extern UInt h_generic_calc_CmpNEZ16x2 ( UInt );
+extern UInt h_generic_calc_CmpNEZ8x4 ( UInt );
+
+#endif /* ndef __VEX_HOST_GENERIC_SIMD64_H */
+
+/*---------------------------------------------------------------*/
+/*--- end host_generic_simd64.h ---*/
+/*---------------------------------------------------------------*/
diff --git a/VEX/priv/host_ppc_defs.c b/VEX/priv/host_ppc_defs.c
new file mode 100644
index 0000000..54fd2fd
--- /dev/null
+++ b/VEX/priv/host_ppc_defs.c
@@ -0,0 +1,3851 @@
+
+/*---------------------------------------------------------------*/
+/*--- begin host_ppc_defs.c ---*/
+/*---------------------------------------------------------------*/
+
+/*
+ This file is part of Valgrind, a dynamic binary instrumentation
+ framework.
+
+ Copyright (C) 2004-2010 OpenWorks LLP
+ info@open-works.net
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA.
+
+ The GNU General Public License is contained in the file COPYING.
+
+ Neither the names of the U.S. Department of Energy nor the
+ University of California nor the names of its contributors may be
+ used to endorse or promote products derived from this software
+ without prior written permission.
+*/
+
+#include "libvex_basictypes.h"
+#include "libvex.h"
+#include "libvex_trc_values.h"
+
+#include "main_util.h"
+#include "host_generic_regs.h"
+#include "host_ppc_defs.h"
+
+
+/* --------- Registers. --------- */
+
+void ppHRegPPC ( HReg reg )
+{
+ Int r;
+ static HChar* ireg32_names[32]
+ = { "%r0", "%r1", "%r2", "%r3",
+ "%r4", "%r5", "%r6", "%r7",
+ "%r8", "%r9", "%r10", "%r11",
+ "%r12", "%r13", "%r14", "%r15",
+ "%r16", "%r17", "%r18", "%r19",
+ "%r20", "%r21", "%r22", "%r23",
+ "%r24", "%r25", "%r26", "%r27",
+ "%r28", "%r29", "%r30", "%r31" };
+ /* Be generic for all virtual regs. */
+ if (hregIsVirtual(reg)) {
+ ppHReg(reg);
+ return;
+ }
+ /* But specific for real regs. */
+ switch (hregClass(reg)) {
+ case HRcInt64:
+ r = hregNumber(reg);
+ vassert(r >= 0 && r < 32);
+ vex_printf("%s", ireg32_names[r]);
+ return;
+ case HRcInt32:
+ r = hregNumber(reg);
+ vassert(r >= 0 && r < 32);
+ vex_printf("%s", ireg32_names[r]);
+ return;
+ case HRcFlt64:
+ r = hregNumber(reg);
+ vassert(r >= 0 && r < 32);
+ vex_printf("%%fr%d", r);
+ return;
+ case HRcVec128:
+ r = hregNumber(reg);
+ vassert(r >= 0 && r < 32);
+ vex_printf("%%v%d", r);
+ return;
+ default:
+ vpanic("ppHRegPPC");
+ }
+}
+
+
+#define MkHRegGPR(_n, _mode64) \
+ mkHReg(_n, _mode64 ? HRcInt64 : HRcInt32, False)
+
+HReg hregPPC_GPR0 ( Bool mode64 ) { return MkHRegGPR( 0, mode64); }
+HReg hregPPC_GPR1 ( Bool mode64 ) { return MkHRegGPR( 1, mode64); }
+HReg hregPPC_GPR2 ( Bool mode64 ) { return MkHRegGPR( 2, mode64); }
+HReg hregPPC_GPR3 ( Bool mode64 ) { return MkHRegGPR( 3, mode64); }
+HReg hregPPC_GPR4 ( Bool mode64 ) { return MkHRegGPR( 4, mode64); }
+HReg hregPPC_GPR5 ( Bool mode64 ) { return MkHRegGPR( 5, mode64); }
+HReg hregPPC_GPR6 ( Bool mode64 ) { return MkHRegGPR( 6, mode64); }
+HReg hregPPC_GPR7 ( Bool mode64 ) { return MkHRegGPR( 7, mode64); }
+HReg hregPPC_GPR8 ( Bool mode64 ) { return MkHRegGPR( 8, mode64); }
+HReg hregPPC_GPR9 ( Bool mode64 ) { return MkHRegGPR( 9, mode64); }
+HReg hregPPC_GPR10 ( Bool mode64 ) { return MkHRegGPR(10, mode64); }
+HReg hregPPC_GPR11 ( Bool mode64 ) { return MkHRegGPR(11, mode64); }
+HReg hregPPC_GPR12 ( Bool mode64 ) { return MkHRegGPR(12, mode64); }
+HReg hregPPC_GPR13 ( Bool mode64 ) { return MkHRegGPR(13, mode64); }
+HReg hregPPC_GPR14 ( Bool mode64 ) { return MkHRegGPR(14, mode64); }
+HReg hregPPC_GPR15 ( Bool mode64 ) { return MkHRegGPR(15, mode64); }
+HReg hregPPC_GPR16 ( Bool mode64 ) { return MkHRegGPR(16, mode64); }
+HReg hregPPC_GPR17 ( Bool mode64 ) { return MkHRegGPR(17, mode64); }
+HReg hregPPC_GPR18 ( Bool mode64 ) { return MkHRegGPR(18, mode64); }
+HReg hregPPC_GPR19 ( Bool mode64 ) { return MkHRegGPR(19, mode64); }
+HReg hregPPC_GPR20 ( Bool mode64 ) { return MkHRegGPR(20, mode64); }
+HReg hregPPC_GPR21 ( Bool mode64 ) { return MkHRegGPR(21, mode64); }
+HReg hregPPC_GPR22 ( Bool mode64 ) { return MkHRegGPR(22, mode64); }
+HReg hregPPC_GPR23 ( Bool mode64 ) { return MkHRegGPR(23, mode64); }
+HReg hregPPC_GPR24 ( Bool mode64 ) { return MkHRegGPR(24, mode64); }
+HReg hregPPC_GPR25 ( Bool mode64 ) { return MkHRegGPR(25, mode64); }
+HReg hregPPC_GPR26 ( Bool mode64 ) { return MkHRegGPR(26, mode64); }
+HReg hregPPC_GPR27 ( Bool mode64 ) { return MkHRegGPR(27, mode64); }
+HReg hregPPC_GPR28 ( Bool mode64 ) { return MkHRegGPR(28, mode64); }
+HReg hregPPC_GPR29 ( Bool mode64 ) { return MkHRegGPR(29, mode64); }
+HReg hregPPC_GPR30 ( Bool mode64 ) { return MkHRegGPR(30, mode64); }
+HReg hregPPC_GPR31 ( Bool mode64 ) { return MkHRegGPR(31, mode64); }
+
+#undef MkHRegGPR
+
+HReg hregPPC_FPR0 ( void ) { return mkHReg( 0, HRcFlt64, False); }
+HReg hregPPC_FPR1 ( void ) { return mkHReg( 1, HRcFlt64, False); }
+HReg hregPPC_FPR2 ( void ) { return mkHReg( 2, HRcFlt64, False); }
+HReg hregPPC_FPR3 ( void ) { return mkHReg( 3, HRcFlt64, False); }
+HReg hregPPC_FPR4 ( void ) { return mkHReg( 4, HRcFlt64, False); }
+HReg hregPPC_FPR5 ( void ) { return mkHReg( 5, HRcFlt64, False); }
+HReg hregPPC_FPR6 ( void ) { return mkHReg( 6, HRcFlt64, False); }
+HReg hregPPC_FPR7 ( void ) { return mkHReg( 7, HRcFlt64, False); }
+HReg hregPPC_FPR8 ( void ) { return mkHReg( 8, HRcFlt64, False); }
+HReg hregPPC_FPR9 ( void ) { return mkHReg( 9, HRcFlt64, False); }
+HReg hregPPC_FPR10 ( void ) { return mkHReg(10, HRcFlt64, False); }
+HReg hregPPC_FPR11 ( void ) { return mkHReg(11, HRcFlt64, False); }
+HReg hregPPC_FPR12 ( void ) { return mkHReg(12, HRcFlt64, False); }
+HReg hregPPC_FPR13 ( void ) { return mkHReg(13, HRcFlt64, False); }
+HReg hregPPC_FPR14 ( void ) { return mkHReg(14, HRcFlt64, False); }
+HReg hregPPC_FPR15 ( void ) { return mkHReg(15, HRcFlt64, False); }
+HReg hregPPC_FPR16 ( void ) { return mkHReg(16, HRcFlt64, False); }
+HReg hregPPC_FPR17 ( void ) { return mkHReg(17, HRcFlt64, False); }
+HReg hregPPC_FPR18 ( void ) { return mkHReg(18, HRcFlt64, False); }
+HReg hregPPC_FPR19 ( void ) { return mkHReg(19, HRcFlt64, False); }
+HReg hregPPC_FPR20 ( void ) { return mkHReg(20, HRcFlt64, False); }
+HReg hregPPC_FPR21 ( void ) { return mkHReg(21, HRcFlt64, False); }
+HReg hregPPC_FPR22 ( void ) { return mkHReg(22, HRcFlt64, False); }
+HReg hregPPC_FPR23 ( void ) { return mkHReg(23, HRcFlt64, False); }
+HReg hregPPC_FPR24 ( void ) { return mkHReg(24, HRcFlt64, False); }
+HReg hregPPC_FPR25 ( void ) { return mkHReg(25, HRcFlt64, False); }
+HReg hregPPC_FPR26 ( void ) { return mkHReg(26, HRcFlt64, False); }
+HReg hregPPC_FPR27 ( void ) { return mkHReg(27, HRcFlt64, False); }
+HReg hregPPC_FPR28 ( void ) { return mkHReg(28, HRcFlt64, False); }
+HReg hregPPC_FPR29 ( void ) { return mkHReg(29, HRcFlt64, False); }
+HReg hregPPC_FPR30 ( void ) { return mkHReg(30, HRcFlt64, False); }
+HReg hregPPC_FPR31 ( void ) { return mkHReg(31, HRcFlt64, False); }
+
+HReg hregPPC_VR0 ( void ) { return mkHReg( 0, HRcVec128, False); }
+HReg hregPPC_VR1 ( void ) { return mkHReg( 1, HRcVec128, False); }
+HReg hregPPC_VR2 ( void ) { return mkHReg( 2, HRcVec128, False); }
+HReg hregPPC_VR3 ( void ) { return mkHReg( 3, HRcVec128, False); }
+HReg hregPPC_VR4 ( void ) { return mkHReg( 4, HRcVec128, False); }
+HReg hregPPC_VR5 ( void ) { return mkHReg( 5, HRcVec128, False); }
+HReg hregPPC_VR6 ( void ) { return mkHReg( 6, HRcVec128, False); }
+HReg hregPPC_VR7 ( void ) { return mkHReg( 7, HRcVec128, False); }
+HReg hregPPC_VR8 ( void ) { return mkHReg( 8, HRcVec128, False); }
+HReg hregPPC_VR9 ( void ) { return mkHReg( 9, HRcVec128, False); }
+HReg hregPPC_VR10 ( void ) { return mkHReg(10, HRcVec128, False); }
+HReg hregPPC_VR11 ( void ) { return mkHReg(11, HRcVec128, False); }
+HReg hregPPC_VR12 ( void ) { return mkHReg(12, HRcVec128, False); }
+HReg hregPPC_VR13 ( void ) { return mkHReg(13, HRcVec128, False); }
+HReg hregPPC_VR14 ( void ) { return mkHReg(14, HRcVec128, False); }
+HReg hregPPC_VR15 ( void ) { return mkHReg(15, HRcVec128, False); }
+HReg hregPPC_VR16 ( void ) { return mkHReg(16, HRcVec128, False); }
+HReg hregPPC_VR17 ( void ) { return mkHReg(17, HRcVec128, False); }
+HReg hregPPC_VR18 ( void ) { return mkHReg(18, HRcVec128, False); }
+HReg hregPPC_VR19 ( void ) { return mkHReg(19, HRcVec128, False); }
+HReg hregPPC_VR20 ( void ) { return mkHReg(20, HRcVec128, False); }
+HReg hregPPC_VR21 ( void ) { return mkHReg(21, HRcVec128, False); }
+HReg hregPPC_VR22 ( void ) { return mkHReg(22, HRcVec128, False); }
+HReg hregPPC_VR23 ( void ) { return mkHReg(23, HRcVec128, False); }
+HReg hregPPC_VR24 ( void ) { return mkHReg(24, HRcVec128, False); }
+HReg hregPPC_VR25 ( void ) { return mkHReg(25, HRcVec128, False); }
+HReg hregPPC_VR26 ( void ) { return mkHReg(26, HRcVec128, False); }
+HReg hregPPC_VR27 ( void ) { return mkHReg(27, HRcVec128, False); }
+HReg hregPPC_VR28 ( void ) { return mkHReg(28, HRcVec128, False); }
+HReg hregPPC_VR29 ( void ) { return mkHReg(29, HRcVec128, False); }
+HReg hregPPC_VR30 ( void ) { return mkHReg(30, HRcVec128, False); }
+HReg hregPPC_VR31 ( void ) { return mkHReg(31, HRcVec128, False); }
+
+void getAllocableRegs_PPC ( Int* nregs, HReg** arr, Bool mode64 )
+{
+ UInt i=0;
+ if (mode64)
+ *nregs = (32-9) + (32-24) + (32-24);
+ else
+ *nregs = (32-7) + (32-24) + (32-24);
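+   /* That is: 23 GPRs (r3-r10 and r14-r28) in 64-bit mode, or 25
+      GPRs (additionally r11 and r12) in 32-bit mode, plus 8 FPRs
+      (f14-f21) and 8 VRs (v20-v27) in either mode. */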
+ *arr = LibVEX_Alloc(*nregs * sizeof(HReg));
+   // GPR0 = scratch reg where possible - some ops interpret as value zero
+ // GPR1 = stack pointer
+ // GPR2 = TOC pointer
+ (*arr)[i++] = hregPPC_GPR3(mode64);
+ (*arr)[i++] = hregPPC_GPR4(mode64);
+ (*arr)[i++] = hregPPC_GPR5(mode64);
+ (*arr)[i++] = hregPPC_GPR6(mode64);
+ (*arr)[i++] = hregPPC_GPR7(mode64);
+ (*arr)[i++] = hregPPC_GPR8(mode64);
+ (*arr)[i++] = hregPPC_GPR9(mode64);
+ (*arr)[i++] = hregPPC_GPR10(mode64);
+ if (!mode64) {
+ /* in mode64:
+ r11 used for calls by ptr / env ptr for some langs
+ r12 used for exception handling and global linkage code */
+ (*arr)[i++] = hregPPC_GPR11(mode64);
+ (*arr)[i++] = hregPPC_GPR12(mode64);
+ }
+ // GPR13 = thread specific pointer
+ // GPR14 and above are callee save. Yay.
+ (*arr)[i++] = hregPPC_GPR14(mode64);
+ (*arr)[i++] = hregPPC_GPR15(mode64);
+ (*arr)[i++] = hregPPC_GPR16(mode64);
+ (*arr)[i++] = hregPPC_GPR17(mode64);
+ (*arr)[i++] = hregPPC_GPR18(mode64);
+ (*arr)[i++] = hregPPC_GPR19(mode64);
+ (*arr)[i++] = hregPPC_GPR20(mode64);
+ (*arr)[i++] = hregPPC_GPR21(mode64);
+ (*arr)[i++] = hregPPC_GPR22(mode64);
+ (*arr)[i++] = hregPPC_GPR23(mode64);
+ (*arr)[i++] = hregPPC_GPR24(mode64);
+ (*arr)[i++] = hregPPC_GPR25(mode64);
+ (*arr)[i++] = hregPPC_GPR26(mode64);
+ (*arr)[i++] = hregPPC_GPR27(mode64);
+ (*arr)[i++] = hregPPC_GPR28(mode64);
+ // GPR29 is reserved for the dispatcher
+ // GPR30 is reserved as AltiVec spill reg temporary
+ // GPR31 is reserved for the GuestStatePtr
+
+   /* Don't waste the register allocator's time trawling through
+      zillions of FP registers - they mostly will never be used.
+      We'll tolerate the occasional extra spill instead. */
+ /* For both ppc32-linux and ppc64-linux, f14-f31 are callee save.
+ So use them. */
+ (*arr)[i++] = hregPPC_FPR14();
+ (*arr)[i++] = hregPPC_FPR15();
+ (*arr)[i++] = hregPPC_FPR16();
+ (*arr)[i++] = hregPPC_FPR17();
+ (*arr)[i++] = hregPPC_FPR18();
+ (*arr)[i++] = hregPPC_FPR19();
+ (*arr)[i++] = hregPPC_FPR20();
+ (*arr)[i++] = hregPPC_FPR21();
+
+   /* Same deal re AltiVec */
+ /* For both ppc32-linux and ppc64-linux, v20-v31 are callee save.
+ So use them. */
+ /* NB, vr29 is used as a scratch temporary -- do not allocate */
+ (*arr)[i++] = hregPPC_VR20();
+ (*arr)[i++] = hregPPC_VR21();
+ (*arr)[i++] = hregPPC_VR22();
+ (*arr)[i++] = hregPPC_VR23();
+ (*arr)[i++] = hregPPC_VR24();
+ (*arr)[i++] = hregPPC_VR25();
+ (*arr)[i++] = hregPPC_VR26();
+ (*arr)[i++] = hregPPC_VR27();
+
+ vassert(i == *nregs);
+}
+
+
+/* --------- Condition codes, PPC encoding. --------- */
+
+HChar* showPPCCondCode ( PPCCondCode cond )
+{
+ if (cond.test == Pct_ALWAYS) return "always";
+
+ switch (cond.flag) {
+ case Pcf_7SO:
+ return (cond.test == Pct_TRUE) ? "cr7.so=1" : "cr7.so=0";
+ case Pcf_7EQ:
+ return (cond.test == Pct_TRUE) ? "cr7.eq=1" : "cr7.eq=0";
+ case Pcf_7GT:
+ return (cond.test == Pct_TRUE) ? "cr7.gt=1" : "cr7.gt=0";
+ case Pcf_7LT:
+ return (cond.test == Pct_TRUE) ? "cr7.lt=1" : "cr7.lt=0";
+      default: vpanic("showPPCCondCode");
+ }
+}
+
+/* construct condition code */
+PPCCondCode mk_PPCCondCode ( PPCCondTest test, PPCCondFlag flag )
+{
+ PPCCondCode cc;
+ cc.flag = flag;
+ cc.test = test;
+ return cc;
+}
+
+/* false->true, true->false */
+PPCCondTest invertCondTest ( PPCCondTest ct )
+{
+ vassert(ct != Pct_ALWAYS);
+ return (ct == Pct_TRUE) ? Pct_FALSE : Pct_TRUE;
+}
+
+
+/* --------- PPCAMode: memory address expressions. --------- */
+
+PPCAMode* PPCAMode_IR ( Int idx, HReg base ) {
+ PPCAMode* am = LibVEX_Alloc(sizeof(PPCAMode));
+ vassert(idx >= -0x8000 && idx < 0x8000);
+ am->tag = Pam_IR;
+ am->Pam.IR.base = base;
+ am->Pam.IR.index = idx;
+ return am;
+}
+PPCAMode* PPCAMode_RR ( HReg idx, HReg base ) {
+ PPCAMode* am = LibVEX_Alloc(sizeof(PPCAMode));
+ am->tag = Pam_RR;
+ am->Pam.RR.base = base;
+ am->Pam.RR.index = idx;
+ return am;
+}
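+
+/* Pam_IR denotes displacement addressing, d(Rb), as in
+   "lwz r3,8(r1)"; Pam_RR denotes register-indexed addressing,
+   Rb+Ri, as in "lwzx r3,r1,r2". */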
+
+PPCAMode* dopyPPCAMode ( PPCAMode* am ) {
+ switch (am->tag) {
+ case Pam_IR:
+ return PPCAMode_IR( am->Pam.IR.index, am->Pam.IR.base );
+ case Pam_RR:
+ return PPCAMode_RR( am->Pam.RR.index, am->Pam.RR.base );
+ default:
+ vpanic("dopyPPCAMode");
+ }
+}
+
+void ppPPCAMode ( PPCAMode* am ) {
+ switch (am->tag) {
+ case Pam_IR:
+ if (am->Pam.IR.index == 0)
+ vex_printf("0(");
+ else
+ vex_printf("%d(", (Int)am->Pam.IR.index);
+ ppHRegPPC(am->Pam.IR.base);
+ vex_printf(")");
+ return;
+ case Pam_RR:
+ ppHRegPPC(am->Pam.RR.base);
+ vex_printf(",");
+ ppHRegPPC(am->Pam.RR.index);
+ return;
+ default:
+ vpanic("ppPPCAMode");
+ }
+}
+
+static void addRegUsage_PPCAMode ( HRegUsage* u, PPCAMode* am ) {
+ switch (am->tag) {
+ case Pam_IR:
+ addHRegUse(u, HRmRead, am->Pam.IR.base);
+ return;
+ case Pam_RR:
+ addHRegUse(u, HRmRead, am->Pam.RR.base);
+ addHRegUse(u, HRmRead, am->Pam.RR.index);
+ return;
+ default:
+ vpanic("addRegUsage_PPCAMode");
+ }
+}
+
+static void mapRegs_PPCAMode ( HRegRemap* m, PPCAMode* am ) {
+ switch (am->tag) {
+ case Pam_IR:
+ am->Pam.IR.base = lookupHRegRemap(m, am->Pam.IR.base);
+ return;
+ case Pam_RR:
+ am->Pam.RR.base = lookupHRegRemap(m, am->Pam.RR.base);
+ am->Pam.RR.index = lookupHRegRemap(m, am->Pam.RR.index);
+ return;
+ default:
+ vpanic("mapRegs_PPCAMode");
+ }
+}
+
+/* --------- Operand, which can be a reg or a u16/s16. --------- */
+
+PPCRH* PPCRH_Imm ( Bool syned, UShort imm16 ) {
+ PPCRH* op = LibVEX_Alloc(sizeof(PPCRH));
+ op->tag = Prh_Imm;
+ op->Prh.Imm.syned = syned;
+ op->Prh.Imm.imm16 = imm16;
+ /* If this is a signed value, ensure it's not -32768, so that we
+ are guaranteed always to be able to negate if needed. */
+ if (syned)
+ vassert(imm16 != 0x8000);
+ vassert(syned == True || syned == False);
+ return op;
+}
+PPCRH* PPCRH_Reg ( HReg reg ) {
+ PPCRH* op = LibVEX_Alloc(sizeof(PPCRH));
+ op->tag = Prh_Reg;
+ op->Prh.Reg.reg = reg;
+ return op;
+}
+
+void ppPPCRH ( PPCRH* op ) {
+ switch (op->tag) {
+ case Prh_Imm:
+ if (op->Prh.Imm.syned)
+ vex_printf("%d", (Int)(Short)op->Prh.Imm.imm16);
+ else
+ vex_printf("%u", (UInt)(UShort)op->Prh.Imm.imm16);
+ return;
+ case Prh_Reg:
+ ppHRegPPC(op->Prh.Reg.reg);
+ return;
+ default:
+ vpanic("ppPPCRH");
+ }
+}
+
+/* A PPCRH can only be used in a "read" context (what would it mean
+ to write or modify a literal?) and so we enumerate its registers
+ accordingly. */
+static void addRegUsage_PPCRH ( HRegUsage* u, PPCRH* op ) {
+ switch (op->tag) {
+ case Prh_Imm:
+ return;
+ case Prh_Reg:
+ addHRegUse(u, HRmRead, op->Prh.Reg.reg);
+ return;
+ default:
+ vpanic("addRegUsage_PPCRH");
+ }
+}
+
+static void mapRegs_PPCRH ( HRegRemap* m, PPCRH* op ) {
+ switch (op->tag) {
+ case Prh_Imm:
+ return;
+ case Prh_Reg:
+ op->Prh.Reg.reg = lookupHRegRemap(m, op->Prh.Reg.reg);
+ return;
+ default:
+ vpanic("mapRegs_PPCRH");
+ }
+}
+
+
+/* --------- Operand, which can be a reg or a u32/64. --------- */
+
+PPCRI* PPCRI_Imm ( ULong imm64 ) {
+ PPCRI* op = LibVEX_Alloc(sizeof(PPCRI));
+ op->tag = Pri_Imm;
+ op->Pri.Imm = imm64;
+ return op;
+}
+PPCRI* PPCRI_Reg ( HReg reg ) {
+ PPCRI* op = LibVEX_Alloc(sizeof(PPCRI));
+ op->tag = Pri_Reg;
+ op->Pri.Reg = reg;
+ return op;
+}
+
+void ppPPCRI ( PPCRI* dst ) {
+ switch (dst->tag) {
+ case Pri_Imm:
+ vex_printf("0x%llx", dst->Pri.Imm);
+ break;
+ case Pri_Reg:
+ ppHRegPPC(dst->Pri.Reg);
+ break;
+ default:
+ vpanic("ppPPCRI");
+ }
+}
+
+/* A PPCRI can only be used in a "read" context (what would it
+ mean to write or modify a literal?) and so we enumerate its
+ registers accordingly. */
+static void addRegUsage_PPCRI ( HRegUsage* u, PPCRI* dst ) {
+ switch (dst->tag) {
+ case Pri_Imm:
+ return;
+ case Pri_Reg:
+ addHRegUse(u, HRmRead, dst->Pri.Reg);
+ return;
+ default:
+ vpanic("addRegUsage_PPCRI");
+ }
+}
+
+static void mapRegs_PPCRI ( HRegRemap* m, PPCRI* dst ) {
+ switch (dst->tag) {
+ case Pri_Imm:
+ return;
+ case Pri_Reg:
+ dst->Pri.Reg = lookupHRegRemap(m, dst->Pri.Reg);
+ return;
+ default:
+ vpanic("mapRegs_PPCRI");
+ }
+}
+
+
+/* --------- Operand, which can be a vector reg or a simm5. --------- */
+
+PPCVI5s* PPCVI5s_Imm ( Char simm5 ) {
+ PPCVI5s* op = LibVEX_Alloc(sizeof(PPCVI5s));
+ op->tag = Pvi_Imm;
+ op->Pvi.Imm5s = simm5;
+ vassert(simm5 >= -16 && simm5 <= 15);
+ return op;
+}
+PPCVI5s* PPCVI5s_Reg ( HReg reg ) {
+ PPCVI5s* op = LibVEX_Alloc(sizeof(PPCVI5s));
+ op->tag = Pvi_Reg;
+ op->Pvi.Reg = reg;
+ vassert(hregClass(reg) == HRcVec128);
+ return op;
+}
+
+void ppPPCVI5s ( PPCVI5s* src ) {
+ switch (src->tag) {
+ case Pvi_Imm:
+ vex_printf("%d", (Int)src->Pvi.Imm5s);
+ break;
+ case Pvi_Reg:
+ ppHRegPPC(src->Pvi.Reg);
+ break;
+ default:
+ vpanic("ppPPCVI5s");
+ }
+}
+
+/* A PPCVI5s can only be used in a "read" context (what would it
+ mean to write or modify a literal?) and so we enumerate its
+ registers accordingly. */
+static void addRegUsage_PPCVI5s ( HRegUsage* u, PPCVI5s* dst ) {
+ switch (dst->tag) {
+ case Pvi_Imm:
+ return;
+ case Pvi_Reg:
+ addHRegUse(u, HRmRead, dst->Pvi.Reg);
+ return;
+ default:
+ vpanic("addRegUsage_PPCVI5s");
+ }
+}
+
+static void mapRegs_PPCVI5s ( HRegRemap* m, PPCVI5s* dst ) {
+ switch (dst->tag) {
+ case Pvi_Imm:
+ return;
+ case Pvi_Reg:
+ dst->Pvi.Reg = lookupHRegRemap(m, dst->Pvi.Reg);
+ return;
+ default:
+ vpanic("mapRegs_PPCVI5s");
+ }
+}
+
+
+/* --------- Instructions. --------- */
+
+HChar* showPPCUnaryOp ( PPCUnaryOp op ) {
+ switch (op) {
+ case Pun_NOT: return "not";
+ case Pun_NEG: return "neg";
+ case Pun_CLZ32: return "cntlzw";
+ case Pun_CLZ64: return "cntlzd";
+ case Pun_EXTSW: return "extsw";
+ default: vpanic("showPPCUnaryOp");
+ }
+}
+
+HChar* showPPCAluOp ( PPCAluOp op, Bool immR ) {
+ switch (op) {
+ case Palu_ADD: return immR ? "addi" : "add";
+ case Palu_SUB: return immR ? "subi" : "sub";
+ case Palu_AND: return immR ? "andi." : "and";
+ case Palu_OR: return immR ? "ori" : "or";
+ case Palu_XOR: return immR ? "xori" : "xor";
+ default: vpanic("showPPCAluOp");
+ }
+}
+
+HChar* showPPCShftOp ( PPCShftOp op, Bool immR, Bool sz32 ) {
+ switch (op) {
+ case Pshft_SHL: return sz32 ? (immR ? "slwi" : "slw") :
+ (immR ? "sldi" : "sld");
+ case Pshft_SHR: return sz32 ? (immR ? "srwi" : "srw") :
+ (immR ? "srdi" : "srd");
+ case Pshft_SAR: return sz32 ? (immR ? "srawi" : "sraw") :
+ (immR ? "sradi" : "srad");
+ default: vpanic("showPPCShftOp");
+ }
+}
+
+HChar* showPPCFpOp ( PPCFpOp op ) {
+ switch (op) {
+ case Pfp_ADDD: return "fadd";
+ case Pfp_SUBD: return "fsub";
+ case Pfp_MULD: return "fmul";
+ case Pfp_DIVD: return "fdiv";
+ case Pfp_MADDD: return "fmadd";
+ case Pfp_MSUBD: return "fmsub";
+ case Pfp_MADDS: return "fmadds";
+ case Pfp_MSUBS: return "fmsubs";
+ case Pfp_ADDS: return "fadds";
+ case Pfp_SUBS: return "fsubs";
+ case Pfp_MULS: return "fmuls";
+ case Pfp_DIVS: return "fdivs";
+ case Pfp_SQRT: return "fsqrt";
+ case Pfp_ABS: return "fabs";
+ case Pfp_NEG: return "fneg";
+ case Pfp_MOV: return "fmr";
+ case Pfp_RES: return "fres";
+ case Pfp_RSQRTE: return "frsqrte";
+ case Pfp_FRIM: return "frim";
+ case Pfp_FRIN: return "frin";
+ case Pfp_FRIP: return "frip";
+ case Pfp_FRIZ: return "friz";
+ default: vpanic("showPPCFpOp");
+ }
+}
+
+HChar* showPPCAvOp ( PPCAvOp op ) {
+ switch (op) {
+
+ /* Unary */
+ case Pav_MOV: return "vmr"; /* Mov */
+
+ case Pav_AND: return "vand"; /* Bitwise */
+ case Pav_OR: return "vor";
+ case Pav_XOR: return "vxor";
+ case Pav_NOT: return "vnot";
+
+ case Pav_UNPCKH8S: return "vupkhsb"; /* Unpack */
+ case Pav_UNPCKH16S: return "vupkhsh";
+ case Pav_UNPCKL8S: return "vupklsb";
+ case Pav_UNPCKL16S: return "vupklsh";
+ case Pav_UNPCKHPIX: return "vupkhpx";
+ case Pav_UNPCKLPIX: return "vupklpx";
+
+ /* Integer binary */
+ case Pav_ADDU: return "vaddu_m"; // b,h,w
+ case Pav_QADDU: return "vaddu_s"; // b,h,w
+ case Pav_QADDS: return "vadds_s"; // b,h,w
+
+ case Pav_SUBU: return "vsubu_m"; // b,h,w
+ case Pav_QSUBU: return "vsubu_s"; // b,h,w
+ case Pav_QSUBS: return "vsubs_s"; // b,h,w
+
+ case Pav_OMULU: return "vmulou"; // b,h
+ case Pav_OMULS: return "vmulos"; // b,h
+ case Pav_EMULU: return "vmuleu"; // b,h
+ case Pav_EMULS: return "vmules"; // b,h
+
+ case Pav_AVGU: return "vavgu"; // b,h,w
+ case Pav_AVGS: return "vavgs"; // b,h,w
+
+ case Pav_MAXU: return "vmaxu"; // b,h,w
+ case Pav_MAXS: return "vmaxs"; // b,h,w
+
+ case Pav_MINU: return "vminu"; // b,h,w
+ case Pav_MINS: return "vmins"; // b,h,w
+
+ /* Compare (always affects CR field 6) */
+ case Pav_CMPEQU: return "vcmpequ"; // b,h,w
+ case Pav_CMPGTU: return "vcmpgtu"; // b,h,w
+ case Pav_CMPGTS: return "vcmpgts"; // b,h,w
+
+ /* Shift */
+ case Pav_SHL: return "vsl"; // ' ',b,h,w
+ case Pav_SHR: return "vsr"; // ' ',b,h,w
+ case Pav_SAR: return "vsra"; // b,h,w
+ case Pav_ROTL: return "vrl"; // b,h,w
+
+ /* Pack */
+ case Pav_PACKUU: return "vpku_um"; // h,w
+ case Pav_QPACKUU: return "vpku_us"; // h,w
+ case Pav_QPACKSU: return "vpks_us"; // h,w
+ case Pav_QPACKSS: return "vpks_ss"; // h,w
+ case Pav_PACKPXL: return "vpkpx";
+
+ /* Merge */
+ case Pav_MRGHI: return "vmrgh"; // b,h,w
+ case Pav_MRGLO: return "vmrgl"; // b,h,w
+
+ default: vpanic("showPPCAvOp");
+ }
+}
+
+HChar* showPPCAvFpOp ( PPCAvFpOp op ) {
+ switch (op) {
+ /* Floating Point Binary */
+ case Pavfp_ADDF: return "vaddfp";
+ case Pavfp_SUBF: return "vsubfp";
+ case Pavfp_MULF: return "vmaddfp";
+ case Pavfp_MAXF: return "vmaxfp";
+ case Pavfp_MINF: return "vminfp";
+ case Pavfp_CMPEQF: return "vcmpeqfp";
+ case Pavfp_CMPGTF: return "vcmpgtfp";
+ case Pavfp_CMPGEF: return "vcmpgefp";
+
+ /* Floating Point Unary */
+ case Pavfp_RCPF: return "vrefp";
+ case Pavfp_RSQRTF: return "vrsqrtefp";
+ case Pavfp_CVTU2F: return "vcfux";
+ case Pavfp_CVTS2F: return "vcfsx";
+ case Pavfp_QCVTF2U: return "vctuxs";
+ case Pavfp_QCVTF2S: return "vctsxs";
+ case Pavfp_ROUNDM: return "vrfim";
+ case Pavfp_ROUNDP: return "vrfip";
+ case Pavfp_ROUNDN: return "vrfin";
+ case Pavfp_ROUNDZ: return "vrfiz";
+
+ default: vpanic("showPPCAvFpOp");
+ }
+}
+
+PPCInstr* PPCInstr_LI ( HReg dst, ULong imm64, Bool mode64 )
+{
+ PPCInstr* i = LibVEX_Alloc(sizeof(PPCInstr));
+ i->tag = Pin_LI;
+ i->Pin.LI.dst = dst;
+ i->Pin.LI.imm64 = imm64;
+ if (!mode64)
+ vassert( (Long)imm64 == (Long)(Int)(UInt)imm64 );
+ return i;
+}
+PPCInstr* PPCInstr_Alu ( PPCAluOp op, HReg dst,
+ HReg srcL, PPCRH* srcR ) {
+ PPCInstr* i = LibVEX_Alloc(sizeof(PPCInstr));
+ i->tag = Pin_Alu;
+ i->Pin.Alu.op = op;
+ i->Pin.Alu.dst = dst;
+ i->Pin.Alu.srcL = srcL;
+ i->Pin.Alu.srcR = srcR;
+ return i;
+}
+PPCInstr* PPCInstr_Shft ( PPCShftOp op, Bool sz32,
+ HReg dst, HReg srcL, PPCRH* srcR ) {
+ PPCInstr* i = LibVEX_Alloc(sizeof(PPCInstr));
+ i->tag = Pin_Shft;
+ i->Pin.Shft.op = op;
+ i->Pin.Shft.sz32 = sz32;
+ i->Pin.Shft.dst = dst;
+ i->Pin.Shft.srcL = srcL;
+ i->Pin.Shft.srcR = srcR;
+ return i;
+}
+PPCInstr* PPCInstr_AddSubC ( Bool isAdd, Bool setC,
+ HReg dst, HReg srcL, HReg srcR ) {
+ PPCInstr* i = LibVEX_Alloc(sizeof(PPCInstr));
+ i->tag = Pin_AddSubC;
+ i->Pin.AddSubC.isAdd = isAdd;
+ i->Pin.AddSubC.setC = setC;
+ i->Pin.AddSubC.dst = dst;
+ i->Pin.AddSubC.srcL = srcL;
+ i->Pin.AddSubC.srcR = srcR;
+ return i;
+}
+PPCInstr* PPCInstr_Cmp ( Bool syned, Bool sz32,
+ UInt crfD, HReg srcL, PPCRH* srcR ) {
+ PPCInstr* i = LibVEX_Alloc(sizeof(PPCInstr));
+ i->tag = Pin_Cmp;
+ i->Pin.Cmp.syned = syned;
+ i->Pin.Cmp.sz32 = sz32;
+ i->Pin.Cmp.crfD = crfD;
+ i->Pin.Cmp.srcL = srcL;
+ i->Pin.Cmp.srcR = srcR;
+ return i;
+}
+PPCInstr* PPCInstr_Unary ( PPCUnaryOp op, HReg dst, HReg src ) {
+ PPCInstr* i = LibVEX_Alloc(sizeof(PPCInstr));
+ i->tag = Pin_Unary;
+ i->Pin.Unary.op = op;
+ i->Pin.Unary.dst = dst;
+ i->Pin.Unary.src = src;
+ return i;
+}
+PPCInstr* PPCInstr_MulL ( Bool syned, Bool hi, Bool sz32,
+ HReg dst, HReg srcL, HReg srcR ) {
+ PPCInstr* i = LibVEX_Alloc(sizeof(PPCInstr));
+ i->tag = Pin_MulL;
+ i->Pin.MulL.syned = syned;
+ i->Pin.MulL.hi = hi;
+ i->Pin.MulL.sz32 = sz32;
+ i->Pin.MulL.dst = dst;
+ i->Pin.MulL.srcL = srcL;
+ i->Pin.MulL.srcR = srcR;
+ /* if doing the low word, the signedness is irrelevant, but tie it
+ down anyway. */
+ if (!hi) vassert(!syned);
+ return i;
+}
+PPCInstr* PPCInstr_Div ( Bool syned, Bool sz32,
+ HReg dst, HReg srcL, HReg srcR ) {
+ PPCInstr* i = LibVEX_Alloc(sizeof(PPCInstr));
+ i->tag = Pin_Div;
+ i->Pin.Div.syned = syned;
+ i->Pin.Div.sz32 = sz32;
+ i->Pin.Div.dst = dst;
+ i->Pin.Div.srcL = srcL;
+ i->Pin.Div.srcR = srcR;
+ return i;
+}
+PPCInstr* PPCInstr_Call ( PPCCondCode cond,
+ Addr64 target, UInt argiregs ) {
+ UInt mask;
+ PPCInstr* i = LibVEX_Alloc(sizeof(PPCInstr));
+ i->tag = Pin_Call;
+ i->Pin.Call.cond = cond;
+ i->Pin.Call.target = target;
+ i->Pin.Call.argiregs = argiregs;
+ /* Only r3 .. r10 inclusive may be used as arg regs. Hence: */
+ mask = (1<<3)|(1<<4)|(1<<5)|(1<<6)|(1<<7)|(1<<8)|(1<<9)|(1<<10);
+ vassert(0 == (argiregs & ~mask));
+ return i;
+}
+PPCInstr* PPCInstr_Goto ( IRJumpKind jk,
+ PPCCondCode cond, PPCRI* dst ) {
+ PPCInstr* i = LibVEX_Alloc(sizeof(PPCInstr));
+ i->tag = Pin_Goto;
+ i->Pin.Goto.cond = cond;
+ i->Pin.Goto.dst = dst;
+ i->Pin.Goto.jk = jk;
+ return i;
+}
+PPCInstr* PPCInstr_CMov ( PPCCondCode cond,
+ HReg dst, PPCRI* src ) {
+ PPCInstr* i = LibVEX_Alloc(sizeof(PPCInstr));
+ i->tag = Pin_CMov;
+ i->Pin.CMov.cond = cond;
+ i->Pin.CMov.src = src;
+ i->Pin.CMov.dst = dst;
+ vassert(cond.test != Pct_ALWAYS);
+ return i;
+}
+PPCInstr* PPCInstr_Load ( UChar sz,
+ HReg dst, PPCAMode* src, Bool mode64 ) {
+ PPCInstr* i = LibVEX_Alloc(sizeof(PPCInstr));
+ i->tag = Pin_Load;
+ i->Pin.Load.sz = sz;
+ i->Pin.Load.src = src;
+ i->Pin.Load.dst = dst;
+ vassert(sz == 1 || sz == 2 || sz == 4 || sz == 8);
+ if (sz == 8) vassert(mode64);
+ return i;
+}
+PPCInstr* PPCInstr_LoadL ( UChar sz,
+ HReg dst, HReg src, Bool mode64 )
+{
+ PPCInstr* i = LibVEX_Alloc(sizeof(PPCInstr));
+ i->tag = Pin_LoadL;
+ i->Pin.LoadL.sz = sz;
+ i->Pin.LoadL.src = src;
+ i->Pin.LoadL.dst = dst;
+ vassert(sz == 4 || sz == 8);
+ if (sz == 8) vassert(mode64);
+ return i;
+}
+PPCInstr* PPCInstr_Store ( UChar sz, PPCAMode* dst, HReg src,
+ Bool mode64 ) {
+ PPCInstr* i = LibVEX_Alloc(sizeof(PPCInstr));
+ i->tag = Pin_Store;
+ i->Pin.Store.sz = sz;
+ i->Pin.Store.src = src;
+ i->Pin.Store.dst = dst;
+ vassert(sz == 1 || sz == 2 || sz == 4 || sz == 8);
+ if (sz == 8) vassert(mode64);
+ return i;
+}
+PPCInstr* PPCInstr_StoreC ( UChar sz, HReg dst, HReg src, Bool mode64 ) {
+ PPCInstr* i = LibVEX_Alloc(sizeof(PPCInstr));
+ i->tag = Pin_StoreC;
+ i->Pin.StoreC.sz = sz;
+ i->Pin.StoreC.src = src;
+ i->Pin.StoreC.dst = dst;
+ vassert(sz == 4 || sz == 8);
+ if (sz == 8) vassert(mode64);
+ return i;
+}
+PPCInstr* PPCInstr_Set ( PPCCondCode cond, HReg dst ) {
+ PPCInstr* i = LibVEX_Alloc(sizeof(PPCInstr));
+ i->tag = Pin_Set;
+ i->Pin.Set.cond = cond;
+ i->Pin.Set.dst = dst;
+ return i;
+}
+PPCInstr* PPCInstr_MfCR ( HReg dst )
+{
+ PPCInstr* i = LibVEX_Alloc(sizeof(PPCInstr));
+ i->tag = Pin_MfCR;
+ i->Pin.MfCR.dst = dst;
+ return i;
+}
+PPCInstr* PPCInstr_MFence ( void )
+{
+ PPCInstr* i = LibVEX_Alloc(sizeof(PPCInstr));
+ i->tag = Pin_MFence;
+ return i;
+}
+
+PPCInstr* PPCInstr_FpUnary ( PPCFpOp op, HReg dst, HReg src ) {
+ PPCInstr* i = LibVEX_Alloc(sizeof(PPCInstr));
+ i->tag = Pin_FpUnary;
+ i->Pin.FpUnary.op = op;
+ i->Pin.FpUnary.dst = dst;
+ i->Pin.FpUnary.src = src;
+ return i;
+}
+PPCInstr* PPCInstr_FpBinary ( PPCFpOp op, HReg dst,
+ HReg srcL, HReg srcR ) {
+ PPCInstr* i = LibVEX_Alloc(sizeof(PPCInstr));
+ i->tag = Pin_FpBinary;
+ i->Pin.FpBinary.op = op;
+ i->Pin.FpBinary.dst = dst;
+ i->Pin.FpBinary.srcL = srcL;
+ i->Pin.FpBinary.srcR = srcR;
+ return i;
+}
+PPCInstr* PPCInstr_FpMulAcc ( PPCFpOp op, HReg dst, HReg srcML,
+ HReg srcMR, HReg srcAcc )
+{
+ PPCInstr* i = LibVEX_Alloc(sizeof(PPCInstr));
+ i->tag = Pin_FpMulAcc;
+ i->Pin.FpMulAcc.op = op;
+ i->Pin.FpMulAcc.dst = dst;
+ i->Pin.FpMulAcc.srcML = srcML;
+ i->Pin.FpMulAcc.srcMR = srcMR;
+ i->Pin.FpMulAcc.srcAcc = srcAcc;
+ return i;
+}
+PPCInstr* PPCInstr_FpLdSt ( Bool isLoad, UChar sz,
+ HReg reg, PPCAMode* addr ) {
+ PPCInstr* i = LibVEX_Alloc(sizeof(PPCInstr));
+ i->tag = Pin_FpLdSt;
+ i->Pin.FpLdSt.isLoad = isLoad;
+ i->Pin.FpLdSt.sz = sz;
+ i->Pin.FpLdSt.reg = reg;
+ i->Pin.FpLdSt.addr = addr;
+ vassert(sz == 4 || sz == 8);
+ return i;
+}
+PPCInstr* PPCInstr_FpSTFIW ( HReg addr, HReg data )
+{
+ PPCInstr* i = LibVEX_Alloc(sizeof(PPCInstr));
+ i->tag = Pin_FpSTFIW;
+ i->Pin.FpSTFIW.addr = addr;
+ i->Pin.FpSTFIW.data = data;
+ return i;
+}
+PPCInstr* PPCInstr_FpRSP ( HReg dst, HReg src ) {
+ PPCInstr* i = LibVEX_Alloc(sizeof(PPCInstr));
+ i->tag = Pin_FpRSP;
+ i->Pin.FpRSP.dst = dst;
+ i->Pin.FpRSP.src = src;
+ return i;
+}
+PPCInstr* PPCInstr_FpCftI ( Bool fromI, Bool int32,
+ HReg dst, HReg src ) {
+ PPCInstr* i = LibVEX_Alloc(sizeof(PPCInstr));
+ i->tag = Pin_FpCftI;
+ i->Pin.FpCftI.fromI = fromI;
+ i->Pin.FpCftI.int32 = int32;
+ i->Pin.FpCftI.dst = dst;
+ i->Pin.FpCftI.src = src;
+ vassert(!(int32 && fromI)); /* no such insn ("fcfiw"). */
+ return i;
+}
+PPCInstr* PPCInstr_FpCMov ( PPCCondCode cond, HReg dst, HReg src ) {
+ PPCInstr* i = LibVEX_Alloc(sizeof(PPCInstr));
+ i->tag = Pin_FpCMov;
+ i->Pin.FpCMov.cond = cond;
+ i->Pin.FpCMov.dst = dst;
+ i->Pin.FpCMov.src = src;
+ vassert(cond.test != Pct_ALWAYS);
+ return i;
+}
+PPCInstr* PPCInstr_FpLdFPSCR ( HReg src ) {
+ PPCInstr* i = LibVEX_Alloc(sizeof(PPCInstr));
+ i->tag = Pin_FpLdFPSCR;
+ i->Pin.FpLdFPSCR.src = src;
+ return i;
+}
+PPCInstr* PPCInstr_FpCmp ( HReg dst, HReg srcL, HReg srcR ) {
+ PPCInstr* i = LibVEX_Alloc(sizeof(PPCInstr));
+ i->tag = Pin_FpCmp;
+ i->Pin.FpCmp.dst = dst;
+ i->Pin.FpCmp.srcL = srcL;
+ i->Pin.FpCmp.srcR = srcR;
+ return i;
+}
+
+/* Read/Write Link Register */
+PPCInstr* PPCInstr_RdWrLR ( Bool wrLR, HReg gpr ) {
+ PPCInstr* i = LibVEX_Alloc(sizeof(PPCInstr));
+ i->tag = Pin_RdWrLR;
+ i->Pin.RdWrLR.wrLR = wrLR;
+ i->Pin.RdWrLR.gpr = gpr;
+ return i;
+}
+
+/* AltiVec */
+PPCInstr* PPCInstr_AvLdSt ( Bool isLoad, UChar sz,
+ HReg reg, PPCAMode* addr ) {
+ PPCInstr* i = LibVEX_Alloc(sizeof(PPCInstr));
+ i->tag = Pin_AvLdSt;
+ i->Pin.AvLdSt.isLoad = isLoad;
+ i->Pin.AvLdSt.sz = sz;
+ i->Pin.AvLdSt.reg = reg;
+ i->Pin.AvLdSt.addr = addr;
+ return i;
+}
+PPCInstr* PPCInstr_AvUnary ( PPCAvOp op, HReg dst, HReg src ) {
+ PPCInstr* i = LibVEX_Alloc(sizeof(PPCInstr));
+ i->tag = Pin_AvUnary;
+ i->Pin.AvUnary.op = op;
+ i->Pin.AvUnary.dst = dst;
+ i->Pin.AvUnary.src = src;
+ return i;
+}
+PPCInstr* PPCInstr_AvBinary ( PPCAvOp op, HReg dst,
+ HReg srcL, HReg srcR ) {
+ PPCInstr* i = LibVEX_Alloc(sizeof(PPCInstr));
+ i->tag = Pin_AvBinary;
+ i->Pin.AvBinary.op = op;
+ i->Pin.AvBinary.dst = dst;
+ i->Pin.AvBinary.srcL = srcL;
+ i->Pin.AvBinary.srcR = srcR;
+ return i;
+}
+PPCInstr* PPCInstr_AvBin8x16 ( PPCAvOp op, HReg dst,
+ HReg srcL, HReg srcR ) {
+ PPCInstr* i = LibVEX_Alloc(sizeof(PPCInstr));
+ i->tag = Pin_AvBin8x16;
+ i->Pin.AvBin8x16.op = op;
+ i->Pin.AvBin8x16.dst = dst;
+ i->Pin.AvBin8x16.srcL = srcL;
+ i->Pin.AvBin8x16.srcR = srcR;
+ return i;
+}
+PPCInstr* PPCInstr_AvBin16x8 ( PPCAvOp op, HReg dst,
+ HReg srcL, HReg srcR ) {
+ PPCInstr* i = LibVEX_Alloc(sizeof(PPCInstr));
+ i->tag = Pin_AvBin16x8;
+ i->Pin.AvBin16x8.op = op;
+ i->Pin.AvBin16x8.dst = dst;
+ i->Pin.AvBin16x8.srcL = srcL;
+ i->Pin.AvBin16x8.srcR = srcR;
+ return i;
+}
+PPCInstr* PPCInstr_AvBin32x4 ( PPCAvOp op, HReg dst,
+ HReg srcL, HReg srcR ) {
+ PPCInstr* i = LibVEX_Alloc(sizeof(PPCInstr));
+ i->tag = Pin_AvBin32x4;
+ i->Pin.AvBin32x4.op = op;
+ i->Pin.AvBin32x4.dst = dst;
+ i->Pin.AvBin32x4.srcL = srcL;
+ i->Pin.AvBin32x4.srcR = srcR;
+ return i;
+}
+PPCInstr* PPCInstr_AvBin32Fx4 ( PPCAvOp op, HReg dst,
+ HReg srcL, HReg srcR ) {
+ PPCInstr* i = LibVEX_Alloc(sizeof(PPCInstr));
+ i->tag = Pin_AvBin32Fx4;
+ i->Pin.AvBin32Fx4.op = op;
+ i->Pin.AvBin32Fx4.dst = dst;
+ i->Pin.AvBin32Fx4.srcL = srcL;
+ i->Pin.AvBin32Fx4.srcR = srcR;
+ return i;
+}
+PPCInstr* PPCInstr_AvUn32Fx4 ( PPCAvOp op, HReg dst, HReg src ) {
+ PPCInstr* i = LibVEX_Alloc(sizeof(PPCInstr));
+ i->tag = Pin_AvUn32Fx4;
+ i->Pin.AvUn32Fx4.op = op;
+ i->Pin.AvUn32Fx4.dst = dst;
+ i->Pin.AvUn32Fx4.src = src;
+ return i;
+}
+PPCInstr* PPCInstr_AvPerm ( HReg dst, HReg srcL, HReg srcR, HReg ctl ) {
+ PPCInstr* i = LibVEX_Alloc(sizeof(PPCInstr));
+ i->tag = Pin_AvPerm;
+ i->Pin.AvPerm.dst = dst;
+ i->Pin.AvPerm.srcL = srcL;
+ i->Pin.AvPerm.srcR = srcR;
+ i->Pin.AvPerm.ctl = ctl;
+ return i;
+}
+PPCInstr* PPCInstr_AvSel ( HReg ctl, HReg dst, HReg srcL, HReg srcR ) {
+ PPCInstr* i = LibVEX_Alloc(sizeof(PPCInstr));
+ i->tag = Pin_AvSel;
+ i->Pin.AvSel.ctl = ctl;
+ i->Pin.AvSel.dst = dst;
+ i->Pin.AvSel.srcL = srcL;
+ i->Pin.AvSel.srcR = srcR;
+ return i;
+}
+PPCInstr* PPCInstr_AvShlDbl ( UChar shift, HReg dst,
+ HReg srcL, HReg srcR ) {
+ PPCInstr* i = LibVEX_Alloc(sizeof(PPCInstr));
+ i->tag = Pin_AvShlDbl;
+ i->Pin.AvShlDbl.shift = shift;
+ i->Pin.AvShlDbl.dst = dst;
+ i->Pin.AvShlDbl.srcL = srcL;
+ i->Pin.AvShlDbl.srcR = srcR;
+ return i;
+}
+PPCInstr* PPCInstr_AvSplat ( UChar sz, HReg dst, PPCVI5s* src ) {
+ PPCInstr* i = LibVEX_Alloc(sizeof(PPCInstr));
+ i->tag = Pin_AvSplat;
+ i->Pin.AvSplat.sz = sz;
+ i->Pin.AvSplat.dst = dst;
+ i->Pin.AvSplat.src = src;
+ return i;
+}
+PPCInstr* PPCInstr_AvCMov ( PPCCondCode cond, HReg dst, HReg src ) {
+ PPCInstr* i = LibVEX_Alloc(sizeof(PPCInstr));
+ i->tag = Pin_AvCMov;
+ i->Pin.AvCMov.cond = cond;
+ i->Pin.AvCMov.dst = dst;
+ i->Pin.AvCMov.src = src;
+ vassert(cond.test != Pct_ALWAYS);
+ return i;
+}
+PPCInstr* PPCInstr_AvLdVSCR ( HReg src ) {
+ PPCInstr* i = LibVEX_Alloc(sizeof(PPCInstr));
+ i->tag = Pin_AvLdVSCR;
+ i->Pin.AvLdVSCR.src = src;
+ return i;
+}
+
+
+/* Pretty Print instructions */
+static void ppLoadImm ( HReg dst, ULong imm, Bool mode64 ) {
+ vex_printf("li_word ");
+ ppHRegPPC(dst);
+ if (!mode64) {
+ vex_printf(",0x%08x", (UInt)imm);
+ } else {
+ vex_printf(",0x%016llx", imm);
+ }
+}
+
+static void ppMovReg ( HReg dst, HReg src ) {
+ if (hregNumber(dst) != hregNumber(src)) {
+ vex_printf("mr ");
+ ppHRegPPC(dst);
+ vex_printf(",");
+ ppHRegPPC(src);
+ }
+}
+
+void ppPPCInstr ( PPCInstr* i, Bool mode64 )
+{
+ switch (i->tag) {
+ case Pin_LI:
+ ppLoadImm(i->Pin.LI.dst, i->Pin.LI.imm64, mode64);
+ break;
+ case Pin_Alu: {
+ HReg r_srcL = i->Pin.Alu.srcL;
+ PPCRH* rh_srcR = i->Pin.Alu.srcR;
+ /* special-case "mr" */
+ if (i->Pin.Alu.op == Palu_OR && // or Rd,Rs,Rs == mr Rd,Rs
+ rh_srcR->tag == Prh_Reg &&
+ rh_srcR->Prh.Reg.reg == r_srcL) {
+ vex_printf("mr ");
+ ppHRegPPC(i->Pin.Alu.dst);
+ vex_printf(",");
+ ppHRegPPC(r_srcL);
+ return;
+ }
+ /* special-case "li" */
+ if (i->Pin.Alu.op == Palu_ADD && // addi Rd,0,imm == li Rd,imm
+ rh_srcR->tag == Prh_Imm &&
+ hregNumber(r_srcL) == 0) {
+ vex_printf("li ");
+ ppHRegPPC(i->Pin.Alu.dst);
+ vex_printf(",");
+ ppPPCRH(rh_srcR);
+ return;
+ }
+ /* generic */
+ vex_printf("%s ", showPPCAluOp(i->Pin.Alu.op,
+ toBool(rh_srcR->tag == Prh_Imm)));
+ ppHRegPPC(i->Pin.Alu.dst);
+ vex_printf(",");
+ ppHRegPPC(r_srcL);
+ vex_printf(",");
+ ppPPCRH(rh_srcR);
+ return;
+ }
+ case Pin_Shft: {
+ HReg r_srcL = i->Pin.Shft.srcL;
+ PPCRH* rh_srcR = i->Pin.Shft.srcR;
+ vex_printf("%s ", showPPCShftOp(i->Pin.Shft.op,
+ toBool(rh_srcR->tag == Prh_Imm),
+ i->Pin.Shft.sz32));
+ ppHRegPPC(i->Pin.Shft.dst);
+ vex_printf(",");
+ ppHRegPPC(r_srcL);
+ vex_printf(",");
+ ppPPCRH(rh_srcR);
+ return;
+ }
+ case Pin_AddSubC:
+ vex_printf("%s%s ",
+ i->Pin.AddSubC.isAdd ? "add" : "sub",
+ i->Pin.AddSubC.setC ? "c" : "e");
+ ppHRegPPC(i->Pin.AddSubC.dst);
+ vex_printf(",");
+ ppHRegPPC(i->Pin.AddSubC.srcL);
+ vex_printf(",");
+ ppHRegPPC(i->Pin.AddSubC.srcR);
+ return;
+ case Pin_Cmp:
+ vex_printf("%s%c%s %%cr%u,",
+ i->Pin.Cmp.syned ? "cmp" : "cmpl",
+ i->Pin.Cmp.sz32 ? 'w' : 'd',
+ i->Pin.Cmp.srcR->tag == Prh_Imm ? "i" : "",
+ i->Pin.Cmp.crfD);
+ ppHRegPPC(i->Pin.Cmp.srcL);
+ vex_printf(",");
+ ppPPCRH(i->Pin.Cmp.srcR);
+ return;
+ case Pin_Unary:
+ vex_printf("%s ", showPPCUnaryOp(i->Pin.Unary.op));
+ ppHRegPPC(i->Pin.Unary.dst);
+ vex_printf(",");
+ ppHRegPPC(i->Pin.Unary.src);
+ return;
+ case Pin_MulL:
+ vex_printf("mul%c%c%s ",
+ i->Pin.MulL.hi ? 'h' : 'l',
+ i->Pin.MulL.sz32 ? 'w' : 'd',
+ i->Pin.MulL.hi ? (i->Pin.MulL.syned ? "s" : "u") : "");
+ ppHRegPPC(i->Pin.MulL.dst);
+ vex_printf(",");
+ ppHRegPPC(i->Pin.MulL.srcL);
+ vex_printf(",");
+ ppHRegPPC(i->Pin.MulL.srcR);
+ return;
+ case Pin_Div:
+ vex_printf("div%c%s ",
+ i->Pin.Div.sz32 ? 'w' : 'd',
+ i->Pin.Div.syned ? "" : "u");
+ ppHRegPPC(i->Pin.Div.dst);
+ vex_printf(",");
+ ppHRegPPC(i->Pin.Div.srcL);
+ vex_printf(",");
+ ppHRegPPC(i->Pin.Div.srcR);
+ return;
+ case Pin_Call: {
+ Int n;
+ vex_printf("call: ");
+ if (i->Pin.Call.cond.test != Pct_ALWAYS) {
+ vex_printf("if (%s) ", showPPCCondCode(i->Pin.Call.cond));
+ }
+ vex_printf("{ ");
+ ppLoadImm(hregPPC_GPR10(mode64), i->Pin.Call.target, mode64);
+ vex_printf(" ; mtctr r10 ; bctrl [");
+ for (n = 0; n < 32; n++) {
+ if (i->Pin.Call.argiregs & (1<<n)) {
+ vex_printf("r%d", n);
+ if ((i->Pin.Call.argiregs >> n) > 1)
+ vex_printf(",");
+ }
+ }
+ vex_printf("] }");
+ break;
+ }
+ case Pin_Goto:
+ vex_printf("goto: ");
+ if (i->Pin.Goto.cond.test != Pct_ALWAYS) {
+ vex_printf("if (%s) ", showPPCCondCode(i->Pin.Goto.cond));
+ }
+ vex_printf("{ ");
+ if (i->Pin.Goto.jk != Ijk_Boring
+ && i->Pin.Goto.jk != Ijk_Call
+ && i->Pin.Goto.jk != Ijk_Ret) {
+ vex_printf("li %%r31,$");
+ ppIRJumpKind(i->Pin.Goto.jk);
+ vex_printf(" ; ");
+ }
+ if (i->Pin.Goto.dst->tag == Pri_Imm) {
+ ppLoadImm(hregPPC_GPR3(mode64), i->Pin.Goto.dst->Pri.Imm,
+ mode64);
+ } else {
+ ppMovReg(hregPPC_GPR3(mode64), i->Pin.Goto.dst->Pri.Reg);
+ }
+ vex_printf(" ; blr }");
+ return;
+ case Pin_CMov:
+ vex_printf("cmov (%s) ", showPPCCondCode(i->Pin.CMov.cond));
+ ppHRegPPC(i->Pin.CMov.dst);
+ vex_printf(",");
+ ppPPCRI(i->Pin.CMov.src);
+ vex_printf(": ");
+ if (i->Pin.CMov.cond.test != Pct_ALWAYS) {
+ vex_printf("if (%s) ", showPPCCondCode(i->Pin.CMov.cond));
+ }
+ vex_printf("{ ");
+ if (i->Pin.CMov.src->tag == Pri_Imm) {
+ ppLoadImm(i->Pin.CMov.dst, i->Pin.CMov.src->Pri.Imm, mode64);
+ } else {
+ ppMovReg(i->Pin.CMov.dst, i->Pin.CMov.src->Pri.Reg);
+ }
+ vex_printf(" }");
+ return;
+ case Pin_Load: {
+ Bool idxd = toBool(i->Pin.Load.src->tag == Pam_RR);
+ UChar sz = i->Pin.Load.sz;
+ UChar c_sz = sz==1 ? 'b' : sz==2 ? 'h' : sz==4 ? 'w' : 'd';
+ vex_printf("l%c%s%s ", c_sz, sz==8 ? "" : "z", idxd ? "x" : "" );
+ ppHRegPPC(i->Pin.Load.dst);
+ vex_printf(",");
+ ppPPCAMode(i->Pin.Load.src);
+ return;
+ }
+ case Pin_LoadL:
+ vex_printf("l%carx ", i->Pin.LoadL.sz==4 ? 'w' : 'd');
+ ppHRegPPC(i->Pin.LoadL.dst);
+ vex_printf(",%%r0,");
+ ppHRegPPC(i->Pin.LoadL.src);
+ return;
+ case Pin_Store: {
+ UChar sz = i->Pin.Store.sz;
+ Bool idxd = toBool(i->Pin.Store.dst->tag == Pam_RR);
+ UChar c_sz = sz==1 ? 'b' : sz==2 ? 'h' : sz==4 ? 'w' : /*8*/ 'd';
+ vex_printf("st%c%s ", c_sz, idxd ? "x" : "" );
+ ppHRegPPC(i->Pin.Store.src);
+ vex_printf(",");
+ ppPPCAMode(i->Pin.Store.dst);
+ return;
+ }
+ case Pin_StoreC:
+ vex_printf("st%ccx. ", i->Pin.StoreC.sz==4 ? 'w' : 'd');
+ ppHRegPPC(i->Pin.StoreC.src);
+ vex_printf(",%%r0,");
+ ppHRegPPC(i->Pin.StoreC.dst);
+ return;
+ case Pin_Set: {
+ PPCCondCode cc = i->Pin.Set.cond;
+ vex_printf("set (%s),", showPPCCondCode(cc));
+ ppHRegPPC(i->Pin.Set.dst);
+ if (cc.test == Pct_ALWAYS) {
+ vex_printf(": { li ");
+ ppHRegPPC(i->Pin.Set.dst);
+ vex_printf(",1 }");
+ } else {
+ vex_printf(": { mfcr r0 ; rlwinm ");
+ ppHRegPPC(i->Pin.Set.dst);
+ vex_printf(",r0,%u,31,31", cc.flag+1);
+ if (cc.test == Pct_FALSE) {
+ vex_printf("; xori ");
+ ppHRegPPC(i->Pin.Set.dst);
+ vex_printf(",");
+ ppHRegPPC(i->Pin.Set.dst);
+ vex_printf(",1");
+ }
+ vex_printf(" }");
+ }
+ return;
+ }
+ case Pin_MfCR:
+ vex_printf("mfcr ");
+ ppHRegPPC(i->Pin.MfCR.dst);
+ break;
+ case Pin_MFence:
+ vex_printf("mfence (=sync)");
+ return;
+
+ case Pin_FpUnary:
+ vex_printf("%s ", showPPCFpOp(i->Pin.FpUnary.op));
+ ppHRegPPC(i->Pin.FpUnary.dst);
+ vex_printf(",");
+ ppHRegPPC(i->Pin.FpUnary.src);
+ return;
+ case Pin_FpBinary:
+ vex_printf("%s ", showPPCFpOp(i->Pin.FpBinary.op));
+ ppHRegPPC(i->Pin.FpBinary.dst);
+ vex_printf(",");
+ ppHRegPPC(i->Pin.FpBinary.srcL);
+ vex_printf(",");
+ ppHRegPPC(i->Pin.FpBinary.srcR);
+ return;
+ case Pin_FpMulAcc:
+ vex_printf("%s ", showPPCFpOp(i->Pin.FpMulAcc.op));
+ ppHRegPPC(i->Pin.FpMulAcc.dst);
+ vex_printf(",");
+ ppHRegPPC(i->Pin.FpMulAcc.srcML);
+ vex_printf(",");
+ ppHRegPPC(i->Pin.FpMulAcc.srcMR);
+ vex_printf(",");
+ ppHRegPPC(i->Pin.FpMulAcc.srcAcc);
+ return;
+ case Pin_FpLdSt: {
+ UChar sz = i->Pin.FpLdSt.sz;
+ Bool idxd = toBool(i->Pin.FpLdSt.addr->tag == Pam_RR);
+ if (i->Pin.FpLdSt.isLoad) {
+ vex_printf("lf%c%s ",
+ (sz==4 ? 's' : 'd'),
+ idxd ? "x" : "" );
+ ppHRegPPC(i->Pin.FpLdSt.reg);
+ vex_printf(",");
+ ppPPCAMode(i->Pin.FpLdSt.addr);
+ } else {
+ vex_printf("stf%c%s ",
+ (sz==4 ? 's' : 'd'),
+ idxd ? "x" : "" );
+ ppHRegPPC(i->Pin.FpLdSt.reg);
+ vex_printf(",");
+ ppPPCAMode(i->Pin.FpLdSt.addr);
+ }
+ return;
+ }
+ case Pin_FpSTFIW:
+ vex_printf("stfiwz ");
+ ppHRegPPC(i->Pin.FpSTFIW.data);
+ vex_printf(",0(");
+ ppHRegPPC(i->Pin.FpSTFIW.addr);
+ vex_printf(")");
+ return;
+ case Pin_FpRSP:
+ vex_printf("frsp ");
+ ppHRegPPC(i->Pin.FpRSP.dst);
+ vex_printf(",");
+ ppHRegPPC(i->Pin.FpRSP.src);
+ return;
+ case Pin_FpCftI: {
+ HChar* str = "fc???";
+ if (i->Pin.FpCftI.fromI == False && i->Pin.FpCftI.int32 == False)
+ str = "fctid";
+ else
+ if (i->Pin.FpCftI.fromI == False && i->Pin.FpCftI.int32 == True)
+ str = "fctiw";
+ else
+ if (i->Pin.FpCftI.fromI == True && i->Pin.FpCftI.int32 == False)
+ str = "fcfid";
+ vex_printf("%s ", str);
+ ppHRegPPC(i->Pin.FpCftI.dst);
+ vex_printf(",");
+ ppHRegPPC(i->Pin.FpCftI.src);
+ return;
+ }
+ case Pin_FpCMov:
+ vex_printf("fpcmov (%s) ", showPPCCondCode(i->Pin.FpCMov.cond));
+ ppHRegPPC(i->Pin.FpCMov.dst);
+ vex_printf(",");
+ ppHRegPPC(i->Pin.FpCMov.src);
+ vex_printf(": ");
+ vex_printf("if (fr_dst != fr_src) { ");
+ if (i->Pin.FpCMov.cond.test != Pct_ALWAYS) {
+ vex_printf("if (%s) { ", showPPCCondCode(i->Pin.FpCMov.cond));
+ }
+ vex_printf("fmr ");
+ ppHRegPPC(i->Pin.FpCMov.dst);
+ vex_printf(",");
+ ppHRegPPC(i->Pin.FpCMov.src);
+ if (i->Pin.FpCMov.cond.test != Pct_ALWAYS)
+ vex_printf(" }");
+ vex_printf(" }");
+ return;
+ case Pin_FpLdFPSCR:
+ vex_printf("mtfsf 0xFF,");
+ ppHRegPPC(i->Pin.FpLdFPSCR.src);
+ return;
+ case Pin_FpCmp:
+ vex_printf("fcmpo %%cr1,");
+ ppHRegPPC(i->Pin.FpCmp.srcL);
+ vex_printf(",");
+ ppHRegPPC(i->Pin.FpCmp.srcR);
+ vex_printf("; mfcr ");
+ ppHRegPPC(i->Pin.FpCmp.dst);
+ vex_printf("; rlwinm ");
+ ppHRegPPC(i->Pin.FpCmp.dst);
+ vex_printf(",");
+ ppHRegPPC(i->Pin.FpCmp.dst);
+ vex_printf(",8,28,31");
+ return;
+
+ case Pin_RdWrLR:
+ vex_printf("%s ", i->Pin.RdWrLR.wrLR ? "mtlr" : "mflr");
+ ppHRegPPC(i->Pin.RdWrLR.gpr);
+ return;
+
+ case Pin_AvLdSt: {
+ UChar sz = i->Pin.AvLdSt.sz;
+ HChar* str_size;
+ if (i->Pin.AvLdSt.addr->tag == Pam_IR) {
+ ppLoadImm(hregPPC_GPR30(mode64),
+ i->Pin.AvLdSt.addr->Pam.IR.index, mode64);
+ vex_printf(" ; ");
+ }
+ str_size = sz==1 ? "eb" : sz==2 ? "eh" : sz==4 ? "ew" : "";
+ if (i->Pin.AvLdSt.isLoad)
+ vex_printf("lv%sx ", str_size);
+ else
+ vex_printf("stv%sx ", str_size);
+ ppHRegPPC(i->Pin.AvLdSt.reg);
+ vex_printf(",");
+ if (i->Pin.AvLdSt.addr->tag == Pam_IR)
+ vex_printf("%%r30");
+ else
+ ppHRegPPC(i->Pin.AvLdSt.addr->Pam.RR.index);
+ vex_printf(",");
+ ppHRegPPC(i->Pin.AvLdSt.addr->Pam.RR.base);
+ return;
+ }
+ case Pin_AvUnary:
+ vex_printf("%s ", showPPCAvOp(i->Pin.AvUnary.op));
+ ppHRegPPC(i->Pin.AvUnary.dst);
+ vex_printf(",");
+ ppHRegPPC(i->Pin.AvUnary.src);
+ return;
+ case Pin_AvBinary:
+ vex_printf("%s ", showPPCAvOp(i->Pin.AvBinary.op));
+ ppHRegPPC(i->Pin.AvBinary.dst);
+ vex_printf(",");
+ ppHRegPPC(i->Pin.AvBinary.srcL);
+ vex_printf(",");
+ ppHRegPPC(i->Pin.AvBinary.srcR);
+ return;
+ case Pin_AvBin8x16:
+ vex_printf("%s(b) ", showPPCAvOp(i->Pin.AvBin8x16.op));
+ ppHRegPPC(i->Pin.AvBin8x16.dst);
+ vex_printf(",");
+ ppHRegPPC(i->Pin.AvBin8x16.srcL);
+ vex_printf(",");
+ ppHRegPPC(i->Pin.AvBin8x16.srcR);
+ return;
+ case Pin_AvBin16x8:
+ vex_printf("%s(h) ", showPPCAvOp(i->Pin.AvBin16x8.op));
+ ppHRegPPC(i->Pin.AvBin16x8.dst);
+ vex_printf(",");
+ ppHRegPPC(i->Pin.AvBin16x8.srcL);
+ vex_printf(",");
+ ppHRegPPC(i->Pin.AvBin16x8.srcR);
+ return;
+ case Pin_AvBin32x4:
+ vex_printf("%s(w) ", showPPCAvOp(i->Pin.AvBin32x4.op));
+ ppHRegPPC(i->Pin.AvBin32x4.dst);
+ vex_printf(",");
+ ppHRegPPC(i->Pin.AvBin32x4.srcL);
+ vex_printf(",");
+ ppHRegPPC(i->Pin.AvBin32x4.srcR);
+ return;
+ case Pin_AvBin32Fx4:
+ vex_printf("%s ", showPPCAvFpOp(i->Pin.AvBin32Fx4.op));
+ ppHRegPPC(i->Pin.AvBin32Fx4.dst);
+ vex_printf(",");
+ ppHRegPPC(i->Pin.AvBin32Fx4.srcL);
+ vex_printf(",");
+ ppHRegPPC(i->Pin.AvBin32Fx4.srcR);
+ return;
+ case Pin_AvUn32Fx4:
+ vex_printf("%s ", showPPCAvFpOp(i->Pin.AvUn32Fx4.op));
+ ppHRegPPC(i->Pin.AvUn32Fx4.dst);
+ vex_printf(",");
+ ppHRegPPC(i->Pin.AvUn32Fx4.src);
+ return;
+ case Pin_AvPerm:
+ vex_printf("vperm ");
+ ppHRegPPC(i->Pin.AvPerm.dst);
+ vex_printf(",");
+ ppHRegPPC(i->Pin.AvPerm.srcL);
+ vex_printf(",");
+ ppHRegPPC(i->Pin.AvPerm.srcR);
+ vex_printf(",");
+ ppHRegPPC(i->Pin.AvPerm.ctl);
+ return;
+
+ case Pin_AvSel:
+ vex_printf("vsel ");
+ ppHRegPPC(i->Pin.AvSel.dst);
+ vex_printf(",");
+ ppHRegPPC(i->Pin.AvSel.srcL);
+ vex_printf(",");
+ ppHRegPPC(i->Pin.AvSel.srcR);
+ vex_printf(",");
+ ppHRegPPC(i->Pin.AvSel.ctl);
+ return;
+
+ case Pin_AvShlDbl:
+ vex_printf("vsldoi ");
+ ppHRegPPC(i->Pin.AvShlDbl.dst);
+ vex_printf(",");
+ ppHRegPPC(i->Pin.AvShlDbl.srcL);
+ vex_printf(",");
+ ppHRegPPC(i->Pin.AvShlDbl.srcR);
+ vex_printf(",%d", i->Pin.AvShlDbl.shift);
+ return;
+
+ case Pin_AvSplat: {
+ UChar sz = i->Pin.AvSplat.sz;
+ UChar ch_sz = toUChar( (sz == 8) ? 'b' : (sz == 16) ? 'h' : 'w' );
+ vex_printf("vsplt%s%c ",
+ i->Pin.AvSplat.src->tag == Pvi_Imm ? "is" : "", ch_sz);
+ ppHRegPPC(i->Pin.AvSplat.dst);
+ vex_printf(",");
+ ppPPCVI5s(i->Pin.AvSplat.src);
+ if (i->Pin.AvSplat.src->tag == Pvi_Reg)
+ vex_printf(", %d", (128/sz)-1); /* louis lane */
+ return;
+ }
+
+ case Pin_AvCMov:
+ vex_printf("avcmov (%s) ", showPPCCondCode(i->Pin.AvCMov.cond));
+ ppHRegPPC(i->Pin.AvCMov.dst);
+ vex_printf(",");
+ ppHRegPPC(i->Pin.AvCMov.src);
+ vex_printf(": ");
+ vex_printf("if (v_dst != v_src) { ");
+ if (i->Pin.AvCMov.cond.test != Pct_ALWAYS) {
+ vex_printf("if (%s) { ", showPPCCondCode(i->Pin.AvCMov.cond));
+ }
+ vex_printf("vmr ");
+ ppHRegPPC(i->Pin.AvCMov.dst);
+ vex_printf(",");
+ ppHRegPPC(i->Pin.AvCMov.src);
+ if (i->Pin.AvCMov.cond.test != Pct_ALWAYS)
+ vex_printf(" }");
+ vex_printf(" }");
+ return;
+
+ case Pin_AvLdVSCR:
+ vex_printf("mtvscr ");
+ ppHRegPPC(i->Pin.AvLdVSCR.src);
+ return;
+
+ default:
+ vex_printf("\nppPPCInstr: No such tag(%d)\n", (Int)i->tag);
+ vpanic("ppPPCInstr");
+ }
+}
+
+/* --------- Helpers for register allocation. --------- */
+
+void getRegUsage_PPCInstr ( HRegUsage* u, PPCInstr* i, Bool mode64 )
+{
+ initHRegUsage(u);
+ switch (i->tag) {
+ case Pin_LI:
+ addHRegUse(u, HRmWrite, i->Pin.LI.dst);
+ break;
+ case Pin_Alu:
+ addHRegUse(u, HRmRead, i->Pin.Alu.srcL);
+ addRegUsage_PPCRH(u, i->Pin.Alu.srcR);
+ addHRegUse(u, HRmWrite, i->Pin.Alu.dst);
+ return;
+ case Pin_Shft:
+ addHRegUse(u, HRmRead, i->Pin.Shft.srcL);
+ addRegUsage_PPCRH(u, i->Pin.Shft.srcR);
+ addHRegUse(u, HRmWrite, i->Pin.Shft.dst);
+ return;
+ case Pin_AddSubC:
+ addHRegUse(u, HRmWrite, i->Pin.AddSubC.dst);
+ addHRegUse(u, HRmRead, i->Pin.AddSubC.srcL);
+ addHRegUse(u, HRmRead, i->Pin.AddSubC.srcR);
+ return;
+ case Pin_Cmp:
+ addHRegUse(u, HRmRead, i->Pin.Cmp.srcL);
+ addRegUsage_PPCRH(u, i->Pin.Cmp.srcR);
+ return;
+ case Pin_Unary:
+ addHRegUse(u, HRmWrite, i->Pin.Unary.dst);
+ addHRegUse(u, HRmRead, i->Pin.Unary.src);
+ return;
+ case Pin_MulL:
+ addHRegUse(u, HRmWrite, i->Pin.MulL.dst);
+ addHRegUse(u, HRmRead, i->Pin.MulL.srcL);
+ addHRegUse(u, HRmRead, i->Pin.MulL.srcR);
+ return;
+ case Pin_Div:
+ addHRegUse(u, HRmWrite, i->Pin.Div.dst);
+ addHRegUse(u, HRmRead, i->Pin.Div.srcL);
+ addHRegUse(u, HRmRead, i->Pin.Div.srcR);
+ return;
+ case Pin_Call: {
+ UInt argir;
+ /* This is a bit subtle. */
+ /* First off, claim it trashes all the caller-saved regs
+ which fall within the register allocator's jurisdiction.
+ These I believe to be:
+ mode32: r3 to r12
+ mode64: r3 to r10
+ */
+ /* XXXXXXXXXXXXXXXXX BUG! This doesn't say anything about the FP
+ or Altivec registers. We get away with this ONLY because
+ getAllocatableRegs_PPC gives the allocator callee-saved fp
+ and Altivec regs, and no caller-save ones. */
+ addHRegUse(u, HRmWrite, hregPPC_GPR3(mode64));
+ addHRegUse(u, HRmWrite, hregPPC_GPR4(mode64));
+ addHRegUse(u, HRmWrite, hregPPC_GPR5(mode64));
+ addHRegUse(u, HRmWrite, hregPPC_GPR6(mode64));
+ addHRegUse(u, HRmWrite, hregPPC_GPR7(mode64));
+ addHRegUse(u, HRmWrite, hregPPC_GPR8(mode64));
+ addHRegUse(u, HRmWrite, hregPPC_GPR9(mode64));
+ addHRegUse(u, HRmWrite, hregPPC_GPR10(mode64));
+ if (!mode64) {
+ addHRegUse(u, HRmWrite, hregPPC_GPR11(mode64));
+ addHRegUse(u, HRmWrite, hregPPC_GPR12(mode64));
+ }
+
+ /* Now we have to state any parameter-carrying registers
+ which might be read. This depends on the argiregs field. */
+ argir = i->Pin.Call.argiregs;
+ if (argir &(1<<10)) addHRegUse(u, HRmRead, hregPPC_GPR10(mode64));
+ if (argir & (1<<9)) addHRegUse(u, HRmRead, hregPPC_GPR9(mode64));
+ if (argir & (1<<8)) addHRegUse(u, HRmRead, hregPPC_GPR8(mode64));
+ if (argir & (1<<7)) addHRegUse(u, HRmRead, hregPPC_GPR7(mode64));
+ if (argir & (1<<6)) addHRegUse(u, HRmRead, hregPPC_GPR6(mode64));
+ if (argir & (1<<5)) addHRegUse(u, HRmRead, hregPPC_GPR5(mode64));
+ if (argir & (1<<4)) addHRegUse(u, HRmRead, hregPPC_GPR4(mode64));
+ if (argir & (1<<3)) addHRegUse(u, HRmRead, hregPPC_GPR3(mode64));
+
+ vassert(0 == (argir & ~((1<<3)|(1<<4)|(1<<5)|(1<<6)
+ |(1<<7)|(1<<8)|(1<<9)|(1<<10))));
+
+ /* Finally, there is the issue that the insn trashes a
+ register because the literal target address has to be
+ loaded into a register. %r10 seems a suitable victim.
+ (Can't use %r0, as some insns interpret it as value zero). */
+ addHRegUse(u, HRmWrite, hregPPC_GPR10(mode64));
+ /* Upshot of this is that the assembler really must use %r10,
+ and no other, as a destination temporary. */
+ return;
+ }
+ case Pin_Goto:
+ addRegUsage_PPCRI(u, i->Pin.Goto.dst);
+ /* GPR3 holds destination address from Pin_Goto */
+ addHRegUse(u, HRmWrite, hregPPC_GPR3(mode64));
+ if (i->Pin.Goto.jk != Ijk_Boring
+ && i->Pin.Goto.jk != Ijk_Call
+ && i->Pin.Goto.jk != Ijk_Ret)
+ /* note, this is irrelevant since the guest state pointer
+ register is not actually available to the allocator.
+ But still ... */
+ addHRegUse(u, HRmWrite, GuestStatePtr(mode64));
+ return;
+ case Pin_CMov:
+ addRegUsage_PPCRI(u, i->Pin.CMov.src);
+ addHRegUse(u, HRmWrite, i->Pin.CMov.dst);
+ return;
+ case Pin_Load:
+ addRegUsage_PPCAMode(u, i->Pin.Load.src);
+ addHRegUse(u, HRmWrite, i->Pin.Load.dst);
+ return;
+ case Pin_LoadL:
+ addHRegUse(u, HRmRead, i->Pin.LoadL.src);
+ addHRegUse(u, HRmWrite, i->Pin.LoadL.dst);
+ return;
+ case Pin_Store:
+ addHRegUse(u, HRmRead, i->Pin.Store.src);
+ addRegUsage_PPCAMode(u, i->Pin.Store.dst);
+ return;
+ case Pin_StoreC:
+ addHRegUse(u, HRmRead, i->Pin.StoreC.src);
+ addHRegUse(u, HRmRead, i->Pin.StoreC.dst);
+ return;
+ case Pin_Set:
+ addHRegUse(u, HRmWrite, i->Pin.Set.dst);
+ return;
+ case Pin_MfCR:
+ addHRegUse(u, HRmWrite, i->Pin.MfCR.dst);
+ return;
+ case Pin_MFence:
+ return;
+
+ case Pin_FpUnary:
+ addHRegUse(u, HRmWrite, i->Pin.FpUnary.dst);
+ addHRegUse(u, HRmRead, i->Pin.FpUnary.src);
+ return;
+ case Pin_FpBinary:
+ addHRegUse(u, HRmWrite, i->Pin.FpBinary.dst);
+ addHRegUse(u, HRmRead, i->Pin.FpBinary.srcL);
+ addHRegUse(u, HRmRead, i->Pin.FpBinary.srcR);
+ return;
+ case Pin_FpMulAcc:
+ addHRegUse(u, HRmWrite, i->Pin.FpMulAcc.dst);
+ addHRegUse(u, HRmRead, i->Pin.FpMulAcc.srcML);
+ addHRegUse(u, HRmRead, i->Pin.FpMulAcc.srcMR);
+ addHRegUse(u, HRmRead, i->Pin.FpMulAcc.srcAcc);
+ return;
+ case Pin_FpLdSt:
+ addHRegUse(u, (i->Pin.FpLdSt.isLoad ? HRmWrite : HRmRead),
+ i->Pin.FpLdSt.reg);
+ addRegUsage_PPCAMode(u, i->Pin.FpLdSt.addr);
+ return;
+ case Pin_FpSTFIW:
+ addHRegUse(u, HRmRead, i->Pin.FpSTFIW.addr);
+ addHRegUse(u, HRmRead, i->Pin.FpSTFIW.data);
+ return;
+ case Pin_FpRSP:
+ addHRegUse(u, HRmWrite, i->Pin.FpRSP.dst);
+ addHRegUse(u, HRmRead, i->Pin.FpRSP.src);
+ return;
+ case Pin_FpCftI:
+ addHRegUse(u, HRmWrite, i->Pin.FpCftI.dst);
+ addHRegUse(u, HRmRead, i->Pin.FpCftI.src);
+ return;
+ case Pin_FpCMov:
+ addHRegUse(u, HRmModify, i->Pin.FpCMov.dst);
+ addHRegUse(u, HRmRead, i->Pin.FpCMov.src);
+ return;
+ case Pin_FpLdFPSCR:
+ addHRegUse(u, HRmRead, i->Pin.FpLdFPSCR.src);
+ return;
+ case Pin_FpCmp:
+ addHRegUse(u, HRmWrite, i->Pin.FpCmp.dst);
+ addHRegUse(u, HRmRead, i->Pin.FpCmp.srcL);
+ addHRegUse(u, HRmRead, i->Pin.FpCmp.srcR);
+ return;
+
+ case Pin_RdWrLR:
+ addHRegUse(u, (i->Pin.RdWrLR.wrLR ? HRmRead : HRmWrite),
+ i->Pin.RdWrLR.gpr);
+ return;
+
+ case Pin_AvLdSt:
+ addHRegUse(u, (i->Pin.AvLdSt.isLoad ? HRmWrite : HRmRead),
+ i->Pin.AvLdSt.reg);
+ if (i->Pin.AvLdSt.addr->tag == Pam_IR)
+ addHRegUse(u, HRmWrite, hregPPC_GPR30(mode64));
+ addRegUsage_PPCAMode(u, i->Pin.AvLdSt.addr);
+ return;
+ case Pin_AvUnary:
+ addHRegUse(u, HRmWrite, i->Pin.AvUnary.dst);
+ addHRegUse(u, HRmRead, i->Pin.AvUnary.src);
+ return;
+ case Pin_AvBinary:
+ if (i->Pin.AvBinary.op == Pav_XOR
+ && i->Pin.AvBinary.dst == i->Pin.AvBinary.srcL
+ && i->Pin.AvBinary.dst == i->Pin.AvBinary.srcR) {
+ /* reg-alloc needs to understand 'xor r,r,r' as a write of r */
+ /* (as opposed to a rite of passage :-) */
+ addHRegUse(u, HRmWrite, i->Pin.AvBinary.dst);
+ } else {
+ addHRegUse(u, HRmWrite, i->Pin.AvBinary.dst);
+ addHRegUse(u, HRmRead, i->Pin.AvBinary.srcL);
+ addHRegUse(u, HRmRead, i->Pin.AvBinary.srcR);
+ }
+ return;
+ case Pin_AvBin8x16:
+ addHRegUse(u, HRmWrite, i->Pin.AvBin8x16.dst);
+ addHRegUse(u, HRmRead, i->Pin.AvBin8x16.srcL);
+ addHRegUse(u, HRmRead, i->Pin.AvBin8x16.srcR);
+ return;
+ case Pin_AvBin16x8:
+ addHRegUse(u, HRmWrite, i->Pin.AvBin16x8.dst);
+ addHRegUse(u, HRmRead, i->Pin.AvBin16x8.srcL);
+ addHRegUse(u, HRmRead, i->Pin.AvBin16x8.srcR);
+ return;
+ case Pin_AvBin32x4:
+ addHRegUse(u, HRmWrite, i->Pin.AvBin32x4.dst);
+ addHRegUse(u, HRmRead, i->Pin.AvBin32x4.srcL);
+ addHRegUse(u, HRmRead, i->Pin.AvBin32x4.srcR);
+ return;
+ case Pin_AvBin32Fx4:
+ addHRegUse(u, HRmWrite, i->Pin.AvBin32Fx4.dst);
+ addHRegUse(u, HRmRead, i->Pin.AvBin32Fx4.srcL);
+ addHRegUse(u, HRmRead, i->Pin.AvBin32Fx4.srcR);
+ if (i->Pin.AvBin32Fx4.op == Pavfp_MULF)
+ addHRegUse(u, HRmWrite, hregPPC_VR29());
+ return;
+ case Pin_AvUn32Fx4:
+ addHRegUse(u, HRmWrite, i->Pin.AvUn32Fx4.dst);
+ addHRegUse(u, HRmRead, i->Pin.AvUn32Fx4.src);
+ return;
+ case Pin_AvPerm:
+ addHRegUse(u, HRmWrite, i->Pin.AvPerm.dst);
+ addHRegUse(u, HRmRead, i->Pin.AvPerm.srcL);
+ addHRegUse(u, HRmRead, i->Pin.AvPerm.srcR);
+ addHRegUse(u, HRmRead, i->Pin.AvPerm.ctl);
+ return;
+ case Pin_AvSel:
+ addHRegUse(u, HRmWrite, i->Pin.AvSel.dst);
+ addHRegUse(u, HRmRead, i->Pin.AvSel.ctl);
+ addHRegUse(u, HRmRead, i->Pin.AvSel.srcL);
+ addHRegUse(u, HRmRead, i->Pin.AvSel.srcR);
+ return;
+ case Pin_AvShlDbl:
+ addHRegUse(u, HRmWrite, i->Pin.AvShlDbl.dst);
+ addHRegUse(u, HRmRead, i->Pin.AvShlDbl.srcL);
+ addHRegUse(u, HRmRead, i->Pin.AvShlDbl.srcR);
+ return;
+ case Pin_AvSplat:
+ addHRegUse(u, HRmWrite, i->Pin.AvSplat.dst);
+ addRegUsage_PPCVI5s(u, i->Pin.AvSplat.src);
+ return;
+ case Pin_AvCMov:
+ addHRegUse(u, HRmModify, i->Pin.AvCMov.dst);
+ addHRegUse(u, HRmRead, i->Pin.AvCMov.src);
+ return;
+ case Pin_AvLdVSCR:
+ addHRegUse(u, HRmRead, i->Pin.AvLdVSCR.src);
+ return;
+
+ default:
+ ppPPCInstr(i, mode64);
+ vpanic("getRegUsage_PPCInstr");
+ }
+}
+
+/* local helper */
+static void mapReg( HRegRemap* m, HReg* r )
+{
+ *r = lookupHRegRemap(m, *r);
+}
+
+void mapRegs_PPCInstr ( HRegRemap* m, PPCInstr* i, Bool mode64 )
+{
+ switch (i->tag) {
+ case Pin_LI:
+ mapReg(m, &i->Pin.LI.dst);
+ return;
+ case Pin_Alu:
+ mapReg(m, &i->Pin.Alu.dst);
+ mapReg(m, &i->Pin.Alu.srcL);
+ mapRegs_PPCRH(m, i->Pin.Alu.srcR);
+ return;
+ case Pin_Shft:
+ mapReg(m, &i->Pin.Shft.dst);
+ mapReg(m, &i->Pin.Shft.srcL);
+ mapRegs_PPCRH(m, i->Pin.Shft.srcR);
+ return;
+ case Pin_AddSubC:
+ mapReg(m, &i->Pin.AddSubC.dst);
+ mapReg(m, &i->Pin.AddSubC.srcL);
+ mapReg(m, &i->Pin.AddSubC.srcR);
+ return;
+ case Pin_Cmp:
+ mapReg(m, &i->Pin.Cmp.srcL);
+ mapRegs_PPCRH(m, i->Pin.Cmp.srcR);
+ return;
+ case Pin_Unary:
+ mapReg(m, &i->Pin.Unary.dst);
+ mapReg(m, &i->Pin.Unary.src);
+ return;
+ case Pin_MulL:
+ mapReg(m, &i->Pin.MulL.dst);
+ mapReg(m, &i->Pin.MulL.srcL);
+ mapReg(m, &i->Pin.MulL.srcR);
+ return;
+ case Pin_Div:
+ mapReg(m, &i->Pin.Div.dst);
+ mapReg(m, &i->Pin.Div.srcL);
+ mapReg(m, &i->Pin.Div.srcR);
+ return;
+ case Pin_Call:
+ return;
+ case Pin_Goto:
+ mapRegs_PPCRI(m, i->Pin.Goto.dst);
+ return;
+ case Pin_CMov:
+ mapRegs_PPCRI(m, i->Pin.CMov.src);
+ mapReg(m, &i->Pin.CMov.dst);
+ return;
+ case Pin_Load:
+ mapRegs_PPCAMode(m, i->Pin.Load.src);
+ mapReg(m, &i->Pin.Load.dst);
+ return;
+ case Pin_LoadL:
+ mapReg(m, &i->Pin.LoadL.src);
+ mapReg(m, &i->Pin.LoadL.dst);
+ return;
+ case Pin_Store:
+ mapReg(m, &i->Pin.Store.src);
+ mapRegs_PPCAMode(m, i->Pin.Store.dst);
+ return;
+ case Pin_StoreC:
+ mapReg(m, &i->Pin.StoreC.src);
+ mapReg(m, &i->Pin.StoreC.dst);
+ return;
+ case Pin_Set:
+ mapReg(m, &i->Pin.Set.dst);
+ return;
+ case Pin_MfCR:
+ mapReg(m, &i->Pin.MfCR.dst);
+ return;
+ case Pin_MFence:
+ return;
+ case Pin_FpUnary:
+ mapReg(m, &i->Pin.FpUnary.dst);
+ mapReg(m, &i->Pin.FpUnary.src);
+ return;
+ case Pin_FpBinary:
+ mapReg(m, &i->Pin.FpBinary.dst);
+ mapReg(m, &i->Pin.FpBinary.srcL);
+ mapReg(m, &i->Pin.FpBinary.srcR);
+ return;
+ case Pin_FpMulAcc:
+ mapReg(m, &i->Pin.FpMulAcc.dst);
+ mapReg(m, &i->Pin.FpMulAcc.srcML);
+ mapReg(m, &i->Pin.FpMulAcc.srcMR);
+ mapReg(m, &i->Pin.FpMulAcc.srcAcc);
+ return;
+ case Pin_FpLdSt:
+ mapReg(m, &i->Pin.FpLdSt.reg);
+ mapRegs_PPCAMode(m, i->Pin.FpLdSt.addr);
+ return;
+ case Pin_FpSTFIW:
+ mapReg(m, &i->Pin.FpSTFIW.addr);
+ mapReg(m, &i->Pin.FpSTFIW.data);
+ return;
+ case Pin_FpRSP:
+ mapReg(m, &i->Pin.FpRSP.dst);
+ mapReg(m, &i->Pin.FpRSP.src);
+ return;
+ case Pin_FpCftI:
+ mapReg(m, &i->Pin.FpCftI.dst);
+ mapReg(m, &i->Pin.FpCftI.src);
+ return;
+ case Pin_FpCMov:
+ mapReg(m, &i->Pin.FpCMov.dst);
+ mapReg(m, &i->Pin.FpCMov.src);
+ return;
+ case Pin_FpLdFPSCR:
+ mapReg(m, &i->Pin.FpLdFPSCR.src);
+ return;
+ case Pin_FpCmp:
+ mapReg(m, &i->Pin.FpCmp.dst);
+ mapReg(m, &i->Pin.FpCmp.srcL);
+ mapReg(m, &i->Pin.FpCmp.srcR);
+ return;
+ case Pin_RdWrLR:
+ mapReg(m, &i->Pin.RdWrLR.gpr);
+ return;
+ case Pin_AvLdSt:
+ mapReg(m, &i->Pin.AvLdSt.reg);
+ mapRegs_PPCAMode(m, i->Pin.AvLdSt.addr);
+ return;
+ case Pin_AvUnary:
+ mapReg(m, &i->Pin.AvUnary.dst);
+ mapReg(m, &i->Pin.AvUnary.src);
+ return;
+ case Pin_AvBinary:
+ mapReg(m, &i->Pin.AvBinary.dst);
+ mapReg(m, &i->Pin.AvBinary.srcL);
+ mapReg(m, &i->Pin.AvBinary.srcR);
+ return;
+ case Pin_AvBin8x16:
+ mapReg(m, &i->Pin.AvBin8x16.dst);
+ mapReg(m, &i->Pin.AvBin8x16.srcL);
+ mapReg(m, &i->Pin.AvBin8x16.srcR);
+ return;
+ case Pin_AvBin16x8:
+ mapReg(m, &i->Pin.AvBin16x8.dst);
+ mapReg(m, &i->Pin.AvBin16x8.srcL);
+ mapReg(m, &i->Pin.AvBin16x8.srcR);
+ return;
+ case Pin_AvBin32x4:
+ mapReg(m, &i->Pin.AvBin32x4.dst);
+ mapReg(m, &i->Pin.AvBin32x4.srcL);
+ mapReg(m, &i->Pin.AvBin32x4.srcR);
+ return;
+ case Pin_AvBin32Fx4:
+ mapReg(m, &i->Pin.AvBin32Fx4.dst);
+ mapReg(m, &i->Pin.AvBin32Fx4.srcL);
+ mapReg(m, &i->Pin.AvBin32Fx4.srcR);
+ return;
+ case Pin_AvUn32Fx4:
+ mapReg(m, &i->Pin.AvUn32Fx4.dst);
+ mapReg(m, &i->Pin.AvUn32Fx4.src);
+ return;
+ case Pin_AvPerm:
+ mapReg(m, &i->Pin.AvPerm.dst);
+ mapReg(m, &i->Pin.AvPerm.srcL);
+ mapReg(m, &i->Pin.AvPerm.srcR);
+ mapReg(m, &i->Pin.AvPerm.ctl);
+ return;
+ case Pin_AvSel:
+ mapReg(m, &i->Pin.AvSel.dst);
+ mapReg(m, &i->Pin.AvSel.srcL);
+ mapReg(m, &i->Pin.AvSel.srcR);
+ mapReg(m, &i->Pin.AvSel.ctl);
+ return;
+ case Pin_AvShlDbl:
+ mapReg(m, &i->Pin.AvShlDbl.dst);
+ mapReg(m, &i->Pin.AvShlDbl.srcL);
+ mapReg(m, &i->Pin.AvShlDbl.srcR);
+ return;
+ case Pin_AvSplat:
+ mapReg(m, &i->Pin.AvSplat.dst);
+ mapRegs_PPCVI5s(m, i->Pin.AvSplat.src);
+ return;
+ case Pin_AvCMov:
+ mapReg(m, &i->Pin.AvCMov.dst);
+ mapReg(m, &i->Pin.AvCMov.src);
+ return;
+ case Pin_AvLdVSCR:
+ mapReg(m, &i->Pin.AvLdVSCR.src);
+ return;
+
+ default:
+ ppPPCInstr(i, mode64);
+ vpanic("mapRegs_PPCInstr");
+ }
+}
+
+/* Figure out if i represents a reg-reg move, and if so assign the
+ source and destination to *src and *dst. If in doubt say No. Used
+ by the register allocator to do move coalescing.
+*/
+Bool isMove_PPCInstr ( PPCInstr* i, HReg* src, HReg* dst )
+{
+ /* Moves between integer regs */
+ if (i->tag == Pin_Alu) {
+ // or Rd,Rs,Rs == mr Rd,Rs
+ if (i->Pin.Alu.op != Palu_OR)
+ return False;
+ if (i->Pin.Alu.srcR->tag != Prh_Reg)
+ return False;
+ if (i->Pin.Alu.srcR->Prh.Reg.reg != i->Pin.Alu.srcL)
+ return False;
+ *src = i->Pin.Alu.srcL;
+ *dst = i->Pin.Alu.dst;
+ return True;
+ }
+ /* Moves between FP regs */
+ if (i->tag == Pin_FpUnary) {
+ if (i->Pin.FpUnary.op != Pfp_MOV)
+ return False;
+ *src = i->Pin.FpUnary.src;
+ *dst = i->Pin.FpUnary.dst;
+ return True;
+ }
+ return False;
+}
+
+
+/* Generate ppc spill/reload instructions under the direction of the
+ register allocator. Note it's critical these don't write the
+ condition codes. */
+
+void genSpill_PPC ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
+ HReg rreg, Int offsetB, Bool mode64 )
+{
+ PPCAMode* am;
+ vassert(!hregIsVirtual(rreg));
+ *i1 = *i2 = NULL;
+ am = PPCAMode_IR( offsetB, GuestStatePtr(mode64) );
+ switch (hregClass(rreg)) {
+ case HRcInt64:
+ vassert(mode64);
+ *i1 = PPCInstr_Store( 8, am, rreg, mode64 );
+ return;
+ case HRcInt32:
+ vassert(!mode64);
+ *i1 = PPCInstr_Store( 4, am, rreg, mode64 );
+ return;
+ case HRcFlt64:
+ *i1 = PPCInstr_FpLdSt ( False/*store*/, 8, rreg, am );
+ return;
+ case HRcVec128:
+ // XXX: GPR30 used as spill register to kludge AltiVec AMode_IR
+ *i1 = PPCInstr_AvLdSt ( False/*store*/, 16, rreg, am );
+ return;
+ default:
+ ppHRegClass(hregClass(rreg));
+ vpanic("genSpill_PPC: unimplemented regclass");
+ }
+}
+
+void genReload_PPC ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
+ HReg rreg, Int offsetB, Bool mode64 )
+{
+ PPCAMode* am;
+ vassert(!hregIsVirtual(rreg));
+ *i1 = *i2 = NULL;
+ am = PPCAMode_IR( offsetB, GuestStatePtr(mode64) );
+ switch (hregClass(rreg)) {
+ case HRcInt64:
+ vassert(mode64);
+ *i1 = PPCInstr_Load( 8, rreg, am, mode64 );
+ return;
+ case HRcInt32:
+ vassert(!mode64);
+ *i1 = PPCInstr_Load( 4, rreg, am, mode64 );
+ return;
+ case HRcFlt64:
+ *i1 = PPCInstr_FpLdSt ( True/*load*/, 8, rreg, am );
+ return;
+ case HRcVec128:
+ // XXX: GPR30 used as spill register to kludge AltiVec AMode_IR
+ *i1 = PPCInstr_AvLdSt ( True/*load*/, 16, rreg, am );
+ return;
+ default:
+ ppHRegClass(hregClass(rreg));
+ vpanic("genReload_PPC: unimplemented regclass");
+ }
+}
+
+
+/* --------- The ppc assembler (bleh.) --------- */
+
+static UInt iregNo ( HReg r, Bool mode64 )
+{
+ UInt n;
+ vassert(hregClass(r) == (mode64 ? HRcInt64 : HRcInt32));
+ vassert(!hregIsVirtual(r));
+ n = hregNumber(r);
+ vassert(n < 32);
+ return n;
+}
+
+static UInt fregNo ( HReg fr )
+{
+ UInt n;
+ vassert(hregClass(fr) == HRcFlt64);
+ vassert(!hregIsVirtual(fr));
+ n = hregNumber(fr);
+ vassert(n < 32);
+ return n;
+}
+
+static UInt vregNo ( HReg v )
+{
+ UInt n;
+ vassert(hregClass(v) == HRcVec128);
+ vassert(!hregIsVirtual(v));
+ n = hregNumber(v);
+ vassert(n < 32);
+ return n;
+}
+
+ /* Emit a 32-bit instruction in big-endian byte order */
+static UChar* emit32 ( UChar* p, UInt w32 )
+{
+ *p++ = toUChar((w32 >> 24) & 0x000000FF);
+ *p++ = toUChar((w32 >> 16) & 0x000000FF);
+ *p++ = toUChar((w32 >> 8) & 0x000000FF);
+ *p++ = toUChar((w32) & 0x000000FF);
+ return p;
+}
+
+/* The following mkForm[...] functions refer to ppc instruction forms
+ as per PPC32 p576
+ */
+
+static UChar* mkFormD ( UChar* p, UInt opc1,
+ UInt r1, UInt r2, UInt imm )
+{
+ UInt theInstr;
+ vassert(opc1 < 0x40);
+ vassert(r1 < 0x20);
+ vassert(r2 < 0x20);
+ imm = imm & 0xFFFF;
+ theInstr = ((opc1<<26) | (r1<<21) | (r2<<16) | (imm));
+ return emit32(p, theInstr);
+}
+
+static UChar* mkFormMD ( UChar* p, UInt opc1, UInt r1, UInt r2,
+ UInt imm1, UInt imm2, UInt opc2 )
+{
+ UInt theInstr;
+ vassert(opc1 < 0x40);
+ vassert(r1 < 0x20);
+ vassert(r2 < 0x20);
+ vassert(imm1 < 0x40);
+ vassert(imm2 < 0x40);
+ vassert(opc2 < 0x08);
+ imm2 = ((imm2 & 0x1F) << 1) | (imm2 >> 5);
+ theInstr = ((opc1<<26) | (r1<<21) | (r2<<16) |
+ ((imm1 & 0x1F)<<11) | (imm2<<5) |
+ (opc2<<2) | ((imm1 >> 5)<<1));
+ return emit32(p, theInstr);
+}
+
+static UChar* mkFormX ( UChar* p, UInt opc1, UInt r1, UInt r2,
+ UInt r3, UInt opc2, UInt b0 )
+{
+ UInt theInstr;
+ vassert(opc1 < 0x40);
+ vassert(r1 < 0x20);
+ vassert(r2 < 0x20);
+ vassert(r3 < 0x20);
+ vassert(opc2 < 0x400);
+ vassert(b0 < 0x2);
+ theInstr = ((opc1<<26) | (r1<<21) | (r2<<16) |
+ (r3<<11) | (opc2<<1) | (b0));
+ return emit32(p, theInstr);
+}
+
+static UChar* mkFormXO ( UChar* p, UInt opc1, UInt r1, UInt r2,
+ UInt r3, UInt b10, UInt opc2, UInt b0 )
+{
+ UInt theInstr;
+ vassert(opc1 < 0x40);
+ vassert(r1 < 0x20);
+ vassert(r2 < 0x20);
+ vassert(r3 < 0x20);
+ vassert(b10 < 0x2);
+ vassert(opc2 < 0x200);
+ vassert(b0 < 0x2);
+ theInstr = ((opc1<<26) | (r1<<21) | (r2<<16) |
+ (r3<<11) | (b10 << 10) | (opc2<<1) | (b0));
+ return emit32(p, theInstr);
+}
+
+static UChar* mkFormXL ( UChar* p, UInt opc1, UInt f1, UInt f2,
+ UInt f3, UInt opc2, UInt b0 )
+{
+ UInt theInstr;
+ vassert(opc1 < 0x40);
+ vassert(f1 < 0x20);
+ vassert(f2 < 0x20);
+ vassert(f3 < 0x20);
+ vassert(opc2 < 0x400);
+ vassert(b0 < 0x2);
+ theInstr = ((opc1<<26) | (f1<<21) | (f2<<16) |
+ (f3<<11) | (opc2<<1) | (b0));
+ return emit32(p, theInstr);
+}
+
+ // Note: for split-field ops, pass the mnemonic's field value;
+ // it is rearranged into the split encoding below
+static UChar* mkFormXFX ( UChar* p, UInt r1, UInt f2, UInt opc2 )
+{
+ UInt theInstr;
+ vassert(r1 < 0x20);
+ vassert(f2 < 0x20);
+ vassert(opc2 < 0x400);
+ switch (opc2) {
+ case 144: // mtcrf
+ vassert(f2 < 0x100);
+ f2 = f2 << 1;
+ break;
+ case 339: // mfspr
+ case 371: // mftb
+ case 467: // mtspr
+ vassert(f2 < 0x400);
+ // re-arrange split field
+ f2 = ((f2>>5) & 0x1F) | ((f2 & 0x1F)<<5);
+ break;
+ default: vpanic("mkFormXFX(ppch)");
+ }
+ theInstr = ((31<<26) | (r1<<21) | (f2<<11) | (opc2<<1));
+ return emit32(p, theInstr);
+}
+
+// Only used by mtfsf
+static UChar* mkFormXFL ( UChar* p, UInt FM, UInt freg )
+{
+ UInt theInstr;
+ vassert(FM < 0x100);
+ vassert(freg < 0x20);
+ theInstr = ((63<<26) | (FM<<17) | (freg<<11) | (711<<1));
+ return emit32(p, theInstr);
+}
+
+static UChar* mkFormXS ( UChar* p, UInt opc1, UInt r1, UInt r2,
+ UInt imm, UInt opc2, UInt b0 )
+{
+ UInt theInstr;
+ vassert(opc1 < 0x40);
+ vassert(r1 < 0x20);
+ vassert(r2 < 0x20);
+ vassert(imm < 0x40);
+ vassert(opc2 < 0x400);
+ vassert(b0 < 0x2);
+ theInstr = ((opc1<<26) | (r1<<21) | (r2<<16) |
+ ((imm & 0x1F)<<11) | (opc2<<2) | ((imm>>5)<<1) | (b0));
+ return emit32(p, theInstr);
+}
+
+
+#if 0
+// 'b'
+static UChar* mkFormI ( UChar* p, UInt LI, UInt AA, UInt LK )
+{
+ UInt theInstr;
+ vassert(LI < 0x1000000);
+ vassert(AA < 0x2);
+ vassert(LK < 0x2);
+ theInstr = ((18<<26) | (LI<<2) | (AA<<1) | (LK));
+ return emit32(p, theInstr);
+}
+#endif
+
+// 'bc'
+static UChar* mkFormB ( UChar* p, UInt BO, UInt BI,
+ UInt BD, UInt AA, UInt LK )
+{
+ UInt theInstr;
+ vassert(BO < 0x20);
+ vassert(BI < 0x20);
+ vassert(BD < 0x4000);
+ vassert(AA < 0x2);
+ vassert(LK < 0x2);
+ theInstr = ((16<<26) | (BO<<21) | (BI<<16) |
+ (BD<<2) | (AA<<1) | (LK));
+ return emit32(p, theInstr);
+}
+
+// rotates
+static UChar* mkFormM ( UChar* p, UInt opc1, UInt r1, UInt r2,
+ UInt f3, UInt MB, UInt ME, UInt Rc )
+{
+ UInt theInstr;
+ vassert(opc1 < 0x40);
+ vassert(r1 < 0x20);
+ vassert(r2 < 0x20);
+ vassert(f3 < 0x20);
+ vassert(MB < 0x20);
+ vassert(ME < 0x20);
+ vassert(Rc < 0x2);
+ theInstr = ((opc1<<26) | (r1<<21) | (r2<<16) |
+ (f3<<11) | (MB<<6) | (ME<<1) | (Rc));
+ return emit32(p, theInstr);
+}
+
+static UChar* mkFormA ( UChar* p, UInt opc1, UInt r1, UInt r2,
+ UInt r3, UInt r4, UInt opc2, UInt b0 )
+{
+ UInt theInstr;
+ vassert(opc1 < 0x40);
+ vassert(r1 < 0x20);
+ vassert(r2 < 0x20);
+ vassert(r3 < 0x20);
+ vassert(r4 < 0x20);
+ vassert(opc2 < 0x20);
+ vassert(b0 < 0x2 );
+ theInstr = ((opc1<<26) | (r1<<21) | (r2<<16) | (r3<<11) |
+ (r4<<6) | (opc2<<1) | (b0));
+ return emit32(p, theInstr);
+}
+
+static UChar* doAMode_IR ( UChar* p, UInt opc1, UInt rSD,
+ PPCAMode* am, Bool mode64 )
+{
+ UInt rA, idx;
+ vassert(am->tag == Pam_IR);
+ vassert(am->Pam.IR.index < 0x10000);
+
+ rA = iregNo(am->Pam.IR.base, mode64);
+ idx = am->Pam.IR.index;
+
+ if (opc1 == 58 || opc1 == 62) { // ld/std: mode64 only
+ vassert(mode64);
+ /* stay sane with DS form: lowest 2 bits must be 00. This
+ should be guaranteed to us by iselWordExpr_AMode. */
+ vassert(0 == (idx & 3));
+ }
+ p = mkFormD(p, opc1, rSD, rA, idx);
+ return p;
+}
+
+static UChar* doAMode_RR ( UChar* p, UInt opc1, UInt opc2,
+ UInt rSD, PPCAMode* am, Bool mode64 )
+{
+ UInt rA, rB;
+ vassert(am->tag == Pam_RR);
+
+ rA = iregNo(am->Pam.RR.base, mode64);
+ rB = iregNo(am->Pam.RR.index, mode64);
+
+ p = mkFormX(p, opc1, rSD, rA, rB, opc2, 0);
+ return p;
+}
+
+
+/* Load imm to r_dst */
+static UChar* mkLoadImm ( UChar* p, UInt r_dst, ULong imm, Bool mode64 )
+{
+ vassert(r_dst < 0x20);
+
+ if (!mode64) {
+ /* In 32-bit mode, make sure the top 32 bits of imm are a sign
+ extension of the bottom 32 bits, so that the range tests
+ below work correctly. */
+ UInt u32 = (UInt)imm;
+ Int s32 = (Int)u32;
+ Long s64 = (Long)s32;
+ imm = (ULong)s64;
+ }
+
+ if (imm >= 0xFFFFFFFFFFFF8000ULL || imm < 0x8000) {
+ // sign-extendable from 16 bits
+
+ // addi r_dst,0,imm => li r_dst,imm
+ p = mkFormD(p, 14, r_dst, 0, imm & 0xFFFF);
+ } else {
+ if (imm >= 0xFFFFFFFF80000000ULL || imm < 0x80000000ULL) {
+ // sign-extendable from 32 bits
+
+ // addis r_dst,r0,(imm>>16) => lis r_dst, (imm>>16)
+ p = mkFormD(p, 15, r_dst, 0, (imm>>16) & 0xFFFF);
+ // ori r_dst, r_dst, (imm & 0xFFFF)
+ p = mkFormD(p, 24, r_dst, r_dst, imm & 0xFFFF);
+ } else {
+ // full 64bit immediate load: 5 (five!) insns.
+ vassert(mode64);
+
+ // load high word
+
+ // lis r_dst, (imm>>48) & 0xFFFF
+ p = mkFormD(p, 15, r_dst, 0, (imm>>48) & 0xFFFF);
+
+ // ori r_dst, r_dst, (imm>>32) & 0xFFFF
+ if ((imm>>32) & 0xFFFF)
+ p = mkFormD(p, 24, r_dst, r_dst, (imm>>32) & 0xFFFF);
+
+ // shift r_dst low word to high word => rldicr
+ p = mkFormMD(p, 30, r_dst, r_dst, 32, 31, 1);
+
+ // load low word
+
+ // oris r_dst, r_dst, (imm>>16) & 0xFFFF
+ if ((imm>>16) & 0xFFFF)
+ p = mkFormD(p, 25, r_dst, r_dst, (imm>>16) & 0xFFFF);
+
+ // ori r_dst, r_dst, (imm) & 0xFFFF
+ if (imm & 0xFFFF)
+ p = mkFormD(p, 24, r_dst, r_dst, imm & 0xFFFF);
+ }
+ }
+ return p;
+}
+
+ /* Move r_src to r_dst */
+static UChar* mkMoveReg ( UChar* p, UInt r_dst, UInt r_src )
+{
+ vassert(r_dst < 0x20);
+ vassert(r_src < 0x20);
+
+ if (r_dst != r_src) {
+ /* or r_dst, r_src, r_src */
+ p = mkFormX(p, 31, r_src, r_dst, r_src, 444, 0 );
+ }
+ return p;
+}
+
+static UChar* mkFormVX ( UChar* p, UInt opc1, UInt r1, UInt r2,
+ UInt r3, UInt opc2 )
+{
+ UInt theInstr;
+ vassert(opc1 < 0x40);
+ vassert(r1 < 0x20);
+ vassert(r2 < 0x20);
+ vassert(r3 < 0x20);
+ vassert(opc2 < 0x800);
+ theInstr = ((opc1<<26) | (r1<<21) | (r2<<16) | (r3<<11) | opc2);
+ return emit32(p, theInstr);
+}
+
+static UChar* mkFormVXR ( UChar* p, UInt opc1, UInt r1, UInt r2,
+ UInt r3, UInt Rc, UInt opc2 )
+{
+ UInt theInstr;
+ vassert(opc1 < 0x40);
+ vassert(r1 < 0x20);
+ vassert(r2 < 0x20);
+ vassert(r3 < 0x20);
+ vassert(Rc < 0x2);
+ vassert(opc2 < 0x400);
+ theInstr = ((opc1<<26) | (r1<<21) | (r2<<16) |
+ (r3<<11) | (Rc<<10) | opc2);
+ return emit32(p, theInstr);
+}
+
+static UChar* mkFormVA ( UChar* p, UInt opc1, UInt r1, UInt r2,
+ UInt r3, UInt r4, UInt opc2 )
+{
+ UInt theInstr;
+ vassert(opc1 < 0x40);
+ vassert(r1 < 0x20);
+ vassert(r2 < 0x20);
+ vassert(r3 < 0x20);
+ vassert(r4 < 0x20);
+ vassert(opc2 < 0x40);
+ theInstr = ((opc1<<26) | (r1<<21) | (r2<<16) |
+ (r3<<11) | (r4<<6) | opc2);
+ return emit32(p, theInstr);
+}
+
+
+
+/* Emit an instruction into buf and return the number of bytes used.
+ Note that buf is not the insn's final place, and therefore it is
+ imperative to emit position-independent code.
+
+ Note, dispatch should always be NULL since ppc32/64 backends
+ use a call-return scheme to get from the dispatcher to generated
+ code and back.
+*/
+Int emit_PPCInstr ( UChar* buf, Int nbuf, PPCInstr* i,
+ Bool mode64, void* dispatch )
+{
+ UChar* p = &buf[0];
+ UChar* ptmp = p;
+ vassert(nbuf >= 32);
+
+ if (0) {
+ vex_printf("asm ");ppPPCInstr(i, mode64); vex_printf("\n");
+ }
+
+ switch (i->tag) {
+
+ case Pin_LI:
+ p = mkLoadImm(p, iregNo(i->Pin.LI.dst, mode64),
+ i->Pin.LI.imm64, mode64);
+ goto done;
+
+ case Pin_Alu: {
+ PPCRH* srcR = i->Pin.Alu.srcR;
+ Bool immR = toBool(srcR->tag == Prh_Imm);
+ UInt r_dst = iregNo(i->Pin.Alu.dst, mode64);
+ UInt r_srcL = iregNo(i->Pin.Alu.srcL, mode64);
+ UInt r_srcR = immR ? (-1)/*bogus*/ :
+ iregNo(srcR->Prh.Reg.reg, mode64);
+
+ switch (i->Pin.Alu.op) {
+ case Palu_ADD:
+ if (immR) {
+ /* addi (PPC32 p350) */
+ vassert(srcR->Prh.Imm.syned);
+ vassert(srcR->Prh.Imm.imm16 != 0x8000);
+ p = mkFormD(p, 14, r_dst, r_srcL, srcR->Prh.Imm.imm16);
+ } else {
+ /* add (PPC32 p347) */
+ p = mkFormXO(p, 31, r_dst, r_srcL, r_srcR, 0, 266, 0);
+ }
+ break;
+
+ case Palu_SUB:
+ if (immR) {
+ /* addi (PPC32 p350), but with negated imm */
+ vassert(srcR->Prh.Imm.syned);
+ vassert(srcR->Prh.Imm.imm16 != 0x8000);
+ p = mkFormD(p, 14, r_dst, r_srcL, (- srcR->Prh.Imm.imm16));
+ } else {
+ /* subf (PPC32 p537), with args the "wrong" way round */
+ p = mkFormXO(p, 31, r_dst, r_srcR, r_srcL, 0, 40, 0);
+ }
+ break;
+
+ case Palu_AND:
+ if (immR) {
+ /* andi. (PPC32 p358) */
+ vassert(!srcR->Prh.Imm.syned);
+ p = mkFormD(p, 28, r_srcL, r_dst, srcR->Prh.Imm.imm16);
+ } else {
+ /* and (PPC32 p356) */
+ p = mkFormX(p, 31, r_srcL, r_dst, r_srcR, 28, 0);
+ }
+ break;
+
+ case Palu_OR:
+ if (immR) {
+ /* ori (PPC32 p497) */
+ vassert(!srcR->Prh.Imm.syned);
+ p = mkFormD(p, 24, r_srcL, r_dst, srcR->Prh.Imm.imm16);
+ } else {
+ /* or (PPC32 p495) */
+ p = mkFormX(p, 31, r_srcL, r_dst, r_srcR, 444, 0);
+ }
+ break;
+
+ case Palu_XOR:
+ if (immR) {
+ /* xori (PPC32 p550) */
+ vassert(!srcR->Prh.Imm.syned);
+ p = mkFormD(p, 26, r_srcL, r_dst, srcR->Prh.Imm.imm16);
+ } else {
+ /* xor (PPC32 p549) */
+ p = mkFormX(p, 31, r_srcL, r_dst, r_srcR, 316, 0);
+ }
+ break;
+
+ default:
+ goto bad;
+ }
+ goto done;
+ }
+
+ case Pin_Shft: {
+ PPCRH* srcR = i->Pin.Shft.srcR;
+ Bool sz32 = i->Pin.Shft.sz32;
+ Bool immR = toBool(srcR->tag == Prh_Imm);
+ UInt r_dst = iregNo(i->Pin.Shft.dst, mode64);
+ UInt r_srcL = iregNo(i->Pin.Shft.srcL, mode64);
+ UInt r_srcR = immR ? (-1)/*bogus*/ :
+ iregNo(srcR->Prh.Reg.reg, mode64);
+ if (!mode64)
+ vassert(sz32);
+
+ switch (i->Pin.Shft.op) {
+ case Pshft_SHL:
+ if (sz32) {
+ if (immR) {
+ /* rd = rs << n, 1 <= n <= 31
+ is
+ rlwinm rd,rs,n,0,31-n (PPC32 p501)
+ */
+ UInt n = srcR->Prh.Imm.imm16;
+ vassert(!srcR->Prh.Imm.syned);
+ vassert(n > 0 && n < 32);
+ p = mkFormM(p, 21, r_srcL, r_dst, n, 0, 31-n, 0);
+ } else {
+ /* slw (PPC32 p505) */
+ p = mkFormX(p, 31, r_srcL, r_dst, r_srcR, 24, 0);
+ }
+ } else {
+ if (immR) {
+ /* rd = rs << n, 1 <= n <= 63
+ is
+ rldicr rd,rs,n,63-n (PPC64 p559)
+ */
+ UInt n = srcR->Prh.Imm.imm16;
+ vassert(!srcR->Prh.Imm.syned);
+ vassert(n > 0 && n < 64);
+ p = mkFormMD(p, 30, r_srcL, r_dst, n, 63-n, 1);
+ } else {
+ /* sld (PPC64 p568) */
+ p = mkFormX(p, 31, r_srcL, r_dst, r_srcR, 27, 0);
+ }
+ }
+ break;
+
+ case Pshft_SHR:
+ if (sz32) {
+ if (immR) {
+ /* rd = rs >>u n, 1 <= n <= 31
+ is
+ rlwinm rd,rs,32-n,n,31 (PPC32 p501)
+ */
+ UInt n = srcR->Prh.Imm.imm16;
+ vassert(!srcR->Prh.Imm.syned);
+ vassert(n > 0 && n < 32);
+ p = mkFormM(p, 21, r_srcL, r_dst, 32-n, n, 31, 0);
+ } else {
+ /* srw (PPC32 p508) */
+ p = mkFormX(p, 31, r_srcL, r_dst, r_srcR, 536, 0);
+ }
+ } else {
+ if (immR) {
+ /* rd = rs >>u n, 1 <= n <= 63
+ is
+ rldicl rd,rs,64-n,n (PPC64 p558)
+ */
+ UInt n = srcR->Prh.Imm.imm16;
+ vassert(!srcR->Prh.Imm.syned);
+ vassert(n > 0 && n < 64);
+ p = mkFormMD(p, 30, r_srcL, r_dst, 64-n, n, 0);
+ } else {
+ /* srd (PPC64 p574) */
+ p = mkFormX(p, 31, r_srcL, r_dst, r_srcR, 539, 0);
+ }
+ }
+ break;
+
+ case Pshft_SAR:
+ if (sz32) {
+ if (immR) {
+ /* srawi (PPC32 p507) */
+ UInt n = srcR->Prh.Imm.imm16;
+ vassert(!srcR->Prh.Imm.syned);
+ /* In 64-bit mode, we allow right shifts by zero bits
+ as that is a handy way to sign extend the lower 32
+ bits into the upper 32 bits. */
+ if (mode64)
+ vassert(n >= 0 && n < 32);
+ else
+ vassert(n > 0 && n < 32);
+ p = mkFormX(p, 31, r_srcL, r_dst, n, 824, 0);
+ } else {
+ /* sraw (PPC32 p506) */
+ p = mkFormX(p, 31, r_srcL, r_dst, r_srcR, 792, 0);
+ }
+ } else {
+ if (immR) {
+ /* sradi (PPC64 p571) */
+ UInt n = srcR->Prh.Imm.imm16;
+ vassert(!srcR->Prh.Imm.syned);
+ vassert(n > 0 && n < 64);
+ p = mkFormXS(p, 31, r_srcL, r_dst, n, 413, 0);
+ } else {
+ /* srad (PPC32 p570) */
+ p = mkFormX(p, 31, r_srcL, r_dst, r_srcR, 794, 0);
+ }
+ }
+ break;
+
+ default:
+ goto bad;
+ }
+ goto done;
+ }
+
+ case Pin_AddSubC: {
+ Bool isAdd = i->Pin.AddSubC.isAdd;
+ Bool setC = i->Pin.AddSubC.setC;
+ UInt r_srcL = iregNo(i->Pin.AddSubC.srcL, mode64);
+ UInt r_srcR = iregNo(i->Pin.AddSubC.srcR, mode64);
+ UInt r_dst = iregNo(i->Pin.AddSubC.dst, mode64);
+
+ if (isAdd) {
+ if (setC) /* addc (PPC32 p348) */
+ p = mkFormXO(p, 31, r_dst, r_srcL, r_srcR, 0, 10, 0);
+ else /* adde (PPC32 p349) */
+ p = mkFormXO(p, 31, r_dst, r_srcL, r_srcR, 0, 138, 0);
+ } else {
+ /* subfX, with args the "wrong" way round */
+ if (setC) /* subfc (PPC32 p538) */
+ p = mkFormXO(p, 31, r_dst, r_srcR, r_srcL, 0, 8, 0);
+ else /* subfe (PPC32 p539) */
+ p = mkFormXO(p, 31, r_dst, r_srcR, r_srcL, 0, 136, 0);
+ }
+ goto done;
+ }
+
+ case Pin_Cmp: {
+ Bool syned = i->Pin.Cmp.syned;
+ Bool sz32 = i->Pin.Cmp.sz32;
+ UInt fld1 = i->Pin.Cmp.crfD << 2;
+ UInt r_srcL = iregNo(i->Pin.Cmp.srcL, mode64);
+ UInt r_srcR, imm_srcR;
+ PPCRH* srcR = i->Pin.Cmp.srcR;
+
+ if (!mode64) // cmp double word invalid for mode32
+ vassert(sz32);
+ else if (!sz32) // mode64 && cmp64: set L=1
+ fld1 |= 1;
+
+ switch (srcR->tag) {
+ case Prh_Imm:
+ vassert(syned == srcR->Prh.Imm.syned);
+ imm_srcR = srcR->Prh.Imm.imm16;
+ if (syned) { // cmpwi/cmpdi (signed) (PPC32 p368)
+ vassert(imm_srcR != 0x8000);
+ p = mkFormD(p, 11, fld1, r_srcL, imm_srcR);
+ } else { // cmplwi/cmpldi (unsigned) (PPC32 p370)
+ p = mkFormD(p, 10, fld1, r_srcL, imm_srcR);
+ }
+ break;
+ case Prh_Reg:
+ r_srcR = iregNo(srcR->Prh.Reg.reg, mode64);
+ if (syned) // cmpw/cmpd (signed) (PPC32 p367)
+ p = mkFormX(p, 31, fld1, r_srcL, r_srcR, 0, 0);
+ else // cmplw/cmpld (unsigned) (PPC32 p379)
+ p = mkFormX(p, 31, fld1, r_srcL, r_srcR, 32, 0);
+ break;
+ default:
+ goto bad;
+ }
+ goto done;
+ }
+
+ case Pin_Unary: {
+ UInt r_dst = iregNo(i->Pin.Unary.dst, mode64);
+ UInt r_src = iregNo(i->Pin.Unary.src, mode64);
+
+ switch (i->Pin.Unary.op) {
+ case Pun_NOT: // nor r_dst,r_src,r_src
+ p = mkFormX(p, 31, r_src, r_dst, r_src, 124, 0);
+ break;
+ case Pun_NEG: // neg r_dst,r_src
+ p = mkFormXO(p, 31, r_dst, r_src, 0, 0, 104, 0);
+ break;
+ case Pun_CLZ32: // cntlzw r_dst, r_src
+ p = mkFormX(p, 31, r_src, r_dst, 0, 26, 0);
+ break;
+ case Pun_CLZ64: // cntlzd r_dst, r_src
+ vassert(mode64);
+ p = mkFormX(p, 31, r_src, r_dst, 0, 58, 0);
+ break;
+ case Pun_EXTSW: // extsw r_dst, r_src
+ vassert(mode64);
+ p = mkFormX(p, 31, r_src, r_dst, 0, 986, 0);
+ break;
+ default: goto bad;
+ }
+ goto done;
+ }
+
+ case Pin_MulL: {
+ Bool syned = i->Pin.MulL.syned;
+ Bool sz32 = i->Pin.MulL.sz32;
+ UInt r_dst = iregNo(i->Pin.MulL.dst, mode64);
+ UInt r_srcL = iregNo(i->Pin.MulL.srcL, mode64);
+ UInt r_srcR = iregNo(i->Pin.MulL.srcR, mode64);
+
+ if (!mode64)
+ vassert(sz32);
+
+ if (i->Pin.MulL.hi) {
+ // mul hi words, must consider sign
+ if (sz32) {
+ if (syned) // mulhw r_dst,r_srcL,r_srcR
+ p = mkFormXO(p, 31, r_dst, r_srcL, r_srcR, 0, 75, 0);
+ else // mulhwu r_dst,r_srcL,r_srcR
+ p = mkFormXO(p, 31, r_dst, r_srcL, r_srcR, 0, 11, 0);
+ } else {
+ if (syned) // mulhd r_dst,r_srcL,r_srcR
+ p = mkFormXO(p, 31, r_dst, r_srcL, r_srcR, 0, 73, 0);
+ else // mulhdu r_dst,r_srcL,r_srcR
+ p = mkFormXO(p, 31, r_dst, r_srcL, r_srcR, 0, 9, 0);
+ }
+ } else {
+ // mul low word, sign is irrelevant
+ vassert(!i->Pin.MulL.syned);
+ if (sz32) // mullw r_dst,r_srcL,r_srcR
+ p = mkFormXO(p, 31, r_dst, r_srcL, r_srcR, 0, 235, 0);
+ else // mulld r_dst,r_srcL,r_srcR
+ p = mkFormXO(p, 31, r_dst, r_srcL, r_srcR, 0, 233, 0);
+ }
+ goto done;
+ }
+
+ case Pin_Div: {
+ Bool syned = i->Pin.Div.syned;
+ Bool sz32 = i->Pin.Div.sz32;
+ UInt r_dst = iregNo(i->Pin.Div.dst, mode64);
+ UInt r_srcL = iregNo(i->Pin.Div.srcL, mode64);
+ UInt r_srcR = iregNo(i->Pin.Div.srcR, mode64);
+
+ if (!mode64)
+ vassert(sz32);
+
+ if (sz32) {
+ if (syned) // divw r_dst,r_srcL,r_srcR
+ p = mkFormXO(p, 31, r_dst, r_srcL, r_srcR, 0, 491, 0);
+ else // divwu r_dst,r_srcL,r_srcR
+ p = mkFormXO(p, 31, r_dst, r_srcL, r_srcR, 0, 459, 0);
+ } else {
+ if (syned) // divd r_dst,r_srcL,r_srcR
+ p = mkFormXO(p, 31, r_dst, r_srcL, r_srcR, 0, 489, 0);
+ else // divdu r_dst,r_srcL,r_srcR
+ p = mkFormXO(p, 31, r_dst, r_srcL, r_srcR, 0, 457, 0);
+ }
+ goto done;
+ }
+
+ case Pin_Call: {
+ PPCCondCode cond = i->Pin.Call.cond;
+ UInt r_dst = 10;
+ /* As per detailed comment for Pin_Call in
+ getRegUsage_PPCInstr above, %r10 is used as an address temp */
+
+ /* jump over the following insns if condition does not hold */
+ if (cond.test != Pct_ALWAYS) {
+ /* jmp fwds if !condition */
+ /* don't know how many bytes to jump over yet...
+ make space for a jump instruction and fill in later. */
+ ptmp = p; /* fill in this bit later */
+ p += 4;
+ }
+
+ /* load target to r_dst */ // p += 4|8|20
+ p = mkLoadImm(p, r_dst, i->Pin.Call.target, mode64);
+
+ /* mtspr 9,r_dst => move r_dst to count register */
+ p = mkFormXFX(p, r_dst, 9, 467); // p += 4
+
+ /* bctrl => branch to count register (and save to lr) */
+ p = mkFormXL(p, 19, Pct_ALWAYS, 0, 0, 528, 1); // p += 4
+
+ /* Fix up the conditional jump, if there was one. */
+ if (cond.test != Pct_ALWAYS) {
+ Int delta = p - ptmp;
+ vassert(delta >= 16 && delta <= 32);
+ /* bc !ct,cf,delta */
+ mkFormB(ptmp, invertCondTest(cond.test),
+ cond.flag, (delta>>2), 0, 0);
+ }
+ goto done;
+ }
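+
+ /* Hence, in the worst (conditional, mode64, 5-insn literal) case
+ the emitted sequence is
+ bc !cond,fwd // 4 bytes, patched in above
+ <load target to r10> // 4 .. 20 bytes
+ mtspr 9,r10 // 4 bytes (mtctr)
+ bctrl // 4 bytes
+ which is exactly the 16..32 byte range that the vassert on
+ delta checks. */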
+
+ case Pin_Goto: {
+ UInt trc = 0;
+ UChar r_ret = 3; /* Put target addr into %r3 */
+ PPCCondCode cond = i->Pin.Goto.cond;
+ UInt r_dst;
+ ULong imm_dst;
+
+ vassert(dispatch == NULL);
+
+ /* First off, if this is conditional, create a conditional
+ jump over the rest of it. */
+ if (cond.test != Pct_ALWAYS) {
+ /* jmp fwds if !condition */
+ /* don't know how many bytes to jump over yet...
+ make space for a jump instruction and fill in later. */
+ ptmp = p; /* fill in this bit later */
+ p += 4;
+ }
+
+ // cond succeeds...
+
+ /* If non-boring, set GuestStatePtr appropriately. */
+ switch (i->Pin.Goto.jk) {
+ case Ijk_ClientReq: trc = VEX_TRC_JMP_CLIENTREQ; break;
+ case Ijk_Sys_syscall: trc = VEX_TRC_JMP_SYS_SYSCALL; break;
+ case Ijk_Yield: trc = VEX_TRC_JMP_YIELD; break;
+ case Ijk_EmWarn: trc = VEX_TRC_JMP_EMWARN; break;
+ case Ijk_EmFail: trc = VEX_TRC_JMP_EMFAIL; break;
+ case Ijk_MapFail: trc = VEX_TRC_JMP_MAPFAIL; break;
+ case Ijk_NoDecode: trc = VEX_TRC_JMP_NODECODE; break;
+ case Ijk_TInval: trc = VEX_TRC_JMP_TINVAL; break;
+ case Ijk_NoRedir: trc = VEX_TRC_JMP_NOREDIR; break;
+ case Ijk_SigTRAP: trc = VEX_TRC_JMP_SIGTRAP; break;
+ case Ijk_SigBUS: trc = VEX_TRC_JMP_SIGBUS; break;
+ case Ijk_Ret:
+ case Ijk_Call:
+ case Ijk_Boring:
+ break;
+ default:
+ ppIRJumpKind(i->Pin.Goto.jk);
+ vpanic("emit_PPCInstr.Pin_Goto: unknown jump kind");
+ }
+ if (trc != 0) {
+ vassert(trc < 0x10000);
+ /* addi r31,0,trc */
+ p = mkFormD(p, 14, 31, 0, trc); // p += 4
+ }
+
+ /* Get the destination address into %r_ret */
+ if (i->Pin.Goto.dst->tag == Pri_Imm) {
+ imm_dst = i->Pin.Goto.dst->Pri.Imm;
+ p = mkLoadImm(p, r_ret, imm_dst, mode64); // p += 4|8|20
+ } else {
+ vassert(i->Pin.Goto.dst->tag == Pri_Reg);
+ r_dst = iregNo(i->Pin.Goto.dst->Pri.Reg, mode64);
+ p = mkMoveReg(p, r_ret, r_dst); // p += 4
+ }
+
+ /* blr */
+ p = mkFormXL(p, 19, Pct_ALWAYS, 0, 0, 16, 0); // p += 4
+
+ /* Fix up the conditional jump, if there was one. */
+ if (cond.test != Pct_ALWAYS) {
+ Int delta = p - ptmp;
+ vassert(delta >= 12 && delta <= 32);
+ /* bc !ct,cf,delta */
+ mkFormB(ptmp, invertCondTest(cond.test),
+ cond.flag, delta>>2, 0, 0);
+ }
+ goto done;
+ }
+
+ case Pin_CMov: {
+ UInt r_dst, r_src;
+ ULong imm_src;
+ PPCCondCode cond;
+ vassert(i->Pin.CMov.cond.test != Pct_ALWAYS);
+
+ r_dst = iregNo(i->Pin.CMov.dst, mode64);
+ cond = i->Pin.CMov.cond;
+
+ /* branch (if cond fails) over move instrs */
+ if (cond.test != Pct_ALWAYS) {
+ /* don't know how many bytes to jump over yet...
+ make space for a jump instruction and fill in later. */
+ ptmp = p; /* fill in this bit later */
+ p += 4;
+ }
+
+ // cond true: move src => dst
+ switch (i->Pin.CMov.src->tag) {
+ case Pri_Imm:
+ imm_src = i->Pin.CMov.src->Pri.Imm;
+ p = mkLoadImm(p, r_dst, imm_src, mode64); // p += 4|8|20
+ break;
+ case Pri_Reg:
+ r_src = iregNo(i->Pin.CMov.src->Pri.Reg, mode64);
+ p = mkMoveReg(p, r_dst, r_src); // p += 4
+ break;
+ default: goto bad;
+ }
+
+ /* Fix up the conditional jump, if there was one. */
+ if (cond.test != Pct_ALWAYS) {
+ Int delta = p - ptmp;
+ vassert(delta >= 8 && delta <= 24);
+ /* bc !ct,cf,delta */
+ mkFormB(ptmp, invertCondTest(cond.test),
+ cond.flag, (delta>>2), 0, 0);
+ }
+ goto done;
+ }
+
+ case Pin_Load: {
+ PPCAMode* am_addr = i->Pin.Load.src;
+ UInt r_dst = iregNo(i->Pin.Load.dst, mode64);
+ UInt opc1, opc2, sz = i->Pin.Load.sz;
+ switch (am_addr->tag) {
+ case Pam_IR:
+ if (mode64 && (sz == 4 || sz == 8)) {
+ /* should be guaranteed to us by iselWordExpr_AMode */
+ vassert(0 == (am_addr->Pam.IR.index & 3));
+ }
+ switch(sz) {
+ case 1: opc1 = 34; break;
+ case 2: opc1 = 40; break;
+ case 4: opc1 = 32; break;
+ case 8: opc1 = 58; vassert(mode64); break;
+ default: goto bad;
+ }
+ p = doAMode_IR(p, opc1, r_dst, am_addr, mode64);
+ goto done;
+ case Pam_RR:
+ switch(sz) {
+ case 1: opc2 = 87; break;
+ case 2: opc2 = 279; break;
+ case 4: opc2 = 23; break;
+ case 8: opc2 = 21; vassert(mode64); break;
+ default: goto bad;
+ }
+ p = doAMode_RR(p, 31, opc2, r_dst, am_addr, mode64);
+ goto done;
+ default:
+ goto bad;
+ }
+ }
+
+ case Pin_LoadL: {
+ if (i->Pin.LoadL.sz == 4) {
+ p = mkFormX(p, 31, iregNo(i->Pin.LoadL.dst, mode64),
+ 0, iregNo(i->Pin.LoadL.src, mode64), 20, 0);
+ goto done;
+ }
+ if (i->Pin.LoadL.sz == 8 && mode64) {
+ p = mkFormX(p, 31, iregNo(i->Pin.LoadL.dst, mode64),
+ 0, iregNo(i->Pin.LoadL.src, mode64), 84, 0);
+ goto done;
+ }
+ goto bad;
+ }
+
+ case Pin_Set: {
+ /* Make the destination register be 1 or 0, depending on whether
+ the relevant condition holds. */
+ UInt r_dst = iregNo(i->Pin.Set.dst, mode64);
+ PPCCondCode cond = i->Pin.Set.cond;
+ UInt rot_imm, r_tmp;
+
+ if (cond.test == Pct_ALWAYS) {
+ // Just load 1 to dst => li dst,1
+ p = mkFormD(p, 14, r_dst, 0, 1);
+ } else {
+ rot_imm = 1 + cond.flag;
+ r_tmp = 0; // r0 is not in getAllocableRegs_PPC, so no need to declare it.
+
+ // r_tmp = CR => mfcr r_tmp
+ p = mkFormX(p, 31, r_tmp, 0, 0, 19, 0);
+
+ // r_dst = flag (rotate left and mask)
+ // => rlwinm r_dst,r_tmp,rot_imm,31,31
+ p = mkFormM(p, 21, r_tmp, r_dst, rot_imm, 31, 31, 0);
+
+ if (cond.test == Pct_FALSE) {
+ // flip bit => xori r_dst,r_dst,1
+ p = mkFormD(p, 26, r_dst, r_dst, 1);
+ }
+ }
+ goto done;
+ }
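+
+ /* Example: for cond.flag == 2 (the EQ bit of CR0), rot_imm is 3;
+ rotating the CR image left by 3 brings (big-endian) bit 2 round
+ to bit 31, and the 31,31 mask of rlwinm keeps just that bit, so
+ r_dst ends up as exactly 0 or 1. */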
+
+ case Pin_MfCR:
+ // mfcr dst
+ p = mkFormX(p, 31, iregNo(i->Pin.MfCR.dst, mode64), 0, 0, 19, 0);
+ goto done;
+
+ case Pin_MFence: {
+ p = mkFormX(p, 31, 0, 0, 0, 598, 0); // sync, PPC32 p616
+ // CAB: Should this be isync?
+ // p = mkFormXL(p, 19, 0, 0, 0, 150, 0); // isync, PPC32 p467
+ goto done;
+ }
+
+ case Pin_Store: {
+ PPCAMode* am_addr = i->Pin.Store.dst;
+ UInt r_src = iregNo(i->Pin.Store.src, mode64);
+ UInt opc1, opc2, sz = i->Pin.Store.sz;
+ switch (i->Pin.Store.dst->tag) {
+ case Pam_IR:
+ if (mode64 && (sz == 4 || sz == 8)) {
+ /* should be guaranteed to us by iselWordExpr_AMode */
+ vassert(0 == (am_addr->Pam.IR.index & 3));
+ }
+ switch(sz) {
+ case 1: opc1 = 38; break;
+ case 2: opc1 = 44; break;
+ case 4: opc1 = 36; break;
+ case 8: vassert(mode64);
+ opc1 = 62; break;
+ default:
+ goto bad;
+ }
+ p = doAMode_IR(p, opc1, r_src, am_addr, mode64);
+ goto done;
+ case Pam_RR:
+ switch(sz) {
+ case 1: opc2 = 215; break;
+ case 2: opc2 = 407; break;
+ case 4: opc2 = 151; break;
+ case 8: vassert(mode64);
+ opc2 = 149; break;
+ default:
+ goto bad;
+ }
+ p = doAMode_RR(p, 31, opc2, r_src, am_addr, mode64);
+ goto done;
+ default:
+ goto bad;
+ }
+ goto done;
+ }
+
+ case Pin_StoreC: {
+ if (i->Pin.StoreC.sz == 4) {
+ p = mkFormX(p, 31, iregNo(i->Pin.StoreC.src, mode64),
+ 0, iregNo(i->Pin.StoreC.dst, mode64), 150, 1);
+ goto done;
+ }
+ if (i->Pin.StoreC.sz == 8 && mode64) {
+ p = mkFormX(p, 31, iregNo(i->Pin.StoreC.src, mode64),
+ 0, iregNo(i->Pin.StoreC.dst, mode64), 214, 1);
+ goto done;
+ }
+ goto bad;
+ }
+
+ case Pin_FpUnary: {
+ UInt fr_dst = fregNo(i->Pin.FpUnary.dst);
+ UInt fr_src = fregNo(i->Pin.FpUnary.src);
+ switch (i->Pin.FpUnary.op) {
+ case Pfp_RSQRTE: // frsqrte, PPC32 p424
+ p = mkFormA( p, 63, fr_dst, 0, fr_src, 0, 26, 0 );
+ break;
+ case Pfp_RES: // fres, PPC32 p421
+ p = mkFormA( p, 59, fr_dst, 0, fr_src, 0, 24, 0 );
+ break;
+ case Pfp_SQRT: // fsqrt, PPC32 p427
+ p = mkFormA( p, 63, fr_dst, 0, fr_src, 0, 22, 0 );
+ break;
+ case Pfp_ABS: // fabs, PPC32 p399
+ p = mkFormX(p, 63, fr_dst, 0, fr_src, 264, 0);
+ break;
+ case Pfp_NEG: // fneg, PPC32 p416
+ p = mkFormX(p, 63, fr_dst, 0, fr_src, 40, 0);
+ break;
+ case Pfp_MOV: // fmr, PPC32 p410
+ p = mkFormX(p, 63, fr_dst, 0, fr_src, 72, 0);
+ break;
+ case Pfp_FRIM: // frim, PPC ISA 2.05 p137
+ p = mkFormX(p, 63, fr_dst, 0, fr_src, 488, 0);
+ break;
+ case Pfp_FRIP: // frip, PPC ISA 2.05 p137
+ p = mkFormX(p, 63, fr_dst, 0, fr_src, 456, 0);
+ break;
+ case Pfp_FRIN: // frin, PPC ISA 2.05 p137
+ p = mkFormX(p, 63, fr_dst, 0, fr_src, 392, 0);
+ break;
+ case Pfp_FRIZ: // friz, PPC ISA 2.05 p137
+ p = mkFormX(p, 63, fr_dst, 0, fr_src, 424, 0);
+ break;
+ default:
+ goto bad;
+ }
+ goto done;
+ }
+
+ case Pin_FpBinary: {
+ UInt fr_dst = fregNo(i->Pin.FpBinary.dst);
+ UInt fr_srcL = fregNo(i->Pin.FpBinary.srcL);
+ UInt fr_srcR = fregNo(i->Pin.FpBinary.srcR);
+ switch (i->Pin.FpBinary.op) {
+ case Pfp_ADDD: // fadd, PPC32 p400
+ p = mkFormA( p, 63, fr_dst, fr_srcL, fr_srcR, 0, 21, 0 );
+ break;
+ case Pfp_ADDS: // fadds, PPC32 p401
+ p = mkFormA( p, 59, fr_dst, fr_srcL, fr_srcR, 0, 21, 0 );
+ break;
+ case Pfp_SUBD: // fsub, PPC32 p429
+ p = mkFormA( p, 63, fr_dst, fr_srcL, fr_srcR, 0, 20, 0 );
+ break;
+ case Pfp_SUBS: // fsubs, PPC32 p430
+ p = mkFormA( p, 59, fr_dst, fr_srcL, fr_srcR, 0, 20, 0 );
+ break;
+ case Pfp_MULD: // fmul, PPC32 p413
+ p = mkFormA( p, 63, fr_dst, fr_srcL, 0, fr_srcR, 25, 0 );
+ break;
+ case Pfp_MULS: // fmuls, PPC32 p414
+ p = mkFormA( p, 59, fr_dst, fr_srcL, 0, fr_srcR, 25, 0 );
+ break;
+ case Pfp_DIVD: // fdiv, PPC32 p406
+ p = mkFormA( p, 63, fr_dst, fr_srcL, fr_srcR, 0, 18, 0 );
+ break;
+ case Pfp_DIVS: // fdivs, PPC32 p407
+ p = mkFormA( p, 59, fr_dst, fr_srcL, fr_srcR, 0, 18, 0 );
+ break;
+ default:
+ goto bad;
+ }
+ goto done;
+ }
+
+ case Pin_FpMulAcc: {
+ UInt fr_dst = fregNo(i->Pin.FpMulAcc.dst);
+ UInt fr_srcML = fregNo(i->Pin.FpMulAcc.srcML);
+ UInt fr_srcMR = fregNo(i->Pin.FpMulAcc.srcMR);
+ UInt fr_srcAcc = fregNo(i->Pin.FpMulAcc.srcAcc);
+ switch (i->Pin.FpMulAcc.op) {
+ case Pfp_MADDD: // fmadd, PPC32 p408
+ p = mkFormA( p, 63, fr_dst, fr_srcML, fr_srcAcc, fr_srcMR, 29, 0 );
+ break;
+ case Pfp_MADDS: // fmadds, PPC32 p409
+ p = mkFormA( p, 59, fr_dst, fr_srcML, fr_srcAcc, fr_srcMR, 29, 0 );
+ break;
+ case Pfp_MSUBD: // fmsub, PPC32 p411
+ p = mkFormA( p, 63, fr_dst, fr_srcML, fr_srcAcc, fr_srcMR, 28, 0 );
+ break;
+ case Pfp_MSUBS: // fmsubs, PPC32 p412
+ p = mkFormA( p, 59, fr_dst, fr_srcML, fr_srcAcc, fr_srcMR, 28, 0 );
+ break;
+ default:
+ goto bad;
+ }
+ goto done;
+ }
+
+ case Pin_FpLdSt: {
+ PPCAMode* am_addr = i->Pin.FpLdSt.addr;
+ UInt f_reg = fregNo(i->Pin.FpLdSt.reg);
+ Bool idxd = toBool(i->Pin.FpLdSt.addr->tag == Pam_RR);
+ UChar sz = i->Pin.FpLdSt.sz;
+ UInt opc;
+ vassert(sz == 4 || sz == 8);
+
+ if (i->Pin.FpLdSt.isLoad) { // Load from memory
+ if (idxd) { // lf[s|d]x, PPC32 p444|440
+ opc = (sz == 4) ? 535 : 599;
+ p = doAMode_RR(p, 31, opc, f_reg, am_addr, mode64);
+ } else { // lf[s|d], PPC32 p441|437
+ opc = (sz == 4) ? 48 : 50;
+ p = doAMode_IR(p, opc, f_reg, am_addr, mode64);
+ }
+ } else { // Store to memory
+ if (idxd) { // stf[s|d]x, PPC32 p521|516
+ opc = (sz == 4) ? 663 : 727;
+ p = doAMode_RR(p, 31, opc, f_reg, am_addr, mode64);
+ } else { // stf[s|d], PPC32 p518|513
+ opc = (sz == 4) ? 52 : 54;
+ p = doAMode_IR(p, opc, f_reg, am_addr, mode64);
+ }
+ }
+ goto done;
+ }
+
+ case Pin_FpSTFIW: {
+ UInt ir_addr = iregNo(i->Pin.FpSTFIW.addr, mode64);
+ UInt fr_data = fregNo(i->Pin.FpSTFIW.data);
+ // stfiwx (store fp64[lo32] as int32), PPC32 p517
+ // Use rA==0, so that EA == rB == ir_addr
+ p = mkFormX(p, 31, fr_data, 0/*rA=0*/, ir_addr, 983, 0);
+ goto done;
+ }
+
+ case Pin_FpRSP: {
+ UInt fr_dst = fregNo(i->Pin.FpRSP.dst);
+ UInt fr_src = fregNo(i->Pin.FpRSP.src);
+ // frsp, PPC32 p423
+ p = mkFormX(p, 63, fr_dst, 0, fr_src, 12, 0);
+ goto done;
+ }
+
+ case Pin_FpCftI: {
+ UInt fr_dst = fregNo(i->Pin.FpCftI.dst);
+ UInt fr_src = fregNo(i->Pin.FpCftI.src);
+ if (i->Pin.FpCftI.fromI == False && i->Pin.FpCftI.int32 == True) {
+ // fctiw (conv f64 to i32), PPC32 p404
+ p = mkFormX(p, 63, fr_dst, 0, fr_src, 14, 0);
+ goto done;
+ }
+ if (i->Pin.FpCftI.fromI == False && i->Pin.FpCftI.int32 == False) {
+ // fctid (conv f64 to i64), PPC64 p437
+ p = mkFormX(p, 63, fr_dst, 0, fr_src, 814, 0);
+ goto done;
+ }
+ if (i->Pin.FpCftI.fromI == True && i->Pin.FpCftI.int32 == False) {
+ // fcfid (conv i64 to f64), PPC64 p434
+ p = mkFormX(p, 63, fr_dst, 0, fr_src, 846, 0);
+ goto done;
+ }
+ goto bad;
+ }
+
+ case Pin_FpCMov: {
+ UInt fr_dst = fregNo(i->Pin.FpCMov.dst);
+ UInt fr_src = fregNo(i->Pin.FpCMov.src);
+ PPCCondCode cc = i->Pin.FpCMov.cond;
+
+ if (fr_dst == fr_src) goto done;
+
+ vassert(cc.test != Pct_ALWAYS);
+
+ /* jmp fwds if !condition */
+ if (cc.test != Pct_ALWAYS) {
+ /* bc !ct,cf,n_bytes>>2 */
+ p = mkFormB(p, invertCondTest(cc.test), cc.flag, 8>>2, 0, 0);
+ }
+
+ // fmr, PPC32 p410
+ p = mkFormX(p, 63, fr_dst, 0, fr_src, 72, 0);
+ goto done;
+ }
+
+ case Pin_FpLdFPSCR: {
+ UInt fr_src = fregNo(i->Pin.FpLdFPSCR.src);
+ p = mkFormXFL(p, 0xFF, fr_src); // mtfsf, PPC32 p480
+ goto done;
+ }
+
+ case Pin_FpCmp: {
+ UChar crfD = 1;
+ UInt r_dst = iregNo(i->Pin.FpCmp.dst, mode64);
+ UInt fr_srcL = fregNo(i->Pin.FpCmp.srcL);
+ UInt fr_srcR = fregNo(i->Pin.FpCmp.srcR);
+ vassert(crfD < 8);
+ // fcmpo, PPC32 p402
+ p = mkFormX(p, 63, crfD<<2, fr_srcL, fr_srcR, 32, 0);
+
+ // mfcr (mv CR to r_dst), PPC32 p467
+ p = mkFormX(p, 31, r_dst, 0, 0, 19, 0);
+
+ // rlwinm r_dst,r_dst,8,28,31, PPC32 p501
+ // => rotate field 1 to bottom of word, masking out upper 28
+ p = mkFormM(p, 21, r_dst, r_dst, 8, 28, 31, 0);
+ goto done;
+ }
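+
+ /* The rlwinm step works because fcmpo wrote CR field 1, that is,
+ (big-endian) CR bits 4..7: rotating left by 8 parks those four
+ bits at positions 28..31, and the 28,31 mask clears everything
+ else, leaving the raw comparison result in the low nibble of
+ r_dst. */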
+
+ case Pin_RdWrLR: {
+ UInt reg = iregNo(i->Pin.RdWrLR.gpr, mode64);
+ /* wrLR==True ? mtlr r4 : mflr r4 */
+ p = mkFormXFX(p, reg, 8, (i->Pin.RdWrLR.wrLR==True) ? 467 : 339);
+ goto done;
+ }
+
+
+ /* AltiVec */
+ case Pin_AvLdSt: {
+ UInt opc2, v_reg, r_idx, r_base;
+ UChar sz = i->Pin.AvLdSt.sz;
+ Bool idxd = toBool(i->Pin.AvLdSt.addr->tag == Pam_RR);
+ vassert(sz == 1 || sz == 2 || sz == 4 || sz == 16);
+
+ v_reg = vregNo(i->Pin.AvLdSt.reg);
+ r_base = iregNo(i->Pin.AvLdSt.addr->Pam.RR.base, mode64);
+
+ // Only have AltiVec AMode_RR: kludge AMode_IR
+ if (!idxd) {
+ r_idx = 30; // XXX: Using r30 as temp
+ p = mkLoadImm(p, r_idx,
+ i->Pin.AvLdSt.addr->Pam.IR.index, mode64);
+ } else {
+ r_idx = iregNo(i->Pin.AvLdSt.addr->Pam.RR.index, mode64);
+ }
+
+ if (i->Pin.AvLdSt.isLoad) { // Load from memory (1,2,4,16)
+ opc2 = (sz==1) ? 7 : (sz==2) ? 39 : (sz==4) ? 71 : 103;
+ p = mkFormX(p, 31, v_reg, r_idx, r_base, opc2, 0);
+ } else { // Store to memory (1,2,4,16)
+ opc2 = (sz==1) ? 135 : (sz==2) ? 167 : (sz==4) ? 199 : 231;
+ p = mkFormX(p, 31, v_reg, r_idx, r_base, opc2, 0);
+ }
+ goto done;
+ }
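+
+ /* Example of the AMode_IR kludge: for {base=rB, index=100}, a
+ vector load comes out as "li r30,100 ; lvx vD,r30,rB". AltiVec
+ has no displacement addressing mode, so the displacement is
+ first materialised in r30, which getRegUsage_PPCInstr above
+ declares as written by this insn. */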
+
+ case Pin_AvUnary: {
+ UInt v_dst = vregNo(i->Pin.AvUnary.dst);
+ UInt v_src = vregNo(i->Pin.AvUnary.src);
+ UInt opc2;
+ switch (i->Pin.AvUnary.op) {
+ case Pav_MOV: opc2 = 1156; break; // vor vD,vS,vS
+ case Pav_NOT: opc2 = 1284; break; // vnor vD,vS,vS
+ case Pav_UNPCKH8S: opc2 = 526; break; // vupkhsb
+ case Pav_UNPCKH16S: opc2 = 590; break; // vupkhsh
+ case Pav_UNPCKL8S: opc2 = 654; break; // vupklsb
+ case Pav_UNPCKL16S: opc2 = 718; break; // vupklsh
+ case Pav_UNPCKHPIX: opc2 = 846; break; // vupkhpx
+ case Pav_UNPCKLPIX: opc2 = 974; break; // vupklpx
+ default:
+ goto bad;
+ }
+ switch (i->Pin.AvUnary.op) {
+ case Pav_MOV:
+ case Pav_NOT:
+ p = mkFormVX( p, 4, v_dst, v_src, v_src, opc2 );
+ break;
+ default:
+ p = mkFormVX( p, 4, v_dst, 0, v_src, opc2 );
+ break;
+ }
+ goto done;
+ }
+
+ case Pin_AvBinary: {
+ UInt v_dst = vregNo(i->Pin.AvBinary.dst);
+ UInt v_srcL = vregNo(i->Pin.AvBinary.srcL);
+ UInt v_srcR = vregNo(i->Pin.AvBinary.srcR);
+ UInt opc2;
+ if (i->Pin.AvBinary.op == Pav_SHL) {
+ p = mkFormVX( p, 4, v_dst, v_srcL, v_srcR, 1036 ); // vslo
+ p = mkFormVX( p, 4, v_dst, v_dst, v_srcR, 452 ); // vsl
+ goto done;
+ }
+ if (i->Pin.AvBinary.op == Pav_SHR) {
+ p = mkFormVX( p, 4, v_dst, v_srcL, v_srcR, 1100 ); // vsro
+ p = mkFormVX( p, 4, v_dst, v_dst, v_srcR, 708 ); // vsr
+ goto done;
+ }
+ switch (i->Pin.AvBinary.op) {
+ /* Bitwise */
+ case Pav_AND: opc2 = 1028; break; // vand
+ case Pav_OR: opc2 = 1156; break; // vor
+ case Pav_XOR: opc2 = 1220; break; // vxor
+ default:
+ goto bad;
+ }
+ p = mkFormVX( p, 4, v_dst, v_srcL, v_srcR, opc2 );
+ goto done;
+ }
+
+ case Pin_AvBin8x16: {
+ UInt v_dst = vregNo(i->Pin.AvBin8x16.dst);
+ UInt v_srcL = vregNo(i->Pin.AvBin8x16.srcL);
+ UInt v_srcR = vregNo(i->Pin.AvBin8x16.srcR);
+ UInt opc2;
+ switch (i->Pin.AvBin8x16.op) {
+
+ case Pav_ADDU: opc2 = 0; break; // vaddubm
+ case Pav_QADDU: opc2 = 512; break; // vaddubs
+ case Pav_QADDS: opc2 = 768; break; // vaddsbs
+
+ case Pav_SUBU: opc2 = 1024; break; // vsububm
+ case Pav_QSUBU: opc2 = 1536; break; // vsububs
+ case Pav_QSUBS: opc2 = 1792; break; // vsubsbs
+
+ case Pav_OMULU: opc2 = 8; break; // vmuloub
+ case Pav_OMULS: opc2 = 264; break; // vmulosb
+ case Pav_EMULU: opc2 = 520; break; // vmuleub
+ case Pav_EMULS: opc2 = 776; break; // vmulesb
+
+ case Pav_AVGU: opc2 = 1026; break; // vavgub
+ case Pav_AVGS: opc2 = 1282; break; // vavgsb
+ case Pav_MAXU: opc2 = 2; break; // vmaxub
+ case Pav_MAXS: opc2 = 258; break; // vmaxsb
+ case Pav_MINU: opc2 = 514; break; // vminub
+ case Pav_MINS: opc2 = 770; break; // vminsb
+
+ case Pav_CMPEQU: opc2 = 6; break; // vcmpequb
+ case Pav_CMPGTU: opc2 = 518; break; // vcmpgtub
+ case Pav_CMPGTS: opc2 = 774; break; // vcmpgtsb
+
+ case Pav_SHL: opc2 = 260; break; // vslb
+ case Pav_SHR: opc2 = 516; break; // vsrb
+ case Pav_SAR: opc2 = 772; break; // vsrab
+ case Pav_ROTL: opc2 = 4; break; // vrlb
+
+ case Pav_MRGHI: opc2 = 12; break; // vmrghb
+ case Pav_MRGLO: opc2 = 268; break; // vmrglb
+
+ default:
+ goto bad;
+ }
+ p = mkFormVX( p, 4, v_dst, v_srcL, v_srcR, opc2 );
+ goto done;
+ }
+
+ case Pin_AvBin16x8: {
+ UInt v_dst = vregNo(i->Pin.AvBin16x8.dst);
+ UInt v_srcL = vregNo(i->Pin.AvBin16x8.srcL);
+ UInt v_srcR = vregNo(i->Pin.AvBin16x8.srcR);
+ UInt opc2;
+ switch (i->Pin.AvBin16x8.op) {
+
+ case Pav_ADDU: opc2 = 64; break; // vadduhm
+ case Pav_QADDU: opc2 = 576; break; // vadduhs
+ case Pav_QADDS: opc2 = 832; break; // vaddshs
+
+ case Pav_SUBU: opc2 = 1088; break; // vsubuhm
+ case Pav_QSUBU: opc2 = 1600; break; // vsubuhs
+ case Pav_QSUBS: opc2 = 1856; break; // vsubshs
+
+ case Pav_OMULU: opc2 = 72; break; // vmulouh
+ case Pav_OMULS: opc2 = 328; break; // vmulosh
+ case Pav_EMULU: opc2 = 584; break; // vmuleuh
+ case Pav_EMULS: opc2 = 840; break; // vmulesh
+
+ case Pav_AVGU: opc2 = 1090; break; // vavguh
+ case Pav_AVGS: opc2 = 1346; break; // vavgsh
+ case Pav_MAXU: opc2 = 66; break; // vmaxuh
+ case Pav_MAXS: opc2 = 322; break; // vmaxsh
+ case Pav_MINS: opc2 = 834; break; // vminsh
+ case Pav_MINU: opc2 = 578; break; // vminuh
+
+ case Pav_CMPEQU: opc2 = 70; break; // vcmpequh
+ case Pav_CMPGTU: opc2 = 582; break; // vcmpgtuh
+ case Pav_CMPGTS: opc2 = 838; break; // vcmpgtsh
+
+ case Pav_SHL: opc2 = 324; break; // vslh
+ case Pav_SHR: opc2 = 580; break; // vsrh
+ case Pav_SAR: opc2 = 836; break; // vsrah
+ case Pav_ROTL: opc2 = 68; break; // vrlh
+
+ case Pav_PACKUU: opc2 = 14; break; // vpkuhum
+ case Pav_QPACKUU: opc2 = 142; break; // vpkuhus
+ case Pav_QPACKSU: opc2 = 270; break; // vpkshus
+ case Pav_QPACKSS: opc2 = 398; break; // vpkshss
+ case Pav_PACKPXL: opc2 = 782; break; // vpkpx
+
+ case Pav_MRGHI: opc2 = 76; break; // vmrghh
+ case Pav_MRGLO: opc2 = 332; break; // vmrglh
+
+ default:
+ goto bad;
+ }
+ p = mkFormVX( p, 4, v_dst, v_srcL, v_srcR, opc2 );
+ goto done;
+ }
+
+ case Pin_AvBin32x4: {
+ UInt v_dst = vregNo(i->Pin.AvBin32x4.dst);
+ UInt v_srcL = vregNo(i->Pin.AvBin32x4.srcL);
+ UInt v_srcR = vregNo(i->Pin.AvBin32x4.srcR);
+ UInt opc2;
+ switch (i->Pin.AvBin32x4.op) {
+
+ case Pav_ADDU: opc2 = 128; break; // vadduwm
+ case Pav_QADDU: opc2 = 640; break; // vadduws
+ case Pav_QADDS: opc2 = 896; break; // vaddsws
+
+ case Pav_SUBU: opc2 = 1152; break; // vsubuwm
+ case Pav_QSUBU: opc2 = 1664; break; // vsubuws
+ case Pav_QSUBS: opc2 = 1920; break; // vsubsws
+
+ case Pav_AVGU: opc2 = 1154; break; // vavguw
+ case Pav_AVGS: opc2 = 1410; break; // vavgsw
+
+ case Pav_MAXU: opc2 = 130; break; // vmaxuw
+ case Pav_MAXS: opc2 = 386; break; // vmaxsw
+
+ case Pav_MINS: opc2 = 898; break; // vminsw
+ case Pav_MINU: opc2 = 642; break; // vminuw
+
+ case Pav_CMPEQU: opc2 = 134; break; // vcmpequw
+ case Pav_CMPGTS: opc2 = 902; break; // vcmpgtsw
+ case Pav_CMPGTU: opc2 = 646; break; // vcmpgtuw
+
+ case Pav_SHL: opc2 = 388; break; // vslw
+ case Pav_SHR: opc2 = 644; break; // vsrw
+ case Pav_SAR: opc2 = 900; break; // vsraw
+ case Pav_ROTL: opc2 = 132; break; // vrlw
+
+ case Pav_PACKUU: opc2 = 78; break; // vpkuwum
+ case Pav_QPACKUU: opc2 = 206; break; // vpkuwus
+ case Pav_QPACKSU: opc2 = 334; break; // vpkswus
+ case Pav_QPACKSS: opc2 = 462; break; // vpkswss
+
+ case Pav_MRGHI: opc2 = 140; break; // vmrghw
+ case Pav_MRGLO: opc2 = 396; break; // vmrglw
+
+ default:
+ goto bad;
+ }
+ p = mkFormVX( p, 4, v_dst, v_srcL, v_srcR, opc2 );
+ goto done;
+ }
+
+ case Pin_AvBin32Fx4: {
+ UInt v_dst = vregNo(i->Pin.AvBin32Fx4.dst);
+ UInt v_srcL = vregNo(i->Pin.AvBin32Fx4.srcL);
+ UInt v_srcR = vregNo(i->Pin.AvBin32Fx4.srcR);
+ switch (i->Pin.AvBin32Fx4.op) {
+
+ case Pavfp_ADDF:
+ p = mkFormVX( p, 4, v_dst, v_srcL, v_srcR, 10 ); // vaddfp
+ break;
+ case Pavfp_SUBF:
+ p = mkFormVX( p, 4, v_dst, v_srcL, v_srcR, 74 ); // vsubfp
+ break;
+ case Pavfp_MAXF:
+ p = mkFormVX( p, 4, v_dst, v_srcL, v_srcR, 1034 ); // vmaxfp
+ break;
+ case Pavfp_MINF:
+ p = mkFormVX( p, 4, v_dst, v_srcL, v_srcR, 1098 ); // vminfp
+ break;
+
+ case Pavfp_MULF: {
+         /* Make a vmulfp from a vmaddfp:
+            load -0.0 (0x8000_0000) into each 32-bit word of vB;
+            this makes the add a no-op.
+         */
+ UInt vB = 29; // XXX: Using v29 for temp do not change
+ // without also changing
+ // getRegUsage_PPCInstr
+ UInt konst = 0x1F;
+
+ // Better way to load -0.0 (0x80000000) ?
+         // vspltisw vB,0x1F (sign-extends the 5-bit imm: each word of vB = 0xFFFFFFFF)
+ p = mkFormVX( p, 4, vB, konst, 0, 908 );
+
+         // vslw vB,vB,vB (each word of vB = (0xFFFFFFFF << 31) = 0x80000000)
+ p = mkFormVX( p, 4, vB, vB, vB, 388 );
+
+ // Finally, do the multiply:
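+         //   vmaddfp: v_dst = (v_srcL * v_srcR) + vB; since each word
+         //   of vB is -0.0 and x + -0.0 == x for every x, this is a
+         //   plain multiply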
+ p = mkFormVA( p, 4, v_dst, v_srcL, vB, v_srcR, 46 );
+ break;
+ }
+ case Pavfp_CMPEQF: // vcmpeqfp
+ p = mkFormVXR( p, 4, v_dst, v_srcL, v_srcR, 0, 198 );
+ break;
+ case Pavfp_CMPGTF: // vcmpgtfp
+ p = mkFormVXR( p, 4, v_dst, v_srcL, v_srcR, 0, 710 );
+ break;
+ case Pavfp_CMPGEF: // vcmpgefp
+ p = mkFormVXR( p, 4, v_dst, v_srcL, v_srcR, 0, 454 );
+ break;
+
+ default:
+ goto bad;
+ }
+ goto done;
+ }
+
+ case Pin_AvUn32Fx4: {
+ UInt v_dst = vregNo(i->Pin.AvUn32Fx4.dst);
+ UInt v_src = vregNo(i->Pin.AvUn32Fx4.src);
+ UInt opc2;
+ switch (i->Pin.AvUn32Fx4.op) {
+ case Pavfp_RCPF: opc2 = 266; break; // vrefp
+ case Pavfp_RSQRTF: opc2 = 330; break; // vrsqrtefp
+ case Pavfp_CVTU2F: opc2 = 778; break; // vcfux
+ case Pavfp_CVTS2F: opc2 = 842; break; // vcfsx
+ case Pavfp_QCVTF2U: opc2 = 906; break; // vctuxs
+ case Pavfp_QCVTF2S: opc2 = 970; break; // vctsxs
+ case Pavfp_ROUNDM: opc2 = 714; break; // vrfim
+ case Pavfp_ROUNDP: opc2 = 650; break; // vrfip
+ case Pavfp_ROUNDN: opc2 = 522; break; // vrfin
+ case Pavfp_ROUNDZ: opc2 = 586; break; // vrfiz
+ default:
+ goto bad;
+ }
+ p = mkFormVX( p, 4, v_dst, 0, v_src, opc2 );
+ goto done;
+ }
+
+ case Pin_AvPerm: { // vperm
+ UInt v_dst = vregNo(i->Pin.AvPerm.dst);
+ UInt v_srcL = vregNo(i->Pin.AvPerm.srcL);
+ UInt v_srcR = vregNo(i->Pin.AvPerm.srcR);
+ UInt v_ctl = vregNo(i->Pin.AvPerm.ctl);
+ p = mkFormVA( p, 4, v_dst, v_srcL, v_srcR, v_ctl, 43 );
+ goto done;
+ }
+
+ case Pin_AvSel: { // vsel
+ UInt v_ctl = vregNo(i->Pin.AvSel.ctl);
+ UInt v_dst = vregNo(i->Pin.AvSel.dst);
+ UInt v_srcL = vregNo(i->Pin.AvSel.srcL);
+ UInt v_srcR = vregNo(i->Pin.AvSel.srcR);
+ p = mkFormVA( p, 4, v_dst, v_srcL, v_srcR, v_ctl, 42 );
+ goto done;
+ }
+
+ case Pin_AvShlDbl: { // vsldoi
+ UInt shift = i->Pin.AvShlDbl.shift;
+ UInt v_dst = vregNo(i->Pin.AvShlDbl.dst);
+ UInt v_srcL = vregNo(i->Pin.AvShlDbl.srcL);
+ UInt v_srcR = vregNo(i->Pin.AvShlDbl.srcR);
+ vassert(shift <= 0xF);
+ p = mkFormVA( p, 4, v_dst, v_srcL, v_srcR, shift, 44 );
+ goto done;
+ }
+
+ case Pin_AvSplat: { // vsplt(is)(b,h,w)
+      UInt v_dst = vregNo(i->Pin.AvSplat.dst);
+ UChar sz = i->Pin.AvSplat.sz;
+ UInt v_src, opc2;
+ vassert(sz == 8 || sz == 16 || sz == 32);
+
+ if (i->Pin.AvSplat.src->tag == Pvi_Imm) {
+ Char simm5;
+ opc2 = (sz == 8) ? 780 : (sz == 16) ? 844 : 908; // 8,16,32
+ /* expects 5-bit-signed-imm */
+ simm5 = i->Pin.AvSplat.src->Pvi.Imm5s;
+ vassert(simm5 >= -16 && simm5 <= 15);
+ simm5 = simm5 & 0x1F;
+ p = mkFormVX( p, 4, v_dst, (UInt)simm5, 0, opc2 );
+ }
+      else { // Pvi_Reg
+ UInt lowest_lane;
+ opc2 = (sz == 8) ? 524 : (sz == 16) ? 588 : 652; // 8,16,32
+ vassert(hregClass(i->Pin.AvSplat.src->Pvi.Reg) == HRcVec128);
+ v_src = vregNo(i->Pin.AvSplat.src->Pvi.Reg);
+ lowest_lane = (128/sz)-1;
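+         // e.g. sz==8 => lowest_lane==15, the rightmost (least
+         // significant) element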
+ p = mkFormVX( p, 4, v_dst, lowest_lane, v_src, opc2 );
+ }
+ goto done;
+ }
+
+ case Pin_AvCMov: {
+ UInt v_dst = vregNo(i->Pin.AvCMov.dst);
+ UInt v_src = vregNo(i->Pin.AvCMov.src);
+ PPCCondCode cc = i->Pin.AvCMov.cond;
+
+ if (v_dst == v_src) goto done;
+
+ vassert(cc.test != Pct_ALWAYS);
+
+ /* jmp fwds 2 insns if !condition */
+ if (cc.test != Pct_ALWAYS) {
+ /* bc !ct,cf,n_bytes>>2 */
+ p = mkFormB(p, invertCondTest(cc.test), cc.flag, 8>>2, 0, 0);
+ }
+ /* vmr */
+ p = mkFormVX( p, 4, v_dst, v_src, v_src, 1156 );
+ goto done;
+ }
+
+ case Pin_AvLdVSCR: { // mtvscr
+ UInt v_src = vregNo(i->Pin.AvLdVSCR.src);
+ p = mkFormVX( p, 4, 0, 0, v_src, 1604 );
+ goto done;
+ }
+
+ default:
+ goto bad;
+ }
+
+ bad:
+ vex_printf("\n=> ");
+ ppPPCInstr(i, mode64);
+ vpanic("emit_PPCInstr");
+ /*NOTREACHED*/
+
+ done:
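+   /* No case above may emit more than 32 bytes (8 instructions). */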
+ vassert(p - &buf[0] <= 32);
+ return p - &buf[0];
+}
+
+/*---------------------------------------------------------------*/
+/*--- end host_ppc_defs.c ---*/
+/*---------------------------------------------------------------*/
diff --git a/VEX/priv/host_ppc_defs.h b/VEX/priv/host_ppc_defs.h
new file mode 100644
index 0000000..accfd58
--- /dev/null
+++ b/VEX/priv/host_ppc_defs.h
@@ -0,0 +1,861 @@
+
+/*---------------------------------------------------------------*/
+/*--- begin host_ppc_defs.h ---*/
+/*---------------------------------------------------------------*/
+
+/*
+ This file is part of Valgrind, a dynamic binary instrumentation
+ framework.
+
+ Copyright (C) 2004-2010 OpenWorks LLP
+ info@open-works.net
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA.
+
+ The GNU General Public License is contained in the file COPYING.
+
+ Neither the names of the U.S. Department of Energy nor the
+ University of California nor the names of its contributors may be
+ used to endorse or promote products derived from this software
+ without prior written permission.
+*/
+
+#ifndef __VEX_HOST_PPC_DEFS_H
+#define __VEX_HOST_PPC_DEFS_H
+
+/* Num registers used for function calls */
+#define PPC_N_REGPARMS 8
+
+
+/* --------- Registers. --------- */
+
+/* The usual HReg abstraction. There are 32 real int regs,
+ 32 real float regs, and 32 real vector regs.
+*/
+
+extern void ppHRegPPC ( HReg );
+
+extern HReg hregPPC_GPR0 ( Bool mode64 ); // scratch reg / zero reg
+extern HReg hregPPC_GPR1 ( Bool mode64 ); // Stack Frame Pointer
+extern HReg hregPPC_GPR2 ( Bool mode64 ); // not used: TOC pointer
+extern HReg hregPPC_GPR3 ( Bool mode64 );
+extern HReg hregPPC_GPR4 ( Bool mode64 );
+extern HReg hregPPC_GPR5 ( Bool mode64 );
+extern HReg hregPPC_GPR6 ( Bool mode64 );
+extern HReg hregPPC_GPR7 ( Bool mode64 );
+extern HReg hregPPC_GPR8 ( Bool mode64 );
+extern HReg hregPPC_GPR9 ( Bool mode64 );
+extern HReg hregPPC_GPR10 ( Bool mode64 );
+extern HReg hregPPC_GPR11 ( Bool mode64 );
+extern HReg hregPPC_GPR12 ( Bool mode64 );
+extern HReg hregPPC_GPR13 ( Bool mode64 );
+extern HReg hregPPC_GPR14 ( Bool mode64 );
+extern HReg hregPPC_GPR15 ( Bool mode64 );
+extern HReg hregPPC_GPR16 ( Bool mode64 );
+extern HReg hregPPC_GPR17 ( Bool mode64 );
+extern HReg hregPPC_GPR18 ( Bool mode64 );
+extern HReg hregPPC_GPR19 ( Bool mode64 );
+extern HReg hregPPC_GPR20 ( Bool mode64 );
+extern HReg hregPPC_GPR21 ( Bool mode64 );
+extern HReg hregPPC_GPR22 ( Bool mode64 );
+extern HReg hregPPC_GPR23 ( Bool mode64 );
+extern HReg hregPPC_GPR24 ( Bool mode64 );
+extern HReg hregPPC_GPR25 ( Bool mode64 );
+extern HReg hregPPC_GPR26 ( Bool mode64 );
+extern HReg hregPPC_GPR27 ( Bool mode64 );
+extern HReg hregPPC_GPR28 ( Bool mode64 );
+extern HReg hregPPC_GPR29 ( Bool mode64 ); // reserved for dispatcher
+extern HReg hregPPC_GPR30 ( Bool mode64 ); // used as VMX spill temp
+extern HReg hregPPC_GPR31 ( Bool mode64 ); // GuestStatePtr (callee-saved)
+
+extern HReg hregPPC_FPR0 ( void );
+extern HReg hregPPC_FPR1 ( void );
+extern HReg hregPPC_FPR2 ( void );
+extern HReg hregPPC_FPR3 ( void );
+extern HReg hregPPC_FPR4 ( void );
+extern HReg hregPPC_FPR5 ( void );
+extern HReg hregPPC_FPR6 ( void );
+extern HReg hregPPC_FPR7 ( void );
+extern HReg hregPPC_FPR8 ( void );
+extern HReg hregPPC_FPR9 ( void );
+extern HReg hregPPC_FPR10 ( void );
+extern HReg hregPPC_FPR11 ( void );
+extern HReg hregPPC_FPR12 ( void );
+extern HReg hregPPC_FPR13 ( void );
+extern HReg hregPPC_FPR14 ( void );
+extern HReg hregPPC_FPR15 ( void );
+extern HReg hregPPC_FPR16 ( void );
+extern HReg hregPPC_FPR17 ( void );
+extern HReg hregPPC_FPR18 ( void );
+extern HReg hregPPC_FPR19 ( void );
+extern HReg hregPPC_FPR20 ( void );
+extern HReg hregPPC_FPR21 ( void );
+extern HReg hregPPC_FPR22 ( void );
+extern HReg hregPPC_FPR23 ( void );
+extern HReg hregPPC_FPR24 ( void );
+extern HReg hregPPC_FPR25 ( void );
+extern HReg hregPPC_FPR26 ( void );
+extern HReg hregPPC_FPR27 ( void );
+extern HReg hregPPC_FPR28 ( void );
+extern HReg hregPPC_FPR29 ( void );
+extern HReg hregPPC_FPR30 ( void );
+extern HReg hregPPC_FPR31 ( void );
+
+extern HReg hregPPC_VR0 ( void );
+extern HReg hregPPC_VR1 ( void );
+extern HReg hregPPC_VR2 ( void );
+extern HReg hregPPC_VR3 ( void );
+extern HReg hregPPC_VR4 ( void );
+extern HReg hregPPC_VR5 ( void );
+extern HReg hregPPC_VR6 ( void );
+extern HReg hregPPC_VR7 ( void );
+extern HReg hregPPC_VR8 ( void );
+extern HReg hregPPC_VR9 ( void );
+extern HReg hregPPC_VR10 ( void );
+extern HReg hregPPC_VR11 ( void );
+extern HReg hregPPC_VR12 ( void );
+extern HReg hregPPC_VR13 ( void );
+extern HReg hregPPC_VR14 ( void );
+extern HReg hregPPC_VR15 ( void );
+extern HReg hregPPC_VR16 ( void );
+extern HReg hregPPC_VR17 ( void );
+extern HReg hregPPC_VR18 ( void );
+extern HReg hregPPC_VR19 ( void );
+extern HReg hregPPC_VR20 ( void );
+extern HReg hregPPC_VR21 ( void );
+extern HReg hregPPC_VR22 ( void );
+extern HReg hregPPC_VR23 ( void );
+extern HReg hregPPC_VR24 ( void );
+extern HReg hregPPC_VR25 ( void );
+extern HReg hregPPC_VR26 ( void );
+extern HReg hregPPC_VR27 ( void );
+extern HReg hregPPC_VR28 ( void );
+extern HReg hregPPC_VR29 ( void );
+extern HReg hregPPC_VR30 ( void );
+extern HReg hregPPC_VR31 ( void );
+
+#define StackFramePtr(_mode64) hregPPC_GPR1(_mode64)
+#define GuestStatePtr(_mode64) hregPPC_GPR31(_mode64)
+
+
+
+/* --------- Condition codes --------- */
+
+/* This gives names to bitfields in CR; hence it names BI numbers */
+/* Using IBM/hardware indexing convention */
+typedef
+ enum {
+ // CR7, which we use for integer compares
+ Pcf_7LT = 28, /* neg | lt */
+ Pcf_7GT = 29, /* pos | gt */
+ Pcf_7EQ = 30, /* zero | equal */
+ Pcf_7SO = 31 /* summary overflow */
+ }
+ PPCCondFlag;
+
+typedef
+ enum { /* Maps bc bitfield BO */
+ Pct_FALSE = 0x4,
+ Pct_TRUE = 0xC,
+ Pct_ALWAYS = 0x14
+ }
+ PPCCondTest;
+
+typedef
+ struct {
+ PPCCondFlag flag;
+ PPCCondTest test;
+ }
+ PPCCondCode;
+
+extern HChar* showPPCCondCode ( PPCCondCode );
+
+/* constructor */
+extern PPCCondCode mk_PPCCondCode ( PPCCondTest, PPCCondFlag );
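+/* e.g. mk_PPCCondCode(Pct_TRUE, Pcf_7EQ) yields a condition which
+   holds when CR bit 30 (CR7.EQ) is set. */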
+
+/* false->true, true->false */
+extern PPCCondTest invertCondTest ( PPCCondTest );
+
+
+
+
+/* --------- Memory address expressions (amodes). --------- */
+
+typedef
+ enum {
+ Pam_IR=1, /* Immediate (signed 16-bit) + Reg */
+ Pam_RR=2 /* Reg1 + Reg2 */
+ }
+ PPCAModeTag;
+
+typedef
+ struct {
+ PPCAModeTag tag;
+ union {
+ struct {
+ HReg base;
+ Int index;
+ } IR;
+ struct {
+ HReg base;
+ HReg index;
+ } RR;
+ } Pam;
+ }
+ PPCAMode;
+
+extern PPCAMode* PPCAMode_IR ( Int, HReg );
+extern PPCAMode* PPCAMode_RR ( HReg, HReg );
+
+extern PPCAMode* dopyPPCAMode ( PPCAMode* );
+
+extern void ppPPCAMode ( PPCAMode* );
+
+
+/* --------- Operand, which can be a reg or a u16/s16. --------- */
+/* ("RH" == "Register or Halfword immediate") */
+typedef
+ enum {
+ Prh_Imm=3,
+ Prh_Reg=4
+ }
+ PPCRHTag;
+
+typedef
+ struct {
+ PPCRHTag tag;
+ union {
+ struct {
+ Bool syned;
+ UShort imm16;
+ } Imm;
+ struct {
+ HReg reg;
+ } Reg;
+ }
+ Prh;
+ }
+ PPCRH;
+
+extern PPCRH* PPCRH_Imm ( Bool, UShort );
+extern PPCRH* PPCRH_Reg ( HReg );
+
+extern void ppPPCRH ( PPCRH* );
+
+
+/* --------- Operand, which can be a reg or a u32/64. --------- */
+
+typedef
+ enum {
+ Pri_Imm=5,
+ Pri_Reg=6
+ }
+ PPCRITag;
+
+typedef
+ struct {
+ PPCRITag tag;
+ union {
+ ULong Imm;
+ HReg Reg;
+ }
+ Pri;
+ }
+ PPCRI;
+
+extern PPCRI* PPCRI_Imm ( ULong );
+extern PPCRI* PPCRI_Reg( HReg );
+
+extern void ppPPCRI ( PPCRI* );
+
+
+/* --------- Operand, which can be a vector reg or a s5. --------- */
+/* ("VI" == "Vector Register or Immediate") */
+typedef
+ enum {
+ Pvi_Imm=7,
+ Pvi_Reg=8
+ }
+ PPCVI5sTag;
+
+typedef
+ struct {
+ PPCVI5sTag tag;
+ union {
+ Char Imm5s;
+ HReg Reg;
+ }
+ Pvi;
+ }
+ PPCVI5s;
+
+extern PPCVI5s* PPCVI5s_Imm ( Char );
+extern PPCVI5s* PPCVI5s_Reg ( HReg );
+
+extern void ppPPCVI5s ( PPCVI5s* );
+
+
+/* --------- Instructions. --------- */
+
+/* --------- */
+typedef
+ enum {
+ Pun_NEG,
+ Pun_NOT,
+ Pun_CLZ32,
+ Pun_CLZ64,
+ Pun_EXTSW
+ }
+ PPCUnaryOp;
+
+extern HChar* showPPCUnaryOp ( PPCUnaryOp );
+
+
+/* --------- */
+typedef
+ enum {
+ Palu_INVALID,
+ Palu_ADD, Palu_SUB,
+ Palu_AND, Palu_OR, Palu_XOR,
+ }
+ PPCAluOp;
+
+extern
+HChar* showPPCAluOp ( PPCAluOp,
+ Bool /* is the 2nd operand an immediate? */);
+
+
+/* --------- */
+typedef
+ enum {
+ Pshft_INVALID,
+ Pshft_SHL, Pshft_SHR, Pshft_SAR,
+ }
+ PPCShftOp;
+
+extern
+HChar* showPPCShftOp ( PPCShftOp,
+ Bool /* is the 2nd operand an immediate? */,
+ Bool /* is this a 32bit or 64bit op? */ );
+
+
+/* --------- */
+typedef
+ enum {
+ Pfp_INVALID,
+
+ /* Ternary */
+ Pfp_MADDD, Pfp_MSUBD,
+ Pfp_MADDS, Pfp_MSUBS,
+
+ /* Binary */
+ Pfp_ADDD, Pfp_SUBD, Pfp_MULD, Pfp_DIVD,
+ Pfp_ADDS, Pfp_SUBS, Pfp_MULS, Pfp_DIVS,
+
+ /* Unary */
+ Pfp_SQRT, Pfp_ABS, Pfp_NEG, Pfp_MOV, Pfp_RES, Pfp_RSQRTE,
+ Pfp_FRIN, Pfp_FRIM, Pfp_FRIP, Pfp_FRIZ
+ }
+ PPCFpOp;
+
+extern HChar* showPPCFpOp ( PPCFpOp );
+
+
+/* --------- */
+typedef
+ enum {
+ Pav_INVALID,
+
+ /* Integer Unary */
+ Pav_MOV, /* Mov */
+ Pav_NOT, /* Bitwise */
+ Pav_UNPCKH8S, Pav_UNPCKH16S, /* Unpack */
+ Pav_UNPCKL8S, Pav_UNPCKL16S,
+ Pav_UNPCKHPIX, Pav_UNPCKLPIX,
+
+ /* Integer Binary */
+ Pav_AND, Pav_OR, Pav_XOR, /* Bitwise */
+ Pav_ADDU, Pav_QADDU, Pav_QADDS,
+ Pav_SUBU, Pav_QSUBU, Pav_QSUBS,
+ Pav_OMULU, Pav_OMULS, Pav_EMULU, Pav_EMULS,
+ Pav_AVGU, Pav_AVGS,
+ Pav_MAXU, Pav_MAXS,
+ Pav_MINU, Pav_MINS,
+
+ /* Compare (always affects CR field 6) */
+ Pav_CMPEQU, Pav_CMPGTU, Pav_CMPGTS,
+
+ /* Shift */
+ Pav_SHL, Pav_SHR, Pav_SAR, Pav_ROTL,
+
+ /* Pack */
+ Pav_PACKUU, Pav_QPACKUU, Pav_QPACKSU, Pav_QPACKSS,
+ Pav_PACKPXL,
+
+ /* Merge */
+ Pav_MRGHI, Pav_MRGLO,
+ }
+ PPCAvOp;
+
+extern HChar* showPPCAvOp ( PPCAvOp );
+
+
+/* --------- */
+typedef
+ enum {
+ Pavfp_INVALID,
+
+ /* Floating point binary */
+ Pavfp_ADDF, Pavfp_SUBF, Pavfp_MULF,
+ Pavfp_MAXF, Pavfp_MINF,
+ Pavfp_CMPEQF, Pavfp_CMPGTF, Pavfp_CMPGEF,
+
+ /* Floating point unary */
+ Pavfp_RCPF, Pavfp_RSQRTF,
+ Pavfp_CVTU2F, Pavfp_CVTS2F, Pavfp_QCVTF2U, Pavfp_QCVTF2S,
+ Pavfp_ROUNDM, Pavfp_ROUNDP, Pavfp_ROUNDN, Pavfp_ROUNDZ,
+ }
+ PPCAvFpOp;
+
+extern HChar* showPPCAvFpOp ( PPCAvFpOp );
+
+
+/* --------- */
+typedef
+ enum {
+ Pin_LI, /* load word (32/64-bit) immediate (fake insn) */
+ Pin_Alu, /* word add/sub/and/or/xor */
+ Pin_Shft, /* word shl/shr/sar */
+ Pin_AddSubC, /* add/sub with read/write carry */
+ Pin_Cmp, /* word compare */
+ Pin_Unary, /* not, neg, clz */
+ Pin_MulL, /* widening multiply */
+ Pin_Div, /* div */
+ Pin_Call, /* call to address in register */
+ Pin_Goto, /* conditional/unconditional jmp to dst */
+ Pin_CMov, /* conditional move */
+ Pin_Load, /* zero-extending load a 8|16|32|64 bit value from mem */
+ Pin_LoadL, /* load-linked (lwarx/ldarx) 32|64 bit value from mem */
+ Pin_Store, /* store a 8|16|32|64 bit value to mem */
+ Pin_StoreC, /* store-conditional (stwcx./stdcx.) 32|64 bit val */
+ Pin_Set, /* convert condition code to value 0 or 1 */
+ Pin_MfCR, /* move from condition register to GPR */
+ Pin_MFence, /* mem fence */
+
+ Pin_FpUnary, /* FP unary op */
+ Pin_FpBinary, /* FP binary op */
+      Pin_FpMulAcc,  /* FP multiply-accumulate style op */
+ Pin_FpLdSt, /* FP load/store */
+ Pin_FpSTFIW, /* stfiwx */
+ Pin_FpRSP, /* FP round IEEE754 double to IEEE754 single */
+ Pin_FpCftI, /* fcfid/fctid/fctiw */
+ Pin_FpCMov, /* FP floating point conditional move */
+ Pin_FpLdFPSCR, /* mtfsf */
+ Pin_FpCmp, /* FP compare, generating value into int reg */
+
+ Pin_RdWrLR, /* Read/Write Link Register */
+
+ Pin_AvLdSt, /* AV load/store (kludging for AMode_IR) */
+ Pin_AvUnary, /* AV unary general reg=>reg */
+
+ Pin_AvBinary, /* AV binary general reg,reg=>reg */
+      Pin_AvBin8x16,  /* AV binary, 8x16 */
+      Pin_AvBin16x8,  /* AV binary, 16x8 */
+ Pin_AvBin32x4, /* AV binary, 32x4 */
+
+ Pin_AvBin32Fx4, /* AV FP binary, 32Fx4 */
+ Pin_AvUn32Fx4, /* AV FP unary, 32Fx4 */
+
+ Pin_AvPerm, /* AV permute (shuffle) */
+ Pin_AvSel, /* AV select */
+ Pin_AvShlDbl, /* AV shift-left double by imm */
+ Pin_AvSplat, /* One elem repeated throughout dst */
+ Pin_AvLdVSCR, /* mtvscr */
+ Pin_AvCMov /* AV conditional move */
+ }
+ PPCInstrTag;
+
+/* Destinations are on the LEFT (first operand) */
+
+typedef
+ struct {
+ PPCInstrTag tag;
+ union {
+ /* Get a 32/64-bit literal into a register.
+ May turn into a number of real insns. */
+ struct {
+ HReg dst;
+ ULong imm64;
+ } LI;
+ /* Integer add/sub/and/or/xor. Limitations:
+ - For add, the immediate, if it exists, is a signed 16.
+ - For sub, the immediate, if it exists, is a signed 16
+              which may not be -32768: sub-with-immediate is emitted
+              as addi with the negated immediate, and +32768 is not
+              representable as a signed 16.
+ - For and/or/xor, the immediate, if it exists,
+ is an unsigned 16.
+ */
+ struct {
+ PPCAluOp op;
+ HReg dst;
+ HReg srcL;
+ PPCRH* srcR;
+ } Alu;
+ /* Integer shl/shr/sar.
+ Limitations: the immediate, if it exists,
+            is an unsigned value, 1 .. 31 inclusive for 32-bit shifts
+            and 1 .. 63 inclusive for 64-bit shifts.
+ */
+ struct {
+ PPCShftOp op;
+ Bool sz32; /* mode64 has both 32 and 64bit shft */
+ HReg dst;
+ HReg srcL;
+ PPCRH* srcR;
+ } Shft;
+ /* */
+ struct {
+ Bool isAdd; /* else sub */
+ Bool setC; /* else read carry */
+ HReg dst;
+ HReg srcL;
+ HReg srcR;
+ } AddSubC;
+ /* If signed, the immediate, if it exists, is a signed 16,
+ else it is an unsigned 16. */
+ struct {
+ Bool syned;
+ Bool sz32; /* mode64 has both 32 and 64bit cmp */
+ UInt crfD;
+ HReg srcL;
+ PPCRH* srcR;
+ } Cmp;
+ /* Not, Neg, Clz32/64, Extsw */
+ struct {
+ PPCUnaryOp op;
+ HReg dst;
+ HReg src;
+ } Unary;
+ struct {
+         Bool syned;  /* meaningless if hi==False */
+ Bool hi; /* False=>low, True=>high */
+ Bool sz32; /* mode64 has both 32 & 64bit mull */
+ HReg dst;
+ HReg srcL;
+ HReg srcR;
+ } MulL;
+ /* ppc32 div/divu instruction. */
+ struct {
+ Bool syned;
+ Bool sz32; /* mode64 has both 32 & 64bit div */
+ HReg dst;
+ HReg srcL;
+ HReg srcR;
+ } Div;
+ /* Pseudo-insn. Call target (an absolute address), on given
+ condition (which could be Pct_ALWAYS). argiregs indicates
+ which of r3 .. r10 carries argument values for this call,
+ using a bit mask (1<<N is set if rN holds an arg, for N in
+ 3 .. 10 inclusive). */
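+         /* e.g. a helper taking two word-size args in r3 and r4 has
+            argiregs == (1<<3)|(1<<4) == 0x18. */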
+ struct {
+ PPCCondCode cond;
+ Addr64 target;
+ UInt argiregs;
+ } Call;
+ /* Pseudo-insn. Goto dst, on given condition (which could be
+ Pct_ALWAYS). */
+ struct {
+ IRJumpKind jk;
+ PPCCondCode cond;
+ PPCRI* dst;
+ } Goto;
+ /* Mov src to dst on the given condition, which may not
+ be the bogus Pct_ALWAYS. */
+ struct {
+ PPCCondCode cond;
+ HReg dst;
+ PPCRI* src;
+ } CMov;
+ /* Zero extending loads. Dst size is host word size */
+ struct {
+ UChar sz; /* 1|2|4|8 */
+ HReg dst;
+ PPCAMode* src;
+ } Load;
+ /* Load-and-reserve (lwarx, ldarx) */
+ struct {
+ UChar sz; /* 4|8 */
+ HReg dst;
+ HReg src;
+ } LoadL;
+ /* 64/32/16/8 bit stores */
+ struct {
+ UChar sz; /* 1|2|4|8 */
+ PPCAMode* dst;
+ HReg src;
+ } Store;
+ /* Store-conditional (stwcx., stdcx.) */
+ struct {
+ UChar sz; /* 4|8 */
+ HReg dst;
+ HReg src;
+ } StoreC;
+ /* Convert a ppc condition code to value 0 or 1. */
+ struct {
+ PPCCondCode cond;
+ HReg dst;
+ } Set;
+ /* Move the entire CR to a GPR */
+ struct {
+ HReg dst;
+ } MfCR;
+ /* Mem fence. In short, an insn which flushes all preceding
+ loads and stores as much as possible before continuing.
+ On PPC we emit a "sync". */
+ struct {
+ } MFence;
+
+ /* PPC Floating point */
+ struct {
+ PPCFpOp op;
+ HReg dst;
+ HReg src;
+ } FpUnary;
+ struct {
+ PPCFpOp op;
+ HReg dst;
+ HReg srcL;
+ HReg srcR;
+ } FpBinary;
+ struct {
+ PPCFpOp op;
+ HReg dst;
+ HReg srcML;
+ HReg srcMR;
+ HReg srcAcc;
+ } FpMulAcc;
+ struct {
+ Bool isLoad;
+ UChar sz; /* only 4 (IEEE single) or 8 (IEEE double) */
+ HReg reg;
+ PPCAMode* addr;
+ } FpLdSt;
+ struct {
+ HReg addr; /* int reg */
+ HReg data; /* float reg */
+ } FpSTFIW;
+ /* Round 64-bit FP value to 32-bit FP value in an FP reg. */
+ struct {
+ HReg src;
+ HReg dst;
+ } FpRSP;
+ /* fcfid/fctid/fctiw. Note there's no fcfiw so fromI==True
+ && int32==True is not allowed. */
+ struct {
+ Bool fromI; /* False==F->I, True==I->F */
+ Bool int32; /* True== I is 32, False==I is 64 */
+ HReg src;
+ HReg dst;
+ } FpCftI;
+ /* FP mov src to dst on the given condition. */
+ struct {
+ PPCCondCode cond;
+ HReg dst;
+ HReg src;
+ } FpCMov;
+ /* Load FP Status & Control Register */
+ struct {
+ HReg src;
+ } FpLdFPSCR;
+ /* Do a compare, generating result into an int register. */
+ struct {
+ UChar crfD;
+ HReg dst;
+ HReg srcL;
+ HReg srcR;
+ } FpCmp;
+
+ /* Read/Write Link Register */
+ struct {
+ Bool wrLR;
+ HReg gpr;
+ } RdWrLR;
+
+ /* Simplistic AltiVec */
+ struct {
+ Bool isLoad;
+ UChar sz; /* 8|16|32|128 */
+ HReg reg;
+ PPCAMode* addr;
+ } AvLdSt;
+ struct {
+ PPCAvOp op;
+ HReg dst;
+ HReg src;
+ } AvUnary;
+ struct {
+ PPCAvOp op;
+ HReg dst;
+ HReg srcL;
+ HReg srcR;
+ } AvBinary;
+ struct {
+ PPCAvOp op;
+ HReg dst;
+ HReg srcL;
+ HReg srcR;
+ } AvBin8x16;
+ struct {
+ PPCAvOp op;
+ HReg dst;
+ HReg srcL;
+ HReg srcR;
+ } AvBin16x8;
+ struct {
+ PPCAvOp op;
+ HReg dst;
+ HReg srcL;
+ HReg srcR;
+ } AvBin32x4;
+ struct {
+ PPCAvFpOp op;
+ HReg dst;
+ HReg srcL;
+ HReg srcR;
+ } AvBin32Fx4;
+ struct {
+ PPCAvFpOp op;
+ HReg dst;
+ HReg src;
+ } AvUn32Fx4;
+ /* Perm,Sel,SlDbl,Splat are all weird AV permutations */
+ struct {
+ HReg dst;
+ HReg srcL;
+ HReg srcR;
+ HReg ctl;
+ } AvPerm;
+ struct {
+ HReg dst;
+ HReg srcL;
+ HReg srcR;
+ HReg ctl;
+ } AvSel;
+ struct {
+ UChar shift;
+ HReg dst;
+ HReg srcL;
+ HReg srcR;
+ } AvShlDbl;
+ struct {
+ UChar sz; /* 8,16,32 */
+ HReg dst;
+ PPCVI5s* src;
+ } AvSplat;
+ /* Mov src to dst on the given condition, which may not
+            be the bogus Pct_ALWAYS. */
+ struct {
+ PPCCondCode cond;
+ HReg dst;
+ HReg src;
+ } AvCMov;
+ /* Load AltiVec Status & Control Register */
+ struct {
+ HReg src;
+ } AvLdVSCR;
+ } Pin;
+ }
+ PPCInstr;
+
+
+extern PPCInstr* PPCInstr_LI ( HReg, ULong, Bool );
+extern PPCInstr* PPCInstr_Alu ( PPCAluOp, HReg, HReg, PPCRH* );
+extern PPCInstr* PPCInstr_Shft ( PPCShftOp, Bool sz32, HReg, HReg, PPCRH* );
+extern PPCInstr* PPCInstr_AddSubC ( Bool, Bool, HReg, HReg, HReg );
+extern PPCInstr* PPCInstr_Cmp ( Bool, Bool, UInt, HReg, PPCRH* );
+extern PPCInstr* PPCInstr_Unary ( PPCUnaryOp op, HReg dst, HReg src );
+extern PPCInstr* PPCInstr_MulL ( Bool syned, Bool hi32, Bool sz32, HReg, HReg, HReg );
+extern PPCInstr* PPCInstr_Div ( Bool syned, Bool sz32, HReg dst, HReg srcL, HReg srcR );
+extern PPCInstr* PPCInstr_Call ( PPCCondCode, Addr64, UInt );
+extern PPCInstr* PPCInstr_Goto ( IRJumpKind, PPCCondCode cond, PPCRI* dst );
+extern PPCInstr* PPCInstr_CMov ( PPCCondCode, HReg dst, PPCRI* src );
+extern PPCInstr* PPCInstr_Load ( UChar sz,
+ HReg dst, PPCAMode* src, Bool mode64 );
+extern PPCInstr* PPCInstr_LoadL ( UChar sz,
+ HReg dst, HReg src, Bool mode64 );
+extern PPCInstr* PPCInstr_Store ( UChar sz, PPCAMode* dst,
+ HReg src, Bool mode64 );
+extern PPCInstr* PPCInstr_StoreC ( UChar sz, HReg dst, HReg src,
+ Bool mode64 );
+extern PPCInstr* PPCInstr_Set ( PPCCondCode cond, HReg dst );
+extern PPCInstr* PPCInstr_MfCR ( HReg dst );
+extern PPCInstr* PPCInstr_MFence ( void );
+
+extern PPCInstr* PPCInstr_FpUnary ( PPCFpOp op, HReg dst, HReg src );
+extern PPCInstr* PPCInstr_FpBinary ( PPCFpOp op, HReg dst, HReg srcL, HReg srcR );
+extern PPCInstr* PPCInstr_FpMulAcc ( PPCFpOp op, HReg dst, HReg srcML,
+ HReg srcMR, HReg srcAcc );
+extern PPCInstr* PPCInstr_FpLdSt ( Bool isLoad, UChar sz, HReg, PPCAMode* );
+extern PPCInstr* PPCInstr_FpSTFIW ( HReg addr, HReg data );
+extern PPCInstr* PPCInstr_FpRSP ( HReg dst, HReg src );
+extern PPCInstr* PPCInstr_FpCftI ( Bool fromI, Bool int32,
+ HReg dst, HReg src );
+extern PPCInstr* PPCInstr_FpCMov ( PPCCondCode, HReg dst, HReg src );
+extern PPCInstr* PPCInstr_FpLdFPSCR ( HReg src );
+extern PPCInstr* PPCInstr_FpCmp ( HReg dst, HReg srcL, HReg srcR );
+
+extern PPCInstr* PPCInstr_RdWrLR ( Bool wrLR, HReg gpr );
+
+extern PPCInstr* PPCInstr_AvLdSt ( Bool isLoad, UChar sz, HReg, PPCAMode* );
+extern PPCInstr* PPCInstr_AvUnary ( PPCAvOp op, HReg dst, HReg src );
+extern PPCInstr* PPCInstr_AvBinary ( PPCAvOp op, HReg dst, HReg srcL, HReg srcR );
+extern PPCInstr* PPCInstr_AvBin8x16 ( PPCAvOp op, HReg dst, HReg srcL, HReg srcR );
+extern PPCInstr* PPCInstr_AvBin16x8 ( PPCAvOp op, HReg dst, HReg srcL, HReg srcR );
+extern PPCInstr* PPCInstr_AvBin32x4 ( PPCAvOp op, HReg dst, HReg srcL, HReg srcR );
+extern PPCInstr* PPCInstr_AvBin32Fx4 ( PPCAvOp op, HReg dst, HReg srcL, HReg srcR );
+extern PPCInstr* PPCInstr_AvUn32Fx4 ( PPCAvOp op, HReg dst, HReg src );
+extern PPCInstr* PPCInstr_AvPerm ( HReg dst, HReg srcL, HReg srcR, HReg ctl );
+extern PPCInstr* PPCInstr_AvSel ( HReg ctl, HReg dst, HReg srcL, HReg srcR );
+extern PPCInstr* PPCInstr_AvShlDbl ( UChar shift, HReg dst, HReg srcL, HReg srcR );
+extern PPCInstr* PPCInstr_AvSplat ( UChar sz, HReg dst, PPCVI5s* src );
+extern PPCInstr* PPCInstr_AvCMov ( PPCCondCode, HReg dst, HReg src );
+extern PPCInstr* PPCInstr_AvLdVSCR ( HReg src );
+
+extern void ppPPCInstr ( PPCInstr*, Bool mode64 );
+
+/* Some functions that insulate the register allocator from details
+ of the underlying instruction set. */
+extern void getRegUsage_PPCInstr ( HRegUsage*, PPCInstr*, Bool mode64 );
+extern void mapRegs_PPCInstr ( HRegRemap*, PPCInstr* , Bool mode64);
+extern Bool isMove_PPCInstr ( PPCInstr*, HReg*, HReg* );
+extern Int emit_PPCInstr ( UChar* buf, Int nbuf, PPCInstr*,
+ Bool mode64, void* dispatch );
+
+extern void genSpill_PPC ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
+ HReg rreg, Int offsetB, Bool mode64 );
+extern void genReload_PPC ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
+ HReg rreg, Int offsetB, Bool mode64 );
+
+extern void getAllocableRegs_PPC ( Int*, HReg**, Bool mode64 );
+extern HInstrArray* iselSB_PPC ( IRSB*, VexArch,
+ VexArchInfo*,
+ VexAbiInfo* );
+
+#endif /* ndef __VEX_HOST_PPC_DEFS_H */
+
+/*---------------------------------------------------------------*/
+/*--- end host_ppc_defs.h ---*/
+/*---------------------------------------------------------------*/
diff --git a/VEX/priv/host_ppc_isel.c b/VEX/priv/host_ppc_isel.c
new file mode 100644
index 0000000..4ae18f3
--- /dev/null
+++ b/VEX/priv/host_ppc_isel.c
@@ -0,0 +1,4206 @@
+
+/*---------------------------------------------------------------*/
+/*--- begin host_ppc_isel.c ---*/
+/*---------------------------------------------------------------*/
+
+/*
+ This file is part of Valgrind, a dynamic binary instrumentation
+ framework.
+
+ Copyright (C) 2004-2010 OpenWorks LLP
+ info@open-works.net
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA.
+
+ The GNU General Public License is contained in the file COPYING.
+
+ Neither the names of the U.S. Department of Energy nor the
+ University of California nor the names of its contributors may be
+ used to endorse or promote products derived from this software
+ without prior written permission.
+*/
+
+#include "libvex_basictypes.h"
+#include "libvex_ir.h"
+#include "libvex.h"
+
+#include "ir_match.h"
+#include "main_util.h"
+#include "main_globals.h"
+#include "host_generic_regs.h"
+#include "host_ppc_defs.h"
+
+/* GPR register class for ppc32/64 */
+#define HRcGPR(__mode64) (__mode64 ? HRcInt64 : HRcInt32)
+
+
+/*---------------------------------------------------------*/
+/*--- Register Usage Conventions ---*/
+/*---------------------------------------------------------*/
+/*
+ Integer Regs
+ ------------
+ GPR0 Reserved
+ GPR1 Stack Pointer
+ GPR2 not used - TOC pointer
+   GPR3:10  Allocatable
+ GPR11 if mode64: not used - calls by ptr / env ptr for some langs
+ GPR12 if mode64: not used - exceptions / global linkage code
+ GPR13 not used - Thread-specific pointer
+   GPR14:28 Allocatable
+ GPR29 Unused by us (reserved for the dispatcher)
+ GPR30 AltiVec temp spill register
+ GPR31 GuestStatePointer
+
+   Of the allocatable regs:
+ if (mode64)
+ GPR3:10 Caller-saved regs
+ else
+ GPR3:12 Caller-saved regs
+ GPR14:29 Callee-saved regs
+
+ GPR3 [Return | Parameter] - carrying reg
+ GPR4:10 Parameter-carrying regs
+
+
+ Floating Point Regs
+ -------------------
+   FPR0:31  Allocatable
+
+ FPR0 Caller-saved - scratch reg
+ if (mode64)
+ FPR1:13 Caller-saved - param & return regs
+ else
+ FPR1:8 Caller-saved - param & return regs
+ FPR9:13 Caller-saved regs
+ FPR14:31 Callee-saved regs
+
+
+ Vector Regs (on processors with the VMX feature)
+ -----------
+ VR0-VR1 Volatile scratch registers
+   VR2-VR13   Volatile vector parameter registers
+ VR14-VR19 Volatile scratch registers
+ VR20-VR31 Non-volatile registers
+ VRSAVE Non-volatile 32-bit register
+*/
+
+
+/*---------------------------------------------------------*/
+/*--- PPC FP Status & Control Register Conventions ---*/
+/*---------------------------------------------------------*/
+/*
+ Vex-generated code expects to run with the FPU set as follows: all
+ exceptions masked. The rounding mode is set appropriately before
+ each floating point insn emitted (or left unchanged if known to be
+ correct already). There are a few fp insns (fmr,fneg,fabs,fnabs),
+ which are unaffected by the rm and so the rounding mode is not set
+ prior to them.
+
+ At least on MPC7447A (Mac Mini), frsqrte is also not affected by
+ rounding mode. At some point the ppc docs get sufficiently vague
+ that the only way to find out is to write test programs.
+*/
+/* Notes on the FP instruction set, 6 Feb 06.
+
+What exns -> CR1 ? Sets FPRF ? Observes RM ?
+-------------------------------------------------------------
+
+fmr[.] if . n n
+fneg[.] if . n n
+fabs[.] if . n n
+fnabs[.] if . n n
+
+fadd[.] if . y y
+fadds[.] if . y y
+fcfid[.] (i64->dbl) if . y y
+fcmpo (cmp, result n n n
+fcmpu to crfD) n n n
+fctid[.] (dbl->i64) if . ->undef y
+fctidz[.] (dbl->i64) if . ->undef rounds-to-zero
+fctiw[.] (dbl->i32) if . ->undef y
+fctiwz[.] (dbl->i32) if . ->undef rounds-to-zero
+fdiv[.] if . y y
+fdivs[.] if . y y
+fmadd[.] if . y y
+fmadds[.] if . y y
+fmsub[.] if . y y
+fmsubs[.] if . y y
+fmul[.] if . y y
+fmuls[.] if . y y
+
+(note: for fnm*, rounding happens before final negation)
+fnmadd[.] if . y y
+fnmadds[.] if . y y
+fnmsub[.] if . y y
+fnmsubs[.] if . y y
+
+fre[.] if . y y
+fres[.] if . y y
+
+frsqrte[.] if . y apparently not
+
+fsqrt[.] if . y y
+fsqrts[.] if . y y
+fsub[.] if . y y
+fsubs[.] if . y y
+
+
+fpscr: bits 30-31 (ibm) are RM
+ 24-29 (ibm) are exnmasks/non-IEEE bit, all zero
+ 15-19 (ibm) is FPRF: class, <, =, >, UNord
+
+The ppc front end (guest) makes fpscr read as all zeros except RM (and maybe FPRF
+in future)
+
+mcrfs - move fpscr field to CR field
+mtfsfi[.] - 4 bit imm moved to fpscr field
+mtfsf[.] - move frS[low 1/2] to fpscr but using 8-bit field mask
+mtfsb1[.] - set given fpscr bit
+mtfsb0[.] - clear given fpscr bit
+mffs[.] - move all fpscr to frD[low 1/2]
+
+For [.] presumably cr1 is set with exn summary bits, as per
+main FP insns
+
+A single precision store truncates/denormalises the in-register value,
+but does not round it.  This is so that lfs followed by stfs is
+always the identity.
+*/
+
+
+/*---------------------------------------------------------*/
+/*--- misc helpers ---*/
+/*---------------------------------------------------------*/
+
+/* These are duplicated in guest-ppc/toIR.c */
+static IRExpr* unop ( IROp op, IRExpr* a )
+{
+ return IRExpr_Unop(op, a);
+}
+
+static IRExpr* mkU32 ( UInt i )
+{
+ return IRExpr_Const(IRConst_U32(i));
+}
+
+static IRExpr* bind ( Int binder )
+{
+ return IRExpr_Binder(binder);
+}
+
+
+/*---------------------------------------------------------*/
+/*--- ISelEnv ---*/
+/*---------------------------------------------------------*/
+
+/* This carries around:
+
+ - A mapping from IRTemp to IRType, giving the type of any IRTemp we
+ might encounter. This is computed before insn selection starts,
+ and does not change.
+
+ - A mapping from IRTemp to HReg. This tells the insn selector
+ which virtual register(s) are associated with each IRTemp
+ temporary. This is computed before insn selection starts, and
+ does not change. We expect this mapping to map precisely the
+ same set of IRTemps as the type mapping does.
+
+ - vregmap holds the primary register for the IRTemp.
+ - vregmapHI holds the secondary register for the IRTemp,
+ if any is needed. That's only for Ity_I64 temps
+ in 32 bit mode or Ity_I128 temps in 64-bit mode.
+
+ - The name of the vreg in which we stash a copy of the link reg,
+ so helper functions don't kill it.
+
+ - The code array, that is, the insns selected so far.
+
+ - A counter, for generating new virtual registers.
+
+ - The host subarchitecture we are selecting insns for.
+ This is set at the start and does not change.
+
+ - A Bool to tell us if the host is 32 or 64bit.
+ This is set at the start and does not change.
+
+ - An IRExpr*, which may be NULL, holding the IR expression (an
+ IRRoundingMode-encoded value) to which the FPU's rounding mode
+ was most recently set. Setting to NULL is always safe. Used to
+ avoid redundant settings of the FPU's rounding mode, as
+ described in set_FPU_rounding_mode below.
+
+ - A VexMiscInfo*, needed for knowing how to generate
+ function calls for this target
+*/
+
+typedef
+ struct {
+ IRTypeEnv* type_env;
+
+ HReg* vregmap;
+ HReg* vregmapHI;
+ Int n_vregmap;
+
+ HReg savedLR;
+
+ HInstrArray* code;
+
+ Int vreg_ctr;
+
+ /* 27 Jan 06: Not currently used, but should be */
+ UInt hwcaps;
+
+ Bool mode64;
+
+ IRExpr* previous_rm;
+
+ VexAbiInfo* vbi;
+ }
+ ISelEnv;
+
+
+static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp )
+{
+ vassert(tmp >= 0);
+ vassert(tmp < env->n_vregmap);
+ return env->vregmap[tmp];
+}
+
+static void lookupIRTempPair ( HReg* vrHI, HReg* vrLO,
+ ISelEnv* env, IRTemp tmp )
+{
+ vassert(!env->mode64);
+ vassert(tmp >= 0);
+ vassert(tmp < env->n_vregmap);
+ vassert(env->vregmapHI[tmp] != INVALID_HREG);
+ *vrLO = env->vregmap[tmp];
+ *vrHI = env->vregmapHI[tmp];
+}
+
+static void addInstr ( ISelEnv* env, PPCInstr* instr )
+{
+ addHInstr(env->code, instr);
+ if (vex_traceflags & VEX_TRACE_VCODE) {
+ ppPPCInstr(instr, env->mode64);
+ vex_printf("\n");
+ }
+}
+
+static HReg newVRegI ( ISelEnv* env )
+{
+ HReg reg = mkHReg(env->vreg_ctr, HRcGPR(env->mode64),
+ True/*virtual reg*/);
+ env->vreg_ctr++;
+ return reg;
+}
+
+static HReg newVRegF ( ISelEnv* env )
+{
+ HReg reg = mkHReg(env->vreg_ctr, HRcFlt64, True/*virtual reg*/);
+ env->vreg_ctr++;
+ return reg;
+}
+
+static HReg newVRegV ( ISelEnv* env )
+{
+ HReg reg = mkHReg(env->vreg_ctr, HRcVec128, True/*virtual reg*/);
+ env->vreg_ctr++;
+ return reg;
+}
+
+
+/*---------------------------------------------------------*/
+/*--- ISEL: Forward declarations ---*/
+/*---------------------------------------------------------*/
+
+/* These are organised as iselXXX and iselXXX_wrk pairs. The
+ iselXXX_wrk do the real work, but are not to be called directly.
+ For each XXX, iselXXX calls its iselXXX_wrk counterpart, then
+ checks that all returned registers are virtual. You should not
+ call the _wrk version directly.
+
+ 'Word' refers to the size of the native machine word, that is,
+ 32-bit int in 32-bit mode and 64-bit int in 64-bit mode. '2Word'
+ therefore refers to a double-width (64/128-bit) quantity in two
+ integer registers.
+*/
+/* 32-bit mode: compute an I8/I16/I32 into a GPR.
+ 64-bit mode: compute an I8/I16/I32/I64 into a GPR. */
+static HReg iselWordExpr_R_wrk ( ISelEnv* env, IRExpr* e );
+static HReg iselWordExpr_R ( ISelEnv* env, IRExpr* e );
+
+/* 32-bit mode: Compute an I8/I16/I32 into a RH
+ (reg-or-halfword-immediate).
+ 64-bit mode: Compute an I8/I16/I32/I64 into a RH
+ (reg-or-halfword-immediate).
+ It's important to specify whether the immediate is to be regarded
+ as signed or not. If yes, this will never return -32768 as an
+   immediate; this guarantees that all signed immediates that are
+   returned can have their sign inverted if need be.
+*/
+static PPCRH* iselWordExpr_RH_wrk ( ISelEnv* env,
+ Bool syned, IRExpr* e );
+static PPCRH* iselWordExpr_RH ( ISelEnv* env,
+ Bool syned, IRExpr* e );
+
+/* 32-bit mode: compute an I32 into a RI (reg or 32-bit immediate).
+ 64-bit mode: compute an I64 into a RI (reg or 64-bit immediate). */
+static PPCRI* iselWordExpr_RI_wrk ( ISelEnv* env, IRExpr* e );
+static PPCRI* iselWordExpr_RI ( ISelEnv* env, IRExpr* e );
+
+/* In 32 bit mode ONLY, compute an I8 into a
+ reg-or-5-bit-unsigned-immediate, the latter being an immediate in
+ the range 1 .. 31 inclusive. Used for doing shift amounts. */
+static PPCRH* iselWordExpr_RH5u_wrk ( ISelEnv* env, IRExpr* e );
+static PPCRH* iselWordExpr_RH5u ( ISelEnv* env, IRExpr* e );
+
+/* In 64-bit mode ONLY, compute an I8 into a
+ reg-or-6-bit-unsigned-immediate, the latter being an immediate in
+ the range 1 .. 63 inclusive. Used for doing shift amounts. */
+static PPCRH* iselWordExpr_RH6u_wrk ( ISelEnv* env, IRExpr* e );
+static PPCRH* iselWordExpr_RH6u ( ISelEnv* env, IRExpr* e );
+
+/* 32-bit mode: compute an I32 into an AMode.
+ 64-bit mode: compute an I64 into an AMode.
+
+   Requires knowing (xferTy) the type of data to be loaded/stored
+ using this amode. That is so that, for 64-bit code generation, any
+ PPCAMode_IR returned will have an index (immediate offset) field
+ that is guaranteed to be 4-aligned, if there is any chance that the
+ amode is to be used in ld/ldu/lda/std/stdu.
+
+ Since there are no such restrictions on 32-bit insns, xferTy is
+ ignored for 32-bit code generation. */
+static PPCAMode* iselWordExpr_AMode_wrk ( ISelEnv* env, IRExpr* e, IRType xferTy );
+static PPCAMode* iselWordExpr_AMode ( ISelEnv* env, IRExpr* e, IRType xferTy );
+
+/* 32-bit mode ONLY: compute an I64 into a GPR pair. */
+static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo,
+ ISelEnv* env, IRExpr* e );
+static void iselInt64Expr ( HReg* rHi, HReg* rLo,
+ ISelEnv* env, IRExpr* e );
+
+/* 64-bit mode ONLY: compute an I128 into a GPR64 pair. */
+static void iselInt128Expr_wrk ( HReg* rHi, HReg* rLo,
+ ISelEnv* env, IRExpr* e );
+static void iselInt128Expr ( HReg* rHi, HReg* rLo,
+ ISelEnv* env, IRExpr* e );
+
+static PPCCondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e );
+static PPCCondCode iselCondCode ( ISelEnv* env, IRExpr* e );
+
+static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e );
+static HReg iselDblExpr ( ISelEnv* env, IRExpr* e );
+
+static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e );
+static HReg iselFltExpr ( ISelEnv* env, IRExpr* e );
+
+static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e );
+static HReg iselVecExpr ( ISelEnv* env, IRExpr* e );
+
+
+/*---------------------------------------------------------*/
+/*--- ISEL: Misc helpers ---*/
+/*---------------------------------------------------------*/
+
+/* Make an int reg-reg move. */
+
+static PPCInstr* mk_iMOVds_RR ( HReg r_dst, HReg r_src )
+{
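+   /* "or rD,rS,rS" is the canonical encoding of "mr rD,rS". */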
+ vassert(hregClass(r_dst) == hregClass(r_src));
+ vassert(hregClass(r_src) == HRcInt32 ||
+ hregClass(r_src) == HRcInt64);
+ return PPCInstr_Alu(Palu_OR, r_dst, r_src, PPCRH_Reg(r_src));
+}
+
+/* Advance/retreat %r1 by n. */
+
+static void add_to_sp ( ISelEnv* env, UInt n )
+{
+ HReg sp = StackFramePtr(env->mode64);
+ vassert(n < 256 && (n%16) == 0);
+ addInstr(env, PPCInstr_Alu( Palu_ADD, sp, sp,
+ PPCRH_Imm(True,toUShort(n)) ));
+}
+
+static void sub_from_sp ( ISelEnv* env, UInt n )
+{
+ HReg sp = StackFramePtr(env->mode64);
+ vassert(n < 256 && (n%16) == 0);
+ addInstr(env, PPCInstr_Alu( Palu_SUB, sp, sp,
+ PPCRH_Imm(True,toUShort(n)) ));
+}
+
+/*
+  Returns a quadword (16-byte) aligned address on the stack:
+   - copies SP, adds 16 bytes, aligns down to a quadword boundary.
+  Use sub_from_sp(env, 32) before calling this,
+  as it expects to have 32 bytes to play with.
+*/
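+/* Example: SP == 0x1008  =>  +16 == 0x1018  =>  & ~0xF == 0x1010,
+   which is 16-aligned and lies within the 32 reserved bytes. */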
+static HReg get_sp_aligned16 ( ISelEnv* env )
+{
+ HReg r = newVRegI(env);
+ HReg align16 = newVRegI(env);
+ addInstr(env, mk_iMOVds_RR(r, StackFramePtr(env->mode64)));
+ // add 16
+ addInstr(env, PPCInstr_Alu( Palu_ADD, r, r,
+ PPCRH_Imm(True,toUShort(16)) ));
+ // mask to quadword
+ addInstr(env,
+ PPCInstr_LI(align16, 0xFFFFFFFFFFFFFFF0ULL, env->mode64));
+ addInstr(env, PPCInstr_Alu(Palu_AND, r,r, PPCRH_Reg(align16)));
+ return r;
+}
+
+
+
+/* Load 2*I32 regs to fp reg */
+static HReg mk_LoadRR32toFPR ( ISelEnv* env,
+ HReg r_srcHi, HReg r_srcLo )
+{
+ HReg fr_dst = newVRegF(env);
+ PPCAMode *am_addr0, *am_addr1;
+
+ vassert(!env->mode64);
+ vassert(hregClass(r_srcHi) == HRcInt32);
+ vassert(hregClass(r_srcLo) == HRcInt32);
+
+ sub_from_sp( env, 16 ); // Move SP down 16 bytes
+ am_addr0 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
+ am_addr1 = PPCAMode_IR( 4, StackFramePtr(env->mode64) );
+
+ // store hi,lo as Ity_I32's
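+   //   (big-endian host: the high 32 bits go at the lower address)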
+ addInstr(env, PPCInstr_Store( 4, am_addr0, r_srcHi, env->mode64 ));
+ addInstr(env, PPCInstr_Store( 4, am_addr1, r_srcLo, env->mode64 ));
+
+ // load as float
+ addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, fr_dst, am_addr0));
+
+ add_to_sp( env, 16 ); // Reset SP
+ return fr_dst;
+}
+
+/* Load I64 reg to fp reg */
+static HReg mk_LoadR64toFPR ( ISelEnv* env, HReg r_src )
+{
+ HReg fr_dst = newVRegF(env);
+ PPCAMode *am_addr0;
+
+ vassert(env->mode64);
+ vassert(hregClass(r_src) == HRcInt64);
+
+ sub_from_sp( env, 16 ); // Move SP down 16 bytes
+ am_addr0 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
+
+ // store as Ity_I64
+ addInstr(env, PPCInstr_Store( 8, am_addr0, r_src, env->mode64 ));
+
+ // load as float
+ addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, fr_dst, am_addr0));
+
+ add_to_sp( env, 16 ); // Reset SP
+ return fr_dst;
+}
+
+
+/* Given an amode, return one which references 4 bytes further
+ along. */
+
+static PPCAMode* advance4 ( ISelEnv* env, PPCAMode* am )
+{
+ PPCAMode* am4 = dopyPPCAMode( am );
+ if (am4->tag == Pam_IR
+ && am4->Pam.IR.index + 4 <= 32767) {
+ am4->Pam.IR.index += 4;
+ } else {
+ vpanic("advance4(ppc,host)");
+ }
+ return am4;
+}
+
+
+/* Given a guest-state array descriptor, an index expression and a
+ bias, generate a PPCAMode pointing at the relevant piece of
+ guest state. */
+static
+PPCAMode* genGuestArrayOffset ( ISelEnv* env, IRRegArray* descr,
+ IRExpr* off, Int bias )
+{
+ HReg rtmp, roff;
+ Int elemSz = sizeofIRType(descr->elemTy);
+ Int nElems = descr->nElems;
+ Int shift = 0;
+
+ /* Throw out any cases we don't need. In theory there might be a
+ day where we need to handle others, but not today. */
+
+ if (nElems != 16 && nElems != 32)
+ vpanic("genGuestArrayOffset(ppc host)(1)");
+
+ switch (elemSz) {
+ case 4: shift = 2; break;
+ case 8: shift = 3; break;
+ default: vpanic("genGuestArrayOffset(ppc host)(2)");
+ }
+
+ if (bias < -100 || bias > 100) /* somewhat arbitrarily */
+ vpanic("genGuestArrayOffset(ppc host)(3)");
+ if (descr->base < 0 || descr->base > 4000) /* somewhat arbitrarily */
+ vpanic("genGuestArrayOffset(ppc host)(4)");
+
+ /* Compute off into a reg, %off. Then return:
+
+ addi %tmp, %off, bias (if bias != 0)
+ andi %tmp, nElems-1
+ sldi %tmp, shift
+ addi %tmp, %tmp, base
+ ... Baseblockptr + %tmp ...
+ */
+ roff = iselWordExpr_R(env, off);
+ rtmp = newVRegI(env);
+ addInstr(env, PPCInstr_Alu(
+ Palu_ADD,
+ rtmp, roff,
+ PPCRH_Imm(True/*signed*/, toUShort(bias))));
+ addInstr(env, PPCInstr_Alu(
+ Palu_AND,
+ rtmp, rtmp,
+ PPCRH_Imm(False/*unsigned*/, toUShort(nElems-1))));
+ addInstr(env, PPCInstr_Shft(
+ Pshft_SHL,
+ env->mode64 ? False : True/*F:64-bit, T:32-bit shift*/,
+ rtmp, rtmp,
+ PPCRH_Imm(False/*unsigned*/, toUShort(shift))));
+ addInstr(env, PPCInstr_Alu(
+ Palu_ADD,
+ rtmp, rtmp,
+ PPCRH_Imm(True/*signed*/, toUShort(descr->base))));
+ return
+ PPCAMode_RR( GuestStatePtr(env->mode64), rtmp );
+}
+
+
+/*---------------------------------------------------------*/
+/*--- ISEL: Function call helpers ---*/
+/*---------------------------------------------------------*/
+
+/* Used only in doHelperCall. See big comment in doHelperCall re
+ handling of register-parameter args. This function figures out
+ whether evaluation of an expression might require use of a fixed
+ register. If in doubt return True (safe but suboptimal).
+*/
+static
+Bool mightRequireFixedRegs ( IRExpr* e )
+{
+ switch (e->tag) {
+ case Iex_RdTmp: case Iex_Const: case Iex_Get:
+ return False;
+ default:
+ return True;
+ }
+}
+
+
+/* Do a complete function call. guard is a Ity_Bit expression
+ indicating whether or not the call happens. If guard==NULL, the
+ call is unconditional. */
+
+static
+void doHelperCall ( ISelEnv* env,
+ Bool passBBP,
+ IRExpr* guard, IRCallee* cee, IRExpr** args )
+{
+ PPCCondCode cc;
+ HReg argregs[PPC_N_REGPARMS];
+ HReg tmpregs[PPC_N_REGPARMS];
+ Bool go_fast;
+ Int n_args, i, argreg;
+ UInt argiregs;
+ ULong target;
+ Bool mode64 = env->mode64;
+
+ /* Do we need to force use of an odd-even reg pair for 64-bit
+ args? */
+ Bool regalign_int64s
+ = (!mode64) && env->vbi->host_ppc32_regalign_int64_args;
+
+ /* Marshal args for a call and do the call.
+
+      If passBBP is True, the baseblock pointer (the guest state
+      pointer) is to be passed as the first arg.
+
+ This function only deals with a tiny set of possibilities, which
+ cover all helpers in practice. The restrictions are that only
+ arguments in registers are supported, hence only PPC_N_REGPARMS x
+ (mode32:32 | mode64:64) integer bits in total can be passed.
+ In fact the only supported arg type is (mode32:I32 | mode64:I64).
+
+ Generating code which is both efficient and correct when
+ parameters are to be passed in registers is difficult, for the
+ reasons elaborated in detail in comments attached to
+ doHelperCall() in priv/host-x86/isel.c. Here, we use a variant
+ of the method described in those comments.
+
+ The problem is split into two cases: the fast scheme and the
+ slow scheme. In the fast scheme, arguments are computed
+ directly into the target (real) registers. This is only safe
+ when we can be sure that computation of each argument will not
+ trash any real registers set by computation of any other
+ argument.
+
+ In the slow scheme, all args are first computed into vregs, and
+ once they are all done, they are moved to the relevant real
+ regs. This always gives correct code, but it also gives a bunch
+ of vreg-to-rreg moves which are usually redundant but are hard
+ for the register allocator to get rid of.
+
+ To decide which scheme to use, all argument expressions are
+ first examined. If they are all so simple that it is clear they
+ will be evaluated without use of any fixed registers, use the
+ fast scheme, else use the slow scheme. Note also that only
+ unconditional calls may use the fast scheme, since having to
+ compute a condition expression could itself trash real
+ registers.
+
+ Note this requires being able to examine an expression and
+ determine whether or not evaluation of it might use a fixed
+ register. That requires knowledge of how the rest of this insn
+ selector works. Currently just the following 3 are regarded as
+ safe -- hopefully they cover the majority of arguments in
+      practice: IRExpr_RdTmp, IRExpr_Const and IRExpr_Get.
+ */
+
+ /* Note that the cee->regparms field is meaningless on PPC32/64 host
+ (since there is only one calling convention) and so we always
+ ignore it. */
+
+ n_args = 0;
+ for (i = 0; args[i]; i++)
+ n_args++;
+
+ if (PPC_N_REGPARMS < n_args + (passBBP ? 1 : 0)) {
+ vpanic("doHelperCall(PPC): cannot currently handle > 8 args");
+ // PPC_N_REGPARMS
+ }
+
+ argregs[0] = hregPPC_GPR3(mode64);
+ argregs[1] = hregPPC_GPR4(mode64);
+ argregs[2] = hregPPC_GPR5(mode64);
+ argregs[3] = hregPPC_GPR6(mode64);
+ argregs[4] = hregPPC_GPR7(mode64);
+ argregs[5] = hregPPC_GPR8(mode64);
+ argregs[6] = hregPPC_GPR9(mode64);
+ argregs[7] = hregPPC_GPR10(mode64);
+ argiregs = 0;
+
+ tmpregs[0] = tmpregs[1] = tmpregs[2] =
+ tmpregs[3] = tmpregs[4] = tmpregs[5] =
+ tmpregs[6] = tmpregs[7] = INVALID_HREG;
+
+   /* First decide which scheme (slow or fast) is to be used.  Start
+      by assuming the fast scheme, and fall back to the slow one if
+      any contraindications appear. */
+
+ go_fast = True;
+
+ if (guard) {
+ if (guard->tag == Iex_Const
+ && guard->Iex.Const.con->tag == Ico_U1
+ && guard->Iex.Const.con->Ico.U1 == True) {
+ /* unconditional */
+ } else {
+ /* Not manifestly unconditional -- be conservative. */
+ go_fast = False;
+ }
+ }
+
+ if (go_fast) {
+ for (i = 0; i < n_args; i++) {
+ if (mightRequireFixedRegs(args[i])) {
+ go_fast = False;
+ break;
+ }
+ }
+ }
+
+ /* At this point the scheme to use has been established. Generate
+ code to get the arg values into the argument rregs. */
+
+ if (go_fast) {
+
+ /* FAST SCHEME */
+ argreg = 0;
+ if (passBBP) {
+ argiregs |= (1 << (argreg+3));
+ addInstr(env, mk_iMOVds_RR( argregs[argreg],
+ GuestStatePtr(mode64) ));
+ argreg++;
+ }
+
+ for (i = 0; i < n_args; i++) {
+ vassert(argreg < PPC_N_REGPARMS);
+ vassert(typeOfIRExpr(env->type_env, args[i]) == Ity_I32 ||
+ typeOfIRExpr(env->type_env, args[i]) == Ity_I64);
+ if (!mode64) {
+ if (typeOfIRExpr(env->type_env, args[i]) == Ity_I32) {
+ argiregs |= (1 << (argreg+3));
+ addInstr(env,
+ mk_iMOVds_RR( argregs[argreg],
+ iselWordExpr_R(env, args[i]) ));
+ } else { // Ity_I64
+ HReg rHi, rLo;
+ if (regalign_int64s && (argreg%2) == 1)
+ // ppc32 ELF abi spec for passing LONG_LONG
+ argreg++; // XXX: odd argreg => even rN
+ vassert(argreg < PPC_N_REGPARMS-1);
+ iselInt64Expr(&rHi,&rLo, env, args[i]);
+ argiregs |= (1 << (argreg+3));
+ addInstr(env, mk_iMOVds_RR( argregs[argreg++], rHi ));
+ argiregs |= (1 << (argreg+3));
+ addInstr(env, mk_iMOVds_RR( argregs[argreg], rLo));
+ }
+ } else { // mode64
+ argiregs |= (1 << (argreg+3));
+ addInstr(env, mk_iMOVds_RR( argregs[argreg],
+ iselWordExpr_R(env, args[i]) ));
+ }
+ argreg++;
+ }
+
+ /* Fast scheme only applies for unconditional calls. Hence: */
+ cc.test = Pct_ALWAYS;
+
+ } else {
+
+ /* SLOW SCHEME; move via temporaries */
+ argreg = 0;
+
+ if (passBBP) {
+ /* This is pretty stupid; better to move directly to r3
+ after the rest of the args are done. */
+ tmpregs[argreg] = newVRegI(env);
+ addInstr(env, mk_iMOVds_RR( tmpregs[argreg],
+ GuestStatePtr(mode64) ));
+ argreg++;
+ }
+
+ for (i = 0; i < n_args; i++) {
+ vassert(argreg < PPC_N_REGPARMS);
+ vassert(typeOfIRExpr(env->type_env, args[i]) == Ity_I32 ||
+ typeOfIRExpr(env->type_env, args[i]) == Ity_I64);
+ if (!mode64) {
+ if (typeOfIRExpr(env->type_env, args[i]) == Ity_I32) {
+ tmpregs[argreg] = iselWordExpr_R(env, args[i]);
+ } else { // Ity_I64
+ HReg rHi, rLo;
+ if (regalign_int64s && (argreg%2) == 1)
+ // ppc32 ELF abi spec for passing LONG_LONG
+ argreg++; // XXX: odd argreg => even rN
+ vassert(argreg < PPC_N_REGPARMS-1);
+ iselInt64Expr(&rHi,&rLo, env, args[i]);
+ tmpregs[argreg++] = rHi;
+ tmpregs[argreg] = rLo;
+ }
+ } else { // mode64
+ tmpregs[argreg] = iselWordExpr_R(env, args[i]);
+ }
+ argreg++;
+ }
+
+ /* Now we can compute the condition. We can't do it earlier
+ because the argument computations could trash the condition
+ codes. Be a bit clever to handle the common case where the
+ guard is 1:Bit. */
+ cc.test = Pct_ALWAYS;
+ if (guard) {
+ if (guard->tag == Iex_Const
+ && guard->Iex.Const.con->tag == Ico_U1
+ && guard->Iex.Const.con->Ico.U1 == True) {
+ /* unconditional -- do nothing */
+ } else {
+ cc = iselCondCode( env, guard );
+ }
+ }
+
+ /* Move the args to their final destinations. */
+ for (i = 0; i < argreg; i++) {
+ if (tmpregs[i] == INVALID_HREG) // Skip invalid regs
+ continue;
+ /* None of these insns, including any spill code that might
+ be generated, may alter the condition codes. */
+ argiregs |= (1 << (i+3));
+ addInstr( env, mk_iMOVds_RR( argregs[i], tmpregs[i] ) );
+ }
+
+ }
+
+ target = mode64 ? Ptr_to_ULong(cee->addr) :
+ toUInt(Ptr_to_ULong(cee->addr));
+
+ /* Finally, the call itself. */
+ addInstr(env, PPCInstr_Call( cc, (Addr64)target, argiregs ));
+}
+
+
+/*---------------------------------------------------------*/
+/*--- ISEL: FP rounding mode helpers ---*/
+/*---------------------------------------------------------*/
+
+///* Set FPU's rounding mode to the default */
+//static
+//void set_FPU_rounding_default ( ISelEnv* env )
+//{
+// HReg fr_src = newVRegF(env);
+// HReg r_src = newVRegI(env);
+//
+// /* Default rounding mode = 0x0
+// Only supporting the rounding-mode bits - the rest of FPSCR is 0x0
+// - so we can set the whole register at once (faster)
+// note: upper 32 bits ignored by FpLdFPSCR
+// */
+// addInstr(env, PPCInstr_LI(r_src, 0x0, env->mode64));
+// if (env->mode64) {
+// fr_src = mk_LoadR64toFPR( env, r_src ); // 1*I64 -> F64
+// } else {
+// fr_src = mk_LoadRR32toFPR( env, r_src, r_src ); // 2*I32 -> F64
+// }
+// addInstr(env, PPCInstr_FpLdFPSCR( fr_src ));
+//}
+
+/* Convert IR rounding mode to PPC encoding */
+static HReg roundModeIRtoPPC ( ISelEnv* env, HReg r_rmIR )
+{
+ /*
+ rounding mode | PPC | IR
+ ------------------------
+ to nearest | 00 | 00
+ to zero | 01 | 11
+ to +infinity | 10 | 10
+ to -infinity | 11 | 01
+ */
+ HReg r_rmPPC = newVRegI(env);
+ HReg r_tmp1 = newVRegI(env);
+
+ vassert(hregClass(r_rmIR) == HRcGPR(env->mode64));
+
+ // r_rmPPC = XOR(r_rmIR, r_rmIR << 1) & 3
+ //
+ // slwi tmp1, r_rmIR, 1
+ // xor tmp1, r_rmIR, tmp1
+ // andi r_rmPPC, tmp1, 3
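+   //
+   // Worked example (illustrative): r_rmIR = 01 ("to -infinity"):
+   //   tmp1    = 01 << 1    = 010
+   //   tmp1    = 01 ^ 010   = 011
+   //   r_rmPPC = 011 & 3    = 11    -- "to -infinity" per the table.
+   // The other three rows check out the same way.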
+
+ addInstr(env, PPCInstr_Shft(Pshft_SHL, True/*32bit shift*/,
+ r_tmp1, r_rmIR, PPCRH_Imm(False,1)));
+
+ addInstr(env, PPCInstr_Alu( Palu_XOR, r_tmp1, r_rmIR,
+ PPCRH_Reg(r_tmp1) ));
+
+ addInstr(env, PPCInstr_Alu( Palu_AND, r_rmPPC, r_tmp1,
+ PPCRH_Imm(False,3) ));
+
+ return r_rmPPC;
+}
+
+
+/* Set the FPU's rounding mode: 'mode' is an I32-typed expression
+ denoting a value in the range 0 .. 3, indicating a round mode
+ encoded as per type IRRoundingMode. Set the PPC FPSCR to have the
+ same rounding.
+
+ For speed & simplicity, we're setting the *entire* FPSCR here.
+
+ Setting the rounding mode is expensive. So this function tries to
+ avoid repeatedly setting the rounding mode to the same thing by
+ first comparing 'mode' to the 'mode' tree supplied in the previous
+ call to this function, if any. (The previous value is stored in
+ env->previous_rm.) If 'mode' is a single IR temporary 't' and
+ env->previous_rm is also just 't', then the setting is skipped.
+
+ This is safe because of the SSA property of IR: an IR temporary can
+ only be defined once and so will have the same value regardless of
+ where it appears in the block. Cool stuff, SSA.
+
+ A safety condition: all attempts to set the RM must be aware of
+ this mechanism - by being routed through the functions here.
+
+   Of course this only helps in blocks where the RM is set more than
+ once and it is set to the same value each time, *and* that value is
+ held in the same IR temporary each time. In order to assure the
+ latter as much as possible, the IR optimiser takes care to do CSE
+ on any block with any sign of floating point activity.
+*/
+static
+void set_FPU_rounding_mode ( ISelEnv* env, IRExpr* mode )
+{
+ HReg fr_src = newVRegF(env);
+ HReg r_src;
+
+ vassert(typeOfIRExpr(env->type_env,mode) == Ity_I32);
+
+ /* Do we need to do anything? */
+ if (env->previous_rm
+ && env->previous_rm->tag == Iex_RdTmp
+ && mode->tag == Iex_RdTmp
+ && env->previous_rm->Iex.RdTmp.tmp == mode->Iex.RdTmp.tmp) {
+ /* no - setting it to what it was before. */
+ vassert(typeOfIRExpr(env->type_env, env->previous_rm) == Ity_I32);
+ return;
+ }
+
+ /* No luck - we better set it, and remember what we set it to. */
+ env->previous_rm = mode;
+
+ /* Only supporting the rounding-mode bits - the rest of FPSCR is
+ 0x0 - so we can set the whole register at once (faster). */
+
+ // Resolve rounding mode and convert to PPC representation
+ r_src = roundModeIRtoPPC( env, iselWordExpr_R(env, mode) );
+ // gpr -> fpr
+ if (env->mode64) {
+ fr_src = mk_LoadR64toFPR( env, r_src ); // 1*I64 -> F64
+ } else {
+ fr_src = mk_LoadRR32toFPR( env, r_src, r_src ); // 2*I32 -> F64
+ }
+
+ // Move to FPSCR
+ addInstr(env, PPCInstr_FpLdFPSCR( fr_src ));
+}
+
+
+/*---------------------------------------------------------*/
+/*--- ISEL: vector helpers ---*/
+/*---------------------------------------------------------*/
+
+/* Generate all-zeroes into a new vector register.
+*/
+static HReg generate_zeroes_V128 ( ISelEnv* env )
+{
+ HReg dst = newVRegV(env);
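+   /* vxor dst,dst,dst yields zero regardless of dst's prior contents. */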
+ addInstr(env, PPCInstr_AvBinary(Pav_XOR, dst, dst, dst));
+ return dst;
+}
+
+
+/*
+  Generates code for AvSplat:
+  - takes an IRExpr* of type I8/I16/I32 and returns a vector reg with
+    the input value duplicated into every lane
+  - uses AvSplat(imm) for immediates which fit in a simm6;
+    otherwise must store the reg & load the vector
+*/
+static HReg mk_AvDuplicateRI( ISelEnv* env, IRExpr* e )
+{
+ HReg r_src;
+ HReg dst = newVRegV(env);
+ PPCRI* ri = iselWordExpr_RI(env, e);
+ IRType ty = typeOfIRExpr(env->type_env,e);
+ UInt sz = (ty == Ity_I8) ? 8 : (ty == Ity_I16) ? 16 : 32;
+ vassert(ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32);
+
+ /* special case: immediate */
+ if (ri->tag == Pri_Imm) {
+ Int simm32 = (Int)ri->Pri.Imm;
+
+ /* figure out if it's do-able with imm splats. */
+ if (simm32 >= -32 && simm32 <= 31) {
+ Char simm6 = (Char)simm32;
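+         /* The splat immediate is a 5-bit signed field (-16..15), so
+            out-of-range values are built from two splats, e.g.
+            (illustrative):  20 = 4 - (-16)      -- the SUBU case below
+                            -20 = (-4) + (-16)   -- the ADDU case below */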
+ if (simm6 > 15) { /* 16:31 inclusive */
+ HReg v1 = newVRegV(env);
+ HReg v2 = newVRegV(env);
+ addInstr(env, PPCInstr_AvSplat(sz, v1, PPCVI5s_Imm(-16)));
+ addInstr(env, PPCInstr_AvSplat(sz, v2, PPCVI5s_Imm(simm6-16)));
+ addInstr(env,
+ (sz== 8) ? PPCInstr_AvBin8x16(Pav_SUBU, dst, v2, v1) :
+ (sz==16) ? PPCInstr_AvBin16x8(Pav_SUBU, dst, v2, v1)
+ : PPCInstr_AvBin32x4(Pav_SUBU, dst, v2, v1) );
+ return dst;
+ }
+ if (simm6 < -16) { /* -32:-17 inclusive */
+ HReg v1 = newVRegV(env);
+ HReg v2 = newVRegV(env);
+ addInstr(env, PPCInstr_AvSplat(sz, v1, PPCVI5s_Imm(-16)));
+ addInstr(env, PPCInstr_AvSplat(sz, v2, PPCVI5s_Imm(simm6+16)));
+ addInstr(env,
+ (sz== 8) ? PPCInstr_AvBin8x16(Pav_ADDU, dst, v2, v1) :
+ (sz==16) ? PPCInstr_AvBin16x8(Pav_ADDU, dst, v2, v1)
+ : PPCInstr_AvBin32x4(Pav_ADDU, dst, v2, v1) );
+ return dst;
+ }
+ /* simplest form: -16:15 inclusive */
+ addInstr(env, PPCInstr_AvSplat(sz, dst, PPCVI5s_Imm(simm6)));
+ return dst;
+ }
+
+ /* no luck; use the Slow way. */
+ r_src = newVRegI(env);
+ addInstr(env, PPCInstr_LI(r_src, (Long)simm32, env->mode64));
+ }
+ else {
+ r_src = ri->Pri.Reg;
+ }
+
+ /* default case: store r_src in lowest lane of 16-aligned mem,
+ load vector, splat lowest lane to dst */
+ {
+ /* CAB: Maybe faster to store r_src multiple times (sz dependent),
+ and simply load the vector? */
+ HReg r_aligned16;
+ HReg v_src = newVRegV(env);
+ PPCAMode *am_off12;
+
+ sub_from_sp( env, 32 ); // Move SP down
+ /* Get a 16-aligned address within our stack space */
+ r_aligned16 = get_sp_aligned16( env );
+ am_off12 = PPCAMode_IR( 12, r_aligned16 );
+
+ /* Store r_src in low word of 16-aligned mem */
+ addInstr(env, PPCInstr_Store( 4, am_off12, r_src, env->mode64 ));
+
+ /* Load src to vector[low lane] */
+ addInstr(env, PPCInstr_AvLdSt( True/*ld*/, 4, v_src, am_off12 ) );
+ add_to_sp( env, 32 ); // Reset SP
+
+ /* Finally, splat v_src[low_lane] to dst */
+ addInstr(env, PPCInstr_AvSplat(sz, dst, PPCVI5s_Reg(v_src)));
+ return dst;
+ }
+}
+
+
+/* For each lane of vSrc: result lane = (lane is a NaN) ? all 1s : all 0s */
+static HReg isNan ( ISelEnv* env, HReg vSrc )
+{
+ HReg zeros, msk_exp, msk_mnt, expt, mnts, vIsNan;
+
+ vassert(hregClass(vSrc) == HRcVec128);
+
+ zeros = mk_AvDuplicateRI(env, mkU32(0));
+ msk_exp = mk_AvDuplicateRI(env, mkU32(0x7F800000));
+ msk_mnt = mk_AvDuplicateRI(env, mkU32(0x7FFFFF));
+ expt = newVRegV(env);
+ mnts = newVRegV(env);
+ vIsNan = newVRegV(env);
+
+ /* 32bit float => sign(1) | exponent(8) | mantissa(23)
+ nan => exponent all ones, mantissa > 0 */
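+
+   /* e.g. (illustrative) 0x7FC00000, a quiet NaN, has an all-ones
+      exponent field and mantissa 0x400000 > 0, whereas 0x7F800000
+      (+infinity) has a zero mantissa and is correctly rejected. */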
+
+ addInstr(env, PPCInstr_AvBinary(Pav_AND, expt, vSrc, msk_exp));
+ addInstr(env, PPCInstr_AvBin32x4(Pav_CMPEQU, expt, expt, msk_exp));
+ addInstr(env, PPCInstr_AvBinary(Pav_AND, mnts, vSrc, msk_mnt));
+ addInstr(env, PPCInstr_AvBin32x4(Pav_CMPGTU, mnts, mnts, zeros));
+ addInstr(env, PPCInstr_AvBinary(Pav_AND, vIsNan, expt, mnts));
+ return vIsNan;
+}
+
+
+/*---------------------------------------------------------*/
+/*--- ISEL: Integer expressions (64/32/16/8 bit) ---*/
+/*---------------------------------------------------------*/
+
+/* Select insns for an integer-typed expression, and add them to the
+ code list. Return a reg holding the result. This reg will be a
+ virtual register. THE RETURNED REG MUST NOT BE MODIFIED. If you
+ want to modify it, ask for a new vreg, copy it in there, and modify
+ the copy. The register allocator will do its best to map both
+ vregs to the same real register, so the copies will often disappear
+ later in the game.
+
+ This should handle expressions of 64, 32, 16 and 8-bit type.
+ All results are returned in a (mode64 ? 64bit : 32bit) register.
+ For 16- and 8-bit expressions, the upper (32/48/56 : 16/24) bits
+ are arbitrary, so you should mask or sign extend partial values
+ if necessary.
+*/
+
+static HReg iselWordExpr_R ( ISelEnv* env, IRExpr* e )
+{
+ HReg r = iselWordExpr_R_wrk(env, e);
+ /* sanity checks ... */
+# if 0
+ vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
+# endif
+
+ vassert(hregClass(r) == HRcGPR(env->mode64));
+ vassert(hregIsVirtual(r));
+ return r;
+}
+
+/* DO NOT CALL THIS DIRECTLY ! */
+static HReg iselWordExpr_R_wrk ( ISelEnv* env, IRExpr* e )
+{
+ Bool mode64 = env->mode64;
+ MatchInfo mi;
+ DECLARE_PATTERN(p_32to1_then_1Uto8);
+
+ IRType ty = typeOfIRExpr(env->type_env,e);
+ vassert(ty == Ity_I8 || ty == Ity_I16 ||
+ ty == Ity_I32 || ((ty == Ity_I64) && mode64));
+
+ switch (e->tag) {
+
+ /* --------- TEMP --------- */
+ case Iex_RdTmp:
+ return lookupIRTemp(env, e->Iex.RdTmp.tmp);
+
+ /* --------- LOAD --------- */
+ case Iex_Load: {
+ HReg r_dst;
+ PPCAMode* am_addr;
+ if (e->Iex.Load.end != Iend_BE)
+ goto irreducible;
+ r_dst = newVRegI(env);
+ am_addr = iselWordExpr_AMode( env, e->Iex.Load.addr, ty/*of xfer*/ );
+ addInstr(env, PPCInstr_Load( toUChar(sizeofIRType(ty)),
+ r_dst, am_addr, mode64 ));
+ return r_dst;
+ /*NOTREACHED*/
+ }
+
+ /* --------- BINARY OP --------- */
+ case Iex_Binop: {
+ PPCAluOp aluOp;
+ PPCShftOp shftOp;
+
+ /* Is it an addition or logical style op? */
+ switch (e->Iex.Binop.op) {
+ case Iop_Add8: case Iop_Add16: case Iop_Add32: case Iop_Add64:
+ aluOp = Palu_ADD; break;
+ case Iop_Sub8: case Iop_Sub16: case Iop_Sub32: case Iop_Sub64:
+ aluOp = Palu_SUB; break;
+ case Iop_And8: case Iop_And16: case Iop_And32: case Iop_And64:
+ aluOp = Palu_AND; break;
+ case Iop_Or8: case Iop_Or16: case Iop_Or32: case Iop_Or64:
+ aluOp = Palu_OR; break;
+ case Iop_Xor8: case Iop_Xor16: case Iop_Xor32: case Iop_Xor64:
+ aluOp = Palu_XOR; break;
+ default:
+ aluOp = Palu_INVALID; break;
+ }
+ /* For commutative ops we assume any literal
+ values are on the second operand. */
+ if (aluOp != Palu_INVALID) {
+ HReg r_dst = newVRegI(env);
+ HReg r_srcL = iselWordExpr_R(env, e->Iex.Binop.arg1);
+ PPCRH* ri_srcR = NULL;
+ /* get right arg into an RH, in the appropriate way */
+ switch (aluOp) {
+ case Palu_ADD: case Palu_SUB:
+ ri_srcR = iselWordExpr_RH(env, True/*signed*/,
+ e->Iex.Binop.arg2);
+ break;
+ case Palu_AND: case Palu_OR: case Palu_XOR:
+ ri_srcR = iselWordExpr_RH(env, False/*signed*/,
+ e->Iex.Binop.arg2);
+ break;
+ default:
+ vpanic("iselWordExpr_R_wrk-aluOp-arg2");
+ }
+ addInstr(env, PPCInstr_Alu(aluOp, r_dst, r_srcL, ri_srcR));
+ return r_dst;
+ }
+
+ /* a shift? */
+ switch (e->Iex.Binop.op) {
+ case Iop_Shl8: case Iop_Shl16: case Iop_Shl32: case Iop_Shl64:
+ shftOp = Pshft_SHL; break;
+ case Iop_Shr8: case Iop_Shr16: case Iop_Shr32: case Iop_Shr64:
+ shftOp = Pshft_SHR; break;
+ case Iop_Sar8: case Iop_Sar16: case Iop_Sar32: case Iop_Sar64:
+ shftOp = Pshft_SAR; break;
+ default:
+ shftOp = Pshft_INVALID; break;
+ }
+ /* we assume any literal values are on the second operand. */
+ if (shftOp != Pshft_INVALID) {
+ HReg r_dst = newVRegI(env);
+ HReg r_srcL = iselWordExpr_R(env, e->Iex.Binop.arg1);
+ PPCRH* ri_srcR = NULL;
+ /* get right arg into an RH, in the appropriate way */
+ switch (shftOp) {
+ case Pshft_SHL: case Pshft_SHR: case Pshft_SAR:
+ if (!mode64)
+ ri_srcR = iselWordExpr_RH5u(env, e->Iex.Binop.arg2);
+ else
+ ri_srcR = iselWordExpr_RH6u(env, e->Iex.Binop.arg2);
+ break;
+ default:
+ vpanic("iselIntExpr_R_wrk-shftOp-arg2");
+ }
+ /* widen the left arg if needed */
+ if (shftOp == Pshft_SHR || shftOp == Pshft_SAR) {
+ if (ty == Ity_I8 || ty == Ity_I16) {
+ PPCRH* amt = PPCRH_Imm(False,
+ toUShort(ty == Ity_I8 ? 24 : 16));
+ HReg tmp = newVRegI(env);
+ addInstr(env, PPCInstr_Shft(Pshft_SHL,
+ True/*32bit shift*/,
+ tmp, r_srcL, amt));
+ addInstr(env, PPCInstr_Shft(shftOp,
+ True/*32bit shift*/,
+ tmp, tmp, amt));
+ r_srcL = tmp;
+ vassert(0); /* AWAITING TEST CASE */
+ }
+ }
+         /* Only Ity_I64 expressions need 64-bit shifts; 32-bit shifts
+            are fine for all others. */
+ if (ty == Ity_I64) {
+ vassert(mode64);
+ addInstr(env, PPCInstr_Shft(shftOp, False/*64bit shift*/,
+ r_dst, r_srcL, ri_srcR));
+ } else {
+ addInstr(env, PPCInstr_Shft(shftOp, True/*32bit shift*/,
+ r_dst, r_srcL, ri_srcR));
+ }
+ return r_dst;
+ }
+
+ /* How about a div? */
+ if (e->Iex.Binop.op == Iop_DivS32 ||
+ e->Iex.Binop.op == Iop_DivU32) {
+ Bool syned = toBool(e->Iex.Binop.op == Iop_DivS32);
+ HReg r_dst = newVRegI(env);
+ HReg r_srcL = iselWordExpr_R(env, e->Iex.Binop.arg1);
+ HReg r_srcR = iselWordExpr_R(env, e->Iex.Binop.arg2);
+ addInstr(env, PPCInstr_Div(syned, True/*32bit div*/,
+ r_dst, r_srcL, r_srcR));
+ return r_dst;
+ }
+ if (e->Iex.Binop.op == Iop_DivS64 ||
+ e->Iex.Binop.op == Iop_DivU64) {
+ Bool syned = toBool(e->Iex.Binop.op == Iop_DivS64);
+ HReg r_dst = newVRegI(env);
+ HReg r_srcL = iselWordExpr_R(env, e->Iex.Binop.arg1);
+ HReg r_srcR = iselWordExpr_R(env, e->Iex.Binop.arg2);
+ vassert(mode64);
+ addInstr(env, PPCInstr_Div(syned, False/*64bit div*/,
+ r_dst, r_srcL, r_srcR));
+ return r_dst;
+ }
+
+ /* No? Anyone for a mul? */
+ if (e->Iex.Binop.op == Iop_Mul32
+ || e->Iex.Binop.op == Iop_Mul64) {
+ Bool syned = False;
+ Bool sz32 = (e->Iex.Binop.op != Iop_Mul64);
+ HReg r_dst = newVRegI(env);
+ HReg r_srcL = iselWordExpr_R(env, e->Iex.Binop.arg1);
+ HReg r_srcR = iselWordExpr_R(env, e->Iex.Binop.arg2);
+ addInstr(env, PPCInstr_MulL(syned, False/*lo32*/, sz32,
+ r_dst, r_srcL, r_srcR));
+ return r_dst;
+ }
+
+ /* 32 x 32 -> 64 multiply */
+ if (mode64
+ && (e->Iex.Binop.op == Iop_MullU32
+ || e->Iex.Binop.op == Iop_MullS32)) {
+ HReg tLo = newVRegI(env);
+ HReg tHi = newVRegI(env);
+ HReg r_dst = newVRegI(env);
+ Bool syned = toBool(e->Iex.Binop.op == Iop_MullS32);
+ HReg r_srcL = iselWordExpr_R(env, e->Iex.Binop.arg1);
+ HReg r_srcR = iselWordExpr_R(env, e->Iex.Binop.arg2);
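+         /* The low 32 bits of a 32x32 multiply are the same whether the
+            operands are treated as signed or unsigned; only the high
+            half depends on signedness. */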
+ addInstr(env, PPCInstr_MulL(False/*signedness irrelevant*/,
+ False/*lo32*/, True/*32bit mul*/,
+ tLo, r_srcL, r_srcR));
+ addInstr(env, PPCInstr_MulL(syned,
+ True/*hi32*/, True/*32bit mul*/,
+ tHi, r_srcL, r_srcR));
+ addInstr(env, PPCInstr_Shft(Pshft_SHL, False/*64bit shift*/,
+ r_dst, tHi, PPCRH_Imm(False,32)));
+ addInstr(env, PPCInstr_Alu(Palu_OR,
+ r_dst, r_dst, PPCRH_Reg(tLo)));
+ return r_dst;
+ }
+
+ /* El-mutanto 3-way compare? */
+ if (e->Iex.Binop.op == Iop_CmpORD32S
+ || e->Iex.Binop.op == Iop_CmpORD32U) {
+ Bool syned = toBool(e->Iex.Binop.op == Iop_CmpORD32S);
+ HReg dst = newVRegI(env);
+ HReg srcL = iselWordExpr_R(env, e->Iex.Binop.arg1);
+ PPCRH* srcR = iselWordExpr_RH(env, syned, e->Iex.Binop.arg2);
+ addInstr(env, PPCInstr_Cmp(syned, True/*32bit cmp*/,
+ 7/*cr*/, srcL, srcR));
+ addInstr(env, PPCInstr_MfCR(dst));
+ addInstr(env, PPCInstr_Alu(Palu_AND, dst, dst,
+ PPCRH_Imm(False,7<<1)));
+ return dst;
+ }
+
+ if (e->Iex.Binop.op == Iop_CmpORD64S
+ || e->Iex.Binop.op == Iop_CmpORD64U) {
+ Bool syned = toBool(e->Iex.Binop.op == Iop_CmpORD64S);
+ HReg dst = newVRegI(env);
+ HReg srcL = iselWordExpr_R(env, e->Iex.Binop.arg1);
+ PPCRH* srcR = iselWordExpr_RH(env, syned, e->Iex.Binop.arg2);
+ vassert(mode64);
+ addInstr(env, PPCInstr_Cmp(syned, False/*64bit cmp*/,
+ 7/*cr*/, srcL, srcR));
+ addInstr(env, PPCInstr_MfCR(dst));
+ addInstr(env, PPCInstr_Alu(Palu_AND, dst, dst,
+ PPCRH_Imm(False,7<<1)));
+ return dst;
+ }
+
+ if (e->Iex.Binop.op == Iop_Max32U) {
+ HReg r1 = iselWordExpr_R(env, e->Iex.Binop.arg1);
+ HReg r2 = iselWordExpr_R(env, e->Iex.Binop.arg2);
+ HReg rdst = newVRegI(env);
+ PPCCondCode cc = mk_PPCCondCode( Pct_TRUE, Pcf_7LT );
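+         /* rdst := r1; if (r1 <u r2) rdst := r2  -- i.e. unsigned max */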
+ addInstr(env, mk_iMOVds_RR(rdst, r1));
+ addInstr(env, PPCInstr_Cmp(False/*unsigned*/, True/*32bit cmp*/,
+ 7/*cr*/, rdst, PPCRH_Reg(r2)));
+ addInstr(env, PPCInstr_CMov(cc, rdst, PPCRI_Reg(r2)));
+ return rdst;
+ }
+
+ if (e->Iex.Binop.op == Iop_32HLto64) {
+ HReg r_Hi = iselWordExpr_R(env, e->Iex.Binop.arg1);
+ HReg r_Lo = iselWordExpr_R(env, e->Iex.Binop.arg2);
+         HReg   r_dst  = newVRegI(env);
+         HReg   r_lo32 = newVRegI(env);
+         HReg   msk    = newVRegI(env);
+         vassert(mode64);
+         /* r_dst = OR( r_Hi<<32, r_Lo & 0xFFFFFFFF ).  AND into a fresh
+            vreg (r_lo32): registers returned by iselWordExpr_R must not
+            be modified. */
+         addInstr(env, PPCInstr_Shft(Pshft_SHL, False/*64bit shift*/,
+                                     r_dst, r_Hi, PPCRH_Imm(False,32)));
+         addInstr(env, PPCInstr_LI(msk, 0xFFFFFFFF, mode64));
+         addInstr(env, PPCInstr_Alu( Palu_AND, r_lo32, r_Lo,
+                                     PPCRH_Reg(msk) ));
+         addInstr(env, PPCInstr_Alu( Palu_OR, r_dst, r_dst,
+                                     PPCRH_Reg(r_lo32) ));
+ return r_dst;
+ }
+
+ if (e->Iex.Binop.op == Iop_CmpF64) {
+ HReg fr_srcL = iselDblExpr(env, e->Iex.Binop.arg1);
+ HReg fr_srcR = iselDblExpr(env, e->Iex.Binop.arg2);
+
+ HReg r_ccPPC = newVRegI(env);
+ HReg r_ccIR = newVRegI(env);
+ HReg r_ccIR_b0 = newVRegI(env);
+ HReg r_ccIR_b2 = newVRegI(env);
+ HReg r_ccIR_b6 = newVRegI(env);
+
+ addInstr(env, PPCInstr_FpCmp(r_ccPPC, fr_srcL, fr_srcR));
+
+ /* Map compare result from PPC to IR,
+ conforming to CmpF64 definition. */
+ /*
+ FP cmp result | PPC | IR
+ --------------------------
+ UN | 0x1 | 0x45
+ EQ | 0x2 | 0x40
+ GT | 0x4 | 0x00
+ LT | 0x8 | 0x01
+ */
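+         /* Derivation (illustrative): IR bit 0 is set for UN (0x45) and
+            LT (0x01), i.e. ccPPC bits 0|3; IR bit 2 is set only for UN,
+            i.e. ccPPC bit 0; IR bit 6 is set for UN (0x45) and EQ
+            (0x40), i.e. ccPPC bits 0|1.  The three bit-fiddling
+            sequences below compute exactly those three bits. */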
+
+ // r_ccIR_b0 = r_ccPPC[0] | r_ccPPC[3]
+ addInstr(env, PPCInstr_Shft(Pshft_SHR, True/*32bit shift*/,
+ r_ccIR_b0, r_ccPPC,
+ PPCRH_Imm(False,0x3)));
+ addInstr(env, PPCInstr_Alu(Palu_OR, r_ccIR_b0,
+ r_ccPPC, PPCRH_Reg(r_ccIR_b0)));
+ addInstr(env, PPCInstr_Alu(Palu_AND, r_ccIR_b0,
+ r_ccIR_b0, PPCRH_Imm(False,0x1)));
+
+ // r_ccIR_b2 = r_ccPPC[0]
+ addInstr(env, PPCInstr_Shft(Pshft_SHL, True/*32bit shift*/,
+ r_ccIR_b2, r_ccPPC,
+ PPCRH_Imm(False,0x2)));
+ addInstr(env, PPCInstr_Alu(Palu_AND, r_ccIR_b2,
+ r_ccIR_b2, PPCRH_Imm(False,0x4)));
+
+ // r_ccIR_b6 = r_ccPPC[0] | r_ccPPC[1]
+ addInstr(env, PPCInstr_Shft(Pshft_SHR, True/*32bit shift*/,
+ r_ccIR_b6, r_ccPPC,
+ PPCRH_Imm(False,0x1)));
+ addInstr(env, PPCInstr_Alu(Palu_OR, r_ccIR_b6,
+ r_ccPPC, PPCRH_Reg(r_ccIR_b6)));
+ addInstr(env, PPCInstr_Shft(Pshft_SHL, True/*32bit shift*/,
+ r_ccIR_b6, r_ccIR_b6,
+ PPCRH_Imm(False,0x6)));
+ addInstr(env, PPCInstr_Alu(Palu_AND, r_ccIR_b6,
+ r_ccIR_b6, PPCRH_Imm(False,0x40)));
+
+ // r_ccIR = r_ccIR_b0 | r_ccIR_b2 | r_ccIR_b6
+ addInstr(env, PPCInstr_Alu(Palu_OR, r_ccIR,
+ r_ccIR_b0, PPCRH_Reg(r_ccIR_b2)));
+ addInstr(env, PPCInstr_Alu(Palu_OR, r_ccIR,
+ r_ccIR, PPCRH_Reg(r_ccIR_b6)));
+ return r_ccIR;
+ }
+
+ if (e->Iex.Binop.op == Iop_F64toI32S) {
+ /* This works in both mode64 and mode32. */
+ HReg r1 = StackFramePtr(env->mode64);
+ PPCAMode* zero_r1 = PPCAMode_IR( 0, r1 );
+ HReg fsrc = iselDblExpr(env, e->Iex.Binop.arg2);
+ HReg ftmp = newVRegF(env);
+ HReg idst = newVRegI(env);
+
+ /* Set host rounding mode */
+ set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
+
+ sub_from_sp( env, 16 );
+ addInstr(env, PPCInstr_FpCftI(False/*F->I*/, True/*int32*/,
+ ftmp, fsrc));
+ addInstr(env, PPCInstr_FpSTFIW(r1, ftmp));
+ addInstr(env, PPCInstr_Load(4, idst, zero_r1, mode64));
+
+ /* in 64-bit mode we need to sign-widen idst. */
+ if (mode64)
+ addInstr(env, PPCInstr_Unary(Pun_EXTSW, idst, idst));
+
+ add_to_sp( env, 16 );
+
+ ///* Restore default FPU rounding. */
+ //set_FPU_rounding_default( env );
+ return idst;
+ }
+
+ if (e->Iex.Binop.op == Iop_F64toI64S) {
+ if (mode64) {
+ HReg r1 = StackFramePtr(env->mode64);
+ PPCAMode* zero_r1 = PPCAMode_IR( 0, r1 );
+ HReg fsrc = iselDblExpr(env, e->Iex.Binop.arg2);
+ HReg idst = newVRegI(env);
+ HReg ftmp = newVRegF(env);
+
+ /* Set host rounding mode */
+ set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
+
+ sub_from_sp( env, 16 );
+ addInstr(env, PPCInstr_FpCftI(False/*F->I*/, False/*int64*/,
+ ftmp, fsrc));
+ addInstr(env, PPCInstr_FpLdSt(False/*store*/, 8, ftmp, zero_r1));
+ addInstr(env, PPCInstr_Load(8, idst, zero_r1, True/*mode64*/));
+ add_to_sp( env, 16 );
+
+ ///* Restore default FPU rounding. */
+ //set_FPU_rounding_default( env );
+ return idst;
+ }
+ }
+
+ break;
+ }
+
+ /* --------- UNARY OP --------- */
+ case Iex_Unop: {
+ IROp op_unop = e->Iex.Unop.op;
+
+ /* 1Uto8(32to1(expr32)) */
+ DEFINE_PATTERN(p_32to1_then_1Uto8,
+ unop(Iop_1Uto8,unop(Iop_32to1,bind(0))));
+ if (matchIRExpr(&mi,p_32to1_then_1Uto8,e)) {
+ IRExpr* expr32 = mi.bindee[0];
+ HReg r_dst = newVRegI(env);
+ HReg r_src = iselWordExpr_R(env, expr32);
+ addInstr(env, PPCInstr_Alu(Palu_AND, r_dst,
+ r_src, PPCRH_Imm(False,1)));
+ return r_dst;
+ }
+
+ /* 16Uto32(LDbe:I16(expr32)) */
+ {
+ DECLARE_PATTERN(p_LDbe16_then_16Uto32);
+ DEFINE_PATTERN(p_LDbe16_then_16Uto32,
+ unop(Iop_16Uto32,
+ IRExpr_Load(Iend_BE,Ity_I16,bind(0))) );
+ if (matchIRExpr(&mi,p_LDbe16_then_16Uto32,e)) {
+ HReg r_dst = newVRegI(env);
+ PPCAMode* amode
+ = iselWordExpr_AMode( env, mi.bindee[0], Ity_I16/*xfer*/ );
+ addInstr(env, PPCInstr_Load(2,r_dst,amode, mode64));
+ return r_dst;
+ }
+ }
+
+ switch (op_unop) {
+ case Iop_8Uto16:
+ case Iop_8Uto32:
+ case Iop_8Uto64:
+ case Iop_16Uto32:
+ case Iop_16Uto64: {
+ HReg r_dst = newVRegI(env);
+ HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg);
+ UShort mask = toUShort(op_unop==Iop_16Uto64 ? 0xFFFF :
+ op_unop==Iop_16Uto32 ? 0xFFFF : 0xFF);
+ addInstr(env, PPCInstr_Alu(Palu_AND,r_dst,r_src,
+ PPCRH_Imm(False,mask)));
+ return r_dst;
+ }
+ case Iop_32Uto64: {
+ HReg r_dst = newVRegI(env);
+ HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg);
+ vassert(mode64);
+ addInstr(env,
+ PPCInstr_Shft(Pshft_SHL, False/*64bit shift*/,
+ r_dst, r_src, PPCRH_Imm(False,32)));
+ addInstr(env,
+ PPCInstr_Shft(Pshft_SHR, False/*64bit shift*/,
+ r_dst, r_dst, PPCRH_Imm(False,32)));
+ return r_dst;
+ }
+ case Iop_8Sto16:
+ case Iop_8Sto32:
+ case Iop_16Sto32: {
+ HReg r_dst = newVRegI(env);
+ HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg);
+ UShort amt = toUShort(op_unop==Iop_16Sto32 ? 16 : 24);
+ addInstr(env,
+ PPCInstr_Shft(Pshft_SHL, True/*32bit shift*/,
+ r_dst, r_src, PPCRH_Imm(False,amt)));
+ addInstr(env,
+ PPCInstr_Shft(Pshft_SAR, True/*32bit shift*/,
+ r_dst, r_dst, PPCRH_Imm(False,amt)));
+ return r_dst;
+ }
+ case Iop_8Sto64:
+ case Iop_16Sto64: {
+ HReg r_dst = newVRegI(env);
+ HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg);
+ UShort amt = toUShort(op_unop==Iop_8Sto64 ? 56 :
+ op_unop==Iop_16Sto64 ? 48 : 32);
+ vassert(mode64);
+ addInstr(env,
+ PPCInstr_Shft(Pshft_SHL, False/*64bit shift*/,
+ r_dst, r_src, PPCRH_Imm(False,amt)));
+ addInstr(env,
+ PPCInstr_Shft(Pshft_SAR, False/*64bit shift*/,
+ r_dst, r_dst, PPCRH_Imm(False,amt)));
+ return r_dst;
+ }
+ case Iop_32Sto64: {
+ HReg r_dst = newVRegI(env);
+ HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg);
+ vassert(mode64);
+ /* According to the IBM docs, in 64 bit mode, srawi r,r,0
+ sign extends the lower 32 bits into the upper 32 bits. */
+ addInstr(env,
+ PPCInstr_Shft(Pshft_SAR, True/*32bit shift*/,
+ r_dst, r_src, PPCRH_Imm(False,0)));
+ return r_dst;
+ }
+ case Iop_Not8:
+ case Iop_Not16:
+ case Iop_Not32:
+ case Iop_Not64: {
+ HReg r_dst = newVRegI(env);
+ HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg);
+ addInstr(env, PPCInstr_Unary(Pun_NOT,r_dst,r_src));
+ return r_dst;
+ }
+ case Iop_64HIto32: {
+ if (!mode64) {
+ HReg rHi, rLo;
+ iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
+ return rHi; /* and abandon rLo .. poor wee thing :-) */
+ } else {
+ HReg r_dst = newVRegI(env);
+ HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg);
+ addInstr(env,
+ PPCInstr_Shft(Pshft_SHR, False/*64bit shift*/,
+ r_dst, r_src, PPCRH_Imm(False,32)));
+ return r_dst;
+ }
+ }
+ case Iop_64to32: {
+ if (!mode64) {
+ HReg rHi, rLo;
+ iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
+ return rLo; /* similar stupid comment to the above ... */
+ } else {
+ /* This is a no-op. */
+ return iselWordExpr_R(env, e->Iex.Unop.arg);
+ }
+ }
+ case Iop_64to16: {
+ if (mode64) { /* This is a no-op. */
+ return iselWordExpr_R(env, e->Iex.Unop.arg);
+ }
+ break; /* evidently not used in 32-bit mode */
+ }
+ case Iop_16HIto8:
+ case Iop_32HIto16: {
+ HReg r_dst = newVRegI(env);
+ HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg);
+ UShort shift = toUShort(op_unop == Iop_16HIto8 ? 8 : 16);
+ addInstr(env,
+ PPCInstr_Shft(Pshft_SHR, True/*32bit shift*/,
+ r_dst, r_src, PPCRH_Imm(False,shift)));
+ return r_dst;
+ }
+ case Iop_128HIto64:
+ if (mode64) {
+ HReg rHi, rLo;
+ iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
+ return rHi; /* and abandon rLo .. poor wee thing :-) */
+ }
+ break;
+ case Iop_128to64:
+ if (mode64) {
+ HReg rHi, rLo;
+ iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
+ return rLo; /* similar stupid comment to the above ... */
+ }
+ break;
+ case Iop_1Uto32:
+ case Iop_1Uto8: {
+ HReg r_dst = newVRegI(env);
+ PPCCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
+ addInstr(env, PPCInstr_Set(cond,r_dst));
+ return r_dst;
+ }
+ case Iop_1Sto8:
+ case Iop_1Sto16:
+ case Iop_1Sto32: {
+ /* could do better than this, but for now ... */
+ HReg r_dst = newVRegI(env);
+ PPCCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
+ addInstr(env, PPCInstr_Set(cond,r_dst));
+ addInstr(env,
+ PPCInstr_Shft(Pshft_SHL, True/*32bit shift*/,
+ r_dst, r_dst, PPCRH_Imm(False,31)));
+ addInstr(env,
+ PPCInstr_Shft(Pshft_SAR, True/*32bit shift*/,
+ r_dst, r_dst, PPCRH_Imm(False,31)));
+ return r_dst;
+ }
+ case Iop_1Sto64:
+ if (mode64) {
+ /* could do better than this, but for now ... */
+ HReg r_dst = newVRegI(env);
+ PPCCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
+ addInstr(env, PPCInstr_Set(cond,r_dst));
+ addInstr(env, PPCInstr_Shft(Pshft_SHL, False/*64bit shift*/,
+ r_dst, r_dst, PPCRH_Imm(False,63)));
+ addInstr(env, PPCInstr_Shft(Pshft_SAR, False/*64bit shift*/,
+ r_dst, r_dst, PPCRH_Imm(False,63)));
+ return r_dst;
+ }
+ break;
+ case Iop_Clz32:
+ case Iop_Clz64: {
+ HReg r_src, r_dst;
+ PPCUnaryOp op_clz = (op_unop == Iop_Clz32) ? Pun_CLZ32 :
+ Pun_CLZ64;
+ if (op_unop == Iop_Clz64 && !mode64)
+ goto irreducible;
+ /* Count leading zeroes. */
+ r_dst = newVRegI(env);
+ r_src = iselWordExpr_R(env, e->Iex.Unop.arg);
+ addInstr(env, PPCInstr_Unary(op_clz,r_dst,r_src));
+ return r_dst;
+ }
+
+ case Iop_Left8:
+ case Iop_Left32:
+ case Iop_Left64: {
+ HReg r_src, r_dst;
+ if (op_unop == Iop_Left64 && !mode64)
+ goto irreducible;
+ r_dst = newVRegI(env);
+ r_src = iselWordExpr_R(env, e->Iex.Unop.arg);
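+         /* Iop_Left computes x | -x, which sets every bit at and above
+            the lowest set bit of x. */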
+ addInstr(env, PPCInstr_Unary(Pun_NEG,r_dst,r_src));
+ addInstr(env, PPCInstr_Alu(Palu_OR, r_dst, r_dst, PPCRH_Reg(r_src)));
+ return r_dst;
+ }
+
+ case Iop_CmpwNEZ32: {
+ HReg r_dst = newVRegI(env);
+ HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg);
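+         /* (x | -x) has its sign bit set iff x != 0, so the arithmetic
+            shift right by 31 yields all ones (x != 0) or all zeroes. */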
+ addInstr(env, PPCInstr_Unary(Pun_NEG,r_dst,r_src));
+ addInstr(env, PPCInstr_Alu(Palu_OR, r_dst, r_dst, PPCRH_Reg(r_src)));
+ addInstr(env, PPCInstr_Shft(Pshft_SAR, True/*32bit shift*/,
+ r_dst, r_dst, PPCRH_Imm(False, 31)));
+ return r_dst;
+ }
+
+ case Iop_CmpwNEZ64: {
+ HReg r_dst = newVRegI(env);
+ HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg);
+ if (!mode64) goto irreducible;
+ addInstr(env, PPCInstr_Unary(Pun_NEG,r_dst,r_src));
+ addInstr(env, PPCInstr_Alu(Palu_OR, r_dst, r_dst, PPCRH_Reg(r_src)));
+ addInstr(env, PPCInstr_Shft(Pshft_SAR, False/*64bit shift*/,
+ r_dst, r_dst, PPCRH_Imm(False, 63)));
+ return r_dst;
+ }
+
+ case Iop_V128to32: {
+ HReg r_aligned16;
+ HReg dst = newVRegI(env);
+ HReg vec = iselVecExpr(env, e->Iex.Unop.arg);
+ PPCAMode *am_off0, *am_off12;
+ sub_from_sp( env, 32 ); // Move SP down 32 bytes
+
+ // get a quadword aligned address within our stack space
+ r_aligned16 = get_sp_aligned16( env );
+ am_off0 = PPCAMode_IR( 0, r_aligned16 );
+ am_off12 = PPCAMode_IR( 12,r_aligned16 );
+
+ // store vec, load low word to dst
+ addInstr(env,
+ PPCInstr_AvLdSt( False/*store*/, 16, vec, am_off0 ));
+ addInstr(env,
+ PPCInstr_Load( 4, dst, am_off12, mode64 ));
+
+ add_to_sp( env, 32 ); // Reset SP
+ return dst;
+ }
+
+ case Iop_V128to64:
+ case Iop_V128HIto64:
+ if (mode64) {
+ HReg r_aligned16;
+ HReg dst = newVRegI(env);
+ HReg vec = iselVecExpr(env, e->Iex.Unop.arg);
+ PPCAMode *am_off0, *am_off8;
+ sub_from_sp( env, 32 ); // Move SP down 32 bytes
+
+ // get a quadword aligned address within our stack space
+ r_aligned16 = get_sp_aligned16( env );
+ am_off0 = PPCAMode_IR( 0, r_aligned16 );
+ am_off8 = PPCAMode_IR( 8 ,r_aligned16 );
+
+         // store vec, then load the low dword (+8) or high dword (+0) to dst
+ addInstr(env,
+ PPCInstr_AvLdSt( False/*store*/, 16, vec, am_off0 ));
+ addInstr(env,
+ PPCInstr_Load(
+ 8, dst,
+ op_unop == Iop_V128HIto64 ? am_off0 : am_off8,
+ mode64 ));
+
+ add_to_sp( env, 32 ); // Reset SP
+ return dst;
+ }
+ break;
+ case Iop_16to8:
+ case Iop_32to8:
+ case Iop_32to16:
+ case Iop_64to8:
+ /* These are no-ops. */
+ return iselWordExpr_R(env, e->Iex.Unop.arg);
+
+ /* ReinterpF64asI64(e) */
+ /* Given an IEEE754 double, produce an I64 with the same bit
+ pattern. */
+ case Iop_ReinterpF64asI64:
+ if (mode64) {
+ PPCAMode *am_addr;
+ HReg fr_src = iselDblExpr(env, e->Iex.Unop.arg);
+ HReg r_dst = newVRegI(env);
+
+ sub_from_sp( env, 16 ); // Move SP down 16 bytes
+ am_addr = PPCAMode_IR( 0, StackFramePtr(mode64) );
+
+ // store as F64
+ addInstr(env, PPCInstr_FpLdSt( False/*store*/, 8,
+ fr_src, am_addr ));
+ // load as Ity_I64
+ addInstr(env, PPCInstr_Load( 8, r_dst, am_addr, mode64 ));
+
+ add_to_sp( env, 16 ); // Reset SP
+ return r_dst;
+ }
+ break;
+
+ /* ReinterpF32asI32(e) */
+ /* Given an IEEE754 float, produce an I32 with the same bit
+ pattern. */
+ case Iop_ReinterpF32asI32: {
+ /* I believe this generates correct code for both 32- and
+ 64-bit hosts. */
+ PPCAMode *am_addr;
+ HReg fr_src = iselFltExpr(env, e->Iex.Unop.arg);
+ HReg r_dst = newVRegI(env);
+
+ sub_from_sp( env, 16 ); // Move SP down 16 bytes
+ am_addr = PPCAMode_IR( 0, StackFramePtr(mode64) );
+
+ // store as F32
+ addInstr(env, PPCInstr_FpLdSt( False/*store*/, 4,
+ fr_src, am_addr ));
+ // load as Ity_I32
+ addInstr(env, PPCInstr_Load( 4, r_dst, am_addr, mode64 ));
+
+ add_to_sp( env, 16 ); // Reset SP
+ return r_dst;
+ }
+
+ default:
+ break;
+ }
+ break;
+ }
+
+ /* --------- GET --------- */
+ case Iex_Get: {
+ if (ty == Ity_I8 || ty == Ity_I16 ||
+ ty == Ity_I32 || ((ty == Ity_I64) && mode64)) {
+ HReg r_dst = newVRegI(env);
+ PPCAMode* am_addr = PPCAMode_IR( e->Iex.Get.offset,
+ GuestStatePtr(mode64) );
+ addInstr(env, PPCInstr_Load( toUChar(sizeofIRType(ty)),
+ r_dst, am_addr, mode64 ));
+ return r_dst;
+ }
+ break;
+ }
+
+ case Iex_GetI: {
+ PPCAMode* src_am
+ = genGuestArrayOffset( env, e->Iex.GetI.descr,
+ e->Iex.GetI.ix, e->Iex.GetI.bias );
+ HReg r_dst = newVRegI(env);
+ if (mode64 && ty == Ity_I64) {
+ addInstr(env, PPCInstr_Load( toUChar(8),
+ r_dst, src_am, mode64 ));
+ return r_dst;
+ }
+ if ((!mode64) && ty == Ity_I32) {
+ addInstr(env, PPCInstr_Load( toUChar(4),
+ r_dst, src_am, mode64 ));
+ return r_dst;
+ }
+ break;
+ }
+
+ /* --------- CCALL --------- */
+ case Iex_CCall: {
+ HReg r_dst = newVRegI(env);
+ vassert(ty == Ity_I32);
+
+ /* be very restrictive for now. Only 32/64-bit ints allowed
+ for args, and 32 bits for return type. */
+ if (e->Iex.CCall.retty != Ity_I32)
+ goto irreducible;
+
+ /* Marshal args, do the call, clear stack. */
+ doHelperCall( env, False, NULL,
+ e->Iex.CCall.cee, e->Iex.CCall.args );
+
+      /* The call's return value is in GPR3, per the PPC ABI. */
+ addInstr(env, mk_iMOVds_RR(r_dst, hregPPC_GPR3(mode64)));
+ return r_dst;
+ }
+
+ /* --------- LITERAL --------- */
+ /* 32/16/8-bit literals */
+ case Iex_Const: {
+ Long l;
+ HReg r_dst = newVRegI(env);
+ IRConst* con = e->Iex.Const.con;
+ switch (con->tag) {
+ case Ico_U64: if (!mode64) goto irreducible;
+ l = (Long) con->Ico.U64; break;
+ case Ico_U32: l = (Long)(Int) con->Ico.U32; break;
+ case Ico_U16: l = (Long)(Int)(Short)con->Ico.U16; break;
+ case Ico_U8: l = (Long)(Int)(Char )con->Ico.U8; break;
+ default: vpanic("iselIntExpr_R.const(ppc)");
+ }
+ addInstr(env, PPCInstr_LI(r_dst, (ULong)l, mode64));
+ return r_dst;
+ }
+
+ /* --------- MULTIPLEX --------- */
+ case Iex_Mux0X: {
+ if ((ty == Ity_I8 || ty == Ity_I16 ||
+ ty == Ity_I32 || ((ty == Ity_I64) && mode64)) &&
+ typeOfIRExpr(env->type_env,e->Iex.Mux0X.cond) == Ity_I8) {
+ PPCCondCode cc = mk_PPCCondCode( Pct_TRUE, Pcf_7EQ );
+ HReg r_cond = iselWordExpr_R(env, e->Iex.Mux0X.cond);
+ HReg rX = iselWordExpr_R(env, e->Iex.Mux0X.exprX);
+ PPCRI* r0 = iselWordExpr_RI(env, e->Iex.Mux0X.expr0);
+ HReg r_dst = newVRegI(env);
+ HReg r_tmp = newVRegI(env);
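+         /* Mux0X semantics: cond == 0 selects expr0, else exprX.  Start
+            with rX and conditionally overwrite with r0 on equality. */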
+ addInstr(env, mk_iMOVds_RR(r_dst,rX));
+ addInstr(env, PPCInstr_Alu(Palu_AND, r_tmp,
+ r_cond, PPCRH_Imm(False,0xFF)));
+ addInstr(env, PPCInstr_Cmp(False/*unsigned*/, True/*32bit cmp*/,
+ 7/*cr*/, r_tmp, PPCRH_Imm(False,0)));
+ addInstr(env, PPCInstr_CMov(cc,r_dst,r0));
+ return r_dst;
+ }
+ break;
+ }
+
+ default:
+ break;
+ } /* switch (e->tag) */
+
+
+ /* We get here if no pattern matched. */
+ irreducible:
+ ppIRExpr(e);
+ vpanic("iselIntExpr_R(ppc): cannot reduce tree");
+}
+
+
+/*---------------------------------------------------------*/
+/*--- ISEL: Integer expression auxiliaries ---*/
+/*---------------------------------------------------------*/
+
+/* --------------------- AMODEs --------------------- */
+
+/* Return an AMode which computes the value of the specified
+ expression, possibly also adding insns to the code list as a
+ result. The expression may only be a word-size one.
+*/
+
+static Bool uInt_fits_in_16_bits ( UInt u )
+{
+ /* Is u the same as the sign-extend of its lower 16 bits? */
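+   /* e.g. 0xFFFF8000 fits (it is the sign-extension of 0x8000), but
+      0x00008000 does not, since 0x8000 sign-extends to 0xFFFF8000. */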
+ Int i = u & 0xFFFF;
+ i <<= 16;
+ i >>= 16;
+ return toBool(u == (UInt)i);
+}
+
+static Bool uLong_fits_in_16_bits ( ULong u )
+{
+ /* Is u the same as the sign-extend of its lower 16 bits? */
+ Long i = u & 0xFFFFULL;
+ i <<= 48;
+ i >>= 48;
+ return toBool(u == (ULong)i);
+}
+
+static Bool uLong_is_4_aligned ( ULong u )
+{
+ return toBool((u & 3ULL) == 0);
+}
+
+static Bool sane_AMode ( ISelEnv* env, PPCAMode* am )
+{
+ Bool mode64 = env->mode64;
+ switch (am->tag) {
+ case Pam_IR:
+ /* Using uInt_fits_in_16_bits in 64-bit mode seems a bit bogus,
+ somehow, but I think it's OK. */
+ return toBool( hregClass(am->Pam.IR.base) == HRcGPR(mode64) &&
+ hregIsVirtual(am->Pam.IR.base) &&
+ uInt_fits_in_16_bits(am->Pam.IR.index) );
+ case Pam_RR:
+ return toBool( hregClass(am->Pam.RR.base) == HRcGPR(mode64) &&
+ hregIsVirtual(am->Pam.RR.base) &&
+ hregClass(am->Pam.RR.index) == HRcGPR(mode64) &&
+                     hregIsVirtual(am->Pam.RR.index) );
+ default:
+ vpanic("sane_AMode: unknown ppc amode tag");
+ }
+}
+
+static
+PPCAMode* iselWordExpr_AMode ( ISelEnv* env, IRExpr* e, IRType xferTy )
+{
+ PPCAMode* am = iselWordExpr_AMode_wrk(env, e, xferTy);
+ vassert(sane_AMode(env, am));
+ return am;
+}
+
+/* DO NOT CALL THIS DIRECTLY ! */
+static PPCAMode* iselWordExpr_AMode_wrk ( ISelEnv* env, IRExpr* e, IRType xferTy )
+{
+ IRType ty = typeOfIRExpr(env->type_env,e);
+
+ if (env->mode64) {
+
+ /* If the data load/store type is I32 or I64, this amode might
+ be destined for use in ld/ldu/lwa/st/stu. In which case
+ insist that if it comes out as an _IR, the immediate must
+ have its bottom two bits be zero. This does assume that for
+ any other type (I8/I16/I128/F32/F64/V128) the amode will not
+ be parked in any such instruction. But that seems a
+ reasonable assumption. */
+ Bool aligned4imm = toBool(xferTy == Ity_I32 || xferTy == Ity_I64);
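+      /* (ld/ldu/lwa/std/stdu are DS-form instructions: the bottom two
+         bits of the displacement are not encoded and must be zero.) */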
+
+ vassert(ty == Ity_I64);
+
+ /* Add64(expr,i), where i == sign-extend of (i & 0xFFFF) */
+ if (e->tag == Iex_Binop
+ && e->Iex.Binop.op == Iop_Add64
+ && e->Iex.Binop.arg2->tag == Iex_Const
+ && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64
+ && (aligned4imm ? uLong_is_4_aligned(e->Iex.Binop.arg2
+ ->Iex.Const.con->Ico.U64)
+ : True)
+ && uLong_fits_in_16_bits(e->Iex.Binop.arg2
+ ->Iex.Const.con->Ico.U64)) {
+ return PPCAMode_IR( (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U64,
+ iselWordExpr_R(env, e->Iex.Binop.arg1) );
+ }
+
+ /* Add64(expr,expr) */
+ if (e->tag == Iex_Binop
+ && e->Iex.Binop.op == Iop_Add64) {
+ HReg r_base = iselWordExpr_R(env, e->Iex.Binop.arg1);
+ HReg r_idx = iselWordExpr_R(env, e->Iex.Binop.arg2);
+ return PPCAMode_RR( r_idx, r_base );
+ }
+
+ } else {
+
+ vassert(ty == Ity_I32);
+
+ /* Add32(expr,i), where i == sign-extend of (i & 0xFFFF) */
+ if (e->tag == Iex_Binop
+ && e->Iex.Binop.op == Iop_Add32
+ && e->Iex.Binop.arg2->tag == Iex_Const
+ && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32
+ && uInt_fits_in_16_bits(e->Iex.Binop.arg2
+ ->Iex.Const.con->Ico.U32)) {
+ return PPCAMode_IR( (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32,
+ iselWordExpr_R(env, e->Iex.Binop.arg1) );
+ }
+
+ /* Add32(expr,expr) */
+ if (e->tag == Iex_Binop
+ && e->Iex.Binop.op == Iop_Add32) {
+ HReg r_base = iselWordExpr_R(env, e->Iex.Binop.arg1);
+ HReg r_idx = iselWordExpr_R(env, e->Iex.Binop.arg2);
+ return PPCAMode_RR( r_idx, r_base );
+ }
+
+ }
+
+ /* Doesn't match anything in particular. Generate it into
+ a register and use that. */
+ return PPCAMode_IR( 0, iselWordExpr_R(env,e) );
+}
+
+
+/* --------------------- RH --------------------- */
+
+/* Compute an I8/I16/I32 (and I64, in 64-bit mode) into a RH
+ (reg-or-halfword-immediate). It's important to specify whether the
+ immediate is to be regarded as signed or not. If yes, this will
+   never return -32768 as an immediate; this guarantees that all
+   signed immediates that are returned can have their sign inverted if
+ need be. */
+
+static PPCRH* iselWordExpr_RH ( ISelEnv* env, Bool syned, IRExpr* e )
+{
+ PPCRH* ri = iselWordExpr_RH_wrk(env, syned, e);
+ /* sanity checks ... */
+ switch (ri->tag) {
+ case Prh_Imm:
+ vassert(ri->Prh.Imm.syned == syned);
+ if (syned)
+ vassert(ri->Prh.Imm.imm16 != 0x8000);
+ return ri;
+ case Prh_Reg:
+ vassert(hregClass(ri->Prh.Reg.reg) == HRcGPR(env->mode64));
+ vassert(hregIsVirtual(ri->Prh.Reg.reg));
+ return ri;
+ default:
+ vpanic("iselIntExpr_RH: unknown ppc RH tag");
+ }
+}
+
+/* DO NOT CALL THIS DIRECTLY ! */
+static PPCRH* iselWordExpr_RH_wrk ( ISelEnv* env, Bool syned, IRExpr* e )
+{
+ ULong u;
+ Long l;
+ IRType ty = typeOfIRExpr(env->type_env,e);
+ vassert(ty == Ity_I8 || ty == Ity_I16 ||
+ ty == Ity_I32 || ((ty == Ity_I64) && env->mode64));
+
+ /* special case: immediate */
+ if (e->tag == Iex_Const) {
+ IRConst* con = e->Iex.Const.con;
+ /* What value are we aiming to generate? */
+ switch (con->tag) {
+ /* Note: Not sign-extending - we carry 'syned' around */
+ case Ico_U64: vassert(env->mode64);
+ u = con->Ico.U64; break;
+ case Ico_U32: u = 0xFFFFFFFF & con->Ico.U32; break;
+ case Ico_U16: u = 0x0000FFFF & con->Ico.U16; break;
+ case Ico_U8: u = 0x000000FF & con->Ico.U8; break;
+ default: vpanic("iselIntExpr_RH.Iex_Const(ppch)");
+ }
+ l = (Long)u;
+ /* Now figure out if it's representable. */
+ if (!syned && u <= 65535) {
+ return PPCRH_Imm(False/*unsigned*/, toUShort(u & 0xFFFF));
+ }
+ if (syned && l >= -32767 && l <= 32767) {
+ return PPCRH_Imm(True/*signed*/, toUShort(u & 0xFFFF));
+ }
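+      /* Note -32768 is deliberately excluded from the signed case: its
+         negation is not representable in 16 bits, which would break the
+         sign-invert guarantee described above. */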
+ /* no luck; use the Slow Way. */
+ }
+
+ /* default case: calculate into a register and return that */
+ return PPCRH_Reg( iselWordExpr_R ( env, e ) );
+}
+
+
+/* --------------------- RIs --------------------- */
+
+/* Calculate an expression into a PPCRI operand.  As with
+ iselIntExpr_R, the expression can have type 32, 16 or 8 bits, or,
+ in 64-bit mode, 64 bits. */
+
+static PPCRI* iselWordExpr_RI ( ISelEnv* env, IRExpr* e )
+{
+ PPCRI* ri = iselWordExpr_RI_wrk(env, e);
+ /* sanity checks ... */
+ switch (ri->tag) {
+ case Pri_Imm:
+ return ri;
+ case Pri_Reg:
+ vassert(hregClass(ri->Pri.Reg) == HRcGPR(env->mode64));
+ vassert(hregIsVirtual(ri->Pri.Reg));
+ return ri;
+ default:
+ vpanic("iselIntExpr_RI: unknown ppc RI tag");
+ }
+}
+
+/* DO NOT CALL THIS DIRECTLY ! */
+static PPCRI* iselWordExpr_RI_wrk ( ISelEnv* env, IRExpr* e )
+{
+ Long l;
+ IRType ty = typeOfIRExpr(env->type_env,e);
+ vassert(ty == Ity_I8 || ty == Ity_I16 ||
+ ty == Ity_I32 || ((ty == Ity_I64) && env->mode64));
+
+ /* special case: immediate */
+ if (e->tag == Iex_Const) {
+ IRConst* con = e->Iex.Const.con;
+ switch (con->tag) {
+ case Ico_U64: vassert(env->mode64);
+ l = (Long) con->Ico.U64; break;
+ case Ico_U32: l = (Long)(Int) con->Ico.U32; break;
+ case Ico_U16: l = (Long)(Int)(Short)con->Ico.U16; break;
+ case Ico_U8: l = (Long)(Int)(Char )con->Ico.U8; break;
+ default: vpanic("iselIntExpr_RI.Iex_Const(ppch)");
+ }
+ return PPCRI_Imm((ULong)l);
+ }
+
+ /* default case: calculate into a register and return that */
+ return PPCRI_Reg( iselWordExpr_R ( env, e ) );
+}
+
+
+/* --------------------- RH5u --------------------- */
+
+/* Compute an I8 into a reg-or-5-bit-unsigned-immediate, the latter
+ being an immediate in the range 1 .. 31 inclusive. Used for doing
+ shift amounts. Only used in 32-bit mode. */
+
+static PPCRH* iselWordExpr_RH5u ( ISelEnv* env, IRExpr* e )
+{
+ PPCRH* ri;
+ vassert(!env->mode64);
+ ri = iselWordExpr_RH5u_wrk(env, e);
+ /* sanity checks ... */
+ switch (ri->tag) {
+ case Prh_Imm:
+ vassert(ri->Prh.Imm.imm16 >= 1 && ri->Prh.Imm.imm16 <= 31);
+ vassert(!ri->Prh.Imm.syned);
+ return ri;
+ case Prh_Reg:
+ vassert(hregClass(ri->Prh.Reg.reg) == HRcGPR(env->mode64));
+ vassert(hregIsVirtual(ri->Prh.Reg.reg));
+ return ri;
+ default:
+         vpanic("iselIntExpr_RH5u: unknown ppc RH tag");
+ }
+}
+
+/* DO NOT CALL THIS DIRECTLY ! */
+static PPCRH* iselWordExpr_RH5u_wrk ( ISelEnv* env, IRExpr* e )
+{
+ IRType ty = typeOfIRExpr(env->type_env,e);
+ vassert(ty == Ity_I8);
+
+ /* special case: immediate */
+ if (e->tag == Iex_Const
+ && e->Iex.Const.con->tag == Ico_U8
+ && e->Iex.Const.con->Ico.U8 >= 1
+ && e->Iex.Const.con->Ico.U8 <= 31) {
+ return PPCRH_Imm(False/*unsigned*/, e->Iex.Const.con->Ico.U8);
+ }
+
+ /* default case: calculate into a register and return that */
+ return PPCRH_Reg( iselWordExpr_R ( env, e ) );
+}
+
+
+/* --------------------- RH6u --------------------- */
+
+/* Compute an I8 into a reg-or-6-bit-unsigned-immediate, the latter
+ being an immediate in the range 1 .. 63 inclusive. Used for doing
+ shift amounts. Only used in 64-bit mode. */
+
+static PPCRH* iselWordExpr_RH6u ( ISelEnv* env, IRExpr* e )
+{
+ PPCRH* ri;
+ vassert(env->mode64);
+ ri = iselWordExpr_RH6u_wrk(env, e);
+ /* sanity checks ... */
+ switch (ri->tag) {
+ case Prh_Imm:
+ vassert(ri->Prh.Imm.imm16 >= 1 && ri->Prh.Imm.imm16 <= 63);
+ vassert(!ri->Prh.Imm.syned);
+ return ri;
+ case Prh_Reg:
+ vassert(hregClass(ri->Prh.Reg.reg) == HRcGPR(env->mode64));
+ vassert(hregIsVirtual(ri->Prh.Reg.reg));
+ return ri;
+ default:
+         vpanic("iselIntExpr_RH6u: unknown ppc64 RH tag");
+ }
+}
+
+/* DO NOT CALL THIS DIRECTLY ! */
+static PPCRH* iselWordExpr_RH6u_wrk ( ISelEnv* env, IRExpr* e )
+{
+ IRType ty = typeOfIRExpr(env->type_env,e);
+ vassert(ty == Ity_I8);
+
+ /* special case: immediate */
+ if (e->tag == Iex_Const
+ && e->Iex.Const.con->tag == Ico_U8
+ && e->Iex.Const.con->Ico.U8 >= 1
+ && e->Iex.Const.con->Ico.U8 <= 63) {
+ return PPCRH_Imm(False/*unsigned*/, e->Iex.Const.con->Ico.U8);
+ }
+
+ /* default case: calculate into a register and return that */
+ return PPCRH_Reg( iselWordExpr_R ( env, e ) );
+}
+
+
+/* --------------------- CONDCODE --------------------- */
+
+/* Generate code to evaluate a bit-typed expression, returning the
+   condition code which corresponds to the expression notionally
+   evaluating to 1. */
+
+static PPCCondCode iselCondCode ( ISelEnv* env, IRExpr* e )
+{
+ /* Uh, there's nothing we can sanity check here, unfortunately. */
+ return iselCondCode_wrk(env,e);
+}
+
+/* DO NOT CALL THIS DIRECTLY ! */
+static PPCCondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e )
+{
+ vassert(e);
+ vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);
+
+ /* Constant 1:Bit */
+ if (e->tag == Iex_Const && e->Iex.Const.con->Ico.U1 == True) {
+ // Make a compare that will always be true:
+ HReg r_zero = newVRegI(env);
+ addInstr(env, PPCInstr_LI(r_zero, 0, env->mode64));
+ addInstr(env, PPCInstr_Cmp(False/*unsigned*/, True/*32bit cmp*/,
+ 7/*cr*/, r_zero, PPCRH_Reg(r_zero)));
+ return mk_PPCCondCode( Pct_TRUE, Pcf_7EQ );
+ }
+
+ /* Not1(...) */
+ if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) {
+ /* Generate code for the arg, and negate the test condition */
+ PPCCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
+ cond.test = invertCondTest(cond.test);
+ return cond;
+ }
+
+ /* --- patterns rooted at: 32to1 or 64to1 --- */
+
+ /* 32to1, 64to1 */
+ if (e->tag == Iex_Unop &&
+ (e->Iex.Unop.op == Iop_32to1 || e->Iex.Unop.op == Iop_64to1)) {
+ HReg src = iselWordExpr_R(env, e->Iex.Unop.arg);
+ HReg tmp = newVRegI(env);
+ /* could do better, probably -- andi. */
+ addInstr(env, PPCInstr_Alu(Palu_AND, tmp,
+ src, PPCRH_Imm(False,1)));
+ addInstr(env, PPCInstr_Cmp(False/*unsigned*/, True/*32bit cmp*/,
+ 7/*cr*/, tmp, PPCRH_Imm(False,1)));
+ return mk_PPCCondCode( Pct_TRUE, Pcf_7EQ );
+ }
+
+ /* --- patterns rooted at: CmpNEZ8 --- */
+
+ /* CmpNEZ8(x) */
+ /* could do better -- andi. */
+ if (e->tag == Iex_Unop
+ && e->Iex.Unop.op == Iop_CmpNEZ8) {
+ HReg arg = iselWordExpr_R(env, e->Iex.Unop.arg);
+ HReg tmp = newVRegI(env);
+ addInstr(env, PPCInstr_Alu(Palu_AND, tmp, arg,
+ PPCRH_Imm(False,0xFF)));
+ addInstr(env, PPCInstr_Cmp(False/*unsigned*/, True/*32bit cmp*/,
+ 7/*cr*/, tmp, PPCRH_Imm(False,0)));
+ return mk_PPCCondCode( Pct_FALSE, Pcf_7EQ );
+ }
+
+ /* --- patterns rooted at: CmpNEZ32 --- */
+
+ /* CmpNEZ32(x) */
+ if (e->tag == Iex_Unop
+ && e->Iex.Unop.op == Iop_CmpNEZ32) {
+ HReg r1 = iselWordExpr_R(env, e->Iex.Unop.arg);
+ addInstr(env, PPCInstr_Cmp(False/*unsigned*/, True/*32bit cmp*/,
+ 7/*cr*/, r1, PPCRH_Imm(False,0)));
+ return mk_PPCCondCode( Pct_FALSE, Pcf_7EQ );
+ }
+
+ /* --- patterns rooted at: Cmp*32* --- */
+
+ /* Cmp*32*(x,y) */
+ if (e->tag == Iex_Binop
+ && (e->Iex.Binop.op == Iop_CmpEQ32
+ || e->Iex.Binop.op == Iop_CmpNE32
+ || e->Iex.Binop.op == Iop_CmpLT32S
+ || e->Iex.Binop.op == Iop_CmpLT32U
+ || e->Iex.Binop.op == Iop_CmpLE32S
+ || e->Iex.Binop.op == Iop_CmpLE32U)) {
+ Bool syned = (e->Iex.Binop.op == Iop_CmpLT32S ||
+ e->Iex.Binop.op == Iop_CmpLE32S);
+ HReg r1 = iselWordExpr_R(env, e->Iex.Binop.arg1);
+ PPCRH* ri2 = iselWordExpr_RH(env, syned, e->Iex.Binop.arg2);
+ addInstr(env, PPCInstr_Cmp(syned, True/*32bit cmp*/,
+ 7/*cr*/, r1, ri2));
+
+      /* The compare above already honours signedness, so the S and U
+         variants share condition bits; LE is encoded as "not GT". */
+      switch (e->Iex.Binop.op) {
+      case Iop_CmpEQ32:  return mk_PPCCondCode( Pct_TRUE,  Pcf_7EQ );
+      case Iop_CmpNE32:  return mk_PPCCondCode( Pct_FALSE, Pcf_7EQ );
+      case Iop_CmpLT32U: case Iop_CmpLT32S:
+                         return mk_PPCCondCode( Pct_TRUE,  Pcf_7LT );
+      case Iop_CmpLE32U: case Iop_CmpLE32S:
+                         return mk_PPCCondCode( Pct_FALSE, Pcf_7GT );
+      default: vpanic("iselCondCode(ppc): CmpXX32");
+      }
+ }
+
+ /* --- patterns rooted at: CmpNEZ64 --- */
+
+ /* CmpNEZ64 */
+ if (e->tag == Iex_Unop
+ && e->Iex.Unop.op == Iop_CmpNEZ64) {
+ if (!env->mode64) {
+ HReg hi, lo;
+ HReg tmp = newVRegI(env);
+ iselInt64Expr( &hi, &lo, env, e->Iex.Unop.arg );
+ addInstr(env, PPCInstr_Alu(Palu_OR, tmp, lo, PPCRH_Reg(hi)));
+ addInstr(env, PPCInstr_Cmp(False/*sign*/, True/*32bit cmp*/,
+ 7/*cr*/, tmp,PPCRH_Imm(False,0)));
+ return mk_PPCCondCode( Pct_FALSE, Pcf_7EQ );
+ } else { // mode64
+         HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg);
+ addInstr(env, PPCInstr_Cmp(False/*sign*/, False/*64bit cmp*/,
+ 7/*cr*/, r_src,PPCRH_Imm(False,0)));
+ return mk_PPCCondCode( Pct_FALSE, Pcf_7EQ );
+ }
+ }
+
+ /* --- patterns rooted at: Cmp*64* --- */
+
+ /* Cmp*64*(x,y) */
+ if (e->tag == Iex_Binop
+ && (e->Iex.Binop.op == Iop_CmpEQ64
+ || e->Iex.Binop.op == Iop_CmpNE64
+ || e->Iex.Binop.op == Iop_CmpLT64S
+ || e->Iex.Binop.op == Iop_CmpLT64U
+ || e->Iex.Binop.op == Iop_CmpLE64S
+ || e->Iex.Binop.op == Iop_CmpLE64U)) {
+ Bool syned = (e->Iex.Binop.op == Iop_CmpLT64S ||
+ e->Iex.Binop.op == Iop_CmpLE64S);
+ HReg r1 = iselWordExpr_R(env, e->Iex.Binop.arg1);
+ PPCRH* ri2 = iselWordExpr_RH(env, syned, e->Iex.Binop.arg2);
+ vassert(env->mode64);
+ addInstr(env, PPCInstr_Cmp(syned, False/*64bit cmp*/,
+ 7/*cr*/, r1, ri2));
+
+      /* As in the 32-bit case, the compare already honours signedness,
+         so the S and U variants share condition bits. */
+      switch (e->Iex.Binop.op) {
+      case Iop_CmpEQ64:  return mk_PPCCondCode( Pct_TRUE,  Pcf_7EQ );
+      case Iop_CmpNE64:  return mk_PPCCondCode( Pct_FALSE, Pcf_7EQ );
+      case Iop_CmpLT64U: case Iop_CmpLT64S:
+                         return mk_PPCCondCode( Pct_TRUE,  Pcf_7LT );
+      case Iop_CmpLE64U: case Iop_CmpLE64S:
+                         return mk_PPCCondCode( Pct_FALSE, Pcf_7GT );
+      default: vpanic("iselCondCode(ppc): CmpXX64");
+      }
+ }
+
+ /* var */
+ if (e->tag == Iex_RdTmp) {
+ HReg r_src = lookupIRTemp(env, e->Iex.RdTmp.tmp);
+ HReg src_masked = newVRegI(env);
+ addInstr(env,
+ PPCInstr_Alu(Palu_AND, src_masked,
+ r_src, PPCRH_Imm(False,1)));
+ addInstr(env,
+ PPCInstr_Cmp(False/*unsigned*/, True/*32bit cmp*/,
+ 7/*cr*/, src_masked, PPCRH_Imm(False,1)));
+ return mk_PPCCondCode( Pct_TRUE, Pcf_7EQ );
+ }
+
+ vex_printf("iselCondCode(ppc): No such tag(%u)\n", e->tag);
+ ppIRExpr(e);
+ vpanic("iselCondCode(ppc)");
+}
+
+
+/*---------------------------------------------------------*/
+/*--- ISEL: Integer expressions (128 bit) ---*/
+/*---------------------------------------------------------*/
+
+/* 64-bit mode ONLY: compute a 128-bit value into a register pair,
+ which is returned as the first two parameters. As with
+ iselWordExpr_R, these may be either real or virtual regs; in any
+ case they must not be changed by subsequent code emitted by the
+ caller. */
+
+static void iselInt128Expr ( HReg* rHi, HReg* rLo,
+ ISelEnv* env, IRExpr* e )
+{
+ vassert(env->mode64);
+ iselInt128Expr_wrk(rHi, rLo, env, e);
+# if 0
+ vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
+# endif
+ vassert(hregClass(*rHi) == HRcGPR(env->mode64));
+ vassert(hregIsVirtual(*rHi));
+ vassert(hregClass(*rLo) == HRcGPR(env->mode64));
+ vassert(hregIsVirtual(*rLo));
+}
+
+/* DO NOT CALL THIS DIRECTLY ! */
+static void iselInt128Expr_wrk ( HReg* rHi, HReg* rLo,
+ ISelEnv* env, IRExpr* e )
+{
+ vassert(e);
+ vassert(typeOfIRExpr(env->type_env,e) == Ity_I128);
+
+ /* read 128-bit IRTemp */
+ if (e->tag == Iex_RdTmp) {
+ lookupIRTempPair( rHi, rLo, env, e->Iex.RdTmp.tmp);
+ return;
+ }
+
+ /* --------- BINARY ops --------- */
+ if (e->tag == Iex_Binop) {
+ switch (e->Iex.Binop.op) {
+ /* 64 x 64 -> 128 multiply */
+ case Iop_MullU64:
+ case Iop_MullS64: {
+ HReg tLo = newVRegI(env);
+ HReg tHi = newVRegI(env);
+ Bool syned = toBool(e->Iex.Binop.op == Iop_MullS64);
+ HReg r_srcL = iselWordExpr_R(env, e->Iex.Binop.arg1);
+ HReg r_srcR = iselWordExpr_R(env, e->Iex.Binop.arg2);
+ addInstr(env, PPCInstr_MulL(False/*signedness irrelevant*/,
+ False/*lo64*/, False/*64bit mul*/,
+ tLo, r_srcL, r_srcR));
+ addInstr(env, PPCInstr_MulL(syned,
+ True/*hi64*/, False/*64bit mul*/,
+ tHi, r_srcL, r_srcR));
+ *rHi = tHi;
+ *rLo = tLo;
+ return;
+ }
+
+ /* 64HLto128(e1,e2) */
+ case Iop_64HLto128:
+ *rHi = iselWordExpr_R(env, e->Iex.Binop.arg1);
+ *rLo = iselWordExpr_R(env, e->Iex.Binop.arg2);
+ return;
+
+ default:
+ break;
+ }
+ } /* if (e->tag == Iex_Binop) */
+
+
+ /* --------- UNARY ops --------- */
+ if (e->tag == Iex_Unop) {
+ switch (e->Iex.Unop.op) {
+ default:
+ break;
+ }
+ } /* if (e->tag == Iex_Unop) */
+
+ vex_printf("iselInt128Expr(ppc64): No such tag(%u)\n", e->tag);
+ ppIRExpr(e);
+ vpanic("iselInt128Expr(ppc64)");
+}
+
+
+/*---------------------------------------------------------*/
+/*--- ISEL: Integer expressions (64 bit) ---*/
+/*---------------------------------------------------------*/
+
+/* 32-bit mode ONLY: compute a 64-bit value into a register pair,
+ which is returned as the first two parameters. As with
+ iselIntExpr_R, these may be either real or virtual regs; in any
+ case they must not be changed by subsequent code emitted by the
+ caller. */
+
+static void iselInt64Expr ( HReg* rHi, HReg* rLo,
+ ISelEnv* env, IRExpr* e )
+{
+ vassert(!env->mode64);
+ iselInt64Expr_wrk(rHi, rLo, env, e);
+# if 0
+ vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
+# endif
+ vassert(hregClass(*rHi) == HRcInt32);
+ vassert(hregIsVirtual(*rHi));
+ vassert(hregClass(*rLo) == HRcInt32);
+ vassert(hregIsVirtual(*rLo));
+}
+
+/* DO NOT CALL THIS DIRECTLY ! */
+static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo,
+ ISelEnv* env, IRExpr* e )
+{
+ vassert(e);
+ vassert(typeOfIRExpr(env->type_env,e) == Ity_I64);
+
+ /* 64-bit load */
+ if (e->tag == Iex_Load && e->Iex.Load.end == Iend_BE) {
+ HReg tLo = newVRegI(env);
+ HReg tHi = newVRegI(env);
+ HReg r_addr = iselWordExpr_R(env, e->Iex.Load.addr);
+ vassert(!env->mode64);
+      addInstr(env, PPCInstr_Load( 4/*4-byte load*/,
+ tHi, PPCAMode_IR( 0, r_addr ),
+ False/*32-bit insn please*/) );
+      addInstr(env, PPCInstr_Load( 4/*4-byte load*/,
+ tLo, PPCAMode_IR( 4, r_addr ),
+ False/*32-bit insn please*/) );
+ *rHi = tHi;
+ *rLo = tLo;
+ return;
+ }
+
+ /* 64-bit literal */
+ if (e->tag == Iex_Const) {
+ ULong w64 = e->Iex.Const.con->Ico.U64;
+ UInt wHi = ((UInt)(w64 >> 32)) & 0xFFFFFFFF;
+ UInt wLo = ((UInt)w64) & 0xFFFFFFFF;
+ HReg tLo = newVRegI(env);
+ HReg tHi = newVRegI(env);
+ vassert(e->Iex.Const.con->tag == Ico_U64);
+ addInstr(env, PPCInstr_LI(tHi, (Long)(Int)wHi, False/*mode32*/));
+ addInstr(env, PPCInstr_LI(tLo, (Long)(Int)wLo, False/*mode32*/));
+ *rHi = tHi;
+ *rLo = tLo;
+ return;
+ }
+
+ /* read 64-bit IRTemp */
+ if (e->tag == Iex_RdTmp) {
+ lookupIRTempPair( rHi, rLo, env, e->Iex.RdTmp.tmp);
+ return;
+ }
+
+ /* 64-bit GET */
+ if (e->tag == Iex_Get) {
+ PPCAMode* am_addr = PPCAMode_IR( e->Iex.Get.offset,
+ GuestStatePtr(False/*mode32*/) );
+ PPCAMode* am_addr4 = advance4(env, am_addr);
+ HReg tLo = newVRegI(env);
+ HReg tHi = newVRegI(env);
+ addInstr(env, PPCInstr_Load( 4, tHi, am_addr, False/*mode32*/ ));
+ addInstr(env, PPCInstr_Load( 4, tLo, am_addr4, False/*mode32*/ ));
+ *rHi = tHi;
+ *rLo = tLo;
+ return;
+ }
+
+ /* 64-bit Mux0X */
+ if (e->tag == Iex_Mux0X) {
+ HReg e0Lo, e0Hi, eXLo, eXHi;
+ HReg tLo = newVRegI(env);
+ HReg tHi = newVRegI(env);
+
+ PPCCondCode cc = mk_PPCCondCode( Pct_TRUE, Pcf_7EQ );
+ HReg r_cond = iselWordExpr_R(env, e->Iex.Mux0X.cond);
+ HReg r_tmp = newVRegI(env);
+
+ iselInt64Expr(&e0Hi, &e0Lo, env, e->Iex.Mux0X.expr0);
+ iselInt64Expr(&eXHi, &eXLo, env, e->Iex.Mux0X.exprX);
+ addInstr(env, mk_iMOVds_RR(tHi,eXHi));
+ addInstr(env, mk_iMOVds_RR(tLo,eXLo));
+
+ addInstr(env, PPCInstr_Alu(Palu_AND,
+ r_tmp, r_cond, PPCRH_Imm(False,0xFF)));
+ addInstr(env, PPCInstr_Cmp(False/*unsigned*/, True/*32bit cmp*/,
+ 7/*cr*/, r_tmp, PPCRH_Imm(False,0)));
+
+ addInstr(env, PPCInstr_CMov(cc,tHi,PPCRI_Reg(e0Hi)));
+ addInstr(env, PPCInstr_CMov(cc,tLo,PPCRI_Reg(e0Lo)));
+ *rHi = tHi;
+ *rLo = tLo;
+ return;
+ }
+
+ /* --------- BINARY ops --------- */
+ if (e->tag == Iex_Binop) {
+ IROp op_binop = e->Iex.Binop.op;
+ switch (op_binop) {
+ /* 32 x 32 -> 64 multiply */
+ case Iop_MullU32:
+ case Iop_MullS32: {
+ HReg tLo = newVRegI(env);
+ HReg tHi = newVRegI(env);
+ Bool syned = toBool(op_binop == Iop_MullS32);
+ HReg r_srcL = iselWordExpr_R(env, e->Iex.Binop.arg1);
+ HReg r_srcR = iselWordExpr_R(env, e->Iex.Binop.arg2);
+ addInstr(env, PPCInstr_MulL(False/*signedness irrelevant*/,
+ False/*lo32*/, True/*32bit mul*/,
+ tLo, r_srcL, r_srcR));
+ addInstr(env, PPCInstr_MulL(syned,
+ True/*hi32*/, True/*32bit mul*/,
+ tHi, r_srcL, r_srcR));
+ *rHi = tHi;
+ *rLo = tLo;
+ return;
+ }
+
+ /* Or64/And64/Xor64 */
+ case Iop_Or64:
+ case Iop_And64:
+ case Iop_Xor64: {
+ HReg xLo, xHi, yLo, yHi;
+ HReg tLo = newVRegI(env);
+ HReg tHi = newVRegI(env);
+ PPCAluOp op = (op_binop == Iop_Or64) ? Palu_OR :
+ (op_binop == Iop_And64) ? Palu_AND : Palu_XOR;
+ iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
+ iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
+ addInstr(env, PPCInstr_Alu(op, tHi, xHi, PPCRH_Reg(yHi)));
+ addInstr(env, PPCInstr_Alu(op, tLo, xLo, PPCRH_Reg(yLo)));
+ *rHi = tHi;
+ *rLo = tLo;
+ return;
+ }
+
+ /* Add64 */
+ case Iop_Add64: {
+ HReg xLo, xHi, yLo, yHi;
+ HReg tLo = newVRegI(env);
+ HReg tHi = newVRegI(env);
+ iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
+ iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
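+         /* Add the low halves first, capturing the carry (addc), then
+            fold that carry into the high-half addition (adde). */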
+ addInstr(env, PPCInstr_AddSubC( True/*add*/, True /*set carry*/,
+ tLo, xLo, yLo));
+ addInstr(env, PPCInstr_AddSubC( True/*add*/, False/*read carry*/,
+ tHi, xHi, yHi));
+ *rHi = tHi;
+ *rLo = tLo;
+ return;
+ }
+
+ /* 32HLto64(e1,e2) */
+ case Iop_32HLto64:
+ *rHi = iselWordExpr_R(env, e->Iex.Binop.arg1);
+ *rLo = iselWordExpr_R(env, e->Iex.Binop.arg2);
+ return;
+
+ /* F64toI64S */
+ case Iop_F64toI64S: {
+ HReg tLo = newVRegI(env);
+ HReg tHi = newVRegI(env);
+ HReg r1 = StackFramePtr(env->mode64);
+ PPCAMode* zero_r1 = PPCAMode_IR( 0, r1 );
+ PPCAMode* four_r1 = PPCAMode_IR( 4, r1 );
+ HReg fsrc = iselDblExpr(env, e->Iex.Binop.arg2);
+ HReg ftmp = newVRegF(env);
+
+ vassert(!env->mode64);
+ /* Set host rounding mode */
+ set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
+
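+         /* Convert F64 -> I64 within an FP register, park the 64-bit
+            result on the stack, and reload it as two 32-bit integer
+            halves. */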
+ sub_from_sp( env, 16 );
+ addInstr(env, PPCInstr_FpCftI(False/*F->I*/, False/*int64*/,
+ ftmp, fsrc));
+ addInstr(env, PPCInstr_FpLdSt(False/*store*/, 8, ftmp, zero_r1));
+ addInstr(env, PPCInstr_Load(4, tHi, zero_r1, False/*mode32*/));
+ addInstr(env, PPCInstr_Load(4, tLo, four_r1, False/*mode32*/));
+ add_to_sp( env, 16 );
+
+ ///* Restore default FPU rounding. */
+ //set_FPU_rounding_default( env );
+ *rHi = tHi;
+ *rLo = tLo;
+ return;
+ }
+
+ default:
+ break;
+ }
+ } /* if (e->tag == Iex_Binop) */
+
+
+ /* --------- UNARY ops --------- */
+ if (e->tag == Iex_Unop) {
+ switch (e->Iex.Unop.op) {
+
+ /* CmpwNEZ64(e) */
+ case Iop_CmpwNEZ64: {
+ HReg argHi, argLo;
+ HReg tmp1 = newVRegI(env);
+ HReg tmp2 = newVRegI(env);
+ iselInt64Expr(&argHi, &argLo, env, e->Iex.Unop.arg);
+ /* tmp1 = argHi | argLo */
+ addInstr(env, PPCInstr_Alu(Palu_OR, tmp1, argHi, PPCRH_Reg(argLo)));
+ /* tmp2 = (tmp1 | -tmp1) >>s 31 */
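+         /* If tmp1 is nonzero, at least one of tmp1 and -tmp1 has its
+            sign bit set, so the arithmetic shift yields all ones; if
+            tmp1 is zero, both terms are zero and the shift yields
+            zero.  Either way the two halves of the result are
+            identical, so tmp2 serves for both. */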
+ addInstr(env, PPCInstr_Unary(Pun_NEG,tmp2,tmp1));
+ addInstr(env, PPCInstr_Alu(Palu_OR, tmp2, tmp2, PPCRH_Reg(tmp1)));
+ addInstr(env, PPCInstr_Shft(Pshft_SAR, True/*32bit shift*/,
+ tmp2, tmp2, PPCRH_Imm(False, 31)));
+ *rHi = tmp2;
+ *rLo = tmp2; /* yes, really tmp2 */
+ return;
+ }
+
+ /* Left64 */
+ case Iop_Left64: {
+ HReg argHi, argLo;
+ HReg zero32 = newVRegI(env);
+ HReg resHi = newVRegI(env);
+ HReg resLo = newVRegI(env);
+ iselInt64Expr(&argHi, &argLo, env, e->Iex.Unop.arg);
+ vassert(env->mode64 == False);
+ addInstr(env, PPCInstr_LI(zero32, 0, env->mode64));
+ /* resHi:resLo = - argHi:argLo */
+ addInstr(env, PPCInstr_AddSubC( False/*sub*/, True/*set carry*/,
+ resLo, zero32, argLo ));
+ addInstr(env, PPCInstr_AddSubC( False/*sub*/, False/*read carry*/,
+ resHi, zero32, argHi ));
+ /* resHi:resLo |= srcHi:srcLo */
+ addInstr(env, PPCInstr_Alu(Palu_OR, resLo, resLo, PPCRH_Reg(argLo)));
+ addInstr(env, PPCInstr_Alu(Palu_OR, resHi, resHi, PPCRH_Reg(argHi)));
+ *rHi = resHi;
+ *rLo = resLo;
+ return;
+ }
+
+ /* 32Sto64(e) */
+ case Iop_32Sto64: {
+ HReg tHi = newVRegI(env);
+ HReg src = iselWordExpr_R(env, e->Iex.Unop.arg);
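+         /* High half = src >>s 31, i.e. 32 copies of src's sign bit. */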
+ addInstr(env, PPCInstr_Shft(Pshft_SAR, True/*32bit shift*/,
+ tHi, src, PPCRH_Imm(False,31)));
+ *rHi = tHi;
+ *rLo = src;
+ return;
+ }
+
+ /* 32Uto64(e) */
+ case Iop_32Uto64: {
+ HReg tHi = newVRegI(env);
+ HReg tLo = iselWordExpr_R(env, e->Iex.Unop.arg);
+ addInstr(env, PPCInstr_LI(tHi, 0, False/*mode32*/));
+ *rHi = tHi;
+ *rLo = tLo;
+ return;
+ }
+
+ /* V128{HI}to64 */
+ case Iop_V128HIto64:
+ case Iop_V128to64: {
+ HReg r_aligned16;
+ Int off = e->Iex.Unop.op==Iop_V128HIto64 ? 0 : 8;
+ HReg tLo = newVRegI(env);
+ HReg tHi = newVRegI(env);
+ HReg vec = iselVecExpr(env, e->Iex.Unop.arg);
+ PPCAMode *am_off0, *am_offLO, *am_offHI;
+ sub_from_sp( env, 32 ); // Move SP down 32 bytes
+
+ // get a quadword aligned address within our stack space
+ r_aligned16 = get_sp_aligned16( env );
+ am_off0 = PPCAMode_IR( 0, r_aligned16 );
+ am_offHI = PPCAMode_IR( off, r_aligned16 );
+ am_offLO = PPCAMode_IR( off+4, r_aligned16 );
+
+ // store as Vec128
+ addInstr(env,
+ PPCInstr_AvLdSt( False/*store*/, 16, vec, am_off0 ));
+
+ // load hi,lo words (of hi/lo half of vec) as Ity_I32's
+ addInstr(env,
+ PPCInstr_Load( 4, tHi, am_offHI, False/*mode32*/ ));
+ addInstr(env,
+ PPCInstr_Load( 4, tLo, am_offLO, False/*mode32*/ ));
+
+ add_to_sp( env, 32 ); // Reset SP
+ *rHi = tHi;
+ *rLo = tLo;
+ return;
+ }
+
+ /* could do better than this, but for now ... */
+ case Iop_1Sto64: {
+ HReg tLo = newVRegI(env);
+ HReg tHi = newVRegI(env);
+ PPCCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
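+         /* Materialise the condition as 0 or 1 in tLo, replicate bit 0
+            across the word via shl 31 / sar 31, and copy the result to
+            the high half. */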
+ addInstr(env, PPCInstr_Set(cond,tLo));
+ addInstr(env, PPCInstr_Shft(Pshft_SHL, True/*32bit shift*/,
+ tLo, tLo, PPCRH_Imm(False,31)));
+ addInstr(env, PPCInstr_Shft(Pshft_SAR, True/*32bit shift*/,
+ tLo, tLo, PPCRH_Imm(False,31)));
+ addInstr(env, mk_iMOVds_RR(tHi, tLo));
+ *rHi = tHi;
+ *rLo = tLo;
+ return;
+ }
+
+ /* ReinterpF64asI64(e) */
+ /* Given an IEEE754 double, produce an I64 with the same bit
+ pattern. */
+ case Iop_ReinterpF64asI64: {
+ PPCAMode *am_addr0, *am_addr1;
+ HReg fr_src = iselDblExpr(env, e->Iex.Unop.arg);
+ HReg r_dstLo = newVRegI(env);
+ HReg r_dstHi = newVRegI(env);
+
+ sub_from_sp( env, 16 ); // Move SP down 16 bytes
+ am_addr0 = PPCAMode_IR( 0, StackFramePtr(False/*mode32*/) );
+ am_addr1 = PPCAMode_IR( 4, StackFramePtr(False/*mode32*/) );
+
+ // store as F64
+ addInstr(env, PPCInstr_FpLdSt( False/*store*/, 8,
+ fr_src, am_addr0 ));
+
+ // load hi,lo as Ity_I32's
+ addInstr(env, PPCInstr_Load( 4, r_dstHi,
+ am_addr0, False/*mode32*/ ));
+ addInstr(env, PPCInstr_Load( 4, r_dstLo,
+ am_addr1, False/*mode32*/ ));
+ *rHi = r_dstHi;
+ *rLo = r_dstLo;
+
+ add_to_sp( env, 16 ); // Reset SP
+ return;
+ }
+
+ default:
+ break;
+ }
+ } /* if (e->tag == Iex_Unop) */
+
+ vex_printf("iselInt64Expr(ppc): No such tag(%u)\n", e->tag);
+ ppIRExpr(e);
+ vpanic("iselInt64Expr(ppc)");
+}
+
+
+/*---------------------------------------------------------*/
+/*--- ISEL: Floating point expressions (32 bit) ---*/
+/*---------------------------------------------------------*/
+
+/* Nothing interesting here; really just wrappers for
+ 64-bit stuff. */
+
+static HReg iselFltExpr ( ISelEnv* env, IRExpr* e )
+{
+ HReg r = iselFltExpr_wrk( env, e );
+# if 0
+ vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
+# endif
+ vassert(hregClass(r) == HRcFlt64); /* yes, really Flt64 */
+ vassert(hregIsVirtual(r));
+ return r;
+}
+
+/* DO NOT CALL THIS DIRECTLY */
+static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e )
+{
+ IRType ty = typeOfIRExpr(env->type_env,e);
+ vassert(ty == Ity_F32);
+
+ if (e->tag == Iex_RdTmp) {
+ return lookupIRTemp(env, e->Iex.RdTmp.tmp);
+ }
+
+ if (e->tag == Iex_Load && e->Iex.Load.end == Iend_BE) {
+ PPCAMode* am_addr;
+ HReg r_dst = newVRegF(env);
+ vassert(e->Iex.Load.ty == Ity_F32);
+ am_addr = iselWordExpr_AMode(env, e->Iex.Load.addr, Ity_F32/*xfer*/);
+ addInstr(env, PPCInstr_FpLdSt(True/*load*/, 4, r_dst, am_addr));
+ return r_dst;
+ }
+
+ if (e->tag == Iex_Get) {
+ HReg r_dst = newVRegF(env);
+ PPCAMode* am_addr = PPCAMode_IR( e->Iex.Get.offset,
+ GuestStatePtr(env->mode64) );
+ addInstr(env, PPCInstr_FpLdSt( True/*load*/, 4, r_dst, am_addr ));
+ return r_dst;
+ }
+
+ if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_TruncF64asF32) {
+ /* This is quite subtle. The only way to do the relevant
+ truncation is to do a single-precision store and then a
+ double precision load to get it back into a register. The
+ problem is, if the data is then written to memory a second
+ time, as in
+
+ STbe(...) = TruncF64asF32(...)
+
+      then will the second truncation further alter the value? The
+      answer is no: lfs (as generated here) followed by stfs
+      (generated for the STbe) is the identity function on 32-bit
+      floats, so we are safe.
+
+ Another upshot of this is that if iselStmt can see the
+ entirety of
+
+ STbe(...) = TruncF64asF32(arg)
+
+ then it can short circuit having to deal with TruncF64asF32
+ individually; instead just compute arg into a 64-bit FP
+ register and do 'fsts' (since that itself does the
+ truncation).
+
+ We generate pretty poor code here (should be ok both for
+ 32-bit and 64-bit mode); but it is expected that for the most
+ part the latter optimisation will apply and hence this code
+ will not often be used.
+ */
+ HReg fsrc = iselDblExpr(env, e->Iex.Unop.arg);
+ HReg fdst = newVRegF(env);
+ PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
+
+ sub_from_sp( env, 16 );
+ // store as F32, hence truncating
+ addInstr(env, PPCInstr_FpLdSt( False/*store*/, 4,
+ fsrc, zero_r1 ));
+ // and reload. Good huh?! (sigh)
+ addInstr(env, PPCInstr_FpLdSt( True/*load*/, 4,
+ fdst, zero_r1 ));
+ add_to_sp( env, 16 );
+ return fdst;
+ }
+
+ vex_printf("iselFltExpr(ppc): No such tag(%u)\n", e->tag);
+ ppIRExpr(e);
+ vpanic("iselFltExpr_wrk(ppc)");
+}
+
+
+/*---------------------------------------------------------*/
+/*--- ISEL: Floating point expressions (64 bit) ---*/
+/*---------------------------------------------------------*/
+
+/* Compute a 64-bit floating point value into a register, the identity
+ of which is returned. As with iselIntExpr_R, the reg may be either
+ real or virtual; in any case it must not be changed by subsequent
+ code emitted by the caller. */
+
+/* IEEE 754 formats. From http://www.freesoft.org/CIE/RFC/1832/32.htm:
+
+ Type S (1 bit) E (11 bits) F (52 bits)
+ ---- --------- ----------- -----------
+ signalling NaN u 2047 (max) .0uuuuu---u
+ (with at least
+ one 1 bit)
+ quiet NaN u 2047 (max) .1uuuuu---u
+
+ negative infinity 1 2047 (max) .000000---0
+
+ positive infinity 0 2047 (max) .000000---0
+
+ negative zero 1 0 .000000---0
+
+ positive zero 0 0 .000000---0
+*/
+
+static HReg iselDblExpr ( ISelEnv* env, IRExpr* e )
+{
+ HReg r = iselDblExpr_wrk( env, e );
+# if 0
+ vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
+# endif
+ vassert(hregClass(r) == HRcFlt64);
+ vassert(hregIsVirtual(r));
+ return r;
+}
+
+/* DO NOT CALL THIS DIRECTLY */
+static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
+{
+ Bool mode64 = env->mode64;
+ IRType ty = typeOfIRExpr(env->type_env,e);
+ vassert(e);
+ vassert(ty == Ity_F64);
+
+ if (e->tag == Iex_RdTmp) {
+ return lookupIRTemp(env, e->Iex.RdTmp.tmp);
+ }
+
+ /* --------- LITERAL --------- */
+ if (e->tag == Iex_Const) {
+ union { UInt u32x2[2]; ULong u64; Double f64; } u;
+ vassert(sizeof(u) == 8);
+ vassert(sizeof(u.u64) == 8);
+ vassert(sizeof(u.f64) == 8);
+ vassert(sizeof(u.u32x2) == 8);
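+      /* Type-pun through the union so we get the raw 64-bit image of
+         the F64 constant, which is then loaded via the integer
+         side. */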
+
+ if (e->Iex.Const.con->tag == Ico_F64) {
+ u.f64 = e->Iex.Const.con->Ico.F64;
+ }
+ else if (e->Iex.Const.con->tag == Ico_F64i) {
+ u.u64 = e->Iex.Const.con->Ico.F64i;
+ }
+ else
+ vpanic("iselDblExpr(ppc): const");
+
+ if (!mode64) {
+ HReg r_srcHi = newVRegI(env);
+ HReg r_srcLo = newVRegI(env);
+ addInstr(env, PPCInstr_LI(r_srcHi, u.u32x2[0], mode64));
+ addInstr(env, PPCInstr_LI(r_srcLo, u.u32x2[1], mode64));
+ return mk_LoadRR32toFPR( env, r_srcHi, r_srcLo );
+ } else { // mode64
+ HReg r_src = newVRegI(env);
+ addInstr(env, PPCInstr_LI(r_src, u.u64, mode64));
+ return mk_LoadR64toFPR( env, r_src ); // 1*I64 -> F64
+ }
+ }
+
+ /* --------- LOAD --------- */
+ if (e->tag == Iex_Load && e->Iex.Load.end == Iend_BE) {
+ HReg r_dst = newVRegF(env);
+ PPCAMode* am_addr;
+ vassert(e->Iex.Load.ty == Ity_F64);
+ am_addr = iselWordExpr_AMode(env, e->Iex.Load.addr, Ity_F64/*xfer*/);
+ addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, r_dst, am_addr));
+ return r_dst;
+ }
+
+ /* --------- GET --------- */
+ if (e->tag == Iex_Get) {
+ HReg r_dst = newVRegF(env);
+ PPCAMode* am_addr = PPCAMode_IR( e->Iex.Get.offset,
+ GuestStatePtr(mode64) );
+ addInstr(env, PPCInstr_FpLdSt( True/*load*/, 8, r_dst, am_addr ));
+ return r_dst;
+ }
+
+ /* --------- OPS --------- */
+ if (e->tag == Iex_Qop) {
+ PPCFpOp fpop = Pfp_INVALID;
+ switch (e->Iex.Qop.op) {
+ case Iop_MAddF64: fpop = Pfp_MADDD; break;
+ case Iop_MAddF64r32: fpop = Pfp_MADDS; break;
+ case Iop_MSubF64: fpop = Pfp_MSUBD; break;
+ case Iop_MSubF64r32: fpop = Pfp_MSUBS; break;
+ default: break;
+ }
+ if (fpop != Pfp_INVALID) {
+ HReg r_dst = newVRegF(env);
+ HReg r_srcML = iselDblExpr(env, e->Iex.Qop.arg2);
+ HReg r_srcMR = iselDblExpr(env, e->Iex.Qop.arg3);
+ HReg r_srcAcc = iselDblExpr(env, e->Iex.Qop.arg4);
+ set_FPU_rounding_mode( env, e->Iex.Qop.arg1 );
+ addInstr(env, PPCInstr_FpMulAcc(fpop, r_dst,
+ r_srcML, r_srcMR, r_srcAcc));
+ return r_dst;
+ }
+ }
+
+ if (e->tag == Iex_Triop) {
+ PPCFpOp fpop = Pfp_INVALID;
+ switch (e->Iex.Triop.op) {
+ case Iop_AddF64: fpop = Pfp_ADDD; break;
+ case Iop_SubF64: fpop = Pfp_SUBD; break;
+ case Iop_MulF64: fpop = Pfp_MULD; break;
+ case Iop_DivF64: fpop = Pfp_DIVD; break;
+ case Iop_AddF64r32: fpop = Pfp_ADDS; break;
+ case Iop_SubF64r32: fpop = Pfp_SUBS; break;
+ case Iop_MulF64r32: fpop = Pfp_MULS; break;
+ case Iop_DivF64r32: fpop = Pfp_DIVS; break;
+ default: break;
+ }
+ if (fpop != Pfp_INVALID) {
+ HReg r_dst = newVRegF(env);
+ HReg r_srcL = iselDblExpr(env, e->Iex.Triop.arg2);
+ HReg r_srcR = iselDblExpr(env, e->Iex.Triop.arg3);
+ set_FPU_rounding_mode( env, e->Iex.Triop.arg1 );
+ addInstr(env, PPCInstr_FpBinary(fpop, r_dst, r_srcL, r_srcR));
+ return r_dst;
+ }
+ }
+
+ if (e->tag == Iex_Binop) {
+ PPCFpOp fpop = Pfp_INVALID;
+ switch (e->Iex.Binop.op) {
+ case Iop_SqrtF64: fpop = Pfp_SQRT; break;
+ default: break;
+ }
+ if (fpop != Pfp_INVALID) {
+ HReg fr_dst = newVRegF(env);
+ HReg fr_src = iselDblExpr(env, e->Iex.Binop.arg2);
+ set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
+ addInstr(env, PPCInstr_FpUnary(fpop, fr_dst, fr_src));
+ return fr_dst;
+ }
+ }
+
+ if (e->tag == Iex_Binop) {
+
+ if (e->Iex.Binop.op == Iop_RoundF64toF32) {
+ HReg r_dst = newVRegF(env);
+ HReg r_src = iselDblExpr(env, e->Iex.Binop.arg2);
+ set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
+ addInstr(env, PPCInstr_FpRSP(r_dst, r_src));
+ //set_FPU_rounding_default( env );
+ return r_dst;
+ }
+
+ if (e->Iex.Binop.op == Iop_I64StoF64) {
+ if (mode64) {
+ HReg fdst = newVRegF(env);
+ HReg isrc = iselWordExpr_R(env, e->Iex.Binop.arg2);
+ HReg r1 = StackFramePtr(env->mode64);
+ PPCAMode* zero_r1 = PPCAMode_IR( 0, r1 );
+
+ /* Set host rounding mode */
+ set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
+
+ sub_from_sp( env, 16 );
+
+ addInstr(env, PPCInstr_Store(8, zero_r1, isrc, True/*mode64*/));
+ addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, fdst, zero_r1));
+ addInstr(env, PPCInstr_FpCftI(True/*I->F*/, False/*int64*/,
+ fdst, fdst));
+
+ add_to_sp( env, 16 );
+
+ ///* Restore default FPU rounding. */
+ //set_FPU_rounding_default( env );
+ return fdst;
+ } else {
+ /* 32-bit mode */
+ HReg fdst = newVRegF(env);
+ HReg isrcHi, isrcLo;
+ HReg r1 = StackFramePtr(env->mode64);
+ PPCAMode* zero_r1 = PPCAMode_IR( 0, r1 );
+ PPCAMode* four_r1 = PPCAMode_IR( 4, r1 );
+
+ iselInt64Expr(&isrcHi, &isrcLo, env, e->Iex.Binop.arg2);
+
+ /* Set host rounding mode */
+ set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
+
+ sub_from_sp( env, 16 );
+
+ addInstr(env, PPCInstr_Store(4, zero_r1, isrcHi, False/*mode32*/));
+ addInstr(env, PPCInstr_Store(4, four_r1, isrcLo, False/*mode32*/));
+ addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, fdst, zero_r1));
+ addInstr(env, PPCInstr_FpCftI(True/*I->F*/, False/*int64*/,
+ fdst, fdst));
+
+ add_to_sp( env, 16 );
+
+ ///* Restore default FPU rounding. */
+ //set_FPU_rounding_default( env );
+ return fdst;
+ }
+ }
+
+ }
+
+ if (e->tag == Iex_Unop) {
+ PPCFpOp fpop = Pfp_INVALID;
+ switch (e->Iex.Unop.op) {
+ case Iop_NegF64: fpop = Pfp_NEG; break;
+ case Iop_AbsF64: fpop = Pfp_ABS; break;
+ case Iop_Est5FRSqrt: fpop = Pfp_RSQRTE; break;
+ case Iop_RoundF64toF64_NegINF: fpop = Pfp_FRIM; break;
+ case Iop_RoundF64toF64_PosINF: fpop = Pfp_FRIP; break;
+ case Iop_RoundF64toF64_NEAREST: fpop = Pfp_FRIN; break;
+ case Iop_RoundF64toF64_ZERO: fpop = Pfp_FRIZ; break;
+ default: break;
+ }
+ if (fpop != Pfp_INVALID) {
+ HReg fr_dst = newVRegF(env);
+ HReg fr_src = iselDblExpr(env, e->Iex.Unop.arg);
+ addInstr(env, PPCInstr_FpUnary(fpop, fr_dst, fr_src));
+ return fr_dst;
+ }
+ }
+
+ if (e->tag == Iex_Unop) {
+ switch (e->Iex.Unop.op) {
+ case Iop_ReinterpI64asF64: {
+ /* Given an I64, produce an IEEE754 double with the same
+ bit pattern. */
+ if (!mode64) {
+ HReg r_srcHi, r_srcLo;
+ iselInt64Expr( &r_srcHi, &r_srcLo, env, e->Iex.Unop.arg);
+ return mk_LoadRR32toFPR( env, r_srcHi, r_srcLo );
+ } else {
+ HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg);
+ return mk_LoadR64toFPR( env, r_src );
+ }
+ }
+ case Iop_F32toF64: {
+ /* this is a no-op */
+ HReg res = iselFltExpr(env, e->Iex.Unop.arg);
+ return res;
+ }
+ default:
+ break;
+ }
+ }
+
+ /* --------- MULTIPLEX --------- */
+ if (e->tag == Iex_Mux0X) {
+ if (ty == Ity_F64
+ && typeOfIRExpr(env->type_env,e->Iex.Mux0X.cond) == Ity_I8) {
+ PPCCondCode cc = mk_PPCCondCode( Pct_TRUE, Pcf_7EQ );
+ HReg r_cond = iselWordExpr_R(env, e->Iex.Mux0X.cond);
+ HReg frX = iselDblExpr(env, e->Iex.Mux0X.exprX);
+ HReg fr0 = iselDblExpr(env, e->Iex.Mux0X.expr0);
+ HReg fr_dst = newVRegF(env);
+ HReg r_tmp = newVRegI(env);
+ addInstr(env, PPCInstr_Alu(Palu_AND, r_tmp,
+ r_cond, PPCRH_Imm(False,0xFF)));
+ addInstr(env, PPCInstr_FpUnary( Pfp_MOV, fr_dst, frX ));
+ addInstr(env, PPCInstr_Cmp(False/*unsigned*/, True/*32bit cmp*/,
+ 7/*cr*/, r_tmp, PPCRH_Imm(False,0)));
+ addInstr(env, PPCInstr_FpCMov( cc, fr_dst, fr0 ));
+ return fr_dst;
+ }
+ }
+
+ vex_printf("iselDblExpr(ppc): No such tag(%u)\n", e->tag);
+ ppIRExpr(e);
+ vpanic("iselDblExpr_wrk(ppc)");
+}
+
+
+/*---------------------------------------------------------*/
+/*--- ISEL: SIMD (Vector) expressions, 128 bit. ---*/
+/*---------------------------------------------------------*/
+
+static HReg iselVecExpr ( ISelEnv* env, IRExpr* e )
+{
+ HReg r = iselVecExpr_wrk( env, e );
+# if 0
+ vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
+# endif
+ vassert(hregClass(r) == HRcVec128);
+ vassert(hregIsVirtual(r));
+ return r;
+}
+
+/* DO NOT CALL THIS DIRECTLY */
+static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e )
+{
+ Bool mode64 = env->mode64;
+ PPCAvOp op = Pav_INVALID;
+ IRType ty = typeOfIRExpr(env->type_env,e);
+ vassert(e);
+ vassert(ty == Ity_V128);
+
+ if (e->tag == Iex_RdTmp) {
+ return lookupIRTemp(env, e->Iex.RdTmp.tmp);
+ }
+
+ if (e->tag == Iex_Get) {
+      /* Guest state vectors are 16-byte aligned,
+ so don't need to worry here */
+ HReg dst = newVRegV(env);
+ addInstr(env,
+ PPCInstr_AvLdSt( True/*load*/, 16, dst,
+ PPCAMode_IR( e->Iex.Get.offset,
+ GuestStatePtr(mode64) )));
+ return dst;
+ }
+
+ if (e->tag == Iex_Load && e->Iex.Load.end == Iend_BE) {
+ PPCAMode* am_addr;
+ HReg v_dst = newVRegV(env);
+ vassert(e->Iex.Load.ty == Ity_V128);
+ am_addr = iselWordExpr_AMode(env, e->Iex.Load.addr, Ity_V128/*xfer*/);
+ addInstr(env, PPCInstr_AvLdSt( True/*load*/, 16, v_dst, am_addr));
+ return v_dst;
+ }
+
+ if (e->tag == Iex_Unop) {
+ switch (e->Iex.Unop.op) {
+
+ case Iop_NotV128: {
+ HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
+ HReg dst = newVRegV(env);
+ addInstr(env, PPCInstr_AvUnary(Pav_NOT, dst, arg));
+ return dst;
+ }
+
+ case Iop_CmpNEZ8x16: {
+ HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
+ HReg zero = newVRegV(env);
+ HReg dst = newVRegV(env);
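+         /* CmpNEZ == NOT(CmpEQ against zero): build a zero vector by
+            XORing a register with itself, compare for equality, then
+            invert.  Same idea for the 16x8 and 32x4 variants below. */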
+ addInstr(env, PPCInstr_AvBinary(Pav_XOR, zero, zero, zero));
+ addInstr(env, PPCInstr_AvBin8x16(Pav_CMPEQU, dst, arg, zero));
+ addInstr(env, PPCInstr_AvUnary(Pav_NOT, dst, dst));
+ return dst;
+ }
+
+ case Iop_CmpNEZ16x8: {
+ HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
+ HReg zero = newVRegV(env);
+ HReg dst = newVRegV(env);
+ addInstr(env, PPCInstr_AvBinary(Pav_XOR, zero, zero, zero));
+ addInstr(env, PPCInstr_AvBin16x8(Pav_CMPEQU, dst, arg, zero));
+ addInstr(env, PPCInstr_AvUnary(Pav_NOT, dst, dst));
+ return dst;
+ }
+
+ case Iop_CmpNEZ32x4: {
+ HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
+ HReg zero = newVRegV(env);
+ HReg dst = newVRegV(env);
+ addInstr(env, PPCInstr_AvBinary(Pav_XOR, zero, zero, zero));
+ addInstr(env, PPCInstr_AvBin32x4(Pav_CMPEQU, dst, arg, zero));
+ addInstr(env, PPCInstr_AvUnary(Pav_NOT, dst, dst));
+ return dst;
+ }
+
+ case Iop_Recip32Fx4: op = Pavfp_RCPF; goto do_32Fx4_unary;
+ case Iop_RSqrt32Fx4: op = Pavfp_RSQRTF; goto do_32Fx4_unary;
+ case Iop_I32UtoFx4: op = Pavfp_CVTU2F; goto do_32Fx4_unary;
+ case Iop_I32StoFx4: op = Pavfp_CVTS2F; goto do_32Fx4_unary;
+ case Iop_QFtoI32Ux4_RZ: op = Pavfp_QCVTF2U; goto do_32Fx4_unary;
+ case Iop_QFtoI32Sx4_RZ: op = Pavfp_QCVTF2S; goto do_32Fx4_unary;
+ case Iop_RoundF32x4_RM: op = Pavfp_ROUNDM; goto do_32Fx4_unary;
+ case Iop_RoundF32x4_RP: op = Pavfp_ROUNDP; goto do_32Fx4_unary;
+ case Iop_RoundF32x4_RN: op = Pavfp_ROUNDN; goto do_32Fx4_unary;
+ case Iop_RoundF32x4_RZ: op = Pavfp_ROUNDZ; goto do_32Fx4_unary;
+ do_32Fx4_unary:
+ {
+ HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
+ HReg dst = newVRegV(env);
+ addInstr(env, PPCInstr_AvUn32Fx4(op, dst, arg));
+ return dst;
+ }
+
+ case Iop_32UtoV128: {
+ HReg r_aligned16, r_zeros;
+ HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg);
+ HReg dst = newVRegV(env);
+ PPCAMode *am_off0, *am_off4, *am_off8, *am_off12;
+ sub_from_sp( env, 32 ); // Move SP down
+
+ /* Get a quadword aligned address within our stack space */
+ r_aligned16 = get_sp_aligned16( env );
+ am_off0 = PPCAMode_IR( 0, r_aligned16 );
+ am_off4 = PPCAMode_IR( 4, r_aligned16 );
+ am_off8 = PPCAMode_IR( 8, r_aligned16 );
+ am_off12 = PPCAMode_IR( 12, r_aligned16 );
+
+ /* Store zeros */
+ r_zeros = newVRegI(env);
+ addInstr(env, PPCInstr_LI(r_zeros, 0x0, mode64));
+ addInstr(env, PPCInstr_Store( 4, am_off0, r_zeros, mode64 ));
+ addInstr(env, PPCInstr_Store( 4, am_off4, r_zeros, mode64 ));
+ addInstr(env, PPCInstr_Store( 4, am_off8, r_zeros, mode64 ));
+
+ /* Store r_src in low word of quadword-aligned mem */
+ addInstr(env, PPCInstr_Store( 4, am_off12, r_src, mode64 ));
+
+ /* Load word into low word of quadword vector reg */
+ addInstr(env, PPCInstr_AvLdSt( True/*ld*/, 4, dst, am_off12 ));
+
+ add_to_sp( env, 32 ); // Reset SP
+ return dst;
+ }
+
+ case Iop_Dup8x16:
+ case Iop_Dup16x8:
+ case Iop_Dup32x4:
+         return mk_AvDuplicateRI(env, e->Iex.Unop.arg);
+
+ default:
+ break;
+ } /* switch (e->Iex.Unop.op) */
+ } /* if (e->tag == Iex_Unop) */
+
+ if (e->tag == Iex_Binop) {
+ switch (e->Iex.Binop.op) {
+
+ case Iop_64HLtoV128: {
+ if (!mode64) {
+ HReg r3, r2, r1, r0, r_aligned16;
+ PPCAMode *am_off0, *am_off4, *am_off8, *am_off12;
+ HReg dst = newVRegV(env);
+ /* do this via the stack (easy, convenient, etc) */
+ sub_from_sp( env, 32 ); // Move SP down
+
+ // get a quadword aligned address within our stack space
+ r_aligned16 = get_sp_aligned16( env );
+ am_off0 = PPCAMode_IR( 0, r_aligned16 );
+ am_off4 = PPCAMode_IR( 4, r_aligned16 );
+ am_off8 = PPCAMode_IR( 8, r_aligned16 );
+ am_off12 = PPCAMode_IR( 12, r_aligned16 );
+
+ /* Do the less significant 64 bits */
+ iselInt64Expr(&r1, &r0, env, e->Iex.Binop.arg2);
+ addInstr(env, PPCInstr_Store( 4, am_off12, r0, mode64 ));
+ addInstr(env, PPCInstr_Store( 4, am_off8, r1, mode64 ));
+ /* Do the more significant 64 bits */
+ iselInt64Expr(&r3, &r2, env, e->Iex.Binop.arg1);
+ addInstr(env, PPCInstr_Store( 4, am_off4, r2, mode64 ));
+ addInstr(env, PPCInstr_Store( 4, am_off0, r3, mode64 ));
+
+ /* Fetch result back from stack. */
+ addInstr(env, PPCInstr_AvLdSt(True/*ld*/, 16, dst, am_off0));
+
+ add_to_sp( env, 32 ); // Reset SP
+ return dst;
+ } else {
+ HReg rHi = iselWordExpr_R(env, e->Iex.Binop.arg1);
+ HReg rLo = iselWordExpr_R(env, e->Iex.Binop.arg2);
+ HReg dst = newVRegV(env);
+ HReg r_aligned16;
+ PPCAMode *am_off0, *am_off8;
+ /* do this via the stack (easy, convenient, etc) */
+ sub_from_sp( env, 32 ); // Move SP down
+
+ // get a quadword aligned address within our stack space
+ r_aligned16 = get_sp_aligned16( env );
+ am_off0 = PPCAMode_IR( 0, r_aligned16 );
+ am_off8 = PPCAMode_IR( 8, r_aligned16 );
+
+ /* Store 2*I64 to stack */
+ addInstr(env, PPCInstr_Store( 8, am_off0, rHi, mode64 ));
+ addInstr(env, PPCInstr_Store( 8, am_off8, rLo, mode64 ));
+
+ /* Fetch result back from stack. */
+ addInstr(env, PPCInstr_AvLdSt(True/*ld*/, 16, dst, am_off0));
+
+ add_to_sp( env, 32 ); // Reset SP
+ return dst;
+ }
+ }
+
+ case Iop_Add32Fx4: op = Pavfp_ADDF; goto do_32Fx4;
+ case Iop_Sub32Fx4: op = Pavfp_SUBF; goto do_32Fx4;
+ case Iop_Max32Fx4: op = Pavfp_MAXF; goto do_32Fx4;
+ case Iop_Min32Fx4: op = Pavfp_MINF; goto do_32Fx4;
+ case Iop_Mul32Fx4: op = Pavfp_MULF; goto do_32Fx4;
+ case Iop_CmpEQ32Fx4: op = Pavfp_CMPEQF; goto do_32Fx4;
+ case Iop_CmpGT32Fx4: op = Pavfp_CMPGTF; goto do_32Fx4;
+ case Iop_CmpGE32Fx4: op = Pavfp_CMPGEF; goto do_32Fx4;
+ do_32Fx4:
+ {
+ HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
+ HReg dst = newVRegV(env);
+ addInstr(env, PPCInstr_AvBin32Fx4(op, dst, argL, argR));
+ return dst;
+ }
+
+ case Iop_CmpLE32Fx4: {
+ HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
+ HReg dst = newVRegV(env);
+
+ /* stay consistent with native ppc compares:
+ if a left/right lane holds a nan, return zeros for that lane
+ so: le == NOT(gt OR isNan)
+ */
+ HReg isNanLR = newVRegV(env);
+ HReg isNanL = isNan(env, argL);
+ HReg isNanR = isNan(env, argR);
+ addInstr(env, PPCInstr_AvBinary(Pav_OR, isNanLR,
+ isNanL, isNanR));
+
+ addInstr(env, PPCInstr_AvBin32Fx4(Pavfp_CMPGTF, dst,
+ argL, argR));
+ addInstr(env, PPCInstr_AvBinary(Pav_OR, dst, dst, isNanLR));
+ addInstr(env, PPCInstr_AvUnary(Pav_NOT, dst, dst));
+ return dst;
+ }
+
+ case Iop_AndV128: op = Pav_AND; goto do_AvBin;
+ case Iop_OrV128: op = Pav_OR; goto do_AvBin;
+ case Iop_XorV128: op = Pav_XOR; goto do_AvBin;
+ do_AvBin: {
+ HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1);
+ HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2);
+ HReg dst = newVRegV(env);
+ addInstr(env, PPCInstr_AvBinary(op, dst, arg1, arg2));
+ return dst;
+ }
+
+ case Iop_Shl8x16: op = Pav_SHL; goto do_AvBin8x16;
+ case Iop_Shr8x16: op = Pav_SHR; goto do_AvBin8x16;
+ case Iop_Sar8x16: op = Pav_SAR; goto do_AvBin8x16;
+ case Iop_Rol8x16: op = Pav_ROTL; goto do_AvBin8x16;
+ case Iop_InterleaveHI8x16: op = Pav_MRGHI; goto do_AvBin8x16;
+ case Iop_InterleaveLO8x16: op = Pav_MRGLO; goto do_AvBin8x16;
+ case Iop_Add8x16: op = Pav_ADDU; goto do_AvBin8x16;
+ case Iop_QAdd8Ux16: op = Pav_QADDU; goto do_AvBin8x16;
+ case Iop_QAdd8Sx16: op = Pav_QADDS; goto do_AvBin8x16;
+ case Iop_Sub8x16: op = Pav_SUBU; goto do_AvBin8x16;
+ case Iop_QSub8Ux16: op = Pav_QSUBU; goto do_AvBin8x16;
+ case Iop_QSub8Sx16: op = Pav_QSUBS; goto do_AvBin8x16;
+ case Iop_Avg8Ux16: op = Pav_AVGU; goto do_AvBin8x16;
+ case Iop_Avg8Sx16: op = Pav_AVGS; goto do_AvBin8x16;
+ case Iop_Max8Ux16: op = Pav_MAXU; goto do_AvBin8x16;
+ case Iop_Max8Sx16: op = Pav_MAXS; goto do_AvBin8x16;
+ case Iop_Min8Ux16: op = Pav_MINU; goto do_AvBin8x16;
+ case Iop_Min8Sx16: op = Pav_MINS; goto do_AvBin8x16;
+ case Iop_MullEven8Ux16: op = Pav_OMULU; goto do_AvBin8x16;
+ case Iop_MullEven8Sx16: op = Pav_OMULS; goto do_AvBin8x16;
+ case Iop_CmpEQ8x16: op = Pav_CMPEQU; goto do_AvBin8x16;
+ case Iop_CmpGT8Ux16: op = Pav_CMPGTU; goto do_AvBin8x16;
+ case Iop_CmpGT8Sx16: op = Pav_CMPGTS; goto do_AvBin8x16;
+ do_AvBin8x16: {
+ HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1);
+ HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2);
+ HReg dst = newVRegV(env);
+ addInstr(env, PPCInstr_AvBin8x16(op, dst, arg1, arg2));
+ return dst;
+ }
+
+ case Iop_Shl16x8: op = Pav_SHL; goto do_AvBin16x8;
+ case Iop_Shr16x8: op = Pav_SHR; goto do_AvBin16x8;
+ case Iop_Sar16x8: op = Pav_SAR; goto do_AvBin16x8;
+ case Iop_Rol16x8: op = Pav_ROTL; goto do_AvBin16x8;
+ case Iop_Narrow16x8: op = Pav_PACKUU; goto do_AvBin16x8;
+ case Iop_QNarrow16Ux8: op = Pav_QPACKUU; goto do_AvBin16x8;
+ case Iop_QNarrow16Sx8: op = Pav_QPACKSS; goto do_AvBin16x8;
+ case Iop_InterleaveHI16x8: op = Pav_MRGHI; goto do_AvBin16x8;
+ case Iop_InterleaveLO16x8: op = Pav_MRGLO; goto do_AvBin16x8;
+ case Iop_Add16x8: op = Pav_ADDU; goto do_AvBin16x8;
+ case Iop_QAdd16Ux8: op = Pav_QADDU; goto do_AvBin16x8;
+ case Iop_QAdd16Sx8: op = Pav_QADDS; goto do_AvBin16x8;
+ case Iop_Sub16x8: op = Pav_SUBU; goto do_AvBin16x8;
+ case Iop_QSub16Ux8: op = Pav_QSUBU; goto do_AvBin16x8;
+ case Iop_QSub16Sx8: op = Pav_QSUBS; goto do_AvBin16x8;
+ case Iop_Avg16Ux8: op = Pav_AVGU; goto do_AvBin16x8;
+ case Iop_Avg16Sx8: op = Pav_AVGS; goto do_AvBin16x8;
+ case Iop_Max16Ux8: op = Pav_MAXU; goto do_AvBin16x8;
+ case Iop_Max16Sx8: op = Pav_MAXS; goto do_AvBin16x8;
+ case Iop_Min16Ux8: op = Pav_MINU; goto do_AvBin16x8;
+ case Iop_Min16Sx8: op = Pav_MINS; goto do_AvBin16x8;
+ case Iop_MullEven16Ux8: op = Pav_OMULU; goto do_AvBin16x8;
+ case Iop_MullEven16Sx8: op = Pav_OMULS; goto do_AvBin16x8;
+ case Iop_CmpEQ16x8: op = Pav_CMPEQU; goto do_AvBin16x8;
+ case Iop_CmpGT16Ux8: op = Pav_CMPGTU; goto do_AvBin16x8;
+ case Iop_CmpGT16Sx8: op = Pav_CMPGTS; goto do_AvBin16x8;
+ do_AvBin16x8: {
+ HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1);
+ HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2);
+ HReg dst = newVRegV(env);
+ addInstr(env, PPCInstr_AvBin16x8(op, dst, arg1, arg2));
+ return dst;
+ }
+
+ case Iop_Shl32x4: op = Pav_SHL; goto do_AvBin32x4;
+ case Iop_Shr32x4: op = Pav_SHR; goto do_AvBin32x4;
+ case Iop_Sar32x4: op = Pav_SAR; goto do_AvBin32x4;
+ case Iop_Rol32x4: op = Pav_ROTL; goto do_AvBin32x4;
+ case Iop_Narrow32x4: op = Pav_PACKUU; goto do_AvBin32x4;
+ case Iop_QNarrow32Ux4: op = Pav_QPACKUU; goto do_AvBin32x4;
+ case Iop_QNarrow32Sx4: op = Pav_QPACKSS; goto do_AvBin32x4;
+ case Iop_InterleaveHI32x4: op = Pav_MRGHI; goto do_AvBin32x4;
+ case Iop_InterleaveLO32x4: op = Pav_MRGLO; goto do_AvBin32x4;
+ case Iop_Add32x4: op = Pav_ADDU; goto do_AvBin32x4;
+ case Iop_QAdd32Ux4: op = Pav_QADDU; goto do_AvBin32x4;
+ case Iop_QAdd32Sx4: op = Pav_QADDS; goto do_AvBin32x4;
+ case Iop_Sub32x4: op = Pav_SUBU; goto do_AvBin32x4;
+ case Iop_QSub32Ux4: op = Pav_QSUBU; goto do_AvBin32x4;
+ case Iop_QSub32Sx4: op = Pav_QSUBS; goto do_AvBin32x4;
+ case Iop_Avg32Ux4: op = Pav_AVGU; goto do_AvBin32x4;
+ case Iop_Avg32Sx4: op = Pav_AVGS; goto do_AvBin32x4;
+ case Iop_Max32Ux4: op = Pav_MAXU; goto do_AvBin32x4;
+ case Iop_Max32Sx4: op = Pav_MAXS; goto do_AvBin32x4;
+ case Iop_Min32Ux4: op = Pav_MINU; goto do_AvBin32x4;
+ case Iop_Min32Sx4: op = Pav_MINS; goto do_AvBin32x4;
+ case Iop_CmpEQ32x4: op = Pav_CMPEQU; goto do_AvBin32x4;
+ case Iop_CmpGT32Ux4: op = Pav_CMPGTU; goto do_AvBin32x4;
+ case Iop_CmpGT32Sx4: op = Pav_CMPGTS; goto do_AvBin32x4;
+ do_AvBin32x4: {
+ HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1);
+ HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2);
+ HReg dst = newVRegV(env);
+ addInstr(env, PPCInstr_AvBin32x4(op, dst, arg1, arg2));
+ return dst;
+ }
+
+ case Iop_ShlN8x16: op = Pav_SHL; goto do_AvShift8x16;
+ case Iop_SarN8x16: op = Pav_SAR; goto do_AvShift8x16;
+ do_AvShift8x16: {
+ HReg r_src = iselVecExpr(env, e->Iex.Binop.arg1);
+ HReg dst = newVRegV(env);
+ HReg v_shft = mk_AvDuplicateRI(env, e->Iex.Binop.arg2);
+ addInstr(env, PPCInstr_AvBin8x16(op, dst, r_src, v_shft));
+ return dst;
+ }
+
+ case Iop_ShlN16x8: op = Pav_SHL; goto do_AvShift16x8;
+ case Iop_ShrN16x8: op = Pav_SHR; goto do_AvShift16x8;
+ case Iop_SarN16x8: op = Pav_SAR; goto do_AvShift16x8;
+ do_AvShift16x8: {
+ HReg r_src = iselVecExpr(env, e->Iex.Binop.arg1);
+ HReg dst = newVRegV(env);
+ HReg v_shft = mk_AvDuplicateRI(env, e->Iex.Binop.arg2);
+ addInstr(env, PPCInstr_AvBin16x8(op, dst, r_src, v_shft));
+ return dst;
+ }
+
+ case Iop_ShlN32x4: op = Pav_SHL; goto do_AvShift32x4;
+ case Iop_ShrN32x4: op = Pav_SHR; goto do_AvShift32x4;
+ case Iop_SarN32x4: op = Pav_SAR; goto do_AvShift32x4;
+ do_AvShift32x4: {
+ HReg r_src = iselVecExpr(env, e->Iex.Binop.arg1);
+ HReg dst = newVRegV(env);
+ HReg v_shft = mk_AvDuplicateRI(env, e->Iex.Binop.arg2);
+ addInstr(env, PPCInstr_AvBin32x4(op, dst, r_src, v_shft));
+ return dst;
+ }
+
+ case Iop_ShrV128: op = Pav_SHR; goto do_AvShiftV128;
+ case Iop_ShlV128: op = Pav_SHL; goto do_AvShiftV128;
+ do_AvShiftV128: {
+ HReg dst = newVRegV(env);
+ HReg r_src = iselVecExpr(env, e->Iex.Binop.arg1);
+ HReg v_shft = mk_AvDuplicateRI(env, e->Iex.Binop.arg2);
+ /* Note: shift value gets masked by 127 */
+ addInstr(env, PPCInstr_AvBinary(op, dst, r_src, v_shft));
+ return dst;
+ }
+
+ case Iop_Perm8x16: {
+ HReg dst = newVRegV(env);
+ HReg v_src = iselVecExpr(env, e->Iex.Binop.arg1);
+ HReg v_ctl = iselVecExpr(env, e->Iex.Binop.arg2);
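+         /* AvPerm (vperm) selects bytes from the concatenation of its
+            two source vectors under control of v_ctl; passing v_src
+            twice permutes within a single vector. */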
+ addInstr(env, PPCInstr_AvPerm(dst, v_src, v_src, v_ctl));
+ return dst;
+ }
+
+ default:
+ break;
+ } /* switch (e->Iex.Binop.op) */
+ } /* if (e->tag == Iex_Binop) */
+
+ if (e->tag == Iex_Const ) {
+ vassert(e->Iex.Const.con->tag == Ico_V128);
+ if (e->Iex.Const.con->Ico.V128 == 0x0000) {
+ return generate_zeroes_V128(env);
+ }
+ }
+
+ vex_printf("iselVecExpr(ppc) (subarch = %s): can't reduce\n",
+ LibVEX_ppVexHwCaps(mode64 ? VexArchPPC64 : VexArchPPC32,
+ env->hwcaps));
+ ppIRExpr(e);
+ vpanic("iselVecExpr_wrk(ppc)");
+}
+
+
+/*---------------------------------------------------------*/
+/*--- ISEL: Statements ---*/
+/*---------------------------------------------------------*/
+
+static void iselStmt ( ISelEnv* env, IRStmt* stmt )
+{
+ Bool mode64 = env->mode64;
+ if (vex_traceflags & VEX_TRACE_VCODE) {
+ vex_printf("\n -- ");
+ ppIRStmt(stmt);
+ vex_printf("\n");
+ }
+
+ switch (stmt->tag) {
+
+ /* --------- STORE --------- */
+ case Ist_Store: {
+ IRType tya = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
+ IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
+ IREndness end = stmt->Ist.Store.end;
+
+ if (end != Iend_BE)
+ goto stmt_fail;
+ if (!mode64 && (tya != Ity_I32))
+ goto stmt_fail;
+ if (mode64 && (tya != Ity_I64))
+ goto stmt_fail;
+
+ if (tyd == Ity_I8 || tyd == Ity_I16 || tyd == Ity_I32 ||
+ (mode64 && (tyd == Ity_I64))) {
+ PPCAMode* am_addr
+ = iselWordExpr_AMode(env, stmt->Ist.Store.addr, tyd/*of xfer*/);
+ HReg r_src = iselWordExpr_R(env, stmt->Ist.Store.data);
+ addInstr(env, PPCInstr_Store( toUChar(sizeofIRType(tyd)),
+ am_addr, r_src, mode64 ));
+ return;
+ }
+ if (tyd == Ity_F64) {
+ PPCAMode* am_addr
+ = iselWordExpr_AMode(env, stmt->Ist.Store.addr, tyd/*of xfer*/);
+ HReg fr_src = iselDblExpr(env, stmt->Ist.Store.data);
+ addInstr(env,
+ PPCInstr_FpLdSt(False/*store*/, 8, fr_src, am_addr));
+ return;
+ }
+ if (tyd == Ity_F32) {
+ PPCAMode* am_addr
+ = iselWordExpr_AMode(env, stmt->Ist.Store.addr, tyd/*of xfer*/);
+ HReg fr_src = iselFltExpr(env, stmt->Ist.Store.data);
+ addInstr(env,
+ PPCInstr_FpLdSt(False/*store*/, 4, fr_src, am_addr));
+ return;
+ }
+ if (tyd == Ity_V128) {
+ PPCAMode* am_addr
+ = iselWordExpr_AMode(env, stmt->Ist.Store.addr, tyd/*of xfer*/);
+ HReg v_src = iselVecExpr(env, stmt->Ist.Store.data);
+ addInstr(env,
+ PPCInstr_AvLdSt(False/*store*/, 16, v_src, am_addr));
+ return;
+ }
+ if (tyd == Ity_I64 && !mode64) {
+ /* Just calculate the address in the register. Life is too
+ short to arse around trying and possibly failing to adjust
+ the offset in a 'reg+offset' style amode. */
+ HReg rHi32, rLo32;
+ HReg r_addr = iselWordExpr_R(env, stmt->Ist.Store.addr);
+ iselInt64Expr( &rHi32, &rLo32, env, stmt->Ist.Store.data );
+ addInstr(env, PPCInstr_Store( 4/*byte-store*/,
+ PPCAMode_IR( 0, r_addr ),
+ rHi32,
+ False/*32-bit insn please*/) );
+ addInstr(env, PPCInstr_Store( 4/*byte-store*/,
+ PPCAMode_IR( 4, r_addr ),
+ rLo32,
+ False/*32-bit insn please*/) );
+ return;
+ }
+ break;
+ }
+
+ /* --------- PUT --------- */
+ case Ist_Put: {
+ IRType ty = typeOfIRExpr(env->type_env, stmt->Ist.Put.data);
+ if (ty == Ity_I8 || ty == Ity_I16 ||
+ ty == Ity_I32 || ((ty == Ity_I64) && mode64)) {
+ HReg r_src = iselWordExpr_R(env, stmt->Ist.Put.data);
+ PPCAMode* am_addr = PPCAMode_IR( stmt->Ist.Put.offset,
+ GuestStatePtr(mode64) );
+ addInstr(env, PPCInstr_Store( toUChar(sizeofIRType(ty)),
+ am_addr, r_src, mode64 ));
+ return;
+ }
+ if (!mode64 && ty == Ity_I64) {
+ HReg rHi, rLo;
+ PPCAMode* am_addr = PPCAMode_IR( stmt->Ist.Put.offset,
+ GuestStatePtr(mode64) );
+ PPCAMode* am_addr4 = advance4(env, am_addr);
+ iselInt64Expr(&rHi,&rLo, env, stmt->Ist.Put.data);
+ addInstr(env, PPCInstr_Store( 4, am_addr, rHi, mode64 ));
+ addInstr(env, PPCInstr_Store( 4, am_addr4, rLo, mode64 ));
+ return;
+ }
+ if (ty == Ity_V128) {
+         /* Guest state vectors are 16-byte aligned,
+ so don't need to worry here */
+ HReg v_src = iselVecExpr(env, stmt->Ist.Put.data);
+ PPCAMode* am_addr = PPCAMode_IR( stmt->Ist.Put.offset,
+ GuestStatePtr(mode64) );
+ addInstr(env,
+ PPCInstr_AvLdSt(False/*store*/, 16, v_src, am_addr));
+ return;
+ }
+ if (ty == Ity_F64) {
+ HReg fr_src = iselDblExpr(env, stmt->Ist.Put.data);
+ PPCAMode* am_addr = PPCAMode_IR( stmt->Ist.Put.offset,
+ GuestStatePtr(mode64) );
+ addInstr(env, PPCInstr_FpLdSt( False/*store*/, 8,
+ fr_src, am_addr ));
+ return;
+ }
+ break;
+ }
+
+ /* --------- Indexed PUT --------- */
+ case Ist_PutI: {
+ PPCAMode* dst_am
+ = genGuestArrayOffset(
+ env, stmt->Ist.PutI.descr,
+ stmt->Ist.PutI.ix, stmt->Ist.PutI.bias );
+ IRType ty = typeOfIRExpr(env->type_env, stmt->Ist.PutI.data);
+ if (mode64 && ty == Ity_I64) {
+ HReg r_src = iselWordExpr_R(env, stmt->Ist.PutI.data);
+ addInstr(env, PPCInstr_Store( toUChar(8),
+ dst_am, r_src, mode64 ));
+ return;
+ }
+ if ((!mode64) && ty == Ity_I32) {
+ HReg r_src = iselWordExpr_R(env, stmt->Ist.PutI.data);
+ addInstr(env, PPCInstr_Store( toUChar(4),
+ dst_am, r_src, mode64 ));
+ return;
+ }
+ break;
+ }
+
+ /* --------- TMP --------- */
+ case Ist_WrTmp: {
+ IRTemp tmp = stmt->Ist.WrTmp.tmp;
+ IRType ty = typeOfIRTemp(env->type_env, tmp);
+ if (ty == Ity_I8 || ty == Ity_I16 ||
+ ty == Ity_I32 || ((ty == Ity_I64) && mode64)) {
+ HReg r_dst = lookupIRTemp(env, tmp);
+ HReg r_src = iselWordExpr_R(env, stmt->Ist.WrTmp.data);
+ addInstr(env, mk_iMOVds_RR( r_dst, r_src ));
+ return;
+ }
+ if (!mode64 && ty == Ity_I64) {
+ HReg r_srcHi, r_srcLo, r_dstHi, r_dstLo;
+ iselInt64Expr(&r_srcHi,&r_srcLo, env, stmt->Ist.WrTmp.data);
+ lookupIRTempPair( &r_dstHi, &r_dstLo, env, tmp);
+ addInstr(env, mk_iMOVds_RR(r_dstHi, r_srcHi) );
+ addInstr(env, mk_iMOVds_RR(r_dstLo, r_srcLo) );
+ return;
+ }
+ if (mode64 && ty == Ity_I128) {
+ HReg r_srcHi, r_srcLo, r_dstHi, r_dstLo;
+ iselInt128Expr(&r_srcHi,&r_srcLo, env, stmt->Ist.WrTmp.data);
+ lookupIRTempPair( &r_dstHi, &r_dstLo, env, tmp);
+ addInstr(env, mk_iMOVds_RR(r_dstHi, r_srcHi) );
+ addInstr(env, mk_iMOVds_RR(r_dstLo, r_srcLo) );
+ return;
+ }
+ if (ty == Ity_I1) {
+ PPCCondCode cond = iselCondCode(env, stmt->Ist.WrTmp.data);
+ HReg r_dst = lookupIRTemp(env, tmp);
+ addInstr(env, PPCInstr_Set(cond, r_dst));
+ return;
+ }
+ if (ty == Ity_F64) {
+ HReg fr_dst = lookupIRTemp(env, tmp);
+ HReg fr_src = iselDblExpr(env, stmt->Ist.WrTmp.data);
+ addInstr(env, PPCInstr_FpUnary(Pfp_MOV, fr_dst, fr_src));
+ return;
+ }
+ if (ty == Ity_F32) {
+ HReg fr_dst = lookupIRTemp(env, tmp);
+ HReg fr_src = iselFltExpr(env, stmt->Ist.WrTmp.data);
+ addInstr(env, PPCInstr_FpUnary(Pfp_MOV, fr_dst, fr_src));
+ return;
+ }
+ if (ty == Ity_V128) {
+ HReg v_dst = lookupIRTemp(env, tmp);
+ HReg v_src = iselVecExpr(env, stmt->Ist.WrTmp.data);
+ addInstr(env, PPCInstr_AvUnary(Pav_MOV, v_dst, v_src));
+ return;
+ }
+ break;
+ }
+
+ /* --------- Load Linked or Store Conditional --------- */
+ case Ist_LLSC: {
+ IRTemp res = stmt->Ist.LLSC.result;
+ IRType tyRes = typeOfIRTemp(env->type_env, res);
+ IRType tyAddr = typeOfIRExpr(env->type_env, stmt->Ist.LLSC.addr);
+
+ if (stmt->Ist.LLSC.end != Iend_BE)
+ goto stmt_fail;
+ if (!mode64 && (tyAddr != Ity_I32))
+ goto stmt_fail;
+ if (mode64 && (tyAddr != Ity_I64))
+ goto stmt_fail;
+
+ if (stmt->Ist.LLSC.storedata == NULL) {
+ /* LL */
+ HReg r_addr = iselWordExpr_R( env, stmt->Ist.LLSC.addr );
+ HReg r_dst = lookupIRTemp(env, res);
+ if (tyRes == Ity_I32) {
+ addInstr(env, PPCInstr_LoadL( 4, r_dst, r_addr, mode64 ));
+ return;
+ }
+ if (tyRes == Ity_I64 && mode64) {
+ addInstr(env, PPCInstr_LoadL( 8, r_dst, r_addr, mode64 ));
+ return;
+ }
+ /* fallthru */;
+ } else {
+ /* SC */
+ HReg r_res = lookupIRTemp(env, res); /* :: Ity_I1 */
+ HReg r_a = iselWordExpr_R(env, stmt->Ist.LLSC.addr);
+ HReg r_src = iselWordExpr_R(env, stmt->Ist.LLSC.storedata);
+ HReg r_tmp = newVRegI(env);
+ IRType tyData = typeOfIRExpr(env->type_env,
+ stmt->Ist.LLSC.storedata);
+ vassert(tyRes == Ity_I1);
+ if (tyData == Ity_I32 || (tyData == Ity_I64 && mode64)) {
+ addInstr(env, PPCInstr_StoreC( tyData==Ity_I32 ? 4 : 8,
+ r_a, r_src, mode64 ));
+ addInstr(env, PPCInstr_MfCR( r_tmp ));
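+            /* stwcx./stdcx. set CR0.EQ on success; after mfcr that
+               flag sits at bit position 29 (counting from lsb 0) of
+               the CR image, so shift it down to bit 0. */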
+ addInstr(env, PPCInstr_Shft(
+ Pshft_SHR,
+ env->mode64 ? False : True
+ /*F:64-bit, T:32-bit shift*/,
+ r_tmp, r_tmp,
+ PPCRH_Imm(False/*unsigned*/, 29)));
+ /* Probably unnecessary, since the IR dest type is Ity_I1,
+ and so we are entitled to leave whatever junk we like
+ drifting round in the upper 31 or 63 bits of r_res.
+ However, for the sake of conservativeness .. */
+ addInstr(env, PPCInstr_Alu(
+ Palu_AND,
+ r_res, r_tmp,
+                             PPCRH_Imm(False/*unsigned*/, 1)));
+ return;
+ }
+ /* fallthru */
+ }
+ goto stmt_fail;
+ /*NOTREACHED*/
+ }
+
+ /* --------- Call to DIRTY helper --------- */
+ case Ist_Dirty: {
+ IRType retty;
+ IRDirty* d = stmt->Ist.Dirty.details;
+ Bool passBBP = False;
+
+ if (d->nFxState == 0)
+ vassert(!d->needsBBP);
+ passBBP = toBool(d->nFxState > 0 && d->needsBBP);
+
+ /* Marshal args, do the call, clear stack. */
+ doHelperCall( env, passBBP, d->guard, d->cee, d->args );
+
+ /* Now figure out what to do with the returned value, if any. */
+ if (d->tmp == IRTemp_INVALID)
+ /* No return value. Nothing to do. */
+ return;
+
+ retty = typeOfIRTemp(env->type_env, d->tmp);
+ if (!mode64 && retty == Ity_I64) {
+ HReg r_dstHi, r_dstLo;
+ /* The returned value is in %r3:%r4. Park it in the
+ register-pair associated with tmp. */
+ lookupIRTempPair( &r_dstHi, &r_dstLo, env, d->tmp);
+ addInstr(env, mk_iMOVds_RR(r_dstHi, hregPPC_GPR3(mode64)));
+ addInstr(env, mk_iMOVds_RR(r_dstLo, hregPPC_GPR4(mode64)));
+ return;
+ }
+ if (retty == Ity_I8 || retty == Ity_I16 ||
+ retty == Ity_I32 || ((retty == Ity_I64) && mode64)) {
+ /* The returned value is in %r3. Park it in the register
+ associated with tmp. */
+ HReg r_dst = lookupIRTemp(env, d->tmp);
+ addInstr(env, mk_iMOVds_RR(r_dst, hregPPC_GPR3(mode64)));
+ return;
+ }
+ break;
+ }
+
+ /* --------- MEM FENCE --------- */
+ case Ist_MBE:
+ switch (stmt->Ist.MBE.event) {
+ case Imbe_Fence:
+ addInstr(env, PPCInstr_MFence());
+ return;
+ default:
+ break;
+ }
+ break;
+
+ /* --------- INSTR MARK --------- */
+ /* Doesn't generate any executable code ... */
+ case Ist_IMark:
+ return;
+
+ /* --------- ABI HINT --------- */
+ /* These have no meaning (denotation in the IR) and so we ignore
+ them ... if any actually made it this far. */
+ case Ist_AbiHint:
+ return;
+
+ /* --------- NO-OP --------- */
+ /* Fairly self-explanatory, wouldn't you say? */
+ case Ist_NoOp:
+ return;
+
+ /* --------- EXIT --------- */
+ case Ist_Exit: {
+ PPCRI* ri_dst;
+ PPCCondCode cc;
+ IRConstTag tag = stmt->Ist.Exit.dst->tag;
+ if (!mode64 && (tag != Ico_U32))
+ vpanic("iselStmt(ppc): Ist_Exit: dst is not a 32-bit value");
+ if (mode64 && (tag != Ico_U64))
+ vpanic("iselStmt(ppc64): Ist_Exit: dst is not a 64-bit value");
+ ri_dst = iselWordExpr_RI(env, IRExpr_Const(stmt->Ist.Exit.dst));
+ cc = iselCondCode(env,stmt->Ist.Exit.guard);
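+      /* Write back the link register saved at block entry, since
+         helper calls made within the block may have trashed it. */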
+ addInstr(env, PPCInstr_RdWrLR(True, env->savedLR));
+ addInstr(env, PPCInstr_Goto(stmt->Ist.Exit.jk, cc, ri_dst));
+ return;
+ }
+
+ default: break;
+ }
+ stmt_fail:
+ ppIRStmt(stmt);
+ vpanic("iselStmt(ppc)");
+}
+
+
+/*---------------------------------------------------------*/
+/*--- ISEL: Basic block terminators (Nexts) ---*/
+/*---------------------------------------------------------*/
+
+static void iselNext ( ISelEnv* env, IRExpr* next, IRJumpKind jk )
+{
+ PPCCondCode cond;
+ PPCRI* ri;
+ if (vex_traceflags & VEX_TRACE_VCODE) {
+ vex_printf("\n-- goto {");
+ ppIRJumpKind(jk);
+ vex_printf("} ");
+ ppIRExpr(next);
+ vex_printf("\n");
+ }
+ cond = mk_PPCCondCode( Pct_ALWAYS, Pcf_7EQ );
+ ri = iselWordExpr_RI(env, next);
+ addInstr(env, PPCInstr_RdWrLR(True, env->savedLR));
+ addInstr(env, PPCInstr_Goto(jk, cond, ri));
+}
+
+
+/*---------------------------------------------------------*/
+/*--- Insn selector top-level ---*/
+/*---------------------------------------------------------*/
+
+/* Translate an entire SB to ppc code. */
+
+HInstrArray* iselSB_PPC ( IRSB* bb, VexArch arch_host,
+ VexArchInfo* archinfo_host,
+ VexAbiInfo* vbi )
+{
+ Int i, j;
+ HReg hreg, hregHI;
+ ISelEnv* env;
+ UInt hwcaps_host = archinfo_host->hwcaps;
+ Bool mode64 = False;
+ UInt mask32, mask64;
+
+ vassert(arch_host == VexArchPPC32 || arch_host == VexArchPPC64);
+ mode64 = arch_host == VexArchPPC64;
+
+ /* do some sanity checks */
+ mask32 = VEX_HWCAPS_PPC32_F | VEX_HWCAPS_PPC32_V
+ | VEX_HWCAPS_PPC32_FX | VEX_HWCAPS_PPC32_GX;
+
+ mask64 = VEX_HWCAPS_PPC64_V
+ | VEX_HWCAPS_PPC64_FX | VEX_HWCAPS_PPC64_GX;
+
+ if (mode64) {
+ vassert((hwcaps_host & mask32) == 0);
+ } else {
+ vassert((hwcaps_host & mask64) == 0);
+ }
+
+ /* Make up an initial environment to use. */
+ env = LibVEX_Alloc(sizeof(ISelEnv));
+ env->vreg_ctr = 0;
+
+ /* Are we being ppc32 or ppc64? */
+ env->mode64 = mode64;
+
+ /* Set up output code array. */
+ env->code = newHInstrArray();
+
+ /* Copy BB's type env. */
+ env->type_env = bb->tyenv;
+
+ /* Make up an IRTemp -> virtual HReg mapping. This doesn't
+ change as we go along. */
+ env->n_vregmap = bb->tyenv->types_used;
+ env->vregmap = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
+ env->vregmapHI = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
+
+ /* and finally ... */
+ env->hwcaps = hwcaps_host;
+ env->previous_rm = NULL;
+ env->vbi = vbi;
+
+ /* For each IR temporary, allocate a suitably-kinded virtual
+ register. */
+ j = 0;
+ for (i = 0; i < env->n_vregmap; i++) {
+ hregHI = hreg = INVALID_HREG;
+ switch (bb->tyenv->types[i]) {
+ case Ity_I1:
+ case Ity_I8:
+ case Ity_I16:
+ case Ity_I32:
+ if (mode64) { hreg = mkHReg(j++, HRcInt64, True); break;
+ } else { hreg = mkHReg(j++, HRcInt32, True); break;
+ }
+ case Ity_I64:
+ if (mode64) { hreg = mkHReg(j++, HRcInt64, True); break;
+ } else { hreg = mkHReg(j++, HRcInt32, True);
+ hregHI = mkHReg(j++, HRcInt32, True); break;
+ }
+ case Ity_I128: vassert(mode64);
+ hreg = mkHReg(j++, HRcInt64, True);
+ hregHI = mkHReg(j++, HRcInt64, True); break;
+ case Ity_F32:
+ case Ity_F64: hreg = mkHReg(j++, HRcFlt64, True); break;
+ case Ity_V128: hreg = mkHReg(j++, HRcVec128, True); break;
+ default:
+ ppIRType(bb->tyenv->types[i]);
+ vpanic("iselBB(ppc): IRTemp type");
+ }
+ env->vregmap[i] = hreg;
+ env->vregmapHI[i] = hregHI;
+ }
+ env->vreg_ctr = j;
+
+ /* Keep a copy of the link reg, so helper functions don't kill it. */
+ env->savedLR = newVRegI(env);
+ addInstr(env, PPCInstr_RdWrLR(False, env->savedLR));
+
+ /* Ok, finally we can iterate over the statements. */
+ for (i = 0; i < bb->stmts_used; i++)
+ if (bb->stmts[i])
+ iselStmt(env,bb->stmts[i]);
+
+ iselNext(env,bb->next,bb->jumpkind);
+
+ /* record the number of vregs we used. */
+ env->code->n_vregs = env->vreg_ctr;
+ return env->code;
+}
+
+
+/*---------------------------------------------------------------*/
+/*--- end host_ppc_isel.c ---*/
+/*---------------------------------------------------------------*/
diff --git a/VEX/priv/host_x86_defs.c b/VEX/priv/host_x86_defs.c
new file mode 100644
index 0000000..9a6d651
--- /dev/null
+++ b/VEX/priv/host_x86_defs.c
@@ -0,0 +1,3098 @@
+
+/*---------------------------------------------------------------*/
+/*--- begin host_x86_defs.c ---*/
+/*---------------------------------------------------------------*/
+
+/*
+ This file is part of Valgrind, a dynamic binary instrumentation
+ framework.
+
+ Copyright (C) 2004-2010 OpenWorks LLP
+ info@open-works.net
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA.
+
+ The GNU General Public License is contained in the file COPYING.
+
+ Neither the names of the U.S. Department of Energy nor the
+ University of California nor the names of its contributors may be
+ used to endorse or promote products derived from this software
+ without prior written permission.
+*/
+
+#include "libvex_basictypes.h"
+#include "libvex.h"
+#include "libvex_trc_values.h"
+
+#include "main_util.h"
+#include "host_generic_regs.h"
+#include "host_x86_defs.h"
+
+
+/* --------- Registers. --------- */
+
+void ppHRegX86 ( HReg reg )
+{
+ Int r;
+ static HChar* ireg32_names[8]
+ = { "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi" };
+ /* Be generic for all virtual regs. */
+ if (hregIsVirtual(reg)) {
+ ppHReg(reg);
+ return;
+ }
+ /* But specific for real regs. */
+ switch (hregClass(reg)) {
+ case HRcInt32:
+ r = hregNumber(reg);
+ vassert(r >= 0 && r < 8);
+ vex_printf("%s", ireg32_names[r]);
+ return;
+ case HRcFlt64:
+ r = hregNumber(reg);
+ vassert(r >= 0 && r < 6);
+ vex_printf("%%fake%d", r);
+ return;
+ case HRcVec128:
+ r = hregNumber(reg);
+ vassert(r >= 0 && r < 8);
+ vex_printf("%%xmm%d", r);
+ return;
+ default:
+ vpanic("ppHRegX86");
+ }
+}
+
+HReg hregX86_EAX ( void ) { return mkHReg(0, HRcInt32, False); }
+HReg hregX86_ECX ( void ) { return mkHReg(1, HRcInt32, False); }
+HReg hregX86_EDX ( void ) { return mkHReg(2, HRcInt32, False); }
+HReg hregX86_EBX ( void ) { return mkHReg(3, HRcInt32, False); }
+HReg hregX86_ESP ( void ) { return mkHReg(4, HRcInt32, False); }
+HReg hregX86_EBP ( void ) { return mkHReg(5, HRcInt32, False); }
+HReg hregX86_ESI ( void ) { return mkHReg(6, HRcInt32, False); }
+HReg hregX86_EDI ( void ) { return mkHReg(7, HRcInt32, False); }
+
+HReg hregX86_FAKE0 ( void ) { return mkHReg(0, HRcFlt64, False); }
+HReg hregX86_FAKE1 ( void ) { return mkHReg(1, HRcFlt64, False); }
+HReg hregX86_FAKE2 ( void ) { return mkHReg(2, HRcFlt64, False); }
+HReg hregX86_FAKE3 ( void ) { return mkHReg(3, HRcFlt64, False); }
+HReg hregX86_FAKE4 ( void ) { return mkHReg(4, HRcFlt64, False); }
+HReg hregX86_FAKE5 ( void ) { return mkHReg(5, HRcFlt64, False); }
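+
+/* Note (editorial): the six 'fake' FP registers are a flat register
+   bank standing in for the x87 stack; re-expressing them in terms of
+   the real %st(0..7) stack is deferred to instruction emission
+   time. */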
+
+HReg hregX86_XMM0 ( void ) { return mkHReg(0, HRcVec128, False); }
+HReg hregX86_XMM1 ( void ) { return mkHReg(1, HRcVec128, False); }
+HReg hregX86_XMM2 ( void ) { return mkHReg(2, HRcVec128, False); }
+HReg hregX86_XMM3 ( void ) { return mkHReg(3, HRcVec128, False); }
+HReg hregX86_XMM4 ( void ) { return mkHReg(4, HRcVec128, False); }
+HReg hregX86_XMM5 ( void ) { return mkHReg(5, HRcVec128, False); }
+HReg hregX86_XMM6 ( void ) { return mkHReg(6, HRcVec128, False); }
+HReg hregX86_XMM7 ( void ) { return mkHReg(7, HRcVec128, False); }
+
+
+void getAllocableRegs_X86 ( Int* nregs, HReg** arr )
+{
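+   /* %esp and %ebp are deliberately omitted: they are permanently
+      claimed as the host stack pointer and the guest state pointer
+      respectively, so the allocator must not use them. */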
+ *nregs = 20;
+ *arr = LibVEX_Alloc(*nregs * sizeof(HReg));
+ (*arr)[0] = hregX86_EAX();
+ (*arr)[1] = hregX86_EBX();
+ (*arr)[2] = hregX86_ECX();
+ (*arr)[3] = hregX86_EDX();
+ (*arr)[4] = hregX86_ESI();
+ (*arr)[5] = hregX86_EDI();
+ (*arr)[6] = hregX86_FAKE0();
+ (*arr)[7] = hregX86_FAKE1();
+ (*arr)[8] = hregX86_FAKE2();
+ (*arr)[9] = hregX86_FAKE3();
+ (*arr)[10] = hregX86_FAKE4();
+ (*arr)[11] = hregX86_FAKE5();
+ (*arr)[12] = hregX86_XMM0();
+ (*arr)[13] = hregX86_XMM1();
+ (*arr)[14] = hregX86_XMM2();
+ (*arr)[15] = hregX86_XMM3();
+ (*arr)[16] = hregX86_XMM4();
+ (*arr)[17] = hregX86_XMM5();
+ (*arr)[18] = hregX86_XMM6();
+ (*arr)[19] = hregX86_XMM7();
+}
+
+
+/* --------- Condition codes, Intel encoding. --------- */
+
+HChar* showX86CondCode ( X86CondCode cond )
+{
+ switch (cond) {
+ case Xcc_O: return "o";
+ case Xcc_NO: return "no";
+ case Xcc_B: return "b";
+ case Xcc_NB: return "nb";
+ case Xcc_Z: return "z";
+ case Xcc_NZ: return "nz";
+ case Xcc_BE: return "be";
+ case Xcc_NBE: return "nbe";
+ case Xcc_S: return "s";
+ case Xcc_NS: return "ns";
+ case Xcc_P: return "p";
+ case Xcc_NP: return "np";
+ case Xcc_L: return "l";
+ case Xcc_NL: return "nl";
+ case Xcc_LE: return "le";
+ case Xcc_NLE: return "nle";
+ case Xcc_ALWAYS: return "ALWAYS";
+ default: vpanic("ppX86CondCode");
+ }
+}
+
+
+/* --------- X86AMode: memory address expressions. --------- */
+
+X86AMode* X86AMode_IR ( UInt imm32, HReg reg ) {
+ X86AMode* am = LibVEX_Alloc(sizeof(X86AMode));
+ am->tag = Xam_IR;
+ am->Xam.IR.imm = imm32;
+ am->Xam.IR.reg = reg;
+ return am;
+}
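+/* imm32(base, index, 1<<shift): 'shift' is the log2 of the index
+   scale factor and must be in the range 0 .. 3. */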
+X86AMode* X86AMode_IRRS ( UInt imm32, HReg base, HReg indEx, Int shift ) {
+ X86AMode* am = LibVEX_Alloc(sizeof(X86AMode));
+ am->tag = Xam_IRRS;
+ am->Xam.IRRS.imm = imm32;
+ am->Xam.IRRS.base = base;
+ am->Xam.IRRS.index = indEx;
+ am->Xam.IRRS.shift = shift;
+ vassert(shift >= 0 && shift <= 3);
+ return am;
+}
+
+X86AMode* dopyX86AMode ( X86AMode* am ) {
+ switch (am->tag) {
+ case Xam_IR:
+ return X86AMode_IR( am->Xam.IR.imm, am->Xam.IR.reg );
+ case Xam_IRRS:
+ return X86AMode_IRRS( am->Xam.IRRS.imm, am->Xam.IRRS.base,
+ am->Xam.IRRS.index, am->Xam.IRRS.shift );
+ default:
+ vpanic("dopyX86AMode");
+ }
+}
+
+void ppX86AMode ( X86AMode* am ) {
+ switch (am->tag) {
+ case Xam_IR:
+ if (am->Xam.IR.imm == 0)
+ vex_printf("(");
+ else
+ vex_printf("0x%x(", am->Xam.IR.imm);
+ ppHRegX86(am->Xam.IR.reg);
+ vex_printf(")");
+ return;
+ case Xam_IRRS:
+ vex_printf("0x%x(", am->Xam.IRRS.imm);
+ ppHRegX86(am->Xam.IRRS.base);
+ vex_printf(",");
+ ppHRegX86(am->Xam.IRRS.index);
+ vex_printf(",%d)", 1 << am->Xam.IRRS.shift);
+ return;
+ default:
+ vpanic("ppX86AMode");
+ }
+}
+
+static void addRegUsage_X86AMode ( HRegUsage* u, X86AMode* am ) {
+ switch (am->tag) {
+ case Xam_IR:
+ addHRegUse(u, HRmRead, am->Xam.IR.reg);
+ return;
+ case Xam_IRRS:
+ addHRegUse(u, HRmRead, am->Xam.IRRS.base);
+ addHRegUse(u, HRmRead, am->Xam.IRRS.index);
+ return;
+ default:
+ vpanic("addRegUsage_X86AMode");
+ }
+}
+
+static void mapRegs_X86AMode ( HRegRemap* m, X86AMode* am ) {
+ switch (am->tag) {
+ case Xam_IR:
+ am->Xam.IR.reg = lookupHRegRemap(m, am->Xam.IR.reg);
+ return;
+ case Xam_IRRS:
+ am->Xam.IRRS.base = lookupHRegRemap(m, am->Xam.IRRS.base);
+ am->Xam.IRRS.index = lookupHRegRemap(m, am->Xam.IRRS.index);
+ return;
+ default:
+ vpanic("mapRegs_X86AMode");
+ }
+}
+
+/* --------- Operand, which can be reg, immediate or memory. --------- */
+
+X86RMI* X86RMI_Imm ( UInt imm32 ) {
+ X86RMI* op = LibVEX_Alloc(sizeof(X86RMI));
+ op->tag = Xrmi_Imm;
+ op->Xrmi.Imm.imm32 = imm32;
+ return op;
+}
+X86RMI* X86RMI_Reg ( HReg reg ) {
+ X86RMI* op = LibVEX_Alloc(sizeof(X86RMI));
+ op->tag = Xrmi_Reg;
+ op->Xrmi.Reg.reg = reg;
+ return op;
+}
+X86RMI* X86RMI_Mem ( X86AMode* am ) {
+ X86RMI* op = LibVEX_Alloc(sizeof(X86RMI));
+ op->tag = Xrmi_Mem;
+ op->Xrmi.Mem.am = am;
+ return op;
+}
+
+void ppX86RMI ( X86RMI* op ) {
+ switch (op->tag) {
+ case Xrmi_Imm:
+ vex_printf("$0x%x", op->Xrmi.Imm.imm32);
+ return;
+ case Xrmi_Reg:
+ ppHRegX86(op->Xrmi.Reg.reg);
+ return;
+ case Xrmi_Mem:
+ ppX86AMode(op->Xrmi.Mem.am);
+ return;
+ default:
+ vpanic("ppX86RMI");
+ }
+}
+
+/* An X86RMI can only be used in a "read" context (what would it mean
+ to write or modify a literal?) and so we enumerate its registers
+ accordingly. */
+static void addRegUsage_X86RMI ( HRegUsage* u, X86RMI* op ) {
+ switch (op->tag) {
+ case Xrmi_Imm:
+ return;
+ case Xrmi_Reg:
+ addHRegUse(u, HRmRead, op->Xrmi.Reg.reg);
+ return;
+ case Xrmi_Mem:
+ addRegUsage_X86AMode(u, op->Xrmi.Mem.am);
+ return;
+ default:
+ vpanic("addRegUsage_X86RMI");
+ }
+}
+
+static void mapRegs_X86RMI ( HRegRemap* m, X86RMI* op ) {
+ switch (op->tag) {
+ case Xrmi_Imm:
+ return;
+ case Xrmi_Reg:
+ op->Xrmi.Reg.reg = lookupHRegRemap(m, op->Xrmi.Reg.reg);
+ return;
+ case Xrmi_Mem:
+ mapRegs_X86AMode(m, op->Xrmi.Mem.am);
+ return;
+ default:
+ vpanic("mapRegs_X86RMI");
+ }
+}
+
+
+/* --------- Operand, which can be reg or immediate only. --------- */
+
+X86RI* X86RI_Imm ( UInt imm32 ) {
+ X86RI* op = LibVEX_Alloc(sizeof(X86RI));
+ op->tag = Xri_Imm;
+ op->Xri.Imm.imm32 = imm32;
+ return op;
+}
+X86RI* X86RI_Reg ( HReg reg ) {
+ X86RI* op = LibVEX_Alloc(sizeof(X86RI));
+ op->tag = Xri_Reg;
+ op->Xri.Reg.reg = reg;
+ return op;
+}
+
+void ppX86RI ( X86RI* op ) {
+ switch (op->tag) {
+ case Xri_Imm:
+ vex_printf("$0x%x", op->Xri.Imm.imm32);
+ return;
+ case Xri_Reg:
+ ppHRegX86(op->Xri.Reg.reg);
+ return;
+ default:
+ vpanic("ppX86RI");
+ }
+}
+
+/* An X86RI can only be used in a "read" context (what would it mean
+ to write or modify a literal?) and so we enumerate its registers
+ accordingly. */
+static void addRegUsage_X86RI ( HRegUsage* u, X86RI* op ) {
+ switch (op->tag) {
+ case Xri_Imm:
+ return;
+ case Xri_Reg:
+ addHRegUse(u, HRmRead, op->Xri.Reg.reg);
+ return;
+ default:
+ vpanic("addRegUsage_X86RI");
+ }
+}
+
+static void mapRegs_X86RI ( HRegRemap* m, X86RI* op ) {
+ switch (op->tag) {
+ case Xri_Imm:
+ return;
+ case Xri_Reg:
+ op->Xri.Reg.reg = lookupHRegRemap(m, op->Xri.Reg.reg);
+ return;
+ default:
+ vpanic("mapRegs_X86RI");
+ }
+}
+
+
+/* --------- Operand, which can be reg or memory only. --------- */
+
+X86RM* X86RM_Reg ( HReg reg ) {
+ X86RM* op = LibVEX_Alloc(sizeof(X86RM));
+ op->tag = Xrm_Reg;
+ op->Xrm.Reg.reg = reg;
+ return op;
+}
+X86RM* X86RM_Mem ( X86AMode* am ) {
+ X86RM* op = LibVEX_Alloc(sizeof(X86RM));
+ op->tag = Xrm_Mem;
+ op->Xrm.Mem.am = am;
+ return op;
+}
+
+void ppX86RM ( X86RM* op ) {
+ switch (op->tag) {
+ case Xrm_Mem:
+ ppX86AMode(op->Xrm.Mem.am);
+ return;
+ case Xrm_Reg:
+ ppHRegX86(op->Xrm.Reg.reg);
+ return;
+ default:
+ vpanic("ppX86RM");
+ }
+}
+
+/* Because an X86RM can be used as either a source or a destination
+ operand, we have to supply a mode -- pertaining to the operand as a
+ whole -- indicating how it's being used. */
+static void addRegUsage_X86RM ( HRegUsage* u, X86RM* op, HRegMode mode ) {
+ switch (op->tag) {
+ case Xrm_Mem:
+ /* Memory is read, written or modified. So we just want to
+ know the regs read by the amode. */
+ addRegUsage_X86AMode(u, op->Xrm.Mem.am);
+ return;
+ case Xrm_Reg:
+ /* reg is read, written or modified. Add it in the
+ appropriate way. */
+ addHRegUse(u, mode, op->Xrm.Reg.reg);
+ return;
+ default:
+ vpanic("addRegUsage_X86RM");
+ }
+}
+
+static void mapRegs_X86RM ( HRegRemap* m, X86RM* op )
+{
+ switch (op->tag) {
+ case Xrm_Mem:
+ mapRegs_X86AMode(m, op->Xrm.Mem.am);
+ return;
+ case Xrm_Reg:
+ op->Xrm.Reg.reg = lookupHRegRemap(m, op->Xrm.Reg.reg);
+ return;
+ default:
+ vpanic("mapRegs_X86RM");
+ }
+}
+
+
+/* --------- Instructions. --------- */
+
+HChar* showX86UnaryOp ( X86UnaryOp op ) {
+ switch (op) {
+ case Xun_NOT: return "not";
+ case Xun_NEG: return "neg";
+ default: vpanic("showX86UnaryOp");
+ }
+}
+
+HChar* showX86AluOp ( X86AluOp op ) {
+ switch (op) {
+ case Xalu_MOV: return "mov";
+ case Xalu_CMP: return "cmp";
+ case Xalu_ADD: return "add";
+ case Xalu_SUB: return "sub";
+ case Xalu_ADC: return "adc";
+ case Xalu_SBB: return "sbb";
+ case Xalu_AND: return "and";
+ case Xalu_OR: return "or";
+ case Xalu_XOR: return "xor";
+ case Xalu_MUL: return "mul";
+ default: vpanic("showX86AluOp");
+ }
+}
+
+HChar* showX86ShiftOp ( X86ShiftOp op ) {
+ switch (op) {
+ case Xsh_SHL: return "shl";
+ case Xsh_SHR: return "shr";
+ case Xsh_SAR: return "sar";
+ default: vpanic("showX86ShiftOp");
+ }
+}
+
+HChar* showX86FpOp ( X86FpOp op ) {
+ switch (op) {
+ case Xfp_ADD: return "add";
+ case Xfp_SUB: return "sub";
+ case Xfp_MUL: return "mul";
+ case Xfp_DIV: return "div";
+ case Xfp_SCALE: return "scale";
+ case Xfp_ATAN: return "atan";
+ case Xfp_YL2X: return "yl2x";
+ case Xfp_YL2XP1: return "yl2xp1";
+ case Xfp_PREM: return "prem";
+ case Xfp_PREM1: return "prem1";
+ case Xfp_SQRT: return "sqrt";
+ case Xfp_ABS: return "abs";
+ case Xfp_NEG: return "chs";
+ case Xfp_MOV: return "mov";
+ case Xfp_SIN: return "sin";
+ case Xfp_COS: return "cos";
+ case Xfp_TAN: return "tan";
+ case Xfp_ROUND: return "round";
+ case Xfp_2XM1: return "2xm1";
+ default: vpanic("showX86FpOp");
+ }
+}
+
+HChar* showX86SseOp ( X86SseOp op ) {
+ switch (op) {
+ case Xsse_MOV: return "mov(?!)";
+ case Xsse_ADDF: return "add";
+ case Xsse_SUBF: return "sub";
+ case Xsse_MULF: return "mul";
+ case Xsse_DIVF: return "div";
+ case Xsse_MAXF: return "max";
+ case Xsse_MINF: return "min";
+ case Xsse_CMPEQF: return "cmpFeq";
+ case Xsse_CMPLTF: return "cmpFlt";
+ case Xsse_CMPLEF: return "cmpFle";
+ case Xsse_CMPUNF: return "cmpFun";
+ case Xsse_RCPF: return "rcp";
+ case Xsse_RSQRTF: return "rsqrt";
+ case Xsse_SQRTF: return "sqrt";
+ case Xsse_AND: return "and";
+ case Xsse_OR: return "or";
+ case Xsse_XOR: return "xor";
+ case Xsse_ANDN: return "andn";
+ case Xsse_ADD8: return "paddb";
+ case Xsse_ADD16: return "paddw";
+ case Xsse_ADD32: return "paddd";
+ case Xsse_ADD64: return "paddq";
+ case Xsse_QADD8U: return "paddusb";
+ case Xsse_QADD16U: return "paddusw";
+ case Xsse_QADD8S: return "paddsb";
+ case Xsse_QADD16S: return "paddsw";
+ case Xsse_SUB8: return "psubb";
+ case Xsse_SUB16: return "psubw";
+ case Xsse_SUB32: return "psubd";
+ case Xsse_SUB64: return "psubq";
+ case Xsse_QSUB8U: return "psubusb";
+ case Xsse_QSUB16U: return "psubusw";
+ case Xsse_QSUB8S: return "psubsb";
+ case Xsse_QSUB16S: return "psubsw";
+ case Xsse_MUL16: return "pmullw";
+ case Xsse_MULHI16U: return "pmulhuw";
+ case Xsse_MULHI16S: return "pmulhw";
+ case Xsse_AVG8U: return "pavgb";
+ case Xsse_AVG16U: return "pavgw";
+ case Xsse_MAX16S: return "pmaxw";
+ case Xsse_MAX8U: return "pmaxub";
+ case Xsse_MIN16S: return "pminw";
+ case Xsse_MIN8U: return "pminub";
+ case Xsse_CMPEQ8: return "pcmpeqb";
+ case Xsse_CMPEQ16: return "pcmpeqw";
+ case Xsse_CMPEQ32: return "pcmpeqd";
+ case Xsse_CMPGT8S: return "pcmpgtb";
+ case Xsse_CMPGT16S: return "pcmpgtw";
+ case Xsse_CMPGT32S: return "pcmpgtd";
+ case Xsse_SHL16: return "psllw";
+ case Xsse_SHL32: return "pslld";
+ case Xsse_SHL64: return "psllq";
+ case Xsse_SHR16: return "psrlw";
+ case Xsse_SHR32: return "psrld";
+ case Xsse_SHR64: return "psrlq";
+ case Xsse_SAR16: return "psraw";
+ case Xsse_SAR32: return "psrad";
+ case Xsse_PACKSSD: return "packssdw";
+ case Xsse_PACKSSW: return "packsswb";
+ case Xsse_PACKUSW: return "packuswb";
+ case Xsse_UNPCKHB: return "punpckhb";
+ case Xsse_UNPCKHW: return "punpckhw";
+ case Xsse_UNPCKHD: return "punpckhd";
+ case Xsse_UNPCKHQ: return "punpckhq";
+ case Xsse_UNPCKLB: return "punpcklb";
+ case Xsse_UNPCKLW: return "punpcklw";
+ case Xsse_UNPCKLD: return "punpckld";
+ case Xsse_UNPCKLQ: return "punpcklq";
+ default: vpanic("showX86SseOp");
+ }
+}
+
+X86Instr* X86Instr_Alu32R ( X86AluOp op, X86RMI* src, HReg dst ) {
+ X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
+ i->tag = Xin_Alu32R;
+ i->Xin.Alu32R.op = op;
+ i->Xin.Alu32R.src = src;
+ i->Xin.Alu32R.dst = dst;
+ return i;
+}
+X86Instr* X86Instr_Alu32M ( X86AluOp op, X86RI* src, X86AMode* dst ) {
+ X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
+ i->tag = Xin_Alu32M;
+ i->Xin.Alu32M.op = op;
+ i->Xin.Alu32M.src = src;
+ i->Xin.Alu32M.dst = dst;
+ vassert(op != Xalu_MUL);
+ return i;
+}
+X86Instr* X86Instr_Sh32 ( X86ShiftOp op, UInt src, HReg dst ) {
+ X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
+ i->tag = Xin_Sh32;
+ i->Xin.Sh32.op = op;
+ i->Xin.Sh32.src = src;
+ i->Xin.Sh32.dst = dst;
+ return i;
+}
+X86Instr* X86Instr_Test32 ( UInt imm32, X86RM* dst ) {
+ X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
+ i->tag = Xin_Test32;
+ i->Xin.Test32.imm32 = imm32;
+ i->Xin.Test32.dst = dst;
+ return i;
+}
+X86Instr* X86Instr_Unary32 ( X86UnaryOp op, HReg dst ) {
+ X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
+ i->tag = Xin_Unary32;
+ i->Xin.Unary32.op = op;
+ i->Xin.Unary32.dst = dst;
+ return i;
+}
+X86Instr* X86Instr_Lea32 ( X86AMode* am, HReg dst ) {
+ X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
+ i->tag = Xin_Lea32;
+ i->Xin.Lea32.am = am;
+ i->Xin.Lea32.dst = dst;
+ return i;
+}
+X86Instr* X86Instr_MulL ( Bool syned, X86RM* src ) {
+ X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
+ i->tag = Xin_MulL;
+ i->Xin.MulL.syned = syned;
+ i->Xin.MulL.src = src;
+ return i;
+}
+X86Instr* X86Instr_Div ( Bool syned, X86RM* src ) {
+ X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
+ i->tag = Xin_Div;
+ i->Xin.Div.syned = syned;
+ i->Xin.Div.src = src;
+ return i;
+}
+X86Instr* X86Instr_Sh3232 ( X86ShiftOp op, UInt amt, HReg src, HReg dst ) {
+ X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
+ i->tag = Xin_Sh3232;
+ i->Xin.Sh3232.op = op;
+ i->Xin.Sh3232.amt = amt;
+ i->Xin.Sh3232.src = src;
+ i->Xin.Sh3232.dst = dst;
+ vassert(op == Xsh_SHL || op == Xsh_SHR);
+ return i;
+}
+X86Instr* X86Instr_Push( X86RMI* src ) {
+ X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
+ i->tag = Xin_Push;
+ i->Xin.Push.src = src;
+ return i;
+}
+X86Instr* X86Instr_Call ( X86CondCode cond, Addr32 target, Int regparms ) {
+ X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
+ i->tag = Xin_Call;
+ i->Xin.Call.cond = cond;
+ i->Xin.Call.target = target;
+ i->Xin.Call.regparms = regparms;
+ vassert(regparms >= 0 && regparms <= 3);
+ return i;
+}
+X86Instr* X86Instr_Goto ( IRJumpKind jk, X86CondCode cond, X86RI* dst ) {
+ X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
+ i->tag = Xin_Goto;
+ i->Xin.Goto.cond = cond;
+ i->Xin.Goto.dst = dst;
+ i->Xin.Goto.jk = jk;
+ return i;
+}
+X86Instr* X86Instr_CMov32 ( X86CondCode cond, X86RM* src, HReg dst ) {
+ X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
+ i->tag = Xin_CMov32;
+ i->Xin.CMov32.cond = cond;
+ i->Xin.CMov32.src = src;
+ i->Xin.CMov32.dst = dst;
+ vassert(cond != Xcc_ALWAYS);
+ return i;
+}
+X86Instr* X86Instr_LoadEX ( UChar szSmall, Bool syned,
+ X86AMode* src, HReg dst ) {
+ X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
+ i->tag = Xin_LoadEX;
+ i->Xin.LoadEX.szSmall = szSmall;
+ i->Xin.LoadEX.syned = syned;
+ i->Xin.LoadEX.src = src;
+ i->Xin.LoadEX.dst = dst;
+ vassert(szSmall == 1 || szSmall == 2);
+ return i;
+}
+X86Instr* X86Instr_Store ( UChar sz, HReg src, X86AMode* dst ) {
+ X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
+ i->tag = Xin_Store;
+ i->Xin.Store.sz = sz;
+ i->Xin.Store.src = src;
+ i->Xin.Store.dst = dst;
+ vassert(sz == 1 || sz == 2);
+ return i;
+}
+X86Instr* X86Instr_Set32 ( X86CondCode cond, HReg dst ) {
+ X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
+ i->tag = Xin_Set32;
+ i->Xin.Set32.cond = cond;
+ i->Xin.Set32.dst = dst;
+ return i;
+}
+X86Instr* X86Instr_Bsfr32 ( Bool isFwds, HReg src, HReg dst ) {
+ X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
+ i->tag = Xin_Bsfr32;
+ i->Xin.Bsfr32.isFwds = isFwds;
+ i->Xin.Bsfr32.src = src;
+ i->Xin.Bsfr32.dst = dst;
+ return i;
+}
+X86Instr* X86Instr_MFence ( UInt hwcaps ) {
+ X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
+ i->tag = Xin_MFence;
+ i->Xin.MFence.hwcaps = hwcaps;
+ vassert(0 == (hwcaps & ~(VEX_HWCAPS_X86_SSE1
+ |VEX_HWCAPS_X86_SSE2
+ |VEX_HWCAPS_X86_SSE3
+ |VEX_HWCAPS_X86_LZCNT)));
+ return i;
+}
+X86Instr* X86Instr_ACAS ( X86AMode* addr, UChar sz ) {
+ X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
+ i->tag = Xin_ACAS;
+ i->Xin.ACAS.addr = addr;
+ i->Xin.ACAS.sz = sz;
+ vassert(sz == 4 || sz == 2 || sz == 1);
+ return i;
+}
+X86Instr* X86Instr_DACAS ( X86AMode* addr ) {
+ X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
+ i->tag = Xin_DACAS;
+ i->Xin.DACAS.addr = addr;
+ return i;
+}
+
+X86Instr* X86Instr_FpUnary ( X86FpOp op, HReg src, HReg dst ) {
+ X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
+ i->tag = Xin_FpUnary;
+ i->Xin.FpUnary.op = op;
+ i->Xin.FpUnary.src = src;
+ i->Xin.FpUnary.dst = dst;
+ return i;
+}
+X86Instr* X86Instr_FpBinary ( X86FpOp op, HReg srcL, HReg srcR, HReg dst ) {
+ X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
+ i->tag = Xin_FpBinary;
+ i->Xin.FpBinary.op = op;
+ i->Xin.FpBinary.srcL = srcL;
+ i->Xin.FpBinary.srcR = srcR;
+ i->Xin.FpBinary.dst = dst;
+ return i;
+}
+X86Instr* X86Instr_FpLdSt ( Bool isLoad, UChar sz, HReg reg, X86AMode* addr ) {
+ X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
+ i->tag = Xin_FpLdSt;
+ i->Xin.FpLdSt.isLoad = isLoad;
+ i->Xin.FpLdSt.sz = sz;
+ i->Xin.FpLdSt.reg = reg;
+ i->Xin.FpLdSt.addr = addr;
+ vassert(sz == 4 || sz == 8 || sz == 10);
+ return i;
+}
+X86Instr* X86Instr_FpLdStI ( Bool isLoad, UChar sz,
+ HReg reg, X86AMode* addr ) {
+ X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
+ i->tag = Xin_FpLdStI;
+ i->Xin.FpLdStI.isLoad = isLoad;
+ i->Xin.FpLdStI.sz = sz;
+ i->Xin.FpLdStI.reg = reg;
+ i->Xin.FpLdStI.addr = addr;
+ vassert(sz == 2 || sz == 4 || sz == 8);
+ return i;
+}
+X86Instr* X86Instr_Fp64to32 ( HReg src, HReg dst ) {
+ X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
+ i->tag = Xin_Fp64to32;
+ i->Xin.Fp64to32.src = src;
+ i->Xin.Fp64to32.dst = dst;
+ return i;
+}
+X86Instr* X86Instr_FpCMov ( X86CondCode cond, HReg src, HReg dst ) {
+ X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
+ i->tag = Xin_FpCMov;
+ i->Xin.FpCMov.cond = cond;
+ i->Xin.FpCMov.src = src;
+ i->Xin.FpCMov.dst = dst;
+ vassert(cond != Xcc_ALWAYS);
+ return i;
+}
+X86Instr* X86Instr_FpLdCW ( X86AMode* addr ) {
+ X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
+ i->tag = Xin_FpLdCW;
+ i->Xin.FpLdCW.addr = addr;
+ return i;
+}
+X86Instr* X86Instr_FpStSW_AX ( void ) {
+ X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
+ i->tag = Xin_FpStSW_AX;
+ return i;
+}
+X86Instr* X86Instr_FpCmp ( HReg srcL, HReg srcR, HReg dst ) {
+ X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
+ i->tag = Xin_FpCmp;
+ i->Xin.FpCmp.srcL = srcL;
+ i->Xin.FpCmp.srcR = srcR;
+ i->Xin.FpCmp.dst = dst;
+ return i;
+}
+
+X86Instr* X86Instr_SseConst ( UShort con, HReg dst ) {
+ X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
+ i->tag = Xin_SseConst;
+ i->Xin.SseConst.con = con;
+ i->Xin.SseConst.dst = dst;
+ vassert(hregClass(dst) == HRcVec128);
+ return i;
+}
+X86Instr* X86Instr_SseLdSt ( Bool isLoad, HReg reg, X86AMode* addr ) {
+ X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
+ i->tag = Xin_SseLdSt;
+ i->Xin.SseLdSt.isLoad = isLoad;
+ i->Xin.SseLdSt.reg = reg;
+ i->Xin.SseLdSt.addr = addr;
+ return i;
+}
+X86Instr* X86Instr_SseLdzLO ( Int sz, HReg reg, X86AMode* addr )
+{
+ X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
+ i->tag = Xin_SseLdzLO;
+ i->Xin.SseLdzLO.sz = toUChar(sz);
+ i->Xin.SseLdzLO.reg = reg;
+ i->Xin.SseLdzLO.addr = addr;
+ vassert(sz == 4 || sz == 8);
+ return i;
+}
+X86Instr* X86Instr_Sse32Fx4 ( X86SseOp op, HReg src, HReg dst ) {
+ X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
+ i->tag = Xin_Sse32Fx4;
+ i->Xin.Sse32Fx4.op = op;
+ i->Xin.Sse32Fx4.src = src;
+ i->Xin.Sse32Fx4.dst = dst;
+ vassert(op != Xsse_MOV);
+ return i;
+}
+X86Instr* X86Instr_Sse32FLo ( X86SseOp op, HReg src, HReg dst ) {
+ X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
+ i->tag = Xin_Sse32FLo;
+ i->Xin.Sse32FLo.op = op;
+ i->Xin.Sse32FLo.src = src;
+ i->Xin.Sse32FLo.dst = dst;
+ vassert(op != Xsse_MOV);
+ return i;
+}
+X86Instr* X86Instr_Sse64Fx2 ( X86SseOp op, HReg src, HReg dst ) {
+ X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
+ i->tag = Xin_Sse64Fx2;
+ i->Xin.Sse64Fx2.op = op;
+ i->Xin.Sse64Fx2.src = src;
+ i->Xin.Sse64Fx2.dst = dst;
+ vassert(op != Xsse_MOV);
+ return i;
+}
+X86Instr* X86Instr_Sse64FLo ( X86SseOp op, HReg src, HReg dst ) {
+ X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
+ i->tag = Xin_Sse64FLo;
+ i->Xin.Sse64FLo.op = op;
+ i->Xin.Sse64FLo.src = src;
+ i->Xin.Sse64FLo.dst = dst;
+ vassert(op != Xsse_MOV);
+ return i;
+}
+X86Instr* X86Instr_SseReRg ( X86SseOp op, HReg re, HReg rg ) {
+ X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
+ i->tag = Xin_SseReRg;
+ i->Xin.SseReRg.op = op;
+ i->Xin.SseReRg.src = re;
+ i->Xin.SseReRg.dst = rg;
+ return i;
+}
+X86Instr* X86Instr_SseCMov ( X86CondCode cond, HReg src, HReg dst ) {
+ X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
+ i->tag = Xin_SseCMov;
+ i->Xin.SseCMov.cond = cond;
+ i->Xin.SseCMov.src = src;
+ i->Xin.SseCMov.dst = dst;
+ vassert(cond != Xcc_ALWAYS);
+ return i;
+}
+X86Instr* X86Instr_SseShuf ( Int order, HReg src, HReg dst ) {
+ X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
+ i->tag = Xin_SseShuf;
+ i->Xin.SseShuf.order = order;
+ i->Xin.SseShuf.src = src;
+ i->Xin.SseShuf.dst = dst;
+ vassert(order >= 0 && order <= 0xFF);
+ return i;
+}
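+
+/* Illustrative use of the above constructors: an insn  addl $1,%vr
+   could be built as  X86Instr_Alu32R(Xalu_ADD, X86RMI_Imm(1), vr)
+   for some (virtual) HReg vr. */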
+
+void ppX86Instr ( X86Instr* i, Bool mode64 ) {
+ vassert(mode64 == False);
+ switch (i->tag) {
+ case Xin_Alu32R:
+ vex_printf("%sl ", showX86AluOp(i->Xin.Alu32R.op));
+ ppX86RMI(i->Xin.Alu32R.src);
+ vex_printf(",");
+ ppHRegX86(i->Xin.Alu32R.dst);
+ return;
+ case Xin_Alu32M:
+ vex_printf("%sl ", showX86AluOp(i->Xin.Alu32M.op));
+ ppX86RI(i->Xin.Alu32M.src);
+ vex_printf(",");
+ ppX86AMode(i->Xin.Alu32M.dst);
+ return;
+ case Xin_Sh32:
+ vex_printf("%sl ", showX86ShiftOp(i->Xin.Sh32.op));
+ if (i->Xin.Sh32.src == 0)
+ vex_printf("%%cl,");
+ else
+ vex_printf("$%d,", (Int)i->Xin.Sh32.src);
+ ppHRegX86(i->Xin.Sh32.dst);
+ return;
+ case Xin_Test32:
+ vex_printf("testl $%d,", (Int)i->Xin.Test32.imm32);
+ ppX86RM(i->Xin.Test32.dst);
+ return;
+ case Xin_Unary32:
+ vex_printf("%sl ", showX86UnaryOp(i->Xin.Unary32.op));
+ ppHRegX86(i->Xin.Unary32.dst);
+ return;
+ case Xin_Lea32:
+ vex_printf("leal ");
+ ppX86AMode(i->Xin.Lea32.am);
+ vex_printf(",");
+ ppHRegX86(i->Xin.Lea32.dst);
+ return;
+ case Xin_MulL:
+ vex_printf("%cmull ", i->Xin.MulL.syned ? 's' : 'u');
+ ppX86RM(i->Xin.MulL.src);
+ return;
+ case Xin_Div:
+ vex_printf("%cdivl ", i->Xin.Div.syned ? 's' : 'u');
+ ppX86RM(i->Xin.Div.src);
+ return;
+ case Xin_Sh3232:
+ vex_printf("%sdl ", showX86ShiftOp(i->Xin.Sh3232.op));
+ if (i->Xin.Sh3232.amt == 0)
+ vex_printf("%%cl,");
+ else
+ vex_printf("$%d,", (Int)i->Xin.Sh3232.amt);
+ ppHRegX86(i->Xin.Sh3232.src);
+ vex_printf(",");
+ ppHRegX86(i->Xin.Sh3232.dst);
+ return;
+ case Xin_Push:
+ vex_printf("pushl ");
+ ppX86RMI(i->Xin.Push.src);
+ return;
+ case Xin_Call:
+ vex_printf("call%s[%d] ",
+ i->Xin.Call.cond==Xcc_ALWAYS
+ ? "" : showX86CondCode(i->Xin.Call.cond),
+ i->Xin.Call.regparms);
+ vex_printf("0x%x", i->Xin.Call.target);
+ break;
+ case Xin_Goto:
+ if (i->Xin.Goto.cond != Xcc_ALWAYS) {
+ vex_printf("if (%%eflags.%s) { ",
+ showX86CondCode(i->Xin.Goto.cond));
+ }
+ if (i->Xin.Goto.jk != Ijk_Boring
+ && i->Xin.Goto.jk != Ijk_Call
+ && i->Xin.Goto.jk != Ijk_Ret) {
+ vex_printf("movl $");
+ ppIRJumpKind(i->Xin.Goto.jk);
+ vex_printf(",%%ebp ; ");
+ }
+ vex_printf("movl ");
+ ppX86RI(i->Xin.Goto.dst);
+ vex_printf(",%%eax ; movl $dispatcher_addr,%%edx ; jmp *%%edx");
+ if (i->Xin.Goto.cond != Xcc_ALWAYS) {
+ vex_printf(" }");
+ }
+ return;
+ case Xin_CMov32:
+ vex_printf("cmov%s ", showX86CondCode(i->Xin.CMov32.cond));
+ ppX86RM(i->Xin.CMov32.src);
+ vex_printf(",");
+ ppHRegX86(i->Xin.CMov32.dst);
+ return;
+ case Xin_LoadEX:
+ vex_printf("mov%c%cl ",
+ i->Xin.LoadEX.syned ? 's' : 'z',
+ i->Xin.LoadEX.szSmall==1 ? 'b' : 'w');
+ ppX86AMode(i->Xin.LoadEX.src);
+ vex_printf(",");
+ ppHRegX86(i->Xin.LoadEX.dst);
+ return;
+ case Xin_Store:
+ vex_printf("mov%c ", i->Xin.Store.sz==1 ? 'b' : 'w');
+ ppHRegX86(i->Xin.Store.src);
+ vex_printf(",");
+ ppX86AMode(i->Xin.Store.dst);
+ return;
+ case Xin_Set32:
+ vex_printf("setl%s ", showX86CondCode(i->Xin.Set32.cond));
+ ppHRegX86(i->Xin.Set32.dst);
+ return;
+ case Xin_Bsfr32:
+ vex_printf("bs%cl ", i->Xin.Bsfr32.isFwds ? 'f' : 'r');
+ ppHRegX86(i->Xin.Bsfr32.src);
+ vex_printf(",");
+ ppHRegX86(i->Xin.Bsfr32.dst);
+ return;
+ case Xin_MFence:
+ vex_printf("mfence(%s)",
+ LibVEX_ppVexHwCaps(VexArchX86,i->Xin.MFence.hwcaps));
+ return;
+ case Xin_ACAS:
+ vex_printf("lock cmpxchg%c ",
+ i->Xin.ACAS.sz==1 ? 'b'
+ : i->Xin.ACAS.sz==2 ? 'w' : 'l');
+ vex_printf("{%%eax->%%ebx},");
+ ppX86AMode(i->Xin.ACAS.addr);
+ return;
+ case Xin_DACAS:
+ vex_printf("lock cmpxchg8b {%%edx:%%eax->%%ecx:%%ebx},");
+ ppX86AMode(i->Xin.DACAS.addr);
+ return;
+ case Xin_FpUnary:
+ vex_printf("g%sD ", showX86FpOp(i->Xin.FpUnary.op));
+ ppHRegX86(i->Xin.FpUnary.src);
+ vex_printf(",");
+ ppHRegX86(i->Xin.FpUnary.dst);
+ break;
+ case Xin_FpBinary:
+ vex_printf("g%sD ", showX86FpOp(i->Xin.FpBinary.op));
+ ppHRegX86(i->Xin.FpBinary.srcL);
+ vex_printf(",");
+ ppHRegX86(i->Xin.FpBinary.srcR);
+ vex_printf(",");
+ ppHRegX86(i->Xin.FpBinary.dst);
+ break;
+ case Xin_FpLdSt:
+ if (i->Xin.FpLdSt.isLoad) {
+ vex_printf("gld%c " , i->Xin.FpLdSt.sz==10 ? 'T'
+ : (i->Xin.FpLdSt.sz==8 ? 'D' : 'F'));
+ ppX86AMode(i->Xin.FpLdSt.addr);
+ vex_printf(", ");
+ ppHRegX86(i->Xin.FpLdSt.reg);
+ } else {
+ vex_printf("gst%c " , i->Xin.FpLdSt.sz==10 ? 'T'
+ : (i->Xin.FpLdSt.sz==8 ? 'D' : 'F'));
+ ppHRegX86(i->Xin.FpLdSt.reg);
+ vex_printf(", ");
+ ppX86AMode(i->Xin.FpLdSt.addr);
+ }
+ return;
+ case Xin_FpLdStI:
+ if (i->Xin.FpLdStI.isLoad) {
+ vex_printf("gild%s ", i->Xin.FpLdStI.sz==8 ? "ll" :
+ i->Xin.FpLdStI.sz==4 ? "l" : "w");
+ ppX86AMode(i->Xin.FpLdStI.addr);
+ vex_printf(", ");
+ ppHRegX86(i->Xin.FpLdStI.reg);
+ } else {
+ vex_printf("gist%s ", i->Xin.FpLdStI.sz==8 ? "ll" :
+ i->Xin.FpLdStI.sz==4 ? "l" : "w");
+ ppHRegX86(i->Xin.FpLdStI.reg);
+ vex_printf(", ");
+ ppX86AMode(i->Xin.FpLdStI.addr);
+ }
+ return;
+ case Xin_Fp64to32:
+ vex_printf("gdtof ");
+ ppHRegX86(i->Xin.Fp64to32.src);
+ vex_printf(",");
+ ppHRegX86(i->Xin.Fp64to32.dst);
+ return;
+ case Xin_FpCMov:
+ vex_printf("gcmov%s ", showX86CondCode(i->Xin.FpCMov.cond));
+ ppHRegX86(i->Xin.FpCMov.src);
+ vex_printf(",");
+ ppHRegX86(i->Xin.FpCMov.dst);
+ return;
+ case Xin_FpLdCW:
+ vex_printf("fldcw ");
+ ppX86AMode(i->Xin.FpLdCW.addr);
+ return;
+ case Xin_FpStSW_AX:
+ vex_printf("fstsw %%ax");
+ return;
+ case Xin_FpCmp:
+ vex_printf("gcmp ");
+ ppHRegX86(i->Xin.FpCmp.srcL);
+ vex_printf(",");
+ ppHRegX86(i->Xin.FpCmp.srcR);
+ vex_printf(",");
+ ppHRegX86(i->Xin.FpCmp.dst);
+ break;
+ case Xin_SseConst:
+ vex_printf("const $0x%04x,", (Int)i->Xin.SseConst.con);
+ ppHRegX86(i->Xin.SseConst.dst);
+ break;
+ case Xin_SseLdSt:
+ vex_printf("movups ");
+ if (i->Xin.SseLdSt.isLoad) {
+ ppX86AMode(i->Xin.SseLdSt.addr);
+ vex_printf(",");
+ ppHRegX86(i->Xin.SseLdSt.reg);
+ } else {
+ ppHRegX86(i->Xin.SseLdSt.reg);
+ vex_printf(",");
+ ppX86AMode(i->Xin.SseLdSt.addr);
+ }
+ return;
+ case Xin_SseLdzLO:
+ vex_printf("movs%s ", i->Xin.SseLdzLO.sz==4 ? "s" : "d");
+ ppX86AMode(i->Xin.SseLdzLO.addr);
+ vex_printf(",");
+ ppHRegX86(i->Xin.SseLdzLO.reg);
+ return;
+ case Xin_Sse32Fx4:
+ vex_printf("%sps ", showX86SseOp(i->Xin.Sse32Fx4.op));
+ ppHRegX86(i->Xin.Sse32Fx4.src);
+ vex_printf(",");
+ ppHRegX86(i->Xin.Sse32Fx4.dst);
+ return;
+ case Xin_Sse32FLo:
+ vex_printf("%sss ", showX86SseOp(i->Xin.Sse32FLo.op));
+ ppHRegX86(i->Xin.Sse32FLo.src);
+ vex_printf(",");
+ ppHRegX86(i->Xin.Sse32FLo.dst);
+ return;
+ case Xin_Sse64Fx2:
+ vex_printf("%spd ", showX86SseOp(i->Xin.Sse64Fx2.op));
+ ppHRegX86(i->Xin.Sse64Fx2.src);
+ vex_printf(",");
+ ppHRegX86(i->Xin.Sse64Fx2.dst);
+ return;
+ case Xin_Sse64FLo:
+ vex_printf("%ssd ", showX86SseOp(i->Xin.Sse64FLo.op));
+ ppHRegX86(i->Xin.Sse64FLo.src);
+ vex_printf(",");
+ ppHRegX86(i->Xin.Sse64FLo.dst);
+ return;
+ case Xin_SseReRg:
+ vex_printf("%s ", showX86SseOp(i->Xin.SseReRg.op));
+ ppHRegX86(i->Xin.SseReRg.src);
+ vex_printf(",");
+ ppHRegX86(i->Xin.SseReRg.dst);
+ return;
+ case Xin_SseCMov:
+ vex_printf("cmov%s ", showX86CondCode(i->Xin.SseCMov.cond));
+ ppHRegX86(i->Xin.SseCMov.src);
+ vex_printf(",");
+ ppHRegX86(i->Xin.SseCMov.dst);
+ return;
+ case Xin_SseShuf:
+ vex_printf("pshufd $0x%x,", i->Xin.SseShuf.order);
+ ppHRegX86(i->Xin.SseShuf.src);
+ vex_printf(",");
+ ppHRegX86(i->Xin.SseShuf.dst);
+ return;
+
+ default:
+ vpanic("ppX86Instr");
+ }
+}
+
+/* --------- Helpers for register allocation. --------- */
+
+void getRegUsage_X86Instr (HRegUsage* u, X86Instr* i, Bool mode64)
+{
+ Bool unary;
+ vassert(mode64 == False);
+ initHRegUsage(u);
+ switch (i->tag) {
+ case Xin_Alu32R:
+ addRegUsage_X86RMI(u, i->Xin.Alu32R.src);
+ if (i->Xin.Alu32R.op == Xalu_MOV) {
+ addHRegUse(u, HRmWrite, i->Xin.Alu32R.dst);
+ return;
+ }
+ if (i->Xin.Alu32R.op == Xalu_CMP) {
+ addHRegUse(u, HRmRead, i->Xin.Alu32R.dst);
+ return;
+ }
+ addHRegUse(u, HRmModify, i->Xin.Alu32R.dst);
+ return;
+ case Xin_Alu32M:
+ addRegUsage_X86RI(u, i->Xin.Alu32M.src);
+ addRegUsage_X86AMode(u, i->Xin.Alu32M.dst);
+ return;
+ case Xin_Sh32:
+ addHRegUse(u, HRmModify, i->Xin.Sh32.dst);
+ if (i->Xin.Sh32.src == 0)
+ addHRegUse(u, HRmRead, hregX86_ECX());
+ return;
+ case Xin_Test32:
+ addRegUsage_X86RM(u, i->Xin.Test32.dst, HRmRead);
+ return;
+ case Xin_Unary32:
+ addHRegUse(u, HRmModify, i->Xin.Unary32.dst);
+ return;
+ case Xin_Lea32:
+ addRegUsage_X86AMode(u, i->Xin.Lea32.am);
+ addHRegUse(u, HRmWrite, i->Xin.Lea32.dst);
+ return;
+ case Xin_MulL:
+ addRegUsage_X86RM(u, i->Xin.MulL.src, HRmRead);
+ addHRegUse(u, HRmModify, hregX86_EAX());
+ addHRegUse(u, HRmWrite, hregX86_EDX());
+ return;
+ case Xin_Div:
+ addRegUsage_X86RM(u, i->Xin.Div.src, HRmRead);
+ addHRegUse(u, HRmModify, hregX86_EAX());
+ addHRegUse(u, HRmModify, hregX86_EDX());
+ return;
+ case Xin_Sh3232:
+ addHRegUse(u, HRmRead, i->Xin.Sh3232.src);
+ addHRegUse(u, HRmModify, i->Xin.Sh3232.dst);
+ if (i->Xin.Sh3232.amt == 0)
+ addHRegUse(u, HRmRead, hregX86_ECX());
+ return;
+ case Xin_Push:
+ addRegUsage_X86RMI(u, i->Xin.Push.src);
+ addHRegUse(u, HRmModify, hregX86_ESP());
+ return;
+ case Xin_Call:
+ /* This is a bit subtle. */
+ /* First off, claim it trashes all the caller-saved regs
+ which fall within the register allocator's jurisdiction.
+ These I believe to be %eax %ecx %edx and all the xmm
+ registers. */
+ addHRegUse(u, HRmWrite, hregX86_EAX());
+ addHRegUse(u, HRmWrite, hregX86_ECX());
+ addHRegUse(u, HRmWrite, hregX86_EDX());
+ addHRegUse(u, HRmWrite, hregX86_XMM0());
+ addHRegUse(u, HRmWrite, hregX86_XMM1());
+ addHRegUse(u, HRmWrite, hregX86_XMM2());
+ addHRegUse(u, HRmWrite, hregX86_XMM3());
+ addHRegUse(u, HRmWrite, hregX86_XMM4());
+ addHRegUse(u, HRmWrite, hregX86_XMM5());
+ addHRegUse(u, HRmWrite, hregX86_XMM6());
+ addHRegUse(u, HRmWrite, hregX86_XMM7());
+ /* Now we have to state any parameter-carrying registers
+ which might be read. This depends on the regparmness. */
+ switch (i->Xin.Call.regparms) {
+ case 3: addHRegUse(u, HRmRead, hregX86_ECX()); /*fallthru*/
+ case 2: addHRegUse(u, HRmRead, hregX86_EDX()); /*fallthru*/
+ case 1: addHRegUse(u, HRmRead, hregX86_EAX()); break;
+ case 0: break;
+ default: vpanic("getRegUsage_X86Instr:Call:regparms");
+ }
+ /* Finally, there is the issue that the insn trashes a
+ register because the literal target address has to be
+ loaded into a register. Fortunately, for the 0/1/2
+ regparm case, we can use EAX, EDX and ECX respectively, so
+ this does not cause any further damage. For the 3-regparm
+ case, we'll have to choose another register arbitrarily --
+ since A, D and C are used for parameters -- and so we might
+ as well choose EDI. */
+ if (i->Xin.Call.regparms == 3)
+ addHRegUse(u, HRmWrite, hregX86_EDI());
+ /* Upshot of this is that the assembler really must observe
+ the here-stated convention of which register to use as an
+ address temporary, depending on the regparmness: 0==EAX,
+ 1==EDX, 2==ECX, 3==EDI. */
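+ /* e.g. (illustrative): for a 2-regparm call the args travel in
+ %eax and %edx, so the emitter materialises the target as
+ movl $target,%ecx ; call *%ecx. */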
+ return;
+ case Xin_Goto:
+ addRegUsage_X86RI(u, i->Xin.Goto.dst);
+ addHRegUse(u, HRmWrite, hregX86_EAX()); /* used for next guest addr */
+ addHRegUse(u, HRmWrite, hregX86_EDX()); /* used for dispatcher addr */
+ if (i->Xin.Goto.jk != Ijk_Boring
+ && i->Xin.Goto.jk != Ijk_Call
+ && i->Xin.Goto.jk != Ijk_Ret)
+ /* note, this is irrelevant since ebp is not actually
+ available to the allocator. But still .. */
+ addHRegUse(u, HRmWrite, hregX86_EBP());
+ return;
+ case Xin_CMov32:
+ addRegUsage_X86RM(u, i->Xin.CMov32.src, HRmRead);
+ addHRegUse(u, HRmModify, i->Xin.CMov32.dst);
+ return;
+ case Xin_LoadEX:
+ addRegUsage_X86AMode(u, i->Xin.LoadEX.src);
+ addHRegUse(u, HRmWrite, i->Xin.LoadEX.dst);
+ return;
+ case Xin_Store:
+ addHRegUse(u, HRmRead, i->Xin.Store.src);
+ addRegUsage_X86AMode(u, i->Xin.Store.dst);
+ return;
+ case Xin_Set32:
+ addHRegUse(u, HRmWrite, i->Xin.Set32.dst);
+ return;
+ case Xin_Bsfr32:
+ addHRegUse(u, HRmRead, i->Xin.Bsfr32.src);
+ addHRegUse(u, HRmWrite, i->Xin.Bsfr32.dst);
+ return;
+ case Xin_MFence:
+ return;
+ case Xin_ACAS:
+ addRegUsage_X86AMode(u, i->Xin.ACAS.addr);
+ addHRegUse(u, HRmRead, hregX86_EBX());
+ addHRegUse(u, HRmModify, hregX86_EAX());
+ return;
+ case Xin_DACAS:
+ addRegUsage_X86AMode(u, i->Xin.DACAS.addr);
+ addHRegUse(u, HRmRead, hregX86_ECX());
+ addHRegUse(u, HRmRead, hregX86_EBX());
+ addHRegUse(u, HRmModify, hregX86_EDX());
+ addHRegUse(u, HRmModify, hregX86_EAX());
+ return;
+ case Xin_FpUnary:
+ addHRegUse(u, HRmRead, i->Xin.FpUnary.src);
+ addHRegUse(u, HRmWrite, i->Xin.FpUnary.dst);
+ return;
+ case Xin_FpBinary:
+ addHRegUse(u, HRmRead, i->Xin.FpBinary.srcL);
+ addHRegUse(u, HRmRead, i->Xin.FpBinary.srcR);
+ addHRegUse(u, HRmWrite, i->Xin.FpBinary.dst);
+ return;
+ case Xin_FpLdSt:
+ addRegUsage_X86AMode(u, i->Xin.FpLdSt.addr);
+ addHRegUse(u, i->Xin.FpLdSt.isLoad ? HRmWrite : HRmRead,
+ i->Xin.FpLdSt.reg);
+ return;
+ case Xin_FpLdStI:
+ addRegUsage_X86AMode(u, i->Xin.FpLdStI.addr);
+ addHRegUse(u, i->Xin.FpLdStI.isLoad ? HRmWrite : HRmRead,
+ i->Xin.FpLdStI.reg);
+ return;
+ case Xin_Fp64to32:
+ addHRegUse(u, HRmRead, i->Xin.Fp64to32.src);
+ addHRegUse(u, HRmWrite, i->Xin.Fp64to32.dst);
+ return;
+ case Xin_FpCMov:
+ addHRegUse(u, HRmRead, i->Xin.FpCMov.src);
+ addHRegUse(u, HRmModify, i->Xin.FpCMov.dst);
+ return;
+ case Xin_FpLdCW:
+ addRegUsage_X86AMode(u, i->Xin.FpLdCW.addr);
+ return;
+ case Xin_FpStSW_AX:
+ addHRegUse(u, HRmWrite, hregX86_EAX());
+ return;
+ case Xin_FpCmp:
+ addHRegUse(u, HRmRead, i->Xin.FpCmp.srcL);
+ addHRegUse(u, HRmRead, i->Xin.FpCmp.srcR);
+ addHRegUse(u, HRmWrite, i->Xin.FpCmp.dst);
+ addHRegUse(u, HRmWrite, hregX86_EAX());
+ return;
+ case Xin_SseLdSt:
+ addRegUsage_X86AMode(u, i->Xin.SseLdSt.addr);
+ addHRegUse(u, i->Xin.SseLdSt.isLoad ? HRmWrite : HRmRead,
+ i->Xin.SseLdSt.reg);
+ return;
+ case Xin_SseLdzLO:
+ addRegUsage_X86AMode(u, i->Xin.SseLdzLO.addr);
+ addHRegUse(u, HRmWrite, i->Xin.SseLdzLO.reg);
+ return;
+ case Xin_SseConst:
+ addHRegUse(u, HRmWrite, i->Xin.SseConst.dst);
+ return;
+ case Xin_Sse32Fx4:
+ vassert(i->Xin.Sse32Fx4.op != Xsse_MOV);
+ unary = toBool( i->Xin.Sse32Fx4.op == Xsse_RCPF
+ || i->Xin.Sse32Fx4.op == Xsse_RSQRTF
+ || i->Xin.Sse32Fx4.op == Xsse_SQRTF );
+ addHRegUse(u, HRmRead, i->Xin.Sse32Fx4.src);
+ addHRegUse(u, unary ? HRmWrite : HRmModify,
+ i->Xin.Sse32Fx4.dst);
+ return;
+ case Xin_Sse32FLo:
+ vassert(i->Xin.Sse32FLo.op != Xsse_MOV);
+ unary = toBool( i->Xin.Sse32FLo.op == Xsse_RCPF
+ || i->Xin.Sse32FLo.op == Xsse_RSQRTF
+ || i->Xin.Sse32FLo.op == Xsse_SQRTF );
+ addHRegUse(u, HRmRead, i->Xin.Sse32FLo.src);
+ addHRegUse(u, unary ? HRmWrite : HRmModify,
+ i->Xin.Sse32FLo.dst);
+ return;
+ case Xin_Sse64Fx2:
+ vassert(i->Xin.Sse64Fx2.op != Xsse_MOV);
+ unary = toBool( i->Xin.Sse64Fx2.op == Xsse_RCPF
+ || i->Xin.Sse64Fx2.op == Xsse_RSQRTF
+ || i->Xin.Sse64Fx2.op == Xsse_SQRTF );
+ addHRegUse(u, HRmRead, i->Xin.Sse64Fx2.src);
+ addHRegUse(u, unary ? HRmWrite : HRmModify,
+ i->Xin.Sse64Fx2.dst);
+ return;
+ case Xin_Sse64FLo:
+ vassert(i->Xin.Sse64FLo.op != Xsse_MOV);
+ unary = toBool( i->Xin.Sse64FLo.op == Xsse_RCPF
+ || i->Xin.Sse64FLo.op == Xsse_RSQRTF
+ || i->Xin.Sse64FLo.op == Xsse_SQRTF );
+ addHRegUse(u, HRmRead, i->Xin.Sse64FLo.src);
+ addHRegUse(u, unary ? HRmWrite : HRmModify,
+ i->Xin.Sse64FLo.dst);
+ return;
+ case Xin_SseReRg:
+ if (i->Xin.SseReRg.op == Xsse_XOR
+ && i->Xin.SseReRg.src == i->Xin.SseReRg.dst) {
+ /* reg-alloc needs to understand 'xor r,r' as a write of r */
+ /* (as opposed to a rite of passage :-) */
+ addHRegUse(u, HRmWrite, i->Xin.SseReRg.dst);
+ } else {
+ addHRegUse(u, HRmRead, i->Xin.SseReRg.src);
+ addHRegUse(u, i->Xin.SseReRg.op == Xsse_MOV
+ ? HRmWrite : HRmModify,
+ i->Xin.SseReRg.dst);
+ }
+ return;
+ case Xin_SseCMov:
+ addHRegUse(u, HRmRead, i->Xin.SseCMov.src);
+ addHRegUse(u, HRmModify, i->Xin.SseCMov.dst);
+ return;
+ case Xin_SseShuf:
+ addHRegUse(u, HRmRead, i->Xin.SseShuf.src);
+ addHRegUse(u, HRmWrite, i->Xin.SseShuf.dst);
+ return;
+ default:
+ ppX86Instr(i, False);
+ vpanic("getRegUsage_X86Instr");
+ }
+}
+
+/* local helper */
+static void mapReg( HRegRemap* m, HReg* r )
+{
+ *r = lookupHRegRemap(m, *r);
+}
+
+void mapRegs_X86Instr ( HRegRemap* m, X86Instr* i, Bool mode64 )
+{
+ vassert(mode64 == False);
+ switch (i->tag) {
+ case Xin_Alu32R:
+ mapRegs_X86RMI(m, i->Xin.Alu32R.src);
+ mapReg(m, &i->Xin.Alu32R.dst);
+ return;
+ case Xin_Alu32M:
+ mapRegs_X86RI(m, i->Xin.Alu32M.src);
+ mapRegs_X86AMode(m, i->Xin.Alu32M.dst);
+ return;
+ case Xin_Sh32:
+ mapReg(m, &i->Xin.Sh32.dst);
+ return;
+ case Xin_Test32:
+ mapRegs_X86RM(m, i->Xin.Test32.dst);
+ return;
+ case Xin_Unary32:
+ mapReg(m, &i->Xin.Unary32.dst);
+ return;
+ case Xin_Lea32:
+ mapRegs_X86AMode(m, i->Xin.Lea32.am);
+ mapReg(m, &i->Xin.Lea32.dst);
+ return;
+ case Xin_MulL:
+ mapRegs_X86RM(m, i->Xin.MulL.src);
+ return;
+ case Xin_Div:
+ mapRegs_X86RM(m, i->Xin.Div.src);
+ return;
+ case Xin_Sh3232:
+ mapReg(m, &i->Xin.Sh3232.src);
+ mapReg(m, &i->Xin.Sh3232.dst);
+ return;
+ case Xin_Push:
+ mapRegs_X86RMI(m, i->Xin.Push.src);
+ return;
+ case Xin_Call:
+ return;
+ case Xin_Goto:
+ mapRegs_X86RI(m, i->Xin.Goto.dst);
+ return;
+ case Xin_CMov32:
+ mapRegs_X86RM(m, i->Xin.CMov32.src);
+ mapReg(m, &i->Xin.CMov32.dst);
+ return;
+ case Xin_LoadEX:
+ mapRegs_X86AMode(m, i->Xin.LoadEX.src);
+ mapReg(m, &i->Xin.LoadEX.dst);
+ return;
+ case Xin_Store:
+ mapReg(m, &i->Xin.Store.src);
+ mapRegs_X86AMode(m, i->Xin.Store.dst);
+ return;
+ case Xin_Set32:
+ mapReg(m, &i->Xin.Set32.dst);
+ return;
+ case Xin_Bsfr32:
+ mapReg(m, &i->Xin.Bsfr32.src);
+ mapReg(m, &i->Xin.Bsfr32.dst);
+ return;
+ case Xin_MFence:
+ return;
+ case Xin_ACAS:
+ mapRegs_X86AMode(m, i->Xin.ACAS.addr);
+ return;
+ case Xin_DACAS:
+ mapRegs_X86AMode(m, i->Xin.DACAS.addr);
+ return;
+ case Xin_FpUnary:
+ mapReg(m, &i->Xin.FpUnary.src);
+ mapReg(m, &i->Xin.FpUnary.dst);
+ return;
+ case Xin_FpBinary:
+ mapReg(m, &i->Xin.FpBinary.srcL);
+ mapReg(m, &i->Xin.FpBinary.srcR);
+ mapReg(m, &i->Xin.FpBinary.dst);
+ return;
+ case Xin_FpLdSt:
+ mapRegs_X86AMode(m, i->Xin.FpLdSt.addr);
+ mapReg(m, &i->Xin.FpLdSt.reg);
+ return;
+ case Xin_FpLdStI:
+ mapRegs_X86AMode(m, i->Xin.FpLdStI.addr);
+ mapReg(m, &i->Xin.FpLdStI.reg);
+ return;
+ case Xin_Fp64to32:
+ mapReg(m, &i->Xin.Fp64to32.src);
+ mapReg(m, &i->Xin.Fp64to32.dst);
+ return;
+ case Xin_FpCMov:
+ mapReg(m, &i->Xin.FpCMov.src);
+ mapReg(m, &i->Xin.FpCMov.dst);
+ return;
+ case Xin_FpLdCW:
+ mapRegs_X86AMode(m, i->Xin.FpLdCW.addr);
+ return;
+ case Xin_FpStSW_AX:
+ return;
+ case Xin_FpCmp:
+ mapReg(m, &i->Xin.FpCmp.srcL);
+ mapReg(m, &i->Xin.FpCmp.srcR);
+ mapReg(m, &i->Xin.FpCmp.dst);
+ return;
+ case Xin_SseConst:
+ mapReg(m, &i->Xin.SseConst.dst);
+ return;
+ case Xin_SseLdSt:
+ mapReg(m, &i->Xin.SseLdSt.reg);
+ mapRegs_X86AMode(m, i->Xin.SseLdSt.addr);
+ break;
+ case Xin_SseLdzLO:
+ mapReg(m, &i->Xin.SseLdzLO.reg);
+ mapRegs_X86AMode(m, i->Xin.SseLdzLO.addr);
+ break;
+ case Xin_Sse32Fx4:
+ mapReg(m, &i->Xin.Sse32Fx4.src);
+ mapReg(m, &i->Xin.Sse32Fx4.dst);
+ return;
+ case Xin_Sse32FLo:
+ mapReg(m, &i->Xin.Sse32FLo.src);
+ mapReg(m, &i->Xin.Sse32FLo.dst);
+ return;
+ case Xin_Sse64Fx2:
+ mapReg(m, &i->Xin.Sse64Fx2.src);
+ mapReg(m, &i->Xin.Sse64Fx2.dst);
+ return;
+ case Xin_Sse64FLo:
+ mapReg(m, &i->Xin.Sse64FLo.src);
+ mapReg(m, &i->Xin.Sse64FLo.dst);
+ return;
+ case Xin_SseReRg:
+ mapReg(m, &i->Xin.SseReRg.src);
+ mapReg(m, &i->Xin.SseReRg.dst);
+ return;
+ case Xin_SseCMov:
+ mapReg(m, &i->Xin.SseCMov.src);
+ mapReg(m, &i->Xin.SseCMov.dst);
+ return;
+ case Xin_SseShuf:
+ mapReg(m, &i->Xin.SseShuf.src);
+ mapReg(m, &i->Xin.SseShuf.dst);
+ return;
+ default:
+ ppX86Instr(i, mode64);
+ vpanic("mapRegs_X86Instr");
+ }
+}
+
+/* Figure out if i represents a reg-reg move, and if so assign the
+ source and destination to *src and *dst. If in doubt say No. Used
+ by the register allocator to do move coalescing.
+*/
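+/* e.g. a move  movl %vr5,%vr9 , built as
+ X86Instr_Alu32R(Xalu_MOV, X86RMI_Reg(vr5), vr9), is reported as
+ coalescible with *src = vr5, *dst = vr9. */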
+Bool isMove_X86Instr ( X86Instr* i, HReg* src, HReg* dst )
+{
+ /* Moves between integer regs */
+ if (i->tag == Xin_Alu32R) {
+ if (i->Xin.Alu32R.op != Xalu_MOV)
+ return False;
+ if (i->Xin.Alu32R.src->tag != Xrmi_Reg)
+ return False;
+ *src = i->Xin.Alu32R.src->Xrmi.Reg.reg;
+ *dst = i->Xin.Alu32R.dst;
+ return True;
+ }
+ /* Moves between FP regs */
+ if (i->tag == Xin_FpUnary) {
+ if (i->Xin.FpUnary.op != Xfp_MOV)
+ return False;
+ *src = i->Xin.FpUnary.src;
+ *dst = i->Xin.FpUnary.dst;
+ return True;
+ }
+ if (i->tag == Xin_SseReRg) {
+ if (i->Xin.SseReRg.op != Xsse_MOV)
+ return False;
+ *src = i->Xin.SseReRg.src;
+ *dst = i->Xin.SseReRg.dst;
+ return True;
+ }
+ return False;
+}
+
+
+/* Generate x86 spill/reload instructions under the direction of the
+ register allocator. Note it's critical these don't write the
+ condition codes. */
+
+void genSpill_X86 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
+ HReg rreg, Int offsetB, Bool mode64 )
+{
+ X86AMode* am;
+ vassert(offsetB >= 0);
+ vassert(!hregIsVirtual(rreg));
+ vassert(mode64 == False);
+ *i1 = *i2 = NULL;
+ am = X86AMode_IR(offsetB, hregX86_EBP());
+ switch (hregClass(rreg)) {
+ case HRcInt32:
+ *i1 = X86Instr_Alu32M ( Xalu_MOV, X86RI_Reg(rreg), am );
+ return;
+ case HRcFlt64:
+ *i1 = X86Instr_FpLdSt ( False/*store*/, 10, rreg, am );
+ return;
+ case HRcVec128:
+ *i1 = X86Instr_SseLdSt ( False/*store*/, rreg, am );
+ return;
+ default:
+ ppHRegClass(hregClass(rreg));
+ vpanic("genSpill_X86: unimplemented regclass");
+ }
+}
+
+void genReload_X86 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
+ HReg rreg, Int offsetB, Bool mode64 )
+{
+ X86AMode* am;
+ vassert(offsetB >= 0);
+ vassert(!hregIsVirtual(rreg));
+ vassert(mode64 == False);
+ *i1 = *i2 = NULL;
+ am = X86AMode_IR(offsetB, hregX86_EBP());
+ switch (hregClass(rreg)) {
+ case HRcInt32:
+ *i1 = X86Instr_Alu32R ( Xalu_MOV, X86RMI_Mem(am), rreg );
+ return;
+ case HRcFlt64:
+ *i1 = X86Instr_FpLdSt ( True/*load*/, 10, rreg, am );
+ return;
+ case HRcVec128:
+ *i1 = X86Instr_SseLdSt ( True/*load*/, rreg, am );
+ return;
+ default:
+ ppHRegClass(hregClass(rreg));
+ vpanic("genReload_X86: unimplemented regclass");
+ }
+}
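+
+/* e.g. (illustrative): for an HRcInt32 rreg and offsetB 24,
+ genSpill_X86 produces  movl %rreg,24(%ebp)  and genReload_X86 the
+ matching  movl 24(%ebp),%rreg ; neither affects %eflags. */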
+
+/* The given instruction reads the specified vreg exactly once, and
+ that vreg is currently located at the given spill offset. If
+ possible, return a variant of the instruction which instead
+ references the spill slot directly. */
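+/* e.g. (illustrative):  orl %vreg,%r3  with vreg spilled at offset 64
+ can be returned as  orl 64(%ebp),%r3 , avoiding a separate reload. */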
+
+X86Instr* directReload_X86( X86Instr* i, HReg vreg, Short spill_off )
+{
+ vassert(spill_off >= 0 && spill_off < 10000); /* let's say */
+
+ /* Deal with form: src=RMI_Reg, dst=Reg where src == vreg
+ Convert to: src=RMI_Mem, dst=Reg
+ */
+ if (i->tag == Xin_Alu32R
+ && (i->Xin.Alu32R.op == Xalu_MOV || i->Xin.Alu32R.op == Xalu_OR
+ || i->Xin.Alu32R.op == Xalu_XOR)
+ && i->Xin.Alu32R.src->tag == Xrmi_Reg
+ && i->Xin.Alu32R.src->Xrmi.Reg.reg == vreg) {
+ vassert(i->Xin.Alu32R.dst != vreg);
+ return X86Instr_Alu32R(
+ i->Xin.Alu32R.op,
+ X86RMI_Mem( X86AMode_IR( spill_off, hregX86_EBP())),
+ i->Xin.Alu32R.dst
+ );
+ }
+
+ /* Deal with form: src=RMI_Imm, dst=Reg where dst == vreg
+ Convert to: src=RI_Imm, dst=Mem
+ */
+ if (i->tag == Xin_Alu32R
+ && (i->Xin.Alu32R.op == Xalu_CMP)
+ && i->Xin.Alu32R.src->tag == Xrmi_Imm
+ && i->Xin.Alu32R.dst == vreg) {
+ return X86Instr_Alu32M(
+ i->Xin.Alu32R.op,
+ X86RI_Imm( i->Xin.Alu32R.src->Xrmi.Imm.imm32 ),
+ X86AMode_IR( spill_off, hregX86_EBP())
+ );
+ }
+
+ /* Deal with form: Push(RMI_Reg)
+ Convert to: Push(RMI_Mem)
+ */
+ if (i->tag == Xin_Push
+ && i->Xin.Push.src->tag == Xrmi_Reg
+ && i->Xin.Push.src->Xrmi.Reg.reg == vreg) {
+ return X86Instr_Push(
+ X86RMI_Mem( X86AMode_IR( spill_off, hregX86_EBP()))
+ );
+ }
+
+ /* Deal with form: CMov32(src=RM_Reg, dst) where vreg == src
+ Convert to CMov32(RM_Mem, dst) */
+ if (i->tag == Xin_CMov32
+ && i->Xin.CMov32.src->tag == Xrm_Reg
+ && i->Xin.CMov32.src->Xrm.Reg.reg == vreg) {
+ vassert(i->Xin.CMov32.dst != vreg);
+ return X86Instr_CMov32(
+ i->Xin.CMov32.cond,
+ X86RM_Mem( X86AMode_IR( spill_off, hregX86_EBP() )),
+ i->Xin.CMov32.dst
+ );
+ }
+
+ /* Deal with form: Test32(imm,RM_Reg vreg) -> Test32(imm,amode) */
+ if (i->tag == Xin_Test32
+ && i->Xin.Test32.dst->tag == Xrm_Reg
+ && i->Xin.Test32.dst->Xrm.Reg.reg == vreg) {
+ return X86Instr_Test32(
+ i->Xin.Test32.imm32,
+ X86RM_Mem( X86AMode_IR( spill_off, hregX86_EBP() ) )
+ );
+ }
+
+ return NULL;
+}
+
+
+/* --------- The x86 assembler (bleh.) --------- */
+
+static UChar iregNo ( HReg r )
+{
+ UInt n;
+ vassert(hregClass(r) == HRcInt32);
+ vassert(!hregIsVirtual(r));
+ n = hregNumber(r);
+ vassert(n <= 7);
+ return toUChar(n);
+}
+
+static UInt fregNo ( HReg r )
+{
+ UInt n;
+ vassert(hregClass(r) == HRcFlt64);
+ vassert(!hregIsVirtual(r));
+ n = hregNumber(r);
+ vassert(n <= 5);
+ return n;
+}
+
+static UInt vregNo ( HReg r )
+{
+ UInt n;
+ vassert(hregClass(r) == HRcVec128);
+ vassert(!hregIsVirtual(r));
+ n = hregNumber(r);
+ vassert(n <= 7);
+ return n;
+}
+
+static UChar mkModRegRM ( UChar mod, UChar reg, UChar regmem )
+{
+ return toUChar( ((mod & 3) << 6)
+ | ((reg & 7) << 3)
+ | (regmem & 7) );
+}
+
+static UChar mkSIB ( Int shift, Int regindex, Int regbase )
+{
+ return toUChar( ((shift & 3) << 6)
+ | ((regindex & 7) << 3)
+ | (regbase & 7) );
+}
+
+static UChar* emit32 ( UChar* p, UInt w32 )
+{
+ *p++ = toUChar( w32 & 0x000000FF);
+ *p++ = toUChar((w32 >> 8) & 0x000000FF);
+ *p++ = toUChar((w32 >> 16) & 0x000000FF);
+ *p++ = toUChar((w32 >> 24) & 0x000000FF);
+ return p;
+}
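+
+/* e.g. emit32(p, 0x12345678) stores the bytes 78 56 34 12 -- i.e.
+ little-endian -- and returns p+4. */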
+
+/* Does a sign-extend of the lowest 8 bits give
+ the original number? */
+static Bool fits8bits ( UInt w32 )
+{
+ Int i32 = (Int)w32;
+ return toBool(i32 == ((i32 << 24) >> 24));
+}
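+
+/* e.g. fits8bits(0xFFFFFF80) (-128) is True, whereas fits8bits(0x80)
+ (+128) is False, since sign-extending 0x80 gives 0xFFFFFF80. */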
+
+
+/* Forming mod-reg-rm bytes and scale-index-base bytes.
+
+ greg, 0(ereg) | ereg != ESP && ereg != EBP
+ = 00 greg ereg
+
+ greg, d8(ereg) | ereg != ESP
+ = 01 greg ereg, d8
+
+ greg, d32(ereg) | ereg != ESP
+ = 10 greg ereg, d32
+
+ greg, d8(%esp) = 01 greg 100, 0x24, d8
+
+ -----------------------------------------------
+
+ greg, d8(base,index,scale)
+ | index != ESP
+ = 01 greg 100, scale index base, d8
+
+ greg, d32(base,index,scale)
+ | index != ESP
+ = 10 greg 100, scale index base, d32
+*/
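+/* e.g. (illustrative): for greg=%eax (0) and am = 4(%ebx), doAMode_M
+ emits  01 000 011, 0x04  -- i.e. the bytes 0x43 0x04. */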
+static UChar* doAMode_M ( UChar* p, HReg greg, X86AMode* am )
+{
+ if (am->tag == Xam_IR) {
+ if (am->Xam.IR.imm == 0
+ && am->Xam.IR.reg != hregX86_ESP()
+ && am->Xam.IR.reg != hregX86_EBP() ) {
+ *p++ = mkModRegRM(0, iregNo(greg), iregNo(am->Xam.IR.reg));
+ return p;
+ }
+ if (fits8bits(am->Xam.IR.imm)
+ && am->Xam.IR.reg != hregX86_ESP()) {
+ *p++ = mkModRegRM(1, iregNo(greg), iregNo(am->Xam.IR.reg));
+ *p++ = toUChar(am->Xam.IR.imm & 0xFF);
+ return p;
+ }
+ if (am->Xam.IR.reg != hregX86_ESP()) {
+ *p++ = mkModRegRM(2, iregNo(greg), iregNo(am->Xam.IR.reg));
+ p = emit32(p, am->Xam.IR.imm);
+ return p;
+ }
+ if (am->Xam.IR.reg == hregX86_ESP()
+ && fits8bits(am->Xam.IR.imm)) {
+ *p++ = mkModRegRM(1, iregNo(greg), 4);
+ *p++ = 0x24;
+ *p++ = toUChar(am->Xam.IR.imm & 0xFF);
+ return p;
+ }
+ ppX86AMode(am);
+ vpanic("doAMode_M: can't emit amode IR");
+ /*NOTREACHED*/
+ }
+ if (am->tag == Xam_IRRS) {
+ if (fits8bits(am->Xam.IRRS.imm)
+ && am->Xam.IRRS.index != hregX86_ESP()) {
+ *p++ = mkModRegRM(1, iregNo(greg), 4);
+ *p++ = mkSIB(am->Xam.IRRS.shift, am->Xam.IRRS.index,
+ am->Xam.IRRS.base);
+ *p++ = toUChar(am->Xam.IRRS.imm & 0xFF);
+ return p;
+ }
+ if (am->Xam.IRRS.index != hregX86_ESP()) {
+ *p++ = mkModRegRM(2, iregNo(greg), 4);
+ *p++ = mkSIB(am->Xam.IRRS.shift, am->Xam.IRRS.index,
+ am->Xam.IRRS.base);
+ p = emit32(p, am->Xam.IRRS.imm);
+ return p;
+ }
+ ppX86AMode(am);
+ vpanic("doAMode_M: can't emit amode IRRS");
+ /*NOTREACHED*/
+ }
+ vpanic("doAMode_M: unknown amode");
+ /*NOTREACHED*/
+}
+
+
+/* Emit a mod-reg-rm byte when the rm bit denotes a reg. */
+static UChar* doAMode_R ( UChar* p, HReg greg, HReg ereg )
+{
+ *p++ = mkModRegRM(3, iregNo(greg), iregNo(ereg));
+ return p;
+}
+
+
+/* Emit ffree %st(7) */
+static UChar* do_ffree_st7 ( UChar* p )
+{
+ *p++ = 0xDD;
+ *p++ = 0xC7;
+ return p;
+}
+
+/* Emit fstp %st(i), 1 <= i <= 7 */
+static UChar* do_fstp_st ( UChar* p, Int i )
+{
+ vassert(1 <= i && i <= 7);
+ *p++ = 0xDD;
+ *p++ = toUChar(0xD8+i);
+ return p;
+}
+
+/* Emit fld %st(i), 0 <= i <= 6 */
+static UChar* do_fld_st ( UChar* p, Int i )
+{
+ vassert(0 <= i && i <= 6);
+ *p++ = 0xD9;
+ *p++ = toUChar(0xC0+i);
+ return p;
+}
+
+/* Emit f<op> %st(0) */
+static UChar* do_fop1_st ( UChar* p, X86FpOp op )
+{
+ switch (op) {
+ case Xfp_NEG: *p++ = 0xD9; *p++ = 0xE0; break;
+ case Xfp_ABS: *p++ = 0xD9; *p++ = 0xE1; break;
+ case Xfp_SQRT: *p++ = 0xD9; *p++ = 0xFA; break;
+ case Xfp_ROUND: *p++ = 0xD9; *p++ = 0xFC; break;
+ case Xfp_SIN: *p++ = 0xD9; *p++ = 0xFE; break;
+ case Xfp_COS: *p++ = 0xD9; *p++ = 0xFF; break;
+ case Xfp_2XM1: *p++ = 0xD9; *p++ = 0xF0; break;
+ case Xfp_MOV: break;
+ case Xfp_TAN: p = do_ffree_st7(p); /* since fptan pushes 1.0 */
+ *p++ = 0xD9; *p++ = 0xF2; /* fptan */
+ *p++ = 0xD9; *p++ = 0xF7; /* fincstp */
+ break;
+ default: vpanic("do_fop1_st: unknown op");
+ }
+ return p;
+}
+
+/* Emit f<op> %st(i), 1 <= i <= 5 */
+static UChar* do_fop2_st ( UChar* p, X86FpOp op, Int i )
+{
+# define fake(_n) mkHReg((_n), HRcInt32, False)
+ Int subopc;
+ switch (op) {
+ case Xfp_ADD: subopc = 0; break;
+ case Xfp_SUB: subopc = 4; break;
+ case Xfp_MUL: subopc = 1; break;
+ case Xfp_DIV: subopc = 6; break;
+ default: vpanic("do_fop2_st: unknown op");
+ }
+ *p++ = 0xD8;
+ p = doAMode_R(p, fake(subopc), fake(i));
+ return p;
+# undef fake
+}
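+
+/* e.g. do_fop2_st(p, Xfp_ADD, 3) emits D8 C3, i.e. fadd %st(3),%st. */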
+
+/* Push a 32-bit word on the stack. The word depends on tags[3:0]:
+ each byte is either 0x00 or 0xFF, according to the corresponding
+ bit in tags[]. */
+static UChar* push_word_from_tags ( UChar* p, UShort tags )
+{
+ UInt w;
+ vassert(0 == (tags & ~0xF));
+ if (tags == 0) {
+ /* pushl $0x00000000 */
+ *p++ = 0x6A;
+ *p++ = 0x00;
+ }
+ else
+ /* pushl $0xFFFFFFFF */
+ if (tags == 0xF) {
+ *p++ = 0x6A;
+ *p++ = 0xFF;
+ } else {
+ vassert(0); /* awaiting test case */
+ w = 0;
+ if (tags & 1) w |= 0x000000FF;
+ if (tags & 2) w |= 0x0000FF00;
+ if (tags & 4) w |= 0x00FF0000;
+ if (tags & 8) w |= 0xFF000000;
+ *p++ = 0x68;
+ p = emit32(p, w);
+ }
+ return p;
+}
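+
+/* e.g. tags == 0x5 would give w == 0x00FF00FF, hence the bytes
+ 68 FF 00 FF 00 (pushl $0x00FF00FF) -- though, per the vassert above,
+ that general path is still awaiting a test case. */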
+
+/* Emit an instruction into buf and return the number of bytes used.
+ Note that buf is not the insn's final place, and therefore it is
+ imperative to emit position-independent code. */
+
+Int emit_X86Instr ( UChar* buf, Int nbuf, X86Instr* i,
+ Bool mode64, void* dispatch )
+{
+ UInt irno, opc, opc_rr, subopc_imm, opc_imma, opc_cl, opc_imm, subopc;
+
+ UInt xtra;
+ UChar* p = &buf[0];
+ UChar* ptmp;
+ vassert(nbuf >= 32);
+ vassert(mode64 == False);
+
+ /* Wrap an integer as an int register, for use assembling
+ GrpN insns, in which the greg field is used as a sub-opcode
+ and does not really contain a register. */
+# define fake(_n) mkHReg((_n), HRcInt32, False)
+
+ /* vex_printf("asm ");ppX86Instr(i, mode64); vex_printf("\n"); */
+
+ switch (i->tag) {
+
+ case Xin_Alu32R:
+ /* Deal specially with MOV */
+ if (i->Xin.Alu32R.op == Xalu_MOV) {
+ switch (i->Xin.Alu32R.src->tag) {
+ case Xrmi_Imm:
+ *p++ = toUChar(0xB8 + iregNo(i->Xin.Alu32R.dst));
+ p = emit32(p, i->Xin.Alu32R.src->Xrmi.Imm.imm32);
+ goto done;
+ case Xrmi_Reg:
+ *p++ = 0x89;
+ p = doAMode_R(p, i->Xin.Alu32R.src->Xrmi.Reg.reg,
+ i->Xin.Alu32R.dst);
+ goto done;
+ case Xrmi_Mem:
+ *p++ = 0x8B;
+ p = doAMode_M(p, i->Xin.Alu32R.dst,
+ i->Xin.Alu32R.src->Xrmi.Mem.am);
+ goto done;
+ default:
+ goto bad;
+ }
+ }
+ /* MUL */
+ if (i->Xin.Alu32R.op == Xalu_MUL) {
+ switch (i->Xin.Alu32R.src->tag) {
+ case Xrmi_Reg:
+ *p++ = 0x0F;
+ *p++ = 0xAF;
+ p = doAMode_R(p, i->Xin.Alu32R.dst,
+ i->Xin.Alu32R.src->Xrmi.Reg.reg);
+ goto done;
+ case Xrmi_Mem:
+ *p++ = 0x0F;
+ *p++ = 0xAF;
+ p = doAMode_M(p, i->Xin.Alu32R.dst,
+ i->Xin.Alu32R.src->Xrmi.Mem.am);
+ goto done;
+ case Xrmi_Imm:
+ if (fits8bits(i->Xin.Alu32R.src->Xrmi.Imm.imm32)) {
+ *p++ = 0x6B;
+ p = doAMode_R(p, i->Xin.Alu32R.dst, i->Xin.Alu32R.dst);
+ *p++ = toUChar(0xFF & i->Xin.Alu32R.src->Xrmi.Imm.imm32);
+ } else {
+ *p++ = 0x69;
+ p = doAMode_R(p, i->Xin.Alu32R.dst, i->Xin.Alu32R.dst);
+ p = emit32(p, i->Xin.Alu32R.src->Xrmi.Imm.imm32);
+ }
+ goto done;
+ default:
+ goto bad;
+ }
+ }
+ /* ADD/SUB/ADC/SBB/AND/OR/XOR/CMP */
+ opc = opc_rr = subopc_imm = opc_imma = 0;
+ switch (i->Xin.Alu32R.op) {
+ case Xalu_ADC: opc = 0x13; opc_rr = 0x11;
+ subopc_imm = 2; opc_imma = 0x15; break;
+ case Xalu_ADD: opc = 0x03; opc_rr = 0x01;
+ subopc_imm = 0; opc_imma = 0x05; break;
+ case Xalu_SUB: opc = 0x2B; opc_rr = 0x29;
+ subopc_imm = 5; opc_imma = 0x2D; break;
+ case Xalu_SBB: opc = 0x1B; opc_rr = 0x19;
+ subopc_imm = 3; opc_imma = 0x1D; break;
+ case Xalu_AND: opc = 0x23; opc_rr = 0x21;
+ subopc_imm = 4; opc_imma = 0x25; break;
+ case Xalu_XOR: opc = 0x33; opc_rr = 0x31;
+ subopc_imm = 6; opc_imma = 0x35; break;
+ case Xalu_OR: opc = 0x0B; opc_rr = 0x09;
+ subopc_imm = 1; opc_imma = 0x0D; break;
+ case Xalu_CMP: opc = 0x3B; opc_rr = 0x39;
+ subopc_imm = 7; opc_imma = 0x3D; break;
+ default: goto bad;
+ }
+ switch (i->Xin.Alu32R.src->tag) {
+ case Xrmi_Imm:
+ if (i->Xin.Alu32R.dst == hregX86_EAX()
+ && !fits8bits(i->Xin.Alu32R.src->Xrmi.Imm.imm32)) {
+ *p++ = toUChar(opc_imma);
+ p = emit32(p, i->Xin.Alu32R.src->Xrmi.Imm.imm32);
+ } else
+ if (fits8bits(i->Xin.Alu32R.src->Xrmi.Imm.imm32)) {
+ *p++ = 0x83;
+ p = doAMode_R(p, fake(subopc_imm), i->Xin.Alu32R.dst);
+ *p++ = toUChar(0xFF & i->Xin.Alu32R.src->Xrmi.Imm.imm32);
+ } else {
+ *p++ = 0x81;
+ p = doAMode_R(p, fake(subopc_imm), i->Xin.Alu32R.dst);
+ p = emit32(p, i->Xin.Alu32R.src->Xrmi.Imm.imm32);
+ }
+ goto done;
+ case Xrmi_Reg:
+ *p++ = toUChar(opc_rr);
+ p = doAMode_R(p, i->Xin.Alu32R.src->Xrmi.Reg.reg,
+ i->Xin.Alu32R.dst);
+ goto done;
+ case Xrmi_Mem:
+ *p++ = toUChar(opc);
+ p = doAMode_M(p, i->Xin.Alu32R.dst,
+ i->Xin.Alu32R.src->Xrmi.Mem.am);
+ goto done;
+ default:
+ goto bad;
+ }
+ break;
+
+ case Xin_Alu32M:
+ /* Deal specially with MOV */
+ if (i->Xin.Alu32M.op == Xalu_MOV) {
+ switch (i->Xin.Alu32M.src->tag) {
+ case Xri_Reg:
+ *p++ = 0x89;
+ p = doAMode_M(p, i->Xin.Alu32M.src->Xri.Reg.reg,
+ i->Xin.Alu32M.dst);
+ goto done;
+ case Xri_Imm:
+ *p++ = 0xC7;
+ p = doAMode_M(p, fake(0), i->Xin.Alu32M.dst);
+ p = emit32(p, i->Xin.Alu32M.src->Xri.Imm.imm32);
+ goto done;
+ default:
+ goto bad;
+ }
+ }
+ /* ADD/SUB/ADC/SBB/AND/OR/XOR/CMP. MUL is not
+ allowed here. */
+ opc = subopc_imm = opc_imma = 0;
+ switch (i->Xin.Alu32M.op) {
+ case Xalu_ADD: opc = 0x01; subopc_imm = 0; break;
+ case Xalu_SUB: opc = 0x29; subopc_imm = 5; break;
+ case Xalu_CMP: opc = 0x39; subopc_imm = 7; break;
+ default: goto bad;
+ }
+ switch (i->Xin.Alu32M.src->tag) {
+ case Xri_Reg:
+ *p++ = toUChar(opc);
+ p = doAMode_M(p, i->Xin.Alu32M.src->Xri.Reg.reg,
+ i->Xin.Alu32M.dst);
+ goto done;
+ case Xri_Imm:
+ if (fits8bits(i->Xin.Alu32M.src->Xri.Imm.imm32)) {
+ *p++ = 0x83;
+ p = doAMode_M(p, fake(subopc_imm), i->Xin.Alu32M.dst);
+ *p++ = toUChar(0xFF & i->Xin.Alu32M.src->Xri.Imm.imm32);
+ goto done;
+ } else {
+ *p++ = 0x81;
+ p = doAMode_M(p, fake(subopc_imm), i->Xin.Alu32M.dst);
+ p = emit32(p, i->Xin.Alu32M.src->Xri.Imm.imm32);
+ goto done;
+ }
+ default:
+ goto bad;
+ }
+ break;
+
+ case Xin_Sh32:
+ opc_cl = opc_imm = subopc = 0;
+ switch (i->Xin.Sh32.op) {
+ case Xsh_SHR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 5; break;
+ case Xsh_SAR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 7; break;
+ case Xsh_SHL: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 4; break;
+ default: goto bad;
+ }
+ if (i->Xin.Sh32.src == 0) {
+ *p++ = toUChar(opc_cl);
+ p = doAMode_R(p, fake(subopc), i->Xin.Sh32.dst);
+ } else {
+ *p++ = toUChar(opc_imm);
+ p = doAMode_R(p, fake(subopc), i->Xin.Sh32.dst);
+ *p++ = (UChar)(i->Xin.Sh32.src);
+ }
+ goto done;
+
+ case Xin_Test32:
+ if (i->Xin.Test32.dst->tag == Xrm_Reg) {
+ /* testl $imm32, %reg */
+ *p++ = 0xF7;
+ p = doAMode_R(p, fake(0), i->Xin.Test32.dst->Xrm.Reg.reg);
+ p = emit32(p, i->Xin.Test32.imm32);
+ goto done;
+ } else {
+ /* testl $imm32, amode */
+ *p++ = 0xF7;
+ p = doAMode_M(p, fake(0), i->Xin.Test32.dst->Xrm.Mem.am);
+ p = emit32(p, i->Xin.Test32.imm32);
+ goto done;
+ }
+
+ case Xin_Unary32:
+ if (i->Xin.Unary32.op == Xun_NOT) {
+ *p++ = 0xF7;
+ p = doAMode_R(p, fake(2), i->Xin.Unary32.dst);
+ goto done;
+ }
+ if (i->Xin.Unary32.op == Xun_NEG) {
+ *p++ = 0xF7;
+ p = doAMode_R(p, fake(3), i->Xin.Unary32.dst);
+ goto done;
+ }
+ break;
+
+ case Xin_Lea32:
+ *p++ = 0x8D;
+ p = doAMode_M(p, i->Xin.Lea32.dst, i->Xin.Lea32.am);
+ goto done;
+
+ case Xin_MulL:
+ subopc = i->Xin.MulL.syned ? 5 : 4;
+ *p++ = 0xF7;
+ switch (i->Xin.MulL.src->tag) {
+ case Xrm_Mem:
+ p = doAMode_M(p, fake(subopc),
+ i->Xin.MulL.src->Xrm.Mem.am);
+ goto done;
+ case Xrm_Reg:
+ p = doAMode_R(p, fake(subopc),
+ i->Xin.MulL.src->Xrm.Reg.reg);
+ goto done;
+ default:
+ goto bad;
+ }
+ break;
+
+ case Xin_Div:
+ subopc = i->Xin.Div.syned ? 7 : 6;
+ *p++ = 0xF7;
+ switch (i->Xin.Div.src->tag) {
+ case Xrm_Mem:
+ p = doAMode_M(p, fake(subopc),
+ i->Xin.Div.src->Xrm.Mem.am);
+ goto done;
+ case Xrm_Reg:
+ p = doAMode_R(p, fake(subopc),
+ i->Xin.Div.src->Xrm.Reg.reg);
+ goto done;
+ default:
+ goto bad;
+ }
+ break;
+
+ case Xin_Sh3232:
+ vassert(i->Xin.Sh3232.op == Xsh_SHL || i->Xin.Sh3232.op == Xsh_SHR);
+ if (i->Xin.Sh3232.amt == 0) {
+ /* shldl/shrdl by %cl */
+ *p++ = 0x0F;
+ if (i->Xin.Sh3232.op == Xsh_SHL) {
+ *p++ = 0xA5;
+ } else {
+ *p++ = 0xAD;
+ }
+ p = doAMode_R(p, i->Xin.Sh3232.src, i->Xin.Sh3232.dst);
+ goto done;
+ }
+ break;
+
+ case Xin_Push:
+ switch (i->Xin.Push.src->tag) {
+ case Xrmi_Mem:
+ *p++ = 0xFF;
+ p = doAMode_M(p, fake(6), i->Xin.Push.src->Xrmi.Mem.am);
+ goto done;
+ case Xrmi_Imm:
+ *p++ = 0x68;
+ p = emit32(p, i->Xin.Push.src->Xrmi.Imm.imm32);
+ goto done;
+ case Xrmi_Reg:
+ *p++ = toUChar(0x50 + iregNo(i->Xin.Push.src->Xrmi.Reg.reg));
+ goto done;
+ default:
+ goto bad;
+ }
+
+ case Xin_Call:
+ /* See detailed comment for Xin_Call in getRegUsage_X86Instr above
+ for explanation of this. */
+ switch (i->Xin.Call.regparms) {
+ case 0: irno = iregNo(hregX86_EAX()); break;
+ case 1: irno = iregNo(hregX86_EDX()); break;
+ case 2: irno = iregNo(hregX86_ECX()); break;
+ case 3: irno = iregNo(hregX86_EDI()); break;
+ default: vpanic(" emit_X86Instr:call:regparms");
+ }
+ /* jump over the following two insns if the condition does not
+ hold */
+ if (i->Xin.Call.cond != Xcc_ALWAYS) {
+ *p++ = toUChar(0x70 + (0xF & (i->Xin.Call.cond ^ 1)));
+ *p++ = 0x07; /* 7 bytes in the next two insns */
+ }
+ /* movl $target, %tmp */
+ *p++ = toUChar(0xB8 + irno);
+ p = emit32(p, i->Xin.Call.target);
+ /* call *%tmp */
+ *p++ = 0xFF;
+ *p++ = toUChar(0xD0 + irno);
+ goto done;
+
+ case Xin_Goto:
+ /* Use ptmp for backpatching conditional jumps. */
+ ptmp = NULL;
+
+ /* First off, if this is conditional, create a conditional
+ jump over the rest of it. */
+ if (i->Xin.Goto.cond != Xcc_ALWAYS) {
+ /* jmp fwds if !condition */
+ *p++ = toUChar(0x70 + (0xF & (i->Xin.Goto.cond ^ 1)));
+ ptmp = p; /* fill in this bit later */
+ *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
+ }
+
+ /* If this is a non-boring jump, set %ebp (the guest state
+ pointer) appropriately. */
+ /* movl $magic_number, %ebp */
+ switch (i->Xin.Goto.jk) {
+ case Ijk_ClientReq:
+ *p++ = 0xBD;
+ p = emit32(p, VEX_TRC_JMP_CLIENTREQ); break;
+ case Ijk_Sys_int128:
+ *p++ = 0xBD;
+ p = emit32(p, VEX_TRC_JMP_SYS_INT128); break;
+ case Ijk_Sys_int129:
+ *p++ = 0xBD;
+ p = emit32(p, VEX_TRC_JMP_SYS_INT129); break;
+ case Ijk_Sys_int130:
+ *p++ = 0xBD;
+ p = emit32(p, VEX_TRC_JMP_SYS_INT130); break;
+ case Ijk_Yield:
+ *p++ = 0xBD;
+ p = emit32(p, VEX_TRC_JMP_YIELD); break;
+ case Ijk_EmWarn:
+ *p++ = 0xBD;
+ p = emit32(p, VEX_TRC_JMP_EMWARN); break;
+ case Ijk_MapFail:
+ *p++ = 0xBD;
+ p = emit32(p, VEX_TRC_JMP_MAPFAIL); break;
+ case Ijk_NoDecode:
+ *p++ = 0xBD;
+ p = emit32(p, VEX_TRC_JMP_NODECODE); break;
+ case Ijk_TInval:
+ *p++ = 0xBD;
+ p = emit32(p, VEX_TRC_JMP_TINVAL); break;
+ case Ijk_NoRedir:
+ *p++ = 0xBD;
+ p = emit32(p, VEX_TRC_JMP_NOREDIR); break;
+ case Ijk_Sys_sysenter:
+ *p++ = 0xBD;
+ p = emit32(p, VEX_TRC_JMP_SYS_SYSENTER); break;
+ case Ijk_SigTRAP:
+ *p++ = 0xBD;
+ p = emit32(p, VEX_TRC_JMP_SIGTRAP); break;
+ case Ijk_SigSEGV:
+ *p++ = 0xBD;
+ p = emit32(p, VEX_TRC_JMP_SIGSEGV); break;
+ case Ijk_Ret:
+ case Ijk_Call:
+ case Ijk_Boring:
+ break;
+ default:
+ ppIRJumpKind(i->Xin.Goto.jk);
+ vpanic("emit_X86Instr.Xin_Goto: unknown jump kind");
+ }
+
+ /* Get the destination address into %eax */
+ if (i->Xin.Goto.dst->tag == Xri_Imm) {
+ /* movl $immediate, %eax */
+ *p++ = 0xB8;
+ p = emit32(p, i->Xin.Goto.dst->Xri.Imm.imm32);
+ } else {
+ vassert(i->Xin.Goto.dst->tag == Xri_Reg);
+ /* movl %reg, %eax */
+ if (i->Xin.Goto.dst->Xri.Reg.reg != hregX86_EAX()) {
+ *p++ = 0x89;
+ p = doAMode_R(p, i->Xin.Goto.dst->Xri.Reg.reg, hregX86_EAX());
+ }
+ }
+
+ /* Get the dispatcher address into %edx. This has to happen
+ after the load of %eax since %edx might be carrying the value
+ destined for %eax immediately prior to this Xin_Goto. */
+ vassert(sizeof(UInt) == sizeof(void*));
+ vassert(dispatch != NULL);
+ /* movl $imm32, %edx */
+ *p++ = 0xBA;
+ p = emit32(p, (UInt)Ptr_to_ULong(dispatch));
+
+ /* jmp *%edx */
+ *p++ = 0xFF;
+ *p++ = 0xE2;
+
+ /* Fix up the conditional jump, if there was one. */
+ if (i->Xin.Goto.cond != Xcc_ALWAYS) {
+ Int delta = p - ptmp;
+ vassert(delta > 0 && delta < 20);
+ *ptmp = toUChar(delta-1);
+ }
+ goto done;
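+      /* Example (illustrative): an unconditional Ijk_ClientReq goto
+         to an immediate destination assembles to
+            movl $VEX_TRC_JMP_CLIENTREQ, %ebp
+            movl $dst, %eax
+            movl $dispatch, %edx
+            jmp  *%edx
+         with the 2-byte jcc-over prefix added in the conditional
+         case. */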
+
+ case Xin_CMov32:
+ vassert(i->Xin.CMov32.cond != Xcc_ALWAYS);
+
+ /* This generates cmov, which is illegal on P54/P55. */
+ /*
+ *p++ = 0x0F;
+ *p++ = toUChar(0x40 + (0xF & i->Xin.CMov32.cond));
+ if (i->Xin.CMov32.src->tag == Xrm_Reg) {
+ p = doAMode_R(p, i->Xin.CMov32.dst, i->Xin.CMov32.src->Xrm.Reg.reg);
+ goto done;
+ }
+ if (i->Xin.CMov32.src->tag == Xrm_Mem) {
+ p = doAMode_M(p, i->Xin.CMov32.dst, i->Xin.CMov32.src->Xrm.Mem.am);
+ goto done;
+ }
+ */
+
+ /* Alternative version which works on any x86 variant. */
+ /* jmp fwds if !condition */
+ *p++ = toUChar(0x70 + (i->Xin.CMov32.cond ^ 1));
+ *p++ = 0; /* # of bytes in the next bit, which we don't know yet */
+ ptmp = p;
+
+ switch (i->Xin.CMov32.src->tag) {
+ case Xrm_Reg:
+ /* Big sigh. This is movl E -> G ... */
+ *p++ = 0x89;
+ p = doAMode_R(p, i->Xin.CMov32.src->Xrm.Reg.reg,
+ i->Xin.CMov32.dst);
+
+ break;
+ case Xrm_Mem:
+ /* ... whereas this is movl G -> E. That's why the args
+ to doAMode_R appear to be the wrong way round in the
+ Xrm_Reg case. */
+ *p++ = 0x8B;
+ p = doAMode_M(p, i->Xin.CMov32.dst,
+ i->Xin.CMov32.src->Xrm.Mem.am);
+ break;
+ default:
+ goto bad;
+ }
+ /* Fill in the jump offset. */
+ *(ptmp-1) = toUChar(p - ptmp);
+ goto done;
+
+ case Xin_LoadEX:
+ if (i->Xin.LoadEX.szSmall == 1 && !i->Xin.LoadEX.syned) {
+ /* movzbl */
+ *p++ = 0x0F;
+ *p++ = 0xB6;
+ p = doAMode_M(p, i->Xin.LoadEX.dst, i->Xin.LoadEX.src);
+ goto done;
+ }
+ if (i->Xin.LoadEX.szSmall == 2 && !i->Xin.LoadEX.syned) {
+ /* movzwl */
+ *p++ = 0x0F;
+ *p++ = 0xB7;
+ p = doAMode_M(p, i->Xin.LoadEX.dst, i->Xin.LoadEX.src);
+ goto done;
+ }
+ if (i->Xin.LoadEX.szSmall == 1 && i->Xin.LoadEX.syned) {
+ /* movsbl */
+ *p++ = 0x0F;
+ *p++ = 0xBE;
+ p = doAMode_M(p, i->Xin.LoadEX.dst, i->Xin.LoadEX.src);
+ goto done;
+ }
+ break;
+
+ case Xin_Set32:
+ /* Make the destination register be 1 or 0, depending on whether
+ the relevant condition holds. We have to dodge and weave
+ when the destination is %esi or %edi as we cannot directly
+ emit the native 'setb %reg' for those. Further complication:
+ the top 24 bits of the destination should be forced to zero,
+ but doing 'xor %r,%r' kills the flag(s) we are about to read.
+         Sigh.  So start off by moving $0 into the dest. */
+
+ /* Do we need to swap in %eax? */
+ if (iregNo(i->Xin.Set32.dst) >= 4) {
+ /* xchg %eax, %dst */
+ *p++ = toUChar(0x90 + iregNo(i->Xin.Set32.dst));
+ /* movl $0, %eax */
+         *p++ = toUChar(0xB8 + iregNo(hregX86_EAX()));
+ p = emit32(p, 0);
+ /* setb lo8(%eax) */
+ *p++ = 0x0F;
+ *p++ = toUChar(0x90 + (0xF & i->Xin.Set32.cond));
+ p = doAMode_R(p, fake(0), hregX86_EAX());
+ /* xchg %eax, %dst */
+ *p++ = toUChar(0x90 + iregNo(i->Xin.Set32.dst));
+ } else {
+ /* movl $0, %dst */
+ *p++ = toUChar(0xB8 + iregNo(i->Xin.Set32.dst));
+ p = emit32(p, 0);
+ /* setb lo8(%dst) */
+ *p++ = 0x0F;
+ *p++ = toUChar(0x90 + (0xF & i->Xin.Set32.cond));
+ p = doAMode_R(p, fake(0), i->Xin.Set32.dst);
+ }
+ goto done;
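+      /* Example (illustrative): for dst == %esi and cond == Xcc_Z,
+         the swap path gives
+            96                xchgl %eax, %esi
+            B8 00 00 00 00    movl  $0, %eax
+            0F 94 C0          setz  %al
+            96                xchgl %eax, %esi
+         leaving 0 or 1 in %esi with %eax restored. */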
+
+ case Xin_Bsfr32:
+ *p++ = 0x0F;
+ if (i->Xin.Bsfr32.isFwds) {
+ *p++ = 0xBC;
+ } else {
+ *p++ = 0xBD;
+ }
+ p = doAMode_R(p, i->Xin.Bsfr32.dst, i->Xin.Bsfr32.src);
+ goto done;
+
+ case Xin_MFence:
+      /* see comment in host_x86_defs.h re this insn */
+ if (0) vex_printf("EMIT FENCE\n");
+ if (i->Xin.MFence.hwcaps & (VEX_HWCAPS_X86_SSE3
+ |VEX_HWCAPS_X86_SSE2)) {
+ /* mfence */
+ *p++ = 0x0F; *p++ = 0xAE; *p++ = 0xF0;
+ goto done;
+ }
+ if (i->Xin.MFence.hwcaps & VEX_HWCAPS_X86_SSE1) {
+ /* sfence */
+ *p++ = 0x0F; *p++ = 0xAE; *p++ = 0xF8;
+ /* lock addl $0,0(%esp) */
+ *p++ = 0xF0; *p++ = 0x83; *p++ = 0x44;
+ *p++ = 0x24; *p++ = 0x00; *p++ = 0x00;
+ goto done;
+ }
+ if (i->Xin.MFence.hwcaps == 0/*baseline, no SSE*/) {
+ /* lock addl $0,0(%esp) */
+ *p++ = 0xF0; *p++ = 0x83; *p++ = 0x44;
+ *p++ = 0x24; *p++ = 0x00; *p++ = 0x00;
+ goto done;
+ }
+ vpanic("emit_X86Instr:mfence:hwcaps");
+ /*NOTREACHED*/
+ break;
+
+ case Xin_ACAS:
+ /* lock */
+ *p++ = 0xF0;
+ /* cmpxchg{b,w,l} %ebx,mem. Expected-value in %eax, new value
+ in %ebx. The new-value register is hardwired to be %ebx
+ since letting it be any integer register gives the problem
+         that the low bytes of %esi and %edi are unaddressable on
+         x86 and hence we
+ would have to resort to the same kind of trickery as with
+ byte-sized Xin.Store, just below. Given that this isn't
+ performance critical, it is simpler just to force the
+ register operand to %ebx (could equally be %ecx or %edx).
+ (Although %ebx is more consistent with cmpxchg8b.) */
+ if (i->Xin.ACAS.sz == 2) *p++ = 0x66;
+ *p++ = 0x0F;
+ if (i->Xin.ACAS.sz == 1) *p++ = 0xB0; else *p++ = 0xB1;
+ p = doAMode_M(p, hregX86_EBX(), i->Xin.ACAS.addr);
+ goto done;
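+      /* Example (illustrative, assuming a zero-displacement (%esi)
+         amode): a 4-byte ACAS would encode as
+            F0 0F B1 1E       lock cmpxchgl %ebx, (%esi)
+         with the expected value in %eax and the old value returned
+         there. */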
+
+ case Xin_DACAS:
+ /* lock */
+ *p++ = 0xF0;
+ /* cmpxchg8b m64. Expected-value in %edx:%eax, new value
+ in %ecx:%ebx. All 4 regs are hardwired in the ISA, so
+ aren't encoded in the insn. */
+ *p++ = 0x0F;
+ *p++ = 0xC7;
+ p = doAMode_M(p, fake(1), i->Xin.DACAS.addr);
+ goto done;
+
+ case Xin_Store:
+ if (i->Xin.Store.sz == 2) {
+ /* This case, at least, is simple, given that we can
+ reference the low 16 bits of any integer register. */
+ *p++ = 0x66;
+ *p++ = 0x89;
+ p = doAMode_M(p, i->Xin.Store.src, i->Xin.Store.dst);
+ goto done;
+ }
+
+ if (i->Xin.Store.sz == 1) {
+ /* We have to do complex dodging and weaving if src is not
+ the low 8 bits of %eax/%ebx/%ecx/%edx. */
+ if (iregNo(i->Xin.Store.src) < 4) {
+ /* we're OK, can do it directly */
+ *p++ = 0x88;
+ p = doAMode_M(p, i->Xin.Store.src, i->Xin.Store.dst);
+ goto done;
+ } else {
+ /* Bleh. This means the source is %edi or %esi. Since
+ the address mode can only mention three registers, at
+ least one of %eax/%ebx/%ecx/%edx must be available to
+ temporarily swap the source into, so the store can
+ happen. So we have to look at the regs mentioned
+ in the amode. */
+ HReg swap = INVALID_HREG;
+ HReg eax = hregX86_EAX(), ebx = hregX86_EBX(),
+ ecx = hregX86_ECX(), edx = hregX86_EDX();
+ Bool a_ok = True, b_ok = True, c_ok = True, d_ok = True;
+ HRegUsage u;
+ Int j;
+ initHRegUsage(&u);
+ addRegUsage_X86AMode(&u, i->Xin.Store.dst);
+ for (j = 0; j < u.n_used; j++) {
+ HReg r = u.hreg[j];
+ if (r == eax) a_ok = False;
+ if (r == ebx) b_ok = False;
+ if (r == ecx) c_ok = False;
+ if (r == edx) d_ok = False;
+ }
+ if (a_ok) swap = eax;
+ if (b_ok) swap = ebx;
+ if (c_ok) swap = ecx;
+ if (d_ok) swap = edx;
+ vassert(swap != INVALID_HREG);
+ /* xchgl %source, %swap. Could do better if swap is %eax. */
+ *p++ = 0x87;
+ p = doAMode_R(p, i->Xin.Store.src, swap);
+            /* movb lo8(%swap), (dst) */
+ *p++ = 0x88;
+ p = doAMode_M(p, swap, i->Xin.Store.dst);
+ /* xchgl %source, %swap. Could do better if swap is %eax. */
+ *p++ = 0x87;
+ p = doAMode_R(p, i->Xin.Store.src, swap);
+ goto done;
+ }
+ } /* if (i->Xin.Store.sz == 1) */
+ break;
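+      /* Example (illustrative): storing the low byte of %esi to
+         (%eax).  The amode mentions only %eax, so the assignment
+         order above leaves %edx as the chosen swap register, giving
+            xchgl %esi, %edx
+            movb  %dl, (%eax)
+            xchgl %esi, %edx                                       */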
+
+ case Xin_FpUnary:
+ /* gop %src, %dst
+ --> ffree %st7 ; fld %st(src) ; fop %st(0) ; fstp %st(1+dst)
+ */
+ p = do_ffree_st7(p);
+ p = do_fld_st(p, 0+hregNumber(i->Xin.FpUnary.src));
+ p = do_fop1_st(p, i->Xin.FpUnary.op);
+ p = do_fstp_st(p, 1+hregNumber(i->Xin.FpUnary.dst));
+ goto done;
+
+ case Xin_FpBinary:
+ if (i->Xin.FpBinary.op == Xfp_YL2X
+ || i->Xin.FpBinary.op == Xfp_YL2XP1) {
+ /* Have to do this specially. */
+ /* ffree %st7 ; fld %st(srcL) ;
+ ffree %st7 ; fld %st(srcR+1) ; fyl2x{p1} ; fstp(1+dst) */
+ p = do_ffree_st7(p);
+ p = do_fld_st(p, 0+hregNumber(i->Xin.FpBinary.srcL));
+ p = do_ffree_st7(p);
+ p = do_fld_st(p, 1+hregNumber(i->Xin.FpBinary.srcR));
+ *p++ = 0xD9;
+ *p++ = toUChar(i->Xin.FpBinary.op==Xfp_YL2X ? 0xF1 : 0xF9);
+ p = do_fstp_st(p, 1+hregNumber(i->Xin.FpBinary.dst));
+ goto done;
+ }
+ if (i->Xin.FpBinary.op == Xfp_ATAN) {
+ /* Have to do this specially. */
+ /* ffree %st7 ; fld %st(srcL) ;
+ ffree %st7 ; fld %st(srcR+1) ; fpatan ; fstp(1+dst) */
+ p = do_ffree_st7(p);
+ p = do_fld_st(p, 0+hregNumber(i->Xin.FpBinary.srcL));
+ p = do_ffree_st7(p);
+ p = do_fld_st(p, 1+hregNumber(i->Xin.FpBinary.srcR));
+ *p++ = 0xD9; *p++ = 0xF3;
+ p = do_fstp_st(p, 1+hregNumber(i->Xin.FpBinary.dst));
+ goto done;
+ }
+ if (i->Xin.FpBinary.op == Xfp_PREM
+ || i->Xin.FpBinary.op == Xfp_PREM1
+ || i->Xin.FpBinary.op == Xfp_SCALE) {
+ /* Have to do this specially. */
+ /* ffree %st7 ; fld %st(srcR) ;
+ ffree %st7 ; fld %st(srcL+1) ; fprem/fprem1/fscale ; fstp(2+dst) ;
+ fincstp ; ffree %st7 */
+ p = do_ffree_st7(p);
+ p = do_fld_st(p, 0+hregNumber(i->Xin.FpBinary.srcR));
+ p = do_ffree_st7(p);
+ p = do_fld_st(p, 1+hregNumber(i->Xin.FpBinary.srcL));
+ *p++ = 0xD9;
+ switch (i->Xin.FpBinary.op) {
+ case Xfp_PREM: *p++ = 0xF8; break;
+ case Xfp_PREM1: *p++ = 0xF5; break;
+ case Xfp_SCALE: *p++ = 0xFD; break;
+ default: vpanic("emitX86Instr(FpBinary,PREM/PREM1/SCALE)");
+ }
+ p = do_fstp_st(p, 2+hregNumber(i->Xin.FpBinary.dst));
+ *p++ = 0xD9; *p++ = 0xF7;
+ p = do_ffree_st7(p);
+ goto done;
+ }
+ /* General case */
+ /* gop %srcL, %srcR, %dst
+ --> ffree %st7 ; fld %st(srcL) ; fop %st(1+srcR) ; fstp %st(1+dst)
+ */
+ p = do_ffree_st7(p);
+ p = do_fld_st(p, 0+hregNumber(i->Xin.FpBinary.srcL));
+ p = do_fop2_st(p, i->Xin.FpBinary.op,
+ 1+hregNumber(i->Xin.FpBinary.srcR));
+ p = do_fstp_st(p, 1+hregNumber(i->Xin.FpBinary.dst));
+ goto done;
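+      /* Example (illustrative): Xfp_ADD with srcL == %fake2,
+         srcR == %fake0, dst == %fake1 becomes
+            ffree %st(7) ; fld %st(2) ; fadd %st(1) ; fstp %st(2)
+         -- push a copy of srcL, combine it with srcR (now one slot
+         deeper), then pop the result into dst (likewise one slot
+         deeper). */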
+
+ case Xin_FpLdSt:
+ if (i->Xin.FpLdSt.isLoad) {
+ /* Load from memory into %fakeN.
+ --> ffree %st(7) ; fld{s/l/t} amode ; fstp st(N+1)
+ */
+ p = do_ffree_st7(p);
+ switch (i->Xin.FpLdSt.sz) {
+ case 4:
+ *p++ = 0xD9;
+ p = doAMode_M(p, fake(0)/*subopcode*/, i->Xin.FpLdSt.addr);
+ break;
+ case 8:
+ *p++ = 0xDD;
+ p = doAMode_M(p, fake(0)/*subopcode*/, i->Xin.FpLdSt.addr);
+ break;
+ case 10:
+ *p++ = 0xDB;
+ p = doAMode_M(p, fake(5)/*subopcode*/, i->Xin.FpLdSt.addr);
+ break;
+ default:
+ vpanic("emitX86Instr(FpLdSt,load)");
+ }
+ p = do_fstp_st(p, 1+hregNumber(i->Xin.FpLdSt.reg));
+ goto done;
+ } else {
+ /* Store from %fakeN into memory.
+ --> ffree %st(7) ; fld st(N) ; fstp{l|s} amode
+ */
+ p = do_ffree_st7(p);
+ p = do_fld_st(p, 0+hregNumber(i->Xin.FpLdSt.reg));
+ switch (i->Xin.FpLdSt.sz) {
+ case 4:
+ *p++ = 0xD9;
+ p = doAMode_M(p, fake(3)/*subopcode*/, i->Xin.FpLdSt.addr);
+ break;
+ case 8:
+ *p++ = 0xDD;
+ p = doAMode_M(p, fake(3)/*subopcode*/, i->Xin.FpLdSt.addr);
+ break;
+ case 10:
+ *p++ = 0xDB;
+ p = doAMode_M(p, fake(7)/*subopcode*/, i->Xin.FpLdSt.addr);
+ break;
+ default:
+ vpanic("emitX86Instr(FpLdSt,store)");
+ }
+ goto done;
+ }
+ break;
+
+ case Xin_FpLdStI:
+ if (i->Xin.FpLdStI.isLoad) {
+ /* Load from memory into %fakeN, converting from an int.
+ --> ffree %st(7) ; fild{w/l/ll} amode ; fstp st(N+1)
+ */
+ switch (i->Xin.FpLdStI.sz) {
+ case 8: opc = 0xDF; subopc_imm = 5; break;
+ case 4: opc = 0xDB; subopc_imm = 0; break;
+ case 2: vassert(0); opc = 0xDF; subopc_imm = 0; break;
+ default: vpanic("emitX86Instr(Xin_FpLdStI-load)");
+ }
+ p = do_ffree_st7(p);
+ *p++ = toUChar(opc);
+ p = doAMode_M(p, fake(subopc_imm)/*subopcode*/, i->Xin.FpLdStI.addr);
+ p = do_fstp_st(p, 1+hregNumber(i->Xin.FpLdStI.reg));
+ goto done;
+ } else {
+ /* Store from %fakeN into memory, converting to an int.
+ --> ffree %st(7) ; fld st(N) ; fistp{w/l/ll} amode
+ */
+ switch (i->Xin.FpLdStI.sz) {
+ case 8: opc = 0xDF; subopc_imm = 7; break;
+ case 4: opc = 0xDB; subopc_imm = 3; break;
+ case 2: opc = 0xDF; subopc_imm = 3; break;
+ default: vpanic("emitX86Instr(Xin_FpLdStI-store)");
+ }
+ p = do_ffree_st7(p);
+ p = do_fld_st(p, 0+hregNumber(i->Xin.FpLdStI.reg));
+ *p++ = toUChar(opc);
+ p = doAMode_M(p, fake(subopc_imm)/*subopcode*/, i->Xin.FpLdStI.addr);
+ goto done;
+ }
+ break;
+
+ case Xin_Fp64to32:
+ /* ffree %st7 ; fld %st(src) */
+ p = do_ffree_st7(p);
+ p = do_fld_st(p, 0+fregNo(i->Xin.Fp64to32.src));
+ /* subl $4, %esp */
+ *p++ = 0x83; *p++ = 0xEC; *p++ = 0x04;
+ /* fstps (%esp) */
+ *p++ = 0xD9; *p++ = 0x1C; *p++ = 0x24;
+ /* flds (%esp) */
+ *p++ = 0xD9; *p++ = 0x04; *p++ = 0x24;
+ /* addl $4, %esp */
+ *p++ = 0x83; *p++ = 0xC4; *p++ = 0x04;
+ /* fstp %st(1+dst) */
+ p = do_fstp_st(p, 1+fregNo(i->Xin.Fp64to32.dst));
+ goto done;
+
+ case Xin_FpCMov:
+ /* jmp fwds if !condition */
+ *p++ = toUChar(0x70 + (i->Xin.FpCMov.cond ^ 1));
+ *p++ = 0; /* # of bytes in the next bit, which we don't know yet */
+ ptmp = p;
+
+ /* ffree %st7 ; fld %st(src) ; fstp %st(1+dst) */
+ p = do_ffree_st7(p);
+ p = do_fld_st(p, 0+fregNo(i->Xin.FpCMov.src));
+ p = do_fstp_st(p, 1+fregNo(i->Xin.FpCMov.dst));
+
+ /* Fill in the jump offset. */
+ *(ptmp-1) = toUChar(p - ptmp);
+ goto done;
+
+ case Xin_FpLdCW:
+ *p++ = 0xD9;
+ p = doAMode_M(p, fake(5)/*subopcode*/, i->Xin.FpLdCW.addr);
+ goto done;
+
+ case Xin_FpStSW_AX:
+ /* note, this emits fnstsw %ax, not fstsw %ax */
+ *p++ = 0xDF;
+ *p++ = 0xE0;
+ goto done;
+
+ case Xin_FpCmp:
+ /* gcmp %fL, %fR, %dst
+ -> ffree %st7; fpush %fL ; fucomp %(fR+1) ;
+ fnstsw %ax ; movl %eax, %dst
+ */
+ /* ffree %st7 */
+ p = do_ffree_st7(p);
+ /* fpush %fL */
+ p = do_fld_st(p, 0+fregNo(i->Xin.FpCmp.srcL));
+ /* fucomp %(fR+1) */
+ *p++ = 0xDD;
+ *p++ = toUChar(0xE8 + (7 & (1+fregNo(i->Xin.FpCmp.srcR))));
+ /* fnstsw %ax */
+ *p++ = 0xDF;
+ *p++ = 0xE0;
+ /* movl %eax, %dst */
+ *p++ = 0x89;
+ p = doAMode_R(p, hregX86_EAX(), i->Xin.FpCmp.dst);
+ goto done;
+
+ case Xin_SseConst: {
+ UShort con = i->Xin.SseConst.con;
+ p = push_word_from_tags(p, toUShort((con >> 12) & 0xF));
+ p = push_word_from_tags(p, toUShort((con >> 8) & 0xF));
+ p = push_word_from_tags(p, toUShort((con >> 4) & 0xF));
+ p = push_word_from_tags(p, toUShort(con & 0xF));
+      /* movups (%esp), %xmm-dst */
+ *p++ = 0x0F;
+ *p++ = 0x10;
+ *p++ = toUChar(0x04 + 8 * (7 & vregNo(i->Xin.SseConst.dst)));
+ *p++ = 0x24;
+ /* addl $16, %esp */
+ *p++ = 0x83;
+ *p++ = 0xC4;
+ *p++ = 0x10;
+ goto done;
+ }
+
+ case Xin_SseLdSt:
+ *p++ = 0x0F;
+ *p++ = toUChar(i->Xin.SseLdSt.isLoad ? 0x10 : 0x11);
+ p = doAMode_M(p, fake(vregNo(i->Xin.SseLdSt.reg)), i->Xin.SseLdSt.addr);
+ goto done;
+
+ case Xin_SseLdzLO:
+ vassert(i->Xin.SseLdzLO.sz == 4 || i->Xin.SseLdzLO.sz == 8);
+ /* movs[sd] amode, %xmm-dst */
+ *p++ = toUChar(i->Xin.SseLdzLO.sz==4 ? 0xF3 : 0xF2);
+ *p++ = 0x0F;
+ *p++ = 0x10;
+ p = doAMode_M(p, fake(vregNo(i->Xin.SseLdzLO.reg)),
+ i->Xin.SseLdzLO.addr);
+ goto done;
+
+ case Xin_Sse32Fx4:
+ xtra = 0;
+ *p++ = 0x0F;
+ switch (i->Xin.Sse32Fx4.op) {
+ case Xsse_ADDF: *p++ = 0x58; break;
+ case Xsse_DIVF: *p++ = 0x5E; break;
+ case Xsse_MAXF: *p++ = 0x5F; break;
+ case Xsse_MINF: *p++ = 0x5D; break;
+ case Xsse_MULF: *p++ = 0x59; break;
+ case Xsse_RCPF: *p++ = 0x53; break;
+ case Xsse_RSQRTF: *p++ = 0x52; break;
+ case Xsse_SQRTF: *p++ = 0x51; break;
+ case Xsse_SUBF: *p++ = 0x5C; break;
+ case Xsse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
+ case Xsse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
+ case Xsse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
+ case Xsse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
+ default: goto bad;
+ }
+ p = doAMode_R(p, fake(vregNo(i->Xin.Sse32Fx4.dst)),
+ fake(vregNo(i->Xin.Sse32Fx4.src)) );
+ if (xtra & 0x100)
+ *p++ = toUChar(xtra & 0xFF);
+ goto done;
+
+ case Xin_Sse64Fx2:
+ xtra = 0;
+ *p++ = 0x66;
+ *p++ = 0x0F;
+ switch (i->Xin.Sse64Fx2.op) {
+ case Xsse_ADDF: *p++ = 0x58; break;
+ case Xsse_DIVF: *p++ = 0x5E; break;
+ case Xsse_MAXF: *p++ = 0x5F; break;
+ case Xsse_MINF: *p++ = 0x5D; break;
+ case Xsse_MULF: *p++ = 0x59; break;
+ case Xsse_RCPF: *p++ = 0x53; break;
+ case Xsse_RSQRTF: *p++ = 0x52; break;
+ case Xsse_SQRTF: *p++ = 0x51; break;
+ case Xsse_SUBF: *p++ = 0x5C; break;
+ case Xsse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
+ case Xsse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
+ case Xsse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
+ case Xsse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
+ default: goto bad;
+ }
+ p = doAMode_R(p, fake(vregNo(i->Xin.Sse64Fx2.dst)),
+ fake(vregNo(i->Xin.Sse64Fx2.src)) );
+ if (xtra & 0x100)
+ *p++ = toUChar(xtra & 0xFF);
+ goto done;
+
+ case Xin_Sse32FLo:
+ xtra = 0;
+ *p++ = 0xF3;
+ *p++ = 0x0F;
+ switch (i->Xin.Sse32FLo.op) {
+ case Xsse_ADDF: *p++ = 0x58; break;
+ case Xsse_DIVF: *p++ = 0x5E; break;
+ case Xsse_MAXF: *p++ = 0x5F; break;
+ case Xsse_MINF: *p++ = 0x5D; break;
+ case Xsse_MULF: *p++ = 0x59; break;
+ case Xsse_RCPF: *p++ = 0x53; break;
+ case Xsse_RSQRTF: *p++ = 0x52; break;
+ case Xsse_SQRTF: *p++ = 0x51; break;
+ case Xsse_SUBF: *p++ = 0x5C; break;
+ case Xsse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
+ case Xsse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
+ case Xsse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
+ case Xsse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
+ default: goto bad;
+ }
+ p = doAMode_R(p, fake(vregNo(i->Xin.Sse32FLo.dst)),
+ fake(vregNo(i->Xin.Sse32FLo.src)) );
+ if (xtra & 0x100)
+ *p++ = toUChar(xtra & 0xFF);
+ goto done;
+
+ case Xin_Sse64FLo:
+ xtra = 0;
+ *p++ = 0xF2;
+ *p++ = 0x0F;
+ switch (i->Xin.Sse64FLo.op) {
+ case Xsse_ADDF: *p++ = 0x58; break;
+ case Xsse_DIVF: *p++ = 0x5E; break;
+ case Xsse_MAXF: *p++ = 0x5F; break;
+ case Xsse_MINF: *p++ = 0x5D; break;
+ case Xsse_MULF: *p++ = 0x59; break;
+ case Xsse_RCPF: *p++ = 0x53; break;
+ case Xsse_RSQRTF: *p++ = 0x52; break;
+ case Xsse_SQRTF: *p++ = 0x51; break;
+ case Xsse_SUBF: *p++ = 0x5C; break;
+ case Xsse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
+ case Xsse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
+ case Xsse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
+ case Xsse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
+ default: goto bad;
+ }
+ p = doAMode_R(p, fake(vregNo(i->Xin.Sse64FLo.dst)),
+ fake(vregNo(i->Xin.Sse64FLo.src)) );
+ if (xtra & 0x100)
+ *p++ = toUChar(xtra & 0xFF);
+ goto done;
+
+ case Xin_SseReRg:
+# define XX(_n) *p++ = (_n)
+ switch (i->Xin.SseReRg.op) {
+ case Xsse_MOV: /*movups*/ XX(0x0F); XX(0x10); break;
+ case Xsse_OR: XX(0x0F); XX(0x56); break;
+ case Xsse_XOR: XX(0x0F); XX(0x57); break;
+ case Xsse_AND: XX(0x0F); XX(0x54); break;
+ case Xsse_PACKSSD: XX(0x66); XX(0x0F); XX(0x6B); break;
+ case Xsse_PACKSSW: XX(0x66); XX(0x0F); XX(0x63); break;
+ case Xsse_PACKUSW: XX(0x66); XX(0x0F); XX(0x67); break;
+ case Xsse_ADD8: XX(0x66); XX(0x0F); XX(0xFC); break;
+ case Xsse_ADD16: XX(0x66); XX(0x0F); XX(0xFD); break;
+ case Xsse_ADD32: XX(0x66); XX(0x0F); XX(0xFE); break;
+ case Xsse_ADD64: XX(0x66); XX(0x0F); XX(0xD4); break;
+ case Xsse_QADD8S: XX(0x66); XX(0x0F); XX(0xEC); break;
+ case Xsse_QADD16S: XX(0x66); XX(0x0F); XX(0xED); break;
+ case Xsse_QADD8U: XX(0x66); XX(0x0F); XX(0xDC); break;
+ case Xsse_QADD16U: XX(0x66); XX(0x0F); XX(0xDD); break;
+ case Xsse_AVG8U: XX(0x66); XX(0x0F); XX(0xE0); break;
+ case Xsse_AVG16U: XX(0x66); XX(0x0F); XX(0xE3); break;
+ case Xsse_CMPEQ8: XX(0x66); XX(0x0F); XX(0x74); break;
+ case Xsse_CMPEQ16: XX(0x66); XX(0x0F); XX(0x75); break;
+ case Xsse_CMPEQ32: XX(0x66); XX(0x0F); XX(0x76); break;
+ case Xsse_CMPGT8S: XX(0x66); XX(0x0F); XX(0x64); break;
+ case Xsse_CMPGT16S: XX(0x66); XX(0x0F); XX(0x65); break;
+ case Xsse_CMPGT32S: XX(0x66); XX(0x0F); XX(0x66); break;
+ case Xsse_MAX16S: XX(0x66); XX(0x0F); XX(0xEE); break;
+ case Xsse_MAX8U: XX(0x66); XX(0x0F); XX(0xDE); break;
+ case Xsse_MIN16S: XX(0x66); XX(0x0F); XX(0xEA); break;
+ case Xsse_MIN8U: XX(0x66); XX(0x0F); XX(0xDA); break;
+ case Xsse_MULHI16U: XX(0x66); XX(0x0F); XX(0xE4); break;
+ case Xsse_MULHI16S: XX(0x66); XX(0x0F); XX(0xE5); break;
+ case Xsse_MUL16: XX(0x66); XX(0x0F); XX(0xD5); break;
+ case Xsse_SHL16: XX(0x66); XX(0x0F); XX(0xF1); break;
+ case Xsse_SHL32: XX(0x66); XX(0x0F); XX(0xF2); break;
+ case Xsse_SHL64: XX(0x66); XX(0x0F); XX(0xF3); break;
+ case Xsse_SAR16: XX(0x66); XX(0x0F); XX(0xE1); break;
+ case Xsse_SAR32: XX(0x66); XX(0x0F); XX(0xE2); break;
+ case Xsse_SHR16: XX(0x66); XX(0x0F); XX(0xD1); break;
+ case Xsse_SHR32: XX(0x66); XX(0x0F); XX(0xD2); break;
+ case Xsse_SHR64: XX(0x66); XX(0x0F); XX(0xD3); break;
+ case Xsse_SUB8: XX(0x66); XX(0x0F); XX(0xF8); break;
+ case Xsse_SUB16: XX(0x66); XX(0x0F); XX(0xF9); break;
+ case Xsse_SUB32: XX(0x66); XX(0x0F); XX(0xFA); break;
+ case Xsse_SUB64: XX(0x66); XX(0x0F); XX(0xFB); break;
+ case Xsse_QSUB8S: XX(0x66); XX(0x0F); XX(0xE8); break;
+ case Xsse_QSUB16S: XX(0x66); XX(0x0F); XX(0xE9); break;
+ case Xsse_QSUB8U: XX(0x66); XX(0x0F); XX(0xD8); break;
+ case Xsse_QSUB16U: XX(0x66); XX(0x0F); XX(0xD9); break;
+ case Xsse_UNPCKHB: XX(0x66); XX(0x0F); XX(0x68); break;
+ case Xsse_UNPCKHW: XX(0x66); XX(0x0F); XX(0x69); break;
+ case Xsse_UNPCKHD: XX(0x66); XX(0x0F); XX(0x6A); break;
+ case Xsse_UNPCKHQ: XX(0x66); XX(0x0F); XX(0x6D); break;
+ case Xsse_UNPCKLB: XX(0x66); XX(0x0F); XX(0x60); break;
+ case Xsse_UNPCKLW: XX(0x66); XX(0x0F); XX(0x61); break;
+ case Xsse_UNPCKLD: XX(0x66); XX(0x0F); XX(0x62); break;
+ case Xsse_UNPCKLQ: XX(0x66); XX(0x0F); XX(0x6C); break;
+ default: goto bad;
+ }
+ p = doAMode_R(p, fake(vregNo(i->Xin.SseReRg.dst)),
+ fake(vregNo(i->Xin.SseReRg.src)) );
+# undef XX
+ goto done;
+
+ case Xin_SseCMov:
+ /* jmp fwds if !condition */
+ *p++ = toUChar(0x70 + (i->Xin.SseCMov.cond ^ 1));
+ *p++ = 0; /* # of bytes in the next bit, which we don't know yet */
+ ptmp = p;
+
+ /* movaps %src, %dst */
+ *p++ = 0x0F;
+ *p++ = 0x28;
+ p = doAMode_R(p, fake(vregNo(i->Xin.SseCMov.dst)),
+ fake(vregNo(i->Xin.SseCMov.src)) );
+
+ /* Fill in the jump offset. */
+ *(ptmp-1) = toUChar(p - ptmp);
+ goto done;
+
+ case Xin_SseShuf:
+ *p++ = 0x66;
+ *p++ = 0x0F;
+ *p++ = 0x70;
+ p = doAMode_R(p, fake(vregNo(i->Xin.SseShuf.dst)),
+ fake(vregNo(i->Xin.SseShuf.src)) );
+ *p++ = (UChar)(i->Xin.SseShuf.order);
+ goto done;
+
+ default:
+ goto bad;
+ }
+
+ bad:
+ ppX86Instr(i, mode64);
+ vpanic("emit_X86Instr");
+ /*NOTREACHED*/
+
+ done:
+ vassert(p - &buf[0] <= 32);
+ return p - &buf[0];
+
+# undef fake
+}
+
+/*---------------------------------------------------------------*/
+/*--- end host_x86_defs.c ---*/
+/*---------------------------------------------------------------*/
diff --git a/VEX/priv/host_x86_defs.h b/VEX/priv/host_x86_defs.h
new file mode 100644
index 0000000..fde700a
--- /dev/null
+++ b/VEX/priv/host_x86_defs.h
@@ -0,0 +1,694 @@
+
+/*---------------------------------------------------------------*/
+/*--- begin host_x86_defs.h ---*/
+/*---------------------------------------------------------------*/
+
+/*
+ This file is part of Valgrind, a dynamic binary instrumentation
+ framework.
+
+ Copyright (C) 2004-2010 OpenWorks LLP
+ info@open-works.net
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA.
+
+ The GNU General Public License is contained in the file COPYING.
+
+ Neither the names of the U.S. Department of Energy nor the
+ University of California nor the names of its contributors may be
+ used to endorse or promote products derived from this software
+ without prior written permission.
+*/
+
+#ifndef __VEX_HOST_X86_DEFS_H
+#define __VEX_HOST_X86_DEFS_H
+
+
+/* --------- Registers. --------- */
+
+/* The usual HReg abstraction. There are 8 real int regs,
+ 6 real float regs, and 8 real vector regs.
+*/
+
+extern void ppHRegX86 ( HReg );
+
+extern HReg hregX86_EAX ( void );
+extern HReg hregX86_EBX ( void );
+extern HReg hregX86_ECX ( void );
+extern HReg hregX86_EDX ( void );
+extern HReg hregX86_ESP ( void );
+extern HReg hregX86_EBP ( void );
+extern HReg hregX86_ESI ( void );
+extern HReg hregX86_EDI ( void );
+
+extern HReg hregX86_FAKE0 ( void );
+extern HReg hregX86_FAKE1 ( void );
+extern HReg hregX86_FAKE2 ( void );
+extern HReg hregX86_FAKE3 ( void );
+extern HReg hregX86_FAKE4 ( void );
+extern HReg hregX86_FAKE5 ( void );
+
+extern HReg hregX86_XMM0 ( void );
+extern HReg hregX86_XMM1 ( void );
+extern HReg hregX86_XMM2 ( void );
+extern HReg hregX86_XMM3 ( void );
+extern HReg hregX86_XMM4 ( void );
+extern HReg hregX86_XMM5 ( void );
+extern HReg hregX86_XMM6 ( void );
+extern HReg hregX86_XMM7 ( void );
+
+
+/* --------- Condition codes, Intel encoding. --------- */
+
+typedef
+ enum {
+ Xcc_O = 0, /* overflow */
+ Xcc_NO = 1, /* no overflow */
+
+ Xcc_B = 2, /* below */
+ Xcc_NB = 3, /* not below */
+
+ Xcc_Z = 4, /* zero */
+ Xcc_NZ = 5, /* not zero */
+
+ Xcc_BE = 6, /* below or equal */
+ Xcc_NBE = 7, /* not below or equal */
+
+ Xcc_S = 8, /* negative */
+ Xcc_NS = 9, /* not negative */
+
+ Xcc_P = 10, /* parity even */
+ Xcc_NP = 11, /* not parity even */
+
+ Xcc_L = 12, /* jump less */
+ Xcc_NL = 13, /* not less */
+
+ Xcc_LE = 14, /* less or equal */
+ Xcc_NLE = 15, /* not less or equal */
+
+ Xcc_ALWAYS = 16 /* the usual hack */
+ }
+ X86CondCode;
+
+extern HChar* showX86CondCode ( X86CondCode );
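+
+/* Note that these are the Intel condition-code encodings, so
+   flipping the bottom bit negates a condition (e.g. Xcc_Z ^ 1 ==
+   Xcc_NZ).  The instruction emitter relies on this when synthesising
+   jump-over sequences. */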
+
+
+/* --------- Memory address expressions (amodes). --------- */
+
+typedef
+ enum {
+ Xam_IR, /* Immediate + Reg */
+ Xam_IRRS /* Immediate + Reg1 + (Reg2 << Shift) */
+ }
+ X86AModeTag;
+
+typedef
+ struct {
+ X86AModeTag tag;
+ union {
+ struct {
+ UInt imm;
+ HReg reg;
+ } IR;
+ struct {
+ UInt imm;
+ HReg base;
+ HReg index;
+ Int shift; /* 0, 1, 2 or 3 only */
+ } IRRS;
+ } Xam;
+ }
+ X86AMode;
+
+extern X86AMode* X86AMode_IR ( UInt, HReg );
+extern X86AMode* X86AMode_IRRS ( UInt, HReg, HReg, Int );
+
+extern X86AMode* dopyX86AMode ( X86AMode* );
+
+extern void ppX86AMode ( X86AMode* );
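+
+/* Example (illustrative): the amode 8(%ebp,%esi,4) would be built as
+
+      X86AMode_IRRS(8, hregX86_EBP(), hregX86_ESI(), 2);
+
+   and 16(%esp) as
+
+      X86AMode_IR(16, hregX86_ESP());
+*/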
+
+
+/* --------- Operand, which can be reg, immediate or memory. --------- */
+
+typedef
+ enum {
+ Xrmi_Imm,
+ Xrmi_Reg,
+ Xrmi_Mem
+ }
+ X86RMITag;
+
+typedef
+ struct {
+ X86RMITag tag;
+ union {
+ struct {
+ UInt imm32;
+ } Imm;
+ struct {
+ HReg reg;
+ } Reg;
+ struct {
+ X86AMode* am;
+ } Mem;
+ }
+ Xrmi;
+ }
+ X86RMI;
+
+extern X86RMI* X86RMI_Imm ( UInt );
+extern X86RMI* X86RMI_Reg ( HReg );
+extern X86RMI* X86RMI_Mem ( X86AMode* );
+
+extern void ppX86RMI ( X86RMI* );
+
+
+/* --------- Operand, which can be reg or immediate only. --------- */
+
+typedef
+ enum {
+ Xri_Imm,
+ Xri_Reg
+ }
+ X86RITag;
+
+typedef
+ struct {
+ X86RITag tag;
+ union {
+ struct {
+ UInt imm32;
+ } Imm;
+ struct {
+ HReg reg;
+ } Reg;
+ }
+ Xri;
+ }
+ X86RI;
+
+extern X86RI* X86RI_Imm ( UInt );
+extern X86RI* X86RI_Reg ( HReg );
+
+extern void ppX86RI ( X86RI* );
+
+
+/* --------- Operand, which can be reg or memory only. --------- */
+
+typedef
+ enum {
+ Xrm_Reg,
+ Xrm_Mem
+ }
+ X86RMTag;
+
+typedef
+ struct {
+ X86RMTag tag;
+ union {
+ struct {
+ HReg reg;
+ } Reg;
+ struct {
+ X86AMode* am;
+ } Mem;
+ }
+ Xrm;
+ }
+ X86RM;
+
+extern X86RM* X86RM_Reg ( HReg );
+extern X86RM* X86RM_Mem ( X86AMode* );
+
+extern void ppX86RM ( X86RM* );
+
+
+/* --------- Instructions. --------- */
+
+/* --------- */
+typedef
+ enum {
+ Xun_NEG,
+ Xun_NOT
+ }
+ X86UnaryOp;
+
+extern HChar* showX86UnaryOp ( X86UnaryOp );
+
+
+/* --------- */
+typedef
+ enum {
+ Xalu_INVALID,
+ Xalu_MOV,
+ Xalu_CMP,
+ Xalu_ADD, Xalu_SUB, Xalu_ADC, Xalu_SBB,
+ Xalu_AND, Xalu_OR, Xalu_XOR,
+ Xalu_MUL
+ }
+ X86AluOp;
+
+extern HChar* showX86AluOp ( X86AluOp );
+
+
+/* --------- */
+typedef
+ enum {
+ Xsh_INVALID,
+ Xsh_SHL, Xsh_SHR, Xsh_SAR
+ }
+ X86ShiftOp;
+
+extern HChar* showX86ShiftOp ( X86ShiftOp );
+
+
+/* --------- */
+typedef
+ enum {
+ Xfp_INVALID,
+ /* Binary */
+ Xfp_ADD, Xfp_SUB, Xfp_MUL, Xfp_DIV,
+ Xfp_SCALE, Xfp_ATAN, Xfp_YL2X, Xfp_YL2XP1, Xfp_PREM, Xfp_PREM1,
+ /* Unary */
+ Xfp_SQRT, Xfp_ABS, Xfp_NEG, Xfp_MOV, Xfp_SIN, Xfp_COS, Xfp_TAN,
+ Xfp_ROUND, Xfp_2XM1
+ }
+ X86FpOp;
+
+extern HChar* showX86FpOp ( X86FpOp );
+
+
+/* --------- */
+typedef
+ enum {
+ Xsse_INVALID,
+ /* mov */
+ Xsse_MOV,
+ /* Floating point binary */
+ Xsse_ADDF, Xsse_SUBF, Xsse_MULF, Xsse_DIVF,
+ Xsse_MAXF, Xsse_MINF,
+ Xsse_CMPEQF, Xsse_CMPLTF, Xsse_CMPLEF, Xsse_CMPUNF,
+ /* Floating point unary */
+ Xsse_RCPF, Xsse_RSQRTF, Xsse_SQRTF,
+ /* Bitwise */
+ Xsse_AND, Xsse_OR, Xsse_XOR, Xsse_ANDN,
+ /* Integer binary */
+ Xsse_ADD8, Xsse_ADD16, Xsse_ADD32, Xsse_ADD64,
+ Xsse_QADD8U, Xsse_QADD16U,
+ Xsse_QADD8S, Xsse_QADD16S,
+ Xsse_SUB8, Xsse_SUB16, Xsse_SUB32, Xsse_SUB64,
+ Xsse_QSUB8U, Xsse_QSUB16U,
+ Xsse_QSUB8S, Xsse_QSUB16S,
+ Xsse_MUL16,
+ Xsse_MULHI16U,
+ Xsse_MULHI16S,
+ Xsse_AVG8U, Xsse_AVG16U,
+ Xsse_MAX16S,
+ Xsse_MAX8U,
+ Xsse_MIN16S,
+ Xsse_MIN8U,
+ Xsse_CMPEQ8, Xsse_CMPEQ16, Xsse_CMPEQ32,
+ Xsse_CMPGT8S, Xsse_CMPGT16S, Xsse_CMPGT32S,
+ Xsse_SHL16, Xsse_SHL32, Xsse_SHL64,
+ Xsse_SHR16, Xsse_SHR32, Xsse_SHR64,
+ Xsse_SAR16, Xsse_SAR32,
+ Xsse_PACKSSD, Xsse_PACKSSW, Xsse_PACKUSW,
+ Xsse_UNPCKHB, Xsse_UNPCKHW, Xsse_UNPCKHD, Xsse_UNPCKHQ,
+ Xsse_UNPCKLB, Xsse_UNPCKLW, Xsse_UNPCKLD, Xsse_UNPCKLQ
+ }
+ X86SseOp;
+
+extern HChar* showX86SseOp ( X86SseOp );
+
+
+/* --------- */
+typedef
+ enum {
+ Xin_Alu32R, /* 32-bit mov/arith/logical, dst=REG */
+ Xin_Alu32M, /* 32-bit mov/arith/logical, dst=MEM */
+ Xin_Sh32, /* 32-bit shift/rotate, dst=REG */
+ Xin_Test32, /* 32-bit test of REG or MEM against imm32 (AND, set
+ flags, discard result) */
+ Xin_Unary32, /* 32-bit not and neg */
+ Xin_Lea32, /* 32-bit compute EA into a reg */
+ Xin_MulL, /* 32 x 32 -> 64 multiply */
+ Xin_Div, /* 64/32 -> (32,32) div and mod */
+ Xin_Sh3232, /* shldl or shrdl */
+ Xin_Push, /* push (32-bit?) value on stack */
+ Xin_Call, /* call to address in register */
+ Xin_Goto, /* conditional/unconditional jmp to dst */
+ Xin_CMov32, /* conditional move */
+ Xin_LoadEX, /* mov{s,z}{b,w}l from mem to reg */
+ Xin_Store, /* store 16/8 bit value in memory */
+ Xin_Set32, /* convert condition code to 32-bit value */
+ Xin_Bsfr32, /* 32-bit bsf/bsr */
+ Xin_MFence, /* mem fence (not just sse2, but sse0 and 1 too) */
+ Xin_ACAS, /* 8/16/32-bit lock;cmpxchg */
+ Xin_DACAS, /* lock;cmpxchg8b (doubleword ACAS, 2 x 32-bit only) */
+
+ Xin_FpUnary, /* FP fake unary op */
+ Xin_FpBinary, /* FP fake binary op */
+ Xin_FpLdSt, /* FP fake load/store */
+ Xin_FpLdStI, /* FP fake load/store, converting to/from Int */
+ Xin_Fp64to32, /* FP round IEEE754 double to IEEE754 single */
+ Xin_FpCMov, /* FP fake floating point conditional move */
+ Xin_FpLdCW, /* fldcw */
+ Xin_FpStSW_AX, /* fstsw %ax */
+ Xin_FpCmp, /* FP compare, generating a C320 value into int reg */
+
+ Xin_SseConst, /* Generate restricted SSE literal */
+ Xin_SseLdSt, /* SSE load/store, no alignment constraints */
+ Xin_SseLdzLO, /* SSE load low 32/64 bits, zero remainder of reg */
+ Xin_Sse32Fx4, /* SSE binary, 32Fx4 */
+ Xin_Sse32FLo, /* SSE binary, 32F in lowest lane only */
+ Xin_Sse64Fx2, /* SSE binary, 64Fx2 */
+ Xin_Sse64FLo, /* SSE binary, 64F in lowest lane only */
+ Xin_SseReRg, /* SSE binary general reg-reg, Re, Rg */
+ Xin_SseCMov, /* SSE conditional move */
+ Xin_SseShuf /* SSE2 shuffle (pshufd) */
+ }
+ X86InstrTag;
+
+/* Destinations are on the RIGHT (second operand) */
+
+typedef
+ struct {
+ X86InstrTag tag;
+ union {
+ struct {
+ X86AluOp op;
+ X86RMI* src;
+ HReg dst;
+ } Alu32R;
+ struct {
+ X86AluOp op;
+ X86RI* src;
+ X86AMode* dst;
+ } Alu32M;
+ struct {
+ X86ShiftOp op;
+ UInt src; /* shift amount, or 0 means %cl */
+ HReg dst;
+ } Sh32;
+ struct {
+ UInt imm32;
+ X86RM* dst; /* not written, only read */
+ } Test32;
+ /* Not and Neg */
+ struct {
+ X86UnaryOp op;
+ HReg dst;
+ } Unary32;
+ /* 32-bit compute EA into a reg */
+ struct {
+ X86AMode* am;
+ HReg dst;
+ } Lea32;
+ /* EDX:EAX = EAX *s/u r/m32 */
+ struct {
+ Bool syned;
+ X86RM* src;
+ } MulL;
+ /* x86 div/idiv instruction. Modifies EDX and EAX and reads src. */
+ struct {
+ Bool syned;
+ X86RM* src;
+ } Div;
+ /* shld/shrd. op may only be Xsh_SHL or Xsh_SHR */
+ struct {
+ X86ShiftOp op;
+ UInt amt; /* shift amount, or 0 means %cl */
+ HReg src;
+ HReg dst;
+ } Sh3232;
+ struct {
+ X86RMI* src;
+ } Push;
+ /* Pseudo-insn. Call target (an absolute address), on given
+ condition (which could be Xcc_ALWAYS). */
+ struct {
+ X86CondCode cond;
+ Addr32 target;
+ Int regparms; /* 0 .. 3 */
+ } Call;
+ /* Pseudo-insn. Goto dst, on given condition (which could be
+ Xcc_ALWAYS). */
+ struct {
+ IRJumpKind jk;
+ X86CondCode cond;
+ X86RI* dst;
+ } Goto;
+ /* Mov src to dst on the given condition, which may not
+ be the bogus Xcc_ALWAYS. */
+ struct {
+ X86CondCode cond;
+ X86RM* src;
+ HReg dst;
+ } CMov32;
+ /* Sign/Zero extending loads. Dst size is always 32 bits. */
+ struct {
+ UChar szSmall;
+ Bool syned;
+ X86AMode* src;
+ HReg dst;
+ } LoadEX;
+ /* 16/8 bit stores, which are troublesome (particularly
+ 8-bit) */
+ struct {
+ UChar sz; /* only 1 or 2 */
+ HReg src;
+ X86AMode* dst;
+ } Store;
+ /* Convert a x86 condition code to a 32-bit value (0 or 1). */
+ struct {
+ X86CondCode cond;
+ HReg dst;
+ } Set32;
+ /* 32-bit bsf or bsr. */
+ struct {
+ Bool isFwds;
+ HReg src;
+ HReg dst;
+ } Bsfr32;
+ /* Mem fence (not just sse2, but sse0 and 1 too). In short,
+ an insn which flushes all preceding loads and stores as
+ much as possible before continuing. On SSE2 we emit a
+ real "mfence", on SSE1 "sfence ; lock addl $0,0(%esp)" and
+ on SSE0 "lock addl $0,0(%esp)". This insn therefore
+ carries the host's hwcaps so the assembler knows what to
+ emit. */
+ struct {
+ UInt hwcaps;
+ } MFence;
+ /* "lock;cmpxchg": mem address in .addr,
+ expected value in %eax, new value in %ebx */
+ struct {
+ X86AMode* addr;
+ UChar sz; /* 1, 2 or 4 */
+ } ACAS;
+ /* "lock;cmpxchg8b": mem address in .addr, expected value in
+ %edx:%eax, new value in %ecx:%ebx */
+ struct {
+ X86AMode* addr;
+ } DACAS;
+
+ /* X86 Floating point (fake 3-operand, "flat reg file" insns) */
+ struct {
+ X86FpOp op;
+ HReg src;
+ HReg dst;
+ } FpUnary;
+ struct {
+ X86FpOp op;
+ HReg srcL;
+ HReg srcR;
+ HReg dst;
+ } FpBinary;
+ struct {
+ Bool isLoad;
+ UChar sz; /* only 4 (IEEE single) or 8 (IEEE double) */
+ HReg reg;
+ X86AMode* addr;
+ } FpLdSt;
+ /* Move 64-bit float to/from memory, converting to/from
+ signed int on the way. Note the conversions will observe
+ the host FPU rounding mode currently in force. */
+ struct {
+ Bool isLoad;
+ UChar sz; /* only 2, 4 or 8 */
+ HReg reg;
+ X86AMode* addr;
+ } FpLdStI;
+ /* By observing the current FPU rounding mode, round (etc)
+ src into dst given that dst should be interpreted as an
+ IEEE754 32-bit (float) type. */
+ struct {
+ HReg src;
+ HReg dst;
+ } Fp64to32;
+ /* Mov src to dst on the given condition, which may not
+ be the bogus Xcc_ALWAYS. */
+ struct {
+ X86CondCode cond;
+ HReg src;
+ HReg dst;
+ } FpCMov;
+ /* Load the FPU's 16-bit control word (fldcw) */
+ struct {
+ X86AMode* addr;
+ }
+ FpLdCW;
+ /* fstsw %ax */
+ struct {
+ /* no fields */
+ }
+ FpStSW_AX;
+ /* Do a compare, generating the C320 bits into the dst. */
+ struct {
+ HReg srcL;
+ HReg srcR;
+ HReg dst;
+ } FpCmp;
+
+ /* Simplistic SSE[123] */
+ struct {
+ UShort con;
+ HReg dst;
+ } SseConst;
+ struct {
+ Bool isLoad;
+ HReg reg;
+ X86AMode* addr;
+ } SseLdSt;
+ struct {
+ UChar sz; /* 4 or 8 only */
+ HReg reg;
+ X86AMode* addr;
+ } SseLdzLO;
+ struct {
+ X86SseOp op;
+ HReg src;
+ HReg dst;
+ } Sse32Fx4;
+ struct {
+ X86SseOp op;
+ HReg src;
+ HReg dst;
+ } Sse32FLo;
+ struct {
+ X86SseOp op;
+ HReg src;
+ HReg dst;
+ } Sse64Fx2;
+ struct {
+ X86SseOp op;
+ HReg src;
+ HReg dst;
+ } Sse64FLo;
+ struct {
+ X86SseOp op;
+ HReg src;
+ HReg dst;
+ } SseReRg;
+ /* Mov src to dst on the given condition, which may not
+ be the bogus Xcc_ALWAYS. */
+ struct {
+ X86CondCode cond;
+ HReg src;
+ HReg dst;
+ } SseCMov;
+ struct {
+ Int order; /* 0 <= order <= 0xFF */
+ HReg src;
+ HReg dst;
+ } SseShuf;
+
+ } Xin;
+ }
+ X86Instr;
+
+extern X86Instr* X86Instr_Alu32R ( X86AluOp, X86RMI*, HReg );
+extern X86Instr* X86Instr_Alu32M ( X86AluOp, X86RI*, X86AMode* );
+extern X86Instr* X86Instr_Unary32 ( X86UnaryOp op, HReg dst );
+extern X86Instr* X86Instr_Lea32 ( X86AMode* am, HReg dst );
+
+extern X86Instr* X86Instr_Sh32 ( X86ShiftOp, UInt, HReg );
+extern X86Instr* X86Instr_Test32 ( UInt imm32, X86RM* dst );
+extern X86Instr* X86Instr_MulL ( Bool syned, X86RM* );
+extern X86Instr* X86Instr_Div ( Bool syned, X86RM* );
+extern X86Instr* X86Instr_Sh3232 ( X86ShiftOp, UInt amt, HReg src, HReg dst );
+extern X86Instr* X86Instr_Push ( X86RMI* );
+extern X86Instr* X86Instr_Call ( X86CondCode, Addr32, Int );
+extern X86Instr* X86Instr_Goto ( IRJumpKind, X86CondCode cond, X86RI* dst );
+extern X86Instr* X86Instr_CMov32 ( X86CondCode, X86RM* src, HReg dst );
+extern X86Instr* X86Instr_LoadEX ( UChar szSmall, Bool syned,
+ X86AMode* src, HReg dst );
+extern X86Instr* X86Instr_Store ( UChar sz, HReg src, X86AMode* dst );
+extern X86Instr* X86Instr_Set32 ( X86CondCode cond, HReg dst );
+extern X86Instr* X86Instr_Bsfr32 ( Bool isFwds, HReg src, HReg dst );
+extern X86Instr* X86Instr_MFence ( UInt hwcaps );
+extern X86Instr* X86Instr_ACAS ( X86AMode* addr, UChar sz );
+extern X86Instr* X86Instr_DACAS ( X86AMode* addr );
+
+extern X86Instr* X86Instr_FpUnary ( X86FpOp op, HReg src, HReg dst );
+extern X86Instr* X86Instr_FpBinary ( X86FpOp op, HReg srcL, HReg srcR, HReg dst );
+extern X86Instr* X86Instr_FpLdSt ( Bool isLoad, UChar sz, HReg reg, X86AMode* );
+extern X86Instr* X86Instr_FpLdStI ( Bool isLoad, UChar sz, HReg reg, X86AMode* );
+extern X86Instr* X86Instr_Fp64to32 ( HReg src, HReg dst );
+extern X86Instr* X86Instr_FpCMov ( X86CondCode, HReg src, HReg dst );
+extern X86Instr* X86Instr_FpLdCW ( X86AMode* );
+extern X86Instr* X86Instr_FpStSW_AX ( void );
+extern X86Instr* X86Instr_FpCmp ( HReg srcL, HReg srcR, HReg dst );
+
+extern X86Instr* X86Instr_SseConst ( UShort con, HReg dst );
+extern X86Instr* X86Instr_SseLdSt ( Bool isLoad, HReg, X86AMode* );
+extern X86Instr* X86Instr_SseLdzLO ( Int sz, HReg, X86AMode* );
+extern X86Instr* X86Instr_Sse32Fx4 ( X86SseOp, HReg, HReg );
+extern X86Instr* X86Instr_Sse32FLo ( X86SseOp, HReg, HReg );
+extern X86Instr* X86Instr_Sse64Fx2 ( X86SseOp, HReg, HReg );
+extern X86Instr* X86Instr_Sse64FLo ( X86SseOp, HReg, HReg );
+extern X86Instr* X86Instr_SseReRg ( X86SseOp, HReg, HReg );
+extern X86Instr* X86Instr_SseCMov ( X86CondCode, HReg src, HReg dst );
+extern X86Instr* X86Instr_SseShuf ( Int order, HReg src, HReg dst );
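+
+/* Example (illustrative): 'addl $4, %esp' would be constructed as
+
+      X86Instr_Alu32R(Xalu_ADD, X86RMI_Imm(4), hregX86_ESP());
+
+   with the destination as the second (right-hand) operand, per the
+   convention noted above. */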
+
+
+extern void ppX86Instr ( X86Instr*, Bool );
+
+/* Some functions that insulate the register allocator from details
+ of the underlying instruction set. */
+extern void getRegUsage_X86Instr ( HRegUsage*, X86Instr*, Bool );
+extern void mapRegs_X86Instr ( HRegRemap*, X86Instr*, Bool );
+extern Bool isMove_X86Instr ( X86Instr*, HReg*, HReg* );
+extern Int emit_X86Instr ( UChar* buf, Int nbuf, X86Instr*,
+ Bool, void* dispatch );
+
+extern void genSpill_X86 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
+ HReg rreg, Int offset, Bool );
+extern void genReload_X86 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
+ HReg rreg, Int offset, Bool );
+
+extern X86Instr* directReload_X86 ( X86Instr* i,
+ HReg vreg, Short spill_off );
+extern void getAllocableRegs_X86 ( Int*, HReg** );
+extern HInstrArray* iselSB_X86 ( IRSB*, VexArch,
+ VexArchInfo*,
+ VexAbiInfo* );
+
+#endif /* ndef __VEX_HOST_X86_DEFS_H */
+
+/*---------------------------------------------------------------*/
+/*--- end host_x86_defs.h ---*/
+/*---------------------------------------------------------------*/
diff --git a/VEX/priv/host_x86_isel.c b/VEX/priv/host_x86_isel.c
new file mode 100644
index 0000000..fc5cf05
--- /dev/null
+++ b/VEX/priv/host_x86_isel.c
@@ -0,0 +1,4079 @@
+
+/*---------------------------------------------------------------*/
+/*--- begin host_x86_isel.c ---*/
+/*---------------------------------------------------------------*/
+
+/*
+ This file is part of Valgrind, a dynamic binary instrumentation
+ framework.
+
+ Copyright (C) 2004-2010 OpenWorks LLP
+ info@open-works.net
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA.
+
+ The GNU General Public License is contained in the file COPYING.
+
+ Neither the names of the U.S. Department of Energy nor the
+ University of California nor the names of its contributors may be
+ used to endorse or promote products derived from this software
+ without prior written permission.
+*/
+
+#include "libvex_basictypes.h"
+#include "libvex_ir.h"
+#include "libvex.h"
+
+#include "ir_match.h"
+#include "main_util.h"
+#include "main_globals.h"
+#include "host_generic_regs.h"
+#include "host_generic_simd64.h"
+#include "host_x86_defs.h"
+
+/* TODO 21 Apr 2005:
+
+ -- (Really an assembler issue) don't emit CMov32 as a cmov
+ insn, since that's expensive on P4 and conditional branch
+ is cheaper if (as we expect) the condition is highly predictable
+
+ -- preserve xmm registers across function calls (by declaring them
+ as trashed by call insns)
+
+ -- preserve x87 ST stack discipline across function calls. Sigh.
+
+ -- Check doHelperCall: if a call is conditional, we cannot safely
+ compute any regparm args directly to registers. Hence, the
+ fast-regparm marshalling should be restricted to unconditional
+ calls only.
+*/
+
+/*---------------------------------------------------------*/
+/*--- x87 control word stuff ---*/
+/*---------------------------------------------------------*/
+
+/* Vex-generated code expects to run with the FPU set as follows: all
+ exceptions masked, round-to-nearest, precision = 53 bits. This
+ corresponds to a FPU control word value of 0x027F.
+
+ Similarly the SSE control word (%mxcsr) should be 0x1F80.
+
+   %fpucw and %mxcsr should have these values on entry to
+   Vex-generated code, and those values should be unchanged
+   at exit.
+*/
+
+#define DEFAULT_FPUCW 0x027F
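+
+/* For reference, the fields of 0x027F: bits 5..0 set = all six
+   exception classes masked (bit 6 is reserved and reads as 1); PC
+   (bits 9:8) = 10b = 53-bit precision; RC (bits 11:10) = 00b = round
+   to nearest even. */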
+
+/* debugging only, do not use */
+/* define DEFAULT_FPUCW 0x037F */
+
+
+/*---------------------------------------------------------*/
+/*--- misc helpers ---*/
+/*---------------------------------------------------------*/
+
+/* These are duplicated in guest-x86/toIR.c */
+static IRExpr* unop ( IROp op, IRExpr* a )
+{
+ return IRExpr_Unop(op, a);
+}
+
+static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
+{
+ return IRExpr_Binop(op, a1, a2);
+}
+
+static IRExpr* bind ( Int binder )
+{
+ return IRExpr_Binder(binder);
+}
+
+static Bool isZeroU8 ( IRExpr* e )
+{
+ return e->tag == Iex_Const
+ && e->Iex.Const.con->tag == Ico_U8
+ && e->Iex.Const.con->Ico.U8 == 0;
+}
+
+static Bool isZeroU32 ( IRExpr* e )
+{
+ return e->tag == Iex_Const
+ && e->Iex.Const.con->tag == Ico_U32
+ && e->Iex.Const.con->Ico.U32 == 0;
+}
+
+static Bool isZeroU64 ( IRExpr* e )
+{
+ return e->tag == Iex_Const
+ && e->Iex.Const.con->tag == Ico_U64
+ && e->Iex.Const.con->Ico.U64 == 0ULL;
+}
+
+
+/*---------------------------------------------------------*/
+/*--- ISelEnv ---*/
+/*---------------------------------------------------------*/
+
+/* This carries around:
+
+ - A mapping from IRTemp to IRType, giving the type of any IRTemp we
+ might encounter. This is computed before insn selection starts,
+ and does not change.
+
+ - A mapping from IRTemp to HReg. This tells the insn selector
+ which virtual register(s) are associated with each IRTemp
+ temporary. This is computed before insn selection starts, and
+ does not change. We expect this mapping to map precisely the
+ same set of IRTemps as the type mapping does.
+
+ - vregmap holds the primary register for the IRTemp.
+ - vregmapHI is only used for 64-bit integer-typed
+ IRTemps. It holds the identity of a second
+ 32-bit virtual HReg, which holds the high half
+ of the value.
+
+ - The code array, that is, the insns selected so far.
+
+ - A counter, for generating new virtual registers.
+
+ - The host subarchitecture we are selecting insns for.
+ This is set at the start and does not change.
+
+ Note, this is all host-independent. */
+
+typedef
+ struct {
+ IRTypeEnv* type_env;
+
+ HReg* vregmap;
+ HReg* vregmapHI;
+ Int n_vregmap;
+
+ HInstrArray* code;
+
+ Int vreg_ctr;
+
+ UInt hwcaps;
+ }
+ ISelEnv;
+
+
+static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp )
+{
+ vassert(tmp >= 0);
+ vassert(tmp < env->n_vregmap);
+ return env->vregmap[tmp];
+}
+
+static void lookupIRTemp64 ( HReg* vrHI, HReg* vrLO, ISelEnv* env, IRTemp tmp )
+{
+ vassert(tmp >= 0);
+ vassert(tmp < env->n_vregmap);
+ vassert(env->vregmapHI[tmp] != INVALID_HREG);
+ *vrLO = env->vregmap[tmp];
+ *vrHI = env->vregmapHI[tmp];
+}
+
+static void addInstr ( ISelEnv* env, X86Instr* instr )
+{
+ addHInstr(env->code, instr);
+ if (vex_traceflags & VEX_TRACE_VCODE) {
+ ppX86Instr(instr, False);
+ vex_printf("\n");
+ }
+}
+
+static HReg newVRegI ( ISelEnv* env )
+{
+ HReg reg = mkHReg(env->vreg_ctr, HRcInt32, True/*virtual reg*/);
+ env->vreg_ctr++;
+ return reg;
+}
+
+static HReg newVRegF ( ISelEnv* env )
+{
+ HReg reg = mkHReg(env->vreg_ctr, HRcFlt64, True/*virtual reg*/);
+ env->vreg_ctr++;
+ return reg;
+}
+
+static HReg newVRegV ( ISelEnv* env )
+{
+ HReg reg = mkHReg(env->vreg_ctr, HRcVec128, True/*virtual reg*/);
+ env->vreg_ctr++;
+ return reg;
+}
+
+
+/*---------------------------------------------------------*/
+/*--- ISEL: Forward declarations ---*/
+/*---------------------------------------------------------*/
+
+/* These are organised as iselXXX and iselXXX_wrk pairs.  The
+   iselXXX_wrk functions do the real work, but are not to be called
+   directly.  For each XXX, iselXXX calls its iselXXX_wrk counterpart,
+   then checks that all returned registers are virtual.
+*/
+static X86RMI* iselIntExpr_RMI_wrk ( ISelEnv* env, IRExpr* e );
+static X86RMI* iselIntExpr_RMI ( ISelEnv* env, IRExpr* e );
+
+static X86RI* iselIntExpr_RI_wrk ( ISelEnv* env, IRExpr* e );
+static X86RI* iselIntExpr_RI ( ISelEnv* env, IRExpr* e );
+
+static X86RM* iselIntExpr_RM_wrk ( ISelEnv* env, IRExpr* e );
+static X86RM* iselIntExpr_RM ( ISelEnv* env, IRExpr* e );
+
+static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e );
+static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e );
+
+static X86AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, IRExpr* e );
+static X86AMode* iselIntExpr_AMode ( ISelEnv* env, IRExpr* e );
+
+static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo,
+ ISelEnv* env, IRExpr* e );
+static void iselInt64Expr ( HReg* rHi, HReg* rLo,
+ ISelEnv* env, IRExpr* e );
+
+static X86CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e );
+static X86CondCode iselCondCode ( ISelEnv* env, IRExpr* e );
+
+static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e );
+static HReg iselDblExpr ( ISelEnv* env, IRExpr* e );
+
+static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e );
+static HReg iselFltExpr ( ISelEnv* env, IRExpr* e );
+
+static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e );
+static HReg iselVecExpr ( ISelEnv* env, IRExpr* e );
+
+
+/*---------------------------------------------------------*/
+/*--- ISEL: Misc helpers ---*/
+/*---------------------------------------------------------*/
+
+/* Make a int reg-reg move. */
+
+static X86Instr* mk_iMOVsd_RR ( HReg src, HReg dst )
+{
+ vassert(hregClass(src) == HRcInt32);
+ vassert(hregClass(dst) == HRcInt32);
+ return X86Instr_Alu32R(Xalu_MOV, X86RMI_Reg(src), dst);
+}
+
+
+/* Make a vector reg-reg move. */
+
+static X86Instr* mk_vMOVsd_RR ( HReg src, HReg dst )
+{
+ vassert(hregClass(src) == HRcVec128);
+ vassert(hregClass(dst) == HRcVec128);
+ return X86Instr_SseReRg(Xsse_MOV, src, dst);
+}
+
+/* Advance/retreat %esp by n. */
+
+static void add_to_esp ( ISelEnv* env, Int n )
+{
+ vassert(n > 0 && n < 256 && (n%4) == 0);
+ addInstr(env,
+ X86Instr_Alu32R(Xalu_ADD, X86RMI_Imm(n), hregX86_ESP()));
+}
+
+static void sub_from_esp ( ISelEnv* env, Int n )
+{
+ vassert(n > 0 && n < 256 && (n%4) == 0);
+ addInstr(env,
+ X86Instr_Alu32R(Xalu_SUB, X86RMI_Imm(n), hregX86_ESP()));
+}
+
+
+/* Given an amode, return one which references 4 bytes further
+ along. */
+
+static X86AMode* advance4 ( X86AMode* am )
+{
+ X86AMode* am4 = dopyX86AMode(am);
+ switch (am4->tag) {
+ case Xam_IRRS:
+ am4->Xam.IRRS.imm += 4; break;
+ case Xam_IR:
+ am4->Xam.IR.imm += 4; break;
+ default:
+ vpanic("advance4(x86,host)");
+ }
+ return am4;
+}
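+
+/* Example (illustrative): when storing a 64-bit value as two 32-bit
+   halves, 'am' might address the low word at 0(%esp) and
+   'advance4(am)' the high word at 4(%esp). */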
+
+
+/* Push an arg onto the host stack, in preparation for a call to a
+ helper function of some kind. Returns the number of 32-bit words
+ pushed. */
+
+static Int pushArg ( ISelEnv* env, IRExpr* arg )
+{
+ IRType arg_ty = typeOfIRExpr(env->type_env, arg);
+ if (arg_ty == Ity_I32) {
+ addInstr(env, X86Instr_Push(iselIntExpr_RMI(env, arg)));
+ return 1;
+ } else
+ if (arg_ty == Ity_I64) {
+ HReg rHi, rLo;
+ iselInt64Expr(&rHi, &rLo, env, arg);
+ addInstr(env, X86Instr_Push(X86RMI_Reg(rHi)));
+ addInstr(env, X86Instr_Push(X86RMI_Reg(rLo)));
+ return 2;
+ }
+ ppIRExpr(arg);
+ vpanic("pushArg(x86): can't handle arg of this type");
+}
+
+
+/* Complete the call to a helper function, by calling the
+ helper and clearing the args off the stack. */
+
+static
+void callHelperAndClearArgs ( ISelEnv* env, X86CondCode cc,
+ IRCallee* cee, Int n_arg_ws )
+{
+ /* Complication. Need to decide which reg to use as the fn address
+ pointer, in a way that doesn't trash regparm-passed
+ parameters. */
+ vassert(sizeof(void*) == 4);
+
+ addInstr(env, X86Instr_Call( cc, toUInt(Ptr_to_ULong(cee->addr)),
+ cee->regparms));
+ if (n_arg_ws > 0)
+ add_to_esp(env, 4*n_arg_ws);
+}
+
+
+/* Used only in doHelperCall. See big comment in doHelperCall re
+ handling of regparm args. This function figures out whether
+ evaluation of an expression might require use of a fixed register.
+ If in doubt return True (safe but suboptimal).
+*/
+static
+Bool mightRequireFixedRegs ( IRExpr* e )
+{
+ switch (e->tag) {
+ case Iex_RdTmp: case Iex_Const: case Iex_Get:
+ return False;
+ default:
+ return True;
+ }
+}
+
+
+/* Do a complete function call. guard is a Ity_Bit expression
+ indicating whether or not the call happens. If guard==NULL, the
+ call is unconditional. */
+
+static
+void doHelperCall ( ISelEnv* env,
+ Bool passBBP,
+ IRExpr* guard, IRCallee* cee, IRExpr** args )
+{
+ X86CondCode cc;
+ HReg argregs[3];
+ HReg tmpregs[3];
+ Bool danger;
+ Int not_done_yet, n_args, n_arg_ws, stack_limit,
+ i, argreg, argregX;
+
+ /* Marshal args for a call, do the call, and clear the stack.
+ Complexities to consider:
+
+ * if passBBP is True, %ebp (the baseblock pointer) is to be
+ passed as the first arg.
+
+ * If the callee claims regparmness of 1, 2 or 3, we must pass the
+ first 1, 2 or 3 args in registers (EAX, EDX, and ECX
+ respectively). To keep things relatively simple, only args of
+ type I32 may be passed as regparms -- just bomb out if anything
+ else turns up. Clearly this depends on the front ends not
+ trying to pass any other types as regparms.
+ */
+
+ /* 16 Nov 2004: the regparm handling is complicated by the
+ following problem.
+
+      Consider a call to a function with two regparm parameters:
+ f(e1,e2). We need to compute e1 into %eax and e2 into %edx.
+ Suppose code is first generated to compute e1 into %eax. Then,
+ code is generated to compute e2 into %edx. Unfortunately, if
+ the latter code sequence uses %eax, it will trash the value of
+ e1 computed by the former sequence. This could happen if (for
+ example) e2 itself involved a function call. In the code below,
+ args are evaluated right-to-left, not left-to-right, but the
+ principle and the problem are the same.
+
+ One solution is to compute all regparm-bound args into vregs
+ first, and once they are all done, move them to the relevant
+ real regs. This always gives correct code, but it also gives
+ a bunch of vreg-to-rreg moves which are usually redundant but
+ are hard for the register allocator to get rid of.
+
+ A compromise is to first examine all regparm'd argument
+ expressions. If they are all so simple that it is clear
+ they will be evaluated without use of any fixed registers,
+ use the old compute-directly-to-fixed-target scheme. If not,
+ be safe and use the via-vregs scheme.
+
+ Note this requires being able to examine an expression and
+ determine whether or not evaluation of it might use a fixed
+ register. That requires knowledge of how the rest of this
+ insn selector works. Currently just the following 3 are
+ regarded as safe -- hopefully they cover the majority of
+      arguments in practice: IRExpr_RdTmp, IRExpr_Const, IRExpr_Get.
+ */
+ vassert(cee->regparms >= 0 && cee->regparms <= 3);
+
+ n_args = n_arg_ws = 0;
+ while (args[n_args]) n_args++;
+
+ not_done_yet = n_args;
+ if (passBBP)
+ not_done_yet++;
+
+ stack_limit = cee->regparms;
+ if (cee->regparms > 0 && passBBP) stack_limit--;
+
+ /* ------ BEGIN marshall all arguments ------ */
+
+ /* Push (R to L) the stack-passed args, [n_args-1 .. stack_limit] */
+ for (i = n_args-1; i >= stack_limit; i--) {
+ n_arg_ws += pushArg(env, args[i]);
+ not_done_yet--;
+ }
+
+ /* args [stack_limit-1 .. 0] and possibly %ebp are to be passed in
+ registers. */
+
+ if (cee->regparms > 0) {
+
+ /* ------ BEGIN deal with regparms ------ */
+
+ /* deal with regparms, not forgetting %ebp if needed. */
+ argregs[0] = hregX86_EAX();
+ argregs[1] = hregX86_EDX();
+ argregs[2] = hregX86_ECX();
+ tmpregs[0] = tmpregs[1] = tmpregs[2] = INVALID_HREG;
+
+ argreg = cee->regparms;
+
+ /* In keeping with big comment above, detect potential danger
+ and use the via-vregs scheme if needed. */
+ danger = False;
+ for (i = stack_limit-1; i >= 0; i--) {
+ if (mightRequireFixedRegs(args[i])) {
+ danger = True;
+ break;
+ }
+ }
+
+ if (danger) {
+
+ /* Move via temporaries */
+ argregX = argreg;
+ for (i = stack_limit-1; i >= 0; i--) {
+
+ if (0) {
+ vex_printf("x86 host: register param is complex: ");
+ ppIRExpr(args[i]);
+ vex_printf("\n");
+ }
+
+ argreg--;
+ vassert(argreg >= 0);
+ vassert(typeOfIRExpr(env->type_env, args[i]) == Ity_I32);
+ tmpregs[argreg] = iselIntExpr_R(env, args[i]);
+ not_done_yet--;
+ }
+ for (i = stack_limit-1; i >= 0; i--) {
+ argregX--;
+ vassert(argregX >= 0);
+ addInstr( env, mk_iMOVsd_RR( tmpregs[argregX], argregs[argregX] ) );
+ }
+
+ } else {
+ /* It's safe to compute all regparm args directly into their
+ target registers. */
+ for (i = stack_limit-1; i >= 0; i--) {
+ argreg--;
+ vassert(argreg >= 0);
+ vassert(typeOfIRExpr(env->type_env, args[i]) == Ity_I32);
+ addInstr(env, X86Instr_Alu32R(Xalu_MOV,
+ iselIntExpr_RMI(env, args[i]),
+ argregs[argreg]));
+ not_done_yet--;
+ }
+
+ }
+
+ /* Not forgetting %ebp if needed. */
+ if (passBBP) {
+ vassert(argreg == 1);
+ addInstr(env, mk_iMOVsd_RR( hregX86_EBP(), argregs[0]));
+ not_done_yet--;
+ }
+
+ /* ------ END deal with regparms ------ */
+
+ } else {
+
+ /* No regparms. Heave %ebp on the stack if needed. */
+ if (passBBP) {
+ addInstr(env, X86Instr_Push(X86RMI_Reg(hregX86_EBP())));
+ n_arg_ws++;
+ not_done_yet--;
+ }
+
+ }
+
+ vassert(not_done_yet == 0);
+
+ /* ------ END marshall all arguments ------ */
+
+ /* Now we can compute the condition. We can't do it earlier
+ because the argument computations could trash the condition
+ codes. Be a bit clever to handle the common case where the
+ guard is 1:Bit. */
+ cc = Xcc_ALWAYS;
+ if (guard) {
+ if (guard->tag == Iex_Const
+ && guard->Iex.Const.con->tag == Ico_U1
+ && guard->Iex.Const.con->Ico.U1 == True) {
+ /* unconditional -- do nothing */
+ } else {
+ cc = iselCondCode( env, guard );
+ }
+ }
+
+ /* call the helper, and get the args off the stack afterwards. */
+ callHelperAndClearArgs( env, cc, cee, n_arg_ws );
+}
+
+
+/* Given a guest-state array descriptor, an index expression and a
+ bias, generate an X86AMode holding the relevant guest state
+ offset. */
+
+static
+X86AMode* genGuestArrayOffset ( ISelEnv* env, IRRegArray* descr,
+ IRExpr* off, Int bias )
+{
+ HReg tmp, roff;
+ Int elemSz = sizeofIRType(descr->elemTy);
+ Int nElems = descr->nElems;
+ Int shift = 0;
+
+ /* throw out any cases not generated by an x86 front end. In
+ theory there might be a day where we need to handle them -- if
+ we ever run non-x86-guest on x86 host. */
+
+ if (nElems != 8)
+ vpanic("genGuestArrayOffset(x86 host)(1)");
+
+ switch (elemSz) {
+ case 1: shift = 0; break;
+ case 4: shift = 2; break;
+ case 8: shift = 3; break;
+ default: vpanic("genGuestArrayOffset(x86 host)(2)");
+ }
+
+ /* Compute off into a reg, %off. Then return:
+
+ movl %off, %tmp
+ addl $bias, %tmp (if bias != 0)
+      andl $7, %tmp
+ ... base(%ebp, %tmp, shift) ...
+ */
+ tmp = newVRegI(env);
+ roff = iselIntExpr_R(env, off);
+ addInstr(env, mk_iMOVsd_RR(roff, tmp));
+ if (bias != 0) {
+ addInstr(env,
+ X86Instr_Alu32R(Xalu_ADD, X86RMI_Imm(bias), tmp));
+ }
+ addInstr(env,
+ X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(7), tmp));
+ return
+ X86AMode_IRRS( descr->base, hregX86_EBP(), tmp, shift );
+}
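+
+/* Worked example (illustrative): for an 8-entry array of I64s at
+   guest-state offset 'base', an index expression 'ix' and bias 1,
+   the above emits roughly
+
+      movl %ix, %tmp
+      addl $1, %tmp
+      andl $7, %tmp
+
+   and returns the amode base(%ebp, %tmp, 8), i.e. the masked,
+   biased index scaled by the element size. */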
+
+
+/* Mess with the FPU's rounding mode: set to the default rounding mode
+ (DEFAULT_FPUCW). */
+static
+void set_FPU_rounding_default ( ISelEnv* env )
+{
+ /* pushl $DEFAULT_FPUCW
+ fldcw 0(%esp)
+ addl $4, %esp
+ */
+ X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
+ addInstr(env, X86Instr_Push(X86RMI_Imm(DEFAULT_FPUCW)));
+ addInstr(env, X86Instr_FpLdCW(zero_esp));
+ add_to_esp(env, 4);
+}
+
+
+/* Mess with the FPU's rounding mode: 'mode' is an I32-typed
+ expression denoting a value in the range 0 .. 3, indicating a round
+ mode encoded as per type IRRoundingMode. Set the x87 FPU to have
+ the same rounding.
+*/
+static
+void set_FPU_rounding_mode ( ISelEnv* env, IRExpr* mode )
+{
+ HReg rrm = iselIntExpr_R(env, mode);
+ HReg rrm2 = newVRegI(env);
+ X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
+
+ /* movl %rrm, %rrm2
+ andl $3, %rrm2 -- shouldn't be needed; paranoia
+ shll $10, %rrm2
+ orl $DEFAULT_FPUCW, %rrm2
+ pushl %rrm2
+ fldcw 0(%esp)
+ addl $4, %esp
+ */
+ addInstr(env, mk_iMOVsd_RR(rrm, rrm2));
+ addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(3), rrm2));
+ addInstr(env, X86Instr_Sh32(Xsh_SHL, 10, rrm2));
+ addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Imm(DEFAULT_FPUCW), rrm2));
+ addInstr(env, X86Instr_Push(X86RMI_Reg(rrm2)));
+ addInstr(env, X86Instr_FpLdCW(zero_esp));
+ add_to_esp(env, 4);
+}
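+
+/* Note (added commentary): this works because the rounding-control
+   field of the x87 control word occupies bits 11:10, and the
+   IRRoundingMode encoding (0 = nearest, 1 = -inf, 2 = +inf, 3 =
+   toward zero) happens to coincide with the x87 RC encoding, so the
+   IR value can simply be shifted left by 10 and OR'd into the
+   default control word. */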
+
+
+/* Generate !src into a new vector register, and be sure that the code
+ is SSE1 compatible. Amazing that Intel doesn't offer a less crappy
+ way to do this.
+*/
+static HReg do_sse_Not128 ( ISelEnv* env, HReg src )
+{
+ HReg dst = newVRegV(env);
+ /* Set dst to zero. If dst contains a NaN then all hell might
+ break loose after the comparison. So, first zero it. */
+ addInstr(env, X86Instr_SseReRg(Xsse_XOR, dst, dst));
+ /* And now make it all 1s ... */
+ addInstr(env, X86Instr_Sse32Fx4(Xsse_CMPEQF, dst, dst));
+ /* Finally, xor 'src' into it. */
+ addInstr(env, X86Instr_SseReRg(Xsse_XOR, src, dst));
+ /* Doesn't that just totally suck? */
+ return dst;
+}
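+
+/* As a sketch, the above is:
+
+      xorps   %dst, %dst    -- dst = 0, so no NaNs are present
+      cmpeqps %dst, %dst    -- 0 == 0 in every lane, so dst = all 1s
+      xorps   %src, %dst    -- dst = ~src
+*/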
+
+
+/* Round an x87 FPU value to 53-bit-mantissa precision, to be used
+ after most non-simple FPU operations (simple = +, -, *, / and
+ sqrt).
+
+ This could be done a lot more efficiently if needed, by loading
+ zero and adding it to the value to be rounded (fldz ; faddp?).
+*/
+static void roundToF64 ( ISelEnv* env, HReg reg )
+{
+ X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
+ sub_from_esp(env, 8);
+ addInstr(env, X86Instr_FpLdSt(False/*store*/, 8, reg, zero_esp));
+ addInstr(env, X86Instr_FpLdSt(True/*load*/, 8, reg, zero_esp));
+ add_to_esp(env, 8);
+}
+
+
+/*---------------------------------------------------------*/
+/*--- ISEL: Integer expressions (32/16/8 bit) ---*/
+/*---------------------------------------------------------*/
+
+/* Select insns for an integer-typed expression, and add them to the
+ code list. Return a reg holding the result. This reg will be a
+ virtual register. THE RETURNED REG MUST NOT BE MODIFIED. If you
+ want to modify it, ask for a new vreg, copy it in there, and modify
+ the copy. The register allocator will do its best to map both
+ vregs to the same real register, so the copies will often disappear
+ later in the game.
+
+ This should handle expressions of 32, 16 and 8-bit type. All
+ results are returned in a 32-bit register. For 16- and 8-bit
+ expressions, the upper 16/24 bits are arbitrary, so you should mask
+ or sign extend partial values if necessary.
+*/
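+
+/* For example (illustrative only): selecting Add32(t7, 0x10:I32)
+   emits
+
+      movl %vr_t7, %vr_dst
+      addl $0x10, %vr_dst
+
+   and returns vr_dst, which the caller must treat as read-only. */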
+
+static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e )
+{
+ HReg r = iselIntExpr_R_wrk(env, e);
+ /* sanity checks ... */
+# if 0
+ vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
+# endif
+ vassert(hregClass(r) == HRcInt32);
+ vassert(hregIsVirtual(r));
+ return r;
+}
+
+/* DO NOT CALL THIS DIRECTLY ! */
+static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
+{
+ MatchInfo mi;
+
+ IRType ty = typeOfIRExpr(env->type_env,e);
+ vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
+
+ switch (e->tag) {
+
+ /* --------- TEMP --------- */
+ case Iex_RdTmp: {
+ return lookupIRTemp(env, e->Iex.RdTmp.tmp);
+ }
+
+ /* --------- LOAD --------- */
+ case Iex_Load: {
+ HReg dst = newVRegI(env);
+ X86AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr );
+
+ /* We can't handle big-endian loads, nor load-linked. */
+ if (e->Iex.Load.end != Iend_LE)
+ goto irreducible;
+
+ if (ty == Ity_I32) {
+ addInstr(env, X86Instr_Alu32R(Xalu_MOV,
+ X86RMI_Mem(amode), dst) );
+ return dst;
+ }
+ if (ty == Ity_I16) {
+ addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
+ return dst;
+ }
+ if (ty == Ity_I8) {
+ addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
+ return dst;
+ }
+ break;
+ }
+
+ /* --------- TERNARY OP --------- */
+ case Iex_Triop: {
+ /* C3210 flags following FPU partial remainder (fprem), both
+ IEEE compliant (PREM1) and non-IEEE compliant (PREM). */
+ if (e->Iex.Triop.op == Iop_PRemC3210F64
+ || e->Iex.Triop.op == Iop_PRem1C3210F64) {
+ HReg junk = newVRegF(env);
+ HReg dst = newVRegI(env);
+ HReg srcL = iselDblExpr(env, e->Iex.Triop.arg2);
+ HReg srcR = iselDblExpr(env, e->Iex.Triop.arg3);
+ /* XXXROUNDINGFIXME */
+ /* set roundingmode here */
+ addInstr(env, X86Instr_FpBinary(
+                              e->Iex.Triop.op==Iop_PRemC3210F64
+ ? Xfp_PREM : Xfp_PREM1,
+ srcL,srcR,junk
+ ));
+ /* The previous pseudo-insn will have left the FPU's C3210
+ flags set correctly. So bag them. */
+ addInstr(env, X86Instr_FpStSW_AX());
+ addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
+ addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0x4700), dst));
+ return dst;
+ }
+
+ break;
+ }
+
+ /* --------- BINARY OP --------- */
+ case Iex_Binop: {
+ X86AluOp aluOp;
+ X86ShiftOp shOp;
+
+ /* Pattern: Sub32(0,x) */
+ if (e->Iex.Binop.op == Iop_Sub32 && isZeroU32(e->Iex.Binop.arg1)) {
+ HReg dst = newVRegI(env);
+ HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg2);
+ addInstr(env, mk_iMOVsd_RR(reg,dst));
+ addInstr(env, X86Instr_Unary32(Xun_NEG,dst));
+ return dst;
+ }
+
+ /* Is it an addition or logical style op? */
+ switch (e->Iex.Binop.op) {
+ case Iop_Add8: case Iop_Add16: case Iop_Add32:
+ aluOp = Xalu_ADD; break;
+ case Iop_Sub8: case Iop_Sub16: case Iop_Sub32:
+ aluOp = Xalu_SUB; break;
+ case Iop_And8: case Iop_And16: case Iop_And32:
+ aluOp = Xalu_AND; break;
+ case Iop_Or8: case Iop_Or16: case Iop_Or32:
+ aluOp = Xalu_OR; break;
+ case Iop_Xor8: case Iop_Xor16: case Iop_Xor32:
+ aluOp = Xalu_XOR; break;
+ case Iop_Mul16: case Iop_Mul32:
+ aluOp = Xalu_MUL; break;
+ default:
+ aluOp = Xalu_INVALID; break;
+ }
+ /* For commutative ops we assume any literal
+ values are on the second operand. */
+ if (aluOp != Xalu_INVALID) {
+ HReg dst = newVRegI(env);
+ HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
+ X86RMI* rmi = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
+ addInstr(env, mk_iMOVsd_RR(reg,dst));
+ addInstr(env, X86Instr_Alu32R(aluOp, rmi, dst));
+ return dst;
+ }
+ /* Could do better here; forcing the first arg into a reg
+ isn't always clever.
+ -- t70 = Xor32(And32(Xor32(LDle:I32(Add32(t41,0xFFFFFFA0:I32)),
+ LDle:I32(Add32(t41,0xFFFFFFA4:I32))),LDle:I32(Add32(
+ t41,0xFFFFFFA8:I32))),LDle:I32(Add32(t41,0xFFFFFFA0:I32)))
+ movl 0xFFFFFFA0(%vr41),%vr107
+ movl 0xFFFFFFA4(%vr41),%vr108
+ movl %vr107,%vr106
+ xorl %vr108,%vr106
+ movl 0xFFFFFFA8(%vr41),%vr109
+ movl %vr106,%vr105
+ andl %vr109,%vr105
+ movl 0xFFFFFFA0(%vr41),%vr110
+ movl %vr105,%vr104
+ xorl %vr110,%vr104
+ movl %vr104,%vr70
+ */
+
+ /* Perhaps a shift op? */
+ switch (e->Iex.Binop.op) {
+ case Iop_Shl32: case Iop_Shl16: case Iop_Shl8:
+ shOp = Xsh_SHL; break;
+ case Iop_Shr32: case Iop_Shr16: case Iop_Shr8:
+ shOp = Xsh_SHR; break;
+ case Iop_Sar32: case Iop_Sar16: case Iop_Sar8:
+ shOp = Xsh_SAR; break;
+ default:
+ shOp = Xsh_INVALID; break;
+ }
+ if (shOp != Xsh_INVALID) {
+ HReg dst = newVRegI(env);
+
+ /* regL = the value to be shifted */
+ HReg regL = iselIntExpr_R(env, e->Iex.Binop.arg1);
+ addInstr(env, mk_iMOVsd_RR(regL,dst));
+
+ /* Do any necessary widening for 16/8 bit operands */
+ switch (e->Iex.Binop.op) {
+ case Iop_Shr8:
+ addInstr(env, X86Instr_Alu32R(
+ Xalu_AND, X86RMI_Imm(0xFF), dst));
+ break;
+ case Iop_Shr16:
+ addInstr(env, X86Instr_Alu32R(
+ Xalu_AND, X86RMI_Imm(0xFFFF), dst));
+ break;
+ case Iop_Sar8:
+ addInstr(env, X86Instr_Sh32(Xsh_SHL, 24, dst));
+ addInstr(env, X86Instr_Sh32(Xsh_SAR, 24, dst));
+ break;
+ case Iop_Sar16:
+ addInstr(env, X86Instr_Sh32(Xsh_SHL, 16, dst));
+ addInstr(env, X86Instr_Sh32(Xsh_SAR, 16, dst));
+ break;
+ default: break;
+ }
+
+ /* Now consider the shift amount. If it's a literal, we
+ can do a much better job than the general case. */
+ if (e->Iex.Binop.arg2->tag == Iex_Const) {
+ /* assert that the IR is well-typed */
+ Int nshift;
+ vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
+ nshift = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
+ vassert(nshift >= 0);
+ if (nshift > 0)
+ /* Can't allow nshift==0 since that means %cl */
+ addInstr(env, X86Instr_Sh32( shOp, nshift, dst ));
+ } else {
+ /* General case; we have to force the amount into %cl. */
+ HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2);
+ addInstr(env, mk_iMOVsd_RR(regR,hregX86_ECX()));
+ addInstr(env, X86Instr_Sh32(shOp, 0/* %cl */, dst));
+ }
+ return dst;
+ }
+
+ /* Handle misc other ops. */
+
+ if (e->Iex.Binop.op == Iop_Max32U) {
+ HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
+ HReg dst = newVRegI(env);
+ HReg src2 = iselIntExpr_R(env, e->Iex.Binop.arg2);
+ addInstr(env, mk_iMOVsd_RR(src1,dst));
+ addInstr(env, X86Instr_Alu32R(Xalu_CMP, X86RMI_Reg(src2), dst));
+ addInstr(env, X86Instr_CMov32(Xcc_B, X86RM_Reg(src2), dst));
+ return dst;
+ }
+
+ if (e->Iex.Binop.op == Iop_8HLto16) {
+ HReg hi8 = newVRegI(env);
+ HReg lo8 = newVRegI(env);
+ HReg hi8s = iselIntExpr_R(env, e->Iex.Binop.arg1);
+ HReg lo8s = iselIntExpr_R(env, e->Iex.Binop.arg2);
+ addInstr(env, mk_iMOVsd_RR(hi8s, hi8));
+ addInstr(env, mk_iMOVsd_RR(lo8s, lo8));
+ addInstr(env, X86Instr_Sh32(Xsh_SHL, 8, hi8));
+ addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0xFF), lo8));
+ addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(lo8), hi8));
+ return hi8;
+ }
+
+ if (e->Iex.Binop.op == Iop_16HLto32) {
+ HReg hi16 = newVRegI(env);
+ HReg lo16 = newVRegI(env);
+ HReg hi16s = iselIntExpr_R(env, e->Iex.Binop.arg1);
+ HReg lo16s = iselIntExpr_R(env, e->Iex.Binop.arg2);
+ addInstr(env, mk_iMOVsd_RR(hi16s, hi16));
+ addInstr(env, mk_iMOVsd_RR(lo16s, lo16));
+ addInstr(env, X86Instr_Sh32(Xsh_SHL, 16, hi16));
+ addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0xFFFF), lo16));
+ addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(lo16), hi16));
+ return hi16;
+ }
+
+ if (e->Iex.Binop.op == Iop_MullS16 || e->Iex.Binop.op == Iop_MullS8
+ || e->Iex.Binop.op == Iop_MullU16 || e->Iex.Binop.op == Iop_MullU8) {
+ HReg a16 = newVRegI(env);
+ HReg b16 = newVRegI(env);
+ HReg a16s = iselIntExpr_R(env, e->Iex.Binop.arg1);
+ HReg b16s = iselIntExpr_R(env, e->Iex.Binop.arg2);
+ Int shift = (e->Iex.Binop.op == Iop_MullS8
+ || e->Iex.Binop.op == Iop_MullU8)
+ ? 24 : 16;
+ X86ShiftOp shr_op = (e->Iex.Binop.op == Iop_MullS8
+ || e->Iex.Binop.op == Iop_MullS16)
+ ? Xsh_SAR : Xsh_SHR;
+
+ addInstr(env, mk_iMOVsd_RR(a16s, a16));
+ addInstr(env, mk_iMOVsd_RR(b16s, b16));
+ addInstr(env, X86Instr_Sh32(Xsh_SHL, shift, a16));
+ addInstr(env, X86Instr_Sh32(Xsh_SHL, shift, b16));
+ addInstr(env, X86Instr_Sh32(shr_op, shift, a16));
+ addInstr(env, X86Instr_Sh32(shr_op, shift, b16));
+ addInstr(env, X86Instr_Alu32R(Xalu_MUL, X86RMI_Reg(a16), b16));
+ return b16;
+ }
+
+ if (e->Iex.Binop.op == Iop_CmpF64) {
+ HReg fL = iselDblExpr(env, e->Iex.Binop.arg1);
+ HReg fR = iselDblExpr(env, e->Iex.Binop.arg2);
+ HReg dst = newVRegI(env);
+ addInstr(env, X86Instr_FpCmp(fL,fR,dst));
+ /* shift this right 8 bits so as to conform to CmpF64
+ definition. */
+ addInstr(env, X86Instr_Sh32(Xsh_SHR, 8, dst));
+ return dst;
+ }
+
+ if (e->Iex.Binop.op == Iop_F64toI32S
+ || e->Iex.Binop.op == Iop_F64toI16S) {
+ Int sz = e->Iex.Binop.op == Iop_F64toI16S ? 2 : 4;
+ HReg rf = iselDblExpr(env, e->Iex.Binop.arg2);
+ HReg dst = newVRegI(env);
+
+ /* Used several times ... */
+ X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
+
+         /* rf now holds the value to be converted; arg1 gives the
+            rounding mode, encoded as per the IRRoundingMode enum.
+            The first thing to do is set the FPU's rounding mode
+            accordingly. */
+
+ /* Create a space for the format conversion. */
+ /* subl $4, %esp */
+ sub_from_esp(env, 4);
+
+ /* Set host rounding mode */
+ set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
+
+ /* gistw/l %rf, 0(%esp) */
+ addInstr(env, X86Instr_FpLdStI(False/*store*/,
+ toUChar(sz), rf, zero_esp));
+
+ if (sz == 2) {
+ /* movzwl 0(%esp), %dst */
+ addInstr(env, X86Instr_LoadEX(2,False,zero_esp,dst));
+ } else {
+ /* movl 0(%esp), %dst */
+ vassert(sz == 4);
+ addInstr(env, X86Instr_Alu32R(
+ Xalu_MOV, X86RMI_Mem(zero_esp), dst));
+ }
+
+ /* Restore default FPU rounding. */
+ set_FPU_rounding_default( env );
+
+ /* addl $4, %esp */
+ add_to_esp(env, 4);
+ return dst;
+ }
+
+ break;
+ }
+
+ /* --------- UNARY OP --------- */
+ case Iex_Unop: {
+
+ /* 1Uto8(32to1(expr32)) */
+ if (e->Iex.Unop.op == Iop_1Uto8) {
+ DECLARE_PATTERN(p_32to1_then_1Uto8);
+ DEFINE_PATTERN(p_32to1_then_1Uto8,
+ unop(Iop_1Uto8,unop(Iop_32to1,bind(0))));
+ if (matchIRExpr(&mi,p_32to1_then_1Uto8,e)) {
+ IRExpr* expr32 = mi.bindee[0];
+ HReg dst = newVRegI(env);
+ HReg src = iselIntExpr_R(env, expr32);
+ addInstr(env, mk_iMOVsd_RR(src,dst) );
+ addInstr(env, X86Instr_Alu32R(Xalu_AND,
+ X86RMI_Imm(1), dst));
+ return dst;
+ }
+ }
+
+ /* 8Uto32(LDle(expr32)) */
+ if (e->Iex.Unop.op == Iop_8Uto32) {
+ DECLARE_PATTERN(p_LDle8_then_8Uto32);
+ DEFINE_PATTERN(p_LDle8_then_8Uto32,
+ unop(Iop_8Uto32,
+ IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
+ if (matchIRExpr(&mi,p_LDle8_then_8Uto32,e)) {
+ HReg dst = newVRegI(env);
+ X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
+ addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
+ return dst;
+ }
+ }
+
+ /* 8Sto32(LDle(expr32)) */
+ if (e->Iex.Unop.op == Iop_8Sto32) {
+ DECLARE_PATTERN(p_LDle8_then_8Sto32);
+ DEFINE_PATTERN(p_LDle8_then_8Sto32,
+ unop(Iop_8Sto32,
+ IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
+ if (matchIRExpr(&mi,p_LDle8_then_8Sto32,e)) {
+ HReg dst = newVRegI(env);
+ X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
+ addInstr(env, X86Instr_LoadEX(1,True,amode,dst));
+ return dst;
+ }
+ }
+
+ /* 16Uto32(LDle(expr32)) */
+ if (e->Iex.Unop.op == Iop_16Uto32) {
+ DECLARE_PATTERN(p_LDle16_then_16Uto32);
+ DEFINE_PATTERN(p_LDle16_then_16Uto32,
+ unop(Iop_16Uto32,
+ IRExpr_Load(Iend_LE,Ity_I16,bind(0))) );
+ if (matchIRExpr(&mi,p_LDle16_then_16Uto32,e)) {
+ HReg dst = newVRegI(env);
+ X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
+ addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
+ return dst;
+ }
+ }
+
+ /* 8Uto32(GET:I8) */
+ if (e->Iex.Unop.op == Iop_8Uto32) {
+ if (e->Iex.Unop.arg->tag == Iex_Get) {
+ HReg dst;
+ X86AMode* amode;
+ vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I8);
+ dst = newVRegI(env);
+ amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
+ hregX86_EBP());
+ addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
+ return dst;
+ }
+ }
+
+      /* 16Uto32(GET:I16) */
+ if (e->Iex.Unop.op == Iop_16Uto32) {
+ if (e->Iex.Unop.arg->tag == Iex_Get) {
+ HReg dst;
+ X86AMode* amode;
+ vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I16);
+ dst = newVRegI(env);
+ amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
+ hregX86_EBP());
+ addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
+ return dst;
+ }
+ }
+
+ switch (e->Iex.Unop.op) {
+ case Iop_8Uto16:
+ case Iop_8Uto32:
+ case Iop_16Uto32: {
+ HReg dst = newVRegI(env);
+ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
+ UInt mask = e->Iex.Unop.op==Iop_16Uto32 ? 0xFFFF : 0xFF;
+ addInstr(env, mk_iMOVsd_RR(src,dst) );
+ addInstr(env, X86Instr_Alu32R(Xalu_AND,
+ X86RMI_Imm(mask), dst));
+ return dst;
+ }
+ case Iop_8Sto16:
+ case Iop_8Sto32:
+ case Iop_16Sto32: {
+ HReg dst = newVRegI(env);
+ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
+ UInt amt = e->Iex.Unop.op==Iop_16Sto32 ? 16 : 24;
+ addInstr(env, mk_iMOVsd_RR(src,dst) );
+ addInstr(env, X86Instr_Sh32(Xsh_SHL, amt, dst));
+ addInstr(env, X86Instr_Sh32(Xsh_SAR, amt, dst));
+ return dst;
+ }
+ case Iop_Not8:
+ case Iop_Not16:
+ case Iop_Not32: {
+ HReg dst = newVRegI(env);
+ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
+ addInstr(env, mk_iMOVsd_RR(src,dst) );
+ addInstr(env, X86Instr_Unary32(Xun_NOT,dst));
+ return dst;
+ }
+ case Iop_64HIto32: {
+ HReg rHi, rLo;
+ iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
+ return rHi; /* and abandon rLo .. poor wee thing :-) */
+ }
+ case Iop_64to32: {
+ HReg rHi, rLo;
+ iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
+ return rLo; /* similar stupid comment to the above ... */
+ }
+ case Iop_16HIto8:
+ case Iop_32HIto16: {
+ HReg dst = newVRegI(env);
+ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
+ Int shift = e->Iex.Unop.op == Iop_16HIto8 ? 8 : 16;
+ addInstr(env, mk_iMOVsd_RR(src,dst) );
+ addInstr(env, X86Instr_Sh32(Xsh_SHR, shift, dst));
+ return dst;
+ }
+ case Iop_1Uto32:
+ case Iop_1Uto8: {
+ HReg dst = newVRegI(env);
+ X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
+ addInstr(env, X86Instr_Set32(cond,dst));
+ return dst;
+ }
+ case Iop_1Sto8:
+ case Iop_1Sto16:
+ case Iop_1Sto32: {
+ /* could do better than this, but for now ... */
+ HReg dst = newVRegI(env);
+ X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
+ addInstr(env, X86Instr_Set32(cond,dst));
+ addInstr(env, X86Instr_Sh32(Xsh_SHL, 31, dst));
+ addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, dst));
+ return dst;
+ }
+ case Iop_Ctz32: {
+ /* Count trailing zeroes, implemented by x86 'bsfl' */
+ HReg dst = newVRegI(env);
+ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
+ addInstr(env, X86Instr_Bsfr32(True,src,dst));
+ return dst;
+ }
+ case Iop_Clz32: {
+ /* Count leading zeroes. Do 'bsrl' to establish the index
+ of the highest set bit, and subtract that value from
+ 31. */
+ HReg tmp = newVRegI(env);
+ HReg dst = newVRegI(env);
+ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
+ addInstr(env, X86Instr_Bsfr32(False,src,tmp));
+ addInstr(env, X86Instr_Alu32R(Xalu_MOV,
+ X86RMI_Imm(31), dst));
+ addInstr(env, X86Instr_Alu32R(Xalu_SUB,
+ X86RMI_Reg(tmp), dst));
+ return dst;
+ }
+
+ case Iop_CmpwNEZ32: {
+ HReg dst = newVRegI(env);
+ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
+ addInstr(env, mk_iMOVsd_RR(src,dst));
+ addInstr(env, X86Instr_Unary32(Xun_NEG,dst));
+ addInstr(env, X86Instr_Alu32R(Xalu_OR,
+ X86RMI_Reg(src), dst));
+ addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, dst));
+ return dst;
+ }
+ case Iop_Left8:
+ case Iop_Left16:
+ case Iop_Left32: {
+ HReg dst = newVRegI(env);
+ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
+ addInstr(env, mk_iMOVsd_RR(src, dst));
+ addInstr(env, X86Instr_Unary32(Xun_NEG, dst));
+ addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(src), dst));
+ return dst;
+ }
+
+ case Iop_V128to32: {
+ HReg dst = newVRegI(env);
+ HReg vec = iselVecExpr(env, e->Iex.Unop.arg);
+ X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
+ sub_from_esp(env, 16);
+ addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, esp0));
+ addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(esp0), dst ));
+ add_to_esp(env, 16);
+ return dst;
+ }
+
+ /* ReinterpF32asI32(e) */
+ /* Given an IEEE754 single, produce an I32 with the same bit
+ pattern. Keep stack 8-aligned even though only using 4
+ bytes. */
+ case Iop_ReinterpF32asI32: {
+ HReg rf = iselFltExpr(env, e->Iex.Unop.arg);
+ HReg dst = newVRegI(env);
+ X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
+ /* paranoia */
+ set_FPU_rounding_default(env);
+ /* subl $8, %esp */
+ sub_from_esp(env, 8);
+ /* gstF %rf, 0(%esp) */
+ addInstr(env,
+ X86Instr_FpLdSt(False/*store*/, 4, rf, zero_esp));
+ /* movl 0(%esp), %dst */
+ addInstr(env,
+ X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(zero_esp), dst));
+ /* addl $8, %esp */
+ add_to_esp(env, 8);
+ return dst;
+ }
+
+ case Iop_16to8:
+ case Iop_32to8:
+ case Iop_32to16:
+ /* These are no-ops. */
+ return iselIntExpr_R(env, e->Iex.Unop.arg);
+
+ default:
+ break;
+ }
+ break;
+ }
+
+ /* --------- GET --------- */
+ case Iex_Get: {
+ if (ty == Ity_I32) {
+ HReg dst = newVRegI(env);
+ addInstr(env, X86Instr_Alu32R(
+ Xalu_MOV,
+ X86RMI_Mem(X86AMode_IR(e->Iex.Get.offset,
+ hregX86_EBP())),
+ dst));
+ return dst;
+ }
+ if (ty == Ity_I8 || ty == Ity_I16) {
+ HReg dst = newVRegI(env);
+ addInstr(env, X86Instr_LoadEX(
+ toUChar(ty==Ity_I8 ? 1 : 2),
+ False,
+ X86AMode_IR(e->Iex.Get.offset,hregX86_EBP()),
+ dst));
+ return dst;
+ }
+ break;
+ }
+
+ case Iex_GetI: {
+ X86AMode* am
+ = genGuestArrayOffset(
+ env, e->Iex.GetI.descr,
+ e->Iex.GetI.ix, e->Iex.GetI.bias );
+ HReg dst = newVRegI(env);
+ if (ty == Ity_I8) {
+ addInstr(env, X86Instr_LoadEX( 1, False, am, dst ));
+ return dst;
+ }
+ if (ty == Ity_I32) {
+ addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(am), dst));
+ return dst;
+ }
+ break;
+ }
+
+ /* --------- CCALL --------- */
+ case Iex_CCall: {
+ HReg dst = newVRegI(env);
+ vassert(ty == e->Iex.CCall.retty);
+
+ /* be very restrictive for now. Only 32/64-bit ints allowed
+ for args, and 32 bits for return type. */
+ if (e->Iex.CCall.retty != Ity_I32)
+ goto irreducible;
+
+ /* Marshal args, do the call, clear stack. */
+ doHelperCall( env, False, NULL, e->Iex.CCall.cee, e->Iex.CCall.args );
+
+ addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
+ return dst;
+ }
+
+ /* --------- LITERAL --------- */
+ /* 32/16/8-bit literals */
+ case Iex_Const: {
+ X86RMI* rmi = iselIntExpr_RMI ( env, e );
+ HReg r = newVRegI(env);
+ addInstr(env, X86Instr_Alu32R(Xalu_MOV, rmi, r));
+ return r;
+ }
+
+ /* --------- MULTIPLEX --------- */
+ case Iex_Mux0X: {
+ if ((ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8)
+ && typeOfIRExpr(env->type_env,e->Iex.Mux0X.cond) == Ity_I8) {
+ X86RM* r8;
+ HReg rX = iselIntExpr_R(env, e->Iex.Mux0X.exprX);
+ X86RM* r0 = iselIntExpr_RM(env, e->Iex.Mux0X.expr0);
+ HReg dst = newVRegI(env);
+ addInstr(env, mk_iMOVsd_RR(rX,dst));
+ r8 = iselIntExpr_RM(env, e->Iex.Mux0X.cond);
+ addInstr(env, X86Instr_Test32(0xFF, r8));
+ addInstr(env, X86Instr_CMov32(Xcc_Z,r0,dst));
+ return dst;
+ }
+ break;
+ }
+
+ default:
+ break;
+ } /* switch (e->tag) */
+
+ /* We get here if no pattern matched. */
+ irreducible:
+ ppIRExpr(e);
+ vpanic("iselIntExpr_R: cannot reduce tree");
+}
+
+
+/*---------------------------------------------------------*/
+/*--- ISEL: Integer expression auxiliaries ---*/
+/*---------------------------------------------------------*/
+
+/* --------------------- AMODEs --------------------- */
+
+/* Return an AMode which computes the value of the specified
+ expression, possibly also adding insns to the code list as a
+ result. The expression may only be a 32-bit one.
+*/
+
+static Bool sane_AMode ( X86AMode* am )
+{
+ switch (am->tag) {
+ case Xam_IR:
+ return
+ toBool( hregClass(am->Xam.IR.reg) == HRcInt32
+ && (hregIsVirtual(am->Xam.IR.reg)
+ || am->Xam.IR.reg == hregX86_EBP()) );
+ case Xam_IRRS:
+ return
+ toBool( hregClass(am->Xam.IRRS.base) == HRcInt32
+ && hregIsVirtual(am->Xam.IRRS.base)
+ && hregClass(am->Xam.IRRS.index) == HRcInt32
+ && hregIsVirtual(am->Xam.IRRS.index) );
+ default:
+ vpanic("sane_AMode: unknown x86 amode tag");
+ }
+}
+
+static X86AMode* iselIntExpr_AMode ( ISelEnv* env, IRExpr* e )
+{
+ X86AMode* am = iselIntExpr_AMode_wrk(env, e);
+ vassert(sane_AMode(am));
+ return am;
+}
+
+/* DO NOT CALL THIS DIRECTLY ! */
+static X86AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, IRExpr* e )
+{
+ IRType ty = typeOfIRExpr(env->type_env,e);
+ vassert(ty == Ity_I32);
+
+ /* Add32( Add32(expr1, Shl32(expr2, simm)), imm32 ) */
+ if (e->tag == Iex_Binop
+ && e->Iex.Binop.op == Iop_Add32
+ && e->Iex.Binop.arg2->tag == Iex_Const
+ && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32
+ && e->Iex.Binop.arg1->tag == Iex_Binop
+ && e->Iex.Binop.arg1->Iex.Binop.op == Iop_Add32
+ && e->Iex.Binop.arg1->Iex.Binop.arg2->tag == Iex_Binop
+ && e->Iex.Binop.arg1->Iex.Binop.arg2->Iex.Binop.op == Iop_Shl32
+ && e->Iex.Binop.arg1
+ ->Iex.Binop.arg2->Iex.Binop.arg2->tag == Iex_Const
+ && e->Iex.Binop.arg1
+ ->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8) {
+ UInt shift = e->Iex.Binop.arg1
+ ->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
+ UInt imm32 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
+ if (shift == 1 || shift == 2 || shift == 3) {
+ HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1->Iex.Binop.arg1);
+ HReg r2 = iselIntExpr_R(env, e->Iex.Binop.arg1
+ ->Iex.Binop.arg2->Iex.Binop.arg1 );
+ return X86AMode_IRRS(imm32, r1, r2, shift);
+ }
+ }
+
+ /* Add32(expr1, Shl32(expr2, imm)) */
+ if (e->tag == Iex_Binop
+ && e->Iex.Binop.op == Iop_Add32
+ && e->Iex.Binop.arg2->tag == Iex_Binop
+ && e->Iex.Binop.arg2->Iex.Binop.op == Iop_Shl32
+ && e->Iex.Binop.arg2->Iex.Binop.arg2->tag == Iex_Const
+ && e->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8) {
+ UInt shift = e->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
+ if (shift == 1 || shift == 2 || shift == 3) {
+ HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
+ HReg r2 = iselIntExpr_R(env, e->Iex.Binop.arg2->Iex.Binop.arg1 );
+ return X86AMode_IRRS(0, r1, r2, shift);
+ }
+ }
+
+ /* Add32(expr,i) */
+ if (e->tag == Iex_Binop
+ && e->Iex.Binop.op == Iop_Add32
+ && e->Iex.Binop.arg2->tag == Iex_Const
+ && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
+ HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
+ return X86AMode_IR(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32, r1);
+ }
+
+ /* Doesn't match anything in particular. Generate it into
+ a register and use that. */
+ {
+ HReg r1 = iselIntExpr_R(env, e);
+ return X86AMode_IR(0, r1);
+ }
+}
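+
+/* For example (illustrative): Add32(Add32(t1, Shl32(t2, 2:I8)),
+   0x40:I32) matches the first pattern above and collapses to the
+   single amode 0x40(%t1,%t2,4), with no instructions emitted beyond
+   those needed to evaluate t1 and t2. */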
+
+
+/* --------------------- RMIs --------------------- */
+
+/* Similarly, calculate an expression into an X86RMI operand. As with
+ iselIntExpr_R, the expression can have type 32, 16 or 8 bits. */
+
+static X86RMI* iselIntExpr_RMI ( ISelEnv* env, IRExpr* e )
+{
+ X86RMI* rmi = iselIntExpr_RMI_wrk(env, e);
+ /* sanity checks ... */
+ switch (rmi->tag) {
+ case Xrmi_Imm:
+ return rmi;
+ case Xrmi_Reg:
+ vassert(hregClass(rmi->Xrmi.Reg.reg) == HRcInt32);
+ vassert(hregIsVirtual(rmi->Xrmi.Reg.reg));
+ return rmi;
+ case Xrmi_Mem:
+ vassert(sane_AMode(rmi->Xrmi.Mem.am));
+ return rmi;
+ default:
+ vpanic("iselIntExpr_RMI: unknown x86 RMI tag");
+ }
+}
+
+/* DO NOT CALL THIS DIRECTLY ! */
+static X86RMI* iselIntExpr_RMI_wrk ( ISelEnv* env, IRExpr* e )
+{
+ IRType ty = typeOfIRExpr(env->type_env,e);
+ vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
+
+ /* special case: immediate */
+ if (e->tag == Iex_Const) {
+ UInt u;
+ switch (e->Iex.Const.con->tag) {
+ case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
+ case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
+ case Ico_U8: u = 0xFF & (e->Iex.Const.con->Ico.U8); break;
+ default: vpanic("iselIntExpr_RMI.Iex_Const(x86h)");
+ }
+ return X86RMI_Imm(u);
+ }
+
+ /* special case: 32-bit GET */
+ if (e->tag == Iex_Get && ty == Ity_I32) {
+ return X86RMI_Mem(X86AMode_IR(e->Iex.Get.offset,
+ hregX86_EBP()));
+ }
+
+ /* special case: 32-bit load from memory */
+ if (e->tag == Iex_Load && ty == Ity_I32
+ && e->Iex.Load.end == Iend_LE) {
+ X86AMode* am = iselIntExpr_AMode(env, e->Iex.Load.addr);
+ return X86RMI_Mem(am);
+ }
+
+ /* default case: calculate into a register and return that */
+ {
+ HReg r = iselIntExpr_R ( env, e );
+ return X86RMI_Reg(r);
+ }
+}
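+
+/* In short (added summary): a U32/U16/U8 constant becomes an
+   immediate operand, a 32-bit GET or little-endian load becomes a
+   memory operand, and anything else is computed into a register via
+   iselIntExpr_R and returned as a register operand. */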
+
+
+/* --------------------- RIs --------------------- */
+
+/* Calculate an expression into an X86RI operand. As with
+ iselIntExpr_R, the expression can have type 32, 16 or 8 bits. */
+
+static X86RI* iselIntExpr_RI ( ISelEnv* env, IRExpr* e )
+{
+ X86RI* ri = iselIntExpr_RI_wrk(env, e);
+ /* sanity checks ... */
+ switch (ri->tag) {
+ case Xri_Imm:
+ return ri;
+ case Xri_Reg:
+ vassert(hregClass(ri->Xri.Reg.reg) == HRcInt32);
+ vassert(hregIsVirtual(ri->Xri.Reg.reg));
+ return ri;
+ default:
+ vpanic("iselIntExpr_RI: unknown x86 RI tag");
+ }
+}
+
+/* DO NOT CALL THIS DIRECTLY ! */
+static X86RI* iselIntExpr_RI_wrk ( ISelEnv* env, IRExpr* e )
+{
+ IRType ty = typeOfIRExpr(env->type_env,e);
+ vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
+
+ /* special case: immediate */
+ if (e->tag == Iex_Const) {
+ UInt u;
+ switch (e->Iex.Const.con->tag) {
+ case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
+ case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
+ case Ico_U8: u = 0xFF & (e->Iex.Const.con->Ico.U8); break;
+ default: vpanic("iselIntExpr_RMI.Iex_Const(x86h)");
+ }
+ return X86RI_Imm(u);
+ }
+
+ /* default case: calculate into a register and return that */
+ {
+ HReg r = iselIntExpr_R ( env, e );
+ return X86RI_Reg(r);
+ }
+}
+
+
+/* --------------------- RMs --------------------- */
+
+/* Similarly, calculate an expression into an X86RM operand. As with
+ iselIntExpr_R, the expression can have type 32, 16 or 8 bits. */
+
+static X86RM* iselIntExpr_RM ( ISelEnv* env, IRExpr* e )
+{
+ X86RM* rm = iselIntExpr_RM_wrk(env, e);
+ /* sanity checks ... */
+ switch (rm->tag) {
+ case Xrm_Reg:
+ vassert(hregClass(rm->Xrm.Reg.reg) == HRcInt32);
+ vassert(hregIsVirtual(rm->Xrm.Reg.reg));
+ return rm;
+ case Xrm_Mem:
+ vassert(sane_AMode(rm->Xrm.Mem.am));
+ return rm;
+ default:
+ vpanic("iselIntExpr_RM: unknown x86 RM tag");
+ }
+}
+
+/* DO NOT CALL THIS DIRECTLY ! */
+static X86RM* iselIntExpr_RM_wrk ( ISelEnv* env, IRExpr* e )
+{
+ IRType ty = typeOfIRExpr(env->type_env,e);
+ vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
+
+ /* special case: 32-bit GET */
+ if (e->tag == Iex_Get && ty == Ity_I32) {
+ return X86RM_Mem(X86AMode_IR(e->Iex.Get.offset,
+ hregX86_EBP()));
+ }
+
+   /* special case: load from memory -- not handled specially here;
+      loads simply fall through to the default case and get computed
+      into a register. */
+
+ /* default case: calculate into a register and return that */
+ {
+ HReg r = iselIntExpr_R ( env, e );
+ return X86RM_Reg(r);
+ }
+}
+
+
+/* --------------------- CONDCODE --------------------- */
+
+/* Generate code to evaluate a bit-typed expression, returning the
+   condition code which corresponds to the expression notionally
+   evaluating to 1. */
+
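+/* For example (illustrative): for CmpLT32S(x,y) the worker below
+   emits "cmpl <y>, <x>" and returns Xcc_L; a subsequent conditional
+   jump, set or cmov on Xcc_L then implements the signed comparison. */
+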
+static X86CondCode iselCondCode ( ISelEnv* env, IRExpr* e )
+{
+ /* Uh, there's nothing we can sanity check here, unfortunately. */
+ return iselCondCode_wrk(env,e);
+}
+
+/* DO NOT CALL THIS DIRECTLY ! */
+static X86CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e )
+{
+ MatchInfo mi;
+
+ vassert(e);
+ vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);
+
+ /* var */
+ if (e->tag == Iex_RdTmp) {
+ HReg r32 = lookupIRTemp(env, e->Iex.RdTmp.tmp);
+ /* Test32 doesn't modify r32; so this is OK. */
+ addInstr(env, X86Instr_Test32(1,X86RM_Reg(r32)));
+ return Xcc_NZ;
+ }
+
+ /* Constant 1:Bit */
+ if (e->tag == Iex_Const) {
+ HReg r;
+ vassert(e->Iex.Const.con->tag == Ico_U1);
+ vassert(e->Iex.Const.con->Ico.U1 == True
+ || e->Iex.Const.con->Ico.U1 == False);
+ r = newVRegI(env);
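+      /* Make r defined, then xor it with itself: this sets the Z
+         flag, so Xcc_Z is an always-true condition here and Xcc_NZ
+         an always-false one. */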
+ addInstr(env, X86Instr_Alu32R(Xalu_MOV,X86RMI_Imm(0),r));
+ addInstr(env, X86Instr_Alu32R(Xalu_XOR,X86RMI_Reg(r),r));
+ return e->Iex.Const.con->Ico.U1 ? Xcc_Z : Xcc_NZ;
+ }
+
+ /* Not1(e) */
+ if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) {
+ /* Generate code for the arg, and negate the test condition */
+ return 1 ^ iselCondCode(env, e->Iex.Unop.arg);
+ }
+
+ /* --- patterns rooted at: 32to1 --- */
+
+ if (e->tag == Iex_Unop
+ && e->Iex.Unop.op == Iop_32to1) {
+ X86RM* rm = iselIntExpr_RM(env, e->Iex.Unop.arg);
+ addInstr(env, X86Instr_Test32(1,rm));
+ return Xcc_NZ;
+ }
+
+ /* --- patterns rooted at: CmpNEZ8 --- */
+
+ /* CmpNEZ8(x) */
+ if (e->tag == Iex_Unop
+ && e->Iex.Unop.op == Iop_CmpNEZ8) {
+ X86RM* rm = iselIntExpr_RM(env, e->Iex.Unop.arg);
+ addInstr(env, X86Instr_Test32(0xFF,rm));
+ return Xcc_NZ;
+ }
+
+ /* --- patterns rooted at: CmpNEZ16 --- */
+
+ /* CmpNEZ16(x) */
+ if (e->tag == Iex_Unop
+ && e->Iex.Unop.op == Iop_CmpNEZ16) {
+ X86RM* rm = iselIntExpr_RM(env, e->Iex.Unop.arg);
+ addInstr(env, X86Instr_Test32(0xFFFF,rm));
+ return Xcc_NZ;
+ }
+
+ /* --- patterns rooted at: CmpNEZ32 --- */
+
+ /* CmpNEZ32(And32(x,y)) */
+ {
+ DECLARE_PATTERN(p_CmpNEZ32_And32);
+ DEFINE_PATTERN(p_CmpNEZ32_And32,
+ unop(Iop_CmpNEZ32, binop(Iop_And32, bind(0), bind(1))));
+ if (matchIRExpr(&mi, p_CmpNEZ32_And32, e)) {
+ HReg r0 = iselIntExpr_R(env, mi.bindee[0]);
+ X86RMI* rmi1 = iselIntExpr_RMI(env, mi.bindee[1]);
+ HReg tmp = newVRegI(env);
+ addInstr(env, mk_iMOVsd_RR(r0, tmp));
+ addInstr(env, X86Instr_Alu32R(Xalu_AND,rmi1,tmp));
+ return Xcc_NZ;
+ }
+ }
+
+ /* CmpNEZ32(Or32(x,y)) */
+ {
+ DECLARE_PATTERN(p_CmpNEZ32_Or32);
+ DEFINE_PATTERN(p_CmpNEZ32_Or32,
+ unop(Iop_CmpNEZ32, binop(Iop_Or32, bind(0), bind(1))));
+ if (matchIRExpr(&mi, p_CmpNEZ32_Or32, e)) {
+ HReg r0 = iselIntExpr_R(env, mi.bindee[0]);
+ X86RMI* rmi1 = iselIntExpr_RMI(env, mi.bindee[1]);
+ HReg tmp = newVRegI(env);
+ addInstr(env, mk_iMOVsd_RR(r0, tmp));
+ addInstr(env, X86Instr_Alu32R(Xalu_OR,rmi1,tmp));
+ return Xcc_NZ;
+ }
+ }
+
+ /* CmpNEZ32(GET(..):I32) */
+ if (e->tag == Iex_Unop
+ && e->Iex.Unop.op == Iop_CmpNEZ32
+ && e->Iex.Unop.arg->tag == Iex_Get) {
+ X86AMode* am = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
+ hregX86_EBP());
+ addInstr(env, X86Instr_Alu32M(Xalu_CMP, X86RI_Imm(0), am));
+ return Xcc_NZ;
+ }
+
+ /* CmpNEZ32(x) */
+ if (e->tag == Iex_Unop
+ && e->Iex.Unop.op == Iop_CmpNEZ32) {
+ HReg r1 = iselIntExpr_R(env, e->Iex.Unop.arg);
+ X86RMI* rmi2 = X86RMI_Imm(0);
+ addInstr(env, X86Instr_Alu32R(Xalu_CMP,rmi2,r1));
+ return Xcc_NZ;
+ }
+
+ /* --- patterns rooted at: CmpNEZ64 --- */
+
+ /* CmpNEZ64(Or64(x,y)) */
+ {
+ DECLARE_PATTERN(p_CmpNEZ64_Or64);
+ DEFINE_PATTERN(p_CmpNEZ64_Or64,
+ unop(Iop_CmpNEZ64, binop(Iop_Or64, bind(0), bind(1))));
+ if (matchIRExpr(&mi, p_CmpNEZ64_Or64, e)) {
+ HReg hi1, lo1, hi2, lo2;
+ HReg tmp = newVRegI(env);
+ iselInt64Expr( &hi1, &lo1, env, mi.bindee[0] );
+ addInstr(env, mk_iMOVsd_RR(hi1, tmp));
+ addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(lo1),tmp));
+ iselInt64Expr( &hi2, &lo2, env, mi.bindee[1] );
+ addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(hi2),tmp));
+ addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(lo2),tmp));
+ return Xcc_NZ;
+ }
+ }
+
+ /* CmpNEZ64(x) */
+ if (e->tag == Iex_Unop
+ && e->Iex.Unop.op == Iop_CmpNEZ64) {
+ HReg hi, lo;
+ HReg tmp = newVRegI(env);
+ iselInt64Expr( &hi, &lo, env, e->Iex.Unop.arg );
+ addInstr(env, mk_iMOVsd_RR(hi, tmp));
+ addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(lo), tmp));
+ return Xcc_NZ;
+ }
+
+ /* --- patterns rooted at: Cmp{EQ,NE}{8,16} --- */
+
+ /* CmpEQ8 / CmpNE8 */
+ if (e->tag == Iex_Binop
+ && (e->Iex.Binop.op == Iop_CmpEQ8
+ || e->Iex.Binop.op == Iop_CmpNE8
+ || e->Iex.Binop.op == Iop_CasCmpEQ8
+ || e->Iex.Binop.op == Iop_CasCmpNE8)) {
+ if (isZeroU8(e->Iex.Binop.arg2)) {
+ HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
+ addInstr(env, X86Instr_Test32(0xFF,X86RM_Reg(r1)));
+ switch (e->Iex.Binop.op) {
+ case Iop_CmpEQ8: case Iop_CasCmpEQ8: return Xcc_Z;
+ case Iop_CmpNE8: case Iop_CasCmpNE8: return Xcc_NZ;
+ default: vpanic("iselCondCode(x86): CmpXX8(expr,0:I8)");
+ }
+ } else {
+ HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
+ X86RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
+ HReg r = newVRegI(env);
+ addInstr(env, mk_iMOVsd_RR(r1,r));
+ addInstr(env, X86Instr_Alu32R(Xalu_XOR,rmi2,r));
+ addInstr(env, X86Instr_Test32(0xFF,X86RM_Reg(r)));
+ switch (e->Iex.Binop.op) {
+ case Iop_CmpEQ8: case Iop_CasCmpEQ8: return Xcc_Z;
+ case Iop_CmpNE8: case Iop_CasCmpNE8: return Xcc_NZ;
+ default: vpanic("iselCondCode(x86): CmpXX8(expr,expr)");
+ }
+ }
+ }
+
+ /* CmpEQ16 / CmpNE16 */
+ if (e->tag == Iex_Binop
+ && (e->Iex.Binop.op == Iop_CmpEQ16
+ || e->Iex.Binop.op == Iop_CmpNE16
+ || e->Iex.Binop.op == Iop_CasCmpEQ16
+ || e->Iex.Binop.op == Iop_CasCmpNE16)) {
+ HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
+ X86RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
+ HReg r = newVRegI(env);
+ addInstr(env, mk_iMOVsd_RR(r1,r));
+ addInstr(env, X86Instr_Alu32R(Xalu_XOR,rmi2,r));
+ addInstr(env, X86Instr_Test32(0xFFFF,X86RM_Reg(r)));
+ switch (e->Iex.Binop.op) {
+ case Iop_CmpEQ16: case Iop_CasCmpEQ16: return Xcc_Z;
+ case Iop_CmpNE16: case Iop_CasCmpNE16: return Xcc_NZ;
+ default: vpanic("iselCondCode(x86): CmpXX16");
+ }
+ }
+
+ /* Cmp*32*(x,y) */
+ if (e->tag == Iex_Binop
+ && (e->Iex.Binop.op == Iop_CmpEQ32
+ || e->Iex.Binop.op == Iop_CmpNE32
+ || e->Iex.Binop.op == Iop_CmpLT32S
+ || e->Iex.Binop.op == Iop_CmpLT32U
+ || e->Iex.Binop.op == Iop_CmpLE32S
+ || e->Iex.Binop.op == Iop_CmpLE32U
+ || e->Iex.Binop.op == Iop_CasCmpEQ32
+ || e->Iex.Binop.op == Iop_CasCmpNE32)) {
+ HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
+ X86RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
+ addInstr(env, X86Instr_Alu32R(Xalu_CMP,rmi2,r1));
+ switch (e->Iex.Binop.op) {
+ case Iop_CmpEQ32: case Iop_CasCmpEQ32: return Xcc_Z;
+ case Iop_CmpNE32: case Iop_CasCmpNE32: return Xcc_NZ;
+ case Iop_CmpLT32S: return Xcc_L;
+ case Iop_CmpLT32U: return Xcc_B;
+ case Iop_CmpLE32S: return Xcc_LE;
+ case Iop_CmpLE32U: return Xcc_BE;
+ default: vpanic("iselCondCode(x86): CmpXX32");
+ }
+ }
+
+ /* CmpNE64 */
+ if (e->tag == Iex_Binop
+ && (e->Iex.Binop.op == Iop_CmpNE64
+ || e->Iex.Binop.op == Iop_CmpEQ64)) {
+ HReg hi1, hi2, lo1, lo2;
+ HReg tHi = newVRegI(env);
+ HReg tLo = newVRegI(env);
+ iselInt64Expr( &hi1, &lo1, env, e->Iex.Binop.arg1 );
+ iselInt64Expr( &hi2, &lo2, env, e->Iex.Binop.arg2 );
+ addInstr(env, mk_iMOVsd_RR(hi1, tHi));
+ addInstr(env, X86Instr_Alu32R(Xalu_XOR,X86RMI_Reg(hi2), tHi));
+ addInstr(env, mk_iMOVsd_RR(lo1, tLo));
+ addInstr(env, X86Instr_Alu32R(Xalu_XOR,X86RMI_Reg(lo2), tLo));
+ addInstr(env, X86Instr_Alu32R(Xalu_OR,X86RMI_Reg(tHi), tLo));
+ switch (e->Iex.Binop.op) {
+ case Iop_CmpNE64: return Xcc_NZ;
+ case Iop_CmpEQ64: return Xcc_Z;
+ default: vpanic("iselCondCode(x86): CmpXX64");
+ }
+ }
+
+ ppIRExpr(e);
+ vpanic("iselCondCode");
+}
+
+
+/*---------------------------------------------------------*/
+/*--- ISEL: Integer expressions (64 bit) ---*/
+/*---------------------------------------------------------*/
+
+/* Compute a 64-bit value into a register pair, which is returned as
+   the first two parameters.  As with iselIntExpr_R, these will be
+   virtual regs, and they must not be changed by subsequent code
+   emitted by the caller. */
+
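+/* For example (illustrative): Add64 is lowered below to an addl/adcl
+   pair on the low and high halves of the operands, and Shl64 to a
+   shldl/shll pair plus a fixup for shift amounts in 32 .. 63; see
+   the Iex_Binop cases in the worker function. */
+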
+static void iselInt64Expr ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e )
+{
+ iselInt64Expr_wrk(rHi, rLo, env, e);
+# if 0
+ vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
+# endif
+ vassert(hregClass(*rHi) == HRcInt32);
+ vassert(hregIsVirtual(*rHi));
+ vassert(hregClass(*rLo) == HRcInt32);
+ vassert(hregIsVirtual(*rLo));
+}
+
+/* DO NOT CALL THIS DIRECTLY ! */
+static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e )
+{
+ MatchInfo mi;
+ HWord fn = 0; /* helper fn for most SIMD64 stuff */
+ vassert(e);
+ vassert(typeOfIRExpr(env->type_env,e) == Ity_I64);
+
+ /* 64-bit literal */
+ if (e->tag == Iex_Const) {
+ ULong w64 = e->Iex.Const.con->Ico.U64;
+ UInt wHi = toUInt(w64 >> 32);
+ UInt wLo = toUInt(w64);
+ HReg tLo = newVRegI(env);
+ HReg tHi = newVRegI(env);
+ vassert(e->Iex.Const.con->tag == Ico_U64);
+ if (wLo == wHi) {
+ /* Save a precious Int register in this special case. */
+ addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wLo), tLo));
+ *rHi = tLo;
+ *rLo = tLo;
+ } else {
+ addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wHi), tHi));
+ addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(wLo), tLo));
+ *rHi = tHi;
+ *rLo = tLo;
+ }
+ return;
+ }
+
+ /* read 64-bit IRTemp */
+ if (e->tag == Iex_RdTmp) {
+ lookupIRTemp64( rHi, rLo, env, e->Iex.RdTmp.tmp);
+ return;
+ }
+
+ /* 64-bit load */
+ if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
+ HReg tLo, tHi;
+ X86AMode *am0, *am4;
+ vassert(e->Iex.Load.ty == Ity_I64);
+ tLo = newVRegI(env);
+ tHi = newVRegI(env);
+ am0 = iselIntExpr_AMode(env, e->Iex.Load.addr);
+ am4 = advance4(am0);
+ addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am0), tLo ));
+ addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi ));
+ *rHi = tHi;
+ *rLo = tLo;
+ return;
+ }
+
+ /* 64-bit GET */
+ if (e->tag == Iex_Get) {
+ X86AMode* am = X86AMode_IR(e->Iex.Get.offset, hregX86_EBP());
+ X86AMode* am4 = advance4(am);
+ HReg tLo = newVRegI(env);
+ HReg tHi = newVRegI(env);
+ addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am), tLo ));
+ addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi ));
+ *rHi = tHi;
+ *rLo = tLo;
+ return;
+ }
+
+ /* 64-bit GETI */
+ if (e->tag == Iex_GetI) {
+ X86AMode* am
+ = genGuestArrayOffset( env, e->Iex.GetI.descr,
+ e->Iex.GetI.ix, e->Iex.GetI.bias );
+ X86AMode* am4 = advance4(am);
+ HReg tLo = newVRegI(env);
+ HReg tHi = newVRegI(env);
+ addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am), tLo ));
+ addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(am4), tHi ));
+ *rHi = tHi;
+ *rLo = tLo;
+ return;
+ }
+
+ /* 64-bit Mux0X: Mux0X(g, expr, 0:I64) */
+ if (e->tag == Iex_Mux0X && isZeroU64(e->Iex.Mux0X.exprX)) {
+ X86RM* r8;
+ HReg e0Lo, e0Hi;
+ HReg tLo = newVRegI(env);
+ HReg tHi = newVRegI(env);
+ X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
+ iselInt64Expr(&e0Hi, &e0Lo, env, e->Iex.Mux0X.expr0);
+ r8 = iselIntExpr_RM(env, e->Iex.Mux0X.cond);
+ addInstr(env, mk_iMOVsd_RR( e0Hi, tHi ) );
+ addInstr(env, mk_iMOVsd_RR( e0Lo, tLo ) );
+ addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
+ addInstr(env, X86Instr_Test32(0xFF, r8));
+ addInstr(env, X86Instr_CMov32(Xcc_NZ,X86RM_Mem(zero_esp),tHi));
+ addInstr(env, X86Instr_CMov32(Xcc_NZ,X86RM_Mem(zero_esp),tLo));
+ add_to_esp(env, 4);
+ *rHi = tHi;
+ *rLo = tLo;
+ return;
+ }
+ /* 64-bit Mux0X: Mux0X(g, 0:I64, expr) */
+ if (e->tag == Iex_Mux0X && isZeroU64(e->Iex.Mux0X.expr0)) {
+ X86RM* r8;
+ HReg e0Lo, e0Hi;
+ HReg tLo = newVRegI(env);
+ HReg tHi = newVRegI(env);
+ X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
+ iselInt64Expr(&e0Hi, &e0Lo, env, e->Iex.Mux0X.exprX);
+ r8 = iselIntExpr_RM(env, e->Iex.Mux0X.cond);
+ addInstr(env, mk_iMOVsd_RR( e0Hi, tHi ) );
+ addInstr(env, mk_iMOVsd_RR( e0Lo, tLo ) );
+ addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
+ addInstr(env, X86Instr_Test32(0xFF, r8));
+ addInstr(env, X86Instr_CMov32(Xcc_Z,X86RM_Mem(zero_esp),tHi));
+ addInstr(env, X86Instr_CMov32(Xcc_Z,X86RM_Mem(zero_esp),tLo));
+ add_to_esp(env, 4);
+ *rHi = tHi;
+ *rLo = tLo;
+ return;
+ }
+
+ /* 64-bit Mux0X: Mux0X(g, expr, expr) */
+ if (e->tag == Iex_Mux0X) {
+ X86RM* r8;
+ HReg e0Lo, e0Hi, eXLo, eXHi;
+ HReg tLo = newVRegI(env);
+ HReg tHi = newVRegI(env);
+ iselInt64Expr(&e0Hi, &e0Lo, env, e->Iex.Mux0X.expr0);
+ iselInt64Expr(&eXHi, &eXLo, env, e->Iex.Mux0X.exprX);
+ addInstr(env, mk_iMOVsd_RR(eXHi, tHi));
+ addInstr(env, mk_iMOVsd_RR(eXLo, tLo));
+ r8 = iselIntExpr_RM(env, e->Iex.Mux0X.cond);
+ addInstr(env, X86Instr_Test32(0xFF, r8));
+ /* This assumes the first cmov32 doesn't trash the condition
+ codes, so they are still available for the second cmov32 */
+ addInstr(env, X86Instr_CMov32(Xcc_Z,X86RM_Reg(e0Hi),tHi));
+ addInstr(env, X86Instr_CMov32(Xcc_Z,X86RM_Reg(e0Lo),tLo));
+ *rHi = tHi;
+ *rLo = tLo;
+ return;
+ }
+
+ /* --------- BINARY ops --------- */
+ if (e->tag == Iex_Binop) {
+ switch (e->Iex.Binop.op) {
+ /* 32 x 32 -> 64 multiply */
+ case Iop_MullU32:
+ case Iop_MullS32: {
+         /* get one operand into %eax, and the other into an R/M.
+            Ideally we would make an educated guess about which
+            operand is better off in which position. */
+ HReg tLo = newVRegI(env);
+ HReg tHi = newVRegI(env);
+ Bool syned = toBool(e->Iex.Binop.op == Iop_MullS32);
+ X86RM* rmLeft = iselIntExpr_RM(env, e->Iex.Binop.arg1);
+ HReg rRight = iselIntExpr_R(env, e->Iex.Binop.arg2);
+ addInstr(env, mk_iMOVsd_RR(rRight, hregX86_EAX()));
+ addInstr(env, X86Instr_MulL(syned, rmLeft));
+ /* Result is now in EDX:EAX. Tell the caller. */
+ addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
+ addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
+ *rHi = tHi;
+ *rLo = tLo;
+ return;
+ }
+
+ /* 64 x 32 -> (32(rem),32(div)) division */
+ case Iop_DivModU64to32:
+ case Iop_DivModS64to32: {
+ /* Get the 64-bit operand into edx:eax, and the other into
+ any old R/M. */
+ HReg sHi, sLo;
+ HReg tLo = newVRegI(env);
+ HReg tHi = newVRegI(env);
+ Bool syned = toBool(e->Iex.Binop.op == Iop_DivModS64to32);
+ X86RM* rmRight = iselIntExpr_RM(env, e->Iex.Binop.arg2);
+ iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1);
+ addInstr(env, mk_iMOVsd_RR(sHi, hregX86_EDX()));
+ addInstr(env, mk_iMOVsd_RR(sLo, hregX86_EAX()));
+ addInstr(env, X86Instr_Div(syned, rmRight));
+ addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
+ addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
+ *rHi = tHi;
+ *rLo = tLo;
+ return;
+ }
+
+ /* Or64/And64/Xor64 */
+ case Iop_Or64:
+ case Iop_And64:
+ case Iop_Xor64: {
+ HReg xLo, xHi, yLo, yHi;
+ HReg tLo = newVRegI(env);
+ HReg tHi = newVRegI(env);
+ X86AluOp op = e->Iex.Binop.op==Iop_Or64 ? Xalu_OR
+ : e->Iex.Binop.op==Iop_And64 ? Xalu_AND
+ : Xalu_XOR;
+ iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
+ iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
+ addInstr(env, mk_iMOVsd_RR(xHi, tHi));
+ addInstr(env, X86Instr_Alu32R(op, X86RMI_Reg(yHi), tHi));
+ addInstr(env, mk_iMOVsd_RR(xLo, tLo));
+ addInstr(env, X86Instr_Alu32R(op, X86RMI_Reg(yLo), tLo));
+ *rHi = tHi;
+ *rLo = tLo;
+ return;
+ }
+
+ /* Add64/Sub64 */
+ case Iop_Add64:
+ if (e->Iex.Binop.arg2->tag == Iex_Const) {
+ /* special case Add64(e, const) */
+ ULong w64 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
+ UInt wHi = toUInt(w64 >> 32);
+ UInt wLo = toUInt(w64);
+ HReg tLo = newVRegI(env);
+ HReg tHi = newVRegI(env);
+ HReg xLo, xHi;
+ vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64);
+ iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
+ addInstr(env, mk_iMOVsd_RR(xHi, tHi));
+ addInstr(env, mk_iMOVsd_RR(xLo, tLo));
+ addInstr(env, X86Instr_Alu32R(Xalu_ADD, X86RMI_Imm(wLo), tLo));
+ addInstr(env, X86Instr_Alu32R(Xalu_ADC, X86RMI_Imm(wHi), tHi));
+ *rHi = tHi;
+ *rLo = tLo;
+ return;
+ }
+ /* else fall through to the generic case */
+ case Iop_Sub64: {
+ HReg xLo, xHi, yLo, yHi;
+ HReg tLo = newVRegI(env);
+ HReg tHi = newVRegI(env);
+ iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
+ addInstr(env, mk_iMOVsd_RR(xHi, tHi));
+ addInstr(env, mk_iMOVsd_RR(xLo, tLo));
+ iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
+ if (e->Iex.Binop.op==Iop_Add64) {
+ addInstr(env, X86Instr_Alu32R(Xalu_ADD, X86RMI_Reg(yLo), tLo));
+ addInstr(env, X86Instr_Alu32R(Xalu_ADC, X86RMI_Reg(yHi), tHi));
+ } else {
+ addInstr(env, X86Instr_Alu32R(Xalu_SUB, X86RMI_Reg(yLo), tLo));
+ addInstr(env, X86Instr_Alu32R(Xalu_SBB, X86RMI_Reg(yHi), tHi));
+ }
+ *rHi = tHi;
+ *rLo = tLo;
+ return;
+ }
+
+ /* 32HLto64(e1,e2) */
+ case Iop_32HLto64:
+ *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
+ *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
+ return;
+
+ /* 64-bit shifts */
+ case Iop_Shl64: {
+ /* We use the same ingenious scheme as gcc. Put the value
+ to be shifted into %hi:%lo, and the shift amount into
+ %cl. Then (dsts on right, a la ATT syntax):
+
+ shldl %cl, %lo, %hi -- make %hi be right for the
+ -- shift amt %cl % 32
+ shll %cl, %lo -- make %lo be right for the
+ -- shift amt %cl % 32
+
+ Now, if (shift amount % 64) is in the range 32 .. 63,
+ we have to do a fixup, which puts the result low half
+ into the result high half, and zeroes the low half:
+
+ testl $32, %ecx
+
+ cmovnz %lo, %hi
+ movl $0, %tmp -- sigh; need yet another reg
+ cmovnz %tmp, %lo
+ */
+ HReg rAmt, sHi, sLo, tHi, tLo, tTemp;
+ tLo = newVRegI(env);
+ tHi = newVRegI(env);
+ tTemp = newVRegI(env);
+ rAmt = iselIntExpr_R(env, e->Iex.Binop.arg2);
+ iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1);
+ addInstr(env, mk_iMOVsd_RR(rAmt, hregX86_ECX()));
+ addInstr(env, mk_iMOVsd_RR(sHi, tHi));
+ addInstr(env, mk_iMOVsd_RR(sLo, tLo));
+ /* Ok. Now shift amt is in %ecx, and value is in tHi/tLo
+ and those regs are legitimately modifiable. */
+ addInstr(env, X86Instr_Sh3232(Xsh_SHL, 0/*%cl*/, tLo, tHi));
+ addInstr(env, X86Instr_Sh32(Xsh_SHL, 0/*%cl*/, tLo));
+ addInstr(env, X86Instr_Test32(32, X86RM_Reg(hregX86_ECX())));
+ addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tLo), tHi));
+ addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tTemp));
+ addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tTemp), tLo));
+ *rHi = tHi;
+ *rLo = tLo;
+ return;
+ }
+
+ case Iop_Shr64: {
+ /* We use the same ingenious scheme as gcc. Put the value
+ to be shifted into %hi:%lo, and the shift amount into
+ %cl. Then:
+
+ shrdl %cl, %hi, %lo -- make %lo be right for the
+ -- shift amt %cl % 32
+ shrl %cl, %hi -- make %hi be right for the
+ -- shift amt %cl % 32
+
+ Now, if (shift amount % 64) is in the range 32 .. 63,
+ we have to do a fixup, which puts the result high half
+ into the result low half, and zeroes the high half:
+
+ testl $32, %ecx
+
+ cmovnz %hi, %lo
+ movl $0, %tmp -- sigh; need yet another reg
+ cmovnz %tmp, %hi
+ */
+ HReg rAmt, sHi, sLo, tHi, tLo, tTemp;
+ tLo = newVRegI(env);
+ tHi = newVRegI(env);
+ tTemp = newVRegI(env);
+ rAmt = iselIntExpr_R(env, e->Iex.Binop.arg2);
+ iselInt64Expr(&sHi,&sLo, env, e->Iex.Binop.arg1);
+ addInstr(env, mk_iMOVsd_RR(rAmt, hregX86_ECX()));
+ addInstr(env, mk_iMOVsd_RR(sHi, tHi));
+ addInstr(env, mk_iMOVsd_RR(sLo, tLo));
+ /* Ok. Now shift amt is in %ecx, and value is in tHi/tLo
+ and those regs are legitimately modifiable. */
+ addInstr(env, X86Instr_Sh3232(Xsh_SHR, 0/*%cl*/, tHi, tLo));
+ addInstr(env, X86Instr_Sh32(Xsh_SHR, 0/*%cl*/, tHi));
+ addInstr(env, X86Instr_Test32(32, X86RM_Reg(hregX86_ECX())));
+ addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tHi), tLo));
+ addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tTemp));
+ addInstr(env, X86Instr_CMov32(Xcc_NZ, X86RM_Reg(tTemp), tHi));
+ *rHi = tHi;
+ *rLo = tLo;
+ return;
+ }
+
+ /* F64 -> I64 */
+ /* Sigh, this is an almost exact copy of the F64 -> I32/I16
+ case. Unfortunately I see no easy way to avoid the
+ duplication. */
+ case Iop_F64toI64S: {
+ HReg rf = iselDblExpr(env, e->Iex.Binop.arg2);
+ HReg tLo = newVRegI(env);
+ HReg tHi = newVRegI(env);
+
+ /* Used several times ... */
+ /* Careful ... this sharing is only safe because
+ zero_esp/four_esp do not hold any registers which the
+ register allocator could attempt to swizzle later. */
+ X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
+ X86AMode* four_esp = X86AMode_IR(4, hregX86_ESP());
+
+            /* rf now holds the value to be converted; arg1 gives
+               the rounding mode, encoded as per the IRRoundingMode
+               enum.  The first thing to do is set the FPU's
+               rounding mode accordingly. */
+
+ /* Create a space for the format conversion. */
+ /* subl $8, %esp */
+ sub_from_esp(env, 8);
+
+ /* Set host rounding mode */
+ set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
+
+ /* gistll %rf, 0(%esp) */
+ addInstr(env, X86Instr_FpLdStI(False/*store*/, 8, rf, zero_esp));
+
+ /* movl 0(%esp), %dstLo */
+ /* movl 4(%esp), %dstHi */
+ addInstr(env, X86Instr_Alu32R(
+ Xalu_MOV, X86RMI_Mem(zero_esp), tLo));
+ addInstr(env, X86Instr_Alu32R(
+ Xalu_MOV, X86RMI_Mem(four_esp), tHi));
+
+ /* Restore default FPU rounding. */
+ set_FPU_rounding_default( env );
+
+ /* addl $8, %esp */
+ add_to_esp(env, 8);
+
+ *rHi = tHi;
+ *rLo = tLo;
+ return;
+ }
+
+ case Iop_Add8x8:
+ fn = (HWord)h_generic_calc_Add8x8; goto binnish;
+ case Iop_Add16x4:
+ fn = (HWord)h_generic_calc_Add16x4; goto binnish;
+ case Iop_Add32x2:
+ fn = (HWord)h_generic_calc_Add32x2; goto binnish;
+
+ case Iop_Avg8Ux8:
+ fn = (HWord)h_generic_calc_Avg8Ux8; goto binnish;
+ case Iop_Avg16Ux4:
+ fn = (HWord)h_generic_calc_Avg16Ux4; goto binnish;
+
+ case Iop_CmpEQ8x8:
+ fn = (HWord)h_generic_calc_CmpEQ8x8; goto binnish;
+ case Iop_CmpEQ16x4:
+ fn = (HWord)h_generic_calc_CmpEQ16x4; goto binnish;
+ case Iop_CmpEQ32x2:
+ fn = (HWord)h_generic_calc_CmpEQ32x2; goto binnish;
+
+ case Iop_CmpGT8Sx8:
+ fn = (HWord)h_generic_calc_CmpGT8Sx8; goto binnish;
+ case Iop_CmpGT16Sx4:
+ fn = (HWord)h_generic_calc_CmpGT16Sx4; goto binnish;
+ case Iop_CmpGT32Sx2:
+ fn = (HWord)h_generic_calc_CmpGT32Sx2; goto binnish;
+
+ case Iop_InterleaveHI8x8:
+ fn = (HWord)h_generic_calc_InterleaveHI8x8; goto binnish;
+ case Iop_InterleaveLO8x8:
+ fn = (HWord)h_generic_calc_InterleaveLO8x8; goto binnish;
+ case Iop_InterleaveHI16x4:
+ fn = (HWord)h_generic_calc_InterleaveHI16x4; goto binnish;
+ case Iop_InterleaveLO16x4:
+ fn = (HWord)h_generic_calc_InterleaveLO16x4; goto binnish;
+ case Iop_InterleaveHI32x2:
+ fn = (HWord)h_generic_calc_InterleaveHI32x2; goto binnish;
+ case Iop_InterleaveLO32x2:
+ fn = (HWord)h_generic_calc_InterleaveLO32x2; goto binnish;
+ case Iop_CatOddLanes16x4:
+ fn = (HWord)h_generic_calc_CatOddLanes16x4; goto binnish;
+ case Iop_CatEvenLanes16x4:
+ fn = (HWord)h_generic_calc_CatEvenLanes16x4; goto binnish;
+ case Iop_Perm8x8:
+ fn = (HWord)h_generic_calc_Perm8x8; goto binnish;
+
+ case Iop_Max8Ux8:
+ fn = (HWord)h_generic_calc_Max8Ux8; goto binnish;
+ case Iop_Max16Sx4:
+ fn = (HWord)h_generic_calc_Max16Sx4; goto binnish;
+ case Iop_Min8Ux8:
+ fn = (HWord)h_generic_calc_Min8Ux8; goto binnish;
+ case Iop_Min16Sx4:
+ fn = (HWord)h_generic_calc_Min16Sx4; goto binnish;
+
+ case Iop_Mul16x4:
+ fn = (HWord)h_generic_calc_Mul16x4; goto binnish;
+ case Iop_Mul32x2:
+ fn = (HWord)h_generic_calc_Mul32x2; goto binnish;
+ case Iop_MulHi16Sx4:
+ fn = (HWord)h_generic_calc_MulHi16Sx4; goto binnish;
+ case Iop_MulHi16Ux4:
+ fn = (HWord)h_generic_calc_MulHi16Ux4; goto binnish;
+
+ case Iop_QAdd8Sx8:
+ fn = (HWord)h_generic_calc_QAdd8Sx8; goto binnish;
+ case Iop_QAdd16Sx4:
+ fn = (HWord)h_generic_calc_QAdd16Sx4; goto binnish;
+ case Iop_QAdd8Ux8:
+ fn = (HWord)h_generic_calc_QAdd8Ux8; goto binnish;
+ case Iop_QAdd16Ux4:
+ fn = (HWord)h_generic_calc_QAdd16Ux4; goto binnish;
+
+ case Iop_QNarrow32Sx2:
+ fn = (HWord)h_generic_calc_QNarrow32Sx2; goto binnish;
+ case Iop_QNarrow16Sx4:
+ fn = (HWord)h_generic_calc_QNarrow16Sx4; goto binnish;
+ case Iop_QNarrow16Ux4:
+ fn = (HWord)h_generic_calc_QNarrow16Ux4; goto binnish;
+
+ case Iop_QSub8Sx8:
+ fn = (HWord)h_generic_calc_QSub8Sx8; goto binnish;
+ case Iop_QSub16Sx4:
+ fn = (HWord)h_generic_calc_QSub16Sx4; goto binnish;
+ case Iop_QSub8Ux8:
+ fn = (HWord)h_generic_calc_QSub8Ux8; goto binnish;
+ case Iop_QSub16Ux4:
+ fn = (HWord)h_generic_calc_QSub16Ux4; goto binnish;
+
+ case Iop_Sub8x8:
+ fn = (HWord)h_generic_calc_Sub8x8; goto binnish;
+ case Iop_Sub16x4:
+ fn = (HWord)h_generic_calc_Sub16x4; goto binnish;
+ case Iop_Sub32x2:
+ fn = (HWord)h_generic_calc_Sub32x2; goto binnish;
+
+ binnish: {
+ /* Note: the following assumes all helpers are of
+ signature
+ ULong fn ( ULong, ULong ), and they are
+ not marked as regparm functions.
+ */
+ HReg xLo, xHi, yLo, yHi;
+ HReg tLo = newVRegI(env);
+ HReg tHi = newVRegI(env);
+ iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
+ addInstr(env, X86Instr_Push(X86RMI_Reg(yHi)));
+ addInstr(env, X86Instr_Push(X86RMI_Reg(yLo)));
+ iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
+ addInstr(env, X86Instr_Push(X86RMI_Reg(xHi)));
+ addInstr(env, X86Instr_Push(X86RMI_Reg(xLo)));
+ addInstr(env, X86Instr_Call( Xcc_ALWAYS, (UInt)fn, 0 ));
+ add_to_esp(env, 4*4);
+ addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
+ addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
+ *rHi = tHi;
+ *rLo = tLo;
+ return;
+ }
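+
+      /* Illustrative stack picture just before the call in the
+         'binnish' block above (added commentary):
+
+             0(%esp)  xLo
+             4(%esp)  xHi
+             8(%esp)  yLo
+            12(%esp)  yHi
+
+         i.e. a plain cdecl call of ULong fn(ULong x, ULong y), with
+         the result returned in %edx:%eax. */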
+
+ case Iop_ShlN32x2:
+ fn = (HWord)h_generic_calc_ShlN32x2; goto shifty;
+ case Iop_ShlN16x4:
+ fn = (HWord)h_generic_calc_ShlN16x4; goto shifty;
+ case Iop_ShlN8x8:
+ fn = (HWord)h_generic_calc_ShlN8x8; goto shifty;
+ case Iop_ShrN32x2:
+ fn = (HWord)h_generic_calc_ShrN32x2; goto shifty;
+ case Iop_ShrN16x4:
+ fn = (HWord)h_generic_calc_ShrN16x4; goto shifty;
+ case Iop_SarN32x2:
+ fn = (HWord)h_generic_calc_SarN32x2; goto shifty;
+ case Iop_SarN16x4:
+ fn = (HWord)h_generic_calc_SarN16x4; goto shifty;
+ case Iop_SarN8x8:
+ fn = (HWord)h_generic_calc_SarN8x8; goto shifty;
+ shifty: {
+ /* Note: the following assumes all helpers are of
+ signature
+ ULong fn ( ULong, UInt ), and they are
+ not marked as regparm functions.
+ */
+ HReg xLo, xHi;
+ HReg tLo = newVRegI(env);
+ HReg tHi = newVRegI(env);
+ X86RMI* y = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
+ addInstr(env, X86Instr_Push(y));
+ iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
+ addInstr(env, X86Instr_Push(X86RMI_Reg(xHi)));
+ addInstr(env, X86Instr_Push(X86RMI_Reg(xLo)));
+ addInstr(env, X86Instr_Call( Xcc_ALWAYS, (UInt)fn, 0 ));
+ add_to_esp(env, 3*4);
+ addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
+ addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
+ *rHi = tHi;
+ *rLo = tLo;
+ return;
+ }
+
+ default:
+ break;
+ }
+ } /* if (e->tag == Iex_Binop) */
+
+
+ /* --------- UNARY ops --------- */
+ if (e->tag == Iex_Unop) {
+ switch (e->Iex.Unop.op) {
+
+ /* 32Sto64(e) */
+ case Iop_32Sto64: {
+ HReg tLo = newVRegI(env);
+ HReg tHi = newVRegI(env);
+ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
+ addInstr(env, mk_iMOVsd_RR(src,tHi));
+ addInstr(env, mk_iMOVsd_RR(src,tLo));
+ addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, tHi));
+ *rHi = tHi;
+ *rLo = tLo;
+ return;
+ }
+
+ /* 32Uto64(e) */
+ case Iop_32Uto64: {
+ HReg tLo = newVRegI(env);
+ HReg tHi = newVRegI(env);
+ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
+ addInstr(env, mk_iMOVsd_RR(src,tLo));
+ addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tHi));
+ *rHi = tHi;
+ *rLo = tLo;
+ return;
+ }
+
+ /* 16Uto64(e) */
+ case Iop_16Uto64: {
+ HReg tLo = newVRegI(env);
+ HReg tHi = newVRegI(env);
+ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
+ addInstr(env, mk_iMOVsd_RR(src,tLo));
+ addInstr(env, X86Instr_Alu32R(Xalu_AND,
+ X86RMI_Imm(0xFFFF), tLo));
+ addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tHi));
+ *rHi = tHi;
+ *rLo = tLo;
+ return;
+ }
+
+ /* V128{HI}to64 */
+ case Iop_V128HIto64:
+ case Iop_V128to64: {
+ Int off = e->Iex.Unop.op==Iop_V128HIto64 ? 8 : 0;
+ HReg tLo = newVRegI(env);
+ HReg tHi = newVRegI(env);
+ HReg vec = iselVecExpr(env, e->Iex.Unop.arg);
+ X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
+ X86AMode* espLO = X86AMode_IR(off, hregX86_ESP());
+ X86AMode* espHI = X86AMode_IR(off+4, hregX86_ESP());
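+            /* Route the value through memory: park the whole vector
+               at esp, then reload the selected 64-bit half ('off'
+               picks high vs low) as two 32-bit words. */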
+ sub_from_esp(env, 16);
+ addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, esp0));
+ addInstr(env, X86Instr_Alu32R( Xalu_MOV,
+ X86RMI_Mem(espLO), tLo ));
+ addInstr(env, X86Instr_Alu32R( Xalu_MOV,
+ X86RMI_Mem(espHI), tHi ));
+ add_to_esp(env, 16);
+ *rHi = tHi;
+ *rLo = tLo;
+ return;
+ }
+
+ /* could do better than this, but for now ... */
+ case Iop_1Sto64: {
+ HReg tLo = newVRegI(env);
+ HReg tHi = newVRegI(env);
+ X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
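+            /* tLo = cond ? 1 : 0; shifting left by 31 and then
+               arithmetic-right by 31 smears bit 0 across all 32
+               bits, giving 0 or 0xFFFFFFFF, and tHi is a copy. */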
+ addInstr(env, X86Instr_Set32(cond,tLo));
+ addInstr(env, X86Instr_Sh32(Xsh_SHL, 31, tLo));
+ addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, tLo));
+ addInstr(env, mk_iMOVsd_RR(tLo, tHi));
+ *rHi = tHi;
+ *rLo = tLo;
+ return;
+ }
+
+ /* Not64(e) */
+ case Iop_Not64: {
+ HReg tLo = newVRegI(env);
+ HReg tHi = newVRegI(env);
+ HReg sHi, sLo;
+ iselInt64Expr(&sHi, &sLo, env, e->Iex.Unop.arg);
+ addInstr(env, mk_iMOVsd_RR(sHi, tHi));
+ addInstr(env, mk_iMOVsd_RR(sLo, tLo));
+ addInstr(env, X86Instr_Unary32(Xun_NOT,tHi));
+ addInstr(env, X86Instr_Unary32(Xun_NOT,tLo));
+ *rHi = tHi;
+ *rLo = tLo;
+ return;
+ }
+
+ /* Left64(e) */
+ case Iop_Left64: {
+ HReg yLo, yHi;
+ HReg tLo = newVRegI(env);
+ HReg tHi = newVRegI(env);
+ /* yHi:yLo = arg */
+ iselInt64Expr(&yHi, &yLo, env, e->Iex.Unop.arg);
+ /* tLo = 0 - yLo, and set carry */
+ addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tLo));
+ addInstr(env, X86Instr_Alu32R(Xalu_SUB, X86RMI_Reg(yLo), tLo));
+ /* tHi = 0 - yHi - carry */
+ addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Imm(0), tHi));
+ addInstr(env, X86Instr_Alu32R(Xalu_SBB, X86RMI_Reg(yHi), tHi));
+            /* So now we have tHi:tLo = -arg.  To finish off, OR
+               'arg' back in, giving the final result
+               tHi:tLo = arg | -arg. */
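+            /* Net effect: Left64(x) = x | -x, which sets every bit
+               from the lowest set bit of x upwards; e.g. (a worked
+               example, not from the source) x = 0x18 gives
+               0xFFFFFFFFFFFFFFF8. */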
+ addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(yLo), tLo));
+ addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Reg(yHi), tHi));
+ *rHi = tHi;
+ *rLo = tLo;
+ return;
+ }
+
+ /* --- patterns rooted at: CmpwNEZ64 --- */
+
+ /* CmpwNEZ64(e) */
+ case Iop_CmpwNEZ64: {
+
+ DECLARE_PATTERN(p_CmpwNEZ64_Or64);
+ DEFINE_PATTERN(p_CmpwNEZ64_Or64,
+ unop(Iop_CmpwNEZ64,binop(Iop_Or64,bind(0),bind(1))));
+ if (matchIRExpr(&mi, p_CmpwNEZ64_Or64, e)) {
+ /* CmpwNEZ64(Or64(x,y)) */
+ HReg xHi,xLo,yHi,yLo;
+ HReg xBoth = newVRegI(env);
+ HReg merged = newVRegI(env);
+ HReg tmp2 = newVRegI(env);
+
+ iselInt64Expr(&xHi,&xLo, env, mi.bindee[0]);
+ addInstr(env, mk_iMOVsd_RR(xHi,xBoth));
+ addInstr(env, X86Instr_Alu32R(Xalu_OR,
+ X86RMI_Reg(xLo),xBoth));
+
+ iselInt64Expr(&yHi,&yLo, env, mi.bindee[1]);
+ addInstr(env, mk_iMOVsd_RR(yHi,merged));
+ addInstr(env, X86Instr_Alu32R(Xalu_OR,
+ X86RMI_Reg(yLo),merged));
+ addInstr(env, X86Instr_Alu32R(Xalu_OR,
+ X86RMI_Reg(xBoth),merged));
+
+ /* tmp2 = (merged | -merged) >>s 31 */
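+               /* This works because, for any nonzero w, at least one
+                  of w and -w has bit 31 set, so (w | -w) is negative
+                  and the SAR by 31 yields all-ones; w == 0 yields 0. */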
+ addInstr(env, mk_iMOVsd_RR(merged,tmp2));
+ addInstr(env, X86Instr_Unary32(Xun_NEG,tmp2));
+ addInstr(env, X86Instr_Alu32R(Xalu_OR,
+ X86RMI_Reg(merged), tmp2));
+ addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, tmp2));
+ *rHi = tmp2;
+ *rLo = tmp2;
+ return;
+ } else {
+ /* CmpwNEZ64(e) */
+ HReg srcLo, srcHi;
+ HReg tmp1 = newVRegI(env);
+ HReg tmp2 = newVRegI(env);
+ /* srcHi:srcLo = arg */
+ iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg);
+ /* tmp1 = srcHi | srcLo */
+ addInstr(env, mk_iMOVsd_RR(srcHi,tmp1));
+ addInstr(env, X86Instr_Alu32R(Xalu_OR,
+ X86RMI_Reg(srcLo), tmp1));
+ /* tmp2 = (tmp1 | -tmp1) >>s 31 */
+ addInstr(env, mk_iMOVsd_RR(tmp1,tmp2));
+ addInstr(env, X86Instr_Unary32(Xun_NEG,tmp2));
+ addInstr(env, X86Instr_Alu32R(Xalu_OR,
+ X86RMI_Reg(tmp1), tmp2));
+ addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, tmp2));
+ *rHi = tmp2;
+ *rLo = tmp2;
+ return;
+ }
+ }
+
+ /* ReinterpF64asI64(e) */
+ /* Given an IEEE754 double, produce an I64 with the same bit
+ pattern. */
+ case Iop_ReinterpF64asI64: {
+ HReg rf = iselDblExpr(env, e->Iex.Unop.arg);
+ HReg tLo = newVRegI(env);
+ HReg tHi = newVRegI(env);
+ X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
+ X86AMode* four_esp = X86AMode_IR(4, hregX86_ESP());
+ /* paranoia */
+ set_FPU_rounding_default(env);
+ /* subl $8, %esp */
+ sub_from_esp(env, 8);
+ /* gstD %rf, 0(%esp) */
+ addInstr(env,
+ X86Instr_FpLdSt(False/*store*/, 8, rf, zero_esp));
+ /* movl 0(%esp), %tLo */
+ addInstr(env,
+ X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(zero_esp), tLo));
+ /* movl 4(%esp), %tHi */
+ addInstr(env,
+ X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(four_esp), tHi));
+ /* addl $8, %esp */
+ add_to_esp(env, 8);
+ *rHi = tHi;
+ *rLo = tLo;
+ return;
+ }
+
+ case Iop_CmpNEZ32x2:
+ fn = (HWord)h_generic_calc_CmpNEZ32x2; goto unish;
+ case Iop_CmpNEZ16x4:
+ fn = (HWord)h_generic_calc_CmpNEZ16x4; goto unish;
+ case Iop_CmpNEZ8x8:
+ fn = (HWord)h_generic_calc_CmpNEZ8x8; goto unish;
+ unish: {
+            /* Note: the following assumes that all the helpers have
+               the signature
+                  ULong fn ( ULong )
+               and are not marked as regparm functions.
+            */
+ HReg xLo, xHi;
+ HReg tLo = newVRegI(env);
+ HReg tHi = newVRegI(env);
+ iselInt64Expr(&xHi, &xLo, env, e->Iex.Unop.arg);
+ addInstr(env, X86Instr_Push(X86RMI_Reg(xHi)));
+ addInstr(env, X86Instr_Push(X86RMI_Reg(xLo)));
+ addInstr(env, X86Instr_Call( Xcc_ALWAYS, (UInt)fn, 0 ));
+ add_to_esp(env, 2*4);
+ addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
+ addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
+ *rHi = tHi;
+ *rLo = tLo;
+ return;
+ }
+
+ default:
+ break;
+ }
+ } /* if (e->tag == Iex_Unop) */
+
+
+ /* --------- CCALL --------- */
+ if (e->tag == Iex_CCall) {
+ HReg tLo = newVRegI(env);
+ HReg tHi = newVRegI(env);
+
+ /* Marshal args, do the call, clear stack. */
+ doHelperCall( env, False, NULL, e->Iex.CCall.cee, e->Iex.CCall.args );
+
+ addInstr(env, mk_iMOVsd_RR(hregX86_EDX(), tHi));
+ addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), tLo));
+ *rHi = tHi;
+ *rLo = tLo;
+ return;
+ }
+
+ ppIRExpr(e);
+ vpanic("iselInt64Expr");
+}
+
+
+/*---------------------------------------------------------*/
+/*--- ISEL: Floating point expressions (32 bit) ---*/
+/*---------------------------------------------------------*/
+
+/* Nothing interesting here; really just wrappers for
+ 64-bit stuff. */
+
+static HReg iselFltExpr ( ISelEnv* env, IRExpr* e )
+{
+ HReg r = iselFltExpr_wrk( env, e );
+# if 0
+ vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
+# endif
+ vassert(hregClass(r) == HRcFlt64); /* yes, really Flt64 */
+ vassert(hregIsVirtual(r));
+ return r;
+}
+
+/* DO NOT CALL THIS DIRECTLY */
+static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e )
+{
+ IRType ty = typeOfIRExpr(env->type_env,e);
+ vassert(ty == Ity_F32);
+
+ if (e->tag == Iex_RdTmp) {
+ return lookupIRTemp(env, e->Iex.RdTmp.tmp);
+ }
+
+ if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
+ X86AMode* am;
+ HReg res = newVRegF(env);
+ vassert(e->Iex.Load.ty == Ity_F32);
+ am = iselIntExpr_AMode(env, e->Iex.Load.addr);
+ addInstr(env, X86Instr_FpLdSt(True/*load*/, 4, res, am));
+ return res;
+ }
+
+ if (e->tag == Iex_Binop
+ && e->Iex.Binop.op == Iop_F64toF32) {
+ /* Although the result is still held in a standard FPU register,
+ we need to round it to reflect the loss of accuracy/range
+ entailed in casting it to a 32-bit float. */
+ HReg dst = newVRegF(env);
+ HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
+ set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
+ addInstr(env, X86Instr_Fp64to32(src,dst));
+ set_FPU_rounding_default( env );
+ return dst;
+ }
+
+ if (e->tag == Iex_Get) {
+ X86AMode* am = X86AMode_IR( e->Iex.Get.offset,
+ hregX86_EBP() );
+ HReg res = newVRegF(env);
+ addInstr(env, X86Instr_FpLdSt( True/*load*/, 4, res, am ));
+ return res;
+ }
+
+ if (e->tag == Iex_Unop
+ && e->Iex.Unop.op == Iop_ReinterpI32asF32) {
+ /* Given an I32, produce an IEEE754 float with the same bit
+ pattern. */
+ HReg dst = newVRegF(env);
+ X86RMI* rmi = iselIntExpr_RMI(env, e->Iex.Unop.arg);
+ /* paranoia */
+ addInstr(env, X86Instr_Push(rmi));
+ addInstr(env, X86Instr_FpLdSt(
+ True/*load*/, 4, dst,
+ X86AMode_IR(0, hregX86_ESP())));
+ add_to_esp(env, 4);
+ return dst;
+ }
+
+ if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_RoundF32toInt) {
+ HReg rf = iselFltExpr(env, e->Iex.Binop.arg2);
+ HReg dst = newVRegF(env);
+
+ /* rf now holds the value to be rounded. The first thing to do
+ is set the FPU's rounding mode accordingly. */
+
+ /* Set host rounding mode */
+ set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
+
+ /* grndint %rf, %dst */
+ addInstr(env, X86Instr_FpUnary(Xfp_ROUND, rf, dst));
+
+ /* Restore default FPU rounding. */
+ set_FPU_rounding_default( env );
+
+ return dst;
+ }
+
+ ppIRExpr(e);
+ vpanic("iselFltExpr_wrk");
+}
+
+
+/*---------------------------------------------------------*/
+/*--- ISEL: Floating point expressions (64 bit) ---*/
+/*---------------------------------------------------------*/
+
+/* Compute a 64-bit floating point value into a register, the identity
+ of which is returned. As with iselIntExpr_R, the reg may be either
+ real or virtual; in any case it must not be changed by subsequent
+ code emitted by the caller. */
+
+/* IEEE 754 formats. From http://www.freesoft.org/CIE/RFC/1832/32.htm:
+
+ Type S (1 bit) E (11 bits) F (52 bits)
+ ---- --------- ----------- -----------
+ signalling NaN u 2047 (max) .0uuuuu---u
+ (with at least
+ one 1 bit)
+ quiet NaN u 2047 (max) .1uuuuu---u
+
+ negative infinity 1 2047 (max) .000000---0
+
+ positive infinity 0 2047 (max) .000000---0
+
+ negative zero 1 0 .000000---0
+
+ positive zero 0 0 .000000---0
+*/
+
+static HReg iselDblExpr ( ISelEnv* env, IRExpr* e )
+{
+ HReg r = iselDblExpr_wrk( env, e );
+# if 0
+ vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
+# endif
+ vassert(hregClass(r) == HRcFlt64);
+ vassert(hregIsVirtual(r));
+ return r;
+}
+
+/* DO NOT CALL THIS DIRECTLY */
+static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
+{
+ IRType ty = typeOfIRExpr(env->type_env,e);
+ vassert(e);
+ vassert(ty == Ity_F64);
+
+ if (e->tag == Iex_RdTmp) {
+ return lookupIRTemp(env, e->Iex.RdTmp.tmp);
+ }
+
+ if (e->tag == Iex_Const) {
+ union { UInt u32x2[2]; ULong u64; Double f64; } u;
+ HReg freg = newVRegF(env);
+ vassert(sizeof(u) == 8);
+ vassert(sizeof(u.u64) == 8);
+ vassert(sizeof(u.f64) == 8);
+ vassert(sizeof(u.u32x2) == 8);
+
+ if (e->Iex.Const.con->tag == Ico_F64) {
+ u.f64 = e->Iex.Const.con->Ico.F64;
+ }
+ else if (e->Iex.Const.con->tag == Ico_F64i) {
+ u.u64 = e->Iex.Const.con->Ico.F64i;
+ }
+ else
+ vpanic("iselDblExpr(x86): const");
+
+ addInstr(env, X86Instr_Push(X86RMI_Imm(u.u32x2[1])));
+ addInstr(env, X86Instr_Push(X86RMI_Imm(u.u32x2[0])));
+ addInstr(env, X86Instr_FpLdSt(True/*load*/, 8, freg,
+ X86AMode_IR(0, hregX86_ESP())));
+ add_to_esp(env, 8);
+ return freg;
+ }
+
+ if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
+ X86AMode* am;
+ HReg res = newVRegF(env);
+ vassert(e->Iex.Load.ty == Ity_F64);
+ am = iselIntExpr_AMode(env, e->Iex.Load.addr);
+ addInstr(env, X86Instr_FpLdSt(True/*load*/, 8, res, am));
+ return res;
+ }
+
+ if (e->tag == Iex_Get) {
+ X86AMode* am = X86AMode_IR( e->Iex.Get.offset,
+ hregX86_EBP() );
+ HReg res = newVRegF(env);
+ addInstr(env, X86Instr_FpLdSt( True/*load*/, 8, res, am ));
+ return res;
+ }
+
+ if (e->tag == Iex_GetI) {
+ X86AMode* am
+ = genGuestArrayOffset(
+ env, e->Iex.GetI.descr,
+ e->Iex.GetI.ix, e->Iex.GetI.bias );
+ HReg res = newVRegF(env);
+ addInstr(env, X86Instr_FpLdSt( True/*load*/, 8, res, am ));
+ return res;
+ }
+
+ if (e->tag == Iex_Triop) {
+ X86FpOp fpop = Xfp_INVALID;
+ switch (e->Iex.Triop.op) {
+ case Iop_AddF64: fpop = Xfp_ADD; break;
+ case Iop_SubF64: fpop = Xfp_SUB; break;
+ case Iop_MulF64: fpop = Xfp_MUL; break;
+ case Iop_DivF64: fpop = Xfp_DIV; break;
+ case Iop_ScaleF64: fpop = Xfp_SCALE; break;
+ case Iop_Yl2xF64: fpop = Xfp_YL2X; break;
+ case Iop_Yl2xp1F64: fpop = Xfp_YL2XP1; break;
+ case Iop_AtanF64: fpop = Xfp_ATAN; break;
+ case Iop_PRemF64: fpop = Xfp_PREM; break;
+ case Iop_PRem1F64: fpop = Xfp_PREM1; break;
+ default: break;
+ }
+ if (fpop != Xfp_INVALID) {
+ HReg res = newVRegF(env);
+ HReg srcL = iselDblExpr(env, e->Iex.Triop.arg2);
+ HReg srcR = iselDblExpr(env, e->Iex.Triop.arg3);
+ /* XXXROUNDINGFIXME */
+ /* set roundingmode here */
+ addInstr(env, X86Instr_FpBinary(fpop,srcL,srcR,res));
+ if (fpop != Xfp_ADD && fpop != Xfp_SUB
+ && fpop != Xfp_MUL && fpop != Xfp_DIV)
+ roundToF64(env, res);
+ return res;
+ }
+ }
+
+ if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_RoundF64toInt) {
+ HReg rf = iselDblExpr(env, e->Iex.Binop.arg2);
+ HReg dst = newVRegF(env);
+
+ /* rf now holds the value to be rounded. The first thing to do
+ is set the FPU's rounding mode accordingly. */
+
+ /* Set host rounding mode */
+ set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
+
+ /* grndint %rf, %dst */
+ addInstr(env, X86Instr_FpUnary(Xfp_ROUND, rf, dst));
+
+ /* Restore default FPU rounding. */
+ set_FPU_rounding_default( env );
+
+ return dst;
+ }
+
+ if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_I64StoF64) {
+ HReg dst = newVRegF(env);
+ HReg rHi,rLo;
+ iselInt64Expr( &rHi, &rLo, env, e->Iex.Binop.arg2);
+ addInstr(env, X86Instr_Push(X86RMI_Reg(rHi)));
+ addInstr(env, X86Instr_Push(X86RMI_Reg(rLo)));
+
+ /* Set host rounding mode */
+ set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
+
+ addInstr(env, X86Instr_FpLdStI(
+ True/*load*/, 8, dst,
+ X86AMode_IR(0, hregX86_ESP())));
+
+ /* Restore default FPU rounding. */
+ set_FPU_rounding_default( env );
+
+ add_to_esp(env, 8);
+ return dst;
+ }
+
+ if (e->tag == Iex_Binop) {
+ X86FpOp fpop = Xfp_INVALID;
+ switch (e->Iex.Binop.op) {
+ case Iop_SinF64: fpop = Xfp_SIN; break;
+ case Iop_CosF64: fpop = Xfp_COS; break;
+ case Iop_TanF64: fpop = Xfp_TAN; break;
+ case Iop_2xm1F64: fpop = Xfp_2XM1; break;
+ case Iop_SqrtF64: fpop = Xfp_SQRT; break;
+ default: break;
+ }
+ if (fpop != Xfp_INVALID) {
+ HReg res = newVRegF(env);
+ HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
+ /* XXXROUNDINGFIXME */
+ /* set roundingmode here */
+ addInstr(env, X86Instr_FpUnary(fpop,src,res));
+ if (fpop != Xfp_SQRT
+ && fpop != Xfp_NEG && fpop != Xfp_ABS)
+ roundToF64(env, res);
+ return res;
+ }
+ }
+
+ if (e->tag == Iex_Unop) {
+ X86FpOp fpop = Xfp_INVALID;
+ switch (e->Iex.Unop.op) {
+ case Iop_NegF64: fpop = Xfp_NEG; break;
+ case Iop_AbsF64: fpop = Xfp_ABS; break;
+ default: break;
+ }
+ if (fpop != Xfp_INVALID) {
+ HReg res = newVRegF(env);
+ HReg src = iselDblExpr(env, e->Iex.Unop.arg);
+ addInstr(env, X86Instr_FpUnary(fpop,src,res));
+ if (fpop != Xfp_NEG && fpop != Xfp_ABS)
+ roundToF64(env, res);
+ return res;
+ }
+ }
+
+ if (e->tag == Iex_Unop) {
+ switch (e->Iex.Unop.op) {
+ case Iop_I32StoF64: {
+ HReg dst = newVRegF(env);
+ HReg ri = iselIntExpr_R(env, e->Iex.Unop.arg);
+ addInstr(env, X86Instr_Push(X86RMI_Reg(ri)));
+ set_FPU_rounding_default(env);
+ addInstr(env, X86Instr_FpLdStI(
+ True/*load*/, 4, dst,
+ X86AMode_IR(0, hregX86_ESP())));
+ add_to_esp(env, 4);
+ return dst;
+ }
+ case Iop_ReinterpI64asF64: {
+ /* Given an I64, produce an IEEE754 double with the same
+ bit pattern. */
+ HReg dst = newVRegF(env);
+ HReg rHi, rLo;
+ iselInt64Expr( &rHi, &rLo, env, e->Iex.Unop.arg);
+ /* paranoia */
+ set_FPU_rounding_default(env);
+ addInstr(env, X86Instr_Push(X86RMI_Reg(rHi)));
+ addInstr(env, X86Instr_Push(X86RMI_Reg(rLo)));
+ addInstr(env, X86Instr_FpLdSt(
+ True/*load*/, 8, dst,
+ X86AMode_IR(0, hregX86_ESP())));
+ add_to_esp(env, 8);
+ return dst;
+ }
+ case Iop_F32toF64: {
+ /* this is a no-op */
+ HReg res = iselFltExpr(env, e->Iex.Unop.arg);
+ return res;
+ }
+ default:
+ break;
+ }
+ }
+
+ /* --------- MULTIPLEX --------- */
+ if (e->tag == Iex_Mux0X) {
+ if (ty == Ity_F64
+ && typeOfIRExpr(env->type_env,e->Iex.Mux0X.cond) == Ity_I8) {
+ X86RM* r8 = iselIntExpr_RM(env, e->Iex.Mux0X.cond);
+ HReg rX = iselDblExpr(env, e->Iex.Mux0X.exprX);
+ HReg r0 = iselDblExpr(env, e->Iex.Mux0X.expr0);
+ HReg dst = newVRegF(env);
+ addInstr(env, X86Instr_FpUnary(Xfp_MOV,rX,dst));
+ addInstr(env, X86Instr_Test32(0xFF, r8));
+ addInstr(env, X86Instr_FpCMov(Xcc_Z,r0,dst));
+ return dst;
+ }
+ }
+
+ ppIRExpr(e);
+ vpanic("iselDblExpr_wrk");
+}
+
+
+/*---------------------------------------------------------*/
+/*--- ISEL: SIMD (Vector) expressions, 128 bit. ---*/
+/*---------------------------------------------------------*/
+
+static HReg iselVecExpr ( ISelEnv* env, IRExpr* e )
+{
+ HReg r = iselVecExpr_wrk( env, e );
+# if 0
+ vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
+# endif
+ vassert(hregClass(r) == HRcVec128);
+ vassert(hregIsVirtual(r));
+ return r;
+}
+
+
+/* DO NOT CALL THIS DIRECTLY */
+static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e )
+{
+
+# define REQUIRE_SSE1 \
+ do { if (env->hwcaps == 0/*baseline, no sse*/) \
+ goto vec_fail; \
+ } while (0)
+
+# define REQUIRE_SSE2 \
+ do { if (0 == (env->hwcaps & VEX_HWCAPS_X86_SSE2)) \
+ goto vec_fail; \
+ } while (0)
+
+# define SSE2_OR_ABOVE \
+ (env->hwcaps & VEX_HWCAPS_X86_SSE2)
+
+ MatchInfo mi;
+ Bool arg1isEReg = False;
+ X86SseOp op = Xsse_INVALID;
+ IRType ty = typeOfIRExpr(env->type_env,e);
+ vassert(e);
+ vassert(ty == Ity_V128);
+
+ REQUIRE_SSE1;
+
+ if (e->tag == Iex_RdTmp) {
+ return lookupIRTemp(env, e->Iex.RdTmp.tmp);
+ }
+
+ if (e->tag == Iex_Get) {
+ HReg dst = newVRegV(env);
+ addInstr(env, X86Instr_SseLdSt(
+ True/*load*/,
+ dst,
+ X86AMode_IR(e->Iex.Get.offset, hregX86_EBP())
+ )
+ );
+ return dst;
+ }
+
+ if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
+ HReg dst = newVRegV(env);
+ X86AMode* am = iselIntExpr_AMode(env, e->Iex.Load.addr);
+ addInstr(env, X86Instr_SseLdSt( True/*load*/, dst, am ));
+ return dst;
+ }
+
+ if (e->tag == Iex_Const) {
+ HReg dst = newVRegV(env);
+ vassert(e->Iex.Const.con->tag == Ico_V128);
+ addInstr(env, X86Instr_SseConst(e->Iex.Const.con->Ico.V128, dst));
+ return dst;
+ }
+
+ if (e->tag == Iex_Unop) {
+
+ if (SSE2_OR_ABOVE) {
+ /* 64UtoV128(LDle:I64(addr)) */
+ DECLARE_PATTERN(p_zwiden_load64);
+ DEFINE_PATTERN(p_zwiden_load64,
+ unop(Iop_64UtoV128,
+ IRExpr_Load(Iend_LE,Ity_I64,bind(0))));
+ if (matchIRExpr(&mi, p_zwiden_load64, e)) {
+ X86AMode* am = iselIntExpr_AMode(env, mi.bindee[0]);
+ HReg dst = newVRegV(env);
+ addInstr(env, X86Instr_SseLdzLO(8, dst, am));
+ return dst;
+ }
+ }
+
+ switch (e->Iex.Unop.op) {
+
+ case Iop_NotV128: {
+ HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
+ return do_sse_Not128(env, arg);
+ }
+
+ case Iop_CmpNEZ64x2: {
+ /* We can use SSE2 instructions for this. */
+ /* Ideally, we want to do a 64Ix2 comparison against zero of
+ the operand. Problem is no such insn exists. Solution
+ therefore is to do a 32Ix4 comparison instead, and bitwise-
+ negate (NOT) the result. Let a,b,c,d be 32-bit lanes, and
+ let the not'd result of this initial comparison be a:b:c:d.
+ What we need to compute is (a|b):(a|b):(c|d):(c|d). So, use
+ pshufd to create a value b:a:d:c, and OR that with a:b:c:d,
+ giving the required result.
+
+ The required selection sequence is 2,3,0,1, which
+ according to Intel's documentation means the pshufd
+ literal value is 0xB1, that is,
+ (2 << 6) | (3 << 4) | (0 << 2) | (1 << 0)
+ */
+ HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
+ HReg tmp = newVRegV(env);
+ HReg dst = newVRegV(env);
+ REQUIRE_SSE2;
+ addInstr(env, X86Instr_SseReRg(Xsse_XOR, tmp, tmp));
+ addInstr(env, X86Instr_SseReRg(Xsse_CMPEQ32, arg, tmp));
+ tmp = do_sse_Not128(env, tmp);
+ addInstr(env, X86Instr_SseShuf(0xB1, tmp, dst));
+ addInstr(env, X86Instr_SseReRg(Xsse_OR, tmp, dst));
+ return dst;
+ }
+
+ case Iop_CmpNEZ32x4: {
+ /* Sigh, we have to generate lousy code since this has to
+ work on SSE1 hosts */
+ /* basically, the idea is: for each lane:
+ movl lane, %r ; negl %r (now CF = lane==0 ? 0 : 1)
+ sbbl %r, %r (now %r = 1Sto32(CF))
+ movl %r, lane
+ */
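+         /* The sbb step works because negl sets CF iff the lane was
+            nonzero, and "sbbl %r,%r" computes r - r - CF = -CF,
+            i.e. 0x00000000 or 0xFFFFFFFF. */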
+ Int i;
+ X86AMode* am;
+ X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
+ HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
+ HReg dst = newVRegV(env);
+ HReg r32 = newVRegI(env);
+ sub_from_esp(env, 16);
+ addInstr(env, X86Instr_SseLdSt(False/*store*/, arg, esp0));
+ for (i = 0; i < 4; i++) {
+ am = X86AMode_IR(i*4, hregX86_ESP());
+ addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(am), r32));
+ addInstr(env, X86Instr_Unary32(Xun_NEG, r32));
+ addInstr(env, X86Instr_Alu32R(Xalu_SBB, X86RMI_Reg(r32), r32));
+ addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r32), am));
+ }
+ addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0));
+ add_to_esp(env, 16);
+ return dst;
+ }
+
+ case Iop_CmpNEZ8x16:
+ case Iop_CmpNEZ16x8: {
+ /* We can use SSE2 instructions for this. */
+ HReg arg;
+ HReg vec0 = newVRegV(env);
+ HReg vec1 = newVRegV(env);
+ HReg dst = newVRegV(env);
+ X86SseOp cmpOp
+ = e->Iex.Unop.op==Iop_CmpNEZ16x8 ? Xsse_CMPEQ16
+ : Xsse_CMPEQ8;
+ REQUIRE_SSE2;
+ addInstr(env, X86Instr_SseReRg(Xsse_XOR, vec0, vec0));
+ addInstr(env, mk_vMOVsd_RR(vec0, vec1));
+ addInstr(env, X86Instr_Sse32Fx4(Xsse_CMPEQF, vec1, vec1));
+ /* defer arg computation to here so as to give CMPEQF as long
+ as possible to complete */
+ arg = iselVecExpr(env, e->Iex.Unop.arg);
+ /* vec0 is all 0s; vec1 is all 1s */
+ addInstr(env, mk_vMOVsd_RR(arg, dst));
+ /* 16x8 or 8x16 comparison == */
+ addInstr(env, X86Instr_SseReRg(cmpOp, vec0, dst));
+ /* invert result */
+ addInstr(env, X86Instr_SseReRg(Xsse_XOR, vec1, dst));
+ return dst;
+ }
+
+ case Iop_Recip32Fx4: op = Xsse_RCPF; goto do_32Fx4_unary;
+ case Iop_RSqrt32Fx4: op = Xsse_RSQRTF; goto do_32Fx4_unary;
+ case Iop_Sqrt32Fx4: op = Xsse_SQRTF; goto do_32Fx4_unary;
+ do_32Fx4_unary:
+ {
+ HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
+ HReg dst = newVRegV(env);
+ addInstr(env, X86Instr_Sse32Fx4(op, arg, dst));
+ return dst;
+ }
+
+ case Iop_Recip64Fx2: op = Xsse_RCPF; goto do_64Fx2_unary;
+ case Iop_RSqrt64Fx2: op = Xsse_RSQRTF; goto do_64Fx2_unary;
+ case Iop_Sqrt64Fx2: op = Xsse_SQRTF; goto do_64Fx2_unary;
+ do_64Fx2_unary:
+ {
+ HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
+ HReg dst = newVRegV(env);
+ REQUIRE_SSE2;
+ addInstr(env, X86Instr_Sse64Fx2(op, arg, dst));
+ return dst;
+ }
+
+ case Iop_Recip32F0x4: op = Xsse_RCPF; goto do_32F0x4_unary;
+ case Iop_RSqrt32F0x4: op = Xsse_RSQRTF; goto do_32F0x4_unary;
+ case Iop_Sqrt32F0x4: op = Xsse_SQRTF; goto do_32F0x4_unary;
+ do_32F0x4_unary:
+ {
+ /* A bit subtle. We have to copy the arg to the result
+ register first, because actually doing the SSE scalar insn
+ leaves the upper 3/4 of the destination register
+ unchanged. Whereas the required semantics of these
+ primops is that the upper 3/4 is simply copied in from the
+ argument. */
+ HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
+ HReg dst = newVRegV(env);
+ addInstr(env, mk_vMOVsd_RR(arg, dst));
+ addInstr(env, X86Instr_Sse32FLo(op, arg, dst));
+ return dst;
+ }
+
+ case Iop_Recip64F0x2: op = Xsse_RCPF; goto do_64F0x2_unary;
+ case Iop_RSqrt64F0x2: op = Xsse_RSQRTF; goto do_64F0x2_unary;
+ case Iop_Sqrt64F0x2: op = Xsse_SQRTF; goto do_64F0x2_unary;
+ do_64F0x2_unary:
+ {
+ /* A bit subtle. We have to copy the arg to the result
+ register first, because actually doing the SSE scalar insn
+ leaves the upper half of the destination register
+ unchanged. Whereas the required semantics of these
+ primops is that the upper half is simply copied in from the
+ argument. */
+ HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
+ HReg dst = newVRegV(env);
+ REQUIRE_SSE2;
+ addInstr(env, mk_vMOVsd_RR(arg, dst));
+ addInstr(env, X86Instr_Sse64FLo(op, arg, dst));
+ return dst;
+ }
+
+ case Iop_32UtoV128: {
+ HReg dst = newVRegV(env);
+ X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
+ X86RMI* rmi = iselIntExpr_RMI(env, e->Iex.Unop.arg);
+ addInstr(env, X86Instr_Push(rmi));
+ addInstr(env, X86Instr_SseLdzLO(4, dst, esp0));
+ add_to_esp(env, 4);
+ return dst;
+ }
+
+ case Iop_64UtoV128: {
+ HReg rHi, rLo;
+ HReg dst = newVRegV(env);
+ X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
+ iselInt64Expr(&rHi, &rLo, env, e->Iex.Unop.arg);
+ addInstr(env, X86Instr_Push(X86RMI_Reg(rHi)));
+ addInstr(env, X86Instr_Push(X86RMI_Reg(rLo)));
+ addInstr(env, X86Instr_SseLdzLO(8, dst, esp0));
+ add_to_esp(env, 8);
+ return dst;
+ }
+
+ default:
+ break;
+ } /* switch (e->Iex.Unop.op) */
+ } /* if (e->tag == Iex_Unop) */
+
+ if (e->tag == Iex_Binop) {
+ switch (e->Iex.Binop.op) {
+
+ case Iop_SetV128lo32: {
+ HReg dst = newVRegV(env);
+ HReg srcV = iselVecExpr(env, e->Iex.Binop.arg1);
+ HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2);
+ X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
+ sub_from_esp(env, 16);
+ addInstr(env, X86Instr_SseLdSt(False/*store*/, srcV, esp0));
+ addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(srcI), esp0));
+ addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0));
+ add_to_esp(env, 16);
+ return dst;
+ }
+
+ case Iop_SetV128lo64: {
+ HReg dst = newVRegV(env);
+ HReg srcV = iselVecExpr(env, e->Iex.Binop.arg1);
+ HReg srcIhi, srcIlo;
+ X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
+ X86AMode* esp4 = advance4(esp0);
+ iselInt64Expr(&srcIhi, &srcIlo, env, e->Iex.Binop.arg2);
+ sub_from_esp(env, 16);
+ addInstr(env, X86Instr_SseLdSt(False/*store*/, srcV, esp0));
+ addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(srcIlo), esp0));
+ addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(srcIhi), esp4));
+ addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0));
+ add_to_esp(env, 16);
+ return dst;
+ }
+
+ case Iop_64HLtoV128: {
+ HReg r3, r2, r1, r0;
+ X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
+ X86AMode* esp4 = advance4(esp0);
+ X86AMode* esp8 = advance4(esp4);
+ X86AMode* esp12 = advance4(esp8);
+ HReg dst = newVRegV(env);
+ /* do this via the stack (easy, convenient, etc) */
+ sub_from_esp(env, 16);
+ /* Do the less significant 64 bits */
+ iselInt64Expr(&r1, &r0, env, e->Iex.Binop.arg2);
+ addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r0), esp0));
+ addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r1), esp4));
+ /* Do the more significant 64 bits */
+ iselInt64Expr(&r3, &r2, env, e->Iex.Binop.arg1);
+ addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r2), esp8));
+ addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r3), esp12));
+ /* Fetch result back from stack. */
+ addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0));
+ add_to_esp(env, 16);
+ return dst;
+ }
+
+ case Iop_CmpEQ32Fx4: op = Xsse_CMPEQF; goto do_32Fx4;
+ case Iop_CmpLT32Fx4: op = Xsse_CMPLTF; goto do_32Fx4;
+ case Iop_CmpLE32Fx4: op = Xsse_CMPLEF; goto do_32Fx4;
+ case Iop_CmpUN32Fx4: op = Xsse_CMPUNF; goto do_32Fx4;
+ case Iop_Add32Fx4: op = Xsse_ADDF; goto do_32Fx4;
+ case Iop_Div32Fx4: op = Xsse_DIVF; goto do_32Fx4;
+ case Iop_Max32Fx4: op = Xsse_MAXF; goto do_32Fx4;
+ case Iop_Min32Fx4: op = Xsse_MINF; goto do_32Fx4;
+ case Iop_Mul32Fx4: op = Xsse_MULF; goto do_32Fx4;
+ case Iop_Sub32Fx4: op = Xsse_SUBF; goto do_32Fx4;
+ do_32Fx4:
+ {
+ HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
+ HReg dst = newVRegV(env);
+ addInstr(env, mk_vMOVsd_RR(argL, dst));
+ addInstr(env, X86Instr_Sse32Fx4(op, argR, dst));
+ return dst;
+ }
+
+ case Iop_CmpEQ64Fx2: op = Xsse_CMPEQF; goto do_64Fx2;
+ case Iop_CmpLT64Fx2: op = Xsse_CMPLTF; goto do_64Fx2;
+ case Iop_CmpLE64Fx2: op = Xsse_CMPLEF; goto do_64Fx2;
+ case Iop_CmpUN64Fx2: op = Xsse_CMPUNF; goto do_64Fx2;
+ case Iop_Add64Fx2: op = Xsse_ADDF; goto do_64Fx2;
+ case Iop_Div64Fx2: op = Xsse_DIVF; goto do_64Fx2;
+ case Iop_Max64Fx2: op = Xsse_MAXF; goto do_64Fx2;
+ case Iop_Min64Fx2: op = Xsse_MINF; goto do_64Fx2;
+ case Iop_Mul64Fx2: op = Xsse_MULF; goto do_64Fx2;
+ case Iop_Sub64Fx2: op = Xsse_SUBF; goto do_64Fx2;
+ do_64Fx2:
+ {
+ HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
+ HReg dst = newVRegV(env);
+ REQUIRE_SSE2;
+ addInstr(env, mk_vMOVsd_RR(argL, dst));
+ addInstr(env, X86Instr_Sse64Fx2(op, argR, dst));
+ return dst;
+ }
+
+ case Iop_CmpEQ32F0x4: op = Xsse_CMPEQF; goto do_32F0x4;
+ case Iop_CmpLT32F0x4: op = Xsse_CMPLTF; goto do_32F0x4;
+ case Iop_CmpLE32F0x4: op = Xsse_CMPLEF; goto do_32F0x4;
+ case Iop_CmpUN32F0x4: op = Xsse_CMPUNF; goto do_32F0x4;
+ case Iop_Add32F0x4: op = Xsse_ADDF; goto do_32F0x4;
+ case Iop_Div32F0x4: op = Xsse_DIVF; goto do_32F0x4;
+ case Iop_Max32F0x4: op = Xsse_MAXF; goto do_32F0x4;
+ case Iop_Min32F0x4: op = Xsse_MINF; goto do_32F0x4;
+ case Iop_Mul32F0x4: op = Xsse_MULF; goto do_32F0x4;
+ case Iop_Sub32F0x4: op = Xsse_SUBF; goto do_32F0x4;
+ do_32F0x4: {
+ HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
+ HReg dst = newVRegV(env);
+ addInstr(env, mk_vMOVsd_RR(argL, dst));
+ addInstr(env, X86Instr_Sse32FLo(op, argR, dst));
+ return dst;
+ }
+
+ case Iop_CmpEQ64F0x2: op = Xsse_CMPEQF; goto do_64F0x2;
+ case Iop_CmpLT64F0x2: op = Xsse_CMPLTF; goto do_64F0x2;
+ case Iop_CmpLE64F0x2: op = Xsse_CMPLEF; goto do_64F0x2;
+ case Iop_CmpUN64F0x2: op = Xsse_CMPUNF; goto do_64F0x2;
+ case Iop_Add64F0x2: op = Xsse_ADDF; goto do_64F0x2;
+ case Iop_Div64F0x2: op = Xsse_DIVF; goto do_64F0x2;
+ case Iop_Max64F0x2: op = Xsse_MAXF; goto do_64F0x2;
+ case Iop_Min64F0x2: op = Xsse_MINF; goto do_64F0x2;
+ case Iop_Mul64F0x2: op = Xsse_MULF; goto do_64F0x2;
+ case Iop_Sub64F0x2: op = Xsse_SUBF; goto do_64F0x2;
+ do_64F0x2: {
+ HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
+ HReg dst = newVRegV(env);
+ REQUIRE_SSE2;
+ addInstr(env, mk_vMOVsd_RR(argL, dst));
+ addInstr(env, X86Instr_Sse64FLo(op, argR, dst));
+ return dst;
+ }
+
+ case Iop_QNarrow32Sx4:
+ op = Xsse_PACKSSD; arg1isEReg = True; goto do_SseReRg;
+ case Iop_QNarrow16Sx8:
+ op = Xsse_PACKSSW; arg1isEReg = True; goto do_SseReRg;
+ case Iop_QNarrow16Ux8:
+ op = Xsse_PACKUSW; arg1isEReg = True; goto do_SseReRg;
+
+ case Iop_InterleaveHI8x16:
+ op = Xsse_UNPCKHB; arg1isEReg = True; goto do_SseReRg;
+ case Iop_InterleaveHI16x8:
+ op = Xsse_UNPCKHW; arg1isEReg = True; goto do_SseReRg;
+ case Iop_InterleaveHI32x4:
+ op = Xsse_UNPCKHD; arg1isEReg = True; goto do_SseReRg;
+ case Iop_InterleaveHI64x2:
+ op = Xsse_UNPCKHQ; arg1isEReg = True; goto do_SseReRg;
+
+ case Iop_InterleaveLO8x16:
+ op = Xsse_UNPCKLB; arg1isEReg = True; goto do_SseReRg;
+ case Iop_InterleaveLO16x8:
+ op = Xsse_UNPCKLW; arg1isEReg = True; goto do_SseReRg;
+ case Iop_InterleaveLO32x4:
+ op = Xsse_UNPCKLD; arg1isEReg = True; goto do_SseReRg;
+ case Iop_InterleaveLO64x2:
+ op = Xsse_UNPCKLQ; arg1isEReg = True; goto do_SseReRg;
+
+ case Iop_AndV128: op = Xsse_AND; goto do_SseReRg;
+ case Iop_OrV128: op = Xsse_OR; goto do_SseReRg;
+ case Iop_XorV128: op = Xsse_XOR; goto do_SseReRg;
+ case Iop_Add8x16: op = Xsse_ADD8; goto do_SseReRg;
+ case Iop_Add16x8: op = Xsse_ADD16; goto do_SseReRg;
+ case Iop_Add32x4: op = Xsse_ADD32; goto do_SseReRg;
+ case Iop_Add64x2: op = Xsse_ADD64; goto do_SseReRg;
+ case Iop_QAdd8Sx16: op = Xsse_QADD8S; goto do_SseReRg;
+ case Iop_QAdd16Sx8: op = Xsse_QADD16S; goto do_SseReRg;
+ case Iop_QAdd8Ux16: op = Xsse_QADD8U; goto do_SseReRg;
+ case Iop_QAdd16Ux8: op = Xsse_QADD16U; goto do_SseReRg;
+ case Iop_Avg8Ux16: op = Xsse_AVG8U; goto do_SseReRg;
+ case Iop_Avg16Ux8: op = Xsse_AVG16U; goto do_SseReRg;
+ case Iop_CmpEQ8x16: op = Xsse_CMPEQ8; goto do_SseReRg;
+ case Iop_CmpEQ16x8: op = Xsse_CMPEQ16; goto do_SseReRg;
+ case Iop_CmpEQ32x4: op = Xsse_CMPEQ32; goto do_SseReRg;
+ case Iop_CmpGT8Sx16: op = Xsse_CMPGT8S; goto do_SseReRg;
+ case Iop_CmpGT16Sx8: op = Xsse_CMPGT16S; goto do_SseReRg;
+ case Iop_CmpGT32Sx4: op = Xsse_CMPGT32S; goto do_SseReRg;
+ case Iop_Max16Sx8: op = Xsse_MAX16S; goto do_SseReRg;
+ case Iop_Max8Ux16: op = Xsse_MAX8U; goto do_SseReRg;
+ case Iop_Min16Sx8: op = Xsse_MIN16S; goto do_SseReRg;
+ case Iop_Min8Ux16: op = Xsse_MIN8U; goto do_SseReRg;
+ case Iop_MulHi16Ux8: op = Xsse_MULHI16U; goto do_SseReRg;
+ case Iop_MulHi16Sx8: op = Xsse_MULHI16S; goto do_SseReRg;
+ case Iop_Mul16x8: op = Xsse_MUL16; goto do_SseReRg;
+ case Iop_Sub8x16: op = Xsse_SUB8; goto do_SseReRg;
+ case Iop_Sub16x8: op = Xsse_SUB16; goto do_SseReRg;
+ case Iop_Sub32x4: op = Xsse_SUB32; goto do_SseReRg;
+ case Iop_Sub64x2: op = Xsse_SUB64; goto do_SseReRg;
+ case Iop_QSub8Sx16: op = Xsse_QSUB8S; goto do_SseReRg;
+ case Iop_QSub16Sx8: op = Xsse_QSUB16S; goto do_SseReRg;
+ case Iop_QSub8Ux16: op = Xsse_QSUB8U; goto do_SseReRg;
+ case Iop_QSub16Ux8: op = Xsse_QSUB16U; goto do_SseReRg;
+ do_SseReRg: {
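+         /* arg1isEReg marks the non-commutative cases (the pack and
+            interleave ops) in which arg1 must appear as the E
+            (second) operand of the SSE instruction, rather than
+            being the one copied into dst. */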
+ HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1);
+ HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2);
+ HReg dst = newVRegV(env);
+ if (op != Xsse_OR && op != Xsse_AND && op != Xsse_XOR)
+ REQUIRE_SSE2;
+ if (arg1isEReg) {
+ addInstr(env, mk_vMOVsd_RR(arg2, dst));
+ addInstr(env, X86Instr_SseReRg(op, arg1, dst));
+ } else {
+ addInstr(env, mk_vMOVsd_RR(arg1, dst));
+ addInstr(env, X86Instr_SseReRg(op, arg2, dst));
+ }
+ return dst;
+ }
+
+ case Iop_ShlN16x8: op = Xsse_SHL16; goto do_SseShift;
+ case Iop_ShlN32x4: op = Xsse_SHL32; goto do_SseShift;
+ case Iop_ShlN64x2: op = Xsse_SHL64; goto do_SseShift;
+ case Iop_SarN16x8: op = Xsse_SAR16; goto do_SseShift;
+ case Iop_SarN32x4: op = Xsse_SAR32; goto do_SseShift;
+ case Iop_ShrN16x8: op = Xsse_SHR16; goto do_SseShift;
+ case Iop_ShrN32x4: op = Xsse_SHR32; goto do_SseShift;
+ case Iop_ShrN64x2: op = Xsse_SHR64; goto do_SseShift;
+ do_SseShift: {
+ HReg greg = iselVecExpr(env, e->Iex.Binop.arg1);
+ X86RMI* rmi = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
+ X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
+ HReg ereg = newVRegV(env);
+ HReg dst = newVRegV(env);
+ REQUIRE_SSE2;
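+         /* Materialise the shift count as a 128-bit value: push 12
+            zero bytes plus the 32-bit amount and reload into an xmm
+            register; the SSE shift-by-register forms take the count
+            from the low 64 bits of the E operand. */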
+ addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
+ addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
+ addInstr(env, X86Instr_Push(X86RMI_Imm(0)));
+ addInstr(env, X86Instr_Push(rmi));
+ addInstr(env, X86Instr_SseLdSt(True/*load*/, ereg, esp0));
+ addInstr(env, mk_vMOVsd_RR(greg, dst));
+ addInstr(env, X86Instr_SseReRg(op, ereg, dst));
+ add_to_esp(env, 16);
+ return dst;
+ }
+
+ default:
+ break;
+ } /* switch (e->Iex.Binop.op) */
+ } /* if (e->tag == Iex_Binop) */
+
+ if (e->tag == Iex_Mux0X) {
+ X86RM* r8 = iselIntExpr_RM(env, e->Iex.Mux0X.cond);
+ HReg rX = iselVecExpr(env, e->Iex.Mux0X.exprX);
+ HReg r0 = iselVecExpr(env, e->Iex.Mux0X.expr0);
+ HReg dst = newVRegV(env);
+ addInstr(env, mk_vMOVsd_RR(rX,dst));
+ addInstr(env, X86Instr_Test32(0xFF, r8));
+ addInstr(env, X86Instr_SseCMov(Xcc_Z,r0,dst));
+ return dst;
+ }
+
+ vec_fail:
+ vex_printf("iselVecExpr (hwcaps = %s): can't reduce\n",
+ LibVEX_ppVexHwCaps(VexArchX86,env->hwcaps));
+ ppIRExpr(e);
+ vpanic("iselVecExpr_wrk");
+
+# undef REQUIRE_SSE1
+# undef REQUIRE_SSE2
+# undef SSE2_OR_ABOVE
+}
+
+
+/*---------------------------------------------------------*/
+/*--- ISEL: Statements ---*/
+/*---------------------------------------------------------*/
+
+static void iselStmt ( ISelEnv* env, IRStmt* stmt )
+{
+ if (vex_traceflags & VEX_TRACE_VCODE) {
+ vex_printf("\n-- ");
+ ppIRStmt(stmt);
+ vex_printf("\n");
+ }
+
+ switch (stmt->tag) {
+
+ /* --------- STORE --------- */
+ case Ist_Store: {
+ IRType tya = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
+ IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
+ IREndness end = stmt->Ist.Store.end;
+
+ if (tya != Ity_I32 || end != Iend_LE)
+ goto stmt_fail;
+
+ if (tyd == Ity_I32) {
+ X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
+ X86RI* ri = iselIntExpr_RI(env, stmt->Ist.Store.data);
+ addInstr(env, X86Instr_Alu32M(Xalu_MOV,ri,am));
+ return;
+ }
+ if (tyd == Ity_I8 || tyd == Ity_I16) {
+ X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
+ HReg r = iselIntExpr_R(env, stmt->Ist.Store.data);
+ addInstr(env, X86Instr_Store( toUChar(tyd==Ity_I8 ? 1 : 2),
+ r,am ));
+ return;
+ }
+ if (tyd == Ity_F64) {
+ X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
+ HReg r = iselDblExpr(env, stmt->Ist.Store.data);
+ addInstr(env, X86Instr_FpLdSt(False/*store*/, 8, r, am));
+ return;
+ }
+ if (tyd == Ity_F32) {
+ X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
+ HReg r = iselFltExpr(env, stmt->Ist.Store.data);
+ addInstr(env, X86Instr_FpLdSt(False/*store*/, 4, r, am));
+ return;
+ }
+ if (tyd == Ity_I64) {
+ HReg vHi, vLo, rA;
+ iselInt64Expr(&vHi, &vLo, env, stmt->Ist.Store.data);
+ rA = iselIntExpr_R(env, stmt->Ist.Store.addr);
+ addInstr(env, X86Instr_Alu32M(
+ Xalu_MOV, X86RI_Reg(vLo), X86AMode_IR(0, rA)));
+ addInstr(env, X86Instr_Alu32M(
+ Xalu_MOV, X86RI_Reg(vHi), X86AMode_IR(4, rA)));
+ return;
+ }
+ if (tyd == Ity_V128) {
+ X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
+ HReg r = iselVecExpr(env, stmt->Ist.Store.data);
+ addInstr(env, X86Instr_SseLdSt(False/*store*/, r, am));
+ return;
+ }
+ break;
+ }
+
+ /* --------- PUT --------- */
+ case Ist_Put: {
+ IRType ty = typeOfIRExpr(env->type_env, stmt->Ist.Put.data);
+ if (ty == Ity_I32) {
+ /* We're going to write to memory, so compute the RHS into an
+ X86RI. */
+ X86RI* ri = iselIntExpr_RI(env, stmt->Ist.Put.data);
+ addInstr(env,
+ X86Instr_Alu32M(
+ Xalu_MOV,
+ ri,
+ X86AMode_IR(stmt->Ist.Put.offset,hregX86_EBP())
+ ));
+ return;
+ }
+ if (ty == Ity_I8 || ty == Ity_I16) {
+ HReg r = iselIntExpr_R(env, stmt->Ist.Put.data);
+ addInstr(env, X86Instr_Store(
+ toUChar(ty==Ity_I8 ? 1 : 2),
+ r,
+ X86AMode_IR(stmt->Ist.Put.offset,
+ hregX86_EBP())));
+ return;
+ }
+ if (ty == Ity_I64) {
+ HReg vHi, vLo;
+ X86AMode* am = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP());
+ X86AMode* am4 = advance4(am);
+ iselInt64Expr(&vHi, &vLo, env, stmt->Ist.Put.data);
+ addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(vLo), am ));
+ addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(vHi), am4 ));
+ return;
+ }
+ if (ty == Ity_V128) {
+ HReg vec = iselVecExpr(env, stmt->Ist.Put.data);
+ X86AMode* am = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP());
+ addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, am));
+ return;
+ }
+ if (ty == Ity_F32) {
+ HReg f32 = iselFltExpr(env, stmt->Ist.Put.data);
+ X86AMode* am = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP());
+ set_FPU_rounding_default(env); /* paranoia */
+ addInstr(env, X86Instr_FpLdSt( False/*store*/, 4, f32, am ));
+ return;
+ }
+ if (ty == Ity_F64) {
+ HReg f64 = iselDblExpr(env, stmt->Ist.Put.data);
+ X86AMode* am = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP());
+ set_FPU_rounding_default(env); /* paranoia */
+ addInstr(env, X86Instr_FpLdSt( False/*store*/, 8, f64, am ));
+ return;
+ }
+ break;
+ }
+
+ /* --------- Indexed PUT --------- */
+ case Ist_PutI: {
+ X86AMode* am
+ = genGuestArrayOffset(
+ env, stmt->Ist.PutI.descr,
+ stmt->Ist.PutI.ix, stmt->Ist.PutI.bias );
+
+ IRType ty = typeOfIRExpr(env->type_env, stmt->Ist.PutI.data);
+ if (ty == Ity_F64) {
+ HReg val = iselDblExpr(env, stmt->Ist.PutI.data);
+ addInstr(env, X86Instr_FpLdSt( False/*store*/, 8, val, am ));
+ return;
+ }
+ if (ty == Ity_I8) {
+ HReg r = iselIntExpr_R(env, stmt->Ist.PutI.data);
+ addInstr(env, X86Instr_Store( 1, r, am ));
+ return;
+ }
+ if (ty == Ity_I32) {
+ HReg r = iselIntExpr_R(env, stmt->Ist.PutI.data);
+ addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(r), am ));
+ return;
+ }
+ if (ty == Ity_I64) {
+ HReg rHi, rLo;
+ X86AMode* am4 = advance4(am);
+ iselInt64Expr(&rHi, &rLo, env, stmt->Ist.PutI.data);
+ addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(rLo), am ));
+ addInstr(env, X86Instr_Alu32M( Xalu_MOV, X86RI_Reg(rHi), am4 ));
+ return;
+ }
+ break;
+ }
+
+ /* --------- TMP --------- */
+ case Ist_WrTmp: {
+ IRTemp tmp = stmt->Ist.WrTmp.tmp;
+ IRType ty = typeOfIRTemp(env->type_env, tmp);
+
+         /* optimisation: if stmt->Ist.WrTmp.data is Add32(..,..),
+            compute it into an AMode and then use LEA.  This usually
+            produces fewer instructions, often because (for
+            Memcheck-created IR) we get t = address-expression and t
+            is later used twice; doing this naturally turns the
+            address-expression back into an X86 amode. */
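+         /* For instance (a hypothetical case, assuming the amode
+            builder matches this shape):
+               t = Add32(Add32(ebx, Shl32(eax,2)), 16)
+            can become a single "leal 16(%ebx,%eax,4)" rather than
+            an add sequence. */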
+ if (ty == Ity_I32
+ && stmt->Ist.WrTmp.data->tag == Iex_Binop
+ && stmt->Ist.WrTmp.data->Iex.Binop.op == Iop_Add32) {
+ X86AMode* am = iselIntExpr_AMode(env, stmt->Ist.WrTmp.data);
+ HReg dst = lookupIRTemp(env, tmp);
+ if (am->tag == Xam_IR && am->Xam.IR.imm == 0) {
+ /* Hmm, iselIntExpr_AMode wimped out and just computed the
+ value into a register. Just emit a normal reg-reg move
+ so reg-alloc can coalesce it away in the usual way. */
+ HReg src = am->Xam.IR.reg;
+ addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Reg(src), dst));
+ } else {
+ addInstr(env, X86Instr_Lea32(am,dst));
+ }
+ return;
+ }
+
+ if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
+ X86RMI* rmi = iselIntExpr_RMI(env, stmt->Ist.WrTmp.data);
+ HReg dst = lookupIRTemp(env, tmp);
+ addInstr(env, X86Instr_Alu32R(Xalu_MOV,rmi,dst));
+ return;
+ }
+ if (ty == Ity_I64) {
+ HReg rHi, rLo, dstHi, dstLo;
+ iselInt64Expr(&rHi,&rLo, env, stmt->Ist.WrTmp.data);
+ lookupIRTemp64( &dstHi, &dstLo, env, tmp);
+ addInstr(env, mk_iMOVsd_RR(rHi,dstHi) );
+ addInstr(env, mk_iMOVsd_RR(rLo,dstLo) );
+ return;
+ }
+ if (ty == Ity_I1) {
+ X86CondCode cond = iselCondCode(env, stmt->Ist.WrTmp.data);
+ HReg dst = lookupIRTemp(env, tmp);
+ addInstr(env, X86Instr_Set32(cond, dst));
+ return;
+ }
+ if (ty == Ity_F64) {
+ HReg dst = lookupIRTemp(env, tmp);
+ HReg src = iselDblExpr(env, stmt->Ist.WrTmp.data);
+ addInstr(env, X86Instr_FpUnary(Xfp_MOV,src,dst));
+ return;
+ }
+ if (ty == Ity_F32) {
+ HReg dst = lookupIRTemp(env, tmp);
+ HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data);
+ addInstr(env, X86Instr_FpUnary(Xfp_MOV,src,dst));
+ return;
+ }
+ if (ty == Ity_V128) {
+ HReg dst = lookupIRTemp(env, tmp);
+ HReg src = iselVecExpr(env, stmt->Ist.WrTmp.data);
+ addInstr(env, mk_vMOVsd_RR(src,dst));
+ return;
+ }
+ break;
+ }
+
+ /* --------- Call to DIRTY helper --------- */
+ case Ist_Dirty: {
+ IRType retty;
+ IRDirty* d = stmt->Ist.Dirty.details;
+ Bool passBBP = False;
+
+ if (d->nFxState == 0)
+ vassert(!d->needsBBP);
+
+ passBBP = toBool(d->nFxState > 0 && d->needsBBP);
+
+ /* Marshal args, do the call, clear stack. */
+ doHelperCall( env, passBBP, d->guard, d->cee, d->args );
+
+ /* Now figure out what to do with the returned value, if any. */
+ if (d->tmp == IRTemp_INVALID)
+ /* No return value. Nothing to do. */
+ return;
+
+ retty = typeOfIRTemp(env->type_env, d->tmp);
+ if (retty == Ity_I64) {
+ HReg dstHi, dstLo;
+ /* The returned value is in %edx:%eax. Park it in the
+ register-pair associated with tmp. */
+ lookupIRTemp64( &dstHi, &dstLo, env, d->tmp);
+ addInstr(env, mk_iMOVsd_RR(hregX86_EDX(),dstHi) );
+ addInstr(env, mk_iMOVsd_RR(hregX86_EAX(),dstLo) );
+ return;
+ }
+ if (retty == Ity_I32 || retty == Ity_I16 || retty == Ity_I8) {
+ /* The returned value is in %eax. Park it in the register
+ associated with tmp. */
+ HReg dst = lookupIRTemp(env, d->tmp);
+ addInstr(env, mk_iMOVsd_RR(hregX86_EAX(),dst) );
+ return;
+ }
+ break;
+ }
+
+ /* --------- MEM FENCE --------- */
+ case Ist_MBE:
+ switch (stmt->Ist.MBE.event) {
+ case Imbe_Fence:
+ addInstr(env, X86Instr_MFence(env->hwcaps));
+ return;
+ default:
+ break;
+ }
+ break;
+
+ /* --------- ACAS --------- */
+ case Ist_CAS:
+ if (stmt->Ist.CAS.details->oldHi == IRTemp_INVALID) {
+ /* "normal" singleton CAS */
+ UChar sz;
+ IRCAS* cas = stmt->Ist.CAS.details;
+ IRType ty = typeOfIRExpr(env->type_env, cas->dataLo);
+ /* get: cas->expdLo into %eax, and cas->dataLo into %ebx */
+ X86AMode* am = iselIntExpr_AMode(env, cas->addr);
+ HReg rDataLo = iselIntExpr_R(env, cas->dataLo);
+ HReg rExpdLo = iselIntExpr_R(env, cas->expdLo);
+ HReg rOldLo = lookupIRTemp(env, cas->oldLo);
+ vassert(cas->expdHi == NULL);
+ vassert(cas->dataHi == NULL);
+ addInstr(env, mk_iMOVsd_RR(rExpdLo, rOldLo));
+ addInstr(env, mk_iMOVsd_RR(rExpdLo, hregX86_EAX()));
+ addInstr(env, mk_iMOVsd_RR(rDataLo, hregX86_EBX()));
+ switch (ty) {
+ case Ity_I32: sz = 4; break;
+ case Ity_I16: sz = 2; break;
+ case Ity_I8: sz = 1; break;
+ default: goto unhandled_cas;
+ }
+ addInstr(env, X86Instr_ACAS(am, sz));
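+            /* lock cmpxchg sets ZF iff the CAS succeeded.  rOldLo
+               was preloaded with the expected value, which is right
+               on success; on failure (NZ), copy the actual old
+               value, left by the hardware in %eax, over it. */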
+ addInstr(env,
+ X86Instr_CMov32(Xcc_NZ,
+ X86RM_Reg(hregX86_EAX()), rOldLo));
+ return;
+ } else {
+ /* double CAS */
+ IRCAS* cas = stmt->Ist.CAS.details;
+ IRType ty = typeOfIRExpr(env->type_env, cas->dataLo);
+ /* only 32-bit allowed in this case */
+ /* get: cas->expdLo into %eax, and cas->dataLo into %ebx */
+ /* get: cas->expdHi into %edx, and cas->dataHi into %ecx */
+ X86AMode* am = iselIntExpr_AMode(env, cas->addr);
+ HReg rDataHi = iselIntExpr_R(env, cas->dataHi);
+ HReg rDataLo = iselIntExpr_R(env, cas->dataLo);
+ HReg rExpdHi = iselIntExpr_R(env, cas->expdHi);
+ HReg rExpdLo = iselIntExpr_R(env, cas->expdLo);
+ HReg rOldHi = lookupIRTemp(env, cas->oldHi);
+ HReg rOldLo = lookupIRTemp(env, cas->oldLo);
+ if (ty != Ity_I32)
+ goto unhandled_cas;
+ addInstr(env, mk_iMOVsd_RR(rExpdHi, rOldHi));
+ addInstr(env, mk_iMOVsd_RR(rExpdLo, rOldLo));
+ addInstr(env, mk_iMOVsd_RR(rExpdHi, hregX86_EDX()));
+ addInstr(env, mk_iMOVsd_RR(rExpdLo, hregX86_EAX()));
+ addInstr(env, mk_iMOVsd_RR(rDataHi, hregX86_ECX()));
+ addInstr(env, mk_iMOVsd_RR(rDataLo, hregX86_EBX()));
+ addInstr(env, X86Instr_DACAS(am));
+ addInstr(env,
+ X86Instr_CMov32(Xcc_NZ,
+ X86RM_Reg(hregX86_EDX()), rOldHi));
+ addInstr(env,
+ X86Instr_CMov32(Xcc_NZ,
+ X86RM_Reg(hregX86_EAX()), rOldLo));
+ return;
+ }
+ unhandled_cas:
+ break;
+
+ /* --------- INSTR MARK --------- */
+ /* Doesn't generate any executable code ... */
+ case Ist_IMark:
+ return;
+
+ /* --------- NO-OP --------- */
+ /* Fairly self-explanatory, wouldn't you say? */
+ case Ist_NoOp:
+ return;
+
+ /* --------- EXIT --------- */
+ case Ist_Exit: {
+ X86RI* dst;
+ X86CondCode cc;
+ if (stmt->Ist.Exit.dst->tag != Ico_U32)
+ vpanic("isel_x86: Ist_Exit: dst is not a 32-bit value");
+ dst = iselIntExpr_RI(env, IRExpr_Const(stmt->Ist.Exit.dst));
+ cc = iselCondCode(env,stmt->Ist.Exit.guard);
+ addInstr(env, X86Instr_Goto(stmt->Ist.Exit.jk, cc, dst));
+ return;
+ }
+
+ default: break;
+ }
+ stmt_fail:
+ ppIRStmt(stmt);
+ vpanic("iselStmt");
+}
+
+
+/*---------------------------------------------------------*/
+/*--- ISEL: Basic block terminators (Nexts) ---*/
+/*---------------------------------------------------------*/
+
+static void iselNext ( ISelEnv* env, IRExpr* next, IRJumpKind jk )
+{
+ X86RI* ri;
+ if (vex_traceflags & VEX_TRACE_VCODE) {
+ vex_printf("\n-- goto {");
+ ppIRJumpKind(jk);
+ vex_printf("} ");
+ ppIRExpr(next);
+ vex_printf("\n");
+ }
+ ri = iselIntExpr_RI(env, next);
+ addInstr(env, X86Instr_Goto(jk, Xcc_ALWAYS,ri));
+}
+
+
+/*---------------------------------------------------------*/
+/*--- Insn selector top-level ---*/
+/*---------------------------------------------------------*/
+
+/* Translate an entire SB to x86 code. */
+
+HInstrArray* iselSB_X86 ( IRSB* bb, VexArch arch_host,
+ VexArchInfo* archinfo_host,
+ VexAbiInfo* vbi/*UNUSED*/ )
+{
+ Int i, j;
+ HReg hreg, hregHI;
+ ISelEnv* env;
+ UInt hwcaps_host = archinfo_host->hwcaps;
+
+ /* sanity ... */
+ vassert(arch_host == VexArchX86);
+ vassert(0 == (hwcaps_host
+ & ~(VEX_HWCAPS_X86_SSE1
+ | VEX_HWCAPS_X86_SSE2
+ | VEX_HWCAPS_X86_SSE3
+ | VEX_HWCAPS_X86_LZCNT)));
+
+ /* Make up an initial environment to use. */
+ env = LibVEX_Alloc(sizeof(ISelEnv));
+ env->vreg_ctr = 0;
+
+ /* Set up output code array. */
+ env->code = newHInstrArray();
+
+ /* Copy BB's type env. */
+ env->type_env = bb->tyenv;
+
+ /* Make up an IRTemp -> virtual HReg mapping. This doesn't
+ change as we go along. */
+ env->n_vregmap = bb->tyenv->types_used;
+ env->vregmap = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
+ env->vregmapHI = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
+
+ /* and finally ... */
+ env->hwcaps = hwcaps_host;
+
+ /* For each IR temporary, allocate a suitably-kinded virtual
+ register. */
+ j = 0;
+ for (i = 0; i < env->n_vregmap; i++) {
+ hregHI = hreg = INVALID_HREG;
+ switch (bb->tyenv->types[i]) {
+ case Ity_I1:
+ case Ity_I8:
+ case Ity_I16:
+ case Ity_I32: hreg = mkHReg(j++, HRcInt32, True); break;
+ case Ity_I64: hreg = mkHReg(j++, HRcInt32, True);
+ hregHI = mkHReg(j++, HRcInt32, True); break;
+ case Ity_F32:
+ case Ity_F64: hreg = mkHReg(j++, HRcFlt64, True); break;
+ case Ity_V128: hreg = mkHReg(j++, HRcVec128, True); break;
+ default: ppIRType(bb->tyenv->types[i]);
+ vpanic("iselBB: IRTemp type");
+ }
+ env->vregmap[i] = hreg;
+ env->vregmapHI[i] = hregHI;
+ }
+ env->vreg_ctr = j;
+
+ /* Ok, finally we can iterate over the statements. */
+ for (i = 0; i < bb->stmts_used; i++)
+ iselStmt(env,bb->stmts[i]);
+
+ iselNext(env,bb->next,bb->jumpkind);
+
+ /* record the number of vregs we used. */
+ env->code->n_vregs = env->vreg_ctr;
+ return env->code;
+}
+
+
+/*---------------------------------------------------------------*/
+/*--- end host_x86_isel.c ---*/
+/*---------------------------------------------------------------*/
diff --git a/VEX/priv/ir_defs.c b/VEX/priv/ir_defs.c
new file mode 100644
index 0000000..f78db10
--- /dev/null
+++ b/VEX/priv/ir_defs.c
@@ -0,0 +1,3501 @@
+
+/*---------------------------------------------------------------*/
+/*--- begin ir_defs.c ---*/
+/*---------------------------------------------------------------*/
+
+/*
+ This file is part of Valgrind, a dynamic binary instrumentation
+ framework.
+
+ Copyright (C) 2004-2010 OpenWorks LLP
+ info@open-works.net
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA.
+
+ The GNU General Public License is contained in the file COPYING.
+
+ Neither the names of the U.S. Department of Energy nor the
+ University of California nor the names of its contributors may be
+ used to endorse or promote products derived from this software
+ without prior written permission.
+*/
+
+#include "libvex_basictypes.h"
+#include "libvex_ir.h"
+#include "libvex.h"
+
+#include "main_util.h"
+
+
+/*---------------------------------------------------------------*/
+/*--- Printing the IR ---*/
+/*---------------------------------------------------------------*/
+
+void ppIRType ( IRType ty )
+{
+ switch (ty) {
+ case Ity_INVALID: vex_printf("Ity_INVALID"); break;
+ case Ity_I1: vex_printf( "I1"); break;
+ case Ity_I8: vex_printf( "I8"); break;
+ case Ity_I16: vex_printf( "I16"); break;
+ case Ity_I32: vex_printf( "I32"); break;
+ case Ity_I64: vex_printf( "I64"); break;
+ case Ity_I128: vex_printf( "I128"); break;
+ case Ity_F32: vex_printf( "F32"); break;
+ case Ity_F64: vex_printf( "F64"); break;
+ case Ity_V128: vex_printf( "V128"); break;
+ default: vex_printf("ty = 0x%x\n", (Int)ty);
+ vpanic("ppIRType");
+ }
+}
+
+void ppIRConst ( IRConst* con )
+{
+ union { ULong i64; Double f64; } u;
+ vassert(sizeof(ULong) == sizeof(Double));
+ switch (con->tag) {
+ case Ico_U1: vex_printf( "%d:I1", con->Ico.U1 ? 1 : 0); break;
+ case Ico_U8: vex_printf( "0x%x:I8", (UInt)(con->Ico.U8)); break;
+ case Ico_U16: vex_printf( "0x%x:I16", (UInt)(con->Ico.U16)); break;
+ case Ico_U32: vex_printf( "0x%x:I32", (UInt)(con->Ico.U32)); break;
+ case Ico_U64: vex_printf( "0x%llx:I64", (ULong)(con->Ico.U64)); break;
+ case Ico_F64: u.f64 = con->Ico.F64;
+ vex_printf( "F64{0x%llx}", u.i64);
+ break;
+ case Ico_F64i: vex_printf( "F64i{0x%llx}", con->Ico.F64i); break;
+ case Ico_V128: vex_printf( "V128{0x%04x}", (UInt)(con->Ico.V128)); break;
+ default: vpanic("ppIRConst");
+ }
+}
+
+void ppIRCallee ( IRCallee* ce )
+{
+ vex_printf("%s", ce->name);
+ if (ce->regparms > 0)
+ vex_printf("[rp=%d]", ce->regparms);
+ if (ce->mcx_mask > 0)
+ vex_printf("[mcx=0x%x]", ce->mcx_mask);
+ vex_printf("{%p}", (void*)ce->addr);
+}
+
+void ppIRRegArray ( IRRegArray* arr )
+{
+ vex_printf("(%d:%dx", arr->base, arr->nElems);
+ ppIRType(arr->elemTy);
+ vex_printf(")");
+}
+
+void ppIRTemp ( IRTemp tmp )
+{
+ if (tmp == IRTemp_INVALID)
+ vex_printf("IRTemp_INVALID");
+ else
+ vex_printf( "t%d", (Int)tmp);
+}
+
+void ppIROp ( IROp op )
+{
+ HChar* str = NULL;
+ IROp base;
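+   /* The sized integer ops are declared in consecutive 8/16/32/64
+      groups, so the range cases below (a gcc extension) just record
+      a base name and base op; a size suffix can later be derived
+      from (op - base). */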
+ switch (op) {
+ case Iop_Add8 ... Iop_Add64:
+ str = "Add"; base = Iop_Add8; break;
+ case Iop_Sub8 ... Iop_Sub64:
+ str = "Sub"; base = Iop_Sub8; break;
+ case Iop_Mul8 ... Iop_Mul64:
+ str = "Mul"; base = Iop_Mul8; break;
+ case Iop_Or8 ... Iop_Or64:
+ str = "Or"; base = Iop_Or8; break;
+ case Iop_And8 ... Iop_And64:
+ str = "And"; base = Iop_And8; break;
+ case Iop_Xor8 ... Iop_Xor64:
+ str = "Xor"; base = Iop_Xor8; break;
+ case Iop_Shl8 ... Iop_Shl64:
+ str = "Shl"; base = Iop_Shl8; break;
+ case Iop_Shr8 ... Iop_Shr64:
+ str = "Shr"; base = Iop_Shr8; break;
+ case Iop_Sar8 ... Iop_Sar64:
+ str = "Sar"; base = Iop_Sar8; break;
+ case Iop_CmpEQ8 ... Iop_CmpEQ64:
+ str = "CmpEQ"; base = Iop_CmpEQ8; break;
+ case Iop_CmpNE8 ... Iop_CmpNE64:
+ str = "CmpNE"; base = Iop_CmpNE8; break;
+ case Iop_CasCmpEQ8 ... Iop_CasCmpEQ64:
+ str = "CasCmpEQ"; base = Iop_CasCmpEQ8; break;
+ case Iop_CasCmpNE8 ... Iop_CasCmpNE64:
+ str = "CasCmpNE"; base = Iop_CasCmpNE8; break;
+ case Iop_Not8 ... Iop_Not64:
+ str = "Not"; base = Iop_Not8; break;
+ /* other cases must explicitly "return;" */
+ case Iop_8Uto16: vex_printf("8Uto16"); return;
+ case Iop_8Uto32: vex_printf("8Uto32"); return;
+ case Iop_16Uto32: vex_printf("16Uto32"); return;
+ case Iop_8Sto16: vex_printf("8Sto16"); return;
+ case Iop_8Sto32: vex_printf("8Sto32"); return;
+ case Iop_16Sto32: vex_printf("16Sto32"); return;
+ case Iop_32Sto64: vex_printf("32Sto64"); return;
+ case Iop_32Uto64: vex_printf("32Uto64"); return;
+ case Iop_32to8: vex_printf("32to8"); return;
+ case Iop_16Uto64: vex_printf("16Uto64"); return;
+ case Iop_16Sto64: vex_printf("16Sto64"); return;
+ case Iop_8Uto64: vex_printf("8Uto64"); return;
+ case Iop_8Sto64: vex_printf("8Sto64"); return;
+ case Iop_64to16: vex_printf("64to16"); return;
+ case Iop_64to8: vex_printf("64to8"); return;
+
+ case Iop_Not1: vex_printf("Not1"); return;
+ case Iop_32to1: vex_printf("32to1"); return;
+ case Iop_64to1: vex_printf("64to1"); return;
+ case Iop_1Uto8: vex_printf("1Uto8"); return;
+ case Iop_1Uto32: vex_printf("1Uto32"); return;
+ case Iop_1Uto64: vex_printf("1Uto64"); return;
+ case Iop_1Sto8: vex_printf("1Sto8"); return;
+ case Iop_1Sto16: vex_printf("1Sto16"); return;
+ case Iop_1Sto32: vex_printf("1Sto32"); return;
+ case Iop_1Sto64: vex_printf("1Sto64"); return;
+
+ case Iop_MullS8: vex_printf("MullS8"); return;
+ case Iop_MullS16: vex_printf("MullS16"); return;
+ case Iop_MullS32: vex_printf("MullS32"); return;
+ case Iop_MullS64: vex_printf("MullS64"); return;
+ case Iop_MullU8: vex_printf("MullU8"); return;
+ case Iop_MullU16: vex_printf("MullU16"); return;
+ case Iop_MullU32: vex_printf("MullU32"); return;
+ case Iop_MullU64: vex_printf("MullU64"); return;
+
+ case Iop_Clz64: vex_printf("Clz64"); return;
+ case Iop_Clz32: vex_printf("Clz32"); return;
+ case Iop_Ctz64: vex_printf("Ctz64"); return;
+ case Iop_Ctz32: vex_printf("Ctz32"); return;
+
+ case Iop_CmpLT32S: vex_printf("CmpLT32S"); return;
+ case Iop_CmpLE32S: vex_printf("CmpLE32S"); return;
+ case Iop_CmpLT32U: vex_printf("CmpLT32U"); return;
+ case Iop_CmpLE32U: vex_printf("CmpLE32U"); return;
+
+ case Iop_CmpLT64S: vex_printf("CmpLT64S"); return;
+ case Iop_CmpLE64S: vex_printf("CmpLE64S"); return;
+ case Iop_CmpLT64U: vex_printf("CmpLT64U"); return;
+ case Iop_CmpLE64U: vex_printf("CmpLE64U"); return;
+
+ case Iop_CmpNEZ8: vex_printf("CmpNEZ8"); return;
+ case Iop_CmpNEZ16: vex_printf("CmpNEZ16"); return;
+ case Iop_CmpNEZ32: vex_printf("CmpNEZ32"); return;
+ case Iop_CmpNEZ64: vex_printf("CmpNEZ64"); return;
+
+ case Iop_CmpwNEZ32: vex_printf("CmpwNEZ32"); return;
+ case Iop_CmpwNEZ64: vex_printf("CmpwNEZ64"); return;
+
+ case Iop_Left8: vex_printf("Left8"); return;
+ case Iop_Left16: vex_printf("Left16"); return;
+ case Iop_Left32: vex_printf("Left32"); return;
+ case Iop_Left64: vex_printf("Left64"); return;
+ case Iop_Max32U: vex_printf("Max32U"); return;
+
+ case Iop_CmpORD32U: vex_printf("CmpORD32U"); return;
+ case Iop_CmpORD32S: vex_printf("CmpORD32S"); return;
+
+ case Iop_CmpORD64U: vex_printf("CmpORD64U"); return;
+ case Iop_CmpORD64S: vex_printf("CmpORD64S"); return;
+
+ case Iop_DivU32: vex_printf("DivU32"); return;
+ case Iop_DivS32: vex_printf("DivS32"); return;
+ case Iop_DivU64: vex_printf("DivU64"); return;
+ case Iop_DivS64: vex_printf("DivS64"); return;
+
+ case Iop_DivModU64to32: vex_printf("DivModU64to32"); return;
+ case Iop_DivModS64to32: vex_printf("DivModS64to32"); return;
+
+ case Iop_DivModU128to64: vex_printf("DivModU128to64"); return;
+ case Iop_DivModS128to64: vex_printf("DivModS128to64"); return;
+
+ case Iop_16HIto8: vex_printf("16HIto8"); return;
+ case Iop_16to8: vex_printf("16to8"); return;
+ case Iop_8HLto16: vex_printf("8HLto16"); return;
+
+ case Iop_32HIto16: vex_printf("32HIto16"); return;
+ case Iop_32to16: vex_printf("32to16"); return;
+ case Iop_16HLto32: vex_printf("16HLto32"); return;
+
+ case Iop_64HIto32: vex_printf("64HIto32"); return;
+ case Iop_64to32: vex_printf("64to32"); return;
+ case Iop_32HLto64: vex_printf("32HLto64"); return;
+
+ case Iop_128HIto64: vex_printf("128HIto64"); return;
+ case Iop_128to64: vex_printf("128to64"); return;
+ case Iop_64HLto128: vex_printf("64HLto128"); return;
+
+ case Iop_AddF64: vex_printf("AddF64"); return;
+ case Iop_SubF64: vex_printf("SubF64"); return;
+ case Iop_MulF64: vex_printf("MulF64"); return;
+ case Iop_DivF64: vex_printf("DivF64"); return;
+ case Iop_AddF64r32: vex_printf("AddF64r32"); return;
+ case Iop_SubF64r32: vex_printf("SubF64r32"); return;
+ case Iop_MulF64r32: vex_printf("MulF64r32"); return;
+ case Iop_DivF64r32: vex_printf("DivF64r32"); return;
+ case Iop_AddF32: vex_printf("AddF32"); return;
+ case Iop_SubF32: vex_printf("SubF32"); return;
+ case Iop_MulF32: vex_printf("MulF32"); return;
+ case Iop_DivF32: vex_printf("DivF32"); return;
+
+ case Iop_ScaleF64: vex_printf("ScaleF64"); return;
+ case Iop_AtanF64: vex_printf("AtanF64"); return;
+ case Iop_Yl2xF64: vex_printf("Yl2xF64"); return;
+ case Iop_Yl2xp1F64: vex_printf("Yl2xp1F64"); return;
+ case Iop_PRemF64: vex_printf("PRemF64"); return;
+ case Iop_PRemC3210F64: vex_printf("PRemC3210F64"); return;
+ case Iop_PRem1F64: vex_printf("PRem1F64"); return;
+ case Iop_PRem1C3210F64: vex_printf("PRem1C3210F64"); return;
+ case Iop_NegF64: vex_printf("NegF64"); return;
+ case Iop_AbsF64: vex_printf("AbsF64"); return;
+ case Iop_NegF32: vex_printf("NegF32"); return;
+ case Iop_AbsF32: vex_printf("AbsF32"); return;
+ case Iop_SqrtF64: vex_printf("SqrtF64"); return;
+ case Iop_SqrtF32: vex_printf("SqrtF32"); return;
+ case Iop_SinF64: vex_printf("SinF64"); return;
+ case Iop_CosF64: vex_printf("CosF64"); return;
+ case Iop_TanF64: vex_printf("TanF64"); return;
+ case Iop_2xm1F64: vex_printf("2xm1F64"); return;
+
+ case Iop_MAddF64: vex_printf("MAddF64"); return;
+ case Iop_MSubF64: vex_printf("MSubF64"); return;
+ case Iop_MAddF64r32: vex_printf("MAddF64r32"); return;
+ case Iop_MSubF64r32: vex_printf("MSubF64r32"); return;
+
+ case Iop_Est5FRSqrt: vex_printf("Est5FRSqrt"); return;
+ case Iop_RoundF64toF64_NEAREST: vex_printf("RoundF64toF64_NEAREST"); return;
+ case Iop_RoundF64toF64_NegINF: vex_printf("RoundF64toF64_NegINF"); return;
+ case Iop_RoundF64toF64_PosINF: vex_printf("RoundF64toF64_PosINF"); return;
+ case Iop_RoundF64toF64_ZERO: vex_printf("RoundF64toF64_ZERO"); return;
+
+ case Iop_TruncF64asF32: vex_printf("TruncF64asF32"); return;
+ case Iop_CalcFPRF: vex_printf("CalcFPRF"); return;
+
+ case Iop_Add16x2: vex_printf("Add16x2"); return;
+ case Iop_Sub16x2: vex_printf("Sub16x2"); return;
+ case Iop_QAdd16Sx2: vex_printf("QAdd16Sx2"); return;
+ case Iop_QAdd16Ux2: vex_printf("QAdd16Ux2"); return;
+ case Iop_QSub16Sx2: vex_printf("QSub16Sx2"); return;
+ case Iop_QSub16Ux2: vex_printf("QSub16Ux2"); return;
+ case Iop_HAdd16Ux2: vex_printf("HAdd16Ux2"); return;
+ case Iop_HAdd16Sx2: vex_printf("HAdd16Sx2"); return;
+ case Iop_HSub16Ux2: vex_printf("HSub16Ux2"); return;
+ case Iop_HSub16Sx2: vex_printf("HSub16Sx2"); return;
+
+ case Iop_Add8x4: vex_printf("Add8x4"); return;
+ case Iop_Sub8x4: vex_printf("Sub8x4"); return;
+ case Iop_QAdd8Sx4: vex_printf("QAdd8Sx4"); return;
+ case Iop_QAdd8Ux4: vex_printf("QAdd8Ux4"); return;
+ case Iop_QSub8Sx4: vex_printf("QSub8Sx4"); return;
+ case Iop_QSub8Ux4: vex_printf("QSub8Ux4"); return;
+ case Iop_HAdd8Ux4: vex_printf("HAdd8Ux4"); return;
+ case Iop_HAdd8Sx4: vex_printf("HAdd8Sx4"); return;
+ case Iop_HSub8Ux4: vex_printf("HSub8Ux4"); return;
+ case Iop_HSub8Sx4: vex_printf("HSub8Sx4"); return;
+ case Iop_Sad8Ux4: vex_printf("Sad8Ux4"); return;
+
+ case Iop_CmpNEZ16x2: vex_printf("CmpNEZ16x2"); return;
+ case Iop_CmpNEZ8x4: vex_printf("CmpNEZ8x4"); return;
+
+ case Iop_CmpF64: vex_printf("CmpF64"); return;
+
+ case Iop_F64toI16S: vex_printf("F64toI16S"); return;
+ case Iop_F64toI32S: vex_printf("F64toI32S"); return;
+ case Iop_F64toI64S: vex_printf("F64toI64S"); return;
+
+ case Iop_F64toI32U: vex_printf("F64toI32U"); return;
+
+ case Iop_I16StoF64: vex_printf("I16StoF64"); return;
+ case Iop_I32StoF64: vex_printf("I32StoF64"); return;
+ case Iop_I64StoF64: vex_printf("I64StoF64"); return;
+
+ case Iop_I32UtoF64: vex_printf("I32UtoF64"); return;
+
+ case Iop_F32toF64: vex_printf("F32toF64"); return;
+ case Iop_F64toF32: vex_printf("F64toF32"); return;
+
+ case Iop_RoundF64toInt: vex_printf("RoundF64toInt"); return;
+ case Iop_RoundF32toInt: vex_printf("RoundF32toInt"); return;
+ case Iop_RoundF64toF32: vex_printf("RoundF64toF32"); return;
+
+ case Iop_ReinterpF64asI64: vex_printf("ReinterpF64asI64"); return;
+ case Iop_ReinterpI64asF64: vex_printf("ReinterpI64asF64"); return;
+ case Iop_ReinterpF32asI32: vex_printf("ReinterpF32asI32"); return;
+ case Iop_ReinterpI32asF32: vex_printf("ReinterpI32asF32"); return;
+
+ case Iop_I32UtoFx4: vex_printf("I32UtoFx4"); return;
+ case Iop_I32StoFx4: vex_printf("I32StoFx4"); return;
+
+ case Iop_F32toF16x4: vex_printf("F32toF16x4"); return;
+ case Iop_F16toF32x4: vex_printf("F16toF32x4"); return;
+
+ case Iop_Rsqrte32Fx4: vex_printf("VRsqrte32Fx4"); return;
+ case Iop_Rsqrte32x4: vex_printf("VRsqrte32x4"); return;
+ case Iop_Rsqrte32Fx2: vex_printf("VRsqrte32Fx2"); return;
+ case Iop_Rsqrte32x2: vex_printf("VRsqrte32x2"); return;
+
+ case Iop_QFtoI32Ux4_RZ: vex_printf("QFtoI32Ux4_RZ"); return;
+ case Iop_QFtoI32Sx4_RZ: vex_printf("QFtoI32Sx4_RZ"); return;
+
+ case Iop_FtoI32Ux4_RZ: vex_printf("FtoI32Ux4_RZ"); return;
+ case Iop_FtoI32Sx4_RZ: vex_printf("FtoI32Sx4_RZ"); return;
+
+ case Iop_I32UtoFx2: vex_printf("I32UtoFx2"); return;
+ case Iop_I32StoFx2: vex_printf("I32StoFx2"); return;
+
+ case Iop_FtoI32Ux2_RZ: vex_printf("FtoI32Ux2_RZ"); return;
+ case Iop_FtoI32Sx2_RZ: vex_printf("FtoI32Sx2_RZ"); return;
+
+ case Iop_RoundF32x4_RM: vex_printf("RoundF32x4_RM"); return;
+ case Iop_RoundF32x4_RP: vex_printf("RoundF32x4_RP"); return;
+ case Iop_RoundF32x4_RN: vex_printf("RoundF32x4_RN"); return;
+ case Iop_RoundF32x4_RZ: vex_printf("RoundF32x4_RZ"); return;
+
+ case Iop_Abs8x8: vex_printf("Abs8x8"); return;
+ case Iop_Abs16x4: vex_printf("Abs16x4"); return;
+ case Iop_Abs32x2: vex_printf("Abs32x2"); return;
+ case Iop_Add8x8: vex_printf("Add8x8"); return;
+ case Iop_Add16x4: vex_printf("Add16x4"); return;
+ case Iop_Add32x2: vex_printf("Add32x2"); return;
+ case Iop_QAdd8Ux8: vex_printf("QAdd8Ux8"); return;
+ case Iop_QAdd16Ux4: vex_printf("QAdd16Ux4"); return;
+ case Iop_QAdd32Ux2: vex_printf("QAdd32Ux2"); return;
+ case Iop_QAdd64Ux1: vex_printf("QAdd64Ux1"); return;
+ case Iop_QAdd8Sx8: vex_printf("QAdd8Sx8"); return;
+ case Iop_QAdd16Sx4: vex_printf("QAdd16Sx4"); return;
+ case Iop_QAdd32Sx2: vex_printf("QAdd32Sx2"); return;
+ case Iop_QAdd64Sx1: vex_printf("QAdd64Sx1"); return;
+ case Iop_PwAdd8x8: vex_printf("PwAdd8x8"); return;
+ case Iop_PwAdd16x4: vex_printf("PwAdd16x4"); return;
+ case Iop_PwAdd32x2: vex_printf("PwAdd32x2"); return;
+ case Iop_PwAdd32Fx2: vex_printf("PwAdd32Fx2"); return;
+ case Iop_PwAddL8Ux8: vex_printf("PwAddL8Ux8"); return;
+ case Iop_PwAddL16Ux4: vex_printf("PwAddL16Ux4"); return;
+ case Iop_PwAddL32Ux2: vex_printf("PwAddL32Ux2"); return;
+ case Iop_PwAddL8Sx8: vex_printf("PwAddL8Sx8"); return;
+ case Iop_PwAddL16Sx4: vex_printf("PwAddL16Sx4"); return;
+ case Iop_PwAddL32Sx2: vex_printf("PwAddL32Sx2"); return;
+ case Iop_Sub8x8: vex_printf("Sub8x8"); return;
+ case Iop_Sub16x4: vex_printf("Sub16x4"); return;
+ case Iop_Sub32x2: vex_printf("Sub32x2"); return;
+ case Iop_QSub8Ux8: vex_printf("QSub8Ux8"); return;
+ case Iop_QSub16Ux4: vex_printf("QSub16Ux4"); return;
+ case Iop_QSub32Ux2: vex_printf("QSub32Ux2"); return;
+ case Iop_QSub64Ux1: vex_printf("QSub64Ux1"); return;
+ case Iop_QSub8Sx8: vex_printf("QSub8Sx8"); return;
+ case Iop_QSub16Sx4: vex_printf("QSub16Sx4"); return;
+ case Iop_QSub32Sx2: vex_printf("QSub32Sx2"); return;
+ case Iop_QSub64Sx1: vex_printf("QSub64Sx1"); return;
+ case Iop_Mul8x8: vex_printf("Mul8x8"); return;
+ case Iop_Mul16x4: vex_printf("Mul16x4"); return;
+ case Iop_Mul32x2: vex_printf("Mul32x2"); return;
+ case Iop_Mul32Fx2: vex_printf("Mul32Fx2"); return;
+ case Iop_PolynomialMul8x8: vex_printf("PolynomialMul8x8"); return;
+ case Iop_MulHi16Ux4: vex_printf("MulHi16Ux4"); return;
+ case Iop_MulHi16Sx4: vex_printf("MulHi16Sx4"); return;
+ case Iop_QDMulHi16Sx4: vex_printf("QDMulHi16Sx4"); return;
+ case Iop_QDMulHi32Sx2: vex_printf("QDMulHi32Sx2"); return;
+ case Iop_QRDMulHi16Sx4: vex_printf("QRDMulHi16Sx4"); return;
+ case Iop_QRDMulHi32Sx2: vex_printf("QRDMulHi32Sx2"); return;
+ case Iop_QDMulLong16Sx4: vex_printf("QDMulLong16Sx4"); return;
+ case Iop_QDMulLong32Sx2: vex_printf("QDMulLong32Sx2"); return;
+ case Iop_Avg8Ux8: vex_printf("Avg8Ux8"); return;
+ case Iop_Avg16Ux4: vex_printf("Avg16Ux4"); return;
+ case Iop_Max8Sx8: vex_printf("Max8Sx8"); return;
+ case Iop_Max16Sx4: vex_printf("Max16Sx4"); return;
+ case Iop_Max32Sx2: vex_printf("Max32Sx2"); return;
+ case Iop_Max8Ux8: vex_printf("Max8Ux8"); return;
+ case Iop_Max16Ux4: vex_printf("Max16Ux4"); return;
+ case Iop_Max32Ux2: vex_printf("Max32Ux2"); return;
+ case Iop_Min8Sx8: vex_printf("Min8Sx8"); return;
+ case Iop_Min16Sx4: vex_printf("Min16Sx4"); return;
+ case Iop_Min32Sx2: vex_printf("Min32Sx2"); return;
+ case Iop_Min8Ux8: vex_printf("Min8Ux8"); return;
+ case Iop_Min16Ux4: vex_printf("Min16Ux4"); return;
+ case Iop_Min32Ux2: vex_printf("Min32Ux2"); return;
+ case Iop_PwMax8Sx8: vex_printf("PwMax8Sx8"); return;
+ case Iop_PwMax16Sx4: vex_printf("PwMax16Sx4"); return;
+ case Iop_PwMax32Sx2: vex_printf("PwMax32Sx2"); return;
+ case Iop_PwMax8Ux8: vex_printf("PwMax8Ux8"); return;
+ case Iop_PwMax16Ux4: vex_printf("PwMax16Ux4"); return;
+ case Iop_PwMax32Ux2: vex_printf("PwMax32Ux2"); return;
+ case Iop_PwMin8Sx8: vex_printf("PwMin8Sx8"); return;
+ case Iop_PwMin16Sx4: vex_printf("PwMin16Sx4"); return;
+ case Iop_PwMin32Sx2: vex_printf("PwMin32Sx2"); return;
+ case Iop_PwMin8Ux8: vex_printf("PwMin8Ux8"); return;
+ case Iop_PwMin16Ux4: vex_printf("PwMin16Ux4"); return;
+ case Iop_PwMin32Ux2: vex_printf("PwMin32Ux2"); return;
+ case Iop_CmpEQ8x8: vex_printf("CmpEQ8x8"); return;
+ case Iop_CmpEQ16x4: vex_printf("CmpEQ16x4"); return;
+ case Iop_CmpEQ32x2: vex_printf("CmpEQ32x2"); return;
+ case Iop_CmpGT8Ux8: vex_printf("CmpGT8Ux8"); return;
+ case Iop_CmpGT16Ux4: vex_printf("CmpGT16Ux4"); return;
+ case Iop_CmpGT32Ux2: vex_printf("CmpGT32Ux2"); return;
+ case Iop_CmpGT8Sx8: vex_printf("CmpGT8Sx8"); return;
+ case Iop_CmpGT16Sx4: vex_printf("CmpGT16Sx4"); return;
+ case Iop_CmpGT32Sx2: vex_printf("CmpGT32Sx2"); return;
+ case Iop_Cnt8x8: vex_printf("Cnt8x8"); return;
+ case Iop_Clz8Sx8: vex_printf("Clz8Sx8"); return;
+ case Iop_Clz16Sx4: vex_printf("Clz16Sx4"); return;
+ case Iop_Clz32Sx2: vex_printf("Clz32Sx2"); return;
+ case Iop_Cls8Sx8: vex_printf("Cls8Sx8"); return;
+ case Iop_Cls16Sx4: vex_printf("Cls16Sx4"); return;
+ case Iop_Cls32Sx2: vex_printf("Cls32Sx2"); return;
+ case Iop_ShlN8x8: vex_printf("ShlN8x8"); return;
+ case Iop_ShlN16x4: vex_printf("ShlN16x4"); return;
+ case Iop_ShlN32x2: vex_printf("ShlN32x2"); return;
+ case Iop_ShrN8x8: vex_printf("ShrN8x8"); return;
+ case Iop_ShrN16x4: vex_printf("ShrN16x4"); return;
+ case Iop_ShrN32x2: vex_printf("ShrN32x2"); return;
+ case Iop_SarN8x8: vex_printf("SarN8x8"); return;
+ case Iop_SarN16x4: vex_printf("SarN16x4"); return;
+ case Iop_SarN32x2: vex_printf("SarN32x2"); return;
+ case Iop_QNarrow16Ux4: vex_printf("QNarrow16Ux4"); return;
+ case Iop_QNarrow16Sx4: vex_printf("QNarrow16Sx4"); return;
+ case Iop_QNarrow32Sx2: vex_printf("QNarrow32Sx2"); return;
+ case Iop_InterleaveHI8x8: vex_printf("InterleaveHI8x8"); return;
+ case Iop_InterleaveHI16x4: vex_printf("InterleaveHI16x4"); return;
+ case Iop_InterleaveHI32x2: vex_printf("InterleaveHI32x2"); return;
+ case Iop_InterleaveLO8x8: vex_printf("InterleaveLO8x8"); return;
+ case Iop_InterleaveLO16x4: vex_printf("InterleaveLO16x4"); return;
+ case Iop_InterleaveLO32x2: vex_printf("InterleaveLO32x2"); return;
+ case Iop_CatOddLanes8x8: vex_printf("CatOddLanes8x8"); return;
+ case Iop_CatOddLanes16x4: vex_printf("CatOddLanes16x4"); return;
+ case Iop_CatEvenLanes8x8: vex_printf("CatEvenLanes8x8"); return;
+ case Iop_CatEvenLanes16x4: vex_printf("CatEvenLanes16x4"); return;
+ case Iop_InterleaveOddLanes8x8: vex_printf("InterleaveOddLanes8x8"); return;
+ case Iop_InterleaveOddLanes16x4: vex_printf("InterleaveOddLanes16x4"); return;
+ case Iop_InterleaveEvenLanes8x8: vex_printf("InterleaveEvenLanes8x8"); return;
+ case Iop_InterleaveEvenLanes16x4: vex_printf("InterleaveEvenLanes16x4"); return;
+ case Iop_Shl8x8: vex_printf("Shl8x8"); return;
+ case Iop_Shl16x4: vex_printf("Shl16x4"); return;
+ case Iop_Shl32x2: vex_printf("Shl32x2"); return;
+ case Iop_Shr8x8: vex_printf("Shr8x8"); return;
+ case Iop_Shr16x4: vex_printf("Shr16x4"); return;
+ case Iop_Shr32x2: vex_printf("Shr32x2"); return;
+ case Iop_QShl8x8: vex_printf("QShl8x8"); return;
+ case Iop_QShl16x4: vex_printf("QShl16x4"); return;
+ case Iop_QShl32x2: vex_printf("QShl32x2"); return;
+ case Iop_QShl64x1: vex_printf("QShl64x1"); return;
+ case Iop_QSal8x8: vex_printf("QSal8x8"); return;
+ case Iop_QSal16x4: vex_printf("QSal16x4"); return;
+ case Iop_QSal32x2: vex_printf("QSal32x2"); return;
+ case Iop_QSal64x1: vex_printf("QSal64x1"); return;
+ case Iop_QShlN8x8: vex_printf("QShlN8x8"); return;
+ case Iop_QShlN16x4: vex_printf("QShlN16x4"); return;
+ case Iop_QShlN32x2: vex_printf("QShlN32x2"); return;
+ case Iop_QShlN64x1: vex_printf("QShlN64x1"); return;
+ case Iop_QShlN8Sx8: vex_printf("QShlN8Sx8"); return;
+ case Iop_QShlN16Sx4: vex_printf("QShlN16Sx4"); return;
+ case Iop_QShlN32Sx2: vex_printf("QShlN32Sx2"); return;
+ case Iop_QShlN64Sx1: vex_printf("QShlN64Sx1"); return;
+ case Iop_QSalN8x8: vex_printf("QSalN8x8"); return;
+ case Iop_QSalN16x4: vex_printf("QSalN16x4"); return;
+ case Iop_QSalN32x2: vex_printf("QSalN32x2"); return;
+ case Iop_QSalN64x1: vex_printf("QSalN64x1"); return;
+ case Iop_Sar8x8: vex_printf("Sar8x8"); return;
+ case Iop_Sar16x4: vex_printf("Sar16x4"); return;
+ case Iop_Sar32x2: vex_printf("Sar32x2"); return;
+ case Iop_Sal8x8: vex_printf("Sal8x8"); return;
+ case Iop_Sal16x4: vex_printf("Sal16x4"); return;
+ case Iop_Sal32x2: vex_printf("Sal32x2"); return;
+ case Iop_Sal64x1: vex_printf("Sal64x1"); return;
+ case Iop_Perm8x8: vex_printf("Perm8x8"); return;
+ case Iop_Reverse16_8x8: vex_printf("Reverse16_8x8"); return;
+ case Iop_Reverse32_8x8: vex_printf("Reverse32_8x8"); return;
+ case Iop_Reverse32_16x4: vex_printf("Reverse32_16x4"); return;
+ case Iop_Reverse64_8x8: vex_printf("Reverse64_8x8"); return;
+ case Iop_Reverse64_16x4: vex_printf("Reverse64_16x4"); return;
+ case Iop_Reverse64_32x2: vex_printf("Reverse64_32x2"); return;
+ case Iop_Abs32Fx2: vex_printf("Abs32Fx2"); return;
+
+ case Iop_CmpNEZ32x2: vex_printf("CmpNEZ32x2"); return;
+ case Iop_CmpNEZ16x4: vex_printf("CmpNEZ16x4"); return;
+ case Iop_CmpNEZ8x8: vex_printf("CmpNEZ8x8"); return;
+
+ case Iop_Add32Fx4: vex_printf("Add32Fx4"); return;
+ case Iop_Add32Fx2: vex_printf("Add32Fx2"); return;
+ case Iop_Add32F0x4: vex_printf("Add32F0x4"); return;
+ case Iop_Add64Fx2: vex_printf("Add64Fx2"); return;
+ case Iop_Add64F0x2: vex_printf("Add64F0x2"); return;
+
+ case Iop_Div32Fx4: vex_printf("Div32Fx4"); return;
+ case Iop_Div32F0x4: vex_printf("Div32F0x4"); return;
+ case Iop_Div64Fx2: vex_printf("Div64Fx2"); return;
+ case Iop_Div64F0x2: vex_printf("Div64F0x2"); return;
+
+ case Iop_Max32Fx4: vex_printf("Max32Fx4"); return;
+ case Iop_Max32Fx2: vex_printf("Max32Fx2"); return;
+ case Iop_PwMax32Fx4: vex_printf("PwMax32Fx4"); return;
+ case Iop_PwMax32Fx2: vex_printf("PwMax32Fx2"); return;
+ case Iop_Max32F0x4: vex_printf("Max32F0x4"); return;
+ case Iop_Max64Fx2: vex_printf("Max64Fx2"); return;
+ case Iop_Max64F0x2: vex_printf("Max64F0x2"); return;
+
+ case Iop_Min32Fx4: vex_printf("Min32Fx4"); return;
+ case Iop_Min32Fx2: vex_printf("Min32Fx2"); return;
+ case Iop_PwMin32Fx4: vex_printf("PwMin32Fx4"); return;
+ case Iop_PwMin32Fx2: vex_printf("PwMin32Fx2"); return;
+ case Iop_Min32F0x4: vex_printf("Min32F0x4"); return;
+ case Iop_Min64Fx2: vex_printf("Min64Fx2"); return;
+ case Iop_Min64F0x2: vex_printf("Min64F0x2"); return;
+
+ case Iop_Mul32Fx4: vex_printf("Mul32Fx4"); return;
+ case Iop_Mul32F0x4: vex_printf("Mul32F0x4"); return;
+ case Iop_Mul64Fx2: vex_printf("Mul64Fx2"); return;
+ case Iop_Mul64F0x2: vex_printf("Mul64F0x2"); return;
+
+ case Iop_Recip32x2: vex_printf("Recip32x2"); return;
+ case Iop_Recip32Fx2: vex_printf("Recip32Fx2"); return;
+ case Iop_Recip32Fx4: vex_printf("Recip32Fx4"); return;
+ case Iop_Recip32x4: vex_printf("Recip32x4"); return;
+ case Iop_Recip32F0x4: vex_printf("Recip32F0x4"); return;
+ case Iop_Recip64Fx2: vex_printf("Recip64Fx2"); return;
+ case Iop_Recip64F0x2: vex_printf("Recip64F0x2"); return;
+ case Iop_Recps32Fx2: vex_printf("VRecps32Fx2"); return;
+ case Iop_Recps32Fx4: vex_printf("VRecps32Fx4"); return;
+ case Iop_Abs32Fx4: vex_printf("Abs32Fx4"); return;
+ case Iop_Rsqrts32Fx4: vex_printf("VRsqrts32Fx4"); return;
+ case Iop_Rsqrts32Fx2: vex_printf("VRsqrts32Fx2"); return;
+
+ case Iop_RSqrt32Fx4: vex_printf("RSqrt32Fx4"); return;
+ case Iop_RSqrt32F0x4: vex_printf("RSqrt32F0x4"); return;
+ case Iop_RSqrt64Fx2: vex_printf("RSqrt64Fx2"); return;
+ case Iop_RSqrt64F0x2: vex_printf("RSqrt64F0x2"); return;
+
+ case Iop_Sqrt32Fx4: vex_printf("Sqrt32Fx4"); return;
+ case Iop_Sqrt32F0x4: vex_printf("Sqrt32F0x4"); return;
+ case Iop_Sqrt64Fx2: vex_printf("Sqrt64Fx2"); return;
+ case Iop_Sqrt64F0x2: vex_printf("Sqrt64F0x2"); return;
+
+ case Iop_Sub32Fx4: vex_printf("Sub32Fx4"); return;
+ case Iop_Sub32Fx2: vex_printf("Sub32Fx2"); return;
+ case Iop_Sub32F0x4: vex_printf("Sub32F0x4"); return;
+ case Iop_Sub64Fx2: vex_printf("Sub64Fx2"); return;
+ case Iop_Sub64F0x2: vex_printf("Sub64F0x2"); return;
+
+ case Iop_CmpEQ32Fx4: vex_printf("CmpEQ32Fx4"); return;
+ case Iop_CmpLT32Fx4: vex_printf("CmpLT32Fx4"); return;
+ case Iop_CmpLE32Fx4: vex_printf("CmpLE32Fx4"); return;
+ case Iop_CmpGT32Fx4: vex_printf("CmpGT32Fx4"); return;
+ case Iop_CmpGE32Fx4: vex_printf("CmpGE32Fx4"); return;
+ case Iop_CmpUN32Fx4: vex_printf("CmpUN32Fx4"); return;
+ case Iop_CmpEQ64Fx2: vex_printf("CmpEQ64Fx2"); return;
+ case Iop_CmpLT64Fx2: vex_printf("CmpLT64Fx2"); return;
+ case Iop_CmpLE64Fx2: vex_printf("CmpLE64Fx2"); return;
+ case Iop_CmpUN64Fx2: vex_printf("CmpUN64Fx2"); return;
+ case Iop_CmpGT32Fx2: vex_printf("CmpGT32Fx2"); return;
+ case Iop_CmpEQ32Fx2: vex_printf("CmpEQ32Fx2"); return;
+ case Iop_CmpGE32Fx2: vex_printf("CmpGE32Fx2"); return;
+
+ case Iop_CmpEQ32F0x4: vex_printf("CmpEQ32F0x4"); return;
+ case Iop_CmpLT32F0x4: vex_printf("CmpLT32F0x4"); return;
+ case Iop_CmpLE32F0x4: vex_printf("CmpLE32F0x4"); return;
+ case Iop_CmpUN32F0x4: vex_printf("CmpUN32F0x4"); return;
+ case Iop_CmpEQ64F0x2: vex_printf("CmpEQ64F0x2"); return;
+ case Iop_CmpLT64F0x2: vex_printf("CmpLT64F0x2"); return;
+ case Iop_CmpLE64F0x2: vex_printf("CmpLE64F0x2"); return;
+ case Iop_CmpUN64F0x2: vex_printf("CmpUN64F0x2"); return;
+
+ case Iop_Neg32Fx4: vex_printf("Neg32Fx4"); return;
+ case Iop_Neg32Fx2: vex_printf("Neg32Fx2"); return;
+
+ case Iop_V128to64: vex_printf("V128to64"); return;
+ case Iop_V128HIto64: vex_printf("V128HIto64"); return;
+ case Iop_64HLtoV128: vex_printf("64HLtoV128"); return;
+
+ case Iop_64UtoV128: vex_printf("64UtoV128"); return;
+ case Iop_SetV128lo64: vex_printf("SetV128lo64"); return;
+
+ case Iop_32UtoV128: vex_printf("32UtoV128"); return;
+ case Iop_V128to32: vex_printf("V128to32"); return;
+ case Iop_SetV128lo32: vex_printf("SetV128lo32"); return;
+
+ case Iop_Dup8x16: vex_printf("Dup8x16"); return;
+ case Iop_Dup16x8: vex_printf("Dup16x8"); return;
+ case Iop_Dup32x4: vex_printf("Dup32x4"); return;
+ case Iop_Dup8x8: vex_printf("Dup8x8"); return;
+ case Iop_Dup16x4: vex_printf("Dup16x4"); return;
+ case Iop_Dup32x2: vex_printf("Dup32x2"); return;
+
+ case Iop_NotV128: vex_printf("NotV128"); return;
+ case Iop_AndV128: vex_printf("AndV128"); return;
+ case Iop_OrV128: vex_printf("OrV128"); return;
+ case Iop_XorV128: vex_printf("XorV128"); return;
+
+ case Iop_CmpNEZ8x16: vex_printf("CmpNEZ8x16"); return;
+ case Iop_CmpNEZ16x8: vex_printf("CmpNEZ16x8"); return;
+ case Iop_CmpNEZ32x4: vex_printf("CmpNEZ32x4"); return;
+ case Iop_CmpNEZ64x2: vex_printf("CmpNEZ64x2"); return;
+
+ case Iop_Abs8x16: vex_printf("Abs8x16"); return;
+ case Iop_Abs16x8: vex_printf("Abs16x8"); return;
+ case Iop_Abs32x4: vex_printf("Abs32x4"); return;
+
+ case Iop_Add8x16: vex_printf("Add8x16"); return;
+ case Iop_Add16x8: vex_printf("Add16x8"); return;
+ case Iop_Add32x4: vex_printf("Add32x4"); return;
+ case Iop_Add64x2: vex_printf("Add64x2"); return;
+ case Iop_QAdd8Ux16: vex_printf("QAdd8Ux16"); return;
+ case Iop_QAdd16Ux8: vex_printf("QAdd16Ux8"); return;
+ case Iop_QAdd32Ux4: vex_printf("QAdd32Ux4"); return;
+ case Iop_QAdd8Sx16: vex_printf("QAdd8Sx16"); return;
+ case Iop_QAdd16Sx8: vex_printf("QAdd16Sx8"); return;
+ case Iop_QAdd32Sx4: vex_printf("QAdd32Sx4"); return;
+ case Iop_QAdd64Ux2: vex_printf("QAdd64Ux2"); return;
+ case Iop_QAdd64Sx2: vex_printf("QAdd64Sx2"); return;
+ case Iop_PwAdd8x16: vex_printf("PwAdd8x16"); return;
+ case Iop_PwAdd16x8: vex_printf("PwAdd16x8"); return;
+ case Iop_PwAdd32x4: vex_printf("PwAdd32x4"); return;
+ case Iop_PwAddL8Ux16: vex_printf("PwAddL8Ux16"); return;
+ case Iop_PwAddL16Ux8: vex_printf("PwAddL16Ux8"); return;
+ case Iop_PwAddL32Ux4: vex_printf("PwAddL32Ux4"); return;
+ case Iop_PwAddL8Sx16: vex_printf("PwAddL8Sx16"); return;
+ case Iop_PwAddL16Sx8: vex_printf("PwAddL16Sx8"); return;
+ case Iop_PwAddL32Sx4: vex_printf("PwAddL32Sx4"); return;
+
+ case Iop_Sub8x16: vex_printf("Sub8x16"); return;
+ case Iop_Sub16x8: vex_printf("Sub16x8"); return;
+ case Iop_Sub32x4: vex_printf("Sub32x4"); return;
+ case Iop_Sub64x2: vex_printf("Sub64x2"); return;
+ case Iop_QSub8Ux16: vex_printf("QSub8Ux16"); return;
+ case Iop_QSub16Ux8: vex_printf("QSub16Ux8"); return;
+ case Iop_QSub32Ux4: vex_printf("QSub32Ux4"); return;
+ case Iop_QSub8Sx16: vex_printf("QSub8Sx16"); return;
+ case Iop_QSub16Sx8: vex_printf("QSub16Sx8"); return;
+ case Iop_QSub32Sx4: vex_printf("QSub32Sx4"); return;
+ case Iop_QSub64Ux2: vex_printf("QSub64Ux2"); return;
+ case Iop_QSub64Sx2: vex_printf("QSub64Sx2"); return;
+
+ case Iop_Mul8x16: vex_printf("Mul8x16"); return;
+ case Iop_Mul16x8: vex_printf("Mul16x8"); return;
+ case Iop_Mul32x4: vex_printf("Mul32x4"); return;
+ case Iop_Mull8Ux8: vex_printf("Mull8Ux8"); return;
+ case Iop_Mull8Sx8: vex_printf("Mull8Sx8"); return;
+ case Iop_Mull16Ux4: vex_printf("Mull16Ux4"); return;
+ case Iop_Mull16Sx4: vex_printf("Mull16Sx4"); return;
+ case Iop_Mull32Ux2: vex_printf("Mull32Ux2"); return;
+ case Iop_Mull32Sx2: vex_printf("Mull32Sx2"); return;
+ case Iop_PolynomialMul8x16: vex_printf("PolynomialMul8x16"); return;
+ case Iop_PolynomialMull8x8: vex_printf("PolynomialMull8x8"); return;
+ case Iop_MulHi16Ux8: vex_printf("MulHi16Ux8"); return;
+ case Iop_MulHi32Ux4: vex_printf("MulHi32Ux4"); return;
+ case Iop_MulHi16Sx8: vex_printf("MulHi16Sx8"); return;
+ case Iop_MulHi32Sx4: vex_printf("MulHi32Sx4"); return;
+ case Iop_QDMulHi16Sx8: vex_printf("QDMulHi16Sx8"); return;
+ case Iop_QDMulHi32Sx4: vex_printf("QDMulHi32Sx4"); return;
+ case Iop_QRDMulHi16Sx8: vex_printf("QRDMulHi16Sx8"); return;
+ case Iop_QRDMulHi32Sx4: vex_printf("QRDMulHi32Sx4"); return;
+
+ case Iop_MullEven8Ux16: vex_printf("MullEven8Ux16"); return;
+ case Iop_MullEven16Ux8: vex_printf("MullEven16Ux8"); return;
+ case Iop_MullEven8Sx16: vex_printf("MullEven8Sx16"); return;
+ case Iop_MullEven16Sx8: vex_printf("MullEven16Sx8"); return;
+
+ case Iop_Avg8Ux16: vex_printf("Avg8Ux16"); return;
+ case Iop_Avg16Ux8: vex_printf("Avg16Ux8"); return;
+ case Iop_Avg32Ux4: vex_printf("Avg32Ux4"); return;
+ case Iop_Avg8Sx16: vex_printf("Avg8Sx16"); return;
+ case Iop_Avg16Sx8: vex_printf("Avg16Sx8"); return;
+ case Iop_Avg32Sx4: vex_printf("Avg32Sx4"); return;
+
+ case Iop_Max8Sx16: vex_printf("Max8Sx16"); return;
+ case Iop_Max16Sx8: vex_printf("Max16Sx8"); return;
+ case Iop_Max32Sx4: vex_printf("Max32Sx4"); return;
+ case Iop_Max8Ux16: vex_printf("Max8Ux16"); return;
+ case Iop_Max16Ux8: vex_printf("Max16Ux8"); return;
+ case Iop_Max32Ux4: vex_printf("Max32Ux4"); return;
+
+ case Iop_Min8Sx16: vex_printf("Min8Sx16"); return;
+ case Iop_Min16Sx8: vex_printf("Min16Sx8"); return;
+ case Iop_Min32Sx4: vex_printf("Min32Sx4"); return;
+ case Iop_Min8Ux16: vex_printf("Min8Ux16"); return;
+ case Iop_Min16Ux8: vex_printf("Min16Ux8"); return;
+ case Iop_Min32Ux4: vex_printf("Min32Ux4"); return;
+
+ case Iop_CmpEQ8x16: vex_printf("CmpEQ8x16"); return;
+ case Iop_CmpEQ16x8: vex_printf("CmpEQ16x8"); return;
+ case Iop_CmpEQ32x4: vex_printf("CmpEQ32x4"); return;
+ case Iop_CmpGT8Sx16: vex_printf("CmpGT8Sx16"); return;
+ case Iop_CmpGT16Sx8: vex_printf("CmpGT16Sx8"); return;
+ case Iop_CmpGT32Sx4: vex_printf("CmpGT32Sx4"); return;
+ case Iop_CmpGT64Sx2: vex_printf("CmpGT64Sx2"); return;
+ case Iop_CmpGT8Ux16: vex_printf("CmpGT8Ux16"); return;
+ case Iop_CmpGT16Ux8: vex_printf("CmpGT16Ux8"); return;
+ case Iop_CmpGT32Ux4: vex_printf("CmpGT32Ux4"); return;
+
+ case Iop_Cnt8x16: vex_printf("Cnt8x16"); return;
+ case Iop_Clz8Sx16: vex_printf("Clz8Sx16"); return;
+ case Iop_Clz16Sx8: vex_printf("Clz16Sx8"); return;
+ case Iop_Clz32Sx4: vex_printf("Clz32Sx4"); return;
+ case Iop_Cls8Sx16: vex_printf("Cls8Sx16"); return;
+ case Iop_Cls16Sx8: vex_printf("Cls16Sx8"); return;
+ case Iop_Cls32Sx4: vex_printf("Cls32Sx4"); return;
+
+ case Iop_ShlV128: vex_printf("ShlV128"); return;
+ case Iop_ShrV128: vex_printf("ShrV128"); return;
+
+ case Iop_ShlN8x16: vex_printf("ShlN8x16"); return;
+ case Iop_ShlN16x8: vex_printf("ShlN16x8"); return;
+ case Iop_ShlN32x4: vex_printf("ShlN32x4"); return;
+ case Iop_ShlN64x2: vex_printf("ShlN64x2"); return;
+ case Iop_ShrN8x16: vex_printf("ShrN8x16"); return;
+ case Iop_ShrN16x8: vex_printf("ShrN16x8"); return;
+ case Iop_ShrN32x4: vex_printf("ShrN32x4"); return;
+ case Iop_ShrN64x2: vex_printf("ShrN64x2"); return;
+ case Iop_SarN8x16: vex_printf("SarN8x16"); return;
+ case Iop_SarN16x8: vex_printf("SarN16x8"); return;
+ case Iop_SarN32x4: vex_printf("SarN32x4"); return;
+ case Iop_SarN64x2: vex_printf("SarN64x2"); return;
+
+ case Iop_Shl8x16: vex_printf("Shl8x16"); return;
+ case Iop_Shl16x8: vex_printf("Shl16x8"); return;
+ case Iop_Shl32x4: vex_printf("Shl32x4"); return;
+ case Iop_Shl64x2: vex_printf("Shl64x2"); return;
+ case Iop_QSal8x16: vex_printf("QSal8x16"); return;
+ case Iop_QSal16x8: vex_printf("QSal16x8"); return;
+ case Iop_QSal32x4: vex_printf("QSal32x4"); return;
+ case Iop_QSal64x2: vex_printf("QSal64x2"); return;
+ case Iop_QShl8x16: vex_printf("QShl8x16"); return;
+ case Iop_QShl16x8: vex_printf("QShl16x8"); return;
+ case Iop_QShl32x4: vex_printf("QShl32x4"); return;
+ case Iop_QShl64x2: vex_printf("QShl64x2"); return;
+ case Iop_QSalN8x16: vex_printf("QSalN8x16"); return;
+ case Iop_QSalN16x8: vex_printf("QSalN16x8"); return;
+ case Iop_QSalN32x4: vex_printf("QSalN32x4"); return;
+ case Iop_QSalN64x2: vex_printf("QSalN64x2"); return;
+ case Iop_QShlN8x16: vex_printf("QShlN8x16"); return;
+ case Iop_QShlN16x8: vex_printf("QShlN16x8"); return;
+ case Iop_QShlN32x4: vex_printf("QShlN32x4"); return;
+ case Iop_QShlN64x2: vex_printf("QShlN64x2"); return;
+ case Iop_QShlN8Sx16: vex_printf("QShlN8Sx16"); return;
+ case Iop_QShlN16Sx8: vex_printf("QShlN16Sx8"); return;
+ case Iop_QShlN32Sx4: vex_printf("QShlN32Sx4"); return;
+ case Iop_QShlN64Sx2: vex_printf("QShlN64Sx2"); return;
+ case Iop_Shr8x16: vex_printf("Shr8x16"); return;
+ case Iop_Shr16x8: vex_printf("Shr16x8"); return;
+ case Iop_Shr32x4: vex_printf("Shr32x4"); return;
+ case Iop_Shr64x2: vex_printf("Shr64x2"); return;
+ case Iop_Sar8x16: vex_printf("Sar8x16"); return;
+ case Iop_Sar16x8: vex_printf("Sar16x8"); return;
+ case Iop_Sar32x4: vex_printf("Sar32x4"); return;
+ case Iop_Sar64x2: vex_printf("Sar64x2"); return;
+ case Iop_Sal8x16: vex_printf("Sal8x16"); return;
+ case Iop_Sal16x8: vex_printf("Sal16x8"); return;
+ case Iop_Sal32x4: vex_printf("Sal32x4"); return;
+ case Iop_Sal64x2: vex_printf("Sal64x2"); return;
+ case Iop_Rol8x16: vex_printf("Rol8x16"); return;
+ case Iop_Rol16x8: vex_printf("Rol16x8"); return;
+ case Iop_Rol32x4: vex_printf("Rol32x4"); return;
+
+ case Iop_Narrow16x8: vex_printf("Narrow16x8"); return;
+ case Iop_Narrow32x4: vex_printf("Narrow32x4"); return;
+ case Iop_QNarrow16Ux8: vex_printf("QNarrow16Ux8"); return;
+ case Iop_QNarrow32Ux4: vex_printf("QNarrow32Ux4"); return;
+ case Iop_QNarrow16Sx8: vex_printf("QNarrow16Sx8"); return;
+ case Iop_QNarrow32Sx4: vex_printf("QNarrow32Sx4"); return;
+ case Iop_Shorten16x8: vex_printf("Shorten16x8"); return;
+ case Iop_Shorten32x4: vex_printf("Shorten32x4"); return;
+ case Iop_Shorten64x2: vex_printf("Shorten64x2"); return;
+ case Iop_QShortenU16Ux8: vex_printf("QShortenU16Ux8"); return;
+ case Iop_QShortenU32Ux4: vex_printf("QShortenU32Ux4"); return;
+ case Iop_QShortenU64Ux2: vex_printf("QShortenU64Ux2"); return;
+ case Iop_QShortenS16Sx8: vex_printf("QShortenS16Sx8"); return;
+ case Iop_QShortenS32Sx4: vex_printf("QShortenS32Sx4"); return;
+ case Iop_QShortenS64Sx2: vex_printf("QShortenS64Sx2"); return;
+ case Iop_QShortenU16Sx8: vex_printf("QShortenU16Sx8"); return;
+ case Iop_QShortenU32Sx4: vex_printf("QShortenU32Sx4"); return;
+ case Iop_QShortenU64Sx2: vex_printf("QShortenU64Sx2"); return;
+ case Iop_Longen8Ux8: vex_printf("Longen8Ux8"); return;
+ case Iop_Longen16Ux4: vex_printf("Longen16Ux4"); return;
+ case Iop_Longen32Ux2: vex_printf("Longen32Ux2"); return;
+ case Iop_Longen8Sx8: vex_printf("Longen8Sx8"); return;
+ case Iop_Longen16Sx4: vex_printf("Longen16Sx4"); return;
+ case Iop_Longen32Sx2: vex_printf("Longen32Sx2"); return;
+
+ case Iop_InterleaveHI8x16: vex_printf("InterleaveHI8x16"); return;
+ case Iop_InterleaveHI16x8: vex_printf("InterleaveHI16x8"); return;
+ case Iop_InterleaveHI32x4: vex_printf("InterleaveHI32x4"); return;
+ case Iop_InterleaveHI64x2: vex_printf("InterleaveHI64x2"); return;
+ case Iop_InterleaveLO8x16: vex_printf("InterleaveLO8x16"); return;
+ case Iop_InterleaveLO16x8: vex_printf("InterleaveLO16x8"); return;
+ case Iop_InterleaveLO32x4: vex_printf("InterleaveLO32x4"); return;
+ case Iop_InterleaveLO64x2: vex_printf("InterleaveLO64x2"); return;
+
+ case Iop_CatOddLanes8x16: vex_printf("CatOddLanes8x16"); return;
+ case Iop_CatOddLanes16x8: vex_printf("CatOddLanes16x8"); return;
+ case Iop_CatOddLanes32x4: vex_printf("CatOddLanes32x4"); return;
+ case Iop_CatEvenLanes8x16: vex_printf("CatEvenLanes8x16"); return;
+ case Iop_CatEvenLanes16x8: vex_printf("CatEvenLanes16x8"); return;
+ case Iop_CatEvenLanes32x4: vex_printf("CatEvenLanes32x4"); return;
+
+ case Iop_InterleaveOddLanes8x16: vex_printf("InterleaveOddLanes8x16"); return;
+ case Iop_InterleaveOddLanes16x8: vex_printf("InterleaveOddLanes16x8"); return;
+ case Iop_InterleaveOddLanes32x4: vex_printf("InterleaveOddLanes32x4"); return;
+ case Iop_InterleaveEvenLanes8x16: vex_printf("InterleaveEvenLanes8x16"); return;
+ case Iop_InterleaveEvenLanes16x8: vex_printf("InterleaveEvenLanes16x8"); return;
+ case Iop_InterleaveEvenLanes32x4: vex_printf("InterleaveEvenLanes32x4"); return;
+
+ case Iop_GetElem8x16: vex_printf("GetElem8x16"); return;
+ case Iop_GetElem16x8: vex_printf("GetElem16x8"); return;
+ case Iop_GetElem32x4: vex_printf("GetElem32x4"); return;
+ case Iop_GetElem64x2: vex_printf("GetElem64x2"); return;
+
+ case Iop_GetElem8x8: vex_printf("GetElem8x8"); return;
+ case Iop_GetElem16x4: vex_printf("GetElem16x4"); return;
+ case Iop_GetElem32x2: vex_printf("GetElem32x2"); return;
+ case Iop_SetElem8x8: vex_printf("SetElem8x8"); return;
+ case Iop_SetElem16x4: vex_printf("SetElem16x4"); return;
+ case Iop_SetElem32x2: vex_printf("SetElem32x2"); return;
+
+ case Iop_Extract64: vex_printf("Extract64"); return;
+ case Iop_ExtractV128: vex_printf("ExtractV128"); return;
+
+ case Iop_Perm8x16: vex_printf("Perm8x16"); return;
+ case Iop_Reverse16_8x16: vex_printf("Reverse16_8x16"); return;
+ case Iop_Reverse32_8x16: vex_printf("Reverse32_8x16"); return;
+ case Iop_Reverse32_16x8: vex_printf("Reverse32_16x8"); return;
+ case Iop_Reverse64_8x16: vex_printf("Reverse64_8x16"); return;
+ case Iop_Reverse64_16x8: vex_printf("Reverse64_16x8"); return;
+ case Iop_Reverse64_32x4: vex_printf("Reverse64_32x4"); return;
+
+ case Iop_F32ToFixed32Ux4_RZ: vex_printf("F32ToFixed32Ux4_RZ"); return;
+ case Iop_F32ToFixed32Sx4_RZ: vex_printf("F32ToFixed32Sx4_RZ"); return;
+ case Iop_Fixed32UToF32x4_RN: vex_printf("Fixed32UToF32x4_RN"); return;
+ case Iop_Fixed32SToF32x4_RN: vex_printf("Fixed32SToF32x4_RN"); return;
+ case Iop_F32ToFixed32Ux2_RZ: vex_printf("F32ToFixed32Ux2_RZ"); return;
+ case Iop_F32ToFixed32Sx2_RZ: vex_printf("F32ToFixed32Sx2_RZ"); return;
+ case Iop_Fixed32UToF32x2_RN: vex_printf("Fixed32UToF32x2_RN"); return;
+ case Iop_Fixed32SToF32x2_RN: vex_printf("Fixed32SToF32x2_RN"); return;
+
+ default: vpanic("ppIROp(1)");
+ }
+
+ vassert(str);
+ switch (op - base) {
+ case 0: vex_printf("%s",str); vex_printf("8"); break;
+ case 1: vex_printf("%s",str); vex_printf("16"); break;
+ case 2: vex_printf("%s",str); vex_printf("32"); break;
+ case 3: vex_printf("%s",str); vex_printf("64"); break;
+ default: vpanic("ppIROp(2)");
+ }
+}
+
+void ppIRExpr ( IRExpr* e )
+{
+ Int i;
+ switch (e->tag) {
+ case Iex_Binder:
+ vex_printf("BIND-%d", e->Iex.Binder.binder);
+ break;
+ case Iex_Get:
+ vex_printf( "GET:" );
+ ppIRType(e->Iex.Get.ty);
+ vex_printf("(%d)", e->Iex.Get.offset);
+ break;
+ case Iex_GetI:
+ vex_printf( "GETI" );
+ ppIRRegArray(e->Iex.GetI.descr);
+ vex_printf("[");
+ ppIRExpr(e->Iex.GetI.ix);
+ vex_printf(",%d]", e->Iex.GetI.bias);
+ break;
+ case Iex_RdTmp:
+ ppIRTemp(e->Iex.RdTmp.tmp);
+ break;
+ case Iex_Qop:
+ ppIROp(e->Iex.Qop.op);
+ vex_printf( "(" );
+ ppIRExpr(e->Iex.Qop.arg1);
+ vex_printf( "," );
+ ppIRExpr(e->Iex.Qop.arg2);
+ vex_printf( "," );
+ ppIRExpr(e->Iex.Qop.arg3);
+ vex_printf( "," );
+ ppIRExpr(e->Iex.Qop.arg4);
+ vex_printf( ")" );
+ break;
+ case Iex_Triop:
+ ppIROp(e->Iex.Triop.op);
+ vex_printf( "(" );
+ ppIRExpr(e->Iex.Triop.arg1);
+ vex_printf( "," );
+ ppIRExpr(e->Iex.Triop.arg2);
+ vex_printf( "," );
+ ppIRExpr(e->Iex.Triop.arg3);
+ vex_printf( ")" );
+ break;
+ case Iex_Binop:
+ ppIROp(e->Iex.Binop.op);
+ vex_printf( "(" );
+ ppIRExpr(e->Iex.Binop.arg1);
+ vex_printf( "," );
+ ppIRExpr(e->Iex.Binop.arg2);
+ vex_printf( ")" );
+ break;
+ case Iex_Unop:
+ ppIROp(e->Iex.Unop.op);
+ vex_printf( "(" );
+ ppIRExpr(e->Iex.Unop.arg);
+ vex_printf( ")" );
+ break;
+ case Iex_Load:
+ vex_printf( "LD%s:", e->Iex.Load.end==Iend_LE ? "le" : "be" );
+ ppIRType(e->Iex.Load.ty);
+ vex_printf( "(" );
+ ppIRExpr(e->Iex.Load.addr);
+ vex_printf( ")" );
+ break;
+ case Iex_Const:
+ ppIRConst(e->Iex.Const.con);
+ break;
+ case Iex_CCall:
+ ppIRCallee(e->Iex.CCall.cee);
+ vex_printf("(");
+ for (i = 0; e->Iex.CCall.args[i] != NULL; i++) {
+ ppIRExpr(e->Iex.CCall.args[i]);
+ if (e->Iex.CCall.args[i+1] != NULL)
+ vex_printf(",");
+ }
+ vex_printf("):");
+ ppIRType(e->Iex.CCall.retty);
+ break;
+ case Iex_Mux0X:
+ vex_printf("Mux0X(");
+ ppIRExpr(e->Iex.Mux0X.cond);
+ vex_printf(",");
+ ppIRExpr(e->Iex.Mux0X.expr0);
+ vex_printf(",");
+ ppIRExpr(e->Iex.Mux0X.exprX);
+ vex_printf(")");
+ break;
+ default:
+ vpanic("ppIRExpr");
+ }
+}
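+
+/* Illustrative example of the concrete syntax: an expression built as
+      IRExpr_Binop(Iop_Add32, IRExpr_RdTmp(3),
+                   IRExpr_Const(IRConst_U32(4)))
+   (see the constructors below) is rendered by ppIRExpr as, roughly,
+   "Add32(t3,0x4:I32)". */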
+
+void ppIREffect ( IREffect fx )
+{
+ switch (fx) {
+ case Ifx_None: vex_printf("noFX"); return;
+ case Ifx_Read: vex_printf("RdFX"); return;
+ case Ifx_Write: vex_printf("WrFX"); return;
+ case Ifx_Modify: vex_printf("MoFX"); return;
+ default: vpanic("ppIREffect");
+ }
+}
+
+void ppIRDirty ( IRDirty* d )
+{
+ Int i;
+ if (d->tmp != IRTemp_INVALID) {
+ ppIRTemp(d->tmp);
+ vex_printf(" = ");
+ }
+ vex_printf("DIRTY ");
+ ppIRExpr(d->guard);
+ if (d->needsBBP)
+ vex_printf(" NeedsBBP");
+ if (d->mFx != Ifx_None) {
+ vex_printf(" ");
+ ppIREffect(d->mFx);
+ vex_printf("-mem(");
+ ppIRExpr(d->mAddr);
+ vex_printf(",%d)", d->mSize);
+ }
+ for (i = 0; i < d->nFxState; i++) {
+ vex_printf(" ");
+ ppIREffect(d->fxState[i].fx);
+ vex_printf("-gst(%d,%d)", d->fxState[i].offset, d->fxState[i].size);
+ }
+ vex_printf(" ::: ");
+ ppIRCallee(d->cee);
+ vex_printf("(");
+ for (i = 0; d->args[i] != NULL; i++) {
+ ppIRExpr(d->args[i]);
+ if (d->args[i+1] != NULL) {
+ vex_printf(",");
+ }
+ }
+ vex_printf(")");
+}
+
+void ppIRCAS ( IRCAS* cas )
+{
+ /* Print even structurally invalid constructions, as an aid to
+ debugging. */
+ if (cas->oldHi != IRTemp_INVALID) {
+ ppIRTemp(cas->oldHi);
+ vex_printf(",");
+ }
+ ppIRTemp(cas->oldLo);
+ vex_printf(" = CAS%s(", cas->end==Iend_LE ? "le" : "be" );
+ ppIRExpr(cas->addr);
+ vex_printf("::");
+ if (cas->expdHi) {
+ ppIRExpr(cas->expdHi);
+ vex_printf(",");
+ }
+ ppIRExpr(cas->expdLo);
+ vex_printf("->");
+ if (cas->dataHi) {
+ ppIRExpr(cas->dataHi);
+ vex_printf(",");
+ }
+ ppIRExpr(cas->dataLo);
+ vex_printf(")");
+}
+
+void ppIRJumpKind ( IRJumpKind kind )
+{
+ switch (kind) {
+ case Ijk_Boring: vex_printf("Boring"); break;
+ case Ijk_Call: vex_printf("Call"); break;
+ case Ijk_Ret: vex_printf("Return"); break;
+ case Ijk_ClientReq: vex_printf("ClientReq"); break;
+ case Ijk_Yield: vex_printf("Yield"); break;
+ case Ijk_EmWarn: vex_printf("EmWarn"); break;
+ case Ijk_EmFail: vex_printf("EmFail"); break;
+ case Ijk_NoDecode: vex_printf("NoDecode"); break;
+ case Ijk_MapFail: vex_printf("MapFail"); break;
+ case Ijk_TInval: vex_printf("Invalidate"); break;
+ case Ijk_NoRedir: vex_printf("NoRedir"); break;
+ case Ijk_SigTRAP: vex_printf("SigTRAP"); break;
+ case Ijk_SigSEGV: vex_printf("SigSEGV"); break;
+ case Ijk_SigBUS: vex_printf("SigBUS"); break;
+ case Ijk_Sys_syscall: vex_printf("Sys_syscall"); break;
+ case Ijk_Sys_int32: vex_printf("Sys_int32"); break;
+ case Ijk_Sys_int128: vex_printf("Sys_int128"); break;
+ case Ijk_Sys_int129: vex_printf("Sys_int129"); break;
+ case Ijk_Sys_int130: vex_printf("Sys_int130"); break;
+ case Ijk_Sys_sysenter: vex_printf("Sys_sysenter"); break;
+ default: vpanic("ppIRJumpKind");
+ }
+}
+
+void ppIRMBusEvent ( IRMBusEvent event )
+{
+ switch (event) {
+ case Imbe_Fence: vex_printf("Fence"); break;
+ default: vpanic("ppIRMBusEvent");
+ }
+}
+
+void ppIRStmt ( IRStmt* s )
+{
+ if (!s) {
+ vex_printf("!!! IRStmt* which is NULL !!!");
+ return;
+ }
+ switch (s->tag) {
+ case Ist_NoOp:
+ vex_printf("IR-NoOp");
+ break;
+ case Ist_IMark:
+ vex_printf( "------ IMark(0x%llx, %d) ------",
+ s->Ist.IMark.addr, s->Ist.IMark.len);
+ break;
+ case Ist_AbiHint:
+ vex_printf("====== AbiHint(");
+ ppIRExpr(s->Ist.AbiHint.base);
+ vex_printf(", %d, ", s->Ist.AbiHint.len);
+ ppIRExpr(s->Ist.AbiHint.nia);
+ vex_printf(") ======");
+ break;
+ case Ist_Put:
+ vex_printf( "PUT(%d) = ", s->Ist.Put.offset);
+ ppIRExpr(s->Ist.Put.data);
+ break;
+ case Ist_PutI:
+ vex_printf( "PUTI" );
+ ppIRRegArray(s->Ist.PutI.descr);
+ vex_printf("[");
+ ppIRExpr(s->Ist.PutI.ix);
+ vex_printf(",%d] = ", s->Ist.PutI.bias);
+ ppIRExpr(s->Ist.PutI.data);
+ break;
+ case Ist_WrTmp:
+ ppIRTemp(s->Ist.WrTmp.tmp);
+ vex_printf( " = " );
+ ppIRExpr(s->Ist.WrTmp.data);
+ break;
+ case Ist_Store:
+ vex_printf( "ST%s(", s->Ist.Store.end==Iend_LE ? "le" : "be" );
+ ppIRExpr(s->Ist.Store.addr);
+ vex_printf( ") = ");
+ ppIRExpr(s->Ist.Store.data);
+ break;
+ case Ist_CAS:
+ ppIRCAS(s->Ist.CAS.details);
+ break;
+ case Ist_LLSC:
+ if (s->Ist.LLSC.storedata == NULL) {
+ ppIRTemp(s->Ist.LLSC.result);
+ vex_printf(" = LD%s-Linked(",
+ s->Ist.LLSC.end==Iend_LE ? "le" : "be");
+ ppIRExpr(s->Ist.LLSC.addr);
+ vex_printf(")");
+ } else {
+ ppIRTemp(s->Ist.LLSC.result);
+ vex_printf(" = ( ST%s-Cond(",
+ s->Ist.LLSC.end==Iend_LE ? "le" : "be");
+ ppIRExpr(s->Ist.LLSC.addr);
+ vex_printf(") = ");
+ ppIRExpr(s->Ist.LLSC.storedata);
+ vex_printf(" )");
+ }
+ break;
+ case Ist_Dirty:
+ ppIRDirty(s->Ist.Dirty.details);
+ break;
+ case Ist_MBE:
+ vex_printf("IR-");
+ ppIRMBusEvent(s->Ist.MBE.event);
+ break;
+ case Ist_Exit:
+ vex_printf( "if (" );
+ ppIRExpr(s->Ist.Exit.guard);
+ vex_printf( ") goto {");
+ ppIRJumpKind(s->Ist.Exit.jk);
+ vex_printf("} ");
+ ppIRConst(s->Ist.Exit.dst);
+ break;
+ default:
+ vpanic("ppIRStmt");
+ }
+}
+
+void ppIRTypeEnv ( IRTypeEnv* env ) {
+ UInt i;
+ for (i = 0; i < env->types_used; i++) {
+ if (i % 8 == 0)
+ vex_printf( " ");
+ ppIRTemp(i);
+ vex_printf( ":");
+ ppIRType(env->types[i]);
+ if (i % 8 == 7)
+ vex_printf( "\n");
+ else
+ vex_printf( " ");
+ }
+   /* Emit a trailing newline unless the loop body just printed one
+      (which happens when the last index satisfied i % 8 == 7). */
+   if (env->types_used > 0 && (env->types_used - 1) % 8 != 7)
+ vex_printf( "\n");
+}
+
+void ppIRSB ( IRSB* bb )
+{
+ Int i;
+ vex_printf("IRSB {\n");
+ ppIRTypeEnv(bb->tyenv);
+ vex_printf("\n");
+ for (i = 0; i < bb->stmts_used; i++) {
+ vex_printf( " ");
+ ppIRStmt(bb->stmts[i]);
+ vex_printf( "\n");
+ }
+ vex_printf( " goto {");
+ ppIRJumpKind(bb->jumpkind);
+ vex_printf( "} ");
+ ppIRExpr( bb->next );
+ vex_printf( "\n}\n");
+}
+
+
+/*---------------------------------------------------------------*/
+/*--- Constructors ---*/
+/*---------------------------------------------------------------*/
+
+
+/* Constructors -- IRConst */
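+
+/* Note: all the constructors in this file obtain storage with
+   LibVEX_Alloc; the resulting nodes are never freed individually,
+   but are reclaimed wholesale when VEX's allocation area is reset. */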
+
+IRConst* IRConst_U1 ( Bool bit )
+{
+ IRConst* c = LibVEX_Alloc(sizeof(IRConst));
+ c->tag = Ico_U1;
+ c->Ico.U1 = bit;
+ /* call me paranoid; I don't care :-) */
+ vassert(bit == False || bit == True);
+ return c;
+}
+IRConst* IRConst_U8 ( UChar u8 )
+{
+ IRConst* c = LibVEX_Alloc(sizeof(IRConst));
+ c->tag = Ico_U8;
+ c->Ico.U8 = u8;
+ return c;
+}
+IRConst* IRConst_U16 ( UShort u16 )
+{
+ IRConst* c = LibVEX_Alloc(sizeof(IRConst));
+ c->tag = Ico_U16;
+ c->Ico.U16 = u16;
+ return c;
+}
+IRConst* IRConst_U32 ( UInt u32 )
+{
+ IRConst* c = LibVEX_Alloc(sizeof(IRConst));
+ c->tag = Ico_U32;
+ c->Ico.U32 = u32;
+ return c;
+}
+IRConst* IRConst_U64 ( ULong u64 )
+{
+ IRConst* c = LibVEX_Alloc(sizeof(IRConst));
+ c->tag = Ico_U64;
+ c->Ico.U64 = u64;
+ return c;
+}
+IRConst* IRConst_F64 ( Double f64 )
+{
+ IRConst* c = LibVEX_Alloc(sizeof(IRConst));
+ c->tag = Ico_F64;
+ c->Ico.F64 = f64;
+ return c;
+}
+IRConst* IRConst_F64i ( ULong f64i )
+{
+ IRConst* c = LibVEX_Alloc(sizeof(IRConst));
+ c->tag = Ico_F64i;
+ c->Ico.F64i = f64i;
+ return c;
+}
+IRConst* IRConst_V128 ( UShort con )
+{
+ IRConst* c = LibVEX_Alloc(sizeof(IRConst));
+ c->tag = Ico_V128;
+ c->Ico.V128 = con;
+ return c;
+}
+
+/* Constructors -- IRCallee */
+
+IRCallee* mkIRCallee ( Int regparms, HChar* name, void* addr )
+{
+ IRCallee* ce = LibVEX_Alloc(sizeof(IRCallee));
+ ce->regparms = regparms;
+ ce->name = name;
+ ce->addr = addr;
+ ce->mcx_mask = 0;
+ vassert(regparms >= 0 && regparms <= 3);
+ vassert(name != NULL);
+ vassert(addr != 0);
+ return ce;
+}
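+
+/* The 0..3 limit on regparms reflects the register-passing
+   conventions the back ends implement; mcx_mask starts out as zero
+   and is set afterwards by clients (Memcheck, hence the name) that
+   need it. */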
+
+
+/* Constructors -- IRRegArray */
+
+IRRegArray* mkIRRegArray ( Int base, IRType elemTy, Int nElems )
+{
+ IRRegArray* arr = LibVEX_Alloc(sizeof(IRRegArray));
+ arr->base = base;
+ arr->elemTy = elemTy;
+ arr->nElems = nElems;
+ vassert(!(arr->base < 0 || arr->base > 10000 /* somewhat arbitrary */));
+ vassert(!(arr->elemTy == Ity_I1));
+ vassert(!(arr->nElems <= 0 || arr->nElems > 500 /* somewhat arbitrary */));
+ return arr;
+}
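+
+/* An IRRegArray describes an indexable slice of the guest state --
+   the rotating x87 register stack is the classic case -- which is
+   why base and nElems get the (admittedly arbitrary) sanity bounds
+   asserted above. */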
+
+
+/* Constructors -- IRExpr */
+
+IRExpr* IRExpr_Binder ( Int binder ) {
+ IRExpr* e = LibVEX_Alloc(sizeof(IRExpr));
+ e->tag = Iex_Binder;
+ e->Iex.Binder.binder = binder;
+ return e;
+}
+IRExpr* IRExpr_Get ( Int off, IRType ty ) {
+ IRExpr* e = LibVEX_Alloc(sizeof(IRExpr));
+ e->tag = Iex_Get;
+ e->Iex.Get.offset = off;
+ e->Iex.Get.ty = ty;
+ return e;
+}
+IRExpr* IRExpr_GetI ( IRRegArray* descr, IRExpr* ix, Int bias ) {
+ IRExpr* e = LibVEX_Alloc(sizeof(IRExpr));
+ e->tag = Iex_GetI;
+ e->Iex.GetI.descr = descr;
+ e->Iex.GetI.ix = ix;
+ e->Iex.GetI.bias = bias;
+ return e;
+}
+IRExpr* IRExpr_RdTmp ( IRTemp tmp ) {
+ IRExpr* e = LibVEX_Alloc(sizeof(IRExpr));
+ e->tag = Iex_RdTmp;
+ e->Iex.RdTmp.tmp = tmp;
+ return e;
+}
+IRExpr* IRExpr_Qop ( IROp op, IRExpr* arg1, IRExpr* arg2,
+ IRExpr* arg3, IRExpr* arg4 ) {
+ IRExpr* e = LibVEX_Alloc(sizeof(IRExpr));
+ e->tag = Iex_Qop;
+ e->Iex.Qop.op = op;
+ e->Iex.Qop.arg1 = arg1;
+ e->Iex.Qop.arg2 = arg2;
+ e->Iex.Qop.arg3 = arg3;
+ e->Iex.Qop.arg4 = arg4;
+ return e;
+}
+IRExpr* IRExpr_Triop ( IROp op, IRExpr* arg1,
+ IRExpr* arg2, IRExpr* arg3 ) {
+ IRExpr* e = LibVEX_Alloc(sizeof(IRExpr));
+ e->tag = Iex_Triop;
+ e->Iex.Triop.op = op;
+ e->Iex.Triop.arg1 = arg1;
+ e->Iex.Triop.arg2 = arg2;
+ e->Iex.Triop.arg3 = arg3;
+ return e;
+}
+IRExpr* IRExpr_Binop ( IROp op, IRExpr* arg1, IRExpr* arg2 ) {
+ IRExpr* e = LibVEX_Alloc(sizeof(IRExpr));
+ e->tag = Iex_Binop;
+ e->Iex.Binop.op = op;
+ e->Iex.Binop.arg1 = arg1;
+ e->Iex.Binop.arg2 = arg2;
+ return e;
+}
+IRExpr* IRExpr_Unop ( IROp op, IRExpr* arg ) {
+ IRExpr* e = LibVEX_Alloc(sizeof(IRExpr));
+ e->tag = Iex_Unop;
+ e->Iex.Unop.op = op;
+ e->Iex.Unop.arg = arg;
+ return e;
+}
+IRExpr* IRExpr_Load ( IREndness end, IRType ty, IRExpr* addr ) {
+ IRExpr* e = LibVEX_Alloc(sizeof(IRExpr));
+ e->tag = Iex_Load;
+ e->Iex.Load.end = end;
+ e->Iex.Load.ty = ty;
+ e->Iex.Load.addr = addr;
+ vassert(end == Iend_LE || end == Iend_BE);
+ return e;
+}
+IRExpr* IRExpr_Const ( IRConst* con ) {
+ IRExpr* e = LibVEX_Alloc(sizeof(IRExpr));
+ e->tag = Iex_Const;
+ e->Iex.Const.con = con;
+ return e;
+}
+IRExpr* IRExpr_CCall ( IRCallee* cee, IRType retty, IRExpr** args ) {
+ IRExpr* e = LibVEX_Alloc(sizeof(IRExpr));
+ e->tag = Iex_CCall;
+ e->Iex.CCall.cee = cee;
+ e->Iex.CCall.retty = retty;
+ e->Iex.CCall.args = args;
+ return e;
+}
+IRExpr* IRExpr_Mux0X ( IRExpr* cond, IRExpr* expr0, IRExpr* exprX ) {
+ IRExpr* e = LibVEX_Alloc(sizeof(IRExpr));
+ e->tag = Iex_Mux0X;
+ e->Iex.Mux0X.cond = cond;
+ e->Iex.Mux0X.expr0 = expr0;
+ e->Iex.Mux0X.exprX = exprX;
+ return e;
+}
+
+
+/* Constructors for NULL-terminated IRExpr expression vectors,
+ suitable for use as arg lists in clean/dirty helper calls. */
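+
+/* For example, the argument list for a two-argument helper call could
+   be built as
+      IRExpr** args = mkIRExprVec_2(IRExpr_RdTmp(t1), IRExpr_RdTmp(t2));
+   with t1/t2 being whatever temps hold the arguments.  The trailing
+   NULL is what the argument loops in ppIRExpr and ppIRDirty use to
+   find the end of the vector. */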
+
+IRExpr** mkIRExprVec_0 ( void ) {
+ IRExpr** vec = LibVEX_Alloc(1 * sizeof(IRExpr*));
+ vec[0] = NULL;
+ return vec;
+}
+IRExpr** mkIRExprVec_1 ( IRExpr* arg1 ) {
+ IRExpr** vec = LibVEX_Alloc(2 * sizeof(IRExpr*));
+ vec[0] = arg1;
+ vec[1] = NULL;
+ return vec;
+}
+IRExpr** mkIRExprVec_2 ( IRExpr* arg1, IRExpr* arg2 ) {
+ IRExpr** vec = LibVEX_Alloc(3 * sizeof(IRExpr*));
+ vec[0] = arg1;
+ vec[1] = arg2;
+ vec[2] = NULL;
+ return vec;
+}
+IRExpr** mkIRExprVec_3 ( IRExpr* arg1, IRExpr* arg2, IRExpr* arg3 ) {
+ IRExpr** vec = LibVEX_Alloc(4 * sizeof(IRExpr*));
+ vec[0] = arg1;
+ vec[1] = arg2;
+ vec[2] = arg3;
+ vec[3] = NULL;
+ return vec;
+}
+IRExpr** mkIRExprVec_4 ( IRExpr* arg1, IRExpr* arg2, IRExpr* arg3,
+ IRExpr* arg4 ) {
+ IRExpr** vec = LibVEX_Alloc(5 * sizeof(IRExpr*));
+ vec[0] = arg1;
+ vec[1] = arg2;
+ vec[2] = arg3;
+ vec[3] = arg4;
+ vec[4] = NULL;
+ return vec;
+}
+IRExpr** mkIRExprVec_5 ( IRExpr* arg1, IRExpr* arg2, IRExpr* arg3,
+ IRExpr* arg4, IRExpr* arg5 ) {
+ IRExpr** vec = LibVEX_Alloc(6 * sizeof(IRExpr*));
+ vec[0] = arg1;
+ vec[1] = arg2;
+ vec[2] = arg3;
+ vec[3] = arg4;
+ vec[4] = arg5;
+ vec[5] = NULL;
+ return vec;
+}
+IRExpr** mkIRExprVec_6 ( IRExpr* arg1, IRExpr* arg2, IRExpr* arg3,
+ IRExpr* arg4, IRExpr* arg5, IRExpr* arg6 ) {
+ IRExpr** vec = LibVEX_Alloc(7 * sizeof(IRExpr*));
+ vec[0] = arg1;
+ vec[1] = arg2;
+ vec[2] = arg3;
+ vec[3] = arg4;
+ vec[4] = arg5;
+ vec[5] = arg6;
+ vec[6] = NULL;
+ return vec;
+}
+IRExpr** mkIRExprVec_7 ( IRExpr* arg1, IRExpr* arg2, IRExpr* arg3,
+ IRExpr* arg4, IRExpr* arg5, IRExpr* arg6,
+ IRExpr* arg7 ) {
+ IRExpr** vec = LibVEX_Alloc(8 * sizeof(IRExpr*));
+ vec[0] = arg1;
+ vec[1] = arg2;
+ vec[2] = arg3;
+ vec[3] = arg4;
+ vec[4] = arg5;
+ vec[5] = arg6;
+ vec[6] = arg7;
+ vec[7] = NULL;
+ return vec;
+}
+IRExpr** mkIRExprVec_8 ( IRExpr* arg1, IRExpr* arg2, IRExpr* arg3,
+ IRExpr* arg4, IRExpr* arg5, IRExpr* arg6,
+ IRExpr* arg7, IRExpr* arg8 ) {
+ IRExpr** vec = LibVEX_Alloc(9 * sizeof(IRExpr*));
+ vec[0] = arg1;
+ vec[1] = arg2;
+ vec[2] = arg3;
+ vec[3] = arg4;
+ vec[4] = arg5;
+ vec[5] = arg6;
+ vec[6] = arg7;
+ vec[7] = arg8;
+ vec[8] = NULL;
+ return vec;
+}
+
+
+/* Constructors -- IRDirty */
+
+IRDirty* emptyIRDirty ( void ) {
+ IRDirty* d = LibVEX_Alloc(sizeof(IRDirty));
+ d->cee = NULL;
+ d->guard = NULL;
+ d->args = NULL;
+ d->tmp = IRTemp_INVALID;
+ d->mFx = Ifx_None;
+ d->mAddr = NULL;
+ d->mSize = 0;
+ d->needsBBP = False;
+ d->nFxState = 0;
+ return d;
+}
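+
+/* emptyIRDirty deliberately leaves cee, guard and args NULL; callers
+   must fill those in before the IRDirty is wrapped in an
+   IRStmt_Dirty, and deepCopyIRDirty below assumes they are
+   non-NULL. */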
+
+
+/* Constructors -- IRCAS */
+
+IRCAS* mkIRCAS ( IRTemp oldHi, IRTemp oldLo,
+ IREndness end, IRExpr* addr,
+ IRExpr* expdHi, IRExpr* expdLo,
+ IRExpr* dataHi, IRExpr* dataLo ) {
+ IRCAS* cas = LibVEX_Alloc(sizeof(IRCAS));
+ cas->oldHi = oldHi;
+ cas->oldLo = oldLo;
+ cas->end = end;
+ cas->addr = addr;
+ cas->expdHi = expdHi;
+ cas->expdLo = expdLo;
+ cas->dataHi = dataHi;
+ cas->dataLo = dataLo;
+ return cas;
+}
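+
+/* Convention: a single-element CAS passes oldHi == IRTemp_INVALID and
+   NULL for expdHi/dataHi; a double CAS supplies all eight fields.
+   ppIRCAS above and deepCopyIRCAS below both key off exactly these
+   IRTemp_INVALID/NULL tests. */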
+
+
+/* Constructors -- IRStmt */
+
+IRStmt* IRStmt_NoOp ( void )
+{
+ /* Just use a single static closure. */
+ static IRStmt static_closure;
+ static_closure.tag = Ist_NoOp;
+ return &static_closure;
+}
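+/* Since every no-op statement aliases that one static instance, the
+   returned IRStmt must never be modified in place. */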
+IRStmt* IRStmt_IMark ( Addr64 addr, Int len ) {
+ IRStmt* s = LibVEX_Alloc(sizeof(IRStmt));
+ s->tag = Ist_IMark;
+ s->Ist.IMark.addr = addr;
+ s->Ist.IMark.len = len;
+ return s;
+}
+IRStmt* IRStmt_AbiHint ( IRExpr* base, Int len, IRExpr* nia ) {
+ IRStmt* s = LibVEX_Alloc(sizeof(IRStmt));
+ s->tag = Ist_AbiHint;
+ s->Ist.AbiHint.base = base;
+ s->Ist.AbiHint.len = len;
+ s->Ist.AbiHint.nia = nia;
+ return s;
+}
+IRStmt* IRStmt_Put ( Int off, IRExpr* data ) {
+ IRStmt* s = LibVEX_Alloc(sizeof(IRStmt));
+ s->tag = Ist_Put;
+ s->Ist.Put.offset = off;
+ s->Ist.Put.data = data;
+ return s;
+}
+IRStmt* IRStmt_PutI ( IRRegArray* descr, IRExpr* ix,
+ Int bias, IRExpr* data ) {
+ IRStmt* s = LibVEX_Alloc(sizeof(IRStmt));
+ s->tag = Ist_PutI;
+ s->Ist.PutI.descr = descr;
+ s->Ist.PutI.ix = ix;
+ s->Ist.PutI.bias = bias;
+ s->Ist.PutI.data = data;
+ return s;
+}
+IRStmt* IRStmt_WrTmp ( IRTemp tmp, IRExpr* data ) {
+ IRStmt* s = LibVEX_Alloc(sizeof(IRStmt));
+ s->tag = Ist_WrTmp;
+ s->Ist.WrTmp.tmp = tmp;
+ s->Ist.WrTmp.data = data;
+ return s;
+}
+IRStmt* IRStmt_Store ( IREndness end, IRExpr* addr, IRExpr* data ) {
+ IRStmt* s = LibVEX_Alloc(sizeof(IRStmt));
+ s->tag = Ist_Store;
+ s->Ist.Store.end = end;
+ s->Ist.Store.addr = addr;
+ s->Ist.Store.data = data;
+ vassert(end == Iend_LE || end == Iend_BE);
+ return s;
+}
+IRStmt* IRStmt_CAS ( IRCAS* cas ) {
+ IRStmt* s = LibVEX_Alloc(sizeof(IRStmt));
+ s->tag = Ist_CAS;
+ s->Ist.CAS.details = cas;
+ return s;
+}
+IRStmt* IRStmt_LLSC ( IREndness end,
+ IRTemp result, IRExpr* addr, IRExpr* storedata ) {
+ IRStmt* s = LibVEX_Alloc(sizeof(IRStmt));
+ s->tag = Ist_LLSC;
+ s->Ist.LLSC.end = end;
+ s->Ist.LLSC.result = result;
+ s->Ist.LLSC.addr = addr;
+ s->Ist.LLSC.storedata = storedata;
+ return s;
+}
+IRStmt* IRStmt_Dirty ( IRDirty* d )
+{
+ IRStmt* s = LibVEX_Alloc(sizeof(IRStmt));
+ s->tag = Ist_Dirty;
+ s->Ist.Dirty.details = d;
+ return s;
+}
+IRStmt* IRStmt_MBE ( IRMBusEvent event )
+{
+ IRStmt* s = LibVEX_Alloc(sizeof(IRStmt));
+ s->tag = Ist_MBE;
+ s->Ist.MBE.event = event;
+ return s;
+}
+IRStmt* IRStmt_Exit ( IRExpr* guard, IRJumpKind jk, IRConst* dst ) {
+ IRStmt* s = LibVEX_Alloc(sizeof(IRStmt));
+ s->tag = Ist_Exit;
+ s->Ist.Exit.guard = guard;
+ s->Ist.Exit.jk = jk;
+ s->Ist.Exit.dst = dst;
+ return s;
+}
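+
+/* Illustrative example: the statement "t1 = Not32(t0)" could be built
+   from these constructors as
+      IRStmt* st = IRStmt_WrTmp(1, IRExpr_Unop(Iop_Not32,
+                                               IRExpr_RdTmp(0)));
+   and then attached to a block with addStmtToIRSB (defined later in
+   this file). */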
+
+
+/* Constructors -- IRTypeEnv */
+
+IRTypeEnv* emptyIRTypeEnv ( void )
+{
+ IRTypeEnv* env = LibVEX_Alloc(sizeof(IRTypeEnv));
+ env->types = LibVEX_Alloc(8 * sizeof(IRType));
+ env->types_size = 8;
+ env->types_used = 0;
+ return env;
+}
+
+
+/* Constructors -- IRSB */
+
+IRSB* emptyIRSB ( void )
+{
+ IRSB* bb = LibVEX_Alloc(sizeof(IRSB));
+ bb->tyenv = emptyIRTypeEnv();
+ bb->stmts_used = 0;
+ bb->stmts_size = 8;
+ bb->stmts = LibVEX_Alloc(bb->stmts_size * sizeof(IRStmt*));
+ bb->next = NULL;
+ bb->jumpkind = Ijk_Boring;
+ return bb;
+}
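+
+/* The block returned by emptyIRSB is not yet complete IR: in
+   particular next is NULL, and must be set to the fall-through guest
+   address expression before the block can be used. */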
+
+
+/*---------------------------------------------------------------*/
+/*--- (Deep) copy constructors.  These make complete copies   ---*/
+/*--- of the original; the copy can be modified without       ---*/
+/*--- affecting the original.                                 ---*/
+/*---------------------------------------------------------------*/
+
+/* Copying IR Expr vectors (for call args). */
+
+/* Shallow copy of an IRExpr vector */
+
+IRExpr** shallowCopyIRExprVec ( IRExpr** vec )
+{
+ Int i;
+ IRExpr** newvec;
+ for (i = 0; vec[i]; i++)
+ ;
+ newvec = LibVEX_Alloc((i+1)*sizeof(IRExpr*));
+ for (i = 0; vec[i]; i++)
+ newvec[i] = vec[i];
+ newvec[i] = NULL;
+ return newvec;
+}
+
+/* Deep copy of an IRExpr vector */
+
+IRExpr** deepCopyIRExprVec ( IRExpr** vec )
+{
+ Int i;
+ IRExpr** newvec = shallowCopyIRExprVec( vec );
+ for (i = 0; newvec[i]; i++)
+ newvec[i] = deepCopyIRExpr(newvec[i]);
+ return newvec;
+}
+
+/* Deep copy constructors for all heap-allocated IR types follow. */
+
+IRConst* deepCopyIRConst ( IRConst* c )
+{
+ switch (c->tag) {
+ case Ico_U1: return IRConst_U1(c->Ico.U1);
+ case Ico_U8: return IRConst_U8(c->Ico.U8);
+ case Ico_U16: return IRConst_U16(c->Ico.U16);
+ case Ico_U32: return IRConst_U32(c->Ico.U32);
+ case Ico_U64: return IRConst_U64(c->Ico.U64);
+ case Ico_F64: return IRConst_F64(c->Ico.F64);
+ case Ico_F64i: return IRConst_F64i(c->Ico.F64i);
+ case Ico_V128: return IRConst_V128(c->Ico.V128);
+ default: vpanic("deepCopyIRConst");
+ }
+}
+
+IRCallee* deepCopyIRCallee ( IRCallee* ce )
+{
+ IRCallee* ce2 = mkIRCallee(ce->regparms, ce->name, ce->addr);
+ ce2->mcx_mask = ce->mcx_mask;
+ return ce2;
+}
+
+IRRegArray* deepCopyIRRegArray ( IRRegArray* d )
+{
+ return mkIRRegArray(d->base, d->elemTy, d->nElems);
+}
+
+IRExpr* deepCopyIRExpr ( IRExpr* e )
+{
+ switch (e->tag) {
+ case Iex_Get:
+ return IRExpr_Get(e->Iex.Get.offset, e->Iex.Get.ty);
+ case Iex_GetI:
+ return IRExpr_GetI(deepCopyIRRegArray(e->Iex.GetI.descr),
+ deepCopyIRExpr(e->Iex.GetI.ix),
+ e->Iex.GetI.bias);
+ case Iex_RdTmp:
+ return IRExpr_RdTmp(e->Iex.RdTmp.tmp);
+ case Iex_Qop:
+ return IRExpr_Qop(e->Iex.Qop.op,
+ deepCopyIRExpr(e->Iex.Qop.arg1),
+ deepCopyIRExpr(e->Iex.Qop.arg2),
+ deepCopyIRExpr(e->Iex.Qop.arg3),
+ deepCopyIRExpr(e->Iex.Qop.arg4));
+ case Iex_Triop:
+ return IRExpr_Triop(e->Iex.Triop.op,
+ deepCopyIRExpr(e->Iex.Triop.arg1),
+ deepCopyIRExpr(e->Iex.Triop.arg2),
+ deepCopyIRExpr(e->Iex.Triop.arg3));
+ case Iex_Binop:
+ return IRExpr_Binop(e->Iex.Binop.op,
+ deepCopyIRExpr(e->Iex.Binop.arg1),
+ deepCopyIRExpr(e->Iex.Binop.arg2));
+ case Iex_Unop:
+ return IRExpr_Unop(e->Iex.Unop.op,
+ deepCopyIRExpr(e->Iex.Unop.arg));
+ case Iex_Load:
+ return IRExpr_Load(e->Iex.Load.end,
+ e->Iex.Load.ty,
+ deepCopyIRExpr(e->Iex.Load.addr));
+ case Iex_Const:
+ return IRExpr_Const(deepCopyIRConst(e->Iex.Const.con));
+ case Iex_CCall:
+ return IRExpr_CCall(deepCopyIRCallee(e->Iex.CCall.cee),
+ e->Iex.CCall.retty,
+ deepCopyIRExprVec(e->Iex.CCall.args));
+
+ case Iex_Mux0X:
+ return IRExpr_Mux0X(deepCopyIRExpr(e->Iex.Mux0X.cond),
+ deepCopyIRExpr(e->Iex.Mux0X.expr0),
+ deepCopyIRExpr(e->Iex.Mux0X.exprX));
+ default:
+ vpanic("deepCopyIRExpr");
+ }
+}
+
+IRDirty* deepCopyIRDirty ( IRDirty* d )
+{
+ Int i;
+ IRDirty* d2 = emptyIRDirty();
+ d2->cee = deepCopyIRCallee(d->cee);
+ d2->guard = deepCopyIRExpr(d->guard);
+ d2->args = deepCopyIRExprVec(d->args);
+ d2->tmp = d->tmp;
+ d2->mFx = d->mFx;
+ d2->mAddr = d->mAddr==NULL ? NULL : deepCopyIRExpr(d->mAddr);
+ d2->mSize = d->mSize;
+ d2->needsBBP = d->needsBBP;
+ d2->nFxState = d->nFxState;
+ for (i = 0; i < d2->nFxState; i++)
+ d2->fxState[i] = d->fxState[i];
+ return d2;
+}
+
+IRCAS* deepCopyIRCAS ( IRCAS* cas )
+{
+ return mkIRCAS( cas->oldHi, cas->oldLo, cas->end,
+ deepCopyIRExpr(cas->addr),
+ cas->expdHi==NULL ? NULL : deepCopyIRExpr(cas->expdHi),
+ deepCopyIRExpr(cas->expdLo),
+ cas->dataHi==NULL ? NULL : deepCopyIRExpr(cas->dataHi),
+ deepCopyIRExpr(cas->dataLo) );
+}
+
+IRStmt* deepCopyIRStmt ( IRStmt* s )
+{
+ switch (s->tag) {
+ case Ist_NoOp:
+ return IRStmt_NoOp();
+ case Ist_AbiHint:
+ return IRStmt_AbiHint(deepCopyIRExpr(s->Ist.AbiHint.base),
+ s->Ist.AbiHint.len,
+ deepCopyIRExpr(s->Ist.AbiHint.nia));
+ case Ist_IMark:
+ return IRStmt_IMark(s->Ist.IMark.addr, s->Ist.IMark.len);
+ case Ist_Put:
+ return IRStmt_Put(s->Ist.Put.offset,
+ deepCopyIRExpr(s->Ist.Put.data));
+ case Ist_PutI:
+ return IRStmt_PutI(deepCopyIRRegArray(s->Ist.PutI.descr),
+ deepCopyIRExpr(s->Ist.PutI.ix),
+ s->Ist.PutI.bias,
+ deepCopyIRExpr(s->Ist.PutI.data));
+ case Ist_WrTmp:
+ return IRStmt_WrTmp(s->Ist.WrTmp.tmp,
+ deepCopyIRExpr(s->Ist.WrTmp.data));
+ case Ist_Store:
+ return IRStmt_Store(s->Ist.Store.end,
+ deepCopyIRExpr(s->Ist.Store.addr),
+ deepCopyIRExpr(s->Ist.Store.data));
+ case Ist_CAS:
+ return IRStmt_CAS(deepCopyIRCAS(s->Ist.CAS.details));
+ case Ist_LLSC:
+ return IRStmt_LLSC(s->Ist.LLSC.end,
+ s->Ist.LLSC.result,
+ deepCopyIRExpr(s->Ist.LLSC.addr),
+ s->Ist.LLSC.storedata
+ ? deepCopyIRExpr(s->Ist.LLSC.storedata)
+ : NULL);
+ case Ist_Dirty:
+ return IRStmt_Dirty(deepCopyIRDirty(s->Ist.Dirty.details));
+ case Ist_MBE:
+ return IRStmt_MBE(s->Ist.MBE.event);
+ case Ist_Exit:
+ return IRStmt_Exit(deepCopyIRExpr(s->Ist.Exit.guard),
+ s->Ist.Exit.jk,
+ deepCopyIRConst(s->Ist.Exit.dst));
+ default:
+ vpanic("deepCopyIRStmt");
+ }
+}
+
+IRTypeEnv* deepCopyIRTypeEnv ( IRTypeEnv* src )
+{
+ Int i;
+ IRTypeEnv* dst = LibVEX_Alloc(sizeof(IRTypeEnv));
+ dst->types_size = src->types_size;
+ dst->types_used = src->types_used;
+ dst->types = LibVEX_Alloc(dst->types_size * sizeof(IRType));
+ for (i = 0; i < src->types_used; i++)
+ dst->types[i] = src->types[i];
+ return dst;
+}
+
+IRSB* deepCopyIRSB ( IRSB* bb )
+{
+ Int i;
+ IRStmt** sts2;
+ IRSB* bb2 = deepCopyIRSBExceptStmts(bb);
+ bb2->stmts_used = bb2->stmts_size = bb->stmts_used;
+ sts2 = LibVEX_Alloc(bb2->stmts_used * sizeof(IRStmt*));
+ for (i = 0; i < bb2->stmts_used; i++)
+ sts2[i] = deepCopyIRStmt(bb->stmts[i]);
+ bb2->stmts = sts2;
+ return bb2;
+}
+
+IRSB* deepCopyIRSBExceptStmts ( IRSB* bb )
+{
+ IRSB* bb2 = emptyIRSB();
+ bb2->tyenv = deepCopyIRTypeEnv(bb->tyenv);
+ bb2->next = deepCopyIRExpr(bb->next);
+ bb2->jumpkind = bb->jumpkind;
+ return bb2;
+}
+
+
+/*---------------------------------------------------------------*/
+/*--- Primop types ---*/
+/*---------------------------------------------------------------*/
+
+static
+void typeOfPrimop ( IROp op,
+ /*OUTs*/
+ IRType* t_dst,
+ IRType* t_arg1, IRType* t_arg2,
+ IRType* t_arg3, IRType* t_arg4 )
+{
+# define UNARY(_ta1,_td) \
+ *t_dst = (_td); *t_arg1 = (_ta1); break
+# define BINARY(_ta1,_ta2,_td) \
+ *t_dst = (_td); *t_arg1 = (_ta1); *t_arg2 = (_ta2); break
+# define TERNARY(_ta1,_ta2,_ta3,_td) \
+ *t_dst = (_td); *t_arg1 = (_ta1); \
+ *t_arg2 = (_ta2); *t_arg3 = (_ta3); break
+# define QUATERNARY(_ta1,_ta2,_ta3,_ta4,_td) \
+ *t_dst = (_td); *t_arg1 = (_ta1); \
+ *t_arg2 = (_ta2); *t_arg3 = (_ta3); \
+ *t_arg4 = (_ta4); break
+# define COMPARISON(_ta) \
+     *t_dst = Ity_I1; *t_arg1 = *t_arg2 = (_ta); break
+# define UNARY_COMPARISON(_ta) \
+     *t_dst = Ity_I1; *t_arg1 = (_ta); break
+
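+/* Example (illustrative): inside a case arm, the use
+      BINARY(Ity_I8,Ity_I8, Ity_I8);
+   expands to
+      *t_dst = (Ity_I8); *t_arg1 = (Ity_I8); *t_arg2 = (Ity_I8); break;
+   i.e. each macro records the destination type, then the argument
+   types, and breaks out of the switch below. */
+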
+ /* Rounding mode values are always Ity_I32, encoded as per
+ IRRoundingMode */
+ const IRType ity_RMode = Ity_I32;
+
+ *t_dst = Ity_INVALID;
+ *t_arg1 = Ity_INVALID;
+ *t_arg2 = Ity_INVALID;
+ *t_arg3 = Ity_INVALID;
+ *t_arg4 = Ity_INVALID;
+ switch (op) {
+ case Iop_Add8: case Iop_Sub8: case Iop_Mul8:
+ case Iop_Or8: case Iop_And8: case Iop_Xor8:
+ BINARY(Ity_I8,Ity_I8, Ity_I8);
+
+ case Iop_Add16: case Iop_Sub16: case Iop_Mul16:
+ case Iop_Or16: case Iop_And16: case Iop_Xor16:
+ BINARY(Ity_I16,Ity_I16, Ity_I16);
+
+ case Iop_CmpORD32U:
+ case Iop_CmpORD32S:
+ case Iop_Add32: case Iop_Sub32: case Iop_Mul32:
+ case Iop_Or32: case Iop_And32: case Iop_Xor32:
+ case Iop_Max32U:
+ case Iop_Add16x2: case Iop_Sub16x2:
+ case Iop_QAdd16Sx2: case Iop_QAdd16Ux2:
+ case Iop_QSub16Sx2: case Iop_QSub16Ux2:
+ case Iop_HAdd16Ux2: case Iop_HAdd16Sx2:
+ case Iop_HSub16Ux2: case Iop_HSub16Sx2:
+ case Iop_Add8x4: case Iop_Sub8x4:
+ case Iop_QAdd8Sx4: case Iop_QAdd8Ux4:
+ case Iop_QSub8Sx4: case Iop_QSub8Ux4:
+ case Iop_HAdd8Ux4: case Iop_HAdd8Sx4:
+ case Iop_HSub8Ux4: case Iop_HSub8Sx4:
+ case Iop_Sad8Ux4:
+ BINARY(Ity_I32,Ity_I32, Ity_I32);
+
+ case Iop_Add64: case Iop_Sub64: case Iop_Mul64:
+ case Iop_Or64: case Iop_And64: case Iop_Xor64:
+ case Iop_CmpORD64U:
+ case Iop_CmpORD64S:
+ case Iop_Avg8Ux8: case Iop_Avg16Ux4:
+ case Iop_Add8x8: case Iop_Add16x4: case Iop_Add32x2:
+ case Iop_Add32Fx2: case Iop_Sub32Fx2:
+ case Iop_CmpEQ8x8: case Iop_CmpEQ16x4: case Iop_CmpEQ32x2:
+ case Iop_CmpGT8Sx8: case Iop_CmpGT16Sx4: case Iop_CmpGT32Sx2:
+ case Iop_CmpGT8Ux8: case Iop_CmpGT16Ux4: case Iop_CmpGT32Ux2:
+ case Iop_CmpGT32Fx2: case Iop_CmpEQ32Fx2: case Iop_CmpGE32Fx2:
+ case Iop_InterleaveHI8x8: case Iop_InterleaveLO8x8:
+ case Iop_InterleaveHI16x4: case Iop_InterleaveLO16x4:
+ case Iop_InterleaveHI32x2: case Iop_InterleaveLO32x2:
+ case Iop_CatOddLanes8x8: case Iop_CatEvenLanes8x8:
+ case Iop_CatOddLanes16x4: case Iop_CatEvenLanes16x4:
+ case Iop_InterleaveOddLanes8x8: case Iop_InterleaveEvenLanes8x8:
+ case Iop_InterleaveOddLanes16x4: case Iop_InterleaveEvenLanes16x4:
+ case Iop_Perm8x8:
+ case Iop_Max8Ux8: case Iop_Max16Ux4: case Iop_Max32Ux2:
+ case Iop_Max8Sx8: case Iop_Max16Sx4: case Iop_Max32Sx2:
+ case Iop_Max32Fx2: case Iop_Min32Fx2:
+ case Iop_PwMax32Fx2: case Iop_PwMin32Fx2:
+ case Iop_Min8Ux8: case Iop_Min16Ux4: case Iop_Min32Ux2:
+ case Iop_Min8Sx8: case Iop_Min16Sx4: case Iop_Min32Sx2:
+ case Iop_PwMax8Ux8: case Iop_PwMax16Ux4: case Iop_PwMax32Ux2:
+ case Iop_PwMax8Sx8: case Iop_PwMax16Sx4: case Iop_PwMax32Sx2:
+ case Iop_PwMin8Ux8: case Iop_PwMin16Ux4: case Iop_PwMin32Ux2:
+ case Iop_PwMin8Sx8: case Iop_PwMin16Sx4: case Iop_PwMin32Sx2:
+ case Iop_Mul8x8: case Iop_Mul16x4: case Iop_Mul32x2:
+ case Iop_Mul32Fx2:
+ case Iop_PolynomialMul8x8:
+ case Iop_MulHi16Sx4: case Iop_MulHi16Ux4:
+ case Iop_QDMulHi16Sx4: case Iop_QDMulHi32Sx2:
+ case Iop_QRDMulHi16Sx4: case Iop_QRDMulHi32Sx2:
+ case Iop_QAdd8Sx8: case Iop_QAdd16Sx4:
+ case Iop_QAdd32Sx2: case Iop_QAdd64Sx1:
+ case Iop_QAdd8Ux8: case Iop_QAdd16Ux4:
+ case Iop_QAdd32Ux2: case Iop_QAdd64Ux1:
+ case Iop_PwAdd8x8: case Iop_PwAdd16x4: case Iop_PwAdd32x2:
+ case Iop_PwAdd32Fx2:
+ case Iop_QNarrow32Sx2:
+ case Iop_QNarrow16Sx4: case Iop_QNarrow16Ux4:
+ case Iop_Sub8x8: case Iop_Sub16x4: case Iop_Sub32x2:
+ case Iop_QSub8Sx8: case Iop_QSub16Sx4:
+ case Iop_QSub32Sx2: case Iop_QSub64Sx1:
+ case Iop_QSub8Ux8: case Iop_QSub16Ux4:
+ case Iop_QSub32Ux2: case Iop_QSub64Ux1:
+ case Iop_Shl8x8: case Iop_Shl16x4: case Iop_Shl32x2:
+ case Iop_Shr8x8: case Iop_Shr16x4: case Iop_Shr32x2:
+ case Iop_Sar8x8: case Iop_Sar16x4: case Iop_Sar32x2:
+ case Iop_Sal8x8: case Iop_Sal16x4: case Iop_Sal32x2: case Iop_Sal64x1:
+ case Iop_QShl8x8: case Iop_QShl16x4: case Iop_QShl32x2: case Iop_QShl64x1:
+ case Iop_QSal8x8: case Iop_QSal16x4: case Iop_QSal32x2: case Iop_QSal64x1:
+ case Iop_Recps32Fx2:
+ case Iop_Rsqrts32Fx2:
+ BINARY(Ity_I64,Ity_I64, Ity_I64);
+
+ case Iop_ShlN32x2: case Iop_ShlN16x4: case Iop_ShlN8x8:
+ case Iop_ShrN32x2: case Iop_ShrN16x4: case Iop_ShrN8x8:
+ case Iop_SarN32x2: case Iop_SarN16x4: case Iop_SarN8x8:
+ case Iop_QShlN8x8: case Iop_QShlN16x4:
+ case Iop_QShlN32x2: case Iop_QShlN64x1:
+ case Iop_QShlN8Sx8: case Iop_QShlN16Sx4:
+ case Iop_QShlN32Sx2: case Iop_QShlN64Sx1:
+ case Iop_QSalN8x8: case Iop_QSalN16x4:
+ case Iop_QSalN32x2: case Iop_QSalN64x1:
+ BINARY(Ity_I64,Ity_I8, Ity_I64);
+
+ case Iop_Shl8: case Iop_Shr8: case Iop_Sar8:
+ BINARY(Ity_I8,Ity_I8, Ity_I8);
+ case Iop_Shl16: case Iop_Shr16: case Iop_Sar16:
+ BINARY(Ity_I16,Ity_I8, Ity_I16);
+ case Iop_Shl32: case Iop_Shr32: case Iop_Sar32:
+ BINARY(Ity_I32,Ity_I8, Ity_I32);
+ case Iop_Shl64: case Iop_Shr64: case Iop_Sar64:
+ BINARY(Ity_I64,Ity_I8, Ity_I64);
+
+ case Iop_Not8:
+ UNARY(Ity_I8, Ity_I8);
+ case Iop_Not16:
+ UNARY(Ity_I16, Ity_I16);
+ case Iop_Not32:
+ case Iop_CmpNEZ16x2: case Iop_CmpNEZ8x4:
+ UNARY(Ity_I32, Ity_I32);
+
+ case Iop_Not64:
+ case Iop_CmpNEZ32x2: case Iop_CmpNEZ16x4: case Iop_CmpNEZ8x8:
+ case Iop_Cnt8x8:
+ case Iop_Clz8Sx8: case Iop_Clz16Sx4: case Iop_Clz32Sx2:
+ case Iop_Cls8Sx8: case Iop_Cls16Sx4: case Iop_Cls32Sx2:
+ case Iop_PwAddL8Ux8: case Iop_PwAddL16Ux4: case Iop_PwAddL32Ux2:
+ case Iop_PwAddL8Sx8: case Iop_PwAddL16Sx4: case Iop_PwAddL32Sx2:
+ case Iop_Reverse64_8x8: case Iop_Reverse64_16x4: case Iop_Reverse64_32x2:
+ case Iop_Reverse32_8x8: case Iop_Reverse32_16x4:
+ case Iop_Reverse16_8x8:
+ case Iop_FtoI32Sx2_RZ: case Iop_FtoI32Ux2_RZ:
+ case Iop_I32StoFx2: case Iop_I32UtoFx2:
+ case Iop_Recip32x2: case Iop_Recip32Fx2:
+ case Iop_Abs32Fx2:
+ case Iop_Rsqrte32Fx2:
+ case Iop_Rsqrte32x2:
+ case Iop_Neg32Fx2:
+ case Iop_Abs8x8: case Iop_Abs16x4: case Iop_Abs32x2:
+ UNARY(Ity_I64, Ity_I64);
+
+ case Iop_CmpEQ8: case Iop_CmpNE8:
+ case Iop_CasCmpEQ8: case Iop_CasCmpNE8:
+ COMPARISON(Ity_I8);
+ case Iop_CmpEQ16: case Iop_CmpNE16:
+ case Iop_CasCmpEQ16: case Iop_CasCmpNE16:
+ COMPARISON(Ity_I16);
+ case Iop_CmpEQ32: case Iop_CmpNE32:
+ case Iop_CasCmpEQ32: case Iop_CasCmpNE32:
+ case Iop_CmpLT32S: case Iop_CmpLE32S:
+ case Iop_CmpLT32U: case Iop_CmpLE32U:
+ COMPARISON(Ity_I32);
+ case Iop_CmpEQ64: case Iop_CmpNE64:
+ case Iop_CasCmpEQ64: case Iop_CasCmpNE64:
+ case Iop_CmpLT64S: case Iop_CmpLE64S:
+ case Iop_CmpLT64U: case Iop_CmpLE64U:
+ COMPARISON(Ity_I64);
+
+ case Iop_CmpNEZ8: UNARY_COMPARISON(Ity_I8);
+ case Iop_CmpNEZ16: UNARY_COMPARISON(Ity_I16);
+ case Iop_CmpNEZ32: UNARY_COMPARISON(Ity_I32);
+ case Iop_CmpNEZ64: UNARY_COMPARISON(Ity_I64);
+
+ case Iop_Left8: UNARY(Ity_I8, Ity_I8);
+ case Iop_Left16: UNARY(Ity_I16,Ity_I16);
+ case Iop_CmpwNEZ32: case Iop_Left32: UNARY(Ity_I32,Ity_I32);
+ case Iop_CmpwNEZ64: case Iop_Left64: UNARY(Ity_I64,Ity_I64);
+
+ case Iop_MullU8: case Iop_MullS8:
+ BINARY(Ity_I8,Ity_I8, Ity_I16);
+ case Iop_MullU16: case Iop_MullS16:
+ BINARY(Ity_I16,Ity_I16, Ity_I32);
+ case Iop_MullU32: case Iop_MullS32:
+ BINARY(Ity_I32,Ity_I32, Ity_I64);
+ case Iop_MullU64: case Iop_MullS64:
+ BINARY(Ity_I64,Ity_I64, Ity_I128);
+
+ case Iop_Clz32: case Iop_Ctz32:
+ UNARY(Ity_I32, Ity_I32);
+
+ case Iop_Clz64: case Iop_Ctz64:
+ UNARY(Ity_I64, Ity_I64);
+
+ case Iop_DivU32: case Iop_DivS32:
+ BINARY(Ity_I32,Ity_I32, Ity_I32);
+
+ case Iop_DivU64: case Iop_DivS64:
+ BINARY(Ity_I64,Ity_I64, Ity_I64);
+
+ case Iop_DivModU64to32: case Iop_DivModS64to32:
+ BINARY(Ity_I64,Ity_I32, Ity_I64);
+
+ case Iop_DivModU128to64: case Iop_DivModS128to64:
+ BINARY(Ity_I128,Ity_I64, Ity_I128);
+
+ case Iop_16HIto8: case Iop_16to8:
+ UNARY(Ity_I16, Ity_I8);
+ case Iop_8HLto16:
+ BINARY(Ity_I8,Ity_I8, Ity_I16);
+
+ case Iop_32HIto16: case Iop_32to16:
+ UNARY(Ity_I32, Ity_I16);
+ case Iop_16HLto32:
+ BINARY(Ity_I16,Ity_I16, Ity_I32);
+
+ case Iop_64HIto32: case Iop_64to32:
+ UNARY(Ity_I64, Ity_I32);
+ case Iop_32HLto64:
+ BINARY(Ity_I32,Ity_I32, Ity_I64);
+
+ case Iop_128HIto64: case Iop_128to64:
+ UNARY(Ity_I128, Ity_I64);
+ case Iop_64HLto128:
+ BINARY(Ity_I64,Ity_I64, Ity_I128);
+
+ case Iop_Not1: UNARY(Ity_I1, Ity_I1);
+ case Iop_1Uto8: UNARY(Ity_I1, Ity_I8);
+ case Iop_1Sto8: UNARY(Ity_I1, Ity_I8);
+ case Iop_1Sto16: UNARY(Ity_I1, Ity_I16);
+ case Iop_1Uto32: case Iop_1Sto32: UNARY(Ity_I1, Ity_I32);
+ case Iop_1Sto64: case Iop_1Uto64: UNARY(Ity_I1, Ity_I64);
+ case Iop_32to1: UNARY(Ity_I32, Ity_I1);
+ case Iop_64to1: UNARY(Ity_I64, Ity_I1);
+
+ case Iop_8Uto32: case Iop_8Sto32:
+ UNARY(Ity_I8, Ity_I32);
+
+ case Iop_8Uto16: case Iop_8Sto16:
+ UNARY(Ity_I8, Ity_I16);
+
+ case Iop_16Uto32: case Iop_16Sto32:
+ UNARY(Ity_I16, Ity_I32);
+
+ case Iop_32Sto64: case Iop_32Uto64:
+ UNARY(Ity_I32, Ity_I64);
+
+ case Iop_8Uto64: case Iop_8Sto64:
+ UNARY(Ity_I8, Ity_I64);
+
+ case Iop_16Uto64: case Iop_16Sto64:
+ UNARY(Ity_I16, Ity_I64);
+ case Iop_64to16:
+ UNARY(Ity_I64, Ity_I16);
+
+ case Iop_32to8: UNARY(Ity_I32, Ity_I8);
+ case Iop_64to8: UNARY(Ity_I64, Ity_I8);
+
+ case Iop_AddF64: case Iop_SubF64:
+ case Iop_MulF64: case Iop_DivF64:
+ case Iop_AddF64r32: case Iop_SubF64r32:
+ case Iop_MulF64r32: case Iop_DivF64r32:
+ TERNARY(ity_RMode,Ity_F64,Ity_F64, Ity_F64);
+
+ case Iop_AddF32: case Iop_SubF32:
+ case Iop_MulF32: case Iop_DivF32:
+ TERNARY(ity_RMode,Ity_F32,Ity_F32, Ity_F32);
+
+ case Iop_NegF64: case Iop_AbsF64:
+ UNARY(Ity_F64, Ity_F64);
+
+ case Iop_NegF32: case Iop_AbsF32:
+ UNARY(Ity_F32, Ity_F32);
+
+ case Iop_SqrtF64:
+ case Iop_SqrtF64r32:
+ BINARY(ity_RMode,Ity_F64, Ity_F64);
+
+ case Iop_SqrtF32:
+ case Iop_RoundF32toInt:
+ BINARY(ity_RMode,Ity_F32, Ity_F32);
+
+ case Iop_CmpF64:
+ BINARY(Ity_F64,Ity_F64, Ity_I32);
+
+ case Iop_F64toI16S: BINARY(ity_RMode,Ity_F64, Ity_I16);
+ case Iop_F64toI32S: BINARY(ity_RMode,Ity_F64, Ity_I32);
+ case Iop_F64toI64S: BINARY(ity_RMode,Ity_F64, Ity_I64);
+
+ case Iop_F64toI32U: BINARY(ity_RMode,Ity_F64, Ity_I32);
+
+ case Iop_I16StoF64: UNARY(Ity_I16, Ity_F64);
+ case Iop_I32StoF64: UNARY(Ity_I32, Ity_F64);
+ case Iop_I64StoF64: BINARY(ity_RMode,Ity_I64, Ity_F64);
+
+ case Iop_I32UtoF64: UNARY(Ity_I32, Ity_F64);
+
+ case Iop_F32toF64: UNARY(Ity_F32, Ity_F64);
+ case Iop_F64toF32: BINARY(ity_RMode,Ity_F64, Ity_F32);
+
+ case Iop_ReinterpI64asF64: UNARY(Ity_I64, Ity_F64);
+ case Iop_ReinterpF64asI64: UNARY(Ity_F64, Ity_I64);
+ case Iop_ReinterpI32asF32: UNARY(Ity_I32, Ity_F32);
+ case Iop_ReinterpF32asI32: UNARY(Ity_F32, Ity_I32);
+
+ case Iop_AtanF64: case Iop_Yl2xF64: case Iop_Yl2xp1F64:
+ case Iop_ScaleF64: case Iop_PRemF64: case Iop_PRem1F64:
+ TERNARY(ity_RMode,Ity_F64,Ity_F64, Ity_F64);
+
+ case Iop_PRemC3210F64: case Iop_PRem1C3210F64:
+ TERNARY(ity_RMode,Ity_F64,Ity_F64, Ity_I32);
+
+ case Iop_SinF64: case Iop_CosF64: case Iop_TanF64:
+ case Iop_2xm1F64:
+ case Iop_RoundF64toInt: BINARY(ity_RMode,Ity_F64, Ity_F64);
+
+ case Iop_MAddF64: case Iop_MSubF64:
+ case Iop_MAddF64r32: case Iop_MSubF64r32:
+ QUATERNARY(ity_RMode,Ity_F64,Ity_F64,Ity_F64, Ity_F64);
+
+ case Iop_Est5FRSqrt:
+ case Iop_RoundF64toF64_NEAREST: case Iop_RoundF64toF64_NegINF:
+ case Iop_RoundF64toF64_PosINF: case Iop_RoundF64toF64_ZERO:
+ UNARY(Ity_F64, Ity_F64);
+ case Iop_RoundF64toF32:
+ BINARY(ity_RMode,Ity_F64, Ity_F64);
+ case Iop_CalcFPRF:
+ UNARY(Ity_F64, Ity_I32);
+ case Iop_TruncF64asF32:
+ UNARY(Ity_F64, Ity_F32);
+
+ case Iop_I32UtoFx4:
+ case Iop_I32StoFx4:
+ case Iop_QFtoI32Ux4_RZ:
+ case Iop_QFtoI32Sx4_RZ:
+ case Iop_FtoI32Ux4_RZ:
+ case Iop_FtoI32Sx4_RZ:
+ case Iop_RoundF32x4_RM:
+ case Iop_RoundF32x4_RP:
+ case Iop_RoundF32x4_RN:
+ case Iop_RoundF32x4_RZ:
+ case Iop_Abs32Fx4:
+ case Iop_Rsqrte32Fx4:
+ case Iop_Rsqrte32x4:
+ UNARY(Ity_V128, Ity_V128);
+
+ case Iop_64HLtoV128: BINARY(Ity_I64,Ity_I64, Ity_V128);
+ case Iop_V128to64: case Iop_V128HIto64:
+ case Iop_Shorten16x8: case Iop_Shorten32x4: case Iop_Shorten64x2:
+ case Iop_QShortenU16Ux8: case Iop_QShortenU32Ux4: case Iop_QShortenU64Ux2:
+ case Iop_QShortenS16Sx8: case Iop_QShortenS32Sx4: case Iop_QShortenS64Sx2:
+ case Iop_QShortenU16Sx8: case Iop_QShortenU32Sx4: case Iop_QShortenU64Sx2:
+ case Iop_F32toF16x4:
+ UNARY(Ity_V128, Ity_I64);
+
+ case Iop_Longen8Ux8: case Iop_Longen16Ux4: case Iop_Longen32Ux2:
+ case Iop_Longen8Sx8: case Iop_Longen16Sx4: case Iop_Longen32Sx2:
+ case Iop_F16toF32x4:
+ UNARY(Ity_I64, Ity_V128);
+
+ case Iop_V128to32: UNARY(Ity_V128, Ity_I32);
+ case Iop_32UtoV128: UNARY(Ity_I32, Ity_V128);
+ case Iop_64UtoV128: UNARY(Ity_I64, Ity_V128);
+ case Iop_SetV128lo32: BINARY(Ity_V128,Ity_I32, Ity_V128);
+ case Iop_SetV128lo64: BINARY(Ity_V128,Ity_I64, Ity_V128);
+
+ case Iop_Dup8x16: UNARY(Ity_I8, Ity_V128);
+ case Iop_Dup16x8: UNARY(Ity_I16, Ity_V128);
+ case Iop_Dup32x4: UNARY(Ity_I32, Ity_V128);
+ case Iop_Dup8x8: UNARY(Ity_I8, Ity_I64);
+ case Iop_Dup16x4: UNARY(Ity_I16, Ity_I64);
+ case Iop_Dup32x2: UNARY(Ity_I32, Ity_I64);
+
+ case Iop_CmpEQ32Fx4: case Iop_CmpLT32Fx4:
+ case Iop_CmpEQ64Fx2: case Iop_CmpLT64Fx2:
+ case Iop_CmpLE32Fx4: case Iop_CmpUN32Fx4:
+ case Iop_CmpLE64Fx2: case Iop_CmpUN64Fx2:
+ case Iop_CmpGT32Fx4: case Iop_CmpGE32Fx4:
+ case Iop_CmpEQ32F0x4: case Iop_CmpLT32F0x4:
+ case Iop_CmpEQ64F0x2: case Iop_CmpLT64F0x2:
+ case Iop_CmpLE32F0x4: case Iop_CmpUN32F0x4:
+ case Iop_CmpLE64F0x2: case Iop_CmpUN64F0x2:
+ case Iop_Add32Fx4: case Iop_Add32F0x4:
+ case Iop_Add64Fx2: case Iop_Add64F0x2:
+ case Iop_Div32Fx4: case Iop_Div32F0x4:
+ case Iop_Div64Fx2: case Iop_Div64F0x2:
+ case Iop_Max32Fx4: case Iop_Max32F0x4:
+ case Iop_PwMax32Fx4: case Iop_PwMin32Fx4:
+ case Iop_Max64Fx2: case Iop_Max64F0x2:
+ case Iop_Min32Fx4: case Iop_Min32F0x4:
+ case Iop_Min64Fx2: case Iop_Min64F0x2:
+ case Iop_Mul32Fx4: case Iop_Mul32F0x4:
+ case Iop_Mul64Fx2: case Iop_Mul64F0x2:
+ case Iop_Sub32Fx4: case Iop_Sub32F0x4:
+ case Iop_Sub64Fx2: case Iop_Sub64F0x2:
+ case Iop_AndV128: case Iop_OrV128: case Iop_XorV128:
+ case Iop_Add8x16: case Iop_Add16x8:
+ case Iop_Add32x4: case Iop_Add64x2:
+ case Iop_QAdd8Ux16: case Iop_QAdd16Ux8:
+ case Iop_QAdd32Ux4: //case Iop_QAdd64Ux2:
+ case Iop_QAdd8Sx16: case Iop_QAdd16Sx8:
+ case Iop_QAdd32Sx4: case Iop_QAdd64Sx2:
+ case Iop_PwAdd8x16: case Iop_PwAdd16x8: case Iop_PwAdd32x4:
+ case Iop_Sub8x16: case Iop_Sub16x8:
+ case Iop_Sub32x4: case Iop_Sub64x2:
+ case Iop_QSub8Ux16: case Iop_QSub16Ux8:
+ case Iop_QSub32Ux4: //case Iop_QSub64Ux2:
+ case Iop_QSub8Sx16: case Iop_QSub16Sx8:
+ case Iop_QSub32Sx4: case Iop_QSub64Sx2:
+ case Iop_Mul8x16: case Iop_Mul16x8: case Iop_Mul32x4:
+ case Iop_PolynomialMul8x16:
+ case Iop_MulHi16Ux8: case Iop_MulHi32Ux4:
+ case Iop_MulHi16Sx8: case Iop_MulHi32Sx4:
+ case Iop_QDMulHi16Sx8: case Iop_QDMulHi32Sx4:
+ case Iop_QRDMulHi16Sx8: case Iop_QRDMulHi32Sx4:
+ case Iop_MullEven8Ux16: case Iop_MullEven16Ux8:
+ case Iop_MullEven8Sx16: case Iop_MullEven16Sx8:
+ case Iop_Avg8Ux16: case Iop_Avg16Ux8: case Iop_Avg32Ux4:
+ case Iop_Avg8Sx16: case Iop_Avg16Sx8: case Iop_Avg32Sx4:
+ case Iop_Max8Sx16: case Iop_Max16Sx8: case Iop_Max32Sx4:
+ case Iop_Max8Ux16: case Iop_Max16Ux8: case Iop_Max32Ux4:
+ case Iop_Min8Sx16: case Iop_Min16Sx8: case Iop_Min32Sx4:
+ case Iop_Min8Ux16: case Iop_Min16Ux8: case Iop_Min32Ux4:
+ case Iop_CmpEQ8x16: case Iop_CmpEQ16x8: case Iop_CmpEQ32x4:
+ case Iop_CmpGT8Sx16: case Iop_CmpGT16Sx8: case Iop_CmpGT32Sx4:
+ case Iop_CmpGT64Sx2:
+ case Iop_CmpGT8Ux16: case Iop_CmpGT16Ux8: case Iop_CmpGT32Ux4:
+ case Iop_Shl8x16: case Iop_Shl16x8: case Iop_Shl32x4: case Iop_Shl64x2:
+ case Iop_QShl8x16: case Iop_QShl16x8: case Iop_QShl32x4: case Iop_QShl64x2:
+ case Iop_QSal8x16: case Iop_QSal16x8: case Iop_QSal32x4: case Iop_QSal64x2:
+ case Iop_Shr8x16: case Iop_Shr16x8: case Iop_Shr32x4: case Iop_Shr64x2:
+ case Iop_Sar8x16: case Iop_Sar16x8: case Iop_Sar32x4: case Iop_Sar64x2:
+ case Iop_Sal8x16: case Iop_Sal16x8: case Iop_Sal32x4: case Iop_Sal64x2:
+ case Iop_Rol8x16: case Iop_Rol16x8: case Iop_Rol32x4:
+ case Iop_QNarrow16Ux8: case Iop_QNarrow32Ux4:
+ case Iop_QNarrow16Sx8: case Iop_QNarrow32Sx4:
+ case Iop_Narrow16x8: case Iop_Narrow32x4:
+ case Iop_InterleaveHI8x16: case Iop_InterleaveHI16x8:
+ case Iop_InterleaveHI32x4: case Iop_InterleaveHI64x2:
+ case Iop_InterleaveLO8x16: case Iop_InterleaveLO16x8:
+ case Iop_InterleaveLO32x4: case Iop_InterleaveLO64x2:
+ case Iop_CatOddLanes8x16: case Iop_CatEvenLanes8x16:
+ case Iop_CatOddLanes16x8: case Iop_CatEvenLanes16x8:
+ case Iop_CatOddLanes32x4: case Iop_CatEvenLanes32x4:
+ case Iop_InterleaveOddLanes8x16: case Iop_InterleaveEvenLanes8x16:
+ case Iop_InterleaveOddLanes16x8: case Iop_InterleaveEvenLanes16x8:
+ case Iop_InterleaveOddLanes32x4: case Iop_InterleaveEvenLanes32x4:
+ case Iop_Perm8x16:
+ case Iop_Recps32Fx4:
+ case Iop_Rsqrts32Fx4:
+ BINARY(Ity_V128,Ity_V128, Ity_V128);
+
+ case Iop_PolynomialMull8x8:
+ case Iop_Mull8Ux8: case Iop_Mull8Sx8:
+ case Iop_Mull16Ux4: case Iop_Mull16Sx4:
+ case Iop_Mull32Ux2: case Iop_Mull32Sx2:
+ BINARY(Ity_I64, Ity_I64, Ity_V128);
+
+ case Iop_NotV128:
+ case Iop_Recip32Fx4: case Iop_Recip32F0x4:
+ case Iop_Recip32x4:
+ case Iop_Recip64Fx2: case Iop_Recip64F0x2:
+ case Iop_RSqrt32Fx4: case Iop_RSqrt32F0x4:
+ case Iop_RSqrt64Fx2: case Iop_RSqrt64F0x2:
+ case Iop_Sqrt32Fx4: case Iop_Sqrt32F0x4:
+ case Iop_Sqrt64Fx2: case Iop_Sqrt64F0x2:
+ case Iop_CmpNEZ8x16: case Iop_CmpNEZ16x8:
+ case Iop_CmpNEZ32x4: case Iop_CmpNEZ64x2:
+ case Iop_Cnt8x16:
+ case Iop_Clz8Sx16: case Iop_Clz16Sx8: case Iop_Clz32Sx4:
+ case Iop_Cls8Sx16: case Iop_Cls16Sx8: case Iop_Cls32Sx4:
+ case Iop_PwAddL8Ux16: case Iop_PwAddL16Ux8: case Iop_PwAddL32Ux4:
+ case Iop_PwAddL8Sx16: case Iop_PwAddL16Sx8: case Iop_PwAddL32Sx4:
+ case Iop_Reverse64_8x16: case Iop_Reverse64_16x8: case Iop_Reverse64_32x4:
+ case Iop_Reverse32_8x16: case Iop_Reverse32_16x8:
+ case Iop_Reverse16_8x16:
+ case Iop_Neg32Fx4:
+ case Iop_Abs8x16: case Iop_Abs16x8: case Iop_Abs32x4:
+ UNARY(Ity_V128, Ity_V128);
+
+ case Iop_ShlV128: case Iop_ShrV128:
+ case Iop_ShlN8x16: case Iop_ShlN16x8:
+ case Iop_ShlN32x4: case Iop_ShlN64x2:
+ case Iop_ShrN8x16: case Iop_ShrN16x8:
+ case Iop_ShrN32x4: case Iop_ShrN64x2:
+ case Iop_SarN8x16: case Iop_SarN16x8:
+ case Iop_SarN32x4: case Iop_SarN64x2:
+ case Iop_QShlN8x16: case Iop_QShlN16x8:
+ case Iop_QShlN32x4: case Iop_QShlN64x2:
+ case Iop_QShlN8Sx16: case Iop_QShlN16Sx8:
+ case Iop_QShlN32Sx4: case Iop_QShlN64Sx2:
+ case Iop_QSalN8x16: case Iop_QSalN16x8:
+ case Iop_QSalN32x4: case Iop_QSalN64x2:
+ BINARY(Ity_V128,Ity_I8, Ity_V128);
+
+ case Iop_F32ToFixed32Ux4_RZ:
+ case Iop_F32ToFixed32Sx4_RZ:
+ case Iop_Fixed32UToF32x4_RN:
+ case Iop_Fixed32SToF32x4_RN:
+ BINARY(Ity_V128, Ity_I8, Ity_V128);
+
+ case Iop_F32ToFixed32Ux2_RZ:
+ case Iop_F32ToFixed32Sx2_RZ:
+ case Iop_Fixed32UToF32x2_RN:
+ case Iop_Fixed32SToF32x2_RN:
+ BINARY(Ity_I64, Ity_I8, Ity_I64);
+
+ case Iop_GetElem8x16:
+ BINARY(Ity_V128, Ity_I8, Ity_I8);
+ case Iop_GetElem16x8:
+ BINARY(Ity_V128, Ity_I8, Ity_I16);
+ case Iop_GetElem32x4:
+ BINARY(Ity_V128, Ity_I8, Ity_I32);
+ case Iop_GetElem64x2:
+ BINARY(Ity_V128, Ity_I8, Ity_I64);
+ case Iop_GetElem8x8:
+ BINARY(Ity_I64, Ity_I8, Ity_I8);
+ case Iop_GetElem16x4:
+ BINARY(Ity_I64, Ity_I8, Ity_I16);
+ case Iop_GetElem32x2:
+ BINARY(Ity_I64, Ity_I8, Ity_I32);
+ case Iop_SetElem8x8:
+ TERNARY(Ity_I64, Ity_I8, Ity_I8, Ity_I64);
+ case Iop_SetElem16x4:
+ TERNARY(Ity_I64, Ity_I8, Ity_I16, Ity_I64);
+ case Iop_SetElem32x2:
+ TERNARY(Ity_I64, Ity_I8, Ity_I32, Ity_I64);
+
+ case Iop_Extract64:
+ TERNARY(Ity_I64, Ity_I64, Ity_I8, Ity_I64);
+ case Iop_ExtractV128:
+ TERNARY(Ity_V128, Ity_V128, Ity_I8, Ity_V128);
+
+ case Iop_QDMulLong16Sx4: case Iop_QDMulLong32Sx2:
+ BINARY(Ity_I64, Ity_I64, Ity_V128);
+
+ default:
+ ppIROp(op);
+ vpanic("typeOfPrimop");
+ }
+# undef UNARY
+# undef BINARY
+# undef TERNARY
+# undef QUATERNARY
+# undef COMPARISON
+# undef UNARY_COMPARISON
+}
+
+
+/*---------------------------------------------------------------*/
+/*--- Helper functions for the IR -- IR Basic Blocks ---*/
+/*---------------------------------------------------------------*/
+
+void addStmtToIRSB ( IRSB* bb, IRStmt* st )
+{
+ Int i;
+ if (bb->stmts_used == bb->stmts_size) {
+ IRStmt** stmts2 = LibVEX_Alloc(2 * bb->stmts_size * sizeof(IRStmt*));
+ for (i = 0; i < bb->stmts_size; i++)
+ stmts2[i] = bb->stmts[i];
+ bb->stmts = stmts2;
+ bb->stmts_size *= 2;
+ }
+ vassert(bb->stmts_used < bb->stmts_size);
+ bb->stmts[bb->stmts_used] = st;
+ bb->stmts_used++;
+}
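+
+/* Usage sketch (illustrative): blocks are built by appending one
+   statement at a time; the backing array doubles when full, so
+   appending is amortised O(1).
+
+      IRSB* bb = emptyIRSB();
+      addStmtToIRSB(bb, IRStmt_NoOp());
+*/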
+
+
+/*---------------------------------------------------------------*/
+/*--- Helper functions for the IR -- IR Type Environments ---*/
+/*---------------------------------------------------------------*/
+
+/* Allocate a new IRTemp, given its type. */
+
+IRTemp newIRTemp ( IRTypeEnv* env, IRType ty )
+{
+ vassert(env);
+ vassert(env->types_used >= 0);
+ vassert(env->types_size >= 0);
+ vassert(env->types_used <= env->types_size);
+ if (env->types_used < env->types_size) {
+ env->types[env->types_used] = ty;
+ return env->types_used++;
+ } else {
+ Int i;
+ Int new_size = env->types_size==0 ? 8 : 2*env->types_size;
+ IRType* new_types
+ = LibVEX_Alloc(new_size * sizeof(IRType));
+ for (i = 0; i < env->types_used; i++)
+ new_types[i] = env->types[i];
+ env->types = new_types;
+ env->types_size = new_size;
+ return newIRTemp(env, ty);
+ }
+}
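+
+/* Usage sketch (illustrative): a fresh temp is just an index into
+   the type environment.  Here OFFSET is a hypothetical guest state
+   offset:
+
+      IRTemp t = newIRTemp(bb->tyenv, Ity_I32);
+      addStmtToIRSB(bb, IRStmt_WrTmp(t, IRExpr_Get(OFFSET, Ity_I32)));
+
+   declares t :: Ity_I32 and assigns it exactly once. */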
+
+
+/*---------------------------------------------------------------*/
+/*--- Helper functions for the IR -- finding types of exprs ---*/
+/*---------------------------------------------------------------*/
+
+inline
+IRType typeOfIRTemp ( IRTypeEnv* env, IRTemp tmp )
+{
+ vassert(tmp >= 0);
+ vassert(tmp < env->types_used);
+ return env->types[tmp];
+}
+
+
+IRType typeOfIRConst ( IRConst* con )
+{
+ switch (con->tag) {
+ case Ico_U1: return Ity_I1;
+ case Ico_U8: return Ity_I8;
+ case Ico_U16: return Ity_I16;
+ case Ico_U32: return Ity_I32;
+ case Ico_U64: return Ity_I64;
+ case Ico_F64: return Ity_F64;
+ case Ico_F64i: return Ity_F64;
+ case Ico_V128: return Ity_V128;
+ default: vpanic("typeOfIRConst");
+ }
+}
+
+IRType typeOfIRExpr ( IRTypeEnv* tyenv, IRExpr* e )
+{
+ IRType t_dst, t_arg1, t_arg2, t_arg3, t_arg4;
+ start:
+ switch (e->tag) {
+ case Iex_Load:
+ return e->Iex.Load.ty;
+ case Iex_Get:
+ return e->Iex.Get.ty;
+ case Iex_GetI:
+ return e->Iex.GetI.descr->elemTy;
+ case Iex_RdTmp:
+ return typeOfIRTemp(tyenv, e->Iex.RdTmp.tmp);
+ case Iex_Const:
+ return typeOfIRConst(e->Iex.Const.con);
+ case Iex_Qop:
+ typeOfPrimop(e->Iex.Qop.op,
+ &t_dst, &t_arg1, &t_arg2, &t_arg3, &t_arg4);
+ return t_dst;
+ case Iex_Triop:
+ typeOfPrimop(e->Iex.Triop.op,
+ &t_dst, &t_arg1, &t_arg2, &t_arg3, &t_arg4);
+ return t_dst;
+ case Iex_Binop:
+ typeOfPrimop(e->Iex.Binop.op,
+ &t_dst, &t_arg1, &t_arg2, &t_arg3, &t_arg4);
+ return t_dst;
+ case Iex_Unop:
+ typeOfPrimop(e->Iex.Unop.op,
+ &t_dst, &t_arg1, &t_arg2, &t_arg3, &t_arg4);
+ return t_dst;
+ case Iex_CCall:
+ return e->Iex.CCall.retty;
+ case Iex_Mux0X:
+ e = e->Iex.Mux0X.expr0;
+ goto start;
+ /* return typeOfIRExpr(tyenv, e->Iex.Mux0X.expr0); */
+ case Iex_Binder:
+ vpanic("typeOfIRExpr: Binder is not a valid expression");
+ default:
+ ppIRExpr(e);
+ vpanic("typeOfIRExpr");
+ }
+}
+
+/* Is this value actually in the enumeration 'IRType'? */
+Bool isPlausibleIRType ( IRType ty )
+{
+ switch (ty) {
+ case Ity_INVALID: case Ity_I1:
+ case Ity_I8: case Ity_I16: case Ity_I32:
+ case Ity_I64: case Ity_I128:
+ case Ity_F32: case Ity_F64:
+ case Ity_V128:
+ return True;
+ default:
+ return False;
+ }
+}
+
+
+/*---------------------------------------------------------------*/
+/*--- Sanity checking -- FLATNESS ---*/
+/*---------------------------------------------------------------*/
+
+/* Check that the canonical flatness constraints hold on an
+   IRStmt.  The only place where any expression is allowed to be
+   non-atomic is the RHS of IRStmt_WrTmp. */
+
+/* Relies on:
+ inline static Bool isAtom ( IRExpr* e ) {
+ return e->tag == Iex_RdTmp || e->tag == Iex_Const;
+ }
+*/
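+
+/* Example (illustrative): the statement
+      t1 = Add32(t2,t3)
+   is flat, since both operands are atoms, whereas
+      t1 = Add32(Add32(t2,t3),t4)
+   is not, since its first operand is itself a compound
+   expression. */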
+
+Bool isFlatIRStmt ( IRStmt* st )
+{
+ Int i;
+ IRExpr* e;
+ IRDirty* di;
+ IRCAS* cas;
+
+ switch (st->tag) {
+ case Ist_AbiHint:
+ return isIRAtom(st->Ist.AbiHint.base)
+ && isIRAtom(st->Ist.AbiHint.nia);
+ case Ist_Put:
+ return isIRAtom(st->Ist.Put.data);
+ case Ist_PutI:
+ return toBool( isIRAtom(st->Ist.PutI.ix)
+ && isIRAtom(st->Ist.PutI.data) );
+ case Ist_WrTmp:
+ /* This is the only interesting case. The RHS can be any
+ expression, *but* all its subexpressions *must* be
+ atoms. */
+ e = st->Ist.WrTmp.data;
+ switch (e->tag) {
+ case Iex_Binder: return True;
+ case Iex_Get: return True;
+ case Iex_GetI: return isIRAtom(e->Iex.GetI.ix);
+ case Iex_RdTmp: return True;
+ case Iex_Qop: return toBool(
+ isIRAtom(e->Iex.Qop.arg1)
+ && isIRAtom(e->Iex.Qop.arg2)
+ && isIRAtom(e->Iex.Qop.arg3)
+ && isIRAtom(e->Iex.Qop.arg4));
+ case Iex_Triop: return toBool(
+ isIRAtom(e->Iex.Triop.arg1)
+ && isIRAtom(e->Iex.Triop.arg2)
+ && isIRAtom(e->Iex.Triop.arg3));
+ case Iex_Binop: return toBool(
+ isIRAtom(e->Iex.Binop.arg1)
+ && isIRAtom(e->Iex.Binop.arg2));
+ case Iex_Unop: return isIRAtom(e->Iex.Unop.arg);
+ case Iex_Load: return isIRAtom(e->Iex.Load.addr);
+ case Iex_Const: return True;
+ case Iex_CCall: for (i = 0; e->Iex.CCall.args[i]; i++)
+ if (!isIRAtom(e->Iex.CCall.args[i]))
+ return False;
+ return True;
+ case Iex_Mux0X: return toBool (
+ isIRAtom(e->Iex.Mux0X.cond)
+ && isIRAtom(e->Iex.Mux0X.expr0)
+ && isIRAtom(e->Iex.Mux0X.exprX));
+ default: vpanic("isFlatIRStmt(e)");
+ }
+ /*notreached*/
+ vassert(0);
+ case Ist_Store:
+ return toBool( isIRAtom(st->Ist.Store.addr)
+ && isIRAtom(st->Ist.Store.data) );
+ case Ist_CAS:
+ cas = st->Ist.CAS.details;
+ return toBool( isIRAtom(cas->addr)
+ && (cas->expdHi ? isIRAtom(cas->expdHi) : True)
+ && isIRAtom(cas->expdLo)
+ && (cas->dataHi ? isIRAtom(cas->dataHi) : True)
+ && isIRAtom(cas->dataLo) );
+ case Ist_LLSC:
+ return toBool( isIRAtom(st->Ist.LLSC.addr)
+ && (st->Ist.LLSC.storedata
+ ? isIRAtom(st->Ist.LLSC.storedata) : True) );
+ case Ist_Dirty:
+ di = st->Ist.Dirty.details;
+ if (!isIRAtom(di->guard))
+ return False;
+ for (i = 0; di->args[i]; i++)
+ if (!isIRAtom(di->args[i]))
+ return False;
+ if (di->mAddr && !isIRAtom(di->mAddr))
+ return False;
+ return True;
+ case Ist_NoOp:
+ case Ist_IMark:
+ case Ist_MBE:
+ return True;
+ case Ist_Exit:
+ return isIRAtom(st->Ist.Exit.guard);
+ default:
+ vpanic("isFlatIRStmt(st)");
+ }
+}
+
+
+/*---------------------------------------------------------------*/
+/*--- Sanity checking ---*/
+/*---------------------------------------------------------------*/
+
+/* Checks:
+
+ Everything is type-consistent. No ill-typed anything.
+ The target address at the end of the BB is a 32- or 64-
+ bit expression, depending on the guest's word size.
+
+ Each temp is assigned only once, before its uses.
+*/
+
+static inline Int countArgs ( IRExpr** args )
+{
+ Int i;
+ for (i = 0; args[i]; i++)
+ ;
+ return i;
+}
+
+static
+__attribute__((noreturn))
+void sanityCheckFail ( IRSB* bb, IRStmt* stmt, HChar* what )
+{
+ vex_printf("\nIR SANITY CHECK FAILURE\n\n");
+ ppIRSB(bb);
+ if (stmt) {
+ vex_printf("\nIN STATEMENT:\n\n");
+ ppIRStmt(stmt);
+ }
+ vex_printf("\n\nERROR = %s\n\n", what );
+ vpanic("sanityCheckFail: exiting due to bad IR");
+}
+
+static Bool saneIRRegArray ( IRRegArray* arr )
+{
+ if (arr->base < 0 || arr->base > 10000 /* somewhat arbitrary */)
+ return False;
+ if (arr->elemTy == Ity_I1)
+ return False;
+ if (arr->nElems <= 0 || arr->nElems > 500 /* somewhat arbitrary */)
+ return False;
+ return True;
+}
+
+static Bool saneIRCallee ( IRCallee* cee )
+{
+ if (cee->name == NULL)
+ return False;
+ if (cee->addr == 0)
+ return False;
+ if (cee->regparms < 0 || cee->regparms > 3)
+ return False;
+ return True;
+}
+
+static Bool saneIRConst ( IRConst* con )
+{
+ switch (con->tag) {
+ case Ico_U1:
+ return toBool( con->Ico.U1 == True || con->Ico.U1 == False );
+ default:
+ /* Is there anything we can meaningfully check? I don't
+ think so. */
+ return True;
+ }
+}
+
+/* Traverse a Stmt/Expr, inspecting IRTemp uses. Report any out of
+ range ones. Report any which are read and for which the current
+ def_count is zero. */
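+
+/* Example (illustrative): in a block containing
+      t1 = Add32(t0,t0)
+   with no prior definition of t0, the RdTmp of t0 is reported,
+   since def_counts[t0] is still zero when the statement is
+   scanned. */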
+
+static
+void useBeforeDef_Temp ( IRSB* bb, IRStmt* stmt, IRTemp tmp, Int* def_counts )
+{
+ if (tmp < 0 || tmp >= bb->tyenv->types_used)
+ sanityCheckFail(bb,stmt, "out of range Temp in IRExpr");
+ if (def_counts[tmp] < 1)
+ sanityCheckFail(bb,stmt, "IRTemp use before def in IRExpr");
+}
+
+static
+void useBeforeDef_Expr ( IRSB* bb, IRStmt* stmt, IRExpr* expr, Int* def_counts )
+{
+ Int i;
+ switch (expr->tag) {
+ case Iex_Get:
+ break;
+ case Iex_GetI:
+ useBeforeDef_Expr(bb,stmt,expr->Iex.GetI.ix,def_counts);
+ break;
+ case Iex_RdTmp:
+ useBeforeDef_Temp(bb,stmt,expr->Iex.RdTmp.tmp,def_counts);
+ break;
+ case Iex_Qop:
+ useBeforeDef_Expr(bb,stmt,expr->Iex.Qop.arg1,def_counts);
+ useBeforeDef_Expr(bb,stmt,expr->Iex.Qop.arg2,def_counts);
+ useBeforeDef_Expr(bb,stmt,expr->Iex.Qop.arg3,def_counts);
+ useBeforeDef_Expr(bb,stmt,expr->Iex.Qop.arg4,def_counts);
+ break;
+ case Iex_Triop:
+ useBeforeDef_Expr(bb,stmt,expr->Iex.Triop.arg1,def_counts);
+ useBeforeDef_Expr(bb,stmt,expr->Iex.Triop.arg2,def_counts);
+ useBeforeDef_Expr(bb,stmt,expr->Iex.Triop.arg3,def_counts);
+ break;
+ case Iex_Binop:
+ useBeforeDef_Expr(bb,stmt,expr->Iex.Binop.arg1,def_counts);
+ useBeforeDef_Expr(bb,stmt,expr->Iex.Binop.arg2,def_counts);
+ break;
+ case Iex_Unop:
+ useBeforeDef_Expr(bb,stmt,expr->Iex.Unop.arg,def_counts);
+ break;
+ case Iex_Load:
+ useBeforeDef_Expr(bb,stmt,expr->Iex.Load.addr,def_counts);
+ break;
+ case Iex_Const:
+ break;
+ case Iex_CCall:
+ for (i = 0; expr->Iex.CCall.args[i]; i++)
+ useBeforeDef_Expr(bb,stmt,expr->Iex.CCall.args[i],def_counts);
+ break;
+ case Iex_Mux0X:
+ useBeforeDef_Expr(bb,stmt,expr->Iex.Mux0X.cond,def_counts);
+ useBeforeDef_Expr(bb,stmt,expr->Iex.Mux0X.expr0,def_counts);
+ useBeforeDef_Expr(bb,stmt,expr->Iex.Mux0X.exprX,def_counts);
+ break;
+ default:
+ vpanic("useBeforeDef_Expr");
+ }
+}
+
+static
+void useBeforeDef_Stmt ( IRSB* bb, IRStmt* stmt, Int* def_counts )
+{
+ Int i;
+ IRDirty* d;
+ IRCAS* cas;
+ switch (stmt->tag) {
+ case Ist_IMark:
+ break;
+ case Ist_AbiHint:
+ useBeforeDef_Expr(bb,stmt,stmt->Ist.AbiHint.base,def_counts);
+ useBeforeDef_Expr(bb,stmt,stmt->Ist.AbiHint.nia,def_counts);
+ break;
+ case Ist_Put:
+ useBeforeDef_Expr(bb,stmt,stmt->Ist.Put.data,def_counts);
+ break;
+ case Ist_PutI:
+ useBeforeDef_Expr(bb,stmt,stmt->Ist.PutI.ix,def_counts);
+ useBeforeDef_Expr(bb,stmt,stmt->Ist.PutI.data,def_counts);
+ break;
+ case Ist_WrTmp:
+ useBeforeDef_Expr(bb,stmt,stmt->Ist.WrTmp.data,def_counts);
+ break;
+ case Ist_Store:
+ useBeforeDef_Expr(bb,stmt,stmt->Ist.Store.addr,def_counts);
+ useBeforeDef_Expr(bb,stmt,stmt->Ist.Store.data,def_counts);
+ break;
+ case Ist_CAS:
+ cas = stmt->Ist.CAS.details;
+ useBeforeDef_Expr(bb,stmt,cas->addr,def_counts);
+ if (cas->expdHi)
+ useBeforeDef_Expr(bb,stmt,cas->expdHi,def_counts);
+ useBeforeDef_Expr(bb,stmt,cas->expdLo,def_counts);
+ if (cas->dataHi)
+ useBeforeDef_Expr(bb,stmt,cas->dataHi,def_counts);
+ useBeforeDef_Expr(bb,stmt,cas->dataLo,def_counts);
+ break;
+ case Ist_LLSC:
+ useBeforeDef_Expr(bb,stmt,stmt->Ist.LLSC.addr,def_counts);
+ if (stmt->Ist.LLSC.storedata != NULL)
+ useBeforeDef_Expr(bb,stmt,stmt->Ist.LLSC.storedata,def_counts);
+ break;
+ case Ist_Dirty:
+ d = stmt->Ist.Dirty.details;
+ for (i = 0; d->args[i] != NULL; i++)
+ useBeforeDef_Expr(bb,stmt,d->args[i],def_counts);
+ if (d->mFx != Ifx_None)
+ useBeforeDef_Expr(bb,stmt,d->mAddr,def_counts);
+ break;
+ case Ist_NoOp:
+ case Ist_MBE:
+ break;
+ case Ist_Exit:
+ useBeforeDef_Expr(bb,stmt,stmt->Ist.Exit.guard,def_counts);
+ break;
+ default:
+ vpanic("useBeforeDef_Stmt");
+ }
+}
+
+static
+void tcExpr ( IRSB* bb, IRStmt* stmt, IRExpr* expr, IRType gWordTy )
+{
+ Int i;
+ IRType t_dst, t_arg1, t_arg2, t_arg3, t_arg4;
+ IRTypeEnv* tyenv = bb->tyenv;
+ switch (expr->tag) {
+ case Iex_Get:
+ case Iex_RdTmp:
+ break;
+ case Iex_GetI:
+ tcExpr(bb,stmt, expr->Iex.GetI.ix, gWordTy );
+ if (typeOfIRExpr(tyenv,expr->Iex.GetI.ix) != Ity_I32)
+ sanityCheckFail(bb,stmt,"IRExpr.GetI.ix: not :: Ity_I32");
+ if (!saneIRRegArray(expr->Iex.GetI.descr))
+ sanityCheckFail(bb,stmt,"IRExpr.GetI.descr: invalid descr");
+ break;
+ case Iex_Qop: {
+ IRType ttarg1, ttarg2, ttarg3, ttarg4;
+ tcExpr(bb,stmt, expr->Iex.Qop.arg1, gWordTy );
+ tcExpr(bb,stmt, expr->Iex.Qop.arg2, gWordTy );
+ tcExpr(bb,stmt, expr->Iex.Qop.arg3, gWordTy );
+ tcExpr(bb,stmt, expr->Iex.Qop.arg4, gWordTy );
+ typeOfPrimop(expr->Iex.Qop.op,
+ &t_dst, &t_arg1, &t_arg2, &t_arg3, &t_arg4);
+ if (t_arg1 == Ity_INVALID || t_arg2 == Ity_INVALID
+ || t_arg3 == Ity_INVALID || t_arg4 == Ity_INVALID) {
+ vex_printf(" op name: " );
+ ppIROp(expr->Iex.Qop.op);
+ vex_printf("\n");
+ sanityCheckFail(bb,stmt,
+ "Iex.Qop: wrong arity op\n"
+ "... name of op precedes BB printout\n");
+ }
+ ttarg1 = typeOfIRExpr(tyenv, expr->Iex.Qop.arg1);
+ ttarg2 = typeOfIRExpr(tyenv, expr->Iex.Qop.arg2);
+ ttarg3 = typeOfIRExpr(tyenv, expr->Iex.Qop.arg3);
+ ttarg4 = typeOfIRExpr(tyenv, expr->Iex.Qop.arg4);
+ if (t_arg1 != ttarg1 || t_arg2 != ttarg2
+ || t_arg3 != ttarg3 || t_arg4 != ttarg4) {
+ vex_printf(" op name: ");
+ ppIROp(expr->Iex.Qop.op);
+ vex_printf("\n");
+ vex_printf(" op type is (");
+ ppIRType(t_arg1);
+ vex_printf(",");
+ ppIRType(t_arg2);
+ vex_printf(",");
+ ppIRType(t_arg3);
+ vex_printf(",");
+ ppIRType(t_arg4);
+ vex_printf(") -> ");
+ ppIRType (t_dst);
+ vex_printf("\narg tys are (");
+ ppIRType(ttarg1);
+ vex_printf(",");
+ ppIRType(ttarg2);
+ vex_printf(",");
+ ppIRType(ttarg3);
+ vex_printf(",");
+ ppIRType(ttarg4);
+ vex_printf(")\n");
+ sanityCheckFail(bb,stmt,
+ "Iex.Qop: arg tys don't match op tys\n"
+ "... additional details precede BB printout\n");
+ }
+ break;
+ }
+ case Iex_Triop: {
+ IRType ttarg1, ttarg2, ttarg3;
+ tcExpr(bb,stmt, expr->Iex.Triop.arg1, gWordTy );
+ tcExpr(bb,stmt, expr->Iex.Triop.arg2, gWordTy );
+ tcExpr(bb,stmt, expr->Iex.Triop.arg3, gWordTy );
+ typeOfPrimop(expr->Iex.Triop.op,
+ &t_dst, &t_arg1, &t_arg2, &t_arg3, &t_arg4);
+ if (t_arg1 == Ity_INVALID || t_arg2 == Ity_INVALID
+ || t_arg3 == Ity_INVALID || t_arg4 != Ity_INVALID) {
+ vex_printf(" op name: " );
+ ppIROp(expr->Iex.Triop.op);
+ vex_printf("\n");
+ sanityCheckFail(bb,stmt,
+ "Iex.Triop: wrong arity op\n"
+ "... name of op precedes BB printout\n");
+ }
+ ttarg1 = typeOfIRExpr(tyenv, expr->Iex.Triop.arg1);
+ ttarg2 = typeOfIRExpr(tyenv, expr->Iex.Triop.arg2);
+ ttarg3 = typeOfIRExpr(tyenv, expr->Iex.Triop.arg3);
+ if (t_arg1 != ttarg1 || t_arg2 != ttarg2 || t_arg3 != ttarg3) {
+ vex_printf(" op name: ");
+ ppIROp(expr->Iex.Triop.op);
+ vex_printf("\n");
+ vex_printf(" op type is (");
+ ppIRType(t_arg1);
+ vex_printf(",");
+ ppIRType(t_arg2);
+ vex_printf(",");
+ ppIRType(t_arg3);
+ vex_printf(") -> ");
+ ppIRType (t_dst);
+ vex_printf("\narg tys are (");
+ ppIRType(ttarg1);
+ vex_printf(",");
+ ppIRType(ttarg2);
+ vex_printf(",");
+ ppIRType(ttarg3);
+ vex_printf(")\n");
+ sanityCheckFail(bb,stmt,
+ "Iex.Triop: arg tys don't match op tys\n"
+ "... additional details precede BB printout\n");
+ }
+ break;
+ }
+ case Iex_Binop: {
+ IRType ttarg1, ttarg2;
+ tcExpr(bb,stmt, expr->Iex.Binop.arg1, gWordTy );
+ tcExpr(bb,stmt, expr->Iex.Binop.arg2, gWordTy );
+ typeOfPrimop(expr->Iex.Binop.op,
+ &t_dst, &t_arg1, &t_arg2, &t_arg3, &t_arg4);
+ if (t_arg1 == Ity_INVALID || t_arg2 == Ity_INVALID
+ || t_arg3 != Ity_INVALID || t_arg4 != Ity_INVALID) {
+ vex_printf(" op name: " );
+ ppIROp(expr->Iex.Binop.op);
+ vex_printf("\n");
+ sanityCheckFail(bb,stmt,
+ "Iex.Binop: wrong arity op\n"
+ "... name of op precedes BB printout\n");
+ }
+ ttarg1 = typeOfIRExpr(tyenv, expr->Iex.Binop.arg1);
+ ttarg2 = typeOfIRExpr(tyenv, expr->Iex.Binop.arg2);
+ if (t_arg1 != ttarg1 || t_arg2 != ttarg2) {
+ vex_printf(" op name: ");
+ ppIROp(expr->Iex.Binop.op);
+ vex_printf("\n");
+ vex_printf(" op type is (");
+ ppIRType(t_arg1);
+ vex_printf(",");
+ ppIRType(t_arg2);
+ vex_printf(") -> ");
+ ppIRType (t_dst);
+ vex_printf("\narg tys are (");
+ ppIRType(ttarg1);
+ vex_printf(",");
+ ppIRType(ttarg2);
+ vex_printf(")\n");
+ sanityCheckFail(bb,stmt,
+ "Iex.Binop: arg tys don't match op tys\n"
+ "... additional details precede BB printout\n");
+ }
+ break;
+ }
+ case Iex_Unop:
+ tcExpr(bb,stmt, expr->Iex.Unop.arg, gWordTy );
+         typeOfPrimop(expr->Iex.Unop.op,
+                      &t_dst, &t_arg1, &t_arg2, &t_arg3, &t_arg4);
+ if (t_arg1 == Ity_INVALID || t_arg2 != Ity_INVALID
+ || t_arg3 != Ity_INVALID || t_arg4 != Ity_INVALID)
+ sanityCheckFail(bb,stmt,"Iex.Unop: wrong arity op");
+ if (t_arg1 != typeOfIRExpr(tyenv, expr->Iex.Unop.arg))
+ sanityCheckFail(bb,stmt,"Iex.Unop: arg ty doesn't match op ty");
+ break;
+ case Iex_Load:
+ tcExpr(bb,stmt, expr->Iex.Load.addr, gWordTy);
+ if (typeOfIRExpr(tyenv, expr->Iex.Load.addr) != gWordTy)
+ sanityCheckFail(bb,stmt,"Iex.Load.addr: not :: guest word type");
+ if (expr->Iex.Load.end != Iend_LE && expr->Iex.Load.end != Iend_BE)
+ sanityCheckFail(bb,stmt,"Iex.Load.end: bogus endianness");
+ break;
+ case Iex_CCall:
+ if (!saneIRCallee(expr->Iex.CCall.cee))
+ sanityCheckFail(bb,stmt,"Iex.CCall.cee: bad IRCallee");
+ if (expr->Iex.CCall.cee->regparms > countArgs(expr->Iex.CCall.args))
+ sanityCheckFail(bb,stmt,"Iex.CCall.cee: #regparms > #args");
+ for (i = 0; expr->Iex.CCall.args[i]; i++) {
+ if (i >= 32)
+ sanityCheckFail(bb,stmt,"Iex.CCall: > 32 args");
+ tcExpr(bb,stmt, expr->Iex.CCall.args[i], gWordTy);
+ }
+ if (expr->Iex.CCall.retty == Ity_I1)
+ sanityCheckFail(bb,stmt,"Iex.CCall.retty: cannot return :: Ity_I1");
+ for (i = 0; expr->Iex.CCall.args[i]; i++)
+ if (typeOfIRExpr(tyenv, expr->Iex.CCall.args[i]) == Ity_I1)
+ sanityCheckFail(bb,stmt,"Iex.CCall.arg: arg :: Ity_I1");
+ break;
+ case Iex_Const:
+ if (!saneIRConst(expr->Iex.Const.con))
+ sanityCheckFail(bb,stmt,"Iex.Const.con: invalid const");
+ break;
+ case Iex_Mux0X:
+ tcExpr(bb,stmt, expr->Iex.Mux0X.cond, gWordTy);
+ tcExpr(bb,stmt, expr->Iex.Mux0X.expr0, gWordTy);
+ tcExpr(bb,stmt, expr->Iex.Mux0X.exprX, gWordTy);
+ if (typeOfIRExpr(tyenv, expr->Iex.Mux0X.cond) != Ity_I8)
+ sanityCheckFail(bb,stmt,"Iex.Mux0X.cond: cond :: Ity_I8");
+ if (typeOfIRExpr(tyenv, expr->Iex.Mux0X.expr0)
+ != typeOfIRExpr(tyenv, expr->Iex.Mux0X.exprX))
+ sanityCheckFail(bb,stmt,"Iex.Mux0X: expr0/exprX mismatch");
+ break;
+ default:
+ vpanic("tcExpr");
+ }
+}
+
+
+static
+void tcStmt ( IRSB* bb, IRStmt* stmt, IRType gWordTy )
+{
+ Int i;
+ IRDirty* d;
+ IRCAS* cas;
+ IRType tyExpd, tyData;
+ IRTypeEnv* tyenv = bb->tyenv;
+ switch (stmt->tag) {
+ case Ist_IMark:
+ /* Somewhat heuristic, but rule out totally implausible
+ instruction sizes. */
+ if (stmt->Ist.IMark.len < 0 || stmt->Ist.IMark.len > 20)
+ sanityCheckFail(bb,stmt,"IRStmt.IMark.len: implausible");
+ break;
+ case Ist_AbiHint:
+ if (typeOfIRExpr(tyenv, stmt->Ist.AbiHint.base) != gWordTy)
+ sanityCheckFail(bb,stmt,"IRStmt.AbiHint.base: "
+ "not :: guest word type");
+ if (typeOfIRExpr(tyenv, stmt->Ist.AbiHint.nia) != gWordTy)
+ sanityCheckFail(bb,stmt,"IRStmt.AbiHint.nia: "
+ "not :: guest word type");
+ break;
+ case Ist_Put:
+ tcExpr( bb, stmt, stmt->Ist.Put.data, gWordTy );
+ if (typeOfIRExpr(tyenv,stmt->Ist.Put.data) == Ity_I1)
+ sanityCheckFail(bb,stmt,"IRStmt.Put.data: cannot Put :: Ity_I1");
+ break;
+ case Ist_PutI:
+ tcExpr( bb, stmt, stmt->Ist.PutI.data, gWordTy );
+ tcExpr( bb, stmt, stmt->Ist.PutI.ix, gWordTy );
+ if (typeOfIRExpr(tyenv,stmt->Ist.PutI.data) == Ity_I1)
+ sanityCheckFail(bb,stmt,"IRStmt.PutI.data: cannot PutI :: Ity_I1");
+ if (typeOfIRExpr(tyenv,stmt->Ist.PutI.data)
+ != stmt->Ist.PutI.descr->elemTy)
+ sanityCheckFail(bb,stmt,"IRStmt.PutI.data: data ty != elem ty");
+ if (typeOfIRExpr(tyenv,stmt->Ist.PutI.ix) != Ity_I32)
+ sanityCheckFail(bb,stmt,"IRStmt.PutI.ix: not :: Ity_I32");
+ if (!saneIRRegArray(stmt->Ist.PutI.descr))
+ sanityCheckFail(bb,stmt,"IRStmt.PutI.descr: invalid descr");
+ break;
+ case Ist_WrTmp:
+ tcExpr( bb, stmt, stmt->Ist.WrTmp.data, gWordTy );
+ if (typeOfIRTemp(tyenv, stmt->Ist.WrTmp.tmp)
+ != typeOfIRExpr(tyenv, stmt->Ist.WrTmp.data))
+ sanityCheckFail(bb,stmt,"IRStmt.Put.Tmp: tmp and expr do not match");
+ break;
+ case Ist_Store:
+ tcExpr( bb, stmt, stmt->Ist.Store.addr, gWordTy );
+ tcExpr( bb, stmt, stmt->Ist.Store.data, gWordTy );
+ if (typeOfIRExpr(tyenv, stmt->Ist.Store.addr) != gWordTy)
+ sanityCheckFail(bb,stmt,"IRStmt.Store.addr: not :: guest word type");
+ if (typeOfIRExpr(tyenv, stmt->Ist.Store.data) == Ity_I1)
+ sanityCheckFail(bb,stmt,"IRStmt.Store.data: cannot Store :: Ity_I1");
+ if (stmt->Ist.Store.end != Iend_LE && stmt->Ist.Store.end != Iend_BE)
+ sanityCheckFail(bb,stmt,"Ist.Store.end: bogus endianness");
+ break;
+ case Ist_CAS:
+ cas = stmt->Ist.CAS.details;
+ /* make sure it's definitely either a CAS or a DCAS */
+ if (cas->oldHi == IRTemp_INVALID
+ && cas->expdHi == NULL && cas->dataHi == NULL) {
+ /* fine; it's a single cas */
+ }
+ else
+ if (cas->oldHi != IRTemp_INVALID
+ && cas->expdHi != NULL && cas->dataHi != NULL) {
+ /* fine; it's a double cas */
+ }
+ else {
+ /* it's some el-mutanto hybrid */
+ goto bad_cas;
+ }
+ /* check the address type */
+ tcExpr( bb, stmt, cas->addr, gWordTy );
+ if (typeOfIRExpr(tyenv, cas->addr) != gWordTy) goto bad_cas;
+ /* check types on the {old,expd,data}Lo components agree */
+ tyExpd = typeOfIRExpr(tyenv, cas->expdLo);
+ tyData = typeOfIRExpr(tyenv, cas->dataLo);
+ if (tyExpd != tyData) goto bad_cas;
+ if (tyExpd != typeOfIRTemp(tyenv, cas->oldLo))
+ goto bad_cas;
+ /* check the base element type is sane */
+ if (tyExpd == Ity_I8 || tyExpd == Ity_I16 || tyExpd == Ity_I32
+ || (gWordTy == Ity_I64 && tyExpd == Ity_I64)) {
+ /* fine */
+ } else {
+ goto bad_cas;
+ }
+ /* If it's a DCAS, check types on the {old,expd,data}Hi
+ components too */
+ if (cas->oldHi != IRTemp_INVALID) {
+ tyExpd = typeOfIRExpr(tyenv, cas->expdHi);
+ tyData = typeOfIRExpr(tyenv, cas->dataHi);
+ if (tyExpd != tyData) goto bad_cas;
+ if (tyExpd != typeOfIRTemp(tyenv, cas->oldHi))
+ goto bad_cas;
+ /* and finally check that oldLo and oldHi have the same
+ type. This forces equivalence amongst all 6 types. */
+ if (typeOfIRTemp(tyenv, cas->oldHi)
+ != typeOfIRTemp(tyenv, cas->oldLo))
+ goto bad_cas;
+ }
+ break;
+ bad_cas:
+ sanityCheckFail(bb,stmt,"IRStmt.CAS: ill-formed");
+ break;
+ case Ist_LLSC: {
+ IRType tyRes;
+ if (typeOfIRExpr(tyenv, stmt->Ist.LLSC.addr) != gWordTy)
+ sanityCheckFail(bb,stmt,"IRStmt.LLSC.addr: not :: guest word type");
+ if (stmt->Ist.LLSC.end != Iend_LE && stmt->Ist.LLSC.end != Iend_BE)
+ sanityCheckFail(bb,stmt,"Ist.LLSC.end: bogus endianness");
+ tyRes = typeOfIRTemp(tyenv, stmt->Ist.LLSC.result);
+ if (stmt->Ist.LLSC.storedata == NULL) {
+ /* it's a LL */
+ if (tyRes != Ity_I64 && tyRes != Ity_I32 && tyRes != Ity_I8)
+ sanityCheckFail(bb,stmt,"Ist.LLSC(LL).result :: bogus");
+ } else {
+ /* it's a SC */
+ if (tyRes != Ity_I1)
+ sanityCheckFail(bb,stmt,"Ist.LLSC(SC).result: not :: Ity_I1");
+ tyData = typeOfIRExpr(tyenv, stmt->Ist.LLSC.storedata);
+ if (tyData != Ity_I64 && tyData != Ity_I32 && tyData != Ity_I8)
+ sanityCheckFail(bb,stmt,
+ "Ist.LLSC(SC).result :: storedata bogus");
+ }
+ break;
+ }
+ case Ist_Dirty:
+ /* Mostly check for various kinds of ill-formed dirty calls. */
+ d = stmt->Ist.Dirty.details;
+ if (d->cee == NULL) goto bad_dirty;
+ if (!saneIRCallee(d->cee)) goto bad_dirty;
+ if (d->cee->regparms > countArgs(d->args)) goto bad_dirty;
+ if (d->mFx == Ifx_None) {
+ if (d->mAddr != NULL || d->mSize != 0)
+ goto bad_dirty;
+ } else {
+ if (d->mAddr == NULL || d->mSize == 0)
+ goto bad_dirty;
+ }
+ if (d->nFxState < 0 || d->nFxState > VEX_N_FXSTATE)
+ goto bad_dirty;
+ if (d->nFxState == 0 && d->needsBBP)
+ goto bad_dirty;
+ for (i = 0; i < d->nFxState; i++) {
+ if (d->fxState[i].fx == Ifx_None) goto bad_dirty;
+ if (d->fxState[i].size <= 0) goto bad_dirty;
+ }
+ /* check types, minimally */
+ if (d->guard == NULL) goto bad_dirty;
+ tcExpr( bb, stmt, d->guard, gWordTy );
+ if (typeOfIRExpr(tyenv, d->guard) != Ity_I1)
+ sanityCheckFail(bb,stmt,"IRStmt.Dirty.guard not :: Ity_I1");
+ if (d->tmp != IRTemp_INVALID
+ && typeOfIRTemp(tyenv, d->tmp) == Ity_I1)
+ sanityCheckFail(bb,stmt,"IRStmt.Dirty.dst :: Ity_I1");
+ for (i = 0; d->args[i] != NULL; i++) {
+ if (i >= 32)
+ sanityCheckFail(bb,stmt,"IRStmt.Dirty: > 32 args");
+ if (typeOfIRExpr(tyenv, d->args[i]) == Ity_I1)
+ sanityCheckFail(bb,stmt,"IRStmt.Dirty.arg[i] :: Ity_I1");
+ }
+ break;
+ bad_dirty:
+ sanityCheckFail(bb,stmt,"IRStmt.Dirty: ill-formed");
+ break;
+ case Ist_NoOp:
+ break;
+ case Ist_MBE:
+ switch (stmt->Ist.MBE.event) {
+ case Imbe_Fence:
+ break;
+ default: sanityCheckFail(bb,stmt,"IRStmt.MBE.event: unknown");
+ break;
+ }
+ break;
+ case Ist_Exit:
+ tcExpr( bb, stmt, stmt->Ist.Exit.guard, gWordTy );
+ if (typeOfIRExpr(tyenv,stmt->Ist.Exit.guard) != Ity_I1)
+ sanityCheckFail(bb,stmt,"IRStmt.Exit.guard: not :: Ity_I1");
+ if (!saneIRConst(stmt->Ist.Exit.dst))
+ sanityCheckFail(bb,stmt,"IRStmt.Exit.dst: bad dst");
+ if (typeOfIRConst(stmt->Ist.Exit.dst) != gWordTy)
+ sanityCheckFail(bb,stmt,"IRStmt.Exit.dst: not :: guest word type");
+ break;
+ default:
+ vpanic("tcStmt");
+ }
+}
+
+void sanityCheckIRSB ( IRSB* bb, HChar* caller,
+ Bool require_flat, IRType guest_word_size )
+{
+ Int i;
+ IRStmt* stmt;
+ Int n_temps = bb->tyenv->types_used;
+ Int* def_counts = LibVEX_Alloc(n_temps * sizeof(Int));
+
+ if (0)
+ vex_printf("sanityCheck: %s\n", caller);
+
+ vassert(guest_word_size == Ity_I32
+ || guest_word_size == Ity_I64);
+
+ if (bb->stmts_used < 0 || bb->stmts_size < 8
+ || bb->stmts_used > bb->stmts_size)
+ /* this BB is so strange we can't even print it */
+ vpanic("sanityCheckIRSB: stmts array limits wierd");
+
+ /* Ensure each temp has a plausible type. */
+ for (i = 0; i < n_temps; i++) {
+ IRType ty = typeOfIRTemp(bb->tyenv,(IRTemp)i);
+ if (!isPlausibleIRType(ty)) {
+ vex_printf("Temp t%d declared with implausible type 0x%x\n",
+ i, (UInt)ty);
+ sanityCheckFail(bb,NULL,"Temp declared with implausible type");
+ }
+ }
+
+ /* Check for flatness, if required. */
+ if (require_flat) {
+ for (i = 0; i < bb->stmts_used; i++) {
+ stmt = bb->stmts[i];
+ if (!stmt)
+ sanityCheckFail(bb, stmt, "IRStmt: is NULL");
+ if (!isFlatIRStmt(stmt))
+ sanityCheckFail(bb, stmt, "IRStmt: is not flat");
+ }
+ if (!isIRAtom(bb->next))
+ sanityCheckFail(bb, NULL, "bb->next is not an atom");
+ }
+
+ /* Count the defs of each temp. Only one def is allowed.
+ Also, check that each used temp has already been defd. */
+
+ for (i = 0; i < n_temps; i++)
+ def_counts[i] = 0;
+
+ for (i = 0; i < bb->stmts_used; i++) {
+ IRDirty* d;
+ IRCAS* cas;
+ stmt = bb->stmts[i];
+ /* Check any temps used by this statement. */
+ useBeforeDef_Stmt(bb,stmt,def_counts);
+
+ /* Now make note of any temps defd by this statement. */
+ switch (stmt->tag) {
+ case Ist_WrTmp:
+ if (stmt->Ist.WrTmp.tmp < 0 || stmt->Ist.WrTmp.tmp >= n_temps)
+ sanityCheckFail(bb, stmt,
+ "IRStmt.Tmp: destination tmp is out of range");
+ def_counts[stmt->Ist.WrTmp.tmp]++;
+ if (def_counts[stmt->Ist.WrTmp.tmp] > 1)
+ sanityCheckFail(bb, stmt,
+ "IRStmt.Tmp: destination tmp is assigned more than once");
+ break;
+ case Ist_Store:
+ break;
+ case Ist_Dirty:
+ if (stmt->Ist.Dirty.details->tmp != IRTemp_INVALID) {
+ d = stmt->Ist.Dirty.details;
+ if (d->tmp < 0 || d->tmp >= n_temps)
+ sanityCheckFail(bb, stmt,
+ "IRStmt.Dirty: destination tmp is out of range");
+ def_counts[d->tmp]++;
+ if (def_counts[d->tmp] > 1)
+ sanityCheckFail(bb, stmt,
+ "IRStmt.Dirty: destination tmp is assigned more than once");
+ }
+ break;
+ case Ist_CAS:
+ cas = stmt->Ist.CAS.details;
+ if (cas->oldHi != IRTemp_INVALID) {
+ if (cas->oldHi < 0 || cas->oldHi >= n_temps)
+ sanityCheckFail(bb, stmt,
+ "IRStmt.CAS: destination tmpHi is out of range");
+ def_counts[cas->oldHi]++;
+ if (def_counts[cas->oldHi] > 1)
+ sanityCheckFail(bb, stmt,
+ "IRStmt.CAS: destination tmpHi is assigned more than once");
+ }
+ if (cas->oldLo < 0 || cas->oldLo >= n_temps)
+ sanityCheckFail(bb, stmt,
+ "IRStmt.CAS: destination tmpLo is out of range");
+ def_counts[cas->oldLo]++;
+ if (def_counts[cas->oldLo] > 1)
+ sanityCheckFail(bb, stmt,
+ "IRStmt.CAS: destination tmpLo is assigned more than once");
+ break;
+ case Ist_LLSC:
+ if (stmt->Ist.LLSC.result < 0 || stmt->Ist.LLSC.result >= n_temps)
+ sanityCheckFail(bb, stmt,
+ "IRStmt.LLSC: destination tmp is out of range");
+ def_counts[stmt->Ist.LLSC.result]++;
+ if (def_counts[stmt->Ist.LLSC.result] > 1)
+ sanityCheckFail(bb, stmt,
+ "IRStmt.LLSC: destination tmp is assigned more than once");
+ break;
+ default:
+ /* explicitly handle the rest, so as to keep gcc quiet */
+ break;
+ }
+ }
+
+ /* Typecheck everything. */
+ for (i = 0; i < bb->stmts_used; i++)
+ if (bb->stmts[i])
+ tcStmt( bb, bb->stmts[i], guest_word_size );
+ if (typeOfIRExpr(bb->tyenv,bb->next) != guest_word_size)
+ sanityCheckFail(bb, NULL, "bb->next field has wrong type");
+}
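+
+/* Usage sketch (illustrative): a pass would typically re-check a
+   block after transforming it, e.g.
+
+      sanityCheckIRSB(bb, "after-instrumentation", True, Ity_I64);
+
+   which requires flatness and panics with a printout of bb if
+   anything is ill-typed. */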
+
+/*---------------------------------------------------------------*/
+/*--- Misc helper functions ---*/
+/*---------------------------------------------------------------*/
+
+Bool eqIRConst ( IRConst* c1, IRConst* c2 )
+{
+ if (c1->tag != c2->tag)
+ return False;
+
+ switch (c1->tag) {
+ case Ico_U1: return toBool( (1 & c1->Ico.U1) == (1 & c2->Ico.U1) );
+ case Ico_U8: return toBool( c1->Ico.U8 == c2->Ico.U8 );
+ case Ico_U16: return toBool( c1->Ico.U16 == c2->Ico.U16 );
+ case Ico_U32: return toBool( c1->Ico.U32 == c2->Ico.U32 );
+ case Ico_U64: return toBool( c1->Ico.U64 == c2->Ico.U64 );
+ case Ico_F64: return toBool( c1->Ico.F64 == c2->Ico.F64 );
+ case Ico_F64i: return toBool( c1->Ico.F64i == c2->Ico.F64i );
+ case Ico_V128: return toBool( c1->Ico.V128 == c2->Ico.V128 );
+ default: vpanic("eqIRConst");
+ }
+}
+
+Bool eqIRRegArray ( IRRegArray* descr1, IRRegArray* descr2 )
+{
+ return toBool( descr1->base == descr2->base
+ && descr1->elemTy == descr2->elemTy
+ && descr1->nElems == descr2->nElems );
+}
+
+Int sizeofIRType ( IRType ty )
+{
+ switch (ty) {
+ case Ity_I8: return 1;
+ case Ity_I16: return 2;
+ case Ity_I32: return 4;
+ case Ity_I64: return 8;
+ case Ity_I128: return 16;
+ case Ity_F32: return 4;
+ case Ity_F64: return 8;
+ case Ity_V128: return 16;
+ default: vex_printf("\n"); ppIRType(ty); vex_printf("\n");
+ vpanic("sizeofIRType");
+ }
+}
+
+IRExpr* mkIRExpr_HWord ( HWord hw )
+{
+ vassert(sizeof(void*) == sizeof(HWord));
+ if (sizeof(HWord) == 4)
+ return IRExpr_Const(IRConst_U32((UInt)hw));
+ if (sizeof(HWord) == 8)
+ return IRExpr_Const(IRConst_U64((ULong)hw));
+ vpanic("mkIRExpr_HWord");
+}
+
+IRDirty* unsafeIRDirty_0_N ( Int regparms, HChar* name, void* addr,
+ IRExpr** args )
+{
+ IRDirty* d = emptyIRDirty();
+ d->cee = mkIRCallee ( regparms, name, addr );
+ d->guard = IRExpr_Const(IRConst_U1(True));
+ d->args = args;
+ return d;
+}
+
+IRDirty* unsafeIRDirty_1_N ( IRTemp dst,
+ Int regparms, HChar* name, void* addr,
+ IRExpr** args )
+{
+ IRDirty* d = emptyIRDirty();
+ d->cee = mkIRCallee ( regparms, name, addr );
+ d->guard = IRExpr_Const(IRConst_U1(True));
+ d->args = args;
+ d->tmp = dst;
+ return d;
+}
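+
+/* Usage sketch (illustrative; "helper" is a hypothetical C
+   function):
+
+      IRDirty* d = unsafeIRDirty_1_N( dst, 0, "helper",
+                                      (void*)helper,
+                                      mkIRExprVec_0() );
+      addStmtToIRSB(bb, IRStmt_Dirty(d));
+
+   calls helper with no arguments and writes its result to temp
+   dst. */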
+
+IRExpr* mkIRExprCCall ( IRType retty,
+ Int regparms, HChar* name, void* addr,
+ IRExpr** args )
+{
+ return IRExpr_CCall ( mkIRCallee ( regparms, name, addr ),
+ retty, args );
+}
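+
+/* Usage sketch (illustrative; "pure_helper" is a hypothetical clean
+   helper):
+
+      IRExpr* call
+         = mkIRExprCCall( Ity_I64, 0, "pure_helper",
+                          (void*)pure_helper,
+                          mkIRExprVec_1(IRExpr_RdTmp(t1)) );
+
+   builds a side-effect-free call usable anywhere an IRExpr is. */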
+
+Bool eqIRAtom ( IRExpr* a1, IRExpr* a2 )
+{
+ vassert(isIRAtom(a1));
+ vassert(isIRAtom(a2));
+ if (a1->tag == Iex_RdTmp && a2->tag == Iex_RdTmp)
+ return toBool(a1->Iex.RdTmp.tmp == a2->Iex.RdTmp.tmp);
+ if (a1->tag == Iex_Const && a2->tag == Iex_Const)
+ return eqIRConst(a1->Iex.Const.con, a2->Iex.Const.con);
+ return False;
+}
+
+/*---------------------------------------------------------------*/
+/*--- end ir_defs.c ---*/
+/*---------------------------------------------------------------*/
diff --git a/VEX/priv/ir_match.c b/VEX/priv/ir_match.c
new file mode 100644
index 0000000..fc32f2e
--- /dev/null
+++ b/VEX/priv/ir_match.c
@@ -0,0 +1,111 @@
+
+/*---------------------------------------------------------------*/
+/*--- begin ir_match.c ---*/
+/*---------------------------------------------------------------*/
+
+/*
+ This file is part of Valgrind, a dynamic binary instrumentation
+ framework.
+
+ Copyright (C) 2004-2010 OpenWorks LLP
+ info@open-works.net
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA.
+
+ The GNU General Public License is contained in the file COPYING.
+
+ Neither the names of the U.S. Department of Energy nor the
+ University of California nor the names of its contributors may be
+ used to endorse or promote products derived from this software
+ without prior written permission.
+*/
+
+/* Provides a facility for doing IR tree matching. */
+
+#include "main_util.h"
+#include "ir_match.h"
+
+
+/* Assign a value to a binder. Checks for obvious stupidities. */
+
+static
+void setBindee ( MatchInfo* mi, Int n, IRExpr* bindee )
+{
+ if (n < 0 || n >= N_IRMATCH_BINDERS)
+ vpanic("setBindee: out of range index");
+ if (mi->bindee[n] != NULL)
+ vpanic("setBindee: bindee already set");
+ mi->bindee[n] = bindee;
+}
+
+
+/* This is the actual matching function, recursing over the pattern
+ and expression trees in the obvious way, and dumping any matches
+ found into 'mi'. */
+
+static
+Bool matchWrk ( MatchInfo* mi, IRExpr* p/*attern*/, IRExpr* e/*xpr*/ )
+{
+ switch (p->tag) {
+ case Iex_Binder: /* aha, what we were looking for. */
+ setBindee(mi, p->Iex.Binder.binder, e);
+ return True;
+ case Iex_Unop:
+ if (e->tag != Iex_Unop) return False;
+ if (p->Iex.Unop.op != e->Iex.Unop.op) return False;
+ if (!matchWrk(mi, p->Iex.Unop.arg, e->Iex.Unop.arg))
+ return False;
+ return True;
+ case Iex_Binop:
+ if (e->tag != Iex_Binop) return False;
+ if (p->Iex.Binop.op != e->Iex.Binop.op) return False;
+ if (!matchWrk(mi, p->Iex.Binop.arg1, e->Iex.Binop.arg1))
+ return False;
+ if (!matchWrk(mi, p->Iex.Binop.arg2, e->Iex.Binop.arg2))
+ return False;
+ return True;
+ case Iex_Load:
+ if (e->tag != Iex_Load) return False;
+ if (p->Iex.Load.end != e->Iex.Load.end) return False;
+ if (p->Iex.Load.ty != e->Iex.Load.ty) return False;
+ if (!matchWrk(mi, p->Iex.Load.addr, e->Iex.Load.addr))
+ return False;
+ return True;
+ case Iex_Const:
+ if (e->tag != Iex_Const) return False;
+ return eqIRConst(p->Iex.Const.con, e->Iex.Const.con);
+ default:
+ ppIRExpr(p);
+ vpanic("match");
+ }
+}
+
+
+/* Top level entry point to the matcher. */
+
+Bool matchIRExpr ( MatchInfo* mi, IRExpr* p/*attern*/, IRExpr* e/*xpr*/ )
+{
+ Int i;
+ for (i = 0; i < N_IRMATCH_BINDERS; i++)
+ mi->bindee[i] = NULL;
+ return matchWrk(mi, p, e);
+}
+
+
+
+/*---------------------------------------------------------------*/
+/*--- end ir_match.c ---*/
+/*---------------------------------------------------------------*/
diff --git a/VEX/priv/ir_match.h b/VEX/priv/ir_match.h
new file mode 100644
index 0000000..5755505
--- /dev/null
+++ b/VEX/priv/ir_match.h
@@ -0,0 +1,90 @@
+
+/*---------------------------------------------------------------*/
+/*--- begin ir_match.h ---*/
+/*---------------------------------------------------------------*/
+
+/*
+ This file is part of Valgrind, a dynamic binary instrumentation
+ framework.
+
+ Copyright (C) 2004-2010 OpenWorks LLP
+ info@open-works.net
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA.
+
+ The GNU General Public License is contained in the file COPYING.
+
+ Neither the names of the U.S. Department of Energy nor the
+ University of California nor the names of its contributors may be
+ used to endorse or promote products derived from this software
+ without prior written permission.
+*/
+
+/* Provides a facility for doing IR tree matching. */
+
+#ifndef __VEX_IR_MATCH_H
+#define __VEX_IR_MATCH_H
+
+#include "libvex_basictypes.h"
+#include "libvex_ir.h"
+
+
+/* Patterns are simply IRExpr* trees, with IRExpr_Binder nodes at the
+ leaves, indicating binding points. Use these magic macros to
+ declare and define patterns. */
+
+#define DECLARE_PATTERN(_patt) \
+ static IRExpr* _patt = NULL
+
+#define DEFINE_PATTERN(_patt,_expr) \
+ do { \
+ if (!(_patt)) { \
+ vassert(vexGetAllocMode() == VexAllocModeTEMP); \
+ vexSetAllocMode(VexAllocModePERM); \
+ _patt = (_expr); \
+ vexSetAllocMode(VexAllocModeTEMP); \
+ vassert(vexGetAllocMode() == VexAllocModeTEMP); \
+ } \
+ } while (0)
+
+
+/* This type carries the result of a match -- it records what
+   the binders got instantiated to. */
+
+#define N_IRMATCH_BINDERS 4
+
+typedef
+ struct {
+ IRExpr* bindee[N_IRMATCH_BINDERS];
+ }
+ MatchInfo;
+
+
+/* The matching function. p is expected to have zero or more
+   IRExpr_Binder nodes in it, numbered 0, 1, 2 ... Returns True if
+   a match succeeded. */
+
+extern
+Bool matchIRExpr ( MatchInfo* mi, IRExpr* p/*attern*/, IRExpr* e/*xpr*/ );
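+
+/* Usage sketch (illustrative; 'unop' and 'bind' stand for the usual
+   local shorthands for IRExpr_Unop and IRExpr_Binder):
+
+      DECLARE_PATTERN(p_32to1);
+      DEFINE_PATTERN(p_32to1, unop(Iop_32to1, bind(0)));
+      MatchInfo mi;
+      if (matchIRExpr(&mi, p_32to1, e)) {
+         IRExpr* arg = mi.bindee[0];   -- whatever Binder 0 matched
+         ...
+      }
+*/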
+
+
+#endif /* ndef __VEX_IR_MATCH_H */
+
+
+
+/*---------------------------------------------------------------*/
+/*--- end ir_match.h ---*/
+/*---------------------------------------------------------------*/
diff --git a/VEX/priv/ir_opt.c b/VEX/priv/ir_opt.c
new file mode 100644
index 0000000..4730680
--- /dev/null
+++ b/VEX/priv/ir_opt.c
@@ -0,0 +1,4671 @@
+
+/*---------------------------------------------------------------*/
+/*--- begin ir_opt.c ---*/
+/*---------------------------------------------------------------*/
+
+/*
+ This file is part of Valgrind, a dynamic binary instrumentation
+ framework.
+
+ Copyright (C) 2004-2010 OpenWorks LLP
+ info@open-works.net
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA.
+
+ The GNU General Public License is contained in the file COPYING.
+
+ Neither the names of the U.S. Department of Energy nor the
+ University of California nor the names of its contributors may be
+ used to endorse or promote products derived from this software
+ without prior written permission.
+*/
+
+#include "libvex_basictypes.h"
+#include "libvex_ir.h"
+#include "libvex.h"
+
+#include "main_util.h"
+#include "main_globals.h"
+#include "ir_opt.h"
+
+
+/* Set to 1 for lots of debugging output. */
+#define DEBUG_IROPT 0
+
+
+/* What iropt does, 29 Dec 04.
+
+ It takes an IRSB and produces a new one with the same meaning,
+ defined thus:
+
+ After execution of the new BB, all guest state and guest memory
+ are the same as after execution of the original. This is true
+ regardless of how the block was exited (at the end vs side exit).
+
+ In addition, parts of the guest state will be identical to those
+ created by execution of the original at the following observation
+ points:
+
+ * In a dirty helper call, any parts of the guest state that the
+ helper states that it reads or modifies will be up to date.
+ Also, guest memory will be up to date. Parts of the guest state
+ not marked as being read or modified by the helper cannot be
+ assumed to be up-to-date at the point where the helper is called.
+
+ * Immediately prior to any load or store, those parts of the guest
+ state marked as requiring precise exceptions will be up to date.
+ Also, guest memory will be up to date. Parts of the guest state
+ not marked as requiring precise exceptions cannot be assumed to
+ be up-to-date at the point of the load/store.
+
+ The relative order of loads and stores (including loads/stores of
+ guest memory done by dirty helpers annotated as such) is not
+ changed. However, the relative order of loads with no intervening
+ stores/modifies may be changed.
+
+ Transformation order
+ ~~~~~~~~~~~~~~~~~~~~
+
+ There are three levels of optimisation, controlled by
+ vex_control.iropt_level. Define first:
+
+ "Cheap transformations" are the following sequence:
+ * Redundant-Get removal
+ * Redundant-Put removal
+ * Constant propagation/folding
+ * Dead code removal
+ * Specialisation of clean helper functions
+ * Dead code removal
+
+ "Expensive transformations" are the following sequence:
+ * CSE
+ * Folding of add/sub chains
+ * Redundant-GetI removal
+ * Redundant-PutI removal
+ * Dead code removal
+
+ Then the transformations are as follows, as defined by
+ vex_control.iropt_level:
+
+ Level 0:
+ * Flatten into atomic form.
+
+ Level 1: the following sequence:
+ * Flatten into atomic form.
+ * Cheap transformations.
+
+ Level 2: the following sequence
+ * Flatten into atomic form.
+ * Cheap transformations.
+ * If block contains any floating or vector types, CSE.
+ * If block contains GetI or PutI, Expensive transformations.
+ * Try unrolling loops. Three possible outcomes:
+ - No effect: do nothing more.
+ - Unrolled a loop, and block does not contain GetI or PutI:
+ Do: * CSE
+ * Dead code removal
+ - Unrolled a loop, and block contains GetI or PutI:
+ Do: * Expensive transformations
+ * Cheap transformations
+*/
+
+/* Implementation notes, 29 Dec 04.
+
+ TODO (important): I think rPutI removal ignores precise exceptions
+ and is therefore, in a sense, wrong: PutIs are assumed not to
+ write parts of the guest state that we need to have up-to-date at
+ loads/stores. So far, on the x86 guest, that has not mattered,
+ since only the x87 FP registers and tags are accessed using
+ GetI/PutI, and there has so far been no need for them to be up to
+ date at mem exception points. The rPutI pass should be fixed.
+
+ TODO: improve pessimistic handling of precise exceptions
+ in the tree builder.
+
+ TODO: check interaction of rGetI and dirty helpers.
+
+ F64i constants are treated differently from other constants.
+ They are not regarded as atoms, and are instead lifted off and
+ bound to temps. This allows them to participate in CSE, which
+ is important for getting good performance for x86 guest code.
+
+ TODO: CSE up F64 literals too (F64is are already handled).
+
+ TODO: consider carefully the requirement for precise exns
+ prior to making CSE any more aggressive. */
+
+
+/*---------------------------------------------------------------*/
+/*--- Finite mappery, of a sort ---*/
+/*---------------------------------------------------------------*/
+
+/* General map from HWord-sized thing to HWord-sized thing. Could be
+   done by hashing, but it's not clear whether that would really be
+   any faster. */
+
+typedef
+ struct {
+ Bool* inuse;
+ HWord* key;
+ HWord* val;
+ Int size;
+ Int used;
+ }
+ HashHW;
+
+static HashHW* newHHW ( void )
+{
+ HashHW* h = LibVEX_Alloc(sizeof(HashHW));
+ h->size = 8;
+ h->used = 0;
+ h->inuse = LibVEX_Alloc(h->size * sizeof(Bool));
+ h->key = LibVEX_Alloc(h->size * sizeof(HWord));
+ h->val = LibVEX_Alloc(h->size * sizeof(HWord));
+ return h;
+}
+
+
+/* Look up key in the map. */
+
+static Bool lookupHHW ( HashHW* h, /*OUT*/HWord* val, HWord key )
+{
+ Int i;
+ /* vex_printf("lookupHHW(%llx)\n", key ); */
+ for (i = 0; i < h->used; i++) {
+ if (h->inuse[i] && h->key[i] == key) {
+ if (val)
+ *val = h->val[i];
+ return True;
+ }
+ }
+ return False;
+}
+
+
+/* Add key->val to the map. Replaces any existing binding for key. */
+
+static void addToHHW ( HashHW* h, HWord key, HWord val )
+{
+ Int i, j;
+ /* vex_printf("addToHHW(%llx, %llx)\n", key, val); */
+
+ /* Find and replace existing binding, if any. */
+ for (i = 0; i < h->used; i++) {
+ if (h->inuse[i] && h->key[i] == key) {
+ h->val[i] = val;
+ return;
+ }
+ }
+
+ /* Ensure a space is available. */
+ if (h->used == h->size) {
+ /* Copy into arrays twice the size. */
+ Bool* inuse2 = LibVEX_Alloc(2 * h->size * sizeof(Bool));
+ HWord* key2 = LibVEX_Alloc(2 * h->size * sizeof(HWord));
+ HWord* val2 = LibVEX_Alloc(2 * h->size * sizeof(HWord));
+ for (i = j = 0; i < h->size; i++) {
+ if (!h->inuse[i]) continue;
+ inuse2[j] = True;
+ key2[j] = h->key[i];
+ val2[j] = h->val[i];
+ j++;
+ }
+ h->used = j;
+ h->size *= 2;
+ h->inuse = inuse2;
+ h->key = key2;
+ h->val = val2;
+ }
+
+ /* Finally, add it. */
+ vassert(h->used < h->size);
+ h->inuse[h->used] = True;
+ h->key[h->used] = key;
+ h->val[h->used] = val;
+ h->used++;
+}
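+
+/* Usage sketch (illustrative); lookup is a linear scan, which is
+   adequate for the small maps used here:
+
+      HashHW* h = newHHW();
+      addToHHW( h, (HWord)key, (HWord)val );
+      HWord v;
+      if (lookupHHW( h, &v, (HWord)key ))
+         ...                  -- v now holds val
+*/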
+
+
+/*---------------------------------------------------------------*/
+/*--- Flattening out a BB into atomic SSA form ---*/
+/*---------------------------------------------------------------*/
+
+/* Non-critical helper, heuristic for reducing the number of tmp-tmp
+ copies made by flattening. If in doubt return False. */
+
+static Bool isFlat ( IRExpr* e )
+{
+ if (e->tag == Iex_Get)
+ return True;
+ if (e->tag == Iex_Binop)
+ return toBool( isIRAtom(e->Iex.Binop.arg1)
+ && isIRAtom(e->Iex.Binop.arg2) );
+ if (e->tag == Iex_Load)
+ return isIRAtom(e->Iex.Load.addr);
+ return False;
+}
+
+/* Flatten out 'ex' so it is atomic, returning a new expression with
+ the same value, after having appended extra IRTemp assignments to
+ the end of 'bb'. */
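+
+/* Example (illustrative): flattening Add32(GET:I32(8),0x1:I32)
+   appends
+      t1 = GET:I32(8)
+      t2 = Add32(t1,0x1:I32)
+   to 'bb' and returns RdTmp(t2). */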
+
+static IRExpr* flatten_Expr ( IRSB* bb, IRExpr* ex )
+{
+ Int i;
+ IRExpr** newargs;
+ IRType ty = typeOfIRExpr(bb->tyenv, ex);
+ IRTemp t1;
+
+ switch (ex->tag) {
+
+ case Iex_GetI:
+ t1 = newIRTemp(bb->tyenv, ty);
+ addStmtToIRSB(bb, IRStmt_WrTmp(t1,
+ IRExpr_GetI(ex->Iex.GetI.descr,
+ flatten_Expr(bb, ex->Iex.GetI.ix),
+ ex->Iex.GetI.bias)));
+ return IRExpr_RdTmp(t1);
+
+ case Iex_Get:
+ t1 = newIRTemp(bb->tyenv, ty);
+ addStmtToIRSB(bb,
+ IRStmt_WrTmp(t1, ex));
+ return IRExpr_RdTmp(t1);
+
+ case Iex_Qop:
+ t1 = newIRTemp(bb->tyenv, ty);
+ addStmtToIRSB(bb, IRStmt_WrTmp(t1,
+ IRExpr_Qop(ex->Iex.Qop.op,
+ flatten_Expr(bb, ex->Iex.Qop.arg1),
+ flatten_Expr(bb, ex->Iex.Qop.arg2),
+ flatten_Expr(bb, ex->Iex.Qop.arg3),
+ flatten_Expr(bb, ex->Iex.Qop.arg4))));
+ return IRExpr_RdTmp(t1);
+
+ case Iex_Triop:
+ t1 = newIRTemp(bb->tyenv, ty);
+ addStmtToIRSB(bb, IRStmt_WrTmp(t1,
+ IRExpr_Triop(ex->Iex.Triop.op,
+ flatten_Expr(bb, ex->Iex.Triop.arg1),
+ flatten_Expr(bb, ex->Iex.Triop.arg2),
+ flatten_Expr(bb, ex->Iex.Triop.arg3))));
+ return IRExpr_RdTmp(t1);
+
+ case Iex_Binop:
+ t1 = newIRTemp(bb->tyenv, ty);
+ addStmtToIRSB(bb, IRStmt_WrTmp(t1,
+ IRExpr_Binop(ex->Iex.Binop.op,
+ flatten_Expr(bb, ex->Iex.Binop.arg1),
+ flatten_Expr(bb, ex->Iex.Binop.arg2))));
+ return IRExpr_RdTmp(t1);
+
+ case Iex_Unop:
+ t1 = newIRTemp(bb->tyenv, ty);
+ addStmtToIRSB(bb, IRStmt_WrTmp(t1,
+ IRExpr_Unop(ex->Iex.Unop.op,
+ flatten_Expr(bb, ex->Iex.Unop.arg))));
+ return IRExpr_RdTmp(t1);
+
+ case Iex_Load:
+ t1 = newIRTemp(bb->tyenv, ty);
+ addStmtToIRSB(bb, IRStmt_WrTmp(t1,
+ IRExpr_Load(ex->Iex.Load.end,
+ ex->Iex.Load.ty,
+ flatten_Expr(bb, ex->Iex.Load.addr))));
+ return IRExpr_RdTmp(t1);
+
+ case Iex_CCall:
+ newargs = shallowCopyIRExprVec(ex->Iex.CCall.args);
+ for (i = 0; newargs[i]; i++)
+ newargs[i] = flatten_Expr(bb, newargs[i]);
+ t1 = newIRTemp(bb->tyenv, ty);
+ addStmtToIRSB(bb, IRStmt_WrTmp(t1,
+ IRExpr_CCall(ex->Iex.CCall.cee,
+ ex->Iex.CCall.retty,
+ newargs)));
+ return IRExpr_RdTmp(t1);
+
+ case Iex_Mux0X:
+ t1 = newIRTemp(bb->tyenv, ty);
+ addStmtToIRSB(bb, IRStmt_WrTmp(t1,
+ IRExpr_Mux0X(flatten_Expr(bb, ex->Iex.Mux0X.cond),
+ flatten_Expr(bb, ex->Iex.Mux0X.expr0),
+ flatten_Expr(bb, ex->Iex.Mux0X.exprX))));
+ return IRExpr_RdTmp(t1);
+
+ case Iex_Const:
+ /* Lift F64i constants out onto temps so they can be CSEd
+ later. */
+ if (ex->Iex.Const.con->tag == Ico_F64i) {
+ t1 = newIRTemp(bb->tyenv, ty);
+ addStmtToIRSB(bb, IRStmt_WrTmp(t1,
+ IRExpr_Const(ex->Iex.Const.con)));
+ return IRExpr_RdTmp(t1);
+ } else {
+ /* Leave all other constants alone. */
+ return ex;
+ }
+
+ case Iex_RdTmp:
+ return ex;
+
+ default:
+ vex_printf("\n");
+ ppIRExpr(ex);
+ vex_printf("\n");
+ vpanic("flatten_Expr");
+ }
+}
+
+
+/* Append a completely flattened form of 'st' to the end of 'bb'. */
+
+static void flatten_Stmt ( IRSB* bb, IRStmt* st )
+{
+ Int i;
+ IRExpr *e1, *e2, *e3, *e4, *e5;
+ IRDirty *d, *d2;
+ IRCAS *cas, *cas2;
+ switch (st->tag) {
+ case Ist_Put:
+ if (isIRAtom(st->Ist.Put.data)) {
+ /* optimisation to reduce the amount of heap wasted
+ by the flattener */
+ addStmtToIRSB(bb, st);
+ } else {
+ /* general case, always correct */
+ e1 = flatten_Expr(bb, st->Ist.Put.data);
+ addStmtToIRSB(bb, IRStmt_Put(st->Ist.Put.offset, e1));
+ }
+ break;
+ case Ist_PutI:
+ e1 = flatten_Expr(bb, st->Ist.PutI.ix);
+ e2 = flatten_Expr(bb, st->Ist.PutI.data);
+ addStmtToIRSB(bb, IRStmt_PutI(st->Ist.PutI.descr,
+ e1,
+ st->Ist.PutI.bias,
+ e2));
+ break;
+ case Ist_WrTmp:
+ if (isFlat(st->Ist.WrTmp.data)) {
+ /* optimisation, to reduce the number of tmp-tmp
+ copies generated */
+ addStmtToIRSB(bb, st);
+ } else {
+ /* general case, always correct */
+ e1 = flatten_Expr(bb, st->Ist.WrTmp.data);
+ addStmtToIRSB(bb, IRStmt_WrTmp(st->Ist.WrTmp.tmp, e1));
+ }
+ break;
+ case Ist_Store:
+ e1 = flatten_Expr(bb, st->Ist.Store.addr);
+ e2 = flatten_Expr(bb, st->Ist.Store.data);
+ addStmtToIRSB(bb, IRStmt_Store(st->Ist.Store.end, e1,e2));
+ break;
+ case Ist_CAS:
+ cas = st->Ist.CAS.details;
+ e1 = flatten_Expr(bb, cas->addr);
+ e2 = cas->expdHi ? flatten_Expr(bb, cas->expdHi) : NULL;
+ e3 = flatten_Expr(bb, cas->expdLo);
+ e4 = cas->dataHi ? flatten_Expr(bb, cas->dataHi) : NULL;
+ e5 = flatten_Expr(bb, cas->dataLo);
+ cas2 = mkIRCAS( cas->oldHi, cas->oldLo, cas->end,
+ e1, e2, e3, e4, e5 );
+ addStmtToIRSB(bb, IRStmt_CAS(cas2));
+ break;
+ case Ist_LLSC:
+ e1 = flatten_Expr(bb, st->Ist.LLSC.addr);
+ e2 = st->Ist.LLSC.storedata
+ ? flatten_Expr(bb, st->Ist.LLSC.storedata)
+ : NULL;
+ addStmtToIRSB(bb, IRStmt_LLSC(st->Ist.LLSC.end,
+ st->Ist.LLSC.result, e1, e2));
+ break;
+ case Ist_Dirty:
+ d = st->Ist.Dirty.details;
+ d2 = emptyIRDirty();
+ *d2 = *d;
+ d2->args = shallowCopyIRExprVec(d2->args);
+ if (d2->mFx != Ifx_None) {
+ d2->mAddr = flatten_Expr(bb, d2->mAddr);
+ } else {
+ vassert(d2->mAddr == NULL);
+ }
+ d2->guard = flatten_Expr(bb, d2->guard);
+ for (i = 0; d2->args[i]; i++)
+ d2->args[i] = flatten_Expr(bb, d2->args[i]);
+ addStmtToIRSB(bb, IRStmt_Dirty(d2));
+ break;
+ case Ist_NoOp:
+ case Ist_MBE:
+ case Ist_IMark:
+ addStmtToIRSB(bb, st);
+ break;
+ case Ist_AbiHint:
+ e1 = flatten_Expr(bb, st->Ist.AbiHint.base);
+ e2 = flatten_Expr(bb, st->Ist.AbiHint.nia);
+ addStmtToIRSB(bb, IRStmt_AbiHint(e1, st->Ist.AbiHint.len, e2));
+ break;
+ case Ist_Exit:
+ e1 = flatten_Expr(bb, st->Ist.Exit.guard);
+ addStmtToIRSB(bb, IRStmt_Exit(e1, st->Ist.Exit.jk,
+ st->Ist.Exit.dst));
+ break;
+ default:
+ vex_printf("\n");
+ ppIRStmt(st);
+ vex_printf("\n");
+ vpanic("flatten_Stmt");
+ }
+}
+
+
+static IRSB* flatten_BB ( IRSB* in )
+{
+ Int i;
+ IRSB* out;
+ out = emptyIRSB();
+ out->tyenv = deepCopyIRTypeEnv( in->tyenv );
+ for (i = 0; i < in->stmts_used; i++)
+ if (in->stmts[i])
+ flatten_Stmt( out, in->stmts[i] );
+ out->next = flatten_Expr( out, in->next );
+ out->jumpkind = in->jumpkind;
+ return out;
+}
+
+
+/*---------------------------------------------------------------*/
+/*--- In-place removal of redundant GETs ---*/
+/*---------------------------------------------------------------*/
+
+/* Scan forwards, building up an environment binding (min offset, max
+ offset) pairs to values, which will either be temps or constants.
+
+ On seeing 't = Get(minoff,maxoff)', look up (minoff,maxoff) in the
+ env and if it matches, replace the Get with the stored value. If
+ there is no match, add a (minoff,maxoff) :-> t binding.
+
+ On seeing 'Put (minoff,maxoff) = t or c', first remove from the env
+ any binding which fully or partially overlaps with (minoff,maxoff).
+ Then add a new (minoff,maxoff) :-> t or c binding. */
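+
+/* Example (illustrative):
+      t1 = GET:I32(0)    -- adds binding (0,3) :-> t1
+      t2 = GET:I32(0)    -- rewritten to t2 = t1
+      PUT(0) = t3        -- invalidates (0,3), then binds (0,3) :-> t3
+      t4 = GET:I32(0)    -- rewritten to t4 = t3
+*/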
+
+/* Extract the min/max offsets from a guest state array descriptor. */
+
+inline
+static void getArrayBounds ( IRRegArray* descr,
+ UInt* minoff, UInt* maxoff )
+{
+ *minoff = descr->base;
+ *maxoff = *minoff + descr->nElems*sizeofIRType(descr->elemTy) - 1;
+ vassert((*minoff & ~0xFFFF) == 0);
+ vassert((*maxoff & ~0xFFFF) == 0);
+ vassert(*minoff <= *maxoff);
+}
+
+/* Create keys, of the form ((minoffset << 16) | maxoffset). */
+
+static UInt mk_key_GetPut ( Int offset, IRType ty )
+{
+ /* offset should fit in 16 bits. */
+ UInt minoff = offset;
+ UInt maxoff = minoff + sizeofIRType(ty) - 1;
+ vassert((minoff & ~0xFFFF) == 0);
+ vassert((maxoff & ~0xFFFF) == 0);
+ return (minoff << 16) | maxoff;
+}
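+
+/* Example (illustrative): an I32 Get/Put at offset 16 covers bytes
+   16 .. 19, so mk_key_GetPut(16, Ity_I32) == (16 << 16) | 19
+   == 0x100013. */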
+
+static UInt mk_key_GetIPutI ( IRRegArray* descr )
+{
+ UInt minoff, maxoff;
+ getArrayBounds( descr, &minoff, &maxoff );
+ vassert((minoff & ~0xFFFF) == 0);
+ vassert((maxoff & ~0xFFFF) == 0);
+ return (minoff << 16) | maxoff;
+}
+
+/* Supposing h has keys of the form generated by mk_key_GetPut and
+ mk_key_GetIPutI, invalidate any key which overlaps (k_lo
+ .. k_hi).
+*/
+static void invalidateOverlaps ( HashHW* h, UInt k_lo, UInt k_hi )
+{
+ Int j;
+ UInt e_lo, e_hi;
+ vassert(k_lo <= k_hi);
+ /* invalidate any env entries which in any way overlap (k_lo
+ .. k_hi) */
+ /* vex_printf("invalidate %d .. %d\n", k_lo, k_hi ); */
+
+ for (j = 0; j < h->used; j++) {
+ if (!h->inuse[j])
+ continue;
+ e_lo = (((UInt)h->key[j]) >> 16) & 0xFFFF;
+ e_hi = ((UInt)h->key[j]) & 0xFFFF;
+ vassert(e_lo <= e_hi);
+ if (e_hi < k_lo || k_hi < e_lo)
+ continue; /* no overlap possible */
+ else
+ /* overlap; invalidate */
+ h->inuse[j] = False;
+ }
+}
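+
+/* Example (illustrative): with entries for (0,3) and (8,11) in h,
+   invalidateOverlaps(h, 2, 9) invalidates both, since each range
+   overlaps (2 .. 9). */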
+
+
+static void redundant_get_removal_BB ( IRSB* bb )
+{
+ HashHW* env = newHHW();
+ UInt key = 0; /* keep gcc -O happy */
+ Int i, j;
+ HWord val;
+
+ for (i = 0; i < bb->stmts_used; i++) {
+ IRStmt* st = bb->stmts[i];
+
+ if (st->tag == Ist_NoOp)
+ continue;
+
+ /* Deal with Gets */
+ if (st->tag == Ist_WrTmp
+ && st->Ist.WrTmp.data->tag == Iex_Get) {
+ /* st is 't = Get(...)'. Look up in the environment and see
+ if the Get can be replaced. */
+ IRExpr* get = st->Ist.WrTmp.data;
+ key = (HWord)mk_key_GetPut( get->Iex.Get.offset,
+ get->Iex.Get.ty );
+ if (lookupHHW(env, &val, (HWord)key)) {
+ /* found it */
+ /* Note, we could do better here. If the types are
+ different we don't do the substitution, since doing so
+ could lead to invalidly-typed IR. An improvement would
+ be to stick in a reinterpret-style cast, although that
+ would make maintaining flatness more difficult. */
+ IRExpr* valE = (IRExpr*)val;
+ Bool typesOK = toBool( typeOfIRExpr(bb->tyenv,valE)
+ == st->Ist.WrTmp.data->Iex.Get.ty );
+ if (typesOK && DEBUG_IROPT) {
+ vex_printf("rGET: "); ppIRExpr(get);
+ vex_printf(" -> "); ppIRExpr(valE);
+ vex_printf("\n");
+ }
+ if (typesOK)
+ bb->stmts[i] = IRStmt_WrTmp(st->Ist.WrTmp.tmp, valE);
+ } else {
+ /* Not found, but at least we know that t and the Get(...)
+ are now associated. So add a binding to reflect that
+ fact. */
+ addToHHW( env, (HWord)key,
+ (HWord)(void*)(IRExpr_RdTmp(st->Ist.WrTmp.tmp)) );
+ }
+ }
+
+ /* Deal with Puts: invalidate any env entries overlapped by this
+ Put */
+ if (st->tag == Ist_Put || st->tag == Ist_PutI) {
+ UInt k_lo, k_hi;
+ if (st->tag == Ist_Put) {
+ key = mk_key_GetPut( st->Ist.Put.offset,
+ typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );
+ } else {
+ vassert(st->tag == Ist_PutI);
+ key = mk_key_GetIPutI( st->Ist.PutI.descr );
+ }
+
+ k_lo = (key >> 16) & 0xFFFF;
+ k_hi = key & 0xFFFF;
+ invalidateOverlaps(env, k_lo, k_hi);
+ }
+ else
+ if (st->tag == Ist_Dirty) {
+ /* Deal with dirty helpers which write or modify guest state.
+ Invalidate the entire env. We could do a lot better
+ here. */
+ IRDirty* d = st->Ist.Dirty.details;
+ Bool writes = False;
+ for (j = 0; j < d->nFxState; j++) {
+ if (d->fxState[j].fx == Ifx_Modify
+ || d->fxState[j].fx == Ifx_Write)
+ writes = True;
+ }
+ if (writes) {
+ /* dump the entire env (not clever, but correct ...) */
+ for (j = 0; j < env->used; j++)
+ env->inuse[j] = False;
+ if (0) vex_printf("rGET: trash env due to dirty helper\n");
+ }
+ }
+
+ /* add this one to the env, if appropriate */
+ if (st->tag == Ist_Put) {
+ vassert(isIRAtom(st->Ist.Put.data));
+ addToHHW( env, (HWord)key, (HWord)(st->Ist.Put.data));
+ }
+
+ } /* for (i = 0; i < bb->stmts_used; i++) */
+
+}
+
+
+/*---------------------------------------------------------------*/
+/*--- In-place removal of redundant PUTs ---*/
+/*---------------------------------------------------------------*/
+
+/* Find any Get uses in st and invalidate any partially or fully
+ overlapping ranges listed in env. Due to the flattening phase, the
+ only stmt kind we expect to find a Get on is IRStmt_WrTmp. */
+
+static void handle_gets_Stmt (
+ HashHW* env,
+ IRStmt* st,
+ Bool (*preciseMemExnsFn)(Int,Int)
+ )
+{
+ Int j;
+ UInt key = 0; /* keep gcc -O happy */
+ Bool isGet;
+ Bool memRW = False;
+ IRExpr* e;
+
+ switch (st->tag) {
+
+ /* This is the only interesting case. Deal with Gets in the RHS
+ expression. */
+ case Ist_WrTmp:
+ e = st->Ist.WrTmp.data;
+ switch (e->tag) {
+ case Iex_Get:
+ isGet = True;
+ key = mk_key_GetPut ( e->Iex.Get.offset, e->Iex.Get.ty );
+ break;
+ case Iex_GetI:
+ isGet = True;
+ key = mk_key_GetIPutI ( e->Iex.GetI.descr );
+ break;
+ case Iex_Load:
+ isGet = False;
+ memRW = True;
+ break;
+ default:
+ isGet = False;
+ }
+ if (isGet) {
+ UInt k_lo, k_hi;
+ k_lo = (key >> 16) & 0xFFFF;
+ k_hi = key & 0xFFFF;
+ invalidateOverlaps(env, k_lo, k_hi);
+ }
+ break;
+
+ /* Be very conservative for dirty helper calls; dump the entire
+ environment. The helper might read guest state, in which
+ case it needs to be flushed first. Also, the helper might
+ access guest memory, in which case all parts of the guest
+ state requiring precise exceptions need to be flushed. The
+ crude solution is just to flush everything; we could easily
+ enough do a lot better if needed. */
+ /* Probably also overly-conservative, but also dump everything
+ if we hit a memory bus event (fence, lock, unlock). Ditto
+ AbiHints, CASs, LLs and SCs. */
+ case Ist_AbiHint:
+ vassert(isIRAtom(st->Ist.AbiHint.base));
+ vassert(isIRAtom(st->Ist.AbiHint.nia));
+ /* fall through */
+ case Ist_MBE:
+ case Ist_Dirty:
+ case Ist_CAS:
+ case Ist_LLSC:
+ for (j = 0; j < env->used; j++)
+ env->inuse[j] = False;
+ break;
+
+ /* all other cases are boring. */
+ case Ist_Store:
+ vassert(isIRAtom(st->Ist.Store.addr));
+ vassert(isIRAtom(st->Ist.Store.data));
+ memRW = True;
+ break;
+
+ case Ist_Exit:
+ vassert(isIRAtom(st->Ist.Exit.guard));
+ break;
+
+ case Ist_PutI:
+ vassert(isIRAtom(st->Ist.PutI.ix));
+ vassert(isIRAtom(st->Ist.PutI.data));
+ break;
+
+ case Ist_NoOp:
+ case Ist_IMark:
+ break;
+
+ default:
+ vex_printf("\n");
+ ppIRStmt(st);
+ vex_printf("\n");
+ vpanic("handle_gets_Stmt");
+ }
+
+ if (memRW) {
+ /* This statement accesses memory. So we need to dump all parts
+ of the environment corresponding to guest state that may not
+ be reordered with respect to memory references. That means
+ at least the stack pointer. */
+ for (j = 0; j < env->used; j++) {
+ if (!env->inuse[j])
+ continue;
+ if (vex_control.iropt_precise_memory_exns) {
+ /* Precise exceptions required. Flush all guest state. */
+ env->inuse[j] = False;
+ } else {
+ /* Just flush the minimal amount required, as computed by
+ preciseMemExnsFn. */
+ HWord k_lo = (env->key[j] >> 16) & 0xFFFF;
+ HWord k_hi = env->key[j] & 0xFFFF;
+ if (preciseMemExnsFn( k_lo, k_hi ))
+ env->inuse[j] = False;
+ }
+ }
+ } /* if (memRW) */
+
+}
+
+
+/* Scan backwards, building up a set of (min offset, max
+ offset) pairs, indicating those parts of the guest state
+ for which the next event is a write.
+
+ On seeing a conditional exit, empty the set.
+
+ On seeing 'Put (minoff,maxoff) = t or c', if (minoff,maxoff) is
+ completely within the set, remove the Put. Otherwise, add
+ (minoff,maxoff) to the set.
+
+ On seeing 'Get (minoff,maxoff)', remove any part of the set
+ overlapping (minoff,maxoff). The same has to happen for any events
+ which implicitly read parts of the guest state: dirty helper calls
+ and loads/stores.
+*/
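+
+/* Example (illustrative). Given, in program order,
+      PUT(0) = t3
+      PUT(0) = t1
+   the backwards scan first meets 'PUT(0) = t1' and adds (0,3) to the
+   set; it then meets 'PUT(0) = t3', finds (0,3) present, and NoOps
+   it, since that write is overwritten before any possible read. */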
+
+static void redundant_put_removal_BB (
+ IRSB* bb,
+ Bool (*preciseMemExnsFn)(Int,Int)
+ )
+{
+ Int i, j;
+ Bool isPut;
+ IRStmt* st;
+ UInt key = 0; /* keep gcc -O happy */
+
+ HashHW* env = newHHW();
+ for (i = bb->stmts_used-1; i >= 0; i--) {
+ st = bb->stmts[i];
+
+ if (st->tag == Ist_NoOp)
+ continue;
+
+ /* Deal with conditional exits. */
+ if (st->tag == Ist_Exit) {
+ /* Since control may not get beyond this point, we must empty
+ out the set, as we can no longer claim that the next
+ event for any part of the guest state is definitely a
+ write. */
+ vassert(isIRAtom(st->Ist.Exit.guard));
+ for (j = 0; j < env->used; j++)
+ env->inuse[j] = False;
+ continue;
+ }
+
+ /* Deal with Puts */
+ switch (st->tag) {
+ case Ist_Put:
+ isPut = True;
+ key = mk_key_GetPut( st->Ist.Put.offset,
+ typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );
+ vassert(isIRAtom(st->Ist.Put.data));
+ break;
+ case Ist_PutI:
+ isPut = True;
+ key = mk_key_GetIPutI( st->Ist.PutI.descr );
+ vassert(isIRAtom(st->Ist.PutI.ix));
+ vassert(isIRAtom(st->Ist.PutI.data));
+ break;
+ default:
+ isPut = False;
+ }
+ if (isPut && st->tag != Ist_PutI) {
+ /* See if any single entry in env overlaps this Put. This is
+ simplistic, in that the transformation would also be valid
+ if, say, two or more entries in the env jointly covered this
+ Put, but lookupHHW will only find a single entry which
+ exactly matches it. This is suboptimal but safe. */
+ if (lookupHHW(env, NULL, (HWord)key)) {
+ /* This Put is redundant because a later one will overwrite
+ it. So NULL (nop) it out. */
+ if (DEBUG_IROPT) {
+ vex_printf("rPUT: "); ppIRStmt(st);
+ vex_printf("\n");
+ }
+ bb->stmts[i] = IRStmt_NoOp();
+ } else {
+ /* We can't demonstrate that this Put is redundant, so add it
+ to the running collection. */
+ addToHHW(env, (HWord)key, 0);
+ }
+ continue;
+ }
+
+ /* Deal with Gets. These remove bits of the environment since
+ appearance of a Get means that the next event for that slice
+ of the guest state is no longer a write, but a read. Also
+ deals with implicit reads of guest state needed to maintain
+ precise exceptions. */
+ handle_gets_Stmt( env, st, preciseMemExnsFn );
+ }
+}
+
+
+/*---------------------------------------------------------------*/
+/*--- Constant propagation and folding ---*/
+/*---------------------------------------------------------------*/
+
+/* The env in this section is a map from IRTemp to IRExpr*,
+ that is, an array indexed by IRTemp. */
+
+/* Are both expressions simply the same IRTemp ? */
+static Bool sameIRTemps ( IRExpr* e1, IRExpr* e2 )
+{
+ return toBool( e1->tag == Iex_RdTmp
+ && e2->tag == Iex_RdTmp
+ && e1->Iex.RdTmp.tmp == e2->Iex.RdTmp.tmp );
+}
+
+static Bool sameIcoU32s ( IRExpr* e1, IRExpr* e2 )
+{
+ return toBool( e1->tag == Iex_Const
+ && e2->tag == Iex_Const
+ && e1->Iex.Const.con->tag == Ico_U32
+ && e2->Iex.Const.con->tag == Ico_U32
+ && e1->Iex.Const.con->Ico.U32
+ == e2->Iex.Const.con->Ico.U32 );
+}
+
+/* Are both expressions either the same IRTemp or IRConst-U32s? If
+ in doubt, say No. */
+static Bool sameIRTempsOrIcoU32s ( IRExpr* e1, IRExpr* e2 )
+{
+ switch (e1->tag) {
+ case Iex_RdTmp:
+ return sameIRTemps(e1, e2);
+ case Iex_Const:
+ return sameIcoU32s(e1, e2);
+ default:
+ return False;
+ }
+}
+
+static Bool notBool ( Bool b )
+{
+ if (b == True) return False;
+ if (b == False) return True;
+ vpanic("notBool");
+}
+
+/* Make a zero which has the same type as the result of the given
+ primop. */
+static IRExpr* mkZeroOfPrimopResultType ( IROp op )
+{
+ switch (op) {
+ case Iop_Xor8: return IRExpr_Const(IRConst_U8(0));
+ case Iop_Xor16: return IRExpr_Const(IRConst_U16(0));
+ case Iop_Sub32:
+ case Iop_Xor32: return IRExpr_Const(IRConst_U32(0));
+ case Iop_Sub64:
+ case Iop_Xor64: return IRExpr_Const(IRConst_U64(0));
+ case Iop_XorV128: return IRExpr_Const(IRConst_V128(0));
+ default: vpanic("mkZeroOfPrimopResultType: bad primop");
+ }
+}
+
+/* Make a value containing all 1-bits, which has the same type as the
+ result of the given primop. */
+static IRExpr* mkOnesOfPrimopResultType ( IROp op )
+{
+ switch (op) {
+ case Iop_CmpEQ64:
+ return IRExpr_Const(IRConst_U1(toBool(1)));
+ case Iop_CmpEQ8x8:
+ return IRExpr_Const(IRConst_U64(0xFFFFFFFFFFFFFFFFULL));
+ case Iop_CmpEQ8x16:
+ return IRExpr_Const(IRConst_V128(0xFFFF));
+ default:
+ vpanic("mkOnesOfPrimopResultType: bad primop");
+ }
+}
+
+
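+/* Fold 'e' into a simpler expression where possible; if no rule
+   applies, return it unchanged. Examples (illustrative):
+   Add32(0x2:I32,0x3:I32) folds to 0x5:I32, and And32(t5,0x0:I32)
+   folds to 0x0:I32. */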
+static IRExpr* fold_Expr ( IRExpr* e )
+{
+ Int shift;
+ IRExpr* e2 = e; /* e2 is the result of folding e, if possible */
+
+ /* UNARY ops */
+ if (e->tag == Iex_Unop
+ && e->Iex.Unop.arg->tag == Iex_Const) {
+ switch (e->Iex.Unop.op) {
+ case Iop_1Uto8:
+ e2 = IRExpr_Const(IRConst_U8(toUChar(
+ e->Iex.Unop.arg->Iex.Const.con->Ico.U1
+ ? 1 : 0)));
+ break;
+ case Iop_1Uto32:
+ e2 = IRExpr_Const(IRConst_U32(
+ e->Iex.Unop.arg->Iex.Const.con->Ico.U1
+ ? 1 : 0));
+ break;
+ case Iop_1Uto64:
+ e2 = IRExpr_Const(IRConst_U64(
+ e->Iex.Unop.arg->Iex.Const.con->Ico.U1
+ ? 1 : 0));
+ break;
+
+ case Iop_1Sto8:
+ e2 = IRExpr_Const(IRConst_U8(toUChar(
+ e->Iex.Unop.arg->Iex.Const.con->Ico.U1
+ ? 0xFF : 0)));
+ break;
+ case Iop_1Sto16:
+ e2 = IRExpr_Const(IRConst_U16(toUShort(
+ e->Iex.Unop.arg->Iex.Const.con->Ico.U1
+ ? 0xFFFF : 0)));
+ break;
+ case Iop_1Sto32:
+ e2 = IRExpr_Const(IRConst_U32(
+ e->Iex.Unop.arg->Iex.Const.con->Ico.U1
+ ? 0xFFFFFFFF : 0));
+ break;
+ case Iop_1Sto64:
+ e2 = IRExpr_Const(IRConst_U64(
+ e->Iex.Unop.arg->Iex.Const.con->Ico.U1
+ ? 0xFFFFFFFFFFFFFFFFULL : 0));
+ break;
+
+ case Iop_8Sto32: {
+ /* signed */ Int s32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U8;
+ s32 <<= 24;
+ s32 >>= 24;
+ e2 = IRExpr_Const(IRConst_U32((UInt)s32));
+ break;
+ }
+ case Iop_8Uto64:
+ e2 = IRExpr_Const(IRConst_U64(
+ 0xFFULL & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));
+ break;
+ case Iop_16Uto64:
+ e2 = IRExpr_Const(IRConst_U64(
+ 0xFFFFULL & e->Iex.Unop.arg->Iex.Const.con->Ico.U16));
+ break;
+ case Iop_8Uto32:
+ e2 = IRExpr_Const(IRConst_U32(
+ 0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));
+ break;
+ case Iop_16Uto32:
+ e2 = IRExpr_Const(IRConst_U32(
+ 0xFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U16));
+ break;
+ case Iop_32to16:
+ e2 = IRExpr_Const(IRConst_U16(toUShort(
+ 0xFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
+ break;
+ case Iop_32to8:
+ e2 = IRExpr_Const(IRConst_U8(toUChar(
+ 0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
+ break;
+ case Iop_32to1:
+ e2 = IRExpr_Const(IRConst_U1(toBool(
+ 1 == (1 & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)
+ )));
+ break;
+ case Iop_64to1:
+ e2 = IRExpr_Const(IRConst_U1(toBool(
+ 1 == (1 & e->Iex.Unop.arg->Iex.Const.con->Ico.U64)
+ )));
+ break;
+
+ case Iop_Not64:
+ e2 = IRExpr_Const(IRConst_U64(
+ ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U64)));
+ break;
+ case Iop_Not32:
+ e2 = IRExpr_Const(IRConst_U32(
+ ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
+ break;
+ case Iop_Not16:
+ e2 = IRExpr_Const(IRConst_U16(toUShort(
+ ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U16))));
+ break;
+ case Iop_Not8:
+ e2 = IRExpr_Const(IRConst_U8(toUChar(
+ ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U8))));
+ break;
+
+ case Iop_Not1:
+ e2 = IRExpr_Const(IRConst_U1(
+ notBool(e->Iex.Unop.arg->Iex.Const.con->Ico.U1)));
+ break;
+
+ case Iop_64to8: {
+ ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
+ w64 &= 0xFFULL;
+ e2 = IRExpr_Const(IRConst_U8( (UChar)w64 ));
+ break;
+ }
+ case Iop_64to16: {
+ ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
+ w64 &= 0xFFFFULL;
+ e2 = IRExpr_Const(IRConst_U16( (UShort)w64 ));
+ break;
+ }
+ case Iop_64to32: {
+ ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
+ w64 &= 0x00000000FFFFFFFFULL;
+ e2 = IRExpr_Const(IRConst_U32( (UInt)w64 ));
+ break;
+ }
+ case Iop_64HIto32: {
+ ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
+ w64 >>= 32;
+ e2 = IRExpr_Const(IRConst_U32( (UInt)w64 ));
+ break;
+ }
+ case Iop_32Uto64:
+ e2 = IRExpr_Const(IRConst_U64(
+ 0xFFFFFFFFULL
+ & e->Iex.Unop.arg->Iex.Const.con->Ico.U32));
+ break;
+ case Iop_32Sto64: {
+ /* signed */ Long s64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
+ s64 <<= 32;
+ s64 >>= 32;
+ e2 = IRExpr_Const(IRConst_U64((ULong)s64));
+ break;
+ }
+ case Iop_CmpNEZ8:
+ e2 = IRExpr_Const(IRConst_U1(toBool(
+ 0 !=
+ (0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8)
+ )));
+ break;
+ case Iop_CmpNEZ32:
+ e2 = IRExpr_Const(IRConst_U1(toBool(
+ 0 !=
+ (0xFFFFFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)
+ )));
+ break;
+ case Iop_CmpNEZ64:
+ e2 = IRExpr_Const(IRConst_U1(toBool(
+ 0ULL != e->Iex.Unop.arg->Iex.Const.con->Ico.U64
+ )));
+ break;
+
+ case Iop_CmpwNEZ32: {
+ UInt w32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
+ if (w32 == 0)
+ e2 = IRExpr_Const(IRConst_U32( 0 ));
+ else
+ e2 = IRExpr_Const(IRConst_U32( 0xFFFFFFFF ));
+ break;
+ }
+ case Iop_CmpwNEZ64: {
+ ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
+ if (w64 == 0)
+ e2 = IRExpr_Const(IRConst_U64( 0 ));
+ else
+ e2 = IRExpr_Const(IRConst_U64( 0xFFFFFFFFFFFFFFFFULL ));
+ break;
+ }
+
+ case Iop_Left32: {
+ UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
+ Int s32 = (Int)(u32 & 0xFFFFFFFF);
+ s32 = (s32 | (-s32));
+ e2 = IRExpr_Const( IRConst_U32( (UInt)s32 ));
+ break;
+ }
+
+ case Iop_Left64: {
+ ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
+ Long s64 = (Long)u64;
+ s64 = (s64 | (-s64));
+ e2 = IRExpr_Const( IRConst_U64( (ULong)s64 ));
+ break;
+ }
+
+ default:
+ goto unhandled;
+ }
+ }
+
+ /* BINARY ops */
+ if (e->tag == Iex_Binop) {
+ if (e->Iex.Binop.arg1->tag == Iex_Const
+ && e->Iex.Binop.arg2->tag == Iex_Const) {
+ /* cases where both args are consts */
+ switch (e->Iex.Binop.op) {
+
+ /* -- Or -- */
+ case Iop_Or8:
+ e2 = IRExpr_Const(IRConst_U8(toUChar(
+ (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
+ | e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
+ break;
+ case Iop_Or16:
+ e2 = IRExpr_Const(IRConst_U16(toUShort(
+ (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16
+ | e->Iex.Binop.arg2->Iex.Const.con->Ico.U16))));
+ break;
+ case Iop_Or32:
+ e2 = IRExpr_Const(IRConst_U32(
+ (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
+ | e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
+ break;
+ case Iop_Or64:
+ e2 = IRExpr_Const(IRConst_U64(
+ (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
+ | e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
+ break;
+
+ /* -- Xor -- */
+ case Iop_Xor8:
+ e2 = IRExpr_Const(IRConst_U8(toUChar(
+ (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
+ ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
+ break;
+ case Iop_Xor16:
+ e2 = IRExpr_Const(IRConst_U16(toUShort(
+ (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16
+ ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U16))));
+ break;
+ case Iop_Xor32:
+ e2 = IRExpr_Const(IRConst_U32(
+ (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
+ ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
+ break;
+ case Iop_Xor64:
+ e2 = IRExpr_Const(IRConst_U64(
+ (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
+ ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
+ break;
+
+ /* -- And -- */
+ case Iop_And8:
+ e2 = IRExpr_Const(IRConst_U8(toUChar(
+ (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
+ & e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
+ break;
+ case Iop_And32:
+ e2 = IRExpr_Const(IRConst_U32(
+ (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
+ & e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
+ break;
+ case Iop_And64:
+ e2 = IRExpr_Const(IRConst_U64(
+ (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
+ & e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
+ break;
+
+ /* -- Add -- */
+ case Iop_Add8:
+ e2 = IRExpr_Const(IRConst_U8(toUChar(
+ (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
+ + e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
+ break;
+ case Iop_Add32:
+ e2 = IRExpr_Const(IRConst_U32(
+ (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
+ + e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
+ break;
+ case Iop_Add64:
+ e2 = IRExpr_Const(IRConst_U64(
+ (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
+ + e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
+ break;
+
+ /* -- Sub -- */
+ case Iop_Sub8:
+ e2 = IRExpr_Const(IRConst_U8(toUChar(
+ (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
+ - e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
+ break;
+ case Iop_Sub32:
+ e2 = IRExpr_Const(IRConst_U32(
+ (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
+ - e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
+ break;
+ case Iop_Sub64:
+ e2 = IRExpr_Const(IRConst_U64(
+ (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
+ - e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
+ break;
+
+ /* -- Max32U -- */
+ case Iop_Max32U: {
+ UInt u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
+ UInt u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
+ UInt res = u32a > u32b ? u32a : u32b;
+ e2 = IRExpr_Const(IRConst_U32(res));
+ break;
+ }
+
+ /* -- Mul -- */
+ case Iop_Mul32:
+ e2 = IRExpr_Const(IRConst_U32(
+ (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
+ * e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
+ break;
+ case Iop_Mul64:
+ e2 = IRExpr_Const(IRConst_U64(
+ (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
+ * e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
+ break;
+
+ case Iop_MullS32: {
+ /* very paranoid */
+ UInt u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
+ UInt u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
+ Int s32a = (Int)u32a;
+ Int s32b = (Int)u32b;
+ Long s64a = (Long)s32a;
+ Long s64b = (Long)s32b;
+ Long sres = s64a * s64b;
+ ULong ures = (ULong)sres;
+ e2 = IRExpr_Const(IRConst_U64(ures));
+ break;
+ }
+
+ /* -- Shl -- */
+ case Iop_Shl32:
+ vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
+ shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
+ if (shift >= 0 && shift <= 31)
+ e2 = IRExpr_Const(IRConst_U32(
+ (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
+ << shift)));
+ break;
+ case Iop_Shl64:
+ vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
+ shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
+ if (shift >= 0 && shift <= 63)
+ e2 = IRExpr_Const(IRConst_U64(
+ (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
+ << shift)));
+ break;
+
+ /* -- Sar -- */
+ case Iop_Sar32: {
+ /* paranoid ... */
+ /*signed*/ Int s32;
+ vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
+ s32 = (Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32);
+ shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
+ if (shift >= 0 && shift <= 31) {
+ s32 >>=/*signed*/ shift;
+ e2 = IRExpr_Const(IRConst_U32((UInt)s32));
+ }
+ break;
+ }
+ case Iop_Sar64: {
+ /* paranoid ... */
+ /*signed*/ Long s64;
+ vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
+ s64 = (Long)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64);
+ shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
+ if (shift >= 0 && shift <= 63) {
+ s64 >>=/*signed*/ shift;
+ e2 = IRExpr_Const(IRConst_U64((ULong)s64));
+ }
+ break;
+ }
+
+ /* -- Shr -- */
+ case Iop_Shr32: {
+ /* paranoid ... */
+ /*unsigned*/ UInt u32;
+ vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
+ u32 = (UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32);
+ shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
+ if (shift >= 0 && shift <= 31) {
+ u32 >>=/*unsigned*/ shift;
+ e2 = IRExpr_Const(IRConst_U32(u32));
+ }
+ break;
+ }
+ case Iop_Shr64: {
+ /* paranoid ... */
+ /*unsigned*/ ULong u64;
+ vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
+ u64 = (ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64);
+ shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
+ if (shift >= 0 && shift <= 63) {
+ u64 >>=/*unsigned*/ shift;
+ e2 = IRExpr_Const(IRConst_U64(u64));
+ }
+ break;
+ }
+
+ /* -- CmpEQ -- */
+ case Iop_CmpEQ32:
+ e2 = IRExpr_Const(IRConst_U1(toBool(
+ (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
+ == e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))));
+ break;
+ case Iop_CmpEQ64:
+ e2 = IRExpr_Const(IRConst_U1(toBool(
+ (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
+ == e->Iex.Binop.arg2->Iex.Const.con->Ico.U64))));
+ break;
+
+ /* -- CmpNE -- */
+ case Iop_CmpNE8:
+ e2 = IRExpr_Const(IRConst_U1(toBool(
+ ((0xFF & e->Iex.Binop.arg1->Iex.Const.con->Ico.U8)
+ != (0xFF & e->Iex.Binop.arg2->Iex.Const.con->Ico.U8)))));
+ break;
+ case Iop_CmpNE32:
+ e2 = IRExpr_Const(IRConst_U1(toBool(
+ (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
+ != e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))));
+ break;
+ case Iop_CmpNE64:
+ e2 = IRExpr_Const(IRConst_U1(toBool(
+ (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
+ != e->Iex.Binop.arg2->Iex.Const.con->Ico.U64))));
+ break;
+
+ /* -- CmpLEU -- */
+ case Iop_CmpLE32U:
+ e2 = IRExpr_Const(IRConst_U1(toBool(
+ ((UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
+ <= (UInt)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
+ break;
+
+ /* -- CmpLES -- */
+ case Iop_CmpLE32S:
+ e2 = IRExpr_Const(IRConst_U1(toBool(
+ ((Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
+ <= (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
+ break;
+
+ /* -- CmpLTS -- */
+ case Iop_CmpLT32S:
+ e2 = IRExpr_Const(IRConst_U1(toBool(
+ ((Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
+ < (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
+ break;
+
+ /* -- CmpLTU -- */
+ case Iop_CmpLT32U:
+ e2 = IRExpr_Const(IRConst_U1(toBool(
+ ((UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
+ < (UInt)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
+ break;
+
+ /* -- CmpORD -- */
+ case Iop_CmpORD32S: {
+ /* very paranoid */
+ UInt u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
+ UInt u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
+ Int s32a = (Int)u32a;
+ Int s32b = (Int)u32b;
+ Int r = 0x2; /* EQ */
+ if (s32a < s32b) {
+ r = 0x8; /* LT */
+ }
+ else if (s32a > s32b) {
+ r = 0x4; /* GT */
+ }
+ e2 = IRExpr_Const(IRConst_U32(r));
+ break;
+ }
+
+ /* -- nHLto2n -- */
+ case Iop_32HLto64:
+ e2 = IRExpr_Const(IRConst_U64(
+ (((ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)) << 32)
+ | ((ULong)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))
+ ));
+ break;
+ case Iop_64HLto128:
+ /* We can't fold this, because there is no way to
+ express the result in IR, but at least pretend to
+ handle it, so as to stop getting blasted with
+ no-rule-for-this-primop messages. */
+ break;
+
+ default:
+ goto unhandled;
+ }
+
+ } else {
+
+ /* other cases (identities, etc) */
+
+ /* Shl64/Shr64(x,0) ==> x */
+ if ((e->Iex.Binop.op == Iop_Shl64 || e->Iex.Binop.op == Iop_Shr64)
+ && e->Iex.Binop.arg2->tag == Iex_Const
+ && e->Iex.Binop.arg2->Iex.Const.con->Ico.U8 == 0) {
+ e2 = e->Iex.Binop.arg1;
+ } else
+
+ /* Shl32/Shr32(x,0) ==> x */
+ if ((e->Iex.Binop.op == Iop_Shl32 || e->Iex.Binop.op == Iop_Shr32)
+ && e->Iex.Binop.arg2->tag == Iex_Const
+ && e->Iex.Binop.arg2->Iex.Const.con->Ico.U8 == 0) {
+ e2 = e->Iex.Binop.arg1;
+ } else
+
+ /* Or8(x,0) ==> x */
+ if ((e->Iex.Binop.op == Iop_Or8)
+ && e->Iex.Binop.arg2->tag == Iex_Const
+ && e->Iex.Binop.arg2->Iex.Const.con->Ico.U8 == 0) {
+ e2 = e->Iex.Binop.arg1;
+ } else
+
+ /* Or16(x,0) ==> x */
+ if ((e->Iex.Binop.op == Iop_Or16)
+ && e->Iex.Binop.arg2->tag == Iex_Const
+ && e->Iex.Binop.arg2->Iex.Const.con->Ico.U16 == 0) {
+ e2 = e->Iex.Binop.arg1;
+ } else
+
+ /* Or32/Add32/Max32U(x,0) ==> x */
+ if ((e->Iex.Binop.op == Iop_Add32
+ || e->Iex.Binop.op == Iop_Or32 || e->Iex.Binop.op == Iop_Max32U)
+ && e->Iex.Binop.arg2->tag == Iex_Const
+ && e->Iex.Binop.arg2->Iex.Const.con->Ico.U32 == 0) {
+ e2 = e->Iex.Binop.arg1;
+ } else
+
+ /* Add32(t,t) ==> t << 1. Memcheck doesn't understand that
+ x+x produces a defined least significant bit, and it seems
+ simplest just to get rid of the problem by rewriting it
+ out, since the opportunity to do so exists. */
+ if (e->Iex.Binop.op == Iop_Add32
+ && e->Iex.Binop.arg1->tag == Iex_RdTmp
+ && e->Iex.Binop.arg2->tag == Iex_RdTmp
+ && e->Iex.Binop.arg1->Iex.RdTmp.tmp
+ == e->Iex.Binop.arg2->Iex.RdTmp.tmp) {
+ e2 = IRExpr_Binop(Iop_Shl32,
+ e->Iex.Binop.arg1,
+ IRExpr_Const(IRConst_U8(1)));
+ } else
+
+ /* Add64(t,t) ==> t << 1; rationale as for Add32(t,t) above. */
+ if (e->Iex.Binop.op == Iop_Add64
+ && e->Iex.Binop.arg1->tag == Iex_RdTmp
+ && e->Iex.Binop.arg2->tag == Iex_RdTmp
+ && e->Iex.Binop.arg1->Iex.RdTmp.tmp
+ == e->Iex.Binop.arg2->Iex.RdTmp.tmp) {
+ e2 = IRExpr_Binop(Iop_Shl64,
+ e->Iex.Binop.arg1,
+ IRExpr_Const(IRConst_U8(1)));
+ } else
+
+ /* Add8(t,t) ==> t << 1; rationale as for Add32(t,t) above. */
+ if (e->Iex.Binop.op == Iop_Add8
+ && e->Iex.Binop.arg1->tag == Iex_RdTmp
+ && e->Iex.Binop.arg2->tag == Iex_RdTmp
+ && e->Iex.Binop.arg1->Iex.RdTmp.tmp
+ == e->Iex.Binop.arg2->Iex.RdTmp.tmp) {
+ e2 = IRExpr_Binop(Iop_Shl8,
+ e->Iex.Binop.arg1,
+ IRExpr_Const(IRConst_U8(1)));
+ } else
+ /* NB no Add16(t,t) case yet as no known test case exists */
+
+ /* Or64/Add64(x,0) ==> x */
+ if ((e->Iex.Binop.op == Iop_Add64 || e->Iex.Binop.op == Iop_Or64)
+ && e->Iex.Binop.arg2->tag == Iex_Const
+ && e->Iex.Binop.arg2->Iex.Const.con->Ico.U64 == 0) {
+ e2 = e->Iex.Binop.arg1;
+ } else
+
+ /* And32(x,0xFFFFFFFF) ==> x */
+ if (e->Iex.Binop.op == Iop_And32
+ && e->Iex.Binop.arg2->tag == Iex_Const
+ && e->Iex.Binop.arg2->Iex.Const.con->Ico.U32 == 0xFFFFFFFF) {
+ e2 = e->Iex.Binop.arg1;
+ } else
+
+ /* And32(x,0) ==> 0 */
+ if (e->Iex.Binop.op == Iop_And32
+ && e->Iex.Binop.arg2->tag == Iex_Const
+ && e->Iex.Binop.arg2->Iex.Const.con->Ico.U32 == 0) {
+ e2 = IRExpr_Const(IRConst_U32(0));
+ } else
+
+ /* And32/Shl32(0,x) ==> 0 */
+ if ((e->Iex.Binop.op == Iop_And32 || e->Iex.Binop.op == Iop_Shl32)
+ && e->Iex.Binop.arg1->tag == Iex_Const
+ && e->Iex.Binop.arg1->Iex.Const.con->Ico.U32 == 0) {
+ e2 = IRExpr_Const(IRConst_U32(0));
+ } else
+
+ /* Or8(0,x) ==> x */
+ if (e->Iex.Binop.op == Iop_Or8
+ && e->Iex.Binop.arg1->tag == Iex_Const
+ && e->Iex.Binop.arg1->Iex.Const.con->Ico.U8 == 0) {
+ e2 = e->Iex.Binop.arg2;
+ } else
+
+ /* Or32/Max32U(0,x) ==> x */
+ if ((e->Iex.Binop.op == Iop_Or32 || e->Iex.Binop.op == Iop_Max32U)
+ && e->Iex.Binop.arg1->tag == Iex_Const
+ && e->Iex.Binop.arg1->Iex.Const.con->Ico.U32 == 0) {
+ e2 = e->Iex.Binop.arg2;
+ } else
+
+ /* Or64(0,x) ==> x */
+ if (e->Iex.Binop.op == Iop_Or64
+ && e->Iex.Binop.arg1->tag == Iex_Const
+ && e->Iex.Binop.arg1->Iex.Const.con->Ico.U64 == 0) {
+ e2 = e->Iex.Binop.arg2;
+ } else
+
+ /* Or8/16/32/64/V128(t,t) ==> t, for some IRTemp t */
+ /* And8/16/32/64(t,t) ==> t, for some IRTemp t */
+ /* Max32U(t,t) ==> t, for some IRTemp t */
+ switch (e->Iex.Binop.op) {
+ case Iop_And64: case Iop_And32:
+ case Iop_And16: case Iop_And8:
+ case Iop_Or64: case Iop_Or32:
+ case Iop_Or16: case Iop_Or8: case Iop_OrV128:
+ case Iop_Max32U:
+ if (sameIRTemps(e->Iex.Binop.arg1, e->Iex.Binop.arg2))
+ e2 = e->Iex.Binop.arg1;
+ break;
+ default:
+ break;
+ }
+
+ /* Xor8/16/32/64/V128(t,t) ==> 0, for some IRTemp t */
+ /* Sub32/64(t,t) ==> 0, for some IRTemp t */
+ switch (e->Iex.Binop.op) {
+ case Iop_Xor64: case Iop_Xor32:
+ case Iop_Xor16: case Iop_Xor8:
+ case Iop_XorV128:
+ case Iop_Sub64: case Iop_Sub32:
+ if (sameIRTemps(e->Iex.Binop.arg1, e->Iex.Binop.arg2))
+ e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op);
+ break;
+ default:
+ break;
+ }
+
+ switch (e->Iex.Binop.op) {
+ case Iop_CmpEQ64:
+ case Iop_CmpEQ8x8:
+ case Iop_CmpEQ8x16:
+ if (sameIRTemps(e->Iex.Binop.arg1, e->Iex.Binop.arg2))
+ e2 = mkOnesOfPrimopResultType(e->Iex.Binop.op);
+ break;
+ default:
+ break;
+ }
+
+ }
+ }
+
+ /* Mux0X */
+ if (e->tag == Iex_Mux0X) {
+ /* is the discriminant a constant? */
+ if (e->Iex.Mux0X.cond->tag == Iex_Const) {
+ Bool zero;
+ /* this is assured by the IR type rules */
+ vassert(e->Iex.Mux0X.cond->Iex.Const.con->tag == Ico_U8);
+ zero = toBool(0 == (0xFF & e->Iex.Mux0X.cond
+ ->Iex.Const.con->Ico.U8));
+ e2 = zero ? e->Iex.Mux0X.expr0 : e->Iex.Mux0X.exprX;
+ }
+ else
+ /* are the arms identical? (pretty weedy test) */
+ if (sameIRTempsOrIcoU32s(e->Iex.Mux0X.expr0,
+ e->Iex.Mux0X.exprX)) {
+ e2 = e->Iex.Mux0X.expr0;
+ }
+ }
+
+ /* Show cases where we've found but not folded 'op(t,t)'. */
+ if (0 && e == e2 && e->tag == Iex_Binop
+ && sameIRTemps(e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
+ vex_printf("IDENT: ");
+ ppIRExpr(e); vex_printf("\n");
+ }
+
+ /* Show the overall results of folding. */
+ if (DEBUG_IROPT && e2 != e) {
+ vex_printf("FOLD: ");
+ ppIRExpr(e); vex_printf(" -> ");
+ ppIRExpr(e2); vex_printf("\n");
+ }
+
+ return e2;
+
+ unhandled:
+# if 0
+ vex_printf("\n\n");
+ ppIRExpr(e);
+ vpanic("fold_Expr: no rule for the above");
+# else
+ if (vex_control.iropt_verbosity > 0) {
+ vex_printf("vex iropt: fold_Expr: no rule for: ");
+ ppIRExpr(e);
+ vex_printf("\n");
+ }
+ return e2;
+# endif
+}
+
+
+/* Apply the subst to a simple 1-level expression -- guaranteed to be
+   1-level due to the previous flattening pass. */
+
+static IRExpr* subst_Expr ( IRExpr** env, IRExpr* ex )
+{
+ switch (ex->tag) {
+ case Iex_RdTmp:
+ if (env[(Int)ex->Iex.RdTmp.tmp] != NULL) {
+ return env[(Int)ex->Iex.RdTmp.tmp];
+ } else {
+ /* not bound in env */
+ return ex;
+ }
+
+ case Iex_Const:
+ case Iex_Get:
+ return ex;
+
+ case Iex_GetI:
+ vassert(isIRAtom(ex->Iex.GetI.ix));
+ return IRExpr_GetI(
+ ex->Iex.GetI.descr,
+ subst_Expr(env, ex->Iex.GetI.ix),
+ ex->Iex.GetI.bias
+ );
+
+ case Iex_Qop:
+ vassert(isIRAtom(ex->Iex.Qop.arg1));
+ vassert(isIRAtom(ex->Iex.Qop.arg2));
+ vassert(isIRAtom(ex->Iex.Qop.arg3));
+ vassert(isIRAtom(ex->Iex.Qop.arg4));
+ return IRExpr_Qop(
+ ex->Iex.Qop.op,
+ subst_Expr(env, ex->Iex.Qop.arg1),
+ subst_Expr(env, ex->Iex.Qop.arg2),
+ subst_Expr(env, ex->Iex.Qop.arg3),
+ subst_Expr(env, ex->Iex.Qop.arg4)
+ );
+
+ case Iex_Triop:
+ vassert(isIRAtom(ex->Iex.Triop.arg1));
+ vassert(isIRAtom(ex->Iex.Triop.arg2));
+ vassert(isIRAtom(ex->Iex.Triop.arg3));
+ return IRExpr_Triop(
+ ex->Iex.Triop.op,
+ subst_Expr(env, ex->Iex.Triop.arg1),
+ subst_Expr(env, ex->Iex.Triop.arg2),
+ subst_Expr(env, ex->Iex.Triop.arg3)
+ );
+
+ case Iex_Binop:
+ vassert(isIRAtom(ex->Iex.Binop.arg1));
+ vassert(isIRAtom(ex->Iex.Binop.arg2));
+ return IRExpr_Binop(
+ ex->Iex.Binop.op,
+ subst_Expr(env, ex->Iex.Binop.arg1),
+ subst_Expr(env, ex->Iex.Binop.arg2)
+ );
+
+ case Iex_Unop:
+ vassert(isIRAtom(ex->Iex.Unop.arg));
+ return IRExpr_Unop(
+ ex->Iex.Unop.op,
+ subst_Expr(env, ex->Iex.Unop.arg)
+ );
+
+ case Iex_Load:
+ vassert(isIRAtom(ex->Iex.Load.addr));
+ return IRExpr_Load(
+ ex->Iex.Load.end,
+ ex->Iex.Load.ty,
+ subst_Expr(env, ex->Iex.Load.addr)
+ );
+
+ case Iex_CCall: {
+ Int i;
+ IRExpr** args2 = shallowCopyIRExprVec(ex->Iex.CCall.args);
+ for (i = 0; args2[i]; i++) {
+ vassert(isIRAtom(args2[i]));
+ args2[i] = subst_Expr(env, args2[i]);
+ }
+ return IRExpr_CCall(
+ ex->Iex.CCall.cee,
+ ex->Iex.CCall.retty,
+ args2
+ );
+ }
+
+ case Iex_Mux0X:
+ vassert(isIRAtom(ex->Iex.Mux0X.cond));
+ vassert(isIRAtom(ex->Iex.Mux0X.expr0));
+ vassert(isIRAtom(ex->Iex.Mux0X.exprX));
+ return IRExpr_Mux0X(
+ subst_Expr(env, ex->Iex.Mux0X.cond),
+ subst_Expr(env, ex->Iex.Mux0X.expr0),
+ subst_Expr(env, ex->Iex.Mux0X.exprX)
+ );
+
+ default:
+ vex_printf("\n\n"); ppIRExpr(ex);
+ vpanic("subst_Expr");
+
+ }
+}
+
+
+/* Apply the subst to stmt, then fold the result as much as possible.
+ Much simplified due to stmt being previously flattened. As a
+ result of this, the stmt may wind up being turned into a no-op.
+*/
+static IRStmt* subst_and_fold_Stmt ( IRExpr** env, IRStmt* st )
+{
+# if 0
+ vex_printf("\nsubst and fold stmt\n");
+ ppIRStmt(st);
+ vex_printf("\n");
+# endif
+
+ switch (st->tag) {
+ case Ist_AbiHint:
+ vassert(isIRAtom(st->Ist.AbiHint.base));
+ vassert(isIRAtom(st->Ist.AbiHint.nia));
+ return IRStmt_AbiHint(
+ fold_Expr(subst_Expr(env, st->Ist.AbiHint.base)),
+ st->Ist.AbiHint.len,
+ fold_Expr(subst_Expr(env, st->Ist.AbiHint.nia))
+ );
+ case Ist_Put:
+ vassert(isIRAtom(st->Ist.Put.data));
+ return IRStmt_Put(
+ st->Ist.Put.offset,
+ fold_Expr(subst_Expr(env, st->Ist.Put.data))
+ );
+
+ case Ist_PutI:
+ vassert(isIRAtom(st->Ist.PutI.ix));
+ vassert(isIRAtom(st->Ist.PutI.data));
+ return IRStmt_PutI(
+ st->Ist.PutI.descr,
+ fold_Expr(subst_Expr(env, st->Ist.PutI.ix)),
+ st->Ist.PutI.bias,
+ fold_Expr(subst_Expr(env, st->Ist.PutI.data))
+ );
+
+ case Ist_WrTmp:
+ /* This is the one place where an expr (st->Ist.WrTmp.data) is
+ allowed to be more than just a constant or a tmp. */
+ return IRStmt_WrTmp(
+ st->Ist.WrTmp.tmp,
+ fold_Expr(subst_Expr(env, st->Ist.WrTmp.data))
+ );
+
+ case Ist_Store:
+ vassert(isIRAtom(st->Ist.Store.addr));
+ vassert(isIRAtom(st->Ist.Store.data));
+ return IRStmt_Store(
+ st->Ist.Store.end,
+ fold_Expr(subst_Expr(env, st->Ist.Store.addr)),
+ fold_Expr(subst_Expr(env, st->Ist.Store.data))
+ );
+
+ case Ist_CAS: {
+ IRCAS *cas, *cas2;
+ cas = st->Ist.CAS.details;
+ vassert(isIRAtom(cas->addr));
+ vassert(cas->expdHi == NULL || isIRAtom(cas->expdHi));
+ vassert(isIRAtom(cas->expdLo));
+ vassert(cas->dataHi == NULL || isIRAtom(cas->dataHi));
+ vassert(isIRAtom(cas->dataLo));
+ cas2 = mkIRCAS(
+ cas->oldHi, cas->oldLo, cas->end,
+ fold_Expr(subst_Expr(env, cas->addr)),
+ cas->expdHi ? fold_Expr(subst_Expr(env, cas->expdHi)) : NULL,
+ fold_Expr(subst_Expr(env, cas->expdLo)),
+ cas->dataHi ? fold_Expr(subst_Expr(env, cas->dataHi)) : NULL,
+ fold_Expr(subst_Expr(env, cas->dataLo))
+ );
+ return IRStmt_CAS(cas2);
+ }
+
+ case Ist_LLSC:
+ vassert(isIRAtom(st->Ist.LLSC.addr));
+ if (st->Ist.LLSC.storedata)
+ vassert(isIRAtom(st->Ist.LLSC.storedata));
+ return IRStmt_LLSC(
+ st->Ist.LLSC.end,
+ st->Ist.LLSC.result,
+ fold_Expr(subst_Expr(env, st->Ist.LLSC.addr)),
+ st->Ist.LLSC.storedata
+ ? fold_Expr(subst_Expr(env, st->Ist.LLSC.storedata))
+ : NULL
+ );
+
+ case Ist_Dirty: {
+ Int i;
+ IRDirty *d, *d2;
+ d = st->Ist.Dirty.details;
+ d2 = emptyIRDirty();
+ *d2 = *d;
+ d2->args = shallowCopyIRExprVec(d2->args);
+ if (d2->mFx != Ifx_None) {
+ vassert(isIRAtom(d2->mAddr));
+ d2->mAddr = fold_Expr(subst_Expr(env, d2->mAddr));
+ }
+ vassert(isIRAtom(d2->guard));
+ d2->guard = fold_Expr(subst_Expr(env, d2->guard));
+ for (i = 0; d2->args[i]; i++) {
+ vassert(isIRAtom(d2->args[i]));
+ d2->args[i] = fold_Expr(subst_Expr(env, d2->args[i]));
+ }
+ return IRStmt_Dirty(d2);
+ }
+
+ case Ist_IMark:
+ return IRStmt_IMark(st->Ist.IMark.addr, st->Ist.IMark.len);
+
+ case Ist_NoOp:
+ return IRStmt_NoOp();
+
+ case Ist_MBE:
+ return IRStmt_MBE(st->Ist.MBE.event);
+
+ case Ist_Exit: {
+ IRExpr* fcond;
+ vassert(isIRAtom(st->Ist.Exit.guard));
+ fcond = fold_Expr(subst_Expr(env, st->Ist.Exit.guard));
+ if (fcond->tag == Iex_Const) {
+ /* Interesting. The condition on this exit has folded down to
+ a constant. */
+ vassert(fcond->Iex.Const.con->tag == Ico_U1);
+ vassert(fcond->Iex.Const.con->Ico.U1 == False
+ || fcond->Iex.Const.con->Ico.U1 == True);
+ if (fcond->Iex.Const.con->Ico.U1 == False) {
+ /* exit is never going to happen, so dump the statement. */
+ return IRStmt_NoOp();
+ } else {
+ vassert(fcond->Iex.Const.con->Ico.U1 == True);
+ /* Hmmm. The exit has become unconditional. Leave it
+ as it is for now, since we'd have to truncate the BB
+ at this point, which is tricky. Such truncation is
+ done later by the dead-code elimination pass. */
+ /* fall out into the reconstruct-the-exit code. */
+ if (vex_control.iropt_verbosity > 0)
+ /* really a misuse of vex_control.iropt_verbosity */
+ vex_printf("vex iropt: IRStmt_Exit became unconditional\n");
+ }
+ }
+ return IRStmt_Exit(fcond, st->Ist.Exit.jk, st->Ist.Exit.dst);
+ }
+
+ default:
+ vex_printf("\n"); ppIRStmt(st);
+ vpanic("subst_and_fold_Stmt");
+ }
+}
+
+
+IRSB* cprop_BB ( IRSB* in )
+{
+ Int i;
+ IRSB* out;
+ IRStmt* st2;
+ Int n_tmps = in->tyenv->types_used;
+ IRExpr** env = LibVEX_Alloc(n_tmps * sizeof(IRExpr*));
+
+ out = emptyIRSB();
+ out->tyenv = deepCopyIRTypeEnv( in->tyenv );
+
+   /* Set up the env which travels forward. This holds a
+ substitution, mapping IRTemps to atoms, that is, IRExprs which
+ are either IRTemps or IRConsts. Thus, copy and constant
+ propagation is done. The environment is to be applied as we
+ move along. Keys are IRTemps. Values are IRExpr*s.
+ */
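+   /* For example (a sketch, with made-up temp names): given the
+      flattened input
+         t1 = GET:I64(0)
+         t2 = t1
+         t3 = Add64(t2,0x10:I64)
+      the binding t2 -> t1 accumulates in env, so the third stmt is
+      rewritten as t3 = Add64(t1,0x10:I64), and 't2 = t1' itself
+      never reaches the output block. */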
+ for (i = 0; i < n_tmps; i++)
+ env[i] = NULL;
+
+ /* For each original SSA-form stmt ... */
+ for (i = 0; i < in->stmts_used; i++) {
+
+ /* First apply the substitution to the current stmt. This
+ propagates in any constants and tmp-tmp assignments
+         accumulated prior to this point.  As part of the
+         subst_and_fold_Stmt call, any resulting constant
+         expressions are also folded. */
+
+ st2 = in->stmts[i];
+
+ /* perhaps st2 is already a no-op? */
+ if (st2->tag == Ist_NoOp) continue;
+
+ st2 = subst_and_fold_Stmt( env, st2 );
+
+ /* If the statement has been folded into a no-op, forget it. */
+ if (st2->tag == Ist_NoOp) continue;
+
+ /* Now consider what the stmt looks like. If it's of the form
+ 't = const' or 't1 = t2', add it to the running environment
+ and not to the output BB. Otherwise, add it to the output
+ BB. Note, we choose not to propagate const when const is an
+ F64i, so that F64i literals can be CSE'd later. This helps
+ x86 floating point code generation. */
+
+ if (st2->tag == Ist_WrTmp
+ && st2->Ist.WrTmp.data->tag == Iex_Const
+ && st2->Ist.WrTmp.data->Iex.Const.con->tag != Ico_F64i) {
+ /* 't = const' -- add to env.
+ The pair (IRTemp, IRExpr*) is added. */
+ env[(Int)(st2->Ist.WrTmp.tmp)] = st2->Ist.WrTmp.data;
+ }
+ else
+ if (st2->tag == Ist_WrTmp && st2->Ist.WrTmp.data->tag == Iex_RdTmp) {
+ /* 't1 = t2' -- add to env.
+ The pair (IRTemp, IRExpr*) is added. */
+ env[(Int)(st2->Ist.WrTmp.tmp)] = st2->Ist.WrTmp.data;
+ }
+ else {
+ /* Not interesting, copy st2 into the output block. */
+ addStmtToIRSB( out, st2 );
+ }
+ }
+
+ out->next = subst_Expr( env, in->next );
+ out->jumpkind = in->jumpkind;
+ return out;
+}
+
+
+/*---------------------------------------------------------------*/
+/*--- Dead code (t = E) removal ---*/
+/*---------------------------------------------------------------*/
+
+/* As a side effect, also removes all code following an unconditional
+ side exit. */
+
+/* The set of used IRTemps is maintained as a Bool array indexed
+   by IRTemp -- really just representing a set of IRTemps.
+*/
+
+inline
+static void addUses_Temp ( Bool* set, IRTemp tmp )
+{
+ set[(Int)tmp] = True;
+}
+
+static void addUses_Expr ( Bool* set, IRExpr* e )
+{
+ Int i;
+ switch (e->tag) {
+ case Iex_GetI:
+ addUses_Expr(set, e->Iex.GetI.ix);
+ return;
+ case Iex_Mux0X:
+ addUses_Expr(set, e->Iex.Mux0X.cond);
+ addUses_Expr(set, e->Iex.Mux0X.expr0);
+ addUses_Expr(set, e->Iex.Mux0X.exprX);
+ return;
+ case Iex_CCall:
+ for (i = 0; e->Iex.CCall.args[i]; i++)
+ addUses_Expr(set, e->Iex.CCall.args[i]);
+ return;
+ case Iex_Load:
+ addUses_Expr(set, e->Iex.Load.addr);
+ return;
+ case Iex_Qop:
+ addUses_Expr(set, e->Iex.Qop.arg1);
+ addUses_Expr(set, e->Iex.Qop.arg2);
+ addUses_Expr(set, e->Iex.Qop.arg3);
+ addUses_Expr(set, e->Iex.Qop.arg4);
+ return;
+ case Iex_Triop:
+ addUses_Expr(set, e->Iex.Triop.arg1);
+ addUses_Expr(set, e->Iex.Triop.arg2);
+ addUses_Expr(set, e->Iex.Triop.arg3);
+ return;
+ case Iex_Binop:
+ addUses_Expr(set, e->Iex.Binop.arg1);
+ addUses_Expr(set, e->Iex.Binop.arg2);
+ return;
+ case Iex_Unop:
+ addUses_Expr(set, e->Iex.Unop.arg);
+ return;
+ case Iex_RdTmp:
+ addUses_Temp(set, e->Iex.RdTmp.tmp);
+ return;
+ case Iex_Const:
+ case Iex_Get:
+ return;
+ default:
+ vex_printf("\n");
+ ppIRExpr(e);
+ vpanic("addUses_Expr");
+ }
+}
+
+static void addUses_Stmt ( Bool* set, IRStmt* st )
+{
+ Int i;
+ IRDirty* d;
+ IRCAS* cas;
+ switch (st->tag) {
+ case Ist_AbiHint:
+ addUses_Expr(set, st->Ist.AbiHint.base);
+ addUses_Expr(set, st->Ist.AbiHint.nia);
+ return;
+ case Ist_PutI:
+ addUses_Expr(set, st->Ist.PutI.ix);
+ addUses_Expr(set, st->Ist.PutI.data);
+ return;
+ case Ist_WrTmp:
+ addUses_Expr(set, st->Ist.WrTmp.data);
+ return;
+ case Ist_Put:
+ addUses_Expr(set, st->Ist.Put.data);
+ return;
+ case Ist_Store:
+ addUses_Expr(set, st->Ist.Store.addr);
+ addUses_Expr(set, st->Ist.Store.data);
+ return;
+ case Ist_CAS:
+ cas = st->Ist.CAS.details;
+ addUses_Expr(set, cas->addr);
+ if (cas->expdHi)
+ addUses_Expr(set, cas->expdHi);
+ addUses_Expr(set, cas->expdLo);
+ if (cas->dataHi)
+ addUses_Expr(set, cas->dataHi);
+ addUses_Expr(set, cas->dataLo);
+ return;
+ case Ist_LLSC:
+ addUses_Expr(set, st->Ist.LLSC.addr);
+ if (st->Ist.LLSC.storedata)
+ addUses_Expr(set, st->Ist.LLSC.storedata);
+ return;
+ case Ist_Dirty:
+ d = st->Ist.Dirty.details;
+ if (d->mFx != Ifx_None)
+ addUses_Expr(set, d->mAddr);
+ addUses_Expr(set, d->guard);
+ for (i = 0; d->args[i] != NULL; i++)
+ addUses_Expr(set, d->args[i]);
+ return;
+ case Ist_NoOp:
+ case Ist_IMark:
+ case Ist_MBE:
+ return;
+ case Ist_Exit:
+ addUses_Expr(set, st->Ist.Exit.guard);
+ return;
+ default:
+ vex_printf("\n");
+ ppIRStmt(st);
+ vpanic("addUses_Stmt");
+ }
+}
+
+
+/* Is this literally IRExpr_Const(IRConst_U1(False)) ? */
+static Bool isZeroU1 ( IRExpr* e )
+{
+ return toBool( e->tag == Iex_Const
+ && e->Iex.Const.con->tag == Ico_U1
+ && e->Iex.Const.con->Ico.U1 == False );
+}
+
+/* Is this literally IRExpr_Const(IRConst_U1(True)) ? */
+static Bool isOneU1 ( IRExpr* e )
+{
+ return toBool( e->tag == Iex_Const
+ && e->Iex.Const.con->tag == Ico_U1
+ && e->Iex.Const.con->Ico.U1 == True );
+}
+
+
+/* Note, this destructively modifies the given IRSB. */
+
+/* Scan backwards through statements, carrying a set of IRTemps which
+ are known to be used after the current point. On encountering 't =
+ E', delete the binding if it is not used. Otherwise, add any temp
+ uses to the set and keep on moving backwards.
+
+ As an enhancement, the first (backwards) pass searches for IR exits
+ with always-taken conditions and notes the location of the earliest
+ one in the block. If any such are found, a second pass copies the
+ exit destination and jump kind to the bb-end. Then, the exit and
+ all statements following it are turned into no-ops.
+*/
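+
+/* For example (a sketch): scanning backwards through
+      t1 = GET:I32(8)
+      t2 = Add32(t1,0x1:I32)
+      PUT(8) = t1
+   the use-set after the PUT contains only t1, so 't2 = ...' is dead
+   and is overwritten with a no-op, whereas 't1 = ...' survives. */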
+
+/* notstatic */ void do_deadcode_BB ( IRSB* bb )
+{
+ Int i, i_unconditional_exit;
+ Int n_tmps = bb->tyenv->types_used;
+ Bool* set = LibVEX_Alloc(n_tmps * sizeof(Bool));
+ IRStmt* st;
+
+ for (i = 0; i < n_tmps; i++)
+ set[i] = False;
+
+ /* start off by recording IRTemp uses in the next field. */
+ addUses_Expr(set, bb->next);
+
+ /* First pass */
+
+ /* Work backwards through the stmts */
+ i_unconditional_exit = -1;
+ for (i = bb->stmts_used-1; i >= 0; i--) {
+ st = bb->stmts[i];
+ if (st->tag == Ist_NoOp)
+ continue;
+ /* take note of any unconditional exits */
+ if (st->tag == Ist_Exit
+ && isOneU1(st->Ist.Exit.guard))
+ i_unconditional_exit = i;
+ if (st->tag == Ist_WrTmp
+ && set[(Int)(st->Ist.WrTmp.tmp)] == False) {
+ /* it's an IRTemp which never got used. Delete it. */
+ if (DEBUG_IROPT) {
+ vex_printf("DEAD: ");
+ ppIRStmt(st);
+ vex_printf("\n");
+ }
+ bb->stmts[i] = IRStmt_NoOp();
+ }
+ else
+ if (st->tag == Ist_Dirty
+ && st->Ist.Dirty.details->guard
+ && isZeroU1(st->Ist.Dirty.details->guard)) {
+ /* This is a dirty helper which will never get called.
+ Delete it. */
+ bb->stmts[i] = IRStmt_NoOp();
+ }
+ else {
+ /* Note any IRTemp uses made by the current statement. */
+ addUses_Stmt(set, st);
+ }
+ }
+
+ /* Optional second pass: if any unconditional exits were found,
+ delete them and all following statements. */
+
+ if (i_unconditional_exit != -1) {
+ if (0) vex_printf("ZAPPING ALL FORWARDS from %d\n",
+ i_unconditional_exit);
+ vassert(i_unconditional_exit >= 0
+ && i_unconditional_exit < bb->stmts_used);
+ bb->next
+ = IRExpr_Const( bb->stmts[i_unconditional_exit]->Ist.Exit.dst );
+ bb->jumpkind
+ = bb->stmts[i_unconditional_exit]->Ist.Exit.jk;
+ for (i = i_unconditional_exit; i < bb->stmts_used; i++)
+ bb->stmts[i] = IRStmt_NoOp();
+ }
+}
+
+
+/*---------------------------------------------------------------*/
+/*--- Specialisation of helper function calls, in ---*/
+/*--- collaboration with the front end ---*/
+/*---------------------------------------------------------------*/
+
+static
+IRSB* spec_helpers_BB(
+ IRSB* bb,
+ IRExpr* (*specHelper) (HChar*, IRExpr**, IRStmt**, Int)
+ )
+{
+ Int i;
+ IRStmt* st;
+ IRExpr* ex;
+ Bool any = False;
+
+ for (i = bb->stmts_used-1; i >= 0; i--) {
+ st = bb->stmts[i];
+
+ if (st->tag != Ist_WrTmp
+ || st->Ist.WrTmp.data->tag != Iex_CCall)
+ continue;
+
+ ex = (*specHelper)( st->Ist.WrTmp.data->Iex.CCall.cee->name,
+ st->Ist.WrTmp.data->Iex.CCall.args,
+ &bb->stmts[0], i );
+ if (!ex)
+ /* the front end can't think of a suitable replacement */
+ continue;
+
+ /* We got something better. Install it in the bb. */
+ any = True;
+ bb->stmts[i]
+ = IRStmt_WrTmp(st->Ist.WrTmp.tmp, ex);
+
+ if (0) {
+ vex_printf("SPEC: ");
+ ppIRExpr(st->Ist.WrTmp.data);
+ vex_printf(" --> ");
+ ppIRExpr(ex);
+ vex_printf("\n");
+ }
+ }
+
+ if (any)
+ bb = flatten_BB(bb);
+ return bb;
+}
+
+
+/*---------------------------------------------------------------*/
+/*--- Determination of guest state aliasing relationships ---*/
+/*---------------------------------------------------------------*/
+
+/* These are helper functions for CSE and GetI/PutI transformations.
+
+ Determine, to the extent possible, the relationship between two
+ guest state accesses. The possible outcomes are:
+
+ * Exact alias. These two accesses denote precisely the same
+ piece of the guest state.
+
+ * Definitely no alias. These two accesses are guaranteed not to
+ overlap any part of the guest state.
+
+ * Unknown -- if neither of the above can be established.
+
+ If in doubt, return Unknown. */
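+
+/* For example (a sketch): a GetI ranging over guest offsets
+   [0 .. 63] and a Put of an I64 at offset 128 touch disjoint byte
+   ranges, hence NoAlias; two GetIs with identical descriptors,
+   identical index tmps and equal (normalised) biases are an
+   ExactAlias; anything unprovable is UnknownAlias. */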
+
+typedef
+ enum { ExactAlias, NoAlias, UnknownAlias }
+ GSAliasing;
+
+
+/* Produces the alias relation between an indexed guest
+ state access and a non-indexed access. */
+
+static
+GSAliasing getAliasingRelation_IC ( IRRegArray* descr1, IRExpr* ix1,
+ Int offset2, IRType ty2 )
+{
+ UInt minoff1, maxoff1, minoff2, maxoff2;
+
+ getArrayBounds( descr1, &minoff1, &maxoff1 );
+ minoff2 = offset2;
+ maxoff2 = minoff2 + sizeofIRType(ty2) - 1;
+
+ if (maxoff1 < minoff2 || maxoff2 < minoff1)
+ return NoAlias;
+
+ /* Could probably do better here if required. For the moment
+ however just claim not to know anything more. */
+ return UnknownAlias;
+}
+
+
+/* Produces the alias relation between two indexed guest state
+ accesses. */
+
+static
+GSAliasing getAliasingRelation_II (
+ IRRegArray* descr1, IRExpr* ix1, Int bias1,
+ IRRegArray* descr2, IRExpr* ix2, Int bias2
+ )
+{
+ UInt minoff1, maxoff1, minoff2, maxoff2;
+ Int iters;
+
+ /* First try hard to show they don't alias. */
+ getArrayBounds( descr1, &minoff1, &maxoff1 );
+ getArrayBounds( descr2, &minoff2, &maxoff2 );
+ if (maxoff1 < minoff2 || maxoff2 < minoff1)
+ return NoAlias;
+
+ /* So the two arrays at least partially overlap. To get any
+ further we'll have to be sure that the descriptors are
+ identical. */
+ if (!eqIRRegArray(descr1, descr2))
+ return UnknownAlias;
+
+ /* The descriptors are identical. Now the only difference can be
+ in the index expressions. If they cannot be shown to be
+ identical, we have to say we don't know what the aliasing
+ relation will be. Now, since the IR is flattened, the index
+ expressions should be atoms -- either consts or tmps. So that
+ makes the comparison simple. */
+ vassert(isIRAtom(ix1));
+ vassert(isIRAtom(ix2));
+ if (!eqIRAtom(ix1,ix2))
+ return UnknownAlias;
+
+ /* Ok, the index expressions are identical. So now the only way
+ they can be different is in the bias. Normalise this
+ paranoidly, to reliably establish equality/non-equality. */
+
+ /* So now we know that the GetI and PutI index the same array
+ with the same base. Are the offsets the same, modulo the
+ array size? Do this paranoidly. */
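+   /* For example (a sketch): with nElems == 8, bias1 == -3 and
+      bias2 == 13 both normalise to 5, giving ExactAlias. */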
+ vassert(descr1->nElems == descr2->nElems);
+ vassert(descr1->elemTy == descr2->elemTy);
+ vassert(descr1->base == descr2->base);
+ iters = 0;
+ while (bias1 < 0 || bias2 < 0) {
+ bias1 += descr1->nElems;
+ bias2 += descr1->nElems;
+ iters++;
+ if (iters > 10)
+ vpanic("getAliasingRelation: iters");
+ }
+ vassert(bias1 >= 0 && bias2 >= 0);
+ bias1 %= descr1->nElems;
+ bias2 %= descr1->nElems;
+ vassert(bias1 >= 0 && bias1 < descr1->nElems);
+ vassert(bias2 >= 0 && bias2 < descr1->nElems);
+
+   /* Finally, bias1 and bias2 are normalised into the range
+      0 .. descr1->nElems - 1.  And so we can establish
+ equality/non-equality. */
+
+ return bias1==bias2 ? ExactAlias : NoAlias;
+}
+
+
+/*---------------------------------------------------------------*/
+/*--- Common Subexpression Elimination ---*/
+/*---------------------------------------------------------------*/
+
+/* Expensive in time and space. */
+
+/* Uses two environments:
+   an IRTemp -> IRTemp mapping
+ a mapping from AvailExpr* to IRTemp
+*/
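+
+/* For example (a sketch): in
+      t1 = Add32(t8,t9)
+      t2 = Add32(t8,t9)
+      t3 = Sub32(t2,t1)
+   the second Add32 matches the expression already bound to t1, so
+   that stmt becomes 't2 = t1' and tenv records t2 -> t1; the third
+   stmt is then seen, after tenv substitution, as Sub32(t1,t1). */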
+
+typedef
+ struct {
+ enum { Ut, Btt, Btc, Bct, Cf64i, Mttt, GetIt } tag;
+ union {
+ /* unop(tmp) */
+ struct {
+ IROp op;
+ IRTemp arg;
+ } Ut;
+ /* binop(tmp,tmp) */
+ struct {
+ IROp op;
+ IRTemp arg1;
+ IRTemp arg2;
+ } Btt;
+ /* binop(tmp,const) */
+ struct {
+ IROp op;
+ IRTemp arg1;
+ IRConst con2;
+ } Btc;
+ /* binop(const,tmp) */
+ struct {
+ IROp op;
+ IRConst con1;
+ IRTemp arg2;
+ } Bct;
+ /* F64i-style const */
+ struct {
+ ULong f64i;
+ } Cf64i;
+ /* Mux0X(tmp,tmp,tmp) */
+ struct {
+ IRTemp co;
+ IRTemp e0;
+ IRTemp eX;
+ } Mttt;
+ /* GetI(descr,tmp,bias)*/
+ struct {
+ IRRegArray* descr;
+ IRTemp ix;
+ Int bias;
+ } GetIt;
+ } u;
+ }
+ AvailExpr;
+
+static Bool eq_AvailExpr ( AvailExpr* a1, AvailExpr* a2 )
+{
+ if (a1->tag != a2->tag)
+ return False;
+ switch (a1->tag) {
+ case Ut:
+ return toBool(
+ a1->u.Ut.op == a2->u.Ut.op
+ && a1->u.Ut.arg == a2->u.Ut.arg);
+ case Btt:
+ return toBool(
+ a1->u.Btt.op == a2->u.Btt.op
+ && a1->u.Btt.arg1 == a2->u.Btt.arg1
+ && a1->u.Btt.arg2 == a2->u.Btt.arg2);
+ case Btc:
+ return toBool(
+ a1->u.Btc.op == a2->u.Btc.op
+ && a1->u.Btc.arg1 == a2->u.Btc.arg1
+ && eqIRConst(&a1->u.Btc.con2, &a2->u.Btc.con2));
+ case Bct:
+ return toBool(
+ a1->u.Bct.op == a2->u.Bct.op
+ && a1->u.Bct.arg2 == a2->u.Bct.arg2
+ && eqIRConst(&a1->u.Bct.con1, &a2->u.Bct.con1));
+ case Cf64i:
+ return toBool(a1->u.Cf64i.f64i == a2->u.Cf64i.f64i);
+ case Mttt:
+ return toBool(a1->u.Mttt.co == a2->u.Mttt.co
+ && a1->u.Mttt.e0 == a2->u.Mttt.e0
+ && a1->u.Mttt.eX == a2->u.Mttt.eX);
+ case GetIt:
+ return toBool(eqIRRegArray(a1->u.GetIt.descr, a2->u.GetIt.descr)
+ && a1->u.GetIt.ix == a2->u.GetIt.ix
+ && a1->u.GetIt.bias == a2->u.GetIt.bias);
+ default: vpanic("eq_AvailExpr");
+ }
+}
+
+static IRExpr* availExpr_to_IRExpr ( AvailExpr* ae )
+{
+ IRConst* con;
+ switch (ae->tag) {
+ case Ut:
+ return IRExpr_Unop( ae->u.Ut.op, IRExpr_RdTmp(ae->u.Ut.arg) );
+ case Btt:
+ return IRExpr_Binop( ae->u.Btt.op,
+ IRExpr_RdTmp(ae->u.Btt.arg1),
+ IRExpr_RdTmp(ae->u.Btt.arg2) );
+ case Btc:
+ con = LibVEX_Alloc(sizeof(IRConst));
+ *con = ae->u.Btc.con2;
+ return IRExpr_Binop( ae->u.Btc.op,
+ IRExpr_RdTmp(ae->u.Btc.arg1),
+ IRExpr_Const(con) );
+ case Bct:
+ con = LibVEX_Alloc(sizeof(IRConst));
+ *con = ae->u.Bct.con1;
+ return IRExpr_Binop( ae->u.Bct.op,
+ IRExpr_Const(con),
+ IRExpr_RdTmp(ae->u.Bct.arg2) );
+ case Cf64i:
+ return IRExpr_Const(IRConst_F64i(ae->u.Cf64i.f64i));
+ case Mttt:
+ return IRExpr_Mux0X(IRExpr_RdTmp(ae->u.Mttt.co),
+ IRExpr_RdTmp(ae->u.Mttt.e0),
+ IRExpr_RdTmp(ae->u.Mttt.eX));
+ case GetIt:
+ return IRExpr_GetI(ae->u.GetIt.descr,
+ IRExpr_RdTmp(ae->u.GetIt.ix),
+ ae->u.GetIt.bias);
+ default:
+ vpanic("availExpr_to_IRExpr");
+ }
+}
+
+inline
+static IRTemp subst_AvailExpr_Temp ( HashHW* env, IRTemp tmp )
+{
+ HWord res;
+ /* env :: IRTemp -> IRTemp */
+ if (lookupHHW( env, &res, (HWord)tmp ))
+ return (IRTemp)res;
+ else
+ return tmp;
+}
+
+static void subst_AvailExpr ( HashHW* env, AvailExpr* ae )
+{
+ /* env :: IRTemp -> IRTemp */
+ switch (ae->tag) {
+ case Ut:
+ ae->u.Ut.arg = subst_AvailExpr_Temp( env, ae->u.Ut.arg );
+ break;
+ case Btt:
+ ae->u.Btt.arg1 = subst_AvailExpr_Temp( env, ae->u.Btt.arg1 );
+ ae->u.Btt.arg2 = subst_AvailExpr_Temp( env, ae->u.Btt.arg2 );
+ break;
+ case Btc:
+ ae->u.Btc.arg1 = subst_AvailExpr_Temp( env, ae->u.Btc.arg1 );
+ break;
+ case Bct:
+ ae->u.Bct.arg2 = subst_AvailExpr_Temp( env, ae->u.Bct.arg2 );
+ break;
+ case Cf64i:
+ break;
+ case Mttt:
+ ae->u.Mttt.co = subst_AvailExpr_Temp( env, ae->u.Mttt.co );
+ ae->u.Mttt.e0 = subst_AvailExpr_Temp( env, ae->u.Mttt.e0 );
+ ae->u.Mttt.eX = subst_AvailExpr_Temp( env, ae->u.Mttt.eX );
+ break;
+ case GetIt:
+ ae->u.GetIt.ix = subst_AvailExpr_Temp( env, ae->u.GetIt.ix );
+ break;
+ default:
+ vpanic("subst_AvailExpr");
+ }
+}
+
+static AvailExpr* irExpr_to_AvailExpr ( IRExpr* e )
+{
+ AvailExpr* ae;
+
+ if (e->tag == Iex_Unop
+ && e->Iex.Unop.arg->tag == Iex_RdTmp) {
+ ae = LibVEX_Alloc(sizeof(AvailExpr));
+ ae->tag = Ut;
+ ae->u.Ut.op = e->Iex.Unop.op;
+ ae->u.Ut.arg = e->Iex.Unop.arg->Iex.RdTmp.tmp;
+ return ae;
+ }
+
+ if (e->tag == Iex_Binop
+ && e->Iex.Binop.arg1->tag == Iex_RdTmp
+ && e->Iex.Binop.arg2->tag == Iex_RdTmp) {
+ ae = LibVEX_Alloc(sizeof(AvailExpr));
+ ae->tag = Btt;
+ ae->u.Btt.op = e->Iex.Binop.op;
+ ae->u.Btt.arg1 = e->Iex.Binop.arg1->Iex.RdTmp.tmp;
+ ae->u.Btt.arg2 = e->Iex.Binop.arg2->Iex.RdTmp.tmp;
+ return ae;
+ }
+
+ if (e->tag == Iex_Binop
+ && e->Iex.Binop.arg1->tag == Iex_RdTmp
+ && e->Iex.Binop.arg2->tag == Iex_Const) {
+ ae = LibVEX_Alloc(sizeof(AvailExpr));
+ ae->tag = Btc;
+ ae->u.Btc.op = e->Iex.Binop.op;
+ ae->u.Btc.arg1 = e->Iex.Binop.arg1->Iex.RdTmp.tmp;
+ ae->u.Btc.con2 = *(e->Iex.Binop.arg2->Iex.Const.con);
+ return ae;
+ }
+
+ if (e->tag == Iex_Binop
+ && e->Iex.Binop.arg1->tag == Iex_Const
+ && e->Iex.Binop.arg2->tag == Iex_RdTmp) {
+ ae = LibVEX_Alloc(sizeof(AvailExpr));
+ ae->tag = Bct;
+ ae->u.Bct.op = e->Iex.Binop.op;
+ ae->u.Bct.arg2 = e->Iex.Binop.arg2->Iex.RdTmp.tmp;
+ ae->u.Bct.con1 = *(e->Iex.Binop.arg1->Iex.Const.con);
+ return ae;
+ }
+
+ if (e->tag == Iex_Const
+ && e->Iex.Const.con->tag == Ico_F64i) {
+ ae = LibVEX_Alloc(sizeof(AvailExpr));
+ ae->tag = Cf64i;
+ ae->u.Cf64i.f64i = e->Iex.Const.con->Ico.F64i;
+ return ae;
+ }
+
+ if (e->tag == Iex_Mux0X
+ && e->Iex.Mux0X.cond->tag == Iex_RdTmp
+ && e->Iex.Mux0X.expr0->tag == Iex_RdTmp
+ && e->Iex.Mux0X.exprX->tag == Iex_RdTmp) {
+ ae = LibVEX_Alloc(sizeof(AvailExpr));
+ ae->tag = Mttt;
+ ae->u.Mttt.co = e->Iex.Mux0X.cond->Iex.RdTmp.tmp;
+ ae->u.Mttt.e0 = e->Iex.Mux0X.expr0->Iex.RdTmp.tmp;
+ ae->u.Mttt.eX = e->Iex.Mux0X.exprX->Iex.RdTmp.tmp;
+ return ae;
+ }
+
+ if (e->tag == Iex_GetI
+ && e->Iex.GetI.ix->tag == Iex_RdTmp) {
+ ae = LibVEX_Alloc(sizeof(AvailExpr));
+ ae->tag = GetIt;
+ ae->u.GetIt.descr = e->Iex.GetI.descr;
+ ae->u.GetIt.ix = e->Iex.GetI.ix->Iex.RdTmp.tmp;
+ ae->u.GetIt.bias = e->Iex.GetI.bias;
+ return ae;
+ }
+
+ return NULL;
+}
+
+
+/* The BB is modified in-place. Returns True if any changes were
+ made. */
+
+static Bool do_cse_BB ( IRSB* bb )
+{
+ Int i, j, paranoia;
+ IRTemp t, q;
+ IRStmt* st;
+ AvailExpr* eprime;
+ AvailExpr* ae;
+ Bool invalidate;
+ Bool anyDone = False;
+
+ HashHW* tenv = newHHW(); /* :: IRTemp -> IRTemp */
+ HashHW* aenv = newHHW(); /* :: AvailExpr* -> IRTemp */
+
+ vassert(sizeof(IRTemp) <= sizeof(HWord));
+
+ if (0) { ppIRSB(bb); vex_printf("\n\n"); }
+
+ /* Iterate forwards over the stmts.
+      On seeing "t = E", where E is one of the 7 AvailExpr forms:
+ let E' = apply tenv substitution to E
+ search aenv for E'
+ if a mapping E' -> q is found,
+ replace this stmt by "t = q"
+ and add binding t -> q to tenv
+ else
+ add binding E' -> t to aenv
+ replace this stmt by "t = E'"
+
+ Other statements are only interesting to the extent that they
+ might invalidate some of the expressions in aenv. So there is
+ an invalidate-bindings check for each statement seen.
+ */
+ for (i = 0; i < bb->stmts_used; i++) {
+ st = bb->stmts[i];
+
+ /* ------ BEGIN invalidate aenv bindings ------ */
+ /* This is critical: remove from aenv any E' -> .. bindings
+ which might be invalidated by this statement. The only
+         vulnerable kind of binding is the GetI kind.
+ Dirty call - dump (paranoia level -> 2)
+ Store - dump (ditto)
+ Put, PutI - dump unless no-overlap is proven (.. -> 1)
+ Uses getAliasingRelation_IC and getAliasingRelation_II
+ to do the no-overlap assessments needed for Put/PutI.
+ */
+ switch (st->tag) {
+ case Ist_Dirty: case Ist_Store: case Ist_MBE:
+ case Ist_CAS: case Ist_LLSC:
+ paranoia = 2; break;
+ case Ist_Put: case Ist_PutI:
+ paranoia = 1; break;
+ case Ist_NoOp: case Ist_IMark: case Ist_AbiHint:
+ case Ist_WrTmp: case Ist_Exit:
+ paranoia = 0; break;
+ default:
+ vpanic("do_cse_BB(1)");
+ }
+
+ if (paranoia > 0) {
+ for (j = 0; j < aenv->used; j++) {
+ if (!aenv->inuse[j])
+ continue;
+ ae = (AvailExpr*)aenv->key[j];
+ if (ae->tag != GetIt)
+ continue;
+ invalidate = False;
+ if (paranoia >= 2) {
+ invalidate = True;
+ } else {
+ vassert(paranoia == 1);
+ if (st->tag == Ist_Put) {
+ if (getAliasingRelation_IC(
+ ae->u.GetIt.descr,
+ IRExpr_RdTmp(ae->u.GetIt.ix),
+ st->Ist.Put.offset,
+ typeOfIRExpr(bb->tyenv,st->Ist.Put.data)
+ ) != NoAlias)
+ invalidate = True;
+ }
+ else
+ if (st->tag == Ist_PutI) {
+ if (getAliasingRelation_II(
+ ae->u.GetIt.descr,
+ IRExpr_RdTmp(ae->u.GetIt.ix),
+ ae->u.GetIt.bias,
+ st->Ist.PutI.descr,
+ st->Ist.PutI.ix,
+ st->Ist.PutI.bias
+ ) != NoAlias)
+ invalidate = True;
+ }
+ else
+ vpanic("do_cse_BB(2)");
+ }
+
+ if (invalidate) {
+ aenv->inuse[j] = False;
+ aenv->key[j] = (HWord)NULL; /* be sure */
+ }
+ } /* for j */
+ } /* paranoia > 0 */
+
+      /* ------ END invalidate aenv bindings ------ */
+
+ /* ignore not-interestings */
+ if (st->tag != Ist_WrTmp)
+ continue;
+
+ t = st->Ist.WrTmp.tmp;
+ eprime = irExpr_to_AvailExpr(st->Ist.WrTmp.data);
+ /* ignore if not of AvailExpr form */
+ if (!eprime)
+ continue;
+
+ /* vex_printf("considering: " ); ppIRStmt(st); vex_printf("\n"); */
+
+ /* apply tenv */
+ subst_AvailExpr( tenv, eprime );
+
+ /* search aenv for eprime, unfortunately the hard way */
+ for (j = 0; j < aenv->used; j++)
+ if (aenv->inuse[j] && eq_AvailExpr(eprime, (AvailExpr*)aenv->key[j]))
+ break;
+
+ if (j < aenv->used) {
+ /* A binding E' -> q was found. Replace stmt by "t = q" and
+ note the t->q binding in tenv. */
+ /* (this is the core of the CSE action) */
+ q = (IRTemp)aenv->val[j];
+ bb->stmts[i] = IRStmt_WrTmp( t, IRExpr_RdTmp(q) );
+ addToHHW( tenv, (HWord)t, (HWord)q );
+ anyDone = True;
+ } else {
+ /* No binding was found, so instead we add E' -> t to our
+ collection of available expressions, replace this stmt
+ with "t = E'", and move on. */
+ bb->stmts[i] = IRStmt_WrTmp( t, availExpr_to_IRExpr(eprime) );
+ addToHHW( aenv, (HWord)eprime, (HWord)t );
+ }
+ }
+
+ /*
+ ppIRSB(bb);
+ sanityCheckIRSB(bb, Ity_I32);
+ vex_printf("\n\n");
+ */
+ return anyDone;
+}
+
+
+/*---------------------------------------------------------------*/
+/*--- Add32/Sub32 chain collapsing ---*/
+/*---------------------------------------------------------------*/
+
+/* ----- Helper functions for Add32/Sub32 chain collapsing ----- */
+
+/* Is this expression "Add32(tmp,const)" or "Sub32(tmp,const)" ? If
+ yes, set *tmp and *i32 appropriately. *i32 is set as if the
+ root node is Add32, not Sub32. */
+
+static Bool isAdd32OrSub32 ( IRExpr* e, IRTemp* tmp, Int* i32 )
+{
+ if (e->tag != Iex_Binop)
+ return False;
+ if (e->Iex.Binop.op != Iop_Add32 && e->Iex.Binop.op != Iop_Sub32)
+ return False;
+ if (e->Iex.Binop.arg1->tag != Iex_RdTmp)
+ return False;
+ if (e->Iex.Binop.arg2->tag != Iex_Const)
+ return False;
+ *tmp = e->Iex.Binop.arg1->Iex.RdTmp.tmp;
+ *i32 = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32);
+ if (e->Iex.Binop.op == Iop_Sub32)
+ *i32 = -*i32;
+ return True;
+}
+
+
+/* Figure out if tmp can be expressed as tmp2 +32 const, for some
+ other tmp2. Scan backwards from the specified start point -- an
+ optimisation. */
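+
+/* For example (a sketch): with
+      t2 = Add32(t1,0x4:I32)
+      t3 = Sub32(t2,0x1:I32)
+   collapseChain(bb, .., t3, &tmp2, &i32) walks t3 -> t2 -> t1 and
+   reports t3 == t1 + 3, i.e. *tmp2 == t1 and *i32 == 3. */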
+
+static Bool collapseChain ( IRSB* bb, Int startHere,
+ IRTemp tmp,
+ IRTemp* tmp2, Int* i32 )
+{
+ Int j, ii;
+ IRTemp vv;
+ IRStmt* st;
+ IRExpr* e;
+
+   /* the (var, con) pair contains the current 'representation' for
+ 'tmp'. We start with 'tmp + 0'. */
+ IRTemp var = tmp;
+ Int con = 0;
+
+ /* Scan backwards to see if tmp can be replaced by some other tmp
+ +/- a constant. */
+ for (j = startHere; j >= 0; j--) {
+ st = bb->stmts[j];
+ if (st->tag != Ist_WrTmp)
+ continue;
+ if (st->Ist.WrTmp.tmp != var)
+ continue;
+ e = st->Ist.WrTmp.data;
+ if (!isAdd32OrSub32(e, &vv, &ii))
+ break;
+ var = vv;
+ con += ii;
+ }
+ if (j == -1)
+ /* no earlier binding for var .. ill-formed IR */
+ vpanic("collapseChain");
+
+ /* so, did we find anything interesting? */
+ if (var == tmp)
+ return False; /* no .. */
+
+ *tmp2 = var;
+ *i32 = con;
+ return True;
+}
+
+
+/* ------- Main function for Add32/Sub32 chain collapsing ------ */
+
+static void collapse_AddSub_chains_BB ( IRSB* bb )
+{
+ IRStmt *st;
+ IRTemp var, var2;
+ Int i, con, con2;
+
+ for (i = bb->stmts_used-1; i >= 0; i--) {
+ st = bb->stmts[i];
+ if (st->tag == Ist_NoOp)
+ continue;
+
+ /* Try to collapse 't1 = Add32/Sub32(t2, con)'. */
+
+ if (st->tag == Ist_WrTmp
+ && isAdd32OrSub32(st->Ist.WrTmp.data, &var, &con)) {
+
+         /* So the RHS is of the form Add32(var,con) or Sub32(var,-con).
+ Find out if var can be expressed as var2 + con2. */
+ if (collapseChain(bb, i-1, var, &var2, &con2)) {
+ if (DEBUG_IROPT) {
+ vex_printf("replacing1 ");
+ ppIRStmt(st);
+ vex_printf(" with ");
+ }
+ con2 += con;
+ bb->stmts[i]
+ = IRStmt_WrTmp(
+ st->Ist.WrTmp.tmp,
+ (con2 >= 0)
+ ? IRExpr_Binop(Iop_Add32,
+ IRExpr_RdTmp(var2),
+ IRExpr_Const(IRConst_U32(con2)))
+ : IRExpr_Binop(Iop_Sub32,
+ IRExpr_RdTmp(var2),
+ IRExpr_Const(IRConst_U32(-con2)))
+ );
+ if (DEBUG_IROPT) {
+ ppIRStmt(bb->stmts[i]);
+ vex_printf("\n");
+ }
+ }
+
+ continue;
+ }
+
+ /* Try to collapse 't1 = GetI[t2, con]'. */
+
+ if (st->tag == Ist_WrTmp
+ && st->Ist.WrTmp.data->tag == Iex_GetI
+ && st->Ist.WrTmp.data->Iex.GetI.ix->tag == Iex_RdTmp
+ && collapseChain(bb, i-1, st->Ist.WrTmp.data->Iex.GetI.ix
+ ->Iex.RdTmp.tmp, &var2, &con2)) {
+ if (DEBUG_IROPT) {
+ vex_printf("replacing3 ");
+ ppIRStmt(st);
+ vex_printf(" with ");
+ }
+ con2 += st->Ist.WrTmp.data->Iex.GetI.bias;
+ bb->stmts[i]
+ = IRStmt_WrTmp(
+ st->Ist.WrTmp.tmp,
+ IRExpr_GetI(st->Ist.WrTmp.data->Iex.GetI.descr,
+ IRExpr_RdTmp(var2),
+ con2));
+ if (DEBUG_IROPT) {
+ ppIRStmt(bb->stmts[i]);
+ vex_printf("\n");
+ }
+ continue;
+ }
+
+ /* Perhaps st is PutI[t, con] ? */
+
+ if (st->tag == Ist_PutI
+ && st->Ist.PutI.ix->tag == Iex_RdTmp
+ && collapseChain(bb, i-1, st->Ist.PutI.ix->Iex.RdTmp.tmp,
+ &var2, &con2)) {
+ if (DEBUG_IROPT) {
+ vex_printf("replacing2 ");
+ ppIRStmt(st);
+ vex_printf(" with ");
+ }
+ con2 += st->Ist.PutI.bias;
+ bb->stmts[i]
+ = IRStmt_PutI(st->Ist.PutI.descr,
+ IRExpr_RdTmp(var2),
+ con2,
+ st->Ist.PutI.data);
+ if (DEBUG_IROPT) {
+ ppIRStmt(bb->stmts[i]);
+ vex_printf("\n");
+ }
+ continue;
+ }
+
+ } /* for */
+}
+
+
+/*---------------------------------------------------------------*/
+/*--- PutI/GetI transformations ---*/
+/*---------------------------------------------------------------*/
+
+/* Given the parts (descr, tmp, bias) for a GetI, scan backwards from
+ the given starting point to find, if any, a PutI which writes
+ exactly the same piece of guest state, and so return the expression
+ that the PutI writes. This is the core of PutI-GetI forwarding. */
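+
+/* For example (a sketch): given
+      PUTI(descr)[t1,7] = t5
+      t6 = GETI(descr)[t1,7]
+   the backwards scan from the GetI reaches the PutI, establishes
+   ExactAlias, and returns t5, so the GetI can become 't6 = t5'. */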
+
+static
+IRExpr* findPutI ( IRSB* bb, Int startHere,
+ IRRegArray* descrG, IRExpr* ixG, Int biasG )
+{
+ Int j;
+ IRStmt* st;
+ GSAliasing relation;
+
+ if (0) {
+ vex_printf("\nfindPutI ");
+ ppIRRegArray(descrG);
+ vex_printf(" ");
+ ppIRExpr(ixG);
+ vex_printf(" %d\n", biasG);
+ }
+
+ /* Scan backwards in bb from startHere to find a suitable PutI
+ binding for (descrG, ixG, biasG), if any. */
+
+ for (j = startHere; j >= 0; j--) {
+ st = bb->stmts[j];
+ if (st->tag == Ist_NoOp)
+ continue;
+
+ if (st->tag == Ist_Put) {
+ /* Non-indexed Put. This can't give a binding, but we do
+ need to check it doesn't invalidate the search by
+ overlapping any part of the indexed guest state. */
+
+ relation
+ = getAliasingRelation_IC(
+ descrG, ixG,
+ st->Ist.Put.offset,
+ typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );
+
+ if (relation == NoAlias) {
+ /* we're OK; keep going */
+ continue;
+ } else {
+ /* relation == UnknownAlias || relation == ExactAlias */
+ /* If this assertion fails, we've found a Put which writes
+ an area of guest state which is read by a GetI. Which
+ is unlikely (although not per se wrong). */
+ vassert(relation != ExactAlias);
+ /* This Put potentially writes guest state that the GetI
+ reads; we must fail. */
+ return NULL;
+ }
+ }
+
+ if (st->tag == Ist_PutI) {
+
+ relation = getAliasingRelation_II(
+ descrG, ixG, biasG,
+ st->Ist.PutI.descr,
+ st->Ist.PutI.ix,
+ st->Ist.PutI.bias
+ );
+
+ if (relation == NoAlias) {
+ /* This PutI definitely doesn't overlap. Ignore it and
+ keep going. */
+ continue; /* the for j loop */
+ }
+
+ if (relation == UnknownAlias) {
+            /* We don't know whether this PutI writes the same guest
+               state as the GetI reads.  So we have to give up. */
+ return NULL;
+ }
+
+ /* Otherwise, we've found what we're looking for. */
+ vassert(relation == ExactAlias);
+ return st->Ist.PutI.data;
+
+ } /* if (st->tag == Ist_PutI) */
+
+ if (st->tag == Ist_Dirty) {
+ /* Be conservative. If the dirty call has any guest effects at
+ all, give up. We could do better -- only give up if there
+ are any guest writes/modifies. */
+ if (st->Ist.Dirty.details->nFxState > 0)
+ return NULL;
+ }
+
+ } /* for */
+
+ /* No valid replacement was found. */
+ return NULL;
+}
+
+
+
+/* Assuming pi is a PutI stmt, is s2 identical to it (in the sense
+ that it writes exactly the same piece of guest state) ? Safe
+ answer: False. */
+
+static Bool identicalPutIs ( IRStmt* pi, IRStmt* s2 )
+{
+ vassert(pi->tag == Ist_PutI);
+ if (s2->tag != Ist_PutI)
+ return False;
+
+ return toBool(
+ getAliasingRelation_II(
+ pi->Ist.PutI.descr, pi->Ist.PutI.ix, pi->Ist.PutI.bias,
+ s2->Ist.PutI.descr, s2->Ist.PutI.ix, s2->Ist.PutI.bias
+ )
+ == ExactAlias
+ );
+}
+
+
+/* Assuming pi is a PutI stmt, is s2 a Get/GetI/Put/PutI which might
+ overlap it? Safe answer: True. Note, we could do a lot better
+ than this if needed. */
+
+static
+Bool guestAccessWhichMightOverlapPutI (
+ IRTypeEnv* tyenv, IRStmt* pi, IRStmt* s2
+ )
+{
+ GSAliasing relation;
+ UInt minoffP, maxoffP;
+
+ vassert(pi->tag == Ist_PutI);
+ getArrayBounds(pi->Ist.PutI.descr, &minoffP, &maxoffP);
+ switch (s2->tag) {
+
+ case Ist_NoOp:
+ case Ist_IMark:
+ return False;
+
+ case Ist_MBE:
+ case Ist_AbiHint:
+ /* just be paranoid ... these should be rare. */
+ return True;
+
+ case Ist_CAS:
+ /* This is unbelievably lame, but it's probably not
+ significant from a performance point of view. Really, a
+ CAS is a load-store op, so it should be safe to say False.
+ However .. */
+ return True;
+
+ case Ist_Dirty:
+ /* If the dirty call has any guest effects at all, give up.
+ Probably could do better. */
+ if (s2->Ist.Dirty.details->nFxState > 0)
+ return True;
+ return False;
+
+ case Ist_Put:
+ vassert(isIRAtom(s2->Ist.Put.data));
+ relation
+ = getAliasingRelation_IC(
+ pi->Ist.PutI.descr, pi->Ist.PutI.ix,
+ s2->Ist.Put.offset,
+ typeOfIRExpr(tyenv,s2->Ist.Put.data)
+ );
+ goto have_relation;
+
+ case Ist_PutI:
+ vassert(isIRAtom(s2->Ist.PutI.ix));
+ vassert(isIRAtom(s2->Ist.PutI.data));
+ relation
+ = getAliasingRelation_II(
+ pi->Ist.PutI.descr, pi->Ist.PutI.ix, pi->Ist.PutI.bias,
+ s2->Ist.PutI.descr, s2->Ist.PutI.ix, s2->Ist.PutI.bias
+ );
+ goto have_relation;
+
+ case Ist_WrTmp:
+ if (s2->Ist.WrTmp.data->tag == Iex_GetI) {
+ relation
+ = getAliasingRelation_II(
+ pi->Ist.PutI.descr, pi->Ist.PutI.ix,
+ pi->Ist.PutI.bias,
+ s2->Ist.WrTmp.data->Iex.GetI.descr,
+ s2->Ist.WrTmp.data->Iex.GetI.ix,
+ s2->Ist.WrTmp.data->Iex.GetI.bias
+ );
+ goto have_relation;
+ }
+ if (s2->Ist.WrTmp.data->tag == Iex_Get) {
+ relation
+ = getAliasingRelation_IC(
+ pi->Ist.PutI.descr, pi->Ist.PutI.ix,
+ s2->Ist.WrTmp.data->Iex.Get.offset,
+ s2->Ist.WrTmp.data->Iex.Get.ty
+ );
+ goto have_relation;
+ }
+ return False;
+
+ case Ist_Store:
+ vassert(isIRAtom(s2->Ist.Store.addr));
+ vassert(isIRAtom(s2->Ist.Store.data));
+ return False;
+
+ default:
+ vex_printf("\n"); ppIRStmt(s2); vex_printf("\n");
+ vpanic("guestAccessWhichMightOverlapPutI");
+ }
+
+ have_relation:
+ if (relation == NoAlias)
+ return False;
+ else
+ return True; /* ExactAlias or UnknownAlias */
+}
+
+
+
+/* ---------- PutI/GetI transformations main functions --------- */
+
+/* Remove redundant GetIs, to the extent that they can be detected.
+ bb is modified in-place. */
+
+static
+void do_redundant_GetI_elimination ( IRSB* bb )
+{
+ Int i;
+ IRStmt* st;
+
+ for (i = bb->stmts_used-1; i >= 0; i--) {
+ st = bb->stmts[i];
+ if (st->tag == Ist_NoOp)
+ continue;
+
+ if (st->tag == Ist_WrTmp
+ && st->Ist.WrTmp.data->tag == Iex_GetI
+ && st->Ist.WrTmp.data->Iex.GetI.ix->tag == Iex_RdTmp) {
+ IRRegArray* descr = st->Ist.WrTmp.data->Iex.GetI.descr;
+ IRExpr* ix = st->Ist.WrTmp.data->Iex.GetI.ix;
+ Int bias = st->Ist.WrTmp.data->Iex.GetI.bias;
+ IRExpr* replacement = findPutI(bb, i-1, descr, ix, bias);
+ if (replacement
+ && isIRAtom(replacement)
+ /* Make sure we're doing a type-safe transformation! */
+ && typeOfIRExpr(bb->tyenv, replacement) == descr->elemTy) {
+ if (DEBUG_IROPT) {
+ vex_printf("rGI: ");
+ ppIRExpr(st->Ist.WrTmp.data);
+ vex_printf(" -> ");
+ ppIRExpr(replacement);
+ vex_printf("\n");
+ }
+ bb->stmts[i] = IRStmt_WrTmp(st->Ist.WrTmp.tmp, replacement);
+ }
+ }
+ }
+
+}
+
+
+/* Remove redundant PutIs, to the extent that they can be detected.
+ bb is modified in-place. */
+
+static
+void do_redundant_PutI_elimination ( IRSB* bb )
+{
+ Int i, j;
+ Bool delete;
+ IRStmt *st, *stj;
+
+ for (i = 0; i < bb->stmts_used; i++) {
+ st = bb->stmts[i];
+ if (st->tag != Ist_PutI)
+ continue;
+ /* Ok, search forwards from here to see if we can find another
+ PutI which makes this one redundant, and dodging various
+ hazards. Search forwards:
+ * If conditional exit, give up (because anything after that
+ does not postdominate this put).
+ * If a Get which might overlap, give up (because this PutI
+           is then not necessarily dead).
+ * If a Put which is identical, stop with success.
+ * If a Put which might overlap, but is not identical, give up.
+ * If a dirty helper call which might write guest state, give up.
+ * If a Put which definitely doesn't overlap, or any other
+ kind of stmt, continue.
+ */
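+      /* For example (a sketch): in
+            PUTI(descr)[t1,0] = t2
+            t3 = Add64(t2,t2)
+            PUTI(descr)[t1,0] = t3
+         the second PutI is identical to the first, and nothing in
+         between exits or reads the overwritten state, so the first
+         PutI is replaced by a no-op. */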
+ delete = False;
+ for (j = i+1; j < bb->stmts_used; j++) {
+ stj = bb->stmts[j];
+ if (stj->tag == Ist_NoOp)
+ continue;
+ if (identicalPutIs(st, stj)) {
+ /* success! */
+ delete = True;
+ break;
+ }
+ if (stj->tag == Ist_Exit)
+ /* give up */
+ break;
+         if (stj->tag == Ist_Dirty)
+ /* give up; could do better here */
+ break;
+ if (guestAccessWhichMightOverlapPutI(bb->tyenv, st, stj))
+ /* give up */
+ break;
+ }
+
+ if (delete) {
+ if (DEBUG_IROPT) {
+ vex_printf("rPI: ");
+ ppIRStmt(st);
+ vex_printf("\n");
+ }
+ bb->stmts[i] = IRStmt_NoOp();
+ }
+
+ }
+}
+
+
+/*---------------------------------------------------------------*/
+/*--- Loop unrolling ---*/
+/*---------------------------------------------------------------*/
+
+/* Adjust all tmp values (names) in e by delta. e is destructively
+ modified. */
+
+static void deltaIRExpr ( IRExpr* e, Int delta )
+{
+ Int i;
+ switch (e->tag) {
+ case Iex_RdTmp:
+ e->Iex.RdTmp.tmp += delta;
+ break;
+ case Iex_Get:
+ case Iex_Const:
+ break;
+ case Iex_GetI:
+ deltaIRExpr(e->Iex.GetI.ix, delta);
+ break;
+ case Iex_Qop:
+ deltaIRExpr(e->Iex.Qop.arg1, delta);
+ deltaIRExpr(e->Iex.Qop.arg2, delta);
+ deltaIRExpr(e->Iex.Qop.arg3, delta);
+ deltaIRExpr(e->Iex.Qop.arg4, delta);
+ break;
+ case Iex_Triop:
+ deltaIRExpr(e->Iex.Triop.arg1, delta);
+ deltaIRExpr(e->Iex.Triop.arg2, delta);
+ deltaIRExpr(e->Iex.Triop.arg3, delta);
+ break;
+ case Iex_Binop:
+ deltaIRExpr(e->Iex.Binop.arg1, delta);
+ deltaIRExpr(e->Iex.Binop.arg2, delta);
+ break;
+ case Iex_Unop:
+ deltaIRExpr(e->Iex.Unop.arg, delta);
+ break;
+ case Iex_Load:
+ deltaIRExpr(e->Iex.Load.addr, delta);
+ break;
+ case Iex_CCall:
+ for (i = 0; e->Iex.CCall.args[i]; i++)
+ deltaIRExpr(e->Iex.CCall.args[i], delta);
+ break;
+ case Iex_Mux0X:
+ deltaIRExpr(e->Iex.Mux0X.cond, delta);
+ deltaIRExpr(e->Iex.Mux0X.expr0, delta);
+ deltaIRExpr(e->Iex.Mux0X.exprX, delta);
+ break;
+ default:
+ vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
+ vpanic("deltaIRExpr");
+ }
+}
+
+/* Adjust all tmp values (names) in st by delta. st is destructively
+ modified. */
+
+static void deltaIRStmt ( IRStmt* st, Int delta )
+{
+ Int i;
+ IRDirty* d;
+ switch (st->tag) {
+ case Ist_NoOp:
+ case Ist_IMark:
+ case Ist_MBE:
+ break;
+ case Ist_AbiHint:
+ deltaIRExpr(st->Ist.AbiHint.base, delta);
+ deltaIRExpr(st->Ist.AbiHint.nia, delta);
+ break;
+ case Ist_Put:
+ deltaIRExpr(st->Ist.Put.data, delta);
+ break;
+ case Ist_PutI:
+ deltaIRExpr(st->Ist.PutI.ix, delta);
+ deltaIRExpr(st->Ist.PutI.data, delta);
+ break;
+ case Ist_WrTmp:
+ st->Ist.WrTmp.tmp += delta;
+ deltaIRExpr(st->Ist.WrTmp.data, delta);
+ break;
+ case Ist_Exit:
+ deltaIRExpr(st->Ist.Exit.guard, delta);
+ break;
+ case Ist_Store:
+ deltaIRExpr(st->Ist.Store.addr, delta);
+ deltaIRExpr(st->Ist.Store.data, delta);
+ break;
+ case Ist_CAS:
+ if (st->Ist.CAS.details->oldHi != IRTemp_INVALID)
+ st->Ist.CAS.details->oldHi += delta;
+ st->Ist.CAS.details->oldLo += delta;
+ deltaIRExpr(st->Ist.CAS.details->addr, delta);
+ if (st->Ist.CAS.details->expdHi)
+ deltaIRExpr(st->Ist.CAS.details->expdHi, delta);
+ deltaIRExpr(st->Ist.CAS.details->expdLo, delta);
+ if (st->Ist.CAS.details->dataHi)
+ deltaIRExpr(st->Ist.CAS.details->dataHi, delta);
+ deltaIRExpr(st->Ist.CAS.details->dataLo, delta);
+ break;
+ case Ist_LLSC:
+ st->Ist.LLSC.result += delta;
+ deltaIRExpr(st->Ist.LLSC.addr, delta);
+ if (st->Ist.LLSC.storedata)
+ deltaIRExpr(st->Ist.LLSC.storedata, delta);
+ break;
+ case Ist_Dirty:
+ d = st->Ist.Dirty.details;
+ deltaIRExpr(d->guard, delta);
+ for (i = 0; d->args[i]; i++)
+ deltaIRExpr(d->args[i], delta);
+ if (d->tmp != IRTemp_INVALID)
+ d->tmp += delta;
+ if (d->mAddr)
+ deltaIRExpr(d->mAddr, delta);
+ break;
+ default:
+ vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
+ vpanic("deltaIRStmt");
+ }
+}
+
+
+/* If possible, return a loop-unrolled version of bb0.  bb0 itself
+   is not modified; it is deep-copied first.  If unrolling is not
+   possible, return NULL. */
+
+/* The two schemas considered are:
+
+ X: BODY; goto X
+
+ which unrolls to (eg) X: BODY;BODY; goto X
+
+ and
+
+ X: BODY; if (c) goto X; goto Y
+ which trivially transforms to
+ X: BODY; if (!c) goto Y; goto X;
+ so it falls in the scope of the first case.
+
+ X and Y must be literal (guest) addresses.
+*/
+
+static Int calc_unroll_factor( IRSB* bb )
+{
+ Int n_stmts, i;
+
+ n_stmts = 0;
+ for (i = 0; i < bb->stmts_used; i++) {
+ if (bb->stmts[i]->tag != Ist_NoOp)
+ n_stmts++;
+ }
+
+ if (n_stmts <= vex_control.iropt_unroll_thresh/8) {
+ if (vex_control.iropt_verbosity > 0)
+ vex_printf("vex iropt: 8 x unrolling (%d sts -> %d sts)\n",
+ n_stmts, 8* n_stmts);
+ return 8;
+ }
+ if (n_stmts <= vex_control.iropt_unroll_thresh/4) {
+ if (vex_control.iropt_verbosity > 0)
+ vex_printf("vex iropt: 4 x unrolling (%d sts -> %d sts)\n",
+ n_stmts, 4* n_stmts);
+ return 4;
+ }
+
+ if (n_stmts <= vex_control.iropt_unroll_thresh/2) {
+ if (vex_control.iropt_verbosity > 0)
+ vex_printf("vex iropt: 2 x unrolling (%d sts -> %d sts)\n",
+ n_stmts, 2* n_stmts);
+ return 2;
+ }
+
+ if (vex_control.iropt_verbosity > 0)
+ vex_printf("vex iropt: not unrolling (%d sts)\n", n_stmts);
+
+ return 1;
+}
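+
+/* For example (a sketch): with vex_control.iropt_unroll_thresh set
+   to 120, a 14-stmt body unrolls 8x (14 <= 120/8), a 25-stmt body
+   unrolls 4x, and a 70-stmt body is left alone. */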
+
+
+static IRSB* maybe_loop_unroll_BB ( IRSB* bb0, Addr64 my_addr )
+{
+ Int i, j, jmax, n_vars;
+ Bool xxx_known;
+ Addr64 xxx_value, yyy_value;
+ IRExpr* udst;
+ IRStmt* st;
+ IRConst* con;
+ IRSB *bb1, *bb2;
+ Int unroll_factor;
+
+ if (vex_control.iropt_unroll_thresh <= 0)
+ return NULL;
+
+ /* First off, figure out if we can unroll this loop. Do this
+ without modifying bb0. */
+
+ if (bb0->jumpkind != Ijk_Boring)
+ return NULL;
+
+ xxx_known = False;
+ xxx_value = 0;
+
+ /* Extract the next-guest address. If it isn't a literal, we
+ have to give up. */
+
+ udst = bb0->next;
+ if (udst->tag == Iex_Const
+ && (udst->Iex.Const.con->tag == Ico_U32
+ || udst->Iex.Const.con->tag == Ico_U64)) {
+ /* The BB ends in a jump to a literal location. */
+ xxx_known = True;
+ xxx_value = udst->Iex.Const.con->tag == Ico_U64
+ ? udst->Iex.Const.con->Ico.U64
+ : (Addr64)(udst->Iex.Const.con->Ico.U32);
+ }
+
+ if (!xxx_known)
+ return NULL;
+
+   /* Now we know the BB ends in a jump to a literal location.  If
+ it's a jump to itself (viz, idiom #1), move directly to the
+ unrolling stage, first cloning the bb so the original isn't
+ modified. */
+ if (xxx_value == my_addr) {
+ unroll_factor = calc_unroll_factor( bb0 );
+ if (unroll_factor < 2)
+ return NULL;
+ bb1 = deepCopyIRSB( bb0 );
+ bb0 = NULL;
+ udst = NULL; /* is now invalid */
+ goto do_unroll;
+ }
+
+ /* Search for the second idiomatic form:
+ X: BODY; if (c) goto X; goto Y
+ We know Y, but need to establish that the last stmt
+ is 'if (c) goto X'.
+ */
+ yyy_value = xxx_value;
+ for (i = bb0->stmts_used-1; i >= 0; i--)
+ if (bb0->stmts[i])
+ break;
+
+ if (i < 0)
+ return NULL; /* block with no stmts. Strange. */
+
+ st = bb0->stmts[i];
+ if (st->tag != Ist_Exit)
+ return NULL;
+ if (st->Ist.Exit.jk != Ijk_Boring)
+ return NULL;
+
+ con = st->Ist.Exit.dst;
+ vassert(con->tag == Ico_U32 || con->tag == Ico_U64);
+
+ xxx_value = con->tag == Ico_U64
+ ? st->Ist.Exit.dst->Ico.U64
+ : (Addr64)(st->Ist.Exit.dst->Ico.U32);
+
+ /* If this assertion fails, we have some kind of type error. */
+ vassert(con->tag == udst->Iex.Const.con->tag);
+
+ if (xxx_value != my_addr)
+ /* We didn't find either idiom. Give up. */
+ return NULL;
+
+ /* Ok, we found idiom #2. Copy the BB, switch around the xxx and
+ yyy values (which makes it look like idiom #1), and go into
+ unrolling proper. This means finding (again) the last stmt, in
+ the copied BB. */
+
+ unroll_factor = calc_unroll_factor( bb0 );
+ if (unroll_factor < 2)
+ return NULL;
+
+ bb1 = deepCopyIRSB( bb0 );
+ bb0 = NULL;
+ udst = NULL; /* is now invalid */
+ for (i = bb1->stmts_used-1; i >= 0; i--)
+ if (bb1->stmts[i])
+ break;
+
+ /* The next bunch of assertions should be true since we already
+ found and checked the last stmt in the original bb. */
+
+ vassert(i >= 0);
+
+ st = bb1->stmts[i];
+ vassert(st->tag == Ist_Exit);
+
+ con = st->Ist.Exit.dst;
+ vassert(con->tag == Ico_U32 || con->tag == Ico_U64);
+
+ udst = bb1->next;
+ vassert(udst->tag == Iex_Const);
+ vassert(udst->Iex.Const.con->tag == Ico_U32
+ || udst->Iex.Const.con->tag == Ico_U64);
+ vassert(con->tag == udst->Iex.Const.con->tag);
+
+ /* switch the xxx and yyy fields around */
+ if (con->tag == Ico_U64) {
+ udst->Iex.Const.con->Ico.U64 = xxx_value;
+ con->Ico.U64 = yyy_value;
+ } else {
+ udst->Iex.Const.con->Ico.U32 = (UInt)xxx_value;
+ con->Ico.U32 = (UInt)yyy_value;
+ }
+
+ /* negate the test condition */
+ st->Ist.Exit.guard
+ = IRExpr_Unop(Iop_Not1,deepCopyIRExpr(st->Ist.Exit.guard));
+
+   /* --- The unroller proper.  Both idioms have by --- */
+   /* --- now been converted to idiom 1. --- */
+
+ do_unroll:
+
+ vassert(unroll_factor == 2
+ || unroll_factor == 4
+ || unroll_factor == 8);
+
+ jmax = unroll_factor==8 ? 3 : (unroll_factor==4 ? 2 : 1);
+ for (j = 1; j <= jmax; j++) {
+
+ n_vars = bb1->tyenv->types_used;
+
+ bb2 = deepCopyIRSB(bb1);
+ for (i = 0; i < n_vars; i++)
+ (void)newIRTemp(bb1->tyenv, bb2->tyenv->types[i]);
+
+ for (i = 0; i < bb2->stmts_used; i++) {
+ /* deltaIRStmt destructively modifies the stmt, but
+ that's OK since bb2 is a complete fresh copy of bb1. */
+ deltaIRStmt(bb2->stmts[i], n_vars);
+ addStmtToIRSB(bb1, bb2->stmts[i]);
+ }
+ }
+
+ if (DEBUG_IROPT) {
+ vex_printf("\nUNROLLED (%llx)\n", my_addr);
+ ppIRSB(bb1);
+ vex_printf("\n");
+ }
+
+ /* Flattening; sigh. The unroller succeeds in breaking flatness
+ by negating the test condition. This should be fixed properly.
+ For the moment use this shotgun approach. */
+ return flatten_BB(bb1);
+}
+
+
+/*---------------------------------------------------------------*/
+/*--- The tree builder ---*/
+/*---------------------------------------------------------------*/
+
+/* This isn't part of IR optimisation. Really it's a pass done prior
+ to instruction selection, which improves the code that the
+ instruction selector can produce. */
+
+/* --- The 'tmp' environment is the central data structure here --- */
+
+/* The number of outstanding bindings we're prepared to track.
+ The number of times the env becomes full and we have to dump
+ the oldest binding (hence reducing code quality) falls very
+ rapidly as the env size increases. 8 gives reasonable performance
+ under most circumstances. */
+#define A_NENV 10
+
+/* bindee == NULL === slot is not in use
+ bindee != NULL === slot is in use
+*/
+typedef
+ struct {
+ IRTemp binder;
+ IRExpr* bindee;
+ Bool doesLoad;
+ Bool doesGet;
+ }
+ ATmpInfo;
+
+__attribute__((unused))
+static void ppAEnv ( ATmpInfo* env )
+{
+ Int i;
+ for (i = 0; i < A_NENV; i++) {
+ vex_printf("%d tmp %d val ", i, (Int)env[i].binder);
+ if (env[i].bindee)
+ ppIRExpr(env[i].bindee);
+ else
+ vex_printf("(null)");
+ vex_printf("\n");
+ }
+}
+
+/* --- Tree-traversal fns --- */
+
+/* Traverse an expr, and detect if any part of it reads memory or does
+ a Get. Be careful ... this really controls how much the
+ tree-builder can reorder the code, so getting it right is critical.
+*/
+static void setHints_Expr (Bool* doesLoad, Bool* doesGet, IRExpr* e )
+{
+ Int i;
+ switch (e->tag) {
+ case Iex_CCall:
+ for (i = 0; e->Iex.CCall.args[i]; i++)
+ setHints_Expr(doesLoad, doesGet, e->Iex.CCall.args[i]);
+ return;
+ case Iex_Mux0X:
+ setHints_Expr(doesLoad, doesGet, e->Iex.Mux0X.cond);
+ setHints_Expr(doesLoad, doesGet, e->Iex.Mux0X.expr0);
+ setHints_Expr(doesLoad, doesGet, e->Iex.Mux0X.exprX);
+ return;
+ case Iex_Qop:
+ setHints_Expr(doesLoad, doesGet, e->Iex.Qop.arg1);
+ setHints_Expr(doesLoad, doesGet, e->Iex.Qop.arg2);
+ setHints_Expr(doesLoad, doesGet, e->Iex.Qop.arg3);
+ setHints_Expr(doesLoad, doesGet, e->Iex.Qop.arg4);
+ return;
+ case Iex_Triop:
+ setHints_Expr(doesLoad, doesGet, e->Iex.Triop.arg1);
+ setHints_Expr(doesLoad, doesGet, e->Iex.Triop.arg2);
+ setHints_Expr(doesLoad, doesGet, e->Iex.Triop.arg3);
+ return;
+ case Iex_Binop:
+ setHints_Expr(doesLoad, doesGet, e->Iex.Binop.arg1);
+ setHints_Expr(doesLoad, doesGet, e->Iex.Binop.arg2);
+ return;
+ case Iex_Unop:
+ setHints_Expr(doesLoad, doesGet, e->Iex.Unop.arg);
+ return;
+ case Iex_Load:
+ *doesLoad = True;
+ setHints_Expr(doesLoad, doesGet, e->Iex.Load.addr);
+ return;
+ case Iex_Get:
+ *doesGet = True;
+ return;
+ case Iex_GetI:
+ *doesGet = True;
+ setHints_Expr(doesLoad, doesGet, e->Iex.GetI.ix);
+ return;
+ case Iex_RdTmp:
+ case Iex_Const:
+ return;
+ default:
+ vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
+ vpanic("setHints_Expr");
+ }
+}
+
+
+/* Add a binding to the front of the env and slide all the rest
+ backwards. It should be the case that the last slot is free. */
+static void addToEnvFront ( ATmpInfo* env, IRTemp binder, IRExpr* bindee )
+{
+ Int i;
+ vassert(env[A_NENV-1].bindee == NULL);
+ for (i = A_NENV-1; i >= 1; i--)
+ env[i] = env[i-1];
+ env[0].binder = binder;
+ env[0].bindee = bindee;
+ env[0].doesLoad = False; /* filled in later */
+ env[0].doesGet = False; /* filled in later */
+}
+
+/* Given uses :: array of UShort, indexed by IRTemp
+ Add the use-occurrences of temps in this expression
+   to the counts in 'uses'.
+*/
+static void aoccCount_Expr ( UShort* uses, IRExpr* e )
+{
+ Int i;
+
+ switch (e->tag) {
+
+ case Iex_RdTmp: /* the only interesting case */
+ uses[e->Iex.RdTmp.tmp]++;
+ return;
+
+ case Iex_Mux0X:
+ aoccCount_Expr(uses, e->Iex.Mux0X.cond);
+ aoccCount_Expr(uses, e->Iex.Mux0X.expr0);
+ aoccCount_Expr(uses, e->Iex.Mux0X.exprX);
+ return;
+
+ case Iex_Qop:
+ aoccCount_Expr(uses, e->Iex.Qop.arg1);
+ aoccCount_Expr(uses, e->Iex.Qop.arg2);
+ aoccCount_Expr(uses, e->Iex.Qop.arg3);
+ aoccCount_Expr(uses, e->Iex.Qop.arg4);
+ return;
+
+ case Iex_Triop:
+ aoccCount_Expr(uses, e->Iex.Triop.arg1);
+ aoccCount_Expr(uses, e->Iex.Triop.arg2);
+ aoccCount_Expr(uses, e->Iex.Triop.arg3);
+ return;
+
+ case Iex_Binop:
+ aoccCount_Expr(uses, e->Iex.Binop.arg1);
+ aoccCount_Expr(uses, e->Iex.Binop.arg2);
+ return;
+
+ case Iex_Unop:
+ aoccCount_Expr(uses, e->Iex.Unop.arg);
+ return;
+
+ case Iex_Load:
+ aoccCount_Expr(uses, e->Iex.Load.addr);
+ return;
+
+ case Iex_CCall:
+ for (i = 0; e->Iex.CCall.args[i]; i++)
+ aoccCount_Expr(uses, e->Iex.CCall.args[i]);
+ return;
+
+ case Iex_GetI:
+ aoccCount_Expr(uses, e->Iex.GetI.ix);
+ return;
+
+ case Iex_Const:
+ case Iex_Get:
+ return;
+
+ default:
+ vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
+ vpanic("aoccCount_Expr");
+ }
+}
+
+
+/* Given uses :: array of UShort, indexed by IRTemp
+ Add the use-occurrences of temps in this statement
+   to the counts in 'uses'.
+*/
+static void aoccCount_Stmt ( UShort* uses, IRStmt* st )
+{
+ Int i;
+ IRDirty* d;
+ IRCAS* cas;
+ switch (st->tag) {
+ case Ist_AbiHint:
+ aoccCount_Expr(uses, st->Ist.AbiHint.base);
+ aoccCount_Expr(uses, st->Ist.AbiHint.nia);
+ return;
+ case Ist_WrTmp:
+ aoccCount_Expr(uses, st->Ist.WrTmp.data);
+ return;
+ case Ist_Put:
+ aoccCount_Expr(uses, st->Ist.Put.data);
+ return;
+ case Ist_PutI:
+ aoccCount_Expr(uses, st->Ist.PutI.ix);
+ aoccCount_Expr(uses, st->Ist.PutI.data);
+ return;
+ case Ist_Store:
+ aoccCount_Expr(uses, st->Ist.Store.addr);
+ aoccCount_Expr(uses, st->Ist.Store.data);
+ return;
+ case Ist_CAS:
+ cas = st->Ist.CAS.details;
+ aoccCount_Expr(uses, cas->addr);
+ if (cas->expdHi)
+ aoccCount_Expr(uses, cas->expdHi);
+ aoccCount_Expr(uses, cas->expdLo);
+ if (cas->dataHi)
+ aoccCount_Expr(uses, cas->dataHi);
+ aoccCount_Expr(uses, cas->dataLo);
+ return;
+ case Ist_LLSC:
+ aoccCount_Expr(uses, st->Ist.LLSC.addr);
+ if (st->Ist.LLSC.storedata)
+ aoccCount_Expr(uses, st->Ist.LLSC.storedata);
+ return;
+ case Ist_Dirty:
+ d = st->Ist.Dirty.details;
+ if (d->mFx != Ifx_None)
+ aoccCount_Expr(uses, d->mAddr);
+ aoccCount_Expr(uses, d->guard);
+ for (i = 0; d->args[i]; i++)
+ aoccCount_Expr(uses, d->args[i]);
+ return;
+ case Ist_NoOp:
+ case Ist_IMark:
+ case Ist_MBE:
+ return;
+ case Ist_Exit:
+ aoccCount_Expr(uses, st->Ist.Exit.guard);
+ return;
+ default:
+ vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
+ vpanic("aoccCount_Stmt");
+ }
+}
+
+/* Look up a binding for tmp in the env. If found, return the bound
+ expression, and set the env's binding to NULL so it is marked as
+ used. If not found, return NULL. */
+
+static IRExpr* atbSubst_Temp ( ATmpInfo* env, IRTemp tmp )
+{
+ Int i;
+ for (i = 0; i < A_NENV; i++) {
+ if (env[i].binder == tmp && env[i].bindee != NULL) {
+ IRExpr* bindee = env[i].bindee;
+ env[i].bindee = NULL;
+ return bindee;
+ }
+ }
+ return NULL;
+}
+
+/* Traverse e, looking for temps. For each observed temp, see if env
+ contains a binding for the temp, and if so return the bound value.
+ The env has the property that any binding it holds is
+ 'single-shot', so once a binding is used, it is marked as no longer
+ available, by setting its .bindee field to NULL. */
+
+static inline Bool is_Unop ( IRExpr* e, IROp op ) {
+ return e->tag == Iex_Unop && e->Iex.Unop.op == op;
+}
+static inline Bool is_Binop ( IRExpr* e, IROp op ) {
+ return e->tag == Iex_Binop && e->Iex.Binop.op == op;
+}
+
+static IRExpr* fold_IRExpr_Binop ( IROp op, IRExpr* a1, IRExpr* a2 )
+{
+ switch (op) {
+ case Iop_Or32:
+ /* Or32( CmpwNEZ32(x), CmpwNEZ32(y) ) --> CmpwNEZ32( Or32( x, y ) ) */
+ if (is_Unop(a1, Iop_CmpwNEZ32) && is_Unop(a2, Iop_CmpwNEZ32))
+ return IRExpr_Unop( Iop_CmpwNEZ32,
+ IRExpr_Binop( Iop_Or32, a1->Iex.Unop.arg,
+ a2->Iex.Unop.arg ) );
+ break;
+ default:
+ break;
+ }
+ /* no reduction rule applies */
+ return IRExpr_Binop( op, a1, a2 );
+}
+
+static IRExpr* fold_IRExpr_Unop ( IROp op, IRExpr* aa )
+{
+ switch (op) {
+ case Iop_CmpwNEZ64:
+ /* CmpwNEZ64( Or64 ( CmpwNEZ64(x), y ) ) --> CmpwNEZ64( Or64( x, y ) ) */
+ if (is_Binop(aa, Iop_Or64)
+ && is_Unop(aa->Iex.Binop.arg1, Iop_CmpwNEZ64))
+ return fold_IRExpr_Unop(
+ Iop_CmpwNEZ64,
+ IRExpr_Binop(Iop_Or64,
+ aa->Iex.Binop.arg1->Iex.Unop.arg,
+ aa->Iex.Binop.arg2));
+ /* CmpwNEZ64( Or64 ( x, CmpwNEZ64(y) ) ) --> CmpwNEZ64( Or64( x, y ) ) */
+ if (is_Binop(aa, Iop_Or64)
+ && is_Unop(aa->Iex.Binop.arg2, Iop_CmpwNEZ64))
+ return fold_IRExpr_Unop(
+ Iop_CmpwNEZ64,
+ IRExpr_Binop(Iop_Or64,
+ aa->Iex.Binop.arg1,
+ aa->Iex.Binop.arg2->Iex.Unop.arg));
+ break;
+ case Iop_CmpNEZ64:
+ /* CmpNEZ64( Left64(x) ) --> CmpNEZ64(x) */
+ if (is_Unop(aa, Iop_Left64))
+ return IRExpr_Unop(Iop_CmpNEZ64, aa->Iex.Unop.arg);
+ break;
+ case Iop_CmpwNEZ32:
+ /* CmpwNEZ32( CmpwNEZ32 ( x ) ) --> CmpwNEZ32 ( x ) */
+ if (is_Unop(aa, Iop_CmpwNEZ32))
+ return IRExpr_Unop( Iop_CmpwNEZ32, aa->Iex.Unop.arg );
+ break;
+ case Iop_CmpNEZ32:
+ /* CmpNEZ32( Left32(x) ) --> CmpNEZ32(x) */
+ if (is_Unop(aa, Iop_Left32))
+ return IRExpr_Unop(Iop_CmpNEZ32, aa->Iex.Unop.arg);
+ break;
+ case Iop_Left32:
+ /* Left32( Left32(x) ) --> Left32(x) */
+ if (is_Unop(aa, Iop_Left32))
+ return IRExpr_Unop( Iop_Left32, aa->Iex.Unop.arg );
+ break;
+ case Iop_32to1:
+ /* 32to1( 1Uto32 ( x ) ) --> x */
+ if (is_Unop(aa, Iop_1Uto32))
+ return aa->Iex.Unop.arg;
+ /* 32to1( CmpwNEZ32 ( x )) --> CmpNEZ32(x) */
+ if (is_Unop(aa, Iop_CmpwNEZ32))
+ return IRExpr_Unop( Iop_CmpNEZ32, aa->Iex.Unop.arg );
+ break;
+ case Iop_64to1:
+ /* 64to1( 1Uto64 ( x ) ) --> x */
+ if (is_Unop(aa, Iop_1Uto64))
+ return aa->Iex.Unop.arg;
+ /* 64to1( CmpwNEZ64 ( x )) --> CmpNEZ64(x) */
+ if (is_Unop(aa, Iop_CmpwNEZ64))
+ return IRExpr_Unop( Iop_CmpNEZ64, aa->Iex.Unop.arg );
+ break;
+ case Iop_64to32:
+ /* 64to32( 32Uto64 ( x )) --> x */
+ if (is_Unop(aa, Iop_32Uto64))
+ return aa->Iex.Unop.arg;
+ /* 64to32( 8Uto64 ( x )) --> 8Uto32(x) */
+ if (is_Unop(aa, Iop_8Uto64))
+ return IRExpr_Unop(Iop_8Uto32, aa->Iex.Unop.arg);
+ break;
+
+ case Iop_32Uto64:
+ /* 32Uto64( 8Uto32( x )) --> 8Uto64(x) */
+ if (is_Unop(aa, Iop_8Uto32))
+ return IRExpr_Unop(Iop_8Uto64, aa->Iex.Unop.arg);
+ /* 32Uto64( 16Uto32( x )) --> 16Uto64(x) */
+ if (is_Unop(aa, Iop_16Uto32))
+ return IRExpr_Unop(Iop_16Uto64, aa->Iex.Unop.arg);
+ break;
+
+ case Iop_1Sto32:
+      /* 1Sto32( CmpNEZ8( 32to8( 1Uto32( CmpNEZ32( x ))))) --> CmpwNEZ32(x) */
+ if (is_Unop(aa, Iop_CmpNEZ8)
+ && is_Unop(aa->Iex.Unop.arg, Iop_32to8)
+ && is_Unop(aa->Iex.Unop.arg->Iex.Unop.arg, Iop_1Uto32)
+ && is_Unop(aa->Iex.Unop.arg->Iex.Unop.arg->Iex.Unop.arg,
+ Iop_CmpNEZ32)) {
+ return IRExpr_Unop( Iop_CmpwNEZ32,
+ aa->Iex.Unop.arg->Iex.Unop.arg
+ ->Iex.Unop.arg->Iex.Unop.arg);
+ }
+ break;
+
+
+ default:
+ break;
+ }
+ /* no reduction rule applies */
+ return IRExpr_Unop( op, aa );
+}
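+
+/* To see how the rules above compose through the recursion (a worked
+   example, not an extra rule): folding
+      CmpwNEZ64( Or64( CmpwNEZ64(x), CmpwNEZ64(y) ) )
+   first strips the CmpwNEZ64 on arg1 and recurses, then strips the
+   one on arg2, giving
+      CmpwNEZ64( Or64( x, y ) ). */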
+
+static IRExpr* atbSubst_Expr ( ATmpInfo* env, IRExpr* e )
+{
+ IRExpr* e2;
+ IRExpr** args2;
+ Int i;
+
+ switch (e->tag) {
+
+ case Iex_CCall:
+ args2 = shallowCopyIRExprVec(e->Iex.CCall.args);
+ for (i = 0; args2[i]; i++)
+ args2[i] = atbSubst_Expr(env,args2[i]);
+ return IRExpr_CCall(
+ e->Iex.CCall.cee,
+ e->Iex.CCall.retty,
+ args2
+ );
+ case Iex_RdTmp:
+ e2 = atbSubst_Temp(env, e->Iex.RdTmp.tmp);
+ return e2 ? e2 : e;
+ case Iex_Mux0X:
+ return IRExpr_Mux0X(
+ atbSubst_Expr(env, e->Iex.Mux0X.cond),
+ atbSubst_Expr(env, e->Iex.Mux0X.expr0),
+ atbSubst_Expr(env, e->Iex.Mux0X.exprX)
+ );
+ case Iex_Qop:
+ return IRExpr_Qop(
+ e->Iex.Qop.op,
+ atbSubst_Expr(env, e->Iex.Qop.arg1),
+ atbSubst_Expr(env, e->Iex.Qop.arg2),
+ atbSubst_Expr(env, e->Iex.Qop.arg3),
+ atbSubst_Expr(env, e->Iex.Qop.arg4)
+ );
+ case Iex_Triop:
+ return IRExpr_Triop(
+ e->Iex.Triop.op,
+ atbSubst_Expr(env, e->Iex.Triop.arg1),
+ atbSubst_Expr(env, e->Iex.Triop.arg2),
+ atbSubst_Expr(env, e->Iex.Triop.arg3)
+ );
+ case Iex_Binop:
+ return fold_IRExpr_Binop(
+ e->Iex.Binop.op,
+ atbSubst_Expr(env, e->Iex.Binop.arg1),
+ atbSubst_Expr(env, e->Iex.Binop.arg2)
+ );
+ case Iex_Unop:
+ return fold_IRExpr_Unop(
+ e->Iex.Unop.op,
+ atbSubst_Expr(env, e->Iex.Unop.arg)
+ );
+ case Iex_Load:
+ return IRExpr_Load(
+ e->Iex.Load.end,
+ e->Iex.Load.ty,
+ atbSubst_Expr(env, e->Iex.Load.addr)
+ );
+ case Iex_GetI:
+ return IRExpr_GetI(
+ e->Iex.GetI.descr,
+ atbSubst_Expr(env, e->Iex.GetI.ix),
+ e->Iex.GetI.bias
+ );
+ case Iex_Const:
+ case Iex_Get:
+ return e;
+ default:
+ vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
+ vpanic("atbSubst_Expr");
+ }
+}
+
+/* Same deal as atbSubst_Expr, except for stmts. */
+
+static IRStmt* atbSubst_Stmt ( ATmpInfo* env, IRStmt* st )
+{
+ Int i;
+ IRDirty *d, *d2;
+ IRCAS *cas, *cas2;
+ switch (st->tag) {
+ case Ist_AbiHint:
+ return IRStmt_AbiHint(
+ atbSubst_Expr(env, st->Ist.AbiHint.base),
+ st->Ist.AbiHint.len,
+ atbSubst_Expr(env, st->Ist.AbiHint.nia)
+ );
+ case Ist_Store:
+ return IRStmt_Store(
+ st->Ist.Store.end,
+ atbSubst_Expr(env, st->Ist.Store.addr),
+ atbSubst_Expr(env, st->Ist.Store.data)
+ );
+ case Ist_WrTmp:
+ return IRStmt_WrTmp(
+ st->Ist.WrTmp.tmp,
+ atbSubst_Expr(env, st->Ist.WrTmp.data)
+ );
+ case Ist_Put:
+ return IRStmt_Put(
+ st->Ist.Put.offset,
+ atbSubst_Expr(env, st->Ist.Put.data)
+ );
+ case Ist_PutI:
+ return IRStmt_PutI(
+ st->Ist.PutI.descr,
+ atbSubst_Expr(env, st->Ist.PutI.ix),
+ st->Ist.PutI.bias,
+ atbSubst_Expr(env, st->Ist.PutI.data)
+ );
+
+ case Ist_Exit:
+ return IRStmt_Exit(
+ atbSubst_Expr(env, st->Ist.Exit.guard),
+ st->Ist.Exit.jk,
+ st->Ist.Exit.dst
+ );
+ case Ist_IMark:
+ return IRStmt_IMark(st->Ist.IMark.addr, st->Ist.IMark.len);
+ case Ist_NoOp:
+ return IRStmt_NoOp();
+ case Ist_MBE:
+ return IRStmt_MBE(st->Ist.MBE.event);
+ case Ist_CAS:
+ cas = st->Ist.CAS.details;
+ cas2 = mkIRCAS(
+ cas->oldHi, cas->oldLo, cas->end,
+ atbSubst_Expr(env, cas->addr),
+ cas->expdHi ? atbSubst_Expr(env, cas->expdHi) : NULL,
+ atbSubst_Expr(env, cas->expdLo),
+ cas->dataHi ? atbSubst_Expr(env, cas->dataHi) : NULL,
+ atbSubst_Expr(env, cas->dataLo)
+ );
+ return IRStmt_CAS(cas2);
+ case Ist_LLSC:
+ return IRStmt_LLSC(
+ st->Ist.LLSC.end,
+ st->Ist.LLSC.result,
+ atbSubst_Expr(env, st->Ist.LLSC.addr),
+ st->Ist.LLSC.storedata
+ ? atbSubst_Expr(env, st->Ist.LLSC.storedata) : NULL
+ );
+ case Ist_Dirty:
+ d = st->Ist.Dirty.details;
+ d2 = emptyIRDirty();
+ *d2 = *d;
+ if (d2->mFx != Ifx_None)
+ d2->mAddr = atbSubst_Expr(env, d2->mAddr);
+ d2->guard = atbSubst_Expr(env, d2->guard);
+ for (i = 0; d2->args[i]; i++)
+ d2->args[i] = atbSubst_Expr(env, d2->args[i]);
+ return IRStmt_Dirty(d2);
+ default:
+ vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
+ vpanic("atbSubst_Stmt");
+ }
+}
+
+/* notstatic */ void ado_treebuild_BB ( IRSB* bb )
+{
+ Int i, j, k, m;
+ Bool stmtPuts, stmtStores, invalidateMe;
+ IRStmt* st;
+ IRStmt* st2;
+ ATmpInfo env[A_NENV];
+
+ Int n_tmps = bb->tyenv->types_used;
+ UShort* uses = LibVEX_Alloc(n_tmps * sizeof(UShort));
+
+ /* Phase 1. Scan forwards in bb, counting use occurrences of each
+ temp. Also count occurrences in the bb->next field. */
+
+ for (i = 0; i < n_tmps; i++)
+ uses[i] = 0;
+
+ for (i = 0; i < bb->stmts_used; i++) {
+ st = bb->stmts[i];
+ if (st->tag == Ist_NoOp)
+ continue;
+ aoccCount_Stmt( uses, st );
+ }
+ aoccCount_Expr(uses, bb->next );
+
+# if 0
+ for (i = 0; i < n_tmps; i++) {
+ if (uses[i] == 0)
+ continue;
+ ppIRTemp( (IRTemp)i );
+ vex_printf(" used %d\n", (Int)uses[i] );
+ }
+# endif
+
+ /* Phase 2. Scan forwards in bb. For each statement in turn:
+
+      If the env is full, emit the oldest (end) element.  This
+      guarantees there is at least one free slot for the steps below.
+
+ On seeing 't = E', occ(t)==1,
+ let E'=env(E)
+ delete this stmt
+ add t -> E' to the front of the env
+ Examine E' and set the hints for E' appropriately
+ (doesLoad? doesGet?)
+
+ On seeing any other stmt,
+ let stmt' = env(stmt)
+ remove from env any 't=E' binds invalidated by stmt
+ emit the invalidated stmts
+ emit stmt'
+ compact any holes in env
+ by sliding entries towards the front
+
+ Finally, apply env to bb->next.
+ */
+
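+   /* A worked example of the above (illustrative; the temp numbers
+      are invented).  Given the flat input
+         t1 = LDle:I32(t0)
+         t2 = Add32(t1,0x4:I32)
+         STle(t9) = t2
+      with occ(t1) == occ(t2) == 1, both WrTmp stmts are deleted and
+      held in env, and the store is emitted as the single tree
+         STle(t9) = Add32(LDle:I32(t0),0x4:I32)  */
+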
+ for (i = 0; i < A_NENV; i++) {
+ env[i].bindee = NULL;
+ env[i].binder = IRTemp_INVALID;
+ }
+
+ /* The stmts in bb are being reordered, and we are guaranteed to
+      end up with no more than the number we started with.  Use i as
+      the cursor for the current stmt being examined, and j <= i as
+      the cursor for the current stmt being written.
+ */
+ j = 0;
+ for (i = 0; i < bb->stmts_used; i++) {
+
+ st = bb->stmts[i];
+ if (st->tag == Ist_NoOp)
+ continue;
+
+ /* Ensure there's at least one space in the env, by emitting
+ the oldest binding if necessary. */
+ if (env[A_NENV-1].bindee != NULL) {
+ bb->stmts[j] = IRStmt_WrTmp( env[A_NENV-1].binder,
+ env[A_NENV-1].bindee );
+ j++;
+ vassert(j <= i);
+ env[A_NENV-1].bindee = NULL;
+ }
+
+ /* Consider current stmt. */
+ if (st->tag == Ist_WrTmp && uses[st->Ist.WrTmp.tmp] <= 1) {
+ IRExpr *e, *e2;
+
+ /* optional extra: dump dead bindings as we find them.
+ Removes the need for a prior dead-code removal pass. */
+ if (uses[st->Ist.WrTmp.tmp] == 0) {
+ if (0) vex_printf("DEAD binding\n");
+ continue; /* for (i = 0; i < bb->stmts_used; i++) loop */
+ }
+ vassert(uses[st->Ist.WrTmp.tmp] == 1);
+
+ /* ok, we have 't = E', occ(t)==1. Do the abovementioned
+ actions. */
+ e = st->Ist.WrTmp.data;
+ e2 = atbSubst_Expr(env, e);
+ addToEnvFront(env, st->Ist.WrTmp.tmp, e2);
+ setHints_Expr(&env[0].doesLoad, &env[0].doesGet, e2);
+ /* don't advance j, as we are deleting this stmt and instead
+ holding it temporarily in the env. */
+ continue; /* for (i = 0; i < bb->stmts_used; i++) loop */
+ }
+
+ /* we get here for any other kind of statement. */
+ /* 'use up' any bindings required by the current statement. */
+ st2 = atbSubst_Stmt(env, st);
+
+ /* Now, before this stmt, dump any bindings in env that it
+ invalidates. These need to be dumped in the order in which
+ they originally entered env -- that means from oldest to
+ youngest. */
+
+      /* stmtPuts/stmtStores characterise what the stmt under
+         consideration does, or might do (erring on the side of True
+         is always safe). */
+ stmtPuts
+ = toBool( st->tag == Ist_Put
+ || st->tag == Ist_PutI
+ || st->tag == Ist_Dirty );
+
+      /* stmtStores is True if this stmt writes memory, or might do so
+         (==> we don't want to reorder other loads or stores relative
+         to it).  Also, both LL and SC fall under this classification,
+         since we really ought to be conservative and not reorder any
+         other memory transactions relative to them. */
+ stmtStores
+ = toBool( st->tag == Ist_Store
+ || st->tag == Ist_Dirty
+ || st->tag == Ist_LLSC );
+
+ for (k = A_NENV-1; k >= 0; k--) {
+ if (env[k].bindee == NULL)
+ continue;
+ /* Compare the actions of this stmt with the actions of
+ binding 'k', to see if they invalidate the binding. */
+ invalidateMe
+ = toBool(
+ /* a store invalidates loaded data */
+ (env[k].doesLoad && stmtStores)
+ /* a put invalidates get'd data */
+ || (env[k].doesGet && stmtPuts)
+ /* a put invalidates loaded data. Note, we could do
+ much better here in the sense that we only need to
+ invalidate trees containing loads if the Put in
+ question is marked as requiring precise
+ exceptions. */
+ || (env[k].doesLoad && stmtPuts)
+ /* probably overly conservative: a memory bus event
+ invalidates absolutely everything, so that all
+ computation prior to it is forced to complete before
+ proceeding with the event (fence,lock,unlock). */
+ || st->tag == Ist_MBE
+ /* also be (probably overly) paranoid re AbiHints */
+ || st->tag == Ist_AbiHint
+ );
+ if (invalidateMe) {
+ bb->stmts[j] = IRStmt_WrTmp( env[k].binder, env[k].bindee );
+ j++;
+ vassert(j <= i);
+ env[k].bindee = NULL;
+ }
+ }
+
+ /* Slide in-use entries in env up to the front */
+ m = 0;
+ for (k = 0; k < A_NENV; k++) {
+ if (env[k].bindee != NULL) {
+ env[m] = env[k];
+ m++;
+ }
+ }
+      for (; m < A_NENV; m++) {
+ env[m].bindee = NULL;
+ }
+
+ /* finally, emit the substituted statement */
+ bb->stmts[j] = st2;
+ /* vex_printf("**2 "); ppIRStmt(bb->stmts[j]); vex_printf("\n"); */
+ j++;
+
+ vassert(j <= i+1);
+ } /* for each stmt in the original bb ... */
+
+ /* Finally ... substitute the ->next field as much as possible, and
+ dump any left-over bindings. Hmm. Perhaps there should be no
+ left over bindings? Or any left-over bindings are
+ by definition dead? */
+ bb->next = atbSubst_Expr(env, bb->next);
+ bb->stmts_used = j;
+}
+
+
+/*---------------------------------------------------------------*/
+/*--- iropt main ---*/
+/*---------------------------------------------------------------*/
+
+static Bool iropt_verbose = False; /* True; */
+
+
+/* Do a simple cleanup pass on bb. This is: redundant Get removal,
+ redundant Put removal, constant propagation, dead code removal,
+ clean helper specialisation, and dead code removal (again).
+*/
+
+
+static
+IRSB* cheap_transformations (
+ IRSB* bb,
+ IRExpr* (*specHelper) (HChar*, IRExpr**, IRStmt**, Int),
+ Bool (*preciseMemExnsFn)(Int,Int)
+ )
+{
+ redundant_get_removal_BB ( bb );
+ if (iropt_verbose) {
+ vex_printf("\n========= REDUNDANT GET\n\n" );
+ ppIRSB(bb);
+ }
+
+ redundant_put_removal_BB ( bb, preciseMemExnsFn );
+ if (iropt_verbose) {
+ vex_printf("\n========= REDUNDANT PUT\n\n" );
+ ppIRSB(bb);
+ }
+
+ bb = cprop_BB ( bb );
+ if (iropt_verbose) {
+ vex_printf("\n========= CPROPD\n\n" );
+ ppIRSB(bb);
+ }
+
+ do_deadcode_BB ( bb );
+ if (iropt_verbose) {
+ vex_printf("\n========= DEAD\n\n" );
+ ppIRSB(bb);
+ }
+
+ bb = spec_helpers_BB ( bb, specHelper );
+ do_deadcode_BB ( bb );
+ if (iropt_verbose) {
+ vex_printf("\n========= SPECd \n\n" );
+ ppIRSB(bb);
+ }
+
+ return bb;
+}
+
+
+/* Do some more expensive transformations on bb, which are aimed at
+ optimising as much as possible in the presence of GetI and PutI. */
+
+static
+IRSB* expensive_transformations( IRSB* bb )
+{
+ (void)do_cse_BB( bb );
+ collapse_AddSub_chains_BB( bb );
+ do_redundant_GetI_elimination( bb );
+ do_redundant_PutI_elimination( bb );
+ do_deadcode_BB( bb );
+ return bb;
+}
+
+
+/* Scan a flattened BB to look for signs that more expensive
+ optimisations might be useful:
+ - find out if there are any GetIs and PutIs
+ - find out if there are any floating or vector-typed temporaries
+*/
+
+static void considerExpensives ( /*OUT*/Bool* hasGetIorPutI,
+ /*OUT*/Bool* hasVorFtemps,
+ IRSB* bb )
+{
+ Int i, j;
+ IRStmt* st;
+ IRDirty* d;
+ IRCAS* cas;
+
+ *hasGetIorPutI = False;
+ *hasVorFtemps = False;
+
+ for (i = 0; i < bb->stmts_used; i++) {
+ st = bb->stmts[i];
+ switch (st->tag) {
+ case Ist_AbiHint:
+ vassert(isIRAtom(st->Ist.AbiHint.base));
+ vassert(isIRAtom(st->Ist.AbiHint.nia));
+ break;
+ case Ist_PutI:
+ *hasGetIorPutI = True;
+ break;
+ case Ist_WrTmp:
+ if (st->Ist.WrTmp.data->tag == Iex_GetI)
+ *hasGetIorPutI = True;
+ switch (typeOfIRTemp(bb->tyenv, st->Ist.WrTmp.tmp)) {
+ case Ity_I1: case Ity_I8: case Ity_I16:
+ case Ity_I32: case Ity_I64: case Ity_I128:
+ break;
+ case Ity_F32: case Ity_F64: case Ity_V128:
+ *hasVorFtemps = True;
+ break;
+ default:
+ goto bad;
+ }
+ break;
+ case Ist_Put:
+ vassert(isIRAtom(st->Ist.Put.data));
+ break;
+ case Ist_Store:
+ vassert(isIRAtom(st->Ist.Store.addr));
+ vassert(isIRAtom(st->Ist.Store.data));
+ break;
+ case Ist_CAS:
+ cas = st->Ist.CAS.details;
+ vassert(isIRAtom(cas->addr));
+ vassert(cas->expdHi == NULL || isIRAtom(cas->expdHi));
+ vassert(isIRAtom(cas->expdLo));
+ vassert(cas->dataHi == NULL || isIRAtom(cas->dataHi));
+ vassert(isIRAtom(cas->dataLo));
+ break;
+ case Ist_LLSC:
+ vassert(isIRAtom(st->Ist.LLSC.addr));
+ if (st->Ist.LLSC.storedata)
+ vassert(isIRAtom(st->Ist.LLSC.storedata));
+ break;
+ case Ist_Dirty:
+ d = st->Ist.Dirty.details;
+ vassert(isIRAtom(d->guard));
+ for (j = 0; d->args[j]; j++)
+ vassert(isIRAtom(d->args[j]));
+ if (d->mFx != Ifx_None)
+ vassert(isIRAtom(d->mAddr));
+ break;
+ case Ist_NoOp:
+ case Ist_IMark:
+ case Ist_MBE:
+ break;
+ case Ist_Exit:
+ vassert(isIRAtom(st->Ist.Exit.guard));
+ break;
+ default:
+ bad:
+ ppIRStmt(st);
+ vpanic("considerExpensives");
+ }
+ }
+}
+
+
+/* ---------------- The main iropt entry point. ---------------- */
+
+/* exported from this file */
+/* Rules of the game:
+
+ - IRExpr/IRStmt trees should be treated as immutable, as they
+ may get shared. So never change a field of such a tree node;
+ instead construct and return a new one if needed.
+*/
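+
+/* For instance (a sketch of the rule, not code from this file): to
+   'change' the argument of a Unop, construct a replacement node,
+      e2 = IRExpr_Unop(e->Iex.Unop.op, newArg);
+   rather than assigning to e->Iex.Unop.arg, since e may be shared. */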
+
+
+IRSB* do_iropt_BB(
+ IRSB* bb0,
+ IRExpr* (*specHelper) (HChar*, IRExpr**, IRStmt**, Int),
+ Bool (*preciseMemExnsFn)(Int,Int),
+ Addr64 guest_addr,
+ VexArch guest_arch
+ )
+{
+ static Int n_total = 0;
+ static Int n_expensive = 0;
+
+ Bool hasGetIorPutI, hasVorFtemps;
+ IRSB *bb, *bb2;
+
+ n_total++;
+
+ /* First flatten the block out, since all other
+ phases assume flat code. */
+
+ bb = flatten_BB ( bb0 );
+
+ if (iropt_verbose) {
+ vex_printf("\n========= FLAT\n\n" );
+ ppIRSB(bb);
+ }
+
+ /* If at level 0, stop now. */
+ if (vex_control.iropt_level <= 0) return bb;
+
+ /* Now do a preliminary cleanup pass, and figure out if we also
+ need to do 'expensive' optimisations. Expensive optimisations
+ are deemed necessary if the block contains any GetIs or PutIs.
+ If needed, do expensive transformations and then another cheap
+ cleanup pass. */
+
+ bb = cheap_transformations( bb, specHelper, preciseMemExnsFn );
+
+ if (guest_arch == VexArchARM) {
+ /* Translating Thumb2 code produces a lot of chaff. We have to
+ work extra hard to get rid of it. */
+ bb = cprop_BB(bb);
+ bb = spec_helpers_BB ( bb, specHelper );
+ redundant_put_removal_BB ( bb, preciseMemExnsFn );
+ do_deadcode_BB( bb );
+ }
+
+ if (vex_control.iropt_level > 1) {
+
+ /* Peer at what we have, to decide how much more effort to throw
+ at it. */
+ considerExpensives( &hasGetIorPutI, &hasVorFtemps, bb );
+
+ if (hasVorFtemps && !hasGetIorPutI) {
+ /* If any evidence of FP or Vector activity, CSE, as that
+ tends to mop up all manner of lardy code to do with
+ rounding modes. Don't bother if hasGetIorPutI since that
+ case leads into the expensive transformations, which do
+ CSE anyway. */
+ (void)do_cse_BB( bb );
+ do_deadcode_BB( bb );
+ }
+
+ if (hasGetIorPutI) {
+ Bool cses;
+ n_expensive++;
+ if (DEBUG_IROPT)
+ vex_printf("***** EXPENSIVE %d %d\n", n_total, n_expensive);
+ bb = expensive_transformations( bb );
+ bb = cheap_transformations( bb, specHelper, preciseMemExnsFn );
+ /* Potentially common up GetIs */
+ cses = do_cse_BB( bb );
+ if (cses)
+ bb = cheap_transformations( bb, specHelper, preciseMemExnsFn );
+ }
+
+ /* Now have a go at unrolling simple (single-BB) loops. If
+ successful, clean up the results as much as possible. */
+
+ bb2 = maybe_loop_unroll_BB( bb, guest_addr );
+ if (bb2) {
+ bb = cheap_transformations( bb2, specHelper, preciseMemExnsFn );
+ if (hasGetIorPutI) {
+ bb = expensive_transformations( bb );
+ bb = cheap_transformations( bb, specHelper, preciseMemExnsFn );
+ } else {
+ /* at least do CSE and dead code removal */
+ do_cse_BB( bb );
+ do_deadcode_BB( bb );
+ }
+ if (0) vex_printf("vex iropt: unrolled a loop\n");
+ }
+
+ }
+
+ return bb;
+}
+
+
+/*---------------------------------------------------------------*/
+/*--- end ir_opt.c ---*/
+/*---------------------------------------------------------------*/
diff --git a/VEX/priv/ir_opt.h b/VEX/priv/ir_opt.h
new file mode 100644
index 0000000..ecdb146
--- /dev/null
+++ b/VEX/priv/ir_opt.h
@@ -0,0 +1,71 @@
+
+/*---------------------------------------------------------------*/
+/*--- begin ir_opt.h ---*/
+/*---------------------------------------------------------------*/
+
+/*
+ This file is part of Valgrind, a dynamic binary instrumentation
+ framework.
+
+ Copyright (C) 2004-2010 OpenWorks LLP
+ info@open-works.net
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA.
+
+ The GNU General Public License is contained in the file COPYING.
+
+ Neither the names of the U.S. Department of Energy nor the
+ University of California nor the names of its contributors may be
+ used to endorse or promote products derived from this software
+ without prior written permission.
+*/
+
+#ifndef __VEX_IR_OPT_H
+#define __VEX_IR_OPT_H
+
+#include "libvex_basictypes.h"
+#include "libvex_ir.h"
+#include "libvex.h"
+
+/* Top level optimiser entry point. Returns a new BB. Operates
+ under the control of the global "vex_control" struct. */
+extern
+IRSB* do_iropt_BB(
+ IRSB* bb,
+ IRExpr* (*specHelper) (HChar*, IRExpr**, IRStmt**, Int),
+ Bool (*preciseMemExnsFn)(Int,Int),
+ Addr64 guest_addr,
+ VexArch guest_arch
+ );
+
+/* Do a constant folding/propagation pass. */
+extern
+IRSB* cprop_BB ( IRSB* );
+
+/* Do a dead-code removal pass. bb is destructively modified. */
+extern
+void do_deadcode_BB ( IRSB* bb );
+
+/* The tree-builder. Make (approximately) maximal safe trees. bb is
+ destructively modified. */
+extern
+void ado_treebuild_BB ( IRSB* bb );
+
+#endif /* ndef __VEX_IR_OPT_H */
+
+/*---------------------------------------------------------------*/
+/*--- end ir_opt.h ---*/
+/*---------------------------------------------------------------*/
diff --git a/VEX/priv/main_globals.c b/VEX/priv/main_globals.c
new file mode 100644
index 0000000..716fa75
--- /dev/null
+++ b/VEX/priv/main_globals.c
@@ -0,0 +1,71 @@
+
+/*---------------------------------------------------------------*/
+/*--- begin main_globals.c ---*/
+/*---------------------------------------------------------------*/
+
+/*
+ This file is part of Valgrind, a dynamic binary instrumentation
+ framework.
+
+ Copyright (C) 2004-2010 OpenWorks LLP
+ info@open-works.net
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA.
+
+ The GNU General Public License is contained in the file COPYING.
+
+ Neither the names of the U.S. Department of Energy nor the
+ University of California nor the names of its contributors may be
+ used to endorse or promote products derived from this software
+ without prior written permission.
+*/
+
+#include "libvex_basictypes.h"
+
+#include "main_util.h"
+#include "main_globals.h"
+
+
+/* Global settings for the VEX library. These are the
+ only library-wide globals. */
+
+/* Are we started yet? */
+Bool vex_initdone = False;
+
+/* failure exit function */
+__attribute__ ((noreturn))
+void (*vex_failure_exit) ( void ) = NULL;
+
+/* logging output function */
+void (*vex_log_bytes) ( HChar*, Int nbytes ) = NULL;
+
+/* debug paranoia level */
+Int vex_debuglevel = 0;
+
+/* trace flags */
+Int vex_traceflags = 0;
+
+/* Are we supporting valgrind checking? */
+Bool vex_valgrind_support = False;
+
+/* Optimiser/front-end control */
+VexControl vex_control = { 0,0,False,0,0,0 };
+
+
+
+/*---------------------------------------------------------------*/
+/*--- end main_globals.c ---*/
+/*---------------------------------------------------------------*/
diff --git a/VEX/priv/main_globals.h b/VEX/priv/main_globals.h
new file mode 100644
index 0000000..5b561a3
--- /dev/null
+++ b/VEX/priv/main_globals.h
@@ -0,0 +1,84 @@
+
+/*---------------------------------------------------------------*/
+/*--- begin main_globals.h ---*/
+/*---------------------------------------------------------------*/
+
+/*
+ This file is part of Valgrind, a dynamic binary instrumentation
+ framework.
+
+ Copyright (C) 2004-2010 OpenWorks LLP
+ info@open-works.net
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA.
+
+ The GNU General Public License is contained in the file COPYING.
+
+ Neither the names of the U.S. Department of Energy nor the
+ University of California nor the names of its contributors may be
+ used to endorse or promote products derived from this software
+ without prior written permission.
+*/
+
+#ifndef __VEX_MAIN_GLOBALS_H
+#define __VEX_MAIN_GLOBALS_H
+
+#include "libvex_basictypes.h"
+#include "libvex.h"
+
+
+/* Global settings for the VEX library. These are the
+ only library-wide globals. */
+
+/* Are we started yet? */
+extern Bool vex_initdone;
+
+/* failure exit function */
+__attribute__ ((noreturn))
+extern void (*vex_failure_exit) ( void );
+
+/* logging output function */
+extern void (*vex_log_bytes) ( HChar*, Int nbytes );
+
+/* debug paranoia level */
+extern Int vex_debuglevel;
+
+/* trace flags */
+extern Int vex_traceflags;
+
+/* Are we supporting valgrind checking? */
+extern Bool vex_valgrind_support;
+
+/* Optimiser/front-end control */
+extern VexControl vex_control;
+
+
+/* vex_traceflags values */
+#define VEX_TRACE_FE (1 << 7) /* show conversion into IR */
+#define VEX_TRACE_OPT1 (1 << 6) /* show after initial opt */
+#define VEX_TRACE_INST (1 << 5) /* show after instrumentation */
+#define VEX_TRACE_OPT2 (1 << 4) /* show after second opt */
+#define VEX_TRACE_TREES (1 << 3) /* show after tree building */
+#define VEX_TRACE_VCODE (1 << 2) /* show selected insns */
+#define VEX_TRACE_RCODE (1 << 1) /* show after reg-alloc */
+#define VEX_TRACE_ASM (1 << 0) /* show final assembly */
+
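+/* Illustrative use (a sketch, not part of the library): a client
+   wanting to see both the front-end IR and the final assembly could
+   set, in its VexTranslateArgs,
+      vta.traceflags = VEX_TRACE_FE | VEX_TRACE_ASM;
+*/
+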
+
+#endif /* ndef __VEX_MAIN_GLOBALS_H */
+
+/*---------------------------------------------------------------*/
+/*--- end main_globals.h ---*/
+/*---------------------------------------------------------------*/
diff --git a/VEX/priv/main_main.c b/VEX/priv/main_main.c
new file mode 100644
index 0000000..1e80972
--- /dev/null
+++ b/VEX/priv/main_main.c
@@ -0,0 +1,911 @@
+
+/*---------------------------------------------------------------*/
+/*--- begin main_main.c ---*/
+/*---------------------------------------------------------------*/
+
+/*
+ This file is part of Valgrind, a dynamic binary instrumentation
+ framework.
+
+ Copyright (C) 2004-2010 OpenWorks LLP
+ info@open-works.net
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA.
+
+ The GNU General Public License is contained in the file COPYING.
+
+ Neither the names of the U.S. Department of Energy nor the
+ University of California nor the names of its contributors may be
+ used to endorse or promote products derived from this software
+ without prior written permission.
+*/
+
+#include "libvex.h"
+#include "libvex_emwarn.h"
+#include "libvex_guest_x86.h"
+#include "libvex_guest_amd64.h"
+#include "libvex_guest_arm.h"
+#include "libvex_guest_ppc32.h"
+#include "libvex_guest_ppc64.h"
+
+#include "main_globals.h"
+#include "main_util.h"
+#include "host_generic_regs.h"
+#include "ir_opt.h"
+
+#include "host_x86_defs.h"
+#include "host_amd64_defs.h"
+#include "host_ppc_defs.h"
+#include "host_arm_defs.h"
+
+#include "guest_generic_bb_to_IR.h"
+#include "guest_x86_defs.h"
+#include "guest_amd64_defs.h"
+#include "guest_arm_defs.h"
+#include "guest_ppc_defs.h"
+
+#include "host_generic_simd128.h"
+
+
+/* This file contains the top level interface to the library. */
+
+/* --------- fwds ... --------- */
+
+static Bool are_valid_hwcaps ( VexArch arch, UInt hwcaps );
+static HChar* show_hwcaps ( VexArch arch, UInt hwcaps );
+
+
+/* --------- Initialise the library. --------- */
+
+/* Exported to library client. */
+
+void LibVEX_default_VexControl ( /*OUT*/ VexControl* vcon )
+{
+ vcon->iropt_verbosity = 0;
+ vcon->iropt_level = 2;
+ vcon->iropt_precise_memory_exns = False;
+ vcon->iropt_unroll_thresh = 120;
+ vcon->guest_max_insns = 60;
+ vcon->guest_chase_thresh = 10;
+ vcon->guest_chase_cond = False;
+}
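+
+/* An illustrative client-side sequence (a sketch; failure_exit and
+   log_bytes stand for the client's own callbacks):
+      VexControl vcon;
+      LibVEX_default_VexControl(&vcon);
+      vcon.iropt_level = 1;             /* e.g. less aggressive iropt */
+      LibVEX_Init(failure_exit, log_bytes, 0, False, &vcon);
+*/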
+
+
+/* Exported to library client. */
+
+void LibVEX_Init (
+ /* failure exit function */
+ __attribute__ ((noreturn))
+ void (*failure_exit) ( void ),
+ /* logging output function */
+ void (*log_bytes) ( HChar*, Int nbytes ),
+ /* debug paranoia level */
+ Int debuglevel,
+ /* Are we supporting valgrind checking? */
+ Bool valgrind_support,
+ /* Control ... */
+ /*READONLY*/VexControl* vcon
+)
+{
+ /* First off, do enough minimal setup so that the following
+ assertions can fail in a sane fashion, if need be. */
+ vex_failure_exit = failure_exit;
+ vex_log_bytes = log_bytes;
+
+ /* Now it's safe to check parameters for sanity. */
+ vassert(!vex_initdone);
+ vassert(failure_exit);
+ vassert(log_bytes);
+ vassert(debuglevel >= 0);
+
+ vassert(vcon->iropt_verbosity >= 0);
+ vassert(vcon->iropt_level >= 0);
+ vassert(vcon->iropt_level <= 2);
+ vassert(vcon->iropt_unroll_thresh >= 0);
+ vassert(vcon->iropt_unroll_thresh <= 400);
+ vassert(vcon->guest_max_insns >= 1);
+ vassert(vcon->guest_max_insns <= 100);
+ vassert(vcon->guest_chase_thresh >= 0);
+ vassert(vcon->guest_chase_thresh < vcon->guest_max_insns);
+ vassert(vcon->guest_chase_cond == True
+ || vcon->guest_chase_cond == False);
+
+ /* Check that Vex has been built with sizes of basic types as
+ stated in priv/libvex_basictypes.h. Failure of any of these is
+ a serious configuration error and should be corrected
+ immediately. If any of these assertions fail you can fully
+ expect Vex not to work properly, if at all. */
+
+ vassert(1 == sizeof(UChar));
+ vassert(1 == sizeof(Char));
+ vassert(2 == sizeof(UShort));
+ vassert(2 == sizeof(Short));
+ vassert(4 == sizeof(UInt));
+ vassert(4 == sizeof(Int));
+ vassert(8 == sizeof(ULong));
+ vassert(8 == sizeof(Long));
+ vassert(4 == sizeof(Float));
+ vassert(8 == sizeof(Double));
+ vassert(1 == sizeof(Bool));
+ vassert(4 == sizeof(Addr32));
+ vassert(8 == sizeof(Addr64));
+ vassert(16 == sizeof(U128));
+ vassert(16 == sizeof(V128));
+
+ vassert(sizeof(void*) == 4 || sizeof(void*) == 8);
+ vassert(sizeof(void*) == sizeof(int*));
+ vassert(sizeof(void*) == sizeof(HWord));
+
+ vassert(VEX_HOST_WORDSIZE == sizeof(void*));
+ vassert(VEX_HOST_WORDSIZE == sizeof(HWord));
+
+ /* Really start up .. */
+ vex_debuglevel = debuglevel;
+ vex_valgrind_support = valgrind_support;
+ vex_control = *vcon;
+ vex_initdone = True;
+ vexSetAllocMode ( VexAllocModeTEMP );
+}
+
+
+/* --------- Make a translation. --------- */
+
+/* Exported to library client. */
+
+VexTranslateResult LibVEX_Translate ( VexTranslateArgs* vta )
+{
+   /* This is the bundle of functions we need to do the back-end stuff
+ (insn selection, reg-alloc, assembly) whilst being insulated
+ from the target instruction set. */
+ HReg* available_real_regs;
+ Int n_available_real_regs;
+ Bool (*isMove) ( HInstr*, HReg*, HReg* );
+ void (*getRegUsage) ( HRegUsage*, HInstr*, Bool );
+ void (*mapRegs) ( HRegRemap*, HInstr*, Bool );
+ void (*genSpill) ( HInstr**, HInstr**, HReg, Int, Bool );
+ void (*genReload) ( HInstr**, HInstr**, HReg, Int, Bool );
+ HInstr* (*directReload) ( HInstr*, HReg, Short );
+ void (*ppInstr) ( HInstr*, Bool );
+ void (*ppReg) ( HReg );
+ HInstrArray* (*iselSB) ( IRSB*, VexArch, VexArchInfo*,
+ VexAbiInfo* );
+ Int (*emit) ( UChar*, Int, HInstr*, Bool, void* );
+ IRExpr* (*specHelper) ( HChar*, IRExpr**, IRStmt**, Int );
+ Bool (*preciseMemExnsFn) ( Int, Int );
+
+ DisOneInstrFn disInstrFn;
+
+ VexGuestLayout* guest_layout;
+ Bool host_is_bigendian = False;
+ IRSB* irsb;
+ HInstrArray* vcode;
+ HInstrArray* rcode;
+ Int i, j, k, out_used, guest_sizeB;
+ Int offB_TISTART, offB_TILEN;
+ UChar insn_bytes[32];
+ IRType guest_word_type;
+ IRType host_word_type;
+ Bool mode64;
+
+ guest_layout = NULL;
+ available_real_regs = NULL;
+ n_available_real_regs = 0;
+ isMove = NULL;
+ getRegUsage = NULL;
+ mapRegs = NULL;
+ genSpill = NULL;
+ genReload = NULL;
+ directReload = NULL;
+ ppInstr = NULL;
+ ppReg = NULL;
+ iselSB = NULL;
+ emit = NULL;
+ specHelper = NULL;
+ preciseMemExnsFn = NULL;
+ disInstrFn = NULL;
+ guest_word_type = Ity_INVALID;
+ host_word_type = Ity_INVALID;
+ offB_TISTART = 0;
+ offB_TILEN = 0;
+ mode64 = False;
+
+ vex_traceflags = vta->traceflags;
+
+ vassert(vex_initdone);
+ vexSetAllocModeTEMP_and_clear();
+ vexAllocSanityCheck();
+
+ /* First off, check that the guest and host insn sets
+ are supported. */
+
+ switch (vta->arch_host) {
+
+ case VexArchX86:
+ mode64 = False;
+ getAllocableRegs_X86 ( &n_available_real_regs,
+ &available_real_regs );
+ isMove = (Bool(*)(HInstr*,HReg*,HReg*)) isMove_X86Instr;
+ getRegUsage = (void(*)(HRegUsage*,HInstr*, Bool))
+ getRegUsage_X86Instr;
+ mapRegs = (void(*)(HRegRemap*,HInstr*, Bool)) mapRegs_X86Instr;
+ genSpill = (void(*)(HInstr**,HInstr**,HReg,Int,Bool))
+ genSpill_X86;
+ genReload = (void(*)(HInstr**,HInstr**,HReg,Int,Bool))
+ genReload_X86;
+ directReload = (HInstr*(*)(HInstr*,HReg,Short)) directReload_X86;
+ ppInstr = (void(*)(HInstr*, Bool)) ppX86Instr;
+ ppReg = (void(*)(HReg)) ppHRegX86;
+ iselSB = iselSB_X86;
+ emit = (Int(*)(UChar*,Int,HInstr*,Bool,void*)) emit_X86Instr;
+ host_is_bigendian = False;
+ host_word_type = Ity_I32;
+ vassert(are_valid_hwcaps(VexArchX86, vta->archinfo_host.hwcaps));
+ vassert(vta->dispatch != NULL); /* jump-to-dispatcher scheme */
+ break;
+
+ case VexArchAMD64:
+ mode64 = True;
+ getAllocableRegs_AMD64 ( &n_available_real_regs,
+ &available_real_regs );
+ isMove = (Bool(*)(HInstr*,HReg*,HReg*)) isMove_AMD64Instr;
+ getRegUsage = (void(*)(HRegUsage*,HInstr*, Bool))
+ getRegUsage_AMD64Instr;
+ mapRegs = (void(*)(HRegRemap*,HInstr*, Bool)) mapRegs_AMD64Instr;
+ genSpill = (void(*)(HInstr**,HInstr**,HReg,Int,Bool))
+ genSpill_AMD64;
+ genReload = (void(*)(HInstr**,HInstr**,HReg,Int,Bool))
+ genReload_AMD64;
+ ppInstr = (void(*)(HInstr*, Bool)) ppAMD64Instr;
+ ppReg = (void(*)(HReg)) ppHRegAMD64;
+ iselSB = iselSB_AMD64;
+ emit = (Int(*)(UChar*,Int,HInstr*,Bool,void*)) emit_AMD64Instr;
+ host_is_bigendian = False;
+ host_word_type = Ity_I64;
+ vassert(are_valid_hwcaps(VexArchAMD64, vta->archinfo_host.hwcaps));
+ vassert(vta->dispatch != NULL); /* jump-to-dispatcher scheme */
+ break;
+
+ case VexArchPPC32:
+ mode64 = False;
+ getAllocableRegs_PPC ( &n_available_real_regs,
+ &available_real_regs, mode64 );
+ isMove = (Bool(*)(HInstr*,HReg*,HReg*)) isMove_PPCInstr;
+ getRegUsage = (void(*)(HRegUsage*,HInstr*,Bool)) getRegUsage_PPCInstr;
+ mapRegs = (void(*)(HRegRemap*,HInstr*,Bool)) mapRegs_PPCInstr;
+ genSpill = (void(*)(HInstr**,HInstr**,HReg,Int,Bool)) genSpill_PPC;
+ genReload = (void(*)(HInstr**,HInstr**,HReg,Int,Bool)) genReload_PPC;
+ ppInstr = (void(*)(HInstr*,Bool)) ppPPCInstr;
+ ppReg = (void(*)(HReg)) ppHRegPPC;
+ iselSB = iselSB_PPC;
+ emit = (Int(*)(UChar*,Int,HInstr*,Bool,void*)) emit_PPCInstr;
+ host_is_bigendian = True;
+ host_word_type = Ity_I32;
+ vassert(are_valid_hwcaps(VexArchPPC32, vta->archinfo_host.hwcaps));
+ vassert(vta->dispatch == NULL); /* return-to-dispatcher scheme */
+ break;
+
+ case VexArchPPC64:
+ mode64 = True;
+ getAllocableRegs_PPC ( &n_available_real_regs,
+ &available_real_regs, mode64 );
+ isMove = (Bool(*)(HInstr*,HReg*,HReg*)) isMove_PPCInstr;
+ getRegUsage = (void(*)(HRegUsage*,HInstr*, Bool)) getRegUsage_PPCInstr;
+ mapRegs = (void(*)(HRegRemap*,HInstr*, Bool)) mapRegs_PPCInstr;
+ genSpill = (void(*)(HInstr**,HInstr**,HReg,Int,Bool)) genSpill_PPC;
+ genReload = (void(*)(HInstr**,HInstr**,HReg,Int,Bool)) genReload_PPC;
+ ppInstr = (void(*)(HInstr*, Bool)) ppPPCInstr;
+ ppReg = (void(*)(HReg)) ppHRegPPC;
+ iselSB = iselSB_PPC;
+ emit = (Int(*)(UChar*,Int,HInstr*,Bool,void*)) emit_PPCInstr;
+ host_is_bigendian = True;
+ host_word_type = Ity_I64;
+ vassert(are_valid_hwcaps(VexArchPPC64, vta->archinfo_host.hwcaps));
+ vassert(vta->dispatch == NULL); /* return-to-dispatcher scheme */
+ break;
+
+ case VexArchARM:
+ mode64 = False;
+ getAllocableRegs_ARM ( &n_available_real_regs,
+ &available_real_regs );
+ isMove = (Bool(*)(HInstr*,HReg*,HReg*)) isMove_ARMInstr;
+ getRegUsage = (void(*)(HRegUsage*,HInstr*, Bool)) getRegUsage_ARMInstr;
+ mapRegs = (void(*)(HRegRemap*,HInstr*, Bool)) mapRegs_ARMInstr;
+ genSpill = (void(*)(HInstr**,HInstr**,HReg,Int,Bool)) genSpill_ARM;
+ genReload = (void(*)(HInstr**,HInstr**,HReg,Int,Bool)) genReload_ARM;
+ ppInstr = (void(*)(HInstr*, Bool)) ppARMInstr;
+ ppReg = (void(*)(HReg)) ppHRegARM;
+ iselSB = iselSB_ARM;
+ emit = (Int(*)(UChar*,Int,HInstr*,Bool,void*)) emit_ARMInstr;
+ host_is_bigendian = False;
+ host_word_type = Ity_I32;
+ vassert(are_valid_hwcaps(VexArchARM, vta->archinfo_host.hwcaps));
+ vassert(vta->dispatch == NULL); /* return-to-dispatcher scheme */
+ break;
+
+ default:
+ vpanic("LibVEX_Translate: unsupported host insn set");
+ }
+
+
+ switch (vta->arch_guest) {
+
+ case VexArchX86:
+ preciseMemExnsFn = guest_x86_state_requires_precise_mem_exns;
+ disInstrFn = disInstr_X86;
+ specHelper = guest_x86_spechelper;
+ guest_sizeB = sizeof(VexGuestX86State);
+ guest_word_type = Ity_I32;
+ guest_layout = &x86guest_layout;
+ offB_TISTART = offsetof(VexGuestX86State,guest_TISTART);
+ offB_TILEN = offsetof(VexGuestX86State,guest_TILEN);
+ vassert(are_valid_hwcaps(VexArchX86, vta->archinfo_guest.hwcaps));
+ vassert(0 == sizeof(VexGuestX86State) % 16);
+ vassert(sizeof( ((VexGuestX86State*)0)->guest_TISTART) == 4);
+ vassert(sizeof( ((VexGuestX86State*)0)->guest_TILEN ) == 4);
+ vassert(sizeof( ((VexGuestX86State*)0)->guest_NRADDR ) == 4);
+ break;
+
+ case VexArchAMD64:
+ preciseMemExnsFn = guest_amd64_state_requires_precise_mem_exns;
+ disInstrFn = disInstr_AMD64;
+ specHelper = guest_amd64_spechelper;
+ guest_sizeB = sizeof(VexGuestAMD64State);
+ guest_word_type = Ity_I64;
+ guest_layout = &amd64guest_layout;
+ offB_TISTART = offsetof(VexGuestAMD64State,guest_TISTART);
+ offB_TILEN = offsetof(VexGuestAMD64State,guest_TILEN);
+ vassert(are_valid_hwcaps(VexArchAMD64, vta->archinfo_guest.hwcaps));
+ vassert(0 == sizeof(VexGuestAMD64State) % 16);
+ vassert(sizeof( ((VexGuestAMD64State*)0)->guest_TISTART ) == 8);
+ vassert(sizeof( ((VexGuestAMD64State*)0)->guest_TILEN ) == 8);
+ vassert(sizeof( ((VexGuestAMD64State*)0)->guest_NRADDR ) == 8);
+ break;
+
+ case VexArchPPC32:
+ preciseMemExnsFn = guest_ppc32_state_requires_precise_mem_exns;
+ disInstrFn = disInstr_PPC;
+ specHelper = guest_ppc32_spechelper;
+ guest_sizeB = sizeof(VexGuestPPC32State);
+ guest_word_type = Ity_I32;
+ guest_layout = &ppc32Guest_layout;
+ offB_TISTART = offsetof(VexGuestPPC32State,guest_TISTART);
+ offB_TILEN = offsetof(VexGuestPPC32State,guest_TILEN);
+ vassert(are_valid_hwcaps(VexArchPPC32, vta->archinfo_guest.hwcaps));
+ vassert(0 == sizeof(VexGuestPPC32State) % 16);
+ vassert(sizeof( ((VexGuestPPC32State*)0)->guest_TISTART ) == 4);
+ vassert(sizeof( ((VexGuestPPC32State*)0)->guest_TILEN ) == 4);
+ vassert(sizeof( ((VexGuestPPC32State*)0)->guest_NRADDR ) == 4);
+ break;
+
+ case VexArchPPC64:
+ preciseMemExnsFn = guest_ppc64_state_requires_precise_mem_exns;
+ disInstrFn = disInstr_PPC;
+ specHelper = guest_ppc64_spechelper;
+ guest_sizeB = sizeof(VexGuestPPC64State);
+ guest_word_type = Ity_I64;
+ guest_layout = &ppc64Guest_layout;
+ offB_TISTART = offsetof(VexGuestPPC64State,guest_TISTART);
+ offB_TILEN = offsetof(VexGuestPPC64State,guest_TILEN);
+ vassert(are_valid_hwcaps(VexArchPPC64, vta->archinfo_guest.hwcaps));
+ vassert(0 == sizeof(VexGuestPPC64State) % 16);
+ vassert(sizeof( ((VexGuestPPC64State*)0)->guest_TISTART ) == 8);
+ vassert(sizeof( ((VexGuestPPC64State*)0)->guest_TILEN ) == 8);
+ vassert(sizeof( ((VexGuestPPC64State*)0)->guest_NRADDR ) == 8);
+ vassert(sizeof( ((VexGuestPPC64State*)0)->guest_NRADDR_GPR2) == 8);
+ break;
+
+ case VexArchARM:
+ preciseMemExnsFn = guest_arm_state_requires_precise_mem_exns;
+ disInstrFn = disInstr_ARM;
+ specHelper = guest_arm_spechelper;
+ guest_sizeB = sizeof(VexGuestARMState);
+ guest_word_type = Ity_I32;
+ guest_layout = &armGuest_layout;
+ offB_TISTART = offsetof(VexGuestARMState,guest_TISTART);
+ offB_TILEN = offsetof(VexGuestARMState,guest_TILEN);
+ vassert(are_valid_hwcaps(VexArchARM, vta->archinfo_guest.hwcaps));
+ vassert(0 == sizeof(VexGuestARMState) % 16);
+ vassert(sizeof( ((VexGuestARMState*)0)->guest_TISTART) == 4);
+ vassert(sizeof( ((VexGuestARMState*)0)->guest_TILEN ) == 4);
+ vassert(sizeof( ((VexGuestARMState*)0)->guest_NRADDR ) == 4);
+ break;
+
+ default:
+ vpanic("LibVEX_Translate: unsupported guest insn set");
+ }
+
+ /* yet more sanity checks ... */
+ if (vta->arch_guest == vta->arch_host) {
+      /* doesn't necessarily have to be true, but if it isn't it means
+         we are simulating one flavour of an architecture on a
+         different flavour of the same architecture, which is pretty
+         strange. */
+ vassert(vta->archinfo_guest.hwcaps == vta->archinfo_host.hwcaps);
+ }
+
+ vexAllocSanityCheck();
+
+ if (vex_traceflags & VEX_TRACE_FE)
+ vex_printf("\n------------------------"
+ " Front end "
+ "------------------------\n\n");
+
+ irsb = bb_to_IR ( vta->guest_extents,
+ vta->callback_opaque,
+ disInstrFn,
+ vta->guest_bytes,
+ vta->guest_bytes_addr,
+ vta->chase_into_ok,
+ host_is_bigendian,
+ vta->arch_guest,
+ &vta->archinfo_guest,
+ &vta->abiinfo_both,
+ guest_word_type,
+ vta->do_self_check,
+ vta->preamble_function,
+ offB_TISTART,
+ offB_TILEN );
+
+ vexAllocSanityCheck();
+
+ if (irsb == NULL) {
+ /* Access failure. */
+ vexSetAllocModeTEMP_and_clear();
+ vex_traceflags = 0;
+ return VexTransAccessFail;
+ }
+
+ vassert(vta->guest_extents->n_used >= 1 && vta->guest_extents->n_used <= 3);
+ vassert(vta->guest_extents->base[0] == vta->guest_bytes_addr);
+ for (i = 0; i < vta->guest_extents->n_used; i++) {
+ vassert(vta->guest_extents->len[i] < 10000); /* sanity */
+ }
+
+ /* If debugging, show the raw guest bytes for this bb. */
+ if (0 || (vex_traceflags & VEX_TRACE_FE)) {
+ if (vta->guest_extents->n_used > 1) {
+ vex_printf("can't show code due to extents > 1\n");
+ } else {
+ /* HACK */
+ UChar* p = (UChar*)vta->guest_bytes;
+ UInt sum = 0;
+ UInt guest_bytes_read = (UInt)vta->guest_extents->len[0];
+ vex_printf("GuestBytes %llx %u ", vta->guest_bytes_addr,
+ guest_bytes_read );
+ for (i = 0; i < guest_bytes_read; i++) {
+ UInt b = (UInt)p[i];
+ vex_printf(" %02x", b );
+ sum = (sum << 1) ^ b;
+ }
+ vex_printf(" %08x\n\n", sum);
+ }
+ }
+
+ /* Sanity check the initial IR. */
+ sanityCheckIRSB( irsb, "initial IR",
+ False/*can be non-flat*/, guest_word_type );
+
+ vexAllocSanityCheck();
+
+ /* Clean it up, hopefully a lot. */
+ irsb = do_iropt_BB ( irsb, specHelper, preciseMemExnsFn,
+ vta->guest_bytes_addr,
+ vta->arch_guest );
+ sanityCheckIRSB( irsb, "after initial iropt",
+ True/*must be flat*/, guest_word_type );
+
+ if (vex_traceflags & VEX_TRACE_OPT1) {
+ vex_printf("\n------------------------"
+ " After pre-instr IR optimisation "
+ "------------------------\n\n");
+ ppIRSB ( irsb );
+ vex_printf("\n");
+ }
+
+ vexAllocSanityCheck();
+
+ /* Get the thing instrumented. */
+ if (vta->instrument1)
+ irsb = vta->instrument1(vta->callback_opaque,
+ irsb, guest_layout,
+ vta->guest_extents,
+ guest_word_type, host_word_type);
+ vexAllocSanityCheck();
+
+ if (vta->instrument2)
+ irsb = vta->instrument2(vta->callback_opaque,
+ irsb, guest_layout,
+ vta->guest_extents,
+ guest_word_type, host_word_type);
+
+ if (vex_traceflags & VEX_TRACE_INST) {
+ vex_printf("\n------------------------"
+ " After instrumentation "
+ "------------------------\n\n");
+ ppIRSB ( irsb );
+ vex_printf("\n");
+ }
+
+ if (vta->instrument1 || vta->instrument2)
+ sanityCheckIRSB( irsb, "after instrumentation",
+ True/*must be flat*/, guest_word_type );
+
+ /* Do a post-instrumentation cleanup pass. */
+ if (vta->instrument1 || vta->instrument2) {
+ do_deadcode_BB( irsb );
+ irsb = cprop_BB( irsb );
+ do_deadcode_BB( irsb );
+ sanityCheckIRSB( irsb, "after post-instrumentation cleanup",
+ True/*must be flat*/, guest_word_type );
+ }
+
+ vexAllocSanityCheck();
+
+ if (vex_traceflags & VEX_TRACE_OPT2) {
+ vex_printf("\n------------------------"
+ " After post-instr IR optimisation "
+ "------------------------\n\n");
+ ppIRSB ( irsb );
+ vex_printf("\n");
+ }
+
+ /* Turn it into virtual-registerised code. Build trees -- this
+ also throws away any dead bindings. */
+ ado_treebuild_BB( irsb );
+
+ if (vta->finaltidy) {
+ irsb = vta->finaltidy(irsb);
+ }
+
+ vexAllocSanityCheck();
+
+ if (vex_traceflags & VEX_TRACE_TREES) {
+ vex_printf("\n------------------------"
+ " After tree-building "
+ "------------------------\n\n");
+ ppIRSB ( irsb );
+ vex_printf("\n");
+ }
+
+ /* HACK */
+ if (0) { *(vta->host_bytes_used) = 0; return VexTransOK; }
+ /* end HACK */
+
+ if (vex_traceflags & VEX_TRACE_VCODE)
+ vex_printf("\n------------------------"
+ " Instruction selection "
+ "------------------------\n");
+
+ vcode = iselSB ( irsb, vta->arch_host, &vta->archinfo_host,
+ &vta->abiinfo_both );
+
+ vexAllocSanityCheck();
+
+ if (vex_traceflags & VEX_TRACE_VCODE)
+ vex_printf("\n");
+
+ if (vex_traceflags & VEX_TRACE_VCODE) {
+ for (i = 0; i < vcode->arr_used; i++) {
+ vex_printf("%3d ", i);
+ ppInstr(vcode->arr[i], mode64);
+ vex_printf("\n");
+ }
+ vex_printf("\n");
+ }
+
+ /* Register allocate. */
+ rcode = doRegisterAllocation ( vcode, available_real_regs,
+ n_available_real_regs,
+ isMove, getRegUsage, mapRegs,
+ genSpill, genReload, directReload,
+ guest_sizeB,
+ ppInstr, ppReg, mode64 );
+
+ vexAllocSanityCheck();
+
+ if (vex_traceflags & VEX_TRACE_RCODE) {
+ vex_printf("\n------------------------"
+ " Register-allocated code "
+ "------------------------\n\n");
+ for (i = 0; i < rcode->arr_used; i++) {
+ vex_printf("%3d ", i);
+ ppInstr(rcode->arr[i], mode64);
+ vex_printf("\n");
+ }
+ vex_printf("\n");
+ }
+
+ /* HACK */
+ if (0) { *(vta->host_bytes_used) = 0; return VexTransOK; }
+ /* end HACK */
+
+ /* Assemble */
+ if (vex_traceflags & VEX_TRACE_ASM) {
+ vex_printf("\n------------------------"
+ " Assembly "
+ "------------------------\n\n");
+ }
+
+ out_used = 0; /* tracks along the host_bytes array */
+ for (i = 0; i < rcode->arr_used; i++) {
+ if (vex_traceflags & VEX_TRACE_ASM) {
+ ppInstr(rcode->arr[i], mode64);
+ vex_printf("\n");
+ }
+ j = (*emit)( insn_bytes, 32, rcode->arr[i], mode64, vta->dispatch );
+ if (vex_traceflags & VEX_TRACE_ASM) {
+ for (k = 0; k < j; k++)
+ if (insn_bytes[k] < 16)
+ vex_printf("0%x ", (UInt)insn_bytes[k]);
+ else
+ vex_printf("%x ", (UInt)insn_bytes[k]);
+ vex_printf("\n\n");
+ }
+ if (out_used + j > vta->host_bytes_size) {
+ vexSetAllocModeTEMP_and_clear();
+ vex_traceflags = 0;
+ return VexTransOutputFull;
+ }
+ for (k = 0; k < j; k++) {
+ vta->host_bytes[out_used] = insn_bytes[k];
+ out_used++;
+ }
+ vassert(out_used <= vta->host_bytes_size);
+ }
+ *(vta->host_bytes_used) = out_used;
+
+ vexAllocSanityCheck();
+
+ vexSetAllocModeTEMP_and_clear();
+
+ vex_traceflags = 0;
+ return VexTransOK;
+}
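+
+/* How a client might dispatch on the results produced above (an
+   illustrative sketch; vta is a filled-in VexTranslateArgs):
+      switch (LibVEX_Translate(&vta)) {
+         case VexTransOK:
+            break; // vta.host_bytes[0 .. *vta.host_bytes_used-1]
+                   // now holds the translation
+         case VexTransAccessFail:
+            break; // the guest bytes could not be read
+         case VexTransOutputFull:
+            break; // retry with a larger host_bytes buffer
+      }
+*/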
+
+
+/* --------- Emulation warnings. --------- */
+
+HChar* LibVEX_EmWarn_string ( VexEmWarn ew )
+{
+ switch (ew) {
+ case EmWarn_NONE:
+ return "none";
+ case EmWarn_X86_x87exns:
+ return "Unmasking x87 FP exceptions";
+ case EmWarn_X86_x87precision:
+ return "Selection of non-80-bit x87 FP precision";
+ case EmWarn_X86_sseExns:
+ return "Unmasking SSE FP exceptions";
+ case EmWarn_X86_fz:
+ return "Setting %mxcsr.fz (SSE flush-underflows-to-zero mode)";
+ case EmWarn_X86_daz:
+ return "Setting %mxcsr.daz (SSE treat-denormals-as-zero mode)";
+ case EmWarn_X86_acFlag:
+ return "Setting %eflags.ac (setting noted but ignored)";
+ case EmWarn_PPCexns:
+ return "Unmasking PPC32/64 FP exceptions";
+ case EmWarn_PPC64_redir_overflow:
+ return "PPC64 function redirection stack overflow";
+ case EmWarn_PPC64_redir_underflow:
+ return "PPC64 function redirection stack underflow";
+ default:
+ vpanic("LibVEX_EmWarn_string: unknown warning");
+ }
+}
+
+/* ------------------ Arch/HwCaps stuff. ------------------ */
+
+const HChar* LibVEX_ppVexArch ( VexArch arch )
+{
+ switch (arch) {
+ case VexArch_INVALID: return "INVALID";
+ case VexArchX86: return "X86";
+ case VexArchAMD64: return "AMD64";
+ case VexArchARM: return "ARM";
+ case VexArchPPC32: return "PPC32";
+ case VexArchPPC64: return "PPC64";
+ default: return "VexArch???";
+ }
+}
+
+const HChar* LibVEX_ppVexHwCaps ( VexArch arch, UInt hwcaps )
+{
+ HChar* str = show_hwcaps(arch,hwcaps);
+ return str ? str : "INVALID";
+}
+
+
+/* Write default settings into *vai. */
+void LibVEX_default_VexArchInfo ( /*OUT*/VexArchInfo* vai )
+{
+ vai->hwcaps = 0;
+ vai->ppc_cache_line_szB = 0;
+ vai->ppc_dcbz_szB = 0;
+ vai->ppc_dcbzl_szB = 0;
+}
+
+/* Write default settings into *vbi. */
+void LibVEX_default_VexAbiInfo ( /*OUT*/VexAbiInfo* vbi )
+{
+ vbi->guest_stack_redzone_size = 0;
+ vbi->guest_amd64_assume_fs_is_zero = False;
+ vbi->guest_amd64_assume_gs_is_0x60 = False;
+ vbi->guest_ppc_zap_RZ_at_blr = False;
+ vbi->guest_ppc_zap_RZ_at_bl = NULL;
+ vbi->guest_ppc_sc_continues_at_LR = False;
+ vbi->host_ppc_calls_use_fndescrs = False;
+ vbi->host_ppc32_regalign_int64_args = False;
+}
+
+
+/* Return a string showing the hwcaps in a nice way. The string will
+ be NULL for invalid combinations of flags, so these functions also
+ serve as a way to validate hwcaps values. */
+
+static HChar* show_hwcaps_x86 ( UInt hwcaps )
+{
+ /* Monotonic, SSE3 > SSE2 > SSE1 > baseline. */
+ switch (hwcaps) {
+ case 0:
+ return "x86-sse0";
+ case VEX_HWCAPS_X86_SSE1:
+ return "x86-sse1";
+ case VEX_HWCAPS_X86_SSE1 | VEX_HWCAPS_X86_SSE2:
+ return "x86-sse1-sse2";
+ case VEX_HWCAPS_X86_SSE1 | VEX_HWCAPS_X86_SSE2
+ | VEX_HWCAPS_X86_LZCNT:
+ return "x86-sse1-sse2-lzcnt";
+ case VEX_HWCAPS_X86_SSE1 | VEX_HWCAPS_X86_SSE2
+ | VEX_HWCAPS_X86_SSE3:
+ return "x86-sse1-sse2-sse3";
+ case VEX_HWCAPS_X86_SSE1 | VEX_HWCAPS_X86_SSE2
+ | VEX_HWCAPS_X86_SSE3 | VEX_HWCAPS_X86_LZCNT:
+ return "x86-sse1-sse2-sse3-lzcnt";
+ default:
+ return NULL;
+ }
+}
+
+static HChar* show_hwcaps_amd64 ( UInt hwcaps )
+{
+ /* SSE3 and CX16 are orthogonal and > baseline, although we really
+ don't expect to come across anything which can do SSE3 but can't
+ do CX16. Still, we can handle that case. LZCNT is similarly
+ orthogonal. */
+ switch (hwcaps) {
+ case 0:
+ return "amd64-sse2";
+ case VEX_HWCAPS_AMD64_SSE3:
+ return "amd64-sse3";
+ case VEX_HWCAPS_AMD64_CX16:
+ return "amd64-sse2-cx16";
+ case VEX_HWCAPS_AMD64_SSE3 | VEX_HWCAPS_AMD64_CX16:
+ return "amd64-sse3-cx16";
+ case VEX_HWCAPS_AMD64_SSE3 | VEX_HWCAPS_AMD64_LZCNT:
+ return "amd64-sse3-lzcnt";
+ case VEX_HWCAPS_AMD64_CX16 | VEX_HWCAPS_AMD64_LZCNT:
+ return "amd64-sse2-cx16-lzcnt";
+ case VEX_HWCAPS_AMD64_SSE3 | VEX_HWCAPS_AMD64_CX16
+ | VEX_HWCAPS_AMD64_LZCNT:
+ return "amd64-sse3-cx16-lzcnt";
+
+ default:
+ return NULL;
+ }
+}
+
+static HChar* show_hwcaps_ppc32 ( UInt hwcaps )
+{
+ /* Monotonic with complications. Basically V > F > baseline,
+ but once you have F then you can have FX or GX too. */
+ const UInt F = VEX_HWCAPS_PPC32_F;
+ const UInt V = VEX_HWCAPS_PPC32_V;
+ const UInt FX = VEX_HWCAPS_PPC32_FX;
+ const UInt GX = VEX_HWCAPS_PPC32_GX;
+ UInt c = hwcaps;
+ if (c == 0) return "ppc32-int";
+ if (c == F) return "ppc32-int-flt";
+ if (c == (F|FX)) return "ppc32-int-flt-FX";
+ if (c == (F|GX)) return "ppc32-int-flt-GX";
+ if (c == (F|FX|GX)) return "ppc32-int-flt-FX-GX";
+ if (c == (F|V)) return "ppc32-int-flt-vmx";
+ if (c == (F|V|FX)) return "ppc32-int-flt-vmx-FX";
+ if (c == (F|V|GX)) return "ppc32-int-flt-vmx-GX";
+ if (c == (F|V|FX|GX)) return "ppc32-int-flt-vmx-FX-GX";
+ return NULL;
+}
+
+static HChar* show_hwcaps_ppc64 ( UInt hwcaps )
+{
+ /* Monotonic with complications. Basically V > baseline(==F),
+ but once you have F then you can have FX or GX too. */
+ const UInt V = VEX_HWCAPS_PPC64_V;
+ const UInt FX = VEX_HWCAPS_PPC64_FX;
+ const UInt GX = VEX_HWCAPS_PPC64_GX;
+ UInt c = hwcaps;
+ if (c == 0) return "ppc64-int-flt";
+ if (c == FX) return "ppc64-int-flt-FX";
+ if (c == GX) return "ppc64-int-flt-GX";
+ if (c == (FX|GX)) return "ppc64-int-flt-FX-GX";
+ if (c == V) return "ppc64-int-flt-vmx";
+ if (c == (V|FX)) return "ppc64-int-flt-vmx-FX";
+ if (c == (V|GX)) return "ppc64-int-flt-vmx-GX";
+ if (c == (V|FX|GX)) return "ppc64-int-flt-vmx-FX-GX";
+ return NULL;
+}
+
+static HChar* show_hwcaps_arm ( UInt hwcaps )
+{
+ Bool N = ((hwcaps & VEX_HWCAPS_ARM_NEON) != 0);
+ Bool vfp = ((hwcaps & (VEX_HWCAPS_ARM_VFP |
+ VEX_HWCAPS_ARM_VFP2 | VEX_HWCAPS_ARM_VFP3)) != 0);
+ switch (VEX_ARM_ARCHLEVEL(hwcaps)) {
+      case 5:
+         if (N)
+            return NULL;
+         if (vfp)
+            return "ARMv5-vfp";
+         else
+            return "ARMv5";
+      case 6:
+         if (N)
+            return NULL;
+         if (vfp)
+            return "ARMv6-vfp";
+         else
+            return "ARMv6";
+ case 7:
+ if (vfp) {
+ if (N)
+ return "ARMv7-vfp-neon";
+ else
+ return "ARMv7-vfp";
+ } else {
+ if (N)
+ return "ARMv7-neon";
+ else
+ return "ARMv7";
+ }
+ default:
+ return NULL;
+ }
+ return NULL;
+}
+
+/* ---- */
+static HChar* show_hwcaps ( VexArch arch, UInt hwcaps )
+{
+ switch (arch) {
+ case VexArchX86: return show_hwcaps_x86(hwcaps);
+ case VexArchAMD64: return show_hwcaps_amd64(hwcaps);
+ case VexArchPPC32: return show_hwcaps_ppc32(hwcaps);
+ case VexArchPPC64: return show_hwcaps_ppc64(hwcaps);
+ case VexArchARM: return show_hwcaps_arm(hwcaps);
+ default: return NULL;
+ }
+}
+
+static Bool are_valid_hwcaps ( VexArch arch, UInt hwcaps )
+{
+ return show_hwcaps(arch,hwcaps) != NULL;
+}
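+
+/* Example of the validation behaviour (illustrative): the x86
+   encodings are monotonic, so
+      are_valid_hwcaps(VexArchX86,
+                       VEX_HWCAPS_X86_SSE1 | VEX_HWCAPS_X86_SSE2)
+   is True, whereas SSE2 without SSE1,
+      are_valid_hwcaps(VexArchX86, VEX_HWCAPS_X86_SSE2),
+   is False. */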
+
+
+/*---------------------------------------------------------------*/
+/*--- end main_main.c ---*/
+/*---------------------------------------------------------------*/
diff --git a/VEX/priv/main_util.c b/VEX/priv/main_util.c
new file mode 100644
index 0000000..d12380e
--- /dev/null
+++ b/VEX/priv/main_util.c
@@ -0,0 +1,538 @@
+
+/*---------------------------------------------------------------*/
+/*--- begin main_util.c ---*/
+/*---------------------------------------------------------------*/
+
+/*
+ This file is part of Valgrind, a dynamic binary instrumentation
+ framework.
+
+ Copyright (C) 2004-2010 OpenWorks LLP
+ info@open-works.net
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA.
+
+ The GNU General Public License is contained in the file COPYING.
+
+ Neither the names of the U.S. Department of Energy nor the
+ University of California nor the names of its contributors may be
+ used to endorse or promote products derived from this software
+ without prior written permission.
+*/
+
+#include "libvex_basictypes.h"
+#include "libvex.h"
+
+#include "main_globals.h"
+#include "main_util.h"
+
+
+/*---------------------------------------------------------*/
+/*--- Storage ---*/
+/*---------------------------------------------------------*/
+
+/* Try to keep this as low as possible -- in particular, less than the
+ size of the smallest L2 cache we might encounter. At 50000, my VIA
+ Nehemiah 1 GHz (a weedy machine) can satisfy 27 million calls/
+ second to LibVEX_Alloc(16) -- that is, allocate memory at over 400
+ MByte/sec. Once the size increases enough to fall out of the cache
+ into memory, the rate falls by about a factor of 3.
+*/
+#define N_TEMPORARY_BYTES 5000000
+
+static HChar temporary[N_TEMPORARY_BYTES] __attribute__((aligned(8)));
+static HChar* temporary_first = &temporary[0];
+static HChar* temporary_curr = &temporary[0];
+static HChar* temporary_last = &temporary[N_TEMPORARY_BYTES-1];
+
+static ULong temporary_bytes_allocd_TOT = 0;
+
+#define N_PERMANENT_BYTES 10000
+
+static HChar permanent[N_PERMANENT_BYTES] __attribute__((aligned(8)));
+static HChar* permanent_first = &permanent[0];
+static HChar* permanent_curr = &permanent[0];
+static HChar* permanent_last = &permanent[N_PERMANENT_BYTES-1];
+
+static VexAllocMode mode = VexAllocModeTEMP;
+
+void vexAllocSanityCheck ( void )
+{
+ vassert(temporary_first == &temporary[0]);
+ vassert(temporary_last == &temporary[N_TEMPORARY_BYTES-1]);
+ vassert(permanent_first == &permanent[0]);
+ vassert(permanent_last == &permanent[N_PERMANENT_BYTES-1]);
+ vassert(temporary_first <= temporary_curr);
+ vassert(temporary_curr <= temporary_last);
+ vassert(permanent_first <= permanent_curr);
+ vassert(permanent_curr <= permanent_last);
+ vassert(private_LibVEX_alloc_first <= private_LibVEX_alloc_curr);
+ vassert(private_LibVEX_alloc_curr <= private_LibVEX_alloc_last);
+ if (mode == VexAllocModeTEMP){
+ vassert(private_LibVEX_alloc_first == temporary_first);
+ vassert(private_LibVEX_alloc_last == temporary_last);
+ }
+ else
+ if (mode == VexAllocModePERM) {
+ vassert(private_LibVEX_alloc_first == permanent_first);
+ vassert(private_LibVEX_alloc_last == permanent_last);
+ }
+ else
+ vassert(0);
+
+# define IS_WORD_ALIGNED(p) (0 == (((HWord)p) & (sizeof(HWord)-1)))
+ vassert(sizeof(HWord) == 4 || sizeof(HWord) == 8);
+ vassert(IS_WORD_ALIGNED(temporary_first));
+ vassert(IS_WORD_ALIGNED(temporary_curr));
+ vassert(IS_WORD_ALIGNED(temporary_last+1));
+ vassert(IS_WORD_ALIGNED(permanent_first));
+ vassert(IS_WORD_ALIGNED(permanent_curr));
+ vassert(IS_WORD_ALIGNED(permanent_last+1));
+ vassert(IS_WORD_ALIGNED(private_LibVEX_alloc_first));
+ vassert(IS_WORD_ALIGNED(private_LibVEX_alloc_curr));
+ vassert(IS_WORD_ALIGNED(private_LibVEX_alloc_last+1));
+# undef IS_WORD_ALIGNED
+}
+
+/* The current allocation mode. */
+
+void vexSetAllocMode ( VexAllocMode m )
+{
+ vexAllocSanityCheck();
+
+ /* Save away the current allocation point .. */
+   if (mode == VexAllocModeTEMP) {
+      temporary_curr = private_LibVEX_alloc_curr;
+   }
+   else if (mode == VexAllocModePERM) {
+      permanent_curr = private_LibVEX_alloc_curr;
+   }
+   else {
+      vassert(0);
+   }
+
+ /* Did that screw anything up? */
+ vexAllocSanityCheck();
+
+   if (m == VexAllocModeTEMP) {
+      private_LibVEX_alloc_first = temporary_first;
+      private_LibVEX_alloc_curr  = temporary_curr;
+      private_LibVEX_alloc_last  = temporary_last;
+   }
+   else if (m == VexAllocModePERM) {
+      private_LibVEX_alloc_first = permanent_first;
+      private_LibVEX_alloc_curr  = permanent_curr;
+      private_LibVEX_alloc_last  = permanent_last;
+   }
+   else {
+      vassert(0);
+   }
+
+ mode = m;
+}
+
+VexAllocMode vexGetAllocMode ( void )
+{
+ return mode;
+}
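+
+/* Illustrative usage sketch, not part of the original source: code
+   that needs an allocation to outlive the current translation
+   typically saves the mode, switches to the permanent pool, and
+   restores afterwards:
+
+      VexAllocMode saved = vexGetAllocMode();
+      vexSetAllocMode(VexAllocModePERM);
+      ... allocate the long-lived object here ...
+      vexSetAllocMode(saved);
+*/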
+
+/* Visible to library client, unfortunately. */
+
+HChar* private_LibVEX_alloc_first = &temporary[0];
+HChar* private_LibVEX_alloc_curr = &temporary[0];
+HChar* private_LibVEX_alloc_last = &temporary[N_TEMPORARY_BYTES-1];
+
+__attribute__((noreturn))
+void private_LibVEX_alloc_OOM(void)
+{
+ HChar* pool = "???";
+ if (private_LibVEX_alloc_first == &temporary[0]) pool = "TEMP";
+ if (private_LibVEX_alloc_first == &permanent[0]) pool = "PERM";
+ vex_printf("VEX temporary storage exhausted.\n");
+ vex_printf("Pool = %s, start %p curr %p end %p (size %lld)\n",
+ pool,
+ private_LibVEX_alloc_first,
+ private_LibVEX_alloc_curr,
+ private_LibVEX_alloc_last,
+ (Long)(private_LibVEX_alloc_last + 1 - private_LibVEX_alloc_first));
+   vpanic("VEX storage exhausted.\n"
+          "Increase N_{TEMPORARY,PERMANENT}_BYTES and recompile.");
+}
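+
+/* Sketch (illustrative only, not from the original source) of the
+   bump-pointer allocation these pools support; the real allocator
+   that clients see, LibVEX_Alloc, is defined elsewhere.  Assumes
+   only the pointer names and OOM handler defined above. */
+#if 0
+static void* bump_alloc_sketch ( Int nbytes )
+{
+   /* Round the request up to a whole number of HWords, since the
+      sanity checks above expect word-aligned cursors. */
+   Int    align = (Int)sizeof(HWord);
+   Int    n     = (nbytes + align - 1) & ~(align - 1);
+   HChar* res   = private_LibVEX_alloc_curr;
+   if (res + n > private_LibVEX_alloc_last + 1)
+      private_LibVEX_alloc_OOM();
+   private_LibVEX_alloc_curr = res + n;
+   return res;
+}
+#endif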
+
+void vexSetAllocModeTEMP_and_clear ( void )
+{
+ /* vassert(vex_initdone); */ /* causes infinite assert loops */
+ temporary_bytes_allocd_TOT
+ += (ULong)(private_LibVEX_alloc_curr - private_LibVEX_alloc_first);
+
+ mode = VexAllocModeTEMP;
+ temporary_curr = &temporary[0];
+ private_LibVEX_alloc_curr = &temporary[0];
+
+   /* Change the 0 below to 1, and pick a fill byte of 0x00 or 0xFF,
+      to flush out bugs caused by reading uninitialised memory in the
+      main VEX storage area. */
+ if (0) {
+ Int i;
+ for (i = 0; i < N_TEMPORARY_BYTES; i++)
+ temporary[i] = 0x00;
+ }
+
+ vexAllocSanityCheck();
+}
+
+
+/* Exported to library client. */
+
+void LibVEX_ShowAllocStats ( void )
+{
+ vex_printf("vex storage: T total %lld bytes allocated\n",
+ (Long)temporary_bytes_allocd_TOT );
+ vex_printf("vex storage: P total %lld bytes allocated\n",
+ (Long)(permanent_curr - permanent_first) );
+}
+
+
+/*---------------------------------------------------------*/
+/*--- Bombing out ---*/
+/*---------------------------------------------------------*/
+
+__attribute__ ((noreturn))
+void vex_assert_fail ( const HChar* expr,
+ const HChar* file, Int line, const HChar* fn )
+{
+ vex_printf( "\nvex: %s:%d (%s): Assertion `%s' failed.\n",
+ file, line, fn, expr );
+ (*vex_failure_exit)();
+}
+
+__attribute__ ((noreturn))
+void vpanic ( HChar* str )
+{
+ vex_printf("\nvex: the `impossible' happened:\n %s\n", str);
+ (*vex_failure_exit)();
+}
+
+
+/*---------------------------------------------------------*/
+/*--- vex_printf ---*/
+/*---------------------------------------------------------*/
+
+/* This should be the only <...> include in the entire VEX library.
+   New code for main_util.c should go above this point. */
+#include <stdarg.h>
+
+static Int vex_strlen ( const HChar* str )
+{
+ Int i = 0;
+ while (str[i] != 0) i++;
+ return i;
+}
+
+Bool vex_streq ( const HChar* s1, const HChar* s2 )
+{
+ while (True) {
+ if (*s1 == 0 && *s2 == 0)
+ return True;
+ if (*s1 != *s2)
+ return False;
+ s1++;
+ s2++;
+ }
+}
+
+
+/* Convert N0 into ascii in BUF, which is assumed to be big enough (at
+ least 67 bytes long). Observe BASE, SYNED and HEXCAPS. */
+static
+void convert_int ( /*OUT*/HChar* buf, Long n0,
+ Int base, Bool syned, Bool hexcaps )
+{
+ ULong u0;
+ HChar c;
+ Bool minus = False;
+ Int i, j, bufi = 0;
+ buf[bufi] = 0;
+
+   if (syned && n0 < 0) {
+      minus = True;
+      /* Negate in unsigned arithmetic, so that the most negative
+         Long does not cause signed-overflow undefined behaviour. */
+      u0 = - (ULong)n0;
+   } else {
+      u0 = (ULong)n0;
+   }
+
+ while (1) {
+ buf[bufi++] = toHChar('0' + toUInt(u0 % base));
+ u0 /= base;
+ if (u0 == 0) break;
+ }
+ if (minus)
+ buf[bufi++] = '-';
+
+ buf[bufi] = 0;
+ for (i = 0; i < bufi; i++)
+ if (buf[i] > '9')
+ buf[i] = toHChar(buf[i] + (hexcaps ? 'A' : 'a') - '9' - 1);
+
+ i = 0;
+ j = bufi-1;
+ while (i <= j) {
+ c = buf[i];
+ buf[i] = buf[j];
+ buf[j] = c;
+ i++;
+ j--;
+ }
+}
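+
+/* Illustrative self-test for convert_int, not part of the original
+   source; left disabled, but it shows the expected outputs. */
+#if 0
+static void convert_int_demo ( void )
+{
+   HChar buf[67];
+   convert_int(buf, -255, 10, True,  False);   /* yields "-255" */
+   vassert(vex_streq(buf, "-255"));
+   convert_int(buf,  255, 16, False, True);    /* yields "FF" */
+   vassert(vex_streq(buf, "FF"));
+}
+#endif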
+
+
+/* A half-arsed and buggy, but good-enough, implementation of
+ printf. */
+static
+UInt vprintf_wrk ( void(*sink)(HChar),
+ HChar* format,
+ va_list ap )
+{
+# define PUT(_ch) \
+ do { sink(_ch); nout++; } \
+ while (0)
+
+# define PAD(_n) \
+ do { Int _qq = (_n); for (; _qq > 0; _qq--) PUT(padchar); } \
+ while (0)
+
+# define PUTSTR(_str) \
+ do { HChar* _qq = _str; for (; *_qq; _qq++) PUT(*_qq); } \
+ while (0)
+
+ HChar* saved_format;
+ Bool longlong, ljustify;
+ HChar padchar;
+ Int fwidth, nout, len1, len2, len3;
+ HChar intbuf[100]; /* big enough for a 64-bit # in base 2 */
+
+ nout = 0;
+ while (1) {
+
+ if (!format)
+ break;
+ if (*format == 0)
+ break;
+
+ if (*format != '%') {
+ PUT(*format);
+ format++;
+ continue;
+ }
+
+ saved_format = format;
+ longlong = False;
+ ljustify = False;
+ padchar = ' ';
+ fwidth = 0;
+ format++;
+
+ if (*format == '-') {
+ format++;
+ ljustify = True;
+ }
+ if (*format == '0') {
+ format++;
+ padchar = '0';
+ }
+ while (*format >= '0' && *format <= '9') {
+ fwidth = fwidth * 10 + (*format - '0');
+ format++;
+ }
+ if (*format == 'l') {
+ format++;
+ if (*format == 'l') {
+ format++;
+ longlong = True;
+ }
+ }
+
+ switch (*format) {
+ case 's': {
+ HChar* str = va_arg(ap, HChar*);
+ if (str == NULL)
+ str = "(null)";
+ len1 = len3 = 0;
+ len2 = vex_strlen(str);
+ if (fwidth > len2) { len1 = ljustify ? 0 : fwidth-len2;
+ len3 = ljustify ? fwidth-len2 : 0; }
+ PAD(len1); PUTSTR(str); PAD(len3);
+ break;
+ }
+ case 'c': {
+ HChar c = (HChar)va_arg(ap, int);
+ HChar str[2];
+ str[0] = c;
+ str[1] = 0;
+ len1 = len3 = 0;
+ len2 = vex_strlen(str);
+ if (fwidth > len2) { len1 = ljustify ? 0 : fwidth-len2;
+ len3 = ljustify ? fwidth-len2 : 0; }
+ PAD(len1); PUTSTR(str); PAD(len3);
+ break;
+ }
+ case 'd': {
+ Long l;
+ if (longlong) {
+ l = va_arg(ap, Long);
+ } else {
+ l = (Long)va_arg(ap, Int);
+ }
+ convert_int(intbuf, l, 10/*base*/, True/*signed*/,
+ False/*irrelevant*/);
+ len1 = len3 = 0;
+ len2 = vex_strlen(intbuf);
+ if (fwidth > len2) { len1 = ljustify ? 0 : fwidth-len2;
+ len3 = ljustify ? fwidth-len2 : 0; }
+ PAD(len1); PUTSTR(intbuf); PAD(len3);
+ break;
+ }
+ case 'u':
+ case 'x':
+ case 'X': {
+ Int base = *format == 'u' ? 10 : 16;
+      Bool hexcaps = True; /* forced True, so %x also prints
+                              uppercase; (*format == 'X') would
+                              give a lowercase %x */
+ ULong l;
+ if (longlong) {
+ l = va_arg(ap, ULong);
+ } else {
+ l = (ULong)va_arg(ap, UInt);
+ }
+ convert_int(intbuf, l, base, False/*unsigned*/, hexcaps);
+ len1 = len3 = 0;
+ len2 = vex_strlen(intbuf);
+ if (fwidth > len2) { len1 = ljustify ? 0 : fwidth-len2;
+ len3 = ljustify ? fwidth-len2 : 0; }
+ PAD(len1); PUTSTR(intbuf); PAD(len3);
+ break;
+ }
+ case 'p':
+ case 'P': {
+ Bool hexcaps = toBool(*format == 'P');
+ ULong l = Ptr_to_ULong( va_arg(ap, void*) );
+ convert_int(intbuf, l, 16/*base*/, False/*unsigned*/, hexcaps);
+ len1 = len3 = 0;
+ len2 = vex_strlen(intbuf)+2;
+ if (fwidth > len2) { len1 = ljustify ? 0 : fwidth-len2;
+ len3 = ljustify ? fwidth-len2 : 0; }
+ PAD(len1); PUT('0'); PUT('x'); PUTSTR(intbuf); PAD(len3);
+ break;
+ }
+ case '%': {
+ PUT('%');
+ break;
+ }
+ default:
+ /* no idea what it is. Print the format literally and
+ move on. */
+ while (saved_format <= format) {
+ PUT(*saved_format);
+ saved_format++;
+ }
+ break;
+ }
+
+ format++;
+
+ }
+
+ return nout;
+
+# undef PUT
+# undef PAD
+# undef PUTSTR
+}
+
+
+/* A general replacement for printf().  Note that only low-level
+   debugging info should be sent this way.  The official route is to
+   use vg_message().  This interface is deprecated.
+*/
+static HChar myprintf_buf[1000];
+static Int n_myprintf_buf;
+
+static void add_to_myprintf_buf ( HChar c )
+{
+ Bool emit = toBool(c == '\n' || n_myprintf_buf >= 1000-10 /*paranoia*/);
+ myprintf_buf[n_myprintf_buf++] = c;
+ myprintf_buf[n_myprintf_buf] = 0;
+ if (emit) {
+ (*vex_log_bytes)( myprintf_buf, vex_strlen(myprintf_buf) );
+ n_myprintf_buf = 0;
+ myprintf_buf[n_myprintf_buf] = 0;
+ }
+}
+
+UInt vex_printf ( HChar* format, ... )
+{
+ UInt ret;
+ va_list vargs;
+ va_start(vargs,format);
+
+ n_myprintf_buf = 0;
+ myprintf_buf[n_myprintf_buf] = 0;
+ ret = vprintf_wrk ( add_to_myprintf_buf, format, vargs );
+
+ if (n_myprintf_buf > 0) {
+ (*vex_log_bytes)( myprintf_buf, n_myprintf_buf );
+ }
+
+ va_end(vargs);
+
+ return ret;
+}
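+
+/* Illustrative only, not in the original source: one call exercising
+   every conversion vprintf_wrk accepts.  Note that %x and %X both
+   print uppercase at present. */
+#if 0
+static void vex_printf_demo ( void )
+{
+   vex_printf("%s %c %d %lld %u %x %08llx %p %%\n",
+              "hello", '!', -42, -42LL, 42u, 0xBEEFu,
+              0xDEADBEEFULL, (void*)&myprintf_buf[0]);
+}
+#endif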
+
+
+/* A general replacement for sprintf(). */
+
+static HChar *vg_sprintf_ptr;
+
+static void add_to_vg_sprintf_buf ( HChar c )
+{
+ *vg_sprintf_ptr++ = c;
+}
+
+UInt vex_sprintf ( HChar* buf, HChar *format, ... )
+{
+   UInt ret;
+ va_list vargs;
+
+ vg_sprintf_ptr = buf;
+
+ va_start(vargs,format);
+
+ ret = vprintf_wrk ( add_to_vg_sprintf_buf, format, vargs );
+ add_to_vg_sprintf_buf(0);
+
+ va_end(vargs);
+
+   vassert(vex_strlen(buf) == (Int)ret);
+ return ret;
+}
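+
+/* Usage sketch (illustrative): vex_sprintf does no bounds checking,
+   so the caller must supply a buffer known to be large enough for
+   the fully-expanded string, e.g.
+
+      HChar buf[64];
+      UInt  len = vex_sprintf(buf, "delta = %lld", someLongValue);
+
+   where someLongValue stands for any Long-sized argument. */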
+
+
+/*---------------------------------------------------------------*/
+/*--- end main_util.c ---*/
+/*---------------------------------------------------------------*/
diff --git a/VEX/priv/main_util.h b/VEX/priv/main_util.h
new file mode 100644
index 0000000..1392b4b
--- /dev/null
+++ b/VEX/priv/main_util.h
@@ -0,0 +1,101 @@
+
+/*---------------------------------------------------------------*/
+/*--- begin main_util.h ---*/
+/*---------------------------------------------------------------*/
+
+/*
+ This file is part of Valgrind, a dynamic binary instrumentation
+ framework.
+
+ Copyright (C) 2004-2010 OpenWorks LLP
+ info@open-works.net
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA.
+
+ The GNU General Public License is contained in the file COPYING.
+
+ Neither the names of the U.S. Department of Energy nor the
+ University of California nor the names of its contributors may be
+ used to endorse or promote products derived from this software
+ without prior written permission.
+*/
+
+#ifndef __VEX_MAIN_UTIL_H
+#define __VEX_MAIN_UTIL_H
+
+#include "libvex_basictypes.h"
+
+
+/* Misc. */
+
+#define NULL ((void*)0)
+
+
+/* Stuff for panicking and assertion. */
+
+#define VG__STRING(__str) #__str
+
+#define vassert(expr) \
+ ((void) ((expr) ? 0 : \
+ (vex_assert_fail (VG__STRING(expr), \
+ __FILE__, __LINE__, \
+ __PRETTY_FUNCTION__), 0)))
+
+__attribute__ ((__noreturn__))
+extern void vex_assert_fail ( const HChar* expr, const HChar* file,
+ Int line, const HChar* fn );
+__attribute__ ((__noreturn__))
+extern void vpanic ( HChar* str );
+
+
+/* Printing */
+
+__attribute__ ((format (printf, 1, 2)))
+extern UInt vex_printf ( HChar *format, ... );
+
+__attribute__ ((format (printf, 2, 3)))
+extern UInt vex_sprintf ( HChar* buf, HChar *format, ... );
+
+
+/* String ops */
+
+extern Bool vex_streq ( const HChar* s1, const HChar* s2 );
+
+
+/* Storage management: clear the area, and allocate from it. */
+
+/* By default allocation occurs in the temporary area.  However, it is
+   possible to switch to permanent-area allocation if that's what you
+   want.  Permanent-area allocation is very limited, though. */
+
+typedef
+ enum {
+ VexAllocModeTEMP,
+ VexAllocModePERM
+ }
+ VexAllocMode;
+
+extern void vexSetAllocMode ( VexAllocMode );
+extern VexAllocMode vexGetAllocMode ( void );
+extern void vexAllocSanityCheck ( void );
+
+extern void vexSetAllocModeTEMP_and_clear ( void );
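+
+/* Typical per-translation lifecycle, as an illustrative sketch: the
+   translation driver clears the TEMP pool before building IR, so all
+   IR trees for one superblock can later be discarded wholesale:
+
+      vexSetAllocModeTEMP_and_clear();
+      ... construct and optimise IR; it all lands in the TEMP pool ...
+*/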
+
+#endif /* ndef __VEX_MAIN_UTIL_H */
+
+/*---------------------------------------------------------------*/
+/*--- end                                           main_util.h ---*/
+/*---------------------------------------------------------------*/