blob: 25ed75a7cf46842dccec2e4f63b6ef8437fdb1ee [file] [log] [blame]
nethercotebb1c9912004-01-04 16:43:23 +00001
njn25e49d8e72002-09-23 09:36:25 +00002/*--------------------------------------------------------------------*/
sewardj95448072004-11-22 20:19:51 +00003/*--- Instrument IR to perform memory checking operations. ---*/
njn25cac76cb2002-09-23 11:21:57 +00004/*--- mc_translate.c ---*/
njn25e49d8e72002-09-23 09:36:25 +00005/*--------------------------------------------------------------------*/
njnc9539842002-10-02 13:26:35 +00006
njn25e49d8e72002-09-23 09:36:25 +00007/*
nethercote137bc552003-11-14 17:47:54 +00008 This file is part of MemCheck, a heavyweight Valgrind tool for
njnc9539842002-10-02 13:26:35 +00009 detecting memory errors.
njn25e49d8e72002-09-23 09:36:25 +000010
sewardj0f157dd2013-10-18 14:27:36 +000011 Copyright (C) 2000-2013 Julian Seward
njn25e49d8e72002-09-23 09:36:25 +000012 jseward@acm.org
13
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
18
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
23
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, write to the Free Software
26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27 02111-1307, USA.
28
29 The GNU General Public License is contained in the file COPYING.
30*/
31
njnc7561b92005-06-19 01:24:32 +000032#include "pub_tool_basics.h"
philippe6643e962012-01-17 21:16:30 +000033#include "pub_tool_poolalloc.h" // For mc_include.h
njn1d0825f2006-03-27 11:37:07 +000034#include "pub_tool_hashtable.h" // For mc_include.h
njn132bfcc2005-06-04 19:16:06 +000035#include "pub_tool_libcassert.h"
njn36a20fa2005-06-03 03:08:39 +000036#include "pub_tool_libcprint.h"
njnc7561b92005-06-19 01:24:32 +000037#include "pub_tool_tooliface.h"
sewardj53ee1fc2005-12-23 02:29:58 +000038#include "pub_tool_machine.h" // VG_(fnptr_to_fnentry)
sewardj81651dc2007-08-28 06:05:20 +000039#include "pub_tool_xarray.h"
40#include "pub_tool_mallocfree.h"
41#include "pub_tool_libcbase.h"
njn25e49d8e72002-09-23 09:36:25 +000042
sewardj7cf4e6b2008-05-01 20:24:26 +000043#include "mc_include.h"
44
45
sewardj7ee7d852011-06-16 11:37:21 +000046/* FIXMEs JRS 2011-June-16.
47
48 Check the interpretation for vector narrowing and widening ops,
49 particularly the saturating ones. I suspect they are either overly
50 pessimistic and/or wrong.
sewardjbfd03f82014-08-26 18:35:13 +000051
52 Iop_QandSQsh64x2 and friends (vector-by-vector bidirectional
53 saturating shifts): the interpretation is overly pessimistic.
54 See comments on the relevant cases below for details.
55
56 Iop_Sh64Sx2 and friends (vector-by-vector bidirectional shifts,
57 both rounding and non-rounding variants): ditto
sewardj7ee7d852011-06-16 11:37:21 +000058*/
59
sewardj992dff92005-10-07 11:08:55 +000060/* This file implements the Memcheck instrumentation, and in
61 particular contains the core of its undefined value detection
62 machinery. For a comprehensive background of the terminology,
63 algorithms and rationale used herein, read:
64
65 Using Valgrind to detect undefined value errors with
66 bit-precision
67
68 Julian Seward and Nicholas Nethercote
69
70 2005 USENIX Annual Technical Conference (General Track),
71 Anaheim, CA, USA, April 10-15, 2005.
njn6665ea22007-05-24 23:14:41 +000072
73 ----
74
75 Here is as good a place as any to record exactly when V bits are and
76 should be checked, why, and what function is responsible.
77
78
79 Memcheck complains when an undefined value is used:
80
81 1. In the condition of a conditional branch. Because it could cause
82 incorrect control flow, and thus cause incorrect externally-visible
83 behaviour. [mc_translate.c:complainIfUndefined]
84
85 2. As an argument to a system call, or as the value that specifies
86 the system call number. Because it could cause an incorrect
87 externally-visible side effect. [mc_translate.c:mc_pre_reg_read]
88
89 3. As the address in a load or store. Because it could cause an
90 incorrect value to be used later, which could cause externally-visible
91 behaviour (eg. via incorrect control flow or an incorrect system call
92 argument) [complainIfUndefined]
93
94 4. As the target address of a branch. Because it could cause incorrect
95 control flow. [complainIfUndefined]
96
97 5. As an argument to setenv, unsetenv, or putenv. Because it could put
98 an incorrect value into the external environment.
99 [mc_replace_strmem.c:VG_WRAP_FUNCTION_ZU(*, *env)]
100
101 6. As the index in a GETI or PUTI operation. I'm not sure why... (njn).
102 [complainIfUndefined]
103
104 7. As an argument to the VALGRIND_CHECK_MEM_IS_DEFINED and
105 VALGRIND_CHECK_VALUE_IS_DEFINED client requests. Because the user
106 requested it. [in memcheck.h]
107
108
109 Memcheck also complains, but should not, when an undefined value is used:
110
   8. As the shift value in certain SIMD shift operations (but not in the
      standard integer shift operations).  This inconsistency is due to
      historical reasons.  [complainIfUndefined]
114
115
116 Memcheck does not complain, but should, when an undefined value is used:
117
118 9. As an input to a client request. Because the client request may
119 affect the visible behaviour -- see bug #144362 for an example
120 involving the malloc replacements in vg_replace_malloc.c and
121 VALGRIND_NON_SIMD_CALL* requests, where an uninitialised argument
122 isn't identified. That bug report also has some info on how to solve
123 the problem. [valgrind.h:VALGRIND_DO_CLIENT_REQUEST]
124
125
126 In practice, 1 and 2 account for the vast majority of cases.
sewardj992dff92005-10-07 11:08:55 +0000127*/
128
sewardjb9e6d242013-05-11 13:42:08 +0000129/* Generation of addr-definedness, addr-validity and
130 guard-definedness checks pertaining to loads and stores (Iex_Load,
131 Ist_Store, IRLoadG, IRStoreG, LLSC, CAS and Dirty memory
132 loads/stores) was re-checked 11 May 2013. */
133
sewardj95448072004-11-22 20:19:51 +0000134/*------------------------------------------------------------*/
135/*--- Forward decls ---*/
136/*------------------------------------------------------------*/
137
138struct _MCEnv;
139
sewardj7cf4e6b2008-05-01 20:24:26 +0000140static IRType shadowTypeV ( IRType ty );
sewardj95448072004-11-22 20:19:51 +0000141static IRExpr* expr2vbits ( struct _MCEnv* mce, IRExpr* e );
sewardjafa617b2008-07-22 09:59:48 +0000142static IRTemp findShadowTmpB ( struct _MCEnv* mce, IRTemp orig );
sewardj95448072004-11-22 20:19:51 +0000143
sewardjb5b87402011-03-07 16:05:35 +0000144static IRExpr *i128_const_zero(void);
sewardj95448072004-11-22 20:19:51 +0000145
146/*------------------------------------------------------------*/
147/*--- Memcheck running state, and tmp management. ---*/
148/*------------------------------------------------------------*/
149
sewardj1c0ce7a2009-07-01 08:10:49 +0000150/* Carries info about a particular tmp. The tmp's number is not
151 recorded, as this is implied by (equal to) its index in the tmpMap
152 in MCEnv. The tmp's type is also not recorded, as this is present
153 in MCEnv.sb->tyenv.
154
155 When .kind is Orig, .shadowV and .shadowB may give the identities
156 of the temps currently holding the associated definedness (shadowV)
157 and origin (shadowB) values, or these may be IRTemp_INVALID if code
158 to compute such values has not yet been emitted.
159
   When .kind is VSh or BSh then the tmp holds a V- or B- value,
161 and so .shadowV and .shadowB must be IRTemp_INVALID, since it is
162 illogical for a shadow tmp itself to be shadowed.
163*/
/* Classification of an IR temp: Orig for a temp of the original
   (guest) program, VSh for a definedness (V-bits) shadow temp, BSh
   for an origin-tracking (B-bits) shadow temp. */
typedef
   enum { Orig=1, VSh=2, BSh=3 }
   TempKind;

/* One entry per temp in MCEnv.tmpMap; see the invariants described
   in the comment immediately above. */
typedef
   struct {
      TempKind kind;     /* what kind of temp this is */
      IRTemp   shadowV;  /* its V-bits shadow, or IRTemp_INVALID */
      IRTemp   shadowB;  /* its B-bits shadow, or IRTemp_INVALID */
   }
   TempMapEnt;
175
176
/* Carries around state during memcheck instrumentation. */
typedef
   struct _MCEnv {
      /* MODIFIED: the superblock being constructed.  IRStmts are
         added. */
      IRSB* sb;
      Bool  trace;   /* if True, print each added stmt as we go */

      /* MODIFIED: a table [0 .. #temps_in_sb-1] which gives the
         current kind and possibly shadow temps for each temp in the
         IRSB being constructed.  Note that it does not contain the
         type of each tmp.  If you want to know the type, look at the
         relevant entry in sb->tyenv.  It follows that at all times
         during the instrumentation process, the valid indices for
         tmpMap and sb->tyenv are identical, being 0 .. N-1 where N is
         total number of Orig, V- and B- temps allocated so far.

         The reason for this strange split (types in one place, all
         other info in another) is that we need the types to be
         attached to sb so as to make it possible to do
         "typeOfIRExpr(mce->sb->tyenv, ...)" at various places in the
         instrumentation process. */
      XArray* /* of TempMapEnt */ tmpMap;

      /* MODIFIED: indicates whether "bogus" literals have so far been
         found.  Starts off False, and may change to True. */
      Bool bogusLiterals;

      /* READONLY: indicates whether we should use expensive
         interpretations of integer adds, since unfortunately LLVM
         uses them to do ORs in some circumstances.  Defaulted to True
         on MacOS and False everywhere else. */
      Bool useLLVMworkarounds;

      /* READONLY: the guest layout.  This indicates which parts of
         the guest state should be regarded as 'always defined'. */
      const VexGuestLayout* layout;

      /* READONLY: the host word type.  Needed for constructing
         arguments of type 'HWord' to be passed to helper functions.
         Ity_I32 or Ity_I64 only. */
      IRType hWordTy;
   }
   MCEnv;
221
222/* SHADOW TMP MANAGEMENT. Shadow tmps are allocated lazily (on
223 demand), as they are encountered. This is for two reasons.
224
225 (1) (less important reason): Many original tmps are unused due to
226 initial IR optimisation, and we do not want to spaces in tables
227 tracking them.
228
229 Shadow IRTemps are therefore allocated on demand. mce.tmpMap is a
230 table indexed [0 .. n_types-1], which gives the current shadow for
231 each original tmp, or INVALID_IRTEMP if none is so far assigned.
232 It is necessary to support making multiple assignments to a shadow
233 -- specifically, after testing a shadow for definedness, it needs
234 to be made defined. But IR's SSA property disallows this.
235
236 (2) (more important reason): Therefore, when a shadow needs to get
237 a new value, a new temporary is created, the value is assigned to
238 that, and the tmpMap is updated to reflect the new binding.
239
240 A corollary is that if the tmpMap maps a given tmp to
sewardjf1962d32006-10-19 13:22:16 +0000241 IRTemp_INVALID and we are hoping to read that shadow tmp, it means
sewardj95448072004-11-22 20:19:51 +0000242 there's a read-before-write error in the original tmps. The IR
243 sanity checker should catch all such anomalies, however.
njn25e49d8e72002-09-23 09:36:25 +0000244*/
sewardj95448072004-11-22 20:19:51 +0000245
sewardj1c0ce7a2009-07-01 08:10:49 +0000246/* Create a new IRTemp of type 'ty' and kind 'kind', and add it to
247 both the table in mce->sb and to our auxiliary mapping. Note that
248 newTemp may cause mce->tmpMap to resize, hence previous results
249 from VG_(indexXA)(mce->tmpMap) are invalidated. */
250static IRTemp newTemp ( MCEnv* mce, IRType ty, TempKind kind )
251{
252 Word newIx;
253 TempMapEnt ent;
254 IRTemp tmp = newIRTemp(mce->sb->tyenv, ty);
255 ent.kind = kind;
256 ent.shadowV = IRTemp_INVALID;
257 ent.shadowB = IRTemp_INVALID;
258 newIx = VG_(addToXA)( mce->tmpMap, &ent );
259 tl_assert(newIx == (Word)tmp);
260 return tmp;
261}
262
263
sewardj95448072004-11-22 20:19:51 +0000264/* Find the tmp currently shadowing the given original tmp. If none
265 so far exists, allocate one. */
static IRTemp findShadowTmpV ( MCEnv* mce, IRTemp orig )
{
   TempMapEnt* ent;
   /* VG_(indexXA) range-checks 'orig', hence no need to check
      here. */
   ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
   tl_assert(ent->kind == Orig);
   if (ent->shadowV == IRTemp_INVALID) {
      /* Lazily allocate a shadow, of the integer type that shadows
         the original's type. */
      IRTemp tmpV
        = newTemp( mce, shadowTypeV(mce->sb->tyenv->types[orig]), VSh );
      /* newTemp may cause mce->tmpMap to resize, hence previous results
         from VG_(indexXA) are invalid. */
      ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
      tl_assert(ent->kind == Orig);
      tl_assert(ent->shadowV == IRTemp_INVALID);
      ent->shadowV = tmpV;
   }
   return ent->shadowV;
}
285
sewardj95448072004-11-22 20:19:51 +0000286/* Allocate a new shadow for the given original tmp. This means any
287 previous shadow is abandoned. This is needed because it is
288 necessary to give a new value to a shadow once it has been tested
289 for undefinedness, but unfortunately IR's SSA property disallows
290 this. Instead we must abandon the old shadow, allocate a new one
sewardj1c0ce7a2009-07-01 08:10:49 +0000291 and use that instead.
292
293 This is the same as findShadowTmpV, except we don't bother to see
294 if a shadow temp already existed -- we simply allocate a new one
295 regardless. */
sewardj7cf4e6b2008-05-01 20:24:26 +0000296static void newShadowTmpV ( MCEnv* mce, IRTemp orig )
njn25e49d8e72002-09-23 09:36:25 +0000297{
sewardj1c0ce7a2009-07-01 08:10:49 +0000298 TempMapEnt* ent;
299 /* VG_(indexXA) range-checks 'orig', hence no need to check
300 here. */
301 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
302 tl_assert(ent->kind == Orig);
303 if (1) {
304 IRTemp tmpV
305 = newTemp( mce, shadowTypeV(mce->sb->tyenv->types[orig]), VSh );
306 /* newTemp may cause mce->tmpMap to resize, hence previous results
307 from VG_(indexXA) are invalid. */
308 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
309 tl_assert(ent->kind == Orig);
310 ent->shadowV = tmpV;
311 }
sewardj95448072004-11-22 20:19:51 +0000312}
313
314
315/*------------------------------------------------------------*/
316/*--- IRAtoms -- a subset of IRExprs ---*/
317/*------------------------------------------------------------*/
318
319/* An atom is either an IRExpr_Const or an IRExpr_Tmp, as defined by
sewardj710d6c22005-03-20 18:55:15 +0000320 isIRAtom() in libvex_ir.h. Because this instrumenter expects flat
sewardj95448072004-11-22 20:19:51 +0000321 input, most of this code deals in atoms. Usefully, a value atom
322 always has a V-value which is also an atom: constants are shadowed
323 by constants, and temps are shadowed by the corresponding shadow
324 temporary. */
325
326typedef IRExpr IRAtom;
327
328/* (used for sanity checks only): is this an atom which looks
329 like it's from original code? */
330static Bool isOriginalAtom ( MCEnv* mce, IRAtom* a1 )
331{
332 if (a1->tag == Iex_Const)
333 return True;
sewardj1c0ce7a2009-07-01 08:10:49 +0000334 if (a1->tag == Iex_RdTmp) {
335 TempMapEnt* ent = VG_(indexXA)( mce->tmpMap, a1->Iex.RdTmp.tmp );
336 return ent->kind == Orig;
337 }
sewardj95448072004-11-22 20:19:51 +0000338 return False;
339}
340
341/* (used for sanity checks only): is this an atom which looks
342 like it's from shadow code? */
343static Bool isShadowAtom ( MCEnv* mce, IRAtom* a1 )
344{
345 if (a1->tag == Iex_Const)
346 return True;
sewardj1c0ce7a2009-07-01 08:10:49 +0000347 if (a1->tag == Iex_RdTmp) {
348 TempMapEnt* ent = VG_(indexXA)( mce->tmpMap, a1->Iex.RdTmp.tmp );
349 return ent->kind == VSh || ent->kind == BSh;
350 }
sewardj95448072004-11-22 20:19:51 +0000351 return False;
352}
353
354/* (used for sanity checks only): check that both args are atoms and
355 are identically-kinded. */
356static Bool sameKindedAtoms ( IRAtom* a1, IRAtom* a2 )
357{
sewardj0b9d74a2006-12-24 02:24:11 +0000358 if (a1->tag == Iex_RdTmp && a2->tag == Iex_RdTmp)
sewardj95448072004-11-22 20:19:51 +0000359 return True;
sewardjbef552a2005-08-30 12:54:36 +0000360 if (a1->tag == Iex_Const && a2->tag == Iex_Const)
sewardj95448072004-11-22 20:19:51 +0000361 return True;
362 return False;
363}
364
365
366/*------------------------------------------------------------*/
367/*--- Type management ---*/
368/*------------------------------------------------------------*/
369
370/* Shadow state is always accessed using integer types. This returns
371 an integer type with the same size (as per sizeofIRType) as the
372 given type. The only valid shadow types are Bit, I8, I16, I32,
sewardj45fa9f42012-05-21 10:18:10 +0000373 I64, I128, V128, V256. */
sewardj95448072004-11-22 20:19:51 +0000374
sewardj7cf4e6b2008-05-01 20:24:26 +0000375static IRType shadowTypeV ( IRType ty )
sewardj95448072004-11-22 20:19:51 +0000376{
377 switch (ty) {
378 case Ity_I1:
379 case Ity_I8:
380 case Ity_I16:
381 case Ity_I32:
sewardj6cf40ff2005-04-20 22:31:26 +0000382 case Ity_I64:
383 case Ity_I128: return ty;
sewardj3245c912004-12-10 14:58:26 +0000384 case Ity_F32: return Ity_I32;
sewardjb0ccb4d2012-04-02 10:22:05 +0000385 case Ity_D32: return Ity_I32;
sewardj3245c912004-12-10 14:58:26 +0000386 case Ity_F64: return Ity_I64;
sewardjb0ccb4d2012-04-02 10:22:05 +0000387 case Ity_D64: return Ity_I64;
sewardjb5b87402011-03-07 16:05:35 +0000388 case Ity_F128: return Ity_I128;
sewardjb0ccb4d2012-04-02 10:22:05 +0000389 case Ity_D128: return Ity_I128;
sewardj3245c912004-12-10 14:58:26 +0000390 case Ity_V128: return Ity_V128;
sewardj45fa9f42012-05-21 10:18:10 +0000391 case Ity_V256: return Ity_V256;
sewardj95448072004-11-22 20:19:51 +0000392 default: ppIRType(ty);
sewardj7cf4e6b2008-05-01 20:24:26 +0000393 VG_(tool_panic)("memcheck:shadowTypeV");
sewardj95448072004-11-22 20:19:51 +0000394 }
395}
396
/* Produce a 'defined' value of the given shadow type.  Should only be
   supplied shadow types (I1/I8/I16/I32/I64/I128/V128/V256). */
399static IRExpr* definedOfType ( IRType ty ) {
400 switch (ty) {
sewardj170ee212004-12-10 18:57:51 +0000401 case Ity_I1: return IRExpr_Const(IRConst_U1(False));
402 case Ity_I8: return IRExpr_Const(IRConst_U8(0));
403 case Ity_I16: return IRExpr_Const(IRConst_U16(0));
404 case Ity_I32: return IRExpr_Const(IRConst_U32(0));
405 case Ity_I64: return IRExpr_Const(IRConst_U64(0));
sewardjb5b87402011-03-07 16:05:35 +0000406 case Ity_I128: return i128_const_zero();
sewardj170ee212004-12-10 18:57:51 +0000407 case Ity_V128: return IRExpr_Const(IRConst_V128(0x0000));
sewardj1eb272f2014-01-26 18:36:52 +0000408 case Ity_V256: return IRExpr_Const(IRConst_V256(0x00000000));
sewardjf1962d32006-10-19 13:22:16 +0000409 default: VG_(tool_panic)("memcheck:definedOfType");
njn25e49d8e72002-09-23 09:36:25 +0000410 }
411}
412
413
sewardj95448072004-11-22 20:19:51 +0000414/*------------------------------------------------------------*/
415/*--- Constructing IR fragments ---*/
416/*------------------------------------------------------------*/
417
/* Add 'st' to the superblock under construction, optionally tracing
   it first.  'cat' is a one-character tag identifying which
   instrumentation category produced the stmt ('V', 'B', 'C'); it is
   used only in the trace output. */
static inline void stmt ( HChar cat, MCEnv* mce, IRStmt* st ) {
   if (mce->trace) {
      VG_(printf)("  %c: ", cat);
      ppIRStmt(st);
      VG_(printf)("\n");
   }
   addStmtToIRSB(mce->sb, st);
}
427
428/* assign value to tmp */
429static inline
430void assign ( HChar cat, MCEnv* mce, IRTemp tmp, IRExpr* expr ) {
sewardj1c0ce7a2009-07-01 08:10:49 +0000431 stmt(cat, mce, IRStmt_WrTmp(tmp,expr));
sewardj7cf4e6b2008-05-01 20:24:26 +0000432}
sewardj95448072004-11-22 20:19:51 +0000433
434/* build various kinds of expressions */
sewardj57f92b02010-08-22 11:54:14 +0000435#define triop(_op, _arg1, _arg2, _arg3) \
436 IRExpr_Triop((_op),(_arg1),(_arg2),(_arg3))
sewardj95448072004-11-22 20:19:51 +0000437#define binop(_op, _arg1, _arg2) IRExpr_Binop((_op),(_arg1),(_arg2))
438#define unop(_op, _arg) IRExpr_Unop((_op),(_arg))
sewardjcc961652013-01-26 11:49:15 +0000439#define mkU1(_n) IRExpr_Const(IRConst_U1(_n))
sewardj95448072004-11-22 20:19:51 +0000440#define mkU8(_n) IRExpr_Const(IRConst_U8(_n))
441#define mkU16(_n) IRExpr_Const(IRConst_U16(_n))
442#define mkU32(_n) IRExpr_Const(IRConst_U32(_n))
443#define mkU64(_n) IRExpr_Const(IRConst_U64(_n))
sewardj170ee212004-12-10 18:57:51 +0000444#define mkV128(_n) IRExpr_Const(IRConst_V128(_n))
sewardj0b9d74a2006-12-24 02:24:11 +0000445#define mkexpr(_tmp) IRExpr_RdTmp((_tmp))
sewardj95448072004-11-22 20:19:51 +0000446
sewardj7cf4e6b2008-05-01 20:24:26 +0000447/* Bind the given expression to a new temporary, and return the
sewardj95448072004-11-22 20:19:51 +0000448 temporary. This effectively converts an arbitrary expression into
sewardj7cf4e6b2008-05-01 20:24:26 +0000449 an atom.
450
451 'ty' is the type of 'e' and hence the type that the new temporary
sewardj1c0ce7a2009-07-01 08:10:49 +0000452 needs to be. But passing it in is redundant, since we can deduce
453 the type merely by inspecting 'e'. So at least use that fact to
454 assert that the two types agree. */
455static IRAtom* assignNew ( HChar cat, MCEnv* mce, IRType ty, IRExpr* e )
456{
457 TempKind k;
458 IRTemp t;
459 IRType tyE = typeOfIRExpr(mce->sb->tyenv, e);
sewardjb0ccb4d2012-04-02 10:22:05 +0000460
sewardj7cf4e6b2008-05-01 20:24:26 +0000461 tl_assert(tyE == ty); /* so 'ty' is redundant (!) */
sewardj1c0ce7a2009-07-01 08:10:49 +0000462 switch (cat) {
463 case 'V': k = VSh; break;
464 case 'B': k = BSh; break;
465 case 'C': k = Orig; break;
466 /* happens when we are making up new "orig"
467 expressions, for IRCAS handling */
468 default: tl_assert(0);
469 }
470 t = newTemp(mce, ty, k);
sewardj7cf4e6b2008-05-01 20:24:26 +0000471 assign(cat, mce, t, e);
sewardj95448072004-11-22 20:19:51 +0000472 return mkexpr(t);
473}
474
475
476/*------------------------------------------------------------*/
sewardjb5b87402011-03-07 16:05:35 +0000477/*--- Helper functions for 128-bit ops ---*/
478/*------------------------------------------------------------*/
sewardj45fa9f42012-05-21 10:18:10 +0000479
sewardjb5b87402011-03-07 16:05:35 +0000480static IRExpr *i128_const_zero(void)
481{
sewardj45fa9f42012-05-21 10:18:10 +0000482 IRAtom* z64 = IRExpr_Const(IRConst_U64(0));
483 return binop(Iop_64HLto128, z64, z64);
sewardjb5b87402011-03-07 16:05:35 +0000484}
485
sewardj45fa9f42012-05-21 10:18:10 +0000486/* There are no I128-bit loads and/or stores [as generated by any
487 current front ends]. So we do not need to worry about that in
488 expr2vbits_Load */
489
sewardjb5b87402011-03-07 16:05:35 +0000490
491/*------------------------------------------------------------*/
sewardj95448072004-11-22 20:19:51 +0000492/*--- Constructing definedness primitive ops ---*/
493/*------------------------------------------------------------*/
494
495/* --------- Defined-if-either-defined --------- */
496
497static IRAtom* mkDifD8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
498 tl_assert(isShadowAtom(mce,a1));
499 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000500 return assignNew('V', mce, Ity_I8, binop(Iop_And8, a1, a2));
sewardj95448072004-11-22 20:19:51 +0000501}
502
503static IRAtom* mkDifD16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
504 tl_assert(isShadowAtom(mce,a1));
505 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000506 return assignNew('V', mce, Ity_I16, binop(Iop_And16, a1, a2));
sewardj95448072004-11-22 20:19:51 +0000507}
508
509static IRAtom* mkDifD32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
510 tl_assert(isShadowAtom(mce,a1));
511 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000512 return assignNew('V', mce, Ity_I32, binop(Iop_And32, a1, a2));
sewardj95448072004-11-22 20:19:51 +0000513}
514
sewardj7010f6e2004-12-10 13:35:22 +0000515static IRAtom* mkDifD64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
516 tl_assert(isShadowAtom(mce,a1));
517 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000518 return assignNew('V', mce, Ity_I64, binop(Iop_And64, a1, a2));
sewardj7010f6e2004-12-10 13:35:22 +0000519}
520
sewardj20d38f22005-02-07 23:50:18 +0000521static IRAtom* mkDifDV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
sewardj170ee212004-12-10 18:57:51 +0000522 tl_assert(isShadowAtom(mce,a1));
523 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000524 return assignNew('V', mce, Ity_V128, binop(Iop_AndV128, a1, a2));
sewardj170ee212004-12-10 18:57:51 +0000525}
526
sewardj350e8f72012-06-25 07:52:15 +0000527static IRAtom* mkDifDV256 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
528 tl_assert(isShadowAtom(mce,a1));
529 tl_assert(isShadowAtom(mce,a2));
530 return assignNew('V', mce, Ity_V256, binop(Iop_AndV256, a1, a2));
531}
532
sewardj95448072004-11-22 20:19:51 +0000533/* --------- Undefined-if-either-undefined --------- */
534
535static IRAtom* mkUifU8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
536 tl_assert(isShadowAtom(mce,a1));
537 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000538 return assignNew('V', mce, Ity_I8, binop(Iop_Or8, a1, a2));
sewardj95448072004-11-22 20:19:51 +0000539}
540
541static IRAtom* mkUifU16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
542 tl_assert(isShadowAtom(mce,a1));
543 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000544 return assignNew('V', mce, Ity_I16, binop(Iop_Or16, a1, a2));
sewardj95448072004-11-22 20:19:51 +0000545}
546
547static IRAtom* mkUifU32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
548 tl_assert(isShadowAtom(mce,a1));
549 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000550 return assignNew('V', mce, Ity_I32, binop(Iop_Or32, a1, a2));
sewardj95448072004-11-22 20:19:51 +0000551}
552
553static IRAtom* mkUifU64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
554 tl_assert(isShadowAtom(mce,a1));
555 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000556 return assignNew('V', mce, Ity_I64, binop(Iop_Or64, a1, a2));
sewardj95448072004-11-22 20:19:51 +0000557}
558
sewardjb5b87402011-03-07 16:05:35 +0000559static IRAtom* mkUifU128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
560 IRAtom *tmp1, *tmp2, *tmp3, *tmp4, *tmp5, *tmp6;
561 tl_assert(isShadowAtom(mce,a1));
562 tl_assert(isShadowAtom(mce,a2));
563 tmp1 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, a1));
564 tmp2 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, a1));
565 tmp3 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, a2));
566 tmp4 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, a2));
567 tmp5 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp1, tmp3));
568 tmp6 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp2, tmp4));
569
570 return assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, tmp6, tmp5));
571}
572
sewardj20d38f22005-02-07 23:50:18 +0000573static IRAtom* mkUifUV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
sewardj3245c912004-12-10 14:58:26 +0000574 tl_assert(isShadowAtom(mce,a1));
575 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000576 return assignNew('V', mce, Ity_V128, binop(Iop_OrV128, a1, a2));
sewardj3245c912004-12-10 14:58:26 +0000577}
578
sewardj350e8f72012-06-25 07:52:15 +0000579static IRAtom* mkUifUV256 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
580 tl_assert(isShadowAtom(mce,a1));
581 tl_assert(isShadowAtom(mce,a2));
582 return assignNew('V', mce, Ity_V256, binop(Iop_OrV256, a1, a2));
583}
584
sewardje50a1b12004-12-17 01:24:54 +0000585static IRAtom* mkUifU ( MCEnv* mce, IRType vty, IRAtom* a1, IRAtom* a2 ) {
sewardj95448072004-11-22 20:19:51 +0000586 switch (vty) {
sewardje50a1b12004-12-17 01:24:54 +0000587 case Ity_I8: return mkUifU8(mce, a1, a2);
sewardja1d93302004-12-12 16:45:06 +0000588 case Ity_I16: return mkUifU16(mce, a1, a2);
589 case Ity_I32: return mkUifU32(mce, a1, a2);
590 case Ity_I64: return mkUifU64(mce, a1, a2);
sewardjb5b87402011-03-07 16:05:35 +0000591 case Ity_I128: return mkUifU128(mce, a1, a2);
sewardj20d38f22005-02-07 23:50:18 +0000592 case Ity_V128: return mkUifUV128(mce, a1, a2);
sewardja2f30952013-03-27 11:40:02 +0000593 case Ity_V256: return mkUifUV256(mce, a1, a2);
sewardj95448072004-11-22 20:19:51 +0000594 default:
595 VG_(printf)("\n"); ppIRType(vty); VG_(printf)("\n");
596 VG_(tool_panic)("memcheck:mkUifU");
njn25e49d8e72002-09-23 09:36:25 +0000597 }
598}
599
sewardj95448072004-11-22 20:19:51 +0000600/* --------- The Left-family of operations. --------- */
njn25e49d8e72002-09-23 09:36:25 +0000601
sewardj95448072004-11-22 20:19:51 +0000602static IRAtom* mkLeft8 ( MCEnv* mce, IRAtom* a1 ) {
603 tl_assert(isShadowAtom(mce,a1));
sewardj7cf4e6b2008-05-01 20:24:26 +0000604 return assignNew('V', mce, Ity_I8, unop(Iop_Left8, a1));
sewardj95448072004-11-22 20:19:51 +0000605}
606
607static IRAtom* mkLeft16 ( MCEnv* mce, IRAtom* a1 ) {
608 tl_assert(isShadowAtom(mce,a1));
sewardj7cf4e6b2008-05-01 20:24:26 +0000609 return assignNew('V', mce, Ity_I16, unop(Iop_Left16, a1));
sewardj95448072004-11-22 20:19:51 +0000610}
611
612static IRAtom* mkLeft32 ( MCEnv* mce, IRAtom* a1 ) {
613 tl_assert(isShadowAtom(mce,a1));
sewardj7cf4e6b2008-05-01 20:24:26 +0000614 return assignNew('V', mce, Ity_I32, unop(Iop_Left32, a1));
sewardj95448072004-11-22 20:19:51 +0000615}
616
sewardj681be302005-01-15 20:43:58 +0000617static IRAtom* mkLeft64 ( MCEnv* mce, IRAtom* a1 ) {
618 tl_assert(isShadowAtom(mce,a1));
sewardj7cf4e6b2008-05-01 20:24:26 +0000619 return assignNew('V', mce, Ity_I64, unop(Iop_Left64, a1));
sewardj681be302005-01-15 20:43:58 +0000620}
621
sewardj95448072004-11-22 20:19:51 +0000622/* --------- 'Improvement' functions for AND/OR. --------- */
623
624/* ImproveAND(data, vbits) = data OR vbits. Defined (0) data 0s give
625 defined (0); all other -> undefined (1).
626*/
627static IRAtom* mkImproveAND8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
njn25e49d8e72002-09-23 09:36:25 +0000628{
sewardj95448072004-11-22 20:19:51 +0000629 tl_assert(isOriginalAtom(mce, data));
630 tl_assert(isShadowAtom(mce, vbits));
631 tl_assert(sameKindedAtoms(data, vbits));
sewardj7cf4e6b2008-05-01 20:24:26 +0000632 return assignNew('V', mce, Ity_I8, binop(Iop_Or8, data, vbits));
sewardj95448072004-11-22 20:19:51 +0000633}
njn25e49d8e72002-09-23 09:36:25 +0000634
sewardj95448072004-11-22 20:19:51 +0000635static IRAtom* mkImproveAND16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
636{
637 tl_assert(isOriginalAtom(mce, data));
638 tl_assert(isShadowAtom(mce, vbits));
639 tl_assert(sameKindedAtoms(data, vbits));
sewardj7cf4e6b2008-05-01 20:24:26 +0000640 return assignNew('V', mce, Ity_I16, binop(Iop_Or16, data, vbits));
sewardj95448072004-11-22 20:19:51 +0000641}
njn25e49d8e72002-09-23 09:36:25 +0000642
sewardj95448072004-11-22 20:19:51 +0000643static IRAtom* mkImproveAND32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
644{
645 tl_assert(isOriginalAtom(mce, data));
646 tl_assert(isShadowAtom(mce, vbits));
647 tl_assert(sameKindedAtoms(data, vbits));
sewardj7cf4e6b2008-05-01 20:24:26 +0000648 return assignNew('V', mce, Ity_I32, binop(Iop_Or32, data, vbits));
sewardj95448072004-11-22 20:19:51 +0000649}
njn25e49d8e72002-09-23 09:36:25 +0000650
sewardj7010f6e2004-12-10 13:35:22 +0000651static IRAtom* mkImproveAND64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
652{
653 tl_assert(isOriginalAtom(mce, data));
654 tl_assert(isShadowAtom(mce, vbits));
655 tl_assert(sameKindedAtoms(data, vbits));
sewardj7cf4e6b2008-05-01 20:24:26 +0000656 return assignNew('V', mce, Ity_I64, binop(Iop_Or64, data, vbits));
sewardj7010f6e2004-12-10 13:35:22 +0000657}
658
sewardj20d38f22005-02-07 23:50:18 +0000659static IRAtom* mkImproveANDV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
sewardj170ee212004-12-10 18:57:51 +0000660{
661 tl_assert(isOriginalAtom(mce, data));
662 tl_assert(isShadowAtom(mce, vbits));
663 tl_assert(sameKindedAtoms(data, vbits));
sewardj7cf4e6b2008-05-01 20:24:26 +0000664 return assignNew('V', mce, Ity_V128, binop(Iop_OrV128, data, vbits));
sewardj170ee212004-12-10 18:57:51 +0000665}
666
sewardj350e8f72012-06-25 07:52:15 +0000667static IRAtom* mkImproveANDV256 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
668{
669 tl_assert(isOriginalAtom(mce, data));
670 tl_assert(isShadowAtom(mce, vbits));
671 tl_assert(sameKindedAtoms(data, vbits));
672 return assignNew('V', mce, Ity_V256, binop(Iop_OrV256, data, vbits));
673}
674
sewardj95448072004-11-22 20:19:51 +0000675/* ImproveOR(data, vbits) = ~data OR vbits. Defined (0) data 1s give
676 defined (0); all other -> undefined (1).
677*/
678static IRAtom* mkImproveOR8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
679{
680 tl_assert(isOriginalAtom(mce, data));
681 tl_assert(isShadowAtom(mce, vbits));
682 tl_assert(sameKindedAtoms(data, vbits));
683 return assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +0000684 'V', mce, Ity_I8,
sewardj95448072004-11-22 20:19:51 +0000685 binop(Iop_Or8,
sewardj7cf4e6b2008-05-01 20:24:26 +0000686 assignNew('V', mce, Ity_I8, unop(Iop_Not8, data)),
sewardj95448072004-11-22 20:19:51 +0000687 vbits) );
688}
njn25e49d8e72002-09-23 09:36:25 +0000689
sewardj95448072004-11-22 20:19:51 +0000690static IRAtom* mkImproveOR16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
691{
692 tl_assert(isOriginalAtom(mce, data));
693 tl_assert(isShadowAtom(mce, vbits));
694 tl_assert(sameKindedAtoms(data, vbits));
695 return assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +0000696 'V', mce, Ity_I16,
sewardj95448072004-11-22 20:19:51 +0000697 binop(Iop_Or16,
sewardj7cf4e6b2008-05-01 20:24:26 +0000698 assignNew('V', mce, Ity_I16, unop(Iop_Not16, data)),
sewardj95448072004-11-22 20:19:51 +0000699 vbits) );
700}
njn25e49d8e72002-09-23 09:36:25 +0000701
sewardj95448072004-11-22 20:19:51 +0000702static IRAtom* mkImproveOR32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
703{
704 tl_assert(isOriginalAtom(mce, data));
705 tl_assert(isShadowAtom(mce, vbits));
706 tl_assert(sameKindedAtoms(data, vbits));
707 return assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +0000708 'V', mce, Ity_I32,
sewardj95448072004-11-22 20:19:51 +0000709 binop(Iop_Or32,
sewardj7cf4e6b2008-05-01 20:24:26 +0000710 assignNew('V', mce, Ity_I32, unop(Iop_Not32, data)),
sewardj95448072004-11-22 20:19:51 +0000711 vbits) );
712}
713
sewardj7010f6e2004-12-10 13:35:22 +0000714static IRAtom* mkImproveOR64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
715{
716 tl_assert(isOriginalAtom(mce, data));
717 tl_assert(isShadowAtom(mce, vbits));
718 tl_assert(sameKindedAtoms(data, vbits));
719 return assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +0000720 'V', mce, Ity_I64,
sewardj7010f6e2004-12-10 13:35:22 +0000721 binop(Iop_Or64,
sewardj7cf4e6b2008-05-01 20:24:26 +0000722 assignNew('V', mce, Ity_I64, unop(Iop_Not64, data)),
sewardj7010f6e2004-12-10 13:35:22 +0000723 vbits) );
724}
725
sewardj20d38f22005-02-07 23:50:18 +0000726static IRAtom* mkImproveORV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
sewardj170ee212004-12-10 18:57:51 +0000727{
728 tl_assert(isOriginalAtom(mce, data));
729 tl_assert(isShadowAtom(mce, vbits));
730 tl_assert(sameKindedAtoms(data, vbits));
731 return assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +0000732 'V', mce, Ity_V128,
sewardj20d38f22005-02-07 23:50:18 +0000733 binop(Iop_OrV128,
sewardj7cf4e6b2008-05-01 20:24:26 +0000734 assignNew('V', mce, Ity_V128, unop(Iop_NotV128, data)),
sewardj170ee212004-12-10 18:57:51 +0000735 vbits) );
736}
737
sewardj350e8f72012-06-25 07:52:15 +0000738static IRAtom* mkImproveORV256 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
739{
740 tl_assert(isOriginalAtom(mce, data));
741 tl_assert(isShadowAtom(mce, vbits));
742 tl_assert(sameKindedAtoms(data, vbits));
743 return assignNew(
744 'V', mce, Ity_V256,
745 binop(Iop_OrV256,
746 assignNew('V', mce, Ity_V256, unop(Iop_NotV256, data)),
747 vbits) );
748}
749
sewardj95448072004-11-22 20:19:51 +0000750/* --------- Pessimising casts. --------- */
751
sewardjb5b87402011-03-07 16:05:35 +0000752/* The function returns an expression of type DST_TY. If any of the VBITS
753 is undefined (value == 1) the resulting expression has all bits set to
754 1. Otherwise, all bits are 0. */
755
static IRAtom* mkPCastTo( MCEnv* mce, IRType dst_ty, IRAtom* vbits )
{
   IRType  src_ty;
   IRAtom* tmp1;

   /* Note, dst_ty is a shadow type, not an original type. */
   tl_assert(isShadowAtom(mce,vbits));
   src_ty = typeOfIRExpr(mce->sb->tyenv, vbits);

   /* Fast-track some common cases.  Same-width PCasts are a single
      CmpwNEZ; widening ones PCast at the source width and then
      replicate the all-0s/all-1s result up to the destination. */
   if (src_ty == Ity_I32 && dst_ty == Ity_I32)
      return assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));

   if (src_ty == Ity_I64 && dst_ty == Ity_I64)
      return assignNew('V', mce, Ity_I64, unop(Iop_CmpwNEZ64, vbits));

   if (src_ty == Ity_I32 && dst_ty == Ity_I64) {
      /* PCast the arg, then clone it. */
      IRAtom* tmp = assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));
      return assignNew('V', mce, Ity_I64, binop(Iop_32HLto64, tmp, tmp));
   }

   if (src_ty == Ity_I32 && dst_ty == Ity_V128) {
      /* PCast the arg, then clone it 4 times. */
      IRAtom* tmp = assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));
      tmp = assignNew('V', mce, Ity_I64, binop(Iop_32HLto64, tmp, tmp));
      return assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128, tmp, tmp));
   }

   if (src_ty == Ity_I32 && dst_ty == Ity_V256) {
      /* PCast the arg, then clone it 8 times. */
      IRAtom* tmp = assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));
      tmp = assignNew('V', mce, Ity_I64, binop(Iop_32HLto64, tmp, tmp));
      tmp = assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128, tmp, tmp));
      return assignNew('V', mce, Ity_V256, binop(Iop_V128HLtoV256, tmp, tmp));
   }

   if (src_ty == Ity_I64 && dst_ty == Ity_I32) {
      /* PCast the arg.  This gives all 0s or all 1s.  Then throw away
         the top half. */
      IRAtom* tmp = assignNew('V', mce, Ity_I64, unop(Iop_CmpwNEZ64, vbits));
      return assignNew('V', mce, Ity_I32, unop(Iop_64to32, tmp));
   }

   if (src_ty == Ity_V128 && dst_ty == Ity_I64) {
      /* Use InterleaveHI64x2 to copy the top half of the vector into
         the bottom half.  Then we can UifU it with the original, throw
         away the upper half of the result, and PCast-I64-to-I64
         the lower half. */
      // Generates vbits[127:64] : vbits[127:64]
      IRAtom* hi64hi64
         = assignNew('V', mce, Ity_V128,
                     binop(Iop_InterleaveHI64x2, vbits, vbits));
      // Generates
      //   UifU(vbits[127:64],vbits[127:64]) : UifU(vbits[127:64],vbits[63:0])
      //   == vbits[127:64] : UifU(vbits[127:64],vbits[63:0])
      IRAtom* lohi64
         = mkUifUV128(mce, hi64hi64, vbits);
      // Generates UifU(vbits[127:64],vbits[63:0])
      IRAtom* lo64
         = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, lohi64));
      // Generates
      //   PCast-to-I64( UifU(vbits[127:64], vbits[63:0] )
      //   == PCast-to-I64( vbits[127:0] )
      IRAtom* res
         = assignNew('V', mce, Ity_I64, unop(Iop_CmpwNEZ64, lo64));
      return res;
   }

   /* Else do it the slow way .. */
   /* First of all, collapse vbits down to a single bit. */
   tmp1 = NULL;
   switch (src_ty) {
      case Ity_I1:
         tmp1 = vbits;
         break;
      case Ity_I8:
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ8, vbits));
         break;
      case Ity_I16:
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ16, vbits));
         break;
      case Ity_I32:
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ32, vbits));
         break;
      case Ity_I64:
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ64, vbits));
         break;
      case Ity_I128: {
         /* Gah.  Chop it in half, OR the halves together, and compare
            that with zero. */
         IRAtom* tmp2 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, vbits));
         IRAtom* tmp3 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, vbits));
         IRAtom* tmp4 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp2, tmp3));
         tmp1 = assignNew('V', mce, Ity_I1,
                               unop(Iop_CmpNEZ64, tmp4));
         break;
      }
      default:
         ppIRType(src_ty);
         VG_(tool_panic)("mkPCastTo(1)");
   }
   tl_assert(tmp1);
   /* Now widen up to the dst type.  1Sto* sign-extends the single
      bit, giving the required all-0s/all-1s result. */
   switch (dst_ty) {
      case Ity_I1:
         return tmp1;
      case Ity_I8:
         return assignNew('V', mce, Ity_I8, unop(Iop_1Sto8, tmp1));
      case Ity_I16:
         return assignNew('V', mce, Ity_I16, unop(Iop_1Sto16, tmp1));
      case Ity_I32:
         return assignNew('V', mce, Ity_I32, unop(Iop_1Sto32, tmp1));
      case Ity_I64:
         return assignNew('V', mce, Ity_I64, unop(Iop_1Sto64, tmp1));
      case Ity_V128:
         tmp1 = assignNew('V', mce, Ity_I64, unop(Iop_1Sto64, tmp1));
         tmp1 = assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128, tmp1, tmp1));
         return tmp1;
      case Ity_I128:
         tmp1 = assignNew('V', mce, Ity_I64, unop(Iop_1Sto64, tmp1));
         tmp1 = assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, tmp1, tmp1));
         return tmp1;
      case Ity_V256:
         tmp1 = assignNew('V', mce, Ity_I64, unop(Iop_1Sto64, tmp1));
         tmp1 = assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128,
                                                    tmp1, tmp1));
         tmp1 = assignNew('V', mce, Ity_V256, binop(Iop_V128HLtoV256,
                                                    tmp1, tmp1));
         return tmp1;
      default:
         ppIRType(dst_ty);
         VG_(tool_panic)("mkPCastTo(2)");
   }
}
891
sewardjbfd03f82014-08-26 18:35:13 +0000892/* This is a minor variant. It takes an arg of some type and returns
893 a value of the same type. The result consists entirely of Defined
894 (zero) bits except its least significant bit, which is a PCast of
895 the entire argument down to a single bit. */
896static IRAtom* mkPCastXXtoXXlsb ( MCEnv* mce, IRAtom* varg, IRType ty )
897{
898 if (ty == Ity_V128) {
899 /* --- Case for V128 --- */
900 IRAtom* varg128 = varg;
901 // generates: PCast-to-I64(varg128)
902 IRAtom* pcdTo64 = mkPCastTo(mce, Ity_I64, varg128);
903 // Now introduce zeros (defined bits) in the top 63 places
904 // generates: Def--(63)--Def PCast-to-I1(varg128)
905 IRAtom* d63pc
906 = assignNew('V', mce, Ity_I64, binop(Iop_And64, pcdTo64, mkU64(1)));
907 // generates: Def--(64)--Def
908 IRAtom* d64
909 = definedOfType(Ity_I64);
910 // generates: Def--(127)--Def PCast-to-I1(varg128)
911 IRAtom* res
912 = assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128, d64, d63pc));
913 return res;
914 }
915 if (ty == Ity_I64) {
916 /* --- Case for I64 --- */
917 // PCast to 64
918 IRAtom* pcd = mkPCastTo(mce, Ity_I64, varg);
919 // Zero (Def) out the top 63 bits
920 IRAtom* res
921 = assignNew('V', mce, Ity_I64, binop(Iop_And64, pcd, mkU64(1)));
922 return res;
923 }
924 /*NOTREACHED*/
925 tl_assert(0);
926}
927
sewardjd5204dc2004-12-31 01:16:11 +0000928/* --------- Accurate interpretation of CmpEQ/CmpNE. --------- */
929/*
930 Normally, we can do CmpEQ/CmpNE by doing UifU on the arguments, and
   PCasting to Ity_I1.  However, sometimes it is necessary to be more
932 accurate. The insight is that the result is defined if two
933 corresponding bits can be found, one from each argument, so that
934 both bits are defined but are different -- that makes EQ say "No"
935 and NE say "Yes". Hence, we compute an improvement term and DifD
936 it onto the "normal" (UifU) result.
937
938 The result is:
939
940 PCastTo<1> (
sewardje6f8af42005-07-06 18:48:59 +0000941 -- naive version
942 PCastTo<sz>( UifU<sz>(vxx, vyy) )
943
sewardjd5204dc2004-12-31 01:16:11 +0000944 `DifD<sz>`
sewardje6f8af42005-07-06 18:48:59 +0000945
946 -- improvement term
947 PCastTo<sz>( PCast<sz>( CmpEQ<sz> ( vec, 1...1 ) ) )
sewardjd5204dc2004-12-31 01:16:11 +0000948 )
sewardje6f8af42005-07-06 18:48:59 +0000949
sewardjd5204dc2004-12-31 01:16:11 +0000950 where
951 vec contains 0 (defined) bits where the corresponding arg bits
sewardje6f8af42005-07-06 18:48:59 +0000952 are defined but different, and 1 bits otherwise.
sewardjd5204dc2004-12-31 01:16:11 +0000953
sewardje6f8af42005-07-06 18:48:59 +0000954 vec = Or<sz>( vxx, // 0 iff bit defined
955 vyy, // 0 iff bit defined
956 Not<sz>(Xor<sz>( xx, yy )) // 0 iff bits different
957 )
958
959 If any bit of vec is 0, the result is defined and so the
960 improvement term should produce 0...0, else it should produce
961 1...1.
962
963 Hence require for the improvement term:
964
965 if vec == 1...1 then 1...1 else 0...0
966 ->
967 PCast<sz>( CmpEQ<sz> ( vec, 1...1 ) )
968
969 This was extensively re-analysed and checked on 6 July 05.
sewardjd5204dc2004-12-31 01:16:11 +0000970*/
static IRAtom* expensiveCmpEQorNE ( MCEnv*  mce,
                                    IRType  ty,
                                    IRAtom* vxx, IRAtom* vyy,
                                    IRAtom* xx, IRAtom* yy )
{
   IRAtom *naive, *vec, *improvement_term;
   IRAtom *improved, *final_cast, *top;
   IROp   opDIFD, opUIFU, opXOR, opNOT, opCMP, opOR;

   tl_assert(isShadowAtom(mce,vxx));
   tl_assert(isShadowAtom(mce,vyy));
   tl_assert(isOriginalAtom(mce,xx));
   tl_assert(isOriginalAtom(mce,yy));
   tl_assert(sameKindedAtoms(vxx,xx));
   tl_assert(sameKindedAtoms(vyy,yy));

   /* Select the width-specific ops.  Note UifU is Or (worst-case
      merge) and DifD is And (best-case merge) in the vbit domain. */
   switch (ty) {
      case Ity_I16:
         opOR   = Iop_Or16;
         opDIFD = Iop_And16;
         opUIFU = Iop_Or16;
         opNOT  = Iop_Not16;
         opXOR  = Iop_Xor16;
         opCMP  = Iop_CmpEQ16;
         top    = mkU16(0xFFFF);
         break;
      case Ity_I32:
         opOR   = Iop_Or32;
         opDIFD = Iop_And32;
         opUIFU = Iop_Or32;
         opNOT  = Iop_Not32;
         opXOR  = Iop_Xor32;
         opCMP  = Iop_CmpEQ32;
         top    = mkU32(0xFFFFFFFF);
         break;
      case Ity_I64:
         opOR   = Iop_Or64;
         opDIFD = Iop_And64;
         opUIFU = Iop_Or64;
         opNOT  = Iop_Not64;
         opXOR  = Iop_Xor64;
         opCMP  = Iop_CmpEQ64;
         top    = mkU64(0xFFFFFFFFFFFFFFFFULL);
         break;
      default:
         VG_(tool_panic)("expensiveCmpEQorNE");
   }

   /* naive = PCast<sz>( UifU<sz>(vxx, vyy) ) -- see comment above. */
   naive
      = mkPCastTo(mce,ty,
                  assignNew('V', mce, ty, binop(opUIFU, vxx, vyy)));

   /* vec has a 0 bit wherever the corresponding arg bits are both
      defined and different; 1 bits otherwise. */
   vec
      = assignNew(
           'V', mce,ty,
           binop( opOR,
                  assignNew('V', mce,ty, binop(opOR, vxx, vyy)),
                  assignNew(
                     'V', mce,ty,
                     unop( opNOT,
                           assignNew('V', mce,ty, binop(opXOR, xx, yy))))));

   /* improvement_term = PCast<sz>( CmpEQ<sz>( vec, 1...1 ) ):
      all-0s (defined) if any bit of vec is 0, else all-1s. */
   improvement_term
      = mkPCastTo( mce,ty,
                   assignNew('V', mce,Ity_I1, binop(opCMP, vec, top)));

   /* DifD the improvement onto the naive result. */
   improved
      = assignNew( 'V', mce,ty, binop(opDIFD, naive, improvement_term) );

   /* The compare yields an I1, so narrow the vbits likewise. */
   final_cast
      = mkPCastTo( mce, Ity_I1, improved );

   return final_cast;
}
1045
sewardj95448072004-11-22 20:19:51 +00001046
sewardj992dff92005-10-07 11:08:55 +00001047/* --------- Semi-accurate interpretation of CmpORD. --------- */
1048
1049/* CmpORD32{S,U} does PowerPC-style 3-way comparisons:
1050
1051 CmpORD32S(x,y) = 1<<3 if x <s y
1052 = 1<<2 if x >s y
1053 = 1<<1 if x == y
1054
1055 and similarly the unsigned variant. The default interpretation is:
1056
1057 CmpORD32{S,U}#(x,y,x#,y#) = PCast(x# `UifU` y#)
sewardj1bc82102005-12-23 00:16:24 +00001058 & (7<<1)
sewardj992dff92005-10-07 11:08:55 +00001059
1060 The "& (7<<1)" reflects the fact that all result bits except 3,2,1
1061 are zero and therefore defined (viz, zero).
sewardja9e62a92005-10-07 12:13:21 +00001062
1063 Also deal with a special case better:
1064
1065 CmpORD32S(x,0)
1066
1067 Here, bit 3 (LT) of the result is a copy of the top bit of x and
1068 will be defined even if the rest of x isn't. In which case we do:
1069
1070 CmpORD32S#(x,x#,0,{impliedly 0}#)
sewardj1bc82102005-12-23 00:16:24 +00001071 = PCast(x#) & (3<<1) -- standard interp for GT#,EQ#
1072 | (x# >>u 31) << 3 -- LT# = x#[31]
sewardja9e62a92005-10-07 12:13:21 +00001073
sewardj1bc82102005-12-23 00:16:24 +00001074 Analogous handling for CmpORD64{S,U}.
sewardj992dff92005-10-07 11:08:55 +00001075*/
sewardja9e62a92005-10-07 12:13:21 +00001076static Bool isZeroU32 ( IRAtom* e )
1077{
1078 return
1079 toBool( e->tag == Iex_Const
1080 && e->Iex.Const.con->tag == Ico_U32
1081 && e->Iex.Const.con->Ico.U32 == 0 );
1082}
1083
sewardj1bc82102005-12-23 00:16:24 +00001084static Bool isZeroU64 ( IRAtom* e )
sewardj992dff92005-10-07 11:08:55 +00001085{
sewardj1bc82102005-12-23 00:16:24 +00001086 return
1087 toBool( e->tag == Iex_Const
1088 && e->Iex.Const.con->tag == Ico_U64
1089 && e->Iex.Const.con->Ico.U64 == 0 );
1090}
1091
static IRAtom* doCmpORD ( MCEnv*  mce,
                          IROp    cmp_op,
                          IRAtom* xxhash, IRAtom* yyhash,
                          IRAtom* xx, IRAtom* yy )
{
   /* Width-parameterised: one body handles both the 32- and 64-bit
      CmpORD variants.  See the big comment above for the scheme. */
   Bool   m64    = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U;
   Bool   syned  = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD32S;
   IROp   opOR   = m64 ? Iop_Or64  : Iop_Or32;
   IROp   opAND  = m64 ? Iop_And64 : Iop_And32;
   IROp   opSHL  = m64 ? Iop_Shl64 : Iop_Shl32;
   IROp   opSHR  = m64 ? Iop_Shr64 : Iop_Shr32;
   IRType ty     = m64 ? Ity_I64   : Ity_I32;
   Int    width  = m64 ? 64 : 32;

   Bool (*isZero)(IRAtom*) = m64 ? isZeroU64 : isZeroU32;

   IRAtom* threeLeft1 = NULL;
   IRAtom* sevenLeft1 = NULL;

   tl_assert(isShadowAtom(mce,xxhash));
   tl_assert(isShadowAtom(mce,yyhash));
   tl_assert(isOriginalAtom(mce,xx));
   tl_assert(isOriginalAtom(mce,yy));
   tl_assert(sameKindedAtoms(xxhash,xx));
   tl_assert(sameKindedAtoms(yyhash,yy));
   tl_assert(cmp_op == Iop_CmpORD32S || cmp_op == Iop_CmpORD32U
             || cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U);

   if (0) {
      ppIROp(cmp_op); VG_(printf)(" ");
      ppIRExpr(xx); VG_(printf)(" "); ppIRExpr( yy ); VG_(printf)("\n");
   }

   if (syned && isZero(yy)) {
      /* fancy interpretation */
      /* if yy is zero, then it must be fully defined (zero#). */
      tl_assert(isZero(yyhash));
      threeLeft1 = m64 ? mkU64(3<<1) : mkU32(3<<1);
      /* Result = (PCast(xxhash) & (3<<1))        -- GT#, EQ# bits
               | ((xxhash >>u (width-1)) << 3)    -- LT# = xxhash[msb] */
      return
         binop(
            opOR,
            assignNew(
               'V', mce,ty,
               binop(
                  opAND,
                  mkPCastTo(mce,ty, xxhash),
                  threeLeft1
               )),
            assignNew(
               'V', mce,ty,
               binop(
                  opSHL,
                  assignNew(
                     'V', mce,ty,
                     binop(opSHR, xxhash, mkU8(width-1))),
                  mkU8(3)
               ))
         );
   } else {
      /* standard interpretation */
      sevenLeft1 = m64 ? mkU64(7<<1) : mkU32(7<<1);
      /* PCast(xxhash `UifU` yyhash) masked to bits 3,2,1; the rest of
         the result is always zero and hence defined. */
      return
         binop(
            opAND,
            mkPCastTo( mce,ty,
                       mkUifU(mce,ty, xxhash,yyhash)),
            sevenLeft1
         );
   }
}
1162
1163
sewardj95448072004-11-22 20:19:51 +00001164/*------------------------------------------------------------*/
1165/*--- Emit a test and complaint if something is undefined. ---*/
1166/*------------------------------------------------------------*/
1167
sewardj7cf4e6b2008-05-01 20:24:26 +00001168static IRAtom* schemeE ( MCEnv* mce, IRExpr* e ); /* fwds */
1169
1170
sewardj95448072004-11-22 20:19:51 +00001171/* Set the annotations on a dirty helper to indicate that the stack
1172 pointer and instruction pointers might be read. This is the
1173 behaviour of all 'emit-a-complaint' style functions we might
1174 call. */
1175
1176static void setHelperAnns ( MCEnv* mce, IRDirty* di ) {
1177 di->nFxState = 2;
sewardj2eecb742012-06-01 16:11:41 +00001178 di->fxState[0].fx = Ifx_Read;
1179 di->fxState[0].offset = mce->layout->offset_SP;
1180 di->fxState[0].size = mce->layout->sizeof_SP;
1181 di->fxState[0].nRepeats = 0;
1182 di->fxState[0].repeatLen = 0;
1183 di->fxState[1].fx = Ifx_Read;
1184 di->fxState[1].offset = mce->layout->offset_IP;
1185 di->fxState[1].size = mce->layout->sizeof_IP;
1186 di->fxState[1].nRepeats = 0;
1187 di->fxState[1].repeatLen = 0;
sewardj95448072004-11-22 20:19:51 +00001188}
1189
1190
sewardjcafe5052013-01-17 14:24:35 +00001191/* Check the supplied *original* |atom| for undefinedness, and emit a
sewardj95448072004-11-22 20:19:51 +00001192 complaint if so. Once that happens, mark it as defined. This is
1193 possible because the atom is either a tmp or literal. If it's a
1194 tmp, it will be shadowed by a tmp, and so we can set the shadow to
1195 be defined. In fact as mentioned above, we will have to allocate a
1196 new tmp to carry the new 'defined' shadow value, and update the
1197 original->tmp mapping accordingly; we cannot simply assign a new
sewardjcafe5052013-01-17 14:24:35 +00001198 value to an existing shadow tmp as this breaks SSAness.
1199
sewardjb9e6d242013-05-11 13:42:08 +00001200 The checks are performed, any resulting complaint emitted, and
1201 |atom|'s shadow temp set to 'defined', ONLY in the case that
1202 |guard| evaluates to True at run-time. If it evaluates to False
1203 then no action is performed. If |guard| is NULL (the usual case)
1204 then it is assumed to be always-true, and hence these actions are
1205 performed unconditionally.
1206
1207 This routine does not generate code to check the definedness of
1208 |guard|. The caller is assumed to have taken care of that already.
sewardj95448072004-11-22 20:19:51 +00001209*/
static void complainIfUndefined ( MCEnv* mce, IRAtom* atom, IRExpr *guard )
{
   IRAtom*  vatom;
   IRType   ty;
   Int      sz;
   IRDirty* di;
   IRAtom*  cond;
   IRAtom*  origin;
   void*    fn;
   const HChar* nm;
   IRExpr** args;
   Int      nargs;

   // Don't do V bit tests if we're not reporting undefined value errors.
   if (MC_(clo_mc_level) == 1)
      return;

   if (guard)
      tl_assert(isOriginalAtom(mce, guard));

   /* Since the original expression is atomic, there's no duplicated
      work generated by making multiple V-expressions for it.  So we
      don't really care about the possibility that someone else may
      also create a V-interpretion for it. */
   tl_assert(isOriginalAtom(mce, atom));
   vatom = expr2vbits( mce, atom );
   tl_assert(isShadowAtom(mce, vatom));
   tl_assert(sameKindedAtoms(atom, vatom));

   ty = typeOfIRExpr(mce->sb->tyenv, vatom);

   /* sz is only used for constructing the error message */
   sz = ty==Ity_I1 ? 0 : sizeofIRType(ty);

   cond = mkPCastTo( mce, Ity_I1, vatom );
   /* cond will be 0 if all defined, and 1 if any not defined. */

   /* Get the origin info for the value we are about to check.  At
      least, if we are doing origin tracking.  If not, use a dummy
      zero origin. */
   if (MC_(clo_mc_level) == 3) {
      origin = schemeE( mce, atom );
      if (mce->hWordTy == Ity_I64) {
         /* Origins are 32-bit; widen for a 64-bit host word. */
         origin = assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, origin) );
      }
   } else {
      origin = NULL;
   }

   fn    = NULL;
   nm    = NULL;
   args  = NULL;
   nargs = -1;

   /* Select the complaint helper by value size.  Sizes 0 (I1), 1, 4
      and 8 have dedicated helpers; 2 and 16 go through the generic
      N-byte helper, which takes the size as an extra argument. */
   switch (sz) {
      case 0:
         if (origin) {
            fn    = &MC_(helperc_value_check0_fail_w_o);
            nm    = "MC_(helperc_value_check0_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check0_fail_no_o);
            nm    = "MC_(helperc_value_check0_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 1:
         if (origin) {
            fn    = &MC_(helperc_value_check1_fail_w_o);
            nm    = "MC_(helperc_value_check1_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check1_fail_no_o);
            nm    = "MC_(helperc_value_check1_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 4:
         if (origin) {
            fn    = &MC_(helperc_value_check4_fail_w_o);
            nm    = "MC_(helperc_value_check4_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check4_fail_no_o);
            nm    = "MC_(helperc_value_check4_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 8:
         if (origin) {
            fn    = &MC_(helperc_value_check8_fail_w_o);
            nm    = "MC_(helperc_value_check8_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check8_fail_no_o);
            nm    = "MC_(helperc_value_check8_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 2:
      case 16:
         if (origin) {
            fn    = &MC_(helperc_value_checkN_fail_w_o);
            nm    = "MC_(helperc_value_checkN_fail_w_o)";
            args  = mkIRExprVec_2( mkIRExpr_HWord( sz ), origin);
            nargs = 2;
         } else {
            fn    = &MC_(helperc_value_checkN_fail_no_o);
            nm    = "MC_(helperc_value_checkN_fail_no_o)";
            args  = mkIRExprVec_1( mkIRExpr_HWord( sz ) );
            nargs = 1;
         }
         break;
      default:
         VG_(tool_panic)("unexpected szB");
   }

   tl_assert(fn);
   tl_assert(nm);
   tl_assert(args);
   tl_assert(nargs >= 0 && nargs <= 2);
   tl_assert( (MC_(clo_mc_level) == 3 && origin != NULL)
              || (MC_(clo_mc_level) == 2 && origin == NULL) );

   di = unsafeIRDirty_0_N( nargs/*regparms*/, nm,
                           VG_(fnptr_to_fnentry)( fn ), args );
   di->guard = cond; // and cond is PCast-to-1(atom#)

   /* If the complaint is to be issued under a guard condition, AND
      that into the guard condition for the helper call. */
   if (guard) {
      IRAtom *g1 = assignNew('V', mce, Ity_I32, unop(Iop_1Uto32, di->guard));
      IRAtom *g2 = assignNew('V', mce, Ity_I32, unop(Iop_1Uto32, guard));
      IRAtom *e  = assignNew('V', mce, Ity_I32, binop(Iop_And32, g1, g2));
      di->guard  = assignNew('V', mce, Ity_I1,  unop(Iop_32to1, e));
   }

   setHelperAnns( mce, di );
   stmt( 'V', mce, IRStmt_Dirty(di));

   /* If |atom| is shadowed by an IRTemp, set the shadow tmp to be
      defined -- but only in the case where the guard evaluates to
      True at run-time.  Do the update by setting the orig->shadow
      mapping for tmp to reflect the fact that this shadow is getting
      a new value. */
   tl_assert(isIRAtom(vatom));
   /* sameKindedAtoms ... */
   if (vatom->tag == Iex_RdTmp) {
      tl_assert(atom->tag == Iex_RdTmp);
      if (guard == NULL) {
         // guard is 'always True', hence update unconditionally
         newShadowTmpV(mce, atom->Iex.RdTmp.tmp);
         assign('V', mce, findShadowTmpV(mce, atom->Iex.RdTmp.tmp),
                          definedOfType(ty));
      } else {
         // update the temp only conditionally.  Do this by copying
         // its old value when the guard is False.
         // The old value ..
         IRTemp old_tmpV = findShadowTmpV(mce, atom->Iex.RdTmp.tmp);
         newShadowTmpV(mce, atom->Iex.RdTmp.tmp);
         IRAtom* new_tmpV
            = assignNew('V', mce, shadowTypeV(ty),
                        IRExpr_ITE(guard, definedOfType(ty),
                                          mkexpr(old_tmpV)));
         assign('V', mce, findShadowTmpV(mce, atom->Iex.RdTmp.tmp), new_tmpV);
      }
   }
}
1386
1387
1388/*------------------------------------------------------------*/
1389/*--- Shadowing PUTs/GETs, and indexed variants thereof ---*/
1390/*------------------------------------------------------------*/
1391
1392/* Examine the always-defined sections declared in layout to see if
1393 the (offset,size) section is within one. Note, is is an error to
1394 partially fall into such a region: (offset,size) should either be
1395 completely in such a region or completely not-in such a region.
1396*/
1397static Bool isAlwaysDefd ( MCEnv* mce, Int offset, Int size )
1398{
1399 Int minoffD, maxoffD, i;
1400 Int minoff = offset;
1401 Int maxoff = minoff + size - 1;
1402 tl_assert((minoff & ~0xFFFF) == 0);
1403 tl_assert((maxoff & ~0xFFFF) == 0);
1404
1405 for (i = 0; i < mce->layout->n_alwaysDefd; i++) {
1406 minoffD = mce->layout->alwaysDefd[i].offset;
1407 maxoffD = minoffD + mce->layout->alwaysDefd[i].size - 1;
1408 tl_assert((minoffD & ~0xFFFF) == 0);
1409 tl_assert((maxoffD & ~0xFFFF) == 0);
1410
1411 if (maxoff < minoffD || maxoffD < minoff)
1412 continue; /* no overlap */
1413 if (minoff >= minoffD && maxoff <= maxoffD)
1414 return True; /* completely contained in an always-defd section */
1415
1416 VG_(tool_panic)("memcheck:isAlwaysDefd:partial overlap");
1417 }
1418 return False; /* could not find any containing section */
1419}
1420
1421
1422/* Generate into bb suitable actions to shadow this Put. If the state
1423 slice is marked 'always defined', do nothing. Otherwise, write the
1424 supplied V bits to the shadow state. We can pass in either an
1425 original atom or a V-atom, but not both. In the former case the
1426 relevant V-bits are then generated from the original.
florian434ffae2012-07-19 17:23:42 +00001427 We assume here, that the definedness of GUARD has already been checked.
sewardj95448072004-11-22 20:19:51 +00001428*/
1429static
1430void do_shadow_PUT ( MCEnv* mce, Int offset,
florian434ffae2012-07-19 17:23:42 +00001431 IRAtom* atom, IRAtom* vatom, IRExpr *guard )
sewardj95448072004-11-22 20:19:51 +00001432{
sewardj7cf97ee2004-11-28 14:25:01 +00001433 IRType ty;
njn1d0825f2006-03-27 11:37:07 +00001434
1435 // Don't do shadow PUTs if we're not doing undefined value checking.
1436 // Their absence lets Vex's optimiser remove all the shadow computation
1437 // that they depend on, which includes GETs of the shadow registers.
sewardj7cf4e6b2008-05-01 20:24:26 +00001438 if (MC_(clo_mc_level) == 1)
njn1d0825f2006-03-27 11:37:07 +00001439 return;
1440
sewardj95448072004-11-22 20:19:51 +00001441 if (atom) {
1442 tl_assert(!vatom);
1443 tl_assert(isOriginalAtom(mce, atom));
1444 vatom = expr2vbits( mce, atom );
1445 } else {
1446 tl_assert(vatom);
1447 tl_assert(isShadowAtom(mce, vatom));
1448 }
1449
sewardj1c0ce7a2009-07-01 08:10:49 +00001450 ty = typeOfIRExpr(mce->sb->tyenv, vatom);
sewardj95448072004-11-22 20:19:51 +00001451 tl_assert(ty != Ity_I1);
sewardjb5b87402011-03-07 16:05:35 +00001452 tl_assert(ty != Ity_I128);
sewardj95448072004-11-22 20:19:51 +00001453 if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
1454 /* later: no ... */
1455 /* emit code to emit a complaint if any of the vbits are 1. */
1456 /* complainIfUndefined(mce, atom); */
1457 } else {
1458 /* Do a plain shadow Put. */
florian434ffae2012-07-19 17:23:42 +00001459 if (guard) {
1460 /* If the guard expression evaluates to false we simply Put the value
1461 that is already stored in the guest state slot */
1462 IRAtom *cond, *iffalse;
1463
sewardjcc961652013-01-26 11:49:15 +00001464 cond = assignNew('V', mce, Ity_I1, guard);
florian434ffae2012-07-19 17:23:42 +00001465 iffalse = assignNew('V', mce, ty,
1466 IRExpr_Get(offset + mce->layout->total_sizeB, ty));
florian5686b2d2013-01-29 03:57:40 +00001467 vatom = assignNew('V', mce, ty, IRExpr_ITE(cond, vatom, iffalse));
florian434ffae2012-07-19 17:23:42 +00001468 }
1469 stmt( 'V', mce, IRStmt_Put( offset + mce->layout->total_sizeB, vatom ));
sewardj95448072004-11-22 20:19:51 +00001470 }
1471}
1472
1473
1474/* Return an expression which contains the V bits corresponding to the
1475 given GETI (passed in in pieces).
1476*/
1477static
floriand39b0222012-05-31 15:48:13 +00001478void do_shadow_PUTI ( MCEnv* mce, IRPutI *puti)
sewardj95448072004-11-22 20:19:51 +00001479{
sewardj7cf97ee2004-11-28 14:25:01 +00001480 IRAtom* vatom;
1481 IRType ty, tyS;
1482 Int arrSize;;
floriand39b0222012-05-31 15:48:13 +00001483 IRRegArray* descr = puti->descr;
1484 IRAtom* ix = puti->ix;
1485 Int bias = puti->bias;
1486 IRAtom* atom = puti->data;
sewardj7cf97ee2004-11-28 14:25:01 +00001487
njn1d0825f2006-03-27 11:37:07 +00001488 // Don't do shadow PUTIs if we're not doing undefined value checking.
1489 // Their absence lets Vex's optimiser remove all the shadow computation
1490 // that they depend on, which includes GETIs of the shadow registers.
sewardj7cf4e6b2008-05-01 20:24:26 +00001491 if (MC_(clo_mc_level) == 1)
njn1d0825f2006-03-27 11:37:07 +00001492 return;
1493
sewardj95448072004-11-22 20:19:51 +00001494 tl_assert(isOriginalAtom(mce,atom));
sewardj7cf97ee2004-11-28 14:25:01 +00001495 vatom = expr2vbits( mce, atom );
sewardj95448072004-11-22 20:19:51 +00001496 tl_assert(sameKindedAtoms(atom, vatom));
sewardj7cf97ee2004-11-28 14:25:01 +00001497 ty = descr->elemTy;
sewardj7cf4e6b2008-05-01 20:24:26 +00001498 tyS = shadowTypeV(ty);
sewardj7cf97ee2004-11-28 14:25:01 +00001499 arrSize = descr->nElems * sizeofIRType(ty);
sewardj95448072004-11-22 20:19:51 +00001500 tl_assert(ty != Ity_I1);
1501 tl_assert(isOriginalAtom(mce,ix));
sewardjb9e6d242013-05-11 13:42:08 +00001502 complainIfUndefined(mce, ix, NULL);
sewardj95448072004-11-22 20:19:51 +00001503 if (isAlwaysDefd(mce, descr->base, arrSize)) {
1504 /* later: no ... */
1505 /* emit code to emit a complaint if any of the vbits are 1. */
1506 /* complainIfUndefined(mce, atom); */
1507 } else {
1508 /* Do a cloned version of the Put that refers to the shadow
1509 area. */
sewardj0b9d74a2006-12-24 02:24:11 +00001510 IRRegArray* new_descr
1511 = mkIRRegArray( descr->base + mce->layout->total_sizeB,
1512 tyS, descr->nElems);
floriand39b0222012-05-31 15:48:13 +00001513 stmt( 'V', mce, IRStmt_PutI( mkIRPutI(new_descr, ix, bias, vatom) ));
sewardj95448072004-11-22 20:19:51 +00001514 }
1515}
1516
1517
1518/* Return an expression which contains the V bits corresponding to the
1519 given GET (passed in in pieces).
1520*/
1521static
1522IRExpr* shadow_GET ( MCEnv* mce, Int offset, IRType ty )
1523{
sewardj7cf4e6b2008-05-01 20:24:26 +00001524 IRType tyS = shadowTypeV(ty);
sewardj95448072004-11-22 20:19:51 +00001525 tl_assert(ty != Ity_I1);
sewardjb5b87402011-03-07 16:05:35 +00001526 tl_assert(ty != Ity_I128);
sewardj95448072004-11-22 20:19:51 +00001527 if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
1528 /* Always defined, return all zeroes of the relevant type */
1529 return definedOfType(tyS);
1530 } else {
1531 /* return a cloned version of the Get that refers to the shadow
1532 area. */
sewardj7cf4e6b2008-05-01 20:24:26 +00001533 /* FIXME: this isn't an atom! */
sewardj95448072004-11-22 20:19:51 +00001534 return IRExpr_Get( offset + mce->layout->total_sizeB, tyS );
1535 }
1536}
1537
1538
1539/* Return an expression which contains the V bits corresponding to the
1540 given GETI (passed in in pieces).
1541*/
1542static
sewardj0b9d74a2006-12-24 02:24:11 +00001543IRExpr* shadow_GETI ( MCEnv* mce,
1544 IRRegArray* descr, IRAtom* ix, Int bias )
sewardj95448072004-11-22 20:19:51 +00001545{
1546 IRType ty = descr->elemTy;
sewardj7cf4e6b2008-05-01 20:24:26 +00001547 IRType tyS = shadowTypeV(ty);
sewardj95448072004-11-22 20:19:51 +00001548 Int arrSize = descr->nElems * sizeofIRType(ty);
1549 tl_assert(ty != Ity_I1);
1550 tl_assert(isOriginalAtom(mce,ix));
sewardjb9e6d242013-05-11 13:42:08 +00001551 complainIfUndefined(mce, ix, NULL);
sewardj95448072004-11-22 20:19:51 +00001552 if (isAlwaysDefd(mce, descr->base, arrSize)) {
1553 /* Always defined, return all zeroes of the relevant type */
1554 return definedOfType(tyS);
1555 } else {
1556 /* return a cloned version of the Get that refers to the shadow
1557 area. */
sewardj0b9d74a2006-12-24 02:24:11 +00001558 IRRegArray* new_descr
1559 = mkIRRegArray( descr->base + mce->layout->total_sizeB,
1560 tyS, descr->nElems);
sewardj95448072004-11-22 20:19:51 +00001561 return IRExpr_GetI( new_descr, ix, bias );
1562 }
1563}
1564
1565
1566/*------------------------------------------------------------*/
1567/*--- Generating approximations for unknown operations, ---*/
1568/*--- using lazy-propagate semantics ---*/
1569/*------------------------------------------------------------*/
1570
1571/* Lazy propagation of undefinedness from two values, resulting in the
1572 specified shadow type.
1573*/
1574static
1575IRAtom* mkLazy2 ( MCEnv* mce, IRType finalVty, IRAtom* va1, IRAtom* va2 )
1576{
sewardj95448072004-11-22 20:19:51 +00001577 IRAtom* at;
sewardj1c0ce7a2009-07-01 08:10:49 +00001578 IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
1579 IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
sewardj95448072004-11-22 20:19:51 +00001580 tl_assert(isShadowAtom(mce,va1));
1581 tl_assert(isShadowAtom(mce,va2));
sewardj37c31cc2005-04-26 23:49:24 +00001582
1583 /* The general case is inefficient because PCast is an expensive
1584 operation. Here are some special cases which use PCast only
1585 once rather than twice. */
1586
1587 /* I64 x I64 -> I64 */
1588 if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I64) {
1589 if (0) VG_(printf)("mkLazy2: I64 x I64 -> I64\n");
1590 at = mkUifU(mce, Ity_I64, va1, va2);
1591 at = mkPCastTo(mce, Ity_I64, at);
1592 return at;
1593 }
1594
1595 /* I64 x I64 -> I32 */
1596 if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I32) {
1597 if (0) VG_(printf)("mkLazy2: I64 x I64 -> I32\n");
1598 at = mkUifU(mce, Ity_I64, va1, va2);
1599 at = mkPCastTo(mce, Ity_I32, at);
1600 return at;
1601 }
1602
1603 if (0) {
1604 VG_(printf)("mkLazy2 ");
1605 ppIRType(t1);
1606 VG_(printf)("_");
1607 ppIRType(t2);
1608 VG_(printf)("_");
1609 ppIRType(finalVty);
1610 VG_(printf)("\n");
1611 }
1612
1613 /* General case: force everything via 32-bit intermediaries. */
sewardj95448072004-11-22 20:19:51 +00001614 at = mkPCastTo(mce, Ity_I32, va1);
1615 at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
1616 at = mkPCastTo(mce, finalVty, at);
1617 return at;
1618}
1619
1620
/* 3-arg version of the above.  Any type combination not handled by a
   special case below is reported and asserts. */
static
IRAtom* mkLazy3 ( MCEnv* mce, IRType finalVty, 
                  IRAtom* va1, IRAtom* va2, IRAtom* va3 )
{
   IRAtom* at;
   IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
   IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
   IRType t3 = typeOfIRExpr(mce->sb->tyenv, va3);
   tl_assert(isShadowAtom(mce,va1));
   tl_assert(isShadowAtom(mce,va2));
   tl_assert(isShadowAtom(mce,va3));

   /* The general case is inefficient because PCast is an expensive
      operation.  Here are some special cases which use PCast only
      twice rather than three times. */

   /* I32 x I64 x I64 -> I64 */
   /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
   if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64 
       && finalVty == Ity_I64) {
      if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I64\n");
      /* Widen 1st arg to I64.  Since 1st arg is typically a rounding
         mode indication which is fully defined, this should get
         folded out later. */
      at = mkPCastTo(mce, Ity_I64, va1);
      /* Now fold in 2nd and 3rd args. */
      at = mkUifU(mce, Ity_I64, at, va2);
      at = mkUifU(mce, Ity_I64, at, va3);
      /* and PCast once again. */
      at = mkPCastTo(mce, Ity_I64, at);
      return at;
   }

   /* I32 x I8 x I64 -> I64 */
   if (t1 == Ity_I32 && t2 == Ity_I8 && t3 == Ity_I64
       && finalVty == Ity_I64) {
      if (0) VG_(printf)("mkLazy3: I32 x I8 x I64 -> I64\n");
      /* Widen 1st and 2nd args to I64.  Since 1st arg is typically a
       * rounding mode indication which is fully defined, this should
       * get folded out later.
       */
      IRAtom* at1 = mkPCastTo(mce, Ity_I64, va1);
      IRAtom* at2 = mkPCastTo(mce, Ity_I64, va2);
      at = mkUifU(mce, Ity_I64, at1, at2);  // UifU(PCast(va1), PCast(va2))
      at = mkUifU(mce, Ity_I64, at, va3);
      /* and PCast once again. */
      at = mkPCastTo(mce, Ity_I64, at);
      return at;
   }

   /* I32 x I64 x I64 -> I32 */
   if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64 
       && finalVty == Ity_I32) {
      if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I32\n");
      at = mkPCastTo(mce, Ity_I64, va1);
      at = mkUifU(mce, Ity_I64, at, va2);
      at = mkUifU(mce, Ity_I64, at, va3);
      at = mkPCastTo(mce, Ity_I32, at);
      return at;
   }

   /* I32 x I32 x I32 -> I32 */
   /* 32-bit FP idiom, as (eg) happens on ARM */
   if (t1 == Ity_I32 && t2 == Ity_I32 && t3 == Ity_I32 
       && finalVty == Ity_I32) {
      if (0) VG_(printf)("mkLazy3: I32 x I32 x I32 -> I32\n");
      /* All args already I32, so no widening PCasts are needed. */
      at = va1;
      at = mkUifU(mce, Ity_I32, at, va2);
      at = mkUifU(mce, Ity_I32, at, va3);
      at = mkPCastTo(mce, Ity_I32, at);
      return at;
   }

   /* I32 x I128 x I128 -> I128 */
   /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
   if (t1 == Ity_I32 && t2 == Ity_I128 && t3 == Ity_I128
       && finalVty == Ity_I128) {
      if (0) VG_(printf)("mkLazy3: I32 x I128 x I128 -> I128\n");
      /* Widen 1st arg to I128.  Since 1st arg is typically a rounding
         mode indication which is fully defined, this should get
         folded out later. */
      at = mkPCastTo(mce, Ity_I128, va1);
      /* Now fold in 2nd and 3rd args. */
      at = mkUifU(mce, Ity_I128, at, va2);
      at = mkUifU(mce, Ity_I128, at, va3);
      /* and PCast once again. */
      at = mkPCastTo(mce, Ity_I128, at);
      return at;
   }

   /* I32 x I8 x I128 -> I128 */
   /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
   if (t1 == Ity_I32 && t2 == Ity_I8 && t3 == Ity_I128
       && finalVty == Ity_I128) {
      if (0) VG_(printf)("mkLazy3: I32 x I8 x I128 -> I128\n");
      /* Use I64 as an intermediate type, which means PCasting all 3
         args to I64 to start with.  1st arg is typically a rounding
         mode indication which is fully defined, so we hope that it
         will get folded out later. */
      IRAtom* at1 = mkPCastTo(mce, Ity_I64, va1);
      IRAtom* at2 = mkPCastTo(mce, Ity_I64, va2);
      IRAtom* at3 = mkPCastTo(mce, Ity_I64, va3);
      /* Now UifU all three together. */
      at = mkUifU(mce, Ity_I64, at1, at2);  // UifU(PCast(va1), PCast(va2))
      at = mkUifU(mce, Ity_I64, at, at3);   // ... `UifU` PCast(va3)
      /* and PCast once again. */
      at = mkPCastTo(mce, Ity_I128, at);
      return at;
   }
   /* Unhandled type combination: report it and fail, so the missing
      case gets noticed and added. */
   if (1) {
      VG_(printf)("mkLazy3: ");
      ppIRType(t1);
      VG_(printf)(" x ");
      ppIRType(t2);
      VG_(printf)(" x ");
      ppIRType(t3);
      VG_(printf)(" -> ");
      ppIRType(finalVty);
      VG_(printf)("\n");
   }

   tl_assert(0);
   /* General case: force everything via 32-bit intermediaries. */
   /*
   at = mkPCastTo(mce, Ity_I32, va1);
   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va3));
   at = mkPCastTo(mce, finalVty, at);
   return at;
   */
}
1753
1754
/* 4-arg version of the above.  Any type combination not handled by a
   special case below is reported and asserts. */
static
IRAtom* mkLazy4 ( MCEnv* mce, IRType finalVty, 
                  IRAtom* va1, IRAtom* va2, IRAtom* va3, IRAtom* va4 )
{
   IRAtom* at;
   IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
   IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
   IRType t3 = typeOfIRExpr(mce->sb->tyenv, va3);
   IRType t4 = typeOfIRExpr(mce->sb->tyenv, va4);
   tl_assert(isShadowAtom(mce,va1));
   tl_assert(isShadowAtom(mce,va2));
   tl_assert(isShadowAtom(mce,va3));
   tl_assert(isShadowAtom(mce,va4));

   /* The general case is inefficient because PCast is an expensive
      operation.  Here are some special cases which use PCast at most
      twice rather than once per argument. */

   /* I32 x I64 x I64 x I64 -> I64 */
   /* Standard FP idiom: rm x FParg1 x FParg2 x FParg3 -> FPresult */
   if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64 && t4 == Ity_I64
       && finalVty == Ity_I64) {
      if (0) VG_(printf)("mkLazy4: I32 x I64 x I64 x I64 -> I64\n");
      /* Widen 1st arg to I64.  Since 1st arg is typically a rounding
         mode indication which is fully defined, this should get
         folded out later. */
      at = mkPCastTo(mce, Ity_I64, va1);
      /* Now fold in 2nd, 3rd, 4th args. */
      at = mkUifU(mce, Ity_I64, at, va2);
      at = mkUifU(mce, Ity_I64, at, va3);
      at = mkUifU(mce, Ity_I64, at, va4);
      /* and PCast once again. */
      at = mkPCastTo(mce, Ity_I64, at);
      return at;
   }
   /* I32 x I32 x I32 x I32 -> I32 */
   /* Standard FP idiom: rm x FParg1 x FParg2 x FParg3 -> FPresult */
   if (t1 == Ity_I32 && t2 == Ity_I32 && t3 == Ity_I32 && t4 == Ity_I32
       && finalVty == Ity_I32) {
      if (0) VG_(printf)("mkLazy4: I32 x I32 x I32 x I32 -> I32\n");
      /* All args already I32, so no widening PCasts are needed. */
      at = va1;
      /* Now fold in 2nd, 3rd, 4th args. */
      at = mkUifU(mce, Ity_I32, at, va2);
      at = mkUifU(mce, Ity_I32, at, va3);
      at = mkUifU(mce, Ity_I32, at, va4);
      at = mkPCastTo(mce, Ity_I32, at);
      return at;
   }

   /* Unhandled type combination: report it and fail, so the missing
      case gets noticed and added. */
   if (1) {
      VG_(printf)("mkLazy4: ");
      ppIRType(t1);
      VG_(printf)(" x ");
      ppIRType(t2);
      VG_(printf)(" x ");
      ppIRType(t3);
      VG_(printf)(" x ");
      ppIRType(t4);
      VG_(printf)(" -> ");
      ppIRType(finalVty);
      VG_(printf)("\n");
   }

   tl_assert(0);
}
1821
1822
/* Do the lazy propagation game from a null-terminated vector of
   atoms.  This is presumably the arguments to a helper call, so the
   IRCallee info is also supplied in order that we can know which
   arguments should be ignored (via the .mcx_mask field). 
*/
static
IRAtom* mkLazyN ( MCEnv* mce, 
                  IRAtom** exprvec, IRType finalVtype, IRCallee* cee )
{
   Int     i;
   IRAtom* here;
   IRAtom* curr;
   IRType  mergeTy;
   Bool    mergeTy64 = True;

   /* Decide on the type of the merge intermediary.  If all relevant
      args are I64, then it's I64.  In all other circumstances, use
      I32. */
   for (i = 0; exprvec[i]; i++) {
      tl_assert(i < 32);  /* mcx_mask is tested with (1<<i) below */
      tl_assert(isOriginalAtom(mce, exprvec[i]));
      if (cee->mcx_mask & (1<<i))
         continue;
      if (typeOfIRExpr(mce->sb->tyenv, exprvec[i]) != Ity_I64)
         mergeTy64 = False;
   }

   mergeTy = mergeTy64  ? Ity_I64  : Ity_I32;
   /* Start from "all defined" and pessimistically merge args in. */
   curr    = definedOfType(mergeTy);

   for (i = 0; exprvec[i]; i++) {
      tl_assert(i < 32);
      tl_assert(isOriginalAtom(mce, exprvec[i]));
      /* Only take notice of this arg if the callee's mc-exclusion
         mask does not say it is to be excluded. */
      if (cee->mcx_mask & (1<<i)) {
         /* the arg is to be excluded from definedness checking.  Do
            nothing. */
         if (0) VG_(printf)("excluding %s(%d)\n", cee->name, i);
      } else {
         /* calculate the arg's definedness, and pessimistically merge
            it in. */
         here = mkPCastTo( mce, mergeTy, expr2vbits(mce, exprvec[i]) );
         curr = mergeTy64 
                   ? mkUifU64(mce, here, curr)
                   : mkUifU32(mce, here, curr);
      }
   }
   return mkPCastTo(mce, finalVtype, curr );
}
1873
1874
1875/*------------------------------------------------------------*/
1876/*--- Generating expensive sequences for exact carry-chain ---*/
1877/*--- propagation in add/sub and related operations. ---*/
1878/*------------------------------------------------------------*/
1879
static
IRAtom* expensiveAddSub ( MCEnv*  mce,
                          Bool    add,
                          IRType  ty,
                          IRAtom* qaa, IRAtom* qbb, 
                          IRAtom* aa,  IRAtom* bb )
{
   /* Exact (carry-chain aware) V-bit propagation for aa+bb / aa-bb.
      qaa/qbb are the shadows for aa/bb (1 bits = undefined).  We form
      the smallest (a_min, b_min) and largest (a_max, b_max) values
      the operands could take given their undefined bits, perform the
      operation on both extremes, and mark as undefined any result bit
      that differs between the two, plus any bit undefined in either
      operand. */
   IRAtom *a_min, *b_min, *a_max, *b_max;
   IROp   opAND, opOR, opXOR, opNOT, opADD, opSUB;

   tl_assert(isShadowAtom(mce,qaa));
   tl_assert(isShadowAtom(mce,qbb));
   tl_assert(isOriginalAtom(mce,aa));
   tl_assert(isOriginalAtom(mce,bb));
   tl_assert(sameKindedAtoms(qaa,aa));
   tl_assert(sameKindedAtoms(qbb,bb));

   /* Select the width-appropriate primops. */
   switch (ty) {
      case Ity_I32:
         opAND = Iop_And32;
         opOR  = Iop_Or32;
         opXOR = Iop_Xor32;
         opNOT = Iop_Not32;
         opADD = Iop_Add32;
         opSUB = Iop_Sub32;
         break;
      case Ity_I64:
         opAND = Iop_And64;
         opOR  = Iop_Or64;
         opXOR = Iop_Xor64;
         opNOT = Iop_Not64;
         opADD = Iop_Add64;
         opSUB = Iop_Sub64;
         break;
      default:
         VG_(tool_panic)("expensiveAddSub");
   }

   // a_min = aa & ~qaa  (undefined bits forced to zero)
   a_min = assignNew('V', mce,ty, 
                     binop(opAND, aa,
                                  assignNew('V', mce,ty, unop(opNOT, qaa))));

   // b_min = bb & ~qbb
   b_min = assignNew('V', mce,ty, 
                     binop(opAND, bb,
                                  assignNew('V', mce,ty, unop(opNOT, qbb))));

   // a_max = aa | qaa  (undefined bits forced to one)
   a_max = assignNew('V', mce,ty, binop(opOR, aa, qaa));

   // b_max = bb | qbb
   b_max = assignNew('V', mce,ty, binop(opOR, bb, qbb));

   if (add) {
      // result = (qaa | qbb) | ((a_min + b_min) ^ (a_max + b_max))
      return
      assignNew('V', mce,ty,
                binop( opOR,
                       assignNew('V', mce,ty, binop(opOR, qaa, qbb)),
                       assignNew('V', mce,ty, 
                                 binop( opXOR, 
                                        assignNew('V', mce,ty, binop(opADD, a_min, b_min)),
                                        assignNew('V', mce,ty, binop(opADD, a_max, b_max))
                                 )
                       )
                )
      );
   } else {
      // For subtraction, minimise by subtracting the largest possible
      // bb from the smallest possible aa, and vice versa:
      // result = (qaa | qbb) | ((a_min - b_max) ^ (a_max - b_min))
      return
      assignNew('V', mce,ty,
                binop( opOR,
                       assignNew('V', mce,ty, binop(opOR, qaa, qbb)),
                       assignNew('V', mce,ty, 
                                 binop( opXOR, 
                                        assignNew('V', mce,ty, binop(opSUB, a_min, b_max)),
                                        assignNew('V', mce,ty, binop(opSUB, a_max, b_min))
                                 )
                       )
                )
      );
   }

}
1965
1966
/* Exact V-bit propagation for Ctz32/Ctz64: the count depends only on
   the lowest set bit of |atom| and the bits below it, so undefined
   bits strictly above the lowest set bit cannot affect the result. */
static
IRAtom* expensiveCountTrailingZeroes ( MCEnv* mce, IROp czop,
                                       IRAtom* atom, IRAtom* vatom )
{
   IRType ty;
   IROp xorOp, subOp, andOp;
   IRExpr *one;
   IRAtom *improver, *improved;
   tl_assert(isShadowAtom(mce,vatom));
   tl_assert(isOriginalAtom(mce,atom));
   tl_assert(sameKindedAtoms(atom,vatom));

   switch (czop) {
      case Iop_Ctz32:
         ty = Ity_I32;
         xorOp = Iop_Xor32;
         subOp = Iop_Sub32;
         andOp = Iop_And32;
         one = mkU32(1);
         break;
      case Iop_Ctz64:
         ty = Ity_I64;
         xorOp = Iop_Xor64;
         subOp = Iop_Sub64;
         andOp = Iop_And64;
         one = mkU64(1);
         break;
      default:
         ppIROp(czop);
         VG_(tool_panic)("memcheck:expensiveCountTrailingZeroes");
   }

   // improver = atom ^ (atom - 1)
   //
   // That is, improver has its low ctz(atom)+1 bits equal to one
   // (the lowest set bit of atom and everything below it); higher
   // bits (if any) equal to zero.  If atom == 0, improver is all
   // ones, so no V bits are discarded.
   improver = assignNew('V', mce,ty,
                        binop(xorOp,
                              atom,
                              assignNew('V', mce, ty,
                                        binop(subOp, atom, one))));

   // improved = vatom & improver
   //
   // That is, treat any V bits above the lowest set bit of atom as
   // "defined", since they cannot influence the count.
   improved = assignNew('V', mce, ty,
                        binop(andOp, vatom, improver));

   // Return pessimizing cast of improved.
   return mkPCastTo(mce, ty, improved);
}
2019
2020
sewardj95448072004-11-22 20:19:51 +00002021/*------------------------------------------------------------*/
sewardjaaddbc22005-10-07 09:49:53 +00002022/*--- Scalar shifts. ---*/
2023/*------------------------------------------------------------*/
2024
2025/* Produce an interpretation for (aa << bb) (or >>s, >>u). The basic
2026 idea is to shift the definedness bits by the original shift amount.
2027 This introduces 0s ("defined") in new positions for left shifts and
2028 unsigned right shifts, and copies the top definedness bit for
2029 signed right shifts. So, conveniently, applying the original shift
2030 operator to the definedness bits for the left arg is exactly the
2031 right thing to do:
2032
2033 (qaa << bb)
2034
2035 However if the shift amount is undefined then the whole result
2036 is undefined. Hence need:
2037
2038 (qaa << bb) `UifU` PCast(qbb)
2039
2040 If the shift amount bb is a literal than qbb will say 'all defined'
2041 and the UifU and PCast will get folded out by post-instrumentation
2042 optimisation.
2043*/
2044static IRAtom* scalarShift ( MCEnv* mce,
2045 IRType ty,
2046 IROp original_op,
2047 IRAtom* qaa, IRAtom* qbb,
2048 IRAtom* aa, IRAtom* bb )
2049{
2050 tl_assert(isShadowAtom(mce,qaa));
2051 tl_assert(isShadowAtom(mce,qbb));
2052 tl_assert(isOriginalAtom(mce,aa));
2053 tl_assert(isOriginalAtom(mce,bb));
2054 tl_assert(sameKindedAtoms(qaa,aa));
2055 tl_assert(sameKindedAtoms(qbb,bb));
2056 return
2057 assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +00002058 'V', mce, ty,
sewardjaaddbc22005-10-07 09:49:53 +00002059 mkUifU( mce, ty,
sewardj7cf4e6b2008-05-01 20:24:26 +00002060 assignNew('V', mce, ty, binop(original_op, qaa, bb)),
sewardjaaddbc22005-10-07 09:49:53 +00002061 mkPCastTo(mce, ty, qbb)
2062 )
2063 );
2064}
2065
2066
2067/*------------------------------------------------------------*/
2068/*--- Helpers for dealing with vector primops. ---*/
sewardj3245c912004-12-10 14:58:26 +00002069/*------------------------------------------------------------*/
2070
sewardja1d93302004-12-12 16:45:06 +00002071/* Vector pessimisation -- pessimise within each lane individually. */
2072
2073static IRAtom* mkPCast8x16 ( MCEnv* mce, IRAtom* at )
2074{
sewardj7cf4e6b2008-05-01 20:24:26 +00002075 return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ8x16, at));
sewardja1d93302004-12-12 16:45:06 +00002076}
2077
/* Pessimise each 16-bit lane of a V128. */
static IRAtom* mkPCast16x8 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ16x8, at));
}
2082
/* Pessimise each 32-bit lane of a V128. */
static IRAtom* mkPCast32x4 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ32x4, at));
}
2087
/* Pessimise each 64-bit lane of a V128. */
static IRAtom* mkPCast64x2 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ64x2, at));
}
2092
/* Pessimise each 64-bit lane of a V256. */
static IRAtom* mkPCast64x4 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ64x4, at));
}
2097
/* Pessimise each 32-bit lane of a V256. */
static IRAtom* mkPCast32x8 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ32x8, at));
}
2102
/* Pessimise each 32-bit lane of an I64. */
static IRAtom* mkPCast32x2 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ32x2, at));
}
2107
/* Pessimise each 16-bit lane of a V256. */
static IRAtom* mkPCast16x16 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ16x16, at));
}
2112
/* Pessimise each 16-bit lane of an I64. */
static IRAtom* mkPCast16x4 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ16x4, at));
}
2117
/* Pessimise each 8-bit lane of a V256. */
static IRAtom* mkPCast8x32 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ8x32, at));
}
2122
/* Pessimise each 8-bit lane of an I64. */
static IRAtom* mkPCast8x8 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ8x8, at));
}
2127
/* Pessimise each 16-bit lane of an I32. */
static IRAtom* mkPCast16x2 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_I32, unop(Iop_CmpNEZ16x2, at));
}
2132
/* Pessimise each 8-bit lane of an I32. */
static IRAtom* mkPCast8x4 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_I32, unop(Iop_CmpNEZ8x4, at));
}
2137
sewardja1d93302004-12-12 16:45:06 +00002138
sewardj3245c912004-12-10 14:58:26 +00002139/* Here's a simple scheme capable of handling ops derived from SSE1
2140 code and while only generating ops that can be efficiently
2141 implemented in SSE1. */
2142
2143/* All-lanes versions are straightforward:
2144
sewardj20d38f22005-02-07 23:50:18 +00002145 binary32Fx4(x,y) ==> PCast32x4(UifUV128(x#,y#))
sewardj3245c912004-12-10 14:58:26 +00002146
2147 unary32Fx4(x,y) ==> PCast32x4(x#)
2148
2149 Lowest-lane-only versions are more complex:
2150
sewardj20d38f22005-02-07 23:50:18 +00002151 binary32F0x4(x,y) ==> SetV128lo32(
sewardj3245c912004-12-10 14:58:26 +00002152 x#,
sewardj20d38f22005-02-07 23:50:18 +00002153 PCast32(V128to32(UifUV128(x#,y#)))
sewardj3245c912004-12-10 14:58:26 +00002154 )
2155
2156 This is perhaps not so obvious. In particular, it's faster to
sewardj20d38f22005-02-07 23:50:18 +00002157 do a V128-bit UifU and then take the bottom 32 bits than the more
sewardj3245c912004-12-10 14:58:26 +00002158 obvious scheme of taking the bottom 32 bits of each operand
2159 and doing a 32-bit UifU. Basically since UifU is fast and
2160 chopping lanes off vector values is slow.
2161
2162 Finally:
2163
sewardj20d38f22005-02-07 23:50:18 +00002164 unary32F0x4(x) ==> SetV128lo32(
sewardj3245c912004-12-10 14:58:26 +00002165 x#,
sewardj20d38f22005-02-07 23:50:18 +00002166 PCast32(V128to32(x#))
sewardj3245c912004-12-10 14:58:26 +00002167 )
2168
2169 Where:
2170
2171 PCast32(v#) = 1Sto32(CmpNE32(v#,0))
2172 PCast32x4(v#) = CmpNEZ32x4(v#)
2173*/
2174
2175static
2176IRAtom* binary32Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2177{
2178 IRAtom* at;
2179 tl_assert(isShadowAtom(mce, vatomX));
2180 tl_assert(isShadowAtom(mce, vatomY));
sewardj20d38f22005-02-07 23:50:18 +00002181 at = mkUifUV128(mce, vatomX, vatomY);
sewardj7cf4e6b2008-05-01 20:24:26 +00002182 at = assignNew('V', mce, Ity_V128, mkPCast32x4(mce, at));
sewardj3245c912004-12-10 14:58:26 +00002183 return at;
2184}
2185
2186static
2187IRAtom* unary32Fx4 ( MCEnv* mce, IRAtom* vatomX )
2188{
2189 IRAtom* at;
2190 tl_assert(isShadowAtom(mce, vatomX));
sewardj7cf4e6b2008-05-01 20:24:26 +00002191 at = assignNew('V', mce, Ity_V128, mkPCast32x4(mce, vatomX));
sewardj3245c912004-12-10 14:58:26 +00002192 return at;
2193}
2194
2195static
2196IRAtom* binary32F0x4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2197{
2198 IRAtom* at;
2199 tl_assert(isShadowAtom(mce, vatomX));
2200 tl_assert(isShadowAtom(mce, vatomY));
sewardj20d38f22005-02-07 23:50:18 +00002201 at = mkUifUV128(mce, vatomX, vatomY);
sewardj7cf4e6b2008-05-01 20:24:26 +00002202 at = assignNew('V', mce, Ity_I32, unop(Iop_V128to32, at));
sewardj3245c912004-12-10 14:58:26 +00002203 at = mkPCastTo(mce, Ity_I32, at);
sewardj7cf4e6b2008-05-01 20:24:26 +00002204 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
sewardj3245c912004-12-10 14:58:26 +00002205 return at;
2206}
2207
2208static
2209IRAtom* unary32F0x4 ( MCEnv* mce, IRAtom* vatomX )
2210{
2211 IRAtom* at;
2212 tl_assert(isShadowAtom(mce, vatomX));
sewardj7cf4e6b2008-05-01 20:24:26 +00002213 at = assignNew('V', mce, Ity_I32, unop(Iop_V128to32, vatomX));
sewardj3245c912004-12-10 14:58:26 +00002214 at = mkPCastTo(mce, Ity_I32, at);
sewardj7cf4e6b2008-05-01 20:24:26 +00002215 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
sewardj3245c912004-12-10 14:58:26 +00002216 return at;
2217}
2218
sewardj0b070592004-12-10 21:44:22 +00002219/* --- ... and ... 64Fx2 versions of the same ... --- */
2220
2221static
2222IRAtom* binary64Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2223{
2224 IRAtom* at;
2225 tl_assert(isShadowAtom(mce, vatomX));
2226 tl_assert(isShadowAtom(mce, vatomY));
sewardj20d38f22005-02-07 23:50:18 +00002227 at = mkUifUV128(mce, vatomX, vatomY);
sewardj7cf4e6b2008-05-01 20:24:26 +00002228 at = assignNew('V', mce, Ity_V128, mkPCast64x2(mce, at));
sewardj0b070592004-12-10 21:44:22 +00002229 return at;
2230}
2231
2232static
2233IRAtom* unary64Fx2 ( MCEnv* mce, IRAtom* vatomX )
2234{
2235 IRAtom* at;
2236 tl_assert(isShadowAtom(mce, vatomX));
sewardj7cf4e6b2008-05-01 20:24:26 +00002237 at = assignNew('V', mce, Ity_V128, mkPCast64x2(mce, vatomX));
sewardj0b070592004-12-10 21:44:22 +00002238 return at;
2239}
2240
2241static
2242IRAtom* binary64F0x2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2243{
2244 IRAtom* at;
2245 tl_assert(isShadowAtom(mce, vatomX));
2246 tl_assert(isShadowAtom(mce, vatomY));
sewardj20d38f22005-02-07 23:50:18 +00002247 at = mkUifUV128(mce, vatomX, vatomY);
sewardj7cf4e6b2008-05-01 20:24:26 +00002248 at = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, at));
sewardj0b070592004-12-10 21:44:22 +00002249 at = mkPCastTo(mce, Ity_I64, at);
sewardj7cf4e6b2008-05-01 20:24:26 +00002250 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
sewardj0b070592004-12-10 21:44:22 +00002251 return at;
2252}
2253
2254static
2255IRAtom* unary64F0x2 ( MCEnv* mce, IRAtom* vatomX )
2256{
2257 IRAtom* at;
2258 tl_assert(isShadowAtom(mce, vatomX));
sewardj7cf4e6b2008-05-01 20:24:26 +00002259 at = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vatomX));
sewardj0b070592004-12-10 21:44:22 +00002260 at = mkPCastTo(mce, Ity_I64, at);
sewardj7cf4e6b2008-05-01 20:24:26 +00002261 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
sewardj0b070592004-12-10 21:44:22 +00002262 return at;
2263}
2264
sewardj57f92b02010-08-22 11:54:14 +00002265/* --- --- ... and ... 32Fx2 versions of the same --- --- */
2266
2267static
2268IRAtom* binary32Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2269{
2270 IRAtom* at;
2271 tl_assert(isShadowAtom(mce, vatomX));
2272 tl_assert(isShadowAtom(mce, vatomY));
2273 at = mkUifU64(mce, vatomX, vatomY);
2274 at = assignNew('V', mce, Ity_I64, mkPCast32x2(mce, at));
2275 return at;
2276}
2277
2278static
2279IRAtom* unary32Fx2 ( MCEnv* mce, IRAtom* vatomX )
2280{
2281 IRAtom* at;
2282 tl_assert(isShadowAtom(mce, vatomX));
2283 at = assignNew('V', mce, Ity_I64, mkPCast32x2(mce, vatomX));
2284 return at;
2285}
2286
sewardj350e8f72012-06-25 07:52:15 +00002287/* --- ... and ... 64Fx4 versions of the same ... --- */
2288
2289static
2290IRAtom* binary64Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2291{
2292 IRAtom* at;
2293 tl_assert(isShadowAtom(mce, vatomX));
2294 tl_assert(isShadowAtom(mce, vatomY));
2295 at = mkUifUV256(mce, vatomX, vatomY);
2296 at = assignNew('V', mce, Ity_V256, mkPCast64x4(mce, at));
2297 return at;
2298}
2299
2300static
2301IRAtom* unary64Fx4 ( MCEnv* mce, IRAtom* vatomX )
2302{
2303 IRAtom* at;
2304 tl_assert(isShadowAtom(mce, vatomX));
2305 at = assignNew('V', mce, Ity_V256, mkPCast64x4(mce, vatomX));
2306 return at;
2307}
2308
2309/* --- ... and ... 32Fx8 versions of the same ... --- */
2310
2311static
2312IRAtom* binary32Fx8 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2313{
2314 IRAtom* at;
2315 tl_assert(isShadowAtom(mce, vatomX));
2316 tl_assert(isShadowAtom(mce, vatomY));
2317 at = mkUifUV256(mce, vatomX, vatomY);
2318 at = assignNew('V', mce, Ity_V256, mkPCast32x8(mce, at));
2319 return at;
2320}
2321
2322static
2323IRAtom* unary32Fx8 ( MCEnv* mce, IRAtom* vatomX )
2324{
2325 IRAtom* at;
2326 tl_assert(isShadowAtom(mce, vatomX));
2327 at = assignNew('V', mce, Ity_V256, mkPCast32x8(mce, vatomX));
2328 return at;
2329}
2330
sewardj1eb272f2014-01-26 18:36:52 +00002331/* --- 64Fx2 binary FP ops, with rounding mode --- */
2332
2333static
2334IRAtom* binary64Fx2_w_rm ( MCEnv* mce, IRAtom* vRM,
2335 IRAtom* vatomX, IRAtom* vatomY )
2336{
2337 /* This is the same as binary64Fx2, except that we subsequently
2338 pessimise vRM (definedness of the rounding mode), widen to 128
2339 bits and UifU it into the result. As with the scalar cases, if
2340 the RM is a constant then it is defined and so this extra bit
2341 will get constant-folded out later. */
2342 // "do" the vector args
2343 IRAtom* t1 = binary64Fx2(mce, vatomX, vatomY);
2344 // PCast the RM, and widen it to 128 bits
2345 IRAtom* t2 = mkPCastTo(mce, Ity_V128, vRM);
2346 // Roll it into the result
2347 t1 = mkUifUV128(mce, t1, t2);
2348 return t1;
2349}
2350
2351/* --- ... and ... 32Fx4 versions of the same --- */
2352
2353static
2354IRAtom* binary32Fx4_w_rm ( MCEnv* mce, IRAtom* vRM,
2355 IRAtom* vatomX, IRAtom* vatomY )
2356{
2357 IRAtom* t1 = binary32Fx4(mce, vatomX, vatomY);
2358 // PCast the RM, and widen it to 128 bits
2359 IRAtom* t2 = mkPCastTo(mce, Ity_V128, vRM);
2360 // Roll it into the result
2361 t1 = mkUifUV128(mce, t1, t2);
2362 return t1;
2363}
2364
2365/* --- ... and ... 64Fx4 versions of the same --- */
2366
2367static
2368IRAtom* binary64Fx4_w_rm ( MCEnv* mce, IRAtom* vRM,
2369 IRAtom* vatomX, IRAtom* vatomY )
2370{
2371 IRAtom* t1 = binary64Fx4(mce, vatomX, vatomY);
2372 // PCast the RM, and widen it to 256 bits
2373 IRAtom* t2 = mkPCastTo(mce, Ity_V256, vRM);
2374 // Roll it into the result
2375 t1 = mkUifUV256(mce, t1, t2);
2376 return t1;
2377}
2378
2379/* --- ... and ... 32Fx8 versions of the same --- */
2380
2381static
2382IRAtom* binary32Fx8_w_rm ( MCEnv* mce, IRAtom* vRM,
2383 IRAtom* vatomX, IRAtom* vatomY )
2384{
2385 IRAtom* t1 = binary32Fx8(mce, vatomX, vatomY);
2386 // PCast the RM, and widen it to 256 bits
2387 IRAtom* t2 = mkPCastTo(mce, Ity_V256, vRM);
2388 // Roll it into the result
2389 t1 = mkUifUV256(mce, t1, t2);
2390 return t1;
2391}
2392
2393
sewardja1d93302004-12-12 16:45:06 +00002394/* --- --- Vector saturated narrowing --- --- */
2395
sewardjb5a29232011-10-22 09:29:41 +00002396/* We used to do something very clever here, but on closer inspection
2397 (2011-Jun-15), and in particular bug #279698, it turns out to be
2398 wrong. Part of the problem came from the fact that for a long
2399 time, the IR primops to do with saturated narrowing were
2400 underspecified and managed to confuse multiple cases which needed
2401 to be separate: the op names had a signedness qualifier, but in
2402 fact the source and destination signednesses needed to be specified
2403 independently, so the op names really need two independent
2404 signedness specifiers.
sewardja1d93302004-12-12 16:45:06 +00002405
sewardjb5a29232011-10-22 09:29:41 +00002406 As of 2011-Jun-15 (ish) the underspecification was sorted out
2407 properly. The incorrect instrumentation remained, though. That
2408 has now (2011-Oct-22) been fixed.
sewardja1d93302004-12-12 16:45:06 +00002409
sewardjb5a29232011-10-22 09:29:41 +00002410 What we now do is simple:
sewardja1d93302004-12-12 16:45:06 +00002411
sewardjb5a29232011-10-22 09:29:41 +00002412 Let the original narrowing op be QNarrowBinXtoYxZ, where Z is a
2413 number of lanes, X is the source lane width and signedness, and Y
2414 is the destination lane width and signedness. In all cases the
2415 destination lane width is half the source lane width, so the names
2416 have a bit of redundancy, but are at least easy to read.
sewardja1d93302004-12-12 16:45:06 +00002417
sewardjb5a29232011-10-22 09:29:41 +00002418 For example, Iop_QNarrowBin32Sto16Ux8 narrows 8 lanes of signed 32s
2419 to unsigned 16s.
sewardja1d93302004-12-12 16:45:06 +00002420
sewardjb5a29232011-10-22 09:29:41 +00002421 Let Vanilla(OP) be a function that takes OP, one of these
2422 saturating narrowing ops, and produces the same "shaped" narrowing
2423 op which is not saturating, but merely dumps the most significant
2424 bits. "same shape" means that the lane numbers and widths are the
2425 same as with OP.
sewardja1d93302004-12-12 16:45:06 +00002426
sewardjb5a29232011-10-22 09:29:41 +00002427 For example, Vanilla(Iop_QNarrowBin32Sto16Ux8)
2428 = Iop_NarrowBin32to16x8,
2429 that is, narrow 8 lanes of 32 bits to 8 lanes of 16 bits, by
2430 dumping the top half of each lane.
sewardja1d93302004-12-12 16:45:06 +00002431
sewardjb5a29232011-10-22 09:29:41 +00002432 So, with that in place, the scheme is simple, and it is simple to
2433 pessimise each lane individually and then apply Vanilla(OP) so as
2434 to get the result in the right "shape". If the original OP is
2435 QNarrowBinXtoYxZ then we produce
sewardja1d93302004-12-12 16:45:06 +00002436
sewardjb5a29232011-10-22 09:29:41 +00002437 Vanilla(OP)( PCast-X-to-X-x-Z(vatom1), PCast-X-to-X-x-Z(vatom2) )
sewardj9beeb0a2011-06-15 15:11:07 +00002438
sewardjb5a29232011-10-22 09:29:41 +00002439 or for the case when OP is unary (Iop_QNarrowUn*)
2440
2441 Vanilla(OP)( PCast-X-to-X-x-Z(vatom) )
sewardja1d93302004-12-12 16:45:06 +00002442*/
2443static
sewardjb5a29232011-10-22 09:29:41 +00002444IROp vanillaNarrowingOpOfShape ( IROp qnarrowOp )
2445{
2446 switch (qnarrowOp) {
2447 /* Binary: (128, 128) -> 128 */
2448 case Iop_QNarrowBin16Sto8Ux16:
2449 case Iop_QNarrowBin16Sto8Sx16:
2450 case Iop_QNarrowBin16Uto8Ux16:
carll62770672013-10-01 15:50:09 +00002451 case Iop_QNarrowBin64Sto32Sx4:
2452 case Iop_QNarrowBin64Uto32Ux4:
sewardjb5a29232011-10-22 09:29:41 +00002453 return Iop_NarrowBin16to8x16;
2454 case Iop_QNarrowBin32Sto16Ux8:
2455 case Iop_QNarrowBin32Sto16Sx8:
2456 case Iop_QNarrowBin32Uto16Ux8:
2457 return Iop_NarrowBin32to16x8;
2458 /* Binary: (64, 64) -> 64 */
2459 case Iop_QNarrowBin32Sto16Sx4:
2460 return Iop_NarrowBin32to16x4;
2461 case Iop_QNarrowBin16Sto8Ux8:
2462 case Iop_QNarrowBin16Sto8Sx8:
2463 return Iop_NarrowBin16to8x8;
2464 /* Unary: 128 -> 64 */
2465 case Iop_QNarrowUn64Uto32Ux2:
2466 case Iop_QNarrowUn64Sto32Sx2:
2467 case Iop_QNarrowUn64Sto32Ux2:
2468 return Iop_NarrowUn64to32x2;
2469 case Iop_QNarrowUn32Uto16Ux4:
2470 case Iop_QNarrowUn32Sto16Sx4:
2471 case Iop_QNarrowUn32Sto16Ux4:
2472 return Iop_NarrowUn32to16x4;
2473 case Iop_QNarrowUn16Uto8Ux8:
2474 case Iop_QNarrowUn16Sto8Sx8:
2475 case Iop_QNarrowUn16Sto8Ux8:
2476 return Iop_NarrowUn16to8x8;
2477 default:
2478 ppIROp(qnarrowOp);
2479 VG_(tool_panic)("vanillaNarrowOpOfShape");
2480 }
2481}
2482
/* Instrument a saturating binary narrowing op on V128 shadows:
   pessimise each source lane of both operands, then apply the
   same-shaped vanilla (non-saturating) narrow to the pessimised
   shadows.  See the long comment above for the rationale. */
static
IRAtom* vectorNarrowBinV128 ( MCEnv* mce, IROp narrow_op,
                              IRAtom* vatom1, IRAtom* vatom2)
{
   IRAtom *at1, *at2, *at3;
   IRAtom* (*pcast)( MCEnv*, IRAtom* );
   /* pcast must match the SOURCE lane layout of narrow_op. */
   switch (narrow_op) {
      /* NOTE(review): the two 64-lane cases use mkPCast32x4, but the
         source lane layout is 64x2, so mkPCast64x2 would match the
         pattern of the other cases -- confirm against upstream. */
      case Iop_QNarrowBin64Sto32Sx4: pcast = mkPCast32x4; break;
      case Iop_QNarrowBin64Uto32Ux4: pcast = mkPCast32x4; break;
      case Iop_QNarrowBin32Sto16Sx8: pcast = mkPCast32x4; break;
      case Iop_QNarrowBin32Uto16Ux8: pcast = mkPCast32x4; break;
      case Iop_QNarrowBin32Sto16Ux8: pcast = mkPCast32x4; break;
      case Iop_QNarrowBin16Sto8Sx16: pcast = mkPCast16x8; break;
      case Iop_QNarrowBin16Uto8Ux16: pcast = mkPCast16x8; break;
      case Iop_QNarrowBin16Sto8Ux16: pcast = mkPCast16x8; break;
      default: VG_(tool_panic)("vectorNarrowBinV128");
   }
   IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op);
   tl_assert(isShadowAtom(mce,vatom1));
   tl_assert(isShadowAtom(mce,vatom2));
   /* Pessimise each operand's lanes, then narrow the shadows. */
   at1 = assignNew('V', mce, Ity_V128, pcast(mce, vatom1));
   at2 = assignNew('V', mce, Ity_V128, pcast(mce, vatom2));
   at3 = assignNew('V', mce, Ity_V128, binop(vanilla_narrow, at1, at2));
   return at3;
}
2508
/* As vectorNarrowBinV128, but for I64 (64-bit SIMD) shadows:
   pessimise each source lane of both operands, then apply the
   same-shaped vanilla narrow. */
static
IRAtom* vectorNarrowBin64 ( MCEnv* mce, IROp narrow_op,
                            IRAtom* vatom1, IRAtom* vatom2)
{
   IRAtom *at1, *at2, *at3;
   IRAtom* (*pcast)( MCEnv*, IRAtom* );
   /* pcast must match the SOURCE lane layout of narrow_op. */
   switch (narrow_op) {
      case Iop_QNarrowBin32Sto16Sx4: pcast = mkPCast32x2; break;
      case Iop_QNarrowBin16Sto8Sx8: pcast = mkPCast16x4; break;
      case Iop_QNarrowBin16Sto8Ux8: pcast = mkPCast16x4; break;
      default: VG_(tool_panic)("vectorNarrowBin64");
   }
   IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op);
   tl_assert(isShadowAtom(mce,vatom1));
   tl_assert(isShadowAtom(mce,vatom2));
   at1 = assignNew('V', mce, Ity_I64, pcast(mce, vatom1));
   at2 = assignNew('V', mce, Ity_I64, pcast(mce, vatom2));
   at3 = assignNew('V', mce, Ity_I64, binop(vanilla_narrow, at1, at2));
   return at3;
}
2529
/* Instrument a unary V128 -> I64 narrowing op.  For a vanilla
   (non-saturating) narrow the op can be applied to the V bits
   directly; for a saturating narrow, pessimise the source lanes
   first and then apply the same-shaped vanilla narrow. */
static
IRAtom* vectorNarrowUnV128 ( MCEnv* mce, IROp narrow_op,
                             IRAtom* vatom1)
{
   IRAtom *at1, *at2;
   IRAtom* (*pcast)( MCEnv*, IRAtom* );
   tl_assert(isShadowAtom(mce,vatom1));
   /* For vanilla narrowing (non-saturating), we can just apply
      the op directly to the V bits. */
   switch (narrow_op) {
      case Iop_NarrowUn16to8x8:
      case Iop_NarrowUn32to16x4:
      case Iop_NarrowUn64to32x2:
         at1 = assignNew('V', mce, Ity_I64, unop(narrow_op, vatom1));
         return at1;
      default:
         break; /* Do Plan B */
   }
   /* Plan B: for ops that involve a saturation operation on the args,
      we must PCast before the vanilla narrow. */
   switch (narrow_op) {
      case Iop_QNarrowUn16Sto8Sx8: pcast = mkPCast16x8; break;
      case Iop_QNarrowUn16Sto8Ux8: pcast = mkPCast16x8; break;
      case Iop_QNarrowUn16Uto8Ux8: pcast = mkPCast16x8; break;
      case Iop_QNarrowUn32Sto16Sx4: pcast = mkPCast32x4; break;
      case Iop_QNarrowUn32Sto16Ux4: pcast = mkPCast32x4; break;
      case Iop_QNarrowUn32Uto16Ux4: pcast = mkPCast32x4; break;
      case Iop_QNarrowUn64Sto32Sx2: pcast = mkPCast64x2; break;
      case Iop_QNarrowUn64Sto32Ux2: pcast = mkPCast64x2; break;
      case Iop_QNarrowUn64Uto32Ux2: pcast = mkPCast64x2; break;
      default: VG_(tool_panic)("vectorNarrowUnV128");
   }
   IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op);
   at1 = assignNew('V', mce, Ity_V128, pcast(mce, vatom1));
   at2 = assignNew('V', mce, Ity_I64, unop(vanilla_narrow, at1));
   return at2;
}
2567
/* Instrument a unary I64 -> V128 widening op: apply the widening op
   to the V bits, then pessimise each DESTINATION lane, since the
   widening op itself leaves the newly-created lane halves with a
   definedness that must be spread across the whole lane. */
static
IRAtom* vectorWidenI64 ( MCEnv* mce, IROp longen_op,
                         IRAtom* vatom1)
{
   IRAtom *at1, *at2;
   IRAtom* (*pcast)( MCEnv*, IRAtom* );
   /* pcast matches the DESTINATION lane layout of longen_op. */
   switch (longen_op) {
      case Iop_Widen8Uto16x8: pcast = mkPCast16x8; break;
      case Iop_Widen8Sto16x8: pcast = mkPCast16x8; break;
      case Iop_Widen16Uto32x4: pcast = mkPCast32x4; break;
      case Iop_Widen16Sto32x4: pcast = mkPCast32x4; break;
      case Iop_Widen32Uto64x2: pcast = mkPCast64x2; break;
      case Iop_Widen32Sto64x2: pcast = mkPCast64x2; break;
      default: VG_(tool_panic)("vectorWidenI64");
   }
   tl_assert(isShadowAtom(mce,vatom1));
   at1 = assignNew('V', mce, Ity_V128, unop(longen_op, vatom1));
   at2 = assignNew('V', mce, Ity_V128, pcast(mce, at1));
   return at2;
}
2588
sewardja1d93302004-12-12 16:45:06 +00002589
2590/* --- --- Vector integer arithmetic --- --- */
2591
2592/* Simple ... UifU the args and per-lane pessimise the results. */
sewardjacd2e912005-01-13 19:17:06 +00002593
sewardja2f30952013-03-27 11:40:02 +00002594/* --- V256-bit versions --- */
2595
2596static
2597IRAtom* binary8Ix32 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2598{
2599 IRAtom* at;
2600 at = mkUifUV256(mce, vatom1, vatom2);
2601 at = mkPCast8x32(mce, at);
2602 return at;
2603}
2604
2605static
2606IRAtom* binary16Ix16 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2607{
2608 IRAtom* at;
2609 at = mkUifUV256(mce, vatom1, vatom2);
2610 at = mkPCast16x16(mce, at);
2611 return at;
2612}
2613
2614static
2615IRAtom* binary32Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2616{
2617 IRAtom* at;
2618 at = mkUifUV256(mce, vatom1, vatom2);
2619 at = mkPCast32x8(mce, at);
2620 return at;
2621}
2622
2623static
2624IRAtom* binary64Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2625{
2626 IRAtom* at;
2627 at = mkUifUV256(mce, vatom1, vatom2);
2628 at = mkPCast64x4(mce, at);
2629 return at;
2630}
2631
sewardj20d38f22005-02-07 23:50:18 +00002632/* --- V128-bit versions --- */
sewardjacd2e912005-01-13 19:17:06 +00002633
sewardja1d93302004-12-12 16:45:06 +00002634static
2635IRAtom* binary8Ix16 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2636{
2637 IRAtom* at;
sewardj20d38f22005-02-07 23:50:18 +00002638 at = mkUifUV128(mce, vatom1, vatom2);
sewardja1d93302004-12-12 16:45:06 +00002639 at = mkPCast8x16(mce, at);
2640 return at;
2641}
2642
2643static
2644IRAtom* binary16Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2645{
2646 IRAtom* at;
sewardj20d38f22005-02-07 23:50:18 +00002647 at = mkUifUV128(mce, vatom1, vatom2);
sewardja1d93302004-12-12 16:45:06 +00002648 at = mkPCast16x8(mce, at);
2649 return at;
2650}
2651
2652static
2653IRAtom* binary32Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2654{
2655 IRAtom* at;
sewardj20d38f22005-02-07 23:50:18 +00002656 at = mkUifUV128(mce, vatom1, vatom2);
sewardja1d93302004-12-12 16:45:06 +00002657 at = mkPCast32x4(mce, at);
2658 return at;
2659}
2660
2661static
2662IRAtom* binary64Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2663{
2664 IRAtom* at;
sewardj20d38f22005-02-07 23:50:18 +00002665 at = mkUifUV128(mce, vatom1, vatom2);
sewardja1d93302004-12-12 16:45:06 +00002666 at = mkPCast64x2(mce, at);
2667 return at;
2668}
sewardj3245c912004-12-10 14:58:26 +00002669
sewardjacd2e912005-01-13 19:17:06 +00002670/* --- 64-bit versions --- */
2671
2672static
2673IRAtom* binary8Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2674{
2675 IRAtom* at;
2676 at = mkUifU64(mce, vatom1, vatom2);
2677 at = mkPCast8x8(mce, at);
2678 return at;
2679}
2680
2681static
2682IRAtom* binary16Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2683{
2684 IRAtom* at;
2685 at = mkUifU64(mce, vatom1, vatom2);
2686 at = mkPCast16x4(mce, at);
2687 return at;
2688}
2689
2690static
2691IRAtom* binary32Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2692{
2693 IRAtom* at;
2694 at = mkUifU64(mce, vatom1, vatom2);
2695 at = mkPCast32x2(mce, at);
2696 return at;
2697}
2698
sewardj57f92b02010-08-22 11:54:14 +00002699static
2700IRAtom* binary64Ix1 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2701{
2702 IRAtom* at;
2703 at = mkUifU64(mce, vatom1, vatom2);
2704 at = mkPCastTo(mce, Ity_I64, at);
2705 return at;
2706}
2707
sewardjc678b852010-09-22 00:58:51 +00002708/* --- 32-bit versions --- */
2709
2710static
2711IRAtom* binary8Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2712{
2713 IRAtom* at;
2714 at = mkUifU32(mce, vatom1, vatom2);
2715 at = mkPCast8x4(mce, at);
2716 return at;
2717}
2718
2719static
2720IRAtom* binary16Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2721{
2722 IRAtom* at;
2723 at = mkUifU32(mce, vatom1, vatom2);
2724 at = mkPCast16x2(mce, at);
2725 return at;
2726}
2727
sewardj3245c912004-12-10 14:58:26 +00002728
2729/*------------------------------------------------------------*/
sewardj95448072004-11-22 20:19:51 +00002730/*--- Generate shadow values from all kinds of IRExprs. ---*/
2731/*------------------------------------------------------------*/
2732
/* Compute the shadow (V-bit) expression for a quaternary op.  The
   scalar FP fused-multiply cases are handled lazily (all-or-nothing
   via mkLazy4, at the shadow type of the result); the data-steering
   case passes the four shadows through the op itself. */
static
IRAtom* expr2vbits_Qop ( MCEnv* mce,
                         IROp op,
                         IRAtom* atom1, IRAtom* atom2,
                         IRAtom* atom3, IRAtom* atom4 )
{
   IRAtom* vatom1 = expr2vbits( mce, atom1 );
   IRAtom* vatom2 = expr2vbits( mce, atom2 );
   IRAtom* vatom3 = expr2vbits( mce, atom3 );
   IRAtom* vatom4 = expr2vbits( mce, atom4 );

   tl_assert(isOriginalAtom(mce,atom1));
   tl_assert(isOriginalAtom(mce,atom2));
   tl_assert(isOriginalAtom(mce,atom3));
   tl_assert(isOriginalAtom(mce,atom4));
   tl_assert(isShadowAtom(mce,vatom1));
   tl_assert(isShadowAtom(mce,vatom2));
   tl_assert(isShadowAtom(mce,vatom3));
   tl_assert(isShadowAtom(mce,vatom4));
   tl_assert(sameKindedAtoms(atom1,vatom1));
   tl_assert(sameKindedAtoms(atom2,vatom2));
   tl_assert(sameKindedAtoms(atom3,vatom3));
   tl_assert(sameKindedAtoms(atom4,vatom4));
   switch (op) {
      case Iop_MAddF64:
      case Iop_MAddF64r32:
      case Iop_MSubF64:
      case Iop_MSubF64r32:
         /* I32(rm) x F64 x F64 x F64 -> F64 */
         return mkLazy4(mce, Ity_I64, vatom1, vatom2, vatom3, vatom4);

      case Iop_MAddF32:
      case Iop_MSubF32:
         /* I32(rm) x F32 x F32 x F32 -> F32 */
         return mkLazy4(mce, Ity_I32, vatom1, vatom2, vatom3, vatom4);

      /* V256-bit data-steering */
      case Iop_64x4toV256:
         /* Shadows steer the same way the data does. */
         return assignNew('V', mce, Ity_V256,
                          IRExpr_Qop(op, vatom1, vatom2, vatom3, vatom4));

      default:
         ppIROp(op);
         VG_(tool_panic)("memcheck:expr2vbits_Qop");
   }
}
2779
2780
2781static
sewardjed69fdb2006-02-03 16:12:27 +00002782IRAtom* expr2vbits_Triop ( MCEnv* mce,
2783 IROp op,
2784 IRAtom* atom1, IRAtom* atom2, IRAtom* atom3 )
2785{
sewardjed69fdb2006-02-03 16:12:27 +00002786 IRAtom* vatom1 = expr2vbits( mce, atom1 );
2787 IRAtom* vatom2 = expr2vbits( mce, atom2 );
2788 IRAtom* vatom3 = expr2vbits( mce, atom3 );
2789
2790 tl_assert(isOriginalAtom(mce,atom1));
2791 tl_assert(isOriginalAtom(mce,atom2));
2792 tl_assert(isOriginalAtom(mce,atom3));
2793 tl_assert(isShadowAtom(mce,vatom1));
2794 tl_assert(isShadowAtom(mce,vatom2));
2795 tl_assert(isShadowAtom(mce,vatom3));
2796 tl_assert(sameKindedAtoms(atom1,vatom1));
2797 tl_assert(sameKindedAtoms(atom2,vatom2));
2798 tl_assert(sameKindedAtoms(atom3,vatom3));
2799 switch (op) {
sewardjb5b87402011-03-07 16:05:35 +00002800 case Iop_AddF128:
sewardjb0ccb4d2012-04-02 10:22:05 +00002801 case Iop_AddD128:
sewardjb5b87402011-03-07 16:05:35 +00002802 case Iop_SubF128:
sewardjb0ccb4d2012-04-02 10:22:05 +00002803 case Iop_SubD128:
sewardjb5b87402011-03-07 16:05:35 +00002804 case Iop_MulF128:
sewardjb0ccb4d2012-04-02 10:22:05 +00002805 case Iop_MulD128:
sewardjb5b87402011-03-07 16:05:35 +00002806 case Iop_DivF128:
sewardjb0ccb4d2012-04-02 10:22:05 +00002807 case Iop_DivD128:
sewardj18c72fa2012-04-23 11:22:05 +00002808 case Iop_QuantizeD128:
2809 /* I32(rm) x F128/D128 x F128/D128 -> F128/D128 */
sewardjb5b87402011-03-07 16:05:35 +00002810 return mkLazy3(mce, Ity_I128, vatom1, vatom2, vatom3);
sewardjed69fdb2006-02-03 16:12:27 +00002811 case Iop_AddF64:
sewardjb0ccb4d2012-04-02 10:22:05 +00002812 case Iop_AddD64:
sewardjed69fdb2006-02-03 16:12:27 +00002813 case Iop_AddF64r32:
2814 case Iop_SubF64:
sewardjb0ccb4d2012-04-02 10:22:05 +00002815 case Iop_SubD64:
sewardjed69fdb2006-02-03 16:12:27 +00002816 case Iop_SubF64r32:
2817 case Iop_MulF64:
sewardjb0ccb4d2012-04-02 10:22:05 +00002818 case Iop_MulD64:
sewardjed69fdb2006-02-03 16:12:27 +00002819 case Iop_MulF64r32:
2820 case Iop_DivF64:
sewardjb0ccb4d2012-04-02 10:22:05 +00002821 case Iop_DivD64:
sewardjed69fdb2006-02-03 16:12:27 +00002822 case Iop_DivF64r32:
sewardj22ac5f42006-02-03 22:55:04 +00002823 case Iop_ScaleF64:
2824 case Iop_Yl2xF64:
2825 case Iop_Yl2xp1F64:
2826 case Iop_AtanF64:
sewardjd6075eb2006-02-04 15:25:23 +00002827 case Iop_PRemF64:
2828 case Iop_PRem1F64:
sewardj18c72fa2012-04-23 11:22:05 +00002829 case Iop_QuantizeD64:
2830 /* I32(rm) x F64/D64 x F64/D64 -> F64/D64 */
sewardjed69fdb2006-02-03 16:12:27 +00002831 return mkLazy3(mce, Ity_I64, vatom1, vatom2, vatom3);
sewardjd6075eb2006-02-04 15:25:23 +00002832 case Iop_PRemC3210F64:
2833 case Iop_PRem1C3210F64:
2834 /* I32(rm) x F64 x F64 -> I32 */
2835 return mkLazy3(mce, Ity_I32, vatom1, vatom2, vatom3);
sewardj59570ff2010-01-01 11:59:33 +00002836 case Iop_AddF32:
2837 case Iop_SubF32:
2838 case Iop_MulF32:
2839 case Iop_DivF32:
2840 /* I32(rm) x F32 x F32 -> I32 */
2841 return mkLazy3(mce, Ity_I32, vatom1, vatom2, vatom3);
sewardj18c72fa2012-04-23 11:22:05 +00002842 case Iop_SignificanceRoundD64:
florian733b4db2013-06-06 19:13:29 +00002843 /* IRRoundingMode(I32) x I8 x D64 -> D64 */
sewardj18c72fa2012-04-23 11:22:05 +00002844 return mkLazy3(mce, Ity_I64, vatom1, vatom2, vatom3);
2845 case Iop_SignificanceRoundD128:
florian733b4db2013-06-06 19:13:29 +00002846 /* IRRoundingMode(I32) x I8 x D128 -> D128 */
sewardj18c72fa2012-04-23 11:22:05 +00002847 return mkLazy3(mce, Ity_I128, vatom1, vatom2, vatom3);
sewardj7b7b1cb2014-09-01 11:34:32 +00002848 case Iop_SliceV128:
2849 /* (V128, V128, I8) -> V128 */
sewardjb9e6d242013-05-11 13:42:08 +00002850 complainIfUndefined(mce, atom3, NULL);
sewardj57f92b02010-08-22 11:54:14 +00002851 return assignNew('V', mce, Ity_V128, triop(op, vatom1, vatom2, atom3));
sewardj7b7b1cb2014-09-01 11:34:32 +00002852 case Iop_Slice64:
2853 /* (I64, I64, I8) -> I64 */
sewardjb9e6d242013-05-11 13:42:08 +00002854 complainIfUndefined(mce, atom3, NULL);
sewardj57f92b02010-08-22 11:54:14 +00002855 return assignNew('V', mce, Ity_I64, triop(op, vatom1, vatom2, atom3));
2856 case Iop_SetElem8x8:
2857 case Iop_SetElem16x4:
2858 case Iop_SetElem32x2:
sewardjb9e6d242013-05-11 13:42:08 +00002859 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00002860 return assignNew('V', mce, Ity_I64, triop(op, vatom1, atom2, vatom3));
carll24e40de2013-10-15 18:13:21 +00002861 /* BCDIops */
2862 case Iop_BCDAdd:
2863 case Iop_BCDSub:
2864 complainIfUndefined(mce, atom3, NULL);
2865 return assignNew('V', mce, Ity_V128, triop(op, vatom1, vatom2, atom3));
2866
sewardj1eb272f2014-01-26 18:36:52 +00002867 /* Vector FP with rounding mode as the first arg */
2868 case Iop_Add64Fx2:
2869 case Iop_Sub64Fx2:
2870 case Iop_Mul64Fx2:
2871 case Iop_Div64Fx2:
2872 return binary64Fx2_w_rm(mce, vatom1, vatom2, vatom3);
2873
2874 case Iop_Add32Fx4:
2875 case Iop_Sub32Fx4:
2876 case Iop_Mul32Fx4:
2877 case Iop_Div32Fx4:
2878 return binary32Fx4_w_rm(mce, vatom1, vatom2, vatom3);
2879
2880 case Iop_Add64Fx4:
2881 case Iop_Sub64Fx4:
2882 case Iop_Mul64Fx4:
2883 case Iop_Div64Fx4:
2884 return binary64Fx4_w_rm(mce, vatom1, vatom2, vatom3);
2885
2886 case Iop_Add32Fx8:
2887 case Iop_Sub32Fx8:
2888 case Iop_Mul32Fx8:
2889 case Iop_Div32Fx8:
2890 return binary32Fx8_w_rm(mce, vatom1, vatom2, vatom3);
2891
sewardjed69fdb2006-02-03 16:12:27 +00002892 default:
2893 ppIROp(op);
2894 VG_(tool_panic)("memcheck:expr2vbits_Triop");
2895 }
2896}
2897
2898
2899static
sewardj95448072004-11-22 20:19:51 +00002900IRAtom* expr2vbits_Binop ( MCEnv* mce,
2901 IROp op,
2902 IRAtom* atom1, IRAtom* atom2 )
2903{
2904 IRType and_or_ty;
2905 IRAtom* (*uifu) (MCEnv*, IRAtom*, IRAtom*);
2906 IRAtom* (*difd) (MCEnv*, IRAtom*, IRAtom*);
2907 IRAtom* (*improve) (MCEnv*, IRAtom*, IRAtom*);
2908
2909 IRAtom* vatom1 = expr2vbits( mce, atom1 );
2910 IRAtom* vatom2 = expr2vbits( mce, atom2 );
2911
2912 tl_assert(isOriginalAtom(mce,atom1));
2913 tl_assert(isOriginalAtom(mce,atom2));
2914 tl_assert(isShadowAtom(mce,vatom1));
2915 tl_assert(isShadowAtom(mce,vatom2));
2916 tl_assert(sameKindedAtoms(atom1,vatom1));
2917 tl_assert(sameKindedAtoms(atom2,vatom2));
2918 switch (op) {
2919
sewardjc678b852010-09-22 00:58:51 +00002920 /* 32-bit SIMD */
2921
2922 case Iop_Add16x2:
2923 case Iop_HAdd16Ux2:
2924 case Iop_HAdd16Sx2:
2925 case Iop_Sub16x2:
2926 case Iop_HSub16Ux2:
2927 case Iop_HSub16Sx2:
2928 case Iop_QAdd16Sx2:
2929 case Iop_QSub16Sx2:
sewardj9fb31092012-09-17 15:28:46 +00002930 case Iop_QSub16Ux2:
sewardj7a370652013-07-04 20:37:33 +00002931 case Iop_QAdd16Ux2:
sewardjc678b852010-09-22 00:58:51 +00002932 return binary16Ix2(mce, vatom1, vatom2);
2933
2934 case Iop_Add8x4:
2935 case Iop_HAdd8Ux4:
2936 case Iop_HAdd8Sx4:
2937 case Iop_Sub8x4:
2938 case Iop_HSub8Ux4:
2939 case Iop_HSub8Sx4:
2940 case Iop_QSub8Ux4:
2941 case Iop_QAdd8Ux4:
2942 case Iop_QSub8Sx4:
2943 case Iop_QAdd8Sx4:
2944 return binary8Ix4(mce, vatom1, vatom2);
2945
sewardjacd2e912005-01-13 19:17:06 +00002946 /* 64-bit SIMD */
2947
sewardj57f92b02010-08-22 11:54:14 +00002948 case Iop_ShrN8x8:
sewardjacd2e912005-01-13 19:17:06 +00002949 case Iop_ShrN16x4:
2950 case Iop_ShrN32x2:
sewardj03809ae2006-12-27 01:16:58 +00002951 case Iop_SarN8x8:
sewardjacd2e912005-01-13 19:17:06 +00002952 case Iop_SarN16x4:
2953 case Iop_SarN32x2:
2954 case Iop_ShlN16x4:
2955 case Iop_ShlN32x2:
sewardj114a9172008-02-09 01:49:32 +00002956 case Iop_ShlN8x8:
sewardjacd2e912005-01-13 19:17:06 +00002957 /* Same scheme as with all other shifts. */
sewardjb9e6d242013-05-11 13:42:08 +00002958 complainIfUndefined(mce, atom2, NULL);
sewardj7cf4e6b2008-05-01 20:24:26 +00002959 return assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2));
sewardjacd2e912005-01-13 19:17:06 +00002960
sewardj7ee7d852011-06-16 11:37:21 +00002961 case Iop_QNarrowBin32Sto16Sx4:
2962 case Iop_QNarrowBin16Sto8Sx8:
2963 case Iop_QNarrowBin16Sto8Ux8:
2964 return vectorNarrowBin64(mce, op, vatom1, vatom2);
sewardjacd2e912005-01-13 19:17:06 +00002965
2966 case Iop_Min8Ux8:
sewardj57f92b02010-08-22 11:54:14 +00002967 case Iop_Min8Sx8:
sewardjacd2e912005-01-13 19:17:06 +00002968 case Iop_Max8Ux8:
sewardj57f92b02010-08-22 11:54:14 +00002969 case Iop_Max8Sx8:
sewardjacd2e912005-01-13 19:17:06 +00002970 case Iop_Avg8Ux8:
2971 case Iop_QSub8Sx8:
2972 case Iop_QSub8Ux8:
2973 case Iop_Sub8x8:
2974 case Iop_CmpGT8Sx8:
sewardj57f92b02010-08-22 11:54:14 +00002975 case Iop_CmpGT8Ux8:
sewardjacd2e912005-01-13 19:17:06 +00002976 case Iop_CmpEQ8x8:
2977 case Iop_QAdd8Sx8:
2978 case Iop_QAdd8Ux8:
sewardj57f92b02010-08-22 11:54:14 +00002979 case Iop_QSal8x8:
2980 case Iop_QShl8x8:
sewardjacd2e912005-01-13 19:17:06 +00002981 case Iop_Add8x8:
sewardj57f92b02010-08-22 11:54:14 +00002982 case Iop_Mul8x8:
2983 case Iop_PolynomialMul8x8:
sewardjacd2e912005-01-13 19:17:06 +00002984 return binary8Ix8(mce, vatom1, vatom2);
2985
2986 case Iop_Min16Sx4:
sewardj57f92b02010-08-22 11:54:14 +00002987 case Iop_Min16Ux4:
sewardjacd2e912005-01-13 19:17:06 +00002988 case Iop_Max16Sx4:
sewardj57f92b02010-08-22 11:54:14 +00002989 case Iop_Max16Ux4:
sewardjacd2e912005-01-13 19:17:06 +00002990 case Iop_Avg16Ux4:
2991 case Iop_QSub16Ux4:
2992 case Iop_QSub16Sx4:
2993 case Iop_Sub16x4:
2994 case Iop_Mul16x4:
2995 case Iop_MulHi16Sx4:
2996 case Iop_MulHi16Ux4:
2997 case Iop_CmpGT16Sx4:
sewardj57f92b02010-08-22 11:54:14 +00002998 case Iop_CmpGT16Ux4:
sewardjacd2e912005-01-13 19:17:06 +00002999 case Iop_CmpEQ16x4:
3000 case Iop_QAdd16Sx4:
3001 case Iop_QAdd16Ux4:
sewardj57f92b02010-08-22 11:54:14 +00003002 case Iop_QSal16x4:
3003 case Iop_QShl16x4:
sewardjacd2e912005-01-13 19:17:06 +00003004 case Iop_Add16x4:
sewardj57f92b02010-08-22 11:54:14 +00003005 case Iop_QDMulHi16Sx4:
3006 case Iop_QRDMulHi16Sx4:
sewardjacd2e912005-01-13 19:17:06 +00003007 return binary16Ix4(mce, vatom1, vatom2);
3008
3009 case Iop_Sub32x2:
sewardj114a9172008-02-09 01:49:32 +00003010 case Iop_Mul32x2:
sewardj57f92b02010-08-22 11:54:14 +00003011 case Iop_Max32Sx2:
3012 case Iop_Max32Ux2:
3013 case Iop_Min32Sx2:
3014 case Iop_Min32Ux2:
sewardjacd2e912005-01-13 19:17:06 +00003015 case Iop_CmpGT32Sx2:
sewardj57f92b02010-08-22 11:54:14 +00003016 case Iop_CmpGT32Ux2:
sewardjacd2e912005-01-13 19:17:06 +00003017 case Iop_CmpEQ32x2:
3018 case Iop_Add32x2:
sewardj57f92b02010-08-22 11:54:14 +00003019 case Iop_QAdd32Ux2:
3020 case Iop_QAdd32Sx2:
3021 case Iop_QSub32Ux2:
3022 case Iop_QSub32Sx2:
3023 case Iop_QSal32x2:
3024 case Iop_QShl32x2:
3025 case Iop_QDMulHi32Sx2:
3026 case Iop_QRDMulHi32Sx2:
sewardjacd2e912005-01-13 19:17:06 +00003027 return binary32Ix2(mce, vatom1, vatom2);
3028
sewardj57f92b02010-08-22 11:54:14 +00003029 case Iop_QSub64Ux1:
3030 case Iop_QSub64Sx1:
3031 case Iop_QAdd64Ux1:
3032 case Iop_QAdd64Sx1:
3033 case Iop_QSal64x1:
3034 case Iop_QShl64x1:
3035 case Iop_Sal64x1:
3036 return binary64Ix1(mce, vatom1, vatom2);
3037
sewardje541e222014-08-15 09:12:28 +00003038 case Iop_QShlNsatSU8x8:
3039 case Iop_QShlNsatUU8x8:
3040 case Iop_QShlNsatSS8x8:
sewardjb9e6d242013-05-11 13:42:08 +00003041 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00003042 return mkPCast8x8(mce, vatom1);
3043
sewardje541e222014-08-15 09:12:28 +00003044 case Iop_QShlNsatSU16x4:
3045 case Iop_QShlNsatUU16x4:
3046 case Iop_QShlNsatSS16x4:
sewardjb9e6d242013-05-11 13:42:08 +00003047 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00003048 return mkPCast16x4(mce, vatom1);
3049
sewardje541e222014-08-15 09:12:28 +00003050 case Iop_QShlNsatSU32x2:
3051 case Iop_QShlNsatUU32x2:
3052 case Iop_QShlNsatSS32x2:
sewardjb9e6d242013-05-11 13:42:08 +00003053 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00003054 return mkPCast32x2(mce, vatom1);
3055
sewardje541e222014-08-15 09:12:28 +00003056 case Iop_QShlNsatSU64x1:
3057 case Iop_QShlNsatUU64x1:
3058 case Iop_QShlNsatSS64x1:
sewardjb9e6d242013-05-11 13:42:08 +00003059 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00003060 return mkPCast32x2(mce, vatom1);
3061
3062 case Iop_PwMax32Sx2:
3063 case Iop_PwMax32Ux2:
3064 case Iop_PwMin32Sx2:
3065 case Iop_PwMin32Ux2:
3066 case Iop_PwMax32Fx2:
3067 case Iop_PwMin32Fx2:
sewardj350e8f72012-06-25 07:52:15 +00003068 return assignNew('V', mce, Ity_I64,
3069 binop(Iop_PwMax32Ux2,
3070 mkPCast32x2(mce, vatom1),
3071 mkPCast32x2(mce, vatom2)));
sewardj57f92b02010-08-22 11:54:14 +00003072
3073 case Iop_PwMax16Sx4:
3074 case Iop_PwMax16Ux4:
3075 case Iop_PwMin16Sx4:
3076 case Iop_PwMin16Ux4:
sewardj350e8f72012-06-25 07:52:15 +00003077 return assignNew('V', mce, Ity_I64,
3078 binop(Iop_PwMax16Ux4,
3079 mkPCast16x4(mce, vatom1),
3080 mkPCast16x4(mce, vatom2)));
sewardj57f92b02010-08-22 11:54:14 +00003081
3082 case Iop_PwMax8Sx8:
3083 case Iop_PwMax8Ux8:
3084 case Iop_PwMin8Sx8:
3085 case Iop_PwMin8Ux8:
sewardj350e8f72012-06-25 07:52:15 +00003086 return assignNew('V', mce, Ity_I64,
3087 binop(Iop_PwMax8Ux8,
3088 mkPCast8x8(mce, vatom1),
3089 mkPCast8x8(mce, vatom2)));
sewardj57f92b02010-08-22 11:54:14 +00003090
3091 case Iop_PwAdd32x2:
3092 case Iop_PwAdd32Fx2:
3093 return mkPCast32x2(mce,
sewardj350e8f72012-06-25 07:52:15 +00003094 assignNew('V', mce, Ity_I64,
3095 binop(Iop_PwAdd32x2,
3096 mkPCast32x2(mce, vatom1),
3097 mkPCast32x2(mce, vatom2))));
sewardj57f92b02010-08-22 11:54:14 +00003098
3099 case Iop_PwAdd16x4:
3100 return mkPCast16x4(mce,
sewardj350e8f72012-06-25 07:52:15 +00003101 assignNew('V', mce, Ity_I64,
3102 binop(op, mkPCast16x4(mce, vatom1),
3103 mkPCast16x4(mce, vatom2))));
sewardj57f92b02010-08-22 11:54:14 +00003104
3105 case Iop_PwAdd8x8:
3106 return mkPCast8x8(mce,
sewardj350e8f72012-06-25 07:52:15 +00003107 assignNew('V', mce, Ity_I64,
3108 binop(op, mkPCast8x8(mce, vatom1),
3109 mkPCast8x8(mce, vatom2))));
sewardj57f92b02010-08-22 11:54:14 +00003110
3111 case Iop_Shl8x8:
3112 case Iop_Shr8x8:
3113 case Iop_Sar8x8:
3114 case Iop_Sal8x8:
3115 return mkUifU64(mce,
3116 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
3117 mkPCast8x8(mce,vatom2)
3118 );
3119
3120 case Iop_Shl16x4:
3121 case Iop_Shr16x4:
3122 case Iop_Sar16x4:
3123 case Iop_Sal16x4:
3124 return mkUifU64(mce,
3125 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
3126 mkPCast16x4(mce,vatom2)
3127 );
3128
3129 case Iop_Shl32x2:
3130 case Iop_Shr32x2:
3131 case Iop_Sar32x2:
3132 case Iop_Sal32x2:
3133 return mkUifU64(mce,
3134 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
3135 mkPCast32x2(mce,vatom2)
3136 );
3137
sewardjacd2e912005-01-13 19:17:06 +00003138 /* 64-bit data-steering */
3139 case Iop_InterleaveLO32x2:
3140 case Iop_InterleaveLO16x4:
3141 case Iop_InterleaveLO8x8:
3142 case Iop_InterleaveHI32x2:
3143 case Iop_InterleaveHI16x4:
3144 case Iop_InterleaveHI8x8:
sewardj57f92b02010-08-22 11:54:14 +00003145 case Iop_CatOddLanes8x8:
3146 case Iop_CatEvenLanes8x8:
sewardj114a9172008-02-09 01:49:32 +00003147 case Iop_CatOddLanes16x4:
3148 case Iop_CatEvenLanes16x4:
sewardj57f92b02010-08-22 11:54:14 +00003149 case Iop_InterleaveOddLanes8x8:
3150 case Iop_InterleaveEvenLanes8x8:
3151 case Iop_InterleaveOddLanes16x4:
3152 case Iop_InterleaveEvenLanes16x4:
sewardj7cf4e6b2008-05-01 20:24:26 +00003153 return assignNew('V', mce, Ity_I64, binop(op, vatom1, vatom2));
sewardjacd2e912005-01-13 19:17:06 +00003154
sewardj57f92b02010-08-22 11:54:14 +00003155 case Iop_GetElem8x8:
sewardjb9e6d242013-05-11 13:42:08 +00003156 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00003157 return assignNew('V', mce, Ity_I8, binop(op, vatom1, atom2));
3158 case Iop_GetElem16x4:
sewardjb9e6d242013-05-11 13:42:08 +00003159 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00003160 return assignNew('V', mce, Ity_I16, binop(op, vatom1, atom2));
3161 case Iop_GetElem32x2:
sewardjb9e6d242013-05-11 13:42:08 +00003162 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00003163 return assignNew('V', mce, Ity_I32, binop(op, vatom1, atom2));
3164
sewardj114a9172008-02-09 01:49:32 +00003165 /* Perm8x8: rearrange values in left arg using steering values
3166 from right arg. So rearrange the vbits in the same way but
3167 pessimise wrt steering values. */
3168 case Iop_Perm8x8:
3169 return mkUifU64(
3170 mce,
sewardj7cf4e6b2008-05-01 20:24:26 +00003171 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
sewardj114a9172008-02-09 01:49:32 +00003172 mkPCast8x8(mce, vatom2)
3173 );
3174
sewardj20d38f22005-02-07 23:50:18 +00003175 /* V128-bit SIMD */
sewardj0b070592004-12-10 21:44:22 +00003176
sewardj57f92b02010-08-22 11:54:14 +00003177 case Iop_ShrN8x16:
sewardja1d93302004-12-12 16:45:06 +00003178 case Iop_ShrN16x8:
3179 case Iop_ShrN32x4:
3180 case Iop_ShrN64x2:
sewardj57f92b02010-08-22 11:54:14 +00003181 case Iop_SarN8x16:
sewardja1d93302004-12-12 16:45:06 +00003182 case Iop_SarN16x8:
3183 case Iop_SarN32x4:
sewardj57f92b02010-08-22 11:54:14 +00003184 case Iop_SarN64x2:
3185 case Iop_ShlN8x16:
sewardja1d93302004-12-12 16:45:06 +00003186 case Iop_ShlN16x8:
3187 case Iop_ShlN32x4:
3188 case Iop_ShlN64x2:
sewardj620eb5b2005-10-22 12:50:43 +00003189 /* Same scheme as with all other shifts. Note: 22 Oct 05:
3190 this is wrong now, scalar shifts are done properly lazily.
3191 Vector shifts should be fixed too. */
sewardjb9e6d242013-05-11 13:42:08 +00003192 complainIfUndefined(mce, atom2, NULL);
sewardj7cf4e6b2008-05-01 20:24:26 +00003193 return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2));
sewardja1d93302004-12-12 16:45:06 +00003194
sewardjcbf8be72005-11-10 18:34:41 +00003195 /* V x V shifts/rotates are done using the standard lazy scheme. */
sewardjbfd03f82014-08-26 18:35:13 +00003196 /* For the non-rounding variants of bi-di vector x vector
3197 shifts (the Iop_Sh.. ops, that is) we use the lazy scheme.
3198 But note that this is overly pessimistic, because in fact only
3199 the bottom 8 bits of each lane of the second argument are taken
3200 into account when shifting. So really we ought to ignore
3201 undefinedness in bits 8 and above of each lane in the
3202 second argument. */
sewardj43d60752005-11-10 18:13:01 +00003203 case Iop_Shl8x16:
3204 case Iop_Shr8x16:
3205 case Iop_Sar8x16:
sewardj57f92b02010-08-22 11:54:14 +00003206 case Iop_Sal8x16:
sewardjcbf8be72005-11-10 18:34:41 +00003207 case Iop_Rol8x16:
sewardjbfd03f82014-08-26 18:35:13 +00003208 case Iop_Sh8Sx16:
3209 case Iop_Sh8Ux16:
sewardj43d60752005-11-10 18:13:01 +00003210 return mkUifUV128(mce,
sewardj7cf4e6b2008-05-01 20:24:26 +00003211 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
sewardj43d60752005-11-10 18:13:01 +00003212 mkPCast8x16(mce,vatom2)
3213 );
3214
3215 case Iop_Shl16x8:
3216 case Iop_Shr16x8:
3217 case Iop_Sar16x8:
sewardj57f92b02010-08-22 11:54:14 +00003218 case Iop_Sal16x8:
sewardjcbf8be72005-11-10 18:34:41 +00003219 case Iop_Rol16x8:
sewardjbfd03f82014-08-26 18:35:13 +00003220 case Iop_Sh16Sx8:
3221 case Iop_Sh16Ux8:
sewardj43d60752005-11-10 18:13:01 +00003222 return mkUifUV128(mce,
sewardj7cf4e6b2008-05-01 20:24:26 +00003223 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
sewardj43d60752005-11-10 18:13:01 +00003224 mkPCast16x8(mce,vatom2)
3225 );
3226
3227 case Iop_Shl32x4:
3228 case Iop_Shr32x4:
3229 case Iop_Sar32x4:
sewardj57f92b02010-08-22 11:54:14 +00003230 case Iop_Sal32x4:
sewardjcbf8be72005-11-10 18:34:41 +00003231 case Iop_Rol32x4:
sewardjbfd03f82014-08-26 18:35:13 +00003232 case Iop_Sh32Sx4:
3233 case Iop_Sh32Ux4:
sewardj43d60752005-11-10 18:13:01 +00003234 return mkUifUV128(mce,
sewardj7cf4e6b2008-05-01 20:24:26 +00003235 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
sewardj43d60752005-11-10 18:13:01 +00003236 mkPCast32x4(mce,vatom2)
3237 );
3238
sewardj57f92b02010-08-22 11:54:14 +00003239 case Iop_Shl64x2:
3240 case Iop_Shr64x2:
3241 case Iop_Sar64x2:
3242 case Iop_Sal64x2:
sewardj147865c2014-08-26 17:30:07 +00003243 case Iop_Rol64x2:
sewardjbfd03f82014-08-26 18:35:13 +00003244 case Iop_Sh64Sx2:
3245 case Iop_Sh64Ux2:
sewardj57f92b02010-08-22 11:54:14 +00003246 return mkUifUV128(mce,
3247 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
3248 mkPCast64x2(mce,vatom2)
3249 );
3250
sewardjbfd03f82014-08-26 18:35:13 +00003251 /* For the rounding variants of bi-di vector x vector shifts, the
3252 rounding adjustment can cause undefinedness to propagate through
3253 the entire lane, in the worst case. Too complex to handle
3254 properly .. just UifU the arguments and then PCast them.
3255 Suboptimal but safe. */
3256 case Iop_Rsh8Sx16:
3257 case Iop_Rsh8Ux16:
3258 return binary8Ix16(mce, vatom1, vatom2);
3259 case Iop_Rsh16Sx8:
3260 case Iop_Rsh16Ux8:
3261 return binary16Ix8(mce, vatom1, vatom2);
3262 case Iop_Rsh32Sx4:
3263 case Iop_Rsh32Ux4:
3264 return binary32Ix4(mce, vatom1, vatom2);
3265 case Iop_Rsh64Sx2:
3266 case Iop_Rsh64Ux2:
3267 return binary64Ix2(mce, vatom1, vatom2);
3268
sewardj57f92b02010-08-22 11:54:14 +00003269 case Iop_F32ToFixed32Ux4_RZ:
3270 case Iop_F32ToFixed32Sx4_RZ:
3271 case Iop_Fixed32UToF32x4_RN:
3272 case Iop_Fixed32SToF32x4_RN:
sewardjb9e6d242013-05-11 13:42:08 +00003273 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00003274 return mkPCast32x4(mce, vatom1);
3275
3276 case Iop_F32ToFixed32Ux2_RZ:
3277 case Iop_F32ToFixed32Sx2_RZ:
3278 case Iop_Fixed32UToF32x2_RN:
3279 case Iop_Fixed32SToF32x2_RN:
sewardjb9e6d242013-05-11 13:42:08 +00003280 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00003281 return mkPCast32x2(mce, vatom1);
3282
sewardja1d93302004-12-12 16:45:06 +00003283 case Iop_QSub8Ux16:
3284 case Iop_QSub8Sx16:
3285 case Iop_Sub8x16:
3286 case Iop_Min8Ux16:
sewardj43d60752005-11-10 18:13:01 +00003287 case Iop_Min8Sx16:
sewardja1d93302004-12-12 16:45:06 +00003288 case Iop_Max8Ux16:
sewardj43d60752005-11-10 18:13:01 +00003289 case Iop_Max8Sx16:
sewardja1d93302004-12-12 16:45:06 +00003290 case Iop_CmpGT8Sx16:
sewardj43d60752005-11-10 18:13:01 +00003291 case Iop_CmpGT8Ux16:
sewardja1d93302004-12-12 16:45:06 +00003292 case Iop_CmpEQ8x16:
3293 case Iop_Avg8Ux16:
sewardj43d60752005-11-10 18:13:01 +00003294 case Iop_Avg8Sx16:
sewardja1d93302004-12-12 16:45:06 +00003295 case Iop_QAdd8Ux16:
3296 case Iop_QAdd8Sx16:
sewardjbfd03f82014-08-26 18:35:13 +00003297 case Iop_QAddExtUSsatSS8x16:
3298 case Iop_QAddExtSUsatUU8x16:
sewardj57f92b02010-08-22 11:54:14 +00003299 case Iop_QSal8x16:
3300 case Iop_QShl8x16:
sewardja1d93302004-12-12 16:45:06 +00003301 case Iop_Add8x16:
sewardj57f92b02010-08-22 11:54:14 +00003302 case Iop_Mul8x16:
3303 case Iop_PolynomialMul8x16:
carll24e40de2013-10-15 18:13:21 +00003304 case Iop_PolynomialMulAdd8x16:
sewardja1d93302004-12-12 16:45:06 +00003305 return binary8Ix16(mce, vatom1, vatom2);
3306
3307 case Iop_QSub16Ux8:
3308 case Iop_QSub16Sx8:
3309 case Iop_Sub16x8:
3310 case Iop_Mul16x8:
3311 case Iop_MulHi16Sx8:
3312 case Iop_MulHi16Ux8:
3313 case Iop_Min16Sx8:
sewardj43d60752005-11-10 18:13:01 +00003314 case Iop_Min16Ux8:
sewardja1d93302004-12-12 16:45:06 +00003315 case Iop_Max16Sx8:
sewardj43d60752005-11-10 18:13:01 +00003316 case Iop_Max16Ux8:
sewardja1d93302004-12-12 16:45:06 +00003317 case Iop_CmpGT16Sx8:
sewardj43d60752005-11-10 18:13:01 +00003318 case Iop_CmpGT16Ux8:
sewardja1d93302004-12-12 16:45:06 +00003319 case Iop_CmpEQ16x8:
3320 case Iop_Avg16Ux8:
sewardj43d60752005-11-10 18:13:01 +00003321 case Iop_Avg16Sx8:
sewardja1d93302004-12-12 16:45:06 +00003322 case Iop_QAdd16Ux8:
3323 case Iop_QAdd16Sx8:
sewardjbfd03f82014-08-26 18:35:13 +00003324 case Iop_QAddExtUSsatSS16x8:
3325 case Iop_QAddExtSUsatUU16x8:
sewardj57f92b02010-08-22 11:54:14 +00003326 case Iop_QSal16x8:
3327 case Iop_QShl16x8:
sewardja1d93302004-12-12 16:45:06 +00003328 case Iop_Add16x8:
sewardj57f92b02010-08-22 11:54:14 +00003329 case Iop_QDMulHi16Sx8:
3330 case Iop_QRDMulHi16Sx8:
carll24e40de2013-10-15 18:13:21 +00003331 case Iop_PolynomialMulAdd16x8:
sewardja1d93302004-12-12 16:45:06 +00003332 return binary16Ix8(mce, vatom1, vatom2);
3333
3334 case Iop_Sub32x4:
3335 case Iop_CmpGT32Sx4:
sewardj43d60752005-11-10 18:13:01 +00003336 case Iop_CmpGT32Ux4:
sewardja1d93302004-12-12 16:45:06 +00003337 case Iop_CmpEQ32x4:
sewardj43d60752005-11-10 18:13:01 +00003338 case Iop_QAdd32Sx4:
3339 case Iop_QAdd32Ux4:
3340 case Iop_QSub32Sx4:
3341 case Iop_QSub32Ux4:
sewardjbfd03f82014-08-26 18:35:13 +00003342 case Iop_QAddExtUSsatSS32x4:
3343 case Iop_QAddExtSUsatUU32x4:
sewardj57f92b02010-08-22 11:54:14 +00003344 case Iop_QSal32x4:
3345 case Iop_QShl32x4:
sewardj43d60752005-11-10 18:13:01 +00003346 case Iop_Avg32Ux4:
3347 case Iop_Avg32Sx4:
sewardja1d93302004-12-12 16:45:06 +00003348 case Iop_Add32x4:
sewardj43d60752005-11-10 18:13:01 +00003349 case Iop_Max32Ux4:
3350 case Iop_Max32Sx4:
3351 case Iop_Min32Ux4:
3352 case Iop_Min32Sx4:
sewardjb823b852010-06-18 08:18:38 +00003353 case Iop_Mul32x4:
sewardj57f92b02010-08-22 11:54:14 +00003354 case Iop_QDMulHi32Sx4:
3355 case Iop_QRDMulHi32Sx4:
carll24e40de2013-10-15 18:13:21 +00003356 case Iop_PolynomialMulAdd32x4:
sewardja1d93302004-12-12 16:45:06 +00003357 return binary32Ix4(mce, vatom1, vatom2);
3358
3359 case Iop_Sub64x2:
3360 case Iop_Add64x2:
carll62770672013-10-01 15:50:09 +00003361 case Iop_Max64Sx2:
3362 case Iop_Max64Ux2:
3363 case Iop_Min64Sx2:
3364 case Iop_Min64Ux2:
sewardj9a2afe92011-10-19 15:24:55 +00003365 case Iop_CmpEQ64x2:
sewardjb823b852010-06-18 08:18:38 +00003366 case Iop_CmpGT64Sx2:
carll62770672013-10-01 15:50:09 +00003367 case Iop_CmpGT64Ux2:
sewardj57f92b02010-08-22 11:54:14 +00003368 case Iop_QSal64x2:
3369 case Iop_QShl64x2:
3370 case Iop_QAdd64Ux2:
3371 case Iop_QAdd64Sx2:
3372 case Iop_QSub64Ux2:
3373 case Iop_QSub64Sx2:
sewardjbfd03f82014-08-26 18:35:13 +00003374 case Iop_QAddExtUSsatSS64x2:
3375 case Iop_QAddExtSUsatUU64x2:
carll24e40de2013-10-15 18:13:21 +00003376 case Iop_PolynomialMulAdd64x2:
3377 case Iop_CipherV128:
3378 case Iop_CipherLV128:
3379 case Iop_NCipherV128:
3380 case Iop_NCipherLV128:
3381 return binary64Ix2(mce, vatom1, vatom2);
sewardja1d93302004-12-12 16:45:06 +00003382
carll62770672013-10-01 15:50:09 +00003383 case Iop_QNarrowBin64Sto32Sx4:
3384 case Iop_QNarrowBin64Uto32Ux4:
sewardj7ee7d852011-06-16 11:37:21 +00003385 case Iop_QNarrowBin32Sto16Sx8:
3386 case Iop_QNarrowBin32Uto16Ux8:
3387 case Iop_QNarrowBin32Sto16Ux8:
3388 case Iop_QNarrowBin16Sto8Sx16:
3389 case Iop_QNarrowBin16Uto8Ux16:
3390 case Iop_QNarrowBin16Sto8Ux16:
3391 return vectorNarrowBinV128(mce, op, vatom1, vatom2);
sewardja1d93302004-12-12 16:45:06 +00003392
sewardj0b070592004-12-10 21:44:22 +00003393 case Iop_Min64Fx2:
3394 case Iop_Max64Fx2:
sewardj0b070592004-12-10 21:44:22 +00003395 case Iop_CmpLT64Fx2:
3396 case Iop_CmpLE64Fx2:
3397 case Iop_CmpEQ64Fx2:
sewardj545663e2005-11-05 01:55:04 +00003398 case Iop_CmpUN64Fx2:
sewardj14350762015-02-24 12:24:35 +00003399 case Iop_RecipStep64Fx2:
3400 case Iop_RSqrtStep64Fx2:
sewardj0b070592004-12-10 21:44:22 +00003401 return binary64Fx2(mce, vatom1, vatom2);
3402
3403 case Iop_Sub64F0x2:
3404 case Iop_Mul64F0x2:
3405 case Iop_Min64F0x2:
3406 case Iop_Max64F0x2:
3407 case Iop_Div64F0x2:
3408 case Iop_CmpLT64F0x2:
3409 case Iop_CmpLE64F0x2:
3410 case Iop_CmpEQ64F0x2:
sewardj545663e2005-11-05 01:55:04 +00003411 case Iop_CmpUN64F0x2:
sewardj0b070592004-12-10 21:44:22 +00003412 case Iop_Add64F0x2:
3413 return binary64F0x2(mce, vatom1, vatom2);
3414
sewardj170ee212004-12-10 18:57:51 +00003415 case Iop_Min32Fx4:
3416 case Iop_Max32Fx4:
sewardj170ee212004-12-10 18:57:51 +00003417 case Iop_CmpLT32Fx4:
3418 case Iop_CmpLE32Fx4:
3419 case Iop_CmpEQ32Fx4:
sewardj545663e2005-11-05 01:55:04 +00003420 case Iop_CmpUN32Fx4:
cerione78ba2a2005-11-14 03:00:35 +00003421 case Iop_CmpGT32Fx4:
3422 case Iop_CmpGE32Fx4:
sewardjee6bb772014-08-24 14:02:22 +00003423 case Iop_RecipStep32Fx4:
3424 case Iop_RSqrtStep32Fx4:
sewardj3245c912004-12-10 14:58:26 +00003425 return binary32Fx4(mce, vatom1, vatom2);
3426
sewardj57f92b02010-08-22 11:54:14 +00003427 case Iop_Sub32Fx2:
3428 case Iop_Mul32Fx2:
3429 case Iop_Min32Fx2:
3430 case Iop_Max32Fx2:
3431 case Iop_CmpEQ32Fx2:
3432 case Iop_CmpGT32Fx2:
3433 case Iop_CmpGE32Fx2:
3434 case Iop_Add32Fx2:
sewardjee6bb772014-08-24 14:02:22 +00003435 case Iop_RecipStep32Fx2:
3436 case Iop_RSqrtStep32Fx2:
sewardj57f92b02010-08-22 11:54:14 +00003437 return binary32Fx2(mce, vatom1, vatom2);
3438
sewardj170ee212004-12-10 18:57:51 +00003439 case Iop_Sub32F0x4:
3440 case Iop_Mul32F0x4:
3441 case Iop_Min32F0x4:
3442 case Iop_Max32F0x4:
3443 case Iop_Div32F0x4:
3444 case Iop_CmpLT32F0x4:
3445 case Iop_CmpLE32F0x4:
3446 case Iop_CmpEQ32F0x4:
sewardj545663e2005-11-05 01:55:04 +00003447 case Iop_CmpUN32F0x4:
sewardj170ee212004-12-10 18:57:51 +00003448 case Iop_Add32F0x4:
3449 return binary32F0x4(mce, vatom1, vatom2);
3450
sewardje541e222014-08-15 09:12:28 +00003451 case Iop_QShlNsatSU8x16:
3452 case Iop_QShlNsatUU8x16:
3453 case Iop_QShlNsatSS8x16:
sewardjb9e6d242013-05-11 13:42:08 +00003454 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00003455 return mkPCast8x16(mce, vatom1);
3456
sewardje541e222014-08-15 09:12:28 +00003457 case Iop_QShlNsatSU16x8:
3458 case Iop_QShlNsatUU16x8:
3459 case Iop_QShlNsatSS16x8:
sewardjb9e6d242013-05-11 13:42:08 +00003460 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00003461 return mkPCast16x8(mce, vatom1);
3462
sewardje541e222014-08-15 09:12:28 +00003463 case Iop_QShlNsatSU32x4:
3464 case Iop_QShlNsatUU32x4:
3465 case Iop_QShlNsatSS32x4:
sewardjb9e6d242013-05-11 13:42:08 +00003466 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00003467 return mkPCast32x4(mce, vatom1);
3468
sewardje541e222014-08-15 09:12:28 +00003469 case Iop_QShlNsatSU64x2:
3470 case Iop_QShlNsatUU64x2:
3471 case Iop_QShlNsatSS64x2:
sewardjb9e6d242013-05-11 13:42:08 +00003472 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00003473 return mkPCast32x4(mce, vatom1);
3474
sewardjbfd03f82014-08-26 18:35:13 +00003475 /* Q-and-Qshift-by-imm-and-narrow of the form (V128, I8) -> V128.
3476 To make this simpler, do the following:
3477 * complain if the shift amount (the I8) is undefined
3478 * pcast each lane at the wide width
3479 * truncate each lane to half width
3480 * pcast the resulting 64-bit value to a single bit and use
3481 that as the least significant bit of the upper half of the
3482 result. */
3483 case Iop_QandQShrNnarrow64Uto32Ux2:
3484 case Iop_QandQSarNnarrow64Sto32Sx2:
3485 case Iop_QandQSarNnarrow64Sto32Ux2:
3486 case Iop_QandQRShrNnarrow64Uto32Ux2:
3487 case Iop_QandQRSarNnarrow64Sto32Sx2:
3488 case Iop_QandQRSarNnarrow64Sto32Ux2:
3489 case Iop_QandQShrNnarrow32Uto16Ux4:
3490 case Iop_QandQSarNnarrow32Sto16Sx4:
3491 case Iop_QandQSarNnarrow32Sto16Ux4:
3492 case Iop_QandQRShrNnarrow32Uto16Ux4:
3493 case Iop_QandQRSarNnarrow32Sto16Sx4:
3494 case Iop_QandQRSarNnarrow32Sto16Ux4:
3495 case Iop_QandQShrNnarrow16Uto8Ux8:
3496 case Iop_QandQSarNnarrow16Sto8Sx8:
3497 case Iop_QandQSarNnarrow16Sto8Ux8:
3498 case Iop_QandQRShrNnarrow16Uto8Ux8:
3499 case Iop_QandQRSarNnarrow16Sto8Sx8:
3500 case Iop_QandQRSarNnarrow16Sto8Ux8:
3501 {
3502 IRAtom* (*fnPessim) (MCEnv*, IRAtom*) = NULL;
3503 IROp opNarrow = Iop_INVALID;
3504 switch (op) {
3505 case Iop_QandQShrNnarrow64Uto32Ux2:
3506 case Iop_QandQSarNnarrow64Sto32Sx2:
3507 case Iop_QandQSarNnarrow64Sto32Ux2:
3508 case Iop_QandQRShrNnarrow64Uto32Ux2:
3509 case Iop_QandQRSarNnarrow64Sto32Sx2:
3510 case Iop_QandQRSarNnarrow64Sto32Ux2:
3511 fnPessim = mkPCast64x2;
3512 opNarrow = Iop_NarrowUn64to32x2;
3513 break;
3514 case Iop_QandQShrNnarrow32Uto16Ux4:
3515 case Iop_QandQSarNnarrow32Sto16Sx4:
3516 case Iop_QandQSarNnarrow32Sto16Ux4:
3517 case Iop_QandQRShrNnarrow32Uto16Ux4:
3518 case Iop_QandQRSarNnarrow32Sto16Sx4:
3519 case Iop_QandQRSarNnarrow32Sto16Ux4:
3520 fnPessim = mkPCast32x4;
3521 opNarrow = Iop_NarrowUn32to16x4;
3522 break;
3523 case Iop_QandQShrNnarrow16Uto8Ux8:
3524 case Iop_QandQSarNnarrow16Sto8Sx8:
3525 case Iop_QandQSarNnarrow16Sto8Ux8:
3526 case Iop_QandQRShrNnarrow16Uto8Ux8:
3527 case Iop_QandQRSarNnarrow16Sto8Sx8:
3528 case Iop_QandQRSarNnarrow16Sto8Ux8:
3529 fnPessim = mkPCast16x8;
3530 opNarrow = Iop_NarrowUn16to8x8;
3531 break;
3532 default:
3533 tl_assert(0);
3534 }
3535 complainIfUndefined(mce, atom2, NULL);
3536 // Pessimised shift result
3537 IRAtom* shV
3538 = fnPessim(mce, vatom1);
3539 // Narrowed, pessimised shift result
3540 IRAtom* shVnarrowed
3541 = assignNew('V', mce, Ity_I64, unop(opNarrow, shV));
3542 // Generates: Def--(63)--Def PCast-to-I1(narrowed)
3543 IRAtom* qV = mkPCastXXtoXXlsb(mce, shVnarrowed, Ity_I64);
3544 // and assemble the result
3545 return assignNew('V', mce, Ity_V128,
3546 binop(Iop_64HLtoV128, qV, shVnarrowed));
3547 }
3548
sewardj57f92b02010-08-22 11:54:14 +00003549 case Iop_Mull32Sx2:
3550 case Iop_Mull32Ux2:
sewardj4d6ce842014-07-21 09:21:57 +00003551 case Iop_QDMull32Sx2:
sewardj7ee7d852011-06-16 11:37:21 +00003552 return vectorWidenI64(mce, Iop_Widen32Sto64x2,
3553 mkUifU64(mce, vatom1, vatom2));
sewardj57f92b02010-08-22 11:54:14 +00003554
3555 case Iop_Mull16Sx4:
3556 case Iop_Mull16Ux4:
sewardj4d6ce842014-07-21 09:21:57 +00003557 case Iop_QDMull16Sx4:
sewardj7ee7d852011-06-16 11:37:21 +00003558 return vectorWidenI64(mce, Iop_Widen16Sto32x4,
3559 mkUifU64(mce, vatom1, vatom2));
sewardj57f92b02010-08-22 11:54:14 +00003560
3561 case Iop_Mull8Sx8:
3562 case Iop_Mull8Ux8:
3563 case Iop_PolynomialMull8x8:
sewardj7ee7d852011-06-16 11:37:21 +00003564 return vectorWidenI64(mce, Iop_Widen8Sto16x8,
3565 mkUifU64(mce, vatom1, vatom2));
sewardj57f92b02010-08-22 11:54:14 +00003566
3567 case Iop_PwAdd32x4:
3568 return mkPCast32x4(mce,
3569 assignNew('V', mce, Ity_V128, binop(op, mkPCast32x4(mce, vatom1),
3570 mkPCast32x4(mce, vatom2))));
3571
3572 case Iop_PwAdd16x8:
3573 return mkPCast16x8(mce,
3574 assignNew('V', mce, Ity_V128, binop(op, mkPCast16x8(mce, vatom1),
3575 mkPCast16x8(mce, vatom2))));
3576
3577 case Iop_PwAdd8x16:
3578 return mkPCast8x16(mce,
3579 assignNew('V', mce, Ity_V128, binop(op, mkPCast8x16(mce, vatom1),
3580 mkPCast8x16(mce, vatom2))));
3581
sewardj20d38f22005-02-07 23:50:18 +00003582 /* V128-bit data-steering */
3583 case Iop_SetV128lo32:
3584 case Iop_SetV128lo64:
3585 case Iop_64HLtoV128:
sewardja1d93302004-12-12 16:45:06 +00003586 case Iop_InterleaveLO64x2:
3587 case Iop_InterleaveLO32x4:
3588 case Iop_InterleaveLO16x8:
3589 case Iop_InterleaveLO8x16:
3590 case Iop_InterleaveHI64x2:
3591 case Iop_InterleaveHI32x4:
3592 case Iop_InterleaveHI16x8:
3593 case Iop_InterleaveHI8x16:
sewardj57f92b02010-08-22 11:54:14 +00003594 case Iop_CatOddLanes8x16:
3595 case Iop_CatOddLanes16x8:
3596 case Iop_CatOddLanes32x4:
3597 case Iop_CatEvenLanes8x16:
3598 case Iop_CatEvenLanes16x8:
3599 case Iop_CatEvenLanes32x4:
3600 case Iop_InterleaveOddLanes8x16:
3601 case Iop_InterleaveOddLanes16x8:
3602 case Iop_InterleaveOddLanes32x4:
3603 case Iop_InterleaveEvenLanes8x16:
3604 case Iop_InterleaveEvenLanes16x8:
3605 case Iop_InterleaveEvenLanes32x4:
sewardj7cf4e6b2008-05-01 20:24:26 +00003606 return assignNew('V', mce, Ity_V128, binop(op, vatom1, vatom2));
sewardj57f92b02010-08-22 11:54:14 +00003607
3608 case Iop_GetElem8x16:
sewardjb9e6d242013-05-11 13:42:08 +00003609 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00003610 return assignNew('V', mce, Ity_I8, binop(op, vatom1, atom2));
3611 case Iop_GetElem16x8:
sewardjb9e6d242013-05-11 13:42:08 +00003612 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00003613 return assignNew('V', mce, Ity_I16, binop(op, vatom1, atom2));
3614 case Iop_GetElem32x4:
sewardjb9e6d242013-05-11 13:42:08 +00003615 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00003616 return assignNew('V', mce, Ity_I32, binop(op, vatom1, atom2));
3617 case Iop_GetElem64x2:
sewardjb9e6d242013-05-11 13:42:08 +00003618 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00003619 return assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2));
3620
sewardj620eb5b2005-10-22 12:50:43 +00003621 /* Perm8x16: rearrange values in left arg using steering values
3622 from right arg. So rearrange the vbits in the same way but
sewardj350e8f72012-06-25 07:52:15 +00003623 pessimise wrt steering values. Perm32x4 ditto. */
sewardj620eb5b2005-10-22 12:50:43 +00003624 case Iop_Perm8x16:
3625 return mkUifUV128(
3626 mce,
sewardj7cf4e6b2008-05-01 20:24:26 +00003627 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
sewardj620eb5b2005-10-22 12:50:43 +00003628 mkPCast8x16(mce, vatom2)
3629 );
sewardj350e8f72012-06-25 07:52:15 +00003630 case Iop_Perm32x4:
3631 return mkUifUV128(
3632 mce,
3633 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
3634 mkPCast32x4(mce, vatom2)
3635 );
sewardj170ee212004-12-10 18:57:51 +00003636
sewardj43d60752005-11-10 18:13:01 +00003637 /* These two take the lower half of each 16-bit lane, sign/zero
3638 extend it to 32, and multiply together, producing a 32x4
3639 result (and implicitly ignoring half the operand bits). So
3640 treat it as a bunch of independent 16x8 operations, but then
3641 do 32-bit shifts left-right to copy the lower half results
3642 (which are all 0s or all 1s due to PCasting in binary16Ix8)
3643 into the upper half of each result lane. */
3644 case Iop_MullEven16Ux8:
3645 case Iop_MullEven16Sx8: {
3646 IRAtom* at;
3647 at = binary16Ix8(mce,vatom1,vatom2);
sewardj7cf4e6b2008-05-01 20:24:26 +00003648 at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN32x4, at, mkU8(16)));
3649 at = assignNew('V', mce, Ity_V128, binop(Iop_SarN32x4, at, mkU8(16)));
sewardj43d60752005-11-10 18:13:01 +00003650 return at;
3651 }
3652
3653 /* Same deal as Iop_MullEven16{S,U}x8 */
3654 case Iop_MullEven8Ux16:
3655 case Iop_MullEven8Sx16: {
3656 IRAtom* at;
3657 at = binary8Ix16(mce,vatom1,vatom2);
sewardj7cf4e6b2008-05-01 20:24:26 +00003658 at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN16x8, at, mkU8(8)));
3659 at = assignNew('V', mce, Ity_V128, binop(Iop_SarN16x8, at, mkU8(8)));
sewardj43d60752005-11-10 18:13:01 +00003660 return at;
3661 }
3662
carll62770672013-10-01 15:50:09 +00003663 /* Same deal as Iop_MullEven16{S,U}x8 */
3664 case Iop_MullEven32Ux4:
3665 case Iop_MullEven32Sx4: {
3666 IRAtom* at;
3667 at = binary32Ix4(mce,vatom1,vatom2);
3668 at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN64x2, at, mkU8(32)));
3669 at = assignNew('V', mce, Ity_V128, binop(Iop_SarN64x2, at, mkU8(32)));
3670 return at;
3671 }
3672
sewardj43d60752005-11-10 18:13:01 +00003673 /* narrow 2xV128 into 1xV128, hi half from left arg, in a 2 x
3674 32x4 -> 16x8 laneage, discarding the upper half of each lane.
3675 Simply apply same op to the V bits, since this really no more
3676 than a data steering operation. */
sewardj7ee7d852011-06-16 11:37:21 +00003677 case Iop_NarrowBin32to16x8:
3678 case Iop_NarrowBin16to8x16:
carlldfbf2942013-08-12 18:04:22 +00003679 case Iop_NarrowBin64to32x4:
sewardj7cf4e6b2008-05-01 20:24:26 +00003680 return assignNew('V', mce, Ity_V128,
3681 binop(op, vatom1, vatom2));
sewardj43d60752005-11-10 18:13:01 +00003682
3683 case Iop_ShrV128:
3684 case Iop_ShlV128:
3685 /* Same scheme as with all other shifts. Note: 10 Nov 05:
3686 this is wrong now, scalar shifts are done properly lazily.
3687 Vector shifts should be fixed too. */
sewardjb9e6d242013-05-11 13:42:08 +00003688 complainIfUndefined(mce, atom2, NULL);
sewardj7cf4e6b2008-05-01 20:24:26 +00003689 return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2));
sewardj43d60752005-11-10 18:13:01 +00003690
carll24e40de2013-10-15 18:13:21 +00003691 /* SHA Iops */
3692 case Iop_SHA256:
3693 case Iop_SHA512:
3694 complainIfUndefined(mce, atom2, NULL);
3695 return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2));
3696
sewardj69a13322005-04-23 01:14:51 +00003697 /* I128-bit data-steering */
3698 case Iop_64HLto128:
sewardj7cf4e6b2008-05-01 20:24:26 +00003699 return assignNew('V', mce, Ity_I128, binop(op, vatom1, vatom2));
sewardj69a13322005-04-23 01:14:51 +00003700
sewardj350e8f72012-06-25 07:52:15 +00003701 /* V256-bit SIMD */
3702
sewardj350e8f72012-06-25 07:52:15 +00003703 case Iop_Max64Fx4:
3704 case Iop_Min64Fx4:
3705 return binary64Fx4(mce, vatom1, vatom2);
3706
sewardj350e8f72012-06-25 07:52:15 +00003707 case Iop_Max32Fx8:
3708 case Iop_Min32Fx8:
3709 return binary32Fx8(mce, vatom1, vatom2);
3710
3711 /* V256-bit data-steering */
3712 case Iop_V128HLtoV256:
3713 return assignNew('V', mce, Ity_V256, binop(op, vatom1, vatom2));
3714
sewardj3245c912004-12-10 14:58:26 +00003715 /* Scalar floating point */
3716
sewardjb5b87402011-03-07 16:05:35 +00003717 case Iop_F32toI64S:
florian1b9609a2012-09-01 00:15:45 +00003718 case Iop_F32toI64U:
sewardjb5b87402011-03-07 16:05:35 +00003719 /* I32(rm) x F32 -> I64 */
3720 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3721
3722 case Iop_I64StoF32:
3723 /* I32(rm) x I64 -> F32 */
3724 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3725
sewardjed69fdb2006-02-03 16:12:27 +00003726 case Iop_RoundF64toInt:
3727 case Iop_RoundF64toF32:
sewardj06f96d02009-12-31 19:24:12 +00003728 case Iop_F64toI64S:
sewardja201c452011-07-24 14:15:54 +00003729 case Iop_F64toI64U:
sewardj06f96d02009-12-31 19:24:12 +00003730 case Iop_I64StoF64:
sewardjf34eb492011-04-15 11:57:05 +00003731 case Iop_I64UtoF64:
sewardj22ac5f42006-02-03 22:55:04 +00003732 case Iop_SinF64:
3733 case Iop_CosF64:
3734 case Iop_TanF64:
3735 case Iop_2xm1F64:
3736 case Iop_SqrtF64:
sewardj14350762015-02-24 12:24:35 +00003737 case Iop_RecpExpF64:
sewardj22ac5f42006-02-03 22:55:04 +00003738 /* I32(rm) x I64/F64 -> I64/F64 */
sewardj95448072004-11-22 20:19:51 +00003739 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3740
sewardjea8b02f2012-04-12 17:28:57 +00003741 case Iop_ShlD64:
3742 case Iop_ShrD64:
sewardj18c72fa2012-04-23 11:22:05 +00003743 case Iop_RoundD64toInt:
florian054684f2013-06-06 21:21:46 +00003744 /* I32(rm) x D64 -> D64 */
sewardjea8b02f2012-04-12 17:28:57 +00003745 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3746
3747 case Iop_ShlD128:
3748 case Iop_ShrD128:
sewardj18c72fa2012-04-23 11:22:05 +00003749 case Iop_RoundD128toInt:
florian054684f2013-06-06 21:21:46 +00003750 /* I32(rm) x D128 -> D128 */
sewardjea8b02f2012-04-12 17:28:57 +00003751 return mkLazy2(mce, Ity_I128, vatom1, vatom2);
3752
3753 case Iop_D64toI64S:
florian53eb2a02013-01-12 22:04:00 +00003754 case Iop_D64toI64U:
sewardjea8b02f2012-04-12 17:28:57 +00003755 case Iop_I64StoD64:
florian53eb2a02013-01-12 22:04:00 +00003756 case Iop_I64UtoD64:
florian054684f2013-06-06 21:21:46 +00003757 /* I32(rm) x I64/D64 -> D64/I64 */
sewardjea8b02f2012-04-12 17:28:57 +00003758 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3759
florianba5693c2013-06-17 19:04:24 +00003760 case Iop_F32toD32:
3761 case Iop_F64toD32:
3762 case Iop_F128toD32:
3763 case Iop_D32toF32:
3764 case Iop_D64toF32:
3765 case Iop_D128toF32:
3766 /* I32(rm) x F32/F64/F128/D32/D64/D128 -> D32/F32 */
3767 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3768
3769 case Iop_F32toD64:
florian39b08d82013-05-05 15:05:42 +00003770 case Iop_F64toD64:
florianba5693c2013-06-17 19:04:24 +00003771 case Iop_F128toD64:
3772 case Iop_D32toF64:
florian39b08d82013-05-05 15:05:42 +00003773 case Iop_D64toF64:
florian39b08d82013-05-05 15:05:42 +00003774 case Iop_D128toF64:
florianba5693c2013-06-17 19:04:24 +00003775 /* I32(rm) x F32/F64/F128/D32/D64/D128 -> D64/F64 */
florian39b08d82013-05-05 15:05:42 +00003776 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3777
florianba5693c2013-06-17 19:04:24 +00003778 case Iop_F32toD128:
3779 case Iop_F64toD128:
florian39b08d82013-05-05 15:05:42 +00003780 case Iop_F128toD128:
florianba5693c2013-06-17 19:04:24 +00003781 case Iop_D32toF128:
3782 case Iop_D64toF128:
florian39b08d82013-05-05 15:05:42 +00003783 case Iop_D128toF128:
florianba5693c2013-06-17 19:04:24 +00003784 /* I32(rm) x F32/F64/F128/D32/D64/D128 -> D128/F128 */
florian39b08d82013-05-05 15:05:42 +00003785 return mkLazy2(mce, Ity_I128, vatom1, vatom2);
3786
sewardjd376a762010-06-27 09:08:54 +00003787 case Iop_RoundF32toInt:
sewardjaec1be32010-01-03 22:29:32 +00003788 case Iop_SqrtF32:
sewardj14350762015-02-24 12:24:35 +00003789 case Iop_RecpExpF32:
sewardjaec1be32010-01-03 22:29:32 +00003790 /* I32(rm) x I32/F32 -> I32/F32 */
3791 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3792
sewardjb5b87402011-03-07 16:05:35 +00003793 case Iop_SqrtF128:
3794 /* I32(rm) x F128 -> F128 */
3795 return mkLazy2(mce, Ity_I128, vatom1, vatom2);
3796
3797 case Iop_I32StoF32:
florian1b9609a2012-09-01 00:15:45 +00003798 case Iop_I32UtoF32:
sewardjb5b87402011-03-07 16:05:35 +00003799 case Iop_F32toI32S:
florian1b9609a2012-09-01 00:15:45 +00003800 case Iop_F32toI32U:
sewardjb5b87402011-03-07 16:05:35 +00003801 /* First arg is I32 (rounding mode), second is F32/I32 (data). */
3802 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3803
3804 case Iop_F128toI32S: /* IRRoundingMode(I32) x F128 -> signed I32 */
florian1b9609a2012-09-01 00:15:45 +00003805 case Iop_F128toI32U: /* IRRoundingMode(I32) x F128 -> unsigned I32 */
sewardjb5b87402011-03-07 16:05:35 +00003806 case Iop_F128toF32: /* IRRoundingMode(I32) x F128 -> F32 */
florian733b4db2013-06-06 19:13:29 +00003807 case Iop_D128toI32S: /* IRRoundingMode(I32) x D128 -> signed I32 */
3808 case Iop_D128toI32U: /* IRRoundingMode(I32) x D128 -> unsigned I32 */
sewardjb5b87402011-03-07 16:05:35 +00003809 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3810
3811 case Iop_F128toI64S: /* IRRoundingMode(I32) x F128 -> signed I64 */
florian1b9609a2012-09-01 00:15:45 +00003812 case Iop_F128toI64U: /* IRRoundingMode(I32) x F128 -> unsigned I64 */
sewardjb5b87402011-03-07 16:05:35 +00003813 case Iop_F128toF64: /* IRRoundingMode(I32) x F128 -> F64 */
florian733b4db2013-06-06 19:13:29 +00003814 case Iop_D128toD64: /* IRRoundingMode(I64) x D128 -> D64 */
3815 case Iop_D128toI64S: /* IRRoundingMode(I64) x D128 -> signed I64 */
3816 case Iop_D128toI64U: /* IRRoundingMode(I32) x D128 -> unsigned I64 */
sewardjb5b87402011-03-07 16:05:35 +00003817 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3818
3819 case Iop_F64HLtoF128:
sewardjb0ccb4d2012-04-02 10:22:05 +00003820 case Iop_D64HLtoD128:
sewardj350e8f72012-06-25 07:52:15 +00003821 return assignNew('V', mce, Ity_I128,
3822 binop(Iop_64HLto128, vatom1, vatom2));
sewardjb5b87402011-03-07 16:05:35 +00003823
sewardj59570ff2010-01-01 11:59:33 +00003824 case Iop_F64toI32U:
sewardj06f96d02009-12-31 19:24:12 +00003825 case Iop_F64toI32S:
sewardje9e16d32004-12-10 13:17:55 +00003826 case Iop_F64toF32:
sewardjf34eb492011-04-15 11:57:05 +00003827 case Iop_I64UtoF32:
florian53eb2a02013-01-12 22:04:00 +00003828 case Iop_D64toI32U:
3829 case Iop_D64toI32S:
3830 /* First arg is I32 (rounding mode), second is F64/D64 (data). */
sewardj95448072004-11-22 20:19:51 +00003831 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3832
sewardjea8b02f2012-04-12 17:28:57 +00003833 case Iop_D64toD32:
florian054684f2013-06-06 21:21:46 +00003834 /* First arg is I32 (rounding mode), second is D64 (data). */
florianf4bed372012-12-21 04:25:10 +00003835 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
sewardjea8b02f2012-04-12 17:28:57 +00003836
sewardj06f96d02009-12-31 19:24:12 +00003837 case Iop_F64toI16S:
sewardj95448072004-11-22 20:19:51 +00003838 /* First arg is I32 (rounding mode), second is F64 (data). */
3839 return mkLazy2(mce, Ity_I16, vatom1, vatom2);
3840
sewardj18c72fa2012-04-23 11:22:05 +00003841 case Iop_InsertExpD64:
3842 /* I64 x I64 -> D64 */
3843 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3844
3845 case Iop_InsertExpD128:
3846 /* I64 x I128 -> D128 */
3847 return mkLazy2(mce, Ity_I128, vatom1, vatom2);
3848
sewardjb5b87402011-03-07 16:05:35 +00003849 case Iop_CmpF32:
sewardj95448072004-11-22 20:19:51 +00003850 case Iop_CmpF64:
sewardjb5b87402011-03-07 16:05:35 +00003851 case Iop_CmpF128:
sewardj18c72fa2012-04-23 11:22:05 +00003852 case Iop_CmpD64:
3853 case Iop_CmpD128:
florian29a36b92012-12-26 17:48:46 +00003854 case Iop_CmpExpD64:
3855 case Iop_CmpExpD128:
sewardj95448072004-11-22 20:19:51 +00003856 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3857
3858 /* non-FP after here */
3859
3860 case Iop_DivModU64to32:
3861 case Iop_DivModS64to32:
3862 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3863
sewardj69a13322005-04-23 01:14:51 +00003864 case Iop_DivModU128to64:
3865 case Iop_DivModS128to64:
3866 return mkLazy2(mce, Ity_I128, vatom1, vatom2);
3867
florian537ed2d2012-08-20 16:51:39 +00003868 case Iop_8HLto16:
3869 return assignNew('V', mce, Ity_I16, binop(op, vatom1, vatom2));
sewardj95448072004-11-22 20:19:51 +00003870 case Iop_16HLto32:
sewardj7cf4e6b2008-05-01 20:24:26 +00003871 return assignNew('V', mce, Ity_I32, binop(op, vatom1, vatom2));
sewardj95448072004-11-22 20:19:51 +00003872 case Iop_32HLto64:
sewardj7cf4e6b2008-05-01 20:24:26 +00003873 return assignNew('V', mce, Ity_I64, binop(op, vatom1, vatom2));
sewardj95448072004-11-22 20:19:51 +00003874
sewardjb5b87402011-03-07 16:05:35 +00003875 case Iop_DivModS64to64:
sewardj6cf40ff2005-04-20 22:31:26 +00003876 case Iop_MullS64:
3877 case Iop_MullU64: {
3878 IRAtom* vLo64 = mkLeft64(mce, mkUifU64(mce, vatom1,vatom2));
3879 IRAtom* vHi64 = mkPCastTo(mce, Ity_I64, vLo64);
sewardj350e8f72012-06-25 07:52:15 +00003880 return assignNew('V', mce, Ity_I128,
3881 binop(Iop_64HLto128, vHi64, vLo64));
sewardj6cf40ff2005-04-20 22:31:26 +00003882 }
3883
sewardj95448072004-11-22 20:19:51 +00003884 case Iop_MullS32:
3885 case Iop_MullU32: {
3886 IRAtom* vLo32 = mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
3887 IRAtom* vHi32 = mkPCastTo(mce, Ity_I32, vLo32);
sewardj350e8f72012-06-25 07:52:15 +00003888 return assignNew('V', mce, Ity_I64,
3889 binop(Iop_32HLto64, vHi32, vLo32));
sewardj95448072004-11-22 20:19:51 +00003890 }
3891
3892 case Iop_MullS16:
3893 case Iop_MullU16: {
3894 IRAtom* vLo16 = mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
3895 IRAtom* vHi16 = mkPCastTo(mce, Ity_I16, vLo16);
sewardj350e8f72012-06-25 07:52:15 +00003896 return assignNew('V', mce, Ity_I32,
3897 binop(Iop_16HLto32, vHi16, vLo16));
sewardj95448072004-11-22 20:19:51 +00003898 }
3899
3900 case Iop_MullS8:
3901 case Iop_MullU8: {
3902 IRAtom* vLo8 = mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
3903 IRAtom* vHi8 = mkPCastTo(mce, Ity_I8, vLo8);
sewardj7cf4e6b2008-05-01 20:24:26 +00003904 return assignNew('V', mce, Ity_I16, binop(Iop_8HLto16, vHi8, vLo8));
sewardj95448072004-11-22 20:19:51 +00003905 }
3906
sewardj5af05062010-10-18 16:31:14 +00003907 case Iop_Sad8Ux4: /* maybe we could do better? ftm, do mkLazy2. */
cerion9e591082005-06-23 15:28:34 +00003908 case Iop_DivS32:
3909 case Iop_DivU32:
sewardja201c452011-07-24 14:15:54 +00003910 case Iop_DivU32E:
sewardj169ac042011-09-05 12:12:34 +00003911 case Iop_DivS32E:
sewardj2157b2c2012-07-11 13:20:58 +00003912 case Iop_QAdd32S: /* could probably do better */
3913 case Iop_QSub32S: /* could probably do better */
cerion9e591082005-06-23 15:28:34 +00003914 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3915
sewardjb00944a2005-12-23 12:47:16 +00003916 case Iop_DivS64:
3917 case Iop_DivU64:
sewardja201c452011-07-24 14:15:54 +00003918 case Iop_DivS64E:
sewardj169ac042011-09-05 12:12:34 +00003919 case Iop_DivU64E:
sewardjb00944a2005-12-23 12:47:16 +00003920 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3921
sewardj95448072004-11-22 20:19:51 +00003922 case Iop_Add32:
sewardj54eac252012-03-27 10:19:39 +00003923 if (mce->bogusLiterals || mce->useLLVMworkarounds)
sewardjd5204dc2004-12-31 01:16:11 +00003924 return expensiveAddSub(mce,True,Ity_I32,
3925 vatom1,vatom2, atom1,atom2);
3926 else
3927 goto cheap_AddSub32;
sewardj95448072004-11-22 20:19:51 +00003928 case Iop_Sub32:
sewardjd5204dc2004-12-31 01:16:11 +00003929 if (mce->bogusLiterals)
3930 return expensiveAddSub(mce,False,Ity_I32,
3931 vatom1,vatom2, atom1,atom2);
3932 else
3933 goto cheap_AddSub32;
3934
3935 cheap_AddSub32:
sewardj95448072004-11-22 20:19:51 +00003936 case Iop_Mul32:
sewardj992dff92005-10-07 11:08:55 +00003937 return mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
3938
sewardj463b3d92005-07-18 11:41:15 +00003939 case Iop_CmpORD32S:
3940 case Iop_CmpORD32U:
sewardj1bc82102005-12-23 00:16:24 +00003941 case Iop_CmpORD64S:
3942 case Iop_CmpORD64U:
3943 return doCmpORD(mce, op, vatom1,vatom2, atom1,atom2);
sewardj95448072004-11-22 20:19:51 +00003944
sewardj681be302005-01-15 20:43:58 +00003945 case Iop_Add64:
sewardj54eac252012-03-27 10:19:39 +00003946 if (mce->bogusLiterals || mce->useLLVMworkarounds)
tomd9774d72005-06-27 08:11:01 +00003947 return expensiveAddSub(mce,True,Ity_I64,
3948 vatom1,vatom2, atom1,atom2);
3949 else
3950 goto cheap_AddSub64;
sewardj681be302005-01-15 20:43:58 +00003951 case Iop_Sub64:
tomd9774d72005-06-27 08:11:01 +00003952 if (mce->bogusLiterals)
3953 return expensiveAddSub(mce,False,Ity_I64,
3954 vatom1,vatom2, atom1,atom2);
3955 else
3956 goto cheap_AddSub64;
3957
3958 cheap_AddSub64:
3959 case Iop_Mul64:
sewardj681be302005-01-15 20:43:58 +00003960 return mkLeft64(mce, mkUifU64(mce, vatom1,vatom2));
3961
sewardj95448072004-11-22 20:19:51 +00003962 case Iop_Mul16:
3963 case Iop_Add16:
3964 case Iop_Sub16:
3965 return mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
3966
florian537ed2d2012-08-20 16:51:39 +00003967 case Iop_Mul8:
sewardj95448072004-11-22 20:19:51 +00003968 case Iop_Sub8:
3969 case Iop_Add8:
3970 return mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
3971
sewardj69a13322005-04-23 01:14:51 +00003972 case Iop_CmpEQ64:
sewardje6f8af42005-07-06 18:48:59 +00003973 case Iop_CmpNE64:
sewardj69a13322005-04-23 01:14:51 +00003974 if (mce->bogusLiterals)
sewardj4cfa81b2012-11-08 10:58:16 +00003975 goto expensive_cmp64;
sewardj69a13322005-04-23 01:14:51 +00003976 else
3977 goto cheap_cmp64;
sewardj4cfa81b2012-11-08 10:58:16 +00003978
3979 expensive_cmp64:
3980 case Iop_ExpCmpNE64:
3981 return expensiveCmpEQorNE(mce,Ity_I64, vatom1,vatom2, atom1,atom2 );
3982
sewardj69a13322005-04-23 01:14:51 +00003983 cheap_cmp64:
tomcd986332005-04-26 07:44:48 +00003984 case Iop_CmpLE64S: case Iop_CmpLE64U:
3985 case Iop_CmpLT64U: case Iop_CmpLT64S:
sewardj69a13322005-04-23 01:14:51 +00003986 return mkPCastTo(mce, Ity_I1, mkUifU64(mce, vatom1,vatom2));
3987
sewardjd5204dc2004-12-31 01:16:11 +00003988 case Iop_CmpEQ32:
sewardje6f8af42005-07-06 18:48:59 +00003989 case Iop_CmpNE32:
sewardjd5204dc2004-12-31 01:16:11 +00003990 if (mce->bogusLiterals)
sewardj4cfa81b2012-11-08 10:58:16 +00003991 goto expensive_cmp32;
sewardjd5204dc2004-12-31 01:16:11 +00003992 else
3993 goto cheap_cmp32;
sewardj4cfa81b2012-11-08 10:58:16 +00003994
3995 expensive_cmp32:
3996 case Iop_ExpCmpNE32:
3997 return expensiveCmpEQorNE(mce,Ity_I32, vatom1,vatom2, atom1,atom2 );
3998
sewardjd5204dc2004-12-31 01:16:11 +00003999 cheap_cmp32:
sewardj95448072004-11-22 20:19:51 +00004000 case Iop_CmpLE32S: case Iop_CmpLE32U:
4001 case Iop_CmpLT32U: case Iop_CmpLT32S:
sewardj95448072004-11-22 20:19:51 +00004002 return mkPCastTo(mce, Ity_I1, mkUifU32(mce, vatom1,vatom2));
4003
4004 case Iop_CmpEQ16: case Iop_CmpNE16:
4005 return mkPCastTo(mce, Ity_I1, mkUifU16(mce, vatom1,vatom2));
4006
sewardj4cfa81b2012-11-08 10:58:16 +00004007 case Iop_ExpCmpNE16:
4008 return expensiveCmpEQorNE(mce,Ity_I16, vatom1,vatom2, atom1,atom2 );
4009
sewardj95448072004-11-22 20:19:51 +00004010 case Iop_CmpEQ8: case Iop_CmpNE8:
4011 return mkPCastTo(mce, Ity_I1, mkUifU8(mce, vatom1,vatom2));
4012
sewardjafed4c52009-07-12 13:00:17 +00004013 case Iop_CasCmpEQ8: case Iop_CasCmpNE8:
4014 case Iop_CasCmpEQ16: case Iop_CasCmpNE16:
4015 case Iop_CasCmpEQ32: case Iop_CasCmpNE32:
4016 case Iop_CasCmpEQ64: case Iop_CasCmpNE64:
4017 /* Just say these all produce a defined result, regardless
4018 of their arguments. See COMMENT_ON_CasCmpEQ in this file. */
4019 return assignNew('V', mce, Ity_I1, definedOfType(Ity_I1));
4020
sewardjaaddbc22005-10-07 09:49:53 +00004021 case Iop_Shl64: case Iop_Shr64: case Iop_Sar64:
4022 return scalarShift( mce, Ity_I64, op, vatom1,vatom2, atom1,atom2 );
4023
sewardj95448072004-11-22 20:19:51 +00004024 case Iop_Shl32: case Iop_Shr32: case Iop_Sar32:
sewardjaaddbc22005-10-07 09:49:53 +00004025 return scalarShift( mce, Ity_I32, op, vatom1,vatom2, atom1,atom2 );
sewardj95448072004-11-22 20:19:51 +00004026
sewardjdb67f5f2004-12-14 01:15:31 +00004027 case Iop_Shl16: case Iop_Shr16: case Iop_Sar16:
sewardjaaddbc22005-10-07 09:49:53 +00004028 return scalarShift( mce, Ity_I16, op, vatom1,vatom2, atom1,atom2 );
sewardj95448072004-11-22 20:19:51 +00004029
florian537ed2d2012-08-20 16:51:39 +00004030 case Iop_Shl8: case Iop_Shr8: case Iop_Sar8:
sewardjaaddbc22005-10-07 09:49:53 +00004031 return scalarShift( mce, Ity_I8, op, vatom1,vatom2, atom1,atom2 );
sewardj95448072004-11-22 20:19:51 +00004032
sewardj350e8f72012-06-25 07:52:15 +00004033 case Iop_AndV256:
4034 uifu = mkUifUV256; difd = mkDifDV256;
4035 and_or_ty = Ity_V256; improve = mkImproveANDV256; goto do_And_Or;
sewardj20d38f22005-02-07 23:50:18 +00004036 case Iop_AndV128:
4037 uifu = mkUifUV128; difd = mkDifDV128;
4038 and_or_ty = Ity_V128; improve = mkImproveANDV128; goto do_And_Or;
sewardj7010f6e2004-12-10 13:35:22 +00004039 case Iop_And64:
4040 uifu = mkUifU64; difd = mkDifD64;
4041 and_or_ty = Ity_I64; improve = mkImproveAND64; goto do_And_Or;
sewardj95448072004-11-22 20:19:51 +00004042 case Iop_And32:
4043 uifu = mkUifU32; difd = mkDifD32;
4044 and_or_ty = Ity_I32; improve = mkImproveAND32; goto do_And_Or;
4045 case Iop_And16:
4046 uifu = mkUifU16; difd = mkDifD16;
4047 and_or_ty = Ity_I16; improve = mkImproveAND16; goto do_And_Or;
4048 case Iop_And8:
4049 uifu = mkUifU8; difd = mkDifD8;
4050 and_or_ty = Ity_I8; improve = mkImproveAND8; goto do_And_Or;
4051
sewardj350e8f72012-06-25 07:52:15 +00004052 case Iop_OrV256:
4053 uifu = mkUifUV256; difd = mkDifDV256;
4054 and_or_ty = Ity_V256; improve = mkImproveORV256; goto do_And_Or;
sewardj20d38f22005-02-07 23:50:18 +00004055 case Iop_OrV128:
4056 uifu = mkUifUV128; difd = mkDifDV128;
4057 and_or_ty = Ity_V128; improve = mkImproveORV128; goto do_And_Or;
sewardj7010f6e2004-12-10 13:35:22 +00004058 case Iop_Or64:
4059 uifu = mkUifU64; difd = mkDifD64;
4060 and_or_ty = Ity_I64; improve = mkImproveOR64; goto do_And_Or;
sewardj95448072004-11-22 20:19:51 +00004061 case Iop_Or32:
4062 uifu = mkUifU32; difd = mkDifD32;
4063 and_or_ty = Ity_I32; improve = mkImproveOR32; goto do_And_Or;
4064 case Iop_Or16:
4065 uifu = mkUifU16; difd = mkDifD16;
4066 and_or_ty = Ity_I16; improve = mkImproveOR16; goto do_And_Or;
4067 case Iop_Or8:
4068 uifu = mkUifU8; difd = mkDifD8;
4069 and_or_ty = Ity_I8; improve = mkImproveOR8; goto do_And_Or;
4070
4071 do_And_Or:
4072 return
4073 assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +00004074 'V', mce,
sewardj95448072004-11-22 20:19:51 +00004075 and_or_ty,
4076 difd(mce, uifu(mce, vatom1, vatom2),
4077 difd(mce, improve(mce, atom1, vatom1),
4078 improve(mce, atom2, vatom2) ) ) );
4079
4080 case Iop_Xor8:
4081 return mkUifU8(mce, vatom1, vatom2);
4082 case Iop_Xor16:
4083 return mkUifU16(mce, vatom1, vatom2);
4084 case Iop_Xor32:
4085 return mkUifU32(mce, vatom1, vatom2);
sewardj7010f6e2004-12-10 13:35:22 +00004086 case Iop_Xor64:
4087 return mkUifU64(mce, vatom1, vatom2);
sewardj20d38f22005-02-07 23:50:18 +00004088 case Iop_XorV128:
4089 return mkUifUV128(mce, vatom1, vatom2);
sewardj350e8f72012-06-25 07:52:15 +00004090 case Iop_XorV256:
4091 return mkUifUV256(mce, vatom1, vatom2);
njn25e49d8e72002-09-23 09:36:25 +00004092
sewardja2f30952013-03-27 11:40:02 +00004093 /* V256-bit SIMD */
4094
4095 case Iop_ShrN16x16:
4096 case Iop_ShrN32x8:
4097 case Iop_ShrN64x4:
4098 case Iop_SarN16x16:
4099 case Iop_SarN32x8:
4100 case Iop_ShlN16x16:
4101 case Iop_ShlN32x8:
4102 case Iop_ShlN64x4:
4103 /* Same scheme as with all other shifts. Note: 22 Oct 05:
4104 this is wrong now, scalar shifts are done properly lazily.
4105 Vector shifts should be fixed too. */
sewardjb9e6d242013-05-11 13:42:08 +00004106 complainIfUndefined(mce, atom2, NULL);
sewardja2f30952013-03-27 11:40:02 +00004107 return assignNew('V', mce, Ity_V256, binop(op, vatom1, atom2));
4108
4109 case Iop_QSub8Ux32:
4110 case Iop_QSub8Sx32:
4111 case Iop_Sub8x32:
4112 case Iop_Min8Ux32:
4113 case Iop_Min8Sx32:
4114 case Iop_Max8Ux32:
4115 case Iop_Max8Sx32:
4116 case Iop_CmpGT8Sx32:
4117 case Iop_CmpEQ8x32:
4118 case Iop_Avg8Ux32:
4119 case Iop_QAdd8Ux32:
4120 case Iop_QAdd8Sx32:
4121 case Iop_Add8x32:
4122 return binary8Ix32(mce, vatom1, vatom2);
4123
4124 case Iop_QSub16Ux16:
4125 case Iop_QSub16Sx16:
4126 case Iop_Sub16x16:
4127 case Iop_Mul16x16:
4128 case Iop_MulHi16Sx16:
4129 case Iop_MulHi16Ux16:
4130 case Iop_Min16Sx16:
4131 case Iop_Min16Ux16:
4132 case Iop_Max16Sx16:
4133 case Iop_Max16Ux16:
4134 case Iop_CmpGT16Sx16:
4135 case Iop_CmpEQ16x16:
4136 case Iop_Avg16Ux16:
4137 case Iop_QAdd16Ux16:
4138 case Iop_QAdd16Sx16:
4139 case Iop_Add16x16:
4140 return binary16Ix16(mce, vatom1, vatom2);
4141
4142 case Iop_Sub32x8:
4143 case Iop_CmpGT32Sx8:
4144 case Iop_CmpEQ32x8:
4145 case Iop_Add32x8:
4146 case Iop_Max32Ux8:
4147 case Iop_Max32Sx8:
4148 case Iop_Min32Ux8:
4149 case Iop_Min32Sx8:
4150 case Iop_Mul32x8:
4151 return binary32Ix8(mce, vatom1, vatom2);
4152
4153 case Iop_Sub64x4:
4154 case Iop_Add64x4:
4155 case Iop_CmpEQ64x4:
4156 case Iop_CmpGT64Sx4:
4157 return binary64Ix4(mce, vatom1, vatom2);
4158
4159 /* Perm32x8: rearrange values in left arg using steering values
4160 from right arg. So rearrange the vbits in the same way but
4161 pessimise wrt steering values. */
4162 case Iop_Perm32x8:
4163 return mkUifUV256(
4164 mce,
4165 assignNew('V', mce, Ity_V256, binop(op, vatom1, atom2)),
4166 mkPCast32x8(mce, vatom2)
4167 );
4168
sewardjbfd03f82014-08-26 18:35:13 +00004169 /* Q-and-Qshift-by-vector of the form (V128, V128) -> V256.
4170 Handle the shifted results in the same way that other
4171 binary Q ops are handled, eg QSub: UifU the two args,
4172 then pessimise -- which is binaryNIxM. But for the upper
4173 V128, we require to generate just 1 bit which is the
4174 pessimised shift result, with 127 defined zeroes above it.
4175
 4176 Note that this is overly pessimistic in that in fact only the
4177 bottom 8 bits of each lane of the second arg determine the shift
4178 amount. Really we ought to ignore any undefinedness in the
4179 rest of the lanes of the second arg. */
4180 case Iop_QandSQsh64x2: case Iop_QandUQsh64x2:
4181 case Iop_QandSQRsh64x2: case Iop_QandUQRsh64x2:
4182 case Iop_QandSQsh32x4: case Iop_QandUQsh32x4:
4183 case Iop_QandSQRsh32x4: case Iop_QandUQRsh32x4:
4184 case Iop_QandSQsh16x8: case Iop_QandUQsh16x8:
4185 case Iop_QandSQRsh16x8: case Iop_QandUQRsh16x8:
4186 case Iop_QandSQsh8x16: case Iop_QandUQsh8x16:
4187 case Iop_QandSQRsh8x16: case Iop_QandUQRsh8x16:
4188 {
4189 // The function to generate the pessimised shift result
4190 IRAtom* (*binaryNIxM)(MCEnv*,IRAtom*,IRAtom*) = NULL;
4191 switch (op) {
4192 case Iop_QandSQsh64x2:
4193 case Iop_QandUQsh64x2:
4194 case Iop_QandSQRsh64x2:
4195 case Iop_QandUQRsh64x2:
4196 binaryNIxM = binary64Ix2;
4197 break;
4198 case Iop_QandSQsh32x4:
4199 case Iop_QandUQsh32x4:
4200 case Iop_QandSQRsh32x4:
4201 case Iop_QandUQRsh32x4:
4202 binaryNIxM = binary32Ix4;
4203 break;
4204 case Iop_QandSQsh16x8:
4205 case Iop_QandUQsh16x8:
4206 case Iop_QandSQRsh16x8:
4207 case Iop_QandUQRsh16x8:
4208 binaryNIxM = binary16Ix8;
4209 break;
4210 case Iop_QandSQsh8x16:
4211 case Iop_QandUQsh8x16:
4212 case Iop_QandSQRsh8x16:
4213 case Iop_QandUQRsh8x16:
4214 binaryNIxM = binary8Ix16;
4215 break;
4216 default:
4217 tl_assert(0);
4218 }
4219 tl_assert(binaryNIxM);
4220 // Pessimised shift result, shV[127:0]
4221 IRAtom* shV = binaryNIxM(mce, vatom1, vatom2);
4222 // Generates: Def--(127)--Def PCast-to-I1(shV)
4223 IRAtom* qV = mkPCastXXtoXXlsb(mce, shV, Ity_V128);
4224 // and assemble the result
4225 return assignNew('V', mce, Ity_V256,
4226 binop(Iop_V128HLtoV256, qV, shV));
4227 }
4228
njn25e49d8e72002-09-23 09:36:25 +00004229 default:
sewardj95448072004-11-22 20:19:51 +00004230 ppIROp(op);
4231 VG_(tool_panic)("memcheck:expr2vbits_Binop");
njn25e49d8e72002-09-23 09:36:25 +00004232 }
njn25e49d8e72002-09-23 09:36:25 +00004233}
4234
njn25e49d8e72002-09-23 09:36:25 +00004235
sewardj95448072004-11-22 20:19:51 +00004236static
4237IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
4238{
sewardjcafe5052013-01-17 14:24:35 +00004239 /* For the widening operations {8,16,32}{U,S}to{16,32,64}, the
4240 selection of shadow operation implicitly duplicates the logic in
4241 do_shadow_LoadG and should be kept in sync (in the very unlikely
4242 event that the interpretation of such widening ops changes in
4243 future). See comment in do_shadow_LoadG. */
sewardj95448072004-11-22 20:19:51 +00004244 IRAtom* vatom = expr2vbits( mce, atom );
4245 tl_assert(isOriginalAtom(mce,atom));
4246 switch (op) {
4247
sewardj0b070592004-12-10 21:44:22 +00004248 case Iop_Sqrt64Fx2:
sewardjc46e6cc2014-03-10 10:42:36 +00004249 case Iop_Abs64Fx2:
4250 case Iop_Neg64Fx2:
sewardj14350762015-02-24 12:24:35 +00004251 case Iop_RSqrtEst64Fx2:
4252 case Iop_RecipEst64Fx2:
sewardj0b070592004-12-10 21:44:22 +00004253 return unary64Fx2(mce, vatom);
4254
4255 case Iop_Sqrt64F0x2:
4256 return unary64F0x2(mce, vatom);
4257
sewardj350e8f72012-06-25 07:52:15 +00004258 case Iop_Sqrt32Fx8:
sewardjee6bb772014-08-24 14:02:22 +00004259 case Iop_RSqrtEst32Fx8:
4260 case Iop_RecipEst32Fx8:
sewardj350e8f72012-06-25 07:52:15 +00004261 return unary32Fx8(mce, vatom);
4262
4263 case Iop_Sqrt64Fx4:
4264 return unary64Fx4(mce, vatom);
4265
sewardj170ee212004-12-10 18:57:51 +00004266 case Iop_Sqrt32Fx4:
sewardjee6bb772014-08-24 14:02:22 +00004267 case Iop_RecipEst32Fx4:
cerion176cb4c2005-11-16 17:21:49 +00004268 case Iop_I32UtoFx4:
4269 case Iop_I32StoFx4:
4270 case Iop_QFtoI32Ux4_RZ:
4271 case Iop_QFtoI32Sx4_RZ:
4272 case Iop_RoundF32x4_RM:
4273 case Iop_RoundF32x4_RP:
4274 case Iop_RoundF32x4_RN:
4275 case Iop_RoundF32x4_RZ:
sewardjee6bb772014-08-24 14:02:22 +00004276 case Iop_RecipEst32Ux4:
sewardj57f92b02010-08-22 11:54:14 +00004277 case Iop_Abs32Fx4:
4278 case Iop_Neg32Fx4:
sewardjee6bb772014-08-24 14:02:22 +00004279 case Iop_RSqrtEst32Fx4:
sewardj170ee212004-12-10 18:57:51 +00004280 return unary32Fx4(mce, vatom);
4281
sewardj57f92b02010-08-22 11:54:14 +00004282 case Iop_I32UtoFx2:
4283 case Iop_I32StoFx2:
sewardjee6bb772014-08-24 14:02:22 +00004284 case Iop_RecipEst32Fx2:
4285 case Iop_RecipEst32Ux2:
sewardj57f92b02010-08-22 11:54:14 +00004286 case Iop_Abs32Fx2:
4287 case Iop_Neg32Fx2:
sewardjee6bb772014-08-24 14:02:22 +00004288 case Iop_RSqrtEst32Fx2:
sewardj57f92b02010-08-22 11:54:14 +00004289 return unary32Fx2(mce, vatom);
4290
sewardj170ee212004-12-10 18:57:51 +00004291 case Iop_Sqrt32F0x4:
sewardjee6bb772014-08-24 14:02:22 +00004292 case Iop_RSqrtEst32F0x4:
4293 case Iop_RecipEst32F0x4:
sewardj170ee212004-12-10 18:57:51 +00004294 return unary32F0x4(mce, vatom);
4295
sewardj20d38f22005-02-07 23:50:18 +00004296 case Iop_32UtoV128:
4297 case Iop_64UtoV128:
sewardj620eb5b2005-10-22 12:50:43 +00004298 case Iop_Dup8x16:
4299 case Iop_Dup16x8:
4300 case Iop_Dup32x4:
sewardjbfd03f82014-08-26 18:35:13 +00004301 case Iop_Reverse1sIn8_x16:
sewardj55404922014-06-26 10:51:03 +00004302 case Iop_Reverse8sIn16_x8:
4303 case Iop_Reverse8sIn32_x4:
4304 case Iop_Reverse16sIn32_x4:
4305 case Iop_Reverse8sIn64_x2:
4306 case Iop_Reverse16sIn64_x2:
4307 case Iop_Reverse32sIn64_x2:
sewardj350e8f72012-06-25 07:52:15 +00004308 case Iop_V256toV128_1: case Iop_V256toV128_0:
sewardjc46e6cc2014-03-10 10:42:36 +00004309 case Iop_ZeroHI64ofV128:
4310 case Iop_ZeroHI96ofV128:
4311 case Iop_ZeroHI112ofV128:
4312 case Iop_ZeroHI120ofV128:
sewardj7cf4e6b2008-05-01 20:24:26 +00004313 return assignNew('V', mce, Ity_V128, unop(op, vatom));
sewardj170ee212004-12-10 18:57:51 +00004314
sewardjb5b87402011-03-07 16:05:35 +00004315 case Iop_F128HItoF64: /* F128 -> high half of F128 */
sewardjb0ccb4d2012-04-02 10:22:05 +00004316 case Iop_D128HItoD64: /* D128 -> high half of D128 */
sewardjb5b87402011-03-07 16:05:35 +00004317 return assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, vatom));
4318 case Iop_F128LOtoF64: /* F128 -> low half of F128 */
sewardjb0ccb4d2012-04-02 10:22:05 +00004319 case Iop_D128LOtoD64: /* D128 -> low half of D128 */
sewardjb5b87402011-03-07 16:05:35 +00004320 return assignNew('V', mce, Ity_I64, unop(Iop_128to64, vatom));
4321
4322 case Iop_NegF128:
4323 case Iop_AbsF128:
4324 return mkPCastTo(mce, Ity_I128, vatom);
4325
4326 case Iop_I32StoF128: /* signed I32 -> F128 */
4327 case Iop_I64StoF128: /* signed I64 -> F128 */
florian1b9609a2012-09-01 00:15:45 +00004328 case Iop_I32UtoF128: /* unsigned I32 -> F128 */
4329 case Iop_I64UtoF128: /* unsigned I64 -> F128 */
sewardjb5b87402011-03-07 16:05:35 +00004330 case Iop_F32toF128: /* F32 -> F128 */
4331 case Iop_F64toF128: /* F64 -> F128 */
florian53eb2a02013-01-12 22:04:00 +00004332 case Iop_I32StoD128: /* signed I32 -> D128 */
sewardjea8b02f2012-04-12 17:28:57 +00004333 case Iop_I64StoD128: /* signed I64 -> D128 */
florian53eb2a02013-01-12 22:04:00 +00004334 case Iop_I32UtoD128: /* unsigned I32 -> D128 */
4335 case Iop_I64UtoD128: /* unsigned I64 -> D128 */
sewardjb5b87402011-03-07 16:05:35 +00004336 return mkPCastTo(mce, Ity_I128, vatom);
4337
sewardj95448072004-11-22 20:19:51 +00004338 case Iop_F32toF64:
sewardj06f96d02009-12-31 19:24:12 +00004339 case Iop_I32StoF64:
sewardj59570ff2010-01-01 11:59:33 +00004340 case Iop_I32UtoF64:
sewardj95448072004-11-22 20:19:51 +00004341 case Iop_NegF64:
sewardj95448072004-11-22 20:19:51 +00004342 case Iop_AbsF64:
sewardjee6bb772014-08-24 14:02:22 +00004343 case Iop_RSqrtEst5GoodF64:
sewardjdead90a2008-08-08 08:38:23 +00004344 case Iop_RoundF64toF64_NEAREST:
4345 case Iop_RoundF64toF64_NegINF:
4346 case Iop_RoundF64toF64_PosINF:
4347 case Iop_RoundF64toF64_ZERO:
sewardj39cc7352005-06-09 21:31:55 +00004348 case Iop_Clz64:
sewardjea8b02f2012-04-12 17:28:57 +00004349 case Iop_D32toD64:
florian53eb2a02013-01-12 22:04:00 +00004350 case Iop_I32StoD64:
4351 case Iop_I32UtoD64:
sewardj18c72fa2012-04-23 11:22:05 +00004352 case Iop_ExtractExpD64: /* D64 -> I64 */
4353 case Iop_ExtractExpD128: /* D128 -> I64 */
florian974b4092012-12-27 20:06:18 +00004354 case Iop_ExtractSigD64: /* D64 -> I64 */
4355 case Iop_ExtractSigD128: /* D128 -> I64 */
florian1943eb52012-08-22 18:09:07 +00004356 case Iop_DPBtoBCD:
4357 case Iop_BCDtoDPB:
sewardj95448072004-11-22 20:19:51 +00004358 return mkPCastTo(mce, Ity_I64, vatom);
4359
sewardjea8b02f2012-04-12 17:28:57 +00004360 case Iop_D64toD128:
4361 return mkPCastTo(mce, Ity_I128, vatom);
4362
sewardj95448072004-11-22 20:19:51 +00004363 case Iop_Clz32:
sewardjed69fdb2006-02-03 16:12:27 +00004364 case Iop_TruncF64asF32:
sewardj59570ff2010-01-01 11:59:33 +00004365 case Iop_NegF32:
4366 case Iop_AbsF32:
sewardj95448072004-11-22 20:19:51 +00004367 return mkPCastTo(mce, Ity_I32, vatom);
4368
sewardj4cfa81b2012-11-08 10:58:16 +00004369 case Iop_Ctz32:
4370 case Iop_Ctz64:
4371 return expensiveCountTrailingZeroes(mce, op, atom, vatom);
4372
sewardjd9dbc192005-04-27 11:40:27 +00004373 case Iop_1Uto64:
sewardja201c452011-07-24 14:15:54 +00004374 case Iop_1Sto64:
sewardjd9dbc192005-04-27 11:40:27 +00004375 case Iop_8Uto64:
4376 case Iop_8Sto64:
4377 case Iop_16Uto64:
4378 case Iop_16Sto64:
sewardj95448072004-11-22 20:19:51 +00004379 case Iop_32Sto64:
4380 case Iop_32Uto64:
sewardj20d38f22005-02-07 23:50:18 +00004381 case Iop_V128to64:
4382 case Iop_V128HIto64:
sewardj6cf40ff2005-04-20 22:31:26 +00004383 case Iop_128HIto64:
4384 case Iop_128to64:
sewardj57f92b02010-08-22 11:54:14 +00004385 case Iop_Dup8x8:
4386 case Iop_Dup16x4:
4387 case Iop_Dup32x2:
sewardj55404922014-06-26 10:51:03 +00004388 case Iop_Reverse8sIn16_x4:
4389 case Iop_Reverse8sIn32_x2:
4390 case Iop_Reverse16sIn32_x2:
4391 case Iop_Reverse8sIn64_x1:
4392 case Iop_Reverse16sIn64_x1:
4393 case Iop_Reverse32sIn64_x1:
sewardj350e8f72012-06-25 07:52:15 +00004394 case Iop_V256to64_0: case Iop_V256to64_1:
4395 case Iop_V256to64_2: case Iop_V256to64_3:
sewardj7cf4e6b2008-05-01 20:24:26 +00004396 return assignNew('V', mce, Ity_I64, unop(op, vatom));
sewardj95448072004-11-22 20:19:51 +00004397
4398 case Iop_64to32:
4399 case Iop_64HIto32:
4400 case Iop_1Uto32:
sewardj463b3d92005-07-18 11:41:15 +00004401 case Iop_1Sto32:
sewardj95448072004-11-22 20:19:51 +00004402 case Iop_8Uto32:
4403 case Iop_16Uto32:
4404 case Iop_16Sto32:
4405 case Iop_8Sto32:
cerionfafaa0d2005-09-12 22:29:38 +00004406 case Iop_V128to32:
sewardj7cf4e6b2008-05-01 20:24:26 +00004407 return assignNew('V', mce, Ity_I32, unop(op, vatom));
sewardj95448072004-11-22 20:19:51 +00004408
4409 case Iop_8Sto16:
4410 case Iop_8Uto16:
4411 case Iop_32to16:
4412 case Iop_32HIto16:
sewardjd9dbc192005-04-27 11:40:27 +00004413 case Iop_64to16:
sewardjf5176342012-12-13 18:31:49 +00004414 case Iop_GetMSBs8x16:
sewardj7cf4e6b2008-05-01 20:24:26 +00004415 return assignNew('V', mce, Ity_I16, unop(op, vatom));
sewardj95448072004-11-22 20:19:51 +00004416
4417 case Iop_1Uto8:
sewardja201c452011-07-24 14:15:54 +00004418 case Iop_1Sto8:
sewardj95448072004-11-22 20:19:51 +00004419 case Iop_16to8:
sewardj9a807e02006-12-17 14:20:31 +00004420 case Iop_16HIto8:
sewardj95448072004-11-22 20:19:51 +00004421 case Iop_32to8:
sewardjd9dbc192005-04-27 11:40:27 +00004422 case Iop_64to8:
sewardj4cfa81b2012-11-08 10:58:16 +00004423 case Iop_GetMSBs8x8:
sewardj7cf4e6b2008-05-01 20:24:26 +00004424 return assignNew('V', mce, Ity_I8, unop(op, vatom));
sewardj95448072004-11-22 20:19:51 +00004425
4426 case Iop_32to1:
sewardj7cf4e6b2008-05-01 20:24:26 +00004427 return assignNew('V', mce, Ity_I1, unop(Iop_32to1, vatom));
sewardj95448072004-11-22 20:19:51 +00004428
sewardjd9dbc192005-04-27 11:40:27 +00004429 case Iop_64to1:
sewardj7cf4e6b2008-05-01 20:24:26 +00004430 return assignNew('V', mce, Ity_I1, unop(Iop_64to1, vatom));
sewardjd9dbc192005-04-27 11:40:27 +00004431
sewardj95448072004-11-22 20:19:51 +00004432 case Iop_ReinterpF64asI64:
4433 case Iop_ReinterpI64asF64:
sewardj0b070592004-12-10 21:44:22 +00004434 case Iop_ReinterpI32asF32:
sewardj59570ff2010-01-01 11:59:33 +00004435 case Iop_ReinterpF32asI32:
sewardj18c72fa2012-04-23 11:22:05 +00004436 case Iop_ReinterpI64asD64:
sewardj0892b822012-04-29 20:20:16 +00004437 case Iop_ReinterpD64asI64:
sewardj350e8f72012-06-25 07:52:15 +00004438 case Iop_NotV256:
sewardj20d38f22005-02-07 23:50:18 +00004439 case Iop_NotV128:
sewardj7010f6e2004-12-10 13:35:22 +00004440 case Iop_Not64:
sewardj95448072004-11-22 20:19:51 +00004441 case Iop_Not32:
4442 case Iop_Not16:
4443 case Iop_Not8:
4444 case Iop_Not1:
4445 return vatom;
sewardj7010f6e2004-12-10 13:35:22 +00004446
sewardj57f92b02010-08-22 11:54:14 +00004447 case Iop_CmpNEZ8x8:
4448 case Iop_Cnt8x8:
sewardj2e4d5af2014-06-26 08:22:01 +00004449 case Iop_Clz8x8:
4450 case Iop_Cls8x8:
sewardj57f92b02010-08-22 11:54:14 +00004451 case Iop_Abs8x8:
4452 return mkPCast8x8(mce, vatom);
4453
4454 case Iop_CmpNEZ8x16:
4455 case Iop_Cnt8x16:
sewardj2e4d5af2014-06-26 08:22:01 +00004456 case Iop_Clz8x16:
4457 case Iop_Cls8x16:
sewardj57f92b02010-08-22 11:54:14 +00004458 case Iop_Abs8x16:
4459 return mkPCast8x16(mce, vatom);
4460
4461 case Iop_CmpNEZ16x4:
sewardj2e4d5af2014-06-26 08:22:01 +00004462 case Iop_Clz16x4:
4463 case Iop_Cls16x4:
sewardj57f92b02010-08-22 11:54:14 +00004464 case Iop_Abs16x4:
4465 return mkPCast16x4(mce, vatom);
4466
4467 case Iop_CmpNEZ16x8:
sewardj2e4d5af2014-06-26 08:22:01 +00004468 case Iop_Clz16x8:
4469 case Iop_Cls16x8:
sewardj57f92b02010-08-22 11:54:14 +00004470 case Iop_Abs16x8:
4471 return mkPCast16x8(mce, vatom);
4472
4473 case Iop_CmpNEZ32x2:
sewardj2e4d5af2014-06-26 08:22:01 +00004474 case Iop_Clz32x2:
4475 case Iop_Cls32x2:
sewardj57f92b02010-08-22 11:54:14 +00004476 case Iop_FtoI32Ux2_RZ:
4477 case Iop_FtoI32Sx2_RZ:
4478 case Iop_Abs32x2:
4479 return mkPCast32x2(mce, vatom);
4480
4481 case Iop_CmpNEZ32x4:
sewardj2e4d5af2014-06-26 08:22:01 +00004482 case Iop_Clz32x4:
4483 case Iop_Cls32x4:
sewardj57f92b02010-08-22 11:54:14 +00004484 case Iop_FtoI32Ux4_RZ:
4485 case Iop_FtoI32Sx4_RZ:
4486 case Iop_Abs32x4:
sewardjbfd03f82014-08-26 18:35:13 +00004487 case Iop_RSqrtEst32Ux4:
sewardj57f92b02010-08-22 11:54:14 +00004488 return mkPCast32x4(mce, vatom);
4489
florian537ed2d2012-08-20 16:51:39 +00004490 case Iop_CmpwNEZ32:
4491 return mkPCastTo(mce, Ity_I32, vatom);
4492
sewardj57f92b02010-08-22 11:54:14 +00004493 case Iop_CmpwNEZ64:
4494 return mkPCastTo(mce, Ity_I64, vatom);
4495
4496 case Iop_CmpNEZ64x2:
carll24e40de2013-10-15 18:13:21 +00004497 case Iop_CipherSV128:
4498 case Iop_Clz64x2:
sewardj87a5bad2014-06-15 21:56:54 +00004499 case Iop_Abs64x2:
sewardj57f92b02010-08-22 11:54:14 +00004500 return mkPCast64x2(mce, vatom);
4501
carlle6bd3e42013-10-18 01:20:11 +00004502 case Iop_PwBitMtxXpose64x2:
4503 return assignNew('V', mce, Ity_V128, unop(op, vatom));
4504
sewardj7ee7d852011-06-16 11:37:21 +00004505 case Iop_NarrowUn16to8x8:
4506 case Iop_NarrowUn32to16x4:
4507 case Iop_NarrowUn64to32x2:
4508 case Iop_QNarrowUn16Sto8Sx8:
4509 case Iop_QNarrowUn16Sto8Ux8:
4510 case Iop_QNarrowUn16Uto8Ux8:
4511 case Iop_QNarrowUn32Sto16Sx4:
4512 case Iop_QNarrowUn32Sto16Ux4:
4513 case Iop_QNarrowUn32Uto16Ux4:
4514 case Iop_QNarrowUn64Sto32Sx2:
4515 case Iop_QNarrowUn64Sto32Ux2:
4516 case Iop_QNarrowUn64Uto32Ux2:
4517 return vectorNarrowUnV128(mce, op, vatom);
sewardj57f92b02010-08-22 11:54:14 +00004518
sewardj7ee7d852011-06-16 11:37:21 +00004519 case Iop_Widen8Sto16x8:
4520 case Iop_Widen8Uto16x8:
4521 case Iop_Widen16Sto32x4:
4522 case Iop_Widen16Uto32x4:
4523 case Iop_Widen32Sto64x2:
4524 case Iop_Widen32Uto64x2:
4525 return vectorWidenI64(mce, op, vatom);
sewardj57f92b02010-08-22 11:54:14 +00004526
4527 case Iop_PwAddL32Ux2:
4528 case Iop_PwAddL32Sx2:
4529 return mkPCastTo(mce, Ity_I64,
4530 assignNew('V', mce, Ity_I64, unop(op, mkPCast32x2(mce, vatom))));
4531
4532 case Iop_PwAddL16Ux4:
4533 case Iop_PwAddL16Sx4:
4534 return mkPCast32x2(mce,
4535 assignNew('V', mce, Ity_I64, unop(op, mkPCast16x4(mce, vatom))));
4536
4537 case Iop_PwAddL8Ux8:
4538 case Iop_PwAddL8Sx8:
4539 return mkPCast16x4(mce,
4540 assignNew('V', mce, Ity_I64, unop(op, mkPCast8x8(mce, vatom))));
4541
4542 case Iop_PwAddL32Ux4:
4543 case Iop_PwAddL32Sx4:
4544 return mkPCast64x2(mce,
4545 assignNew('V', mce, Ity_V128, unop(op, mkPCast32x4(mce, vatom))));
4546
4547 case Iop_PwAddL16Ux8:
4548 case Iop_PwAddL16Sx8:
4549 return mkPCast32x4(mce,
4550 assignNew('V', mce, Ity_V128, unop(op, mkPCast16x8(mce, vatom))));
4551
4552 case Iop_PwAddL8Ux16:
4553 case Iop_PwAddL8Sx16:
4554 return mkPCast16x8(mce,
4555 assignNew('V', mce, Ity_V128, unop(op, mkPCast8x16(mce, vatom))));
4556
sewardjf34eb492011-04-15 11:57:05 +00004557 case Iop_I64UtoF32:
sewardj95448072004-11-22 20:19:51 +00004558 default:
4559 ppIROp(op);
4560 VG_(tool_panic)("memcheck:expr2vbits_Unop");
4561 }
4562}
4563
4564
sewardjb9e6d242013-05-11 13:42:08 +00004565/* Worker function -- do not call directly. See comments on
4566 expr2vbits_Load for the meaning of |guard|.
4567
4568 Generates IR to (1) perform a definedness test of |addr|, (2)
4569 perform a validity test of |addr|, and (3) return the Vbits for the
4570 location indicated by |addr|. All of this only happens when
4571 |guard| is NULL or |guard| evaluates to True at run time.
4572
4573 If |guard| evaluates to False at run time, the returned value is
4574 the IR-mandated 0x55..55 value, and no checks nor shadow loads are
4575 performed.
4576
4577 The definedness of |guard| itself is not checked. That is assumed
4578 to have been done before this point, by the caller. */
static
IRAtom* expr2vbits_Load_WRK ( MCEnv* mce,
                              IREndness end, IRType ty,
                              IRAtom* addr, UInt bias, IRAtom* guard )
{
   tl_assert(isOriginalAtom(mce,addr));
   tl_assert(end == Iend_LE || end == Iend_BE);

   /* First, emit a definedness test for the address.  This also sets
      the address (shadow) to 'defined' following the test.  Both
      actions are gated on |guard|. */
   complainIfUndefined( mce, addr, guard );

   /* Now cook up a call to the relevant helper function, to read the
      data V bits from shadow memory. */
   ty = shadowTypeV(ty);

   void*        helper           = NULL;
   const HChar* hname            = NULL;
   /* V128/V256 results are too wide to come back in a register; those
      helpers return their result via a vector out-parameter
      (IRExpr_VECRET) instead -- see the call construction below. */
   Bool         ret_via_outparam = False;

   /* Select the endianness- and size-specific LOADV helper.  Note the
      8-bit helper is shared between LE and BE, as a single byte has no
      endianness. */
   if (end == Iend_LE) {
      switch (ty) {
         case Ity_V256: helper = &MC_(helperc_LOADV256le);
                        hname = "MC_(helperc_LOADV256le)";
                        ret_via_outparam = True;
                        break;
         case Ity_V128: helper = &MC_(helperc_LOADV128le);
                        hname = "MC_(helperc_LOADV128le)";
                        ret_via_outparam = True;
                        break;
         case Ity_I64:  helper = &MC_(helperc_LOADV64le);
                        hname = "MC_(helperc_LOADV64le)";
                        break;
         case Ity_I32:  helper = &MC_(helperc_LOADV32le);
                        hname = "MC_(helperc_LOADV32le)";
                        break;
         case Ity_I16:  helper = &MC_(helperc_LOADV16le);
                        hname = "MC_(helperc_LOADV16le)";
                        break;
         case Ity_I8:   helper = &MC_(helperc_LOADV8);
                        hname = "MC_(helperc_LOADV8)";
                        break;
         default:       ppIRType(ty);
                        VG_(tool_panic)("memcheck:expr2vbits_Load_WRK(LE)");
      }
   } else {
      switch (ty) {
         case Ity_V256: helper = &MC_(helperc_LOADV256be);
                        hname = "MC_(helperc_LOADV256be)";
                        ret_via_outparam = True;
                        break;
         case Ity_V128: helper = &MC_(helperc_LOADV128be);
                        hname = "MC_(helperc_LOADV128be)";
                        ret_via_outparam = True;
                        break;
         case Ity_I64:  helper = &MC_(helperc_LOADV64be);
                        hname = "MC_(helperc_LOADV64be)";
                        break;
         case Ity_I32:  helper = &MC_(helperc_LOADV32be);
                        hname = "MC_(helperc_LOADV32be)";
                        break;
         case Ity_I16:  helper = &MC_(helperc_LOADV16be);
                        hname = "MC_(helperc_LOADV16be)";
                        break;
         case Ity_I8:   helper = &MC_(helperc_LOADV8);
                        hname = "MC_(helperc_LOADV8)";
                        break;
         default:       ppIRType(ty);
                        VG_(tool_panic)("memcheck:expr2vbits_Load_WRK(BE)");
      }
   }

   tl_assert(helper);
   tl_assert(hname);

   /* Generate the actual address into addrAct: addr itself if there is
      no bias, otherwise addr + bias computed at the host word size. */
   IRAtom* addrAct;
   if (bias == 0) {
      addrAct = addr;
   } else {
      IROp    mkAdd;
      IRAtom* eBias;
      IRType  tyAddr  = mce->hWordTy;
      tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
      mkAdd   = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
      eBias   = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
      addrAct = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBias) );
   }

   /* We need to have a place to park the V bits we're just about to
      read. */
   IRTemp datavbits = newTemp(mce, ty, VSh);

   /* Here's the call.  Vector-typed results are returned through an
      out-parameter; everything else comes back as the dirty call's
      return value. */
   IRDirty* di;
   if (ret_via_outparam) {
      di = unsafeIRDirty_1_N( datavbits,
                              2/*regparms*/,
                              hname, VG_(fnptr_to_fnentry)( helper ),
                              mkIRExprVec_2( IRExpr_VECRET(), addrAct ) );
   } else {
      di = unsafeIRDirty_1_N( datavbits,
                              1/*regparms*/,
                              hname, VG_(fnptr_to_fnentry)( helper ),
                              mkIRExprVec_1( addrAct ) );
   }

   setHelperAnns( mce, di );
   if (guard) {
      di->guard = guard;
      /* Ideally the didn't-happen return value here would be all-ones
         (all-undefined), so it'd be obvious if it got used
         inadvertently.  We can get by with the IR-mandated default
         value (0b01 repeating, 0x55 etc) as that'll still look pretty
         undefined if it ever leaks out. */
   }
   stmt( 'V', mce, IRStmt_Dirty(di) );

   return mkexpr(datavbits);
}
4699
4700
sewardjcafe5052013-01-17 14:24:35 +00004701/* Generate IR to do a shadow load. The helper is expected to check
4702 the validity of the address and return the V bits for that address.
4703 This can optionally be controlled by a guard, which is assumed to
4704 be True if NULL. In the case where the guard is False at runtime,
sewardjb9e6d242013-05-11 13:42:08 +00004705 the helper will return the didn't-do-the-call value of 0x55..55.
4706 Since that means "completely undefined result", the caller of
sewardjcafe5052013-01-17 14:24:35 +00004707 this function will need to fix up the result somehow in that
4708 case.
sewardjb9e6d242013-05-11 13:42:08 +00004709
4710 Caller of this function is also expected to have checked the
4711 definedness of |guard| before this point.
sewardjcafe5052013-01-17 14:24:35 +00004712*/
sewardj95448072004-11-22 20:19:51 +00004713static
sewardj67564542013-08-16 08:31:29 +00004714IRAtom* expr2vbits_Load ( MCEnv* mce,
4715 IREndness end, IRType ty,
sewardjcafe5052013-01-17 14:24:35 +00004716 IRAtom* addr, UInt bias,
4717 IRAtom* guard )
sewardj170ee212004-12-10 18:57:51 +00004718{
sewardj2e595852005-06-30 23:33:37 +00004719 tl_assert(end == Iend_LE || end == Iend_BE);
sewardj7cf4e6b2008-05-01 20:24:26 +00004720 switch (shadowTypeV(ty)) {
sewardj67564542013-08-16 08:31:29 +00004721 case Ity_I8:
4722 case Ity_I16:
4723 case Ity_I32:
sewardj170ee212004-12-10 18:57:51 +00004724 case Ity_I64:
sewardj21a5f8c2013-08-08 10:41:46 +00004725 case Ity_V128:
sewardj67564542013-08-16 08:31:29 +00004726 case Ity_V256:
sewardjcafe5052013-01-17 14:24:35 +00004727 return expr2vbits_Load_WRK(mce, end, ty, addr, bias, guard);
sewardj170ee212004-12-10 18:57:51 +00004728 default:
sewardj2e595852005-06-30 23:33:37 +00004729 VG_(tool_panic)("expr2vbits_Load");
sewardj170ee212004-12-10 18:57:51 +00004730 }
4731}
4732
4733
sewardjcafe5052013-01-17 14:24:35 +00004734/* The most general handler for guarded loads. Assumes the
sewardjb9e6d242013-05-11 13:42:08 +00004735 definedness of GUARD has already been checked by the caller. A
4736 GUARD of NULL is assumed to mean "always True". Generates code to
4737 check the definedness and validity of ADDR.
sewardjcafe5052013-01-17 14:24:35 +00004738
4739 Generate IR to do a shadow load from ADDR and return the V bits.
4740 The loaded type is TY. The loaded data is then (shadow) widened by
4741 using VWIDEN, which can be Iop_INVALID to denote a no-op. If GUARD
4742 evaluates to False at run time then the returned Vbits are simply
4743 VALT instead. Note therefore that the argument type of VWIDEN must
4744 be TY and the result type of VWIDEN must equal the type of VALT.
4745*/
florian434ffae2012-07-19 17:23:42 +00004746static
sewardjcafe5052013-01-17 14:24:35 +00004747IRAtom* expr2vbits_Load_guarded_General ( MCEnv* mce,
4748 IREndness end, IRType ty,
4749 IRAtom* addr, UInt bias,
4750 IRAtom* guard,
4751 IROp vwiden, IRAtom* valt )
florian434ffae2012-07-19 17:23:42 +00004752{
sewardjcafe5052013-01-17 14:24:35 +00004753 /* Sanity check the conversion operation, and also set TYWIDE. */
4754 IRType tyWide = Ity_INVALID;
4755 switch (vwiden) {
4756 case Iop_INVALID:
4757 tyWide = ty;
4758 break;
4759 case Iop_16Uto32: case Iop_16Sto32: case Iop_8Uto32: case Iop_8Sto32:
4760 tyWide = Ity_I32;
4761 break;
4762 default:
4763 VG_(tool_panic)("memcheck:expr2vbits_Load_guarded_General");
florian434ffae2012-07-19 17:23:42 +00004764 }
4765
sewardjcafe5052013-01-17 14:24:35 +00004766 /* If the guard evaluates to True, this will hold the loaded V bits
4767 at TY. If the guard evaluates to False, this will be all
4768 ones, meaning "all undefined", in which case we will have to
florian5686b2d2013-01-29 03:57:40 +00004769 replace it using an ITE below. */
sewardjcafe5052013-01-17 14:24:35 +00004770 IRAtom* iftrue1
4771 = assignNew('V', mce, ty,
4772 expr2vbits_Load(mce, end, ty, addr, bias, guard));
4773 /* Now (shadow-) widen the loaded V bits to the desired width. In
4774 the guard-is-False case, the allowable widening operators will
4775 in the worst case (unsigned widening) at least leave the
4776 pre-widened part as being marked all-undefined, and in the best
4777 case (signed widening) mark the whole widened result as
4778 undefined. Anyway, it doesn't matter really, since in this case
florian5686b2d2013-01-29 03:57:40 +00004779 we will replace said value with the default value |valt| using an
4780 ITE. */
sewardjcafe5052013-01-17 14:24:35 +00004781 IRAtom* iftrue2
4782 = vwiden == Iop_INVALID
4783 ? iftrue1
4784 : assignNew('V', mce, tyWide, unop(vwiden, iftrue1));
4785 /* These are the V bits we will return if the load doesn't take
4786 place. */
4787 IRAtom* iffalse
4788 = valt;
florian5686b2d2013-01-29 03:57:40 +00004789 /* Prepare the cond for the ITE. Convert a NULL cond into
sewardjcafe5052013-01-17 14:24:35 +00004790 something that iropt knows how to fold out later. */
4791 IRAtom* cond
sewardjcc961652013-01-26 11:49:15 +00004792 = guard == NULL ? mkU1(1) : guard;
sewardjcafe5052013-01-17 14:24:35 +00004793 /* And assemble the final result. */
florian5686b2d2013-01-29 03:57:40 +00004794 return assignNew('V', mce, tyWide, IRExpr_ITE(cond, iftrue2, iffalse));
sewardjcafe5052013-01-17 14:24:35 +00004795}
4796
4797
4798/* A simpler handler for guarded loads, in which there is no
4799 conversion operation, and the default V bit return (when the guard
4800 evaluates to False at runtime) is "all defined". If there is no
4801 guard expression or the guard is always TRUE this function behaves
sewardjb9e6d242013-05-11 13:42:08 +00004802 like expr2vbits_Load. It is assumed that definedness of GUARD has
4803 already been checked at the call site. */
sewardjcafe5052013-01-17 14:24:35 +00004804static
4805IRAtom* expr2vbits_Load_guarded_Simple ( MCEnv* mce,
4806 IREndness end, IRType ty,
4807 IRAtom* addr, UInt bias,
4808 IRAtom *guard )
4809{
4810 return expr2vbits_Load_guarded_General(
4811 mce, end, ty, addr, bias, guard, Iop_INVALID, definedOfType(ty)
4812 );
florian434ffae2012-07-19 17:23:42 +00004813}
4814
4815
sewardj170ee212004-12-10 18:57:51 +00004816static
florian5686b2d2013-01-29 03:57:40 +00004817IRAtom* expr2vbits_ITE ( MCEnv* mce,
4818 IRAtom* cond, IRAtom* iftrue, IRAtom* iffalse )
sewardj95448072004-11-22 20:19:51 +00004819{
florian5686b2d2013-01-29 03:57:40 +00004820 IRAtom *vbitsC, *vbits0, *vbits1;
sewardj95448072004-11-22 20:19:51 +00004821 IRType ty;
sewardj07bfda22013-01-29 21:11:55 +00004822 /* Given ITE(cond, iftrue, iffalse), generate
4823 ITE(cond, iftrue#, iffalse#) `UifU` PCast(cond#)
sewardj95448072004-11-22 20:19:51 +00004824 That is, steer the V bits like the originals, but trash the
4825 result if the steering value is undefined. This gives
4826 lazy propagation. */
4827 tl_assert(isOriginalAtom(mce, cond));
florian5686b2d2013-01-29 03:57:40 +00004828 tl_assert(isOriginalAtom(mce, iftrue));
4829 tl_assert(isOriginalAtom(mce, iffalse));
sewardj95448072004-11-22 20:19:51 +00004830
4831 vbitsC = expr2vbits(mce, cond);
florian5686b2d2013-01-29 03:57:40 +00004832 vbits1 = expr2vbits(mce, iftrue);
sewardj07bfda22013-01-29 21:11:55 +00004833 vbits0 = expr2vbits(mce, iffalse);
sewardj1c0ce7a2009-07-01 08:10:49 +00004834 ty = typeOfIRExpr(mce->sb->tyenv, vbits0);
sewardj95448072004-11-22 20:19:51 +00004835
4836 return
sewardj7cf4e6b2008-05-01 20:24:26 +00004837 mkUifU(mce, ty, assignNew('V', mce, ty,
florian5686b2d2013-01-29 03:57:40 +00004838 IRExpr_ITE(cond, vbits1, vbits0)),
sewardj95448072004-11-22 20:19:51 +00004839 mkPCastTo(mce, ty, vbitsC) );
4840}
4841
4842/* --------- This is the main expression-handling function. --------- */
4843
4844static
4845IRExpr* expr2vbits ( MCEnv* mce, IRExpr* e )
4846{
4847 switch (e->tag) {
4848
4849 case Iex_Get:
4850 return shadow_GET( mce, e->Iex.Get.offset, e->Iex.Get.ty );
4851
4852 case Iex_GetI:
4853 return shadow_GETI( mce, e->Iex.GetI.descr,
4854 e->Iex.GetI.ix, e->Iex.GetI.bias );
4855
sewardj0b9d74a2006-12-24 02:24:11 +00004856 case Iex_RdTmp:
sewardj7cf4e6b2008-05-01 20:24:26 +00004857 return IRExpr_RdTmp( findShadowTmpV(mce, e->Iex.RdTmp.tmp) );
sewardj95448072004-11-22 20:19:51 +00004858
4859 case Iex_Const:
sewardj1c0ce7a2009-07-01 08:10:49 +00004860 return definedOfType(shadowTypeV(typeOfIRExpr(mce->sb->tyenv, e)));
sewardj95448072004-11-22 20:19:51 +00004861
sewardje91cea72006-02-08 19:32:02 +00004862 case Iex_Qop:
4863 return expr2vbits_Qop(
4864 mce,
floriane2ab2972012-06-01 20:43:03 +00004865 e->Iex.Qop.details->op,
4866 e->Iex.Qop.details->arg1, e->Iex.Qop.details->arg2,
4867 e->Iex.Qop.details->arg3, e->Iex.Qop.details->arg4
sewardje91cea72006-02-08 19:32:02 +00004868 );
4869
sewardjed69fdb2006-02-03 16:12:27 +00004870 case Iex_Triop:
4871 return expr2vbits_Triop(
4872 mce,
florian26441742012-06-02 20:30:41 +00004873 e->Iex.Triop.details->op,
4874 e->Iex.Triop.details->arg1, e->Iex.Triop.details->arg2,
4875 e->Iex.Triop.details->arg3
sewardjed69fdb2006-02-03 16:12:27 +00004876 );
4877
sewardj95448072004-11-22 20:19:51 +00004878 case Iex_Binop:
4879 return expr2vbits_Binop(
4880 mce,
4881 e->Iex.Binop.op,
4882 e->Iex.Binop.arg1, e->Iex.Binop.arg2
4883 );
4884
4885 case Iex_Unop:
4886 return expr2vbits_Unop( mce, e->Iex.Unop.op, e->Iex.Unop.arg );
4887
sewardj2e595852005-06-30 23:33:37 +00004888 case Iex_Load:
4889 return expr2vbits_Load( mce, e->Iex.Load.end,
4890 e->Iex.Load.ty,
sewardjcafe5052013-01-17 14:24:35 +00004891 e->Iex.Load.addr, 0/*addr bias*/,
4892 NULL/* guard == "always True"*/ );
sewardj95448072004-11-22 20:19:51 +00004893
4894 case Iex_CCall:
4895 return mkLazyN( mce, e->Iex.CCall.args,
4896 e->Iex.CCall.retty,
4897 e->Iex.CCall.cee );
4898
florian5686b2d2013-01-29 03:57:40 +00004899 case Iex_ITE:
4900 return expr2vbits_ITE( mce, e->Iex.ITE.cond, e->Iex.ITE.iftrue,
sewardj07bfda22013-01-29 21:11:55 +00004901 e->Iex.ITE.iffalse);
njn25e49d8e72002-09-23 09:36:25 +00004902
4903 default:
sewardj95448072004-11-22 20:19:51 +00004904 VG_(printf)("\n");
4905 ppIRExpr(e);
4906 VG_(printf)("\n");
4907 VG_(tool_panic)("memcheck: expr2vbits");
njn25e49d8e72002-09-23 09:36:25 +00004908 }
njn25e49d8e72002-09-23 09:36:25 +00004909}
4910
4911/*------------------------------------------------------------*/
sewardj95448072004-11-22 20:19:51 +00004912/*--- Generate shadow stmts from all kinds of IRStmts. ---*/
njn25e49d8e72002-09-23 09:36:25 +00004913/*------------------------------------------------------------*/
4914
sewardj95448072004-11-22 20:19:51 +00004915/* Widen a value to the host word size. */
njn25e49d8e72002-09-23 09:36:25 +00004916
4917static
sewardj95448072004-11-22 20:19:51 +00004918IRExpr* zwidenToHostWord ( MCEnv* mce, IRAtom* vatom )
njn25e49d8e72002-09-23 09:36:25 +00004919{
sewardj7cf97ee2004-11-28 14:25:01 +00004920 IRType ty, tyH;
4921
sewardj95448072004-11-22 20:19:51 +00004922 /* vatom is vbits-value and as such can only have a shadow type. */
4923 tl_assert(isShadowAtom(mce,vatom));
njn25e49d8e72002-09-23 09:36:25 +00004924
sewardj1c0ce7a2009-07-01 08:10:49 +00004925 ty = typeOfIRExpr(mce->sb->tyenv, vatom);
sewardj7cf97ee2004-11-28 14:25:01 +00004926 tyH = mce->hWordTy;
njn25e49d8e72002-09-23 09:36:25 +00004927
sewardj95448072004-11-22 20:19:51 +00004928 if (tyH == Ity_I32) {
4929 switch (ty) {
sewardj7cf4e6b2008-05-01 20:24:26 +00004930 case Ity_I32:
4931 return vatom;
4932 case Ity_I16:
4933 return assignNew('V', mce, tyH, unop(Iop_16Uto32, vatom));
4934 case Ity_I8:
4935 return assignNew('V', mce, tyH, unop(Iop_8Uto32, vatom));
4936 default:
4937 goto unhandled;
sewardj8ec2cfc2002-10-13 00:57:26 +00004938 }
sewardj6cf40ff2005-04-20 22:31:26 +00004939 } else
4940 if (tyH == Ity_I64) {
4941 switch (ty) {
sewardj7cf4e6b2008-05-01 20:24:26 +00004942 case Ity_I32:
4943 return assignNew('V', mce, tyH, unop(Iop_32Uto64, vatom));
4944 case Ity_I16:
4945 return assignNew('V', mce, tyH, unop(Iop_32Uto64,
4946 assignNew('V', mce, Ity_I32, unop(Iop_16Uto32, vatom))));
4947 case Ity_I8:
4948 return assignNew('V', mce, tyH, unop(Iop_32Uto64,
4949 assignNew('V', mce, Ity_I32, unop(Iop_8Uto32, vatom))));
4950 default:
4951 goto unhandled;
sewardj6cf40ff2005-04-20 22:31:26 +00004952 }
sewardj95448072004-11-22 20:19:51 +00004953 } else {
4954 goto unhandled;
sewardj8ec2cfc2002-10-13 00:57:26 +00004955 }
sewardj95448072004-11-22 20:19:51 +00004956 unhandled:
4957 VG_(printf)("\nty = "); ppIRType(ty); VG_(printf)("\n");
4958 VG_(tool_panic)("zwidenToHostWord");
njn25e49d8e72002-09-23 09:36:25 +00004959}
4960
njn25e49d8e72002-09-23 09:36:25 +00004961
sewardjcafe5052013-01-17 14:24:35 +00004962/* Generate a shadow store. |addr| is always the original address
4963 atom. You can pass in either originals or V-bits for the data
4964 atom, but obviously not both. This function generates a check for
sewardjb9e6d242013-05-11 13:42:08 +00004965 the definedness and (indirectly) the validity of |addr|, but only
4966 when |guard| evaluates to True at run time (or is NULL).
njn25e49d8e72002-09-23 09:36:25 +00004967
sewardjcafe5052013-01-17 14:24:35 +00004968 |guard| :: Ity_I1 controls whether the store really happens; NULL
4969 means it unconditionally does. Note that |guard| itself is not
4970 checked for definedness; the caller of this function must do that
4971 if necessary.
4972*/
sewardj95448072004-11-22 20:19:51 +00004973static
sewardj2e595852005-06-30 23:33:37 +00004974void do_shadow_Store ( MCEnv* mce,
4975 IREndness end,
4976 IRAtom* addr, UInt bias,
sewardj1c0ce7a2009-07-01 08:10:49 +00004977 IRAtom* data, IRAtom* vdata,
4978 IRAtom* guard )
njn25e49d8e72002-09-23 09:36:25 +00004979{
sewardj170ee212004-12-10 18:57:51 +00004980 IROp mkAdd;
4981 IRType ty, tyAddr;
sewardj95448072004-11-22 20:19:51 +00004982 void* helper = NULL;
floriana5f894c2012-10-21 03:43:20 +00004983 const HChar* hname = NULL;
njn1d0825f2006-03-27 11:37:07 +00004984 IRConst* c;
sewardj170ee212004-12-10 18:57:51 +00004985
4986 tyAddr = mce->hWordTy;
4987 mkAdd = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
4988 tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
sewardj2e595852005-06-30 23:33:37 +00004989 tl_assert( end == Iend_LE || end == Iend_BE );
sewardj170ee212004-12-10 18:57:51 +00004990
sewardj95448072004-11-22 20:19:51 +00004991 if (data) {
4992 tl_assert(!vdata);
4993 tl_assert(isOriginalAtom(mce, data));
4994 tl_assert(bias == 0);
4995 vdata = expr2vbits( mce, data );
4996 } else {
4997 tl_assert(vdata);
4998 }
njn25e49d8e72002-09-23 09:36:25 +00004999
sewardj95448072004-11-22 20:19:51 +00005000 tl_assert(isOriginalAtom(mce,addr));
5001 tl_assert(isShadowAtom(mce,vdata));
njn25e49d8e72002-09-23 09:36:25 +00005002
sewardj1c0ce7a2009-07-01 08:10:49 +00005003 if (guard) {
5004 tl_assert(isOriginalAtom(mce, guard));
5005 tl_assert(typeOfIRExpr(mce->sb->tyenv, guard) == Ity_I1);
5006 }
5007
5008 ty = typeOfIRExpr(mce->sb->tyenv, vdata);
njn25e49d8e72002-09-23 09:36:25 +00005009
njn1d0825f2006-03-27 11:37:07 +00005010 // If we're not doing undefined value checking, pretend that this value
5011 // is "all valid". That lets Vex's optimiser remove some of the V bit
5012 // shadow computation ops that precede it.
sewardj7cf4e6b2008-05-01 20:24:26 +00005013 if (MC_(clo_mc_level) == 1) {
njn1d0825f2006-03-27 11:37:07 +00005014 switch (ty) {
sewardj45fa9f42012-05-21 10:18:10 +00005015 case Ity_V256: // V256 weirdness -- used four times
sewardjbd43bfa2012-06-29 15:29:37 +00005016 c = IRConst_V256(V_BITS32_DEFINED); break;
sewardj45fa9f42012-05-21 10:18:10 +00005017 case Ity_V128: // V128 weirdness -- used twice
sewardj1c0ce7a2009-07-01 08:10:49 +00005018 c = IRConst_V128(V_BITS16_DEFINED); break;
njn1d0825f2006-03-27 11:37:07 +00005019 case Ity_I64: c = IRConst_U64 (V_BITS64_DEFINED); break;
5020 case Ity_I32: c = IRConst_U32 (V_BITS32_DEFINED); break;
5021 case Ity_I16: c = IRConst_U16 (V_BITS16_DEFINED); break;
5022 case Ity_I8: c = IRConst_U8 (V_BITS8_DEFINED); break;
5023 default: VG_(tool_panic)("memcheck:do_shadow_Store(LE)");
5024 }
5025 vdata = IRExpr_Const( c );
5026 }
5027
sewardj95448072004-11-22 20:19:51 +00005028 /* First, emit a definedness test for the address. This also sets
sewardjb9e6d242013-05-11 13:42:08 +00005029 the address (shadow) to 'defined' following the test. Both of
5030 those actions are gated on |guard|. */
5031 complainIfUndefined( mce, addr, guard );
njn25e49d8e72002-09-23 09:36:25 +00005032
sewardj170ee212004-12-10 18:57:51 +00005033 /* Now decide which helper function to call to write the data V
5034 bits into shadow memory. */
sewardj2e595852005-06-30 23:33:37 +00005035 if (end == Iend_LE) {
5036 switch (ty) {
sewardj45fa9f42012-05-21 10:18:10 +00005037 case Ity_V256: /* we'll use the helper four times */
sewardj2e595852005-06-30 23:33:37 +00005038 case Ity_V128: /* we'll use the helper twice */
njn1d0825f2006-03-27 11:37:07 +00005039 case Ity_I64: helper = &MC_(helperc_STOREV64le);
5040 hname = "MC_(helperc_STOREV64le)";
sewardj2e595852005-06-30 23:33:37 +00005041 break;
njn1d0825f2006-03-27 11:37:07 +00005042 case Ity_I32: helper = &MC_(helperc_STOREV32le);
5043 hname = "MC_(helperc_STOREV32le)";
sewardj2e595852005-06-30 23:33:37 +00005044 break;
njn1d0825f2006-03-27 11:37:07 +00005045 case Ity_I16: helper = &MC_(helperc_STOREV16le);
5046 hname = "MC_(helperc_STOREV16le)";
sewardj2e595852005-06-30 23:33:37 +00005047 break;
njn1d0825f2006-03-27 11:37:07 +00005048 case Ity_I8: helper = &MC_(helperc_STOREV8);
5049 hname = "MC_(helperc_STOREV8)";
sewardj2e595852005-06-30 23:33:37 +00005050 break;
5051 default: VG_(tool_panic)("memcheck:do_shadow_Store(LE)");
5052 }
5053 } else {
sewardj8cf88b72005-07-08 01:29:33 +00005054 switch (ty) {
5055 case Ity_V128: /* we'll use the helper twice */
njn1d0825f2006-03-27 11:37:07 +00005056 case Ity_I64: helper = &MC_(helperc_STOREV64be);
5057 hname = "MC_(helperc_STOREV64be)";
sewardj8cf88b72005-07-08 01:29:33 +00005058 break;
njn1d0825f2006-03-27 11:37:07 +00005059 case Ity_I32: helper = &MC_(helperc_STOREV32be);
5060 hname = "MC_(helperc_STOREV32be)";
sewardj8cf88b72005-07-08 01:29:33 +00005061 break;
njn1d0825f2006-03-27 11:37:07 +00005062 case Ity_I16: helper = &MC_(helperc_STOREV16be);
5063 hname = "MC_(helperc_STOREV16be)";
sewardj8cf88b72005-07-08 01:29:33 +00005064 break;
njn1d0825f2006-03-27 11:37:07 +00005065 case Ity_I8: helper = &MC_(helperc_STOREV8);
5066 hname = "MC_(helperc_STOREV8)";
sewardj8cf88b72005-07-08 01:29:33 +00005067 break;
sewardj45fa9f42012-05-21 10:18:10 +00005068 /* Note, no V256 case here, because no big-endian target that
5069 we support, has 256 vectors. */
sewardj8cf88b72005-07-08 01:29:33 +00005070 default: VG_(tool_panic)("memcheck:do_shadow_Store(BE)");
5071 }
sewardj95448072004-11-22 20:19:51 +00005072 }
njn25e49d8e72002-09-23 09:36:25 +00005073
sewardj45fa9f42012-05-21 10:18:10 +00005074 if (UNLIKELY(ty == Ity_V256)) {
5075
5076 /* V256-bit case -- phrased in terms of 64 bit units (Qs), with
5077 Q3 being the most significant lane. */
5078 /* These are the offsets of the Qs in memory. */
5079 Int offQ0, offQ1, offQ2, offQ3;
5080
5081 /* Various bits for constructing the 4 lane helper calls */
5082 IRDirty *diQ0, *diQ1, *diQ2, *diQ3;
5083 IRAtom *addrQ0, *addrQ1, *addrQ2, *addrQ3;
5084 IRAtom *vdataQ0, *vdataQ1, *vdataQ2, *vdataQ3;
5085 IRAtom *eBiasQ0, *eBiasQ1, *eBiasQ2, *eBiasQ3;
5086
5087 if (end == Iend_LE) {
5088 offQ0 = 0; offQ1 = 8; offQ2 = 16; offQ3 = 24;
5089 } else {
5090 offQ3 = 0; offQ2 = 8; offQ1 = 16; offQ0 = 24;
5091 }
5092
5093 eBiasQ0 = tyAddr==Ity_I32 ? mkU32(bias+offQ0) : mkU64(bias+offQ0);
5094 addrQ0 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ0) );
5095 vdataQ0 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_0, vdata));
5096 diQ0 = unsafeIRDirty_0_N(
5097 1/*regparms*/,
5098 hname, VG_(fnptr_to_fnentry)( helper ),
5099 mkIRExprVec_2( addrQ0, vdataQ0 )
5100 );
5101
5102 eBiasQ1 = tyAddr==Ity_I32 ? mkU32(bias+offQ1) : mkU64(bias+offQ1);
5103 addrQ1 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ1) );
5104 vdataQ1 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_1, vdata));
5105 diQ1 = unsafeIRDirty_0_N(
5106 1/*regparms*/,
5107 hname, VG_(fnptr_to_fnentry)( helper ),
5108 mkIRExprVec_2( addrQ1, vdataQ1 )
5109 );
5110
5111 eBiasQ2 = tyAddr==Ity_I32 ? mkU32(bias+offQ2) : mkU64(bias+offQ2);
5112 addrQ2 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ2) );
5113 vdataQ2 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_2, vdata));
5114 diQ2 = unsafeIRDirty_0_N(
5115 1/*regparms*/,
5116 hname, VG_(fnptr_to_fnentry)( helper ),
5117 mkIRExprVec_2( addrQ2, vdataQ2 )
5118 );
5119
5120 eBiasQ3 = tyAddr==Ity_I32 ? mkU32(bias+offQ3) : mkU64(bias+offQ3);
5121 addrQ3 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ3) );
5122 vdataQ3 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_3, vdata));
5123 diQ3 = unsafeIRDirty_0_N(
5124 1/*regparms*/,
5125 hname, VG_(fnptr_to_fnentry)( helper ),
5126 mkIRExprVec_2( addrQ3, vdataQ3 )
5127 );
5128
5129 if (guard)
5130 diQ0->guard = diQ1->guard = diQ2->guard = diQ3->guard = guard;
5131
5132 setHelperAnns( mce, diQ0 );
5133 setHelperAnns( mce, diQ1 );
5134 setHelperAnns( mce, diQ2 );
5135 setHelperAnns( mce, diQ3 );
5136 stmt( 'V', mce, IRStmt_Dirty(diQ0) );
5137 stmt( 'V', mce, IRStmt_Dirty(diQ1) );
5138 stmt( 'V', mce, IRStmt_Dirty(diQ2) );
5139 stmt( 'V', mce, IRStmt_Dirty(diQ3) );
5140
5141 }
5142 else if (UNLIKELY(ty == Ity_V128)) {
sewardj170ee212004-12-10 18:57:51 +00005143
sewardj20d38f22005-02-07 23:50:18 +00005144 /* V128-bit case */
sewardj170ee212004-12-10 18:57:51 +00005145 /* See comment in next clause re 64-bit regparms */
sewardj2e595852005-06-30 23:33:37 +00005146 /* also, need to be careful about endianness */
5147
njn4c245e52009-03-15 23:25:38 +00005148 Int offLo64, offHi64;
5149 IRDirty *diLo64, *diHi64;
5150 IRAtom *addrLo64, *addrHi64;
5151 IRAtom *vdataLo64, *vdataHi64;
5152 IRAtom *eBiasLo64, *eBiasHi64;
5153
sewardj2e595852005-06-30 23:33:37 +00005154 if (end == Iend_LE) {
5155 offLo64 = 0;
5156 offHi64 = 8;
5157 } else {
sewardj2e595852005-06-30 23:33:37 +00005158 offLo64 = 8;
5159 offHi64 = 0;
5160 }
5161
5162 eBiasLo64 = tyAddr==Ity_I32 ? mkU32(bias+offLo64) : mkU64(bias+offLo64);
sewardj7cf4e6b2008-05-01 20:24:26 +00005163 addrLo64 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasLo64) );
5164 vdataLo64 = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vdata));
sewardj170ee212004-12-10 18:57:51 +00005165 diLo64 = unsafeIRDirty_0_N(
sewardj53ee1fc2005-12-23 02:29:58 +00005166 1/*regparms*/,
5167 hname, VG_(fnptr_to_fnentry)( helper ),
5168 mkIRExprVec_2( addrLo64, vdataLo64 )
5169 );
sewardj2e595852005-06-30 23:33:37 +00005170 eBiasHi64 = tyAddr==Ity_I32 ? mkU32(bias+offHi64) : mkU64(bias+offHi64);
sewardj7cf4e6b2008-05-01 20:24:26 +00005171 addrHi64 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasHi64) );
5172 vdataHi64 = assignNew('V', mce, Ity_I64, unop(Iop_V128HIto64, vdata));
sewardj170ee212004-12-10 18:57:51 +00005173 diHi64 = unsafeIRDirty_0_N(
sewardj53ee1fc2005-12-23 02:29:58 +00005174 1/*regparms*/,
5175 hname, VG_(fnptr_to_fnentry)( helper ),
5176 mkIRExprVec_2( addrHi64, vdataHi64 )
5177 );
sewardj1c0ce7a2009-07-01 08:10:49 +00005178 if (guard) diLo64->guard = guard;
5179 if (guard) diHi64->guard = guard;
sewardj170ee212004-12-10 18:57:51 +00005180 setHelperAnns( mce, diLo64 );
5181 setHelperAnns( mce, diHi64 );
sewardj7cf4e6b2008-05-01 20:24:26 +00005182 stmt( 'V', mce, IRStmt_Dirty(diLo64) );
5183 stmt( 'V', mce, IRStmt_Dirty(diHi64) );
sewardj170ee212004-12-10 18:57:51 +00005184
sewardj95448072004-11-22 20:19:51 +00005185 } else {
sewardj170ee212004-12-10 18:57:51 +00005186
njn4c245e52009-03-15 23:25:38 +00005187 IRDirty *di;
5188 IRAtom *addrAct;
5189
sewardj170ee212004-12-10 18:57:51 +00005190 /* 8/16/32/64-bit cases */
5191 /* Generate the actual address into addrAct. */
5192 if (bias == 0) {
5193 addrAct = addr;
5194 } else {
njn4c245e52009-03-15 23:25:38 +00005195 IRAtom* eBias = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
sewardj7cf4e6b2008-05-01 20:24:26 +00005196 addrAct = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBias));
sewardj170ee212004-12-10 18:57:51 +00005197 }
5198
5199 if (ty == Ity_I64) {
5200 /* We can't do this with regparm 2 on 32-bit platforms, since
5201 the back ends aren't clever enough to handle 64-bit
5202 regparm args. Therefore be different. */
5203 di = unsafeIRDirty_0_N(
sewardj53ee1fc2005-12-23 02:29:58 +00005204 1/*regparms*/,
5205 hname, VG_(fnptr_to_fnentry)( helper ),
5206 mkIRExprVec_2( addrAct, vdata )
5207 );
sewardj170ee212004-12-10 18:57:51 +00005208 } else {
5209 di = unsafeIRDirty_0_N(
sewardj53ee1fc2005-12-23 02:29:58 +00005210 2/*regparms*/,
5211 hname, VG_(fnptr_to_fnentry)( helper ),
sewardj170ee212004-12-10 18:57:51 +00005212 mkIRExprVec_2( addrAct,
sewardj53ee1fc2005-12-23 02:29:58 +00005213 zwidenToHostWord( mce, vdata ))
5214 );
sewardj170ee212004-12-10 18:57:51 +00005215 }
sewardj1c0ce7a2009-07-01 08:10:49 +00005216 if (guard) di->guard = guard;
sewardj170ee212004-12-10 18:57:51 +00005217 setHelperAnns( mce, di );
sewardj7cf4e6b2008-05-01 20:24:26 +00005218 stmt( 'V', mce, IRStmt_Dirty(di) );
sewardj95448072004-11-22 20:19:51 +00005219 }
njn25e49d8e72002-09-23 09:36:25 +00005220
sewardj95448072004-11-22 20:19:51 +00005221}
njn25e49d8e72002-09-23 09:36:25 +00005222
njn25e49d8e72002-09-23 09:36:25 +00005223
sewardj95448072004-11-22 20:19:51 +00005224/* Do lazy pessimistic propagation through a dirty helper call, by
5225 looking at the annotations on it. This is the most complex part of
5226 Memcheck. */
njn25e49d8e72002-09-23 09:36:25 +00005227
sewardj95448072004-11-22 20:19:51 +00005228static IRType szToITy ( Int n )
5229{
5230 switch (n) {
5231 case 1: return Ity_I8;
5232 case 2: return Ity_I16;
5233 case 4: return Ity_I32;
5234 case 8: return Ity_I64;
5235 default: VG_(tool_panic)("szToITy(memcheck)");
5236 }
5237}
njn25e49d8e72002-09-23 09:36:25 +00005238
/* Instrument a dirty helper call 'd', using the pessimistic scheme
   described above: PCast the definedness of every input (the guard,
   all unmasked args, all read guest state, all read memory) down into
   a single 32-bit summary V-value 'curr', then re-distribute a
   suitably-cast copy of 'curr' to every output (the destination
   temporary, written guest state, written memory). */
static
void do_shadow_Dirty ( MCEnv* mce, IRDirty* d )
{
   Int i, k, n, toDo, gSz, gOff;
   IRAtom *src, *here, *curr;
   IRType tySrc, tyDst;
   IRTemp dst;
   IREndness end;

   /* What's the native endianness?  We need to know this. */
#  if defined(VG_BIGENDIAN)
   end = Iend_BE;
#  elif defined(VG_LITTLEENDIAN)
   end = Iend_LE;
#  else
#  error "Unknown endianness"
#  endif

   /* First check the guard. */
   complainIfUndefined(mce, d->guard, NULL);

   /* Now round up all inputs and PCast over them. */
   curr = definedOfType(Ity_I32);

   /* Inputs: unmasked args
      Note: arguments are evaluated REGARDLESS of the guard expression */
   for (i = 0; d->args[i]; i++) {
      IRAtom* arg = d->args[i];
      if ( (d->cee->mcx_mask & (1<<i))
           || UNLIKELY(is_IRExpr_VECRET_or_BBPTR(arg)) ) {
         /* ignore this arg */
      } else {
         /* Fold this arg's definedness into the summary. */
         here = mkPCastTo( mce, Ity_I32, expr2vbits(mce, arg) );
         curr = mkUifU32(mce, here, curr);
      }
   }

   /* Inputs: guest state that we read. */
   for (i = 0; i < d->nFxState; i++) {
      tl_assert(d->fxState[i].fx != Ifx_None);
      if (d->fxState[i].fx == Ifx_Write)
         continue;

      /* Enumerate the described state segments */
      for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) {
         gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen;
         gSz  = d->fxState[i].size;

         /* Ignore any sections marked as 'always defined'. */
         if (isAlwaysDefd(mce, gOff, gSz)) {
            if (0)
            VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
                        gOff, gSz);
            continue;
         }

         /* This state element is read or modified.  So we need to
            consider it.  If larger than 8 bytes, deal with it in
            8-byte chunks. */
         while (True) {
            tl_assert(gSz >= 0);
            if (gSz == 0) break;
            n = gSz <= 8 ? gSz : 8;
            /* update 'curr' with UifU of the state slice
               gOff .. gOff+n-1 */
            tySrc = szToITy( n );

            /* Observe the guard expression. If it is false use an
               all-bits-defined bit pattern */
            IRAtom *cond, *iffalse, *iftrue;

            cond    = assignNew('V', mce, Ity_I1, d->guard);
            iftrue  = assignNew('V', mce, tySrc, shadow_GET(mce, gOff, tySrc));
            iffalse = assignNew('V', mce, tySrc, definedOfType(tySrc));
            src     = assignNew('V', mce, tySrc,
                                IRExpr_ITE(cond, iftrue, iffalse));

            here = mkPCastTo( mce, Ity_I32, src );
            curr = mkUifU32(mce, here, curr);
            gSz -= n;
            gOff += n;
         }
      }
   }

   /* Inputs: memory.  First set up some info needed regardless of
      whether we're doing reads or writes. */

   if (d->mFx != Ifx_None) {
      /* Because we may do multiple shadow loads/stores from the same
         base address, it's best to do a single test of its
         definedness right now.  Post-instrumentation optimisation
         should remove all but this test. */
      IRType tyAddr;
      tl_assert(d->mAddr);
      complainIfUndefined(mce, d->mAddr, d->guard);

      tyAddr = typeOfIRExpr(mce->sb->tyenv, d->mAddr);
      tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);
      tl_assert(tyAddr == mce->hWordTy); /* not really right */
   }

   /* Deal with memory inputs (reads or modifies) */
   if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
      toDo   = d->mSize;
      /* chew off 32-bit chunks.  We don't care about the endianness
         since it's all going to be condensed down to a single bit,
         but nevertheless choose an endianness which is hopefully
         native to the platform. */
      while (toDo >= 4) {
         here = mkPCastTo(
                   mce, Ity_I32,
                   expr2vbits_Load_guarded_Simple(
                      mce, end, Ity_I32, d->mAddr, d->mSize - toDo, d->guard )
                );
         curr = mkUifU32(mce, here, curr);
         toDo -= 4;
      }
      /* chew off 16-bit chunks */
      while (toDo >= 2) {
         here = mkPCastTo(
                   mce, Ity_I32,
                   expr2vbits_Load_guarded_Simple(
                      mce, end, Ity_I16, d->mAddr, d->mSize - toDo, d->guard )
                );
         curr = mkUifU32(mce, here, curr);
         toDo -= 2;
      }
      /* chew off the remaining 8-bit chunk, if any */
      if (toDo == 1) {
         here = mkPCastTo(
                   mce, Ity_I32,
                   expr2vbits_Load_guarded_Simple(
                      mce, end, Ity_I8, d->mAddr, d->mSize - toDo, d->guard )
                );
         curr = mkUifU32(mce, here, curr);
         toDo -= 1;
      }
      tl_assert(toDo == 0);
   }

   /* Whew!  So curr is a 32-bit V-value summarising pessimistically
      all the inputs to the helper.  Now we need to re-distribute the
      results to all destinations. */

   /* Outputs: the destination temporary, if there is one. */
   if (d->tmp != IRTemp_INVALID) {
      dst   = findShadowTmpV(mce, d->tmp);
      tyDst = typeOfIRTemp(mce->sb->tyenv, d->tmp);
      assign( 'V', mce, dst, mkPCastTo( mce, tyDst, curr) );
   }

   /* Outputs: guest state that we write or modify. */
   for (i = 0; i < d->nFxState; i++) {
      tl_assert(d->fxState[i].fx != Ifx_None);
      if (d->fxState[i].fx == Ifx_Read)
         continue;

      /* Enumerate the described state segments */
      for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) {
         gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen;
         gSz  = d->fxState[i].size;

         /* Ignore any sections marked as 'always defined'. */
         if (isAlwaysDefd(mce, gOff, gSz))
            continue;

         /* This state element is written or modified.  So we need to
            consider it.  If larger than 8 bytes, deal with it in
            8-byte chunks. */
         while (True) {
            tl_assert(gSz >= 0);
            if (gSz == 0) break;
            n = gSz <= 8 ? gSz : 8;
            /* Write suitably-casted 'curr' to the state slice
               gOff .. gOff+n-1 */
            tyDst = szToITy( n );
            do_shadow_PUT( mce, gOff,
                           NULL, /* original atom */
                           mkPCastTo( mce, tyDst, curr ), d->guard );
            gSz -= n;
            gOff += n;
         }
      }
   }

   /* Outputs: memory that we write or modify.  Same comments about
      endianness as above apply. */
   if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
      toDo   = d->mSize;
      /* chew off 32-bit chunks */
      while (toDo >= 4) {
         do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
                          NULL, /* original data */
                          mkPCastTo( mce, Ity_I32, curr ),
                          d->guard );
         toDo -= 4;
      }
      /* chew off 16-bit chunks */
      while (toDo >= 2) {
         do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
                          NULL, /* original data */
                          mkPCastTo( mce, Ity_I16, curr ),
                          d->guard );
         toDo -= 2;
      }
      /* chew off the remaining 8-bit chunk, if any */
      if (toDo == 1) {
         do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
                          NULL, /* original data */
                          mkPCastTo( mce, Ity_I8, curr ),
                          d->guard );
         toDo -= 1;
      }
      tl_assert(toDo == 0);
   }

}
5457
sewardj1c0ce7a2009-07-01 08:10:49 +00005458
sewardj826ec492005-05-12 18:05:00 +00005459/* We have an ABI hint telling us that [base .. base+len-1] is to
5460 become undefined ("writable"). Generate code to call a helper to
5461 notify the A/V bit machinery of this fact.
5462
5463 We call
sewardj7cf4e6b2008-05-01 20:24:26 +00005464 void MC_(helperc_MAKE_STACK_UNINIT) ( Addr base, UWord len,
5465 Addr nia );
sewardj826ec492005-05-12 18:05:00 +00005466*/
5467static
sewardj7cf4e6b2008-05-01 20:24:26 +00005468void do_AbiHint ( MCEnv* mce, IRExpr* base, Int len, IRExpr* nia )
sewardj826ec492005-05-12 18:05:00 +00005469{
5470 IRDirty* di;
sewardj7cf4e6b2008-05-01 20:24:26 +00005471 /* Minor optimisation: if not doing origin tracking, ignore the
5472 supplied nia and pass zero instead. This is on the basis that
5473 MC_(helperc_MAKE_STACK_UNINIT) will ignore it anyway, and we can
5474 almost always generate a shorter instruction to put zero into a
5475 register than any other value. */
5476 if (MC_(clo_mc_level) < 3)
5477 nia = mkIRExpr_HWord(0);
5478
sewardj826ec492005-05-12 18:05:00 +00005479 di = unsafeIRDirty_0_N(
5480 0/*regparms*/,
5481 "MC_(helperc_MAKE_STACK_UNINIT)",
sewardj53ee1fc2005-12-23 02:29:58 +00005482 VG_(fnptr_to_fnentry)( &MC_(helperc_MAKE_STACK_UNINIT) ),
sewardj7cf4e6b2008-05-01 20:24:26 +00005483 mkIRExprVec_3( base, mkIRExpr_HWord( (UInt)len), nia )
sewardj826ec492005-05-12 18:05:00 +00005484 );
sewardj7cf4e6b2008-05-01 20:24:26 +00005485 stmt( 'V', mce, IRStmt_Dirty(di) );
sewardj826ec492005-05-12 18:05:00 +00005486}
5487
njn25e49d8e72002-09-23 09:36:25 +00005488
sewardj1c0ce7a2009-07-01 08:10:49 +00005489/* ------ Dealing with IRCAS (big and complex) ------ */
5490
5491/* FWDS */
5492static IRAtom* gen_load_b ( MCEnv* mce, Int szB,
5493 IRAtom* baseaddr, Int offset );
5494static IRAtom* gen_maxU32 ( MCEnv* mce, IRAtom* b1, IRAtom* b2 );
5495static void gen_store_b ( MCEnv* mce, Int szB,
5496 IRAtom* baseaddr, Int offset, IRAtom* dataB,
5497 IRAtom* guard );
5498
5499static void do_shadow_CAS_single ( MCEnv* mce, IRCAS* cas );
5500static void do_shadow_CAS_double ( MCEnv* mce, IRCAS* cas );
5501
5502
5503/* Either ORIG and SHADOW are both IRExpr.RdTmps, or they are both
5504 IRExpr.Consts, else this asserts. If they are both Consts, it
5505 doesn't do anything. So that just leaves the RdTmp case.
5506
5507 In which case: this assigns the shadow value SHADOW to the IR
5508 shadow temporary associated with ORIG. That is, ORIG, being an
5509 original temporary, will have a shadow temporary associated with
5510 it. However, in the case envisaged here, there will so far have
5511 been no IR emitted to actually write a shadow value into that
5512 temporary. What this routine does is to (emit IR to) copy the
5513 value in SHADOW into said temporary, so that after this call,
5514 IRExpr.RdTmps of ORIG's shadow temp will correctly pick up the
5515 value in SHADOW.
5516
5517 Point is to allow callers to compute "by hand" a shadow value for
5518 ORIG, and force it to be associated with ORIG.
5519
5520 How do we know that that shadow associated with ORIG has not so far
5521 been assigned to? Well, we don't per se know that, but supposing
5522 it had. Then this routine would create a second assignment to it,
5523 and later the IR sanity checker would barf. But that never
5524 happens. QED.
5525*/
5526static void bind_shadow_tmp_to_orig ( UChar how,
5527 MCEnv* mce,
5528 IRAtom* orig, IRAtom* shadow )
5529{
5530 tl_assert(isOriginalAtom(mce, orig));
5531 tl_assert(isShadowAtom(mce, shadow));
5532 switch (orig->tag) {
5533 case Iex_Const:
5534 tl_assert(shadow->tag == Iex_Const);
5535 break;
5536 case Iex_RdTmp:
5537 tl_assert(shadow->tag == Iex_RdTmp);
5538 if (how == 'V') {
5539 assign('V', mce, findShadowTmpV(mce,orig->Iex.RdTmp.tmp),
5540 shadow);
5541 } else {
5542 tl_assert(how == 'B');
5543 assign('B', mce, findShadowTmpB(mce,orig->Iex.RdTmp.tmp),
5544 shadow);
5545 }
5546 break;
5547 default:
5548 tl_assert(0);
5549 }
5550}
5551
5552
5553static
5554void do_shadow_CAS ( MCEnv* mce, IRCAS* cas )
5555{
5556 /* Scheme is (both single- and double- cases):
5557
5558 1. fetch data#,dataB (the proposed new value)
5559
5560 2. fetch expd#,expdB (what we expect to see at the address)
5561
5562 3. check definedness of address
5563
5564 4. load old#,oldB from shadow memory; this also checks
5565 addressibility of the address
5566
5567 5. the CAS itself
5568
sewardjafed4c52009-07-12 13:00:17 +00005569 6. compute "expected == old". See COMMENT_ON_CasCmpEQ below.
sewardj1c0ce7a2009-07-01 08:10:49 +00005570
sewardjafed4c52009-07-12 13:00:17 +00005571 7. if "expected == old" (as computed by (6))
sewardj1c0ce7a2009-07-01 08:10:49 +00005572 store data#,dataB to shadow memory
5573
5574 Note that 5 reads 'old' but 4 reads 'old#'. Similarly, 5 stores
5575 'data' but 7 stores 'data#'. Hence it is possible for the
5576 shadow data to be incorrectly checked and/or updated:
5577
sewardj1c0ce7a2009-07-01 08:10:49 +00005578 * 7 is at least gated correctly, since the 'expected == old'
5579 condition is derived from outputs of 5. However, the shadow
5580 write could happen too late: imagine after 5 we are
5581 descheduled, a different thread runs, writes a different
5582 (shadow) value at the address, and then we resume, hence
5583 overwriting the shadow value written by the other thread.
5584
5585 Because the original memory access is atomic, there's no way to
5586 make both the original and shadow accesses into a single atomic
5587 thing, hence this is unavoidable.
5588
5589 At least as Valgrind stands, I don't think it's a problem, since
5590 we're single threaded *and* we guarantee that there are no
5591 context switches during the execution of any specific superblock
5592 -- context switches can only happen at superblock boundaries.
5593
5594 If Valgrind ever becomes MT in the future, then it might be more
5595 of a problem. A possible kludge would be to artificially
5596 associate with the location, a lock, which we must acquire and
5597 release around the transaction as a whole. Hmm, that probably
5598 would't work properly since it only guards us against other
5599 threads doing CASs on the same location, not against other
5600 threads doing normal reads and writes.
sewardjafed4c52009-07-12 13:00:17 +00005601
5602 ------------------------------------------------------------
5603
5604 COMMENT_ON_CasCmpEQ:
5605
5606 Note two things. Firstly, in the sequence above, we compute
5607 "expected == old", but we don't check definedness of it. Why
5608 not? Also, the x86 and amd64 front ends use
sewardjb9e6d242013-05-11 13:42:08 +00005609 Iop_CasCmp{EQ,NE}{8,16,32,64} comparisons to make the equivalent
sewardjafed4c52009-07-12 13:00:17 +00005610 determination (expected == old ?) for themselves, and we also
5611 don't check definedness for those primops; we just say that the
5612 result is defined. Why? Details follow.
5613
5614 x86/amd64 contains various forms of locked insns:
5615 * lock prefix before all basic arithmetic insn;
5616 eg lock xorl %reg1,(%reg2)
5617 * atomic exchange reg-mem
5618 * compare-and-swaps
5619
5620 Rather than attempt to represent them all, which would be a
5621 royal PITA, I used a result from Maurice Herlihy
5622 (http://en.wikipedia.org/wiki/Maurice_Herlihy), in which he
5623 demonstrates that compare-and-swap is a primitive more general
5624 than the other two, and so can be used to represent all of them.
5625 So the translation scheme for (eg) lock incl (%reg) is as
5626 follows:
5627
5628 again:
5629 old = * %reg
5630 new = old + 1
5631 atomically { if (* %reg == old) { * %reg = new } else { goto again } }
5632
5633 The "atomically" is the CAS bit. The scheme is always the same:
5634 get old value from memory, compute new value, atomically stuff
5635 new value back in memory iff the old value has not changed (iow,
5636 no other thread modified it in the meantime). If it has changed
5637 then we've been out-raced and we have to start over.
5638
5639 Now that's all very neat, but it has the bad side effect of
5640 introducing an explicit equality test into the translation.
5641 Consider the behaviour of said code on a memory location which
5642 is uninitialised. We will wind up doing a comparison on
5643 uninitialised data, and mc duly complains.
5644
5645 What's difficult about this is, the common case is that the
5646 location is uncontended, and so we're usually comparing the same
5647 value (* %reg) with itself. So we shouldn't complain even if it
5648 is undefined. But mc doesn't know that.
5649
5650 My solution is to mark the == in the IR specially, so as to tell
5651 mc that it almost certainly compares a value with itself, and we
5652 should just regard the result as always defined. Rather than
5653 add a bit to all IROps, I just cloned Iop_CmpEQ{8,16,32,64} into
5654 Iop_CasCmpEQ{8,16,32,64} so as not to disturb anything else.
5655
5656 So there's always the question of, can this give a false
5657 negative? eg, imagine that initially, * %reg is defined; and we
5658 read that; but then in the gap between the read and the CAS, a
5659 different thread writes an undefined (and different) value at
5660 the location. Then the CAS in this thread will fail and we will
5661 go back to "again:", but without knowing that the trip back
5662 there was based on an undefined comparison. No matter; at least
5663 the other thread won the race and the location is correctly
5664 marked as undefined. What if it wrote an uninitialised version
5665 of the same value that was there originally, though?
5666
5667 etc etc. Seems like there's a small corner case in which we
5668 might lose the fact that something's defined -- we're out-raced
5669 in between the "old = * reg" and the "atomically {", _and_ the
5670 other thread is writing in an undefined version of what's
5671 already there. Well, that seems pretty unlikely.
5672
5673 ---
5674
5675 If we ever need to reinstate it .. code which generates a
5676 definedness test for "expected == old" was removed at r10432 of
5677 this file.
sewardj1c0ce7a2009-07-01 08:10:49 +00005678 */
5679 if (cas->oldHi == IRTemp_INVALID) {
5680 do_shadow_CAS_single( mce, cas );
5681 } else {
5682 do_shadow_CAS_double( mce, cas );
5683 }
5684}
5685
5686
/* Instrument a single-element CAS, following the 7-step scheme
   described above do_shadow_CAS.  Steps 1..4 compute the shadow
   values for the proposed data, the expected value and the current
   (old) memory contents; step 5 emits the CAS itself; steps 6..7
   conditionally update shadow memory, gated on "expected == old". */
static void do_shadow_CAS_single ( MCEnv* mce, IRCAS* cas )
{
   IRAtom *vdataLo = NULL, *bdataLo = NULL;
   IRAtom *vexpdLo = NULL, *bexpdLo = NULL;
   IRAtom *voldLo  = NULL, *boldLo  = NULL;
   IRAtom *expd_eq_old = NULL;
   IROp   opCasCmpEQ;
   Int    elemSzB;
   IRType elemTy;
   Bool   otrak = MC_(clo_mc_level) >= 3; /* a shorthand */

   /* single CAS */
   tl_assert(cas->oldHi == IRTemp_INVALID);
   tl_assert(cas->expdHi == NULL);
   tl_assert(cas->dataHi == NULL);

   /* Pick the element size and the origin-preserving comparison op
      from the type of the expected value. */
   elemTy = typeOfIRExpr(mce->sb->tyenv, cas->expdLo);
   switch (elemTy) {
      case Ity_I8:  elemSzB = 1; opCasCmpEQ = Iop_CasCmpEQ8;  break;
      case Ity_I16: elemSzB = 2; opCasCmpEQ = Iop_CasCmpEQ16; break;
      case Ity_I32: elemSzB = 4; opCasCmpEQ = Iop_CasCmpEQ32; break;
      case Ity_I64: elemSzB = 8; opCasCmpEQ = Iop_CasCmpEQ64; break;
      default: tl_assert(0); /* IR defn disallows any other types */
   }

   /* 1. fetch data# (the proposed new value) */
   tl_assert(isOriginalAtom(mce, cas->dataLo));
   vdataLo
      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataLo));
   tl_assert(isShadowAtom(mce, vdataLo));
   if (otrak) {
      /* Also its origin (B) shadow, when origin tracking is on. */
      bdataLo
         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataLo));
      tl_assert(isShadowAtom(mce, bdataLo));
   }

   /* 2. fetch expected# (what we expect to see at the address) */
   tl_assert(isOriginalAtom(mce, cas->expdLo));
   vexpdLo
      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdLo));
   tl_assert(isShadowAtom(mce, vexpdLo));
   if (otrak) {
      bexpdLo
         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdLo));
      tl_assert(isShadowAtom(mce, bexpdLo));
   }

   /* 3. check definedness of address */
   /* 4. fetch old# from shadow memory; this also checks
         addressibility of the address */
   voldLo
      = assignNew(
           'V', mce, elemTy,
           expr2vbits_Load(
              mce,
              cas->end, elemTy, cas->addr, 0/*Addr bias*/,
              NULL/*always happens*/
        ));
   bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldLo), voldLo);
   if (otrak) {
      boldLo
         = assignNew('B', mce, Ity_I32,
                     gen_load_b(mce, elemSzB, cas->addr, 0/*addr bias*/));
      bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldLo), boldLo);
   }

   /* 5. the CAS itself */
   stmt( 'C', mce, IRStmt_CAS(cas) );

   /* 6. compute "expected == old" */
   /* See COMMENT_ON_CasCmpEQ in this file for background/rationale. */
   /* Note that 'C' is kinda faking it; it is indeed a non-shadow
      tree, but it's not copied from the input block. */
   expd_eq_old
      = assignNew('C', mce, Ity_I1,
                  binop(opCasCmpEQ, cas->expdLo, mkexpr(cas->oldLo)));

   /* 7. if "expected == old"
            store data# to shadow memory */
   do_shadow_Store( mce, cas->end, cas->addr, 0/*bias*/,
                    NULL/*data*/, vdataLo/*vdata*/,
                    expd_eq_old/*guard for store*/ );
   if (otrak) {
      gen_store_b( mce, elemSzB, cas->addr, 0/*offset*/,
                   bdataLo/*bdata*/,
                   expd_eq_old/*guard for store*/ );
   }
}
5775
5776
/* Instrument a double-word CAS (both .oldHi and .expdHi/.dataHi are
   present).  Follows the same 7 numbered steps as the single-word
   case, but for the Hi and Lo halves of the pair together.  The only
   structural difference is step 6: since there is no double-word
   compare IROp, "expected == old" is computed as
   ((oldHi^expdHi) | (oldLo^expdLo)) == 0.
   NOTE(review): presumably only called from do_shadow_CAS, whose
   start lies outside this view -- confirm the dispatch there. */
static void do_shadow_CAS_double ( MCEnv* mce, IRCAS* cas )
{
   /* V-bit (v*) and origin (b*) shadows for the proposed new data,
      the expected value, and the old value found in memory. */
   IRAtom *vdataHi = NULL, *bdataHi = NULL;
   IRAtom *vdataLo = NULL, *bdataLo = NULL;
   IRAtom *vexpdHi = NULL, *bexpdHi = NULL;
   IRAtom *vexpdLo = NULL, *bexpdLo = NULL;
   IRAtom *voldHi = NULL, *boldHi = NULL;
   IRAtom *voldLo = NULL, *boldLo = NULL;
   /* Intermediates for the success computation in step 6. */
   IRAtom *xHi = NULL, *xLo = NULL, *xHL = NULL;
   IRAtom *expd_eq_old = NULL, *zero = NULL;
   IROp opCasCmpEQ, opOr, opXor;
   Int elemSzB, memOffsLo, memOffsHi;
   IRType elemTy;
   Bool otrak = MC_(clo_mc_level) >= 3; /* a shorthand: origin tracking on? */

   /* double CAS: the Hi fields must all be populated. */
   tl_assert(cas->oldHi != IRTemp_INVALID);
   tl_assert(cas->expdHi != NULL);
   tl_assert(cas->dataHi != NULL);

   /* Select size-specific ops and constants from the element type. */
   elemTy = typeOfIRExpr(mce->sb->tyenv, cas->expdLo);
   switch (elemTy) {
      case Ity_I8:
         opCasCmpEQ = Iop_CasCmpEQ8; opOr = Iop_Or8; opXor = Iop_Xor8;
         elemSzB = 1; zero = mkU8(0);
         break;
      case Ity_I16:
         opCasCmpEQ = Iop_CasCmpEQ16; opOr = Iop_Or16; opXor = Iop_Xor16;
         elemSzB = 2; zero = mkU16(0);
         break;
      case Ity_I32:
         opCasCmpEQ = Iop_CasCmpEQ32; opOr = Iop_Or32; opXor = Iop_Xor32;
         elemSzB = 4; zero = mkU32(0);
         break;
      case Ity_I64:
         opCasCmpEQ = Iop_CasCmpEQ64; opOr = Iop_Or64; opXor = Iop_Xor64;
         elemSzB = 8; zero = mkU64(0);
         break;
      default:
         tl_assert(0); /* IR defn disallows any other types */
   }

   /* 1. fetch data# (the proposed new value) */
   tl_assert(isOriginalAtom(mce, cas->dataHi));
   tl_assert(isOriginalAtom(mce, cas->dataLo));
   vdataHi
      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataHi));
   vdataLo
      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataLo));
   tl_assert(isShadowAtom(mce, vdataHi));
   tl_assert(isShadowAtom(mce, vdataLo));
   if (otrak) {
      bdataHi
         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataHi));
      bdataLo
         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataLo));
      tl_assert(isShadowAtom(mce, bdataHi));
      tl_assert(isShadowAtom(mce, bdataLo));
   }

   /* 2. fetch expected# (what we expect to see at the address) */
   tl_assert(isOriginalAtom(mce, cas->expdHi));
   tl_assert(isOriginalAtom(mce, cas->expdLo));
   vexpdHi
      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdHi));
   vexpdLo
      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdLo));
   tl_assert(isShadowAtom(mce, vexpdHi));
   tl_assert(isShadowAtom(mce, vexpdLo));
   if (otrak) {
      bexpdHi
         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdHi));
      bexpdLo
         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdLo));
      tl_assert(isShadowAtom(mce, bexpdHi));
      tl_assert(isShadowAtom(mce, bexpdLo));
   }

   /* 3. check definedness of address */
   /* 4. fetch old# from shadow memory; this also checks
         addressibility of the address */
   /* The two halves sit at consecutive addresses; which half is at
      the lower address depends on endianness. */
   if (cas->end == Iend_LE) {
      memOffsLo = 0;
      memOffsHi = elemSzB;
   } else {
      tl_assert(cas->end == Iend_BE);
      memOffsLo = elemSzB;
      memOffsHi = 0;
   }
   voldHi
      = assignNew(
           'V', mce, elemTy,
           expr2vbits_Load(
              mce,
              cas->end, elemTy, cas->addr, memOffsHi/*Addr bias*/,
              NULL/*always happens*/
        ));
   voldLo
      = assignNew(
           'V', mce, elemTy,
           expr2vbits_Load(
              mce,
              cas->end, elemTy, cas->addr, memOffsLo/*Addr bias*/,
              NULL/*always happens*/
        ));
   bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldHi), voldHi);
   bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldLo), voldLo);
   if (otrak) {
      boldHi
         = assignNew('B', mce, Ity_I32,
                     gen_load_b(mce, elemSzB, cas->addr,
                                memOffsHi/*addr bias*/));
      boldLo
         = assignNew('B', mce, Ity_I32,
                     gen_load_b(mce, elemSzB, cas->addr,
                                memOffsLo/*addr bias*/));
      bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldHi), boldHi);
      bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldLo), boldLo);
   }

   /* 5. the CAS itself */
   stmt( 'C', mce, IRStmt_CAS(cas) );

   /* 6. compute "expected == old" */
   /* See COMMENT_ON_CasCmpEQ in this file for background/rationale. */
   /* Note that 'C' is kinda faking it; it is indeed a non-shadow
      tree, but it's not copied from the input block. */
   /* There is no double-word compare op, so compute:
      xHi = oldHi ^ expdHi;
      xLo = oldLo ^ expdLo;
      xHL = xHi | xLo;
      expd_eq_old = xHL == 0;
   */
   xHi = assignNew('C', mce, elemTy,
                   binop(opXor, cas->expdHi, mkexpr(cas->oldHi)));
   xLo = assignNew('C', mce, elemTy,
                   binop(opXor, cas->expdLo, mkexpr(cas->oldLo)));
   xHL = assignNew('C', mce, elemTy,
                   binop(opOr, xHi, xLo));
   expd_eq_old
      = assignNew('C', mce, Ity_I1,
                  binop(opCasCmpEQ, xHL, zero));

   /* 7. if "expected == old"
            store data# to shadow memory */
   do_shadow_Store( mce, cas->end, cas->addr, memOffsHi/*bias*/,
                    NULL/*data*/, vdataHi/*vdata*/,
                    expd_eq_old/*guard for store*/ );
   do_shadow_Store( mce, cas->end, cas->addr, memOffsLo/*bias*/,
                    NULL/*data*/, vdataLo/*vdata*/,
                    expd_eq_old/*guard for store*/ );
   if (otrak) {
      gen_store_b( mce, elemSzB, cas->addr, memOffsHi/*offset*/,
                   bdataHi/*bdata*/,
                   expd_eq_old/*guard for store*/ );
      gen_store_b( mce, elemSzB, cas->addr, memOffsLo/*offset*/,
                   bdataLo/*bdata*/,
                   expd_eq_old/*guard for store*/ );
   }
}
5937
5938
sewardjdb5907d2009-11-26 17:20:21 +00005939/* ------ Dealing with LL/SC (not difficult) ------ */
5940
5941static void do_shadow_LLSC ( MCEnv* mce,
5942 IREndness stEnd,
5943 IRTemp stResult,
5944 IRExpr* stAddr,
5945 IRExpr* stStoredata )
5946{
5947 /* In short: treat a load-linked like a normal load followed by an
5948 assignment of the loaded (shadow) data to the result temporary.
5949 Treat a store-conditional like a normal store, and mark the
5950 result temporary as defined. */
5951 IRType resTy = typeOfIRTemp(mce->sb->tyenv, stResult);
5952 IRTemp resTmp = findShadowTmpV(mce, stResult);
5953
5954 tl_assert(isIRAtom(stAddr));
5955 if (stStoredata)
5956 tl_assert(isIRAtom(stStoredata));
5957
5958 if (stStoredata == NULL) {
5959 /* Load Linked */
5960 /* Just treat this as a normal load, followed by an assignment of
5961 the value to .result. */
5962 /* Stay sane */
5963 tl_assert(resTy == Ity_I64 || resTy == Ity_I32
5964 || resTy == Ity_I16 || resTy == Ity_I8);
5965 assign( 'V', mce, resTmp,
5966 expr2vbits_Load(
sewardjcafe5052013-01-17 14:24:35 +00005967 mce, stEnd, resTy, stAddr, 0/*addr bias*/,
5968 NULL/*always happens*/) );
sewardjdb5907d2009-11-26 17:20:21 +00005969 } else {
5970 /* Store Conditional */
5971 /* Stay sane */
5972 IRType dataTy = typeOfIRExpr(mce->sb->tyenv,
5973 stStoredata);
5974 tl_assert(dataTy == Ity_I64 || dataTy == Ity_I32
5975 || dataTy == Ity_I16 || dataTy == Ity_I8);
5976 do_shadow_Store( mce, stEnd,
5977 stAddr, 0/* addr bias */,
5978 stStoredata,
5979 NULL /* shadow data */,
5980 NULL/*guard*/ );
5981 /* This is a store conditional, so it writes to .result a value
5982 indicating whether or not the store succeeded. Just claim
5983 this value is always defined. In the PowerPC interpretation
5984 of store-conditional, definedness of the success indication
5985 depends on whether the address of the store matches the
5986 reservation address. But we can't tell that here (and
5987 anyway, we're not being PowerPC-specific). At least we are
5988 guaranteed that the definedness of the store address, and its
5989 addressibility, will be checked as per normal. So it seems
5990 pretty safe to just say that the success indication is always
5991 defined.
5992
5993 In schemeS, for origin tracking, we must correspondingly set
5994 a no-origin value for the origin shadow of .result.
5995 */
5996 tl_assert(resTy == Ity_I1);
5997 assign( 'V', mce, resTmp, definedOfType(resTy) );
5998 }
5999}
6000
6001
sewardjcafe5052013-01-17 14:24:35 +00006002/* ---- Dealing with LoadG/StoreG (not entirely simple) ---- */
6003
6004static void do_shadow_StoreG ( MCEnv* mce, IRStoreG* sg )
6005{
sewardjb9e6d242013-05-11 13:42:08 +00006006 complainIfUndefined(mce, sg->guard, NULL);
6007 /* do_shadow_Store will generate code to check the definedness and
6008 validity of sg->addr, in the case where sg->guard evaluates to
6009 True at run-time. */
sewardjcafe5052013-01-17 14:24:35 +00006010 do_shadow_Store( mce, sg->end,
6011 sg->addr, 0/* addr bias */,
6012 sg->data,
6013 NULL /* shadow data */,
6014 sg->guard );
6015}
6016
6017static void do_shadow_LoadG ( MCEnv* mce, IRLoadG* lg )
6018{
sewardjb9e6d242013-05-11 13:42:08 +00006019 complainIfUndefined(mce, lg->guard, NULL);
6020 /* expr2vbits_Load_guarded_General will generate code to check the
6021 definedness and validity of lg->addr, in the case where
6022 lg->guard evaluates to True at run-time. */
sewardjcafe5052013-01-17 14:24:35 +00006023
6024 /* Look at the LoadG's built-in conversion operation, to determine
6025 the source (actual loaded data) type, and the equivalent IROp.
6026 NOTE that implicitly we are taking a widening operation to be
6027 applied to original atoms and producing one that applies to V
6028 bits. Since signed and unsigned widening are self-shadowing,
6029 this is a straight copy of the op (modulo swapping from the
6030 IRLoadGOp form to the IROp form). Note also therefore that this
6031 implicitly duplicates the logic to do with said widening ops in
6032 expr2vbits_Unop. See comment at the start of expr2vbits_Unop. */
6033 IROp vwiden = Iop_INVALID;
6034 IRType loadedTy = Ity_INVALID;
6035 switch (lg->cvt) {
sewardjbe9d2352015-01-27 23:10:19 +00006036 case ILGop_Ident64: loadedTy = Ity_I64; vwiden = Iop_INVALID; break;
sewardjcafe5052013-01-17 14:24:35 +00006037 case ILGop_Ident32: loadedTy = Ity_I32; vwiden = Iop_INVALID; break;
6038 case ILGop_16Uto32: loadedTy = Ity_I16; vwiden = Iop_16Uto32; break;
6039 case ILGop_16Sto32: loadedTy = Ity_I16; vwiden = Iop_16Sto32; break;
6040 case ILGop_8Uto32: loadedTy = Ity_I8; vwiden = Iop_8Uto32; break;
6041 case ILGop_8Sto32: loadedTy = Ity_I8; vwiden = Iop_8Sto32; break;
6042 default: VG_(tool_panic)("do_shadow_LoadG");
6043 }
6044
6045 IRAtom* vbits_alt
6046 = expr2vbits( mce, lg->alt );
6047 IRAtom* vbits_final
6048 = expr2vbits_Load_guarded_General(mce, lg->end, loadedTy,
6049 lg->addr, 0/*addr bias*/,
6050 lg->guard, vwiden, vbits_alt );
6051 /* And finally, bind the V bits to the destination temporary. */
6052 assign( 'V', mce, findShadowTmpV(mce, lg->dst), vbits_final );
6053}
6054
6055
sewardj95448072004-11-22 20:19:51 +00006056/*------------------------------------------------------------*/
6057/*--- Memcheck main ---*/
6058/*------------------------------------------------------------*/
njn25e49d8e72002-09-23 09:36:25 +00006059
sewardj7cf4e6b2008-05-01 20:24:26 +00006060static void schemeS ( MCEnv* mce, IRStmt* st );
6061
sewardj95448072004-11-22 20:19:51 +00006062static Bool isBogusAtom ( IRAtom* at )
njn25e49d8e72002-09-23 09:36:25 +00006063{
sewardj95448072004-11-22 20:19:51 +00006064 ULong n = 0;
6065 IRConst* con;
sewardj710d6c22005-03-20 18:55:15 +00006066 tl_assert(isIRAtom(at));
sewardj0b9d74a2006-12-24 02:24:11 +00006067 if (at->tag == Iex_RdTmp)
sewardj95448072004-11-22 20:19:51 +00006068 return False;
6069 tl_assert(at->tag == Iex_Const);
6070 con = at->Iex.Const.con;
6071 switch (con->tag) {
sewardjd5204dc2004-12-31 01:16:11 +00006072 case Ico_U1: return False;
6073 case Ico_U8: n = (ULong)con->Ico.U8; break;
6074 case Ico_U16: n = (ULong)con->Ico.U16; break;
6075 case Ico_U32: n = (ULong)con->Ico.U32; break;
6076 case Ico_U64: n = (ULong)con->Ico.U64; break;
sewardjf837aa72014-11-20 10:15:17 +00006077 case Ico_F32: return False;
sewardjd5204dc2004-12-31 01:16:11 +00006078 case Ico_F64: return False;
sewardjb5b87402011-03-07 16:05:35 +00006079 case Ico_F32i: return False;
sewardjd5204dc2004-12-31 01:16:11 +00006080 case Ico_F64i: return False;
6081 case Ico_V128: return False;
sewardj1eb272f2014-01-26 18:36:52 +00006082 case Ico_V256: return False;
sewardj95448072004-11-22 20:19:51 +00006083 default: ppIRExpr(at); tl_assert(0);
6084 }
6085 /* VG_(printf)("%llx\n", n); */
sewardj96a922e2005-04-23 23:26:29 +00006086 return (/*32*/ n == 0xFEFEFEFFULL
6087 /*32*/ || n == 0x80808080ULL
sewardj17b47432008-12-17 01:12:58 +00006088 /*32*/ || n == 0x7F7F7F7FULL
sewardja150fe92013-12-11 16:49:46 +00006089 /*32*/ || n == 0x7EFEFEFFULL
6090 /*32*/ || n == 0x81010100ULL
tomd9774d72005-06-27 08:11:01 +00006091 /*64*/ || n == 0xFFFFFFFFFEFEFEFFULL
sewardj96a922e2005-04-23 23:26:29 +00006092 /*64*/ || n == 0xFEFEFEFEFEFEFEFFULL
tomd9774d72005-06-27 08:11:01 +00006093 /*64*/ || n == 0x0000000000008080ULL
sewardj96a922e2005-04-23 23:26:29 +00006094 /*64*/ || n == 0x8080808080808080ULL
sewardj17b47432008-12-17 01:12:58 +00006095 /*64*/ || n == 0x0101010101010101ULL
sewardj96a922e2005-04-23 23:26:29 +00006096 );
sewardj95448072004-11-22 20:19:51 +00006097}
njn25e49d8e72002-09-23 09:36:25 +00006098
sewardj95448072004-11-22 20:19:51 +00006099static Bool checkForBogusLiterals ( /*FLAT*/ IRStmt* st )
6100{
sewardjd5204dc2004-12-31 01:16:11 +00006101 Int i;
6102 IRExpr* e;
6103 IRDirty* d;
sewardj1c0ce7a2009-07-01 08:10:49 +00006104 IRCAS* cas;
sewardj95448072004-11-22 20:19:51 +00006105 switch (st->tag) {
sewardj0b9d74a2006-12-24 02:24:11 +00006106 case Ist_WrTmp:
6107 e = st->Ist.WrTmp.data;
sewardj95448072004-11-22 20:19:51 +00006108 switch (e->tag) {
6109 case Iex_Get:
sewardj0b9d74a2006-12-24 02:24:11 +00006110 case Iex_RdTmp:
sewardj95448072004-11-22 20:19:51 +00006111 return False;
sewardjd5204dc2004-12-31 01:16:11 +00006112 case Iex_Const:
6113 return isBogusAtom(e);
sewardj95448072004-11-22 20:19:51 +00006114 case Iex_Unop:
sewardja150fe92013-12-11 16:49:46 +00006115 return isBogusAtom(e->Iex.Unop.arg)
6116 || e->Iex.Unop.op == Iop_GetMSBs8x16;
sewardjd5204dc2004-12-31 01:16:11 +00006117 case Iex_GetI:
6118 return isBogusAtom(e->Iex.GetI.ix);
sewardj95448072004-11-22 20:19:51 +00006119 case Iex_Binop:
6120 return isBogusAtom(e->Iex.Binop.arg1)
6121 || isBogusAtom(e->Iex.Binop.arg2);
sewardjed69fdb2006-02-03 16:12:27 +00006122 case Iex_Triop:
florian26441742012-06-02 20:30:41 +00006123 return isBogusAtom(e->Iex.Triop.details->arg1)
6124 || isBogusAtom(e->Iex.Triop.details->arg2)
6125 || isBogusAtom(e->Iex.Triop.details->arg3);
sewardje91cea72006-02-08 19:32:02 +00006126 case Iex_Qop:
floriane2ab2972012-06-01 20:43:03 +00006127 return isBogusAtom(e->Iex.Qop.details->arg1)
6128 || isBogusAtom(e->Iex.Qop.details->arg2)
6129 || isBogusAtom(e->Iex.Qop.details->arg3)
6130 || isBogusAtom(e->Iex.Qop.details->arg4);
florian5686b2d2013-01-29 03:57:40 +00006131 case Iex_ITE:
6132 return isBogusAtom(e->Iex.ITE.cond)
6133 || isBogusAtom(e->Iex.ITE.iftrue)
6134 || isBogusAtom(e->Iex.ITE.iffalse);
sewardj2e595852005-06-30 23:33:37 +00006135 case Iex_Load:
6136 return isBogusAtom(e->Iex.Load.addr);
sewardj95448072004-11-22 20:19:51 +00006137 case Iex_CCall:
6138 for (i = 0; e->Iex.CCall.args[i]; i++)
6139 if (isBogusAtom(e->Iex.CCall.args[i]))
6140 return True;
6141 return False;
6142 default:
6143 goto unhandled;
6144 }
sewardjd5204dc2004-12-31 01:16:11 +00006145 case Ist_Dirty:
6146 d = st->Ist.Dirty.details;
sewardj21a5f8c2013-08-08 10:41:46 +00006147 for (i = 0; d->args[i]; i++) {
6148 IRAtom* atom = d->args[i];
floriana5c3ecb2013-08-15 20:55:42 +00006149 if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(atom))) {
sewardj21a5f8c2013-08-08 10:41:46 +00006150 if (isBogusAtom(atom))
6151 return True;
6152 }
6153 }
florian6c0aa2c2013-01-21 01:27:22 +00006154 if (isBogusAtom(d->guard))
sewardjd5204dc2004-12-31 01:16:11 +00006155 return True;
6156 if (d->mAddr && isBogusAtom(d->mAddr))
6157 return True;
6158 return False;
sewardj95448072004-11-22 20:19:51 +00006159 case Ist_Put:
6160 return isBogusAtom(st->Ist.Put.data);
sewardjd5204dc2004-12-31 01:16:11 +00006161 case Ist_PutI:
floriand39b0222012-05-31 15:48:13 +00006162 return isBogusAtom(st->Ist.PutI.details->ix)
6163 || isBogusAtom(st->Ist.PutI.details->data);
sewardj2e595852005-06-30 23:33:37 +00006164 case Ist_Store:
6165 return isBogusAtom(st->Ist.Store.addr)
6166 || isBogusAtom(st->Ist.Store.data);
sewardjcafe5052013-01-17 14:24:35 +00006167 case Ist_StoreG: {
6168 IRStoreG* sg = st->Ist.StoreG.details;
6169 return isBogusAtom(sg->addr) || isBogusAtom(sg->data)
6170 || isBogusAtom(sg->guard);
6171 }
6172 case Ist_LoadG: {
6173 IRLoadG* lg = st->Ist.LoadG.details;
6174 return isBogusAtom(lg->addr) || isBogusAtom(lg->alt)
6175 || isBogusAtom(lg->guard);
6176 }
sewardj95448072004-11-22 20:19:51 +00006177 case Ist_Exit:
sewardjd5204dc2004-12-31 01:16:11 +00006178 return isBogusAtom(st->Ist.Exit.guard);
sewardj826ec492005-05-12 18:05:00 +00006179 case Ist_AbiHint:
sewardj7cf4e6b2008-05-01 20:24:26 +00006180 return isBogusAtom(st->Ist.AbiHint.base)
6181 || isBogusAtom(st->Ist.AbiHint.nia);
sewardj21dc3452005-03-21 00:27:41 +00006182 case Ist_NoOp:
sewardj29faa502005-03-16 18:20:21 +00006183 case Ist_IMark:
sewardj72d75132007-11-09 23:06:35 +00006184 case Ist_MBE:
sewardjbd598e12005-01-07 12:10:21 +00006185 return False;
sewardj1c0ce7a2009-07-01 08:10:49 +00006186 case Ist_CAS:
6187 cas = st->Ist.CAS.details;
6188 return isBogusAtom(cas->addr)
6189 || (cas->expdHi ? isBogusAtom(cas->expdHi) : False)
6190 || isBogusAtom(cas->expdLo)
6191 || (cas->dataHi ? isBogusAtom(cas->dataHi) : False)
6192 || isBogusAtom(cas->dataLo);
sewardjdb5907d2009-11-26 17:20:21 +00006193 case Ist_LLSC:
6194 return isBogusAtom(st->Ist.LLSC.addr)
6195 || (st->Ist.LLSC.storedata
6196 ? isBogusAtom(st->Ist.LLSC.storedata)
6197 : False);
sewardj95448072004-11-22 20:19:51 +00006198 default:
6199 unhandled:
6200 ppIRStmt(st);
6201 VG_(tool_panic)("hasBogusLiterals");
6202 }
6203}
njn25e49d8e72002-09-23 09:36:25 +00006204
njn25e49d8e72002-09-23 09:36:25 +00006205
sewardj0b9d74a2006-12-24 02:24:11 +00006206IRSB* MC_(instrument) ( VgCallbackClosure* closure,
sewardj1c0ce7a2009-07-01 08:10:49 +00006207 IRSB* sb_in,
florian3c0c9472014-09-24 12:06:55 +00006208 const VexGuestLayout* layout,
6209 const VexGuestExtents* vge,
6210 const VexArchInfo* archinfo_host,
sewardjd54babf2005-03-21 00:55:49 +00006211 IRType gWordTy, IRType hWordTy )
sewardj95448072004-11-22 20:19:51 +00006212{
sewardj7cf4e6b2008-05-01 20:24:26 +00006213 Bool verboze = 0||False;
sewardj151b90d2005-07-06 19:42:23 +00006214 Bool bogus;
sewardjd5204dc2004-12-31 01:16:11 +00006215 Int i, j, first_stmt;
sewardj95448072004-11-22 20:19:51 +00006216 IRStmt* st;
sewardjd5204dc2004-12-31 01:16:11 +00006217 MCEnv mce;
sewardj1c0ce7a2009-07-01 08:10:49 +00006218 IRSB* sb_out;
sewardjd54babf2005-03-21 00:55:49 +00006219
6220 if (gWordTy != hWordTy) {
6221 /* We don't currently support this case. */
6222 VG_(tool_panic)("host/guest word size mismatch");
6223 }
njn25e49d8e72002-09-23 09:36:25 +00006224
sewardj6cf40ff2005-04-20 22:31:26 +00006225 /* Check we're not completely nuts */
sewardj7cf4e6b2008-05-01 20:24:26 +00006226 tl_assert(sizeof(UWord) == sizeof(void*));
6227 tl_assert(sizeof(Word) == sizeof(void*));
6228 tl_assert(sizeof(Addr) == sizeof(void*));
6229 tl_assert(sizeof(ULong) == 8);
6230 tl_assert(sizeof(Long) == 8);
sewardj7cf4e6b2008-05-01 20:24:26 +00006231 tl_assert(sizeof(UInt) == 4);
6232 tl_assert(sizeof(Int) == 4);
6233
6234 tl_assert(MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3);
sewardj6cf40ff2005-04-20 22:31:26 +00006235
sewardj0b9d74a2006-12-24 02:24:11 +00006236 /* Set up SB */
sewardj1c0ce7a2009-07-01 08:10:49 +00006237 sb_out = deepCopyIRSBExceptStmts(sb_in);
njn25e49d8e72002-09-23 09:36:25 +00006238
sewardj1c0ce7a2009-07-01 08:10:49 +00006239 /* Set up the running environment. Both .sb and .tmpMap are
6240 modified as we go along. Note that tmps are added to both
6241 .sb->tyenv and .tmpMap together, so the valid index-set for
6242 those two arrays should always be identical. */
6243 VG_(memset)(&mce, 0, sizeof(mce));
6244 mce.sb = sb_out;
sewardj7cf4e6b2008-05-01 20:24:26 +00006245 mce.trace = verboze;
sewardj95448072004-11-22 20:19:51 +00006246 mce.layout = layout;
sewardj95448072004-11-22 20:19:51 +00006247 mce.hWordTy = hWordTy;
sewardjd5204dc2004-12-31 01:16:11 +00006248 mce.bogusLiterals = False;
sewardj1c0ce7a2009-07-01 08:10:49 +00006249
sewardj54eac252012-03-27 10:19:39 +00006250 /* Do expensive interpretation for Iop_Add32 and Iop_Add64 on
6251 Darwin. 10.7 is mostly built with LLVM, which uses these for
6252 bitfield inserts, and we get a lot of false errors if the cheap
6253 interpretation is used, alas. Could solve this much better if
6254 we knew which of such adds came from x86/amd64 LEA instructions,
6255 since these are the only ones really needing the expensive
6256 interpretation, but that would require some way to tag them in
6257 the _toIR.c front ends, which is a lot of faffing around. So
6258 for now just use the slow and blunt-instrument solution. */
6259 mce.useLLVMworkarounds = False;
6260# if defined(VGO_darwin)
6261 mce.useLLVMworkarounds = True;
6262# endif
6263
sewardj1c0ce7a2009-07-01 08:10:49 +00006264 mce.tmpMap = VG_(newXA)( VG_(malloc), "mc.MC_(instrument).1", VG_(free),
6265 sizeof(TempMapEnt));
6266 for (i = 0; i < sb_in->tyenv->types_used; i++) {
6267 TempMapEnt ent;
6268 ent.kind = Orig;
6269 ent.shadowV = IRTemp_INVALID;
6270 ent.shadowB = IRTemp_INVALID;
6271 VG_(addToXA)( mce.tmpMap, &ent );
sewardj7cf4e6b2008-05-01 20:24:26 +00006272 }
sewardj1c0ce7a2009-07-01 08:10:49 +00006273 tl_assert( VG_(sizeXA)( mce.tmpMap ) == sb_in->tyenv->types_used );
sewardj95448072004-11-22 20:19:51 +00006274
sewardj151b90d2005-07-06 19:42:23 +00006275 /* Make a preliminary inspection of the statements, to see if there
6276 are any dodgy-looking literals. If there are, we generate
6277 extra-detailed (hence extra-expensive) instrumentation in
6278 places. Scan the whole bb even if dodgyness is found earlier,
6279 so that the flatness assertion is applied to all stmts. */
6280
6281 bogus = False;
sewardj95448072004-11-22 20:19:51 +00006282
sewardj1c0ce7a2009-07-01 08:10:49 +00006283 for (i = 0; i < sb_in->stmts_used; i++) {
sewardj151b90d2005-07-06 19:42:23 +00006284
sewardj1c0ce7a2009-07-01 08:10:49 +00006285 st = sb_in->stmts[i];
sewardj21dc3452005-03-21 00:27:41 +00006286 tl_assert(st);
sewardj95448072004-11-22 20:19:51 +00006287 tl_assert(isFlatIRStmt(st));
6288
sewardj151b90d2005-07-06 19:42:23 +00006289 if (!bogus) {
6290 bogus = checkForBogusLiterals(st);
6291 if (0 && bogus) {
sewardj95448072004-11-22 20:19:51 +00006292 VG_(printf)("bogus: ");
6293 ppIRStmt(st);
6294 VG_(printf)("\n");
6295 }
6296 }
sewardjd5204dc2004-12-31 01:16:11 +00006297
sewardj151b90d2005-07-06 19:42:23 +00006298 }
6299
6300 mce.bogusLiterals = bogus;
6301
sewardja0871482006-10-18 12:41:55 +00006302 /* Copy verbatim any IR preamble preceding the first IMark */
sewardj151b90d2005-07-06 19:42:23 +00006303
sewardj1c0ce7a2009-07-01 08:10:49 +00006304 tl_assert(mce.sb == sb_out);
6305 tl_assert(mce.sb != sb_in);
sewardjf1962d32006-10-19 13:22:16 +00006306
sewardja0871482006-10-18 12:41:55 +00006307 i = 0;
sewardj1c0ce7a2009-07-01 08:10:49 +00006308 while (i < sb_in->stmts_used && sb_in->stmts[i]->tag != Ist_IMark) {
sewardja0871482006-10-18 12:41:55 +00006309
sewardj1c0ce7a2009-07-01 08:10:49 +00006310 st = sb_in->stmts[i];
sewardja0871482006-10-18 12:41:55 +00006311 tl_assert(st);
6312 tl_assert(isFlatIRStmt(st));
6313
sewardj1c0ce7a2009-07-01 08:10:49 +00006314 stmt( 'C', &mce, sb_in->stmts[i] );
sewardja0871482006-10-18 12:41:55 +00006315 i++;
6316 }
6317
sewardjf1962d32006-10-19 13:22:16 +00006318 /* Nasty problem. IR optimisation of the pre-instrumented IR may
6319 cause the IR following the preamble to contain references to IR
6320 temporaries defined in the preamble. Because the preamble isn't
6321 instrumented, these temporaries don't have any shadows.
6322 Nevertheless uses of them following the preamble will cause
6323 memcheck to generate references to their shadows. End effect is
6324 to cause IR sanity check failures, due to references to
6325 non-existent shadows. This is only evident for the complex
6326 preambles used for function wrapping on TOC-afflicted platforms
sewardj6e9de462011-06-28 07:25:29 +00006327 (ppc64-linux).
sewardjf1962d32006-10-19 13:22:16 +00006328
6329 The following loop therefore scans the preamble looking for
6330 assignments to temporaries. For each one found it creates an
sewardjafa617b2008-07-22 09:59:48 +00006331 assignment to the corresponding (V) shadow temp, marking it as
sewardjf1962d32006-10-19 13:22:16 +00006332 'defined'. This is the same resulting IR as if the main
6333 instrumentation loop before had been applied to the statement
6334 'tmp = CONSTANT'.
sewardjafa617b2008-07-22 09:59:48 +00006335
6336 Similarly, if origin tracking is enabled, we must generate an
6337 assignment for the corresponding origin (B) shadow, claiming
6338 no-origin, as appropriate for a defined value.
sewardjf1962d32006-10-19 13:22:16 +00006339 */
6340 for (j = 0; j < i; j++) {
sewardj1c0ce7a2009-07-01 08:10:49 +00006341 if (sb_in->stmts[j]->tag == Ist_WrTmp) {
sewardj7cf4e6b2008-05-01 20:24:26 +00006342 /* findShadowTmpV checks its arg is an original tmp;
sewardjf1962d32006-10-19 13:22:16 +00006343 no need to assert that here. */
sewardj1c0ce7a2009-07-01 08:10:49 +00006344 IRTemp tmp_o = sb_in->stmts[j]->Ist.WrTmp.tmp;
sewardjafa617b2008-07-22 09:59:48 +00006345 IRTemp tmp_v = findShadowTmpV(&mce, tmp_o);
sewardj1c0ce7a2009-07-01 08:10:49 +00006346 IRType ty_v = typeOfIRTemp(sb_out->tyenv, tmp_v);
sewardjafa617b2008-07-22 09:59:48 +00006347 assign( 'V', &mce, tmp_v, definedOfType( ty_v ) );
6348 if (MC_(clo_mc_level) == 3) {
6349 IRTemp tmp_b = findShadowTmpB(&mce, tmp_o);
sewardj1c0ce7a2009-07-01 08:10:49 +00006350 tl_assert(typeOfIRTemp(sb_out->tyenv, tmp_b) == Ity_I32);
sewardjafa617b2008-07-22 09:59:48 +00006351 assign( 'B', &mce, tmp_b, mkU32(0)/* UNKNOWN ORIGIN */);
6352 }
sewardjf1962d32006-10-19 13:22:16 +00006353 if (0) {
sewardjafa617b2008-07-22 09:59:48 +00006354 VG_(printf)("create shadow tmp(s) for preamble tmp [%d] ty ", j);
6355 ppIRType( ty_v );
sewardjf1962d32006-10-19 13:22:16 +00006356 VG_(printf)("\n");
6357 }
6358 }
6359 }
6360
sewardja0871482006-10-18 12:41:55 +00006361 /* Iterate over the remaining stmts to generate instrumentation. */
6362
sewardj1c0ce7a2009-07-01 08:10:49 +00006363 tl_assert(sb_in->stmts_used > 0);
sewardja0871482006-10-18 12:41:55 +00006364 tl_assert(i >= 0);
sewardj1c0ce7a2009-07-01 08:10:49 +00006365 tl_assert(i < sb_in->stmts_used);
6366 tl_assert(sb_in->stmts[i]->tag == Ist_IMark);
sewardja0871482006-10-18 12:41:55 +00006367
sewardj1c0ce7a2009-07-01 08:10:49 +00006368 for (/* use current i*/; i < sb_in->stmts_used; i++) {
sewardj151b90d2005-07-06 19:42:23 +00006369
sewardj1c0ce7a2009-07-01 08:10:49 +00006370 st = sb_in->stmts[i];
6371 first_stmt = sb_out->stmts_used;
sewardj95448072004-11-22 20:19:51 +00006372
6373 if (verboze) {
sewardj7cf4e6b2008-05-01 20:24:26 +00006374 VG_(printf)("\n");
sewardj95448072004-11-22 20:19:51 +00006375 ppIRStmt(st);
sewardj7cf4e6b2008-05-01 20:24:26 +00006376 VG_(printf)("\n");
sewardj95448072004-11-22 20:19:51 +00006377 }
6378
sewardj1c0ce7a2009-07-01 08:10:49 +00006379 if (MC_(clo_mc_level) == 3) {
6380 /* See comments on case Ist_CAS below. */
6381 if (st->tag != Ist_CAS)
6382 schemeS( &mce, st );
6383 }
sewardj7cf4e6b2008-05-01 20:24:26 +00006384
sewardj29faa502005-03-16 18:20:21 +00006385 /* Generate instrumentation code for each stmt ... */
6386
sewardj95448072004-11-22 20:19:51 +00006387 switch (st->tag) {
6388
sewardj0b9d74a2006-12-24 02:24:11 +00006389 case Ist_WrTmp:
sewardj7cf4e6b2008-05-01 20:24:26 +00006390 assign( 'V', &mce, findShadowTmpV(&mce, st->Ist.WrTmp.tmp),
6391 expr2vbits( &mce, st->Ist.WrTmp.data) );
njn25e49d8e72002-09-23 09:36:25 +00006392 break;
6393
sewardj95448072004-11-22 20:19:51 +00006394 case Ist_Put:
6395 do_shadow_PUT( &mce,
6396 st->Ist.Put.offset,
6397 st->Ist.Put.data,
florian434ffae2012-07-19 17:23:42 +00006398 NULL /* shadow atom */, NULL /* guard */ );
njn25e49d8e72002-09-23 09:36:25 +00006399 break;
6400
sewardj95448072004-11-22 20:19:51 +00006401 case Ist_PutI:
floriand39b0222012-05-31 15:48:13 +00006402 do_shadow_PUTI( &mce, st->Ist.PutI.details);
njn25e49d8e72002-09-23 09:36:25 +00006403 break;
6404
sewardj2e595852005-06-30 23:33:37 +00006405 case Ist_Store:
6406 do_shadow_Store( &mce, st->Ist.Store.end,
6407 st->Ist.Store.addr, 0/* addr bias */,
6408 st->Ist.Store.data,
sewardj1c0ce7a2009-07-01 08:10:49 +00006409 NULL /* shadow data */,
6410 NULL/*guard*/ );
njn25e49d8e72002-09-23 09:36:25 +00006411 break;
6412
sewardjcafe5052013-01-17 14:24:35 +00006413 case Ist_StoreG:
6414 do_shadow_StoreG( &mce, st->Ist.StoreG.details );
6415 break;
6416
6417 case Ist_LoadG:
6418 do_shadow_LoadG( &mce, st->Ist.LoadG.details );
6419 break;
6420
sewardj95448072004-11-22 20:19:51 +00006421 case Ist_Exit:
sewardjb9e6d242013-05-11 13:42:08 +00006422 complainIfUndefined( &mce, st->Ist.Exit.guard, NULL );
njn25e49d8e72002-09-23 09:36:25 +00006423 break;
6424
sewardj29faa502005-03-16 18:20:21 +00006425 case Ist_IMark:
sewardj7cf4e6b2008-05-01 20:24:26 +00006426 break;
6427
6428 case Ist_NoOp:
sewardj72d75132007-11-09 23:06:35 +00006429 case Ist_MBE:
sewardjbd598e12005-01-07 12:10:21 +00006430 break;
6431
sewardj95448072004-11-22 20:19:51 +00006432 case Ist_Dirty:
6433 do_shadow_Dirty( &mce, st->Ist.Dirty.details );
njn25e49d8e72002-09-23 09:36:25 +00006434 break;
6435
sewardj826ec492005-05-12 18:05:00 +00006436 case Ist_AbiHint:
sewardj7cf4e6b2008-05-01 20:24:26 +00006437 do_AbiHint( &mce, st->Ist.AbiHint.base,
6438 st->Ist.AbiHint.len,
6439 st->Ist.AbiHint.nia );
sewardj826ec492005-05-12 18:05:00 +00006440 break;
6441
sewardj1c0ce7a2009-07-01 08:10:49 +00006442 case Ist_CAS:
6443 do_shadow_CAS( &mce, st->Ist.CAS.details );
6444 /* Note, do_shadow_CAS copies the CAS itself to the output
6445 block, because it needs to add instrumentation both
6446 before and after it. Hence skip the copy below. Also
6447 skip the origin-tracking stuff (call to schemeS) above,
6448 since that's all tangled up with it too; do_shadow_CAS
6449 does it all. */
6450 break;
6451
sewardjdb5907d2009-11-26 17:20:21 +00006452 case Ist_LLSC:
6453 do_shadow_LLSC( &mce,
6454 st->Ist.LLSC.end,
6455 st->Ist.LLSC.result,
6456 st->Ist.LLSC.addr,
6457 st->Ist.LLSC.storedata );
6458 break;
6459
njn25e49d8e72002-09-23 09:36:25 +00006460 default:
sewardj95448072004-11-22 20:19:51 +00006461 VG_(printf)("\n");
6462 ppIRStmt(st);
6463 VG_(printf)("\n");
6464 VG_(tool_panic)("memcheck: unhandled IRStmt");
6465
6466 } /* switch (st->tag) */
6467
sewardj7cf4e6b2008-05-01 20:24:26 +00006468 if (0 && verboze) {
sewardj1c0ce7a2009-07-01 08:10:49 +00006469 for (j = first_stmt; j < sb_out->stmts_used; j++) {
sewardj95448072004-11-22 20:19:51 +00006470 VG_(printf)(" ");
sewardj1c0ce7a2009-07-01 08:10:49 +00006471 ppIRStmt(sb_out->stmts[j]);
sewardj95448072004-11-22 20:19:51 +00006472 VG_(printf)("\n");
6473 }
6474 VG_(printf)("\n");
njn25e49d8e72002-09-23 09:36:25 +00006475 }
sewardj95448072004-11-22 20:19:51 +00006476
sewardj1c0ce7a2009-07-01 08:10:49 +00006477 /* ... and finally copy the stmt itself to the output. Except,
6478 skip the copy of IRCASs; see comments on case Ist_CAS
6479 above. */
6480 if (st->tag != Ist_CAS)
6481 stmt('C', &mce, st);
njn25e49d8e72002-09-23 09:36:25 +00006482 }
njn25e49d8e72002-09-23 09:36:25 +00006483
sewardj95448072004-11-22 20:19:51 +00006484 /* Now we need to complain if the jump target is undefined. */
sewardj1c0ce7a2009-07-01 08:10:49 +00006485 first_stmt = sb_out->stmts_used;
njn25e49d8e72002-09-23 09:36:25 +00006486
sewardj95448072004-11-22 20:19:51 +00006487 if (verboze) {
sewardj1c0ce7a2009-07-01 08:10:49 +00006488 VG_(printf)("sb_in->next = ");
6489 ppIRExpr(sb_in->next);
sewardj95448072004-11-22 20:19:51 +00006490 VG_(printf)("\n\n");
6491 }
njn25e49d8e72002-09-23 09:36:25 +00006492
sewardjb9e6d242013-05-11 13:42:08 +00006493 complainIfUndefined( &mce, sb_in->next, NULL );
njn25e49d8e72002-09-23 09:36:25 +00006494
sewardj7cf4e6b2008-05-01 20:24:26 +00006495 if (0 && verboze) {
sewardj1c0ce7a2009-07-01 08:10:49 +00006496 for (j = first_stmt; j < sb_out->stmts_used; j++) {
sewardj95448072004-11-22 20:19:51 +00006497 VG_(printf)(" ");
sewardj1c0ce7a2009-07-01 08:10:49 +00006498 ppIRStmt(sb_out->stmts[j]);
sewardj95448072004-11-22 20:19:51 +00006499 VG_(printf)("\n");
njn25e49d8e72002-09-23 09:36:25 +00006500 }
sewardj95448072004-11-22 20:19:51 +00006501 VG_(printf)("\n");
njn25e49d8e72002-09-23 09:36:25 +00006502 }
njn25e49d8e72002-09-23 09:36:25 +00006503
sewardj1c0ce7a2009-07-01 08:10:49 +00006504 /* If this fails, there's been some serious snafu with tmp management,
6505 that should be investigated. */
6506 tl_assert( VG_(sizeXA)( mce.tmpMap ) == mce.sb->tyenv->types_used );
6507 VG_(deleteXA)( mce.tmpMap );
6508
6509 tl_assert(mce.sb == sb_out);
6510 return sb_out;
sewardj95448072004-11-22 20:19:51 +00006511}
njn25e49d8e72002-09-23 09:36:25 +00006512
sewardj81651dc2007-08-28 06:05:20 +00006513/*------------------------------------------------------------*/
6514/*--- Post-tree-build final tidying ---*/
6515/*------------------------------------------------------------*/
6516
6517/* This exploits the observation that Memcheck often produces
6518 repeated conditional calls of the form
6519
sewardj7cf4e6b2008-05-01 20:24:26 +00006520 Dirty G MC_(helperc_value_check0/1/4/8_fail)(UInt otag)
sewardj81651dc2007-08-28 06:05:20 +00006521
6522 with the same guard expression G guarding the same helper call.
6523 The second and subsequent calls are redundant. This usually
6524 results from instrumentation of guest code containing multiple
6525 memory references at different constant offsets from the same base
6526 register. After optimisation of the instrumentation, you get a
6527 test for the definedness of the base register for each memory
6528 reference, which is kinda pointless. MC_(final_tidy) therefore
6529 looks for such repeated calls and removes all but the first. */
6530
6531/* A struct for recording which (helper, guard) pairs we have already
6532 seen. */
typedef
   struct { void* entry;   /* address of the helper (IRCallee.addr) */
            IRExpr* guard; /* guard expression of the dirty call */ }
   Pair;
6536
6537/* Return True if e1 and e2 definitely denote the same value (used to
6538 compare guards). Return False if unknown; False is the safe
6539 answer. Since guest registers and guest memory do not have the
6540 SSA property we must return False if any Gets or Loads appear in
6541 the expression. */
6542
6543static Bool sameIRValue ( IRExpr* e1, IRExpr* e2 )
6544{
6545 if (e1->tag != e2->tag)
6546 return False;
6547 switch (e1->tag) {
6548 case Iex_Const:
6549 return eqIRConst( e1->Iex.Const.con, e2->Iex.Const.con );
6550 case Iex_Binop:
6551 return e1->Iex.Binop.op == e2->Iex.Binop.op
6552 && sameIRValue(e1->Iex.Binop.arg1, e2->Iex.Binop.arg1)
6553 && sameIRValue(e1->Iex.Binop.arg2, e2->Iex.Binop.arg2);
6554 case Iex_Unop:
6555 return e1->Iex.Unop.op == e2->Iex.Unop.op
6556 && sameIRValue(e1->Iex.Unop.arg, e2->Iex.Unop.arg);
6557 case Iex_RdTmp:
6558 return e1->Iex.RdTmp.tmp == e2->Iex.RdTmp.tmp;
florian5686b2d2013-01-29 03:57:40 +00006559 case Iex_ITE:
6560 return sameIRValue( e1->Iex.ITE.cond, e2->Iex.ITE.cond )
6561 && sameIRValue( e1->Iex.ITE.iftrue, e2->Iex.ITE.iftrue )
6562 && sameIRValue( e1->Iex.ITE.iffalse, e2->Iex.ITE.iffalse );
sewardj81651dc2007-08-28 06:05:20 +00006563 case Iex_Qop:
6564 case Iex_Triop:
6565 case Iex_CCall:
6566 /* be lazy. Could define equality for these, but they never
6567 appear to be used. */
6568 return False;
6569 case Iex_Get:
6570 case Iex_GetI:
6571 case Iex_Load:
6572 /* be conservative - these may not give the same value each
6573 time */
6574 return False;
6575 case Iex_Binder:
6576 /* should never see this */
6577 /* fallthrough */
6578 default:
6579 VG_(printf)("mc_translate.c: sameIRValue: unhandled: ");
6580 ppIRExpr(e1);
6581 VG_(tool_panic)("memcheck:sameIRValue");
6582 return False;
6583 }
6584}
6585
6586/* See if 'pairs' already has an entry for (entry, guard). Return
6587 True if so. If not, add an entry. */
6588
6589static
6590Bool check_or_add ( XArray* /*of Pair*/ pairs, IRExpr* guard, void* entry )
6591{
6592 Pair p;
6593 Pair* pp;
6594 Int i, n = VG_(sizeXA)( pairs );
6595 for (i = 0; i < n; i++) {
6596 pp = VG_(indexXA)( pairs, i );
6597 if (pp->entry == entry && sameIRValue(pp->guard, guard))
6598 return True;
6599 }
6600 p.guard = guard;
6601 p.entry = entry;
6602 VG_(addToXA)( pairs, &p );
6603 return False;
6604}
6605
florian11f3cc82012-10-21 02:19:35 +00006606static Bool is_helperc_value_checkN_fail ( const HChar* name )
sewardj81651dc2007-08-28 06:05:20 +00006607{
6608 return
sewardj7cf4e6b2008-05-01 20:24:26 +00006609 0==VG_(strcmp)(name, "MC_(helperc_value_check0_fail_no_o)")
6610 || 0==VG_(strcmp)(name, "MC_(helperc_value_check1_fail_no_o)")
6611 || 0==VG_(strcmp)(name, "MC_(helperc_value_check4_fail_no_o)")
6612 || 0==VG_(strcmp)(name, "MC_(helperc_value_check8_fail_no_o)")
6613 || 0==VG_(strcmp)(name, "MC_(helperc_value_check0_fail_w_o)")
6614 || 0==VG_(strcmp)(name, "MC_(helperc_value_check1_fail_w_o)")
6615 || 0==VG_(strcmp)(name, "MC_(helperc_value_check4_fail_w_o)")
6616 || 0==VG_(strcmp)(name, "MC_(helperc_value_check8_fail_w_o)");
sewardj81651dc2007-08-28 06:05:20 +00006617}
6618
/* Remove redundant repeated conditional calls to the value-check-fail
   helpers: keep only the first call for each distinct (helper, guard)
   pair.  Mutates sb_in in place and returns it. */
IRSB* MC_(final_tidy) ( IRSB* sb_in )
{
   Int i;
   IRStmt* st;
   IRDirty* di;
   IRExpr* guard;
   IRCallee* cee;
   Bool alreadyPresent;
   /* Set of (helper entry address, guard) pairs seen so far. */
   XArray* pairs = VG_(newXA)( VG_(malloc), "mc.ft.1",
                               VG_(free), sizeof(Pair) );
   /* Scan forwards through the statements.  Each time a call to one
      of the relevant helpers is seen, check if we have made a
      previous call to the same helper using the same guard
      expression, and if so, delete the call. */
   for (i = 0; i < sb_in->stmts_used; i++) {
      st = sb_in->stmts[i];
      tl_assert(st);
      if (st->tag != Ist_Dirty)
         continue;
      di = st->Ist.Dirty.details;
      guard = di->guard;
      tl_assert(guard);
      if (0) { ppIRExpr(guard); VG_(printf)("\n"); }
      cee = di->cee;
      if (!is_helperc_value_checkN_fail( cee->name ))
         continue;
      /* Ok, we have a call to helperc_value_check0/1/4/8_fail with
         guard 'guard'.  Check if we have already seen a call to this
         function with the same guard.  If so, delete it.  If not,
         add it to the set of calls we do know about. */
      alreadyPresent = check_or_add( pairs, guard, cee->addr );
      if (alreadyPresent) {
         /* Replace with a NoOp rather than deleting, so statement
            indices stay stable during the scan. */
         sb_in->stmts[i] = IRStmt_NoOp();
         if (0) VG_(printf)("XX\n");
      }
   }
   VG_(deleteXA)( pairs );
   return sb_in;
}
6658
6659
sewardj7cf4e6b2008-05-01 20:24:26 +00006660/*------------------------------------------------------------*/
6661/*--- Origin tracking stuff ---*/
6662/*------------------------------------------------------------*/
6663
sewardj1c0ce7a2009-07-01 08:10:49 +00006664/* Almost identical to findShadowTmpV. */
/* Return the B-shadow (origin tag) temporary for original temp
   'orig', creating it on first use.  Almost identical to
   findShadowTmpV. */
static IRTemp findShadowTmpB ( MCEnv* mce, IRTemp orig )
{
   TempMapEnt* ent;
   /* VG_(indexXA) range-checks 'orig', hence no need to check
      here. */
   ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
   tl_assert(ent->kind == Orig);
   if (ent->shadowB == IRTemp_INVALID) {
      /* B shadows are always 32-bit origin tags. */
      IRTemp tmpB
        = newTemp( mce, Ity_I32, BSh );
      /* newTemp may cause mce->tmpMap to resize, hence previous results
         from VG_(indexXA) are invalid. */
      ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
      tl_assert(ent->kind == Orig);
      tl_assert(ent->shadowB == IRTemp_INVALID);
      ent->shadowB = tmpB;
   }
   return ent->shadowB;
}
6684
6685static IRAtom* gen_maxU32 ( MCEnv* mce, IRAtom* b1, IRAtom* b2 )
6686{
6687 return assignNew( 'B', mce, Ity_I32, binop(Iop_Max32U, b1, b2) );
6688}
6689
sewardjcafe5052013-01-17 14:24:35 +00006690
6691/* Make a guarded origin load, with no special handling in the
6692 didn't-happen case. A GUARD of NULL is assumed to mean "always
6693 True".
6694
6695 Generate IR to do a shadow origins load from BASEADDR+OFFSET and
6696 return the otag. The loaded size is SZB. If GUARD evaluates to
6697 False at run time then the returned otag is zero.
6698*/
static IRAtom* gen_guarded_load_b ( MCEnv* mce, Int szB,
                                    IRAtom* baseaddr,
                                    Int offset, IRExpr* guard )
{
   void*    hFun;
   const HChar* hName;
   IRTemp   bTmp;
   IRDirty* di;
   IRType   aTy   = typeOfIRExpr( mce->sb->tyenv, baseaddr );
   IROp     opAdd = aTy == Ity_I32 ? Iop_Add32 : Iop_Add64;
   IRAtom*  ea    = baseaddr;
   /* Compute the effective address baseaddr+offset at the guest word
      width. */
   if (offset != 0) {
      IRAtom* off = aTy == Ity_I32 ? mkU32( offset )
                                   : mkU64( (Long)(Int)offset );
      ea = assignNew( 'B', mce, aTy, binop(opAdd, ea, off));
   }
   /* The helper returns a host-word-sized value; narrowed to 32 bits
      below if needed. */
   bTmp = newTemp(mce, mce->hWordTy, BSh);

   /* Select the origin-load helper matching the access size. */
   switch (szB) {
      case 1: hFun  = (void*)&MC_(helperc_b_load1);
              hName = "MC_(helperc_b_load1)";
              break;
      case 2: hFun  = (void*)&MC_(helperc_b_load2);
              hName = "MC_(helperc_b_load2)";
              break;
      case 4: hFun  = (void*)&MC_(helperc_b_load4);
              hName = "MC_(helperc_b_load4)";
              break;
      case 8: hFun  = (void*)&MC_(helperc_b_load8);
              hName = "MC_(helperc_b_load8)";
              break;
      case 16: hFun  = (void*)&MC_(helperc_b_load16);
               hName = "MC_(helperc_b_load16)";
               break;
      case 32: hFun  = (void*)&MC_(helperc_b_load32);
               hName = "MC_(helperc_b_load32)";
               break;
      default:
         VG_(printf)("mc_translate.c: gen_load_b: unhandled szB == %d\n", szB);
         tl_assert(0);
   }
   di = unsafeIRDirty_1_N(
           bTmp, 1/*regparms*/, hName, VG_(fnptr_to_fnentry)( hFun ),
           mkIRExprVec_1( ea )
        );
   if (guard) {
      di->guard = guard;
      /* Ideally the didn't-happen return value here would be
         all-zeroes (unknown-origin), so it'd be harmless if it got
         used inadvertantly.  We slum it out with the IR-mandated
         default value (0b01 repeating, 0x55 etc) as that'll probably
         trump all legitimate otags via Max32, and it's pretty
         obviously bogus. */
   }
   /* no need to mess with any annotations.  This call accesses
      neither guest state nor guest memory. */
   stmt( 'B', mce, IRStmt_Dirty(di) );
   if (mce->hWordTy == Ity_I64) {
      /* 64-bit host: narrow the returned word to the 32-bit otag. */
      IRTemp bTmp32 = newTemp(mce, Ity_I32, BSh);
      assign( 'B', mce, bTmp32, unop(Iop_64to32, mkexpr(bTmp)) );
      return mkexpr(bTmp32);
   } else {
      /* 32-bit host */
      return mkexpr(bTmp);
   }
}
sewardj1c0ce7a2009-07-01 08:10:49 +00006766
sewardjcafe5052013-01-17 14:24:35 +00006767
6768/* Generate IR to do a shadow origins load from BASEADDR+OFFSET. The
6769 loaded size is SZB. The load is regarded as unconditional (always
6770 happens).
6771*/
6772static IRAtom* gen_load_b ( MCEnv* mce, Int szB, IRAtom* baseaddr,
6773 Int offset )
florian434ffae2012-07-19 17:23:42 +00006774{
sewardjcafe5052013-01-17 14:24:35 +00006775 return gen_guarded_load_b(mce, szB, baseaddr, offset, NULL/*guard*/);
florian434ffae2012-07-19 17:23:42 +00006776}
6777
sewardjcafe5052013-01-17 14:24:35 +00006778
6779/* The most general handler for guarded origin loads. A GUARD of NULL
6780 is assumed to mean "always True".
6781
6782 Generate IR to do a shadow origin load from ADDR+BIAS and return
6783 the B bits. The loaded type is TY. If GUARD evaluates to False at
6784 run time then the returned B bits are simply BALT instead.
6785*/
6786static
6787IRAtom* expr2ori_Load_guarded_General ( MCEnv* mce,
6788 IRType ty,
6789 IRAtom* addr, UInt bias,
6790 IRAtom* guard, IRAtom* balt )
6791{
6792 /* If the guard evaluates to True, this will hold the loaded
6793 origin. If the guard evaluates to False, this will be zero,
6794 meaning "unknown origin", in which case we will have to replace
florian5686b2d2013-01-29 03:57:40 +00006795 it using an ITE below. */
sewardjcafe5052013-01-17 14:24:35 +00006796 IRAtom* iftrue
6797 = assignNew('B', mce, Ity_I32,
6798 gen_guarded_load_b(mce, sizeofIRType(ty),
6799 addr, bias, guard));
6800 /* These are the bits we will return if the load doesn't take
6801 place. */
6802 IRAtom* iffalse
6803 = balt;
florian5686b2d2013-01-29 03:57:40 +00006804 /* Prepare the cond for the ITE. Convert a NULL cond into
sewardjcafe5052013-01-17 14:24:35 +00006805 something that iropt knows how to fold out later. */
6806 IRAtom* cond
sewardjcc961652013-01-26 11:49:15 +00006807 = guard == NULL ? mkU1(1) : guard;
sewardjcafe5052013-01-17 14:24:35 +00006808 /* And assemble the final result. */
florian5686b2d2013-01-29 03:57:40 +00006809 return assignNew('B', mce, Ity_I32, IRExpr_ITE(cond, iftrue, iffalse));
sewardjcafe5052013-01-17 14:24:35 +00006810}
6811
6812
6813/* Generate a shadow origins store. guard :: Ity_I1 controls whether
6814 the store really happens; NULL means it unconditionally does. */
sewardj7cf4e6b2008-05-01 20:24:26 +00006815static void gen_store_b ( MCEnv* mce, Int szB,
sewardj1c0ce7a2009-07-01 08:10:49 +00006816 IRAtom* baseaddr, Int offset, IRAtom* dataB,
6817 IRAtom* guard )
sewardj7cf4e6b2008-05-01 20:24:26 +00006818{
6819 void* hFun;
florian6bd9dc12012-11-23 16:17:43 +00006820 const HChar* hName;
sewardj7cf4e6b2008-05-01 20:24:26 +00006821 IRDirty* di;
sewardj1c0ce7a2009-07-01 08:10:49 +00006822 IRType aTy = typeOfIRExpr( mce->sb->tyenv, baseaddr );
sewardj7cf4e6b2008-05-01 20:24:26 +00006823 IROp opAdd = aTy == Ity_I32 ? Iop_Add32 : Iop_Add64;
6824 IRAtom* ea = baseaddr;
sewardj1c0ce7a2009-07-01 08:10:49 +00006825 if (guard) {
6826 tl_assert(isOriginalAtom(mce, guard));
6827 tl_assert(typeOfIRExpr(mce->sb->tyenv, guard) == Ity_I1);
6828 }
sewardj7cf4e6b2008-05-01 20:24:26 +00006829 if (offset != 0) {
6830 IRAtom* off = aTy == Ity_I32 ? mkU32( offset )
6831 : mkU64( (Long)(Int)offset );
6832 ea = assignNew( 'B', mce, aTy, binop(opAdd, ea, off));
6833 }
6834 if (mce->hWordTy == Ity_I64)
6835 dataB = assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, dataB));
6836
6837 switch (szB) {
6838 case 1: hFun = (void*)&MC_(helperc_b_store1);
6839 hName = "MC_(helperc_b_store1)";
6840 break;
6841 case 2: hFun = (void*)&MC_(helperc_b_store2);
6842 hName = "MC_(helperc_b_store2)";
6843 break;
6844 case 4: hFun = (void*)&MC_(helperc_b_store4);
6845 hName = "MC_(helperc_b_store4)";
6846 break;
6847 case 8: hFun = (void*)&MC_(helperc_b_store8);
6848 hName = "MC_(helperc_b_store8)";
6849 break;
6850 case 16: hFun = (void*)&MC_(helperc_b_store16);
6851 hName = "MC_(helperc_b_store16)";
6852 break;
sewardj45fa9f42012-05-21 10:18:10 +00006853 case 32: hFun = (void*)&MC_(helperc_b_store32);
6854 hName = "MC_(helperc_b_store32)";
6855 break;
sewardj7cf4e6b2008-05-01 20:24:26 +00006856 default:
6857 tl_assert(0);
6858 }
6859 di = unsafeIRDirty_0_N( 2/*regparms*/,
6860 hName, VG_(fnptr_to_fnentry)( hFun ),
6861 mkIRExprVec_2( ea, dataB )
6862 );
6863 /* no need to mess with any annotations. This call accesses
6864 neither guest state nor guest memory. */
sewardj1c0ce7a2009-07-01 08:10:49 +00006865 if (guard) di->guard = guard;
sewardj7cf4e6b2008-05-01 20:24:26 +00006866 stmt( 'B', mce, IRStmt_Dirty(di) );
6867}
6868
6869static IRAtom* narrowTo32 ( MCEnv* mce, IRAtom* e ) {
sewardj1c0ce7a2009-07-01 08:10:49 +00006870 IRType eTy = typeOfIRExpr(mce->sb->tyenv, e);
sewardj7cf4e6b2008-05-01 20:24:26 +00006871 if (eTy == Ity_I64)
6872 return assignNew( 'B', mce, Ity_I32, unop(Iop_64to32, e) );
6873 if (eTy == Ity_I32)
6874 return e;
6875 tl_assert(0);
6876}
6877
6878static IRAtom* zWidenFrom32 ( MCEnv* mce, IRType dstTy, IRAtom* e ) {
sewardj1c0ce7a2009-07-01 08:10:49 +00006879 IRType eTy = typeOfIRExpr(mce->sb->tyenv, e);
sewardj7cf4e6b2008-05-01 20:24:26 +00006880 tl_assert(eTy == Ity_I32);
6881 if (dstTy == Ity_I64)
6882 return assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, e) );
6883 tl_assert(0);
6884}
6885
sewardjdb5907d2009-11-26 17:20:21 +00006886
/* Compute the 32-bit origin tag (B-value) for expression 'e',
   emitting any needed IR into the output block.  Only used when
   origin tracking is enabled (MC_(clo_mc_level) == 3).  The general
   rule is: the origin of a compound expression is the max
   (gen_maxU32) of the origins of its sub-expressions. */
static IRAtom* schemeE ( MCEnv* mce, IRExpr* e )
{
   tl_assert(MC_(clo_mc_level) == 3);

   switch (e->tag) {

      case Iex_GetI: {
         IRRegArray* descr_b;
         IRAtom      *t1, *t2, *t3, *t4;
         IRRegArray* descr = e->Iex.GetI.descr;
         IRType equivIntTy
            = MC_(get_otrack_reg_array_equiv_int_type)(descr);
         /* If this array is unshadowable for whatever reason, use the
            usual approximation. */
         if (equivIntTy == Ity_INVALID)
            return mkU32(0);
         tl_assert(sizeofIRType(equivIntTy) >= 4);
         tl_assert(sizeofIRType(equivIntTy) == sizeofIRType(descr->elemTy));
         /* B-shadow array lives at base + 2*total_sizeB in the shadow
            guest state. */
         descr_b = mkIRRegArray( descr->base + 2*mce->layout->total_sizeB,
                                 equivIntTy, descr->nElems );
         /* Do a shadow indexed get of the same size, giving t1.  Take
            the bottom 32 bits of it, giving t2.  Compute into t3 the
            origin for the index (almost certainly zero, but there's
            no harm in being completely general here, since iropt will
            remove any useless code), and fold it in, giving a final
            value t4. */
         t1 = assignNew( 'B', mce, equivIntTy,
                          IRExpr_GetI( descr_b, e->Iex.GetI.ix,
                                       e->Iex.GetI.bias ));
         t2 = narrowTo32( mce, t1 );
         t3 = schemeE( mce, e->Iex.GetI.ix );
         t4 = gen_maxU32( mce, t2, t3 );
         return t4;
      }
      case Iex_CCall: {
         Int i;
         IRAtom*  here;
         IRExpr** args = e->Iex.CCall.args;
         IRAtom*  curr = mkU32(0);
         for (i = 0; args[i]; i++) {
            tl_assert(i < 32);
            tl_assert(isOriginalAtom(mce, args[i]));
            /* Only take notice of this arg if the callee's
               mc-exclusion mask does not say it is to be excluded. */
            if (e->Iex.CCall.cee->mcx_mask & (1<<i)) {
               /* the arg is to be excluded from definedness checking.
                  Do nothing. */
               if (0) VG_(printf)("excluding %s(%d)\n",
                                  e->Iex.CCall.cee->name, i);
            } else {
               /* calculate the arg's definedness, and pessimistically
                  merge it in. */
               here = schemeE( mce, args[i] );
               curr = gen_maxU32( mce, curr, here );
            }
         }
         return curr;
      }
      case Iex_Load: {
         Int dszB;
         dszB = sizeofIRType(e->Iex.Load.ty);
         /* assert that the B value for the address is already
            available (somewhere) */
         tl_assert(isIRAtom(e->Iex.Load.addr));
         tl_assert(mce->hWordTy == Ity_I32 || mce->hWordTy == Ity_I64);
         return gen_load_b( mce, dszB, e->Iex.Load.addr, 0 );
      }
      case Iex_ITE: {
         /* Note b3/b2 naming: b3 is iftrue, b2 is iffalse. */
         IRAtom* b1 = schemeE( mce, e->Iex.ITE.cond );
         IRAtom* b3 = schemeE( mce, e->Iex.ITE.iftrue );
         IRAtom* b2 = schemeE( mce, e->Iex.ITE.iffalse );
         return gen_maxU32( mce, b1, gen_maxU32( mce, b2, b3 ));
      }
      case Iex_Qop: {
         IRAtom* b1 = schemeE( mce, e->Iex.Qop.details->arg1 );
         IRAtom* b2 = schemeE( mce, e->Iex.Qop.details->arg2 );
         IRAtom* b3 = schemeE( mce, e->Iex.Qop.details->arg3 );
         IRAtom* b4 = schemeE( mce, e->Iex.Qop.details->arg4 );
         return gen_maxU32( mce, gen_maxU32( mce, b1, b2 ),
                                 gen_maxU32( mce, b3, b4 ) );
      }
      case Iex_Triop: {
         IRAtom* b1 = schemeE( mce, e->Iex.Triop.details->arg1 );
         IRAtom* b2 = schemeE( mce, e->Iex.Triop.details->arg2 );
         IRAtom* b3 = schemeE( mce, e->Iex.Triop.details->arg3 );
         return gen_maxU32( mce, b1, gen_maxU32( mce, b2, b3 ) );
      }
      case Iex_Binop: {
         switch (e->Iex.Binop.op) {
            case Iop_CasCmpEQ8:  case Iop_CasCmpNE8:
            case Iop_CasCmpEQ16: case Iop_CasCmpNE16:
            case Iop_CasCmpEQ32: case Iop_CasCmpNE32:
            case Iop_CasCmpEQ64: case Iop_CasCmpNE64:
               /* Just say these all produce a defined result,
                  regardless of their arguments.  See
                  COMMENT_ON_CasCmpEQ in this file. */
               return mkU32(0);
            default: {
               IRAtom* b1 = schemeE( mce, e->Iex.Binop.arg1 );
               IRAtom* b2 = schemeE( mce, e->Iex.Binop.arg2 );
               return gen_maxU32( mce, b1, b2 );
            }
         }
         tl_assert(0);
         /*NOTREACHED*/
      }
      case Iex_Unop: {
         /* Unary ops just pass the argument's origin through. */
         IRAtom* b1 = schemeE( mce, e->Iex.Unop.arg );
         return b1;
      }
      case Iex_Const:
         /* Constants carry no origin. */
         return mkU32(0);
      case Iex_RdTmp:
         return mkexpr( findShadowTmpB( mce, e->Iex.RdTmp.tmp ));
      case Iex_Get: {
         Int b_offset = MC_(get_otrack_shadow_offset)(
                           e->Iex.Get.offset,
                           sizeofIRType(e->Iex.Get.ty)
                        );
         tl_assert(b_offset >= -1
                   && b_offset <= mce->layout->total_sizeB -4);
         if (b_offset >= 0) {
            /* FIXME: this isn't an atom! */
            return IRExpr_Get( b_offset + 2*mce->layout->total_sizeB,
                               Ity_I32 );
         }
         /* -1 means this slice of guest state has no B shadow. */
         return mkU32(0);
      }
      default:
         VG_(printf)("mc_translate.c: schemeE: unhandled: ");
         ppIRExpr(e);
         VG_(tool_panic)("memcheck:schemeE");
   }
}
7021
sewardjdb5907d2009-11-26 17:20:21 +00007022
/* Origin-tracking instrumentation for an IRDirty helper call:
   accumulate (via gen_maxU32) the origin tags of everything the
   helper reads -- guard, unmasked args, guest state, memory -- into
   'curr', then propagate that single tag to every output: the
   destination temporary, written guest state, and written memory. */
static void do_origins_Dirty ( MCEnv* mce, IRDirty* d )
{
   // This is a hacked version of do_shadow_Dirty
   Int       i, k, n, toDo, gSz, gOff;
   IRAtom    *here, *curr;
   IRTemp    dst;

   /* First check the guard. */
   curr = schemeE( mce, d->guard );

   /* Now round up all inputs and maxU32 over them. */

   /* Inputs: unmasked args
      Note: arguments are evaluated REGARDLESS of the guard expression */
   for (i = 0; d->args[i]; i++) {
      IRAtom* arg = d->args[i];
      if ( (d->cee->mcx_mask & (1<<i))
           || UNLIKELY(is_IRExpr_VECRET_or_BBPTR(arg)) ) {
         /* ignore this arg */
      } else {
         here = schemeE( mce, arg );
         curr = gen_maxU32( mce, curr, here );
      }
   }

   /* Inputs: guest state that we read. */
   for (i = 0; i < d->nFxState; i++) {
      tl_assert(d->fxState[i].fx != Ifx_None);
      if (d->fxState[i].fx == Ifx_Write)
         continue;

      /* Enumerate the described state segments */
      for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) {
         gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen;
         gSz  = d->fxState[i].size;

         /* Ignore any sections marked as 'always defined'. */
         if (isAlwaysDefd(mce, gOff, gSz)) {
            if (0)
            VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
                        gOff, gSz);
            continue;
         }

         /* This state element is read or modified.  So we need to
            consider it.  If larger than 4 bytes, deal with it in
            4-byte chunks. */
         while (True) {
            Int b_offset;
            tl_assert(gSz >= 0);
            if (gSz == 0) break;
            n = gSz <= 4 ? gSz : 4;
            /* update 'curr' with maxU32 of the state slice
               gOff .. gOff+n-1 */
            b_offset = MC_(get_otrack_shadow_offset)(gOff, 4);
            if (b_offset != -1) {
               /* Observe the guard expression. If it is false use 0, i.e.
                  nothing is known about the origin */
               IRAtom *cond, *iffalse, *iftrue;

               cond = assignNew( 'B', mce, Ity_I1, d->guard);
               iffalse = mkU32(0);
               iftrue  = assignNew( 'B', mce, Ity_I32,
                                    IRExpr_Get(b_offset
                                                 + 2*mce->layout->total_sizeB,
                                               Ity_I32));
               here = assignNew( 'B', mce, Ity_I32,
                                 IRExpr_ITE(cond, iftrue, iffalse));
               curr = gen_maxU32( mce, curr, here );
            }
            gSz -= n;
            gOff += n;
         }
      }
   }

   /* Inputs: memory */

   if (d->mFx != Ifx_None) {
      /* Because we may do multiple shadow loads/stores from the same
         base address, it's best to do a single test of its
         definedness right now.  Post-instrumentation optimisation
         should remove all but this test. */
      tl_assert(d->mAddr);
      here = schemeE( mce, d->mAddr );
      curr = gen_maxU32( mce, curr, here );
   }

   /* Deal with memory inputs (reads or modifies) */
   if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
      toDo   = d->mSize;
      /* chew off 32-bit chunks.  We don't care about the endianness
         since it's all going to be condensed down to a single bit,
         but nevertheless choose an endianness which is hopefully
         native to the platform. */
      while (toDo >= 4) {
         here = gen_guarded_load_b( mce, 4, d->mAddr, d->mSize - toDo,
                                    d->guard );
         curr = gen_maxU32( mce, curr, here );
         toDo -= 4;
      }
      /* handle possible 16-bit excess */
      while (toDo >= 2) {
         here = gen_guarded_load_b( mce, 2, d->mAddr, d->mSize - toDo,
                                    d->guard );
         curr = gen_maxU32( mce, curr, here );
         toDo -= 2;
      }
      /* chew off the remaining 8-bit chunk, if any */
      if (toDo == 1) {
         here = gen_guarded_load_b( mce, 1, d->mAddr, d->mSize - toDo,
                                    d->guard );
         curr = gen_maxU32( mce, curr, here );
         toDo -= 1;
      }
      tl_assert(toDo == 0);
   }

   /* Whew!  So curr is a 32-bit B-value which should give an origin
      of some use if any of the inputs to the helper are undefined.
      Now we need to re-distribute the results to all destinations. */

   /* Outputs: the destination temporary, if there is one. */
   if (d->tmp != IRTemp_INVALID) {
      dst = findShadowTmpB(mce, d->tmp);
      /* NOTE(review): the 'V' tag here looks inconsistent -- the rest
         of the origin-tracking code uses 'B'.  It appears to affect
         only instrumentation trace printing; confirm before changing. */
      assign( 'V', mce, dst, curr );
   }

   /* Outputs: guest state that we write or modify. */
   for (i = 0; i < d->nFxState; i++) {
      tl_assert(d->fxState[i].fx != Ifx_None);
      if (d->fxState[i].fx == Ifx_Read)
         continue;

      /* Enumerate the described state segments */
      for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) {
         gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen;
         gSz  = d->fxState[i].size;

         /* Ignore any sections marked as 'always defined'. */
         if (isAlwaysDefd(mce, gOff, gSz))
            continue;

         /* This state element is written or modified.  So we need to
            consider it.  If larger than 4 bytes, deal with it in
            4-byte chunks. */
         while (True) {
            Int b_offset;
            tl_assert(gSz >= 0);
            if (gSz == 0) break;
            n = gSz <= 4 ? gSz : 4;
            /* Write 'curr' to the state slice gOff .. gOff+n-1 */
            b_offset = MC_(get_otrack_shadow_offset)(gOff, 4);
            if (b_offset != -1) {

               /* If the guard expression evaluates to false we simply Put
                  the value that is already stored in the guest state slot */
               IRAtom *cond, *iffalse;

               cond    = assignNew('B', mce, Ity_I1,
                                   d->guard);
               iffalse = assignNew('B', mce, Ity_I32,
                                   IRExpr_Get(b_offset +
                                                 2*mce->layout->total_sizeB,
                                              Ity_I32));
               /* NOTE(review): 'V' tag -- same apparent inconsistency
                  as noted above; presumably affects only debug
                  printing. */
               curr = assignNew('V', mce, Ity_I32,
                                IRExpr_ITE(cond, curr, iffalse));

               stmt( 'B', mce, IRStmt_Put(b_offset
                                             + 2*mce->layout->total_sizeB,
                                          curr ));
            }
            gSz -= n;
            gOff += n;
         }
      }
   }

   /* Outputs: memory that we write or modify.  Same comments about
      endianness as above apply. */
   if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
      toDo   = d->mSize;
      /* chew off 32-bit chunks */
      while (toDo >= 4) {
         gen_store_b( mce, 4, d->mAddr, d->mSize - toDo, curr,
                      d->guard );
         toDo -= 4;
      }
      /* handle possible 16-bit excess */
      while (toDo >= 2) {
         gen_store_b( mce, 2, d->mAddr, d->mSize - toDo, curr,
                      d->guard );
         toDo -= 2;
      }
      /* chew off the remaining 8-bit chunk, if any */
      if (toDo == 1) {
         gen_store_b( mce, 1, d->mAddr, d->mSize - toDo, curr,
                      d->guard );
         toDo -= 1;
      }
      tl_assert(toDo == 0);
   }
}
7226
sewardjdb5907d2009-11-26 17:20:21 +00007227
sewardjcafe5052013-01-17 14:24:35 +00007228/* Generate IR for origin shadowing for a general guarded store. */
7229static void do_origins_Store_guarded ( MCEnv* mce,
7230 IREndness stEnd,
7231 IRExpr* stAddr,
7232 IRExpr* stData,
7233 IRExpr* guard )
sewardjdb5907d2009-11-26 17:20:21 +00007234{
7235 Int dszB;
7236 IRAtom* dataB;
7237 /* assert that the B value for the address is already available
7238 (somewhere), since the call to schemeE will want to see it.
7239 XXXX how does this actually ensure that?? */
7240 tl_assert(isIRAtom(stAddr));
7241 tl_assert(isIRAtom(stData));
7242 dszB = sizeofIRType( typeOfIRExpr(mce->sb->tyenv, stData ) );
7243 dataB = schemeE( mce, stData );
sewardjcafe5052013-01-17 14:24:35 +00007244 gen_store_b( mce, dszB, stAddr, 0/*offset*/, dataB, guard );
7245}
7246
7247
7248/* Generate IR for origin shadowing for a plain store. */
7249static void do_origins_Store_plain ( MCEnv* mce,
7250 IREndness stEnd,
7251 IRExpr* stAddr,
7252 IRExpr* stData )
7253{
7254 do_origins_Store_guarded ( mce, stEnd, stAddr, stData,
7255 NULL/*guard*/ );
7256}
7257
7258
7259/* ---- Dealing with LoadG/StoreG (not entirely simple) ---- */
7260
7261static void do_origins_StoreG ( MCEnv* mce, IRStoreG* sg )
7262{
7263 do_origins_Store_guarded( mce, sg->end, sg->addr,
7264 sg->data, sg->guard );
7265}
7266
7267static void do_origins_LoadG ( MCEnv* mce, IRLoadG* lg )
7268{
7269 IRType loadedTy = Ity_INVALID;
7270 switch (lg->cvt) {
sewardjbe9d2352015-01-27 23:10:19 +00007271 case ILGop_Ident64: loadedTy = Ity_I64; break;
sewardjcafe5052013-01-17 14:24:35 +00007272 case ILGop_Ident32: loadedTy = Ity_I32; break;
7273 case ILGop_16Uto32: loadedTy = Ity_I16; break;
7274 case ILGop_16Sto32: loadedTy = Ity_I16; break;
7275 case ILGop_8Uto32: loadedTy = Ity_I8; break;
7276 case ILGop_8Sto32: loadedTy = Ity_I8; break;
7277 default: VG_(tool_panic)("schemeS.IRLoadG");
7278 }
7279 IRAtom* ori_alt
7280 = schemeE( mce,lg->alt );
7281 IRAtom* ori_final
7282 = expr2ori_Load_guarded_General(mce, loadedTy,
7283 lg->addr, 0/*addr bias*/,
7284 lg->guard, ori_alt );
7285 /* And finally, bind the origin to the destination temporary. */
7286 assign( 'B', mce, findShadowTmpB(mce, lg->dst), ori_final );
sewardjdb5907d2009-11-26 17:20:21 +00007287}
7288
/* Generate origin-tracking ('B' shadow) instrumentation for one
   statement.  Only called when origin tracking is active, i.e.
   MC_(clo_mc_level) == 3.  Statements with no origin-tracking
   consequences are passed through with no code generated. */
static void schemeS ( MCEnv* mce, IRStmt* st )
{
   tl_assert(MC_(clo_mc_level) == 3);

   switch (st->tag) {

      case Ist_AbiHint:
         /* The value-check instrumenter handles this - by arranging
            to pass the address of the next instruction to
            MC_(helperc_MAKE_STACK_UNINIT).  This is all that needs to
            happen for origin tracking w.r.t. AbiHints.  So there is
            nothing to do here. */
         break;

      case Ist_PutI: {
         IRPutI *puti = st->Ist.PutI.details;
         IRRegArray* descr_b;
         IRAtom *t1, *t2, *t3, *t4;
         IRRegArray* descr = puti->descr;
         /* Map the guest-state array to the integer type used for its
            B shadow; Ity_INVALID means it has no shadow. */
         IRType equivIntTy
            = MC_(get_otrack_reg_array_equiv_int_type)(descr);
         /* If this array is unshadowable for whatever reason,
            generate no code. */
         if (equivIntTy == Ity_INVALID)
            break;
         tl_assert(sizeofIRType(equivIntTy) >= 4);
         tl_assert(sizeofIRType(equivIntTy) == sizeofIRType(descr->elemTy));
         /* The B-shadow array: same shape, offset by 2*total_sizeB,
            which is where the B shadow area of the guest state lives
            (the same offset is used for Ist_Put below). */
         descr_b
            = mkIRRegArray( descr->base + 2*mce->layout->total_sizeB,
                            equivIntTy, descr->nElems );
         /* Compute a value to Put - the conjoinment of the origin for
            the data to be Put-ted (obviously) and of the index value
            (not so obviously). */
         t1 = schemeE( mce, puti->data );
         t2 = schemeE( mce, puti->ix );
         t3 = gen_maxU32( mce, t1, t2 );
         t4 = zWidenFrom32( mce, equivIntTy, t3 );
         stmt( 'B', mce, IRStmt_PutI( mkIRPutI(descr_b, puti->ix,
                                               puti->bias, t4) ));
         break;
      }

      case Ist_Dirty:
         do_origins_Dirty( mce, st->Ist.Dirty.details );
         break;

      case Ist_Store:
         do_origins_Store_plain( mce, st->Ist.Store.end,
                                 st->Ist.Store.addr,
                                 st->Ist.Store.data );
         break;

      case Ist_StoreG:
         do_origins_StoreG( mce, st->Ist.StoreG.details );
         break;

      case Ist_LoadG:
         do_origins_LoadG( mce, st->Ist.LoadG.details );
         break;

      case Ist_LLSC: {
         /* In short: treat a load-linked like a normal load followed
            by an assignment of the loaded (shadow) data the result
            temporary.  Treat a store-conditional like a normal store,
            and mark the result temporary as defined. */
         if (st->Ist.LLSC.storedata == NULL) {
            /* Load Linked */
            IRType resTy
               = typeOfIRTemp(mce->sb->tyenv, st->Ist.LLSC.result);
            /* Build an ordinary Load expression of the same type and
               address, and let schemeE compute its origin. */
            IRExpr* vanillaLoad
               = IRExpr_Load(st->Ist.LLSC.end, resTy, st->Ist.LLSC.addr);
            tl_assert(resTy == Ity_I64 || resTy == Ity_I32
                      || resTy == Ity_I16 || resTy == Ity_I8);
            assign( 'B', mce, findShadowTmpB(mce, st->Ist.LLSC.result),
                              schemeE(mce, vanillaLoad));
         } else {
            /* Store conditional */
            do_origins_Store_plain( mce, st->Ist.LLSC.end,
                                    st->Ist.LLSC.addr,
                                    st->Ist.LLSC.storedata );
            /* For the rationale behind this, see comments at the
               place where the V-shadow for .result is constructed, in
               do_shadow_LLSC.  In short, we regard .result as
               always-defined. */
            assign( 'B', mce, findShadowTmpB(mce, st->Ist.LLSC.result),
                              mkU32(0) );
         }
         break;
      }

      case Ist_Put: {
         /* b_offset < 0 means this guest-state slice has no B shadow;
            in that case generate nothing. */
         Int b_offset
            = MC_(get_otrack_shadow_offset)(
                 st->Ist.Put.offset,
                 sizeofIRType(typeOfIRExpr(mce->sb->tyenv, st->Ist.Put.data))
              );
         if (b_offset >= 0) {
            /* FIXME: this isn't an atom! */
            stmt( 'B', mce, IRStmt_Put(b_offset + 2*mce->layout->total_sizeB,
                                       schemeE( mce, st->Ist.Put.data )) );
         }
         break;
      }

      case Ist_WrTmp:
         /* Shadow the assignment: the B value of the destination tmp
            is the origin of the RHS expression. */
         assign( 'B', mce, findShadowTmpB(mce, st->Ist.WrTmp.tmp),
                           schemeE(mce, st->Ist.WrTmp.data) );
         break;

      case Ist_MBE:
      case Ist_NoOp:
      case Ist_Exit:
      case Ist_IMark:
         /* No origin-tracking consequences. */
         break;

      default:
         VG_(printf)("mc_translate.c: schemeS: unhandled: ");
         ppIRStmt(st);
         VG_(tool_panic)("memcheck:schemeS");
   }
}
7411
7412
njn25e49d8e72002-09-23 09:36:25 +00007413/*--------------------------------------------------------------------*/
njn25cac76cb2002-09-23 11:21:57 +00007414/*--- end mc_translate.c ---*/
njn25e49d8e72002-09-23 09:36:25 +00007415/*--------------------------------------------------------------------*/