blob: 18638d3ba6afa79862a4b81aad0c5fe1b3947c6b [file] [log] [blame]
nethercotebb1c9912004-01-04 16:43:23 +00001
njn25e49d8e72002-09-23 09:36:25 +00002/*--------------------------------------------------------------------*/
sewardj95448072004-11-22 20:19:51 +00003/*--- Instrument IR to perform memory checking operations. ---*/
njn25cac76cb2002-09-23 11:21:57 +00004/*--- mc_translate.c ---*/
njn25e49d8e72002-09-23 09:36:25 +00005/*--------------------------------------------------------------------*/
njnc9539842002-10-02 13:26:35 +00006
njn25e49d8e72002-09-23 09:36:25 +00007/*
nethercote137bc552003-11-14 17:47:54 +00008 This file is part of MemCheck, a heavyweight Valgrind tool for
njnc9539842002-10-02 13:26:35 +00009 detecting memory errors.
njn25e49d8e72002-09-23 09:36:25 +000010
sewardj9eecbbb2010-05-03 21:37:12 +000011 Copyright (C) 2000-2010 Julian Seward
njn25e49d8e72002-09-23 09:36:25 +000012 jseward@acm.org
13
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
18
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
23
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, write to the Free Software
26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27 02111-1307, USA.
28
29 The GNU General Public License is contained in the file COPYING.
30*/
31
njnc7561b92005-06-19 01:24:32 +000032#include "pub_tool_basics.h"
njn1d0825f2006-03-27 11:37:07 +000033#include "pub_tool_hashtable.h" // For mc_include.h
njn132bfcc2005-06-04 19:16:06 +000034#include "pub_tool_libcassert.h"
njn36a20fa2005-06-03 03:08:39 +000035#include "pub_tool_libcprint.h"
njnc7561b92005-06-19 01:24:32 +000036#include "pub_tool_tooliface.h"
sewardj53ee1fc2005-12-23 02:29:58 +000037#include "pub_tool_machine.h" // VG_(fnptr_to_fnentry)
sewardj81651dc2007-08-28 06:05:20 +000038#include "pub_tool_xarray.h"
39#include "pub_tool_mallocfree.h"
40#include "pub_tool_libcbase.h"
njn25e49d8e72002-09-23 09:36:25 +000041
sewardj7cf4e6b2008-05-01 20:24:26 +000042#include "mc_include.h"
43
44
sewardj992dff92005-10-07 11:08:55 +000045/* This file implements the Memcheck instrumentation, and in
46 particular contains the core of its undefined value detection
47 machinery. For a comprehensive background of the terminology,
48 algorithms and rationale used herein, read:
49
50 Using Valgrind to detect undefined value errors with
51 bit-precision
52
53 Julian Seward and Nicholas Nethercote
54
55 2005 USENIX Annual Technical Conference (General Track),
56 Anaheim, CA, USA, April 10-15, 2005.
njn6665ea22007-05-24 23:14:41 +000057
58 ----
59
60 Here is as good a place as any to record exactly when V bits are and
61 should be checked, why, and what function is responsible.
62
63
64 Memcheck complains when an undefined value is used:
65
66 1. In the condition of a conditional branch. Because it could cause
67 incorrect control flow, and thus cause incorrect externally-visible
68 behaviour. [mc_translate.c:complainIfUndefined]
69
70 2. As an argument to a system call, or as the value that specifies
71 the system call number. Because it could cause an incorrect
72 externally-visible side effect. [mc_translate.c:mc_pre_reg_read]
73
74 3. As the address in a load or store. Because it could cause an
75 incorrect value to be used later, which could cause externally-visible
76 behaviour (eg. via incorrect control flow or an incorrect system call
77 argument) [complainIfUndefined]
78
79 4. As the target address of a branch. Because it could cause incorrect
80 control flow. [complainIfUndefined]
81
82 5. As an argument to setenv, unsetenv, or putenv. Because it could put
83 an incorrect value into the external environment.
84 [mc_replace_strmem.c:VG_WRAP_FUNCTION_ZU(*, *env)]
85
86 6. As the index in a GETI or PUTI operation. I'm not sure why... (njn).
87 [complainIfUndefined]
88
89 7. As an argument to the VALGRIND_CHECK_MEM_IS_DEFINED and
90 VALGRIND_CHECK_VALUE_IS_DEFINED client requests. Because the user
91 requested it. [in memcheck.h]
92
93
94 Memcheck also complains, but should not, when an undefined value is used:
95
96 8. As the shift value in certain SIMD shift operations (but not in the
97 standard integer shift operations). This inconsistency is due to
98 historical reasons.) [complainIfUndefined]
99
100
101 Memcheck does not complain, but should, when an undefined value is used:
102
103 9. As an input to a client request. Because the client request may
104 affect the visible behaviour -- see bug #144362 for an example
105 involving the malloc replacements in vg_replace_malloc.c and
106 VALGRIND_NON_SIMD_CALL* requests, where an uninitialised argument
107 isn't identified. That bug report also has some info on how to solve
108 the problem. [valgrind.h:VALGRIND_DO_CLIENT_REQUEST]
109
110
111 In practice, 1 and 2 account for the vast majority of cases.
sewardj992dff92005-10-07 11:08:55 +0000112*/
113
sewardj95448072004-11-22 20:19:51 +0000114/*------------------------------------------------------------*/
115/*--- Forward decls ---*/
116/*------------------------------------------------------------*/
117
118struct _MCEnv;
119
sewardj7cf4e6b2008-05-01 20:24:26 +0000120static IRType shadowTypeV ( IRType ty );
sewardj95448072004-11-22 20:19:51 +0000121static IRExpr* expr2vbits ( struct _MCEnv* mce, IRExpr* e );
sewardjafa617b2008-07-22 09:59:48 +0000122static IRTemp findShadowTmpB ( struct _MCEnv* mce, IRTemp orig );
sewardj95448072004-11-22 20:19:51 +0000123
124
125/*------------------------------------------------------------*/
126/*--- Memcheck running state, and tmp management. ---*/
127/*------------------------------------------------------------*/
128
/* Carries info about a particular tmp.  The tmp's number is not
   recorded, as this is implied by (equal to) its index in the tmpMap
   in MCEnv.  The tmp's type is also not recorded, as this is present
   in MCEnv.sb->tyenv.

   When .kind is Orig, .shadowV and .shadowB may give the identities
   of the temps currently holding the associated definedness (shadowV)
   and origin (shadowB) values, or these may be IRTemp_INVALID if code
   to compute such values has not yet been emitted.

   When .kind is VSh or BSh then the tmp holds a V- or B- value,
   and so .shadowV and .shadowB must be IRTemp_INVALID, since it is
   illogical for a shadow tmp itself to be shadowed.
*/
typedef
   enum { Orig=1, VSh=2, BSh=3 }
   TempKind;

typedef
   struct {
      TempKind kind;     /* Orig (client code), VSh (V-bits) or BSh (B-bits) */
      IRTemp   shadowV;  /* definedness shadow, or IRTemp_INVALID if none yet */
      IRTemp   shadowB;  /* origin shadow, or IRTemp_INVALID if none yet */
   }
   TempMapEnt;
155
/* Carries around state during memcheck instrumentation. */
typedef
   struct _MCEnv {
      /* MODIFIED: the superblock being constructed.  IRStmts are
         added. */
      IRSB* sb;
      Bool  trace;   /* when True, each added statement is printed */

      /* MODIFIED: a table [0 .. #temps_in_sb-1] which gives the
         current kind and possibly shadow temps for each temp in the
         IRSB being constructed.  Note that it does not contain the
         type of each tmp.  If you want to know the type, look at the
         relevant entry in sb->tyenv.  It follows that at all times
         during the instrumentation process, the valid indices for
         tmpMap and sb->tyenv are identical, being 0 .. N-1 where N is
         total number of Orig, V- and B- temps allocated so far.

         The reason for this strange split (types in one place, all
         other info in another) is that we need the types to be
         attached to sb so as to make it possible to do
         "typeOfIRExpr(mce->sb->tyenv, ...)" at various places in the
         instrumentation process. */
      XArray* /* of TempMapEnt */ tmpMap;

      /* MODIFIED: indicates whether "bogus" literals have so far been
         found.  Starts off False, and may change to True. */
      Bool bogusLiterals;

      /* READONLY: the guest layout.  This indicates which parts of
         the guest state should be regarded as 'always defined'. */
      VexGuestLayout* layout;

      /* READONLY: the host word type.  Needed for constructing
         arguments of type 'HWord' to be passed to helper functions.
         Ity_I32 or Ity_I64 only. */
      IRType hWordTy;
   }
   MCEnv;
194
195/* SHADOW TMP MANAGEMENT. Shadow tmps are allocated lazily (on
196 demand), as they are encountered. This is for two reasons.
197
198 (1) (less important reason): Many original tmps are unused due to
   initial IR optimisation, and we do not want to waste space in tables
200 tracking them.
201
202 Shadow IRTemps are therefore allocated on demand. mce.tmpMap is a
203 table indexed [0 .. n_types-1], which gives the current shadow for
204 each original tmp, or INVALID_IRTEMP if none is so far assigned.
205 It is necessary to support making multiple assignments to a shadow
206 -- specifically, after testing a shadow for definedness, it needs
207 to be made defined. But IR's SSA property disallows this.
208
209 (2) (more important reason): Therefore, when a shadow needs to get
210 a new value, a new temporary is created, the value is assigned to
211 that, and the tmpMap is updated to reflect the new binding.
212
213 A corollary is that if the tmpMap maps a given tmp to
sewardjf1962d32006-10-19 13:22:16 +0000214 IRTemp_INVALID and we are hoping to read that shadow tmp, it means
sewardj95448072004-11-22 20:19:51 +0000215 there's a read-before-write error in the original tmps. The IR
216 sanity checker should catch all such anomalies, however.
njn25e49d8e72002-09-23 09:36:25 +0000217*/
sewardj95448072004-11-22 20:19:51 +0000218
sewardj1c0ce7a2009-07-01 08:10:49 +0000219/* Create a new IRTemp of type 'ty' and kind 'kind', and add it to
220 both the table in mce->sb and to our auxiliary mapping. Note that
221 newTemp may cause mce->tmpMap to resize, hence previous results
222 from VG_(indexXA)(mce->tmpMap) are invalidated. */
223static IRTemp newTemp ( MCEnv* mce, IRType ty, TempKind kind )
224{
225 Word newIx;
226 TempMapEnt ent;
227 IRTemp tmp = newIRTemp(mce->sb->tyenv, ty);
228 ent.kind = kind;
229 ent.shadowV = IRTemp_INVALID;
230 ent.shadowB = IRTemp_INVALID;
231 newIx = VG_(addToXA)( mce->tmpMap, &ent );
232 tl_assert(newIx == (Word)tmp);
233 return tmp;
234}
235
236
sewardj95448072004-11-22 20:19:51 +0000237/* Find the tmp currently shadowing the given original tmp. If none
238 so far exists, allocate one. */
sewardj7cf4e6b2008-05-01 20:24:26 +0000239static IRTemp findShadowTmpV ( MCEnv* mce, IRTemp orig )
njn25e49d8e72002-09-23 09:36:25 +0000240{
sewardj1c0ce7a2009-07-01 08:10:49 +0000241 TempMapEnt* ent;
242 /* VG_(indexXA) range-checks 'orig', hence no need to check
243 here. */
244 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
245 tl_assert(ent->kind == Orig);
246 if (ent->shadowV == IRTemp_INVALID) {
247 IRTemp tmpV
248 = newTemp( mce, shadowTypeV(mce->sb->tyenv->types[orig]), VSh );
249 /* newTemp may cause mce->tmpMap to resize, hence previous results
250 from VG_(indexXA) are invalid. */
251 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
252 tl_assert(ent->kind == Orig);
253 tl_assert(ent->shadowV == IRTemp_INVALID);
254 ent->shadowV = tmpV;
njn25e49d8e72002-09-23 09:36:25 +0000255 }
sewardj1c0ce7a2009-07-01 08:10:49 +0000256 return ent->shadowV;
njn25e49d8e72002-09-23 09:36:25 +0000257}
258
sewardj95448072004-11-22 20:19:51 +0000259/* Allocate a new shadow for the given original tmp. This means any
260 previous shadow is abandoned. This is needed because it is
261 necessary to give a new value to a shadow once it has been tested
262 for undefinedness, but unfortunately IR's SSA property disallows
263 this. Instead we must abandon the old shadow, allocate a new one
sewardj1c0ce7a2009-07-01 08:10:49 +0000264 and use that instead.
265
266 This is the same as findShadowTmpV, except we don't bother to see
267 if a shadow temp already existed -- we simply allocate a new one
268 regardless. */
sewardj7cf4e6b2008-05-01 20:24:26 +0000269static void newShadowTmpV ( MCEnv* mce, IRTemp orig )
njn25e49d8e72002-09-23 09:36:25 +0000270{
sewardj1c0ce7a2009-07-01 08:10:49 +0000271 TempMapEnt* ent;
272 /* VG_(indexXA) range-checks 'orig', hence no need to check
273 here. */
274 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
275 tl_assert(ent->kind == Orig);
276 if (1) {
277 IRTemp tmpV
278 = newTemp( mce, shadowTypeV(mce->sb->tyenv->types[orig]), VSh );
279 /* newTemp may cause mce->tmpMap to resize, hence previous results
280 from VG_(indexXA) are invalid. */
281 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
282 tl_assert(ent->kind == Orig);
283 ent->shadowV = tmpV;
284 }
sewardj95448072004-11-22 20:19:51 +0000285}
286
287
/*------------------------------------------------------------*/
/*--- IRAtoms -- a subset of IRExprs                       ---*/
/*------------------------------------------------------------*/

/* An atom is either an IRExpr_Const or an IRExpr_Tmp, as defined by
   isIRAtom() in libvex_ir.h.  Because this instrumenter expects flat
   input, most of this code deals in atoms.  Usefully, a value atom
   always has a V-value which is also an atom: constants are shadowed
   by constants, and temps are shadowed by the corresponding shadow
   temporary. */

typedef IRExpr IRAtom;
300
301/* (used for sanity checks only): is this an atom which looks
302 like it's from original code? */
303static Bool isOriginalAtom ( MCEnv* mce, IRAtom* a1 )
304{
305 if (a1->tag == Iex_Const)
306 return True;
sewardj1c0ce7a2009-07-01 08:10:49 +0000307 if (a1->tag == Iex_RdTmp) {
308 TempMapEnt* ent = VG_(indexXA)( mce->tmpMap, a1->Iex.RdTmp.tmp );
309 return ent->kind == Orig;
310 }
sewardj95448072004-11-22 20:19:51 +0000311 return False;
312}
313
314/* (used for sanity checks only): is this an atom which looks
315 like it's from shadow code? */
316static Bool isShadowAtom ( MCEnv* mce, IRAtom* a1 )
317{
318 if (a1->tag == Iex_Const)
319 return True;
sewardj1c0ce7a2009-07-01 08:10:49 +0000320 if (a1->tag == Iex_RdTmp) {
321 TempMapEnt* ent = VG_(indexXA)( mce->tmpMap, a1->Iex.RdTmp.tmp );
322 return ent->kind == VSh || ent->kind == BSh;
323 }
sewardj95448072004-11-22 20:19:51 +0000324 return False;
325}
326
327/* (used for sanity checks only): check that both args are atoms and
328 are identically-kinded. */
329static Bool sameKindedAtoms ( IRAtom* a1, IRAtom* a2 )
330{
sewardj0b9d74a2006-12-24 02:24:11 +0000331 if (a1->tag == Iex_RdTmp && a2->tag == Iex_RdTmp)
sewardj95448072004-11-22 20:19:51 +0000332 return True;
sewardjbef552a2005-08-30 12:54:36 +0000333 if (a1->tag == Iex_Const && a2->tag == Iex_Const)
sewardj95448072004-11-22 20:19:51 +0000334 return True;
335 return False;
336}
337
338
339/*------------------------------------------------------------*/
340/*--- Type management ---*/
341/*------------------------------------------------------------*/
342
343/* Shadow state is always accessed using integer types. This returns
344 an integer type with the same size (as per sizeofIRType) as the
345 given type. The only valid shadow types are Bit, I8, I16, I32,
sewardj3245c912004-12-10 14:58:26 +0000346 I64, V128. */
sewardj95448072004-11-22 20:19:51 +0000347
sewardj7cf4e6b2008-05-01 20:24:26 +0000348static IRType shadowTypeV ( IRType ty )
sewardj95448072004-11-22 20:19:51 +0000349{
350 switch (ty) {
351 case Ity_I1:
352 case Ity_I8:
353 case Ity_I16:
354 case Ity_I32:
sewardj6cf40ff2005-04-20 22:31:26 +0000355 case Ity_I64:
356 case Ity_I128: return ty;
sewardj3245c912004-12-10 14:58:26 +0000357 case Ity_F32: return Ity_I32;
358 case Ity_F64: return Ity_I64;
359 case Ity_V128: return Ity_V128;
sewardj95448072004-11-22 20:19:51 +0000360 default: ppIRType(ty);
sewardj7cf4e6b2008-05-01 20:24:26 +0000361 VG_(tool_panic)("memcheck:shadowTypeV");
sewardj95448072004-11-22 20:19:51 +0000362 }
363}
364
/* Produce a 'defined' value of the given shadow type.  Should only be
   supplied shadow types (Bit/I8/I16/I32/UI64).
   In V-bit encoding, 0 means "defined", so these are all-zero
   constants of the appropriate width. */
static IRExpr* definedOfType ( IRType ty ) {
   switch (ty) {
      case Ity_I1:   return IRExpr_Const(IRConst_U1(False));
      case Ity_I8:   return IRExpr_Const(IRConst_U8(0));
      case Ity_I16:  return IRExpr_Const(IRConst_U16(0));
      case Ity_I32:  return IRExpr_Const(IRConst_U32(0));
      case Ity_I64:  return IRExpr_Const(IRConst_U64(0));
      case Ity_V128: return IRExpr_Const(IRConst_V128(0x0000));
      default:       VG_(tool_panic)("memcheck:definedOfType");
   }
}
378
379
sewardj95448072004-11-22 20:19:51 +0000380/*------------------------------------------------------------*/
381/*--- Constructing IR fragments ---*/
382/*------------------------------------------------------------*/
383
/* add stmt to a bb */
static inline void stmt ( HChar cat, MCEnv* mce, IRStmt* st ) {
   /* When tracing, print the statement prefixed by its category
      character before appending it to the superblock. */
   if (mce->trace) {
      VG_(printf)("  %c: ", cat);
      ppIRStmt(st);
      VG_(printf)("\n");
   }
   addStmtToIRSB(mce->sb, st);
}
393
/* assign value to tmp: emit a WrTmp statement via stmt(), so it is
   traced and added to the superblock like any other statement. */
static inline 
void assign ( HChar cat, MCEnv* mce, IRTemp tmp, IRExpr* expr ) {
   stmt(cat, mce, IRStmt_WrTmp(tmp,expr));
}
sewardj95448072004-11-22 20:19:51 +0000399
/* build various kinds of expressions -- thin shorthands over the
   IRExpr constructors, used pervasively below. */
#define binop(_op, _arg1, _arg2) IRExpr_Binop((_op),(_arg1),(_arg2))
#define unop(_op, _arg)          IRExpr_Unop((_op),(_arg))
#define mkU8(_n)                 IRExpr_Const(IRConst_U8(_n))
#define mkU16(_n)                IRExpr_Const(IRConst_U16(_n))
#define mkU32(_n)                IRExpr_Const(IRConst_U32(_n))
#define mkU64(_n)                IRExpr_Const(IRConst_U64(_n))
#define mkV128(_n)               IRExpr_Const(IRConst_V128(_n))
#define mkexpr(_tmp)             IRExpr_RdTmp((_tmp))
sewardj95448072004-11-22 20:19:51 +0000409
sewardj7cf4e6b2008-05-01 20:24:26 +0000410/* Bind the given expression to a new temporary, and return the
sewardj95448072004-11-22 20:19:51 +0000411 temporary. This effectively converts an arbitrary expression into
sewardj7cf4e6b2008-05-01 20:24:26 +0000412 an atom.
413
414 'ty' is the type of 'e' and hence the type that the new temporary
sewardj1c0ce7a2009-07-01 08:10:49 +0000415 needs to be. But passing it in is redundant, since we can deduce
416 the type merely by inspecting 'e'. So at least use that fact to
417 assert that the two types agree. */
418static IRAtom* assignNew ( HChar cat, MCEnv* mce, IRType ty, IRExpr* e )
419{
420 TempKind k;
421 IRTemp t;
422 IRType tyE = typeOfIRExpr(mce->sb->tyenv, e);
sewardj7cf4e6b2008-05-01 20:24:26 +0000423 tl_assert(tyE == ty); /* so 'ty' is redundant (!) */
sewardj1c0ce7a2009-07-01 08:10:49 +0000424 switch (cat) {
425 case 'V': k = VSh; break;
426 case 'B': k = BSh; break;
427 case 'C': k = Orig; break;
428 /* happens when we are making up new "orig"
429 expressions, for IRCAS handling */
430 default: tl_assert(0);
431 }
432 t = newTemp(mce, ty, k);
sewardj7cf4e6b2008-05-01 20:24:26 +0000433 assign(cat, mce, t, e);
sewardj95448072004-11-22 20:19:51 +0000434 return mkexpr(t);
435}
436
437
438/*------------------------------------------------------------*/
439/*--- Constructing definedness primitive ops ---*/
440/*------------------------------------------------------------*/
441
/* --------- Defined-if-either-defined --------- */

/* DifD(v1,v2) = v1 AND v2.  Since 0 encodes "defined", a result bit
   is defined (0) if it is defined in either argument. */

static IRAtom* mkDifD8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I8, binop(Iop_And8, a1, a2));
}

static IRAtom* mkDifD16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I16, binop(Iop_And16, a1, a2));
}

static IRAtom* mkDifD32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I32, binop(Iop_And32, a1, a2));
}

static IRAtom* mkDifD64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I64, binop(Iop_And64, a1, a2));
}

static IRAtom* mkDifDV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_V128, binop(Iop_AndV128, a1, a2));
}
473
/* --------- Undefined-if-either-undefined --------- */

/* UifU(v1,v2) = v1 OR v2.  Since 1 encodes "undefined", a result bit
   is undefined (1) if it is undefined in either argument. */

static IRAtom* mkUifU8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I8, binop(Iop_Or8, a1, a2));
}

static IRAtom* mkUifU16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I16, binop(Iop_Or16, a1, a2));
}

static IRAtom* mkUifU32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I32, binop(Iop_Or32, a1, a2));
}

static IRAtom* mkUifU64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I64, binop(Iop_Or64, a1, a2));
}

static IRAtom* mkUifUV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_V128, binop(Iop_OrV128, a1, a2));
}

/* Type-dispatching wrapper: 'vty' is the (shadow) type of a1 and a2. */
static IRAtom* mkUifU ( MCEnv* mce, IRType vty, IRAtom* a1, IRAtom* a2 ) {
   switch (vty) {
      case Ity_I8:   return mkUifU8(mce, a1, a2);
      case Ity_I16:  return mkUifU16(mce, a1, a2);
      case Ity_I32:  return mkUifU32(mce, a1, a2);
      case Ity_I64:  return mkUifU64(mce, a1, a2);
      case Ity_V128: return mkUifUV128(mce, a1, a2);
      default:
         VG_(printf)("\n"); ppIRType(vty); VG_(printf)("\n");
         VG_(tool_panic)("memcheck:mkUifU");
   }
}
518
/* --------- The Left-family of operations. --------- */

/* Wrap a shadow value in the VEX Iop_LeftN operation of the matching
   width.  NOTE(review): the precise semantics of Iop_LeftN are
   defined by VEX (libvex_ir.h) -- confirm there. */

static IRAtom* mkLeft8 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   return assignNew('V', mce, Ity_I8, unop(Iop_Left8, a1));
}

static IRAtom* mkLeft16 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   return assignNew('V', mce, Ity_I16, unop(Iop_Left16, a1));
}

static IRAtom* mkLeft32 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   return assignNew('V', mce, Ity_I32, unop(Iop_Left32, a1));
}

static IRAtom* mkLeft64 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   return assignNew('V', mce, Ity_I64, unop(Iop_Left64, a1));
}
540
/* --------- 'Improvement' functions for AND/OR. --------- */

/* ImproveAND(data, vbits) = data OR vbits.  Defined (0) data 0s give
   defined (0); all other -> undefined (1).
   (A defined 0 in 'data' forces the AND result bit to 0 regardless of
   the other operand, so that result bit can be treated as defined.)
*/
static IRAtom* mkImproveAND8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew('V', mce, Ity_I8, binop(Iop_Or8, data, vbits));
}

static IRAtom* mkImproveAND16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew('V', mce, Ity_I16, binop(Iop_Or16, data, vbits));
}

static IRAtom* mkImproveAND32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew('V', mce, Ity_I32, binop(Iop_Or32, data, vbits));
}

static IRAtom* mkImproveAND64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew('V', mce, Ity_I64, binop(Iop_Or64, data, vbits));
}

static IRAtom* mkImproveANDV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew('V', mce, Ity_V128, binop(Iop_OrV128, data, vbits));
}
585
/* ImproveOR(data, vbits) = ~data OR vbits.  Defined (0) data 1s give
   defined (0); all other -> undefined (1).
   (A defined 1 in 'data' forces the OR result bit to 1 regardless of
   the other operand, so that result bit can be treated as defined.)
*/
static IRAtom* mkImproveOR8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             'V', mce, Ity_I8, 
             binop(Iop_Or8, 
                   assignNew('V', mce, Ity_I8, unop(Iop_Not8, data)), 
                   vbits) );
}

static IRAtom* mkImproveOR16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             'V', mce, Ity_I16, 
             binop(Iop_Or16, 
                   assignNew('V', mce, Ity_I16, unop(Iop_Not16, data)), 
                   vbits) );
}

static IRAtom* mkImproveOR32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             'V', mce, Ity_I32, 
             binop(Iop_Or32, 
                   assignNew('V', mce, Ity_I32, unop(Iop_Not32, data)), 
                   vbits) );
}

static IRAtom* mkImproveOR64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             'V', mce, Ity_I64, 
             binop(Iop_Or64, 
                   assignNew('V', mce, Ity_I64, unop(Iop_Not64, data)), 
                   vbits) );
}

static IRAtom* mkImproveORV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             'V', mce, Ity_V128, 
             binop(Iop_OrV128, 
                   assignNew('V', mce, Ity_V128, unop(Iop_NotV128, data)), 
                   vbits) );
}
648
sewardj95448072004-11-22 20:19:51 +0000649/* --------- Pessimising casts. --------- */
650
/* Pessimising cast: collapse 'vbits' to a single definedness bit
   (1 iff any bit of 'vbits' is 1, ie undefined) and then replicate
   that bit across the whole of 'dst_ty'.  The result is therefore
   either all-zeroes (fully defined) or all-ones (fully undefined). */
static IRAtom* mkPCastTo( MCEnv* mce, IRType dst_ty, IRAtom* vbits )
{
   IRType  src_ty;
   IRAtom* tmp1;
   /* Note, dst_ty is a shadow type, not an original type. */
   /* First of all, collapse vbits down to a single bit. */
   tl_assert(isShadowAtom(mce,vbits));
   src_ty = typeOfIRExpr(mce->sb->tyenv, vbits);

   /* Fast-track some common cases: CmpwNEZ{32,64} does the
      collapse-and-replicate in one IR operation. */
   if (src_ty == Ity_I32 && dst_ty == Ity_I32)
      return assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));

   if (src_ty == Ity_I64 && dst_ty == Ity_I64)
      return assignNew('V', mce, Ity_I64, unop(Iop_CmpwNEZ64, vbits));

   if (src_ty == Ity_I32 && dst_ty == Ity_I64) {
      /* Widen by duplicating the replicated I32 into both halves of
         an I64. */
      IRAtom* tmp = assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));
      return assignNew('V', mce, Ity_I64, binop(Iop_32HLto64, tmp, tmp));
   }

   /* Else do it the slow way .. */
   /* Stage 1: collapse the source down to a single Ity_I1 bit. */
   tmp1   = NULL;
   switch (src_ty) {
      case Ity_I1:
         tmp1 = vbits;
         break;
      case Ity_I8:
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ8, vbits));
         break;
      case Ity_I16:
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ16, vbits));
         break;
      case Ity_I32:
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ32, vbits));
         break;
      case Ity_I64:
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ64, vbits));
         break;
      case Ity_I128: {
         /* Gah.  Chop it in half, OR the halves together, and compare
            that with zero. */
         IRAtom* tmp2 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, vbits));
         IRAtom* tmp3 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, vbits));
         IRAtom* tmp4 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp2, tmp3));
         tmp1         = assignNew('V', mce, Ity_I1,
                                       unop(Iop_CmpNEZ64, tmp4));
         break;
      }
      default:
         ppIRType(src_ty);
         VG_(tool_panic)("mkPCastTo(1)");
   }
   tl_assert(tmp1);
   /* Now widen up to the dst type, sign-extending the single bit so
      it fills the whole result. */
   switch (dst_ty) {
      case Ity_I1:
         return tmp1;
      case Ity_I8:
         return assignNew('V', mce, Ity_I8, unop(Iop_1Sto8, tmp1));
      case Ity_I16:
         return assignNew('V', mce, Ity_I16, unop(Iop_1Sto16, tmp1));
      case Ity_I32:
         return assignNew('V', mce, Ity_I32, unop(Iop_1Sto32, tmp1));
      case Ity_I64:
         return assignNew('V', mce, Ity_I64, unop(Iop_1Sto64, tmp1));
      case Ity_V128:
         /* Widen to I64 first, then glue two copies together. */
         tmp1 = assignNew('V', mce, Ity_I64,  unop(Iop_1Sto64, tmp1));
         tmp1 = assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128, tmp1, tmp1));
         return tmp1;
      case Ity_I128:
         tmp1 = assignNew('V', mce, Ity_I64,  unop(Iop_1Sto64, tmp1));
         tmp1 = assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, tmp1, tmp1));
         return tmp1;
      default:
         ppIRType(dst_ty);
         VG_(tool_panic)("mkPCastTo(2)");
   }
}
730
sewardjd5204dc2004-12-31 01:16:11 +0000731/* --------- Accurate interpretation of CmpEQ/CmpNE. --------- */
732/*
733 Normally, we can do CmpEQ/CmpNE by doing UifU on the arguments, and
734 PCasting to Ity_U1. However, sometimes it is necessary to be more
735 accurate. The insight is that the result is defined if two
736 corresponding bits can be found, one from each argument, so that
737 both bits are defined but are different -- that makes EQ say "No"
738 and NE say "Yes". Hence, we compute an improvement term and DifD
739 it onto the "normal" (UifU) result.
740
741 The result is:
742
743 PCastTo<1> (
sewardje6f8af42005-07-06 18:48:59 +0000744 -- naive version
745 PCastTo<sz>( UifU<sz>(vxx, vyy) )
746
sewardjd5204dc2004-12-31 01:16:11 +0000747 `DifD<sz>`
sewardje6f8af42005-07-06 18:48:59 +0000748
749 -- improvement term
750 PCastTo<sz>( PCast<sz>( CmpEQ<sz> ( vec, 1...1 ) ) )
sewardjd5204dc2004-12-31 01:16:11 +0000751 )
sewardje6f8af42005-07-06 18:48:59 +0000752
sewardjd5204dc2004-12-31 01:16:11 +0000753 where
754 vec contains 0 (defined) bits where the corresponding arg bits
sewardje6f8af42005-07-06 18:48:59 +0000755 are defined but different, and 1 bits otherwise.
sewardjd5204dc2004-12-31 01:16:11 +0000756
sewardje6f8af42005-07-06 18:48:59 +0000757 vec = Or<sz>( vxx, // 0 iff bit defined
758 vyy, // 0 iff bit defined
759 Not<sz>(Xor<sz>( xx, yy )) // 0 iff bits different
760 )
761
762 If any bit of vec is 0, the result is defined and so the
763 improvement term should produce 0...0, else it should produce
764 1...1.
765
766 Hence require for the improvement term:
767
768 if vec == 1...1 then 1...1 else 0...0
769 ->
770 PCast<sz>( CmpEQ<sz> ( vec, 1...1 ) )
771
772 This was extensively re-analysed and checked on 6 July 05.
sewardjd5204dc2004-12-31 01:16:11 +0000773*/
/* Build the accurate definedness interpretation of CmpEQ/CmpNE for
   operands (xx,yy) with shadows (vxx,vyy), per the derivation in the
   comment above.  'ty' is the operand width (Ity_I32 or Ity_I64).
   Returns an Ity_I1-typed shadow atom. */
static IRAtom* expensiveCmpEQorNE ( MCEnv*  mce,
                                    IRType  ty,
                                    IRAtom* vxx, IRAtom* vyy,
                                    IRAtom* xx,  IRAtom* yy )
{
   IRAtom *naive, *vec, *improvement_term;
   IRAtom *improved, *final_cast, *top;
   IROp   opDIFD, opUIFU, opXOR, opNOT, opCMP, opOR;

   tl_assert(isShadowAtom(mce,vxx));
   tl_assert(isShadowAtom(mce,vyy));
   tl_assert(isOriginalAtom(mce,xx));
   tl_assert(isOriginalAtom(mce,yy));
   tl_assert(sameKindedAtoms(vxx,xx));
   tl_assert(sameKindedAtoms(vyy,yy));

   /* Select the width-specific operations; 'top' is the all-ones
      constant of that width. */
   switch (ty) {
      case Ity_I32:
         opOR   = Iop_Or32;
         opDIFD = Iop_And32;
         opUIFU = Iop_Or32;
         opNOT  = Iop_Not32;
         opXOR  = Iop_Xor32;
         opCMP  = Iop_CmpEQ32;
         top    = mkU32(0xFFFFFFFF);
         break;
      case Ity_I64:
         opOR   = Iop_Or64;
         opDIFD = Iop_And64;
         opUIFU = Iop_Or64;
         opNOT  = Iop_Not64;
         opXOR  = Iop_Xor64;
         opCMP  = Iop_CmpEQ64;
         top    = mkU64(0xFFFFFFFFFFFFFFFFULL);
         break;
      default:
         VG_(tool_panic)("expensiveCmpEQorNE");
   }

   /* naive = PCast( vxx `UifU` vyy ) -- the standard lazy scheme. */
   naive
      = mkPCastTo(mce,ty,
                  assignNew('V', mce, ty, binop(opUIFU, vxx, vyy)));

   /* vec has a 0 bit wherever the corresponding bits of xx and yy
      are both defined AND different; 1 bits elsewhere. */
   vec
      = assignNew(
           'V', mce,ty,
           binop( opOR,
                  assignNew('V', mce,ty, binop(opOR, vxx, vyy)),
                  assignNew(
                     'V', mce,ty,
                     unop( opNOT,
                           assignNew('V', mce,ty, binop(opXOR, xx, yy))))));

   /* improvement_term is all-zeroes (defined) iff some bit of vec is
      zero -- a defined, differing bit pair forces the comparison
      outcome regardless of any other undefined bits. */
   improvement_term
      = mkPCastTo( mce,ty,
                   assignNew('V', mce,Ity_I1, binop(opCMP, vec, top)));

   /* DifD the improvement onto the naive result. */
   improved
      = assignNew( 'V', mce,ty, binop(opDIFD, naive, improvement_term) );

   /* Collapse to the single-bit shadow the caller expects. */
   final_cast
      = mkPCastTo( mce, Ity_I1, improved );

   return final_cast;
}
839
sewardj95448072004-11-22 20:19:51 +0000840
sewardj992dff92005-10-07 11:08:55 +0000841/* --------- Semi-accurate interpretation of CmpORD. --------- */
842
843/* CmpORD32{S,U} does PowerPC-style 3-way comparisons:
844
845 CmpORD32S(x,y) = 1<<3 if x <s y
846 = 1<<2 if x >s y
847 = 1<<1 if x == y
848
849 and similarly the unsigned variant. The default interpretation is:
850
851 CmpORD32{S,U}#(x,y,x#,y#) = PCast(x# `UifU` y#)
sewardj1bc82102005-12-23 00:16:24 +0000852 & (7<<1)
sewardj992dff92005-10-07 11:08:55 +0000853
854 The "& (7<<1)" reflects the fact that all result bits except 3,2,1
855 are zero and therefore defined (viz, zero).
sewardja9e62a92005-10-07 12:13:21 +0000856
857 Also deal with a special case better:
858
859 CmpORD32S(x,0)
860
861 Here, bit 3 (LT) of the result is a copy of the top bit of x and
862 will be defined even if the rest of x isn't. In which case we do:
863
864 CmpORD32S#(x,x#,0,{impliedly 0}#)
sewardj1bc82102005-12-23 00:16:24 +0000865 = PCast(x#) & (3<<1) -- standard interp for GT#,EQ#
866 | (x# >>u 31) << 3 -- LT# = x#[31]
sewardja9e62a92005-10-07 12:13:21 +0000867
sewardj1bc82102005-12-23 00:16:24 +0000868 Analogous handling for CmpORD64{S,U}.
sewardj992dff92005-10-07 11:08:55 +0000869*/
sewardja9e62a92005-10-07 12:13:21 +0000870static Bool isZeroU32 ( IRAtom* e )
871{
872 return
873 toBool( e->tag == Iex_Const
874 && e->Iex.Const.con->tag == Ico_U32
875 && e->Iex.Const.con->Ico.U32 == 0 );
876}
877
sewardj1bc82102005-12-23 00:16:24 +0000878static Bool isZeroU64 ( IRAtom* e )
sewardj992dff92005-10-07 11:08:55 +0000879{
sewardj1bc82102005-12-23 00:16:24 +0000880 return
881 toBool( e->tag == Iex_Const
882 && e->Iex.Const.con->tag == Ico_U64
883 && e->Iex.Const.con->Ico.U64 == 0 );
884}
885
/* Build the shadow value for CmpORD32{S,U}/CmpORD64{S,U}, following
   the scheme in the comment above.  xxhash/yyhash are the shadows of
   xx/yy; the result is an expression of type I32 or I64 to match the
   comparison width. */
static IRAtom* doCmpORD ( MCEnv*  mce,
                          IROp    cmp_op,
                          IRAtom* xxhash, IRAtom* yyhash,
                          IRAtom* xx, IRAtom* yy )
{
   /* Pick 32- vs 64-bit machinery from the comparison op. */
   Bool   m64    = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U;
   Bool   syned  = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD32S;
   IROp   opOR   = m64 ? Iop_Or64  : Iop_Or32;
   IROp   opAND  = m64 ? Iop_And64 : Iop_And32;
   IROp   opSHL  = m64 ? Iop_Shl64 : Iop_Shl32;
   IROp   opSHR  = m64 ? Iop_Shr64 : Iop_Shr32;
   IRType ty     = m64 ? Ity_I64   : Ity_I32;
   Int    width  = m64 ? 64        : 32;

   Bool (*isZero)(IRAtom*) = m64 ? isZeroU64 : isZeroU32;

   IRAtom* threeLeft1 = NULL;
   IRAtom* sevenLeft1 = NULL;

   tl_assert(isShadowAtom(mce,xxhash));
   tl_assert(isShadowAtom(mce,yyhash));
   tl_assert(isOriginalAtom(mce,xx));
   tl_assert(isOriginalAtom(mce,yy));
   tl_assert(sameKindedAtoms(xxhash,xx));
   tl_assert(sameKindedAtoms(yyhash,yy));
   tl_assert(cmp_op == Iop_CmpORD32S || cmp_op == Iop_CmpORD32U
             || cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U);

   if (0) { /* debug printing, normally disabled */
      ppIROp(cmp_op); VG_(printf)(" ");
      ppIRExpr(xx); VG_(printf)(" "); ppIRExpr( yy ); VG_(printf)("\n");
   }

   if (syned && isZero(yy)) {
      /* fancy interpretation: signed compare against literal zero.
         Bit 3 (LT) of the result is just the sign bit of xx, so its
         definedness is that of xx's sign bit alone. */
      /* if yy is zero, then it must be fully defined (zero#). */
      tl_assert(isZero(yyhash));
      threeLeft1 = m64 ? mkU64(3<<1) : mkU32(3<<1);
      return
         binop(
            opOR,
            /* definedness of the GT/EQ bits: standard PCast, masked
               down to bits 2 and 1. */
            assignNew(
               'V', mce,ty,
               binop(
                  opAND,
                  mkPCastTo(mce,ty, xxhash),
                  threeLeft1
               )),
            /* definedness of the LT bit: xx's shadow sign bit,
               shifted into bit position 3. */
            assignNew(
               'V', mce,ty,
               binop(
                  opSHL,
                  assignNew(
                     'V', mce,ty,
                     binop(opSHR, xxhash, mkU8(width-1))),
                  mkU8(3)
               ))
         );
   } else {
      /* standard interpretation: PCast the UifU of the shadows, and
         mask to bits 3..1 since all other result bits are zero and
         hence defined. */
      sevenLeft1 = m64 ? mkU64(7<<1) : mkU32(7<<1);
      return
         binop(
            opAND,
            mkPCastTo( mce,ty,
                       mkUifU(mce,ty, xxhash,yyhash)),
            sevenLeft1
         );
   }
}
956
957
sewardj95448072004-11-22 20:19:51 +0000958/*------------------------------------------------------------*/
959/*--- Emit a test and complaint if something is undefined. ---*/
960/*------------------------------------------------------------*/
961
sewardj7cf4e6b2008-05-01 20:24:26 +0000962static IRAtom* schemeE ( MCEnv* mce, IRExpr* e ); /* fwds */
963
964
sewardj95448072004-11-22 20:19:51 +0000965/* Set the annotations on a dirty helper to indicate that the stack
966 pointer and instruction pointers might be read. This is the
967 behaviour of all 'emit-a-complaint' style functions we might
968 call. */
969
970static void setHelperAnns ( MCEnv* mce, IRDirty* di ) {
971 di->nFxState = 2;
972 di->fxState[0].fx = Ifx_Read;
973 di->fxState[0].offset = mce->layout->offset_SP;
974 di->fxState[0].size = mce->layout->sizeof_SP;
975 di->fxState[1].fx = Ifx_Read;
976 di->fxState[1].offset = mce->layout->offset_IP;
977 di->fxState[1].size = mce->layout->sizeof_IP;
978}
979
980
981/* Check the supplied **original** atom for undefinedness, and emit a
982 complaint if so. Once that happens, mark it as defined. This is
983 possible because the atom is either a tmp or literal. If it's a
984 tmp, it will be shadowed by a tmp, and so we can set the shadow to
985 be defined. In fact as mentioned above, we will have to allocate a
986 new tmp to carry the new 'defined' shadow value, and update the
987 original->tmp mapping accordingly; we cannot simply assign a new
988 value to an existing shadow tmp as this breaks SSAness -- resulting
989 in the post-instrumentation sanity checker spluttering in disapproval.
990*/
static void complainIfUndefined ( MCEnv* mce, IRAtom* atom )
{
   IRAtom*  vatom;
   IRType   ty;
   Int      sz;
   IRDirty* di;
   IRAtom*  cond;
   IRAtom*  origin;
   void*    fn;
   HChar*   nm;
   IRExpr** args;
   Int      nargs;

   // Don't do V bit tests if we're not reporting undefined value errors.
   if (MC_(clo_mc_level) == 1)
      return;

   /* Since the original expression is atomic, there's no duplicated
      work generated by making multiple V-expressions for it.  So we
      don't really care about the possibility that someone else may
      also create a V-interpretion for it. */
   tl_assert(isOriginalAtom(mce, atom));
   vatom = expr2vbits( mce, atom );
   tl_assert(isShadowAtom(mce, vatom));
   tl_assert(sameKindedAtoms(atom, vatom));

   ty = typeOfIRExpr(mce->sb->tyenv, vatom);

   /* sz is only used for constructing the error message */
   sz = ty==Ity_I1 ? 0 : sizeofIRType(ty);

   cond = mkPCastTo( mce, Ity_I1, vatom );
   /* cond will be 0 if all defined, and 1 if any not defined. */

   /* Get the origin info for the value we are about to check.  At
      least, if we are doing origin tracking.  If not, use a dummy
      zero origin. */
   if (MC_(clo_mc_level) == 3) {
      origin = schemeE( mce, atom );
      /* Helpers take a host-word origin argument; widen on 64-bit
         hosts. */
      if (mce->hWordTy == Ity_I64) {
         origin = assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, origin) );
      }
   } else {
      origin = NULL;
   }

   fn    = NULL;
   nm    = NULL;
   args  = NULL;
   nargs = -1;

   /* Select the complaint helper by the value's size in bytes, and
      by whether we have an origin to pass along (the _w_o variants
      take it as an argument, the _no_o variants take none). */
   switch (sz) {
      case 0:
         if (origin) {
            fn    = &MC_(helperc_value_check0_fail_w_o);
            nm    = "MC_(helperc_value_check0_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check0_fail_no_o);
            nm    = "MC_(helperc_value_check0_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 1:
         if (origin) {
            fn    = &MC_(helperc_value_check1_fail_w_o);
            nm    = "MC_(helperc_value_check1_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check1_fail_no_o);
            nm    = "MC_(helperc_value_check1_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 4:
         if (origin) {
            fn    = &MC_(helperc_value_check4_fail_w_o);
            nm    = "MC_(helperc_value_check4_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check4_fail_no_o);
            nm    = "MC_(helperc_value_check4_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 8:
         if (origin) {
            fn    = &MC_(helperc_value_check8_fail_w_o);
            nm    = "MC_(helperc_value_check8_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check8_fail_no_o);
            nm    = "MC_(helperc_value_check8_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 2:
      case 16:
         /* Sizes with no dedicated helper: pass sz explicitly to the
            generic checkN helper. */
         if (origin) {
            fn    = &MC_(helperc_value_checkN_fail_w_o);
            nm    = "MC_(helperc_value_checkN_fail_w_o)";
            args  = mkIRExprVec_2( mkIRExpr_HWord( sz ), origin);
            nargs = 2;
         } else {
            fn    = &MC_(helperc_value_checkN_fail_no_o);
            nm    = "MC_(helperc_value_checkN_fail_no_o)";
            args  = mkIRExprVec_1( mkIRExpr_HWord( sz ) );
            nargs = 1;
         }
         break;
      default:
         VG_(tool_panic)("unexpected szB");
   }

   tl_assert(fn);
   tl_assert(nm);
   tl_assert(args);
   tl_assert(nargs >= 0 && nargs <= 2);
   tl_assert( (MC_(clo_mc_level) == 3 && origin != NULL)
              || (MC_(clo_mc_level) == 2 && origin == NULL) );

   /* Emit the helper call, guarded by 'cond' so it only fires when
      some bit of the value is undefined. */
   di = unsafeIRDirty_0_N( nargs/*regparms*/, nm,
                           VG_(fnptr_to_fnentry)( fn ), args );
   di->guard = cond;
   setHelperAnns( mce, di );
   stmt( 'V', mce, IRStmt_Dirty(di));

   /* Set the shadow tmp to be defined.  First, update the
      orig->shadow tmp mapping to reflect the fact that this shadow is
      getting a new value. */
   tl_assert(isIRAtom(vatom));
   /* sameKindedAtoms ... */
   if (vatom->tag == Iex_RdTmp) {
      tl_assert(atom->tag == Iex_RdTmp);
      newShadowTmpV(mce, atom->Iex.RdTmp.tmp);
      assign('V', mce, findShadowTmpV(mce, atom->Iex.RdTmp.tmp),
                       definedOfType(ty));
   }
}
1138
1139
1140/*------------------------------------------------------------*/
1141/*--- Shadowing PUTs/GETs, and indexed variants thereof ---*/
1142/*------------------------------------------------------------*/
1143
1144/* Examine the always-defined sections declared in layout to see if
   the (offset,size) section is within one.  Note, it is an error to
1146 partially fall into such a region: (offset,size) should either be
1147 completely in such a region or completely not-in such a region.
1148*/
1149static Bool isAlwaysDefd ( MCEnv* mce, Int offset, Int size )
1150{
1151 Int minoffD, maxoffD, i;
1152 Int minoff = offset;
1153 Int maxoff = minoff + size - 1;
1154 tl_assert((minoff & ~0xFFFF) == 0);
1155 tl_assert((maxoff & ~0xFFFF) == 0);
1156
1157 for (i = 0; i < mce->layout->n_alwaysDefd; i++) {
1158 minoffD = mce->layout->alwaysDefd[i].offset;
1159 maxoffD = minoffD + mce->layout->alwaysDefd[i].size - 1;
1160 tl_assert((minoffD & ~0xFFFF) == 0);
1161 tl_assert((maxoffD & ~0xFFFF) == 0);
1162
1163 if (maxoff < minoffD || maxoffD < minoff)
1164 continue; /* no overlap */
1165 if (minoff >= minoffD && maxoff <= maxoffD)
1166 return True; /* completely contained in an always-defd section */
1167
1168 VG_(tool_panic)("memcheck:isAlwaysDefd:partial overlap");
1169 }
1170 return False; /* could not find any containing section */
1171}
1172
1173
1174/* Generate into bb suitable actions to shadow this Put. If the state
1175 slice is marked 'always defined', do nothing. Otherwise, write the
1176 supplied V bits to the shadow state. We can pass in either an
1177 original atom or a V-atom, but not both. In the former case the
1178 relevant V-bits are then generated from the original.
1179*/
1180static
1181void do_shadow_PUT ( MCEnv* mce, Int offset,
1182 IRAtom* atom, IRAtom* vatom )
1183{
sewardj7cf97ee2004-11-28 14:25:01 +00001184 IRType ty;
njn1d0825f2006-03-27 11:37:07 +00001185
1186 // Don't do shadow PUTs if we're not doing undefined value checking.
1187 // Their absence lets Vex's optimiser remove all the shadow computation
1188 // that they depend on, which includes GETs of the shadow registers.
sewardj7cf4e6b2008-05-01 20:24:26 +00001189 if (MC_(clo_mc_level) == 1)
njn1d0825f2006-03-27 11:37:07 +00001190 return;
1191
sewardj95448072004-11-22 20:19:51 +00001192 if (atom) {
1193 tl_assert(!vatom);
1194 tl_assert(isOriginalAtom(mce, atom));
1195 vatom = expr2vbits( mce, atom );
1196 } else {
1197 tl_assert(vatom);
1198 tl_assert(isShadowAtom(mce, vatom));
1199 }
1200
sewardj1c0ce7a2009-07-01 08:10:49 +00001201 ty = typeOfIRExpr(mce->sb->tyenv, vatom);
sewardj95448072004-11-22 20:19:51 +00001202 tl_assert(ty != Ity_I1);
1203 if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
1204 /* later: no ... */
1205 /* emit code to emit a complaint if any of the vbits are 1. */
1206 /* complainIfUndefined(mce, atom); */
1207 } else {
1208 /* Do a plain shadow Put. */
sewardj7cf4e6b2008-05-01 20:24:26 +00001209 stmt( 'V', mce, IRStmt_Put( offset + mce->layout->total_sizeB, vatom ) );
sewardj95448072004-11-22 20:19:51 +00001210 }
1211}
1212
1213
/* Generate into the output block the shadow code for this PUTI:
   compute the V bits for 'atom' and write them to the corresponding
   slice of the shadow state, unless that slice is always-defined.
*/
1217static
1218void do_shadow_PUTI ( MCEnv* mce,
sewardj0b9d74a2006-12-24 02:24:11 +00001219 IRRegArray* descr,
1220 IRAtom* ix, Int bias, IRAtom* atom )
sewardj95448072004-11-22 20:19:51 +00001221{
sewardj7cf97ee2004-11-28 14:25:01 +00001222 IRAtom* vatom;
1223 IRType ty, tyS;
1224 Int arrSize;;
1225
njn1d0825f2006-03-27 11:37:07 +00001226 // Don't do shadow PUTIs if we're not doing undefined value checking.
1227 // Their absence lets Vex's optimiser remove all the shadow computation
1228 // that they depend on, which includes GETIs of the shadow registers.
sewardj7cf4e6b2008-05-01 20:24:26 +00001229 if (MC_(clo_mc_level) == 1)
njn1d0825f2006-03-27 11:37:07 +00001230 return;
1231
sewardj95448072004-11-22 20:19:51 +00001232 tl_assert(isOriginalAtom(mce,atom));
sewardj7cf97ee2004-11-28 14:25:01 +00001233 vatom = expr2vbits( mce, atom );
sewardj95448072004-11-22 20:19:51 +00001234 tl_assert(sameKindedAtoms(atom, vatom));
sewardj7cf97ee2004-11-28 14:25:01 +00001235 ty = descr->elemTy;
sewardj7cf4e6b2008-05-01 20:24:26 +00001236 tyS = shadowTypeV(ty);
sewardj7cf97ee2004-11-28 14:25:01 +00001237 arrSize = descr->nElems * sizeofIRType(ty);
sewardj95448072004-11-22 20:19:51 +00001238 tl_assert(ty != Ity_I1);
1239 tl_assert(isOriginalAtom(mce,ix));
1240 complainIfUndefined(mce,ix);
1241 if (isAlwaysDefd(mce, descr->base, arrSize)) {
1242 /* later: no ... */
1243 /* emit code to emit a complaint if any of the vbits are 1. */
1244 /* complainIfUndefined(mce, atom); */
1245 } else {
1246 /* Do a cloned version of the Put that refers to the shadow
1247 area. */
sewardj0b9d74a2006-12-24 02:24:11 +00001248 IRRegArray* new_descr
1249 = mkIRRegArray( descr->base + mce->layout->total_sizeB,
1250 tyS, descr->nElems);
sewardj7cf4e6b2008-05-01 20:24:26 +00001251 stmt( 'V', mce, IRStmt_PutI( new_descr, ix, bias, vatom ));
sewardj95448072004-11-22 20:19:51 +00001252 }
1253}
1254
1255
1256/* Return an expression which contains the V bits corresponding to the
1257 given GET (passed in in pieces).
1258*/
1259static
1260IRExpr* shadow_GET ( MCEnv* mce, Int offset, IRType ty )
1261{
sewardj7cf4e6b2008-05-01 20:24:26 +00001262 IRType tyS = shadowTypeV(ty);
sewardj95448072004-11-22 20:19:51 +00001263 tl_assert(ty != Ity_I1);
1264 if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
1265 /* Always defined, return all zeroes of the relevant type */
1266 return definedOfType(tyS);
1267 } else {
1268 /* return a cloned version of the Get that refers to the shadow
1269 area. */
sewardj7cf4e6b2008-05-01 20:24:26 +00001270 /* FIXME: this isn't an atom! */
sewardj95448072004-11-22 20:19:51 +00001271 return IRExpr_Get( offset + mce->layout->total_sizeB, tyS );
1272 }
1273}
1274
1275
1276/* Return an expression which contains the V bits corresponding to the
1277 given GETI (passed in in pieces).
1278*/
1279static
sewardj0b9d74a2006-12-24 02:24:11 +00001280IRExpr* shadow_GETI ( MCEnv* mce,
1281 IRRegArray* descr, IRAtom* ix, Int bias )
sewardj95448072004-11-22 20:19:51 +00001282{
1283 IRType ty = descr->elemTy;
sewardj7cf4e6b2008-05-01 20:24:26 +00001284 IRType tyS = shadowTypeV(ty);
sewardj95448072004-11-22 20:19:51 +00001285 Int arrSize = descr->nElems * sizeofIRType(ty);
1286 tl_assert(ty != Ity_I1);
1287 tl_assert(isOriginalAtom(mce,ix));
1288 complainIfUndefined(mce,ix);
1289 if (isAlwaysDefd(mce, descr->base, arrSize)) {
1290 /* Always defined, return all zeroes of the relevant type */
1291 return definedOfType(tyS);
1292 } else {
1293 /* return a cloned version of the Get that refers to the shadow
1294 area. */
sewardj0b9d74a2006-12-24 02:24:11 +00001295 IRRegArray* new_descr
1296 = mkIRRegArray( descr->base + mce->layout->total_sizeB,
1297 tyS, descr->nElems);
sewardj95448072004-11-22 20:19:51 +00001298 return IRExpr_GetI( new_descr, ix, bias );
1299 }
1300}
1301
1302
1303/*------------------------------------------------------------*/
1304/*--- Generating approximations for unknown operations, ---*/
1305/*--- using lazy-propagate semantics ---*/
1306/*------------------------------------------------------------*/
1307
1308/* Lazy propagation of undefinedness from two values, resulting in the
1309 specified shadow type.
1310*/
1311static
1312IRAtom* mkLazy2 ( MCEnv* mce, IRType finalVty, IRAtom* va1, IRAtom* va2 )
1313{
sewardj95448072004-11-22 20:19:51 +00001314 IRAtom* at;
sewardj1c0ce7a2009-07-01 08:10:49 +00001315 IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
1316 IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
sewardj95448072004-11-22 20:19:51 +00001317 tl_assert(isShadowAtom(mce,va1));
1318 tl_assert(isShadowAtom(mce,va2));
sewardj37c31cc2005-04-26 23:49:24 +00001319
1320 /* The general case is inefficient because PCast is an expensive
1321 operation. Here are some special cases which use PCast only
1322 once rather than twice. */
1323
1324 /* I64 x I64 -> I64 */
1325 if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I64) {
1326 if (0) VG_(printf)("mkLazy2: I64 x I64 -> I64\n");
1327 at = mkUifU(mce, Ity_I64, va1, va2);
1328 at = mkPCastTo(mce, Ity_I64, at);
1329 return at;
1330 }
1331
1332 /* I64 x I64 -> I32 */
1333 if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I32) {
1334 if (0) VG_(printf)("mkLazy2: I64 x I64 -> I32\n");
1335 at = mkUifU(mce, Ity_I64, va1, va2);
1336 at = mkPCastTo(mce, Ity_I32, at);
1337 return at;
1338 }
1339
1340 if (0) {
1341 VG_(printf)("mkLazy2 ");
1342 ppIRType(t1);
1343 VG_(printf)("_");
1344 ppIRType(t2);
1345 VG_(printf)("_");
1346 ppIRType(finalVty);
1347 VG_(printf)("\n");
1348 }
1349
1350 /* General case: force everything via 32-bit intermediaries. */
sewardj95448072004-11-22 20:19:51 +00001351 at = mkPCastTo(mce, Ity_I32, va1);
1352 at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
1353 at = mkPCastTo(mce, finalVty, at);
1354 return at;
1355}
1356
1357
sewardjed69fdb2006-02-03 16:12:27 +00001358/* 3-arg version of the above. */
static
IRAtom* mkLazy3 ( MCEnv* mce, IRType finalVty,
                  IRAtom* va1, IRAtom* va2, IRAtom* va3 )
{
   IRAtom* at;
   IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
   IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
   IRType t3 = typeOfIRExpr(mce->sb->tyenv, va3);
   tl_assert(isShadowAtom(mce,va1));
   tl_assert(isShadowAtom(mce,va2));
   tl_assert(isShadowAtom(mce,va3));

   /* The general case is inefficient because PCast is an expensive
      operation.  Here are some special cases which use PCast only
      twice rather than three times. */

   /* I32 x I64 x I64 -> I64 */
   /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
   if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64
       && finalVty == Ity_I64) {
      if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I64\n");
      /* Widen 1st arg to I64.  Since 1st arg is typically a rounding
         mode indication which is fully defined, this should get
         folded out later. */
      at = mkPCastTo(mce, Ity_I64, va1);
      /* Now fold in 2nd and 3rd args. */
      at = mkUifU(mce, Ity_I64, at, va2);
      at = mkUifU(mce, Ity_I64, at, va3);
      /* and PCast once again. */
      at = mkPCastTo(mce, Ity_I64, at);
      return at;
   }

   /* I32 x I64 x I64 -> I32 */
   /* Same scheme, narrowing the final PCast to I32. */
   if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64
       && finalVty == Ity_I32) {
      if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I32\n");
      at = mkPCastTo(mce, Ity_I64, va1);
      at = mkUifU(mce, Ity_I64, at, va2);
      at = mkUifU(mce, Ity_I64, at, va3);
      at = mkPCastTo(mce, Ity_I32, at);
      return at;
   }

   /* I32 x I32 x I32 -> I32 */
   /* 32-bit FP idiom, as (eg) happens on ARM */
   if (t1 == Ity_I32 && t2 == Ity_I32 && t3 == Ity_I32
       && finalVty == Ity_I32) {
      if (0) VG_(printf)("mkLazy3: I32 x I32 x I32 -> I32\n");
      at = va1;
      at = mkUifU(mce, Ity_I32, at, va2);
      at = mkUifU(mce, Ity_I32, at, va3);
      at = mkPCastTo(mce, Ity_I32, at);
      return at;
   }

   /* Unhandled type combination: report it and assert below, so new
      cases surface loudly rather than being mishandled. */
   if (1) {
      VG_(printf)("mkLazy3: ");
      ppIRType(t1);
      VG_(printf)(" x ");
      ppIRType(t2);
      VG_(printf)(" x ");
      ppIRType(t3);
      VG_(printf)(" -> ");
      ppIRType(finalVty);
      VG_(printf)("\n");
   }

   tl_assert(0);
   /* General case: force everything via 32-bit intermediaries. */
   /* (Deliberately disabled: kept for reference should the assert
      above ever need to be relaxed.) */
   /*
   at = mkPCastTo(mce, Ity_I32, va1);
   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va3));
   at = mkPCastTo(mce, finalVty, at);
   return at;
   */
}
1437
1438
sewardje91cea72006-02-08 19:32:02 +00001439/* 4-arg version of the above. */
1440static
1441IRAtom* mkLazy4 ( MCEnv* mce, IRType finalVty,
1442 IRAtom* va1, IRAtom* va2, IRAtom* va3, IRAtom* va4 )
1443{
1444 IRAtom* at;
sewardj1c0ce7a2009-07-01 08:10:49 +00001445 IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
1446 IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
1447 IRType t3 = typeOfIRExpr(mce->sb->tyenv, va3);
1448 IRType t4 = typeOfIRExpr(mce->sb->tyenv, va4);
sewardje91cea72006-02-08 19:32:02 +00001449 tl_assert(isShadowAtom(mce,va1));
1450 tl_assert(isShadowAtom(mce,va2));
1451 tl_assert(isShadowAtom(mce,va3));
1452 tl_assert(isShadowAtom(mce,va4));
1453
1454 /* The general case is inefficient because PCast is an expensive
1455 operation. Here are some special cases which use PCast only
1456 twice rather than three times. */
1457
1458 /* I32 x I64 x I64 x I64 -> I64 */
1459 /* Standard FP idiom: rm x FParg1 x FParg2 x FParg3 -> FPresult */
1460 if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64 && t4 == Ity_I64
1461 && finalVty == Ity_I64) {
1462 if (0) VG_(printf)("mkLazy4: I32 x I64 x I64 x I64 -> I64\n");
1463 /* Widen 1st arg to I64. Since 1st arg is typically a rounding
1464 mode indication which is fully defined, this should get
1465 folded out later. */
1466 at = mkPCastTo(mce, Ity_I64, va1);
1467 /* Now fold in 2nd, 3rd, 4th args. */
1468 at = mkUifU(mce, Ity_I64, at, va2);
1469 at = mkUifU(mce, Ity_I64, at, va3);
1470 at = mkUifU(mce, Ity_I64, at, va4);
1471 /* and PCast once again. */
1472 at = mkPCastTo(mce, Ity_I64, at);
1473 return at;
1474 }
1475
1476 if (1) {
sewardj453e8f82006-02-09 03:25:06 +00001477 VG_(printf)("mkLazy4: ");
sewardje91cea72006-02-08 19:32:02 +00001478 ppIRType(t1);
1479 VG_(printf)(" x ");
1480 ppIRType(t2);
1481 VG_(printf)(" x ");
1482 ppIRType(t3);
1483 VG_(printf)(" x ");
1484 ppIRType(t4);
1485 VG_(printf)(" -> ");
1486 ppIRType(finalVty);
1487 VG_(printf)("\n");
1488 }
1489
1490 tl_assert(0);
1491}
1492
1493
sewardj95448072004-11-22 20:19:51 +00001494/* Do the lazy propagation game from a null-terminated vector of
1495 atoms. This is presumably the arguments to a helper call, so the
1496 IRCallee info is also supplied in order that we can know which
1497 arguments should be ignored (via the .mcx_mask field).
1498*/
1499static
1500IRAtom* mkLazyN ( MCEnv* mce,
1501 IRAtom** exprvec, IRType finalVtype, IRCallee* cee )
1502{
sewardj4cc684b2007-08-25 23:09:36 +00001503 Int i;
sewardj95448072004-11-22 20:19:51 +00001504 IRAtom* here;
sewardj4cc684b2007-08-25 23:09:36 +00001505 IRAtom* curr;
1506 IRType mergeTy;
1507 IRType mergeTy64 = True;
1508
1509 /* Decide on the type of the merge intermediary. If all relevant
1510 args are I64, then it's I64. In all other circumstances, use
1511 I32. */
1512 for (i = 0; exprvec[i]; i++) {
1513 tl_assert(i < 32);
1514 tl_assert(isOriginalAtom(mce, exprvec[i]));
1515 if (cee->mcx_mask & (1<<i))
1516 continue;
sewardj1c0ce7a2009-07-01 08:10:49 +00001517 if (typeOfIRExpr(mce->sb->tyenv, exprvec[i]) != Ity_I64)
sewardj4cc684b2007-08-25 23:09:36 +00001518 mergeTy64 = False;
1519 }
1520
1521 mergeTy = mergeTy64 ? Ity_I64 : Ity_I32;
1522 curr = definedOfType(mergeTy);
1523
sewardj95448072004-11-22 20:19:51 +00001524 for (i = 0; exprvec[i]; i++) {
1525 tl_assert(i < 32);
1526 tl_assert(isOriginalAtom(mce, exprvec[i]));
1527 /* Only take notice of this arg if the callee's mc-exclusion
1528 mask does not say it is to be excluded. */
1529 if (cee->mcx_mask & (1<<i)) {
1530 /* the arg is to be excluded from definedness checking. Do
1531 nothing. */
1532 if (0) VG_(printf)("excluding %s(%d)\n", cee->name, i);
1533 } else {
1534 /* calculate the arg's definedness, and pessimistically merge
1535 it in. */
sewardj4cc684b2007-08-25 23:09:36 +00001536 here = mkPCastTo( mce, mergeTy, expr2vbits(mce, exprvec[i]) );
1537 curr = mergeTy64
1538 ? mkUifU64(mce, here, curr)
1539 : mkUifU32(mce, here, curr);
sewardj95448072004-11-22 20:19:51 +00001540 }
1541 }
1542 return mkPCastTo(mce, finalVtype, curr );
1543}
1544
1545
1546/*------------------------------------------------------------*/
1547/*--- Generating expensive sequences for exact carry-chain ---*/
1548/*--- propagation in add/sub and related operations. ---*/
1549/*------------------------------------------------------------*/
1550
/* Expensive (bit-precise) interpretation of Add/Sub that models
   carry-chain propagation exactly.  For each operand we form the
   smallest value consistent with its undefined bits (x_min = x & ~qx)
   and the largest (x_max = x | qx), perform the operation on both
   extremes, and mark as undefined exactly those result bits which
   differ between the two outcomes, plus any bit that was undefined
   in either input.

   mce  - instrumentation environment
   add  - True for addition, False for subtraction
   ty   - operand/result type; must be Ity_I32 or Ity_I64
   qaa, qbb - shadow (V-bit) atoms for the two operands
   aa,  bb  - the original operand atoms

   Returns the shadow atom for the result. */
static
IRAtom* expensiveAddSub ( MCEnv*  mce,
                          Bool    add,
                          IRType  ty,
                          IRAtom* qaa, IRAtom* qbb,
                          IRAtom* aa,  IRAtom* bb )
{
   IRAtom *a_min, *b_min, *a_max, *b_max;
   IROp   opAND, opOR, opXOR, opNOT, opADD, opSUB;

   tl_assert(isShadowAtom(mce,qaa));
   tl_assert(isShadowAtom(mce,qbb));
   tl_assert(isOriginalAtom(mce,aa));
   tl_assert(isOriginalAtom(mce,bb));
   tl_assert(sameKindedAtoms(qaa,aa));
   tl_assert(sameKindedAtoms(qbb,bb));

   /* Select the width-appropriate ops; only I32 and I64 supported. */
   switch (ty) {
      case Ity_I32:
         opAND = Iop_And32;
         opOR  = Iop_Or32;
         opXOR = Iop_Xor32;
         opNOT = Iop_Not32;
         opADD = Iop_Add32;
         opSUB = Iop_Sub32;
         break;
      case Ity_I64:
         opAND = Iop_And64;
         opOR  = Iop_Or64;
         opXOR = Iop_Xor64;
         opNOT = Iop_Not64;
         opADD = Iop_Add64;
         opSUB = Iop_Sub64;
         break;
      default:
         VG_(tool_panic)("expensiveAddSub");
   }

   // a_min = aa & ~qaa
   a_min = assignNew('V', mce,ty,
                     binop(opAND, aa,
                                  assignNew('V', mce,ty, unop(opNOT, qaa))));

   // b_min = bb & ~qbb
   b_min = assignNew('V', mce,ty,
                     binop(opAND, bb,
                                  assignNew('V', mce,ty, unop(opNOT, qbb))));

   // a_max = aa | qaa
   a_max = assignNew('V', mce,ty, binop(opOR, aa, qaa));

   // b_max = bb | qbb
   b_max = assignNew('V', mce,ty, binop(opOR, bb, qbb));

   if (add) {
      // result = (qaa | qbb) | ((a_min + b_min) ^ (a_max + b_max))
      return
      assignNew('V', mce,ty,
         binop( opOR,
                assignNew('V', mce,ty, binop(opOR, qaa, qbb)),
                assignNew('V', mce,ty,
                   binop( opXOR,
                          assignNew('V', mce,ty, binop(opADD, a_min, b_min)),
                          assignNew('V', mce,ty, binop(opADD, a_max, b_max))
                   )
                )
         )
      );
   } else {
      // For subtraction the extremes pair up oppositely:
      // result = (qaa | qbb) | ((a_min - b_max) ^ (a_max - b_min))
      return
      assignNew('V', mce,ty,
         binop( opOR,
                assignNew('V', mce,ty, binop(opOR, qaa, qbb)),
                assignNew('V', mce,ty,
                   binop( opXOR,
                          assignNew('V', mce,ty, binop(opSUB, a_min, b_max)),
                          assignNew('V', mce,ty, binop(opSUB, a_max, b_min))
                   )
                )
         )
      );
   }

}
1636
1637
1638/*------------------------------------------------------------*/
sewardjaaddbc22005-10-07 09:49:53 +00001639/*--- Scalar shifts. ---*/
1640/*------------------------------------------------------------*/
1641
1642/* Produce an interpretation for (aa << bb) (or >>s, >>u). The basic
1643 idea is to shift the definedness bits by the original shift amount.
1644 This introduces 0s ("defined") in new positions for left shifts and
1645 unsigned right shifts, and copies the top definedness bit for
1646 signed right shifts. So, conveniently, applying the original shift
1647 operator to the definedness bits for the left arg is exactly the
1648 right thing to do:
1649
1650 (qaa << bb)
1651
1652 However if the shift amount is undefined then the whole result
1653 is undefined. Hence need:
1654
1655 (qaa << bb) `UifU` PCast(qbb)
1656
   If the shift amount bb is a literal then qbb will say 'all defined'
1658 and the UifU and PCast will get folded out by post-instrumentation
1659 optimisation.
1660*/
1661static IRAtom* scalarShift ( MCEnv* mce,
1662 IRType ty,
1663 IROp original_op,
1664 IRAtom* qaa, IRAtom* qbb,
1665 IRAtom* aa, IRAtom* bb )
1666{
1667 tl_assert(isShadowAtom(mce,qaa));
1668 tl_assert(isShadowAtom(mce,qbb));
1669 tl_assert(isOriginalAtom(mce,aa));
1670 tl_assert(isOriginalAtom(mce,bb));
1671 tl_assert(sameKindedAtoms(qaa,aa));
1672 tl_assert(sameKindedAtoms(qbb,bb));
1673 return
1674 assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +00001675 'V', mce, ty,
sewardjaaddbc22005-10-07 09:49:53 +00001676 mkUifU( mce, ty,
sewardj7cf4e6b2008-05-01 20:24:26 +00001677 assignNew('V', mce, ty, binop(original_op, qaa, bb)),
sewardjaaddbc22005-10-07 09:49:53 +00001678 mkPCastTo(mce, ty, qbb)
1679 )
1680 );
1681}
1682
1683
1684/*------------------------------------------------------------*/
1685/*--- Helpers for dealing with vector primops. ---*/
sewardj3245c912004-12-10 14:58:26 +00001686/*------------------------------------------------------------*/
1687
sewardja1d93302004-12-12 16:45:06 +00001688/* Vector pessimisation -- pessimise within each lane individually. */
1689
1690static IRAtom* mkPCast8x16 ( MCEnv* mce, IRAtom* at )
1691{
sewardj7cf4e6b2008-05-01 20:24:26 +00001692 return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ8x16, at));
sewardja1d93302004-12-12 16:45:06 +00001693}
1694
1695static IRAtom* mkPCast16x8 ( MCEnv* mce, IRAtom* at )
1696{
sewardj7cf4e6b2008-05-01 20:24:26 +00001697 return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ16x8, at));
sewardja1d93302004-12-12 16:45:06 +00001698}
1699
1700static IRAtom* mkPCast32x4 ( MCEnv* mce, IRAtom* at )
1701{
sewardj7cf4e6b2008-05-01 20:24:26 +00001702 return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ32x4, at));
sewardja1d93302004-12-12 16:45:06 +00001703}
1704
1705static IRAtom* mkPCast64x2 ( MCEnv* mce, IRAtom* at )
1706{
sewardj7cf4e6b2008-05-01 20:24:26 +00001707 return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ64x2, at));
sewardja1d93302004-12-12 16:45:06 +00001708}
1709
sewardjacd2e912005-01-13 19:17:06 +00001710static IRAtom* mkPCast32x2 ( MCEnv* mce, IRAtom* at )
1711{
sewardj7cf4e6b2008-05-01 20:24:26 +00001712 return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ32x2, at));
sewardjacd2e912005-01-13 19:17:06 +00001713}
1714
1715static IRAtom* mkPCast16x4 ( MCEnv* mce, IRAtom* at )
1716{
sewardj7cf4e6b2008-05-01 20:24:26 +00001717 return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ16x4, at));
sewardjacd2e912005-01-13 19:17:06 +00001718}
1719
1720static IRAtom* mkPCast8x8 ( MCEnv* mce, IRAtom* at )
1721{
sewardj7cf4e6b2008-05-01 20:24:26 +00001722 return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ8x8, at));
sewardjacd2e912005-01-13 19:17:06 +00001723}
1724
sewardja1d93302004-12-12 16:45:06 +00001725
sewardj3245c912004-12-10 14:58:26 +00001726/* Here's a simple scheme capable of handling ops derived from SSE1
1727 code and while only generating ops that can be efficiently
1728 implemented in SSE1. */
1729
1730/* All-lanes versions are straightforward:
1731
sewardj20d38f22005-02-07 23:50:18 +00001732 binary32Fx4(x,y) ==> PCast32x4(UifUV128(x#,y#))
sewardj3245c912004-12-10 14:58:26 +00001733
1734 unary32Fx4(x,y) ==> PCast32x4(x#)
1735
1736 Lowest-lane-only versions are more complex:
1737
sewardj20d38f22005-02-07 23:50:18 +00001738 binary32F0x4(x,y) ==> SetV128lo32(
sewardj3245c912004-12-10 14:58:26 +00001739 x#,
sewardj20d38f22005-02-07 23:50:18 +00001740 PCast32(V128to32(UifUV128(x#,y#)))
sewardj3245c912004-12-10 14:58:26 +00001741 )
1742
1743 This is perhaps not so obvious. In particular, it's faster to
sewardj20d38f22005-02-07 23:50:18 +00001744 do a V128-bit UifU and then take the bottom 32 bits than the more
sewardj3245c912004-12-10 14:58:26 +00001745 obvious scheme of taking the bottom 32 bits of each operand
1746 and doing a 32-bit UifU. Basically since UifU is fast and
1747 chopping lanes off vector values is slow.
1748
1749 Finally:
1750
sewardj20d38f22005-02-07 23:50:18 +00001751 unary32F0x4(x) ==> SetV128lo32(
sewardj3245c912004-12-10 14:58:26 +00001752 x#,
sewardj20d38f22005-02-07 23:50:18 +00001753 PCast32(V128to32(x#))
sewardj3245c912004-12-10 14:58:26 +00001754 )
1755
1756 Where:
1757
1758 PCast32(v#) = 1Sto32(CmpNE32(v#,0))
1759 PCast32x4(v#) = CmpNEZ32x4(v#)
1760*/
1761
1762static
1763IRAtom* binary32Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1764{
1765 IRAtom* at;
1766 tl_assert(isShadowAtom(mce, vatomX));
1767 tl_assert(isShadowAtom(mce, vatomY));
sewardj20d38f22005-02-07 23:50:18 +00001768 at = mkUifUV128(mce, vatomX, vatomY);
sewardj7cf4e6b2008-05-01 20:24:26 +00001769 at = assignNew('V', mce, Ity_V128, mkPCast32x4(mce, at));
sewardj3245c912004-12-10 14:58:26 +00001770 return at;
1771}
1772
1773static
1774IRAtom* unary32Fx4 ( MCEnv* mce, IRAtom* vatomX )
1775{
1776 IRAtom* at;
1777 tl_assert(isShadowAtom(mce, vatomX));
sewardj7cf4e6b2008-05-01 20:24:26 +00001778 at = assignNew('V', mce, Ity_V128, mkPCast32x4(mce, vatomX));
sewardj3245c912004-12-10 14:58:26 +00001779 return at;
1780}
1781
1782static
1783IRAtom* binary32F0x4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1784{
1785 IRAtom* at;
1786 tl_assert(isShadowAtom(mce, vatomX));
1787 tl_assert(isShadowAtom(mce, vatomY));
sewardj20d38f22005-02-07 23:50:18 +00001788 at = mkUifUV128(mce, vatomX, vatomY);
sewardj7cf4e6b2008-05-01 20:24:26 +00001789 at = assignNew('V', mce, Ity_I32, unop(Iop_V128to32, at));
sewardj3245c912004-12-10 14:58:26 +00001790 at = mkPCastTo(mce, Ity_I32, at);
sewardj7cf4e6b2008-05-01 20:24:26 +00001791 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
sewardj3245c912004-12-10 14:58:26 +00001792 return at;
1793}
1794
1795static
1796IRAtom* unary32F0x4 ( MCEnv* mce, IRAtom* vatomX )
1797{
1798 IRAtom* at;
1799 tl_assert(isShadowAtom(mce, vatomX));
sewardj7cf4e6b2008-05-01 20:24:26 +00001800 at = assignNew('V', mce, Ity_I32, unop(Iop_V128to32, vatomX));
sewardj3245c912004-12-10 14:58:26 +00001801 at = mkPCastTo(mce, Ity_I32, at);
sewardj7cf4e6b2008-05-01 20:24:26 +00001802 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
sewardj3245c912004-12-10 14:58:26 +00001803 return at;
1804}
1805
sewardj0b070592004-12-10 21:44:22 +00001806/* --- ... and ... 64Fx2 versions of the same ... --- */
1807
1808static
1809IRAtom* binary64Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1810{
1811 IRAtom* at;
1812 tl_assert(isShadowAtom(mce, vatomX));
1813 tl_assert(isShadowAtom(mce, vatomY));
sewardj20d38f22005-02-07 23:50:18 +00001814 at = mkUifUV128(mce, vatomX, vatomY);
sewardj7cf4e6b2008-05-01 20:24:26 +00001815 at = assignNew('V', mce, Ity_V128, mkPCast64x2(mce, at));
sewardj0b070592004-12-10 21:44:22 +00001816 return at;
1817}
1818
1819static
1820IRAtom* unary64Fx2 ( MCEnv* mce, IRAtom* vatomX )
1821{
1822 IRAtom* at;
1823 tl_assert(isShadowAtom(mce, vatomX));
sewardj7cf4e6b2008-05-01 20:24:26 +00001824 at = assignNew('V', mce, Ity_V128, mkPCast64x2(mce, vatomX));
sewardj0b070592004-12-10 21:44:22 +00001825 return at;
1826}
1827
1828static
1829IRAtom* binary64F0x2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1830{
1831 IRAtom* at;
1832 tl_assert(isShadowAtom(mce, vatomX));
1833 tl_assert(isShadowAtom(mce, vatomY));
sewardj20d38f22005-02-07 23:50:18 +00001834 at = mkUifUV128(mce, vatomX, vatomY);
sewardj7cf4e6b2008-05-01 20:24:26 +00001835 at = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, at));
sewardj0b070592004-12-10 21:44:22 +00001836 at = mkPCastTo(mce, Ity_I64, at);
sewardj7cf4e6b2008-05-01 20:24:26 +00001837 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
sewardj0b070592004-12-10 21:44:22 +00001838 return at;
1839}
1840
1841static
1842IRAtom* unary64F0x2 ( MCEnv* mce, IRAtom* vatomX )
1843{
1844 IRAtom* at;
1845 tl_assert(isShadowAtom(mce, vatomX));
sewardj7cf4e6b2008-05-01 20:24:26 +00001846 at = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vatomX));
sewardj0b070592004-12-10 21:44:22 +00001847 at = mkPCastTo(mce, Ity_I64, at);
sewardj7cf4e6b2008-05-01 20:24:26 +00001848 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
sewardj0b070592004-12-10 21:44:22 +00001849 return at;
1850}
1851
sewardja1d93302004-12-12 16:45:06 +00001852/* --- --- Vector saturated narrowing --- --- */
1853
1854/* This is quite subtle. What to do is simple:
1855
1856 Let the original narrowing op be QNarrowW{S,U}xN. Produce:
1857
1858 the-narrowing-op( PCastWxN(vatom1), PCastWxN(vatom2))
1859
1860 Why this is right is not so simple. Consider a lane in the args,
1861 vatom1 or 2, doesn't matter.
1862
1863 After the PCast, that lane is all 0s (defined) or all
1864 1s(undefined).
1865
1866 Both signed and unsigned saturating narrowing of all 0s produces
1867 all 0s, which is what we want.
1868
1869 The all-1s case is more complex. Unsigned narrowing interprets an
1870 all-1s input as the largest unsigned integer, and so produces all
1871 1s as a result since that is the largest unsigned value at the
1872 smaller width.
1873
1874 Signed narrowing interprets all 1s as -1. Fortunately, -1 narrows
1875 to -1, so we still wind up with all 1s at the smaller width.
1876
1877 So: In short, pessimise the args, then apply the original narrowing
1878 op.
1879*/
1880static
sewardj20d38f22005-02-07 23:50:18 +00001881IRAtom* vectorNarrowV128 ( MCEnv* mce, IROp narrow_op,
sewardja1d93302004-12-12 16:45:06 +00001882 IRAtom* vatom1, IRAtom* vatom2)
1883{
1884 IRAtom *at1, *at2, *at3;
1885 IRAtom* (*pcast)( MCEnv*, IRAtom* );
1886 switch (narrow_op) {
1887 case Iop_QNarrow32Sx4: pcast = mkPCast32x4; break;
sewardj43d60752005-11-10 18:13:01 +00001888 case Iop_QNarrow32Ux4: pcast = mkPCast32x4; break;
sewardja1d93302004-12-12 16:45:06 +00001889 case Iop_QNarrow16Sx8: pcast = mkPCast16x8; break;
1890 case Iop_QNarrow16Ux8: pcast = mkPCast16x8; break;
sewardj20d38f22005-02-07 23:50:18 +00001891 default: VG_(tool_panic)("vectorNarrowV128");
sewardja1d93302004-12-12 16:45:06 +00001892 }
1893 tl_assert(isShadowAtom(mce,vatom1));
1894 tl_assert(isShadowAtom(mce,vatom2));
sewardj7cf4e6b2008-05-01 20:24:26 +00001895 at1 = assignNew('V', mce, Ity_V128, pcast(mce, vatom1));
1896 at2 = assignNew('V', mce, Ity_V128, pcast(mce, vatom2));
1897 at3 = assignNew('V', mce, Ity_V128, binop(narrow_op, at1, at2));
sewardja1d93302004-12-12 16:45:06 +00001898 return at3;
1899}
1900
sewardjacd2e912005-01-13 19:17:06 +00001901static
1902IRAtom* vectorNarrow64 ( MCEnv* mce, IROp narrow_op,
1903 IRAtom* vatom1, IRAtom* vatom2)
1904{
1905 IRAtom *at1, *at2, *at3;
1906 IRAtom* (*pcast)( MCEnv*, IRAtom* );
1907 switch (narrow_op) {
1908 case Iop_QNarrow32Sx2: pcast = mkPCast32x2; break;
1909 case Iop_QNarrow16Sx4: pcast = mkPCast16x4; break;
1910 case Iop_QNarrow16Ux4: pcast = mkPCast16x4; break;
1911 default: VG_(tool_panic)("vectorNarrow64");
1912 }
1913 tl_assert(isShadowAtom(mce,vatom1));
1914 tl_assert(isShadowAtom(mce,vatom2));
sewardj7cf4e6b2008-05-01 20:24:26 +00001915 at1 = assignNew('V', mce, Ity_I64, pcast(mce, vatom1));
1916 at2 = assignNew('V', mce, Ity_I64, pcast(mce, vatom2));
1917 at3 = assignNew('V', mce, Ity_I64, binop(narrow_op, at1, at2));
sewardjacd2e912005-01-13 19:17:06 +00001918 return at3;
1919}
1920
sewardja1d93302004-12-12 16:45:06 +00001921
1922/* --- --- Vector integer arithmetic --- --- */
1923
1924/* Simple ... UifU the args and per-lane pessimise the results. */
sewardjacd2e912005-01-13 19:17:06 +00001925
sewardj20d38f22005-02-07 23:50:18 +00001926/* --- V128-bit versions --- */
sewardjacd2e912005-01-13 19:17:06 +00001927
sewardja1d93302004-12-12 16:45:06 +00001928static
1929IRAtom* binary8Ix16 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
1930{
1931 IRAtom* at;
sewardj20d38f22005-02-07 23:50:18 +00001932 at = mkUifUV128(mce, vatom1, vatom2);
sewardja1d93302004-12-12 16:45:06 +00001933 at = mkPCast8x16(mce, at);
1934 return at;
1935}
1936
1937static
1938IRAtom* binary16Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
1939{
1940 IRAtom* at;
sewardj20d38f22005-02-07 23:50:18 +00001941 at = mkUifUV128(mce, vatom1, vatom2);
sewardja1d93302004-12-12 16:45:06 +00001942 at = mkPCast16x8(mce, at);
1943 return at;
1944}
1945
1946static
1947IRAtom* binary32Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
1948{
1949 IRAtom* at;
sewardj20d38f22005-02-07 23:50:18 +00001950 at = mkUifUV128(mce, vatom1, vatom2);
sewardja1d93302004-12-12 16:45:06 +00001951 at = mkPCast32x4(mce, at);
1952 return at;
1953}
1954
1955static
1956IRAtom* binary64Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
1957{
1958 IRAtom* at;
sewardj20d38f22005-02-07 23:50:18 +00001959 at = mkUifUV128(mce, vatom1, vatom2);
sewardja1d93302004-12-12 16:45:06 +00001960 at = mkPCast64x2(mce, at);
1961 return at;
1962}
sewardj3245c912004-12-10 14:58:26 +00001963
sewardjacd2e912005-01-13 19:17:06 +00001964/* --- 64-bit versions --- */
1965
1966static
1967IRAtom* binary8Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
1968{
1969 IRAtom* at;
1970 at = mkUifU64(mce, vatom1, vatom2);
1971 at = mkPCast8x8(mce, at);
1972 return at;
1973}
1974
1975static
1976IRAtom* binary16Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
1977{
1978 IRAtom* at;
1979 at = mkUifU64(mce, vatom1, vatom2);
1980 at = mkPCast16x4(mce, at);
1981 return at;
1982}
1983
1984static
1985IRAtom* binary32Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
1986{
1987 IRAtom* at;
1988 at = mkUifU64(mce, vatom1, vatom2);
1989 at = mkPCast32x2(mce, at);
1990 return at;
1991}
1992
sewardj3245c912004-12-10 14:58:26 +00001993
1994/*------------------------------------------------------------*/
sewardj95448072004-11-22 20:19:51 +00001995/*--- Generate shadow values from all kinds of IRExprs. ---*/
1996/*------------------------------------------------------------*/
1997
/* Compute the shadow (V-bit) expression for a quaternary (4-arg)
   operation.  Only the F64 fused multiply-accumulate family is
   supported; anything else is a panic.  Each operand's shadow is
   computed first, then summarised lazily via mkLazy4. */
static
IRAtom* expr2vbits_Qop ( MCEnv* mce,
                         IROp op,
                         IRAtom* atom1, IRAtom* atom2,
                         IRAtom* atom3, IRAtom* atom4 )
{
   /* Shadows for the four operands (these may emit IR). */
   IRAtom* vatom1 = expr2vbits( mce, atom1 );
   IRAtom* vatom2 = expr2vbits( mce, atom2 );
   IRAtom* vatom3 = expr2vbits( mce, atom3 );
   IRAtom* vatom4 = expr2vbits( mce, atom4 );

   /* Sanity: originals are original atoms, shadows are shadow atoms,
      and each pair is of the same kind. */
   tl_assert(isOriginalAtom(mce,atom1));
   tl_assert(isOriginalAtom(mce,atom2));
   tl_assert(isOriginalAtom(mce,atom3));
   tl_assert(isOriginalAtom(mce,atom4));
   tl_assert(isShadowAtom(mce,vatom1));
   tl_assert(isShadowAtom(mce,vatom2));
   tl_assert(isShadowAtom(mce,vatom3));
   tl_assert(isShadowAtom(mce,vatom4));
   tl_assert(sameKindedAtoms(atom1,vatom1));
   tl_assert(sameKindedAtoms(atom2,vatom2));
   tl_assert(sameKindedAtoms(atom3,vatom3));
   tl_assert(sameKindedAtoms(atom4,vatom4));
   switch (op) {
      case Iop_MAddF64:
      case Iop_MAddF64r32:
      case Iop_MSubF64:
      case Iop_MSubF64r32:
         /* I32(rm) x F64 x F64 x F64 -> F64
            (F64 shadow values are carried as I64, hence Ity_I64). */
         return mkLazy4(mce, Ity_I64, vatom1, vatom2, vatom3, vatom4);
      default:
         ppIROp(op);
         VG_(tool_panic)("memcheck:expr2vbits_Qop");
   }
}
2033
2034
/* Compute the shadow (V-bit) expression for a ternary (3-arg)
   operation.  All supported cases are FP ops whose first arg is a
   rounding mode (I32); the shadow is summarised lazily via mkLazy3
   at the width of the result's shadow type. */
static
IRAtom* expr2vbits_Triop ( MCEnv* mce,
                           IROp op,
                           IRAtom* atom1, IRAtom* atom2, IRAtom* atom3 )
{
   /* Shadows for the three operands (these may emit IR). */
   IRAtom* vatom1 = expr2vbits( mce, atom1 );
   IRAtom* vatom2 = expr2vbits( mce, atom2 );
   IRAtom* vatom3 = expr2vbits( mce, atom3 );

   /* Sanity: originals are original atoms, shadows are shadow atoms,
      and each pair is of the same kind. */
   tl_assert(isOriginalAtom(mce,atom1));
   tl_assert(isOriginalAtom(mce,atom2));
   tl_assert(isOriginalAtom(mce,atom3));
   tl_assert(isShadowAtom(mce,vatom1));
   tl_assert(isShadowAtom(mce,vatom2));
   tl_assert(isShadowAtom(mce,vatom3));
   tl_assert(sameKindedAtoms(atom1,vatom1));
   tl_assert(sameKindedAtoms(atom2,vatom2));
   tl_assert(sameKindedAtoms(atom3,vatom3));
   switch (op) {
      case Iop_AddF64:
      case Iop_AddF64r32:
      case Iop_SubF64:
      case Iop_SubF64r32:
      case Iop_MulF64:
      case Iop_MulF64r32:
      case Iop_DivF64:
      case Iop_DivF64r32:
      case Iop_ScaleF64:
      case Iop_Yl2xF64:
      case Iop_Yl2xp1F64:
      case Iop_AtanF64:
      case Iop_PRemF64:
      case Iop_PRem1F64:
         /* I32(rm) x F64 x F64 -> F64
            (F64 shadows are carried as I64). */
         return mkLazy3(mce, Ity_I64, vatom1, vatom2, vatom3);
      case Iop_PRemC3210F64:
      case Iop_PRem1C3210F64:
         /* I32(rm) x F64 x F64 -> I32 */
         return mkLazy3(mce, Ity_I32, vatom1, vatom2, vatom3);
      case Iop_AddF32:
      case Iop_SubF32:
      case Iop_MulF32:
      case Iop_DivF32:
         /* I32(rm) x F32 x F32 -> F32
            (F32 shadows are carried as I32, hence Ity_I32). */
         return mkLazy3(mce, Ity_I32, vatom1, vatom2, vatom3);
      default:
         ppIROp(op);
         VG_(tool_panic)("memcheck:expr2vbits_Triop");
   }
}
2085
2086
2087static
sewardj95448072004-11-22 20:19:51 +00002088IRAtom* expr2vbits_Binop ( MCEnv* mce,
2089 IROp op,
2090 IRAtom* atom1, IRAtom* atom2 )
2091{
2092 IRType and_or_ty;
2093 IRAtom* (*uifu) (MCEnv*, IRAtom*, IRAtom*);
2094 IRAtom* (*difd) (MCEnv*, IRAtom*, IRAtom*);
2095 IRAtom* (*improve) (MCEnv*, IRAtom*, IRAtom*);
2096
2097 IRAtom* vatom1 = expr2vbits( mce, atom1 );
2098 IRAtom* vatom2 = expr2vbits( mce, atom2 );
2099
2100 tl_assert(isOriginalAtom(mce,atom1));
2101 tl_assert(isOriginalAtom(mce,atom2));
2102 tl_assert(isShadowAtom(mce,vatom1));
2103 tl_assert(isShadowAtom(mce,vatom2));
2104 tl_assert(sameKindedAtoms(atom1,vatom1));
2105 tl_assert(sameKindedAtoms(atom2,vatom2));
2106 switch (op) {
2107
sewardjacd2e912005-01-13 19:17:06 +00002108 /* 64-bit SIMD */
2109
2110 case Iop_ShrN16x4:
2111 case Iop_ShrN32x2:
sewardj03809ae2006-12-27 01:16:58 +00002112 case Iop_SarN8x8:
sewardjacd2e912005-01-13 19:17:06 +00002113 case Iop_SarN16x4:
2114 case Iop_SarN32x2:
2115 case Iop_ShlN16x4:
2116 case Iop_ShlN32x2:
sewardj114a9172008-02-09 01:49:32 +00002117 case Iop_ShlN8x8:
sewardjacd2e912005-01-13 19:17:06 +00002118 /* Same scheme as with all other shifts. */
2119 complainIfUndefined(mce, atom2);
sewardj7cf4e6b2008-05-01 20:24:26 +00002120 return assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2));
sewardjacd2e912005-01-13 19:17:06 +00002121
2122 case Iop_QNarrow32Sx2:
2123 case Iop_QNarrow16Sx4:
2124 case Iop_QNarrow16Ux4:
2125 return vectorNarrow64(mce, op, vatom1, vatom2);
2126
2127 case Iop_Min8Ux8:
2128 case Iop_Max8Ux8:
2129 case Iop_Avg8Ux8:
2130 case Iop_QSub8Sx8:
2131 case Iop_QSub8Ux8:
2132 case Iop_Sub8x8:
2133 case Iop_CmpGT8Sx8:
2134 case Iop_CmpEQ8x8:
2135 case Iop_QAdd8Sx8:
2136 case Iop_QAdd8Ux8:
2137 case Iop_Add8x8:
2138 return binary8Ix8(mce, vatom1, vatom2);
2139
2140 case Iop_Min16Sx4:
2141 case Iop_Max16Sx4:
2142 case Iop_Avg16Ux4:
2143 case Iop_QSub16Ux4:
2144 case Iop_QSub16Sx4:
2145 case Iop_Sub16x4:
2146 case Iop_Mul16x4:
2147 case Iop_MulHi16Sx4:
2148 case Iop_MulHi16Ux4:
2149 case Iop_CmpGT16Sx4:
2150 case Iop_CmpEQ16x4:
2151 case Iop_QAdd16Sx4:
2152 case Iop_QAdd16Ux4:
2153 case Iop_Add16x4:
2154 return binary16Ix4(mce, vatom1, vatom2);
2155
2156 case Iop_Sub32x2:
sewardj114a9172008-02-09 01:49:32 +00002157 case Iop_Mul32x2:
sewardjacd2e912005-01-13 19:17:06 +00002158 case Iop_CmpGT32Sx2:
2159 case Iop_CmpEQ32x2:
2160 case Iop_Add32x2:
2161 return binary32Ix2(mce, vatom1, vatom2);
2162
2163 /* 64-bit data-steering */
2164 case Iop_InterleaveLO32x2:
2165 case Iop_InterleaveLO16x4:
2166 case Iop_InterleaveLO8x8:
2167 case Iop_InterleaveHI32x2:
2168 case Iop_InterleaveHI16x4:
2169 case Iop_InterleaveHI8x8:
sewardj114a9172008-02-09 01:49:32 +00002170 case Iop_CatOddLanes16x4:
2171 case Iop_CatEvenLanes16x4:
sewardj7cf4e6b2008-05-01 20:24:26 +00002172 return assignNew('V', mce, Ity_I64, binop(op, vatom1, vatom2));
sewardjacd2e912005-01-13 19:17:06 +00002173
sewardj114a9172008-02-09 01:49:32 +00002174 /* Perm8x8: rearrange values in left arg using steering values
2175 from right arg. So rearrange the vbits in the same way but
2176 pessimise wrt steering values. */
2177 case Iop_Perm8x8:
2178 return mkUifU64(
2179 mce,
sewardj7cf4e6b2008-05-01 20:24:26 +00002180 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
sewardj114a9172008-02-09 01:49:32 +00002181 mkPCast8x8(mce, vatom2)
2182 );
2183
sewardj20d38f22005-02-07 23:50:18 +00002184 /* V128-bit SIMD */
sewardj0b070592004-12-10 21:44:22 +00002185
sewardja1d93302004-12-12 16:45:06 +00002186 case Iop_ShrN16x8:
2187 case Iop_ShrN32x4:
2188 case Iop_ShrN64x2:
2189 case Iop_SarN16x8:
2190 case Iop_SarN32x4:
2191 case Iop_ShlN16x8:
2192 case Iop_ShlN32x4:
2193 case Iop_ShlN64x2:
sewardj620eb5b2005-10-22 12:50:43 +00002194 case Iop_ShlN8x16:
2195 case Iop_SarN8x16:
2196 /* Same scheme as with all other shifts. Note: 22 Oct 05:
2197 this is wrong now, scalar shifts are done properly lazily.
2198 Vector shifts should be fixed too. */
sewardja1d93302004-12-12 16:45:06 +00002199 complainIfUndefined(mce, atom2);
sewardj7cf4e6b2008-05-01 20:24:26 +00002200 return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2));
sewardja1d93302004-12-12 16:45:06 +00002201
sewardjcbf8be72005-11-10 18:34:41 +00002202 /* V x V shifts/rotates are done using the standard lazy scheme. */
sewardj43d60752005-11-10 18:13:01 +00002203 case Iop_Shl8x16:
2204 case Iop_Shr8x16:
2205 case Iop_Sar8x16:
sewardjcbf8be72005-11-10 18:34:41 +00002206 case Iop_Rol8x16:
sewardj43d60752005-11-10 18:13:01 +00002207 return mkUifUV128(mce,
sewardj7cf4e6b2008-05-01 20:24:26 +00002208 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
sewardj43d60752005-11-10 18:13:01 +00002209 mkPCast8x16(mce,vatom2)
2210 );
2211
2212 case Iop_Shl16x8:
2213 case Iop_Shr16x8:
2214 case Iop_Sar16x8:
sewardjcbf8be72005-11-10 18:34:41 +00002215 case Iop_Rol16x8:
sewardj43d60752005-11-10 18:13:01 +00002216 return mkUifUV128(mce,
sewardj7cf4e6b2008-05-01 20:24:26 +00002217 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
sewardj43d60752005-11-10 18:13:01 +00002218 mkPCast16x8(mce,vatom2)
2219 );
2220
2221 case Iop_Shl32x4:
2222 case Iop_Shr32x4:
2223 case Iop_Sar32x4:
sewardjcbf8be72005-11-10 18:34:41 +00002224 case Iop_Rol32x4:
sewardj43d60752005-11-10 18:13:01 +00002225 return mkUifUV128(mce,
sewardj7cf4e6b2008-05-01 20:24:26 +00002226 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
sewardj43d60752005-11-10 18:13:01 +00002227 mkPCast32x4(mce,vatom2)
2228 );
2229
sewardja1d93302004-12-12 16:45:06 +00002230 case Iop_QSub8Ux16:
2231 case Iop_QSub8Sx16:
2232 case Iop_Sub8x16:
2233 case Iop_Min8Ux16:
sewardj43d60752005-11-10 18:13:01 +00002234 case Iop_Min8Sx16:
sewardja1d93302004-12-12 16:45:06 +00002235 case Iop_Max8Ux16:
sewardj43d60752005-11-10 18:13:01 +00002236 case Iop_Max8Sx16:
sewardja1d93302004-12-12 16:45:06 +00002237 case Iop_CmpGT8Sx16:
sewardj43d60752005-11-10 18:13:01 +00002238 case Iop_CmpGT8Ux16:
sewardja1d93302004-12-12 16:45:06 +00002239 case Iop_CmpEQ8x16:
2240 case Iop_Avg8Ux16:
sewardj43d60752005-11-10 18:13:01 +00002241 case Iop_Avg8Sx16:
sewardja1d93302004-12-12 16:45:06 +00002242 case Iop_QAdd8Ux16:
2243 case Iop_QAdd8Sx16:
2244 case Iop_Add8x16:
2245 return binary8Ix16(mce, vatom1, vatom2);
2246
2247 case Iop_QSub16Ux8:
2248 case Iop_QSub16Sx8:
2249 case Iop_Sub16x8:
2250 case Iop_Mul16x8:
2251 case Iop_MulHi16Sx8:
2252 case Iop_MulHi16Ux8:
2253 case Iop_Min16Sx8:
sewardj43d60752005-11-10 18:13:01 +00002254 case Iop_Min16Ux8:
sewardja1d93302004-12-12 16:45:06 +00002255 case Iop_Max16Sx8:
sewardj43d60752005-11-10 18:13:01 +00002256 case Iop_Max16Ux8:
sewardja1d93302004-12-12 16:45:06 +00002257 case Iop_CmpGT16Sx8:
sewardj43d60752005-11-10 18:13:01 +00002258 case Iop_CmpGT16Ux8:
sewardja1d93302004-12-12 16:45:06 +00002259 case Iop_CmpEQ16x8:
2260 case Iop_Avg16Ux8:
sewardj43d60752005-11-10 18:13:01 +00002261 case Iop_Avg16Sx8:
sewardja1d93302004-12-12 16:45:06 +00002262 case Iop_QAdd16Ux8:
2263 case Iop_QAdd16Sx8:
2264 case Iop_Add16x8:
2265 return binary16Ix8(mce, vatom1, vatom2);
2266
2267 case Iop_Sub32x4:
2268 case Iop_CmpGT32Sx4:
sewardj43d60752005-11-10 18:13:01 +00002269 case Iop_CmpGT32Ux4:
sewardja1d93302004-12-12 16:45:06 +00002270 case Iop_CmpEQ32x4:
sewardj43d60752005-11-10 18:13:01 +00002271 case Iop_QAdd32Sx4:
2272 case Iop_QAdd32Ux4:
2273 case Iop_QSub32Sx4:
2274 case Iop_QSub32Ux4:
2275 case Iop_Avg32Ux4:
2276 case Iop_Avg32Sx4:
sewardja1d93302004-12-12 16:45:06 +00002277 case Iop_Add32x4:
sewardj43d60752005-11-10 18:13:01 +00002278 case Iop_Max32Ux4:
2279 case Iop_Max32Sx4:
2280 case Iop_Min32Ux4:
2281 case Iop_Min32Sx4:
sewardjb823b852010-06-18 08:18:38 +00002282 case Iop_Mul32x4:
sewardja1d93302004-12-12 16:45:06 +00002283 return binary32Ix4(mce, vatom1, vatom2);
2284
2285 case Iop_Sub64x2:
2286 case Iop_Add64x2:
sewardjb823b852010-06-18 08:18:38 +00002287 case Iop_CmpGT64Sx2:
sewardja1d93302004-12-12 16:45:06 +00002288 return binary64Ix2(mce, vatom1, vatom2);
2289
2290 case Iop_QNarrow32Sx4:
sewardj43d60752005-11-10 18:13:01 +00002291 case Iop_QNarrow32Ux4:
sewardja1d93302004-12-12 16:45:06 +00002292 case Iop_QNarrow16Sx8:
2293 case Iop_QNarrow16Ux8:
sewardj20d38f22005-02-07 23:50:18 +00002294 return vectorNarrowV128(mce, op, vatom1, vatom2);
sewardja1d93302004-12-12 16:45:06 +00002295
sewardj0b070592004-12-10 21:44:22 +00002296 case Iop_Sub64Fx2:
2297 case Iop_Mul64Fx2:
2298 case Iop_Min64Fx2:
2299 case Iop_Max64Fx2:
2300 case Iop_Div64Fx2:
2301 case Iop_CmpLT64Fx2:
2302 case Iop_CmpLE64Fx2:
2303 case Iop_CmpEQ64Fx2:
sewardj545663e2005-11-05 01:55:04 +00002304 case Iop_CmpUN64Fx2:
sewardj0b070592004-12-10 21:44:22 +00002305 case Iop_Add64Fx2:
2306 return binary64Fx2(mce, vatom1, vatom2);
2307
2308 case Iop_Sub64F0x2:
2309 case Iop_Mul64F0x2:
2310 case Iop_Min64F0x2:
2311 case Iop_Max64F0x2:
2312 case Iop_Div64F0x2:
2313 case Iop_CmpLT64F0x2:
2314 case Iop_CmpLE64F0x2:
2315 case Iop_CmpEQ64F0x2:
sewardj545663e2005-11-05 01:55:04 +00002316 case Iop_CmpUN64F0x2:
sewardj0b070592004-12-10 21:44:22 +00002317 case Iop_Add64F0x2:
2318 return binary64F0x2(mce, vatom1, vatom2);
2319
sewardj170ee212004-12-10 18:57:51 +00002320 case Iop_Sub32Fx4:
2321 case Iop_Mul32Fx4:
2322 case Iop_Min32Fx4:
2323 case Iop_Max32Fx4:
2324 case Iop_Div32Fx4:
2325 case Iop_CmpLT32Fx4:
2326 case Iop_CmpLE32Fx4:
2327 case Iop_CmpEQ32Fx4:
sewardj545663e2005-11-05 01:55:04 +00002328 case Iop_CmpUN32Fx4:
cerione78ba2a2005-11-14 03:00:35 +00002329 case Iop_CmpGT32Fx4:
2330 case Iop_CmpGE32Fx4:
sewardj3245c912004-12-10 14:58:26 +00002331 case Iop_Add32Fx4:
2332 return binary32Fx4(mce, vatom1, vatom2);
2333
sewardj170ee212004-12-10 18:57:51 +00002334 case Iop_Sub32F0x4:
2335 case Iop_Mul32F0x4:
2336 case Iop_Min32F0x4:
2337 case Iop_Max32F0x4:
2338 case Iop_Div32F0x4:
2339 case Iop_CmpLT32F0x4:
2340 case Iop_CmpLE32F0x4:
2341 case Iop_CmpEQ32F0x4:
sewardj545663e2005-11-05 01:55:04 +00002342 case Iop_CmpUN32F0x4:
sewardj170ee212004-12-10 18:57:51 +00002343 case Iop_Add32F0x4:
2344 return binary32F0x4(mce, vatom1, vatom2);
2345
sewardj20d38f22005-02-07 23:50:18 +00002346 /* V128-bit data-steering */
2347 case Iop_SetV128lo32:
2348 case Iop_SetV128lo64:
2349 case Iop_64HLtoV128:
sewardja1d93302004-12-12 16:45:06 +00002350 case Iop_InterleaveLO64x2:
2351 case Iop_InterleaveLO32x4:
2352 case Iop_InterleaveLO16x8:
2353 case Iop_InterleaveLO8x16:
2354 case Iop_InterleaveHI64x2:
2355 case Iop_InterleaveHI32x4:
2356 case Iop_InterleaveHI16x8:
2357 case Iop_InterleaveHI8x16:
sewardj7cf4e6b2008-05-01 20:24:26 +00002358 return assignNew('V', mce, Ity_V128, binop(op, vatom1, vatom2));
sewardj620eb5b2005-10-22 12:50:43 +00002359
2360 /* Perm8x16: rearrange values in left arg using steering values
2361 from right arg. So rearrange the vbits in the same way but
2362 pessimise wrt steering values. */
2363 case Iop_Perm8x16:
2364 return mkUifUV128(
2365 mce,
sewardj7cf4e6b2008-05-01 20:24:26 +00002366 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
sewardj620eb5b2005-10-22 12:50:43 +00002367 mkPCast8x16(mce, vatom2)
2368 );
sewardj170ee212004-12-10 18:57:51 +00002369
sewardj43d60752005-11-10 18:13:01 +00002370 /* These two take the lower half of each 16-bit lane, sign/zero
2371 extend it to 32, and multiply together, producing a 32x4
2372 result (and implicitly ignoring half the operand bits). So
2373 treat it as a bunch of independent 16x8 operations, but then
2374 do 32-bit shifts left-right to copy the lower half results
2375 (which are all 0s or all 1s due to PCasting in binary16Ix8)
2376 into the upper half of each result lane. */
2377 case Iop_MullEven16Ux8:
2378 case Iop_MullEven16Sx8: {
2379 IRAtom* at;
2380 at = binary16Ix8(mce,vatom1,vatom2);
sewardj7cf4e6b2008-05-01 20:24:26 +00002381 at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN32x4, at, mkU8(16)));
2382 at = assignNew('V', mce, Ity_V128, binop(Iop_SarN32x4, at, mkU8(16)));
sewardj43d60752005-11-10 18:13:01 +00002383 return at;
2384 }
2385
2386 /* Same deal as Iop_MullEven16{S,U}x8 */
2387 case Iop_MullEven8Ux16:
2388 case Iop_MullEven8Sx16: {
2389 IRAtom* at;
2390 at = binary8Ix16(mce,vatom1,vatom2);
sewardj7cf4e6b2008-05-01 20:24:26 +00002391 at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN16x8, at, mkU8(8)));
2392 at = assignNew('V', mce, Ity_V128, binop(Iop_SarN16x8, at, mkU8(8)));
sewardj43d60752005-11-10 18:13:01 +00002393 return at;
2394 }
2395
2396 /* narrow 2xV128 into 1xV128, hi half from left arg, in a 2 x
2397 32x4 -> 16x8 laneage, discarding the upper half of each lane.
2398 Simply apply same op to the V bits, since this really no more
2399 than a data steering operation. */
sewardjcbf8be72005-11-10 18:34:41 +00002400 case Iop_Narrow32x4:
2401 case Iop_Narrow16x8:
sewardj7cf4e6b2008-05-01 20:24:26 +00002402 return assignNew('V', mce, Ity_V128,
2403 binop(op, vatom1, vatom2));
sewardj43d60752005-11-10 18:13:01 +00002404
2405 case Iop_ShrV128:
2406 case Iop_ShlV128:
2407 /* Same scheme as with all other shifts. Note: 10 Nov 05:
2408 this is wrong now, scalar shifts are done properly lazily.
2409 Vector shifts should be fixed too. */
2410 complainIfUndefined(mce, atom2);
sewardj7cf4e6b2008-05-01 20:24:26 +00002411 return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2));
sewardj43d60752005-11-10 18:13:01 +00002412
sewardj69a13322005-04-23 01:14:51 +00002413 /* I128-bit data-steering */
2414 case Iop_64HLto128:
sewardj7cf4e6b2008-05-01 20:24:26 +00002415 return assignNew('V', mce, Ity_I128, binop(op, vatom1, vatom2));
sewardj69a13322005-04-23 01:14:51 +00002416
sewardj3245c912004-12-10 14:58:26 +00002417 /* Scalar floating point */
2418
sewardjed69fdb2006-02-03 16:12:27 +00002419 case Iop_RoundF64toInt:
2420 case Iop_RoundF64toF32:
sewardj06f96d02009-12-31 19:24:12 +00002421 case Iop_F64toI64S:
2422 case Iop_I64StoF64:
sewardj22ac5f42006-02-03 22:55:04 +00002423 case Iop_SinF64:
2424 case Iop_CosF64:
2425 case Iop_TanF64:
2426 case Iop_2xm1F64:
2427 case Iop_SqrtF64:
2428 /* I32(rm) x I64/F64 -> I64/F64 */
sewardj95448072004-11-22 20:19:51 +00002429 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
2430
sewardjd376a762010-06-27 09:08:54 +00002431 case Iop_RoundF32toInt:
sewardjaec1be32010-01-03 22:29:32 +00002432 case Iop_SqrtF32:
2433 /* I32(rm) x I32/F32 -> I32/F32 */
2434 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
2435
sewardj59570ff2010-01-01 11:59:33 +00002436 case Iop_F64toI32U:
sewardj06f96d02009-12-31 19:24:12 +00002437 case Iop_F64toI32S:
sewardje9e16d32004-12-10 13:17:55 +00002438 case Iop_F64toF32:
sewardj95448072004-11-22 20:19:51 +00002439 /* First arg is I32 (rounding mode), second is F64 (data). */
2440 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
2441
sewardj06f96d02009-12-31 19:24:12 +00002442 case Iop_F64toI16S:
sewardj95448072004-11-22 20:19:51 +00002443 /* First arg is I32 (rounding mode), second is F64 (data). */
2444 return mkLazy2(mce, Ity_I16, vatom1, vatom2);
2445
sewardj95448072004-11-22 20:19:51 +00002446 case Iop_CmpF64:
2447 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
2448
2449 /* non-FP after here */
2450
2451 case Iop_DivModU64to32:
2452 case Iop_DivModS64to32:
2453 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
2454
sewardj69a13322005-04-23 01:14:51 +00002455 case Iop_DivModU128to64:
2456 case Iop_DivModS128to64:
2457 return mkLazy2(mce, Ity_I128, vatom1, vatom2);
2458
sewardj95448072004-11-22 20:19:51 +00002459 case Iop_16HLto32:
sewardj7cf4e6b2008-05-01 20:24:26 +00002460 return assignNew('V', mce, Ity_I32, binop(op, vatom1, vatom2));
sewardj95448072004-11-22 20:19:51 +00002461 case Iop_32HLto64:
sewardj7cf4e6b2008-05-01 20:24:26 +00002462 return assignNew('V', mce, Ity_I64, binop(op, vatom1, vatom2));
sewardj95448072004-11-22 20:19:51 +00002463
sewardj6cf40ff2005-04-20 22:31:26 +00002464 case Iop_MullS64:
2465 case Iop_MullU64: {
2466 IRAtom* vLo64 = mkLeft64(mce, mkUifU64(mce, vatom1,vatom2));
2467 IRAtom* vHi64 = mkPCastTo(mce, Ity_I64, vLo64);
sewardj7cf4e6b2008-05-01 20:24:26 +00002468 return assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, vHi64, vLo64));
sewardj6cf40ff2005-04-20 22:31:26 +00002469 }
2470
sewardj95448072004-11-22 20:19:51 +00002471 case Iop_MullS32:
2472 case Iop_MullU32: {
2473 IRAtom* vLo32 = mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
2474 IRAtom* vHi32 = mkPCastTo(mce, Ity_I32, vLo32);
sewardj7cf4e6b2008-05-01 20:24:26 +00002475 return assignNew('V', mce, Ity_I64, binop(Iop_32HLto64, vHi32, vLo32));
sewardj95448072004-11-22 20:19:51 +00002476 }
2477
2478 case Iop_MullS16:
2479 case Iop_MullU16: {
2480 IRAtom* vLo16 = mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
2481 IRAtom* vHi16 = mkPCastTo(mce, Ity_I16, vLo16);
sewardj7cf4e6b2008-05-01 20:24:26 +00002482 return assignNew('V', mce, Ity_I32, binop(Iop_16HLto32, vHi16, vLo16));
sewardj95448072004-11-22 20:19:51 +00002483 }
2484
2485 case Iop_MullS8:
2486 case Iop_MullU8: {
2487 IRAtom* vLo8 = mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
2488 IRAtom* vHi8 = mkPCastTo(mce, Ity_I8, vLo8);
sewardj7cf4e6b2008-05-01 20:24:26 +00002489 return assignNew('V', mce, Ity_I16, binop(Iop_8HLto16, vHi8, vLo8));
sewardj95448072004-11-22 20:19:51 +00002490 }
2491
cerion9e591082005-06-23 15:28:34 +00002492 case Iop_DivS32:
2493 case Iop_DivU32:
2494 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
2495
sewardjb00944a2005-12-23 12:47:16 +00002496 case Iop_DivS64:
2497 case Iop_DivU64:
2498 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
2499
sewardj95448072004-11-22 20:19:51 +00002500 case Iop_Add32:
sewardjd5204dc2004-12-31 01:16:11 +00002501 if (mce->bogusLiterals)
2502 return expensiveAddSub(mce,True,Ity_I32,
2503 vatom1,vatom2, atom1,atom2);
2504 else
2505 goto cheap_AddSub32;
sewardj95448072004-11-22 20:19:51 +00002506 case Iop_Sub32:
sewardjd5204dc2004-12-31 01:16:11 +00002507 if (mce->bogusLiterals)
2508 return expensiveAddSub(mce,False,Ity_I32,
2509 vatom1,vatom2, atom1,atom2);
2510 else
2511 goto cheap_AddSub32;
2512
2513 cheap_AddSub32:
sewardj95448072004-11-22 20:19:51 +00002514 case Iop_Mul32:
sewardj992dff92005-10-07 11:08:55 +00002515 return mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
2516
sewardj463b3d92005-07-18 11:41:15 +00002517 case Iop_CmpORD32S:
2518 case Iop_CmpORD32U:
sewardj1bc82102005-12-23 00:16:24 +00002519 case Iop_CmpORD64S:
2520 case Iop_CmpORD64U:
2521 return doCmpORD(mce, op, vatom1,vatom2, atom1,atom2);
sewardj95448072004-11-22 20:19:51 +00002522
sewardj681be302005-01-15 20:43:58 +00002523 case Iop_Add64:
tomd9774d72005-06-27 08:11:01 +00002524 if (mce->bogusLiterals)
2525 return expensiveAddSub(mce,True,Ity_I64,
2526 vatom1,vatom2, atom1,atom2);
2527 else
2528 goto cheap_AddSub64;
sewardj681be302005-01-15 20:43:58 +00002529 case Iop_Sub64:
tomd9774d72005-06-27 08:11:01 +00002530 if (mce->bogusLiterals)
2531 return expensiveAddSub(mce,False,Ity_I64,
2532 vatom1,vatom2, atom1,atom2);
2533 else
2534 goto cheap_AddSub64;
2535
2536 cheap_AddSub64:
2537 case Iop_Mul64:
sewardj681be302005-01-15 20:43:58 +00002538 return mkLeft64(mce, mkUifU64(mce, vatom1,vatom2));
2539
sewardj95448072004-11-22 20:19:51 +00002540 case Iop_Mul16:
2541 case Iop_Add16:
2542 case Iop_Sub16:
2543 return mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
2544
2545 case Iop_Sub8:
2546 case Iop_Add8:
2547 return mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
2548
sewardj69a13322005-04-23 01:14:51 +00002549 case Iop_CmpEQ64:
sewardje6f8af42005-07-06 18:48:59 +00002550 case Iop_CmpNE64:
sewardj69a13322005-04-23 01:14:51 +00002551 if (mce->bogusLiterals)
2552 return expensiveCmpEQorNE(mce,Ity_I64, vatom1,vatom2, atom1,atom2 );
2553 else
2554 goto cheap_cmp64;
2555 cheap_cmp64:
tomcd986332005-04-26 07:44:48 +00002556 case Iop_CmpLE64S: case Iop_CmpLE64U:
2557 case Iop_CmpLT64U: case Iop_CmpLT64S:
sewardj69a13322005-04-23 01:14:51 +00002558 return mkPCastTo(mce, Ity_I1, mkUifU64(mce, vatom1,vatom2));
2559
sewardjd5204dc2004-12-31 01:16:11 +00002560 case Iop_CmpEQ32:
sewardje6f8af42005-07-06 18:48:59 +00002561 case Iop_CmpNE32:
sewardjd5204dc2004-12-31 01:16:11 +00002562 if (mce->bogusLiterals)
2563 return expensiveCmpEQorNE(mce,Ity_I32, vatom1,vatom2, atom1,atom2 );
2564 else
2565 goto cheap_cmp32;
sewardjd5204dc2004-12-31 01:16:11 +00002566 cheap_cmp32:
sewardj95448072004-11-22 20:19:51 +00002567 case Iop_CmpLE32S: case Iop_CmpLE32U:
2568 case Iop_CmpLT32U: case Iop_CmpLT32S:
sewardj95448072004-11-22 20:19:51 +00002569 return mkPCastTo(mce, Ity_I1, mkUifU32(mce, vatom1,vatom2));
2570
2571 case Iop_CmpEQ16: case Iop_CmpNE16:
2572 return mkPCastTo(mce, Ity_I1, mkUifU16(mce, vatom1,vatom2));
2573
2574 case Iop_CmpEQ8: case Iop_CmpNE8:
2575 return mkPCastTo(mce, Ity_I1, mkUifU8(mce, vatom1,vatom2));
2576
sewardjafed4c52009-07-12 13:00:17 +00002577 case Iop_CasCmpEQ8: case Iop_CasCmpNE8:
2578 case Iop_CasCmpEQ16: case Iop_CasCmpNE16:
2579 case Iop_CasCmpEQ32: case Iop_CasCmpNE32:
2580 case Iop_CasCmpEQ64: case Iop_CasCmpNE64:
2581 /* Just say these all produce a defined result, regardless
2582 of their arguments. See COMMENT_ON_CasCmpEQ in this file. */
2583 return assignNew('V', mce, Ity_I1, definedOfType(Ity_I1));
2584
sewardjaaddbc22005-10-07 09:49:53 +00002585 case Iop_Shl64: case Iop_Shr64: case Iop_Sar64:
2586 return scalarShift( mce, Ity_I64, op, vatom1,vatom2, atom1,atom2 );
2587
sewardj95448072004-11-22 20:19:51 +00002588 case Iop_Shl32: case Iop_Shr32: case Iop_Sar32:
sewardjaaddbc22005-10-07 09:49:53 +00002589 return scalarShift( mce, Ity_I32, op, vatom1,vatom2, atom1,atom2 );
sewardj95448072004-11-22 20:19:51 +00002590
sewardjdb67f5f2004-12-14 01:15:31 +00002591 case Iop_Shl16: case Iop_Shr16: case Iop_Sar16:
sewardjaaddbc22005-10-07 09:49:53 +00002592 return scalarShift( mce, Ity_I16, op, vatom1,vatom2, atom1,atom2 );
sewardj95448072004-11-22 20:19:51 +00002593
2594 case Iop_Shl8: case Iop_Shr8:
sewardjaaddbc22005-10-07 09:49:53 +00002595 return scalarShift( mce, Ity_I8, op, vatom1,vatom2, atom1,atom2 );
sewardj95448072004-11-22 20:19:51 +00002596
sewardj20d38f22005-02-07 23:50:18 +00002597 case Iop_AndV128:
2598 uifu = mkUifUV128; difd = mkDifDV128;
2599 and_or_ty = Ity_V128; improve = mkImproveANDV128; goto do_And_Or;
sewardj7010f6e2004-12-10 13:35:22 +00002600 case Iop_And64:
2601 uifu = mkUifU64; difd = mkDifD64;
2602 and_or_ty = Ity_I64; improve = mkImproveAND64; goto do_And_Or;
sewardj95448072004-11-22 20:19:51 +00002603 case Iop_And32:
2604 uifu = mkUifU32; difd = mkDifD32;
2605 and_or_ty = Ity_I32; improve = mkImproveAND32; goto do_And_Or;
2606 case Iop_And16:
2607 uifu = mkUifU16; difd = mkDifD16;
2608 and_or_ty = Ity_I16; improve = mkImproveAND16; goto do_And_Or;
2609 case Iop_And8:
2610 uifu = mkUifU8; difd = mkDifD8;
2611 and_or_ty = Ity_I8; improve = mkImproveAND8; goto do_And_Or;
2612
sewardj20d38f22005-02-07 23:50:18 +00002613 case Iop_OrV128:
2614 uifu = mkUifUV128; difd = mkDifDV128;
2615 and_or_ty = Ity_V128; improve = mkImproveORV128; goto do_And_Or;
sewardj7010f6e2004-12-10 13:35:22 +00002616 case Iop_Or64:
2617 uifu = mkUifU64; difd = mkDifD64;
2618 and_or_ty = Ity_I64; improve = mkImproveOR64; goto do_And_Or;
sewardj95448072004-11-22 20:19:51 +00002619 case Iop_Or32:
2620 uifu = mkUifU32; difd = mkDifD32;
2621 and_or_ty = Ity_I32; improve = mkImproveOR32; goto do_And_Or;
2622 case Iop_Or16:
2623 uifu = mkUifU16; difd = mkDifD16;
2624 and_or_ty = Ity_I16; improve = mkImproveOR16; goto do_And_Or;
2625 case Iop_Or8:
2626 uifu = mkUifU8; difd = mkDifD8;
2627 and_or_ty = Ity_I8; improve = mkImproveOR8; goto do_And_Or;
2628
2629 do_And_Or:
2630 return
2631 assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +00002632 'V', mce,
sewardj95448072004-11-22 20:19:51 +00002633 and_or_ty,
2634 difd(mce, uifu(mce, vatom1, vatom2),
2635 difd(mce, improve(mce, atom1, vatom1),
2636 improve(mce, atom2, vatom2) ) ) );
2637
2638 case Iop_Xor8:
2639 return mkUifU8(mce, vatom1, vatom2);
2640 case Iop_Xor16:
2641 return mkUifU16(mce, vatom1, vatom2);
2642 case Iop_Xor32:
2643 return mkUifU32(mce, vatom1, vatom2);
sewardj7010f6e2004-12-10 13:35:22 +00002644 case Iop_Xor64:
2645 return mkUifU64(mce, vatom1, vatom2);
sewardj20d38f22005-02-07 23:50:18 +00002646 case Iop_XorV128:
2647 return mkUifUV128(mce, vatom1, vatom2);
njn25e49d8e72002-09-23 09:36:25 +00002648
2649 default:
sewardj95448072004-11-22 20:19:51 +00002650 ppIROp(op);
2651 VG_(tool_panic)("memcheck:expr2vbits_Binop");
njn25e49d8e72002-09-23 09:36:25 +00002652 }
njn25e49d8e72002-09-23 09:36:25 +00002653}
2654
njn25e49d8e72002-09-23 09:36:25 +00002655
static
/* Compute the shadow (V-bits) expression for a unary op application.
   'atom' is the original operand; its shadow is computed first, then
   the result shadow is built according to which IROp this is.  The
   cases fall into a few schemes:
     - SIMD FP ops: delegate to the unary{32,64}Fx/F0x helpers;
     - pure data-steering (widen/narrow/dup/reinterpret/not): apply
       the same op, or an op of the result width, to the V bits;
     - everything else: pessimise via mkPCastTo to the result type. */
IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
{
   IRAtom* vatom = expr2vbits( mce, atom );
   tl_assert(isOriginalAtom(mce,atom));
   switch (op) {

      /* 64Fx2 SIMD FP: per-lane lazy scheme. */
      case Iop_Sqrt64Fx2:
         return unary64Fx2(mce, vatom);

      /* As above but only the low 64-bit lane is computed. */
      case Iop_Sqrt64F0x2:
         return unary64F0x2(mce, vatom);

      /* 32Fx4 SIMD FP (including int<->FP conversions and roundings):
         per-lane lazy scheme. */
      case Iop_Sqrt32Fx4:
      case Iop_RSqrt32Fx4:
      case Iop_Recip32Fx4:
      case Iop_I32UtoFx4:
      case Iop_I32StoFx4:
      case Iop_QFtoI32Ux4_RZ:
      case Iop_QFtoI32Sx4_RZ:
      case Iop_RoundF32x4_RM:
      case Iop_RoundF32x4_RP:
      case Iop_RoundF32x4_RN:
      case Iop_RoundF32x4_RZ:
         return unary32Fx4(mce, vatom);

      /* As above but only the low 32-bit lane is computed. */
      case Iop_Sqrt32F0x4:
      case Iop_RSqrt32F0x4:
      case Iop_Recip32F0x4:
         return unary32F0x4(mce, vatom);

      /* Widening/duplication into V128: pure data steering, so apply
         the identical op to the V bits. */
      case Iop_32UtoV128:
      case Iop_64UtoV128:
      case Iop_Dup8x16:
      case Iop_Dup16x8:
      case Iop_Dup32x4:
         return assignNew('V', mce, Ity_V128, unop(op, vatom));

      /* Ops with a 64-bit-typed result whose definedness we
         approximate by pessimising the operand's V bits to I64. */
      case Iop_F32toF64:
      case Iop_I32StoF64:
      case Iop_I32UtoF64:
      case Iop_NegF64:
      case Iop_AbsF64:
      case Iop_Est5FRSqrt:
      case Iop_RoundF64toF64_NEAREST:
      case Iop_RoundF64toF64_NegINF:
      case Iop_RoundF64toF64_PosINF:
      case Iop_RoundF64toF64_ZERO:
      case Iop_Clz64:
      case Iop_Ctz64:
         return mkPCastTo(mce, Ity_I64, vatom);

      /* Ditto, 32-bit-typed results. */
      case Iop_Clz32:
      case Iop_Ctz32:
      case Iop_TruncF64asF32:
      case Iop_NegF32:
      case Iop_AbsF32:
         return mkPCastTo(mce, Ity_I32, vatom);

      /* Integer/vector narrowing and widening to I64: data steering;
         apply the same op to the V bits. */
      case Iop_1Uto64:
      case Iop_8Uto64:
      case Iop_8Sto64:
      case Iop_16Uto64:
      case Iop_16Sto64:
      case Iop_32Sto64:
      case Iop_32Uto64:
      case Iop_V128to64:
      case Iop_V128HIto64:
      case Iop_128HIto64:
      case Iop_128to64:
         return assignNew('V', mce, Ity_I64, unop(op, vatom));

      /* Same scheme, I32 results. */
      case Iop_64to32:
      case Iop_64HIto32:
      case Iop_1Uto32:
      case Iop_1Sto32:
      case Iop_8Uto32:
      case Iop_16Uto32:
      case Iop_16Sto32:
      case Iop_8Sto32:
      case Iop_V128to32:
         return assignNew('V', mce, Ity_I32, unop(op, vatom));

      /* Same scheme, I16 results. */
      case Iop_8Sto16:
      case Iop_8Uto16:
      case Iop_32to16:
      case Iop_32HIto16:
      case Iop_64to16:
         return assignNew('V', mce, Ity_I16, unop(op, vatom));

      /* Same scheme, I8 results. */
      case Iop_1Uto8:
      case Iop_16to8:
      case Iop_16HIto8:
      case Iop_32to8:
      case Iop_64to8:
         return assignNew('V', mce, Ity_I8, unop(op, vatom));

      /* Truncations to a single bit (here 'op' is spelled out
         explicitly; it is the same op). */
      case Iop_32to1:
         return assignNew('V', mce, Ity_I1, unop(Iop_32to1, vatom));

      case Iop_64to1:
         return assignNew('V', mce, Ity_I1, unop(Iop_64to1, vatom));

      /* Reinterpretations and bitwise complements move/flip bits
         without mixing them, so each result bit's definedness is
         exactly that of the corresponding operand bit: pass the
         shadow through unchanged. */
      case Iop_ReinterpF64asI64:
      case Iop_ReinterpI64asF64:
      case Iop_ReinterpI32asF32:
      case Iop_ReinterpF32asI32:
      case Iop_NotV128:
      case Iop_Not64:
      case Iop_Not32:
      case Iop_Not16:
      case Iop_Not8:
      case Iop_Not1:
         return vatom;

      default:
         ppIROp(op);
         VG_(tool_panic)("memcheck:expr2vbits_Unop");
   }
}
2776
2777
sewardj170ee212004-12-10 18:57:51 +00002778/* Worker function; do not call directly. */
sewardj95448072004-11-22 20:19:51 +00002779static
sewardj2e595852005-06-30 23:33:37 +00002780IRAtom* expr2vbits_Load_WRK ( MCEnv* mce,
2781 IREndness end, IRType ty,
2782 IRAtom* addr, UInt bias )
sewardj95448072004-11-22 20:19:51 +00002783{
2784 void* helper;
2785 Char* hname;
2786 IRDirty* di;
2787 IRTemp datavbits;
2788 IRAtom* addrAct;
2789
2790 tl_assert(isOriginalAtom(mce,addr));
sewardj2e595852005-06-30 23:33:37 +00002791 tl_assert(end == Iend_LE || end == Iend_BE);
sewardj95448072004-11-22 20:19:51 +00002792
2793 /* First, emit a definedness test for the address. This also sets
2794 the address (shadow) to 'defined' following the test. */
2795 complainIfUndefined( mce, addr );
2796
2797 /* Now cook up a call to the relevant helper function, to read the
2798 data V bits from shadow memory. */
sewardj7cf4e6b2008-05-01 20:24:26 +00002799 ty = shadowTypeV(ty);
sewardj2e595852005-06-30 23:33:37 +00002800
2801 if (end == Iend_LE) {
2802 switch (ty) {
njn1d0825f2006-03-27 11:37:07 +00002803 case Ity_I64: helper = &MC_(helperc_LOADV64le);
2804 hname = "MC_(helperc_LOADV64le)";
sewardj2e595852005-06-30 23:33:37 +00002805 break;
njn1d0825f2006-03-27 11:37:07 +00002806 case Ity_I32: helper = &MC_(helperc_LOADV32le);
2807 hname = "MC_(helperc_LOADV32le)";
sewardj2e595852005-06-30 23:33:37 +00002808 break;
njn1d0825f2006-03-27 11:37:07 +00002809 case Ity_I16: helper = &MC_(helperc_LOADV16le);
2810 hname = "MC_(helperc_LOADV16le)";
sewardj2e595852005-06-30 23:33:37 +00002811 break;
njn1d0825f2006-03-27 11:37:07 +00002812 case Ity_I8: helper = &MC_(helperc_LOADV8);
2813 hname = "MC_(helperc_LOADV8)";
sewardj2e595852005-06-30 23:33:37 +00002814 break;
2815 default: ppIRType(ty);
2816 VG_(tool_panic)("memcheck:do_shadow_Load(LE)");
2817 }
2818 } else {
sewardj8cf88b72005-07-08 01:29:33 +00002819 switch (ty) {
njn1d0825f2006-03-27 11:37:07 +00002820 case Ity_I64: helper = &MC_(helperc_LOADV64be);
2821 hname = "MC_(helperc_LOADV64be)";
sewardj8cf88b72005-07-08 01:29:33 +00002822 break;
njn1d0825f2006-03-27 11:37:07 +00002823 case Ity_I32: helper = &MC_(helperc_LOADV32be);
2824 hname = "MC_(helperc_LOADV32be)";
sewardj8cf88b72005-07-08 01:29:33 +00002825 break;
njn1d0825f2006-03-27 11:37:07 +00002826 case Ity_I16: helper = &MC_(helperc_LOADV16be);
2827 hname = "MC_(helperc_LOADV16be)";
sewardj8cf88b72005-07-08 01:29:33 +00002828 break;
njn1d0825f2006-03-27 11:37:07 +00002829 case Ity_I8: helper = &MC_(helperc_LOADV8);
2830 hname = "MC_(helperc_LOADV8)";
sewardj8cf88b72005-07-08 01:29:33 +00002831 break;
2832 default: ppIRType(ty);
2833 VG_(tool_panic)("memcheck:do_shadow_Load(BE)");
2834 }
sewardj95448072004-11-22 20:19:51 +00002835 }
2836
2837 /* Generate the actual address into addrAct. */
2838 if (bias == 0) {
2839 addrAct = addr;
2840 } else {
sewardj7cf97ee2004-11-28 14:25:01 +00002841 IROp mkAdd;
2842 IRAtom* eBias;
sewardj95448072004-11-22 20:19:51 +00002843 IRType tyAddr = mce->hWordTy;
2844 tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
sewardj7cf97ee2004-11-28 14:25:01 +00002845 mkAdd = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
2846 eBias = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
sewardj7cf4e6b2008-05-01 20:24:26 +00002847 addrAct = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBias) );
sewardj95448072004-11-22 20:19:51 +00002848 }
2849
2850 /* We need to have a place to park the V bits we're just about to
2851 read. */
sewardj1c0ce7a2009-07-01 08:10:49 +00002852 datavbits = newTemp(mce, ty, VSh);
sewardj95448072004-11-22 20:19:51 +00002853 di = unsafeIRDirty_1_N( datavbits,
sewardj53ee1fc2005-12-23 02:29:58 +00002854 1/*regparms*/,
2855 hname, VG_(fnptr_to_fnentry)( helper ),
sewardj95448072004-11-22 20:19:51 +00002856 mkIRExprVec_1( addrAct ));
2857 setHelperAnns( mce, di );
sewardj7cf4e6b2008-05-01 20:24:26 +00002858 stmt( 'V', mce, IRStmt_Dirty(di) );
sewardj95448072004-11-22 20:19:51 +00002859
2860 return mkexpr(datavbits);
2861}
2862
2863
2864static
sewardj2e595852005-06-30 23:33:37 +00002865IRAtom* expr2vbits_Load ( MCEnv* mce,
2866 IREndness end, IRType ty,
2867 IRAtom* addr, UInt bias )
sewardj170ee212004-12-10 18:57:51 +00002868{
2869 IRAtom *v64hi, *v64lo;
sewardj2e595852005-06-30 23:33:37 +00002870 tl_assert(end == Iend_LE || end == Iend_BE);
sewardj7cf4e6b2008-05-01 20:24:26 +00002871 switch (shadowTypeV(ty)) {
sewardj170ee212004-12-10 18:57:51 +00002872 case Ity_I8:
2873 case Ity_I16:
2874 case Ity_I32:
2875 case Ity_I64:
sewardj2e595852005-06-30 23:33:37 +00002876 return expr2vbits_Load_WRK(mce, end, ty, addr, bias);
sewardj170ee212004-12-10 18:57:51 +00002877 case Ity_V128:
sewardj2e595852005-06-30 23:33:37 +00002878 if (end == Iend_LE) {
2879 v64lo = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias);
2880 v64hi = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+8);
2881 } else {
sewardj2e595852005-06-30 23:33:37 +00002882 v64hi = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias);
2883 v64lo = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+8);
2884 }
sewardj7cf4e6b2008-05-01 20:24:26 +00002885 return assignNew( 'V', mce,
sewardj170ee212004-12-10 18:57:51 +00002886 Ity_V128,
sewardj20d38f22005-02-07 23:50:18 +00002887 binop(Iop_64HLtoV128, v64hi, v64lo));
sewardj170ee212004-12-10 18:57:51 +00002888 default:
sewardj2e595852005-06-30 23:33:37 +00002889 VG_(tool_panic)("expr2vbits_Load");
sewardj170ee212004-12-10 18:57:51 +00002890 }
2891}
2892
2893
2894static
sewardj95448072004-11-22 20:19:51 +00002895IRAtom* expr2vbits_Mux0X ( MCEnv* mce,
2896 IRAtom* cond, IRAtom* expr0, IRAtom* exprX )
2897{
2898 IRAtom *vbitsC, *vbits0, *vbitsX;
2899 IRType ty;
2900 /* Given Mux0X(cond,expr0,exprX), generate
2901 Mux0X(cond,expr0#,exprX#) `UifU` PCast(cond#)
2902 That is, steer the V bits like the originals, but trash the
2903 result if the steering value is undefined. This gives
2904 lazy propagation. */
2905 tl_assert(isOriginalAtom(mce, cond));
2906 tl_assert(isOriginalAtom(mce, expr0));
2907 tl_assert(isOriginalAtom(mce, exprX));
2908
2909 vbitsC = expr2vbits(mce, cond);
2910 vbits0 = expr2vbits(mce, expr0);
2911 vbitsX = expr2vbits(mce, exprX);
sewardj1c0ce7a2009-07-01 08:10:49 +00002912 ty = typeOfIRExpr(mce->sb->tyenv, vbits0);
sewardj95448072004-11-22 20:19:51 +00002913
2914 return
sewardj7cf4e6b2008-05-01 20:24:26 +00002915 mkUifU(mce, ty, assignNew('V', mce, ty,
2916 IRExpr_Mux0X(cond, vbits0, vbitsX)),
sewardj95448072004-11-22 20:19:51 +00002917 mkPCastTo(mce, ty, vbitsC) );
2918}
2919
2920/* --------- This is the main expression-handling function. --------- */
2921
2922static
2923IRExpr* expr2vbits ( MCEnv* mce, IRExpr* e )
2924{
2925 switch (e->tag) {
2926
2927 case Iex_Get:
2928 return shadow_GET( mce, e->Iex.Get.offset, e->Iex.Get.ty );
2929
2930 case Iex_GetI:
2931 return shadow_GETI( mce, e->Iex.GetI.descr,
2932 e->Iex.GetI.ix, e->Iex.GetI.bias );
2933
sewardj0b9d74a2006-12-24 02:24:11 +00002934 case Iex_RdTmp:
sewardj7cf4e6b2008-05-01 20:24:26 +00002935 return IRExpr_RdTmp( findShadowTmpV(mce, e->Iex.RdTmp.tmp) );
sewardj95448072004-11-22 20:19:51 +00002936
2937 case Iex_Const:
sewardj1c0ce7a2009-07-01 08:10:49 +00002938 return definedOfType(shadowTypeV(typeOfIRExpr(mce->sb->tyenv, e)));
sewardj95448072004-11-22 20:19:51 +00002939
sewardje91cea72006-02-08 19:32:02 +00002940 case Iex_Qop:
2941 return expr2vbits_Qop(
2942 mce,
2943 e->Iex.Qop.op,
2944 e->Iex.Qop.arg1, e->Iex.Qop.arg2,
2945 e->Iex.Qop.arg3, e->Iex.Qop.arg4
2946 );
2947
sewardjed69fdb2006-02-03 16:12:27 +00002948 case Iex_Triop:
2949 return expr2vbits_Triop(
2950 mce,
2951 e->Iex.Triop.op,
2952 e->Iex.Triop.arg1, e->Iex.Triop.arg2, e->Iex.Triop.arg3
2953 );
2954
sewardj95448072004-11-22 20:19:51 +00002955 case Iex_Binop:
2956 return expr2vbits_Binop(
2957 mce,
2958 e->Iex.Binop.op,
2959 e->Iex.Binop.arg1, e->Iex.Binop.arg2
2960 );
2961
2962 case Iex_Unop:
2963 return expr2vbits_Unop( mce, e->Iex.Unop.op, e->Iex.Unop.arg );
2964
sewardj2e595852005-06-30 23:33:37 +00002965 case Iex_Load:
2966 return expr2vbits_Load( mce, e->Iex.Load.end,
2967 e->Iex.Load.ty,
2968 e->Iex.Load.addr, 0/*addr bias*/ );
sewardj95448072004-11-22 20:19:51 +00002969
2970 case Iex_CCall:
2971 return mkLazyN( mce, e->Iex.CCall.args,
2972 e->Iex.CCall.retty,
2973 e->Iex.CCall.cee );
2974
2975 case Iex_Mux0X:
2976 return expr2vbits_Mux0X( mce, e->Iex.Mux0X.cond, e->Iex.Mux0X.expr0,
2977 e->Iex.Mux0X.exprX);
njn25e49d8e72002-09-23 09:36:25 +00002978
2979 default:
sewardj95448072004-11-22 20:19:51 +00002980 VG_(printf)("\n");
2981 ppIRExpr(e);
2982 VG_(printf)("\n");
2983 VG_(tool_panic)("memcheck: expr2vbits");
njn25e49d8e72002-09-23 09:36:25 +00002984 }
njn25e49d8e72002-09-23 09:36:25 +00002985}
2986
2987/*------------------------------------------------------------*/
sewardj95448072004-11-22 20:19:51 +00002988/*--- Generate shadow stmts from all kinds of IRStmts. ---*/
njn25e49d8e72002-09-23 09:36:25 +00002989/*------------------------------------------------------------*/
2990
sewardj95448072004-11-22 20:19:51 +00002991/* Widen a value to the host word size. */
njn25e49d8e72002-09-23 09:36:25 +00002992
2993static
sewardj95448072004-11-22 20:19:51 +00002994IRExpr* zwidenToHostWord ( MCEnv* mce, IRAtom* vatom )
njn25e49d8e72002-09-23 09:36:25 +00002995{
sewardj7cf97ee2004-11-28 14:25:01 +00002996 IRType ty, tyH;
2997
sewardj95448072004-11-22 20:19:51 +00002998 /* vatom is vbits-value and as such can only have a shadow type. */
2999 tl_assert(isShadowAtom(mce,vatom));
njn25e49d8e72002-09-23 09:36:25 +00003000
sewardj1c0ce7a2009-07-01 08:10:49 +00003001 ty = typeOfIRExpr(mce->sb->tyenv, vatom);
sewardj7cf97ee2004-11-28 14:25:01 +00003002 tyH = mce->hWordTy;
njn25e49d8e72002-09-23 09:36:25 +00003003
sewardj95448072004-11-22 20:19:51 +00003004 if (tyH == Ity_I32) {
3005 switch (ty) {
sewardj7cf4e6b2008-05-01 20:24:26 +00003006 case Ity_I32:
3007 return vatom;
3008 case Ity_I16:
3009 return assignNew('V', mce, tyH, unop(Iop_16Uto32, vatom));
3010 case Ity_I8:
3011 return assignNew('V', mce, tyH, unop(Iop_8Uto32, vatom));
3012 default:
3013 goto unhandled;
sewardj8ec2cfc2002-10-13 00:57:26 +00003014 }
sewardj6cf40ff2005-04-20 22:31:26 +00003015 } else
3016 if (tyH == Ity_I64) {
3017 switch (ty) {
sewardj7cf4e6b2008-05-01 20:24:26 +00003018 case Ity_I32:
3019 return assignNew('V', mce, tyH, unop(Iop_32Uto64, vatom));
3020 case Ity_I16:
3021 return assignNew('V', mce, tyH, unop(Iop_32Uto64,
3022 assignNew('V', mce, Ity_I32, unop(Iop_16Uto32, vatom))));
3023 case Ity_I8:
3024 return assignNew('V', mce, tyH, unop(Iop_32Uto64,
3025 assignNew('V', mce, Ity_I32, unop(Iop_8Uto32, vatom))));
3026 default:
3027 goto unhandled;
sewardj6cf40ff2005-04-20 22:31:26 +00003028 }
sewardj95448072004-11-22 20:19:51 +00003029 } else {
3030 goto unhandled;
sewardj8ec2cfc2002-10-13 00:57:26 +00003031 }
sewardj95448072004-11-22 20:19:51 +00003032 unhandled:
3033 VG_(printf)("\nty = "); ppIRType(ty); VG_(printf)("\n");
3034 VG_(tool_panic)("zwidenToHostWord");
njn25e49d8e72002-09-23 09:36:25 +00003035}
3036
njn25e49d8e72002-09-23 09:36:25 +00003037
sewardj95448072004-11-22 20:19:51 +00003038/* Generate a shadow store. addr is always the original address atom.
3039 You can pass in either originals or V-bits for the data atom, but
sewardj1c0ce7a2009-07-01 08:10:49 +00003040 obviously not both. guard :: Ity_I1 controls whether the store
3041 really happens; NULL means it unconditionally does. Note that
3042 guard itself is not checked for definedness; the caller of this
3043 function must do that if necessary. */
njn25e49d8e72002-09-23 09:36:25 +00003044
/* Emit IR to write the V-bits for a store into shadow memory.
   Exactly one of 'data' (original value; vbits computed here) or
   'vdata' (precomputed vbits) must be non-NULL.  'guard', if present,
   conditionalises the shadow store (used e.g. by the CAS machinery);
   its definedness is NOT checked here. */
static
void do_shadow_Store ( MCEnv* mce,
                       IREndness end,
                       IRAtom* addr, UInt bias,
                       IRAtom* data, IRAtom* vdata,
                       IRAtom* guard )
{
   IROp     mkAdd;
   IRType   ty, tyAddr;
   void*    helper = NULL;
   Char*    hname = NULL;
   IRConst* c;

   tyAddr = mce->hWordTy;
   mkAdd  = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
   tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
   tl_assert( end == Iend_LE || end == Iend_BE );

   /* Exactly one of data/vdata is supplied; derive vbits if needed. */
   if (data) {
      tl_assert(!vdata);
      tl_assert(isOriginalAtom(mce, data));
      tl_assert(bias == 0);
      vdata = expr2vbits( mce, data );
   } else {
      tl_assert(vdata);
   }

   tl_assert(isOriginalAtom(mce,addr));
   tl_assert(isShadowAtom(mce,vdata));

   if (guard) {
      tl_assert(isOriginalAtom(mce, guard));
      tl_assert(typeOfIRExpr(mce->sb->tyenv, guard) == Ity_I1);
   }

   ty = typeOfIRExpr(mce->sb->tyenv, vdata);

   // If we're not doing undefined value checking, pretend that this value
   // is "all valid".  That lets Vex's optimiser remove some of the V bit
   // shadow computation ops that precede it.
   if (MC_(clo_mc_level) == 1) {
      switch (ty) {
         case Ity_V128: // V128 weirdness
                       c = IRConst_V128(V_BITS16_DEFINED); break;
         case Ity_I64: c = IRConst_U64 (V_BITS64_DEFINED); break;
         case Ity_I32: c = IRConst_U32 (V_BITS32_DEFINED); break;
         case Ity_I16: c = IRConst_U16 (V_BITS16_DEFINED); break;
         case Ity_I8:  c = IRConst_U8  (V_BITS8_DEFINED);  break;
         default: VG_(tool_panic)("memcheck:do_shadow_Store(LE)");
      }
      vdata = IRExpr_Const( c );
   }

   /* First, emit a definedness test for the address.  This also sets
      the address (shadow) to 'defined' following the test. */
   complainIfUndefined( mce, addr );

   /* Now decide which helper function to call to write the data V
      bits into shadow memory.  Selection is by store width and
      endianness; I8 has a single endian-neutral helper. */
   if (end == Iend_LE) {
      switch (ty) {
         case Ity_V128: /* we'll use the helper twice */
         case Ity_I64: helper = &MC_(helperc_STOREV64le);
                       hname = "MC_(helperc_STOREV64le)";
                       break;
         case Ity_I32: helper = &MC_(helperc_STOREV32le);
                       hname = "MC_(helperc_STOREV32le)";
                       break;
         case Ity_I16: helper = &MC_(helperc_STOREV16le);
                       hname = "MC_(helperc_STOREV16le)";
                       break;
         case Ity_I8:  helper = &MC_(helperc_STOREV8);
                       hname = "MC_(helperc_STOREV8)";
                       break;
         default: VG_(tool_panic)("memcheck:do_shadow_Store(LE)");
      }
   } else {
      switch (ty) {
         case Ity_V128: /* we'll use the helper twice */
         case Ity_I64: helper = &MC_(helperc_STOREV64be);
                       hname = "MC_(helperc_STOREV64be)";
                       break;
         case Ity_I32: helper = &MC_(helperc_STOREV32be);
                       hname = "MC_(helperc_STOREV32be)";
                       break;
         case Ity_I16: helper = &MC_(helperc_STOREV16be);
                       hname = "MC_(helperc_STOREV16be)";
                       break;
         case Ity_I8:  helper = &MC_(helperc_STOREV8);
                       hname = "MC_(helperc_STOREV8)";
                       break;
         default: VG_(tool_panic)("memcheck:do_shadow_Store(BE)");
      }
   }

   if (ty == Ity_V128) {

      /* V128-bit case: split into two 64-bit halves and invoke the
         64-bit helper once per half, with endian-dependent offsets. */
      /* See comment in next clause re 64-bit regparms */
      /* also, need to be careful about endianness */

      Int     offLo64, offHi64;
      IRDirty *diLo64, *diHi64;
      IRAtom  *addrLo64, *addrHi64;
      IRAtom  *vdataLo64, *vdataHi64;
      IRAtom  *eBiasLo64, *eBiasHi64;

      if (end == Iend_LE) {
         offLo64 = 0;
         offHi64 = 8;
      } else {
         offLo64 = 8;
         offHi64 = 0;
      }

      eBiasLo64 = tyAddr==Ity_I32 ? mkU32(bias+offLo64) : mkU64(bias+offLo64);
      addrLo64  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasLo64) );
      vdataLo64 = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vdata));
      diLo64    = unsafeIRDirty_0_N(
                     1/*regparms*/,
                     hname, VG_(fnptr_to_fnentry)( helper ),
                     mkIRExprVec_2( addrLo64, vdataLo64 )
                  );
      eBiasHi64 = tyAddr==Ity_I32 ? mkU32(bias+offHi64) : mkU64(bias+offHi64);
      addrHi64  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasHi64) );
      vdataHi64 = assignNew('V', mce, Ity_I64, unop(Iop_V128HIto64, vdata));
      diHi64    = unsafeIRDirty_0_N(
                     1/*regparms*/,
                     hname, VG_(fnptr_to_fnentry)( helper ),
                     mkIRExprVec_2( addrHi64, vdataHi64 )
                  );
      if (guard) diLo64->guard = guard;
      if (guard) diHi64->guard = guard;
      setHelperAnns( mce, diLo64 );
      setHelperAnns( mce, diHi64 );
      stmt( 'V', mce, IRStmt_Dirty(diLo64) );
      stmt( 'V', mce, IRStmt_Dirty(diHi64) );

   } else {

      IRDirty *di;
      IRAtom  *addrAct;

      /* 8/16/32/64-bit cases */
      /* Generate the actual address into addrAct. */
      if (bias == 0) {
         addrAct = addr;
      } else {
         IRAtom* eBias   = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
         addrAct = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBias));
      }

      if (ty == Ity_I64) {
         /* We can't do this with regparm 2 on 32-bit platforms, since
            the back ends aren't clever enough to handle 64-bit
            regparm args.  Therefore be different. */
         di = unsafeIRDirty_0_N(
                 1/*regparms*/,
                 hname, VG_(fnptr_to_fnentry)( helper ),
                 mkIRExprVec_2( addrAct, vdata )
              );
      } else {
         /* Sub-word data is zero-widened to the host word so it fits
            in a register parameter. */
         di = unsafeIRDirty_0_N(
                 2/*regparms*/,
                 hname, VG_(fnptr_to_fnentry)( helper ),
                 mkIRExprVec_2( addrAct,
                                zwidenToHostWord( mce, vdata ))
              );
      }
      if (guard) di->guard = guard;
      setHelperAnns( mce, di );
      stmt( 'V', mce, IRStmt_Dirty(di) );
   }

}
njn25e49d8e72002-09-23 09:36:25 +00003220
njn25e49d8e72002-09-23 09:36:25 +00003221
sewardj95448072004-11-22 20:19:51 +00003222/* Do lazy pessimistic propagation through a dirty helper call, by
3223 looking at the annotations on it. This is the most complex part of
3224 Memcheck. */
njn25e49d8e72002-09-23 09:36:25 +00003225
sewardj95448072004-11-22 20:19:51 +00003226static IRType szToITy ( Int n )
3227{
3228 switch (n) {
3229 case 1: return Ity_I8;
3230 case 2: return Ity_I16;
3231 case 4: return Ity_I32;
3232 case 8: return Ity_I64;
3233 default: VG_(tool_panic)("szToITy(memcheck)");
3234 }
3235}
njn25e49d8e72002-09-23 09:36:25 +00003236
/* Instrument a dirty helper call by lazy pessimistic propagation:
   PCast the definedness of every input (guard, unmasked args, guest
   state read, memory read) down into one 32-bit summary 'curr', then
   write a suitably-cast copy of 'curr' to every output (the result
   temp, guest state written, memory written).
   NOTE(review): memory regions are chewed in 4- then 2-byte chunks;
   a trailing odd byte trips the tl_assert(toDo == 0). */
static
void do_shadow_Dirty ( MCEnv* mce, IRDirty* d )
{
   Int       i, n, toDo, gSz, gOff;
   IRAtom    *src, *here, *curr;
   IRType    tySrc, tyDst;
   IRTemp    dst;
   IREndness end;

   /* What's the native endianness?  We need to know this. */
#  if defined(VG_BIGENDIAN)
   end = Iend_BE;
#  elif defined(VG_LITTLEENDIAN)
   end = Iend_LE;
#  else
#    error "Unknown endianness"
#  endif

   /* First check the guard. */
   complainIfUndefined(mce, d->guard);

   /* Now round up all inputs and PCast over them. */
   curr = definedOfType(Ity_I32);

   /* Inputs: unmasked args.  Args whose bit is set in mcx_mask are
      exempt from definedness checking. */
   for (i = 0; d->args[i]; i++) {
      if (d->cee->mcx_mask & (1<<i)) {
         /* ignore this arg */
      } else {
         here = mkPCastTo( mce, Ity_I32, expr2vbits(mce, d->args[i]) );
         curr = mkUifU32(mce, here, curr);
      }
   }

   /* Inputs: guest state that we read. */
   for (i = 0; i < d->nFxState; i++) {
      tl_assert(d->fxState[i].fx != Ifx_None);
      if (d->fxState[i].fx == Ifx_Write)
         continue;

      /* Ignore any sections marked as 'always defined'. */
      if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size )) {
         if (0)
         VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
                     d->fxState[i].offset, d->fxState[i].size );
         continue;
      }

      /* This state element is read or modified.  So we need to
         consider it.  If larger than 8 bytes, deal with it in 8-byte
         chunks. */
      gSz  = d->fxState[i].size;
      gOff = d->fxState[i].offset;
      tl_assert(gSz > 0);
      while (True) {
         if (gSz == 0) break;
         n = gSz <= 8 ? gSz : 8;
         /* update 'curr' with UifU of the state slice
            gOff .. gOff+n-1 */
         tySrc = szToITy( n );
         src   = assignNew( 'V', mce, tySrc,
                            shadow_GET(mce, gOff, tySrc ) );
         here = mkPCastTo( mce, Ity_I32, src );
         curr = mkUifU32(mce, here, curr);
         gSz -= n;
         gOff += n;
      }

   }

   /* Inputs: memory.  First set up some info needed regardless of
      whether we're doing reads or writes. */

   if (d->mFx != Ifx_None) {
      /* Because we may do multiple shadow loads/stores from the same
         base address, it's best to do a single test of its
         definedness right now.  Post-instrumentation optimisation
         should remove all but this test. */
      IRType tyAddr;
      tl_assert(d->mAddr);
      complainIfUndefined(mce, d->mAddr);

      tyAddr = typeOfIRExpr(mce->sb->tyenv, d->mAddr);
      tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);
      tl_assert(tyAddr == mce->hWordTy); /* not really right */
   }

   /* Deal with memory inputs (reads or modifies) */
   if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
      toDo   = d->mSize;
      /* chew off 32-bit chunks.  We don't care about the endianness
         since it's all going to be condensed down to a single bit,
         but nevertheless choose an endianness which is hopefully
         native to the platform. */
      while (toDo >= 4) {
         here = mkPCastTo(
                   mce, Ity_I32,
                   expr2vbits_Load ( mce, end, Ity_I32,
                                     d->mAddr, d->mSize - toDo )
                );
         curr = mkUifU32(mce, here, curr);
         toDo -= 4;
      }
      /* chew off 16-bit chunks */
      while (toDo >= 2) {
         here = mkPCastTo(
                   mce, Ity_I32,
                   expr2vbits_Load ( mce, end, Ity_I16,
                                     d->mAddr, d->mSize - toDo )
                );
         curr = mkUifU32(mce, here, curr);
         toDo -= 2;
      }
      tl_assert(toDo == 0); /* also need to handle 1-byte excess */
   }

   /* Whew!  So curr is a 32-bit V-value summarising pessimistically
      all the inputs to the helper.  Now we need to re-distribute the
      results to all destinations. */

   /* Outputs: the destination temporary, if there is one. */
   if (d->tmp != IRTemp_INVALID) {
      dst   = findShadowTmpV(mce, d->tmp);
      tyDst = typeOfIRTemp(mce->sb->tyenv, d->tmp);
      assign( 'V', mce, dst, mkPCastTo( mce, tyDst, curr) );
   }

   /* Outputs: guest state that we write or modify. */
   for (i = 0; i < d->nFxState; i++) {
      tl_assert(d->fxState[i].fx != Ifx_None);
      if (d->fxState[i].fx == Ifx_Read)
         continue;
      /* Ignore any sections marked as 'always defined'. */
      if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size ))
         continue;
      /* This state element is written or modified.  So we need to
         consider it.  If larger than 8 bytes, deal with it in 8-byte
         chunks. */
      gSz  = d->fxState[i].size;
      gOff = d->fxState[i].offset;
      tl_assert(gSz > 0);
      while (True) {
         if (gSz == 0) break;
         n = gSz <= 8 ? gSz : 8;
         /* Write suitably-casted 'curr' to the state slice
            gOff .. gOff+n-1 */
         tyDst = szToITy( n );
         do_shadow_PUT( mce, gOff,
                             NULL, /* original atom */
                             mkPCastTo( mce, tyDst, curr ) );
         gSz -= n;
         gOff += n;
      }
   }

   /* Outputs: memory that we write or modify.  Same comments about
      endianness as above apply. */
   if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
      toDo   = d->mSize;
      /* chew off 32-bit chunks */
      while (toDo >= 4) {
         do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
                          NULL, /* original data */
                          mkPCastTo( mce, Ity_I32, curr ),
                          NULL/*guard*/ );
         toDo -= 4;
      }
      /* chew off 16-bit chunks */
      while (toDo >= 2) {
         do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
                          NULL, /* original data */
                          mkPCastTo( mce, Ity_I16, curr ),
                          NULL/*guard*/ );
         toDo -= 2;
      }
      tl_assert(toDo == 0); /* also need to handle 1-byte excess */
   }

}
3416
sewardj1c0ce7a2009-07-01 08:10:49 +00003417
sewardj826ec492005-05-12 18:05:00 +00003418/* We have an ABI hint telling us that [base .. base+len-1] is to
3419 become undefined ("writable"). Generate code to call a helper to
3420 notify the A/V bit machinery of this fact.
3421
3422 We call
sewardj7cf4e6b2008-05-01 20:24:26 +00003423 void MC_(helperc_MAKE_STACK_UNINIT) ( Addr base, UWord len,
3424 Addr nia );
sewardj826ec492005-05-12 18:05:00 +00003425*/
3426static
sewardj7cf4e6b2008-05-01 20:24:26 +00003427void do_AbiHint ( MCEnv* mce, IRExpr* base, Int len, IRExpr* nia )
sewardj826ec492005-05-12 18:05:00 +00003428{
3429 IRDirty* di;
sewardj7cf4e6b2008-05-01 20:24:26 +00003430 /* Minor optimisation: if not doing origin tracking, ignore the
3431 supplied nia and pass zero instead. This is on the basis that
3432 MC_(helperc_MAKE_STACK_UNINIT) will ignore it anyway, and we can
3433 almost always generate a shorter instruction to put zero into a
3434 register than any other value. */
3435 if (MC_(clo_mc_level) < 3)
3436 nia = mkIRExpr_HWord(0);
3437
sewardj826ec492005-05-12 18:05:00 +00003438 di = unsafeIRDirty_0_N(
3439 0/*regparms*/,
3440 "MC_(helperc_MAKE_STACK_UNINIT)",
sewardj53ee1fc2005-12-23 02:29:58 +00003441 VG_(fnptr_to_fnentry)( &MC_(helperc_MAKE_STACK_UNINIT) ),
sewardj7cf4e6b2008-05-01 20:24:26 +00003442 mkIRExprVec_3( base, mkIRExpr_HWord( (UInt)len), nia )
sewardj826ec492005-05-12 18:05:00 +00003443 );
sewardj7cf4e6b2008-05-01 20:24:26 +00003444 stmt( 'V', mce, IRStmt_Dirty(di) );
sewardj826ec492005-05-12 18:05:00 +00003445}
3446
njn25e49d8e72002-09-23 09:36:25 +00003447
sewardj1c0ce7a2009-07-01 08:10:49 +00003448/* ------ Dealing with IRCAS (big and complex) ------ */
3449
3450/* FWDS */
3451static IRAtom* gen_load_b ( MCEnv* mce, Int szB,
3452 IRAtom* baseaddr, Int offset );
3453static IRAtom* gen_maxU32 ( MCEnv* mce, IRAtom* b1, IRAtom* b2 );
3454static void gen_store_b ( MCEnv* mce, Int szB,
3455 IRAtom* baseaddr, Int offset, IRAtom* dataB,
3456 IRAtom* guard );
3457
3458static void do_shadow_CAS_single ( MCEnv* mce, IRCAS* cas );
3459static void do_shadow_CAS_double ( MCEnv* mce, IRCAS* cas );
3460
3461
3462/* Either ORIG and SHADOW are both IRExpr.RdTmps, or they are both
3463 IRExpr.Consts, else this asserts. If they are both Consts, it
3464 doesn't do anything. So that just leaves the RdTmp case.
3465
3466 In which case: this assigns the shadow value SHADOW to the IR
3467 shadow temporary associated with ORIG. That is, ORIG, being an
3468 original temporary, will have a shadow temporary associated with
3469 it. However, in the case envisaged here, there will so far have
3470 been no IR emitted to actually write a shadow value into that
3471 temporary. What this routine does is to (emit IR to) copy the
3472 value in SHADOW into said temporary, so that after this call,
3473 IRExpr.RdTmps of ORIG's shadow temp will correctly pick up the
3474 value in SHADOW.
3475
3476 Point is to allow callers to compute "by hand" a shadow value for
3477 ORIG, and force it to be associated with ORIG.
3478
3479 How do we know that that shadow associated with ORIG has not so far
3480 been assigned to? Well, we don't per se know that, but supposing
3481 it had. Then this routine would create a second assignment to it,
3482 and later the IR sanity checker would barf. But that never
3483 happens. QED.
3484*/
3485static void bind_shadow_tmp_to_orig ( UChar how,
3486 MCEnv* mce,
3487 IRAtom* orig, IRAtom* shadow )
3488{
3489 tl_assert(isOriginalAtom(mce, orig));
3490 tl_assert(isShadowAtom(mce, shadow));
3491 switch (orig->tag) {
3492 case Iex_Const:
3493 tl_assert(shadow->tag == Iex_Const);
3494 break;
3495 case Iex_RdTmp:
3496 tl_assert(shadow->tag == Iex_RdTmp);
3497 if (how == 'V') {
3498 assign('V', mce, findShadowTmpV(mce,orig->Iex.RdTmp.tmp),
3499 shadow);
3500 } else {
3501 tl_assert(how == 'B');
3502 assign('B', mce, findShadowTmpB(mce,orig->Iex.RdTmp.tmp),
3503 shadow);
3504 }
3505 break;
3506 default:
3507 tl_assert(0);
3508 }
3509}
3510
3511
/* Instrument an IRCAS, dispatching to the single- or double-element
   handler.  The scheme and its caveats are documented below; the
   COMMENT_ON_CasCmpEQ section is cross-referenced from elsewhere in
   this file. */
static
void do_shadow_CAS ( MCEnv* mce, IRCAS* cas )
{
   /* Scheme is (both single- and double- cases):

      1. fetch data#,dataB (the proposed new value)

      2. fetch expd#,expdB (what we expect to see at the address)

      3. check definedness of address

      4. load old#,oldB from shadow memory; this also checks
         addressibility of the address

      5. the CAS itself

      6. compute "expected == old".  See COMMENT_ON_CasCmpEQ below.

      7. if "expected == old" (as computed by (6))
            store data#,dataB to shadow memory

      Note that 5 reads 'old' but 4 reads 'old#'.  Similarly, 5 stores
      'data' but 7 stores 'data#'.  Hence it is possible for the
      shadow data to be incorrectly checked and/or updated:

      * 7 is at least gated correctly, since the 'expected == old'
        condition is derived from outputs of 5.  However, the shadow
        write could happen too late: imagine after 5 we are
        descheduled, a different thread runs, writes a different
        (shadow) value at the address, and then we resume, hence
        overwriting the shadow value written by the other thread.

      Because the original memory access is atomic, there's no way to
      make both the original and shadow accesses into a single atomic
      thing, hence this is unavoidable.

      At least as Valgrind stands, I don't think it's a problem, since
      we're single threaded *and* we guarantee that there are no
      context switches during the execution of any specific superblock
      -- context switches can only happen at superblock boundaries.

      If Valgrind ever becomes MT in the future, then it might be more
      of a problem.  A possible kludge would be to artificially
      associate with the location, a lock, which we must acquire and
      release around the transaction as a whole.  Hmm, that probably
      would't work properly since it only guards us against other
      threads doing CASs on the same location, not against other
      threads doing normal reads and writes.

      ------------------------------------------------------------

      COMMENT_ON_CasCmpEQ:

      Note two things.  Firstly, in the sequence above, we compute
      "expected == old", but we don't check definedness of it.  Why
      not?  Also, the x86 and amd64 front ends use
      Iop_CmpCas{EQ,NE}{8,16,32,64} comparisons to make the equivalent
      determination (expected == old ?) for themselves, and we also
      don't check definedness for those primops; we just say that the
      result is defined.  Why?  Details follow.

      x86/amd64 contains various forms of locked insns:
      * lock prefix before all basic arithmetic insn;
        eg lock xorl %reg1,(%reg2)
      * atomic exchange reg-mem
      * compare-and-swaps

      Rather than attempt to represent them all, which would be a
      royal PITA, I used a result from Maurice Herlihy
      (http://en.wikipedia.org/wiki/Maurice_Herlihy), in which he
      demonstrates that compare-and-swap is a primitive more general
      than the other two, and so can be used to represent all of them.
      So the translation scheme for (eg) lock incl (%reg) is as
      follows:

        again:
         old = * %reg
         new = old + 1
         atomically { if (* %reg == old) { * %reg = new } else { goto again } }

      The "atomically" is the CAS bit.  The scheme is always the same:
      get old value from memory, compute new value, atomically stuff
      new value back in memory iff the old value has not changed (iow,
      no other thread modified it in the meantime).  If it has changed
      then we've been out-raced and we have to start over.

      Now that's all very neat, but it has the bad side effect of
      introducing an explicit equality test into the translation.
      Consider the behaviour of said code on a memory location which
      is uninitialised.  We will wind up doing a comparison on
      uninitialised data, and mc duly complains.

      What's difficult about this is, the common case is that the
      location is uncontended, and so we're usually comparing the same
      value (* %reg) with itself.  So we shouldn't complain even if it
      is undefined.  But mc doesn't know that.

      My solution is to mark the == in the IR specially, so as to tell
      mc that it almost certainly compares a value with itself, and we
      should just regard the result as always defined.  Rather than
      add a bit to all IROps, I just cloned Iop_CmpEQ{8,16,32,64} into
      Iop_CasCmpEQ{8,16,32,64} so as not to disturb anything else.

      So there's always the question of, can this give a false
      negative?  eg, imagine that initially, * %reg is defined; and we
      read that; but then in the gap between the read and the CAS, a
      different thread writes an undefined (and different) value at
      the location.  Then the CAS in this thread will fail and we will
      go back to "again:", but without knowing that the trip back
      there was based on an undefined comparison.  No matter; at least
      the other thread won the race and the location is correctly
      marked as undefined.  What if it wrote an uninitialised version
      of the same value that was there originally, though?

      etc etc.  Seems like there's a small corner case in which we
      might lose the fact that something's defined -- we're out-raced
      in between the "old = * reg" and the "atomically {", _and_ the
      other thread is writing in an undefined version of what's
      already there.  Well, that seems pretty unlikely.

      ---

      If we ever need to reinstate it .. code which generates a
      definedness test for "expected == old" was removed at r10432 of
      this file.
   */
   /* oldHi is only valid for double-element (DCAS) operations. */
   if (cas->oldHi == IRTemp_INVALID) {
      do_shadow_CAS_single( mce, cas );
   } else {
      do_shadow_CAS_double( mce, cas );
   }
}
3644
3645
/* Instrument a single-element CAS, following steps 1..7 described in
   do_shadow_CAS's header comment.  The V-bits ('V') are always
   shadowed; origin-tracking ('B') shadows are emitted only when
   MC_(clo_mc_level) >= 3.  Step ordering matters: the old shadow is
   bound before the CAS statement is emitted, and the conditional
   shadow store's guard is derived from the CAS's own output. */
static void do_shadow_CAS_single ( MCEnv* mce, IRCAS* cas )
{
   IRAtom *vdataLo = NULL, *bdataLo = NULL;
   IRAtom *vexpdLo = NULL, *bexpdLo = NULL;
   IRAtom *voldLo  = NULL, *boldLo  = NULL;
   IRAtom *expd_eq_old = NULL;
   IROp   opCasCmpEQ;
   Int    elemSzB;
   IRType elemTy;
   Bool   otrak = MC_(clo_mc_level) >= 3; /* a shorthand */

   /* single CAS */
   tl_assert(cas->oldHi == IRTemp_INVALID);
   tl_assert(cas->expdHi == NULL);
   tl_assert(cas->dataHi == NULL);

   /* Element width determines both the shadow-load size and which
      CasCmpEQ variant to use in step 6. */
   elemTy = typeOfIRExpr(mce->sb->tyenv, cas->expdLo);
   switch (elemTy) {
      case Ity_I8:  elemSzB = 1; opCasCmpEQ = Iop_CasCmpEQ8;  break;
      case Ity_I16: elemSzB = 2; opCasCmpEQ = Iop_CasCmpEQ16; break;
      case Ity_I32: elemSzB = 4; opCasCmpEQ = Iop_CasCmpEQ32; break;
      case Ity_I64: elemSzB = 8; opCasCmpEQ = Iop_CasCmpEQ64; break;
      default: tl_assert(0); /* IR defn disallows any other types */
   }

   /* 1. fetch data# (the proposed new value) */
   tl_assert(isOriginalAtom(mce, cas->dataLo));
   vdataLo
      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataLo));
   tl_assert(isShadowAtom(mce, vdataLo));
   if (otrak) {
      bdataLo
         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataLo));
      tl_assert(isShadowAtom(mce, bdataLo));
   }

   /* 2. fetch expected# (what we expect to see at the address) */
   tl_assert(isOriginalAtom(mce, cas->expdLo));
   vexpdLo
      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdLo));
   tl_assert(isShadowAtom(mce, vexpdLo));
   if (otrak) {
      bexpdLo
         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdLo));
      tl_assert(isShadowAtom(mce, bexpdLo));
   }

   /* 3. check definedness of address */
   /* 4. fetch old# from shadow memory; this also checks
         addressibility of the address */
   voldLo
      = assignNew(
           'V', mce, elemTy,
           expr2vbits_Load(
              mce,
              cas->end, elemTy, cas->addr, 0/*Addr bias*/
        ));
   bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldLo), voldLo);
   if (otrak) {
      boldLo
         = assignNew('B', mce, Ity_I32,
                     gen_load_b(mce, elemSzB, cas->addr, 0/*addr bias*/));
      bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldLo), boldLo);
   }

   /* 5. the CAS itself */
   stmt( 'C', mce, IRStmt_CAS(cas) );

   /* 6. compute "expected == old" */
   /* See COMMENT_ON_CasCmpEQ in this file background/rationale. */
   /* Note that 'C' is kinda faking it; it is indeed a non-shadow
      tree, but it's not copied from the input block. */
   expd_eq_old
      = assignNew('C', mce, Ity_I1,
                  binop(opCasCmpEQ, cas->expdLo, mkexpr(cas->oldLo)));

   /* 7. if "expected == old"
            store data# to shadow memory */
   do_shadow_Store( mce, cas->end, cas->addr, 0/*bias*/,
                    NULL/*data*/, vdataLo/*vdata*/,
                    expd_eq_old/*guard for store*/ );
   if (otrak) {
      gen_store_b( mce, elemSzB, cas->addr, 0/*offset*/,
                   bdataLo/*bdata*/,
                   expd_eq_old/*guard for store*/ );
   }
}
3733
3734
/* Instrument a double-element CAS.  Same seven-step scheme as
   do_shadow_CAS_single, but applied to both the Hi and Lo halves of
   the pair, with memory offsets chosen per the stated endianness so
   each half's shadow load/store lines up with the real access.  The
   success test "expected == old" is computed over the whole pair via
   xor/or-fold rather than two separate comparisons.  As with the
   single case, the CAS statement itself is copied to the output
   block here (step 5), so the caller must not copy it again. */
static void do_shadow_CAS_double ( MCEnv* mce, IRCAS* cas )
{
   IRAtom *vdataHi = NULL, *bdataHi = NULL;   /* V/B shadows, Hi half */
   IRAtom *vdataLo = NULL, *bdataLo = NULL;   /* V/B shadows, Lo half */
   IRAtom *vexpdHi = NULL, *bexpdHi = NULL;
   IRAtom *vexpdLo = NULL, *bexpdLo = NULL;
   IRAtom *voldHi  = NULL, *boldHi  = NULL;
   IRAtom *voldLo  = NULL, *boldLo  = NULL;
   IRAtom *xHi = NULL, *xLo = NULL, *xHL = NULL;  /* xor/or-fold temps */
   IRAtom *expd_eq_old = NULL, *zero = NULL;
   IROp   opCasCmpEQ, opOr, opXor;
   Int    elemSzB, memOffsLo, memOffsHi;
   IRType elemTy;
   Bool   otrak = MC_(clo_mc_level) >= 3; /* a shorthand: origin tracking? */

   /* double CAS: both Hi halves must be present */
   tl_assert(cas->oldHi != IRTemp_INVALID);
   tl_assert(cas->expdHi != NULL);
   tl_assert(cas->dataHi != NULL);

   elemTy = typeOfIRExpr(mce->sb->tyenv, cas->expdLo);
   switch (elemTy) {
      case Ity_I8:
         opCasCmpEQ = Iop_CasCmpEQ8; opOr = Iop_Or8; opXor = Iop_Xor8;
         elemSzB = 1; zero = mkU8(0);
         break;
      case Ity_I16:
         opCasCmpEQ = Iop_CasCmpEQ16; opOr = Iop_Or16; opXor = Iop_Xor16;
         elemSzB = 2; zero = mkU16(0);
         break;
      case Ity_I32:
         opCasCmpEQ = Iop_CasCmpEQ32; opOr = Iop_Or32; opXor = Iop_Xor32;
         elemSzB = 4; zero = mkU32(0);
         break;
      case Ity_I64:
         opCasCmpEQ = Iop_CasCmpEQ64; opOr = Iop_Or64; opXor = Iop_Xor64;
         elemSzB = 8; zero = mkU64(0);
         break;
      default:
         tl_assert(0); /* IR defn disallows any other types */
   }

   /* 1. fetch data# (the proposed new value) */
   tl_assert(isOriginalAtom(mce, cas->dataHi));
   tl_assert(isOriginalAtom(mce, cas->dataLo));
   vdataHi
      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataHi));
   vdataLo
      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataLo));
   tl_assert(isShadowAtom(mce, vdataHi));
   tl_assert(isShadowAtom(mce, vdataLo));
   if (otrak) {
      bdataHi
         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataHi));
      bdataLo
         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataLo));
      tl_assert(isShadowAtom(mce, bdataHi));
      tl_assert(isShadowAtom(mce, bdataLo));
   }

   /* 2. fetch expected# (what we expect to see at the address) */
   tl_assert(isOriginalAtom(mce, cas->expdHi));
   tl_assert(isOriginalAtom(mce, cas->expdLo));
   vexpdHi
      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdHi));
   vexpdLo
      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdLo));
   tl_assert(isShadowAtom(mce, vexpdHi));
   tl_assert(isShadowAtom(mce, vexpdLo));
   if (otrak) {
      bexpdHi
         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdHi));
      bexpdLo
         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdLo));
      tl_assert(isShadowAtom(mce, bexpdHi));
      tl_assert(isShadowAtom(mce, bexpdLo));
   }

   /* 3. check definedness of address */
   /* 4. fetch old# from shadow memory; this also checks
         addressability of the address */
   /* Pick byte offsets for the two halves according to endianness,
      so the Hi/Lo shadow accesses match the real memory layout. */
   if (cas->end == Iend_LE) {
      memOffsLo = 0;
      memOffsHi = elemSzB;
   } else {
      tl_assert(cas->end == Iend_BE);
      memOffsLo = elemSzB;
      memOffsHi = 0;
   }
   voldHi
      = assignNew(
           'V', mce, elemTy,
           expr2vbits_Load(
              mce,
              cas->end, elemTy, cas->addr, memOffsHi/*Addr bias*/
        ));
   voldLo
      = assignNew(
           'V', mce, elemTy,
           expr2vbits_Load(
              mce,
              cas->end, elemTy, cas->addr, memOffsLo/*Addr bias*/
        ));
   /* Bind the loaded shadows to the original's oldHi/oldLo temps, so
      later uses of those temps see the fetched shadow values. */
   bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldHi), voldHi);
   bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldLo), voldLo);
   if (otrak) {
      boldHi
         = assignNew('B', mce, Ity_I32,
                     gen_load_b(mce, elemSzB, cas->addr,
                                memOffsHi/*addr bias*/));
      boldLo
         = assignNew('B', mce, Ity_I32,
                     gen_load_b(mce, elemSzB, cas->addr,
                                memOffsLo/*addr bias*/));
      bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldHi), boldHi);
      bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldLo), boldLo);
   }

   /* 5. the CAS itself */
   stmt( 'C', mce, IRStmt_CAS(cas) );

   /* 6. compute "expected == old" */
   /* See COMMENT_ON_CasCmpEQ in this file for background/rationale. */
   /* Note that 'C' is kinda faking it; it is indeed a non-shadow
      tree, but it's not copied from the input block. */
   /*
      xHi = oldHi ^ expdHi;
      xLo = oldLo ^ expdLo;
      xHL = xHi | xLo;
      expd_eq_old = xHL == 0;
   */
   xHi = assignNew('C', mce, elemTy,
                   binop(opXor, cas->expdHi, mkexpr(cas->oldHi)));
   xLo = assignNew('C', mce, elemTy,
                   binop(opXor, cas->expdLo, mkexpr(cas->oldLo)));
   xHL = assignNew('C', mce, elemTy,
                   binop(opOr, xHi, xLo));
   expd_eq_old
      = assignNew('C', mce, Ity_I1,
                  binop(opCasCmpEQ, xHL, zero));

   /* 7. if "expected == old"
            store data# to shadow memory */
   do_shadow_Store( mce, cas->end, cas->addr, memOffsHi/*bias*/,
                    NULL/*data*/, vdataHi/*vdata*/,
                    expd_eq_old/*guard for store*/ );
   do_shadow_Store( mce, cas->end, cas->addr, memOffsLo/*bias*/,
                    NULL/*data*/, vdataLo/*vdata*/,
                    expd_eq_old/*guard for store*/ );
   if (otrak) {
      gen_store_b( mce, elemSzB, cas->addr, memOffsHi/*offset*/,
                   bdataHi/*bdata*/,
                   expd_eq_old/*guard for store*/ );
      gen_store_b( mce, elemSzB, cas->addr, memOffsLo/*offset*/,
                   bdataLo/*bdata*/,
                   expd_eq_old/*guard for store*/ );
   }
}
3893
3894
sewardjdb5907d2009-11-26 17:20:21 +00003895/* ------ Dealing with LL/SC (not difficult) ------ */
3896
/* Instrument a Load-Linked / Store-Conditional statement.  A
   load-linked (stStoredata == NULL) is treated as a normal load whose
   shadow is assigned to stResult's V shadow.  A store-conditional is
   treated as a normal store, and stResult's V shadow (the
   success/failure flag, Ity_I1) is marked as fully defined -- see the
   long comment below for why that is considered safe. */
static void do_shadow_LLSC ( MCEnv*    mce,
                             IREndness stEnd,
                             IRTemp    stResult,
                             IRExpr*   stAddr,
                             IRExpr*   stStoredata )
{
   /* In short: treat a load-linked like a normal load followed by an
      assignment of the loaded (shadow) data to the result temporary.
      Treat a store-conditional like a normal store, and mark the
      result temporary as defined. */
   IRType resTy  = typeOfIRTemp(mce->sb->tyenv, stResult);
   IRTemp resTmp = findShadowTmpV(mce, stResult);

   tl_assert(isIRAtom(stAddr));
   if (stStoredata)
      tl_assert(isIRAtom(stStoredata));

   if (stStoredata == NULL) {
      /* Load Linked */
      /* Just treat this as a normal load, followed by an assignment of
         the value to .result. */
      /* Stay sane */
      tl_assert(resTy == Ity_I64 || resTy == Ity_I32
                || resTy == Ity_I16 || resTy == Ity_I8);
      assign( 'V', mce, resTmp,
              expr2vbits_Load(
                 mce, stEnd, resTy, stAddr, 0/*addr bias*/));
   } else {
      /* Store Conditional */
      /* Stay sane */
      IRType dataTy = typeOfIRExpr(mce->sb->tyenv,
                                   stStoredata);
      tl_assert(dataTy == Ity_I64 || dataTy == Ity_I32
                || dataTy == Ity_I16 || dataTy == Ity_I8);
      do_shadow_Store( mce, stEnd,
                       stAddr, 0/* addr bias */,
                       stStoredata,
                       NULL /* shadow data */,
                       NULL/*guard*/ );
      /* This is a store conditional, so it writes to .result a value
         indicating whether or not the store succeeded.  Just claim
         this value is always defined.  In the PowerPC interpretation
         of store-conditional, definedness of the success indication
         depends on whether the address of the store matches the
         reservation address.  But we can't tell that here (and
         anyway, we're not being PowerPC-specific).  At least we are
         guaranteed that the definedness of the store address, and its
         addressability, will be checked as per normal.  So it seems
         pretty safe to just say that the success indication is always
         defined.

         In schemeS, for origin tracking, we must correspondingly set
         a no-origin value for the origin shadow of .result.
      */
      tl_assert(resTy == Ity_I1);
      assign( 'V', mce, resTmp, definedOfType(resTy) );
   }
}
3955
3956
sewardj95448072004-11-22 20:19:51 +00003957/*------------------------------------------------------------*/
3958/*--- Memcheck main ---*/
3959/*------------------------------------------------------------*/
njn25e49d8e72002-09-23 09:36:25 +00003960
sewardj7cf4e6b2008-05-01 20:24:26 +00003961static void schemeS ( MCEnv* mce, IRStmt* st );
3962
sewardj95448072004-11-22 20:19:51 +00003963static Bool isBogusAtom ( IRAtom* at )
njn25e49d8e72002-09-23 09:36:25 +00003964{
sewardj95448072004-11-22 20:19:51 +00003965 ULong n = 0;
3966 IRConst* con;
sewardj710d6c22005-03-20 18:55:15 +00003967 tl_assert(isIRAtom(at));
sewardj0b9d74a2006-12-24 02:24:11 +00003968 if (at->tag == Iex_RdTmp)
sewardj95448072004-11-22 20:19:51 +00003969 return False;
3970 tl_assert(at->tag == Iex_Const);
3971 con = at->Iex.Const.con;
3972 switch (con->tag) {
sewardjd5204dc2004-12-31 01:16:11 +00003973 case Ico_U1: return False;
3974 case Ico_U8: n = (ULong)con->Ico.U8; break;
3975 case Ico_U16: n = (ULong)con->Ico.U16; break;
3976 case Ico_U32: n = (ULong)con->Ico.U32; break;
3977 case Ico_U64: n = (ULong)con->Ico.U64; break;
3978 case Ico_F64: return False;
3979 case Ico_F64i: return False;
3980 case Ico_V128: return False;
sewardj95448072004-11-22 20:19:51 +00003981 default: ppIRExpr(at); tl_assert(0);
3982 }
3983 /* VG_(printf)("%llx\n", n); */
sewardj96a922e2005-04-23 23:26:29 +00003984 return (/*32*/ n == 0xFEFEFEFFULL
3985 /*32*/ || n == 0x80808080ULL
sewardj17b47432008-12-17 01:12:58 +00003986 /*32*/ || n == 0x7F7F7F7FULL
tomd9774d72005-06-27 08:11:01 +00003987 /*64*/ || n == 0xFFFFFFFFFEFEFEFFULL
sewardj96a922e2005-04-23 23:26:29 +00003988 /*64*/ || n == 0xFEFEFEFEFEFEFEFFULL
tomd9774d72005-06-27 08:11:01 +00003989 /*64*/ || n == 0x0000000000008080ULL
sewardj96a922e2005-04-23 23:26:29 +00003990 /*64*/ || n == 0x8080808080808080ULL
sewardj17b47432008-12-17 01:12:58 +00003991 /*64*/ || n == 0x0101010101010101ULL
sewardj96a922e2005-04-23 23:26:29 +00003992 );
sewardj95448072004-11-22 20:19:51 +00003993}
njn25e49d8e72002-09-23 09:36:25 +00003994
sewardj95448072004-11-22 20:19:51 +00003995static Bool checkForBogusLiterals ( /*FLAT*/ IRStmt* st )
3996{
sewardjd5204dc2004-12-31 01:16:11 +00003997 Int i;
3998 IRExpr* e;
3999 IRDirty* d;
sewardj1c0ce7a2009-07-01 08:10:49 +00004000 IRCAS* cas;
sewardj95448072004-11-22 20:19:51 +00004001 switch (st->tag) {
sewardj0b9d74a2006-12-24 02:24:11 +00004002 case Ist_WrTmp:
4003 e = st->Ist.WrTmp.data;
sewardj95448072004-11-22 20:19:51 +00004004 switch (e->tag) {
4005 case Iex_Get:
sewardj0b9d74a2006-12-24 02:24:11 +00004006 case Iex_RdTmp:
sewardj95448072004-11-22 20:19:51 +00004007 return False;
sewardjd5204dc2004-12-31 01:16:11 +00004008 case Iex_Const:
4009 return isBogusAtom(e);
sewardj95448072004-11-22 20:19:51 +00004010 case Iex_Unop:
4011 return isBogusAtom(e->Iex.Unop.arg);
sewardjd5204dc2004-12-31 01:16:11 +00004012 case Iex_GetI:
4013 return isBogusAtom(e->Iex.GetI.ix);
sewardj95448072004-11-22 20:19:51 +00004014 case Iex_Binop:
4015 return isBogusAtom(e->Iex.Binop.arg1)
4016 || isBogusAtom(e->Iex.Binop.arg2);
sewardjed69fdb2006-02-03 16:12:27 +00004017 case Iex_Triop:
4018 return isBogusAtom(e->Iex.Triop.arg1)
4019 || isBogusAtom(e->Iex.Triop.arg2)
4020 || isBogusAtom(e->Iex.Triop.arg3);
sewardje91cea72006-02-08 19:32:02 +00004021 case Iex_Qop:
4022 return isBogusAtom(e->Iex.Qop.arg1)
4023 || isBogusAtom(e->Iex.Qop.arg2)
4024 || isBogusAtom(e->Iex.Qop.arg3)
4025 || isBogusAtom(e->Iex.Qop.arg4);
sewardj95448072004-11-22 20:19:51 +00004026 case Iex_Mux0X:
4027 return isBogusAtom(e->Iex.Mux0X.cond)
4028 || isBogusAtom(e->Iex.Mux0X.expr0)
4029 || isBogusAtom(e->Iex.Mux0X.exprX);
sewardj2e595852005-06-30 23:33:37 +00004030 case Iex_Load:
4031 return isBogusAtom(e->Iex.Load.addr);
sewardj95448072004-11-22 20:19:51 +00004032 case Iex_CCall:
4033 for (i = 0; e->Iex.CCall.args[i]; i++)
4034 if (isBogusAtom(e->Iex.CCall.args[i]))
4035 return True;
4036 return False;
4037 default:
4038 goto unhandled;
4039 }
sewardjd5204dc2004-12-31 01:16:11 +00004040 case Ist_Dirty:
4041 d = st->Ist.Dirty.details;
4042 for (i = 0; d->args[i]; i++)
4043 if (isBogusAtom(d->args[i]))
4044 return True;
4045 if (d->guard && isBogusAtom(d->guard))
4046 return True;
4047 if (d->mAddr && isBogusAtom(d->mAddr))
4048 return True;
4049 return False;
sewardj95448072004-11-22 20:19:51 +00004050 case Ist_Put:
4051 return isBogusAtom(st->Ist.Put.data);
sewardjd5204dc2004-12-31 01:16:11 +00004052 case Ist_PutI:
4053 return isBogusAtom(st->Ist.PutI.ix)
4054 || isBogusAtom(st->Ist.PutI.data);
sewardj2e595852005-06-30 23:33:37 +00004055 case Ist_Store:
4056 return isBogusAtom(st->Ist.Store.addr)
4057 || isBogusAtom(st->Ist.Store.data);
sewardj95448072004-11-22 20:19:51 +00004058 case Ist_Exit:
sewardjd5204dc2004-12-31 01:16:11 +00004059 return isBogusAtom(st->Ist.Exit.guard);
sewardj826ec492005-05-12 18:05:00 +00004060 case Ist_AbiHint:
sewardj7cf4e6b2008-05-01 20:24:26 +00004061 return isBogusAtom(st->Ist.AbiHint.base)
4062 || isBogusAtom(st->Ist.AbiHint.nia);
sewardj21dc3452005-03-21 00:27:41 +00004063 case Ist_NoOp:
sewardj29faa502005-03-16 18:20:21 +00004064 case Ist_IMark:
sewardj72d75132007-11-09 23:06:35 +00004065 case Ist_MBE:
sewardjbd598e12005-01-07 12:10:21 +00004066 return False;
sewardj1c0ce7a2009-07-01 08:10:49 +00004067 case Ist_CAS:
4068 cas = st->Ist.CAS.details;
4069 return isBogusAtom(cas->addr)
4070 || (cas->expdHi ? isBogusAtom(cas->expdHi) : False)
4071 || isBogusAtom(cas->expdLo)
4072 || (cas->dataHi ? isBogusAtom(cas->dataHi) : False)
4073 || isBogusAtom(cas->dataLo);
sewardjdb5907d2009-11-26 17:20:21 +00004074 case Ist_LLSC:
4075 return isBogusAtom(st->Ist.LLSC.addr)
4076 || (st->Ist.LLSC.storedata
4077 ? isBogusAtom(st->Ist.LLSC.storedata)
4078 : False);
sewardj95448072004-11-22 20:19:51 +00004079 default:
4080 unhandled:
4081 ppIRStmt(st);
4082 VG_(tool_panic)("hasBogusLiterals");
4083 }
4084}
njn25e49d8e72002-09-23 09:36:25 +00004085
njn25e49d8e72002-09-23 09:36:25 +00004086
/* Top-level instrumentation entry point for Memcheck.  Takes a flat
   input superblock sb_in and returns a new superblock sb_out
   containing the original statements interleaved with V-bit (and,
   at --track-origins level 3, B-bit) shadow computations.  Phases:
   (1) sanity checks; (2) pre-scan for "bogus" literals, which
   selects more expensive instrumentation; (3) verbatim copy of any
   pre-IMark preamble, plus synthesis of defined shadows for preamble
   temporaries; (4) per-statement instrumentation dispatch; (5) a
   definedness check on the block's jump target.  sb_in is not
   modified; closure and vge are currently unused here. */
IRSB* MC_(instrument) ( VgCallbackClosure* closure,
                        IRSB* sb_in,
                        VexGuestLayout* layout,
                        VexGuestExtents* vge,
                        IRType gWordTy, IRType hWordTy )
{
   Bool    verboze = 0||False;
   Bool    bogus;
   Int     i, j, first_stmt;
   IRStmt* st;
   MCEnv   mce;
   IRSB*   sb_out;

   if (gWordTy != hWordTy) {
      /* We don't currently support this case. */
      VG_(tool_panic)("host/guest word size mismatch");
   }

   /* Check we're not completely nuts */
   tl_assert(sizeof(UWord)  == sizeof(void*));
   tl_assert(sizeof(Word)   == sizeof(void*));
   tl_assert(sizeof(Addr)   == sizeof(void*));
   tl_assert(sizeof(ULong)  == 8);
   tl_assert(sizeof(Long)   == 8);
   tl_assert(sizeof(Addr64) == 8);
   tl_assert(sizeof(UInt)   == 4);
   tl_assert(sizeof(Int)    == 4);

   tl_assert(MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3);

   /* Set up SB: copy everything except the statements themselves. */
   sb_out = deepCopyIRSBExceptStmts(sb_in);

   /* Set up the running environment.  Both .sb and .tmpMap are
      modified as we go along.  Note that tmps are added to both
      .sb->tyenv and .tmpMap together, so the valid index-set for
      those two arrays should always be identical. */
   VG_(memset)(&mce, 0, sizeof(mce));
   mce.sb             = sb_out;
   mce.trace          = verboze;
   mce.layout         = layout;
   mce.hWordTy        = hWordTy;
   mce.bogusLiterals  = False;

   mce.tmpMap = VG_(newXA)( VG_(malloc), "mc.MC_(instrument).1", VG_(free),
                            sizeof(TempMapEnt));
   /* Seed the temp map: every original temp starts with no shadows. */
   for (i = 0; i < sb_in->tyenv->types_used; i++) {
      TempMapEnt ent;
      ent.kind    = Orig;
      ent.shadowV = IRTemp_INVALID;
      ent.shadowB = IRTemp_INVALID;
      VG_(addToXA)( mce.tmpMap, &ent );
   }
   tl_assert( VG_(sizeXA)( mce.tmpMap ) == sb_in->tyenv->types_used );

   /* Make a preliminary inspection of the statements, to see if there
      are any dodgy-looking literals.  If there are, we generate
      extra-detailed (hence extra-expensive) instrumentation in
      places.  Scan the whole bb even if dodgyness is found earlier,
      so that the flatness assertion is applied to all stmts. */

   bogus = False;

   for (i = 0; i < sb_in->stmts_used; i++) {

      st = sb_in->stmts[i];
      tl_assert(st);
      tl_assert(isFlatIRStmt(st));

      if (!bogus) {
         bogus = checkForBogusLiterals(st);
         if (0 && bogus) {
            VG_(printf)("bogus: ");
            ppIRStmt(st);
            VG_(printf)("\n");
         }
      }

   }

   mce.bogusLiterals = bogus;

   /* Copy verbatim any IR preamble preceding the first IMark */

   tl_assert(mce.sb == sb_out);
   tl_assert(mce.sb != sb_in);

   i = 0;
   while (i < sb_in->stmts_used && sb_in->stmts[i]->tag != Ist_IMark) {

      st = sb_in->stmts[i];
      tl_assert(st);
      tl_assert(isFlatIRStmt(st));

      stmt( 'C', &mce, sb_in->stmts[i] );
      i++;
   }

   /* Nasty problem.  IR optimisation of the pre-instrumented IR may
      cause the IR following the preamble to contain references to IR
      temporaries defined in the preamble.  Because the preamble isn't
      instrumented, these temporaries don't have any shadows.
      Nevertheless uses of them following the preamble will cause
      memcheck to generate references to their shadows.  End effect is
      to cause IR sanity check failures, due to references to
      non-existent shadows.  This is only evident for the complex
      preambles used for function wrapping on TOC-afflicted platforms
      (ppc64-linux, ppc32-aix5, ppc64-aix5).

      The following loop therefore scans the preamble looking for
      assignments to temporaries.  For each one found it creates an
      assignment to the corresponding (V) shadow temp, marking it as
      'defined'.  This is the same resulting IR as if the main
      instrumentation loop before had been applied to the statement
      'tmp = CONSTANT'.

      Similarly, if origin tracking is enabled, we must generate an
      assignment for the corresponding origin (B) shadow, claiming
      no-origin, as appropriate for a defined value.
   */
   for (j = 0; j < i; j++) {
      if (sb_in->stmts[j]->tag == Ist_WrTmp) {
         /* findShadowTmpV checks its arg is an original tmp;
            no need to assert that here. */
         IRTemp tmp_o = sb_in->stmts[j]->Ist.WrTmp.tmp;
         IRTemp tmp_v = findShadowTmpV(&mce, tmp_o);
         IRType ty_v  = typeOfIRTemp(sb_out->tyenv, tmp_v);
         assign( 'V', &mce, tmp_v, definedOfType( ty_v ) );
         if (MC_(clo_mc_level) == 3) {
            IRTemp tmp_b = findShadowTmpB(&mce, tmp_o);
            tl_assert(typeOfIRTemp(sb_out->tyenv, tmp_b) == Ity_I32);
            assign( 'B', &mce, tmp_b, mkU32(0)/* UNKNOWN ORIGIN */);
         }
         if (0) {
            VG_(printf)("create shadow tmp(s) for preamble tmp [%d] ty ", j);
            ppIRType( ty_v );
            VG_(printf)("\n");
         }
      }
   }

   /* Iterate over the remaining stmts to generate instrumentation. */

   tl_assert(sb_in->stmts_used > 0);
   tl_assert(i >= 0);
   tl_assert(i < sb_in->stmts_used);
   tl_assert(sb_in->stmts[i]->tag == Ist_IMark);

   for (/* use current i*/; i < sb_in->stmts_used; i++) {

      st = sb_in->stmts[i];
      first_stmt = sb_out->stmts_used;

      if (verboze) {
         VG_(printf)("\n");
         ppIRStmt(st);
         VG_(printf)("\n");
      }

      if (MC_(clo_mc_level) == 3) {
         /* See comments on case Ist_CAS below. */
         if (st->tag != Ist_CAS)
            schemeS( &mce, st );
      }

      /* Generate instrumentation code for each stmt ... */

      switch (st->tag) {

         case Ist_WrTmp:
            assign( 'V', &mce, findShadowTmpV(&mce, st->Ist.WrTmp.tmp),
                               expr2vbits( &mce, st->Ist.WrTmp.data) );
            break;

         case Ist_Put:
            do_shadow_PUT( &mce,
                           st->Ist.Put.offset,
                           st->Ist.Put.data,
                           NULL /* shadow atom */ );
            break;

         case Ist_PutI:
            do_shadow_PUTI( &mce,
                            st->Ist.PutI.descr,
                            st->Ist.PutI.ix,
                            st->Ist.PutI.bias,
                            st->Ist.PutI.data );
            break;

         case Ist_Store:
            do_shadow_Store( &mce, st->Ist.Store.end,
                                   st->Ist.Store.addr, 0/* addr bias */,
                                   st->Ist.Store.data,
                                   NULL /* shadow data */,
                                   NULL/*guard*/ );
            break;

         case Ist_Exit:
            complainIfUndefined( &mce, st->Ist.Exit.guard );
            break;

         case Ist_IMark:
            break;

         case Ist_NoOp:
         case Ist_MBE:
            break;

         case Ist_Dirty:
            do_shadow_Dirty( &mce, st->Ist.Dirty.details );
            break;

         case Ist_AbiHint:
            do_AbiHint( &mce, st->Ist.AbiHint.base,
                              st->Ist.AbiHint.len,
                              st->Ist.AbiHint.nia );
            break;

         case Ist_CAS:
            do_shadow_CAS( &mce, st->Ist.CAS.details );
            /* Note, do_shadow_CAS copies the CAS itself to the output
               block, because it needs to add instrumentation both
               before and after it.  Hence skip the copy below.  Also
               skip the origin-tracking stuff (call to schemeS) above,
               since that's all tangled up with it too; do_shadow_CAS
               does it all. */
            break;

         case Ist_LLSC:
            do_shadow_LLSC( &mce,
                            st->Ist.LLSC.end,
                            st->Ist.LLSC.result,
                            st->Ist.LLSC.addr,
                            st->Ist.LLSC.storedata );
            break;

         default:
            VG_(printf)("\n");
            ppIRStmt(st);
            VG_(printf)("\n");
            VG_(tool_panic)("memcheck: unhandled IRStmt");

      } /* switch (st->tag) */

      if (0 && verboze) {
         for (j = first_stmt; j < sb_out->stmts_used; j++) {
            VG_(printf)("   ");
            ppIRStmt(sb_out->stmts[j]);
            VG_(printf)("\n");
         }
         VG_(printf)("\n");
      }

      /* ... and finally copy the stmt itself to the output.  Except,
         skip the copy of IRCASs; see comments on case Ist_CAS
         above. */
      if (st->tag != Ist_CAS)
         stmt('C', &mce, st);

   }

   /* Now we need to complain if the jump target is undefined. */
   first_stmt = sb_out->stmts_used;

   if (verboze) {
      VG_(printf)("sb_in->next = ");
      ppIRExpr(sb_in->next);
      VG_(printf)("\n\n");
   }

   complainIfUndefined( &mce, sb_in->next );

   if (0 && verboze) {
      for (j = first_stmt; j < sb_out->stmts_used; j++) {
         VG_(printf)("   ");
         ppIRStmt(sb_out->stmts[j]);
         VG_(printf)("\n");
      }
      VG_(printf)("\n");
   }

   /* If this fails, there's been some serious snafu with tmp management,
      that should be investigated. */
   tl_assert( VG_(sizeXA)( mce.tmpMap ) == mce.sb->tyenv->types_used );
   VG_(deleteXA)( mce.tmpMap );

   tl_assert(mce.sb == sb_out);
   return sb_out;
}
njn25e49d8e72002-09-23 09:36:25 +00004375
sewardj81651dc2007-08-28 06:05:20 +00004376/*------------------------------------------------------------*/
4377/*--- Post-tree-build final tidying ---*/
4378/*------------------------------------------------------------*/
4379
4380/* This exploits the observation that Memcheck often produces
4381 repeated conditional calls of the form
4382
sewardj7cf4e6b2008-05-01 20:24:26 +00004383 Dirty G MC_(helperc_value_check0/1/4/8_fail)(UInt otag)
sewardj81651dc2007-08-28 06:05:20 +00004384
4385 with the same guard expression G guarding the same helper call.
4386 The second and subsequent calls are redundant. This usually
4387 results from instrumentation of guest code containing multiple
4388 memory references at different constant offsets from the same base
4389 register. After optimisation of the instrumentation, you get a
4390 test for the definedness of the base register for each memory
4391 reference, which is kinda pointless. MC_(final_tidy) therefore
4392 looks for such repeated calls and removes all but the first. */
4393
/* A struct for recording which (helper, guard) pairs we have already
   seen.  'entry' is the helper's call-target pointer and 'guard' the
   IR guard expression of the conditional call; used by check_or_add
   during MC_(final_tidy). */
typedef
   struct { void* entry; IRExpr* guard; }
   Pair;
4399
4400/* Return True if e1 and e2 definitely denote the same value (used to
4401 compare guards). Return False if unknown; False is the safe
4402 answer. Since guest registers and guest memory do not have the
4403 SSA property we must return False if any Gets or Loads appear in
4404 the expression. */
4405
4406static Bool sameIRValue ( IRExpr* e1, IRExpr* e2 )
4407{
4408 if (e1->tag != e2->tag)
4409 return False;
4410 switch (e1->tag) {
4411 case Iex_Const:
4412 return eqIRConst( e1->Iex.Const.con, e2->Iex.Const.con );
4413 case Iex_Binop:
4414 return e1->Iex.Binop.op == e2->Iex.Binop.op
4415 && sameIRValue(e1->Iex.Binop.arg1, e2->Iex.Binop.arg1)
4416 && sameIRValue(e1->Iex.Binop.arg2, e2->Iex.Binop.arg2);
4417 case Iex_Unop:
4418 return e1->Iex.Unop.op == e2->Iex.Unop.op
4419 && sameIRValue(e1->Iex.Unop.arg, e2->Iex.Unop.arg);
4420 case Iex_RdTmp:
4421 return e1->Iex.RdTmp.tmp == e2->Iex.RdTmp.tmp;
4422 case Iex_Mux0X:
4423 return sameIRValue( e1->Iex.Mux0X.cond, e2->Iex.Mux0X.cond )
4424 && sameIRValue( e1->Iex.Mux0X.expr0, e2->Iex.Mux0X.expr0 )
4425 && sameIRValue( e1->Iex.Mux0X.exprX, e2->Iex.Mux0X.exprX );
4426 case Iex_Qop:
4427 case Iex_Triop:
4428 case Iex_CCall:
4429 /* be lazy. Could define equality for these, but they never
4430 appear to be used. */
4431 return False;
4432 case Iex_Get:
4433 case Iex_GetI:
4434 case Iex_Load:
4435 /* be conservative - these may not give the same value each
4436 time */
4437 return False;
4438 case Iex_Binder:
4439 /* should never see this */
4440 /* fallthrough */
4441 default:
4442 VG_(printf)("mc_translate.c: sameIRValue: unhandled: ");
4443 ppIRExpr(e1);
4444 VG_(tool_panic)("memcheck:sameIRValue");
4445 return False;
4446 }
4447}
4448
4449/* See if 'pairs' already has an entry for (entry, guard). Return
4450 True if so. If not, add an entry. */
4451
4452static
4453Bool check_or_add ( XArray* /*of Pair*/ pairs, IRExpr* guard, void* entry )
4454{
4455 Pair p;
4456 Pair* pp;
4457 Int i, n = VG_(sizeXA)( pairs );
4458 for (i = 0; i < n; i++) {
4459 pp = VG_(indexXA)( pairs, i );
4460 if (pp->entry == entry && sameIRValue(pp->guard, guard))
4461 return True;
4462 }
4463 p.guard = guard;
4464 p.entry = entry;
4465 VG_(addToXA)( pairs, &p );
4466 return False;
4467}
4468
4469static Bool is_helperc_value_checkN_fail ( HChar* name )
4470{
4471 return
sewardj7cf4e6b2008-05-01 20:24:26 +00004472 0==VG_(strcmp)(name, "MC_(helperc_value_check0_fail_no_o)")
4473 || 0==VG_(strcmp)(name, "MC_(helperc_value_check1_fail_no_o)")
4474 || 0==VG_(strcmp)(name, "MC_(helperc_value_check4_fail_no_o)")
4475 || 0==VG_(strcmp)(name, "MC_(helperc_value_check8_fail_no_o)")
4476 || 0==VG_(strcmp)(name, "MC_(helperc_value_check0_fail_w_o)")
4477 || 0==VG_(strcmp)(name, "MC_(helperc_value_check1_fail_w_o)")
4478 || 0==VG_(strcmp)(name, "MC_(helperc_value_check4_fail_w_o)")
4479 || 0==VG_(strcmp)(name, "MC_(helperc_value_check8_fail_w_o)");
sewardj81651dc2007-08-28 06:05:20 +00004480}
4481
/* Remove redundant guarded calls to the value-check-fail helpers:
   all but the first call with a given (helper, guard) pair are
   replaced by no-ops.  sb_in is edited in place and also returned. */
IRSB* MC_(final_tidy) ( IRSB* sb_in )
{
   Int i;
   IRStmt* st;
   IRDirty* di;
   IRExpr* guard;
   IRCallee* cee;
   Bool alreadyPresent;
   /* Set of (helper entry address, guard) pairs seen so far. */
   XArray* pairs = VG_(newXA)( VG_(malloc), "mc.ft.1",
                               VG_(free), sizeof(Pair) );
   /* Scan forwards through the statements.  Each time a call to one
      of the relevant helpers is seen, check if we have made a
      previous call to the same helper using the same guard
      expression, and if so, delete the call. */
   for (i = 0; i < sb_in->stmts_used; i++) {
      st = sb_in->stmts[i];
      tl_assert(st);
      if (st->tag != Ist_Dirty)
         continue;
      di = st->Ist.Dirty.details;
      guard = di->guard;
      if (!guard)
         continue;
      if (0) { ppIRExpr(guard); VG_(printf)("\n"); }
      cee = di->cee;
      if (!is_helperc_value_checkN_fail( cee->name ))
         continue;
      /* Ok, we have a call to helperc_value_check0/1/4/8_fail with
         guard 'guard'.  Check if we have already seen a call to this
         function with the same guard.  If so, delete it.  If not,
         add it to the set of calls we do know about. */
      alreadyPresent = check_or_add( pairs, guard, cee->addr );
      if (alreadyPresent) {
         /* Overwrite with a no-op rather than physically deleting,
            so statement indices stay stable during the scan. */
         sb_in->stmts[i] = IRStmt_NoOp();
         if (0) VG_(printf)("XX\n");
      }
   }
   VG_(deleteXA)( pairs );
   return sb_in;
}
4522
4523
sewardj7cf4e6b2008-05-01 20:24:26 +00004524/*------------------------------------------------------------*/
4525/*--- Origin tracking stuff ---*/
4526/*------------------------------------------------------------*/
4527
sewardj1c0ce7a2009-07-01 08:10:49 +00004528/* Almost identical to findShadowTmpV. */
sewardj7cf4e6b2008-05-01 20:24:26 +00004529static IRTemp findShadowTmpB ( MCEnv* mce, IRTemp orig )
4530{
sewardj1c0ce7a2009-07-01 08:10:49 +00004531 TempMapEnt* ent;
4532 /* VG_(indexXA) range-checks 'orig', hence no need to check
4533 here. */
4534 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
4535 tl_assert(ent->kind == Orig);
4536 if (ent->shadowB == IRTemp_INVALID) {
4537 IRTemp tmpB
4538 = newTemp( mce, Ity_I32, BSh );
4539 /* newTemp may cause mce->tmpMap to resize, hence previous results
4540 from VG_(indexXA) are invalid. */
4541 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
4542 tl_assert(ent->kind == Orig);
4543 tl_assert(ent->shadowB == IRTemp_INVALID);
4544 ent->shadowB = tmpB;
sewardj7cf4e6b2008-05-01 20:24:26 +00004545 }
sewardj1c0ce7a2009-07-01 08:10:49 +00004546 return ent->shadowB;
sewardj7cf4e6b2008-05-01 20:24:26 +00004547}
4548
4549static IRAtom* gen_maxU32 ( MCEnv* mce, IRAtom* b1, IRAtom* b2 )
4550{
4551 return assignNew( 'B', mce, Ity_I32, binop(Iop_Max32U, b1, b2) );
4552}
4553
/* Generate IR to fetch the B-value (32-bit origin tag) covering the
   szB bytes at baseaddr+offset, via the size-matched
   MC_(helperc_b_load*) helper.  Returns an Ity_I32 atom holding the
   tag (narrowed from the host word on 64-bit hosts). */
static IRAtom* gen_load_b ( MCEnv* mce, Int szB,
                            IRAtom* baseaddr, Int offset )
{
   void* hFun;
   HChar* hName;
   IRTemp bTmp;
   IRDirty* di;
   IRType aTy = typeOfIRExpr( mce->sb->tyenv, baseaddr );
   IROp opAdd = aTy == Ity_I32 ? Iop_Add32 : Iop_Add64;
   IRAtom* ea = baseaddr;
   /* Fold the constant offset into the effective address, using an
      add of the address's own width. */
   if (offset != 0) {
      IRAtom* off = aTy == Ity_I32 ? mkU32( offset )
                                   : mkU64( (Long)(Int)offset );
      ea = assignNew( 'B', mce, aTy, binop(opAdd, ea, off));
   }
   /* The helper returns a host-word-sized value; it is narrowed to
      32 bits below if the host word is 64 bits. */
   bTmp = newTemp(mce, mce->hWordTy, BSh);

   switch (szB) {
      case 1: hFun = (void*)&MC_(helperc_b_load1);
              hName = "MC_(helperc_b_load1)";
              break;
      case 2: hFun = (void*)&MC_(helperc_b_load2);
              hName = "MC_(helperc_b_load2)";
              break;
      case 4: hFun = (void*)&MC_(helperc_b_load4);
              hName = "MC_(helperc_b_load4)";
              break;
      case 8: hFun = (void*)&MC_(helperc_b_load8);
              hName = "MC_(helperc_b_load8)";
              break;
      case 16: hFun = (void*)&MC_(helperc_b_load16);
               hName = "MC_(helperc_b_load16)";
               break;
      default:
         VG_(printf)("mc_translate.c: gen_load_b: unhandled szB == %d\n", szB);
         tl_assert(0);
   }
   di = unsafeIRDirty_1_N(
           bTmp, 1/*regparms*/, hName, VG_(fnptr_to_fnentry)( hFun ),
           mkIRExprVec_1( ea )
        );
   /* no need to mess with any annotations.  This call accesses
      neither guest state nor guest memory. */
   stmt( 'B', mce, IRStmt_Dirty(di) );
   if (mce->hWordTy == Ity_I64) {
      /* 64-bit host: narrow the returned word to Ity_I32 */
      IRTemp bTmp32 = newTemp(mce, Ity_I32, BSh);
      assign( 'B', mce, bTmp32, unop(Iop_64to32, mkexpr(bTmp)) );
      return mkexpr(bTmp32);
   } else {
      /* 32-bit host: already the right width */
      return mkexpr(bTmp);
   }
}
sewardj1c0ce7a2009-07-01 08:10:49 +00004608
/* Generate a shadow store of the 32-bit origin tag 'dataB' for the
   szB bytes at baseaddr+offset, via the size-matched
   MC_(helperc_b_store*) helper.  guard :: Ity_I1 controls whether the
   store really happens; NULL means it unconditionally does. */
static void gen_store_b ( MCEnv* mce, Int szB,
                          IRAtom* baseaddr, Int offset, IRAtom* dataB,
                          IRAtom* guard )
{
   void* hFun;
   HChar* hName;
   IRDirty* di;
   IRType aTy = typeOfIRExpr( mce->sb->tyenv, baseaddr );
   IROp opAdd = aTy == Ity_I32 ? Iop_Add32 : Iop_Add64;
   IRAtom* ea = baseaddr;
   if (guard) {
      tl_assert(isOriginalAtom(mce, guard));
      tl_assert(typeOfIRExpr(mce->sb->tyenv, guard) == Ity_I1);
   }
   /* Fold the constant offset into the effective address. */
   if (offset != 0) {
      IRAtom* off = aTy == Ity_I32 ? mkU32( offset )
                                   : mkU64( (Long)(Int)offset );
      ea = assignNew( 'B', mce, aTy, binop(opAdd, ea, off));
   }
   /* The helpers take a host-word-sized B value; zero-widen it on a
      64-bit host. */
   if (mce->hWordTy == Ity_I64)
      dataB = assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, dataB));

   switch (szB) {
      case 1: hFun = (void*)&MC_(helperc_b_store1);
              hName = "MC_(helperc_b_store1)";
              break;
      case 2: hFun = (void*)&MC_(helperc_b_store2);
              hName = "MC_(helperc_b_store2)";
              break;
      case 4: hFun = (void*)&MC_(helperc_b_store4);
              hName = "MC_(helperc_b_store4)";
              break;
      case 8: hFun = (void*)&MC_(helperc_b_store8);
              hName = "MC_(helperc_b_store8)";
              break;
      case 16: hFun = (void*)&MC_(helperc_b_store16);
               hName = "MC_(helperc_b_store16)";
               break;
      default:
         tl_assert(0);
   }
   di = unsafeIRDirty_0_N( 2/*regparms*/,
           hName, VG_(fnptr_to_fnentry)( hFun ),
           mkIRExprVec_2( ea, dataB )
        );
   /* no need to mess with any annotations.  This call accesses
      neither guest state nor guest memory. */
   if (guard) di->guard = guard;
   stmt( 'B', mce, IRStmt_Dirty(di) );
}
4661
4662static IRAtom* narrowTo32 ( MCEnv* mce, IRAtom* e ) {
sewardj1c0ce7a2009-07-01 08:10:49 +00004663 IRType eTy = typeOfIRExpr(mce->sb->tyenv, e);
sewardj7cf4e6b2008-05-01 20:24:26 +00004664 if (eTy == Ity_I64)
4665 return assignNew( 'B', mce, Ity_I32, unop(Iop_64to32, e) );
4666 if (eTy == Ity_I32)
4667 return e;
4668 tl_assert(0);
4669}
4670
4671static IRAtom* zWidenFrom32 ( MCEnv* mce, IRType dstTy, IRAtom* e ) {
sewardj1c0ce7a2009-07-01 08:10:49 +00004672 IRType eTy = typeOfIRExpr(mce->sb->tyenv, e);
sewardj7cf4e6b2008-05-01 20:24:26 +00004673 tl_assert(eTy == Ity_I32);
4674 if (dstTy == Ity_I64)
4675 return assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, e) );
4676 tl_assert(0);
4677}
4678
sewardjdb5907d2009-11-26 17:20:21 +00004679
sewardj7cf4e6b2008-05-01 20:24:26 +00004680static IRAtom* schemeE ( MCEnv* mce, IRExpr* e )
4681{
4682 tl_assert(MC_(clo_mc_level) == 3);
4683
4684 switch (e->tag) {
4685
4686 case Iex_GetI: {
4687 IRRegArray* descr_b;
4688 IRAtom *t1, *t2, *t3, *t4;
4689 IRRegArray* descr = e->Iex.GetI.descr;
4690 IRType equivIntTy
4691 = MC_(get_otrack_reg_array_equiv_int_type)(descr);
4692 /* If this array is unshadowable for whatever reason, use the
4693 usual approximation. */
4694 if (equivIntTy == Ity_INVALID)
4695 return mkU32(0);
4696 tl_assert(sizeofIRType(equivIntTy) >= 4);
4697 tl_assert(sizeofIRType(equivIntTy) == sizeofIRType(descr->elemTy));
4698 descr_b = mkIRRegArray( descr->base + 2*mce->layout->total_sizeB,
4699 equivIntTy, descr->nElems );
4700 /* Do a shadow indexed get of the same size, giving t1. Take
4701 the bottom 32 bits of it, giving t2. Compute into t3 the
4702 origin for the index (almost certainly zero, but there's
4703 no harm in being completely general here, since iropt will
4704 remove any useless code), and fold it in, giving a final
4705 value t4. */
4706 t1 = assignNew( 'B', mce, equivIntTy,
4707 IRExpr_GetI( descr_b, e->Iex.GetI.ix,
4708 e->Iex.GetI.bias ));
4709 t2 = narrowTo32( mce, t1 );
4710 t3 = schemeE( mce, e->Iex.GetI.ix );
4711 t4 = gen_maxU32( mce, t2, t3 );
4712 return t4;
4713 }
4714 case Iex_CCall: {
4715 Int i;
4716 IRAtom* here;
4717 IRExpr** args = e->Iex.CCall.args;
4718 IRAtom* curr = mkU32(0);
4719 for (i = 0; args[i]; i++) {
4720 tl_assert(i < 32);
4721 tl_assert(isOriginalAtom(mce, args[i]));
4722 /* Only take notice of this arg if the callee's
4723 mc-exclusion mask does not say it is to be excluded. */
4724 if (e->Iex.CCall.cee->mcx_mask & (1<<i)) {
4725 /* the arg is to be excluded from definedness checking.
4726 Do nothing. */
4727 if (0) VG_(printf)("excluding %s(%d)\n",
4728 e->Iex.CCall.cee->name, i);
4729 } else {
4730 /* calculate the arg's definedness, and pessimistically
4731 merge it in. */
4732 here = schemeE( mce, args[i] );
4733 curr = gen_maxU32( mce, curr, here );
4734 }
4735 }
4736 return curr;
4737 }
4738 case Iex_Load: {
4739 Int dszB;
4740 dszB = sizeofIRType(e->Iex.Load.ty);
4741 /* assert that the B value for the address is already
4742 available (somewhere) */
4743 tl_assert(isIRAtom(e->Iex.Load.addr));
4744 tl_assert(mce->hWordTy == Ity_I32 || mce->hWordTy == Ity_I64);
4745 return gen_load_b( mce, dszB, e->Iex.Load.addr, 0 );
4746 }
4747 case Iex_Mux0X: {
4748 IRAtom* b1 = schemeE( mce, e->Iex.Mux0X.cond );
4749 IRAtom* b2 = schemeE( mce, e->Iex.Mux0X.expr0 );
4750 IRAtom* b3 = schemeE( mce, e->Iex.Mux0X.exprX );
4751 return gen_maxU32( mce, b1, gen_maxU32( mce, b2, b3 ));
4752 }
4753 case Iex_Qop: {
4754 IRAtom* b1 = schemeE( mce, e->Iex.Qop.arg1 );
4755 IRAtom* b2 = schemeE( mce, e->Iex.Qop.arg2 );
4756 IRAtom* b3 = schemeE( mce, e->Iex.Qop.arg3 );
4757 IRAtom* b4 = schemeE( mce, e->Iex.Qop.arg4 );
4758 return gen_maxU32( mce, gen_maxU32( mce, b1, b2 ),
4759 gen_maxU32( mce, b3, b4 ) );
4760 }
4761 case Iex_Triop: {
4762 IRAtom* b1 = schemeE( mce, e->Iex.Triop.arg1 );
4763 IRAtom* b2 = schemeE( mce, e->Iex.Triop.arg2 );
4764 IRAtom* b3 = schemeE( mce, e->Iex.Triop.arg3 );
4765 return gen_maxU32( mce, b1, gen_maxU32( mce, b2, b3 ) );
4766 }
4767 case Iex_Binop: {
sewardjafed4c52009-07-12 13:00:17 +00004768 switch (e->Iex.Binop.op) {
4769 case Iop_CasCmpEQ8: case Iop_CasCmpNE8:
4770 case Iop_CasCmpEQ16: case Iop_CasCmpNE16:
4771 case Iop_CasCmpEQ32: case Iop_CasCmpNE32:
4772 case Iop_CasCmpEQ64: case Iop_CasCmpNE64:
4773 /* Just say these all produce a defined result,
4774 regardless of their arguments. See
4775 COMMENT_ON_CasCmpEQ in this file. */
4776 return mkU32(0);
4777 default: {
4778 IRAtom* b1 = schemeE( mce, e->Iex.Binop.arg1 );
4779 IRAtom* b2 = schemeE( mce, e->Iex.Binop.arg2 );
4780 return gen_maxU32( mce, b1, b2 );
4781 }
4782 }
4783 tl_assert(0);
4784 /*NOTREACHED*/
sewardj7cf4e6b2008-05-01 20:24:26 +00004785 }
4786 case Iex_Unop: {
4787 IRAtom* b1 = schemeE( mce, e->Iex.Unop.arg );
4788 return b1;
4789 }
4790 case Iex_Const:
4791 return mkU32(0);
4792 case Iex_RdTmp:
4793 return mkexpr( findShadowTmpB( mce, e->Iex.RdTmp.tmp ));
4794 case Iex_Get: {
4795 Int b_offset = MC_(get_otrack_shadow_offset)(
4796 e->Iex.Get.offset,
4797 sizeofIRType(e->Iex.Get.ty)
4798 );
4799 tl_assert(b_offset >= -1
4800 && b_offset <= mce->layout->total_sizeB -4);
4801 if (b_offset >= 0) {
4802 /* FIXME: this isn't an atom! */
4803 return IRExpr_Get( b_offset + 2*mce->layout->total_sizeB,
4804 Ity_I32 );
4805 }
4806 return mkU32(0);
4807 }
4808 default:
4809 VG_(printf)("mc_translate.c: schemeE: unhandled: ");
4810 ppIRExpr(e);
4811 VG_(tool_panic)("memcheck:schemeE");
4812 }
4813}
4814
sewardjdb5907d2009-11-26 17:20:21 +00004815
sewardj7cf4e6b2008-05-01 20:24:26 +00004816static void do_origins_Dirty ( MCEnv* mce, IRDirty* d )
4817{
4818 // This is a hacked version of do_shadow_Dirty
njn4c245e52009-03-15 23:25:38 +00004819 Int i, n, toDo, gSz, gOff;
sewardj7cf4e6b2008-05-01 20:24:26 +00004820 IRAtom *here, *curr;
4821 IRTemp dst;
sewardj7cf4e6b2008-05-01 20:24:26 +00004822
4823 /* First check the guard. */
4824 curr = schemeE( mce, d->guard );
4825
4826 /* Now round up all inputs and maxU32 over them. */
4827
4828 /* Inputs: unmasked args */
4829 for (i = 0; d->args[i]; i++) {
4830 if (d->cee->mcx_mask & (1<<i)) {
4831 /* ignore this arg */
4832 } else {
4833 here = schemeE( mce, d->args[i] );
4834 curr = gen_maxU32( mce, curr, here );
4835 }
4836 }
4837
4838 /* Inputs: guest state that we read. */
4839 for (i = 0; i < d->nFxState; i++) {
4840 tl_assert(d->fxState[i].fx != Ifx_None);
4841 if (d->fxState[i].fx == Ifx_Write)
4842 continue;
4843
4844 /* Ignore any sections marked as 'always defined'. */
4845 if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size )) {
4846 if (0)
4847 VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
4848 d->fxState[i].offset, d->fxState[i].size );
4849 continue;
4850 }
4851
4852 /* This state element is read or modified. So we need to
4853 consider it. If larger than 4 bytes, deal with it in 4-byte
4854 chunks. */
4855 gSz = d->fxState[i].size;
4856 gOff = d->fxState[i].offset;
4857 tl_assert(gSz > 0);
4858 while (True) {
4859 Int b_offset;
4860 if (gSz == 0) break;
4861 n = gSz <= 4 ? gSz : 4;
4862 /* update 'curr' with maxU32 of the state slice
4863 gOff .. gOff+n-1 */
4864 b_offset = MC_(get_otrack_shadow_offset)(gOff, 4);
4865 if (b_offset != -1) {
4866 here = assignNew( 'B',mce,
4867 Ity_I32,
4868 IRExpr_Get(b_offset + 2*mce->layout->total_sizeB,
4869 Ity_I32));
4870 curr = gen_maxU32( mce, curr, here );
4871 }
4872 gSz -= n;
4873 gOff += n;
4874 }
4875
4876 }
4877
4878 /* Inputs: memory */
4879
4880 if (d->mFx != Ifx_None) {
4881 /* Because we may do multiple shadow loads/stores from the same
4882 base address, it's best to do a single test of its
4883 definedness right now. Post-instrumentation optimisation
4884 should remove all but this test. */
4885 tl_assert(d->mAddr);
4886 here = schemeE( mce, d->mAddr );
4887 curr = gen_maxU32( mce, curr, here );
4888 }
4889
4890 /* Deal with memory inputs (reads or modifies) */
4891 if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
sewardj7cf4e6b2008-05-01 20:24:26 +00004892 toDo = d->mSize;
4893 /* chew off 32-bit chunks. We don't care about the endianness
4894 since it's all going to be condensed down to a single bit,
4895 but nevertheless choose an endianness which is hopefully
4896 native to the platform. */
4897 while (toDo >= 4) {
4898 here = gen_load_b( mce, 4, d->mAddr, d->mSize - toDo );
4899 curr = gen_maxU32( mce, curr, here );
4900 toDo -= 4;
4901 }
sewardj8c93fcc2008-10-30 13:08:31 +00004902 /* handle possible 16-bit excess */
4903 while (toDo >= 2) {
4904 here = gen_load_b( mce, 2, d->mAddr, d->mSize - toDo );
4905 curr = gen_maxU32( mce, curr, here );
4906 toDo -= 2;
4907 }
4908 tl_assert(toDo == 0); /* also need to handle 1-byte excess */
sewardj7cf4e6b2008-05-01 20:24:26 +00004909 }
4910
4911 /* Whew! So curr is a 32-bit B-value which should give an origin
4912 of some use if any of the inputs to the helper are undefined.
4913 Now we need to re-distribute the results to all destinations. */
4914
4915 /* Outputs: the destination temporary, if there is one. */
4916 if (d->tmp != IRTemp_INVALID) {
4917 dst = findShadowTmpB(mce, d->tmp);
4918 assign( 'V', mce, dst, curr );
4919 }
4920
4921 /* Outputs: guest state that we write or modify. */
4922 for (i = 0; i < d->nFxState; i++) {
4923 tl_assert(d->fxState[i].fx != Ifx_None);
4924 if (d->fxState[i].fx == Ifx_Read)
4925 continue;
4926
4927 /* Ignore any sections marked as 'always defined'. */
4928 if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size ))
4929 continue;
4930
4931 /* This state element is written or modified. So we need to
4932 consider it. If larger than 4 bytes, deal with it in 4-byte
4933 chunks. */
4934 gSz = d->fxState[i].size;
4935 gOff = d->fxState[i].offset;
4936 tl_assert(gSz > 0);
4937 while (True) {
4938 Int b_offset;
4939 if (gSz == 0) break;
4940 n = gSz <= 4 ? gSz : 4;
4941 /* Write 'curr' to the state slice gOff .. gOff+n-1 */
4942 b_offset = MC_(get_otrack_shadow_offset)(gOff, 4);
4943 if (b_offset != -1) {
4944 stmt( 'B', mce, IRStmt_Put(b_offset + 2*mce->layout->total_sizeB,
4945 curr ));
4946 }
4947 gSz -= n;
4948 gOff += n;
4949 }
4950 }
4951
4952 /* Outputs: memory that we write or modify. Same comments about
4953 endianness as above apply. */
4954 if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
sewardj7cf4e6b2008-05-01 20:24:26 +00004955 toDo = d->mSize;
4956 /* chew off 32-bit chunks */
4957 while (toDo >= 4) {
sewardj1c0ce7a2009-07-01 08:10:49 +00004958 gen_store_b( mce, 4, d->mAddr, d->mSize - toDo, curr,
4959 NULL/*guard*/ );
sewardj7cf4e6b2008-05-01 20:24:26 +00004960 toDo -= 4;
4961 }
sewardj8c93fcc2008-10-30 13:08:31 +00004962 /* handle possible 16-bit excess */
4963 while (toDo >= 2) {
sewardj1c0ce7a2009-07-01 08:10:49 +00004964 gen_store_b( mce, 2, d->mAddr, d->mSize - toDo, curr,
4965 NULL/*guard*/ );
sewardj8c93fcc2008-10-30 13:08:31 +00004966 toDo -= 2;
4967 }
4968 tl_assert(toDo == 0); /* also need to handle 1-byte excess */
sewardj7cf4e6b2008-05-01 20:24:26 +00004969 }
sewardj7cf4e6b2008-05-01 20:24:26 +00004970}
4971
sewardjdb5907d2009-11-26 17:20:21 +00004972
4973static void do_origins_Store ( MCEnv* mce,
4974 IREndness stEnd,
4975 IRExpr* stAddr,
4976 IRExpr* stData )
4977{
4978 Int dszB;
4979 IRAtom* dataB;
4980 /* assert that the B value for the address is already available
4981 (somewhere), since the call to schemeE will want to see it.
4982 XXXX how does this actually ensure that?? */
4983 tl_assert(isIRAtom(stAddr));
4984 tl_assert(isIRAtom(stData));
4985 dszB = sizeofIRType( typeOfIRExpr(mce->sb->tyenv, stData ) );
4986 dataB = schemeE( mce, stData );
4987 gen_store_b( mce, dszB, stAddr, 0/*offset*/, dataB,
4988 NULL/*guard*/ );
4989}
4990
4991
sewardj7cf4e6b2008-05-01 20:24:26 +00004992static void schemeS ( MCEnv* mce, IRStmt* st )
4993{
4994 tl_assert(MC_(clo_mc_level) == 3);
4995
4996 switch (st->tag) {
4997
4998 case Ist_AbiHint:
4999 /* The value-check instrumenter handles this - by arranging
5000 to pass the address of the next instruction to
5001 MC_(helperc_MAKE_STACK_UNINIT). This is all that needs to
5002 happen for origin tracking w.r.t. AbiHints. So there is
5003 nothing to do here. */
5004 break;
5005
5006 case Ist_PutI: {
5007 IRRegArray* descr_b;
5008 IRAtom *t1, *t2, *t3, *t4;
5009 IRRegArray* descr = st->Ist.PutI.descr;
5010 IRType equivIntTy
5011 = MC_(get_otrack_reg_array_equiv_int_type)(descr);
5012 /* If this array is unshadowable for whatever reason,
5013 generate no code. */
5014 if (equivIntTy == Ity_INVALID)
5015 break;
5016 tl_assert(sizeofIRType(equivIntTy) >= 4);
5017 tl_assert(sizeofIRType(equivIntTy) == sizeofIRType(descr->elemTy));
5018 descr_b
5019 = mkIRRegArray( descr->base + 2*mce->layout->total_sizeB,
5020 equivIntTy, descr->nElems );
5021 /* Compute a value to Put - the conjoinment of the origin for
5022 the data to be Put-ted (obviously) and of the index value
5023 (not so obviously). */
5024 t1 = schemeE( mce, st->Ist.PutI.data );
5025 t2 = schemeE( mce, st->Ist.PutI.ix );
5026 t3 = gen_maxU32( mce, t1, t2 );
5027 t4 = zWidenFrom32( mce, equivIntTy, t3 );
5028 stmt( 'B', mce, IRStmt_PutI( descr_b, st->Ist.PutI.ix,
5029 st->Ist.PutI.bias, t4 ));
5030 break;
5031 }
sewardjdb5907d2009-11-26 17:20:21 +00005032
sewardj7cf4e6b2008-05-01 20:24:26 +00005033 case Ist_Dirty:
5034 do_origins_Dirty( mce, st->Ist.Dirty.details );
5035 break;
sewardjdb5907d2009-11-26 17:20:21 +00005036
5037 case Ist_Store:
5038 do_origins_Store( mce, st->Ist.Store.end,
5039 st->Ist.Store.addr,
5040 st->Ist.Store.data );
5041 break;
5042
5043 case Ist_LLSC: {
5044 /* In short: treat a load-linked like a normal load followed
5045 by an assignment of the loaded (shadow) data the result
5046 temporary. Treat a store-conditional like a normal store,
5047 and mark the result temporary as defined. */
5048 if (st->Ist.LLSC.storedata == NULL) {
5049 /* Load Linked */
5050 IRType resTy
5051 = typeOfIRTemp(mce->sb->tyenv, st->Ist.LLSC.result);
5052 IRExpr* vanillaLoad
5053 = IRExpr_Load(st->Ist.LLSC.end, resTy, st->Ist.LLSC.addr);
5054 tl_assert(resTy == Ity_I64 || resTy == Ity_I32
5055 || resTy == Ity_I16 || resTy == Ity_I8);
5056 assign( 'B', mce, findShadowTmpB(mce, st->Ist.LLSC.result),
5057 schemeE(mce, vanillaLoad));
5058 } else {
5059 /* Store conditional */
5060 do_origins_Store( mce, st->Ist.LLSC.end,
5061 st->Ist.LLSC.addr,
5062 st->Ist.LLSC.storedata );
5063 /* For the rationale behind this, see comments at the
5064 place where the V-shadow for .result is constructed, in
5065 do_shadow_LLSC. In short, we regard .result as
5066 always-defined. */
5067 assign( 'B', mce, findShadowTmpB(mce, st->Ist.LLSC.result),
5068 mkU32(0) );
sewardj1c0ce7a2009-07-01 08:10:49 +00005069 }
sewardj7cf4e6b2008-05-01 20:24:26 +00005070 break;
5071 }
sewardjdb5907d2009-11-26 17:20:21 +00005072
sewardj7cf4e6b2008-05-01 20:24:26 +00005073 case Ist_Put: {
5074 Int b_offset
5075 = MC_(get_otrack_shadow_offset)(
5076 st->Ist.Put.offset,
sewardj1c0ce7a2009-07-01 08:10:49 +00005077 sizeofIRType(typeOfIRExpr(mce->sb->tyenv, st->Ist.Put.data))
sewardj7cf4e6b2008-05-01 20:24:26 +00005078 );
5079 if (b_offset >= 0) {
5080 /* FIXME: this isn't an atom! */
5081 stmt( 'B', mce, IRStmt_Put(b_offset + 2*mce->layout->total_sizeB,
5082 schemeE( mce, st->Ist.Put.data )) );
5083 }
5084 break;
5085 }
sewardjdb5907d2009-11-26 17:20:21 +00005086
sewardj7cf4e6b2008-05-01 20:24:26 +00005087 case Ist_WrTmp:
5088 assign( 'B', mce, findShadowTmpB(mce, st->Ist.WrTmp.tmp),
5089 schemeE(mce, st->Ist.WrTmp.data) );
5090 break;
sewardjdb5907d2009-11-26 17:20:21 +00005091
sewardj7cf4e6b2008-05-01 20:24:26 +00005092 case Ist_MBE:
5093 case Ist_NoOp:
5094 case Ist_Exit:
5095 case Ist_IMark:
5096 break;
sewardjdb5907d2009-11-26 17:20:21 +00005097
sewardj7cf4e6b2008-05-01 20:24:26 +00005098 default:
5099 VG_(printf)("mc_translate.c: schemeS: unhandled: ");
5100 ppIRStmt(st);
5101 VG_(tool_panic)("memcheck:schemeS");
5102 }
5103}
5104
5105
njn25e49d8e72002-09-23 09:36:25 +00005106/*--------------------------------------------------------------------*/
njn25cac76cb2002-09-23 11:21:57 +00005107/*--- end mc_translate.c ---*/
njn25e49d8e72002-09-23 09:36:25 +00005108/*--------------------------------------------------------------------*/