blob: 65b266ae3f24f459acbb5819ecbb6aca2bf0f7c3 [file] [log] [blame]
nethercotebb1c9912004-01-04 16:43:23 +00001
njn25e49d8e72002-09-23 09:36:25 +00002/*--------------------------------------------------------------------*/
sewardj95448072004-11-22 20:19:51 +00003/*--- Instrument IR to perform memory checking operations. ---*/
njn25cac76cb2002-09-23 11:21:57 +00004/*--- mc_translate.c ---*/
njn25e49d8e72002-09-23 09:36:25 +00005/*--------------------------------------------------------------------*/
njnc9539842002-10-02 13:26:35 +00006
njn25e49d8e72002-09-23 09:36:25 +00007/*
nethercote137bc552003-11-14 17:47:54 +00008 This file is part of MemCheck, a heavyweight Valgrind tool for
njnc9539842002-10-02 13:26:35 +00009 detecting memory errors.
njn25e49d8e72002-09-23 09:36:25 +000010
njn9f207462009-03-10 22:02:09 +000011 Copyright (C) 2000-2009 Julian Seward
njn25e49d8e72002-09-23 09:36:25 +000012 jseward@acm.org
13
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
18
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
23
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, write to the Free Software
26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27 02111-1307, USA.
28
29 The GNU General Public License is contained in the file COPYING.
30*/
31
njnc7561b92005-06-19 01:24:32 +000032#include "pub_tool_basics.h"
njn1d0825f2006-03-27 11:37:07 +000033#include "pub_tool_hashtable.h" // For mc_include.h
njn132bfcc2005-06-04 19:16:06 +000034#include "pub_tool_libcassert.h"
njn36a20fa2005-06-03 03:08:39 +000035#include "pub_tool_libcprint.h"
njnc7561b92005-06-19 01:24:32 +000036#include "pub_tool_tooliface.h"
sewardj53ee1fc2005-12-23 02:29:58 +000037#include "pub_tool_machine.h" // VG_(fnptr_to_fnentry)
sewardj81651dc2007-08-28 06:05:20 +000038#include "pub_tool_xarray.h"
39#include "pub_tool_mallocfree.h"
40#include "pub_tool_libcbase.h"
njn25e49d8e72002-09-23 09:36:25 +000041
sewardj7cf4e6b2008-05-01 20:24:26 +000042#include "mc_include.h"
43
44
sewardj992dff92005-10-07 11:08:55 +000045/* This file implements the Memcheck instrumentation, and in
46 particular contains the core of its undefined value detection
47 machinery. For a comprehensive background of the terminology,
48 algorithms and rationale used herein, read:
49
50 Using Valgrind to detect undefined value errors with
51 bit-precision
52
53 Julian Seward and Nicholas Nethercote
54
55 2005 USENIX Annual Technical Conference (General Track),
56 Anaheim, CA, USA, April 10-15, 2005.
njn6665ea22007-05-24 23:14:41 +000057
58 ----
59
60 Here is as good a place as any to record exactly when V bits are and
61 should be checked, why, and what function is responsible.
62
63
64 Memcheck complains when an undefined value is used:
65
66 1. In the condition of a conditional branch. Because it could cause
67 incorrect control flow, and thus cause incorrect externally-visible
68 behaviour. [mc_translate.c:complainIfUndefined]
69
70 2. As an argument to a system call, or as the value that specifies
71 the system call number. Because it could cause an incorrect
72 externally-visible side effect. [mc_translate.c:mc_pre_reg_read]
73
74 3. As the address in a load or store. Because it could cause an
75 incorrect value to be used later, which could cause externally-visible
76 behaviour (eg. via incorrect control flow or an incorrect system call
77 argument) [complainIfUndefined]
78
79 4. As the target address of a branch. Because it could cause incorrect
80 control flow. [complainIfUndefined]
81
82 5. As an argument to setenv, unsetenv, or putenv. Because it could put
83 an incorrect value into the external environment.
84 [mc_replace_strmem.c:VG_WRAP_FUNCTION_ZU(*, *env)]
85
86 6. As the index in a GETI or PUTI operation. I'm not sure why... (njn).
87 [complainIfUndefined]
88
89 7. As an argument to the VALGRIND_CHECK_MEM_IS_DEFINED and
90 VALGRIND_CHECK_VALUE_IS_DEFINED client requests. Because the user
91 requested it. [in memcheck.h]
92
93
94 Memcheck also complains, but should not, when an undefined value is used:
95
96 8. As the shift value in certain SIMD shift operations (but not in the
97 standard integer shift operations). This inconsistency is due to
      historical reasons. [complainIfUndefined]
99
100
101 Memcheck does not complain, but should, when an undefined value is used:
102
103 9. As an input to a client request. Because the client request may
104 affect the visible behaviour -- see bug #144362 for an example
105 involving the malloc replacements in vg_replace_malloc.c and
106 VALGRIND_NON_SIMD_CALL* requests, where an uninitialised argument
107 isn't identified. That bug report also has some info on how to solve
108 the problem. [valgrind.h:VALGRIND_DO_CLIENT_REQUEST]
109
110
111 In practice, 1 and 2 account for the vast majority of cases.
sewardj992dff92005-10-07 11:08:55 +0000112*/
113
sewardj95448072004-11-22 20:19:51 +0000114/*------------------------------------------------------------*/
115/*--- Forward decls ---*/
116/*------------------------------------------------------------*/
117
118struct _MCEnv;
119
sewardj7cf4e6b2008-05-01 20:24:26 +0000120static IRType shadowTypeV ( IRType ty );
sewardj95448072004-11-22 20:19:51 +0000121static IRExpr* expr2vbits ( struct _MCEnv* mce, IRExpr* e );
sewardjafa617b2008-07-22 09:59:48 +0000122static IRTemp findShadowTmpB ( struct _MCEnv* mce, IRTemp orig );
sewardj95448072004-11-22 20:19:51 +0000123
124
125/*------------------------------------------------------------*/
126/*--- Memcheck running state, and tmp management. ---*/
127/*------------------------------------------------------------*/
128
sewardj1c0ce7a2009-07-01 08:10:49 +0000129/* Carries info about a particular tmp. The tmp's number is not
130 recorded, as this is implied by (equal to) its index in the tmpMap
131 in MCEnv. The tmp's type is also not recorded, as this is present
132 in MCEnv.sb->tyenv.
133
134 When .kind is Orig, .shadowV and .shadowB may give the identities
135 of the temps currently holding the associated definedness (shadowV)
136 and origin (shadowB) values, or these may be IRTemp_INVALID if code
137 to compute such values has not yet been emitted.
138
   When .kind is VSh or BSh then the tmp holds a V- or B- value,
140 and so .shadowV and .shadowB must be IRTemp_INVALID, since it is
141 illogical for a shadow tmp itself to be shadowed.
142*/
143typedef
144 enum { Orig=1, VSh=2, BSh=3 }
145 TempKind;
146
147typedef
148 struct {
149 TempKind kind;
150 IRTemp shadowV;
151 IRTemp shadowB;
152 }
153 TempMapEnt;
154
155
sewardj95448072004-11-22 20:19:51 +0000156/* Carries around state during memcheck instrumentation. */
157typedef
158 struct _MCEnv {
sewardj0b9d74a2006-12-24 02:24:11 +0000159 /* MODIFIED: the superblock being constructed. IRStmts are
160 added. */
sewardj1c0ce7a2009-07-01 08:10:49 +0000161 IRSB* sb;
sewardj7cf4e6b2008-05-01 20:24:26 +0000162 Bool trace;
sewardj95448072004-11-22 20:19:51 +0000163
sewardj1c0ce7a2009-07-01 08:10:49 +0000164 /* MODIFIED: a table [0 .. #temps_in_sb-1] which gives the
165 current kind and possibly shadow temps for each temp in the
166 IRSB being constructed. Note that it does not contain the
167 type of each tmp. If you want to know the type, look at the
168 relevant entry in sb->tyenv. It follows that at all times
169 during the instrumentation process, the valid indices for
170 tmpMap and sb->tyenv are identical, being 0 .. N-1 where N is
171 total number of Orig, V- and B- temps allocated so far.
172
173 The reason for this strange split (types in one place, all
174 other info in another) is that we need the types to be
175 attached to sb so as to make it possible to do
176 "typeOfIRExpr(mce->bb->tyenv, ...)" at various places in the
177 instrumentation process. */
178 XArray* /* of TempMapEnt */ tmpMap;
sewardj95448072004-11-22 20:19:51 +0000179
sewardjd5204dc2004-12-31 01:16:11 +0000180 /* MODIFIED: indicates whether "bogus" literals have so far been
181 found. Starts off False, and may change to True. */
182 Bool bogusLiterals;
183
sewardj95448072004-11-22 20:19:51 +0000184 /* READONLY: the guest layout. This indicates which parts of
185 the guest state should be regarded as 'always defined'. */
186 VexGuestLayout* layout;
sewardj634ba772006-10-15 12:47:37 +0000187
sewardj95448072004-11-22 20:19:51 +0000188 /* READONLY: the host word type. Needed for constructing
189 arguments of type 'HWord' to be passed to helper functions.
190 Ity_I32 or Ity_I64 only. */
191 IRType hWordTy;
192 }
193 MCEnv;
194
195/* SHADOW TMP MANAGEMENT. Shadow tmps are allocated lazily (on
196 demand), as they are encountered. This is for two reasons.
197
198 (1) (less important reason): Many original tmps are unused due to
   initial IR optimisation, and we do not want to waste space in tables
200 tracking them.
201
202 Shadow IRTemps are therefore allocated on demand. mce.tmpMap is a
203 table indexed [0 .. n_types-1], which gives the current shadow for
204 each original tmp, or INVALID_IRTEMP if none is so far assigned.
205 It is necessary to support making multiple assignments to a shadow
206 -- specifically, after testing a shadow for definedness, it needs
207 to be made defined. But IR's SSA property disallows this.
208
209 (2) (more important reason): Therefore, when a shadow needs to get
210 a new value, a new temporary is created, the value is assigned to
211 that, and the tmpMap is updated to reflect the new binding.
212
213 A corollary is that if the tmpMap maps a given tmp to
sewardjf1962d32006-10-19 13:22:16 +0000214 IRTemp_INVALID and we are hoping to read that shadow tmp, it means
sewardj95448072004-11-22 20:19:51 +0000215 there's a read-before-write error in the original tmps. The IR
216 sanity checker should catch all such anomalies, however.
njn25e49d8e72002-09-23 09:36:25 +0000217*/
sewardj95448072004-11-22 20:19:51 +0000218
sewardj1c0ce7a2009-07-01 08:10:49 +0000219/* Create a new IRTemp of type 'ty' and kind 'kind', and add it to
220 both the table in mce->sb and to our auxiliary mapping. Note that
221 newTemp may cause mce->tmpMap to resize, hence previous results
222 from VG_(indexXA)(mce->tmpMap) are invalidated. */
223static IRTemp newTemp ( MCEnv* mce, IRType ty, TempKind kind )
224{
225 Word newIx;
226 TempMapEnt ent;
227 IRTemp tmp = newIRTemp(mce->sb->tyenv, ty);
228 ent.kind = kind;
229 ent.shadowV = IRTemp_INVALID;
230 ent.shadowB = IRTemp_INVALID;
231 newIx = VG_(addToXA)( mce->tmpMap, &ent );
232 tl_assert(newIx == (Word)tmp);
233 return tmp;
234}
235
236
sewardj95448072004-11-22 20:19:51 +0000237/* Find the tmp currently shadowing the given original tmp. If none
238 so far exists, allocate one. */
sewardj7cf4e6b2008-05-01 20:24:26 +0000239static IRTemp findShadowTmpV ( MCEnv* mce, IRTemp orig )
njn25e49d8e72002-09-23 09:36:25 +0000240{
sewardj1c0ce7a2009-07-01 08:10:49 +0000241 TempMapEnt* ent;
242 /* VG_(indexXA) range-checks 'orig', hence no need to check
243 here. */
244 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
245 tl_assert(ent->kind == Orig);
246 if (ent->shadowV == IRTemp_INVALID) {
247 IRTemp tmpV
248 = newTemp( mce, shadowTypeV(mce->sb->tyenv->types[orig]), VSh );
249 /* newTemp may cause mce->tmpMap to resize, hence previous results
250 from VG_(indexXA) are invalid. */
251 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
252 tl_assert(ent->kind == Orig);
253 tl_assert(ent->shadowV == IRTemp_INVALID);
254 ent->shadowV = tmpV;
njn25e49d8e72002-09-23 09:36:25 +0000255 }
sewardj1c0ce7a2009-07-01 08:10:49 +0000256 return ent->shadowV;
njn25e49d8e72002-09-23 09:36:25 +0000257}
258
sewardj95448072004-11-22 20:19:51 +0000259/* Allocate a new shadow for the given original tmp. This means any
260 previous shadow is abandoned. This is needed because it is
261 necessary to give a new value to a shadow once it has been tested
262 for undefinedness, but unfortunately IR's SSA property disallows
263 this. Instead we must abandon the old shadow, allocate a new one
sewardj1c0ce7a2009-07-01 08:10:49 +0000264 and use that instead.
265
266 This is the same as findShadowTmpV, except we don't bother to see
267 if a shadow temp already existed -- we simply allocate a new one
268 regardless. */
sewardj7cf4e6b2008-05-01 20:24:26 +0000269static void newShadowTmpV ( MCEnv* mce, IRTemp orig )
njn25e49d8e72002-09-23 09:36:25 +0000270{
sewardj1c0ce7a2009-07-01 08:10:49 +0000271 TempMapEnt* ent;
272 /* VG_(indexXA) range-checks 'orig', hence no need to check
273 here. */
274 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
275 tl_assert(ent->kind == Orig);
276 if (1) {
277 IRTemp tmpV
278 = newTemp( mce, shadowTypeV(mce->sb->tyenv->types[orig]), VSh );
279 /* newTemp may cause mce->tmpMap to resize, hence previous results
280 from VG_(indexXA) are invalid. */
281 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
282 tl_assert(ent->kind == Orig);
283 ent->shadowV = tmpV;
284 }
sewardj95448072004-11-22 20:19:51 +0000285}
286
287
288/*------------------------------------------------------------*/
289/*--- IRAtoms -- a subset of IRExprs ---*/
290/*------------------------------------------------------------*/
291
292/* An atom is either an IRExpr_Const or an IRExpr_Tmp, as defined by
sewardj710d6c22005-03-20 18:55:15 +0000293 isIRAtom() in libvex_ir.h. Because this instrumenter expects flat
sewardj95448072004-11-22 20:19:51 +0000294 input, most of this code deals in atoms. Usefully, a value atom
295 always has a V-value which is also an atom: constants are shadowed
296 by constants, and temps are shadowed by the corresponding shadow
297 temporary. */
298
299typedef IRExpr IRAtom;
300
301/* (used for sanity checks only): is this an atom which looks
302 like it's from original code? */
303static Bool isOriginalAtom ( MCEnv* mce, IRAtom* a1 )
304{
305 if (a1->tag == Iex_Const)
306 return True;
sewardj1c0ce7a2009-07-01 08:10:49 +0000307 if (a1->tag == Iex_RdTmp) {
308 TempMapEnt* ent = VG_(indexXA)( mce->tmpMap, a1->Iex.RdTmp.tmp );
309 return ent->kind == Orig;
310 }
sewardj95448072004-11-22 20:19:51 +0000311 return False;
312}
313
314/* (used for sanity checks only): is this an atom which looks
315 like it's from shadow code? */
316static Bool isShadowAtom ( MCEnv* mce, IRAtom* a1 )
317{
318 if (a1->tag == Iex_Const)
319 return True;
sewardj1c0ce7a2009-07-01 08:10:49 +0000320 if (a1->tag == Iex_RdTmp) {
321 TempMapEnt* ent = VG_(indexXA)( mce->tmpMap, a1->Iex.RdTmp.tmp );
322 return ent->kind == VSh || ent->kind == BSh;
323 }
sewardj95448072004-11-22 20:19:51 +0000324 return False;
325}
326
327/* (used for sanity checks only): check that both args are atoms and
328 are identically-kinded. */
329static Bool sameKindedAtoms ( IRAtom* a1, IRAtom* a2 )
330{
sewardj0b9d74a2006-12-24 02:24:11 +0000331 if (a1->tag == Iex_RdTmp && a2->tag == Iex_RdTmp)
sewardj95448072004-11-22 20:19:51 +0000332 return True;
sewardjbef552a2005-08-30 12:54:36 +0000333 if (a1->tag == Iex_Const && a2->tag == Iex_Const)
sewardj95448072004-11-22 20:19:51 +0000334 return True;
335 return False;
336}
337
338
339/*------------------------------------------------------------*/
340/*--- Type management ---*/
341/*------------------------------------------------------------*/
342
343/* Shadow state is always accessed using integer types. This returns
344 an integer type with the same size (as per sizeofIRType) as the
345 given type. The only valid shadow types are Bit, I8, I16, I32,
sewardj3245c912004-12-10 14:58:26 +0000346 I64, V128. */
sewardj95448072004-11-22 20:19:51 +0000347
sewardj7cf4e6b2008-05-01 20:24:26 +0000348static IRType shadowTypeV ( IRType ty )
sewardj95448072004-11-22 20:19:51 +0000349{
350 switch (ty) {
351 case Ity_I1:
352 case Ity_I8:
353 case Ity_I16:
354 case Ity_I32:
sewardj6cf40ff2005-04-20 22:31:26 +0000355 case Ity_I64:
356 case Ity_I128: return ty;
sewardj3245c912004-12-10 14:58:26 +0000357 case Ity_F32: return Ity_I32;
358 case Ity_F64: return Ity_I64;
359 case Ity_V128: return Ity_V128;
sewardj95448072004-11-22 20:19:51 +0000360 default: ppIRType(ty);
sewardj7cf4e6b2008-05-01 20:24:26 +0000361 VG_(tool_panic)("memcheck:shadowTypeV");
sewardj95448072004-11-22 20:19:51 +0000362 }
363}
364
365/* Produce a 'defined' value of the given shadow type. Should only be
366 supplied shadow types (Bit/I8/I16/I32/UI64). */
367static IRExpr* definedOfType ( IRType ty ) {
368 switch (ty) {
sewardj170ee212004-12-10 18:57:51 +0000369 case Ity_I1: return IRExpr_Const(IRConst_U1(False));
370 case Ity_I8: return IRExpr_Const(IRConst_U8(0));
371 case Ity_I16: return IRExpr_Const(IRConst_U16(0));
372 case Ity_I32: return IRExpr_Const(IRConst_U32(0));
373 case Ity_I64: return IRExpr_Const(IRConst_U64(0));
374 case Ity_V128: return IRExpr_Const(IRConst_V128(0x0000));
sewardjf1962d32006-10-19 13:22:16 +0000375 default: VG_(tool_panic)("memcheck:definedOfType");
njn25e49d8e72002-09-23 09:36:25 +0000376 }
377}
378
379
sewardj95448072004-11-22 20:19:51 +0000380/*------------------------------------------------------------*/
381/*--- Constructing IR fragments ---*/
382/*------------------------------------------------------------*/
383
sewardj95448072004-11-22 20:19:51 +0000384/* add stmt to a bb */
sewardj7cf4e6b2008-05-01 20:24:26 +0000385static inline void stmt ( HChar cat, MCEnv* mce, IRStmt* st ) {
386 if (mce->trace) {
387 VG_(printf)(" %c: ", cat);
388 ppIRStmt(st);
389 VG_(printf)("\n");
390 }
sewardj1c0ce7a2009-07-01 08:10:49 +0000391 addStmtToIRSB(mce->sb, st);
sewardj7cf4e6b2008-05-01 20:24:26 +0000392}
393
394/* assign value to tmp */
395static inline
396void assign ( HChar cat, MCEnv* mce, IRTemp tmp, IRExpr* expr ) {
sewardj1c0ce7a2009-07-01 08:10:49 +0000397 stmt(cat, mce, IRStmt_WrTmp(tmp,expr));
sewardj7cf4e6b2008-05-01 20:24:26 +0000398}
sewardj95448072004-11-22 20:19:51 +0000399
400/* build various kinds of expressions */
401#define binop(_op, _arg1, _arg2) IRExpr_Binop((_op),(_arg1),(_arg2))
402#define unop(_op, _arg) IRExpr_Unop((_op),(_arg))
403#define mkU8(_n) IRExpr_Const(IRConst_U8(_n))
404#define mkU16(_n) IRExpr_Const(IRConst_U16(_n))
405#define mkU32(_n) IRExpr_Const(IRConst_U32(_n))
406#define mkU64(_n) IRExpr_Const(IRConst_U64(_n))
sewardj170ee212004-12-10 18:57:51 +0000407#define mkV128(_n) IRExpr_Const(IRConst_V128(_n))
sewardj0b9d74a2006-12-24 02:24:11 +0000408#define mkexpr(_tmp) IRExpr_RdTmp((_tmp))
sewardj95448072004-11-22 20:19:51 +0000409
sewardj7cf4e6b2008-05-01 20:24:26 +0000410/* Bind the given expression to a new temporary, and return the
sewardj95448072004-11-22 20:19:51 +0000411 temporary. This effectively converts an arbitrary expression into
sewardj7cf4e6b2008-05-01 20:24:26 +0000412 an atom.
413
414 'ty' is the type of 'e' and hence the type that the new temporary
sewardj1c0ce7a2009-07-01 08:10:49 +0000415 needs to be. But passing it in is redundant, since we can deduce
416 the type merely by inspecting 'e'. So at least use that fact to
417 assert that the two types agree. */
418static IRAtom* assignNew ( HChar cat, MCEnv* mce, IRType ty, IRExpr* e )
419{
420 TempKind k;
421 IRTemp t;
422 IRType tyE = typeOfIRExpr(mce->sb->tyenv, e);
sewardj7cf4e6b2008-05-01 20:24:26 +0000423 tl_assert(tyE == ty); /* so 'ty' is redundant (!) */
sewardj1c0ce7a2009-07-01 08:10:49 +0000424 switch (cat) {
425 case 'V': k = VSh; break;
426 case 'B': k = BSh; break;
427 case 'C': k = Orig; break;
428 /* happens when we are making up new "orig"
429 expressions, for IRCAS handling */
430 default: tl_assert(0);
431 }
432 t = newTemp(mce, ty, k);
sewardj7cf4e6b2008-05-01 20:24:26 +0000433 assign(cat, mce, t, e);
sewardj95448072004-11-22 20:19:51 +0000434 return mkexpr(t);
435}
436
437
438/*------------------------------------------------------------*/
439/*--- Constructing definedness primitive ops ---*/
440/*------------------------------------------------------------*/
441
442/* --------- Defined-if-either-defined --------- */
443
444static IRAtom* mkDifD8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
445 tl_assert(isShadowAtom(mce,a1));
446 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000447 return assignNew('V', mce, Ity_I8, binop(Iop_And8, a1, a2));
sewardj95448072004-11-22 20:19:51 +0000448}
449
450static IRAtom* mkDifD16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
451 tl_assert(isShadowAtom(mce,a1));
452 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000453 return assignNew('V', mce, Ity_I16, binop(Iop_And16, a1, a2));
sewardj95448072004-11-22 20:19:51 +0000454}
455
456static IRAtom* mkDifD32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
457 tl_assert(isShadowAtom(mce,a1));
458 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000459 return assignNew('V', mce, Ity_I32, binop(Iop_And32, a1, a2));
sewardj95448072004-11-22 20:19:51 +0000460}
461
sewardj7010f6e2004-12-10 13:35:22 +0000462static IRAtom* mkDifD64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
463 tl_assert(isShadowAtom(mce,a1));
464 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000465 return assignNew('V', mce, Ity_I64, binop(Iop_And64, a1, a2));
sewardj7010f6e2004-12-10 13:35:22 +0000466}
467
sewardj20d38f22005-02-07 23:50:18 +0000468static IRAtom* mkDifDV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
sewardj170ee212004-12-10 18:57:51 +0000469 tl_assert(isShadowAtom(mce,a1));
470 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000471 return assignNew('V', mce, Ity_V128, binop(Iop_AndV128, a1, a2));
sewardj170ee212004-12-10 18:57:51 +0000472}
473
sewardj95448072004-11-22 20:19:51 +0000474/* --------- Undefined-if-either-undefined --------- */
475
476static IRAtom* mkUifU8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
477 tl_assert(isShadowAtom(mce,a1));
478 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000479 return assignNew('V', mce, Ity_I8, binop(Iop_Or8, a1, a2));
sewardj95448072004-11-22 20:19:51 +0000480}
481
482static IRAtom* mkUifU16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
483 tl_assert(isShadowAtom(mce,a1));
484 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000485 return assignNew('V', mce, Ity_I16, binop(Iop_Or16, a1, a2));
sewardj95448072004-11-22 20:19:51 +0000486}
487
488static IRAtom* mkUifU32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
489 tl_assert(isShadowAtom(mce,a1));
490 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000491 return assignNew('V', mce, Ity_I32, binop(Iop_Or32, a1, a2));
sewardj95448072004-11-22 20:19:51 +0000492}
493
494static IRAtom* mkUifU64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
495 tl_assert(isShadowAtom(mce,a1));
496 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000497 return assignNew('V', mce, Ity_I64, binop(Iop_Or64, a1, a2));
sewardj95448072004-11-22 20:19:51 +0000498}
499
sewardj20d38f22005-02-07 23:50:18 +0000500static IRAtom* mkUifUV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
sewardj3245c912004-12-10 14:58:26 +0000501 tl_assert(isShadowAtom(mce,a1));
502 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000503 return assignNew('V', mce, Ity_V128, binop(Iop_OrV128, a1, a2));
sewardj3245c912004-12-10 14:58:26 +0000504}
505
sewardje50a1b12004-12-17 01:24:54 +0000506static IRAtom* mkUifU ( MCEnv* mce, IRType vty, IRAtom* a1, IRAtom* a2 ) {
sewardj95448072004-11-22 20:19:51 +0000507 switch (vty) {
sewardje50a1b12004-12-17 01:24:54 +0000508 case Ity_I8: return mkUifU8(mce, a1, a2);
sewardja1d93302004-12-12 16:45:06 +0000509 case Ity_I16: return mkUifU16(mce, a1, a2);
510 case Ity_I32: return mkUifU32(mce, a1, a2);
511 case Ity_I64: return mkUifU64(mce, a1, a2);
sewardj20d38f22005-02-07 23:50:18 +0000512 case Ity_V128: return mkUifUV128(mce, a1, a2);
sewardj95448072004-11-22 20:19:51 +0000513 default:
514 VG_(printf)("\n"); ppIRType(vty); VG_(printf)("\n");
515 VG_(tool_panic)("memcheck:mkUifU");
njn25e49d8e72002-09-23 09:36:25 +0000516 }
517}
518
sewardj95448072004-11-22 20:19:51 +0000519/* --------- The Left-family of operations. --------- */
njn25e49d8e72002-09-23 09:36:25 +0000520
sewardj95448072004-11-22 20:19:51 +0000521static IRAtom* mkLeft8 ( MCEnv* mce, IRAtom* a1 ) {
522 tl_assert(isShadowAtom(mce,a1));
sewardj7cf4e6b2008-05-01 20:24:26 +0000523 return assignNew('V', mce, Ity_I8, unop(Iop_Left8, a1));
sewardj95448072004-11-22 20:19:51 +0000524}
525
526static IRAtom* mkLeft16 ( MCEnv* mce, IRAtom* a1 ) {
527 tl_assert(isShadowAtom(mce,a1));
sewardj7cf4e6b2008-05-01 20:24:26 +0000528 return assignNew('V', mce, Ity_I16, unop(Iop_Left16, a1));
sewardj95448072004-11-22 20:19:51 +0000529}
530
531static IRAtom* mkLeft32 ( MCEnv* mce, IRAtom* a1 ) {
532 tl_assert(isShadowAtom(mce,a1));
sewardj7cf4e6b2008-05-01 20:24:26 +0000533 return assignNew('V', mce, Ity_I32, unop(Iop_Left32, a1));
sewardj95448072004-11-22 20:19:51 +0000534}
535
sewardj681be302005-01-15 20:43:58 +0000536static IRAtom* mkLeft64 ( MCEnv* mce, IRAtom* a1 ) {
537 tl_assert(isShadowAtom(mce,a1));
sewardj7cf4e6b2008-05-01 20:24:26 +0000538 return assignNew('V', mce, Ity_I64, unop(Iop_Left64, a1));
sewardj681be302005-01-15 20:43:58 +0000539}
540
sewardj95448072004-11-22 20:19:51 +0000541/* --------- 'Improvement' functions for AND/OR. --------- */
542
543/* ImproveAND(data, vbits) = data OR vbits. Defined (0) data 0s give
544 defined (0); all other -> undefined (1).
545*/
546static IRAtom* mkImproveAND8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
njn25e49d8e72002-09-23 09:36:25 +0000547{
sewardj95448072004-11-22 20:19:51 +0000548 tl_assert(isOriginalAtom(mce, data));
549 tl_assert(isShadowAtom(mce, vbits));
550 tl_assert(sameKindedAtoms(data, vbits));
sewardj7cf4e6b2008-05-01 20:24:26 +0000551 return assignNew('V', mce, Ity_I8, binop(Iop_Or8, data, vbits));
sewardj95448072004-11-22 20:19:51 +0000552}
njn25e49d8e72002-09-23 09:36:25 +0000553
sewardj95448072004-11-22 20:19:51 +0000554static IRAtom* mkImproveAND16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
555{
556 tl_assert(isOriginalAtom(mce, data));
557 tl_assert(isShadowAtom(mce, vbits));
558 tl_assert(sameKindedAtoms(data, vbits));
sewardj7cf4e6b2008-05-01 20:24:26 +0000559 return assignNew('V', mce, Ity_I16, binop(Iop_Or16, data, vbits));
sewardj95448072004-11-22 20:19:51 +0000560}
njn25e49d8e72002-09-23 09:36:25 +0000561
sewardj95448072004-11-22 20:19:51 +0000562static IRAtom* mkImproveAND32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
563{
564 tl_assert(isOriginalAtom(mce, data));
565 tl_assert(isShadowAtom(mce, vbits));
566 tl_assert(sameKindedAtoms(data, vbits));
sewardj7cf4e6b2008-05-01 20:24:26 +0000567 return assignNew('V', mce, Ity_I32, binop(Iop_Or32, data, vbits));
sewardj95448072004-11-22 20:19:51 +0000568}
njn25e49d8e72002-09-23 09:36:25 +0000569
sewardj7010f6e2004-12-10 13:35:22 +0000570static IRAtom* mkImproveAND64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
571{
572 tl_assert(isOriginalAtom(mce, data));
573 tl_assert(isShadowAtom(mce, vbits));
574 tl_assert(sameKindedAtoms(data, vbits));
sewardj7cf4e6b2008-05-01 20:24:26 +0000575 return assignNew('V', mce, Ity_I64, binop(Iop_Or64, data, vbits));
sewardj7010f6e2004-12-10 13:35:22 +0000576}
577
sewardj20d38f22005-02-07 23:50:18 +0000578static IRAtom* mkImproveANDV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
sewardj170ee212004-12-10 18:57:51 +0000579{
580 tl_assert(isOriginalAtom(mce, data));
581 tl_assert(isShadowAtom(mce, vbits));
582 tl_assert(sameKindedAtoms(data, vbits));
sewardj7cf4e6b2008-05-01 20:24:26 +0000583 return assignNew('V', mce, Ity_V128, binop(Iop_OrV128, data, vbits));
sewardj170ee212004-12-10 18:57:51 +0000584}
585
sewardj95448072004-11-22 20:19:51 +0000586/* ImproveOR(data, vbits) = ~data OR vbits. Defined (0) data 1s give
587 defined (0); all other -> undefined (1).
588*/
589static IRAtom* mkImproveOR8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
590{
591 tl_assert(isOriginalAtom(mce, data));
592 tl_assert(isShadowAtom(mce, vbits));
593 tl_assert(sameKindedAtoms(data, vbits));
594 return assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +0000595 'V', mce, Ity_I8,
sewardj95448072004-11-22 20:19:51 +0000596 binop(Iop_Or8,
sewardj7cf4e6b2008-05-01 20:24:26 +0000597 assignNew('V', mce, Ity_I8, unop(Iop_Not8, data)),
sewardj95448072004-11-22 20:19:51 +0000598 vbits) );
599}
njn25e49d8e72002-09-23 09:36:25 +0000600
sewardj95448072004-11-22 20:19:51 +0000601static IRAtom* mkImproveOR16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
602{
603 tl_assert(isOriginalAtom(mce, data));
604 tl_assert(isShadowAtom(mce, vbits));
605 tl_assert(sameKindedAtoms(data, vbits));
606 return assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +0000607 'V', mce, Ity_I16,
sewardj95448072004-11-22 20:19:51 +0000608 binop(Iop_Or16,
sewardj7cf4e6b2008-05-01 20:24:26 +0000609 assignNew('V', mce, Ity_I16, unop(Iop_Not16, data)),
sewardj95448072004-11-22 20:19:51 +0000610 vbits) );
611}
njn25e49d8e72002-09-23 09:36:25 +0000612
sewardj95448072004-11-22 20:19:51 +0000613static IRAtom* mkImproveOR32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
614{
615 tl_assert(isOriginalAtom(mce, data));
616 tl_assert(isShadowAtom(mce, vbits));
617 tl_assert(sameKindedAtoms(data, vbits));
618 return assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +0000619 'V', mce, Ity_I32,
sewardj95448072004-11-22 20:19:51 +0000620 binop(Iop_Or32,
sewardj7cf4e6b2008-05-01 20:24:26 +0000621 assignNew('V', mce, Ity_I32, unop(Iop_Not32, data)),
sewardj95448072004-11-22 20:19:51 +0000622 vbits) );
623}
624
sewardj7010f6e2004-12-10 13:35:22 +0000625static IRAtom* mkImproveOR64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
626{
627 tl_assert(isOriginalAtom(mce, data));
628 tl_assert(isShadowAtom(mce, vbits));
629 tl_assert(sameKindedAtoms(data, vbits));
630 return assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +0000631 'V', mce, Ity_I64,
sewardj7010f6e2004-12-10 13:35:22 +0000632 binop(Iop_Or64,
sewardj7cf4e6b2008-05-01 20:24:26 +0000633 assignNew('V', mce, Ity_I64, unop(Iop_Not64, data)),
sewardj7010f6e2004-12-10 13:35:22 +0000634 vbits) );
635}
636
sewardj20d38f22005-02-07 23:50:18 +0000637static IRAtom* mkImproveORV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
sewardj170ee212004-12-10 18:57:51 +0000638{
639 tl_assert(isOriginalAtom(mce, data));
640 tl_assert(isShadowAtom(mce, vbits));
641 tl_assert(sameKindedAtoms(data, vbits));
642 return assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +0000643 'V', mce, Ity_V128,
sewardj20d38f22005-02-07 23:50:18 +0000644 binop(Iop_OrV128,
sewardj7cf4e6b2008-05-01 20:24:26 +0000645 assignNew('V', mce, Ity_V128, unop(Iop_NotV128, data)),
sewardj170ee212004-12-10 18:57:51 +0000646 vbits) );
647}
648
sewardj95448072004-11-22 20:19:51 +0000649/* --------- Pessimising casts. --------- */
650
/* Pessimising cast: produce a value of (shadow) type 'dst_ty' that is
   all-ones if any bit of 'vbits' is 1 (undefined), and all-zeroes
   otherwise.  Done by collapsing vbits to a single bit with CmpNEZ*
   and then sign-extending/replicating that bit up to dst_ty. */
static IRAtom* mkPCastTo( MCEnv* mce, IRType dst_ty, IRAtom* vbits )
{
   IRType  src_ty;
   IRAtom* tmp1;
   /* Note, dst_ty is a shadow type, not an original type. */
   /* First of all, collapse vbits down to a single bit. */
   tl_assert(isShadowAtom(mce,vbits));
   src_ty = typeOfIRExpr(mce->sb->tyenv, vbits);

   /* Fast-track some common cases: CmpwNEZ* produces the widened
      result directly, avoiding the narrow-then-widen round trip. */
   if (src_ty == Ity_I32 && dst_ty == Ity_I32)
      return assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));

   if (src_ty == Ity_I64 && dst_ty == Ity_I64)
      return assignNew('V', mce, Ity_I64, unop(Iop_CmpwNEZ64, vbits));

   if (src_ty == Ity_I32 && dst_ty == Ity_I64) {
      /* Widen by concatenating the 32-bit result with itself. */
      IRAtom* tmp = assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));
      return assignNew('V', mce, Ity_I64, binop(Iop_32HLto64, tmp, tmp));
   }

   /* Else do it the slow way .. */
   /* Step 1: collapse to a single I1 bit. */
   tmp1   = NULL;
   switch (src_ty) {
      case Ity_I1:
         tmp1 = vbits;
         break;
      case Ity_I8:
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ8, vbits));
         break;
      case Ity_I16:
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ16, vbits));
         break;
      case Ity_I32:
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ32, vbits));
         break;
      case Ity_I64:
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ64, vbits));
         break;
      case Ity_I128: {
         /* Gah.  Chop it in half, OR the halves together, and compare
            that with zero. */
         IRAtom* tmp2 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, vbits));
         IRAtom* tmp3 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, vbits));
         IRAtom* tmp4 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp2, tmp3));
         tmp1         = assignNew('V', mce, Ity_I1,
                                       unop(Iop_CmpNEZ64, tmp4));
         break;
      }
      default:
         ppIRType(src_ty);
         VG_(tool_panic)("mkPCastTo(1)");
   }
   tl_assert(tmp1);
   /* Step 2: now widen up to the dst type (1Sto* sign-extends, so the
      single bit becomes all-zeroes or all-ones). */
   switch (dst_ty) {
      case Ity_I1:
         return tmp1;
      case Ity_I8:
         return assignNew('V', mce, Ity_I8, unop(Iop_1Sto8, tmp1));
      case Ity_I16:
         return assignNew('V', mce, Ity_I16, unop(Iop_1Sto16, tmp1));
      case Ity_I32:
         return assignNew('V', mce, Ity_I32, unop(Iop_1Sto32, tmp1));
      case Ity_I64:
         return assignNew('V', mce, Ity_I64, unop(Iop_1Sto64, tmp1));
      case Ity_V128:
         /* No 1StoV128; widen to I64 and glue two copies together. */
         tmp1 = assignNew('V', mce, Ity_I64,  unop(Iop_1Sto64, tmp1));
         tmp1 = assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128, tmp1, tmp1));
         return tmp1;
      case Ity_I128:
         tmp1 = assignNew('V', mce, Ity_I64,  unop(Iop_1Sto64, tmp1));
         tmp1 = assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, tmp1, tmp1));
         return tmp1;
      default:
         ppIRType(dst_ty);
         VG_(tool_panic)("mkPCastTo(2)");
   }
}
730
sewardjd5204dc2004-12-31 01:16:11 +0000731/* --------- Accurate interpretation of CmpEQ/CmpNE. --------- */
732/*
733 Normally, we can do CmpEQ/CmpNE by doing UifU on the arguments, and
734 PCasting to Ity_U1. However, sometimes it is necessary to be more
735 accurate. The insight is that the result is defined if two
736 corresponding bits can be found, one from each argument, so that
737 both bits are defined but are different -- that makes EQ say "No"
738 and NE say "Yes". Hence, we compute an improvement term and DifD
739 it onto the "normal" (UifU) result.
740
741 The result is:
742
743 PCastTo<1> (
sewardje6f8af42005-07-06 18:48:59 +0000744 -- naive version
745 PCastTo<sz>( UifU<sz>(vxx, vyy) )
746
sewardjd5204dc2004-12-31 01:16:11 +0000747 `DifD<sz>`
sewardje6f8af42005-07-06 18:48:59 +0000748
749 -- improvement term
750 PCastTo<sz>( PCast<sz>( CmpEQ<sz> ( vec, 1...1 ) ) )
sewardjd5204dc2004-12-31 01:16:11 +0000751 )
sewardje6f8af42005-07-06 18:48:59 +0000752
sewardjd5204dc2004-12-31 01:16:11 +0000753 where
754 vec contains 0 (defined) bits where the corresponding arg bits
sewardje6f8af42005-07-06 18:48:59 +0000755 are defined but different, and 1 bits otherwise.
sewardjd5204dc2004-12-31 01:16:11 +0000756
sewardje6f8af42005-07-06 18:48:59 +0000757 vec = Or<sz>( vxx, // 0 iff bit defined
758 vyy, // 0 iff bit defined
759 Not<sz>(Xor<sz>( xx, yy )) // 0 iff bits different
760 )
761
762 If any bit of vec is 0, the result is defined and so the
763 improvement term should produce 0...0, else it should produce
764 1...1.
765
766 Hence require for the improvement term:
767
768 if vec == 1...1 then 1...1 else 0...0
769 ->
770 PCast<sz>( CmpEQ<sz> ( vec, 1...1 ) )
771
772 This was extensively re-analysed and checked on 6 July 05.
sewardjd5204dc2004-12-31 01:16:11 +0000773*/
/* Accurate CmpEQ/CmpNE instrumentation for I32/I64 operands.  See the
   large comment immediately above for the derivation: the naive
   (UifU + PCast) result is DifD'd with an improvement term which is
   defined-zero whenever some pair of corresponding bits is defined
   and different.  Returns an Ity_I1 shadow value. */
static IRAtom* expensiveCmpEQorNE ( MCEnv*  mce,
                                    IRType  ty,
                                    IRAtom* vxx, IRAtom* vyy,
                                    IRAtom* xx,  IRAtom* yy )
{
   IRAtom *naive, *vec, *improvement_term;
   IRAtom *improved, *final_cast, *top;
   IROp   opDIFD, opUIFU, opXOR, opNOT, opCMP, opOR;

   tl_assert(isShadowAtom(mce,vxx));
   tl_assert(isShadowAtom(mce,vyy));
   tl_assert(isOriginalAtom(mce,xx));
   tl_assert(isOriginalAtom(mce,yy));
   tl_assert(sameKindedAtoms(vxx,xx));
   tl_assert(sameKindedAtoms(vyy,yy));

   /* Select same-width ops and the all-ones constant for 'ty'.
      Note DifD is And (defined bits win), UifU is Or. */
   switch (ty) {
      case Ity_I32:
         opOR   = Iop_Or32;
         opDIFD = Iop_And32;
         opUIFU = Iop_Or32;
         opNOT  = Iop_Not32;
         opXOR  = Iop_Xor32;
         opCMP  = Iop_CmpEQ32;
         top    = mkU32(0xFFFFFFFF);
         break;
      case Ity_I64:
         opOR   = Iop_Or64;
         opDIFD = Iop_And64;
         opUIFU = Iop_Or64;
         opNOT  = Iop_Not64;
         opXOR  = Iop_Xor64;
         opCMP  = Iop_CmpEQ64;
         top    = mkU64(0xFFFFFFFFFFFFFFFFULL);
         break;
      default:
         VG_(tool_panic)("expensiveCmpEQorNE");
   }

   /* naive = PCast(vxx `UifU` vyy) -- the standard lazy result. */
   naive
      = mkPCastTo(mce,ty,
                  assignNew('V', mce, ty, binop(opUIFU, vxx, vyy)));

   /* vec has a 0 bit exactly where the corresponding arg bits are
      both defined and different; 1 elsewhere. */
   vec
      = assignNew(
           'V', mce,ty,
           binop( opOR,
                  assignNew('V', mce,ty, binop(opOR, vxx, vyy)),
                  assignNew(
                     'V', mce,ty,
                     unop( opNOT,
                           assignNew('V', mce,ty, binop(opXOR, xx, yy))))));

   /* improvement_term = PCast( vec == 1...1 ): all-zeroes (defined)
      unless no deciding bit pair exists. */
   improvement_term
      = mkPCastTo( mce,ty,
                   assignNew('V', mce,Ity_I1, binop(opCMP, vec, top)));

   /* DifD the improvement onto the naive result. */
   improved
      = assignNew( 'V', mce,ty, binop(opDIFD, naive, improvement_term) );

   final_cast
      = mkPCastTo( mce, Ity_I1, improved );

   return final_cast;
}
839
sewardj95448072004-11-22 20:19:51 +0000840
sewardj992dff92005-10-07 11:08:55 +0000841/* --------- Semi-accurate interpretation of CmpORD. --------- */
842
843/* CmpORD32{S,U} does PowerPC-style 3-way comparisons:
844
845 CmpORD32S(x,y) = 1<<3 if x <s y
846 = 1<<2 if x >s y
847 = 1<<1 if x == y
848
849 and similarly the unsigned variant. The default interpretation is:
850
851 CmpORD32{S,U}#(x,y,x#,y#) = PCast(x# `UifU` y#)
sewardj1bc82102005-12-23 00:16:24 +0000852 & (7<<1)
sewardj992dff92005-10-07 11:08:55 +0000853
854 The "& (7<<1)" reflects the fact that all result bits except 3,2,1
855 are zero and therefore defined (viz, zero).
sewardja9e62a92005-10-07 12:13:21 +0000856
857 Also deal with a special case better:
858
859 CmpORD32S(x,0)
860
861 Here, bit 3 (LT) of the result is a copy of the top bit of x and
862 will be defined even if the rest of x isn't. In which case we do:
863
864 CmpORD32S#(x,x#,0,{impliedly 0}#)
sewardj1bc82102005-12-23 00:16:24 +0000865 = PCast(x#) & (3<<1) -- standard interp for GT#,EQ#
866 | (x# >>u 31) << 3 -- LT# = x#[31]
sewardja9e62a92005-10-07 12:13:21 +0000867
sewardj1bc82102005-12-23 00:16:24 +0000868 Analogous handling for CmpORD64{S,U}.
sewardj992dff92005-10-07 11:08:55 +0000869*/
sewardja9e62a92005-10-07 12:13:21 +0000870static Bool isZeroU32 ( IRAtom* e )
871{
872 return
873 toBool( e->tag == Iex_Const
874 && e->Iex.Const.con->tag == Ico_U32
875 && e->Iex.Const.con->Ico.U32 == 0 );
876}
877
sewardj1bc82102005-12-23 00:16:24 +0000878static Bool isZeroU64 ( IRAtom* e )
sewardj992dff92005-10-07 11:08:55 +0000879{
sewardj1bc82102005-12-23 00:16:24 +0000880 return
881 toBool( e->tag == Iex_Const
882 && e->Iex.Const.con->tag == Ico_U64
883 && e->Iex.Const.con->Ico.U64 == 0 );
884}
885
/* Instrument CmpORD32/64{S,U} per the scheme described in the comment
   above: the standard interpretation PCasts the UifU of the operand
   shadows and masks with 7<<1 (only bits 3,2,1 can be nonzero); the
   signed-compare-against-literal-zero case additionally recovers a
   defined LT bit (bit 3) from the top bit of the x shadow.  Returns a
   shadow expression of the same width as the comparison. */
static IRAtom* doCmpORD ( MCEnv*  mce,
                          IROp    cmp_op,
                          IRAtom* xxhash, IRAtom* yyhash,
                          IRAtom* xx,     IRAtom* yy )
{
   /* Select 32- vs 64-bit flavours of everything up front. */
   Bool   m64    = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U;
   Bool   syned  = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD32S;
   IROp   opOR   = m64 ? Iop_Or64  : Iop_Or32;
   IROp   opAND  = m64 ? Iop_And64 : Iop_And32;
   IROp   opSHL  = m64 ? Iop_Shl64 : Iop_Shl32;
   IROp   opSHR  = m64 ? Iop_Shr64 : Iop_Shr32;
   IRType ty     = m64 ? Ity_I64   : Ity_I32;
   Int    width  = m64 ? 64        : 32;

   Bool (*isZero)(IRAtom*) = m64 ? isZeroU64 : isZeroU32;

   IRAtom* threeLeft1 = NULL;
   IRAtom* sevenLeft1 = NULL;

   tl_assert(isShadowAtom(mce,xxhash));
   tl_assert(isShadowAtom(mce,yyhash));
   tl_assert(isOriginalAtom(mce,xx));
   tl_assert(isOriginalAtom(mce,yy));
   tl_assert(sameKindedAtoms(xxhash,xx));
   tl_assert(sameKindedAtoms(yyhash,yy));
   tl_assert(cmp_op == Iop_CmpORD32S || cmp_op == Iop_CmpORD32U
             || cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U);

   if (0) {
      ppIROp(cmp_op); VG_(printf)(" ");
      ppIRExpr(xx); VG_(printf)(" "); ppIRExpr( yy ); VG_(printf)("\n");
   }

   if (syned && isZero(yy)) {
      /* fancy interpretation */
      /* if yy is zero, then it must be fully defined (zero#). */
      tl_assert(isZero(yyhash));
      threeLeft1 = m64 ? mkU64(3<<1) : mkU32(3<<1);
      /* (PCast(x#) & (3<<1))            -- GT#,EQ# as usual
         | ((x# >>u (width-1)) << 3)     -- LT# is just x#'s top bit */
      return
         binop(
            opOR,
            assignNew(
               'V', mce,ty,
               binop(
                  opAND,
                  mkPCastTo(mce,ty, xxhash),
                  threeLeft1
               )),
            assignNew(
               'V', mce,ty,
               binop(
                  opSHL,
                  assignNew(
                     'V', mce,ty,
                     binop(opSHR, xxhash, mkU8(width-1))),
                  mkU8(3)
               ))
         );
   } else {
      /* standard interpretation */
      sevenLeft1 = m64 ? mkU64(7<<1) : mkU32(7<<1);
      return
         binop(
            opAND,
            mkPCastTo( mce,ty,
                       mkUifU(mce,ty, xxhash,yyhash)),
            sevenLeft1
         );
   }
}
956
957
sewardj95448072004-11-22 20:19:51 +0000958/*------------------------------------------------------------*/
959/*--- Emit a test and complaint if something is undefined. ---*/
960/*------------------------------------------------------------*/
961
sewardj7cf4e6b2008-05-01 20:24:26 +0000962static IRAtom* schemeE ( MCEnv* mce, IRExpr* e ); /* fwds */
963
964
sewardj95448072004-11-22 20:19:51 +0000965/* Set the annotations on a dirty helper to indicate that the stack
966 pointer and instruction pointers might be read. This is the
967 behaviour of all 'emit-a-complaint' style functions we might
968 call. */
969
/* Annotate the dirty helper 'di' as reading the guest SP and IP
   registers (two fxState entries), so Vex knows the complaint
   helpers may inspect them.  Offsets/sizes come from the tool's
   guest layout. */
static void setHelperAnns ( MCEnv* mce, IRDirty* di ) {
   di->nFxState = 2;
   di->fxState[0].fx     = Ifx_Read;   /* stack pointer */
   di->fxState[0].offset = mce->layout->offset_SP;
   di->fxState[0].size   = mce->layout->sizeof_SP;
   di->fxState[1].fx     = Ifx_Read;   /* instruction pointer */
   di->fxState[1].offset = mce->layout->offset_IP;
   di->fxState[1].size   = mce->layout->sizeof_IP;
}
979
980
981/* Check the supplied **original** atom for undefinedness, and emit a
982 complaint if so. Once that happens, mark it as defined. This is
983 possible because the atom is either a tmp or literal. If it's a
984 tmp, it will be shadowed by a tmp, and so we can set the shadow to
985 be defined. In fact as mentioned above, we will have to allocate a
986 new tmp to carry the new 'defined' shadow value, and update the
987 original->tmp mapping accordingly; we cannot simply assign a new
988 value to an existing shadow tmp as this breaks SSAness -- resulting
989 in the post-instrumentation sanity checker spluttering in disapproval.
990*/
/* Emit IR that tests the definedness of original atom 'atom' and, if
   any bit is undefined, calls a C helper to issue a value-error
   complaint.  Afterwards the atom's shadow is forced to 'defined'
   (see the comment above for why a fresh shadow tmp is needed).
   No-op when undefined-value checking is disabled (mc_level == 1). */
static void complainIfUndefined ( MCEnv* mce, IRAtom* atom )
{
   IRAtom*  vatom;
   IRType   ty;
   Int      sz;
   IRDirty* di;
   IRAtom*  cond;
   IRAtom*  origin;
   void*    fn;
   HChar*   nm;
   IRExpr** args;
   Int      nargs;

   // Don't do V bit tests if we're not reporting undefined value errors.
   if (MC_(clo_mc_level) == 1)
      return;

   /* Since the original expression is atomic, there's no duplicated
      work generated by making multiple V-expressions for it.  So we
      don't really care about the possibility that someone else may
      also create a V-interpretation for it. */
   tl_assert(isOriginalAtom(mce, atom));
   vatom = expr2vbits( mce, atom );
   tl_assert(isShadowAtom(mce, vatom));
   tl_assert(sameKindedAtoms(atom, vatom));

   ty = typeOfIRExpr(mce->sb->tyenv, vatom);

   /* sz is only used for constructing the error message */
   sz = ty==Ity_I1 ? 0 : sizeofIRType(ty);

   cond = mkPCastTo( mce, Ity_I1, vatom );
   /* cond will be 0 if all defined, and 1 if any not defined. */

   /* Get the origin info for the value we are about to check.  At
      least, if we are doing origin tracking.  If not, use a dummy
      zero origin. */
   if (MC_(clo_mc_level) == 3) {
      origin = schemeE( mce, atom );
      if (mce->hWordTy == Ity_I64) {
         /* Origins are 32-bit; widen for a 64-bit host word. */
         origin = assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, origin) );
      }
   } else {
      origin = NULL;
   }

   fn    = NULL;
   nm    = NULL;
   args  = NULL;
   nargs = -1;

   /* Dispatch on the value's size in bytes to the matching complaint
      helper; each size has a with-origin and a no-origin variant.
      Sizes 2 and 16 share the generic N-byte helper. */
   switch (sz) {
      case 0:
         if (origin) {
            fn    = &MC_(helperc_value_check0_fail_w_o);
            nm    = "MC_(helperc_value_check0_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check0_fail_no_o);
            nm    = "MC_(helperc_value_check0_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 1:
         if (origin) {
            fn    = &MC_(helperc_value_check1_fail_w_o);
            nm    = "MC_(helperc_value_check1_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check1_fail_no_o);
            nm    = "MC_(helperc_value_check1_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 4:
         if (origin) {
            fn    = &MC_(helperc_value_check4_fail_w_o);
            nm    = "MC_(helperc_value_check4_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check4_fail_no_o);
            nm    = "MC_(helperc_value_check4_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 8:
         if (origin) {
            fn    = &MC_(helperc_value_check8_fail_w_o);
            nm    = "MC_(helperc_value_check8_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check8_fail_no_o);
            nm    = "MC_(helperc_value_check8_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 2:
      case 16:
         if (origin) {
            fn    = &MC_(helperc_value_checkN_fail_w_o);
            nm    = "MC_(helperc_value_checkN_fail_w_o)";
            args  = mkIRExprVec_2( mkIRExpr_HWord( sz ), origin);
            nargs = 2;
         } else {
            fn    = &MC_(helperc_value_checkN_fail_no_o);
            nm    = "MC_(helperc_value_checkN_fail_no_o)";
            args  = mkIRExprVec_1( mkIRExpr_HWord( sz ) );
            nargs = 1;
         }
         break;
      default:
         VG_(tool_panic)("unexpected szB");
   }

   tl_assert(fn);
   tl_assert(nm);
   tl_assert(args);
   tl_assert(nargs >= 0 && nargs <= 2);
   tl_assert( (MC_(clo_mc_level) == 3 && origin != NULL)
              || (MC_(clo_mc_level) == 2 && origin == NULL) );

   /* Guarded dirty call: only fires when cond says "undefined". */
   di = unsafeIRDirty_0_N( nargs/*regparms*/, nm,
                           VG_(fnptr_to_fnentry)( fn ), args );
   di->guard = cond;
   setHelperAnns( mce, di );
   stmt( 'V', mce, IRStmt_Dirty(di));

   /* Set the shadow tmp to be defined.  First, update the
      orig->shadow tmp mapping to reflect the fact that this shadow is
      getting a new value. */
   tl_assert(isIRAtom(vatom));
   /* sameKindedAtoms ... */
   if (vatom->tag == Iex_RdTmp) {
      tl_assert(atom->tag == Iex_RdTmp);
      /* Allocate a fresh shadow tmp (preserves SSA form) and bind it
         to all-zeroes (fully defined). */
      newShadowTmpV(mce, atom->Iex.RdTmp.tmp);
      assign('V', mce, findShadowTmpV(mce, atom->Iex.RdTmp.tmp),
                       definedOfType(ty));
   }
}
1138
1139
1140/*------------------------------------------------------------*/
1141/*--- Shadowing PUTs/GETs, and indexed variants thereof ---*/
1142/*------------------------------------------------------------*/
1143
1144/* Examine the always-defined sections declared in layout to see if
   the (offset,size) section is within one.  Note, it is an error to
1146 partially fall into such a region: (offset,size) should either be
1147 completely in such a region or completely not-in such a region.
1148*/
1149static Bool isAlwaysDefd ( MCEnv* mce, Int offset, Int size )
1150{
1151 Int minoffD, maxoffD, i;
1152 Int minoff = offset;
1153 Int maxoff = minoff + size - 1;
1154 tl_assert((minoff & ~0xFFFF) == 0);
1155 tl_assert((maxoff & ~0xFFFF) == 0);
1156
1157 for (i = 0; i < mce->layout->n_alwaysDefd; i++) {
1158 minoffD = mce->layout->alwaysDefd[i].offset;
1159 maxoffD = minoffD + mce->layout->alwaysDefd[i].size - 1;
1160 tl_assert((minoffD & ~0xFFFF) == 0);
1161 tl_assert((maxoffD & ~0xFFFF) == 0);
1162
1163 if (maxoff < minoffD || maxoffD < minoff)
1164 continue; /* no overlap */
1165 if (minoff >= minoffD && maxoff <= maxoffD)
1166 return True; /* completely contained in an always-defd section */
1167
1168 VG_(tool_panic)("memcheck:isAlwaysDefd:partial overlap");
1169 }
1170 return False; /* could not find any containing section */
1171}
1172
1173
1174/* Generate into bb suitable actions to shadow this Put. If the state
1175 slice is marked 'always defined', do nothing. Otherwise, write the
1176 supplied V bits to the shadow state. We can pass in either an
1177 original atom or a V-atom, but not both. In the former case the
1178 relevant V-bits are then generated from the original.
1179*/
static
void do_shadow_PUT ( MCEnv* mce,  Int offset,
                     IRAtom* atom, IRAtom* vatom )
{
   IRType ty;

   // Don't do shadow PUTs if we're not doing undefined value checking.
   // Their absence lets Vex's optimiser remove all the shadow computation
   // that they depend on, which includes GETs of the shadow registers.
   if (MC_(clo_mc_level) == 1)
      return;

   /* Exactly one of atom (original) and vatom (shadow) is supplied;
      if given the original, derive its V-bits here. */
   if (atom) {
      tl_assert(!vatom);
      tl_assert(isOriginalAtom(mce, atom));
      vatom = expr2vbits( mce, atom );
   } else {
      tl_assert(vatom);
      tl_assert(isShadowAtom(mce, vatom));
   }

   ty = typeOfIRExpr(mce->sb->tyenv, vatom);
   tl_assert(ty != Ity_I1);
   if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
      /* later: no ... */
      /* emit code to emit a complaint if any of the vbits are 1. */
      /* complainIfUndefined(mce, atom); */
   } else {
      /* Do a plain shadow Put: same offset, displaced into the shadow
         half of the guest state. */
      stmt( 'V', mce, IRStmt_Put( offset + mce->layout->total_sizeB, vatom ) );
   }
}
1212
1213
1214/* Return an expression which contains the V bits corresponding to the
1215 given GETI (passed in in pieces).
1216*/
1217static
1218void do_shadow_PUTI ( MCEnv* mce,
sewardj0b9d74a2006-12-24 02:24:11 +00001219 IRRegArray* descr,
1220 IRAtom* ix, Int bias, IRAtom* atom )
sewardj95448072004-11-22 20:19:51 +00001221{
sewardj7cf97ee2004-11-28 14:25:01 +00001222 IRAtom* vatom;
1223 IRType ty, tyS;
1224 Int arrSize;;
1225
njn1d0825f2006-03-27 11:37:07 +00001226 // Don't do shadow PUTIs if we're not doing undefined value checking.
1227 // Their absence lets Vex's optimiser remove all the shadow computation
1228 // that they depend on, which includes GETIs of the shadow registers.
sewardj7cf4e6b2008-05-01 20:24:26 +00001229 if (MC_(clo_mc_level) == 1)
njn1d0825f2006-03-27 11:37:07 +00001230 return;
1231
sewardj95448072004-11-22 20:19:51 +00001232 tl_assert(isOriginalAtom(mce,atom));
sewardj7cf97ee2004-11-28 14:25:01 +00001233 vatom = expr2vbits( mce, atom );
sewardj95448072004-11-22 20:19:51 +00001234 tl_assert(sameKindedAtoms(atom, vatom));
sewardj7cf97ee2004-11-28 14:25:01 +00001235 ty = descr->elemTy;
sewardj7cf4e6b2008-05-01 20:24:26 +00001236 tyS = shadowTypeV(ty);
sewardj7cf97ee2004-11-28 14:25:01 +00001237 arrSize = descr->nElems * sizeofIRType(ty);
sewardj95448072004-11-22 20:19:51 +00001238 tl_assert(ty != Ity_I1);
1239 tl_assert(isOriginalAtom(mce,ix));
1240 complainIfUndefined(mce,ix);
1241 if (isAlwaysDefd(mce, descr->base, arrSize)) {
1242 /* later: no ... */
1243 /* emit code to emit a complaint if any of the vbits are 1. */
1244 /* complainIfUndefined(mce, atom); */
1245 } else {
1246 /* Do a cloned version of the Put that refers to the shadow
1247 area. */
sewardj0b9d74a2006-12-24 02:24:11 +00001248 IRRegArray* new_descr
1249 = mkIRRegArray( descr->base + mce->layout->total_sizeB,
1250 tyS, descr->nElems);
sewardj7cf4e6b2008-05-01 20:24:26 +00001251 stmt( 'V', mce, IRStmt_PutI( new_descr, ix, bias, vatom ));
sewardj95448072004-11-22 20:19:51 +00001252 }
1253}
1254
1255
1256/* Return an expression which contains the V bits corresponding to the
1257 given GET (passed in in pieces).
1258*/
static
IRExpr* shadow_GET ( MCEnv* mce, Int offset, IRType ty )
{
   /* Shadow values are fetched at the shadow type of 'ty'. */
   IRType tyS = shadowTypeV(ty);
   tl_assert(ty != Ity_I1);
   if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
      /* Always defined, return all zeroes of the relevant type */
      return definedOfType(tyS);
   } else {
      /* return a cloned version of the Get that refers to the shadow
         area. */
      /* FIXME: this isn't an atom! */
      return IRExpr_Get( offset + mce->layout->total_sizeB, tyS );
   }
}
1274
1275
1276/* Return an expression which contains the V bits corresponding to the
1277 given GETI (passed in in pieces).
1278*/
static
IRExpr* shadow_GETI ( MCEnv* mce,
                      IRRegArray* descr, IRAtom* ix, Int bias )
{
   IRType ty   = descr->elemTy;
   IRType tyS  = shadowTypeV(ty);
   Int arrSize = descr->nElems * sizeofIRType(ty);
   tl_assert(ty != Ity_I1);
   tl_assert(isOriginalAtom(mce,ix));
   /* An undefined array index is itself an error worth reporting. */
   complainIfUndefined(mce,ix);
   if (isAlwaysDefd(mce, descr->base, arrSize)) {
      /* Always defined, return all zeroes of the relevant type */
      return definedOfType(tyS);
   } else {
      /* return a cloned version of the Get that refers to the shadow
         area. */
      IRRegArray* new_descr
         = mkIRRegArray( descr->base + mce->layout->total_sizeB,
                         tyS, descr->nElems);
      return IRExpr_GetI( new_descr, ix, bias );
   }
}
1301
1302
1303/*------------------------------------------------------------*/
1304/*--- Generating approximations for unknown operations, ---*/
1305/*--- using lazy-propagate semantics ---*/
1306/*------------------------------------------------------------*/
1307
1308/* Lazy propagation of undefinedness from two values, resulting in the
1309 specified shadow type.
1310*/
static
IRAtom* mkLazy2 ( MCEnv* mce, IRType finalVty, IRAtom* va1, IRAtom* va2 )
{
   IRAtom* at;
   IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
   IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
   tl_assert(isShadowAtom(mce,va1));
   tl_assert(isShadowAtom(mce,va2));

   /* The general case is inefficient because PCast is an expensive
      operation.  Here are some special cases which use PCast only
      once rather than twice. */

   /* I64 x I64 -> I64 */
   if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I64) {
      if (0) VG_(printf)("mkLazy2: I64 x I64 -> I64\n");
      at = mkUifU(mce, Ity_I64, va1, va2);
      at = mkPCastTo(mce, Ity_I64, at);
      return at;
   }

   /* I64 x I64 -> I32 */
   if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I32) {
      if (0) VG_(printf)("mkLazy2: I64 x I64 -> I32\n");
      at = mkUifU(mce, Ity_I64, va1, va2);
      at = mkPCastTo(mce, Ity_I32, at);
      return at;
   }

   /* Debug aid: report type combinations taking the slow path. */
   if (0) {
      VG_(printf)("mkLazy2 ");
      ppIRType(t1);
      VG_(printf)("_");
      ppIRType(t2);
      VG_(printf)("_");
      ppIRType(finalVty);
      VG_(printf)("\n");
   }

   /* General case: force everything via 32-bit intermediaries. */
   at = mkPCastTo(mce, Ity_I32, va1);
   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
   at = mkPCastTo(mce, finalVty, at);
   return at;
}
1356
1357
sewardjed69fdb2006-02-03 16:12:27 +00001358/* 3-arg version of the above. */
1359static
1360IRAtom* mkLazy3 ( MCEnv* mce, IRType finalVty,
1361 IRAtom* va1, IRAtom* va2, IRAtom* va3 )
1362{
1363 IRAtom* at;
sewardj1c0ce7a2009-07-01 08:10:49 +00001364 IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
1365 IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
1366 IRType t3 = typeOfIRExpr(mce->sb->tyenv, va3);
sewardjed69fdb2006-02-03 16:12:27 +00001367 tl_assert(isShadowAtom(mce,va1));
1368 tl_assert(isShadowAtom(mce,va2));
1369 tl_assert(isShadowAtom(mce,va3));
1370
1371 /* The general case is inefficient because PCast is an expensive
1372 operation. Here are some special cases which use PCast only
1373 twice rather than three times. */
1374
1375 /* I32 x I64 x I64 -> I64 */
1376 /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
1377 if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64
1378 && finalVty == Ity_I64) {
1379 if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I64\n");
1380 /* Widen 1st arg to I64. Since 1st arg is typically a rounding
1381 mode indication which is fully defined, this should get
1382 folded out later. */
1383 at = mkPCastTo(mce, Ity_I64, va1);
1384 /* Now fold in 2nd and 3rd args. */
1385 at = mkUifU(mce, Ity_I64, at, va2);
1386 at = mkUifU(mce, Ity_I64, at, va3);
1387 /* and PCast once again. */
1388 at = mkPCastTo(mce, Ity_I64, at);
1389 return at;
1390 }
1391
sewardj453e8f82006-02-09 03:25:06 +00001392 /* I32 x I64 x I64 -> I32 */
1393 if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64
1394 && finalVty == Ity_I32) {
1395 if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I64\n");
1396 at = mkPCastTo(mce, Ity_I64, va1);
1397 at = mkUifU(mce, Ity_I64, at, va2);
1398 at = mkUifU(mce, Ity_I64, at, va3);
1399 at = mkPCastTo(mce, Ity_I32, at);
1400 return at;
1401 }
1402
1403 if (1) {
1404 VG_(printf)("mkLazy3: ");
sewardjed69fdb2006-02-03 16:12:27 +00001405 ppIRType(t1);
sewardj453e8f82006-02-09 03:25:06 +00001406 VG_(printf)(" x ");
sewardjed69fdb2006-02-03 16:12:27 +00001407 ppIRType(t2);
sewardj453e8f82006-02-09 03:25:06 +00001408 VG_(printf)(" x ");
sewardjed69fdb2006-02-03 16:12:27 +00001409 ppIRType(t3);
sewardj453e8f82006-02-09 03:25:06 +00001410 VG_(printf)(" -> ");
sewardjed69fdb2006-02-03 16:12:27 +00001411 ppIRType(finalVty);
1412 VG_(printf)("\n");
1413 }
1414
sewardj453e8f82006-02-09 03:25:06 +00001415 tl_assert(0);
sewardjed69fdb2006-02-03 16:12:27 +00001416 /* General case: force everything via 32-bit intermediaries. */
sewardj453e8f82006-02-09 03:25:06 +00001417 /*
sewardjed69fdb2006-02-03 16:12:27 +00001418 at = mkPCastTo(mce, Ity_I32, va1);
1419 at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
1420 at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va3));
1421 at = mkPCastTo(mce, finalVty, at);
1422 return at;
sewardj453e8f82006-02-09 03:25:06 +00001423 */
sewardjed69fdb2006-02-03 16:12:27 +00001424}
1425
1426
/* 4-arg version of the above.  Only the one special-cased type
   combination is supported; anything else prints the offending
   signature and asserts. */
static
IRAtom* mkLazy4 ( MCEnv* mce, IRType finalVty,
                  IRAtom* va1, IRAtom* va2, IRAtom* va3, IRAtom* va4 )
{
   IRAtom* at;
   IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
   IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
   IRType t3 = typeOfIRExpr(mce->sb->tyenv, va3);
   IRType t4 = typeOfIRExpr(mce->sb->tyenv, va4);
   tl_assert(isShadowAtom(mce,va1));
   tl_assert(isShadowAtom(mce,va2));
   tl_assert(isShadowAtom(mce,va3));
   tl_assert(isShadowAtom(mce,va4));

   /* The general case is inefficient because PCast is an expensive
      operation.  Here is a special case which uses PCast only twice,
      rather than once per argument plus once for the result. */

   /* I32 x I64 x I64 x I64 -> I64 */
   /* Standard FP idiom: rm x FParg1 x FParg2 x FParg3 -> FPresult */
   if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64 && t4 == Ity_I64
       && finalVty == Ity_I64) {
      if (0) VG_(printf)("mkLazy4: I32 x I64 x I64 x I64 -> I64\n");
      /* Widen 1st arg to I64.  Since 1st arg is typically a rounding
         mode indication which is fully defined, this should get
         folded out later. */
      at = mkPCastTo(mce, Ity_I64, va1);
      /* Now fold in 2nd, 3rd, 4th args. */
      at = mkUifU(mce, Ity_I64, at, va2);
      at = mkUifU(mce, Ity_I64, at, va3);
      at = mkUifU(mce, Ity_I64, at, va4);
      /* and PCast once again. */
      at = mkPCastTo(mce, Ity_I64, at);
      return at;
   }

   /* Unhandled type combination: report it before asserting. */
   if (1) {
      VG_(printf)("mkLazy4: ");
      ppIRType(t1);
      VG_(printf)(" x ");
      ppIRType(t2);
      VG_(printf)(" x ");
      ppIRType(t3);
      VG_(printf)(" x ");
      ppIRType(t4);
      VG_(printf)(" -> ");
      ppIRType(finalVty);
      VG_(printf)("\n");
   }

   tl_assert(0);
}
1480
1481
sewardj95448072004-11-22 20:19:51 +00001482/* Do the lazy propagation game from a null-terminated vector of
1483 atoms. This is presumably the arguments to a helper call, so the
1484 IRCallee info is also supplied in order that we can know which
1485 arguments should be ignored (via the .mcx_mask field).
1486*/
1487static
1488IRAtom* mkLazyN ( MCEnv* mce,
1489 IRAtom** exprvec, IRType finalVtype, IRCallee* cee )
1490{
sewardj4cc684b2007-08-25 23:09:36 +00001491 Int i;
sewardj95448072004-11-22 20:19:51 +00001492 IRAtom* here;
sewardj4cc684b2007-08-25 23:09:36 +00001493 IRAtom* curr;
1494 IRType mergeTy;
1495 IRType mergeTy64 = True;
1496
1497 /* Decide on the type of the merge intermediary. If all relevant
1498 args are I64, then it's I64. In all other circumstances, use
1499 I32. */
1500 for (i = 0; exprvec[i]; i++) {
1501 tl_assert(i < 32);
1502 tl_assert(isOriginalAtom(mce, exprvec[i]));
1503 if (cee->mcx_mask & (1<<i))
1504 continue;
sewardj1c0ce7a2009-07-01 08:10:49 +00001505 if (typeOfIRExpr(mce->sb->tyenv, exprvec[i]) != Ity_I64)
sewardj4cc684b2007-08-25 23:09:36 +00001506 mergeTy64 = False;
1507 }
1508
1509 mergeTy = mergeTy64 ? Ity_I64 : Ity_I32;
1510 curr = definedOfType(mergeTy);
1511
sewardj95448072004-11-22 20:19:51 +00001512 for (i = 0; exprvec[i]; i++) {
1513 tl_assert(i < 32);
1514 tl_assert(isOriginalAtom(mce, exprvec[i]));
1515 /* Only take notice of this arg if the callee's mc-exclusion
1516 mask does not say it is to be excluded. */
1517 if (cee->mcx_mask & (1<<i)) {
1518 /* the arg is to be excluded from definedness checking. Do
1519 nothing. */
1520 if (0) VG_(printf)("excluding %s(%d)\n", cee->name, i);
1521 } else {
1522 /* calculate the arg's definedness, and pessimistically merge
1523 it in. */
sewardj4cc684b2007-08-25 23:09:36 +00001524 here = mkPCastTo( mce, mergeTy, expr2vbits(mce, exprvec[i]) );
1525 curr = mergeTy64
1526 ? mkUifU64(mce, here, curr)
1527 : mkUifU32(mce, here, curr);
sewardj95448072004-11-22 20:19:51 +00001528 }
1529 }
1530 return mkPCastTo(mce, finalVtype, curr );
1531}
1532
1533
1534/*------------------------------------------------------------*/
1535/*--- Generating expensive sequences for exact carry-chain ---*/
1536/*--- propagation in add/sub and related operations. ---*/
1537/*------------------------------------------------------------*/
1538
1539static
sewardjd5204dc2004-12-31 01:16:11 +00001540IRAtom* expensiveAddSub ( MCEnv* mce,
1541 Bool add,
1542 IRType ty,
1543 IRAtom* qaa, IRAtom* qbb,
1544 IRAtom* aa, IRAtom* bb )
sewardj95448072004-11-22 20:19:51 +00001545{
sewardj7cf97ee2004-11-28 14:25:01 +00001546 IRAtom *a_min, *b_min, *a_max, *b_max;
sewardjd5204dc2004-12-31 01:16:11 +00001547 IROp opAND, opOR, opXOR, opNOT, opADD, opSUB;
sewardj7cf97ee2004-11-28 14:25:01 +00001548
sewardj95448072004-11-22 20:19:51 +00001549 tl_assert(isShadowAtom(mce,qaa));
1550 tl_assert(isShadowAtom(mce,qbb));
1551 tl_assert(isOriginalAtom(mce,aa));
1552 tl_assert(isOriginalAtom(mce,bb));
1553 tl_assert(sameKindedAtoms(qaa,aa));
1554 tl_assert(sameKindedAtoms(qbb,bb));
1555
sewardjd5204dc2004-12-31 01:16:11 +00001556 switch (ty) {
1557 case Ity_I32:
1558 opAND = Iop_And32;
1559 opOR = Iop_Or32;
1560 opXOR = Iop_Xor32;
1561 opNOT = Iop_Not32;
1562 opADD = Iop_Add32;
1563 opSUB = Iop_Sub32;
1564 break;
tomd9774d72005-06-27 08:11:01 +00001565 case Ity_I64:
1566 opAND = Iop_And64;
1567 opOR = Iop_Or64;
1568 opXOR = Iop_Xor64;
1569 opNOT = Iop_Not64;
1570 opADD = Iop_Add64;
1571 opSUB = Iop_Sub64;
1572 break;
sewardjd5204dc2004-12-31 01:16:11 +00001573 default:
1574 VG_(tool_panic)("expensiveAddSub");
1575 }
sewardj95448072004-11-22 20:19:51 +00001576
1577 // a_min = aa & ~qaa
sewardj7cf4e6b2008-05-01 20:24:26 +00001578 a_min = assignNew('V', mce,ty,
sewardj95448072004-11-22 20:19:51 +00001579 binop(opAND, aa,
sewardj7cf4e6b2008-05-01 20:24:26 +00001580 assignNew('V', mce,ty, unop(opNOT, qaa))));
sewardj95448072004-11-22 20:19:51 +00001581
1582 // b_min = bb & ~qbb
sewardj7cf4e6b2008-05-01 20:24:26 +00001583 b_min = assignNew('V', mce,ty,
sewardj95448072004-11-22 20:19:51 +00001584 binop(opAND, bb,
sewardj7cf4e6b2008-05-01 20:24:26 +00001585 assignNew('V', mce,ty, unop(opNOT, qbb))));
sewardj95448072004-11-22 20:19:51 +00001586
1587 // a_max = aa | qaa
sewardj7cf4e6b2008-05-01 20:24:26 +00001588 a_max = assignNew('V', mce,ty, binop(opOR, aa, qaa));
sewardj95448072004-11-22 20:19:51 +00001589
1590 // b_max = bb | qbb
sewardj7cf4e6b2008-05-01 20:24:26 +00001591 b_max = assignNew('V', mce,ty, binop(opOR, bb, qbb));
sewardj95448072004-11-22 20:19:51 +00001592
sewardjd5204dc2004-12-31 01:16:11 +00001593 if (add) {
1594 // result = (qaa | qbb) | ((a_min + b_min) ^ (a_max + b_max))
1595 return
sewardj7cf4e6b2008-05-01 20:24:26 +00001596 assignNew('V', mce,ty,
sewardjd5204dc2004-12-31 01:16:11 +00001597 binop( opOR,
sewardj7cf4e6b2008-05-01 20:24:26 +00001598 assignNew('V', mce,ty, binop(opOR, qaa, qbb)),
1599 assignNew('V', mce,ty,
sewardjd5204dc2004-12-31 01:16:11 +00001600 binop( opXOR,
sewardj7cf4e6b2008-05-01 20:24:26 +00001601 assignNew('V', mce,ty, binop(opADD, a_min, b_min)),
1602 assignNew('V', mce,ty, binop(opADD, a_max, b_max))
sewardjd5204dc2004-12-31 01:16:11 +00001603 )
sewardj95448072004-11-22 20:19:51 +00001604 )
sewardjd5204dc2004-12-31 01:16:11 +00001605 )
1606 );
1607 } else {
1608 // result = (qaa | qbb) | ((a_min - b_max) ^ (a_max + b_min))
1609 return
sewardj7cf4e6b2008-05-01 20:24:26 +00001610 assignNew('V', mce,ty,
sewardjd5204dc2004-12-31 01:16:11 +00001611 binop( opOR,
sewardj7cf4e6b2008-05-01 20:24:26 +00001612 assignNew('V', mce,ty, binop(opOR, qaa, qbb)),
1613 assignNew('V', mce,ty,
sewardjd5204dc2004-12-31 01:16:11 +00001614 binop( opXOR,
sewardj7cf4e6b2008-05-01 20:24:26 +00001615 assignNew('V', mce,ty, binop(opSUB, a_min, b_max)),
1616 assignNew('V', mce,ty, binop(opSUB, a_max, b_min))
sewardjd5204dc2004-12-31 01:16:11 +00001617 )
1618 )
1619 )
1620 );
1621 }
1622
sewardj95448072004-11-22 20:19:51 +00001623}
1624
1625
1626/*------------------------------------------------------------*/
sewardjaaddbc22005-10-07 09:49:53 +00001627/*--- Scalar shifts. ---*/
1628/*------------------------------------------------------------*/
1629
1630/* Produce an interpretation for (aa << bb) (or >>s, >>u). The basic
1631 idea is to shift the definedness bits by the original shift amount.
1632 This introduces 0s ("defined") in new positions for left shifts and
1633 unsigned right shifts, and copies the top definedness bit for
1634 signed right shifts. So, conveniently, applying the original shift
1635 operator to the definedness bits for the left arg is exactly the
1636 right thing to do:
1637
1638 (qaa << bb)
1639
1640 However if the shift amount is undefined then the whole result
1641 is undefined. Hence need:
1642
1643 (qaa << bb) `UifU` PCast(qbb)
1644
1645 If the shift amount bb is a literal than qbb will say 'all defined'
1646 and the UifU and PCast will get folded out by post-instrumentation
1647 optimisation.
1648*/
1649static IRAtom* scalarShift ( MCEnv* mce,
1650 IRType ty,
1651 IROp original_op,
1652 IRAtom* qaa, IRAtom* qbb,
1653 IRAtom* aa, IRAtom* bb )
1654{
1655 tl_assert(isShadowAtom(mce,qaa));
1656 tl_assert(isShadowAtom(mce,qbb));
1657 tl_assert(isOriginalAtom(mce,aa));
1658 tl_assert(isOriginalAtom(mce,bb));
1659 tl_assert(sameKindedAtoms(qaa,aa));
1660 tl_assert(sameKindedAtoms(qbb,bb));
1661 return
1662 assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +00001663 'V', mce, ty,
sewardjaaddbc22005-10-07 09:49:53 +00001664 mkUifU( mce, ty,
sewardj7cf4e6b2008-05-01 20:24:26 +00001665 assignNew('V', mce, ty, binop(original_op, qaa, bb)),
sewardjaaddbc22005-10-07 09:49:53 +00001666 mkPCastTo(mce, ty, qbb)
1667 )
1668 );
1669}
1670
1671
1672/*------------------------------------------------------------*/
1673/*--- Helpers for dealing with vector primops. ---*/
sewardj3245c912004-12-10 14:58:26 +00001674/*------------------------------------------------------------*/
1675
sewardja1d93302004-12-12 16:45:06 +00001676/* Vector pessimisation -- pessimise within each lane individually. */
1677
1678static IRAtom* mkPCast8x16 ( MCEnv* mce, IRAtom* at )
1679{
sewardj7cf4e6b2008-05-01 20:24:26 +00001680 return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ8x16, at));
sewardja1d93302004-12-12 16:45:06 +00001681}
1682
1683static IRAtom* mkPCast16x8 ( MCEnv* mce, IRAtom* at )
1684{
sewardj7cf4e6b2008-05-01 20:24:26 +00001685 return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ16x8, at));
sewardja1d93302004-12-12 16:45:06 +00001686}
1687
1688static IRAtom* mkPCast32x4 ( MCEnv* mce, IRAtom* at )
1689{
sewardj7cf4e6b2008-05-01 20:24:26 +00001690 return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ32x4, at));
sewardja1d93302004-12-12 16:45:06 +00001691}
1692
1693static IRAtom* mkPCast64x2 ( MCEnv* mce, IRAtom* at )
1694{
sewardj7cf4e6b2008-05-01 20:24:26 +00001695 return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ64x2, at));
sewardja1d93302004-12-12 16:45:06 +00001696}
1697
sewardjacd2e912005-01-13 19:17:06 +00001698static IRAtom* mkPCast32x2 ( MCEnv* mce, IRAtom* at )
1699{
sewardj7cf4e6b2008-05-01 20:24:26 +00001700 return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ32x2, at));
sewardjacd2e912005-01-13 19:17:06 +00001701}
1702
1703static IRAtom* mkPCast16x4 ( MCEnv* mce, IRAtom* at )
1704{
sewardj7cf4e6b2008-05-01 20:24:26 +00001705 return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ16x4, at));
sewardjacd2e912005-01-13 19:17:06 +00001706}
1707
1708static IRAtom* mkPCast8x8 ( MCEnv* mce, IRAtom* at )
1709{
sewardj7cf4e6b2008-05-01 20:24:26 +00001710 return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ8x8, at));
sewardjacd2e912005-01-13 19:17:06 +00001711}
1712
sewardja1d93302004-12-12 16:45:06 +00001713
sewardj3245c912004-12-10 14:58:26 +00001714/* Here's a simple scheme capable of handling ops derived from SSE1
1715 code and while only generating ops that can be efficiently
1716 implemented in SSE1. */
1717
1718/* All-lanes versions are straightforward:
1719
sewardj20d38f22005-02-07 23:50:18 +00001720 binary32Fx4(x,y) ==> PCast32x4(UifUV128(x#,y#))
sewardj3245c912004-12-10 14:58:26 +00001721
1722 unary32Fx4(x,y) ==> PCast32x4(x#)
1723
1724 Lowest-lane-only versions are more complex:
1725
sewardj20d38f22005-02-07 23:50:18 +00001726 binary32F0x4(x,y) ==> SetV128lo32(
sewardj3245c912004-12-10 14:58:26 +00001727 x#,
sewardj20d38f22005-02-07 23:50:18 +00001728 PCast32(V128to32(UifUV128(x#,y#)))
sewardj3245c912004-12-10 14:58:26 +00001729 )
1730
1731 This is perhaps not so obvious. In particular, it's faster to
sewardj20d38f22005-02-07 23:50:18 +00001732 do a V128-bit UifU and then take the bottom 32 bits than the more
sewardj3245c912004-12-10 14:58:26 +00001733 obvious scheme of taking the bottom 32 bits of each operand
1734 and doing a 32-bit UifU. Basically since UifU is fast and
1735 chopping lanes off vector values is slow.
1736
1737 Finally:
1738
sewardj20d38f22005-02-07 23:50:18 +00001739 unary32F0x4(x) ==> SetV128lo32(
sewardj3245c912004-12-10 14:58:26 +00001740 x#,
sewardj20d38f22005-02-07 23:50:18 +00001741 PCast32(V128to32(x#))
sewardj3245c912004-12-10 14:58:26 +00001742 )
1743
1744 Where:
1745
1746 PCast32(v#) = 1Sto32(CmpNE32(v#,0))
1747 PCast32x4(v#) = CmpNEZ32x4(v#)
1748*/
1749
1750static
1751IRAtom* binary32Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1752{
1753 IRAtom* at;
1754 tl_assert(isShadowAtom(mce, vatomX));
1755 tl_assert(isShadowAtom(mce, vatomY));
sewardj20d38f22005-02-07 23:50:18 +00001756 at = mkUifUV128(mce, vatomX, vatomY);
sewardj7cf4e6b2008-05-01 20:24:26 +00001757 at = assignNew('V', mce, Ity_V128, mkPCast32x4(mce, at));
sewardj3245c912004-12-10 14:58:26 +00001758 return at;
1759}
1760
1761static
1762IRAtom* unary32Fx4 ( MCEnv* mce, IRAtom* vatomX )
1763{
1764 IRAtom* at;
1765 tl_assert(isShadowAtom(mce, vatomX));
sewardj7cf4e6b2008-05-01 20:24:26 +00001766 at = assignNew('V', mce, Ity_V128, mkPCast32x4(mce, vatomX));
sewardj3245c912004-12-10 14:58:26 +00001767 return at;
1768}
1769
1770static
1771IRAtom* binary32F0x4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1772{
1773 IRAtom* at;
1774 tl_assert(isShadowAtom(mce, vatomX));
1775 tl_assert(isShadowAtom(mce, vatomY));
sewardj20d38f22005-02-07 23:50:18 +00001776 at = mkUifUV128(mce, vatomX, vatomY);
sewardj7cf4e6b2008-05-01 20:24:26 +00001777 at = assignNew('V', mce, Ity_I32, unop(Iop_V128to32, at));
sewardj3245c912004-12-10 14:58:26 +00001778 at = mkPCastTo(mce, Ity_I32, at);
sewardj7cf4e6b2008-05-01 20:24:26 +00001779 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
sewardj3245c912004-12-10 14:58:26 +00001780 return at;
1781}
1782
1783static
1784IRAtom* unary32F0x4 ( MCEnv* mce, IRAtom* vatomX )
1785{
1786 IRAtom* at;
1787 tl_assert(isShadowAtom(mce, vatomX));
sewardj7cf4e6b2008-05-01 20:24:26 +00001788 at = assignNew('V', mce, Ity_I32, unop(Iop_V128to32, vatomX));
sewardj3245c912004-12-10 14:58:26 +00001789 at = mkPCastTo(mce, Ity_I32, at);
sewardj7cf4e6b2008-05-01 20:24:26 +00001790 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
sewardj3245c912004-12-10 14:58:26 +00001791 return at;
1792}
1793
sewardj0b070592004-12-10 21:44:22 +00001794/* --- ... and ... 64Fx2 versions of the same ... --- */
1795
1796static
1797IRAtom* binary64Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1798{
1799 IRAtom* at;
1800 tl_assert(isShadowAtom(mce, vatomX));
1801 tl_assert(isShadowAtom(mce, vatomY));
sewardj20d38f22005-02-07 23:50:18 +00001802 at = mkUifUV128(mce, vatomX, vatomY);
sewardj7cf4e6b2008-05-01 20:24:26 +00001803 at = assignNew('V', mce, Ity_V128, mkPCast64x2(mce, at));
sewardj0b070592004-12-10 21:44:22 +00001804 return at;
1805}
1806
1807static
1808IRAtom* unary64Fx2 ( MCEnv* mce, IRAtom* vatomX )
1809{
1810 IRAtom* at;
1811 tl_assert(isShadowAtom(mce, vatomX));
sewardj7cf4e6b2008-05-01 20:24:26 +00001812 at = assignNew('V', mce, Ity_V128, mkPCast64x2(mce, vatomX));
sewardj0b070592004-12-10 21:44:22 +00001813 return at;
1814}
1815
1816static
1817IRAtom* binary64F0x2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1818{
1819 IRAtom* at;
1820 tl_assert(isShadowAtom(mce, vatomX));
1821 tl_assert(isShadowAtom(mce, vatomY));
sewardj20d38f22005-02-07 23:50:18 +00001822 at = mkUifUV128(mce, vatomX, vatomY);
sewardj7cf4e6b2008-05-01 20:24:26 +00001823 at = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, at));
sewardj0b070592004-12-10 21:44:22 +00001824 at = mkPCastTo(mce, Ity_I64, at);
sewardj7cf4e6b2008-05-01 20:24:26 +00001825 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
sewardj0b070592004-12-10 21:44:22 +00001826 return at;
1827}
1828
1829static
1830IRAtom* unary64F0x2 ( MCEnv* mce, IRAtom* vatomX )
1831{
1832 IRAtom* at;
1833 tl_assert(isShadowAtom(mce, vatomX));
sewardj7cf4e6b2008-05-01 20:24:26 +00001834 at = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vatomX));
sewardj0b070592004-12-10 21:44:22 +00001835 at = mkPCastTo(mce, Ity_I64, at);
sewardj7cf4e6b2008-05-01 20:24:26 +00001836 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
sewardj0b070592004-12-10 21:44:22 +00001837 return at;
1838}
1839
sewardja1d93302004-12-12 16:45:06 +00001840/* --- --- Vector saturated narrowing --- --- */
1841
1842/* This is quite subtle. What to do is simple:
1843
1844 Let the original narrowing op be QNarrowW{S,U}xN. Produce:
1845
1846 the-narrowing-op( PCastWxN(vatom1), PCastWxN(vatom2))
1847
1848 Why this is right is not so simple. Consider a lane in the args,
1849 vatom1 or 2, doesn't matter.
1850
1851 After the PCast, that lane is all 0s (defined) or all
1852 1s(undefined).
1853
1854 Both signed and unsigned saturating narrowing of all 0s produces
1855 all 0s, which is what we want.
1856
1857 The all-1s case is more complex. Unsigned narrowing interprets an
1858 all-1s input as the largest unsigned integer, and so produces all
1859 1s as a result since that is the largest unsigned value at the
1860 smaller width.
1861
1862 Signed narrowing interprets all 1s as -1. Fortunately, -1 narrows
1863 to -1, so we still wind up with all 1s at the smaller width.
1864
1865 So: In short, pessimise the args, then apply the original narrowing
1866 op.
1867*/
1868static
sewardj20d38f22005-02-07 23:50:18 +00001869IRAtom* vectorNarrowV128 ( MCEnv* mce, IROp narrow_op,
sewardja1d93302004-12-12 16:45:06 +00001870 IRAtom* vatom1, IRAtom* vatom2)
1871{
1872 IRAtom *at1, *at2, *at3;
1873 IRAtom* (*pcast)( MCEnv*, IRAtom* );
1874 switch (narrow_op) {
1875 case Iop_QNarrow32Sx4: pcast = mkPCast32x4; break;
sewardj43d60752005-11-10 18:13:01 +00001876 case Iop_QNarrow32Ux4: pcast = mkPCast32x4; break;
sewardja1d93302004-12-12 16:45:06 +00001877 case Iop_QNarrow16Sx8: pcast = mkPCast16x8; break;
1878 case Iop_QNarrow16Ux8: pcast = mkPCast16x8; break;
sewardj20d38f22005-02-07 23:50:18 +00001879 default: VG_(tool_panic)("vectorNarrowV128");
sewardja1d93302004-12-12 16:45:06 +00001880 }
1881 tl_assert(isShadowAtom(mce,vatom1));
1882 tl_assert(isShadowAtom(mce,vatom2));
sewardj7cf4e6b2008-05-01 20:24:26 +00001883 at1 = assignNew('V', mce, Ity_V128, pcast(mce, vatom1));
1884 at2 = assignNew('V', mce, Ity_V128, pcast(mce, vatom2));
1885 at3 = assignNew('V', mce, Ity_V128, binop(narrow_op, at1, at2));
sewardja1d93302004-12-12 16:45:06 +00001886 return at3;
1887}
1888
sewardjacd2e912005-01-13 19:17:06 +00001889static
1890IRAtom* vectorNarrow64 ( MCEnv* mce, IROp narrow_op,
1891 IRAtom* vatom1, IRAtom* vatom2)
1892{
1893 IRAtom *at1, *at2, *at3;
1894 IRAtom* (*pcast)( MCEnv*, IRAtom* );
1895 switch (narrow_op) {
1896 case Iop_QNarrow32Sx2: pcast = mkPCast32x2; break;
1897 case Iop_QNarrow16Sx4: pcast = mkPCast16x4; break;
1898 case Iop_QNarrow16Ux4: pcast = mkPCast16x4; break;
1899 default: VG_(tool_panic)("vectorNarrow64");
1900 }
1901 tl_assert(isShadowAtom(mce,vatom1));
1902 tl_assert(isShadowAtom(mce,vatom2));
sewardj7cf4e6b2008-05-01 20:24:26 +00001903 at1 = assignNew('V', mce, Ity_I64, pcast(mce, vatom1));
1904 at2 = assignNew('V', mce, Ity_I64, pcast(mce, vatom2));
1905 at3 = assignNew('V', mce, Ity_I64, binop(narrow_op, at1, at2));
sewardjacd2e912005-01-13 19:17:06 +00001906 return at3;
1907}
1908
sewardja1d93302004-12-12 16:45:06 +00001909
1910/* --- --- Vector integer arithmetic --- --- */
1911
1912/* Simple ... UifU the args and per-lane pessimise the results. */
sewardjacd2e912005-01-13 19:17:06 +00001913
sewardj20d38f22005-02-07 23:50:18 +00001914/* --- V128-bit versions --- */
sewardjacd2e912005-01-13 19:17:06 +00001915
sewardja1d93302004-12-12 16:45:06 +00001916static
1917IRAtom* binary8Ix16 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
1918{
1919 IRAtom* at;
sewardj20d38f22005-02-07 23:50:18 +00001920 at = mkUifUV128(mce, vatom1, vatom2);
sewardja1d93302004-12-12 16:45:06 +00001921 at = mkPCast8x16(mce, at);
1922 return at;
1923}
1924
1925static
1926IRAtom* binary16Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
1927{
1928 IRAtom* at;
sewardj20d38f22005-02-07 23:50:18 +00001929 at = mkUifUV128(mce, vatom1, vatom2);
sewardja1d93302004-12-12 16:45:06 +00001930 at = mkPCast16x8(mce, at);
1931 return at;
1932}
1933
1934static
1935IRAtom* binary32Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
1936{
1937 IRAtom* at;
sewardj20d38f22005-02-07 23:50:18 +00001938 at = mkUifUV128(mce, vatom1, vatom2);
sewardja1d93302004-12-12 16:45:06 +00001939 at = mkPCast32x4(mce, at);
1940 return at;
1941}
1942
1943static
1944IRAtom* binary64Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
1945{
1946 IRAtom* at;
sewardj20d38f22005-02-07 23:50:18 +00001947 at = mkUifUV128(mce, vatom1, vatom2);
sewardja1d93302004-12-12 16:45:06 +00001948 at = mkPCast64x2(mce, at);
1949 return at;
1950}
sewardj3245c912004-12-10 14:58:26 +00001951
sewardjacd2e912005-01-13 19:17:06 +00001952/* --- 64-bit versions --- */
1953
1954static
1955IRAtom* binary8Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
1956{
1957 IRAtom* at;
1958 at = mkUifU64(mce, vatom1, vatom2);
1959 at = mkPCast8x8(mce, at);
1960 return at;
1961}
1962
1963static
1964IRAtom* binary16Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
1965{
1966 IRAtom* at;
1967 at = mkUifU64(mce, vatom1, vatom2);
1968 at = mkPCast16x4(mce, at);
1969 return at;
1970}
1971
1972static
1973IRAtom* binary32Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
1974{
1975 IRAtom* at;
1976 at = mkUifU64(mce, vatom1, vatom2);
1977 at = mkPCast32x2(mce, at);
1978 return at;
1979}
1980
sewardj3245c912004-12-10 14:58:26 +00001981
1982/*------------------------------------------------------------*/
sewardj95448072004-11-22 20:19:51 +00001983/*--- Generate shadow values from all kinds of IRExprs. ---*/
1984/*------------------------------------------------------------*/
1985
1986static
sewardje91cea72006-02-08 19:32:02 +00001987IRAtom* expr2vbits_Qop ( MCEnv* mce,
1988 IROp op,
1989 IRAtom* atom1, IRAtom* atom2,
1990 IRAtom* atom3, IRAtom* atom4 )
1991{
1992 IRAtom* vatom1 = expr2vbits( mce, atom1 );
1993 IRAtom* vatom2 = expr2vbits( mce, atom2 );
1994 IRAtom* vatom3 = expr2vbits( mce, atom3 );
1995 IRAtom* vatom4 = expr2vbits( mce, atom4 );
1996
1997 tl_assert(isOriginalAtom(mce,atom1));
1998 tl_assert(isOriginalAtom(mce,atom2));
1999 tl_assert(isOriginalAtom(mce,atom3));
2000 tl_assert(isOriginalAtom(mce,atom4));
2001 tl_assert(isShadowAtom(mce,vatom1));
2002 tl_assert(isShadowAtom(mce,vatom2));
2003 tl_assert(isShadowAtom(mce,vatom3));
2004 tl_assert(isShadowAtom(mce,vatom4));
2005 tl_assert(sameKindedAtoms(atom1,vatom1));
2006 tl_assert(sameKindedAtoms(atom2,vatom2));
2007 tl_assert(sameKindedAtoms(atom3,vatom3));
2008 tl_assert(sameKindedAtoms(atom4,vatom4));
2009 switch (op) {
2010 case Iop_MAddF64:
2011 case Iop_MAddF64r32:
2012 case Iop_MSubF64:
2013 case Iop_MSubF64r32:
2014 /* I32(rm) x F64 x F64 x F64 -> F64 */
2015 return mkLazy4(mce, Ity_I64, vatom1, vatom2, vatom3, vatom4);
2016 default:
2017 ppIROp(op);
2018 VG_(tool_panic)("memcheck:expr2vbits_Qop");
2019 }
2020}
2021
2022
2023static
sewardjed69fdb2006-02-03 16:12:27 +00002024IRAtom* expr2vbits_Triop ( MCEnv* mce,
2025 IROp op,
2026 IRAtom* atom1, IRAtom* atom2, IRAtom* atom3 )
2027{
sewardjed69fdb2006-02-03 16:12:27 +00002028 IRAtom* vatom1 = expr2vbits( mce, atom1 );
2029 IRAtom* vatom2 = expr2vbits( mce, atom2 );
2030 IRAtom* vatom3 = expr2vbits( mce, atom3 );
2031
2032 tl_assert(isOriginalAtom(mce,atom1));
2033 tl_assert(isOriginalAtom(mce,atom2));
2034 tl_assert(isOriginalAtom(mce,atom3));
2035 tl_assert(isShadowAtom(mce,vatom1));
2036 tl_assert(isShadowAtom(mce,vatom2));
2037 tl_assert(isShadowAtom(mce,vatom3));
2038 tl_assert(sameKindedAtoms(atom1,vatom1));
2039 tl_assert(sameKindedAtoms(atom2,vatom2));
2040 tl_assert(sameKindedAtoms(atom3,vatom3));
2041 switch (op) {
2042 case Iop_AddF64:
2043 case Iop_AddF64r32:
2044 case Iop_SubF64:
2045 case Iop_SubF64r32:
2046 case Iop_MulF64:
2047 case Iop_MulF64r32:
2048 case Iop_DivF64:
2049 case Iop_DivF64r32:
sewardj22ac5f42006-02-03 22:55:04 +00002050 case Iop_ScaleF64:
2051 case Iop_Yl2xF64:
2052 case Iop_Yl2xp1F64:
2053 case Iop_AtanF64:
sewardjd6075eb2006-02-04 15:25:23 +00002054 case Iop_PRemF64:
2055 case Iop_PRem1F64:
sewardj22ac5f42006-02-03 22:55:04 +00002056 /* I32(rm) x F64 x F64 -> F64 */
sewardjed69fdb2006-02-03 16:12:27 +00002057 return mkLazy3(mce, Ity_I64, vatom1, vatom2, vatom3);
sewardjd6075eb2006-02-04 15:25:23 +00002058 case Iop_PRemC3210F64:
2059 case Iop_PRem1C3210F64:
2060 /* I32(rm) x F64 x F64 -> I32 */
2061 return mkLazy3(mce, Ity_I32, vatom1, vatom2, vatom3);
sewardjed69fdb2006-02-03 16:12:27 +00002062 default:
2063 ppIROp(op);
2064 VG_(tool_panic)("memcheck:expr2vbits_Triop");
2065 }
2066}
2067
2068
2069static
sewardj95448072004-11-22 20:19:51 +00002070IRAtom* expr2vbits_Binop ( MCEnv* mce,
2071 IROp op,
2072 IRAtom* atom1, IRAtom* atom2 )
2073{
2074 IRType and_or_ty;
2075 IRAtom* (*uifu) (MCEnv*, IRAtom*, IRAtom*);
2076 IRAtom* (*difd) (MCEnv*, IRAtom*, IRAtom*);
2077 IRAtom* (*improve) (MCEnv*, IRAtom*, IRAtom*);
2078
2079 IRAtom* vatom1 = expr2vbits( mce, atom1 );
2080 IRAtom* vatom2 = expr2vbits( mce, atom2 );
2081
2082 tl_assert(isOriginalAtom(mce,atom1));
2083 tl_assert(isOriginalAtom(mce,atom2));
2084 tl_assert(isShadowAtom(mce,vatom1));
2085 tl_assert(isShadowAtom(mce,vatom2));
2086 tl_assert(sameKindedAtoms(atom1,vatom1));
2087 tl_assert(sameKindedAtoms(atom2,vatom2));
2088 switch (op) {
2089
sewardjacd2e912005-01-13 19:17:06 +00002090 /* 64-bit SIMD */
2091
2092 case Iop_ShrN16x4:
2093 case Iop_ShrN32x2:
sewardj03809ae2006-12-27 01:16:58 +00002094 case Iop_SarN8x8:
sewardjacd2e912005-01-13 19:17:06 +00002095 case Iop_SarN16x4:
2096 case Iop_SarN32x2:
2097 case Iop_ShlN16x4:
2098 case Iop_ShlN32x2:
sewardj114a9172008-02-09 01:49:32 +00002099 case Iop_ShlN8x8:
sewardjacd2e912005-01-13 19:17:06 +00002100 /* Same scheme as with all other shifts. */
2101 complainIfUndefined(mce, atom2);
sewardj7cf4e6b2008-05-01 20:24:26 +00002102 return assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2));
sewardjacd2e912005-01-13 19:17:06 +00002103
2104 case Iop_QNarrow32Sx2:
2105 case Iop_QNarrow16Sx4:
2106 case Iop_QNarrow16Ux4:
2107 return vectorNarrow64(mce, op, vatom1, vatom2);
2108
2109 case Iop_Min8Ux8:
2110 case Iop_Max8Ux8:
2111 case Iop_Avg8Ux8:
2112 case Iop_QSub8Sx8:
2113 case Iop_QSub8Ux8:
2114 case Iop_Sub8x8:
2115 case Iop_CmpGT8Sx8:
2116 case Iop_CmpEQ8x8:
2117 case Iop_QAdd8Sx8:
2118 case Iop_QAdd8Ux8:
2119 case Iop_Add8x8:
2120 return binary8Ix8(mce, vatom1, vatom2);
2121
2122 case Iop_Min16Sx4:
2123 case Iop_Max16Sx4:
2124 case Iop_Avg16Ux4:
2125 case Iop_QSub16Ux4:
2126 case Iop_QSub16Sx4:
2127 case Iop_Sub16x4:
2128 case Iop_Mul16x4:
2129 case Iop_MulHi16Sx4:
2130 case Iop_MulHi16Ux4:
2131 case Iop_CmpGT16Sx4:
2132 case Iop_CmpEQ16x4:
2133 case Iop_QAdd16Sx4:
2134 case Iop_QAdd16Ux4:
2135 case Iop_Add16x4:
2136 return binary16Ix4(mce, vatom1, vatom2);
2137
2138 case Iop_Sub32x2:
sewardj114a9172008-02-09 01:49:32 +00002139 case Iop_Mul32x2:
sewardjacd2e912005-01-13 19:17:06 +00002140 case Iop_CmpGT32Sx2:
2141 case Iop_CmpEQ32x2:
2142 case Iop_Add32x2:
2143 return binary32Ix2(mce, vatom1, vatom2);
2144
2145 /* 64-bit data-steering */
2146 case Iop_InterleaveLO32x2:
2147 case Iop_InterleaveLO16x4:
2148 case Iop_InterleaveLO8x8:
2149 case Iop_InterleaveHI32x2:
2150 case Iop_InterleaveHI16x4:
2151 case Iop_InterleaveHI8x8:
sewardj114a9172008-02-09 01:49:32 +00002152 case Iop_CatOddLanes16x4:
2153 case Iop_CatEvenLanes16x4:
sewardj7cf4e6b2008-05-01 20:24:26 +00002154 return assignNew('V', mce, Ity_I64, binop(op, vatom1, vatom2));
sewardjacd2e912005-01-13 19:17:06 +00002155
sewardj114a9172008-02-09 01:49:32 +00002156 /* Perm8x8: rearrange values in left arg using steering values
2157 from right arg. So rearrange the vbits in the same way but
2158 pessimise wrt steering values. */
2159 case Iop_Perm8x8:
2160 return mkUifU64(
2161 mce,
sewardj7cf4e6b2008-05-01 20:24:26 +00002162 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
sewardj114a9172008-02-09 01:49:32 +00002163 mkPCast8x8(mce, vatom2)
2164 );
2165
sewardj20d38f22005-02-07 23:50:18 +00002166 /* V128-bit SIMD */
sewardj0b070592004-12-10 21:44:22 +00002167
sewardja1d93302004-12-12 16:45:06 +00002168 case Iop_ShrN16x8:
2169 case Iop_ShrN32x4:
2170 case Iop_ShrN64x2:
2171 case Iop_SarN16x8:
2172 case Iop_SarN32x4:
2173 case Iop_ShlN16x8:
2174 case Iop_ShlN32x4:
2175 case Iop_ShlN64x2:
sewardj620eb5b2005-10-22 12:50:43 +00002176 case Iop_ShlN8x16:
2177 case Iop_SarN8x16:
2178 /* Same scheme as with all other shifts. Note: 22 Oct 05:
2179 this is wrong now, scalar shifts are done properly lazily.
2180 Vector shifts should be fixed too. */
sewardja1d93302004-12-12 16:45:06 +00002181 complainIfUndefined(mce, atom2);
sewardj7cf4e6b2008-05-01 20:24:26 +00002182 return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2));
sewardja1d93302004-12-12 16:45:06 +00002183
sewardjcbf8be72005-11-10 18:34:41 +00002184 /* V x V shifts/rotates are done using the standard lazy scheme. */
sewardj43d60752005-11-10 18:13:01 +00002185 case Iop_Shl8x16:
2186 case Iop_Shr8x16:
2187 case Iop_Sar8x16:
sewardjcbf8be72005-11-10 18:34:41 +00002188 case Iop_Rol8x16:
sewardj43d60752005-11-10 18:13:01 +00002189 return mkUifUV128(mce,
sewardj7cf4e6b2008-05-01 20:24:26 +00002190 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
sewardj43d60752005-11-10 18:13:01 +00002191 mkPCast8x16(mce,vatom2)
2192 );
2193
2194 case Iop_Shl16x8:
2195 case Iop_Shr16x8:
2196 case Iop_Sar16x8:
sewardjcbf8be72005-11-10 18:34:41 +00002197 case Iop_Rol16x8:
sewardj43d60752005-11-10 18:13:01 +00002198 return mkUifUV128(mce,
sewardj7cf4e6b2008-05-01 20:24:26 +00002199 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
sewardj43d60752005-11-10 18:13:01 +00002200 mkPCast16x8(mce,vatom2)
2201 );
2202
2203 case Iop_Shl32x4:
2204 case Iop_Shr32x4:
2205 case Iop_Sar32x4:
sewardjcbf8be72005-11-10 18:34:41 +00002206 case Iop_Rol32x4:
sewardj43d60752005-11-10 18:13:01 +00002207 return mkUifUV128(mce,
sewardj7cf4e6b2008-05-01 20:24:26 +00002208 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
sewardj43d60752005-11-10 18:13:01 +00002209 mkPCast32x4(mce,vatom2)
2210 );
2211
sewardja1d93302004-12-12 16:45:06 +00002212 case Iop_QSub8Ux16:
2213 case Iop_QSub8Sx16:
2214 case Iop_Sub8x16:
2215 case Iop_Min8Ux16:
sewardj43d60752005-11-10 18:13:01 +00002216 case Iop_Min8Sx16:
sewardja1d93302004-12-12 16:45:06 +00002217 case Iop_Max8Ux16:
sewardj43d60752005-11-10 18:13:01 +00002218 case Iop_Max8Sx16:
sewardja1d93302004-12-12 16:45:06 +00002219 case Iop_CmpGT8Sx16:
sewardj43d60752005-11-10 18:13:01 +00002220 case Iop_CmpGT8Ux16:
sewardja1d93302004-12-12 16:45:06 +00002221 case Iop_CmpEQ8x16:
2222 case Iop_Avg8Ux16:
sewardj43d60752005-11-10 18:13:01 +00002223 case Iop_Avg8Sx16:
sewardja1d93302004-12-12 16:45:06 +00002224 case Iop_QAdd8Ux16:
2225 case Iop_QAdd8Sx16:
2226 case Iop_Add8x16:
2227 return binary8Ix16(mce, vatom1, vatom2);
2228
2229 case Iop_QSub16Ux8:
2230 case Iop_QSub16Sx8:
2231 case Iop_Sub16x8:
2232 case Iop_Mul16x8:
2233 case Iop_MulHi16Sx8:
2234 case Iop_MulHi16Ux8:
2235 case Iop_Min16Sx8:
sewardj43d60752005-11-10 18:13:01 +00002236 case Iop_Min16Ux8:
sewardja1d93302004-12-12 16:45:06 +00002237 case Iop_Max16Sx8:
sewardj43d60752005-11-10 18:13:01 +00002238 case Iop_Max16Ux8:
sewardja1d93302004-12-12 16:45:06 +00002239 case Iop_CmpGT16Sx8:
sewardj43d60752005-11-10 18:13:01 +00002240 case Iop_CmpGT16Ux8:
sewardja1d93302004-12-12 16:45:06 +00002241 case Iop_CmpEQ16x8:
2242 case Iop_Avg16Ux8:
sewardj43d60752005-11-10 18:13:01 +00002243 case Iop_Avg16Sx8:
sewardja1d93302004-12-12 16:45:06 +00002244 case Iop_QAdd16Ux8:
2245 case Iop_QAdd16Sx8:
2246 case Iop_Add16x8:
2247 return binary16Ix8(mce, vatom1, vatom2);
2248
2249 case Iop_Sub32x4:
2250 case Iop_CmpGT32Sx4:
sewardj43d60752005-11-10 18:13:01 +00002251 case Iop_CmpGT32Ux4:
sewardja1d93302004-12-12 16:45:06 +00002252 case Iop_CmpEQ32x4:
sewardj43d60752005-11-10 18:13:01 +00002253 case Iop_QAdd32Sx4:
2254 case Iop_QAdd32Ux4:
2255 case Iop_QSub32Sx4:
2256 case Iop_QSub32Ux4:
2257 case Iop_Avg32Ux4:
2258 case Iop_Avg32Sx4:
sewardja1d93302004-12-12 16:45:06 +00002259 case Iop_Add32x4:
sewardj43d60752005-11-10 18:13:01 +00002260 case Iop_Max32Ux4:
2261 case Iop_Max32Sx4:
2262 case Iop_Min32Ux4:
2263 case Iop_Min32Sx4:
sewardja1d93302004-12-12 16:45:06 +00002264 return binary32Ix4(mce, vatom1, vatom2);
2265
2266 case Iop_Sub64x2:
2267 case Iop_Add64x2:
2268 return binary64Ix2(mce, vatom1, vatom2);
2269
2270 case Iop_QNarrow32Sx4:
sewardj43d60752005-11-10 18:13:01 +00002271 case Iop_QNarrow32Ux4:
sewardja1d93302004-12-12 16:45:06 +00002272 case Iop_QNarrow16Sx8:
2273 case Iop_QNarrow16Ux8:
sewardj20d38f22005-02-07 23:50:18 +00002274 return vectorNarrowV128(mce, op, vatom1, vatom2);
sewardja1d93302004-12-12 16:45:06 +00002275
sewardj0b070592004-12-10 21:44:22 +00002276 case Iop_Sub64Fx2:
2277 case Iop_Mul64Fx2:
2278 case Iop_Min64Fx2:
2279 case Iop_Max64Fx2:
2280 case Iop_Div64Fx2:
2281 case Iop_CmpLT64Fx2:
2282 case Iop_CmpLE64Fx2:
2283 case Iop_CmpEQ64Fx2:
sewardj545663e2005-11-05 01:55:04 +00002284 case Iop_CmpUN64Fx2:
sewardj0b070592004-12-10 21:44:22 +00002285 case Iop_Add64Fx2:
2286 return binary64Fx2(mce, vatom1, vatom2);
2287
2288 case Iop_Sub64F0x2:
2289 case Iop_Mul64F0x2:
2290 case Iop_Min64F0x2:
2291 case Iop_Max64F0x2:
2292 case Iop_Div64F0x2:
2293 case Iop_CmpLT64F0x2:
2294 case Iop_CmpLE64F0x2:
2295 case Iop_CmpEQ64F0x2:
sewardj545663e2005-11-05 01:55:04 +00002296 case Iop_CmpUN64F0x2:
sewardj0b070592004-12-10 21:44:22 +00002297 case Iop_Add64F0x2:
2298 return binary64F0x2(mce, vatom1, vatom2);
2299
sewardj170ee212004-12-10 18:57:51 +00002300 case Iop_Sub32Fx4:
2301 case Iop_Mul32Fx4:
2302 case Iop_Min32Fx4:
2303 case Iop_Max32Fx4:
2304 case Iop_Div32Fx4:
2305 case Iop_CmpLT32Fx4:
2306 case Iop_CmpLE32Fx4:
2307 case Iop_CmpEQ32Fx4:
sewardj545663e2005-11-05 01:55:04 +00002308 case Iop_CmpUN32Fx4:
cerione78ba2a2005-11-14 03:00:35 +00002309 case Iop_CmpGT32Fx4:
2310 case Iop_CmpGE32Fx4:
sewardj3245c912004-12-10 14:58:26 +00002311 case Iop_Add32Fx4:
2312 return binary32Fx4(mce, vatom1, vatom2);
2313
sewardj170ee212004-12-10 18:57:51 +00002314 case Iop_Sub32F0x4:
2315 case Iop_Mul32F0x4:
2316 case Iop_Min32F0x4:
2317 case Iop_Max32F0x4:
2318 case Iop_Div32F0x4:
2319 case Iop_CmpLT32F0x4:
2320 case Iop_CmpLE32F0x4:
2321 case Iop_CmpEQ32F0x4:
sewardj545663e2005-11-05 01:55:04 +00002322 case Iop_CmpUN32F0x4:
sewardj170ee212004-12-10 18:57:51 +00002323 case Iop_Add32F0x4:
2324 return binary32F0x4(mce, vatom1, vatom2);
2325
sewardj20d38f22005-02-07 23:50:18 +00002326 /* V128-bit data-steering */
2327 case Iop_SetV128lo32:
2328 case Iop_SetV128lo64:
2329 case Iop_64HLtoV128:
sewardja1d93302004-12-12 16:45:06 +00002330 case Iop_InterleaveLO64x2:
2331 case Iop_InterleaveLO32x4:
2332 case Iop_InterleaveLO16x8:
2333 case Iop_InterleaveLO8x16:
2334 case Iop_InterleaveHI64x2:
2335 case Iop_InterleaveHI32x4:
2336 case Iop_InterleaveHI16x8:
2337 case Iop_InterleaveHI8x16:
sewardj7cf4e6b2008-05-01 20:24:26 +00002338 return assignNew('V', mce, Ity_V128, binop(op, vatom1, vatom2));
sewardj620eb5b2005-10-22 12:50:43 +00002339
2340 /* Perm8x16: rearrange values in left arg using steering values
2341 from right arg. So rearrange the vbits in the same way but
2342 pessimise wrt steering values. */
2343 case Iop_Perm8x16:
2344 return mkUifUV128(
2345 mce,
sewardj7cf4e6b2008-05-01 20:24:26 +00002346 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
sewardj620eb5b2005-10-22 12:50:43 +00002347 mkPCast8x16(mce, vatom2)
2348 );
sewardj170ee212004-12-10 18:57:51 +00002349
sewardj43d60752005-11-10 18:13:01 +00002350 /* These two take the lower half of each 16-bit lane, sign/zero
2351 extend it to 32, and multiply together, producing a 32x4
2352 result (and implicitly ignoring half the operand bits). So
2353 treat it as a bunch of independent 16x8 operations, but then
2354 do 32-bit shifts left-right to copy the lower half results
2355 (which are all 0s or all 1s due to PCasting in binary16Ix8)
2356 into the upper half of each result lane. */
2357 case Iop_MullEven16Ux8:
2358 case Iop_MullEven16Sx8: {
2359 IRAtom* at;
2360 at = binary16Ix8(mce,vatom1,vatom2);
sewardj7cf4e6b2008-05-01 20:24:26 +00002361 at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN32x4, at, mkU8(16)));
2362 at = assignNew('V', mce, Ity_V128, binop(Iop_SarN32x4, at, mkU8(16)));
sewardj43d60752005-11-10 18:13:01 +00002363 return at;
2364 }
2365
2366 /* Same deal as Iop_MullEven16{S,U}x8 */
2367 case Iop_MullEven8Ux16:
2368 case Iop_MullEven8Sx16: {
2369 IRAtom* at;
2370 at = binary8Ix16(mce,vatom1,vatom2);
sewardj7cf4e6b2008-05-01 20:24:26 +00002371 at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN16x8, at, mkU8(8)));
2372 at = assignNew('V', mce, Ity_V128, binop(Iop_SarN16x8, at, mkU8(8)));
sewardj43d60752005-11-10 18:13:01 +00002373 return at;
2374 }
2375
2376 /* narrow 2xV128 into 1xV128, hi half from left arg, in a 2 x
2377 32x4 -> 16x8 laneage, discarding the upper half of each lane.
2378 Simply apply same op to the V bits, since this really no more
2379 than a data steering operation. */
sewardjcbf8be72005-11-10 18:34:41 +00002380 case Iop_Narrow32x4:
2381 case Iop_Narrow16x8:
sewardj7cf4e6b2008-05-01 20:24:26 +00002382 return assignNew('V', mce, Ity_V128,
2383 binop(op, vatom1, vatom2));
sewardj43d60752005-11-10 18:13:01 +00002384
2385 case Iop_ShrV128:
2386 case Iop_ShlV128:
2387 /* Same scheme as with all other shifts. Note: 10 Nov 05:
2388 this is wrong now, scalar shifts are done properly lazily.
2389 Vector shifts should be fixed too. */
2390 complainIfUndefined(mce, atom2);
sewardj7cf4e6b2008-05-01 20:24:26 +00002391 return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2));
sewardj43d60752005-11-10 18:13:01 +00002392
2393
sewardj69a13322005-04-23 01:14:51 +00002394 /* I128-bit data-steering */
2395 case Iop_64HLto128:
sewardj7cf4e6b2008-05-01 20:24:26 +00002396 return assignNew('V', mce, Ity_I128, binop(op, vatom1, vatom2));
sewardj69a13322005-04-23 01:14:51 +00002397
sewardj3245c912004-12-10 14:58:26 +00002398 /* Scalar floating point */
2399
sewardjed69fdb2006-02-03 16:12:27 +00002400 case Iop_RoundF64toInt:
2401 case Iop_RoundF64toF32:
sewardj95448072004-11-22 20:19:51 +00002402 case Iop_F64toI64:
sewardje9e16d32004-12-10 13:17:55 +00002403 case Iop_I64toF64:
sewardj22ac5f42006-02-03 22:55:04 +00002404 case Iop_SinF64:
2405 case Iop_CosF64:
2406 case Iop_TanF64:
2407 case Iop_2xm1F64:
2408 case Iop_SqrtF64:
2409 /* I32(rm) x I64/F64 -> I64/F64 */
sewardj95448072004-11-22 20:19:51 +00002410 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
2411
sewardj95448072004-11-22 20:19:51 +00002412 case Iop_F64toI32:
sewardje9e16d32004-12-10 13:17:55 +00002413 case Iop_F64toF32:
sewardj95448072004-11-22 20:19:51 +00002414 /* First arg is I32 (rounding mode), second is F64 (data). */
2415 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
2416
2417 case Iop_F64toI16:
2418 /* First arg is I32 (rounding mode), second is F64 (data). */
2419 return mkLazy2(mce, Ity_I16, vatom1, vatom2);
2420
sewardj95448072004-11-22 20:19:51 +00002421 case Iop_CmpF64:
2422 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
2423
2424 /* non-FP after here */
2425
2426 case Iop_DivModU64to32:
2427 case Iop_DivModS64to32:
2428 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
2429
sewardj69a13322005-04-23 01:14:51 +00002430 case Iop_DivModU128to64:
2431 case Iop_DivModS128to64:
2432 return mkLazy2(mce, Ity_I128, vatom1, vatom2);
2433
sewardj95448072004-11-22 20:19:51 +00002434 case Iop_16HLto32:
sewardj7cf4e6b2008-05-01 20:24:26 +00002435 return assignNew('V', mce, Ity_I32, binop(op, vatom1, vatom2));
sewardj95448072004-11-22 20:19:51 +00002436 case Iop_32HLto64:
sewardj7cf4e6b2008-05-01 20:24:26 +00002437 return assignNew('V', mce, Ity_I64, binop(op, vatom1, vatom2));
sewardj95448072004-11-22 20:19:51 +00002438
sewardj6cf40ff2005-04-20 22:31:26 +00002439 case Iop_MullS64:
2440 case Iop_MullU64: {
2441 IRAtom* vLo64 = mkLeft64(mce, mkUifU64(mce, vatom1,vatom2));
2442 IRAtom* vHi64 = mkPCastTo(mce, Ity_I64, vLo64);
sewardj7cf4e6b2008-05-01 20:24:26 +00002443 return assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, vHi64, vLo64));
sewardj6cf40ff2005-04-20 22:31:26 +00002444 }
2445
sewardj95448072004-11-22 20:19:51 +00002446 case Iop_MullS32:
2447 case Iop_MullU32: {
2448 IRAtom* vLo32 = mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
2449 IRAtom* vHi32 = mkPCastTo(mce, Ity_I32, vLo32);
sewardj7cf4e6b2008-05-01 20:24:26 +00002450 return assignNew('V', mce, Ity_I64, binop(Iop_32HLto64, vHi32, vLo32));
sewardj95448072004-11-22 20:19:51 +00002451 }
2452
2453 case Iop_MullS16:
2454 case Iop_MullU16: {
2455 IRAtom* vLo16 = mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
2456 IRAtom* vHi16 = mkPCastTo(mce, Ity_I16, vLo16);
sewardj7cf4e6b2008-05-01 20:24:26 +00002457 return assignNew('V', mce, Ity_I32, binop(Iop_16HLto32, vHi16, vLo16));
sewardj95448072004-11-22 20:19:51 +00002458 }
2459
2460 case Iop_MullS8:
2461 case Iop_MullU8: {
2462 IRAtom* vLo8 = mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
2463 IRAtom* vHi8 = mkPCastTo(mce, Ity_I8, vLo8);
sewardj7cf4e6b2008-05-01 20:24:26 +00002464 return assignNew('V', mce, Ity_I16, binop(Iop_8HLto16, vHi8, vLo8));
sewardj95448072004-11-22 20:19:51 +00002465 }
2466
cerion9e591082005-06-23 15:28:34 +00002467 case Iop_DivS32:
2468 case Iop_DivU32:
2469 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
2470
sewardjb00944a2005-12-23 12:47:16 +00002471 case Iop_DivS64:
2472 case Iop_DivU64:
2473 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
2474
sewardj95448072004-11-22 20:19:51 +00002475 case Iop_Add32:
sewardjd5204dc2004-12-31 01:16:11 +00002476 if (mce->bogusLiterals)
2477 return expensiveAddSub(mce,True,Ity_I32,
2478 vatom1,vatom2, atom1,atom2);
2479 else
2480 goto cheap_AddSub32;
sewardj95448072004-11-22 20:19:51 +00002481 case Iop_Sub32:
sewardjd5204dc2004-12-31 01:16:11 +00002482 if (mce->bogusLiterals)
2483 return expensiveAddSub(mce,False,Ity_I32,
2484 vatom1,vatom2, atom1,atom2);
2485 else
2486 goto cheap_AddSub32;
2487
2488 cheap_AddSub32:
sewardj95448072004-11-22 20:19:51 +00002489 case Iop_Mul32:
sewardj992dff92005-10-07 11:08:55 +00002490 return mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
2491
sewardj463b3d92005-07-18 11:41:15 +00002492 case Iop_CmpORD32S:
2493 case Iop_CmpORD32U:
sewardj1bc82102005-12-23 00:16:24 +00002494 case Iop_CmpORD64S:
2495 case Iop_CmpORD64U:
2496 return doCmpORD(mce, op, vatom1,vatom2, atom1,atom2);
sewardj95448072004-11-22 20:19:51 +00002497
sewardj681be302005-01-15 20:43:58 +00002498 case Iop_Add64:
tomd9774d72005-06-27 08:11:01 +00002499 if (mce->bogusLiterals)
2500 return expensiveAddSub(mce,True,Ity_I64,
2501 vatom1,vatom2, atom1,atom2);
2502 else
2503 goto cheap_AddSub64;
sewardj681be302005-01-15 20:43:58 +00002504 case Iop_Sub64:
tomd9774d72005-06-27 08:11:01 +00002505 if (mce->bogusLiterals)
2506 return expensiveAddSub(mce,False,Ity_I64,
2507 vatom1,vatom2, atom1,atom2);
2508 else
2509 goto cheap_AddSub64;
2510
2511 cheap_AddSub64:
2512 case Iop_Mul64:
sewardj681be302005-01-15 20:43:58 +00002513 return mkLeft64(mce, mkUifU64(mce, vatom1,vatom2));
2514
sewardj95448072004-11-22 20:19:51 +00002515 case Iop_Mul16:
2516 case Iop_Add16:
2517 case Iop_Sub16:
2518 return mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
2519
2520 case Iop_Sub8:
2521 case Iop_Add8:
2522 return mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
2523
sewardj69a13322005-04-23 01:14:51 +00002524 case Iop_CmpEQ64:
sewardje6f8af42005-07-06 18:48:59 +00002525 case Iop_CmpNE64:
sewardj69a13322005-04-23 01:14:51 +00002526 if (mce->bogusLiterals)
2527 return expensiveCmpEQorNE(mce,Ity_I64, vatom1,vatom2, atom1,atom2 );
2528 else
2529 goto cheap_cmp64;
2530 cheap_cmp64:
tomcd986332005-04-26 07:44:48 +00002531 case Iop_CmpLE64S: case Iop_CmpLE64U:
2532 case Iop_CmpLT64U: case Iop_CmpLT64S:
sewardj69a13322005-04-23 01:14:51 +00002533 return mkPCastTo(mce, Ity_I1, mkUifU64(mce, vatom1,vatom2));
2534
sewardjd5204dc2004-12-31 01:16:11 +00002535 case Iop_CmpEQ32:
sewardje6f8af42005-07-06 18:48:59 +00002536 case Iop_CmpNE32:
sewardjd5204dc2004-12-31 01:16:11 +00002537 if (mce->bogusLiterals)
2538 return expensiveCmpEQorNE(mce,Ity_I32, vatom1,vatom2, atom1,atom2 );
2539 else
2540 goto cheap_cmp32;
sewardjd5204dc2004-12-31 01:16:11 +00002541 cheap_cmp32:
sewardj95448072004-11-22 20:19:51 +00002542 case Iop_CmpLE32S: case Iop_CmpLE32U:
2543 case Iop_CmpLT32U: case Iop_CmpLT32S:
sewardj95448072004-11-22 20:19:51 +00002544 return mkPCastTo(mce, Ity_I1, mkUifU32(mce, vatom1,vatom2));
2545
2546 case Iop_CmpEQ16: case Iop_CmpNE16:
2547 return mkPCastTo(mce, Ity_I1, mkUifU16(mce, vatom1,vatom2));
2548
2549 case Iop_CmpEQ8: case Iop_CmpNE8:
2550 return mkPCastTo(mce, Ity_I1, mkUifU8(mce, vatom1,vatom2));
2551
sewardjaaddbc22005-10-07 09:49:53 +00002552 case Iop_Shl64: case Iop_Shr64: case Iop_Sar64:
2553 return scalarShift( mce, Ity_I64, op, vatom1,vatom2, atom1,atom2 );
2554
sewardj95448072004-11-22 20:19:51 +00002555 case Iop_Shl32: case Iop_Shr32: case Iop_Sar32:
sewardjaaddbc22005-10-07 09:49:53 +00002556 return scalarShift( mce, Ity_I32, op, vatom1,vatom2, atom1,atom2 );
sewardj95448072004-11-22 20:19:51 +00002557
sewardjdb67f5f2004-12-14 01:15:31 +00002558 case Iop_Shl16: case Iop_Shr16: case Iop_Sar16:
sewardjaaddbc22005-10-07 09:49:53 +00002559 return scalarShift( mce, Ity_I16, op, vatom1,vatom2, atom1,atom2 );
sewardj95448072004-11-22 20:19:51 +00002560
2561 case Iop_Shl8: case Iop_Shr8:
sewardjaaddbc22005-10-07 09:49:53 +00002562 return scalarShift( mce, Ity_I8, op, vatom1,vatom2, atom1,atom2 );
sewardj95448072004-11-22 20:19:51 +00002563
sewardj20d38f22005-02-07 23:50:18 +00002564 case Iop_AndV128:
2565 uifu = mkUifUV128; difd = mkDifDV128;
2566 and_or_ty = Ity_V128; improve = mkImproveANDV128; goto do_And_Or;
sewardj7010f6e2004-12-10 13:35:22 +00002567 case Iop_And64:
2568 uifu = mkUifU64; difd = mkDifD64;
2569 and_or_ty = Ity_I64; improve = mkImproveAND64; goto do_And_Or;
sewardj95448072004-11-22 20:19:51 +00002570 case Iop_And32:
2571 uifu = mkUifU32; difd = mkDifD32;
2572 and_or_ty = Ity_I32; improve = mkImproveAND32; goto do_And_Or;
2573 case Iop_And16:
2574 uifu = mkUifU16; difd = mkDifD16;
2575 and_or_ty = Ity_I16; improve = mkImproveAND16; goto do_And_Or;
2576 case Iop_And8:
2577 uifu = mkUifU8; difd = mkDifD8;
2578 and_or_ty = Ity_I8; improve = mkImproveAND8; goto do_And_Or;
2579
sewardj20d38f22005-02-07 23:50:18 +00002580 case Iop_OrV128:
2581 uifu = mkUifUV128; difd = mkDifDV128;
2582 and_or_ty = Ity_V128; improve = mkImproveORV128; goto do_And_Or;
sewardj7010f6e2004-12-10 13:35:22 +00002583 case Iop_Or64:
2584 uifu = mkUifU64; difd = mkDifD64;
2585 and_or_ty = Ity_I64; improve = mkImproveOR64; goto do_And_Or;
sewardj95448072004-11-22 20:19:51 +00002586 case Iop_Or32:
2587 uifu = mkUifU32; difd = mkDifD32;
2588 and_or_ty = Ity_I32; improve = mkImproveOR32; goto do_And_Or;
2589 case Iop_Or16:
2590 uifu = mkUifU16; difd = mkDifD16;
2591 and_or_ty = Ity_I16; improve = mkImproveOR16; goto do_And_Or;
2592 case Iop_Or8:
2593 uifu = mkUifU8; difd = mkDifD8;
2594 and_or_ty = Ity_I8; improve = mkImproveOR8; goto do_And_Or;
2595
2596 do_And_Or:
2597 return
2598 assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +00002599 'V', mce,
sewardj95448072004-11-22 20:19:51 +00002600 and_or_ty,
2601 difd(mce, uifu(mce, vatom1, vatom2),
2602 difd(mce, improve(mce, atom1, vatom1),
2603 improve(mce, atom2, vatom2) ) ) );
2604
2605 case Iop_Xor8:
2606 return mkUifU8(mce, vatom1, vatom2);
2607 case Iop_Xor16:
2608 return mkUifU16(mce, vatom1, vatom2);
2609 case Iop_Xor32:
2610 return mkUifU32(mce, vatom1, vatom2);
sewardj7010f6e2004-12-10 13:35:22 +00002611 case Iop_Xor64:
2612 return mkUifU64(mce, vatom1, vatom2);
sewardj20d38f22005-02-07 23:50:18 +00002613 case Iop_XorV128:
2614 return mkUifUV128(mce, vatom1, vatom2);
njn25e49d8e72002-09-23 09:36:25 +00002615
2616 default:
sewardj95448072004-11-22 20:19:51 +00002617 ppIROp(op);
2618 VG_(tool_panic)("memcheck:expr2vbits_Binop");
njn25e49d8e72002-09-23 09:36:25 +00002619 }
njn25e49d8e72002-09-23 09:36:25 +00002620}
2621
njn25e49d8e72002-09-23 09:36:25 +00002622
sewardj95448072004-11-22 20:19:51 +00002623static
2624IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
2625{
2626 IRAtom* vatom = expr2vbits( mce, atom );
2627 tl_assert(isOriginalAtom(mce,atom));
2628 switch (op) {
2629
sewardj0b070592004-12-10 21:44:22 +00002630 case Iop_Sqrt64Fx2:
2631 return unary64Fx2(mce, vatom);
2632
2633 case Iop_Sqrt64F0x2:
2634 return unary64F0x2(mce, vatom);
2635
sewardj170ee212004-12-10 18:57:51 +00002636 case Iop_Sqrt32Fx4:
2637 case Iop_RSqrt32Fx4:
2638 case Iop_Recip32Fx4:
cerion176cb4c2005-11-16 17:21:49 +00002639 case Iop_I32UtoFx4:
2640 case Iop_I32StoFx4:
2641 case Iop_QFtoI32Ux4_RZ:
2642 case Iop_QFtoI32Sx4_RZ:
2643 case Iop_RoundF32x4_RM:
2644 case Iop_RoundF32x4_RP:
2645 case Iop_RoundF32x4_RN:
2646 case Iop_RoundF32x4_RZ:
sewardj170ee212004-12-10 18:57:51 +00002647 return unary32Fx4(mce, vatom);
2648
2649 case Iop_Sqrt32F0x4:
2650 case Iop_RSqrt32F0x4:
2651 case Iop_Recip32F0x4:
2652 return unary32F0x4(mce, vatom);
2653
sewardj20d38f22005-02-07 23:50:18 +00002654 case Iop_32UtoV128:
2655 case Iop_64UtoV128:
sewardj620eb5b2005-10-22 12:50:43 +00002656 case Iop_Dup8x16:
2657 case Iop_Dup16x8:
2658 case Iop_Dup32x4:
sewardj7cf4e6b2008-05-01 20:24:26 +00002659 return assignNew('V', mce, Ity_V128, unop(op, vatom));
sewardj170ee212004-12-10 18:57:51 +00002660
sewardj95448072004-11-22 20:19:51 +00002661 case Iop_F32toF64:
2662 case Iop_I32toF64:
sewardj95448072004-11-22 20:19:51 +00002663 case Iop_NegF64:
sewardj95448072004-11-22 20:19:51 +00002664 case Iop_AbsF64:
sewardj4f3e0f22006-01-27 15:11:31 +00002665 case Iop_Est5FRSqrt:
sewardjdead90a2008-08-08 08:38:23 +00002666 case Iop_RoundF64toF64_NEAREST:
2667 case Iop_RoundF64toF64_NegINF:
2668 case Iop_RoundF64toF64_PosINF:
2669 case Iop_RoundF64toF64_ZERO:
sewardj39cc7352005-06-09 21:31:55 +00002670 case Iop_Clz64:
2671 case Iop_Ctz64:
sewardj95448072004-11-22 20:19:51 +00002672 return mkPCastTo(mce, Ity_I64, vatom);
2673
sewardj95448072004-11-22 20:19:51 +00002674 case Iop_Clz32:
2675 case Iop_Ctz32:
sewardjed69fdb2006-02-03 16:12:27 +00002676 case Iop_TruncF64asF32:
sewardj95448072004-11-22 20:19:51 +00002677 return mkPCastTo(mce, Ity_I32, vatom);
2678
sewardjd9dbc192005-04-27 11:40:27 +00002679 case Iop_1Uto64:
2680 case Iop_8Uto64:
2681 case Iop_8Sto64:
2682 case Iop_16Uto64:
2683 case Iop_16Sto64:
sewardj95448072004-11-22 20:19:51 +00002684 case Iop_32Sto64:
2685 case Iop_32Uto64:
sewardj20d38f22005-02-07 23:50:18 +00002686 case Iop_V128to64:
2687 case Iop_V128HIto64:
sewardj6cf40ff2005-04-20 22:31:26 +00002688 case Iop_128HIto64:
2689 case Iop_128to64:
sewardj7cf4e6b2008-05-01 20:24:26 +00002690 return assignNew('V', mce, Ity_I64, unop(op, vatom));
sewardj95448072004-11-22 20:19:51 +00002691
2692 case Iop_64to32:
2693 case Iop_64HIto32:
2694 case Iop_1Uto32:
sewardj463b3d92005-07-18 11:41:15 +00002695 case Iop_1Sto32:
sewardj95448072004-11-22 20:19:51 +00002696 case Iop_8Uto32:
2697 case Iop_16Uto32:
2698 case Iop_16Sto32:
2699 case Iop_8Sto32:
cerionfafaa0d2005-09-12 22:29:38 +00002700 case Iop_V128to32:
sewardj7cf4e6b2008-05-01 20:24:26 +00002701 return assignNew('V', mce, Ity_I32, unop(op, vatom));
sewardj95448072004-11-22 20:19:51 +00002702
2703 case Iop_8Sto16:
2704 case Iop_8Uto16:
2705 case Iop_32to16:
2706 case Iop_32HIto16:
sewardjd9dbc192005-04-27 11:40:27 +00002707 case Iop_64to16:
sewardj7cf4e6b2008-05-01 20:24:26 +00002708 return assignNew('V', mce, Ity_I16, unop(op, vatom));
sewardj95448072004-11-22 20:19:51 +00002709
2710 case Iop_1Uto8:
2711 case Iop_16to8:
sewardj9a807e02006-12-17 14:20:31 +00002712 case Iop_16HIto8:
sewardj95448072004-11-22 20:19:51 +00002713 case Iop_32to8:
sewardjd9dbc192005-04-27 11:40:27 +00002714 case Iop_64to8:
sewardj7cf4e6b2008-05-01 20:24:26 +00002715 return assignNew('V', mce, Ity_I8, unop(op, vatom));
sewardj95448072004-11-22 20:19:51 +00002716
2717 case Iop_32to1:
sewardj7cf4e6b2008-05-01 20:24:26 +00002718 return assignNew('V', mce, Ity_I1, unop(Iop_32to1, vatom));
sewardj95448072004-11-22 20:19:51 +00002719
sewardjd9dbc192005-04-27 11:40:27 +00002720 case Iop_64to1:
sewardj7cf4e6b2008-05-01 20:24:26 +00002721 return assignNew('V', mce, Ity_I1, unop(Iop_64to1, vatom));
sewardjd9dbc192005-04-27 11:40:27 +00002722
sewardj95448072004-11-22 20:19:51 +00002723 case Iop_ReinterpF64asI64:
2724 case Iop_ReinterpI64asF64:
sewardj0b070592004-12-10 21:44:22 +00002725 case Iop_ReinterpI32asF32:
sewardj20d38f22005-02-07 23:50:18 +00002726 case Iop_NotV128:
sewardj7010f6e2004-12-10 13:35:22 +00002727 case Iop_Not64:
sewardj95448072004-11-22 20:19:51 +00002728 case Iop_Not32:
2729 case Iop_Not16:
2730 case Iop_Not8:
2731 case Iop_Not1:
2732 return vatom;
sewardj7010f6e2004-12-10 13:35:22 +00002733
sewardj95448072004-11-22 20:19:51 +00002734 default:
2735 ppIROp(op);
2736 VG_(tool_panic)("memcheck:expr2vbits_Unop");
2737 }
2738}
2739
2740
sewardj170ee212004-12-10 18:57:51 +00002741/* Worker function; do not call directly. */
sewardj95448072004-11-22 20:19:51 +00002742static
sewardj2e595852005-06-30 23:33:37 +00002743IRAtom* expr2vbits_Load_WRK ( MCEnv* mce,
2744 IREndness end, IRType ty,
2745 IRAtom* addr, UInt bias )
sewardj95448072004-11-22 20:19:51 +00002746{
2747 void* helper;
2748 Char* hname;
2749 IRDirty* di;
2750 IRTemp datavbits;
2751 IRAtom* addrAct;
2752
2753 tl_assert(isOriginalAtom(mce,addr));
sewardj2e595852005-06-30 23:33:37 +00002754 tl_assert(end == Iend_LE || end == Iend_BE);
sewardj95448072004-11-22 20:19:51 +00002755
2756 /* First, emit a definedness test for the address. This also sets
2757 the address (shadow) to 'defined' following the test. */
2758 complainIfUndefined( mce, addr );
2759
2760 /* Now cook up a call to the relevant helper function, to read the
2761 data V bits from shadow memory. */
sewardj7cf4e6b2008-05-01 20:24:26 +00002762 ty = shadowTypeV(ty);
sewardj2e595852005-06-30 23:33:37 +00002763
2764 if (end == Iend_LE) {
2765 switch (ty) {
njn1d0825f2006-03-27 11:37:07 +00002766 case Ity_I64: helper = &MC_(helperc_LOADV64le);
2767 hname = "MC_(helperc_LOADV64le)";
sewardj2e595852005-06-30 23:33:37 +00002768 break;
njn1d0825f2006-03-27 11:37:07 +00002769 case Ity_I32: helper = &MC_(helperc_LOADV32le);
2770 hname = "MC_(helperc_LOADV32le)";
sewardj2e595852005-06-30 23:33:37 +00002771 break;
njn1d0825f2006-03-27 11:37:07 +00002772 case Ity_I16: helper = &MC_(helperc_LOADV16le);
2773 hname = "MC_(helperc_LOADV16le)";
sewardj2e595852005-06-30 23:33:37 +00002774 break;
njn1d0825f2006-03-27 11:37:07 +00002775 case Ity_I8: helper = &MC_(helperc_LOADV8);
2776 hname = "MC_(helperc_LOADV8)";
sewardj2e595852005-06-30 23:33:37 +00002777 break;
2778 default: ppIRType(ty);
2779 VG_(tool_panic)("memcheck:do_shadow_Load(LE)");
2780 }
2781 } else {
sewardj8cf88b72005-07-08 01:29:33 +00002782 switch (ty) {
njn1d0825f2006-03-27 11:37:07 +00002783 case Ity_I64: helper = &MC_(helperc_LOADV64be);
2784 hname = "MC_(helperc_LOADV64be)";
sewardj8cf88b72005-07-08 01:29:33 +00002785 break;
njn1d0825f2006-03-27 11:37:07 +00002786 case Ity_I32: helper = &MC_(helperc_LOADV32be);
2787 hname = "MC_(helperc_LOADV32be)";
sewardj8cf88b72005-07-08 01:29:33 +00002788 break;
njn1d0825f2006-03-27 11:37:07 +00002789 case Ity_I16: helper = &MC_(helperc_LOADV16be);
2790 hname = "MC_(helperc_LOADV16be)";
sewardj8cf88b72005-07-08 01:29:33 +00002791 break;
njn1d0825f2006-03-27 11:37:07 +00002792 case Ity_I8: helper = &MC_(helperc_LOADV8);
2793 hname = "MC_(helperc_LOADV8)";
sewardj8cf88b72005-07-08 01:29:33 +00002794 break;
2795 default: ppIRType(ty);
2796 VG_(tool_panic)("memcheck:do_shadow_Load(BE)");
2797 }
sewardj95448072004-11-22 20:19:51 +00002798 }
2799
2800 /* Generate the actual address into addrAct. */
2801 if (bias == 0) {
2802 addrAct = addr;
2803 } else {
sewardj7cf97ee2004-11-28 14:25:01 +00002804 IROp mkAdd;
2805 IRAtom* eBias;
sewardj95448072004-11-22 20:19:51 +00002806 IRType tyAddr = mce->hWordTy;
2807 tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
sewardj7cf97ee2004-11-28 14:25:01 +00002808 mkAdd = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
2809 eBias = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
sewardj7cf4e6b2008-05-01 20:24:26 +00002810 addrAct = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBias) );
sewardj95448072004-11-22 20:19:51 +00002811 }
2812
2813 /* We need to have a place to park the V bits we're just about to
2814 read. */
sewardj1c0ce7a2009-07-01 08:10:49 +00002815 datavbits = newTemp(mce, ty, VSh);
sewardj95448072004-11-22 20:19:51 +00002816 di = unsafeIRDirty_1_N( datavbits,
sewardj53ee1fc2005-12-23 02:29:58 +00002817 1/*regparms*/,
2818 hname, VG_(fnptr_to_fnentry)( helper ),
sewardj95448072004-11-22 20:19:51 +00002819 mkIRExprVec_1( addrAct ));
2820 setHelperAnns( mce, di );
sewardj7cf4e6b2008-05-01 20:24:26 +00002821 stmt( 'V', mce, IRStmt_Dirty(di) );
sewardj95448072004-11-22 20:19:51 +00002822
2823 return mkexpr(datavbits);
2824}
2825
2826
2827static
sewardj2e595852005-06-30 23:33:37 +00002828IRAtom* expr2vbits_Load ( MCEnv* mce,
2829 IREndness end, IRType ty,
2830 IRAtom* addr, UInt bias )
sewardj170ee212004-12-10 18:57:51 +00002831{
2832 IRAtom *v64hi, *v64lo;
sewardj2e595852005-06-30 23:33:37 +00002833 tl_assert(end == Iend_LE || end == Iend_BE);
sewardj7cf4e6b2008-05-01 20:24:26 +00002834 switch (shadowTypeV(ty)) {
sewardj170ee212004-12-10 18:57:51 +00002835 case Ity_I8:
2836 case Ity_I16:
2837 case Ity_I32:
2838 case Ity_I64:
sewardj2e595852005-06-30 23:33:37 +00002839 return expr2vbits_Load_WRK(mce, end, ty, addr, bias);
sewardj170ee212004-12-10 18:57:51 +00002840 case Ity_V128:
sewardj2e595852005-06-30 23:33:37 +00002841 if (end == Iend_LE) {
2842 v64lo = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias);
2843 v64hi = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+8);
2844 } else {
sewardj2e595852005-06-30 23:33:37 +00002845 v64hi = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias);
2846 v64lo = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+8);
2847 }
sewardj7cf4e6b2008-05-01 20:24:26 +00002848 return assignNew( 'V', mce,
sewardj170ee212004-12-10 18:57:51 +00002849 Ity_V128,
sewardj20d38f22005-02-07 23:50:18 +00002850 binop(Iop_64HLtoV128, v64hi, v64lo));
sewardj170ee212004-12-10 18:57:51 +00002851 default:
sewardj2e595852005-06-30 23:33:37 +00002852 VG_(tool_panic)("expr2vbits_Load");
sewardj170ee212004-12-10 18:57:51 +00002853 }
2854}
2855
2856
2857static
sewardj95448072004-11-22 20:19:51 +00002858IRAtom* expr2vbits_Mux0X ( MCEnv* mce,
2859 IRAtom* cond, IRAtom* expr0, IRAtom* exprX )
2860{
2861 IRAtom *vbitsC, *vbits0, *vbitsX;
2862 IRType ty;
2863 /* Given Mux0X(cond,expr0,exprX), generate
2864 Mux0X(cond,expr0#,exprX#) `UifU` PCast(cond#)
2865 That is, steer the V bits like the originals, but trash the
2866 result if the steering value is undefined. This gives
2867 lazy propagation. */
2868 tl_assert(isOriginalAtom(mce, cond));
2869 tl_assert(isOriginalAtom(mce, expr0));
2870 tl_assert(isOriginalAtom(mce, exprX));
2871
2872 vbitsC = expr2vbits(mce, cond);
2873 vbits0 = expr2vbits(mce, expr0);
2874 vbitsX = expr2vbits(mce, exprX);
sewardj1c0ce7a2009-07-01 08:10:49 +00002875 ty = typeOfIRExpr(mce->sb->tyenv, vbits0);
sewardj95448072004-11-22 20:19:51 +00002876
2877 return
sewardj7cf4e6b2008-05-01 20:24:26 +00002878 mkUifU(mce, ty, assignNew('V', mce, ty,
2879 IRExpr_Mux0X(cond, vbits0, vbitsX)),
sewardj95448072004-11-22 20:19:51 +00002880 mkPCastTo(mce, ty, vbitsC) );
2881}
2882
2883/* --------- This is the main expression-handling function. --------- */
2884
2885static
2886IRExpr* expr2vbits ( MCEnv* mce, IRExpr* e )
2887{
2888 switch (e->tag) {
2889
2890 case Iex_Get:
2891 return shadow_GET( mce, e->Iex.Get.offset, e->Iex.Get.ty );
2892
2893 case Iex_GetI:
2894 return shadow_GETI( mce, e->Iex.GetI.descr,
2895 e->Iex.GetI.ix, e->Iex.GetI.bias );
2896
sewardj0b9d74a2006-12-24 02:24:11 +00002897 case Iex_RdTmp:
sewardj7cf4e6b2008-05-01 20:24:26 +00002898 return IRExpr_RdTmp( findShadowTmpV(mce, e->Iex.RdTmp.tmp) );
sewardj95448072004-11-22 20:19:51 +00002899
2900 case Iex_Const:
sewardj1c0ce7a2009-07-01 08:10:49 +00002901 return definedOfType(shadowTypeV(typeOfIRExpr(mce->sb->tyenv, e)));
sewardj95448072004-11-22 20:19:51 +00002902
sewardje91cea72006-02-08 19:32:02 +00002903 case Iex_Qop:
2904 return expr2vbits_Qop(
2905 mce,
2906 e->Iex.Qop.op,
2907 e->Iex.Qop.arg1, e->Iex.Qop.arg2,
2908 e->Iex.Qop.arg3, e->Iex.Qop.arg4
2909 );
2910
sewardjed69fdb2006-02-03 16:12:27 +00002911 case Iex_Triop:
2912 return expr2vbits_Triop(
2913 mce,
2914 e->Iex.Triop.op,
2915 e->Iex.Triop.arg1, e->Iex.Triop.arg2, e->Iex.Triop.arg3
2916 );
2917
sewardj95448072004-11-22 20:19:51 +00002918 case Iex_Binop:
2919 return expr2vbits_Binop(
2920 mce,
2921 e->Iex.Binop.op,
2922 e->Iex.Binop.arg1, e->Iex.Binop.arg2
2923 );
2924
2925 case Iex_Unop:
2926 return expr2vbits_Unop( mce, e->Iex.Unop.op, e->Iex.Unop.arg );
2927
sewardj2e595852005-06-30 23:33:37 +00002928 case Iex_Load:
2929 return expr2vbits_Load( mce, e->Iex.Load.end,
2930 e->Iex.Load.ty,
2931 e->Iex.Load.addr, 0/*addr bias*/ );
sewardj95448072004-11-22 20:19:51 +00002932
2933 case Iex_CCall:
2934 return mkLazyN( mce, e->Iex.CCall.args,
2935 e->Iex.CCall.retty,
2936 e->Iex.CCall.cee );
2937
2938 case Iex_Mux0X:
2939 return expr2vbits_Mux0X( mce, e->Iex.Mux0X.cond, e->Iex.Mux0X.expr0,
2940 e->Iex.Mux0X.exprX);
njn25e49d8e72002-09-23 09:36:25 +00002941
2942 default:
sewardj95448072004-11-22 20:19:51 +00002943 VG_(printf)("\n");
2944 ppIRExpr(e);
2945 VG_(printf)("\n");
2946 VG_(tool_panic)("memcheck: expr2vbits");
njn25e49d8e72002-09-23 09:36:25 +00002947 }
njn25e49d8e72002-09-23 09:36:25 +00002948}
2949
2950/*------------------------------------------------------------*/
sewardj95448072004-11-22 20:19:51 +00002951/*--- Generate shadow stmts from all kinds of IRStmts. ---*/
njn25e49d8e72002-09-23 09:36:25 +00002952/*------------------------------------------------------------*/
2953
sewardj95448072004-11-22 20:19:51 +00002954/* Widen a value to the host word size. */
njn25e49d8e72002-09-23 09:36:25 +00002955
2956static
sewardj95448072004-11-22 20:19:51 +00002957IRExpr* zwidenToHostWord ( MCEnv* mce, IRAtom* vatom )
njn25e49d8e72002-09-23 09:36:25 +00002958{
sewardj7cf97ee2004-11-28 14:25:01 +00002959 IRType ty, tyH;
2960
sewardj95448072004-11-22 20:19:51 +00002961 /* vatom is vbits-value and as such can only have a shadow type. */
2962 tl_assert(isShadowAtom(mce,vatom));
njn25e49d8e72002-09-23 09:36:25 +00002963
sewardj1c0ce7a2009-07-01 08:10:49 +00002964 ty = typeOfIRExpr(mce->sb->tyenv, vatom);
sewardj7cf97ee2004-11-28 14:25:01 +00002965 tyH = mce->hWordTy;
njn25e49d8e72002-09-23 09:36:25 +00002966
sewardj95448072004-11-22 20:19:51 +00002967 if (tyH == Ity_I32) {
2968 switch (ty) {
sewardj7cf4e6b2008-05-01 20:24:26 +00002969 case Ity_I32:
2970 return vatom;
2971 case Ity_I16:
2972 return assignNew('V', mce, tyH, unop(Iop_16Uto32, vatom));
2973 case Ity_I8:
2974 return assignNew('V', mce, tyH, unop(Iop_8Uto32, vatom));
2975 default:
2976 goto unhandled;
sewardj8ec2cfc2002-10-13 00:57:26 +00002977 }
sewardj6cf40ff2005-04-20 22:31:26 +00002978 } else
2979 if (tyH == Ity_I64) {
2980 switch (ty) {
sewardj7cf4e6b2008-05-01 20:24:26 +00002981 case Ity_I32:
2982 return assignNew('V', mce, tyH, unop(Iop_32Uto64, vatom));
2983 case Ity_I16:
2984 return assignNew('V', mce, tyH, unop(Iop_32Uto64,
2985 assignNew('V', mce, Ity_I32, unop(Iop_16Uto32, vatom))));
2986 case Ity_I8:
2987 return assignNew('V', mce, tyH, unop(Iop_32Uto64,
2988 assignNew('V', mce, Ity_I32, unop(Iop_8Uto32, vatom))));
2989 default:
2990 goto unhandled;
sewardj6cf40ff2005-04-20 22:31:26 +00002991 }
sewardj95448072004-11-22 20:19:51 +00002992 } else {
2993 goto unhandled;
sewardj8ec2cfc2002-10-13 00:57:26 +00002994 }
sewardj95448072004-11-22 20:19:51 +00002995 unhandled:
2996 VG_(printf)("\nty = "); ppIRType(ty); VG_(printf)("\n");
2997 VG_(tool_panic)("zwidenToHostWord");
njn25e49d8e72002-09-23 09:36:25 +00002998}
2999
njn25e49d8e72002-09-23 09:36:25 +00003000
sewardj95448072004-11-22 20:19:51 +00003001/* Generate a shadow store. addr is always the original address atom.
3002 You can pass in either originals or V-bits for the data atom, but
sewardj1c0ce7a2009-07-01 08:10:49 +00003003 obviously not both. guard :: Ity_I1 controls whether the store
3004 really happens; NULL means it unconditionally does. Note that
3005 guard itself is not checked for definedness; the caller of this
3006 function must do that if necessary. */
njn25e49d8e72002-09-23 09:36:25 +00003007
sewardj95448072004-11-22 20:19:51 +00003008static
sewardj2e595852005-06-30 23:33:37 +00003009void do_shadow_Store ( MCEnv* mce,
3010 IREndness end,
3011 IRAtom* addr, UInt bias,
sewardj1c0ce7a2009-07-01 08:10:49 +00003012 IRAtom* data, IRAtom* vdata,
3013 IRAtom* guard )
njn25e49d8e72002-09-23 09:36:25 +00003014{
sewardj170ee212004-12-10 18:57:51 +00003015 IROp mkAdd;
3016 IRType ty, tyAddr;
sewardj95448072004-11-22 20:19:51 +00003017 void* helper = NULL;
3018 Char* hname = NULL;
njn1d0825f2006-03-27 11:37:07 +00003019 IRConst* c;
sewardj170ee212004-12-10 18:57:51 +00003020
3021 tyAddr = mce->hWordTy;
3022 mkAdd = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
3023 tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
sewardj2e595852005-06-30 23:33:37 +00003024 tl_assert( end == Iend_LE || end == Iend_BE );
sewardj170ee212004-12-10 18:57:51 +00003025
sewardj95448072004-11-22 20:19:51 +00003026 if (data) {
3027 tl_assert(!vdata);
3028 tl_assert(isOriginalAtom(mce, data));
3029 tl_assert(bias == 0);
3030 vdata = expr2vbits( mce, data );
3031 } else {
3032 tl_assert(vdata);
3033 }
njn25e49d8e72002-09-23 09:36:25 +00003034
sewardj95448072004-11-22 20:19:51 +00003035 tl_assert(isOriginalAtom(mce,addr));
3036 tl_assert(isShadowAtom(mce,vdata));
njn25e49d8e72002-09-23 09:36:25 +00003037
sewardj1c0ce7a2009-07-01 08:10:49 +00003038 if (guard) {
3039 tl_assert(isOriginalAtom(mce, guard));
3040 tl_assert(typeOfIRExpr(mce->sb->tyenv, guard) == Ity_I1);
3041 }
3042
3043 ty = typeOfIRExpr(mce->sb->tyenv, vdata);
njn25e49d8e72002-09-23 09:36:25 +00003044
njn1d0825f2006-03-27 11:37:07 +00003045 // If we're not doing undefined value checking, pretend that this value
3046 // is "all valid". That lets Vex's optimiser remove some of the V bit
3047 // shadow computation ops that precede it.
sewardj7cf4e6b2008-05-01 20:24:26 +00003048 if (MC_(clo_mc_level) == 1) {
njn1d0825f2006-03-27 11:37:07 +00003049 switch (ty) {
sewardj1c0ce7a2009-07-01 08:10:49 +00003050 case Ity_V128: // V128 weirdness
3051 c = IRConst_V128(V_BITS16_DEFINED); break;
njn1d0825f2006-03-27 11:37:07 +00003052 case Ity_I64: c = IRConst_U64 (V_BITS64_DEFINED); break;
3053 case Ity_I32: c = IRConst_U32 (V_BITS32_DEFINED); break;
3054 case Ity_I16: c = IRConst_U16 (V_BITS16_DEFINED); break;
3055 case Ity_I8: c = IRConst_U8 (V_BITS8_DEFINED); break;
3056 default: VG_(tool_panic)("memcheck:do_shadow_Store(LE)");
3057 }
3058 vdata = IRExpr_Const( c );
3059 }
3060
sewardj95448072004-11-22 20:19:51 +00003061 /* First, emit a definedness test for the address. This also sets
3062 the address (shadow) to 'defined' following the test. */
3063 complainIfUndefined( mce, addr );
njn25e49d8e72002-09-23 09:36:25 +00003064
sewardj170ee212004-12-10 18:57:51 +00003065 /* Now decide which helper function to call to write the data V
3066 bits into shadow memory. */
sewardj2e595852005-06-30 23:33:37 +00003067 if (end == Iend_LE) {
3068 switch (ty) {
3069 case Ity_V128: /* we'll use the helper twice */
njn1d0825f2006-03-27 11:37:07 +00003070 case Ity_I64: helper = &MC_(helperc_STOREV64le);
3071 hname = "MC_(helperc_STOREV64le)";
sewardj2e595852005-06-30 23:33:37 +00003072 break;
njn1d0825f2006-03-27 11:37:07 +00003073 case Ity_I32: helper = &MC_(helperc_STOREV32le);
3074 hname = "MC_(helperc_STOREV32le)";
sewardj2e595852005-06-30 23:33:37 +00003075 break;
njn1d0825f2006-03-27 11:37:07 +00003076 case Ity_I16: helper = &MC_(helperc_STOREV16le);
3077 hname = "MC_(helperc_STOREV16le)";
sewardj2e595852005-06-30 23:33:37 +00003078 break;
njn1d0825f2006-03-27 11:37:07 +00003079 case Ity_I8: helper = &MC_(helperc_STOREV8);
3080 hname = "MC_(helperc_STOREV8)";
sewardj2e595852005-06-30 23:33:37 +00003081 break;
3082 default: VG_(tool_panic)("memcheck:do_shadow_Store(LE)");
3083 }
3084 } else {
sewardj8cf88b72005-07-08 01:29:33 +00003085 switch (ty) {
3086 case Ity_V128: /* we'll use the helper twice */
njn1d0825f2006-03-27 11:37:07 +00003087 case Ity_I64: helper = &MC_(helperc_STOREV64be);
3088 hname = "MC_(helperc_STOREV64be)";
sewardj8cf88b72005-07-08 01:29:33 +00003089 break;
njn1d0825f2006-03-27 11:37:07 +00003090 case Ity_I32: helper = &MC_(helperc_STOREV32be);
3091 hname = "MC_(helperc_STOREV32be)";
sewardj8cf88b72005-07-08 01:29:33 +00003092 break;
njn1d0825f2006-03-27 11:37:07 +00003093 case Ity_I16: helper = &MC_(helperc_STOREV16be);
3094 hname = "MC_(helperc_STOREV16be)";
sewardj8cf88b72005-07-08 01:29:33 +00003095 break;
njn1d0825f2006-03-27 11:37:07 +00003096 case Ity_I8: helper = &MC_(helperc_STOREV8);
3097 hname = "MC_(helperc_STOREV8)";
sewardj8cf88b72005-07-08 01:29:33 +00003098 break;
3099 default: VG_(tool_panic)("memcheck:do_shadow_Store(BE)");
3100 }
sewardj95448072004-11-22 20:19:51 +00003101 }
njn25e49d8e72002-09-23 09:36:25 +00003102
sewardj170ee212004-12-10 18:57:51 +00003103 if (ty == Ity_V128) {
3104
sewardj20d38f22005-02-07 23:50:18 +00003105 /* V128-bit case */
sewardj170ee212004-12-10 18:57:51 +00003106 /* See comment in next clause re 64-bit regparms */
sewardj2e595852005-06-30 23:33:37 +00003107 /* also, need to be careful about endianness */
3108
njn4c245e52009-03-15 23:25:38 +00003109 Int offLo64, offHi64;
3110 IRDirty *diLo64, *diHi64;
3111 IRAtom *addrLo64, *addrHi64;
3112 IRAtom *vdataLo64, *vdataHi64;
3113 IRAtom *eBiasLo64, *eBiasHi64;
3114
sewardj2e595852005-06-30 23:33:37 +00003115 if (end == Iend_LE) {
3116 offLo64 = 0;
3117 offHi64 = 8;
3118 } else {
sewardj2e595852005-06-30 23:33:37 +00003119 offLo64 = 8;
3120 offHi64 = 0;
3121 }
3122
3123 eBiasLo64 = tyAddr==Ity_I32 ? mkU32(bias+offLo64) : mkU64(bias+offLo64);
sewardj7cf4e6b2008-05-01 20:24:26 +00003124 addrLo64 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasLo64) );
3125 vdataLo64 = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vdata));
sewardj170ee212004-12-10 18:57:51 +00003126 diLo64 = unsafeIRDirty_0_N(
sewardj53ee1fc2005-12-23 02:29:58 +00003127 1/*regparms*/,
3128 hname, VG_(fnptr_to_fnentry)( helper ),
3129 mkIRExprVec_2( addrLo64, vdataLo64 )
3130 );
sewardj2e595852005-06-30 23:33:37 +00003131 eBiasHi64 = tyAddr==Ity_I32 ? mkU32(bias+offHi64) : mkU64(bias+offHi64);
sewardj7cf4e6b2008-05-01 20:24:26 +00003132 addrHi64 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasHi64) );
3133 vdataHi64 = assignNew('V', mce, Ity_I64, unop(Iop_V128HIto64, vdata));
sewardj170ee212004-12-10 18:57:51 +00003134 diHi64 = unsafeIRDirty_0_N(
sewardj53ee1fc2005-12-23 02:29:58 +00003135 1/*regparms*/,
3136 hname, VG_(fnptr_to_fnentry)( helper ),
3137 mkIRExprVec_2( addrHi64, vdataHi64 )
3138 );
sewardj1c0ce7a2009-07-01 08:10:49 +00003139 if (guard) diLo64->guard = guard;
3140 if (guard) diHi64->guard = guard;
sewardj170ee212004-12-10 18:57:51 +00003141 setHelperAnns( mce, diLo64 );
3142 setHelperAnns( mce, diHi64 );
sewardj7cf4e6b2008-05-01 20:24:26 +00003143 stmt( 'V', mce, IRStmt_Dirty(diLo64) );
3144 stmt( 'V', mce, IRStmt_Dirty(diHi64) );
sewardj170ee212004-12-10 18:57:51 +00003145
sewardj95448072004-11-22 20:19:51 +00003146 } else {
sewardj170ee212004-12-10 18:57:51 +00003147
njn4c245e52009-03-15 23:25:38 +00003148 IRDirty *di;
3149 IRAtom *addrAct;
3150
sewardj170ee212004-12-10 18:57:51 +00003151 /* 8/16/32/64-bit cases */
3152 /* Generate the actual address into addrAct. */
3153 if (bias == 0) {
3154 addrAct = addr;
3155 } else {
njn4c245e52009-03-15 23:25:38 +00003156 IRAtom* eBias = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
sewardj7cf4e6b2008-05-01 20:24:26 +00003157 addrAct = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBias));
sewardj170ee212004-12-10 18:57:51 +00003158 }
3159
3160 if (ty == Ity_I64) {
3161 /* We can't do this with regparm 2 on 32-bit platforms, since
3162 the back ends aren't clever enough to handle 64-bit
3163 regparm args. Therefore be different. */
3164 di = unsafeIRDirty_0_N(
sewardj53ee1fc2005-12-23 02:29:58 +00003165 1/*regparms*/,
3166 hname, VG_(fnptr_to_fnentry)( helper ),
3167 mkIRExprVec_2( addrAct, vdata )
3168 );
sewardj170ee212004-12-10 18:57:51 +00003169 } else {
3170 di = unsafeIRDirty_0_N(
sewardj53ee1fc2005-12-23 02:29:58 +00003171 2/*regparms*/,
3172 hname, VG_(fnptr_to_fnentry)( helper ),
sewardj170ee212004-12-10 18:57:51 +00003173 mkIRExprVec_2( addrAct,
sewardj53ee1fc2005-12-23 02:29:58 +00003174 zwidenToHostWord( mce, vdata ))
3175 );
sewardj170ee212004-12-10 18:57:51 +00003176 }
sewardj1c0ce7a2009-07-01 08:10:49 +00003177 if (guard) di->guard = guard;
sewardj170ee212004-12-10 18:57:51 +00003178 setHelperAnns( mce, di );
sewardj7cf4e6b2008-05-01 20:24:26 +00003179 stmt( 'V', mce, IRStmt_Dirty(di) );
sewardj95448072004-11-22 20:19:51 +00003180 }
njn25e49d8e72002-09-23 09:36:25 +00003181
sewardj95448072004-11-22 20:19:51 +00003182}
njn25e49d8e72002-09-23 09:36:25 +00003183
njn25e49d8e72002-09-23 09:36:25 +00003184
sewardj95448072004-11-22 20:19:51 +00003185/* Do lazy pessimistic propagation through a dirty helper call, by
3186 looking at the annotations on it. This is the most complex part of
3187 Memcheck. */
njn25e49d8e72002-09-23 09:36:25 +00003188
sewardj95448072004-11-22 20:19:51 +00003189static IRType szToITy ( Int n )
3190{
3191 switch (n) {
3192 case 1: return Ity_I8;
3193 case 2: return Ity_I16;
3194 case 4: return Ity_I32;
3195 case 8: return Ity_I64;
3196 default: VG_(tool_panic)("szToITy(memcheck)");
3197 }
3198}
njn25e49d8e72002-09-23 09:36:25 +00003199
sewardj95448072004-11-22 20:19:51 +00003200static
3201void do_shadow_Dirty ( MCEnv* mce, IRDirty* d )
3202{
njn4c245e52009-03-15 23:25:38 +00003203 Int i, n, toDo, gSz, gOff;
sewardj2e595852005-06-30 23:33:37 +00003204 IRAtom *src, *here, *curr;
njn4c245e52009-03-15 23:25:38 +00003205 IRType tySrc, tyDst;
sewardj2e595852005-06-30 23:33:37 +00003206 IRTemp dst;
3207 IREndness end;
3208
3209 /* What's the native endianness? We need to know this. */
sewardj6e340c72005-07-10 00:53:42 +00003210# if defined(VG_BIGENDIAN)
sewardj2e595852005-06-30 23:33:37 +00003211 end = Iend_BE;
sewardj6e340c72005-07-10 00:53:42 +00003212# elif defined(VG_LITTLEENDIAN)
sewardj2e595852005-06-30 23:33:37 +00003213 end = Iend_LE;
3214# else
3215# error "Unknown endianness"
3216# endif
njn25e49d8e72002-09-23 09:36:25 +00003217
sewardj95448072004-11-22 20:19:51 +00003218 /* First check the guard. */
3219 complainIfUndefined(mce, d->guard);
3220
3221 /* Now round up all inputs and PCast over them. */
sewardj7cf97ee2004-11-28 14:25:01 +00003222 curr = definedOfType(Ity_I32);
sewardj95448072004-11-22 20:19:51 +00003223
3224 /* Inputs: unmasked args */
3225 for (i = 0; d->args[i]; i++) {
3226 if (d->cee->mcx_mask & (1<<i)) {
3227 /* ignore this arg */
njn25e49d8e72002-09-23 09:36:25 +00003228 } else {
sewardj95448072004-11-22 20:19:51 +00003229 here = mkPCastTo( mce, Ity_I32, expr2vbits(mce, d->args[i]) );
3230 curr = mkUifU32(mce, here, curr);
njn25e49d8e72002-09-23 09:36:25 +00003231 }
3232 }
sewardj95448072004-11-22 20:19:51 +00003233
3234 /* Inputs: guest state that we read. */
3235 for (i = 0; i < d->nFxState; i++) {
3236 tl_assert(d->fxState[i].fx != Ifx_None);
3237 if (d->fxState[i].fx == Ifx_Write)
3238 continue;
sewardja7203252004-11-26 19:17:47 +00003239
3240 /* Ignore any sections marked as 'always defined'. */
3241 if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size )) {
sewardje9e16d32004-12-10 13:17:55 +00003242 if (0)
sewardja7203252004-11-26 19:17:47 +00003243 VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
3244 d->fxState[i].offset, d->fxState[i].size );
3245 continue;
3246 }
3247
sewardj95448072004-11-22 20:19:51 +00003248 /* This state element is read or modified. So we need to
sewardje9e16d32004-12-10 13:17:55 +00003249 consider it. If larger than 8 bytes, deal with it in 8-byte
3250 chunks. */
3251 gSz = d->fxState[i].size;
3252 gOff = d->fxState[i].offset;
3253 tl_assert(gSz > 0);
3254 while (True) {
3255 if (gSz == 0) break;
3256 n = gSz <= 8 ? gSz : 8;
3257 /* update 'curr' with UifU of the state slice
3258 gOff .. gOff+n-1 */
3259 tySrc = szToITy( n );
sewardj7cf4e6b2008-05-01 20:24:26 +00003260 src = assignNew( 'V', mce, tySrc,
3261 shadow_GET(mce, gOff, tySrc ) );
sewardje9e16d32004-12-10 13:17:55 +00003262 here = mkPCastTo( mce, Ity_I32, src );
3263 curr = mkUifU32(mce, here, curr);
3264 gSz -= n;
3265 gOff += n;
3266 }
3267
sewardj95448072004-11-22 20:19:51 +00003268 }
3269
3270 /* Inputs: memory. First set up some info needed regardless of
3271 whether we're doing reads or writes. */
sewardj95448072004-11-22 20:19:51 +00003272
3273 if (d->mFx != Ifx_None) {
3274 /* Because we may do multiple shadow loads/stores from the same
3275 base address, it's best to do a single test of its
3276 definedness right now. Post-instrumentation optimisation
3277 should remove all but this test. */
njn4c245e52009-03-15 23:25:38 +00003278 IRType tyAddr;
sewardj95448072004-11-22 20:19:51 +00003279 tl_assert(d->mAddr);
3280 complainIfUndefined(mce, d->mAddr);
3281
sewardj1c0ce7a2009-07-01 08:10:49 +00003282 tyAddr = typeOfIRExpr(mce->sb->tyenv, d->mAddr);
sewardj95448072004-11-22 20:19:51 +00003283 tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);
3284 tl_assert(tyAddr == mce->hWordTy); /* not really right */
3285 }
3286
3287 /* Deal with memory inputs (reads or modifies) */
3288 if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
sewardj95448072004-11-22 20:19:51 +00003289 toDo = d->mSize;
sewardj2e595852005-06-30 23:33:37 +00003290 /* chew off 32-bit chunks. We don't care about the endianness
3291 since it's all going to be condensed down to a single bit,
3292 but nevertheless choose an endianness which is hopefully
3293 native to the platform. */
sewardj95448072004-11-22 20:19:51 +00003294 while (toDo >= 4) {
3295 here = mkPCastTo(
3296 mce, Ity_I32,
sewardj2e595852005-06-30 23:33:37 +00003297 expr2vbits_Load ( mce, end, Ity_I32,
sewardj95448072004-11-22 20:19:51 +00003298 d->mAddr, d->mSize - toDo )
3299 );
3300 curr = mkUifU32(mce, here, curr);
3301 toDo -= 4;
3302 }
3303 /* chew off 16-bit chunks */
3304 while (toDo >= 2) {
3305 here = mkPCastTo(
3306 mce, Ity_I32,
sewardj2e595852005-06-30 23:33:37 +00003307 expr2vbits_Load ( mce, end, Ity_I16,
sewardj95448072004-11-22 20:19:51 +00003308 d->mAddr, d->mSize - toDo )
3309 );
3310 curr = mkUifU32(mce, here, curr);
3311 toDo -= 2;
3312 }
3313 tl_assert(toDo == 0); /* also need to handle 1-byte excess */
3314 }
3315
3316 /* Whew! So curr is a 32-bit V-value summarising pessimistically
3317 all the inputs to the helper. Now we need to re-distribute the
3318 results to all destinations. */
3319
3320 /* Outputs: the destination temporary, if there is one. */
3321 if (d->tmp != IRTemp_INVALID) {
sewardj7cf4e6b2008-05-01 20:24:26 +00003322 dst = findShadowTmpV(mce, d->tmp);
sewardj1c0ce7a2009-07-01 08:10:49 +00003323 tyDst = typeOfIRTemp(mce->sb->tyenv, d->tmp);
sewardj7cf4e6b2008-05-01 20:24:26 +00003324 assign( 'V', mce, dst, mkPCastTo( mce, tyDst, curr) );
sewardj95448072004-11-22 20:19:51 +00003325 }
3326
3327 /* Outputs: guest state that we write or modify. */
3328 for (i = 0; i < d->nFxState; i++) {
3329 tl_assert(d->fxState[i].fx != Ifx_None);
3330 if (d->fxState[i].fx == Ifx_Read)
3331 continue;
sewardja7203252004-11-26 19:17:47 +00003332 /* Ignore any sections marked as 'always defined'. */
3333 if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size ))
3334 continue;
sewardje9e16d32004-12-10 13:17:55 +00003335 /* This state element is written or modified. So we need to
3336 consider it. If larger than 8 bytes, deal with it in 8-byte
3337 chunks. */
3338 gSz = d->fxState[i].size;
3339 gOff = d->fxState[i].offset;
3340 tl_assert(gSz > 0);
3341 while (True) {
3342 if (gSz == 0) break;
3343 n = gSz <= 8 ? gSz : 8;
3344 /* Write suitably-casted 'curr' to the state slice
3345 gOff .. gOff+n-1 */
3346 tyDst = szToITy( n );
3347 do_shadow_PUT( mce, gOff,
3348 NULL, /* original atom */
3349 mkPCastTo( mce, tyDst, curr ) );
3350 gSz -= n;
3351 gOff += n;
3352 }
sewardj95448072004-11-22 20:19:51 +00003353 }
3354
sewardj2e595852005-06-30 23:33:37 +00003355 /* Outputs: memory that we write or modify. Same comments about
3356 endianness as above apply. */
sewardj95448072004-11-22 20:19:51 +00003357 if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
sewardj95448072004-11-22 20:19:51 +00003358 toDo = d->mSize;
3359 /* chew off 32-bit chunks */
3360 while (toDo >= 4) {
sewardj2e595852005-06-30 23:33:37 +00003361 do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
3362 NULL, /* original data */
sewardj1c0ce7a2009-07-01 08:10:49 +00003363 mkPCastTo( mce, Ity_I32, curr ),
3364 NULL/*guard*/ );
sewardj95448072004-11-22 20:19:51 +00003365 toDo -= 4;
3366 }
3367 /* chew off 16-bit chunks */
3368 while (toDo >= 2) {
sewardj2e595852005-06-30 23:33:37 +00003369 do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
3370 NULL, /* original data */
sewardj1c0ce7a2009-07-01 08:10:49 +00003371 mkPCastTo( mce, Ity_I16, curr ),
3372 NULL/*guard*/ );
sewardj95448072004-11-22 20:19:51 +00003373 toDo -= 2;
3374 }
3375 tl_assert(toDo == 0); /* also need to handle 1-byte excess */
3376 }
3377
njn25e49d8e72002-09-23 09:36:25 +00003378}
3379
sewardj1c0ce7a2009-07-01 08:10:49 +00003380
sewardj826ec492005-05-12 18:05:00 +00003381/* We have an ABI hint telling us that [base .. base+len-1] is to
3382 become undefined ("writable"). Generate code to call a helper to
3383 notify the A/V bit machinery of this fact.
3384
3385 We call
sewardj7cf4e6b2008-05-01 20:24:26 +00003386 void MC_(helperc_MAKE_STACK_UNINIT) ( Addr base, UWord len,
3387 Addr nia );
sewardj826ec492005-05-12 18:05:00 +00003388*/
3389static
sewardj7cf4e6b2008-05-01 20:24:26 +00003390void do_AbiHint ( MCEnv* mce, IRExpr* base, Int len, IRExpr* nia )
sewardj826ec492005-05-12 18:05:00 +00003391{
3392 IRDirty* di;
sewardj7cf4e6b2008-05-01 20:24:26 +00003393 /* Minor optimisation: if not doing origin tracking, ignore the
3394 supplied nia and pass zero instead. This is on the basis that
3395 MC_(helperc_MAKE_STACK_UNINIT) will ignore it anyway, and we can
3396 almost always generate a shorter instruction to put zero into a
3397 register than any other value. */
3398 if (MC_(clo_mc_level) < 3)
3399 nia = mkIRExpr_HWord(0);
3400
sewardj826ec492005-05-12 18:05:00 +00003401 di = unsafeIRDirty_0_N(
3402 0/*regparms*/,
3403 "MC_(helperc_MAKE_STACK_UNINIT)",
sewardj53ee1fc2005-12-23 02:29:58 +00003404 VG_(fnptr_to_fnentry)( &MC_(helperc_MAKE_STACK_UNINIT) ),
sewardj7cf4e6b2008-05-01 20:24:26 +00003405 mkIRExprVec_3( base, mkIRExpr_HWord( (UInt)len), nia )
sewardj826ec492005-05-12 18:05:00 +00003406 );
sewardj7cf4e6b2008-05-01 20:24:26 +00003407 stmt( 'V', mce, IRStmt_Dirty(di) );
sewardj826ec492005-05-12 18:05:00 +00003408}
3409
njn25e49d8e72002-09-23 09:36:25 +00003410
sewardj1c0ce7a2009-07-01 08:10:49 +00003411/* ------ Dealing with IRCAS (big and complex) ------ */
3412
3413/* FWDS */
3414static IRAtom* gen_load_b ( MCEnv* mce, Int szB,
3415 IRAtom* baseaddr, Int offset );
3416static IRAtom* gen_maxU32 ( MCEnv* mce, IRAtom* b1, IRAtom* b2 );
3417static void gen_store_b ( MCEnv* mce, Int szB,
3418 IRAtom* baseaddr, Int offset, IRAtom* dataB,
3419 IRAtom* guard );
3420
3421static void do_shadow_CAS_single ( MCEnv* mce, IRCAS* cas );
3422static void do_shadow_CAS_double ( MCEnv* mce, IRCAS* cas );
3423
3424
3425/* Either ORIG and SHADOW are both IRExpr.RdTmps, or they are both
3426 IRExpr.Consts, else this asserts. If they are both Consts, it
3427 doesn't do anything. So that just leaves the RdTmp case.
3428
3429 In which case: this assigns the shadow value SHADOW to the IR
3430 shadow temporary associated with ORIG. That is, ORIG, being an
3431 original temporary, will have a shadow temporary associated with
3432 it. However, in the case envisaged here, there will so far have
3433 been no IR emitted to actually write a shadow value into that
3434 temporary. What this routine does is to (emit IR to) copy the
3435 value in SHADOW into said temporary, so that after this call,
3436 IRExpr.RdTmps of ORIG's shadow temp will correctly pick up the
3437 value in SHADOW.
3438
3439 Point is to allow callers to compute "by hand" a shadow value for
3440 ORIG, and force it to be associated with ORIG.
3441
3442 How do we know that that shadow associated with ORIG has not so far
3443 been assigned to? Well, we don't per se know that, but supposing
3444 it had. Then this routine would create a second assignment to it,
3445 and later the IR sanity checker would barf. But that never
3446 happens. QED.
3447*/
3448static void bind_shadow_tmp_to_orig ( UChar how,
3449 MCEnv* mce,
3450 IRAtom* orig, IRAtom* shadow )
3451{
3452 tl_assert(isOriginalAtom(mce, orig));
3453 tl_assert(isShadowAtom(mce, shadow));
3454 switch (orig->tag) {
3455 case Iex_Const:
3456 tl_assert(shadow->tag == Iex_Const);
3457 break;
3458 case Iex_RdTmp:
3459 tl_assert(shadow->tag == Iex_RdTmp);
3460 if (how == 'V') {
3461 assign('V', mce, findShadowTmpV(mce,orig->Iex.RdTmp.tmp),
3462 shadow);
3463 } else {
3464 tl_assert(how == 'B');
3465 assign('B', mce, findShadowTmpB(mce,orig->Iex.RdTmp.tmp),
3466 shadow);
3467 }
3468 break;
3469 default:
3470 tl_assert(0);
3471 }
3472}
3473
3474
3475static
3476void do_shadow_CAS ( MCEnv* mce, IRCAS* cas )
3477{
3478 /* Scheme is (both single- and double- cases):
3479
3480 1. fetch data#,dataB (the proposed new value)
3481
3482 2. fetch expd#,expdB (what we expect to see at the address)
3483
3484 3. check definedness of address
3485
3486 4. load old#,oldB from shadow memory; this also checks
3487 addressibility of the address
3488
3489 5. the CAS itself
3490
3491 6. complain if "expected == old" is undefined
3492
3493 7. if "expected == old"
3494 store data#,dataB to shadow memory
3495
3496 Note that 5 reads 'old' but 4 reads 'old#'. Similarly, 5 stores
3497 'data' but 7 stores 'data#'. Hence it is possible for the
3498 shadow data to be incorrectly checked and/or updated:
3499
3500 * 6 could falsely complain if 4 read old# as undefined, but some
3501 other thread wrote a defined value to the location after 4 but
3502 before 5.
3503
3504 * 6 could falsely not-complain if 4 read old# as defined, but
3505 some other thread wrote an undefined value to the location
3506 after 4 but before 5.
3507
3508 * 7 is at least gated correctly, since the 'expected == old'
3509 condition is derived from outputs of 5. However, the shadow
3510 write could happen too late: imagine after 5 we are
3511 descheduled, a different thread runs, writes a different
3512 (shadow) value at the address, and then we resume, hence
3513 overwriting the shadow value written by the other thread.
3514
3515 Because the original memory access is atomic, there's no way to
3516 make both the original and shadow accesses into a single atomic
3517 thing, hence this is unavoidable.
3518
3519 At least as Valgrind stands, I don't think it's a problem, since
3520 we're single threaded *and* we guarantee that there are no
3521 context switches during the execution of any specific superblock
3522 -- context switches can only happen at superblock boundaries.
3523
3524 If Valgrind ever becomes MT in the future, then it might be more
3525 of a problem. A possible kludge would be to artificially
3526 associate with the location, a lock, which we must acquire and
3527 release around the transaction as a whole. Hmm, that probably
3528 would't work properly since it only guards us against other
3529 threads doing CASs on the same location, not against other
3530 threads doing normal reads and writes.
3531 */
3532 if (cas->oldHi == IRTemp_INVALID) {
3533 do_shadow_CAS_single( mce, cas );
3534 } else {
3535 do_shadow_CAS_double( mce, cas );
3536 }
3537}
3538
3539
3540static void do_shadow_CAS_single ( MCEnv* mce, IRCAS* cas )
3541{
3542 IRAtom *vdataLo = NULL, *bdataLo = NULL;
3543 IRAtom *vexpdLo = NULL, *bexpdLo = NULL;
3544 IRAtom *voldLo = NULL, *boldLo = NULL;
3545 IRAtom *expd_eq_old_V = NULL, *expd_eq_old_B = NULL;
3546 IRAtom *expd_eq_old = NULL;
3547 IROp opCmpEQ;
3548 Int elemSzB;
3549 IRType elemTy;
3550 Bool otrak = MC_(clo_mc_level) >= 3; /* a shorthand */
3551
3552 /* single CAS */
3553 tl_assert(cas->oldHi == IRTemp_INVALID);
3554 tl_assert(cas->expdHi == NULL);
3555 tl_assert(cas->dataHi == NULL);
3556
3557 elemTy = typeOfIRExpr(mce->sb->tyenv, cas->expdLo);
3558 switch (elemTy) {
3559 case Ity_I8: elemSzB = 1; opCmpEQ = Iop_CmpEQ8; break;
3560 case Ity_I16: elemSzB = 2; opCmpEQ = Iop_CmpEQ16; break;
3561 case Ity_I32: elemSzB = 4; opCmpEQ = Iop_CmpEQ32; break;
3562 case Ity_I64: elemSzB = 8; opCmpEQ = Iop_CmpEQ64; break;
3563 default: tl_assert(0); /* IR defn disallows any other types */
3564 }
3565
3566 /* 1. fetch data# (the proposed new value) */
3567 tl_assert(isOriginalAtom(mce, cas->dataLo));
3568 vdataLo
3569 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataLo));
3570 tl_assert(isShadowAtom(mce, vdataLo));
3571 if (otrak) {
3572 bdataLo
3573 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataLo));
3574 tl_assert(isShadowAtom(mce, bdataLo));
3575 }
3576
3577 /* 2. fetch expected# (what we expect to see at the address) */
3578 tl_assert(isOriginalAtom(mce, cas->expdLo));
3579 vexpdLo
3580 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdLo));
3581 tl_assert(isShadowAtom(mce, vexpdLo));
3582 if (otrak) {
3583 bexpdLo
3584 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdLo));
3585 tl_assert(isShadowAtom(mce, bexpdLo));
3586 }
3587
3588 /* 3. check definedness of address */
3589 /* 4. fetch old# from shadow memory; this also checks
3590 addressibility of the address */
3591 voldLo
3592 = assignNew(
3593 'V', mce, elemTy,
3594 expr2vbits_Load(
3595 mce,
3596 cas->end, elemTy, cas->addr, 0/*Addr bias*/
3597 ));
3598 if (otrak) {
3599 boldLo
3600 = assignNew('B', mce, Ity_I32,
3601 gen_load_b(mce, elemSzB, cas->addr, 0/*addr bias*/));
3602 }
3603
3604 /* 5. the CAS itself */
3605 stmt( 'C', mce, IRStmt_CAS(cas) );
3606
3607 /* 6. complain if "expected == old" is undefined */
3608 /* Doing this directly interacts in a complex way with origin
3609 tracking. Much easier to make up an expression tree and hand
3610 that off to expr2vbits_Binop. We will need the expression
3611 tree in any case in order to decide whether or not to do a
3612 shadow store. */
3613 /* Note that 'C' is kinda faking it; it is indeed a non-shadow
3614 tree, but it's not copied from the input block. */
3615 expd_eq_old
3616 = assignNew('C', mce, Ity_I1,
3617 binop(opCmpEQ, cas->expdLo, mkexpr(cas->oldLo)));
3618
3619 /* Compute into expd_eq_old_V the definedness for expd_eq_old.
3620 First we need to ensure that cas->oldLo's V-shadow is bound
3621 voldLo, since expr2vbits_Binop will generate a use of it. */
3622 bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldLo), voldLo);
3623 expd_eq_old_V
3624 = expr2vbits_Binop( mce, opCmpEQ, cas->expdLo, mkexpr(cas->oldLo) );
3625 if (otrak) {
3626 bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldLo), boldLo);
3627 expd_eq_old_B
3628 = gen_maxU32( mce, bexpdLo, boldLo );
3629 }
3630
3631 /* Generate a complaint if expd_eq_old is undefined. As above,
3632 first force expd_eq_old's definedness to be bound to its
3633 V-shadow tmp. */
3634 bind_shadow_tmp_to_orig('V', mce, expd_eq_old, expd_eq_old_V);
3635 if (otrak)
3636 bind_shadow_tmp_to_orig('B', mce, expd_eq_old, expd_eq_old_B);
3637 complainIfUndefined(mce, expd_eq_old);
3638
3639 /* 7. if "expected == old"
3640 store data# to shadow memory */
3641 do_shadow_Store( mce, cas->end, cas->addr, 0/*bias*/,
3642 NULL/*data*/, vdataLo/*vdata*/,
3643 expd_eq_old/*guard for store*/ );
3644 if (otrak) {
3645 gen_store_b( mce, elemSzB, cas->addr, 0/*offset*/,
3646 bdataLo/*bdata*/,
3647 expd_eq_old/*guard for store*/ );
3648 }
3649}
3650
3651
3652static void do_shadow_CAS_double ( MCEnv* mce, IRCAS* cas )
3653{
3654 IRAtom *vdataHi = NULL, *bdataHi = NULL;
3655 IRAtom *vdataLo = NULL, *bdataLo = NULL;
3656 IRAtom *vexpdHi = NULL, *bexpdHi = NULL;
3657 IRAtom *vexpdLo = NULL, *bexpdLo = NULL;
3658 IRAtom *voldHi = NULL, *boldHi = NULL;
3659 IRAtom *voldLo = NULL, *boldLo = NULL;
3660 IRAtom *xHi = NULL, *xLo = NULL, *xHL = NULL;
3661 IRAtom *xHi_V = NULL, *xLo_V = NULL, *xHL_V = NULL;
3662 IRAtom *xHi_B = NULL, *xLo_B = NULL, *xHL_B = NULL;
3663 IRAtom *expd_eq_old_V = NULL, *expd_eq_old_B = NULL;
3664 IRAtom *expd_eq_old = NULL, *zero = NULL;
3665 IROp opCmpEQ, opOr, opXor;
3666 Int elemSzB, memOffsLo, memOffsHi;
3667 IRType elemTy;
3668 Bool otrak = MC_(clo_mc_level) >= 3; /* a shorthand */
3669
3670 /* double CAS */
3671 tl_assert(cas->oldHi != IRTemp_INVALID);
3672 tl_assert(cas->expdHi != NULL);
3673 tl_assert(cas->dataHi != NULL);
3674
3675 elemTy = typeOfIRExpr(mce->sb->tyenv, cas->expdLo);
3676 switch (elemTy) {
3677 case Ity_I8:
3678 opCmpEQ = Iop_CmpEQ8; opOr = Iop_Or8; opXor = Iop_Xor8;
3679 elemSzB = 1; zero = mkU8(0);
3680 break;
3681 case Ity_I16:
3682 opCmpEQ = Iop_CmpEQ16; opOr = Iop_Or16; opXor = Iop_Xor16;
3683 elemSzB = 2; zero = mkU16(0);
3684 break;
3685 case Ity_I32:
3686 opCmpEQ = Iop_CmpEQ32; opOr = Iop_Or32; opXor = Iop_Xor32;
3687 elemSzB = 4; zero = mkU32(0);
3688 break;
3689 case Ity_I64:
3690 opCmpEQ = Iop_CmpEQ64; opOr = Iop_Or64; opXor = Iop_Xor64;
3691 elemSzB = 8; zero = mkU64(0);
3692 break;
3693 default:
3694 tl_assert(0); /* IR defn disallows any other types */
3695 }
3696
3697 /* 1. fetch data# (the proposed new value) */
3698 tl_assert(isOriginalAtom(mce, cas->dataHi));
3699 tl_assert(isOriginalAtom(mce, cas->dataLo));
3700 vdataHi
3701 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataHi));
3702 vdataLo
3703 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataLo));
3704 tl_assert(isShadowAtom(mce, vdataHi));
3705 tl_assert(isShadowAtom(mce, vdataLo));
3706 if (otrak) {
3707 bdataHi
3708 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataHi));
3709 bdataLo
3710 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataLo));
3711 tl_assert(isShadowAtom(mce, bdataHi));
3712 tl_assert(isShadowAtom(mce, bdataLo));
3713 }
3714
3715 /* 2. fetch expected# (what we expect to see at the address) */
3716 tl_assert(isOriginalAtom(mce, cas->expdHi));
3717 tl_assert(isOriginalAtom(mce, cas->expdLo));
3718 vexpdHi
3719 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdHi));
3720 vexpdLo
3721 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdLo));
3722 tl_assert(isShadowAtom(mce, vexpdHi));
3723 tl_assert(isShadowAtom(mce, vexpdLo));
3724 if (otrak) {
3725 bexpdHi
3726 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdHi));
3727 bexpdLo
3728 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdLo));
3729 tl_assert(isShadowAtom(mce, bexpdHi));
3730 tl_assert(isShadowAtom(mce, bexpdLo));
3731 }
3732
3733 /* 3. check definedness of address */
3734 /* 4. fetch old# from shadow memory; this also checks
3735 addressibility of the address */
3736 if (cas->end == Iend_LE) {
3737 memOffsLo = 0;
3738 memOffsHi = elemSzB;
3739 } else {
3740 tl_assert(cas->end == Iend_BE);
3741 memOffsLo = elemSzB;
3742 memOffsHi = 0;
3743 }
3744 voldHi
3745 = assignNew(
3746 'V', mce, elemTy,
3747 expr2vbits_Load(
3748 mce,
3749 cas->end, elemTy, cas->addr, memOffsHi/*Addr bias*/
3750 ));
3751 voldLo
3752 = assignNew(
3753 'V', mce, elemTy,
3754 expr2vbits_Load(
3755 mce,
3756 cas->end, elemTy, cas->addr, memOffsLo/*Addr bias*/
3757 ));
3758 if (otrak) {
3759 boldHi
3760 = assignNew('B', mce, Ity_I32,
3761 gen_load_b(mce, elemSzB, cas->addr,
3762 memOffsHi/*addr bias*/));
3763 boldLo
3764 = assignNew('B', mce, Ity_I32,
3765 gen_load_b(mce, elemSzB, cas->addr,
3766 memOffsLo/*addr bias*/));
3767 }
3768
3769 /* 5. the CAS itself */
3770 stmt( 'C', mce, IRStmt_CAS(cas) );
3771
3772 /* 6. complain if "expected == old" is undefined */
3773 /* Doing this directly interacts in a complex way with origin
3774 tracking. Much easier to make up an expression tree and hand
3775 that off to expr2vbits_Binop. We will need the expression
3776 tree in any case in order to decide whether or not to do a
3777 shadow store. */
3778 /* Note that 'C' is kinda faking it; it is indeed a non-shadow
3779 tree, but it's not copied from the input block. */
3780 /*
3781 xHi = oldHi ^ expdHi;
3782 xLo = oldLo ^ expdLo;
3783 xHL = xHi | xLo;
3784 expd_eq_old = xHL == 0;
3785 */
3786
3787 /* --- xHi = oldHi ^ expdHi --- */
3788 xHi = assignNew('C', mce, elemTy,
3789 binop(opXor, cas->expdHi, mkexpr(cas->oldHi)));
3790 bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldHi), voldHi);
3791 xHi_V
3792 = expr2vbits_Binop( mce, opXor, cas->expdHi, mkexpr(cas->oldHi));
3793 if (otrak) {
3794 bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldHi), boldHi);
3795 xHi_B = gen_maxU32( mce, bexpdHi, boldHi );
3796 }
3797
3798 /* --- xLo = oldLo ^ expdLo --- */
3799 xLo = assignNew('C', mce, elemTy,
3800 binop(opXor, cas->expdLo, mkexpr(cas->oldLo)));
3801 bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldLo), voldLo);
3802 xLo_V
3803 = expr2vbits_Binop( mce, opXor, cas->expdLo, mkexpr(cas->oldLo));
3804 if (otrak) {
3805 bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldLo), boldLo);
3806 xLo_B = gen_maxU32( mce, bexpdLo, boldLo );
3807 }
3808
3809 /* --- xHL = xHi | xLo --- */
3810 xHL = assignNew('C', mce, elemTy,
3811 binop(opOr, xHi, xLo));
3812 bind_shadow_tmp_to_orig('V', mce, xHi, xHi_V);
3813 bind_shadow_tmp_to_orig('V', mce, xLo, xLo_V);
3814 xHL_V
3815 = expr2vbits_Binop( mce, opOr, xHi, xLo );
3816 if (otrak) {
3817 bind_shadow_tmp_to_orig('B', mce, xHi, xHi_B);
3818 bind_shadow_tmp_to_orig('B', mce, xLo, xLo_B);
3819 xHL_B = gen_maxU32( mce, xHi_B, xLo_B );
3820 }
3821
3822 /* --- expd_eq_old = xHL == 0 --- */
3823 expd_eq_old
3824 = assignNew('C', mce, Ity_I1,
3825 binop(opCmpEQ, xHL, zero));
3826 bind_shadow_tmp_to_orig('V', mce, xHL, xHL_V);
3827 expd_eq_old_V
3828 = expr2vbits_Binop( mce, opCmpEQ, xHL, zero);
3829 if (otrak) {
3830 expd_eq_old_B = xHL_B; /* since the zero literal isn't going to
3831 contribute any interesting origin */
3832 }
3833
3834 /* The backend's register allocator is probably on fire by now :-) */
3835 /* Generate a complaint if expd_eq_old is undefined. As above,
3836 first force expd_eq_old's definedness to be bound to its
3837 V-shadow tmp. */
3838 bind_shadow_tmp_to_orig('V', mce, expd_eq_old, expd_eq_old_V);
3839 if (otrak)
3840 bind_shadow_tmp_to_orig('B', mce, expd_eq_old, expd_eq_old_B);
3841 complainIfUndefined(mce, expd_eq_old);
3842
3843 /* 7. if "expected == old"
3844 store data# to shadow memory */
3845 do_shadow_Store( mce, cas->end, cas->addr, memOffsHi/*bias*/,
3846 NULL/*data*/, vdataHi/*vdata*/,
3847 expd_eq_old/*guard for store*/ );
3848 do_shadow_Store( mce, cas->end, cas->addr, memOffsLo/*bias*/,
3849 NULL/*data*/, vdataLo/*vdata*/,
3850 expd_eq_old/*guard for store*/ );
3851 if (otrak) {
3852 gen_store_b( mce, elemSzB, cas->addr, memOffsHi/*offset*/,
3853 bdataHi/*bdata*/,
3854 expd_eq_old/*guard for store*/ );
3855 gen_store_b( mce, elemSzB, cas->addr, memOffsLo/*offset*/,
3856 bdataLo/*bdata*/,
3857 expd_eq_old/*guard for store*/ );
3858 }
3859}
3860
3861
sewardj95448072004-11-22 20:19:51 +00003862/*------------------------------------------------------------*/
3863/*--- Memcheck main ---*/
3864/*------------------------------------------------------------*/
njn25e49d8e72002-09-23 09:36:25 +00003865
sewardj7cf4e6b2008-05-01 20:24:26 +00003866static void schemeS ( MCEnv* mce, IRStmt* st );
3867
sewardj95448072004-11-22 20:19:51 +00003868static Bool isBogusAtom ( IRAtom* at )
njn25e49d8e72002-09-23 09:36:25 +00003869{
sewardj95448072004-11-22 20:19:51 +00003870 ULong n = 0;
3871 IRConst* con;
sewardj710d6c22005-03-20 18:55:15 +00003872 tl_assert(isIRAtom(at));
sewardj0b9d74a2006-12-24 02:24:11 +00003873 if (at->tag == Iex_RdTmp)
sewardj95448072004-11-22 20:19:51 +00003874 return False;
3875 tl_assert(at->tag == Iex_Const);
3876 con = at->Iex.Const.con;
3877 switch (con->tag) {
sewardjd5204dc2004-12-31 01:16:11 +00003878 case Ico_U1: return False;
3879 case Ico_U8: n = (ULong)con->Ico.U8; break;
3880 case Ico_U16: n = (ULong)con->Ico.U16; break;
3881 case Ico_U32: n = (ULong)con->Ico.U32; break;
3882 case Ico_U64: n = (ULong)con->Ico.U64; break;
3883 case Ico_F64: return False;
3884 case Ico_F64i: return False;
3885 case Ico_V128: return False;
sewardj95448072004-11-22 20:19:51 +00003886 default: ppIRExpr(at); tl_assert(0);
3887 }
3888 /* VG_(printf)("%llx\n", n); */
sewardj96a922e2005-04-23 23:26:29 +00003889 return (/*32*/ n == 0xFEFEFEFFULL
3890 /*32*/ || n == 0x80808080ULL
sewardj17b47432008-12-17 01:12:58 +00003891 /*32*/ || n == 0x7F7F7F7FULL
tomd9774d72005-06-27 08:11:01 +00003892 /*64*/ || n == 0xFFFFFFFFFEFEFEFFULL
sewardj96a922e2005-04-23 23:26:29 +00003893 /*64*/ || n == 0xFEFEFEFEFEFEFEFFULL
tomd9774d72005-06-27 08:11:01 +00003894 /*64*/ || n == 0x0000000000008080ULL
sewardj96a922e2005-04-23 23:26:29 +00003895 /*64*/ || n == 0x8080808080808080ULL
sewardj17b47432008-12-17 01:12:58 +00003896 /*64*/ || n == 0x0101010101010101ULL
sewardj96a922e2005-04-23 23:26:29 +00003897 );
sewardj95448072004-11-22 20:19:51 +00003898}
njn25e49d8e72002-09-23 09:36:25 +00003899
sewardj95448072004-11-22 20:19:51 +00003900static Bool checkForBogusLiterals ( /*FLAT*/ IRStmt* st )
3901{
sewardjd5204dc2004-12-31 01:16:11 +00003902 Int i;
3903 IRExpr* e;
3904 IRDirty* d;
sewardj1c0ce7a2009-07-01 08:10:49 +00003905 IRCAS* cas;
sewardj95448072004-11-22 20:19:51 +00003906 switch (st->tag) {
sewardj0b9d74a2006-12-24 02:24:11 +00003907 case Ist_WrTmp:
3908 e = st->Ist.WrTmp.data;
sewardj95448072004-11-22 20:19:51 +00003909 switch (e->tag) {
3910 case Iex_Get:
sewardj0b9d74a2006-12-24 02:24:11 +00003911 case Iex_RdTmp:
sewardj95448072004-11-22 20:19:51 +00003912 return False;
sewardjd5204dc2004-12-31 01:16:11 +00003913 case Iex_Const:
3914 return isBogusAtom(e);
sewardj95448072004-11-22 20:19:51 +00003915 case Iex_Unop:
3916 return isBogusAtom(e->Iex.Unop.arg);
sewardjd5204dc2004-12-31 01:16:11 +00003917 case Iex_GetI:
3918 return isBogusAtom(e->Iex.GetI.ix);
sewardj95448072004-11-22 20:19:51 +00003919 case Iex_Binop:
3920 return isBogusAtom(e->Iex.Binop.arg1)
3921 || isBogusAtom(e->Iex.Binop.arg2);
sewardjed69fdb2006-02-03 16:12:27 +00003922 case Iex_Triop:
3923 return isBogusAtom(e->Iex.Triop.arg1)
3924 || isBogusAtom(e->Iex.Triop.arg2)
3925 || isBogusAtom(e->Iex.Triop.arg3);
sewardje91cea72006-02-08 19:32:02 +00003926 case Iex_Qop:
3927 return isBogusAtom(e->Iex.Qop.arg1)
3928 || isBogusAtom(e->Iex.Qop.arg2)
3929 || isBogusAtom(e->Iex.Qop.arg3)
3930 || isBogusAtom(e->Iex.Qop.arg4);
sewardj95448072004-11-22 20:19:51 +00003931 case Iex_Mux0X:
3932 return isBogusAtom(e->Iex.Mux0X.cond)
3933 || isBogusAtom(e->Iex.Mux0X.expr0)
3934 || isBogusAtom(e->Iex.Mux0X.exprX);
sewardj2e595852005-06-30 23:33:37 +00003935 case Iex_Load:
3936 return isBogusAtom(e->Iex.Load.addr);
sewardj95448072004-11-22 20:19:51 +00003937 case Iex_CCall:
3938 for (i = 0; e->Iex.CCall.args[i]; i++)
3939 if (isBogusAtom(e->Iex.CCall.args[i]))
3940 return True;
3941 return False;
3942 default:
3943 goto unhandled;
3944 }
sewardjd5204dc2004-12-31 01:16:11 +00003945 case Ist_Dirty:
3946 d = st->Ist.Dirty.details;
3947 for (i = 0; d->args[i]; i++)
3948 if (isBogusAtom(d->args[i]))
3949 return True;
3950 if (d->guard && isBogusAtom(d->guard))
3951 return True;
3952 if (d->mAddr && isBogusAtom(d->mAddr))
3953 return True;
3954 return False;
sewardj95448072004-11-22 20:19:51 +00003955 case Ist_Put:
3956 return isBogusAtom(st->Ist.Put.data);
sewardjd5204dc2004-12-31 01:16:11 +00003957 case Ist_PutI:
3958 return isBogusAtom(st->Ist.PutI.ix)
3959 || isBogusAtom(st->Ist.PutI.data);
sewardj2e595852005-06-30 23:33:37 +00003960 case Ist_Store:
3961 return isBogusAtom(st->Ist.Store.addr)
3962 || isBogusAtom(st->Ist.Store.data);
sewardj95448072004-11-22 20:19:51 +00003963 case Ist_Exit:
sewardjd5204dc2004-12-31 01:16:11 +00003964 return isBogusAtom(st->Ist.Exit.guard);
sewardj826ec492005-05-12 18:05:00 +00003965 case Ist_AbiHint:
sewardj7cf4e6b2008-05-01 20:24:26 +00003966 return isBogusAtom(st->Ist.AbiHint.base)
3967 || isBogusAtom(st->Ist.AbiHint.nia);
sewardj21dc3452005-03-21 00:27:41 +00003968 case Ist_NoOp:
sewardj29faa502005-03-16 18:20:21 +00003969 case Ist_IMark:
sewardj72d75132007-11-09 23:06:35 +00003970 case Ist_MBE:
sewardjbd598e12005-01-07 12:10:21 +00003971 return False;
sewardj1c0ce7a2009-07-01 08:10:49 +00003972 case Ist_CAS:
3973 cas = st->Ist.CAS.details;
3974 return isBogusAtom(cas->addr)
3975 || (cas->expdHi ? isBogusAtom(cas->expdHi) : False)
3976 || isBogusAtom(cas->expdLo)
3977 || (cas->dataHi ? isBogusAtom(cas->dataHi) : False)
3978 || isBogusAtom(cas->dataLo);
sewardj95448072004-11-22 20:19:51 +00003979 default:
3980 unhandled:
3981 ppIRStmt(st);
3982 VG_(tool_panic)("hasBogusLiterals");
3983 }
3984}
njn25e49d8e72002-09-23 09:36:25 +00003985
njn25e49d8e72002-09-23 09:36:25 +00003986
/* Memcheck's top-level instrumentation routine, called by the core
   once per superblock translated.  Builds and returns a new IRSB
   ('sb_out') which contains, for each original statement, the
   definedness ("V-bit") shadow computations -- and, when
   MC_(clo_mc_level) == 3 (origin tracking), the origin ("B") shadow
   computations via schemeS -- followed by a copy of the statement
   itself.  'closure' and 'vge' are accepted but not referenced here.
   gWordTy/hWordTy are the guest and host word types and must agree. */
IRSB* MC_(instrument) ( VgCallbackClosure* closure,
                        IRSB* sb_in,
                        VexGuestLayout* layout,
                        VexGuestExtents* vge,
                        IRType gWordTy, IRType hWordTy )
{
   Bool    verboze = 0||False;
   Bool    bogus;
   Int     i, j, first_stmt;
   IRStmt* st;
   MCEnv   mce;
   IRSB*   sb_out;

   if (gWordTy != hWordTy) {
      /* We don't currently support this case. */
      VG_(tool_panic)("host/guest word size mismatch");
   }

   /* Check we're not completely nuts */
   tl_assert(sizeof(UWord)  == sizeof(void*));
   tl_assert(sizeof(Word)   == sizeof(void*));
   tl_assert(sizeof(Addr)   == sizeof(void*));
   tl_assert(sizeof(ULong)  == 8);
   tl_assert(sizeof(Long)   == 8);
   tl_assert(sizeof(Addr64) == 8);
   tl_assert(sizeof(UInt)   == 4);
   tl_assert(sizeof(Int)    == 4);

   /* 1 = plain memcheck, 2 = +undef value tracking, 3 = +origins */
   tl_assert(MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3);

   /* Set up SB: copy everything (tyenv, next, jumpkind) except the
      statement list, which we regenerate with instrumentation. */
   sb_out = deepCopyIRSBExceptStmts(sb_in);

   /* Set up the running environment.  Both .sb and .tmpMap are
      modified as we go along.  Note that tmps are added to both
      .sb->tyenv and .tmpMap together, so the valid index-set for
      those two arrays should always be identical. */
   VG_(memset)(&mce, 0, sizeof(mce));
   mce.sb             = sb_out;
   mce.trace          = verboze;
   mce.layout         = layout;
   mce.hWordTy        = hWordTy;
   mce.bogusLiterals  = False;

   /* One TempMapEnt per original temp; shadow temps are created
      lazily (findShadowTmpV/findShadowTmpB). */
   mce.tmpMap = VG_(newXA)( VG_(malloc), "mc.MC_(instrument).1", VG_(free),
                            sizeof(TempMapEnt));
   for (i = 0; i < sb_in->tyenv->types_used; i++) {
      TempMapEnt ent;
      ent.kind    = Orig;
      ent.shadowV = IRTemp_INVALID;
      ent.shadowB = IRTemp_INVALID;
      VG_(addToXA)( mce.tmpMap, &ent );
   }
   tl_assert( VG_(sizeXA)( mce.tmpMap ) == sb_in->tyenv->types_used );

   /* Make a preliminary inspection of the statements, to see if there
      are any dodgy-looking literals.  If there are, we generate
      extra-detailed (hence extra-expensive) instrumentation in
      places.  Scan the whole bb even if dodgyness is found earlier,
      so that the flatness assertion is applied to all stmts. */

   bogus = False;

   for (i = 0; i < sb_in->stmts_used; i++) {

      st = sb_in->stmts[i];
      tl_assert(st);
      tl_assert(isFlatIRStmt(st));

      if (!bogus) {
         bogus = checkForBogusLiterals(st);
         if (0 && bogus) {
            VG_(printf)("bogus: ");
            ppIRStmt(st);
            VG_(printf)("\n");
         }
      }

   }

   mce.bogusLiterals = bogus;

   /* Copy verbatim any IR preamble preceding the first IMark */

   tl_assert(mce.sb == sb_out);
   tl_assert(mce.sb != sb_in);

   i = 0;
   while (i < sb_in->stmts_used && sb_in->stmts[i]->tag != Ist_IMark) {

      st = sb_in->stmts[i];
      tl_assert(st);
      tl_assert(isFlatIRStmt(st));

      stmt( 'C', &mce, sb_in->stmts[i] );
      i++;
   }

   /* Nasty problem.  IR optimisation of the pre-instrumented IR may
      cause the IR following the preamble to contain references to IR
      temporaries defined in the preamble.  Because the preamble isn't
      instrumented, these temporaries don't have any shadows.
      Nevertheless uses of them following the preamble will cause
      memcheck to generate references to their shadows.  End effect is
      to cause IR sanity check failures, due to references to
      non-existent shadows.  This is only evident for the complex
      preambles used for function wrapping on TOC-afflicted platforms
      (ppc64-linux, ppc32-aix5, ppc64-aix5).

      The following loop therefore scans the preamble looking for
      assignments to temporaries.  For each one found it creates an
      assignment to the corresponding (V) shadow temp, marking it as
      'defined'.  This is the same resulting IR as if the main
      instrumentation loop before had been applied to the statement
      'tmp = CONSTANT'.

      Similarly, if origin tracking is enabled, we must generate an
      assignment for the corresponding origin (B) shadow, claiming
      no-origin, as appropriate for a defined value.
   */
   for (j = 0; j < i; j++) {
      if (sb_in->stmts[j]->tag == Ist_WrTmp) {
         /* findShadowTmpV checks its arg is an original tmp;
            no need to assert that here. */
         IRTemp tmp_o = sb_in->stmts[j]->Ist.WrTmp.tmp;
         IRTemp tmp_v = findShadowTmpV(&mce, tmp_o);
         IRType ty_v  = typeOfIRTemp(sb_out->tyenv, tmp_v);
         assign( 'V', &mce, tmp_v, definedOfType( ty_v ) );
         if (MC_(clo_mc_level) == 3) {
            IRTemp tmp_b = findShadowTmpB(&mce, tmp_o);
            tl_assert(typeOfIRTemp(sb_out->tyenv, tmp_b) == Ity_I32);
            assign( 'B', &mce, tmp_b, mkU32(0)/* UNKNOWN ORIGIN */);
         }
         if (0) {
            VG_(printf)("create shadow tmp(s) for preamble tmp [%d] ty ", j);
            ppIRType( ty_v );
            VG_(printf)("\n");
         }
      }
   }

   /* Iterate over the remaining stmts to generate instrumentation. */

   tl_assert(sb_in->stmts_used > 0);
   tl_assert(i >= 0);
   tl_assert(i < sb_in->stmts_used);
   tl_assert(sb_in->stmts[i]->tag == Ist_IMark);

   for (/* use current i*/; i < sb_in->stmts_used; i++) {

      st = sb_in->stmts[i];
      /* Remember where this stmt's instrumentation starts in sb_out,
         for the verbose-dump loop below. */
      first_stmt = sb_out->stmts_used;

      if (verboze) {
         VG_(printf)("\n");
         ppIRStmt(st);
         VG_(printf)("\n");
      }

      if (MC_(clo_mc_level) == 3) {
         /* See comments on case Ist_CAS below. */
         if (st->tag != Ist_CAS)
            schemeS( &mce, st );
      }

      /* Generate instrumentation code for each stmt ... */

      switch (st->tag) {

         case Ist_WrTmp:
            assign( 'V', &mce, findShadowTmpV(&mce, st->Ist.WrTmp.tmp),
                               expr2vbits( &mce, st->Ist.WrTmp.data) );
            break;

         case Ist_Put:
            do_shadow_PUT( &mce,
                           st->Ist.Put.offset,
                           st->Ist.Put.data,
                           NULL /* shadow atom */ );
            break;

         case Ist_PutI:
            do_shadow_PUTI( &mce,
                            st->Ist.PutI.descr,
                            st->Ist.PutI.ix,
                            st->Ist.PutI.bias,
                            st->Ist.PutI.data );
            break;

         case Ist_Store:
            do_shadow_Store( &mce, st->Ist.Store.end,
                                   st->Ist.Store.addr, 0/* addr bias */,
                                   st->Ist.Store.data,
                                   NULL /* shadow data */,
                                   NULL/*guard*/ );
            /* If this is a store conditional, it writes to .resSC a
               value indicating whether or not the store succeeded.
               Just claim this value is always defined.  In the
               PowerPC interpretation of store-conditional,
               definedness of the success indication depends on
               whether the address of the store matches the
               reservation address.  But we can't tell that here (and
               anyway, we're not being PowerPC-specific).  At least we
               are guaranteed that the definedness of the store
               address, and its addressability, will be checked as per
               normal.  So it seems pretty safe to just say that the
               success indication is always defined.

               In schemeS, for origin tracking, we must
               correspondingly set a no-origin value for the origin
               shadow of resSC.
            */
            if (st->Ist.Store.resSC != IRTemp_INVALID) {
               assign( 'V', &mce,
                       findShadowTmpV(&mce, st->Ist.Store.resSC),
                       definedOfType(
                          shadowTypeV(
                             typeOfIRTemp(mce.sb->tyenv,
                                          st->Ist.Store.resSC)
                       )));
            }
            break;

         case Ist_Exit:
            /* Only the guard's definedness needs checking; the
               destination is a constant. */
            complainIfUndefined( &mce, st->Ist.Exit.guard );
            break;

         case Ist_IMark:
            break;

         case Ist_NoOp:
         case Ist_MBE:
            break;

         case Ist_Dirty:
            do_shadow_Dirty( &mce, st->Ist.Dirty.details );
            break;

         case Ist_AbiHint:
            do_AbiHint( &mce, st->Ist.AbiHint.base,
                              st->Ist.AbiHint.len,
                              st->Ist.AbiHint.nia );
            break;

         case Ist_CAS:
            do_shadow_CAS( &mce, st->Ist.CAS.details );
            /* Note, do_shadow_CAS copies the CAS itself to the output
               block, because it needs to add instrumentation both
               before and after it.  Hence skip the copy below.  Also
               skip the origin-tracking stuff (call to schemeS) above,
               since that's all tangled up with it too; do_shadow_CAS
               does it all. */
            break;

         default:
            VG_(printf)("\n");
            ppIRStmt(st);
            VG_(printf)("\n");
            VG_(tool_panic)("memcheck: unhandled IRStmt");

      } /* switch (st->tag) */

      if (0 && verboze) {
         for (j = first_stmt; j < sb_out->stmts_used; j++) {
            VG_(printf)(" ");
            ppIRStmt(sb_out->stmts[j]);
            VG_(printf)("\n");
         }
         VG_(printf)("\n");
      }

      /* ... and finally copy the stmt itself to the output.  Except,
         skip the copy of IRCASs; see comments on case Ist_CAS
         above. */
      if (st->tag != Ist_CAS)
         stmt('C', &mce, st);
   }

   /* Now we need to complain if the jump target is undefined. */
   first_stmt = sb_out->stmts_used;

   if (verboze) {
      VG_(printf)("sb_in->next = ");
      ppIRExpr(sb_in->next);
      VG_(printf)("\n\n");
   }

   complainIfUndefined( &mce, sb_in->next );

   if (0 && verboze) {
      for (j = first_stmt; j < sb_out->stmts_used; j++) {
         VG_(printf)(" ");
         ppIRStmt(sb_out->stmts[j]);
         VG_(printf)("\n");
      }
      VG_(printf)("\n");
   }

   /* If this fails, there's been some serious snafu with tmp management,
      that should be investigated. */
   tl_assert( VG_(sizeXA)( mce.tmpMap ) == mce.sb->tyenv->types_used );
   VG_(deleteXA)( mce.tmpMap );

   tl_assert(mce.sb == sb_out);
   return sb_out;
}
njn25e49d8e72002-09-23 09:36:25 +00004293
sewardj81651dc2007-08-28 06:05:20 +00004294/*------------------------------------------------------------*/
4295/*--- Post-tree-build final tidying ---*/
4296/*------------------------------------------------------------*/
4297
4298/* This exploits the observation that Memcheck often produces
4299 repeated conditional calls of the form
4300
sewardj7cf4e6b2008-05-01 20:24:26 +00004301 Dirty G MC_(helperc_value_check0/1/4/8_fail)(UInt otag)
sewardj81651dc2007-08-28 06:05:20 +00004302
4303 with the same guard expression G guarding the same helper call.
4304 The second and subsequent calls are redundant. This usually
4305 results from instrumentation of guest code containing multiple
4306 memory references at different constant offsets from the same base
4307 register. After optimisation of the instrumentation, you get a
4308 test for the definedness of the base register for each memory
4309 reference, which is kinda pointless. MC_(final_tidy) therefore
4310 looks for such repeated calls and removes all but the first. */
4311
4312/* A struct for recording which (helper, guard) pairs we have already
4313 seen. */
typedef
   struct { void*   entry;   /* helper's code address (IRCallee.addr) */
            IRExpr* guard;   /* guard expression used for that call */
   }
   Pair;
4317
4318/* Return True if e1 and e2 definitely denote the same value (used to
4319 compare guards). Return False if unknown; False is the safe
4320 answer. Since guest registers and guest memory do not have the
4321 SSA property we must return False if any Gets or Loads appear in
4322 the expression. */
4323
4324static Bool sameIRValue ( IRExpr* e1, IRExpr* e2 )
4325{
4326 if (e1->tag != e2->tag)
4327 return False;
4328 switch (e1->tag) {
4329 case Iex_Const:
4330 return eqIRConst( e1->Iex.Const.con, e2->Iex.Const.con );
4331 case Iex_Binop:
4332 return e1->Iex.Binop.op == e2->Iex.Binop.op
4333 && sameIRValue(e1->Iex.Binop.arg1, e2->Iex.Binop.arg1)
4334 && sameIRValue(e1->Iex.Binop.arg2, e2->Iex.Binop.arg2);
4335 case Iex_Unop:
4336 return e1->Iex.Unop.op == e2->Iex.Unop.op
4337 && sameIRValue(e1->Iex.Unop.arg, e2->Iex.Unop.arg);
4338 case Iex_RdTmp:
4339 return e1->Iex.RdTmp.tmp == e2->Iex.RdTmp.tmp;
4340 case Iex_Mux0X:
4341 return sameIRValue( e1->Iex.Mux0X.cond, e2->Iex.Mux0X.cond )
4342 && sameIRValue( e1->Iex.Mux0X.expr0, e2->Iex.Mux0X.expr0 )
4343 && sameIRValue( e1->Iex.Mux0X.exprX, e2->Iex.Mux0X.exprX );
4344 case Iex_Qop:
4345 case Iex_Triop:
4346 case Iex_CCall:
4347 /* be lazy. Could define equality for these, but they never
4348 appear to be used. */
4349 return False;
4350 case Iex_Get:
4351 case Iex_GetI:
4352 case Iex_Load:
4353 /* be conservative - these may not give the same value each
4354 time */
4355 return False;
4356 case Iex_Binder:
4357 /* should never see this */
4358 /* fallthrough */
4359 default:
4360 VG_(printf)("mc_translate.c: sameIRValue: unhandled: ");
4361 ppIRExpr(e1);
4362 VG_(tool_panic)("memcheck:sameIRValue");
4363 return False;
4364 }
4365}
4366
4367/* See if 'pairs' already has an entry for (entry, guard). Return
4368 True if so. If not, add an entry. */
4369
4370static
4371Bool check_or_add ( XArray* /*of Pair*/ pairs, IRExpr* guard, void* entry )
4372{
4373 Pair p;
4374 Pair* pp;
4375 Int i, n = VG_(sizeXA)( pairs );
4376 for (i = 0; i < n; i++) {
4377 pp = VG_(indexXA)( pairs, i );
4378 if (pp->entry == entry && sameIRValue(pp->guard, guard))
4379 return True;
4380 }
4381 p.guard = guard;
4382 p.entry = entry;
4383 VG_(addToXA)( pairs, &p );
4384 return False;
4385}
4386
4387static Bool is_helperc_value_checkN_fail ( HChar* name )
4388{
4389 return
sewardj7cf4e6b2008-05-01 20:24:26 +00004390 0==VG_(strcmp)(name, "MC_(helperc_value_check0_fail_no_o)")
4391 || 0==VG_(strcmp)(name, "MC_(helperc_value_check1_fail_no_o)")
4392 || 0==VG_(strcmp)(name, "MC_(helperc_value_check4_fail_no_o)")
4393 || 0==VG_(strcmp)(name, "MC_(helperc_value_check8_fail_no_o)")
4394 || 0==VG_(strcmp)(name, "MC_(helperc_value_check0_fail_w_o)")
4395 || 0==VG_(strcmp)(name, "MC_(helperc_value_check1_fail_w_o)")
4396 || 0==VG_(strcmp)(name, "MC_(helperc_value_check4_fail_w_o)")
4397 || 0==VG_(strcmp)(name, "MC_(helperc_value_check8_fail_w_o)");
sewardj81651dc2007-08-28 06:05:20 +00004398}
4399
/* Post-instrumentation cleanup pass: walk 'sb_in' and replace with
   NoOps all but the first conditional call to each
   value-check-fail helper made under a structurally identical guard
   (see the explanation above).  Mutates and returns 'sb_in'. */
IRSB* MC_(final_tidy) ( IRSB* sb_in )
{
   Int i;
   IRStmt*   st;
   IRDirty*  di;
   IRExpr*   guard;
   IRCallee* cee;
   Bool      alreadyPresent;
   /* Set of (helper address, guard) pairs seen so far. */
   XArray*   pairs = VG_(newXA)( VG_(malloc), "mc.ft.1",
                                 VG_(free), sizeof(Pair) );
   /* Scan forwards through the statements.  Each time a call to one
      of the relevant helpers is seen, check if we have made a
      previous call to the same helper using the same guard
      expression, and if so, delete the call. */
   for (i = 0; i < sb_in->stmts_used; i++) {
      st = sb_in->stmts[i];
      tl_assert(st);
      if (st->tag != Ist_Dirty)
         continue;
      di = st->Ist.Dirty.details;
      guard = di->guard;
      if (!guard)
         continue;   /* unconditional call: not our pattern */
      if (0) { ppIRExpr(guard); VG_(printf)("\n"); }
      cee = di->cee;
      if (!is_helperc_value_checkN_fail( cee->name ))
         continue;
      /* Ok, we have a call to helperc_value_check0/1/4/8_fail with
         guard 'guard'.  Check if we have already seen a call to this
         function with the same guard.  If so, delete it.  If not,
         add it to the set of calls we do know about. */
      alreadyPresent = check_or_add( pairs, guard, cee->addr );
      if (alreadyPresent) {
         /* Delete in place by overwriting with a NoOp. */
         sb_in->stmts[i] = IRStmt_NoOp();
         if (0) VG_(printf)("XX\n");
      }
   }
   VG_(deleteXA)( pairs );
   return sb_in;
}
4440
4441
sewardj7cf4e6b2008-05-01 20:24:26 +00004442/*------------------------------------------------------------*/
4443/*--- Origin tracking stuff ---*/
4444/*------------------------------------------------------------*/
4445
sewardj1c0ce7a2009-07-01 08:10:49 +00004446/* Almost identical to findShadowTmpV. */
/* Return the origin ("B") shadow temp for original temp 'orig',
   creating it lazily (as an Ity_I32, kind BSh) on first request.
   Mirrors findShadowTmpV for the V-bit shadow. */
static IRTemp findShadowTmpB ( MCEnv* mce, IRTemp orig )
{
   TempMapEnt* ent;
   /* VG_(indexXA) range-checks 'orig', hence no need to check
      here. */
   ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
   tl_assert(ent->kind == Orig);
   if (ent->shadowB == IRTemp_INVALID) {
      IRTemp tmpB
         = newTemp( mce, Ity_I32, BSh );
      /* newTemp may cause mce->tmpMap to resize, hence previous results
         from VG_(indexXA) are invalid. */
      ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
      tl_assert(ent->kind == Orig);
      tl_assert(ent->shadowB == IRTemp_INVALID);
      ent->shadowB = tmpB;
   }
   return ent->shadowB;
}
4466
4467static IRAtom* gen_maxU32 ( MCEnv* mce, IRAtom* b1, IRAtom* b2 )
4468{
4469 return assignNew( 'B', mce, Ity_I32, binop(Iop_Max32U, b1, b2) );
4470}
4471
/* Emit IR that calls the appropriate MC_(helperc_b_load{1,2,4,8,16})
   helper to fetch the origin tag for the szB-byte region at
   baseaddr+offset, and return an Ity_I32 atom holding the tag.  On a
   64-bit host the helper's word-sized result is narrowed to 32 bits.
   szB values other than 1/2/4/8/16 are rejected by assertion. */
static IRAtom* gen_load_b ( MCEnv* mce, Int szB,
                            IRAtom* baseaddr, Int offset )
{
   void*    hFun;
   HChar*   hName;
   IRTemp   bTmp;
   IRDirty* di;
   IRType   aTy   = typeOfIRExpr( mce->sb->tyenv, baseaddr );
   IROp     opAdd = aTy == Ity_I32 ? Iop_Add32 : Iop_Add64;
   IRAtom*  ea    = baseaddr;
   /* Fold a nonzero offset into the effective address, using an
      add of the address's own width. */
   if (offset != 0) {
      IRAtom* off = aTy == Ity_I32 ? mkU32( offset )
                                   : mkU64( (Long)(Int)offset );
      ea = assignNew( 'B', mce, aTy, binop(opAdd, ea, off));
   }
   /* Helper returns a host-word-sized value. */
   bTmp = newTemp(mce, mce->hWordTy, BSh);

   /* Select the helper matching the access size. */
   switch (szB) {
      case 1: hFun = (void*)&MC_(helperc_b_load1);
              hName = "MC_(helperc_b_load1)";
              break;
      case 2: hFun = (void*)&MC_(helperc_b_load2);
              hName = "MC_(helperc_b_load2)";
              break;
      case 4: hFun = (void*)&MC_(helperc_b_load4);
              hName = "MC_(helperc_b_load4)";
              break;
      case 8: hFun = (void*)&MC_(helperc_b_load8);
              hName = "MC_(helperc_b_load8)";
              break;
      case 16: hFun = (void*)&MC_(helperc_b_load16);
               hName = "MC_(helperc_b_load16)";
               break;
      default:
         VG_(printf)("mc_translate.c: gen_load_b: unhandled szB == %d\n", szB);
         tl_assert(0);
   }
   di = unsafeIRDirty_1_N(
           bTmp, 1/*regparms*/, hName, VG_(fnptr_to_fnentry)( hFun ),
           mkIRExprVec_1( ea )
        );
   /* no need to mess with any annotations.  This call accesses
      neither guest state nor guest memory. */
   stmt( 'B', mce, IRStmt_Dirty(di) );
   if (mce->hWordTy == Ity_I64) {
      /* 64-bit host: narrow the I64 result to the I32 origin tag. */
      IRTemp bTmp32 = newTemp(mce, Ity_I32, BSh);
      assign( 'B', mce, bTmp32, unop(Iop_64to32, mkexpr(bTmp)) );
      return mkexpr(bTmp32);
   } else {
      /* 32-bit host: result is already the right width. */
      return mkexpr(bTmp);
   }
}
sewardj1c0ce7a2009-07-01 08:10:49 +00004526
4527/* Generate a shadow store. guard :: Ity_I1 controls whether the
4528 store really happens; NULL means it unconditionally does. */
sewardj7cf4e6b2008-05-01 20:24:26 +00004529static void gen_store_b ( MCEnv* mce, Int szB,
sewardj1c0ce7a2009-07-01 08:10:49 +00004530 IRAtom* baseaddr, Int offset, IRAtom* dataB,
4531 IRAtom* guard )
sewardj7cf4e6b2008-05-01 20:24:26 +00004532{
4533 void* hFun;
4534 HChar* hName;
4535 IRDirty* di;
sewardj1c0ce7a2009-07-01 08:10:49 +00004536 IRType aTy = typeOfIRExpr( mce->sb->tyenv, baseaddr );
sewardj7cf4e6b2008-05-01 20:24:26 +00004537 IROp opAdd = aTy == Ity_I32 ? Iop_Add32 : Iop_Add64;
4538 IRAtom* ea = baseaddr;
sewardj1c0ce7a2009-07-01 08:10:49 +00004539 if (guard) {
4540 tl_assert(isOriginalAtom(mce, guard));
4541 tl_assert(typeOfIRExpr(mce->sb->tyenv, guard) == Ity_I1);
4542 }
sewardj7cf4e6b2008-05-01 20:24:26 +00004543 if (offset != 0) {
4544 IRAtom* off = aTy == Ity_I32 ? mkU32( offset )
4545 : mkU64( (Long)(Int)offset );
4546 ea = assignNew( 'B', mce, aTy, binop(opAdd, ea, off));
4547 }
4548 if (mce->hWordTy == Ity_I64)
4549 dataB = assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, dataB));
4550
4551 switch (szB) {
4552 case 1: hFun = (void*)&MC_(helperc_b_store1);
4553 hName = "MC_(helperc_b_store1)";
4554 break;
4555 case 2: hFun = (void*)&MC_(helperc_b_store2);
4556 hName = "MC_(helperc_b_store2)";
4557 break;
4558 case 4: hFun = (void*)&MC_(helperc_b_store4);
4559 hName = "MC_(helperc_b_store4)";
4560 break;
4561 case 8: hFun = (void*)&MC_(helperc_b_store8);
4562 hName = "MC_(helperc_b_store8)";
4563 break;
4564 case 16: hFun = (void*)&MC_(helperc_b_store16);
4565 hName = "MC_(helperc_b_store16)";
4566 break;
4567 default:
4568 tl_assert(0);
4569 }
4570 di = unsafeIRDirty_0_N( 2/*regparms*/,
4571 hName, VG_(fnptr_to_fnentry)( hFun ),
4572 mkIRExprVec_2( ea, dataB )
4573 );
4574 /* no need to mess with any annotations. This call accesses
4575 neither guest state nor guest memory. */
sewardj1c0ce7a2009-07-01 08:10:49 +00004576 if (guard) di->guard = guard;
sewardj7cf4e6b2008-05-01 20:24:26 +00004577 stmt( 'B', mce, IRStmt_Dirty(di) );
4578}
4579
4580static IRAtom* narrowTo32 ( MCEnv* mce, IRAtom* e ) {
sewardj1c0ce7a2009-07-01 08:10:49 +00004581 IRType eTy = typeOfIRExpr(mce->sb->tyenv, e);
sewardj7cf4e6b2008-05-01 20:24:26 +00004582 if (eTy == Ity_I64)
4583 return assignNew( 'B', mce, Ity_I32, unop(Iop_64to32, e) );
4584 if (eTy == Ity_I32)
4585 return e;
4586 tl_assert(0);
4587}
4588
4589static IRAtom* zWidenFrom32 ( MCEnv* mce, IRType dstTy, IRAtom* e ) {
sewardj1c0ce7a2009-07-01 08:10:49 +00004590 IRType eTy = typeOfIRExpr(mce->sb->tyenv, e);
sewardj7cf4e6b2008-05-01 20:24:26 +00004591 tl_assert(eTy == Ity_I32);
4592 if (dstTy == Ity_I64)
4593 return assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, e) );
4594 tl_assert(0);
4595}
4596
/* Build an expression giving the origin (B-value) of expression 'e'.
   The result is always an Ity_I32 atom: a 32-bit origin tag, where
   zero means "no interesting origin".  Composite expressions get the
   pessimistic merge (unsigned max, via gen_maxU32) of their operands'
   origins.  May emit IR as a side effect (via assignNew/gen_load_b).
   Only called when origin tracking is on (MC_(clo_mc_level) == 3). */
static IRAtom* schemeE ( MCEnv* mce, IRExpr* e )
{
   tl_assert(MC_(clo_mc_level) == 3);

   switch (e->tag) {

      case Iex_GetI: {
         IRRegArray* descr_b;
         IRAtom *t1, *t2, *t3, *t4;
         IRRegArray* descr = e->Iex.GetI.descr;
         IRType equivIntTy
            = MC_(get_otrack_reg_array_equiv_int_type)(descr);
         /* If this array is unshadowable for whatever reason, use the
            usual approximation. */
         if (equivIntTy == Ity_INVALID)
            return mkU32(0);
         tl_assert(sizeofIRType(equivIntTy) >= 4);
         tl_assert(sizeofIRType(equivIntTy) == sizeofIRType(descr->elemTy));
         /* The B-shadow of the guest state lives at offset
            2 * total_sizeB from the real guest state (the V-shadow
            occupies the first copy). */
         descr_b = mkIRRegArray( descr->base + 2*mce->layout->total_sizeB,
                                 equivIntTy, descr->nElems );
         /* Do a shadow indexed get of the same size, giving t1.  Take
            the bottom 32 bits of it, giving t2.  Compute into t3 the
            origin for the index (almost certainly zero, but there's
            no harm in being completely general here, since iropt will
            remove any useless code), and fold it in, giving a final
            value t4. */
         t1 = assignNew( 'B', mce, equivIntTy,
                          IRExpr_GetI( descr_b, e->Iex.GetI.ix,
                                       e->Iex.GetI.bias ));
         t2 = narrowTo32( mce, t1 );
         t3 = schemeE( mce, e->Iex.GetI.ix );
         t4 = gen_maxU32( mce, t2, t3 );
         return t4;
      }
      case Iex_CCall: {
         /* Merge the origins of all non-excluded args. */
         Int i;
         IRAtom* here;
         IRExpr** args = e->Iex.CCall.args;
         IRAtom* curr = mkU32(0);
         for (i = 0; args[i]; i++) {
            tl_assert(i < 32);
            tl_assert(isOriginalAtom(mce, args[i]));
            /* Only take notice of this arg if the callee's
               mc-exclusion mask does not say it is to be excluded. */
            if (e->Iex.CCall.cee->mcx_mask & (1<<i)) {
               /* the arg is to be excluded from definedness checking.
                  Do nothing. */
               if (0) VG_(printf)("excluding %s(%d)\n",
                                  e->Iex.CCall.cee->name, i);
            } else {
               /* calculate the arg's definedness, and pessimistically
                  merge it in. */
               here = schemeE( mce, args[i] );
               curr = gen_maxU32( mce, curr, here );
            }
         }
         return curr;
      }
      case Iex_Load: {
         /* Fetch the stored origin for the loaded range via the
            B-load helper.  The address's own origin is not folded in
            here.  NOTE(review): presumably handled by the separate
            V-bits instrumentation — confirm. */
         Int dszB;
         dszB = sizeofIRType(e->Iex.Load.ty);
         /* assert that the B value for the address is already
            available (somewhere) */
         tl_assert(isIRAtom(e->Iex.Load.addr));
         tl_assert(mce->hWordTy == Ity_I32 || mce->hWordTy == Ity_I64);
         return gen_load_b( mce, dszB, e->Iex.Load.addr, 0 );
      }
      case Iex_Mux0X: {
         /* Pessimistic: merge origins of condition and both arms. */
         IRAtom* b1 = schemeE( mce, e->Iex.Mux0X.cond );
         IRAtom* b2 = schemeE( mce, e->Iex.Mux0X.expr0 );
         IRAtom* b3 = schemeE( mce, e->Iex.Mux0X.exprX );
         return gen_maxU32( mce, b1, gen_maxU32( mce, b2, b3 ));
      }
      case Iex_Qop: {
         IRAtom* b1 = schemeE( mce, e->Iex.Qop.arg1 );
         IRAtom* b2 = schemeE( mce, e->Iex.Qop.arg2 );
         IRAtom* b3 = schemeE( mce, e->Iex.Qop.arg3 );
         IRAtom* b4 = schemeE( mce, e->Iex.Qop.arg4 );
         return gen_maxU32( mce, gen_maxU32( mce, b1, b2 ),
                                 gen_maxU32( mce, b3, b4 ) );
      }
      case Iex_Triop: {
         IRAtom* b1 = schemeE( mce, e->Iex.Triop.arg1 );
         IRAtom* b2 = schemeE( mce, e->Iex.Triop.arg2 );
         IRAtom* b3 = schemeE( mce, e->Iex.Triop.arg3 );
         return gen_maxU32( mce, b1, gen_maxU32( mce, b2, b3 ) );
      }
      case Iex_Binop: {
         IRAtom* b1 = schemeE( mce, e->Iex.Binop.arg1 );
         IRAtom* b2 = schemeE( mce, e->Iex.Binop.arg2 );
         return gen_maxU32( mce, b1, b2 );
      }
      case Iex_Unop: {
         /* A unop simply propagates its operand's origin. */
         IRAtom* b1 = schemeE( mce, e->Iex.Unop.arg );
         return b1;
      }
      case Iex_Const:
         /* Constants are always defined: no origin. */
         return mkU32(0);
      case Iex_RdTmp:
         return mkexpr( findShadowTmpB( mce, e->Iex.RdTmp.tmp ));
      case Iex_Get: {
         Int b_offset = MC_(get_otrack_shadow_offset)(
                           e->Iex.Get.offset,
                           sizeofIRType(e->Iex.Get.ty)
                        );
         tl_assert(b_offset >= -1
                   && b_offset <= mce->layout->total_sizeB -4);
         if (b_offset >= 0) {
            /* FIXME: this isn't an atom! */
            return IRExpr_Get( b_offset + 2*mce->layout->total_sizeB,
                               Ity_I32 );
         }
         /* -1 means this guest state slice is untracked. */
         return mkU32(0);
      }
      default:
         VG_(printf)("mc_translate.c: schemeE: unhandled: ");
         ppIRExpr(e);
         VG_(tool_panic)("memcheck:schemeE");
   }
}
4717
4718static void do_origins_Dirty ( MCEnv* mce, IRDirty* d )
4719{
4720 // This is a hacked version of do_shadow_Dirty
njn4c245e52009-03-15 23:25:38 +00004721 Int i, n, toDo, gSz, gOff;
sewardj7cf4e6b2008-05-01 20:24:26 +00004722 IRAtom *here, *curr;
4723 IRTemp dst;
sewardj7cf4e6b2008-05-01 20:24:26 +00004724
4725 /* First check the guard. */
4726 curr = schemeE( mce, d->guard );
4727
4728 /* Now round up all inputs and maxU32 over them. */
4729
4730 /* Inputs: unmasked args */
4731 for (i = 0; d->args[i]; i++) {
4732 if (d->cee->mcx_mask & (1<<i)) {
4733 /* ignore this arg */
4734 } else {
4735 here = schemeE( mce, d->args[i] );
4736 curr = gen_maxU32( mce, curr, here );
4737 }
4738 }
4739
4740 /* Inputs: guest state that we read. */
4741 for (i = 0; i < d->nFxState; i++) {
4742 tl_assert(d->fxState[i].fx != Ifx_None);
4743 if (d->fxState[i].fx == Ifx_Write)
4744 continue;
4745
4746 /* Ignore any sections marked as 'always defined'. */
4747 if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size )) {
4748 if (0)
4749 VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
4750 d->fxState[i].offset, d->fxState[i].size );
4751 continue;
4752 }
4753
4754 /* This state element is read or modified. So we need to
4755 consider it. If larger than 4 bytes, deal with it in 4-byte
4756 chunks. */
4757 gSz = d->fxState[i].size;
4758 gOff = d->fxState[i].offset;
4759 tl_assert(gSz > 0);
4760 while (True) {
4761 Int b_offset;
4762 if (gSz == 0) break;
4763 n = gSz <= 4 ? gSz : 4;
4764 /* update 'curr' with maxU32 of the state slice
4765 gOff .. gOff+n-1 */
4766 b_offset = MC_(get_otrack_shadow_offset)(gOff, 4);
4767 if (b_offset != -1) {
4768 here = assignNew( 'B',mce,
4769 Ity_I32,
4770 IRExpr_Get(b_offset + 2*mce->layout->total_sizeB,
4771 Ity_I32));
4772 curr = gen_maxU32( mce, curr, here );
4773 }
4774 gSz -= n;
4775 gOff += n;
4776 }
4777
4778 }
4779
4780 /* Inputs: memory */
4781
4782 if (d->mFx != Ifx_None) {
4783 /* Because we may do multiple shadow loads/stores from the same
4784 base address, it's best to do a single test of its
4785 definedness right now. Post-instrumentation optimisation
4786 should remove all but this test. */
4787 tl_assert(d->mAddr);
4788 here = schemeE( mce, d->mAddr );
4789 curr = gen_maxU32( mce, curr, here );
4790 }
4791
4792 /* Deal with memory inputs (reads or modifies) */
4793 if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
sewardj7cf4e6b2008-05-01 20:24:26 +00004794 toDo = d->mSize;
4795 /* chew off 32-bit chunks. We don't care about the endianness
4796 since it's all going to be condensed down to a single bit,
4797 but nevertheless choose an endianness which is hopefully
4798 native to the platform. */
4799 while (toDo >= 4) {
4800 here = gen_load_b( mce, 4, d->mAddr, d->mSize - toDo );
4801 curr = gen_maxU32( mce, curr, here );
4802 toDo -= 4;
4803 }
sewardj8c93fcc2008-10-30 13:08:31 +00004804 /* handle possible 16-bit excess */
4805 while (toDo >= 2) {
4806 here = gen_load_b( mce, 2, d->mAddr, d->mSize - toDo );
4807 curr = gen_maxU32( mce, curr, here );
4808 toDo -= 2;
4809 }
4810 tl_assert(toDo == 0); /* also need to handle 1-byte excess */
sewardj7cf4e6b2008-05-01 20:24:26 +00004811 }
4812
4813 /* Whew! So curr is a 32-bit B-value which should give an origin
4814 of some use if any of the inputs to the helper are undefined.
4815 Now we need to re-distribute the results to all destinations. */
4816
4817 /* Outputs: the destination temporary, if there is one. */
4818 if (d->tmp != IRTemp_INVALID) {
4819 dst = findShadowTmpB(mce, d->tmp);
4820 assign( 'V', mce, dst, curr );
4821 }
4822
4823 /* Outputs: guest state that we write or modify. */
4824 for (i = 0; i < d->nFxState; i++) {
4825 tl_assert(d->fxState[i].fx != Ifx_None);
4826 if (d->fxState[i].fx == Ifx_Read)
4827 continue;
4828
4829 /* Ignore any sections marked as 'always defined'. */
4830 if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size ))
4831 continue;
4832
4833 /* This state element is written or modified. So we need to
4834 consider it. If larger than 4 bytes, deal with it in 4-byte
4835 chunks. */
4836 gSz = d->fxState[i].size;
4837 gOff = d->fxState[i].offset;
4838 tl_assert(gSz > 0);
4839 while (True) {
4840 Int b_offset;
4841 if (gSz == 0) break;
4842 n = gSz <= 4 ? gSz : 4;
4843 /* Write 'curr' to the state slice gOff .. gOff+n-1 */
4844 b_offset = MC_(get_otrack_shadow_offset)(gOff, 4);
4845 if (b_offset != -1) {
4846 stmt( 'B', mce, IRStmt_Put(b_offset + 2*mce->layout->total_sizeB,
4847 curr ));
4848 }
4849 gSz -= n;
4850 gOff += n;
4851 }
4852 }
4853
4854 /* Outputs: memory that we write or modify. Same comments about
4855 endianness as above apply. */
4856 if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
sewardj7cf4e6b2008-05-01 20:24:26 +00004857 toDo = d->mSize;
4858 /* chew off 32-bit chunks */
4859 while (toDo >= 4) {
sewardj1c0ce7a2009-07-01 08:10:49 +00004860 gen_store_b( mce, 4, d->mAddr, d->mSize - toDo, curr,
4861 NULL/*guard*/ );
sewardj7cf4e6b2008-05-01 20:24:26 +00004862 toDo -= 4;
4863 }
sewardj8c93fcc2008-10-30 13:08:31 +00004864 /* handle possible 16-bit excess */
4865 while (toDo >= 2) {
sewardj1c0ce7a2009-07-01 08:10:49 +00004866 gen_store_b( mce, 2, d->mAddr, d->mSize - toDo, curr,
4867 NULL/*guard*/ );
sewardj8c93fcc2008-10-30 13:08:31 +00004868 toDo -= 2;
4869 }
4870 tl_assert(toDo == 0); /* also need to handle 1-byte excess */
sewardj7cf4e6b2008-05-01 20:24:26 +00004871 }
sewardj7cf4e6b2008-05-01 20:24:26 +00004872}
4873
/* Generate origin-tracking instrumentation for statement 'st',
   emitting B-shadow statements via stmt()/assign() as a side effect.
   Parallels the V-bits instrumentation in the main loop of
   MC_(instrument).  Only called when origin tracking is on
   (MC_(clo_mc_level) == 3). */
static void schemeS ( MCEnv* mce, IRStmt* st )
{
   tl_assert(MC_(clo_mc_level) == 3);

   switch (st->tag) {

      case Ist_AbiHint:
         /* The value-check instrumenter handles this - by arranging
            to pass the address of the next instruction to
            MC_(helperc_MAKE_STACK_UNINIT).  This is all that needs to
            happen for origin tracking w.r.t. AbiHints.  So there is
            nothing to do here. */
         break;

      case Ist_PutI: {
         IRRegArray* descr_b;
         IRAtom *t1, *t2, *t3, *t4;
         IRRegArray* descr = st->Ist.PutI.descr;
         IRType equivIntTy
            = MC_(get_otrack_reg_array_equiv_int_type)(descr);
         /* If this array is unshadowable for whatever reason,
            generate no code. */
         if (equivIntTy == Ity_INVALID)
            break;
         tl_assert(sizeofIRType(equivIntTy) >= 4);
         tl_assert(sizeofIRType(equivIntTy) == sizeofIRType(descr->elemTy));
         /* The B-shadow guest state starts at offset
            2 * total_sizeB from the real guest state. */
         descr_b
            = mkIRRegArray( descr->base + 2*mce->layout->total_sizeB,
                            equivIntTy, descr->nElems );
         /* Compute a value to Put - the conjoinment of the origin for
            the data to be Put-ted (obviously) and of the index value
            (not so obviously). */
         t1 = schemeE( mce, st->Ist.PutI.data );
         t2 = schemeE( mce, st->Ist.PutI.ix );
         t3 = gen_maxU32( mce, t1, t2 );
         t4 = zWidenFrom32( mce, equivIntTy, t3 );
         stmt( 'B', mce, IRStmt_PutI( descr_b, st->Ist.PutI.ix,
                                      st->Ist.PutI.bias, t4 ));
         break;
      }
      case Ist_Dirty:
         do_origins_Dirty( mce, st->Ist.Dirty.details );
         break;
      case Ist_Store: {
         /* Propagate the data's origin into B-shadow memory. */
         Int     dszB;
         IRAtom* dataB;
         /* assert that the B value for the address is already
            available (somewhere) */
         tl_assert(isIRAtom(st->Ist.Store.addr));
         dszB = sizeofIRType(
                   typeOfIRExpr(mce->sb->tyenv, st->Ist.Store.data ));
         dataB = schemeE( mce, st->Ist.Store.data );
         gen_store_b( mce, dszB, st->Ist.Store.addr, 0/*offset*/, dataB,
                           NULL/*guard*/ );
         /* For the rationale behind this, see comments at the place
            where the V-shadow for .resSC is constructed, in the main
            loop in MC_(instrument).  In short, we regard .resSC as
            always-defined. */
         if (st->Ist.Store.resSC != IRTemp_INVALID) {
            assign( 'B', mce, findShadowTmpB(mce, st->Ist.Store.resSC),
                    mkU32(0) );
         }
         break;
      }
      case Ist_Put: {
         /* Write the data's origin to the B-shadow guest state, if
            this state slice is tracked at all. */
         Int b_offset
            = MC_(get_otrack_shadow_offset)(
                 st->Ist.Put.offset,
                 sizeofIRType(typeOfIRExpr(mce->sb->tyenv, st->Ist.Put.data))
              );
         if (b_offset >= 0) {
            /* FIXME: this isn't an atom! */
            stmt( 'B', mce, IRStmt_Put(b_offset + 2*mce->layout->total_sizeB,
                                       schemeE( mce, st->Ist.Put.data )) );
         }
         break;
      }
      case Ist_WrTmp:
         assign( 'B', mce, findShadowTmpB(mce, st->Ist.WrTmp.tmp),
                 schemeE(mce, st->Ist.WrTmp.data) );
         break;
      case Ist_MBE:
      case Ist_NoOp:
      case Ist_Exit:
      case Ist_IMark:
         /* No origins to propagate for these. */
         break;
      default:
         VG_(printf)("mc_translate.c: schemeS: unhandled: ");
         ppIRStmt(st);
         VG_(tool_panic)("memcheck:schemeS");
   }
}
4966
4967
njn25e49d8e72002-09-23 09:36:25 +00004968/*--------------------------------------------------------------------*/
njn25cac76cb2002-09-23 11:21:57 +00004969/*--- end mc_translate.c ---*/
njn25e49d8e72002-09-23 09:36:25 +00004970/*--------------------------------------------------------------------*/