blob: 50f2d49cc301f64e853d2e1e62133bd97a0c1b90 [file] [log] [blame]
nethercotebb1c9912004-01-04 16:43:23 +00001
njn25e49d8e72002-09-23 09:36:25 +00002/*--------------------------------------------------------------------*/
sewardj95448072004-11-22 20:19:51 +00003/*--- Instrument IR to perform memory checking operations. ---*/
njn25cac76cb2002-09-23 11:21:57 +00004/*--- mc_translate.c ---*/
njn25e49d8e72002-09-23 09:36:25 +00005/*--------------------------------------------------------------------*/
njnc9539842002-10-02 13:26:35 +00006
njn25e49d8e72002-09-23 09:36:25 +00007/*
nethercote137bc552003-11-14 17:47:54 +00008 This file is part of MemCheck, a heavyweight Valgrind tool for
njnc9539842002-10-02 13:26:35 +00009 detecting memory errors.
njn25e49d8e72002-09-23 09:36:25 +000010
sewardj9eecbbb2010-05-03 21:37:12 +000011 Copyright (C) 2000-2010 Julian Seward
njn25e49d8e72002-09-23 09:36:25 +000012 jseward@acm.org
13
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
18
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
23
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, write to the Free Software
26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27 02111-1307, USA.
28
29 The GNU General Public License is contained in the file COPYING.
30*/
31
njnc7561b92005-06-19 01:24:32 +000032#include "pub_tool_basics.h"
njn1d0825f2006-03-27 11:37:07 +000033#include "pub_tool_hashtable.h" // For mc_include.h
njn132bfcc2005-06-04 19:16:06 +000034#include "pub_tool_libcassert.h"
njn36a20fa2005-06-03 03:08:39 +000035#include "pub_tool_libcprint.h"
njnc7561b92005-06-19 01:24:32 +000036#include "pub_tool_tooliface.h"
sewardj53ee1fc2005-12-23 02:29:58 +000037#include "pub_tool_machine.h" // VG_(fnptr_to_fnentry)
sewardj81651dc2007-08-28 06:05:20 +000038#include "pub_tool_xarray.h"
39#include "pub_tool_mallocfree.h"
40#include "pub_tool_libcbase.h"
njn25e49d8e72002-09-23 09:36:25 +000041
sewardj7cf4e6b2008-05-01 20:24:26 +000042#include "mc_include.h"
43
44
sewardj992dff92005-10-07 11:08:55 +000045/* This file implements the Memcheck instrumentation, and in
46 particular contains the core of its undefined value detection
47 machinery. For a comprehensive background of the terminology,
48 algorithms and rationale used herein, read:
49
50 Using Valgrind to detect undefined value errors with
51 bit-precision
52
53 Julian Seward and Nicholas Nethercote
54
55 2005 USENIX Annual Technical Conference (General Track),
56 Anaheim, CA, USA, April 10-15, 2005.
njn6665ea22007-05-24 23:14:41 +000057
58 ----
59
60 Here is as good a place as any to record exactly when V bits are and
61 should be checked, why, and what function is responsible.
62
63
64 Memcheck complains when an undefined value is used:
65
66 1. In the condition of a conditional branch. Because it could cause
67 incorrect control flow, and thus cause incorrect externally-visible
68 behaviour. [mc_translate.c:complainIfUndefined]
69
70 2. As an argument to a system call, or as the value that specifies
71 the system call number. Because it could cause an incorrect
72 externally-visible side effect. [mc_translate.c:mc_pre_reg_read]
73
74 3. As the address in a load or store. Because it could cause an
75 incorrect value to be used later, which could cause externally-visible
76 behaviour (eg. via incorrect control flow or an incorrect system call
77 argument) [complainIfUndefined]
78
79 4. As the target address of a branch. Because it could cause incorrect
80 control flow. [complainIfUndefined]
81
82 5. As an argument to setenv, unsetenv, or putenv. Because it could put
83 an incorrect value into the external environment.
84 [mc_replace_strmem.c:VG_WRAP_FUNCTION_ZU(*, *env)]
85
86 6. As the index in a GETI or PUTI operation. I'm not sure why... (njn).
87 [complainIfUndefined]
88
89 7. As an argument to the VALGRIND_CHECK_MEM_IS_DEFINED and
90 VALGRIND_CHECK_VALUE_IS_DEFINED client requests. Because the user
91 requested it. [in memcheck.h]
92
93
94 Memcheck also complains, but should not, when an undefined value is used:
95
96 8. As the shift value in certain SIMD shift operations (but not in the
97 standard integer shift operations). This inconsistency is due to
98 historical reasons. [complainIfUndefined]
99
100
101 Memcheck does not complain, but should, when an undefined value is used:
102
103 9. As an input to a client request. Because the client request may
104 affect the visible behaviour -- see bug #144362 for an example
105 involving the malloc replacements in vg_replace_malloc.c and
106 VALGRIND_NON_SIMD_CALL* requests, where an uninitialised argument
107 isn't identified. That bug report also has some info on how to solve
108 the problem. [valgrind.h:VALGRIND_DO_CLIENT_REQUEST]
109
110
111 In practice, 1 and 2 account for the vast majority of cases.
sewardj992dff92005-10-07 11:08:55 +0000112*/
113
sewardj95448072004-11-22 20:19:51 +0000114/*------------------------------------------------------------*/
115/*--- Forward decls ---*/
116/*------------------------------------------------------------*/
117
118struct _MCEnv;
119
sewardj7cf4e6b2008-05-01 20:24:26 +0000120static IRType shadowTypeV ( IRType ty );
sewardj95448072004-11-22 20:19:51 +0000121static IRExpr* expr2vbits ( struct _MCEnv* mce, IRExpr* e );
sewardjafa617b2008-07-22 09:59:48 +0000122static IRTemp findShadowTmpB ( struct _MCEnv* mce, IRTemp orig );
sewardj95448072004-11-22 20:19:51 +0000123
124
125/*------------------------------------------------------------*/
126/*--- Memcheck running state, and tmp management. ---*/
127/*------------------------------------------------------------*/
128
sewardj1c0ce7a2009-07-01 08:10:49 +0000129/* Carries info about a particular tmp. The tmp's number is not
130 recorded, as this is implied by (equal to) its index in the tmpMap
131 in MCEnv. The tmp's type is also not recorded, as this is present
132 in MCEnv.sb->tyenv.
133
134 When .kind is Orig, .shadowV and .shadowB may give the identities
135 of the temps currently holding the associated definedness (shadowV)
136 and origin (shadowB) values, or these may be IRTemp_INVALID if code
137 to compute such values has not yet been emitted.
138
139 When .kind is VSh or BSh then the tmp holds a V- or B- value,
140 and so .shadowV and .shadowB must be IRTemp_INVALID, since it is
141 illogical for a shadow tmp itself to be shadowed.
142*/
/* Role of a tmp in the tmpMap: an original tmp from the input IR
   (Orig), a definedness-shadow tmp (VSh), or an origin-tracking
   shadow tmp (BSh). */
typedef
   enum { Orig=1, VSh=2, BSh=3 }
   TempKind;

/* One entry per tmp; indexed by tmp number in MCEnv.tmpMap.  The
   invariants relating kind and the two shadow fields are described
   in the comment immediately above. */
typedef
   struct {
      TempKind kind;     /* Orig, VSh or BSh */
      IRTemp   shadowV;  /* V-shadow tmp, or IRTemp_INVALID */
      IRTemp   shadowB;  /* B-shadow tmp, or IRTemp_INVALID */
   }
   TempMapEnt;

155
/* Carries around state during memcheck instrumentation. */
typedef
   struct _MCEnv {
      /* MODIFIED: the superblock being constructed.  IRStmts are
         added. */
      IRSB* sb;
      Bool  trace;   /* when True, each added stmt is printed */

      /* MODIFIED: a table [0 .. #temps_in_sb-1] which gives the
         current kind and possibly shadow temps for each temp in the
         IRSB being constructed.  Note that it does not contain the
         type of each tmp.  If you want to know the type, look at the
         relevant entry in sb->tyenv.  It follows that at all times
         during the instrumentation process, the valid indices for
         tmpMap and sb->tyenv are identical, being 0 .. N-1 where N is
         total number of Orig, V- and B- temps allocated so far.

         The reason for this strange split (types in one place, all
         other info in another) is that we need the types to be
         attached to sb so as to make it possible to do
         "typeOfIRExpr(mce->bb->tyenv, ...)" at various places in the
         instrumentation process. */
      XArray* /* of TempMapEnt */ tmpMap;

      /* MODIFIED: indicates whether "bogus" literals have so far been
         found.  Starts off False, and may change to True. */
      Bool bogusLiterals;

      /* READONLY: the guest layout.  This indicates which parts of
         the guest state should be regarded as 'always defined'. */
      VexGuestLayout* layout;

      /* READONLY: the host word type.  Needed for constructing
         arguments of type 'HWord' to be passed to helper functions.
         Ity_I32 or Ity_I64 only. */
      IRType hWordTy;
   }
   MCEnv;
194
195/* SHADOW TMP MANAGEMENT. Shadow tmps are allocated lazily (on
196 demand), as they are encountered. This is for two reasons.
197
198 (1) (less important reason): Many original tmps are unused due to
199 initial IR optimisation, and we do not want to waste space in tables
200 tracking them.
201
202 Shadow IRTemps are therefore allocated on demand. mce.tmpMap is a
203 table indexed [0 .. n_types-1], which gives the current shadow for
204 each original tmp, or INVALID_IRTEMP if none is so far assigned.
205 It is necessary to support making multiple assignments to a shadow
206 -- specifically, after testing a shadow for definedness, it needs
207 to be made defined. But IR's SSA property disallows this.
208
209 (2) (more important reason): Therefore, when a shadow needs to get
210 a new value, a new temporary is created, the value is assigned to
211 that, and the tmpMap is updated to reflect the new binding.
212
213 A corollary is that if the tmpMap maps a given tmp to
sewardjf1962d32006-10-19 13:22:16 +0000214 IRTemp_INVALID and we are hoping to read that shadow tmp, it means
sewardj95448072004-11-22 20:19:51 +0000215 there's a read-before-write error in the original tmps. The IR
216 sanity checker should catch all such anomalies, however.
njn25e49d8e72002-09-23 09:36:25 +0000217*/
sewardj95448072004-11-22 20:19:51 +0000218
/* Create a new IRTemp of type 'ty' and kind 'kind', and add it to
   both the table in mce->sb and to our auxiliary mapping.  Note that
   newTemp may cause mce->tmpMap to resize, hence previous results
   from VG_(indexXA)(mce->tmpMap) are invalidated. */
static IRTemp newTemp ( MCEnv* mce, IRType ty, TempKind kind )
{
   Word       newIx;
   TempMapEnt ent;
   /* Allocate the tmp in the superblock's type environment first ... */
   IRTemp tmp = newIRTemp(mce->sb->tyenv, ty);
   ent.kind    = kind;
   ent.shadowV = IRTemp_INVALID;   /* no shadows exist for it yet */
   ent.shadowB = IRTemp_INVALID;
   /* ... then mirror it in tmpMap; the two tables must stay in
      lockstep, which the assertion checks. */
   newIx = VG_(addToXA)( mce->tmpMap, &ent );
   tl_assert(newIx == (Word)tmp);
   return tmp;
}
235
236
/* Find the tmp currently shadowing the given original tmp.  If none
   so far exists, allocate one.  */
static IRTemp findShadowTmpV ( MCEnv* mce, IRTemp orig )
{
   TempMapEnt* ent;
   /* VG_(indexXA) range-checks 'orig', hence no need to check
      here. */
   ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
   tl_assert(ent->kind == Orig);
   if (ent->shadowV == IRTemp_INVALID) {
      /* Lazily allocate the V-shadow, with the shadow type that
         corresponds to 'orig's type. */
      IRTemp tmpV
        = newTemp( mce, shadowTypeV(mce->sb->tyenv->types[orig]), VSh );
      /* newTemp may cause mce->tmpMap to resize, hence previous results
         from VG_(indexXA) are invalid.  Re-fetch the entry before
         writing through it. */
      ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
      tl_assert(ent->kind == Orig);
      tl_assert(ent->shadowV == IRTemp_INVALID);
      ent->shadowV = tmpV;
   }
   return ent->shadowV;
}
258
/* Allocate a new shadow for the given original tmp.  This means any
   previous shadow is abandoned.  This is needed because it is
   necessary to give a new value to a shadow once it has been tested
   for undefinedness, but unfortunately IR's SSA property disallows
   this.  Instead we must abandon the old shadow, allocate a new one
   and use that instead.

   This is the same as findShadowTmpV, except we don't bother to see
   if a shadow temp already existed -- we simply allocate a new one
   regardless. */
static void newShadowTmpV ( MCEnv* mce, IRTemp orig )
{
   TempMapEnt* ent;
   /* VG_(indexXA) range-checks 'orig', hence no need to check
      here. */
   ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
   tl_assert(ent->kind == Orig);
   if (1) {
      IRTemp tmpV
        = newTemp( mce, shadowTypeV(mce->sb->tyenv->types[orig]), VSh );
      /* newTemp may cause mce->tmpMap to resize, hence previous results
         from VG_(indexXA) are invalid.  Re-fetch before updating. */
      ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
      tl_assert(ent->kind == Orig);
      ent->shadowV = tmpV;   /* old shadow (if any) is abandoned */
   }
}
286
287
288/*------------------------------------------------------------*/
289/*--- IRAtoms -- a subset of IRExprs ---*/
290/*------------------------------------------------------------*/
291
292/* An atom is either an IRExpr_Const or an IRExpr_Tmp, as defined by
sewardj710d6c22005-03-20 18:55:15 +0000293 isIRAtom() in libvex_ir.h. Because this instrumenter expects flat
sewardj95448072004-11-22 20:19:51 +0000294 input, most of this code deals in atoms. Usefully, a value atom
295 always has a V-value which is also an atom: constants are shadowed
296 by constants, and temps are shadowed by the corresponding shadow
297 temporary. */
298
299typedef IRExpr IRAtom;
300
301/* (used for sanity checks only): is this an atom which looks
302 like it's from original code? */
303static Bool isOriginalAtom ( MCEnv* mce, IRAtom* a1 )
304{
305 if (a1->tag == Iex_Const)
306 return True;
sewardj1c0ce7a2009-07-01 08:10:49 +0000307 if (a1->tag == Iex_RdTmp) {
308 TempMapEnt* ent = VG_(indexXA)( mce->tmpMap, a1->Iex.RdTmp.tmp );
309 return ent->kind == Orig;
310 }
sewardj95448072004-11-22 20:19:51 +0000311 return False;
312}
313
314/* (used for sanity checks only): is this an atom which looks
315 like it's from shadow code? */
316static Bool isShadowAtom ( MCEnv* mce, IRAtom* a1 )
317{
318 if (a1->tag == Iex_Const)
319 return True;
sewardj1c0ce7a2009-07-01 08:10:49 +0000320 if (a1->tag == Iex_RdTmp) {
321 TempMapEnt* ent = VG_(indexXA)( mce->tmpMap, a1->Iex.RdTmp.tmp );
322 return ent->kind == VSh || ent->kind == BSh;
323 }
sewardj95448072004-11-22 20:19:51 +0000324 return False;
325}
326
327/* (used for sanity checks only): check that both args are atoms and
328 are identically-kinded. */
329static Bool sameKindedAtoms ( IRAtom* a1, IRAtom* a2 )
330{
sewardj0b9d74a2006-12-24 02:24:11 +0000331 if (a1->tag == Iex_RdTmp && a2->tag == Iex_RdTmp)
sewardj95448072004-11-22 20:19:51 +0000332 return True;
sewardjbef552a2005-08-30 12:54:36 +0000333 if (a1->tag == Iex_Const && a2->tag == Iex_Const)
sewardj95448072004-11-22 20:19:51 +0000334 return True;
335 return False;
336}
337
338
339/*------------------------------------------------------------*/
340/*--- Type management ---*/
341/*------------------------------------------------------------*/
342
343/* Shadow state is always accessed using integer types. This returns
344 an integer type with the same size (as per sizeofIRType) as the
345 given type. The only valid shadow types are Bit, I8, I16, I32,
sewardj3245c912004-12-10 14:58:26 +0000346 I64, V128. */
sewardj95448072004-11-22 20:19:51 +0000347
sewardj7cf4e6b2008-05-01 20:24:26 +0000348static IRType shadowTypeV ( IRType ty )
sewardj95448072004-11-22 20:19:51 +0000349{
350 switch (ty) {
351 case Ity_I1:
352 case Ity_I8:
353 case Ity_I16:
354 case Ity_I32:
sewardj6cf40ff2005-04-20 22:31:26 +0000355 case Ity_I64:
356 case Ity_I128: return ty;
sewardj3245c912004-12-10 14:58:26 +0000357 case Ity_F32: return Ity_I32;
358 case Ity_F64: return Ity_I64;
359 case Ity_V128: return Ity_V128;
sewardj95448072004-11-22 20:19:51 +0000360 default: ppIRType(ty);
sewardj7cf4e6b2008-05-01 20:24:26 +0000361 VG_(tool_panic)("memcheck:shadowTypeV");
sewardj95448072004-11-22 20:19:51 +0000362 }
363}
364
365/* Produce a 'defined' value of the given shadow type. Should only be
366 supplied shadow types (Bit/I8/I16/I32/UI64). */
367static IRExpr* definedOfType ( IRType ty ) {
368 switch (ty) {
sewardj170ee212004-12-10 18:57:51 +0000369 case Ity_I1: return IRExpr_Const(IRConst_U1(False));
370 case Ity_I8: return IRExpr_Const(IRConst_U8(0));
371 case Ity_I16: return IRExpr_Const(IRConst_U16(0));
372 case Ity_I32: return IRExpr_Const(IRConst_U32(0));
373 case Ity_I64: return IRExpr_Const(IRConst_U64(0));
374 case Ity_V128: return IRExpr_Const(IRConst_V128(0x0000));
sewardjf1962d32006-10-19 13:22:16 +0000375 default: VG_(tool_panic)("memcheck:definedOfType");
njn25e49d8e72002-09-23 09:36:25 +0000376 }
377}
378
379
sewardj95448072004-11-22 20:19:51 +0000380/*------------------------------------------------------------*/
381/*--- Constructing IR fragments ---*/
382/*------------------------------------------------------------*/
383
/* Add stmt 'st' to the superblock under construction.  'cat' is a
   one-character tag describing where the stmt came from ('V', 'B' or
   'C' -- see assignNew); it is used only for trace output. */
static inline void stmt ( HChar cat, MCEnv* mce, IRStmt* st ) {
   if (mce->trace) {
      VG_(printf)(" %c: ", cat);
      ppIRStmt(st);
      VG_(printf)("\n");
   }
   addStmtToIRSB(mce->sb, st);
}
393
/* Assign expression 'expr' to tmp 'tmp', by appending a WrTmp stmt
   to the superblock.  'cat' is the trace tag, as for stmt(). */
static inline
void assign ( HChar cat, MCEnv* mce, IRTemp tmp, IRExpr* expr ) {
   stmt(cat, mce, IRStmt_WrTmp(tmp,expr));
}
sewardj95448072004-11-22 20:19:51 +0000399
/* Shorthands for building various kinds of IR expressions: operator
   applications (triop/binop/unop), integer and vector constants
   (mkU8..mkU64, mkV128), and tmp reads (mkexpr). */
#define triop(_op, _arg1, _arg2, _arg3) \
                                 IRExpr_Triop((_op),(_arg1),(_arg2),(_arg3))
#define binop(_op, _arg1, _arg2) IRExpr_Binop((_op),(_arg1),(_arg2))
#define unop(_op, _arg)          IRExpr_Unop((_op),(_arg))
#define mkU8(_n)                 IRExpr_Const(IRConst_U8(_n))
#define mkU16(_n)                IRExpr_Const(IRConst_U16(_n))
#define mkU32(_n)                IRExpr_Const(IRConst_U32(_n))
#define mkU64(_n)                IRExpr_Const(IRConst_U64(_n))
#define mkV128(_n)               IRExpr_Const(IRConst_V128(_n))
#define mkexpr(_tmp)             IRExpr_RdTmp((_tmp))
sewardj95448072004-11-22 20:19:51 +0000411
/* Bind the given expression to a new temporary, and return the
   temporary.  This effectively converts an arbitrary expression into
   an atom.

   'ty' is the type of 'e' and hence the type that the new temporary
   needs to be.  But passing it in is redundant, since we can deduce
   the type merely by inspecting 'e'.  So at least use that fact to
   assert that the two types agree. */
static IRAtom* assignNew ( HChar cat, MCEnv* mce, IRType ty, IRExpr* e )
{
   TempKind k;
   IRTemp   t;
   IRType   tyE = typeOfIRExpr(mce->sb->tyenv, e);
   tl_assert(tyE == ty); /* so 'ty' is redundant (!) */
   /* The category tag determines the kind of tmp to allocate. */
   switch (cat) {
      case 'V': k = VSh;  break;
      case 'B': k = BSh;  break;
      case 'C': k = Orig; break;
                /* happens when we are making up new "orig"
                   expressions, for IRCAS handling */
      default: tl_assert(0);
   }
   t = newTemp(mce, ty, k);
   assign(cat, mce, t, e);
   return mkexpr(t);
}
438
439
440/*------------------------------------------------------------*/
441/*--- Constructing definedness primitive ops ---*/
442/*------------------------------------------------------------*/
443
444/* --------- Defined-if-either-defined --------- */
445
446static IRAtom* mkDifD8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
447 tl_assert(isShadowAtom(mce,a1));
448 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000449 return assignNew('V', mce, Ity_I8, binop(Iop_And8, a1, a2));
sewardj95448072004-11-22 20:19:51 +0000450}
451
452static IRAtom* mkDifD16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
453 tl_assert(isShadowAtom(mce,a1));
454 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000455 return assignNew('V', mce, Ity_I16, binop(Iop_And16, a1, a2));
sewardj95448072004-11-22 20:19:51 +0000456}
457
458static IRAtom* mkDifD32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
459 tl_assert(isShadowAtom(mce,a1));
460 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000461 return assignNew('V', mce, Ity_I32, binop(Iop_And32, a1, a2));
sewardj95448072004-11-22 20:19:51 +0000462}
463
sewardj7010f6e2004-12-10 13:35:22 +0000464static IRAtom* mkDifD64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
465 tl_assert(isShadowAtom(mce,a1));
466 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000467 return assignNew('V', mce, Ity_I64, binop(Iop_And64, a1, a2));
sewardj7010f6e2004-12-10 13:35:22 +0000468}
469
sewardj20d38f22005-02-07 23:50:18 +0000470static IRAtom* mkDifDV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
sewardj170ee212004-12-10 18:57:51 +0000471 tl_assert(isShadowAtom(mce,a1));
472 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000473 return assignNew('V', mce, Ity_V128, binop(Iop_AndV128, a1, a2));
sewardj170ee212004-12-10 18:57:51 +0000474}
475
sewardj95448072004-11-22 20:19:51 +0000476/* --------- Undefined-if-either-undefined --------- */
477
478static IRAtom* mkUifU8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
479 tl_assert(isShadowAtom(mce,a1));
480 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000481 return assignNew('V', mce, Ity_I8, binop(Iop_Or8, a1, a2));
sewardj95448072004-11-22 20:19:51 +0000482}
483
484static IRAtom* mkUifU16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
485 tl_assert(isShadowAtom(mce,a1));
486 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000487 return assignNew('V', mce, Ity_I16, binop(Iop_Or16, a1, a2));
sewardj95448072004-11-22 20:19:51 +0000488}
489
490static IRAtom* mkUifU32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
491 tl_assert(isShadowAtom(mce,a1));
492 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000493 return assignNew('V', mce, Ity_I32, binop(Iop_Or32, a1, a2));
sewardj95448072004-11-22 20:19:51 +0000494}
495
496static IRAtom* mkUifU64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
497 tl_assert(isShadowAtom(mce,a1));
498 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000499 return assignNew('V', mce, Ity_I64, binop(Iop_Or64, a1, a2));
sewardj95448072004-11-22 20:19:51 +0000500}
501
sewardj20d38f22005-02-07 23:50:18 +0000502static IRAtom* mkUifUV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
sewardj3245c912004-12-10 14:58:26 +0000503 tl_assert(isShadowAtom(mce,a1));
504 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000505 return assignNew('V', mce, Ity_V128, binop(Iop_OrV128, a1, a2));
sewardj3245c912004-12-10 14:58:26 +0000506}
507
sewardje50a1b12004-12-17 01:24:54 +0000508static IRAtom* mkUifU ( MCEnv* mce, IRType vty, IRAtom* a1, IRAtom* a2 ) {
sewardj95448072004-11-22 20:19:51 +0000509 switch (vty) {
sewardje50a1b12004-12-17 01:24:54 +0000510 case Ity_I8: return mkUifU8(mce, a1, a2);
sewardja1d93302004-12-12 16:45:06 +0000511 case Ity_I16: return mkUifU16(mce, a1, a2);
512 case Ity_I32: return mkUifU32(mce, a1, a2);
513 case Ity_I64: return mkUifU64(mce, a1, a2);
sewardj20d38f22005-02-07 23:50:18 +0000514 case Ity_V128: return mkUifUV128(mce, a1, a2);
sewardj95448072004-11-22 20:19:51 +0000515 default:
516 VG_(printf)("\n"); ppIRType(vty); VG_(printf)("\n");
517 VG_(tool_panic)("memcheck:mkUifU");
njn25e49d8e72002-09-23 09:36:25 +0000518 }
519}
520
sewardj95448072004-11-22 20:19:51 +0000521/* --------- The Left-family of operations. --------- */
njn25e49d8e72002-09-23 09:36:25 +0000522
sewardj95448072004-11-22 20:19:51 +0000523static IRAtom* mkLeft8 ( MCEnv* mce, IRAtom* a1 ) {
524 tl_assert(isShadowAtom(mce,a1));
sewardj7cf4e6b2008-05-01 20:24:26 +0000525 return assignNew('V', mce, Ity_I8, unop(Iop_Left8, a1));
sewardj95448072004-11-22 20:19:51 +0000526}
527
528static IRAtom* mkLeft16 ( MCEnv* mce, IRAtom* a1 ) {
529 tl_assert(isShadowAtom(mce,a1));
sewardj7cf4e6b2008-05-01 20:24:26 +0000530 return assignNew('V', mce, Ity_I16, unop(Iop_Left16, a1));
sewardj95448072004-11-22 20:19:51 +0000531}
532
533static IRAtom* mkLeft32 ( MCEnv* mce, IRAtom* a1 ) {
534 tl_assert(isShadowAtom(mce,a1));
sewardj7cf4e6b2008-05-01 20:24:26 +0000535 return assignNew('V', mce, Ity_I32, unop(Iop_Left32, a1));
sewardj95448072004-11-22 20:19:51 +0000536}
537
sewardj681be302005-01-15 20:43:58 +0000538static IRAtom* mkLeft64 ( MCEnv* mce, IRAtom* a1 ) {
539 tl_assert(isShadowAtom(mce,a1));
sewardj7cf4e6b2008-05-01 20:24:26 +0000540 return assignNew('V', mce, Ity_I64, unop(Iop_Left64, a1));
sewardj681be302005-01-15 20:43:58 +0000541}
542
sewardj95448072004-11-22 20:19:51 +0000543/* --------- 'Improvement' functions for AND/OR. --------- */
544
545/* ImproveAND(data, vbits) = data OR vbits. Defined (0) data 0s give
546 defined (0); all other -> undefined (1).
547*/
548static IRAtom* mkImproveAND8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
njn25e49d8e72002-09-23 09:36:25 +0000549{
sewardj95448072004-11-22 20:19:51 +0000550 tl_assert(isOriginalAtom(mce, data));
551 tl_assert(isShadowAtom(mce, vbits));
552 tl_assert(sameKindedAtoms(data, vbits));
sewardj7cf4e6b2008-05-01 20:24:26 +0000553 return assignNew('V', mce, Ity_I8, binop(Iop_Or8, data, vbits));
sewardj95448072004-11-22 20:19:51 +0000554}
njn25e49d8e72002-09-23 09:36:25 +0000555
sewardj95448072004-11-22 20:19:51 +0000556static IRAtom* mkImproveAND16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
557{
558 tl_assert(isOriginalAtom(mce, data));
559 tl_assert(isShadowAtom(mce, vbits));
560 tl_assert(sameKindedAtoms(data, vbits));
sewardj7cf4e6b2008-05-01 20:24:26 +0000561 return assignNew('V', mce, Ity_I16, binop(Iop_Or16, data, vbits));
sewardj95448072004-11-22 20:19:51 +0000562}
njn25e49d8e72002-09-23 09:36:25 +0000563
sewardj95448072004-11-22 20:19:51 +0000564static IRAtom* mkImproveAND32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
565{
566 tl_assert(isOriginalAtom(mce, data));
567 tl_assert(isShadowAtom(mce, vbits));
568 tl_assert(sameKindedAtoms(data, vbits));
sewardj7cf4e6b2008-05-01 20:24:26 +0000569 return assignNew('V', mce, Ity_I32, binop(Iop_Or32, data, vbits));
sewardj95448072004-11-22 20:19:51 +0000570}
njn25e49d8e72002-09-23 09:36:25 +0000571
sewardj7010f6e2004-12-10 13:35:22 +0000572static IRAtom* mkImproveAND64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
573{
574 tl_assert(isOriginalAtom(mce, data));
575 tl_assert(isShadowAtom(mce, vbits));
576 tl_assert(sameKindedAtoms(data, vbits));
sewardj7cf4e6b2008-05-01 20:24:26 +0000577 return assignNew('V', mce, Ity_I64, binop(Iop_Or64, data, vbits));
sewardj7010f6e2004-12-10 13:35:22 +0000578}
579
sewardj20d38f22005-02-07 23:50:18 +0000580static IRAtom* mkImproveANDV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
sewardj170ee212004-12-10 18:57:51 +0000581{
582 tl_assert(isOriginalAtom(mce, data));
583 tl_assert(isShadowAtom(mce, vbits));
584 tl_assert(sameKindedAtoms(data, vbits));
sewardj7cf4e6b2008-05-01 20:24:26 +0000585 return assignNew('V', mce, Ity_V128, binop(Iop_OrV128, data, vbits));
sewardj170ee212004-12-10 18:57:51 +0000586}
587
sewardj95448072004-11-22 20:19:51 +0000588/* ImproveOR(data, vbits) = ~data OR vbits. Defined (0) data 1s give
589 defined (0); all other -> undefined (1).
590*/
591static IRAtom* mkImproveOR8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
592{
593 tl_assert(isOriginalAtom(mce, data));
594 tl_assert(isShadowAtom(mce, vbits));
595 tl_assert(sameKindedAtoms(data, vbits));
596 return assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +0000597 'V', mce, Ity_I8,
sewardj95448072004-11-22 20:19:51 +0000598 binop(Iop_Or8,
sewardj7cf4e6b2008-05-01 20:24:26 +0000599 assignNew('V', mce, Ity_I8, unop(Iop_Not8, data)),
sewardj95448072004-11-22 20:19:51 +0000600 vbits) );
601}
njn25e49d8e72002-09-23 09:36:25 +0000602
sewardj95448072004-11-22 20:19:51 +0000603static IRAtom* mkImproveOR16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
604{
605 tl_assert(isOriginalAtom(mce, data));
606 tl_assert(isShadowAtom(mce, vbits));
607 tl_assert(sameKindedAtoms(data, vbits));
608 return assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +0000609 'V', mce, Ity_I16,
sewardj95448072004-11-22 20:19:51 +0000610 binop(Iop_Or16,
sewardj7cf4e6b2008-05-01 20:24:26 +0000611 assignNew('V', mce, Ity_I16, unop(Iop_Not16, data)),
sewardj95448072004-11-22 20:19:51 +0000612 vbits) );
613}
njn25e49d8e72002-09-23 09:36:25 +0000614
sewardj95448072004-11-22 20:19:51 +0000615static IRAtom* mkImproveOR32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
616{
617 tl_assert(isOriginalAtom(mce, data));
618 tl_assert(isShadowAtom(mce, vbits));
619 tl_assert(sameKindedAtoms(data, vbits));
620 return assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +0000621 'V', mce, Ity_I32,
sewardj95448072004-11-22 20:19:51 +0000622 binop(Iop_Or32,
sewardj7cf4e6b2008-05-01 20:24:26 +0000623 assignNew('V', mce, Ity_I32, unop(Iop_Not32, data)),
sewardj95448072004-11-22 20:19:51 +0000624 vbits) );
625}
626
sewardj7010f6e2004-12-10 13:35:22 +0000627static IRAtom* mkImproveOR64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
628{
629 tl_assert(isOriginalAtom(mce, data));
630 tl_assert(isShadowAtom(mce, vbits));
631 tl_assert(sameKindedAtoms(data, vbits));
632 return assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +0000633 'V', mce, Ity_I64,
sewardj7010f6e2004-12-10 13:35:22 +0000634 binop(Iop_Or64,
sewardj7cf4e6b2008-05-01 20:24:26 +0000635 assignNew('V', mce, Ity_I64, unop(Iop_Not64, data)),
sewardj7010f6e2004-12-10 13:35:22 +0000636 vbits) );
637}
638
sewardj20d38f22005-02-07 23:50:18 +0000639static IRAtom* mkImproveORV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
sewardj170ee212004-12-10 18:57:51 +0000640{
641 tl_assert(isOriginalAtom(mce, data));
642 tl_assert(isShadowAtom(mce, vbits));
643 tl_assert(sameKindedAtoms(data, vbits));
644 return assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +0000645 'V', mce, Ity_V128,
sewardj20d38f22005-02-07 23:50:18 +0000646 binop(Iop_OrV128,
sewardj7cf4e6b2008-05-01 20:24:26 +0000647 assignNew('V', mce, Ity_V128, unop(Iop_NotV128, data)),
sewardj170ee212004-12-10 18:57:51 +0000648 vbits) );
649}
650
sewardj95448072004-11-22 20:19:51 +0000651/* --------- Pessimising casts. --------- */
652
/* Pessimising cast: collapse the shadow value 'vbits' down to a
   single definedness bit, then widen that bit out (by sign-extension)
   to fill dst_ty.  The result is all-zeroes (fully defined) iff every
   bit of vbits is zero, and all-ones (fully undefined) otherwise. */
static IRAtom* mkPCastTo( MCEnv* mce, IRType dst_ty, IRAtom* vbits )
{
   IRType  src_ty;
   IRAtom* tmp1;
   /* Note, dst_ty is a shadow type, not an original type. */
   /* First of all, collapse vbits down to a single bit. */
   tl_assert(isShadowAtom(mce,vbits));
   src_ty = typeOfIRExpr(mce->sb->tyenv, vbits);

   /* Fast-track some common cases: CmpwNEZ{32,64} collapse and widen
      in a single IROp, avoiding a round trip through Ity_I1. */
   if (src_ty == Ity_I32 && dst_ty == Ity_I32)
      return assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));

   if (src_ty == Ity_I64 && dst_ty == Ity_I64)
      return assignNew('V', mce, Ity_I64, unop(Iop_CmpwNEZ64, vbits));

   if (src_ty == Ity_I32 && dst_ty == Ity_I64) {
      /* Collapse the I32 to an all-0s/all-1s I32, then duplicate it
         into both halves of the I64. */
      IRAtom* tmp = assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));
      return assignNew('V', mce, Ity_I64, binop(Iop_32HLto64, tmp, tmp));
   }

   /* Else do it the slow way .. */
   /* Stage 1: reduce src_ty down to a single Ity_I1 bit, which is 1
      iff any bit of vbits is nonzero (undefined). */
   tmp1   = NULL;
   switch (src_ty) {
      case Ity_I1:
         tmp1 = vbits;
         break;
      case Ity_I8:
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ8, vbits));
         break;
      case Ity_I16:
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ16, vbits));
         break;
      case Ity_I32:
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ32, vbits));
         break;
      case Ity_I64:
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ64, vbits));
         break;
      case Ity_I128: {
         /* Gah.  Chop it in half, OR the halves together, and compare
            that with zero. */
         IRAtom* tmp2 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, vbits));
         IRAtom* tmp3 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, vbits));
         IRAtom* tmp4 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp2, tmp3));
         tmp1         = assignNew('V', mce, Ity_I1,
                                       unop(Iop_CmpNEZ64, tmp4));
         break;
      }
      default:
         ppIRType(src_ty);
         VG_(tool_panic)("mkPCastTo(1)");
   }
   tl_assert(tmp1);
   /* Stage 2: now widen the single bit up to the dst type, by
      sign-extending it so it fills every bit position. */
   switch (dst_ty) {
      case Ity_I1:
         return tmp1;
      case Ity_I8:
         return assignNew('V', mce, Ity_I8, unop(Iop_1Sto8, tmp1));
      case Ity_I16:
         return assignNew('V', mce, Ity_I16, unop(Iop_1Sto16, tmp1));
      case Ity_I32:
         return assignNew('V', mce, Ity_I32, unop(Iop_1Sto32, tmp1));
      case Ity_I64:
         return assignNew('V', mce, Ity_I64, unop(Iop_1Sto64, tmp1));
      case Ity_V128:
         /* Widen to I64 first, then duplicate into both V128 halves. */
         tmp1 = assignNew('V', mce, Ity_I64,  unop(Iop_1Sto64, tmp1));
         tmp1 = assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128, tmp1, tmp1));
         return tmp1;
      case Ity_I128:
         /* Likewise for I128: two sign-extended I64 halves. */
         tmp1 = assignNew('V', mce, Ity_I64,  unop(Iop_1Sto64, tmp1));
         tmp1 = assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, tmp1, tmp1));
         return tmp1;
      default:
         ppIRType(dst_ty);
         VG_(tool_panic)("mkPCastTo(2)");
   }
}
732
sewardjd5204dc2004-12-31 01:16:11 +0000733/* --------- Accurate interpretation of CmpEQ/CmpNE. --------- */
734/*
735 Normally, we can do CmpEQ/CmpNE by doing UifU on the arguments, and
736 PCasting to Ity_I1. However, sometimes it is necessary to be more
737 accurate. The insight is that the result is defined if two
738 corresponding bits can be found, one from each argument, so that
739 both bits are defined but are different -- that makes EQ say "No"
740 and NE say "Yes". Hence, we compute an improvement term and DifD
741 it onto the "normal" (UifU) result.
742
743 The result is:
744
745 PCastTo<1> (
sewardje6f8af42005-07-06 18:48:59 +0000746 -- naive version
747 PCastTo<sz>( UifU<sz>(vxx, vyy) )
748
sewardjd5204dc2004-12-31 01:16:11 +0000749 `DifD<sz>`
sewardje6f8af42005-07-06 18:48:59 +0000750
751 -- improvement term
752 PCastTo<sz>( PCast<sz>( CmpEQ<sz> ( vec, 1...1 ) ) )
sewardjd5204dc2004-12-31 01:16:11 +0000753 )
sewardje6f8af42005-07-06 18:48:59 +0000754
sewardjd5204dc2004-12-31 01:16:11 +0000755 where
756 vec contains 0 (defined) bits where the corresponding arg bits
sewardje6f8af42005-07-06 18:48:59 +0000757 are defined but different, and 1 bits otherwise.
sewardjd5204dc2004-12-31 01:16:11 +0000758
sewardje6f8af42005-07-06 18:48:59 +0000759 vec = Or<sz>( vxx, // 0 iff bit defined
760 vyy, // 0 iff bit defined
761 Not<sz>(Xor<sz>( xx, yy )) // 0 iff bits different
762 )
763
764 If any bit of vec is 0, the result is defined and so the
765 improvement term should produce 0...0, else it should produce
766 1...1.
767
768 Hence require for the improvement term:
769
770 if vec == 1...1 then 1...1 else 0...0
771 ->
772 PCast<sz>( CmpEQ<sz> ( vec, 1...1 ) )
773
774 This was extensively re-analysed and checked on 6 July 05.
sewardjd5204dc2004-12-31 01:16:11 +0000775*/
/* Implements the scheme derived in the large comment just above:
   DifD the naive (UifU) result with an improvement term which is
   all-0s (defined) whenever some pair of corresponding argument bits
   is defined-but-different.  Returns an Ity_I1 shadow value. */
static IRAtom* expensiveCmpEQorNE ( MCEnv*  mce,
                                    IRType  ty,
                                    IRAtom* vxx, IRAtom* vyy,
                                    IRAtom* xx, IRAtom* yy )
{
   IRAtom *naive, *vec, *improvement_term;
   IRAtom *improved, *final_cast, *top;
   IROp   opDIFD, opUIFU, opXOR, opNOT, opCMP, opOR;

   tl_assert(isShadowAtom(mce,vxx));
   tl_assert(isShadowAtom(mce,vyy));
   tl_assert(isOriginalAtom(mce,xx));
   tl_assert(isOriginalAtom(mce,yy));
   tl_assert(sameKindedAtoms(vxx,xx));
   tl_assert(sameKindedAtoms(vyy,yy));

   /* Select the IROps for the operand width.  Note DifD is And
      (defined-if-either-defined) and UifU is Or, per the 0=defined
      encoding of V bits. */
   switch (ty) {
      case Ity_I32:
         opOR   = Iop_Or32;
         opDIFD = Iop_And32;
         opUIFU = Iop_Or32;
         opNOT  = Iop_Not32;
         opXOR  = Iop_Xor32;
         opCMP  = Iop_CmpEQ32;
         top    = mkU32(0xFFFFFFFF);
         break;
      case Ity_I64:
         opOR   = Iop_Or64;
         opDIFD = Iop_And64;
         opUIFU = Iop_Or64;
         opNOT  = Iop_Not64;
         opXOR  = Iop_Xor64;
         opCMP  = Iop_CmpEQ64;
         top    = mkU64(0xFFFFFFFFFFFFFFFFULL);
         break;
      default:
         VG_(tool_panic)("expensiveCmpEQorNE");
   }

   /* naive = PCast( vxx UifU vyy ) -- the standard lazy result. */
   naive
      = mkPCastTo(mce,ty,
                  assignNew('V', mce, ty, binop(opUIFU, vxx, vyy)));

   /* vec = vxx | vyy | ~(xx ^ yy): a 0 bit exactly where both args
      are defined AND differ. */
   vec
      = assignNew(
           'V', mce,ty,
           binop( opOR,
                  assignNew('V', mce,ty, binop(opOR, vxx, vyy)),
                  assignNew(
                     'V', mce,ty,
                     unop( opNOT,
                           assignNew('V', mce,ty, binop(opXOR, xx, yy))))));

   /* improvement_term = PCast( vec == 1...1 ): all-0s (defined) iff
      some bit of vec is 0. */
   improvement_term
      = mkPCastTo( mce,ty,
                   assignNew('V', mce,Ity_I1, binop(opCMP, vec, top)));

   /* improved = naive DifD improvement_term. */
   improved
      = assignNew( 'V', mce,ty, binop(opDIFD, naive, improvement_term) );

   /* Squash down to the single bit the comparison result needs. */
   final_cast
      = mkPCastTo( mce, Ity_I1, improved );

   return final_cast;
}
841
sewardj95448072004-11-22 20:19:51 +0000842
sewardj992dff92005-10-07 11:08:55 +0000843/* --------- Semi-accurate interpretation of CmpORD. --------- */
844
845/* CmpORD32{S,U} does PowerPC-style 3-way comparisons:
846
847 CmpORD32S(x,y) = 1<<3 if x <s y
848 = 1<<2 if x >s y
849 = 1<<1 if x == y
850
851 and similarly the unsigned variant. The default interpretation is:
852
853 CmpORD32{S,U}#(x,y,x#,y#) = PCast(x# `UifU` y#)
sewardj1bc82102005-12-23 00:16:24 +0000854 & (7<<1)
sewardj992dff92005-10-07 11:08:55 +0000855
856 The "& (7<<1)" reflects the fact that all result bits except 3,2,1
857 are zero and therefore defined (viz, zero).
sewardja9e62a92005-10-07 12:13:21 +0000858
859 Also deal with a special case better:
860
861 CmpORD32S(x,0)
862
863 Here, bit 3 (LT) of the result is a copy of the top bit of x and
864 will be defined even if the rest of x isn't. In which case we do:
865
866 CmpORD32S#(x,x#,0,{impliedly 0}#)
sewardj1bc82102005-12-23 00:16:24 +0000867 = PCast(x#) & (3<<1) -- standard interp for GT#,EQ#
868 | (x# >>u 31) << 3 -- LT# = x#[31]
sewardja9e62a92005-10-07 12:13:21 +0000869
sewardj1bc82102005-12-23 00:16:24 +0000870 Analogous handling for CmpORD64{S,U}.
sewardj992dff92005-10-07 11:08:55 +0000871*/
sewardja9e62a92005-10-07 12:13:21 +0000872static Bool isZeroU32 ( IRAtom* e )
873{
874 return
875 toBool( e->tag == Iex_Const
876 && e->Iex.Const.con->tag == Ico_U32
877 && e->Iex.Const.con->Ico.U32 == 0 );
878}
879
sewardj1bc82102005-12-23 00:16:24 +0000880static Bool isZeroU64 ( IRAtom* e )
sewardj992dff92005-10-07 11:08:55 +0000881{
sewardj1bc82102005-12-23 00:16:24 +0000882 return
883 toBool( e->tag == Iex_Const
884 && e->Iex.Const.con->tag == Ico_U64
885 && e->Iex.Const.con->Ico.U64 == 0 );
886}
887
/* Shadow computation for CmpORD{32,64}{S,U} -- see the scheme in the
   comment above.  xxhash/yyhash are the shadows of xx/yy.  Handles
   both widths via the m64 flag; the signed-compare-against-zero case
   gets the more accurate interpretation. */
static IRAtom* doCmpORD ( MCEnv*  mce,
                          IROp    cmp_op,
                          IRAtom* xxhash, IRAtom* yyhash,
                          IRAtom* xx, IRAtom* yy )
{
   /* Pick width-dependent ops/constants from the comparison op. */
   Bool   m64    = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U;
   Bool   syned  = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD32S;
   IROp   opOR   = m64 ? Iop_Or64  : Iop_Or32;
   IROp   opAND  = m64 ? Iop_And64 : Iop_And32;
   IROp   opSHL  = m64 ? Iop_Shl64 : Iop_Shl32;
   IROp   opSHR  = m64 ? Iop_Shr64 : Iop_Shr32;
   IRType ty     = m64 ? Ity_I64   : Ity_I32;
   Int    width  = m64 ? 64        : 32;

   Bool (*isZero)(IRAtom*) = m64 ? isZeroU64 : isZeroU32;

   IRAtom* threeLeft1 = NULL;
   IRAtom* sevenLeft1 = NULL;

   tl_assert(isShadowAtom(mce,xxhash));
   tl_assert(isShadowAtom(mce,yyhash));
   tl_assert(isOriginalAtom(mce,xx));
   tl_assert(isOriginalAtom(mce,yy));
   tl_assert(sameKindedAtoms(xxhash,xx));
   tl_assert(sameKindedAtoms(yyhash,yy));
   tl_assert(cmp_op == Iop_CmpORD32S || cmp_op == Iop_CmpORD32U
             || cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U);

   if (0) {
      ppIROp(cmp_op); VG_(printf)(" ");
      ppIRExpr(xx); VG_(printf)(" "); ppIRExpr( yy ); VG_(printf)("\n");
   }

   if (syned && isZero(yy)) {
      /* fancy interpretation */
      /* if yy is zero, then it must be fully defined (zero#). */
      tl_assert(isZero(yyhash));
      threeLeft1 = m64 ? mkU64(3<<1) : mkU32(3<<1);
      /* (PCast(xxhash) & (3<<1)) | ((xxhash >>u width-1) << 3):
         GT#/EQ# take the pessimistic interpretation, but LT# is just
         the definedness of xx's sign bit. */
      return
         binop(
            opOR,
            assignNew(
               'V', mce,ty,
               binop(
                  opAND,
                  mkPCastTo(mce,ty, xxhash),
                  threeLeft1
               )),
            assignNew(
               'V', mce,ty,
               binop(
                  opSHL,
                  assignNew(
                     'V', mce,ty,
                     binop(opSHR, xxhash, mkU8(width-1))),
                  mkU8(3)
               ))
         );
   } else {
      /* standard interpretation */
      /* PCast(xxhash UifU yyhash) & (7<<1): only result bits 3,2,1
         can be undefined; the rest are known zero. */
      sevenLeft1 = m64 ? mkU64(7<<1) : mkU32(7<<1);
      return
         binop(
            opAND,
            mkPCastTo( mce,ty,
                       mkUifU(mce,ty, xxhash,yyhash)),
            sevenLeft1
         );
   }
}
958
959
sewardj95448072004-11-22 20:19:51 +0000960/*------------------------------------------------------------*/
961/*--- Emit a test and complaint if something is undefined. ---*/
962/*------------------------------------------------------------*/
963
sewardj7cf4e6b2008-05-01 20:24:26 +0000964static IRAtom* schemeE ( MCEnv* mce, IRExpr* e ); /* fwds */
965
966
sewardj95448072004-11-22 20:19:51 +0000967/* Set the annotations on a dirty helper to indicate that the stack
968 pointer and instruction pointers might be read. This is the
969 behaviour of all 'emit-a-complaint' style functions we might
970 call. */
971
972static void setHelperAnns ( MCEnv* mce, IRDirty* di ) {
973 di->nFxState = 2;
974 di->fxState[0].fx = Ifx_Read;
975 di->fxState[0].offset = mce->layout->offset_SP;
976 di->fxState[0].size = mce->layout->sizeof_SP;
977 di->fxState[1].fx = Ifx_Read;
978 di->fxState[1].offset = mce->layout->offset_IP;
979 di->fxState[1].size = mce->layout->sizeof_IP;
980}
981
982
983/* Check the supplied **original** atom for undefinedness, and emit a
984 complaint if so. Once that happens, mark it as defined. This is
985 possible because the atom is either a tmp or literal. If it's a
986 tmp, it will be shadowed by a tmp, and so we can set the shadow to
987 be defined. In fact as mentioned above, we will have to allocate a
988 new tmp to carry the new 'defined' shadow value, and update the
989 original->tmp mapping accordingly; we cannot simply assign a new
990 value to an existing shadow tmp as this breaks SSAness -- resulting
991 in the post-instrumentation sanity checker spluttering in disapproval.
992*/
static void complainIfUndefined ( MCEnv* mce, IRAtom* atom )
{
   IRAtom*  vatom;
   IRType   ty;
   Int      sz;
   IRDirty* di;
   IRAtom*  cond;
   IRAtom*  origin;
   void*    fn;
   HChar*   nm;
   IRExpr** args;
   Int      nargs;

   // Don't do V bit tests if we're not reporting undefined value errors.
   if (MC_(clo_mc_level) == 1)
      return;

   /* Since the original expression is atomic, there's no duplicated
      work generated by making multiple V-expressions for it.  So we
      don't really care about the possibility that someone else may
      also create a V-interpretion for it. */
   tl_assert(isOriginalAtom(mce, atom));
   vatom = expr2vbits( mce, atom );
   tl_assert(isShadowAtom(mce, vatom));
   tl_assert(sameKindedAtoms(atom, vatom));

   ty = typeOfIRExpr(mce->sb->tyenv, vatom);

   /* sz is only used for constructing the error message; it also
      selects which check-failure helper is called below. */
   sz = ty==Ity_I1 ? 0 : sizeofIRType(ty);

   /* Pessimistically collapse the V bits: cond will be 0 if all
      defined, and 1 if any not defined.  It guards the dirty call. */
   cond = mkPCastTo( mce, Ity_I1, vatom );

   /* Get the origin info for the value we are about to check.  At
      least, if we are doing origin tracking.  If not, use a dummy
      zero origin. */
   if (MC_(clo_mc_level) == 3) {
      origin = schemeE( mce, atom );
      if (mce->hWordTy == Ity_I64) {
         /* Origins are 32-bit; widen to the host word size. */
         origin = assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, origin) );
      }
   } else {
      origin = NULL;
   }

   fn    = NULL;
   nm    = NULL;
   args  = NULL;
   nargs = -1;

   /* Select the complaint helper by value size, and by whether an
      origin is being passed along (the _w_o vs _no_o variants). */
   switch (sz) {
      case 0:
         if (origin) {
            fn    = &MC_(helperc_value_check0_fail_w_o);
            nm    = "MC_(helperc_value_check0_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check0_fail_no_o);
            nm    = "MC_(helperc_value_check0_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 1:
         if (origin) {
            fn    = &MC_(helperc_value_check1_fail_w_o);
            nm    = "MC_(helperc_value_check1_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check1_fail_no_o);
            nm    = "MC_(helperc_value_check1_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 4:
         if (origin) {
            fn    = &MC_(helperc_value_check4_fail_w_o);
            nm    = "MC_(helperc_value_check4_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check4_fail_no_o);
            nm    = "MC_(helperc_value_check4_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 8:
         if (origin) {
            fn    = &MC_(helperc_value_check8_fail_w_o);
            nm    = "MC_(helperc_value_check8_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check8_fail_no_o);
            nm    = "MC_(helperc_value_check8_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 2:
      case 16:
         /* No size-specific helper for these; pass sz explicitly. */
         if (origin) {
            fn    = &MC_(helperc_value_checkN_fail_w_o);
            nm    = "MC_(helperc_value_checkN_fail_w_o)";
            args  = mkIRExprVec_2( mkIRExpr_HWord( sz ), origin);
            nargs = 2;
         } else {
            fn    = &MC_(helperc_value_checkN_fail_no_o);
            nm    = "MC_(helperc_value_checkN_fail_no_o)";
            args  = mkIRExprVec_1( mkIRExpr_HWord( sz ) );
            nargs = 1;
         }
         break;
      default:
         VG_(tool_panic)("unexpected szB");
   }

   tl_assert(fn);
   tl_assert(nm);
   tl_assert(args);
   tl_assert(nargs >= 0 && nargs <= 2);
   tl_assert( (MC_(clo_mc_level) == 3 && origin != NULL)
              || (MC_(clo_mc_level) == 2 && origin == NULL) );

   /* Emit the guarded dirty call that reports the error at run time. */
   di = unsafeIRDirty_0_N( nargs/*regparms*/, nm,
                           VG_(fnptr_to_fnentry)( fn ), args );
   di->guard = cond;
   setHelperAnns( mce, di );
   stmt( 'V', mce, IRStmt_Dirty(di));

   /* Set the shadow tmp to be defined.  First, update the
      orig->shadow tmp mapping to reflect the fact that this shadow is
      getting a new value. */
   tl_assert(isIRAtom(vatom));
   /* sameKindedAtoms ... */
   if (vatom->tag == Iex_RdTmp) {
      tl_assert(atom->tag == Iex_RdTmp);
      /* Allocate a fresh shadow tmp (keeps the IR in SSA form) and
         assign it the all-defined value. */
      newShadowTmpV(mce, atom->Iex.RdTmp.tmp);
      assign('V', mce, findShadowTmpV(mce, atom->Iex.RdTmp.tmp),
                       definedOfType(ty));
   }
}
1140
1141
1142/*------------------------------------------------------------*/
1143/*--- Shadowing PUTs/GETs, and indexed variants thereof ---*/
1144/*------------------------------------------------------------*/
1145
1146/* Examine the always-defined sections declared in layout to see if
1147 the (offset,size) section is within one. Note, it is an error to
1148 partially fall into such a region: (offset,size) should either be
1149 completely in such a region or completely not-in such a region.
1150*/
1151static Bool isAlwaysDefd ( MCEnv* mce, Int offset, Int size )
1152{
1153 Int minoffD, maxoffD, i;
1154 Int minoff = offset;
1155 Int maxoff = minoff + size - 1;
1156 tl_assert((minoff & ~0xFFFF) == 0);
1157 tl_assert((maxoff & ~0xFFFF) == 0);
1158
1159 for (i = 0; i < mce->layout->n_alwaysDefd; i++) {
1160 minoffD = mce->layout->alwaysDefd[i].offset;
1161 maxoffD = minoffD + mce->layout->alwaysDefd[i].size - 1;
1162 tl_assert((minoffD & ~0xFFFF) == 0);
1163 tl_assert((maxoffD & ~0xFFFF) == 0);
1164
1165 if (maxoff < minoffD || maxoffD < minoff)
1166 continue; /* no overlap */
1167 if (minoff >= minoffD && maxoff <= maxoffD)
1168 return True; /* completely contained in an always-defd section */
1169
1170 VG_(tool_panic)("memcheck:isAlwaysDefd:partial overlap");
1171 }
1172 return False; /* could not find any containing section */
1173}
1174
1175
1176/* Generate into bb suitable actions to shadow this Put. If the state
1177 slice is marked 'always defined', do nothing. Otherwise, write the
1178 supplied V bits to the shadow state. We can pass in either an
1179 original atom or a V-atom, but not both. In the former case the
1180 relevant V-bits are then generated from the original.
1181*/
static
void do_shadow_PUT ( MCEnv* mce,  Int offset,
                     IRAtom* atom, IRAtom* vatom )
{
   IRType ty;

   // Don't do shadow PUTs if we're not doing undefined value checking.
   // Their absence lets Vex's optimiser remove all the shadow computation
   // that they depend on, which includes GETs of the shadow registers.
   if (MC_(clo_mc_level) == 1)
      return;

   /* Exactly one of (atom, vatom) must be supplied: either the
      original value, from which the V bits are computed here, or the
      already-computed V bits. */
   if (atom) {
      tl_assert(!vatom);
      tl_assert(isOriginalAtom(mce, atom));
      vatom = expr2vbits( mce, atom );
   } else {
      tl_assert(vatom);
      tl_assert(isShadowAtom(mce, vatom));
   }

   ty = typeOfIRExpr(mce->sb->tyenv, vatom);
   tl_assert(ty != Ity_I1);
   if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
      /* later: no ... */
      /* emit code to emit a complaint if any of the vbits are 1. */
      /* complainIfUndefined(mce, atom); */
   } else {
      /* Do a plain shadow Put, into the shadow copy of the guest
         state (located total_sizeB beyond the original). */
      stmt( 'V', mce, IRStmt_Put( offset + mce->layout->total_sizeB, vatom ) );
   }
}
1214
1215
1216/* Return an expression which contains the V bits corresponding to the
1217 given GETI (passed in in pieces).
1218*/
1219static
1220void do_shadow_PUTI ( MCEnv* mce,
sewardj0b9d74a2006-12-24 02:24:11 +00001221 IRRegArray* descr,
1222 IRAtom* ix, Int bias, IRAtom* atom )
sewardj95448072004-11-22 20:19:51 +00001223{
sewardj7cf97ee2004-11-28 14:25:01 +00001224 IRAtom* vatom;
1225 IRType ty, tyS;
1226 Int arrSize;;
1227
njn1d0825f2006-03-27 11:37:07 +00001228 // Don't do shadow PUTIs if we're not doing undefined value checking.
1229 // Their absence lets Vex's optimiser remove all the shadow computation
1230 // that they depend on, which includes GETIs of the shadow registers.
sewardj7cf4e6b2008-05-01 20:24:26 +00001231 if (MC_(clo_mc_level) == 1)
njn1d0825f2006-03-27 11:37:07 +00001232 return;
1233
sewardj95448072004-11-22 20:19:51 +00001234 tl_assert(isOriginalAtom(mce,atom));
sewardj7cf97ee2004-11-28 14:25:01 +00001235 vatom = expr2vbits( mce, atom );
sewardj95448072004-11-22 20:19:51 +00001236 tl_assert(sameKindedAtoms(atom, vatom));
sewardj7cf97ee2004-11-28 14:25:01 +00001237 ty = descr->elemTy;
sewardj7cf4e6b2008-05-01 20:24:26 +00001238 tyS = shadowTypeV(ty);
sewardj7cf97ee2004-11-28 14:25:01 +00001239 arrSize = descr->nElems * sizeofIRType(ty);
sewardj95448072004-11-22 20:19:51 +00001240 tl_assert(ty != Ity_I1);
1241 tl_assert(isOriginalAtom(mce,ix));
1242 complainIfUndefined(mce,ix);
1243 if (isAlwaysDefd(mce, descr->base, arrSize)) {
1244 /* later: no ... */
1245 /* emit code to emit a complaint if any of the vbits are 1. */
1246 /* complainIfUndefined(mce, atom); */
1247 } else {
1248 /* Do a cloned version of the Put that refers to the shadow
1249 area. */
sewardj0b9d74a2006-12-24 02:24:11 +00001250 IRRegArray* new_descr
1251 = mkIRRegArray( descr->base + mce->layout->total_sizeB,
1252 tyS, descr->nElems);
sewardj7cf4e6b2008-05-01 20:24:26 +00001253 stmt( 'V', mce, IRStmt_PutI( new_descr, ix, bias, vatom ));
sewardj95448072004-11-22 20:19:51 +00001254 }
1255}
1256
1257
1258/* Return an expression which contains the V bits corresponding to the
1259 given GET (passed in in pieces).
1260*/
1261static
1262IRExpr* shadow_GET ( MCEnv* mce, Int offset, IRType ty )
1263{
sewardj7cf4e6b2008-05-01 20:24:26 +00001264 IRType tyS = shadowTypeV(ty);
sewardj95448072004-11-22 20:19:51 +00001265 tl_assert(ty != Ity_I1);
1266 if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
1267 /* Always defined, return all zeroes of the relevant type */
1268 return definedOfType(tyS);
1269 } else {
1270 /* return a cloned version of the Get that refers to the shadow
1271 area. */
sewardj7cf4e6b2008-05-01 20:24:26 +00001272 /* FIXME: this isn't an atom! */
sewardj95448072004-11-22 20:19:51 +00001273 return IRExpr_Get( offset + mce->layout->total_sizeB, tyS );
1274 }
1275}
1276
1277
1278/* Return an expression which contains the V bits corresponding to the
1279 given GETI (passed in in pieces).
1280*/
1281static
sewardj0b9d74a2006-12-24 02:24:11 +00001282IRExpr* shadow_GETI ( MCEnv* mce,
1283 IRRegArray* descr, IRAtom* ix, Int bias )
sewardj95448072004-11-22 20:19:51 +00001284{
1285 IRType ty = descr->elemTy;
sewardj7cf4e6b2008-05-01 20:24:26 +00001286 IRType tyS = shadowTypeV(ty);
sewardj95448072004-11-22 20:19:51 +00001287 Int arrSize = descr->nElems * sizeofIRType(ty);
1288 tl_assert(ty != Ity_I1);
1289 tl_assert(isOriginalAtom(mce,ix));
1290 complainIfUndefined(mce,ix);
1291 if (isAlwaysDefd(mce, descr->base, arrSize)) {
1292 /* Always defined, return all zeroes of the relevant type */
1293 return definedOfType(tyS);
1294 } else {
1295 /* return a cloned version of the Get that refers to the shadow
1296 area. */
sewardj0b9d74a2006-12-24 02:24:11 +00001297 IRRegArray* new_descr
1298 = mkIRRegArray( descr->base + mce->layout->total_sizeB,
1299 tyS, descr->nElems);
sewardj95448072004-11-22 20:19:51 +00001300 return IRExpr_GetI( new_descr, ix, bias );
1301 }
1302}
1303
1304
1305/*------------------------------------------------------------*/
1306/*--- Generating approximations for unknown operations, ---*/
1307/*--- using lazy-propagate semantics ---*/
1308/*------------------------------------------------------------*/
1309
1310/* Lazy propagation of undefinedness from two values, resulting in the
1311 specified shadow type.
1312*/
static
IRAtom* mkLazy2 ( MCEnv* mce, IRType finalVty, IRAtom* va1, IRAtom* va2 )
{
   IRAtom* at;
   IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
   IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
   tl_assert(isShadowAtom(mce,va1));
   tl_assert(isShadowAtom(mce,va2));

   /* The general case is inefficient because PCast is an expensive
      operation.  Here are some special cases which use PCast only
      once rather than twice. */

   /* I64 x I64 -> I64 */
   if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I64) {
      if (0) VG_(printf)("mkLazy2: I64 x I64 -> I64\n");
      at = mkUifU(mce, Ity_I64, va1, va2);
      at = mkPCastTo(mce, Ity_I64, at);
      return at;
   }

   /* I64 x I64 -> I32 */
   if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I32) {
      if (0) VG_(printf)("mkLazy2: I64 x I64 -> I32\n");
      at = mkUifU(mce, Ity_I64, va1, va2);
      at = mkPCastTo(mce, Ity_I32, at);
      return at;
   }

   /* Debug aid: dump the type combination that fell through to the
      general case (disabled). */
   if (0) {
      VG_(printf)("mkLazy2 ");
      ppIRType(t1);
      VG_(printf)("_");
      ppIRType(t2);
      VG_(printf)("_");
      ppIRType(finalVty);
      VG_(printf)("\n");
   }

   /* General case: force everything via 32-bit intermediaries. */
   at = mkPCastTo(mce, Ity_I32, va1);
   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
   at = mkPCastTo(mce, finalVty, at);
   return at;
}
1358
1359
sewardjed69fdb2006-02-03 16:12:27 +00001360/* 3-arg version of the above. */
static
IRAtom* mkLazy3 ( MCEnv* mce, IRType finalVty,
                  IRAtom* va1, IRAtom* va2, IRAtom* va3 )
{
   IRAtom* at;
   IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
   IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
   IRType t3 = typeOfIRExpr(mce->sb->tyenv, va3);
   tl_assert(isShadowAtom(mce,va1));
   tl_assert(isShadowAtom(mce,va2));
   tl_assert(isShadowAtom(mce,va3));

   /* The general case is inefficient because PCast is an expensive
      operation.  Here are some special cases which use PCast only
      twice rather than three times. */

   /* I32 x I64 x I64 -> I64 */
   /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
   if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64
       && finalVty == Ity_I64) {
      if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I64\n");
      /* Widen 1st arg to I64.  Since 1st arg is typically a rounding
         mode indication which is fully defined, this should get
         folded out later. */
      at = mkPCastTo(mce, Ity_I64, va1);
      /* Now fold in 2nd and 3rd args. */
      at = mkUifU(mce, Ity_I64, at, va2);
      at = mkUifU(mce, Ity_I64, at, va3);
      /* and PCast once again. */
      at = mkPCastTo(mce, Ity_I64, at);
      return at;
   }

   /* I32 x I64 x I64 -> I32 */
   if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64
       && finalVty == Ity_I32) {
      if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I32\n");
      at = mkPCastTo(mce, Ity_I64, va1);
      at = mkUifU(mce, Ity_I64, at, va2);
      at = mkUifU(mce, Ity_I64, at, va3);
      at = mkPCastTo(mce, Ity_I32, at);
      return at;
   }

   /* I32 x I32 x I32 -> I32 */
   /* 32-bit FP idiom, as (eg) happens on ARM */
   if (t1 == Ity_I32 && t2 == Ity_I32 && t3 == Ity_I32
       && finalVty == Ity_I32) {
      if (0) VG_(printf)("mkLazy3: I32 x I32 x I32 -> I32\n");
      at = va1;
      at = mkUifU(mce, Ity_I32, at, va2);
      at = mkUifU(mce, Ity_I32, at, va3);
      at = mkPCastTo(mce, Ity_I32, at);
      return at;
   }

   /* Report the unhandled type combination before asserting. */
   if (1) {
      VG_(printf)("mkLazy3: ");
      ppIRType(t1);
      VG_(printf)(" x ");
      ppIRType(t2);
      VG_(printf)(" x ");
      ppIRType(t3);
      VG_(printf)(" -> ");
      ppIRType(finalVty);
      VG_(printf)("\n");
   }

   /* Unlike mkLazy2, the fallback is deliberately disabled: any type
      combination not special-cased above is treated as a bug. */
   tl_assert(0);
   /* General case: force everything via 32-bit intermediaries. */
   /*
   at = mkPCastTo(mce, Ity_I32, va1);
   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va3));
   at = mkPCastTo(mce, finalVty, at);
   return at;
   */
}
1439
1440
/* 4-arg version of the above.  Lazily propagate definedness for a
   4-ary operation: pessimistically merge the shadow (V-bit) atoms
   va1..va4 into a single shadow value of type 'finalVty'.  Only the
   one profitable special case below is handled; any other type
   signature is printed and then asserted on, so missing cases get
   noticed rather than silently mis-instrumented. */
static
IRAtom* mkLazy4 ( MCEnv* mce, IRType finalVty,
                  IRAtom* va1, IRAtom* va2, IRAtom* va3, IRAtom* va4 )
{
   IRAtom* at;
   IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
   IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
   IRType t3 = typeOfIRExpr(mce->sb->tyenv, va3);
   IRType t4 = typeOfIRExpr(mce->sb->tyenv, va4);
   tl_assert(isShadowAtom(mce,va1));
   tl_assert(isShadowAtom(mce,va2));
   tl_assert(isShadowAtom(mce,va3));
   tl_assert(isShadowAtom(mce,va4));

   /* The general case is inefficient because PCast is an expensive
      operation.  Here are some special cases which use PCast only
      twice rather than three times. */

   /* I32 x I64 x I64 x I64 -> I64 */
   /* Standard FP idiom: rm x FParg1 x FParg2 x FParg3 -> FPresult */
   if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64 && t4 == Ity_I64
       && finalVty == Ity_I64) {
      if (0) VG_(printf)("mkLazy4: I32 x I64 x I64 x I64 -> I64\n");
      /* Widen 1st arg to I64.  Since 1st arg is typically a rounding
         mode indication which is fully defined, this should get
         folded out later. */
      at = mkPCastTo(mce, Ity_I64, va1);
      /* Now fold in 2nd, 3rd, 4th args. */
      at = mkUifU(mce, Ity_I64, at, va2);
      at = mkUifU(mce, Ity_I64, at, va3);
      at = mkUifU(mce, Ity_I64, at, va4);
      /* and PCast once again. */
      at = mkPCastTo(mce, Ity_I64, at);
      return at;
   }

   /* Unhandled signature: dump it to aid adding the missing case. */
   if (1) {
      VG_(printf)("mkLazy4: ");
      ppIRType(t1);
      VG_(printf)(" x ");
      ppIRType(t2);
      VG_(printf)(" x ");
      ppIRType(t3);
      VG_(printf)(" x ");
      ppIRType(t4);
      VG_(printf)(" -> ");
      ppIRType(finalVty);
      VG_(printf)("\n");
   }

   tl_assert(0);
}
1494
1495
sewardj95448072004-11-22 20:19:51 +00001496/* Do the lazy propagation game from a null-terminated vector of
1497 atoms. This is presumably the arguments to a helper call, so the
1498 IRCallee info is also supplied in order that we can know which
1499 arguments should be ignored (via the .mcx_mask field).
1500*/
1501static
1502IRAtom* mkLazyN ( MCEnv* mce,
1503 IRAtom** exprvec, IRType finalVtype, IRCallee* cee )
1504{
sewardj4cc684b2007-08-25 23:09:36 +00001505 Int i;
sewardj95448072004-11-22 20:19:51 +00001506 IRAtom* here;
sewardj4cc684b2007-08-25 23:09:36 +00001507 IRAtom* curr;
1508 IRType mergeTy;
1509 IRType mergeTy64 = True;
1510
1511 /* Decide on the type of the merge intermediary. If all relevant
1512 args are I64, then it's I64. In all other circumstances, use
1513 I32. */
1514 for (i = 0; exprvec[i]; i++) {
1515 tl_assert(i < 32);
1516 tl_assert(isOriginalAtom(mce, exprvec[i]));
1517 if (cee->mcx_mask & (1<<i))
1518 continue;
sewardj1c0ce7a2009-07-01 08:10:49 +00001519 if (typeOfIRExpr(mce->sb->tyenv, exprvec[i]) != Ity_I64)
sewardj4cc684b2007-08-25 23:09:36 +00001520 mergeTy64 = False;
1521 }
1522
1523 mergeTy = mergeTy64 ? Ity_I64 : Ity_I32;
1524 curr = definedOfType(mergeTy);
1525
sewardj95448072004-11-22 20:19:51 +00001526 for (i = 0; exprvec[i]; i++) {
1527 tl_assert(i < 32);
1528 tl_assert(isOriginalAtom(mce, exprvec[i]));
1529 /* Only take notice of this arg if the callee's mc-exclusion
1530 mask does not say it is to be excluded. */
1531 if (cee->mcx_mask & (1<<i)) {
1532 /* the arg is to be excluded from definedness checking. Do
1533 nothing. */
1534 if (0) VG_(printf)("excluding %s(%d)\n", cee->name, i);
1535 } else {
1536 /* calculate the arg's definedness, and pessimistically merge
1537 it in. */
sewardj4cc684b2007-08-25 23:09:36 +00001538 here = mkPCastTo( mce, mergeTy, expr2vbits(mce, exprvec[i]) );
1539 curr = mergeTy64
1540 ? mkUifU64(mce, here, curr)
1541 : mkUifU32(mce, here, curr);
sewardj95448072004-11-22 20:19:51 +00001542 }
1543 }
1544 return mkPCastTo(mce, finalVtype, curr );
1545}
1546
1547
1548/*------------------------------------------------------------*/
1549/*--- Generating expensive sequences for exact carry-chain ---*/
1550/*--- propagation in add/sub and related operations. ---*/
1551/*------------------------------------------------------------*/
1552
/* Bit-precise definedness propagation for add/sub, via explicit
   worst-case carry-chain analysis: compute the result for the two
   extreme interpretations of the undefined input bits (all-0 vs
   all-1) and mark as undefined every result bit that differs, plus
   every bit that was undefined in either input.
   add  -- True for addition, False for subtraction
   ty   -- Ity_I32 or Ity_I64 (anything else panics)
   qaa/qbb -- shadow (V-bit) atoms for aa/bb
   aa/bb   -- the original operands */
static
IRAtom* expensiveAddSub ( MCEnv*  mce,
                          Bool    add,
                          IRType  ty,
                          IRAtom* qaa, IRAtom* qbb,
                          IRAtom* aa,  IRAtom* bb )
{
   IRAtom *a_min, *b_min, *a_max, *b_max;
   IROp   opAND, opOR, opXOR, opNOT, opADD, opSUB;

   tl_assert(isShadowAtom(mce,qaa));
   tl_assert(isShadowAtom(mce,qbb));
   tl_assert(isOriginalAtom(mce,aa));
   tl_assert(isOriginalAtom(mce,bb));
   tl_assert(sameKindedAtoms(qaa,aa));
   tl_assert(sameKindedAtoms(qbb,bb));

   /* Select width-appropriate primops. */
   switch (ty) {
      case Ity_I32:
         opAND = Iop_And32;
         opOR  = Iop_Or32;
         opXOR = Iop_Xor32;
         opNOT = Iop_Not32;
         opADD = Iop_Add32;
         opSUB = Iop_Sub32;
         break;
      case Ity_I64:
         opAND = Iop_And64;
         opOR  = Iop_Or64;
         opXOR = Iop_Xor64;
         opNOT = Iop_Not64;
         opADD = Iop_Add64;
         opSUB = Iop_Sub64;
         break;
      default:
         VG_(tool_panic)("expensiveAddSub");
   }

   /* a_min/b_min: operand with all undefined bits forced to 0;
      a_max/b_max: operand with all undefined bits forced to 1. */

   // a_min = aa & ~qaa
   a_min = assignNew('V', mce,ty,
                     binop(opAND, aa,
                                  assignNew('V', mce,ty, unop(opNOT, qaa))));

   // b_min = bb & ~qbb
   b_min = assignNew('V', mce,ty,
                     binop(opAND, bb,
                                  assignNew('V', mce,ty, unop(opNOT, qbb))));

   // a_max = aa | qaa
   a_max = assignNew('V', mce,ty, binop(opOR, aa, qaa));

   // b_max = bb | qbb
   b_max = assignNew('V', mce,ty, binop(opOR, bb, qbb));

   if (add) {
      // result = (qaa | qbb) | ((a_min + b_min) ^ (a_max + b_max))
      return
      assignNew('V', mce,ty,
                binop( opOR,
                       assignNew('V', mce,ty, binop(opOR, qaa, qbb)),
                       assignNew('V', mce,ty,
                                 binop( opXOR,
                                        assignNew('V', mce,ty, binop(opADD, a_min, b_min)),
                                        assignNew('V', mce,ty, binop(opADD, a_max, b_max))
                                 )
                       )
                )
      );
   } else {
      /* Comment fixed: the second XOR operand is a subtraction
         (opSUB), not an addition as the old comment claimed. */
      // result = (qaa | qbb) | ((a_min - b_max) ^ (a_max - b_min))
      return
      assignNew('V', mce,ty,
                binop( opOR,
                       assignNew('V', mce,ty, binop(opOR, qaa, qbb)),
                       assignNew('V', mce,ty,
                                 binop( opXOR,
                                        assignNew('V', mce,ty, binop(opSUB, a_min, b_max)),
                                        assignNew('V', mce,ty, binop(opSUB, a_max, b_min))
                                 )
                       )
                )
      );
   }

}
1638
1639
1640/*------------------------------------------------------------*/
sewardjaaddbc22005-10-07 09:49:53 +00001641/*--- Scalar shifts. ---*/
1642/*------------------------------------------------------------*/
1643
/* Produce an interpretation for (aa << bb) (or >>s, >>u).  The basic
   idea is to shift the definedness bits by the original shift amount.
   This introduces 0s ("defined") in new positions for left shifts and
   unsigned right shifts, and copies the top definedness bit for
   signed right shifts.  So, conveniently, applying the original shift
   operator to the definedness bits for the left arg is exactly the
   right thing to do:

      (qaa << bb)

   However if the shift amount is undefined then the whole result
   is undefined.  Hence need:

      (qaa << bb) `UifU` PCast(qbb)

   If the shift amount bb is a literal than qbb will say 'all defined'
   and the UifU and PCast will get folded out by post-instrumentation
   optimisation.
*/
/* ty          -- type of the operands/result
   original_op -- the shift primop being instrumented
   qaa/qbb     -- shadow (V-bit) atoms for aa/bb
   aa/bb       -- the original value and shift amount */
static IRAtom* scalarShift ( MCEnv*  mce,
                             IRType  ty,
                             IROp    original_op,
                             IRAtom* qaa, IRAtom* qbb,
                             IRAtom* aa,  IRAtom* bb )
{
   tl_assert(isShadowAtom(mce,qaa));
   tl_assert(isShadowAtom(mce,qbb));
   tl_assert(isOriginalAtom(mce,aa));
   tl_assert(isOriginalAtom(mce,bb));
   tl_assert(sameKindedAtoms(qaa,aa));
   tl_assert(sameKindedAtoms(qbb,bb));
   return
      assignNew(
         'V', mce, ty,
         mkUifU( mce, ty,
                 assignNew('V', mce, ty, binop(original_op, qaa, bb)),
                 mkPCastTo(mce, ty, qbb)
         )
      );
}
1684
1685
1686/*------------------------------------------------------------*/
1687/*--- Helpers for dealing with vector primops. ---*/
sewardj3245c912004-12-10 14:58:26 +00001688/*------------------------------------------------------------*/
1689
sewardja1d93302004-12-12 16:45:06 +00001690/* Vector pessimisation -- pessimise within each lane individually. */
1691
1692static IRAtom* mkPCast8x16 ( MCEnv* mce, IRAtom* at )
1693{
sewardj7cf4e6b2008-05-01 20:24:26 +00001694 return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ8x16, at));
sewardja1d93302004-12-12 16:45:06 +00001695}
1696
1697static IRAtom* mkPCast16x8 ( MCEnv* mce, IRAtom* at )
1698{
sewardj7cf4e6b2008-05-01 20:24:26 +00001699 return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ16x8, at));
sewardja1d93302004-12-12 16:45:06 +00001700}
1701
1702static IRAtom* mkPCast32x4 ( MCEnv* mce, IRAtom* at )
1703{
sewardj7cf4e6b2008-05-01 20:24:26 +00001704 return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ32x4, at));
sewardja1d93302004-12-12 16:45:06 +00001705}
1706
1707static IRAtom* mkPCast64x2 ( MCEnv* mce, IRAtom* at )
1708{
sewardj7cf4e6b2008-05-01 20:24:26 +00001709 return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ64x2, at));
sewardja1d93302004-12-12 16:45:06 +00001710}
1711
sewardjacd2e912005-01-13 19:17:06 +00001712static IRAtom* mkPCast32x2 ( MCEnv* mce, IRAtom* at )
1713{
sewardj7cf4e6b2008-05-01 20:24:26 +00001714 return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ32x2, at));
sewardjacd2e912005-01-13 19:17:06 +00001715}
1716
1717static IRAtom* mkPCast16x4 ( MCEnv* mce, IRAtom* at )
1718{
sewardj7cf4e6b2008-05-01 20:24:26 +00001719 return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ16x4, at));
sewardjacd2e912005-01-13 19:17:06 +00001720}
1721
1722static IRAtom* mkPCast8x8 ( MCEnv* mce, IRAtom* at )
1723{
sewardj7cf4e6b2008-05-01 20:24:26 +00001724 return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ8x8, at));
sewardjacd2e912005-01-13 19:17:06 +00001725}
1726
sewardjc678b852010-09-22 00:58:51 +00001727static IRAtom* mkPCast16x2 ( MCEnv* mce, IRAtom* at )
1728{
1729 return assignNew('V', mce, Ity_I32, unop(Iop_CmpNEZ16x2, at));
1730}
1731
1732static IRAtom* mkPCast8x4 ( MCEnv* mce, IRAtom* at )
1733{
1734 return assignNew('V', mce, Ity_I32, unop(Iop_CmpNEZ8x4, at));
1735}
1736
sewardja1d93302004-12-12 16:45:06 +00001737
sewardj3245c912004-12-10 14:58:26 +00001738/* Here's a simple scheme capable of handling ops derived from SSE1
1739 code and while only generating ops that can be efficiently
1740 implemented in SSE1. */
1741
1742/* All-lanes versions are straightforward:
1743
sewardj20d38f22005-02-07 23:50:18 +00001744 binary32Fx4(x,y) ==> PCast32x4(UifUV128(x#,y#))
sewardj3245c912004-12-10 14:58:26 +00001745
1746 unary32Fx4(x,y) ==> PCast32x4(x#)
   unary32Fx4(x)    ==> PCast32x4(x#)
1748 Lowest-lane-only versions are more complex:
1749
sewardj20d38f22005-02-07 23:50:18 +00001750 binary32F0x4(x,y) ==> SetV128lo32(
sewardj3245c912004-12-10 14:58:26 +00001751 x#,
sewardj20d38f22005-02-07 23:50:18 +00001752 PCast32(V128to32(UifUV128(x#,y#)))
sewardj3245c912004-12-10 14:58:26 +00001753 )
1754
1755 This is perhaps not so obvious. In particular, it's faster to
sewardj20d38f22005-02-07 23:50:18 +00001756 do a V128-bit UifU and then take the bottom 32 bits than the more
sewardj3245c912004-12-10 14:58:26 +00001757 obvious scheme of taking the bottom 32 bits of each operand
1758 and doing a 32-bit UifU. Basically since UifU is fast and
1759 chopping lanes off vector values is slow.
1760
1761 Finally:
1762
sewardj20d38f22005-02-07 23:50:18 +00001763 unary32F0x4(x) ==> SetV128lo32(
sewardj3245c912004-12-10 14:58:26 +00001764 x#,
sewardj20d38f22005-02-07 23:50:18 +00001765 PCast32(V128to32(x#))
sewardj3245c912004-12-10 14:58:26 +00001766 )
1767
1768 Where:
1769
1770 PCast32(v#) = 1Sto32(CmpNE32(v#,0))
1771 PCast32x4(v#) = CmpNEZ32x4(v#)
1772*/
1773
1774static
1775IRAtom* binary32Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1776{
1777 IRAtom* at;
1778 tl_assert(isShadowAtom(mce, vatomX));
1779 tl_assert(isShadowAtom(mce, vatomY));
sewardj20d38f22005-02-07 23:50:18 +00001780 at = mkUifUV128(mce, vatomX, vatomY);
sewardj7cf4e6b2008-05-01 20:24:26 +00001781 at = assignNew('V', mce, Ity_V128, mkPCast32x4(mce, at));
sewardj3245c912004-12-10 14:58:26 +00001782 return at;
1783}
1784
1785static
1786IRAtom* unary32Fx4 ( MCEnv* mce, IRAtom* vatomX )
1787{
1788 IRAtom* at;
1789 tl_assert(isShadowAtom(mce, vatomX));
sewardj7cf4e6b2008-05-01 20:24:26 +00001790 at = assignNew('V', mce, Ity_V128, mkPCast32x4(mce, vatomX));
sewardj3245c912004-12-10 14:58:26 +00001791 return at;
1792}
1793
1794static
1795IRAtom* binary32F0x4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1796{
1797 IRAtom* at;
1798 tl_assert(isShadowAtom(mce, vatomX));
1799 tl_assert(isShadowAtom(mce, vatomY));
sewardj20d38f22005-02-07 23:50:18 +00001800 at = mkUifUV128(mce, vatomX, vatomY);
sewardj7cf4e6b2008-05-01 20:24:26 +00001801 at = assignNew('V', mce, Ity_I32, unop(Iop_V128to32, at));
sewardj3245c912004-12-10 14:58:26 +00001802 at = mkPCastTo(mce, Ity_I32, at);
sewardj7cf4e6b2008-05-01 20:24:26 +00001803 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
sewardj3245c912004-12-10 14:58:26 +00001804 return at;
1805}
1806
1807static
1808IRAtom* unary32F0x4 ( MCEnv* mce, IRAtom* vatomX )
1809{
1810 IRAtom* at;
1811 tl_assert(isShadowAtom(mce, vatomX));
sewardj7cf4e6b2008-05-01 20:24:26 +00001812 at = assignNew('V', mce, Ity_I32, unop(Iop_V128to32, vatomX));
sewardj3245c912004-12-10 14:58:26 +00001813 at = mkPCastTo(mce, Ity_I32, at);
sewardj7cf4e6b2008-05-01 20:24:26 +00001814 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
sewardj3245c912004-12-10 14:58:26 +00001815 return at;
1816}
1817
sewardj0b070592004-12-10 21:44:22 +00001818/* --- ... and ... 64Fx2 versions of the same ... --- */
1819
1820static
1821IRAtom* binary64Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1822{
1823 IRAtom* at;
1824 tl_assert(isShadowAtom(mce, vatomX));
1825 tl_assert(isShadowAtom(mce, vatomY));
sewardj20d38f22005-02-07 23:50:18 +00001826 at = mkUifUV128(mce, vatomX, vatomY);
sewardj7cf4e6b2008-05-01 20:24:26 +00001827 at = assignNew('V', mce, Ity_V128, mkPCast64x2(mce, at));
sewardj0b070592004-12-10 21:44:22 +00001828 return at;
1829}
1830
1831static
1832IRAtom* unary64Fx2 ( MCEnv* mce, IRAtom* vatomX )
1833{
1834 IRAtom* at;
1835 tl_assert(isShadowAtom(mce, vatomX));
sewardj7cf4e6b2008-05-01 20:24:26 +00001836 at = assignNew('V', mce, Ity_V128, mkPCast64x2(mce, vatomX));
sewardj0b070592004-12-10 21:44:22 +00001837 return at;
1838}
1839
1840static
1841IRAtom* binary64F0x2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1842{
1843 IRAtom* at;
1844 tl_assert(isShadowAtom(mce, vatomX));
1845 tl_assert(isShadowAtom(mce, vatomY));
sewardj20d38f22005-02-07 23:50:18 +00001846 at = mkUifUV128(mce, vatomX, vatomY);
sewardj7cf4e6b2008-05-01 20:24:26 +00001847 at = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, at));
sewardj0b070592004-12-10 21:44:22 +00001848 at = mkPCastTo(mce, Ity_I64, at);
sewardj7cf4e6b2008-05-01 20:24:26 +00001849 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
sewardj0b070592004-12-10 21:44:22 +00001850 return at;
1851}
1852
1853static
1854IRAtom* unary64F0x2 ( MCEnv* mce, IRAtom* vatomX )
1855{
1856 IRAtom* at;
1857 tl_assert(isShadowAtom(mce, vatomX));
sewardj7cf4e6b2008-05-01 20:24:26 +00001858 at = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vatomX));
sewardj0b070592004-12-10 21:44:22 +00001859 at = mkPCastTo(mce, Ity_I64, at);
sewardj7cf4e6b2008-05-01 20:24:26 +00001860 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
sewardj0b070592004-12-10 21:44:22 +00001861 return at;
1862}
1863
sewardj57f92b02010-08-22 11:54:14 +00001864/* --- --- ... and ... 32Fx2 versions of the same --- --- */
1865
1866static
1867IRAtom* binary32Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1868{
1869 IRAtom* at;
1870 tl_assert(isShadowAtom(mce, vatomX));
1871 tl_assert(isShadowAtom(mce, vatomY));
1872 at = mkUifU64(mce, vatomX, vatomY);
1873 at = assignNew('V', mce, Ity_I64, mkPCast32x2(mce, at));
1874 return at;
1875}
1876
1877static
1878IRAtom* unary32Fx2 ( MCEnv* mce, IRAtom* vatomX )
1879{
1880 IRAtom* at;
1881 tl_assert(isShadowAtom(mce, vatomX));
1882 at = assignNew('V', mce, Ity_I64, mkPCast32x2(mce, vatomX));
1883 return at;
1884}
1885
sewardja1d93302004-12-12 16:45:06 +00001886/* --- --- Vector saturated narrowing --- --- */
1887
1888/* This is quite subtle. What to do is simple:
1889
1890 Let the original narrowing op be QNarrowW{S,U}xN. Produce:
1891
1892 the-narrowing-op( PCastWxN(vatom1), PCastWxN(vatom2))
1893
1894 Why this is right is not so simple. Consider a lane in the args,
1895 vatom1 or 2, doesn't matter.
1896
1897 After the PCast, that lane is all 0s (defined) or all
1898 1s(undefined).
1899
1900 Both signed and unsigned saturating narrowing of all 0s produces
1901 all 0s, which is what we want.
1902
1903 The all-1s case is more complex. Unsigned narrowing interprets an
1904 all-1s input as the largest unsigned integer, and so produces all
1905 1s as a result since that is the largest unsigned value at the
1906 smaller width.
1907
1908 Signed narrowing interprets all 1s as -1. Fortunately, -1 narrows
1909 to -1, so we still wind up with all 1s at the smaller width.
1910
1911 So: In short, pessimise the args, then apply the original narrowing
1912 op.
1913*/
/* Instrument a V128 saturated-narrowing binop per the scheme in the
   comment above: per-lane pessimise both shadow operands at the
   pre-narrowing lane width, then apply the original narrowing op to
   the pessimised shadows.  Panics on an unrecognised narrow_op. */
static
IRAtom* vectorNarrowV128 ( MCEnv* mce, IROp narrow_op,
                           IRAtom* vatom1, IRAtom* vatom2)
{
   IRAtom *at1, *at2, *at3;
   /* pcast is chosen to match narrow_op's input lane width. */
   IRAtom* (*pcast)( MCEnv*, IRAtom* );
   switch (narrow_op) {
      case Iop_QNarrow32Sx4: pcast = mkPCast32x4; break;
      case Iop_QNarrow32Ux4: pcast = mkPCast32x4; break;
      case Iop_QNarrow16Sx8: pcast = mkPCast16x8; break;
      case Iop_QNarrow16Ux8: pcast = mkPCast16x8; break;
      default: VG_(tool_panic)("vectorNarrowV128");
   }
   tl_assert(isShadowAtom(mce,vatom1));
   tl_assert(isShadowAtom(mce,vatom2));
   at1 = assignNew('V', mce, Ity_V128, pcast(mce, vatom1));
   at2 = assignNew('V', mce, Ity_V128, pcast(mce, vatom2));
   at3 = assignNew('V', mce, Ity_V128, binop(narrow_op, at1, at2));
   return at3;
}
1934
/* 64-bit counterpart of vectorNarrowV128: per-lane pessimise both
   I64 shadow operands, then apply the original narrowing op. */
static
IRAtom* vectorNarrow64 ( MCEnv* mce, IROp narrow_op,
                         IRAtom* vatom1, IRAtom* vatom2)
{
   IRAtom *at1, *at2, *at3;
   /* pcast matches narrow_op's input lane width. */
   IRAtom* (*pcast)( MCEnv*, IRAtom* );
   switch (narrow_op) {
      case Iop_QNarrow32Sx2: pcast = mkPCast32x2; break;
      case Iop_QNarrow16Sx4: pcast = mkPCast16x4; break;
      case Iop_QNarrow16Ux4: pcast = mkPCast16x4; break;
      default: VG_(tool_panic)("vectorNarrow64");
   }
   tl_assert(isShadowAtom(mce,vatom1));
   tl_assert(isShadowAtom(mce,vatom2));
   at1 = assignNew('V', mce, Ity_I64, pcast(mce, vatom1));
   at2 = assignNew('V', mce, Ity_I64, pcast(mce, vatom2));
   at3 = assignNew('V', mce, Ity_I64, binop(narrow_op, at1, at2));
   return at3;
}
1954
/* Instrument a unary V128 -> I64 "shorten" op: per-lane pessimise
   the V128 shadow at the input lane width, then apply the original
   shortening op to produce the I64 shadow result. */
static
IRAtom* vectorShortenV128 ( MCEnv* mce, IROp shorten_op,
                            IRAtom* vatom1)
{
   IRAtom *at1, *at2;
   /* pcast matches shorten_op's input lane width. */
   IRAtom* (*pcast)( MCEnv*, IRAtom* );
   switch (shorten_op) {
      case Iop_Shorten16x8: pcast = mkPCast16x8; break;
      case Iop_Shorten32x4: pcast = mkPCast32x4; break;
      case Iop_Shorten64x2: pcast = mkPCast64x2; break;
      case Iop_QShortenS16Sx8: pcast = mkPCast16x8; break;
      case Iop_QShortenU16Sx8: pcast = mkPCast16x8; break;
      case Iop_QShortenU16Ux8: pcast = mkPCast16x8; break;
      case Iop_QShortenS32Sx4: pcast = mkPCast32x4; break;
      case Iop_QShortenU32Sx4: pcast = mkPCast32x4; break;
      case Iop_QShortenU32Ux4: pcast = mkPCast32x4; break;
      case Iop_QShortenS64Sx2: pcast = mkPCast64x2; break;
      case Iop_QShortenU64Sx2: pcast = mkPCast64x2; break;
      case Iop_QShortenU64Ux2: pcast = mkPCast64x2; break;
      default: VG_(tool_panic)("vectorShortenV128");
   }
   tl_assert(isShadowAtom(mce,vatom1));
   at1 = assignNew('V', mce, Ity_V128, pcast(mce, vatom1));
   at2 = assignNew('V', mce, Ity_I64, unop(shorten_op, at1));
   return at2;
}
1981
/* Instrument a unary I64 -> V128 "longen" op: apply the original
   widening op to the shadow first, then per-lane pessimise at the
   OUTPUT lane width (note: opposite order to vectorShortenV128). */
static
IRAtom* vectorLongenI64 ( MCEnv* mce, IROp longen_op,
                          IRAtom* vatom1)
{
   IRAtom *at1, *at2;
   /* pcast matches longen_op's output lane width. */
   IRAtom* (*pcast)( MCEnv*, IRAtom* );
   switch (longen_op) {
      case Iop_Longen8Ux8: pcast = mkPCast16x8; break;
      case Iop_Longen8Sx8: pcast = mkPCast16x8; break;
      case Iop_Longen16Ux4: pcast = mkPCast32x4; break;
      case Iop_Longen16Sx4: pcast = mkPCast32x4; break;
      case Iop_Longen32Ux2: pcast = mkPCast64x2; break;
      case Iop_Longen32Sx2: pcast = mkPCast64x2; break;
      default: VG_(tool_panic)("vectorLongenI64");
   }
   tl_assert(isShadowAtom(mce,vatom1));
   at1 = assignNew('V', mce, Ity_V128, unop(longen_op, vatom1));
   at2 = assignNew('V', mce, Ity_V128, pcast(mce, at1));
   return at2;
}
2002
sewardja1d93302004-12-12 16:45:06 +00002003
2004/* --- --- Vector integer arithmetic --- --- */
2005
2006/* Simple ... UifU the args and per-lane pessimise the results. */
sewardjacd2e912005-01-13 19:17:06 +00002007
sewardj20d38f22005-02-07 23:50:18 +00002008/* --- V128-bit versions --- */
sewardjacd2e912005-01-13 19:17:06 +00002009
sewardja1d93302004-12-12 16:45:06 +00002010static
2011IRAtom* binary8Ix16 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2012{
2013 IRAtom* at;
sewardj20d38f22005-02-07 23:50:18 +00002014 at = mkUifUV128(mce, vatom1, vatom2);
sewardja1d93302004-12-12 16:45:06 +00002015 at = mkPCast8x16(mce, at);
2016 return at;
2017}
2018
2019static
2020IRAtom* binary16Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2021{
2022 IRAtom* at;
sewardj20d38f22005-02-07 23:50:18 +00002023 at = mkUifUV128(mce, vatom1, vatom2);
sewardja1d93302004-12-12 16:45:06 +00002024 at = mkPCast16x8(mce, at);
2025 return at;
2026}
2027
2028static
2029IRAtom* binary32Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2030{
2031 IRAtom* at;
sewardj20d38f22005-02-07 23:50:18 +00002032 at = mkUifUV128(mce, vatom1, vatom2);
sewardja1d93302004-12-12 16:45:06 +00002033 at = mkPCast32x4(mce, at);
2034 return at;
2035}
2036
2037static
2038IRAtom* binary64Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2039{
2040 IRAtom* at;
sewardj20d38f22005-02-07 23:50:18 +00002041 at = mkUifUV128(mce, vatom1, vatom2);
sewardja1d93302004-12-12 16:45:06 +00002042 at = mkPCast64x2(mce, at);
2043 return at;
2044}
sewardj3245c912004-12-10 14:58:26 +00002045
sewardjacd2e912005-01-13 19:17:06 +00002046/* --- 64-bit versions --- */
2047
2048static
2049IRAtom* binary8Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2050{
2051 IRAtom* at;
2052 at = mkUifU64(mce, vatom1, vatom2);
2053 at = mkPCast8x8(mce, at);
2054 return at;
2055}
2056
2057static
2058IRAtom* binary16Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2059{
2060 IRAtom* at;
2061 at = mkUifU64(mce, vatom1, vatom2);
2062 at = mkPCast16x4(mce, at);
2063 return at;
2064}
2065
2066static
2067IRAtom* binary32Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2068{
2069 IRAtom* at;
2070 at = mkUifU64(mce, vatom1, vatom2);
2071 at = mkPCast32x2(mce, at);
2072 return at;
2073}
2074
sewardj57f92b02010-08-22 11:54:14 +00002075static
2076IRAtom* binary64Ix1 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2077{
2078 IRAtom* at;
2079 at = mkUifU64(mce, vatom1, vatom2);
2080 at = mkPCastTo(mce, Ity_I64, at);
2081 return at;
2082}
2083
sewardjc678b852010-09-22 00:58:51 +00002084/* --- 32-bit versions --- */
2085
2086static
2087IRAtom* binary8Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2088{
2089 IRAtom* at;
2090 at = mkUifU32(mce, vatom1, vatom2);
2091 at = mkPCast8x4(mce, at);
2092 return at;
2093}
2094
2095static
2096IRAtom* binary16Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2097{
2098 IRAtom* at;
2099 at = mkUifU32(mce, vatom1, vatom2);
2100 at = mkPCast16x2(mce, at);
2101 return at;
2102}
2103
sewardj3245c912004-12-10 14:58:26 +00002104
2105/*------------------------------------------------------------*/
sewardj95448072004-11-22 20:19:51 +00002106/*--- Generate shadow values from all kinds of IRExprs. ---*/
2107/*------------------------------------------------------------*/
2108
/* Compute the shadow (V-bit) value for a 4-ary (Qop) expression.
   Recursively shadows the four argument atoms, sanity-checks them,
   and dispatches on the primop; unrecognised ops panic so that new
   IR ops cannot be silently mis-instrumented. */
static
IRAtom* expr2vbits_Qop ( MCEnv* mce,
                         IROp op,
                         IRAtom* atom1, IRAtom* atom2,
                         IRAtom* atom3, IRAtom* atom4 )
{
   IRAtom* vatom1 = expr2vbits( mce, atom1 );
   IRAtom* vatom2 = expr2vbits( mce, atom2 );
   IRAtom* vatom3 = expr2vbits( mce, atom3 );
   IRAtom* vatom4 = expr2vbits( mce, atom4 );

   tl_assert(isOriginalAtom(mce,atom1));
   tl_assert(isOriginalAtom(mce,atom2));
   tl_assert(isOriginalAtom(mce,atom3));
   tl_assert(isOriginalAtom(mce,atom4));
   tl_assert(isShadowAtom(mce,vatom1));
   tl_assert(isShadowAtom(mce,vatom2));
   tl_assert(isShadowAtom(mce,vatom3));
   tl_assert(isShadowAtom(mce,vatom4));
   tl_assert(sameKindedAtoms(atom1,vatom1));
   tl_assert(sameKindedAtoms(atom2,vatom2));
   tl_assert(sameKindedAtoms(atom3,vatom3));
   tl_assert(sameKindedAtoms(atom4,vatom4));
   switch (op) {
      case Iop_MAddF64:
      case Iop_MAddF64r32:
      case Iop_MSubF64:
      case Iop_MSubF64r32:
         /* I32(rm) x F64 x F64 x F64 -> F64 */
         return mkLazy4(mce, Ity_I64, vatom1, vatom2, vatom3, vatom4);
      default:
         ppIROp(op);
         VG_(tool_panic)("memcheck:expr2vbits_Qop");
   }
}
2144
2145
/* Compute the shadow (V-bit) value for a 3-ary (Triop) expression.
   Most cases are FP ops taking a rounding mode in arg 1 and are
   handled lazily via mkLazy3.  The Extract/SetElem cases instead
   demand a fully-defined lane-selector/shift argument (via
   complainIfUndefined) and then apply the original op to the shadow
   operands.  Unrecognised ops panic. */
static
IRAtom* expr2vbits_Triop ( MCEnv* mce,
                           IROp op,
                           IRAtom* atom1, IRAtom* atom2, IRAtom* atom3 )
{
   IRAtom* vatom1 = expr2vbits( mce, atom1 );
   IRAtom* vatom2 = expr2vbits( mce, atom2 );
   IRAtom* vatom3 = expr2vbits( mce, atom3 );

   tl_assert(isOriginalAtom(mce,atom1));
   tl_assert(isOriginalAtom(mce,atom2));
   tl_assert(isOriginalAtom(mce,atom3));
   tl_assert(isShadowAtom(mce,vatom1));
   tl_assert(isShadowAtom(mce,vatom2));
   tl_assert(isShadowAtom(mce,vatom3));
   tl_assert(sameKindedAtoms(atom1,vatom1));
   tl_assert(sameKindedAtoms(atom2,vatom2));
   tl_assert(sameKindedAtoms(atom3,vatom3));
   switch (op) {
      case Iop_AddF64:
      case Iop_AddF64r32:
      case Iop_SubF64:
      case Iop_SubF64r32:
      case Iop_MulF64:
      case Iop_MulF64r32:
      case Iop_DivF64:
      case Iop_DivF64r32:
      case Iop_ScaleF64:
      case Iop_Yl2xF64:
      case Iop_Yl2xp1F64:
      case Iop_AtanF64:
      case Iop_PRemF64:
      case Iop_PRem1F64:
         /* I32(rm) x F64 x F64 -> F64 */
         return mkLazy3(mce, Ity_I64, vatom1, vatom2, vatom3);
      case Iop_PRemC3210F64:
      case Iop_PRem1C3210F64:
         /* I32(rm) x F64 x F64 -> I32 */
         return mkLazy3(mce, Ity_I32, vatom1, vatom2, vatom3);
      case Iop_AddF32:
      case Iop_SubF32:
      case Iop_MulF32:
      case Iop_DivF32:
         /* I32(rm) x F32 x F32 -> I32 */
         return mkLazy3(mce, Ity_I32, vatom1, vatom2, vatom3);
      case Iop_ExtractV128:
         /* Lane-index arg (atom3) must itself be defined. */
         complainIfUndefined(mce, atom3);
         return assignNew('V', mce, Ity_V128, triop(op, vatom1, vatom2, atom3));
      case Iop_Extract64:
         complainIfUndefined(mce, atom3);
         return assignNew('V', mce, Ity_I64, triop(op, vatom1, vatom2, atom3));
      case Iop_SetElem8x8:
      case Iop_SetElem16x4:
      case Iop_SetElem32x2:
         /* Element-index arg (atom2) must itself be defined. */
         complainIfUndefined(mce, atom2);
         return assignNew('V', mce, Ity_I64, triop(op, vatom1, atom2, vatom3));
      default:
         ppIROp(op);
         VG_(tool_panic)("memcheck:expr2vbits_Triop");
   }
}
2207
2208
2209static
sewardj95448072004-11-22 20:19:51 +00002210IRAtom* expr2vbits_Binop ( MCEnv* mce,
2211 IROp op,
2212 IRAtom* atom1, IRAtom* atom2 )
2213{
2214 IRType and_or_ty;
2215 IRAtom* (*uifu) (MCEnv*, IRAtom*, IRAtom*);
2216 IRAtom* (*difd) (MCEnv*, IRAtom*, IRAtom*);
2217 IRAtom* (*improve) (MCEnv*, IRAtom*, IRAtom*);
2218
2219 IRAtom* vatom1 = expr2vbits( mce, atom1 );
2220 IRAtom* vatom2 = expr2vbits( mce, atom2 );
2221
2222 tl_assert(isOriginalAtom(mce,atom1));
2223 tl_assert(isOriginalAtom(mce,atom2));
2224 tl_assert(isShadowAtom(mce,vatom1));
2225 tl_assert(isShadowAtom(mce,vatom2));
2226 tl_assert(sameKindedAtoms(atom1,vatom1));
2227 tl_assert(sameKindedAtoms(atom2,vatom2));
2228 switch (op) {
2229
sewardjc678b852010-09-22 00:58:51 +00002230 /* 32-bit SIMD */
2231
2232 case Iop_Add16x2:
2233 case Iop_HAdd16Ux2:
2234 case Iop_HAdd16Sx2:
2235 case Iop_Sub16x2:
2236 case Iop_HSub16Ux2:
2237 case Iop_HSub16Sx2:
2238 case Iop_QAdd16Sx2:
2239 case Iop_QSub16Sx2:
2240 return binary16Ix2(mce, vatom1, vatom2);
2241
2242 case Iop_Add8x4:
2243 case Iop_HAdd8Ux4:
2244 case Iop_HAdd8Sx4:
2245 case Iop_Sub8x4:
2246 case Iop_HSub8Ux4:
2247 case Iop_HSub8Sx4:
2248 case Iop_QSub8Ux4:
2249 case Iop_QAdd8Ux4:
2250 case Iop_QSub8Sx4:
2251 case Iop_QAdd8Sx4:
2252 return binary8Ix4(mce, vatom1, vatom2);
2253
sewardjacd2e912005-01-13 19:17:06 +00002254 /* 64-bit SIMD */
2255
sewardj57f92b02010-08-22 11:54:14 +00002256 case Iop_ShrN8x8:
sewardjacd2e912005-01-13 19:17:06 +00002257 case Iop_ShrN16x4:
2258 case Iop_ShrN32x2:
sewardj03809ae2006-12-27 01:16:58 +00002259 case Iop_SarN8x8:
sewardjacd2e912005-01-13 19:17:06 +00002260 case Iop_SarN16x4:
2261 case Iop_SarN32x2:
2262 case Iop_ShlN16x4:
2263 case Iop_ShlN32x2:
sewardj114a9172008-02-09 01:49:32 +00002264 case Iop_ShlN8x8:
sewardjacd2e912005-01-13 19:17:06 +00002265 /* Same scheme as with all other shifts. */
2266 complainIfUndefined(mce, atom2);
sewardj7cf4e6b2008-05-01 20:24:26 +00002267 return assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2));
sewardjacd2e912005-01-13 19:17:06 +00002268
2269 case Iop_QNarrow32Sx2:
2270 case Iop_QNarrow16Sx4:
2271 case Iop_QNarrow16Ux4:
2272 return vectorNarrow64(mce, op, vatom1, vatom2);
2273
2274 case Iop_Min8Ux8:
sewardj57f92b02010-08-22 11:54:14 +00002275 case Iop_Min8Sx8:
sewardjacd2e912005-01-13 19:17:06 +00002276 case Iop_Max8Ux8:
sewardj57f92b02010-08-22 11:54:14 +00002277 case Iop_Max8Sx8:
sewardjacd2e912005-01-13 19:17:06 +00002278 case Iop_Avg8Ux8:
2279 case Iop_QSub8Sx8:
2280 case Iop_QSub8Ux8:
2281 case Iop_Sub8x8:
2282 case Iop_CmpGT8Sx8:
sewardj57f92b02010-08-22 11:54:14 +00002283 case Iop_CmpGT8Ux8:
sewardjacd2e912005-01-13 19:17:06 +00002284 case Iop_CmpEQ8x8:
2285 case Iop_QAdd8Sx8:
2286 case Iop_QAdd8Ux8:
sewardj57f92b02010-08-22 11:54:14 +00002287 case Iop_QSal8x8:
2288 case Iop_QShl8x8:
sewardjacd2e912005-01-13 19:17:06 +00002289 case Iop_Add8x8:
sewardj57f92b02010-08-22 11:54:14 +00002290 case Iop_Mul8x8:
2291 case Iop_PolynomialMul8x8:
sewardjacd2e912005-01-13 19:17:06 +00002292 return binary8Ix8(mce, vatom1, vatom2);
2293
2294 case Iop_Min16Sx4:
sewardj57f92b02010-08-22 11:54:14 +00002295 case Iop_Min16Ux4:
sewardjacd2e912005-01-13 19:17:06 +00002296 case Iop_Max16Sx4:
sewardj57f92b02010-08-22 11:54:14 +00002297 case Iop_Max16Ux4:
sewardjacd2e912005-01-13 19:17:06 +00002298 case Iop_Avg16Ux4:
2299 case Iop_QSub16Ux4:
2300 case Iop_QSub16Sx4:
2301 case Iop_Sub16x4:
2302 case Iop_Mul16x4:
2303 case Iop_MulHi16Sx4:
2304 case Iop_MulHi16Ux4:
2305 case Iop_CmpGT16Sx4:
sewardj57f92b02010-08-22 11:54:14 +00002306 case Iop_CmpGT16Ux4:
sewardjacd2e912005-01-13 19:17:06 +00002307 case Iop_CmpEQ16x4:
2308 case Iop_QAdd16Sx4:
2309 case Iop_QAdd16Ux4:
sewardj57f92b02010-08-22 11:54:14 +00002310 case Iop_QSal16x4:
2311 case Iop_QShl16x4:
sewardjacd2e912005-01-13 19:17:06 +00002312 case Iop_Add16x4:
sewardj57f92b02010-08-22 11:54:14 +00002313 case Iop_QDMulHi16Sx4:
2314 case Iop_QRDMulHi16Sx4:
sewardjacd2e912005-01-13 19:17:06 +00002315 return binary16Ix4(mce, vatom1, vatom2);
2316
2317 case Iop_Sub32x2:
sewardj114a9172008-02-09 01:49:32 +00002318 case Iop_Mul32x2:
sewardj57f92b02010-08-22 11:54:14 +00002319 case Iop_Max32Sx2:
2320 case Iop_Max32Ux2:
2321 case Iop_Min32Sx2:
2322 case Iop_Min32Ux2:
sewardjacd2e912005-01-13 19:17:06 +00002323 case Iop_CmpGT32Sx2:
sewardj57f92b02010-08-22 11:54:14 +00002324 case Iop_CmpGT32Ux2:
sewardjacd2e912005-01-13 19:17:06 +00002325 case Iop_CmpEQ32x2:
2326 case Iop_Add32x2:
sewardj57f92b02010-08-22 11:54:14 +00002327 case Iop_QAdd32Ux2:
2328 case Iop_QAdd32Sx2:
2329 case Iop_QSub32Ux2:
2330 case Iop_QSub32Sx2:
2331 case Iop_QSal32x2:
2332 case Iop_QShl32x2:
2333 case Iop_QDMulHi32Sx2:
2334 case Iop_QRDMulHi32Sx2:
sewardjacd2e912005-01-13 19:17:06 +00002335 return binary32Ix2(mce, vatom1, vatom2);
2336
sewardj57f92b02010-08-22 11:54:14 +00002337 case Iop_QSub64Ux1:
2338 case Iop_QSub64Sx1:
2339 case Iop_QAdd64Ux1:
2340 case Iop_QAdd64Sx1:
2341 case Iop_QSal64x1:
2342 case Iop_QShl64x1:
2343 case Iop_Sal64x1:
2344 return binary64Ix1(mce, vatom1, vatom2);
2345
2346 case Iop_QShlN8Sx8:
2347 case Iop_QShlN8x8:
2348 case Iop_QSalN8x8:
2349 complainIfUndefined(mce, atom2);
2350 return mkPCast8x8(mce, vatom1);
2351
2352 case Iop_QShlN16Sx4:
2353 case Iop_QShlN16x4:
2354 case Iop_QSalN16x4:
2355 complainIfUndefined(mce, atom2);
2356 return mkPCast16x4(mce, vatom1);
2357
2358 case Iop_QShlN32Sx2:
2359 case Iop_QShlN32x2:
2360 case Iop_QSalN32x2:
2361 complainIfUndefined(mce, atom2);
2362 return mkPCast32x2(mce, vatom1);
2363
2364 case Iop_QShlN64Sx1:
2365 case Iop_QShlN64x1:
2366 case Iop_QSalN64x1:
2367 complainIfUndefined(mce, atom2);
2368 return mkPCast32x2(mce, vatom1);
2369
2370 case Iop_PwMax32Sx2:
2371 case Iop_PwMax32Ux2:
2372 case Iop_PwMin32Sx2:
2373 case Iop_PwMin32Ux2:
2374 case Iop_PwMax32Fx2:
2375 case Iop_PwMin32Fx2:
2376 return assignNew('V', mce, Ity_I64, binop(Iop_PwMax32Ux2, mkPCast32x2(mce, vatom1),
2377 mkPCast32x2(mce, vatom2)));
2378
2379 case Iop_PwMax16Sx4:
2380 case Iop_PwMax16Ux4:
2381 case Iop_PwMin16Sx4:
2382 case Iop_PwMin16Ux4:
2383 return assignNew('V', mce, Ity_I64, binop(Iop_PwMax16Ux4, mkPCast16x4(mce, vatom1),
2384 mkPCast16x4(mce, vatom2)));
2385
2386 case Iop_PwMax8Sx8:
2387 case Iop_PwMax8Ux8:
2388 case Iop_PwMin8Sx8:
2389 case Iop_PwMin8Ux8:
2390 return assignNew('V', mce, Ity_I64, binop(Iop_PwMax8Ux8, mkPCast8x8(mce, vatom1),
2391 mkPCast8x8(mce, vatom2)));
2392
2393 case Iop_PwAdd32x2:
2394 case Iop_PwAdd32Fx2:
2395 return mkPCast32x2(mce,
2396 assignNew('V', mce, Ity_I64, binop(Iop_PwAdd32x2, mkPCast32x2(mce, vatom1),
2397 mkPCast32x2(mce, vatom2))));
2398
2399 case Iop_PwAdd16x4:
2400 return mkPCast16x4(mce,
2401 assignNew('V', mce, Ity_I64, binop(op, mkPCast16x4(mce, vatom1),
2402 mkPCast16x4(mce, vatom2))));
2403
2404 case Iop_PwAdd8x8:
2405 return mkPCast8x8(mce,
2406 assignNew('V', mce, Ity_I64, binop(op, mkPCast8x8(mce, vatom1),
2407 mkPCast8x8(mce, vatom2))));
2408
2409 case Iop_Shl8x8:
2410 case Iop_Shr8x8:
2411 case Iop_Sar8x8:
2412 case Iop_Sal8x8:
2413 return mkUifU64(mce,
2414 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
2415 mkPCast8x8(mce,vatom2)
2416 );
2417
2418 case Iop_Shl16x4:
2419 case Iop_Shr16x4:
2420 case Iop_Sar16x4:
2421 case Iop_Sal16x4:
2422 return mkUifU64(mce,
2423 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
2424 mkPCast16x4(mce,vatom2)
2425 );
2426
2427 case Iop_Shl32x2:
2428 case Iop_Shr32x2:
2429 case Iop_Sar32x2:
2430 case Iop_Sal32x2:
2431 return mkUifU64(mce,
2432 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
2433 mkPCast32x2(mce,vatom2)
2434 );
2435
sewardjacd2e912005-01-13 19:17:06 +00002436 /* 64-bit data-steering */
2437 case Iop_InterleaveLO32x2:
2438 case Iop_InterleaveLO16x4:
2439 case Iop_InterleaveLO8x8:
2440 case Iop_InterleaveHI32x2:
2441 case Iop_InterleaveHI16x4:
2442 case Iop_InterleaveHI8x8:
sewardj57f92b02010-08-22 11:54:14 +00002443 case Iop_CatOddLanes8x8:
2444 case Iop_CatEvenLanes8x8:
sewardj114a9172008-02-09 01:49:32 +00002445 case Iop_CatOddLanes16x4:
2446 case Iop_CatEvenLanes16x4:
sewardj57f92b02010-08-22 11:54:14 +00002447 case Iop_InterleaveOddLanes8x8:
2448 case Iop_InterleaveEvenLanes8x8:
2449 case Iop_InterleaveOddLanes16x4:
2450 case Iop_InterleaveEvenLanes16x4:
sewardj7cf4e6b2008-05-01 20:24:26 +00002451 return assignNew('V', mce, Ity_I64, binop(op, vatom1, vatom2));
sewardjacd2e912005-01-13 19:17:06 +00002452
sewardj57f92b02010-08-22 11:54:14 +00002453 case Iop_GetElem8x8:
2454 complainIfUndefined(mce, atom2);
2455 return assignNew('V', mce, Ity_I8, binop(op, vatom1, atom2));
2456 case Iop_GetElem16x4:
2457 complainIfUndefined(mce, atom2);
2458 return assignNew('V', mce, Ity_I16, binop(op, vatom1, atom2));
2459 case Iop_GetElem32x2:
2460 complainIfUndefined(mce, atom2);
2461 return assignNew('V', mce, Ity_I32, binop(op, vatom1, atom2));
2462
sewardj114a9172008-02-09 01:49:32 +00002463 /* Perm8x8: rearrange values in left arg using steering values
2464 from right arg. So rearrange the vbits in the same way but
2465 pessimise wrt steering values. */
2466 case Iop_Perm8x8:
2467 return mkUifU64(
2468 mce,
sewardj7cf4e6b2008-05-01 20:24:26 +00002469 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
sewardj114a9172008-02-09 01:49:32 +00002470 mkPCast8x8(mce, vatom2)
2471 );
2472
sewardj20d38f22005-02-07 23:50:18 +00002473 /* V128-bit SIMD */
sewardj0b070592004-12-10 21:44:22 +00002474
sewardj57f92b02010-08-22 11:54:14 +00002475 case Iop_ShrN8x16:
sewardja1d93302004-12-12 16:45:06 +00002476 case Iop_ShrN16x8:
2477 case Iop_ShrN32x4:
2478 case Iop_ShrN64x2:
sewardj57f92b02010-08-22 11:54:14 +00002479 case Iop_SarN8x16:
sewardja1d93302004-12-12 16:45:06 +00002480 case Iop_SarN16x8:
2481 case Iop_SarN32x4:
sewardj57f92b02010-08-22 11:54:14 +00002482 case Iop_SarN64x2:
2483 case Iop_ShlN8x16:
sewardja1d93302004-12-12 16:45:06 +00002484 case Iop_ShlN16x8:
2485 case Iop_ShlN32x4:
2486 case Iop_ShlN64x2:
sewardj620eb5b2005-10-22 12:50:43 +00002487 /* Same scheme as with all other shifts. Note: 22 Oct 05:
2488 this is wrong now, scalar shifts are done properly lazily.
2489 Vector shifts should be fixed too. */
sewardja1d93302004-12-12 16:45:06 +00002490 complainIfUndefined(mce, atom2);
sewardj7cf4e6b2008-05-01 20:24:26 +00002491 return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2));
sewardja1d93302004-12-12 16:45:06 +00002492
sewardjcbf8be72005-11-10 18:34:41 +00002493 /* V x V shifts/rotates are done using the standard lazy scheme. */
sewardj43d60752005-11-10 18:13:01 +00002494 case Iop_Shl8x16:
2495 case Iop_Shr8x16:
2496 case Iop_Sar8x16:
sewardj57f92b02010-08-22 11:54:14 +00002497 case Iop_Sal8x16:
sewardjcbf8be72005-11-10 18:34:41 +00002498 case Iop_Rol8x16:
sewardj43d60752005-11-10 18:13:01 +00002499 return mkUifUV128(mce,
sewardj7cf4e6b2008-05-01 20:24:26 +00002500 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
sewardj43d60752005-11-10 18:13:01 +00002501 mkPCast8x16(mce,vatom2)
2502 );
2503
2504 case Iop_Shl16x8:
2505 case Iop_Shr16x8:
2506 case Iop_Sar16x8:
sewardj57f92b02010-08-22 11:54:14 +00002507 case Iop_Sal16x8:
sewardjcbf8be72005-11-10 18:34:41 +00002508 case Iop_Rol16x8:
sewardj43d60752005-11-10 18:13:01 +00002509 return mkUifUV128(mce,
sewardj7cf4e6b2008-05-01 20:24:26 +00002510 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
sewardj43d60752005-11-10 18:13:01 +00002511 mkPCast16x8(mce,vatom2)
2512 );
2513
2514 case Iop_Shl32x4:
2515 case Iop_Shr32x4:
2516 case Iop_Sar32x4:
sewardj57f92b02010-08-22 11:54:14 +00002517 case Iop_Sal32x4:
sewardjcbf8be72005-11-10 18:34:41 +00002518 case Iop_Rol32x4:
sewardj43d60752005-11-10 18:13:01 +00002519 return mkUifUV128(mce,
sewardj7cf4e6b2008-05-01 20:24:26 +00002520 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
sewardj43d60752005-11-10 18:13:01 +00002521 mkPCast32x4(mce,vatom2)
2522 );
2523
sewardj57f92b02010-08-22 11:54:14 +00002524 case Iop_Shl64x2:
2525 case Iop_Shr64x2:
2526 case Iop_Sar64x2:
2527 case Iop_Sal64x2:
2528 return mkUifUV128(mce,
2529 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
2530 mkPCast64x2(mce,vatom2)
2531 );
2532
2533 case Iop_F32ToFixed32Ux4_RZ:
2534 case Iop_F32ToFixed32Sx4_RZ:
2535 case Iop_Fixed32UToF32x4_RN:
2536 case Iop_Fixed32SToF32x4_RN:
2537 complainIfUndefined(mce, atom2);
2538 return mkPCast32x4(mce, vatom1);
2539
2540 case Iop_F32ToFixed32Ux2_RZ:
2541 case Iop_F32ToFixed32Sx2_RZ:
2542 case Iop_Fixed32UToF32x2_RN:
2543 case Iop_Fixed32SToF32x2_RN:
2544 complainIfUndefined(mce, atom2);
2545 return mkPCast32x2(mce, vatom1);
2546
sewardja1d93302004-12-12 16:45:06 +00002547 case Iop_QSub8Ux16:
2548 case Iop_QSub8Sx16:
2549 case Iop_Sub8x16:
2550 case Iop_Min8Ux16:
sewardj43d60752005-11-10 18:13:01 +00002551 case Iop_Min8Sx16:
sewardja1d93302004-12-12 16:45:06 +00002552 case Iop_Max8Ux16:
sewardj43d60752005-11-10 18:13:01 +00002553 case Iop_Max8Sx16:
sewardja1d93302004-12-12 16:45:06 +00002554 case Iop_CmpGT8Sx16:
sewardj43d60752005-11-10 18:13:01 +00002555 case Iop_CmpGT8Ux16:
sewardja1d93302004-12-12 16:45:06 +00002556 case Iop_CmpEQ8x16:
2557 case Iop_Avg8Ux16:
sewardj43d60752005-11-10 18:13:01 +00002558 case Iop_Avg8Sx16:
sewardja1d93302004-12-12 16:45:06 +00002559 case Iop_QAdd8Ux16:
2560 case Iop_QAdd8Sx16:
sewardj57f92b02010-08-22 11:54:14 +00002561 case Iop_QSal8x16:
2562 case Iop_QShl8x16:
sewardja1d93302004-12-12 16:45:06 +00002563 case Iop_Add8x16:
sewardj57f92b02010-08-22 11:54:14 +00002564 case Iop_Mul8x16:
2565 case Iop_PolynomialMul8x16:
sewardja1d93302004-12-12 16:45:06 +00002566 return binary8Ix16(mce, vatom1, vatom2);
2567
2568 case Iop_QSub16Ux8:
2569 case Iop_QSub16Sx8:
2570 case Iop_Sub16x8:
2571 case Iop_Mul16x8:
2572 case Iop_MulHi16Sx8:
2573 case Iop_MulHi16Ux8:
2574 case Iop_Min16Sx8:
sewardj43d60752005-11-10 18:13:01 +00002575 case Iop_Min16Ux8:
sewardja1d93302004-12-12 16:45:06 +00002576 case Iop_Max16Sx8:
sewardj43d60752005-11-10 18:13:01 +00002577 case Iop_Max16Ux8:
sewardja1d93302004-12-12 16:45:06 +00002578 case Iop_CmpGT16Sx8:
sewardj43d60752005-11-10 18:13:01 +00002579 case Iop_CmpGT16Ux8:
sewardja1d93302004-12-12 16:45:06 +00002580 case Iop_CmpEQ16x8:
2581 case Iop_Avg16Ux8:
sewardj43d60752005-11-10 18:13:01 +00002582 case Iop_Avg16Sx8:
sewardja1d93302004-12-12 16:45:06 +00002583 case Iop_QAdd16Ux8:
2584 case Iop_QAdd16Sx8:
sewardj57f92b02010-08-22 11:54:14 +00002585 case Iop_QSal16x8:
2586 case Iop_QShl16x8:
sewardja1d93302004-12-12 16:45:06 +00002587 case Iop_Add16x8:
sewardj57f92b02010-08-22 11:54:14 +00002588 case Iop_QDMulHi16Sx8:
2589 case Iop_QRDMulHi16Sx8:
sewardja1d93302004-12-12 16:45:06 +00002590 return binary16Ix8(mce, vatom1, vatom2);
2591
2592 case Iop_Sub32x4:
2593 case Iop_CmpGT32Sx4:
sewardj43d60752005-11-10 18:13:01 +00002594 case Iop_CmpGT32Ux4:
sewardja1d93302004-12-12 16:45:06 +00002595 case Iop_CmpEQ32x4:
sewardj43d60752005-11-10 18:13:01 +00002596 case Iop_QAdd32Sx4:
2597 case Iop_QAdd32Ux4:
2598 case Iop_QSub32Sx4:
2599 case Iop_QSub32Ux4:
sewardj57f92b02010-08-22 11:54:14 +00002600 case Iop_QSal32x4:
2601 case Iop_QShl32x4:
sewardj43d60752005-11-10 18:13:01 +00002602 case Iop_Avg32Ux4:
2603 case Iop_Avg32Sx4:
sewardja1d93302004-12-12 16:45:06 +00002604 case Iop_Add32x4:
sewardj43d60752005-11-10 18:13:01 +00002605 case Iop_Max32Ux4:
2606 case Iop_Max32Sx4:
2607 case Iop_Min32Ux4:
2608 case Iop_Min32Sx4:
sewardjb823b852010-06-18 08:18:38 +00002609 case Iop_Mul32x4:
sewardj57f92b02010-08-22 11:54:14 +00002610 case Iop_QDMulHi32Sx4:
2611 case Iop_QRDMulHi32Sx4:
sewardja1d93302004-12-12 16:45:06 +00002612 return binary32Ix4(mce, vatom1, vatom2);
2613
2614 case Iop_Sub64x2:
2615 case Iop_Add64x2:
sewardjb823b852010-06-18 08:18:38 +00002616 case Iop_CmpGT64Sx2:
sewardj57f92b02010-08-22 11:54:14 +00002617 case Iop_QSal64x2:
2618 case Iop_QShl64x2:
2619 case Iop_QAdd64Ux2:
2620 case Iop_QAdd64Sx2:
2621 case Iop_QSub64Ux2:
2622 case Iop_QSub64Sx2:
sewardja1d93302004-12-12 16:45:06 +00002623 return binary64Ix2(mce, vatom1, vatom2);
2624
2625 case Iop_QNarrow32Sx4:
sewardj43d60752005-11-10 18:13:01 +00002626 case Iop_QNarrow32Ux4:
sewardja1d93302004-12-12 16:45:06 +00002627 case Iop_QNarrow16Sx8:
2628 case Iop_QNarrow16Ux8:
sewardj20d38f22005-02-07 23:50:18 +00002629 return vectorNarrowV128(mce, op, vatom1, vatom2);
sewardja1d93302004-12-12 16:45:06 +00002630
sewardj0b070592004-12-10 21:44:22 +00002631 case Iop_Sub64Fx2:
2632 case Iop_Mul64Fx2:
2633 case Iop_Min64Fx2:
2634 case Iop_Max64Fx2:
2635 case Iop_Div64Fx2:
2636 case Iop_CmpLT64Fx2:
2637 case Iop_CmpLE64Fx2:
2638 case Iop_CmpEQ64Fx2:
sewardj545663e2005-11-05 01:55:04 +00002639 case Iop_CmpUN64Fx2:
sewardj0b070592004-12-10 21:44:22 +00002640 case Iop_Add64Fx2:
2641 return binary64Fx2(mce, vatom1, vatom2);
2642
2643 case Iop_Sub64F0x2:
2644 case Iop_Mul64F0x2:
2645 case Iop_Min64F0x2:
2646 case Iop_Max64F0x2:
2647 case Iop_Div64F0x2:
2648 case Iop_CmpLT64F0x2:
2649 case Iop_CmpLE64F0x2:
2650 case Iop_CmpEQ64F0x2:
sewardj545663e2005-11-05 01:55:04 +00002651 case Iop_CmpUN64F0x2:
sewardj0b070592004-12-10 21:44:22 +00002652 case Iop_Add64F0x2:
2653 return binary64F0x2(mce, vatom1, vatom2);
2654
sewardj170ee212004-12-10 18:57:51 +00002655 case Iop_Sub32Fx4:
2656 case Iop_Mul32Fx4:
2657 case Iop_Min32Fx4:
2658 case Iop_Max32Fx4:
2659 case Iop_Div32Fx4:
2660 case Iop_CmpLT32Fx4:
2661 case Iop_CmpLE32Fx4:
2662 case Iop_CmpEQ32Fx4:
sewardj545663e2005-11-05 01:55:04 +00002663 case Iop_CmpUN32Fx4:
cerione78ba2a2005-11-14 03:00:35 +00002664 case Iop_CmpGT32Fx4:
2665 case Iop_CmpGE32Fx4:
sewardj3245c912004-12-10 14:58:26 +00002666 case Iop_Add32Fx4:
sewardj57f92b02010-08-22 11:54:14 +00002667 case Iop_Recps32Fx4:
2668 case Iop_Rsqrts32Fx4:
sewardj3245c912004-12-10 14:58:26 +00002669 return binary32Fx4(mce, vatom1, vatom2);
2670
sewardj57f92b02010-08-22 11:54:14 +00002671 case Iop_Sub32Fx2:
2672 case Iop_Mul32Fx2:
2673 case Iop_Min32Fx2:
2674 case Iop_Max32Fx2:
2675 case Iop_CmpEQ32Fx2:
2676 case Iop_CmpGT32Fx2:
2677 case Iop_CmpGE32Fx2:
2678 case Iop_Add32Fx2:
2679 case Iop_Recps32Fx2:
2680 case Iop_Rsqrts32Fx2:
2681 return binary32Fx2(mce, vatom1, vatom2);
2682
sewardj170ee212004-12-10 18:57:51 +00002683 case Iop_Sub32F0x4:
2684 case Iop_Mul32F0x4:
2685 case Iop_Min32F0x4:
2686 case Iop_Max32F0x4:
2687 case Iop_Div32F0x4:
2688 case Iop_CmpLT32F0x4:
2689 case Iop_CmpLE32F0x4:
2690 case Iop_CmpEQ32F0x4:
sewardj545663e2005-11-05 01:55:04 +00002691 case Iop_CmpUN32F0x4:
sewardj170ee212004-12-10 18:57:51 +00002692 case Iop_Add32F0x4:
2693 return binary32F0x4(mce, vatom1, vatom2);
2694
sewardj57f92b02010-08-22 11:54:14 +00002695 case Iop_QShlN8Sx16:
2696 case Iop_QShlN8x16:
2697 case Iop_QSalN8x16:
2698 complainIfUndefined(mce, atom2);
2699 return mkPCast8x16(mce, vatom1);
2700
2701 case Iop_QShlN16Sx8:
2702 case Iop_QShlN16x8:
2703 case Iop_QSalN16x8:
2704 complainIfUndefined(mce, atom2);
2705 return mkPCast16x8(mce, vatom1);
2706
2707 case Iop_QShlN32Sx4:
2708 case Iop_QShlN32x4:
2709 case Iop_QSalN32x4:
2710 complainIfUndefined(mce, atom2);
2711 return mkPCast32x4(mce, vatom1);
2712
2713 case Iop_QShlN64Sx2:
2714 case Iop_QShlN64x2:
2715 case Iop_QSalN64x2:
2716 complainIfUndefined(mce, atom2);
2717 return mkPCast32x4(mce, vatom1);
2718
2719 case Iop_Mull32Sx2:
2720 case Iop_Mull32Ux2:
2721 case Iop_QDMulLong32Sx2:
2722 return vectorLongenI64(mce, Iop_Longen32Sx2,
2723 mkUifU64(mce, vatom1, vatom2));
2724
2725 case Iop_Mull16Sx4:
2726 case Iop_Mull16Ux4:
2727 case Iop_QDMulLong16Sx4:
2728 return vectorLongenI64(mce, Iop_Longen16Sx4,
2729 mkUifU64(mce, vatom1, vatom2));
2730
2731 case Iop_Mull8Sx8:
2732 case Iop_Mull8Ux8:
2733 case Iop_PolynomialMull8x8:
2734 return vectorLongenI64(mce, Iop_Longen8Sx8,
2735 mkUifU64(mce, vatom1, vatom2));
2736
2737 case Iop_PwAdd32x4:
2738 return mkPCast32x4(mce,
2739 assignNew('V', mce, Ity_V128, binop(op, mkPCast32x4(mce, vatom1),
2740 mkPCast32x4(mce, vatom2))));
2741
2742 case Iop_PwAdd16x8:
2743 return mkPCast16x8(mce,
2744 assignNew('V', mce, Ity_V128, binop(op, mkPCast16x8(mce, vatom1),
2745 mkPCast16x8(mce, vatom2))));
2746
2747 case Iop_PwAdd8x16:
2748 return mkPCast8x16(mce,
2749 assignNew('V', mce, Ity_V128, binop(op, mkPCast8x16(mce, vatom1),
2750 mkPCast8x16(mce, vatom2))));
2751
sewardj20d38f22005-02-07 23:50:18 +00002752 /* V128-bit data-steering */
2753 case Iop_SetV128lo32:
2754 case Iop_SetV128lo64:
2755 case Iop_64HLtoV128:
sewardja1d93302004-12-12 16:45:06 +00002756 case Iop_InterleaveLO64x2:
2757 case Iop_InterleaveLO32x4:
2758 case Iop_InterleaveLO16x8:
2759 case Iop_InterleaveLO8x16:
2760 case Iop_InterleaveHI64x2:
2761 case Iop_InterleaveHI32x4:
2762 case Iop_InterleaveHI16x8:
2763 case Iop_InterleaveHI8x16:
sewardj57f92b02010-08-22 11:54:14 +00002764 case Iop_CatOddLanes8x16:
2765 case Iop_CatOddLanes16x8:
2766 case Iop_CatOddLanes32x4:
2767 case Iop_CatEvenLanes8x16:
2768 case Iop_CatEvenLanes16x8:
2769 case Iop_CatEvenLanes32x4:
2770 case Iop_InterleaveOddLanes8x16:
2771 case Iop_InterleaveOddLanes16x8:
2772 case Iop_InterleaveOddLanes32x4:
2773 case Iop_InterleaveEvenLanes8x16:
2774 case Iop_InterleaveEvenLanes16x8:
2775 case Iop_InterleaveEvenLanes32x4:
sewardj7cf4e6b2008-05-01 20:24:26 +00002776 return assignNew('V', mce, Ity_V128, binop(op, vatom1, vatom2));
sewardj57f92b02010-08-22 11:54:14 +00002777
2778 case Iop_GetElem8x16:
2779 complainIfUndefined(mce, atom2);
2780 return assignNew('V', mce, Ity_I8, binop(op, vatom1, atom2));
2781 case Iop_GetElem16x8:
2782 complainIfUndefined(mce, atom2);
2783 return assignNew('V', mce, Ity_I16, binop(op, vatom1, atom2));
2784 case Iop_GetElem32x4:
2785 complainIfUndefined(mce, atom2);
2786 return assignNew('V', mce, Ity_I32, binop(op, vatom1, atom2));
2787 case Iop_GetElem64x2:
2788 complainIfUndefined(mce, atom2);
2789 return assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2));
2790
sewardj620eb5b2005-10-22 12:50:43 +00002791 /* Perm8x16: rearrange values in left arg using steering values
2792 from right arg. So rearrange the vbits in the same way but
2793 pessimise wrt steering values. */
2794 case Iop_Perm8x16:
2795 return mkUifUV128(
2796 mce,
sewardj7cf4e6b2008-05-01 20:24:26 +00002797 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
sewardj620eb5b2005-10-22 12:50:43 +00002798 mkPCast8x16(mce, vatom2)
2799 );
sewardj170ee212004-12-10 18:57:51 +00002800
sewardj43d60752005-11-10 18:13:01 +00002801 /* These two take the lower half of each 16-bit lane, sign/zero
2802 extend it to 32, and multiply together, producing a 32x4
2803 result (and implicitly ignoring half the operand bits). So
2804 treat it as a bunch of independent 16x8 operations, but then
2805 do 32-bit shifts left-right to copy the lower half results
2806 (which are all 0s or all 1s due to PCasting in binary16Ix8)
2807 into the upper half of each result lane. */
2808 case Iop_MullEven16Ux8:
2809 case Iop_MullEven16Sx8: {
2810 IRAtom* at;
2811 at = binary16Ix8(mce,vatom1,vatom2);
sewardj7cf4e6b2008-05-01 20:24:26 +00002812 at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN32x4, at, mkU8(16)));
2813 at = assignNew('V', mce, Ity_V128, binop(Iop_SarN32x4, at, mkU8(16)));
sewardj43d60752005-11-10 18:13:01 +00002814 return at;
2815 }
2816
2817 /* Same deal as Iop_MullEven16{S,U}x8 */
2818 case Iop_MullEven8Ux16:
2819 case Iop_MullEven8Sx16: {
2820 IRAtom* at;
2821 at = binary8Ix16(mce,vatom1,vatom2);
sewardj7cf4e6b2008-05-01 20:24:26 +00002822 at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN16x8, at, mkU8(8)));
2823 at = assignNew('V', mce, Ity_V128, binop(Iop_SarN16x8, at, mkU8(8)));
sewardj43d60752005-11-10 18:13:01 +00002824 return at;
2825 }
2826
2827 /* narrow 2xV128 into 1xV128, hi half from left arg, in a 2 x
2828 32x4 -> 16x8 laneage, discarding the upper half of each lane.
2829 Simply apply same op to the V bits, since this really no more
2830 than a data steering operation. */
sewardjcbf8be72005-11-10 18:34:41 +00002831 case Iop_Narrow32x4:
2832 case Iop_Narrow16x8:
sewardj7cf4e6b2008-05-01 20:24:26 +00002833 return assignNew('V', mce, Ity_V128,
2834 binop(op, vatom1, vatom2));
sewardj43d60752005-11-10 18:13:01 +00002835
2836 case Iop_ShrV128:
2837 case Iop_ShlV128:
2838 /* Same scheme as with all other shifts. Note: 10 Nov 05:
2839 this is wrong now, scalar shifts are done properly lazily.
2840 Vector shifts should be fixed too. */
2841 complainIfUndefined(mce, atom2);
sewardj7cf4e6b2008-05-01 20:24:26 +00002842 return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2));
sewardj43d60752005-11-10 18:13:01 +00002843
sewardj69a13322005-04-23 01:14:51 +00002844 /* I128-bit data-steering */
2845 case Iop_64HLto128:
sewardj7cf4e6b2008-05-01 20:24:26 +00002846 return assignNew('V', mce, Ity_I128, binop(op, vatom1, vatom2));
sewardj69a13322005-04-23 01:14:51 +00002847
sewardj3245c912004-12-10 14:58:26 +00002848 /* Scalar floating point */
2849
sewardjed69fdb2006-02-03 16:12:27 +00002850 case Iop_RoundF64toInt:
2851 case Iop_RoundF64toF32:
sewardj06f96d02009-12-31 19:24:12 +00002852 case Iop_F64toI64S:
2853 case Iop_I64StoF64:
sewardj22ac5f42006-02-03 22:55:04 +00002854 case Iop_SinF64:
2855 case Iop_CosF64:
2856 case Iop_TanF64:
2857 case Iop_2xm1F64:
2858 case Iop_SqrtF64:
2859 /* I32(rm) x I64/F64 -> I64/F64 */
sewardj95448072004-11-22 20:19:51 +00002860 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
2861
sewardjd376a762010-06-27 09:08:54 +00002862 case Iop_RoundF32toInt:
sewardjaec1be32010-01-03 22:29:32 +00002863 case Iop_SqrtF32:
2864 /* I32(rm) x I32/F32 -> I32/F32 */
2865 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
2866
sewardj59570ff2010-01-01 11:59:33 +00002867 case Iop_F64toI32U:
sewardj06f96d02009-12-31 19:24:12 +00002868 case Iop_F64toI32S:
sewardje9e16d32004-12-10 13:17:55 +00002869 case Iop_F64toF32:
sewardj95448072004-11-22 20:19:51 +00002870 /* First arg is I32 (rounding mode), second is F64 (data). */
2871 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
2872
sewardj06f96d02009-12-31 19:24:12 +00002873 case Iop_F64toI16S:
sewardj95448072004-11-22 20:19:51 +00002874 /* First arg is I32 (rounding mode), second is F64 (data). */
2875 return mkLazy2(mce, Ity_I16, vatom1, vatom2);
2876
sewardj95448072004-11-22 20:19:51 +00002877 case Iop_CmpF64:
2878 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
2879
2880 /* non-FP after here */
2881
2882 case Iop_DivModU64to32:
2883 case Iop_DivModS64to32:
2884 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
2885
sewardj69a13322005-04-23 01:14:51 +00002886 case Iop_DivModU128to64:
2887 case Iop_DivModS128to64:
2888 return mkLazy2(mce, Ity_I128, vatom1, vatom2);
2889
sewardj95448072004-11-22 20:19:51 +00002890 case Iop_16HLto32:
sewardj7cf4e6b2008-05-01 20:24:26 +00002891 return assignNew('V', mce, Ity_I32, binop(op, vatom1, vatom2));
sewardj95448072004-11-22 20:19:51 +00002892 case Iop_32HLto64:
sewardj7cf4e6b2008-05-01 20:24:26 +00002893 return assignNew('V', mce, Ity_I64, binop(op, vatom1, vatom2));
sewardj95448072004-11-22 20:19:51 +00002894
sewardj6cf40ff2005-04-20 22:31:26 +00002895 case Iop_MullS64:
2896 case Iop_MullU64: {
2897 IRAtom* vLo64 = mkLeft64(mce, mkUifU64(mce, vatom1,vatom2));
2898 IRAtom* vHi64 = mkPCastTo(mce, Ity_I64, vLo64);
sewardj7cf4e6b2008-05-01 20:24:26 +00002899 return assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, vHi64, vLo64));
sewardj6cf40ff2005-04-20 22:31:26 +00002900 }
2901
sewardj95448072004-11-22 20:19:51 +00002902 case Iop_MullS32:
2903 case Iop_MullU32: {
2904 IRAtom* vLo32 = mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
2905 IRAtom* vHi32 = mkPCastTo(mce, Ity_I32, vLo32);
sewardj7cf4e6b2008-05-01 20:24:26 +00002906 return assignNew('V', mce, Ity_I64, binop(Iop_32HLto64, vHi32, vLo32));
sewardj95448072004-11-22 20:19:51 +00002907 }
2908
2909 case Iop_MullS16:
2910 case Iop_MullU16: {
2911 IRAtom* vLo16 = mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
2912 IRAtom* vHi16 = mkPCastTo(mce, Ity_I16, vLo16);
sewardj7cf4e6b2008-05-01 20:24:26 +00002913 return assignNew('V', mce, Ity_I32, binop(Iop_16HLto32, vHi16, vLo16));
sewardj95448072004-11-22 20:19:51 +00002914 }
2915
2916 case Iop_MullS8:
2917 case Iop_MullU8: {
2918 IRAtom* vLo8 = mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
2919 IRAtom* vHi8 = mkPCastTo(mce, Ity_I8, vLo8);
sewardj7cf4e6b2008-05-01 20:24:26 +00002920 return assignNew('V', mce, Ity_I16, binop(Iop_8HLto16, vHi8, vLo8));
sewardj95448072004-11-22 20:19:51 +00002921 }
2922
sewardj5af05062010-10-18 16:31:14 +00002923 case Iop_Sad8Ux4: /* maybe we could do better? ftm, do mkLazy2. */
cerion9e591082005-06-23 15:28:34 +00002924 case Iop_DivS32:
2925 case Iop_DivU32:
2926 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
2927
sewardjb00944a2005-12-23 12:47:16 +00002928 case Iop_DivS64:
2929 case Iop_DivU64:
2930 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
2931
sewardj95448072004-11-22 20:19:51 +00002932 case Iop_Add32:
sewardjd5204dc2004-12-31 01:16:11 +00002933 if (mce->bogusLiterals)
2934 return expensiveAddSub(mce,True,Ity_I32,
2935 vatom1,vatom2, atom1,atom2);
2936 else
2937 goto cheap_AddSub32;
sewardj95448072004-11-22 20:19:51 +00002938 case Iop_Sub32:
sewardjd5204dc2004-12-31 01:16:11 +00002939 if (mce->bogusLiterals)
2940 return expensiveAddSub(mce,False,Ity_I32,
2941 vatom1,vatom2, atom1,atom2);
2942 else
2943 goto cheap_AddSub32;
2944
2945 cheap_AddSub32:
sewardj95448072004-11-22 20:19:51 +00002946 case Iop_Mul32:
sewardj992dff92005-10-07 11:08:55 +00002947 return mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
2948
sewardj463b3d92005-07-18 11:41:15 +00002949 case Iop_CmpORD32S:
2950 case Iop_CmpORD32U:
sewardj1bc82102005-12-23 00:16:24 +00002951 case Iop_CmpORD64S:
2952 case Iop_CmpORD64U:
2953 return doCmpORD(mce, op, vatom1,vatom2, atom1,atom2);
sewardj95448072004-11-22 20:19:51 +00002954
sewardj681be302005-01-15 20:43:58 +00002955 case Iop_Add64:
tomd9774d72005-06-27 08:11:01 +00002956 if (mce->bogusLiterals)
2957 return expensiveAddSub(mce,True,Ity_I64,
2958 vatom1,vatom2, atom1,atom2);
2959 else
2960 goto cheap_AddSub64;
sewardj681be302005-01-15 20:43:58 +00002961 case Iop_Sub64:
tomd9774d72005-06-27 08:11:01 +00002962 if (mce->bogusLiterals)
2963 return expensiveAddSub(mce,False,Ity_I64,
2964 vatom1,vatom2, atom1,atom2);
2965 else
2966 goto cheap_AddSub64;
2967
2968 cheap_AddSub64:
2969 case Iop_Mul64:
sewardj681be302005-01-15 20:43:58 +00002970 return mkLeft64(mce, mkUifU64(mce, vatom1,vatom2));
2971
sewardj95448072004-11-22 20:19:51 +00002972 case Iop_Mul16:
2973 case Iop_Add16:
2974 case Iop_Sub16:
2975 return mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
2976
2977 case Iop_Sub8:
2978 case Iop_Add8:
2979 return mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
2980
sewardj69a13322005-04-23 01:14:51 +00002981 case Iop_CmpEQ64:
sewardje6f8af42005-07-06 18:48:59 +00002982 case Iop_CmpNE64:
sewardj69a13322005-04-23 01:14:51 +00002983 if (mce->bogusLiterals)
2984 return expensiveCmpEQorNE(mce,Ity_I64, vatom1,vatom2, atom1,atom2 );
2985 else
2986 goto cheap_cmp64;
2987 cheap_cmp64:
tomcd986332005-04-26 07:44:48 +00002988 case Iop_CmpLE64S: case Iop_CmpLE64U:
2989 case Iop_CmpLT64U: case Iop_CmpLT64S:
sewardj69a13322005-04-23 01:14:51 +00002990 return mkPCastTo(mce, Ity_I1, mkUifU64(mce, vatom1,vatom2));
2991
sewardjd5204dc2004-12-31 01:16:11 +00002992 case Iop_CmpEQ32:
sewardje6f8af42005-07-06 18:48:59 +00002993 case Iop_CmpNE32:
sewardjd5204dc2004-12-31 01:16:11 +00002994 if (mce->bogusLiterals)
2995 return expensiveCmpEQorNE(mce,Ity_I32, vatom1,vatom2, atom1,atom2 );
2996 else
2997 goto cheap_cmp32;
sewardjd5204dc2004-12-31 01:16:11 +00002998 cheap_cmp32:
sewardj95448072004-11-22 20:19:51 +00002999 case Iop_CmpLE32S: case Iop_CmpLE32U:
3000 case Iop_CmpLT32U: case Iop_CmpLT32S:
sewardj95448072004-11-22 20:19:51 +00003001 return mkPCastTo(mce, Ity_I1, mkUifU32(mce, vatom1,vatom2));
3002
3003 case Iop_CmpEQ16: case Iop_CmpNE16:
3004 return mkPCastTo(mce, Ity_I1, mkUifU16(mce, vatom1,vatom2));
3005
3006 case Iop_CmpEQ8: case Iop_CmpNE8:
3007 return mkPCastTo(mce, Ity_I1, mkUifU8(mce, vatom1,vatom2));
3008
sewardjafed4c52009-07-12 13:00:17 +00003009 case Iop_CasCmpEQ8: case Iop_CasCmpNE8:
3010 case Iop_CasCmpEQ16: case Iop_CasCmpNE16:
3011 case Iop_CasCmpEQ32: case Iop_CasCmpNE32:
3012 case Iop_CasCmpEQ64: case Iop_CasCmpNE64:
3013 /* Just say these all produce a defined result, regardless
3014 of their arguments. See COMMENT_ON_CasCmpEQ in this file. */
3015 return assignNew('V', mce, Ity_I1, definedOfType(Ity_I1));
3016
sewardjaaddbc22005-10-07 09:49:53 +00003017 case Iop_Shl64: case Iop_Shr64: case Iop_Sar64:
3018 return scalarShift( mce, Ity_I64, op, vatom1,vatom2, atom1,atom2 );
3019
sewardj95448072004-11-22 20:19:51 +00003020 case Iop_Shl32: case Iop_Shr32: case Iop_Sar32:
sewardjaaddbc22005-10-07 09:49:53 +00003021 return scalarShift( mce, Ity_I32, op, vatom1,vatom2, atom1,atom2 );
sewardj95448072004-11-22 20:19:51 +00003022
sewardjdb67f5f2004-12-14 01:15:31 +00003023 case Iop_Shl16: case Iop_Shr16: case Iop_Sar16:
sewardjaaddbc22005-10-07 09:49:53 +00003024 return scalarShift( mce, Ity_I16, op, vatom1,vatom2, atom1,atom2 );
sewardj95448072004-11-22 20:19:51 +00003025
3026 case Iop_Shl8: case Iop_Shr8:
sewardjaaddbc22005-10-07 09:49:53 +00003027 return scalarShift( mce, Ity_I8, op, vatom1,vatom2, atom1,atom2 );
sewardj95448072004-11-22 20:19:51 +00003028
sewardj20d38f22005-02-07 23:50:18 +00003029 case Iop_AndV128:
3030 uifu = mkUifUV128; difd = mkDifDV128;
3031 and_or_ty = Ity_V128; improve = mkImproveANDV128; goto do_And_Or;
sewardj7010f6e2004-12-10 13:35:22 +00003032 case Iop_And64:
3033 uifu = mkUifU64; difd = mkDifD64;
3034 and_or_ty = Ity_I64; improve = mkImproveAND64; goto do_And_Or;
sewardj95448072004-11-22 20:19:51 +00003035 case Iop_And32:
3036 uifu = mkUifU32; difd = mkDifD32;
3037 and_or_ty = Ity_I32; improve = mkImproveAND32; goto do_And_Or;
3038 case Iop_And16:
3039 uifu = mkUifU16; difd = mkDifD16;
3040 and_or_ty = Ity_I16; improve = mkImproveAND16; goto do_And_Or;
3041 case Iop_And8:
3042 uifu = mkUifU8; difd = mkDifD8;
3043 and_or_ty = Ity_I8; improve = mkImproveAND8; goto do_And_Or;
3044
sewardj20d38f22005-02-07 23:50:18 +00003045 case Iop_OrV128:
3046 uifu = mkUifUV128; difd = mkDifDV128;
3047 and_or_ty = Ity_V128; improve = mkImproveORV128; goto do_And_Or;
sewardj7010f6e2004-12-10 13:35:22 +00003048 case Iop_Or64:
3049 uifu = mkUifU64; difd = mkDifD64;
3050 and_or_ty = Ity_I64; improve = mkImproveOR64; goto do_And_Or;
sewardj95448072004-11-22 20:19:51 +00003051 case Iop_Or32:
3052 uifu = mkUifU32; difd = mkDifD32;
3053 and_or_ty = Ity_I32; improve = mkImproveOR32; goto do_And_Or;
3054 case Iop_Or16:
3055 uifu = mkUifU16; difd = mkDifD16;
3056 and_or_ty = Ity_I16; improve = mkImproveOR16; goto do_And_Or;
3057 case Iop_Or8:
3058 uifu = mkUifU8; difd = mkDifD8;
3059 and_or_ty = Ity_I8; improve = mkImproveOR8; goto do_And_Or;
3060
3061 do_And_Or:
3062 return
3063 assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +00003064 'V', mce,
sewardj95448072004-11-22 20:19:51 +00003065 and_or_ty,
3066 difd(mce, uifu(mce, vatom1, vatom2),
3067 difd(mce, improve(mce, atom1, vatom1),
3068 improve(mce, atom2, vatom2) ) ) );
3069
3070 case Iop_Xor8:
3071 return mkUifU8(mce, vatom1, vatom2);
3072 case Iop_Xor16:
3073 return mkUifU16(mce, vatom1, vatom2);
3074 case Iop_Xor32:
3075 return mkUifU32(mce, vatom1, vatom2);
sewardj7010f6e2004-12-10 13:35:22 +00003076 case Iop_Xor64:
3077 return mkUifU64(mce, vatom1, vatom2);
sewardj20d38f22005-02-07 23:50:18 +00003078 case Iop_XorV128:
3079 return mkUifUV128(mce, vatom1, vatom2);
njn25e49d8e72002-09-23 09:36:25 +00003080
3081 default:
sewardj95448072004-11-22 20:19:51 +00003082 ppIROp(op);
3083 VG_(tool_panic)("memcheck:expr2vbits_Binop");
njn25e49d8e72002-09-23 09:36:25 +00003084 }
njn25e49d8e72002-09-23 09:36:25 +00003085}
3086
njn25e49d8e72002-09-23 09:36:25 +00003087
/* Compute the shadow (V-bits) value for a unary operation.  'atom'
   is the original operand; its shadow is computed first and then
   transformed according to what 'op' does to definedness:
     - arithmetic-ish SIMD/FP ops pessimise lane-wise or whole-word,
     - pure bit-rearrangements (Not*, Reinterp*, Dup*, Reverse*,
       widen/narrow) apply the same rearrangement to the V bits,
     - anything unhandled panics. */
static
IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
{
   IRAtom* vatom = expr2vbits( mce, atom );
   tl_assert(isOriginalAtom(mce,atom));
   switch (op) {

      case Iop_Sqrt64Fx2:
         return unary64Fx2(mce, vatom);

      case Iop_Sqrt64F0x2:
         return unary64F0x2(mce, vatom);

      /* 4 x F32 SIMD ops: handled lane-wise by unary32Fx4. */
      case Iop_Sqrt32Fx4:
      case Iop_RSqrt32Fx4:
      case Iop_Recip32Fx4:
      case Iop_I32UtoFx4:
      case Iop_I32StoFx4:
      case Iop_QFtoI32Ux4_RZ:
      case Iop_QFtoI32Sx4_RZ:
      case Iop_RoundF32x4_RM:
      case Iop_RoundF32x4_RP:
      case Iop_RoundF32x4_RN:
      case Iop_RoundF32x4_RZ:
      case Iop_Recip32x4:
      case Iop_Abs32Fx4:
      case Iop_Neg32Fx4:
      case Iop_Rsqrte32Fx4:
         return unary32Fx4(mce, vatom);

      /* 2 x F32 (64-bit) SIMD ops. */
      case Iop_I32UtoFx2:
      case Iop_I32StoFx2:
      case Iop_Recip32Fx2:
      case Iop_Recip32x2:
      case Iop_Abs32Fx2:
      case Iop_Neg32Fx2:
      case Iop_Rsqrte32Fx2:
         return unary32Fx2(mce, vatom);

      /* Lowest-lane-only F32 ops. */
      case Iop_Sqrt32F0x4:
      case Iop_RSqrt32F0x4:
      case Iop_Recip32F0x4:
         return unary32F0x4(mce, vatom);

      /* Data-movement ops producing V128: the same movement applied
         to the V bits gives the exact shadow result. */
      case Iop_32UtoV128:
      case Iop_64UtoV128:
      case Iop_Dup8x16:
      case Iop_Dup16x8:
      case Iop_Dup32x4:
      case Iop_Reverse16_8x16:
      case Iop_Reverse32_8x16:
      case Iop_Reverse32_16x8:
      case Iop_Reverse64_8x16:
      case Iop_Reverse64_16x8:
      case Iop_Reverse64_32x4:
         return assignNew('V', mce, Ity_V128, unop(op, vatom));

      /* Ops whose 64-bit result depends on all input bits: pessimise
         the whole operand into an all-or-nothing 64-bit shadow. */
      case Iop_F32toF64:
      case Iop_I32StoF64:
      case Iop_I32UtoF64:
      case Iop_NegF64:
      case Iop_AbsF64:
      case Iop_Est5FRSqrt:
      case Iop_RoundF64toF64_NEAREST:
      case Iop_RoundF64toF64_NegINF:
      case Iop_RoundF64toF64_PosINF:
      case Iop_RoundF64toF64_ZERO:
      case Iop_Clz64:
      case Iop_Ctz64:
         return mkPCastTo(mce, Ity_I64, vatom);

      /* Same, but with a 32-bit result. */
      case Iop_Clz32:
      case Iop_Ctz32:
      case Iop_TruncF64asF32:
      case Iop_NegF32:
      case Iop_AbsF32:
         return mkPCastTo(mce, Ity_I32, vatom);

      /* Widen/narrow/select/rearrange to I64: apply the op itself to
         the V bits. */
      case Iop_1Uto64:
      case Iop_8Uto64:
      case Iop_8Sto64:
      case Iop_16Uto64:
      case Iop_16Sto64:
      case Iop_32Sto64:
      case Iop_32Uto64:
      case Iop_V128to64:
      case Iop_V128HIto64:
      case Iop_128HIto64:
      case Iop_128to64:
      case Iop_Dup8x8:
      case Iop_Dup16x4:
      case Iop_Dup32x2:
      case Iop_Reverse16_8x8:
      case Iop_Reverse32_8x8:
      case Iop_Reverse32_16x4:
      case Iop_Reverse64_8x8:
      case Iop_Reverse64_16x4:
      case Iop_Reverse64_32x2:
         return assignNew('V', mce, Ity_I64, unop(op, vatom));

      /* Likewise, to I32. */
      case Iop_64to32:
      case Iop_64HIto32:
      case Iop_1Uto32:
      case Iop_1Sto32:
      case Iop_8Uto32:
      case Iop_16Uto32:
      case Iop_16Sto32:
      case Iop_8Sto32:
      case Iop_V128to32:
         return assignNew('V', mce, Ity_I32, unop(op, vatom));

      /* Likewise, to I16. */
      case Iop_8Sto16:
      case Iop_8Uto16:
      case Iop_32to16:
      case Iop_32HIto16:
      case Iop_64to16:
         return assignNew('V', mce, Ity_I16, unop(op, vatom));

      /* Likewise, to I8. */
      case Iop_1Uto8:
      case Iop_16to8:
      case Iop_16HIto8:
      case Iop_32to8:
      case Iop_64to8:
         return assignNew('V', mce, Ity_I8, unop(op, vatom));

      case Iop_32to1:
         return assignNew('V', mce, Ity_I1, unop(Iop_32to1, vatom));

      case Iop_64to1:
         return assignNew('V', mce, Ity_I1, unop(Iop_64to1, vatom));

      /* Reinterpretations and bitwise-NOTs leave each bit's
         definedness unchanged, so the shadow passes through as-is. */
      case Iop_ReinterpF64asI64:
      case Iop_ReinterpI64asF64:
      case Iop_ReinterpI32asF32:
      case Iop_ReinterpF32asI32:
      case Iop_NotV128:
      case Iop_Not64:
      case Iop_Not32:
      case Iop_Not16:
      case Iop_Not8:
      case Iop_Not1:
         return vatom;

      /* Lane-wise pessimising casts: any undefined bit in a lane
         makes that whole lane's result undefined. */
      case Iop_CmpNEZ8x8:
      case Iop_Cnt8x8:
      case Iop_Clz8Sx8:
      case Iop_Cls8Sx8:
      case Iop_Abs8x8:
         return mkPCast8x8(mce, vatom);

      case Iop_CmpNEZ8x16:
      case Iop_Cnt8x16:
      case Iop_Clz8Sx16:
      case Iop_Cls8Sx16:
      case Iop_Abs8x16:
         return mkPCast8x16(mce, vatom);

      case Iop_CmpNEZ16x4:
      case Iop_Clz16Sx4:
      case Iop_Cls16Sx4:
      case Iop_Abs16x4:
         return mkPCast16x4(mce, vatom);

      case Iop_CmpNEZ16x8:
      case Iop_Clz16Sx8:
      case Iop_Cls16Sx8:
      case Iop_Abs16x8:
         return mkPCast16x8(mce, vatom);

      case Iop_CmpNEZ32x2:
      case Iop_Clz32Sx2:
      case Iop_Cls32Sx2:
      case Iop_FtoI32Ux2_RZ:
      case Iop_FtoI32Sx2_RZ:
      case Iop_Abs32x2:
         return mkPCast32x2(mce, vatom);

      case Iop_CmpNEZ32x4:
      case Iop_Clz32Sx4:
      case Iop_Cls32Sx4:
      case Iop_FtoI32Ux4_RZ:
      case Iop_FtoI32Sx4_RZ:
      case Iop_Abs32x4:
         return mkPCast32x4(mce, vatom);

      case Iop_CmpwNEZ64:
         return mkPCastTo(mce, Ity_I64, vatom);

      case Iop_CmpNEZ64x2:
         return mkPCast64x2(mce, vatom);

      /* Vector narrowing (saturating or not): delegated wholesale. */
      case Iop_Shorten16x8:
      case Iop_Shorten32x4:
      case Iop_Shorten64x2:
      case Iop_QShortenS16Sx8:
      case Iop_QShortenU16Sx8:
      case Iop_QShortenU16Ux8:
      case Iop_QShortenS32Sx4:
      case Iop_QShortenU32Sx4:
      case Iop_QShortenU32Ux4:
      case Iop_QShortenS64Sx2:
      case Iop_QShortenU64Sx2:
      case Iop_QShortenU64Ux2:
         return vectorShortenV128(mce, op, vatom);

      /* Vector widening: delegated wholesale. */
      case Iop_Longen8Sx8:
      case Iop_Longen8Ux8:
      case Iop_Longen16Sx4:
      case Iop_Longen16Ux4:
      case Iop_Longen32Sx2:
      case Iop_Longen32Ux2:
         return vectorLongenI64(mce, op, vatom);

      /* Pairwise-add-long: pessimise each input lane, apply the op to
         the shadow, then pessimise the (wider) result lanes. */
      case Iop_PwAddL32Ux2:
      case Iop_PwAddL32Sx2:
         return mkPCastTo(mce, Ity_I64,
               assignNew('V', mce, Ity_I64, unop(op, mkPCast32x2(mce, vatom))));

      case Iop_PwAddL16Ux4:
      case Iop_PwAddL16Sx4:
         return mkPCast32x2(mce,
               assignNew('V', mce, Ity_I64, unop(op, mkPCast16x4(mce, vatom))));

      case Iop_PwAddL8Ux8:
      case Iop_PwAddL8Sx8:
         return mkPCast16x4(mce,
               assignNew('V', mce, Ity_I64, unop(op, mkPCast8x8(mce, vatom))));

      case Iop_PwAddL32Ux4:
      case Iop_PwAddL32Sx4:
         return mkPCast64x2(mce,
               assignNew('V', mce, Ity_V128, unop(op, mkPCast32x4(mce, vatom))));

      case Iop_PwAddL16Ux8:
      case Iop_PwAddL16Sx8:
         return mkPCast32x4(mce,
               assignNew('V', mce, Ity_V128, unop(op, mkPCast16x8(mce, vatom))));

      case Iop_PwAddL8Ux16:
      case Iop_PwAddL8Sx16:
         return mkPCast16x8(mce,
               assignNew('V', mce, Ity_V128, unop(op, mkPCast8x16(mce, vatom))));

      default:
         ppIROp(op);
         VG_(tool_panic)("memcheck:expr2vbits_Unop");
   }
}
3336
3337
/* Worker function; do not call directly.
   Emit IR that fetches the V bits for a scalar (I8/I16/I32/I64) load
   of type 'ty' from address 'addr'+'bias', by calling the appropriate
   MC_(helperc_LOADV*) helper for the requested endianness.  Returns an
   atom (a temp read) holding the loaded V bits.  The address itself is
   checked for definedness first.  Callers go via expr2vbits_Load,
   which also handles the V128 case. */
static
IRAtom* expr2vbits_Load_WRK ( MCEnv* mce,
                              IREndness end, IRType ty,
                              IRAtom* addr, UInt bias )
{
   void*    helper;
   Char*    hname;
   IRDirty* di;
   IRTemp   datavbits;
   IRAtom*  addrAct;

   tl_assert(isOriginalAtom(mce,addr));
   tl_assert(end == Iend_LE || end == Iend_BE);

   /* First, emit a definedness test for the address.  This also sets
      the address (shadow) to 'defined' following the test. */
   complainIfUndefined( mce, addr );

   /* Now cook up a call to the relevant helper function, to read the
      data V bits from shadow memory. */
   ty = shadowTypeV(ty);

   if (end == Iend_LE) {
      switch (ty) {
         case Ity_I64: helper = &MC_(helperc_LOADV64le);
                       hname = "MC_(helperc_LOADV64le)";
                       break;
         case Ity_I32: helper = &MC_(helperc_LOADV32le);
                       hname = "MC_(helperc_LOADV32le)";
                       break;
         case Ity_I16: helper = &MC_(helperc_LOADV16le);
                       hname = "MC_(helperc_LOADV16le)";
                       break;
         /* single-byte loads have no endianness, hence one helper */
         case Ity_I8:  helper = &MC_(helperc_LOADV8);
                       hname = "MC_(helperc_LOADV8)";
                       break;
         default:      ppIRType(ty);
                       VG_(tool_panic)("memcheck:do_shadow_Load(LE)");
      }
   } else {
      switch (ty) {
         case Ity_I64: helper = &MC_(helperc_LOADV64be);
                       hname = "MC_(helperc_LOADV64be)";
                       break;
         case Ity_I32: helper = &MC_(helperc_LOADV32be);
                       hname = "MC_(helperc_LOADV32be)";
                       break;
         case Ity_I16: helper = &MC_(helperc_LOADV16be);
                       hname = "MC_(helperc_LOADV16be)";
                       break;
         case Ity_I8:  helper = &MC_(helperc_LOADV8);
                       hname = "MC_(helperc_LOADV8)";
                       break;
         default:      ppIRType(ty);
                       VG_(tool_panic)("memcheck:do_shadow_Load(BE)");
      }
   }

   /* Generate the actual address into addrAct. */
   if (bias == 0) {
      addrAct = addr;
   } else {
      /* Fold the bias in with a host-word-sized add. */
      IROp    mkAdd;
      IRAtom* eBias;
      IRType  tyAddr  = mce->hWordTy;
      tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
      mkAdd = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
      eBias = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
      addrAct = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBias) );
   }

   /* We need to have a place to park the V bits we're just about to
      read. */
   datavbits = newTemp(mce, ty, VSh);
   di = unsafeIRDirty_1_N( datavbits,
                           1/*regparms*/,
                           hname, VG_(fnptr_to_fnentry)( helper ),
                           mkIRExprVec_1( addrAct ));
   setHelperAnns( mce, di );
   stmt( 'V', mce, IRStmt_Dirty(di) );

   return mkexpr(datavbits);
}
3422
3423
3424static
sewardj2e595852005-06-30 23:33:37 +00003425IRAtom* expr2vbits_Load ( MCEnv* mce,
3426 IREndness end, IRType ty,
3427 IRAtom* addr, UInt bias )
sewardj170ee212004-12-10 18:57:51 +00003428{
3429 IRAtom *v64hi, *v64lo;
sewardj2e595852005-06-30 23:33:37 +00003430 tl_assert(end == Iend_LE || end == Iend_BE);
sewardj7cf4e6b2008-05-01 20:24:26 +00003431 switch (shadowTypeV(ty)) {
sewardj170ee212004-12-10 18:57:51 +00003432 case Ity_I8:
3433 case Ity_I16:
3434 case Ity_I32:
3435 case Ity_I64:
sewardj2e595852005-06-30 23:33:37 +00003436 return expr2vbits_Load_WRK(mce, end, ty, addr, bias);
sewardj170ee212004-12-10 18:57:51 +00003437 case Ity_V128:
sewardj2e595852005-06-30 23:33:37 +00003438 if (end == Iend_LE) {
3439 v64lo = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias);
3440 v64hi = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+8);
3441 } else {
sewardj2e595852005-06-30 23:33:37 +00003442 v64hi = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias);
3443 v64lo = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+8);
3444 }
sewardj7cf4e6b2008-05-01 20:24:26 +00003445 return assignNew( 'V', mce,
sewardj170ee212004-12-10 18:57:51 +00003446 Ity_V128,
sewardj20d38f22005-02-07 23:50:18 +00003447 binop(Iop_64HLtoV128, v64hi, v64lo));
sewardj170ee212004-12-10 18:57:51 +00003448 default:
sewardj2e595852005-06-30 23:33:37 +00003449 VG_(tool_panic)("expr2vbits_Load");
sewardj170ee212004-12-10 18:57:51 +00003450 }
3451}
3452
3453
3454static
sewardj95448072004-11-22 20:19:51 +00003455IRAtom* expr2vbits_Mux0X ( MCEnv* mce,
3456 IRAtom* cond, IRAtom* expr0, IRAtom* exprX )
3457{
3458 IRAtom *vbitsC, *vbits0, *vbitsX;
3459 IRType ty;
3460 /* Given Mux0X(cond,expr0,exprX), generate
3461 Mux0X(cond,expr0#,exprX#) `UifU` PCast(cond#)
3462 That is, steer the V bits like the originals, but trash the
3463 result if the steering value is undefined. This gives
3464 lazy propagation. */
3465 tl_assert(isOriginalAtom(mce, cond));
3466 tl_assert(isOriginalAtom(mce, expr0));
3467 tl_assert(isOriginalAtom(mce, exprX));
3468
3469 vbitsC = expr2vbits(mce, cond);
3470 vbits0 = expr2vbits(mce, expr0);
3471 vbitsX = expr2vbits(mce, exprX);
sewardj1c0ce7a2009-07-01 08:10:49 +00003472 ty = typeOfIRExpr(mce->sb->tyenv, vbits0);
sewardj95448072004-11-22 20:19:51 +00003473
3474 return
sewardj7cf4e6b2008-05-01 20:24:26 +00003475 mkUifU(mce, ty, assignNew('V', mce, ty,
3476 IRExpr_Mux0X(cond, vbits0, vbitsX)),
sewardj95448072004-11-22 20:19:51 +00003477 mkPCastTo(mce, ty, vbitsC) );
3478}
3479
3480/* --------- This is the main expression-handling function. --------- */
3481
3482static
3483IRExpr* expr2vbits ( MCEnv* mce, IRExpr* e )
3484{
3485 switch (e->tag) {
3486
3487 case Iex_Get:
3488 return shadow_GET( mce, e->Iex.Get.offset, e->Iex.Get.ty );
3489
3490 case Iex_GetI:
3491 return shadow_GETI( mce, e->Iex.GetI.descr,
3492 e->Iex.GetI.ix, e->Iex.GetI.bias );
3493
sewardj0b9d74a2006-12-24 02:24:11 +00003494 case Iex_RdTmp:
sewardj7cf4e6b2008-05-01 20:24:26 +00003495 return IRExpr_RdTmp( findShadowTmpV(mce, e->Iex.RdTmp.tmp) );
sewardj95448072004-11-22 20:19:51 +00003496
3497 case Iex_Const:
sewardj1c0ce7a2009-07-01 08:10:49 +00003498 return definedOfType(shadowTypeV(typeOfIRExpr(mce->sb->tyenv, e)));
sewardj95448072004-11-22 20:19:51 +00003499
sewardje91cea72006-02-08 19:32:02 +00003500 case Iex_Qop:
3501 return expr2vbits_Qop(
3502 mce,
3503 e->Iex.Qop.op,
3504 e->Iex.Qop.arg1, e->Iex.Qop.arg2,
3505 e->Iex.Qop.arg3, e->Iex.Qop.arg4
3506 );
3507
sewardjed69fdb2006-02-03 16:12:27 +00003508 case Iex_Triop:
3509 return expr2vbits_Triop(
3510 mce,
3511 e->Iex.Triop.op,
3512 e->Iex.Triop.arg1, e->Iex.Triop.arg2, e->Iex.Triop.arg3
3513 );
3514
sewardj95448072004-11-22 20:19:51 +00003515 case Iex_Binop:
3516 return expr2vbits_Binop(
3517 mce,
3518 e->Iex.Binop.op,
3519 e->Iex.Binop.arg1, e->Iex.Binop.arg2
3520 );
3521
3522 case Iex_Unop:
3523 return expr2vbits_Unop( mce, e->Iex.Unop.op, e->Iex.Unop.arg );
3524
sewardj2e595852005-06-30 23:33:37 +00003525 case Iex_Load:
3526 return expr2vbits_Load( mce, e->Iex.Load.end,
3527 e->Iex.Load.ty,
3528 e->Iex.Load.addr, 0/*addr bias*/ );
sewardj95448072004-11-22 20:19:51 +00003529
3530 case Iex_CCall:
3531 return mkLazyN( mce, e->Iex.CCall.args,
3532 e->Iex.CCall.retty,
3533 e->Iex.CCall.cee );
3534
3535 case Iex_Mux0X:
3536 return expr2vbits_Mux0X( mce, e->Iex.Mux0X.cond, e->Iex.Mux0X.expr0,
3537 e->Iex.Mux0X.exprX);
njn25e49d8e72002-09-23 09:36:25 +00003538
3539 default:
sewardj95448072004-11-22 20:19:51 +00003540 VG_(printf)("\n");
3541 ppIRExpr(e);
3542 VG_(printf)("\n");
3543 VG_(tool_panic)("memcheck: expr2vbits");
njn25e49d8e72002-09-23 09:36:25 +00003544 }
njn25e49d8e72002-09-23 09:36:25 +00003545}
3546
3547/*------------------------------------------------------------*/
sewardj95448072004-11-22 20:19:51 +00003548/*--- Generate shadow stmts from all kinds of IRStmts. ---*/
njn25e49d8e72002-09-23 09:36:25 +00003549/*------------------------------------------------------------*/
3550
sewardj95448072004-11-22 20:19:51 +00003551/* Widen a value to the host word size. */
njn25e49d8e72002-09-23 09:36:25 +00003552
3553static
sewardj95448072004-11-22 20:19:51 +00003554IRExpr* zwidenToHostWord ( MCEnv* mce, IRAtom* vatom )
njn25e49d8e72002-09-23 09:36:25 +00003555{
sewardj7cf97ee2004-11-28 14:25:01 +00003556 IRType ty, tyH;
3557
sewardj95448072004-11-22 20:19:51 +00003558 /* vatom is vbits-value and as such can only have a shadow type. */
3559 tl_assert(isShadowAtom(mce,vatom));
njn25e49d8e72002-09-23 09:36:25 +00003560
sewardj1c0ce7a2009-07-01 08:10:49 +00003561 ty = typeOfIRExpr(mce->sb->tyenv, vatom);
sewardj7cf97ee2004-11-28 14:25:01 +00003562 tyH = mce->hWordTy;
njn25e49d8e72002-09-23 09:36:25 +00003563
sewardj95448072004-11-22 20:19:51 +00003564 if (tyH == Ity_I32) {
3565 switch (ty) {
sewardj7cf4e6b2008-05-01 20:24:26 +00003566 case Ity_I32:
3567 return vatom;
3568 case Ity_I16:
3569 return assignNew('V', mce, tyH, unop(Iop_16Uto32, vatom));
3570 case Ity_I8:
3571 return assignNew('V', mce, tyH, unop(Iop_8Uto32, vatom));
3572 default:
3573 goto unhandled;
sewardj8ec2cfc2002-10-13 00:57:26 +00003574 }
sewardj6cf40ff2005-04-20 22:31:26 +00003575 } else
3576 if (tyH == Ity_I64) {
3577 switch (ty) {
sewardj7cf4e6b2008-05-01 20:24:26 +00003578 case Ity_I32:
3579 return assignNew('V', mce, tyH, unop(Iop_32Uto64, vatom));
3580 case Ity_I16:
3581 return assignNew('V', mce, tyH, unop(Iop_32Uto64,
3582 assignNew('V', mce, Ity_I32, unop(Iop_16Uto32, vatom))));
3583 case Ity_I8:
3584 return assignNew('V', mce, tyH, unop(Iop_32Uto64,
3585 assignNew('V', mce, Ity_I32, unop(Iop_8Uto32, vatom))));
3586 default:
3587 goto unhandled;
sewardj6cf40ff2005-04-20 22:31:26 +00003588 }
sewardj95448072004-11-22 20:19:51 +00003589 } else {
3590 goto unhandled;
sewardj8ec2cfc2002-10-13 00:57:26 +00003591 }
sewardj95448072004-11-22 20:19:51 +00003592 unhandled:
3593 VG_(printf)("\nty = "); ppIRType(ty); VG_(printf)("\n");
3594 VG_(tool_panic)("zwidenToHostWord");
njn25e49d8e72002-09-23 09:36:25 +00003595}
3596
njn25e49d8e72002-09-23 09:36:25 +00003597
sewardj95448072004-11-22 20:19:51 +00003598/* Generate a shadow store. addr is always the original address atom.
3599 You can pass in either originals or V-bits for the data atom, but
sewardj1c0ce7a2009-07-01 08:10:49 +00003600 obviously not both. guard :: Ity_I1 controls whether the store
3601 really happens; NULL means it unconditionally does. Note that
3602 guard itself is not checked for definedness; the caller of this
3603 function must do that if necessary. */
njn25e49d8e72002-09-23 09:36:25 +00003604
static
void do_shadow_Store ( MCEnv* mce,
                       IREndness end,
                       IRAtom* addr, UInt bias,
                       IRAtom* data, IRAtom* vdata,
                       IRAtom* guard )
{
   IROp     mkAdd;
   IRType   ty, tyAddr;
   void*    helper = NULL;
   Char*    hname = NULL;
   IRConst* c;

   tyAddr = mce->hWordTy;
   mkAdd  = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
   tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
   tl_assert( end == Iend_LE || end == Iend_BE );

   /* Exactly one of 'data' (an original atom whose shadow we compute
      here) and 'vdata' (a ready-made shadow atom) must be supplied. */
   if (data) {
      tl_assert(!vdata);
      tl_assert(isOriginalAtom(mce, data));
      tl_assert(bias == 0);
      vdata = expr2vbits( mce, data );
   } else {
      tl_assert(vdata);
   }

   tl_assert(isOriginalAtom(mce,addr));
   tl_assert(isShadowAtom(mce,vdata));

   /* Optional guard: if present it must be an original Ity_I1 atom.
      Its own definedness is NOT checked here (caller's job). */
   if (guard) {
      tl_assert(isOriginalAtom(mce, guard));
      tl_assert(typeOfIRExpr(mce->sb->tyenv, guard) == Ity_I1);
   }

   ty = typeOfIRExpr(mce->sb->tyenv, vdata);

   // If we're not doing undefined value checking, pretend that this value
   // is "all valid".  That lets Vex's optimiser remove some of the V bit
   // shadow computation ops that precede it.
   if (MC_(clo_mc_level) == 1) {
      switch (ty) {
         /* IRConst_V128 takes a 16-bit immediate (one bit per byte of
            the vector), hence V_BITS16_DEFINED here. */
         case Ity_V128: // V128 weirdness
                       c = IRConst_V128(V_BITS16_DEFINED); break;
         case Ity_I64: c = IRConst_U64 (V_BITS64_DEFINED); break;
         case Ity_I32: c = IRConst_U32 (V_BITS32_DEFINED); break;
         case Ity_I16: c = IRConst_U16 (V_BITS16_DEFINED); break;
         case Ity_I8:  c = IRConst_U8  (V_BITS8_DEFINED);  break;
         default: VG_(tool_panic)("memcheck:do_shadow_Store(LE)");
      }
      vdata = IRExpr_Const( c );
   }

   /* First, emit a definedness test for the address.  This also sets
      the address (shadow) to 'defined' following the test. */
   complainIfUndefined( mce, addr );

   /* Now decide which helper function to call to write the data V
      bits into shadow memory. */
   if (end == Iend_LE) {
      switch (ty) {
         case Ity_V128: /* we'll use the helper twice */
         case Ity_I64: helper = &MC_(helperc_STOREV64le);
                       hname = "MC_(helperc_STOREV64le)";
                       break;
         case Ity_I32: helper = &MC_(helperc_STOREV32le);
                       hname = "MC_(helperc_STOREV32le)";
                       break;
         case Ity_I16: helper = &MC_(helperc_STOREV16le);
                       hname = "MC_(helperc_STOREV16le)";
                       break;
         /* single-byte stores have no endianness, hence one helper */
         case Ity_I8:  helper = &MC_(helperc_STOREV8);
                       hname = "MC_(helperc_STOREV8)";
                       break;
         default: VG_(tool_panic)("memcheck:do_shadow_Store(LE)");
      }
   } else {
      switch (ty) {
         case Ity_V128: /* we'll use the helper twice */
         case Ity_I64: helper = &MC_(helperc_STOREV64be);
                       hname = "MC_(helperc_STOREV64be)";
                       break;
         case Ity_I32: helper = &MC_(helperc_STOREV32be);
                       hname = "MC_(helperc_STOREV32be)";
                       break;
         case Ity_I16: helper = &MC_(helperc_STOREV16be);
                       hname = "MC_(helperc_STOREV16be)";
                       break;
         case Ity_I8:  helper = &MC_(helperc_STOREV8);
                       hname = "MC_(helperc_STOREV8)";
                       break;
         default: VG_(tool_panic)("memcheck:do_shadow_Store(BE)");
      }
   }

   if (ty == Ity_V128) {

      /* V128-bit case */
      /* See comment in next clause re 64-bit regparms */
      /* also, need to be careful about endianness */

      Int     offLo64, offHi64;
      IRDirty *diLo64, *diHi64;
      IRAtom  *addrLo64, *addrHi64;
      IRAtom  *vdataLo64, *vdataHi64;
      IRAtom  *eBiasLo64, *eBiasHi64;

      /* Choose which address offset gets which 64-bit half of the
         vector: low half at the lower address for LE, high half at
         the lower address for BE. */
      if (end == Iend_LE) {
         offLo64 = 0;
         offHi64 = 8;
      } else {
         offLo64 = 8;
         offHi64 = 0;
      }

      /* Emit one 64-bit store-helper call per half. */
      eBiasLo64 = tyAddr==Ity_I32 ? mkU32(bias+offLo64) : mkU64(bias+offLo64);
      addrLo64  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasLo64) );
      vdataLo64 = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vdata));
      diLo64    = unsafeIRDirty_0_N(
                     1/*regparms*/,
                     hname, VG_(fnptr_to_fnentry)( helper ),
                     mkIRExprVec_2( addrLo64, vdataLo64 )
                  );
      eBiasHi64 = tyAddr==Ity_I32 ? mkU32(bias+offHi64) : mkU64(bias+offHi64);
      addrHi64  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasHi64) );
      vdataHi64 = assignNew('V', mce, Ity_I64, unop(Iop_V128HIto64, vdata));
      diHi64    = unsafeIRDirty_0_N(
                     1/*regparms*/,
                     hname, VG_(fnptr_to_fnentry)( helper ),
                     mkIRExprVec_2( addrHi64, vdataHi64 )
                  );
      /* Both halves inherit the same guard, if any. */
      if (guard) diLo64->guard = guard;
      if (guard) diHi64->guard = guard;
      setHelperAnns( mce, diLo64 );
      setHelperAnns( mce, diHi64 );
      stmt( 'V', mce, IRStmt_Dirty(diLo64) );
      stmt( 'V', mce, IRStmt_Dirty(diHi64) );

   } else {

      IRDirty *di;
      IRAtom  *addrAct;

      /* 8/16/32/64-bit cases */
      /* Generate the actual address into addrAct. */
      if (bias == 0) {
         addrAct = addr;
      } else {
         IRAtom* eBias   = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
         addrAct = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBias));
      }

      if (ty == Ity_I64) {
         /* We can't do this with regparm 2 on 32-bit platforms, since
            the back ends aren't clever enough to handle 64-bit
            regparm args.  Therefore be different. */
         di = unsafeIRDirty_0_N(
                 1/*regparms*/,
                 hname, VG_(fnptr_to_fnentry)( helper ),
                 mkIRExprVec_2( addrAct, vdata )
              );
      } else {
         /* Sub-word data is zero-widened to the host word so it can
            be passed as a regparm argument. */
         di = unsafeIRDirty_0_N(
                 2/*regparms*/,
                 hname, VG_(fnptr_to_fnentry)( helper ),
                 mkIRExprVec_2( addrAct,
                                zwidenToHostWord( mce, vdata ))
              );
      }
      if (guard) di->guard = guard;
      setHelperAnns( mce, di );
      stmt( 'V', mce, IRStmt_Dirty(di) );
   }

}
njn25e49d8e72002-09-23 09:36:25 +00003780
njn25e49d8e72002-09-23 09:36:25 +00003781
sewardj95448072004-11-22 20:19:51 +00003782/* Do lazy pessimistic propagation through a dirty helper call, by
3783 looking at the annotations on it. This is the most complex part of
3784 Memcheck. */
njn25e49d8e72002-09-23 09:36:25 +00003785
sewardj95448072004-11-22 20:19:51 +00003786static IRType szToITy ( Int n )
3787{
3788 switch (n) {
3789 case 1: return Ity_I8;
3790 case 2: return Ity_I16;
3791 case 4: return Ity_I32;
3792 case 8: return Ity_I64;
3793 default: VG_(tool_panic)("szToITy(memcheck)");
3794 }
3795}
njn25e49d8e72002-09-23 09:36:25 +00003796
sewardj95448072004-11-22 20:19:51 +00003797static
3798void do_shadow_Dirty ( MCEnv* mce, IRDirty* d )
3799{
njn4c245e52009-03-15 23:25:38 +00003800 Int i, n, toDo, gSz, gOff;
sewardj2e595852005-06-30 23:33:37 +00003801 IRAtom *src, *here, *curr;
njn4c245e52009-03-15 23:25:38 +00003802 IRType tySrc, tyDst;
sewardj2e595852005-06-30 23:33:37 +00003803 IRTemp dst;
3804 IREndness end;
3805
3806 /* What's the native endianness? We need to know this. */
sewardj6e340c72005-07-10 00:53:42 +00003807# if defined(VG_BIGENDIAN)
sewardj2e595852005-06-30 23:33:37 +00003808 end = Iend_BE;
sewardj6e340c72005-07-10 00:53:42 +00003809# elif defined(VG_LITTLEENDIAN)
sewardj2e595852005-06-30 23:33:37 +00003810 end = Iend_LE;
3811# else
3812# error "Unknown endianness"
3813# endif
njn25e49d8e72002-09-23 09:36:25 +00003814
sewardj95448072004-11-22 20:19:51 +00003815 /* First check the guard. */
3816 complainIfUndefined(mce, d->guard);
3817
3818 /* Now round up all inputs and PCast over them. */
sewardj7cf97ee2004-11-28 14:25:01 +00003819 curr = definedOfType(Ity_I32);
sewardj95448072004-11-22 20:19:51 +00003820
3821 /* Inputs: unmasked args */
3822 for (i = 0; d->args[i]; i++) {
3823 if (d->cee->mcx_mask & (1<<i)) {
3824 /* ignore this arg */
njn25e49d8e72002-09-23 09:36:25 +00003825 } else {
sewardj95448072004-11-22 20:19:51 +00003826 here = mkPCastTo( mce, Ity_I32, expr2vbits(mce, d->args[i]) );
3827 curr = mkUifU32(mce, here, curr);
njn25e49d8e72002-09-23 09:36:25 +00003828 }
3829 }
sewardj95448072004-11-22 20:19:51 +00003830
3831 /* Inputs: guest state that we read. */
3832 for (i = 0; i < d->nFxState; i++) {
3833 tl_assert(d->fxState[i].fx != Ifx_None);
3834 if (d->fxState[i].fx == Ifx_Write)
3835 continue;
sewardja7203252004-11-26 19:17:47 +00003836
3837 /* Ignore any sections marked as 'always defined'. */
3838 if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size )) {
sewardje9e16d32004-12-10 13:17:55 +00003839 if (0)
sewardja7203252004-11-26 19:17:47 +00003840 VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
3841 d->fxState[i].offset, d->fxState[i].size );
3842 continue;
3843 }
3844
sewardj95448072004-11-22 20:19:51 +00003845 /* This state element is read or modified. So we need to
sewardje9e16d32004-12-10 13:17:55 +00003846 consider it. If larger than 8 bytes, deal with it in 8-byte
3847 chunks. */
3848 gSz = d->fxState[i].size;
3849 gOff = d->fxState[i].offset;
3850 tl_assert(gSz > 0);
3851 while (True) {
3852 if (gSz == 0) break;
3853 n = gSz <= 8 ? gSz : 8;
3854 /* update 'curr' with UifU of the state slice
3855 gOff .. gOff+n-1 */
3856 tySrc = szToITy( n );
sewardj7cf4e6b2008-05-01 20:24:26 +00003857 src = assignNew( 'V', mce, tySrc,
3858 shadow_GET(mce, gOff, tySrc ) );
sewardje9e16d32004-12-10 13:17:55 +00003859 here = mkPCastTo( mce, Ity_I32, src );
3860 curr = mkUifU32(mce, here, curr);
3861 gSz -= n;
3862 gOff += n;
3863 }
3864
sewardj95448072004-11-22 20:19:51 +00003865 }
3866
3867 /* Inputs: memory. First set up some info needed regardless of
3868 whether we're doing reads or writes. */
sewardj95448072004-11-22 20:19:51 +00003869
3870 if (d->mFx != Ifx_None) {
3871 /* Because we may do multiple shadow loads/stores from the same
3872 base address, it's best to do a single test of its
3873 definedness right now. Post-instrumentation optimisation
3874 should remove all but this test. */
njn4c245e52009-03-15 23:25:38 +00003875 IRType tyAddr;
sewardj95448072004-11-22 20:19:51 +00003876 tl_assert(d->mAddr);
3877 complainIfUndefined(mce, d->mAddr);
3878
sewardj1c0ce7a2009-07-01 08:10:49 +00003879 tyAddr = typeOfIRExpr(mce->sb->tyenv, d->mAddr);
sewardj95448072004-11-22 20:19:51 +00003880 tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);
3881 tl_assert(tyAddr == mce->hWordTy); /* not really right */
3882 }
3883
3884 /* Deal with memory inputs (reads or modifies) */
3885 if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
sewardj95448072004-11-22 20:19:51 +00003886 toDo = d->mSize;
sewardj2e595852005-06-30 23:33:37 +00003887 /* chew off 32-bit chunks. We don't care about the endianness
3888 since it's all going to be condensed down to a single bit,
3889 but nevertheless choose an endianness which is hopefully
3890 native to the platform. */
sewardj95448072004-11-22 20:19:51 +00003891 while (toDo >= 4) {
3892 here = mkPCastTo(
3893 mce, Ity_I32,
sewardj2e595852005-06-30 23:33:37 +00003894 expr2vbits_Load ( mce, end, Ity_I32,
sewardj95448072004-11-22 20:19:51 +00003895 d->mAddr, d->mSize - toDo )
3896 );
3897 curr = mkUifU32(mce, here, curr);
3898 toDo -= 4;
3899 }
3900 /* chew off 16-bit chunks */
3901 while (toDo >= 2) {
3902 here = mkPCastTo(
3903 mce, Ity_I32,
sewardj2e595852005-06-30 23:33:37 +00003904 expr2vbits_Load ( mce, end, Ity_I16,
sewardj95448072004-11-22 20:19:51 +00003905 d->mAddr, d->mSize - toDo )
3906 );
3907 curr = mkUifU32(mce, here, curr);
3908 toDo -= 2;
3909 }
3910 tl_assert(toDo == 0); /* also need to handle 1-byte excess */
3911 }
3912
3913 /* Whew! So curr is a 32-bit V-value summarising pessimistically
3914 all the inputs to the helper. Now we need to re-distribute the
3915 results to all destinations. */
3916
3917 /* Outputs: the destination temporary, if there is one. */
3918 if (d->tmp != IRTemp_INVALID) {
sewardj7cf4e6b2008-05-01 20:24:26 +00003919 dst = findShadowTmpV(mce, d->tmp);
sewardj1c0ce7a2009-07-01 08:10:49 +00003920 tyDst = typeOfIRTemp(mce->sb->tyenv, d->tmp);
sewardj7cf4e6b2008-05-01 20:24:26 +00003921 assign( 'V', mce, dst, mkPCastTo( mce, tyDst, curr) );
sewardj95448072004-11-22 20:19:51 +00003922 }
3923
3924 /* Outputs: guest state that we write or modify. */
3925 for (i = 0; i < d->nFxState; i++) {
3926 tl_assert(d->fxState[i].fx != Ifx_None);
3927 if (d->fxState[i].fx == Ifx_Read)
3928 continue;
sewardja7203252004-11-26 19:17:47 +00003929 /* Ignore any sections marked as 'always defined'. */
3930 if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size ))
3931 continue;
sewardje9e16d32004-12-10 13:17:55 +00003932 /* This state element is written or modified. So we need to
3933 consider it. If larger than 8 bytes, deal with it in 8-byte
3934 chunks. */
3935 gSz = d->fxState[i].size;
3936 gOff = d->fxState[i].offset;
3937 tl_assert(gSz > 0);
3938 while (True) {
3939 if (gSz == 0) break;
3940 n = gSz <= 8 ? gSz : 8;
3941 /* Write suitably-casted 'curr' to the state slice
3942 gOff .. gOff+n-1 */
3943 tyDst = szToITy( n );
3944 do_shadow_PUT( mce, gOff,
3945 NULL, /* original atom */
3946 mkPCastTo( mce, tyDst, curr ) );
3947 gSz -= n;
3948 gOff += n;
3949 }
sewardj95448072004-11-22 20:19:51 +00003950 }
3951
sewardj2e595852005-06-30 23:33:37 +00003952 /* Outputs: memory that we write or modify. Same comments about
3953 endianness as above apply. */
sewardj95448072004-11-22 20:19:51 +00003954 if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
sewardj95448072004-11-22 20:19:51 +00003955 toDo = d->mSize;
3956 /* chew off 32-bit chunks */
3957 while (toDo >= 4) {
sewardj2e595852005-06-30 23:33:37 +00003958 do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
3959 NULL, /* original data */
sewardj1c0ce7a2009-07-01 08:10:49 +00003960 mkPCastTo( mce, Ity_I32, curr ),
3961 NULL/*guard*/ );
sewardj95448072004-11-22 20:19:51 +00003962 toDo -= 4;
3963 }
3964 /* chew off 16-bit chunks */
3965 while (toDo >= 2) {
sewardj2e595852005-06-30 23:33:37 +00003966 do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
3967 NULL, /* original data */
sewardj1c0ce7a2009-07-01 08:10:49 +00003968 mkPCastTo( mce, Ity_I16, curr ),
3969 NULL/*guard*/ );
sewardj95448072004-11-22 20:19:51 +00003970 toDo -= 2;
3971 }
3972 tl_assert(toDo == 0); /* also need to handle 1-byte excess */
3973 }
3974
njn25e49d8e72002-09-23 09:36:25 +00003975}
3976
sewardj1c0ce7a2009-07-01 08:10:49 +00003977
sewardj826ec492005-05-12 18:05:00 +00003978/* We have an ABI hint telling us that [base .. base+len-1] is to
3979 become undefined ("writable"). Generate code to call a helper to
3980 notify the A/V bit machinery of this fact.
3981
3982 We call
sewardj7cf4e6b2008-05-01 20:24:26 +00003983 void MC_(helperc_MAKE_STACK_UNINIT) ( Addr base, UWord len,
3984 Addr nia );
sewardj826ec492005-05-12 18:05:00 +00003985*/
3986static
sewardj7cf4e6b2008-05-01 20:24:26 +00003987void do_AbiHint ( MCEnv* mce, IRExpr* base, Int len, IRExpr* nia )
sewardj826ec492005-05-12 18:05:00 +00003988{
3989 IRDirty* di;
sewardj7cf4e6b2008-05-01 20:24:26 +00003990 /* Minor optimisation: if not doing origin tracking, ignore the
3991 supplied nia and pass zero instead. This is on the basis that
3992 MC_(helperc_MAKE_STACK_UNINIT) will ignore it anyway, and we can
3993 almost always generate a shorter instruction to put zero into a
3994 register than any other value. */
3995 if (MC_(clo_mc_level) < 3)
3996 nia = mkIRExpr_HWord(0);
3997
sewardj826ec492005-05-12 18:05:00 +00003998 di = unsafeIRDirty_0_N(
3999 0/*regparms*/,
4000 "MC_(helperc_MAKE_STACK_UNINIT)",
sewardj53ee1fc2005-12-23 02:29:58 +00004001 VG_(fnptr_to_fnentry)( &MC_(helperc_MAKE_STACK_UNINIT) ),
sewardj7cf4e6b2008-05-01 20:24:26 +00004002 mkIRExprVec_3( base, mkIRExpr_HWord( (UInt)len), nia )
sewardj826ec492005-05-12 18:05:00 +00004003 );
sewardj7cf4e6b2008-05-01 20:24:26 +00004004 stmt( 'V', mce, IRStmt_Dirty(di) );
sewardj826ec492005-05-12 18:05:00 +00004005}
4006
njn25e49d8e72002-09-23 09:36:25 +00004007
sewardj1c0ce7a2009-07-01 08:10:49 +00004008/* ------ Dealing with IRCAS (big and complex) ------ */
4009
4010/* FWDS */
4011static IRAtom* gen_load_b ( MCEnv* mce, Int szB,
4012 IRAtom* baseaddr, Int offset );
4013static IRAtom* gen_maxU32 ( MCEnv* mce, IRAtom* b1, IRAtom* b2 );
4014static void gen_store_b ( MCEnv* mce, Int szB,
4015 IRAtom* baseaddr, Int offset, IRAtom* dataB,
4016 IRAtom* guard );
4017
4018static void do_shadow_CAS_single ( MCEnv* mce, IRCAS* cas );
4019static void do_shadow_CAS_double ( MCEnv* mce, IRCAS* cas );
4020
4021
4022/* Either ORIG and SHADOW are both IRExpr.RdTmps, or they are both
4023 IRExpr.Consts, else this asserts. If they are both Consts, it
4024 doesn't do anything. So that just leaves the RdTmp case.
4025
4026 In which case: this assigns the shadow value SHADOW to the IR
4027 shadow temporary associated with ORIG. That is, ORIG, being an
4028 original temporary, will have a shadow temporary associated with
4029 it. However, in the case envisaged here, there will so far have
4030 been no IR emitted to actually write a shadow value into that
4031 temporary. What this routine does is to (emit IR to) copy the
4032 value in SHADOW into said temporary, so that after this call,
4033 IRExpr.RdTmps of ORIG's shadow temp will correctly pick up the
4034 value in SHADOW.
4035
4036 Point is to allow callers to compute "by hand" a shadow value for
4037 ORIG, and force it to be associated with ORIG.
4038
4039 How do we know that that shadow associated with ORIG has not so far
4040 been assigned to? Well, we don't per se know that, but supposing
4041 it had. Then this routine would create a second assignment to it,
4042 and later the IR sanity checker would barf. But that never
4043 happens. QED.
4044*/
4045static void bind_shadow_tmp_to_orig ( UChar how,
4046 MCEnv* mce,
4047 IRAtom* orig, IRAtom* shadow )
4048{
4049 tl_assert(isOriginalAtom(mce, orig));
4050 tl_assert(isShadowAtom(mce, shadow));
4051 switch (orig->tag) {
4052 case Iex_Const:
4053 tl_assert(shadow->tag == Iex_Const);
4054 break;
4055 case Iex_RdTmp:
4056 tl_assert(shadow->tag == Iex_RdTmp);
4057 if (how == 'V') {
4058 assign('V', mce, findShadowTmpV(mce,orig->Iex.RdTmp.tmp),
4059 shadow);
4060 } else {
4061 tl_assert(how == 'B');
4062 assign('B', mce, findShadowTmpB(mce,orig->Iex.RdTmp.tmp),
4063 shadow);
4064 }
4065 break;
4066 default:
4067 tl_assert(0);
4068 }
4069}
4070
4071
4072static
4073void do_shadow_CAS ( MCEnv* mce, IRCAS* cas )
4074{
4075 /* Scheme is (both single- and double- cases):
4076
4077 1. fetch data#,dataB (the proposed new value)
4078
4079 2. fetch expd#,expdB (what we expect to see at the address)
4080
4081 3. check definedness of address
4082
4083 4. load old#,oldB from shadow memory; this also checks
4084 addressibility of the address
4085
4086 5. the CAS itself
4087
sewardjafed4c52009-07-12 13:00:17 +00004088 6. compute "expected == old". See COMMENT_ON_CasCmpEQ below.
sewardj1c0ce7a2009-07-01 08:10:49 +00004089
sewardjafed4c52009-07-12 13:00:17 +00004090 7. if "expected == old" (as computed by (6))
sewardj1c0ce7a2009-07-01 08:10:49 +00004091 store data#,dataB to shadow memory
4092
4093 Note that 5 reads 'old' but 4 reads 'old#'. Similarly, 5 stores
4094 'data' but 7 stores 'data#'. Hence it is possible for the
4095 shadow data to be incorrectly checked and/or updated:
4096
sewardj1c0ce7a2009-07-01 08:10:49 +00004097 * 7 is at least gated correctly, since the 'expected == old'
4098 condition is derived from outputs of 5. However, the shadow
4099 write could happen too late: imagine after 5 we are
4100 descheduled, a different thread runs, writes a different
4101 (shadow) value at the address, and then we resume, hence
4102 overwriting the shadow value written by the other thread.
4103
4104 Because the original memory access is atomic, there's no way to
4105 make both the original and shadow accesses into a single atomic
4106 thing, hence this is unavoidable.
4107
4108 At least as Valgrind stands, I don't think it's a problem, since
4109 we're single threaded *and* we guarantee that there are no
4110 context switches during the execution of any specific superblock
4111 -- context switches can only happen at superblock boundaries.
4112
4113 If Valgrind ever becomes MT in the future, then it might be more
4114 of a problem. A possible kludge would be to artificially
4115 associate with the location, a lock, which we must acquire and
4116 release around the transaction as a whole. Hmm, that probably
4117 would't work properly since it only guards us against other
4118 threads doing CASs on the same location, not against other
4119 threads doing normal reads and writes.
sewardjafed4c52009-07-12 13:00:17 +00004120
4121 ------------------------------------------------------------
4122
4123 COMMENT_ON_CasCmpEQ:
4124
4125 Note two things. Firstly, in the sequence above, we compute
4126 "expected == old", but we don't check definedness of it. Why
4127 not? Also, the x86 and amd64 front ends use
4128 Iop_CmpCas{EQ,NE}{8,16,32,64} comparisons to make the equivalent
4129 determination (expected == old ?) for themselves, and we also
4130 don't check definedness for those primops; we just say that the
4131 result is defined. Why? Details follow.
4132
4133 x86/amd64 contains various forms of locked insns:
4134 * lock prefix before all basic arithmetic insn;
4135 eg lock xorl %reg1,(%reg2)
4136 * atomic exchange reg-mem
4137 * compare-and-swaps
4138
4139 Rather than attempt to represent them all, which would be a
4140 royal PITA, I used a result from Maurice Herlihy
4141 (http://en.wikipedia.org/wiki/Maurice_Herlihy), in which he
4142 demonstrates that compare-and-swap is a primitive more general
4143 than the other two, and so can be used to represent all of them.
4144 So the translation scheme for (eg) lock incl (%reg) is as
4145 follows:
4146
4147 again:
4148 old = * %reg
4149 new = old + 1
4150 atomically { if (* %reg == old) { * %reg = new } else { goto again } }
4151
4152 The "atomically" is the CAS bit. The scheme is always the same:
4153 get old value from memory, compute new value, atomically stuff
4154 new value back in memory iff the old value has not changed (iow,
4155 no other thread modified it in the meantime). If it has changed
4156 then we've been out-raced and we have to start over.
4157
4158 Now that's all very neat, but it has the bad side effect of
4159 introducing an explicit equality test into the translation.
4160 Consider the behaviour of said code on a memory location which
4161 is uninitialised. We will wind up doing a comparison on
4162 uninitialised data, and mc duly complains.
4163
4164 What's difficult about this is, the common case is that the
4165 location is uncontended, and so we're usually comparing the same
4166 value (* %reg) with itself. So we shouldn't complain even if it
4167 is undefined. But mc doesn't know that.
4168
4169 My solution is to mark the == in the IR specially, so as to tell
4170 mc that it almost certainly compares a value with itself, and we
4171 should just regard the result as always defined. Rather than
4172 add a bit to all IROps, I just cloned Iop_CmpEQ{8,16,32,64} into
4173 Iop_CasCmpEQ{8,16,32,64} so as not to disturb anything else.
4174
4175 So there's always the question of, can this give a false
4176 negative? eg, imagine that initially, * %reg is defined; and we
4177 read that; but then in the gap between the read and the CAS, a
4178 different thread writes an undefined (and different) value at
4179 the location. Then the CAS in this thread will fail and we will
4180 go back to "again:", but without knowing that the trip back
4181 there was based on an undefined comparison. No matter; at least
4182 the other thread won the race and the location is correctly
4183 marked as undefined. What if it wrote an uninitialised version
4184 of the same value that was there originally, though?
4185
4186 etc etc. Seems like there's a small corner case in which we
4187 might lose the fact that something's defined -- we're out-raced
4188 in between the "old = * reg" and the "atomically {", _and_ the
4189 other thread is writing in an undefined version of what's
4190 already there. Well, that seems pretty unlikely.
4191
4192 ---
4193
4194 If we ever need to reinstate it .. code which generates a
4195 definedness test for "expected == old" was removed at r10432 of
4196 this file.
sewardj1c0ce7a2009-07-01 08:10:49 +00004197 */
4198 if (cas->oldHi == IRTemp_INVALID) {
4199 do_shadow_CAS_single( mce, cas );
4200 } else {
4201 do_shadow_CAS_double( mce, cas );
4202 }
4203}
4204
4205
4206static void do_shadow_CAS_single ( MCEnv* mce, IRCAS* cas )
4207{
4208 IRAtom *vdataLo = NULL, *bdataLo = NULL;
4209 IRAtom *vexpdLo = NULL, *bexpdLo = NULL;
4210 IRAtom *voldLo = NULL, *boldLo = NULL;
sewardjafed4c52009-07-12 13:00:17 +00004211 IRAtom *expd_eq_old = NULL;
4212 IROp opCasCmpEQ;
sewardj1c0ce7a2009-07-01 08:10:49 +00004213 Int elemSzB;
4214 IRType elemTy;
4215 Bool otrak = MC_(clo_mc_level) >= 3; /* a shorthand */
4216
4217 /* single CAS */
4218 tl_assert(cas->oldHi == IRTemp_INVALID);
4219 tl_assert(cas->expdHi == NULL);
4220 tl_assert(cas->dataHi == NULL);
4221
4222 elemTy = typeOfIRExpr(mce->sb->tyenv, cas->expdLo);
4223 switch (elemTy) {
sewardjafed4c52009-07-12 13:00:17 +00004224 case Ity_I8: elemSzB = 1; opCasCmpEQ = Iop_CasCmpEQ8; break;
4225 case Ity_I16: elemSzB = 2; opCasCmpEQ = Iop_CasCmpEQ16; break;
4226 case Ity_I32: elemSzB = 4; opCasCmpEQ = Iop_CasCmpEQ32; break;
4227 case Ity_I64: elemSzB = 8; opCasCmpEQ = Iop_CasCmpEQ64; break;
sewardj1c0ce7a2009-07-01 08:10:49 +00004228 default: tl_assert(0); /* IR defn disallows any other types */
4229 }
4230
4231 /* 1. fetch data# (the proposed new value) */
4232 tl_assert(isOriginalAtom(mce, cas->dataLo));
4233 vdataLo
4234 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataLo));
4235 tl_assert(isShadowAtom(mce, vdataLo));
4236 if (otrak) {
4237 bdataLo
4238 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataLo));
4239 tl_assert(isShadowAtom(mce, bdataLo));
4240 }
4241
4242 /* 2. fetch expected# (what we expect to see at the address) */
4243 tl_assert(isOriginalAtom(mce, cas->expdLo));
4244 vexpdLo
4245 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdLo));
4246 tl_assert(isShadowAtom(mce, vexpdLo));
4247 if (otrak) {
4248 bexpdLo
4249 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdLo));
4250 tl_assert(isShadowAtom(mce, bexpdLo));
4251 }
4252
4253 /* 3. check definedness of address */
4254 /* 4. fetch old# from shadow memory; this also checks
4255 addressibility of the address */
4256 voldLo
4257 = assignNew(
4258 'V', mce, elemTy,
4259 expr2vbits_Load(
4260 mce,
4261 cas->end, elemTy, cas->addr, 0/*Addr bias*/
4262 ));
sewardjafed4c52009-07-12 13:00:17 +00004263 bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldLo), voldLo);
sewardj1c0ce7a2009-07-01 08:10:49 +00004264 if (otrak) {
4265 boldLo
4266 = assignNew('B', mce, Ity_I32,
4267 gen_load_b(mce, elemSzB, cas->addr, 0/*addr bias*/));
sewardjafed4c52009-07-12 13:00:17 +00004268 bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldLo), boldLo);
sewardj1c0ce7a2009-07-01 08:10:49 +00004269 }
4270
4271 /* 5. the CAS itself */
4272 stmt( 'C', mce, IRStmt_CAS(cas) );
4273
sewardjafed4c52009-07-12 13:00:17 +00004274 /* 6. compute "expected == old" */
4275 /* See COMMENT_ON_CasCmpEQ in this file background/rationale. */
sewardj1c0ce7a2009-07-01 08:10:49 +00004276 /* Note that 'C' is kinda faking it; it is indeed a non-shadow
4277 tree, but it's not copied from the input block. */
4278 expd_eq_old
4279 = assignNew('C', mce, Ity_I1,
sewardjafed4c52009-07-12 13:00:17 +00004280 binop(opCasCmpEQ, cas->expdLo, mkexpr(cas->oldLo)));
sewardj1c0ce7a2009-07-01 08:10:49 +00004281
4282 /* 7. if "expected == old"
4283 store data# to shadow memory */
4284 do_shadow_Store( mce, cas->end, cas->addr, 0/*bias*/,
4285 NULL/*data*/, vdataLo/*vdata*/,
4286 expd_eq_old/*guard for store*/ );
4287 if (otrak) {
4288 gen_store_b( mce, elemSzB, cas->addr, 0/*offset*/,
4289 bdataLo/*bdata*/,
4290 expd_eq_old/*guard for store*/ );
4291 }
4292}
4293
4294
4295static void do_shadow_CAS_double ( MCEnv* mce, IRCAS* cas )
4296{
4297 IRAtom *vdataHi = NULL, *bdataHi = NULL;
4298 IRAtom *vdataLo = NULL, *bdataLo = NULL;
4299 IRAtom *vexpdHi = NULL, *bexpdHi = NULL;
4300 IRAtom *vexpdLo = NULL, *bexpdLo = NULL;
4301 IRAtom *voldHi = NULL, *boldHi = NULL;
4302 IRAtom *voldLo = NULL, *boldLo = NULL;
sewardjafed4c52009-07-12 13:00:17 +00004303 IRAtom *xHi = NULL, *xLo = NULL, *xHL = NULL;
4304 IRAtom *expd_eq_old = NULL, *zero = NULL;
4305 IROp opCasCmpEQ, opOr, opXor;
sewardj1c0ce7a2009-07-01 08:10:49 +00004306 Int elemSzB, memOffsLo, memOffsHi;
4307 IRType elemTy;
4308 Bool otrak = MC_(clo_mc_level) >= 3; /* a shorthand */
4309
4310 /* double CAS */
4311 tl_assert(cas->oldHi != IRTemp_INVALID);
4312 tl_assert(cas->expdHi != NULL);
4313 tl_assert(cas->dataHi != NULL);
4314
4315 elemTy = typeOfIRExpr(mce->sb->tyenv, cas->expdLo);
4316 switch (elemTy) {
4317 case Ity_I8:
sewardjafed4c52009-07-12 13:00:17 +00004318 opCasCmpEQ = Iop_CasCmpEQ8; opOr = Iop_Or8; opXor = Iop_Xor8;
sewardj1c0ce7a2009-07-01 08:10:49 +00004319 elemSzB = 1; zero = mkU8(0);
4320 break;
4321 case Ity_I16:
sewardjafed4c52009-07-12 13:00:17 +00004322 opCasCmpEQ = Iop_CasCmpEQ16; opOr = Iop_Or16; opXor = Iop_Xor16;
sewardj1c0ce7a2009-07-01 08:10:49 +00004323 elemSzB = 2; zero = mkU16(0);
4324 break;
4325 case Ity_I32:
sewardjafed4c52009-07-12 13:00:17 +00004326 opCasCmpEQ = Iop_CasCmpEQ32; opOr = Iop_Or32; opXor = Iop_Xor32;
sewardj1c0ce7a2009-07-01 08:10:49 +00004327 elemSzB = 4; zero = mkU32(0);
4328 break;
4329 case Ity_I64:
sewardjafed4c52009-07-12 13:00:17 +00004330 opCasCmpEQ = Iop_CasCmpEQ64; opOr = Iop_Or64; opXor = Iop_Xor64;
sewardj1c0ce7a2009-07-01 08:10:49 +00004331 elemSzB = 8; zero = mkU64(0);
4332 break;
4333 default:
4334 tl_assert(0); /* IR defn disallows any other types */
4335 }
4336
4337 /* 1. fetch data# (the proposed new value) */
4338 tl_assert(isOriginalAtom(mce, cas->dataHi));
4339 tl_assert(isOriginalAtom(mce, cas->dataLo));
4340 vdataHi
4341 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataHi));
4342 vdataLo
4343 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataLo));
4344 tl_assert(isShadowAtom(mce, vdataHi));
4345 tl_assert(isShadowAtom(mce, vdataLo));
4346 if (otrak) {
4347 bdataHi
4348 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataHi));
4349 bdataLo
4350 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataLo));
4351 tl_assert(isShadowAtom(mce, bdataHi));
4352 tl_assert(isShadowAtom(mce, bdataLo));
4353 }
4354
4355 /* 2. fetch expected# (what we expect to see at the address) */
4356 tl_assert(isOriginalAtom(mce, cas->expdHi));
4357 tl_assert(isOriginalAtom(mce, cas->expdLo));
4358 vexpdHi
4359 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdHi));
4360 vexpdLo
4361 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdLo));
4362 tl_assert(isShadowAtom(mce, vexpdHi));
4363 tl_assert(isShadowAtom(mce, vexpdLo));
4364 if (otrak) {
4365 bexpdHi
4366 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdHi));
4367 bexpdLo
4368 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdLo));
4369 tl_assert(isShadowAtom(mce, bexpdHi));
4370 tl_assert(isShadowAtom(mce, bexpdLo));
4371 }
4372
4373 /* 3. check definedness of address */
4374 /* 4. fetch old# from shadow memory; this also checks
4375 addressibility of the address */
4376 if (cas->end == Iend_LE) {
4377 memOffsLo = 0;
4378 memOffsHi = elemSzB;
4379 } else {
4380 tl_assert(cas->end == Iend_BE);
4381 memOffsLo = elemSzB;
4382 memOffsHi = 0;
4383 }
4384 voldHi
4385 = assignNew(
4386 'V', mce, elemTy,
4387 expr2vbits_Load(
4388 mce,
4389 cas->end, elemTy, cas->addr, memOffsHi/*Addr bias*/
4390 ));
4391 voldLo
4392 = assignNew(
4393 'V', mce, elemTy,
4394 expr2vbits_Load(
4395 mce,
4396 cas->end, elemTy, cas->addr, memOffsLo/*Addr bias*/
4397 ));
sewardjafed4c52009-07-12 13:00:17 +00004398 bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldHi), voldHi);
4399 bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldLo), voldLo);
sewardj1c0ce7a2009-07-01 08:10:49 +00004400 if (otrak) {
4401 boldHi
4402 = assignNew('B', mce, Ity_I32,
4403 gen_load_b(mce, elemSzB, cas->addr,
4404 memOffsHi/*addr bias*/));
4405 boldLo
4406 = assignNew('B', mce, Ity_I32,
4407 gen_load_b(mce, elemSzB, cas->addr,
4408 memOffsLo/*addr bias*/));
sewardjafed4c52009-07-12 13:00:17 +00004409 bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldHi), boldHi);
4410 bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldLo), boldLo);
sewardj1c0ce7a2009-07-01 08:10:49 +00004411 }
4412
4413 /* 5. the CAS itself */
4414 stmt( 'C', mce, IRStmt_CAS(cas) );
4415
sewardjafed4c52009-07-12 13:00:17 +00004416 /* 6. compute "expected == old" */
4417 /* See COMMENT_ON_CasCmpEQ in this file background/rationale. */
sewardj1c0ce7a2009-07-01 08:10:49 +00004418 /* Note that 'C' is kinda faking it; it is indeed a non-shadow
4419 tree, but it's not copied from the input block. */
4420 /*
4421 xHi = oldHi ^ expdHi;
4422 xLo = oldLo ^ expdLo;
4423 xHL = xHi | xLo;
4424 expd_eq_old = xHL == 0;
4425 */
sewardj1c0ce7a2009-07-01 08:10:49 +00004426 xHi = assignNew('C', mce, elemTy,
4427 binop(opXor, cas->expdHi, mkexpr(cas->oldHi)));
sewardj1c0ce7a2009-07-01 08:10:49 +00004428 xLo = assignNew('C', mce, elemTy,
4429 binop(opXor, cas->expdLo, mkexpr(cas->oldLo)));
sewardj1c0ce7a2009-07-01 08:10:49 +00004430 xHL = assignNew('C', mce, elemTy,
4431 binop(opOr, xHi, xLo));
sewardj1c0ce7a2009-07-01 08:10:49 +00004432 expd_eq_old
4433 = assignNew('C', mce, Ity_I1,
sewardjafed4c52009-07-12 13:00:17 +00004434 binop(opCasCmpEQ, xHL, zero));
sewardj1c0ce7a2009-07-01 08:10:49 +00004435
4436 /* 7. if "expected == old"
4437 store data# to shadow memory */
4438 do_shadow_Store( mce, cas->end, cas->addr, memOffsHi/*bias*/,
4439 NULL/*data*/, vdataHi/*vdata*/,
4440 expd_eq_old/*guard for store*/ );
4441 do_shadow_Store( mce, cas->end, cas->addr, memOffsLo/*bias*/,
4442 NULL/*data*/, vdataLo/*vdata*/,
4443 expd_eq_old/*guard for store*/ );
4444 if (otrak) {
4445 gen_store_b( mce, elemSzB, cas->addr, memOffsHi/*offset*/,
4446 bdataHi/*bdata*/,
4447 expd_eq_old/*guard for store*/ );
4448 gen_store_b( mce, elemSzB, cas->addr, memOffsLo/*offset*/,
4449 bdataLo/*bdata*/,
4450 expd_eq_old/*guard for store*/ );
4451 }
4452}
4453
4454
sewardjdb5907d2009-11-26 17:20:21 +00004455/* ------ Dealing with LL/SC (not difficult) ------ */
4456
4457static void do_shadow_LLSC ( MCEnv* mce,
4458 IREndness stEnd,
4459 IRTemp stResult,
4460 IRExpr* stAddr,
4461 IRExpr* stStoredata )
4462{
4463 /* In short: treat a load-linked like a normal load followed by an
4464 assignment of the loaded (shadow) data to the result temporary.
4465 Treat a store-conditional like a normal store, and mark the
4466 result temporary as defined. */
4467 IRType resTy = typeOfIRTemp(mce->sb->tyenv, stResult);
4468 IRTemp resTmp = findShadowTmpV(mce, stResult);
4469
4470 tl_assert(isIRAtom(stAddr));
4471 if (stStoredata)
4472 tl_assert(isIRAtom(stStoredata));
4473
4474 if (stStoredata == NULL) {
4475 /* Load Linked */
4476 /* Just treat this as a normal load, followed by an assignment of
4477 the value to .result. */
4478 /* Stay sane */
4479 tl_assert(resTy == Ity_I64 || resTy == Ity_I32
4480 || resTy == Ity_I16 || resTy == Ity_I8);
4481 assign( 'V', mce, resTmp,
4482 expr2vbits_Load(
4483 mce, stEnd, resTy, stAddr, 0/*addr bias*/));
4484 } else {
4485 /* Store Conditional */
4486 /* Stay sane */
4487 IRType dataTy = typeOfIRExpr(mce->sb->tyenv,
4488 stStoredata);
4489 tl_assert(dataTy == Ity_I64 || dataTy == Ity_I32
4490 || dataTy == Ity_I16 || dataTy == Ity_I8);
4491 do_shadow_Store( mce, stEnd,
4492 stAddr, 0/* addr bias */,
4493 stStoredata,
4494 NULL /* shadow data */,
4495 NULL/*guard*/ );
4496 /* This is a store conditional, so it writes to .result a value
4497 indicating whether or not the store succeeded. Just claim
4498 this value is always defined. In the PowerPC interpretation
4499 of store-conditional, definedness of the success indication
4500 depends on whether the address of the store matches the
4501 reservation address. But we can't tell that here (and
4502 anyway, we're not being PowerPC-specific). At least we are
4503 guaranteed that the definedness of the store address, and its
4504 addressibility, will be checked as per normal. So it seems
4505 pretty safe to just say that the success indication is always
4506 defined.
4507
4508 In schemeS, for origin tracking, we must correspondingly set
4509 a no-origin value for the origin shadow of .result.
4510 */
4511 tl_assert(resTy == Ity_I1);
4512 assign( 'V', mce, resTmp, definedOfType(resTy) );
4513 }
4514}
4515
4516
sewardj95448072004-11-22 20:19:51 +00004517/*------------------------------------------------------------*/
4518/*--- Memcheck main ---*/
4519/*------------------------------------------------------------*/
njn25e49d8e72002-09-23 09:36:25 +00004520
sewardj7cf4e6b2008-05-01 20:24:26 +00004521static void schemeS ( MCEnv* mce, IRStmt* st );
4522
sewardj95448072004-11-22 20:19:51 +00004523static Bool isBogusAtom ( IRAtom* at )
njn25e49d8e72002-09-23 09:36:25 +00004524{
sewardj95448072004-11-22 20:19:51 +00004525 ULong n = 0;
4526 IRConst* con;
sewardj710d6c22005-03-20 18:55:15 +00004527 tl_assert(isIRAtom(at));
sewardj0b9d74a2006-12-24 02:24:11 +00004528 if (at->tag == Iex_RdTmp)
sewardj95448072004-11-22 20:19:51 +00004529 return False;
4530 tl_assert(at->tag == Iex_Const);
4531 con = at->Iex.Const.con;
4532 switch (con->tag) {
sewardjd5204dc2004-12-31 01:16:11 +00004533 case Ico_U1: return False;
4534 case Ico_U8: n = (ULong)con->Ico.U8; break;
4535 case Ico_U16: n = (ULong)con->Ico.U16; break;
4536 case Ico_U32: n = (ULong)con->Ico.U32; break;
4537 case Ico_U64: n = (ULong)con->Ico.U64; break;
4538 case Ico_F64: return False;
4539 case Ico_F64i: return False;
4540 case Ico_V128: return False;
sewardj95448072004-11-22 20:19:51 +00004541 default: ppIRExpr(at); tl_assert(0);
4542 }
4543 /* VG_(printf)("%llx\n", n); */
sewardj96a922e2005-04-23 23:26:29 +00004544 return (/*32*/ n == 0xFEFEFEFFULL
4545 /*32*/ || n == 0x80808080ULL
sewardj17b47432008-12-17 01:12:58 +00004546 /*32*/ || n == 0x7F7F7F7FULL
tomd9774d72005-06-27 08:11:01 +00004547 /*64*/ || n == 0xFFFFFFFFFEFEFEFFULL
sewardj96a922e2005-04-23 23:26:29 +00004548 /*64*/ || n == 0xFEFEFEFEFEFEFEFFULL
tomd9774d72005-06-27 08:11:01 +00004549 /*64*/ || n == 0x0000000000008080ULL
sewardj96a922e2005-04-23 23:26:29 +00004550 /*64*/ || n == 0x8080808080808080ULL
sewardj17b47432008-12-17 01:12:58 +00004551 /*64*/ || n == 0x0101010101010101ULL
sewardj96a922e2005-04-23 23:26:29 +00004552 );
sewardj95448072004-11-22 20:19:51 +00004553}
njn25e49d8e72002-09-23 09:36:25 +00004554
/* Does the (flat) statement 'st' mention any bogus literal, in the
   sense of isBogusAtom?  Walks every atom position of every statement
   kind; unhandled statement/expression kinds cause a panic rather
   than a silent False, so new IR constructs cannot slip through
   unchecked. */
static Bool checkForBogusLiterals ( /*FLAT*/ IRStmt* st )
{
   Int      i;
   IRExpr*  e;
   IRDirty* d;
   IRCAS*   cas;
   switch (st->tag) {
      case Ist_WrTmp:
         /* For tmp writes, inspect the atoms of the RHS expression. */
         e = st->Ist.WrTmp.data;
         switch (e->tag) {
            case Iex_Get:
            case Iex_RdTmp:
               /* no atoms to examine */
               return False;
            case Iex_Const:
               return isBogusAtom(e);
            case Iex_Unop:
               return isBogusAtom(e->Iex.Unop.arg);
            case Iex_GetI:
               return isBogusAtom(e->Iex.GetI.ix);
            case Iex_Binop:
               return isBogusAtom(e->Iex.Binop.arg1)
                      || isBogusAtom(e->Iex.Binop.arg2);
            case Iex_Triop:
               return isBogusAtom(e->Iex.Triop.arg1)
                      || isBogusAtom(e->Iex.Triop.arg2)
                      || isBogusAtom(e->Iex.Triop.arg3);
            case Iex_Qop:
               return isBogusAtom(e->Iex.Qop.arg1)
                      || isBogusAtom(e->Iex.Qop.arg2)
                      || isBogusAtom(e->Iex.Qop.arg3)
                      || isBogusAtom(e->Iex.Qop.arg4);
            case Iex_Mux0X:
               return isBogusAtom(e->Iex.Mux0X.cond)
                      || isBogusAtom(e->Iex.Mux0X.expr0)
                      || isBogusAtom(e->Iex.Mux0X.exprX);
            case Iex_Load:
               return isBogusAtom(e->Iex.Load.addr);
            case Iex_CCall:
               /* args is a NULL-terminated vector */
               for (i = 0; e->Iex.CCall.args[i]; i++)
                  if (isBogusAtom(e->Iex.CCall.args[i]))
                     return True;
               return False;
            default:
               goto unhandled;
         }
      case Ist_Dirty:
         /* Check all args, plus the optional guard and mAddr fields. */
         d = st->Ist.Dirty.details;
         for (i = 0; d->args[i]; i++)
            if (isBogusAtom(d->args[i]))
               return True;
         if (d->guard && isBogusAtom(d->guard))
            return True;
         if (d->mAddr && isBogusAtom(d->mAddr))
            return True;
         return False;
      case Ist_Put:
         return isBogusAtom(st->Ist.Put.data);
      case Ist_PutI:
         return isBogusAtom(st->Ist.PutI.ix)
                || isBogusAtom(st->Ist.PutI.data);
      case Ist_Store:
         return isBogusAtom(st->Ist.Store.addr)
                || isBogusAtom(st->Ist.Store.data);
      case Ist_Exit:
         return isBogusAtom(st->Ist.Exit.guard);
      case Ist_AbiHint:
         return isBogusAtom(st->Ist.AbiHint.base)
                || isBogusAtom(st->Ist.AbiHint.nia);
      case Ist_NoOp:
      case Ist_IMark:
      case Ist_MBE:
         /* these carry no atoms at all */
         return False;
      case Ist_CAS:
         /* expdHi/dataHi are NULL for single-width CAS; guard them. */
         cas = st->Ist.CAS.details;
         return isBogusAtom(cas->addr)
                || (cas->expdHi ? isBogusAtom(cas->expdHi) : False)
                || isBogusAtom(cas->expdLo)
                || (cas->dataHi ? isBogusAtom(cas->dataHi) : False)
                || isBogusAtom(cas->dataLo);
      case Ist_LLSC:
         /* storedata is NULL for a Load-Linked */
         return isBogusAtom(st->Ist.LLSC.addr)
                || (st->Ist.LLSC.storedata
                       ? isBogusAtom(st->Ist.LLSC.storedata)
                       : False);
      default:
      unhandled:
         ppIRStmt(st);
         /* NB: panic string is the function's historical name */
         VG_(tool_panic)("hasBogusLiterals");
   }
}
njn25e49d8e72002-09-23 09:36:25 +00004645
njn25e49d8e72002-09-23 09:36:25 +00004646
/* Memcheck's top-level instrumentation entry point.  Builds and
   returns sb_out, a fresh superblock containing a copy of sb_in's
   statements interleaved with V-bit (definedness) and, at
   --track-origins=yes (MC_(clo_mc_level) == 3), B-bit (origin)
   shadow computations.

   Note: 'closure' and 'vge' are not used in this function body. */
IRSB* MC_(instrument) ( VgCallbackClosure* closure,
                        IRSB* sb_in,
                        VexGuestLayout* layout,
                        VexGuestExtents* vge,
                        IRType gWordTy, IRType hWordTy )
{
   Bool    verboze = 0||False;
   Bool    bogus;
   Int     i, j, first_stmt;
   IRStmt* st;
   MCEnv   mce;
   IRSB*   sb_out;

   if (gWordTy != hWordTy) {
      /* We don't currently support this case. */
      VG_(tool_panic)("host/guest word size mismatch");
   }

   /* Check we're not completely nuts */
   tl_assert(sizeof(UWord)  == sizeof(void*));
   tl_assert(sizeof(Word)   == sizeof(void*));
   tl_assert(sizeof(Addr)   == sizeof(void*));
   tl_assert(sizeof(ULong)  == 8);
   tl_assert(sizeof(Long)   == 8);
   tl_assert(sizeof(Addr64) == 8);
   tl_assert(sizeof(UInt)   == 4);
   tl_assert(sizeof(Int)    == 4);

   tl_assert(MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3);

   /* Set up SB: same header/tyenv as the input, but no statements yet. */
   sb_out = deepCopyIRSBExceptStmts(sb_in);

   /* Set up the running environment.  Both .sb and .tmpMap are
      modified as we go along.  Note that tmps are added to both
      .sb->tyenv and .tmpMap together, so the valid index-set for
      those two arrays should always be identical. */
   VG_(memset)(&mce, 0, sizeof(mce));
   mce.sb             = sb_out;
   mce.trace          = verboze;
   mce.layout         = layout;
   mce.hWordTy        = hWordTy;
   mce.bogusLiterals  = False;

   /* Seed tmpMap with one 'Orig' entry per pre-existing tmp, with no
      shadows assigned yet; shadows are created lazily on demand. */
   mce.tmpMap = VG_(newXA)( VG_(malloc), "mc.MC_(instrument).1", VG_(free),
                            sizeof(TempMapEnt));
   for (i = 0; i < sb_in->tyenv->types_used; i++) {
      TempMapEnt ent;
      ent.kind    = Orig;
      ent.shadowV = IRTemp_INVALID;
      ent.shadowB = IRTemp_INVALID;
      VG_(addToXA)( mce.tmpMap, &ent );
   }
   tl_assert( VG_(sizeXA)( mce.tmpMap ) == sb_in->tyenv->types_used );

   /* Make a preliminary inspection of the statements, to see if there
      are any dodgy-looking literals.  If there are, we generate
      extra-detailed (hence extra-expensive) instrumentation in
      places.  Scan the whole bb even if dodgyness is found earlier,
      so that the flatness assertion is applied to all stmts. */

   bogus = False;

   for (i = 0; i < sb_in->stmts_used; i++) {

      st = sb_in->stmts[i];
      tl_assert(st);
      tl_assert(isFlatIRStmt(st));

      if (!bogus) {
         bogus = checkForBogusLiterals(st);
         if (0 && bogus) {
            VG_(printf)("bogus: ");
            ppIRStmt(st);
            VG_(printf)("\n");
         }
      }

   }

   mce.bogusLiterals = bogus;

   /* Copy verbatim any IR preamble preceding the first IMark */

   tl_assert(mce.sb == sb_out);
   tl_assert(mce.sb != sb_in);

   i = 0;
   while (i < sb_in->stmts_used && sb_in->stmts[i]->tag != Ist_IMark) {

      st = sb_in->stmts[i];
      tl_assert(st);
      tl_assert(isFlatIRStmt(st));

      stmt( 'C', &mce, sb_in->stmts[i] );
      i++;
   }

   /* Nasty problem.  IR optimisation of the pre-instrumented IR may
      cause the IR following the preamble to contain references to IR
      temporaries defined in the preamble.  Because the preamble isn't
      instrumented, these temporaries don't have any shadows.
      Nevertheless uses of them following the preamble will cause
      memcheck to generate references to their shadows.  End effect is
      to cause IR sanity check failures, due to references to
      non-existent shadows.  This is only evident for the complex
      preambles used for function wrapping on TOC-afflicted platforms
      (ppc64-linux, ppc32-aix5, ppc64-aix5).

      The following loop therefore scans the preamble looking for
      assignments to temporaries.  For each one found it creates an
      assignment to the corresponding (V) shadow temp, marking it as
      'defined'.  This is the same resulting IR as if the main
      instrumentation loop before had been applied to the statement
      'tmp = CONSTANT'.

      Similarly, if origin tracking is enabled, we must generate an
      assignment for the corresponding origin (B) shadow, claiming
      no-origin, as appropriate for a defined value.
   */
   for (j = 0; j < i; j++) {
      if (sb_in->stmts[j]->tag == Ist_WrTmp) {
         /* findShadowTmpV checks its arg is an original tmp;
            no need to assert that here. */
         IRTemp tmp_o = sb_in->stmts[j]->Ist.WrTmp.tmp;
         IRTemp tmp_v = findShadowTmpV(&mce, tmp_o);
         IRType ty_v  = typeOfIRTemp(sb_out->tyenv, tmp_v);
         assign( 'V', &mce, tmp_v, definedOfType( ty_v ) );
         if (MC_(clo_mc_level) == 3) {
            IRTemp tmp_b = findShadowTmpB(&mce, tmp_o);
            tl_assert(typeOfIRTemp(sb_out->tyenv, tmp_b) == Ity_I32);
            assign( 'B', &mce, tmp_b, mkU32(0)/* UNKNOWN ORIGIN */);
         }
         if (0) {
            VG_(printf)("create shadow tmp(s) for preamble tmp [%d] ty ", j);
            ppIRType( ty_v );
            VG_(printf)("\n");
         }
      }
   }

   /* Iterate over the remaining stmts to generate instrumentation. */

   tl_assert(sb_in->stmts_used > 0);
   tl_assert(i >= 0);
   tl_assert(i < sb_in->stmts_used);
   tl_assert(sb_in->stmts[i]->tag == Ist_IMark);

   for (/* use current i*/; i < sb_in->stmts_used; i++) {

      st = sb_in->stmts[i];
      /* Remember where this stmt's instrumentation starts in sb_out,
         for the verbose dump below. */
      first_stmt = sb_out->stmts_used;

      if (verboze) {
         VG_(printf)("\n");
         ppIRStmt(st);
         VG_(printf)("\n");
      }

      if (MC_(clo_mc_level) == 3) {
         /* See comments on case Ist_CAS below. */
         if (st->tag != Ist_CAS)
            schemeS( &mce, st );
      }

      /* Generate instrumentation code for each stmt ... */

      switch (st->tag) {

         case Ist_WrTmp:
            assign( 'V', &mce, findShadowTmpV(&mce, st->Ist.WrTmp.tmp),
                               expr2vbits( &mce, st->Ist.WrTmp.data) );
            break;

         case Ist_Put:
            do_shadow_PUT( &mce,
                           st->Ist.Put.offset,
                           st->Ist.Put.data,
                           NULL /* shadow atom */ );
            break;

         case Ist_PutI:
            do_shadow_PUTI( &mce,
                            st->Ist.PutI.descr,
                            st->Ist.PutI.ix,
                            st->Ist.PutI.bias,
                            st->Ist.PutI.data );
            break;

         case Ist_Store:
            do_shadow_Store( &mce, st->Ist.Store.end,
                                   st->Ist.Store.addr, 0/* addr bias */,
                                   st->Ist.Store.data,
                                   NULL /* shadow data */,
                                   NULL/*guard*/ );
            break;

         case Ist_Exit:
            complainIfUndefined( &mce, st->Ist.Exit.guard );
            break;

         case Ist_IMark:
            break;

         case Ist_NoOp:
         case Ist_MBE:
            /* nothing to instrument */
            break;

         case Ist_Dirty:
            do_shadow_Dirty( &mce, st->Ist.Dirty.details );
            break;

         case Ist_AbiHint:
            do_AbiHint( &mce, st->Ist.AbiHint.base,
                              st->Ist.AbiHint.len,
                              st->Ist.AbiHint.nia );
            break;

         case Ist_CAS:
            do_shadow_CAS( &mce, st->Ist.CAS.details );
            /* Note, do_shadow_CAS copies the CAS itself to the output
               block, because it needs to add instrumentation both
               before and after it.  Hence skip the copy below.  Also
               skip the origin-tracking stuff (call to schemeS) above,
               since that's all tangled up with it too; do_shadow_CAS
               does it all. */
            break;

         case Ist_LLSC:
            do_shadow_LLSC( &mce,
                            st->Ist.LLSC.end,
                            st->Ist.LLSC.result,
                            st->Ist.LLSC.addr,
                            st->Ist.LLSC.storedata );
            break;

         default:
            VG_(printf)("\n");
            ppIRStmt(st);
            VG_(printf)("\n");
            VG_(tool_panic)("memcheck: unhandled IRStmt");

      } /* switch (st->tag) */

      if (0 && verboze) {
         for (j = first_stmt; j < sb_out->stmts_used; j++) {
            VG_(printf)("   ");
            ppIRStmt(sb_out->stmts[j]);
            VG_(printf)("\n");
         }
         VG_(printf)("\n");
      }

      /* ... and finally copy the stmt itself to the output.  Except,
         skip the copy of IRCASs; see comments on case Ist_CAS
         above. */
      if (st->tag != Ist_CAS)
         stmt('C', &mce, st);
   }

   /* Now we need to complain if the jump target is undefined. */
   first_stmt = sb_out->stmts_used;

   if (verboze) {
      VG_(printf)("sb_in->next = ");
      ppIRExpr(sb_in->next);
      VG_(printf)("\n\n");
   }

   complainIfUndefined( &mce, sb_in->next );

   if (0 && verboze) {
      for (j = first_stmt; j < sb_out->stmts_used; j++) {
         VG_(printf)("   ");
         ppIRStmt(sb_out->stmts[j]);
         VG_(printf)("\n");
      }
      VG_(printf)("\n");
   }

   /* If this fails, there's been some serious snafu with tmp management,
      that should be investigated. */
   tl_assert( VG_(sizeXA)( mce.tmpMap ) == mce.sb->tyenv->types_used );
   VG_(deleteXA)( mce.tmpMap );

   tl_assert(mce.sb == sb_out);
   return sb_out;
}
njn25e49d8e72002-09-23 09:36:25 +00004935
sewardj81651dc2007-08-28 06:05:20 +00004936/*------------------------------------------------------------*/
4937/*--- Post-tree-build final tidying ---*/
4938/*------------------------------------------------------------*/
4939
4940/* This exploits the observation that Memcheck often produces
4941 repeated conditional calls of the form
4942
sewardj7cf4e6b2008-05-01 20:24:26 +00004943 Dirty G MC_(helperc_value_check0/1/4/8_fail)(UInt otag)
sewardj81651dc2007-08-28 06:05:20 +00004944
4945 with the same guard expression G guarding the same helper call.
4946 The second and subsequent calls are redundant. This usually
4947 results from instrumentation of guest code containing multiple
4948 memory references at different constant offsets from the same base
4949 register. After optimisation of the instrumentation, you get a
4950 test for the definedness of the base register for each memory
4951 reference, which is kinda pointless. MC_(final_tidy) therefore
4952 looks for such repeated calls and removes all but the first. */
4953
4954/* A struct for recording which (helper, guard) pairs we have already
4955 seen. */
typedef
   struct { void* entry;     /* helper's code address (IRCallee->addr) */
            IRExpr* guard;   /* guard expression of the dirty call */
   }
   Pair;
4959
4960/* Return True if e1 and e2 definitely denote the same value (used to
4961 compare guards). Return False if unknown; False is the safe
4962 answer. Since guest registers and guest memory do not have the
4963 SSA property we must return False if any Gets or Loads appear in
4964 the expression. */
4965
4966static Bool sameIRValue ( IRExpr* e1, IRExpr* e2 )
4967{
4968 if (e1->tag != e2->tag)
4969 return False;
4970 switch (e1->tag) {
4971 case Iex_Const:
4972 return eqIRConst( e1->Iex.Const.con, e2->Iex.Const.con );
4973 case Iex_Binop:
4974 return e1->Iex.Binop.op == e2->Iex.Binop.op
4975 && sameIRValue(e1->Iex.Binop.arg1, e2->Iex.Binop.arg1)
4976 && sameIRValue(e1->Iex.Binop.arg2, e2->Iex.Binop.arg2);
4977 case Iex_Unop:
4978 return e1->Iex.Unop.op == e2->Iex.Unop.op
4979 && sameIRValue(e1->Iex.Unop.arg, e2->Iex.Unop.arg);
4980 case Iex_RdTmp:
4981 return e1->Iex.RdTmp.tmp == e2->Iex.RdTmp.tmp;
4982 case Iex_Mux0X:
4983 return sameIRValue( e1->Iex.Mux0X.cond, e2->Iex.Mux0X.cond )
4984 && sameIRValue( e1->Iex.Mux0X.expr0, e2->Iex.Mux0X.expr0 )
4985 && sameIRValue( e1->Iex.Mux0X.exprX, e2->Iex.Mux0X.exprX );
4986 case Iex_Qop:
4987 case Iex_Triop:
4988 case Iex_CCall:
4989 /* be lazy. Could define equality for these, but they never
4990 appear to be used. */
4991 return False;
4992 case Iex_Get:
4993 case Iex_GetI:
4994 case Iex_Load:
4995 /* be conservative - these may not give the same value each
4996 time */
4997 return False;
4998 case Iex_Binder:
4999 /* should never see this */
5000 /* fallthrough */
5001 default:
5002 VG_(printf)("mc_translate.c: sameIRValue: unhandled: ");
5003 ppIRExpr(e1);
5004 VG_(tool_panic)("memcheck:sameIRValue");
5005 return False;
5006 }
5007}
5008
5009/* See if 'pairs' already has an entry for (entry, guard). Return
5010 True if so. If not, add an entry. */
5011
5012static
5013Bool check_or_add ( XArray* /*of Pair*/ pairs, IRExpr* guard, void* entry )
5014{
5015 Pair p;
5016 Pair* pp;
5017 Int i, n = VG_(sizeXA)( pairs );
5018 for (i = 0; i < n; i++) {
5019 pp = VG_(indexXA)( pairs, i );
5020 if (pp->entry == entry && sameIRValue(pp->guard, guard))
5021 return True;
5022 }
5023 p.guard = guard;
5024 p.entry = entry;
5025 VG_(addToXA)( pairs, &p );
5026 return False;
5027}
5028
5029static Bool is_helperc_value_checkN_fail ( HChar* name )
5030{
5031 return
sewardj7cf4e6b2008-05-01 20:24:26 +00005032 0==VG_(strcmp)(name, "MC_(helperc_value_check0_fail_no_o)")
5033 || 0==VG_(strcmp)(name, "MC_(helperc_value_check1_fail_no_o)")
5034 || 0==VG_(strcmp)(name, "MC_(helperc_value_check4_fail_no_o)")
5035 || 0==VG_(strcmp)(name, "MC_(helperc_value_check8_fail_no_o)")
5036 || 0==VG_(strcmp)(name, "MC_(helperc_value_check0_fail_w_o)")
5037 || 0==VG_(strcmp)(name, "MC_(helperc_value_check1_fail_w_o)")
5038 || 0==VG_(strcmp)(name, "MC_(helperc_value_check4_fail_w_o)")
5039 || 0==VG_(strcmp)(name, "MC_(helperc_value_check8_fail_w_o)");
sewardj81651dc2007-08-28 06:05:20 +00005040}
5041
/* Remove redundant conditional calls to the value-check-fail helpers:
   if two dirty calls target the same helper entry point under
   provably-identical guards, the second and later ones are replaced
   by no-ops.  Modifies sb_in in place and returns it. */
IRSB* MC_(final_tidy) ( IRSB* sb_in )
{
   Int i;
   IRStmt*   st;
   IRDirty*  di;
   IRExpr*   guard;
   IRCallee* cee;
   Bool      alreadyPresent;
   /* Set of (helper entry, guard) pairs seen so far in this SB. */
   XArray* pairs = VG_(newXA)( VG_(malloc), "mc.ft.1",
                               VG_(free), sizeof(Pair) );
   /* Scan forwards through the statements.  Each time a call to one
      of the relevant helpers is seen, check if we have made a
      previous call to the same helper using the same guard
      expression, and if so, delete the call. */
   for (i = 0; i < sb_in->stmts_used; i++) {
      st = sb_in->stmts[i];
      tl_assert(st);
      if (st->tag != Ist_Dirty)
         continue;
      di = st->Ist.Dirty.details;
      guard = di->guard;
      if (!guard)
         continue;
      if (0) { ppIRExpr(guard); VG_(printf)("\n"); }
      cee = di->cee;
      if (!is_helperc_value_checkN_fail( cee->name ))
         continue;
      /* Ok, we have a call to helperc_value_check0/1/4/8_fail with
         guard 'guard'.  Check if we have already seen a call to this
         function with the same guard.  If so, delete it.  If not,
         add it to the set of calls we do know about. */
      alreadyPresent = check_or_add( pairs, guard, cee->addr );
      if (alreadyPresent) {
         /* duplicate -- neutralise it in place */
         sb_in->stmts[i] = IRStmt_NoOp();
         if (0) VG_(printf)("XX\n");
      }
   }
   VG_(deleteXA)( pairs );
   return sb_in;
}
5082
5083
sewardj7cf4e6b2008-05-01 20:24:26 +00005084/*------------------------------------------------------------*/
5085/*--- Origin tracking stuff ---*/
5086/*------------------------------------------------------------*/
5087
sewardj1c0ce7a2009-07-01 08:10:49 +00005088/* Almost identical to findShadowTmpV. */
/* Almost identical to findShadowTmpV.  Return the origin (B) shadow
   tmp for original tmp 'orig', creating a new Ity_I32 shadow on first
   request. */
static IRTemp findShadowTmpB ( MCEnv* mce, IRTemp orig )
{
   TempMapEnt* ent;
   /* VG_(indexXA) range-checks 'orig', hence no need to check
      here. */
   ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
   tl_assert(ent->kind == Orig);
   if (ent->shadowB == IRTemp_INVALID) {
      /* No B shadow allocated yet; make one. */
      IRTemp tmpB
        = newTemp( mce, Ity_I32, BSh );
      /* newTemp may cause mce->tmpMap to resize, hence previous results
         from VG_(indexXA) are invalid. */
      ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
      tl_assert(ent->kind == Orig);
      tl_assert(ent->shadowB == IRTemp_INVALID);
      ent->shadowB = tmpB;
   }
   return ent->shadowB;
}
5108
/* Combine two 32-bit origin tags: emit an Iop_Max32U of b1 and b2
   into a fresh B-shadow tmp and return it. */
static IRAtom* gen_maxU32 ( MCEnv* mce, IRAtom* b1, IRAtom* b2 )
{
   return assignNew( 'B', mce, Ity_I32, binop(Iop_Max32U, b1, b2) );
}
5113
/* Emit a call to the relevant MC_(helperc_b_load*) helper to fetch
   the origin tag for an szB-byte load from baseaddr+offset.  Returns
   an Ity_I32 atom holding the tag (the helper's word-sized result is
   narrowed on 64-bit hosts). */
static IRAtom* gen_load_b ( MCEnv* mce, Int szB,
                            IRAtom* baseaddr, Int offset )
{
   void*    hFun;
   HChar*   hName;
   IRTemp   bTmp;
   IRDirty* di;
   IRType   aTy   = typeOfIRExpr( mce->sb->tyenv, baseaddr );
   IROp     opAdd = aTy == Ity_I32 ? Iop_Add32 : Iop_Add64;
   IRAtom*  ea    = baseaddr;
   if (offset != 0) {
      /* Fold the constant offset into the effective address. */
      IRAtom* off = aTy == Ity_I32 ? mkU32( offset )
                                   : mkU64( (Long)(Int)offset );
      ea = assignNew( 'B', mce, aTy, binop(opAdd, ea, off));
   }
   bTmp = newTemp(mce, mce->hWordTy, BSh);

   /* Pick the helper matching the access size. */
   switch (szB) {
      case 1: hFun  = (void*)&MC_(helperc_b_load1);
              hName = "MC_(helperc_b_load1)";
              break;
      case 2: hFun  = (void*)&MC_(helperc_b_load2);
              hName = "MC_(helperc_b_load2)";
              break;
      case 4: hFun  = (void*)&MC_(helperc_b_load4);
              hName = "MC_(helperc_b_load4)";
              break;
      case 8: hFun  = (void*)&MC_(helperc_b_load8);
              hName = "MC_(helperc_b_load8)";
              break;
      case 16: hFun  = (void*)&MC_(helperc_b_load16);
               hName = "MC_(helperc_b_load16)";
               break;
      default:
         VG_(printf)("mc_translate.c: gen_load_b: unhandled szB == %d\n", szB);
         tl_assert(0);
   }
   di = unsafeIRDirty_1_N(
           bTmp, 1/*regparms*/, hName, VG_(fnptr_to_fnentry)( hFun ),
           mkIRExprVec_1( ea )
        );
   /* no need to mess with any annotations.  This call accesses
      neither guest state nor guest memory. */
   stmt( 'B', mce, IRStmt_Dirty(di) );
   if (mce->hWordTy == Ity_I64) {
      /* 64-bit host: narrow the helper's I64 result to I32 */
      IRTemp bTmp32 = newTemp(mce, Ity_I32, BSh);
      assign( 'B', mce, bTmp32, unop(Iop_64to32, mkexpr(bTmp)) );
      return mkexpr(bTmp32);
   } else {
      /* 32-bit host */
      return mkexpr(bTmp);
   }
}
sewardj1c0ce7a2009-07-01 08:10:49 +00005168
5169/* Generate a shadow store. guard :: Ity_I1 controls whether the
5170 store really happens; NULL means it unconditionally does. */
/* Generate a shadow store: call the relevant MC_(helperc_b_store*)
   helper to write the Ity_I32 origin tag 'dataB' for an szB-byte
   store at baseaddr+offset.  guard :: Ity_I1 controls whether the
   store really happens; NULL means it unconditionally does. */
static void gen_store_b ( MCEnv* mce, Int szB,
                          IRAtom* baseaddr, Int offset, IRAtom* dataB,
                          IRAtom* guard )
{
   void*    hFun;
   HChar*   hName;
   IRDirty* di;
   IRType   aTy   = typeOfIRExpr( mce->sb->tyenv, baseaddr );
   IROp     opAdd = aTy == Ity_I32 ? Iop_Add32 : Iop_Add64;
   IRAtom*  ea    = baseaddr;
   if (guard) {
      tl_assert(isOriginalAtom(mce, guard));
      tl_assert(typeOfIRExpr(mce->sb->tyenv, guard) == Ity_I1);
   }
   if (offset != 0) {
      /* Fold the constant offset into the effective address. */
      IRAtom* off = aTy == Ity_I32 ? mkU32( offset )
                                   : mkU64( (Long)(Int)offset );
      ea = assignNew( 'B', mce, aTy, binop(opAdd, ea, off));
   }
   /* On 64-bit hosts, widen the 32-bit tag to the host word size. */
   if (mce->hWordTy == Ity_I64)
      dataB = assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, dataB));

   /* Pick the helper matching the access size. */
   switch (szB) {
      case 1: hFun  = (void*)&MC_(helperc_b_store1);
              hName = "MC_(helperc_b_store1)";
              break;
      case 2: hFun  = (void*)&MC_(helperc_b_store2);
              hName = "MC_(helperc_b_store2)";
              break;
      case 4: hFun  = (void*)&MC_(helperc_b_store4);
              hName = "MC_(helperc_b_store4)";
              break;
      case 8: hFun  = (void*)&MC_(helperc_b_store8);
              hName = "MC_(helperc_b_store8)";
              break;
      case 16: hFun  = (void*)&MC_(helperc_b_store16);
               hName = "MC_(helperc_b_store16)";
               break;
      default:
         tl_assert(0);
   }
   di = unsafeIRDirty_0_N( 2/*regparms*/,
           hName, VG_(fnptr_to_fnentry)( hFun ),
           mkIRExprVec_2( ea, dataB )
        );
   /* no need to mess with any annotations.  This call accesses
      neither guest state nor guest memory. */
   if (guard) di->guard = guard;
   stmt( 'B', mce, IRStmt_Dirty(di) );
}
5221
5222static IRAtom* narrowTo32 ( MCEnv* mce, IRAtom* e ) {
sewardj1c0ce7a2009-07-01 08:10:49 +00005223 IRType eTy = typeOfIRExpr(mce->sb->tyenv, e);
sewardj7cf4e6b2008-05-01 20:24:26 +00005224 if (eTy == Ity_I64)
5225 return assignNew( 'B', mce, Ity_I32, unop(Iop_64to32, e) );
5226 if (eTy == Ity_I32)
5227 return e;
5228 tl_assert(0);
5229}
5230
5231static IRAtom* zWidenFrom32 ( MCEnv* mce, IRType dstTy, IRAtom* e ) {
sewardj1c0ce7a2009-07-01 08:10:49 +00005232 IRType eTy = typeOfIRExpr(mce->sb->tyenv, e);
sewardj7cf4e6b2008-05-01 20:24:26 +00005233 tl_assert(eTy == Ity_I32);
5234 if (dstTy == Ity_I64)
5235 return assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, e) );
5236 tl_assert(0);
5237}
5238
sewardjdb5907d2009-11-26 17:20:21 +00005239
sewardj7cf4e6b2008-05-01 20:24:26 +00005240static IRAtom* schemeE ( MCEnv* mce, IRExpr* e )
5241{
5242 tl_assert(MC_(clo_mc_level) == 3);
5243
5244 switch (e->tag) {
5245
5246 case Iex_GetI: {
5247 IRRegArray* descr_b;
5248 IRAtom *t1, *t2, *t3, *t4;
5249 IRRegArray* descr = e->Iex.GetI.descr;
5250 IRType equivIntTy
5251 = MC_(get_otrack_reg_array_equiv_int_type)(descr);
5252 /* If this array is unshadowable for whatever reason, use the
5253 usual approximation. */
5254 if (equivIntTy == Ity_INVALID)
5255 return mkU32(0);
5256 tl_assert(sizeofIRType(equivIntTy) >= 4);
5257 tl_assert(sizeofIRType(equivIntTy) == sizeofIRType(descr->elemTy));
5258 descr_b = mkIRRegArray( descr->base + 2*mce->layout->total_sizeB,
5259 equivIntTy, descr->nElems );
5260 /* Do a shadow indexed get of the same size, giving t1. Take
5261 the bottom 32 bits of it, giving t2. Compute into t3 the
5262 origin for the index (almost certainly zero, but there's
5263 no harm in being completely general here, since iropt will
5264 remove any useless code), and fold it in, giving a final
5265 value t4. */
5266 t1 = assignNew( 'B', mce, equivIntTy,
5267 IRExpr_GetI( descr_b, e->Iex.GetI.ix,
5268 e->Iex.GetI.bias ));
5269 t2 = narrowTo32( mce, t1 );
5270 t3 = schemeE( mce, e->Iex.GetI.ix );
5271 t4 = gen_maxU32( mce, t2, t3 );
5272 return t4;
5273 }
5274 case Iex_CCall: {
5275 Int i;
5276 IRAtom* here;
5277 IRExpr** args = e->Iex.CCall.args;
5278 IRAtom* curr = mkU32(0);
5279 for (i = 0; args[i]; i++) {
5280 tl_assert(i < 32);
5281 tl_assert(isOriginalAtom(mce, args[i]));
5282 /* Only take notice of this arg if the callee's
5283 mc-exclusion mask does not say it is to be excluded. */
5284 if (e->Iex.CCall.cee->mcx_mask & (1<<i)) {
5285 /* the arg is to be excluded from definedness checking.
5286 Do nothing. */
5287 if (0) VG_(printf)("excluding %s(%d)\n",
5288 e->Iex.CCall.cee->name, i);
5289 } else {
5290 /* calculate the arg's definedness, and pessimistically
5291 merge it in. */
5292 here = schemeE( mce, args[i] );
5293 curr = gen_maxU32( mce, curr, here );
5294 }
5295 }
5296 return curr;
5297 }
5298 case Iex_Load: {
5299 Int dszB;
5300 dszB = sizeofIRType(e->Iex.Load.ty);
5301 /* assert that the B value for the address is already
5302 available (somewhere) */
5303 tl_assert(isIRAtom(e->Iex.Load.addr));
5304 tl_assert(mce->hWordTy == Ity_I32 || mce->hWordTy == Ity_I64);
5305 return gen_load_b( mce, dszB, e->Iex.Load.addr, 0 );
5306 }
5307 case Iex_Mux0X: {
5308 IRAtom* b1 = schemeE( mce, e->Iex.Mux0X.cond );
5309 IRAtom* b2 = schemeE( mce, e->Iex.Mux0X.expr0 );
5310 IRAtom* b3 = schemeE( mce, e->Iex.Mux0X.exprX );
5311 return gen_maxU32( mce, b1, gen_maxU32( mce, b2, b3 ));
5312 }
5313 case Iex_Qop: {
5314 IRAtom* b1 = schemeE( mce, e->Iex.Qop.arg1 );
5315 IRAtom* b2 = schemeE( mce, e->Iex.Qop.arg2 );
5316 IRAtom* b3 = schemeE( mce, e->Iex.Qop.arg3 );
5317 IRAtom* b4 = schemeE( mce, e->Iex.Qop.arg4 );
5318 return gen_maxU32( mce, gen_maxU32( mce, b1, b2 ),
5319 gen_maxU32( mce, b3, b4 ) );
5320 }
5321 case Iex_Triop: {
5322 IRAtom* b1 = schemeE( mce, e->Iex.Triop.arg1 );
5323 IRAtom* b2 = schemeE( mce, e->Iex.Triop.arg2 );
5324 IRAtom* b3 = schemeE( mce, e->Iex.Triop.arg3 );
5325 return gen_maxU32( mce, b1, gen_maxU32( mce, b2, b3 ) );
5326 }
5327 case Iex_Binop: {
sewardjafed4c52009-07-12 13:00:17 +00005328 switch (e->Iex.Binop.op) {
5329 case Iop_CasCmpEQ8: case Iop_CasCmpNE8:
5330 case Iop_CasCmpEQ16: case Iop_CasCmpNE16:
5331 case Iop_CasCmpEQ32: case Iop_CasCmpNE32:
5332 case Iop_CasCmpEQ64: case Iop_CasCmpNE64:
5333 /* Just say these all produce a defined result,
5334 regardless of their arguments. See
5335 COMMENT_ON_CasCmpEQ in this file. */
5336 return mkU32(0);
5337 default: {
5338 IRAtom* b1 = schemeE( mce, e->Iex.Binop.arg1 );
5339 IRAtom* b2 = schemeE( mce, e->Iex.Binop.arg2 );
5340 return gen_maxU32( mce, b1, b2 );
5341 }
5342 }
5343 tl_assert(0);
5344 /*NOTREACHED*/
sewardj7cf4e6b2008-05-01 20:24:26 +00005345 }
5346 case Iex_Unop: {
5347 IRAtom* b1 = schemeE( mce, e->Iex.Unop.arg );
5348 return b1;
5349 }
5350 case Iex_Const:
5351 return mkU32(0);
5352 case Iex_RdTmp:
5353 return mkexpr( findShadowTmpB( mce, e->Iex.RdTmp.tmp ));
5354 case Iex_Get: {
5355 Int b_offset = MC_(get_otrack_shadow_offset)(
5356 e->Iex.Get.offset,
5357 sizeofIRType(e->Iex.Get.ty)
5358 );
5359 tl_assert(b_offset >= -1
5360 && b_offset <= mce->layout->total_sizeB -4);
5361 if (b_offset >= 0) {
5362 /* FIXME: this isn't an atom! */
5363 return IRExpr_Get( b_offset + 2*mce->layout->total_sizeB,
5364 Ity_I32 );
5365 }
5366 return mkU32(0);
5367 }
5368 default:
5369 VG_(printf)("mc_translate.c: schemeE: unhandled: ");
5370 ppIRExpr(e);
5371 VG_(tool_panic)("memcheck:schemeE");
5372 }
5373}
5374
sewardjdb5907d2009-11-26 17:20:21 +00005375
sewardj7cf4e6b2008-05-01 20:24:26 +00005376static void do_origins_Dirty ( MCEnv* mce, IRDirty* d )
5377{
5378 // This is a hacked version of do_shadow_Dirty
njn4c245e52009-03-15 23:25:38 +00005379 Int i, n, toDo, gSz, gOff;
sewardj7cf4e6b2008-05-01 20:24:26 +00005380 IRAtom *here, *curr;
5381 IRTemp dst;
sewardj7cf4e6b2008-05-01 20:24:26 +00005382
5383 /* First check the guard. */
5384 curr = schemeE( mce, d->guard );
5385
5386 /* Now round up all inputs and maxU32 over them. */
5387
5388 /* Inputs: unmasked args */
5389 for (i = 0; d->args[i]; i++) {
5390 if (d->cee->mcx_mask & (1<<i)) {
5391 /* ignore this arg */
5392 } else {
5393 here = schemeE( mce, d->args[i] );
5394 curr = gen_maxU32( mce, curr, here );
5395 }
5396 }
5397
5398 /* Inputs: guest state that we read. */
5399 for (i = 0; i < d->nFxState; i++) {
5400 tl_assert(d->fxState[i].fx != Ifx_None);
5401 if (d->fxState[i].fx == Ifx_Write)
5402 continue;
5403
5404 /* Ignore any sections marked as 'always defined'. */
5405 if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size )) {
5406 if (0)
5407 VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
5408 d->fxState[i].offset, d->fxState[i].size );
5409 continue;
5410 }
5411
5412 /* This state element is read or modified. So we need to
5413 consider it. If larger than 4 bytes, deal with it in 4-byte
5414 chunks. */
5415 gSz = d->fxState[i].size;
5416 gOff = d->fxState[i].offset;
5417 tl_assert(gSz > 0);
5418 while (True) {
5419 Int b_offset;
5420 if (gSz == 0) break;
5421 n = gSz <= 4 ? gSz : 4;
5422 /* update 'curr' with maxU32 of the state slice
5423 gOff .. gOff+n-1 */
5424 b_offset = MC_(get_otrack_shadow_offset)(gOff, 4);
5425 if (b_offset != -1) {
5426 here = assignNew( 'B',mce,
5427 Ity_I32,
5428 IRExpr_Get(b_offset + 2*mce->layout->total_sizeB,
5429 Ity_I32));
5430 curr = gen_maxU32( mce, curr, here );
5431 }
5432 gSz -= n;
5433 gOff += n;
5434 }
5435
5436 }
5437
5438 /* Inputs: memory */
5439
5440 if (d->mFx != Ifx_None) {
5441 /* Because we may do multiple shadow loads/stores from the same
5442 base address, it's best to do a single test of its
5443 definedness right now. Post-instrumentation optimisation
5444 should remove all but this test. */
5445 tl_assert(d->mAddr);
5446 here = schemeE( mce, d->mAddr );
5447 curr = gen_maxU32( mce, curr, here );
5448 }
5449
5450 /* Deal with memory inputs (reads or modifies) */
5451 if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
sewardj7cf4e6b2008-05-01 20:24:26 +00005452 toDo = d->mSize;
5453 /* chew off 32-bit chunks. We don't care about the endianness
5454 since it's all going to be condensed down to a single bit,
5455 but nevertheless choose an endianness which is hopefully
5456 native to the platform. */
5457 while (toDo >= 4) {
5458 here = gen_load_b( mce, 4, d->mAddr, d->mSize - toDo );
5459 curr = gen_maxU32( mce, curr, here );
5460 toDo -= 4;
5461 }
sewardj8c93fcc2008-10-30 13:08:31 +00005462 /* handle possible 16-bit excess */
5463 while (toDo >= 2) {
5464 here = gen_load_b( mce, 2, d->mAddr, d->mSize - toDo );
5465 curr = gen_maxU32( mce, curr, here );
5466 toDo -= 2;
5467 }
5468 tl_assert(toDo == 0); /* also need to handle 1-byte excess */
sewardj7cf4e6b2008-05-01 20:24:26 +00005469 }
5470
5471 /* Whew! So curr is a 32-bit B-value which should give an origin
5472 of some use if any of the inputs to the helper are undefined.
5473 Now we need to re-distribute the results to all destinations. */
5474
5475 /* Outputs: the destination temporary, if there is one. */
5476 if (d->tmp != IRTemp_INVALID) {
5477 dst = findShadowTmpB(mce, d->tmp);
5478 assign( 'V', mce, dst, curr );
5479 }
5480
5481 /* Outputs: guest state that we write or modify. */
5482 for (i = 0; i < d->nFxState; i++) {
5483 tl_assert(d->fxState[i].fx != Ifx_None);
5484 if (d->fxState[i].fx == Ifx_Read)
5485 continue;
5486
5487 /* Ignore any sections marked as 'always defined'. */
5488 if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size ))
5489 continue;
5490
5491 /* This state element is written or modified. So we need to
5492 consider it. If larger than 4 bytes, deal with it in 4-byte
5493 chunks. */
5494 gSz = d->fxState[i].size;
5495 gOff = d->fxState[i].offset;
5496 tl_assert(gSz > 0);
5497 while (True) {
5498 Int b_offset;
5499 if (gSz == 0) break;
5500 n = gSz <= 4 ? gSz : 4;
5501 /* Write 'curr' to the state slice gOff .. gOff+n-1 */
5502 b_offset = MC_(get_otrack_shadow_offset)(gOff, 4);
5503 if (b_offset != -1) {
5504 stmt( 'B', mce, IRStmt_Put(b_offset + 2*mce->layout->total_sizeB,
5505 curr ));
5506 }
5507 gSz -= n;
5508 gOff += n;
5509 }
5510 }
5511
5512 /* Outputs: memory that we write or modify. Same comments about
5513 endianness as above apply. */
5514 if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
sewardj7cf4e6b2008-05-01 20:24:26 +00005515 toDo = d->mSize;
5516 /* chew off 32-bit chunks */
5517 while (toDo >= 4) {
sewardj1c0ce7a2009-07-01 08:10:49 +00005518 gen_store_b( mce, 4, d->mAddr, d->mSize - toDo, curr,
5519 NULL/*guard*/ );
sewardj7cf4e6b2008-05-01 20:24:26 +00005520 toDo -= 4;
5521 }
sewardj8c93fcc2008-10-30 13:08:31 +00005522 /* handle possible 16-bit excess */
5523 while (toDo >= 2) {
sewardj1c0ce7a2009-07-01 08:10:49 +00005524 gen_store_b( mce, 2, d->mAddr, d->mSize - toDo, curr,
5525 NULL/*guard*/ );
sewardj8c93fcc2008-10-30 13:08:31 +00005526 toDo -= 2;
5527 }
5528 tl_assert(toDo == 0); /* also need to handle 1-byte excess */
sewardj7cf4e6b2008-05-01 20:24:26 +00005529 }
sewardj7cf4e6b2008-05-01 20:24:26 +00005530}
5531
sewardjdb5907d2009-11-26 17:20:21 +00005532
5533static void do_origins_Store ( MCEnv* mce,
5534 IREndness stEnd,
5535 IRExpr* stAddr,
5536 IRExpr* stData )
5537{
5538 Int dszB;
5539 IRAtom* dataB;
5540 /* assert that the B value for the address is already available
5541 (somewhere), since the call to schemeE will want to see it.
5542 XXXX how does this actually ensure that?? */
5543 tl_assert(isIRAtom(stAddr));
5544 tl_assert(isIRAtom(stData));
5545 dszB = sizeofIRType( typeOfIRExpr(mce->sb->tyenv, stData ) );
5546 dataB = schemeE( mce, stData );
5547 gen_store_b( mce, dszB, stAddr, 0/*offset*/, dataB,
5548 NULL/*guard*/ );
5549}
5550
5551
sewardj7cf4e6b2008-05-01 20:24:26 +00005552static void schemeS ( MCEnv* mce, IRStmt* st )
5553{
5554 tl_assert(MC_(clo_mc_level) == 3);
5555
5556 switch (st->tag) {
5557
5558 case Ist_AbiHint:
5559 /* The value-check instrumenter handles this - by arranging
5560 to pass the address of the next instruction to
5561 MC_(helperc_MAKE_STACK_UNINIT). This is all that needs to
5562 happen for origin tracking w.r.t. AbiHints. So there is
5563 nothing to do here. */
5564 break;
5565
5566 case Ist_PutI: {
5567 IRRegArray* descr_b;
5568 IRAtom *t1, *t2, *t3, *t4;
5569 IRRegArray* descr = st->Ist.PutI.descr;
5570 IRType equivIntTy
5571 = MC_(get_otrack_reg_array_equiv_int_type)(descr);
5572 /* If this array is unshadowable for whatever reason,
5573 generate no code. */
5574 if (equivIntTy == Ity_INVALID)
5575 break;
5576 tl_assert(sizeofIRType(equivIntTy) >= 4);
5577 tl_assert(sizeofIRType(equivIntTy) == sizeofIRType(descr->elemTy));
5578 descr_b
5579 = mkIRRegArray( descr->base + 2*mce->layout->total_sizeB,
5580 equivIntTy, descr->nElems );
5581 /* Compute a value to Put - the conjoinment of the origin for
5582 the data to be Put-ted (obviously) and of the index value
5583 (not so obviously). */
5584 t1 = schemeE( mce, st->Ist.PutI.data );
5585 t2 = schemeE( mce, st->Ist.PutI.ix );
5586 t3 = gen_maxU32( mce, t1, t2 );
5587 t4 = zWidenFrom32( mce, equivIntTy, t3 );
5588 stmt( 'B', mce, IRStmt_PutI( descr_b, st->Ist.PutI.ix,
5589 st->Ist.PutI.bias, t4 ));
5590 break;
5591 }
sewardjdb5907d2009-11-26 17:20:21 +00005592
sewardj7cf4e6b2008-05-01 20:24:26 +00005593 case Ist_Dirty:
5594 do_origins_Dirty( mce, st->Ist.Dirty.details );
5595 break;
sewardjdb5907d2009-11-26 17:20:21 +00005596
5597 case Ist_Store:
5598 do_origins_Store( mce, st->Ist.Store.end,
5599 st->Ist.Store.addr,
5600 st->Ist.Store.data );
5601 break;
5602
5603 case Ist_LLSC: {
5604 /* In short: treat a load-linked like a normal load followed
5605 by an assignment of the loaded (shadow) data the result
5606 temporary. Treat a store-conditional like a normal store,
5607 and mark the result temporary as defined. */
5608 if (st->Ist.LLSC.storedata == NULL) {
5609 /* Load Linked */
5610 IRType resTy
5611 = typeOfIRTemp(mce->sb->tyenv, st->Ist.LLSC.result);
5612 IRExpr* vanillaLoad
5613 = IRExpr_Load(st->Ist.LLSC.end, resTy, st->Ist.LLSC.addr);
5614 tl_assert(resTy == Ity_I64 || resTy == Ity_I32
5615 || resTy == Ity_I16 || resTy == Ity_I8);
5616 assign( 'B', mce, findShadowTmpB(mce, st->Ist.LLSC.result),
5617 schemeE(mce, vanillaLoad));
5618 } else {
5619 /* Store conditional */
5620 do_origins_Store( mce, st->Ist.LLSC.end,
5621 st->Ist.LLSC.addr,
5622 st->Ist.LLSC.storedata );
5623 /* For the rationale behind this, see comments at the
5624 place where the V-shadow for .result is constructed, in
5625 do_shadow_LLSC. In short, we regard .result as
5626 always-defined. */
5627 assign( 'B', mce, findShadowTmpB(mce, st->Ist.LLSC.result),
5628 mkU32(0) );
sewardj1c0ce7a2009-07-01 08:10:49 +00005629 }
sewardj7cf4e6b2008-05-01 20:24:26 +00005630 break;
5631 }
sewardjdb5907d2009-11-26 17:20:21 +00005632
sewardj7cf4e6b2008-05-01 20:24:26 +00005633 case Ist_Put: {
5634 Int b_offset
5635 = MC_(get_otrack_shadow_offset)(
5636 st->Ist.Put.offset,
sewardj1c0ce7a2009-07-01 08:10:49 +00005637 sizeofIRType(typeOfIRExpr(mce->sb->tyenv, st->Ist.Put.data))
sewardj7cf4e6b2008-05-01 20:24:26 +00005638 );
5639 if (b_offset >= 0) {
5640 /* FIXME: this isn't an atom! */
5641 stmt( 'B', mce, IRStmt_Put(b_offset + 2*mce->layout->total_sizeB,
5642 schemeE( mce, st->Ist.Put.data )) );
5643 }
5644 break;
5645 }
sewardjdb5907d2009-11-26 17:20:21 +00005646
sewardj7cf4e6b2008-05-01 20:24:26 +00005647 case Ist_WrTmp:
5648 assign( 'B', mce, findShadowTmpB(mce, st->Ist.WrTmp.tmp),
5649 schemeE(mce, st->Ist.WrTmp.data) );
5650 break;
sewardjdb5907d2009-11-26 17:20:21 +00005651
sewardj7cf4e6b2008-05-01 20:24:26 +00005652 case Ist_MBE:
5653 case Ist_NoOp:
5654 case Ist_Exit:
5655 case Ist_IMark:
5656 break;
sewardjdb5907d2009-11-26 17:20:21 +00005657
sewardj7cf4e6b2008-05-01 20:24:26 +00005658 default:
5659 VG_(printf)("mc_translate.c: schemeS: unhandled: ");
5660 ppIRStmt(st);
5661 VG_(tool_panic)("memcheck:schemeS");
5662 }
5663}
5664
5665
njn25e49d8e72002-09-23 09:36:25 +00005666/*--------------------------------------------------------------------*/
njn25cac76cb2002-09-23 11:21:57 +00005667/*--- end mc_translate.c ---*/
njn25e49d8e72002-09-23 09:36:25 +00005668/*--------------------------------------------------------------------*/