blob: 394bec29c2182eb242658832fb2d24b02f18a841 [file] [log] [blame]
nethercotebb1c9912004-01-04 16:43:23 +00001
njn25e49d8e72002-09-23 09:36:25 +00002/*--------------------------------------------------------------------*/
sewardj95448072004-11-22 20:19:51 +00003/*--- Instrument IR to perform memory checking operations. ---*/
njn25cac76cb2002-09-23 11:21:57 +00004/*--- mc_translate.c ---*/
njn25e49d8e72002-09-23 09:36:25 +00005/*--------------------------------------------------------------------*/
njnc9539842002-10-02 13:26:35 +00006
njn25e49d8e72002-09-23 09:36:25 +00007/*
nethercote137bc552003-11-14 17:47:54 +00008 This file is part of MemCheck, a heavyweight Valgrind tool for
njnc9539842002-10-02 13:26:35 +00009 detecting memory errors.
njn25e49d8e72002-09-23 09:36:25 +000010
sewardj9eecbbb2010-05-03 21:37:12 +000011 Copyright (C) 2000-2010 Julian Seward
njn25e49d8e72002-09-23 09:36:25 +000012 jseward@acm.org
13
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
18
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
23
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, write to the Free Software
26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27 02111-1307, USA.
28
29 The GNU General Public License is contained in the file COPYING.
30*/
31
njnc7561b92005-06-19 01:24:32 +000032#include "pub_tool_basics.h"
njn1d0825f2006-03-27 11:37:07 +000033#include "pub_tool_hashtable.h" // For mc_include.h
njn132bfcc2005-06-04 19:16:06 +000034#include "pub_tool_libcassert.h"
njn36a20fa2005-06-03 03:08:39 +000035#include "pub_tool_libcprint.h"
njnc7561b92005-06-19 01:24:32 +000036#include "pub_tool_tooliface.h"
sewardj53ee1fc2005-12-23 02:29:58 +000037#include "pub_tool_machine.h" // VG_(fnptr_to_fnentry)
sewardj81651dc2007-08-28 06:05:20 +000038#include "pub_tool_xarray.h"
39#include "pub_tool_mallocfree.h"
40#include "pub_tool_libcbase.h"
njn25e49d8e72002-09-23 09:36:25 +000041
sewardj7cf4e6b2008-05-01 20:24:26 +000042#include "mc_include.h"
43
44
sewardj992dff92005-10-07 11:08:55 +000045/* This file implements the Memcheck instrumentation, and in
46 particular contains the core of its undefined value detection
47 machinery. For a comprehensive background of the terminology,
48 algorithms and rationale used herein, read:
49
50 Using Valgrind to detect undefined value errors with
51 bit-precision
52
53 Julian Seward and Nicholas Nethercote
54
55 2005 USENIX Annual Technical Conference (General Track),
56 Anaheim, CA, USA, April 10-15, 2005.
njn6665ea22007-05-24 23:14:41 +000057
58 ----
59
60 Here is as good a place as any to record exactly when V bits are and
61 should be checked, why, and what function is responsible.
62
63
64 Memcheck complains when an undefined value is used:
65
66 1. In the condition of a conditional branch. Because it could cause
67 incorrect control flow, and thus cause incorrect externally-visible
68 behaviour. [mc_translate.c:complainIfUndefined]
69
70 2. As an argument to a system call, or as the value that specifies
71 the system call number. Because it could cause an incorrect
72 externally-visible side effect. [mc_translate.c:mc_pre_reg_read]
73
74 3. As the address in a load or store. Because it could cause an
75 incorrect value to be used later, which could cause externally-visible
76 behaviour (eg. via incorrect control flow or an incorrect system call
77 argument) [complainIfUndefined]
78
79 4. As the target address of a branch. Because it could cause incorrect
80 control flow. [complainIfUndefined]
81
82 5. As an argument to setenv, unsetenv, or putenv. Because it could put
83 an incorrect value into the external environment.
84 [mc_replace_strmem.c:VG_WRAP_FUNCTION_ZU(*, *env)]
85
86 6. As the index in a GETI or PUTI operation. I'm not sure why... (njn).
87 [complainIfUndefined]
88
89 7. As an argument to the VALGRIND_CHECK_MEM_IS_DEFINED and
90 VALGRIND_CHECK_VALUE_IS_DEFINED client requests. Because the user
91 requested it. [in memcheck.h]
92
93
94 Memcheck also complains, but should not, when an undefined value is used:
95
96 8. As the shift value in certain SIMD shift operations (but not in the
97 standard integer shift operations). This inconsistency is due to
 98 historical reasons. [complainIfUndefined]
99
100
101 Memcheck does not complain, but should, when an undefined value is used:
102
103 9. As an input to a client request. Because the client request may
104 affect the visible behaviour -- see bug #144362 for an example
105 involving the malloc replacements in vg_replace_malloc.c and
106 VALGRIND_NON_SIMD_CALL* requests, where an uninitialised argument
107 isn't identified. That bug report also has some info on how to solve
108 the problem. [valgrind.h:VALGRIND_DO_CLIENT_REQUEST]
109
110
111 In practice, 1 and 2 account for the vast majority of cases.
sewardj992dff92005-10-07 11:08:55 +0000112*/
113
sewardj95448072004-11-22 20:19:51 +0000114/*------------------------------------------------------------*/
115/*--- Forward decls ---*/
116/*------------------------------------------------------------*/
117
118struct _MCEnv;
119
sewardj7cf4e6b2008-05-01 20:24:26 +0000120static IRType shadowTypeV ( IRType ty );
sewardj95448072004-11-22 20:19:51 +0000121static IRExpr* expr2vbits ( struct _MCEnv* mce, IRExpr* e );
sewardjafa617b2008-07-22 09:59:48 +0000122static IRTemp findShadowTmpB ( struct _MCEnv* mce, IRTemp orig );
sewardj95448072004-11-22 20:19:51 +0000123
124
125/*------------------------------------------------------------*/
126/*--- Memcheck running state, and tmp management. ---*/
127/*------------------------------------------------------------*/
128
sewardj1c0ce7a2009-07-01 08:10:49 +0000129/* Carries info about a particular tmp. The tmp's number is not
130 recorded, as this is implied by (equal to) its index in the tmpMap
131 in MCEnv. The tmp's type is also not recorded, as this is present
132 in MCEnv.sb->tyenv.
133
134 When .kind is Orig, .shadowV and .shadowB may give the identities
135 of the temps currently holding the associated definedness (shadowV)
136 and origin (shadowB) values, or these may be IRTemp_INVALID if code
137 to compute such values has not yet been emitted.
138
139 When .kind is VSh or BSh then the tmp is holds a V- or B- value,
140 and so .shadowV and .shadowB must be IRTemp_INVALID, since it is
141 illogical for a shadow tmp itself to be shadowed.
142*/
143typedef
144 enum { Orig=1, VSh=2, BSh=3 }
145 TempKind;
146
147typedef
148 struct {
149 TempKind kind;
150 IRTemp shadowV;
151 IRTemp shadowB;
152 }
153 TempMapEnt;
154
155
sewardj95448072004-11-22 20:19:51 +0000156/* Carries around state during memcheck instrumentation. */
157typedef
158 struct _MCEnv {
sewardj0b9d74a2006-12-24 02:24:11 +0000159 /* MODIFIED: the superblock being constructed. IRStmts are
160 added. */
sewardj1c0ce7a2009-07-01 08:10:49 +0000161 IRSB* sb;
sewardj7cf4e6b2008-05-01 20:24:26 +0000162 Bool trace;
sewardj95448072004-11-22 20:19:51 +0000163
sewardj1c0ce7a2009-07-01 08:10:49 +0000164 /* MODIFIED: a table [0 .. #temps_in_sb-1] which gives the
165 current kind and possibly shadow temps for each temp in the
166 IRSB being constructed. Note that it does not contain the
167 type of each tmp. If you want to know the type, look at the
168 relevant entry in sb->tyenv. It follows that at all times
169 during the instrumentation process, the valid indices for
170 tmpMap and sb->tyenv are identical, being 0 .. N-1 where N is
171 total number of Orig, V- and B- temps allocated so far.
172
173 The reason for this strange split (types in one place, all
174 other info in another) is that we need the types to be
175 attached to sb so as to make it possible to do
176 "typeOfIRExpr(mce->bb->tyenv, ...)" at various places in the
177 instrumentation process. */
178 XArray* /* of TempMapEnt */ tmpMap;
sewardj95448072004-11-22 20:19:51 +0000179
sewardjd5204dc2004-12-31 01:16:11 +0000180 /* MODIFIED: indicates whether "bogus" literals have so far been
181 found. Starts off False, and may change to True. */
182 Bool bogusLiterals;
183
sewardj95448072004-11-22 20:19:51 +0000184 /* READONLY: the guest layout. This indicates which parts of
185 the guest state should be regarded as 'always defined'. */
186 VexGuestLayout* layout;
sewardj634ba772006-10-15 12:47:37 +0000187
sewardj95448072004-11-22 20:19:51 +0000188 /* READONLY: the host word type. Needed for constructing
189 arguments of type 'HWord' to be passed to helper functions.
190 Ity_I32 or Ity_I64 only. */
191 IRType hWordTy;
192 }
193 MCEnv;
194
195/* SHADOW TMP MANAGEMENT. Shadow tmps are allocated lazily (on
196 demand), as they are encountered. This is for two reasons.
197
198 (1) (less important reason): Many original tmps are unused due to
 199 initial IR optimisation, and we do not want to waste space in tables
200 tracking them.
201
202 Shadow IRTemps are therefore allocated on demand. mce.tmpMap is a
203 table indexed [0 .. n_types-1], which gives the current shadow for
204 each original tmp, or INVALID_IRTEMP if none is so far assigned.
205 It is necessary to support making multiple assignments to a shadow
206 -- specifically, after testing a shadow for definedness, it needs
207 to be made defined. But IR's SSA property disallows this.
208
209 (2) (more important reason): Therefore, when a shadow needs to get
210 a new value, a new temporary is created, the value is assigned to
211 that, and the tmpMap is updated to reflect the new binding.
212
213 A corollary is that if the tmpMap maps a given tmp to
sewardjf1962d32006-10-19 13:22:16 +0000214 IRTemp_INVALID and we are hoping to read that shadow tmp, it means
sewardj95448072004-11-22 20:19:51 +0000215 there's a read-before-write error in the original tmps. The IR
216 sanity checker should catch all such anomalies, however.
njn25e49d8e72002-09-23 09:36:25 +0000217*/
sewardj95448072004-11-22 20:19:51 +0000218
sewardj1c0ce7a2009-07-01 08:10:49 +0000219/* Create a new IRTemp of type 'ty' and kind 'kind', and add it to
220 both the table in mce->sb and to our auxiliary mapping. Note that
221 newTemp may cause mce->tmpMap to resize, hence previous results
222 from VG_(indexXA)(mce->tmpMap) are invalidated. */
223static IRTemp newTemp ( MCEnv* mce, IRType ty, TempKind kind )
224{
225 Word newIx;
226 TempMapEnt ent;
227 IRTemp tmp = newIRTemp(mce->sb->tyenv, ty);
228 ent.kind = kind;
229 ent.shadowV = IRTemp_INVALID;
230 ent.shadowB = IRTemp_INVALID;
231 newIx = VG_(addToXA)( mce->tmpMap, &ent );
232 tl_assert(newIx == (Word)tmp);
233 return tmp;
234}
235
236
sewardj95448072004-11-22 20:19:51 +0000237/* Find the tmp currently shadowing the given original tmp. If none
238 so far exists, allocate one. */
sewardj7cf4e6b2008-05-01 20:24:26 +0000239static IRTemp findShadowTmpV ( MCEnv* mce, IRTemp orig )
njn25e49d8e72002-09-23 09:36:25 +0000240{
sewardj1c0ce7a2009-07-01 08:10:49 +0000241 TempMapEnt* ent;
242 /* VG_(indexXA) range-checks 'orig', hence no need to check
243 here. */
244 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
245 tl_assert(ent->kind == Orig);
246 if (ent->shadowV == IRTemp_INVALID) {
247 IRTemp tmpV
248 = newTemp( mce, shadowTypeV(mce->sb->tyenv->types[orig]), VSh );
249 /* newTemp may cause mce->tmpMap to resize, hence previous results
250 from VG_(indexXA) are invalid. */
251 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
252 tl_assert(ent->kind == Orig);
253 tl_assert(ent->shadowV == IRTemp_INVALID);
254 ent->shadowV = tmpV;
njn25e49d8e72002-09-23 09:36:25 +0000255 }
sewardj1c0ce7a2009-07-01 08:10:49 +0000256 return ent->shadowV;
njn25e49d8e72002-09-23 09:36:25 +0000257}
258
sewardj95448072004-11-22 20:19:51 +0000259/* Allocate a new shadow for the given original tmp. This means any
260 previous shadow is abandoned. This is needed because it is
261 necessary to give a new value to a shadow once it has been tested
262 for undefinedness, but unfortunately IR's SSA property disallows
263 this. Instead we must abandon the old shadow, allocate a new one
sewardj1c0ce7a2009-07-01 08:10:49 +0000264 and use that instead.
265
266 This is the same as findShadowTmpV, except we don't bother to see
267 if a shadow temp already existed -- we simply allocate a new one
268 regardless. */
sewardj7cf4e6b2008-05-01 20:24:26 +0000269static void newShadowTmpV ( MCEnv* mce, IRTemp orig )
njn25e49d8e72002-09-23 09:36:25 +0000270{
sewardj1c0ce7a2009-07-01 08:10:49 +0000271 TempMapEnt* ent;
272 /* VG_(indexXA) range-checks 'orig', hence no need to check
273 here. */
274 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
275 tl_assert(ent->kind == Orig);
276 if (1) {
277 IRTemp tmpV
278 = newTemp( mce, shadowTypeV(mce->sb->tyenv->types[orig]), VSh );
279 /* newTemp may cause mce->tmpMap to resize, hence previous results
280 from VG_(indexXA) are invalid. */
281 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
282 tl_assert(ent->kind == Orig);
283 ent->shadowV = tmpV;
284 }
sewardj95448072004-11-22 20:19:51 +0000285}
286
287
288/*------------------------------------------------------------*/
289/*--- IRAtoms -- a subset of IRExprs ---*/
290/*------------------------------------------------------------*/
291
292/* An atom is either an IRExpr_Const or an IRExpr_Tmp, as defined by
sewardj710d6c22005-03-20 18:55:15 +0000293 isIRAtom() in libvex_ir.h. Because this instrumenter expects flat
sewardj95448072004-11-22 20:19:51 +0000294 input, most of this code deals in atoms. Usefully, a value atom
295 always has a V-value which is also an atom: constants are shadowed
296 by constants, and temps are shadowed by the corresponding shadow
297 temporary. */
298
299typedef IRExpr IRAtom;
300
301/* (used for sanity checks only): is this an atom which looks
302 like it's from original code? */
303static Bool isOriginalAtom ( MCEnv* mce, IRAtom* a1 )
304{
305 if (a1->tag == Iex_Const)
306 return True;
sewardj1c0ce7a2009-07-01 08:10:49 +0000307 if (a1->tag == Iex_RdTmp) {
308 TempMapEnt* ent = VG_(indexXA)( mce->tmpMap, a1->Iex.RdTmp.tmp );
309 return ent->kind == Orig;
310 }
sewardj95448072004-11-22 20:19:51 +0000311 return False;
312}
313
314/* (used for sanity checks only): is this an atom which looks
315 like it's from shadow code? */
316static Bool isShadowAtom ( MCEnv* mce, IRAtom* a1 )
317{
318 if (a1->tag == Iex_Const)
319 return True;
sewardj1c0ce7a2009-07-01 08:10:49 +0000320 if (a1->tag == Iex_RdTmp) {
321 TempMapEnt* ent = VG_(indexXA)( mce->tmpMap, a1->Iex.RdTmp.tmp );
322 return ent->kind == VSh || ent->kind == BSh;
323 }
sewardj95448072004-11-22 20:19:51 +0000324 return False;
325}
326
327/* (used for sanity checks only): check that both args are atoms and
328 are identically-kinded. */
329static Bool sameKindedAtoms ( IRAtom* a1, IRAtom* a2 )
330{
sewardj0b9d74a2006-12-24 02:24:11 +0000331 if (a1->tag == Iex_RdTmp && a2->tag == Iex_RdTmp)
sewardj95448072004-11-22 20:19:51 +0000332 return True;
sewardjbef552a2005-08-30 12:54:36 +0000333 if (a1->tag == Iex_Const && a2->tag == Iex_Const)
sewardj95448072004-11-22 20:19:51 +0000334 return True;
335 return False;
336}
337
338
339/*------------------------------------------------------------*/
340/*--- Type management ---*/
341/*------------------------------------------------------------*/
342
343/* Shadow state is always accessed using integer types. This returns
344 an integer type with the same size (as per sizeofIRType) as the
345 given type. The only valid shadow types are Bit, I8, I16, I32,
sewardj3245c912004-12-10 14:58:26 +0000346 I64, V128. */
sewardj95448072004-11-22 20:19:51 +0000347
sewardj7cf4e6b2008-05-01 20:24:26 +0000348static IRType shadowTypeV ( IRType ty )
sewardj95448072004-11-22 20:19:51 +0000349{
350 switch (ty) {
351 case Ity_I1:
352 case Ity_I8:
353 case Ity_I16:
354 case Ity_I32:
sewardj6cf40ff2005-04-20 22:31:26 +0000355 case Ity_I64:
356 case Ity_I128: return ty;
sewardj3245c912004-12-10 14:58:26 +0000357 case Ity_F32: return Ity_I32;
358 case Ity_F64: return Ity_I64;
359 case Ity_V128: return Ity_V128;
sewardj95448072004-11-22 20:19:51 +0000360 default: ppIRType(ty);
sewardj7cf4e6b2008-05-01 20:24:26 +0000361 VG_(tool_panic)("memcheck:shadowTypeV");
sewardj95448072004-11-22 20:19:51 +0000362 }
363}
364
365/* Produce a 'defined' value of the given shadow type. Should only be
366 supplied shadow types (Bit/I8/I16/I32/UI64). */
367static IRExpr* definedOfType ( IRType ty ) {
368 switch (ty) {
sewardj170ee212004-12-10 18:57:51 +0000369 case Ity_I1: return IRExpr_Const(IRConst_U1(False));
370 case Ity_I8: return IRExpr_Const(IRConst_U8(0));
371 case Ity_I16: return IRExpr_Const(IRConst_U16(0));
372 case Ity_I32: return IRExpr_Const(IRConst_U32(0));
373 case Ity_I64: return IRExpr_Const(IRConst_U64(0));
374 case Ity_V128: return IRExpr_Const(IRConst_V128(0x0000));
sewardjf1962d32006-10-19 13:22:16 +0000375 default: VG_(tool_panic)("memcheck:definedOfType");
njn25e49d8e72002-09-23 09:36:25 +0000376 }
377}
378
379
sewardj95448072004-11-22 20:19:51 +0000380/*------------------------------------------------------------*/
381/*--- Constructing IR fragments ---*/
382/*------------------------------------------------------------*/
383
sewardj95448072004-11-22 20:19:51 +0000384/* add stmt to a bb */
sewardj7cf4e6b2008-05-01 20:24:26 +0000385static inline void stmt ( HChar cat, MCEnv* mce, IRStmt* st ) {
386 if (mce->trace) {
387 VG_(printf)(" %c: ", cat);
388 ppIRStmt(st);
389 VG_(printf)("\n");
390 }
sewardj1c0ce7a2009-07-01 08:10:49 +0000391 addStmtToIRSB(mce->sb, st);
sewardj7cf4e6b2008-05-01 20:24:26 +0000392}
393
394/* assign value to tmp */
395static inline
396void assign ( HChar cat, MCEnv* mce, IRTemp tmp, IRExpr* expr ) {
sewardj1c0ce7a2009-07-01 08:10:49 +0000397 stmt(cat, mce, IRStmt_WrTmp(tmp,expr));
sewardj7cf4e6b2008-05-01 20:24:26 +0000398}
sewardj95448072004-11-22 20:19:51 +0000399
/* Shorthand constructors for various kinds of IR expressions. */
#define triop(_op, _arg1, _arg2, _arg3) \
                                 IRExpr_Triop((_op),(_arg1),(_arg2),(_arg3))
#define binop(_op, _arg1, _arg2) IRExpr_Binop((_op),(_arg1),(_arg2))
#define unop(_op, _arg)          IRExpr_Unop((_op),(_arg))
#define mkU8(_n)                 IRExpr_Const(IRConst_U8(_n))
#define mkU16(_n)                IRExpr_Const(IRConst_U16(_n))
#define mkU32(_n)                IRExpr_Const(IRConst_U32(_n))
#define mkU64(_n)                IRExpr_Const(IRConst_U64(_n))
#define mkV128(_n)               IRExpr_Const(IRConst_V128(_n))
#define mkexpr(_tmp)             IRExpr_RdTmp((_tmp))
sewardj95448072004-11-22 20:19:51 +0000411
sewardj7cf4e6b2008-05-01 20:24:26 +0000412/* Bind the given expression to a new temporary, and return the
sewardj95448072004-11-22 20:19:51 +0000413 temporary. This effectively converts an arbitrary expression into
sewardj7cf4e6b2008-05-01 20:24:26 +0000414 an atom.
415
416 'ty' is the type of 'e' and hence the type that the new temporary
sewardj1c0ce7a2009-07-01 08:10:49 +0000417 needs to be. But passing it in is redundant, since we can deduce
418 the type merely by inspecting 'e'. So at least use that fact to
419 assert that the two types agree. */
420static IRAtom* assignNew ( HChar cat, MCEnv* mce, IRType ty, IRExpr* e )
421{
422 TempKind k;
423 IRTemp t;
424 IRType tyE = typeOfIRExpr(mce->sb->tyenv, e);
sewardj7cf4e6b2008-05-01 20:24:26 +0000425 tl_assert(tyE == ty); /* so 'ty' is redundant (!) */
sewardj1c0ce7a2009-07-01 08:10:49 +0000426 switch (cat) {
427 case 'V': k = VSh; break;
428 case 'B': k = BSh; break;
429 case 'C': k = Orig; break;
430 /* happens when we are making up new "orig"
431 expressions, for IRCAS handling */
432 default: tl_assert(0);
433 }
434 t = newTemp(mce, ty, k);
sewardj7cf4e6b2008-05-01 20:24:26 +0000435 assign(cat, mce, t, e);
sewardj95448072004-11-22 20:19:51 +0000436 return mkexpr(t);
437}
438
439
440/*------------------------------------------------------------*/
441/*--- Constructing definedness primitive ops ---*/
442/*------------------------------------------------------------*/
443
444/* --------- Defined-if-either-defined --------- */
445
446static IRAtom* mkDifD8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
447 tl_assert(isShadowAtom(mce,a1));
448 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000449 return assignNew('V', mce, Ity_I8, binop(Iop_And8, a1, a2));
sewardj95448072004-11-22 20:19:51 +0000450}
451
452static IRAtom* mkDifD16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
453 tl_assert(isShadowAtom(mce,a1));
454 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000455 return assignNew('V', mce, Ity_I16, binop(Iop_And16, a1, a2));
sewardj95448072004-11-22 20:19:51 +0000456}
457
458static IRAtom* mkDifD32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
459 tl_assert(isShadowAtom(mce,a1));
460 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000461 return assignNew('V', mce, Ity_I32, binop(Iop_And32, a1, a2));
sewardj95448072004-11-22 20:19:51 +0000462}
463
sewardj7010f6e2004-12-10 13:35:22 +0000464static IRAtom* mkDifD64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
465 tl_assert(isShadowAtom(mce,a1));
466 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000467 return assignNew('V', mce, Ity_I64, binop(Iop_And64, a1, a2));
sewardj7010f6e2004-12-10 13:35:22 +0000468}
469
sewardj20d38f22005-02-07 23:50:18 +0000470static IRAtom* mkDifDV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
sewardj170ee212004-12-10 18:57:51 +0000471 tl_assert(isShadowAtom(mce,a1));
472 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000473 return assignNew('V', mce, Ity_V128, binop(Iop_AndV128, a1, a2));
sewardj170ee212004-12-10 18:57:51 +0000474}
475
sewardj95448072004-11-22 20:19:51 +0000476/* --------- Undefined-if-either-undefined --------- */
477
478static IRAtom* mkUifU8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
479 tl_assert(isShadowAtom(mce,a1));
480 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000481 return assignNew('V', mce, Ity_I8, binop(Iop_Or8, a1, a2));
sewardj95448072004-11-22 20:19:51 +0000482}
483
484static IRAtom* mkUifU16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
485 tl_assert(isShadowAtom(mce,a1));
486 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000487 return assignNew('V', mce, Ity_I16, binop(Iop_Or16, a1, a2));
sewardj95448072004-11-22 20:19:51 +0000488}
489
490static IRAtom* mkUifU32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
491 tl_assert(isShadowAtom(mce,a1));
492 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000493 return assignNew('V', mce, Ity_I32, binop(Iop_Or32, a1, a2));
sewardj95448072004-11-22 20:19:51 +0000494}
495
496static IRAtom* mkUifU64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
497 tl_assert(isShadowAtom(mce,a1));
498 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000499 return assignNew('V', mce, Ity_I64, binop(Iop_Or64, a1, a2));
sewardj95448072004-11-22 20:19:51 +0000500}
501
sewardj20d38f22005-02-07 23:50:18 +0000502static IRAtom* mkUifUV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
sewardj3245c912004-12-10 14:58:26 +0000503 tl_assert(isShadowAtom(mce,a1));
504 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000505 return assignNew('V', mce, Ity_V128, binop(Iop_OrV128, a1, a2));
sewardj3245c912004-12-10 14:58:26 +0000506}
507
sewardje50a1b12004-12-17 01:24:54 +0000508static IRAtom* mkUifU ( MCEnv* mce, IRType vty, IRAtom* a1, IRAtom* a2 ) {
sewardj95448072004-11-22 20:19:51 +0000509 switch (vty) {
sewardje50a1b12004-12-17 01:24:54 +0000510 case Ity_I8: return mkUifU8(mce, a1, a2);
sewardja1d93302004-12-12 16:45:06 +0000511 case Ity_I16: return mkUifU16(mce, a1, a2);
512 case Ity_I32: return mkUifU32(mce, a1, a2);
513 case Ity_I64: return mkUifU64(mce, a1, a2);
sewardj20d38f22005-02-07 23:50:18 +0000514 case Ity_V128: return mkUifUV128(mce, a1, a2);
sewardj95448072004-11-22 20:19:51 +0000515 default:
516 VG_(printf)("\n"); ppIRType(vty); VG_(printf)("\n");
517 VG_(tool_panic)("memcheck:mkUifU");
njn25e49d8e72002-09-23 09:36:25 +0000518 }
519}
520
sewardj95448072004-11-22 20:19:51 +0000521/* --------- The Left-family of operations. --------- */
njn25e49d8e72002-09-23 09:36:25 +0000522
sewardj95448072004-11-22 20:19:51 +0000523static IRAtom* mkLeft8 ( MCEnv* mce, IRAtom* a1 ) {
524 tl_assert(isShadowAtom(mce,a1));
sewardj7cf4e6b2008-05-01 20:24:26 +0000525 return assignNew('V', mce, Ity_I8, unop(Iop_Left8, a1));
sewardj95448072004-11-22 20:19:51 +0000526}
527
528static IRAtom* mkLeft16 ( MCEnv* mce, IRAtom* a1 ) {
529 tl_assert(isShadowAtom(mce,a1));
sewardj7cf4e6b2008-05-01 20:24:26 +0000530 return assignNew('V', mce, Ity_I16, unop(Iop_Left16, a1));
sewardj95448072004-11-22 20:19:51 +0000531}
532
533static IRAtom* mkLeft32 ( MCEnv* mce, IRAtom* a1 ) {
534 tl_assert(isShadowAtom(mce,a1));
sewardj7cf4e6b2008-05-01 20:24:26 +0000535 return assignNew('V', mce, Ity_I32, unop(Iop_Left32, a1));
sewardj95448072004-11-22 20:19:51 +0000536}
537
sewardj681be302005-01-15 20:43:58 +0000538static IRAtom* mkLeft64 ( MCEnv* mce, IRAtom* a1 ) {
539 tl_assert(isShadowAtom(mce,a1));
sewardj7cf4e6b2008-05-01 20:24:26 +0000540 return assignNew('V', mce, Ity_I64, unop(Iop_Left64, a1));
sewardj681be302005-01-15 20:43:58 +0000541}
542
sewardj95448072004-11-22 20:19:51 +0000543/* --------- 'Improvement' functions for AND/OR. --------- */
544
545/* ImproveAND(data, vbits) = data OR vbits. Defined (0) data 0s give
546 defined (0); all other -> undefined (1).
547*/
548static IRAtom* mkImproveAND8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
njn25e49d8e72002-09-23 09:36:25 +0000549{
sewardj95448072004-11-22 20:19:51 +0000550 tl_assert(isOriginalAtom(mce, data));
551 tl_assert(isShadowAtom(mce, vbits));
552 tl_assert(sameKindedAtoms(data, vbits));
sewardj7cf4e6b2008-05-01 20:24:26 +0000553 return assignNew('V', mce, Ity_I8, binop(Iop_Or8, data, vbits));
sewardj95448072004-11-22 20:19:51 +0000554}
njn25e49d8e72002-09-23 09:36:25 +0000555
sewardj95448072004-11-22 20:19:51 +0000556static IRAtom* mkImproveAND16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
557{
558 tl_assert(isOriginalAtom(mce, data));
559 tl_assert(isShadowAtom(mce, vbits));
560 tl_assert(sameKindedAtoms(data, vbits));
sewardj7cf4e6b2008-05-01 20:24:26 +0000561 return assignNew('V', mce, Ity_I16, binop(Iop_Or16, data, vbits));
sewardj95448072004-11-22 20:19:51 +0000562}
njn25e49d8e72002-09-23 09:36:25 +0000563
sewardj95448072004-11-22 20:19:51 +0000564static IRAtom* mkImproveAND32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
565{
566 tl_assert(isOriginalAtom(mce, data));
567 tl_assert(isShadowAtom(mce, vbits));
568 tl_assert(sameKindedAtoms(data, vbits));
sewardj7cf4e6b2008-05-01 20:24:26 +0000569 return assignNew('V', mce, Ity_I32, binop(Iop_Or32, data, vbits));
sewardj95448072004-11-22 20:19:51 +0000570}
njn25e49d8e72002-09-23 09:36:25 +0000571
sewardj7010f6e2004-12-10 13:35:22 +0000572static IRAtom* mkImproveAND64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
573{
574 tl_assert(isOriginalAtom(mce, data));
575 tl_assert(isShadowAtom(mce, vbits));
576 tl_assert(sameKindedAtoms(data, vbits));
sewardj7cf4e6b2008-05-01 20:24:26 +0000577 return assignNew('V', mce, Ity_I64, binop(Iop_Or64, data, vbits));
sewardj7010f6e2004-12-10 13:35:22 +0000578}
579
sewardj20d38f22005-02-07 23:50:18 +0000580static IRAtom* mkImproveANDV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
sewardj170ee212004-12-10 18:57:51 +0000581{
582 tl_assert(isOriginalAtom(mce, data));
583 tl_assert(isShadowAtom(mce, vbits));
584 tl_assert(sameKindedAtoms(data, vbits));
sewardj7cf4e6b2008-05-01 20:24:26 +0000585 return assignNew('V', mce, Ity_V128, binop(Iop_OrV128, data, vbits));
sewardj170ee212004-12-10 18:57:51 +0000586}
587
sewardj95448072004-11-22 20:19:51 +0000588/* ImproveOR(data, vbits) = ~data OR vbits. Defined (0) data 1s give
589 defined (0); all other -> undefined (1).
590*/
591static IRAtom* mkImproveOR8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
592{
593 tl_assert(isOriginalAtom(mce, data));
594 tl_assert(isShadowAtom(mce, vbits));
595 tl_assert(sameKindedAtoms(data, vbits));
596 return assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +0000597 'V', mce, Ity_I8,
sewardj95448072004-11-22 20:19:51 +0000598 binop(Iop_Or8,
sewardj7cf4e6b2008-05-01 20:24:26 +0000599 assignNew('V', mce, Ity_I8, unop(Iop_Not8, data)),
sewardj95448072004-11-22 20:19:51 +0000600 vbits) );
601}
njn25e49d8e72002-09-23 09:36:25 +0000602
sewardj95448072004-11-22 20:19:51 +0000603static IRAtom* mkImproveOR16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
604{
605 tl_assert(isOriginalAtom(mce, data));
606 tl_assert(isShadowAtom(mce, vbits));
607 tl_assert(sameKindedAtoms(data, vbits));
608 return assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +0000609 'V', mce, Ity_I16,
sewardj95448072004-11-22 20:19:51 +0000610 binop(Iop_Or16,
sewardj7cf4e6b2008-05-01 20:24:26 +0000611 assignNew('V', mce, Ity_I16, unop(Iop_Not16, data)),
sewardj95448072004-11-22 20:19:51 +0000612 vbits) );
613}
njn25e49d8e72002-09-23 09:36:25 +0000614
sewardj95448072004-11-22 20:19:51 +0000615static IRAtom* mkImproveOR32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
616{
617 tl_assert(isOriginalAtom(mce, data));
618 tl_assert(isShadowAtom(mce, vbits));
619 tl_assert(sameKindedAtoms(data, vbits));
620 return assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +0000621 'V', mce, Ity_I32,
sewardj95448072004-11-22 20:19:51 +0000622 binop(Iop_Or32,
sewardj7cf4e6b2008-05-01 20:24:26 +0000623 assignNew('V', mce, Ity_I32, unop(Iop_Not32, data)),
sewardj95448072004-11-22 20:19:51 +0000624 vbits) );
625}
626
sewardj7010f6e2004-12-10 13:35:22 +0000627static IRAtom* mkImproveOR64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
628{
629 tl_assert(isOriginalAtom(mce, data));
630 tl_assert(isShadowAtom(mce, vbits));
631 tl_assert(sameKindedAtoms(data, vbits));
632 return assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +0000633 'V', mce, Ity_I64,
sewardj7010f6e2004-12-10 13:35:22 +0000634 binop(Iop_Or64,
sewardj7cf4e6b2008-05-01 20:24:26 +0000635 assignNew('V', mce, Ity_I64, unop(Iop_Not64, data)),
sewardj7010f6e2004-12-10 13:35:22 +0000636 vbits) );
637}
638
sewardj20d38f22005-02-07 23:50:18 +0000639static IRAtom* mkImproveORV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
sewardj170ee212004-12-10 18:57:51 +0000640{
641 tl_assert(isOriginalAtom(mce, data));
642 tl_assert(isShadowAtom(mce, vbits));
643 tl_assert(sameKindedAtoms(data, vbits));
644 return assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +0000645 'V', mce, Ity_V128,
sewardj20d38f22005-02-07 23:50:18 +0000646 binop(Iop_OrV128,
sewardj7cf4e6b2008-05-01 20:24:26 +0000647 assignNew('V', mce, Ity_V128, unop(Iop_NotV128, data)),
sewardj170ee212004-12-10 18:57:51 +0000648 vbits) );
649}
650
sewardj95448072004-11-22 20:19:51 +0000651/* --------- Pessimising casts. --------- */
652
/* Pessimising cast: collapse the V-bits of 'vbits' to a single
   all-defined / any-undefined summary bit, then widen that bit to
   dst_ty.  The result is all-zeroes (defined) iff every bit of vbits
   is zero, else all-ones (undefined). */
static IRAtom* mkPCastTo( MCEnv* mce, IRType dst_ty, IRAtom* vbits )
{
   IRType  src_ty;
   IRAtom* tmp1;
   /* Note, dst_ty is a shadow type, not an original type. */
   /* First of all, collapse vbits down to a single bit. */
   tl_assert(isShadowAtom(mce,vbits));
   src_ty = typeOfIRExpr(mce->sb->tyenv, vbits);

   /* Fast-track some common cases: CmpwNEZ* produces the widened
      summary directly, avoiding the narrow-then-widen round trip. */
   if (src_ty == Ity_I32 && dst_ty == Ity_I32)
      return assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));

   if (src_ty == Ity_I64 && dst_ty == Ity_I64)
      return assignNew('V', mce, Ity_I64, unop(Iop_CmpwNEZ64, vbits));

   if (src_ty == Ity_I32 && dst_ty == Ity_I64) {
      /* Summarise as I32, then duplicate into both halves of an I64. */
      IRAtom* tmp = assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));
      return assignNew('V', mce, Ity_I64, binop(Iop_32HLto64, tmp, tmp));
   }

   /* Else do it the slow way .. */
   /* Step 1: narrow src_ty down to a single Ity_I1 bit. */
   tmp1   = NULL;
   switch (src_ty) {
      case Ity_I1:
         tmp1 = vbits;
         break;
      case Ity_I8:
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ8, vbits));
         break;
      case Ity_I16:
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ16, vbits));
         break;
      case Ity_I32:
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ32, vbits));
         break;
      case Ity_I64:
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ64, vbits));
         break;
      case Ity_I128: {
         /* Gah.  Chop it in half, OR the halves together, and compare
            that with zero. */
         IRAtom* tmp2 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, vbits));
         IRAtom* tmp3 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, vbits));
         IRAtom* tmp4 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp2, tmp3));
         tmp1         = assignNew('V', mce, Ity_I1,
                                       unop(Iop_CmpNEZ64, tmp4));
         break;
      }
      default:
         ppIRType(src_ty);
         VG_(tool_panic)("mkPCastTo(1)");
   }
   tl_assert(tmp1);
   /* Step 2: sign-extend the single bit up to the dst type, so the
      result is all-zeroes or all-ones. */
   switch (dst_ty) {
      case Ity_I1:
         return tmp1;
      case Ity_I8:
         return assignNew('V', mce, Ity_I8, unop(Iop_1Sto8, tmp1));
      case Ity_I16:
         return assignNew('V', mce, Ity_I16, unop(Iop_1Sto16, tmp1));
      case Ity_I32:
         return assignNew('V', mce, Ity_I32, unop(Iop_1Sto32, tmp1));
      case Ity_I64:
         return assignNew('V', mce, Ity_I64, unop(Iop_1Sto64, tmp1));
      case Ity_V128:
         /* No direct 1->V128 widening op: widen to I64, then glue two
            copies together. */
         tmp1 = assignNew('V', mce, Ity_I64,  unop(Iop_1Sto64, tmp1));
         tmp1 = assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128, tmp1, tmp1));
         return tmp1;
      case Ity_I128:
         tmp1 = assignNew('V', mce, Ity_I64,  unop(Iop_1Sto64, tmp1));
         tmp1 = assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, tmp1, tmp1));
         return tmp1;
      default:
         ppIRType(dst_ty);
         VG_(tool_panic)("mkPCastTo(2)");
   }
}
732
sewardjd5204dc2004-12-31 01:16:11 +0000733/* --------- Accurate interpretation of CmpEQ/CmpNE. --------- */
734/*
735 Normally, we can do CmpEQ/CmpNE by doing UifU on the arguments, and
736 PCasting to Ity_U1. However, sometimes it is necessary to be more
737 accurate. The insight is that the result is defined if two
738 corresponding bits can be found, one from each argument, so that
739 both bits are defined but are different -- that makes EQ say "No"
740 and NE say "Yes". Hence, we compute an improvement term and DifD
741 it onto the "normal" (UifU) result.
742
743 The result is:
744
745 PCastTo<1> (
sewardje6f8af42005-07-06 18:48:59 +0000746 -- naive version
747 PCastTo<sz>( UifU<sz>(vxx, vyy) )
748
sewardjd5204dc2004-12-31 01:16:11 +0000749 `DifD<sz>`
sewardje6f8af42005-07-06 18:48:59 +0000750
751 -- improvement term
752 PCastTo<sz>( PCast<sz>( CmpEQ<sz> ( vec, 1...1 ) ) )
sewardjd5204dc2004-12-31 01:16:11 +0000753 )
sewardje6f8af42005-07-06 18:48:59 +0000754
sewardjd5204dc2004-12-31 01:16:11 +0000755 where
756 vec contains 0 (defined) bits where the corresponding arg bits
sewardje6f8af42005-07-06 18:48:59 +0000757 are defined but different, and 1 bits otherwise.
sewardjd5204dc2004-12-31 01:16:11 +0000758
sewardje6f8af42005-07-06 18:48:59 +0000759 vec = Or<sz>( vxx, // 0 iff bit defined
760 vyy, // 0 iff bit defined
761 Not<sz>(Xor<sz>( xx, yy )) // 0 iff bits different
762 )
763
764 If any bit of vec is 0, the result is defined and so the
765 improvement term should produce 0...0, else it should produce
766 1...1.
767
768 Hence require for the improvement term:
769
770 if vec == 1...1 then 1...1 else 0...0
771 ->
772 PCast<sz>( CmpEQ<sz> ( vec, 1...1 ) )
773
774 This was extensively re-analysed and checked on 6 July 05.
sewardjd5204dc2004-12-31 01:16:11 +0000775*/
/* Accurate CmpEQ/CmpNE interpretation (see the long comment above for
   the derivation).  Computes the naive UifU-based answer, then ANDs in
   an improvement term that forces 'defined' whenever a pair of
   corresponding bits is found that is defined in both args and
   differs -- which decides the comparison regardless of other bits.
   Returns an Ity_I1 shadow atom. */
static IRAtom* expensiveCmpEQorNE ( MCEnv*  mce,
                                    IRType  ty,
                                    IRAtom* vxx, IRAtom* vyy,
                                    IRAtom* xx,  IRAtom* yy )
{
   IRAtom *naive, *vec, *improvement_term;
   IRAtom *improved, *final_cast, *top;
   IROp   opDIFD, opUIFU, opXOR, opNOT, opCMP, opOR;

   tl_assert(isShadowAtom(mce,vxx));
   tl_assert(isShadowAtom(mce,vyy));
   tl_assert(isOriginalAtom(mce,xx));
   tl_assert(isOriginalAtom(mce,yy));
   tl_assert(sameKindedAtoms(vxx,xx));
   tl_assert(sameKindedAtoms(vyy,yy));

   /* Select the width-specific ops.  DifD is AND (defined-if-either-
      defined), UifU is OR (undefined-if-either-undefined). */
   switch (ty) {
      case Ity_I32:
         opOR   = Iop_Or32;
         opDIFD = Iop_And32;
         opUIFU = Iop_Or32;
         opNOT  = Iop_Not32;
         opXOR  = Iop_Xor32;
         opCMP  = Iop_CmpEQ32;
         top    = mkU32(0xFFFFFFFF);
         break;
      case Ity_I64:
         opOR   = Iop_Or64;
         opDIFD = Iop_And64;
         opUIFU = Iop_Or64;
         opNOT  = Iop_Not64;
         opXOR  = Iop_Xor64;
         opCMP  = Iop_CmpEQ64;
         top    = mkU64(0xFFFFFFFFFFFFFFFFULL);
         break;
      default:
         VG_(tool_panic)("expensiveCmpEQorNE");
   }

   /* naive = PCast(vxx `UifU` vyy): the standard lazy answer. */
   naive
      = mkPCastTo(mce,ty,
                  assignNew('V', mce, ty, binop(opUIFU, vxx, vyy)));

   /* vec has a 0 bit wherever the corresponding arg bits are both
      defined AND different; 1 bits elsewhere. */
   vec
      = assignNew(
           'V', mce,ty,
           binop( opOR,
                  assignNew('V', mce,ty, binop(opOR, vxx, vyy)),
                  assignNew(
                     'V', mce,ty,
                     unop( opNOT,
                           assignNew('V', mce,ty, binop(opXOR, xx, yy))))));

   /* improvement_term = 0...0 if any bit of vec is 0 (result decided,
      hence defined), else 1...1. */
   improvement_term
      = mkPCastTo( mce,ty,
                   assignNew('V', mce,Ity_I1, binop(opCMP, vec, top)));

   /* DifD the improvement onto the naive answer. */
   improved
      = assignNew( 'V', mce,ty, binop(opDIFD, naive, improvement_term) );

   final_cast
      = mkPCastTo( mce, Ity_I1, improved );

   return final_cast;
}
841
sewardj95448072004-11-22 20:19:51 +0000842
sewardj992dff92005-10-07 11:08:55 +0000843/* --------- Semi-accurate interpretation of CmpORD. --------- */
844
845/* CmpORD32{S,U} does PowerPC-style 3-way comparisons:
846
847 CmpORD32S(x,y) = 1<<3 if x <s y
848 = 1<<2 if x >s y
849 = 1<<1 if x == y
850
851 and similarly the unsigned variant. The default interpretation is:
852
853 CmpORD32{S,U}#(x,y,x#,y#) = PCast(x# `UifU` y#)
sewardj1bc82102005-12-23 00:16:24 +0000854 & (7<<1)
sewardj992dff92005-10-07 11:08:55 +0000855
856 The "& (7<<1)" reflects the fact that all result bits except 3,2,1
857 are zero and therefore defined (viz, zero).
sewardja9e62a92005-10-07 12:13:21 +0000858
859 Also deal with a special case better:
860
861 CmpORD32S(x,0)
862
863 Here, bit 3 (LT) of the result is a copy of the top bit of x and
864 will be defined even if the rest of x isn't. In which case we do:
865
866 CmpORD32S#(x,x#,0,{impliedly 0}#)
sewardj1bc82102005-12-23 00:16:24 +0000867 = PCast(x#) & (3<<1) -- standard interp for GT#,EQ#
868 | (x# >>u 31) << 3 -- LT# = x#[31]
sewardja9e62a92005-10-07 12:13:21 +0000869
sewardj1bc82102005-12-23 00:16:24 +0000870 Analogous handling for CmpORD64{S,U}.
sewardj992dff92005-10-07 11:08:55 +0000871*/
sewardja9e62a92005-10-07 12:13:21 +0000872static Bool isZeroU32 ( IRAtom* e )
873{
874 return
875 toBool( e->tag == Iex_Const
876 && e->Iex.Const.con->tag == Ico_U32
877 && e->Iex.Const.con->Ico.U32 == 0 );
878}
879
sewardj1bc82102005-12-23 00:16:24 +0000880static Bool isZeroU64 ( IRAtom* e )
sewardj992dff92005-10-07 11:08:55 +0000881{
sewardj1bc82102005-12-23 00:16:24 +0000882 return
883 toBool( e->tag == Iex_Const
884 && e->Iex.Const.con->tag == Ico_U64
885 && e->Iex.Const.con->Ico.U64 == 0 );
886}
887
/* Shadow-compute a PowerPC-style 3-way comparison (CmpORD32/64{S,U});
   see the comment block above for the two interpretations used.
   Returns a shadow atom of the same width as the comparison. */
static IRAtom* doCmpORD ( MCEnv*  mce,
                          IROp    cmp_op,
                          IRAtom* xxhash, IRAtom* yyhash,
                          IRAtom* xx, IRAtom* yy )
{
   Bool   m64    = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U;
   Bool   syned  = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD32S;
   IROp   opOR   = m64 ? Iop_Or64  : Iop_Or32;
   IROp   opAND  = m64 ? Iop_And64 : Iop_And32;
   IROp   opSHL  = m64 ? Iop_Shl64 : Iop_Shl32;
   IROp   opSHR  = m64 ? Iop_Shr64 : Iop_Shr32;
   IRType ty     = m64 ? Ity_I64   : Ity_I32;
   Int    width  = m64 ? 64        : 32;

   Bool (*isZero)(IRAtom*) = m64 ? isZeroU64 : isZeroU32;

   IRAtom* threeLeft1 = NULL;
   IRAtom* sevenLeft1 = NULL;

   tl_assert(isShadowAtom(mce,xxhash));
   tl_assert(isShadowAtom(mce,yyhash));
   tl_assert(isOriginalAtom(mce,xx));
   tl_assert(isOriginalAtom(mce,yy));
   tl_assert(sameKindedAtoms(xxhash,xx));
   tl_assert(sameKindedAtoms(yyhash,yy));
   tl_assert(cmp_op == Iop_CmpORD32S || cmp_op == Iop_CmpORD32U
             || cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U);

   if (0) {
      ppIROp(cmp_op); VG_(printf)(" ");
      ppIRExpr(xx); VG_(printf)(" "); ppIRExpr( yy ); VG_(printf)("\n");
   }

   if (syned && isZero(yy)) {
      /* fancy interpretation: signed comparison against literal 0.
         Bit 3 (LT) of the result is just the sign bit of xx, so it is
         defined whenever xx's sign bit is defined, even if the rest of
         xx isn't. */
      /* if yy is zero, then it must be fully defined (zero#). */
      tl_assert(isZero(yyhash));
      threeLeft1 = m64 ? mkU64(3<<1) : mkU32(3<<1);
      return
         binop(
            opOR,
            /* PCast(xx#) & (3<<1): standard treatment for GT#,EQ#. */
            assignNew(
               'V', mce,ty,
               binop(
                  opAND,
                  mkPCastTo(mce,ty, xxhash),
                  threeLeft1
               )),
            /* (xx# >>u width-1) << 3: LT# = sign bit of xx#. */
            assignNew(
               'V', mce,ty,
               binop(
                  opSHL,
                  assignNew(
                     'V', mce,ty,
                     binop(opSHR, xxhash, mkU8(width-1))),
                  mkU8(3)
               ))
         );
   } else {
      /* standard interpretation: PCast(xx# `UifU` yy#), masked down to
         bits 3..1, since all other result bits are known to be zero. */
      sevenLeft1 = m64 ? mkU64(7<<1) : mkU32(7<<1);
      return
         binop(
            opAND,
            mkPCastTo( mce,ty,
                       mkUifU(mce,ty, xxhash,yyhash)),
            sevenLeft1
         );
   }
}
958
959
sewardj95448072004-11-22 20:19:51 +0000960/*------------------------------------------------------------*/
961/*--- Emit a test and complaint if something is undefined. ---*/
962/*------------------------------------------------------------*/
963
sewardj7cf4e6b2008-05-01 20:24:26 +0000964static IRAtom* schemeE ( MCEnv* mce, IRExpr* e ); /* fwds */
965
966
sewardj95448072004-11-22 20:19:51 +0000967/* Set the annotations on a dirty helper to indicate that the stack
968 pointer and instruction pointers might be read. This is the
969 behaviour of all 'emit-a-complaint' style functions we might
970 call. */
971
/* Annotate dirty helper 'di' as reading the guest stack pointer and
   instruction pointer, which is what all the 'emit-a-complaint' style
   helpers may do.  Exactly two fxState entries are filled in. */
static void setHelperAnns ( MCEnv* mce, IRDirty* di ) {
   di->nFxState = 2;
   di->fxState[0].fx     = Ifx_Read;
   di->fxState[0].offset = mce->layout->offset_SP;
   di->fxState[0].size   = mce->layout->sizeof_SP;
   di->fxState[1].fx     = Ifx_Read;
   di->fxState[1].offset = mce->layout->offset_IP;
   di->fxState[1].size   = mce->layout->sizeof_IP;
}
981
982
983/* Check the supplied **original** atom for undefinedness, and emit a
984 complaint if so. Once that happens, mark it as defined. This is
985 possible because the atom is either a tmp or literal. If it's a
986 tmp, it will be shadowed by a tmp, and so we can set the shadow to
987 be defined. In fact as mentioned above, we will have to allocate a
988 new tmp to carry the new 'defined' shadow value, and update the
989 original->tmp mapping accordingly; we cannot simply assign a new
990 value to an existing shadow tmp as this breaks SSAness -- resulting
991 in the post-instrumentation sanity checker spluttering in disapproval.
992*/
/* Emit a runtime check of 'atom' (an original-program atom): if any of
   its V bits indicate undefinedness, call the appropriate error helper
   (size- and origin-tracking-dependent), then mark the atom's shadow
   as fully defined so the error is reported only once.  No-op at
   --undef-value-errors=no (clo_mc_level == 1). */
static void complainIfUndefined ( MCEnv* mce, IRAtom* atom )
{
   IRAtom*  vatom;
   IRType   ty;
   Int      sz;
   IRDirty* di;
   IRAtom*  cond;
   IRAtom*  origin;
   void*    fn;
   HChar*   nm;
   IRExpr** args;
   Int      nargs;

   // Don't do V bit tests if we're not reporting undefined value errors.
   if (MC_(clo_mc_level) == 1)
      return;

   /* Since the original expression is atomic, there's no duplicated
      work generated by making multiple V-expressions for it.  So we
      don't really care about the possibility that someone else may
      also create a V-interpretation for it. */
   tl_assert(isOriginalAtom(mce, atom));
   vatom = expr2vbits( mce, atom );
   tl_assert(isShadowAtom(mce, vatom));
   tl_assert(sameKindedAtoms(atom, vatom));

   ty = typeOfIRExpr(mce->sb->tyenv, vatom);

   /* sz is only used for constructing the error message */
   sz = ty==Ity_I1 ? 0 : sizeofIRType(ty);

   cond = mkPCastTo( mce, Ity_I1, vatom );
   /* cond will be 0 if all defined, and 1 if any not defined. */

   /* Get the origin info for the value we are about to check.  At
      least, if we are doing origin tracking.  If not, use a dummy
      zero origin. */
   if (MC_(clo_mc_level) == 3) {
      origin = schemeE( mce, atom );
      if (mce->hWordTy == Ity_I64) {
         /* Helpers take a host-word origin; widen the 32-bit tag. */
         origin = assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, origin) );
      }
   } else {
      origin = NULL;
   }

   fn    = NULL;
   nm    = NULL;
   args  = NULL;
   nargs = -1;

   /* Pick the helper matching the value's size, in the with-origin or
      no-origin variant as appropriate. */
   switch (sz) {
      case 0:
         if (origin) {
            fn    = &MC_(helperc_value_check0_fail_w_o);
            nm    = "MC_(helperc_value_check0_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check0_fail_no_o);
            nm    = "MC_(helperc_value_check0_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 1:
         if (origin) {
            fn    = &MC_(helperc_value_check1_fail_w_o);
            nm    = "MC_(helperc_value_check1_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check1_fail_no_o);
            nm    = "MC_(helperc_value_check1_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 4:
         if (origin) {
            fn    = &MC_(helperc_value_check4_fail_w_o);
            nm    = "MC_(helperc_value_check4_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check4_fail_no_o);
            nm    = "MC_(helperc_value_check4_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 8:
         if (origin) {
            fn    = &MC_(helperc_value_check8_fail_w_o);
            nm    = "MC_(helperc_value_check8_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check8_fail_no_o);
            nm    = "MC_(helperc_value_check8_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 2:
      case 16:
         /* Sizes with no dedicated helper: pass the size at runtime. */
         if (origin) {
            fn    = &MC_(helperc_value_checkN_fail_w_o);
            nm    = "MC_(helperc_value_checkN_fail_w_o)";
            args  = mkIRExprVec_2( mkIRExpr_HWord( sz ), origin);
            nargs = 2;
         } else {
            fn    = &MC_(helperc_value_checkN_fail_no_o);
            nm    = "MC_(helperc_value_checkN_fail_no_o)";
            args  = mkIRExprVec_1( mkIRExpr_HWord( sz ) );
            nargs = 1;
         }
         break;
      default:
         VG_(tool_panic)("unexpected szB");
   }

   tl_assert(fn);
   tl_assert(nm);
   tl_assert(args);
   tl_assert(nargs >= 0 && nargs <= 2);
   tl_assert( (MC_(clo_mc_level) == 3 && origin != NULL)
              || (MC_(clo_mc_level) == 2 && origin == NULL) );

   di = unsafeIRDirty_0_N( nargs/*regparms*/, nm,
                           VG_(fnptr_to_fnentry)( fn ), args );
   /* The helper fires only when 'cond' (any-bit-undefined) holds. */
   di->guard = cond;
   setHelperAnns( mce, di );
   stmt( 'V', mce, IRStmt_Dirty(di));

   /* Set the shadow tmp to be defined.  First, update the
      orig->shadow tmp mapping to reflect the fact that this shadow is
      getting a new value. */
   tl_assert(isIRAtom(vatom));
   /* sameKindedAtoms ... */
   if (vatom->tag == Iex_RdTmp) {
      tl_assert(atom->tag == Iex_RdTmp);
      /* Allocate a fresh shadow tmp rather than reassigning the old
         one -- reassignment would break SSA form. */
      newShadowTmpV(mce, atom->Iex.RdTmp.tmp);
      assign('V', mce, findShadowTmpV(mce, atom->Iex.RdTmp.tmp),
                       definedOfType(ty));
   }
}
1140
1141
1142/*------------------------------------------------------------*/
1143/*--- Shadowing PUTs/GETs, and indexed variants thereof ---*/
1144/*------------------------------------------------------------*/
1145
/* Examine the always-defined sections declared in layout to see if
   the (offset,size) section is within one.  Note, it is an error to
   partially fall into such a region: (offset,size) should either be
   completely in such a region or completely not-in such a region.
*/
1151static Bool isAlwaysDefd ( MCEnv* mce, Int offset, Int size )
1152{
1153 Int minoffD, maxoffD, i;
1154 Int minoff = offset;
1155 Int maxoff = minoff + size - 1;
1156 tl_assert((minoff & ~0xFFFF) == 0);
1157 tl_assert((maxoff & ~0xFFFF) == 0);
1158
1159 for (i = 0; i < mce->layout->n_alwaysDefd; i++) {
1160 minoffD = mce->layout->alwaysDefd[i].offset;
1161 maxoffD = minoffD + mce->layout->alwaysDefd[i].size - 1;
1162 tl_assert((minoffD & ~0xFFFF) == 0);
1163 tl_assert((maxoffD & ~0xFFFF) == 0);
1164
1165 if (maxoff < minoffD || maxoffD < minoff)
1166 continue; /* no overlap */
1167 if (minoff >= minoffD && maxoff <= maxoffD)
1168 return True; /* completely contained in an always-defd section */
1169
1170 VG_(tool_panic)("memcheck:isAlwaysDefd:partial overlap");
1171 }
1172 return False; /* could not find any containing section */
1173}
1174
1175
/* Generate into bb suitable actions to shadow this Put.  If the state
   slice is marked 'always defined', do nothing.  Otherwise, write the
   supplied V bits to the shadow state.  We can pass in either an
   original atom or a V-atom, but not both.  In the former case the
   relevant V-bits are then generated from the original.
*/
static
void do_shadow_PUT ( MCEnv* mce,  Int offset,
                     IRAtom* atom, IRAtom* vatom )
{
   IRType ty;

   // Don't do shadow PUTs if we're not doing undefined value checking.
   // Their absence lets Vex's optimiser remove all the shadow computation
   // that they depend on, which includes GETs of the shadow registers.
   if (MC_(clo_mc_level) == 1)
      return;

   /* Exactly one of atom/vatom must be supplied. */
   if (atom) {
      tl_assert(!vatom);
      tl_assert(isOriginalAtom(mce, atom));
      vatom = expr2vbits( mce, atom );
   } else {
      tl_assert(vatom);
      tl_assert(isShadowAtom(mce, vatom));
   }

   ty = typeOfIRExpr(mce->sb->tyenv, vatom);
   tl_assert(ty != Ity_I1);
   if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
      /* later: no ... */
      /* emit code to emit a complaint if any of the vbits are 1. */
      /* complainIfUndefined(mce, atom); */
   } else {
      /* Do a plain shadow Put: the shadow state lives at a fixed
         displacement (total_sizeB) past the real guest state. */
      stmt( 'V', mce, IRStmt_Put( offset + mce->layout->total_sizeB, vatom ) );
   }
}
1214
1215
1216/* Return an expression which contains the V bits corresponding to the
1217 given GETI (passed in in pieces).
1218*/
1219static
1220void do_shadow_PUTI ( MCEnv* mce,
sewardj0b9d74a2006-12-24 02:24:11 +00001221 IRRegArray* descr,
1222 IRAtom* ix, Int bias, IRAtom* atom )
sewardj95448072004-11-22 20:19:51 +00001223{
sewardj7cf97ee2004-11-28 14:25:01 +00001224 IRAtom* vatom;
1225 IRType ty, tyS;
1226 Int arrSize;;
1227
njn1d0825f2006-03-27 11:37:07 +00001228 // Don't do shadow PUTIs if we're not doing undefined value checking.
1229 // Their absence lets Vex's optimiser remove all the shadow computation
1230 // that they depend on, which includes GETIs of the shadow registers.
sewardj7cf4e6b2008-05-01 20:24:26 +00001231 if (MC_(clo_mc_level) == 1)
njn1d0825f2006-03-27 11:37:07 +00001232 return;
1233
sewardj95448072004-11-22 20:19:51 +00001234 tl_assert(isOriginalAtom(mce,atom));
sewardj7cf97ee2004-11-28 14:25:01 +00001235 vatom = expr2vbits( mce, atom );
sewardj95448072004-11-22 20:19:51 +00001236 tl_assert(sameKindedAtoms(atom, vatom));
sewardj7cf97ee2004-11-28 14:25:01 +00001237 ty = descr->elemTy;
sewardj7cf4e6b2008-05-01 20:24:26 +00001238 tyS = shadowTypeV(ty);
sewardj7cf97ee2004-11-28 14:25:01 +00001239 arrSize = descr->nElems * sizeofIRType(ty);
sewardj95448072004-11-22 20:19:51 +00001240 tl_assert(ty != Ity_I1);
1241 tl_assert(isOriginalAtom(mce,ix));
1242 complainIfUndefined(mce,ix);
1243 if (isAlwaysDefd(mce, descr->base, arrSize)) {
1244 /* later: no ... */
1245 /* emit code to emit a complaint if any of the vbits are 1. */
1246 /* complainIfUndefined(mce, atom); */
1247 } else {
1248 /* Do a cloned version of the Put that refers to the shadow
1249 area. */
sewardj0b9d74a2006-12-24 02:24:11 +00001250 IRRegArray* new_descr
1251 = mkIRRegArray( descr->base + mce->layout->total_sizeB,
1252 tyS, descr->nElems);
sewardj7cf4e6b2008-05-01 20:24:26 +00001253 stmt( 'V', mce, IRStmt_PutI( new_descr, ix, bias, vatom ));
sewardj95448072004-11-22 20:19:51 +00001254 }
1255}
1256
1257
1258/* Return an expression which contains the V bits corresponding to the
1259 given GET (passed in in pieces).
1260*/
1261static
1262IRExpr* shadow_GET ( MCEnv* mce, Int offset, IRType ty )
1263{
sewardj7cf4e6b2008-05-01 20:24:26 +00001264 IRType tyS = shadowTypeV(ty);
sewardj95448072004-11-22 20:19:51 +00001265 tl_assert(ty != Ity_I1);
1266 if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
1267 /* Always defined, return all zeroes of the relevant type */
1268 return definedOfType(tyS);
1269 } else {
1270 /* return a cloned version of the Get that refers to the shadow
1271 area. */
sewardj7cf4e6b2008-05-01 20:24:26 +00001272 /* FIXME: this isn't an atom! */
sewardj95448072004-11-22 20:19:51 +00001273 return IRExpr_Get( offset + mce->layout->total_sizeB, tyS );
1274 }
1275}
1276
1277
1278/* Return an expression which contains the V bits corresponding to the
1279 given GETI (passed in in pieces).
1280*/
1281static
sewardj0b9d74a2006-12-24 02:24:11 +00001282IRExpr* shadow_GETI ( MCEnv* mce,
1283 IRRegArray* descr, IRAtom* ix, Int bias )
sewardj95448072004-11-22 20:19:51 +00001284{
1285 IRType ty = descr->elemTy;
sewardj7cf4e6b2008-05-01 20:24:26 +00001286 IRType tyS = shadowTypeV(ty);
sewardj95448072004-11-22 20:19:51 +00001287 Int arrSize = descr->nElems * sizeofIRType(ty);
1288 tl_assert(ty != Ity_I1);
1289 tl_assert(isOriginalAtom(mce,ix));
1290 complainIfUndefined(mce,ix);
1291 if (isAlwaysDefd(mce, descr->base, arrSize)) {
1292 /* Always defined, return all zeroes of the relevant type */
1293 return definedOfType(tyS);
1294 } else {
1295 /* return a cloned version of the Get that refers to the shadow
1296 area. */
sewardj0b9d74a2006-12-24 02:24:11 +00001297 IRRegArray* new_descr
1298 = mkIRRegArray( descr->base + mce->layout->total_sizeB,
1299 tyS, descr->nElems);
sewardj95448072004-11-22 20:19:51 +00001300 return IRExpr_GetI( new_descr, ix, bias );
1301 }
1302}
1303
1304
1305/*------------------------------------------------------------*/
1306/*--- Generating approximations for unknown operations, ---*/
1307/*--- using lazy-propagate semantics ---*/
1308/*------------------------------------------------------------*/
1309
1310/* Lazy propagation of undefinedness from two values, resulting in the
1311 specified shadow type.
1312*/
static
IRAtom* mkLazy2 ( MCEnv* mce, IRType finalVty, IRAtom* va1, IRAtom* va2 )
{
   IRAtom* at;
   IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
   IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
   tl_assert(isShadowAtom(mce,va1));
   tl_assert(isShadowAtom(mce,va2));

   /* The general case is inefficient because PCast is an expensive
      operation.  Here are some special cases which use PCast only
      once rather than twice. */

   /* I64 x I64 -> I64 */
   if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I64) {
      if (0) VG_(printf)("mkLazy2: I64 x I64 -> I64\n");
      at = mkUifU(mce, Ity_I64, va1, va2);
      at = mkPCastTo(mce, Ity_I64, at);
      return at;
   }

   /* I64 x I64 -> I32 */
   if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I32) {
      if (0) VG_(printf)("mkLazy2: I64 x I64 -> I32\n");
      at = mkUifU(mce, Ity_I64, va1, va2);
      at = mkPCastTo(mce, Ity_I32, at);
      return at;
   }

   /* Diagnostic for combinations that take the slow path (disabled). */
   if (0) {
      VG_(printf)("mkLazy2 ");
      ppIRType(t1);
      VG_(printf)("_");
      ppIRType(t2);
      VG_(printf)("_");
      ppIRType(finalVty);
      VG_(printf)("\n");
   }

   /* General case: force everything via 32-bit intermediaries. */
   at = mkPCastTo(mce, Ity_I32, va1);
   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
   at = mkPCastTo(mce, finalVty, at);
   return at;
}
1358
1359
/* 3-arg version of the above.  Unlike mkLazy2 this has no general
   fallback: an unhandled type combination prints itself and asserts,
   so that a new special case gets added deliberately. */
static
IRAtom* mkLazy3 ( MCEnv* mce, IRType finalVty,
                  IRAtom* va1, IRAtom* va2, IRAtom* va3 )
{
   IRAtom* at;
   IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
   IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
   IRType t3 = typeOfIRExpr(mce->sb->tyenv, va3);
   tl_assert(isShadowAtom(mce,va1));
   tl_assert(isShadowAtom(mce,va2));
   tl_assert(isShadowAtom(mce,va3));

   /* The general case is inefficient because PCast is an expensive
      operation.  Here are some special cases which use PCast only
      twice rather than three times. */

   /* I32 x I64 x I64 -> I64 */
   /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
   if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64
       && finalVty == Ity_I64) {
      if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I64\n");
      /* Widen 1st arg to I64.  Since 1st arg is typically a rounding
         mode indication which is fully defined, this should get
         folded out later. */
      at = mkPCastTo(mce, Ity_I64, va1);
      /* Now fold in 2nd and 3rd args. */
      at = mkUifU(mce, Ity_I64, at, va2);
      at = mkUifU(mce, Ity_I64, at, va3);
      /* and PCast once again. */
      at = mkPCastTo(mce, Ity_I64, at);
      return at;
   }

   /* I32 x I64 x I64 -> I32 */
   if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64
       && finalVty == Ity_I32) {
      if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I32\n");
      at = mkPCastTo(mce, Ity_I64, va1);
      at = mkUifU(mce, Ity_I64, at, va2);
      at = mkUifU(mce, Ity_I64, at, va3);
      at = mkPCastTo(mce, Ity_I32, at);
      return at;
   }

   /* I32 x I32 x I32 -> I32 */
   /* 32-bit FP idiom, as (eg) happens on ARM */
   if (t1 == Ity_I32 && t2 == Ity_I32 && t3 == Ity_I32
       && finalVty == Ity_I32) {
      if (0) VG_(printf)("mkLazy3: I32 x I32 x I32 -> I32\n");
      at = va1;
      at = mkUifU(mce, Ity_I32, at, va2);
      at = mkUifU(mce, Ity_I32, at, va3);
      at = mkPCastTo(mce, Ity_I32, at);
      return at;
   }

   /* Unhandled combination: report it (always) and stop. */
   if (1) {
      VG_(printf)("mkLazy3: ");
      ppIRType(t1);
      VG_(printf)(" x ");
      ppIRType(t2);
      VG_(printf)(" x ");
      ppIRType(t3);
      VG_(printf)(" -> ");
      ppIRType(finalVty);
      VG_(printf)("\n");
   }

   tl_assert(0);
   /* General case: force everything via 32-bit intermediaries.
      (Deliberately disabled -- see note at top of function.) */
   /*
   at = mkPCastTo(mce, Ity_I32, va1);
   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va3));
   at = mkPCastTo(mce, finalVty, at);
   return at;
   */
}
1439
1440
/* 4-arg version of the above.  Compute a lazily-propagated shadow
   (V-bits) value of type 'finalVty' from the shadow values of the
   four args: pessimistically merge the args' definedness and PCast
   to the final type.  Only the one arg-type combination actually
   seen in practice is handled; anything else is printed and
   asserted on. */
static
IRAtom* mkLazy4 ( MCEnv* mce, IRType finalVty,
                  IRAtom* va1, IRAtom* va2, IRAtom* va3, IRAtom* va4 )
{
   IRAtom* at;
   /* Shadow types of the four incoming args. */
   IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
   IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
   IRType t3 = typeOfIRExpr(mce->sb->tyenv, va3);
   IRType t4 = typeOfIRExpr(mce->sb->tyenv, va4);
   tl_assert(isShadowAtom(mce,va1));
   tl_assert(isShadowAtom(mce,va2));
   tl_assert(isShadowAtom(mce,va3));
   tl_assert(isShadowAtom(mce,va4));

   /* The general case is inefficient because PCast is an expensive
      operation.  Here are some special cases which use PCast only
      twice rather than three times. */

   /* I32 x I64 x I64 x I64 -> I64 */
   /* Standard FP idiom: rm x FParg1 x FParg2 x FParg3 -> FPresult */
   if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64 && t4 == Ity_I64
       && finalVty == Ity_I64) {
      if (0) VG_(printf)("mkLazy4: I32 x I64 x I64 x I64 -> I64\n");
      /* Widen 1st arg to I64.  Since 1st arg is typically a rounding
         mode indication which is fully defined, this should get
         folded out later. */
      at = mkPCastTo(mce, Ity_I64, va1);
      /* Now fold in 2nd, 3rd, 4th args. */
      at = mkUifU(mce, Ity_I64, at, va2);
      at = mkUifU(mce, Ity_I64, at, va3);
      at = mkUifU(mce, Ity_I64, at, va4);
      /* and PCast once again. */
      at = mkPCastTo(mce, Ity_I64, at);
      return at;
   }

   /* Unhandled type combination: report it before asserting, so the
      missing case is easy to identify and add. */
   if (1) {
      VG_(printf)("mkLazy4: ");
      ppIRType(t1);
      VG_(printf)(" x ");
      ppIRType(t2);
      VG_(printf)(" x ");
      ppIRType(t3);
      VG_(printf)(" x ");
      ppIRType(t4);
      VG_(printf)(" -> ");
      ppIRType(finalVty);
      VG_(printf)("\n");
   }

   tl_assert(0);
}
1494
1495
sewardj95448072004-11-22 20:19:51 +00001496/* Do the lazy propagation game from a null-terminated vector of
1497 atoms. This is presumably the arguments to a helper call, so the
1498 IRCallee info is also supplied in order that we can know which
1499 arguments should be ignored (via the .mcx_mask field).
1500*/
1501static
1502IRAtom* mkLazyN ( MCEnv* mce,
1503 IRAtom** exprvec, IRType finalVtype, IRCallee* cee )
1504{
sewardj4cc684b2007-08-25 23:09:36 +00001505 Int i;
sewardj95448072004-11-22 20:19:51 +00001506 IRAtom* here;
sewardj4cc684b2007-08-25 23:09:36 +00001507 IRAtom* curr;
1508 IRType mergeTy;
1509 IRType mergeTy64 = True;
1510
1511 /* Decide on the type of the merge intermediary. If all relevant
1512 args are I64, then it's I64. In all other circumstances, use
1513 I32. */
1514 for (i = 0; exprvec[i]; i++) {
1515 tl_assert(i < 32);
1516 tl_assert(isOriginalAtom(mce, exprvec[i]));
1517 if (cee->mcx_mask & (1<<i))
1518 continue;
sewardj1c0ce7a2009-07-01 08:10:49 +00001519 if (typeOfIRExpr(mce->sb->tyenv, exprvec[i]) != Ity_I64)
sewardj4cc684b2007-08-25 23:09:36 +00001520 mergeTy64 = False;
1521 }
1522
1523 mergeTy = mergeTy64 ? Ity_I64 : Ity_I32;
1524 curr = definedOfType(mergeTy);
1525
sewardj95448072004-11-22 20:19:51 +00001526 for (i = 0; exprvec[i]; i++) {
1527 tl_assert(i < 32);
1528 tl_assert(isOriginalAtom(mce, exprvec[i]));
1529 /* Only take notice of this arg if the callee's mc-exclusion
1530 mask does not say it is to be excluded. */
1531 if (cee->mcx_mask & (1<<i)) {
1532 /* the arg is to be excluded from definedness checking. Do
1533 nothing. */
1534 if (0) VG_(printf)("excluding %s(%d)\n", cee->name, i);
1535 } else {
1536 /* calculate the arg's definedness, and pessimistically merge
1537 it in. */
sewardj4cc684b2007-08-25 23:09:36 +00001538 here = mkPCastTo( mce, mergeTy, expr2vbits(mce, exprvec[i]) );
1539 curr = mergeTy64
1540 ? mkUifU64(mce, here, curr)
1541 : mkUifU32(mce, here, curr);
sewardj95448072004-11-22 20:19:51 +00001542 }
1543 }
1544 return mkPCastTo(mce, finalVtype, curr );
1545}
1546
1547
1548/*------------------------------------------------------------*/
1549/*--- Generating expensive sequences for exact carry-chain ---*/
1550/*--- propagation in add/sub and related operations. ---*/
1551/*------------------------------------------------------------*/
1552
/* Exact (bit-precise) definedness propagation for Add/Sub.  Given
   each operand's defined bits, compute the smallest (min) and
   largest (max) values the operand could take; any result bit that
   differs between (min op min')-style and (max op max')-style
   evaluations — or that was undefined in either input — is marked
   undefined.  See the add/sub result formulas below. */
static
IRAtom* expensiveAddSub ( MCEnv*  mce,
                          Bool    add,
                          IRType  ty,
                          IRAtom* qaa, IRAtom* qbb,
                          IRAtom* aa,  IRAtom* bb )
{
   IRAtom *a_min, *b_min, *a_max, *b_max;
   IROp   opAND, opOR, opXOR, opNOT, opADD, opSUB;

   tl_assert(isShadowAtom(mce,qaa));
   tl_assert(isShadowAtom(mce,qbb));
   tl_assert(isOriginalAtom(mce,aa));
   tl_assert(isOriginalAtom(mce,bb));
   tl_assert(sameKindedAtoms(qaa,aa));
   tl_assert(sameKindedAtoms(qbb,bb));

   /* Select the width-appropriate primops; only I32 and I64 are
      supported. */
   switch (ty) {
      case Ity_I32:
         opAND = Iop_And32;
         opOR  = Iop_Or32;
         opXOR = Iop_Xor32;
         opNOT = Iop_Not32;
         opADD = Iop_Add32;
         opSUB = Iop_Sub32;
         break;
      case Ity_I64:
         opAND = Iop_And64;
         opOR  = Iop_Or64;
         opXOR = Iop_Xor64;
         opNOT = Iop_Not64;
         opADD = Iop_Add64;
         opSUB = Iop_Sub64;
         break;
      default:
         VG_(tool_panic)("expensiveAddSub");
   }

   /* Smallest possible value: undefined bits forced to 0. */
   // a_min = aa & ~qaa
   a_min = assignNew('V', mce,ty,
                     binop(opAND, aa,
                           assignNew('V', mce,ty, unop(opNOT, qaa))));

   // b_min = bb & ~qbb
   b_min = assignNew('V', mce,ty,
                     binop(opAND, bb,
                           assignNew('V', mce,ty, unop(opNOT, qbb))));

   /* Largest possible value: undefined bits forced to 1. */
   // a_max = aa | qaa
   a_max = assignNew('V', mce,ty, binop(opOR, aa, qaa));

   // b_max = bb | qbb
   b_max = assignNew('V', mce,ty, binop(opOR, bb, qbb));

   if (add) {
      // result = (qaa | qbb) | ((a_min + b_min) ^ (a_max + b_max))
      return
      assignNew('V', mce,ty,
                binop( opOR,
                       assignNew('V', mce,ty, binop(opOR, qaa, qbb)),
                       assignNew('V', mce,ty,
                                 binop( opXOR,
                                        assignNew('V', mce,ty, binop(opADD, a_min, b_min)),
                                        assignNew('V', mce,ty, binop(opADD, a_max, b_max))
                                 )
                       )
                )
             );
   } else {
      /* NB: for subtraction the extreme cases pair a's min with b's
         max and vice versa.  (Comment previously mis-stated the
         second term as a_max + b_min; the code uses opSUB.) */
      // result = (qaa | qbb) | ((a_min - b_max) ^ (a_max - b_min))
      return
      assignNew('V', mce,ty,
                binop( opOR,
                       assignNew('V', mce,ty, binop(opOR, qaa, qbb)),
                       assignNew('V', mce,ty,
                                 binop( opXOR,
                                        assignNew('V', mce,ty, binop(opSUB, a_min, b_max)),
                                        assignNew('V', mce,ty, binop(opSUB, a_max, b_min))
                                 )
                       )
                )
             );
   }

}
1638
1639
1640/*------------------------------------------------------------*/
sewardjaaddbc22005-10-07 09:49:53 +00001641/*--- Scalar shifts. ---*/
1642/*------------------------------------------------------------*/
1643
1644/* Produce an interpretation for (aa << bb) (or >>s, >>u). The basic
1645 idea is to shift the definedness bits by the original shift amount.
1646 This introduces 0s ("defined") in new positions for left shifts and
1647 unsigned right shifts, and copies the top definedness bit for
1648 signed right shifts. So, conveniently, applying the original shift
1649 operator to the definedness bits for the left arg is exactly the
1650 right thing to do:
1651
1652 (qaa << bb)
1653
1654 However if the shift amount is undefined then the whole result
1655 is undefined. Hence need:
1656
1657 (qaa << bb) `UifU` PCast(qbb)
1658
   If the shift amount bb is a literal then qbb will say 'all defined'
1660 and the UifU and PCast will get folded out by post-instrumentation
1661 optimisation.
1662*/
1663static IRAtom* scalarShift ( MCEnv* mce,
1664 IRType ty,
1665 IROp original_op,
1666 IRAtom* qaa, IRAtom* qbb,
1667 IRAtom* aa, IRAtom* bb )
1668{
1669 tl_assert(isShadowAtom(mce,qaa));
1670 tl_assert(isShadowAtom(mce,qbb));
1671 tl_assert(isOriginalAtom(mce,aa));
1672 tl_assert(isOriginalAtom(mce,bb));
1673 tl_assert(sameKindedAtoms(qaa,aa));
1674 tl_assert(sameKindedAtoms(qbb,bb));
1675 return
1676 assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +00001677 'V', mce, ty,
sewardjaaddbc22005-10-07 09:49:53 +00001678 mkUifU( mce, ty,
sewardj7cf4e6b2008-05-01 20:24:26 +00001679 assignNew('V', mce, ty, binop(original_op, qaa, bb)),
sewardjaaddbc22005-10-07 09:49:53 +00001680 mkPCastTo(mce, ty, qbb)
1681 )
1682 );
1683}
1684
1685
1686/*------------------------------------------------------------*/
1687/*--- Helpers for dealing with vector primops. ---*/
sewardj3245c912004-12-10 14:58:26 +00001688/*------------------------------------------------------------*/
1689
sewardja1d93302004-12-12 16:45:06 +00001690/* Vector pessimisation -- pessimise within each lane individually. */
1691
1692static IRAtom* mkPCast8x16 ( MCEnv* mce, IRAtom* at )
1693{
sewardj7cf4e6b2008-05-01 20:24:26 +00001694 return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ8x16, at));
sewardja1d93302004-12-12 16:45:06 +00001695}
1696
1697static IRAtom* mkPCast16x8 ( MCEnv* mce, IRAtom* at )
1698{
sewardj7cf4e6b2008-05-01 20:24:26 +00001699 return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ16x8, at));
sewardja1d93302004-12-12 16:45:06 +00001700}
1701
1702static IRAtom* mkPCast32x4 ( MCEnv* mce, IRAtom* at )
1703{
sewardj7cf4e6b2008-05-01 20:24:26 +00001704 return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ32x4, at));
sewardja1d93302004-12-12 16:45:06 +00001705}
1706
1707static IRAtom* mkPCast64x2 ( MCEnv* mce, IRAtom* at )
1708{
sewardj7cf4e6b2008-05-01 20:24:26 +00001709 return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ64x2, at));
sewardja1d93302004-12-12 16:45:06 +00001710}
1711
sewardjacd2e912005-01-13 19:17:06 +00001712static IRAtom* mkPCast32x2 ( MCEnv* mce, IRAtom* at )
1713{
sewardj7cf4e6b2008-05-01 20:24:26 +00001714 return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ32x2, at));
sewardjacd2e912005-01-13 19:17:06 +00001715}
1716
1717static IRAtom* mkPCast16x4 ( MCEnv* mce, IRAtom* at )
1718{
sewardj7cf4e6b2008-05-01 20:24:26 +00001719 return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ16x4, at));
sewardjacd2e912005-01-13 19:17:06 +00001720}
1721
1722static IRAtom* mkPCast8x8 ( MCEnv* mce, IRAtom* at )
1723{
sewardj7cf4e6b2008-05-01 20:24:26 +00001724 return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ8x8, at));
sewardjacd2e912005-01-13 19:17:06 +00001725}
1726
sewardja1d93302004-12-12 16:45:06 +00001727
sewardj3245c912004-12-10 14:58:26 +00001728/* Here's a simple scheme capable of handling ops derived from SSE1
1729 code and while only generating ops that can be efficiently
1730 implemented in SSE1. */
1731
1732/* All-lanes versions are straightforward:
1733
sewardj20d38f22005-02-07 23:50:18 +00001734 binary32Fx4(x,y) ==> PCast32x4(UifUV128(x#,y#))
sewardj3245c912004-12-10 14:58:26 +00001735
1736 unary32Fx4(x,y) ==> PCast32x4(x#)
1737
1738 Lowest-lane-only versions are more complex:
1739
sewardj20d38f22005-02-07 23:50:18 +00001740 binary32F0x4(x,y) ==> SetV128lo32(
sewardj3245c912004-12-10 14:58:26 +00001741 x#,
sewardj20d38f22005-02-07 23:50:18 +00001742 PCast32(V128to32(UifUV128(x#,y#)))
sewardj3245c912004-12-10 14:58:26 +00001743 )
1744
1745 This is perhaps not so obvious. In particular, it's faster to
sewardj20d38f22005-02-07 23:50:18 +00001746 do a V128-bit UifU and then take the bottom 32 bits than the more
sewardj3245c912004-12-10 14:58:26 +00001747 obvious scheme of taking the bottom 32 bits of each operand
1748 and doing a 32-bit UifU. Basically since UifU is fast and
1749 chopping lanes off vector values is slow.
1750
1751 Finally:
1752
sewardj20d38f22005-02-07 23:50:18 +00001753 unary32F0x4(x) ==> SetV128lo32(
sewardj3245c912004-12-10 14:58:26 +00001754 x#,
sewardj20d38f22005-02-07 23:50:18 +00001755 PCast32(V128to32(x#))
sewardj3245c912004-12-10 14:58:26 +00001756 )
1757
1758 Where:
1759
1760 PCast32(v#) = 1Sto32(CmpNE32(v#,0))
1761 PCast32x4(v#) = CmpNEZ32x4(v#)
1762*/
1763
1764static
1765IRAtom* binary32Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1766{
1767 IRAtom* at;
1768 tl_assert(isShadowAtom(mce, vatomX));
1769 tl_assert(isShadowAtom(mce, vatomY));
sewardj20d38f22005-02-07 23:50:18 +00001770 at = mkUifUV128(mce, vatomX, vatomY);
sewardj7cf4e6b2008-05-01 20:24:26 +00001771 at = assignNew('V', mce, Ity_V128, mkPCast32x4(mce, at));
sewardj3245c912004-12-10 14:58:26 +00001772 return at;
1773}
1774
1775static
1776IRAtom* unary32Fx4 ( MCEnv* mce, IRAtom* vatomX )
1777{
1778 IRAtom* at;
1779 tl_assert(isShadowAtom(mce, vatomX));
sewardj7cf4e6b2008-05-01 20:24:26 +00001780 at = assignNew('V', mce, Ity_V128, mkPCast32x4(mce, vatomX));
sewardj3245c912004-12-10 14:58:26 +00001781 return at;
1782}
1783
1784static
1785IRAtom* binary32F0x4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1786{
1787 IRAtom* at;
1788 tl_assert(isShadowAtom(mce, vatomX));
1789 tl_assert(isShadowAtom(mce, vatomY));
sewardj20d38f22005-02-07 23:50:18 +00001790 at = mkUifUV128(mce, vatomX, vatomY);
sewardj7cf4e6b2008-05-01 20:24:26 +00001791 at = assignNew('V', mce, Ity_I32, unop(Iop_V128to32, at));
sewardj3245c912004-12-10 14:58:26 +00001792 at = mkPCastTo(mce, Ity_I32, at);
sewardj7cf4e6b2008-05-01 20:24:26 +00001793 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
sewardj3245c912004-12-10 14:58:26 +00001794 return at;
1795}
1796
1797static
1798IRAtom* unary32F0x4 ( MCEnv* mce, IRAtom* vatomX )
1799{
1800 IRAtom* at;
1801 tl_assert(isShadowAtom(mce, vatomX));
sewardj7cf4e6b2008-05-01 20:24:26 +00001802 at = assignNew('V', mce, Ity_I32, unop(Iop_V128to32, vatomX));
sewardj3245c912004-12-10 14:58:26 +00001803 at = mkPCastTo(mce, Ity_I32, at);
sewardj7cf4e6b2008-05-01 20:24:26 +00001804 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
sewardj3245c912004-12-10 14:58:26 +00001805 return at;
1806}
1807
sewardj0b070592004-12-10 21:44:22 +00001808/* --- ... and ... 64Fx2 versions of the same ... --- */
1809
1810static
1811IRAtom* binary64Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1812{
1813 IRAtom* at;
1814 tl_assert(isShadowAtom(mce, vatomX));
1815 tl_assert(isShadowAtom(mce, vatomY));
sewardj20d38f22005-02-07 23:50:18 +00001816 at = mkUifUV128(mce, vatomX, vatomY);
sewardj7cf4e6b2008-05-01 20:24:26 +00001817 at = assignNew('V', mce, Ity_V128, mkPCast64x2(mce, at));
sewardj0b070592004-12-10 21:44:22 +00001818 return at;
1819}
1820
1821static
1822IRAtom* unary64Fx2 ( MCEnv* mce, IRAtom* vatomX )
1823{
1824 IRAtom* at;
1825 tl_assert(isShadowAtom(mce, vatomX));
sewardj7cf4e6b2008-05-01 20:24:26 +00001826 at = assignNew('V', mce, Ity_V128, mkPCast64x2(mce, vatomX));
sewardj0b070592004-12-10 21:44:22 +00001827 return at;
1828}
1829
1830static
1831IRAtom* binary64F0x2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1832{
1833 IRAtom* at;
1834 tl_assert(isShadowAtom(mce, vatomX));
1835 tl_assert(isShadowAtom(mce, vatomY));
sewardj20d38f22005-02-07 23:50:18 +00001836 at = mkUifUV128(mce, vatomX, vatomY);
sewardj7cf4e6b2008-05-01 20:24:26 +00001837 at = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, at));
sewardj0b070592004-12-10 21:44:22 +00001838 at = mkPCastTo(mce, Ity_I64, at);
sewardj7cf4e6b2008-05-01 20:24:26 +00001839 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
sewardj0b070592004-12-10 21:44:22 +00001840 return at;
1841}
1842
1843static
1844IRAtom* unary64F0x2 ( MCEnv* mce, IRAtom* vatomX )
1845{
1846 IRAtom* at;
1847 tl_assert(isShadowAtom(mce, vatomX));
sewardj7cf4e6b2008-05-01 20:24:26 +00001848 at = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vatomX));
sewardj0b070592004-12-10 21:44:22 +00001849 at = mkPCastTo(mce, Ity_I64, at);
sewardj7cf4e6b2008-05-01 20:24:26 +00001850 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
sewardj0b070592004-12-10 21:44:22 +00001851 return at;
1852}
1853
sewardj57f92b02010-08-22 11:54:14 +00001854/* --- --- ... and ... 32Fx2 versions of the same --- --- */
1855
1856static
1857IRAtom* binary32Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1858{
1859 IRAtom* at;
1860 tl_assert(isShadowAtom(mce, vatomX));
1861 tl_assert(isShadowAtom(mce, vatomY));
1862 at = mkUifU64(mce, vatomX, vatomY);
1863 at = assignNew('V', mce, Ity_I64, mkPCast32x2(mce, at));
1864 return at;
1865}
1866
1867static
1868IRAtom* unary32Fx2 ( MCEnv* mce, IRAtom* vatomX )
1869{
1870 IRAtom* at;
1871 tl_assert(isShadowAtom(mce, vatomX));
1872 at = assignNew('V', mce, Ity_I64, mkPCast32x2(mce, vatomX));
1873 return at;
1874}
1875
sewardja1d93302004-12-12 16:45:06 +00001876/* --- --- Vector saturated narrowing --- --- */
1877
1878/* This is quite subtle. What to do is simple:
1879
1880 Let the original narrowing op be QNarrowW{S,U}xN. Produce:
1881
1882 the-narrowing-op( PCastWxN(vatom1), PCastWxN(vatom2))
1883
1884 Why this is right is not so simple. Consider a lane in the args,
1885 vatom1 or 2, doesn't matter.
1886
1887 After the PCast, that lane is all 0s (defined) or all
1888 1s(undefined).
1889
1890 Both signed and unsigned saturating narrowing of all 0s produces
1891 all 0s, which is what we want.
1892
1893 The all-1s case is more complex. Unsigned narrowing interprets an
1894 all-1s input as the largest unsigned integer, and so produces all
1895 1s as a result since that is the largest unsigned value at the
1896 smaller width.
1897
1898 Signed narrowing interprets all 1s as -1. Fortunately, -1 narrows
1899 to -1, so we still wind up with all 1s at the smaller width.
1900
1901 So: In short, pessimise the args, then apply the original narrowing
1902 op.
1903*/
/* V128 saturated narrowing: per the rationale above, pessimise each
   arg per-lane at the wide width, then apply the real narrowing op. */
static
IRAtom* vectorNarrowV128 ( MCEnv* mce, IROp narrow_op,
                           IRAtom* vatom1, IRAtom* vatom2)
{
   IRAtom *at1, *at2, *at3;
   /* Per-lane pessimiser matching the narrow op's source lane width. */
   IRAtom* (*pcast)( MCEnv*, IRAtom* );
   switch (narrow_op) {
      case Iop_QNarrow32Sx4: pcast = mkPCast32x4; break;
      case Iop_QNarrow32Ux4: pcast = mkPCast32x4; break;
      case Iop_QNarrow16Sx8: pcast = mkPCast16x8; break;
      case Iop_QNarrow16Ux8: pcast = mkPCast16x8; break;
      default: VG_(tool_panic)("vectorNarrowV128");
   }
   tl_assert(isShadowAtom(mce,vatom1));
   tl_assert(isShadowAtom(mce,vatom2));
   at1 = assignNew('V', mce, Ity_V128, pcast(mce, vatom1));
   at2 = assignNew('V', mce, Ity_V128, pcast(mce, vatom2));
   at3 = assignNew('V', mce, Ity_V128, binop(narrow_op, at1, at2));
   return at3;
}
1924
/* 64-bit analogue of vectorNarrowV128: pessimise each arg per-lane,
   then apply the real narrowing op. */
static
IRAtom* vectorNarrow64 ( MCEnv* mce, IROp narrow_op,
                         IRAtom* vatom1, IRAtom* vatom2)
{
   IRAtom *at1, *at2, *at3;
   /* Per-lane pessimiser matching the narrow op's source lane width. */
   IRAtom* (*pcast)( MCEnv*, IRAtom* );
   switch (narrow_op) {
      case Iop_QNarrow32Sx2: pcast = mkPCast32x2; break;
      case Iop_QNarrow16Sx4: pcast = mkPCast16x4; break;
      case Iop_QNarrow16Ux4: pcast = mkPCast16x4; break;
      default: VG_(tool_panic)("vectorNarrow64");
   }
   tl_assert(isShadowAtom(mce,vatom1));
   tl_assert(isShadowAtom(mce,vatom2));
   at1 = assignNew('V', mce, Ity_I64, pcast(mce, vatom1));
   at2 = assignNew('V', mce, Ity_I64, pcast(mce, vatom2));
   at3 = assignNew('V', mce, Ity_I64, binop(narrow_op, at1, at2));
   return at3;
}
1944
/* Unary V128 -> I64 shortening (plain and saturating forms):
   pessimise per-lane at the wide (source) width, then apply the real
   shortening op to halve the lane widths. */
static
IRAtom* vectorShortenV128 ( MCEnv* mce, IROp shorten_op,
                           IRAtom* vatom1)
{
   IRAtom *at1, *at2;
   /* Per-lane pessimiser matching the op's source lane width. */
   IRAtom* (*pcast)( MCEnv*, IRAtom* );
   switch (shorten_op) {
      case Iop_Shorten16x8: pcast = mkPCast16x8; break;
      case Iop_Shorten32x4: pcast = mkPCast32x4; break;
      case Iop_Shorten64x2: pcast = mkPCast64x2; break;
      case Iop_QShortenS16Sx8: pcast = mkPCast16x8; break;
      case Iop_QShortenU16Sx8: pcast = mkPCast16x8; break;
      case Iop_QShortenU16Ux8: pcast = mkPCast16x8; break;
      case Iop_QShortenS32Sx4: pcast = mkPCast32x4; break;
      case Iop_QShortenU32Sx4: pcast = mkPCast32x4; break;
      case Iop_QShortenU32Ux4: pcast = mkPCast32x4; break;
      case Iop_QShortenS64Sx2: pcast = mkPCast64x2; break;
      case Iop_QShortenU64Sx2: pcast = mkPCast64x2; break;
      case Iop_QShortenU64Ux2: pcast = mkPCast64x2; break;
      default: VG_(tool_panic)("vectorShortenV128");
   }
   tl_assert(isShadowAtom(mce,vatom1));
   at1 = assignNew('V', mce, Ity_V128, pcast(mce, vatom1));
   at2 = assignNew('V', mce, Ity_I64, unop(shorten_op, at1));
   return at2;
}
1971
/* Unary I64 -> V128 lengthening: apply the real lengthening op
   first, then pessimise per-lane at the widened (destination) lane
   width — note the opposite order from vectorShortenV128. */
static
IRAtom* vectorLongenI64 ( MCEnv* mce, IROp longen_op,
                           IRAtom* vatom1)
{
   IRAtom *at1, *at2;
   /* Per-lane pessimiser matching the op's destination lane width. */
   IRAtom* (*pcast)( MCEnv*, IRAtom* );
   switch (longen_op) {
      case Iop_Longen8Ux8: pcast = mkPCast16x8; break;
      case Iop_Longen8Sx8: pcast = mkPCast16x8; break;
      case Iop_Longen16Ux4: pcast = mkPCast32x4; break;
      case Iop_Longen16Sx4: pcast = mkPCast32x4; break;
      case Iop_Longen32Ux2: pcast = mkPCast64x2; break;
      case Iop_Longen32Sx2: pcast = mkPCast64x2; break;
      default: VG_(tool_panic)("vectorLongenI64");
   }
   tl_assert(isShadowAtom(mce,vatom1));
   at1 = assignNew('V', mce, Ity_V128, unop(longen_op, vatom1));
   at2 = assignNew('V', mce, Ity_V128, pcast(mce, at1));
   return at2;
}
1992
sewardja1d93302004-12-12 16:45:06 +00001993
1994/* --- --- Vector integer arithmetic --- --- */
1995
1996/* Simple ... UifU the args and per-lane pessimise the results. */
sewardjacd2e912005-01-13 19:17:06 +00001997
sewardj20d38f22005-02-07 23:50:18 +00001998/* --- V128-bit versions --- */
sewardjacd2e912005-01-13 19:17:06 +00001999
sewardja1d93302004-12-12 16:45:06 +00002000static
2001IRAtom* binary8Ix16 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2002{
2003 IRAtom* at;
sewardj20d38f22005-02-07 23:50:18 +00002004 at = mkUifUV128(mce, vatom1, vatom2);
sewardja1d93302004-12-12 16:45:06 +00002005 at = mkPCast8x16(mce, at);
2006 return at;
2007}
2008
2009static
2010IRAtom* binary16Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2011{
2012 IRAtom* at;
sewardj20d38f22005-02-07 23:50:18 +00002013 at = mkUifUV128(mce, vatom1, vatom2);
sewardja1d93302004-12-12 16:45:06 +00002014 at = mkPCast16x8(mce, at);
2015 return at;
2016}
2017
2018static
2019IRAtom* binary32Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2020{
2021 IRAtom* at;
sewardj20d38f22005-02-07 23:50:18 +00002022 at = mkUifUV128(mce, vatom1, vatom2);
sewardja1d93302004-12-12 16:45:06 +00002023 at = mkPCast32x4(mce, at);
2024 return at;
2025}
2026
2027static
2028IRAtom* binary64Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2029{
2030 IRAtom* at;
sewardj20d38f22005-02-07 23:50:18 +00002031 at = mkUifUV128(mce, vatom1, vatom2);
sewardja1d93302004-12-12 16:45:06 +00002032 at = mkPCast64x2(mce, at);
2033 return at;
2034}
sewardj3245c912004-12-10 14:58:26 +00002035
sewardjacd2e912005-01-13 19:17:06 +00002036/* --- 64-bit versions --- */
2037
2038static
2039IRAtom* binary8Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2040{
2041 IRAtom* at;
2042 at = mkUifU64(mce, vatom1, vatom2);
2043 at = mkPCast8x8(mce, at);
2044 return at;
2045}
2046
2047static
2048IRAtom* binary16Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2049{
2050 IRAtom* at;
2051 at = mkUifU64(mce, vatom1, vatom2);
2052 at = mkPCast16x4(mce, at);
2053 return at;
2054}
2055
2056static
2057IRAtom* binary32Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2058{
2059 IRAtom* at;
2060 at = mkUifU64(mce, vatom1, vatom2);
2061 at = mkPCast32x2(mce, at);
2062 return at;
2063}
2064
sewardj57f92b02010-08-22 11:54:14 +00002065static
2066IRAtom* binary64Ix1 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2067{
2068 IRAtom* at;
2069 at = mkUifU64(mce, vatom1, vatom2);
2070 at = mkPCastTo(mce, Ity_I64, at);
2071 return at;
2072}
2073
sewardj3245c912004-12-10 14:58:26 +00002074
2075/*------------------------------------------------------------*/
sewardj95448072004-11-22 20:19:51 +00002076/*--- Generate shadow values from all kinds of IRExprs. ---*/
2077/*------------------------------------------------------------*/
2078
/* Compute the shadow (V-bits) value for a 4-ary (Qop) expression:
   compute shadows for all four args, then dispatch on the op. */
static
IRAtom* expr2vbits_Qop ( MCEnv* mce,
                         IROp op,
                         IRAtom* atom1, IRAtom* atom2,
                         IRAtom* atom3, IRAtom* atom4 )
{
   IRAtom* vatom1 = expr2vbits( mce, atom1 );
   IRAtom* vatom2 = expr2vbits( mce, atom2 );
   IRAtom* vatom3 = expr2vbits( mce, atom3 );
   IRAtom* vatom4 = expr2vbits( mce, atom4 );

   /* Sanity: args are originals, their shadows are shadows, and each
      original/shadow pair is of the same kind. */
   tl_assert(isOriginalAtom(mce,atom1));
   tl_assert(isOriginalAtom(mce,atom2));
   tl_assert(isOriginalAtom(mce,atom3));
   tl_assert(isOriginalAtom(mce,atom4));
   tl_assert(isShadowAtom(mce,vatom1));
   tl_assert(isShadowAtom(mce,vatom2));
   tl_assert(isShadowAtom(mce,vatom3));
   tl_assert(isShadowAtom(mce,vatom4));
   tl_assert(sameKindedAtoms(atom1,vatom1));
   tl_assert(sameKindedAtoms(atom2,vatom2));
   tl_assert(sameKindedAtoms(atom3,vatom3));
   tl_assert(sameKindedAtoms(atom4,vatom4));
   switch (op) {
      case Iop_MAddF64:
      case Iop_MAddF64r32:
      case Iop_MSubF64:
      case Iop_MSubF64r32:
         /* I32(rm) x F64 x F64 x F64 -> F64 */
         return mkLazy4(mce, Ity_I64, vatom1, vatom2, vatom3, vatom4);
      default:
         /* Unhandled Qop: print it so the missing case is obvious. */
         ppIROp(op);
         VG_(tool_panic)("memcheck:expr2vbits_Qop");
   }
}
2114
2115
/* Compute the shadow (V-bits) value for a 3-ary (Triop) expression:
   compute shadows for all three args, then dispatch on the op. */
static
IRAtom* expr2vbits_Triop ( MCEnv* mce,
                           IROp op,
                           IRAtom* atom1, IRAtom* atom2, IRAtom* atom3 )
{
   IRAtom* vatom1 = expr2vbits( mce, atom1 );
   IRAtom* vatom2 = expr2vbits( mce, atom2 );
   IRAtom* vatom3 = expr2vbits( mce, atom3 );

   /* Sanity: args are originals, their shadows are shadows, and each
      original/shadow pair is of the same kind. */
   tl_assert(isOriginalAtom(mce,atom1));
   tl_assert(isOriginalAtom(mce,atom2));
   tl_assert(isOriginalAtom(mce,atom3));
   tl_assert(isShadowAtom(mce,vatom1));
   tl_assert(isShadowAtom(mce,vatom2));
   tl_assert(isShadowAtom(mce,vatom3));
   tl_assert(sameKindedAtoms(atom1,vatom1));
   tl_assert(sameKindedAtoms(atom2,vatom2));
   tl_assert(sameKindedAtoms(atom3,vatom3));
   switch (op) {
      case Iop_AddF64:
      case Iop_AddF64r32:
      case Iop_SubF64:
      case Iop_SubF64r32:
      case Iop_MulF64:
      case Iop_MulF64r32:
      case Iop_DivF64:
      case Iop_DivF64r32:
      case Iop_ScaleF64:
      case Iop_Yl2xF64:
      case Iop_Yl2xp1F64:
      case Iop_AtanF64:
      case Iop_PRemF64:
      case Iop_PRem1F64:
         /* I32(rm) x F64 x F64 -> F64 */
         return mkLazy3(mce, Ity_I64, vatom1, vatom2, vatom3);
      case Iop_PRemC3210F64:
      case Iop_PRem1C3210F64:
         /* I32(rm) x F64 x F64 -> I32 */
         return mkLazy3(mce, Ity_I32, vatom1, vatom2, vatom3);
      case Iop_AddF32:
      case Iop_SubF32:
      case Iop_MulF32:
      case Iop_DivF32:
         /* I32(rm) x F32 x F32 -> I32 */
         return mkLazy3(mce, Ity_I32, vatom1, vatom2, vatom3);
      case Iop_ExtractV128:
         /* The extraction index (atom3) must be fully defined; it is
            passed through unshadowed. */
         complainIfUndefined(mce, atom3);
         return assignNew('V', mce, Ity_V128, triop(op, vatom1, vatom2, atom3));
      case Iop_Extract64:
         /* Likewise: index operand must be fully defined. */
         complainIfUndefined(mce, atom3);
         return assignNew('V', mce, Ity_I64, triop(op, vatom1, vatom2, atom3));
      case Iop_SetElem8x8:
      case Iop_SetElem16x4:
      case Iop_SetElem32x2:
         /* Here the lane index is atom2; it must be fully defined and
            is passed through unshadowed. */
         complainIfUndefined(mce, atom2);
         return assignNew('V', mce, Ity_I64, triop(op, vatom1, atom2, vatom3));
      default:
         /* Unhandled Triop: print it so the missing case is obvious. */
         ppIROp(op);
         VG_(tool_panic)("memcheck:expr2vbits_Triop");
   }
}
2177
2178
2179static
sewardj95448072004-11-22 20:19:51 +00002180IRAtom* expr2vbits_Binop ( MCEnv* mce,
2181 IROp op,
2182 IRAtom* atom1, IRAtom* atom2 )
2183{
2184 IRType and_or_ty;
2185 IRAtom* (*uifu) (MCEnv*, IRAtom*, IRAtom*);
2186 IRAtom* (*difd) (MCEnv*, IRAtom*, IRAtom*);
2187 IRAtom* (*improve) (MCEnv*, IRAtom*, IRAtom*);
2188
2189 IRAtom* vatom1 = expr2vbits( mce, atom1 );
2190 IRAtom* vatom2 = expr2vbits( mce, atom2 );
2191
2192 tl_assert(isOriginalAtom(mce,atom1));
2193 tl_assert(isOriginalAtom(mce,atom2));
2194 tl_assert(isShadowAtom(mce,vatom1));
2195 tl_assert(isShadowAtom(mce,vatom2));
2196 tl_assert(sameKindedAtoms(atom1,vatom1));
2197 tl_assert(sameKindedAtoms(atom2,vatom2));
2198 switch (op) {
2199
sewardjacd2e912005-01-13 19:17:06 +00002200 /* 64-bit SIMD */
2201
sewardj57f92b02010-08-22 11:54:14 +00002202 case Iop_ShrN8x8:
sewardjacd2e912005-01-13 19:17:06 +00002203 case Iop_ShrN16x4:
2204 case Iop_ShrN32x2:
sewardj03809ae2006-12-27 01:16:58 +00002205 case Iop_SarN8x8:
sewardjacd2e912005-01-13 19:17:06 +00002206 case Iop_SarN16x4:
2207 case Iop_SarN32x2:
2208 case Iop_ShlN16x4:
2209 case Iop_ShlN32x2:
sewardj114a9172008-02-09 01:49:32 +00002210 case Iop_ShlN8x8:
sewardjacd2e912005-01-13 19:17:06 +00002211 /* Same scheme as with all other shifts. */
2212 complainIfUndefined(mce, atom2);
sewardj7cf4e6b2008-05-01 20:24:26 +00002213 return assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2));
sewardjacd2e912005-01-13 19:17:06 +00002214
2215 case Iop_QNarrow32Sx2:
2216 case Iop_QNarrow16Sx4:
2217 case Iop_QNarrow16Ux4:
2218 return vectorNarrow64(mce, op, vatom1, vatom2);
2219
2220 case Iop_Min8Ux8:
sewardj57f92b02010-08-22 11:54:14 +00002221 case Iop_Min8Sx8:
sewardjacd2e912005-01-13 19:17:06 +00002222 case Iop_Max8Ux8:
sewardj57f92b02010-08-22 11:54:14 +00002223 case Iop_Max8Sx8:
sewardjacd2e912005-01-13 19:17:06 +00002224 case Iop_Avg8Ux8:
2225 case Iop_QSub8Sx8:
2226 case Iop_QSub8Ux8:
2227 case Iop_Sub8x8:
2228 case Iop_CmpGT8Sx8:
sewardj57f92b02010-08-22 11:54:14 +00002229 case Iop_CmpGT8Ux8:
sewardjacd2e912005-01-13 19:17:06 +00002230 case Iop_CmpEQ8x8:
2231 case Iop_QAdd8Sx8:
2232 case Iop_QAdd8Ux8:
sewardj57f92b02010-08-22 11:54:14 +00002233 case Iop_QSal8x8:
2234 case Iop_QShl8x8:
sewardjacd2e912005-01-13 19:17:06 +00002235 case Iop_Add8x8:
sewardj57f92b02010-08-22 11:54:14 +00002236 case Iop_Mul8x8:
2237 case Iop_PolynomialMul8x8:
sewardjacd2e912005-01-13 19:17:06 +00002238 return binary8Ix8(mce, vatom1, vatom2);
2239
2240 case Iop_Min16Sx4:
sewardj57f92b02010-08-22 11:54:14 +00002241 case Iop_Min16Ux4:
sewardjacd2e912005-01-13 19:17:06 +00002242 case Iop_Max16Sx4:
sewardj57f92b02010-08-22 11:54:14 +00002243 case Iop_Max16Ux4:
sewardjacd2e912005-01-13 19:17:06 +00002244 case Iop_Avg16Ux4:
2245 case Iop_QSub16Ux4:
2246 case Iop_QSub16Sx4:
2247 case Iop_Sub16x4:
2248 case Iop_Mul16x4:
2249 case Iop_MulHi16Sx4:
2250 case Iop_MulHi16Ux4:
2251 case Iop_CmpGT16Sx4:
sewardj57f92b02010-08-22 11:54:14 +00002252 case Iop_CmpGT16Ux4:
sewardjacd2e912005-01-13 19:17:06 +00002253 case Iop_CmpEQ16x4:
2254 case Iop_QAdd16Sx4:
2255 case Iop_QAdd16Ux4:
sewardj57f92b02010-08-22 11:54:14 +00002256 case Iop_QSal16x4:
2257 case Iop_QShl16x4:
sewardjacd2e912005-01-13 19:17:06 +00002258 case Iop_Add16x4:
sewardj57f92b02010-08-22 11:54:14 +00002259 case Iop_QDMulHi16Sx4:
2260 case Iop_QRDMulHi16Sx4:
sewardjacd2e912005-01-13 19:17:06 +00002261 return binary16Ix4(mce, vatom1, vatom2);
2262
2263 case Iop_Sub32x2:
sewardj114a9172008-02-09 01:49:32 +00002264 case Iop_Mul32x2:
sewardj57f92b02010-08-22 11:54:14 +00002265 case Iop_Max32Sx2:
2266 case Iop_Max32Ux2:
2267 case Iop_Min32Sx2:
2268 case Iop_Min32Ux2:
sewardjacd2e912005-01-13 19:17:06 +00002269 case Iop_CmpGT32Sx2:
sewardj57f92b02010-08-22 11:54:14 +00002270 case Iop_CmpGT32Ux2:
sewardjacd2e912005-01-13 19:17:06 +00002271 case Iop_CmpEQ32x2:
2272 case Iop_Add32x2:
sewardj57f92b02010-08-22 11:54:14 +00002273 case Iop_QAdd32Ux2:
2274 case Iop_QAdd32Sx2:
2275 case Iop_QSub32Ux2:
2276 case Iop_QSub32Sx2:
2277 case Iop_QSal32x2:
2278 case Iop_QShl32x2:
2279 case Iop_QDMulHi32Sx2:
2280 case Iop_QRDMulHi32Sx2:
sewardjacd2e912005-01-13 19:17:06 +00002281 return binary32Ix2(mce, vatom1, vatom2);
2282
sewardj57f92b02010-08-22 11:54:14 +00002283 case Iop_QSub64Ux1:
2284 case Iop_QSub64Sx1:
2285 case Iop_QAdd64Ux1:
2286 case Iop_QAdd64Sx1:
2287 case Iop_QSal64x1:
2288 case Iop_QShl64x1:
2289 case Iop_Sal64x1:
2290 return binary64Ix1(mce, vatom1, vatom2);
2291
2292 case Iop_QShlN8Sx8:
2293 case Iop_QShlN8x8:
2294 case Iop_QSalN8x8:
2295 complainIfUndefined(mce, atom2);
2296 return mkPCast8x8(mce, vatom1);
2297
2298 case Iop_QShlN16Sx4:
2299 case Iop_QShlN16x4:
2300 case Iop_QSalN16x4:
2301 complainIfUndefined(mce, atom2);
2302 return mkPCast16x4(mce, vatom1);
2303
2304 case Iop_QShlN32Sx2:
2305 case Iop_QShlN32x2:
2306 case Iop_QSalN32x2:
2307 complainIfUndefined(mce, atom2);
2308 return mkPCast32x2(mce, vatom1);
2309
2310 case Iop_QShlN64Sx1:
2311 case Iop_QShlN64x1:
2312 case Iop_QSalN64x1:
2313 complainIfUndefined(mce, atom2);
2314 return mkPCast32x2(mce, vatom1);
2315
2316 case Iop_PwMax32Sx2:
2317 case Iop_PwMax32Ux2:
2318 case Iop_PwMin32Sx2:
2319 case Iop_PwMin32Ux2:
2320 case Iop_PwMax32Fx2:
2321 case Iop_PwMin32Fx2:
2322 return assignNew('V', mce, Ity_I64, binop(Iop_PwMax32Ux2, mkPCast32x2(mce, vatom1),
2323 mkPCast32x2(mce, vatom2)));
2324
2325 case Iop_PwMax16Sx4:
2326 case Iop_PwMax16Ux4:
2327 case Iop_PwMin16Sx4:
2328 case Iop_PwMin16Ux4:
2329 return assignNew('V', mce, Ity_I64, binop(Iop_PwMax16Ux4, mkPCast16x4(mce, vatom1),
2330 mkPCast16x4(mce, vatom2)));
2331
2332 case Iop_PwMax8Sx8:
2333 case Iop_PwMax8Ux8:
2334 case Iop_PwMin8Sx8:
2335 case Iop_PwMin8Ux8:
2336 return assignNew('V', mce, Ity_I64, binop(Iop_PwMax8Ux8, mkPCast8x8(mce, vatom1),
2337 mkPCast8x8(mce, vatom2)));
2338
2339 case Iop_PwAdd32x2:
2340 case Iop_PwAdd32Fx2:
2341 return mkPCast32x2(mce,
2342 assignNew('V', mce, Ity_I64, binop(Iop_PwAdd32x2, mkPCast32x2(mce, vatom1),
2343 mkPCast32x2(mce, vatom2))));
2344
2345 case Iop_PwAdd16x4:
2346 return mkPCast16x4(mce,
2347 assignNew('V', mce, Ity_I64, binop(op, mkPCast16x4(mce, vatom1),
2348 mkPCast16x4(mce, vatom2))));
2349
2350 case Iop_PwAdd8x8:
2351 return mkPCast8x8(mce,
2352 assignNew('V', mce, Ity_I64, binop(op, mkPCast8x8(mce, vatom1),
2353 mkPCast8x8(mce, vatom2))));
2354
2355 case Iop_Shl8x8:
2356 case Iop_Shr8x8:
2357 case Iop_Sar8x8:
2358 case Iop_Sal8x8:
2359 return mkUifU64(mce,
2360 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
2361 mkPCast8x8(mce,vatom2)
2362 );
2363
2364 case Iop_Shl16x4:
2365 case Iop_Shr16x4:
2366 case Iop_Sar16x4:
2367 case Iop_Sal16x4:
2368 return mkUifU64(mce,
2369 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
2370 mkPCast16x4(mce,vatom2)
2371 );
2372
2373 case Iop_Shl32x2:
2374 case Iop_Shr32x2:
2375 case Iop_Sar32x2:
2376 case Iop_Sal32x2:
2377 return mkUifU64(mce,
2378 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
2379 mkPCast32x2(mce,vatom2)
2380 );
2381
sewardjacd2e912005-01-13 19:17:06 +00002382 /* 64-bit data-steering */
2383 case Iop_InterleaveLO32x2:
2384 case Iop_InterleaveLO16x4:
2385 case Iop_InterleaveLO8x8:
2386 case Iop_InterleaveHI32x2:
2387 case Iop_InterleaveHI16x4:
2388 case Iop_InterleaveHI8x8:
sewardj57f92b02010-08-22 11:54:14 +00002389 case Iop_CatOddLanes8x8:
2390 case Iop_CatEvenLanes8x8:
sewardj114a9172008-02-09 01:49:32 +00002391 case Iop_CatOddLanes16x4:
2392 case Iop_CatEvenLanes16x4:
sewardj57f92b02010-08-22 11:54:14 +00002393 case Iop_InterleaveOddLanes8x8:
2394 case Iop_InterleaveEvenLanes8x8:
2395 case Iop_InterleaveOddLanes16x4:
2396 case Iop_InterleaveEvenLanes16x4:
sewardj7cf4e6b2008-05-01 20:24:26 +00002397 return assignNew('V', mce, Ity_I64, binop(op, vatom1, vatom2));
sewardjacd2e912005-01-13 19:17:06 +00002398
sewardj57f92b02010-08-22 11:54:14 +00002399 case Iop_GetElem8x8:
2400 complainIfUndefined(mce, atom2);
2401 return assignNew('V', mce, Ity_I8, binop(op, vatom1, atom2));
2402 case Iop_GetElem16x4:
2403 complainIfUndefined(mce, atom2);
2404 return assignNew('V', mce, Ity_I16, binop(op, vatom1, atom2));
2405 case Iop_GetElem32x2:
2406 complainIfUndefined(mce, atom2);
2407 return assignNew('V', mce, Ity_I32, binop(op, vatom1, atom2));
2408
sewardj114a9172008-02-09 01:49:32 +00002409 /* Perm8x8: rearrange values in left arg using steering values
2410 from right arg. So rearrange the vbits in the same way but
2411 pessimise wrt steering values. */
2412 case Iop_Perm8x8:
2413 return mkUifU64(
2414 mce,
sewardj7cf4e6b2008-05-01 20:24:26 +00002415 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
sewardj114a9172008-02-09 01:49:32 +00002416 mkPCast8x8(mce, vatom2)
2417 );
2418
sewardj20d38f22005-02-07 23:50:18 +00002419 /* V128-bit SIMD */
sewardj0b070592004-12-10 21:44:22 +00002420
sewardj57f92b02010-08-22 11:54:14 +00002421 case Iop_ShrN8x16:
sewardja1d93302004-12-12 16:45:06 +00002422 case Iop_ShrN16x8:
2423 case Iop_ShrN32x4:
2424 case Iop_ShrN64x2:
sewardj57f92b02010-08-22 11:54:14 +00002425 case Iop_SarN8x16:
sewardja1d93302004-12-12 16:45:06 +00002426 case Iop_SarN16x8:
2427 case Iop_SarN32x4:
sewardj57f92b02010-08-22 11:54:14 +00002428 case Iop_SarN64x2:
2429 case Iop_ShlN8x16:
sewardja1d93302004-12-12 16:45:06 +00002430 case Iop_ShlN16x8:
2431 case Iop_ShlN32x4:
2432 case Iop_ShlN64x2:
sewardj620eb5b2005-10-22 12:50:43 +00002433 /* Same scheme as with all other shifts. Note: 22 Oct 05:
2434 this is wrong now, scalar shifts are done properly lazily.
2435 Vector shifts should be fixed too. */
sewardja1d93302004-12-12 16:45:06 +00002436 complainIfUndefined(mce, atom2);
sewardj7cf4e6b2008-05-01 20:24:26 +00002437 return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2));
sewardja1d93302004-12-12 16:45:06 +00002438
sewardjcbf8be72005-11-10 18:34:41 +00002439 /* V x V shifts/rotates are done using the standard lazy scheme. */
sewardj43d60752005-11-10 18:13:01 +00002440 case Iop_Shl8x16:
2441 case Iop_Shr8x16:
2442 case Iop_Sar8x16:
sewardj57f92b02010-08-22 11:54:14 +00002443 case Iop_Sal8x16:
sewardjcbf8be72005-11-10 18:34:41 +00002444 case Iop_Rol8x16:
sewardj43d60752005-11-10 18:13:01 +00002445 return mkUifUV128(mce,
sewardj7cf4e6b2008-05-01 20:24:26 +00002446 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
sewardj43d60752005-11-10 18:13:01 +00002447 mkPCast8x16(mce,vatom2)
2448 );
2449
2450 case Iop_Shl16x8:
2451 case Iop_Shr16x8:
2452 case Iop_Sar16x8:
sewardj57f92b02010-08-22 11:54:14 +00002453 case Iop_Sal16x8:
sewardjcbf8be72005-11-10 18:34:41 +00002454 case Iop_Rol16x8:
sewardj43d60752005-11-10 18:13:01 +00002455 return mkUifUV128(mce,
sewardj7cf4e6b2008-05-01 20:24:26 +00002456 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
sewardj43d60752005-11-10 18:13:01 +00002457 mkPCast16x8(mce,vatom2)
2458 );
2459
2460 case Iop_Shl32x4:
2461 case Iop_Shr32x4:
2462 case Iop_Sar32x4:
sewardj57f92b02010-08-22 11:54:14 +00002463 case Iop_Sal32x4:
sewardjcbf8be72005-11-10 18:34:41 +00002464 case Iop_Rol32x4:
sewardj43d60752005-11-10 18:13:01 +00002465 return mkUifUV128(mce,
sewardj7cf4e6b2008-05-01 20:24:26 +00002466 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
sewardj43d60752005-11-10 18:13:01 +00002467 mkPCast32x4(mce,vatom2)
2468 );
2469
sewardj57f92b02010-08-22 11:54:14 +00002470 case Iop_Shl64x2:
2471 case Iop_Shr64x2:
2472 case Iop_Sar64x2:
2473 case Iop_Sal64x2:
2474 return mkUifUV128(mce,
2475 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
2476 mkPCast64x2(mce,vatom2)
2477 );
2478
2479 case Iop_F32ToFixed32Ux4_RZ:
2480 case Iop_F32ToFixed32Sx4_RZ:
2481 case Iop_Fixed32UToF32x4_RN:
2482 case Iop_Fixed32SToF32x4_RN:
2483 complainIfUndefined(mce, atom2);
2484 return mkPCast32x4(mce, vatom1);
2485
2486 case Iop_F32ToFixed32Ux2_RZ:
2487 case Iop_F32ToFixed32Sx2_RZ:
2488 case Iop_Fixed32UToF32x2_RN:
2489 case Iop_Fixed32SToF32x2_RN:
2490 complainIfUndefined(mce, atom2);
2491 return mkPCast32x2(mce, vatom1);
2492
sewardja1d93302004-12-12 16:45:06 +00002493 case Iop_QSub8Ux16:
2494 case Iop_QSub8Sx16:
2495 case Iop_Sub8x16:
2496 case Iop_Min8Ux16:
sewardj43d60752005-11-10 18:13:01 +00002497 case Iop_Min8Sx16:
sewardja1d93302004-12-12 16:45:06 +00002498 case Iop_Max8Ux16:
sewardj43d60752005-11-10 18:13:01 +00002499 case Iop_Max8Sx16:
sewardja1d93302004-12-12 16:45:06 +00002500 case Iop_CmpGT8Sx16:
sewardj43d60752005-11-10 18:13:01 +00002501 case Iop_CmpGT8Ux16:
sewardja1d93302004-12-12 16:45:06 +00002502 case Iop_CmpEQ8x16:
2503 case Iop_Avg8Ux16:
sewardj43d60752005-11-10 18:13:01 +00002504 case Iop_Avg8Sx16:
sewardja1d93302004-12-12 16:45:06 +00002505 case Iop_QAdd8Ux16:
2506 case Iop_QAdd8Sx16:
sewardj57f92b02010-08-22 11:54:14 +00002507 case Iop_QSal8x16:
2508 case Iop_QShl8x16:
sewardja1d93302004-12-12 16:45:06 +00002509 case Iop_Add8x16:
sewardj57f92b02010-08-22 11:54:14 +00002510 case Iop_Mul8x16:
2511 case Iop_PolynomialMul8x16:
sewardja1d93302004-12-12 16:45:06 +00002512 return binary8Ix16(mce, vatom1, vatom2);
2513
2514 case Iop_QSub16Ux8:
2515 case Iop_QSub16Sx8:
2516 case Iop_Sub16x8:
2517 case Iop_Mul16x8:
2518 case Iop_MulHi16Sx8:
2519 case Iop_MulHi16Ux8:
2520 case Iop_Min16Sx8:
sewardj43d60752005-11-10 18:13:01 +00002521 case Iop_Min16Ux8:
sewardja1d93302004-12-12 16:45:06 +00002522 case Iop_Max16Sx8:
sewardj43d60752005-11-10 18:13:01 +00002523 case Iop_Max16Ux8:
sewardja1d93302004-12-12 16:45:06 +00002524 case Iop_CmpGT16Sx8:
sewardj43d60752005-11-10 18:13:01 +00002525 case Iop_CmpGT16Ux8:
sewardja1d93302004-12-12 16:45:06 +00002526 case Iop_CmpEQ16x8:
2527 case Iop_Avg16Ux8:
sewardj43d60752005-11-10 18:13:01 +00002528 case Iop_Avg16Sx8:
sewardja1d93302004-12-12 16:45:06 +00002529 case Iop_QAdd16Ux8:
2530 case Iop_QAdd16Sx8:
sewardj57f92b02010-08-22 11:54:14 +00002531 case Iop_QSal16x8:
2532 case Iop_QShl16x8:
sewardja1d93302004-12-12 16:45:06 +00002533 case Iop_Add16x8:
sewardj57f92b02010-08-22 11:54:14 +00002534 case Iop_QDMulHi16Sx8:
2535 case Iop_QRDMulHi16Sx8:
sewardja1d93302004-12-12 16:45:06 +00002536 return binary16Ix8(mce, vatom1, vatom2);
2537
2538 case Iop_Sub32x4:
2539 case Iop_CmpGT32Sx4:
sewardj43d60752005-11-10 18:13:01 +00002540 case Iop_CmpGT32Ux4:
sewardja1d93302004-12-12 16:45:06 +00002541 case Iop_CmpEQ32x4:
sewardj43d60752005-11-10 18:13:01 +00002542 case Iop_QAdd32Sx4:
2543 case Iop_QAdd32Ux4:
2544 case Iop_QSub32Sx4:
2545 case Iop_QSub32Ux4:
sewardj57f92b02010-08-22 11:54:14 +00002546 case Iop_QSal32x4:
2547 case Iop_QShl32x4:
sewardj43d60752005-11-10 18:13:01 +00002548 case Iop_Avg32Ux4:
2549 case Iop_Avg32Sx4:
sewardja1d93302004-12-12 16:45:06 +00002550 case Iop_Add32x4:
sewardj43d60752005-11-10 18:13:01 +00002551 case Iop_Max32Ux4:
2552 case Iop_Max32Sx4:
2553 case Iop_Min32Ux4:
2554 case Iop_Min32Sx4:
sewardjb823b852010-06-18 08:18:38 +00002555 case Iop_Mul32x4:
sewardj57f92b02010-08-22 11:54:14 +00002556 case Iop_QDMulHi32Sx4:
2557 case Iop_QRDMulHi32Sx4:
sewardja1d93302004-12-12 16:45:06 +00002558 return binary32Ix4(mce, vatom1, vatom2);
2559
2560 case Iop_Sub64x2:
2561 case Iop_Add64x2:
sewardjb823b852010-06-18 08:18:38 +00002562 case Iop_CmpGT64Sx2:
sewardj57f92b02010-08-22 11:54:14 +00002563 case Iop_QSal64x2:
2564 case Iop_QShl64x2:
2565 case Iop_QAdd64Ux2:
2566 case Iop_QAdd64Sx2:
2567 case Iop_QSub64Ux2:
2568 case Iop_QSub64Sx2:
sewardja1d93302004-12-12 16:45:06 +00002569 return binary64Ix2(mce, vatom1, vatom2);
2570
2571 case Iop_QNarrow32Sx4:
sewardj43d60752005-11-10 18:13:01 +00002572 case Iop_QNarrow32Ux4:
sewardja1d93302004-12-12 16:45:06 +00002573 case Iop_QNarrow16Sx8:
2574 case Iop_QNarrow16Ux8:
sewardj20d38f22005-02-07 23:50:18 +00002575 return vectorNarrowV128(mce, op, vatom1, vatom2);
sewardja1d93302004-12-12 16:45:06 +00002576
sewardj0b070592004-12-10 21:44:22 +00002577 case Iop_Sub64Fx2:
2578 case Iop_Mul64Fx2:
2579 case Iop_Min64Fx2:
2580 case Iop_Max64Fx2:
2581 case Iop_Div64Fx2:
2582 case Iop_CmpLT64Fx2:
2583 case Iop_CmpLE64Fx2:
2584 case Iop_CmpEQ64Fx2:
sewardj545663e2005-11-05 01:55:04 +00002585 case Iop_CmpUN64Fx2:
sewardj0b070592004-12-10 21:44:22 +00002586 case Iop_Add64Fx2:
2587 return binary64Fx2(mce, vatom1, vatom2);
2588
2589 case Iop_Sub64F0x2:
2590 case Iop_Mul64F0x2:
2591 case Iop_Min64F0x2:
2592 case Iop_Max64F0x2:
2593 case Iop_Div64F0x2:
2594 case Iop_CmpLT64F0x2:
2595 case Iop_CmpLE64F0x2:
2596 case Iop_CmpEQ64F0x2:
sewardj545663e2005-11-05 01:55:04 +00002597 case Iop_CmpUN64F0x2:
sewardj0b070592004-12-10 21:44:22 +00002598 case Iop_Add64F0x2:
2599 return binary64F0x2(mce, vatom1, vatom2);
2600
sewardj170ee212004-12-10 18:57:51 +00002601 case Iop_Sub32Fx4:
2602 case Iop_Mul32Fx4:
2603 case Iop_Min32Fx4:
2604 case Iop_Max32Fx4:
2605 case Iop_Div32Fx4:
2606 case Iop_CmpLT32Fx4:
2607 case Iop_CmpLE32Fx4:
2608 case Iop_CmpEQ32Fx4:
sewardj545663e2005-11-05 01:55:04 +00002609 case Iop_CmpUN32Fx4:
cerione78ba2a2005-11-14 03:00:35 +00002610 case Iop_CmpGT32Fx4:
2611 case Iop_CmpGE32Fx4:
sewardj3245c912004-12-10 14:58:26 +00002612 case Iop_Add32Fx4:
sewardj57f92b02010-08-22 11:54:14 +00002613 case Iop_Recps32Fx4:
2614 case Iop_Rsqrts32Fx4:
sewardj3245c912004-12-10 14:58:26 +00002615 return binary32Fx4(mce, vatom1, vatom2);
2616
sewardj57f92b02010-08-22 11:54:14 +00002617 case Iop_Sub32Fx2:
2618 case Iop_Mul32Fx2:
2619 case Iop_Min32Fx2:
2620 case Iop_Max32Fx2:
2621 case Iop_CmpEQ32Fx2:
2622 case Iop_CmpGT32Fx2:
2623 case Iop_CmpGE32Fx2:
2624 case Iop_Add32Fx2:
2625 case Iop_Recps32Fx2:
2626 case Iop_Rsqrts32Fx2:
2627 return binary32Fx2(mce, vatom1, vatom2);
2628
sewardj170ee212004-12-10 18:57:51 +00002629 case Iop_Sub32F0x4:
2630 case Iop_Mul32F0x4:
2631 case Iop_Min32F0x4:
2632 case Iop_Max32F0x4:
2633 case Iop_Div32F0x4:
2634 case Iop_CmpLT32F0x4:
2635 case Iop_CmpLE32F0x4:
2636 case Iop_CmpEQ32F0x4:
sewardj545663e2005-11-05 01:55:04 +00002637 case Iop_CmpUN32F0x4:
sewardj170ee212004-12-10 18:57:51 +00002638 case Iop_Add32F0x4:
2639 return binary32F0x4(mce, vatom1, vatom2);
2640
sewardj57f92b02010-08-22 11:54:14 +00002641 case Iop_QShlN8Sx16:
2642 case Iop_QShlN8x16:
2643 case Iop_QSalN8x16:
2644 complainIfUndefined(mce, atom2);
2645 return mkPCast8x16(mce, vatom1);
2646
2647 case Iop_QShlN16Sx8:
2648 case Iop_QShlN16x8:
2649 case Iop_QSalN16x8:
2650 complainIfUndefined(mce, atom2);
2651 return mkPCast16x8(mce, vatom1);
2652
2653 case Iop_QShlN32Sx4:
2654 case Iop_QShlN32x4:
2655 case Iop_QSalN32x4:
2656 complainIfUndefined(mce, atom2);
2657 return mkPCast32x4(mce, vatom1);
2658
2659 case Iop_QShlN64Sx2:
2660 case Iop_QShlN64x2:
2661 case Iop_QSalN64x2:
2662 complainIfUndefined(mce, atom2);
2663 return mkPCast32x4(mce, vatom1);
2664
2665 case Iop_Mull32Sx2:
2666 case Iop_Mull32Ux2:
2667 case Iop_QDMulLong32Sx2:
2668 return vectorLongenI64(mce, Iop_Longen32Sx2,
2669 mkUifU64(mce, vatom1, vatom2));
2670
2671 case Iop_Mull16Sx4:
2672 case Iop_Mull16Ux4:
2673 case Iop_QDMulLong16Sx4:
2674 return vectorLongenI64(mce, Iop_Longen16Sx4,
2675 mkUifU64(mce, vatom1, vatom2));
2676
2677 case Iop_Mull8Sx8:
2678 case Iop_Mull8Ux8:
2679 case Iop_PolynomialMull8x8:
2680 return vectorLongenI64(mce, Iop_Longen8Sx8,
2681 mkUifU64(mce, vatom1, vatom2));
2682
2683 case Iop_PwAdd32x4:
2684 return mkPCast32x4(mce,
2685 assignNew('V', mce, Ity_V128, binop(op, mkPCast32x4(mce, vatom1),
2686 mkPCast32x4(mce, vatom2))));
2687
2688 case Iop_PwAdd16x8:
2689 return mkPCast16x8(mce,
2690 assignNew('V', mce, Ity_V128, binop(op, mkPCast16x8(mce, vatom1),
2691 mkPCast16x8(mce, vatom2))));
2692
2693 case Iop_PwAdd8x16:
2694 return mkPCast8x16(mce,
2695 assignNew('V', mce, Ity_V128, binop(op, mkPCast8x16(mce, vatom1),
2696 mkPCast8x16(mce, vatom2))));
2697
sewardj20d38f22005-02-07 23:50:18 +00002698 /* V128-bit data-steering */
2699 case Iop_SetV128lo32:
2700 case Iop_SetV128lo64:
2701 case Iop_64HLtoV128:
sewardja1d93302004-12-12 16:45:06 +00002702 case Iop_InterleaveLO64x2:
2703 case Iop_InterleaveLO32x4:
2704 case Iop_InterleaveLO16x8:
2705 case Iop_InterleaveLO8x16:
2706 case Iop_InterleaveHI64x2:
2707 case Iop_InterleaveHI32x4:
2708 case Iop_InterleaveHI16x8:
2709 case Iop_InterleaveHI8x16:
sewardj57f92b02010-08-22 11:54:14 +00002710 case Iop_CatOddLanes8x16:
2711 case Iop_CatOddLanes16x8:
2712 case Iop_CatOddLanes32x4:
2713 case Iop_CatEvenLanes8x16:
2714 case Iop_CatEvenLanes16x8:
2715 case Iop_CatEvenLanes32x4:
2716 case Iop_InterleaveOddLanes8x16:
2717 case Iop_InterleaveOddLanes16x8:
2718 case Iop_InterleaveOddLanes32x4:
2719 case Iop_InterleaveEvenLanes8x16:
2720 case Iop_InterleaveEvenLanes16x8:
2721 case Iop_InterleaveEvenLanes32x4:
sewardj7cf4e6b2008-05-01 20:24:26 +00002722 return assignNew('V', mce, Ity_V128, binop(op, vatom1, vatom2));
sewardj57f92b02010-08-22 11:54:14 +00002723
2724 case Iop_GetElem8x16:
2725 complainIfUndefined(mce, atom2);
2726 return assignNew('V', mce, Ity_I8, binop(op, vatom1, atom2));
2727 case Iop_GetElem16x8:
2728 complainIfUndefined(mce, atom2);
2729 return assignNew('V', mce, Ity_I16, binop(op, vatom1, atom2));
2730 case Iop_GetElem32x4:
2731 complainIfUndefined(mce, atom2);
2732 return assignNew('V', mce, Ity_I32, binop(op, vatom1, atom2));
2733 case Iop_GetElem64x2:
2734 complainIfUndefined(mce, atom2);
2735 return assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2));
2736
sewardj620eb5b2005-10-22 12:50:43 +00002737 /* Perm8x16: rearrange values in left arg using steering values
2738 from right arg. So rearrange the vbits in the same way but
2739 pessimise wrt steering values. */
2740 case Iop_Perm8x16:
2741 return mkUifUV128(
2742 mce,
sewardj7cf4e6b2008-05-01 20:24:26 +00002743 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
sewardj620eb5b2005-10-22 12:50:43 +00002744 mkPCast8x16(mce, vatom2)
2745 );
sewardj170ee212004-12-10 18:57:51 +00002746
sewardj43d60752005-11-10 18:13:01 +00002747 /* These two take the lower half of each 16-bit lane, sign/zero
2748 extend it to 32, and multiply together, producing a 32x4
2749 result (and implicitly ignoring half the operand bits). So
2750 treat it as a bunch of independent 16x8 operations, but then
2751 do 32-bit shifts left-right to copy the lower half results
2752 (which are all 0s or all 1s due to PCasting in binary16Ix8)
2753 into the upper half of each result lane. */
2754 case Iop_MullEven16Ux8:
2755 case Iop_MullEven16Sx8: {
2756 IRAtom* at;
2757 at = binary16Ix8(mce,vatom1,vatom2);
sewardj7cf4e6b2008-05-01 20:24:26 +00002758 at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN32x4, at, mkU8(16)));
2759 at = assignNew('V', mce, Ity_V128, binop(Iop_SarN32x4, at, mkU8(16)));
sewardj43d60752005-11-10 18:13:01 +00002760 return at;
2761 }
2762
2763 /* Same deal as Iop_MullEven16{S,U}x8 */
2764 case Iop_MullEven8Ux16:
2765 case Iop_MullEven8Sx16: {
2766 IRAtom* at;
2767 at = binary8Ix16(mce,vatom1,vatom2);
sewardj7cf4e6b2008-05-01 20:24:26 +00002768 at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN16x8, at, mkU8(8)));
2769 at = assignNew('V', mce, Ity_V128, binop(Iop_SarN16x8, at, mkU8(8)));
sewardj43d60752005-11-10 18:13:01 +00002770 return at;
2771 }
2772
2773 /* narrow 2xV128 into 1xV128, hi half from left arg, in a 2 x
2774 32x4 -> 16x8 laneage, discarding the upper half of each lane.
2775 Simply apply same op to the V bits, since this really no more
2776 than a data steering operation. */
sewardjcbf8be72005-11-10 18:34:41 +00002777 case Iop_Narrow32x4:
2778 case Iop_Narrow16x8:
sewardj7cf4e6b2008-05-01 20:24:26 +00002779 return assignNew('V', mce, Ity_V128,
2780 binop(op, vatom1, vatom2));
sewardj43d60752005-11-10 18:13:01 +00002781
2782 case Iop_ShrV128:
2783 case Iop_ShlV128:
2784 /* Same scheme as with all other shifts. Note: 10 Nov 05:
2785 this is wrong now, scalar shifts are done properly lazily.
2786 Vector shifts should be fixed too. */
2787 complainIfUndefined(mce, atom2);
sewardj7cf4e6b2008-05-01 20:24:26 +00002788 return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2));
sewardj43d60752005-11-10 18:13:01 +00002789
sewardj69a13322005-04-23 01:14:51 +00002790 /* I128-bit data-steering */
2791 case Iop_64HLto128:
sewardj7cf4e6b2008-05-01 20:24:26 +00002792 return assignNew('V', mce, Ity_I128, binop(op, vatom1, vatom2));
sewardj69a13322005-04-23 01:14:51 +00002793
sewardj3245c912004-12-10 14:58:26 +00002794 /* Scalar floating point */
2795
sewardjed69fdb2006-02-03 16:12:27 +00002796 case Iop_RoundF64toInt:
2797 case Iop_RoundF64toF32:
sewardj06f96d02009-12-31 19:24:12 +00002798 case Iop_F64toI64S:
2799 case Iop_I64StoF64:
sewardj22ac5f42006-02-03 22:55:04 +00002800 case Iop_SinF64:
2801 case Iop_CosF64:
2802 case Iop_TanF64:
2803 case Iop_2xm1F64:
2804 case Iop_SqrtF64:
2805 /* I32(rm) x I64/F64 -> I64/F64 */
sewardj95448072004-11-22 20:19:51 +00002806 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
2807
sewardjd376a762010-06-27 09:08:54 +00002808 case Iop_RoundF32toInt:
sewardjaec1be32010-01-03 22:29:32 +00002809 case Iop_SqrtF32:
2810 /* I32(rm) x I32/F32 -> I32/F32 */
2811 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
2812
sewardj59570ff2010-01-01 11:59:33 +00002813 case Iop_F64toI32U:
sewardj06f96d02009-12-31 19:24:12 +00002814 case Iop_F64toI32S:
sewardje9e16d32004-12-10 13:17:55 +00002815 case Iop_F64toF32:
sewardj95448072004-11-22 20:19:51 +00002816 /* First arg is I32 (rounding mode), second is F64 (data). */
2817 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
2818
sewardj06f96d02009-12-31 19:24:12 +00002819 case Iop_F64toI16S:
sewardj95448072004-11-22 20:19:51 +00002820 /* First arg is I32 (rounding mode), second is F64 (data). */
2821 return mkLazy2(mce, Ity_I16, vatom1, vatom2);
2822
sewardj95448072004-11-22 20:19:51 +00002823 case Iop_CmpF64:
2824 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
2825
2826 /* non-FP after here */
2827
2828 case Iop_DivModU64to32:
2829 case Iop_DivModS64to32:
2830 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
2831
sewardj69a13322005-04-23 01:14:51 +00002832 case Iop_DivModU128to64:
2833 case Iop_DivModS128to64:
2834 return mkLazy2(mce, Ity_I128, vatom1, vatom2);
2835
sewardj95448072004-11-22 20:19:51 +00002836 case Iop_16HLto32:
sewardj7cf4e6b2008-05-01 20:24:26 +00002837 return assignNew('V', mce, Ity_I32, binop(op, vatom1, vatom2));
sewardj95448072004-11-22 20:19:51 +00002838 case Iop_32HLto64:
sewardj7cf4e6b2008-05-01 20:24:26 +00002839 return assignNew('V', mce, Ity_I64, binop(op, vatom1, vatom2));
sewardj95448072004-11-22 20:19:51 +00002840
sewardj6cf40ff2005-04-20 22:31:26 +00002841 case Iop_MullS64:
2842 case Iop_MullU64: {
2843 IRAtom* vLo64 = mkLeft64(mce, mkUifU64(mce, vatom1,vatom2));
2844 IRAtom* vHi64 = mkPCastTo(mce, Ity_I64, vLo64);
sewardj7cf4e6b2008-05-01 20:24:26 +00002845 return assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, vHi64, vLo64));
sewardj6cf40ff2005-04-20 22:31:26 +00002846 }
2847
sewardj95448072004-11-22 20:19:51 +00002848 case Iop_MullS32:
2849 case Iop_MullU32: {
2850 IRAtom* vLo32 = mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
2851 IRAtom* vHi32 = mkPCastTo(mce, Ity_I32, vLo32);
sewardj7cf4e6b2008-05-01 20:24:26 +00002852 return assignNew('V', mce, Ity_I64, binop(Iop_32HLto64, vHi32, vLo32));
sewardj95448072004-11-22 20:19:51 +00002853 }
2854
2855 case Iop_MullS16:
2856 case Iop_MullU16: {
2857 IRAtom* vLo16 = mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
2858 IRAtom* vHi16 = mkPCastTo(mce, Ity_I16, vLo16);
sewardj7cf4e6b2008-05-01 20:24:26 +00002859 return assignNew('V', mce, Ity_I32, binop(Iop_16HLto32, vHi16, vLo16));
sewardj95448072004-11-22 20:19:51 +00002860 }
2861
2862 case Iop_MullS8:
2863 case Iop_MullU8: {
2864 IRAtom* vLo8 = mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
2865 IRAtom* vHi8 = mkPCastTo(mce, Ity_I8, vLo8);
sewardj7cf4e6b2008-05-01 20:24:26 +00002866 return assignNew('V', mce, Ity_I16, binop(Iop_8HLto16, vHi8, vLo8));
sewardj95448072004-11-22 20:19:51 +00002867 }
2868
cerion9e591082005-06-23 15:28:34 +00002869 case Iop_DivS32:
2870 case Iop_DivU32:
2871 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
2872
sewardjb00944a2005-12-23 12:47:16 +00002873 case Iop_DivS64:
2874 case Iop_DivU64:
2875 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
2876
sewardj95448072004-11-22 20:19:51 +00002877 case Iop_Add32:
sewardjd5204dc2004-12-31 01:16:11 +00002878 if (mce->bogusLiterals)
2879 return expensiveAddSub(mce,True,Ity_I32,
2880 vatom1,vatom2, atom1,atom2);
2881 else
2882 goto cheap_AddSub32;
sewardj95448072004-11-22 20:19:51 +00002883 case Iop_Sub32:
sewardjd5204dc2004-12-31 01:16:11 +00002884 if (mce->bogusLiterals)
2885 return expensiveAddSub(mce,False,Ity_I32,
2886 vatom1,vatom2, atom1,atom2);
2887 else
2888 goto cheap_AddSub32;
2889
2890 cheap_AddSub32:
sewardj95448072004-11-22 20:19:51 +00002891 case Iop_Mul32:
sewardj992dff92005-10-07 11:08:55 +00002892 return mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
2893
sewardj463b3d92005-07-18 11:41:15 +00002894 case Iop_CmpORD32S:
2895 case Iop_CmpORD32U:
sewardj1bc82102005-12-23 00:16:24 +00002896 case Iop_CmpORD64S:
2897 case Iop_CmpORD64U:
2898 return doCmpORD(mce, op, vatom1,vatom2, atom1,atom2);
sewardj95448072004-11-22 20:19:51 +00002899
sewardj681be302005-01-15 20:43:58 +00002900 case Iop_Add64:
tomd9774d72005-06-27 08:11:01 +00002901 if (mce->bogusLiterals)
2902 return expensiveAddSub(mce,True,Ity_I64,
2903 vatom1,vatom2, atom1,atom2);
2904 else
2905 goto cheap_AddSub64;
sewardj681be302005-01-15 20:43:58 +00002906 case Iop_Sub64:
tomd9774d72005-06-27 08:11:01 +00002907 if (mce->bogusLiterals)
2908 return expensiveAddSub(mce,False,Ity_I64,
2909 vatom1,vatom2, atom1,atom2);
2910 else
2911 goto cheap_AddSub64;
2912
2913 cheap_AddSub64:
2914 case Iop_Mul64:
sewardj681be302005-01-15 20:43:58 +00002915 return mkLeft64(mce, mkUifU64(mce, vatom1,vatom2));
2916
sewardj95448072004-11-22 20:19:51 +00002917 case Iop_Mul16:
2918 case Iop_Add16:
2919 case Iop_Sub16:
2920 return mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
2921
2922 case Iop_Sub8:
2923 case Iop_Add8:
2924 return mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
2925
sewardj69a13322005-04-23 01:14:51 +00002926 case Iop_CmpEQ64:
sewardje6f8af42005-07-06 18:48:59 +00002927 case Iop_CmpNE64:
sewardj69a13322005-04-23 01:14:51 +00002928 if (mce->bogusLiterals)
2929 return expensiveCmpEQorNE(mce,Ity_I64, vatom1,vatom2, atom1,atom2 );
2930 else
2931 goto cheap_cmp64;
2932 cheap_cmp64:
tomcd986332005-04-26 07:44:48 +00002933 case Iop_CmpLE64S: case Iop_CmpLE64U:
2934 case Iop_CmpLT64U: case Iop_CmpLT64S:
sewardj69a13322005-04-23 01:14:51 +00002935 return mkPCastTo(mce, Ity_I1, mkUifU64(mce, vatom1,vatom2));
2936
sewardjd5204dc2004-12-31 01:16:11 +00002937 case Iop_CmpEQ32:
sewardje6f8af42005-07-06 18:48:59 +00002938 case Iop_CmpNE32:
sewardjd5204dc2004-12-31 01:16:11 +00002939 if (mce->bogusLiterals)
2940 return expensiveCmpEQorNE(mce,Ity_I32, vatom1,vatom2, atom1,atom2 );
2941 else
2942 goto cheap_cmp32;
sewardjd5204dc2004-12-31 01:16:11 +00002943 cheap_cmp32:
sewardj95448072004-11-22 20:19:51 +00002944 case Iop_CmpLE32S: case Iop_CmpLE32U:
2945 case Iop_CmpLT32U: case Iop_CmpLT32S:
sewardj95448072004-11-22 20:19:51 +00002946 return mkPCastTo(mce, Ity_I1, mkUifU32(mce, vatom1,vatom2));
2947
2948 case Iop_CmpEQ16: case Iop_CmpNE16:
2949 return mkPCastTo(mce, Ity_I1, mkUifU16(mce, vatom1,vatom2));
2950
2951 case Iop_CmpEQ8: case Iop_CmpNE8:
2952 return mkPCastTo(mce, Ity_I1, mkUifU8(mce, vatom1,vatom2));
2953
sewardjafed4c52009-07-12 13:00:17 +00002954 case Iop_CasCmpEQ8: case Iop_CasCmpNE8:
2955 case Iop_CasCmpEQ16: case Iop_CasCmpNE16:
2956 case Iop_CasCmpEQ32: case Iop_CasCmpNE32:
2957 case Iop_CasCmpEQ64: case Iop_CasCmpNE64:
2958 /* Just say these all produce a defined result, regardless
2959 of their arguments. See COMMENT_ON_CasCmpEQ in this file. */
2960 return assignNew('V', mce, Ity_I1, definedOfType(Ity_I1));
2961
sewardjaaddbc22005-10-07 09:49:53 +00002962 case Iop_Shl64: case Iop_Shr64: case Iop_Sar64:
2963 return scalarShift( mce, Ity_I64, op, vatom1,vatom2, atom1,atom2 );
2964
sewardj95448072004-11-22 20:19:51 +00002965 case Iop_Shl32: case Iop_Shr32: case Iop_Sar32:
sewardjaaddbc22005-10-07 09:49:53 +00002966 return scalarShift( mce, Ity_I32, op, vatom1,vatom2, atom1,atom2 );
sewardj95448072004-11-22 20:19:51 +00002967
sewardjdb67f5f2004-12-14 01:15:31 +00002968 case Iop_Shl16: case Iop_Shr16: case Iop_Sar16:
sewardjaaddbc22005-10-07 09:49:53 +00002969 return scalarShift( mce, Ity_I16, op, vatom1,vatom2, atom1,atom2 );
sewardj95448072004-11-22 20:19:51 +00002970
2971 case Iop_Shl8: case Iop_Shr8:
sewardjaaddbc22005-10-07 09:49:53 +00002972 return scalarShift( mce, Ity_I8, op, vatom1,vatom2, atom1,atom2 );
sewardj95448072004-11-22 20:19:51 +00002973
sewardj20d38f22005-02-07 23:50:18 +00002974 case Iop_AndV128:
2975 uifu = mkUifUV128; difd = mkDifDV128;
2976 and_or_ty = Ity_V128; improve = mkImproveANDV128; goto do_And_Or;
sewardj7010f6e2004-12-10 13:35:22 +00002977 case Iop_And64:
2978 uifu = mkUifU64; difd = mkDifD64;
2979 and_or_ty = Ity_I64; improve = mkImproveAND64; goto do_And_Or;
sewardj95448072004-11-22 20:19:51 +00002980 case Iop_And32:
2981 uifu = mkUifU32; difd = mkDifD32;
2982 and_or_ty = Ity_I32; improve = mkImproveAND32; goto do_And_Or;
2983 case Iop_And16:
2984 uifu = mkUifU16; difd = mkDifD16;
2985 and_or_ty = Ity_I16; improve = mkImproveAND16; goto do_And_Or;
2986 case Iop_And8:
2987 uifu = mkUifU8; difd = mkDifD8;
2988 and_or_ty = Ity_I8; improve = mkImproveAND8; goto do_And_Or;
2989
sewardj20d38f22005-02-07 23:50:18 +00002990 case Iop_OrV128:
2991 uifu = mkUifUV128; difd = mkDifDV128;
2992 and_or_ty = Ity_V128; improve = mkImproveORV128; goto do_And_Or;
sewardj7010f6e2004-12-10 13:35:22 +00002993 case Iop_Or64:
2994 uifu = mkUifU64; difd = mkDifD64;
2995 and_or_ty = Ity_I64; improve = mkImproveOR64; goto do_And_Or;
sewardj95448072004-11-22 20:19:51 +00002996 case Iop_Or32:
2997 uifu = mkUifU32; difd = mkDifD32;
2998 and_or_ty = Ity_I32; improve = mkImproveOR32; goto do_And_Or;
2999 case Iop_Or16:
3000 uifu = mkUifU16; difd = mkDifD16;
3001 and_or_ty = Ity_I16; improve = mkImproveOR16; goto do_And_Or;
3002 case Iop_Or8:
3003 uifu = mkUifU8; difd = mkDifD8;
3004 and_or_ty = Ity_I8; improve = mkImproveOR8; goto do_And_Or;
3005
3006 do_And_Or:
3007 return
3008 assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +00003009 'V', mce,
sewardj95448072004-11-22 20:19:51 +00003010 and_or_ty,
3011 difd(mce, uifu(mce, vatom1, vatom2),
3012 difd(mce, improve(mce, atom1, vatom1),
3013 improve(mce, atom2, vatom2) ) ) );
3014
3015 case Iop_Xor8:
3016 return mkUifU8(mce, vatom1, vatom2);
3017 case Iop_Xor16:
3018 return mkUifU16(mce, vatom1, vatom2);
3019 case Iop_Xor32:
3020 return mkUifU32(mce, vatom1, vatom2);
sewardj7010f6e2004-12-10 13:35:22 +00003021 case Iop_Xor64:
3022 return mkUifU64(mce, vatom1, vatom2);
sewardj20d38f22005-02-07 23:50:18 +00003023 case Iop_XorV128:
3024 return mkUifUV128(mce, vatom1, vatom2);
njn25e49d8e72002-09-23 09:36:25 +00003025
3026 default:
sewardj95448072004-11-22 20:19:51 +00003027 ppIROp(op);
3028 VG_(tool_panic)("memcheck:expr2vbits_Binop");
njn25e49d8e72002-09-23 09:36:25 +00003029 }
njn25e49d8e72002-09-23 09:36:25 +00003030}
3031
njn25e49d8e72002-09-23 09:36:25 +00003032
/* Compute the shadow (V-bit) expression for the unary operation 'op'
   applied to 'atom'.  The general strategies visible below are:
   - lanewise SIMD ops: pessimise each lane of the operand's shadow
     (mkPCast*) so any undefined bit poisons its whole lane;
   - narrowing/widening conversions: apply the same structural op to
     the shadow so V bits track the moved data bits;
   - ops whose result depends on all input bits (Clz, float conversions,
     etc.): pessimistically cast the whole shadow to the result size;
   - bit-for-bit ops (Not*, Reinterp*): pass the shadow through
     unchanged, since per-bit definedness is preserved exactly. */
static
IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
{
   IRAtom* vatom = expr2vbits( mce, atom );
   tl_assert(isOriginalAtom(mce,atom));
   switch (op) {

      /* 2 x F64 lanes in a V128. */
      case Iop_Sqrt64Fx2:
         return unary64Fx2(mce, vatom);

      /* F64 op on the low lane only; high lane passes through. */
      case Iop_Sqrt64F0x2:
         return unary64F0x2(mce, vatom);

      /* 4 x F32 lanes in a V128 (incl. int<->FP lane conversions and
         roundings, which are handled at the same lanewise granularity). */
      case Iop_Sqrt32Fx4:
      case Iop_RSqrt32Fx4:
      case Iop_Recip32Fx4:
      case Iop_I32UtoFx4:
      case Iop_I32StoFx4:
      case Iop_QFtoI32Ux4_RZ:
      case Iop_QFtoI32Sx4_RZ:
      case Iop_RoundF32x4_RM:
      case Iop_RoundF32x4_RP:
      case Iop_RoundF32x4_RN:
      case Iop_RoundF32x4_RZ:
      case Iop_Recip32x4:
      case Iop_Abs32Fx4:
      case Iop_Neg32Fx4:
      case Iop_Rsqrte32Fx4:
         return unary32Fx4(mce, vatom);

      /* 2 x F32 lanes in an I64. */
      case Iop_I32UtoFx2:
      case Iop_I32StoFx2:
      case Iop_Recip32Fx2:
      case Iop_Recip32x2:
      case Iop_Abs32Fx2:
      case Iop_Neg32Fx2:
      case Iop_Rsqrte32Fx2:
         return unary32Fx2(mce, vatom);

      /* F32 op on the low lane of a V128 only. */
      case Iop_Sqrt32F0x4:
      case Iop_RSqrt32F0x4:
      case Iop_Recip32F0x4:
         return unary32F0x4(mce, vatom);

      /* Pure data-movement at V128 width: apply the identical op to
         the shadow bits so they travel with the data. */
      case Iop_32UtoV128:
      case Iop_64UtoV128:
      case Iop_Dup8x16:
      case Iop_Dup16x8:
      case Iop_Dup32x4:
      case Iop_Reverse16_8x16:
      case Iop_Reverse32_8x16:
      case Iop_Reverse32_16x8:
      case Iop_Reverse64_8x16:
      case Iop_Reverse64_16x8:
      case Iop_Reverse64_32x4:
         return assignNew('V', mce, Ity_V128, unop(op, vatom));

      /* 64-bit result depends (pessimistically) on every input bit. */
      case Iop_F32toF64:
      case Iop_I32StoF64:
      case Iop_I32UtoF64:
      case Iop_NegF64:
      case Iop_AbsF64:
      case Iop_Est5FRSqrt:
      case Iop_RoundF64toF64_NEAREST:
      case Iop_RoundF64toF64_NegINF:
      case Iop_RoundF64toF64_PosINF:
      case Iop_RoundF64toF64_ZERO:
      case Iop_Clz64:
      case Iop_Ctz64:
         return mkPCastTo(mce, Ity_I64, vatom);

      /* 32-bit result depends (pessimistically) on every input bit. */
      case Iop_Clz32:
      case Iop_Ctz32:
      case Iop_TruncF64asF32:
      case Iop_NegF32:
      case Iop_AbsF32:
         return mkPCastTo(mce, Ity_I32, vatom);

      /* Widenings/narrowings/extractions to I64: structural, so the
         same op is applied to the shadow. */
      case Iop_1Uto64:
      case Iop_8Uto64:
      case Iop_8Sto64:
      case Iop_16Uto64:
      case Iop_16Sto64:
      case Iop_32Sto64:
      case Iop_32Uto64:
      case Iop_V128to64:
      case Iop_V128HIto64:
      case Iop_128HIto64:
      case Iop_128to64:
      case Iop_Dup8x8:
      case Iop_Dup16x4:
      case Iop_Dup32x2:
      case Iop_Reverse16_8x8:
      case Iop_Reverse32_8x8:
      case Iop_Reverse32_16x4:
      case Iop_Reverse64_8x8:
      case Iop_Reverse64_16x4:
      case Iop_Reverse64_32x2:
         return assignNew('V', mce, Ity_I64, unop(op, vatom));

      /* Structural ops producing I32. */
      case Iop_64to32:
      case Iop_64HIto32:
      case Iop_1Uto32:
      case Iop_1Sto32:
      case Iop_8Uto32:
      case Iop_16Uto32:
      case Iop_16Sto32:
      case Iop_8Sto32:
      case Iop_V128to32:
         return assignNew('V', mce, Ity_I32, unop(op, vatom));

      /* Structural ops producing I16. */
      case Iop_8Sto16:
      case Iop_8Uto16:
      case Iop_32to16:
      case Iop_32HIto16:
      case Iop_64to16:
         return assignNew('V', mce, Ity_I16, unop(op, vatom));

      /* Structural ops producing I8. */
      case Iop_1Uto8:
      case Iop_16to8:
      case Iop_16HIto8:
      case Iop_32to8:
      case Iop_64to8:
         return assignNew('V', mce, Ity_I8, unop(op, vatom));

      case Iop_32to1:
         return assignNew('V', mce, Ity_I1, unop(Iop_32to1, vatom));

      case Iop_64to1:
         return assignNew('V', mce, Ity_I1, unop(Iop_64to1, vatom));

      /* Bit-for-bit reinterpretations and bitwise NOTs preserve the
         definedness of each bit exactly, so the shadow is unchanged. */
      case Iop_ReinterpF64asI64:
      case Iop_ReinterpI64asF64:
      case Iop_ReinterpI32asF32:
      case Iop_ReinterpF32asI32:
      case Iop_NotV128:
      case Iop_Not64:
      case Iop_Not32:
      case Iop_Not16:
      case Iop_Not8:
      case Iop_Not1:
         return vatom;

      /* Lanewise integer ops: pessimise each lane of the shadow at
         the lane width of the operation. */
      case Iop_CmpNEZ8x8:
      case Iop_Cnt8x8:
      case Iop_Clz8Sx8:
      case Iop_Cls8Sx8:
      case Iop_Abs8x8:
         return mkPCast8x8(mce, vatom);

      case Iop_CmpNEZ8x16:
      case Iop_Cnt8x16:
      case Iop_Clz8Sx16:
      case Iop_Cls8Sx16:
      case Iop_Abs8x16:
         return mkPCast8x16(mce, vatom);

      case Iop_CmpNEZ16x4:
      case Iop_Clz16Sx4:
      case Iop_Cls16Sx4:
      case Iop_Abs16x4:
         return mkPCast16x4(mce, vatom);

      case Iop_CmpNEZ16x8:
      case Iop_Clz16Sx8:
      case Iop_Cls16Sx8:
      case Iop_Abs16x8:
         return mkPCast16x8(mce, vatom);

      case Iop_CmpNEZ32x2:
      case Iop_Clz32Sx2:
      case Iop_Cls32Sx2:
      case Iop_FtoI32Ux2_RZ:
      case Iop_FtoI32Sx2_RZ:
      case Iop_Abs32x2:
         return mkPCast32x2(mce, vatom);

      case Iop_CmpNEZ32x4:
      case Iop_Clz32Sx4:
      case Iop_Cls32Sx4:
      case Iop_FtoI32Ux4_RZ:
      case Iop_FtoI32Sx4_RZ:
      case Iop_Abs32x4:
         return mkPCast32x4(mce, vatom);

      case Iop_CmpwNEZ64:
         return mkPCastTo(mce, Ity_I64, vatom);

      case Iop_CmpNEZ64x2:
         return mkPCast64x2(mce, vatom);

      /* Lane-narrowing (possibly saturating) V128 ops: delegated to a
         dedicated handler. */
      case Iop_Shorten16x8:
      case Iop_Shorten32x4:
      case Iop_Shorten64x2:
      case Iop_QShortenS16Sx8:
      case Iop_QShortenU16Sx8:
      case Iop_QShortenU16Ux8:
      case Iop_QShortenS32Sx4:
      case Iop_QShortenU32Sx4:
      case Iop_QShortenU32Ux4:
      case Iop_QShortenS64Sx2:
      case Iop_QShortenU64Sx2:
      case Iop_QShortenU64Ux2:
         return vectorShortenV128(mce, op, vatom);

      /* Lane-widening I64 -> V128 ops: delegated likewise. */
      case Iop_Longen8Sx8:
      case Iop_Longen8Ux8:
      case Iop_Longen16Sx4:
      case Iop_Longen16Ux4:
      case Iop_Longen32Sx2:
      case Iop_Longen32Ux2:
         return vectorLongenI64(mce, op, vatom);

      /* Pairwise widening adds: pessimise the input at its lane size,
         apply the op so the shadow is widened pairwise the same way,
         then pessimise across the result. */
      case Iop_PwAddL32Ux2:
      case Iop_PwAddL32Sx2:
         return mkPCastTo(mce, Ity_I64,
               assignNew('V', mce, Ity_I64, unop(op, mkPCast32x2(mce, vatom))));

      case Iop_PwAddL16Ux4:
      case Iop_PwAddL16Sx4:
         return mkPCast32x2(mce,
               assignNew('V', mce, Ity_I64, unop(op, mkPCast16x4(mce, vatom))));

      case Iop_PwAddL8Ux8:
      case Iop_PwAddL8Sx8:
         return mkPCast16x4(mce,
               assignNew('V', mce, Ity_I64, unop(op, mkPCast8x8(mce, vatom))));

      case Iop_PwAddL32Ux4:
      case Iop_PwAddL32Sx4:
         return mkPCast64x2(mce,
               assignNew('V', mce, Ity_V128, unop(op, mkPCast32x4(mce, vatom))));

      case Iop_PwAddL16Ux8:
      case Iop_PwAddL16Sx8:
         return mkPCast32x4(mce,
               assignNew('V', mce, Ity_V128, unop(op, mkPCast16x8(mce, vatom))));

      case Iop_PwAddL8Ux16:
      case Iop_PwAddL8Sx16:
         return mkPCast16x8(mce,
               assignNew('V', mce, Ity_V128, unop(op, mkPCast8x16(mce, vatom))));

      default:
         ppIROp(op);
         VG_(tool_panic)("memcheck:expr2vbits_Unop");
   }
}
3281
3282
sewardj170ee212004-12-10 18:57:51 +00003283/* Worker function; do not call directly. */
sewardj95448072004-11-22 20:19:51 +00003284static
sewardj2e595852005-06-30 23:33:37 +00003285IRAtom* expr2vbits_Load_WRK ( MCEnv* mce,
3286 IREndness end, IRType ty,
3287 IRAtom* addr, UInt bias )
sewardj95448072004-11-22 20:19:51 +00003288{
3289 void* helper;
3290 Char* hname;
3291 IRDirty* di;
3292 IRTemp datavbits;
3293 IRAtom* addrAct;
3294
3295 tl_assert(isOriginalAtom(mce,addr));
sewardj2e595852005-06-30 23:33:37 +00003296 tl_assert(end == Iend_LE || end == Iend_BE);
sewardj95448072004-11-22 20:19:51 +00003297
3298 /* First, emit a definedness test for the address. This also sets
3299 the address (shadow) to 'defined' following the test. */
3300 complainIfUndefined( mce, addr );
3301
3302 /* Now cook up a call to the relevant helper function, to read the
3303 data V bits from shadow memory. */
sewardj7cf4e6b2008-05-01 20:24:26 +00003304 ty = shadowTypeV(ty);
sewardj2e595852005-06-30 23:33:37 +00003305
3306 if (end == Iend_LE) {
3307 switch (ty) {
njn1d0825f2006-03-27 11:37:07 +00003308 case Ity_I64: helper = &MC_(helperc_LOADV64le);
3309 hname = "MC_(helperc_LOADV64le)";
sewardj2e595852005-06-30 23:33:37 +00003310 break;
njn1d0825f2006-03-27 11:37:07 +00003311 case Ity_I32: helper = &MC_(helperc_LOADV32le);
3312 hname = "MC_(helperc_LOADV32le)";
sewardj2e595852005-06-30 23:33:37 +00003313 break;
njn1d0825f2006-03-27 11:37:07 +00003314 case Ity_I16: helper = &MC_(helperc_LOADV16le);
3315 hname = "MC_(helperc_LOADV16le)";
sewardj2e595852005-06-30 23:33:37 +00003316 break;
njn1d0825f2006-03-27 11:37:07 +00003317 case Ity_I8: helper = &MC_(helperc_LOADV8);
3318 hname = "MC_(helperc_LOADV8)";
sewardj2e595852005-06-30 23:33:37 +00003319 break;
3320 default: ppIRType(ty);
3321 VG_(tool_panic)("memcheck:do_shadow_Load(LE)");
3322 }
3323 } else {
sewardj8cf88b72005-07-08 01:29:33 +00003324 switch (ty) {
njn1d0825f2006-03-27 11:37:07 +00003325 case Ity_I64: helper = &MC_(helperc_LOADV64be);
3326 hname = "MC_(helperc_LOADV64be)";
sewardj8cf88b72005-07-08 01:29:33 +00003327 break;
njn1d0825f2006-03-27 11:37:07 +00003328 case Ity_I32: helper = &MC_(helperc_LOADV32be);
3329 hname = "MC_(helperc_LOADV32be)";
sewardj8cf88b72005-07-08 01:29:33 +00003330 break;
njn1d0825f2006-03-27 11:37:07 +00003331 case Ity_I16: helper = &MC_(helperc_LOADV16be);
3332 hname = "MC_(helperc_LOADV16be)";
sewardj8cf88b72005-07-08 01:29:33 +00003333 break;
njn1d0825f2006-03-27 11:37:07 +00003334 case Ity_I8: helper = &MC_(helperc_LOADV8);
3335 hname = "MC_(helperc_LOADV8)";
sewardj8cf88b72005-07-08 01:29:33 +00003336 break;
3337 default: ppIRType(ty);
3338 VG_(tool_panic)("memcheck:do_shadow_Load(BE)");
3339 }
sewardj95448072004-11-22 20:19:51 +00003340 }
3341
3342 /* Generate the actual address into addrAct. */
3343 if (bias == 0) {
3344 addrAct = addr;
3345 } else {
sewardj7cf97ee2004-11-28 14:25:01 +00003346 IROp mkAdd;
3347 IRAtom* eBias;
sewardj95448072004-11-22 20:19:51 +00003348 IRType tyAddr = mce->hWordTy;
3349 tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
sewardj7cf97ee2004-11-28 14:25:01 +00003350 mkAdd = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
3351 eBias = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
sewardj7cf4e6b2008-05-01 20:24:26 +00003352 addrAct = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBias) );
sewardj95448072004-11-22 20:19:51 +00003353 }
3354
3355 /* We need to have a place to park the V bits we're just about to
3356 read. */
sewardj1c0ce7a2009-07-01 08:10:49 +00003357 datavbits = newTemp(mce, ty, VSh);
sewardj95448072004-11-22 20:19:51 +00003358 di = unsafeIRDirty_1_N( datavbits,
sewardj53ee1fc2005-12-23 02:29:58 +00003359 1/*regparms*/,
3360 hname, VG_(fnptr_to_fnentry)( helper ),
sewardj95448072004-11-22 20:19:51 +00003361 mkIRExprVec_1( addrAct ));
3362 setHelperAnns( mce, di );
sewardj7cf4e6b2008-05-01 20:24:26 +00003363 stmt( 'V', mce, IRStmt_Dirty(di) );
sewardj95448072004-11-22 20:19:51 +00003364
3365 return mkexpr(datavbits);
3366}
3367
3368
3369static
sewardj2e595852005-06-30 23:33:37 +00003370IRAtom* expr2vbits_Load ( MCEnv* mce,
3371 IREndness end, IRType ty,
3372 IRAtom* addr, UInt bias )
sewardj170ee212004-12-10 18:57:51 +00003373{
3374 IRAtom *v64hi, *v64lo;
sewardj2e595852005-06-30 23:33:37 +00003375 tl_assert(end == Iend_LE || end == Iend_BE);
sewardj7cf4e6b2008-05-01 20:24:26 +00003376 switch (shadowTypeV(ty)) {
sewardj170ee212004-12-10 18:57:51 +00003377 case Ity_I8:
3378 case Ity_I16:
3379 case Ity_I32:
3380 case Ity_I64:
sewardj2e595852005-06-30 23:33:37 +00003381 return expr2vbits_Load_WRK(mce, end, ty, addr, bias);
sewardj170ee212004-12-10 18:57:51 +00003382 case Ity_V128:
sewardj2e595852005-06-30 23:33:37 +00003383 if (end == Iend_LE) {
3384 v64lo = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias);
3385 v64hi = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+8);
3386 } else {
sewardj2e595852005-06-30 23:33:37 +00003387 v64hi = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias);
3388 v64lo = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+8);
3389 }
sewardj7cf4e6b2008-05-01 20:24:26 +00003390 return assignNew( 'V', mce,
sewardj170ee212004-12-10 18:57:51 +00003391 Ity_V128,
sewardj20d38f22005-02-07 23:50:18 +00003392 binop(Iop_64HLtoV128, v64hi, v64lo));
sewardj170ee212004-12-10 18:57:51 +00003393 default:
sewardj2e595852005-06-30 23:33:37 +00003394 VG_(tool_panic)("expr2vbits_Load");
sewardj170ee212004-12-10 18:57:51 +00003395 }
3396}
3397
3398
3399static
sewardj95448072004-11-22 20:19:51 +00003400IRAtom* expr2vbits_Mux0X ( MCEnv* mce,
3401 IRAtom* cond, IRAtom* expr0, IRAtom* exprX )
3402{
3403 IRAtom *vbitsC, *vbits0, *vbitsX;
3404 IRType ty;
3405 /* Given Mux0X(cond,expr0,exprX), generate
3406 Mux0X(cond,expr0#,exprX#) `UifU` PCast(cond#)
3407 That is, steer the V bits like the originals, but trash the
3408 result if the steering value is undefined. This gives
3409 lazy propagation. */
3410 tl_assert(isOriginalAtom(mce, cond));
3411 tl_assert(isOriginalAtom(mce, expr0));
3412 tl_assert(isOriginalAtom(mce, exprX));
3413
3414 vbitsC = expr2vbits(mce, cond);
3415 vbits0 = expr2vbits(mce, expr0);
3416 vbitsX = expr2vbits(mce, exprX);
sewardj1c0ce7a2009-07-01 08:10:49 +00003417 ty = typeOfIRExpr(mce->sb->tyenv, vbits0);
sewardj95448072004-11-22 20:19:51 +00003418
3419 return
sewardj7cf4e6b2008-05-01 20:24:26 +00003420 mkUifU(mce, ty, assignNew('V', mce, ty,
3421 IRExpr_Mux0X(cond, vbits0, vbitsX)),
sewardj95448072004-11-22 20:19:51 +00003422 mkPCastTo(mce, ty, vbitsC) );
3423}
3424
3425/* --------- This is the main expression-handling function. --------- */
3426
3427static
3428IRExpr* expr2vbits ( MCEnv* mce, IRExpr* e )
3429{
3430 switch (e->tag) {
3431
3432 case Iex_Get:
3433 return shadow_GET( mce, e->Iex.Get.offset, e->Iex.Get.ty );
3434
3435 case Iex_GetI:
3436 return shadow_GETI( mce, e->Iex.GetI.descr,
3437 e->Iex.GetI.ix, e->Iex.GetI.bias );
3438
sewardj0b9d74a2006-12-24 02:24:11 +00003439 case Iex_RdTmp:
sewardj7cf4e6b2008-05-01 20:24:26 +00003440 return IRExpr_RdTmp( findShadowTmpV(mce, e->Iex.RdTmp.tmp) );
sewardj95448072004-11-22 20:19:51 +00003441
3442 case Iex_Const:
sewardj1c0ce7a2009-07-01 08:10:49 +00003443 return definedOfType(shadowTypeV(typeOfIRExpr(mce->sb->tyenv, e)));
sewardj95448072004-11-22 20:19:51 +00003444
sewardje91cea72006-02-08 19:32:02 +00003445 case Iex_Qop:
3446 return expr2vbits_Qop(
3447 mce,
3448 e->Iex.Qop.op,
3449 e->Iex.Qop.arg1, e->Iex.Qop.arg2,
3450 e->Iex.Qop.arg3, e->Iex.Qop.arg4
3451 );
3452
sewardjed69fdb2006-02-03 16:12:27 +00003453 case Iex_Triop:
3454 return expr2vbits_Triop(
3455 mce,
3456 e->Iex.Triop.op,
3457 e->Iex.Triop.arg1, e->Iex.Triop.arg2, e->Iex.Triop.arg3
3458 );
3459
sewardj95448072004-11-22 20:19:51 +00003460 case Iex_Binop:
3461 return expr2vbits_Binop(
3462 mce,
3463 e->Iex.Binop.op,
3464 e->Iex.Binop.arg1, e->Iex.Binop.arg2
3465 );
3466
3467 case Iex_Unop:
3468 return expr2vbits_Unop( mce, e->Iex.Unop.op, e->Iex.Unop.arg );
3469
sewardj2e595852005-06-30 23:33:37 +00003470 case Iex_Load:
3471 return expr2vbits_Load( mce, e->Iex.Load.end,
3472 e->Iex.Load.ty,
3473 e->Iex.Load.addr, 0/*addr bias*/ );
sewardj95448072004-11-22 20:19:51 +00003474
3475 case Iex_CCall:
3476 return mkLazyN( mce, e->Iex.CCall.args,
3477 e->Iex.CCall.retty,
3478 e->Iex.CCall.cee );
3479
3480 case Iex_Mux0X:
3481 return expr2vbits_Mux0X( mce, e->Iex.Mux0X.cond, e->Iex.Mux0X.expr0,
3482 e->Iex.Mux0X.exprX);
njn25e49d8e72002-09-23 09:36:25 +00003483
3484 default:
sewardj95448072004-11-22 20:19:51 +00003485 VG_(printf)("\n");
3486 ppIRExpr(e);
3487 VG_(printf)("\n");
3488 VG_(tool_panic)("memcheck: expr2vbits");
njn25e49d8e72002-09-23 09:36:25 +00003489 }
njn25e49d8e72002-09-23 09:36:25 +00003490}
3491
3492/*------------------------------------------------------------*/
sewardj95448072004-11-22 20:19:51 +00003493/*--- Generate shadow stmts from all kinds of IRStmts. ---*/
njn25e49d8e72002-09-23 09:36:25 +00003494/*------------------------------------------------------------*/
3495
sewardj95448072004-11-22 20:19:51 +00003496/* Widen a value to the host word size. */
njn25e49d8e72002-09-23 09:36:25 +00003497
3498static
sewardj95448072004-11-22 20:19:51 +00003499IRExpr* zwidenToHostWord ( MCEnv* mce, IRAtom* vatom )
njn25e49d8e72002-09-23 09:36:25 +00003500{
sewardj7cf97ee2004-11-28 14:25:01 +00003501 IRType ty, tyH;
3502
sewardj95448072004-11-22 20:19:51 +00003503 /* vatom is vbits-value and as such can only have a shadow type. */
3504 tl_assert(isShadowAtom(mce,vatom));
njn25e49d8e72002-09-23 09:36:25 +00003505
sewardj1c0ce7a2009-07-01 08:10:49 +00003506 ty = typeOfIRExpr(mce->sb->tyenv, vatom);
sewardj7cf97ee2004-11-28 14:25:01 +00003507 tyH = mce->hWordTy;
njn25e49d8e72002-09-23 09:36:25 +00003508
sewardj95448072004-11-22 20:19:51 +00003509 if (tyH == Ity_I32) {
3510 switch (ty) {
sewardj7cf4e6b2008-05-01 20:24:26 +00003511 case Ity_I32:
3512 return vatom;
3513 case Ity_I16:
3514 return assignNew('V', mce, tyH, unop(Iop_16Uto32, vatom));
3515 case Ity_I8:
3516 return assignNew('V', mce, tyH, unop(Iop_8Uto32, vatom));
3517 default:
3518 goto unhandled;
sewardj8ec2cfc2002-10-13 00:57:26 +00003519 }
sewardj6cf40ff2005-04-20 22:31:26 +00003520 } else
3521 if (tyH == Ity_I64) {
3522 switch (ty) {
sewardj7cf4e6b2008-05-01 20:24:26 +00003523 case Ity_I32:
3524 return assignNew('V', mce, tyH, unop(Iop_32Uto64, vatom));
3525 case Ity_I16:
3526 return assignNew('V', mce, tyH, unop(Iop_32Uto64,
3527 assignNew('V', mce, Ity_I32, unop(Iop_16Uto32, vatom))));
3528 case Ity_I8:
3529 return assignNew('V', mce, tyH, unop(Iop_32Uto64,
3530 assignNew('V', mce, Ity_I32, unop(Iop_8Uto32, vatom))));
3531 default:
3532 goto unhandled;
sewardj6cf40ff2005-04-20 22:31:26 +00003533 }
sewardj95448072004-11-22 20:19:51 +00003534 } else {
3535 goto unhandled;
sewardj8ec2cfc2002-10-13 00:57:26 +00003536 }
sewardj95448072004-11-22 20:19:51 +00003537 unhandled:
3538 VG_(printf)("\nty = "); ppIRType(ty); VG_(printf)("\n");
3539 VG_(tool_panic)("zwidenToHostWord");
njn25e49d8e72002-09-23 09:36:25 +00003540}
3541
njn25e49d8e72002-09-23 09:36:25 +00003542
sewardj95448072004-11-22 20:19:51 +00003543/* Generate a shadow store. addr is always the original address atom.
3544 You can pass in either originals or V-bits for the data atom, but
sewardj1c0ce7a2009-07-01 08:10:49 +00003545 obviously not both. guard :: Ity_I1 controls whether the store
3546 really happens; NULL means it unconditionally does. Note that
3547 guard itself is not checked for definedness; the caller of this
3548 function must do that if necessary. */
njn25e49d8e72002-09-23 09:36:25 +00003549
static
void do_shadow_Store ( MCEnv* mce,
                       IREndness end,
                       IRAtom* addr, UInt bias,
                       IRAtom* data, IRAtom* vdata,
                       IRAtom* guard )
{
   /* Emit IR that writes the V bits for a store to (addr + bias) into
      shadow memory.  Exactly one of 'data' (original value; V bits
      computed here) and 'vdata' (precomputed V bits) must be non-NULL.
      'guard', if non-NULL, makes the shadow store conditional; guard
      itself is not checked for definedness here (caller's job). */
   IROp     mkAdd;
   IRType   ty, tyAddr;
   void*    helper = NULL;
   Char*    hname = NULL;
   IRConst* c;

   tyAddr = mce->hWordTy;
   mkAdd  = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
   tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
   tl_assert( end == Iend_LE || end == Iend_BE );

   /* Enforce the data-XOR-vdata contract; note 'bias' is only
      supported on the vdata path. */
   if (data) {
      tl_assert(!vdata);
      tl_assert(isOriginalAtom(mce, data));
      tl_assert(bias == 0);
      vdata = expr2vbits( mce, data );
   } else {
      tl_assert(vdata);
   }

   tl_assert(isOriginalAtom(mce,addr));
   tl_assert(isShadowAtom(mce,vdata));

   if (guard) {
      tl_assert(isOriginalAtom(mce, guard));
      tl_assert(typeOfIRExpr(mce->sb->tyenv, guard) == Ity_I1);
   }

   ty = typeOfIRExpr(mce->sb->tyenv, vdata);

   // If we're not doing undefined value checking, pretend that this value
   // is "all valid".  That lets Vex's optimiser remove some of the V bit
   // shadow computation ops that precede it.
   if (MC_(clo_mc_level) == 1) {
      switch (ty) {
         case Ity_V128: // V128 weirdness
                        // NOTE(review): presumably IRConst_V128 takes a
                        // 16-bit mask (one bit per byte of the 128-bit
                        // value), hence a 16-bit "defined" constant here
                        // — verify against libvex_ir.h.
                        c = IRConst_V128(V_BITS16_DEFINED); break;
         case Ity_I64: c = IRConst_U64 (V_BITS64_DEFINED); break;
         case Ity_I32: c = IRConst_U32 (V_BITS32_DEFINED); break;
         case Ity_I16: c = IRConst_U16 (V_BITS16_DEFINED); break;
         case Ity_I8:  c = IRConst_U8  (V_BITS8_DEFINED);  break;
         /* (pre-existing) this message says "(LE)" regardless of 'end' */
         default:      VG_(tool_panic)("memcheck:do_shadow_Store(LE)");
      }
      vdata = IRExpr_Const( c );
   }

   /* First, emit a definedness test for the address.  This also sets
      the address (shadow) to 'defined' following the test. */
   complainIfUndefined( mce, addr );

   /* Now decide which helper function to call to write the data V
      bits into shadow memory. */
   if (end == Iend_LE) {
      switch (ty) {
         case Ity_V128: /* we'll use the helper twice */
         case Ity_I64: helper = &MC_(helperc_STOREV64le);
                       hname = "MC_(helperc_STOREV64le)";
                       break;
         case Ity_I32: helper = &MC_(helperc_STOREV32le);
                       hname = "MC_(helperc_STOREV32le)";
                       break;
         case Ity_I16: helper = &MC_(helperc_STOREV16le);
                       hname = "MC_(helperc_STOREV16le)";
                       break;
         case Ity_I8:  helper = &MC_(helperc_STOREV8);
                       hname = "MC_(helperc_STOREV8)";
                       break;
         default: VG_(tool_panic)("memcheck:do_shadow_Store(LE)");
      }
   } else {
      switch (ty) {
         case Ity_V128: /* we'll use the helper twice */
         case Ity_I64: helper = &MC_(helperc_STOREV64be);
                       hname = "MC_(helperc_STOREV64be)";
                       break;
         case Ity_I32: helper = &MC_(helperc_STOREV32be);
                       hname = "MC_(helperc_STOREV32be)";
                       break;
         case Ity_I16: helper = &MC_(helperc_STOREV16be);
                       hname = "MC_(helperc_STOREV16be)";
                       break;
         case Ity_I8:  helper = &MC_(helperc_STOREV8);
                       hname = "MC_(helperc_STOREV8)";
                       break;
         default: VG_(tool_panic)("memcheck:do_shadow_Store(BE)");
      }
   }

   if (ty == Ity_V128) {

      /* V128-bit case */
      /* See comment in next clause re 64-bit regparms */
      /* also, need to be careful about endianness */

      Int     offLo64, offHi64;
      IRDirty *diLo64, *diHi64;
      IRAtom  *addrLo64, *addrHi64;
      IRAtom  *vdataLo64, *vdataHi64;
      IRAtom  *eBiasLo64, *eBiasHi64;

      /* Which 64-bit half lives at the lower address depends on the
         endianness of the store. */
      if (end == Iend_LE) {
         offLo64 = 0;
         offHi64 = 8;
      } else {
         offLo64 = 8;
         offHi64 = 0;
      }

      /* Low 64 bits: address = addr + bias + offLo64. */
      eBiasLo64 = tyAddr==Ity_I32 ? mkU32(bias+offLo64) : mkU64(bias+offLo64);
      addrLo64  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasLo64) );
      vdataLo64 = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vdata));
      diLo64    = unsafeIRDirty_0_N(
                     1/*regparms*/,
                     hname, VG_(fnptr_to_fnentry)( helper ),
                     mkIRExprVec_2( addrLo64, vdataLo64 )
                  );
      /* High 64 bits: address = addr + bias + offHi64. */
      eBiasHi64 = tyAddr==Ity_I32 ? mkU32(bias+offHi64) : mkU64(bias+offHi64);
      addrHi64  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasHi64) );
      vdataHi64 = assignNew('V', mce, Ity_I64, unop(Iop_V128HIto64, vdata));
      diHi64    = unsafeIRDirty_0_N(
                     1/*regparms*/,
                     hname, VG_(fnptr_to_fnentry)( helper ),
                     mkIRExprVec_2( addrHi64, vdataHi64 )
                  );
      /* Both halves share the same (optional) guard. */
      if (guard) diLo64->guard = guard;
      if (guard) diHi64->guard = guard;
      setHelperAnns( mce, diLo64 );
      setHelperAnns( mce, diHi64 );
      stmt( 'V', mce, IRStmt_Dirty(diLo64) );
      stmt( 'V', mce, IRStmt_Dirty(diHi64) );

   } else {

      IRDirty *di;
      IRAtom  *addrAct;

      /* 8/16/32/64-bit cases */
      /* Generate the actual address into addrAct. */
      if (bias == 0) {
         addrAct = addr;
      } else {
         IRAtom* eBias   = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
         addrAct = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBias));
      }

      if (ty == Ity_I64) {
         /* We can't do this with regparm 2 on 32-bit platforms, since
            the back ends aren't clever enough to handle 64-bit
            regparm args.  Therefore be different. */
         di = unsafeIRDirty_0_N(
                 1/*regparms*/,
                 hname, VG_(fnptr_to_fnentry)( helper ),
                 mkIRExprVec_2( addrAct, vdata )
              );
      } else {
         /* Sub-64-bit data is zero-widened to the host word so it can
            be passed in a register to the helper. */
         di = unsafeIRDirty_0_N(
                 2/*regparms*/,
                 hname, VG_(fnptr_to_fnentry)( helper ),
                 mkIRExprVec_2( addrAct,
                                zwidenToHostWord( mce, vdata ))
              );
      }
      if (guard) di->guard = guard;
      setHelperAnns( mce, di );
      stmt( 'V', mce, IRStmt_Dirty(di) );
   }

}
njn25e49d8e72002-09-23 09:36:25 +00003725
njn25e49d8e72002-09-23 09:36:25 +00003726
sewardj95448072004-11-22 20:19:51 +00003727/* Do lazy pessimistic propagation through a dirty helper call, by
3728 looking at the annotations on it. This is the most complex part of
3729 Memcheck. */
njn25e49d8e72002-09-23 09:36:25 +00003730
sewardj95448072004-11-22 20:19:51 +00003731static IRType szToITy ( Int n )
3732{
3733 switch (n) {
3734 case 1: return Ity_I8;
3735 case 2: return Ity_I16;
3736 case 4: return Ity_I32;
3737 case 8: return Ity_I64;
3738 default: VG_(tool_panic)("szToITy(memcheck)");
3739 }
3740}
njn25e49d8e72002-09-23 09:36:25 +00003741
sewardj95448072004-11-22 20:19:51 +00003742static
3743void do_shadow_Dirty ( MCEnv* mce, IRDirty* d )
3744{
njn4c245e52009-03-15 23:25:38 +00003745 Int i, n, toDo, gSz, gOff;
sewardj2e595852005-06-30 23:33:37 +00003746 IRAtom *src, *here, *curr;
njn4c245e52009-03-15 23:25:38 +00003747 IRType tySrc, tyDst;
sewardj2e595852005-06-30 23:33:37 +00003748 IRTemp dst;
3749 IREndness end;
3750
3751 /* What's the native endianness? We need to know this. */
sewardj6e340c72005-07-10 00:53:42 +00003752# if defined(VG_BIGENDIAN)
sewardj2e595852005-06-30 23:33:37 +00003753 end = Iend_BE;
sewardj6e340c72005-07-10 00:53:42 +00003754# elif defined(VG_LITTLEENDIAN)
sewardj2e595852005-06-30 23:33:37 +00003755 end = Iend_LE;
3756# else
3757# error "Unknown endianness"
3758# endif
njn25e49d8e72002-09-23 09:36:25 +00003759
sewardj95448072004-11-22 20:19:51 +00003760 /* First check the guard. */
3761 complainIfUndefined(mce, d->guard);
3762
3763 /* Now round up all inputs and PCast over them. */
sewardj7cf97ee2004-11-28 14:25:01 +00003764 curr = definedOfType(Ity_I32);
sewardj95448072004-11-22 20:19:51 +00003765
3766 /* Inputs: unmasked args */
3767 for (i = 0; d->args[i]; i++) {
3768 if (d->cee->mcx_mask & (1<<i)) {
3769 /* ignore this arg */
njn25e49d8e72002-09-23 09:36:25 +00003770 } else {
sewardj95448072004-11-22 20:19:51 +00003771 here = mkPCastTo( mce, Ity_I32, expr2vbits(mce, d->args[i]) );
3772 curr = mkUifU32(mce, here, curr);
njn25e49d8e72002-09-23 09:36:25 +00003773 }
3774 }
sewardj95448072004-11-22 20:19:51 +00003775
3776 /* Inputs: guest state that we read. */
3777 for (i = 0; i < d->nFxState; i++) {
3778 tl_assert(d->fxState[i].fx != Ifx_None);
3779 if (d->fxState[i].fx == Ifx_Write)
3780 continue;
sewardja7203252004-11-26 19:17:47 +00003781
3782 /* Ignore any sections marked as 'always defined'. */
3783 if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size )) {
sewardje9e16d32004-12-10 13:17:55 +00003784 if (0)
sewardja7203252004-11-26 19:17:47 +00003785 VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
3786 d->fxState[i].offset, d->fxState[i].size );
3787 continue;
3788 }
3789
sewardj95448072004-11-22 20:19:51 +00003790 /* This state element is read or modified. So we need to
sewardje9e16d32004-12-10 13:17:55 +00003791 consider it. If larger than 8 bytes, deal with it in 8-byte
3792 chunks. */
3793 gSz = d->fxState[i].size;
3794 gOff = d->fxState[i].offset;
3795 tl_assert(gSz > 0);
3796 while (True) {
3797 if (gSz == 0) break;
3798 n = gSz <= 8 ? gSz : 8;
3799 /* update 'curr' with UifU of the state slice
3800 gOff .. gOff+n-1 */
3801 tySrc = szToITy( n );
sewardj7cf4e6b2008-05-01 20:24:26 +00003802 src = assignNew( 'V', mce, tySrc,
3803 shadow_GET(mce, gOff, tySrc ) );
sewardje9e16d32004-12-10 13:17:55 +00003804 here = mkPCastTo( mce, Ity_I32, src );
3805 curr = mkUifU32(mce, here, curr);
3806 gSz -= n;
3807 gOff += n;
3808 }
3809
sewardj95448072004-11-22 20:19:51 +00003810 }
3811
3812 /* Inputs: memory. First set up some info needed regardless of
3813 whether we're doing reads or writes. */
sewardj95448072004-11-22 20:19:51 +00003814
3815 if (d->mFx != Ifx_None) {
3816 /* Because we may do multiple shadow loads/stores from the same
3817 base address, it's best to do a single test of its
3818 definedness right now. Post-instrumentation optimisation
3819 should remove all but this test. */
njn4c245e52009-03-15 23:25:38 +00003820 IRType tyAddr;
sewardj95448072004-11-22 20:19:51 +00003821 tl_assert(d->mAddr);
3822 complainIfUndefined(mce, d->mAddr);
3823
sewardj1c0ce7a2009-07-01 08:10:49 +00003824 tyAddr = typeOfIRExpr(mce->sb->tyenv, d->mAddr);
sewardj95448072004-11-22 20:19:51 +00003825 tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);
3826 tl_assert(tyAddr == mce->hWordTy); /* not really right */
3827 }
3828
3829 /* Deal with memory inputs (reads or modifies) */
3830 if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
sewardj95448072004-11-22 20:19:51 +00003831 toDo = d->mSize;
sewardj2e595852005-06-30 23:33:37 +00003832 /* chew off 32-bit chunks. We don't care about the endianness
3833 since it's all going to be condensed down to a single bit,
3834 but nevertheless choose an endianness which is hopefully
3835 native to the platform. */
sewardj95448072004-11-22 20:19:51 +00003836 while (toDo >= 4) {
3837 here = mkPCastTo(
3838 mce, Ity_I32,
sewardj2e595852005-06-30 23:33:37 +00003839 expr2vbits_Load ( mce, end, Ity_I32,
sewardj95448072004-11-22 20:19:51 +00003840 d->mAddr, d->mSize - toDo )
3841 );
3842 curr = mkUifU32(mce, here, curr);
3843 toDo -= 4;
3844 }
3845 /* chew off 16-bit chunks */
3846 while (toDo >= 2) {
3847 here = mkPCastTo(
3848 mce, Ity_I32,
sewardj2e595852005-06-30 23:33:37 +00003849 expr2vbits_Load ( mce, end, Ity_I16,
sewardj95448072004-11-22 20:19:51 +00003850 d->mAddr, d->mSize - toDo )
3851 );
3852 curr = mkUifU32(mce, here, curr);
3853 toDo -= 2;
3854 }
3855 tl_assert(toDo == 0); /* also need to handle 1-byte excess */
3856 }
3857
3858 /* Whew! So curr is a 32-bit V-value summarising pessimistically
3859 all the inputs to the helper. Now we need to re-distribute the
3860 results to all destinations. */
3861
3862 /* Outputs: the destination temporary, if there is one. */
3863 if (d->tmp != IRTemp_INVALID) {
sewardj7cf4e6b2008-05-01 20:24:26 +00003864 dst = findShadowTmpV(mce, d->tmp);
sewardj1c0ce7a2009-07-01 08:10:49 +00003865 tyDst = typeOfIRTemp(mce->sb->tyenv, d->tmp);
sewardj7cf4e6b2008-05-01 20:24:26 +00003866 assign( 'V', mce, dst, mkPCastTo( mce, tyDst, curr) );
sewardj95448072004-11-22 20:19:51 +00003867 }
3868
3869 /* Outputs: guest state that we write or modify. */
3870 for (i = 0; i < d->nFxState; i++) {
3871 tl_assert(d->fxState[i].fx != Ifx_None);
3872 if (d->fxState[i].fx == Ifx_Read)
3873 continue;
sewardja7203252004-11-26 19:17:47 +00003874 /* Ignore any sections marked as 'always defined'. */
3875 if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size ))
3876 continue;
sewardje9e16d32004-12-10 13:17:55 +00003877 /* This state element is written or modified. So we need to
3878 consider it. If larger than 8 bytes, deal with it in 8-byte
3879 chunks. */
3880 gSz = d->fxState[i].size;
3881 gOff = d->fxState[i].offset;
3882 tl_assert(gSz > 0);
3883 while (True) {
3884 if (gSz == 0) break;
3885 n = gSz <= 8 ? gSz : 8;
3886 /* Write suitably-casted 'curr' to the state slice
3887 gOff .. gOff+n-1 */
3888 tyDst = szToITy( n );
3889 do_shadow_PUT( mce, gOff,
3890 NULL, /* original atom */
3891 mkPCastTo( mce, tyDst, curr ) );
3892 gSz -= n;
3893 gOff += n;
3894 }
sewardj95448072004-11-22 20:19:51 +00003895 }
3896
sewardj2e595852005-06-30 23:33:37 +00003897 /* Outputs: memory that we write or modify. Same comments about
3898 endianness as above apply. */
sewardj95448072004-11-22 20:19:51 +00003899 if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
sewardj95448072004-11-22 20:19:51 +00003900 toDo = d->mSize;
3901 /* chew off 32-bit chunks */
3902 while (toDo >= 4) {
sewardj2e595852005-06-30 23:33:37 +00003903 do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
3904 NULL, /* original data */
sewardj1c0ce7a2009-07-01 08:10:49 +00003905 mkPCastTo( mce, Ity_I32, curr ),
3906 NULL/*guard*/ );
sewardj95448072004-11-22 20:19:51 +00003907 toDo -= 4;
3908 }
3909 /* chew off 16-bit chunks */
3910 while (toDo >= 2) {
sewardj2e595852005-06-30 23:33:37 +00003911 do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
3912 NULL, /* original data */
sewardj1c0ce7a2009-07-01 08:10:49 +00003913 mkPCastTo( mce, Ity_I16, curr ),
3914 NULL/*guard*/ );
sewardj95448072004-11-22 20:19:51 +00003915 toDo -= 2;
3916 }
3917 tl_assert(toDo == 0); /* also need to handle 1-byte excess */
3918 }
3919
njn25e49d8e72002-09-23 09:36:25 +00003920}
3921
sewardj1c0ce7a2009-07-01 08:10:49 +00003922
sewardj826ec492005-05-12 18:05:00 +00003923/* We have an ABI hint telling us that [base .. base+len-1] is to
3924 become undefined ("writable"). Generate code to call a helper to
3925 notify the A/V bit machinery of this fact.
3926
3927 We call
sewardj7cf4e6b2008-05-01 20:24:26 +00003928 void MC_(helperc_MAKE_STACK_UNINIT) ( Addr base, UWord len,
3929 Addr nia );
sewardj826ec492005-05-12 18:05:00 +00003930*/
3931static
sewardj7cf4e6b2008-05-01 20:24:26 +00003932void do_AbiHint ( MCEnv* mce, IRExpr* base, Int len, IRExpr* nia )
sewardj826ec492005-05-12 18:05:00 +00003933{
3934 IRDirty* di;
sewardj7cf4e6b2008-05-01 20:24:26 +00003935 /* Minor optimisation: if not doing origin tracking, ignore the
3936 supplied nia and pass zero instead. This is on the basis that
3937 MC_(helperc_MAKE_STACK_UNINIT) will ignore it anyway, and we can
3938 almost always generate a shorter instruction to put zero into a
3939 register than any other value. */
3940 if (MC_(clo_mc_level) < 3)
3941 nia = mkIRExpr_HWord(0);
3942
sewardj826ec492005-05-12 18:05:00 +00003943 di = unsafeIRDirty_0_N(
3944 0/*regparms*/,
3945 "MC_(helperc_MAKE_STACK_UNINIT)",
sewardj53ee1fc2005-12-23 02:29:58 +00003946 VG_(fnptr_to_fnentry)( &MC_(helperc_MAKE_STACK_UNINIT) ),
sewardj7cf4e6b2008-05-01 20:24:26 +00003947 mkIRExprVec_3( base, mkIRExpr_HWord( (UInt)len), nia )
sewardj826ec492005-05-12 18:05:00 +00003948 );
sewardj7cf4e6b2008-05-01 20:24:26 +00003949 stmt( 'V', mce, IRStmt_Dirty(di) );
sewardj826ec492005-05-12 18:05:00 +00003950}
3951
njn25e49d8e72002-09-23 09:36:25 +00003952
sewardj1c0ce7a2009-07-01 08:10:49 +00003953/* ------ Dealing with IRCAS (big and complex) ------ */
3954
3955/* FWDS */
3956static IRAtom* gen_load_b ( MCEnv* mce, Int szB,
3957 IRAtom* baseaddr, Int offset );
3958static IRAtom* gen_maxU32 ( MCEnv* mce, IRAtom* b1, IRAtom* b2 );
3959static void gen_store_b ( MCEnv* mce, Int szB,
3960 IRAtom* baseaddr, Int offset, IRAtom* dataB,
3961 IRAtom* guard );
3962
3963static void do_shadow_CAS_single ( MCEnv* mce, IRCAS* cas );
3964static void do_shadow_CAS_double ( MCEnv* mce, IRCAS* cas );
3965
3966
3967/* Either ORIG and SHADOW are both IRExpr.RdTmps, or they are both
3968 IRExpr.Consts, else this asserts. If they are both Consts, it
3969 doesn't do anything. So that just leaves the RdTmp case.
3970
3971 In which case: this assigns the shadow value SHADOW to the IR
3972 shadow temporary associated with ORIG. That is, ORIG, being an
3973 original temporary, will have a shadow temporary associated with
3974 it. However, in the case envisaged here, there will so far have
3975 been no IR emitted to actually write a shadow value into that
3976 temporary. What this routine does is to (emit IR to) copy the
3977 value in SHADOW into said temporary, so that after this call,
3978 IRExpr.RdTmps of ORIG's shadow temp will correctly pick up the
3979 value in SHADOW.
3980
3981 Point is to allow callers to compute "by hand" a shadow value for
3982 ORIG, and force it to be associated with ORIG.
3983
3984 How do we know that that shadow associated with ORIG has not so far
3985 been assigned to? Well, we don't per se know that, but supposing
3986 it had. Then this routine would create a second assignment to it,
3987 and later the IR sanity checker would barf. But that never
3988 happens. QED.
3989*/
3990static void bind_shadow_tmp_to_orig ( UChar how,
3991 MCEnv* mce,
3992 IRAtom* orig, IRAtom* shadow )
3993{
3994 tl_assert(isOriginalAtom(mce, orig));
3995 tl_assert(isShadowAtom(mce, shadow));
3996 switch (orig->tag) {
3997 case Iex_Const:
3998 tl_assert(shadow->tag == Iex_Const);
3999 break;
4000 case Iex_RdTmp:
4001 tl_assert(shadow->tag == Iex_RdTmp);
4002 if (how == 'V') {
4003 assign('V', mce, findShadowTmpV(mce,orig->Iex.RdTmp.tmp),
4004 shadow);
4005 } else {
4006 tl_assert(how == 'B');
4007 assign('B', mce, findShadowTmpB(mce,orig->Iex.RdTmp.tmp),
4008 shadow);
4009 }
4010 break;
4011 default:
4012 tl_assert(0);
4013 }
4014}
4015
4016
4017static
4018void do_shadow_CAS ( MCEnv* mce, IRCAS* cas )
4019{
4020 /* Scheme is (both single- and double- cases):
4021
4022 1. fetch data#,dataB (the proposed new value)
4023
4024 2. fetch expd#,expdB (what we expect to see at the address)
4025
4026 3. check definedness of address
4027
4028 4. load old#,oldB from shadow memory; this also checks
4029 addressibility of the address
4030
4031 5. the CAS itself
4032
sewardjafed4c52009-07-12 13:00:17 +00004033 6. compute "expected == old". See COMMENT_ON_CasCmpEQ below.
sewardj1c0ce7a2009-07-01 08:10:49 +00004034
sewardjafed4c52009-07-12 13:00:17 +00004035 7. if "expected == old" (as computed by (6))
sewardj1c0ce7a2009-07-01 08:10:49 +00004036 store data#,dataB to shadow memory
4037
4038 Note that 5 reads 'old' but 4 reads 'old#'. Similarly, 5 stores
4039 'data' but 7 stores 'data#'. Hence it is possible for the
4040 shadow data to be incorrectly checked and/or updated:
4041
sewardj1c0ce7a2009-07-01 08:10:49 +00004042 * 7 is at least gated correctly, since the 'expected == old'
4043 condition is derived from outputs of 5. However, the shadow
4044 write could happen too late: imagine after 5 we are
4045 descheduled, a different thread runs, writes a different
4046 (shadow) value at the address, and then we resume, hence
4047 overwriting the shadow value written by the other thread.
4048
4049 Because the original memory access is atomic, there's no way to
4050 make both the original and shadow accesses into a single atomic
4051 thing, hence this is unavoidable.
4052
4053 At least as Valgrind stands, I don't think it's a problem, since
4054 we're single threaded *and* we guarantee that there are no
4055 context switches during the execution of any specific superblock
4056 -- context switches can only happen at superblock boundaries.
4057
4058 If Valgrind ever becomes MT in the future, then it might be more
4059 of a problem. A possible kludge would be to artificially
4060 associate with the location, a lock, which we must acquire and
4061 release around the transaction as a whole. Hmm, that probably
4062 would't work properly since it only guards us against other
4063 threads doing CASs on the same location, not against other
4064 threads doing normal reads and writes.
sewardjafed4c52009-07-12 13:00:17 +00004065
4066 ------------------------------------------------------------
4067
4068 COMMENT_ON_CasCmpEQ:
4069
4070 Note two things. Firstly, in the sequence above, we compute
4071 "expected == old", but we don't check definedness of it. Why
4072 not? Also, the x86 and amd64 front ends use
4073 Iop_CmpCas{EQ,NE}{8,16,32,64} comparisons to make the equivalent
4074 determination (expected == old ?) for themselves, and we also
4075 don't check definedness for those primops; we just say that the
4076 result is defined. Why? Details follow.
4077
4078 x86/amd64 contains various forms of locked insns:
4079 * lock prefix before all basic arithmetic insn;
4080 eg lock xorl %reg1,(%reg2)
4081 * atomic exchange reg-mem
4082 * compare-and-swaps
4083
4084 Rather than attempt to represent them all, which would be a
4085 royal PITA, I used a result from Maurice Herlihy
4086 (http://en.wikipedia.org/wiki/Maurice_Herlihy), in which he
4087 demonstrates that compare-and-swap is a primitive more general
4088 than the other two, and so can be used to represent all of them.
4089 So the translation scheme for (eg) lock incl (%reg) is as
4090 follows:
4091
4092 again:
4093 old = * %reg
4094 new = old + 1
4095 atomically { if (* %reg == old) { * %reg = new } else { goto again } }
4096
4097 The "atomically" is the CAS bit. The scheme is always the same:
4098 get old value from memory, compute new value, atomically stuff
4099 new value back in memory iff the old value has not changed (iow,
4100 no other thread modified it in the meantime). If it has changed
4101 then we've been out-raced and we have to start over.
4102
4103 Now that's all very neat, but it has the bad side effect of
4104 introducing an explicit equality test into the translation.
4105 Consider the behaviour of said code on a memory location which
4106 is uninitialised. We will wind up doing a comparison on
4107 uninitialised data, and mc duly complains.
4108
4109 What's difficult about this is, the common case is that the
4110 location is uncontended, and so we're usually comparing the same
4111 value (* %reg) with itself. So we shouldn't complain even if it
4112 is undefined. But mc doesn't know that.
4113
4114 My solution is to mark the == in the IR specially, so as to tell
4115 mc that it almost certainly compares a value with itself, and we
4116 should just regard the result as always defined. Rather than
4117 add a bit to all IROps, I just cloned Iop_CmpEQ{8,16,32,64} into
4118 Iop_CasCmpEQ{8,16,32,64} so as not to disturb anything else.
4119
4120 So there's always the question of, can this give a false
4121 negative? eg, imagine that initially, * %reg is defined; and we
4122 read that; but then in the gap between the read and the CAS, a
4123 different thread writes an undefined (and different) value at
4124 the location. Then the CAS in this thread will fail and we will
4125 go back to "again:", but without knowing that the trip back
4126 there was based on an undefined comparison. No matter; at least
4127 the other thread won the race and the location is correctly
4128 marked as undefined. What if it wrote an uninitialised version
4129 of the same value that was there originally, though?
4130
4131 etc etc. Seems like there's a small corner case in which we
4132 might lose the fact that something's defined -- we're out-raced
4133 in between the "old = * reg" and the "atomically {", _and_ the
4134 other thread is writing in an undefined version of what's
4135 already there. Well, that seems pretty unlikely.
4136
4137 ---
4138
4139 If we ever need to reinstate it .. code which generates a
4140 definedness test for "expected == old" was removed at r10432 of
4141 this file.
sewardj1c0ce7a2009-07-01 08:10:49 +00004142 */
4143 if (cas->oldHi == IRTemp_INVALID) {
4144 do_shadow_CAS_single( mce, cas );
4145 } else {
4146 do_shadow_CAS_double( mce, cas );
4147 }
4148}
4149
4150
static void do_shadow_CAS_single ( MCEnv* mce, IRCAS* cas )
{
   /* Single-element CAS (oldHi == IRTemp_INVALID).  The numbered
      steps below follow the 7-step scheme described in
      do_shadow_CAS.  'v' prefixes denote V-bit (definedness) shadow
      atoms, 'b' prefixes denote origin-tracking shadow atoms; the
      latter are only computed when origin tracking is enabled
      (otrak). */
   IRAtom *vdataLo = NULL, *bdataLo = NULL;
   IRAtom *vexpdLo = NULL, *bexpdLo = NULL;
   IRAtom *voldLo  = NULL, *boldLo  = NULL;
   IRAtom *expd_eq_old = NULL;
   IROp   opCasCmpEQ;
   Int    elemSzB;
   IRType elemTy;
   Bool   otrak = MC_(clo_mc_level) >= 3; /* a shorthand */

   /* single CAS */
   tl_assert(cas->oldHi == IRTemp_INVALID);
   tl_assert(cas->expdHi == NULL);
   tl_assert(cas->dataHi == NULL);

   /* The element type fixes both the access size and which
      "always defined" comparison op to use in step 6. */
   elemTy = typeOfIRExpr(mce->sb->tyenv, cas->expdLo);
   switch (elemTy) {
      case Ity_I8:  elemSzB = 1; opCasCmpEQ = Iop_CasCmpEQ8;  break;
      case Ity_I16: elemSzB = 2; opCasCmpEQ = Iop_CasCmpEQ16; break;
      case Ity_I32: elemSzB = 4; opCasCmpEQ = Iop_CasCmpEQ32; break;
      case Ity_I64: elemSzB = 8; opCasCmpEQ = Iop_CasCmpEQ64; break;
      default: tl_assert(0); /* IR defn disallows any other types */
   }

   /* 1. fetch data# (the proposed new value) */
   tl_assert(isOriginalAtom(mce, cas->dataLo));
   vdataLo
      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataLo));
   tl_assert(isShadowAtom(mce, vdataLo));
   if (otrak) {
      bdataLo
         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataLo));
      tl_assert(isShadowAtom(mce, bdataLo));
   }

   /* 2. fetch expected# (what we expect to see at the address) */
   tl_assert(isOriginalAtom(mce, cas->expdLo));
   vexpdLo
      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdLo));
   tl_assert(isShadowAtom(mce, vexpdLo));
   if (otrak) {
      bexpdLo
         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdLo));
      tl_assert(isShadowAtom(mce, bexpdLo));
   }

   /* 3. check definedness of address */
   /* 4. fetch old# from shadow memory; this also checks
         addressibility of the address */
   voldLo
      = assignNew(
           'V', mce, elemTy,
           expr2vbits_Load( 
              mce,
              cas->end, elemTy, cas->addr, 0/*Addr bias*/
        ));
   /* Make the loaded shadow visible through cas->oldLo's shadow
      temp, which has not been written by any other emitted IR. */
   bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldLo), voldLo);
   if (otrak) {
      boldLo
         = assignNew('B', mce, Ity_I32,
                     gen_load_b(mce, elemSzB, cas->addr, 0/*addr bias*/));
      bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldLo), boldLo);
   }

   /* 5. the CAS itself */
   stmt( 'C', mce, IRStmt_CAS(cas) );

   /* 6. compute "expected == old" */
   /* See COMMENT_ON_CasCmpEQ in this file for background/rationale. */
   /* Note that 'C' is kinda faking it; it is indeed a non-shadow
      tree, but it's not copied from the input block. */
   expd_eq_old
      = assignNew('C', mce, Ity_I1,
                  binop(opCasCmpEQ, cas->expdLo, mkexpr(cas->oldLo)));

   /* 7. if "expected == old"
            store data# to shadow memory */
   do_shadow_Store( mce, cas->end, cas->addr, 0/*bias*/,
                    NULL/*data*/, vdataLo/*vdata*/,
                    expd_eq_old/*guard for store*/ );
   if (otrak) {
      gen_store_b( mce, elemSzB, cas->addr, 0/*offset*/,
                   bdataLo/*bdata*/,
                   expd_eq_old/*guard for store*/ );
   }
}
4238
4239
static void do_shadow_CAS_double ( MCEnv* mce, IRCAS* cas )
{
   /* Double-element CAS (oldHi != IRTemp_INVALID).  The numbered
      steps below follow the 7-step scheme described in
      do_shadow_CAS, applied to both the Hi and Lo halves.  'v'
      prefixes denote V-bit (definedness) shadow atoms, 'b' prefixes
      denote origin-tracking shadow atoms; the latter are only
      computed when origin tracking is enabled (otrak). */
   IRAtom *vdataHi = NULL, *bdataHi = NULL;
   IRAtom *vdataLo = NULL, *bdataLo = NULL;
   IRAtom *vexpdHi = NULL, *bexpdHi = NULL;
   IRAtom *vexpdLo = NULL, *bexpdLo = NULL;
   IRAtom *voldHi  = NULL, *boldHi  = NULL;
   IRAtom *voldLo  = NULL, *boldLo  = NULL;
   IRAtom *xHi = NULL, *xLo = NULL, *xHL = NULL;
   IRAtom *expd_eq_old = NULL, *zero = NULL;
   IROp   opCasCmpEQ, opOr, opXor;
   Int    elemSzB, memOffsLo, memOffsHi;
   IRType elemTy;
   Bool   otrak = MC_(clo_mc_level) >= 3; /* a shorthand */

   /* double CAS */
   tl_assert(cas->oldHi != IRTemp_INVALID);
   tl_assert(cas->expdHi != NULL);
   tl_assert(cas->dataHi != NULL);

   /* The element type fixes the per-half access size, the ops used
      to build the combined equality test in step 6, and a matching
      zero constant. */
   elemTy = typeOfIRExpr(mce->sb->tyenv, cas->expdLo);
   switch (elemTy) {
      case Ity_I8:
         opCasCmpEQ = Iop_CasCmpEQ8; opOr = Iop_Or8; opXor = Iop_Xor8;
         elemSzB = 1; zero = mkU8(0);
         break;
      case Ity_I16:
         opCasCmpEQ = Iop_CasCmpEQ16; opOr = Iop_Or16; opXor = Iop_Xor16;
         elemSzB = 2; zero = mkU16(0);
         break;
      case Ity_I32:
         opCasCmpEQ = Iop_CasCmpEQ32; opOr = Iop_Or32; opXor = Iop_Xor32;
         elemSzB = 4; zero = mkU32(0);
         break;
      case Ity_I64:
         opCasCmpEQ = Iop_CasCmpEQ64; opOr = Iop_Or64; opXor = Iop_Xor64;
         elemSzB = 8; zero = mkU64(0);
         break;
      default:
         tl_assert(0); /* IR defn disallows any other types */
   }

   /* 1. fetch data# (the proposed new value) */
   tl_assert(isOriginalAtom(mce, cas->dataHi));
   tl_assert(isOriginalAtom(mce, cas->dataLo));
   vdataHi
      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataHi));
   vdataLo
      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataLo));
   tl_assert(isShadowAtom(mce, vdataHi));
   tl_assert(isShadowAtom(mce, vdataLo));
   if (otrak) {
      bdataHi
         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataHi));
      bdataLo
         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataLo));
      tl_assert(isShadowAtom(mce, bdataHi));
      tl_assert(isShadowAtom(mce, bdataLo));
   }

   /* 2. fetch expected# (what we expect to see at the address) */
   tl_assert(isOriginalAtom(mce, cas->expdHi));
   tl_assert(isOriginalAtom(mce, cas->expdLo));
   vexpdHi
      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdHi));
   vexpdLo
      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdLo));
   tl_assert(isShadowAtom(mce, vexpdHi));
   tl_assert(isShadowAtom(mce, vexpdLo));
   if (otrak) {
      bexpdHi
         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdHi));
      bexpdLo
         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdLo));
      tl_assert(isShadowAtom(mce, bexpdHi));
      tl_assert(isShadowAtom(mce, bexpdLo));
   }

   /* 3. check definedness of address */
   /* 4. fetch old# from shadow memory; this also checks
         addressibility of the address */
   /* The in-memory placement of the Hi and Lo halves depends on the
      endianness of the access. */
   if (cas->end == Iend_LE) {
      memOffsLo = 0;
      memOffsHi = elemSzB;
   } else {
      tl_assert(cas->end == Iend_BE);
      memOffsLo = elemSzB;
      memOffsHi = 0;
   }
   voldHi
      = assignNew(
           'V', mce, elemTy,
           expr2vbits_Load( 
              mce,
              cas->end, elemTy, cas->addr, memOffsHi/*Addr bias*/
        ));
   voldLo
      = assignNew(
           'V', mce, elemTy,
           expr2vbits_Load( 
              mce,
              cas->end, elemTy, cas->addr, memOffsLo/*Addr bias*/
        ));
   /* Make the loaded shadows visible through the shadow temps of
      cas->oldHi/oldLo, which no other emitted IR has written. */
   bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldHi), voldHi);
   bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldLo), voldLo);
   if (otrak) {
      boldHi
         = assignNew('B', mce, Ity_I32,
                     gen_load_b(mce, elemSzB, cas->addr,
                                memOffsHi/*addr bias*/));
      boldLo
         = assignNew('B', mce, Ity_I32,
                     gen_load_b(mce, elemSzB, cas->addr,
                                memOffsLo/*addr bias*/));
      bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldHi), boldHi);
      bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldLo), boldLo);
   }

   /* 5. the CAS itself */
   stmt( 'C', mce, IRStmt_CAS(cas) );

   /* 6. compute "expected == old" */
   /* See COMMENT_ON_CasCmpEQ in this file for background/rationale. */
   /* Note that 'C' is kinda faking it; it is indeed a non-shadow
      tree, but it's not copied from the input block. */
   /* Both halves must match, so fold the two comparisons into one:
      xHi = oldHi ^ expdHi;
      xLo = oldLo ^ expdLo;
      xHL = xHi | xLo;
      expd_eq_old = xHL == 0;
   */
   xHi = assignNew('C', mce, elemTy,
                   binop(opXor, cas->expdHi, mkexpr(cas->oldHi)));
   xLo = assignNew('C', mce, elemTy,
                   binop(opXor, cas->expdLo, mkexpr(cas->oldLo)));
   xHL = assignNew('C', mce, elemTy,
                   binop(opOr, xHi, xLo));
   expd_eq_old
      = assignNew('C', mce, Ity_I1,
                  binop(opCasCmpEQ, xHL, zero));

   /* 7. if "expected == old"
            store data# to shadow memory */
   do_shadow_Store( mce, cas->end, cas->addr, memOffsHi/*bias*/,
                    NULL/*data*/, vdataHi/*vdata*/,
                    expd_eq_old/*guard for store*/ );
   do_shadow_Store( mce, cas->end, cas->addr, memOffsLo/*bias*/,
                    NULL/*data*/, vdataLo/*vdata*/,
                    expd_eq_old/*guard for store*/ );
   if (otrak) {
      gen_store_b( mce, elemSzB, cas->addr, memOffsHi/*offset*/,
                   bdataHi/*bdata*/,
                   expd_eq_old/*guard for store*/ );
      gen_store_b( mce, elemSzB, cas->addr, memOffsLo/*offset*/,
                   bdataLo/*bdata*/,
                   expd_eq_old/*guard for store*/ );
   }
}
4398
4399
sewardjdb5907d2009-11-26 17:20:21 +00004400/* ------ Dealing with LL/SC (not difficult) ------ */
4401
4402static void do_shadow_LLSC ( MCEnv* mce,
4403 IREndness stEnd,
4404 IRTemp stResult,
4405 IRExpr* stAddr,
4406 IRExpr* stStoredata )
4407{
4408 /* In short: treat a load-linked like a normal load followed by an
4409 assignment of the loaded (shadow) data to the result temporary.
4410 Treat a store-conditional like a normal store, and mark the
4411 result temporary as defined. */
4412 IRType resTy = typeOfIRTemp(mce->sb->tyenv, stResult);
4413 IRTemp resTmp = findShadowTmpV(mce, stResult);
4414
4415 tl_assert(isIRAtom(stAddr));
4416 if (stStoredata)
4417 tl_assert(isIRAtom(stStoredata));
4418
4419 if (stStoredata == NULL) {
4420 /* Load Linked */
4421 /* Just treat this as a normal load, followed by an assignment of
4422 the value to .result. */
4423 /* Stay sane */
4424 tl_assert(resTy == Ity_I64 || resTy == Ity_I32
4425 || resTy == Ity_I16 || resTy == Ity_I8);
4426 assign( 'V', mce, resTmp,
4427 expr2vbits_Load(
4428 mce, stEnd, resTy, stAddr, 0/*addr bias*/));
4429 } else {
4430 /* Store Conditional */
4431 /* Stay sane */
4432 IRType dataTy = typeOfIRExpr(mce->sb->tyenv,
4433 stStoredata);
4434 tl_assert(dataTy == Ity_I64 || dataTy == Ity_I32
4435 || dataTy == Ity_I16 || dataTy == Ity_I8);
4436 do_shadow_Store( mce, stEnd,
4437 stAddr, 0/* addr bias */,
4438 stStoredata,
4439 NULL /* shadow data */,
4440 NULL/*guard*/ );
4441 /* This is a store conditional, so it writes to .result a value
4442 indicating whether or not the store succeeded. Just claim
4443 this value is always defined. In the PowerPC interpretation
4444 of store-conditional, definedness of the success indication
4445 depends on whether the address of the store matches the
4446 reservation address. But we can't tell that here (and
4447 anyway, we're not being PowerPC-specific). At least we are
4448 guaranteed that the definedness of the store address, and its
4449 addressibility, will be checked as per normal. So it seems
4450 pretty safe to just say that the success indication is always
4451 defined.
4452
4453 In schemeS, for origin tracking, we must correspondingly set
4454 a no-origin value for the origin shadow of .result.
4455 */
4456 tl_assert(resTy == Ity_I1);
4457 assign( 'V', mce, resTmp, definedOfType(resTy) );
4458 }
4459}
4460
4461
sewardj95448072004-11-22 20:19:51 +00004462/*------------------------------------------------------------*/
4463/*--- Memcheck main ---*/
4464/*------------------------------------------------------------*/
njn25e49d8e72002-09-23 09:36:25 +00004465
sewardj7cf4e6b2008-05-01 20:24:26 +00004466static void schemeS ( MCEnv* mce, IRStmt* st );
4467
sewardj95448072004-11-22 20:19:51 +00004468static Bool isBogusAtom ( IRAtom* at )
njn25e49d8e72002-09-23 09:36:25 +00004469{
sewardj95448072004-11-22 20:19:51 +00004470 ULong n = 0;
4471 IRConst* con;
sewardj710d6c22005-03-20 18:55:15 +00004472 tl_assert(isIRAtom(at));
sewardj0b9d74a2006-12-24 02:24:11 +00004473 if (at->tag == Iex_RdTmp)
sewardj95448072004-11-22 20:19:51 +00004474 return False;
4475 tl_assert(at->tag == Iex_Const);
4476 con = at->Iex.Const.con;
4477 switch (con->tag) {
sewardjd5204dc2004-12-31 01:16:11 +00004478 case Ico_U1: return False;
4479 case Ico_U8: n = (ULong)con->Ico.U8; break;
4480 case Ico_U16: n = (ULong)con->Ico.U16; break;
4481 case Ico_U32: n = (ULong)con->Ico.U32; break;
4482 case Ico_U64: n = (ULong)con->Ico.U64; break;
4483 case Ico_F64: return False;
4484 case Ico_F64i: return False;
4485 case Ico_V128: return False;
sewardj95448072004-11-22 20:19:51 +00004486 default: ppIRExpr(at); tl_assert(0);
4487 }
4488 /* VG_(printf)("%llx\n", n); */
sewardj96a922e2005-04-23 23:26:29 +00004489 return (/*32*/ n == 0xFEFEFEFFULL
4490 /*32*/ || n == 0x80808080ULL
sewardj17b47432008-12-17 01:12:58 +00004491 /*32*/ || n == 0x7F7F7F7FULL
tomd9774d72005-06-27 08:11:01 +00004492 /*64*/ || n == 0xFFFFFFFFFEFEFEFFULL
sewardj96a922e2005-04-23 23:26:29 +00004493 /*64*/ || n == 0xFEFEFEFEFEFEFEFFULL
tomd9774d72005-06-27 08:11:01 +00004494 /*64*/ || n == 0x0000000000008080ULL
sewardj96a922e2005-04-23 23:26:29 +00004495 /*64*/ || n == 0x8080808080808080ULL
sewardj17b47432008-12-17 01:12:58 +00004496 /*64*/ || n == 0x0101010101010101ULL
sewardj96a922e2005-04-23 23:26:29 +00004497 );
sewardj95448072004-11-22 20:19:51 +00004498}
njn25e49d8e72002-09-23 09:36:25 +00004499
sewardj95448072004-11-22 20:19:51 +00004500static Bool checkForBogusLiterals ( /*FLAT*/ IRStmt* st )
4501{
sewardjd5204dc2004-12-31 01:16:11 +00004502 Int i;
4503 IRExpr* e;
4504 IRDirty* d;
sewardj1c0ce7a2009-07-01 08:10:49 +00004505 IRCAS* cas;
sewardj95448072004-11-22 20:19:51 +00004506 switch (st->tag) {
sewardj0b9d74a2006-12-24 02:24:11 +00004507 case Ist_WrTmp:
4508 e = st->Ist.WrTmp.data;
sewardj95448072004-11-22 20:19:51 +00004509 switch (e->tag) {
4510 case Iex_Get:
sewardj0b9d74a2006-12-24 02:24:11 +00004511 case Iex_RdTmp:
sewardj95448072004-11-22 20:19:51 +00004512 return False;
sewardjd5204dc2004-12-31 01:16:11 +00004513 case Iex_Const:
4514 return isBogusAtom(e);
sewardj95448072004-11-22 20:19:51 +00004515 case Iex_Unop:
4516 return isBogusAtom(e->Iex.Unop.arg);
sewardjd5204dc2004-12-31 01:16:11 +00004517 case Iex_GetI:
4518 return isBogusAtom(e->Iex.GetI.ix);
sewardj95448072004-11-22 20:19:51 +00004519 case Iex_Binop:
4520 return isBogusAtom(e->Iex.Binop.arg1)
4521 || isBogusAtom(e->Iex.Binop.arg2);
sewardjed69fdb2006-02-03 16:12:27 +00004522 case Iex_Triop:
4523 return isBogusAtom(e->Iex.Triop.arg1)
4524 || isBogusAtom(e->Iex.Triop.arg2)
4525 || isBogusAtom(e->Iex.Triop.arg3);
sewardje91cea72006-02-08 19:32:02 +00004526 case Iex_Qop:
4527 return isBogusAtom(e->Iex.Qop.arg1)
4528 || isBogusAtom(e->Iex.Qop.arg2)
4529 || isBogusAtom(e->Iex.Qop.arg3)
4530 || isBogusAtom(e->Iex.Qop.arg4);
sewardj95448072004-11-22 20:19:51 +00004531 case Iex_Mux0X:
4532 return isBogusAtom(e->Iex.Mux0X.cond)
4533 || isBogusAtom(e->Iex.Mux0X.expr0)
4534 || isBogusAtom(e->Iex.Mux0X.exprX);
sewardj2e595852005-06-30 23:33:37 +00004535 case Iex_Load:
4536 return isBogusAtom(e->Iex.Load.addr);
sewardj95448072004-11-22 20:19:51 +00004537 case Iex_CCall:
4538 for (i = 0; e->Iex.CCall.args[i]; i++)
4539 if (isBogusAtom(e->Iex.CCall.args[i]))
4540 return True;
4541 return False;
4542 default:
4543 goto unhandled;
4544 }
sewardjd5204dc2004-12-31 01:16:11 +00004545 case Ist_Dirty:
4546 d = st->Ist.Dirty.details;
4547 for (i = 0; d->args[i]; i++)
4548 if (isBogusAtom(d->args[i]))
4549 return True;
4550 if (d->guard && isBogusAtom(d->guard))
4551 return True;
4552 if (d->mAddr && isBogusAtom(d->mAddr))
4553 return True;
4554 return False;
sewardj95448072004-11-22 20:19:51 +00004555 case Ist_Put:
4556 return isBogusAtom(st->Ist.Put.data);
sewardjd5204dc2004-12-31 01:16:11 +00004557 case Ist_PutI:
4558 return isBogusAtom(st->Ist.PutI.ix)
4559 || isBogusAtom(st->Ist.PutI.data);
sewardj2e595852005-06-30 23:33:37 +00004560 case Ist_Store:
4561 return isBogusAtom(st->Ist.Store.addr)
4562 || isBogusAtom(st->Ist.Store.data);
sewardj95448072004-11-22 20:19:51 +00004563 case Ist_Exit:
sewardjd5204dc2004-12-31 01:16:11 +00004564 return isBogusAtom(st->Ist.Exit.guard);
sewardj826ec492005-05-12 18:05:00 +00004565 case Ist_AbiHint:
sewardj7cf4e6b2008-05-01 20:24:26 +00004566 return isBogusAtom(st->Ist.AbiHint.base)
4567 || isBogusAtom(st->Ist.AbiHint.nia);
sewardj21dc3452005-03-21 00:27:41 +00004568 case Ist_NoOp:
sewardj29faa502005-03-16 18:20:21 +00004569 case Ist_IMark:
sewardj72d75132007-11-09 23:06:35 +00004570 case Ist_MBE:
sewardjbd598e12005-01-07 12:10:21 +00004571 return False;
sewardj1c0ce7a2009-07-01 08:10:49 +00004572 case Ist_CAS:
4573 cas = st->Ist.CAS.details;
4574 return isBogusAtom(cas->addr)
4575 || (cas->expdHi ? isBogusAtom(cas->expdHi) : False)
4576 || isBogusAtom(cas->expdLo)
4577 || (cas->dataHi ? isBogusAtom(cas->dataHi) : False)
4578 || isBogusAtom(cas->dataLo);
sewardjdb5907d2009-11-26 17:20:21 +00004579 case Ist_LLSC:
4580 return isBogusAtom(st->Ist.LLSC.addr)
4581 || (st->Ist.LLSC.storedata
4582 ? isBogusAtom(st->Ist.LLSC.storedata)
4583 : False);
sewardj95448072004-11-22 20:19:51 +00004584 default:
4585 unhandled:
4586 ppIRStmt(st);
4587 VG_(tool_panic)("hasBogusLiterals");
4588 }
4589}
njn25e49d8e72002-09-23 09:36:25 +00004590
njn25e49d8e72002-09-23 09:36:25 +00004591
sewardj0b9d74a2006-12-24 02:24:11 +00004592IRSB* MC_(instrument) ( VgCallbackClosure* closure,
sewardj1c0ce7a2009-07-01 08:10:49 +00004593 IRSB* sb_in,
sewardj461df9c2006-01-17 02:06:39 +00004594 VexGuestLayout* layout,
4595 VexGuestExtents* vge,
sewardjd54babf2005-03-21 00:55:49 +00004596 IRType gWordTy, IRType hWordTy )
sewardj95448072004-11-22 20:19:51 +00004597{
sewardj7cf4e6b2008-05-01 20:24:26 +00004598 Bool verboze = 0||False;
sewardj151b90d2005-07-06 19:42:23 +00004599 Bool bogus;
sewardjd5204dc2004-12-31 01:16:11 +00004600 Int i, j, first_stmt;
sewardj95448072004-11-22 20:19:51 +00004601 IRStmt* st;
sewardjd5204dc2004-12-31 01:16:11 +00004602 MCEnv mce;
sewardj1c0ce7a2009-07-01 08:10:49 +00004603 IRSB* sb_out;
sewardjd54babf2005-03-21 00:55:49 +00004604
4605 if (gWordTy != hWordTy) {
4606 /* We don't currently support this case. */
4607 VG_(tool_panic)("host/guest word size mismatch");
4608 }
njn25e49d8e72002-09-23 09:36:25 +00004609
sewardj6cf40ff2005-04-20 22:31:26 +00004610 /* Check we're not completely nuts */
sewardj7cf4e6b2008-05-01 20:24:26 +00004611 tl_assert(sizeof(UWord) == sizeof(void*));
4612 tl_assert(sizeof(Word) == sizeof(void*));
4613 tl_assert(sizeof(Addr) == sizeof(void*));
4614 tl_assert(sizeof(ULong) == 8);
4615 tl_assert(sizeof(Long) == 8);
4616 tl_assert(sizeof(Addr64) == 8);
4617 tl_assert(sizeof(UInt) == 4);
4618 tl_assert(sizeof(Int) == 4);
4619
4620 tl_assert(MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3);
sewardj6cf40ff2005-04-20 22:31:26 +00004621
sewardj0b9d74a2006-12-24 02:24:11 +00004622 /* Set up SB */
sewardj1c0ce7a2009-07-01 08:10:49 +00004623 sb_out = deepCopyIRSBExceptStmts(sb_in);
njn25e49d8e72002-09-23 09:36:25 +00004624
sewardj1c0ce7a2009-07-01 08:10:49 +00004625 /* Set up the running environment. Both .sb and .tmpMap are
4626 modified as we go along. Note that tmps are added to both
4627 .sb->tyenv and .tmpMap together, so the valid index-set for
4628 those two arrays should always be identical. */
4629 VG_(memset)(&mce, 0, sizeof(mce));
4630 mce.sb = sb_out;
sewardj7cf4e6b2008-05-01 20:24:26 +00004631 mce.trace = verboze;
sewardj95448072004-11-22 20:19:51 +00004632 mce.layout = layout;
sewardj95448072004-11-22 20:19:51 +00004633 mce.hWordTy = hWordTy;
sewardjd5204dc2004-12-31 01:16:11 +00004634 mce.bogusLiterals = False;
sewardj1c0ce7a2009-07-01 08:10:49 +00004635
4636 mce.tmpMap = VG_(newXA)( VG_(malloc), "mc.MC_(instrument).1", VG_(free),
4637 sizeof(TempMapEnt));
4638 for (i = 0; i < sb_in->tyenv->types_used; i++) {
4639 TempMapEnt ent;
4640 ent.kind = Orig;
4641 ent.shadowV = IRTemp_INVALID;
4642 ent.shadowB = IRTemp_INVALID;
4643 VG_(addToXA)( mce.tmpMap, &ent );
sewardj7cf4e6b2008-05-01 20:24:26 +00004644 }
sewardj1c0ce7a2009-07-01 08:10:49 +00004645 tl_assert( VG_(sizeXA)( mce.tmpMap ) == sb_in->tyenv->types_used );
sewardj95448072004-11-22 20:19:51 +00004646
sewardj151b90d2005-07-06 19:42:23 +00004647 /* Make a preliminary inspection of the statements, to see if there
4648 are any dodgy-looking literals. If there are, we generate
4649 extra-detailed (hence extra-expensive) instrumentation in
4650 places. Scan the whole bb even if dodgyness is found earlier,
4651 so that the flatness assertion is applied to all stmts. */
4652
4653 bogus = False;
sewardj95448072004-11-22 20:19:51 +00004654
sewardj1c0ce7a2009-07-01 08:10:49 +00004655 for (i = 0; i < sb_in->stmts_used; i++) {
sewardj151b90d2005-07-06 19:42:23 +00004656
sewardj1c0ce7a2009-07-01 08:10:49 +00004657 st = sb_in->stmts[i];
sewardj21dc3452005-03-21 00:27:41 +00004658 tl_assert(st);
sewardj95448072004-11-22 20:19:51 +00004659 tl_assert(isFlatIRStmt(st));
4660
sewardj151b90d2005-07-06 19:42:23 +00004661 if (!bogus) {
4662 bogus = checkForBogusLiterals(st);
4663 if (0 && bogus) {
sewardj95448072004-11-22 20:19:51 +00004664 VG_(printf)("bogus: ");
4665 ppIRStmt(st);
4666 VG_(printf)("\n");
4667 }
4668 }
sewardjd5204dc2004-12-31 01:16:11 +00004669
sewardj151b90d2005-07-06 19:42:23 +00004670 }
4671
4672 mce.bogusLiterals = bogus;
4673
sewardja0871482006-10-18 12:41:55 +00004674 /* Copy verbatim any IR preamble preceding the first IMark */
sewardj151b90d2005-07-06 19:42:23 +00004675
sewardj1c0ce7a2009-07-01 08:10:49 +00004676 tl_assert(mce.sb == sb_out);
4677 tl_assert(mce.sb != sb_in);
sewardjf1962d32006-10-19 13:22:16 +00004678
sewardja0871482006-10-18 12:41:55 +00004679 i = 0;
sewardj1c0ce7a2009-07-01 08:10:49 +00004680 while (i < sb_in->stmts_used && sb_in->stmts[i]->tag != Ist_IMark) {
sewardja0871482006-10-18 12:41:55 +00004681
sewardj1c0ce7a2009-07-01 08:10:49 +00004682 st = sb_in->stmts[i];
sewardja0871482006-10-18 12:41:55 +00004683 tl_assert(st);
4684 tl_assert(isFlatIRStmt(st));
4685
sewardj1c0ce7a2009-07-01 08:10:49 +00004686 stmt( 'C', &mce, sb_in->stmts[i] );
sewardja0871482006-10-18 12:41:55 +00004687 i++;
4688 }
4689
sewardjf1962d32006-10-19 13:22:16 +00004690 /* Nasty problem. IR optimisation of the pre-instrumented IR may
4691 cause the IR following the preamble to contain references to IR
4692 temporaries defined in the preamble. Because the preamble isn't
4693 instrumented, these temporaries don't have any shadows.
4694 Nevertheless uses of them following the preamble will cause
4695 memcheck to generate references to their shadows. End effect is
4696 to cause IR sanity check failures, due to references to
4697 non-existent shadows. This is only evident for the complex
4698 preambles used for function wrapping on TOC-afflicted platforms
4699 (ppc64-linux, ppc32-aix5, ppc64-aix5).
4700
4701 The following loop therefore scans the preamble looking for
4702 assignments to temporaries. For each one found it creates an
sewardjafa617b2008-07-22 09:59:48 +00004703 assignment to the corresponding (V) shadow temp, marking it as
sewardjf1962d32006-10-19 13:22:16 +00004704 'defined'. This is the same resulting IR as if the main
4705 instrumentation loop before had been applied to the statement
4706 'tmp = CONSTANT'.
sewardjafa617b2008-07-22 09:59:48 +00004707
4708 Similarly, if origin tracking is enabled, we must generate an
4709 assignment for the corresponding origin (B) shadow, claiming
4710 no-origin, as appropriate for a defined value.
sewardjf1962d32006-10-19 13:22:16 +00004711 */
4712 for (j = 0; j < i; j++) {
sewardj1c0ce7a2009-07-01 08:10:49 +00004713 if (sb_in->stmts[j]->tag == Ist_WrTmp) {
sewardj7cf4e6b2008-05-01 20:24:26 +00004714 /* findShadowTmpV checks its arg is an original tmp;
sewardjf1962d32006-10-19 13:22:16 +00004715 no need to assert that here. */
sewardj1c0ce7a2009-07-01 08:10:49 +00004716 IRTemp tmp_o = sb_in->stmts[j]->Ist.WrTmp.tmp;
sewardjafa617b2008-07-22 09:59:48 +00004717 IRTemp tmp_v = findShadowTmpV(&mce, tmp_o);
sewardj1c0ce7a2009-07-01 08:10:49 +00004718 IRType ty_v = typeOfIRTemp(sb_out->tyenv, tmp_v);
sewardjafa617b2008-07-22 09:59:48 +00004719 assign( 'V', &mce, tmp_v, definedOfType( ty_v ) );
4720 if (MC_(clo_mc_level) == 3) {
4721 IRTemp tmp_b = findShadowTmpB(&mce, tmp_o);
sewardj1c0ce7a2009-07-01 08:10:49 +00004722 tl_assert(typeOfIRTemp(sb_out->tyenv, tmp_b) == Ity_I32);
sewardjafa617b2008-07-22 09:59:48 +00004723 assign( 'B', &mce, tmp_b, mkU32(0)/* UNKNOWN ORIGIN */);
4724 }
sewardjf1962d32006-10-19 13:22:16 +00004725 if (0) {
sewardjafa617b2008-07-22 09:59:48 +00004726 VG_(printf)("create shadow tmp(s) for preamble tmp [%d] ty ", j);
4727 ppIRType( ty_v );
sewardjf1962d32006-10-19 13:22:16 +00004728 VG_(printf)("\n");
4729 }
4730 }
4731 }
4732
sewardja0871482006-10-18 12:41:55 +00004733 /* Iterate over the remaining stmts to generate instrumentation. */
4734
sewardj1c0ce7a2009-07-01 08:10:49 +00004735 tl_assert(sb_in->stmts_used > 0);
sewardja0871482006-10-18 12:41:55 +00004736 tl_assert(i >= 0);
sewardj1c0ce7a2009-07-01 08:10:49 +00004737 tl_assert(i < sb_in->stmts_used);
4738 tl_assert(sb_in->stmts[i]->tag == Ist_IMark);
sewardja0871482006-10-18 12:41:55 +00004739
sewardj1c0ce7a2009-07-01 08:10:49 +00004740 for (/* use current i*/; i < sb_in->stmts_used; i++) {
sewardj151b90d2005-07-06 19:42:23 +00004741
sewardj1c0ce7a2009-07-01 08:10:49 +00004742 st = sb_in->stmts[i];
4743 first_stmt = sb_out->stmts_used;
sewardj95448072004-11-22 20:19:51 +00004744
4745 if (verboze) {
sewardj7cf4e6b2008-05-01 20:24:26 +00004746 VG_(printf)("\n");
sewardj95448072004-11-22 20:19:51 +00004747 ppIRStmt(st);
sewardj7cf4e6b2008-05-01 20:24:26 +00004748 VG_(printf)("\n");
sewardj95448072004-11-22 20:19:51 +00004749 }
4750
sewardj1c0ce7a2009-07-01 08:10:49 +00004751 if (MC_(clo_mc_level) == 3) {
4752 /* See comments on case Ist_CAS below. */
4753 if (st->tag != Ist_CAS)
4754 schemeS( &mce, st );
4755 }
sewardj7cf4e6b2008-05-01 20:24:26 +00004756
sewardj29faa502005-03-16 18:20:21 +00004757 /* Generate instrumentation code for each stmt ... */
4758
sewardj95448072004-11-22 20:19:51 +00004759 switch (st->tag) {
4760
sewardj0b9d74a2006-12-24 02:24:11 +00004761 case Ist_WrTmp:
sewardj7cf4e6b2008-05-01 20:24:26 +00004762 assign( 'V', &mce, findShadowTmpV(&mce, st->Ist.WrTmp.tmp),
4763 expr2vbits( &mce, st->Ist.WrTmp.data) );
njn25e49d8e72002-09-23 09:36:25 +00004764 break;
4765
sewardj95448072004-11-22 20:19:51 +00004766 case Ist_Put:
4767 do_shadow_PUT( &mce,
4768 st->Ist.Put.offset,
4769 st->Ist.Put.data,
4770 NULL /* shadow atom */ );
njn25e49d8e72002-09-23 09:36:25 +00004771 break;
4772
sewardj95448072004-11-22 20:19:51 +00004773 case Ist_PutI:
4774 do_shadow_PUTI( &mce,
4775 st->Ist.PutI.descr,
4776 st->Ist.PutI.ix,
4777 st->Ist.PutI.bias,
4778 st->Ist.PutI.data );
njn25e49d8e72002-09-23 09:36:25 +00004779 break;
4780
sewardj2e595852005-06-30 23:33:37 +00004781 case Ist_Store:
4782 do_shadow_Store( &mce, st->Ist.Store.end,
4783 st->Ist.Store.addr, 0/* addr bias */,
4784 st->Ist.Store.data,
sewardj1c0ce7a2009-07-01 08:10:49 +00004785 NULL /* shadow data */,
4786 NULL/*guard*/ );
njn25e49d8e72002-09-23 09:36:25 +00004787 break;
4788
sewardj95448072004-11-22 20:19:51 +00004789 case Ist_Exit:
sewardjd5204dc2004-12-31 01:16:11 +00004790 complainIfUndefined( &mce, st->Ist.Exit.guard );
njn25e49d8e72002-09-23 09:36:25 +00004791 break;
4792
sewardj29faa502005-03-16 18:20:21 +00004793 case Ist_IMark:
sewardj7cf4e6b2008-05-01 20:24:26 +00004794 break;
4795
4796 case Ist_NoOp:
sewardj72d75132007-11-09 23:06:35 +00004797 case Ist_MBE:
sewardjbd598e12005-01-07 12:10:21 +00004798 break;
4799
sewardj95448072004-11-22 20:19:51 +00004800 case Ist_Dirty:
4801 do_shadow_Dirty( &mce, st->Ist.Dirty.details );
njn25e49d8e72002-09-23 09:36:25 +00004802 break;
4803
sewardj826ec492005-05-12 18:05:00 +00004804 case Ist_AbiHint:
sewardj7cf4e6b2008-05-01 20:24:26 +00004805 do_AbiHint( &mce, st->Ist.AbiHint.base,
4806 st->Ist.AbiHint.len,
4807 st->Ist.AbiHint.nia );
sewardj826ec492005-05-12 18:05:00 +00004808 break;
4809
sewardj1c0ce7a2009-07-01 08:10:49 +00004810 case Ist_CAS:
4811 do_shadow_CAS( &mce, st->Ist.CAS.details );
4812 /* Note, do_shadow_CAS copies the CAS itself to the output
4813 block, because it needs to add instrumentation both
4814 before and after it. Hence skip the copy below. Also
4815 skip the origin-tracking stuff (call to schemeS) above,
4816 since that's all tangled up with it too; do_shadow_CAS
4817 does it all. */
4818 break;
4819
sewardjdb5907d2009-11-26 17:20:21 +00004820 case Ist_LLSC:
4821 do_shadow_LLSC( &mce,
4822 st->Ist.LLSC.end,
4823 st->Ist.LLSC.result,
4824 st->Ist.LLSC.addr,
4825 st->Ist.LLSC.storedata );
4826 break;
4827
njn25e49d8e72002-09-23 09:36:25 +00004828 default:
sewardj95448072004-11-22 20:19:51 +00004829 VG_(printf)("\n");
4830 ppIRStmt(st);
4831 VG_(printf)("\n");
4832 VG_(tool_panic)("memcheck: unhandled IRStmt");
4833
4834 } /* switch (st->tag) */
4835
sewardj7cf4e6b2008-05-01 20:24:26 +00004836 if (0 && verboze) {
sewardj1c0ce7a2009-07-01 08:10:49 +00004837 for (j = first_stmt; j < sb_out->stmts_used; j++) {
sewardj95448072004-11-22 20:19:51 +00004838 VG_(printf)(" ");
sewardj1c0ce7a2009-07-01 08:10:49 +00004839 ppIRStmt(sb_out->stmts[j]);
sewardj95448072004-11-22 20:19:51 +00004840 VG_(printf)("\n");
4841 }
4842 VG_(printf)("\n");
njn25e49d8e72002-09-23 09:36:25 +00004843 }
sewardj95448072004-11-22 20:19:51 +00004844
sewardj1c0ce7a2009-07-01 08:10:49 +00004845 /* ... and finally copy the stmt itself to the output. Except,
4846 skip the copy of IRCASs; see comments on case Ist_CAS
4847 above. */
4848 if (st->tag != Ist_CAS)
4849 stmt('C', &mce, st);
njn25e49d8e72002-09-23 09:36:25 +00004850 }
njn25e49d8e72002-09-23 09:36:25 +00004851
sewardj95448072004-11-22 20:19:51 +00004852 /* Now we need to complain if the jump target is undefined. */
sewardj1c0ce7a2009-07-01 08:10:49 +00004853 first_stmt = sb_out->stmts_used;
njn25e49d8e72002-09-23 09:36:25 +00004854
sewardj95448072004-11-22 20:19:51 +00004855 if (verboze) {
sewardj1c0ce7a2009-07-01 08:10:49 +00004856 VG_(printf)("sb_in->next = ");
4857 ppIRExpr(sb_in->next);
sewardj95448072004-11-22 20:19:51 +00004858 VG_(printf)("\n\n");
4859 }
njn25e49d8e72002-09-23 09:36:25 +00004860
sewardj1c0ce7a2009-07-01 08:10:49 +00004861 complainIfUndefined( &mce, sb_in->next );
njn25e49d8e72002-09-23 09:36:25 +00004862
sewardj7cf4e6b2008-05-01 20:24:26 +00004863 if (0 && verboze) {
sewardj1c0ce7a2009-07-01 08:10:49 +00004864 for (j = first_stmt; j < sb_out->stmts_used; j++) {
sewardj95448072004-11-22 20:19:51 +00004865 VG_(printf)(" ");
sewardj1c0ce7a2009-07-01 08:10:49 +00004866 ppIRStmt(sb_out->stmts[j]);
sewardj95448072004-11-22 20:19:51 +00004867 VG_(printf)("\n");
njn25e49d8e72002-09-23 09:36:25 +00004868 }
sewardj95448072004-11-22 20:19:51 +00004869 VG_(printf)("\n");
njn25e49d8e72002-09-23 09:36:25 +00004870 }
njn25e49d8e72002-09-23 09:36:25 +00004871
sewardj1c0ce7a2009-07-01 08:10:49 +00004872 /* If this fails, there's been some serious snafu with tmp management,
4873 that should be investigated. */
4874 tl_assert( VG_(sizeXA)( mce.tmpMap ) == mce.sb->tyenv->types_used );
4875 VG_(deleteXA)( mce.tmpMap );
4876
4877 tl_assert(mce.sb == sb_out);
4878 return sb_out;
sewardj95448072004-11-22 20:19:51 +00004879}
njn25e49d8e72002-09-23 09:36:25 +00004880
sewardj81651dc2007-08-28 06:05:20 +00004881/*------------------------------------------------------------*/
4882/*--- Post-tree-build final tidying ---*/
4883/*------------------------------------------------------------*/
4884
4885/* This exploits the observation that Memcheck often produces
4886 repeated conditional calls of the form
4887
sewardj7cf4e6b2008-05-01 20:24:26 +00004888 Dirty G MC_(helperc_value_check0/1/4/8_fail)(UInt otag)
sewardj81651dc2007-08-28 06:05:20 +00004889
4890 with the same guard expression G guarding the same helper call.
4891 The second and subsequent calls are redundant. This usually
4892 results from instrumentation of guest code containing multiple
4893 memory references at different constant offsets from the same base
4894 register. After optimisation of the instrumentation, you get a
4895 test for the definedness of the base register for each memory
4896 reference, which is kinda pointless. MC_(final_tidy) therefore
4897 looks for such repeated calls and removes all but the first. */
4898
/* A struct for recording which (helper, guard) pairs we have already
   seen.  'entry' is the helper's entry point, compared by identity;
   'guard' is the guarding expression, compared structurally with
   sameIRValue. */
typedef
   struct { void* entry; IRExpr* guard; }
   Pair;
4904
4905/* Return True if e1 and e2 definitely denote the same value (used to
4906 compare guards). Return False if unknown; False is the safe
4907 answer. Since guest registers and guest memory do not have the
4908 SSA property we must return False if any Gets or Loads appear in
4909 the expression. */
4910
4911static Bool sameIRValue ( IRExpr* e1, IRExpr* e2 )
4912{
4913 if (e1->tag != e2->tag)
4914 return False;
4915 switch (e1->tag) {
4916 case Iex_Const:
4917 return eqIRConst( e1->Iex.Const.con, e2->Iex.Const.con );
4918 case Iex_Binop:
4919 return e1->Iex.Binop.op == e2->Iex.Binop.op
4920 && sameIRValue(e1->Iex.Binop.arg1, e2->Iex.Binop.arg1)
4921 && sameIRValue(e1->Iex.Binop.arg2, e2->Iex.Binop.arg2);
4922 case Iex_Unop:
4923 return e1->Iex.Unop.op == e2->Iex.Unop.op
4924 && sameIRValue(e1->Iex.Unop.arg, e2->Iex.Unop.arg);
4925 case Iex_RdTmp:
4926 return e1->Iex.RdTmp.tmp == e2->Iex.RdTmp.tmp;
4927 case Iex_Mux0X:
4928 return sameIRValue( e1->Iex.Mux0X.cond, e2->Iex.Mux0X.cond )
4929 && sameIRValue( e1->Iex.Mux0X.expr0, e2->Iex.Mux0X.expr0 )
4930 && sameIRValue( e1->Iex.Mux0X.exprX, e2->Iex.Mux0X.exprX );
4931 case Iex_Qop:
4932 case Iex_Triop:
4933 case Iex_CCall:
4934 /* be lazy. Could define equality for these, but they never
4935 appear to be used. */
4936 return False;
4937 case Iex_Get:
4938 case Iex_GetI:
4939 case Iex_Load:
4940 /* be conservative - these may not give the same value each
4941 time */
4942 return False;
4943 case Iex_Binder:
4944 /* should never see this */
4945 /* fallthrough */
4946 default:
4947 VG_(printf)("mc_translate.c: sameIRValue: unhandled: ");
4948 ppIRExpr(e1);
4949 VG_(tool_panic)("memcheck:sameIRValue");
4950 return False;
4951 }
4952}
4953
4954/* See if 'pairs' already has an entry for (entry, guard). Return
4955 True if so. If not, add an entry. */
4956
4957static
4958Bool check_or_add ( XArray* /*of Pair*/ pairs, IRExpr* guard, void* entry )
4959{
4960 Pair p;
4961 Pair* pp;
4962 Int i, n = VG_(sizeXA)( pairs );
4963 for (i = 0; i < n; i++) {
4964 pp = VG_(indexXA)( pairs, i );
4965 if (pp->entry == entry && sameIRValue(pp->guard, guard))
4966 return True;
4967 }
4968 p.guard = guard;
4969 p.entry = entry;
4970 VG_(addToXA)( pairs, &p );
4971 return False;
4972}
4973
4974static Bool is_helperc_value_checkN_fail ( HChar* name )
4975{
4976 return
sewardj7cf4e6b2008-05-01 20:24:26 +00004977 0==VG_(strcmp)(name, "MC_(helperc_value_check0_fail_no_o)")
4978 || 0==VG_(strcmp)(name, "MC_(helperc_value_check1_fail_no_o)")
4979 || 0==VG_(strcmp)(name, "MC_(helperc_value_check4_fail_no_o)")
4980 || 0==VG_(strcmp)(name, "MC_(helperc_value_check8_fail_no_o)")
4981 || 0==VG_(strcmp)(name, "MC_(helperc_value_check0_fail_w_o)")
4982 || 0==VG_(strcmp)(name, "MC_(helperc_value_check1_fail_w_o)")
4983 || 0==VG_(strcmp)(name, "MC_(helperc_value_check4_fail_w_o)")
4984 || 0==VG_(strcmp)(name, "MC_(helperc_value_check8_fail_w_o)");
sewardj81651dc2007-08-28 06:05:20 +00004985}
4986
4987IRSB* MC_(final_tidy) ( IRSB* sb_in )
4988{
4989 Int i;
4990 IRStmt* st;
4991 IRDirty* di;
4992 IRExpr* guard;
4993 IRCallee* cee;
4994 Bool alreadyPresent;
sewardj9c606bd2008-09-18 18:12:50 +00004995 XArray* pairs = VG_(newXA)( VG_(malloc), "mc.ft.1",
4996 VG_(free), sizeof(Pair) );
sewardj81651dc2007-08-28 06:05:20 +00004997 /* Scan forwards through the statements. Each time a call to one
4998 of the relevant helpers is seen, check if we have made a
4999 previous call to the same helper using the same guard
5000 expression, and if so, delete the call. */
5001 for (i = 0; i < sb_in->stmts_used; i++) {
5002 st = sb_in->stmts[i];
5003 tl_assert(st);
5004 if (st->tag != Ist_Dirty)
5005 continue;
5006 di = st->Ist.Dirty.details;
5007 guard = di->guard;
5008 if (!guard)
5009 continue;
5010 if (0) { ppIRExpr(guard); VG_(printf)("\n"); }
5011 cee = di->cee;
5012 if (!is_helperc_value_checkN_fail( cee->name ))
5013 continue;
5014 /* Ok, we have a call to helperc_value_check0/1/4/8_fail with
5015 guard 'guard'. Check if we have already seen a call to this
5016 function with the same guard. If so, delete it. If not,
5017 add it to the set of calls we do know about. */
5018 alreadyPresent = check_or_add( pairs, guard, cee->addr );
5019 if (alreadyPresent) {
5020 sb_in->stmts[i] = IRStmt_NoOp();
5021 if (0) VG_(printf)("XX\n");
5022 }
5023 }
5024 VG_(deleteXA)( pairs );
5025 return sb_in;
5026}
5027
5028
sewardj7cf4e6b2008-05-01 20:24:26 +00005029/*------------------------------------------------------------*/
5030/*--- Origin tracking stuff ---*/
5031/*------------------------------------------------------------*/
5032
sewardj1c0ce7a2009-07-01 08:10:49 +00005033/* Almost identical to findShadowTmpV. */
sewardj7cf4e6b2008-05-01 20:24:26 +00005034static IRTemp findShadowTmpB ( MCEnv* mce, IRTemp orig )
5035{
sewardj1c0ce7a2009-07-01 08:10:49 +00005036 TempMapEnt* ent;
5037 /* VG_(indexXA) range-checks 'orig', hence no need to check
5038 here. */
5039 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
5040 tl_assert(ent->kind == Orig);
5041 if (ent->shadowB == IRTemp_INVALID) {
5042 IRTemp tmpB
5043 = newTemp( mce, Ity_I32, BSh );
5044 /* newTemp may cause mce->tmpMap to resize, hence previous results
5045 from VG_(indexXA) are invalid. */
5046 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
5047 tl_assert(ent->kind == Orig);
5048 tl_assert(ent->shadowB == IRTemp_INVALID);
5049 ent->shadowB = tmpB;
sewardj7cf4e6b2008-05-01 20:24:26 +00005050 }
sewardj1c0ce7a2009-07-01 08:10:49 +00005051 return ent->shadowB;
sewardj7cf4e6b2008-05-01 20:24:26 +00005052}
5053
5054static IRAtom* gen_maxU32 ( MCEnv* mce, IRAtom* b1, IRAtom* b2 )
5055{
5056 return assignNew( 'B', mce, Ity_I32, binop(Iop_Max32U, b1, b2) );
5057}
5058
5059static IRAtom* gen_load_b ( MCEnv* mce, Int szB,
5060 IRAtom* baseaddr, Int offset )
5061{
5062 void* hFun;
5063 HChar* hName;
5064 IRTemp bTmp;
5065 IRDirty* di;
sewardj1c0ce7a2009-07-01 08:10:49 +00005066 IRType aTy = typeOfIRExpr( mce->sb->tyenv, baseaddr );
sewardj7cf4e6b2008-05-01 20:24:26 +00005067 IROp opAdd = aTy == Ity_I32 ? Iop_Add32 : Iop_Add64;
5068 IRAtom* ea = baseaddr;
5069 if (offset != 0) {
5070 IRAtom* off = aTy == Ity_I32 ? mkU32( offset )
5071 : mkU64( (Long)(Int)offset );
5072 ea = assignNew( 'B', mce, aTy, binop(opAdd, ea, off));
5073 }
sewardj1c0ce7a2009-07-01 08:10:49 +00005074 bTmp = newTemp(mce, mce->hWordTy, BSh);
sewardj7cf4e6b2008-05-01 20:24:26 +00005075
5076 switch (szB) {
5077 case 1: hFun = (void*)&MC_(helperc_b_load1);
5078 hName = "MC_(helperc_b_load1)";
5079 break;
5080 case 2: hFun = (void*)&MC_(helperc_b_load2);
5081 hName = "MC_(helperc_b_load2)";
5082 break;
5083 case 4: hFun = (void*)&MC_(helperc_b_load4);
5084 hName = "MC_(helperc_b_load4)";
5085 break;
5086 case 8: hFun = (void*)&MC_(helperc_b_load8);
5087 hName = "MC_(helperc_b_load8)";
5088 break;
5089 case 16: hFun = (void*)&MC_(helperc_b_load16);
5090 hName = "MC_(helperc_b_load16)";
5091 break;
5092 default:
5093 VG_(printf)("mc_translate.c: gen_load_b: unhandled szB == %d\n", szB);
5094 tl_assert(0);
5095 }
5096 di = unsafeIRDirty_1_N(
5097 bTmp, 1/*regparms*/, hName, VG_(fnptr_to_fnentry)( hFun ),
5098 mkIRExprVec_1( ea )
5099 );
5100 /* no need to mess with any annotations. This call accesses
5101 neither guest state nor guest memory. */
5102 stmt( 'B', mce, IRStmt_Dirty(di) );
5103 if (mce->hWordTy == Ity_I64) {
5104 /* 64-bit host */
sewardj1c0ce7a2009-07-01 08:10:49 +00005105 IRTemp bTmp32 = newTemp(mce, Ity_I32, BSh);
sewardj7cf4e6b2008-05-01 20:24:26 +00005106 assign( 'B', mce, bTmp32, unop(Iop_64to32, mkexpr(bTmp)) );
5107 return mkexpr(bTmp32);
5108 } else {
5109 /* 32-bit host */
5110 return mkexpr(bTmp);
5111 }
5112}
sewardj1c0ce7a2009-07-01 08:10:49 +00005113
5114/* Generate a shadow store. guard :: Ity_I1 controls whether the
5115 store really happens; NULL means it unconditionally does. */
sewardj7cf4e6b2008-05-01 20:24:26 +00005116static void gen_store_b ( MCEnv* mce, Int szB,
sewardj1c0ce7a2009-07-01 08:10:49 +00005117 IRAtom* baseaddr, Int offset, IRAtom* dataB,
5118 IRAtom* guard )
sewardj7cf4e6b2008-05-01 20:24:26 +00005119{
5120 void* hFun;
5121 HChar* hName;
5122 IRDirty* di;
sewardj1c0ce7a2009-07-01 08:10:49 +00005123 IRType aTy = typeOfIRExpr( mce->sb->tyenv, baseaddr );
sewardj7cf4e6b2008-05-01 20:24:26 +00005124 IROp opAdd = aTy == Ity_I32 ? Iop_Add32 : Iop_Add64;
5125 IRAtom* ea = baseaddr;
sewardj1c0ce7a2009-07-01 08:10:49 +00005126 if (guard) {
5127 tl_assert(isOriginalAtom(mce, guard));
5128 tl_assert(typeOfIRExpr(mce->sb->tyenv, guard) == Ity_I1);
5129 }
sewardj7cf4e6b2008-05-01 20:24:26 +00005130 if (offset != 0) {
5131 IRAtom* off = aTy == Ity_I32 ? mkU32( offset )
5132 : mkU64( (Long)(Int)offset );
5133 ea = assignNew( 'B', mce, aTy, binop(opAdd, ea, off));
5134 }
5135 if (mce->hWordTy == Ity_I64)
5136 dataB = assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, dataB));
5137
5138 switch (szB) {
5139 case 1: hFun = (void*)&MC_(helperc_b_store1);
5140 hName = "MC_(helperc_b_store1)";
5141 break;
5142 case 2: hFun = (void*)&MC_(helperc_b_store2);
5143 hName = "MC_(helperc_b_store2)";
5144 break;
5145 case 4: hFun = (void*)&MC_(helperc_b_store4);
5146 hName = "MC_(helperc_b_store4)";
5147 break;
5148 case 8: hFun = (void*)&MC_(helperc_b_store8);
5149 hName = "MC_(helperc_b_store8)";
5150 break;
5151 case 16: hFun = (void*)&MC_(helperc_b_store16);
5152 hName = "MC_(helperc_b_store16)";
5153 break;
5154 default:
5155 tl_assert(0);
5156 }
5157 di = unsafeIRDirty_0_N( 2/*regparms*/,
5158 hName, VG_(fnptr_to_fnentry)( hFun ),
5159 mkIRExprVec_2( ea, dataB )
5160 );
5161 /* no need to mess with any annotations. This call accesses
5162 neither guest state nor guest memory. */
sewardj1c0ce7a2009-07-01 08:10:49 +00005163 if (guard) di->guard = guard;
sewardj7cf4e6b2008-05-01 20:24:26 +00005164 stmt( 'B', mce, IRStmt_Dirty(di) );
5165}
5166
5167static IRAtom* narrowTo32 ( MCEnv* mce, IRAtom* e ) {
sewardj1c0ce7a2009-07-01 08:10:49 +00005168 IRType eTy = typeOfIRExpr(mce->sb->tyenv, e);
sewardj7cf4e6b2008-05-01 20:24:26 +00005169 if (eTy == Ity_I64)
5170 return assignNew( 'B', mce, Ity_I32, unop(Iop_64to32, e) );
5171 if (eTy == Ity_I32)
5172 return e;
5173 tl_assert(0);
5174}
5175
5176static IRAtom* zWidenFrom32 ( MCEnv* mce, IRType dstTy, IRAtom* e ) {
sewardj1c0ce7a2009-07-01 08:10:49 +00005177 IRType eTy = typeOfIRExpr(mce->sb->tyenv, e);
sewardj7cf4e6b2008-05-01 20:24:26 +00005178 tl_assert(eTy == Ity_I32);
5179 if (dstTy == Ity_I64)
5180 return assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, e) );
5181 tl_assert(0);
5182}
5183
sewardjdb5907d2009-11-26 17:20:21 +00005184
sewardj7cf4e6b2008-05-01 20:24:26 +00005185static IRAtom* schemeE ( MCEnv* mce, IRExpr* e )
5186{
5187 tl_assert(MC_(clo_mc_level) == 3);
5188
5189 switch (e->tag) {
5190
5191 case Iex_GetI: {
5192 IRRegArray* descr_b;
5193 IRAtom *t1, *t2, *t3, *t4;
5194 IRRegArray* descr = e->Iex.GetI.descr;
5195 IRType equivIntTy
5196 = MC_(get_otrack_reg_array_equiv_int_type)(descr);
5197 /* If this array is unshadowable for whatever reason, use the
5198 usual approximation. */
5199 if (equivIntTy == Ity_INVALID)
5200 return mkU32(0);
5201 tl_assert(sizeofIRType(equivIntTy) >= 4);
5202 tl_assert(sizeofIRType(equivIntTy) == sizeofIRType(descr->elemTy));
5203 descr_b = mkIRRegArray( descr->base + 2*mce->layout->total_sizeB,
5204 equivIntTy, descr->nElems );
5205 /* Do a shadow indexed get of the same size, giving t1. Take
5206 the bottom 32 bits of it, giving t2. Compute into t3 the
5207 origin for the index (almost certainly zero, but there's
5208 no harm in being completely general here, since iropt will
5209 remove any useless code), and fold it in, giving a final
5210 value t4. */
5211 t1 = assignNew( 'B', mce, equivIntTy,
5212 IRExpr_GetI( descr_b, e->Iex.GetI.ix,
5213 e->Iex.GetI.bias ));
5214 t2 = narrowTo32( mce, t1 );
5215 t3 = schemeE( mce, e->Iex.GetI.ix );
5216 t4 = gen_maxU32( mce, t2, t3 );
5217 return t4;
5218 }
5219 case Iex_CCall: {
5220 Int i;
5221 IRAtom* here;
5222 IRExpr** args = e->Iex.CCall.args;
5223 IRAtom* curr = mkU32(0);
5224 for (i = 0; args[i]; i++) {
5225 tl_assert(i < 32);
5226 tl_assert(isOriginalAtom(mce, args[i]));
5227 /* Only take notice of this arg if the callee's
5228 mc-exclusion mask does not say it is to be excluded. */
5229 if (e->Iex.CCall.cee->mcx_mask & (1<<i)) {
5230 /* the arg is to be excluded from definedness checking.
5231 Do nothing. */
5232 if (0) VG_(printf)("excluding %s(%d)\n",
5233 e->Iex.CCall.cee->name, i);
5234 } else {
5235 /* calculate the arg's definedness, and pessimistically
5236 merge it in. */
5237 here = schemeE( mce, args[i] );
5238 curr = gen_maxU32( mce, curr, here );
5239 }
5240 }
5241 return curr;
5242 }
5243 case Iex_Load: {
5244 Int dszB;
5245 dszB = sizeofIRType(e->Iex.Load.ty);
5246 /* assert that the B value for the address is already
5247 available (somewhere) */
5248 tl_assert(isIRAtom(e->Iex.Load.addr));
5249 tl_assert(mce->hWordTy == Ity_I32 || mce->hWordTy == Ity_I64);
5250 return gen_load_b( mce, dszB, e->Iex.Load.addr, 0 );
5251 }
5252 case Iex_Mux0X: {
5253 IRAtom* b1 = schemeE( mce, e->Iex.Mux0X.cond );
5254 IRAtom* b2 = schemeE( mce, e->Iex.Mux0X.expr0 );
5255 IRAtom* b3 = schemeE( mce, e->Iex.Mux0X.exprX );
5256 return gen_maxU32( mce, b1, gen_maxU32( mce, b2, b3 ));
5257 }
5258 case Iex_Qop: {
5259 IRAtom* b1 = schemeE( mce, e->Iex.Qop.arg1 );
5260 IRAtom* b2 = schemeE( mce, e->Iex.Qop.arg2 );
5261 IRAtom* b3 = schemeE( mce, e->Iex.Qop.arg3 );
5262 IRAtom* b4 = schemeE( mce, e->Iex.Qop.arg4 );
5263 return gen_maxU32( mce, gen_maxU32( mce, b1, b2 ),
5264 gen_maxU32( mce, b3, b4 ) );
5265 }
5266 case Iex_Triop: {
5267 IRAtom* b1 = schemeE( mce, e->Iex.Triop.arg1 );
5268 IRAtom* b2 = schemeE( mce, e->Iex.Triop.arg2 );
5269 IRAtom* b3 = schemeE( mce, e->Iex.Triop.arg3 );
5270 return gen_maxU32( mce, b1, gen_maxU32( mce, b2, b3 ) );
5271 }
5272 case Iex_Binop: {
sewardjafed4c52009-07-12 13:00:17 +00005273 switch (e->Iex.Binop.op) {
5274 case Iop_CasCmpEQ8: case Iop_CasCmpNE8:
5275 case Iop_CasCmpEQ16: case Iop_CasCmpNE16:
5276 case Iop_CasCmpEQ32: case Iop_CasCmpNE32:
5277 case Iop_CasCmpEQ64: case Iop_CasCmpNE64:
5278 /* Just say these all produce a defined result,
5279 regardless of their arguments. See
5280 COMMENT_ON_CasCmpEQ in this file. */
5281 return mkU32(0);
5282 default: {
5283 IRAtom* b1 = schemeE( mce, e->Iex.Binop.arg1 );
5284 IRAtom* b2 = schemeE( mce, e->Iex.Binop.arg2 );
5285 return gen_maxU32( mce, b1, b2 );
5286 }
5287 }
5288 tl_assert(0);
5289 /*NOTREACHED*/
sewardj7cf4e6b2008-05-01 20:24:26 +00005290 }
5291 case Iex_Unop: {
5292 IRAtom* b1 = schemeE( mce, e->Iex.Unop.arg );
5293 return b1;
5294 }
5295 case Iex_Const:
5296 return mkU32(0);
5297 case Iex_RdTmp:
5298 return mkexpr( findShadowTmpB( mce, e->Iex.RdTmp.tmp ));
5299 case Iex_Get: {
5300 Int b_offset = MC_(get_otrack_shadow_offset)(
5301 e->Iex.Get.offset,
5302 sizeofIRType(e->Iex.Get.ty)
5303 );
5304 tl_assert(b_offset >= -1
5305 && b_offset <= mce->layout->total_sizeB -4);
5306 if (b_offset >= 0) {
5307 /* FIXME: this isn't an atom! */
5308 return IRExpr_Get( b_offset + 2*mce->layout->total_sizeB,
5309 Ity_I32 );
5310 }
5311 return mkU32(0);
5312 }
5313 default:
5314 VG_(printf)("mc_translate.c: schemeE: unhandled: ");
5315 ppIRExpr(e);
5316 VG_(tool_panic)("memcheck:schemeE");
5317 }
5318}
5319
sewardjdb5907d2009-11-26 17:20:21 +00005320
sewardj7cf4e6b2008-05-01 20:24:26 +00005321static void do_origins_Dirty ( MCEnv* mce, IRDirty* d )
5322{
5323 // This is a hacked version of do_shadow_Dirty
njn4c245e52009-03-15 23:25:38 +00005324 Int i, n, toDo, gSz, gOff;
sewardj7cf4e6b2008-05-01 20:24:26 +00005325 IRAtom *here, *curr;
5326 IRTemp dst;
sewardj7cf4e6b2008-05-01 20:24:26 +00005327
5328 /* First check the guard. */
5329 curr = schemeE( mce, d->guard );
5330
5331 /* Now round up all inputs and maxU32 over them. */
5332
5333 /* Inputs: unmasked args */
5334 for (i = 0; d->args[i]; i++) {
5335 if (d->cee->mcx_mask & (1<<i)) {
5336 /* ignore this arg */
5337 } else {
5338 here = schemeE( mce, d->args[i] );
5339 curr = gen_maxU32( mce, curr, here );
5340 }
5341 }
5342
5343 /* Inputs: guest state that we read. */
5344 for (i = 0; i < d->nFxState; i++) {
5345 tl_assert(d->fxState[i].fx != Ifx_None);
5346 if (d->fxState[i].fx == Ifx_Write)
5347 continue;
5348
5349 /* Ignore any sections marked as 'always defined'. */
5350 if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size )) {
5351 if (0)
5352 VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
5353 d->fxState[i].offset, d->fxState[i].size );
5354 continue;
5355 }
5356
5357 /* This state element is read or modified. So we need to
5358 consider it. If larger than 4 bytes, deal with it in 4-byte
5359 chunks. */
5360 gSz = d->fxState[i].size;
5361 gOff = d->fxState[i].offset;
5362 tl_assert(gSz > 0);
5363 while (True) {
5364 Int b_offset;
5365 if (gSz == 0) break;
5366 n = gSz <= 4 ? gSz : 4;
5367 /* update 'curr' with maxU32 of the state slice
5368 gOff .. gOff+n-1 */
5369 b_offset = MC_(get_otrack_shadow_offset)(gOff, 4);
5370 if (b_offset != -1) {
5371 here = assignNew( 'B',mce,
5372 Ity_I32,
5373 IRExpr_Get(b_offset + 2*mce->layout->total_sizeB,
5374 Ity_I32));
5375 curr = gen_maxU32( mce, curr, here );
5376 }
5377 gSz -= n;
5378 gOff += n;
5379 }
5380
5381 }
5382
5383 /* Inputs: memory */
5384
5385 if (d->mFx != Ifx_None) {
5386 /* Because we may do multiple shadow loads/stores from the same
5387 base address, it's best to do a single test of its
5388 definedness right now. Post-instrumentation optimisation
5389 should remove all but this test. */
5390 tl_assert(d->mAddr);
5391 here = schemeE( mce, d->mAddr );
5392 curr = gen_maxU32( mce, curr, here );
5393 }
5394
5395 /* Deal with memory inputs (reads or modifies) */
5396 if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
sewardj7cf4e6b2008-05-01 20:24:26 +00005397 toDo = d->mSize;
5398 /* chew off 32-bit chunks. We don't care about the endianness
5399 since it's all going to be condensed down to a single bit,
5400 but nevertheless choose an endianness which is hopefully
5401 native to the platform. */
5402 while (toDo >= 4) {
5403 here = gen_load_b( mce, 4, d->mAddr, d->mSize - toDo );
5404 curr = gen_maxU32( mce, curr, here );
5405 toDo -= 4;
5406 }
sewardj8c93fcc2008-10-30 13:08:31 +00005407 /* handle possible 16-bit excess */
5408 while (toDo >= 2) {
5409 here = gen_load_b( mce, 2, d->mAddr, d->mSize - toDo );
5410 curr = gen_maxU32( mce, curr, here );
5411 toDo -= 2;
5412 }
5413 tl_assert(toDo == 0); /* also need to handle 1-byte excess */
sewardj7cf4e6b2008-05-01 20:24:26 +00005414 }
5415
5416 /* Whew! So curr is a 32-bit B-value which should give an origin
5417 of some use if any of the inputs to the helper are undefined.
5418 Now we need to re-distribute the results to all destinations. */
5419
5420 /* Outputs: the destination temporary, if there is one. */
5421 if (d->tmp != IRTemp_INVALID) {
5422 dst = findShadowTmpB(mce, d->tmp);
5423 assign( 'V', mce, dst, curr );
5424 }
5425
5426 /* Outputs: guest state that we write or modify. */
5427 for (i = 0; i < d->nFxState; i++) {
5428 tl_assert(d->fxState[i].fx != Ifx_None);
5429 if (d->fxState[i].fx == Ifx_Read)
5430 continue;
5431
5432 /* Ignore any sections marked as 'always defined'. */
5433 if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size ))
5434 continue;
5435
5436 /* This state element is written or modified. So we need to
5437 consider it. If larger than 4 bytes, deal with it in 4-byte
5438 chunks. */
5439 gSz = d->fxState[i].size;
5440 gOff = d->fxState[i].offset;
5441 tl_assert(gSz > 0);
5442 while (True) {
5443 Int b_offset;
5444 if (gSz == 0) break;
5445 n = gSz <= 4 ? gSz : 4;
5446 /* Write 'curr' to the state slice gOff .. gOff+n-1 */
5447 b_offset = MC_(get_otrack_shadow_offset)(gOff, 4);
5448 if (b_offset != -1) {
5449 stmt( 'B', mce, IRStmt_Put(b_offset + 2*mce->layout->total_sizeB,
5450 curr ));
5451 }
5452 gSz -= n;
5453 gOff += n;
5454 }
5455 }
5456
5457 /* Outputs: memory that we write or modify. Same comments about
5458 endianness as above apply. */
5459 if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
sewardj7cf4e6b2008-05-01 20:24:26 +00005460 toDo = d->mSize;
5461 /* chew off 32-bit chunks */
5462 while (toDo >= 4) {
sewardj1c0ce7a2009-07-01 08:10:49 +00005463 gen_store_b( mce, 4, d->mAddr, d->mSize - toDo, curr,
5464 NULL/*guard*/ );
sewardj7cf4e6b2008-05-01 20:24:26 +00005465 toDo -= 4;
5466 }
sewardj8c93fcc2008-10-30 13:08:31 +00005467 /* handle possible 16-bit excess */
5468 while (toDo >= 2) {
sewardj1c0ce7a2009-07-01 08:10:49 +00005469 gen_store_b( mce, 2, d->mAddr, d->mSize - toDo, curr,
5470 NULL/*guard*/ );
sewardj8c93fcc2008-10-30 13:08:31 +00005471 toDo -= 2;
5472 }
5473 tl_assert(toDo == 0); /* also need to handle 1-byte excess */
sewardj7cf4e6b2008-05-01 20:24:26 +00005474 }
sewardj7cf4e6b2008-05-01 20:24:26 +00005475}
5476
sewardjdb5907d2009-11-26 17:20:21 +00005477
5478static void do_origins_Store ( MCEnv* mce,
5479 IREndness stEnd,
5480 IRExpr* stAddr,
5481 IRExpr* stData )
5482{
5483 Int dszB;
5484 IRAtom* dataB;
5485 /* assert that the B value for the address is already available
5486 (somewhere), since the call to schemeE will want to see it.
5487 XXXX how does this actually ensure that?? */
5488 tl_assert(isIRAtom(stAddr));
5489 tl_assert(isIRAtom(stData));
5490 dszB = sizeofIRType( typeOfIRExpr(mce->sb->tyenv, stData ) );
5491 dataB = schemeE( mce, stData );
5492 gen_store_b( mce, dszB, stAddr, 0/*offset*/, dataB,
5493 NULL/*guard*/ );
5494}
5495
5496
sewardj7cf4e6b2008-05-01 20:24:26 +00005497static void schemeS ( MCEnv* mce, IRStmt* st )
5498{
5499 tl_assert(MC_(clo_mc_level) == 3);
5500
5501 switch (st->tag) {
5502
5503 case Ist_AbiHint:
5504 /* The value-check instrumenter handles this - by arranging
5505 to pass the address of the next instruction to
5506 MC_(helperc_MAKE_STACK_UNINIT). This is all that needs to
5507 happen for origin tracking w.r.t. AbiHints. So there is
5508 nothing to do here. */
5509 break;
5510
5511 case Ist_PutI: {
5512 IRRegArray* descr_b;
5513 IRAtom *t1, *t2, *t3, *t4;
5514 IRRegArray* descr = st->Ist.PutI.descr;
5515 IRType equivIntTy
5516 = MC_(get_otrack_reg_array_equiv_int_type)(descr);
5517 /* If this array is unshadowable for whatever reason,
5518 generate no code. */
5519 if (equivIntTy == Ity_INVALID)
5520 break;
5521 tl_assert(sizeofIRType(equivIntTy) >= 4);
5522 tl_assert(sizeofIRType(equivIntTy) == sizeofIRType(descr->elemTy));
5523 descr_b
5524 = mkIRRegArray( descr->base + 2*mce->layout->total_sizeB,
5525 equivIntTy, descr->nElems );
5526 /* Compute a value to Put - the conjoinment of the origin for
5527 the data to be Put-ted (obviously) and of the index value
5528 (not so obviously). */
5529 t1 = schemeE( mce, st->Ist.PutI.data );
5530 t2 = schemeE( mce, st->Ist.PutI.ix );
5531 t3 = gen_maxU32( mce, t1, t2 );
5532 t4 = zWidenFrom32( mce, equivIntTy, t3 );
5533 stmt( 'B', mce, IRStmt_PutI( descr_b, st->Ist.PutI.ix,
5534 st->Ist.PutI.bias, t4 ));
5535 break;
5536 }
sewardjdb5907d2009-11-26 17:20:21 +00005537
sewardj7cf4e6b2008-05-01 20:24:26 +00005538 case Ist_Dirty:
5539 do_origins_Dirty( mce, st->Ist.Dirty.details );
5540 break;
sewardjdb5907d2009-11-26 17:20:21 +00005541
5542 case Ist_Store:
5543 do_origins_Store( mce, st->Ist.Store.end,
5544 st->Ist.Store.addr,
5545 st->Ist.Store.data );
5546 break;
5547
5548 case Ist_LLSC: {
5549 /* In short: treat a load-linked like a normal load followed
5550 by an assignment of the loaded (shadow) data the result
5551 temporary. Treat a store-conditional like a normal store,
5552 and mark the result temporary as defined. */
5553 if (st->Ist.LLSC.storedata == NULL) {
5554 /* Load Linked */
5555 IRType resTy
5556 = typeOfIRTemp(mce->sb->tyenv, st->Ist.LLSC.result);
5557 IRExpr* vanillaLoad
5558 = IRExpr_Load(st->Ist.LLSC.end, resTy, st->Ist.LLSC.addr);
5559 tl_assert(resTy == Ity_I64 || resTy == Ity_I32
5560 || resTy == Ity_I16 || resTy == Ity_I8);
5561 assign( 'B', mce, findShadowTmpB(mce, st->Ist.LLSC.result),
5562 schemeE(mce, vanillaLoad));
5563 } else {
5564 /* Store conditional */
5565 do_origins_Store( mce, st->Ist.LLSC.end,
5566 st->Ist.LLSC.addr,
5567 st->Ist.LLSC.storedata );
5568 /* For the rationale behind this, see comments at the
5569 place where the V-shadow for .result is constructed, in
5570 do_shadow_LLSC. In short, we regard .result as
5571 always-defined. */
5572 assign( 'B', mce, findShadowTmpB(mce, st->Ist.LLSC.result),
5573 mkU32(0) );
sewardj1c0ce7a2009-07-01 08:10:49 +00005574 }
sewardj7cf4e6b2008-05-01 20:24:26 +00005575 break;
5576 }
sewardjdb5907d2009-11-26 17:20:21 +00005577
sewardj7cf4e6b2008-05-01 20:24:26 +00005578 case Ist_Put: {
5579 Int b_offset
5580 = MC_(get_otrack_shadow_offset)(
5581 st->Ist.Put.offset,
sewardj1c0ce7a2009-07-01 08:10:49 +00005582 sizeofIRType(typeOfIRExpr(mce->sb->tyenv, st->Ist.Put.data))
sewardj7cf4e6b2008-05-01 20:24:26 +00005583 );
5584 if (b_offset >= 0) {
5585 /* FIXME: this isn't an atom! */
5586 stmt( 'B', mce, IRStmt_Put(b_offset + 2*mce->layout->total_sizeB,
5587 schemeE( mce, st->Ist.Put.data )) );
5588 }
5589 break;
5590 }
sewardjdb5907d2009-11-26 17:20:21 +00005591
sewardj7cf4e6b2008-05-01 20:24:26 +00005592 case Ist_WrTmp:
5593 assign( 'B', mce, findShadowTmpB(mce, st->Ist.WrTmp.tmp),
5594 schemeE(mce, st->Ist.WrTmp.data) );
5595 break;
sewardjdb5907d2009-11-26 17:20:21 +00005596
sewardj7cf4e6b2008-05-01 20:24:26 +00005597 case Ist_MBE:
5598 case Ist_NoOp:
5599 case Ist_Exit:
5600 case Ist_IMark:
5601 break;
sewardjdb5907d2009-11-26 17:20:21 +00005602
sewardj7cf4e6b2008-05-01 20:24:26 +00005603 default:
5604 VG_(printf)("mc_translate.c: schemeS: unhandled: ");
5605 ppIRStmt(st);
5606 VG_(tool_panic)("memcheck:schemeS");
5607 }
5608}
5609
5610
njn25e49d8e72002-09-23 09:36:25 +00005611/*--------------------------------------------------------------------*/
njn25cac76cb2002-09-23 11:21:57 +00005612/*--- end mc_translate.c ---*/
njn25e49d8e72002-09-23 09:36:25 +00005613/*--------------------------------------------------------------------*/