/*--------------------------------------------------------------------*/
/*--- Instrument IR to perform memory checking operations.         ---*/
/*---                                               mc_translate.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of MemCheck, a heavyweight Valgrind tool for
   detecting memory errors.

   Copyright (C) 2000-2005 Julian Seward
   jseward@acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "pub_tool_basics.h"
#include "pub_tool_hashtable.h"     // For mac_shared.h
#include "pub_tool_libcassert.h"
#include "pub_tool_libcprint.h"
#include "pub_tool_profile.h"
#include "pub_tool_tooliface.h"
#include "mc_include.h"


/*------------------------------------------------------------*/
/*--- Forward decls                                        ---*/
/*------------------------------------------------------------*/

struct _MCEnv;

static IRType  shadowType ( IRType ty );
static IRExpr* expr2vbits ( struct _MCEnv* mce, IRExpr* e );


/*------------------------------------------------------------*/
/*--- Memcheck running state, and tmp management.          ---*/
/*------------------------------------------------------------*/

/* Carries around state during memcheck instrumentation. */
typedef
   struct _MCEnv {
      /* MODIFIED: the bb being constructed.  IRStmts are added. */
      IRBB* bb;

      /* MODIFIED: a table [0 .. #temps_in_original_bb-1] which maps
         original temps to their current shadow temp.  Initially all
         entries are IRTemp_INVALID.  Entries are added lazily since
         many original temps are not used due to optimisation prior
         to instrumentation.  Note that floating point original tmps
         are shadowed by integer tmps of the same size, and Bit-typed
         original tmps are shadowed by the type Ity_I8.  See comment
         below. */
      IRTemp* tmpMap;
      Int     n_originalTmps; /* for range checking */

      /* MODIFIED: indicates whether "bogus" literals have so far been
         found.  Starts off False, and may change to True. */
      Bool bogusLiterals;

      /* READONLY: the guest layout.  This indicates which parts of
         the guest state should be regarded as 'always defined'. */
      VexGuestLayout* layout;
      /* READONLY: the host word type.  Needed for constructing
         arguments of type 'HWord' to be passed to helper functions.
         Ity_I32 or Ity_I64 only. */
      IRType hWordTy;
   }
   MCEnv;

/* SHADOW TMP MANAGEMENT.  Shadow tmps are allocated lazily (on
   demand), as they are encountered.  This is for two reasons.

   (1) (less important reason): Many original tmps are unused due to
   initial IR optimisation, and we do not want to waste space in
   tables tracking them.

   Shadow IRTemps are therefore allocated on demand.  mce.tmpMap is a
   table indexed [0 .. n_types-1], which gives the current shadow for
   each original tmp, or IRTemp_INVALID if none is so far assigned.
   It is necessary to support making multiple assignments to a shadow
   -- specifically, after testing a shadow for definedness, it needs
   to be made defined.  But IR's SSA property disallows this.

   (2) (more important reason): Therefore, when a shadow needs to get
   a new value, a new temporary is created, the value is assigned to
   that, and the tmpMap is updated to reflect the new binding.

   A corollary is that if the tmpMap maps a given tmp to
   IRTemp_INVALID and we are hoping to read that shadow tmp, it means
   there's a read-before-write error in the original tmps.  The IR
   sanity checker should catch all such anomalies, however.
*/

/* Find the tmp currently shadowing the given original tmp.  If none
   so far exists, allocate one.  */
static IRTemp findShadowTmp ( MCEnv* mce, IRTemp orig )
{
   tl_assert(orig < mce->n_originalTmps);
   if (mce->tmpMap[orig] == IRTemp_INVALID) {
      mce->tmpMap[orig]
         = newIRTemp(mce->bb->tyenv,
                     shadowType(mce->bb->tyenv->types[orig]));
   }
   return mce->tmpMap[orig];
}

/* Allocate a new shadow for the given original tmp.  This means any
   previous shadow is abandoned.  This is needed because it is
   necessary to give a new value to a shadow once it has been tested
   for undefinedness, but unfortunately IR's SSA property disallows
   this.  Instead we must abandon the old shadow, allocate a new one
   and use that instead. */
static void newShadowTmp ( MCEnv* mce, IRTemp orig )
{
   tl_assert(orig < mce->n_originalTmps);
   mce->tmpMap[orig]
      = newIRTemp(mce->bb->tyenv,
                  shadowType(mce->bb->tyenv->types[orig]));
}
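
/* A minimal worked trace of the two functions above, for
   illustration only (the temp numbers are hypothetical):

      findShadowTmp(mce, t3)  -- tmpMap[3] == IRTemp_INVALID, so a
                                 fresh shadow, say t17, is allocated
                                 and tmpMap[3] := t17
      findShadowTmp(mce, t3)  -- returns the cached t17
      newShadowTmp(mce, t3)   -- t17 is abandoned; a fresh t18 is
                                 allocated and tmpMap[3] := t18, so a
                                 'defined' value can be assigned to
                                 t18 without violating SSA
*/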


/*------------------------------------------------------------*/
/*--- IRAtoms -- a subset of IRExprs                       ---*/
/*------------------------------------------------------------*/

/* An atom is either an IRExpr_Const or an IRExpr_Tmp, as defined by
   isIRAtom() in libvex_ir.h.  Because this instrumenter expects flat
   input, most of this code deals in atoms.  Usefully, a value atom
   always has a V-value which is also an atom: constants are shadowed
   by constants, and temps are shadowed by the corresponding shadow
   temporary. */

typedef  IRExpr  IRAtom;

/* (used for sanity checks only): is this an atom which looks
   like it's from original code? */
static Bool isOriginalAtom ( MCEnv* mce, IRAtom* a1 )
{
   if (a1->tag == Iex_Const)
      return True;
   if (a1->tag == Iex_Tmp && a1->Iex.Tmp.tmp < mce->n_originalTmps)
      return True;
   return False;
}

/* (used for sanity checks only): is this an atom which looks
   like it's from shadow code? */
static Bool isShadowAtom ( MCEnv* mce, IRAtom* a1 )
{
   if (a1->tag == Iex_Const)
      return True;
   if (a1->tag == Iex_Tmp && a1->Iex.Tmp.tmp >= mce->n_originalTmps)
      return True;
   return False;
}

/* (used for sanity checks only): check that both args are atoms and
   are identically-kinded. */
static Bool sameKindedAtoms ( IRAtom* a1, IRAtom* a2 )
{
   if (a1->tag == Iex_Tmp && a2->tag == Iex_Tmp)
      return True;
   if (a1->tag == Iex_Const && a2->tag == Iex_Const)
      return True;
   return False;
}


/*------------------------------------------------------------*/
/*--- Type management                                      ---*/
/*------------------------------------------------------------*/

/* Shadow state is always accessed using integer types.  This returns
   an integer type with the same size (as per sizeofIRType) as the
   given type.  The only valid shadow types are I1, I8, I16, I32,
   I64, I128 and V128. */

static IRType shadowType ( IRType ty )
{
   switch (ty) {
      case Ity_I1:
      case Ity_I8:
      case Ity_I16:
      case Ity_I32:
      case Ity_I64:
      case Ity_I128: return ty;
      case Ity_F32:  return Ity_I32;
      case Ity_F64:  return Ity_I64;
      case Ity_V128: return Ity_V128;
      default: ppIRType(ty);
               VG_(tool_panic)("memcheck:shadowType");
   }
}

/* Produce a 'defined' value of the given shadow type.  Should only be
   supplied shadow types (I1/I8/I16/I32/I64/V128). */
static IRExpr* definedOfType ( IRType ty ) {
   switch (ty) {
      case Ity_I1:   return IRExpr_Const(IRConst_U1(False));
      case Ity_I8:   return IRExpr_Const(IRConst_U8(0));
      case Ity_I16:  return IRExpr_Const(IRConst_U16(0));
      case Ity_I32:  return IRExpr_Const(IRConst_U32(0));
      case Ity_I64:  return IRExpr_Const(IRConst_U64(0));
      case Ity_V128: return IRExpr_Const(IRConst_V128(0x0000));
      default:       VG_(tool_panic)("memcheck:definedOfType");
   }
}


/*------------------------------------------------------------*/
/*--- Constructing IR fragments                            ---*/
/*------------------------------------------------------------*/

/* assign value to tmp */
#define assign(_bb,_tmp,_expr)   \
   addStmtToIRBB((_bb), IRStmt_Tmp((_tmp),(_expr)))

/* add stmt to a bb */
#define stmt(_bb,_stmt)    \
   addStmtToIRBB((_bb), (_stmt))

/* build various kinds of expressions */
#define binop(_op, _arg1, _arg2) IRExpr_Binop((_op),(_arg1),(_arg2))
#define unop(_op, _arg)          IRExpr_Unop((_op),(_arg))
#define mkU8(_n)                 IRExpr_Const(IRConst_U8(_n))
#define mkU16(_n)                IRExpr_Const(IRConst_U16(_n))
#define mkU32(_n)                IRExpr_Const(IRConst_U32(_n))
#define mkU64(_n)                IRExpr_Const(IRConst_U64(_n))
#define mkV128(_n)               IRExpr_Const(IRConst_V128(_n))
#define mkexpr(_tmp)             IRExpr_Tmp((_tmp))

/* bind the given expression to a new temporary, and return the
   temporary.  This effectively converts an arbitrary expression into
   an atom. */
static IRAtom* assignNew ( MCEnv* mce, IRType ty, IRExpr* e ) {
   IRTemp t = newIRTemp(mce->bb->tyenv, ty);
   assign(mce->bb, t, e);
   return mkexpr(t);
}


/*------------------------------------------------------------*/
/*--- Constructing definedness primitive ops               ---*/
/*------------------------------------------------------------*/

/* --------- Defined-if-either-defined --------- */

static IRAtom* mkDifD8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I8, binop(Iop_And8, a1, a2));
}

static IRAtom* mkDifD16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I16, binop(Iop_And16, a1, a2));
}

static IRAtom* mkDifD32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I32, binop(Iop_And32, a1, a2));
}

static IRAtom* mkDifD64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I64, binop(Iop_And64, a1, a2));
}

static IRAtom* mkDifDV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_V128, binop(Iop_AndV128, a1, a2));
}

/* --------- Undefined-if-either-undefined --------- */

static IRAtom* mkUifU8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I8, binop(Iop_Or8, a1, a2));
}

static IRAtom* mkUifU16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I16, binop(Iop_Or16, a1, a2));
}

static IRAtom* mkUifU32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I32, binop(Iop_Or32, a1, a2));
}

static IRAtom* mkUifU64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I64, binop(Iop_Or64, a1, a2));
}

static IRAtom* mkUifUV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_V128, binop(Iop_OrV128, a1, a2));
}

static IRAtom* mkUifU ( MCEnv* mce, IRType vty, IRAtom* a1, IRAtom* a2 ) {
   switch (vty) {
      case Ity_I8:   return mkUifU8(mce, a1, a2);
      case Ity_I16:  return mkUifU16(mce, a1, a2);
      case Ity_I32:  return mkUifU32(mce, a1, a2);
      case Ity_I64:  return mkUifU64(mce, a1, a2);
      case Ity_V128: return mkUifUV128(mce, a1, a2);
      default:
         VG_(printf)("\n"); ppIRType(vty); VG_(printf)("\n");
         VG_(tool_panic)("memcheck:mkUifU");
   }
}

/* --------- The Left-family of operations. --------- */

static IRAtom* mkLeft8 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   /* It's safe to duplicate a1 since it's only an atom */
   return assignNew(mce, Ity_I8,
                    binop(Iop_Or8, a1,
                          assignNew(mce, Ity_I8,
                                    unop(Iop_Neg8, a1))));
}

static IRAtom* mkLeft16 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   /* It's safe to duplicate a1 since it's only an atom */
   return assignNew(mce, Ity_I16,
                    binop(Iop_Or16, a1,
                          assignNew(mce, Ity_I16,
                                    unop(Iop_Neg16, a1))));
}

static IRAtom* mkLeft32 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   /* It's safe to duplicate a1 since it's only an atom */
   return assignNew(mce, Ity_I32,
                    binop(Iop_Or32, a1,
                          assignNew(mce, Ity_I32,
                                    unop(Iop_Neg32, a1))));
}

static IRAtom* mkLeft64 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   /* It's safe to duplicate a1 since it's only an atom */
   return assignNew(mce, Ity_I64,
                    binop(Iop_Or64, a1,
                          assignNew(mce, Ity_I64,
                                    unop(Iop_Neg64, a1))));
}
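
/* A worked example of the Left transformation, for illustration.
   Left(vbits) = vbits | -vbits turns on every bit at and above the
   lowest 1 (undefined) bit, e.g. for 8 bits:

      vbits          = 00010100   (bits 2 and 4 undefined)
      -vbits         = 11101100
      vbits | -vbits = 11111100   (bits 2..7 undefined)

   This models the worst case for carry propagation: an undefined bit
   can contaminate every more significant result bit, but never a
   less significant one. */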

/* --------- 'Improvement' functions for AND/OR. --------- */

/* ImproveAND(data, vbits) = data OR vbits.  Defined (0) data 0s give
   defined (0); all other -> undefined (1).
*/
static IRAtom* mkImproveAND8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(mce, Ity_I8, binop(Iop_Or8, data, vbits));
}

static IRAtom* mkImproveAND16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(mce, Ity_I16, binop(Iop_Or16, data, vbits));
}

static IRAtom* mkImproveAND32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(mce, Ity_I32, binop(Iop_Or32, data, vbits));
}

static IRAtom* mkImproveAND64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(mce, Ity_I64, binop(Iop_Or64, data, vbits));
}

static IRAtom* mkImproveANDV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(mce, Ity_V128, binop(Iop_OrV128, data, vbits));
}

/* ImproveOR(data, vbits) = ~data OR vbits.  Defined (0) data 1s give
   defined (0); all other -> undefined (1).
*/
static IRAtom* mkImproveOR8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             mce, Ity_I8,
             binop(Iop_Or8,
                   assignNew(mce, Ity_I8, unop(Iop_Not8, data)),
                   vbits) );
}

static IRAtom* mkImproveOR16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             mce, Ity_I16,
             binop(Iop_Or16,
                   assignNew(mce, Ity_I16, unop(Iop_Not16, data)),
                   vbits) );
}

static IRAtom* mkImproveOR32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             mce, Ity_I32,
             binop(Iop_Or32,
                   assignNew(mce, Ity_I32, unop(Iop_Not32, data)),
                   vbits) );
}

static IRAtom* mkImproveOR64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             mce, Ity_I64,
             binop(Iop_Or64,
                   assignNew(mce, Ity_I64, unop(Iop_Not64, data)),
                   vbits) );
}

static IRAtom* mkImproveORV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             mce, Ity_V128,
             binop(Iop_OrV128,
                   assignNew(mce, Ity_V128, unop(Iop_NotV128, data)),
                   vbits) );
}
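
/* A per-bit view of why the improvement terms are sound, for
   illustration.  For AND, a defined (0) 0 operand bit forces the
   result bit to a defined 0 whatever the other operand holds:

      data bit  vbits bit  ImproveAND = data|vbits
         0          0            0    (defined 0: result forced)
         1          0            1    (no forcing possible)
         0          1            1    (operand itself undefined)
         1          1            1

   Dually, for OR a defined 1 operand forces a defined 1 result,
   hence ImproveOR = ~data|vbits.  As with the CmpEQ/CmpNE scheme
   described below, such terms are DifD'd onto the naive UifU
   result. */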

/* --------- Pessimising casts. --------- */

static IRAtom* mkPCastTo( MCEnv* mce, IRType dst_ty, IRAtom* vbits )
{
   IRType  ty;
   IRAtom* tmp1;
   /* Note, dst_ty is a shadow type, not an original type. */
   /* First of all, collapse vbits down to a single bit. */
   tl_assert(isShadowAtom(mce,vbits));
   ty   = typeOfIRExpr(mce->bb->tyenv, vbits);
   tmp1 = NULL;
   switch (ty) {
      case Ity_I1:
         tmp1 = vbits;
         break;
      case Ity_I8:
         tmp1 = assignNew(mce, Ity_I1, unop(Iop_CmpNEZ8, vbits));
         break;
      case Ity_I16:
         tmp1 = assignNew(mce, Ity_I1, unop(Iop_CmpNEZ16, vbits));
         break;
      case Ity_I32:
         tmp1 = assignNew(mce, Ity_I1, unop(Iop_CmpNEZ32, vbits));
         break;
      case Ity_I64:
         tmp1 = assignNew(mce, Ity_I1, unop(Iop_CmpNEZ64, vbits));
         break;
      case Ity_I128: {
         /* Gah.  Chop it in half, OR the halves together, and compare
            that with zero. */
         IRAtom* tmp2 = assignNew(mce, Ity_I64, unop(Iop_128HIto64, vbits));
         IRAtom* tmp3 = assignNew(mce, Ity_I64, unop(Iop_128to64, vbits));
         IRAtom* tmp4 = assignNew(mce, Ity_I64, binop(Iop_Or64, tmp2, tmp3));
         tmp1 = assignNew(mce, Ity_I1,
                          unop(Iop_CmpNEZ64, tmp4));
         break;
      }
      default:
         ppIRType(ty);
         VG_(tool_panic)("mkPCastTo(1)");
   }
   tl_assert(tmp1);
   /* Now widen up to the dst type. */
   switch (dst_ty) {
      case Ity_I1:
         return tmp1;
      case Ity_I8:
         return assignNew(mce, Ity_I8, unop(Iop_1Sto8, tmp1));
      case Ity_I16:
         return assignNew(mce, Ity_I16, unop(Iop_1Sto16, tmp1));
      case Ity_I32:
         return assignNew(mce, Ity_I32, unop(Iop_1Sto32, tmp1));
      case Ity_I64:
         return assignNew(mce, Ity_I64, unop(Iop_1Sto64, tmp1));
      case Ity_V128:
         tmp1 = assignNew(mce, Ity_I64,  unop(Iop_1Sto64, tmp1));
         tmp1 = assignNew(mce, Ity_V128, binop(Iop_64HLtoV128, tmp1, tmp1));
         return tmp1;
      case Ity_I128:
         tmp1 = assignNew(mce, Ity_I64,  unop(Iop_1Sto64, tmp1));
         tmp1 = assignNew(mce, Ity_I128, binop(Iop_64HLto128, tmp1, tmp1));
         return tmp1;
      default:
         ppIRType(dst_ty);
         VG_(tool_panic)("mkPCastTo(2)");
   }
}
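
/* A worked example of the pessimising cast, for illustration:

      mkPCastTo(mce, Ity_I32, vbits8)   with vbits8 = 00001000

   collapse:  CmpNEZ8(00001000) = 1          (something is undefined)
   widen:     1Sto32(1)         = 0xFFFFFFFF (all 32 bits undefined)

   Conversely, vbits8 == 0 (all defined) yields 0 (all defined).
   Definedness thus survives the cast only at whole-value
   granularity, which is what makes the cast 'pessimising'. */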

/* --------- Accurate interpretation of CmpEQ/CmpNE. --------- */
/*
   Normally, we can do CmpEQ/CmpNE by doing UifU on the arguments, and
   PCasting to Ity_I1.  However, sometimes it is necessary to be more
   accurate.  The insight is that the result is defined if two
   corresponding bits can be found, one from each argument, so that
   both bits are defined but are different -- that makes EQ say "No"
   and NE say "Yes".  Hence, we compute an improvement term and DifD
   it onto the "normal" (UifU) result.

   The result is:

   PCastTo<1> (
      PCastTo<sz>( UifU<sz>(vxx, vyy) )        -- naive version
      `DifD<sz>`
      PCastTo<sz>( CmpEQ<sz>( vec, 1....1 ) )  -- improvement term
   )
   where
     vec contains 0 (defined) bits where the corresponding arg bits
     are defined but different, and 1 bits otherwise:

     vec = UifU<sz>( vxx, vyy, Not<sz>(Xor<sz>( xx, yy )) )
*/
static IRAtom* expensiveCmpEQorNE ( MCEnv*  mce,
                                    IRType  ty,
                                    IRAtom* vxx, IRAtom* vyy,
                                    IRAtom* xx,  IRAtom* yy )
{
   IRAtom *naive, *vec, *vec_cmpd, *improved, *final_cast, *top;
   IROp   opDIFD, opUIFU, opXOR, opNOT, opCMP;

   tl_assert(isShadowAtom(mce,vxx));
   tl_assert(isShadowAtom(mce,vyy));
   tl_assert(isOriginalAtom(mce,xx));
   tl_assert(isOriginalAtom(mce,yy));
   tl_assert(sameKindedAtoms(vxx,xx));
   tl_assert(sameKindedAtoms(vyy,yy));

   switch (ty) {
      case Ity_I32:
         opDIFD = Iop_And32;
         opUIFU = Iop_Or32;
         opNOT  = Iop_Not32;
         opXOR  = Iop_Xor32;
         opCMP  = Iop_CmpEQ32;
         top    = mkU32(0xFFFFFFFF);
         break;
      case Ity_I64:
         opDIFD = Iop_And64;
         opUIFU = Iop_Or64;
         opNOT  = Iop_Not64;
         opXOR  = Iop_Xor64;
         opCMP  = Iop_CmpEQ64;
         top    = mkU64(0xFFFFFFFFFFFFFFFFULL);
         break;
      default:
         VG_(tool_panic)("expensiveCmpEQorNE");
   }

   naive
      = mkPCastTo(mce,ty, assignNew(mce, ty, binop(opUIFU, vxx, vyy)));

   vec
      = assignNew(
           mce,ty,
           binop( opUIFU,
                  assignNew(mce,ty, binop(opUIFU, vxx, vyy)),
                  assignNew(
                     mce,ty,
                     unop( opNOT,
                           assignNew(mce,ty, binop(opXOR, xx, yy))))));

   vec_cmpd
      = mkPCastTo( mce,ty, assignNew(mce,Ity_I1, binop(opCMP, vec, top)));

   improved
      = assignNew( mce,ty, binop(opDIFD, naive, vec_cmpd) );

   final_cast
      = mkPCastTo( mce, Ity_I1, improved );

   return final_cast;
}
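
/* A worked example with hypothetical 32-bit values, for
   illustration.  Let xx = 0x00000000 and yy = 0x00000100, everything
   defined except bit 0 of xx:

      vxx = 0x00000001, vyy = 0x00000000
      naive    = PCast(vxx | vyy)       = 0xFFFFFFFF  (undefined)
      vec      = (vxx|vyy) | ~(xx^yy)   = 0xFFFFFEFF
      vec_cmpd = PCast(vec == 1....1)   = 0x00000000  (bit 8 is a
                                           defined, differing pair)
      improved = naive `DifD` vec_cmpd  = 0x00000000
      final    = PCast<1>(improved)     = 0           (defined)

   The comparison is accepted as defined even though bit 0 of xx is
   not: the two values already differ at a fully defined bit. */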


/*------------------------------------------------------------*/
/*--- Emit a test and complaint if something is undefined. ---*/
/*------------------------------------------------------------*/

/* Set the annotations on a dirty helper to indicate that the stack
   pointer and instruction pointer might be read.  This is the
   behaviour of all 'emit-a-complaint' style functions we might
   call. */

static void setHelperAnns ( MCEnv* mce, IRDirty* di ) {
   di->nFxState = 2;
   di->fxState[0].fx     = Ifx_Read;
   di->fxState[0].offset = mce->layout->offset_SP;
   di->fxState[0].size   = mce->layout->sizeof_SP;
   di->fxState[1].fx     = Ifx_Read;
   di->fxState[1].offset = mce->layout->offset_IP;
   di->fxState[1].size   = mce->layout->sizeof_IP;
}


/* Check the supplied **original** atom for undefinedness, and emit a
   complaint if so.  Once that happens, mark it as defined.  This is
   possible because the atom is either a tmp or literal.  If it's a
   tmp, it will be shadowed by a tmp, and so we can set the shadow to
   be defined.  In fact as mentioned above, we will have to allocate a
   new tmp to carry the new 'defined' shadow value, and update the
   original->tmp mapping accordingly; we cannot simply assign a new
   value to an existing shadow tmp as this breaks SSAness -- resulting
   in the post-instrumentation sanity checker spluttering in disapproval.
*/
static void complainIfUndefined ( MCEnv* mce, IRAtom* atom )
{
   IRAtom*  vatom;
   IRType   ty;
   Int      sz;
   IRDirty* di;
   IRAtom*  cond;

   /* Since the original expression is atomic, there's no duplicated
      work generated by making multiple V-expressions for it.  So we
      don't really care about the possibility that someone else may
      also create a V-interpretation for it. */
   tl_assert(isOriginalAtom(mce, atom));
   vatom = expr2vbits( mce, atom );
   tl_assert(isShadowAtom(mce, vatom));
   tl_assert(sameKindedAtoms(atom, vatom));

   ty = typeOfIRExpr(mce->bb->tyenv, vatom);

   /* sz is only used for constructing the error message */
   sz = ty==Ity_I1 ? 0 : sizeofIRType(ty);

   cond = mkPCastTo( mce, Ity_I1, vatom );
   /* cond will be 0 if all defined, and 1 if any not defined. */

   switch (sz) {
      case 0:
         di = unsafeIRDirty_0_N( 0/*regparms*/,
                                 "MC_(helperc_value_check0_fail)",
                                 &MC_(helperc_value_check0_fail),
                                 mkIRExprVec_0()
                               );
         break;
      case 1:
         di = unsafeIRDirty_0_N( 0/*regparms*/,
                                 "MC_(helperc_value_check1_fail)",
                                 &MC_(helperc_value_check1_fail),
                                 mkIRExprVec_0()
                               );
         break;
      case 4:
         di = unsafeIRDirty_0_N( 0/*regparms*/,
                                 "MC_(helperc_value_check4_fail)",
                                 &MC_(helperc_value_check4_fail),
                                 mkIRExprVec_0()
                               );
         break;
      case 8:
         di = unsafeIRDirty_0_N( 0/*regparms*/,
                                 "MC_(helperc_value_check8_fail)",
                                 &MC_(helperc_value_check8_fail),
                                 mkIRExprVec_0()
                               );
         break;
      default:
         di = unsafeIRDirty_0_N( 1/*regparms*/,
                                 "MC_(helperc_complain_undef)",
                                 &MC_(helperc_complain_undef),
                                 mkIRExprVec_1( mkIRExpr_HWord( sz ))
                               );
         break;
   }
   di->guard = cond;
   setHelperAnns( mce, di );
   stmt( mce->bb, IRStmt_Dirty(di));

   /* Set the shadow tmp to be defined.  First, update the
      orig->shadow tmp mapping to reflect the fact that this shadow is
      getting a new value. */
   tl_assert(isIRAtom(vatom));
   /* sameKindedAtoms ... */
   if (vatom->tag == Iex_Tmp) {
      tl_assert(atom->tag == Iex_Tmp);
      newShadowTmp(mce, atom->Iex.Tmp.tmp);
      assign(mce->bb, findShadowTmp(mce, atom->Iex.Tmp.tmp),
                      definedOfType(ty));
   }
}


/*------------------------------------------------------------*/
/*--- Shadowing PUTs/GETs, and indexed variants thereof    ---*/
/*------------------------------------------------------------*/

/* Examine the always-defined sections declared in layout to see if
   the (offset,size) section is within one.  Note, it is an error to
   partially fall into such a region: (offset,size) should either be
   completely in such a region or completely not-in such a region.
*/
static Bool isAlwaysDefd ( MCEnv* mce, Int offset, Int size )
{
   Int minoffD, maxoffD, i;
   Int minoff = offset;
   Int maxoff = minoff + size - 1;
   tl_assert((minoff & ~0xFFFF) == 0);
   tl_assert((maxoff & ~0xFFFF) == 0);

   for (i = 0; i < mce->layout->n_alwaysDefd; i++) {
      minoffD = mce->layout->alwaysDefd[i].offset;
      maxoffD = minoffD + mce->layout->alwaysDefd[i].size - 1;
      tl_assert((minoffD & ~0xFFFF) == 0);
      tl_assert((maxoffD & ~0xFFFF) == 0);

      if (maxoff < minoffD || maxoffD < minoff)
         continue; /* no overlap */
      if (minoff >= minoffD && maxoff <= maxoffD)
         return True; /* completely contained in an always-defd section */

      VG_(tool_panic)("memcheck:isAlwaysDefd:partial overlap");
   }
   return False; /* could not find any containing section */
}


/* Generate into bb suitable actions to shadow this Put.  If the state
   slice is marked 'always defined', do nothing.  Otherwise, write the
   supplied V bits to the shadow state.  We can pass in either an
   original atom or a V-atom, but not both.  In the former case the
   relevant V-bits are then generated from the original.
*/
static
void do_shadow_PUT ( MCEnv* mce,  Int offset,
                     IRAtom* atom, IRAtom* vatom )
{
   IRType ty;
   if (atom) {
      tl_assert(!vatom);
      tl_assert(isOriginalAtom(mce, atom));
      vatom = expr2vbits( mce, atom );
   } else {
      tl_assert(vatom);
      tl_assert(isShadowAtom(mce, vatom));
   }

   ty = typeOfIRExpr(mce->bb->tyenv, vatom);
   tl_assert(ty != Ity_I1);
   if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
      /* later: no ... */
      /* emit code to emit a complaint if any of the vbits are 1. */
      /* complainIfUndefined(mce, atom); */
   } else {
      /* Do a plain shadow Put. */
      stmt( mce->bb, IRStmt_Put( offset + mce->layout->total_sizeB, vatom ) );
   }
}


/* Generate into bb suitable actions to shadow this PutI (passed in
   in pieces).
*/
static
void do_shadow_PUTI ( MCEnv* mce,
                      IRArray* descr, IRAtom* ix, Int bias, IRAtom* atom )
{
   IRAtom* vatom;
   IRType  ty, tyS;
   Int     arrSize;

   tl_assert(isOriginalAtom(mce,atom));
   vatom = expr2vbits( mce, atom );
   tl_assert(sameKindedAtoms(atom, vatom));
   ty      = descr->elemTy;
   tyS     = shadowType(ty);
   arrSize = descr->nElems * sizeofIRType(ty);
   tl_assert(ty != Ity_I1);
   tl_assert(isOriginalAtom(mce,ix));
   complainIfUndefined(mce,ix);
   if (isAlwaysDefd(mce, descr->base, arrSize)) {
      /* later: no ... */
      /* emit code to emit a complaint if any of the vbits are 1. */
      /* complainIfUndefined(mce, atom); */
   } else {
      /* Do a cloned version of the Put that refers to the shadow
         area. */
      IRArray* new_descr
         = mkIRArray( descr->base + mce->layout->total_sizeB,
                      tyS, descr->nElems);
      stmt( mce->bb, IRStmt_PutI( new_descr, ix, bias, vatom ));
   }
}


/* Return an expression which contains the V bits corresponding to the
   given GET (passed in in pieces).
*/
static
IRExpr* shadow_GET ( MCEnv* mce, Int offset, IRType ty )
{
   IRType tyS = shadowType(ty);
   tl_assert(ty != Ity_I1);
   if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
      /* Always defined, return all zeroes of the relevant type */
      return definedOfType(tyS);
   } else {
      /* return a cloned version of the Get that refers to the shadow
         area. */
      return IRExpr_Get( offset + mce->layout->total_sizeB, tyS );
   }
}


/* Return an expression which contains the V bits corresponding to the
   given GETI (passed in in pieces).
*/
static
IRExpr* shadow_GETI ( MCEnv* mce, IRArray* descr, IRAtom* ix, Int bias )
{
   IRType ty      = descr->elemTy;
   IRType tyS     = shadowType(ty);
   Int    arrSize = descr->nElems * sizeofIRType(ty);
   tl_assert(ty != Ity_I1);
   tl_assert(isOriginalAtom(mce,ix));
   complainIfUndefined(mce,ix);
   if (isAlwaysDefd(mce, descr->base, arrSize)) {
      /* Always defined, return all zeroes of the relevant type */
      return definedOfType(tyS);
   } else {
      /* return a cloned version of the Get that refers to the shadow
         area. */
      IRArray* new_descr
         = mkIRArray( descr->base + mce->layout->total_sizeB,
                      tyS, descr->nElems);
      return IRExpr_GetI( new_descr, ix, bias );
   }
}


/*------------------------------------------------------------*/
/*--- Generating approximations for unknown operations,    ---*/
/*--- using lazy-propagate semantics                       ---*/
/*------------------------------------------------------------*/

/* Lazy propagation of undefinedness from two values, resulting in the
   specified shadow type.
*/
static
IRAtom* mkLazy2 ( MCEnv* mce, IRType finalVty, IRAtom* va1, IRAtom* va2 )
{
   IRAtom* at;
   IRType t1 = typeOfIRExpr(mce->bb->tyenv, va1);
   IRType t2 = typeOfIRExpr(mce->bb->tyenv, va2);
   tl_assert(isShadowAtom(mce,va1));
   tl_assert(isShadowAtom(mce,va2));

   /* The general case is inefficient because PCast is an expensive
      operation.  Here are some special cases which use PCast only
      once rather than twice. */

   /* I64 x I64 -> I64 */
   if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I64) {
      if (0) VG_(printf)("mkLazy2: I64 x I64 -> I64\n");
      at = mkUifU(mce, Ity_I64, va1, va2);
      at = mkPCastTo(mce, Ity_I64, at);
      return at;
   }

   /* I64 x I64 -> I32 */
   if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I32) {
      if (0) VG_(printf)("mkLazy2: I64 x I64 -> I32\n");
      at = mkUifU(mce, Ity_I64, va1, va2);
      at = mkPCastTo(mce, Ity_I32, at);
      return at;
   }

   if (0) {
      VG_(printf)("mkLazy2 ");
      ppIRType(t1);
      VG_(printf)("_");
      ppIRType(t2);
      VG_(printf)("_");
      ppIRType(finalVty);
      VG_(printf)("\n");
   }

   /* General case: force everything via 32-bit intermediaries. */
   at = mkPCastTo(mce, Ity_I32, va1);
   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
   at = mkPCastTo(mce, finalVty, at);
   return at;
}


/* Do the lazy propagation game from a null-terminated vector of
   atoms.  This is presumably the arguments to a helper call, so the
   IRCallee info is also supplied in order that we can know which
   arguments should be ignored (via the .mcx_mask field).
*/
static
IRAtom* mkLazyN ( MCEnv* mce,
                  IRAtom** exprvec, IRType finalVtype, IRCallee* cee )
{
   Int     i;
   IRAtom* here;
   IRAtom* curr = definedOfType(Ity_I32);
   for (i = 0; exprvec[i]; i++) {
      tl_assert(i < 32);
      tl_assert(isOriginalAtom(mce, exprvec[i]));
      /* Only take notice of this arg if the callee's mc-exclusion
         mask does not say it is to be excluded. */
      if (cee->mcx_mask & (1<<i)) {
         /* the arg is to be excluded from definedness checking.  Do
            nothing. */
         if (0) VG_(printf)("excluding %s(%d)\n", cee->name, i);
      } else {
         /* calculate the arg's definedness, and pessimistically merge
            it in. */
         here = mkPCastTo( mce, Ity_I32, expr2vbits(mce, exprvec[i]) );
         curr = mkUifU32(mce, here, curr);
      }
   }
   return mkPCastTo(mce, finalVtype, curr );
}
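
/* For illustration: a callee with mcx_mask == 0x5 (binary 101) has
   args 0 and 2 skipped by the loop above, so undefinedness in those
   args can never provoke a complaint; only arg 1 (and any further
   unmasked args) gets PCast-merged into the result. */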


/*------------------------------------------------------------*/
/*--- Generating expensive sequences for exact carry-chain ---*/
/*--- propagation in add/sub and related operations.       ---*/
/*------------------------------------------------------------*/

static
IRAtom* expensiveAddSub ( MCEnv*  mce,
                          Bool    add,
                          IRType  ty,
                          IRAtom* qaa, IRAtom* qbb,
                          IRAtom* aa,  IRAtom* bb )
{
   IRAtom *a_min, *b_min, *a_max, *b_max;
   IROp   opAND, opOR, opXOR, opNOT, opADD, opSUB;

   tl_assert(isShadowAtom(mce,qaa));
   tl_assert(isShadowAtom(mce,qbb));
   tl_assert(isOriginalAtom(mce,aa));
   tl_assert(isOriginalAtom(mce,bb));
   tl_assert(sameKindedAtoms(qaa,aa));
   tl_assert(sameKindedAtoms(qbb,bb));

   switch (ty) {
      case Ity_I32:
         opAND = Iop_And32;
         opOR  = Iop_Or32;
         opXOR = Iop_Xor32;
         opNOT = Iop_Not32;
         opADD = Iop_Add32;
         opSUB = Iop_Sub32;
         break;
      case Ity_I64:
         opAND = Iop_And64;
         opOR  = Iop_Or64;
         opXOR = Iop_Xor64;
         opNOT = Iop_Not64;
         opADD = Iop_Add64;
         opSUB = Iop_Sub64;
         break;
      default:
         VG_(tool_panic)("expensiveAddSub");
   }

   // a_min = aa & ~qaa
   a_min = assignNew(mce,ty,
                     binop(opAND, aa,
                                  assignNew(mce,ty, unop(opNOT, qaa))));

   // b_min = bb & ~qbb
   b_min = assignNew(mce,ty,
                     binop(opAND, bb,
                                  assignNew(mce,ty, unop(opNOT, qbb))));

   // a_max = aa | qaa
   a_max = assignNew(mce,ty, binop(opOR, aa, qaa));

   // b_max = bb | qbb
   b_max = assignNew(mce,ty, binop(opOR, bb, qbb));

   if (add) {
      // result = (qaa | qbb) | ((a_min + b_min) ^ (a_max + b_max))
      return
      assignNew(mce,ty,
         binop( opOR,
                assignNew(mce,ty, binop(opOR, qaa, qbb)),
                assignNew(mce,ty,
                   binop( opXOR,
                          assignNew(mce,ty, binop(opADD, a_min, b_min)),
                          assignNew(mce,ty, binop(opADD, a_max, b_max))
                   )
                )
         )
      );
   } else {
      // result = (qaa | qbb) | ((a_min - b_max) ^ (a_max - b_min))
      return
      assignNew(mce,ty,
         binop( opOR,
                assignNew(mce,ty, binop(opOR, qaa, qbb)),
                assignNew(mce,ty,
                   binop( opXOR,
                          assignNew(mce,ty, binop(opSUB, a_min, b_max)),
                          assignNew(mce,ty, binop(opSUB, a_max, b_min))
                   )
                )
         )
      );
   }
}
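
/* A worked 4-bit example of the addition case, for illustration.
   Let aa = 0011 with qaa = 0001 (bit 0 undefined), and bb = 0001
   fully defined (qbb = 0000):

      a_min = aa & ~qaa = 0010      a_max = aa | qaa = 0011
      b_min = b_max     = 0001
      a_min + b_min     = 0011      a_max + b_max    = 0100
      (min sum) ^ (max sum)     = 0111
      result = (qaa|qbb) | 0111 = 0111

   Bits 1 and 2 are flagged undefined because the unknown carry out
   of bit 0 can reach them, but bit 3 is provably unaffected -- a
   precision the cheaper mkLeft-style approximation (which would
   give 1111 here) cannot achieve. */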


/*------------------------------------------------------------*/
/*--- Helpers for dealing with vector primops.             ---*/
/*------------------------------------------------------------*/

/* Vector pessimisation -- pessimise within each lane individually. */

static IRAtom* mkPCast8x16 ( MCEnv* mce, IRAtom* at )
{
   return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ8x16, at));
}

static IRAtom* mkPCast16x8 ( MCEnv* mce, IRAtom* at )
{
   return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ16x8, at));
}

static IRAtom* mkPCast32x4 ( MCEnv* mce, IRAtom* at )
{
   return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ32x4, at));
}

static IRAtom* mkPCast64x2 ( MCEnv* mce, IRAtom* at )
{
   return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ64x2, at));
}

static IRAtom* mkPCast32x2 ( MCEnv* mce, IRAtom* at )
{
   return assignNew(mce, Ity_I64, unop(Iop_CmpNEZ32x2, at));
}

static IRAtom* mkPCast16x4 ( MCEnv* mce, IRAtom* at )
{
   return assignNew(mce, Ity_I64, unop(Iop_CmpNEZ16x4, at));
}

static IRAtom* mkPCast8x8 ( MCEnv* mce, IRAtom* at )
{
   return assignNew(mce, Ity_I64, unop(Iop_CmpNEZ8x8, at));
}

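/* For illustration, one lane of a hypothetical V128 under
   mkPCast16x8: a lane whose 16 vbits are 0x0000 stays 0x0000 (fully
   defined), while a lane with even one undefined bit, say 0x0040,
   becomes 0xFFFF.  Undefinedness is thus confined to the lanes it
   occurs in, rather than smeared across all 128 bits as a scalar
   mkPCastTo of the whole vector would do. */
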
/* Here's a simple scheme capable of handling ops derived from SSE1
   code, while only generating ops that can be efficiently
   implemented in SSE1. */

/* All-lanes versions are straightforward:

   binary32Fx4(x,y)   ==> PCast32x4(UifUV128(x#,y#))

   unary32Fx4(x)      ==> PCast32x4(x#)

   Lowest-lane-only versions are more complex:

   binary32F0x4(x,y)  ==> SetV128lo32(
                             x#,
                             PCast32(V128to32(UifUV128(x#,y#)))
                          )

   This is perhaps not so obvious.  In particular, it's faster to
   do a V128-bit UifU and then take the bottom 32 bits than the more
   obvious scheme of taking the bottom 32 bits of each operand
   and doing a 32-bit UifU.  Basically since UifU is fast and
   chopping lanes off vector values is slow.

   Finally:

   unary32F0x4(x)     ==> SetV128lo32(
                             x#,
                             PCast32(V128to32(x#))
                          )

   Where:

   PCast32(v#)   = 1Sto32(CmpNE32(v#,0))
   PCast32x4(v#) = CmpNEZ32x4(v#)
*/

static
IRAtom* binary32Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   tl_assert(isShadowAtom(mce, vatomY));
   at = mkUifUV128(mce, vatomX, vatomY);
   at = assignNew(mce, Ity_V128, mkPCast32x4(mce, at));
   return at;
}

static
IRAtom* unary32Fx4 ( MCEnv* mce, IRAtom* vatomX )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   at = assignNew(mce, Ity_V128, mkPCast32x4(mce, vatomX));
   return at;
}

static
IRAtom* binary32F0x4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   tl_assert(isShadowAtom(mce, vatomY));
   at = mkUifUV128(mce, vatomX, vatomY);
   at = assignNew(mce, Ity_I32, unop(Iop_V128to32, at));
   at = mkPCastTo(mce, Ity_I32, at);
   at = assignNew(mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
   return at;
}

static
IRAtom* unary32F0x4 ( MCEnv* mce, IRAtom* vatomX )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   at = assignNew(mce, Ity_I32, unop(Iop_V128to32, vatomX));
   at = mkPCastTo(mce, Ity_I32, at);
   at = assignNew(mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
   return at;
}

/* --- ... and ... 64Fx2 versions of the same ... --- */

static
IRAtom* binary64Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   tl_assert(isShadowAtom(mce, vatomY));
   at = mkUifUV128(mce, vatomX, vatomY);
   at = assignNew(mce, Ity_V128, mkPCast64x2(mce, at));
   return at;
}

static
IRAtom* unary64Fx2 ( MCEnv* mce, IRAtom* vatomX )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   at = assignNew(mce, Ity_V128, mkPCast64x2(mce, vatomX));
   return at;
}

static
IRAtom* binary64F0x2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   tl_assert(isShadowAtom(mce, vatomY));
   at = mkUifUV128(mce, vatomX, vatomY);
   at = assignNew(mce, Ity_I64, unop(Iop_V128to64, at));
   at = mkPCastTo(mce, Ity_I64, at);
   at = assignNew(mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
   return at;
}

static
IRAtom* unary64F0x2 ( MCEnv* mce, IRAtom* vatomX )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   at = assignNew(mce, Ity_I64, unop(Iop_V128to64, vatomX));
   at = mkPCastTo(mce, Ity_I64, at);
   at = assignNew(mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
   return at;
}

/* --- --- Vector saturated narrowing --- --- */

/* This is quite subtle.  What to do is simple:

   Let the original narrowing op be QNarrowW{S,U}xN.  Produce:

      the-narrowing-op( PCastWxN(vatom1), PCastWxN(vatom2))

   Why this is right is not so simple.  Consider a lane in the args,
   vatom1 or 2, doesn't matter.

   After the PCast, that lane is all 0s (defined) or all
   1s (undefined).

   Both signed and unsigned saturating narrowing of all 0s produces
   all 0s, which is what we want.

   The all-1s case is more complex.  Unsigned narrowing interprets an
   all-1s input as the largest unsigned integer, and so produces all
   1s as a result since that is the largest unsigned value at the
   smaller width.

   Signed narrowing interprets all 1s as -1.  Fortunately, -1 narrows
   to -1, so we still wind up with all 1s at the smaller width.

   So: In short, pessimise the args, then apply the original narrowing
   op.
*/
static
IRAtom* vectorNarrowV128 ( MCEnv* mce, IROp narrow_op,
                           IRAtom* vatom1, IRAtom* vatom2)
{
   IRAtom *at1, *at2, *at3;
   IRAtom* (*pcast)( MCEnv*, IRAtom* );
   switch (narrow_op) {
      case Iop_QNarrow32Sx4: pcast = mkPCast32x4; break;
      case Iop_QNarrow16Sx8: pcast = mkPCast16x8; break;
      case Iop_QNarrow16Ux8: pcast = mkPCast16x8; break;
      default: VG_(tool_panic)("vectorNarrowV128");
   }
   tl_assert(isShadowAtom(mce,vatom1));
   tl_assert(isShadowAtom(mce,vatom2));
   at1 = assignNew(mce, Ity_V128, pcast(mce, vatom1));
   at2 = assignNew(mce, Ity_V128, pcast(mce, vatom2));
   at3 = assignNew(mce, Ity_V128, binop(narrow_op, at1, at2));
   return at3;
}

static
IRAtom* vectorNarrow64 ( MCEnv* mce, IROp narrow_op,
                         IRAtom* vatom1, IRAtom* vatom2)
{
   IRAtom *at1, *at2, *at3;
   IRAtom* (*pcast)( MCEnv*, IRAtom* );
   switch (narrow_op) {
      case Iop_QNarrow32Sx2: pcast = mkPCast32x2; break;
      case Iop_QNarrow16Sx4: pcast = mkPCast16x4; break;
      case Iop_QNarrow16Ux4: pcast = mkPCast16x4; break;
      default: VG_(tool_panic)("vectorNarrow64");
   }
   tl_assert(isShadowAtom(mce,vatom1));
   tl_assert(isShadowAtom(mce,vatom2));
   at1 = assignNew(mce, Ity_I64, pcast(mce, vatom1));
   at2 = assignNew(mce, Ity_I64, pcast(mce, vatom2));
   at3 = assignNew(mce, Ity_I64, binop(narrow_op, at1, at2));
   return at3;
}


/* --- --- Vector integer arithmetic --- --- */

/* Simple ... UifU the args and per-lane pessimise the results. */

/* --- V128-bit versions --- */

static
IRAtom* binary8Ix16 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   IRAtom* at;
   at = mkUifUV128(mce, vatom1, vatom2);
   at = mkPCast8x16(mce, at);
   return at;
}

static
IRAtom* binary16Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   IRAtom* at;
   at = mkUifUV128(mce, vatom1, vatom2);
   at = mkPCast16x8(mce, at);
   return at;
}

static
IRAtom* binary32Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   IRAtom* at;
   at = mkUifUV128(mce, vatom1, vatom2);
   at = mkPCast32x4(mce, at);
   return at;
}

static
IRAtom* binary64Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   IRAtom* at;
   at = mkUifUV128(mce, vatom1, vatom2);
   at = mkPCast64x2(mce, at);
   return at;
}

/* --- 64-bit versions --- */

static
IRAtom* binary8Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   IRAtom* at;
   at = mkUifU64(mce, vatom1, vatom2);
   at = mkPCast8x8(mce, at);
   return at;
}

static
IRAtom* binary16Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   IRAtom* at;
   at = mkUifU64(mce, vatom1, vatom2);
   at = mkPCast16x4(mce, at);
   return at;
}

static
IRAtom* binary32Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   IRAtom* at;
   at = mkUifU64(mce, vatom1, vatom2);
   at = mkPCast32x2(mce, at);
   return at;
}


/*------------------------------------------------------------*/
/*--- Generate shadow values from all kinds of IRExprs.    ---*/
/*------------------------------------------------------------*/

static
IRAtom* expr2vbits_Binop ( MCEnv* mce,
                           IROp op,
                           IRAtom* atom1, IRAtom* atom2 )
{
   IRType  and_or_ty;
   IRAtom* (*uifu)    (MCEnv*, IRAtom*, IRAtom*);
   IRAtom* (*difd)    (MCEnv*, IRAtom*, IRAtom*);
   IRAtom* (*improve) (MCEnv*, IRAtom*, IRAtom*);

   IRAtom* vatom1 = expr2vbits( mce, atom1 );
   IRAtom* vatom2 = expr2vbits( mce, atom2 );

   tl_assert(isOriginalAtom(mce,atom1));
   tl_assert(isOriginalAtom(mce,atom2));
   tl_assert(isShadowAtom(mce,vatom1));
   tl_assert(isShadowAtom(mce,vatom2));
   tl_assert(sameKindedAtoms(atom1,vatom1));
   tl_assert(sameKindedAtoms(atom2,vatom2));
   switch (op) {

      /* 64-bit SIMD */

      case Iop_ShrN16x4:
      case Iop_ShrN32x2:
      case Iop_SarN16x4:
      case Iop_SarN32x2:
      case Iop_ShlN16x4:
      case Iop_ShlN32x2:
         /* Same scheme as with all other shifts. */
         complainIfUndefined(mce, atom2);
         return assignNew(mce, Ity_I64, binop(op, vatom1, atom2));

      case Iop_QNarrow32Sx2:
      case Iop_QNarrow16Sx4:
      case Iop_QNarrow16Ux4:
         return vectorNarrow64(mce, op, vatom1, vatom2);

      case Iop_Min8Ux8:
      case Iop_Max8Ux8:
      case Iop_Avg8Ux8:
      case Iop_QSub8Sx8:
      case Iop_QSub8Ux8:
      case Iop_Sub8x8:
      case Iop_CmpGT8Sx8:
      case Iop_CmpEQ8x8:
      case Iop_QAdd8Sx8:
      case Iop_QAdd8Ux8:
      case Iop_Add8x8:
         return binary8Ix8(mce, vatom1, vatom2);

      case Iop_Min16Sx4:
      case Iop_Max16Sx4:
      case Iop_Avg16Ux4:
      case Iop_QSub16Ux4:
      case Iop_QSub16Sx4:
      case Iop_Sub16x4:
      case Iop_Mul16x4:
      case Iop_MulHi16Sx4:
      case Iop_MulHi16Ux4:
      case Iop_CmpGT16Sx4:
      case Iop_CmpEQ16x4:
      case Iop_QAdd16Sx4:
      case Iop_QAdd16Ux4:
      case Iop_Add16x4:
         return binary16Ix4(mce, vatom1, vatom2);

      case Iop_Sub32x2:
      case Iop_CmpGT32Sx2:
      case Iop_CmpEQ32x2:
      case Iop_Add32x2:
         return binary32Ix2(mce, vatom1, vatom2);

      /* 64-bit data-steering */
      case Iop_InterleaveLO32x2:
      case Iop_InterleaveLO16x4:
      case Iop_InterleaveLO8x8:
      case Iop_InterleaveHI32x2:
      case Iop_InterleaveHI16x4:
      case Iop_InterleaveHI8x8:
         return assignNew(mce, Ity_I64, binop(op, vatom1, vatom2));

      /* V128-bit SIMD */

      case Iop_ShrN16x8:
      case Iop_ShrN32x4:
      case Iop_ShrN64x2:
      case Iop_SarN16x8:
      case Iop_SarN32x4:
      case Iop_ShlN16x8:
      case Iop_ShlN32x4:
      case Iop_ShlN64x2:
         /* Same scheme as with all other shifts. */
         complainIfUndefined(mce, atom2);
         return assignNew(mce, Ity_V128, binop(op, vatom1, atom2));

      case Iop_QSub8Ux16:
      case Iop_QSub8Sx16:
      case Iop_Sub8x16:
      case Iop_Min8Ux16:
      case Iop_Max8Ux16:
      case Iop_CmpGT8Sx16:
      case Iop_CmpEQ8x16:
      case Iop_Avg8Ux16:
      case Iop_QAdd8Ux16:
      case Iop_QAdd8Sx16:
      case Iop_Add8x16:
         return binary8Ix16(mce, vatom1, vatom2);

      case Iop_QSub16Ux8:
      case Iop_QSub16Sx8:
      case Iop_Sub16x8:
      case Iop_Mul16x8:
      case Iop_MulHi16Sx8:
      case Iop_MulHi16Ux8:
      case Iop_Min16Sx8:
      case Iop_Max16Sx8:
      case Iop_CmpGT16Sx8:
      case Iop_CmpEQ16x8:
      case Iop_Avg16Ux8:
      case Iop_QAdd16Ux8:
      case Iop_QAdd16Sx8:
      case Iop_Add16x8:
         return binary16Ix8(mce, vatom1, vatom2);

      case Iop_Sub32x4:
      case Iop_CmpGT32Sx4:
      case Iop_CmpEQ32x4:
      case Iop_Add32x4:
         return binary32Ix4(mce, vatom1, vatom2);

      case Iop_Sub64x2:
      case Iop_Add64x2:
         return binary64Ix2(mce, vatom1, vatom2);

      case Iop_QNarrow32Sx4:
      case Iop_QNarrow16Sx8:
      case Iop_QNarrow16Ux8:
         return vectorNarrowV128(mce, op, vatom1, vatom2);

      case Iop_Sub64Fx2:
      case Iop_Mul64Fx2:
      case Iop_Min64Fx2:
      case Iop_Max64Fx2:
      case Iop_Div64Fx2:
      case Iop_CmpLT64Fx2:
      case Iop_CmpLE64Fx2:
      case Iop_CmpEQ64Fx2:
      case Iop_Add64Fx2:
         return binary64Fx2(mce, vatom1, vatom2);

      case Iop_Sub64F0x2:
      case Iop_Mul64F0x2:
      case Iop_Min64F0x2:
      case Iop_Max64F0x2:
      case Iop_Div64F0x2:
      case Iop_CmpLT64F0x2:
      case Iop_CmpLE64F0x2:
      case Iop_CmpEQ64F0x2:
      case Iop_Add64F0x2:
         return binary64F0x2(mce, vatom1, vatom2);

      case Iop_Sub32Fx4:
      case Iop_Mul32Fx4:
      case Iop_Min32Fx4:
      case Iop_Max32Fx4:
      case Iop_Div32Fx4:
      case Iop_CmpLT32Fx4:
      case Iop_CmpLE32Fx4:
      case Iop_CmpEQ32Fx4:
      case Iop_Add32Fx4:
         return binary32Fx4(mce, vatom1, vatom2);

      case Iop_Sub32F0x4:
      case Iop_Mul32F0x4:
      case Iop_Min32F0x4:
      case Iop_Max32F0x4:
      case Iop_Div32F0x4:
      case Iop_CmpLT32F0x4:
      case Iop_CmpLE32F0x4:
      case Iop_CmpEQ32F0x4:
      case Iop_Add32F0x4:
         return binary32F0x4(mce, vatom1, vatom2);

      /* V128-bit data-steering */
      case Iop_SetV128lo32:
      case Iop_SetV128lo64:
      case Iop_64HLtoV128:
      case Iop_InterleaveLO64x2:
      case Iop_InterleaveLO32x4:
      case Iop_InterleaveLO16x8:
      case Iop_InterleaveLO8x16:
      case Iop_InterleaveHI64x2:
      case Iop_InterleaveHI32x4:
      case Iop_InterleaveHI16x8:
      case Iop_InterleaveHI8x16:
         return assignNew(mce, Ity_V128, binop(op, vatom1, vatom2));

      /* I128-bit data-steering */
      case Iop_64HLto128:
         return assignNew(mce, Ity_I128, binop(op, vatom1, vatom2));

      /* Scalar floating point */

      case Iop_RoundF64:
      case Iop_F64toI64:
      case Iop_I64toF64:
         /* First arg is I32 (rounding mode), second is F64 or I64
            (data). */
         return mkLazy2(mce, Ity_I64, vatom1, vatom2);

      case Iop_PRemC3210F64: case Iop_PRem1C3210F64:
         /* Takes two F64 args. */
      case Iop_F64toI32:
      case Iop_F64toF32:
         /* First arg is I32 (rounding mode), second is F64 (data). */
1603 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
1604
1605 case Iop_F64toI16:
1606 /* First arg is I32 (rounding mode), second is F64 (data). */
1607 return mkLazy2(mce, Ity_I16, vatom1, vatom2);
1608
1609 case Iop_ScaleF64:
1610 case Iop_Yl2xF64:
1611 case Iop_Yl2xp1F64:
1612 case Iop_PRemF64:
sewardj96403eb2005-04-01 20:20:12 +00001613 case Iop_PRem1F64:
sewardj95448072004-11-22 20:19:51 +00001614 case Iop_AtanF64:
1615 case Iop_AddF64:
1616 case Iop_DivF64:
1617 case Iop_SubF64:
1618 case Iop_MulF64:
1619 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
1620
1621 case Iop_CmpF64:
1622 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
1623
1624 /* non-FP after here */
1625
1626 case Iop_DivModU64to32:
1627 case Iop_DivModS64to32:
1628 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
1629
sewardj69a13322005-04-23 01:14:51 +00001630 case Iop_DivModU128to64:
1631 case Iop_DivModS128to64:
1632 return mkLazy2(mce, Ity_I128, vatom1, vatom2);
1633
sewardj95448072004-11-22 20:19:51 +00001634 case Iop_16HLto32:
sewardj170ee212004-12-10 18:57:51 +00001635 return assignNew(mce, Ity_I32, binop(op, vatom1, vatom2));
sewardj95448072004-11-22 20:19:51 +00001636 case Iop_32HLto64:
sewardj170ee212004-12-10 18:57:51 +00001637 return assignNew(mce, Ity_I64, binop(op, vatom1, vatom2));
sewardj95448072004-11-22 20:19:51 +00001638
sewardj6cf40ff2005-04-20 22:31:26 +00001639 case Iop_MullS64:
1640 case Iop_MullU64: {
1641 IRAtom* vLo64 = mkLeft64(mce, mkUifU64(mce, vatom1,vatom2));
1642 IRAtom* vHi64 = mkPCastTo(mce, Ity_I64, vLo64);
1643 return assignNew(mce, Ity_I128, binop(Iop_64HLto128, vHi64, vLo64));
1644 }
1645
sewardj95448072004-11-22 20:19:51 +00001646 case Iop_MullS32:
1647 case Iop_MullU32: {
1648 IRAtom* vLo32 = mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
1649 IRAtom* vHi32 = mkPCastTo(mce, Ity_I32, vLo32);
1650 return assignNew(mce, Ity_I64, binop(Iop_32HLto64, vHi32, vLo32));
1651 }
1652
1653 case Iop_MullS16:
1654 case Iop_MullU16: {
1655 IRAtom* vLo16 = mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
1656 IRAtom* vHi16 = mkPCastTo(mce, Ity_I16, vLo16);
1657 return assignNew(mce, Ity_I32, binop(Iop_16HLto32, vHi16, vLo16));
1658 }
1659
1660 case Iop_MullS8:
1661 case Iop_MullU8: {
1662 IRAtom* vLo8 = mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
1663 IRAtom* vHi8 = mkPCastTo(mce, Ity_I8, vLo8);
1664 return assignNew(mce, Ity_I16, binop(Iop_8HLto16, vHi8, vLo8));
1665 }
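
      /* Sketch of what the Mull* cases generate, e.g. for MullU16
         (illustrative only):

            vLo16 = Left16( UifU16(vatom1, vatom2) )
            vHi16 = PCast16( vLo16 )
            v     = 16HLto32( vHi16, vLo16 )

         Left* smears each undefined bit towards the MSB, and the
         PCast makes the high half all-defined or all-undefined, so
         any undefined input bit poisons its own position and
         everything above it: sound, though pessimistic, for a
         widening multiply. */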

      case Iop_DivS32:
      case Iop_DivU32:
         return mkLazy2(mce, Ity_I32, vatom1, vatom2);

      case Iop_Add32:
         if (mce->bogusLiterals)
            return expensiveAddSub(mce,True,Ity_I32,
                                   vatom1,vatom2, atom1,atom2);
         else
            goto cheap_AddSub32;
      case Iop_Sub32:
         if (mce->bogusLiterals)
            return expensiveAddSub(mce,False,Ity_I32,
                                   vatom1,vatom2, atom1,atom2);
         else
            goto cheap_AddSub32;

      cheap_AddSub32:
      case Iop_Mul32:
         return mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));

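      /* Worked example of the cheap scheme (illustrative).  For
         Add32 with vatom1 = 0x00000100 (bit 8 undefined) and a
         fully defined vatom2, UifU32 gives 0x00000100 and Left32
         (conceptually x | -x) gives 0xFFFFFF00: every bit at or
         above the undefined one becomes undefined, modelling
         worst-case carry propagation.  The expensive scheme is used
         only when the block contains "bogus" literals (see
         isBogusAtom below), where this smearing would otherwise
         cause false positives. */
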
      /* could do better: Add64, Sub64 */
      case Iop_Add64:
         if (mce->bogusLiterals)
            return expensiveAddSub(mce,True,Ity_I64,
                                   vatom1,vatom2, atom1,atom2);
         else
            goto cheap_AddSub64;
      case Iop_Sub64:
         if (mce->bogusLiterals)
            return expensiveAddSub(mce,False,Ity_I64,
                                   vatom1,vatom2, atom1,atom2);
         else
            goto cheap_AddSub64;

      cheap_AddSub64:
      case Iop_Mul64:
         return mkLeft64(mce, mkUifU64(mce, vatom1,vatom2));

      case Iop_Mul16:
      case Iop_Add16:
      case Iop_Sub16:
         return mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));

      case Iop_Sub8:
      case Iop_Add8:
         return mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));

      case Iop_CmpEQ64:
         if (mce->bogusLiterals)
            return expensiveCmpEQorNE(mce,Ity_I64, vatom1,vatom2, atom1,atom2 );
         else
            goto cheap_cmp64;
      cheap_cmp64:
      case Iop_CmpLE64S: case Iop_CmpLE64U:
      case Iop_CmpLT64U: case Iop_CmpLT64S:
      case Iop_CmpNE64:
         return mkPCastTo(mce, Ity_I1, mkUifU64(mce, vatom1,vatom2));

      case Iop_CmpEQ32:
         if (mce->bogusLiterals)
            return expensiveCmpEQorNE(mce,Ity_I32, vatom1,vatom2, atom1,atom2 );
         else
            goto cheap_cmp32;
      cheap_cmp32:
      case Iop_CmpLE32S: case Iop_CmpLE32U:
      case Iop_CmpLT32U: case Iop_CmpLT32S:
      case Iop_CmpNE32:
         return mkPCastTo(mce, Ity_I1, mkUifU32(mce, vatom1,vatom2));

      case Iop_CmpEQ16: case Iop_CmpNE16:
         return mkPCastTo(mce, Ity_I1, mkUifU16(mce, vatom1,vatom2));

      case Iop_CmpEQ8: case Iop_CmpNE8:
         return mkPCastTo(mce, Ity_I1, mkUifU8(mce, vatom1,vatom2));
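
      /* For the cheap comparison cases the result is a single I1,
         so the only cheap sound rule is: defined iff both inputs
         are fully defined.  E.g. cheap CmpEQ8 with vatom1 = 0x01
         (one undefined bit) yields PCast-to-I1(UifU8) = 1, an
         undefined result.  expensiveCmpEQorNE can presumably do
         better when the defined bits of the two operands already
         differ, making the outcome independent of the undefined
         bits. */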

      case Iop_Shl32: case Iop_Shr32: case Iop_Sar32:
         /* Complain if the shift amount is undefined.  Then simply
            shift the first arg's V bits by the real shift amount. */
         complainIfUndefined(mce, atom2);
         return assignNew(mce, Ity_I32, binop(op, vatom1, atom2));

      case Iop_Shl16: case Iop_Shr16: case Iop_Sar16:
         /* Same scheme as with 32-bit shifts. */
         complainIfUndefined(mce, atom2);
         return assignNew(mce, Ity_I16, binop(op, vatom1, atom2));

      case Iop_Shl8: case Iop_Shr8:
         /* Same scheme as with 32-bit shifts. */
         complainIfUndefined(mce, atom2);
         return assignNew(mce, Ity_I8, binop(op, vatom1, atom2));

      case Iop_Shl64: case Iop_Shr64: case Iop_Sar64:
         /* Same scheme as with 32-bit shifts. */
         complainIfUndefined(mce, atom2);
         return assignNew(mce, Ity_I64, binop(op, vatom1, atom2));

      case Iop_AndV128:
         uifu = mkUifUV128; difd = mkDifDV128;
         and_or_ty = Ity_V128; improve = mkImproveANDV128; goto do_And_Or;
      case Iop_And64:
         uifu = mkUifU64; difd = mkDifD64;
         and_or_ty = Ity_I64; improve = mkImproveAND64; goto do_And_Or;
      case Iop_And32:
         uifu = mkUifU32; difd = mkDifD32;
         and_or_ty = Ity_I32; improve = mkImproveAND32; goto do_And_Or;
      case Iop_And16:
         uifu = mkUifU16; difd = mkDifD16;
         and_or_ty = Ity_I16; improve = mkImproveAND16; goto do_And_Or;
      case Iop_And8:
         uifu = mkUifU8; difd = mkDifD8;
         and_or_ty = Ity_I8; improve = mkImproveAND8; goto do_And_Or;

      case Iop_OrV128:
         uifu = mkUifUV128; difd = mkDifDV128;
         and_or_ty = Ity_V128; improve = mkImproveORV128; goto do_And_Or;
      case Iop_Or64:
         uifu = mkUifU64; difd = mkDifD64;
         and_or_ty = Ity_I64; improve = mkImproveOR64; goto do_And_Or;
      case Iop_Or32:
         uifu = mkUifU32; difd = mkDifD32;
         and_or_ty = Ity_I32; improve = mkImproveOR32; goto do_And_Or;
      case Iop_Or16:
         uifu = mkUifU16; difd = mkDifD16;
         and_or_ty = Ity_I16; improve = mkImproveOR16; goto do_And_Or;
      case Iop_Or8:
         uifu = mkUifU8; difd = mkDifD8;
         and_or_ty = Ity_I8; improve = mkImproveOR8; goto do_And_Or;

      do_And_Or:
         return
         assignNew(
            mce,
            and_or_ty,
            difd(mce, uifu(mce, vatom1, vatom2),
                      difd(mce, improve(mce, atom1, vatom1),
                                improve(mce, atom2, vatom2) ) ) );
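
      /* The do_And_Or scheme captures the one case where an
         undefined input cannot matter: for AND, a defined 0 bit in
         either operand forces a defined 0 result bit; dually for OR
         with a defined 1.  The improve terms compute, per bit,
         "this operand does not force the result", and the outer
         DifD knocks forced bits out of the naive UifU answer.
         Worked example for And8 (illustrative, assuming
         mkImproveAND8 yields atom | vatom):

            atom1 = 0x00 defined (vatom1 = 0x00), vatom2 = 0xFF
            UifU -> 0xFF;  improve1 -> 0x00;  improve2 -> 0xFF
            DifD(0xFF, DifD(0x00, 0xFF)) = 0xFF & 0x00 = 0x00

         so And8 with a defined zero operand is fully defined, as it
         should be. */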

      case Iop_Xor8:
         return mkUifU8(mce, vatom1, vatom2);
      case Iop_Xor16:
         return mkUifU16(mce, vatom1, vatom2);
      case Iop_Xor32:
         return mkUifU32(mce, vatom1, vatom2);
      case Iop_Xor64:
         return mkUifU64(mce, vatom1, vatom2);
      case Iop_XorV128:
         return mkUifUV128(mce, vatom1, vatom2);

      default:
         ppIROp(op);
         VG_(tool_panic)("memcheck:expr2vbits_Binop");
   }
}


static
IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
{
   IRAtom* vatom = expr2vbits( mce, atom );
   tl_assert(isOriginalAtom(mce,atom));
   switch (op) {

      case Iop_Sqrt64Fx2:
         return unary64Fx2(mce, vatom);

      case Iop_Sqrt64F0x2:
         return unary64F0x2(mce, vatom);

      case Iop_Sqrt32Fx4:
      case Iop_RSqrt32Fx4:
      case Iop_Recip32Fx4:
         return unary32Fx4(mce, vatom);

      case Iop_Sqrt32F0x4:
      case Iop_RSqrt32F0x4:
      case Iop_Recip32F0x4:
         return unary32F0x4(mce, vatom);

      case Iop_32UtoV128:
      case Iop_64UtoV128:
         return assignNew(mce, Ity_V128, unop(op, vatom));

      case Iop_F32toF64:
      case Iop_I32toF64:
      case Iop_NegF64:
      case Iop_SinF64:
      case Iop_CosF64:
      case Iop_TanF64:
      case Iop_SqrtF64:
      case Iop_AbsF64:
      case Iop_2xm1F64:
      case Iop_Clz64:
      case Iop_Ctz64:
         return mkPCastTo(mce, Ity_I64, vatom);

      case Iop_Clz32:
      case Iop_Ctz32:
         return mkPCastTo(mce, Ity_I32, vatom);

      case Iop_1Uto64:
      case Iop_8Uto64:
      case Iop_8Sto64:
      case Iop_16Uto64:
      case Iop_16Sto64:
      case Iop_32Sto64:
      case Iop_32Uto64:
      case Iop_V128to64:
      case Iop_V128HIto64:
      case Iop_128HIto64:
      case Iop_128to64:
         return assignNew(mce, Ity_I64, unop(op, vatom));

      case Iop_64to32:
      case Iop_64HIto32:
      case Iop_1Uto32:
      case Iop_8Uto32:
      case Iop_16Uto32:
      case Iop_16Sto32:
      case Iop_8Sto32:
         return assignNew(mce, Ity_I32, unop(op, vatom));

      case Iop_8Sto16:
      case Iop_8Uto16:
      case Iop_32to16:
      case Iop_32HIto16:
      case Iop_64to16:
         return assignNew(mce, Ity_I16, unop(op, vatom));

      case Iop_1Uto8:
      case Iop_16to8:
      case Iop_32to8:
      case Iop_64to8:
         return assignNew(mce, Ity_I8, unop(op, vatom));

      case Iop_32to1:
         return assignNew(mce, Ity_I1, unop(Iop_32to1, vatom));

      case Iop_64to1:
         return assignNew(mce, Ity_I1, unop(Iop_64to1, vatom));

      case Iop_ReinterpF64asI64:
      case Iop_ReinterpI64asF64:
      case Iop_ReinterpI32asF32:
      case Iop_NotV128:
      case Iop_Not64:
      case Iop_Not32:
      case Iop_Not16:
      case Iop_Not8:
      case Iop_Not1:
         return vatom;
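
      /* The Reinterp* and Not* cases pass the shadow through
         unchanged: each result bit is exactly one input bit or its
         complement, so its definedness equals that input bit's
         definedness.  In particular Not* must not invert the V
         bits; knowing a bit's value means knowing its complement
         too. */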

      /* Neg* really fall under the Add/Sub banner, and as such you
         might think would qualify for the 'expensive add/sub'
         treatment.  However, in this case since the implied literal
         is zero (0 - arg), we just do the cheap thing anyway. */
      case Iop_Neg8:
         return mkLeft8(mce, vatom);
      case Iop_Neg16:
         return mkLeft16(mce, vatom);
      case Iop_Neg32:
         return mkLeft32(mce, vatom);

      default:
         ppIROp(op);
         VG_(tool_panic)("memcheck:expr2vbits_Unop");
   }
}


/* Worker function; do not call directly. */
static
IRAtom* expr2vbits_LDle_WRK ( MCEnv* mce, IRType ty, IRAtom* addr, UInt bias )
{
   void*    helper;
   Char*    hname;
   IRDirty* di;
   IRTemp   datavbits;
   IRAtom*  addrAct;

   tl_assert(isOriginalAtom(mce,addr));

   /* First, emit a definedness test for the address.  This also sets
      the address (shadow) to 'defined' following the test. */
   complainIfUndefined( mce, addr );

   /* Now cook up a call to the relevant helper function, to read the
      data V bits from shadow memory. */
   ty = shadowType(ty);
   switch (ty) {
      case Ity_I64: helper = &MC_(helperc_LOADV8);
                    hname = "MC_(helperc_LOADV8)";
                    break;
      case Ity_I32: helper = &MC_(helperc_LOADV4);
                    hname = "MC_(helperc_LOADV4)";
                    break;
      case Ity_I16: helper = &MC_(helperc_LOADV2);
                    hname = "MC_(helperc_LOADV2)";
                    break;
      case Ity_I8:  helper = &MC_(helperc_LOADV1);
                    hname = "MC_(helperc_LOADV1)";
                    break;
      default:      ppIRType(ty);
                    VG_(tool_panic)("memcheck:do_shadow_LDle");
   }

   /* Generate the actual address into addrAct. */
   if (bias == 0) {
      addrAct = addr;
   } else {
      IROp    mkAdd;
      IRAtom* eBias;
      IRType  tyAddr = mce->hWordTy;
      tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
      mkAdd = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
      eBias = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
      addrAct = assignNew(mce, tyAddr, binop(mkAdd, addr, eBias) );
   }

   /* We need to have a place to park the V bits we're just about to
      read. */
   datavbits = newIRTemp(mce->bb->tyenv, ty);
   di = unsafeIRDirty_1_N( datavbits,
                           1/*regparms*/, hname, helper,
                           mkIRExprVec_1( addrAct ));
   setHelperAnns( mce, di );
   stmt( mce->bb, IRStmt_Dirty(di) );

   return mkexpr(datavbits);
}


static
IRAtom* expr2vbits_LDle ( MCEnv* mce, IRType ty, IRAtom* addr, UInt bias )
{
   IRAtom *v64hi, *v64lo;
   switch (shadowType(ty)) {
      case Ity_I8:
      case Ity_I16:
      case Ity_I32:
      case Ity_I64:
         return expr2vbits_LDle_WRK(mce, ty, addr, bias);
      case Ity_V128:
         v64lo = expr2vbits_LDle_WRK(mce, Ity_I64, addr, bias);
         v64hi = expr2vbits_LDle_WRK(mce, Ity_I64, addr, bias+8);
         return assignNew( mce,
                           Ity_V128,
                           binop(Iop_64HLtoV128, v64hi, v64lo));
      default:
         VG_(tool_panic)("expr2vbits_LDle");
   }
}
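
/* Note (illustrative): there is no 128-bit shadow-load helper, so a
   V128 load is synthesised from two 64-bit shadow loads, low half at
   addr+bias and high half at addr+bias+8, glued together with
   Iop_64HLtoV128.  This matches the little-endian layout the LDle
   name implies; do_shadow_STle below performs the mirror-image split
   on the store side. */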


static
IRAtom* expr2vbits_Mux0X ( MCEnv* mce,
                           IRAtom* cond, IRAtom* expr0, IRAtom* exprX )
{
   IRAtom *vbitsC, *vbits0, *vbitsX;
   IRType ty;
   /* Given Mux0X(cond,expr0,exprX), generate
         Mux0X(cond,expr0#,exprX#) `UifU` PCast(cond#)
      That is, steer the V bits like the originals, but trash the
      result if the steering value is undefined.  This gives
      lazy propagation. */
   tl_assert(isOriginalAtom(mce, cond));
   tl_assert(isOriginalAtom(mce, expr0));
   tl_assert(isOriginalAtom(mce, exprX));

   vbitsC = expr2vbits(mce, cond);
   vbits0 = expr2vbits(mce, expr0);
   vbitsX = expr2vbits(mce, exprX);
   ty = typeOfIRExpr(mce->bb->tyenv, vbits0);

   return
      mkUifU(mce, ty, assignNew(mce, ty, IRExpr_Mux0X(cond, vbits0, vbitsX)),
                      mkPCastTo(mce, ty, vbitsC) );
}
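
/* Worked consequence (illustrative): if cond is fully defined,
   PCast(cond#) is all-zeroes and the UifU is a no-op, so the result
   shadow is simply the shadow of whichever arm is selected.  If any
   bit of cond is undefined, PCast(cond#) is all-ones and the whole
   result becomes undefined.  No error is reported here; the poison
   just propagates until the value is used in an observable way. */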

/* --------- This is the main expression-handling function. --------- */

static
IRExpr* expr2vbits ( MCEnv* mce, IRExpr* e )
{
   switch (e->tag) {

      case Iex_Get:
         return shadow_GET( mce, e->Iex.Get.offset, e->Iex.Get.ty );

      case Iex_GetI:
         return shadow_GETI( mce, e->Iex.GetI.descr,
                                  e->Iex.GetI.ix, e->Iex.GetI.bias );

      case Iex_Tmp:
         return IRExpr_Tmp( findShadowTmp(mce, e->Iex.Tmp.tmp) );

      case Iex_Const:
         return definedOfType(shadowType(typeOfIRExpr(mce->bb->tyenv, e)));

      case Iex_Binop:
         return expr2vbits_Binop(
                   mce,
                   e->Iex.Binop.op,
                   e->Iex.Binop.arg1, e->Iex.Binop.arg2
                );

      case Iex_Unop:
         return expr2vbits_Unop( mce, e->Iex.Unop.op, e->Iex.Unop.arg );

      case Iex_LDle:
         return expr2vbits_LDle( mce, e->Iex.LDle.ty,
                                      e->Iex.LDle.addr, 0/*addr bias*/ );

      case Iex_CCall:
         return mkLazyN( mce, e->Iex.CCall.args,
                              e->Iex.CCall.retty,
                              e->Iex.CCall.cee );

      case Iex_Mux0X:
         return expr2vbits_Mux0X( mce, e->Iex.Mux0X.cond, e->Iex.Mux0X.expr0,
                                       e->Iex.Mux0X.exprX);

      default:
         VG_(printf)("\n");
         ppIRExpr(e);
         VG_(printf)("\n");
         VG_(tool_panic)("memcheck: expr2vbits");
   }
}

/*------------------------------------------------------------*/
/*--- Generate shadow stmts from all kinds of IRStmts.     ---*/
/*------------------------------------------------------------*/

/* Widen a value to the host word size. */

static
IRExpr* zwidenToHostWord ( MCEnv* mce, IRAtom* vatom )
{
   IRType ty, tyH;

   /* vatom is vbits-value and as such can only have a shadow type. */
   tl_assert(isShadowAtom(mce,vatom));

   ty  = typeOfIRExpr(mce->bb->tyenv, vatom);
   tyH = mce->hWordTy;

   if (tyH == Ity_I32) {
      switch (ty) {
         case Ity_I32: return vatom;
         case Ity_I16: return assignNew(mce, tyH, unop(Iop_16Uto32, vatom));
         case Ity_I8:  return assignNew(mce, tyH, unop(Iop_8Uto32, vatom));
         default:      goto unhandled;
      }
   } else
   if (tyH == Ity_I64) {
      switch (ty) {
         case Ity_I32: return assignNew(mce, tyH, unop(Iop_32Uto64, vatom));
         case Ity_I16: return assignNew(mce, tyH, unop(Iop_32Uto64,
                              assignNew(mce, Ity_I32, unop(Iop_16Uto32, vatom))));
         case Ity_I8:  return assignNew(mce, tyH, unop(Iop_32Uto64,
                              assignNew(mce, Ity_I32, unop(Iop_8Uto32, vatom))));
         default:      goto unhandled;
      }
   } else {
      goto unhandled;
   }
  unhandled:
   VG_(printf)("\nty = "); ppIRType(ty); VG_(printf)("\n");
   VG_(tool_panic)("zwidenToHostWord");
}
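
/* Note (illustrative): the widening is always zero-extension, e.g.
   an Ity_I8 shadow on a 64-bit host becomes 8Uto32 then 32Uto64.
   Since 0 means "defined" in the V-bit encoding, the padding bits
   claim to be defined; that is harmless on the assumption that the
   store helpers only consult the low-order bits matching the real
   data width. */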


/* Generate a shadow store.  addr is always the original address atom.
   You can pass in either originals or V-bits for the data atom, but
   obviously not both. */

static
void do_shadow_STle ( MCEnv* mce,
                      IRAtom* addr, UInt bias,
                      IRAtom* data, IRAtom* vdata )
{
   IROp     mkAdd;
   IRType   ty, tyAddr;
   IRDirty  *di, *diLo64, *diHi64;
   IRAtom   *addrAct, *addrLo64, *addrHi64;
   IRAtom   *vdataLo64, *vdataHi64;
   IRAtom   *eBias, *eBias0, *eBias8;
   void*    helper = NULL;
   Char*    hname = NULL;

   tyAddr = mce->hWordTy;
   mkAdd  = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
   tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );

   di = diLo64 = diHi64 = NULL;
   eBias = eBias0 = eBias8 = NULL;
   addrAct = addrLo64 = addrHi64 = NULL;
   vdataLo64 = vdataHi64 = NULL;

   if (data) {
      tl_assert(!vdata);
      tl_assert(isOriginalAtom(mce, data));
      tl_assert(bias == 0);
      vdata = expr2vbits( mce, data );
   } else {
      tl_assert(vdata);
   }

   tl_assert(isOriginalAtom(mce,addr));
   tl_assert(isShadowAtom(mce,vdata));

   ty = typeOfIRExpr(mce->bb->tyenv, vdata);

   /* First, emit a definedness test for the address.  This also sets
      the address (shadow) to 'defined' following the test. */
   complainIfUndefined( mce, addr );

   /* Now decide which helper function to call to write the data V
      bits into shadow memory. */
   switch (ty) {
      case Ity_V128: /* we'll use the helper twice */
      case Ity_I64: helper = &MC_(helperc_STOREV8);
                    hname = "MC_(helperc_STOREV8)";
                    break;
      case Ity_I32: helper = &MC_(helperc_STOREV4);
                    hname = "MC_(helperc_STOREV4)";
                    break;
      case Ity_I16: helper = &MC_(helperc_STOREV2);
                    hname = "MC_(helperc_STOREV2)";
                    break;
      case Ity_I8:  helper = &MC_(helperc_STOREV1);
                    hname = "MC_(helperc_STOREV1)";
                    break;
      default:      VG_(tool_panic)("memcheck:do_shadow_STle");
   }

   if (ty == Ity_V128) {

      /* V128-bit case */
      /* See comment in next clause re 64-bit regparms */
      eBias0    = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
      addrLo64  = assignNew(mce, tyAddr, binop(mkAdd, addr, eBias0) );
      vdataLo64 = assignNew(mce, Ity_I64, unop(Iop_V128to64, vdata));
      diLo64    = unsafeIRDirty_0_N(
                     1/*regparms*/, hname, helper,
                     mkIRExprVec_2( addrLo64, vdataLo64 ));

      eBias8    = tyAddr==Ity_I32 ? mkU32(bias+8) : mkU64(bias+8);
      addrHi64  = assignNew(mce, tyAddr, binop(mkAdd, addr, eBias8) );
      vdataHi64 = assignNew(mce, Ity_I64, unop(Iop_V128HIto64, vdata));
      diHi64    = unsafeIRDirty_0_N(
                     1/*regparms*/, hname, helper,
                     mkIRExprVec_2( addrHi64, vdataHi64 ));

      setHelperAnns( mce, diLo64 );
      setHelperAnns( mce, diHi64 );
      stmt( mce->bb, IRStmt_Dirty(diLo64) );
      stmt( mce->bb, IRStmt_Dirty(diHi64) );

   } else {

      /* 8/16/32/64-bit cases */
      /* Generate the actual address into addrAct. */
      if (bias == 0) {
         addrAct = addr;
      } else {
         eBias   = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
         addrAct = assignNew(mce, tyAddr, binop(mkAdd, addr, eBias) );
      }

      if (ty == Ity_I64) {
         /* We can't do this with regparm 2 on 32-bit platforms, since
            the back ends aren't clever enough to handle 64-bit
            regparm args.  Therefore be different. */
         di = unsafeIRDirty_0_N(
                 1/*regparms*/, hname, helper,
                 mkIRExprVec_2( addrAct, vdata ));
      } else {
         di = unsafeIRDirty_0_N(
                 2/*regparms*/, hname, helper,
                 mkIRExprVec_2( addrAct,
                                zwidenToHostWord( mce, vdata )));
      }
      setHelperAnns( mce, di );
      stmt( mce->bb, IRStmt_Dirty(di) );
   }

}
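
/* Sketch (illustrative) of what the above emits for a plain 32-bit
   store STle(addr) = data:

      <complain if addr# is not defined>
      t_v = expr2vbits(data)                            -- Ity_I32
      DIRTY call MC_(helperc_STOREV4)(addr, zwiden(t_v))

   The V128 case is the only one needing two helper calls: the shadow
   is split with Iop_V128to64 / Iop_V128HIto64 and written as two
   64-bit stores at addr and addr+8. */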


/* Do lazy pessimistic propagation through a dirty helper call, by
   looking at the annotations on it.  This is the most complex part
   of Memcheck. */

static IRType szToITy ( Int n )
{
   switch (n) {
      case 1: return Ity_I8;
      case 2: return Ity_I16;
      case 4: return Ity_I32;
      case 8: return Ity_I64;
      default: VG_(tool_panic)("szToITy(memcheck)");
   }
}

static
void do_shadow_Dirty ( MCEnv* mce, IRDirty* d )
{
   Int     i, n, offset, toDo, gSz, gOff;
   IRAtom  *src, *here, *curr;
   IRType  tyAddr, tySrc, tyDst;
   IRTemp  dst;

   /* First check the guard. */
   complainIfUndefined(mce, d->guard);

   /* Now round up all inputs and PCast over them. */
   curr = definedOfType(Ity_I32);

   /* Inputs: unmasked args */
   for (i = 0; d->args[i]; i++) {
      if (d->cee->mcx_mask & (1<<i)) {
         /* ignore this arg */
      } else {
         here = mkPCastTo( mce, Ity_I32, expr2vbits(mce, d->args[i]) );
         curr = mkUifU32(mce, here, curr);
      }
   }

   /* Inputs: guest state that we read. */
   for (i = 0; i < d->nFxState; i++) {
      tl_assert(d->fxState[i].fx != Ifx_None);
      if (d->fxState[i].fx == Ifx_Write)
         continue;

      /* Ignore any sections marked as 'always defined'. */
      if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size )) {
         if (0)
         VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
                     d->fxState[i].offset, d->fxState[i].size );
         continue;
      }

      /* This state element is read or modified.  So we need to
         consider it.  If larger than 8 bytes, deal with it in 8-byte
         chunks. */
      gSz  = d->fxState[i].size;
      gOff = d->fxState[i].offset;
      tl_assert(gSz > 0);
      while (True) {
         if (gSz == 0) break;
         n = gSz <= 8 ? gSz : 8;
         /* update 'curr' with UifU of the state slice
            gOff .. gOff+n-1 */
         tySrc = szToITy( n );
         src   = assignNew( mce, tySrc,
                            shadow_GET(mce, gOff, tySrc ) );
         here = mkPCastTo( mce, Ity_I32, src );
         curr = mkUifU32(mce, here, curr);
         gSz -= n;
         gOff += n;
      }

   }

   /* Inputs: memory.  First set up some info needed regardless of
      whether we're doing reads or writes. */
   tyAddr = Ity_INVALID;

   if (d->mFx != Ifx_None) {
      /* Because we may do multiple shadow loads/stores from the same
         base address, it's best to do a single test of its
         definedness right now.  Post-instrumentation optimisation
         should remove all but this test. */
      tl_assert(d->mAddr);
      complainIfUndefined(mce, d->mAddr);

      tyAddr = typeOfIRExpr(mce->bb->tyenv, d->mAddr);
      tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);
      tl_assert(tyAddr == mce->hWordTy); /* not really right */
   }

   /* Deal with memory inputs (reads or modifies) */
   if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
      offset = 0;
      toDo   = d->mSize;
      /* chew off 32-bit chunks */
      while (toDo >= 4) {
         here = mkPCastTo(
                   mce, Ity_I32,
                   expr2vbits_LDle ( mce, Ity_I32,
                                     d->mAddr, d->mSize - toDo )
                );
         curr = mkUifU32(mce, here, curr);
         toDo -= 4;
      }
      /* chew off 16-bit chunks */
      while (toDo >= 2) {
         here = mkPCastTo(
                   mce, Ity_I32,
                   expr2vbits_LDle ( mce, Ity_I16,
                                     d->mAddr, d->mSize - toDo )
                );
         curr = mkUifU32(mce, here, curr);
         toDo -= 2;
      }
      tl_assert(toDo == 0); /* also need to handle 1-byte excess */
   }

   /* Whew!  So curr is a 32-bit V-value summarising pessimistically
      all the inputs to the helper.  Now we need to re-distribute the
      results to all destinations. */

   /* Outputs: the destination temporary, if there is one. */
   if (d->tmp != IRTemp_INVALID) {
      dst   = findShadowTmp(mce, d->tmp);
      tyDst = typeOfIRTemp(mce->bb->tyenv, d->tmp);
      assign( mce->bb, dst, mkPCastTo( mce, tyDst, curr) );
   }

   /* Outputs: guest state that we write or modify. */
   for (i = 0; i < d->nFxState; i++) {
      tl_assert(d->fxState[i].fx != Ifx_None);
      if (d->fxState[i].fx == Ifx_Read)
         continue;
      /* Ignore any sections marked as 'always defined'. */
      if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size ))
         continue;
      /* This state element is written or modified.  So we need to
         consider it.  If larger than 8 bytes, deal with it in 8-byte
         chunks. */
      gSz  = d->fxState[i].size;
      gOff = d->fxState[i].offset;
      tl_assert(gSz > 0);
      while (True) {
         if (gSz == 0) break;
         n = gSz <= 8 ? gSz : 8;
         /* Write suitably-casted 'curr' to the state slice
            gOff .. gOff+n-1 */
         tyDst = szToITy( n );
         do_shadow_PUT( mce, gOff,
                             NULL, /* original atom */
                             mkPCastTo( mce, tyDst, curr ) );
         gSz -= n;
         gOff += n;
      }
   }

   /* Outputs: memory that we write or modify. */
   if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
      offset = 0;
      toDo   = d->mSize;
      /* chew off 32-bit chunks */
      while (toDo >= 4) {
         do_shadow_STle( mce, d->mAddr, d->mSize - toDo,
                         NULL, /* original data */
                         mkPCastTo( mce, Ity_I32, curr ) );
         toDo -= 4;
      }
      /* chew off 16-bit chunks */
      while (toDo >= 2) {
         do_shadow_STle( mce, d->mAddr, d->mSize - toDo,
                         NULL, /* original data */
                         mkPCastTo( mce, Ity_I16, curr ) );
         toDo -= 2;
      }
      tl_assert(toDo == 0); /* also need to handle 1-byte excess */
   }

}
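
/* Summary of the scheme above (illustrative).  Every input of the
   dirty call -- each unmasked arg, each read guest-state slice (in
   8-byte chunks), each read memory word (in 4/2-byte chunks) -- is
   PCast-ed down to an Ity_I32 "badness" value and UifU-ed into
   'curr', so 'curr' ends up all-zeroes iff every input was fully
   defined.  'curr' is then PCast-ed back out to every destination:
   the return temporary, each written guest-state slice and each
   written memory word.  Hence a single undefined input bit makes all
   the helper's outputs completely undefined: maximally pessimistic,
   but simple and sound. */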

/* We have an ABI hint telling us that [base .. base+len-1] is to
   become undefined ("writable").  Generate code to call a helper to
   notify the A/V bit machinery of this fact.

   We call
   void MC_(helperc_MAKE_STACK_UNINIT) ( Addr base, UWord len );
*/
static
void do_AbiHint ( MCEnv* mce, IRExpr* base, Int len )
{
   IRDirty* di;
   di = unsafeIRDirty_0_N(
           0/*regparms*/,
           "MC_(helperc_MAKE_STACK_UNINIT)",
           &MC_(helperc_MAKE_STACK_UNINIT),
           mkIRExprVec_2( base, mkIRExpr_HWord( (UInt)len) )
        );
   stmt( mce->bb, IRStmt_Dirty(di) );
}


/*------------------------------------------------------------*/
/*--- Memcheck main                                        ---*/
/*------------------------------------------------------------*/

static Bool isBogusAtom ( IRAtom* at )
{
   ULong n = 0;
   IRConst* con;
   tl_assert(isIRAtom(at));
   if (at->tag == Iex_Tmp)
      return False;
   tl_assert(at->tag == Iex_Const);
   con = at->Iex.Const.con;
   switch (con->tag) {
      case Ico_U1:   return False;
      case Ico_U8:   n = (ULong)con->Ico.U8; break;
      case Ico_U16:  n = (ULong)con->Ico.U16; break;
      case Ico_U32:  n = (ULong)con->Ico.U32; break;
      case Ico_U64:  n = (ULong)con->Ico.U64; break;
      case Ico_F64:  return False;
      case Ico_F64i: return False;
      case Ico_V128: return False;
      default: ppIRExpr(at); tl_assert(0);
   }
   /* VG_(printf)("%llx\n", n); */
   return (/*32*/    n == 0xFEFEFEFFULL
           /*32*/ || n == 0x80808080ULL
           /*64*/ || n == 0xFFFFFFFFFEFEFEFFULL
           /*64*/ || n == 0xFEFEFEFEFEFEFEFFULL
           /*64*/ || n == 0x0000000000008080ULL
           /*64*/ || n == 0x8080808080808080ULL
           /*64*/ || n == 0x0101010101010101ULL
          );
}
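
/* Note (illustrative): these constants look like the signatures of
   word-at-a-time string code, i.e. 0x80808080 / 0x01010101-style
   masks and 0xFEFEFEFF-style addends (in 32- and 64-bit flavours) of
   the kind used to find a zero byte within a word.  Such code
   legitimately inspects bytes beyond the terminating NUL, so the
   cheap Add/Sub and CmpEQ/NE schemes would produce false positives
   around it; spotting these literals is what switches the whole
   block over to the expensive variants. */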

static Bool checkForBogusLiterals ( /*FLAT*/ IRStmt* st )
{
   Int      i;
   IRExpr*  e;
   IRDirty* d;
   switch (st->tag) {
      case Ist_Tmp:
         e = st->Ist.Tmp.data;
         switch (e->tag) {
            case Iex_Get:
            case Iex_Tmp:
               return False;
            case Iex_Const:
               return isBogusAtom(e);
            case Iex_Unop:
               return isBogusAtom(e->Iex.Unop.arg);
            case Iex_GetI:
               return isBogusAtom(e->Iex.GetI.ix);
            case Iex_Binop:
               return isBogusAtom(e->Iex.Binop.arg1)
                      || isBogusAtom(e->Iex.Binop.arg2);
            case Iex_Mux0X:
               return isBogusAtom(e->Iex.Mux0X.cond)
                      || isBogusAtom(e->Iex.Mux0X.expr0)
                      || isBogusAtom(e->Iex.Mux0X.exprX);
            case Iex_LDle:
               return isBogusAtom(e->Iex.LDle.addr);
            case Iex_CCall:
               for (i = 0; e->Iex.CCall.args[i]; i++)
                  if (isBogusAtom(e->Iex.CCall.args[i]))
                     return True;
               return False;
            default:
               goto unhandled;
         }
      case Ist_Dirty:
         d = st->Ist.Dirty.details;
         for (i = 0; d->args[i]; i++)
            if (isBogusAtom(d->args[i]))
               return True;
         if (d->guard && isBogusAtom(d->guard))
            return True;
         if (d->mAddr && isBogusAtom(d->mAddr))
            return True;
         return False;
      case Ist_Put:
         return isBogusAtom(st->Ist.Put.data);
      case Ist_PutI:
         return isBogusAtom(st->Ist.PutI.ix)
                || isBogusAtom(st->Ist.PutI.data);
      case Ist_STle:
         return isBogusAtom(st->Ist.STle.addr)
                || isBogusAtom(st->Ist.STle.data);
      case Ist_Exit:
         return isBogusAtom(st->Ist.Exit.guard);
      case Ist_AbiHint:
         return isBogusAtom(st->Ist.AbiHint.base);
      case Ist_NoOp:
      case Ist_IMark:
      case Ist_MFence:
         return False;
      default:
      unhandled:
         ppIRStmt(st);
         VG_(tool_panic)("checkForBogusLiterals");
   }
}


IRBB* MC_(instrument) ( IRBB* bb_in, VexGuestLayout* layout,
                        IRType gWordTy, IRType hWordTy )
{
   Bool verboze = False; /* set to True for debug printing */

   Int     i, j, first_stmt;
   IRStmt* st;
   MCEnv   mce;
   IRBB*   bb;

   if (gWordTy != hWordTy) {
      /* We don't currently support this case. */
      VG_(tool_panic)("host/guest word size mismatch");
   }

   /* Check we're not completely nuts */
   tl_assert(sizeof(UWord) == sizeof(void*));
   tl_assert(sizeof(Word)  == sizeof(void*));
   tl_assert(sizeof(ULong) == 8);
   tl_assert(sizeof(Long)  == 8);
   tl_assert(sizeof(UInt)  == 4);
   tl_assert(sizeof(Int)   == 4);

   /* Set up BB */
   bb           = emptyIRBB();
   bb->tyenv    = dopyIRTypeEnv(bb_in->tyenv);
   bb->next     = dopyIRExpr(bb_in->next);
   bb->jumpkind = bb_in->jumpkind;

   /* Set up the running environment.  Only .bb is modified as we go
      along. */
   mce.bb             = bb;
   mce.layout         = layout;
   mce.n_originalTmps = bb->tyenv->types_used;
   mce.hWordTy        = hWordTy;
   mce.bogusLiterals  = False;
   mce.tmpMap         = LibVEX_Alloc(mce.n_originalTmps * sizeof(IRTemp));
   for (i = 0; i < mce.n_originalTmps; i++)
      mce.tmpMap[i] = IRTemp_INVALID;

   /* Iterate over the stmts. */

   for (i = 0; i < bb_in->stmts_used; i++) {
      st = bb_in->stmts[i];
      tl_assert(st);

      tl_assert(isFlatIRStmt(st));

      if (!mce.bogusLiterals) {
         mce.bogusLiterals = checkForBogusLiterals(st);
         if (0 && mce.bogusLiterals) {
            VG_(printf)("bogus: ");
            ppIRStmt(st);
            VG_(printf)("\n");
         }
      }

      first_stmt = bb->stmts_used;

      if (verboze) {
         ppIRStmt(st);
         VG_(printf)("\n\n");
      }

      /* Generate instrumentation code for each stmt ... */

      switch (st->tag) {

         case Ist_Tmp:
            assign( bb, findShadowTmp(&mce, st->Ist.Tmp.tmp),
                        expr2vbits( &mce, st->Ist.Tmp.data) );
            break;

         case Ist_Put:
            do_shadow_PUT( &mce,
                           st->Ist.Put.offset,
                           st->Ist.Put.data,
                           NULL /* shadow atom */ );
            break;

         case Ist_PutI:
            do_shadow_PUTI( &mce,
                            st->Ist.PutI.descr,
                            st->Ist.PutI.ix,
                            st->Ist.PutI.bias,
                            st->Ist.PutI.data );
            break;

         case Ist_STle:
            do_shadow_STle( &mce, st->Ist.STle.addr, 0/* addr bias */,
                                  st->Ist.STle.data,
                                  NULL /* shadow data */ );
            break;

         case Ist_Exit:
            complainIfUndefined( &mce, st->Ist.Exit.guard );
            break;

         case Ist_NoOp:
         case Ist_IMark:
         case Ist_MFence:
            break;

         case Ist_Dirty:
            do_shadow_Dirty( &mce, st->Ist.Dirty.details );
            break;

         case Ist_AbiHint:
            do_AbiHint( &mce, st->Ist.AbiHint.base, st->Ist.AbiHint.len );
            break;

         default:
            VG_(printf)("\n");
            ppIRStmt(st);
            VG_(printf)("\n");
            VG_(tool_panic)("memcheck: unhandled IRStmt");

      } /* switch (st->tag) */

      if (verboze) {
         for (j = first_stmt; j < bb->stmts_used; j++) {
            VG_(printf)("   ");
            ppIRStmt(bb->stmts[j]);
            VG_(printf)("\n");
         }
         VG_(printf)("\n");
      }

      /* ... and finally copy the stmt itself to the output. */
      addStmtToIRBB(bb, st);

   }

   /* Now we need to complain if the jump target is undefined. */
   first_stmt = bb->stmts_used;

   if (verboze) {
      VG_(printf)("bb->next = ");
      ppIRExpr(bb->next);
      VG_(printf)("\n\n");
   }

   complainIfUndefined( &mce, bb->next );

   if (verboze) {
      for (j = first_stmt; j < bb->stmts_used; j++) {
         VG_(printf)("   ");
         ppIRStmt(bb->stmts[j]);
         VG_(printf)("\n");
      }
      VG_(printf)("\n");
   }

   return bb;
}

/*--------------------------------------------------------------------*/
/*--- end                                           mc_translate.c ---*/
/*--------------------------------------------------------------------*/