
/*--------------------------------------------------------------------*/
/*--- Instrument IR to perform memory checking operations.         ---*/
/*---                                               mc_translate.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of MemCheck, a heavyweight Valgrind tool for
   detecting memory errors.

   Copyright (C) 2000-2005 Julian Seward
   jseward@acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "mc_include.h"
#include "pub_tool_libcprint.h"

/*------------------------------------------------------------*/
/*--- Forward decls                                        ---*/
/*------------------------------------------------------------*/

struct _MCEnv;

static IRType  shadowType ( IRType ty );
static IRExpr* expr2vbits ( struct _MCEnv* mce, IRExpr* e );


/*------------------------------------------------------------*/
/*--- Memcheck running state, and tmp management.          ---*/
/*------------------------------------------------------------*/

/* Carries around state during memcheck instrumentation. */
typedef
   struct _MCEnv {
      /* MODIFIED: the bb being constructed.  IRStmts are added. */
      IRBB* bb;

      /* MODIFIED: a table [0 .. #temps_in_original_bb-1] which maps
         original temps to their current shadow temp.  Initially all
         entries are IRTemp_INVALID.  Entries are added lazily since
         many original temps are not used due to optimisation prior to
         instrumentation.  Note that floating point original tmps are
         shadowed by integer tmps of the same size, and Bit-typed
         original tmps are shadowed as Ity_I1.  See comment below. */
      IRTemp* tmpMap;
      Int     n_originalTmps; /* for range checking */

      /* MODIFIED: indicates whether "bogus" literals have so far been
         found.  Starts off False, and may change to True. */
      Bool    bogusLiterals;

      /* READONLY: the guest layout.  This indicates which parts of
         the guest state should be regarded as 'always defined'. */
      VexGuestLayout* layout;

      /* READONLY: the host word type.  Needed for constructing
         arguments of type 'HWord' to be passed to helper functions.
         Ity_I32 or Ity_I64 only. */
      IRType hWordTy;
   }
   MCEnv;

81/* SHADOW TMP MANAGEMENT. Shadow tmps are allocated lazily (on
82 demand), as they are encountered. This is for two reasons.
83
84 (1) (less important reason): Many original tmps are unused due to
85 initial IR optimisation, and we do not want to spaces in tables
86 tracking them.
87
88 Shadow IRTemps are therefore allocated on demand. mce.tmpMap is a
89 table indexed [0 .. n_types-1], which gives the current shadow for
90 each original tmp, or INVALID_IRTEMP if none is so far assigned.
91 It is necessary to support making multiple assignments to a shadow
92 -- specifically, after testing a shadow for definedness, it needs
93 to be made defined. But IR's SSA property disallows this.
94
95 (2) (more important reason): Therefore, when a shadow needs to get
96 a new value, a new temporary is created, the value is assigned to
97 that, and the tmpMap is updated to reflect the new binding.
98
99 A corollary is that if the tmpMap maps a given tmp to
100 INVALID_IRTEMP and we are hoping to read that shadow tmp, it means
101 there's a read-before-write error in the original tmps. The IR
102 sanity checker should catch all such anomalies, however.
njn25e49d8e72002-09-23 09:36:25 +0000103*/
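
/* Illustrative sketch only (not part of the build; the temp names
   t5/t17/t18 are hypothetical): for an original tmp t5 of type
   Ity_I32, the lifecycle under this scheme is roughly

      findShadowTmp(mce, t5)   -- first use: lazily allocates, say, t17
      ...test t17 for definedness, emit complaint...
      newShadowTmp(mce, t5)    -- abandon t17; rebind t5 to, say, t18
      assign(bb, t18, <all-defined constant>)

   Each rebinding gets a fresh IRTemp, so SSA form is preserved. */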

/* Find the tmp currently shadowing the given original tmp.  If none
   so far exists, allocate one.  */
static IRTemp findShadowTmp ( MCEnv* mce, IRTemp orig )
{
   tl_assert(orig < mce->n_originalTmps);
   if (mce->tmpMap[orig] == IRTemp_INVALID) {
      mce->tmpMap[orig]
         = newIRTemp(mce->bb->tyenv,
                     shadowType(mce->bb->tyenv->types[orig]));
   }
   return mce->tmpMap[orig];
}

/* Allocate a new shadow for the given original tmp.  This means any
   previous shadow is abandoned.  This is needed because it is
   necessary to give a new value to a shadow once it has been tested
   for undefinedness, but unfortunately IR's SSA property disallows
   this.  Instead we must abandon the old shadow, allocate a new one
   and use that instead. */
static void newShadowTmp ( MCEnv* mce, IRTemp orig )
{
   tl_assert(orig < mce->n_originalTmps);
   mce->tmpMap[orig]
      = newIRTemp(mce->bb->tyenv,
                  shadowType(mce->bb->tyenv->types[orig]));
}


/*------------------------------------------------------------*/
/*--- IRAtoms -- a subset of IRExprs                       ---*/
/*------------------------------------------------------------*/

/* An atom is either an IRExpr_Const or an IRExpr_Tmp, as defined by
   isIRAtom() in libvex_ir.h.  Because this instrumenter expects flat
   input, most of this code deals in atoms.  Usefully, a value atom
   always has a V-value which is also an atom: constants are shadowed
   by constants, and temps are shadowed by the corresponding shadow
   temporary. */

typedef IRExpr IRAtom;

/* (used for sanity checks only): is this an atom which looks
   like it's from original code? */
static Bool isOriginalAtom ( MCEnv* mce, IRAtom* a1 )
{
   if (a1->tag == Iex_Const)
      return True;
   if (a1->tag == Iex_Tmp && a1->Iex.Tmp.tmp < mce->n_originalTmps)
      return True;
   return False;
}

/* (used for sanity checks only): is this an atom which looks
   like it's from shadow code? */
static Bool isShadowAtom ( MCEnv* mce, IRAtom* a1 )
{
   if (a1->tag == Iex_Const)
      return True;
   if (a1->tag == Iex_Tmp && a1->Iex.Tmp.tmp >= mce->n_originalTmps)
      return True;
   return False;
}

/* (used for sanity checks only): check that both args are atoms and
   are identically-kinded. */
static Bool sameKindedAtoms ( IRAtom* a1, IRAtom* a2 )
{
   if (a1->tag == Iex_Tmp && a2->tag == Iex_Tmp)
      return True;
   if (a1->tag == Iex_Const && a2->tag == Iex_Const)
      return True;
   return False;
}


/*------------------------------------------------------------*/
/*--- Type management                                      ---*/
/*------------------------------------------------------------*/

/* Shadow state is always accessed using integer types.  This returns
   an integer type with the same size (as per sizeofIRType) as the
   given type.  The only valid shadow types are I1, I8, I16, I32,
   I64, I128 and V128. */

static IRType shadowType ( IRType ty )
{
   switch (ty) {
      case Ity_I1:
      case Ity_I8:
      case Ity_I16:
      case Ity_I32:
      case Ity_I64:
      case Ity_I128: return ty;
      case Ity_F32:  return Ity_I32;
      case Ity_F64:  return Ity_I64;
      case Ity_V128: return Ity_V128;
      default: ppIRType(ty);
               VG_(tool_panic)("memcheck:shadowType");
   }
}

/* Produce a 'defined' value of the given shadow type.  Should only be
   supplied shadow types (I1/I8/I16/I32/I64/V128). */
static IRExpr* definedOfType ( IRType ty ) {
   switch (ty) {
      case Ity_I1:   return IRExpr_Const(IRConst_U1(False));
      case Ity_I8:   return IRExpr_Const(IRConst_U8(0));
      case Ity_I16:  return IRExpr_Const(IRConst_U16(0));
      case Ity_I32:  return IRExpr_Const(IRConst_U32(0));
      case Ity_I64:  return IRExpr_Const(IRConst_U64(0));
      case Ity_V128: return IRExpr_Const(IRConst_V128(0x0000));
      default: VG_(tool_panic)("memcheck:definedOfType");
   }
}


222/*--- Constructing IR fragments ---*/
223/*------------------------------------------------------------*/
224
225/* assign value to tmp */
226#define assign(_bb,_tmp,_expr) \
227 addStmtToIRBB((_bb), IRStmt_Tmp((_tmp),(_expr)))
228
229/* add stmt to a bb */
230#define stmt(_bb,_stmt) \
231 addStmtToIRBB((_bb), (_stmt))
232
233/* build various kinds of expressions */
234#define binop(_op, _arg1, _arg2) IRExpr_Binop((_op),(_arg1),(_arg2))
235#define unop(_op, _arg) IRExpr_Unop((_op),(_arg))
236#define mkU8(_n) IRExpr_Const(IRConst_U8(_n))
237#define mkU16(_n) IRExpr_Const(IRConst_U16(_n))
238#define mkU32(_n) IRExpr_Const(IRConst_U32(_n))
239#define mkU64(_n) IRExpr_Const(IRConst_U64(_n))
sewardj170ee212004-12-10 18:57:51 +0000240#define mkV128(_n) IRExpr_Const(IRConst_V128(_n))
sewardj95448072004-11-22 20:19:51 +0000241#define mkexpr(_tmp) IRExpr_Tmp((_tmp))
242
243/* bind the given expression to a new temporary, and return the
244 temporary. This effectively converts an arbitrary expression into
245 an atom. */
246static IRAtom* assignNew ( MCEnv* mce, IRType ty, IRExpr* e ) {
247 IRTemp t = newIRTemp(mce->bb->tyenv, ty);
248 assign(mce->bb, t, e);
249 return mkexpr(t);
250}
251
252
/*------------------------------------------------------------*/
/*--- Constructing definedness primitive ops               ---*/
/*------------------------------------------------------------*/

/* --------- Defined-if-either-defined --------- */

static IRAtom* mkDifD8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I8, binop(Iop_And8, a1, a2));
}

static IRAtom* mkDifD16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I16, binop(Iop_And16, a1, a2));
}

static IRAtom* mkDifD32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I32, binop(Iop_And32, a1, a2));
}

static IRAtom* mkDifD64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I64, binop(Iop_And64, a1, a2));
}

static IRAtom* mkDifDV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_V128, binop(Iop_AndV128, a1, a2));
}

/* --------- Undefined-if-either-undefined --------- */

static IRAtom* mkUifU8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I8, binop(Iop_Or8, a1, a2));
}

static IRAtom* mkUifU16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I16, binop(Iop_Or16, a1, a2));
}

static IRAtom* mkUifU32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I32, binop(Iop_Or32, a1, a2));
}

static IRAtom* mkUifU64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I64, binop(Iop_Or64, a1, a2));
}

static IRAtom* mkUifUV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_V128, binop(Iop_OrV128, a1, a2));
}

static IRAtom* mkUifU ( MCEnv* mce, IRType vty, IRAtom* a1, IRAtom* a2 ) {
   switch (vty) {
      case Ity_I8:   return mkUifU8(mce, a1, a2);
      case Ity_I16:  return mkUifU16(mce, a1, a2);
      case Ity_I32:  return mkUifU32(mce, a1, a2);
      case Ity_I64:  return mkUifU64(mce, a1, a2);
      case Ity_V128: return mkUifUV128(mce, a1, a2);
      default:
         VG_(printf)("\n"); ppIRType(vty); VG_(printf)("\n");
         VG_(tool_panic)("memcheck:mkUifU");
   }
}

/* --------- The Left-family of operations. --------- */

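/* A sketch of what 'Left' computes, on hypothetical 8-bit vbits: for
   v = 00010100, v | -v = 00010100 | 11101100 = 11111100.  That is,
   Left(v) sets every bit at or above the lowest 1 (undefined) bit of
   v, so one undefined bit taints all more-significant positions --
   a cheap model of how carries propagate leftwards in additions. */
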
static IRAtom* mkLeft8 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   /* It's safe to duplicate a1 since it's only an atom */
   return assignNew(mce, Ity_I8,
                    binop(Iop_Or8, a1,
                          assignNew(mce, Ity_I8,
                                    unop(Iop_Neg8, a1))));
}

static IRAtom* mkLeft16 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   /* It's safe to duplicate a1 since it's only an atom */
   return assignNew(mce, Ity_I16,
                    binop(Iop_Or16, a1,
                          assignNew(mce, Ity_I16,
                                    unop(Iop_Neg16, a1))));
}

static IRAtom* mkLeft32 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   /* It's safe to duplicate a1 since it's only an atom */
   return assignNew(mce, Ity_I32,
                    binop(Iop_Or32, a1,
                          assignNew(mce, Ity_I32,
                                    unop(Iop_Neg32, a1))));
}

static IRAtom* mkLeft64 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   /* It's safe to duplicate a1 since it's only an atom */
   return assignNew(mce, Ity_I64,
                    binop(Iop_Or64, a1,
                          assignNew(mce, Ity_I64,
                                    unop(Iop_Neg64, a1))));
}

/* --------- 'Improvement' functions for AND/OR. --------- */

/* ImproveAND(data, vbits) = data OR vbits.  Defined (0) data 0s give
   defined (0); all other -> undefined (1).
*/
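/* A worked truth table for a single bit position (illustrative only):

      data  vbits | data OR vbits   meaning
      ------------+----------------------------------------------
        0     0   |      0          defined 0: AND result is known
        0     1   |      1          undefined
        1     0   |      1          defined 1: no help for AND
        1     1   |      1          undefined

   Only a *defined zero* forces the AND result regardless of the other
   operand, and that is exactly the one row yielding 0 (defined). */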
static IRAtom* mkImproveAND8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(mce, Ity_I8, binop(Iop_Or8, data, vbits));
}

static IRAtom* mkImproveAND16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(mce, Ity_I16, binop(Iop_Or16, data, vbits));
}

static IRAtom* mkImproveAND32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(mce, Ity_I32, binop(Iop_Or32, data, vbits));
}

static IRAtom* mkImproveAND64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(mce, Ity_I64, binop(Iop_Or64, data, vbits));
}

static IRAtom* mkImproveANDV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(mce, Ity_V128, binop(Iop_OrV128, data, vbits));
}

/* ImproveOR(data, vbits) = ~data OR vbits.  Defined (0) data 1s give
   defined (0); all other -> undefined (1).
*/
static IRAtom* mkImproveOR8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             mce, Ity_I8,
             binop(Iop_Or8,
                   assignNew(mce, Ity_I8, unop(Iop_Not8, data)),
                   vbits) );
}

static IRAtom* mkImproveOR16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             mce, Ity_I16,
             binop(Iop_Or16,
                   assignNew(mce, Ity_I16, unop(Iop_Not16, data)),
                   vbits) );
}

static IRAtom* mkImproveOR32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             mce, Ity_I32,
             binop(Iop_Or32,
                   assignNew(mce, Ity_I32, unop(Iop_Not32, data)),
                   vbits) );
}

static IRAtom* mkImproveOR64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             mce, Ity_I64,
             binop(Iop_Or64,
                   assignNew(mce, Ity_I64, unop(Iop_Not64, data)),
                   vbits) );
}

static IRAtom* mkImproveORV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             mce, Ity_V128,
             binop(Iop_OrV128,
                   assignNew(mce, Ity_V128, unop(Iop_NotV128, data)),
                   vbits) );
}

/* --------- Pessimising casts. --------- */

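/* A hedged example of what mkPCastTo produces (values hypothetical):
   given I32 vbits 0x00000004 (one undefined bit), CmpNEZ32 yields 1,
   and 1Sto64 widening to dst type Ity_I64 gives 0xFFFFFFFFFFFFFFFF --
   any undefinedness at all pessimistically poisons the whole result. */
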
static IRAtom* mkPCastTo( MCEnv* mce, IRType dst_ty, IRAtom* vbits )
{
   IRType  ty;
   IRAtom* tmp1;
   /* Note, dst_ty is a shadow type, not an original type. */
   /* First of all, collapse vbits down to a single bit. */
   tl_assert(isShadowAtom(mce,vbits));
   ty   = typeOfIRExpr(mce->bb->tyenv, vbits);
   tmp1 = NULL;
   switch (ty) {
      case Ity_I1:
         tmp1 = vbits;
         break;
      case Ity_I8:
         tmp1 = assignNew(mce, Ity_I1, unop(Iop_CmpNEZ8, vbits));
         break;
      case Ity_I16:
         tmp1 = assignNew(mce, Ity_I1, unop(Iop_CmpNEZ16, vbits));
         break;
      case Ity_I32:
         tmp1 = assignNew(mce, Ity_I1, unop(Iop_CmpNEZ32, vbits));
         break;
      case Ity_I64:
         tmp1 = assignNew(mce, Ity_I1, unop(Iop_CmpNEZ64, vbits));
         break;
      case Ity_I128: {
         /* Gah.  Chop it in half, OR the halves together, and compare
            that with zero. */
         IRAtom* tmp2 = assignNew(mce, Ity_I64, unop(Iop_128HIto64, vbits));
         IRAtom* tmp3 = assignNew(mce, Ity_I64, unop(Iop_128to64, vbits));
         IRAtom* tmp4 = assignNew(mce, Ity_I64, binop(Iop_Or64, tmp2, tmp3));
         tmp1         = assignNew(mce, Ity_I1,
                                  unop(Iop_CmpNEZ64, tmp4));
         break;
      }
      default:
         ppIRType(ty);
         VG_(tool_panic)("mkPCastTo(1)");
   }
   tl_assert(tmp1);
   /* Now widen up to the dst type. */
   switch (dst_ty) {
      case Ity_I1:
         return tmp1;
      case Ity_I8:
         return assignNew(mce, Ity_I8, unop(Iop_1Sto8, tmp1));
      case Ity_I16:
         return assignNew(mce, Ity_I16, unop(Iop_1Sto16, tmp1));
      case Ity_I32:
         return assignNew(mce, Ity_I32, unop(Iop_1Sto32, tmp1));
      case Ity_I64:
         return assignNew(mce, Ity_I64, unop(Iop_1Sto64, tmp1));
      case Ity_V128:
         tmp1 = assignNew(mce, Ity_I64,  unop(Iop_1Sto64, tmp1));
         tmp1 = assignNew(mce, Ity_V128, binop(Iop_64HLtoV128, tmp1, tmp1));
         return tmp1;
      case Ity_I128:
         tmp1 = assignNew(mce, Ity_I64,  unop(Iop_1Sto64, tmp1));
         tmp1 = assignNew(mce, Ity_I128, binop(Iop_64HLto128, tmp1, tmp1));
         return tmp1;
      default:
         ppIRType(dst_ty);
         VG_(tool_panic)("mkPCastTo(2)");
   }
}

/* --------- Accurate interpretation of CmpEQ/CmpNE. --------- */
/*
   Normally, we can do CmpEQ/CmpNE by doing UifU on the arguments, and
   PCasting to Ity_I1.  However, sometimes it is necessary to be more
   accurate.  The insight is that the result is defined if two
   corresponding bits can be found, one from each argument, so that
   both bits are defined but are different -- that makes EQ say "No"
   and NE say "Yes".  Hence, we compute an improvement term and DifD
   it onto the "normal" (UifU) result.

   The result is:

   PCastTo<1> (
      PCastTo<sz>( UifU<sz>(vxx, vyy) )        -- naive version
      `DifD<sz>`
      PCastTo<sz>( CmpEQ<sz>( vec, 1....1 ) )  -- improvement term
   )
   where
     vec contains 0 (defined) bits where the corresponding arg bits
     are defined but different, and 1 bits otherwise:

     vec = UifU<sz>( vxx, vyy, Not<sz>(Xor<sz>( xx, yy )) )
*/
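/* A worked example (values hypothetical): let xx = ...0 and yy = ...1
   with both bit-0s defined (vxx = vyy = 0 in bit 0) but every other
   bit undefined (1s).  Then

      vec = vxx | vyy | ~(xx ^ yy)     -- bit 0 is 0, the rest are 1
      CmpEQ(vec, 1...1) = False        -- a witness bit exists

   so the improvement term PCasts to all 0s (defined), and the DifD
   (AND) forces the final result defined: EQ is certainly False and NE
   certainly True, despite the undefined high bits. */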
static IRAtom* expensiveCmpEQorNE ( MCEnv*  mce,
                                    IRType  ty,
                                    IRAtom* vxx, IRAtom* vyy,
                                    IRAtom* xx,  IRAtom* yy )
{
   IRAtom *naive, *vec, *vec_cmpd, *improved, *final_cast, *top;
   IROp   opDIFD, opUIFU, opXOR, opNOT, opCMP;

   tl_assert(isShadowAtom(mce,vxx));
   tl_assert(isShadowAtom(mce,vyy));
   tl_assert(isOriginalAtom(mce,xx));
   tl_assert(isOriginalAtom(mce,yy));
   tl_assert(sameKindedAtoms(vxx,xx));
   tl_assert(sameKindedAtoms(vyy,yy));

   switch (ty) {
      case Ity_I32:
         opDIFD = Iop_And32;
         opUIFU = Iop_Or32;
         opNOT  = Iop_Not32;
         opXOR  = Iop_Xor32;
         opCMP  = Iop_CmpEQ32;
         top    = mkU32(0xFFFFFFFF);
         break;
      case Ity_I64:
         opDIFD = Iop_And64;
         opUIFU = Iop_Or64;
         opNOT  = Iop_Not64;
         opXOR  = Iop_Xor64;
         opCMP  = Iop_CmpEQ64;
         top    = mkU64(0xFFFFFFFFFFFFFFFFULL);
         break;
      default:
         VG_(tool_panic)("expensiveCmpEQorNE");
   }

   naive
      = mkPCastTo(mce,ty, assignNew(mce, ty, binop(opUIFU, vxx, vyy)));

   vec
      = assignNew(
           mce,ty,
           binop( opUIFU,
                  assignNew(mce,ty, binop(opUIFU, vxx, vyy)),
                  assignNew(
                     mce,ty,
                     unop( opNOT,
                           assignNew(mce,ty, binop(opXOR, xx, yy))))));

   vec_cmpd
      = mkPCastTo( mce,ty, assignNew(mce,Ity_I1, binop(opCMP, vec, top)));

   improved
      = assignNew( mce,ty, binop(opDIFD, naive, vec_cmpd) );

   final_cast
      = mkPCastTo( mce, Ity_I1, improved );

   return final_cast;
}


/*------------------------------------------------------------*/
/*--- Emit a test and complaint if something is undefined. ---*/
/*------------------------------------------------------------*/

/* Set the annotations on a dirty helper to indicate that the stack
   pointer and instruction pointers might be read.  This is the
   behaviour of all 'emit-a-complaint' style functions we might
   call. */

static void setHelperAnns ( MCEnv* mce, IRDirty* di ) {
   di->nFxState = 2;
   di->fxState[0].fx     = Ifx_Read;
   di->fxState[0].offset = mce->layout->offset_SP;
   di->fxState[0].size   = mce->layout->sizeof_SP;
   di->fxState[1].fx     = Ifx_Read;
   di->fxState[1].offset = mce->layout->offset_IP;
   di->fxState[1].size   = mce->layout->sizeof_IP;
}


/* Check the supplied **original** atom for undefinedness, and emit a
   complaint if so.  Once that happens, mark it as defined.  This is
   possible because the atom is either a tmp or literal.  If it's a
   tmp, it will be shadowed by a tmp, and so we can set the shadow to
   be defined.  In fact as mentioned above, we will have to allocate a
   new tmp to carry the new 'defined' shadow value, and update the
   original->tmp mapping accordingly; we cannot simply assign a new
   value to an existing shadow tmp as this breaks SSAness -- resulting
   in the post-instrumentation sanity checker spluttering in
   disapproval.
*/
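/* As an illustrative sketch (temp names hypothetical), for a 4-byte
   atom t3 the IR emitted below is roughly:

      t40 = PCastTo<I1>(t3#)                  -- 1 iff any vbit is set
      DIRTY t40 ::: MC_(helperc_value_check4_fail)()
      <rebind t3's shadow to fresh t41> ; t41 = 0   -- now defined
*/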
static void complainIfUndefined ( MCEnv* mce, IRAtom* atom )
{
   IRAtom*  vatom;
   IRType   ty;
   Int      sz;
   IRDirty* di;
   IRAtom*  cond;

   /* Since the original expression is atomic, there's no duplicated
      work generated by making multiple V-expressions for it.  So we
      don't really care about the possibility that someone else may
      also create a V-interpretation for it. */
   tl_assert(isOriginalAtom(mce, atom));
   vatom = expr2vbits( mce, atom );
   tl_assert(isShadowAtom(mce, vatom));
   tl_assert(sameKindedAtoms(atom, vatom));

   ty = typeOfIRExpr(mce->bb->tyenv, vatom);

   /* sz is only used for constructing the error message */
   sz = ty==Ity_I1 ? 0 : sizeofIRType(ty);

   cond = mkPCastTo( mce, Ity_I1, vatom );
   /* cond will be 0 if all defined, and 1 if any not defined. */

   switch (sz) {
      case 0:
         di = unsafeIRDirty_0_N( 0/*regparms*/,
                                 "MC_(helperc_value_check0_fail)",
                                 &MC_(helperc_value_check0_fail),
                                 mkIRExprVec_0()
                               );
         break;
      case 1:
         di = unsafeIRDirty_0_N( 0/*regparms*/,
                                 "MC_(helperc_value_check1_fail)",
                                 &MC_(helperc_value_check1_fail),
                                 mkIRExprVec_0()
                               );
         break;
      case 4:
         di = unsafeIRDirty_0_N( 0/*regparms*/,
                                 "MC_(helperc_value_check4_fail)",
                                 &MC_(helperc_value_check4_fail),
                                 mkIRExprVec_0()
                               );
         break;
      case 8:
         di = unsafeIRDirty_0_N( 0/*regparms*/,
                                 "MC_(helperc_value_check8_fail)",
                                 &MC_(helperc_value_check8_fail),
                                 mkIRExprVec_0()
                               );
         break;
      default:
         di = unsafeIRDirty_0_N( 1/*regparms*/,
                                 "MC_(helperc_complain_undef)",
                                 &MC_(helperc_complain_undef),
                                 mkIRExprVec_1( mkIRExpr_HWord( sz ))
                               );
         break;
   }
   di->guard = cond;
   setHelperAnns( mce, di );
   stmt( mce->bb, IRStmt_Dirty(di));

   /* Set the shadow tmp to be defined.  First, update the
      orig->shadow tmp mapping to reflect the fact that this shadow is
      getting a new value. */
   tl_assert(isIRAtom(vatom));
   /* sameKindedAtoms ... */
   if (vatom->tag == Iex_Tmp) {
      tl_assert(atom->tag == Iex_Tmp);
      newShadowTmp(mce, atom->Iex.Tmp.tmp);
      assign(mce->bb, findShadowTmp(mce, atom->Iex.Tmp.tmp),
                      definedOfType(ty));
   }
}


/*------------------------------------------------------------*/
/*--- Shadowing PUTs/GETs, and indexed variants thereof    ---*/
/*------------------------------------------------------------*/

/* Examine the always-defined sections declared in layout to see if
   the (offset,size) section is within one.  Note, it is an error to
   partially fall into such a region: (offset,size) should either be
   completely in such a region or completely not-in such a region.
*/
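/* For instance (a hypothetical layout): if alwaysDefd covers guest
   state bytes [64 .. 67], then isAlwaysDefd(mce, 64, 4) is True,
   isAlwaysDefd(mce, 100, 4) is False, and isAlwaysDefd(mce, 66, 4)
   panics, since a slice may not straddle a region boundary. */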
static Bool isAlwaysDefd ( MCEnv* mce, Int offset, Int size )
{
   Int minoffD, maxoffD, i;
   Int minoff = offset;
   Int maxoff = minoff + size - 1;
   tl_assert((minoff & ~0xFFFF) == 0);
   tl_assert((maxoff & ~0xFFFF) == 0);

   for (i = 0; i < mce->layout->n_alwaysDefd; i++) {
      minoffD = mce->layout->alwaysDefd[i].offset;
      maxoffD = minoffD + mce->layout->alwaysDefd[i].size - 1;
      tl_assert((minoffD & ~0xFFFF) == 0);
      tl_assert((maxoffD & ~0xFFFF) == 0);

      if (maxoff < minoffD || maxoffD < minoff)
         continue; /* no overlap */
      if (minoff >= minoffD && maxoff <= maxoffD)
         return True; /* completely contained in an always-defd section */

      VG_(tool_panic)("memcheck:isAlwaysDefd:partial overlap");
   }
   return False; /* could not find any containing section */
}


/* Generate into bb suitable actions to shadow this Put.  If the state
   slice is marked 'always defined', do nothing.  Otherwise, write the
   supplied V bits to the shadow state.  We can pass in either an
   original atom or a V-atom, but not both.  In the former case the
   relevant V-bits are then generated from the original.
*/
static
void do_shadow_PUT ( MCEnv* mce,  Int offset,
                     IRAtom* atom, IRAtom* vatom )
{
   IRType ty;
   if (atom) {
      tl_assert(!vatom);
      tl_assert(isOriginalAtom(mce, atom));
      vatom = expr2vbits( mce, atom );
   } else {
      tl_assert(vatom);
      tl_assert(isShadowAtom(mce, vatom));
   }

   ty = typeOfIRExpr(mce->bb->tyenv, vatom);
   tl_assert(ty != Ity_I1);
   if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
      /* later: no ... */
      /* emit code to emit a complaint if any of the vbits are 1. */
      /* complainIfUndefined(mce, atom); */
   } else {
      /* Do a plain shadow Put. */
      stmt( mce->bb, IRStmt_Put( offset + mce->layout->total_sizeB, vatom ) );
   }
}


/* Generate into bb suitable actions to shadow this PutI (passed in
   in pieces).
*/
static
void do_shadow_PUTI ( MCEnv* mce,
                      IRArray* descr, IRAtom* ix, Int bias, IRAtom* atom )
{
   IRAtom* vatom;
   IRType  ty, tyS;
   Int     arrSize;

   tl_assert(isOriginalAtom(mce,atom));
   vatom = expr2vbits( mce, atom );
   tl_assert(sameKindedAtoms(atom, vatom));
   ty      = descr->elemTy;
   tyS     = shadowType(ty);
   arrSize = descr->nElems * sizeofIRType(ty);
   tl_assert(ty != Ity_I1);
   tl_assert(isOriginalAtom(mce,ix));
   complainIfUndefined(mce,ix);
   if (isAlwaysDefd(mce, descr->base, arrSize)) {
      /* later: no ... */
      /* emit code to emit a complaint if any of the vbits are 1. */
      /* complainIfUndefined(mce, atom); */
   } else {
      /* Do a cloned version of the Put that refers to the shadow
         area. */
      IRArray* new_descr
         = mkIRArray( descr->base + mce->layout->total_sizeB,
                      tyS, descr->nElems);
      stmt( mce->bb, IRStmt_PutI( new_descr, ix, bias, vatom ));
   }
}


/* Return an expression which contains the V bits corresponding to the
   given GET (passed in in pieces).
*/
static
IRExpr* shadow_GET ( MCEnv* mce, Int offset, IRType ty )
{
   IRType tyS = shadowType(ty);
   tl_assert(ty != Ity_I1);
   if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
      /* Always defined, return all zeroes of the relevant type */
      return definedOfType(tyS);
   } else {
      /* return a cloned version of the Get that refers to the shadow
         area. */
      return IRExpr_Get( offset + mce->layout->total_sizeB, tyS );
   }
}


/* Return an expression which contains the V bits corresponding to the
   given GETI (passed in in pieces).
*/
static
IRExpr* shadow_GETI ( MCEnv* mce, IRArray* descr, IRAtom* ix, Int bias )
{
   IRType ty   = descr->elemTy;
   IRType tyS  = shadowType(ty);
   Int arrSize = descr->nElems * sizeofIRType(ty);
   tl_assert(ty != Ity_I1);
   tl_assert(isOriginalAtom(mce,ix));
   complainIfUndefined(mce,ix);
   if (isAlwaysDefd(mce, descr->base, arrSize)) {
      /* Always defined, return all zeroes of the relevant type */
      return definedOfType(tyS);
   } else {
      /* return a cloned version of the Get that refers to the shadow
         area. */
      IRArray* new_descr
         = mkIRArray( descr->base + mce->layout->total_sizeB,
                      tyS, descr->nElems);
      return IRExpr_GetI( new_descr, ix, bias );
   }
}


/*------------------------------------------------------------*/
/*--- Generating approximations for unknown operations,    ---*/
/*--- using lazy-propagate semantics                       ---*/
/*------------------------------------------------------------*/

/* Lazy propagation of undefinedness from two values, resulting in the
   specified shadow type.
*/
static
IRAtom* mkLazy2 ( MCEnv* mce, IRType finalVty, IRAtom* va1, IRAtom* va2 )
{
   IRAtom* at;
   IRType t1 = typeOfIRExpr(mce->bb->tyenv, va1);
   IRType t2 = typeOfIRExpr(mce->bb->tyenv, va2);
   tl_assert(isShadowAtom(mce,va1));
   tl_assert(isShadowAtom(mce,va2));

   /* The general case is inefficient because PCast is an expensive
      operation.  Here are some special cases which use PCast only
      once rather than twice. */

   /* I64 x I64 -> I64 */
   if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I64) {
      if (0) VG_(printf)("mkLazy2: I64 x I64 -> I64\n");
      at = mkUifU(mce, Ity_I64, va1, va2);
      at = mkPCastTo(mce, Ity_I64, at);
      return at;
   }

   /* I64 x I64 -> I32 */
   if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I32) {
      if (0) VG_(printf)("mkLazy2: I64 x I64 -> I32\n");
      at = mkUifU(mce, Ity_I64, va1, va2);
      at = mkPCastTo(mce, Ity_I32, at);
      return at;
   }

   if (0) {
      VG_(printf)("mkLazy2 ");
      ppIRType(t1);
      VG_(printf)("_");
      ppIRType(t2);
      VG_(printf)("_");
      ppIRType(finalVty);
      VG_(printf)("\n");
   }

   /* General case: force everything via 32-bit intermediaries. */
   at = mkPCastTo(mce, Ity_I32, va1);
   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
   at = mkPCastTo(mce, finalVty, at);
   return at;
}


/* Do the lazy propagation game from a null-terminated vector of
   atoms.  This is presumably the arguments to a helper call, so the
   IRCallee info is also supplied in order that we can know which
   arguments should be ignored (via the .mcx_mask field).
*/
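/* Example (hypothetical callee): with cee->mcx_mask == 5 (binary 101),
   args 0 and 2 are excluded and only arg 1 feeds the definedness
   computation -- useful when a helper ignores some of its params. */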
static
IRAtom* mkLazyN ( MCEnv* mce,
                  IRAtom** exprvec, IRType finalVtype, IRCallee* cee )
{
   Int i;
   IRAtom* here;
   IRAtom* curr = definedOfType(Ity_I32);
   for (i = 0; exprvec[i]; i++) {
      tl_assert(i < 32);
      tl_assert(isOriginalAtom(mce, exprvec[i]));
      /* Only take notice of this arg if the callee's mc-exclusion
         mask does not say it is to be excluded. */
      if (cee->mcx_mask & (1<<i)) {
         /* the arg is to be excluded from definedness checking.  Do
            nothing. */
         if (0) VG_(printf)("excluding %s(%d)\n", cee->name, i);
      } else {
         /* calculate the arg's definedness, and pessimistically merge
            it in. */
         here = mkPCastTo( mce, Ity_I32, expr2vbits(mce, exprvec[i]) );
         curr = mkUifU32(mce, here, curr);
      }
   }
   return mkPCastTo(mce, finalVtype, curr );
}


/*------------------------------------------------------------*/
/*--- Generating expensive sequences for exact carry-chain ---*/
/*--- propagation in add/sub and related operations.       ---*/
/*------------------------------------------------------------*/

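/* The idea, sketched on hypothetical 4-bit values: if aa = 0b0110 with
   qaa = 0b0001 (bit 0 undefined), then a_min = 0b0110 and
   a_max = 0b0111 bracket every value aa could actually take.  For the
   add case, (a_min + b_min) ^ (a_max + b_max) then has 1s exactly
   where undefined inputs could perturb the sum via carries, and
   OR-ing in qaa | qbb re-taints the directly-undefined bit positions
   themselves. */
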
static
IRAtom* expensiveAddSub ( MCEnv*  mce,
                          Bool    add,
                          IRType  ty,
                          IRAtom* qaa, IRAtom* qbb,
                          IRAtom* aa,  IRAtom* bb )
{
   IRAtom *a_min, *b_min, *a_max, *b_max;
   IROp   opAND, opOR, opXOR, opNOT, opADD, opSUB;

   tl_assert(isShadowAtom(mce,qaa));
   tl_assert(isShadowAtom(mce,qbb));
   tl_assert(isOriginalAtom(mce,aa));
   tl_assert(isOriginalAtom(mce,bb));
   tl_assert(sameKindedAtoms(qaa,aa));
   tl_assert(sameKindedAtoms(qbb,bb));

   switch (ty) {
      case Ity_I32:
         opAND = Iop_And32;
         opOR  = Iop_Or32;
         opXOR = Iop_Xor32;
         opNOT = Iop_Not32;
         opADD = Iop_Add32;
         opSUB = Iop_Sub32;
         break;
      default:
         VG_(tool_panic)("expensiveAddSub");
   }

   // a_min = aa & ~qaa
   a_min = assignNew(mce,ty,
                     binop(opAND, aa,
                           assignNew(mce,ty, unop(opNOT, qaa))));

   // b_min = bb & ~qbb
   b_min = assignNew(mce,ty,
                     binop(opAND, bb,
                           assignNew(mce,ty, unop(opNOT, qbb))));

   // a_max = aa | qaa
   a_max = assignNew(mce,ty, binop(opOR, aa, qaa));

   // b_max = bb | qbb
   b_max = assignNew(mce,ty, binop(opOR, bb, qbb));

   if (add) {
      // result = (qaa | qbb) | ((a_min + b_min) ^ (a_max + b_max))
      return
      assignNew(mce,ty,
         binop( opOR,
                assignNew(mce,ty, binop(opOR, qaa, qbb)),
                assignNew(mce,ty,
                   binop( opXOR,
                          assignNew(mce,ty, binop(opADD, a_min, b_min)),
                          assignNew(mce,ty, binop(opADD, a_max, b_max))
                   )
                )
         )
      );
   } else {
      // result = (qaa | qbb) | ((a_min - b_max) ^ (a_max - b_min))
      return
      assignNew(mce,ty,
         binop( opOR,
                assignNew(mce,ty, binop(opOR, qaa, qbb)),
                assignNew(mce,ty,
                   binop( opXOR,
                          assignNew(mce,ty, binop(opSUB, a_min, b_max)),
                          assignNew(mce,ty, binop(opSUB, a_max, b_min))
                   )
                )
         )
      );
   }

}


/*------------------------------------------------------------*/
/*--- Helpers for dealing with vector primops.             ---*/
/*------------------------------------------------------------*/

/* Vector pessimisation -- pessimise within each lane individually. */

static IRAtom* mkPCast8x16 ( MCEnv* mce, IRAtom* at )
{
   return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ8x16, at));
}

static IRAtom* mkPCast16x8 ( MCEnv* mce, IRAtom* at )
{
   return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ16x8, at));
}

static IRAtom* mkPCast32x4 ( MCEnv* mce, IRAtom* at )
{
   return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ32x4, at));
}

static IRAtom* mkPCast64x2 ( MCEnv* mce, IRAtom* at )
{
   return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ64x2, at));
}

static IRAtom* mkPCast32x2 ( MCEnv* mce, IRAtom* at )
{
   return assignNew(mce, Ity_I64, unop(Iop_CmpNEZ32x2, at));
}

static IRAtom* mkPCast16x4 ( MCEnv* mce, IRAtom* at )
{
   return assignNew(mce, Ity_I64, unop(Iop_CmpNEZ16x4, at));
}

static IRAtom* mkPCast8x8 ( MCEnv* mce, IRAtom* at )
{
   return assignNew(mce, Ity_I64, unop(Iop_CmpNEZ8x8, at));
}


/* Here's a simple scheme capable of handling ops derived from SSE1
   code, while only generating ops that can be efficiently implemented
   in SSE1. */

/* All-lanes versions are straightforward:

   binary32Fx4(x,y)   ==> PCast32x4(UifUV128(x#,y#))

   unary32Fx4(x,y)    ==> PCast32x4(x#)

   Lowest-lane-only versions are more complex:

   binary32F0x4(x,y)  ==> SetV128lo32(
                             x#,
                             PCast32(V128to32(UifUV128(x#,y#)))
                          )

   This is perhaps not so obvious.  In particular, it's faster to
   do a V128-bit UifU and then take the bottom 32 bits than the more
   obvious scheme of taking the bottom 32 bits of each operand
   and doing a 32-bit UifU.  Basically since UifU is fast and
   chopping lanes off vector values is slow.

   Finally:

   unary32F0x4(x)     ==> SetV128lo32(
                             x#,
                             PCast32(V128to32(x#))
                          )

   Where:

   PCast32(v#)   = 1Sto32(CmpNE32(v#,0))
   PCast32x4(v#) = CmpNEZ32x4(v#)
*/
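/* To make the lowest-lane case concrete, this is the IR the scheme
   emits for binary32F0x4 (temp names hypothetical):

      t1 = UifUV128(x#, y#)          -- whole-vector UifU, cheap
      t2 = V128to32(t1)              -- keep only lane 0's vbits
      t3 = PCast32(t2)               -- pessimise within lane 0
      t4 = SetV128lo32(x#, t3)       -- lanes 1..3 pass x# through
*/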

static
IRAtom* binary32Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   tl_assert(isShadowAtom(mce, vatomY));
   at = mkUifUV128(mce, vatomX, vatomY);
   at = assignNew(mce, Ity_V128, mkPCast32x4(mce, at));
   return at;
}

static
IRAtom* unary32Fx4 ( MCEnv* mce, IRAtom* vatomX )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   at = assignNew(mce, Ity_V128, mkPCast32x4(mce, vatomX));
   return at;
}

static
IRAtom* binary32F0x4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   tl_assert(isShadowAtom(mce, vatomY));
   at = mkUifUV128(mce, vatomX, vatomY);
   at = assignNew(mce, Ity_I32, unop(Iop_V128to32, at));
   at = mkPCastTo(mce, Ity_I32, at);
   at = assignNew(mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
   return at;
}

static
IRAtom* unary32F0x4 ( MCEnv* mce, IRAtom* vatomX )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   at = assignNew(mce, Ity_I32, unop(Iop_V128to32, vatomX));
   at = mkPCastTo(mce, Ity_I32, at);
   at = assignNew(mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
   return at;
}

/* --- ... and ... 64Fx2 versions of the same ... --- */

static
IRAtom* binary64Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   tl_assert(isShadowAtom(mce, vatomY));
   at = mkUifUV128(mce, vatomX, vatomY);
   at = assignNew(mce, Ity_V128, mkPCast64x2(mce, at));
   return at;
}

static
IRAtom* unary64Fx2 ( MCEnv* mce, IRAtom* vatomX )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   at = assignNew(mce, Ity_V128, mkPCast64x2(mce, vatomX));
   return at;
}

static
IRAtom* binary64F0x2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   tl_assert(isShadowAtom(mce, vatomY));
   at = mkUifUV128(mce, vatomX, vatomY);
   at = assignNew(mce, Ity_I64, unop(Iop_V128to64, at));
   at = mkPCastTo(mce, Ity_I64, at);
   at = assignNew(mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
   return at;
}

static
IRAtom* unary64F0x2 ( MCEnv* mce, IRAtom* vatomX )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   at = assignNew(mce, Ity_I64, unop(Iop_V128to64, vatomX));
   at = mkPCastTo(mce, Ity_I64, at);
   at = assignNew(mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
   return at;
}

/* --- --- Vector saturated narrowing --- --- */

/* This is quite subtle.  What to do is simple:

   Let the original narrowing op be QNarrowW{S,U}xN.  Produce:

      the-narrowing-op( PCastWxN(vatom1), PCastWxN(vatom2))

   Why this is right is not so simple.  Consider a lane in the args,
   vatom1 or 2, doesn't matter.

   After the PCast, that lane is all 0s (defined) or all
   1s (undefined).

   Both signed and unsigned saturating narrowing of all 0s produces
   all 0s, which is what we want.

   The all-1s case is more complex.  Unsigned narrowing interprets an
   all-1s input as the largest unsigned integer, and so produces all
   1s as a result since that is the largest unsigned value at the
   smaller width.

   Signed narrowing interprets all 1s as -1.  Fortunately, -1 narrows
   to -1, so we still wind up with all 1s at the smaller width.

   So: In short, pessimise the args, then apply the original narrowing
   op.
*/
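/* A concrete lane-level check of the argument above: PCast turns a
   partially-undefined 16-bit lane into 0xFFFF.  Unsigned narrowing
   saturates 0xFFFF to 0xFF; signed narrowing reads it as -1, which
   narrows to -1 = 0xFF as well.  Either way the 8-bit result lane is
   all 1s, i.e. fully undefined, as required. */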
static
IRAtom* vectorNarrowV128 ( MCEnv* mce, IROp narrow_op,
                           IRAtom* vatom1, IRAtom* vatom2)
{
   IRAtom *at1, *at2, *at3;
   IRAtom* (*pcast)( MCEnv*, IRAtom* );
   switch (narrow_op) {
      case Iop_QNarrow32Sx4: pcast = mkPCast32x4; break;
      case Iop_QNarrow16Sx8: pcast = mkPCast16x8; break;
      case Iop_QNarrow16Ux8: pcast = mkPCast16x8; break;
      default: VG_(tool_panic)("vectorNarrowV128");
   }
   tl_assert(isShadowAtom(mce,vatom1));
   tl_assert(isShadowAtom(mce,vatom2));
   at1 = assignNew(mce, Ity_V128, pcast(mce, vatom1));
   at2 = assignNew(mce, Ity_V128, pcast(mce, vatom2));
   at3 = assignNew(mce, Ity_V128, binop(narrow_op, at1, at2));
   return at3;
}

static
IRAtom* vectorNarrow64 ( MCEnv* mce, IROp narrow_op,
                         IRAtom* vatom1, IRAtom* vatom2)
{
   IRAtom *at1, *at2, *at3;
   IRAtom* (*pcast)( MCEnv*, IRAtom* );
   switch (narrow_op) {
      case Iop_QNarrow32Sx2: pcast = mkPCast32x2; break;
      case Iop_QNarrow16Sx4: pcast = mkPCast16x4; break;
      case Iop_QNarrow16Ux4: pcast = mkPCast16x4; break;
      default: VG_(tool_panic)("vectorNarrow64");
   }
   tl_assert(isShadowAtom(mce,vatom1));
   tl_assert(isShadowAtom(mce,vatom2));
   at1 = assignNew(mce, Ity_I64, pcast(mce, vatom1));
   at2 = assignNew(mce, Ity_I64, pcast(mce, vatom2));
   at3 = assignNew(mce, Ity_I64, binop(narrow_op, at1, at2));
   return at3;
}

1298/* --- --- Vector integer arithmetic --- --- */
1299
1300/* Simple ... UifU the args and per-lane pessimise the results. */
sewardjacd2e912005-01-13 19:17:06 +00001301
sewardj20d38f22005-02-07 23:50:18 +00001302/* --- V128-bit versions --- */
sewardjacd2e912005-01-13 19:17:06 +00001303
sewardja1d93302004-12-12 16:45:06 +00001304static
1305IRAtom* binary8Ix16 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
1306{
1307 IRAtom* at;
sewardj20d38f22005-02-07 23:50:18 +00001308 at = mkUifUV128(mce, vatom1, vatom2);
sewardja1d93302004-12-12 16:45:06 +00001309 at = mkPCast8x16(mce, at);
1310 return at;
1311}
1312
1313static
1314IRAtom* binary16Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
1315{
1316 IRAtom* at;
sewardj20d38f22005-02-07 23:50:18 +00001317 at = mkUifUV128(mce, vatom1, vatom2);
sewardja1d93302004-12-12 16:45:06 +00001318 at = mkPCast16x8(mce, at);
1319 return at;
1320}
1321
1322static
1323IRAtom* binary32Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
1324{
1325 IRAtom* at;
sewardj20d38f22005-02-07 23:50:18 +00001326 at = mkUifUV128(mce, vatom1, vatom2);
sewardja1d93302004-12-12 16:45:06 +00001327 at = mkPCast32x4(mce, at);
1328 return at;
1329}
1330
1331static
1332IRAtom* binary64Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
1333{
1334 IRAtom* at;
sewardj20d38f22005-02-07 23:50:18 +00001335 at = mkUifUV128(mce, vatom1, vatom2);
sewardja1d93302004-12-12 16:45:06 +00001336 at = mkPCast64x2(mce, at);
1337 return at;
1338}
sewardj3245c912004-12-10 14:58:26 +00001339
sewardjacd2e912005-01-13 19:17:06 +00001340/* --- 64-bit versions --- */
1341
1342static
1343IRAtom* binary8Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
1344{
1345 IRAtom* at;
1346 at = mkUifU64(mce, vatom1, vatom2);
1347 at = mkPCast8x8(mce, at);
1348 return at;
1349}
1350
1351static
1352IRAtom* binary16Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
1353{
1354 IRAtom* at;
1355 at = mkUifU64(mce, vatom1, vatom2);
1356 at = mkPCast16x4(mce, at);
1357 return at;
1358}
1359
1360static
1361IRAtom* binary32Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
1362{
1363 IRAtom* at;
1364 at = mkUifU64(mce, vatom1, vatom2);
1365 at = mkPCast32x2(mce, at);
1366 return at;
1367}
1368
sewardj3245c912004-12-10 14:58:26 +00001369
/*------------------------------------------------------------*/
/*--- Generate shadow values from all kinds of IRExprs.    ---*/
/*------------------------------------------------------------*/

static
IRAtom* expr2vbits_Binop ( MCEnv* mce,
                           IROp op,
                           IRAtom* atom1, IRAtom* atom2 )
{
   IRType  and_or_ty;
   IRAtom* (*uifu)    (MCEnv*, IRAtom*, IRAtom*);
   IRAtom* (*difd)    (MCEnv*, IRAtom*, IRAtom*);
   IRAtom* (*improve) (MCEnv*, IRAtom*, IRAtom*);

   IRAtom* vatom1 = expr2vbits( mce, atom1 );
   IRAtom* vatom2 = expr2vbits( mce, atom2 );

   tl_assert(isOriginalAtom(mce,atom1));
   tl_assert(isOriginalAtom(mce,atom2));
   tl_assert(isShadowAtom(mce,vatom1));
   tl_assert(isShadowAtom(mce,vatom2));
   tl_assert(sameKindedAtoms(atom1,vatom1));
   tl_assert(sameKindedAtoms(atom2,vatom2));
   switch (op) {

      /* 64-bit SIMD */

      case Iop_ShrN16x4:
      case Iop_ShrN32x2:
      case Iop_SarN16x4:
      case Iop_SarN32x2:
      case Iop_ShlN16x4:
      case Iop_ShlN32x2:
         /* Same scheme as with all other shifts. */
         complainIfUndefined(mce, atom2);
         return assignNew(mce, Ity_I64, binop(op, vatom1, atom2));

      case Iop_QNarrow32Sx2:
      case Iop_QNarrow16Sx4:
      case Iop_QNarrow16Ux4:
         return vectorNarrow64(mce, op, vatom1, vatom2);

      case Iop_Min8Ux8:
      case Iop_Max8Ux8:
      case Iop_Avg8Ux8:
      case Iop_QSub8Sx8:
      case Iop_QSub8Ux8:
      case Iop_Sub8x8:
      case Iop_CmpGT8Sx8:
      case Iop_CmpEQ8x8:
      case Iop_QAdd8Sx8:
      case Iop_QAdd8Ux8:
      case Iop_Add8x8:
         return binary8Ix8(mce, vatom1, vatom2);

      case Iop_Min16Sx4:
      case Iop_Max16Sx4:
      case Iop_Avg16Ux4:
      case Iop_QSub16Ux4:
      case Iop_QSub16Sx4:
      case Iop_Sub16x4:
      case Iop_Mul16x4:
      case Iop_MulHi16Sx4:
      case Iop_MulHi16Ux4:
      case Iop_CmpGT16Sx4:
      case Iop_CmpEQ16x4:
      case Iop_QAdd16Sx4:
      case Iop_QAdd16Ux4:
      case Iop_Add16x4:
         return binary16Ix4(mce, vatom1, vatom2);

      case Iop_Sub32x2:
      case Iop_CmpGT32Sx2:
      case Iop_CmpEQ32x2:
      case Iop_Add32x2:
         return binary32Ix2(mce, vatom1, vatom2);

      /* 64-bit data-steering */
      case Iop_InterleaveLO32x2:
      case Iop_InterleaveLO16x4:
      case Iop_InterleaveLO8x8:
      case Iop_InterleaveHI32x2:
      case Iop_InterleaveHI16x4:
      case Iop_InterleaveHI8x8:
         return assignNew(mce, Ity_I64, binop(op, vatom1, vatom2));

      /* V128-bit SIMD */

      case Iop_ShrN16x8:
      case Iop_ShrN32x4:
      case Iop_ShrN64x2:
      case Iop_SarN16x8:
      case Iop_SarN32x4:
      case Iop_ShlN16x8:
      case Iop_ShlN32x4:
      case Iop_ShlN64x2:
         /* Same scheme as with all other shifts. */
         complainIfUndefined(mce, atom2);
         return assignNew(mce, Ity_V128, binop(op, vatom1, atom2));

      case Iop_QSub8Ux16:
      case Iop_QSub8Sx16:
      case Iop_Sub8x16:
      case Iop_Min8Ux16:
      case Iop_Max8Ux16:
      case Iop_CmpGT8Sx16:
      case Iop_CmpEQ8x16:
      case Iop_Avg8Ux16:
      case Iop_QAdd8Ux16:
      case Iop_QAdd8Sx16:
      case Iop_Add8x16:
         return binary8Ix16(mce, vatom1, vatom2);

      case Iop_QSub16Ux8:
      case Iop_QSub16Sx8:
      case Iop_Sub16x8:
      case Iop_Mul16x8:
      case Iop_MulHi16Sx8:
      case Iop_MulHi16Ux8:
      case Iop_Min16Sx8:
      case Iop_Max16Sx8:
      case Iop_CmpGT16Sx8:
      case Iop_CmpEQ16x8:
      case Iop_Avg16Ux8:
      case Iop_QAdd16Ux8:
      case Iop_QAdd16Sx8:
      case Iop_Add16x8:
         return binary16Ix8(mce, vatom1, vatom2);

      case Iop_Sub32x4:
      case Iop_CmpGT32Sx4:
      case Iop_CmpEQ32x4:
      case Iop_Add32x4:
         return binary32Ix4(mce, vatom1, vatom2);

      case Iop_Sub64x2:
      case Iop_Add64x2:
         return binary64Ix2(mce, vatom1, vatom2);

      case Iop_QNarrow32Sx4:
      case Iop_QNarrow16Sx8:
      case Iop_QNarrow16Ux8:
         return vectorNarrowV128(mce, op, vatom1, vatom2);

sewardj0b070592004-12-10 21:44:22 +00001514 case Iop_Sub64Fx2:
1515 case Iop_Mul64Fx2:
1516 case Iop_Min64Fx2:
1517 case Iop_Max64Fx2:
1518 case Iop_Div64Fx2:
1519 case Iop_CmpLT64Fx2:
1520 case Iop_CmpLE64Fx2:
1521 case Iop_CmpEQ64Fx2:
1522 case Iop_Add64Fx2:
1523 return binary64Fx2(mce, vatom1, vatom2);
1524
1525 case Iop_Sub64F0x2:
1526 case Iop_Mul64F0x2:
1527 case Iop_Min64F0x2:
1528 case Iop_Max64F0x2:
1529 case Iop_Div64F0x2:
1530 case Iop_CmpLT64F0x2:
1531 case Iop_CmpLE64F0x2:
1532 case Iop_CmpEQ64F0x2:
1533 case Iop_Add64F0x2:
1534 return binary64F0x2(mce, vatom1, vatom2);
1535
sewardj170ee212004-12-10 18:57:51 +00001536 case Iop_Sub32Fx4:
1537 case Iop_Mul32Fx4:
1538 case Iop_Min32Fx4:
1539 case Iop_Max32Fx4:
1540 case Iop_Div32Fx4:
1541 case Iop_CmpLT32Fx4:
1542 case Iop_CmpLE32Fx4:
1543 case Iop_CmpEQ32Fx4:
sewardj3245c912004-12-10 14:58:26 +00001544 case Iop_Add32Fx4:
1545 return binary32Fx4(mce, vatom1, vatom2);
1546
sewardj170ee212004-12-10 18:57:51 +00001547 case Iop_Sub32F0x4:
1548 case Iop_Mul32F0x4:
1549 case Iop_Min32F0x4:
1550 case Iop_Max32F0x4:
1551 case Iop_Div32F0x4:
1552 case Iop_CmpLT32F0x4:
1553 case Iop_CmpLE32F0x4:
1554 case Iop_CmpEQ32F0x4:
1555 case Iop_Add32F0x4:
1556 return binary32F0x4(mce, vatom1, vatom2);
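      /* The F0x4/F0x2 forms operate only on the lowest lane of the
         vector; the remaining lanes are just copied through from the
         first operand.  binary32F0x4 and binary64F0x2 build the
         shadow to match: only the low lane's vbits are pessimised,
         the other lanes' vbits pass through from vatom1. */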

      /* V128-bit data-steering */
      case Iop_SetV128lo32:
      case Iop_SetV128lo64:
      case Iop_64HLtoV128:
      case Iop_InterleaveLO64x2:
      case Iop_InterleaveLO32x4:
      case Iop_InterleaveLO16x8:
      case Iop_InterleaveLO8x16:
      case Iop_InterleaveHI64x2:
      case Iop_InterleaveHI32x4:
      case Iop_InterleaveHI16x8:
      case Iop_InterleaveHI8x16:
         return assignNew(mce, Ity_V128, binop(op, vatom1, vatom2));

      /* I128-bit data-steering */
      case Iop_64HLto128:
         return assignNew(mce, Ity_I128, binop(op, vatom1, vatom2));

      /* Scalar floating point */

      case Iop_RoundF64:
      case Iop_F64toI64:
      case Iop_I64toF64:
         /* First arg is I32 (rounding mode), second is F64 or I64
            (data). */
         return mkLazy2(mce, Ity_I64, vatom1, vatom2);

      case Iop_PRemC3210F64: case Iop_PRem1C3210F64:
         /* Takes two F64 args. */
      case Iop_F64toI32:
      case Iop_F64toF32:
         /* First arg is I32 (rounding mode), second is F64 (data). */
         return mkLazy2(mce, Ity_I32, vatom1, vatom2);

      case Iop_F64toI16:
         /* First arg is I32 (rounding mode), second is F64 (data). */
         return mkLazy2(mce, Ity_I16, vatom1, vatom2);

      case Iop_ScaleF64:
      case Iop_Yl2xF64:
      case Iop_Yl2xp1F64:
      case Iop_PRemF64:
      case Iop_PRem1F64:
      case Iop_AtanF64:
      case Iop_AddF64:
      case Iop_DivF64:
      case Iop_SubF64:
      case Iop_MulF64:
         return mkLazy2(mce, Ity_I64, vatom1, vatom2);

      case Iop_CmpF64:
         return mkLazy2(mce, Ity_I32, vatom1, vatom2);
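      /* All the scalar FP cases above use the lazy scheme: the two
         args' vbits are pessimistically summarised and PCast to the
         result type, so the result shadow is either all defined or
         all undefined.  E.g. AddF64 with any undefined operand bit
         produces a fully undefined I64 shadow. */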

      /* non-FP after here */

      case Iop_DivModU64to32:
      case Iop_DivModS64to32:
         return mkLazy2(mce, Ity_I64, vatom1, vatom2);

      case Iop_DivModU128to64:
      case Iop_DivModS128to64:
         return mkLazy2(mce, Ity_I128, vatom1, vatom2);

      case Iop_16HLto32:
         return assignNew(mce, Ity_I32, binop(op, vatom1, vatom2));
      case Iop_32HLto64:
         return assignNew(mce, Ity_I64, binop(op, vatom1, vatom2));

      case Iop_MullS64:
      case Iop_MullU64: {
         IRAtom* vLo64 = mkLeft64(mce, mkUifU64(mce, vatom1,vatom2));
         IRAtom* vHi64 = mkPCastTo(mce, Ity_I64, vLo64);
         return assignNew(mce, Ity_I128, binop(Iop_64HLto128, vHi64, vLo64));
      }

      case Iop_MullS32:
      case Iop_MullU32: {
         IRAtom* vLo32 = mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
         IRAtom* vHi32 = mkPCastTo(mce, Ity_I32, vLo32);
         return assignNew(mce, Ity_I64, binop(Iop_32HLto64, vHi32, vLo32));
      }

      case Iop_MullS16:
      case Iop_MullU16: {
         IRAtom* vLo16 = mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
         IRAtom* vHi16 = mkPCastTo(mce, Ity_I16, vLo16);
         return assignNew(mce, Ity_I32, binop(Iop_16HLto32, vHi16, vLo16));
      }

      case Iop_MullS8:
      case Iop_MullU8: {
         IRAtom* vLo8 = mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
         IRAtom* vHi8 = mkPCastTo(mce, Ity_I8, vLo8);
         return assignNew(mce, Ity_I16, binop(Iop_8HLto16, vHi8, vLo8));
      }
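      /* In these widening multiplies, the low half of the shadow is
         mkLeft(UifU): an undefined input bit taints its own position
         and everything above it, modelling carry propagation.  The
         high half is a PCast of that, hence all-or-nothing: e.g. for
         MullU32, a single undefined input bit leaves the result bits
         below it defined but makes the whole high word undefined. */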

      case Iop_Add32:
         if (mce->bogusLiterals)
            return expensiveAddSub(mce,True,Ity_I32,
                                   vatom1,vatom2, atom1,atom2);
         else
            goto cheap_AddSub32;
      case Iop_Sub32:
         if (mce->bogusLiterals)
            return expensiveAddSub(mce,False,Ity_I32,
                                   vatom1,vatom2, atom1,atom2);
         else
            goto cheap_AddSub32;

      cheap_AddSub32:
      case Iop_Mul32:
         return mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
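      /* Rationale for the cheap scheme: carries mean an undefined bit
         can disturb its own and all higher result positions, but
         never lower ones, and mkLeft(UifU) encodes exactly that.
         E.g. if only bit 4 of an addend is undefined, bits 4..31 of
         the sum are treated as undefined while bits 0..3 stay
         defined. */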

      /* could do better: Add64, Sub64 */
      case Iop_Mul64:
      case Iop_Add64:
      case Iop_Sub64:
         return mkLeft64(mce, mkUifU64(mce, vatom1,vatom2));

      case Iop_Mul16:
      case Iop_Add16:
      case Iop_Sub16:
         return mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));

      case Iop_Sub8:
      case Iop_Add8:
         return mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));

      case Iop_CmpEQ64:
         if (mce->bogusLiterals)
            return expensiveCmpEQorNE(mce,Ity_I64, vatom1,vatom2, atom1,atom2 );
         else
            goto cheap_cmp64;
      cheap_cmp64:
      case Iop_CmpLE64S: case Iop_CmpLE64U:
      case Iop_CmpLT64U: case Iop_CmpLT64S:
      case Iop_CmpNE64:
         return mkPCastTo(mce, Ity_I1, mkUifU64(mce, vatom1,vatom2));

      case Iop_CmpEQ32:
         if (mce->bogusLiterals)
            return expensiveCmpEQorNE(mce,Ity_I32, vatom1,vatom2, atom1,atom2 );
         else
            goto cheap_cmp32;
      cheap_cmp32:
      case Iop_CmpLE32S: case Iop_CmpLE32U:
      case Iop_CmpLT32U: case Iop_CmpLT32S:
      case Iop_CmpNE32:
         return mkPCastTo(mce, Ity_I1, mkUifU32(mce, vatom1,vatom2));

      case Iop_CmpEQ16: case Iop_CmpNE16:
         return mkPCastTo(mce, Ity_I1, mkUifU16(mce, vatom1,vatom2));

      case Iop_CmpEQ8: case Iop_CmpNE8:
         return mkPCastTo(mce, Ity_I1, mkUifU8(mce, vatom1,vatom2));
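      /* Comparisons yield a single bit, so UifU followed by a PCast
         to Ity_I1 suffices: any undefined bit in either operand makes
         the result undefined.  CmpEQ32/CmpEQ64 get the more precise
         'expensive' treatment above only once bogus literals have
         been spotted in this BB. */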

      case Iop_Shl32: case Iop_Shr32: case Iop_Sar32:
         /* Complain if the shift amount is undefined.  Then simply
            shift the first arg's V bits by the real shift amount. */
         complainIfUndefined(mce, atom2);
         return assignNew(mce, Ity_I32, binop(op, vatom1, atom2));

      case Iop_Shl16: case Iop_Shr16: case Iop_Sar16:
         /* Same scheme as with 32-bit shifts. */
         complainIfUndefined(mce, atom2);
         return assignNew(mce, Ity_I16, binop(op, vatom1, atom2));

      case Iop_Shl8: case Iop_Shr8:
         /* Same scheme as with 32-bit shifts. */
         complainIfUndefined(mce, atom2);
         return assignNew(mce, Ity_I8, binop(op, vatom1, atom2));

      case Iop_Shl64: case Iop_Shr64: case Iop_Sar64:
         /* Same scheme as with 32-bit shifts. */
         complainIfUndefined(mce, atom2);
         return assignNew(mce, Ity_I64, binop(op, vatom1, atom2));

      case Iop_AndV128:
         uifu = mkUifUV128; difd = mkDifDV128;
         and_or_ty = Ity_V128; improve = mkImproveANDV128; goto do_And_Or;
      case Iop_And64:
         uifu = mkUifU64; difd = mkDifD64;
         and_or_ty = Ity_I64; improve = mkImproveAND64; goto do_And_Or;
      case Iop_And32:
         uifu = mkUifU32; difd = mkDifD32;
         and_or_ty = Ity_I32; improve = mkImproveAND32; goto do_And_Or;
      case Iop_And16:
         uifu = mkUifU16; difd = mkDifD16;
         and_or_ty = Ity_I16; improve = mkImproveAND16; goto do_And_Or;
      case Iop_And8:
         uifu = mkUifU8; difd = mkDifD8;
         and_or_ty = Ity_I8; improve = mkImproveAND8; goto do_And_Or;

      case Iop_OrV128:
         uifu = mkUifUV128; difd = mkDifDV128;
         and_or_ty = Ity_V128; improve = mkImproveORV128; goto do_And_Or;
      case Iop_Or64:
         uifu = mkUifU64; difd = mkDifD64;
         and_or_ty = Ity_I64; improve = mkImproveOR64; goto do_And_Or;
      case Iop_Or32:
         uifu = mkUifU32; difd = mkDifD32;
         and_or_ty = Ity_I32; improve = mkImproveOR32; goto do_And_Or;
      case Iop_Or16:
         uifu = mkUifU16; difd = mkDifD16;
         and_or_ty = Ity_I16; improve = mkImproveOR16; goto do_And_Or;
      case Iop_Or8:
         uifu = mkUifU8; difd = mkDifD8;
         and_or_ty = Ity_I8; improve = mkImproveOR8; goto do_And_Or;

      do_And_Or:
         return
         assignNew(
            mce,
            and_or_ty,
            difd(mce, uifu(mce, vatom1, vatom2),
                      difd(mce, improve(mce, atom1, vatom1),
                                improve(mce, atom2, vatom2) ) ) );
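      /* The 'improve' terms handle the cases where an operand's
         actual value forces the result regardless of the other
         operand: for And, a bit which is defined and 0 in either arg
         gives a defined 0 result bit; for Or, a defined 1 does the
         same.  UifU alone would wrongly flag (undefined & defined-0)
         as undefined. */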

      case Iop_Xor8:
         return mkUifU8(mce, vatom1, vatom2);
      case Iop_Xor16:
         return mkUifU16(mce, vatom1, vatom2);
      case Iop_Xor32:
         return mkUifU32(mce, vatom1, vatom2);
      case Iop_Xor64:
         return mkUifU64(mce, vatom1, vatom2);
      case Iop_XorV128:
         return mkUifUV128(mce, vatom1, vatom2);
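      /* Xor has no absorbing value, so no 'improve' term is possible:
         plain UifU of the vbits is the best cheap approximation. */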

      default:
         ppIROp(op);
         VG_(tool_panic)("memcheck:expr2vbits_Binop");
   }
}


static
IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
{
   IRAtom* vatom = expr2vbits( mce, atom );
   tl_assert(isOriginalAtom(mce,atom));
   switch (op) {

      case Iop_Sqrt64Fx2:
         return unary64Fx2(mce, vatom);

      case Iop_Sqrt64F0x2:
         return unary64F0x2(mce, vatom);

      case Iop_Sqrt32Fx4:
      case Iop_RSqrt32Fx4:
      case Iop_Recip32Fx4:
         return unary32Fx4(mce, vatom);

      case Iop_Sqrt32F0x4:
      case Iop_RSqrt32F0x4:
      case Iop_Recip32F0x4:
         return unary32F0x4(mce, vatom);

      case Iop_32UtoV128:
      case Iop_64UtoV128:
         return assignNew(mce, Ity_V128, unop(op, vatom));

      case Iop_F32toF64:
      case Iop_I32toF64:
      case Iop_NegF64:
      case Iop_SinF64:
      case Iop_CosF64:
      case Iop_TanF64:
      case Iop_SqrtF64:
      case Iop_AbsF64:
      case Iop_2xm1F64:
         return mkPCastTo(mce, Ity_I64, vatom);

      case Iop_Clz32:
      case Iop_Ctz32:
         return mkPCastTo(mce, Ity_I32, vatom);

      case Iop_1Uto64:
      case Iop_8Uto64:
      case Iop_8Sto64:
      case Iop_16Uto64:
      case Iop_16Sto64:
      case Iop_32Sto64:
      case Iop_32Uto64:
      case Iop_V128to64:
      case Iop_V128HIto64:
      case Iop_128HIto64:
      case Iop_128to64:
         return assignNew(mce, Ity_I64, unop(op, vatom));

      case Iop_64to32:
      case Iop_64HIto32:
      case Iop_1Uto32:
      case Iop_8Uto32:
      case Iop_16Uto32:
      case Iop_16Sto32:
      case Iop_8Sto32:
         return assignNew(mce, Ity_I32, unop(op, vatom));

      case Iop_8Sto16:
      case Iop_8Uto16:
      case Iop_32to16:
      case Iop_32HIto16:
      case Iop_64to16:
         return assignNew(mce, Ity_I16, unop(op, vatom));

      case Iop_1Uto8:
      case Iop_16to8:
      case Iop_32to8:
      case Iop_64to8:
         return assignNew(mce, Ity_I8, unop(op, vatom));

      case Iop_32to1:
         return assignNew(mce, Ity_I1, unop(Iop_32to1, vatom));

      case Iop_64to1:
         return assignNew(mce, Ity_I1, unop(Iop_64to1, vatom));

      case Iop_ReinterpF64asI64:
      case Iop_ReinterpI64asF64:
      case Iop_ReinterpI32asF32:
      case Iop_NotV128:
      case Iop_Not64:
      case Iop_Not32:
      case Iop_Not16:
      case Iop_Not8:
      case Iop_Not1:
         return vatom;
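      /* Not and the reinterpret casts are exact for V bits: each
         result bit depends on exactly one input bit, so the shadow
         passes through unchanged. */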

      /* Neg* really fall under the Add/Sub banner, and as such you
         might think would qualify for the 'expensive add/sub'
         treatment.  However, in this case since the implied literal
         is zero (0 - arg), we just do the cheap thing anyway. */
      case Iop_Neg8:
         return mkLeft8(mce, vatom);
      case Iop_Neg16:
         return mkLeft16(mce, vatom);
      case Iop_Neg32:
         return mkLeft32(mce, vatom);

      default:
         ppIROp(op);
         VG_(tool_panic)("memcheck:expr2vbits_Unop");
   }
}


/* Worker function; do not call directly. */
static
IRAtom* expr2vbits_LDle_WRK ( MCEnv* mce, IRType ty, IRAtom* addr, UInt bias )
{
   void*    helper;
   Char*    hname;
   IRDirty* di;
   IRTemp   datavbits;
   IRAtom*  addrAct;

   tl_assert(isOriginalAtom(mce,addr));

   /* First, emit a definedness test for the address.  This also sets
      the address (shadow) to 'defined' following the test. */
   complainIfUndefined( mce, addr );

   /* Now cook up a call to the relevant helper function, to read the
      data V bits from shadow memory. */
   ty = shadowType(ty);
   switch (ty) {
      case Ity_I64: helper = &MC_(helperc_LOADV8);
                    hname = "MC_(helperc_LOADV8)";
                    break;
      case Ity_I32: helper = &MC_(helperc_LOADV4);
                    hname = "MC_(helperc_LOADV4)";
                    break;
      case Ity_I16: helper = &MC_(helperc_LOADV2);
                    hname = "MC_(helperc_LOADV2)";
                    break;
      case Ity_I8:  helper = &MC_(helperc_LOADV1);
                    hname = "MC_(helperc_LOADV1)";
                    break;
      default:      ppIRType(ty);
                    VG_(tool_panic)("memcheck:expr2vbits_LDle_WRK");
   }

   /* Generate the actual address into addrAct. */
   if (bias == 0) {
      addrAct = addr;
   } else {
      IROp    mkAdd;
      IRAtom* eBias;
      IRType  tyAddr = mce->hWordTy;
      tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
      mkAdd   = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
      eBias   = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
      addrAct = assignNew(mce, tyAddr, binop(mkAdd, addr, eBias) );
   }

   /* We need to have a place to park the V bits we're just about to
      read. */
   datavbits = newIRTemp(mce->bb->tyenv, ty);
   di = unsafeIRDirty_1_N( datavbits,
                           1/*regparms*/, hname, helper,
                           mkIRExprVec_1( addrAct ));
   setHelperAnns( mce, di );
   stmt( mce->bb, IRStmt_Dirty(di) );

   return mkexpr(datavbits);
}

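/* Generate the shadow for a little-endian load.  Scalar loads go
   straight to the worker above; a V128 load is done as two 64-bit
   shadow loads at bias and bias+8, reassembled with Iop_64HLtoV128
   (the low half comes from the lower address). */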
static
IRAtom* expr2vbits_LDle ( MCEnv* mce, IRType ty, IRAtom* addr, UInt bias )
{
   IRAtom *v64hi, *v64lo;
   switch (shadowType(ty)) {
      case Ity_I8:
      case Ity_I16:
      case Ity_I32:
      case Ity_I64:
         return expr2vbits_LDle_WRK(mce, ty, addr, bias);
      case Ity_V128:
         v64lo = expr2vbits_LDle_WRK(mce, Ity_I64, addr, bias);
         v64hi = expr2vbits_LDle_WRK(mce, Ity_I64, addr, bias+8);
         return assignNew( mce,
                           Ity_V128,
                           binop(Iop_64HLtoV128, v64hi, v64lo));
      default:
         VG_(tool_panic)("expr2vbits_LDle");
   }
}


static
IRAtom* expr2vbits_Mux0X ( MCEnv* mce,
                           IRAtom* cond, IRAtom* expr0, IRAtom* exprX )
{
   IRAtom *vbitsC, *vbits0, *vbitsX;
   IRType ty;
   /* Given Mux0X(cond,expr0,exprX), generate
         Mux0X(cond,expr0#,exprX#) `UifU` PCast(cond#)
      That is, steer the V bits like the originals, but trash the
      result if the steering value is undefined.  This gives
      lazy propagation. */
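   /* For instance, if cond# is undefined, PCast(cond#) is all-ones
      at the result type, and the UifU then marks every bit of the
      result undefined, however well defined expr0 and exprX are. */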
   tl_assert(isOriginalAtom(mce, cond));
   tl_assert(isOriginalAtom(mce, expr0));
   tl_assert(isOriginalAtom(mce, exprX));

   vbitsC = expr2vbits(mce, cond);
   vbits0 = expr2vbits(mce, expr0);
   vbitsX = expr2vbits(mce, exprX);
   ty = typeOfIRExpr(mce->bb->tyenv, vbits0);

   return
      mkUifU(mce, ty, assignNew(mce, ty, IRExpr_Mux0X(cond, vbits0, vbitsX)),
                      mkPCastTo(mce, ty, vbitsC) );
}

/* --------- This is the main expression-handling function. --------- */

static
IRExpr* expr2vbits ( MCEnv* mce, IRExpr* e )
{
   switch (e->tag) {

      case Iex_Get:
         return shadow_GET( mce, e->Iex.Get.offset, e->Iex.Get.ty );

      case Iex_GetI:
         return shadow_GETI( mce, e->Iex.GetI.descr,
                                  e->Iex.GetI.ix, e->Iex.GetI.bias );

      case Iex_Tmp:
         return IRExpr_Tmp( findShadowTmp(mce, e->Iex.Tmp.tmp) );

      case Iex_Const:
         return definedOfType(shadowType(typeOfIRExpr(mce->bb->tyenv, e)));

      case Iex_Binop:
         return expr2vbits_Binop(
                   mce,
                   e->Iex.Binop.op,
                   e->Iex.Binop.arg1, e->Iex.Binop.arg2
                );

      case Iex_Unop:
         return expr2vbits_Unop( mce, e->Iex.Unop.op, e->Iex.Unop.arg );

      case Iex_LDle:
         return expr2vbits_LDle( mce, e->Iex.LDle.ty,
                                      e->Iex.LDle.addr, 0/*addr bias*/ );

      case Iex_CCall:
         return mkLazyN( mce, e->Iex.CCall.args,
                              e->Iex.CCall.retty,
                              e->Iex.CCall.cee );

      case Iex_Mux0X:
         return expr2vbits_Mux0X( mce, e->Iex.Mux0X.cond, e->Iex.Mux0X.expr0,
                                       e->Iex.Mux0X.exprX);

      default:
         VG_(printf)("\n");
         ppIRExpr(e);
         VG_(printf)("\n");
         VG_(tool_panic)("memcheck: expr2vbits");
   }
}

/*------------------------------------------------------------*/
/*--- Generate shadow stmts from all kinds of IRStmts.     ---*/
/*------------------------------------------------------------*/

/* Widen a value to the host word size. */
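/* Zero-widening is believed safe here because the store helpers only
   consult the low-order part that the store actually writes; the
   zero ('defined') padding in the upper bits is never examined. */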

static
IRExpr* zwidenToHostWord ( MCEnv* mce, IRAtom* vatom )
{
   IRType ty, tyH;

   /* vatom is vbits-value and as such can only have a shadow type. */
   tl_assert(isShadowAtom(mce,vatom));

   ty  = typeOfIRExpr(mce->bb->tyenv, vatom);
   tyH = mce->hWordTy;

   if (tyH == Ity_I32) {
      switch (ty) {
         case Ity_I32: return vatom;
         case Ity_I16: return assignNew(mce, tyH, unop(Iop_16Uto32, vatom));
         case Ity_I8:  return assignNew(mce, tyH, unop(Iop_8Uto32, vatom));
         default:      goto unhandled;
      }
   } else
   if (tyH == Ity_I64) {
      switch (ty) {
         case Ity_I32: return assignNew(mce, tyH, unop(Iop_32Uto64, vatom));
         case Ity_I16: return assignNew(mce, tyH, unop(Iop_32Uto64,
                              assignNew(mce, Ity_I32, unop(Iop_16Uto32, vatom))));
         case Ity_I8:  return assignNew(mce, tyH, unop(Iop_32Uto64,
                              assignNew(mce, Ity_I32, unop(Iop_8Uto32, vatom))));
         default:      goto unhandled;
      }
   } else {
      goto unhandled;
   }
  unhandled:
   VG_(printf)("\nty = "); ppIRType(ty); VG_(printf)("\n");
   VG_(tool_panic)("zwidenToHostWord");
}


/* Generate a shadow store.  addr is always the original address atom.
   You can pass in either originals or V-bits for the data atom, but
   obviously not both. */

static
void do_shadow_STle ( MCEnv* mce,
                      IRAtom* addr, UInt bias,
                      IRAtom* data, IRAtom* vdata )
{
   IROp     mkAdd;
   IRType   ty, tyAddr;
   IRDirty  *di, *diLo64, *diHi64;
   IRAtom   *addrAct, *addrLo64, *addrHi64;
   IRAtom   *vdataLo64, *vdataHi64;
   IRAtom   *eBias, *eBias0, *eBias8;
   void*    helper = NULL;
   Char*    hname = NULL;

   tyAddr = mce->hWordTy;
   mkAdd  = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
   tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );

   di = diLo64 = diHi64 = NULL;
   eBias = eBias0 = eBias8 = NULL;
   addrAct = addrLo64 = addrHi64 = NULL;
   vdataLo64 = vdataHi64 = NULL;

   if (data) {
      tl_assert(!vdata);
      tl_assert(isOriginalAtom(mce, data));
      tl_assert(bias == 0);
      vdata = expr2vbits( mce, data );
   } else {
      tl_assert(vdata);
   }

   tl_assert(isOriginalAtom(mce,addr));
   tl_assert(isShadowAtom(mce,vdata));

   ty = typeOfIRExpr(mce->bb->tyenv, vdata);

   /* First, emit a definedness test for the address.  This also sets
      the address (shadow) to 'defined' following the test. */
   complainIfUndefined( mce, addr );

   /* Now decide which helper function to call to write the data V
      bits into shadow memory. */
   switch (ty) {
      case Ity_V128: /* we'll use the helper twice */
      case Ity_I64: helper = &MC_(helperc_STOREV8);
                    hname = "MC_(helperc_STOREV8)";
                    break;
      case Ity_I32: helper = &MC_(helperc_STOREV4);
                    hname = "MC_(helperc_STOREV4)";
                    break;
      case Ity_I16: helper = &MC_(helperc_STOREV2);
                    hname = "MC_(helperc_STOREV2)";
                    break;
      case Ity_I8:  helper = &MC_(helperc_STOREV1);
                    hname = "MC_(helperc_STOREV1)";
                    break;
      default:      VG_(tool_panic)("memcheck:do_shadow_STle");
   }

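   /* There is no 128-bit store helper, so a V128 store is issued as
      two 64-bit shadow stores: the low half at addr+bias and the
      high half at addr+bias+8 (little-endian layout). */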
   if (ty == Ity_V128) {

      /* V128-bit case */
      /* See comment in next clause re 64-bit regparms */
      eBias0    = tyAddr==Ity_I32 ? mkU32(bias)   : mkU64(bias);
      addrLo64  = assignNew(mce, tyAddr, binop(mkAdd, addr, eBias0) );
      vdataLo64 = assignNew(mce, Ity_I64, unop(Iop_V128to64, vdata));
      diLo64    = unsafeIRDirty_0_N(
                     1/*regparms*/, hname, helper,
                     mkIRExprVec_2( addrLo64, vdataLo64 ));

      eBias8    = tyAddr==Ity_I32 ? mkU32(bias+8) : mkU64(bias+8);
      addrHi64  = assignNew(mce, tyAddr, binop(mkAdd, addr, eBias8) );
      vdataHi64 = assignNew(mce, Ity_I64, unop(Iop_V128HIto64, vdata));
      diHi64    = unsafeIRDirty_0_N(
                     1/*regparms*/, hname, helper,
                     mkIRExprVec_2( addrHi64, vdataHi64 ));

      setHelperAnns( mce, diLo64 );
      setHelperAnns( mce, diHi64 );
      stmt( mce->bb, IRStmt_Dirty(diLo64) );
      stmt( mce->bb, IRStmt_Dirty(diHi64) );

   } else {

      /* 8/16/32/64-bit cases */
      /* Generate the actual address into addrAct. */
      if (bias == 0) {
         addrAct = addr;
      } else {
         eBias   = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
         addrAct = assignNew(mce, tyAddr, binop(mkAdd, addr, eBias) );
      }

      if (ty == Ity_I64) {
         /* We can't do this with regparm 2 on 32-bit platforms, since
            the back ends aren't clever enough to handle 64-bit
            regparm args.  Therefore be different. */
         di = unsafeIRDirty_0_N(
                 1/*regparms*/, hname, helper,
                 mkIRExprVec_2( addrAct, vdata ));
      } else {
         di = unsafeIRDirty_0_N(
                 2/*regparms*/, hname, helper,
                 mkIRExprVec_2( addrAct,
                                zwidenToHostWord( mce, vdata )));
      }
      setHelperAnns( mce, di );
      stmt( mce->bb, IRStmt_Dirty(di) );
   }

}


/* Do lazy pessimistic propagation through a dirty helper call, by
   looking at the annotations on it.  This is the most complex part of
   Memcheck. */

static IRType szToITy ( Int n )
{
   switch (n) {
      case 1: return Ity_I8;
      case 2: return Ity_I16;
      case 4: return Ity_I32;
      case 8: return Ity_I64;
      default: VG_(tool_panic)("szToITy(memcheck)");
   }
}

static
void do_shadow_Dirty ( MCEnv* mce, IRDirty* d )
{
   Int     i, n, offset, toDo, gSz, gOff;
   IRAtom  *src, *here, *curr;
   IRType  tyAddr, tySrc, tyDst;
   IRTemp  dst;

   /* First check the guard. */
   complainIfUndefined(mce, d->guard);

   /* Now round up all inputs and PCast over them. */
   curr = definedOfType(Ity_I32);

   /* Inputs: unmasked args */
   for (i = 0; d->args[i]; i++) {
      if (d->cee->mcx_mask & (1<<i)) {
         /* ignore this arg */
      } else {
         here = mkPCastTo( mce, Ity_I32, expr2vbits(mce, d->args[i]) );
         curr = mkUifU32(mce, here, curr);
      }
   }

   /* Inputs: guest state that we read. */
   for (i = 0; i < d->nFxState; i++) {
      tl_assert(d->fxState[i].fx != Ifx_None);
      if (d->fxState[i].fx == Ifx_Write)
         continue;

      /* Ignore any sections marked as 'always defined'. */
      if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size )) {
         if (0)
         VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
                     d->fxState[i].offset, d->fxState[i].size );
         continue;
      }

      /* This state element is read or modified.  So we need to
         consider it.  If larger than 8 bytes, deal with it in 8-byte
         chunks. */
      gSz  = d->fxState[i].size;
      gOff = d->fxState[i].offset;
      tl_assert(gSz > 0);
      while (True) {
         if (gSz == 0) break;
         n = gSz <= 8 ? gSz : 8;
         /* update 'curr' with UifU of the state slice
            gOff .. gOff+n-1 */
         tySrc = szToITy( n );
         src   = assignNew( mce, tySrc,
                            shadow_GET(mce, gOff, tySrc ) );
         here = mkPCastTo( mce, Ity_I32, src );
         curr = mkUifU32(mce, here, curr);
         gSz -= n;
         gOff += n;
      }

   }

   /* Inputs: memory.  First set up some info needed regardless of
      whether we're doing reads or writes. */
   tyAddr = Ity_INVALID;

   if (d->mFx != Ifx_None) {
      /* Because we may do multiple shadow loads/stores from the same
         base address, it's best to do a single test of its
         definedness right now.  Post-instrumentation optimisation
         should remove all but this test. */
      tl_assert(d->mAddr);
      complainIfUndefined(mce, d->mAddr);

      tyAddr = typeOfIRExpr(mce->bb->tyenv, d->mAddr);
      tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);
      tl_assert(tyAddr == mce->hWordTy); /* not really right */
   }

   /* Deal with memory inputs (reads or modifies) */
   if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
      offset = 0;
      toDo   = d->mSize;
      /* chew off 32-bit chunks */
      while (toDo >= 4) {
         here = mkPCastTo(
                   mce, Ity_I32,
                   expr2vbits_LDle ( mce, Ity_I32,
                                     d->mAddr, d->mSize - toDo )
                );
         curr = mkUifU32(mce, here, curr);
         toDo -= 4;
      }
      /* chew off 16-bit chunks */
      while (toDo >= 2) {
         here = mkPCastTo(
                   mce, Ity_I32,
                   expr2vbits_LDle ( mce, Ity_I16,
                                     d->mAddr, d->mSize - toDo )
                );
         curr = mkUifU32(mce, here, curr);
         toDo -= 2;
      }
      tl_assert(toDo == 0); /* also need to handle 1-byte excess */
   }
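   /* E.g. for mSize == 6 this folds in the shadow of a 32-bit load
      at offset 0 and a 16-bit load at offset 4.  Odd 1-byte residues
      are not yet handled, hence the assertion above. */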

   /* Whew!  So curr is a 32-bit V-value summarising pessimistically
      all the inputs to the helper.  Now we need to re-distribute the
      results to all destinations. */

   /* Outputs: the destination temporary, if there is one. */
   if (d->tmp != IRTemp_INVALID) {
      dst   = findShadowTmp(mce, d->tmp);
      tyDst = typeOfIRTemp(mce->bb->tyenv, d->tmp);
      assign( mce->bb, dst, mkPCastTo( mce, tyDst, curr) );
   }

   /* Outputs: guest state that we write or modify. */
   for (i = 0; i < d->nFxState; i++) {
      tl_assert(d->fxState[i].fx != Ifx_None);
      if (d->fxState[i].fx == Ifx_Read)
         continue;
      /* Ignore any sections marked as 'always defined'. */
      if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size ))
         continue;
      /* This state element is written or modified.  So we need to
         consider it.  If larger than 8 bytes, deal with it in 8-byte
         chunks. */
      gSz  = d->fxState[i].size;
      gOff = d->fxState[i].offset;
      tl_assert(gSz > 0);
      while (True) {
         if (gSz == 0) break;
         n = gSz <= 8 ? gSz : 8;
         /* Write suitably-casted 'curr' to the state slice
            gOff .. gOff+n-1 */
         tyDst = szToITy( n );
         do_shadow_PUT( mce, gOff,
                             NULL, /* original atom */
                             mkPCastTo( mce, tyDst, curr ) );
         gSz -= n;
         gOff += n;
      }
   }

   /* Outputs: memory that we write or modify. */
   if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
      offset = 0;
      toDo   = d->mSize;
      /* chew off 32-bit chunks */
      while (toDo >= 4) {
         do_shadow_STle( mce, d->mAddr, d->mSize - toDo,
                         NULL, /* original data */
                         mkPCastTo( mce, Ity_I32, curr ) );
         toDo -= 4;
      }
      /* chew off 16-bit chunks */
      while (toDo >= 2) {
         do_shadow_STle( mce, d->mAddr, d->mSize - toDo,
                         NULL, /* original data */
                         mkPCastTo( mce, Ity_I16, curr ) );
         toDo -= 2;
      }
      tl_assert(toDo == 0); /* also need to handle 1-byte excess */
   }

}

/* We have an ABI hint telling us that [base .. base+len-1] is to
   become undefined ("writable").  Generate code to call a helper to
   notify the A/V bit machinery of this fact.

   We call
   void MC_(helperc_MAKE_STACK_UNINIT) ( Addr base, UWord len );
*/
static
void do_AbiHint ( MCEnv* mce, IRExpr* base, Int len )
{
   IRDirty* di;
   di = unsafeIRDirty_0_N(
           0/*regparms*/,
           "MC_(helperc_MAKE_STACK_UNINIT)",
           &MC_(helperc_MAKE_STACK_UNINIT),
           mkIRExprVec_2( base, mkIRExpr_HWord( (UInt)len) )
        );
   stmt( mce->bb, IRStmt_Dirty(di) );
}


/*------------------------------------------------------------*/
/*--- Memcheck main                                        ---*/
/*------------------------------------------------------------*/

static Bool isBogusAtom ( IRAtom* at )
{
   ULong n = 0;
   IRConst* con;
   tl_assert(isIRAtom(at));
   if (at->tag == Iex_Tmp)
      return False;
   tl_assert(at->tag == Iex_Const);
   con = at->Iex.Const.con;
   switch (con->tag) {
      case Ico_U1:   return False;
      case Ico_U8:   n = (ULong)con->Ico.U8; break;
      case Ico_U16:  n = (ULong)con->Ico.U16; break;
      case Ico_U32:  n = (ULong)con->Ico.U32; break;
      case Ico_U64:  n = (ULong)con->Ico.U64; break;
      case Ico_F64:  return False;
      case Ico_F64i: return False;
      case Ico_V128: return False;
      default: ppIRExpr(at); tl_assert(0);
   }
   /* VG_(printf)("%llx\n", n); */
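   /* These particular constants are characteristic of word-at-a-time
      string-scanning idioms (e.g. optimised strlen), the main source
      of deliberately part-undefined values, and hence a hint that
      the expensive instrumentation is worth enabling. */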
   return (/*32*/    n == 0xFEFEFEFFULL
           /*32*/ || n == 0x80808080ULL
           /*64*/ || n == 0xFEFEFEFEFEFEFEFFULL
           /*64*/ || n == 0x8080808080808080ULL
           /*64*/ || n == 0x0101010101010101ULL
          );
}

static Bool checkForBogusLiterals ( /*FLAT*/ IRStmt* st )
{
   Int      i;
   IRExpr*  e;
   IRDirty* d;
   switch (st->tag) {
      case Ist_Tmp:
         e = st->Ist.Tmp.data;
         switch (e->tag) {
            case Iex_Get:
            case Iex_Tmp:
               return False;
            case Iex_Const:
               return isBogusAtom(e);
            case Iex_Unop:
               return isBogusAtom(e->Iex.Unop.arg);
            case Iex_GetI:
               return isBogusAtom(e->Iex.GetI.ix);
            case Iex_Binop:
               return isBogusAtom(e->Iex.Binop.arg1)
                      || isBogusAtom(e->Iex.Binop.arg2);
            case Iex_Mux0X:
               return isBogusAtom(e->Iex.Mux0X.cond)
                      || isBogusAtom(e->Iex.Mux0X.expr0)
                      || isBogusAtom(e->Iex.Mux0X.exprX);
            case Iex_LDle:
               return isBogusAtom(e->Iex.LDle.addr);
            case Iex_CCall:
               for (i = 0; e->Iex.CCall.args[i]; i++)
                  if (isBogusAtom(e->Iex.CCall.args[i]))
                     return True;
               return False;
            default:
               goto unhandled;
         }
      case Ist_Dirty:
         d = st->Ist.Dirty.details;
         for (i = 0; d->args[i]; i++)
            if (isBogusAtom(d->args[i]))
               return True;
         if (d->guard && isBogusAtom(d->guard))
            return True;
         if (d->mAddr && isBogusAtom(d->mAddr))
            return True;
         return False;
      case Ist_Put:
         return isBogusAtom(st->Ist.Put.data);
      case Ist_PutI:
         return isBogusAtom(st->Ist.PutI.ix)
                || isBogusAtom(st->Ist.PutI.data);
      case Ist_STle:
         return isBogusAtom(st->Ist.STle.addr)
                || isBogusAtom(st->Ist.STle.data);
      case Ist_Exit:
         return isBogusAtom(st->Ist.Exit.guard);
      case Ist_AbiHint:
         return isBogusAtom(st->Ist.AbiHint.base);
      case Ist_NoOp:
      case Ist_IMark:
      case Ist_MFence:
         return False;
      default:
      unhandled:
         ppIRStmt(st);
         VG_(tool_panic)("checkForBogusLiterals");
   }
}


IRBB* MC_(instrument) ( IRBB* bb_in, VexGuestLayout* layout,
                        IRType gWordTy, IRType hWordTy )
{
   Bool verboze = False; //True;

   Int     i, j, first_stmt;
   IRStmt* st;
   MCEnv   mce;
   IRBB*   bb;

   if (gWordTy != hWordTy) {
      /* We don't currently support this case. */
      VG_(tool_panic)("host/guest word size mismatch");
   }

   /* Check we're not completely nuts */
   tl_assert(sizeof(UWord) == sizeof(void*));
   tl_assert(sizeof(Word)  == sizeof(void*));
   tl_assert(sizeof(ULong) == 8);
   tl_assert(sizeof(Long)  == 8);
   tl_assert(sizeof(UInt)  == 4);
   tl_assert(sizeof(Int)   == 4);

   /* Set up BB */
   bb           = emptyIRBB();
   bb->tyenv    = dopyIRTypeEnv(bb_in->tyenv);
   bb->next     = dopyIRExpr(bb_in->next);
   bb->jumpkind = bb_in->jumpkind;

   /* Set up the running environment.  Only .bb is modified as we go
      along. */
   mce.bb             = bb;
   mce.layout         = layout;
   mce.n_originalTmps = bb->tyenv->types_used;
   mce.hWordTy        = hWordTy;
   mce.bogusLiterals  = False;
   mce.tmpMap         = LibVEX_Alloc(mce.n_originalTmps * sizeof(IRTemp));
   for (i = 0; i < mce.n_originalTmps; i++)
      mce.tmpMap[i] = IRTemp_INVALID;

   /* Iterate over the stmts. */

   for (i = 0; i < bb_in->stmts_used; i++) {
      st = bb_in->stmts[i];
      tl_assert(st);

      tl_assert(isFlatIRStmt(st));

      if (!mce.bogusLiterals) {
         mce.bogusLiterals = checkForBogusLiterals(st);
         if (0 && mce.bogusLiterals) {
            VG_(printf)("bogus: ");
            ppIRStmt(st);
            VG_(printf)("\n");
         }
      }
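      /* Once a bogus literal is spotted, mce.bogusLiterals stays True
         for the remainder of the BB, so every subsequent Add32/Sub32
         and CmpEQ32/CmpEQ64 gets the more precise but more expensive
         expansion in expr2vbits_Binop. */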

      first_stmt = bb->stmts_used;

      if (verboze) {
         ppIRStmt(st);
         VG_(printf)("\n\n");
      }

      /* Generate instrumentation code for each stmt ... */

      switch (st->tag) {

         case Ist_Tmp:
            assign( bb, findShadowTmp(&mce, st->Ist.Tmp.tmp),
                        expr2vbits( &mce, st->Ist.Tmp.data) );
            break;

         case Ist_Put:
            do_shadow_PUT( &mce,
                           st->Ist.Put.offset,
                           st->Ist.Put.data,
                           NULL /* shadow atom */ );
            break;

         case Ist_PutI:
            do_shadow_PUTI( &mce,
                            st->Ist.PutI.descr,
                            st->Ist.PutI.ix,
                            st->Ist.PutI.bias,
                            st->Ist.PutI.data );
            break;

         case Ist_STle:
            do_shadow_STle( &mce, st->Ist.STle.addr, 0/* addr bias */,
                            st->Ist.STle.data,
                            NULL /* shadow data */ );
            break;

         case Ist_Exit:
            complainIfUndefined( &mce, st->Ist.Exit.guard );
            break;

         case Ist_NoOp:
         case Ist_IMark:
         case Ist_MFence:
            break;

         case Ist_Dirty:
            do_shadow_Dirty( &mce, st->Ist.Dirty.details );
            break;

         case Ist_AbiHint:
            do_AbiHint( &mce, st->Ist.AbiHint.base, st->Ist.AbiHint.len );
            break;

         default:
            VG_(printf)("\n");
            ppIRStmt(st);
            VG_(printf)("\n");
            VG_(tool_panic)("memcheck: unhandled IRStmt");

      } /* switch (st->tag) */

      if (verboze) {
         for (j = first_stmt; j < bb->stmts_used; j++) {
            VG_(printf)("   ");
            ppIRStmt(bb->stmts[j]);
            VG_(printf)("\n");
         }
         VG_(printf)("\n");
      }

      /* ... and finally copy the stmt itself to the output. */
      addStmtToIRBB(bb, st);

   }

   /* Now we need to complain if the jump target is undefined. */
   first_stmt = bb->stmts_used;

   if (verboze) {
      VG_(printf)("bb->next = ");
      ppIRExpr(bb->next);
      VG_(printf)("\n\n");
   }

   complainIfUndefined( &mce, bb->next );

   if (verboze) {
      for (j = first_stmt; j < bb->stmts_used; j++) {
         VG_(printf)("   ");
         ppIRStmt(bb->stmts[j]);
         VG_(printf)("\n");
      }
      VG_(printf)("\n");
   }

   return bb;
}

/*--------------------------------------------------------------------*/
/*--- end                                           mc_translate.c ---*/
/*--------------------------------------------------------------------*/