blob: ff00ce9170821edbba008720a8b1225603776661 [file] [log] [blame]
nethercotebb1c9912004-01-04 16:43:23 +00001
njn25e49d8e72002-09-23 09:36:25 +00002/*--------------------------------------------------------------------*/
sewardj95448072004-11-22 20:19:51 +00003/*--- Instrument IR to perform memory checking operations. ---*/
njn25cac76cb2002-09-23 11:21:57 +00004/*--- mc_translate.c ---*/
njn25e49d8e72002-09-23 09:36:25 +00005/*--------------------------------------------------------------------*/
njnc9539842002-10-02 13:26:35 +00006
njn25e49d8e72002-09-23 09:36:25 +00007/*
nethercote137bc552003-11-14 17:47:54 +00008 This file is part of MemCheck, a heavyweight Valgrind tool for
njnc9539842002-10-02 13:26:35 +00009 detecting memory errors.
njn25e49d8e72002-09-23 09:36:25 +000010
njn53612422005-03-12 16:22:54 +000011 Copyright (C) 2000-2005 Julian Seward
njn25e49d8e72002-09-23 09:36:25 +000012 jseward@acm.org
13
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
18
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
23
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, write to the Free Software
26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27 02111-1307, USA.
28
29 The GNU General Public License is contained in the file COPYING.
30*/
31
njn25cac76cb2002-09-23 11:21:57 +000032#include "mc_include.h"
njn25e49d8e72002-09-23 09:36:25 +000033
njn25e49d8e72002-09-23 09:36:25 +000034
sewardj95448072004-11-22 20:19:51 +000035/*------------------------------------------------------------*/
36/*--- Forward decls ---*/
37/*------------------------------------------------------------*/
38
39struct _MCEnv;
40
41static IRType shadowType ( IRType ty );
42static IRExpr* expr2vbits ( struct _MCEnv* mce, IRExpr* e );
43
44
45/*------------------------------------------------------------*/
46/*--- Memcheck running state, and tmp management. ---*/
47/*------------------------------------------------------------*/
48
/* Carries around state during memcheck instrumentation. */
typedef
   struct _MCEnv {
      /* MODIFIED: the bb being constructed.  IRStmts are added. */
      IRBB* bb;

      /* MODIFIED: a table [0 .. #temps_in_original_bb-1] which maps
         original temps to their current shadow temp.  Initially all
         entries are IRTemp_INVALID.  Entries are added lazily since
         many original temps are not used due to optimisation prior
         to instrumentation.  Note that floating point original tmps
         are shadowed by integer tmps of the same size, and Bit-typed
         original tmps are shadowed by the type Ity_I8.  See comment
         below. */
      IRTemp* tmpMap;
      Int     n_originalTmps; /* for range checking */

      /* MODIFIED: indicates whether "bogus" literals have so far been
         found.  Starts off False, and may change to True. */
      Bool    bogusLiterals;

      /* READONLY: the guest layout.  This indicates which parts of
         the guest state should be regarded as 'always defined'. */
      VexGuestLayout* layout;

      /* READONLY: the host word type.  Needed for constructing
         arguments of type 'HWord' to be passed to helper functions.
         Ity_I32 or Ity_I64 only. */
      IRType hWordTy;
   }
   MCEnv;
79
80/* SHADOW TMP MANAGEMENT. Shadow tmps are allocated lazily (on
81 demand), as they are encountered. This is for two reasons.
82
   (1) (less important reason): Many original tmps are unused due to
   initial IR optimisation, and we do not want to waste space in
   tables tracking them.
86
87 Shadow IRTemps are therefore allocated on demand. mce.tmpMap is a
88 table indexed [0 .. n_types-1], which gives the current shadow for
89 each original tmp, or INVALID_IRTEMP if none is so far assigned.
90 It is necessary to support making multiple assignments to a shadow
91 -- specifically, after testing a shadow for definedness, it needs
92 to be made defined. But IR's SSA property disallows this.
93
94 (2) (more important reason): Therefore, when a shadow needs to get
95 a new value, a new temporary is created, the value is assigned to
96 that, and the tmpMap is updated to reflect the new binding.
97
98 A corollary is that if the tmpMap maps a given tmp to
99 INVALID_IRTEMP and we are hoping to read that shadow tmp, it means
100 there's a read-before-write error in the original tmps. The IR
101 sanity checker should catch all such anomalies, however.
njn25e49d8e72002-09-23 09:36:25 +0000102*/
sewardj95448072004-11-22 20:19:51 +0000103
104/* Find the tmp currently shadowing the given original tmp. If none
105 so far exists, allocate one. */
106static IRTemp findShadowTmp ( MCEnv* mce, IRTemp orig )
njn25e49d8e72002-09-23 09:36:25 +0000107{
sewardj95448072004-11-22 20:19:51 +0000108 tl_assert(orig < mce->n_originalTmps);
109 if (mce->tmpMap[orig] == IRTemp_INVALID) {
110 mce->tmpMap[orig]
111 = newIRTemp(mce->bb->tyenv,
112 shadowType(mce->bb->tyenv->types[orig]));
njn25e49d8e72002-09-23 09:36:25 +0000113 }
sewardj95448072004-11-22 20:19:51 +0000114 return mce->tmpMap[orig];
njn25e49d8e72002-09-23 09:36:25 +0000115}
116
sewardj95448072004-11-22 20:19:51 +0000117/* Allocate a new shadow for the given original tmp. This means any
118 previous shadow is abandoned. This is needed because it is
119 necessary to give a new value to a shadow once it has been tested
120 for undefinedness, but unfortunately IR's SSA property disallows
121 this. Instead we must abandon the old shadow, allocate a new one
122 and use that instead. */
123static void newShadowTmp ( MCEnv* mce, IRTemp orig )
njn25e49d8e72002-09-23 09:36:25 +0000124{
sewardj95448072004-11-22 20:19:51 +0000125 tl_assert(orig < mce->n_originalTmps);
126 mce->tmpMap[orig]
127 = newIRTemp(mce->bb->tyenv,
128 shadowType(mce->bb->tyenv->types[orig]));
129}
130
131
132/*------------------------------------------------------------*/
133/*--- IRAtoms -- a subset of IRExprs ---*/
134/*------------------------------------------------------------*/
135
136/* An atom is either an IRExpr_Const or an IRExpr_Tmp, as defined by
sewardj710d6c22005-03-20 18:55:15 +0000137 isIRAtom() in libvex_ir.h. Because this instrumenter expects flat
sewardj95448072004-11-22 20:19:51 +0000138 input, most of this code deals in atoms. Usefully, a value atom
139 always has a V-value which is also an atom: constants are shadowed
140 by constants, and temps are shadowed by the corresponding shadow
141 temporary. */
142
143typedef IRExpr IRAtom;
144
145/* (used for sanity checks only): is this an atom which looks
146 like it's from original code? */
147static Bool isOriginalAtom ( MCEnv* mce, IRAtom* a1 )
148{
149 if (a1->tag == Iex_Const)
150 return True;
151 if (a1->tag == Iex_Tmp && a1->Iex.Tmp.tmp < mce->n_originalTmps)
152 return True;
153 return False;
154}
155
156/* (used for sanity checks only): is this an atom which looks
157 like it's from shadow code? */
158static Bool isShadowAtom ( MCEnv* mce, IRAtom* a1 )
159{
160 if (a1->tag == Iex_Const)
161 return True;
162 if (a1->tag == Iex_Tmp && a1->Iex.Tmp.tmp >= mce->n_originalTmps)
163 return True;
164 return False;
165}
166
167/* (used for sanity checks only): check that both args are atoms and
168 are identically-kinded. */
169static Bool sameKindedAtoms ( IRAtom* a1, IRAtom* a2 )
170{
171 if (a1->tag == Iex_Tmp && a1->tag == Iex_Tmp)
172 return True;
173 if (a1->tag == Iex_Const && a1->tag == Iex_Const)
174 return True;
175 return False;
176}
177
178
179/*------------------------------------------------------------*/
180/*--- Type management ---*/
181/*------------------------------------------------------------*/
182
183/* Shadow state is always accessed using integer types. This returns
184 an integer type with the same size (as per sizeofIRType) as the
185 given type. The only valid shadow types are Bit, I8, I16, I32,
sewardj3245c912004-12-10 14:58:26 +0000186 I64, V128. */
sewardj95448072004-11-22 20:19:51 +0000187
188static IRType shadowType ( IRType ty )
189{
190 switch (ty) {
191 case Ity_I1:
192 case Ity_I8:
193 case Ity_I16:
194 case Ity_I32:
sewardj6cf40ff2005-04-20 22:31:26 +0000195 case Ity_I64:
196 case Ity_I128: return ty;
sewardj3245c912004-12-10 14:58:26 +0000197 case Ity_F32: return Ity_I32;
198 case Ity_F64: return Ity_I64;
199 case Ity_V128: return Ity_V128;
sewardj95448072004-11-22 20:19:51 +0000200 default: ppIRType(ty);
201 VG_(tool_panic)("memcheck:shadowType");
202 }
203}
204
205/* Produce a 'defined' value of the given shadow type. Should only be
206 supplied shadow types (Bit/I8/I16/I32/UI64). */
207static IRExpr* definedOfType ( IRType ty ) {
208 switch (ty) {
sewardj170ee212004-12-10 18:57:51 +0000209 case Ity_I1: return IRExpr_Const(IRConst_U1(False));
210 case Ity_I8: return IRExpr_Const(IRConst_U8(0));
211 case Ity_I16: return IRExpr_Const(IRConst_U16(0));
212 case Ity_I32: return IRExpr_Const(IRConst_U32(0));
213 case Ity_I64: return IRExpr_Const(IRConst_U64(0));
214 case Ity_V128: return IRExpr_Const(IRConst_V128(0x0000));
sewardj95448072004-11-22 20:19:51 +0000215 default: VG_(tool_panic)("memcheck:definedOfType");
njn25e49d8e72002-09-23 09:36:25 +0000216 }
217}
218
219
/*------------------------------------------------------------*/
/*--- Constructing IR fragments                            ---*/
/*------------------------------------------------------------*/

/* assign value to tmp */
#define assign(_bb,_tmp,_expr)   \
   addStmtToIRBB((_bb), IRStmt_Tmp((_tmp),(_expr)))

/* add stmt to a bb */
#define stmt(_bb,_stmt)    \
   addStmtToIRBB((_bb), (_stmt))

/* build various kinds of expressions */
#define binop(_op, _arg1, _arg2) IRExpr_Binop((_op),(_arg1),(_arg2))
#define unop(_op, _arg)          IRExpr_Unop((_op),(_arg))
#define mkU8(_n)                 IRExpr_Const(IRConst_U8(_n))
#define mkU16(_n)                IRExpr_Const(IRConst_U16(_n))
#define mkU32(_n)                IRExpr_Const(IRConst_U32(_n))
#define mkU64(_n)                IRExpr_Const(IRConst_U64(_n))
#define mkV128(_n)               IRExpr_Const(IRConst_V128(_n))
#define mkexpr(_tmp)             IRExpr_Tmp((_tmp))
241
242/* bind the given expression to a new temporary, and return the
243 temporary. This effectively converts an arbitrary expression into
244 an atom. */
245static IRAtom* assignNew ( MCEnv* mce, IRType ty, IRExpr* e ) {
246 IRTemp t = newIRTemp(mce->bb->tyenv, ty);
247 assign(mce->bb, t, e);
248 return mkexpr(t);
249}
250
251
252/*------------------------------------------------------------*/
253/*--- Constructing definedness primitive ops ---*/
254/*------------------------------------------------------------*/
255
256/* --------- Defined-if-either-defined --------- */
257
258static IRAtom* mkDifD8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
259 tl_assert(isShadowAtom(mce,a1));
260 tl_assert(isShadowAtom(mce,a2));
261 return assignNew(mce, Ity_I8, binop(Iop_And8, a1, a2));
262}
263
264static IRAtom* mkDifD16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
265 tl_assert(isShadowAtom(mce,a1));
266 tl_assert(isShadowAtom(mce,a2));
267 return assignNew(mce, Ity_I16, binop(Iop_And16, a1, a2));
268}
269
270static IRAtom* mkDifD32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
271 tl_assert(isShadowAtom(mce,a1));
272 tl_assert(isShadowAtom(mce,a2));
273 return assignNew(mce, Ity_I32, binop(Iop_And32, a1, a2));
274}
275
sewardj7010f6e2004-12-10 13:35:22 +0000276static IRAtom* mkDifD64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
277 tl_assert(isShadowAtom(mce,a1));
278 tl_assert(isShadowAtom(mce,a2));
279 return assignNew(mce, Ity_I64, binop(Iop_And64, a1, a2));
280}
281
sewardj20d38f22005-02-07 23:50:18 +0000282static IRAtom* mkDifDV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
sewardj170ee212004-12-10 18:57:51 +0000283 tl_assert(isShadowAtom(mce,a1));
284 tl_assert(isShadowAtom(mce,a2));
sewardj20d38f22005-02-07 23:50:18 +0000285 return assignNew(mce, Ity_V128, binop(Iop_AndV128, a1, a2));
sewardj170ee212004-12-10 18:57:51 +0000286}
287
sewardj95448072004-11-22 20:19:51 +0000288/* --------- Undefined-if-either-undefined --------- */
289
290static IRAtom* mkUifU8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
291 tl_assert(isShadowAtom(mce,a1));
292 tl_assert(isShadowAtom(mce,a2));
293 return assignNew(mce, Ity_I8, binop(Iop_Or8, a1, a2));
294}
295
296static IRAtom* mkUifU16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
297 tl_assert(isShadowAtom(mce,a1));
298 tl_assert(isShadowAtom(mce,a2));
299 return assignNew(mce, Ity_I16, binop(Iop_Or16, a1, a2));
300}
301
302static IRAtom* mkUifU32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
303 tl_assert(isShadowAtom(mce,a1));
304 tl_assert(isShadowAtom(mce,a2));
305 return assignNew(mce, Ity_I32, binop(Iop_Or32, a1, a2));
306}
307
308static IRAtom* mkUifU64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
309 tl_assert(isShadowAtom(mce,a1));
310 tl_assert(isShadowAtom(mce,a2));
311 return assignNew(mce, Ity_I64, binop(Iop_Or64, a1, a2));
312}
313
sewardj20d38f22005-02-07 23:50:18 +0000314static IRAtom* mkUifUV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
sewardj3245c912004-12-10 14:58:26 +0000315 tl_assert(isShadowAtom(mce,a1));
316 tl_assert(isShadowAtom(mce,a2));
sewardj20d38f22005-02-07 23:50:18 +0000317 return assignNew(mce, Ity_V128, binop(Iop_OrV128, a1, a2));
sewardj3245c912004-12-10 14:58:26 +0000318}
319
sewardje50a1b12004-12-17 01:24:54 +0000320static IRAtom* mkUifU ( MCEnv* mce, IRType vty, IRAtom* a1, IRAtom* a2 ) {
sewardj95448072004-11-22 20:19:51 +0000321 switch (vty) {
sewardje50a1b12004-12-17 01:24:54 +0000322 case Ity_I8: return mkUifU8(mce, a1, a2);
sewardja1d93302004-12-12 16:45:06 +0000323 case Ity_I16: return mkUifU16(mce, a1, a2);
324 case Ity_I32: return mkUifU32(mce, a1, a2);
325 case Ity_I64: return mkUifU64(mce, a1, a2);
sewardj20d38f22005-02-07 23:50:18 +0000326 case Ity_V128: return mkUifUV128(mce, a1, a2);
sewardj95448072004-11-22 20:19:51 +0000327 default:
328 VG_(printf)("\n"); ppIRType(vty); VG_(printf)("\n");
329 VG_(tool_panic)("memcheck:mkUifU");
njn25e49d8e72002-09-23 09:36:25 +0000330 }
331}
332
sewardj95448072004-11-22 20:19:51 +0000333/* --------- The Left-family of operations. --------- */
njn25e49d8e72002-09-23 09:36:25 +0000334
sewardj95448072004-11-22 20:19:51 +0000335static IRAtom* mkLeft8 ( MCEnv* mce, IRAtom* a1 ) {
336 tl_assert(isShadowAtom(mce,a1));
337 /* It's safe to duplicate a1 since it's only an atom */
338 return assignNew(mce, Ity_I8,
339 binop(Iop_Or8, a1,
340 assignNew(mce, Ity_I8,
sewardj37c31cc2005-04-26 23:49:24 +0000341 unop(Iop_Neg8, a1))));
sewardj95448072004-11-22 20:19:51 +0000342}
343
344static IRAtom* mkLeft16 ( MCEnv* mce, IRAtom* a1 ) {
345 tl_assert(isShadowAtom(mce,a1));
346 /* It's safe to duplicate a1 since it's only an atom */
347 return assignNew(mce, Ity_I16,
348 binop(Iop_Or16, a1,
349 assignNew(mce, Ity_I16,
sewardj37c31cc2005-04-26 23:49:24 +0000350 unop(Iop_Neg16, a1))));
sewardj95448072004-11-22 20:19:51 +0000351}
352
353static IRAtom* mkLeft32 ( MCEnv* mce, IRAtom* a1 ) {
354 tl_assert(isShadowAtom(mce,a1));
355 /* It's safe to duplicate a1 since it's only an atom */
356 return assignNew(mce, Ity_I32,
357 binop(Iop_Or32, a1,
358 assignNew(mce, Ity_I32,
sewardj37c31cc2005-04-26 23:49:24 +0000359 unop(Iop_Neg32, a1))));
sewardj95448072004-11-22 20:19:51 +0000360}
361
sewardj681be302005-01-15 20:43:58 +0000362static IRAtom* mkLeft64 ( MCEnv* mce, IRAtom* a1 ) {
363 tl_assert(isShadowAtom(mce,a1));
364 /* It's safe to duplicate a1 since it's only an atom */
365 return assignNew(mce, Ity_I64,
366 binop(Iop_Or64, a1,
367 assignNew(mce, Ity_I64,
sewardj37c31cc2005-04-26 23:49:24 +0000368 unop(Iop_Neg64, a1))));
sewardj681be302005-01-15 20:43:58 +0000369}
370
sewardj95448072004-11-22 20:19:51 +0000371/* --------- 'Improvement' functions for AND/OR. --------- */
372
373/* ImproveAND(data, vbits) = data OR vbits. Defined (0) data 0s give
374 defined (0); all other -> undefined (1).
375*/
376static IRAtom* mkImproveAND8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
njn25e49d8e72002-09-23 09:36:25 +0000377{
sewardj95448072004-11-22 20:19:51 +0000378 tl_assert(isOriginalAtom(mce, data));
379 tl_assert(isShadowAtom(mce, vbits));
380 tl_assert(sameKindedAtoms(data, vbits));
381 return assignNew(mce, Ity_I8, binop(Iop_Or8, data, vbits));
382}
njn25e49d8e72002-09-23 09:36:25 +0000383
sewardj95448072004-11-22 20:19:51 +0000384static IRAtom* mkImproveAND16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
385{
386 tl_assert(isOriginalAtom(mce, data));
387 tl_assert(isShadowAtom(mce, vbits));
388 tl_assert(sameKindedAtoms(data, vbits));
389 return assignNew(mce, Ity_I16, binop(Iop_Or16, data, vbits));
390}
njn25e49d8e72002-09-23 09:36:25 +0000391
sewardj95448072004-11-22 20:19:51 +0000392static IRAtom* mkImproveAND32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
393{
394 tl_assert(isOriginalAtom(mce, data));
395 tl_assert(isShadowAtom(mce, vbits));
396 tl_assert(sameKindedAtoms(data, vbits));
397 return assignNew(mce, Ity_I32, binop(Iop_Or32, data, vbits));
398}
njn25e49d8e72002-09-23 09:36:25 +0000399
sewardj7010f6e2004-12-10 13:35:22 +0000400static IRAtom* mkImproveAND64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
401{
402 tl_assert(isOriginalAtom(mce, data));
403 tl_assert(isShadowAtom(mce, vbits));
404 tl_assert(sameKindedAtoms(data, vbits));
405 return assignNew(mce, Ity_I64, binop(Iop_Or64, data, vbits));
406}
407
sewardj20d38f22005-02-07 23:50:18 +0000408static IRAtom* mkImproveANDV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
sewardj170ee212004-12-10 18:57:51 +0000409{
410 tl_assert(isOriginalAtom(mce, data));
411 tl_assert(isShadowAtom(mce, vbits));
412 tl_assert(sameKindedAtoms(data, vbits));
sewardj20d38f22005-02-07 23:50:18 +0000413 return assignNew(mce, Ity_V128, binop(Iop_OrV128, data, vbits));
sewardj170ee212004-12-10 18:57:51 +0000414}
415
sewardj95448072004-11-22 20:19:51 +0000416/* ImproveOR(data, vbits) = ~data OR vbits. Defined (0) data 1s give
417 defined (0); all other -> undefined (1).
418*/
419static IRAtom* mkImproveOR8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
420{
421 tl_assert(isOriginalAtom(mce, data));
422 tl_assert(isShadowAtom(mce, vbits));
423 tl_assert(sameKindedAtoms(data, vbits));
424 return assignNew(
425 mce, Ity_I8,
426 binop(Iop_Or8,
427 assignNew(mce, Ity_I8, unop(Iop_Not8, data)),
428 vbits) );
429}
njn25e49d8e72002-09-23 09:36:25 +0000430
sewardj95448072004-11-22 20:19:51 +0000431static IRAtom* mkImproveOR16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
432{
433 tl_assert(isOriginalAtom(mce, data));
434 tl_assert(isShadowAtom(mce, vbits));
435 tl_assert(sameKindedAtoms(data, vbits));
436 return assignNew(
437 mce, Ity_I16,
438 binop(Iop_Or16,
439 assignNew(mce, Ity_I16, unop(Iop_Not16, data)),
440 vbits) );
441}
njn25e49d8e72002-09-23 09:36:25 +0000442
sewardj95448072004-11-22 20:19:51 +0000443static IRAtom* mkImproveOR32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
444{
445 tl_assert(isOriginalAtom(mce, data));
446 tl_assert(isShadowAtom(mce, vbits));
447 tl_assert(sameKindedAtoms(data, vbits));
448 return assignNew(
449 mce, Ity_I32,
450 binop(Iop_Or32,
451 assignNew(mce, Ity_I32, unop(Iop_Not32, data)),
452 vbits) );
453}
454
sewardj7010f6e2004-12-10 13:35:22 +0000455static IRAtom* mkImproveOR64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
456{
457 tl_assert(isOriginalAtom(mce, data));
458 tl_assert(isShadowAtom(mce, vbits));
459 tl_assert(sameKindedAtoms(data, vbits));
460 return assignNew(
461 mce, Ity_I64,
462 binop(Iop_Or64,
463 assignNew(mce, Ity_I64, unop(Iop_Not64, data)),
464 vbits) );
465}
466
sewardj20d38f22005-02-07 23:50:18 +0000467static IRAtom* mkImproveORV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
sewardj170ee212004-12-10 18:57:51 +0000468{
469 tl_assert(isOriginalAtom(mce, data));
470 tl_assert(isShadowAtom(mce, vbits));
471 tl_assert(sameKindedAtoms(data, vbits));
472 return assignNew(
473 mce, Ity_V128,
sewardj20d38f22005-02-07 23:50:18 +0000474 binop(Iop_OrV128,
475 assignNew(mce, Ity_V128, unop(Iop_NotV128, data)),
sewardj170ee212004-12-10 18:57:51 +0000476 vbits) );
477}
478
sewardj95448072004-11-22 20:19:51 +0000479/* --------- Pessimising casts. --------- */
480
481static IRAtom* mkPCastTo( MCEnv* mce, IRType dst_ty, IRAtom* vbits )
482{
sewardj7cf97ee2004-11-28 14:25:01 +0000483 IRType ty;
484 IRAtom* tmp1;
sewardj95448072004-11-22 20:19:51 +0000485 /* Note, dst_ty is a shadow type, not an original type. */
486 /* First of all, collapse vbits down to a single bit. */
487 tl_assert(isShadowAtom(mce,vbits));
sewardj7cf97ee2004-11-28 14:25:01 +0000488 ty = typeOfIRExpr(mce->bb->tyenv, vbits);
489 tmp1 = NULL;
sewardj95448072004-11-22 20:19:51 +0000490 switch (ty) {
491 case Ity_I1:
492 tmp1 = vbits;
njn25e49d8e72002-09-23 09:36:25 +0000493 break;
sewardj95448072004-11-22 20:19:51 +0000494 case Ity_I8:
sewardj37c31cc2005-04-26 23:49:24 +0000495 tmp1 = assignNew(mce, Ity_I1, unop(Iop_CmpNEZ8, vbits));
sewardj95448072004-11-22 20:19:51 +0000496 break;
497 case Ity_I16:
sewardj37c31cc2005-04-26 23:49:24 +0000498 tmp1 = assignNew(mce, Ity_I1, unop(Iop_CmpNEZ16, vbits));
sewardj95448072004-11-22 20:19:51 +0000499 break;
500 case Ity_I32:
sewardj37c31cc2005-04-26 23:49:24 +0000501 tmp1 = assignNew(mce, Ity_I1, unop(Iop_CmpNEZ32, vbits));
sewardj95448072004-11-22 20:19:51 +0000502 break;
503 case Ity_I64:
sewardj37c31cc2005-04-26 23:49:24 +0000504 tmp1 = assignNew(mce, Ity_I1, unop(Iop_CmpNEZ64, vbits));
sewardj95448072004-11-22 20:19:51 +0000505 break;
sewardj69a13322005-04-23 01:14:51 +0000506 case Ity_I128: {
507 /* Gah. Chop it in half, OR the halves together, and compare
508 that with zero. */
509 IRAtom* tmp2 = assignNew(mce, Ity_I64, unop(Iop_128HIto64, vbits));
510 IRAtom* tmp3 = assignNew(mce, Ity_I64, unop(Iop_128to64, vbits));
511 IRAtom* tmp4 = assignNew(mce, Ity_I64, binop(Iop_Or64, tmp2, tmp3));
512 tmp1 = assignNew(mce, Ity_I1,
sewardj37c31cc2005-04-26 23:49:24 +0000513 unop(Iop_CmpNEZ64, tmp4));
sewardj69a13322005-04-23 01:14:51 +0000514 break;
515 }
sewardj95448072004-11-22 20:19:51 +0000516 default:
sewardj69a13322005-04-23 01:14:51 +0000517 ppIRType(ty);
sewardj95448072004-11-22 20:19:51 +0000518 VG_(tool_panic)("mkPCastTo(1)");
519 }
520 tl_assert(tmp1);
521 /* Now widen up to the dst type. */
522 switch (dst_ty) {
523 case Ity_I1:
524 return tmp1;
525 case Ity_I8:
526 return assignNew(mce, Ity_I8, unop(Iop_1Sto8, tmp1));
527 case Ity_I16:
528 return assignNew(mce, Ity_I16, unop(Iop_1Sto16, tmp1));
529 case Ity_I32:
530 return assignNew(mce, Ity_I32, unop(Iop_1Sto32, tmp1));
531 case Ity_I64:
532 return assignNew(mce, Ity_I64, unop(Iop_1Sto64, tmp1));
sewardja1d93302004-12-12 16:45:06 +0000533 case Ity_V128:
534 tmp1 = assignNew(mce, Ity_I64, unop(Iop_1Sto64, tmp1));
sewardj20d38f22005-02-07 23:50:18 +0000535 tmp1 = assignNew(mce, Ity_V128, binop(Iop_64HLtoV128, tmp1, tmp1));
sewardja1d93302004-12-12 16:45:06 +0000536 return tmp1;
sewardj69a13322005-04-23 01:14:51 +0000537 case Ity_I128:
538 tmp1 = assignNew(mce, Ity_I64, unop(Iop_1Sto64, tmp1));
539 tmp1 = assignNew(mce, Ity_I128, binop(Iop_64HLto128, tmp1, tmp1));
540 return tmp1;
sewardj95448072004-11-22 20:19:51 +0000541 default:
542 ppIRType(dst_ty);
543 VG_(tool_panic)("mkPCastTo(2)");
544 }
545}
546
sewardjd5204dc2004-12-31 01:16:11 +0000547/* --------- Accurate interpretation of CmpEQ/CmpNE. --------- */
548/*
549 Normally, we can do CmpEQ/CmpNE by doing UifU on the arguments, and
550 PCasting to Ity_U1. However, sometimes it is necessary to be more
551 accurate. The insight is that the result is defined if two
552 corresponding bits can be found, one from each argument, so that
553 both bits are defined but are different -- that makes EQ say "No"
554 and NE say "Yes". Hence, we compute an improvement term and DifD
555 it onto the "normal" (UifU) result.
556
557 The result is:
558
559 PCastTo<1> (
560 PCastTo<sz>( UifU<sz>(vxx, vyy) ) -- naive version
561 `DifD<sz>`
562 PCastTo<sz>( CmpEQ<sz>( vec, 1....1 ) ) -- improvement term
563 )
564 where
565 vec contains 0 (defined) bits where the corresponding arg bits
566 are defined but different, and 1 bits otherwise:
567
568 vec = UifU<sz>( vxx, vyy, Not<sz>(Xor<sz>( xx, yy )) )
569*/
570static IRAtom* expensiveCmpEQorNE ( MCEnv* mce,
571 IRType ty,
572 IRAtom* vxx, IRAtom* vyy,
573 IRAtom* xx, IRAtom* yy )
574{
575 IRAtom *naive, *vec, *vec_cmpd, *improved, *final_cast, *top;
576 IROp opDIFD, opUIFU, opXOR, opNOT, opCMP;
577
578 tl_assert(isShadowAtom(mce,vxx));
579 tl_assert(isShadowAtom(mce,vyy));
580 tl_assert(isOriginalAtom(mce,xx));
581 tl_assert(isOriginalAtom(mce,yy));
582 tl_assert(sameKindedAtoms(vxx,xx));
583 tl_assert(sameKindedAtoms(vyy,yy));
584
585 switch (ty) {
586 case Ity_I32:
587 opDIFD = Iop_And32;
588 opUIFU = Iop_Or32;
589 opNOT = Iop_Not32;
590 opXOR = Iop_Xor32;
591 opCMP = Iop_CmpEQ32;
592 top = mkU32(0xFFFFFFFF);
593 break;
tomcd986332005-04-26 07:44:48 +0000594 case Ity_I64:
595 opDIFD = Iop_And64;
596 opUIFU = Iop_Or64;
597 opNOT = Iop_Not64;
598 opXOR = Iop_Xor64;
599 opCMP = Iop_CmpEQ64;
sewardj37c31cc2005-04-26 23:49:24 +0000600 top = mkU64(0xFFFFFFFFFFFFFFFFULL);
tomcd986332005-04-26 07:44:48 +0000601 break;
sewardjd5204dc2004-12-31 01:16:11 +0000602 default:
603 VG_(tool_panic)("expensiveCmpEQorNE");
604 }
605
606 naive
607 = mkPCastTo(mce,ty, assignNew(mce, ty, binop(opUIFU, vxx, vyy)));
608
609 vec
610 = assignNew(
611 mce,ty,
612 binop( opUIFU,
613 assignNew(mce,ty, binop(opUIFU, vxx, vyy)),
614 assignNew(
615 mce,ty,
616 unop( opNOT,
617 assignNew(mce,ty, binop(opXOR, xx, yy))))));
618
619 vec_cmpd
620 = mkPCastTo( mce,ty, assignNew(mce,Ity_I1, binop(opCMP, vec, top)));
621
622 improved
623 = assignNew( mce,ty, binop(opDIFD, naive, vec_cmpd) );
624
625 final_cast
626 = mkPCastTo( mce, Ity_I1, improved );
627
628 return final_cast;
629}
630
sewardj95448072004-11-22 20:19:51 +0000631
632/*------------------------------------------------------------*/
633/*--- Emit a test and complaint if something is undefined. ---*/
634/*------------------------------------------------------------*/
635
636/* Set the annotations on a dirty helper to indicate that the stack
637 pointer and instruction pointers might be read. This is the
638 behaviour of all 'emit-a-complaint' style functions we might
639 call. */
640
641static void setHelperAnns ( MCEnv* mce, IRDirty* di ) {
642 di->nFxState = 2;
643 di->fxState[0].fx = Ifx_Read;
644 di->fxState[0].offset = mce->layout->offset_SP;
645 di->fxState[0].size = mce->layout->sizeof_SP;
646 di->fxState[1].fx = Ifx_Read;
647 di->fxState[1].offset = mce->layout->offset_IP;
648 di->fxState[1].size = mce->layout->sizeof_IP;
649}
650
651
652/* Check the supplied **original** atom for undefinedness, and emit a
653 complaint if so. Once that happens, mark it as defined. This is
654 possible because the atom is either a tmp or literal. If it's a
655 tmp, it will be shadowed by a tmp, and so we can set the shadow to
656 be defined. In fact as mentioned above, we will have to allocate a
657 new tmp to carry the new 'defined' shadow value, and update the
658 original->tmp mapping accordingly; we cannot simply assign a new
659 value to an existing shadow tmp as this breaks SSAness -- resulting
660 in the post-instrumentation sanity checker spluttering in disapproval.
661*/
662static void complainIfUndefined ( MCEnv* mce, IRAtom* atom )
663{
sewardj7cf97ee2004-11-28 14:25:01 +0000664 IRAtom* vatom;
665 IRType ty;
666 Int sz;
667 IRDirty* di;
668 IRAtom* cond;
669
sewardj95448072004-11-22 20:19:51 +0000670 /* Since the original expression is atomic, there's no duplicated
671 work generated by making multiple V-expressions for it. So we
672 don't really care about the possibility that someone else may
673 also create a V-interpretion for it. */
674 tl_assert(isOriginalAtom(mce, atom));
sewardj7cf97ee2004-11-28 14:25:01 +0000675 vatom = expr2vbits( mce, atom );
sewardj95448072004-11-22 20:19:51 +0000676 tl_assert(isShadowAtom(mce, vatom));
677 tl_assert(sameKindedAtoms(atom, vatom));
678
sewardj7cf97ee2004-11-28 14:25:01 +0000679 ty = typeOfIRExpr(mce->bb->tyenv, vatom);
sewardj95448072004-11-22 20:19:51 +0000680
681 /* sz is only used for constructing the error message */
sewardj7cf97ee2004-11-28 14:25:01 +0000682 sz = ty==Ity_I1 ? 0 : sizeofIRType(ty);
sewardj95448072004-11-22 20:19:51 +0000683
sewardj7cf97ee2004-11-28 14:25:01 +0000684 cond = mkPCastTo( mce, Ity_I1, vatom );
sewardj95448072004-11-22 20:19:51 +0000685 /* cond will be 0 if all defined, and 1 if any not defined. */
686
sewardj95448072004-11-22 20:19:51 +0000687 switch (sz) {
688 case 0:
689 di = unsafeIRDirty_0_N( 0/*regparms*/,
690 "MC_(helperc_value_check0_fail)",
691 &MC_(helperc_value_check0_fail),
692 mkIRExprVec_0()
693 );
694 break;
695 case 1:
696 di = unsafeIRDirty_0_N( 0/*regparms*/,
697 "MC_(helperc_value_check1_fail)",
698 &MC_(helperc_value_check1_fail),
699 mkIRExprVec_0()
700 );
701 break;
702 case 4:
703 di = unsafeIRDirty_0_N( 0/*regparms*/,
704 "MC_(helperc_value_check4_fail)",
705 &MC_(helperc_value_check4_fail),
706 mkIRExprVec_0()
707 );
708 break;
sewardj11bcc4e2005-04-23 22:38:38 +0000709 case 8:
710 di = unsafeIRDirty_0_N( 0/*regparms*/,
711 "MC_(helperc_value_check8_fail)",
712 &MC_(helperc_value_check8_fail),
713 mkIRExprVec_0()
714 );
715 break;
sewardj95448072004-11-22 20:19:51 +0000716 default:
717 di = unsafeIRDirty_0_N( 1/*regparms*/,
718 "MC_(helperc_complain_undef)",
719 &MC_(helperc_complain_undef),
720 mkIRExprVec_1( mkIRExpr_HWord( sz ))
721 );
722 break;
723 }
724 di->guard = cond;
725 setHelperAnns( mce, di );
726 stmt( mce->bb, IRStmt_Dirty(di));
727
728 /* Set the shadow tmp to be defined. First, update the
729 orig->shadow tmp mapping to reflect the fact that this shadow is
730 getting a new value. */
sewardj710d6c22005-03-20 18:55:15 +0000731 tl_assert(isIRAtom(vatom));
sewardj95448072004-11-22 20:19:51 +0000732 /* sameKindedAtoms ... */
733 if (vatom->tag == Iex_Tmp) {
734 tl_assert(atom->tag == Iex_Tmp);
735 newShadowTmp(mce, atom->Iex.Tmp.tmp);
736 assign(mce->bb, findShadowTmp(mce, atom->Iex.Tmp.tmp),
737 definedOfType(ty));
738 }
739}
740
741
742/*------------------------------------------------------------*/
743/*--- Shadowing PUTs/GETs, and indexed variants thereof ---*/
744/*------------------------------------------------------------*/
745
746/* Examine the always-defined sections declared in layout to see if
747 the (offset,size) section is within one. Note, is is an error to
748 partially fall into such a region: (offset,size) should either be
749 completely in such a region or completely not-in such a region.
750*/
751static Bool isAlwaysDefd ( MCEnv* mce, Int offset, Int size )
752{
753 Int minoffD, maxoffD, i;
754 Int minoff = offset;
755 Int maxoff = minoff + size - 1;
756 tl_assert((minoff & ~0xFFFF) == 0);
757 tl_assert((maxoff & ~0xFFFF) == 0);
758
759 for (i = 0; i < mce->layout->n_alwaysDefd; i++) {
760 minoffD = mce->layout->alwaysDefd[i].offset;
761 maxoffD = minoffD + mce->layout->alwaysDefd[i].size - 1;
762 tl_assert((minoffD & ~0xFFFF) == 0);
763 tl_assert((maxoffD & ~0xFFFF) == 0);
764
765 if (maxoff < minoffD || maxoffD < minoff)
766 continue; /* no overlap */
767 if (minoff >= minoffD && maxoff <= maxoffD)
768 return True; /* completely contained in an always-defd section */
769
770 VG_(tool_panic)("memcheck:isAlwaysDefd:partial overlap");
771 }
772 return False; /* could not find any containing section */
773}
774
775
776/* Generate into bb suitable actions to shadow this Put. If the state
777 slice is marked 'always defined', do nothing. Otherwise, write the
778 supplied V bits to the shadow state. We can pass in either an
779 original atom or a V-atom, but not both. In the former case the
780 relevant V-bits are then generated from the original.
781*/
782static
783void do_shadow_PUT ( MCEnv* mce, Int offset,
784 IRAtom* atom, IRAtom* vatom )
785{
sewardj7cf97ee2004-11-28 14:25:01 +0000786 IRType ty;
sewardj95448072004-11-22 20:19:51 +0000787 if (atom) {
788 tl_assert(!vatom);
789 tl_assert(isOriginalAtom(mce, atom));
790 vatom = expr2vbits( mce, atom );
791 } else {
792 tl_assert(vatom);
793 tl_assert(isShadowAtom(mce, vatom));
794 }
795
sewardj7cf97ee2004-11-28 14:25:01 +0000796 ty = typeOfIRExpr(mce->bb->tyenv, vatom);
sewardj95448072004-11-22 20:19:51 +0000797 tl_assert(ty != Ity_I1);
798 if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
799 /* later: no ... */
800 /* emit code to emit a complaint if any of the vbits are 1. */
801 /* complainIfUndefined(mce, atom); */
802 } else {
803 /* Do a plain shadow Put. */
804 stmt( mce->bb, IRStmt_Put( offset + mce->layout->total_sizeB, vatom ) );
805 }
806}
807
808
/* Generate into bb suitable actions to shadow this PutI.  Writes the
   V bits derived from atom into the shadow of the described
   guest-state array, unless that array is marked always-defined.
*/
812static
813void do_shadow_PUTI ( MCEnv* mce,
814 IRArray* descr, IRAtom* ix, Int bias, IRAtom* atom )
815{
sewardj7cf97ee2004-11-28 14:25:01 +0000816 IRAtom* vatom;
817 IRType ty, tyS;
818 Int arrSize;;
819
sewardj95448072004-11-22 20:19:51 +0000820 tl_assert(isOriginalAtom(mce,atom));
sewardj7cf97ee2004-11-28 14:25:01 +0000821 vatom = expr2vbits( mce, atom );
sewardj95448072004-11-22 20:19:51 +0000822 tl_assert(sameKindedAtoms(atom, vatom));
sewardj7cf97ee2004-11-28 14:25:01 +0000823 ty = descr->elemTy;
824 tyS = shadowType(ty);
825 arrSize = descr->nElems * sizeofIRType(ty);
sewardj95448072004-11-22 20:19:51 +0000826 tl_assert(ty != Ity_I1);
827 tl_assert(isOriginalAtom(mce,ix));
828 complainIfUndefined(mce,ix);
829 if (isAlwaysDefd(mce, descr->base, arrSize)) {
830 /* later: no ... */
831 /* emit code to emit a complaint if any of the vbits are 1. */
832 /* complainIfUndefined(mce, atom); */
833 } else {
834 /* Do a cloned version of the Put that refers to the shadow
835 area. */
836 IRArray* new_descr
837 = mkIRArray( descr->base + mce->layout->total_sizeB,
838 tyS, descr->nElems);
839 stmt( mce->bb, IRStmt_PutI( new_descr, ix, bias, vatom ));
840 }
841}
842
843
844/* Return an expression which contains the V bits corresponding to the
845 given GET (passed in in pieces).
846*/
847static
848IRExpr* shadow_GET ( MCEnv* mce, Int offset, IRType ty )
849{
850 IRType tyS = shadowType(ty);
851 tl_assert(ty != Ity_I1);
852 if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
853 /* Always defined, return all zeroes of the relevant type */
854 return definedOfType(tyS);
855 } else {
856 /* return a cloned version of the Get that refers to the shadow
857 area. */
858 return IRExpr_Get( offset + mce->layout->total_sizeB, tyS );
859 }
860}
861
862
863/* Return an expression which contains the V bits corresponding to the
864 given GETI (passed in in pieces).
865*/
866static
867IRExpr* shadow_GETI ( MCEnv* mce, IRArray* descr, IRAtom* ix, Int bias )
868{
869 IRType ty = descr->elemTy;
870 IRType tyS = shadowType(ty);
871 Int arrSize = descr->nElems * sizeofIRType(ty);
872 tl_assert(ty != Ity_I1);
873 tl_assert(isOriginalAtom(mce,ix));
874 complainIfUndefined(mce,ix);
875 if (isAlwaysDefd(mce, descr->base, arrSize)) {
876 /* Always defined, return all zeroes of the relevant type */
877 return definedOfType(tyS);
878 } else {
879 /* return a cloned version of the Get that refers to the shadow
880 area. */
881 IRArray* new_descr
882 = mkIRArray( descr->base + mce->layout->total_sizeB,
883 tyS, descr->nElems);
884 return IRExpr_GetI( new_descr, ix, bias );
885 }
886}
887
888
889/*------------------------------------------------------------*/
890/*--- Generating approximations for unknown operations, ---*/
891/*--- using lazy-propagate semantics ---*/
892/*------------------------------------------------------------*/
893
894/* Lazy propagation of undefinedness from two values, resulting in the
895 specified shadow type.
896*/
static
IRAtom* mkLazy2 ( MCEnv* mce, IRType finalVty, IRAtom* va1, IRAtom* va2 )
{
   /* Merge the definedness of the two shadow atoms and produce a
      result of type finalVty: undefinedness in either input makes
      the whole result undefined. */
   IRAtom* at;
   IRType t1 = typeOfIRExpr(mce->bb->tyenv, va1);
   IRType t2 = typeOfIRExpr(mce->bb->tyenv, va2);
   tl_assert(isShadowAtom(mce,va1));
   tl_assert(isShadowAtom(mce,va2));

   /* The general case is inefficient because PCast is an expensive
      operation.  Here are some special cases which use PCast only
      once rather than twice. */

   /* I64 x I64 -> I64 */
   if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I64) {
      if (0) VG_(printf)("mkLazy2: I64 x I64 -> I64\n");
      at = mkUifU(mce, Ity_I64, va1, va2);
      at = mkPCastTo(mce, Ity_I64, at);
      return at;
   }

   /* I64 x I64 -> I32 */
   if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I32) {
      if (0) VG_(printf)("mkLazy2: I64 x I64 -> I32\n");
      at = mkUifU(mce, Ity_I64, va1, va2);
      at = mkPCastTo(mce, Ity_I32, at);
      return at;
   }

   /* Debug aid: show which type combinations fall through to the
      general case.  Disabled by default. */
   if (0) {
      VG_(printf)("mkLazy2 ");
      ppIRType(t1);
      VG_(printf)("_");
      ppIRType(t2);
      VG_(printf)("_");
      ppIRType(finalVty);
      VG_(printf)("\n");
   }

   /* General case: force everything via 32-bit intermediaries. */
   at = mkPCastTo(mce, Ity_I32, va1);
   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
   at = mkPCastTo(mce, finalVty, at);
   return at;
}
942
943
944/* Do the lazy propagation game from a null-terminated vector of
945 atoms. This is presumably the arguments to a helper call, so the
946 IRCallee info is also supplied in order that we can know which
947 arguments should be ignored (via the .mcx_mask field).
948*/
949static
950IRAtom* mkLazyN ( MCEnv* mce,
951 IRAtom** exprvec, IRType finalVtype, IRCallee* cee )
952{
953 Int i;
954 IRAtom* here;
955 IRAtom* curr = definedOfType(Ity_I32);
956 for (i = 0; exprvec[i]; i++) {
957 tl_assert(i < 32);
958 tl_assert(isOriginalAtom(mce, exprvec[i]));
959 /* Only take notice of this arg if the callee's mc-exclusion
960 mask does not say it is to be excluded. */
961 if (cee->mcx_mask & (1<<i)) {
962 /* the arg is to be excluded from definedness checking. Do
963 nothing. */
964 if (0) VG_(printf)("excluding %s(%d)\n", cee->name, i);
965 } else {
966 /* calculate the arg's definedness, and pessimistically merge
967 it in. */
968 here = mkPCastTo( mce, Ity_I32, expr2vbits(mce, exprvec[i]) );
969 curr = mkUifU32(mce, here, curr);
970 }
971 }
972 return mkPCastTo(mce, finalVtype, curr );
973}
974
975
976/*------------------------------------------------------------*/
977/*--- Generating expensive sequences for exact carry-chain ---*/
978/*--- propagation in add/sub and related operations. ---*/
979/*------------------------------------------------------------*/
980
/* Exact (carry-chain-aware) definedness propagation for add/sub.
   qaa/qbb are the shadow (V-bit) atoms, aa/bb the originals.  The
   scheme: compute the smallest and largest values each operand could
   take given its undefined bits, do the op on both extremes, and mark
   as undefined any result bit that differs between the two -- plus,
   of course, any bit already undefined in either input. */
static
IRAtom* expensiveAddSub ( MCEnv*  mce,
                          Bool    add,
                          IRType  ty,
                          IRAtom* qaa, IRAtom* qbb, 
                          IRAtom* aa,  IRAtom* bb )
{
   IRAtom *a_min, *b_min, *a_max, *b_max;
   IROp   opAND, opOR, opXOR, opNOT, opADD, opSUB;

   tl_assert(isShadowAtom(mce,qaa));
   tl_assert(isShadowAtom(mce,qbb));
   tl_assert(isOriginalAtom(mce,aa));
   tl_assert(isOriginalAtom(mce,bb));
   tl_assert(sameKindedAtoms(qaa,aa));
   tl_assert(sameKindedAtoms(qbb,bb));

   /* Only the 32-bit case is supported here. */
   switch (ty) {
      case Ity_I32:
         opAND = Iop_And32;
         opOR  = Iop_Or32;
         opXOR = Iop_Xor32;
         opNOT = Iop_Not32;
         opADD = Iop_Add32;
         opSUB = Iop_Sub32;
         break;
      default:
         VG_(tool_panic)("expensiveAddSub");
   }

   // a_min = aa & ~qaa  (smallest value aa could really be:
   // undefined bits forced to 0)
   a_min = assignNew(mce,ty, 
                     binop(opAND, aa,
                               assignNew(mce,ty, unop(opNOT, qaa))));

   // b_min = bb & ~qbb
   b_min = assignNew(mce,ty, 
                     binop(opAND, bb,
                               assignNew(mce,ty, unop(opNOT, qbb))));

   // a_max = aa | qaa  (largest value: undefined bits forced to 1)
   a_max = assignNew(mce,ty, binop(opOR, aa, qaa));

   // b_max = bb | qbb
   b_max = assignNew(mce,ty, binop(opOR, bb, qbb));

   if (add) {
      // result = (qaa | qbb) | ((a_min + b_min) ^ (a_max + b_max))
      return
      assignNew(mce,ty,
         binop( opOR,
                assignNew(mce,ty, binop(opOR, qaa, qbb)),
                assignNew(mce,ty, 
                   binop( opXOR, 
                          assignNew(mce,ty, binop(opADD, a_min, b_min)),
                          assignNew(mce,ty, binop(opADD, a_max, b_max))
                   )
                )
         )
      );
   } else {
      // result = (qaa | qbb) | ((a_min - b_max) ^ (a_max - b_min))
      // (note: for subtraction the extremes pair up oppositely)
      return
      assignNew(mce,ty,
         binop( opOR,
                assignNew(mce,ty, binop(opOR, qaa, qbb)),
                assignNew(mce,ty, 
                   binop( opXOR, 
                          assignNew(mce,ty, binop(opSUB, a_min, b_max)),
                          assignNew(mce,ty, binop(opSUB, a_max, b_min))
                   )
                )
         )
      );
   }

}
1058
1059
1060/*------------------------------------------------------------*/
sewardj3245c912004-12-10 14:58:26 +00001061/*--- Helpers for dealing with vector primops. ---*/
1062/*------------------------------------------------------------*/
1063
sewardja1d93302004-12-12 16:45:06 +00001064/* Vector pessimisation -- pessimise within each lane individually. */
1065
1066static IRAtom* mkPCast8x16 ( MCEnv* mce, IRAtom* at )
1067{
1068 return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ8x16, at));
1069}
1070
1071static IRAtom* mkPCast16x8 ( MCEnv* mce, IRAtom* at )
1072{
1073 return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ16x8, at));
1074}
1075
1076static IRAtom* mkPCast32x4 ( MCEnv* mce, IRAtom* at )
1077{
1078 return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ32x4, at));
1079}
1080
1081static IRAtom* mkPCast64x2 ( MCEnv* mce, IRAtom* at )
1082{
1083 return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ64x2, at));
1084}
1085
sewardjacd2e912005-01-13 19:17:06 +00001086static IRAtom* mkPCast32x2 ( MCEnv* mce, IRAtom* at )
1087{
1088 return assignNew(mce, Ity_I64, unop(Iop_CmpNEZ32x2, at));
1089}
1090
1091static IRAtom* mkPCast16x4 ( MCEnv* mce, IRAtom* at )
1092{
1093 return assignNew(mce, Ity_I64, unop(Iop_CmpNEZ16x4, at));
1094}
1095
1096static IRAtom* mkPCast8x8 ( MCEnv* mce, IRAtom* at )
1097{
1098 return assignNew(mce, Ity_I64, unop(Iop_CmpNEZ8x8, at));
1099}
1100
sewardja1d93302004-12-12 16:45:06 +00001101
sewardj3245c912004-12-10 14:58:26 +00001102/* Here's a simple scheme capable of handling ops derived from SSE1
1103 code and while only generating ops that can be efficiently
1104 implemented in SSE1. */
1105
1106/* All-lanes versions are straightforward:
1107
sewardj20d38f22005-02-07 23:50:18 +00001108 binary32Fx4(x,y) ==> PCast32x4(UifUV128(x#,y#))
sewardj3245c912004-12-10 14:58:26 +00001109
1110 unary32Fx4(x,y) ==> PCast32x4(x#)
1111
1112 Lowest-lane-only versions are more complex:
1113
sewardj20d38f22005-02-07 23:50:18 +00001114 binary32F0x4(x,y) ==> SetV128lo32(
sewardj3245c912004-12-10 14:58:26 +00001115 x#,
sewardj20d38f22005-02-07 23:50:18 +00001116 PCast32(V128to32(UifUV128(x#,y#)))
sewardj3245c912004-12-10 14:58:26 +00001117 )
1118
1119 This is perhaps not so obvious. In particular, it's faster to
sewardj20d38f22005-02-07 23:50:18 +00001120 do a V128-bit UifU and then take the bottom 32 bits than the more
sewardj3245c912004-12-10 14:58:26 +00001121 obvious scheme of taking the bottom 32 bits of each operand
1122 and doing a 32-bit UifU. Basically since UifU is fast and
1123 chopping lanes off vector values is slow.
1124
1125 Finally:
1126
sewardj20d38f22005-02-07 23:50:18 +00001127 unary32F0x4(x) ==> SetV128lo32(
sewardj3245c912004-12-10 14:58:26 +00001128 x#,
sewardj20d38f22005-02-07 23:50:18 +00001129 PCast32(V128to32(x#))
sewardj3245c912004-12-10 14:58:26 +00001130 )
1131
1132 Where:
1133
1134 PCast32(v#) = 1Sto32(CmpNE32(v#,0))
1135 PCast32x4(v#) = CmpNEZ32x4(v#)
1136*/
1137
1138static
1139IRAtom* binary32Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1140{
1141 IRAtom* at;
1142 tl_assert(isShadowAtom(mce, vatomX));
1143 tl_assert(isShadowAtom(mce, vatomY));
sewardj20d38f22005-02-07 23:50:18 +00001144 at = mkUifUV128(mce, vatomX, vatomY);
sewardja1d93302004-12-12 16:45:06 +00001145 at = assignNew(mce, Ity_V128, mkPCast32x4(mce, at));
sewardj3245c912004-12-10 14:58:26 +00001146 return at;
1147}
1148
1149static
1150IRAtom* unary32Fx4 ( MCEnv* mce, IRAtom* vatomX )
1151{
1152 IRAtom* at;
1153 tl_assert(isShadowAtom(mce, vatomX));
sewardja1d93302004-12-12 16:45:06 +00001154 at = assignNew(mce, Ity_V128, mkPCast32x4(mce, vatomX));
sewardj3245c912004-12-10 14:58:26 +00001155 return at;
1156}
1157
1158static
1159IRAtom* binary32F0x4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1160{
1161 IRAtom* at;
1162 tl_assert(isShadowAtom(mce, vatomX));
1163 tl_assert(isShadowAtom(mce, vatomY));
sewardj20d38f22005-02-07 23:50:18 +00001164 at = mkUifUV128(mce, vatomX, vatomY);
1165 at = assignNew(mce, Ity_I32, unop(Iop_V128to32, at));
sewardj3245c912004-12-10 14:58:26 +00001166 at = mkPCastTo(mce, Ity_I32, at);
sewardj20d38f22005-02-07 23:50:18 +00001167 at = assignNew(mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
sewardj3245c912004-12-10 14:58:26 +00001168 return at;
1169}
1170
1171static
1172IRAtom* unary32F0x4 ( MCEnv* mce, IRAtom* vatomX )
1173{
1174 IRAtom* at;
1175 tl_assert(isShadowAtom(mce, vatomX));
sewardj20d38f22005-02-07 23:50:18 +00001176 at = assignNew(mce, Ity_I32, unop(Iop_V128to32, vatomX));
sewardj3245c912004-12-10 14:58:26 +00001177 at = mkPCastTo(mce, Ity_I32, at);
sewardj20d38f22005-02-07 23:50:18 +00001178 at = assignNew(mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
sewardj3245c912004-12-10 14:58:26 +00001179 return at;
1180}
1181
sewardj0b070592004-12-10 21:44:22 +00001182/* --- ... and ... 64Fx2 versions of the same ... --- */
1183
1184static
1185IRAtom* binary64Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1186{
1187 IRAtom* at;
1188 tl_assert(isShadowAtom(mce, vatomX));
1189 tl_assert(isShadowAtom(mce, vatomY));
sewardj20d38f22005-02-07 23:50:18 +00001190 at = mkUifUV128(mce, vatomX, vatomY);
sewardja1d93302004-12-12 16:45:06 +00001191 at = assignNew(mce, Ity_V128, mkPCast64x2(mce, at));
sewardj0b070592004-12-10 21:44:22 +00001192 return at;
1193}
1194
1195static
1196IRAtom* unary64Fx2 ( MCEnv* mce, IRAtom* vatomX )
1197{
1198 IRAtom* at;
1199 tl_assert(isShadowAtom(mce, vatomX));
sewardja1d93302004-12-12 16:45:06 +00001200 at = assignNew(mce, Ity_V128, mkPCast64x2(mce, vatomX));
sewardj0b070592004-12-10 21:44:22 +00001201 return at;
1202}
1203
1204static
1205IRAtom* binary64F0x2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1206{
1207 IRAtom* at;
1208 tl_assert(isShadowAtom(mce, vatomX));
1209 tl_assert(isShadowAtom(mce, vatomY));
sewardj20d38f22005-02-07 23:50:18 +00001210 at = mkUifUV128(mce, vatomX, vatomY);
1211 at = assignNew(mce, Ity_I64, unop(Iop_V128to64, at));
sewardj0b070592004-12-10 21:44:22 +00001212 at = mkPCastTo(mce, Ity_I64, at);
sewardj20d38f22005-02-07 23:50:18 +00001213 at = assignNew(mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
sewardj0b070592004-12-10 21:44:22 +00001214 return at;
1215}
1216
1217static
1218IRAtom* unary64F0x2 ( MCEnv* mce, IRAtom* vatomX )
1219{
1220 IRAtom* at;
1221 tl_assert(isShadowAtom(mce, vatomX));
sewardj20d38f22005-02-07 23:50:18 +00001222 at = assignNew(mce, Ity_I64, unop(Iop_V128to64, vatomX));
sewardj0b070592004-12-10 21:44:22 +00001223 at = mkPCastTo(mce, Ity_I64, at);
sewardj20d38f22005-02-07 23:50:18 +00001224 at = assignNew(mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
sewardj0b070592004-12-10 21:44:22 +00001225 return at;
1226}
1227
sewardja1d93302004-12-12 16:45:06 +00001228/* --- --- Vector saturated narrowing --- --- */
1229
1230/* This is quite subtle. What to do is simple:
1231
1232 Let the original narrowing op be QNarrowW{S,U}xN. Produce:
1233
1234 the-narrowing-op( PCastWxN(vatom1), PCastWxN(vatom2))
1235
1236 Why this is right is not so simple. Consider a lane in the args,
1237 vatom1 or 2, doesn't matter.
1238
1239 After the PCast, that lane is all 0s (defined) or all
1240 1s(undefined).
1241
1242 Both signed and unsigned saturating narrowing of all 0s produces
1243 all 0s, which is what we want.
1244
1245 The all-1s case is more complex. Unsigned narrowing interprets an
1246 all-1s input as the largest unsigned integer, and so produces all
1247 1s as a result since that is the largest unsigned value at the
1248 smaller width.
1249
1250 Signed narrowing interprets all 1s as -1. Fortunately, -1 narrows
1251 to -1, so we still wind up with all 1s at the smaller width.
1252
1253 So: In short, pessimise the args, then apply the original narrowing
1254 op.
1255*/
1256static
sewardj20d38f22005-02-07 23:50:18 +00001257IRAtom* vectorNarrowV128 ( MCEnv* mce, IROp narrow_op,
sewardja1d93302004-12-12 16:45:06 +00001258 IRAtom* vatom1, IRAtom* vatom2)
1259{
1260 IRAtom *at1, *at2, *at3;
1261 IRAtom* (*pcast)( MCEnv*, IRAtom* );
1262 switch (narrow_op) {
1263 case Iop_QNarrow32Sx4: pcast = mkPCast32x4; break;
1264 case Iop_QNarrow16Sx8: pcast = mkPCast16x8; break;
1265 case Iop_QNarrow16Ux8: pcast = mkPCast16x8; break;
sewardj20d38f22005-02-07 23:50:18 +00001266 default: VG_(tool_panic)("vectorNarrowV128");
sewardja1d93302004-12-12 16:45:06 +00001267 }
1268 tl_assert(isShadowAtom(mce,vatom1));
1269 tl_assert(isShadowAtom(mce,vatom2));
1270 at1 = assignNew(mce, Ity_V128, pcast(mce, vatom1));
1271 at2 = assignNew(mce, Ity_V128, pcast(mce, vatom2));
1272 at3 = assignNew(mce, Ity_V128, binop(narrow_op, at1, at2));
1273 return at3;
1274}
1275
sewardjacd2e912005-01-13 19:17:06 +00001276static
1277IRAtom* vectorNarrow64 ( MCEnv* mce, IROp narrow_op,
1278 IRAtom* vatom1, IRAtom* vatom2)
1279{
1280 IRAtom *at1, *at2, *at3;
1281 IRAtom* (*pcast)( MCEnv*, IRAtom* );
1282 switch (narrow_op) {
1283 case Iop_QNarrow32Sx2: pcast = mkPCast32x2; break;
1284 case Iop_QNarrow16Sx4: pcast = mkPCast16x4; break;
1285 case Iop_QNarrow16Ux4: pcast = mkPCast16x4; break;
1286 default: VG_(tool_panic)("vectorNarrow64");
1287 }
1288 tl_assert(isShadowAtom(mce,vatom1));
1289 tl_assert(isShadowAtom(mce,vatom2));
1290 at1 = assignNew(mce, Ity_I64, pcast(mce, vatom1));
1291 at2 = assignNew(mce, Ity_I64, pcast(mce, vatom2));
1292 at3 = assignNew(mce, Ity_I64, binop(narrow_op, at1, at2));
1293 return at3;
1294}
1295
sewardja1d93302004-12-12 16:45:06 +00001296
1297/* --- --- Vector integer arithmetic --- --- */
1298
1299/* Simple ... UifU the args and per-lane pessimise the results. */
sewardjacd2e912005-01-13 19:17:06 +00001300
sewardj20d38f22005-02-07 23:50:18 +00001301/* --- V128-bit versions --- */
sewardjacd2e912005-01-13 19:17:06 +00001302
sewardja1d93302004-12-12 16:45:06 +00001303static
1304IRAtom* binary8Ix16 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
1305{
1306 IRAtom* at;
sewardj20d38f22005-02-07 23:50:18 +00001307 at = mkUifUV128(mce, vatom1, vatom2);
sewardja1d93302004-12-12 16:45:06 +00001308 at = mkPCast8x16(mce, at);
1309 return at;
1310}
1311
1312static
1313IRAtom* binary16Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
1314{
1315 IRAtom* at;
sewardj20d38f22005-02-07 23:50:18 +00001316 at = mkUifUV128(mce, vatom1, vatom2);
sewardja1d93302004-12-12 16:45:06 +00001317 at = mkPCast16x8(mce, at);
1318 return at;
1319}
1320
1321static
1322IRAtom* binary32Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
1323{
1324 IRAtom* at;
sewardj20d38f22005-02-07 23:50:18 +00001325 at = mkUifUV128(mce, vatom1, vatom2);
sewardja1d93302004-12-12 16:45:06 +00001326 at = mkPCast32x4(mce, at);
1327 return at;
1328}
1329
1330static
1331IRAtom* binary64Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
1332{
1333 IRAtom* at;
sewardj20d38f22005-02-07 23:50:18 +00001334 at = mkUifUV128(mce, vatom1, vatom2);
sewardja1d93302004-12-12 16:45:06 +00001335 at = mkPCast64x2(mce, at);
1336 return at;
1337}
sewardj3245c912004-12-10 14:58:26 +00001338
sewardjacd2e912005-01-13 19:17:06 +00001339/* --- 64-bit versions --- */
1340
1341static
1342IRAtom* binary8Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
1343{
1344 IRAtom* at;
1345 at = mkUifU64(mce, vatom1, vatom2);
1346 at = mkPCast8x8(mce, at);
1347 return at;
1348}
1349
1350static
1351IRAtom* binary16Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
1352{
1353 IRAtom* at;
1354 at = mkUifU64(mce, vatom1, vatom2);
1355 at = mkPCast16x4(mce, at);
1356 return at;
1357}
1358
1359static
1360IRAtom* binary32Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
1361{
1362 IRAtom* at;
1363 at = mkUifU64(mce, vatom1, vatom2);
1364 at = mkPCast32x2(mce, at);
1365 return at;
1366}
1367
sewardj3245c912004-12-10 14:58:26 +00001368
1369/*------------------------------------------------------------*/
sewardj95448072004-11-22 20:19:51 +00001370/*--- Generate shadow values from all kinds of IRExprs. ---*/
1371/*------------------------------------------------------------*/
1372
1373static
1374IRAtom* expr2vbits_Binop ( MCEnv* mce,
1375 IROp op,
1376 IRAtom* atom1, IRAtom* atom2 )
1377{
1378 IRType and_or_ty;
1379 IRAtom* (*uifu) (MCEnv*, IRAtom*, IRAtom*);
1380 IRAtom* (*difd) (MCEnv*, IRAtom*, IRAtom*);
1381 IRAtom* (*improve) (MCEnv*, IRAtom*, IRAtom*);
1382
1383 IRAtom* vatom1 = expr2vbits( mce, atom1 );
1384 IRAtom* vatom2 = expr2vbits( mce, atom2 );
1385
1386 tl_assert(isOriginalAtom(mce,atom1));
1387 tl_assert(isOriginalAtom(mce,atom2));
1388 tl_assert(isShadowAtom(mce,vatom1));
1389 tl_assert(isShadowAtom(mce,vatom2));
1390 tl_assert(sameKindedAtoms(atom1,vatom1));
1391 tl_assert(sameKindedAtoms(atom2,vatom2));
1392 switch (op) {
1393
sewardjacd2e912005-01-13 19:17:06 +00001394 /* 64-bit SIMD */
1395
1396 case Iop_ShrN16x4:
1397 case Iop_ShrN32x2:
1398 case Iop_SarN16x4:
1399 case Iop_SarN32x2:
1400 case Iop_ShlN16x4:
1401 case Iop_ShlN32x2:
1402 /* Same scheme as with all other shifts. */
1403 complainIfUndefined(mce, atom2);
1404 return assignNew(mce, Ity_I64, binop(op, vatom1, atom2));
1405
1406 case Iop_QNarrow32Sx2:
1407 case Iop_QNarrow16Sx4:
1408 case Iop_QNarrow16Ux4:
1409 return vectorNarrow64(mce, op, vatom1, vatom2);
1410
1411 case Iop_Min8Ux8:
1412 case Iop_Max8Ux8:
1413 case Iop_Avg8Ux8:
1414 case Iop_QSub8Sx8:
1415 case Iop_QSub8Ux8:
1416 case Iop_Sub8x8:
1417 case Iop_CmpGT8Sx8:
1418 case Iop_CmpEQ8x8:
1419 case Iop_QAdd8Sx8:
1420 case Iop_QAdd8Ux8:
1421 case Iop_Add8x8:
1422 return binary8Ix8(mce, vatom1, vatom2);
1423
1424 case Iop_Min16Sx4:
1425 case Iop_Max16Sx4:
1426 case Iop_Avg16Ux4:
1427 case Iop_QSub16Ux4:
1428 case Iop_QSub16Sx4:
1429 case Iop_Sub16x4:
1430 case Iop_Mul16x4:
1431 case Iop_MulHi16Sx4:
1432 case Iop_MulHi16Ux4:
1433 case Iop_CmpGT16Sx4:
1434 case Iop_CmpEQ16x4:
1435 case Iop_QAdd16Sx4:
1436 case Iop_QAdd16Ux4:
1437 case Iop_Add16x4:
1438 return binary16Ix4(mce, vatom1, vatom2);
1439
1440 case Iop_Sub32x2:
1441 case Iop_CmpGT32Sx2:
1442 case Iop_CmpEQ32x2:
1443 case Iop_Add32x2:
1444 return binary32Ix2(mce, vatom1, vatom2);
1445
1446 /* 64-bit data-steering */
1447 case Iop_InterleaveLO32x2:
1448 case Iop_InterleaveLO16x4:
1449 case Iop_InterleaveLO8x8:
1450 case Iop_InterleaveHI32x2:
1451 case Iop_InterleaveHI16x4:
1452 case Iop_InterleaveHI8x8:
1453 return assignNew(mce, Ity_I64, binop(op, vatom1, vatom2));
1454
sewardj20d38f22005-02-07 23:50:18 +00001455 /* V128-bit SIMD */
sewardj0b070592004-12-10 21:44:22 +00001456
sewardja1d93302004-12-12 16:45:06 +00001457 case Iop_ShrN16x8:
1458 case Iop_ShrN32x4:
1459 case Iop_ShrN64x2:
1460 case Iop_SarN16x8:
1461 case Iop_SarN32x4:
1462 case Iop_ShlN16x8:
1463 case Iop_ShlN32x4:
1464 case Iop_ShlN64x2:
1465 /* Same scheme as with all other shifts. */
1466 complainIfUndefined(mce, atom2);
1467 return assignNew(mce, Ity_V128, binop(op, vatom1, atom2));
1468
1469 case Iop_QSub8Ux16:
1470 case Iop_QSub8Sx16:
1471 case Iop_Sub8x16:
1472 case Iop_Min8Ux16:
1473 case Iop_Max8Ux16:
1474 case Iop_CmpGT8Sx16:
1475 case Iop_CmpEQ8x16:
1476 case Iop_Avg8Ux16:
1477 case Iop_QAdd8Ux16:
1478 case Iop_QAdd8Sx16:
1479 case Iop_Add8x16:
1480 return binary8Ix16(mce, vatom1, vatom2);
1481
1482 case Iop_QSub16Ux8:
1483 case Iop_QSub16Sx8:
1484 case Iop_Sub16x8:
1485 case Iop_Mul16x8:
1486 case Iop_MulHi16Sx8:
1487 case Iop_MulHi16Ux8:
1488 case Iop_Min16Sx8:
1489 case Iop_Max16Sx8:
1490 case Iop_CmpGT16Sx8:
1491 case Iop_CmpEQ16x8:
1492 case Iop_Avg16Ux8:
1493 case Iop_QAdd16Ux8:
1494 case Iop_QAdd16Sx8:
1495 case Iop_Add16x8:
1496 return binary16Ix8(mce, vatom1, vatom2);
1497
1498 case Iop_Sub32x4:
1499 case Iop_CmpGT32Sx4:
1500 case Iop_CmpEQ32x4:
1501 case Iop_Add32x4:
1502 return binary32Ix4(mce, vatom1, vatom2);
1503
1504 case Iop_Sub64x2:
1505 case Iop_Add64x2:
1506 return binary64Ix2(mce, vatom1, vatom2);
1507
1508 case Iop_QNarrow32Sx4:
1509 case Iop_QNarrow16Sx8:
1510 case Iop_QNarrow16Ux8:
sewardj20d38f22005-02-07 23:50:18 +00001511 return vectorNarrowV128(mce, op, vatom1, vatom2);
sewardja1d93302004-12-12 16:45:06 +00001512
sewardj0b070592004-12-10 21:44:22 +00001513 case Iop_Sub64Fx2:
1514 case Iop_Mul64Fx2:
1515 case Iop_Min64Fx2:
1516 case Iop_Max64Fx2:
1517 case Iop_Div64Fx2:
1518 case Iop_CmpLT64Fx2:
1519 case Iop_CmpLE64Fx2:
1520 case Iop_CmpEQ64Fx2:
1521 case Iop_Add64Fx2:
1522 return binary64Fx2(mce, vatom1, vatom2);
1523
1524 case Iop_Sub64F0x2:
1525 case Iop_Mul64F0x2:
1526 case Iop_Min64F0x2:
1527 case Iop_Max64F0x2:
1528 case Iop_Div64F0x2:
1529 case Iop_CmpLT64F0x2:
1530 case Iop_CmpLE64F0x2:
1531 case Iop_CmpEQ64F0x2:
1532 case Iop_Add64F0x2:
1533 return binary64F0x2(mce, vatom1, vatom2);
1534
sewardj170ee212004-12-10 18:57:51 +00001535 case Iop_Sub32Fx4:
1536 case Iop_Mul32Fx4:
1537 case Iop_Min32Fx4:
1538 case Iop_Max32Fx4:
1539 case Iop_Div32Fx4:
1540 case Iop_CmpLT32Fx4:
1541 case Iop_CmpLE32Fx4:
1542 case Iop_CmpEQ32Fx4:
sewardj3245c912004-12-10 14:58:26 +00001543 case Iop_Add32Fx4:
1544 return binary32Fx4(mce, vatom1, vatom2);
1545
sewardj170ee212004-12-10 18:57:51 +00001546 case Iop_Sub32F0x4:
1547 case Iop_Mul32F0x4:
1548 case Iop_Min32F0x4:
1549 case Iop_Max32F0x4:
1550 case Iop_Div32F0x4:
1551 case Iop_CmpLT32F0x4:
1552 case Iop_CmpLE32F0x4:
1553 case Iop_CmpEQ32F0x4:
1554 case Iop_Add32F0x4:
1555 return binary32F0x4(mce, vatom1, vatom2);
1556
sewardj20d38f22005-02-07 23:50:18 +00001557 /* V128-bit data-steering */
1558 case Iop_SetV128lo32:
1559 case Iop_SetV128lo64:
1560 case Iop_64HLtoV128:
sewardja1d93302004-12-12 16:45:06 +00001561 case Iop_InterleaveLO64x2:
1562 case Iop_InterleaveLO32x4:
1563 case Iop_InterleaveLO16x8:
1564 case Iop_InterleaveLO8x16:
1565 case Iop_InterleaveHI64x2:
1566 case Iop_InterleaveHI32x4:
1567 case Iop_InterleaveHI16x8:
1568 case Iop_InterleaveHI8x16:
sewardj170ee212004-12-10 18:57:51 +00001569 return assignNew(mce, Ity_V128, binop(op, vatom1, vatom2));
1570
sewardj69a13322005-04-23 01:14:51 +00001571 /* I128-bit data-steering */
1572 case Iop_64HLto128:
1573 return assignNew(mce, Ity_I128, binop(op, vatom1, vatom2));
1574
sewardj3245c912004-12-10 14:58:26 +00001575 /* Scalar floating point */
1576
sewardj95448072004-11-22 20:19:51 +00001577 case Iop_RoundF64:
1578 case Iop_F64toI64:
sewardje9e16d32004-12-10 13:17:55 +00001579 case Iop_I64toF64:
1580 /* First arg is I32 (rounding mode), second is F64 or I64
1581 (data). */
sewardj95448072004-11-22 20:19:51 +00001582 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
1583
1584 case Iop_PRemC3210F64: case Iop_PRem1C3210F64:
1585 /* Takes two F64 args. */
1586 case Iop_F64toI32:
sewardje9e16d32004-12-10 13:17:55 +00001587 case Iop_F64toF32:
sewardj95448072004-11-22 20:19:51 +00001588 /* First arg is I32 (rounding mode), second is F64 (data). */
1589 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
1590
1591 case Iop_F64toI16:
1592 /* First arg is I32 (rounding mode), second is F64 (data). */
1593 return mkLazy2(mce, Ity_I16, vatom1, vatom2);
1594
1595 case Iop_ScaleF64:
1596 case Iop_Yl2xF64:
1597 case Iop_Yl2xp1F64:
1598 case Iop_PRemF64:
sewardj96403eb2005-04-01 20:20:12 +00001599 case Iop_PRem1F64:
sewardj95448072004-11-22 20:19:51 +00001600 case Iop_AtanF64:
1601 case Iop_AddF64:
1602 case Iop_DivF64:
1603 case Iop_SubF64:
1604 case Iop_MulF64:
1605 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
1606
1607 case Iop_CmpF64:
1608 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
1609
1610 /* non-FP after here */
1611
1612 case Iop_DivModU64to32:
1613 case Iop_DivModS64to32:
1614 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
1615
sewardj69a13322005-04-23 01:14:51 +00001616 case Iop_DivModU128to64:
1617 case Iop_DivModS128to64:
1618 return mkLazy2(mce, Ity_I128, vatom1, vatom2);
1619
sewardj95448072004-11-22 20:19:51 +00001620 case Iop_16HLto32:
sewardj170ee212004-12-10 18:57:51 +00001621 return assignNew(mce, Ity_I32, binop(op, vatom1, vatom2));
sewardj95448072004-11-22 20:19:51 +00001622 case Iop_32HLto64:
sewardj170ee212004-12-10 18:57:51 +00001623 return assignNew(mce, Ity_I64, binop(op, vatom1, vatom2));
sewardj95448072004-11-22 20:19:51 +00001624
sewardj6cf40ff2005-04-20 22:31:26 +00001625 case Iop_MullS64:
1626 case Iop_MullU64: {
1627 IRAtom* vLo64 = mkLeft64(mce, mkUifU64(mce, vatom1,vatom2));
1628 IRAtom* vHi64 = mkPCastTo(mce, Ity_I64, vLo64);
1629 return assignNew(mce, Ity_I128, binop(Iop_64HLto128, vHi64, vLo64));
1630 }
1631
sewardj95448072004-11-22 20:19:51 +00001632 case Iop_MullS32:
1633 case Iop_MullU32: {
1634 IRAtom* vLo32 = mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
1635 IRAtom* vHi32 = mkPCastTo(mce, Ity_I32, vLo32);
1636 return assignNew(mce, Ity_I64, binop(Iop_32HLto64, vHi32, vLo32));
1637 }
1638
1639 case Iop_MullS16:
1640 case Iop_MullU16: {
1641 IRAtom* vLo16 = mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
1642 IRAtom* vHi16 = mkPCastTo(mce, Ity_I16, vLo16);
1643 return assignNew(mce, Ity_I32, binop(Iop_16HLto32, vHi16, vLo16));
1644 }
1645
1646 case Iop_MullS8:
1647 case Iop_MullU8: {
1648 IRAtom* vLo8 = mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
1649 IRAtom* vHi8 = mkPCastTo(mce, Ity_I8, vLo8);
1650 return assignNew(mce, Ity_I16, binop(Iop_8HLto16, vHi8, vLo8));
1651 }
1652
1653 case Iop_Add32:
sewardjd5204dc2004-12-31 01:16:11 +00001654 if (mce->bogusLiterals)
1655 return expensiveAddSub(mce,True,Ity_I32,
1656 vatom1,vatom2, atom1,atom2);
1657 else
1658 goto cheap_AddSub32;
sewardj95448072004-11-22 20:19:51 +00001659 case Iop_Sub32:
sewardjd5204dc2004-12-31 01:16:11 +00001660 if (mce->bogusLiterals)
1661 return expensiveAddSub(mce,False,Ity_I32,
1662 vatom1,vatom2, atom1,atom2);
1663 else
1664 goto cheap_AddSub32;
1665
1666 cheap_AddSub32:
sewardj95448072004-11-22 20:19:51 +00001667 case Iop_Mul32:
1668 return mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
1669
sewardj681be302005-01-15 20:43:58 +00001670 /* could do better: Add64, Sub64 */
sewardj69a13322005-04-23 01:14:51 +00001671 case Iop_Mul64:
sewardj681be302005-01-15 20:43:58 +00001672 case Iop_Add64:
1673 case Iop_Sub64:
1674 return mkLeft64(mce, mkUifU64(mce, vatom1,vatom2));
1675
sewardj95448072004-11-22 20:19:51 +00001676 case Iop_Mul16:
1677 case Iop_Add16:
1678 case Iop_Sub16:
1679 return mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
1680
1681 case Iop_Sub8:
1682 case Iop_Add8:
1683 return mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
1684
sewardj69a13322005-04-23 01:14:51 +00001685 case Iop_CmpEQ64:
1686 if (mce->bogusLiterals)
1687 return expensiveCmpEQorNE(mce,Ity_I64, vatom1,vatom2, atom1,atom2 );
1688 else
1689 goto cheap_cmp64;
1690 cheap_cmp64:
tomcd986332005-04-26 07:44:48 +00001691 case Iop_CmpLE64S: case Iop_CmpLE64U:
1692 case Iop_CmpLT64U: case Iop_CmpLT64S:
1693 case Iop_CmpNE64:
sewardj69a13322005-04-23 01:14:51 +00001694 return mkPCastTo(mce, Ity_I1, mkUifU64(mce, vatom1,vatom2));
1695
sewardjd5204dc2004-12-31 01:16:11 +00001696 case Iop_CmpEQ32:
1697 if (mce->bogusLiterals)
1698 return expensiveCmpEQorNE(mce,Ity_I32, vatom1,vatom2, atom1,atom2 );
1699 else
1700 goto cheap_cmp32;
sewardjd5204dc2004-12-31 01:16:11 +00001701 cheap_cmp32:
sewardj95448072004-11-22 20:19:51 +00001702 case Iop_CmpLE32S: case Iop_CmpLE32U:
1703 case Iop_CmpLT32U: case Iop_CmpLT32S:
sewardjd5204dc2004-12-31 01:16:11 +00001704 case Iop_CmpNE32:
sewardj95448072004-11-22 20:19:51 +00001705 return mkPCastTo(mce, Ity_I1, mkUifU32(mce, vatom1,vatom2));
1706
1707 case Iop_CmpEQ16: case Iop_CmpNE16:
1708 return mkPCastTo(mce, Ity_I1, mkUifU16(mce, vatom1,vatom2));
1709
1710 case Iop_CmpEQ8: case Iop_CmpNE8:
1711 return mkPCastTo(mce, Ity_I1, mkUifU8(mce, vatom1,vatom2));
1712
1713 case Iop_Shl32: case Iop_Shr32: case Iop_Sar32:
1714 /* Complain if the shift amount is undefined. Then simply
1715 shift the first arg's V bits by the real shift amount. */
1716 complainIfUndefined(mce, atom2);
1717 return assignNew(mce, Ity_I32, binop(op, vatom1, atom2));
1718
sewardjdb67f5f2004-12-14 01:15:31 +00001719 case Iop_Shl16: case Iop_Shr16: case Iop_Sar16:
sewardj95448072004-11-22 20:19:51 +00001720 /* Same scheme as with 32-bit shifts. */
1721 complainIfUndefined(mce, atom2);
1722 return assignNew(mce, Ity_I16, binop(op, vatom1, atom2));
1723
1724 case Iop_Shl8: case Iop_Shr8:
1725 /* Same scheme as with 32-bit shifts. */
1726 complainIfUndefined(mce, atom2);
1727 return assignNew(mce, Ity_I8, binop(op, vatom1, atom2));
1728
sewardj69a13322005-04-23 01:14:51 +00001729 case Iop_Shl64: case Iop_Shr64: case Iop_Sar64:
sewardj95448072004-11-22 20:19:51 +00001730 /* Same scheme as with 32-bit shifts. */
1731 complainIfUndefined(mce, atom2);
1732 return assignNew(mce, Ity_I64, binop(op, vatom1, atom2));
1733
sewardj20d38f22005-02-07 23:50:18 +00001734 case Iop_AndV128:
1735 uifu = mkUifUV128; difd = mkDifDV128;
1736 and_or_ty = Ity_V128; improve = mkImproveANDV128; goto do_And_Or;
sewardj7010f6e2004-12-10 13:35:22 +00001737 case Iop_And64:
1738 uifu = mkUifU64; difd = mkDifD64;
1739 and_or_ty = Ity_I64; improve = mkImproveAND64; goto do_And_Or;
sewardj95448072004-11-22 20:19:51 +00001740 case Iop_And32:
1741 uifu = mkUifU32; difd = mkDifD32;
1742 and_or_ty = Ity_I32; improve = mkImproveAND32; goto do_And_Or;
1743 case Iop_And16:
1744 uifu = mkUifU16; difd = mkDifD16;
1745 and_or_ty = Ity_I16; improve = mkImproveAND16; goto do_And_Or;
1746 case Iop_And8:
1747 uifu = mkUifU8; difd = mkDifD8;
1748 and_or_ty = Ity_I8; improve = mkImproveAND8; goto do_And_Or;
1749
sewardj20d38f22005-02-07 23:50:18 +00001750 case Iop_OrV128:
1751 uifu = mkUifUV128; difd = mkDifDV128;
1752 and_or_ty = Ity_V128; improve = mkImproveORV128; goto do_And_Or;
sewardj7010f6e2004-12-10 13:35:22 +00001753 case Iop_Or64:
1754 uifu = mkUifU64; difd = mkDifD64;
1755 and_or_ty = Ity_I64; improve = mkImproveOR64; goto do_And_Or;
sewardj95448072004-11-22 20:19:51 +00001756 case Iop_Or32:
1757 uifu = mkUifU32; difd = mkDifD32;
1758 and_or_ty = Ity_I32; improve = mkImproveOR32; goto do_And_Or;
1759 case Iop_Or16:
1760 uifu = mkUifU16; difd = mkDifD16;
1761 and_or_ty = Ity_I16; improve = mkImproveOR16; goto do_And_Or;
1762 case Iop_Or8:
1763 uifu = mkUifU8; difd = mkDifD8;
1764 and_or_ty = Ity_I8; improve = mkImproveOR8; goto do_And_Or;
1765
1766 do_And_Or:
1767 return
1768 assignNew(
1769 mce,
1770 and_or_ty,
1771 difd(mce, uifu(mce, vatom1, vatom2),
1772 difd(mce, improve(mce, atom1, vatom1),
1773 improve(mce, atom2, vatom2) ) ) );
1774
1775 case Iop_Xor8:
1776 return mkUifU8(mce, vatom1, vatom2);
1777 case Iop_Xor16:
1778 return mkUifU16(mce, vatom1, vatom2);
1779 case Iop_Xor32:
1780 return mkUifU32(mce, vatom1, vatom2);
sewardj7010f6e2004-12-10 13:35:22 +00001781 case Iop_Xor64:
1782 return mkUifU64(mce, vatom1, vatom2);
sewardj20d38f22005-02-07 23:50:18 +00001783 case Iop_XorV128:
1784 return mkUifUV128(mce, vatom1, vatom2);
njn25e49d8e72002-09-23 09:36:25 +00001785
1786 default:
sewardj95448072004-11-22 20:19:51 +00001787 ppIROp(op);
1788 VG_(tool_panic)("memcheck:expr2vbits_Binop");
njn25e49d8e72002-09-23 09:36:25 +00001789 }
njn25e49d8e72002-09-23 09:36:25 +00001790}
1791
njn25e49d8e72002-09-23 09:36:25 +00001792
/* Generate the shadow (V-bit) expression for unary operation 'op'
   applied to 'atom'.  The returned atom has the shadow type of the
   operation's result.  Panics on an opcode it has no rule for. */
static 
IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
{
   /* Shadow value of the (sole) operand. */
   IRAtom* vatom = expr2vbits( mce, atom );
   tl_assert(isOriginalAtom(mce,atom));
   switch (op) {

      /* SIMD FP: lane-wise pessimistic propagation via the
         unary* helpers. */
      case Iop_Sqrt64Fx2:
         return unary64Fx2(mce, vatom);

      case Iop_Sqrt64F0x2:
         return unary64F0x2(mce, vatom);

      case Iop_Sqrt32Fx4:
      case Iop_RSqrt32Fx4:
      case Iop_Recip32Fx4:
         return unary32Fx4(mce, vatom);

      case Iop_Sqrt32F0x4:
      case Iop_RSqrt32F0x4:
      case Iop_Recip32F0x4:
         return unary32F0x4(mce, vatom);

      /* Zero-widening into a vector: widen the V bits the same way,
         so the new (zero) lanes are marked defined. */
      case Iop_32UtoV128:
      case Iop_64UtoV128:
         return assignNew(mce, Ity_V128, unop(op, vatom));

      /* Scalar FP ops: pessimise by smearing all operand V bits
         across a 64-bit result (PCast). */
      case Iop_F32toF64: 
      case Iop_I32toF64:
      case Iop_NegF64:
      case Iop_SinF64:
      case Iop_CosF64:
      case Iop_TanF64:
      case Iop_SqrtF64:
      case Iop_AbsF64:
      case Iop_2xm1F64:
         return mkPCastTo(mce, Ity_I64, vatom);

      /* Bit-count ops: any undefined operand bit may change the
         whole result, so PCast to the result width. */
      case Iop_Clz32:
      case Iop_Ctz32:
         return mkPCastTo(mce, Ity_I32, vatom);

      /* Widening/narrowing/selection conversions with a 64-bit
         result: V bits track data bits exactly, so apply the very
         same conversion to the shadow value. */
      case Iop_1Uto64:
      case Iop_8Uto64:
      case Iop_8Sto64:
      case Iop_16Uto64:
      case Iop_16Sto64:
      case Iop_32Sto64:
      case Iop_32Uto64:
      case Iop_V128to64:
      case Iop_V128HIto64:
      case Iop_128HIto64:
      case Iop_128to64:
         return assignNew(mce, Ity_I64, unop(op, vatom));

      /* Same scheme, 32-bit results. */
      case Iop_64to32:
      case Iop_64HIto32:
      case Iop_1Uto32:
      case Iop_8Uto32:
      case Iop_16Uto32:
      case Iop_16Sto32:
      case Iop_8Sto32:
         return assignNew(mce, Ity_I32, unop(op, vatom));

      /* Same scheme, 16-bit results. */
      case Iop_8Sto16:
      case Iop_8Uto16:
      case Iop_32to16:
      case Iop_32HIto16:
      case Iop_64to16:
         return assignNew(mce, Ity_I16, unop(op, vatom));

      /* Same scheme, 8-bit results. */
      case Iop_1Uto8:
      case Iop_16to8:
      case Iop_32to8:
      case Iop_64to8:
         return assignNew(mce, Ity_I8, unop(op, vatom));

      case Iop_32to1:
         return assignNew(mce, Ity_I1, unop(Iop_32to1, vatom));

      case Iop_64to1:
         return assignNew(mce, Ity_I1, unop(Iop_64to1, vatom));

      /* Reinterpretations and bitwise NOTs leave the definedness of
         each bit unchanged, so the shadow value passes through
         untouched. */
      case Iop_ReinterpF64asI64:
      case Iop_ReinterpI64asF64:
      case Iop_ReinterpI32asF32:
      case Iop_NotV128:
      case Iop_Not64:
      case Iop_Not32:
      case Iop_Not16:
      case Iop_Not8:
      case Iop_Not1:
         return vatom;

      default:
         ppIROp(op);
         VG_(tool_panic)("memcheck:expr2vbits_Unop");
   }
}
1892
1893
/* Worker function; do not call directly.  Emits IR which fetches the
   V bits for a little-endian load of (integer) type 'ty' from address
   'addr'+'bias', by generating a dirty call to the matching
   MC_(helperc_LOADV*) helper.  Returns an atom (a temp) holding the
   loaded V bits.  Only handles I8/I16/I32/I64; V128 is split by the
   caller, expr2vbits_LDle. */
static
IRAtom* expr2vbits_LDle_WRK ( MCEnv* mce, IRType ty, IRAtom* addr, UInt bias )
{
   void*    helper;
   Char*    hname;
   IRDirty* di;
   IRTemp   datavbits;
   IRAtom*  addrAct;

   tl_assert(isOriginalAtom(mce,addr));

   /* First, emit a definedness test for the address.  This also sets
      the address (shadow) to 'defined' following the test. */
   complainIfUndefined( mce, addr );

   /* Now cook up a call to the relevant helper function, to read the
      data V bits from shadow memory. */
   ty = shadowType(ty);
   switch (ty) {
      case Ity_I64: helper = &MC_(helperc_LOADV8);
                    hname = "MC_(helperc_LOADV8)";
                    break;
      case Ity_I32: helper = &MC_(helperc_LOADV4);
                    hname = "MC_(helperc_LOADV4)";
                    break;
      case Ity_I16: helper = &MC_(helperc_LOADV2);
                    hname = "MC_(helperc_LOADV2)";
                    break;
      case Ity_I8:  helper = &MC_(helperc_LOADV1);
                    hname = "MC_(helperc_LOADV1)";
                    break;
      default:      ppIRType(ty);
                    VG_(tool_panic)("memcheck:do_shadow_LDle");
   }

   /* Generate the actual address into addrAct.  A nonzero bias needs
      an explicit host-word-sized add. */
   if (bias == 0) {
      addrAct = addr;
   } else {
      IROp    mkAdd;
      IRAtom* eBias;
      IRType  tyAddr  = mce->hWordTy;
      tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
      mkAdd   = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
      eBias   = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
      addrAct = assignNew(mce, tyAddr, binop(mkAdd, addr, eBias) );
   }

   /* We need to have a place to park the V bits we're just about to
      read. */
   datavbits = newIRTemp(mce->bb->tyenv, ty);
   di = unsafeIRDirty_1_N( datavbits, 
                           1/*regparms*/, hname, helper, 
                           mkIRExprVec_1( addrAct ));
   setHelperAnns( mce, di );
   stmt( mce->bb, IRStmt_Dirty(di) );

   return mkexpr(datavbits);
}
1954
1955
1956static
sewardj170ee212004-12-10 18:57:51 +00001957IRAtom* expr2vbits_LDle ( MCEnv* mce, IRType ty, IRAtom* addr, UInt bias )
1958{
1959 IRAtom *v64hi, *v64lo;
1960 switch (shadowType(ty)) {
1961 case Ity_I8:
1962 case Ity_I16:
1963 case Ity_I32:
1964 case Ity_I64:
1965 return expr2vbits_LDle_WRK(mce, ty, addr, bias);
1966 case Ity_V128:
1967 v64lo = expr2vbits_LDle_WRK(mce, Ity_I64, addr, bias);
1968 v64hi = expr2vbits_LDle_WRK(mce, Ity_I64, addr, bias+8);
1969 return assignNew( mce,
1970 Ity_V128,
sewardj20d38f22005-02-07 23:50:18 +00001971 binop(Iop_64HLtoV128, v64hi, v64lo));
sewardj170ee212004-12-10 18:57:51 +00001972 default:
1973 VG_(tool_panic)("expr2vbits_LDle");
1974 }
1975}
1976
1977
1978static
sewardj95448072004-11-22 20:19:51 +00001979IRAtom* expr2vbits_Mux0X ( MCEnv* mce,
1980 IRAtom* cond, IRAtom* expr0, IRAtom* exprX )
1981{
1982 IRAtom *vbitsC, *vbits0, *vbitsX;
1983 IRType ty;
1984 /* Given Mux0X(cond,expr0,exprX), generate
1985 Mux0X(cond,expr0#,exprX#) `UifU` PCast(cond#)
1986 That is, steer the V bits like the originals, but trash the
1987 result if the steering value is undefined. This gives
1988 lazy propagation. */
1989 tl_assert(isOriginalAtom(mce, cond));
1990 tl_assert(isOriginalAtom(mce, expr0));
1991 tl_assert(isOriginalAtom(mce, exprX));
1992
1993 vbitsC = expr2vbits(mce, cond);
1994 vbits0 = expr2vbits(mce, expr0);
1995 vbitsX = expr2vbits(mce, exprX);
1996 ty = typeOfIRExpr(mce->bb->tyenv, vbits0);
1997
1998 return
1999 mkUifU(mce, ty, assignNew(mce, ty, IRExpr_Mux0X(cond, vbits0, vbitsX)),
2000 mkPCastTo(mce, ty, vbitsC) );
2001}
2002
2003/* --------- This is the main expression-handling function. --------- */
2004
2005static
2006IRExpr* expr2vbits ( MCEnv* mce, IRExpr* e )
2007{
2008 switch (e->tag) {
2009
2010 case Iex_Get:
2011 return shadow_GET( mce, e->Iex.Get.offset, e->Iex.Get.ty );
2012
2013 case Iex_GetI:
2014 return shadow_GETI( mce, e->Iex.GetI.descr,
2015 e->Iex.GetI.ix, e->Iex.GetI.bias );
2016
2017 case Iex_Tmp:
2018 return IRExpr_Tmp( findShadowTmp(mce, e->Iex.Tmp.tmp) );
2019
2020 case Iex_Const:
2021 return definedOfType(shadowType(typeOfIRExpr(mce->bb->tyenv, e)));
2022
2023 case Iex_Binop:
2024 return expr2vbits_Binop(
2025 mce,
2026 e->Iex.Binop.op,
2027 e->Iex.Binop.arg1, e->Iex.Binop.arg2
2028 );
2029
2030 case Iex_Unop:
2031 return expr2vbits_Unop( mce, e->Iex.Unop.op, e->Iex.Unop.arg );
2032
2033 case Iex_LDle:
2034 return expr2vbits_LDle( mce, e->Iex.LDle.ty,
2035 e->Iex.LDle.addr, 0/*addr bias*/ );
2036
2037 case Iex_CCall:
2038 return mkLazyN( mce, e->Iex.CCall.args,
2039 e->Iex.CCall.retty,
2040 e->Iex.CCall.cee );
2041
2042 case Iex_Mux0X:
2043 return expr2vbits_Mux0X( mce, e->Iex.Mux0X.cond, e->Iex.Mux0X.expr0,
2044 e->Iex.Mux0X.exprX);
njn25e49d8e72002-09-23 09:36:25 +00002045
2046 default:
sewardj95448072004-11-22 20:19:51 +00002047 VG_(printf)("\n");
2048 ppIRExpr(e);
2049 VG_(printf)("\n");
2050 VG_(tool_panic)("memcheck: expr2vbits");
njn25e49d8e72002-09-23 09:36:25 +00002051 }
njn25e49d8e72002-09-23 09:36:25 +00002052}
2053
2054/*------------------------------------------------------------*/
sewardj95448072004-11-22 20:19:51 +00002055/*--- Generate shadow stmts from all kinds of IRStmts. ---*/
njn25e49d8e72002-09-23 09:36:25 +00002056/*------------------------------------------------------------*/
2057
sewardj95448072004-11-22 20:19:51 +00002058/* Widen a value to the host word size. */
njn25e49d8e72002-09-23 09:36:25 +00002059
2060static
sewardj95448072004-11-22 20:19:51 +00002061IRExpr* zwidenToHostWord ( MCEnv* mce, IRAtom* vatom )
njn25e49d8e72002-09-23 09:36:25 +00002062{
sewardj7cf97ee2004-11-28 14:25:01 +00002063 IRType ty, tyH;
2064
sewardj95448072004-11-22 20:19:51 +00002065 /* vatom is vbits-value and as such can only have a shadow type. */
2066 tl_assert(isShadowAtom(mce,vatom));
njn25e49d8e72002-09-23 09:36:25 +00002067
sewardj7cf97ee2004-11-28 14:25:01 +00002068 ty = typeOfIRExpr(mce->bb->tyenv, vatom);
2069 tyH = mce->hWordTy;
njn25e49d8e72002-09-23 09:36:25 +00002070
sewardj95448072004-11-22 20:19:51 +00002071 if (tyH == Ity_I32) {
2072 switch (ty) {
2073 case Ity_I32: return vatom;
2074 case Ity_I16: return assignNew(mce, tyH, unop(Iop_16Uto32, vatom));
2075 case Ity_I8: return assignNew(mce, tyH, unop(Iop_8Uto32, vatom));
2076 default: goto unhandled;
sewardj8ec2cfc2002-10-13 00:57:26 +00002077 }
sewardj6cf40ff2005-04-20 22:31:26 +00002078 } else
2079 if (tyH == Ity_I64) {
2080 switch (ty) {
2081 case Ity_I32: return assignNew(mce, tyH, unop(Iop_32Uto64, vatom));
sewardj69a13322005-04-23 01:14:51 +00002082 case Ity_I16: return assignNew(mce, tyH, unop(Iop_32Uto64,
2083 assignNew(mce, Ity_I32, unop(Iop_16Uto32, vatom))));
2084 case Ity_I8: return assignNew(mce, tyH, unop(Iop_32Uto64,
2085 assignNew(mce, Ity_I32, unop(Iop_8Uto32, vatom))));
sewardj6cf40ff2005-04-20 22:31:26 +00002086 default: goto unhandled;
2087 }
sewardj95448072004-11-22 20:19:51 +00002088 } else {
2089 goto unhandled;
sewardj8ec2cfc2002-10-13 00:57:26 +00002090 }
sewardj95448072004-11-22 20:19:51 +00002091 unhandled:
2092 VG_(printf)("\nty = "); ppIRType(ty); VG_(printf)("\n");
2093 VG_(tool_panic)("zwidenToHostWord");
njn25e49d8e72002-09-23 09:36:25 +00002094}
2095
njn25e49d8e72002-09-23 09:36:25 +00002096
/* Generate a shadow store.  addr is always the original address atom.
   You can pass in either originals or V-bits for the data atom, but
   obviously not both.  If 'data' is supplied, its V bits are computed
   here and 'bias' must be zero; otherwise 'vdata' is used directly.
   The V bits are written to shadow memory via the matching
   MC_(helperc_STOREV*) dirty helper; V128 stores are split into two
   64-bit stores. */

static 
void do_shadow_STle ( MCEnv* mce, 
                      IRAtom* addr, UInt bias,
                      IRAtom* data, IRAtom* vdata )
{
   IROp     mkAdd;
   IRType   ty, tyAddr;
   IRDirty  *di, *diLo64, *diHi64;
   IRAtom   *addrAct, *addrLo64, *addrHi64;
   IRAtom   *vdataLo64, *vdataHi64;
   IRAtom   *eBias, *eBias0, *eBias8;
   void*    helper = NULL;
   Char*    hname = NULL;

   tyAddr = mce->hWordTy;
   mkAdd  = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
   tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );

   di = diLo64 = diHi64 = NULL;
   eBias = eBias0 = eBias8 = NULL;
   addrAct = addrLo64 = addrHi64 = NULL;
   vdataLo64 = vdataHi64 = NULL;

   /* Exactly one of 'data' and 'vdata' must be supplied. */
   if (data) {
      tl_assert(!vdata);
      tl_assert(isOriginalAtom(mce, data));
      tl_assert(bias == 0);
      vdata = expr2vbits( mce, data );
   } else {
      tl_assert(vdata);
   }

   tl_assert(isOriginalAtom(mce,addr));
   tl_assert(isShadowAtom(mce,vdata));

   ty = typeOfIRExpr(mce->bb->tyenv, vdata);

   /* First, emit a definedness test for the address.  This also sets
      the address (shadow) to 'defined' following the test. */
   complainIfUndefined( mce, addr );

   /* Now decide which helper function to call to write the data V
      bits into shadow memory. */
   switch (ty) {
      case Ity_V128: /* we'll use the helper twice */
      case Ity_I64: helper = &MC_(helperc_STOREV8);
                    hname = "MC_(helperc_STOREV8)";
                    break;
      case Ity_I32: helper = &MC_(helperc_STOREV4);
                    hname = "MC_(helperc_STOREV4)";
                    break;
      case Ity_I16: helper = &MC_(helperc_STOREV2);
                    hname = "MC_(helperc_STOREV2)";
                    break;
      case Ity_I8:  helper = &MC_(helperc_STOREV1);
                    hname = "MC_(helperc_STOREV1)";
                    break;
      default:      VG_(tool_panic)("memcheck:do_shadow_STle");
   }

   if (ty == Ity_V128) {

      /* V128-bit case: two 64-bit stores, low half at bias, high
         half at bias+8. */
      /* See comment in next clause re 64-bit regparms */
      eBias0    = tyAddr==Ity_I32 ? mkU32(bias)   : mkU64(bias);
      addrLo64  = assignNew(mce, tyAddr, binop(mkAdd, addr, eBias0) );
      vdataLo64 = assignNew(mce, Ity_I64, unop(Iop_V128to64, vdata));
      diLo64    = unsafeIRDirty_0_N( 
                     1/*regparms*/, hname, helper, 
                     mkIRExprVec_2( addrLo64, vdataLo64 ));

      eBias8    = tyAddr==Ity_I32 ? mkU32(bias+8) : mkU64(bias+8);
      addrHi64  = assignNew(mce, tyAddr, binop(mkAdd, addr, eBias8) );
      vdataHi64 = assignNew(mce, Ity_I64, unop(Iop_V128HIto64, vdata));
      diHi64    = unsafeIRDirty_0_N( 
                     1/*regparms*/, hname, helper, 
                     mkIRExprVec_2( addrHi64, vdataHi64 ));

      setHelperAnns( mce, diLo64 );
      setHelperAnns( mce, diHi64 );
      stmt( mce->bb, IRStmt_Dirty(diLo64) );
      stmt( mce->bb, IRStmt_Dirty(diHi64) );

   } else {

      /* 8/16/32/64-bit cases */
      /* Generate the actual address into addrAct. */
      if (bias == 0) {
         addrAct = addr;
      } else {
         eBias   = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
         addrAct = assignNew(mce, tyAddr, binop(mkAdd, addr, eBias) );
      }

      if (ty == Ity_I64) {
         /* We can't do this with regparm 2 on 32-bit platforms, since
            the back ends aren't clever enough to handle 64-bit
            regparm args.  Therefore be different. */
         di = unsafeIRDirty_0_N( 
                 1/*regparms*/, hname, helper, 
                 mkIRExprVec_2( addrAct, vdata ));
      } else {
         /* Narrow data is zero-widened to the host word so it can be
            passed regparm. */
         di = unsafeIRDirty_0_N( 
                 2/*regparms*/, hname, helper, 
                 mkIRExprVec_2( addrAct,
                                zwidenToHostWord( mce, vdata )));
      }
      setHelperAnns( mce, di );
      stmt( mce->bb, IRStmt_Dirty(di) );
   }

}
njn25e49d8e72002-09-23 09:36:25 +00002213
njn25e49d8e72002-09-23 09:36:25 +00002214
sewardj95448072004-11-22 20:19:51 +00002215/* Do lazy pessimistic propagation through a dirty helper call, by
2216 looking at the annotations on it. This is the most complex part of
2217 Memcheck. */
njn25e49d8e72002-09-23 09:36:25 +00002218
sewardj95448072004-11-22 20:19:51 +00002219static IRType szToITy ( Int n )
2220{
2221 switch (n) {
2222 case 1: return Ity_I8;
2223 case 2: return Ity_I16;
2224 case 4: return Ity_I32;
2225 case 8: return Ity_I64;
2226 default: VG_(tool_panic)("szToITy(memcheck)");
2227 }
2228}
njn25e49d8e72002-09-23 09:36:25 +00002229
sewardj95448072004-11-22 20:19:51 +00002230static
2231void do_shadow_Dirty ( MCEnv* mce, IRDirty* d )
2232{
sewardje9e16d32004-12-10 13:17:55 +00002233 Int i, n, offset, toDo, gSz, gOff;
sewardj7cf97ee2004-11-28 14:25:01 +00002234 IRAtom *src, *here, *curr;
sewardj95448072004-11-22 20:19:51 +00002235 IRType tyAddr, tySrc, tyDst;
2236 IRTemp dst;
njn25e49d8e72002-09-23 09:36:25 +00002237
sewardj95448072004-11-22 20:19:51 +00002238 /* First check the guard. */
2239 complainIfUndefined(mce, d->guard);
2240
2241 /* Now round up all inputs and PCast over them. */
sewardj7cf97ee2004-11-28 14:25:01 +00002242 curr = definedOfType(Ity_I32);
sewardj95448072004-11-22 20:19:51 +00002243
2244 /* Inputs: unmasked args */
2245 for (i = 0; d->args[i]; i++) {
2246 if (d->cee->mcx_mask & (1<<i)) {
2247 /* ignore this arg */
njn25e49d8e72002-09-23 09:36:25 +00002248 } else {
sewardj95448072004-11-22 20:19:51 +00002249 here = mkPCastTo( mce, Ity_I32, expr2vbits(mce, d->args[i]) );
2250 curr = mkUifU32(mce, here, curr);
njn25e49d8e72002-09-23 09:36:25 +00002251 }
2252 }
sewardj95448072004-11-22 20:19:51 +00002253
2254 /* Inputs: guest state that we read. */
2255 for (i = 0; i < d->nFxState; i++) {
2256 tl_assert(d->fxState[i].fx != Ifx_None);
2257 if (d->fxState[i].fx == Ifx_Write)
2258 continue;
sewardja7203252004-11-26 19:17:47 +00002259
2260 /* Ignore any sections marked as 'always defined'. */
2261 if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size )) {
sewardje9e16d32004-12-10 13:17:55 +00002262 if (0)
sewardja7203252004-11-26 19:17:47 +00002263 VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
2264 d->fxState[i].offset, d->fxState[i].size );
2265 continue;
2266 }
2267
sewardj95448072004-11-22 20:19:51 +00002268 /* This state element is read or modified. So we need to
sewardje9e16d32004-12-10 13:17:55 +00002269 consider it. If larger than 8 bytes, deal with it in 8-byte
2270 chunks. */
2271 gSz = d->fxState[i].size;
2272 gOff = d->fxState[i].offset;
2273 tl_assert(gSz > 0);
2274 while (True) {
2275 if (gSz == 0) break;
2276 n = gSz <= 8 ? gSz : 8;
2277 /* update 'curr' with UifU of the state slice
2278 gOff .. gOff+n-1 */
2279 tySrc = szToITy( n );
2280 src = assignNew( mce, tySrc,
2281 shadow_GET(mce, gOff, tySrc ) );
2282 here = mkPCastTo( mce, Ity_I32, src );
2283 curr = mkUifU32(mce, here, curr);
2284 gSz -= n;
2285 gOff += n;
2286 }
2287
sewardj95448072004-11-22 20:19:51 +00002288 }
2289
2290 /* Inputs: memory. First set up some info needed regardless of
2291 whether we're doing reads or writes. */
2292 tyAddr = Ity_INVALID;
2293
2294 if (d->mFx != Ifx_None) {
2295 /* Because we may do multiple shadow loads/stores from the same
2296 base address, it's best to do a single test of its
2297 definedness right now. Post-instrumentation optimisation
2298 should remove all but this test. */
2299 tl_assert(d->mAddr);
2300 complainIfUndefined(mce, d->mAddr);
2301
2302 tyAddr = typeOfIRExpr(mce->bb->tyenv, d->mAddr);
2303 tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);
2304 tl_assert(tyAddr == mce->hWordTy); /* not really right */
2305 }
2306
2307 /* Deal with memory inputs (reads or modifies) */
2308 if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
2309 offset = 0;
2310 toDo = d->mSize;
2311 /* chew off 32-bit chunks */
2312 while (toDo >= 4) {
2313 here = mkPCastTo(
2314 mce, Ity_I32,
2315 expr2vbits_LDle ( mce, Ity_I32,
2316 d->mAddr, d->mSize - toDo )
2317 );
2318 curr = mkUifU32(mce, here, curr);
2319 toDo -= 4;
2320 }
2321 /* chew off 16-bit chunks */
2322 while (toDo >= 2) {
2323 here = mkPCastTo(
2324 mce, Ity_I32,
2325 expr2vbits_LDle ( mce, Ity_I16,
2326 d->mAddr, d->mSize - toDo )
2327 );
2328 curr = mkUifU32(mce, here, curr);
2329 toDo -= 2;
2330 }
2331 tl_assert(toDo == 0); /* also need to handle 1-byte excess */
2332 }
2333
2334 /* Whew! So curr is a 32-bit V-value summarising pessimistically
2335 all the inputs to the helper. Now we need to re-distribute the
2336 results to all destinations. */
2337
2338 /* Outputs: the destination temporary, if there is one. */
2339 if (d->tmp != IRTemp_INVALID) {
2340 dst = findShadowTmp(mce, d->tmp);
2341 tyDst = typeOfIRTemp(mce->bb->tyenv, d->tmp);
2342 assign( mce->bb, dst, mkPCastTo( mce, tyDst, curr) );
2343 }
2344
2345 /* Outputs: guest state that we write or modify. */
2346 for (i = 0; i < d->nFxState; i++) {
2347 tl_assert(d->fxState[i].fx != Ifx_None);
2348 if (d->fxState[i].fx == Ifx_Read)
2349 continue;
sewardja7203252004-11-26 19:17:47 +00002350 /* Ignore any sections marked as 'always defined'. */
2351 if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size ))
2352 continue;
sewardje9e16d32004-12-10 13:17:55 +00002353 /* This state element is written or modified. So we need to
2354 consider it. If larger than 8 bytes, deal with it in 8-byte
2355 chunks. */
2356 gSz = d->fxState[i].size;
2357 gOff = d->fxState[i].offset;
2358 tl_assert(gSz > 0);
2359 while (True) {
2360 if (gSz == 0) break;
2361 n = gSz <= 8 ? gSz : 8;
2362 /* Write suitably-casted 'curr' to the state slice
2363 gOff .. gOff+n-1 */
2364 tyDst = szToITy( n );
2365 do_shadow_PUT( mce, gOff,
2366 NULL, /* original atom */
2367 mkPCastTo( mce, tyDst, curr ) );
2368 gSz -= n;
2369 gOff += n;
2370 }
sewardj95448072004-11-22 20:19:51 +00002371 }
2372
2373 /* Outputs: memory that we write or modify. */
2374 if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
2375 offset = 0;
2376 toDo = d->mSize;
2377 /* chew off 32-bit chunks */
2378 while (toDo >= 4) {
2379 do_shadow_STle( mce, d->mAddr, d->mSize - toDo,
2380 NULL, /* original data */
2381 mkPCastTo( mce, Ity_I32, curr ) );
2382 toDo -= 4;
2383 }
2384 /* chew off 16-bit chunks */
2385 while (toDo >= 2) {
2386 do_shadow_STle( mce, d->mAddr, d->mSize - toDo,
2387 NULL, /* original data */
2388 mkPCastTo( mce, Ity_I16, curr ) );
2389 toDo -= 2;
2390 }
2391 tl_assert(toDo == 0); /* also need to handle 1-byte excess */
2392 }
2393
njn25e49d8e72002-09-23 09:36:25 +00002394}
2395
2396
sewardj95448072004-11-22 20:19:51 +00002397/*------------------------------------------------------------*/
2398/*--- Memcheck main ---*/
2399/*------------------------------------------------------------*/
njn25e49d8e72002-09-23 09:36:25 +00002400
sewardj95448072004-11-22 20:19:51 +00002401static Bool isBogusAtom ( IRAtom* at )
njn25e49d8e72002-09-23 09:36:25 +00002402{
sewardj95448072004-11-22 20:19:51 +00002403 ULong n = 0;
2404 IRConst* con;
sewardj710d6c22005-03-20 18:55:15 +00002405 tl_assert(isIRAtom(at));
sewardj95448072004-11-22 20:19:51 +00002406 if (at->tag == Iex_Tmp)
2407 return False;
2408 tl_assert(at->tag == Iex_Const);
2409 con = at->Iex.Const.con;
2410 switch (con->tag) {
sewardjd5204dc2004-12-31 01:16:11 +00002411 case Ico_U1: return False;
2412 case Ico_U8: n = (ULong)con->Ico.U8; break;
2413 case Ico_U16: n = (ULong)con->Ico.U16; break;
2414 case Ico_U32: n = (ULong)con->Ico.U32; break;
2415 case Ico_U64: n = (ULong)con->Ico.U64; break;
2416 case Ico_F64: return False;
2417 case Ico_F64i: return False;
2418 case Ico_V128: return False;
sewardj95448072004-11-22 20:19:51 +00002419 default: ppIRExpr(at); tl_assert(0);
2420 }
2421 /* VG_(printf)("%llx\n", n); */
sewardj96a922e2005-04-23 23:26:29 +00002422 return (/*32*/ n == 0xFEFEFEFFULL
2423 /*32*/ || n == 0x80808080ULL
2424 /*64*/ || n == 0xFEFEFEFEFEFEFEFFULL
2425 /*64*/ || n == 0x8080808080808080ULL
2426 /*64*/ || n == 0x0101010101010101ULL
2427 );
sewardj95448072004-11-22 20:19:51 +00002428}
njn25e49d8e72002-09-23 09:36:25 +00002429
sewardj95448072004-11-22 20:19:51 +00002430static Bool checkForBogusLiterals ( /*FLAT*/ IRStmt* st )
2431{
sewardjd5204dc2004-12-31 01:16:11 +00002432 Int i;
2433 IRExpr* e;
2434 IRDirty* d;
sewardj95448072004-11-22 20:19:51 +00002435 switch (st->tag) {
2436 case Ist_Tmp:
2437 e = st->Ist.Tmp.data;
2438 switch (e->tag) {
2439 case Iex_Get:
2440 case Iex_Tmp:
2441 return False;
sewardjd5204dc2004-12-31 01:16:11 +00002442 case Iex_Const:
2443 return isBogusAtom(e);
sewardj95448072004-11-22 20:19:51 +00002444 case Iex_Unop:
2445 return isBogusAtom(e->Iex.Unop.arg);
sewardjd5204dc2004-12-31 01:16:11 +00002446 case Iex_GetI:
2447 return isBogusAtom(e->Iex.GetI.ix);
sewardj95448072004-11-22 20:19:51 +00002448 case Iex_Binop:
2449 return isBogusAtom(e->Iex.Binop.arg1)
2450 || isBogusAtom(e->Iex.Binop.arg2);
2451 case Iex_Mux0X:
2452 return isBogusAtom(e->Iex.Mux0X.cond)
2453 || isBogusAtom(e->Iex.Mux0X.expr0)
2454 || isBogusAtom(e->Iex.Mux0X.exprX);
2455 case Iex_LDle:
2456 return isBogusAtom(e->Iex.LDle.addr);
2457 case Iex_CCall:
2458 for (i = 0; e->Iex.CCall.args[i]; i++)
2459 if (isBogusAtom(e->Iex.CCall.args[i]))
2460 return True;
2461 return False;
2462 default:
2463 goto unhandled;
2464 }
sewardjd5204dc2004-12-31 01:16:11 +00002465 case Ist_Dirty:
2466 d = st->Ist.Dirty.details;
2467 for (i = 0; d->args[i]; i++)
2468 if (isBogusAtom(d->args[i]))
2469 return True;
2470 if (d->guard && isBogusAtom(d->guard))
2471 return True;
2472 if (d->mAddr && isBogusAtom(d->mAddr))
2473 return True;
2474 return False;
sewardj95448072004-11-22 20:19:51 +00002475 case Ist_Put:
2476 return isBogusAtom(st->Ist.Put.data);
sewardjd5204dc2004-12-31 01:16:11 +00002477 case Ist_PutI:
2478 return isBogusAtom(st->Ist.PutI.ix)
2479 || isBogusAtom(st->Ist.PutI.data);
sewardj95448072004-11-22 20:19:51 +00002480 case Ist_STle:
2481 return isBogusAtom(st->Ist.STle.addr)
2482 || isBogusAtom(st->Ist.STle.data);
2483 case Ist_Exit:
sewardjd5204dc2004-12-31 01:16:11 +00002484 return isBogusAtom(st->Ist.Exit.guard);
sewardj21dc3452005-03-21 00:27:41 +00002485 case Ist_NoOp:
sewardj29faa502005-03-16 18:20:21 +00002486 case Ist_IMark:
sewardjbd598e12005-01-07 12:10:21 +00002487 case Ist_MFence:
2488 return False;
sewardj95448072004-11-22 20:19:51 +00002489 default:
2490 unhandled:
2491 ppIRStmt(st);
2492 VG_(tool_panic)("hasBogusLiterals");
2493 }
2494}
njn25e49d8e72002-09-23 09:36:25 +00002495
njn25e49d8e72002-09-23 09:36:25 +00002496
sewardjd54babf2005-03-21 00:55:49 +00002497IRBB* TL_(instrument) ( IRBB* bb_in, VexGuestLayout* layout,
2498 IRType gWordTy, IRType hWordTy )
sewardj95448072004-11-22 20:19:51 +00002499{
2500 Bool verboze = False; //True;
njn25e49d8e72002-09-23 09:36:25 +00002501
sewardjd5204dc2004-12-31 01:16:11 +00002502 Int i, j, first_stmt;
sewardj95448072004-11-22 20:19:51 +00002503 IRStmt* st;
sewardjd5204dc2004-12-31 01:16:11 +00002504 MCEnv mce;
sewardjd54babf2005-03-21 00:55:49 +00002505 IRBB* bb;
2506
2507 if (gWordTy != hWordTy) {
2508 /* We don't currently support this case. */
2509 VG_(tool_panic)("host/guest word size mismatch");
2510 }
njn25e49d8e72002-09-23 09:36:25 +00002511
sewardj6cf40ff2005-04-20 22:31:26 +00002512 /* Check we're not completely nuts */
2513 tl_assert(sizeof(UWord) == sizeof(void*));
2514 tl_assert(sizeof(Word) == sizeof(void*));
2515 tl_assert(sizeof(ULong) == 8);
2516 tl_assert(sizeof(Long) == 8);
2517 tl_assert(sizeof(UInt) == 4);
2518 tl_assert(sizeof(Int) == 4);
2519
sewardj95448072004-11-22 20:19:51 +00002520 /* Set up BB */
sewardjd54babf2005-03-21 00:55:49 +00002521 bb = emptyIRBB();
sewardj95448072004-11-22 20:19:51 +00002522 bb->tyenv = dopyIRTypeEnv(bb_in->tyenv);
2523 bb->next = dopyIRExpr(bb_in->next);
2524 bb->jumpkind = bb_in->jumpkind;
njn25e49d8e72002-09-23 09:36:25 +00002525
sewardj95448072004-11-22 20:19:51 +00002526 /* Set up the running environment. Only .bb is modified as we go
2527 along. */
2528 mce.bb = bb;
2529 mce.layout = layout;
2530 mce.n_originalTmps = bb->tyenv->types_used;
2531 mce.hWordTy = hWordTy;
sewardjd5204dc2004-12-31 01:16:11 +00002532 mce.bogusLiterals = False;
sewardj95448072004-11-22 20:19:51 +00002533 mce.tmpMap = LibVEX_Alloc(mce.n_originalTmps * sizeof(IRTemp));
2534 for (i = 0; i < mce.n_originalTmps; i++)
2535 mce.tmpMap[i] = IRTemp_INVALID;
2536
2537 /* Iterate over the stmts. */
2538
2539 for (i = 0; i < bb_in->stmts_used; i++) {
2540 st = bb_in->stmts[i];
sewardj21dc3452005-03-21 00:27:41 +00002541 tl_assert(st);
sewardj95448072004-11-22 20:19:51 +00002542
2543 tl_assert(isFlatIRStmt(st));
2544
sewardjd5204dc2004-12-31 01:16:11 +00002545 if (!mce.bogusLiterals) {
2546 mce.bogusLiterals = checkForBogusLiterals(st);
2547 if (0&& mce.bogusLiterals) {
sewardj95448072004-11-22 20:19:51 +00002548 VG_(printf)("bogus: ");
2549 ppIRStmt(st);
2550 VG_(printf)("\n");
2551 }
2552 }
sewardjd5204dc2004-12-31 01:16:11 +00002553
sewardj95448072004-11-22 20:19:51 +00002554 first_stmt = bb->stmts_used;
2555
2556 if (verboze) {
2557 ppIRStmt(st);
2558 VG_(printf)("\n\n");
2559 }
2560
sewardj29faa502005-03-16 18:20:21 +00002561 /* Generate instrumentation code for each stmt ... */
2562
sewardj95448072004-11-22 20:19:51 +00002563 switch (st->tag) {
2564
2565 case Ist_Tmp:
2566 assign( bb, findShadowTmp(&mce, st->Ist.Tmp.tmp),
2567 expr2vbits( &mce, st->Ist.Tmp.data) );
njn25e49d8e72002-09-23 09:36:25 +00002568 break;
2569
sewardj95448072004-11-22 20:19:51 +00002570 case Ist_Put:
2571 do_shadow_PUT( &mce,
2572 st->Ist.Put.offset,
2573 st->Ist.Put.data,
2574 NULL /* shadow atom */ );
njn25e49d8e72002-09-23 09:36:25 +00002575 break;
2576
sewardj95448072004-11-22 20:19:51 +00002577 case Ist_PutI:
2578 do_shadow_PUTI( &mce,
2579 st->Ist.PutI.descr,
2580 st->Ist.PutI.ix,
2581 st->Ist.PutI.bias,
2582 st->Ist.PutI.data );
njn25e49d8e72002-09-23 09:36:25 +00002583 break;
2584
sewardj95448072004-11-22 20:19:51 +00002585 case Ist_STle:
2586 do_shadow_STle( &mce, st->Ist.STle.addr, 0/* addr bias */,
2587 st->Ist.STle.data,
2588 NULL /* shadow data */ );
njn25e49d8e72002-09-23 09:36:25 +00002589 break;
2590
sewardj95448072004-11-22 20:19:51 +00002591 case Ist_Exit:
sewardjd5204dc2004-12-31 01:16:11 +00002592 complainIfUndefined( &mce, st->Ist.Exit.guard );
njn25e49d8e72002-09-23 09:36:25 +00002593 break;
2594
sewardj21dc3452005-03-21 00:27:41 +00002595 case Ist_NoOp:
sewardj29faa502005-03-16 18:20:21 +00002596 case Ist_IMark:
sewardjbd598e12005-01-07 12:10:21 +00002597 case Ist_MFence:
2598 break;
2599
sewardj95448072004-11-22 20:19:51 +00002600 case Ist_Dirty:
2601 do_shadow_Dirty( &mce, st->Ist.Dirty.details );
njn25e49d8e72002-09-23 09:36:25 +00002602 break;
2603
2604 default:
sewardj95448072004-11-22 20:19:51 +00002605 VG_(printf)("\n");
2606 ppIRStmt(st);
2607 VG_(printf)("\n");
2608 VG_(tool_panic)("memcheck: unhandled IRStmt");
2609
2610 } /* switch (st->tag) */
2611
2612 if (verboze) {
2613 for (j = first_stmt; j < bb->stmts_used; j++) {
2614 VG_(printf)(" ");
2615 ppIRStmt(bb->stmts[j]);
2616 VG_(printf)("\n");
2617 }
2618 VG_(printf)("\n");
njn25e49d8e72002-09-23 09:36:25 +00002619 }
sewardj95448072004-11-22 20:19:51 +00002620
sewardj29faa502005-03-16 18:20:21 +00002621 /* ... and finally copy the stmt itself to the output. */
sewardj95448072004-11-22 20:19:51 +00002622 addStmtToIRBB(bb, st);
2623
njn25e49d8e72002-09-23 09:36:25 +00002624 }
njn25e49d8e72002-09-23 09:36:25 +00002625
sewardj95448072004-11-22 20:19:51 +00002626 /* Now we need to complain if the jump target is undefined. */
2627 first_stmt = bb->stmts_used;
njn25e49d8e72002-09-23 09:36:25 +00002628
sewardj95448072004-11-22 20:19:51 +00002629 if (verboze) {
2630 VG_(printf)("bb->next = ");
2631 ppIRExpr(bb->next);
2632 VG_(printf)("\n\n");
2633 }
njn25e49d8e72002-09-23 09:36:25 +00002634
sewardj95448072004-11-22 20:19:51 +00002635 complainIfUndefined( &mce, bb->next );
njn25e49d8e72002-09-23 09:36:25 +00002636
sewardj95448072004-11-22 20:19:51 +00002637 if (verboze) {
2638 for (j = first_stmt; j < bb->stmts_used; j++) {
2639 VG_(printf)(" ");
2640 ppIRStmt(bb->stmts[j]);
2641 VG_(printf)("\n");
njn25e49d8e72002-09-23 09:36:25 +00002642 }
sewardj95448072004-11-22 20:19:51 +00002643 VG_(printf)("\n");
njn25e49d8e72002-09-23 09:36:25 +00002644 }
njn25e49d8e72002-09-23 09:36:25 +00002645
sewardj95448072004-11-22 20:19:51 +00002646 return bb;
2647}
njn25e49d8e72002-09-23 09:36:25 +00002648
2649/*--------------------------------------------------------------------*/
njn25cac76cb2002-09-23 11:21:57 +00002650/*--- end mc_translate.c ---*/
njn25e49d8e72002-09-23 09:36:25 +00002651/*--------------------------------------------------------------------*/