
/*--------------------------------------------------------------------*/
/*--- Instrument IR to perform memory checking operations.         ---*/
/*---                                               mc_translate.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of MemCheck, a heavyweight Valgrind tool for
   detecting memory errors.

   Copyright (C) 2000-2005 Julian Seward
      jseward@acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "pub_tool_basics.h"
#include "pub_tool_hashtable.h"     // For mac_shared.h
#include "pub_tool_libcassert.h"
#include "pub_tool_libcprint.h"
#include "pub_tool_profile.h"
#include "pub_tool_tooliface.h"
#include "mc_include.h"


/* This file implements the Memcheck instrumentation, and in
   particular contains the core of its undefined value detection
   machinery.  For a comprehensive background on the terminology,
   algorithms and rationale used herein, read:

     Using Valgrind to detect undefined value errors with
     bit-precision

     Julian Seward and Nicholas Nethercote

     2005 USENIX Annual Technical Conference (General Track),
     Anaheim, CA, USA, April 10-15, 2005.
*/

/*------------------------------------------------------------*/
/*--- Forward decls                                        ---*/
/*------------------------------------------------------------*/

struct _MCEnv;

static IRType  shadowType ( IRType ty );
static IRExpr* expr2vbits ( struct _MCEnv* mce, IRExpr* e );


/*------------------------------------------------------------*/
/*--- Memcheck running state, and tmp management.          ---*/
/*------------------------------------------------------------*/

/* Carries around state during memcheck instrumentation. */
typedef
   struct _MCEnv {
      /* MODIFIED: the bb being constructed.  IRStmts are added. */
      IRBB* bb;

      /* MODIFIED: a table [0 .. #temps_in_original_bb-1] which maps
         original temps to their current shadow temp.  Initially all
         entries are IRTemp_INVALID.  Entries are added lazily since
         many original temps are not used due to optimisation prior
         to instrumentation.  Note that floating point original tmps
         are shadowed by integer tmps of the same size, and Bit-typed
         original tmps are shadowed by the type Ity_I8.  See comment
         below. */
      IRTemp* tmpMap;
      Int     n_originalTmps; /* for range checking */

      /* MODIFIED: indicates whether "bogus" literals have so far been
         found.  Starts off False, and may change to True. */
      Bool bogusLiterals;

      /* READONLY: the guest layout.  This indicates which parts of
         the guest state should be regarded as 'always defined'. */
      VexGuestLayout* layout;
      /* READONLY: the host word type.  Needed for constructing
         arguments of type 'HWord' to be passed to helper functions.
         Ity_I32 or Ity_I64 only. */
      IRType hWordTy;
   }
   MCEnv;

/* SHADOW TMP MANAGEMENT.  Shadow tmps are allocated lazily (on
   demand), as they are encountered.  This is for two reasons.

   (1) (less important reason): Many original tmps are unused due to
   initial IR optimisation, and we do not want to waste space in
   tables tracking them.

   Shadow IRTemps are therefore allocated on demand.  mce.tmpMap is a
   table indexed [0 .. n_originalTmps-1], which gives the current
   shadow for each original tmp, or IRTemp_INVALID if none is so far
   assigned.  It is necessary to support making multiple assignments
   to a shadow -- specifically, after testing a shadow for
   definedness, it needs to be made defined.  But IR's SSA property
   disallows this.

   (2) (more important reason): Therefore, when a shadow needs to get
   a new value, a new temporary is created, the value is assigned to
   that, and the tmpMap is updated to reflect the new binding.

   A corollary is that if the tmpMap maps a given tmp to
   IRTemp_INVALID and we are hoping to read that shadow tmp, it means
   there's a read-before-write error in the original tmps.  The IR
   sanity checker should catch all such anomalies, however.
*/

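/* Illustrative lifecycle (hypothetical tmp numbers): suppose t3 is an
   original tmp.  The first findShadowTmp(mce,3) call allocates, say,
   t17 as its shadow.  If t3's shadow must later be forced to
   'defined' after a definedness test, newShadowTmp(mce,3) abandons
   t17 and binds a fresh tmp, say t23, so every shadow tmp is still
   assigned only once and SSA form is preserved. */
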
/* Find the tmp currently shadowing the given original tmp.  If none
   so far exists, allocate one.  */
static IRTemp findShadowTmp ( MCEnv* mce, IRTemp orig )
{
   tl_assert(orig < mce->n_originalTmps);
   if (mce->tmpMap[orig] == IRTemp_INVALID) {
      mce->tmpMap[orig]
         = newIRTemp(mce->bb->tyenv,
                     shadowType(mce->bb->tyenv->types[orig]));
   }
   return mce->tmpMap[orig];
}

/* Allocate a new shadow for the given original tmp.  This means any
   previous shadow is abandoned.  This is needed because it is
   necessary to give a new value to a shadow once it has been tested
   for undefinedness, but unfortunately IR's SSA property disallows
   this.  Instead we must abandon the old shadow, allocate a new one
   and use that instead. */
static void newShadowTmp ( MCEnv* mce, IRTemp orig )
{
   tl_assert(orig < mce->n_originalTmps);
   mce->tmpMap[orig]
      = newIRTemp(mce->bb->tyenv,
                  shadowType(mce->bb->tyenv->types[orig]));
}


/*------------------------------------------------------------*/
/*--- IRAtoms -- a subset of IRExprs                       ---*/
/*------------------------------------------------------------*/

/* An atom is either an IRExpr_Const or an IRExpr_Tmp, as defined by
   isIRAtom() in libvex_ir.h.  Because this instrumenter expects flat
   input, most of this code deals in atoms.  Usefully, a value atom
   always has a V-value which is also an atom: constants are shadowed
   by constants, and temps are shadowed by the corresponding shadow
   temporary. */

typedef IRExpr IRAtom;

/* (used for sanity checks only): is this an atom which looks
   like it's from original code? */
static Bool isOriginalAtom ( MCEnv* mce, IRAtom* a1 )
{
   if (a1->tag == Iex_Const)
      return True;
   if (a1->tag == Iex_Tmp && a1->Iex.Tmp.tmp < mce->n_originalTmps)
      return True;
   return False;
}

/* (used for sanity checks only): is this an atom which looks
   like it's from shadow code? */
static Bool isShadowAtom ( MCEnv* mce, IRAtom* a1 )
{
   if (a1->tag == Iex_Const)
      return True;
   if (a1->tag == Iex_Tmp && a1->Iex.Tmp.tmp >= mce->n_originalTmps)
      return True;
   return False;
}

/* (used for sanity checks only): check that both args are atoms and
   are identically-kinded. */
static Bool sameKindedAtoms ( IRAtom* a1, IRAtom* a2 )
{
   if (a1->tag == Iex_Tmp && a2->tag == Iex_Tmp)
      return True;
   if (a1->tag == Iex_Const && a2->tag == Iex_Const)
      return True;
   return False;
}


/*------------------------------------------------------------*/
/*--- Type management                                      ---*/
/*------------------------------------------------------------*/

/* Shadow state is always accessed using integer types.  This returns
   an integer type with the same size (as per sizeofIRType) as the
   given type.  The only valid shadow types are Bit, I8, I16, I32,
   I64, I128, V128. */

static IRType shadowType ( IRType ty )
{
   switch (ty) {
      case Ity_I1:
      case Ity_I8:
      case Ity_I16:
      case Ity_I32:
      case Ity_I64:
      case Ity_I128: return ty;
      case Ity_F32:  return Ity_I32;
      case Ity_F64:  return Ity_I64;
      case Ity_V128: return Ity_V128;
      default: ppIRType(ty);
               VG_(tool_panic)("memcheck:shadowType");
   }
}

/* Produce a 'defined' value of the given shadow type.  Should only be
   supplied shadow types (Bit/I8/I16/I32/I64/V128). */
static IRExpr* definedOfType ( IRType ty ) {
   switch (ty) {
      case Ity_I1:   return IRExpr_Const(IRConst_U1(False));
      case Ity_I8:   return IRExpr_Const(IRConst_U8(0));
      case Ity_I16:  return IRExpr_Const(IRConst_U16(0));
      case Ity_I32:  return IRExpr_Const(IRConst_U32(0));
      case Ity_I64:  return IRExpr_Const(IRConst_U64(0));
      case Ity_V128: return IRExpr_Const(IRConst_V128(0x0000));
      default:       VG_(tool_panic)("memcheck:definedOfType");
   }
}


/*------------------------------------------------------------*/
/*--- Constructing IR fragments                            ---*/
/*------------------------------------------------------------*/

/* assign value to tmp */
#define assign(_bb,_tmp,_expr)   \
   addStmtToIRBB((_bb), IRStmt_Tmp((_tmp),(_expr)))

/* add stmt to a bb */
#define stmt(_bb,_stmt)    \
   addStmtToIRBB((_bb), (_stmt))

/* build various kinds of expressions */
#define binop(_op, _arg1, _arg2) IRExpr_Binop((_op),(_arg1),(_arg2))
#define unop(_op, _arg)          IRExpr_Unop((_op),(_arg))
#define mkU8(_n)                 IRExpr_Const(IRConst_U8(_n))
#define mkU16(_n)                IRExpr_Const(IRConst_U16(_n))
#define mkU32(_n)                IRExpr_Const(IRConst_U32(_n))
#define mkU64(_n)                IRExpr_Const(IRConst_U64(_n))
#define mkV128(_n)               IRExpr_Const(IRConst_V128(_n))
#define mkexpr(_tmp)             IRExpr_Tmp((_tmp))

/* bind the given expression to a new temporary, and return the
   temporary.  This effectively converts an arbitrary expression into
   an atom. */
static IRAtom* assignNew ( MCEnv* mce, IRType ty, IRExpr* e ) {
   IRTemp t = newIRTemp(mce->bb->tyenv, ty);
   assign(mce->bb, t, e);
   return mkexpr(t);
}


/*------------------------------------------------------------*/
/*--- Constructing definedness primitive ops               ---*/
/*------------------------------------------------------------*/

/* --------- Defined-if-either-defined --------- */

static IRAtom* mkDifD8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I8, binop(Iop_And8, a1, a2));
}

static IRAtom* mkDifD16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I16, binop(Iop_And16, a1, a2));
}

static IRAtom* mkDifD32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I32, binop(Iop_And32, a1, a2));
}

static IRAtom* mkDifD64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I64, binop(Iop_And64, a1, a2));
}

static IRAtom* mkDifDV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_V128, binop(Iop_AndV128, a1, a2));
}

/* --------- Undefined-if-either-undefined --------- */

static IRAtom* mkUifU8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I8, binop(Iop_Or8, a1, a2));
}

static IRAtom* mkUifU16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I16, binop(Iop_Or16, a1, a2));
}

static IRAtom* mkUifU32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I32, binop(Iop_Or32, a1, a2));
}

static IRAtom* mkUifU64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I64, binop(Iop_Or64, a1, a2));
}

static IRAtom* mkUifUV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_V128, binop(Iop_OrV128, a1, a2));
}

static IRAtom* mkUifU ( MCEnv* mce, IRType vty, IRAtom* a1, IRAtom* a2 ) {
   switch (vty) {
      case Ity_I8:   return mkUifU8(mce, a1, a2);
      case Ity_I16:  return mkUifU16(mce, a1, a2);
      case Ity_I32:  return mkUifU32(mce, a1, a2);
      case Ity_I64:  return mkUifU64(mce, a1, a2);
      case Ity_V128: return mkUifUV128(mce, a1, a2);
      default:
         VG_(printf)("\n"); ppIRType(vty); VG_(printf)("\n");
         VG_(tool_panic)("memcheck:mkUifU");
   }
}

/* --------- The Left-family of operations. --------- */

static IRAtom* mkLeft8 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   /* It's safe to duplicate a1 since it's only an atom */
   return assignNew(mce, Ity_I8,
                    binop(Iop_Or8, a1,
                          assignNew(mce, Ity_I8,
                                    unop(Iop_Neg8, a1))));
}

static IRAtom* mkLeft16 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   /* It's safe to duplicate a1 since it's only an atom */
   return assignNew(mce, Ity_I16,
                    binop(Iop_Or16, a1,
                          assignNew(mce, Ity_I16,
                                    unop(Iop_Neg16, a1))));
}

static IRAtom* mkLeft32 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   /* It's safe to duplicate a1 since it's only an atom */
   return assignNew(mce, Ity_I32,
                    binop(Iop_Or32, a1,
                          assignNew(mce, Ity_I32,
                                    unop(Iop_Neg32, a1))));
}

static IRAtom* mkLeft64 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   /* It's safe to duplicate a1 since it's only an atom */
   return assignNew(mce, Ity_I64,
                    binop(Iop_Or64, a1,
                          assignNew(mce, Ity_I64,
                                    unop(Iop_Neg64, a1))));
}

/* --------- 'Improvement' functions for AND/OR. --------- */

/* ImproveAND(data, vbits) = data OR vbits.  Defined (0) data 0s give
   defined (0); all other combinations give undefined (1).
*/
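/* Per-bit example (illustrative): a data bit of 0 whose vbit is 0 (a
   defined zero) gives 0|0 = 0 -- the AND result bit is defined no
   matter what the other operand is.  Every other combination gives 1,
   i.e. claims no improvement for that bit. */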
static IRAtom* mkImproveAND8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(mce, Ity_I8, binop(Iop_Or8, data, vbits));
}

static IRAtom* mkImproveAND16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(mce, Ity_I16, binop(Iop_Or16, data, vbits));
}

static IRAtom* mkImproveAND32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(mce, Ity_I32, binop(Iop_Or32, data, vbits));
}

static IRAtom* mkImproveAND64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(mce, Ity_I64, binop(Iop_Or64, data, vbits));
}

static IRAtom* mkImproveANDV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(mce, Ity_V128, binop(Iop_OrV128, data, vbits));
}

sewardj95448072004-11-22 20:19:51 +0000436/* ImproveOR(data, vbits) = ~data OR vbits. Defined (0) data 1s give
437 defined (0); all other -> undefined (1).
438*/
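/* Per-bit example (illustrative): a data bit of 1 whose vbit is 0 (a
   defined one) gives ~1|0 = 0 -- the OR result bit is defined no
   matter what the other operand is.  Every other combination gives 1,
   claiming nothing for that bit. */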
static IRAtom* mkImproveOR8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             mce, Ity_I8,
             binop(Iop_Or8,
                   assignNew(mce, Ity_I8, unop(Iop_Not8, data)),
                   vbits) );
}

static IRAtom* mkImproveOR16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             mce, Ity_I16,
             binop(Iop_Or16,
                   assignNew(mce, Ity_I16, unop(Iop_Not16, data)),
                   vbits) );
}

static IRAtom* mkImproveOR32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             mce, Ity_I32,
             binop(Iop_Or32,
                   assignNew(mce, Ity_I32, unop(Iop_Not32, data)),
                   vbits) );
}

static IRAtom* mkImproveOR64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             mce, Ity_I64,
             binop(Iop_Or64,
                   assignNew(mce, Ity_I64, unop(Iop_Not64, data)),
                   vbits) );
}

static IRAtom* mkImproveORV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             mce, Ity_V128,
             binop(Iop_OrV128,
                   assignNew(mce, Ity_V128, unop(Iop_NotV128, data)),
                   vbits) );
}

/* --------- Pessimising casts. --------- */

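/* A pessimising cast ("PCast") first squashes vbits of any width down
   to a single bit -- 0 ("all defined") iff every input bit is 0, else
   1 -- and then sign-extends that bit to the destination width.
   Illustrative example (values chosen arbitrarily):
      mkPCastTo(mce, Ity_I32, <I8 vbits 0x00>) --> 0x00000000 (defined)
      mkPCastTo(mce, Ity_I32, <I8 vbits 0x40>) --> 0xFFFFFFFF (undefined)
*/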
static IRAtom* mkPCastTo( MCEnv* mce, IRType dst_ty, IRAtom* vbits )
{
   IRType  ty;
   IRAtom* tmp1;
   /* Note, dst_ty is a shadow type, not an original type. */
   /* First of all, collapse vbits down to a single bit. */
   tl_assert(isShadowAtom(mce,vbits));
   ty   = typeOfIRExpr(mce->bb->tyenv, vbits);
   tmp1 = NULL;
   switch (ty) {
      case Ity_I1:
         tmp1 = vbits;
         break;
      case Ity_I8:
         tmp1 = assignNew(mce, Ity_I1, unop(Iop_CmpNEZ8, vbits));
         break;
      case Ity_I16:
         tmp1 = assignNew(mce, Ity_I1, unop(Iop_CmpNEZ16, vbits));
         break;
      case Ity_I32:
         tmp1 = assignNew(mce, Ity_I1, unop(Iop_CmpNEZ32, vbits));
         break;
      case Ity_I64:
         tmp1 = assignNew(mce, Ity_I1, unop(Iop_CmpNEZ64, vbits));
         break;
      case Ity_I128: {
         /* Gah.  Chop it in half, OR the halves together, and compare
            that with zero. */
         IRAtom* tmp2 = assignNew(mce, Ity_I64, unop(Iop_128HIto64, vbits));
         IRAtom* tmp3 = assignNew(mce, Ity_I64, unop(Iop_128to64, vbits));
         IRAtom* tmp4 = assignNew(mce, Ity_I64, binop(Iop_Or64, tmp2, tmp3));
         tmp1         = assignNew(mce, Ity_I1,
                                       unop(Iop_CmpNEZ64, tmp4));
         break;
      }
      default:
         ppIRType(ty);
         VG_(tool_panic)("mkPCastTo(1)");
   }
   tl_assert(tmp1);
   /* Now widen up to the dst type. */
   switch (dst_ty) {
      case Ity_I1:
         return tmp1;
      case Ity_I8:
         return assignNew(mce, Ity_I8, unop(Iop_1Sto8, tmp1));
      case Ity_I16:
         return assignNew(mce, Ity_I16, unop(Iop_1Sto16, tmp1));
      case Ity_I32:
         return assignNew(mce, Ity_I32, unop(Iop_1Sto32, tmp1));
      case Ity_I64:
         return assignNew(mce, Ity_I64, unop(Iop_1Sto64, tmp1));
      case Ity_V128:
         tmp1 = assignNew(mce, Ity_I64,  unop(Iop_1Sto64, tmp1));
         tmp1 = assignNew(mce, Ity_V128, binop(Iop_64HLtoV128, tmp1, tmp1));
         return tmp1;
      case Ity_I128:
         tmp1 = assignNew(mce, Ity_I64,  unop(Iop_1Sto64, tmp1));
         tmp1 = assignNew(mce, Ity_I128, binop(Iop_64HLto128, tmp1, tmp1));
         return tmp1;
      default:
         ppIRType(dst_ty);
         VG_(tool_panic)("mkPCastTo(2)");
   }
}

/* --------- Accurate interpretation of CmpEQ/CmpNE. --------- */
/*
   Normally, we can do CmpEQ/CmpNE by doing UifU on the arguments, and
   PCasting to Ity_I1.  However, sometimes it is necessary to be more
   accurate.  The insight is that the result is defined if two
   corresponding bits can be found, one from each argument, so that
   both bits are defined but are different -- that makes EQ say "No"
   and NE say "Yes".  Hence, we compute an improvement term and DifD
   it onto the "normal" (UifU) result.

   The result is:

   PCastTo<1> (
      -- naive version
      PCastTo<sz>( UifU<sz>(vxx, vyy) )

      `DifD<sz>`

      -- improvement term
      PCastTo<sz>( PCast<sz>( CmpEQ<sz> ( vec, 1...1 ) ) )
   )

   where
     vec contains 0 (defined) bits where the corresponding arg bits
     are defined but different, and 1 bits otherwise.

     vec = Or<sz>( vxx,   // 0 iff bit defined
                   vyy,   // 0 iff bit defined
                   Not<sz>(Xor<sz>( xx, yy )) // 0 iff bits different
                 )

     If any bit of vec is 0, the result is defined and so the
     improvement term should produce 0...0, else it should produce
     1...1.

     Hence require for the improvement term:

        if vec == 1...1 then 1...1 else 0...0
        ->
        PCast<sz>( CmpEQ<sz> ( vec, 1...1 ) )

   This was extensively re-analysed and checked on 6 July 05.
*/
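/* Worked example (illustrative, I32): xx = 1, yy = 0, with vxx =
   0xFFFFFF00 (top 24 bits undefined) and vyy = 0 (fully defined).
   Bit 0 is defined in both args and differs, so
   vec = vxx | vyy | ~(xx^yy) = 0xFFFFFF00 | 0xFFFFFFFE = 0xFFFFFFFE,
   which is not 1...1; the improvement term is therefore 0...0 and the
   DifD forces the comparison result to 'defined', even though the
   naive UifU term alone would have said 'undefined'. */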
static IRAtom* expensiveCmpEQorNE ( MCEnv*  mce,
                                    IRType  ty,
                                    IRAtom* vxx, IRAtom* vyy,
                                    IRAtom* xx,  IRAtom* yy )
{
   IRAtom *naive, *vec, *improvement_term;
   IRAtom *improved, *final_cast, *top;
   IROp   opDIFD, opUIFU, opXOR, opNOT, opCMP, opOR;

   tl_assert(isShadowAtom(mce,vxx));
   tl_assert(isShadowAtom(mce,vyy));
   tl_assert(isOriginalAtom(mce,xx));
   tl_assert(isOriginalAtom(mce,yy));
   tl_assert(sameKindedAtoms(vxx,xx));
   tl_assert(sameKindedAtoms(vyy,yy));

   switch (ty) {
      case Ity_I32:
         opOR   = Iop_Or32;
         opDIFD = Iop_And32;
         opUIFU = Iop_Or32;
         opNOT  = Iop_Not32;
         opXOR  = Iop_Xor32;
         opCMP  = Iop_CmpEQ32;
         top    = mkU32(0xFFFFFFFF);
         break;
      case Ity_I64:
         opOR   = Iop_Or64;
         opDIFD = Iop_And64;
         opUIFU = Iop_Or64;
         opNOT  = Iop_Not64;
         opXOR  = Iop_Xor64;
         opCMP  = Iop_CmpEQ64;
         top    = mkU64(0xFFFFFFFFFFFFFFFFULL);
         break;
      default:
         VG_(tool_panic)("expensiveCmpEQorNE");
   }

   naive
      = mkPCastTo(mce,ty, assignNew(mce, ty, binop(opUIFU, vxx, vyy)));

   vec
      = assignNew(
           mce,ty,
           binop( opOR,
                  assignNew(mce,ty, binop(opOR, vxx, vyy)),
                  assignNew(
                     mce,ty,
                     unop( opNOT,
                           assignNew(mce,ty, binop(opXOR, xx, yy))))));

   improvement_term
      = mkPCastTo( mce,ty, assignNew(mce,Ity_I1, binop(opCMP, vec, top)));

   improved
      = assignNew( mce,ty, binop(opDIFD, naive, improvement_term) );

   final_cast
      = mkPCastTo( mce, Ity_I1, improved );

   return final_cast;
}


/* --------- Semi-accurate interpretation of CmpORD. --------- */

/* CmpORD32{S,U} does PowerPC-style 3-way comparisons:

      CmpORD32S(x,y) = 1<<3   if  x <s y
                     = 1<<2   if  x >s y
                     = 1<<1   if  x == y

   and similarly the unsigned variant.  The default interpretation is:

      CmpORD32{S,U}#(x,y,x#,y#) = PCast(x# `UifU` y#)
                                  & (7<<1)

   The "& (7<<1)" reflects the fact that all result bits except 3,2,1
   are zero and therefore defined (viz, zero).

   Also deal with a special case better:

      CmpORD32S(x,0)

   Here, bit 3 (LT) of the result is a copy of the top bit of x and
   will be defined even if the rest of x isn't.  In which case we do:

      CmpORD32S#(x,x#,0,{impliedly 0}#)
         = PCast(x#) & (3<<1)      -- standard interp for GT,EQ
           | (x# >> 31) << 3       -- LT = x#[31]
*/
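/* Worked example (illustrative): CmpORD32S(x,0) where only x's sign
   bit is defined, i.e. xxhash = 0x7FFFFFFF.  Then PCast(xxhash) &
   (3<<1) = 0xFFFFFFFF & 6 = 6 (GT and EQ undefined), while
   (xxhash >> 31) << 3 = 0 (LT defined), giving shadow 6: bit 3 of the
   result may be used safely even though bits 2 and 1 may not. */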
static Bool isZeroU32 ( IRAtom* e )
{
   return
      toBool( e->tag == Iex_Const
              && e->Iex.Const.con->tag == Ico_U32
              && e->Iex.Const.con->Ico.U32 == 0 );
}

static IRAtom* doCmpORD32 ( MCEnv*  mce,
                            IROp    cmp_op,
                            IRAtom* xxhash, IRAtom* yyhash,
                            IRAtom* xx, IRAtom* yy )
{
   tl_assert(isShadowAtom(mce,xxhash));
   tl_assert(isShadowAtom(mce,yyhash));
   tl_assert(isOriginalAtom(mce,xx));
   tl_assert(isOriginalAtom(mce,yy));
   tl_assert(sameKindedAtoms(xxhash,xx));
   tl_assert(sameKindedAtoms(yyhash,yy));
   tl_assert(cmp_op == Iop_CmpORD32S || cmp_op == Iop_CmpORD32U);

   if (0) {
      ppIROp(cmp_op); VG_(printf)(" ");
      ppIRExpr(xx); VG_(printf)(" "); ppIRExpr( yy ); VG_(printf)("\n");
   }

   if (isZeroU32(yy)) {
      /* fancy interpretation */
      /* if yy is zero, then it must be fully defined (zero#). */
      tl_assert(isZeroU32(yyhash));
      return
         binop(
            Iop_Or32,
            assignNew(
               mce,Ity_I32,
               binop(
                  Iop_And32,
                  mkPCastTo(mce,Ity_I32, xxhash),
                  mkU32(3<<1)
               )),
            assignNew(
               mce,Ity_I32,
               binop(
                  Iop_Shl32,
                  assignNew(
                     mce,Ity_I32,
                     binop(Iop_Shr32, xxhash, mkU8(31))),
                  mkU8(3)
               ))
         );
   } else {
      /* standard interpretation */
      return
         binop(
            Iop_And32,
            mkPCastTo( mce,Ity_I32,
                       mkUifU32(mce, xxhash,yyhash)),
            mkU32(7<<1)
         );
   }
}


/*------------------------------------------------------------*/
/*--- Emit a test and complaint if something is undefined. ---*/
/*------------------------------------------------------------*/

/* Set the annotations on a dirty helper to indicate that the stack
   pointer and instruction pointer might be read.  This is the
   behaviour of all 'emit-a-complaint' style functions we might
   call. */

static void setHelperAnns ( MCEnv* mce, IRDirty* di ) {
   di->nFxState = 2;
   di->fxState[0].fx     = Ifx_Read;
   di->fxState[0].offset = mce->layout->offset_SP;
   di->fxState[0].size   = mce->layout->sizeof_SP;
   di->fxState[1].fx     = Ifx_Read;
   di->fxState[1].offset = mce->layout->offset_IP;
   di->fxState[1].size   = mce->layout->sizeof_IP;
}


/* Check the supplied **original** atom for undefinedness, and emit a
   complaint if so.  Once that happens, mark it as defined.  This is
   possible because the atom is either a tmp or literal.  If it's a
   tmp, it will be shadowed by a tmp, and so we can set the shadow to
   be defined.  In fact as mentioned above, we will have to allocate a
   new tmp to carry the new 'defined' shadow value, and update the
   original->tmp mapping accordingly; we cannot simply assign a new
   value to an existing shadow tmp as this breaks SSAness -- resulting
   in the post-instrumentation sanity checker spluttering in disapproval.
*/
static void complainIfUndefined ( MCEnv* mce, IRAtom* atom )
{
   IRAtom*  vatom;
   IRType   ty;
   Int      sz;
   IRDirty* di;
   IRAtom*  cond;

   /* Since the original expression is atomic, there's no duplicated
      work generated by making multiple V-expressions for it.  So we
      don't really care about the possibility that someone else may
      also create a V-interpretation for it. */
   tl_assert(isOriginalAtom(mce, atom));
   vatom = expr2vbits( mce, atom );
   tl_assert(isShadowAtom(mce, vatom));
   tl_assert(sameKindedAtoms(atom, vatom));

   ty = typeOfIRExpr(mce->bb->tyenv, vatom);

   /* sz is only used for constructing the error message */
   sz = ty==Ity_I1 ? 0 : sizeofIRType(ty);

   cond = mkPCastTo( mce, Ity_I1, vatom );
   /* cond will be 0 if all defined, and 1 if any not defined. */

   switch (sz) {
      case 0:
         di = unsafeIRDirty_0_N( 0/*regparms*/,
                                 "MC_(helperc_value_check0_fail)",
                                 &MC_(helperc_value_check0_fail),
                                 mkIRExprVec_0()
                               );
         break;
      case 1:
         di = unsafeIRDirty_0_N( 0/*regparms*/,
                                 "MC_(helperc_value_check1_fail)",
                                 &MC_(helperc_value_check1_fail),
                                 mkIRExprVec_0()
                               );
         break;
      case 4:
         di = unsafeIRDirty_0_N( 0/*regparms*/,
                                 "MC_(helperc_value_check4_fail)",
                                 &MC_(helperc_value_check4_fail),
                                 mkIRExprVec_0()
                               );
         break;
      case 8:
         di = unsafeIRDirty_0_N( 0/*regparms*/,
                                 "MC_(helperc_value_check8_fail)",
                                 &MC_(helperc_value_check8_fail),
                                 mkIRExprVec_0()
                               );
         break;
      default:
         di = unsafeIRDirty_0_N( 1/*regparms*/,
                                 "MC_(helperc_complain_undef)",
                                 &MC_(helperc_complain_undef),
                                 mkIRExprVec_1( mkIRExpr_HWord( sz ))
                               );
         break;
   }
   di->guard = cond;
   setHelperAnns( mce, di );
   stmt( mce->bb, IRStmt_Dirty(di));

   /* Set the shadow tmp to be defined.  First, update the
      orig->shadow tmp mapping to reflect the fact that this shadow is
      getting a new value. */
   tl_assert(isIRAtom(vatom));
   /* sameKindedAtoms ... */
   if (vatom->tag == Iex_Tmp) {
      tl_assert(atom->tag == Iex_Tmp);
      newShadowTmp(mce, atom->Iex.Tmp.tmp);
      assign(mce->bb, findShadowTmp(mce, atom->Iex.Tmp.tmp),
                      definedOfType(ty));
   }
}


/*------------------------------------------------------------*/
/*--- Shadowing PUTs/GETs, and indexed variants thereof    ---*/
/*------------------------------------------------------------*/

/* Examine the always-defined sections declared in layout to see if
   the (offset,size) section is within one.  Note, it is an error to
   partially fall into such a region: (offset,size) should either be
   completely in such a region or completely not-in such a region.
*/
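/* For instance (hypothetical layout): if alwaysDefd[] held the single
   section {offset 64, size 4}, then isAlwaysDefd(mce,64,4) would
   return True, isAlwaysDefd(mce,0,4) False, and isAlwaysDefd(mce,62,4)
   would panic, since it partially overlaps the section. */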
static Bool isAlwaysDefd ( MCEnv* mce, Int offset, Int size )
{
   Int minoffD, maxoffD, i;
   Int minoff = offset;
   Int maxoff = minoff + size - 1;
   tl_assert((minoff & ~0xFFFF) == 0);
   tl_assert((maxoff & ~0xFFFF) == 0);

   for (i = 0; i < mce->layout->n_alwaysDefd; i++) {
      minoffD = mce->layout->alwaysDefd[i].offset;
      maxoffD = minoffD + mce->layout->alwaysDefd[i].size - 1;
      tl_assert((minoffD & ~0xFFFF) == 0);
      tl_assert((maxoffD & ~0xFFFF) == 0);

      if (maxoff < minoffD || maxoffD < minoff)
         continue; /* no overlap */
      if (minoff >= minoffD && maxoff <= maxoffD)
         return True; /* completely contained in an always-defd section */

      VG_(tool_panic)("memcheck:isAlwaysDefd:partial overlap");
   }
   return False; /* could not find any containing section */
}


/* Generate into bb suitable actions to shadow this Put.  If the state
   slice is marked 'always defined', do nothing.  Otherwise, write the
   supplied V bits to the shadow state.  We can pass in either an
   original atom or a V-atom, but not both.  In the former case the
   relevant V-bits are then generated from the original.
*/
static
void do_shadow_PUT ( MCEnv* mce,  Int offset,
                     IRAtom* atom, IRAtom* vatom )
{
   IRType ty;
   if (atom) {
      tl_assert(!vatom);
      tl_assert(isOriginalAtom(mce, atom));
      vatom = expr2vbits( mce, atom );
   } else {
      tl_assert(vatom);
      tl_assert(isShadowAtom(mce, vatom));
   }

   ty = typeOfIRExpr(mce->bb->tyenv, vatom);
   tl_assert(ty != Ity_I1);
   if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
      /* later: no ... */
      /* emit code to emit a complaint if any of the vbits are 1. */
      /* complainIfUndefined(mce, atom); */
   } else {
      /* Do a plain shadow Put. */
      stmt( mce->bb, IRStmt_Put( offset + mce->layout->total_sizeB, vatom ) );
   }
}


/* Generate into bb suitable actions to shadow this PutI.  If the
   state slice is marked 'always defined', do nothing; otherwise write
   the supplied atom's V bits to the shadow area.
*/
static
void do_shadow_PUTI ( MCEnv* mce,
                      IRArray* descr, IRAtom* ix, Int bias, IRAtom* atom )
{
   IRAtom* vatom;
   IRType  ty, tyS;
   Int     arrSize;

   tl_assert(isOriginalAtom(mce,atom));
   vatom = expr2vbits( mce, atom );
   tl_assert(sameKindedAtoms(atom, vatom));
   ty  = descr->elemTy;
   tyS = shadowType(ty);
   arrSize = descr->nElems * sizeofIRType(ty);
   tl_assert(ty != Ity_I1);
   tl_assert(isOriginalAtom(mce,ix));
   complainIfUndefined(mce,ix);
   if (isAlwaysDefd(mce, descr->base, arrSize)) {
      /* later: no ... */
      /* emit code to emit a complaint if any of the vbits are 1. */
      /* complainIfUndefined(mce, atom); */
   } else {
      /* Do a cloned version of the Put that refers to the shadow
         area. */
      IRArray* new_descr
         = mkIRArray( descr->base + mce->layout->total_sizeB,
                      tyS, descr->nElems);
      stmt( mce->bb, IRStmt_PutI( new_descr, ix, bias, vatom ));
   }
}

977
978/* Return an expression which contains the V bits corresponding to the
979 given GET (passed in in pieces).
980*/
981static
982IRExpr* shadow_GET ( MCEnv* mce, Int offset, IRType ty )
983{
984 IRType tyS = shadowType(ty);
985 tl_assert(ty != Ity_I1);
986 if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
987 /* Always defined, return all zeroes of the relevant type */
988 return definedOfType(tyS);
989 } else {
990 /* return a cloned version of the Get that refers to the shadow
991 area. */
992 return IRExpr_Get( offset + mce->layout->total_sizeB, tyS );
993 }
994}
995
996
/* Return an expression which contains the V bits corresponding to the
   given GETI (passed in, in pieces).
*/
static
IRExpr* shadow_GETI ( MCEnv* mce, IRArray* descr, IRAtom* ix, Int bias )
{
   IRType ty   = descr->elemTy;
   IRType tyS  = shadowType(ty);
   Int arrSize = descr->nElems * sizeofIRType(ty);
   tl_assert(ty != Ity_I1);
   tl_assert(isOriginalAtom(mce,ix));
   complainIfUndefined(mce,ix);
   if (isAlwaysDefd(mce, descr->base, arrSize)) {
      /* Always defined, return all zeroes of the relevant type */
      return definedOfType(tyS);
   } else {
      /* return a cloned version of the Get that refers to the shadow
         area. */
      IRArray* new_descr
         = mkIRArray( descr->base + mce->layout->total_sizeB,
                      tyS, descr->nElems);
      return IRExpr_GetI( new_descr, ix, bias );
   }
}


/*------------------------------------------------------------*/
/*--- Generating approximations for unknown operations,    ---*/
/*--- using lazy-propagate semantics                       ---*/
/*------------------------------------------------------------*/

/* Lazy propagation of undefinedness from two values, resulting in the
   specified shadow type.
*/
static
IRAtom* mkLazy2 ( MCEnv* mce, IRType finalVty, IRAtom* va1, IRAtom* va2 )
{
   IRAtom* at;
   IRType t1 = typeOfIRExpr(mce->bb->tyenv, va1);
   IRType t2 = typeOfIRExpr(mce->bb->tyenv, va2);
   tl_assert(isShadowAtom(mce,va1));
   tl_assert(isShadowAtom(mce,va2));

   /* The general case is inefficient because PCast is an expensive
      operation.  Here are some special cases which use PCast only
      once rather than twice. */

   /* I64 x I64 -> I64 */
   if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I64) {
      if (0) VG_(printf)("mkLazy2: I64 x I64 -> I64\n");
      at = mkUifU(mce, Ity_I64, va1, va2);
      at = mkPCastTo(mce, Ity_I64, at);
      return at;
   }

   /* I64 x I64 -> I32 */
   if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I32) {
      if (0) VG_(printf)("mkLazy2: I64 x I64 -> I32\n");
      at = mkUifU(mce, Ity_I64, va1, va2);
      at = mkPCastTo(mce, Ity_I32, at);
      return at;
   }

   if (0) {
      VG_(printf)("mkLazy2 ");
      ppIRType(t1);
      VG_(printf)("_");
      ppIRType(t2);
      VG_(printf)("_");
      ppIRType(finalVty);
      VG_(printf)("\n");
   }

   /* General case: force everything via 32-bit intermediaries. */
   at = mkPCastTo(mce, Ity_I32, va1);
   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
   at = mkPCastTo(mce, finalVty, at);
   return at;
}


/* Do the lazy propagation game from a null-terminated vector of
   atoms.  This is presumably the arguments to a helper call, so the
   IRCallee info is also supplied in order that we can know which
   arguments should be ignored (via the .mcx_mask field).
*/
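/* Example (illustrative): if cee->mcx_mask were 5 (binary 101), args
   0 and 2 would be skipped, and only the remaining args' definedness
   would be pessimistically merged into the result. */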
static
IRAtom* mkLazyN ( MCEnv* mce,
                  IRAtom** exprvec, IRType finalVtype, IRCallee* cee )
{
   Int i;
   IRAtom* here;
   IRAtom* curr = definedOfType(Ity_I32);
   for (i = 0; exprvec[i]; i++) {
      tl_assert(i < 32);
      tl_assert(isOriginalAtom(mce, exprvec[i]));
      /* Only take notice of this arg if the callee's mc-exclusion
         mask does not say it is to be excluded. */
      if (cee->mcx_mask & (1<<i)) {
         /* the arg is to be excluded from definedness checking.  Do
            nothing. */
         if (0) VG_(printf)("excluding %s(%d)\n", cee->name, i);
      } else {
         /* calculate the arg's definedness, and pessimistically merge
            it in. */
         here = mkPCastTo( mce, Ity_I32, expr2vbits(mce, exprvec[i]) );
         curr = mkUifU32(mce, here, curr);
      }
   }
   return mkPCastTo(mce, finalVtype, curr );
}


/*------------------------------------------------------------*/
/*--- Generating expensive sequences for exact carry-chain ---*/
/*--- propagation in add/sub and related operations.       ---*/
/*------------------------------------------------------------*/

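/* A sketch of the reasoning (not from the original comments): a_min
   and b_min below are the smallest values the operands could take,
   with every undefined bit forced to 0; a_max and b_max are the
   largest, with every undefined bit forced to 1.  Where the two
   extreme sums (or differences) agree in a bit, the carry chain into
   that bit cannot have been perturbed by any undefined input bit, so
   the XOR of the extremes marks the positions that may wobble;
   OR-ing in qaa|qbb then re-marks the directly-undefined bits. */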
static
IRAtom* expensiveAddSub ( MCEnv*  mce,
                          Bool    add,
                          IRType  ty,
                          IRAtom* qaa, IRAtom* qbb,
                          IRAtom* aa,  IRAtom* bb )
{
   IRAtom *a_min, *b_min, *a_max, *b_max;
   IROp   opAND, opOR, opXOR, opNOT, opADD, opSUB;

   tl_assert(isShadowAtom(mce,qaa));
   tl_assert(isShadowAtom(mce,qbb));
   tl_assert(isOriginalAtom(mce,aa));
   tl_assert(isOriginalAtom(mce,bb));
   tl_assert(sameKindedAtoms(qaa,aa));
   tl_assert(sameKindedAtoms(qbb,bb));

   switch (ty) {
      case Ity_I32:
         opAND = Iop_And32;
         opOR  = Iop_Or32;
         opXOR = Iop_Xor32;
         opNOT = Iop_Not32;
         opADD = Iop_Add32;
         opSUB = Iop_Sub32;
         break;
      case Ity_I64:
         opAND = Iop_And64;
         opOR  = Iop_Or64;
         opXOR = Iop_Xor64;
         opNOT = Iop_Not64;
         opADD = Iop_Add64;
         opSUB = Iop_Sub64;
         break;
      default:
         VG_(tool_panic)("expensiveAddSub");
   }

   // a_min = aa & ~qaa
   a_min = assignNew(mce,ty,
                     binop(opAND, aa,
                           assignNew(mce,ty, unop(opNOT, qaa))));

   // b_min = bb & ~qbb
   b_min = assignNew(mce,ty,
                     binop(opAND, bb,
                           assignNew(mce,ty, unop(opNOT, qbb))));

   // a_max = aa | qaa
   a_max = assignNew(mce,ty, binop(opOR, aa, qaa));

   // b_max = bb | qbb
   b_max = assignNew(mce,ty, binop(opOR, bb, qbb));

   if (add) {
      // result = (qaa | qbb) | ((a_min + b_min) ^ (a_max + b_max))
      return
      assignNew(mce,ty,
         binop( opOR,
                assignNew(mce,ty, binop(opOR, qaa, qbb)),
                assignNew(mce,ty,
                   binop( opXOR,
                          assignNew(mce,ty, binop(opADD, a_min, b_min)),
                          assignNew(mce,ty, binop(opADD, a_max, b_max))
                   )
                )
         )
      );
   } else {
      // result = (qaa | qbb) | ((a_min - b_max) ^ (a_max - b_min))
      return
      assignNew(mce,ty,
         binop( opOR,
                assignNew(mce,ty, binop(opOR, qaa, qbb)),
                assignNew(mce,ty,
                   binop( opXOR,
                          assignNew(mce,ty, binop(opSUB, a_min, b_max)),
                          assignNew(mce,ty, binop(opSUB, a_max, b_min))
                   )
                )
         )
      );
   }

}


/*------------------------------------------------------------*/
/*--- Scalar shifts.                                       ---*/
/*------------------------------------------------------------*/

/* Produce an interpretation for (aa << bb) (or >>s, >>u).  The basic
   idea is to shift the definedness bits by the original shift amount.
   This introduces 0s ("defined") in new positions for left shifts and
   unsigned right shifts, and copies the top definedness bit for
   signed right shifts.  So, conveniently, applying the original shift
   operator to the definedness bits for the left arg is exactly the
   right thing to do:

      (qaa << bb)

   However if the shift amount is undefined then the whole result
   is undefined.  Hence need:

      (qaa << bb) `UifU` PCast(qbb)

   If the shift amount bb is a literal then qbb will say 'all defined'
   and the UifU and PCast will get folded out by post-instrumentation
   optimisation.
*/
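/* Worked example (illustrative): for Shl32 with qaa = 0x00000001
   (only bit 0 of aa undefined) and bb a defined literal 2, qaa << 2 =
   0x00000004 and PCast(qbb) = 0, so exactly bit 2 of the result is
   flagged undefined -- just where the original undefined bit lands. */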
static IRAtom* scalarShift ( MCEnv*  mce,
                             IRType  ty,
                             IROp    original_op,
                             IRAtom* qaa, IRAtom* qbb,
                             IRAtom* aa,  IRAtom* bb )
{
   tl_assert(isShadowAtom(mce,qaa));
   tl_assert(isShadowAtom(mce,qbb));
   tl_assert(isOriginalAtom(mce,aa));
   tl_assert(isOriginalAtom(mce,bb));
   tl_assert(sameKindedAtoms(qaa,aa));
   tl_assert(sameKindedAtoms(qbb,bb));
   return
      assignNew(
         mce, ty,
         mkUifU( mce, ty,
                 assignNew(mce, ty, binop(original_op, qaa, bb)),
                 mkPCastTo(mce, ty, qbb)
         )
      );
}


/*------------------------------------------------------------*/
/*--- Helpers for dealing with vector primops.             ---*/
/*------------------------------------------------------------*/

/* Vector pessimisation -- pessimise within each lane individually. */

static IRAtom* mkPCast8x16 ( MCEnv* mce, IRAtom* at )
{
   return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ8x16, at));
}

static IRAtom* mkPCast16x8 ( MCEnv* mce, IRAtom* at )
{
   return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ16x8, at));
}

static IRAtom* mkPCast32x4 ( MCEnv* mce, IRAtom* at )
{
   return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ32x4, at));
}

static IRAtom* mkPCast64x2 ( MCEnv* mce, IRAtom* at )
{
   return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ64x2, at));
}

static IRAtom* mkPCast32x2 ( MCEnv* mce, IRAtom* at )
{
   return assignNew(mce, Ity_I64, unop(Iop_CmpNEZ32x2, at));
}

static IRAtom* mkPCast16x4 ( MCEnv* mce, IRAtom* at )
{
   return assignNew(mce, Ity_I64, unop(Iop_CmpNEZ16x4, at));
}

static IRAtom* mkPCast8x8 ( MCEnv* mce, IRAtom* at )
{
   return assignNew(mce, Ity_I64, unop(Iop_CmpNEZ8x8, at));
}


/* Here's a simple scheme capable of handling ops derived from SSE1
   code while only generating ops that can be efficiently implemented
   in SSE1. */

/* All-lanes versions are straightforward:

   binary32Fx4(x,y)   ==> PCast32x4(UifUV128(x#,y#))

   unary32Fx4(x,y)    ==> PCast32x4(x#)

   Lowest-lane-only versions are more complex:

   binary32F0x4(x,y)  ==> SetV128lo32(
                             x#,
                             PCast32(V128to32(UifUV128(x#,y#)))
                          )

   This is perhaps not so obvious.  In particular, it's faster to
   do a V128-bit UifU and then take the bottom 32 bits than the more
   obvious scheme of taking the bottom 32 bits of each operand
   and doing a 32-bit UifU, basically because UifU is fast and
   chopping lanes off vector values is slow.

   Finally:

   unary32F0x4(x)     ==> SetV128lo32(
                             x#,
                             PCast32(V128to32(x#))
                          )

   Where:

   PCast32(v#)   = 1Sto32(CmpNE32(v#,0))
   PCast32x4(v#) = CmpNEZ32x4(v#)
*/

static
IRAtom* binary32Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   tl_assert(isShadowAtom(mce, vatomY));
   at = mkUifUV128(mce, vatomX, vatomY);
   at = assignNew(mce, Ity_V128, mkPCast32x4(mce, at));
   return at;
}

static
IRAtom* unary32Fx4 ( MCEnv* mce, IRAtom* vatomX )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   at = assignNew(mce, Ity_V128, mkPCast32x4(mce, vatomX));
   return at;
}

static
IRAtom* binary32F0x4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   tl_assert(isShadowAtom(mce, vatomY));
   at = mkUifUV128(mce, vatomX, vatomY);
   at = assignNew(mce, Ity_I32, unop(Iop_V128to32, at));
   at = mkPCastTo(mce, Ity_I32, at);
   at = assignNew(mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
   return at;
}

static
IRAtom* unary32F0x4 ( MCEnv* mce, IRAtom* vatomX )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   at = assignNew(mce, Ity_I32, unop(Iop_V128to32, vatomX));
   at = mkPCastTo(mce, Ity_I32, at);
   at = assignNew(mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
   return at;
}

/* --- ... and ... 64Fx2 versions of the same ... --- */

static
IRAtom* binary64Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   tl_assert(isShadowAtom(mce, vatomY));
   at = mkUifUV128(mce, vatomX, vatomY);
   at = assignNew(mce, Ity_V128, mkPCast64x2(mce, at));
   return at;
}

static
IRAtom* unary64Fx2 ( MCEnv* mce, IRAtom* vatomX )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   at = assignNew(mce, Ity_V128, mkPCast64x2(mce, vatomX));
   return at;
}

static
IRAtom* binary64F0x2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   tl_assert(isShadowAtom(mce, vatomY));
   at = mkUifUV128(mce, vatomX, vatomY);
   at = assignNew(mce, Ity_I64, unop(Iop_V128to64, at));
   at = mkPCastTo(mce, Ity_I64, at);
   at = assignNew(mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
   return at;
}

static
IRAtom* unary64F0x2 ( MCEnv* mce, IRAtom* vatomX )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   at = assignNew(mce, Ity_I64, unop(Iop_V128to64, vatomX));
   at = mkPCastTo(mce, Ity_I64, at);
   at = assignNew(mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
   return at;
}

/* --- --- Vector saturated narrowing --- --- */

/* This is quite subtle.  What to do is simple:

   Let the original narrowing op be QNarrowW{S,U}xN.  Produce:

      the-narrowing-op( PCastWxN(vatom1), PCastWxN(vatom2))

   Why this is right is not so simple.  Consider a lane in the args
   (vatom1 or vatom2; it doesn't matter which).

   After the PCast, that lane is all 0s (defined) or all 1s
   (undefined).

   Both signed and unsigned saturating narrowing of all 0s produces
   all 0s, which is what we want.

   The all-1s case is more complex.  Unsigned narrowing interprets an
   all-1s input as the largest unsigned integer, and so produces all
   1s as a result since that is the largest unsigned value at the
   smaller width.

   Signed narrowing interprets all 1s as -1.  Fortunately, -1 narrows
   to -1, so we still wind up with all 1s at the smaller width.

   So: In short, pessimise the args, then apply the original narrowing
   op.
*/
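/* Worked example (illustrative): for Iop_QNarrow16Sx8, a 16-bit lane
   with any nonzero shadow is PCast-ed to 0xFFFF, which the signed
   saturating narrow treats as -1 and narrows to 0xFF, so the 8-bit
   result lane is fully undefined.  A fully-defined lane (0x0000)
   narrows to 0x00 and stays fully defined. */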
1444static
sewardj20d38f22005-02-07 23:50:18 +00001445IRAtom* vectorNarrowV128 ( MCEnv* mce, IROp narrow_op,
sewardja1d93302004-12-12 16:45:06 +00001446 IRAtom* vatom1, IRAtom* vatom2)
1447{
1448 IRAtom *at1, *at2, *at3;
1449 IRAtom* (*pcast)( MCEnv*, IRAtom* );
1450 switch (narrow_op) {
1451 case Iop_QNarrow32Sx4: pcast = mkPCast32x4; break;
sewardj43d60752005-11-10 18:13:01 +00001452 case Iop_QNarrow32Ux4: pcast = mkPCast32x4; break;
sewardja1d93302004-12-12 16:45:06 +00001453 case Iop_QNarrow16Sx8: pcast = mkPCast16x8; break;
1454 case Iop_QNarrow16Ux8: pcast = mkPCast16x8; break;
sewardj20d38f22005-02-07 23:50:18 +00001455 default: VG_(tool_panic)("vectorNarrowV128");
sewardja1d93302004-12-12 16:45:06 +00001456 }
1457 tl_assert(isShadowAtom(mce,vatom1));
1458 tl_assert(isShadowAtom(mce,vatom2));
1459 at1 = assignNew(mce, Ity_V128, pcast(mce, vatom1));
1460 at2 = assignNew(mce, Ity_V128, pcast(mce, vatom2));
1461 at3 = assignNew(mce, Ity_V128, binop(narrow_op, at1, at2));
1462 return at3;
1463}
1464
sewardjacd2e912005-01-13 19:17:06 +00001465static
1466IRAtom* vectorNarrow64 ( MCEnv* mce, IROp narrow_op,
1467 IRAtom* vatom1, IRAtom* vatom2)
1468{
1469 IRAtom *at1, *at2, *at3;
1470 IRAtom* (*pcast)( MCEnv*, IRAtom* );
1471 switch (narrow_op) {
1472 case Iop_QNarrow32Sx2: pcast = mkPCast32x2; break;
1473 case Iop_QNarrow16Sx4: pcast = mkPCast16x4; break;
1474 case Iop_QNarrow16Ux4: pcast = mkPCast16x4; break;
1475 default: VG_(tool_panic)("vectorNarrow64");
1476 }
1477 tl_assert(isShadowAtom(mce,vatom1));
1478 tl_assert(isShadowAtom(mce,vatom2));
1479 at1 = assignNew(mce, Ity_I64, pcast(mce, vatom1));
1480 at2 = assignNew(mce, Ity_I64, pcast(mce, vatom2));
1481 at3 = assignNew(mce, Ity_I64, binop(narrow_op, at1, at2));
1482 return at3;
1483}
1484
sewardja1d93302004-12-12 16:45:06 +00001485
1486/* --- --- Vector integer arithmetic --- --- */
1487
1488/* Simple ... UifU the args and per-lane pessimise the results. */
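/* That is: any undefined bit in either source lane renders the whole
   corresponding result lane undefined.  Cheap and safe, though
   pessimistic: for, say, Add8x16, a single undefined source bit
   poisons its entire 8-bit result lane. */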
sewardjacd2e912005-01-13 19:17:06 +00001489
sewardj20d38f22005-02-07 23:50:18 +00001490/* --- V128-bit versions --- */
sewardjacd2e912005-01-13 19:17:06 +00001491
sewardja1d93302004-12-12 16:45:06 +00001492static
1493IRAtom* binary8Ix16 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
1494{
1495 IRAtom* at;
sewardj20d38f22005-02-07 23:50:18 +00001496 at = mkUifUV128(mce, vatom1, vatom2);
sewardja1d93302004-12-12 16:45:06 +00001497 at = mkPCast8x16(mce, at);
1498 return at;
1499}
1500
1501static
1502IRAtom* binary16Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
1503{
1504 IRAtom* at;
sewardj20d38f22005-02-07 23:50:18 +00001505 at = mkUifUV128(mce, vatom1, vatom2);
sewardja1d93302004-12-12 16:45:06 +00001506 at = mkPCast16x8(mce, at);
1507 return at;
1508}
1509
1510static
1511IRAtom* binary32Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
1512{
1513 IRAtom* at;
sewardj20d38f22005-02-07 23:50:18 +00001514 at = mkUifUV128(mce, vatom1, vatom2);
sewardja1d93302004-12-12 16:45:06 +00001515 at = mkPCast32x4(mce, at);
1516 return at;
1517}
1518
1519static
1520IRAtom* binary64Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
1521{
1522 IRAtom* at;
sewardj20d38f22005-02-07 23:50:18 +00001523 at = mkUifUV128(mce, vatom1, vatom2);
sewardja1d93302004-12-12 16:45:06 +00001524 at = mkPCast64x2(mce, at);
1525 return at;
1526}
sewardj3245c912004-12-10 14:58:26 +00001527
sewardjacd2e912005-01-13 19:17:06 +00001528/* --- 64-bit versions --- */
1529
1530static
1531IRAtom* binary8Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
1532{
1533 IRAtom* at;
1534 at = mkUifU64(mce, vatom1, vatom2);
1535 at = mkPCast8x8(mce, at);
1536 return at;
1537}
1538
1539static
1540IRAtom* binary16Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
1541{
1542 IRAtom* at;
1543 at = mkUifU64(mce, vatom1, vatom2);
1544 at = mkPCast16x4(mce, at);
1545 return at;
1546}
1547
1548static
1549IRAtom* binary32Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
1550{
1551 IRAtom* at;
1552 at = mkUifU64(mce, vatom1, vatom2);
1553 at = mkPCast32x2(mce, at);
1554 return at;
1555}
1556
sewardj3245c912004-12-10 14:58:26 +00001557
1558/*------------------------------------------------------------*/
sewardj95448072004-11-22 20:19:51 +00001559/*--- Generate shadow values from all kinds of IRExprs. ---*/
1560/*------------------------------------------------------------*/
1561
1562static
1563IRAtom* expr2vbits_Binop ( MCEnv* mce,
1564 IROp op,
1565 IRAtom* atom1, IRAtom* atom2 )
1566{
1567 IRType and_or_ty;
1568 IRAtom* (*uifu) (MCEnv*, IRAtom*, IRAtom*);
1569 IRAtom* (*difd) (MCEnv*, IRAtom*, IRAtom*);
1570 IRAtom* (*improve) (MCEnv*, IRAtom*, IRAtom*);
1571
1572 IRAtom* vatom1 = expr2vbits( mce, atom1 );
1573 IRAtom* vatom2 = expr2vbits( mce, atom2 );
1574
1575 tl_assert(isOriginalAtom(mce,atom1));
1576 tl_assert(isOriginalAtom(mce,atom2));
1577 tl_assert(isShadowAtom(mce,vatom1));
1578 tl_assert(isShadowAtom(mce,vatom2));
1579 tl_assert(sameKindedAtoms(atom1,vatom1));
1580 tl_assert(sameKindedAtoms(atom2,vatom2));
1581 switch (op) {
1582
sewardjacd2e912005-01-13 19:17:06 +00001583 /* 64-bit SIMD */
1584
1585 case Iop_ShrN16x4:
1586 case Iop_ShrN32x2:
1587 case Iop_SarN16x4:
1588 case Iop_SarN32x2:
1589 case Iop_ShlN16x4:
1590 case Iop_ShlN32x2:
1591 /* Same scheme as with all other shifts. */
1592 complainIfUndefined(mce, atom2);
1593 return assignNew(mce, Ity_I64, binop(op, vatom1, atom2));
1594
1595 case Iop_QNarrow32Sx2:
1596 case Iop_QNarrow16Sx4:
1597 case Iop_QNarrow16Ux4:
1598 return vectorNarrow64(mce, op, vatom1, vatom2);
1599
1600 case Iop_Min8Ux8:
1601 case Iop_Max8Ux8:
1602 case Iop_Avg8Ux8:
1603 case Iop_QSub8Sx8:
1604 case Iop_QSub8Ux8:
1605 case Iop_Sub8x8:
1606 case Iop_CmpGT8Sx8:
1607 case Iop_CmpEQ8x8:
1608 case Iop_QAdd8Sx8:
1609 case Iop_QAdd8Ux8:
1610 case Iop_Add8x8:
1611 return binary8Ix8(mce, vatom1, vatom2);
1612
1613 case Iop_Min16Sx4:
1614 case Iop_Max16Sx4:
1615 case Iop_Avg16Ux4:
1616 case Iop_QSub16Ux4:
1617 case Iop_QSub16Sx4:
1618 case Iop_Sub16x4:
1619 case Iop_Mul16x4:
1620 case Iop_MulHi16Sx4:
1621 case Iop_MulHi16Ux4:
1622 case Iop_CmpGT16Sx4:
1623 case Iop_CmpEQ16x4:
1624 case Iop_QAdd16Sx4:
1625 case Iop_QAdd16Ux4:
1626 case Iop_Add16x4:
1627 return binary16Ix4(mce, vatom1, vatom2);
1628
1629 case Iop_Sub32x2:
1630 case Iop_CmpGT32Sx2:
1631 case Iop_CmpEQ32x2:
1632 case Iop_Add32x2:
1633 return binary32Ix2(mce, vatom1, vatom2);
1634
1635 /* 64-bit data-steering */
1636 case Iop_InterleaveLO32x2:
1637 case Iop_InterleaveLO16x4:
1638 case Iop_InterleaveLO8x8:
1639 case Iop_InterleaveHI32x2:
1640 case Iop_InterleaveHI16x4:
1641 case Iop_InterleaveHI8x8:
1642 return assignNew(mce, Ity_I64, binop(op, vatom1, vatom2));
1643
sewardj20d38f22005-02-07 23:50:18 +00001644 /* V128-bit SIMD */
sewardj0b070592004-12-10 21:44:22 +00001645
sewardja1d93302004-12-12 16:45:06 +00001646 case Iop_ShrN16x8:
1647 case Iop_ShrN32x4:
1648 case Iop_ShrN64x2:
1649 case Iop_SarN16x8:
1650 case Iop_SarN32x4:
1651 case Iop_ShlN16x8:
1652 case Iop_ShlN32x4:
1653 case Iop_ShlN64x2:
sewardj620eb5b2005-10-22 12:50:43 +00001654 case Iop_ShlN8x16:
1655 case Iop_SarN8x16:
1656 /* Same scheme as with all other shifts. Note: 22 Oct 05:
 1657         this is wrong now: scalar shifts are handled properly
 1658         (lazily), and vector shifts should be fixed to match. */
sewardja1d93302004-12-12 16:45:06 +00001659 complainIfUndefined(mce, atom2);
1660 return assignNew(mce, Ity_V128, binop(op, vatom1, atom2));
1661
sewardjcbf8be72005-11-10 18:34:41 +00001662 /* V x V shifts/rotates are done using the standard lazy scheme. */
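      /* That is: shift the shadow by the original (unshadowed) shift
         amounts, then UifU in a per-lane pessimisation of the shift
         amounts' own shadow, so that an undefined count poisons its
         whole result lane. */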
sewardj43d60752005-11-10 18:13:01 +00001663 case Iop_Shl8x16:
1664 case Iop_Shr8x16:
1665 case Iop_Sar8x16:
sewardjcbf8be72005-11-10 18:34:41 +00001666 case Iop_Rol8x16:
sewardj43d60752005-11-10 18:13:01 +00001667 return mkUifUV128(mce,
1668 assignNew(mce, Ity_V128, binop(op, vatom1, atom2)),
1669 mkPCast8x16(mce,vatom2)
1670 );
1671
1672 case Iop_Shl16x8:
1673 case Iop_Shr16x8:
1674 case Iop_Sar16x8:
sewardjcbf8be72005-11-10 18:34:41 +00001675 case Iop_Rol16x8:
sewardj43d60752005-11-10 18:13:01 +00001676 return mkUifUV128(mce,
1677 assignNew(mce, Ity_V128, binop(op, vatom1, atom2)),
1678 mkPCast16x8(mce,vatom2)
1679 );
1680
1681 case Iop_Shl32x4:
1682 case Iop_Shr32x4:
1683 case Iop_Sar32x4:
sewardjcbf8be72005-11-10 18:34:41 +00001684 case Iop_Rol32x4:
sewardj43d60752005-11-10 18:13:01 +00001685 return mkUifUV128(mce,
1686 assignNew(mce, Ity_V128, binop(op, vatom1, atom2)),
1687 mkPCast32x4(mce,vatom2)
1688 );
1689
sewardja1d93302004-12-12 16:45:06 +00001690 case Iop_QSub8Ux16:
1691 case Iop_QSub8Sx16:
1692 case Iop_Sub8x16:
1693 case Iop_Min8Ux16:
sewardj43d60752005-11-10 18:13:01 +00001694 case Iop_Min8Sx16:
sewardja1d93302004-12-12 16:45:06 +00001695 case Iop_Max8Ux16:
sewardj43d60752005-11-10 18:13:01 +00001696 case Iop_Max8Sx16:
sewardja1d93302004-12-12 16:45:06 +00001697 case Iop_CmpGT8Sx16:
sewardj43d60752005-11-10 18:13:01 +00001698 case Iop_CmpGT8Ux16:
sewardja1d93302004-12-12 16:45:06 +00001699 case Iop_CmpEQ8x16:
1700 case Iop_Avg8Ux16:
sewardj43d60752005-11-10 18:13:01 +00001701 case Iop_Avg8Sx16:
sewardja1d93302004-12-12 16:45:06 +00001702 case Iop_QAdd8Ux16:
1703 case Iop_QAdd8Sx16:
1704 case Iop_Add8x16:
1705 return binary8Ix16(mce, vatom1, vatom2);
1706
1707 case Iop_QSub16Ux8:
1708 case Iop_QSub16Sx8:
1709 case Iop_Sub16x8:
1710 case Iop_Mul16x8:
1711 case Iop_MulHi16Sx8:
1712 case Iop_MulHi16Ux8:
1713 case Iop_Min16Sx8:
sewardj43d60752005-11-10 18:13:01 +00001714 case Iop_Min16Ux8:
sewardja1d93302004-12-12 16:45:06 +00001715 case Iop_Max16Sx8:
sewardj43d60752005-11-10 18:13:01 +00001716 case Iop_Max16Ux8:
sewardja1d93302004-12-12 16:45:06 +00001717 case Iop_CmpGT16Sx8:
sewardj43d60752005-11-10 18:13:01 +00001718 case Iop_CmpGT16Ux8:
sewardja1d93302004-12-12 16:45:06 +00001719 case Iop_CmpEQ16x8:
1720 case Iop_Avg16Ux8:
sewardj43d60752005-11-10 18:13:01 +00001721 case Iop_Avg16Sx8:
sewardja1d93302004-12-12 16:45:06 +00001722 case Iop_QAdd16Ux8:
1723 case Iop_QAdd16Sx8:
1724 case Iop_Add16x8:
1725 return binary16Ix8(mce, vatom1, vatom2);
1726
1727 case Iop_Sub32x4:
1728 case Iop_CmpGT32Sx4:
sewardj43d60752005-11-10 18:13:01 +00001729 case Iop_CmpGT32Ux4:
sewardja1d93302004-12-12 16:45:06 +00001730 case Iop_CmpEQ32x4:
sewardj43d60752005-11-10 18:13:01 +00001731 case Iop_QAdd32Sx4:
1732 case Iop_QAdd32Ux4:
1733 case Iop_QSub32Sx4:
1734 case Iop_QSub32Ux4:
1735 case Iop_Avg32Ux4:
1736 case Iop_Avg32Sx4:
sewardja1d93302004-12-12 16:45:06 +00001737 case Iop_Add32x4:
sewardj43d60752005-11-10 18:13:01 +00001738 case Iop_Max32Ux4:
1739 case Iop_Max32Sx4:
1740 case Iop_Min32Ux4:
1741 case Iop_Min32Sx4:
sewardja1d93302004-12-12 16:45:06 +00001742 return binary32Ix4(mce, vatom1, vatom2);
1743
1744 case Iop_Sub64x2:
1745 case Iop_Add64x2:
1746 return binary64Ix2(mce, vatom1, vatom2);
1747
1748 case Iop_QNarrow32Sx4:
sewardj43d60752005-11-10 18:13:01 +00001749 case Iop_QNarrow32Ux4:
sewardja1d93302004-12-12 16:45:06 +00001750 case Iop_QNarrow16Sx8:
1751 case Iop_QNarrow16Ux8:
sewardj20d38f22005-02-07 23:50:18 +00001752 return vectorNarrowV128(mce, op, vatom1, vatom2);
sewardja1d93302004-12-12 16:45:06 +00001753
sewardj0b070592004-12-10 21:44:22 +00001754 case Iop_Sub64Fx2:
1755 case Iop_Mul64Fx2:
1756 case Iop_Min64Fx2:
1757 case Iop_Max64Fx2:
1758 case Iop_Div64Fx2:
1759 case Iop_CmpLT64Fx2:
1760 case Iop_CmpLE64Fx2:
1761 case Iop_CmpEQ64Fx2:
sewardj545663e2005-11-05 01:55:04 +00001762 case Iop_CmpUN64Fx2:
sewardj0b070592004-12-10 21:44:22 +00001763 case Iop_Add64Fx2:
1764 return binary64Fx2(mce, vatom1, vatom2);
1765
1766 case Iop_Sub64F0x2:
1767 case Iop_Mul64F0x2:
1768 case Iop_Min64F0x2:
1769 case Iop_Max64F0x2:
1770 case Iop_Div64F0x2:
1771 case Iop_CmpLT64F0x2:
1772 case Iop_CmpLE64F0x2:
1773 case Iop_CmpEQ64F0x2:
sewardj545663e2005-11-05 01:55:04 +00001774 case Iop_CmpUN64F0x2:
sewardj0b070592004-12-10 21:44:22 +00001775 case Iop_Add64F0x2:
1776 return binary64F0x2(mce, vatom1, vatom2);
1777
sewardj170ee212004-12-10 18:57:51 +00001778 case Iop_Sub32Fx4:
1779 case Iop_Mul32Fx4:
1780 case Iop_Min32Fx4:
1781 case Iop_Max32Fx4:
1782 case Iop_Div32Fx4:
1783 case Iop_CmpLT32Fx4:
1784 case Iop_CmpLE32Fx4:
1785 case Iop_CmpEQ32Fx4:
sewardj545663e2005-11-05 01:55:04 +00001786 case Iop_CmpUN32Fx4:
cerione78ba2a2005-11-14 03:00:35 +00001787 case Iop_CmpGT32Fx4:
1788 case Iop_CmpGE32Fx4:
sewardj3245c912004-12-10 14:58:26 +00001789 case Iop_Add32Fx4:
1790 return binary32Fx4(mce, vatom1, vatom2);
1791
sewardj170ee212004-12-10 18:57:51 +00001792 case Iop_Sub32F0x4:
1793 case Iop_Mul32F0x4:
1794 case Iop_Min32F0x4:
1795 case Iop_Max32F0x4:
1796 case Iop_Div32F0x4:
1797 case Iop_CmpLT32F0x4:
1798 case Iop_CmpLE32F0x4:
1799 case Iop_CmpEQ32F0x4:
sewardj545663e2005-11-05 01:55:04 +00001800 case Iop_CmpUN32F0x4:
sewardj170ee212004-12-10 18:57:51 +00001801 case Iop_Add32F0x4:
1802 return binary32F0x4(mce, vatom1, vatom2);
1803
sewardj20d38f22005-02-07 23:50:18 +00001804 /* V128-bit data-steering */
1805 case Iop_SetV128lo32:
1806 case Iop_SetV128lo64:
1807 case Iop_64HLtoV128:
sewardja1d93302004-12-12 16:45:06 +00001808 case Iop_InterleaveLO64x2:
1809 case Iop_InterleaveLO32x4:
1810 case Iop_InterleaveLO16x8:
1811 case Iop_InterleaveLO8x16:
1812 case Iop_InterleaveHI64x2:
1813 case Iop_InterleaveHI32x4:
1814 case Iop_InterleaveHI16x8:
1815 case Iop_InterleaveHI8x16:
sewardj170ee212004-12-10 18:57:51 +00001816 return assignNew(mce, Ity_V128, binop(op, vatom1, vatom2));
sewardj620eb5b2005-10-22 12:50:43 +00001817
1818 /* Perm8x16: rearrange values in left arg using steering values
1819 from right arg. So rearrange the vbits in the same way but
1820 pessimise wrt steering values. */
1821 case Iop_Perm8x16:
1822 return mkUifUV128(
1823 mce,
1824 assignNew(mce, Ity_V128, binop(op, vatom1, atom2)),
1825 mkPCast8x16(mce, vatom2)
1826 );
sewardj170ee212004-12-10 18:57:51 +00001827
sewardj43d60752005-11-10 18:13:01 +00001828      /* These two take the lower half of each 32-bit lane, sign/zero
1829 extend it to 32, and multiply together, producing a 32x4
1830 result (and implicitly ignoring half the operand bits). So
1831 treat it as a bunch of independent 16x8 operations, but then
1832 do 32-bit shifts left-right to copy the lower half results
1833 (which are all 0s or all 1s due to PCasting in binary16Ix8)
1834 into the upper half of each result lane. */
1835 case Iop_MullEven16Ux8:
1836 case Iop_MullEven16Sx8: {
1837 IRAtom* at;
1838 at = binary16Ix8(mce,vatom1,vatom2);
1839 at = assignNew(mce, Ity_V128, binop(Iop_ShlN32x4, at, mkU8(16)));
1840 at = assignNew(mce, Ity_V128, binop(Iop_SarN32x4, at, mkU8(16)));
1841 return at;
1842 }
1843
1844 /* Same deal as Iop_MullEven16{S,U}x8 */
1845 case Iop_MullEven8Ux16:
1846 case Iop_MullEven8Sx16: {
1847 IRAtom* at;
1848 at = binary8Ix16(mce,vatom1,vatom2);
1849 at = assignNew(mce, Ity_V128, binop(Iop_ShlN16x8, at, mkU8(8)));
1850 at = assignNew(mce, Ity_V128, binop(Iop_SarN16x8, at, mkU8(8)));
1851 return at;
1852 }
1853
1854 /* narrow 2xV128 into 1xV128, hi half from left arg, in a 2 x
1855 32x4 -> 16x8 laneage, discarding the upper half of each lane.
 1856         Simply apply the same op to the V bits, since this is really
 1857         no more than a data steering operation. */
sewardjcbf8be72005-11-10 18:34:41 +00001858 case Iop_Narrow32x4:
1859 case Iop_Narrow16x8:
sewardj43d60752005-11-10 18:13:01 +00001860 return assignNew(mce, Ity_V128,
1861 binop(op, vatom1, vatom2));
1862
1863 case Iop_ShrV128:
1864 case Iop_ShlV128:
1865 /* Same scheme as with all other shifts. Note: 10 Nov 05:
 1866         this is wrong now: scalar shifts are handled properly
 1867         (lazily), and vector shifts should be fixed to match. */
1868 complainIfUndefined(mce, atom2);
1869 return assignNew(mce, Ity_V128, binop(op, vatom1, atom2));
1870
1871
sewardj69a13322005-04-23 01:14:51 +00001872 /* I128-bit data-steering */
1873 case Iop_64HLto128:
1874 return assignNew(mce, Ity_I128, binop(op, vatom1, vatom2));
1875
sewardj3245c912004-12-10 14:58:26 +00001876 /* Scalar floating point */
1877
sewardj95448072004-11-22 20:19:51 +00001878 case Iop_RoundF64:
1879 case Iop_F64toI64:
sewardje9e16d32004-12-10 13:17:55 +00001880 case Iop_I64toF64:
1881 /* First arg is I32 (rounding mode), second is F64 or I64
1882 (data). */
sewardj95448072004-11-22 20:19:51 +00001883 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
1884
1885 case Iop_PRemC3210F64: case Iop_PRem1C3210F64:
1886 /* Takes two F64 args. */
1887 case Iop_F64toI32:
sewardje9e16d32004-12-10 13:17:55 +00001888 case Iop_F64toF32:
sewardj95448072004-11-22 20:19:51 +00001889 /* First arg is I32 (rounding mode), second is F64 (data). */
1890 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
1891
1892 case Iop_F64toI16:
1893 /* First arg is I32 (rounding mode), second is F64 (data). */
1894 return mkLazy2(mce, Ity_I16, vatom1, vatom2);
1895
1896 case Iop_ScaleF64:
1897 case Iop_Yl2xF64:
1898 case Iop_Yl2xp1F64:
1899 case Iop_PRemF64:
sewardj96403eb2005-04-01 20:20:12 +00001900 case Iop_PRem1F64:
sewardj95448072004-11-22 20:19:51 +00001901 case Iop_AtanF64:
1902 case Iop_AddF64:
1903 case Iop_DivF64:
1904 case Iop_SubF64:
1905 case Iop_MulF64:
1906 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
1907
1908 case Iop_CmpF64:
1909 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
1910
1911 /* non-FP after here */
1912
1913 case Iop_DivModU64to32:
1914 case Iop_DivModS64to32:
1915 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
1916
sewardj69a13322005-04-23 01:14:51 +00001917 case Iop_DivModU128to64:
1918 case Iop_DivModS128to64:
1919 return mkLazy2(mce, Ity_I128, vatom1, vatom2);
1920
sewardj95448072004-11-22 20:19:51 +00001921 case Iop_16HLto32:
sewardj170ee212004-12-10 18:57:51 +00001922 return assignNew(mce, Ity_I32, binop(op, vatom1, vatom2));
sewardj95448072004-11-22 20:19:51 +00001923 case Iop_32HLto64:
sewardj170ee212004-12-10 18:57:51 +00001924 return assignNew(mce, Ity_I64, binop(op, vatom1, vatom2));
sewardj95448072004-11-22 20:19:51 +00001925
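      /* Widening multiplies: compute the low half's shadow with the
         usual cheap left-propagating scheme, then pessimise the high
         half wholesale from it (every high-half bit can depend on
         every input bit), and glue the halves together. */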
sewardj6cf40ff2005-04-20 22:31:26 +00001926 case Iop_MullS64:
1927 case Iop_MullU64: {
1928 IRAtom* vLo64 = mkLeft64(mce, mkUifU64(mce, vatom1,vatom2));
1929 IRAtom* vHi64 = mkPCastTo(mce, Ity_I64, vLo64);
1930 return assignNew(mce, Ity_I128, binop(Iop_64HLto128, vHi64, vLo64));
1931 }
1932
sewardj95448072004-11-22 20:19:51 +00001933 case Iop_MullS32:
1934 case Iop_MullU32: {
1935 IRAtom* vLo32 = mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
1936 IRAtom* vHi32 = mkPCastTo(mce, Ity_I32, vLo32);
1937 return assignNew(mce, Ity_I64, binop(Iop_32HLto64, vHi32, vLo32));
1938 }
1939
1940 case Iop_MullS16:
1941 case Iop_MullU16: {
1942 IRAtom* vLo16 = mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
1943 IRAtom* vHi16 = mkPCastTo(mce, Ity_I16, vLo16);
1944 return assignNew(mce, Ity_I32, binop(Iop_16HLto32, vHi16, vLo16));
1945 }
1946
1947 case Iop_MullS8:
1948 case Iop_MullU8: {
1949 IRAtom* vLo8 = mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
1950 IRAtom* vHi8 = mkPCastTo(mce, Ity_I8, vLo8);
1951 return assignNew(mce, Ity_I16, binop(Iop_8HLto16, vHi8, vLo8));
1952 }
1953
cerion9e591082005-06-23 15:28:34 +00001954 case Iop_DivS32:
1955 case Iop_DivU32:
1956 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
1957
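      /* For Add/Sub here (and CmpEQ/CmpNE below), the bit-accurate
         but expensive definedness tracking is only emitted when the
         preliminary scan of this bb found dodgy-looking literals;
         otherwise the cheap left-propagating approximation is used. */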
sewardj95448072004-11-22 20:19:51 +00001958 case Iop_Add32:
sewardjd5204dc2004-12-31 01:16:11 +00001959 if (mce->bogusLiterals)
1960 return expensiveAddSub(mce,True,Ity_I32,
1961 vatom1,vatom2, atom1,atom2);
1962 else
1963 goto cheap_AddSub32;
sewardj95448072004-11-22 20:19:51 +00001964 case Iop_Sub32:
sewardjd5204dc2004-12-31 01:16:11 +00001965 if (mce->bogusLiterals)
1966 return expensiveAddSub(mce,False,Ity_I32,
1967 vatom1,vatom2, atom1,atom2);
1968 else
1969 goto cheap_AddSub32;
1970
1971 cheap_AddSub32:
sewardj95448072004-11-22 20:19:51 +00001972 case Iop_Mul32:
sewardj992dff92005-10-07 11:08:55 +00001973 return mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
1974
sewardj463b3d92005-07-18 11:41:15 +00001975 case Iop_CmpORD32S:
1976 case Iop_CmpORD32U:
sewardj992dff92005-10-07 11:08:55 +00001977 return doCmpORD32(mce, op, vatom1,vatom2, atom1,atom2);
sewardj95448072004-11-22 20:19:51 +00001978
sewardj681be302005-01-15 20:43:58 +00001979 case Iop_Add64:
tomd9774d72005-06-27 08:11:01 +00001980 if (mce->bogusLiterals)
1981 return expensiveAddSub(mce,True,Ity_I64,
1982 vatom1,vatom2, atom1,atom2);
1983 else
1984 goto cheap_AddSub64;
sewardj681be302005-01-15 20:43:58 +00001985 case Iop_Sub64:
tomd9774d72005-06-27 08:11:01 +00001986 if (mce->bogusLiterals)
1987 return expensiveAddSub(mce,False,Ity_I64,
1988 vatom1,vatom2, atom1,atom2);
1989 else
1990 goto cheap_AddSub64;
1991
1992 cheap_AddSub64:
1993 case Iop_Mul64:
sewardj681be302005-01-15 20:43:58 +00001994 return mkLeft64(mce, mkUifU64(mce, vatom1,vatom2));
1995
sewardj95448072004-11-22 20:19:51 +00001996 case Iop_Mul16:
1997 case Iop_Add16:
1998 case Iop_Sub16:
1999 return mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
2000
2001 case Iop_Sub8:
2002 case Iop_Add8:
2003 return mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
2004
sewardj69a13322005-04-23 01:14:51 +00002005 case Iop_CmpEQ64:
sewardje6f8af42005-07-06 18:48:59 +00002006 case Iop_CmpNE64:
sewardj69a13322005-04-23 01:14:51 +00002007 if (mce->bogusLiterals)
2008 return expensiveCmpEQorNE(mce,Ity_I64, vatom1,vatom2, atom1,atom2 );
2009 else
2010 goto cheap_cmp64;
2011 cheap_cmp64:
tomcd986332005-04-26 07:44:48 +00002012 case Iop_CmpLE64S: case Iop_CmpLE64U:
2013 case Iop_CmpLT64U: case Iop_CmpLT64S:
sewardj69a13322005-04-23 01:14:51 +00002014 return mkPCastTo(mce, Ity_I1, mkUifU64(mce, vatom1,vatom2));
2015
sewardjd5204dc2004-12-31 01:16:11 +00002016 case Iop_CmpEQ32:
sewardje6f8af42005-07-06 18:48:59 +00002017 case Iop_CmpNE32:
sewardjd5204dc2004-12-31 01:16:11 +00002018 if (mce->bogusLiterals)
2019 return expensiveCmpEQorNE(mce,Ity_I32, vatom1,vatom2, atom1,atom2 );
2020 else
2021 goto cheap_cmp32;
sewardjd5204dc2004-12-31 01:16:11 +00002022 cheap_cmp32:
sewardj95448072004-11-22 20:19:51 +00002023 case Iop_CmpLE32S: case Iop_CmpLE32U:
2024 case Iop_CmpLT32U: case Iop_CmpLT32S:
sewardj95448072004-11-22 20:19:51 +00002025 return mkPCastTo(mce, Ity_I1, mkUifU32(mce, vatom1,vatom2));
2026
2027 case Iop_CmpEQ16: case Iop_CmpNE16:
2028 return mkPCastTo(mce, Ity_I1, mkUifU16(mce, vatom1,vatom2));
2029
2030 case Iop_CmpEQ8: case Iop_CmpNE8:
2031 return mkPCastTo(mce, Ity_I1, mkUifU8(mce, vatom1,vatom2));
2032
sewardjaaddbc22005-10-07 09:49:53 +00002033 case Iop_Shl64: case Iop_Shr64: case Iop_Sar64:
2034 return scalarShift( mce, Ity_I64, op, vatom1,vatom2, atom1,atom2 );
2035
sewardj95448072004-11-22 20:19:51 +00002036 case Iop_Shl32: case Iop_Shr32: case Iop_Sar32:
sewardjaaddbc22005-10-07 09:49:53 +00002037 return scalarShift( mce, Ity_I32, op, vatom1,vatom2, atom1,atom2 );
sewardj95448072004-11-22 20:19:51 +00002038
sewardjdb67f5f2004-12-14 01:15:31 +00002039 case Iop_Shl16: case Iop_Shr16: case Iop_Sar16:
sewardjaaddbc22005-10-07 09:49:53 +00002040 return scalarShift( mce, Ity_I16, op, vatom1,vatom2, atom1,atom2 );
sewardj95448072004-11-22 20:19:51 +00002041
2042 case Iop_Shl8: case Iop_Shr8:
sewardjaaddbc22005-10-07 09:49:53 +00002043 return scalarShift( mce, Ity_I8, op, vatom1,vatom2, atom1,atom2 );
sewardj95448072004-11-22 20:19:51 +00002044
sewardj20d38f22005-02-07 23:50:18 +00002045 case Iop_AndV128:
2046 uifu = mkUifUV128; difd = mkDifDV128;
2047 and_or_ty = Ity_V128; improve = mkImproveANDV128; goto do_And_Or;
sewardj7010f6e2004-12-10 13:35:22 +00002048 case Iop_And64:
2049 uifu = mkUifU64; difd = mkDifD64;
2050 and_or_ty = Ity_I64; improve = mkImproveAND64; goto do_And_Or;
sewardj95448072004-11-22 20:19:51 +00002051 case Iop_And32:
2052 uifu = mkUifU32; difd = mkDifD32;
2053 and_or_ty = Ity_I32; improve = mkImproveAND32; goto do_And_Or;
2054 case Iop_And16:
2055 uifu = mkUifU16; difd = mkDifD16;
2056 and_or_ty = Ity_I16; improve = mkImproveAND16; goto do_And_Or;
2057 case Iop_And8:
2058 uifu = mkUifU8; difd = mkDifD8;
2059 and_or_ty = Ity_I8; improve = mkImproveAND8; goto do_And_Or;
2060
sewardj20d38f22005-02-07 23:50:18 +00002061 case Iop_OrV128:
2062 uifu = mkUifUV128; difd = mkDifDV128;
2063 and_or_ty = Ity_V128; improve = mkImproveORV128; goto do_And_Or;
sewardj7010f6e2004-12-10 13:35:22 +00002064 case Iop_Or64:
2065 uifu = mkUifU64; difd = mkDifD64;
2066 and_or_ty = Ity_I64; improve = mkImproveOR64; goto do_And_Or;
sewardj95448072004-11-22 20:19:51 +00002067 case Iop_Or32:
2068 uifu = mkUifU32; difd = mkDifD32;
2069 and_or_ty = Ity_I32; improve = mkImproveOR32; goto do_And_Or;
2070 case Iop_Or16:
2071 uifu = mkUifU16; difd = mkDifD16;
2072 and_or_ty = Ity_I16; improve = mkImproveOR16; goto do_And_Or;
2073 case Iop_Or8:
2074 uifu = mkUifU8; difd = mkDifD8;
2075 and_or_ty = Ity_I8; improve = mkImproveOR8; goto do_And_Or;
2076
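      /* The And/Or rule, schematically:

            V(result) = (V1 `UifU` V2)
                        `DifD` improve(arg1,V1)
                        `DifD` improve(arg2,V2)

         The UifU term is the baseline pessimistic answer; the improve
         terms then mark a result bit as defined wherever one operand
         alone decides it: a defined 0 for AND, or a defined 1 for OR,
         fixes that bit regardless of the other operand. */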
2077 do_And_Or:
2078 return
2079 assignNew(
2080 mce,
2081 and_or_ty,
2082 difd(mce, uifu(mce, vatom1, vatom2),
2083 difd(mce, improve(mce, atom1, vatom1),
2084 improve(mce, atom2, vatom2) ) ) );
2085
2086 case Iop_Xor8:
2087 return mkUifU8(mce, vatom1, vatom2);
2088 case Iop_Xor16:
2089 return mkUifU16(mce, vatom1, vatom2);
2090 case Iop_Xor32:
2091 return mkUifU32(mce, vatom1, vatom2);
sewardj7010f6e2004-12-10 13:35:22 +00002092 case Iop_Xor64:
2093 return mkUifU64(mce, vatom1, vatom2);
sewardj20d38f22005-02-07 23:50:18 +00002094 case Iop_XorV128:
2095 return mkUifUV128(mce, vatom1, vatom2);
njn25e49d8e72002-09-23 09:36:25 +00002096
2097 default:
sewardj95448072004-11-22 20:19:51 +00002098 ppIROp(op);
2099 VG_(tool_panic)("memcheck:expr2vbits_Binop");
njn25e49d8e72002-09-23 09:36:25 +00002100 }
njn25e49d8e72002-09-23 09:36:25 +00002101}
2102
njn25e49d8e72002-09-23 09:36:25 +00002103
sewardj95448072004-11-22 20:19:51 +00002104static
2105IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
2106{
2107 IRAtom* vatom = expr2vbits( mce, atom );
2108 tl_assert(isOriginalAtom(mce,atom));
2109 switch (op) {
2110
sewardj0b070592004-12-10 21:44:22 +00002111 case Iop_Sqrt64Fx2:
2112 return unary64Fx2(mce, vatom);
2113
2114 case Iop_Sqrt64F0x2:
2115 return unary64F0x2(mce, vatom);
2116
sewardj170ee212004-12-10 18:57:51 +00002117 case Iop_Sqrt32Fx4:
2118 case Iop_RSqrt32Fx4:
2119 case Iop_Recip32Fx4:
cerion176cb4c2005-11-16 17:21:49 +00002120 case Iop_I32UtoFx4:
2121 case Iop_I32StoFx4:
2122 case Iop_QFtoI32Ux4_RZ:
2123 case Iop_QFtoI32Sx4_RZ:
2124 case Iop_RoundF32x4_RM:
2125 case Iop_RoundF32x4_RP:
2126 case Iop_RoundF32x4_RN:
2127 case Iop_RoundF32x4_RZ:
sewardj170ee212004-12-10 18:57:51 +00002128 return unary32Fx4(mce, vatom);
2129
2130 case Iop_Sqrt32F0x4:
2131 case Iop_RSqrt32F0x4:
2132 case Iop_Recip32F0x4:
2133 return unary32F0x4(mce, vatom);
2134
sewardj20d38f22005-02-07 23:50:18 +00002135 case Iop_32UtoV128:
2136 case Iop_64UtoV128:
sewardj620eb5b2005-10-22 12:50:43 +00002137 case Iop_Dup8x16:
2138 case Iop_Dup16x8:
2139 case Iop_Dup32x4:
sewardj170ee212004-12-10 18:57:51 +00002140 return assignNew(mce, Ity_V128, unop(op, vatom));
2141
sewardj95448072004-11-22 20:19:51 +00002142 case Iop_F32toF64:
2143 case Iop_I32toF64:
sewardj95448072004-11-22 20:19:51 +00002144 case Iop_NegF64:
2145 case Iop_SinF64:
2146 case Iop_CosF64:
2147 case Iop_TanF64:
2148 case Iop_SqrtF64:
2149 case Iop_AbsF64:
2150 case Iop_2xm1F64:
sewardj39cc7352005-06-09 21:31:55 +00002151 case Iop_Clz64:
2152 case Iop_Ctz64:
sewardj95448072004-11-22 20:19:51 +00002153 return mkPCastTo(mce, Ity_I64, vatom);
2154
sewardj95448072004-11-22 20:19:51 +00002155 case Iop_Clz32:
2156 case Iop_Ctz32:
2157 return mkPCastTo(mce, Ity_I32, vatom);
2158
sewardjd9dbc192005-04-27 11:40:27 +00002159 case Iop_1Uto64:
2160 case Iop_8Uto64:
2161 case Iop_8Sto64:
2162 case Iop_16Uto64:
2163 case Iop_16Sto64:
sewardj95448072004-11-22 20:19:51 +00002164 case Iop_32Sto64:
2165 case Iop_32Uto64:
sewardj20d38f22005-02-07 23:50:18 +00002166 case Iop_V128to64:
2167 case Iop_V128HIto64:
sewardj6cf40ff2005-04-20 22:31:26 +00002168 case Iop_128HIto64:
2169 case Iop_128to64:
sewardj95448072004-11-22 20:19:51 +00002170 return assignNew(mce, Ity_I64, unop(op, vatom));
2171
2172 case Iop_64to32:
2173 case Iop_64HIto32:
2174 case Iop_1Uto32:
sewardj463b3d92005-07-18 11:41:15 +00002175 case Iop_1Sto32:
sewardj95448072004-11-22 20:19:51 +00002176 case Iop_8Uto32:
2177 case Iop_16Uto32:
2178 case Iop_16Sto32:
2179 case Iop_8Sto32:
cerionfafaa0d2005-09-12 22:29:38 +00002180 case Iop_V128to32:
sewardj95448072004-11-22 20:19:51 +00002181 return assignNew(mce, Ity_I32, unop(op, vatom));
2182
2183 case Iop_8Sto16:
2184 case Iop_8Uto16:
2185 case Iop_32to16:
2186 case Iop_32HIto16:
sewardjd9dbc192005-04-27 11:40:27 +00002187 case Iop_64to16:
sewardj95448072004-11-22 20:19:51 +00002188 return assignNew(mce, Ity_I16, unop(op, vatom));
2189
2190 case Iop_1Uto8:
2191 case Iop_16to8:
2192 case Iop_32to8:
sewardjd9dbc192005-04-27 11:40:27 +00002193 case Iop_64to8:
sewardj95448072004-11-22 20:19:51 +00002194 return assignNew(mce, Ity_I8, unop(op, vatom));
2195
2196 case Iop_32to1:
2197 return assignNew(mce, Ity_I1, unop(Iop_32to1, vatom));
2198
sewardjd9dbc192005-04-27 11:40:27 +00002199 case Iop_64to1:
2200 return assignNew(mce, Ity_I1, unop(Iop_64to1, vatom));
2201
sewardj95448072004-11-22 20:19:51 +00002202 case Iop_ReinterpF64asI64:
2203 case Iop_ReinterpI64asF64:
sewardj0b070592004-12-10 21:44:22 +00002204 case Iop_ReinterpI32asF32:
sewardj20d38f22005-02-07 23:50:18 +00002205 case Iop_NotV128:
sewardj7010f6e2004-12-10 13:35:22 +00002206 case Iop_Not64:
sewardj95448072004-11-22 20:19:51 +00002207 case Iop_Not32:
2208 case Iop_Not16:
2209 case Iop_Not8:
2210 case Iop_Not1:
2211 return vatom;
sewardj7010f6e2004-12-10 13:35:22 +00002212
sewardje53bd992005-05-03 12:22:00 +00002213 /* Neg* really fall under the Add/Sub banner, and as such you
2214 might think would qualify for the 'expensive add/sub'
2215 treatment. However, in this case since the implied literal
2216 is zero (0 - arg), we just do the cheap thing anyway. */
2217 case Iop_Neg8:
2218 return mkLeft8(mce, vatom);
2219 case Iop_Neg16:
2220 return mkLeft16(mce, vatom);
2221 case Iop_Neg32:
2222 return mkLeft32(mce, vatom);
2223
sewardj95448072004-11-22 20:19:51 +00002224 default:
2225 ppIROp(op);
2226 VG_(tool_panic)("memcheck:expr2vbits_Unop");
2227 }
2228}
2229
2230
sewardj170ee212004-12-10 18:57:51 +00002231/* Worker function; do not call directly. */
sewardj95448072004-11-22 20:19:51 +00002232static
sewardj2e595852005-06-30 23:33:37 +00002233IRAtom* expr2vbits_Load_WRK ( MCEnv* mce,
2234 IREndness end, IRType ty,
2235 IRAtom* addr, UInt bias )
sewardj95448072004-11-22 20:19:51 +00002236{
2237 void* helper;
2238 Char* hname;
2239 IRDirty* di;
2240 IRTemp datavbits;
2241 IRAtom* addrAct;
2242
2243 tl_assert(isOriginalAtom(mce,addr));
sewardj2e595852005-06-30 23:33:37 +00002244 tl_assert(end == Iend_LE || end == Iend_BE);
sewardj95448072004-11-22 20:19:51 +00002245
2246 /* First, emit a definedness test for the address. This also sets
2247 the address (shadow) to 'defined' following the test. */
2248 complainIfUndefined( mce, addr );
2249
2250 /* Now cook up a call to the relevant helper function, to read the
2251 data V bits from shadow memory. */
2252 ty = shadowType(ty);
sewardj2e595852005-06-30 23:33:37 +00002253
2254 if (end == Iend_LE) {
2255 switch (ty) {
2256 case Ity_I64: helper = &MC_(helperc_LOADV8le);
2257 hname = "MC_(helperc_LOADV8le)";
2258 break;
2259 case Ity_I32: helper = &MC_(helperc_LOADV4le);
2260 hname = "MC_(helperc_LOADV4le)";
2261 break;
2262 case Ity_I16: helper = &MC_(helperc_LOADV2le);
2263 hname = "MC_(helperc_LOADV2le)";
2264 break;
sewardj8cf88b72005-07-08 01:29:33 +00002265 case Ity_I8: helper = &MC_(helperc_LOADV1);
2266 hname = "MC_(helperc_LOADV1)";
sewardj2e595852005-06-30 23:33:37 +00002267 break;
2268 default: ppIRType(ty);
2269 VG_(tool_panic)("memcheck:do_shadow_Load(LE)");
2270 }
2271 } else {
sewardj8cf88b72005-07-08 01:29:33 +00002272 switch (ty) {
2273 case Ity_I64: helper = &MC_(helperc_LOADV8be);
2274 hname = "MC_(helperc_LOADV8be)";
2275 break;
2276 case Ity_I32: helper = &MC_(helperc_LOADV4be);
2277 hname = "MC_(helperc_LOADV4be)";
2278 break;
2279 case Ity_I16: helper = &MC_(helperc_LOADV2be);
2280 hname = "MC_(helperc_LOADV2be)";
2281 break;
2282 case Ity_I8: helper = &MC_(helperc_LOADV1);
2283 hname = "MC_(helperc_LOADV1)";
2284 break;
2285 default: ppIRType(ty);
2286 VG_(tool_panic)("memcheck:do_shadow_Load(BE)");
2287 }
sewardj95448072004-11-22 20:19:51 +00002288 }
2289
2290 /* Generate the actual address into addrAct. */
2291 if (bias == 0) {
2292 addrAct = addr;
2293 } else {
sewardj7cf97ee2004-11-28 14:25:01 +00002294 IROp mkAdd;
2295 IRAtom* eBias;
sewardj95448072004-11-22 20:19:51 +00002296 IRType tyAddr = mce->hWordTy;
2297 tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
sewardj7cf97ee2004-11-28 14:25:01 +00002298 mkAdd = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
2299 eBias = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
sewardj95448072004-11-22 20:19:51 +00002300 addrAct = assignNew(mce, tyAddr, binop(mkAdd, addr, eBias) );
2301 }
2302
2303 /* We need to have a place to park the V bits we're just about to
2304 read. */
2305 datavbits = newIRTemp(mce->bb->tyenv, ty);
2306 di = unsafeIRDirty_1_N( datavbits,
2307 1/*regparms*/, hname, helper,
2308 mkIRExprVec_1( addrAct ));
2309 setHelperAnns( mce, di );
2310 stmt( mce->bb, IRStmt_Dirty(di) );
2311
2312 return mkexpr(datavbits);
2313}
2314
2315
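/* Dispatcher for shadow loads: scalar types go straight to the
   worker above, while V128 loads are split into two 64-bit helper
   calls (the helpers handle at most 8 bytes), with the hi/lo halves
   at opposite address offsets depending on endianness. */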
2316static
sewardj2e595852005-06-30 23:33:37 +00002317IRAtom* expr2vbits_Load ( MCEnv* mce,
2318 IREndness end, IRType ty,
2319 IRAtom* addr, UInt bias )
sewardj170ee212004-12-10 18:57:51 +00002320{
2321 IRAtom *v64hi, *v64lo;
sewardj2e595852005-06-30 23:33:37 +00002322 tl_assert(end == Iend_LE || end == Iend_BE);
sewardj170ee212004-12-10 18:57:51 +00002323 switch (shadowType(ty)) {
2324 case Ity_I8:
2325 case Ity_I16:
2326 case Ity_I32:
2327 case Ity_I64:
sewardj2e595852005-06-30 23:33:37 +00002328 return expr2vbits_Load_WRK(mce, end, ty, addr, bias);
sewardj170ee212004-12-10 18:57:51 +00002329 case Ity_V128:
sewardj2e595852005-06-30 23:33:37 +00002330 if (end == Iend_LE) {
2331 v64lo = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias);
2332 v64hi = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+8);
2333 } else {
sewardj2e595852005-06-30 23:33:37 +00002334 v64hi = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias);
2335 v64lo = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+8);
2336 }
sewardj170ee212004-12-10 18:57:51 +00002337 return assignNew( mce,
2338 Ity_V128,
sewardj20d38f22005-02-07 23:50:18 +00002339 binop(Iop_64HLtoV128, v64hi, v64lo));
sewardj170ee212004-12-10 18:57:51 +00002340 default:
sewardj2e595852005-06-30 23:33:37 +00002341 VG_(tool_panic)("expr2vbits_Load");
sewardj170ee212004-12-10 18:57:51 +00002342 }
2343}
2344
2345
2346static
sewardj95448072004-11-22 20:19:51 +00002347IRAtom* expr2vbits_Mux0X ( MCEnv* mce,
2348 IRAtom* cond, IRAtom* expr0, IRAtom* exprX )
2349{
2350 IRAtom *vbitsC, *vbits0, *vbitsX;
2351 IRType ty;
2352 /* Given Mux0X(cond,expr0,exprX), generate
2353 Mux0X(cond,expr0#,exprX#) `UifU` PCast(cond#)
2354 That is, steer the V bits like the originals, but trash the
2355 result if the steering value is undefined. This gives
2356 lazy propagation. */
2357 tl_assert(isOriginalAtom(mce, cond));
2358 tl_assert(isOriginalAtom(mce, expr0));
2359 tl_assert(isOriginalAtom(mce, exprX));
2360
2361 vbitsC = expr2vbits(mce, cond);
2362 vbits0 = expr2vbits(mce, expr0);
2363 vbitsX = expr2vbits(mce, exprX);
2364 ty = typeOfIRExpr(mce->bb->tyenv, vbits0);
2365
2366 return
2367 mkUifU(mce, ty, assignNew(mce, ty, IRExpr_Mux0X(cond, vbits0, vbitsX)),
2368 mkPCastTo(mce, ty, vbitsC) );
2369}
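/* Schematically, for t3 = Mux0X(t0,t1,t2) this emits shadow IR along
   the lines of

      t3# = Mux0X(t0, t1#, t2#) `UifU` PCastTo(ty, t0#)

   so an undefined condition poisons the whole result, even when the
   two data arms happen to agree. */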
2370
2371/* --------- This is the main expression-handling function. --------- */
2372
2373static
2374IRExpr* expr2vbits ( MCEnv* mce, IRExpr* e )
2375{
2376 switch (e->tag) {
2377
2378 case Iex_Get:
2379 return shadow_GET( mce, e->Iex.Get.offset, e->Iex.Get.ty );
2380
2381 case Iex_GetI:
2382 return shadow_GETI( mce, e->Iex.GetI.descr,
2383 e->Iex.GetI.ix, e->Iex.GetI.bias );
2384
2385 case Iex_Tmp:
2386 return IRExpr_Tmp( findShadowTmp(mce, e->Iex.Tmp.tmp) );
2387
2388 case Iex_Const:
2389 return definedOfType(shadowType(typeOfIRExpr(mce->bb->tyenv, e)));
2390
2391 case Iex_Binop:
2392 return expr2vbits_Binop(
2393 mce,
2394 e->Iex.Binop.op,
2395 e->Iex.Binop.arg1, e->Iex.Binop.arg2
2396 );
2397
2398 case Iex_Unop:
2399 return expr2vbits_Unop( mce, e->Iex.Unop.op, e->Iex.Unop.arg );
2400
sewardj2e595852005-06-30 23:33:37 +00002401 case Iex_Load:
2402 return expr2vbits_Load( mce, e->Iex.Load.end,
2403 e->Iex.Load.ty,
2404 e->Iex.Load.addr, 0/*addr bias*/ );
sewardj95448072004-11-22 20:19:51 +00002405
2406 case Iex_CCall:
2407 return mkLazyN( mce, e->Iex.CCall.args,
2408 e->Iex.CCall.retty,
2409 e->Iex.CCall.cee );
2410
2411 case Iex_Mux0X:
2412 return expr2vbits_Mux0X( mce, e->Iex.Mux0X.cond, e->Iex.Mux0X.expr0,
2413 e->Iex.Mux0X.exprX);
njn25e49d8e72002-09-23 09:36:25 +00002414
2415 default:
sewardj95448072004-11-22 20:19:51 +00002416 VG_(printf)("\n");
2417 ppIRExpr(e);
2418 VG_(printf)("\n");
2419 VG_(tool_panic)("memcheck: expr2vbits");
njn25e49d8e72002-09-23 09:36:25 +00002420 }
njn25e49d8e72002-09-23 09:36:25 +00002421}
2422
2423/*------------------------------------------------------------*/
sewardj95448072004-11-22 20:19:51 +00002424/*--- Generate shadow stmts from all kinds of IRStmts. ---*/
njn25e49d8e72002-09-23 09:36:25 +00002425/*------------------------------------------------------------*/
2426
sewardj95448072004-11-22 20:19:51 +00002427/* Widen a value to the host word size. */
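/* (Zero-widening suffices here: the store helpers only consult the
   low-order bits corresponding to the access size, so the widening
   bits are never examined.) */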
njn25e49d8e72002-09-23 09:36:25 +00002428
2429static
sewardj95448072004-11-22 20:19:51 +00002430IRExpr* zwidenToHostWord ( MCEnv* mce, IRAtom* vatom )
njn25e49d8e72002-09-23 09:36:25 +00002431{
sewardj7cf97ee2004-11-28 14:25:01 +00002432 IRType ty, tyH;
2433
sewardj95448072004-11-22 20:19:51 +00002434 /* vatom is vbits-value and as such can only have a shadow type. */
2435 tl_assert(isShadowAtom(mce,vatom));
njn25e49d8e72002-09-23 09:36:25 +00002436
sewardj7cf97ee2004-11-28 14:25:01 +00002437 ty = typeOfIRExpr(mce->bb->tyenv, vatom);
2438 tyH = mce->hWordTy;
njn25e49d8e72002-09-23 09:36:25 +00002439
sewardj95448072004-11-22 20:19:51 +00002440 if (tyH == Ity_I32) {
2441 switch (ty) {
2442 case Ity_I32: return vatom;
2443 case Ity_I16: return assignNew(mce, tyH, unop(Iop_16Uto32, vatom));
2444 case Ity_I8: return assignNew(mce, tyH, unop(Iop_8Uto32, vatom));
2445 default: goto unhandled;
sewardj8ec2cfc2002-10-13 00:57:26 +00002446 }
sewardj6cf40ff2005-04-20 22:31:26 +00002447 } else
2448 if (tyH == Ity_I64) {
2449 switch (ty) {
2450 case Ity_I32: return assignNew(mce, tyH, unop(Iop_32Uto64, vatom));
sewardj69a13322005-04-23 01:14:51 +00002451 case Ity_I16: return assignNew(mce, tyH, unop(Iop_32Uto64,
2452 assignNew(mce, Ity_I32, unop(Iop_16Uto32, vatom))));
2453 case Ity_I8: return assignNew(mce, tyH, unop(Iop_32Uto64,
2454 assignNew(mce, Ity_I32, unop(Iop_8Uto32, vatom))));
sewardj6cf40ff2005-04-20 22:31:26 +00002455 default: goto unhandled;
2456 }
sewardj95448072004-11-22 20:19:51 +00002457 } else {
2458 goto unhandled;
sewardj8ec2cfc2002-10-13 00:57:26 +00002459 }
sewardj95448072004-11-22 20:19:51 +00002460 unhandled:
2461 VG_(printf)("\nty = "); ppIRType(ty); VG_(printf)("\n");
2462 VG_(tool_panic)("zwidenToHostWord");
njn25e49d8e72002-09-23 09:36:25 +00002463}
2464
njn25e49d8e72002-09-23 09:36:25 +00002465
sewardj95448072004-11-22 20:19:51 +00002466/* Generate a shadow store. addr is always the original address atom.
2467 You can pass in either originals or V-bits for the data atom, but
2468 obviously not both. */
njn25e49d8e72002-09-23 09:36:25 +00002469
sewardj95448072004-11-22 20:19:51 +00002470static
sewardj2e595852005-06-30 23:33:37 +00002471void do_shadow_Store ( MCEnv* mce,
2472 IREndness end,
2473 IRAtom* addr, UInt bias,
2474 IRAtom* data, IRAtom* vdata )
njn25e49d8e72002-09-23 09:36:25 +00002475{
sewardj170ee212004-12-10 18:57:51 +00002476 IROp mkAdd;
2477 IRType ty, tyAddr;
2478 IRDirty *di, *diLo64, *diHi64;
2479 IRAtom *addrAct, *addrLo64, *addrHi64;
2480 IRAtom *vdataLo64, *vdataHi64;
sewardj2e595852005-06-30 23:33:37 +00002481 IRAtom *eBias, *eBiasLo64, *eBiasHi64;
sewardj95448072004-11-22 20:19:51 +00002482 void* helper = NULL;
2483 Char* hname = NULL;
sewardj170ee212004-12-10 18:57:51 +00002484
2485 tyAddr = mce->hWordTy;
2486 mkAdd = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
2487 tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
sewardj2e595852005-06-30 23:33:37 +00002488 tl_assert( end == Iend_LE || end == Iend_BE );
sewardj170ee212004-12-10 18:57:51 +00002489
2490 di = diLo64 = diHi64 = NULL;
sewardj2e595852005-06-30 23:33:37 +00002491 eBias = eBiasLo64 = eBiasHi64 = NULL;
sewardj170ee212004-12-10 18:57:51 +00002492 addrAct = addrLo64 = addrHi64 = NULL;
2493 vdataLo64 = vdataHi64 = NULL;
njn25e49d8e72002-09-23 09:36:25 +00002494
sewardj95448072004-11-22 20:19:51 +00002495 if (data) {
2496 tl_assert(!vdata);
2497 tl_assert(isOriginalAtom(mce, data));
2498 tl_assert(bias == 0);
2499 vdata = expr2vbits( mce, data );
2500 } else {
2501 tl_assert(vdata);
2502 }
njn25e49d8e72002-09-23 09:36:25 +00002503
sewardj95448072004-11-22 20:19:51 +00002504 tl_assert(isOriginalAtom(mce,addr));
2505 tl_assert(isShadowAtom(mce,vdata));
njn25e49d8e72002-09-23 09:36:25 +00002506
sewardj95448072004-11-22 20:19:51 +00002507 ty = typeOfIRExpr(mce->bb->tyenv, vdata);
njn25e49d8e72002-09-23 09:36:25 +00002508
sewardj95448072004-11-22 20:19:51 +00002509 /* First, emit a definedness test for the address. This also sets
2510 the address (shadow) to 'defined' following the test. */
2511 complainIfUndefined( mce, addr );
njn25e49d8e72002-09-23 09:36:25 +00002512
sewardj170ee212004-12-10 18:57:51 +00002513 /* Now decide which helper function to call to write the data V
2514 bits into shadow memory. */
sewardj2e595852005-06-30 23:33:37 +00002515 if (end == Iend_LE) {
2516 switch (ty) {
2517 case Ity_V128: /* we'll use the helper twice */
2518 case Ity_I64: helper = &MC_(helperc_STOREV8le);
2519 hname = "MC_(helperc_STOREV8le)";
2520 break;
2521 case Ity_I32: helper = &MC_(helperc_STOREV4le);
2522 hname = "MC_(helperc_STOREV4le)";
2523 break;
2524 case Ity_I16: helper = &MC_(helperc_STOREV2le);
2525 hname = "MC_(helperc_STOREV2le)";
2526 break;
sewardj8cf88b72005-07-08 01:29:33 +00002527 case Ity_I8: helper = &MC_(helperc_STOREV1);
2528 hname = "MC_(helperc_STOREV1)";
sewardj2e595852005-06-30 23:33:37 +00002529 break;
2530 default: VG_(tool_panic)("memcheck:do_shadow_Store(LE)");
2531 }
2532 } else {
sewardj8cf88b72005-07-08 01:29:33 +00002533 switch (ty) {
2534 case Ity_V128: /* we'll use the helper twice */
2535 case Ity_I64: helper = &MC_(helperc_STOREV8be);
2536 hname = "MC_(helperc_STOREV8be)";
2537 break;
2538 case Ity_I32: helper = &MC_(helperc_STOREV4be);
2539 hname = "MC_(helperc_STOREV4be)";
2540 break;
2541 case Ity_I16: helper = &MC_(helperc_STOREV2be);
2542 hname = "MC_(helperc_STOREV2be)";
2543 break;
2544 case Ity_I8: helper = &MC_(helperc_STOREV1);
2545 hname = "MC_(helperc_STOREV1)";
2546 break;
2547 default: VG_(tool_panic)("memcheck:do_shadow_Store(BE)");
2548 }
sewardj95448072004-11-22 20:19:51 +00002549 }
njn25e49d8e72002-09-23 09:36:25 +00002550
sewardj170ee212004-12-10 18:57:51 +00002551 if (ty == Ity_V128) {
2552
sewardj20d38f22005-02-07 23:50:18 +00002553 /* V128-bit case */
sewardj170ee212004-12-10 18:57:51 +00002554 /* See comment in next clause re 64-bit regparms */
sewardj2e595852005-06-30 23:33:37 +00002555 /* also, need to be careful about endianness */
2556
2557 Int offLo64, offHi64;
2558 if (end == Iend_LE) {
2559 offLo64 = 0;
2560 offHi64 = 8;
2561 } else {
sewardj2e595852005-06-30 23:33:37 +00002562 offLo64 = 8;
2563 offHi64 = 0;
2564 }
2565
2566 eBiasLo64 = tyAddr==Ity_I32 ? mkU32(bias+offLo64) : mkU64(bias+offLo64);
2567 addrLo64 = assignNew(mce, tyAddr, binop(mkAdd, addr, eBiasLo64) );
sewardj20d38f22005-02-07 23:50:18 +00002568 vdataLo64 = assignNew(mce, Ity_I64, unop(Iop_V128to64, vdata));
sewardj170ee212004-12-10 18:57:51 +00002569 diLo64 = unsafeIRDirty_0_N(
2570 1/*regparms*/, hname, helper,
2571 mkIRExprVec_2( addrLo64, vdataLo64 ));
2572
sewardj2e595852005-06-30 23:33:37 +00002573 eBiasHi64 = tyAddr==Ity_I32 ? mkU32(bias+offHi64) : mkU64(bias+offHi64);
2574 addrHi64 = assignNew(mce, tyAddr, binop(mkAdd, addr, eBiasHi64) );
sewardj20d38f22005-02-07 23:50:18 +00002575 vdataHi64 = assignNew(mce, Ity_I64, unop(Iop_V128HIto64, vdata));
sewardj170ee212004-12-10 18:57:51 +00002576 diHi64 = unsafeIRDirty_0_N(
2577 1/*regparms*/, hname, helper,
2578 mkIRExprVec_2( addrHi64, vdataHi64 ));
2579
2580 setHelperAnns( mce, diLo64 );
2581 setHelperAnns( mce, diHi64 );
2582 stmt( mce->bb, IRStmt_Dirty(diLo64) );
2583 stmt( mce->bb, IRStmt_Dirty(diHi64) );
2584
sewardj95448072004-11-22 20:19:51 +00002585 } else {
sewardj170ee212004-12-10 18:57:51 +00002586
2587 /* 8/16/32/64-bit cases */
2588 /* Generate the actual address into addrAct. */
2589 if (bias == 0) {
2590 addrAct = addr;
2591 } else {
2592 eBias = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
2593 addrAct = assignNew(mce, tyAddr, binop(mkAdd, addr, eBias) );
2594 }
2595
2596 if (ty == Ity_I64) {
2597 /* We can't do this with regparm 2 on 32-bit platforms, since
2598 the back ends aren't clever enough to handle 64-bit
2599 regparm args. Therefore be different. */
2600 di = unsafeIRDirty_0_N(
2601 1/*regparms*/, hname, helper,
2602 mkIRExprVec_2( addrAct, vdata ));
2603 } else {
2604 di = unsafeIRDirty_0_N(
2605 2/*regparms*/, hname, helper,
2606 mkIRExprVec_2( addrAct,
2607 zwidenToHostWord( mce, vdata )));
2608 }
2609 setHelperAnns( mce, di );
2610 stmt( mce->bb, IRStmt_Dirty(di) );
sewardj95448072004-11-22 20:19:51 +00002611 }
njn25e49d8e72002-09-23 09:36:25 +00002612
sewardj95448072004-11-22 20:19:51 +00002613}
njn25e49d8e72002-09-23 09:36:25 +00002614
njn25e49d8e72002-09-23 09:36:25 +00002615
sewardj95448072004-11-22 20:19:51 +00002616/* Do lazy pessimistic propagation through a dirty helper call, by
2617 looking at the annotations on it. This is the most complex part of
2618 Memcheck. */
njn25e49d8e72002-09-23 09:36:25 +00002619
sewardj95448072004-11-22 20:19:51 +00002620static IRType szToITy ( Int n )
2621{
2622 switch (n) {
2623 case 1: return Ity_I8;
2624 case 2: return Ity_I16;
2625 case 4: return Ity_I32;
2626 case 8: return Ity_I64;
2627 default: VG_(tool_panic)("szToITy(memcheck)");
2628 }
2629}
njn25e49d8e72002-09-23 09:36:25 +00002630
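/* The overall scheme for a dirty call: PCast every input (the guard,
   the unmasked args, any guest state read, any memory read) down to
   a single I32 summary and UifU them together; then PCast that
   summary back out to every output (the destination temp, guest
   state written, memory written). */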
sewardj95448072004-11-22 20:19:51 +00002631static
2632void do_shadow_Dirty ( MCEnv* mce, IRDirty* d )
2633{
sewardj2e595852005-06-30 23:33:37 +00002634 Int i, n, offset, toDo, gSz, gOff;
2635 IRAtom *src, *here, *curr;
2636 IRType tyAddr, tySrc, tyDst;
2637 IRTemp dst;
2638 IREndness end;
2639
2640 /* What's the native endianness? We need to know this. */
sewardj6e340c72005-07-10 00:53:42 +00002641# if defined(VG_BIGENDIAN)
sewardj2e595852005-06-30 23:33:37 +00002642 end = Iend_BE;
sewardj6e340c72005-07-10 00:53:42 +00002643# elif defined(VG_LITTLEENDIAN)
sewardj2e595852005-06-30 23:33:37 +00002644 end = Iend_LE;
2645# else
2646# error "Unknown endianness"
2647# endif
njn25e49d8e72002-09-23 09:36:25 +00002648
sewardj95448072004-11-22 20:19:51 +00002649 /* First check the guard. */
2650 complainIfUndefined(mce, d->guard);
2651
2652 /* Now round up all inputs and PCast over them. */
sewardj7cf97ee2004-11-28 14:25:01 +00002653 curr = definedOfType(Ity_I32);
sewardj95448072004-11-22 20:19:51 +00002654
2655 /* Inputs: unmasked args */
2656 for (i = 0; d->args[i]; i++) {
2657 if (d->cee->mcx_mask & (1<<i)) {
2658 /* ignore this arg */
njn25e49d8e72002-09-23 09:36:25 +00002659 } else {
sewardj95448072004-11-22 20:19:51 +00002660 here = mkPCastTo( mce, Ity_I32, expr2vbits(mce, d->args[i]) );
2661 curr = mkUifU32(mce, here, curr);
njn25e49d8e72002-09-23 09:36:25 +00002662 }
2663 }
sewardj95448072004-11-22 20:19:51 +00002664
2665 /* Inputs: guest state that we read. */
2666 for (i = 0; i < d->nFxState; i++) {
2667 tl_assert(d->fxState[i].fx != Ifx_None);
2668 if (d->fxState[i].fx == Ifx_Write)
2669 continue;
sewardja7203252004-11-26 19:17:47 +00002670
2671 /* Ignore any sections marked as 'always defined'. */
2672 if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size )) {
sewardje9e16d32004-12-10 13:17:55 +00002673 if (0)
sewardja7203252004-11-26 19:17:47 +00002674 VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
2675 d->fxState[i].offset, d->fxState[i].size );
2676 continue;
2677 }
2678
sewardj95448072004-11-22 20:19:51 +00002679 /* This state element is read or modified. So we need to
sewardje9e16d32004-12-10 13:17:55 +00002680 consider it. If larger than 8 bytes, deal with it in 8-byte
2681 chunks. */
2682 gSz = d->fxState[i].size;
2683 gOff = d->fxState[i].offset;
2684 tl_assert(gSz > 0);
2685 while (True) {
2686 if (gSz == 0) break;
2687 n = gSz <= 8 ? gSz : 8;
2688 /* update 'curr' with UifU of the state slice
2689 gOff .. gOff+n-1 */
2690 tySrc = szToITy( n );
2691 src = assignNew( mce, tySrc,
2692 shadow_GET(mce, gOff, tySrc ) );
2693 here = mkPCastTo( mce, Ity_I32, src );
2694 curr = mkUifU32(mce, here, curr);
2695 gSz -= n;
2696 gOff += n;
2697 }
2698
sewardj95448072004-11-22 20:19:51 +00002699 }
2700
2701 /* Inputs: memory. First set up some info needed regardless of
2702 whether we're doing reads or writes. */
2703 tyAddr = Ity_INVALID;
2704
2705 if (d->mFx != Ifx_None) {
2706 /* Because we may do multiple shadow loads/stores from the same
2707 base address, it's best to do a single test of its
2708 definedness right now. Post-instrumentation optimisation
2709 should remove all but this test. */
2710 tl_assert(d->mAddr);
2711 complainIfUndefined(mce, d->mAddr);
2712
2713 tyAddr = typeOfIRExpr(mce->bb->tyenv, d->mAddr);
2714 tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);
2715 tl_assert(tyAddr == mce->hWordTy); /* not really right */
2716 }
2717
2718 /* Deal with memory inputs (reads or modifies) */
2719 if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
2720 offset = 0;
2721 toDo = d->mSize;
sewardj2e595852005-06-30 23:33:37 +00002722 /* chew off 32-bit chunks. We don't care about the endianness
2723 since it's all going to be condensed down to a single bit,
2724 but nevertheless choose an endianness which is hopefully
2725 native to the platform. */
sewardj95448072004-11-22 20:19:51 +00002726 while (toDo >= 4) {
2727 here = mkPCastTo(
2728 mce, Ity_I32,
sewardj2e595852005-06-30 23:33:37 +00002729 expr2vbits_Load ( mce, end, Ity_I32,
sewardj95448072004-11-22 20:19:51 +00002730 d->mAddr, d->mSize - toDo )
2731 );
2732 curr = mkUifU32(mce, here, curr);
2733 toDo -= 4;
2734 }
2735 /* chew off 16-bit chunks */
2736 while (toDo >= 2) {
2737 here = mkPCastTo(
2738 mce, Ity_I32,
sewardj2e595852005-06-30 23:33:37 +00002739 expr2vbits_Load ( mce, end, Ity_I16,
sewardj95448072004-11-22 20:19:51 +00002740 d->mAddr, d->mSize - toDo )
2741 );
2742 curr = mkUifU32(mce, here, curr);
2743 toDo -= 2;
2744 }
2745 tl_assert(toDo == 0); /* also need to handle 1-byte excess */
2746 }
2747
2748 /* Whew! So curr is a 32-bit V-value summarising pessimistically
2749 all the inputs to the helper. Now we need to re-distribute the
2750 results to all destinations. */
2751
2752 /* Outputs: the destination temporary, if there is one. */
2753 if (d->tmp != IRTemp_INVALID) {
2754 dst = findShadowTmp(mce, d->tmp);
2755 tyDst = typeOfIRTemp(mce->bb->tyenv, d->tmp);
2756 assign( mce->bb, dst, mkPCastTo( mce, tyDst, curr) );
2757 }
2758
2759 /* Outputs: guest state that we write or modify. */
2760 for (i = 0; i < d->nFxState; i++) {
2761 tl_assert(d->fxState[i].fx != Ifx_None);
2762 if (d->fxState[i].fx == Ifx_Read)
2763 continue;
sewardja7203252004-11-26 19:17:47 +00002764 /* Ignore any sections marked as 'always defined'. */
2765 if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size ))
2766 continue;
sewardje9e16d32004-12-10 13:17:55 +00002767 /* This state element is written or modified. So we need to
2768 consider it. If larger than 8 bytes, deal with it in 8-byte
2769 chunks. */
2770 gSz = d->fxState[i].size;
2771 gOff = d->fxState[i].offset;
2772 tl_assert(gSz > 0);
2773 while (True) {
2774 if (gSz == 0) break;
2775 n = gSz <= 8 ? gSz : 8;
2776 /* Write suitably-casted 'curr' to the state slice
2777 gOff .. gOff+n-1 */
2778 tyDst = szToITy( n );
2779 do_shadow_PUT( mce, gOff,
2780 NULL, /* original atom */
2781 mkPCastTo( mce, tyDst, curr ) );
2782 gSz -= n;
2783 gOff += n;
2784 }
sewardj95448072004-11-22 20:19:51 +00002785 }
2786
sewardj2e595852005-06-30 23:33:37 +00002787 /* Outputs: memory that we write or modify. Same comments about
2788 endianness as above apply. */
sewardj95448072004-11-22 20:19:51 +00002789 if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
2790 offset = 0;
2791 toDo = d->mSize;
2792 /* chew off 32-bit chunks */
2793 while (toDo >= 4) {
sewardj2e595852005-06-30 23:33:37 +00002794 do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
2795 NULL, /* original data */
2796 mkPCastTo( mce, Ity_I32, curr ) );
sewardj95448072004-11-22 20:19:51 +00002797 toDo -= 4;
2798 }
2799 /* chew off 16-bit chunks */
2800 while (toDo >= 2) {
sewardj2e595852005-06-30 23:33:37 +00002801 do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
2802 NULL, /* original data */
2803 mkPCastTo( mce, Ity_I16, curr ) );
sewardj95448072004-11-22 20:19:51 +00002804 toDo -= 2;
2805 }
2806 tl_assert(toDo == 0); /* also need to handle 1-byte excess */
2807 }
2808
njn25e49d8e72002-09-23 09:36:25 +00002809}
2810
sewardj826ec492005-05-12 18:05:00 +00002811/* We have an ABI hint telling us that [base .. base+len-1] is to
2812 become undefined ("writable"). Generate code to call a helper to
2813 notify the A/V bit machinery of this fact.
2814
2815 We call
2816 void MC_(helperc_MAKE_STACK_UNINIT) ( Addr base, UWord len );
2817*/
2818static
2819void do_AbiHint ( MCEnv* mce, IRExpr* base, Int len )
2820{
2821 IRDirty* di;
2822 di = unsafeIRDirty_0_N(
2823 0/*regparms*/,
2824 "MC_(helperc_MAKE_STACK_UNINIT)",
2825 &MC_(helperc_MAKE_STACK_UNINIT),
2826 mkIRExprVec_2( base, mkIRExpr_HWord( (UInt)len) )
2827 );
2828 stmt( mce->bb, IRStmt_Dirty(di) );
2829}
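/* (Typically this fires on stack adjustments: for example, when a
   frame is abandoned on function return, the freed area is marked
   undefined so that later reads of stale locals are reported.) */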
2830
njn25e49d8e72002-09-23 09:36:25 +00002831
sewardj95448072004-11-22 20:19:51 +00002832/*------------------------------------------------------------*/
2833/*--- Memcheck main ---*/
2834/*------------------------------------------------------------*/
njn25e49d8e72002-09-23 09:36:25 +00002835
sewardj95448072004-11-22 20:19:51 +00002836static Bool isBogusAtom ( IRAtom* at )
njn25e49d8e72002-09-23 09:36:25 +00002837{
sewardj95448072004-11-22 20:19:51 +00002838 ULong n = 0;
2839 IRConst* con;
sewardj710d6c22005-03-20 18:55:15 +00002840 tl_assert(isIRAtom(at));
sewardj95448072004-11-22 20:19:51 +00002841 if (at->tag == Iex_Tmp)
2842 return False;
2843 tl_assert(at->tag == Iex_Const);
2844 con = at->Iex.Const.con;
2845 switch (con->tag) {
sewardjd5204dc2004-12-31 01:16:11 +00002846 case Ico_U1: return False;
2847 case Ico_U8: n = (ULong)con->Ico.U8; break;
2848 case Ico_U16: n = (ULong)con->Ico.U16; break;
2849 case Ico_U32: n = (ULong)con->Ico.U32; break;
2850 case Ico_U64: n = (ULong)con->Ico.U64; break;
2851 case Ico_F64: return False;
2852 case Ico_F64i: return False;
2853 case Ico_V128: return False;
sewardj95448072004-11-22 20:19:51 +00002854 default: ppIRExpr(at); tl_assert(0);
2855 }
2856 /* VG_(printf)("%llx\n", n); */
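   /* These appear to be the magic constants of word-at-a-time string
      scanning tricks (e.g. the 0xFEFEFEFF / 0x80808080 pair used by
      optimised strlen), which deliberately compute on partially
      undefined words; their presence is what justifies the expensive
      instrumentation. */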
sewardj96a922e2005-04-23 23:26:29 +00002857 return (/*32*/ n == 0xFEFEFEFFULL
2858 /*32*/ || n == 0x80808080ULL
tomd9774d72005-06-27 08:11:01 +00002859 /*64*/ || n == 0xFFFFFFFFFEFEFEFFULL
sewardj96a922e2005-04-23 23:26:29 +00002860 /*64*/ || n == 0xFEFEFEFEFEFEFEFFULL
tomd9774d72005-06-27 08:11:01 +00002861 /*64*/ || n == 0x0000000000008080ULL
sewardj96a922e2005-04-23 23:26:29 +00002862 /*64*/ || n == 0x8080808080808080ULL
2863 /*64*/ || n == 0x0101010101010101ULL
2864 );
sewardj95448072004-11-22 20:19:51 +00002865}

static Bool checkForBogusLiterals ( /*FLAT*/ IRStmt* st )
{
   Int      i;
   IRExpr*  e;
   IRDirty* d;
   switch (st->tag) {
      case Ist_Tmp:
         e = st->Ist.Tmp.data;
         switch (e->tag) {
            case Iex_Get:
            case Iex_Tmp:
               return False;
            case Iex_Const:
               return isBogusAtom(e);
            case Iex_Unop:
               return isBogusAtom(e->Iex.Unop.arg);
            case Iex_GetI:
               return isBogusAtom(e->Iex.GetI.ix);
            case Iex_Binop:
               return isBogusAtom(e->Iex.Binop.arg1)
                      || isBogusAtom(e->Iex.Binop.arg2);
            case Iex_Mux0X:
               return isBogusAtom(e->Iex.Mux0X.cond)
                      || isBogusAtom(e->Iex.Mux0X.expr0)
                      || isBogusAtom(e->Iex.Mux0X.exprX);
            case Iex_Load:
               return isBogusAtom(e->Iex.Load.addr);
            case Iex_CCall:
               for (i = 0; e->Iex.CCall.args[i]; i++)
                  if (isBogusAtom(e->Iex.CCall.args[i]))
                     return True;
               return False;
            default:
               goto unhandled;
         }
      case Ist_Dirty:
         d = st->Ist.Dirty.details;
         for (i = 0; d->args[i]; i++)
            if (isBogusAtom(d->args[i]))
               return True;
         if (d->guard && isBogusAtom(d->guard))
            return True;
         if (d->mAddr && isBogusAtom(d->mAddr))
            return True;
         return False;
      case Ist_Put:
         return isBogusAtom(st->Ist.Put.data);
      case Ist_PutI:
         return isBogusAtom(st->Ist.PutI.ix)
                || isBogusAtom(st->Ist.PutI.data);
      case Ist_Store:
         return isBogusAtom(st->Ist.Store.addr)
                || isBogusAtom(st->Ist.Store.data);
      case Ist_Exit:
         return isBogusAtom(st->Ist.Exit.guard);
      case Ist_AbiHint:
         return isBogusAtom(st->Ist.AbiHint.base);
      case Ist_NoOp:
      case Ist_IMark:
      case Ist_MFence:
         return False;
      default:
      unhandled:
         ppIRStmt(st);
         VG_(tool_panic)("checkForBogusLiterals");
   }
}

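/* Top-level entry point of the Memcheck instrumenter.  Returns a new
   BB in which each statement of bb_in is preceded by IR that keeps
   the shadow (definedness) state up to date, and which complains
   where undefinedness must not occur -- for example, at exit guards
   and at the final jump target bb->next. */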
IRBB* MC_(instrument) ( IRBB* bb_in, VexGuestLayout* layout,
                        Addr64 orig_addr_noredir, VexGuestExtents* vge,
                        IRType gWordTy, IRType hWordTy )
{
   Bool    verboze = False; //True;
   Bool    bogus;
   Int     i, j, first_stmt;
   IRStmt* st;
   MCEnv   mce;
   IRBB*   bb;

   if (gWordTy != hWordTy) {
      /* We don't currently support this case. */
      VG_(tool_panic)("host/guest word size mismatch");
   }

   /* Check we're not completely nuts */
   tl_assert(sizeof(UWord) == sizeof(void*));
   tl_assert(sizeof(Word)  == sizeof(void*));
   tl_assert(sizeof(ULong) == 8);
   tl_assert(sizeof(Long)  == 8);
   tl_assert(sizeof(UInt)  == 4);
   tl_assert(sizeof(Int)   == 4);

   /* Set up BB */
   bb = emptyIRBB();
   bb->tyenv    = dopyIRTypeEnv(bb_in->tyenv);
   bb->next     = dopyIRExpr(bb_in->next);
   bb->jumpkind = bb_in->jumpkind;

   /* Set up the running environment.  Only .bb is modified as we go
      along. */
   mce.bb             = bb;
   mce.layout         = layout;
   mce.n_originalTmps = bb->tyenv->types_used;
   mce.hWordTy        = hWordTy;
   mce.bogusLiterals  = False;
   mce.tmpMap         = LibVEX_Alloc(mce.n_originalTmps * sizeof(IRTemp));
   for (i = 0; i < mce.n_originalTmps; i++)
      mce.tmpMap[i] = IRTemp_INVALID;

   /* Make a preliminary inspection of the statements, to see if there
      are any dodgy-looking literals.  If there are, we generate
      extra-detailed (hence extra-expensive) instrumentation in
      places.  Scan the whole bb even if dodginess is found early,
      so that the flatness assertion is applied to all stmts. */

   bogus = False;

   for (i = 0; i < bb_in->stmts_used; i++) {

      st = bb_in->stmts[i];
      tl_assert(st);
      tl_assert(isFlatIRStmt(st));

      if (!bogus) {
         bogus = checkForBogusLiterals(st);
         if (0 && bogus) {
            VG_(printf)("bogus: ");
            ppIRStmt(st);
            VG_(printf)("\n");
         }
      }

   }

   mce.bogusLiterals = bogus;

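   /* Note: mce.bogusLiterals is read by the expression instrumenter
      elsewhere in this file (not shown here), where it enables the
      extra-detailed instrumentation mentioned above -- plausibly,
      e.g., more precise handling of word-sized equality comparisons.
      Nothing in this function consults it again. */
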
   /* Iterate over the stmts to generate instrumentation. */

   for (i = 0; i < bb_in->stmts_used; i++) {

      st = bb_in->stmts[i];
      first_stmt = bb->stmts_used;

      if (verboze) {
         ppIRStmt(st);
         VG_(printf)("\n\n");
      }

      /* Generate instrumentation code for each stmt ... */

      switch (st->tag) {

         case Ist_Tmp:
            assign( bb, findShadowTmp(&mce, st->Ist.Tmp.tmp),
                        expr2vbits( &mce, st->Ist.Tmp.data) );
            break;

         case Ist_Put:
            do_shadow_PUT( &mce,
                           st->Ist.Put.offset,
                           st->Ist.Put.data,
                           NULL /* shadow atom */ );
            break;

         case Ist_PutI:
            do_shadow_PUTI( &mce,
                            st->Ist.PutI.descr,
                            st->Ist.PutI.ix,
                            st->Ist.PutI.bias,
                            st->Ist.PutI.data );
            break;

         case Ist_Store:
            do_shadow_Store( &mce, st->Ist.Store.end,
                                   st->Ist.Store.addr, 0/* addr bias */,
                                   st->Ist.Store.data,
                                   NULL /* shadow data */ );
            break;

         case Ist_Exit:
            complainIfUndefined( &mce, st->Ist.Exit.guard );
            break;

         case Ist_NoOp:
         case Ist_IMark:
         case Ist_MFence:
            break;

         case Ist_Dirty:
            do_shadow_Dirty( &mce, st->Ist.Dirty.details );
            break;

         case Ist_AbiHint:
            do_AbiHint( &mce, st->Ist.AbiHint.base, st->Ist.AbiHint.len );
            break;

         default:
            VG_(printf)("\n");
            ppIRStmt(st);
            VG_(printf)("\n");
            VG_(tool_panic)("memcheck: unhandled IRStmt");

      } /* switch (st->tag) */

      if (verboze) {
         for (j = first_stmt; j < bb->stmts_used; j++) {
            VG_(printf)("   ");
            ppIRStmt(bb->stmts[j]);
            VG_(printf)("\n");
         }
         VG_(printf)("\n");
      }

      /* ... and finally copy the stmt itself to the output. */
      addStmtToIRBB(bb, st);

   }

   /* Now we need to complain if the jump target is undefined. */
   first_stmt = bb->stmts_used;

   if (verboze) {
      VG_(printf)("bb->next = ");
      ppIRExpr(bb->next);
      VG_(printf)("\n\n");
   }

   complainIfUndefined( &mce, bb->next );

   if (verboze) {
      for (j = first_stmt; j < bb->stmts_used; j++) {
         VG_(printf)("   ");
         ppIRStmt(bb->stmts[j]);
         VG_(printf)("\n");
      }
      VG_(printf)("\n");
   }

   return bb;
}
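
/* Worked example (illustrative only -- the exact shadow IR emitted
   depends on the guest architecture and on expr2vbits and the
   do_shadow_* helpers): for an input statement

      t1 = LDle:I32(t0)

   the main loop above first emits IR that computes the V (validity)
   bits of the loaded word into t1's shadow temp, complaining if the
   address t0 is itself undefined, and only then copies the original
   statement through to the output BB unchanged. */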

/*--------------------------------------------------------------------*/
/*--- end                                           mc_translate.c ---*/
/*--------------------------------------------------------------------*/