
/*--------------------------------------------------------------------*/
/*--- Instrument IR to perform memory checking operations.         ---*/
/*---                                               mc_translate.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of MemCheck, a heavyweight Valgrind tool for
   detecting memory errors.

   Copyright (C) 2000-2007 Julian Seward
   jseward@acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "pub_tool_basics.h"
#include "pub_tool_hashtable.h"     // For mc_include.h
#include "pub_tool_libcassert.h"
#include "pub_tool_libcprint.h"
#include "pub_tool_tooliface.h"
#include "pub_tool_machine.h"       // VG_(fnptr_to_fnentry)
#include "mc_include.h"


/* This file implements the Memcheck instrumentation, and in
   particular contains the core of its undefined value detection
   machinery.  For a comprehensive background of the terminology,
   algorithms and rationale used herein, read:

     Using Valgrind to detect undefined value errors with
     bit-precision

     Julian Seward and Nicholas Nethercote

     2005 USENIX Annual Technical Conference (General Track),
     Anaheim, CA, USA, April 10-15, 2005.
*/

/*------------------------------------------------------------*/
/*--- Forward decls                                        ---*/
/*------------------------------------------------------------*/

struct _MCEnv;

static IRType  shadowType ( IRType ty );
static IRExpr* expr2vbits ( struct _MCEnv* mce, IRExpr* e );


/*------------------------------------------------------------*/
/*--- Memcheck running state, and tmp management.          ---*/
/*------------------------------------------------------------*/

/* Carries around state during memcheck instrumentation. */
typedef
   struct _MCEnv {
      /* MODIFIED: the superblock being constructed.  IRStmts are
         added. */
      IRSB* bb;

      /* MODIFIED: a table [0 .. #temps_in_original_bb-1] which maps
         original temps to their current shadow temp.  Initially all
         entries are IRTemp_INVALID.  Entries are added lazily since
         many original temps are not used due to optimisation prior
         to instrumentation.  Note that floating point original tmps
         are shadowed by integer tmps of the same size, and Bit-typed
         original tmps are shadowed by the type Ity_I8.  See comment
         below. */
      IRTemp* tmpMap;
      Int     n_originalTmps; /* for range checking */

      /* MODIFIED: indicates whether "bogus" literals have so far been
         found.  Starts off False, and may change to True. */
      Bool bogusLiterals;

      /* READONLY: the guest layout.  This indicates which parts of
         the guest state should be regarded as 'always defined'. */
      VexGuestLayout* layout;

      /* READONLY: the host word type.  Needed for constructing
         arguments of type 'HWord' to be passed to helper functions.
         Ity_I32 or Ity_I64 only. */
      IRType hWordTy;
   }
   MCEnv;

/* SHADOW TMP MANAGEMENT.  Shadow tmps are allocated lazily (on
   demand), as they are encountered.  This is for two reasons.

   (1) (less important reason): Many original tmps are unused due to
   initial IR optimisation, and we do not want to waste space in
   tables tracking them.

   Shadow IRTemps are therefore allocated on demand.  mce.tmpMap is a
   table indexed [0 .. n_originalTmps-1], which gives the current
   shadow for each original tmp, or IRTemp_INVALID if none is so far
   assigned.  It is necessary to support making multiple assignments
   to a shadow -- specifically, after testing a shadow for
   definedness, it needs to be made defined.  But IR's SSA property
   disallows this.

   (2) (more important reason): Therefore, when a shadow needs to get
   a new value, a new temporary is created, the value is assigned to
   that, and the tmpMap is updated to reflect the new binding.

   A corollary is that if the tmpMap maps a given tmp to
   IRTemp_INVALID and we are hoping to read that shadow tmp, it means
   there's a read-before-write error in the original tmps.  The IR
   sanity checker should catch all such anomalies, however.
*/
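
/* For example (illustrative only, with invented names): the first
   reference to the shadow of an original tmp t5 allocates, say, t5'
   and binds tmpMap[t5] = t5'.  If t5 is later tested for definedness,
   the test-and-complain code must then mark it as defined; since t5'
   cannot be assigned a second time, a fresh shadow t5'' is allocated,
   assigned 'all defined', and tmpMap[t5] is rebound to t5''. */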

/* Find the tmp currently shadowing the given original tmp.  If none
   so far exists, allocate one.  */
static IRTemp findShadowTmp ( MCEnv* mce, IRTemp orig )
{
   tl_assert(orig < mce->n_originalTmps);
   if (mce->tmpMap[orig] == IRTemp_INVALID) {
      mce->tmpMap[orig]
         = newIRTemp(mce->bb->tyenv,
                     shadowType(mce->bb->tyenv->types[orig]));
   }
   return mce->tmpMap[orig];
}

/* Allocate a new shadow for the given original tmp.  This means any
   previous shadow is abandoned.  This is needed because it is
   necessary to give a new value to a shadow once it has been tested
   for undefinedness, but unfortunately IR's SSA property disallows
   this.  Instead we must abandon the old shadow, allocate a new one
   and use that instead. */
static void newShadowTmp ( MCEnv* mce, IRTemp orig )
{
   tl_assert(orig < mce->n_originalTmps);
   mce->tmpMap[orig]
      = newIRTemp(mce->bb->tyenv,
                  shadowType(mce->bb->tyenv->types[orig]));
}


/*------------------------------------------------------------*/
/*--- IRAtoms -- a subset of IRExprs                        ---*/
/*------------------------------------------------------------*/

/* An atom is either an IRExpr_Const or an IRExpr_Tmp, as defined by
   isIRAtom() in libvex_ir.h.  Because this instrumenter expects flat
   input, most of this code deals in atoms.  Usefully, a value atom
   always has a V-value which is also an atom: constants are shadowed
   by constants, and temps are shadowed by the corresponding shadow
   temporary. */

typedef  IRExpr  IRAtom;

/* (used for sanity checks only): is this an atom which looks
   like it's from original code? */
static Bool isOriginalAtom ( MCEnv* mce, IRAtom* a1 )
{
   if (a1->tag == Iex_Const)
      return True;
   if (a1->tag == Iex_RdTmp && a1->Iex.RdTmp.tmp < mce->n_originalTmps)
      return True;
   return False;
}

/* (used for sanity checks only): is this an atom which looks
   like it's from shadow code? */
static Bool isShadowAtom ( MCEnv* mce, IRAtom* a1 )
{
   if (a1->tag == Iex_Const)
      return True;
   if (a1->tag == Iex_RdTmp && a1->Iex.RdTmp.tmp >= mce->n_originalTmps)
      return True;
   return False;
}

/* (used for sanity checks only): check that both args are atoms and
   are identically-kinded. */
static Bool sameKindedAtoms ( IRAtom* a1, IRAtom* a2 )
{
   if (a1->tag == Iex_RdTmp && a2->tag == Iex_RdTmp)
      return True;
   if (a1->tag == Iex_Const && a2->tag == Iex_Const)
      return True;
   return False;
}


/*------------------------------------------------------------*/
/*--- Type management                                       ---*/
/*------------------------------------------------------------*/

/* Shadow state is always accessed using integer types.  This returns
   an integer type with the same size (as per sizeofIRType) as the
   given type.  The only valid shadow types are I1, I8, I16, I32,
   I64, I128 and V128. */

static IRType shadowType ( IRType ty )
{
   switch (ty) {
      case Ity_I1:
      case Ity_I8:
      case Ity_I16:
      case Ity_I32:
      case Ity_I64:
      case Ity_I128: return ty;
      case Ity_F32:  return Ity_I32;
      case Ity_F64:  return Ity_I64;
      case Ity_V128: return Ity_V128;
      default: ppIRType(ty);
               VG_(tool_panic)("memcheck:shadowType");
   }
}

/* Produce a 'defined' value of the given shadow type.  Should only be
   supplied shadow types (I1/I8/I16/I32/I64/V128). */
static IRExpr* definedOfType ( IRType ty ) {
   switch (ty) {
      case Ity_I1:   return IRExpr_Const(IRConst_U1(False));
      case Ity_I8:   return IRExpr_Const(IRConst_U8(0));
      case Ity_I16:  return IRExpr_Const(IRConst_U16(0));
      case Ity_I32:  return IRExpr_Const(IRConst_U32(0));
      case Ity_I64:  return IRExpr_Const(IRConst_U64(0));
      case Ity_V128: return IRExpr_Const(IRConst_V128(0x0000));
      default:       VG_(tool_panic)("memcheck:definedOfType");
   }
}


/*------------------------------------------------------------*/
/*--- Constructing IR fragments                             ---*/
/*------------------------------------------------------------*/

/* assign value to tmp */
#define assign(_bb,_tmp,_expr)   \
   addStmtToIRSB((_bb), IRStmt_WrTmp((_tmp),(_expr)))

/* add stmt to a bb */
#define stmt(_bb,_stmt)    \
   addStmtToIRSB((_bb), (_stmt))

/* build various kinds of expressions */
#define binop(_op, _arg1, _arg2) IRExpr_Binop((_op),(_arg1),(_arg2))
#define unop(_op, _arg)          IRExpr_Unop((_op),(_arg))
#define mkU8(_n)                 IRExpr_Const(IRConst_U8(_n))
#define mkU16(_n)                IRExpr_Const(IRConst_U16(_n))
#define mkU32(_n)                IRExpr_Const(IRConst_U32(_n))
#define mkU64(_n)                IRExpr_Const(IRConst_U64(_n))
#define mkV128(_n)               IRExpr_Const(IRConst_V128(_n))
#define mkexpr(_tmp)             IRExpr_RdTmp((_tmp))

/* bind the given expression to a new temporary, and return the
   temporary.  This effectively converts an arbitrary expression into
   an atom. */
static IRAtom* assignNew ( MCEnv* mce, IRType ty, IRExpr* e ) {
   IRTemp t = newIRTemp(mce->bb->tyenv, ty);
   assign(mce->bb, t, e);
   return mkexpr(t);
}


/*------------------------------------------------------------*/
/*--- Constructing definedness primitive ops                ---*/
/*------------------------------------------------------------*/

/* --------- Defined-if-either-defined --------- */

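/* In shadow values, a 0 bit means "defined" and a 1 bit means
   "undefined"; AND therefore yields a bit which is defined if either
   argument's corresponding bit is defined. */
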
static IRAtom* mkDifD8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I8, binop(Iop_And8, a1, a2));
}

static IRAtom* mkDifD16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I16, binop(Iop_And16, a1, a2));
}

static IRAtom* mkDifD32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I32, binop(Iop_And32, a1, a2));
}

static IRAtom* mkDifD64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I64, binop(Iop_And64, a1, a2));
}

static IRAtom* mkDifDV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_V128, binop(Iop_AndV128, a1, a2));
}

/* --------- Undefined-if-either-undefined --------- */

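/* Dually, OR yields a bit which is undefined if either argument's
   corresponding bit is undefined. */
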
static IRAtom* mkUifU8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I8, binop(Iop_Or8, a1, a2));
}

static IRAtom* mkUifU16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I16, binop(Iop_Or16, a1, a2));
}

static IRAtom* mkUifU32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I32, binop(Iop_Or32, a1, a2));
}

static IRAtom* mkUifU64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I64, binop(Iop_Or64, a1, a2));
}

static IRAtom* mkUifUV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_V128, binop(Iop_OrV128, a1, a2));
}

static IRAtom* mkUifU ( MCEnv* mce, IRType vty, IRAtom* a1, IRAtom* a2 ) {
   switch (vty) {
      case Ity_I8:   return mkUifU8(mce, a1, a2);
      case Ity_I16:  return mkUifU16(mce, a1, a2);
      case Ity_I32:  return mkUifU32(mce, a1, a2);
      case Ity_I64:  return mkUifU64(mce, a1, a2);
      case Ity_V128: return mkUifUV128(mce, a1, a2);
      default:
         VG_(printf)("\n"); ppIRType(vty); VG_(printf)("\n");
         VG_(tool_panic)("memcheck:mkUifU");
   }
}

/* --------- The Left-family of operations. --------- */

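/* mkLeft(vbits) computes vbits | -vbits.  Since -v = ~v + 1, this
   smears the least significant undefined (1) bit leftwards: e.g. for
   8 bits, mkLeft8(0b00010000) = 0b00010000 | 0b11110000 = 0b11110000.
   Everything at and above the lowest undefined bit becomes undefined,
   modelling how a carry chain can propagate uncertainty upwards. */
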
static IRAtom* mkLeft8 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   /* It's safe to duplicate a1 since it's only an atom */
   return assignNew(mce, Ity_I8,
                    binop(Iop_Or8, a1,
                          assignNew(mce, Ity_I8,
                                    unop(Iop_Neg8, a1))));
}

static IRAtom* mkLeft16 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   /* It's safe to duplicate a1 since it's only an atom */
   return assignNew(mce, Ity_I16,
                    binop(Iop_Or16, a1,
                          assignNew(mce, Ity_I16,
                                    unop(Iop_Neg16, a1))));
}

static IRAtom* mkLeft32 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   /* It's safe to duplicate a1 since it's only an atom */
   return assignNew(mce, Ity_I32,
                    binop(Iop_Or32, a1,
                          assignNew(mce, Ity_I32,
                                    unop(Iop_Neg32, a1))));
}

static IRAtom* mkLeft64 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   /* It's safe to duplicate a1 since it's only an atom */
   return assignNew(mce, Ity_I64,
                    binop(Iop_Or64, a1,
                          assignNew(mce, Ity_I64,
                                    unop(Iop_Neg64, a1))));
}

/* --------- 'Improvement' functions for AND/OR. --------- */

/* ImproveAND(data, vbits) = data OR vbits.  Defined (0) data 0s give
   defined (0); all other -> undefined (1).
*/
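/* Rationale: if a data bit is 0 and that bit is defined, then
   (data AND x) is 0 at that position regardless of x, so the result
   bit is defined there even if the other operand's bit is not. */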
static IRAtom* mkImproveAND8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(mce, Ity_I8, binop(Iop_Or8, data, vbits));
}

static IRAtom* mkImproveAND16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(mce, Ity_I16, binop(Iop_Or16, data, vbits));
}

static IRAtom* mkImproveAND32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(mce, Ity_I32, binop(Iop_Or32, data, vbits));
}

static IRAtom* mkImproveAND64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(mce, Ity_I64, binop(Iop_Or64, data, vbits));
}

static IRAtom* mkImproveANDV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(mce, Ity_V128, binop(Iop_OrV128, data, vbits));
}

/* ImproveOR(data, vbits) = ~data OR vbits.  Defined (0) data 1s give
   defined (0); all other -> undefined (1).
*/
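/* Rationale: if a data bit is 1 and that bit is defined, then
   (data OR x) is 1 at that position regardless of x, so the result
   bit is defined there even if the other operand's bit is not. */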
static IRAtom* mkImproveOR8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             mce, Ity_I8,
             binop(Iop_Or8,
                   assignNew(mce, Ity_I8, unop(Iop_Not8, data)),
                   vbits) );
}

static IRAtom* mkImproveOR16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             mce, Ity_I16,
             binop(Iop_Or16,
                   assignNew(mce, Ity_I16, unop(Iop_Not16, data)),
                   vbits) );
}

static IRAtom* mkImproveOR32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             mce, Ity_I32,
             binop(Iop_Or32,
                   assignNew(mce, Ity_I32, unop(Iop_Not32, data)),
                   vbits) );
}

static IRAtom* mkImproveOR64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             mce, Ity_I64,
             binop(Iop_Or64,
                   assignNew(mce, Ity_I64, unop(Iop_Not64, data)),
                   vbits) );
}

static IRAtom* mkImproveORV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             mce, Ity_V128,
             binop(Iop_OrV128,
                   assignNew(mce, Ity_V128, unop(Iop_NotV128, data)),
                   vbits) );
}

/* --------- Pessimising casts. --------- */

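/* A pessimising cast converts a vbits value of any type into one of
   the given type, discarding all positional information: the result
   is all-zeroes (fully defined) iff the input is all-zeroes, and
   all-ones (fully undefined) otherwise.  E.g. casting the I16 vbits
   0x0040 to I32 yields 0xFFFFFFFF, while 0x0000 yields 0x00000000. */
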
static IRAtom* mkPCastTo( MCEnv* mce, IRType dst_ty, IRAtom* vbits )
{
   IRType  ty;
   IRAtom* tmp1;
   /* Note, dst_ty is a shadow type, not an original type. */
   /* First of all, collapse vbits down to a single bit. */
   tl_assert(isShadowAtom(mce,vbits));
   ty   = typeOfIRExpr(mce->bb->tyenv, vbits);
   tmp1 = NULL;
   switch (ty) {
      case Ity_I1:
         tmp1 = vbits;
         break;
      case Ity_I8:
         tmp1 = assignNew(mce, Ity_I1, unop(Iop_CmpNEZ8, vbits));
         break;
      case Ity_I16:
         tmp1 = assignNew(mce, Ity_I1, unop(Iop_CmpNEZ16, vbits));
         break;
      case Ity_I32:
         tmp1 = assignNew(mce, Ity_I1, unop(Iop_CmpNEZ32, vbits));
         break;
      case Ity_I64:
         tmp1 = assignNew(mce, Ity_I1, unop(Iop_CmpNEZ64, vbits));
         break;
      case Ity_I128: {
         /* Gah.  Chop it in half, OR the halves together, and compare
            that with zero. */
         IRAtom* tmp2 = assignNew(mce, Ity_I64, unop(Iop_128HIto64, vbits));
         IRAtom* tmp3 = assignNew(mce, Ity_I64, unop(Iop_128to64, vbits));
         IRAtom* tmp4 = assignNew(mce, Ity_I64, binop(Iop_Or64, tmp2, tmp3));
         tmp1         = assignNew(mce, Ity_I1, unop(Iop_CmpNEZ64, tmp4));
         break;
      }
      default:
         ppIRType(ty);
         VG_(tool_panic)("mkPCastTo(1)");
   }
   tl_assert(tmp1);
   /* Now widen up to the dst type. */
   switch (dst_ty) {
      case Ity_I1:
         return tmp1;
      case Ity_I8:
         return assignNew(mce, Ity_I8, unop(Iop_1Sto8, tmp1));
      case Ity_I16:
         return assignNew(mce, Ity_I16, unop(Iop_1Sto16, tmp1));
      case Ity_I32:
         return assignNew(mce, Ity_I32, unop(Iop_1Sto32, tmp1));
      case Ity_I64:
         return assignNew(mce, Ity_I64, unop(Iop_1Sto64, tmp1));
      case Ity_V128:
         tmp1 = assignNew(mce, Ity_I64,  unop(Iop_1Sto64, tmp1));
         tmp1 = assignNew(mce, Ity_V128, binop(Iop_64HLtoV128, tmp1, tmp1));
         return tmp1;
      case Ity_I128:
         tmp1 = assignNew(mce, Ity_I64,  unop(Iop_1Sto64, tmp1));
         tmp1 = assignNew(mce, Ity_I128, binop(Iop_64HLto128, tmp1, tmp1));
         return tmp1;
      default:
         ppIRType(dst_ty);
         VG_(tool_panic)("mkPCastTo(2)");
   }
}

/* --------- Accurate interpretation of CmpEQ/CmpNE. --------- */
/*
   Normally, we can do CmpEQ/CmpNE by doing UifU on the arguments, and
   PCasting to Ity_U1.  However, sometimes it is necessary to be more
   accurate.  The insight is that the result is defined if two
   corresponding bits can be found, one from each argument, so that
   both bits are defined but are different -- that makes EQ say "No"
   and NE say "Yes".  Hence, we compute an improvement term and DifD
   it onto the "normal" (UifU) result.

   The result is:

   PCastTo<1> (
      -- naive version
      PCastTo<sz>( UifU<sz>(vxx, vyy) )

      `DifD<sz>`

      -- improvement term
      PCastTo<sz>( PCast<sz>( CmpEQ<sz> ( vec, 1...1 ) ) )
   )

   where
     vec contains 0 (defined) bits where the corresponding arg bits
     are defined but different, and 1 bits otherwise.

     vec = Or<sz>( vxx,   // 0 iff bit defined
                   vyy,   // 0 iff bit defined
                   Not<sz>(Xor<sz>( xx, yy )) // 0 iff bits different
                 )

   If any bit of vec is 0, the result is defined and so the
   improvement term should produce 0...0, else it should produce
   1...1.

   Hence require for the improvement term:

      if vec == 1...1 then 1...1 else 0...0
      ->
      PCast<sz>( CmpEQ<sz> ( vec, 1...1 ) )

   This was extensively re-analysed and checked on 6 July 05.
*/
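/* Worked 4-bit example: let xx = 1010 (fully defined, vxx = 0000)
   and yy = 0000 with its top bit undefined (vyy = 1000).  Then
   Not(Xor(xx,yy)) = 0101, so vec = 0000 | 1000 | 0101 = 1101.
   Bit 1 of vec is 0: there xx and yy are defined and differ, so the
   comparison outcome is known, and the improvement term forces the
   result to 'defined' even though vyy alone would have made the
   naive UifU result undefined. */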
static IRAtom* expensiveCmpEQorNE ( MCEnv*  mce,
                                    IRType  ty,
                                    IRAtom* vxx, IRAtom* vyy,
                                    IRAtom* xx,  IRAtom* yy )
{
   IRAtom *naive, *vec, *improvement_term;
   IRAtom *improved, *final_cast, *top;
   IROp   opDIFD, opUIFU, opXOR, opNOT, opCMP, opOR;

   tl_assert(isShadowAtom(mce,vxx));
   tl_assert(isShadowAtom(mce,vyy));
   tl_assert(isOriginalAtom(mce,xx));
   tl_assert(isOriginalAtom(mce,yy));
   tl_assert(sameKindedAtoms(vxx,xx));
   tl_assert(sameKindedAtoms(vyy,yy));

   switch (ty) {
      case Ity_I32:
         opOR   = Iop_Or32;
         opDIFD = Iop_And32;
         opUIFU = Iop_Or32;
         opNOT  = Iop_Not32;
         opXOR  = Iop_Xor32;
         opCMP  = Iop_CmpEQ32;
         top    = mkU32(0xFFFFFFFF);
         break;
      case Ity_I64:
         opOR   = Iop_Or64;
         opDIFD = Iop_And64;
         opUIFU = Iop_Or64;
         opNOT  = Iop_Not64;
         opXOR  = Iop_Xor64;
         opCMP  = Iop_CmpEQ64;
         top    = mkU64(0xFFFFFFFFFFFFFFFFULL);
         break;
      default:
         VG_(tool_panic)("expensiveCmpEQorNE");
   }

   naive
      = mkPCastTo(mce,ty, assignNew(mce, ty, binop(opUIFU, vxx, vyy)));

   vec
      = assignNew(
           mce,ty,
           binop( opOR,
                  assignNew(mce,ty, binop(opOR, vxx, vyy)),
                  assignNew(
                     mce,ty,
                     unop( opNOT,
                           assignNew(mce,ty, binop(opXOR, xx, yy))))));

   improvement_term
      = mkPCastTo( mce,ty, assignNew(mce,Ity_I1, binop(opCMP, vec, top)));

   improved
      = assignNew( mce,ty, binop(opDIFD, naive, improvement_term) );

   final_cast
      = mkPCastTo( mce, Ity_I1, improved );

   return final_cast;
}


/* --------- Semi-accurate interpretation of CmpORD. --------- */

/* CmpORD32{S,U} does PowerPC-style 3-way comparisons:

      CmpORD32S(x,y) = 1<<3   if  x <s y
                     = 1<<2   if  x >s y
                     = 1<<1   if  x == y

   and similarly the unsigned variant.  The default interpretation is:

      CmpORD32{S,U}#(x,y,x#,y#) = PCast(x# `UifU` y#)
                                  & (7<<1)

   The "& (7<<1)" reflects the fact that all result bits except 3,2,1
   are zero and therefore defined (viz, zero).

   Also deal with a special case better:

      CmpORD32S(x,0)

   Here, bit 3 (LT) of the result is a copy of the top bit of x and
   will be defined even if the rest of x isn't.  In which case we do:

      CmpORD32S#(x,x#,0,{impliedly 0}#)
         = PCast(x#) & (3<<1)   -- standard interp for GT#,EQ#
           | (x# >>u 31) << 3   -- LT# = x#[31]

   Analogous handling for CmpORD64{S,U}.
*/
static Bool isZeroU32 ( IRAtom* e )
{
   return
      toBool( e->tag == Iex_Const
              && e->Iex.Const.con->tag == Ico_U32
              && e->Iex.Const.con->Ico.U32 == 0 );
}

static Bool isZeroU64 ( IRAtom* e )
{
   return
      toBool( e->tag == Iex_Const
              && e->Iex.Const.con->tag == Ico_U64
              && e->Iex.Const.con->Ico.U64 == 0 );
}

static IRAtom* doCmpORD ( MCEnv*  mce,
                          IROp    cmp_op,
                          IRAtom* xxhash, IRAtom* yyhash,
                          IRAtom* xx,     IRAtom* yy )
{
   Bool   m64    = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U;
   Bool   syned  = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD32S;
   IROp   opOR   = m64 ? Iop_Or64  : Iop_Or32;
   IROp   opAND  = m64 ? Iop_And64 : Iop_And32;
   IROp   opSHL  = m64 ? Iop_Shl64 : Iop_Shl32;
   IROp   opSHR  = m64 ? Iop_Shr64 : Iop_Shr32;
   IRType ty     = m64 ? Ity_I64   : Ity_I32;
   Int    width  = m64 ? 64        : 32;

   Bool (*isZero)(IRAtom*) = m64 ? isZeroU64 : isZeroU32;

   IRAtom* threeLeft1 = NULL;
   IRAtom* sevenLeft1 = NULL;

   tl_assert(isShadowAtom(mce,xxhash));
   tl_assert(isShadowAtom(mce,yyhash));
   tl_assert(isOriginalAtom(mce,xx));
   tl_assert(isOriginalAtom(mce,yy));
   tl_assert(sameKindedAtoms(xxhash,xx));
   tl_assert(sameKindedAtoms(yyhash,yy));
   tl_assert(cmp_op == Iop_CmpORD32S || cmp_op == Iop_CmpORD32U
             || cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U);

   if (0) {
      ppIROp(cmp_op); VG_(printf)(" ");
      ppIRExpr(xx); VG_(printf)(" "); ppIRExpr( yy ); VG_(printf)("\n");
   }

   if (syned && isZero(yy)) {
      /* fancy interpretation */
      /* if yy is zero, then it must be fully defined (zero#). */
      tl_assert(isZero(yyhash));
      threeLeft1 = m64 ? mkU64(3<<1) : mkU32(3<<1);
      return
         binop(
            opOR,
            assignNew(
               mce,ty,
               binop(
                  opAND,
                  mkPCastTo(mce,ty, xxhash),
                  threeLeft1
               )),
            assignNew(
               mce,ty,
               binop(
                  opSHL,
                  assignNew(
                     mce,ty,
                     binop(opSHR, xxhash, mkU8(width-1))),
                  mkU8(3)
               ))
         );
   } else {
      /* standard interpretation */
      sevenLeft1 = m64 ? mkU64(7<<1) : mkU32(7<<1);
      return
         binop(
            opAND,
            mkPCastTo( mce,ty,
                       mkUifU(mce,ty, xxhash,yyhash)),
            sevenLeft1
         );
   }
}


/*------------------------------------------------------------*/
/*--- Emit a test and complaint if something is undefined. ---*/
/*------------------------------------------------------------*/

/* Set the annotations on a dirty helper to indicate that the stack
   pointer and instruction pointers might be read.  This is the
   behaviour of all 'emit-a-complaint' style functions we might
   call. */

static void setHelperAnns ( MCEnv* mce, IRDirty* di ) {
   di->nFxState = 2;
   di->fxState[0].fx     = Ifx_Read;
   di->fxState[0].offset = mce->layout->offset_SP;
   di->fxState[0].size   = mce->layout->sizeof_SP;
   di->fxState[1].fx     = Ifx_Read;
   di->fxState[1].offset = mce->layout->offset_IP;
   di->fxState[1].size   = mce->layout->sizeof_IP;
}


/* Check the supplied **original** atom for undefinedness, and emit a
   complaint if so.  Once that happens, mark it as defined.  This is
   possible because the atom is either a tmp or literal.  If it's a
   tmp, it will be shadowed by a tmp, and so we can set the shadow to
   be defined.  In fact as mentioned above, we will have to allocate a
   new tmp to carry the new 'defined' shadow value, and update the
   original->tmp mapping accordingly; we cannot simply assign a new
   value to an existing shadow tmp as this breaks SSAness -- resulting
   in the post-instrumentation sanity checker spluttering in disapproval.
*/
static void complainIfUndefined ( MCEnv* mce, IRAtom* atom )
{
   IRAtom*  vatom;
   IRType   ty;
   Int      sz;
   IRDirty* di;
   IRAtom*  cond;

   // Don't do V bit tests if we're not reporting undefined value errors.
   if (!MC_(clo_undef_value_errors))
      return;

   /* Since the original expression is atomic, there's no duplicated
      work generated by making multiple V-expressions for it.  So we
      don't really care about the possibility that someone else may
      also create a V-interpretation for it. */
   tl_assert(isOriginalAtom(mce, atom));
   vatom = expr2vbits( mce, atom );
   tl_assert(isShadowAtom(mce, vatom));
   tl_assert(sameKindedAtoms(atom, vatom));

   ty = typeOfIRExpr(mce->bb->tyenv, vatom);

   /* sz is only used for constructing the error message */
   sz = ty==Ity_I1 ? 0 : sizeofIRType(ty);

   cond = mkPCastTo( mce, Ity_I1, vatom );
   /* cond will be 0 if all defined, and 1 if any not defined. */

   switch (sz) {
      case 0:
         di = unsafeIRDirty_0_N(
                 0/*regparms*/,
                 "MC_(helperc_value_check0_fail)",
                 VG_(fnptr_to_fnentry)( &MC_(helperc_value_check0_fail) ),
                 mkIRExprVec_0()
              );
         break;
      case 1:
         di = unsafeIRDirty_0_N(
                 0/*regparms*/,
                 "MC_(helperc_value_check1_fail)",
                 VG_(fnptr_to_fnentry)( &MC_(helperc_value_check1_fail) ),
                 mkIRExprVec_0()
              );
         break;
      case 4:
         di = unsafeIRDirty_0_N(
                 0/*regparms*/,
                 "MC_(helperc_value_check4_fail)",
                 VG_(fnptr_to_fnentry)( &MC_(helperc_value_check4_fail) ),
                 mkIRExprVec_0()
              );
         break;
      case 8:
         di = unsafeIRDirty_0_N(
                 0/*regparms*/,
                 "MC_(helperc_value_check8_fail)",
                 VG_(fnptr_to_fnentry)( &MC_(helperc_value_check8_fail) ),
                 mkIRExprVec_0()
              );
         break;
      default:
         di = unsafeIRDirty_0_N(
                 1/*regparms*/,
                 "MC_(helperc_complain_undef)",
                 VG_(fnptr_to_fnentry)( &MC_(helperc_complain_undef) ),
                 mkIRExprVec_1( mkIRExpr_HWord( sz ))
              );
         break;
   }
   di->guard = cond;
   setHelperAnns( mce, di );
   stmt( mce->bb, IRStmt_Dirty(di));

   /* Set the shadow tmp to be defined.  First, update the
      orig->shadow tmp mapping to reflect the fact that this shadow is
      getting a new value. */
   tl_assert(isIRAtom(vatom));
   /* sameKindedAtoms ... */
   if (vatom->tag == Iex_RdTmp) {
      tl_assert(atom->tag == Iex_RdTmp);
      newShadowTmp(mce, atom->Iex.RdTmp.tmp);
      assign(mce->bb, findShadowTmp(mce, atom->Iex.RdTmp.tmp),
                      definedOfType(ty));
   }
}


/*------------------------------------------------------------*/
/*--- Shadowing PUTs/GETs, and indexed variants thereof    ---*/
/*------------------------------------------------------------*/

/* Examine the always-defined sections declared in layout to see if
   the (offset,size) section is within one.  Note, it is an error to
   partially fall into such a region: (offset,size) should either be
   completely in such a region or completely not-in such a region.
*/
static Bool isAlwaysDefd ( MCEnv* mce, Int offset, Int size )
{
   Int minoffD, maxoffD, i;
   Int minoff = offset;
   Int maxoff = minoff + size - 1;
   tl_assert((minoff & ~0xFFFF) == 0);
   tl_assert((maxoff & ~0xFFFF) == 0);

   for (i = 0; i < mce->layout->n_alwaysDefd; i++) {
      minoffD = mce->layout->alwaysDefd[i].offset;
      maxoffD = minoffD + mce->layout->alwaysDefd[i].size - 1;
      tl_assert((minoffD & ~0xFFFF) == 0);
      tl_assert((maxoffD & ~0xFFFF) == 0);

      if (maxoff < minoffD || maxoffD < minoff)
         continue; /* no overlap */
      if (minoff >= minoffD && maxoff <= maxoffD)
         return True; /* completely contained in an always-defd section */

      VG_(tool_panic)("memcheck:isAlwaysDefd:partial overlap");
   }
   return False; /* could not find any containing section */
}


/* Generate into bb suitable actions to shadow this Put.  If the state
   slice is marked 'always defined', do nothing.  Otherwise, write the
   supplied V bits to the shadow state.  We can pass in either an
   original atom or a V-atom, but not both.  In the former case the
   relevant V-bits are then generated from the original.
*/
static
void do_shadow_PUT ( MCEnv* mce,  Int offset,
                     IRAtom* atom, IRAtom* vatom )
{
   IRType ty;

   // Don't do shadow PUTs if we're not doing undefined value checking.
   // Their absence lets Vex's optimiser remove all the shadow computation
   // that they depend on, which includes GETs of the shadow registers.
   if (!MC_(clo_undef_value_errors))
      return;

   if (atom) {
      tl_assert(!vatom);
      tl_assert(isOriginalAtom(mce, atom));
      vatom = expr2vbits( mce, atom );
   } else {
      tl_assert(vatom);
      tl_assert(isShadowAtom(mce, vatom));
   }

   ty = typeOfIRExpr(mce->bb->tyenv, vatom);
   tl_assert(ty != Ity_I1);
   if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
      /* later: no ... */
      /* emit code to emit a complaint if any of the vbits are 1. */
      /* complainIfUndefined(mce, atom); */
   } else {
      /* Do a plain shadow Put. */
      stmt( mce->bb, IRStmt_Put( offset + mce->layout->total_sizeB, vatom ) );
   }
}


/* Generate into bb suitable actions to shadow this PutI.  If the
   relevant state slice is marked 'always defined', do nothing.
   Otherwise, write the supplied V bits to the shadow state.
*/
static
void do_shadow_PUTI ( MCEnv* mce,
                      IRRegArray* descr,
                      IRAtom* ix, Int bias, IRAtom* atom )
{
   IRAtom* vatom;
   IRType  ty, tyS;
   Int     arrSize;

   // Don't do shadow PUTIs if we're not doing undefined value checking.
   // Their absence lets Vex's optimiser remove all the shadow computation
   // that they depend on, which includes GETIs of the shadow registers.
   if (!MC_(clo_undef_value_errors))
      return;

   tl_assert(isOriginalAtom(mce,atom));
   vatom = expr2vbits( mce, atom );
   tl_assert(sameKindedAtoms(atom, vatom));
   ty      = descr->elemTy;
   tyS     = shadowType(ty);
   arrSize = descr->nElems * sizeofIRType(ty);
   tl_assert(ty != Ity_I1);
   tl_assert(isOriginalAtom(mce,ix));
   complainIfUndefined(mce,ix);
   if (isAlwaysDefd(mce, descr->base, arrSize)) {
      /* later: no ... */
      /* emit code to emit a complaint if any of the vbits are 1. */
      /* complainIfUndefined(mce, atom); */
   } else {
      /* Do a cloned version of the Put that refers to the shadow
         area. */
      IRRegArray* new_descr
         = mkIRRegArray( descr->base + mce->layout->total_sizeB,
                         tyS, descr->nElems);
      stmt( mce->bb, IRStmt_PutI( new_descr, ix, bias, vatom ));
   }
}


/* Return an expression which contains the V bits corresponding to the
   given GET (passed in in pieces).
*/
static
IRExpr* shadow_GET ( MCEnv* mce, Int offset, IRType ty )
{
   IRType tyS = shadowType(ty);
   tl_assert(ty != Ity_I1);
   if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
      /* Always defined, return all zeroes of the relevant type */
      return definedOfType(tyS);
   } else {
      /* return a cloned version of the Get that refers to the shadow
         area. */
      return IRExpr_Get( offset + mce->layout->total_sizeB, tyS );
   }
}


/* Return an expression which contains the V bits corresponding to the
   given GETI (passed in in pieces).
*/
static
IRExpr* shadow_GETI ( MCEnv* mce,
                      IRRegArray* descr, IRAtom* ix, Int bias )
{
   IRType ty   = descr->elemTy;
   IRType tyS  = shadowType(ty);
   Int arrSize = descr->nElems * sizeofIRType(ty);
   tl_assert(ty != Ity_I1);
   tl_assert(isOriginalAtom(mce,ix));
   complainIfUndefined(mce,ix);
   if (isAlwaysDefd(mce, descr->base, arrSize)) {
      /* Always defined, return all zeroes of the relevant type */
      return definedOfType(tyS);
   } else {
      /* return a cloned version of the Get that refers to the shadow
         area. */
      IRRegArray* new_descr
         = mkIRRegArray( descr->base + mce->layout->total_sizeB,
                         tyS, descr->nElems);
      return IRExpr_GetI( new_descr, ix, bias );
   }
}


/*------------------------------------------------------------*/
/*--- Generating approximations for unknown operations,    ---*/
/*--- using lazy-propagate semantics                       ---*/
/*------------------------------------------------------------*/

/* Lazy propagation of undefinedness from two values, resulting in the
   specified shadow type.
*/
static
IRAtom* mkLazy2 ( MCEnv* mce, IRType finalVty, IRAtom* va1, IRAtom* va2 )
{
   IRAtom* at;
   IRType t1 = typeOfIRExpr(mce->bb->tyenv, va1);
   IRType t2 = typeOfIRExpr(mce->bb->tyenv, va2);
   tl_assert(isShadowAtom(mce,va1));
   tl_assert(isShadowAtom(mce,va2));

   /* The general case is inefficient because PCast is an expensive
      operation.  Here are some special cases which use PCast only
      once rather than twice. */

   /* I64 x I64 -> I64 */
   if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I64) {
      if (0) VG_(printf)("mkLazy2: I64 x I64 -> I64\n");
      at = mkUifU(mce, Ity_I64, va1, va2);
      at = mkPCastTo(mce, Ity_I64, at);
      return at;
   }

   /* I64 x I64 -> I32 */
   if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I32) {
      if (0) VG_(printf)("mkLazy2: I64 x I64 -> I32\n");
      at = mkUifU(mce, Ity_I64, va1, va2);
      at = mkPCastTo(mce, Ity_I32, at);
      return at;
   }

   if (0) {
      VG_(printf)("mkLazy2 ");
      ppIRType(t1);
      VG_(printf)("_");
      ppIRType(t2);
      VG_(printf)("_");
      ppIRType(finalVty);
      VG_(printf)("\n");
   }

   /* General case: force everything via 32-bit intermediaries. */
   at = mkPCastTo(mce, Ity_I32, va1);
   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
   at = mkPCastTo(mce, finalVty, at);
   return at;
}


/* 3-arg version of the above. */
static
IRAtom* mkLazy3 ( MCEnv* mce, IRType finalVty,
                  IRAtom* va1, IRAtom* va2, IRAtom* va3 )
{
   IRAtom* at;
   IRType t1 = typeOfIRExpr(mce->bb->tyenv, va1);
   IRType t2 = typeOfIRExpr(mce->bb->tyenv, va2);
   IRType t3 = typeOfIRExpr(mce->bb->tyenv, va3);
   tl_assert(isShadowAtom(mce,va1));
   tl_assert(isShadowAtom(mce,va2));
   tl_assert(isShadowAtom(mce,va3));

   /* The general case is inefficient because PCast is an expensive
      operation.  Here are some special cases which use PCast only
      twice rather than three times. */

   /* I32 x I64 x I64 -> I64 */
   /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
   if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64
       && finalVty == Ity_I64) {
      if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I64\n");
      /* Widen 1st arg to I64.  Since 1st arg is typically a rounding
         mode indication which is fully defined, this should get
         folded out later. */
      at = mkPCastTo(mce, Ity_I64, va1);
      /* Now fold in 2nd and 3rd args. */
      at = mkUifU(mce, Ity_I64, at, va2);
      at = mkUifU(mce, Ity_I64, at, va3);
      /* and PCast once again. */
      at = mkPCastTo(mce, Ity_I64, at);
      return at;
   }

   /* I32 x I64 x I64 -> I32 */
   if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64
       && finalVty == Ity_I32) {
      if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I32\n");
      at = mkPCastTo(mce, Ity_I64, va1);
      at = mkUifU(mce, Ity_I64, at, va2);
      at = mkUifU(mce, Ity_I64, at, va3);
      at = mkPCastTo(mce, Ity_I32, at);
      return at;
   }

   if (1) {
      VG_(printf)("mkLazy3: ");
      ppIRType(t1);
      VG_(printf)(" x ");
      ppIRType(t2);
      VG_(printf)(" x ");
      ppIRType(t3);
      VG_(printf)(" -> ");
      ppIRType(finalVty);
      VG_(printf)("\n");
   }

   tl_assert(0);
   /* General case: force everything via 32-bit intermediaries. */
   /*
   at = mkPCastTo(mce, Ity_I32, va1);
   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va3));
   at = mkPCastTo(mce, finalVty, at);
   return at;
   */
}


/* 4-arg version of the above. */
static
IRAtom* mkLazy4 ( MCEnv* mce, IRType finalVty,
                  IRAtom* va1, IRAtom* va2, IRAtom* va3, IRAtom* va4 )
{
   IRAtom* at;
   IRType t1 = typeOfIRExpr(mce->bb->tyenv, va1);
   IRType t2 = typeOfIRExpr(mce->bb->tyenv, va2);
   IRType t3 = typeOfIRExpr(mce->bb->tyenv, va3);
   IRType t4 = typeOfIRExpr(mce->bb->tyenv, va4);
   tl_assert(isShadowAtom(mce,va1));
   tl_assert(isShadowAtom(mce,va2));
   tl_assert(isShadowAtom(mce,va3));
   tl_assert(isShadowAtom(mce,va4));

   /* The general case is inefficient because PCast is an expensive
      operation.  Here are some special cases which use PCast only
      twice rather than three times. */

   /* I32 x I64 x I64 x I64 -> I64 */
   /* Standard FP idiom: rm x FParg1 x FParg2 x FParg3 -> FPresult */
   if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64 && t4 == Ity_I64
       && finalVty == Ity_I64) {
      if (0) VG_(printf)("mkLazy4: I32 x I64 x I64 x I64 -> I64\n");
      /* Widen 1st arg to I64.  Since 1st arg is typically a rounding
         mode indication which is fully defined, this should get
         folded out later. */
      at = mkPCastTo(mce, Ity_I64, va1);
      /* Now fold in 2nd, 3rd, 4th args. */
      at = mkUifU(mce, Ity_I64, at, va2);
      at = mkUifU(mce, Ity_I64, at, va3);
      at = mkUifU(mce, Ity_I64, at, va4);
      /* and PCast once again. */
      at = mkPCastTo(mce, Ity_I64, at);
      return at;
   }

   if (1) {
      VG_(printf)("mkLazy4: ");
      ppIRType(t1);
      VG_(printf)(" x ");
      ppIRType(t2);
      VG_(printf)(" x ");
      ppIRType(t3);
      VG_(printf)(" x ");
      ppIRType(t4);
      VG_(printf)(" -> ");
      ppIRType(finalVty);
      VG_(printf)("\n");
   }

   tl_assert(0);
}


/* Do the lazy propagation game from a null-terminated vector of
   atoms.  This is presumably the arguments to a helper call, so the
   IRCallee info is also supplied in order that we can know which
   arguments should be ignored (via the .mcx_mask field).
*/
static
IRAtom* mkLazyN ( MCEnv* mce,
                  IRAtom** exprvec, IRType finalVtype, IRCallee* cee )
{
   Int i;
   IRAtom* here;
   IRAtom* curr = definedOfType(Ity_I32);
   for (i = 0; exprvec[i]; i++) {
      tl_assert(i < 32);
      tl_assert(isOriginalAtom(mce, exprvec[i]));
      /* Only take notice of this arg if the callee's mc-exclusion
         mask does not say it is to be excluded. */
      if (cee->mcx_mask & (1<<i)) {
         /* the arg is to be excluded from definedness checking.  Do
            nothing. */
         if (0) VG_(printf)("excluding %s(%d)\n", cee->name, i);
      } else {
         /* calculate the arg's definedness, and pessimistically merge
            it in. */
         here = mkPCastTo( mce, Ity_I32, expr2vbits(mce, exprvec[i]) );
         curr = mkUifU32(mce, here, curr);
      }
   }
   return mkPCastTo(mce, finalVtype, curr );
}


/*------------------------------------------------------------*/
/*--- Generating expensive sequences for exact carry-chain ---*/
/*--- propagation in add/sub and related operations.       ---*/
/*------------------------------------------------------------*/

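/* The idea: with its undefined bits forced to 0, aa is no less than
   a_min = aa & ~qaa, and with them forced to 1 it is no more than
   a_max = aa | qaa (likewise for bb).  Evaluating the operation at
   both extremes (a_min+b_min vs a_max+b_max for add; a_min-b_max vs
   a_max-b_min for sub) and XORing the two results exposes exactly
   those output bit positions that a carry or borrow chain could
   disturb; OR-ing in (qaa | qbb) accounts for bits that were
   undefined in the inputs to begin with. */
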
static
IRAtom* expensiveAddSub ( MCEnv*  mce,
                          Bool    add,
                          IRType  ty,
                          IRAtom* qaa, IRAtom* qbb,
                          IRAtom* aa,  IRAtom* bb )
{
   IRAtom *a_min, *b_min, *a_max, *b_max;
   IROp   opAND, opOR, opXOR, opNOT, opADD, opSUB;

   tl_assert(isShadowAtom(mce,qaa));
   tl_assert(isShadowAtom(mce,qbb));
   tl_assert(isOriginalAtom(mce,aa));
   tl_assert(isOriginalAtom(mce,bb));
   tl_assert(sameKindedAtoms(qaa,aa));
   tl_assert(sameKindedAtoms(qbb,bb));

   switch (ty) {
      case Ity_I32:
         opAND = Iop_And32;
         opOR  = Iop_Or32;
         opXOR = Iop_Xor32;
         opNOT = Iop_Not32;
         opADD = Iop_Add32;
         opSUB = Iop_Sub32;
         break;
      case Ity_I64:
         opAND = Iop_And64;
         opOR  = Iop_Or64;
         opXOR = Iop_Xor64;
         opNOT = Iop_Not64;
         opADD = Iop_Add64;
         opSUB = Iop_Sub64;
         break;
      default:
         VG_(tool_panic)("expensiveAddSub");
   }

   // a_min = aa & ~qaa
   a_min = assignNew(mce,ty,
                     binop(opAND, aa,
                           assignNew(mce,ty, unop(opNOT, qaa))));

   // b_min = bb & ~qbb
   b_min = assignNew(mce,ty,
                     binop(opAND, bb,
                           assignNew(mce,ty, unop(opNOT, qbb))));

   // a_max = aa | qaa
   a_max = assignNew(mce,ty, binop(opOR, aa, qaa));

   // b_max = bb | qbb
   b_max = assignNew(mce,ty, binop(opOR, bb, qbb));

   if (add) {
      // result = (qaa | qbb) | ((a_min + b_min) ^ (a_max + b_max))
      return
      assignNew(mce,ty,
         binop( opOR,
                assignNew(mce,ty, binop(opOR, qaa, qbb)),
                assignNew(mce,ty,
                   binop( opXOR,
                          assignNew(mce,ty, binop(opADD, a_min, b_min)),
                          assignNew(mce,ty, binop(opADD, a_max, b_max))
                   )
                )
         )
      );
   } else {
      // result = (qaa | qbb) | ((a_min - b_max) ^ (a_max - b_min))
      return
      assignNew(mce,ty,
         binop( opOR,
                assignNew(mce,ty, binop(opOR, qaa, qbb)),
                assignNew(mce,ty,
                   binop( opXOR,
                          assignNew(mce,ty, binop(opSUB, a_min, b_max)),
                          assignNew(mce,ty, binop(opSUB, a_max, b_min))
                   )
                )
         )
      );
   }

}


1378/*------------------------------------------------------------*/
sewardjaaddbc22005-10-07 09:49:53 +00001379/*--- Scalar shifts. ---*/
1380/*------------------------------------------------------------*/
1381
1382/* Produce an interpretation for (aa << bb) (or >>s, >>u). The basic
1383 idea is to shift the definedness bits by the original shift amount.
1384 This introduces 0s ("defined") in new positions for left shifts and
1385 unsigned right shifts, and copies the top definedness bit for
1386 signed right shifts. So, conveniently, applying the original shift
1387 operator to the definedness bits for the left arg is exactly the
1388 right thing to do:
1389
1390 (qaa << bb)
1391
1392 However if the shift amount is undefined then the whole result
1393 is undefined. Hence need:
1394
1395 (qaa << bb) `UifU` PCast(qbb)
1396
   If the shift amount bb is a literal then qbb will say 'all defined'
   and the UifU and PCast will get folded out by post-instrumentation
   optimisation.
*/
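/* A worked example, purely illustrative: suppose aa = 0x1 with
   qaa = 0x1 (bit 0 undefined) and bb = 2 with qbb = 0 (fully
   defined).  Then (qaa << bb) = 0x4, correctly marking just bit 2 of
   the result as undefined, and PCast(qbb) is all-zeroes (all
   defined), so the UifU changes nothing. */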
static IRAtom* scalarShift ( MCEnv*  mce,
                             IRType  ty,
                             IROp    original_op,
                             IRAtom* qaa, IRAtom* qbb,
                             IRAtom* aa,  IRAtom* bb )
{
   tl_assert(isShadowAtom(mce,qaa));
   tl_assert(isShadowAtom(mce,qbb));
   tl_assert(isOriginalAtom(mce,aa));
   tl_assert(isOriginalAtom(mce,bb));
   tl_assert(sameKindedAtoms(qaa,aa));
   tl_assert(sameKindedAtoms(qbb,bb));
   return
      assignNew(
         mce, ty,
         mkUifU( mce, ty,
                 assignNew(mce, ty, binop(original_op, qaa, bb)),
                 mkPCastTo(mce, ty, qbb)
         )
      );
}


/*------------------------------------------------------------*/
/*--- Helpers for dealing with vector primops.              ---*/
/*------------------------------------------------------------*/

/* Vector pessimisation -- pessimise within each lane individually. */

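/* Each helper below applies a per-lane CmpNEZ to the shadow value:
   any lane containing at least one undefined (1) bit becomes all 1s
   (wholly undefined), while a fully defined lane stays all 0s. */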
static IRAtom* mkPCast8x16 ( MCEnv* mce, IRAtom* at )
{
   return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ8x16, at));
}

static IRAtom* mkPCast16x8 ( MCEnv* mce, IRAtom* at )
{
   return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ16x8, at));
}

static IRAtom* mkPCast32x4 ( MCEnv* mce, IRAtom* at )
{
   return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ32x4, at));
}

static IRAtom* mkPCast64x2 ( MCEnv* mce, IRAtom* at )
{
   return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ64x2, at));
}

static IRAtom* mkPCast32x2 ( MCEnv* mce, IRAtom* at )
{
   return assignNew(mce, Ity_I64, unop(Iop_CmpNEZ32x2, at));
}

static IRAtom* mkPCast16x4 ( MCEnv* mce, IRAtom* at )
{
   return assignNew(mce, Ity_I64, unop(Iop_CmpNEZ16x4, at));
}

static IRAtom* mkPCast8x8 ( MCEnv* mce, IRAtom* at )
{
   return assignNew(mce, Ity_I64, unop(Iop_CmpNEZ8x8, at));
}

/* Here's a simple scheme capable of handling ops derived from SSE1
   code, whilst only generating ops that can be efficiently
   implemented in SSE1. */

/* All-lanes versions are straightforward:

   binary32Fx4(x,y)   ==> PCast32x4(UifUV128(x#,y#))

   unary32Fx4(x)      ==> PCast32x4(x#)

   Lowest-lane-only versions are more complex:

   binary32F0x4(x,y)  ==> SetV128lo32(
                             x#,
                             PCast32(V128to32(UifUV128(x#,y#)))
                          )

   This is perhaps not so obvious.  In particular, it's faster to
   do a V128-bit UifU and then take the bottom 32 bits than the more
   obvious scheme of taking the bottom 32 bits of each operand
   and doing a 32-bit UifU.  Basically, this is because UifU is fast
   and chopping lanes off vector values is slow.

   Finally:

   unary32F0x4(x)     ==> SetV128lo32(
                             x#,
                             PCast32(V128to32(x#))
                          )

   Where:

   PCast32(v#)   = 1Sto32(CmpNE32(v#,0))
   PCast32x4(v#) = CmpNEZ32x4(v#)
*/

static
IRAtom* binary32Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   tl_assert(isShadowAtom(mce, vatomY));
   at = mkUifUV128(mce, vatomX, vatomY);
   at = assignNew(mce, Ity_V128, mkPCast32x4(mce, at));
   return at;
}

static
IRAtom* unary32Fx4 ( MCEnv* mce, IRAtom* vatomX )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   at = assignNew(mce, Ity_V128, mkPCast32x4(mce, vatomX));
   return at;
}

static
IRAtom* binary32F0x4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   tl_assert(isShadowAtom(mce, vatomY));
   at = mkUifUV128(mce, vatomX, vatomY);
   at = assignNew(mce, Ity_I32, unop(Iop_V128to32, at));
   at = mkPCastTo(mce, Ity_I32, at);
   at = assignNew(mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
   return at;
}

static
IRAtom* unary32F0x4 ( MCEnv* mce, IRAtom* vatomX )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   at = assignNew(mce, Ity_I32, unop(Iop_V128to32, vatomX));
   at = mkPCastTo(mce, Ity_I32, at);
   at = assignNew(mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
   return at;
}

/* --- ... and ... 64Fx2 versions of the same ... --- */

static
IRAtom* binary64Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   tl_assert(isShadowAtom(mce, vatomY));
   at = mkUifUV128(mce, vatomX, vatomY);
   at = assignNew(mce, Ity_V128, mkPCast64x2(mce, at));
   return at;
}

static
IRAtom* unary64Fx2 ( MCEnv* mce, IRAtom* vatomX )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   at = assignNew(mce, Ity_V128, mkPCast64x2(mce, vatomX));
   return at;
}

static
IRAtom* binary64F0x2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   tl_assert(isShadowAtom(mce, vatomY));
   at = mkUifUV128(mce, vatomX, vatomY);
   at = assignNew(mce, Ity_I64, unop(Iop_V128to64, at));
   at = mkPCastTo(mce, Ity_I64, at);
   at = assignNew(mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
   return at;
}

static
IRAtom* unary64F0x2 ( MCEnv* mce, IRAtom* vatomX )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   at = assignNew(mce, Ity_I64, unop(Iop_V128to64, vatomX));
   at = mkPCastTo(mce, Ity_I64, at);
   at = assignNew(mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
   return at;
}

/* --- --- Vector saturated narrowing --- --- */

/* This is quite subtle.  What to do is simple:

   Let the original narrowing op be QNarrowW{S,U}xN.  Produce:

      the-narrowing-op( PCastWxN(vatom1), PCastWxN(vatom2))

   Why this is right is not so simple.  Consider a lane in the args,
   vatom1 or 2, doesn't matter.

   After the PCast, that lane is all 0s (defined) or all 1s
   (undefined).

   Both signed and unsigned saturating narrowing of all 0s produces
   all 0s, which is what we want.

   The all-1s case is more complex.  Unsigned narrowing interprets an
   all-1s input as the largest unsigned integer, and so produces all
   1s as a result since that is the largest unsigned value at the
   smaller width.

   Signed narrowing interprets all 1s as -1.  Fortunately, -1 narrows
   to -1, so we still wind up with all 1s at the smaller width.

   So: In short, pessimise the args, then apply the original narrowing
   op.
*/
static
IRAtom* vectorNarrowV128 ( MCEnv* mce, IROp narrow_op,
                           IRAtom* vatom1, IRAtom* vatom2)
{
   IRAtom *at1, *at2, *at3;
   IRAtom* (*pcast)( MCEnv*, IRAtom* );
   switch (narrow_op) {
      case Iop_QNarrow32Sx4: pcast = mkPCast32x4; break;
      case Iop_QNarrow32Ux4: pcast = mkPCast32x4; break;
      case Iop_QNarrow16Sx8: pcast = mkPCast16x8; break;
      case Iop_QNarrow16Ux8: pcast = mkPCast16x8; break;
      default: VG_(tool_panic)("vectorNarrowV128");
   }
   tl_assert(isShadowAtom(mce,vatom1));
   tl_assert(isShadowAtom(mce,vatom2));
   at1 = assignNew(mce, Ity_V128, pcast(mce, vatom1));
   at2 = assignNew(mce, Ity_V128, pcast(mce, vatom2));
   at3 = assignNew(mce, Ity_V128, binop(narrow_op, at1, at2));
   return at3;
}

static
IRAtom* vectorNarrow64 ( MCEnv* mce, IROp narrow_op,
                         IRAtom* vatom1, IRAtom* vatom2)
{
   IRAtom *at1, *at2, *at3;
   IRAtom* (*pcast)( MCEnv*, IRAtom* );
   switch (narrow_op) {
      case Iop_QNarrow32Sx2: pcast = mkPCast32x2; break;
      case Iop_QNarrow16Sx4: pcast = mkPCast16x4; break;
      case Iop_QNarrow16Ux4: pcast = mkPCast16x4; break;
      default: VG_(tool_panic)("vectorNarrow64");
   }
   tl_assert(isShadowAtom(mce,vatom1));
   tl_assert(isShadowAtom(mce,vatom2));
   at1 = assignNew(mce, Ity_I64, pcast(mce, vatom1));
   at2 = assignNew(mce, Ity_I64, pcast(mce, vatom2));
   at3 = assignNew(mce, Ity_I64, binop(narrow_op, at1, at2));
   return at3;
}


/* --- --- Vector integer arithmetic --- --- */

/* Simple ... UifU the args and per-lane pessimise the results. */
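/* Note that this is a deliberately cheap approximation: a single
   undefined bit anywhere in a lane renders the whole result lane
   undefined, in contrast to the more precise treatment that scalar
   Add/Sub can get via expensiveAddSub. */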

/* --- V128-bit versions --- */

static
IRAtom* binary8Ix16 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   IRAtom* at;
   at = mkUifUV128(mce, vatom1, vatom2);
   at = mkPCast8x16(mce, at);
   return at;
}

static
IRAtom* binary16Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   IRAtom* at;
   at = mkUifUV128(mce, vatom1, vatom2);
   at = mkPCast16x8(mce, at);
   return at;
}

static
IRAtom* binary32Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   IRAtom* at;
   at = mkUifUV128(mce, vatom1, vatom2);
   at = mkPCast32x4(mce, at);
   return at;
}

static
IRAtom* binary64Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   IRAtom* at;
   at = mkUifUV128(mce, vatom1, vatom2);
   at = mkPCast64x2(mce, at);
   return at;
}

/* --- 64-bit versions --- */

static
IRAtom* binary8Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   IRAtom* at;
   at = mkUifU64(mce, vatom1, vatom2);
   at = mkPCast8x8(mce, at);
   return at;
}

static
IRAtom* binary16Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   IRAtom* at;
   at = mkUifU64(mce, vatom1, vatom2);
   at = mkPCast16x4(mce, at);
   return at;
}

static
IRAtom* binary32Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   IRAtom* at;
   at = mkUifU64(mce, vatom1, vatom2);
   at = mkPCast32x2(mce, at);
   return at;
}


/*------------------------------------------------------------*/
/*--- Generate shadow values from all kinds of IRExprs.    ---*/
/*------------------------------------------------------------*/

static
IRAtom* expr2vbits_Qop ( MCEnv* mce,
                         IROp op,
                         IRAtom* atom1, IRAtom* atom2,
                         IRAtom* atom3, IRAtom* atom4 )
{
   IRAtom* vatom1 = expr2vbits( mce, atom1 );
   IRAtom* vatom2 = expr2vbits( mce, atom2 );
   IRAtom* vatom3 = expr2vbits( mce, atom3 );
   IRAtom* vatom4 = expr2vbits( mce, atom4 );

   tl_assert(isOriginalAtom(mce,atom1));
   tl_assert(isOriginalAtom(mce,atom2));
   tl_assert(isOriginalAtom(mce,atom3));
   tl_assert(isOriginalAtom(mce,atom4));
   tl_assert(isShadowAtom(mce,vatom1));
   tl_assert(isShadowAtom(mce,vatom2));
   tl_assert(isShadowAtom(mce,vatom3));
   tl_assert(isShadowAtom(mce,vatom4));
   tl_assert(sameKindedAtoms(atom1,vatom1));
   tl_assert(sameKindedAtoms(atom2,vatom2));
   tl_assert(sameKindedAtoms(atom3,vatom3));
   tl_assert(sameKindedAtoms(atom4,vatom4));
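   /* All the cases below are handled lazily: mkLazy4 pessimistically
      combines the definedness of all four args, so the result is
      considered defined only if every argument is fully defined. */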
   switch (op) {
      case Iop_MAddF64:
      case Iop_MAddF64r32:
      case Iop_MSubF64:
      case Iop_MSubF64r32:
         /* I32(rm) x F64 x F64 x F64 -> F64 */
         return mkLazy4(mce, Ity_I64, vatom1, vatom2, vatom3, vatom4);
      default:
         ppIROp(op);
         VG_(tool_panic)("memcheck:expr2vbits_Qop");
   }
}


static
IRAtom* expr2vbits_Triop ( MCEnv* mce,
                           IROp op,
                           IRAtom* atom1, IRAtom* atom2, IRAtom* atom3 )
{
   IRAtom* vatom1 = expr2vbits( mce, atom1 );
   IRAtom* vatom2 = expr2vbits( mce, atom2 );
   IRAtom* vatom3 = expr2vbits( mce, atom3 );

   tl_assert(isOriginalAtom(mce,atom1));
   tl_assert(isOriginalAtom(mce,atom2));
   tl_assert(isOriginalAtom(mce,atom3));
   tl_assert(isShadowAtom(mce,vatom1));
   tl_assert(isShadowAtom(mce,vatom2));
   tl_assert(isShadowAtom(mce,vatom3));
   tl_assert(sameKindedAtoms(atom1,vatom1));
   tl_assert(sameKindedAtoms(atom2,vatom2));
   tl_assert(sameKindedAtoms(atom3,vatom3));
   switch (op) {
      case Iop_AddF64:
      case Iop_AddF64r32:
      case Iop_SubF64:
      case Iop_SubF64r32:
      case Iop_MulF64:
      case Iop_MulF64r32:
      case Iop_DivF64:
      case Iop_DivF64r32:
      case Iop_ScaleF64:
      case Iop_Yl2xF64:
      case Iop_Yl2xp1F64:
      case Iop_AtanF64:
      case Iop_PRemF64:
      case Iop_PRem1F64:
         /* I32(rm) x F64 x F64 -> F64 */
         return mkLazy3(mce, Ity_I64, vatom1, vatom2, vatom3);
      case Iop_PRemC3210F64:
      case Iop_PRem1C3210F64:
         /* I32(rm) x F64 x F64 -> I32 */
         return mkLazy3(mce, Ity_I32, vatom1, vatom2, vatom3);
      default:
         ppIROp(op);
         VG_(tool_panic)("memcheck:expr2vbits_Triop");
   }
}


static
IRAtom* expr2vbits_Binop ( MCEnv* mce,
                           IROp op,
                           IRAtom* atom1, IRAtom* atom2 )
{
   IRType  and_or_ty;
   IRAtom* (*uifu)    (MCEnv*, IRAtom*, IRAtom*);
   IRAtom* (*difd)    (MCEnv*, IRAtom*, IRAtom*);
   IRAtom* (*improve) (MCEnv*, IRAtom*, IRAtom*);

   IRAtom* vatom1 = expr2vbits( mce, atom1 );
   IRAtom* vatom2 = expr2vbits( mce, atom2 );

   tl_assert(isOriginalAtom(mce,atom1));
   tl_assert(isOriginalAtom(mce,atom2));
   tl_assert(isShadowAtom(mce,vatom1));
   tl_assert(isShadowAtom(mce,vatom2));
   tl_assert(sameKindedAtoms(atom1,vatom1));
   tl_assert(sameKindedAtoms(atom2,vatom2));
   switch (op) {

      /* 64-bit SIMD */

      case Iop_ShrN16x4:
      case Iop_ShrN32x2:
      case Iop_SarN8x8:
      case Iop_SarN16x4:
      case Iop_SarN32x2:
      case Iop_ShlN16x4:
      case Iop_ShlN32x2:
         /* Same scheme as with all other shifts. */
         complainIfUndefined(mce, atom2);
         return assignNew(mce, Ity_I64, binop(op, vatom1, atom2));

      case Iop_QNarrow32Sx2:
      case Iop_QNarrow16Sx4:
      case Iop_QNarrow16Ux4:
         return vectorNarrow64(mce, op, vatom1, vatom2);

      case Iop_Min8Ux8:
      case Iop_Max8Ux8:
      case Iop_Avg8Ux8:
      case Iop_QSub8Sx8:
      case Iop_QSub8Ux8:
      case Iop_Sub8x8:
      case Iop_CmpGT8Sx8:
      case Iop_CmpEQ8x8:
      case Iop_QAdd8Sx8:
      case Iop_QAdd8Ux8:
      case Iop_Add8x8:
         return binary8Ix8(mce, vatom1, vatom2);

      case Iop_Min16Sx4:
      case Iop_Max16Sx4:
      case Iop_Avg16Ux4:
      case Iop_QSub16Ux4:
      case Iop_QSub16Sx4:
      case Iop_Sub16x4:
      case Iop_Mul16x4:
      case Iop_MulHi16Sx4:
      case Iop_MulHi16Ux4:
      case Iop_CmpGT16Sx4:
      case Iop_CmpEQ16x4:
      case Iop_QAdd16Sx4:
      case Iop_QAdd16Ux4:
      case Iop_Add16x4:
         return binary16Ix4(mce, vatom1, vatom2);

      case Iop_Sub32x2:
      case Iop_CmpGT32Sx2:
      case Iop_CmpEQ32x2:
      case Iop_Add32x2:
         return binary32Ix2(mce, vatom1, vatom2);

      /* 64-bit data-steering */
      case Iop_InterleaveLO32x2:
      case Iop_InterleaveLO16x4:
      case Iop_InterleaveLO8x8:
      case Iop_InterleaveHI32x2:
      case Iop_InterleaveHI16x4:
      case Iop_InterleaveHI8x8:
         return assignNew(mce, Ity_I64, binop(op, vatom1, vatom2));

      /* V128-bit SIMD */

      case Iop_ShrN16x8:
      case Iop_ShrN32x4:
      case Iop_ShrN64x2:
      case Iop_SarN16x8:
      case Iop_SarN32x4:
      case Iop_ShlN16x8:
      case Iop_ShlN32x4:
      case Iop_ShlN64x2:
      case Iop_ShlN8x16:
      case Iop_SarN8x16:
         /* Same scheme as with all other shifts.  Note: 22 Oct 05:
            this is wrong now, scalar shifts are done properly lazily.
            Vector shifts should be fixed too. */
         complainIfUndefined(mce, atom2);
         return assignNew(mce, Ity_V128, binop(op, vatom1, atom2));

      /* V x V shifts/rotates are done using the standard lazy scheme. */
      case Iop_Shl8x16:
      case Iop_Shr8x16:
      case Iop_Sar8x16:
      case Iop_Rol8x16:
         return mkUifUV128(mce,
                   assignNew(mce, Ity_V128, binop(op, vatom1, atom2)),
                   mkPCast8x16(mce,vatom2)
                );

      case Iop_Shl16x8:
      case Iop_Shr16x8:
      case Iop_Sar16x8:
      case Iop_Rol16x8:
         return mkUifUV128(mce,
                   assignNew(mce, Ity_V128, binop(op, vatom1, atom2)),
                   mkPCast16x8(mce,vatom2)
                );

      case Iop_Shl32x4:
      case Iop_Shr32x4:
      case Iop_Sar32x4:
      case Iop_Rol32x4:
         return mkUifUV128(mce,
                   assignNew(mce, Ity_V128, binop(op, vatom1, atom2)),
                   mkPCast32x4(mce,vatom2)
                );

      case Iop_QSub8Ux16:
      case Iop_QSub8Sx16:
      case Iop_Sub8x16:
      case Iop_Min8Ux16:
      case Iop_Min8Sx16:
      case Iop_Max8Ux16:
      case Iop_Max8Sx16:
      case Iop_CmpGT8Sx16:
      case Iop_CmpGT8Ux16:
      case Iop_CmpEQ8x16:
      case Iop_Avg8Ux16:
      case Iop_Avg8Sx16:
      case Iop_QAdd8Ux16:
      case Iop_QAdd8Sx16:
      case Iop_Add8x16:
         return binary8Ix16(mce, vatom1, vatom2);

      case Iop_QSub16Ux8:
      case Iop_QSub16Sx8:
      case Iop_Sub16x8:
      case Iop_Mul16x8:
      case Iop_MulHi16Sx8:
      case Iop_MulHi16Ux8:
      case Iop_Min16Sx8:
      case Iop_Min16Ux8:
      case Iop_Max16Sx8:
      case Iop_Max16Ux8:
      case Iop_CmpGT16Sx8:
      case Iop_CmpGT16Ux8:
      case Iop_CmpEQ16x8:
      case Iop_Avg16Ux8:
      case Iop_Avg16Sx8:
      case Iop_QAdd16Ux8:
      case Iop_QAdd16Sx8:
      case Iop_Add16x8:
         return binary16Ix8(mce, vatom1, vatom2);

      case Iop_Sub32x4:
      case Iop_CmpGT32Sx4:
      case Iop_CmpGT32Ux4:
      case Iop_CmpEQ32x4:
      case Iop_QAdd32Sx4:
      case Iop_QAdd32Ux4:
      case Iop_QSub32Sx4:
      case Iop_QSub32Ux4:
      case Iop_Avg32Ux4:
      case Iop_Avg32Sx4:
      case Iop_Add32x4:
      case Iop_Max32Ux4:
      case Iop_Max32Sx4:
      case Iop_Min32Ux4:
      case Iop_Min32Sx4:
         return binary32Ix4(mce, vatom1, vatom2);

      case Iop_Sub64x2:
      case Iop_Add64x2:
         return binary64Ix2(mce, vatom1, vatom2);

      case Iop_QNarrow32Sx4:
      case Iop_QNarrow32Ux4:
      case Iop_QNarrow16Sx8:
      case Iop_QNarrow16Ux8:
         return vectorNarrowV128(mce, op, vatom1, vatom2);

      case Iop_Sub64Fx2:
      case Iop_Mul64Fx2:
      case Iop_Min64Fx2:
      case Iop_Max64Fx2:
      case Iop_Div64Fx2:
      case Iop_CmpLT64Fx2:
      case Iop_CmpLE64Fx2:
      case Iop_CmpEQ64Fx2:
      case Iop_CmpUN64Fx2:
      case Iop_Add64Fx2:
         return binary64Fx2(mce, vatom1, vatom2);

      case Iop_Sub64F0x2:
      case Iop_Mul64F0x2:
      case Iop_Min64F0x2:
      case Iop_Max64F0x2:
      case Iop_Div64F0x2:
      case Iop_CmpLT64F0x2:
      case Iop_CmpLE64F0x2:
      case Iop_CmpEQ64F0x2:
      case Iop_CmpUN64F0x2:
      case Iop_Add64F0x2:
         return binary64F0x2(mce, vatom1, vatom2);

      case Iop_Sub32Fx4:
      case Iop_Mul32Fx4:
      case Iop_Min32Fx4:
      case Iop_Max32Fx4:
      case Iop_Div32Fx4:
      case Iop_CmpLT32Fx4:
      case Iop_CmpLE32Fx4:
      case Iop_CmpEQ32Fx4:
      case Iop_CmpUN32Fx4:
      case Iop_CmpGT32Fx4:
      case Iop_CmpGE32Fx4:
      case Iop_Add32Fx4:
         return binary32Fx4(mce, vatom1, vatom2);

      case Iop_Sub32F0x4:
      case Iop_Mul32F0x4:
      case Iop_Min32F0x4:
      case Iop_Max32F0x4:
      case Iop_Div32F0x4:
      case Iop_CmpLT32F0x4:
      case Iop_CmpLE32F0x4:
      case Iop_CmpEQ32F0x4:
      case Iop_CmpUN32F0x4:
      case Iop_Add32F0x4:
         return binary32F0x4(mce, vatom1, vatom2);

      /* V128-bit data-steering */
      case Iop_SetV128lo32:
      case Iop_SetV128lo64:
      case Iop_64HLtoV128:
      case Iop_InterleaveLO64x2:
      case Iop_InterleaveLO32x4:
      case Iop_InterleaveLO16x8:
      case Iop_InterleaveLO8x16:
      case Iop_InterleaveHI64x2:
      case Iop_InterleaveHI32x4:
      case Iop_InterleaveHI16x8:
      case Iop_InterleaveHI8x16:
         return assignNew(mce, Ity_V128, binop(op, vatom1, vatom2));

      /* Perm8x16: rearrange values in left arg using steering values
         from right arg.  So rearrange the vbits in the same way but
         pessimise wrt steering values. */
      case Iop_Perm8x16:
         return mkUifUV128(
                   mce,
                   assignNew(mce, Ity_V128, binop(op, vatom1, atom2)),
                   mkPCast8x16(mce, vatom2)
                );

      /* These two take the lower half of each 16-bit lane, sign/zero
         extend it to 32, and multiply together, producing a 32x4
         result (and implicitly ignoring half the operand bits).  So
         treat it as a bunch of independent 16x8 operations, but then
         do 32-bit shifts left-right to copy the lower half results
         (which are all 0s or all 1s due to PCasting in binary16Ix8)
         into the upper half of each result lane. */
      case Iop_MullEven16Ux8:
      case Iop_MullEven16Sx8: {
         IRAtom* at;
         at = binary16Ix8(mce,vatom1,vatom2);
         at = assignNew(mce, Ity_V128, binop(Iop_ShlN32x4, at, mkU8(16)));
         at = assignNew(mce, Ity_V128, binop(Iop_SarN32x4, at, mkU8(16)));
         return at;
      }

      /* Same deal as Iop_MullEven16{S,U}x8 */
      case Iop_MullEven8Ux16:
      case Iop_MullEven8Sx16: {
         IRAtom* at;
         at = binary8Ix16(mce,vatom1,vatom2);
         at = assignNew(mce, Ity_V128, binop(Iop_ShlN16x8, at, mkU8(8)));
         at = assignNew(mce, Ity_V128, binop(Iop_SarN16x8, at, mkU8(8)));
         return at;
      }

      /* Narrow 2 x V128 into 1 x V128, with the hi half coming from
         the left arg, in a 2 x 32x4 -> 16x8 (or 2 x 16x8 -> 8x16)
         laneage, discarding the upper half of each lane.  Simply
         apply the same op to the V bits, since this is really no more
         than a data steering operation. */
      case Iop_Narrow32x4:
      case Iop_Narrow16x8:
         return assignNew(mce, Ity_V128,
                               binop(op, vatom1, vatom2));

      case Iop_ShrV128:
      case Iop_ShlV128:
         /* Same scheme as with all other shifts.  Note: 10 Nov 05:
            this is wrong now, scalar shifts are done properly lazily.
            Vector shifts should be fixed too. */
         complainIfUndefined(mce, atom2);
         return assignNew(mce, Ity_V128, binop(op, vatom1, atom2));


      /* I128-bit data-steering */
      case Iop_64HLto128:
         return assignNew(mce, Ity_I128, binop(op, vatom1, vatom2));

      /* Scalar floating point */

      case Iop_RoundF64toInt:
      case Iop_RoundF64toF32:
      case Iop_F64toI64:
      case Iop_I64toF64:
      case Iop_SinF64:
      case Iop_CosF64:
      case Iop_TanF64:
      case Iop_2xm1F64:
      case Iop_SqrtF64:
         /* I32(rm) x I64/F64 -> I64/F64 */
         return mkLazy2(mce, Ity_I64, vatom1, vatom2);

      case Iop_F64toI32:
      case Iop_F64toF32:
         /* First arg is I32 (rounding mode), second is F64 (data). */
         return mkLazy2(mce, Ity_I32, vatom1, vatom2);

      case Iop_F64toI16:
         /* First arg is I32 (rounding mode), second is F64 (data). */
         return mkLazy2(mce, Ity_I16, vatom1, vatom2);

      case Iop_CmpF64:
         return mkLazy2(mce, Ity_I32, vatom1, vatom2);

      /* non-FP after here */

      case Iop_DivModU64to32:
      case Iop_DivModS64to32:
         return mkLazy2(mce, Ity_I64, vatom1, vatom2);

      case Iop_DivModU128to64:
      case Iop_DivModS128to64:
         return mkLazy2(mce, Ity_I128, vatom1, vatom2);

      case Iop_16HLto32:
         return assignNew(mce, Ity_I32, binop(op, vatom1, vatom2));
      case Iop_32HLto64:
         return assignNew(mce, Ity_I64, binop(op, vatom1, vatom2));

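      /* Widening multiplies: compute the definedness of the low half
         of the result with the cheap left-propagating mkLeft* scheme,
         then pessimistically copy it into the high half with a PCast,
         since undefinedness anywhere in the operands can in principle
         affect any bit of the double-width result. */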
      case Iop_MullS64:
      case Iop_MullU64: {
         IRAtom* vLo64 = mkLeft64(mce, mkUifU64(mce, vatom1,vatom2));
         IRAtom* vHi64 = mkPCastTo(mce, Ity_I64, vLo64);
         return assignNew(mce, Ity_I128, binop(Iop_64HLto128, vHi64, vLo64));
      }

      case Iop_MullS32:
      case Iop_MullU32: {
         IRAtom* vLo32 = mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
         IRAtom* vHi32 = mkPCastTo(mce, Ity_I32, vLo32);
         return assignNew(mce, Ity_I64, binop(Iop_32HLto64, vHi32, vLo32));
      }

      case Iop_MullS16:
      case Iop_MullU16: {
         IRAtom* vLo16 = mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
         IRAtom* vHi16 = mkPCastTo(mce, Ity_I16, vLo16);
         return assignNew(mce, Ity_I32, binop(Iop_16HLto32, vHi16, vLo16));
      }

      case Iop_MullS8:
      case Iop_MullU8: {
         IRAtom* vLo8 = mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
         IRAtom* vHi8 = mkPCastTo(mce, Ity_I8, vLo8);
         return assignNew(mce, Ity_I16, binop(Iop_8HLto16, vHi8, vLo8));
      }

      case Iop_DivS32:
      case Iop_DivU32:
         return mkLazy2(mce, Ity_I32, vatom1, vatom2);

      case Iop_DivS64:
      case Iop_DivU64:
         return mkLazy2(mce, Ity_I64, vatom1, vatom2);

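      /* For Add/Sub, the expensive interval-based scheme
         (expensiveAddSub) is only deployed when the superblock being
         instrumented contains 'bogus' literals -- roughly, constants
         suggesting code that deliberately computes with partially
         defined values.  Otherwise the cheap left-propagating scheme
         is accurate enough. */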
      case Iop_Add32:
         if (mce->bogusLiterals)
            return expensiveAddSub(mce,True,Ity_I32,
                                   vatom1,vatom2, atom1,atom2);
         else
            goto cheap_AddSub32;
      case Iop_Sub32:
         if (mce->bogusLiterals)
            return expensiveAddSub(mce,False,Ity_I32,
                                   vatom1,vatom2, atom1,atom2);
         else
            goto cheap_AddSub32;

      cheap_AddSub32:
      case Iop_Mul32:
         return mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));

2231 case Iop_CmpORD32U:
sewardj1bc82102005-12-23 00:16:24 +00002232 case Iop_CmpORD64S:
2233 case Iop_CmpORD64U:
2234 return doCmpORD(mce, op, vatom1,vatom2, atom1,atom2);
sewardj95448072004-11-22 20:19:51 +00002235
sewardj681be302005-01-15 20:43:58 +00002236 case Iop_Add64:
tomd9774d72005-06-27 08:11:01 +00002237 if (mce->bogusLiterals)
2238 return expensiveAddSub(mce,True,Ity_I64,
2239 vatom1,vatom2, atom1,atom2);
2240 else
2241 goto cheap_AddSub64;
sewardj681be302005-01-15 20:43:58 +00002242 case Iop_Sub64:
tomd9774d72005-06-27 08:11:01 +00002243 if (mce->bogusLiterals)
2244 return expensiveAddSub(mce,False,Ity_I64,
2245 vatom1,vatom2, atom1,atom2);
2246 else
2247 goto cheap_AddSub64;
2248
2249 cheap_AddSub64:
2250 case Iop_Mul64:
sewardj681be302005-01-15 20:43:58 +00002251 return mkLeft64(mce, mkUifU64(mce, vatom1,vatom2));
2252
sewardj95448072004-11-22 20:19:51 +00002253 case Iop_Mul16:
2254 case Iop_Add16:
2255 case Iop_Sub16:
2256 return mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
2257
2258 case Iop_Sub8:
2259 case Iop_Add8:
2260 return mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
2261
sewardj69a13322005-04-23 01:14:51 +00002262 case Iop_CmpEQ64:
sewardje6f8af42005-07-06 18:48:59 +00002263 case Iop_CmpNE64:
sewardj69a13322005-04-23 01:14:51 +00002264 if (mce->bogusLiterals)
2265 return expensiveCmpEQorNE(mce,Ity_I64, vatom1,vatom2, atom1,atom2 );
2266 else
2267 goto cheap_cmp64;
2268 cheap_cmp64:
tomcd986332005-04-26 07:44:48 +00002269 case Iop_CmpLE64S: case Iop_CmpLE64U:
2270 case Iop_CmpLT64U: case Iop_CmpLT64S:
sewardj69a13322005-04-23 01:14:51 +00002271 return mkPCastTo(mce, Ity_I1, mkUifU64(mce, vatom1,vatom2));
2272
sewardjd5204dc2004-12-31 01:16:11 +00002273 case Iop_CmpEQ32:
sewardje6f8af42005-07-06 18:48:59 +00002274 case Iop_CmpNE32:
sewardjd5204dc2004-12-31 01:16:11 +00002275 if (mce->bogusLiterals)
2276 return expensiveCmpEQorNE(mce,Ity_I32, vatom1,vatom2, atom1,atom2 );
2277 else
2278 goto cheap_cmp32;
sewardjd5204dc2004-12-31 01:16:11 +00002279 cheap_cmp32:
sewardj95448072004-11-22 20:19:51 +00002280 case Iop_CmpLE32S: case Iop_CmpLE32U:
2281 case Iop_CmpLT32U: case Iop_CmpLT32S:
sewardj95448072004-11-22 20:19:51 +00002282 return mkPCastTo(mce, Ity_I1, mkUifU32(mce, vatom1,vatom2));
2283
2284 case Iop_CmpEQ16: case Iop_CmpNE16:
2285 return mkPCastTo(mce, Ity_I1, mkUifU16(mce, vatom1,vatom2));
2286
2287 case Iop_CmpEQ8: case Iop_CmpNE8:
2288 return mkPCastTo(mce, Ity_I1, mkUifU8(mce, vatom1,vatom2));
2289
      case Iop_Shl64: case Iop_Shr64: case Iop_Sar64:
         return scalarShift( mce, Ity_I64, op, vatom1,vatom2, atom1,atom2 );

      case Iop_Shl32: case Iop_Shr32: case Iop_Sar32:
         return scalarShift( mce, Ity_I32, op, vatom1,vatom2, atom1,atom2 );

      case Iop_Shl16: case Iop_Shr16: case Iop_Sar16:
         return scalarShift( mce, Ity_I16, op, vatom1,vatom2, atom1,atom2 );

      case Iop_Shl8: case Iop_Shr8:
         return scalarShift( mce, Ity_I8, op, vatom1,vatom2, atom1,atom2 );

      case Iop_AndV128:
         uifu = mkUifUV128; difd = mkDifDV128;
         and_or_ty = Ity_V128; improve = mkImproveANDV128; goto do_And_Or;
      case Iop_And64:
         uifu = mkUifU64; difd = mkDifD64;
         and_or_ty = Ity_I64; improve = mkImproveAND64; goto do_And_Or;
      case Iop_And32:
         uifu = mkUifU32; difd = mkDifD32;
         and_or_ty = Ity_I32; improve = mkImproveAND32; goto do_And_Or;
      case Iop_And16:
         uifu = mkUifU16; difd = mkDifD16;
         and_or_ty = Ity_I16; improve = mkImproveAND16; goto do_And_Or;
      case Iop_And8:
         uifu = mkUifU8; difd = mkDifD8;
         and_or_ty = Ity_I8; improve = mkImproveAND8; goto do_And_Or;

      case Iop_OrV128:
         uifu = mkUifUV128; difd = mkDifDV128;
         and_or_ty = Ity_V128; improve = mkImproveORV128; goto do_And_Or;
      case Iop_Or64:
         uifu = mkUifU64; difd = mkDifD64;
         and_or_ty = Ity_I64; improve = mkImproveOR64; goto do_And_Or;
      case Iop_Or32:
         uifu = mkUifU32; difd = mkDifD32;
         and_or_ty = Ity_I32; improve = mkImproveOR32; goto do_And_Or;
      case Iop_Or16:
         uifu = mkUifU16; difd = mkDifD16;
         and_or_ty = Ity_I16; improve = mkImproveOR16; goto do_And_Or;
      case Iop_Or8:
         uifu = mkUifU8; difd = mkDifD8;
         and_or_ty = Ity_I8; improve = mkImproveOR8; goto do_And_Or;

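      /* For And/Or, a result bit is defined if both input bits are
         defined (the uifu term), or if either input bit by itself
         forces the result: a defined 0 for And, a defined 1 for Or.
         The improve terms capture the latter, and DifD
         (defined-if-either-defined) folds it all together. */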
      do_And_Or:
         return
         assignNew(
            mce,
            and_or_ty,
            difd(mce, uifu(mce, vatom1, vatom2),
                      difd(mce, improve(mce, atom1, vatom1),
                                improve(mce, atom2, vatom2) ) ) );

      case Iop_Xor8:
         return mkUifU8(mce, vatom1, vatom2);
      case Iop_Xor16:
         return mkUifU16(mce, vatom1, vatom2);
      case Iop_Xor32:
         return mkUifU32(mce, vatom1, vatom2);
      case Iop_Xor64:
         return mkUifU64(mce, vatom1, vatom2);
      case Iop_XorV128:
         return mkUifUV128(mce, vatom1, vatom2);

      default:
         ppIROp(op);
         VG_(tool_panic)("memcheck:expr2vbits_Binop");
   }
}


static
IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
{
   IRAtom* vatom = expr2vbits( mce, atom );
   tl_assert(isOriginalAtom(mce,atom));
   switch (op) {

      case Iop_Sqrt64Fx2:
         return unary64Fx2(mce, vatom);

      case Iop_Sqrt64F0x2:
         return unary64F0x2(mce, vatom);

      case Iop_Sqrt32Fx4:
      case Iop_RSqrt32Fx4:
      case Iop_Recip32Fx4:
      case Iop_I32UtoFx4:
      case Iop_I32StoFx4:
      case Iop_QFtoI32Ux4_RZ:
      case Iop_QFtoI32Sx4_RZ:
      case Iop_RoundF32x4_RM:
      case Iop_RoundF32x4_RP:
      case Iop_RoundF32x4_RN:
      case Iop_RoundF32x4_RZ:
         return unary32Fx4(mce, vatom);

      case Iop_Sqrt32F0x4:
      case Iop_RSqrt32F0x4:
      case Iop_Recip32F0x4:
         return unary32F0x4(mce, vatom);

      case Iop_32UtoV128:
      case Iop_64UtoV128:
      case Iop_Dup8x16:
      case Iop_Dup16x8:
      case Iop_Dup32x4:
         return assignNew(mce, Ity_V128, unop(op, vatom));

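      /* For the cases below, an undefined input bit could affect any
         bit of the result, so pessimistically PCast the whole shadow
         value to the result type. */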
      case Iop_F32toF64:
      case Iop_I32toF64:
      case Iop_NegF64:
      case Iop_AbsF64:
      case Iop_Est5FRSqrt:
      case Iop_Clz64:
      case Iop_Ctz64:
         return mkPCastTo(mce, Ity_I64, vatom);

      case Iop_Clz32:
      case Iop_Ctz32:
      case Iop_TruncF64asF32:
         return mkPCastTo(mce, Ity_I32, vatom);

      case Iop_1Uto64:
      case Iop_8Uto64:
      case Iop_8Sto64:
      case Iop_16Uto64:
      case Iop_16Sto64:
      case Iop_32Sto64:
      case Iop_32Uto64:
      case Iop_V128to64:
      case Iop_V128HIto64:
      case Iop_128HIto64:
      case Iop_128to64:
         return assignNew(mce, Ity_I64, unop(op, vatom));

      case Iop_64to32:
      case Iop_64HIto32:
      case Iop_1Uto32:
      case Iop_1Sto32:
      case Iop_8Uto32:
      case Iop_16Uto32:
      case Iop_16Sto32:
      case Iop_8Sto32:
      case Iop_V128to32:
         return assignNew(mce, Ity_I32, unop(op, vatom));

      case Iop_8Sto16:
      case Iop_8Uto16:
      case Iop_32to16:
      case Iop_32HIto16:
      case Iop_64to16:
         return assignNew(mce, Ity_I16, unop(op, vatom));

      case Iop_1Uto8:
      case Iop_16to8:
      case Iop_16HIto8:
      case Iop_32to8:
      case Iop_64to8:
         return assignNew(mce, Ity_I8, unop(op, vatom));

      case Iop_32to1:
         return assignNew(mce, Ity_I1, unop(Iop_32to1, vatom));

      case Iop_64to1:
         return assignNew(mce, Ity_I1, unop(Iop_64to1, vatom));

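      /* Bitwise Not and the reinterpret-casts map each output bit to
         exactly one input bit, so the shadow value simply passes
         through unchanged. */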
      case Iop_ReinterpF64asI64:
      case Iop_ReinterpI64asF64:
      case Iop_ReinterpI32asF32:
      case Iop_NotV128:
      case Iop_Not64:
      case Iop_Not32:
      case Iop_Not16:
      case Iop_Not8:
      case Iop_Not1:
         return vatom;

      /* Neg* really fall under the Add/Sub banner, and as such you
         might think would qualify for the 'expensive add/sub'
         treatment.  However, in this case since the implied literal
         is zero (0 - arg), we just do the cheap thing anyway. */
      case Iop_Neg8:
         return mkLeft8(mce, vatom);
      case Iop_Neg16:
         return mkLeft16(mce, vatom);
      case Iop_Neg32:
         return mkLeft32(mce, vatom);

      default:
         ppIROp(op);
         VG_(tool_panic)("memcheck:expr2vbits_Unop");
   }
}


/* Worker function; do not call directly. */
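/* 'bias' is a compile-time byte offset folded into the effective
   address; expr2vbits_Load below uses it to fetch the two 64-bit
   halves of a V128 value (at bias+0 and bias+8) from a single base
   address. */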
static
IRAtom* expr2vbits_Load_WRK ( MCEnv* mce,
                              IREndness end, IRType ty,
                              IRAtom* addr, UInt bias )
{
   void*    helper;
   Char*    hname;
   IRDirty* di;
   IRTemp   datavbits;
   IRAtom*  addrAct;

   tl_assert(isOriginalAtom(mce,addr));
   tl_assert(end == Iend_LE || end == Iend_BE);

   /* First, emit a definedness test for the address.  This also sets
      the address (shadow) to 'defined' following the test. */
   complainIfUndefined( mce, addr );

   /* Now cook up a call to the relevant helper function, to read the
      data V bits from shadow memory. */
   ty = shadowType(ty);

   if (end == Iend_LE) {
      switch (ty) {
         case Ity_I64: helper = &MC_(helperc_LOADV64le);
                       hname = "MC_(helperc_LOADV64le)";
                       break;
         case Ity_I32: helper = &MC_(helperc_LOADV32le);
                       hname = "MC_(helperc_LOADV32le)";
                       break;
         case Ity_I16: helper = &MC_(helperc_LOADV16le);
                       hname = "MC_(helperc_LOADV16le)";
                       break;
         case Ity_I8:  helper = &MC_(helperc_LOADV8);
                       hname = "MC_(helperc_LOADV8)";
                       break;
         default:      ppIRType(ty);
                       VG_(tool_panic)("memcheck:do_shadow_Load(LE)");
      }
   } else {
      switch (ty) {
         case Ity_I64: helper = &MC_(helperc_LOADV64be);
                       hname = "MC_(helperc_LOADV64be)";
                       break;
         case Ity_I32: helper = &MC_(helperc_LOADV32be);
                       hname = "MC_(helperc_LOADV32be)";
                       break;
         case Ity_I16: helper = &MC_(helperc_LOADV16be);
                       hname = "MC_(helperc_LOADV16be)";
                       break;
         case Ity_I8:  helper = &MC_(helperc_LOADV8);
                       hname = "MC_(helperc_LOADV8)";
                       break;
         default:      ppIRType(ty);
                       VG_(tool_panic)("memcheck:do_shadow_Load(BE)");
      }
   }

   /* Generate the actual address into addrAct. */
   if (bias == 0) {
      addrAct = addr;
   } else {
      IROp    mkAdd;
      IRAtom* eBias;
      IRType  tyAddr = mce->hWordTy;
      tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
      mkAdd   = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
      eBias   = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
      addrAct = assignNew(mce, tyAddr, binop(mkAdd, addr, eBias) );
   }

   /* We need to have a place to park the V bits we're just about to
      read. */
   datavbits = newIRTemp(mce->bb->tyenv, ty);
   di = unsafeIRDirty_1_N( datavbits,
                           1/*regparms*/,
                           hname, VG_(fnptr_to_fnentry)( helper ),
                           mkIRExprVec_1( addrAct ));
   setHelperAnns( mce, di );
   stmt( mce->bb, IRStmt_Dirty(di) );

   return mkexpr(datavbits);
}


static
IRAtom* expr2vbits_Load ( MCEnv* mce,
                          IREndness end, IRType ty,
                          IRAtom* addr, UInt bias )
{
   IRAtom *v64hi, *v64lo;
   tl_assert(end == Iend_LE || end == Iend_BE);
   switch (shadowType(ty)) {
      case Ity_I8:
      case Ity_I16:
      case Ity_I32:
      case Ity_I64:
         return expr2vbits_Load_WRK(mce, end, ty, addr, bias);
      case Ity_V128:
         if (end == Iend_LE) {
            v64lo = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias);
            v64hi = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+8);
         } else {
            v64hi = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias);
            v64lo = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+8);
         }
         return assignNew( mce,
                           Ity_V128,
                           binop(Iop_64HLtoV128, v64hi, v64lo));
      default:
         VG_(tool_panic)("expr2vbits_Load");
   }
}


static
IRAtom* expr2vbits_Mux0X ( MCEnv* mce,
                           IRAtom* cond, IRAtom* expr0, IRAtom* exprX )
{
   IRAtom *vbitsC, *vbits0, *vbitsX;
   IRType ty;
   /* Given Mux0X(cond,expr0,exprX), generate
         Mux0X(cond,expr0#,exprX#) `UifU` PCast(cond#)
      That is, steer the V bits like the originals, but trash the
      result if the steering value is undefined.  This gives
      lazy propagation. */
   tl_assert(isOriginalAtom(mce, cond));
   tl_assert(isOriginalAtom(mce, expr0));
   tl_assert(isOriginalAtom(mce, exprX));

   vbitsC = expr2vbits(mce, cond);
   vbits0 = expr2vbits(mce, expr0);
   vbitsX = expr2vbits(mce, exprX);
   ty = typeOfIRExpr(mce->bb->tyenv, vbits0);

   return
      mkUifU(mce, ty, assignNew(mce, ty, IRExpr_Mux0X(cond, vbits0, vbitsX)),
                      mkPCastTo(mce, ty, vbitsC) );
}

/* --------- This is the main expression-handling function. --------- */

static
IRExpr* expr2vbits ( MCEnv* mce, IRExpr* e )
{
   switch (e->tag) {

      case Iex_Get:
         return shadow_GET( mce, e->Iex.Get.offset, e->Iex.Get.ty );

      case Iex_GetI:
         return shadow_GETI( mce, e->Iex.GetI.descr,
                                  e->Iex.GetI.ix, e->Iex.GetI.bias );

      case Iex_RdTmp:
         return IRExpr_RdTmp( findShadowTmp(mce, e->Iex.RdTmp.tmp) );

      case Iex_Const:
         return definedOfType(shadowType(typeOfIRExpr(mce->bb->tyenv, e)));

      case Iex_Qop:
         return expr2vbits_Qop(
                   mce,
                   e->Iex.Qop.op,
                   e->Iex.Qop.arg1, e->Iex.Qop.arg2,
                   e->Iex.Qop.arg3, e->Iex.Qop.arg4
                );

      case Iex_Triop:
         return expr2vbits_Triop(
                   mce,
                   e->Iex.Triop.op,
                   e->Iex.Triop.arg1, e->Iex.Triop.arg2, e->Iex.Triop.arg3
                );

      case Iex_Binop:
         return expr2vbits_Binop(
                   mce,
                   e->Iex.Binop.op,
                   e->Iex.Binop.arg1, e->Iex.Binop.arg2
                );

      case Iex_Unop:
         return expr2vbits_Unop( mce, e->Iex.Unop.op, e->Iex.Unop.arg );

      case Iex_Load:
         return expr2vbits_Load( mce, e->Iex.Load.end,
                                      e->Iex.Load.ty,
                                      e->Iex.Load.addr, 0/*addr bias*/ );

      case Iex_CCall:
         return mkLazyN( mce, e->Iex.CCall.args,
                              e->Iex.CCall.retty,
                              e->Iex.CCall.cee );

      case Iex_Mux0X:
         return expr2vbits_Mux0X( mce, e->Iex.Mux0X.cond, e->Iex.Mux0X.expr0,
                                       e->Iex.Mux0X.exprX);

      default:
         VG_(printf)("\n");
         ppIRExpr(e);
         VG_(printf)("\n");
         VG_(tool_panic)("memcheck: expr2vbits");
   }
}

/*------------------------------------------------------------*/
/*--- Generate shadow stmts from all kinds of IRStmts.      ---*/
/*------------------------------------------------------------*/

/* Widen a value to the host word size. */
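/* The sub-word STOREV* helpers receive their V bits in a host word;
   zero- rather than sign-extension is the right choice here since
   only the low-order bits are meaningful to the helper. */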

static
IRExpr* zwidenToHostWord ( MCEnv* mce, IRAtom* vatom )
{
   IRType ty, tyH;

   /* vatom is a vbits-value and as such can only have a shadow type. */
   tl_assert(isShadowAtom(mce,vatom));

   ty  = typeOfIRExpr(mce->bb->tyenv, vatom);
   tyH = mce->hWordTy;

   if (tyH == Ity_I32) {
      switch (ty) {
         case Ity_I32: return vatom;
         case Ity_I16: return assignNew(mce, tyH, unop(Iop_16Uto32, vatom));
         case Ity_I8:  return assignNew(mce, tyH, unop(Iop_8Uto32, vatom));
         default:      goto unhandled;
      }
   } else
   if (tyH == Ity_I64) {
      switch (ty) {
         case Ity_I32: return assignNew(mce, tyH, unop(Iop_32Uto64, vatom));
         case Ity_I16: return assignNew(mce, tyH, unop(Iop_32Uto64,
                              assignNew(mce, Ity_I32, unop(Iop_16Uto32, vatom))));
         case Ity_I8:  return assignNew(mce, tyH, unop(Iop_32Uto64,
                              assignNew(mce, Ity_I32, unop(Iop_8Uto32, vatom))));
         default:      goto unhandled;
      }
   } else {
      goto unhandled;
   }
  unhandled:
   VG_(printf)("\nty = "); ppIRType(ty); VG_(printf)("\n");
   VG_(tool_panic)("zwidenToHostWord");
}


/* Generate a shadow store.  addr is always the original address atom.
   You can pass in either originals or V-bits for the data atom, but
   obviously not both. */

static
void do_shadow_Store ( MCEnv* mce,
                       IREndness end,
                       IRAtom* addr, UInt bias,
                       IRAtom* data, IRAtom* vdata )
{
   IROp     mkAdd;
   IRType   ty, tyAddr;
   IRDirty  *di, *diLo64, *diHi64;
   IRAtom   *addrAct, *addrLo64, *addrHi64;
   IRAtom   *vdataLo64, *vdataHi64;
   IRAtom   *eBias, *eBiasLo64, *eBiasHi64;
   void*    helper = NULL;
   Char*    hname = NULL;
   IRConst* c;

   tyAddr = mce->hWordTy;
   mkAdd  = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
   tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
   tl_assert( end == Iend_LE || end == Iend_BE );

   di = diLo64 = diHi64 = NULL;
   eBias = eBiasLo64 = eBiasHi64 = NULL;
   addrAct = addrLo64 = addrHi64 = NULL;
   vdataLo64 = vdataHi64 = NULL;

   if (data) {
      tl_assert(!vdata);
      tl_assert(isOriginalAtom(mce, data));
      tl_assert(bias == 0);
      vdata = expr2vbits( mce, data );
   } else {
      tl_assert(vdata);
   }

   tl_assert(isOriginalAtom(mce,addr));
   tl_assert(isShadowAtom(mce,vdata));

   ty = typeOfIRExpr(mce->bb->tyenv, vdata);

   // If we're not doing undefined value checking, pretend that this value
   // is "all valid".  That lets Vex's optimiser remove some of the V bit
   // shadow computation ops that precede it.
   if (!MC_(clo_undef_value_errors)) {
      switch (ty) {
         // V128 weirdness: IRConst_V128 takes a 16-bit value, one bit
         // per byte of the 128-bit vector.
         case Ity_V128: c = IRConst_V128(V_BITS16_DEFINED); break;
         case Ity_I64:  c = IRConst_U64 (V_BITS64_DEFINED); break;
         case Ity_I32:  c = IRConst_U32 (V_BITS32_DEFINED); break;
         case Ity_I16:  c = IRConst_U16 (V_BITS16_DEFINED); break;
         case Ity_I8:   c = IRConst_U8  (V_BITS8_DEFINED);  break;
         default:       VG_(tool_panic)("memcheck:do_shadow_Store(LE)");
      }
      vdata = IRExpr_Const( c );
   }

   /* First, emit a definedness test for the address.  This also sets
      the address (shadow) to 'defined' following the test. */
   complainIfUndefined( mce, addr );

   /* Now decide which helper function to call to write the data V
      bits into shadow memory. */
   if (end == Iend_LE) {
      switch (ty) {
         case Ity_V128: /* we'll use the helper twice */
         case Ity_I64: helper = &MC_(helperc_STOREV64le);
                       hname = "MC_(helperc_STOREV64le)";
                       break;
         case Ity_I32: helper = &MC_(helperc_STOREV32le);
                       hname = "MC_(helperc_STOREV32le)";
                       break;
         case Ity_I16: helper = &MC_(helperc_STOREV16le);
                       hname = "MC_(helperc_STOREV16le)";
                       break;
         case Ity_I8:  helper = &MC_(helperc_STOREV8);
                       hname = "MC_(helperc_STOREV8)";
                       break;
         default:      VG_(tool_panic)("memcheck:do_shadow_Store(LE)");
      }
   } else {
      switch (ty) {
         case Ity_V128: /* we'll use the helper twice */
         case Ity_I64: helper = &MC_(helperc_STOREV64be);
                       hname = "MC_(helperc_STOREV64be)";
                       break;
         case Ity_I32: helper = &MC_(helperc_STOREV32be);
                       hname = "MC_(helperc_STOREV32be)";
                       break;
         case Ity_I16: helper = &MC_(helperc_STOREV16be);
                       hname = "MC_(helperc_STOREV16be)";
                       break;
         case Ity_I8:  helper = &MC_(helperc_STOREV8);
                       hname = "MC_(helperc_STOREV8)";
                       break;
         default:      VG_(tool_panic)("memcheck:do_shadow_Store(BE)");
      }
   }

sewardj170ee212004-12-10 18:57:51 +00002838 if (ty == Ity_V128) {
2839
sewardj20d38f22005-02-07 23:50:18 +00002840 /* V128-bit case */
sewardj170ee212004-12-10 18:57:51 +00002841 /* See comment in next clause re 64-bit regparms */
sewardj2e595852005-06-30 23:33:37 +00002842 /* also, need to be careful about endianness */
2843
2844 Int offLo64, offHi64;
2845 if (end == Iend_LE) {
2846 offLo64 = 0;
2847 offHi64 = 8;
2848 } else {
sewardj2e595852005-06-30 23:33:37 +00002849 offLo64 = 8;
2850 offHi64 = 0;
2851 }
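
      /* (Orientation sketch.)  A little-endian V128 store to address A
         covers bytes A..A+15: the low 64-bit half (V128to64) carries
         the V bits of bytes A..A+7, hence offLo64 == 0, and the high
         half (V128HIto64) those of A+8..A+15, hence offHi64 == 8.  On
         a big-endian target the two halves land the other way round. */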

      eBiasLo64 = tyAddr==Ity_I32 ? mkU32(bias+offLo64) : mkU64(bias+offLo64);
      addrLo64  = assignNew(mce, tyAddr, binop(mkAdd, addr, eBiasLo64) );
      vdataLo64 = assignNew(mce, Ity_I64, unop(Iop_V128to64, vdata));
      diLo64    = unsafeIRDirty_0_N(
                     1/*regparms*/,
                     hname, VG_(fnptr_to_fnentry)( helper ),
                     mkIRExprVec_2( addrLo64, vdataLo64 )
                  );
      eBiasHi64 = tyAddr==Ity_I32 ? mkU32(bias+offHi64) : mkU64(bias+offHi64);
      addrHi64  = assignNew(mce, tyAddr, binop(mkAdd, addr, eBiasHi64) );
      vdataHi64 = assignNew(mce, Ity_I64, unop(Iop_V128HIto64, vdata));
      diHi64    = unsafeIRDirty_0_N(
                     1/*regparms*/,
                     hname, VG_(fnptr_to_fnentry)( helper ),
                     mkIRExprVec_2( addrHi64, vdataHi64 )
                  );
      setHelperAnns( mce, diLo64 );
      setHelperAnns( mce, diHi64 );
      stmt( mce->bb, IRStmt_Dirty(diLo64) );
      stmt( mce->bb, IRStmt_Dirty(diHi64) );

   } else {

      /* 8/16/32/64-bit cases */
      /* Generate the actual address into addrAct. */
      if (bias == 0) {
         addrAct = addr;
      } else {
         eBias   = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
         addrAct = assignNew(mce, tyAddr, binop(mkAdd, addr, eBias) );
      }

      if (ty == Ity_I64) {
         /* We can't do this with regparm 2 on 32-bit platforms, since
            the back ends aren't clever enough to handle 64-bit
            regparm args.  Therefore be different. */
         di = unsafeIRDirty_0_N(
                 1/*regparms*/,
                 hname, VG_(fnptr_to_fnentry)( helper ),
                 mkIRExprVec_2( addrAct, vdata )
              );
      } else {
         di = unsafeIRDirty_0_N(
                 2/*regparms*/,
                 hname, VG_(fnptr_to_fnentry)( helper ),
                 mkIRExprVec_2( addrAct,
                                zwidenToHostWord( mce, vdata ))
              );
      }
      setHelperAnns( mce, di );
      stmt( mce->bb, IRStmt_Dirty(di) );
   }

}


/* Do lazy pessimistic propagation through a dirty helper call, by
   looking at the annotations on it.  This is the most complex part of
   Memcheck. */
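/* A sketch of the net effect (illustrative pseudo-IR, not code the
   tool emits verbatim), for a helper with two unmasked args that also
   reads 4 bytes at mAddr and returns a result in d->tmp:

      curr = UifU32( PCast32(vbits(arg1)),
             UifU32( PCast32(vbits(arg2)),
                     PCast32(shadow-load-32(mAddr)) ));
      vbits(d->tmp) = PCastTo(type-of(d->tmp), curr);

   i.e. each input's V bits are squashed ("PCast"ed) into a worst-case
   32-bit summary and UifU-merged, and that single summary is fanned
   back out to every output.  If any input bit is undefined, all
   output bits are treated as undefined. */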

static IRType szToITy ( Int n )
{
   switch (n) {
      case 1: return Ity_I8;
      case 2: return Ity_I16;
      case 4: return Ity_I32;
      case 8: return Ity_I64;
      default: VG_(tool_panic)("szToITy(memcheck)");
   }
}

static
void do_shadow_Dirty ( MCEnv* mce, IRDirty* d )
{
   Int       i, n, offset, toDo, gSz, gOff;
   IRAtom    *src, *here, *curr;
   IRType    tyAddr, tySrc, tyDst;
   IRTemp    dst;
   IREndness end;

   /* What's the native endianness?  We need to know this. */
#  if defined(VG_BIGENDIAN)
   end = Iend_BE;
#  elif defined(VG_LITTLEENDIAN)
   end = Iend_LE;
#  else
#    error "Unknown endianness"
#  endif

   /* First check the guard. */
   complainIfUndefined(mce, d->guard);

   /* Now round up all inputs and PCast over them. */
   curr = definedOfType(Ity_I32);

   /* Inputs: unmasked args */
   for (i = 0; d->args[i]; i++) {
      if (d->cee->mcx_mask & (1<<i)) {
         /* ignore this arg */
      } else {
         here = mkPCastTo( mce, Ity_I32, expr2vbits(mce, d->args[i]) );
         curr = mkUifU32(mce, here, curr);
      }
   }

   /* Inputs: guest state that we read. */
   for (i = 0; i < d->nFxState; i++) {
      tl_assert(d->fxState[i].fx != Ifx_None);
      if (d->fxState[i].fx == Ifx_Write)
         continue;

      /* Ignore any sections marked as 'always defined'. */
      if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size )) {
         if (0)
            VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
                        d->fxState[i].offset, d->fxState[i].size );
         continue;
      }

      /* This state element is read or modified.  So we need to
         consider it.  If larger than 8 bytes, deal with it in 8-byte
         chunks. */
      gSz  = d->fxState[i].size;
      gOff = d->fxState[i].offset;
      tl_assert(gSz > 0);
      while (True) {
         if (gSz == 0) break;
         n = gSz <= 8 ? gSz : 8;
         /* update 'curr' with UifU of the state slice
            gOff .. gOff+n-1 */
         tySrc = szToITy( n );
         src   = assignNew( mce, tySrc,
                            shadow_GET(mce, gOff, tySrc ) );
         here = mkPCastTo( mce, Ity_I32, src );
         curr = mkUifU32(mce, here, curr);
         gSz  -= n;
         gOff += n;
      }

   }

   /* Inputs: memory.  First set up some info needed regardless of
      whether we're doing reads or writes. */
   tyAddr = Ity_INVALID;

   if (d->mFx != Ifx_None) {
      /* Because we may do multiple shadow loads/stores from the same
         base address, it's best to do a single test of its
         definedness right now.  Post-instrumentation optimisation
         should remove all but this test. */
      tl_assert(d->mAddr);
      complainIfUndefined(mce, d->mAddr);

      tyAddr = typeOfIRExpr(mce->bb->tyenv, d->mAddr);
      tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);
      tl_assert(tyAddr == mce->hWordTy); /* not really right */
   }

   /* Deal with memory inputs (reads or modifies) */
   if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
      offset = 0;
      toDo   = d->mSize;
      /* chew off 32-bit chunks.  We don't care about the endianness
         since it's all going to be condensed down to a single bit,
         but nevertheless choose an endianness which is hopefully
         native to the platform. */
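      /* E.g. for mSize == 10 the loops below read V bits as two 32-bit
         chunks at offsets 0 and 4 and then one 16-bit chunk at offset
         8, leaving toDo == 0.  A size with a 1-byte remainder would
         trip the assertion further down, as its comment notes. */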
      while (toDo >= 4) {
         here = mkPCastTo(
                   mce, Ity_I32,
                   expr2vbits_Load ( mce, end, Ity_I32,
                                     d->mAddr, d->mSize - toDo )
                );
         curr = mkUifU32(mce, here, curr);
         toDo -= 4;
      }
      /* chew off 16-bit chunks */
      while (toDo >= 2) {
         here = mkPCastTo(
                   mce, Ity_I32,
                   expr2vbits_Load ( mce, end, Ity_I16,
                                     d->mAddr, d->mSize - toDo )
                );
         curr = mkUifU32(mce, here, curr);
         toDo -= 2;
      }
      tl_assert(toDo == 0); /* also need to handle 1-byte excess */
   }

   /* Whew!  So curr is a 32-bit V-value summarising pessimistically
      all the inputs to the helper.  Now we need to re-distribute the
      results to all destinations. */
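   /* Concretely (keeping Memcheck's 0-means-defined convention): each
      destination receives PCastTo(dst-type, curr), which is all zeroes
      (fully defined) if curr == 0, and all ones (fully undefined)
      otherwise. */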

   /* Outputs: the destination temporary, if there is one. */
   if (d->tmp != IRTemp_INVALID) {
      dst   = findShadowTmp(mce, d->tmp);
      tyDst = typeOfIRTemp(mce->bb->tyenv, d->tmp);
      assign( mce->bb, dst, mkPCastTo( mce, tyDst, curr) );
   }

   /* Outputs: guest state that we write or modify. */
   for (i = 0; i < d->nFxState; i++) {
      tl_assert(d->fxState[i].fx != Ifx_None);
      if (d->fxState[i].fx == Ifx_Read)
         continue;
      /* Ignore any sections marked as 'always defined'. */
      if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size ))
         continue;
      /* This state element is written or modified.  So we need to
         consider it.  If larger than 8 bytes, deal with it in 8-byte
         chunks. */
      gSz  = d->fxState[i].size;
      gOff = d->fxState[i].offset;
      tl_assert(gSz > 0);
      while (True) {
         if (gSz == 0) break;
         n = gSz <= 8 ? gSz : 8;
         /* Write suitably-casted 'curr' to the state slice
            gOff .. gOff+n-1 */
         tyDst = szToITy( n );
         do_shadow_PUT( mce, gOff,
                        NULL, /* original atom */
                        mkPCastTo( mce, tyDst, curr ) );
         gSz  -= n;
         gOff += n;
      }
   }

   /* Outputs: memory that we write or modify.  Same comments about
      endianness as above apply. */
   if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
      offset = 0;
      toDo   = d->mSize;
      /* chew off 32-bit chunks */
      while (toDo >= 4) {
         do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
                          NULL, /* original data */
                          mkPCastTo( mce, Ity_I32, curr ) );
         toDo -= 4;
      }
      /* chew off 16-bit chunks */
      while (toDo >= 2) {
         do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
                          NULL, /* original data */
                          mkPCastTo( mce, Ity_I16, curr ) );
         toDo -= 2;
      }
      tl_assert(toDo == 0); /* also need to handle 1-byte excess */
   }

}

/* We have an ABI hint telling us that [base .. base+len-1] is to
   become undefined ("writable").  Generate code to call a helper to
   notify the A/V bit machinery of this fact.

   We call
   void MC_(helperc_MAKE_STACK_UNINIT) ( Addr base, UWord len );
*/
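/* (Background, for orientation.)  The guest front ends emit AbiHints
   at points where the ABI says stack memory changes status -- roughly
   speaking, when a function returns, the area it has just vacated
   below the stack pointer becomes undefined again.  Marking that area
   undefined here lets Memcheck flag later reads of stale stack. */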
static
void do_AbiHint ( MCEnv* mce, IRExpr* base, Int len )
{
   IRDirty* di;
   di = unsafeIRDirty_0_N(
           0/*regparms*/,
           "MC_(helperc_MAKE_STACK_UNINIT)",
           VG_(fnptr_to_fnentry)( &MC_(helperc_MAKE_STACK_UNINIT) ),
           mkIRExprVec_2( base, mkIRExpr_HWord( (UInt)len) )
        );
   stmt( mce->bb, IRStmt_Dirty(di) );
}


/*------------------------------------------------------------*/
/*--- Memcheck main                                        ---*/
/*------------------------------------------------------------*/

static Bool isBogusAtom ( IRAtom* at )
{
   ULong n = 0;
   IRConst* con;
   tl_assert(isIRAtom(at));
   if (at->tag == Iex_RdTmp)
      return False;
   tl_assert(at->tag == Iex_Const);
   con = at->Iex.Const.con;
   switch (con->tag) {
      case Ico_U1:   return False;
      case Ico_U8:   n = (ULong)con->Ico.U8; break;
      case Ico_U16:  n = (ULong)con->Ico.U16; break;
      case Ico_U32:  n = (ULong)con->Ico.U32; break;
      case Ico_U64:  n = (ULong)con->Ico.U64; break;
      case Ico_F64:  return False;
      case Ico_F64i: return False;
      case Ico_V128: return False;
      default: ppIRExpr(at); tl_assert(0);
   }
   /* VG_(printf)("%llx\n", n); */
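   /* The values tested for below appear to be the magic constants of
      word-at-a-time zero-byte-detection tricks, e.g. the inlined
      strlen idiom  (x + 0xFEFEFEFF) & ~x & 0x80808080,  where
      0xFEFEFEFF == -0x01010101.  Such code deliberately computes on
      partially-undefined words, so spotting these literals is the cue
      to generate the more expensive, more accurate instrumentation. */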
   return (/*32*/    n == 0xFEFEFEFFULL
           /*32*/ || n == 0x80808080ULL
           /*64*/ || n == 0xFFFFFFFFFEFEFEFFULL
           /*64*/ || n == 0xFEFEFEFEFEFEFEFFULL
           /*64*/ || n == 0x0000000000008080ULL
           /*64*/ || n == 0x8080808080808080ULL
           /*64*/ || n == 0x0101010101010101ULL
          );
}

static Bool checkForBogusLiterals ( /*FLAT*/ IRStmt* st )
{
   Int      i;
   IRExpr*  e;
   IRDirty* d;
   switch (st->tag) {
      case Ist_WrTmp:
         e = st->Ist.WrTmp.data;
         switch (e->tag) {
            case Iex_Get:
            case Iex_RdTmp:
               return False;
            case Iex_Const:
               return isBogusAtom(e);
            case Iex_Unop:
               return isBogusAtom(e->Iex.Unop.arg);
            case Iex_GetI:
               return isBogusAtom(e->Iex.GetI.ix);
            case Iex_Binop:
               return isBogusAtom(e->Iex.Binop.arg1)
                      || isBogusAtom(e->Iex.Binop.arg2);
            case Iex_Triop:
               return isBogusAtom(e->Iex.Triop.arg1)
                      || isBogusAtom(e->Iex.Triop.arg2)
                      || isBogusAtom(e->Iex.Triop.arg3);
            case Iex_Qop:
               return isBogusAtom(e->Iex.Qop.arg1)
                      || isBogusAtom(e->Iex.Qop.arg2)
                      || isBogusAtom(e->Iex.Qop.arg3)
                      || isBogusAtom(e->Iex.Qop.arg4);
            case Iex_Mux0X:
               return isBogusAtom(e->Iex.Mux0X.cond)
                      || isBogusAtom(e->Iex.Mux0X.expr0)
                      || isBogusAtom(e->Iex.Mux0X.exprX);
            case Iex_Load:
               return isBogusAtom(e->Iex.Load.addr);
            case Iex_CCall:
               for (i = 0; e->Iex.CCall.args[i]; i++)
                  if (isBogusAtom(e->Iex.CCall.args[i]))
                     return True;
               return False;
            default:
               goto unhandled;
         }
      case Ist_Dirty:
         d = st->Ist.Dirty.details;
         for (i = 0; d->args[i]; i++)
            if (isBogusAtom(d->args[i]))
               return True;
         if (d->guard && isBogusAtom(d->guard))
            return True;
         if (d->mAddr && isBogusAtom(d->mAddr))
            return True;
         return False;
      case Ist_Put:
         return isBogusAtom(st->Ist.Put.data);
      case Ist_PutI:
         return isBogusAtom(st->Ist.PutI.ix)
                || isBogusAtom(st->Ist.PutI.data);
      case Ist_Store:
         return isBogusAtom(st->Ist.Store.addr)
                || isBogusAtom(st->Ist.Store.data);
      case Ist_Exit:
         return isBogusAtom(st->Ist.Exit.guard);
      case Ist_AbiHint:
         return isBogusAtom(st->Ist.AbiHint.base);
      case Ist_NoOp:
      case Ist_IMark:
      case Ist_MFence:
         return False;
      default:
      unhandled:
         ppIRStmt(st);
         VG_(tool_panic)("checkForBogusLiterals");
   }
}


IRSB* MC_(instrument) ( VgCallbackClosure* closure,
                        IRSB* bb_in,
                        VexGuestLayout* layout,
                        VexGuestExtents* vge,
                        IRType gWordTy, IRType hWordTy )
{
   Bool    verboze = False; //True;
   Bool    bogus;
   Int     i, j, first_stmt;
   IRStmt* st;
   MCEnv   mce;
   IRSB*   bb;

   if (gWordTy != hWordTy) {
      /* We don't currently support this case. */
      VG_(tool_panic)("host/guest word size mismatch");
   }

   /* Check we're not completely nuts */
   tl_assert(sizeof(UWord) == sizeof(void*));
   tl_assert(sizeof(Word)  == sizeof(void*));
   tl_assert(sizeof(ULong) == 8);
   tl_assert(sizeof(Long)  == 8);
   tl_assert(sizeof(UInt)  == 4);
   tl_assert(sizeof(Int)   == 4);

   /* Set up SB */
   bb = deepCopyIRSBExceptStmts(bb_in);

   /* Set up the running environment.  Only .bb is modified as we go
      along. */
   mce.bb             = bb;
   mce.layout         = layout;
   mce.n_originalTmps = bb->tyenv->types_used;
   mce.hWordTy        = hWordTy;
   mce.bogusLiterals  = False;
   mce.tmpMap         = LibVEX_Alloc(mce.n_originalTmps * sizeof(IRTemp));
   for (i = 0; i < mce.n_originalTmps; i++)
      mce.tmpMap[i] = IRTemp_INVALID;

   /* Make a preliminary inspection of the statements, to see if there
      are any dodgy-looking literals.  If there are, we generate
      extra-detailed (hence extra-expensive) instrumentation in
      places.  Scan the whole bb even if dodginess is found earlier,
      so that the flatness assertion is applied to all stmts. */

   bogus = False;

   for (i = 0; i < bb_in->stmts_used; i++) {

      st = bb_in->stmts[i];
      tl_assert(st);
      tl_assert(isFlatIRStmt(st));

      if (!bogus) {
         bogus = checkForBogusLiterals(st);
         if (0 && bogus) {
            VG_(printf)("bogus: ");
            ppIRStmt(st);
            VG_(printf)("\n");
         }
      }

   }

   mce.bogusLiterals = bogus;

   /* Copy verbatim any IR preamble preceding the first IMark */

   tl_assert(mce.bb == bb);

   i = 0;
   while (i < bb_in->stmts_used && bb_in->stmts[i]->tag != Ist_IMark) {

      st = bb_in->stmts[i];
      tl_assert(st);
      tl_assert(isFlatIRStmt(st));

      addStmtToIRSB( bb, bb_in->stmts[i] );
      i++;
   }

   /* Nasty problem.  IR optimisation of the pre-instrumented IR may
      cause the IR following the preamble to contain references to IR
      temporaries defined in the preamble.  Because the preamble isn't
      instrumented, these temporaries don't have any shadows.
      Nevertheless uses of them following the preamble will cause
      memcheck to generate references to their shadows.  End effect is
      to cause IR sanity check failures, due to references to
      non-existent shadows.  This is only evident for the complex
      preambles used for function wrapping on TOC-afflicted platforms
      (ppc64-linux, ppc32-aix5, ppc64-aix5).

      The following loop therefore scans the preamble looking for
      assignments to temporaries.  For each one found it creates an
      assignment to the corresponding shadow temp, marking it as
      'defined'.  This is the same resulting IR as if the main
      instrumentation loop before had been applied to the statement
      'tmp = CONSTANT'.
   */
   for (j = 0; j < i; j++) {
      if (bb_in->stmts[j]->tag == Ist_WrTmp) {
         /* findShadowTmp checks its arg is an original tmp;
            no need to assert that here. */
         IRTemp tmp_o = bb_in->stmts[j]->Ist.WrTmp.tmp;
         IRTemp tmp_s = findShadowTmp(&mce, tmp_o);
         IRType ty_s  = typeOfIRTemp(bb->tyenv, tmp_s);
         assign( bb, tmp_s, definedOfType( ty_s ) );
         if (0) {
            VG_(printf)("create shadow tmp for preamble tmp [%d] ty ", j);
            ppIRType( ty_s );
            VG_(printf)("\n");
         }
      }
   }

   /* Iterate over the remaining stmts to generate instrumentation. */

   tl_assert(bb_in->stmts_used > 0);
   tl_assert(i >= 0);
   tl_assert(i < bb_in->stmts_used);
   tl_assert(bb_in->stmts[i]->tag == Ist_IMark);

   for (/* use current i */; i < bb_in->stmts_used; i++) {

      st = bb_in->stmts[i];
      first_stmt = bb->stmts_used;

      if (verboze) {
         ppIRStmt(st);
         VG_(printf)("\n\n");
      }

      /* Generate instrumentation code for each stmt ... */

      switch (st->tag) {

         case Ist_WrTmp:
            assign( bb, findShadowTmp(&mce, st->Ist.WrTmp.tmp),
                        expr2vbits( &mce, st->Ist.WrTmp.data) );
            break;

         case Ist_Put:
            do_shadow_PUT( &mce,
                           st->Ist.Put.offset,
                           st->Ist.Put.data,
                           NULL /* shadow atom */ );
            break;

         case Ist_PutI:
            do_shadow_PUTI( &mce,
                            st->Ist.PutI.descr,
                            st->Ist.PutI.ix,
                            st->Ist.PutI.bias,
                            st->Ist.PutI.data );
            break;

         case Ist_Store:
            do_shadow_Store( &mce, st->Ist.Store.end,
                                   st->Ist.Store.addr, 0/* addr bias */,
                                   st->Ist.Store.data,
                                   NULL /* shadow data */ );
            break;

         case Ist_Exit:
            complainIfUndefined( &mce, st->Ist.Exit.guard );
            break;

         case Ist_NoOp:
         case Ist_IMark:
         case Ist_MFence:
            break;

         case Ist_Dirty:
            do_shadow_Dirty( &mce, st->Ist.Dirty.details );
            break;

         case Ist_AbiHint:
            do_AbiHint( &mce, st->Ist.AbiHint.base, st->Ist.AbiHint.len );
            break;

         default:
            VG_(printf)("\n");
            ppIRStmt(st);
            VG_(printf)("\n");
            VG_(tool_panic)("memcheck: unhandled IRStmt");

      } /* switch (st->tag) */

      if (verboze) {
         for (j = first_stmt; j < bb->stmts_used; j++) {
            VG_(printf)("   ");
            ppIRStmt(bb->stmts[j]);
            VG_(printf)("\n");
         }
         VG_(printf)("\n");
      }

      /* ... and finally copy the stmt itself to the output. */
      addStmtToIRSB(bb, st);

   }

   /* Now we need to complain if the jump target is undefined. */
   first_stmt = bb->stmts_used;

   if (verboze) {
      VG_(printf)("bb->next = ");
      ppIRExpr(bb->next);
      VG_(printf)("\n\n");
   }

   complainIfUndefined( &mce, bb->next );

   if (verboze) {
      for (j = first_stmt; j < bb->stmts_used; j++) {
         VG_(printf)("   ");
         ppIRStmt(bb->stmts[j]);
         VG_(printf)("\n");
      }
      VG_(printf)("\n");
   }

   return bb;
}

/*--------------------------------------------------------------------*/
/*--- end                                             mc_translate.c ---*/
/*--------------------------------------------------------------------*/