/*--------------------------------------------------------------------*/
/*--- Instrument IR to perform memory checking operations.         ---*/
/*---                                               mc_translate.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of MemCheck, a heavyweight Valgrind tool for
   detecting memory errors.

   Copyright (C) 2000-2004 Julian Seward
   jseward@acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "mc_include.h"


/*------------------------------------------------------------*/
/*--- Forward decls                                        ---*/
/*------------------------------------------------------------*/

struct _MCEnv;

static IRType  shadowType ( IRType ty );
static IRExpr* expr2vbits ( struct _MCEnv* mce, IRExpr* e );

/*------------------------------------------------------------*/
/*--- Memcheck running state, and tmp management.          ---*/
/*------------------------------------------------------------*/

/* Carries around state during memcheck instrumentation. */
typedef
   struct _MCEnv {
      /* MODIFIED: the bb being constructed.  IRStmts are added. */
      IRBB* bb;

      /* MODIFIED: a table [0 .. #temps_in_original_bb-1] which maps
         original temps to their current shadow temp.  Initially all
         entries are IRTemp_INVALID.  Entries are added lazily since
         many original temps are not used due to optimisation prior
         to instrumentation.  Note that floating point original tmps
         are shadowed by integer tmps of the same size, and Bit-typed
         original tmps are shadowed by the type Ity_I8.  See comment
         below. */
      IRTemp* tmpMap;
      Int     n_originalTmps; /* for range checking */

      /* MODIFIED: indicates whether "bogus" literals have so far been
         found.  Starts off False, and may change to True. */
      Bool bogusLiterals;

      /* READONLY: the guest layout.  This indicates which parts of
         the guest state should be regarded as 'always defined'. */
      VexGuestLayout* layout;
      /* READONLY: the host word type.  Needed for constructing
         arguments of type 'HWord' to be passed to helper functions.
         Ity_I32 or Ity_I64 only. */
      IRType hWordTy;
   }
   MCEnv;

/* SHADOW TMP MANAGEMENT.  Shadow tmps are allocated lazily (on
   demand), as they are encountered.  This is for two reasons.

   (1) (less important reason): Many original tmps are unused due to
   initial IR optimisation, and we do not want to waste space in
   tables tracking them.

   Shadow IRTemps are therefore allocated on demand.  mce.tmpMap is a
   table indexed [0 .. n_types-1], which gives the current shadow for
   each original tmp, or INVALID_IRTEMP if none is so far assigned.
   It is necessary to support making multiple assignments to a shadow
   -- specifically, after testing a shadow for definedness, it needs
   to be made defined.  But IR's SSA property disallows this.

   (2) (more important reason): Therefore, when a shadow needs to get
   a new value, a new temporary is created, the value is assigned to
   that, and the tmpMap is updated to reflect the new binding.

   A corollary is that if the tmpMap maps a given tmp to
   INVALID_IRTEMP and we are hoping to read that shadow tmp, it means
   there's a read-before-write error in the original tmps.  The IR
   sanity checker should catch all such anomalies, however.
*/
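
/* Illustrative sketch of the rebinding scheme (tmp names here are
   hypothetical): suppose original tmp t5 currently has shadow t17.
   After a definedness test on t17, t5 must be marked defined; since
   SSA forbids reassigning t17, we allocate a fresh shadow, say t23,
   emit "t23 = <defined>", and update tmpMap[5] from t17 to t23.
   Subsequent reads of t5's shadow then see t23. */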

/* Find the tmp currently shadowing the given original tmp.  If none
   so far exists, allocate one.  */
static IRTemp findShadowTmp ( MCEnv* mce, IRTemp orig )
{
   tl_assert(orig < mce->n_originalTmps);
   if (mce->tmpMap[orig] == IRTemp_INVALID) {
      mce->tmpMap[orig]
         = newIRTemp(mce->bb->tyenv,
                     shadowType(mce->bb->tyenv->types[orig]));
   }
   return mce->tmpMap[orig];
}

/* Allocate a new shadow for the given original tmp.  This means any
   previous shadow is abandoned.  This is needed because it is
   necessary to give a new value to a shadow once it has been tested
   for undefinedness, but unfortunately IR's SSA property disallows
   this.  Instead we must abandon the old shadow, allocate a new one
   and use that instead. */
static void newShadowTmp ( MCEnv* mce, IRTemp orig )
{
   tl_assert(orig < mce->n_originalTmps);
   mce->tmpMap[orig]
      = newIRTemp(mce->bb->tyenv,
                  shadowType(mce->bb->tyenv->types[orig]));
}


/*------------------------------------------------------------*/
/*--- IRAtoms -- a subset of IRExprs                       ---*/
/*------------------------------------------------------------*/

/* An atom is either an IRExpr_Const or an IRExpr_Tmp, as defined by
   isAtom() in libvex_ir.h.  Because this instrumenter expects flat
   input, most of this code deals in atoms.  Usefully, a value atom
   always has a V-value which is also an atom: constants are shadowed
   by constants, and temps are shadowed by the corresponding shadow
   temporary. */

typedef IRExpr IRAtom;

/* (used for sanity checks only): is this an atom which looks
   like it's from original code? */
static Bool isOriginalAtom ( MCEnv* mce, IRAtom* a1 )
{
   if (a1->tag == Iex_Const)
      return True;
   if (a1->tag == Iex_Tmp && a1->Iex.Tmp.tmp < mce->n_originalTmps)
      return True;
   return False;
}

/* (used for sanity checks only): is this an atom which looks
   like it's from shadow code? */
static Bool isShadowAtom ( MCEnv* mce, IRAtom* a1 )
{
   if (a1->tag == Iex_Const)
      return True;
   if (a1->tag == Iex_Tmp && a1->Iex.Tmp.tmp >= mce->n_originalTmps)
      return True;
   return False;
}

/* (used for sanity checks only): check that both args are atoms and
   are identically-kinded. */
static Bool sameKindedAtoms ( IRAtom* a1, IRAtom* a2 )
{
   if (a1->tag == Iex_Tmp && a2->tag == Iex_Tmp)
      return True;
   if (a1->tag == Iex_Const && a2->tag == Iex_Const)
      return True;
   return False;
}


/*------------------------------------------------------------*/
/*--- Type management                                      ---*/
/*------------------------------------------------------------*/

/* Shadow state is always accessed using integer types.  This returns
   an integer type with the same size (as per sizeofIRType) as the
   given type.  The only valid shadow types are Bit, I8, I16, I32,
   I64, V128. */

static IRType shadowType ( IRType ty )
{
   switch (ty) {
      case Ity_I1:
      case Ity_I8:
      case Ity_I16:
      case Ity_I32:
      case Ity_I64:  return ty;
      case Ity_F32:  return Ity_I32;
      case Ity_F64:  return Ity_I64;
      case Ity_V128: return Ity_V128;
      default: ppIRType(ty);
               VG_(tool_panic)("memcheck:shadowType");
   }
}

/* Produce a 'defined' value of the given shadow type.  Should only be
   supplied shadow types (I1/I8/I16/I32/I64/V128). */
static IRExpr* definedOfType ( IRType ty ) {
   switch (ty) {
      case Ity_I1:   return IRExpr_Const(IRConst_U1(False));
      case Ity_I8:   return IRExpr_Const(IRConst_U8(0));
      case Ity_I16:  return IRExpr_Const(IRConst_U16(0));
      case Ity_I32:  return IRExpr_Const(IRConst_U32(0));
      case Ity_I64:  return IRExpr_Const(IRConst_U64(0));
      case Ity_V128: return IRExpr_Const(IRConst_V128(0x0000));
      default: VG_(tool_panic)("memcheck:definedOfType");
   }
}


/*------------------------------------------------------------*/
/*--- Constructing IR fragments                            ---*/
/*------------------------------------------------------------*/

/* assign value to tmp */
#define assign(_bb,_tmp,_expr)   \
   addStmtToIRBB((_bb), IRStmt_Tmp((_tmp),(_expr)))

/* add stmt to a bb */
#define stmt(_bb,_stmt)    \
   addStmtToIRBB((_bb), (_stmt))

/* build various kinds of expressions */
#define binop(_op, _arg1, _arg2) IRExpr_Binop((_op),(_arg1),(_arg2))
#define unop(_op, _arg)          IRExpr_Unop((_op),(_arg))
#define mkU8(_n)                 IRExpr_Const(IRConst_U8(_n))
#define mkU16(_n)                IRExpr_Const(IRConst_U16(_n))
#define mkU32(_n)                IRExpr_Const(IRConst_U32(_n))
#define mkU64(_n)                IRExpr_Const(IRConst_U64(_n))
#define mkV128(_n)               IRExpr_Const(IRConst_V128(_n))
#define mkexpr(_tmp)             IRExpr_Tmp((_tmp))

/* bind the given expression to a new temporary, and return the
   temporary.  This effectively converts an arbitrary expression into
   an atom. */
static IRAtom* assignNew ( MCEnv* mce, IRType ty, IRExpr* e ) {
   IRTemp t = newIRTemp(mce->bb->tyenv, ty);
   assign(mce->bb, t, e);
   return mkexpr(t);
}
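
/* For instance (illustrative): assignNew(mce, Ity_I32,
   binop(Iop_Or32, a, b)) appends "tN = Or32(a,b)" to mce->bb for a
   fresh tN and hands back the atom "tN", so the result can be reused
   without duplicating the expression -- keeping the IR flat. */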


/*------------------------------------------------------------*/
/*--- Constructing definedness primitive ops               ---*/
/*------------------------------------------------------------*/

/* --------- Defined-if-either-defined --------- */

static IRAtom* mkDifD8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I8, binop(Iop_And8, a1, a2));
}

static IRAtom* mkDifD16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I16, binop(Iop_And16, a1, a2));
}

static IRAtom* mkDifD32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I32, binop(Iop_And32, a1, a2));
}

static IRAtom* mkDifD64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I64, binop(Iop_And64, a1, a2));
}

static IRAtom* mkDifD128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_V128, binop(Iop_And128, a1, a2));
}

/* --------- Undefined-if-either-undefined --------- */

static IRAtom* mkUifU8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I8, binop(Iop_Or8, a1, a2));
}

static IRAtom* mkUifU16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I16, binop(Iop_Or16, a1, a2));
}

static IRAtom* mkUifU32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I32, binop(Iop_Or32, a1, a2));
}

static IRAtom* mkUifU64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I64, binop(Iop_Or64, a1, a2));
}

static IRAtom* mkUifU128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_V128, binop(Iop_Or128, a1, a2));
}

static IRAtom* mkUifU ( MCEnv* mce, IRType vty, IRAtom* a1, IRAtom* a2 ) {
   switch (vty) {
      case Ity_I8:   return mkUifU8(mce, a1, a2);
      case Ity_I16:  return mkUifU16(mce, a1, a2);
      case Ity_I32:  return mkUifU32(mce, a1, a2);
      case Ity_I64:  return mkUifU64(mce, a1, a2);
      case Ity_V128: return mkUifU128(mce, a1, a2);
      default:
         VG_(printf)("\n"); ppIRType(vty); VG_(printf)("\n");
         VG_(tool_panic)("memcheck:mkUifU");
   }
}

/* --------- The Left-family of operations. --------- */

static IRAtom* mkLeft8 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   /* It's safe to duplicate a1 since it's only an atom */
   return assignNew(mce, Ity_I8,
                    binop(Iop_Or8, a1,
                          assignNew(mce, Ity_I8,
                                    /* unop(Iop_Neg8, a1)))); */
                                    binop(Iop_Sub8, mkU8(0), a1) )));
}

static IRAtom* mkLeft16 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   /* It's safe to duplicate a1 since it's only an atom */
   return assignNew(mce, Ity_I16,
                    binop(Iop_Or16, a1,
                          assignNew(mce, Ity_I16,
                                    /* unop(Iop_Neg16, a1)))); */
                                    binop(Iop_Sub16, mkU16(0), a1) )));
}

static IRAtom* mkLeft32 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   /* It's safe to duplicate a1 since it's only an atom */
   return assignNew(mce, Ity_I32,
                    binop(Iop_Or32, a1,
                          assignNew(mce, Ity_I32,
                                    /* unop(Iop_Neg32, a1)))); */
                                    binop(Iop_Sub32, mkU32(0), a1) )));
}

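/* Illustrative example of Left: for an 8-bit V-value 0b00010100
   (bits 2 and 4 undefined), -v is 0b11101100, so v | -v gives
   0b11111100 -- every bit at or above the lowest undefined bit
   becomes undefined.  This is a cheap approximation to how carries
   can propagate undefinedness leftwards in add/sub results. */
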
/* --------- 'Improvement' functions for AND/OR. --------- */

/* ImproveAND(data, vbits) = data OR vbits.  Defined (0) data 0s give
   defined (0); all other combinations give undefined (1).
*/
static IRAtom* mkImproveAND8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(mce, Ity_I8, binop(Iop_Or8, data, vbits));
}

static IRAtom* mkImproveAND16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(mce, Ity_I16, binop(Iop_Or16, data, vbits));
}

static IRAtom* mkImproveAND32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(mce, Ity_I32, binop(Iop_Or32, data, vbits));
}

static IRAtom* mkImproveAND64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(mce, Ity_I64, binop(Iop_Or64, data, vbits));
}

static IRAtom* mkImproveAND128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(mce, Ity_V128, binop(Iop_Or128, data, vbits));
}

/* ImproveOR(data, vbits) = ~data OR vbits.  Defined (0) data 1s give
   defined (0); all other combinations give undefined (1).
*/
static IRAtom* mkImproveOR8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             mce, Ity_I8,
             binop(Iop_Or8,
                   assignNew(mce, Ity_I8, unop(Iop_Not8, data)),
                   vbits) );
}

static IRAtom* mkImproveOR16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             mce, Ity_I16,
             binop(Iop_Or16,
                   assignNew(mce, Ity_I16, unop(Iop_Not16, data)),
                   vbits) );
}

static IRAtom* mkImproveOR32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             mce, Ity_I32,
             binop(Iop_Or32,
                   assignNew(mce, Ity_I32, unop(Iop_Not32, data)),
                   vbits) );
}

static IRAtom* mkImproveOR64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             mce, Ity_I64,
             binop(Iop_Or64,
                   assignNew(mce, Ity_I64, unop(Iop_Not64, data)),
                   vbits) );
}

static IRAtom* mkImproveOR128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             mce, Ity_V128,
             binop(Iop_Or128,
                   assignNew(mce, Ity_V128, unop(Iop_Not128, data)),
                   vbits) );
}

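/* Worked example of the improvement (see do_And_Or in
   expr2vbits_Binop below): for And32 with x a defined 0 and y wholly
   undefined, UifU(vx,vy) is all-1s (naively undefined), but
   ImproveAND(x,vx) is all-0s, so DifD-ing it back in makes the
   result defined -- correct, since 0 & anything == 0. */
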
/* --------- Pessimising casts. --------- */

static IRAtom* mkPCastTo( MCEnv* mce, IRType dst_ty, IRAtom* vbits )
{
   IRType  ty;
   IRAtom* tmp1;
   /* Note, dst_ty is a shadow type, not an original type. */
   /* First of all, collapse vbits down to a single bit. */
   tl_assert(isShadowAtom(mce,vbits));
   ty   = typeOfIRExpr(mce->bb->tyenv, vbits);
   tmp1 = NULL;
   switch (ty) {
      case Ity_I1:
         tmp1 = vbits;
         break;
      case Ity_I8:
         tmp1 = assignNew(mce, Ity_I1, binop(Iop_CmpNE8, vbits, mkU8(0)));
         break;
      case Ity_I16:
         tmp1 = assignNew(mce, Ity_I1, binop(Iop_CmpNE16, vbits, mkU16(0)));
         break;
      case Ity_I32:
         tmp1 = assignNew(mce, Ity_I1, binop(Iop_CmpNE32, vbits, mkU32(0)));
         break;
      case Ity_I64:
         tmp1 = assignNew(mce, Ity_I1, binop(Iop_CmpNE64, vbits, mkU64(0)));
         break;
      default:
         VG_(tool_panic)("mkPCastTo(1)");
   }
   tl_assert(tmp1);
   /* Now widen up to the dst type. */
   switch (dst_ty) {
      case Ity_I1:
         return tmp1;
      case Ity_I8:
         return assignNew(mce, Ity_I8, unop(Iop_1Sto8, tmp1));
      case Ity_I16:
         return assignNew(mce, Ity_I16, unop(Iop_1Sto16, tmp1));
      case Ity_I32:
         return assignNew(mce, Ity_I32, unop(Iop_1Sto32, tmp1));
      case Ity_I64:
         return assignNew(mce, Ity_I64, unop(Iop_1Sto64, tmp1));
      case Ity_V128:
         tmp1 = assignNew(mce, Ity_I64, unop(Iop_1Sto64, tmp1));
         tmp1 = assignNew(mce, Ity_V128, binop(Iop_64HLto128, tmp1, tmp1));
         return tmp1;
      default:
         ppIRType(dst_ty);
         VG_(tool_panic)("mkPCastTo(2)");
   }
}

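/* Illustrative example: pcasting the I32 V-value 0x00008000 (one
   undefined bit) to Ity_I16 gives CmpNE32(0x8000,0) == 1, then
   1Sto16 == 0xFFFF: any undefinedness in the source makes the whole
   destination undefined. */
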
/* --------- Accurate interpretation of CmpEQ/CmpNE. --------- */
/*
   Normally, we can do CmpEQ/CmpNE by doing UifU on the arguments, and
   PCasting to Ity_I1.  However, sometimes it is necessary to be more
   accurate.  The insight is that the result is defined if two
   corresponding bits can be found, one from each argument, so that
   both bits are defined but are different -- that makes EQ say "No"
   and NE say "Yes".  Hence, we compute an improvement term and DifD
   it onto the "normal" (UifU) result.

   The result is:

   PCastTo<1> (
      PCastTo<sz>( UifU<sz>(vxx, vyy) )  -- naive version
      `DifD<sz>`
      PCastTo<sz>( CmpEQ<sz>( vec, 1....1 ) )  -- improvement term
   )
   where
     vec contains 0 (defined) bits where the corresponding arg bits
     are defined but different, and 1 bits otherwise:

     vec = UifU<sz>( vxx, vyy, Not<sz>(Xor<sz>( xx, yy )) )
*/
static IRAtom* expensiveCmpEQorNE ( MCEnv*  mce,
                                    IRType  ty,
                                    IRAtom* vxx, IRAtom* vyy,
                                    IRAtom* xx,  IRAtom* yy )
{
   IRAtom *naive, *vec, *vec_cmpd, *improved, *final_cast, *top;
   IROp   opDIFD, opUIFU, opXOR, opNOT, opCMP;

   tl_assert(isShadowAtom(mce,vxx));
   tl_assert(isShadowAtom(mce,vyy));
   tl_assert(isOriginalAtom(mce,xx));
   tl_assert(isOriginalAtom(mce,yy));
   tl_assert(sameKindedAtoms(vxx,xx));
   tl_assert(sameKindedAtoms(vyy,yy));

   switch (ty) {
      case Ity_I32:
         opDIFD = Iop_And32;
         opUIFU = Iop_Or32;
         opNOT  = Iop_Not32;
         opXOR  = Iop_Xor32;
         opCMP  = Iop_CmpEQ32;
         top    = mkU32(0xFFFFFFFF);
         break;
      default:
         VG_(tool_panic)("expensiveCmpEQorNE");
   }

   naive
      = mkPCastTo(mce,ty, assignNew(mce, ty, binop(opUIFU, vxx, vyy)));

   vec
      = assignNew(
           mce,ty,
           binop( opUIFU,
                  assignNew(mce,ty, binop(opUIFU, vxx, vyy)),
                  assignNew(
                     mce,ty,
                     unop( opNOT,
                           assignNew(mce,ty, binop(opXOR, xx, yy))))));

   vec_cmpd
      = mkPCastTo( mce,ty, assignNew(mce,Ity_I1, binop(opCMP, vec, top)));

   improved
      = assignNew( mce,ty, binop(opDIFD, naive, vec_cmpd) );

   final_cast
      = mkPCastTo( mce, Ity_I1, improved );

   return final_cast;
}

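/* Worked example (I32): xx == 0x80000000, yy == 0x0, with only the
   bottom bit of each undefined (vxx == vyy == 0x1).  Then vec ==
   0x7FFFFFFF: bit 31 is defined-and-different, so CmpEQ(vec, all-1s)
   is False, the improvement term pcasts to all-0s, and DifD forces
   the final result defined -- CmpEQ must be False (and CmpNE True)
   whatever the undefined bits turn out to be. */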

/*------------------------------------------------------------*/
/*--- Emit a test and complaint if something is undefined. ---*/
/*------------------------------------------------------------*/

/* Set the annotations on a dirty helper to indicate that the stack
   pointer and instruction pointers might be read.  This is the
   behaviour of all 'emit-a-complaint' style functions we might
   call. */

static void setHelperAnns ( MCEnv* mce, IRDirty* di ) {
   di->nFxState = 2;
   di->fxState[0].fx     = Ifx_Read;
   di->fxState[0].offset = mce->layout->offset_SP;
   di->fxState[0].size   = mce->layout->sizeof_SP;
   di->fxState[1].fx     = Ifx_Read;
   di->fxState[1].offset = mce->layout->offset_IP;
   di->fxState[1].size   = mce->layout->sizeof_IP;
}


/* Check the supplied **original** atom for undefinedness, and emit a
   complaint if so.  Once that happens, mark it as defined.  This is
   possible because the atom is either a tmp or literal.  If it's a
   tmp, it will be shadowed by a tmp, and so we can set the shadow to
   be defined.  In fact as mentioned above, we will have to allocate a
   new tmp to carry the new 'defined' shadow value, and update the
   original->tmp mapping accordingly; we cannot simply assign a new
   value to an existing shadow tmp as this breaks SSAness -- resulting
   in the post-instrumentation sanity checker spluttering in disapproval.
*/
static void complainIfUndefined ( MCEnv* mce, IRAtom* atom )
{
   IRAtom*  vatom;
   IRType   ty;
   Int      sz;
   IRDirty* di;
   IRAtom*  cond;

   /* Since the original expression is atomic, there's no duplicated
      work generated by making multiple V-expressions for it.  So we
      don't really care about the possibility that someone else may
      also create a V-interpretation for it. */
   tl_assert(isOriginalAtom(mce, atom));
   vatom = expr2vbits( mce, atom );
   tl_assert(isShadowAtom(mce, vatom));
   tl_assert(sameKindedAtoms(atom, vatom));

   ty = typeOfIRExpr(mce->bb->tyenv, vatom);

   /* sz is only used for constructing the error message */
   sz = ty==Ity_I1 ? 0 : sizeofIRType(ty);

   cond = mkPCastTo( mce, Ity_I1, vatom );
   /* cond will be 0 if all defined, and 1 if any not defined. */

   switch (sz) {
      case 0:
         di = unsafeIRDirty_0_N( 0/*regparms*/,
                                 "MC_(helperc_value_check0_fail)",
                                 &MC_(helperc_value_check0_fail),
                                 mkIRExprVec_0()
                               );
         break;
      case 1:
         di = unsafeIRDirty_0_N( 0/*regparms*/,
                                 "MC_(helperc_value_check1_fail)",
                                 &MC_(helperc_value_check1_fail),
                                 mkIRExprVec_0()
                               );
         break;
      case 4:
         di = unsafeIRDirty_0_N( 0/*regparms*/,
                                 "MC_(helperc_value_check4_fail)",
                                 &MC_(helperc_value_check4_fail),
                                 mkIRExprVec_0()
                               );
         break;
      default:
         di = unsafeIRDirty_0_N( 1/*regparms*/,
                                 "MC_(helperc_complain_undef)",
                                 &MC_(helperc_complain_undef),
                                 mkIRExprVec_1( mkIRExpr_HWord( sz ))
                               );
         break;
   }
   di->guard = cond;
   setHelperAnns( mce, di );
   stmt( mce->bb, IRStmt_Dirty(di));

   /* Set the shadow tmp to be defined.  First, update the
      orig->shadow tmp mapping to reflect the fact that this shadow is
      getting a new value. */
   tl_assert(isAtom(vatom));
   /* sameKindedAtoms ... */
   if (vatom->tag == Iex_Tmp) {
      tl_assert(atom->tag == Iex_Tmp);
      newShadowTmp(mce, atom->Iex.Tmp.tmp);
      assign(mce->bb, findShadowTmp(mce, atom->Iex.Tmp.tmp),
                      definedOfType(ty));
   }
}

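/* Illustrative sketch of what gets emitted for a 4-byte tmp t3 with
   shadow t9 (tmp names hypothetical):
      t10 = CmpNE32(t9, 0x0:I32)            -- pcast to I1
      if (t10) call MC_(helperc_value_check4_fail)()
      t11 = 0x0:I32                         -- fresh, defined shadow
   with tmpMap rebound so that t3's shadow is now t11. */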

/*------------------------------------------------------------*/
/*--- Shadowing PUTs/GETs, and indexed variants thereof    ---*/
/*------------------------------------------------------------*/

/* Examine the always-defined sections declared in layout to see if
   the (offset,size) section is within one.  Note, it is an error to
   partially fall into such a region: (offset,size) should either be
   completely in such a region or completely not-in such a region.
*/
static Bool isAlwaysDefd ( MCEnv* mce, Int offset, Int size )
{
   Int minoffD, maxoffD, i;
   Int minoff = offset;
   Int maxoff = minoff + size - 1;
   tl_assert((minoff & ~0xFFFF) == 0);
   tl_assert((maxoff & ~0xFFFF) == 0);

   for (i = 0; i < mce->layout->n_alwaysDefd; i++) {
      minoffD = mce->layout->alwaysDefd[i].offset;
      maxoffD = minoffD + mce->layout->alwaysDefd[i].size - 1;
      tl_assert((minoffD & ~0xFFFF) == 0);
      tl_assert((maxoffD & ~0xFFFF) == 0);

      if (maxoff < minoffD || maxoffD < minoff)
         continue; /* no overlap */
      if (minoff >= minoffD && maxoff <= maxoffD)
         return True; /* completely contained in an always-defd section */

      VG_(tool_panic)("memcheck:isAlwaysDefd:partial overlap");
   }
   return False; /* could not find any containing section */
}



/* Generate into bb suitable actions to shadow this Put.  If the state
   slice is marked 'always defined', do nothing.  Otherwise, write the
   supplied V bits to the shadow state.  We can pass in either an
   original atom or a V-atom, but not both.  In the former case the
   relevant V-bits are then generated from the original.
*/
static
void do_shadow_PUT ( MCEnv* mce,  Int offset,
                     IRAtom* atom, IRAtom* vatom )
{
   IRType ty;
   if (atom) {
      tl_assert(!vatom);
      tl_assert(isOriginalAtom(mce, atom));
      vatom = expr2vbits( mce, atom );
   } else {
      tl_assert(vatom);
      tl_assert(isShadowAtom(mce, vatom));
   }

   ty = typeOfIRExpr(mce->bb->tyenv, vatom);
   tl_assert(ty != Ity_I1);
   if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
      /* later: no ... */
      /* emit code to emit a complaint if any of the vbits are 1. */
      /* complainIfUndefined(mce, atom); */
   } else {
      /* Do a plain shadow Put. */
      stmt( mce->bb, IRStmt_Put( offset + mce->layout->total_sizeB, vatom ) );
   }
}

/* Generate into bb suitable actions to shadow this PutI.  If the
   state slice is marked 'always defined', do nothing.  Otherwise,
   write the supplied V bits to the shadow state.
*/
static
void do_shadow_PUTI ( MCEnv* mce,
                      IRArray* descr, IRAtom* ix, Int bias, IRAtom* atom )
{
   IRAtom*  vatom;
   IRType   ty, tyS;
   Int      arrSize;

   tl_assert(isOriginalAtom(mce,atom));
   vatom = expr2vbits( mce, atom );
   tl_assert(sameKindedAtoms(atom, vatom));
   ty      = descr->elemTy;
   tyS     = shadowType(ty);
   arrSize = descr->nElems * sizeofIRType(ty);
   tl_assert(ty != Ity_I1);
   tl_assert(isOriginalAtom(mce,ix));
   complainIfUndefined(mce,ix);
   if (isAlwaysDefd(mce, descr->base, arrSize)) {
      /* later: no ... */
      /* emit code to emit a complaint if any of the vbits are 1. */
      /* complainIfUndefined(mce, atom); */
   } else {
      /* Do a cloned version of the Put that refers to the shadow
         area. */
      IRArray* new_descr
         = mkIRArray( descr->base + mce->layout->total_sizeB,
                      tyS, descr->nElems);
      stmt( mce->bb, IRStmt_PutI( new_descr, ix, bias, vatom ));
   }
}



/* Return an expression which contains the V bits corresponding to the
   given GET (passed in in pieces).
*/
static
IRExpr* shadow_GET ( MCEnv* mce, Int offset, IRType ty )
{
   IRType tyS = shadowType(ty);
   tl_assert(ty != Ity_I1);
   if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
      /* Always defined, return all zeroes of the relevant type */
      return definedOfType(tyS);
   } else {
      /* return a cloned version of the Get that refers to the shadow
         area. */
      return IRExpr_Get( offset + mce->layout->total_sizeB, tyS );
   }
}


/* Return an expression which contains the V bits corresponding to the
   given GETI (passed in in pieces).
*/
static
IRExpr* shadow_GETI ( MCEnv* mce, IRArray* descr, IRAtom* ix, Int bias )
{
   IRType ty      = descr->elemTy;
   IRType tyS     = shadowType(ty);
   Int    arrSize = descr->nElems * sizeofIRType(ty);
   tl_assert(ty != Ity_I1);
   tl_assert(isOriginalAtom(mce,ix));
   complainIfUndefined(mce,ix);
   if (isAlwaysDefd(mce, descr->base, arrSize)) {
      /* Always defined, return all zeroes of the relevant type */
      return definedOfType(tyS);
   } else {
      /* return a cloned version of the Get that refers to the shadow
         area. */
      IRArray* new_descr
         = mkIRArray( descr->base + mce->layout->total_sizeB,
                      tyS, descr->nElems);
      return IRExpr_GetI( new_descr, ix, bias );
   }
}

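/* Note the layout assumption shared by the four functions above: the
   shadow of the guest state byte at offset k lives at offset
   k + layout->total_sizeB, i.e. the shadow state is a complete second
   copy of the guest state placed immediately after the real one. */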

/*------------------------------------------------------------*/
/*--- Generating approximations for unknown operations,    ---*/
/*--- using lazy-propagate semantics                       ---*/
/*------------------------------------------------------------*/

/* Lazy propagation of undefinedness from two values, resulting in the
   specified shadow type.
*/
static
IRAtom* mkLazy2 ( MCEnv* mce, IRType finalVty, IRAtom* va1, IRAtom* va2 )
{
   /* force everything via 32-bit intermediaries. */
   IRAtom* at;
   tl_assert(isShadowAtom(mce,va1));
   tl_assert(isShadowAtom(mce,va2));
   at = mkPCastTo(mce, Ity_I32, va1);
   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
   at = mkPCastTo(mce, finalVty, at);
   return at;
}


/* Do the lazy propagation game from a null-terminated vector of
   atoms.  This is presumably the arguments to a helper call, so the
   IRCallee info is also supplied in order that we can know which
   arguments should be ignored (via the .mcx_mask field).
*/
static
IRAtom* mkLazyN ( MCEnv* mce,
                  IRAtom** exprvec, IRType finalVtype, IRCallee* cee )
{
   Int     i;
   IRAtom* here;
   IRAtom* curr = definedOfType(Ity_I32);
   for (i = 0; exprvec[i]; i++) {
      tl_assert(i < 32);
      tl_assert(isOriginalAtom(mce, exprvec[i]));
      /* Only take notice of this arg if the callee's mc-exclusion
         mask does not say it is to be excluded. */
      if (cee->mcx_mask & (1<<i)) {
         /* the arg is to be excluded from definedness checking.  Do
            nothing. */
         if (0) VG_(printf)("excluding %s(%d)\n", cee->name, i);
      } else {
         /* calculate the arg's definedness, and pessimistically merge
            it in. */
         here = mkPCastTo( mce, Ity_I32, expr2vbits(mce, exprvec[i]) );
         curr = mkUifU32(mce, here, curr);
      }
   }
   return mkPCastTo(mce, finalVtype, curr );
}

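/* Illustrative example: for a helper call with cee->mcx_mask == 0x1,
   argument 0 is skipped entirely, and the result's V-bits are the
   all-or-nothing merge of the remaining arguments' V-bits, pcast to
   finalVtype -- the result is wholly undefined if any checked arg is
   anywhere undefined. */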

/*------------------------------------------------------------*/
/*--- Generating expensive sequences for exact carry-chain ---*/
/*--- propagation in add/sub and related operations.       ---*/
/*------------------------------------------------------------*/

static
IRAtom* expensiveAddSub ( MCEnv*  mce,
                          Bool    add,
                          IRType  ty,
                          IRAtom* qaa, IRAtom* qbb,
                          IRAtom* aa,  IRAtom* bb )
{
   IRAtom *a_min, *b_min, *a_max, *b_max;
   IROp   opAND, opOR, opXOR, opNOT, opADD, opSUB;

   tl_assert(isShadowAtom(mce,qaa));
   tl_assert(isShadowAtom(mce,qbb));
   tl_assert(isOriginalAtom(mce,aa));
   tl_assert(isOriginalAtom(mce,bb));
   tl_assert(sameKindedAtoms(qaa,aa));
   tl_assert(sameKindedAtoms(qbb,bb));

   switch (ty) {
      case Ity_I32:
         opAND = Iop_And32;
         opOR  = Iop_Or32;
         opXOR = Iop_Xor32;
         opNOT = Iop_Not32;
         opADD = Iop_Add32;
         opSUB = Iop_Sub32;
         break;
      default:
         VG_(tool_panic)("expensiveAddSub");
   }

   // a_min = aa & ~qaa
   a_min = assignNew(mce,ty,
                     binop(opAND, aa,
                                  assignNew(mce,ty, unop(opNOT, qaa))));

   // b_min = bb & ~qbb
   b_min = assignNew(mce,ty,
                     binop(opAND, bb,
                                  assignNew(mce,ty, unop(opNOT, qbb))));

   // a_max = aa | qaa
   a_max = assignNew(mce,ty, binop(opOR, aa, qaa));

   // b_max = bb | qbb
   b_max = assignNew(mce,ty, binop(opOR, bb, qbb));

   if (add) {
      // result = (qaa | qbb) | ((a_min + b_min) ^ (a_max + b_max))
      return
      assignNew(mce,ty,
         binop( opOR,
                assignNew(mce,ty, binop(opOR, qaa, qbb)),
                assignNew(mce,ty,
                   binop( opXOR,
                          assignNew(mce,ty, binop(opADD, a_min, b_min)),
                          assignNew(mce,ty, binop(opADD, a_max, b_max))
                   )
                )
         )
      );
   } else {
      // result = (qaa | qbb) | ((a_min - b_max) ^ (a_max - b_min))
      return
      assignNew(mce,ty,
         binop( opOR,
                assignNew(mce,ty, binop(opOR, qaa, qbb)),
                assignNew(mce,ty,
                   binop( opXOR,
                          assignNew(mce,ty, binop(opSUB, a_min, b_max)),
                          assignNew(mce,ty, binop(opSUB, a_max, b_min))
                   )
                )
         )
      );
   }

}

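/* Worked example (Add32): aa == 0x3 with qaa == 0x4 (bit 2
   undefined), bb == 0x1 fully defined.  Then a_min+b_min == 0x4 and
   a_max+b_max == 0x8, whose XOR is 0xC, so the result V-bits are
   0x4 | 0xC == 0xC: the carry out of the undefined bit also taints
   bit 3, which the (qaa|qbb) term alone would miss. */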

/*------------------------------------------------------------*/
/*--- Helpers for dealing with vector primops.             ---*/
/*------------------------------------------------------------*/

/* Vector pessimisation -- pessimise within each lane individually. */

static IRAtom* mkPCast8x16 ( MCEnv* mce, IRAtom* at )
{
   return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ8x16, at));
}

static IRAtom* mkPCast16x8 ( MCEnv* mce, IRAtom* at )
{
   return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ16x8, at));
}

static IRAtom* mkPCast32x4 ( MCEnv* mce, IRAtom* at )
{
   return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ32x4, at));
}

static IRAtom* mkPCast64x2 ( MCEnv* mce, IRAtom* at )
{
   return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ64x2, at));
}

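/* For instance (illustrative), mkPCast32x4 maps the V-value with hex
   lanes 00000001.00000000.80000000.00000000 to
   FFFFFFFF.00000000.FFFFFFFF.00000000: each 32-bit lane collapses to
   all-0s (fully defined) or all-1s, independently of other lanes. */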

/* Here's a simple scheme capable of handling ops derived from SSE1
   code, while only generating ops that can be efficiently
   implemented in SSE1. */

/* All-lanes versions are straightforward:

   binary32Fx4(x,y)   ==> PCast32x4(UifU128(x#,y#))

   unary32Fx4(x)      ==> PCast32x4(x#)

   Lowest-lane-only versions are more complex:

   binary32F0x4(x,y)  ==> Set128lo32(
                             x#,
                             PCast32(128to32(UifU128(x#,y#)))
                          )

   This is perhaps not so obvious.  In particular, it's faster to
   do a 128-bit UifU and then take the bottom 32 bits than the more
   obvious scheme of taking the bottom 32 bits of each operand
   and doing a 32-bit UifU.  Basically since UifU is fast and
   chopping lanes off vector values is slow.

   Finally:

   unary32F0x4(x)     ==> Set128lo32(
                             x#,
                             PCast32(128to32(x#))
                          )

   Where:

   PCast32(v#)   = 1Sto32(CmpNE32(v#,0))
   PCast32x4(v#) = CmpNEZ32x4(v#)
*/

static
IRAtom* binary32Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   tl_assert(isShadowAtom(mce, vatomY));
   at = mkUifU128(mce, vatomX, vatomY);
   at = assignNew(mce, Ity_V128, mkPCast32x4(mce, at));
   return at;
}

static
IRAtom* unary32Fx4 ( MCEnv* mce, IRAtom* vatomX )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   at = assignNew(mce, Ity_V128, mkPCast32x4(mce, vatomX));
   return at;
}

static
IRAtom* binary32F0x4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   tl_assert(isShadowAtom(mce, vatomY));
   at = mkUifU128(mce, vatomX, vatomY);
   at = assignNew(mce, Ity_I32, unop(Iop_128to32, at));
   at = mkPCastTo(mce, Ity_I32, at);
   at = assignNew(mce, Ity_V128, binop(Iop_Set128lo32, vatomX, at));
   return at;
}

static
IRAtom* unary32F0x4 ( MCEnv* mce, IRAtom* vatomX )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   at = assignNew(mce, Ity_I32, unop(Iop_128to32, vatomX));
   at = mkPCastTo(mce, Ity_I32, at);
   at = assignNew(mce, Ity_V128, binop(Iop_Set128lo32, vatomX, at));
   return at;
}

/* --- ... and ... 64Fx2 versions of the same ... --- */

static
IRAtom* binary64Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   tl_assert(isShadowAtom(mce, vatomY));
   at = mkUifU128(mce, vatomX, vatomY);
   at = assignNew(mce, Ity_V128, mkPCast64x2(mce, at));
   return at;
}

static
IRAtom* unary64Fx2 ( MCEnv* mce, IRAtom* vatomX )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   at = assignNew(mce, Ity_V128, mkPCast64x2(mce, vatomX));
   return at;
}

static
IRAtom* binary64F0x2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   tl_assert(isShadowAtom(mce, vatomY));
   at = mkUifU128(mce, vatomX, vatomY);
   at = assignNew(mce, Ity_I64, unop(Iop_128to64, at));
   at = mkPCastTo(mce, Ity_I64, at);
   at = assignNew(mce, Ity_V128, binop(Iop_Set128lo64, vatomX, at));
   return at;
}

static
IRAtom* unary64F0x2 ( MCEnv* mce, IRAtom* vatomX )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   at = assignNew(mce, Ity_I64, unop(Iop_128to64, vatomX));
   at = mkPCastTo(mce, Ity_I64, at);
   at = assignNew(mce, Ity_V128, binop(Iop_Set128lo64, vatomX, at));
   return at;
}

/* --- --- Vector saturated narrowing --- --- */

/* This is quite subtle.  What to do is simple:

   Let the original narrowing op be QNarrowW{S,U}xN.  Produce:

      the-narrowing-op( PCastWxN(vatom1), PCastWxN(vatom2))

   Why this is right is not so simple.  Consider a lane in the args,
   vatom1 or 2, doesn't matter.

   After the PCast, that lane is all 0s (defined) or all
   1s (undefined).

   Both signed and unsigned saturating narrowing of all 0s produces
   all 0s, which is what we want.

   The all-1s case is more complex.  Unsigned narrowing interprets an
   all-1s input as the largest unsigned integer, and so produces all
   1s as a result since that is the largest unsigned value at the
   smaller width.

   Signed narrowing interprets all 1s as -1.  Fortunately, -1 narrows
   to -1, so we still wind up with all 1s at the smaller width.

   So: In short, pessimise the args, then apply the original narrowing
   op.
*/
static
IRAtom* vectorNarrow128 ( MCEnv* mce, IROp narrow_op,
                          IRAtom* vatom1, IRAtom* vatom2)
{
   IRAtom *at1, *at2, *at3;
   IRAtom* (*pcast)( MCEnv*, IRAtom* );
   switch (narrow_op) {
      case Iop_QNarrow32Sx4: pcast = mkPCast32x4; break;
      case Iop_QNarrow16Sx8: pcast = mkPCast16x8; break;
      case Iop_QNarrow16Ux8: pcast = mkPCast16x8; break;
      default: VG_(tool_panic)("vectorNarrow128");
   }
   tl_assert(isShadowAtom(mce,vatom1));
   tl_assert(isShadowAtom(mce,vatom2));
   at1 = assignNew(mce, Ity_V128, pcast(mce, vatom1));
   at2 = assignNew(mce, Ity_V128, pcast(mce, vatom2));
   at3 = assignNew(mce, Ity_V128, binop(narrow_op, at1, at2));
   return at3;
}


/* --- --- Vector integer arithmetic --- --- */

/* Simple ... UifU the args and per-lane pessimise the results. */
static
IRAtom* binary8Ix16 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   IRAtom* at;
   at = mkUifU128(mce, vatom1, vatom2);
   at = mkPCast8x16(mce, at);
   return at;
}

static
IRAtom* binary16Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   IRAtom* at;
   at = mkUifU128(mce, vatom1, vatom2);
   at = mkPCast16x8(mce, at);
   return at;
}

static
IRAtom* binary32Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   IRAtom* at;
   at = mkUifU128(mce, vatom1, vatom2);
   at = mkPCast32x4(mce, at);
   return at;
}

static
IRAtom* binary64Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   IRAtom* at;
   at = mkUifU128(mce, vatom1, vatom2);
   at = mkPCast64x2(mce, at);
   return at;
}


/*------------------------------------------------------------*/
/*--- Generate shadow values from all kinds of IRExprs.    ---*/
/*------------------------------------------------------------*/

static
IRAtom* expr2vbits_Binop ( MCEnv* mce,
                           IROp op,
                           IRAtom* atom1, IRAtom* atom2 )
{
   IRType  and_or_ty;
   IRAtom* (*uifu)    (MCEnv*, IRAtom*, IRAtom*);
   IRAtom* (*difd)    (MCEnv*, IRAtom*, IRAtom*);
   IRAtom* (*improve) (MCEnv*, IRAtom*, IRAtom*);

   IRAtom* vatom1 = expr2vbits( mce, atom1 );
   IRAtom* vatom2 = expr2vbits( mce, atom2 );

   tl_assert(isOriginalAtom(mce,atom1));
   tl_assert(isOriginalAtom(mce,atom2));
   tl_assert(isShadowAtom(mce,vatom1));
   tl_assert(isShadowAtom(mce,vatom2));
   tl_assert(sameKindedAtoms(atom1,vatom1));
   tl_assert(sameKindedAtoms(atom2,vatom2));
   switch (op) {

      /* 128-bit SIMD (SSE2-esque) */

      case Iop_ShrN16x8:
      case Iop_ShrN32x4:
      case Iop_ShrN64x2:
      case Iop_SarN16x8:
      case Iop_SarN32x4:
      case Iop_ShlN16x8:
      case Iop_ShlN32x4:
      case Iop_ShlN64x2:
         /* Same scheme as with all other shifts. */
         complainIfUndefined(mce, atom2);
         return assignNew(mce, Ity_V128, binop(op, vatom1, atom2));

      case Iop_QSub8Ux16:
      case Iop_QSub8Sx16:
      case Iop_Sub8x16:
      case Iop_Min8Ux16:
      case Iop_Max8Ux16:
      case Iop_CmpGT8Sx16:
      case Iop_CmpEQ8x16:
      case Iop_Avg8Ux16:
      case Iop_QAdd8Ux16:
      case Iop_QAdd8Sx16:
      case Iop_Add8x16:
         return binary8Ix16(mce, vatom1, vatom2);

      case Iop_QSub16Ux8:
      case Iop_QSub16Sx8:
      case Iop_Sub16x8:
      case Iop_Mul16x8:
      case Iop_MulHi16Sx8:
      case Iop_MulHi16Ux8:
      case Iop_Min16Sx8:
      case Iop_Max16Sx8:
      case Iop_CmpGT16Sx8:
      case Iop_CmpEQ16x8:
      case Iop_Avg16Ux8:
      case Iop_QAdd16Ux8:
      case Iop_QAdd16Sx8:
      case Iop_Add16x8:
         return binary16Ix8(mce, vatom1, vatom2);

      case Iop_Sub32x4:
      case Iop_CmpGT32Sx4:
      case Iop_CmpEQ32x4:
      case Iop_Add32x4:
         return binary32Ix4(mce, vatom1, vatom2);

      case Iop_Sub64x2:
      case Iop_Add64x2:
         return binary64Ix2(mce, vatom1, vatom2);

      case Iop_QNarrow32Sx4:
      case Iop_QNarrow16Sx8:
      case Iop_QNarrow16Ux8:
         return vectorNarrow128(mce, op, vatom1, vatom2);

      case Iop_Sub64Fx2:
      case Iop_Mul64Fx2:
      case Iop_Min64Fx2:
      case Iop_Max64Fx2:
      case Iop_Div64Fx2:
      case Iop_CmpLT64Fx2:
      case Iop_CmpLE64Fx2:
      case Iop_CmpEQ64Fx2:
      case Iop_Add64Fx2:
         return binary64Fx2(mce, vatom1, vatom2);

      case Iop_Sub64F0x2:
      case Iop_Mul64F0x2:
      case Iop_Min64F0x2:
      case Iop_Max64F0x2:
      case Iop_Div64F0x2:
      case Iop_CmpLT64F0x2:
      case Iop_CmpLE64F0x2:
      case Iop_CmpEQ64F0x2:
      case Iop_Add64F0x2:
         return binary64F0x2(mce, vatom1, vatom2);

      /* 128-bit SIMD (SSE1-esque) */

      case Iop_Sub32Fx4:
      case Iop_Mul32Fx4:
      case Iop_Min32Fx4:
      case Iop_Max32Fx4:
      case Iop_Div32Fx4:
      case Iop_CmpLT32Fx4:
      case Iop_CmpLE32Fx4:
      case Iop_CmpEQ32Fx4:
      case Iop_Add32Fx4:
         return binary32Fx4(mce, vatom1, vatom2);

      case Iop_Sub32F0x4:
      case Iop_Mul32F0x4:
      case Iop_Min32F0x4:
      case Iop_Max32F0x4:
      case Iop_Div32F0x4:
      case Iop_CmpLT32F0x4:
      case Iop_CmpLE32F0x4:
      case Iop_CmpEQ32F0x4:
      case Iop_Add32F0x4:
         return binary32F0x4(mce, vatom1, vatom2);

      /* 128-bit data-steering */
      case Iop_Set128lo32:
      case Iop_Set128lo64:
      case Iop_64HLto128:
      case Iop_InterleaveLO64x2:
      case Iop_InterleaveLO32x4:
      case Iop_InterleaveLO16x8:
      case Iop_InterleaveLO8x16:
      case Iop_InterleaveHI64x2:
      case Iop_InterleaveHI32x4:
      case Iop_InterleaveHI16x8:
      case Iop_InterleaveHI8x16:
         return assignNew(mce, Ity_V128, binop(op, vatom1, vatom2));

      /* Scalar floating point */

      case Iop_RoundF64:
      case Iop_F64toI64:
      case Iop_I64toF64:
         /* First arg is I32 (rounding mode), second is F64 or I64
            (data). */
         return mkLazy2(mce, Ity_I64, vatom1, vatom2);

      case Iop_PRemC3210F64: case Iop_PRem1C3210F64:
         /* Takes two F64 args. */
      case Iop_F64toI32:
      case Iop_F64toF32:
         /* First arg is I32 (rounding mode), second is F64 (data). */
         return mkLazy2(mce, Ity_I32, vatom1, vatom2);

      case Iop_F64toI16:
         /* First arg is I32 (rounding mode), second is F64 (data). */
         return mkLazy2(mce, Ity_I16, vatom1, vatom2);

      case Iop_ScaleF64:
      case Iop_Yl2xF64:
      case Iop_Yl2xp1F64:
      case Iop_PRemF64:
      case Iop_AtanF64:
      case Iop_AddF64:
      case Iop_DivF64:
      case Iop_SubF64:
      case Iop_MulF64:
         return mkLazy2(mce, Ity_I64, vatom1, vatom2);

      case Iop_CmpF64:
         return mkLazy2(mce, Ity_I32, vatom1, vatom2);

      /* non-FP after here */

      case Iop_DivModU64to32:
      case Iop_DivModS64to32:
         return mkLazy2(mce, Ity_I64, vatom1, vatom2);

      case Iop_16HLto32:
         return assignNew(mce, Ity_I32, binop(op, vatom1, vatom2));
      case Iop_32HLto64:
         return assignNew(mce, Ity_I64, binop(op, vatom1, vatom2));

      case Iop_MullS32:
      case Iop_MullU32: {
         IRAtom* vLo32 = mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
         IRAtom* vHi32 = mkPCastTo(mce, Ity_I32, vLo32);
         return assignNew(mce, Ity_I64, binop(Iop_32HLto64, vHi32, vLo32));
      }

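      /* (Illustrative note) For the Mull cases the low half of the
         product gets the Left-smeared UifU of the args' V-bits, and
         the high half is its pessimising cast: any undefinedness
         anywhere makes the entire high half undefined.  Cheap and
         conservative. */
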
      case Iop_MullS16:
      case Iop_MullU16: {
         IRAtom* vLo16 = mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
         IRAtom* vHi16 = mkPCastTo(mce, Ity_I16, vLo16);
         return assignNew(mce, Ity_I32, binop(Iop_16HLto32, vHi16, vLo16));
      }

      case Iop_MullS8:
      case Iop_MullU8: {
         IRAtom* vLo8 = mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
         IRAtom* vHi8 = mkPCastTo(mce, Ity_I8, vLo8);
         return assignNew(mce, Ity_I16, binop(Iop_8HLto16, vHi8, vLo8));
      }

      case Iop_Add32:
         if (mce->bogusLiterals)
            return expensiveAddSub(mce,True,Ity_I32,
                                   vatom1,vatom2, atom1,atom2);
         else
            goto cheap_AddSub32;
      case Iop_Sub32:
         if (mce->bogusLiterals)
            return expensiveAddSub(mce,False,Ity_I32,
                                   vatom1,vatom2, atom1,atom2);
         else
            goto cheap_AddSub32;

      cheap_AddSub32:
      case Iop_Mul32:
         return mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));

      case Iop_Mul16:
      case Iop_Add16:
      case Iop_Sub16:
         return mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));

      case Iop_Sub8:
      case Iop_Add8:
         return mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));

      case Iop_CmpEQ32:
         if (mce->bogusLiterals)
            return expensiveCmpEQorNE(mce,Ity_I32, vatom1,vatom2, atom1,atom2 );
         else
            goto cheap_cmp32;

      cheap_cmp32:
      case Iop_CmpLE32S: case Iop_CmpLE32U:
      case Iop_CmpLT32U: case Iop_CmpLT32S:
      case Iop_CmpNE32:
         return mkPCastTo(mce, Ity_I1, mkUifU32(mce, vatom1,vatom2));

      case Iop_CmpEQ16: case Iop_CmpNE16:
         return mkPCastTo(mce, Ity_I1, mkUifU16(mce, vatom1,vatom2));

      case Iop_CmpEQ8: case Iop_CmpNE8:
         return mkPCastTo(mce, Ity_I1, mkUifU8(mce, vatom1,vatom2));

      case Iop_Shl32: case Iop_Shr32: case Iop_Sar32:
         /* Complain if the shift amount is undefined.  Then simply
            shift the first arg's V bits by the real shift amount. */
         complainIfUndefined(mce, atom2);
         return assignNew(mce, Ity_I32, binop(op, vatom1, atom2));

      case Iop_Shl16: case Iop_Shr16: case Iop_Sar16:
         /* Same scheme as with 32-bit shifts. */
         complainIfUndefined(mce, atom2);
         return assignNew(mce, Ity_I16, binop(op, vatom1, atom2));

      case Iop_Shl8: case Iop_Shr8:
         /* Same scheme as with 32-bit shifts. */
         complainIfUndefined(mce, atom2);
         return assignNew(mce, Ity_I8, binop(op, vatom1, atom2));

      case Iop_Shl64: case Iop_Shr64:
         /* Same scheme as with 32-bit shifts. */
         complainIfUndefined(mce, atom2);
         return assignNew(mce, Ity_I64, binop(op, vatom1, atom2));

      case Iop_And128:
         uifu = mkUifU128; difd = mkDifD128;
         and_or_ty = Ity_V128; improve = mkImproveAND128; goto do_And_Or;
      case Iop_And64:
         uifu = mkUifU64; difd = mkDifD64;
         and_or_ty = Ity_I64; improve = mkImproveAND64; goto do_And_Or;
      case Iop_And32:
         uifu = mkUifU32; difd = mkDifD32;
         and_or_ty = Ity_I32; improve = mkImproveAND32; goto do_And_Or;
      case Iop_And16:
         uifu = mkUifU16; difd = mkDifD16;
         and_or_ty = Ity_I16; improve = mkImproveAND16; goto do_And_Or;
      case Iop_And8:
         uifu = mkUifU8; difd = mkDifD8;
         and_or_ty = Ity_I8; improve = mkImproveAND8; goto do_And_Or;

      case Iop_Or128:
         uifu = mkUifU128; difd = mkDifD128;
         and_or_ty = Ity_V128; improve = mkImproveOR128; goto do_And_Or;
      case Iop_Or64:
         uifu = mkUifU64; difd = mkDifD64;
         and_or_ty = Ity_I64; improve = mkImproveOR64; goto do_And_Or;
      case Iop_Or32:
         uifu = mkUifU32; difd = mkDifD32;
         and_or_ty = Ity_I32; improve = mkImproveOR32; goto do_And_Or;
      case Iop_Or16:
         uifu = mkUifU16; difd = mkDifD16;
         and_or_ty = Ity_I16; improve = mkImproveOR16; goto do_And_Or;
      case Iop_Or8:
         uifu = mkUifU8; difd = mkDifD8;
         and_or_ty = Ity_I8; improve = mkImproveOR8; goto do_And_Or;

      do_And_Or:
         return
         assignNew(
            mce,
            and_or_ty,
            difd(mce, uifu(mce, vatom1, vatom2),
                      difd(mce, improve(mce, atom1, vatom1),
                                improve(mce, atom2, vatom2) ) ) );

      case Iop_Xor8:
         return mkUifU8(mce, vatom1, vatom2);
      case Iop_Xor16:
         return mkUifU16(mce, vatom1, vatom2);
      case Iop_Xor32:
         return mkUifU32(mce, vatom1, vatom2);
      case Iop_Xor64:
         return mkUifU64(mce, vatom1, vatom2);
      case Iop_Xor128:
         return mkUifU128(mce, vatom1, vatom2);

      default:
         ppIROp(op);
         VG_(tool_panic)("memcheck:expr2vbits_Binop");
   }
}
1563
njn25e49d8e72002-09-23 09:36:25 +00001564
sewardj95448072004-11-22 20:19:51 +00001565static
1566IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
1567{
1568 IRAtom* vatom = expr2vbits( mce, atom );
1569 tl_assert(isOriginalAtom(mce,atom));
1570 switch (op) {
1571
sewardj0b070592004-12-10 21:44:22 +00001572 case Iop_Sqrt64Fx2:
1573 return unary64Fx2(mce, vatom);
1574
1575 case Iop_Sqrt64F0x2:
1576 return unary64F0x2(mce, vatom);
1577
sewardj170ee212004-12-10 18:57:51 +00001578 case Iop_Sqrt32Fx4:
1579 case Iop_RSqrt32Fx4:
1580 case Iop_Recip32Fx4:
1581 return unary32Fx4(mce, vatom);
1582
1583 case Iop_Sqrt32F0x4:
1584 case Iop_RSqrt32F0x4:
1585 case Iop_Recip32F0x4:
1586 return unary32F0x4(mce, vatom);
1587
1588 case Iop_32Uto128:
sewardj0b070592004-12-10 21:44:22 +00001589 case Iop_64Uto128:
sewardj170ee212004-12-10 18:57:51 +00001590 return assignNew(mce, Ity_V128, unop(op, vatom));
1591
sewardj95448072004-11-22 20:19:51 +00001592 case Iop_F32toF64:
1593 case Iop_I32toF64:
sewardj95448072004-11-22 20:19:51 +00001594 case Iop_NegF64:
1595 case Iop_SinF64:
1596 case Iop_CosF64:
1597 case Iop_TanF64:
1598 case Iop_SqrtF64:
1599 case Iop_AbsF64:
1600 case Iop_2xm1F64:
1601 return mkPCastTo(mce, Ity_I64, vatom);
1602
sewardj95448072004-11-22 20:19:51 +00001603 case Iop_Clz32:
1604 case Iop_Ctz32:
1605 return mkPCastTo(mce, Ity_I32, vatom);
1606
1607 case Iop_32Sto64:
1608 case Iop_32Uto64:
sewardj170ee212004-12-10 18:57:51 +00001609 case Iop_128to64:
1610 case Iop_128HIto64:
sewardj95448072004-11-22 20:19:51 +00001611 return assignNew(mce, Ity_I64, unop(op, vatom));
1612
1613 case Iop_64to32:
1614 case Iop_64HIto32:
1615 case Iop_1Uto32:
1616 case Iop_8Uto32:
1617 case Iop_16Uto32:
1618 case Iop_16Sto32:
1619 case Iop_8Sto32:
1620 return assignNew(mce, Ity_I32, unop(op, vatom));
1621
1622 case Iop_8Sto16:
1623 case Iop_8Uto16:
1624 case Iop_32to16:
1625 case Iop_32HIto16:
1626 return assignNew(mce, Ity_I16, unop(op, vatom));
1627
1628 case Iop_1Uto8:
1629 case Iop_16to8:
1630 case Iop_32to8:
1631 return assignNew(mce, Ity_I8, unop(op, vatom));
1632
1633 case Iop_32to1:
1634 return assignNew(mce, Ity_I1, unop(Iop_32to1, vatom));
1635
1636 case Iop_ReinterpF64asI64:
1637 case Iop_ReinterpI64asF64:
sewardj0b070592004-12-10 21:44:22 +00001638 case Iop_ReinterpI32asF32:
sewardja1d93302004-12-12 16:45:06 +00001639 case Iop_Not128:
sewardj7010f6e2004-12-10 13:35:22 +00001640 case Iop_Not64:
sewardj95448072004-11-22 20:19:51 +00001641 case Iop_Not32:
1642 case Iop_Not16:
1643 case Iop_Not8:
1644 case Iop_Not1:
1645 return vatom;
sewardj7010f6e2004-12-10 13:35:22 +00001646
sewardj95448072004-11-22 20:19:51 +00001647 default:
1648 ppIROp(op);
1649 VG_(tool_panic)("memcheck:expr2vbits_Unop");
1650 }
1651}
1652
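/* Editor's sketch of what the pessimistic cast (mkPCastTo), used for
   the FP and Clz/Ctz cases above, computes -- shown as plain C over a
   concrete 32-bit V-value.  This is an illustrative model only, not
   code used by Memcheck: if any source bit is undefined, every
   destination bit becomes undefined. */
#if 0
static unsigned int pcast32_model ( unsigned int vbits )
{
   /* All-zeroes (fully defined) stays fully defined; anything else
      (at least one undefined bit) saturates to all-ones. */
   return vbits == 0 ? 0u : ~0u;
}
#endif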

/* Worker function; do not call directly. */
static
IRAtom* expr2vbits_LDle_WRK ( MCEnv* mce, IRType ty, IRAtom* addr, UInt bias )
{
   void*    helper;
   Char*    hname;
   IRDirty* di;
   IRTemp   datavbits;
   IRAtom*  addrAct;

   tl_assert(isOriginalAtom(mce,addr));

   /* First, emit a definedness test for the address.  This also sets
      the address (shadow) to 'defined' following the test. */
   complainIfUndefined( mce, addr );

   /* Now cook up a call to the relevant helper function, to read the
      data V bits from shadow memory. */
   ty = shadowType(ty);
   switch (ty) {
      case Ity_I64: helper = &MC_(helperc_LOADV8);
                    hname = "MC_(helperc_LOADV8)";
                    break;
      case Ity_I32: helper = &MC_(helperc_LOADV4);
                    hname = "MC_(helperc_LOADV4)";
                    break;
      case Ity_I16: helper = &MC_(helperc_LOADV2);
                    hname = "MC_(helperc_LOADV2)";
                    break;
      case Ity_I8:  helper = &MC_(helperc_LOADV1);
                    hname = "MC_(helperc_LOADV1)";
                    break;
      default:      ppIRType(ty);
                    VG_(tool_panic)("memcheck:expr2vbits_LDle_WRK");
   }

   /* Generate the actual address into addrAct. */
   if (bias == 0) {
      addrAct = addr;
   } else {
      IROp    mkAdd;
      IRAtom* eBias;
      IRType  tyAddr = mce->hWordTy;
      tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
      mkAdd = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
      eBias = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
      addrAct = assignNew(mce, tyAddr, binop(mkAdd, addr, eBias) );
   }

   /* We need to have a place to park the V bits we're just about to
      read. */
   datavbits = newIRTemp(mce->bb->tyenv, ty);
   di = unsafeIRDirty_1_N( datavbits,
                           1/*regparms*/, hname, helper,
                           mkIRExprVec_1( addrAct ));
   setHelperAnns( mce, di );
   stmt( mce->bb, IRStmt_Dirty(di) );

   return mkexpr(datavbits);
}

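/* Editor's example of the IR the worker above emits, for an Ity_I32
   load with bias 8 on a 32-bit host, after the address definedness
   check (schematically; exact temp names and dirty-call syntax vary):

      t_addr = Add32(addr, 0x8:I32)
      t_v    = DIRTY-call MC_(helperc_LOADV4)(t_addr)

   The V bits of the load are then just Tmp(t_v). */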

static
IRAtom* expr2vbits_LDle ( MCEnv* mce, IRType ty, IRAtom* addr, UInt bias )
{
   IRAtom *v64hi, *v64lo;
   switch (shadowType(ty)) {
      case Ity_I8:
      case Ity_I16:
      case Ity_I32:
      case Ity_I64:
         return expr2vbits_LDle_WRK(mce, ty, addr, bias);
      case Ity_V128:
         v64lo = expr2vbits_LDle_WRK(mce, Ity_I64, addr, bias);
         v64hi = expr2vbits_LDle_WRK(mce, Ity_I64, addr, bias+8);
         return assignNew( mce,
                           Ity_V128,
                           binop(Iop_64HLto128, v64hi, v64lo));
      default:
         VG_(tool_panic)("expr2vbits_LDle");
   }
}


static
IRAtom* expr2vbits_Mux0X ( MCEnv* mce,
                           IRAtom* cond, IRAtom* expr0, IRAtom* exprX )
{
   IRAtom *vbitsC, *vbits0, *vbitsX;
   IRType ty;
   /* Given Mux0X(cond,expr0,exprX), generate
         Mux0X(cond,expr0#,exprX#) `UifU` PCast(cond#)
      That is, steer the V bits like the originals, but trash the
      result if the steering value is undefined.  This gives
      lazy propagation. */
   tl_assert(isOriginalAtom(mce, cond));
   tl_assert(isOriginalAtom(mce, expr0));
   tl_assert(isOriginalAtom(mce, exprX));

   vbitsC = expr2vbits(mce, cond);
   vbits0 = expr2vbits(mce, expr0);
   vbitsX = expr2vbits(mce, exprX);
   ty = typeOfIRExpr(mce->bb->tyenv, vbits0);

   return
   mkUifU(mce, ty, assignNew(mce, ty, IRExpr_Mux0X(cond, vbits0, vbitsX)),
                   mkPCastTo(mce, ty, vbitsC) );
}

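/* Editor's worked example for expr2vbits_Mux0X: for
   Mux0X(cond, e0, eX) at Ity_I32 the shadow value is

      UifU32( Mux0X(cond, e0#, eX#), PCast32(cond#) )

   If cond is fully defined (cond# == 0) the PCast term is all-zeroes
   and the result is just the selected operand's V bits.  If any bit
   of cond is undefined, the PCast term is all-ones and the whole
   result is forced undefined, however defined e0 and eX may be. */
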
/* --------- This is the main expression-handling function. --------- */

static
IRExpr* expr2vbits ( MCEnv* mce, IRExpr* e )
{
   switch (e->tag) {

      case Iex_Get:
         return shadow_GET( mce, e->Iex.Get.offset, e->Iex.Get.ty );

      case Iex_GetI:
         return shadow_GETI( mce, e->Iex.GetI.descr,
                                  e->Iex.GetI.ix, e->Iex.GetI.bias );

      case Iex_Tmp:
         return IRExpr_Tmp( findShadowTmp(mce, e->Iex.Tmp.tmp) );

      case Iex_Const:
         return definedOfType(shadowType(typeOfIRExpr(mce->bb->tyenv, e)));

      case Iex_Binop:
         return expr2vbits_Binop(
                   mce,
                   e->Iex.Binop.op,
                   e->Iex.Binop.arg1, e->Iex.Binop.arg2
                );

      case Iex_Unop:
         return expr2vbits_Unop( mce, e->Iex.Unop.op, e->Iex.Unop.arg );

      case Iex_LDle:
         return expr2vbits_LDle( mce, e->Iex.LDle.ty,
                                      e->Iex.LDle.addr, 0/*addr bias*/ );

      case Iex_CCall:
         return mkLazyN( mce, e->Iex.CCall.args,
                              e->Iex.CCall.retty,
                              e->Iex.CCall.cee );

      case Iex_Mux0X:
         return expr2vbits_Mux0X( mce, e->Iex.Mux0X.cond, e->Iex.Mux0X.expr0,
                                       e->Iex.Mux0X.exprX);

      default:
         VG_(printf)("\n");
         ppIRExpr(e);
         VG_(printf)("\n");
         VG_(tool_panic)("memcheck: expr2vbits");
   }
}

/*------------------------------------------------------------*/
/*--- Generate shadow stmts from all kinds of IRStmts.     ---*/
/*------------------------------------------------------------*/

/* Widen a value to the host word size. */

static
IRExpr* zwidenToHostWord ( MCEnv* mce, IRAtom* vatom )
{
   IRType ty, tyH;

   /* vatom is a vbits-value and as such can only have a shadow type. */
   tl_assert(isShadowAtom(mce,vatom));

   ty  = typeOfIRExpr(mce->bb->tyenv, vatom);
   tyH = mce->hWordTy;

   if (tyH == Ity_I32) {
      switch (ty) {
         case Ity_I32: return vatom;
         case Ity_I16: return assignNew(mce, tyH, unop(Iop_16Uto32, vatom));
         case Ity_I8:  return assignNew(mce, tyH, unop(Iop_8Uto32, vatom));
         default:      goto unhandled;
      }
   } else {
      goto unhandled;
   }
  unhandled:
   VG_(printf)("\nty = "); ppIRType(ty); VG_(printf)("\n");
   VG_(tool_panic)("zwidenToHostWord");
}

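/* Editor's note: zero-widening (rather than sign-widening) should be
   safe here because the STOREV helpers called from do_shadow_STle
   below store only 'size' bytes of shadow, so the defined (zero)
   high bits introduced by the widening are never consulted. */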

/* Generate a shadow store.  addr is always the original address atom.
   You can pass in either originals or V-bits for the data atom, but
   obviously not both. */

static
void do_shadow_STle ( MCEnv* mce,
                      IRAtom* addr, UInt bias,
                      IRAtom* data, IRAtom* vdata )
{
   IROp     mkAdd;
   IRType   ty, tyAddr;
   IRDirty  *di, *diLo64, *diHi64;
   IRAtom   *addrAct, *addrLo64, *addrHi64;
   IRAtom   *vdataLo64, *vdataHi64;
   IRAtom   *eBias, *eBias0, *eBias8;
   void*    helper = NULL;
   Char*    hname = NULL;

   tyAddr = mce->hWordTy;
   mkAdd  = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
   tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );

   di = diLo64 = diHi64 = NULL;
   eBias = eBias0 = eBias8 = NULL;
   addrAct = addrLo64 = addrHi64 = NULL;
   vdataLo64 = vdataHi64 = NULL;

   if (data) {
      tl_assert(!vdata);
      tl_assert(isOriginalAtom(mce, data));
      tl_assert(bias == 0);
      vdata = expr2vbits( mce, data );
   } else {
      tl_assert(vdata);
   }

   tl_assert(isOriginalAtom(mce,addr));
   tl_assert(isShadowAtom(mce,vdata));

   ty = typeOfIRExpr(mce->bb->tyenv, vdata);

   /* First, emit a definedness test for the address.  This also sets
      the address (shadow) to 'defined' following the test. */
   complainIfUndefined( mce, addr );

   /* Now decide which helper function to call to write the data V
      bits into shadow memory. */
   switch (ty) {
      case Ity_V128: /* we'll use the helper twice */
      case Ity_I64: helper = &MC_(helperc_STOREV8);
                    hname = "MC_(helperc_STOREV8)";
                    break;
      case Ity_I32: helper = &MC_(helperc_STOREV4);
                    hname = "MC_(helperc_STOREV4)";
                    break;
      case Ity_I16: helper = &MC_(helperc_STOREV2);
                    hname = "MC_(helperc_STOREV2)";
                    break;
      case Ity_I8:  helper = &MC_(helperc_STOREV1);
                    hname = "MC_(helperc_STOREV1)";
                    break;
      default:      VG_(tool_panic)("memcheck:do_shadow_STle");
   }

   if (ty == Ity_V128) {

      /* 128-bit case */
      /* See comment in next clause re 64-bit regparms */
      eBias0    = tyAddr==Ity_I32 ? mkU32(bias)   : mkU64(bias);
      addrLo64  = assignNew(mce, tyAddr, binop(mkAdd, addr, eBias0) );
      vdataLo64 = assignNew(mce, Ity_I64, unop(Iop_128to64, vdata));
      diLo64    = unsafeIRDirty_0_N(
                     1/*regparms*/, hname, helper,
                     mkIRExprVec_2( addrLo64, vdataLo64 ));

      eBias8    = tyAddr==Ity_I32 ? mkU32(bias+8) : mkU64(bias+8);
      addrHi64  = assignNew(mce, tyAddr, binop(mkAdd, addr, eBias8) );
      vdataHi64 = assignNew(mce, Ity_I64, unop(Iop_128HIto64, vdata));
      diHi64    = unsafeIRDirty_0_N(
                     1/*regparms*/, hname, helper,
                     mkIRExprVec_2( addrHi64, vdataHi64 ));

      setHelperAnns( mce, diLo64 );
      setHelperAnns( mce, diHi64 );
      stmt( mce->bb, IRStmt_Dirty(diLo64) );
      stmt( mce->bb, IRStmt_Dirty(diHi64) );

   } else {

      /* 8/16/32/64-bit cases */
      /* Generate the actual address into addrAct. */
      if (bias == 0) {
         addrAct = addr;
      } else {
         eBias   = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
         addrAct = assignNew(mce, tyAddr, binop(mkAdd, addr, eBias) );
      }

      if (ty == Ity_I64) {
         /* We can't do this with regparm 2 on 32-bit platforms, since
            the back ends aren't clever enough to handle 64-bit
            regparm args.  Therefore use regparm 1 here instead. */
         di = unsafeIRDirty_0_N(
                 1/*regparms*/, hname, helper,
                 mkIRExprVec_2( addrAct, vdata ));
      } else {
         di = unsafeIRDirty_0_N(
                 2/*regparms*/, hname, helper,
                 mkIRExprVec_2( addrAct,
                                zwidenToHostWord( mce, vdata )));
      }
      setHelperAnns( mce, di );
      stmt( mce->bb, IRStmt_Dirty(di) );
   }

}

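/* Editor's example: for  STle(addr) = t:V128  with bias 0 on a 32-bit
   host, the function above emits, schematically:

      t_lo = 128to64(t#)
      DIRTY-call MC_(helperc_STOREV8)(Add32(addr,0x0:I32), t_lo)
      t_hi = 128HIto64(t#)
      DIRTY-call MC_(helperc_STOREV8)(Add32(addr,0x8:I32), t_hi)

   i.e. one 64-bit shadow store per half, with the low half at the
   lower address (little-endian). */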

/* Do lazy pessimistic propagation through a dirty helper call, by
   looking at the annotations on it.  This is the most complex part of
   Memcheck. */

static IRType szToITy ( Int n )
{
   switch (n) {
      case 1: return Ity_I8;
      case 2: return Ity_I16;
      case 4: return Ity_I32;
      case 8: return Ity_I64;
      default: VG_(tool_panic)("szToITy(memcheck)");
   }
}

static
void do_shadow_Dirty ( MCEnv* mce, IRDirty* d )
{
   Int     i, n, offset, toDo, gSz, gOff;
   IRAtom  *src, *here, *curr;
   IRType  tyAddr, tySrc, tyDst;
   IRTemp  dst;

   /* First check the guard. */
   complainIfUndefined(mce, d->guard);

   /* Now round up all inputs and PCast over them. */
   curr = definedOfType(Ity_I32);

   /* Inputs: unmasked args */
   for (i = 0; d->args[i]; i++) {
      if (d->cee->mcx_mask & (1<<i)) {
         /* ignore this arg */
      } else {
         here = mkPCastTo( mce, Ity_I32, expr2vbits(mce, d->args[i]) );
         curr = mkUifU32(mce, here, curr);
      }
   }

   /* Inputs: guest state that we read. */
   for (i = 0; i < d->nFxState; i++) {
      tl_assert(d->fxState[i].fx != Ifx_None);
      if (d->fxState[i].fx == Ifx_Write)
         continue;

      /* Ignore any sections marked as 'always defined'. */
      if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size )) {
         if (0)
            VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
                        d->fxState[i].offset, d->fxState[i].size );
         continue;
      }

      /* This state element is read or modified.  So we need to
         consider it.  If larger than 8 bytes, deal with it in 8-byte
         chunks. */
      gSz  = d->fxState[i].size;
      gOff = d->fxState[i].offset;
      tl_assert(gSz > 0);
      while (True) {
         if (gSz == 0) break;
         n = gSz <= 8 ? gSz : 8;
         /* update 'curr' with UifU of the state slice
            gOff .. gOff+n-1 */
         tySrc = szToITy( n );
         src   = assignNew( mce, tySrc,
                            shadow_GET(mce, gOff, tySrc ) );
         here = mkPCastTo( mce, Ity_I32, src );
         curr = mkUifU32(mce, here, curr);
         gSz -= n;
         gOff += n;
      }

   }

   /* Inputs: memory.  First set up some info needed regardless of
      whether we're doing reads or writes. */
   tyAddr = Ity_INVALID;

   if (d->mFx != Ifx_None) {
      /* Because we may do multiple shadow loads/stores from the same
         base address, it's best to do a single test of its
         definedness right now.  Post-instrumentation optimisation
         should remove all but this test. */
      tl_assert(d->mAddr);
      complainIfUndefined(mce, d->mAddr);

      tyAddr = typeOfIRExpr(mce->bb->tyenv, d->mAddr);
      tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);
      tl_assert(tyAddr == mce->hWordTy); /* not really right */
   }

   /* Deal with memory inputs (reads or modifies) */
   if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
      offset = 0;
      toDo   = d->mSize;
      /* chew off 32-bit chunks */
      while (toDo >= 4) {
         here = mkPCastTo(
                   mce, Ity_I32,
                   expr2vbits_LDle ( mce, Ity_I32,
                                     d->mAddr, d->mSize - toDo )
                );
         curr = mkUifU32(mce, here, curr);
         toDo -= 4;
      }
      /* chew off 16-bit chunks */
      while (toDo >= 2) {
         here = mkPCastTo(
                   mce, Ity_I32,
                   expr2vbits_LDle ( mce, Ity_I16,
                                     d->mAddr, d->mSize - toDo )
                );
         curr = mkUifU32(mce, here, curr);
         toDo -= 2;
      }
      tl_assert(toDo == 0); /* also need to handle 1-byte excess */
   }

   /* Whew!  So curr is a 32-bit V-value summarising pessimistically
      all the inputs to the helper.  Now we need to re-distribute the
      results to all destinations. */

   /* Outputs: the destination temporary, if there is one. */
   if (d->tmp != IRTemp_INVALID) {
      dst   = findShadowTmp(mce, d->tmp);
      tyDst = typeOfIRTemp(mce->bb->tyenv, d->tmp);
      assign( mce->bb, dst, mkPCastTo( mce, tyDst, curr) );
   }

   /* Outputs: guest state that we write or modify. */
   for (i = 0; i < d->nFxState; i++) {
      tl_assert(d->fxState[i].fx != Ifx_None);
      if (d->fxState[i].fx == Ifx_Read)
         continue;
      /* Ignore any sections marked as 'always defined'. */
      if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size ))
         continue;
      /* This state element is written or modified.  So we need to
         consider it.  If larger than 8 bytes, deal with it in 8-byte
         chunks. */
      gSz  = d->fxState[i].size;
      gOff = d->fxState[i].offset;
      tl_assert(gSz > 0);
      while (True) {
         if (gSz == 0) break;
         n = gSz <= 8 ? gSz : 8;
         /* Write suitably-casted 'curr' to the state slice
            gOff .. gOff+n-1 */
         tyDst = szToITy( n );
         do_shadow_PUT( mce, gOff,
                             NULL, /* original atom */
                             mkPCastTo( mce, tyDst, curr ) );
         gSz -= n;
         gOff += n;
      }
   }

   /* Outputs: memory that we write or modify. */
   if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
      offset = 0;
      toDo   = d->mSize;
      /* chew off 32-bit chunks */
      while (toDo >= 4) {
         do_shadow_STle( mce, d->mAddr, d->mSize - toDo,
                         NULL, /* original data */
                         mkPCastTo( mce, Ity_I32, curr ) );
         toDo -= 4;
      }
      /* chew off 16-bit chunks */
      while (toDo >= 2) {
         do_shadow_STle( mce, d->mAddr, d->mSize - toDo,
                         NULL, /* original data */
                         mkPCastTo( mce, Ity_I16, curr ) );
         toDo -= 2;
      }
      tl_assert(toDo == 0); /* also need to handle 1-byte excess */
   }

}

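/* Editor's summary of the scheme above, as pseudocode:

      inputs:   curr = UifU32 over PCast32 of
                       - the V bits of each unmasked arg,
                       - the shadow of each guest-state slice read,
                       - the V bits of each memory chunk read;
      outputs:  the shadow tmp, each guest-state slice written and
                each memory chunk written all receive
                PCastTo(their type, curr).

   So if any input to the helper is undefined anywhere, every output
   is treated as entirely undefined. */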

/*------------------------------------------------------------*/
/*--- Memcheck main                                        ---*/
/*------------------------------------------------------------*/

static Bool isBogusAtom ( IRAtom* at )
{
   ULong n = 0;
   IRConst* con;
   tl_assert(isAtom(at));
   if (at->tag == Iex_Tmp)
      return False;
   tl_assert(at->tag == Iex_Const);
   con = at->Iex.Const.con;
   switch (con->tag) {
      case Ico_U1:   return False;
      case Ico_U8:   n = (ULong)con->Ico.U8; break;
      case Ico_U16:  n = (ULong)con->Ico.U16; break;
      case Ico_U32:  n = (ULong)con->Ico.U32; break;
      case Ico_U64:  n = (ULong)con->Ico.U64; break;
      case Ico_F64:  return False;
      case Ico_F64i: return False;
      case Ico_V128: return False;
      default: ppIRExpr(at); tl_assert(0);
   }
   /* VG_(printf)("%llx\n", n); */
   return (n == 0xFEFEFEFF
           || n == 0x80808080 /*
           || n == 0x01010101
           || n == 0x01010100*/);
}

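/* Editor's note on the magic numbers above: 0xFEFEFEFF is the 32-bit
   two's complement of 0x01010101, and together with 0x80808080 these
   look like the constants of the classic word-at-a-time zero-byte
   scan used by optimised strlen/strcpy implementations -- code which
   legitimately computes on words that are only partly defined.  A
   hypothetical sketch of that idiom (not code from Valgrind): */
#if 0
static int hasZeroByte ( unsigned int x )
{
   /* x + 0xFEFEFEFF == x - 0x01010101 (mod 2^32).  A byte of x that
      was 0x00 wraps to 0xFF, setting its top bit; '& ~x' keeps only
      bytes whose top bit was not already set, and 0x80808080
      extracts the per-byte top bits. */
   return ((x + 0xFEFEFEFFu) & ~x & 0x80808080u) != 0;
}
#endif
/* Spotting such literals presumably lets the flag set in
   TL_(instrument) below switch other parts of this file to more
   accurate (but costlier) instrumentation of the arithmetic they
   feed. */
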
static Bool checkForBogusLiterals ( /*FLAT*/ IRStmt* st )
{
   Int      i;
   IRExpr*  e;
   IRDirty* d;
   switch (st->tag) {
      case Ist_Tmp:
         e = st->Ist.Tmp.data;
         switch (e->tag) {
            case Iex_Get:
            case Iex_Tmp:
               return False;
            case Iex_Const:
               return isBogusAtom(e);
            case Iex_Unop:
               return isBogusAtom(e->Iex.Unop.arg);
            case Iex_GetI:
               return isBogusAtom(e->Iex.GetI.ix);
            case Iex_Binop:
               return isBogusAtom(e->Iex.Binop.arg1)
                      || isBogusAtom(e->Iex.Binop.arg2);
            case Iex_Mux0X:
               return isBogusAtom(e->Iex.Mux0X.cond)
                      || isBogusAtom(e->Iex.Mux0X.expr0)
                      || isBogusAtom(e->Iex.Mux0X.exprX);
            case Iex_LDle:
               return isBogusAtom(e->Iex.LDle.addr);
            case Iex_CCall:
               for (i = 0; e->Iex.CCall.args[i]; i++)
                  if (isBogusAtom(e->Iex.CCall.args[i]))
                     return True;
               return False;
            default:
               goto unhandled;
         }
      case Ist_Dirty:
         d = st->Ist.Dirty.details;
         for (i = 0; d->args[i]; i++)
            if (isBogusAtom(d->args[i]))
               return True;
         if (d->guard && isBogusAtom(d->guard))
            return True;
         if (d->mAddr && isBogusAtom(d->mAddr))
            return True;
         return False;
      case Ist_Put:
         return isBogusAtom(st->Ist.Put.data);
      case Ist_PutI:
         return isBogusAtom(st->Ist.PutI.ix)
                || isBogusAtom(st->Ist.PutI.data);
      case Ist_STle:
         return isBogusAtom(st->Ist.STle.addr)
                || isBogusAtom(st->Ist.STle.data);
      case Ist_Exit:
         return isBogusAtom(st->Ist.Exit.guard);
      default:
      unhandled:
         ppIRStmt(st);
         VG_(tool_panic)("checkForBogusLiterals");
   }
}


IRBB* TL_(instrument) ( IRBB* bb_in, VexGuestLayout* layout, IRType hWordTy )
{
   Bool verboze = False; //True;

   Int     i, j, first_stmt;
   IRStmt* st;
   MCEnv   mce;

   /* Set up BB */
   IRBB* bb     = emptyIRBB();
   bb->tyenv    = dopyIRTypeEnv(bb_in->tyenv);
   bb->next     = dopyIRExpr(bb_in->next);
   bb->jumpkind = bb_in->jumpkind;

   /* Set up the running environment.  Only .bb is modified as we go
      along. */
   mce.bb             = bb;
   mce.layout         = layout;
   mce.n_originalTmps = bb->tyenv->types_used;
   mce.hWordTy        = hWordTy;
   mce.bogusLiterals  = False;
   mce.tmpMap         = LibVEX_Alloc(mce.n_originalTmps * sizeof(IRTemp));
   for (i = 0; i < mce.n_originalTmps; i++)
      mce.tmpMap[i] = IRTemp_INVALID;

   /* Iterate over the stmts. */

   for (i = 0; i < bb_in->stmts_used; i++) {
      st = bb_in->stmts[i];
      if (!st) continue;

      tl_assert(isFlatIRStmt(st));

      if (!mce.bogusLiterals) {
         mce.bogusLiterals = checkForBogusLiterals(st);
         if (0 && mce.bogusLiterals) {
            VG_(printf)("bogus: ");
            ppIRStmt(st);
            VG_(printf)("\n");
         }
      }

      first_stmt = bb->stmts_used;

      if (verboze) {
         ppIRStmt(st);
         VG_(printf)("\n\n");
      }

      switch (st->tag) {

         case Ist_Tmp:
            assign( bb, findShadowTmp(&mce, st->Ist.Tmp.tmp),
                        expr2vbits( &mce, st->Ist.Tmp.data) );
            break;

         case Ist_Put:
            do_shadow_PUT( &mce,
                           st->Ist.Put.offset,
                           st->Ist.Put.data,
                           NULL /* shadow atom */ );
            break;

         case Ist_PutI:
            do_shadow_PUTI( &mce,
                            st->Ist.PutI.descr,
                            st->Ist.PutI.ix,
                            st->Ist.PutI.bias,
                            st->Ist.PutI.data );
            break;

         case Ist_STle:
            do_shadow_STle( &mce, st->Ist.STle.addr, 0/* addr bias */,
                            st->Ist.STle.data,
                            NULL /* shadow data */ );
            break;

         case Ist_Exit:
            complainIfUndefined( &mce, st->Ist.Exit.guard );
            break;

         case Ist_Dirty:
            do_shadow_Dirty( &mce, st->Ist.Dirty.details );
            break;

         default:
            VG_(printf)("\n");
            ppIRStmt(st);
            VG_(printf)("\n");
            VG_(tool_panic)("memcheck: unhandled IRStmt");

      } /* switch (st->tag) */

      if (verboze) {
         for (j = first_stmt; j < bb->stmts_used; j++) {
            VG_(printf)("   ");
            ppIRStmt(bb->stmts[j]);
            VG_(printf)("\n");
         }
         VG_(printf)("\n");
      }

      addStmtToIRBB(bb, st);

   }

   /* Now we need to complain if the jump target is undefined. */
   first_stmt = bb->stmts_used;

   if (verboze) {
      VG_(printf)("bb->next = ");
      ppIRExpr(bb->next);
      VG_(printf)("\n\n");
   }

   complainIfUndefined( &mce, bb->next );

   if (verboze) {
      for (j = first_stmt; j < bb->stmts_used; j++) {
         VG_(printf)("   ");
         ppIRStmt(bb->stmts[j]);
         VG_(printf)("\n");
      }
      VG_(printf)("\n");
   }

   return bb;
}

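/* Editor's example of the overall effect, for one statement of an
   input BB ('#' marks shadow temps, as in comments above):

      input:    t3 = Xor32(t1,t2)

      output:   t3# = Or32(t1#,t2#)   (UifU32 -- see expr2vbits_Binop)
                t3  = Xor32(t1,t2)    (the original, appended last)

   Every original statement is preceded by whatever statements are
   needed to keep its shadow state up to date. */
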
/*--------------------------------------------------------------------*/
/*--- end                                           mc_translate.c ---*/
/*--------------------------------------------------------------------*/