blob: 04b23f23959280bab9cee4b0a2c046c20e39feed [file] [log] [blame]
nethercotebb1c9912004-01-04 16:43:23 +00001
njn25e49d8e72002-09-23 09:36:25 +00002/*--------------------------------------------------------------------*/
sewardj95448072004-11-22 20:19:51 +00003/*--- Instrument IR to perform memory checking operations. ---*/
njn25cac76cb2002-09-23 11:21:57 +00004/*--- mc_translate.c ---*/
njn25e49d8e72002-09-23 09:36:25 +00005/*--------------------------------------------------------------------*/
njnc9539842002-10-02 13:26:35 +00006
njn25e49d8e72002-09-23 09:36:25 +00007/*
nethercote137bc552003-11-14 17:47:54 +00008 This file is part of MemCheck, a heavyweight Valgrind tool for
njnc9539842002-10-02 13:26:35 +00009 detecting memory errors.
njn25e49d8e72002-09-23 09:36:25 +000010
sewardj9eecbbb2010-05-03 21:37:12 +000011 Copyright (C) 2000-2010 Julian Seward
njn25e49d8e72002-09-23 09:36:25 +000012 jseward@acm.org
13
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
18
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
23
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, write to the Free Software
26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27 02111-1307, USA.
28
29 The GNU General Public License is contained in the file COPYING.
30*/
31
njnc7561b92005-06-19 01:24:32 +000032#include "pub_tool_basics.h"
njn1d0825f2006-03-27 11:37:07 +000033#include "pub_tool_hashtable.h" // For mc_include.h
njn132bfcc2005-06-04 19:16:06 +000034#include "pub_tool_libcassert.h"
njn36a20fa2005-06-03 03:08:39 +000035#include "pub_tool_libcprint.h"
njnc7561b92005-06-19 01:24:32 +000036#include "pub_tool_tooliface.h"
sewardj53ee1fc2005-12-23 02:29:58 +000037#include "pub_tool_machine.h" // VG_(fnptr_to_fnentry)
sewardj81651dc2007-08-28 06:05:20 +000038#include "pub_tool_xarray.h"
39#include "pub_tool_mallocfree.h"
40#include "pub_tool_libcbase.h"
njn25e49d8e72002-09-23 09:36:25 +000041
sewardj7cf4e6b2008-05-01 20:24:26 +000042#include "mc_include.h"
43
44
sewardj7ee7d852011-06-16 11:37:21 +000045/* FIXMEs JRS 2011-June-16.
46
47 Check the interpretation for vector narrowing and widening ops,
48 particularly the saturating ones. I suspect they are either overly
49 pessimistic and/or wrong.
50*/
51
sewardj992dff92005-10-07 11:08:55 +000052/* This file implements the Memcheck instrumentation, and in
53 particular contains the core of its undefined value detection
54 machinery. For a comprehensive background of the terminology,
55 algorithms and rationale used herein, read:
56
57 Using Valgrind to detect undefined value errors with
58 bit-precision
59
60 Julian Seward and Nicholas Nethercote
61
62 2005 USENIX Annual Technical Conference (General Track),
63 Anaheim, CA, USA, April 10-15, 2005.
njn6665ea22007-05-24 23:14:41 +000064
65 ----
66
67 Here is as good a place as any to record exactly when V bits are and
68 should be checked, why, and what function is responsible.
69
70
71 Memcheck complains when an undefined value is used:
72
73 1. In the condition of a conditional branch. Because it could cause
74 incorrect control flow, and thus cause incorrect externally-visible
75 behaviour. [mc_translate.c:complainIfUndefined]
76
77 2. As an argument to a system call, or as the value that specifies
78 the system call number. Because it could cause an incorrect
79 externally-visible side effect. [mc_translate.c:mc_pre_reg_read]
80
81 3. As the address in a load or store. Because it could cause an
82 incorrect value to be used later, which could cause externally-visible
83 behaviour (eg. via incorrect control flow or an incorrect system call
84 argument) [complainIfUndefined]
85
86 4. As the target address of a branch. Because it could cause incorrect
87 control flow. [complainIfUndefined]
88
89 5. As an argument to setenv, unsetenv, or putenv. Because it could put
90 an incorrect value into the external environment.
91 [mc_replace_strmem.c:VG_WRAP_FUNCTION_ZU(*, *env)]
92
93 6. As the index in a GETI or PUTI operation. I'm not sure why... (njn).
94 [complainIfUndefined]
95
96 7. As an argument to the VALGRIND_CHECK_MEM_IS_DEFINED and
97 VALGRIND_CHECK_VALUE_IS_DEFINED client requests. Because the user
98 requested it. [in memcheck.h]
99
100
101 Memcheck also complains, but should not, when an undefined value is used:
102
103 8. As the shift value in certain SIMD shift operations (but not in the
104 standard integer shift operations). This inconsistency is due to
105 historical reasons.) [complainIfUndefined]
106
107
108 Memcheck does not complain, but should, when an undefined value is used:
109
110 9. As an input to a client request. Because the client request may
111 affect the visible behaviour -- see bug #144362 for an example
112 involving the malloc replacements in vg_replace_malloc.c and
113 VALGRIND_NON_SIMD_CALL* requests, where an uninitialised argument
114 isn't identified. That bug report also has some info on how to solve
115 the problem. [valgrind.h:VALGRIND_DO_CLIENT_REQUEST]
116
117
118 In practice, 1 and 2 account for the vast majority of cases.
sewardj992dff92005-10-07 11:08:55 +0000119*/
120
sewardj95448072004-11-22 20:19:51 +0000121/*------------------------------------------------------------*/
122/*--- Forward decls ---*/
123/*------------------------------------------------------------*/
124
/* The instrumentation environment; defined below. */
struct _MCEnv;

/* Map a type to the integer type used to shadow it (see definition below). */
static IRType  shadowTypeV ( IRType ty );
/* Compute the V-bits (definedness) expression shadowing 'e'. */
static IRExpr* expr2vbits ( struct _MCEnv* mce, IRExpr* e );
/* Find/allocate the B- (origin-) shadow temp for 'orig'. */
static IRTemp  findShadowTmpB ( struct _MCEnv* mce, IRTemp orig );

/* An Ity_I128 constant with all bits zero (i.e. fully defined). */
static IRExpr *i128_const_zero(void);
sewardj95448072004-11-22 20:19:51 +0000132
133/*------------------------------------------------------------*/
134/*--- Memcheck running state, and tmp management. ---*/
135/*------------------------------------------------------------*/
136
/* Carries info about a particular tmp.  The tmp's number is not
   recorded, as this is implied by (equal to) its index in the tmpMap
   in MCEnv.  The tmp's type is also not recorded, as this is present
   in MCEnv.sb->tyenv.

   When .kind is Orig, .shadowV and .shadowB may give the identities
   of the temps currently holding the associated definedness (shadowV)
   and origin (shadowB) values, or these may be IRTemp_INVALID if code
   to compute such values has not yet been emitted.

   When .kind is VSh or BSh then the tmp holds a V- or B- value,
   and so .shadowV and .shadowB must be IRTemp_INVALID, since it is
   illogical for a shadow tmp itself to be shadowed.
*/
typedef
   enum { Orig=1, VSh=2, BSh=3 }
   TempKind;

typedef
   struct {
      TempKind kind;    /* role of this tmp: original, V-shadow or B-shadow */
      IRTemp   shadowV; /* definedness shadow, or IRTemp_INVALID */
      IRTemp   shadowB; /* origin shadow, or IRTemp_INVALID */
   }
   TempMapEnt;
162
163
/* Carries around state during memcheck instrumentation. */
typedef
   struct _MCEnv {
      /* MODIFIED: the superblock being constructed.  IRStmts are
         added. */
      IRSB* sb;
      /* MODIFIED: when True, each statement added to 'sb' is also
         printed (see stmt() below). */
      Bool  trace;

      /* MODIFIED: a table [0 .. #temps_in_sb-1] which gives the
         current kind and possibly shadow temps for each temp in the
         IRSB being constructed.  Note that it does not contain the
         type of each tmp.  If you want to know the type, look at the
         relevant entry in sb->tyenv.  It follows that at all times
         during the instrumentation process, the valid indices for
         tmpMap and sb->tyenv are identical, being 0 .. N-1 where N is
         total number of Orig, V- and B- temps allocated so far.

         The reason for this strange split (types in one place, all
         other info in another) is that we need the types to be
         attached to sb so as to make it possible to do
         "typeOfIRExpr(mce->bb->tyenv, ...)" at various places in the
         instrumentation process. */
      XArray* /* of TempMapEnt */ tmpMap;

      /* MODIFIED: indicates whether "bogus" literals have so far been
         found.  Starts off False, and may change to True. */
      Bool bogusLiterals;

      /* READONLY: the guest layout.  This indicates which parts of
         the guest state should be regarded as 'always defined'. */
      VexGuestLayout* layout;

      /* READONLY: the host word type.  Needed for constructing
         arguments of type 'HWord' to be passed to helper functions.
         Ity_I32 or Ity_I64 only. */
      IRType hWordTy;
   }
   MCEnv;
202
203/* SHADOW TMP MANAGEMENT. Shadow tmps are allocated lazily (on
204 demand), as they are encountered. This is for two reasons.
205
206 (1) (less important reason): Many original tmps are unused due to
207 initial IR optimisation, and we do not want to spaces in tables
208 tracking them.
209
210 Shadow IRTemps are therefore allocated on demand. mce.tmpMap is a
211 table indexed [0 .. n_types-1], which gives the current shadow for
212 each original tmp, or INVALID_IRTEMP if none is so far assigned.
213 It is necessary to support making multiple assignments to a shadow
214 -- specifically, after testing a shadow for definedness, it needs
215 to be made defined. But IR's SSA property disallows this.
216
217 (2) (more important reason): Therefore, when a shadow needs to get
218 a new value, a new temporary is created, the value is assigned to
219 that, and the tmpMap is updated to reflect the new binding.
220
221 A corollary is that if the tmpMap maps a given tmp to
sewardjf1962d32006-10-19 13:22:16 +0000222 IRTemp_INVALID and we are hoping to read that shadow tmp, it means
sewardj95448072004-11-22 20:19:51 +0000223 there's a read-before-write error in the original tmps. The IR
224 sanity checker should catch all such anomalies, however.
njn25e49d8e72002-09-23 09:36:25 +0000225*/
sewardj95448072004-11-22 20:19:51 +0000226
sewardj1c0ce7a2009-07-01 08:10:49 +0000227/* Create a new IRTemp of type 'ty' and kind 'kind', and add it to
228 both the table in mce->sb and to our auxiliary mapping. Note that
229 newTemp may cause mce->tmpMap to resize, hence previous results
230 from VG_(indexXA)(mce->tmpMap) are invalidated. */
231static IRTemp newTemp ( MCEnv* mce, IRType ty, TempKind kind )
232{
233 Word newIx;
234 TempMapEnt ent;
235 IRTemp tmp = newIRTemp(mce->sb->tyenv, ty);
236 ent.kind = kind;
237 ent.shadowV = IRTemp_INVALID;
238 ent.shadowB = IRTemp_INVALID;
239 newIx = VG_(addToXA)( mce->tmpMap, &ent );
240 tl_assert(newIx == (Word)tmp);
241 return tmp;
242}
243
244
/* Find the tmp currently shadowing the given original tmp.  If none
   so far exists, allocate one.  */
static IRTemp findShadowTmpV ( MCEnv* mce, IRTemp orig )
{
   TempMapEnt* ent;
   /* VG_(indexXA) range-checks 'orig', hence no need to check
      here. */
   ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
   tl_assert(ent->kind == Orig);
   if (ent->shadowV == IRTemp_INVALID) {
      /* Lazily allocate the shadow: same size as the original, but
         always of integer (shadow) type. */
      IRTemp tmpV
        = newTemp( mce, shadowTypeV(mce->sb->tyenv->types[orig]), VSh );
      /* newTemp may cause mce->tmpMap to resize, hence previous results
         from VG_(indexXA) are invalid. */
      ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
      tl_assert(ent->kind == Orig);
      tl_assert(ent->shadowV == IRTemp_INVALID);
      ent->shadowV = tmpV;
   }
   return ent->shadowV;
}
266
sewardj95448072004-11-22 20:19:51 +0000267/* Allocate a new shadow for the given original tmp. This means any
268 previous shadow is abandoned. This is needed because it is
269 necessary to give a new value to a shadow once it has been tested
270 for undefinedness, but unfortunately IR's SSA property disallows
271 this. Instead we must abandon the old shadow, allocate a new one
sewardj1c0ce7a2009-07-01 08:10:49 +0000272 and use that instead.
273
274 This is the same as findShadowTmpV, except we don't bother to see
275 if a shadow temp already existed -- we simply allocate a new one
276 regardless. */
sewardj7cf4e6b2008-05-01 20:24:26 +0000277static void newShadowTmpV ( MCEnv* mce, IRTemp orig )
njn25e49d8e72002-09-23 09:36:25 +0000278{
sewardj1c0ce7a2009-07-01 08:10:49 +0000279 TempMapEnt* ent;
280 /* VG_(indexXA) range-checks 'orig', hence no need to check
281 here. */
282 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
283 tl_assert(ent->kind == Orig);
284 if (1) {
285 IRTemp tmpV
286 = newTemp( mce, shadowTypeV(mce->sb->tyenv->types[orig]), VSh );
287 /* newTemp may cause mce->tmpMap to resize, hence previous results
288 from VG_(indexXA) are invalid. */
289 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
290 tl_assert(ent->kind == Orig);
291 ent->shadowV = tmpV;
292 }
sewardj95448072004-11-22 20:19:51 +0000293}
294
295
/*------------------------------------------------------------*/
/*--- IRAtoms -- a subset of IRExprs                       ---*/
/*------------------------------------------------------------*/

/* An atom is either an IRExpr_Const or an IRExpr_Tmp, as defined by
   isIRAtom() in libvex_ir.h.  Because this instrumenter expects flat
   input, most of this code deals in atoms.  Usefully, a value atom
   always has a V-value which is also an atom: constants are shadowed
   by constants, and temps are shadowed by the corresponding shadow
   temporary. */

typedef IRExpr IRAtom;
308
309/* (used for sanity checks only): is this an atom which looks
310 like it's from original code? */
311static Bool isOriginalAtom ( MCEnv* mce, IRAtom* a1 )
312{
313 if (a1->tag == Iex_Const)
314 return True;
sewardj1c0ce7a2009-07-01 08:10:49 +0000315 if (a1->tag == Iex_RdTmp) {
316 TempMapEnt* ent = VG_(indexXA)( mce->tmpMap, a1->Iex.RdTmp.tmp );
317 return ent->kind == Orig;
318 }
sewardj95448072004-11-22 20:19:51 +0000319 return False;
320}
321
322/* (used for sanity checks only): is this an atom which looks
323 like it's from shadow code? */
324static Bool isShadowAtom ( MCEnv* mce, IRAtom* a1 )
325{
326 if (a1->tag == Iex_Const)
327 return True;
sewardj1c0ce7a2009-07-01 08:10:49 +0000328 if (a1->tag == Iex_RdTmp) {
329 TempMapEnt* ent = VG_(indexXA)( mce->tmpMap, a1->Iex.RdTmp.tmp );
330 return ent->kind == VSh || ent->kind == BSh;
331 }
sewardj95448072004-11-22 20:19:51 +0000332 return False;
333}
334
335/* (used for sanity checks only): check that both args are atoms and
336 are identically-kinded. */
337static Bool sameKindedAtoms ( IRAtom* a1, IRAtom* a2 )
338{
sewardj0b9d74a2006-12-24 02:24:11 +0000339 if (a1->tag == Iex_RdTmp && a2->tag == Iex_RdTmp)
sewardj95448072004-11-22 20:19:51 +0000340 return True;
sewardjbef552a2005-08-30 12:54:36 +0000341 if (a1->tag == Iex_Const && a2->tag == Iex_Const)
sewardj95448072004-11-22 20:19:51 +0000342 return True;
343 return False;
344}
345
346
347/*------------------------------------------------------------*/
348/*--- Type management ---*/
349/*------------------------------------------------------------*/
350
351/* Shadow state is always accessed using integer types. This returns
352 an integer type with the same size (as per sizeofIRType) as the
353 given type. The only valid shadow types are Bit, I8, I16, I32,
sewardjb5b87402011-03-07 16:05:35 +0000354 I64, I128, V128. */
sewardj95448072004-11-22 20:19:51 +0000355
sewardj7cf4e6b2008-05-01 20:24:26 +0000356static IRType shadowTypeV ( IRType ty )
sewardj95448072004-11-22 20:19:51 +0000357{
358 switch (ty) {
359 case Ity_I1:
360 case Ity_I8:
361 case Ity_I16:
362 case Ity_I32:
sewardj6cf40ff2005-04-20 22:31:26 +0000363 case Ity_I64:
364 case Ity_I128: return ty;
sewardj3245c912004-12-10 14:58:26 +0000365 case Ity_F32: return Ity_I32;
366 case Ity_F64: return Ity_I64;
sewardjb5b87402011-03-07 16:05:35 +0000367 case Ity_F128: return Ity_I128;
sewardj3245c912004-12-10 14:58:26 +0000368 case Ity_V128: return Ity_V128;
sewardj95448072004-11-22 20:19:51 +0000369 default: ppIRType(ty);
sewardj7cf4e6b2008-05-01 20:24:26 +0000370 VG_(tool_panic)("memcheck:shadowTypeV");
sewardj95448072004-11-22 20:19:51 +0000371 }
372}
373
374/* Produce a 'defined' value of the given shadow type. Should only be
375 supplied shadow types (Bit/I8/I16/I32/UI64). */
376static IRExpr* definedOfType ( IRType ty ) {
377 switch (ty) {
sewardj170ee212004-12-10 18:57:51 +0000378 case Ity_I1: return IRExpr_Const(IRConst_U1(False));
379 case Ity_I8: return IRExpr_Const(IRConst_U8(0));
380 case Ity_I16: return IRExpr_Const(IRConst_U16(0));
381 case Ity_I32: return IRExpr_Const(IRConst_U32(0));
382 case Ity_I64: return IRExpr_Const(IRConst_U64(0));
sewardjb5b87402011-03-07 16:05:35 +0000383 case Ity_I128: return i128_const_zero();
sewardj170ee212004-12-10 18:57:51 +0000384 case Ity_V128: return IRExpr_Const(IRConst_V128(0x0000));
sewardjf1962d32006-10-19 13:22:16 +0000385 default: VG_(tool_panic)("memcheck:definedOfType");
njn25e49d8e72002-09-23 09:36:25 +0000386 }
387}
388
389
sewardj95448072004-11-22 20:19:51 +0000390/*------------------------------------------------------------*/
391/*--- Constructing IR fragments ---*/
392/*------------------------------------------------------------*/
393
/* add stmt to a bb; when mce->trace is set, also print the category
   tag ('V', 'B' or 'C') and the statement being added */
static inline void stmt ( HChar cat, MCEnv* mce, IRStmt* st ) {
   if (mce->trace) {
      VG_(printf)("  %c: ", cat);
      ppIRStmt(st);
      VG_(printf)("\n");
   }
   addStmtToIRSB(mce->sb, st);
}

/* assign value to tmp (emitted via stmt(), so it is traced too) */
static inline
void assign ( HChar cat, MCEnv* mce, IRTemp tmp, IRExpr* expr ) {
   stmt(cat, mce, IRStmt_WrTmp(tmp,expr));
}

/* build various kinds of expressions */
#define triop(_op, _arg1, _arg2, _arg3) \
                                 IRExpr_Triop((_op),(_arg1),(_arg2),(_arg3))
#define binop(_op, _arg1, _arg2) IRExpr_Binop((_op),(_arg1),(_arg2))
#define unop(_op, _arg)          IRExpr_Unop((_op),(_arg))
#define mkU8(_n)                 IRExpr_Const(IRConst_U8(_n))
#define mkU16(_n)                IRExpr_Const(IRConst_U16(_n))
#define mkU32(_n)                IRExpr_Const(IRConst_U32(_n))
#define mkU64(_n)                IRExpr_Const(IRConst_U64(_n))
#define mkV128(_n)               IRExpr_Const(IRConst_V128(_n))
#define mkexpr(_tmp)             IRExpr_RdTmp((_tmp))
sewardj95448072004-11-22 20:19:51 +0000421
sewardj7cf4e6b2008-05-01 20:24:26 +0000422/* Bind the given expression to a new temporary, and return the
sewardj95448072004-11-22 20:19:51 +0000423 temporary. This effectively converts an arbitrary expression into
sewardj7cf4e6b2008-05-01 20:24:26 +0000424 an atom.
425
426 'ty' is the type of 'e' and hence the type that the new temporary
sewardj1c0ce7a2009-07-01 08:10:49 +0000427 needs to be. But passing it in is redundant, since we can deduce
428 the type merely by inspecting 'e'. So at least use that fact to
429 assert that the two types agree. */
430static IRAtom* assignNew ( HChar cat, MCEnv* mce, IRType ty, IRExpr* e )
431{
432 TempKind k;
433 IRTemp t;
434 IRType tyE = typeOfIRExpr(mce->sb->tyenv, e);
sewardj7cf4e6b2008-05-01 20:24:26 +0000435 tl_assert(tyE == ty); /* so 'ty' is redundant (!) */
sewardj1c0ce7a2009-07-01 08:10:49 +0000436 switch (cat) {
437 case 'V': k = VSh; break;
438 case 'B': k = BSh; break;
439 case 'C': k = Orig; break;
440 /* happens when we are making up new "orig"
441 expressions, for IRCAS handling */
442 default: tl_assert(0);
443 }
444 t = newTemp(mce, ty, k);
sewardj7cf4e6b2008-05-01 20:24:26 +0000445 assign(cat, mce, t, e);
sewardj95448072004-11-22 20:19:51 +0000446 return mkexpr(t);
447}
448
449
450/*------------------------------------------------------------*/
sewardjb5b87402011-03-07 16:05:35 +0000451/*--- Helper functions for 128-bit ops ---*/
452/*------------------------------------------------------------*/
453static IRExpr *i128_const_zero(void)
454{
455 return binop(Iop_64HLto128, IRExpr_Const(IRConst_U64(0)),
456 IRExpr_Const(IRConst_U64(0)));
457}
458
459/* There are no 128-bit loads and/or stores. So we do not need to worry
460 about that in expr2vbits_Load */
461
462/*------------------------------------------------------------*/
sewardj95448072004-11-22 20:19:51 +0000463/*--- Constructing definedness primitive ops ---*/
464/*------------------------------------------------------------*/
465
466/* --------- Defined-if-either-defined --------- */
467
468static IRAtom* mkDifD8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
469 tl_assert(isShadowAtom(mce,a1));
470 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000471 return assignNew('V', mce, Ity_I8, binop(Iop_And8, a1, a2));
sewardj95448072004-11-22 20:19:51 +0000472}
473
474static IRAtom* mkDifD16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
475 tl_assert(isShadowAtom(mce,a1));
476 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000477 return assignNew('V', mce, Ity_I16, binop(Iop_And16, a1, a2));
sewardj95448072004-11-22 20:19:51 +0000478}
479
480static IRAtom* mkDifD32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
481 tl_assert(isShadowAtom(mce,a1));
482 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000483 return assignNew('V', mce, Ity_I32, binop(Iop_And32, a1, a2));
sewardj95448072004-11-22 20:19:51 +0000484}
485
sewardj7010f6e2004-12-10 13:35:22 +0000486static IRAtom* mkDifD64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
487 tl_assert(isShadowAtom(mce,a1));
488 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000489 return assignNew('V', mce, Ity_I64, binop(Iop_And64, a1, a2));
sewardj7010f6e2004-12-10 13:35:22 +0000490}
491
sewardj20d38f22005-02-07 23:50:18 +0000492static IRAtom* mkDifDV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
sewardj170ee212004-12-10 18:57:51 +0000493 tl_assert(isShadowAtom(mce,a1));
494 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000495 return assignNew('V', mce, Ity_V128, binop(Iop_AndV128, a1, a2));
sewardj170ee212004-12-10 18:57:51 +0000496}
497
sewardj95448072004-11-22 20:19:51 +0000498/* --------- Undefined-if-either-undefined --------- */
499
500static IRAtom* mkUifU8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
501 tl_assert(isShadowAtom(mce,a1));
502 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000503 return assignNew('V', mce, Ity_I8, binop(Iop_Or8, a1, a2));
sewardj95448072004-11-22 20:19:51 +0000504}
505
506static IRAtom* mkUifU16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
507 tl_assert(isShadowAtom(mce,a1));
508 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000509 return assignNew('V', mce, Ity_I16, binop(Iop_Or16, a1, a2));
sewardj95448072004-11-22 20:19:51 +0000510}
511
512static IRAtom* mkUifU32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
513 tl_assert(isShadowAtom(mce,a1));
514 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000515 return assignNew('V', mce, Ity_I32, binop(Iop_Or32, a1, a2));
sewardj95448072004-11-22 20:19:51 +0000516}
517
518static IRAtom* mkUifU64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
519 tl_assert(isShadowAtom(mce,a1));
520 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000521 return assignNew('V', mce, Ity_I64, binop(Iop_Or64, a1, a2));
sewardj95448072004-11-22 20:19:51 +0000522}
523
sewardjb5b87402011-03-07 16:05:35 +0000524static IRAtom* mkUifU128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
525 IRAtom *tmp1, *tmp2, *tmp3, *tmp4, *tmp5, *tmp6;
526 tl_assert(isShadowAtom(mce,a1));
527 tl_assert(isShadowAtom(mce,a2));
528 tmp1 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, a1));
529 tmp2 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, a1));
530 tmp3 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, a2));
531 tmp4 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, a2));
532 tmp5 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp1, tmp3));
533 tmp6 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp2, tmp4));
534
535 return assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, tmp6, tmp5));
536}
537
sewardj20d38f22005-02-07 23:50:18 +0000538static IRAtom* mkUifUV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
sewardj3245c912004-12-10 14:58:26 +0000539 tl_assert(isShadowAtom(mce,a1));
540 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000541 return assignNew('V', mce, Ity_V128, binop(Iop_OrV128, a1, a2));
sewardj3245c912004-12-10 14:58:26 +0000542}
543
sewardje50a1b12004-12-17 01:24:54 +0000544static IRAtom* mkUifU ( MCEnv* mce, IRType vty, IRAtom* a1, IRAtom* a2 ) {
sewardj95448072004-11-22 20:19:51 +0000545 switch (vty) {
sewardje50a1b12004-12-17 01:24:54 +0000546 case Ity_I8: return mkUifU8(mce, a1, a2);
sewardja1d93302004-12-12 16:45:06 +0000547 case Ity_I16: return mkUifU16(mce, a1, a2);
548 case Ity_I32: return mkUifU32(mce, a1, a2);
549 case Ity_I64: return mkUifU64(mce, a1, a2);
sewardjb5b87402011-03-07 16:05:35 +0000550 case Ity_I128: return mkUifU128(mce, a1, a2);
sewardj20d38f22005-02-07 23:50:18 +0000551 case Ity_V128: return mkUifUV128(mce, a1, a2);
sewardj95448072004-11-22 20:19:51 +0000552 default:
553 VG_(printf)("\n"); ppIRType(vty); VG_(printf)("\n");
554 VG_(tool_panic)("memcheck:mkUifU");
njn25e49d8e72002-09-23 09:36:25 +0000555 }
556}
557
sewardj95448072004-11-22 20:19:51 +0000558/* --------- The Left-family of operations. --------- */
njn25e49d8e72002-09-23 09:36:25 +0000559
sewardj95448072004-11-22 20:19:51 +0000560static IRAtom* mkLeft8 ( MCEnv* mce, IRAtom* a1 ) {
561 tl_assert(isShadowAtom(mce,a1));
sewardj7cf4e6b2008-05-01 20:24:26 +0000562 return assignNew('V', mce, Ity_I8, unop(Iop_Left8, a1));
sewardj95448072004-11-22 20:19:51 +0000563}
564
565static IRAtom* mkLeft16 ( MCEnv* mce, IRAtom* a1 ) {
566 tl_assert(isShadowAtom(mce,a1));
sewardj7cf4e6b2008-05-01 20:24:26 +0000567 return assignNew('V', mce, Ity_I16, unop(Iop_Left16, a1));
sewardj95448072004-11-22 20:19:51 +0000568}
569
570static IRAtom* mkLeft32 ( MCEnv* mce, IRAtom* a1 ) {
571 tl_assert(isShadowAtom(mce,a1));
sewardj7cf4e6b2008-05-01 20:24:26 +0000572 return assignNew('V', mce, Ity_I32, unop(Iop_Left32, a1));
sewardj95448072004-11-22 20:19:51 +0000573}
574
sewardj681be302005-01-15 20:43:58 +0000575static IRAtom* mkLeft64 ( MCEnv* mce, IRAtom* a1 ) {
576 tl_assert(isShadowAtom(mce,a1));
sewardj7cf4e6b2008-05-01 20:24:26 +0000577 return assignNew('V', mce, Ity_I64, unop(Iop_Left64, a1));
sewardj681be302005-01-15 20:43:58 +0000578}
579
sewardj95448072004-11-22 20:19:51 +0000580/* --------- 'Improvement' functions for AND/OR. --------- */
581
582/* ImproveAND(data, vbits) = data OR vbits. Defined (0) data 0s give
583 defined (0); all other -> undefined (1).
584*/
585static IRAtom* mkImproveAND8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
njn25e49d8e72002-09-23 09:36:25 +0000586{
sewardj95448072004-11-22 20:19:51 +0000587 tl_assert(isOriginalAtom(mce, data));
588 tl_assert(isShadowAtom(mce, vbits));
589 tl_assert(sameKindedAtoms(data, vbits));
sewardj7cf4e6b2008-05-01 20:24:26 +0000590 return assignNew('V', mce, Ity_I8, binop(Iop_Or8, data, vbits));
sewardj95448072004-11-22 20:19:51 +0000591}
njn25e49d8e72002-09-23 09:36:25 +0000592
sewardj95448072004-11-22 20:19:51 +0000593static IRAtom* mkImproveAND16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
594{
595 tl_assert(isOriginalAtom(mce, data));
596 tl_assert(isShadowAtom(mce, vbits));
597 tl_assert(sameKindedAtoms(data, vbits));
sewardj7cf4e6b2008-05-01 20:24:26 +0000598 return assignNew('V', mce, Ity_I16, binop(Iop_Or16, data, vbits));
sewardj95448072004-11-22 20:19:51 +0000599}
njn25e49d8e72002-09-23 09:36:25 +0000600
sewardj95448072004-11-22 20:19:51 +0000601static IRAtom* mkImproveAND32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
602{
603 tl_assert(isOriginalAtom(mce, data));
604 tl_assert(isShadowAtom(mce, vbits));
605 tl_assert(sameKindedAtoms(data, vbits));
sewardj7cf4e6b2008-05-01 20:24:26 +0000606 return assignNew('V', mce, Ity_I32, binop(Iop_Or32, data, vbits));
sewardj95448072004-11-22 20:19:51 +0000607}
njn25e49d8e72002-09-23 09:36:25 +0000608
sewardj7010f6e2004-12-10 13:35:22 +0000609static IRAtom* mkImproveAND64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
610{
611 tl_assert(isOriginalAtom(mce, data));
612 tl_assert(isShadowAtom(mce, vbits));
613 tl_assert(sameKindedAtoms(data, vbits));
sewardj7cf4e6b2008-05-01 20:24:26 +0000614 return assignNew('V', mce, Ity_I64, binop(Iop_Or64, data, vbits));
sewardj7010f6e2004-12-10 13:35:22 +0000615}
616
sewardj20d38f22005-02-07 23:50:18 +0000617static IRAtom* mkImproveANDV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
sewardj170ee212004-12-10 18:57:51 +0000618{
619 tl_assert(isOriginalAtom(mce, data));
620 tl_assert(isShadowAtom(mce, vbits));
621 tl_assert(sameKindedAtoms(data, vbits));
sewardj7cf4e6b2008-05-01 20:24:26 +0000622 return assignNew('V', mce, Ity_V128, binop(Iop_OrV128, data, vbits));
sewardj170ee212004-12-10 18:57:51 +0000623}
624
sewardj95448072004-11-22 20:19:51 +0000625/* ImproveOR(data, vbits) = ~data OR vbits. Defined (0) data 1s give
626 defined (0); all other -> undefined (1).
627*/
628static IRAtom* mkImproveOR8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
629{
630 tl_assert(isOriginalAtom(mce, data));
631 tl_assert(isShadowAtom(mce, vbits));
632 tl_assert(sameKindedAtoms(data, vbits));
633 return assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +0000634 'V', mce, Ity_I8,
sewardj95448072004-11-22 20:19:51 +0000635 binop(Iop_Or8,
sewardj7cf4e6b2008-05-01 20:24:26 +0000636 assignNew('V', mce, Ity_I8, unop(Iop_Not8, data)),
sewardj95448072004-11-22 20:19:51 +0000637 vbits) );
638}
njn25e49d8e72002-09-23 09:36:25 +0000639
sewardj95448072004-11-22 20:19:51 +0000640static IRAtom* mkImproveOR16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
641{
642 tl_assert(isOriginalAtom(mce, data));
643 tl_assert(isShadowAtom(mce, vbits));
644 tl_assert(sameKindedAtoms(data, vbits));
645 return assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +0000646 'V', mce, Ity_I16,
sewardj95448072004-11-22 20:19:51 +0000647 binop(Iop_Or16,
sewardj7cf4e6b2008-05-01 20:24:26 +0000648 assignNew('V', mce, Ity_I16, unop(Iop_Not16, data)),
sewardj95448072004-11-22 20:19:51 +0000649 vbits) );
650}
njn25e49d8e72002-09-23 09:36:25 +0000651
sewardj95448072004-11-22 20:19:51 +0000652static IRAtom* mkImproveOR32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
653{
654 tl_assert(isOriginalAtom(mce, data));
655 tl_assert(isShadowAtom(mce, vbits));
656 tl_assert(sameKindedAtoms(data, vbits));
657 return assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +0000658 'V', mce, Ity_I32,
sewardj95448072004-11-22 20:19:51 +0000659 binop(Iop_Or32,
sewardj7cf4e6b2008-05-01 20:24:26 +0000660 assignNew('V', mce, Ity_I32, unop(Iop_Not32, data)),
sewardj95448072004-11-22 20:19:51 +0000661 vbits) );
662}
663
sewardj7010f6e2004-12-10 13:35:22 +0000664static IRAtom* mkImproveOR64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
665{
666 tl_assert(isOriginalAtom(mce, data));
667 tl_assert(isShadowAtom(mce, vbits));
668 tl_assert(sameKindedAtoms(data, vbits));
669 return assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +0000670 'V', mce, Ity_I64,
sewardj7010f6e2004-12-10 13:35:22 +0000671 binop(Iop_Or64,
sewardj7cf4e6b2008-05-01 20:24:26 +0000672 assignNew('V', mce, Ity_I64, unop(Iop_Not64, data)),
sewardj7010f6e2004-12-10 13:35:22 +0000673 vbits) );
674}
675
sewardj20d38f22005-02-07 23:50:18 +0000676static IRAtom* mkImproveORV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
sewardj170ee212004-12-10 18:57:51 +0000677{
678 tl_assert(isOriginalAtom(mce, data));
679 tl_assert(isShadowAtom(mce, vbits));
680 tl_assert(sameKindedAtoms(data, vbits));
681 return assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +0000682 'V', mce, Ity_V128,
sewardj20d38f22005-02-07 23:50:18 +0000683 binop(Iop_OrV128,
sewardj7cf4e6b2008-05-01 20:24:26 +0000684 assignNew('V', mce, Ity_V128, unop(Iop_NotV128, data)),
sewardj170ee212004-12-10 18:57:51 +0000685 vbits) );
686}
687
/* --------- Pessimising casts. --------- */

/* The function returns an expression of type DST_TY. If any of the VBITS
   is undefined (value == 1) the resulting expression has all bits set to
   1. Otherwise, all bits are 0.

   Works in two phases: first collapse the source V-bits to a single
   Ity_I1 bit, then sign-extend that bit out to dst_ty.  A few common
   same-width cases are fast-tracked with a single CmpwNEZ op. */

static IRAtom* mkPCastTo( MCEnv* mce, IRType dst_ty, IRAtom* vbits )
{
   IRType  src_ty;
   IRAtom* tmp1;
   /* Note, dst_ty is a shadow type, not an original type. */
   /* First of all, collapse vbits down to a single bit. */
   tl_assert(isShadowAtom(mce,vbits));
   src_ty = typeOfIRExpr(mce->sb->tyenv, vbits);

   /* Fast-track some common cases */
   if (src_ty == Ity_I32 && dst_ty == Ity_I32)
      return assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));

   if (src_ty == Ity_I64 && dst_ty == Ity_I64)
      return assignNew('V', mce, Ity_I64, unop(Iop_CmpwNEZ64, vbits));

   if (src_ty == Ity_I32 && dst_ty == Ity_I64) {
      /* Widen by duplicating the pcast-ed I32 into both halves. */
      IRAtom* tmp = assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));
      return assignNew('V', mce, Ity_I64, binop(Iop_32HLto64, tmp, tmp));
   }

   /* Else do it the slow way .. */
   /* Phase 1: narrow src down to a single bit (1 iff any vbit is 1). */
   tmp1   = NULL;
   switch (src_ty) {
      case Ity_I1:
         tmp1 = vbits;
         break;
      case Ity_I8:
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ8, vbits));
         break;
      case Ity_I16:
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ16, vbits));
         break;
      case Ity_I32:
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ32, vbits));
         break;
      case Ity_I64:
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ64, vbits));
         break;
      case Ity_I128: {
         /* Gah.  Chop it in half, OR the halves together, and compare
            that with zero. */
         IRAtom* tmp2 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, vbits));
         IRAtom* tmp3 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, vbits));
         IRAtom* tmp4 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp2, tmp3));
         tmp1         = assignNew('V', mce, Ity_I1,
                                       unop(Iop_CmpNEZ64, tmp4));
         break;
      }
      default:
         ppIRType(src_ty);
         VG_(tool_panic)("mkPCastTo(1)");
   }
   tl_assert(tmp1);
   /* Now widen up to the dst type. */
   /* Phase 2: 1Sto* sign-extends the single bit to all-0s or all-1s. */
   switch (dst_ty) {
      case Ity_I1:
         return tmp1;
      case Ity_I8:
         return assignNew('V', mce, Ity_I8, unop(Iop_1Sto8, tmp1));
      case Ity_I16:
         return assignNew('V', mce, Ity_I16, unop(Iop_1Sto16, tmp1));
      case Ity_I32:
         return assignNew('V', mce, Ity_I32, unop(Iop_1Sto32, tmp1));
      case Ity_I64:
         return assignNew('V', mce, Ity_I64, unop(Iop_1Sto64, tmp1));
      case Ity_V128:
         /* No 1StoV128 op exists; build from two I64 halves. */
         tmp1 = assignNew('V', mce, Ity_I64,  unop(Iop_1Sto64, tmp1));
         tmp1 = assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128, tmp1, tmp1));
         return tmp1;
      case Ity_I128:
         tmp1 = assignNew('V', mce, Ity_I64,  unop(Iop_1Sto64, tmp1));
         tmp1 = assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, tmp1, tmp1));
         return tmp1;
      default:
         ppIRType(dst_ty);
         VG_(tool_panic)("mkPCastTo(2)");
   }
}
773
/* --------- Accurate interpretation of CmpEQ/CmpNE. --------- */
/*
   Normally, we can do CmpEQ/CmpNE by doing UifU on the arguments, and
   PCasting to Ity_U1.  However, sometimes it is necessary to be more
   accurate.  The insight is that the result is defined if two
   corresponding bits can be found, one from each argument, so that
   both bits are defined but are different -- that makes EQ say "No"
   and NE say "Yes".  Hence, we compute an improvement term and DifD
   it onto the "normal" (UifU) result.

   The result is:

   PCastTo<1> (
      -- naive version
      PCastTo<sz>( UifU<sz>(vxx, vyy) )

      `DifD<sz>`

      -- improvement term
      PCastTo<sz>( PCast<sz>( CmpEQ<sz> ( vec, 1...1 ) ) )
   )

   where
     vec contains 0 (defined) bits where the corresponding arg bits
     are defined but different, and 1 bits otherwise.

     vec = Or<sz>( vxx,   // 0 iff bit defined
                   vyy,   // 0 iff bit defined
                   Not<sz>(Xor<sz>( xx, yy )) // 0 iff bits different
                 )

     If any bit of vec is 0, the result is defined and so the
     improvement term should produce 0...0, else it should produce
     1...1.

     Hence require for the improvement term:

        if vec == 1...1 then 1...1 else 0...0
     ->
        PCast<sz>( CmpEQ<sz> ( vec, 1...1 ) )

   This was extensively re-analysed and checked on 6 July 05.
*/
static IRAtom* expensiveCmpEQorNE ( MCEnv*  mce,
                                    IRType  ty,
                                    IRAtom* vxx, IRAtom* vyy,
                                    IRAtom* xx, IRAtom* yy )
{
   IRAtom *naive, *vec, *improvement_term;
   IRAtom *improved, *final_cast, *top;
   IROp   opDIFD, opUIFU, opXOR, opNOT, opCMP, opOR;

   tl_assert(isShadowAtom(mce,vxx));
   tl_assert(isShadowAtom(mce,vyy));
   tl_assert(isOriginalAtom(mce,xx));
   tl_assert(isOriginalAtom(mce,yy));
   tl_assert(sameKindedAtoms(vxx,xx));
   tl_assert(sameKindedAtoms(vyy,yy));

   /* Select the size-appropriate ops.  DifD is And (0 = defined wins),
      UifU is Or (1 = undefined wins). */
   switch (ty) {
      case Ity_I32:
         opOR   = Iop_Or32;
         opDIFD = Iop_And32;
         opUIFU = Iop_Or32;
         opNOT  = Iop_Not32;
         opXOR  = Iop_Xor32;
         opCMP  = Iop_CmpEQ32;
         top    = mkU32(0xFFFFFFFF);
         break;
      case Ity_I64:
         opOR   = Iop_Or64;
         opDIFD = Iop_And64;
         opUIFU = Iop_Or64;
         opNOT  = Iop_Not64;
         opXOR  = Iop_Xor64;
         opCMP  = Iop_CmpEQ64;
         top    = mkU64(0xFFFFFFFFFFFFFFFFULL);
         break;
      default:
         VG_(tool_panic)("expensiveCmpEQorNE");
   }

   /* naive = PCast( vxx `UifU` vyy ) -- the default interpretation. */
   naive
      = mkPCastTo(mce,ty,
                  assignNew('V', mce, ty, binop(opUIFU, vxx, vyy)));

   /* vec: 0-bits mark positions where both args are defined but differ. */
   vec
      = assignNew(
           'V', mce,ty,
           binop( opOR,
                  assignNew('V', mce,ty, binop(opOR, vxx, vyy)),
                  assignNew(
                     'V', mce,ty,
                     unop( opNOT,
                           assignNew('V', mce,ty, binop(opXOR, xx, yy))))));

   /* improvement_term: all-1s unless some bit of vec is 0. */
   improvement_term
      = mkPCastTo( mce,ty,
                   assignNew('V', mce,Ity_I1, binop(opCMP, vec, top)));

   /* improved = naive `DifD` improvement_term. */
   improved
      = assignNew( 'V', mce,ty, binop(opDIFD, naive, improvement_term) );

   final_cast
      = mkPCastTo( mce, Ity_I1, improved );

   return final_cast;
}
882
sewardj95448072004-11-22 20:19:51 +0000883
sewardj992dff92005-10-07 11:08:55 +0000884/* --------- Semi-accurate interpretation of CmpORD. --------- */
885
886/* CmpORD32{S,U} does PowerPC-style 3-way comparisons:
887
888 CmpORD32S(x,y) = 1<<3 if x <s y
889 = 1<<2 if x >s y
890 = 1<<1 if x == y
891
892 and similarly the unsigned variant. The default interpretation is:
893
894 CmpORD32{S,U}#(x,y,x#,y#) = PCast(x# `UifU` y#)
sewardj1bc82102005-12-23 00:16:24 +0000895 & (7<<1)
sewardj992dff92005-10-07 11:08:55 +0000896
897 The "& (7<<1)" reflects the fact that all result bits except 3,2,1
898 are zero and therefore defined (viz, zero).
sewardja9e62a92005-10-07 12:13:21 +0000899
900 Also deal with a special case better:
901
902 CmpORD32S(x,0)
903
904 Here, bit 3 (LT) of the result is a copy of the top bit of x and
905 will be defined even if the rest of x isn't. In which case we do:
906
907 CmpORD32S#(x,x#,0,{impliedly 0}#)
sewardj1bc82102005-12-23 00:16:24 +0000908 = PCast(x#) & (3<<1) -- standard interp for GT#,EQ#
909 | (x# >>u 31) << 3 -- LT# = x#[31]
sewardja9e62a92005-10-07 12:13:21 +0000910
sewardj1bc82102005-12-23 00:16:24 +0000911 Analogous handling for CmpORD64{S,U}.
sewardj992dff92005-10-07 11:08:55 +0000912*/
sewardja9e62a92005-10-07 12:13:21 +0000913static Bool isZeroU32 ( IRAtom* e )
914{
915 return
916 toBool( e->tag == Iex_Const
917 && e->Iex.Const.con->tag == Ico_U32
918 && e->Iex.Const.con->Ico.U32 == 0 );
919}
920
sewardj1bc82102005-12-23 00:16:24 +0000921static Bool isZeroU64 ( IRAtom* e )
sewardj992dff92005-10-07 11:08:55 +0000922{
sewardj1bc82102005-12-23 00:16:24 +0000923 return
924 toBool( e->tag == Iex_Const
925 && e->Iex.Const.con->tag == Ico_U64
926 && e->Iex.Const.con->Ico.U64 == 0 );
927}
928
/* Instrument CmpORD32/64{S,U} per the scheme described in the comment
   block above: normally PCast(xx# `UifU` yy#) masked to bits 3..1, but
   for the common signed compare-against-literal-zero case, bit 3 (LT)
   is recovered exactly from the sign bit of xx#. */
static IRAtom* doCmpORD ( MCEnv*  mce,
                          IROp    cmp_op,
                          IRAtom* xxhash, IRAtom* yyhash,
                          IRAtom* xx, IRAtom* yy )
{
   Bool   m64    = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U;
   Bool   syned  = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD32S;
   IROp   opOR   = m64 ? Iop_Or64  : Iop_Or32;
   IROp   opAND  = m64 ? Iop_And64 : Iop_And32;
   IROp   opSHL  = m64 ? Iop_Shl64 : Iop_Shl32;
   IROp   opSHR  = m64 ? Iop_Shr64 : Iop_Shr32;
   IRType ty     = m64 ? Ity_I64   : Ity_I32;
   Int    width  = m64 ? 64        : 32;

   Bool (*isZero)(IRAtom*) = m64 ? isZeroU64 : isZeroU32;

   IRAtom* threeLeft1 = NULL;
   IRAtom* sevenLeft1 = NULL;

   tl_assert(isShadowAtom(mce,xxhash));
   tl_assert(isShadowAtom(mce,yyhash));
   tl_assert(isOriginalAtom(mce,xx));
   tl_assert(isOriginalAtom(mce,yy));
   tl_assert(sameKindedAtoms(xxhash,xx));
   tl_assert(sameKindedAtoms(yyhash,yy));
   tl_assert(cmp_op == Iop_CmpORD32S || cmp_op == Iop_CmpORD32U
             || cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U);

   if (0) {
      ppIROp(cmp_op); VG_(printf)(" ");
      ppIRExpr(xx); VG_(printf)(" "); ppIRExpr( yy ); VG_(printf)("\n");
   }

   if (syned && isZero(yy)) {
      /* fancy interpretation */
      /* if yy is zero, then it must be fully defined (zero#). */
      tl_assert(isZero(yyhash));
      threeLeft1 = m64 ? mkU64(3<<1) : mkU32(3<<1);
      /* (PCast(xx#) & (3<<1))            -- GT#/EQ# from whole of xx#
         | ((xx# >>u width-1) << 3)       -- LT# exactly from sign bit */
      return
         binop(
            opOR,
            assignNew(
               'V', mce,ty,
               binop(
                  opAND,
                  mkPCastTo(mce,ty, xxhash),
                  threeLeft1
               )),
            assignNew(
               'V', mce,ty,
               binop(
                  opSHL,
                  assignNew(
                     'V', mce,ty,
                     binop(opSHR, xxhash, mkU8(width-1))),
                  mkU8(3)
               ))
         );
   } else {
      /* standard interpretation */
      /* Result bits other than 3,2,1 are always zero, hence defined. */
      sevenLeft1 = m64 ? mkU64(7<<1) : mkU32(7<<1);
      return
         binop(
            opAND,
            mkPCastTo( mce,ty,
                       mkUifU(mce,ty, xxhash,yyhash)),
            sevenLeft1
         );
   }
}
999
1000
sewardj95448072004-11-22 20:19:51 +00001001/*------------------------------------------------------------*/
1002/*--- Emit a test and complaint if something is undefined. ---*/
1003/*------------------------------------------------------------*/
1004
sewardj7cf4e6b2008-05-01 20:24:26 +00001005static IRAtom* schemeE ( MCEnv* mce, IRExpr* e ); /* fwds */
1006
1007
/* Set the annotations on a dirty helper to indicate that the stack
   pointer and instruction pointers might be read.  This is the
   behaviour of all 'emit-a-complaint' style functions we might
   call. */

static void setHelperAnns ( MCEnv* mce, IRDirty* di ) {
   /* Two guest-state effects: a read of SP and a read of IP, at the
      offsets/sizes recorded in the guest layout. */
   di->nFxState = 2;
   di->fxState[0].fx     = Ifx_Read;
   di->fxState[0].offset = mce->layout->offset_SP;
   di->fxState[0].size   = mce->layout->sizeof_SP;
   di->fxState[1].fx     = Ifx_Read;
   di->fxState[1].offset = mce->layout->offset_IP;
   di->fxState[1].size   = mce->layout->sizeof_IP;
}
1022
1023
1024/* Check the supplied **original** atom for undefinedness, and emit a
1025 complaint if so. Once that happens, mark it as defined. This is
1026 possible because the atom is either a tmp or literal. If it's a
1027 tmp, it will be shadowed by a tmp, and so we can set the shadow to
1028 be defined. In fact as mentioned above, we will have to allocate a
1029 new tmp to carry the new 'defined' shadow value, and update the
1030 original->tmp mapping accordingly; we cannot simply assign a new
1031 value to an existing shadow tmp as this breaks SSAness -- resulting
1032 in the post-instrumentation sanity checker spluttering in disapproval.
1033*/
1034static void complainIfUndefined ( MCEnv* mce, IRAtom* atom )
1035{
sewardj7cf97ee2004-11-28 14:25:01 +00001036 IRAtom* vatom;
1037 IRType ty;
1038 Int sz;
1039 IRDirty* di;
1040 IRAtom* cond;
sewardj7cf4e6b2008-05-01 20:24:26 +00001041 IRAtom* origin;
1042 void* fn;
1043 HChar* nm;
1044 IRExpr** args;
1045 Int nargs;
sewardj7cf97ee2004-11-28 14:25:01 +00001046
njn1d0825f2006-03-27 11:37:07 +00001047 // Don't do V bit tests if we're not reporting undefined value errors.
sewardj7cf4e6b2008-05-01 20:24:26 +00001048 if (MC_(clo_mc_level) == 1)
njn1d0825f2006-03-27 11:37:07 +00001049 return;
1050
sewardj95448072004-11-22 20:19:51 +00001051 /* Since the original expression is atomic, there's no duplicated
1052 work generated by making multiple V-expressions for it. So we
1053 don't really care about the possibility that someone else may
1054 also create a V-interpretion for it. */
1055 tl_assert(isOriginalAtom(mce, atom));
sewardj7cf97ee2004-11-28 14:25:01 +00001056 vatom = expr2vbits( mce, atom );
sewardj95448072004-11-22 20:19:51 +00001057 tl_assert(isShadowAtom(mce, vatom));
1058 tl_assert(sameKindedAtoms(atom, vatom));
1059
sewardj1c0ce7a2009-07-01 08:10:49 +00001060 ty = typeOfIRExpr(mce->sb->tyenv, vatom);
sewardj95448072004-11-22 20:19:51 +00001061
1062 /* sz is only used for constructing the error message */
sewardj7cf97ee2004-11-28 14:25:01 +00001063 sz = ty==Ity_I1 ? 0 : sizeofIRType(ty);
sewardj95448072004-11-22 20:19:51 +00001064
sewardj7cf97ee2004-11-28 14:25:01 +00001065 cond = mkPCastTo( mce, Ity_I1, vatom );
sewardj95448072004-11-22 20:19:51 +00001066 /* cond will be 0 if all defined, and 1 if any not defined. */
1067
sewardj7cf4e6b2008-05-01 20:24:26 +00001068 /* Get the origin info for the value we are about to check. At
1069 least, if we are doing origin tracking. If not, use a dummy
1070 zero origin. */
1071 if (MC_(clo_mc_level) == 3) {
1072 origin = schemeE( mce, atom );
1073 if (mce->hWordTy == Ity_I64) {
1074 origin = assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, origin) );
1075 }
1076 } else {
1077 origin = NULL;
1078 }
1079
1080 fn = NULL;
1081 nm = NULL;
1082 args = NULL;
1083 nargs = -1;
1084
sewardj95448072004-11-22 20:19:51 +00001085 switch (sz) {
1086 case 0:
sewardj7cf4e6b2008-05-01 20:24:26 +00001087 if (origin) {
1088 fn = &MC_(helperc_value_check0_fail_w_o);
1089 nm = "MC_(helperc_value_check0_fail_w_o)";
1090 args = mkIRExprVec_1(origin);
1091 nargs = 1;
1092 } else {
1093 fn = &MC_(helperc_value_check0_fail_no_o);
1094 nm = "MC_(helperc_value_check0_fail_no_o)";
1095 args = mkIRExprVec_0();
1096 nargs = 0;
1097 }
sewardj95448072004-11-22 20:19:51 +00001098 break;
1099 case 1:
sewardj7cf4e6b2008-05-01 20:24:26 +00001100 if (origin) {
1101 fn = &MC_(helperc_value_check1_fail_w_o);
1102 nm = "MC_(helperc_value_check1_fail_w_o)";
1103 args = mkIRExprVec_1(origin);
1104 nargs = 1;
1105 } else {
1106 fn = &MC_(helperc_value_check1_fail_no_o);
1107 nm = "MC_(helperc_value_check1_fail_no_o)";
1108 args = mkIRExprVec_0();
1109 nargs = 0;
1110 }
sewardj95448072004-11-22 20:19:51 +00001111 break;
1112 case 4:
sewardj7cf4e6b2008-05-01 20:24:26 +00001113 if (origin) {
1114 fn = &MC_(helperc_value_check4_fail_w_o);
1115 nm = "MC_(helperc_value_check4_fail_w_o)";
1116 args = mkIRExprVec_1(origin);
1117 nargs = 1;
1118 } else {
1119 fn = &MC_(helperc_value_check4_fail_no_o);
1120 nm = "MC_(helperc_value_check4_fail_no_o)";
1121 args = mkIRExprVec_0();
1122 nargs = 0;
1123 }
sewardj95448072004-11-22 20:19:51 +00001124 break;
sewardj11bcc4e2005-04-23 22:38:38 +00001125 case 8:
sewardj7cf4e6b2008-05-01 20:24:26 +00001126 if (origin) {
1127 fn = &MC_(helperc_value_check8_fail_w_o);
1128 nm = "MC_(helperc_value_check8_fail_w_o)";
1129 args = mkIRExprVec_1(origin);
1130 nargs = 1;
1131 } else {
1132 fn = &MC_(helperc_value_check8_fail_no_o);
1133 nm = "MC_(helperc_value_check8_fail_no_o)";
1134 args = mkIRExprVec_0();
1135 nargs = 0;
1136 }
sewardj11bcc4e2005-04-23 22:38:38 +00001137 break;
njn4c245e52009-03-15 23:25:38 +00001138 case 2:
1139 case 16:
sewardj7cf4e6b2008-05-01 20:24:26 +00001140 if (origin) {
1141 fn = &MC_(helperc_value_checkN_fail_w_o);
1142 nm = "MC_(helperc_value_checkN_fail_w_o)";
1143 args = mkIRExprVec_2( mkIRExpr_HWord( sz ), origin);
1144 nargs = 2;
1145 } else {
1146 fn = &MC_(helperc_value_checkN_fail_no_o);
1147 nm = "MC_(helperc_value_checkN_fail_no_o)";
1148 args = mkIRExprVec_1( mkIRExpr_HWord( sz ) );
1149 nargs = 1;
1150 }
sewardj95448072004-11-22 20:19:51 +00001151 break;
njn4c245e52009-03-15 23:25:38 +00001152 default:
1153 VG_(tool_panic)("unexpected szB");
sewardj95448072004-11-22 20:19:51 +00001154 }
sewardj7cf4e6b2008-05-01 20:24:26 +00001155
1156 tl_assert(fn);
1157 tl_assert(nm);
1158 tl_assert(args);
1159 tl_assert(nargs >= 0 && nargs <= 2);
1160 tl_assert( (MC_(clo_mc_level) == 3 && origin != NULL)
1161 || (MC_(clo_mc_level) == 2 && origin == NULL) );
1162
1163 di = unsafeIRDirty_0_N( nargs/*regparms*/, nm,
1164 VG_(fnptr_to_fnentry)( fn ), args );
sewardj95448072004-11-22 20:19:51 +00001165 di->guard = cond;
1166 setHelperAnns( mce, di );
sewardj7cf4e6b2008-05-01 20:24:26 +00001167 stmt( 'V', mce, IRStmt_Dirty(di));
sewardj95448072004-11-22 20:19:51 +00001168
1169 /* Set the shadow tmp to be defined. First, update the
1170 orig->shadow tmp mapping to reflect the fact that this shadow is
1171 getting a new value. */
sewardj710d6c22005-03-20 18:55:15 +00001172 tl_assert(isIRAtom(vatom));
sewardj95448072004-11-22 20:19:51 +00001173 /* sameKindedAtoms ... */
sewardj0b9d74a2006-12-24 02:24:11 +00001174 if (vatom->tag == Iex_RdTmp) {
1175 tl_assert(atom->tag == Iex_RdTmp);
sewardj7cf4e6b2008-05-01 20:24:26 +00001176 newShadowTmpV(mce, atom->Iex.RdTmp.tmp);
1177 assign('V', mce, findShadowTmpV(mce, atom->Iex.RdTmp.tmp),
1178 definedOfType(ty));
sewardj95448072004-11-22 20:19:51 +00001179 }
1180}
1181
1182
1183/*------------------------------------------------------------*/
1184/*--- Shadowing PUTs/GETs, and indexed variants thereof ---*/
1185/*------------------------------------------------------------*/
1186
1187/* Examine the always-defined sections declared in layout to see if
1188 the (offset,size) section is within one. Note, is is an error to
1189 partially fall into such a region: (offset,size) should either be
1190 completely in such a region or completely not-in such a region.
1191*/
1192static Bool isAlwaysDefd ( MCEnv* mce, Int offset, Int size )
1193{
1194 Int minoffD, maxoffD, i;
1195 Int minoff = offset;
1196 Int maxoff = minoff + size - 1;
1197 tl_assert((minoff & ~0xFFFF) == 0);
1198 tl_assert((maxoff & ~0xFFFF) == 0);
1199
1200 for (i = 0; i < mce->layout->n_alwaysDefd; i++) {
1201 minoffD = mce->layout->alwaysDefd[i].offset;
1202 maxoffD = minoffD + mce->layout->alwaysDefd[i].size - 1;
1203 tl_assert((minoffD & ~0xFFFF) == 0);
1204 tl_assert((maxoffD & ~0xFFFF) == 0);
1205
1206 if (maxoff < minoffD || maxoffD < minoff)
1207 continue; /* no overlap */
1208 if (minoff >= minoffD && maxoff <= maxoffD)
1209 return True; /* completely contained in an always-defd section */
1210
1211 VG_(tool_panic)("memcheck:isAlwaysDefd:partial overlap");
1212 }
1213 return False; /* could not find any containing section */
1214}
1215
1216
/* Generate into bb suitable actions to shadow this Put.  If the state
   slice is marked 'always defined', do nothing.  Otherwise, write the
   supplied V bits to the shadow state.  We can pass in either an
   original atom or a V-atom, but not both.  In the former case the
   relevant V-bits are then generated from the original.
*/
static
void do_shadow_PUT ( MCEnv* mce,  Int offset,
                     IRAtom* atom, IRAtom* vatom )
{
   IRType ty;

   // Don't do shadow PUTs if we're not doing undefined value checking.
   // Their absence lets Vex's optimiser remove all the shadow computation
   // that they depend on, which includes GETs of the shadow registers.
   if (MC_(clo_mc_level) == 1)
      return;

   /* Exactly one of atom/vatom must be supplied. */
   if (atom) {
      tl_assert(!vatom);
      tl_assert(isOriginalAtom(mce, atom));
      vatom = expr2vbits( mce, atom );
   } else {
      tl_assert(vatom);
      tl_assert(isShadowAtom(mce, vatom));
   }

   ty = typeOfIRExpr(mce->sb->tyenv, vatom);
   tl_assert(ty != Ity_I1);
   tl_assert(ty != Ity_I128);
   if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
      /* later: no ... */
      /* emit code to emit a complaint if any of the vbits are 1. */
      /* complainIfUndefined(mce, atom); */
   } else {
      /* Do a plain shadow Put. */
      stmt( 'V', mce, IRStmt_Put( offset + mce->layout->total_sizeB, vatom ) );
   }
}
1256
1257
1258/* Return an expression which contains the V bits corresponding to the
1259 given GETI (passed in in pieces).
1260*/
1261static
1262void do_shadow_PUTI ( MCEnv* mce,
sewardj0b9d74a2006-12-24 02:24:11 +00001263 IRRegArray* descr,
1264 IRAtom* ix, Int bias, IRAtom* atom )
sewardj95448072004-11-22 20:19:51 +00001265{
sewardj7cf97ee2004-11-28 14:25:01 +00001266 IRAtom* vatom;
1267 IRType ty, tyS;
1268 Int arrSize;;
1269
njn1d0825f2006-03-27 11:37:07 +00001270 // Don't do shadow PUTIs if we're not doing undefined value checking.
1271 // Their absence lets Vex's optimiser remove all the shadow computation
1272 // that they depend on, which includes GETIs of the shadow registers.
sewardj7cf4e6b2008-05-01 20:24:26 +00001273 if (MC_(clo_mc_level) == 1)
njn1d0825f2006-03-27 11:37:07 +00001274 return;
1275
sewardj95448072004-11-22 20:19:51 +00001276 tl_assert(isOriginalAtom(mce,atom));
sewardj7cf97ee2004-11-28 14:25:01 +00001277 vatom = expr2vbits( mce, atom );
sewardj95448072004-11-22 20:19:51 +00001278 tl_assert(sameKindedAtoms(atom, vatom));
sewardj7cf97ee2004-11-28 14:25:01 +00001279 ty = descr->elemTy;
sewardj7cf4e6b2008-05-01 20:24:26 +00001280 tyS = shadowTypeV(ty);
sewardj7cf97ee2004-11-28 14:25:01 +00001281 arrSize = descr->nElems * sizeofIRType(ty);
sewardj95448072004-11-22 20:19:51 +00001282 tl_assert(ty != Ity_I1);
1283 tl_assert(isOriginalAtom(mce,ix));
1284 complainIfUndefined(mce,ix);
1285 if (isAlwaysDefd(mce, descr->base, arrSize)) {
1286 /* later: no ... */
1287 /* emit code to emit a complaint if any of the vbits are 1. */
1288 /* complainIfUndefined(mce, atom); */
1289 } else {
1290 /* Do a cloned version of the Put that refers to the shadow
1291 area. */
sewardj0b9d74a2006-12-24 02:24:11 +00001292 IRRegArray* new_descr
1293 = mkIRRegArray( descr->base + mce->layout->total_sizeB,
1294 tyS, descr->nElems);
sewardj7cf4e6b2008-05-01 20:24:26 +00001295 stmt( 'V', mce, IRStmt_PutI( new_descr, ix, bias, vatom ));
sewardj95448072004-11-22 20:19:51 +00001296 }
1297}
1298
1299
1300/* Return an expression which contains the V bits corresponding to the
1301 given GET (passed in in pieces).
1302*/
1303static
1304IRExpr* shadow_GET ( MCEnv* mce, Int offset, IRType ty )
1305{
sewardj7cf4e6b2008-05-01 20:24:26 +00001306 IRType tyS = shadowTypeV(ty);
sewardj95448072004-11-22 20:19:51 +00001307 tl_assert(ty != Ity_I1);
sewardjb5b87402011-03-07 16:05:35 +00001308 tl_assert(ty != Ity_I128);
sewardj95448072004-11-22 20:19:51 +00001309 if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
1310 /* Always defined, return all zeroes of the relevant type */
1311 return definedOfType(tyS);
1312 } else {
1313 /* return a cloned version of the Get that refers to the shadow
1314 area. */
sewardj7cf4e6b2008-05-01 20:24:26 +00001315 /* FIXME: this isn't an atom! */
sewardj95448072004-11-22 20:19:51 +00001316 return IRExpr_Get( offset + mce->layout->total_sizeB, tyS );
1317 }
1318}
1319
1320
/* Return an expression which contains the V bits corresponding to the
   given GETI (passed in in pieces).
*/
static
IRExpr* shadow_GETI ( MCEnv* mce,
                      IRRegArray* descr, IRAtom* ix, Int bias )
{
   IRType ty   = descr->elemTy;
   IRType tyS  = shadowTypeV(ty);
   Int arrSize = descr->nElems * sizeofIRType(ty);
   tl_assert(ty != Ity_I1);
   tl_assert(isOriginalAtom(mce,ix));
   /* An undefined array index is itself an error worth reporting. */
   complainIfUndefined(mce,ix);
   if (isAlwaysDefd(mce, descr->base, arrSize)) {
      /* Always defined, return all zeroes of the relevant type */
      return definedOfType(tyS);
   } else {
      /* return a cloned version of the Get that refers to the shadow
         area. */
      IRRegArray* new_descr
         = mkIRRegArray( descr->base + mce->layout->total_sizeB,
                         tyS, descr->nElems);
      return IRExpr_GetI( new_descr, ix, bias );
   }
}
1346
1347
1348/*------------------------------------------------------------*/
1349/*--- Generating approximations for unknown operations, ---*/
1350/*--- using lazy-propagate semantics ---*/
1351/*------------------------------------------------------------*/
1352
/* Lazy propagation of undefinedness from two values, resulting in the
   specified shadow type.
*/
static
IRAtom* mkLazy2 ( MCEnv* mce, IRType finalVty, IRAtom* va1, IRAtom* va2 )
{
   IRAtom* at;
   IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
   IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
   tl_assert(isShadowAtom(mce,va1));
   tl_assert(isShadowAtom(mce,va2));

   /* The general case is inefficient because PCast is an expensive
      operation.  Here are some special cases which use PCast only
      once rather than twice. */

   /* I64 x I64 -> I64 */
   if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I64) {
      if (0) VG_(printf)("mkLazy2: I64 x I64 -> I64\n");
      at = mkUifU(mce, Ity_I64, va1, va2);
      at = mkPCastTo(mce, Ity_I64, at);
      return at;
   }

   /* I64 x I64 -> I32 */
   if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I32) {
      if (0) VG_(printf)("mkLazy2: I64 x I64 -> I32\n");
      at = mkUifU(mce, Ity_I64, va1, va2);
      at = mkPCastTo(mce, Ity_I32, at);
      return at;
   }

   /* Debug trace (disabled) for type combinations taking the slow path. */
   if (0) {
      VG_(printf)("mkLazy2 ");
      ppIRType(t1);
      VG_(printf)("_");
      ppIRType(t2);
      VG_(printf)("_");
      ppIRType(finalVty);
      VG_(printf)("\n");
   }

   /* General case: force everything via 32-bit intermediaries. */
   at = mkPCastTo(mce, Ity_I32, va1);
   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
   at = mkPCastTo(mce, finalVty, at);
   return at;
}
1401
1402
/* 3-arg version of the above.  Merge the definedness of three shadow
   atoms (va1, va2, va3) into a single shadow value of type
   'finalVty'.  Only the type combinations listed below are handled;
   any other combination is dumped and asserts, so it gets noticed
   and a precise special case added. */
static
IRAtom* mkLazy3 ( MCEnv* mce, IRType finalVty,
                  IRAtom* va1, IRAtom* va2, IRAtom* va3 )
{
   IRAtom* at;
   IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
   IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
   IRType t3 = typeOfIRExpr(mce->sb->tyenv, va3);
   tl_assert(isShadowAtom(mce,va1));
   tl_assert(isShadowAtom(mce,va2));
   tl_assert(isShadowAtom(mce,va3));

   /* The general case is inefficient because PCast is an expensive
      operation.  Here are some special cases which use PCast only
      twice rather than once per argument plus once at the end. */

   /* I32 x I64 x I64 -> I64 */
   /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
   if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64
       && finalVty == Ity_I64) {
      if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I64\n");
      /* Widen 1st arg to I64.  Since 1st arg is typically a rounding
         mode indication which is fully defined, this should get
         folded out later. */
      at = mkPCastTo(mce, Ity_I64, va1);
      /* Now fold in 2nd and 3rd args. */
      at = mkUifU(mce, Ity_I64, at, va2);
      at = mkUifU(mce, Ity_I64, at, va3);
      /* and PCast once again. */
      at = mkPCastTo(mce, Ity_I64, at);
      return at;
   }

   /* I32 x I64 x I64 -> I32 */
   if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64
       && finalVty == Ity_I32) {
      if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I32\n");
      at = mkPCastTo(mce, Ity_I64, va1);
      at = mkUifU(mce, Ity_I64, at, va2);
      at = mkUifU(mce, Ity_I64, at, va3);
      at = mkPCastTo(mce, Ity_I32, at);
      return at;
   }

   /* I32 x I32 x I32 -> I32 */
   /* 32-bit FP idiom, as (eg) happens on ARM */
   if (t1 == Ity_I32 && t2 == Ity_I32 && t3 == Ity_I32
       && finalVty == Ity_I32) {
      if (0) VG_(printf)("mkLazy3: I32 x I32 x I32 -> I32\n");
      /* All I32 already, so no widening needed before the UifUs. */
      at = va1;
      at = mkUifU(mce, Ity_I32, at, va2);
      at = mkUifU(mce, Ity_I32, at, va3);
      at = mkPCastTo(mce, Ity_I32, at);
      return at;
   }

   /* I32 x I128 x I128 -> I128 */
   /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
   if (t1 == Ity_I32 && t2 == Ity_I128 && t3 == Ity_I128
       && finalVty == Ity_I128) {
      if (0) VG_(printf)("mkLazy3: I32 x I128 x I128 -> I128\n");
      /* Widen 1st arg to I128.  Since 1st arg is typically a rounding
         mode indication which is fully defined, this should get
         folded out later. */
      at = mkPCastTo(mce, Ity_I128, va1);
      /* Now fold in 2nd and 3rd args. */
      at = mkUifU(mce, Ity_I128, at, va2);
      at = mkUifU(mce, Ity_I128, at, va3);
      /* and PCast once again. */
      at = mkPCastTo(mce, Ity_I128, at);
      return at;
   }

   /* Unhandled type combination: print it so it can be identified,
      then assert.  The general fallback below is deliberately
      disabled so missing cases are noticed rather than silently
      handled imprecisely. */
   if (1) {
      VG_(printf)("mkLazy3: ");
      ppIRType(t1);
      VG_(printf)(" x ");
      ppIRType(t2);
      VG_(printf)(" x ");
      ppIRType(t3);
      VG_(printf)(" -> ");
      ppIRType(finalVty);
      VG_(printf)("\n");
   }

   tl_assert(0);
   /* General case: force everything via 32-bit intermediaries. */
   /*
   at = mkPCastTo(mce, Ity_I32, va1);
   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va3));
   at = mkPCastTo(mce, finalVty, at);
   return at;
   */
}
1498
1499
/* 4-arg version of the above.  Merge the definedness of four shadow
   atoms into one shadow value of type 'finalVty'.  Only the two
   type combinations below are handled; anything else is dumped and
   asserts. */
static
IRAtom* mkLazy4 ( MCEnv* mce, IRType finalVty,
                  IRAtom* va1, IRAtom* va2, IRAtom* va3, IRAtom* va4 )
{
   IRAtom* at;
   IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
   IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
   IRType t3 = typeOfIRExpr(mce->sb->tyenv, va3);
   IRType t4 = typeOfIRExpr(mce->sb->tyenv, va4);
   tl_assert(isShadowAtom(mce,va1));
   tl_assert(isShadowAtom(mce,va2));
   tl_assert(isShadowAtom(mce,va3));
   tl_assert(isShadowAtom(mce,va4));

   /* The general case is inefficient because PCast is an expensive
      operation.  Here are some special cases which need fewer PCasts
      (at most two) than a general per-argument scheme would. */

   /* I32 x I64 x I64 x I64 -> I64 */
   /* Standard FP idiom: rm x FParg1 x FParg2 x FParg3 -> FPresult */
   if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64 && t4 == Ity_I64
       && finalVty == Ity_I64) {
      if (0) VG_(printf)("mkLazy4: I32 x I64 x I64 x I64 -> I64\n");
      /* Widen 1st arg to I64.  Since 1st arg is typically a rounding
         mode indication which is fully defined, this should get
         folded out later. */
      at = mkPCastTo(mce, Ity_I64, va1);
      /* Now fold in 2nd, 3rd, 4th args. */
      at = mkUifU(mce, Ity_I64, at, va2);
      at = mkUifU(mce, Ity_I64, at, va3);
      at = mkUifU(mce, Ity_I64, at, va4);
      /* and PCast once again. */
      at = mkPCastTo(mce, Ity_I64, at);
      return at;
   }
   /* I32 x I32 x I32 x I32 -> I32 */
   /* Standard FP idiom: rm x FParg1 x FParg2 x FParg3 -> FPresult */
   if (t1 == Ity_I32 && t2 == Ity_I32 && t3 == Ity_I32 && t4 == Ity_I32
       && finalVty == Ity_I32) {
      if (0) VG_(printf)("mkLazy4: I32 x I32 x I32 x I32 -> I32\n");
      /* Everything is I32 already, so just UifU and PCast once. */
      at = va1;
      /* Now fold in 2nd, 3rd, 4th args. */
      at = mkUifU(mce, Ity_I32, at, va2);
      at = mkUifU(mce, Ity_I32, at, va3);
      at = mkUifU(mce, Ity_I32, at, va4);
      at = mkPCastTo(mce, Ity_I32, at);
      return at;
   }

   /* Unhandled type combination: print it for identification, then
      assert so the missing case gets added. */
   if (1) {
      VG_(printf)("mkLazy4: ");
      ppIRType(t1);
      VG_(printf)(" x ");
      ppIRType(t2);
      VG_(printf)(" x ");
      ppIRType(t3);
      VG_(printf)(" x ");
      ppIRType(t4);
      VG_(printf)(" -> ");
      ppIRType(finalVty);
      VG_(printf)("\n");
   }

   tl_assert(0);
}
1566
1567
/* Do the lazy propagation game from a null-terminated vector of
   atoms.  This is presumably the arguments to a helper call, so the
   IRCallee info is also supplied in order that we can know which
   arguments should be ignored (via the .mcx_mask field).

   Returns a shadow atom of type 'finalVtype' which is a PCast of the
   pessimistic merge of the definedness of all non-excluded args. */
static
IRAtom* mkLazyN ( MCEnv* mce,
                  IRAtom** exprvec, IRType finalVtype, IRCallee* cee )
{
   Int     i;
   IRAtom* here;
   IRAtom* curr;
   IRType  mergeTy;
   Bool    mergeTy64 = True;

   /* Decide on the type of the merge intermediary.  If all relevant
      args are I64, then it's I64.  In all other circumstances, use
      I32. */
   for (i = 0; exprvec[i]; i++) {
      tl_assert(i < 32);   /* mcx_mask only has 32 bits */
      tl_assert(isOriginalAtom(mce, exprvec[i]));
      if (cee->mcx_mask & (1<<i))
         continue;
      if (typeOfIRExpr(mce->sb->tyenv, exprvec[i]) != Ity_I64)
         mergeTy64 = False;
   }

   mergeTy = mergeTy64  ? Ity_I64  : Ity_I32;
   curr    = definedOfType(mergeTy);   /* start from 'all defined' */

   for (i = 0; exprvec[i]; i++) {
      tl_assert(i < 32);
      tl_assert(isOriginalAtom(mce, exprvec[i]));
      /* Only take notice of this arg if the callee's mc-exclusion
         mask does not say it is to be excluded. */
      if (cee->mcx_mask & (1<<i)) {
         /* the arg is to be excluded from definedness checking.  Do
            nothing. */
         if (0) VG_(printf)("excluding %s(%d)\n", cee->name, i);
      } else {
         /* calculate the arg's definedness, and pessimistically merge
            it in. */
         here = mkPCastTo( mce, mergeTy, expr2vbits(mce, exprvec[i]) );
         curr = mergeTy64
                   ? mkUifU64(mce, here, curr)
                   : mkUifU32(mce, here, curr);
      }
   }
   /* Finally, squash the merged definedness down/up to the type the
      caller wants. */
   return mkPCastTo(mce, finalVtype, curr );
}
1618
1619
1620/*------------------------------------------------------------*/
1621/*--- Generating expensive sequences for exact carry-chain ---*/
1622/*--- propagation in add/sub and related operations. ---*/
1623/*------------------------------------------------------------*/
1624
/* Build an exact (carry-chain-aware) definedness interpretation for
   aa + bb (if 'add') or aa - bb (otherwise), both of type 'ty' (I32
   or I64 only), with shadows qaa/qbb.  a_min/a_max are the smallest/
   largest values aa could take given its undefined bits (and likewise
   b_min/b_max for bb); a result bit is undefined if either input bit
   is undefined, or if the extreme-case sums/differences disagree in
   that bit. */
static
IRAtom* expensiveAddSub ( MCEnv* mce,
                          Bool add,
                          IRType ty,
                          IRAtom* qaa, IRAtom* qbb,
                          IRAtom* aa, IRAtom* bb )
{
   IRAtom *a_min, *b_min, *a_max, *b_max;
   IROp   opAND, opOR, opXOR, opNOT, opADD, opSUB;

   tl_assert(isShadowAtom(mce,qaa));
   tl_assert(isShadowAtom(mce,qbb));
   tl_assert(isOriginalAtom(mce,aa));
   tl_assert(isOriginalAtom(mce,bb));
   tl_assert(sameKindedAtoms(qaa,aa));
   tl_assert(sameKindedAtoms(qbb,bb));

   /* Select width-appropriate primops. */
   switch (ty) {
      case Ity_I32:
         opAND = Iop_And32;
         opOR  = Iop_Or32;
         opXOR = Iop_Xor32;
         opNOT = Iop_Not32;
         opADD = Iop_Add32;
         opSUB = Iop_Sub32;
         break;
      case Ity_I64:
         opAND = Iop_And64;
         opOR  = Iop_Or64;
         opXOR = Iop_Xor64;
         opNOT = Iop_Not64;
         opADD = Iop_Add64;
         opSUB = Iop_Sub64;
         break;
      default:
         VG_(tool_panic)("expensiveAddSub");
   }

   // a_min = aa & ~qaa   (undefined bits forced to 0)
   a_min = assignNew('V', mce,ty,
                     binop(opAND, aa,
                           assignNew('V', mce,ty, unop(opNOT, qaa))));

   // b_min = bb & ~qbb
   b_min = assignNew('V', mce,ty,
                     binop(opAND, bb,
                           assignNew('V', mce,ty, unop(opNOT, qbb))));

   // a_max = aa | qaa    (undefined bits forced to 1)
   a_max = assignNew('V', mce,ty, binop(opOR, aa, qaa));

   // b_max = bb | qbb
   b_max = assignNew('V', mce,ty, binop(opOR, bb, qbb));

   if (add) {
      // result = (qaa | qbb) | ((a_min + b_min) ^ (a_max + b_max))
      return
      assignNew('V', mce,ty,
                binop( opOR,
                       assignNew('V', mce,ty, binop(opOR, qaa, qbb)),
                       assignNew('V', mce,ty,
                                 binop( opXOR,
                                        assignNew('V', mce,ty, binop(opADD, a_min, b_min)),
                                        assignNew('V', mce,ty, binop(opADD, a_max, b_max))
                                 )
                       )
                )
      );
   } else {
      // For subtraction the extremes are (a_min - b_max) and
      // (a_max - b_min); note both use opSUB.
      // result = (qaa | qbb) | ((a_min - b_max) ^ (a_max - b_min))
      return
      assignNew('V', mce,ty,
                binop( opOR,
                       assignNew('V', mce,ty, binop(opOR, qaa, qbb)),
                       assignNew('V', mce,ty,
                                 binop( opXOR,
                                        assignNew('V', mce,ty, binop(opSUB, a_min, b_max)),
                                        assignNew('V', mce,ty, binop(opSUB, a_max, b_min))
                                 )
                       )
                )
      );
   }

}
1710
1711
1712/*------------------------------------------------------------*/
sewardjaaddbc22005-10-07 09:49:53 +00001713/*--- Scalar shifts. ---*/
1714/*------------------------------------------------------------*/
1715
/* Produce an interpretation for (aa << bb) (or >>s, >>u).  The basic
   idea is to shift the definedness bits by the original shift amount.
   This introduces 0s ("defined") in new positions for left shifts and
   unsigned right shifts, and copies the top definedness bit for
   signed right shifts.  So, conveniently, applying the original shift
   operator to the definedness bits for the left arg is exactly the
   right thing to do:

      (qaa << bb)

   However if the shift amount is undefined then the whole result
   is undefined.  Hence need:

      (qaa << bb) `UifU` PCast(qbb)

   If the shift amount bb is a literal then qbb will say 'all defined'
   and the UifU and PCast will get folded out by post-instrumentation
   optimisation.
*/
static IRAtom* scalarShift ( MCEnv*  mce,
                             IRType  ty,
                             IROp    original_op,
                             IRAtom* qaa, IRAtom* qbb,
                             IRAtom* aa,  IRAtom* bb )
{
   tl_assert(isShadowAtom(mce,qaa));
   tl_assert(isShadowAtom(mce,qbb));
   tl_assert(isOriginalAtom(mce,aa));
   tl_assert(isOriginalAtom(mce,bb));
   tl_assert(sameKindedAtoms(qaa,aa));
   tl_assert(sameKindedAtoms(qbb,bb));
   /* (qaa shifted by the original amount) `UifU` PCast(qbb) */
   return
      assignNew(
         'V', mce, ty,
         mkUifU( mce, ty,
                 assignNew('V', mce, ty, binop(original_op, qaa, bb)),
                 mkPCastTo(mce, ty, qbb)
         )
      );
}
1756
1757
1758/*------------------------------------------------------------*/
1759/*--- Helpers for dealing with vector primops. ---*/
sewardj3245c912004-12-10 14:58:26 +00001760/*------------------------------------------------------------*/
1761
sewardja1d93302004-12-12 16:45:06 +00001762/* Vector pessimisation -- pessimise within each lane individually. */
1763
1764static IRAtom* mkPCast8x16 ( MCEnv* mce, IRAtom* at )
1765{
sewardj7cf4e6b2008-05-01 20:24:26 +00001766 return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ8x16, at));
sewardja1d93302004-12-12 16:45:06 +00001767}
1768
1769static IRAtom* mkPCast16x8 ( MCEnv* mce, IRAtom* at )
1770{
sewardj7cf4e6b2008-05-01 20:24:26 +00001771 return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ16x8, at));
sewardja1d93302004-12-12 16:45:06 +00001772}
1773
1774static IRAtom* mkPCast32x4 ( MCEnv* mce, IRAtom* at )
1775{
sewardj7cf4e6b2008-05-01 20:24:26 +00001776 return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ32x4, at));
sewardja1d93302004-12-12 16:45:06 +00001777}
1778
1779static IRAtom* mkPCast64x2 ( MCEnv* mce, IRAtom* at )
1780{
sewardj7cf4e6b2008-05-01 20:24:26 +00001781 return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ64x2, at));
sewardja1d93302004-12-12 16:45:06 +00001782}
1783
sewardjacd2e912005-01-13 19:17:06 +00001784static IRAtom* mkPCast32x2 ( MCEnv* mce, IRAtom* at )
1785{
sewardj7cf4e6b2008-05-01 20:24:26 +00001786 return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ32x2, at));
sewardjacd2e912005-01-13 19:17:06 +00001787}
1788
1789static IRAtom* mkPCast16x4 ( MCEnv* mce, IRAtom* at )
1790{
sewardj7cf4e6b2008-05-01 20:24:26 +00001791 return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ16x4, at));
sewardjacd2e912005-01-13 19:17:06 +00001792}
1793
1794static IRAtom* mkPCast8x8 ( MCEnv* mce, IRAtom* at )
1795{
sewardj7cf4e6b2008-05-01 20:24:26 +00001796 return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ8x8, at));
sewardjacd2e912005-01-13 19:17:06 +00001797}
1798
sewardjc678b852010-09-22 00:58:51 +00001799static IRAtom* mkPCast16x2 ( MCEnv* mce, IRAtom* at )
1800{
1801 return assignNew('V', mce, Ity_I32, unop(Iop_CmpNEZ16x2, at));
1802}
1803
1804static IRAtom* mkPCast8x4 ( MCEnv* mce, IRAtom* at )
1805{
1806 return assignNew('V', mce, Ity_I32, unop(Iop_CmpNEZ8x4, at));
1807}
1808
sewardja1d93302004-12-12 16:45:06 +00001809
sewardj3245c912004-12-10 14:58:26 +00001810/* Here's a simple scheme capable of handling ops derived from SSE1
1811 code and while only generating ops that can be efficiently
1812 implemented in SSE1. */
1813
1814/* All-lanes versions are straightforward:
1815
sewardj20d38f22005-02-07 23:50:18 +00001816 binary32Fx4(x,y) ==> PCast32x4(UifUV128(x#,y#))
sewardj3245c912004-12-10 14:58:26 +00001817
1818 unary32Fx4(x,y) ==> PCast32x4(x#)
1819
1820 Lowest-lane-only versions are more complex:
1821
sewardj20d38f22005-02-07 23:50:18 +00001822 binary32F0x4(x,y) ==> SetV128lo32(
sewardj3245c912004-12-10 14:58:26 +00001823 x#,
sewardj20d38f22005-02-07 23:50:18 +00001824 PCast32(V128to32(UifUV128(x#,y#)))
sewardj3245c912004-12-10 14:58:26 +00001825 )
1826
1827 This is perhaps not so obvious. In particular, it's faster to
sewardj20d38f22005-02-07 23:50:18 +00001828 do a V128-bit UifU and then take the bottom 32 bits than the more
sewardj3245c912004-12-10 14:58:26 +00001829 obvious scheme of taking the bottom 32 bits of each operand
1830 and doing a 32-bit UifU. Basically since UifU is fast and
1831 chopping lanes off vector values is slow.
1832
1833 Finally:
1834
sewardj20d38f22005-02-07 23:50:18 +00001835 unary32F0x4(x) ==> SetV128lo32(
sewardj3245c912004-12-10 14:58:26 +00001836 x#,
sewardj20d38f22005-02-07 23:50:18 +00001837 PCast32(V128to32(x#))
sewardj3245c912004-12-10 14:58:26 +00001838 )
1839
1840 Where:
1841
1842 PCast32(v#) = 1Sto32(CmpNE32(v#,0))
1843 PCast32x4(v#) = CmpNEZ32x4(v#)
1844*/
1845
1846static
1847IRAtom* binary32Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1848{
1849 IRAtom* at;
1850 tl_assert(isShadowAtom(mce, vatomX));
1851 tl_assert(isShadowAtom(mce, vatomY));
sewardj20d38f22005-02-07 23:50:18 +00001852 at = mkUifUV128(mce, vatomX, vatomY);
sewardj7cf4e6b2008-05-01 20:24:26 +00001853 at = assignNew('V', mce, Ity_V128, mkPCast32x4(mce, at));
sewardj3245c912004-12-10 14:58:26 +00001854 return at;
1855}
1856
1857static
1858IRAtom* unary32Fx4 ( MCEnv* mce, IRAtom* vatomX )
1859{
1860 IRAtom* at;
1861 tl_assert(isShadowAtom(mce, vatomX));
sewardj7cf4e6b2008-05-01 20:24:26 +00001862 at = assignNew('V', mce, Ity_V128, mkPCast32x4(mce, vatomX));
sewardj3245c912004-12-10 14:58:26 +00001863 return at;
1864}
1865
1866static
1867IRAtom* binary32F0x4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1868{
1869 IRAtom* at;
1870 tl_assert(isShadowAtom(mce, vatomX));
1871 tl_assert(isShadowAtom(mce, vatomY));
sewardj20d38f22005-02-07 23:50:18 +00001872 at = mkUifUV128(mce, vatomX, vatomY);
sewardj7cf4e6b2008-05-01 20:24:26 +00001873 at = assignNew('V', mce, Ity_I32, unop(Iop_V128to32, at));
sewardj3245c912004-12-10 14:58:26 +00001874 at = mkPCastTo(mce, Ity_I32, at);
sewardj7cf4e6b2008-05-01 20:24:26 +00001875 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
sewardj3245c912004-12-10 14:58:26 +00001876 return at;
1877}
1878
1879static
1880IRAtom* unary32F0x4 ( MCEnv* mce, IRAtom* vatomX )
1881{
1882 IRAtom* at;
1883 tl_assert(isShadowAtom(mce, vatomX));
sewardj7cf4e6b2008-05-01 20:24:26 +00001884 at = assignNew('V', mce, Ity_I32, unop(Iop_V128to32, vatomX));
sewardj3245c912004-12-10 14:58:26 +00001885 at = mkPCastTo(mce, Ity_I32, at);
sewardj7cf4e6b2008-05-01 20:24:26 +00001886 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
sewardj3245c912004-12-10 14:58:26 +00001887 return at;
1888}
1889
sewardj0b070592004-12-10 21:44:22 +00001890/* --- ... and ... 64Fx2 versions of the same ... --- */
1891
1892static
1893IRAtom* binary64Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1894{
1895 IRAtom* at;
1896 tl_assert(isShadowAtom(mce, vatomX));
1897 tl_assert(isShadowAtom(mce, vatomY));
sewardj20d38f22005-02-07 23:50:18 +00001898 at = mkUifUV128(mce, vatomX, vatomY);
sewardj7cf4e6b2008-05-01 20:24:26 +00001899 at = assignNew('V', mce, Ity_V128, mkPCast64x2(mce, at));
sewardj0b070592004-12-10 21:44:22 +00001900 return at;
1901}
1902
1903static
1904IRAtom* unary64Fx2 ( MCEnv* mce, IRAtom* vatomX )
1905{
1906 IRAtom* at;
1907 tl_assert(isShadowAtom(mce, vatomX));
sewardj7cf4e6b2008-05-01 20:24:26 +00001908 at = assignNew('V', mce, Ity_V128, mkPCast64x2(mce, vatomX));
sewardj0b070592004-12-10 21:44:22 +00001909 return at;
1910}
1911
1912static
1913IRAtom* binary64F0x2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1914{
1915 IRAtom* at;
1916 tl_assert(isShadowAtom(mce, vatomX));
1917 tl_assert(isShadowAtom(mce, vatomY));
sewardj20d38f22005-02-07 23:50:18 +00001918 at = mkUifUV128(mce, vatomX, vatomY);
sewardj7cf4e6b2008-05-01 20:24:26 +00001919 at = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, at));
sewardj0b070592004-12-10 21:44:22 +00001920 at = mkPCastTo(mce, Ity_I64, at);
sewardj7cf4e6b2008-05-01 20:24:26 +00001921 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
sewardj0b070592004-12-10 21:44:22 +00001922 return at;
1923}
1924
1925static
1926IRAtom* unary64F0x2 ( MCEnv* mce, IRAtom* vatomX )
1927{
1928 IRAtom* at;
1929 tl_assert(isShadowAtom(mce, vatomX));
sewardj7cf4e6b2008-05-01 20:24:26 +00001930 at = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vatomX));
sewardj0b070592004-12-10 21:44:22 +00001931 at = mkPCastTo(mce, Ity_I64, at);
sewardj7cf4e6b2008-05-01 20:24:26 +00001932 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
sewardj0b070592004-12-10 21:44:22 +00001933 return at;
1934}
1935
sewardj57f92b02010-08-22 11:54:14 +00001936/* --- --- ... and ... 32Fx2 versions of the same --- --- */
1937
1938static
1939IRAtom* binary32Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1940{
1941 IRAtom* at;
1942 tl_assert(isShadowAtom(mce, vatomX));
1943 tl_assert(isShadowAtom(mce, vatomY));
1944 at = mkUifU64(mce, vatomX, vatomY);
1945 at = assignNew('V', mce, Ity_I64, mkPCast32x2(mce, at));
1946 return at;
1947}
1948
1949static
1950IRAtom* unary32Fx2 ( MCEnv* mce, IRAtom* vatomX )
1951{
1952 IRAtom* at;
1953 tl_assert(isShadowAtom(mce, vatomX));
1954 at = assignNew('V', mce, Ity_I64, mkPCast32x2(mce, vatomX));
1955 return at;
1956}
1957
sewardja1d93302004-12-12 16:45:06 +00001958/* --- --- Vector saturated narrowing --- --- */
1959
1960/* This is quite subtle. What to do is simple:
1961
1962 Let the original narrowing op be QNarrowW{S,U}xN. Produce:
1963
1964 the-narrowing-op( PCastWxN(vatom1), PCastWxN(vatom2))
1965
1966 Why this is right is not so simple. Consider a lane in the args,
1967 vatom1 or 2, doesn't matter.
1968
1969 After the PCast, that lane is all 0s (defined) or all
1970 1s(undefined).
1971
1972 Both signed and unsigned saturating narrowing of all 0s produces
1973 all 0s, which is what we want.
1974
1975 The all-1s case is more complex. Unsigned narrowing interprets an
1976 all-1s input as the largest unsigned integer, and so produces all
1977 1s as a result since that is the largest unsigned value at the
1978 smaller width.
1979
1980 Signed narrowing interprets all 1s as -1. Fortunately, -1 narrows
1981 to -1, so we still wind up with all 1s at the smaller width.
1982
1983 So: In short, pessimise the args, then apply the original narrowing
1984 op.
sewardj9beeb0a2011-06-15 15:11:07 +00001985
1986 FIXME JRS 2011-Jun-15: figure out if this is still correct
1987 following today's rationalisation/cleanup of vector narrowing
1988 primops.
sewardja1d93302004-12-12 16:45:06 +00001989*/
1990static
sewardj7ee7d852011-06-16 11:37:21 +00001991IRAtom* vectorNarrowBinV128 ( MCEnv* mce, IROp narrow_op,
1992 IRAtom* vatom1, IRAtom* vatom2)
sewardja1d93302004-12-12 16:45:06 +00001993{
1994 IRAtom *at1, *at2, *at3;
1995 IRAtom* (*pcast)( MCEnv*, IRAtom* );
1996 switch (narrow_op) {
sewardj7ee7d852011-06-16 11:37:21 +00001997 case Iop_QNarrowBin32Sto16Sx8: pcast = mkPCast32x4; break;
1998 case Iop_QNarrowBin32Uto16Ux8: pcast = mkPCast32x4; break;
1999 case Iop_QNarrowBin32Sto16Ux8: pcast = mkPCast32x4; break;
2000 case Iop_QNarrowBin16Sto8Sx16: pcast = mkPCast16x8; break;
2001 case Iop_QNarrowBin16Uto8Ux16: pcast = mkPCast16x8; break;
2002 case Iop_QNarrowBin16Sto8Ux16: pcast = mkPCast16x8; break;
2003 default: VG_(tool_panic)("vectorNarrowBinV128");
sewardja1d93302004-12-12 16:45:06 +00002004 }
2005 tl_assert(isShadowAtom(mce,vatom1));
2006 tl_assert(isShadowAtom(mce,vatom2));
sewardj7cf4e6b2008-05-01 20:24:26 +00002007 at1 = assignNew('V', mce, Ity_V128, pcast(mce, vatom1));
2008 at2 = assignNew('V', mce, Ity_V128, pcast(mce, vatom2));
2009 at3 = assignNew('V', mce, Ity_V128, binop(narrow_op, at1, at2));
sewardja1d93302004-12-12 16:45:06 +00002010 return at3;
2011}
2012
sewardjacd2e912005-01-13 19:17:06 +00002013static
sewardj7ee7d852011-06-16 11:37:21 +00002014IRAtom* vectorNarrowBin64 ( MCEnv* mce, IROp narrow_op,
2015 IRAtom* vatom1, IRAtom* vatom2)
sewardjacd2e912005-01-13 19:17:06 +00002016{
2017 IRAtom *at1, *at2, *at3;
2018 IRAtom* (*pcast)( MCEnv*, IRAtom* );
2019 switch (narrow_op) {
sewardj7ee7d852011-06-16 11:37:21 +00002020 case Iop_QNarrowBin32Sto16Sx4: pcast = mkPCast32x2; break;
2021 case Iop_QNarrowBin16Sto8Sx8: pcast = mkPCast16x4; break;
2022 case Iop_QNarrowBin16Sto8Ux8: pcast = mkPCast16x4; break;
2023 default: VG_(tool_panic)("vectorNarrowBin64");
sewardjacd2e912005-01-13 19:17:06 +00002024 }
2025 tl_assert(isShadowAtom(mce,vatom1));
2026 tl_assert(isShadowAtom(mce,vatom2));
sewardj7cf4e6b2008-05-01 20:24:26 +00002027 at1 = assignNew('V', mce, Ity_I64, pcast(mce, vatom1));
2028 at2 = assignNew('V', mce, Ity_I64, pcast(mce, vatom2));
2029 at3 = assignNew('V', mce, Ity_I64, binop(narrow_op, at1, at2));
sewardjacd2e912005-01-13 19:17:06 +00002030 return at3;
2031}
2032
sewardj57f92b02010-08-22 11:54:14 +00002033static
sewardj7ee7d852011-06-16 11:37:21 +00002034IRAtom* vectorNarrowUnV128 ( MCEnv* mce, IROp shorten_op,
2035 IRAtom* vatom1)
sewardj57f92b02010-08-22 11:54:14 +00002036{
2037 IRAtom *at1, *at2;
2038 IRAtom* (*pcast)( MCEnv*, IRAtom* );
2039 switch (shorten_op) {
sewardj7ee7d852011-06-16 11:37:21 +00002040 /* FIXME: first 3 are too pessimistic; we can just
2041 apply them directly to the V bits. */
2042 case Iop_NarrowUn16to8x8: pcast = mkPCast16x8; break;
2043 case Iop_NarrowUn32to16x4: pcast = mkPCast32x4; break;
2044 case Iop_NarrowUn64to32x2: pcast = mkPCast64x2; break;
2045 case Iop_QNarrowUn16Sto8Sx8: pcast = mkPCast16x8; break;
2046 case Iop_QNarrowUn16Sto8Ux8: pcast = mkPCast16x8; break;
2047 case Iop_QNarrowUn16Uto8Ux8: pcast = mkPCast16x8; break;
2048 case Iop_QNarrowUn32Sto16Sx4: pcast = mkPCast32x4; break;
2049 case Iop_QNarrowUn32Sto16Ux4: pcast = mkPCast32x4; break;
2050 case Iop_QNarrowUn32Uto16Ux4: pcast = mkPCast32x4; break;
2051 case Iop_QNarrowUn64Sto32Sx2: pcast = mkPCast64x2; break;
2052 case Iop_QNarrowUn64Sto32Ux2: pcast = mkPCast64x2; break;
2053 case Iop_QNarrowUn64Uto32Ux2: pcast = mkPCast64x2; break;
2054 default: VG_(tool_panic)("vectorNarrowUnV128");
sewardj57f92b02010-08-22 11:54:14 +00002055 }
2056 tl_assert(isShadowAtom(mce,vatom1));
2057 at1 = assignNew('V', mce, Ity_V128, pcast(mce, vatom1));
2058 at2 = assignNew('V', mce, Ity_I64, unop(shorten_op, at1));
2059 return at2;
2060}
2061
2062static
sewardj7ee7d852011-06-16 11:37:21 +00002063IRAtom* vectorWidenI64 ( MCEnv* mce, IROp longen_op,
2064 IRAtom* vatom1)
sewardj57f92b02010-08-22 11:54:14 +00002065{
2066 IRAtom *at1, *at2;
2067 IRAtom* (*pcast)( MCEnv*, IRAtom* );
2068 switch (longen_op) {
sewardj7ee7d852011-06-16 11:37:21 +00002069 case Iop_Widen8Uto16x8: pcast = mkPCast16x8; break;
2070 case Iop_Widen8Sto16x8: pcast = mkPCast16x8; break;
2071 case Iop_Widen16Uto32x4: pcast = mkPCast32x4; break;
2072 case Iop_Widen16Sto32x4: pcast = mkPCast32x4; break;
2073 case Iop_Widen32Uto64x2: pcast = mkPCast64x2; break;
2074 case Iop_Widen32Sto64x2: pcast = mkPCast64x2; break;
2075 default: VG_(tool_panic)("vectorWidenI64");
sewardj57f92b02010-08-22 11:54:14 +00002076 }
2077 tl_assert(isShadowAtom(mce,vatom1));
2078 at1 = assignNew('V', mce, Ity_V128, unop(longen_op, vatom1));
2079 at2 = assignNew('V', mce, Ity_V128, pcast(mce, at1));
2080 return at2;
2081}
2082
sewardja1d93302004-12-12 16:45:06 +00002083
2084/* --- --- Vector integer arithmetic --- --- */
2085
2086/* Simple ... UifU the args and per-lane pessimise the results. */
sewardjacd2e912005-01-13 19:17:06 +00002087
sewardj20d38f22005-02-07 23:50:18 +00002088/* --- V128-bit versions --- */
sewardjacd2e912005-01-13 19:17:06 +00002089
sewardja1d93302004-12-12 16:45:06 +00002090static
2091IRAtom* binary8Ix16 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2092{
2093 IRAtom* at;
sewardj20d38f22005-02-07 23:50:18 +00002094 at = mkUifUV128(mce, vatom1, vatom2);
sewardja1d93302004-12-12 16:45:06 +00002095 at = mkPCast8x16(mce, at);
2096 return at;
2097}
2098
2099static
2100IRAtom* binary16Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2101{
2102 IRAtom* at;
sewardj20d38f22005-02-07 23:50:18 +00002103 at = mkUifUV128(mce, vatom1, vatom2);
sewardja1d93302004-12-12 16:45:06 +00002104 at = mkPCast16x8(mce, at);
2105 return at;
2106}
2107
2108static
2109IRAtom* binary32Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2110{
2111 IRAtom* at;
sewardj20d38f22005-02-07 23:50:18 +00002112 at = mkUifUV128(mce, vatom1, vatom2);
sewardja1d93302004-12-12 16:45:06 +00002113 at = mkPCast32x4(mce, at);
2114 return at;
2115}
2116
2117static
2118IRAtom* binary64Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2119{
2120 IRAtom* at;
sewardj20d38f22005-02-07 23:50:18 +00002121 at = mkUifUV128(mce, vatom1, vatom2);
sewardja1d93302004-12-12 16:45:06 +00002122 at = mkPCast64x2(mce, at);
2123 return at;
2124}
sewardj3245c912004-12-10 14:58:26 +00002125
sewardjacd2e912005-01-13 19:17:06 +00002126/* --- 64-bit versions --- */
2127
2128static
2129IRAtom* binary8Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2130{
2131 IRAtom* at;
2132 at = mkUifU64(mce, vatom1, vatom2);
2133 at = mkPCast8x8(mce, at);
2134 return at;
2135}
2136
2137static
2138IRAtom* binary16Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2139{
2140 IRAtom* at;
2141 at = mkUifU64(mce, vatom1, vatom2);
2142 at = mkPCast16x4(mce, at);
2143 return at;
2144}
2145
2146static
2147IRAtom* binary32Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2148{
2149 IRAtom* at;
2150 at = mkUifU64(mce, vatom1, vatom2);
2151 at = mkPCast32x2(mce, at);
2152 return at;
2153}
2154
sewardj57f92b02010-08-22 11:54:14 +00002155static
2156IRAtom* binary64Ix1 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2157{
2158 IRAtom* at;
2159 at = mkUifU64(mce, vatom1, vatom2);
2160 at = mkPCastTo(mce, Ity_I64, at);
2161 return at;
2162}
2163
sewardjc678b852010-09-22 00:58:51 +00002164/* --- 32-bit versions --- */
2165
2166static
2167IRAtom* binary8Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2168{
2169 IRAtom* at;
2170 at = mkUifU32(mce, vatom1, vatom2);
2171 at = mkPCast8x4(mce, at);
2172 return at;
2173}
2174
2175static
2176IRAtom* binary16Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2177{
2178 IRAtom* at;
2179 at = mkUifU32(mce, vatom1, vatom2);
2180 at = mkPCast16x2(mce, at);
2181 return at;
2182}
2183
sewardj3245c912004-12-10 14:58:26 +00002184
2185/*------------------------------------------------------------*/
sewardj95448072004-11-22 20:19:51 +00002186/*--- Generate shadow values from all kinds of IRExprs. ---*/
2187/*------------------------------------------------------------*/
2188
/* Compute the shadow (V-bits) value for a 4-ary (Qop) expression by
   lazily merging the definedness of all four args via mkLazy4.  Only
   the fused multiply-add/sub FP ops listed below appear here; any
   other op panics. */
static
IRAtom* expr2vbits_Qop ( MCEnv* mce,
                         IROp op,
                         IRAtom* atom1, IRAtom* atom2,
                         IRAtom* atom3, IRAtom* atom4 )
{
   IRAtom* vatom1 = expr2vbits( mce, atom1 );
   IRAtom* vatom2 = expr2vbits( mce, atom2 );
   IRAtom* vatom3 = expr2vbits( mce, atom3 );
   IRAtom* vatom4 = expr2vbits( mce, atom4 );

   tl_assert(isOriginalAtom(mce,atom1));
   tl_assert(isOriginalAtom(mce,atom2));
   tl_assert(isOriginalAtom(mce,atom3));
   tl_assert(isOriginalAtom(mce,atom4));
   tl_assert(isShadowAtom(mce,vatom1));
   tl_assert(isShadowAtom(mce,vatom2));
   tl_assert(isShadowAtom(mce,vatom3));
   tl_assert(isShadowAtom(mce,vatom4));
   tl_assert(sameKindedAtoms(atom1,vatom1));
   tl_assert(sameKindedAtoms(atom2,vatom2));
   tl_assert(sameKindedAtoms(atom3,vatom3));
   tl_assert(sameKindedAtoms(atom4,vatom4));
   switch (op) {
      case Iop_MAddF64:
      case Iop_MAddF64r32:
      case Iop_MSubF64:
      case Iop_MSubF64r32:
         /* I32(rm) x F64 x F64 x F64 -> F64 */
         return mkLazy4(mce, Ity_I64, vatom1, vatom2, vatom3, vatom4);

      case Iop_MAddF32:
      case Iop_MSubF32:
         /* I32(rm) x F32 x F32 x F32 -> F32 */
         return mkLazy4(mce, Ity_I32, vatom1, vatom2, vatom3, vatom4);

      default:
         ppIROp(op);
         VG_(tool_panic)("memcheck:expr2vbits_Qop");
   }
}
2230
2231
static
IRAtom* expr2vbits_Triop ( MCEnv* mce,
                           IROp op,
                           IRAtom* atom1, IRAtom* atom2, IRAtom* atom3 )
{
   /* Compute the shadow (definedness) value for a ternary IR
      operation: get shadow values for all three operands, then
      dispatch on the op.  FP ops are handled lazily (mkLazy3 with
      the result's shadow type); data-steering ops with an immediate
      index instead check the index is defined and apply the same op
      to the shadow bits. */
   IRAtom* vatom1 = expr2vbits( mce, atom1 );
   IRAtom* vatom2 = expr2vbits( mce, atom2 );
   IRAtom* vatom3 = expr2vbits( mce, atom3 );

   tl_assert(isOriginalAtom(mce,atom1));
   tl_assert(isOriginalAtom(mce,atom2));
   tl_assert(isOriginalAtom(mce,atom3));
   tl_assert(isShadowAtom(mce,vatom1));
   tl_assert(isShadowAtom(mce,vatom2));
   tl_assert(isShadowAtom(mce,vatom3));
   tl_assert(sameKindedAtoms(atom1,vatom1));
   tl_assert(sameKindedAtoms(atom2,vatom2));
   tl_assert(sameKindedAtoms(atom3,vatom3));
   switch (op) {
      case Iop_AddF128:
      case Iop_SubF128:
      case Iop_MulF128:
      case Iop_DivF128:
         /* I32(rm) x F128 x F128 -> F128 */
         return mkLazy3(mce, Ity_I128, vatom1, vatom2, vatom3);
      case Iop_AddF64:
      case Iop_AddF64r32:
      case Iop_SubF64:
      case Iop_SubF64r32:
      case Iop_MulF64:
      case Iop_MulF64r32:
      case Iop_DivF64:
      case Iop_DivF64r32:
      case Iop_ScaleF64:
      case Iop_Yl2xF64:
      case Iop_Yl2xp1F64:
      case Iop_AtanF64:
      case Iop_PRemF64:
      case Iop_PRem1F64:
         /* I32(rm) x F64 x F64 -> F64 */
         return mkLazy3(mce, Ity_I64, vatom1, vatom2, vatom3);
      case Iop_PRemC3210F64:
      case Iop_PRem1C3210F64:
         /* I32(rm) x F64 x F64 -> I32 */
         return mkLazy3(mce, Ity_I32, vatom1, vatom2, vatom3);
      case Iop_AddF32:
      case Iop_SubF32:
      case Iop_MulF32:
      case Iop_DivF32:
         /* I32(rm) x F32 x F32 -> F32 (an F32's shadow is Ity_I32) */
         return mkLazy3(mce, Ity_I32, vatom1, vatom2, vatom3);
      case Iop_ExtractV128:
         /* The extraction index (atom3) steers data, so it must be
            fully defined; the op itself is applied to the shadows. */
         complainIfUndefined(mce, atom3);
         return assignNew('V', mce, Ity_V128, triop(op, vatom1, vatom2, atom3));
      case Iop_Extract64:
         complainIfUndefined(mce, atom3);
         return assignNew('V', mce, Ity_I64, triop(op, vatom1, vatom2, atom3));
      case Iop_SetElem8x8:
      case Iop_SetElem16x4:
      case Iop_SetElem32x2:
         /* Here the lane number is atom2; it must be defined. */
         complainIfUndefined(mce, atom2);
         return assignNew('V', mce, Ity_I64, triop(op, vatom1, atom2, vatom3));
      /* Any op not handled above is a hard error: complain and die. */
      default:
         ppIROp(op);
         VG_(tool_panic)("memcheck:expr2vbits_Triop");
   }
}
2299
2300
2301static
sewardj95448072004-11-22 20:19:51 +00002302IRAtom* expr2vbits_Binop ( MCEnv* mce,
2303 IROp op,
2304 IRAtom* atom1, IRAtom* atom2 )
2305{
2306 IRType and_or_ty;
2307 IRAtom* (*uifu) (MCEnv*, IRAtom*, IRAtom*);
2308 IRAtom* (*difd) (MCEnv*, IRAtom*, IRAtom*);
2309 IRAtom* (*improve) (MCEnv*, IRAtom*, IRAtom*);
2310
2311 IRAtom* vatom1 = expr2vbits( mce, atom1 );
2312 IRAtom* vatom2 = expr2vbits( mce, atom2 );
2313
2314 tl_assert(isOriginalAtom(mce,atom1));
2315 tl_assert(isOriginalAtom(mce,atom2));
2316 tl_assert(isShadowAtom(mce,vatom1));
2317 tl_assert(isShadowAtom(mce,vatom2));
2318 tl_assert(sameKindedAtoms(atom1,vatom1));
2319 tl_assert(sameKindedAtoms(atom2,vatom2));
2320 switch (op) {
2321
sewardjc678b852010-09-22 00:58:51 +00002322 /* 32-bit SIMD */
2323
2324 case Iop_Add16x2:
2325 case Iop_HAdd16Ux2:
2326 case Iop_HAdd16Sx2:
2327 case Iop_Sub16x2:
2328 case Iop_HSub16Ux2:
2329 case Iop_HSub16Sx2:
2330 case Iop_QAdd16Sx2:
2331 case Iop_QSub16Sx2:
2332 return binary16Ix2(mce, vatom1, vatom2);
2333
2334 case Iop_Add8x4:
2335 case Iop_HAdd8Ux4:
2336 case Iop_HAdd8Sx4:
2337 case Iop_Sub8x4:
2338 case Iop_HSub8Ux4:
2339 case Iop_HSub8Sx4:
2340 case Iop_QSub8Ux4:
2341 case Iop_QAdd8Ux4:
2342 case Iop_QSub8Sx4:
2343 case Iop_QAdd8Sx4:
2344 return binary8Ix4(mce, vatom1, vatom2);
2345
sewardjacd2e912005-01-13 19:17:06 +00002346 /* 64-bit SIMD */
2347
sewardj57f92b02010-08-22 11:54:14 +00002348 case Iop_ShrN8x8:
sewardjacd2e912005-01-13 19:17:06 +00002349 case Iop_ShrN16x4:
2350 case Iop_ShrN32x2:
sewardj03809ae2006-12-27 01:16:58 +00002351 case Iop_SarN8x8:
sewardjacd2e912005-01-13 19:17:06 +00002352 case Iop_SarN16x4:
2353 case Iop_SarN32x2:
2354 case Iop_ShlN16x4:
2355 case Iop_ShlN32x2:
sewardj114a9172008-02-09 01:49:32 +00002356 case Iop_ShlN8x8:
sewardjacd2e912005-01-13 19:17:06 +00002357 /* Same scheme as with all other shifts. */
2358 complainIfUndefined(mce, atom2);
sewardj7cf4e6b2008-05-01 20:24:26 +00002359 return assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2));
sewardjacd2e912005-01-13 19:17:06 +00002360
sewardj7ee7d852011-06-16 11:37:21 +00002361 case Iop_QNarrowBin32Sto16Sx4:
2362 case Iop_QNarrowBin16Sto8Sx8:
2363 case Iop_QNarrowBin16Sto8Ux8:
2364 return vectorNarrowBin64(mce, op, vatom1, vatom2);
sewardjacd2e912005-01-13 19:17:06 +00002365
2366 case Iop_Min8Ux8:
sewardj57f92b02010-08-22 11:54:14 +00002367 case Iop_Min8Sx8:
sewardjacd2e912005-01-13 19:17:06 +00002368 case Iop_Max8Ux8:
sewardj57f92b02010-08-22 11:54:14 +00002369 case Iop_Max8Sx8:
sewardjacd2e912005-01-13 19:17:06 +00002370 case Iop_Avg8Ux8:
2371 case Iop_QSub8Sx8:
2372 case Iop_QSub8Ux8:
2373 case Iop_Sub8x8:
2374 case Iop_CmpGT8Sx8:
sewardj57f92b02010-08-22 11:54:14 +00002375 case Iop_CmpGT8Ux8:
sewardjacd2e912005-01-13 19:17:06 +00002376 case Iop_CmpEQ8x8:
2377 case Iop_QAdd8Sx8:
2378 case Iop_QAdd8Ux8:
sewardj57f92b02010-08-22 11:54:14 +00002379 case Iop_QSal8x8:
2380 case Iop_QShl8x8:
sewardjacd2e912005-01-13 19:17:06 +00002381 case Iop_Add8x8:
sewardj57f92b02010-08-22 11:54:14 +00002382 case Iop_Mul8x8:
2383 case Iop_PolynomialMul8x8:
sewardjacd2e912005-01-13 19:17:06 +00002384 return binary8Ix8(mce, vatom1, vatom2);
2385
2386 case Iop_Min16Sx4:
sewardj57f92b02010-08-22 11:54:14 +00002387 case Iop_Min16Ux4:
sewardjacd2e912005-01-13 19:17:06 +00002388 case Iop_Max16Sx4:
sewardj57f92b02010-08-22 11:54:14 +00002389 case Iop_Max16Ux4:
sewardjacd2e912005-01-13 19:17:06 +00002390 case Iop_Avg16Ux4:
2391 case Iop_QSub16Ux4:
2392 case Iop_QSub16Sx4:
2393 case Iop_Sub16x4:
2394 case Iop_Mul16x4:
2395 case Iop_MulHi16Sx4:
2396 case Iop_MulHi16Ux4:
2397 case Iop_CmpGT16Sx4:
sewardj57f92b02010-08-22 11:54:14 +00002398 case Iop_CmpGT16Ux4:
sewardjacd2e912005-01-13 19:17:06 +00002399 case Iop_CmpEQ16x4:
2400 case Iop_QAdd16Sx4:
2401 case Iop_QAdd16Ux4:
sewardj57f92b02010-08-22 11:54:14 +00002402 case Iop_QSal16x4:
2403 case Iop_QShl16x4:
sewardjacd2e912005-01-13 19:17:06 +00002404 case Iop_Add16x4:
sewardj57f92b02010-08-22 11:54:14 +00002405 case Iop_QDMulHi16Sx4:
2406 case Iop_QRDMulHi16Sx4:
sewardjacd2e912005-01-13 19:17:06 +00002407 return binary16Ix4(mce, vatom1, vatom2);
2408
2409 case Iop_Sub32x2:
sewardj114a9172008-02-09 01:49:32 +00002410 case Iop_Mul32x2:
sewardj57f92b02010-08-22 11:54:14 +00002411 case Iop_Max32Sx2:
2412 case Iop_Max32Ux2:
2413 case Iop_Min32Sx2:
2414 case Iop_Min32Ux2:
sewardjacd2e912005-01-13 19:17:06 +00002415 case Iop_CmpGT32Sx2:
sewardj57f92b02010-08-22 11:54:14 +00002416 case Iop_CmpGT32Ux2:
sewardjacd2e912005-01-13 19:17:06 +00002417 case Iop_CmpEQ32x2:
2418 case Iop_Add32x2:
sewardj57f92b02010-08-22 11:54:14 +00002419 case Iop_QAdd32Ux2:
2420 case Iop_QAdd32Sx2:
2421 case Iop_QSub32Ux2:
2422 case Iop_QSub32Sx2:
2423 case Iop_QSal32x2:
2424 case Iop_QShl32x2:
2425 case Iop_QDMulHi32Sx2:
2426 case Iop_QRDMulHi32Sx2:
sewardjacd2e912005-01-13 19:17:06 +00002427 return binary32Ix2(mce, vatom1, vatom2);
2428
sewardj57f92b02010-08-22 11:54:14 +00002429 case Iop_QSub64Ux1:
2430 case Iop_QSub64Sx1:
2431 case Iop_QAdd64Ux1:
2432 case Iop_QAdd64Sx1:
2433 case Iop_QSal64x1:
2434 case Iop_QShl64x1:
2435 case Iop_Sal64x1:
2436 return binary64Ix1(mce, vatom1, vatom2);
2437
2438 case Iop_QShlN8Sx8:
2439 case Iop_QShlN8x8:
2440 case Iop_QSalN8x8:
2441 complainIfUndefined(mce, atom2);
2442 return mkPCast8x8(mce, vatom1);
2443
2444 case Iop_QShlN16Sx4:
2445 case Iop_QShlN16x4:
2446 case Iop_QSalN16x4:
2447 complainIfUndefined(mce, atom2);
2448 return mkPCast16x4(mce, vatom1);
2449
2450 case Iop_QShlN32Sx2:
2451 case Iop_QShlN32x2:
2452 case Iop_QSalN32x2:
2453 complainIfUndefined(mce, atom2);
2454 return mkPCast32x2(mce, vatom1);
2455
2456 case Iop_QShlN64Sx1:
2457 case Iop_QShlN64x1:
2458 case Iop_QSalN64x1:
2459 complainIfUndefined(mce, atom2);
2460 return mkPCast32x2(mce, vatom1);
2461
2462 case Iop_PwMax32Sx2:
2463 case Iop_PwMax32Ux2:
2464 case Iop_PwMin32Sx2:
2465 case Iop_PwMin32Ux2:
2466 case Iop_PwMax32Fx2:
2467 case Iop_PwMin32Fx2:
2468 return assignNew('V', mce, Ity_I64, binop(Iop_PwMax32Ux2, mkPCast32x2(mce, vatom1),
2469 mkPCast32x2(mce, vatom2)));
2470
2471 case Iop_PwMax16Sx4:
2472 case Iop_PwMax16Ux4:
2473 case Iop_PwMin16Sx4:
2474 case Iop_PwMin16Ux4:
2475 return assignNew('V', mce, Ity_I64, binop(Iop_PwMax16Ux4, mkPCast16x4(mce, vatom1),
2476 mkPCast16x4(mce, vatom2)));
2477
2478 case Iop_PwMax8Sx8:
2479 case Iop_PwMax8Ux8:
2480 case Iop_PwMin8Sx8:
2481 case Iop_PwMin8Ux8:
2482 return assignNew('V', mce, Ity_I64, binop(Iop_PwMax8Ux8, mkPCast8x8(mce, vatom1),
2483 mkPCast8x8(mce, vatom2)));
2484
2485 case Iop_PwAdd32x2:
2486 case Iop_PwAdd32Fx2:
2487 return mkPCast32x2(mce,
2488 assignNew('V', mce, Ity_I64, binop(Iop_PwAdd32x2, mkPCast32x2(mce, vatom1),
2489 mkPCast32x2(mce, vatom2))));
2490
2491 case Iop_PwAdd16x4:
2492 return mkPCast16x4(mce,
2493 assignNew('V', mce, Ity_I64, binop(op, mkPCast16x4(mce, vatom1),
2494 mkPCast16x4(mce, vatom2))));
2495
2496 case Iop_PwAdd8x8:
2497 return mkPCast8x8(mce,
2498 assignNew('V', mce, Ity_I64, binop(op, mkPCast8x8(mce, vatom1),
2499 mkPCast8x8(mce, vatom2))));
2500
2501 case Iop_Shl8x8:
2502 case Iop_Shr8x8:
2503 case Iop_Sar8x8:
2504 case Iop_Sal8x8:
2505 return mkUifU64(mce,
2506 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
2507 mkPCast8x8(mce,vatom2)
2508 );
2509
2510 case Iop_Shl16x4:
2511 case Iop_Shr16x4:
2512 case Iop_Sar16x4:
2513 case Iop_Sal16x4:
2514 return mkUifU64(mce,
2515 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
2516 mkPCast16x4(mce,vatom2)
2517 );
2518
2519 case Iop_Shl32x2:
2520 case Iop_Shr32x2:
2521 case Iop_Sar32x2:
2522 case Iop_Sal32x2:
2523 return mkUifU64(mce,
2524 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
2525 mkPCast32x2(mce,vatom2)
2526 );
2527
sewardjacd2e912005-01-13 19:17:06 +00002528 /* 64-bit data-steering */
2529 case Iop_InterleaveLO32x2:
2530 case Iop_InterleaveLO16x4:
2531 case Iop_InterleaveLO8x8:
2532 case Iop_InterleaveHI32x2:
2533 case Iop_InterleaveHI16x4:
2534 case Iop_InterleaveHI8x8:
sewardj57f92b02010-08-22 11:54:14 +00002535 case Iop_CatOddLanes8x8:
2536 case Iop_CatEvenLanes8x8:
sewardj114a9172008-02-09 01:49:32 +00002537 case Iop_CatOddLanes16x4:
2538 case Iop_CatEvenLanes16x4:
sewardj57f92b02010-08-22 11:54:14 +00002539 case Iop_InterleaveOddLanes8x8:
2540 case Iop_InterleaveEvenLanes8x8:
2541 case Iop_InterleaveOddLanes16x4:
2542 case Iop_InterleaveEvenLanes16x4:
sewardj7cf4e6b2008-05-01 20:24:26 +00002543 return assignNew('V', mce, Ity_I64, binop(op, vatom1, vatom2));
sewardjacd2e912005-01-13 19:17:06 +00002544
sewardj57f92b02010-08-22 11:54:14 +00002545 case Iop_GetElem8x8:
2546 complainIfUndefined(mce, atom2);
2547 return assignNew('V', mce, Ity_I8, binop(op, vatom1, atom2));
2548 case Iop_GetElem16x4:
2549 complainIfUndefined(mce, atom2);
2550 return assignNew('V', mce, Ity_I16, binop(op, vatom1, atom2));
2551 case Iop_GetElem32x2:
2552 complainIfUndefined(mce, atom2);
2553 return assignNew('V', mce, Ity_I32, binop(op, vatom1, atom2));
2554
sewardj114a9172008-02-09 01:49:32 +00002555 /* Perm8x8: rearrange values in left arg using steering values
2556 from right arg. So rearrange the vbits in the same way but
2557 pessimise wrt steering values. */
2558 case Iop_Perm8x8:
2559 return mkUifU64(
2560 mce,
sewardj7cf4e6b2008-05-01 20:24:26 +00002561 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
sewardj114a9172008-02-09 01:49:32 +00002562 mkPCast8x8(mce, vatom2)
2563 );
2564
sewardj20d38f22005-02-07 23:50:18 +00002565 /* V128-bit SIMD */
sewardj0b070592004-12-10 21:44:22 +00002566
sewardj57f92b02010-08-22 11:54:14 +00002567 case Iop_ShrN8x16:
sewardja1d93302004-12-12 16:45:06 +00002568 case Iop_ShrN16x8:
2569 case Iop_ShrN32x4:
2570 case Iop_ShrN64x2:
sewardj57f92b02010-08-22 11:54:14 +00002571 case Iop_SarN8x16:
sewardja1d93302004-12-12 16:45:06 +00002572 case Iop_SarN16x8:
2573 case Iop_SarN32x4:
sewardj57f92b02010-08-22 11:54:14 +00002574 case Iop_SarN64x2:
2575 case Iop_ShlN8x16:
sewardja1d93302004-12-12 16:45:06 +00002576 case Iop_ShlN16x8:
2577 case Iop_ShlN32x4:
2578 case Iop_ShlN64x2:
sewardj620eb5b2005-10-22 12:50:43 +00002579 /* Same scheme as with all other shifts. Note: 22 Oct 05:
2580 this is wrong now, scalar shifts are done properly lazily.
2581 Vector shifts should be fixed too. */
sewardja1d93302004-12-12 16:45:06 +00002582 complainIfUndefined(mce, atom2);
sewardj7cf4e6b2008-05-01 20:24:26 +00002583 return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2));
sewardja1d93302004-12-12 16:45:06 +00002584
sewardjcbf8be72005-11-10 18:34:41 +00002585 /* V x V shifts/rotates are done using the standard lazy scheme. */
sewardj43d60752005-11-10 18:13:01 +00002586 case Iop_Shl8x16:
2587 case Iop_Shr8x16:
2588 case Iop_Sar8x16:
sewardj57f92b02010-08-22 11:54:14 +00002589 case Iop_Sal8x16:
sewardjcbf8be72005-11-10 18:34:41 +00002590 case Iop_Rol8x16:
sewardj43d60752005-11-10 18:13:01 +00002591 return mkUifUV128(mce,
sewardj7cf4e6b2008-05-01 20:24:26 +00002592 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
sewardj43d60752005-11-10 18:13:01 +00002593 mkPCast8x16(mce,vatom2)
2594 );
2595
2596 case Iop_Shl16x8:
2597 case Iop_Shr16x8:
2598 case Iop_Sar16x8:
sewardj57f92b02010-08-22 11:54:14 +00002599 case Iop_Sal16x8:
sewardjcbf8be72005-11-10 18:34:41 +00002600 case Iop_Rol16x8:
sewardj43d60752005-11-10 18:13:01 +00002601 return mkUifUV128(mce,
sewardj7cf4e6b2008-05-01 20:24:26 +00002602 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
sewardj43d60752005-11-10 18:13:01 +00002603 mkPCast16x8(mce,vatom2)
2604 );
2605
2606 case Iop_Shl32x4:
2607 case Iop_Shr32x4:
2608 case Iop_Sar32x4:
sewardj57f92b02010-08-22 11:54:14 +00002609 case Iop_Sal32x4:
sewardjcbf8be72005-11-10 18:34:41 +00002610 case Iop_Rol32x4:
sewardj43d60752005-11-10 18:13:01 +00002611 return mkUifUV128(mce,
sewardj7cf4e6b2008-05-01 20:24:26 +00002612 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
sewardj43d60752005-11-10 18:13:01 +00002613 mkPCast32x4(mce,vatom2)
2614 );
2615
sewardj57f92b02010-08-22 11:54:14 +00002616 case Iop_Shl64x2:
2617 case Iop_Shr64x2:
2618 case Iop_Sar64x2:
2619 case Iop_Sal64x2:
2620 return mkUifUV128(mce,
2621 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
2622 mkPCast64x2(mce,vatom2)
2623 );
2624
2625 case Iop_F32ToFixed32Ux4_RZ:
2626 case Iop_F32ToFixed32Sx4_RZ:
2627 case Iop_Fixed32UToF32x4_RN:
2628 case Iop_Fixed32SToF32x4_RN:
2629 complainIfUndefined(mce, atom2);
2630 return mkPCast32x4(mce, vatom1);
2631
2632 case Iop_F32ToFixed32Ux2_RZ:
2633 case Iop_F32ToFixed32Sx2_RZ:
2634 case Iop_Fixed32UToF32x2_RN:
2635 case Iop_Fixed32SToF32x2_RN:
2636 complainIfUndefined(mce, atom2);
2637 return mkPCast32x2(mce, vatom1);
2638
sewardja1d93302004-12-12 16:45:06 +00002639 case Iop_QSub8Ux16:
2640 case Iop_QSub8Sx16:
2641 case Iop_Sub8x16:
2642 case Iop_Min8Ux16:
sewardj43d60752005-11-10 18:13:01 +00002643 case Iop_Min8Sx16:
sewardja1d93302004-12-12 16:45:06 +00002644 case Iop_Max8Ux16:
sewardj43d60752005-11-10 18:13:01 +00002645 case Iop_Max8Sx16:
sewardja1d93302004-12-12 16:45:06 +00002646 case Iop_CmpGT8Sx16:
sewardj43d60752005-11-10 18:13:01 +00002647 case Iop_CmpGT8Ux16:
sewardja1d93302004-12-12 16:45:06 +00002648 case Iop_CmpEQ8x16:
2649 case Iop_Avg8Ux16:
sewardj43d60752005-11-10 18:13:01 +00002650 case Iop_Avg8Sx16:
sewardja1d93302004-12-12 16:45:06 +00002651 case Iop_QAdd8Ux16:
2652 case Iop_QAdd8Sx16:
sewardj57f92b02010-08-22 11:54:14 +00002653 case Iop_QSal8x16:
2654 case Iop_QShl8x16:
sewardja1d93302004-12-12 16:45:06 +00002655 case Iop_Add8x16:
sewardj57f92b02010-08-22 11:54:14 +00002656 case Iop_Mul8x16:
2657 case Iop_PolynomialMul8x16:
sewardja1d93302004-12-12 16:45:06 +00002658 return binary8Ix16(mce, vatom1, vatom2);
2659
2660 case Iop_QSub16Ux8:
2661 case Iop_QSub16Sx8:
2662 case Iop_Sub16x8:
2663 case Iop_Mul16x8:
2664 case Iop_MulHi16Sx8:
2665 case Iop_MulHi16Ux8:
2666 case Iop_Min16Sx8:
sewardj43d60752005-11-10 18:13:01 +00002667 case Iop_Min16Ux8:
sewardja1d93302004-12-12 16:45:06 +00002668 case Iop_Max16Sx8:
sewardj43d60752005-11-10 18:13:01 +00002669 case Iop_Max16Ux8:
sewardja1d93302004-12-12 16:45:06 +00002670 case Iop_CmpGT16Sx8:
sewardj43d60752005-11-10 18:13:01 +00002671 case Iop_CmpGT16Ux8:
sewardja1d93302004-12-12 16:45:06 +00002672 case Iop_CmpEQ16x8:
2673 case Iop_Avg16Ux8:
sewardj43d60752005-11-10 18:13:01 +00002674 case Iop_Avg16Sx8:
sewardja1d93302004-12-12 16:45:06 +00002675 case Iop_QAdd16Ux8:
2676 case Iop_QAdd16Sx8:
sewardj57f92b02010-08-22 11:54:14 +00002677 case Iop_QSal16x8:
2678 case Iop_QShl16x8:
sewardja1d93302004-12-12 16:45:06 +00002679 case Iop_Add16x8:
sewardj57f92b02010-08-22 11:54:14 +00002680 case Iop_QDMulHi16Sx8:
2681 case Iop_QRDMulHi16Sx8:
sewardja1d93302004-12-12 16:45:06 +00002682 return binary16Ix8(mce, vatom1, vatom2);
2683
2684 case Iop_Sub32x4:
2685 case Iop_CmpGT32Sx4:
sewardj43d60752005-11-10 18:13:01 +00002686 case Iop_CmpGT32Ux4:
sewardja1d93302004-12-12 16:45:06 +00002687 case Iop_CmpEQ32x4:
sewardj43d60752005-11-10 18:13:01 +00002688 case Iop_QAdd32Sx4:
2689 case Iop_QAdd32Ux4:
2690 case Iop_QSub32Sx4:
2691 case Iop_QSub32Ux4:
sewardj57f92b02010-08-22 11:54:14 +00002692 case Iop_QSal32x4:
2693 case Iop_QShl32x4:
sewardj43d60752005-11-10 18:13:01 +00002694 case Iop_Avg32Ux4:
2695 case Iop_Avg32Sx4:
sewardja1d93302004-12-12 16:45:06 +00002696 case Iop_Add32x4:
sewardj43d60752005-11-10 18:13:01 +00002697 case Iop_Max32Ux4:
2698 case Iop_Max32Sx4:
2699 case Iop_Min32Ux4:
2700 case Iop_Min32Sx4:
sewardjb823b852010-06-18 08:18:38 +00002701 case Iop_Mul32x4:
sewardj57f92b02010-08-22 11:54:14 +00002702 case Iop_QDMulHi32Sx4:
2703 case Iop_QRDMulHi32Sx4:
sewardja1d93302004-12-12 16:45:06 +00002704 return binary32Ix4(mce, vatom1, vatom2);
2705
2706 case Iop_Sub64x2:
2707 case Iop_Add64x2:
sewardj9a2afe92011-10-19 15:24:55 +00002708 case Iop_CmpEQ64x2:
sewardjb823b852010-06-18 08:18:38 +00002709 case Iop_CmpGT64Sx2:
sewardj57f92b02010-08-22 11:54:14 +00002710 case Iop_QSal64x2:
2711 case Iop_QShl64x2:
2712 case Iop_QAdd64Ux2:
2713 case Iop_QAdd64Sx2:
2714 case Iop_QSub64Ux2:
2715 case Iop_QSub64Sx2:
sewardja1d93302004-12-12 16:45:06 +00002716 return binary64Ix2(mce, vatom1, vatom2);
2717
sewardj7ee7d852011-06-16 11:37:21 +00002718 case Iop_QNarrowBin32Sto16Sx8:
2719 case Iop_QNarrowBin32Uto16Ux8:
2720 case Iop_QNarrowBin32Sto16Ux8:
2721 case Iop_QNarrowBin16Sto8Sx16:
2722 case Iop_QNarrowBin16Uto8Ux16:
2723 case Iop_QNarrowBin16Sto8Ux16:
2724 return vectorNarrowBinV128(mce, op, vatom1, vatom2);
sewardja1d93302004-12-12 16:45:06 +00002725
sewardj0b070592004-12-10 21:44:22 +00002726 case Iop_Sub64Fx2:
2727 case Iop_Mul64Fx2:
2728 case Iop_Min64Fx2:
2729 case Iop_Max64Fx2:
2730 case Iop_Div64Fx2:
2731 case Iop_CmpLT64Fx2:
2732 case Iop_CmpLE64Fx2:
2733 case Iop_CmpEQ64Fx2:
sewardj545663e2005-11-05 01:55:04 +00002734 case Iop_CmpUN64Fx2:
sewardj0b070592004-12-10 21:44:22 +00002735 case Iop_Add64Fx2:
2736 return binary64Fx2(mce, vatom1, vatom2);
2737
2738 case Iop_Sub64F0x2:
2739 case Iop_Mul64F0x2:
2740 case Iop_Min64F0x2:
2741 case Iop_Max64F0x2:
2742 case Iop_Div64F0x2:
2743 case Iop_CmpLT64F0x2:
2744 case Iop_CmpLE64F0x2:
2745 case Iop_CmpEQ64F0x2:
sewardj545663e2005-11-05 01:55:04 +00002746 case Iop_CmpUN64F0x2:
sewardj0b070592004-12-10 21:44:22 +00002747 case Iop_Add64F0x2:
2748 return binary64F0x2(mce, vatom1, vatom2);
2749
sewardj170ee212004-12-10 18:57:51 +00002750 case Iop_Sub32Fx4:
2751 case Iop_Mul32Fx4:
2752 case Iop_Min32Fx4:
2753 case Iop_Max32Fx4:
2754 case Iop_Div32Fx4:
2755 case Iop_CmpLT32Fx4:
2756 case Iop_CmpLE32Fx4:
2757 case Iop_CmpEQ32Fx4:
sewardj545663e2005-11-05 01:55:04 +00002758 case Iop_CmpUN32Fx4:
cerione78ba2a2005-11-14 03:00:35 +00002759 case Iop_CmpGT32Fx4:
2760 case Iop_CmpGE32Fx4:
sewardj3245c912004-12-10 14:58:26 +00002761 case Iop_Add32Fx4:
sewardj57f92b02010-08-22 11:54:14 +00002762 case Iop_Recps32Fx4:
2763 case Iop_Rsqrts32Fx4:
sewardj3245c912004-12-10 14:58:26 +00002764 return binary32Fx4(mce, vatom1, vatom2);
2765
sewardj57f92b02010-08-22 11:54:14 +00002766 case Iop_Sub32Fx2:
2767 case Iop_Mul32Fx2:
2768 case Iop_Min32Fx2:
2769 case Iop_Max32Fx2:
2770 case Iop_CmpEQ32Fx2:
2771 case Iop_CmpGT32Fx2:
2772 case Iop_CmpGE32Fx2:
2773 case Iop_Add32Fx2:
2774 case Iop_Recps32Fx2:
2775 case Iop_Rsqrts32Fx2:
2776 return binary32Fx2(mce, vatom1, vatom2);
2777
sewardj170ee212004-12-10 18:57:51 +00002778 case Iop_Sub32F0x4:
2779 case Iop_Mul32F0x4:
2780 case Iop_Min32F0x4:
2781 case Iop_Max32F0x4:
2782 case Iop_Div32F0x4:
2783 case Iop_CmpLT32F0x4:
2784 case Iop_CmpLE32F0x4:
2785 case Iop_CmpEQ32F0x4:
sewardj545663e2005-11-05 01:55:04 +00002786 case Iop_CmpUN32F0x4:
sewardj170ee212004-12-10 18:57:51 +00002787 case Iop_Add32F0x4:
2788 return binary32F0x4(mce, vatom1, vatom2);
2789
sewardj57f92b02010-08-22 11:54:14 +00002790 case Iop_QShlN8Sx16:
2791 case Iop_QShlN8x16:
2792 case Iop_QSalN8x16:
2793 complainIfUndefined(mce, atom2);
2794 return mkPCast8x16(mce, vatom1);
2795
2796 case Iop_QShlN16Sx8:
2797 case Iop_QShlN16x8:
2798 case Iop_QSalN16x8:
2799 complainIfUndefined(mce, atom2);
2800 return mkPCast16x8(mce, vatom1);
2801
2802 case Iop_QShlN32Sx4:
2803 case Iop_QShlN32x4:
2804 case Iop_QSalN32x4:
2805 complainIfUndefined(mce, atom2);
2806 return mkPCast32x4(mce, vatom1);
2807
2808 case Iop_QShlN64Sx2:
2809 case Iop_QShlN64x2:
2810 case Iop_QSalN64x2:
2811 complainIfUndefined(mce, atom2);
2812 return mkPCast32x4(mce, vatom1);
2813
2814 case Iop_Mull32Sx2:
2815 case Iop_Mull32Ux2:
2816 case Iop_QDMulLong32Sx2:
sewardj7ee7d852011-06-16 11:37:21 +00002817 return vectorWidenI64(mce, Iop_Widen32Sto64x2,
2818 mkUifU64(mce, vatom1, vatom2));
sewardj57f92b02010-08-22 11:54:14 +00002819
2820 case Iop_Mull16Sx4:
2821 case Iop_Mull16Ux4:
2822 case Iop_QDMulLong16Sx4:
sewardj7ee7d852011-06-16 11:37:21 +00002823 return vectorWidenI64(mce, Iop_Widen16Sto32x4,
2824 mkUifU64(mce, vatom1, vatom2));
sewardj57f92b02010-08-22 11:54:14 +00002825
2826 case Iop_Mull8Sx8:
2827 case Iop_Mull8Ux8:
2828 case Iop_PolynomialMull8x8:
sewardj7ee7d852011-06-16 11:37:21 +00002829 return vectorWidenI64(mce, Iop_Widen8Sto16x8,
2830 mkUifU64(mce, vatom1, vatom2));
sewardj57f92b02010-08-22 11:54:14 +00002831
2832 case Iop_PwAdd32x4:
2833 return mkPCast32x4(mce,
2834 assignNew('V', mce, Ity_V128, binop(op, mkPCast32x4(mce, vatom1),
2835 mkPCast32x4(mce, vatom2))));
2836
2837 case Iop_PwAdd16x8:
2838 return mkPCast16x8(mce,
2839 assignNew('V', mce, Ity_V128, binop(op, mkPCast16x8(mce, vatom1),
2840 mkPCast16x8(mce, vatom2))));
2841
2842 case Iop_PwAdd8x16:
2843 return mkPCast8x16(mce,
2844 assignNew('V', mce, Ity_V128, binop(op, mkPCast8x16(mce, vatom1),
2845 mkPCast8x16(mce, vatom2))));
2846
sewardj20d38f22005-02-07 23:50:18 +00002847 /* V128-bit data-steering */
2848 case Iop_SetV128lo32:
2849 case Iop_SetV128lo64:
2850 case Iop_64HLtoV128:
sewardja1d93302004-12-12 16:45:06 +00002851 case Iop_InterleaveLO64x2:
2852 case Iop_InterleaveLO32x4:
2853 case Iop_InterleaveLO16x8:
2854 case Iop_InterleaveLO8x16:
2855 case Iop_InterleaveHI64x2:
2856 case Iop_InterleaveHI32x4:
2857 case Iop_InterleaveHI16x8:
2858 case Iop_InterleaveHI8x16:
sewardj57f92b02010-08-22 11:54:14 +00002859 case Iop_CatOddLanes8x16:
2860 case Iop_CatOddLanes16x8:
2861 case Iop_CatOddLanes32x4:
2862 case Iop_CatEvenLanes8x16:
2863 case Iop_CatEvenLanes16x8:
2864 case Iop_CatEvenLanes32x4:
2865 case Iop_InterleaveOddLanes8x16:
2866 case Iop_InterleaveOddLanes16x8:
2867 case Iop_InterleaveOddLanes32x4:
2868 case Iop_InterleaveEvenLanes8x16:
2869 case Iop_InterleaveEvenLanes16x8:
2870 case Iop_InterleaveEvenLanes32x4:
sewardj7cf4e6b2008-05-01 20:24:26 +00002871 return assignNew('V', mce, Ity_V128, binop(op, vatom1, vatom2));
sewardj57f92b02010-08-22 11:54:14 +00002872
2873 case Iop_GetElem8x16:
2874 complainIfUndefined(mce, atom2);
2875 return assignNew('V', mce, Ity_I8, binop(op, vatom1, atom2));
2876 case Iop_GetElem16x8:
2877 complainIfUndefined(mce, atom2);
2878 return assignNew('V', mce, Ity_I16, binop(op, vatom1, atom2));
2879 case Iop_GetElem32x4:
2880 complainIfUndefined(mce, atom2);
2881 return assignNew('V', mce, Ity_I32, binop(op, vatom1, atom2));
2882 case Iop_GetElem64x2:
2883 complainIfUndefined(mce, atom2);
2884 return assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2));
2885
sewardj620eb5b2005-10-22 12:50:43 +00002886 /* Perm8x16: rearrange values in left arg using steering values
2887 from right arg. So rearrange the vbits in the same way but
2888 pessimise wrt steering values. */
2889 case Iop_Perm8x16:
2890 return mkUifUV128(
2891 mce,
sewardj7cf4e6b2008-05-01 20:24:26 +00002892 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
sewardj620eb5b2005-10-22 12:50:43 +00002893 mkPCast8x16(mce, vatom2)
2894 );
sewardj170ee212004-12-10 18:57:51 +00002895
sewardj43d60752005-11-10 18:13:01 +00002896 /* These two take the lower half of each 16-bit lane, sign/zero
2897 extend it to 32, and multiply together, producing a 32x4
2898 result (and implicitly ignoring half the operand bits). So
2899 treat it as a bunch of independent 16x8 operations, but then
2900 do 32-bit shifts left-right to copy the lower half results
2901 (which are all 0s or all 1s due to PCasting in binary16Ix8)
2902 into the upper half of each result lane. */
2903 case Iop_MullEven16Ux8:
2904 case Iop_MullEven16Sx8: {
2905 IRAtom* at;
2906 at = binary16Ix8(mce,vatom1,vatom2);
sewardj7cf4e6b2008-05-01 20:24:26 +00002907 at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN32x4, at, mkU8(16)));
2908 at = assignNew('V', mce, Ity_V128, binop(Iop_SarN32x4, at, mkU8(16)));
sewardj43d60752005-11-10 18:13:01 +00002909 return at;
2910 }
2911
2912 /* Same deal as Iop_MullEven16{S,U}x8 */
2913 case Iop_MullEven8Ux16:
2914 case Iop_MullEven8Sx16: {
2915 IRAtom* at;
2916 at = binary8Ix16(mce,vatom1,vatom2);
sewardj7cf4e6b2008-05-01 20:24:26 +00002917 at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN16x8, at, mkU8(8)));
2918 at = assignNew('V', mce, Ity_V128, binop(Iop_SarN16x8, at, mkU8(8)));
sewardj43d60752005-11-10 18:13:01 +00002919 return at;
2920 }
2921
2922 /* narrow 2xV128 into 1xV128, hi half from left arg, in a 2 x
2923 32x4 -> 16x8 laneage, discarding the upper half of each lane.
2924 Simply apply same op to the V bits, since this really no more
2925 than a data steering operation. */
sewardj7ee7d852011-06-16 11:37:21 +00002926 case Iop_NarrowBin32to16x8:
2927 case Iop_NarrowBin16to8x16:
sewardj7cf4e6b2008-05-01 20:24:26 +00002928 return assignNew('V', mce, Ity_V128,
2929 binop(op, vatom1, vatom2));
sewardj43d60752005-11-10 18:13:01 +00002930
2931 case Iop_ShrV128:
2932 case Iop_ShlV128:
2933 /* Same scheme as with all other shifts. Note: 10 Nov 05:
2934 this is wrong now, scalar shifts are done properly lazily.
2935 Vector shifts should be fixed too. */
2936 complainIfUndefined(mce, atom2);
sewardj7cf4e6b2008-05-01 20:24:26 +00002937 return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2));
sewardj43d60752005-11-10 18:13:01 +00002938
sewardj69a13322005-04-23 01:14:51 +00002939 /* I128-bit data-steering */
2940 case Iop_64HLto128:
sewardj7cf4e6b2008-05-01 20:24:26 +00002941 return assignNew('V', mce, Ity_I128, binop(op, vatom1, vatom2));
sewardj69a13322005-04-23 01:14:51 +00002942
sewardj3245c912004-12-10 14:58:26 +00002943 /* Scalar floating point */
2944
sewardjb5b87402011-03-07 16:05:35 +00002945 case Iop_F32toI64S:
2946 /* I32(rm) x F32 -> I64 */
2947 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
2948
2949 case Iop_I64StoF32:
2950 /* I32(rm) x I64 -> F32 */
2951 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
2952
sewardjed69fdb2006-02-03 16:12:27 +00002953 case Iop_RoundF64toInt:
2954 case Iop_RoundF64toF32:
sewardj06f96d02009-12-31 19:24:12 +00002955 case Iop_F64toI64S:
sewardja201c452011-07-24 14:15:54 +00002956 case Iop_F64toI64U:
sewardj06f96d02009-12-31 19:24:12 +00002957 case Iop_I64StoF64:
sewardjf34eb492011-04-15 11:57:05 +00002958 case Iop_I64UtoF64:
sewardj22ac5f42006-02-03 22:55:04 +00002959 case Iop_SinF64:
2960 case Iop_CosF64:
2961 case Iop_TanF64:
2962 case Iop_2xm1F64:
2963 case Iop_SqrtF64:
2964 /* I32(rm) x I64/F64 -> I64/F64 */
sewardj95448072004-11-22 20:19:51 +00002965 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
2966
sewardjd376a762010-06-27 09:08:54 +00002967 case Iop_RoundF32toInt:
sewardjaec1be32010-01-03 22:29:32 +00002968 case Iop_SqrtF32:
2969 /* I32(rm) x I32/F32 -> I32/F32 */
2970 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
2971
sewardjb5b87402011-03-07 16:05:35 +00002972 case Iop_SqrtF128:
2973 /* I32(rm) x F128 -> F128 */
2974 return mkLazy2(mce, Ity_I128, vatom1, vatom2);
2975
2976 case Iop_I32StoF32:
2977 case Iop_F32toI32S:
2978 /* First arg is I32 (rounding mode), second is F32/I32 (data). */
2979 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
2980
2981 case Iop_F128toI32S: /* IRRoundingMode(I32) x F128 -> signed I32 */
2982 case Iop_F128toF32: /* IRRoundingMode(I32) x F128 -> F32 */
2983 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
2984
2985 case Iop_F128toI64S: /* IRRoundingMode(I32) x F128 -> signed I64 */
2986 case Iop_F128toF64: /* IRRoundingMode(I32) x F128 -> F64 */
2987 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
2988
2989 case Iop_F64HLtoF128:
2990 return assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, vatom1, vatom2));
2991
sewardj59570ff2010-01-01 11:59:33 +00002992 case Iop_F64toI32U:
sewardj06f96d02009-12-31 19:24:12 +00002993 case Iop_F64toI32S:
sewardje9e16d32004-12-10 13:17:55 +00002994 case Iop_F64toF32:
sewardjf34eb492011-04-15 11:57:05 +00002995 case Iop_I64UtoF32:
sewardj95448072004-11-22 20:19:51 +00002996 /* First arg is I32 (rounding mode), second is F64 (data). */
2997 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
2998
sewardj06f96d02009-12-31 19:24:12 +00002999 case Iop_F64toI16S:
sewardj95448072004-11-22 20:19:51 +00003000 /* First arg is I32 (rounding mode), second is F64 (data). */
3001 return mkLazy2(mce, Ity_I16, vatom1, vatom2);
3002
sewardjb5b87402011-03-07 16:05:35 +00003003 case Iop_CmpF32:
sewardj95448072004-11-22 20:19:51 +00003004 case Iop_CmpF64:
sewardjb5b87402011-03-07 16:05:35 +00003005 case Iop_CmpF128:
sewardj95448072004-11-22 20:19:51 +00003006 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3007
3008 /* non-FP after here */
3009
3010 case Iop_DivModU64to32:
3011 case Iop_DivModS64to32:
3012 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3013
sewardj69a13322005-04-23 01:14:51 +00003014 case Iop_DivModU128to64:
3015 case Iop_DivModS128to64:
3016 return mkLazy2(mce, Ity_I128, vatom1, vatom2);
3017
sewardj95448072004-11-22 20:19:51 +00003018 case Iop_16HLto32:
sewardj7cf4e6b2008-05-01 20:24:26 +00003019 return assignNew('V', mce, Ity_I32, binop(op, vatom1, vatom2));
sewardj95448072004-11-22 20:19:51 +00003020 case Iop_32HLto64:
sewardj7cf4e6b2008-05-01 20:24:26 +00003021 return assignNew('V', mce, Ity_I64, binop(op, vatom1, vatom2));
sewardj95448072004-11-22 20:19:51 +00003022
sewardjb5b87402011-03-07 16:05:35 +00003023 case Iop_DivModS64to64:
sewardj6cf40ff2005-04-20 22:31:26 +00003024 case Iop_MullS64:
3025 case Iop_MullU64: {
3026 IRAtom* vLo64 = mkLeft64(mce, mkUifU64(mce, vatom1,vatom2));
3027 IRAtom* vHi64 = mkPCastTo(mce, Ity_I64, vLo64);
sewardj7cf4e6b2008-05-01 20:24:26 +00003028 return assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, vHi64, vLo64));
sewardj6cf40ff2005-04-20 22:31:26 +00003029 }
3030
sewardj95448072004-11-22 20:19:51 +00003031 case Iop_MullS32:
3032 case Iop_MullU32: {
3033 IRAtom* vLo32 = mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
3034 IRAtom* vHi32 = mkPCastTo(mce, Ity_I32, vLo32);
sewardj7cf4e6b2008-05-01 20:24:26 +00003035 return assignNew('V', mce, Ity_I64, binop(Iop_32HLto64, vHi32, vLo32));
sewardj95448072004-11-22 20:19:51 +00003036 }
3037
3038 case Iop_MullS16:
3039 case Iop_MullU16: {
3040 IRAtom* vLo16 = mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
3041 IRAtom* vHi16 = mkPCastTo(mce, Ity_I16, vLo16);
sewardj7cf4e6b2008-05-01 20:24:26 +00003042 return assignNew('V', mce, Ity_I32, binop(Iop_16HLto32, vHi16, vLo16));
sewardj95448072004-11-22 20:19:51 +00003043 }
3044
3045 case Iop_MullS8:
3046 case Iop_MullU8: {
3047 IRAtom* vLo8 = mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
3048 IRAtom* vHi8 = mkPCastTo(mce, Ity_I8, vLo8);
sewardj7cf4e6b2008-05-01 20:24:26 +00003049 return assignNew('V', mce, Ity_I16, binop(Iop_8HLto16, vHi8, vLo8));
sewardj95448072004-11-22 20:19:51 +00003050 }
3051
sewardj5af05062010-10-18 16:31:14 +00003052 case Iop_Sad8Ux4: /* maybe we could do better? ftm, do mkLazy2. */
cerion9e591082005-06-23 15:28:34 +00003053 case Iop_DivS32:
3054 case Iop_DivU32:
sewardja201c452011-07-24 14:15:54 +00003055 case Iop_DivU32E:
sewardj169ac042011-09-05 12:12:34 +00003056 case Iop_DivS32E:
cerion9e591082005-06-23 15:28:34 +00003057 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3058
sewardjb00944a2005-12-23 12:47:16 +00003059 case Iop_DivS64:
3060 case Iop_DivU64:
sewardja201c452011-07-24 14:15:54 +00003061 case Iop_DivS64E:
sewardj169ac042011-09-05 12:12:34 +00003062 case Iop_DivU64E:
sewardjb00944a2005-12-23 12:47:16 +00003063 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3064
sewardj95448072004-11-22 20:19:51 +00003065 case Iop_Add32:
sewardjd5204dc2004-12-31 01:16:11 +00003066 if (mce->bogusLiterals)
3067 return expensiveAddSub(mce,True,Ity_I32,
3068 vatom1,vatom2, atom1,atom2);
3069 else
3070 goto cheap_AddSub32;
sewardj95448072004-11-22 20:19:51 +00003071 case Iop_Sub32:
sewardjd5204dc2004-12-31 01:16:11 +00003072 if (mce->bogusLiterals)
3073 return expensiveAddSub(mce,False,Ity_I32,
3074 vatom1,vatom2, atom1,atom2);
3075 else
3076 goto cheap_AddSub32;
3077
3078 cheap_AddSub32:
sewardj95448072004-11-22 20:19:51 +00003079 case Iop_Mul32:
sewardj992dff92005-10-07 11:08:55 +00003080 return mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
3081
sewardj463b3d92005-07-18 11:41:15 +00003082 case Iop_CmpORD32S:
3083 case Iop_CmpORD32U:
sewardj1bc82102005-12-23 00:16:24 +00003084 case Iop_CmpORD64S:
3085 case Iop_CmpORD64U:
3086 return doCmpORD(mce, op, vatom1,vatom2, atom1,atom2);
sewardj95448072004-11-22 20:19:51 +00003087
sewardj681be302005-01-15 20:43:58 +00003088 case Iop_Add64:
tomd9774d72005-06-27 08:11:01 +00003089 if (mce->bogusLiterals)
3090 return expensiveAddSub(mce,True,Ity_I64,
3091 vatom1,vatom2, atom1,atom2);
3092 else
3093 goto cheap_AddSub64;
sewardj681be302005-01-15 20:43:58 +00003094 case Iop_Sub64:
tomd9774d72005-06-27 08:11:01 +00003095 if (mce->bogusLiterals)
3096 return expensiveAddSub(mce,False,Ity_I64,
3097 vatom1,vatom2, atom1,atom2);
3098 else
3099 goto cheap_AddSub64;
3100
3101 cheap_AddSub64:
3102 case Iop_Mul64:
sewardj681be302005-01-15 20:43:58 +00003103 return mkLeft64(mce, mkUifU64(mce, vatom1,vatom2));
3104
sewardj95448072004-11-22 20:19:51 +00003105 case Iop_Mul16:
3106 case Iop_Add16:
3107 case Iop_Sub16:
3108 return mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
3109
3110 case Iop_Sub8:
3111 case Iop_Add8:
3112 return mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
3113
sewardj69a13322005-04-23 01:14:51 +00003114 case Iop_CmpEQ64:
sewardje6f8af42005-07-06 18:48:59 +00003115 case Iop_CmpNE64:
sewardj69a13322005-04-23 01:14:51 +00003116 if (mce->bogusLiterals)
3117 return expensiveCmpEQorNE(mce,Ity_I64, vatom1,vatom2, atom1,atom2 );
3118 else
3119 goto cheap_cmp64;
3120 cheap_cmp64:
tomcd986332005-04-26 07:44:48 +00003121 case Iop_CmpLE64S: case Iop_CmpLE64U:
3122 case Iop_CmpLT64U: case Iop_CmpLT64S:
sewardj69a13322005-04-23 01:14:51 +00003123 return mkPCastTo(mce, Ity_I1, mkUifU64(mce, vatom1,vatom2));
3124
sewardjd5204dc2004-12-31 01:16:11 +00003125 case Iop_CmpEQ32:
sewardje6f8af42005-07-06 18:48:59 +00003126 case Iop_CmpNE32:
sewardjd5204dc2004-12-31 01:16:11 +00003127 if (mce->bogusLiterals)
3128 return expensiveCmpEQorNE(mce,Ity_I32, vatom1,vatom2, atom1,atom2 );
3129 else
3130 goto cheap_cmp32;
sewardjd5204dc2004-12-31 01:16:11 +00003131 cheap_cmp32:
sewardj95448072004-11-22 20:19:51 +00003132 case Iop_CmpLE32S: case Iop_CmpLE32U:
3133 case Iop_CmpLT32U: case Iop_CmpLT32S:
sewardj95448072004-11-22 20:19:51 +00003134 return mkPCastTo(mce, Ity_I1, mkUifU32(mce, vatom1,vatom2));
3135
3136 case Iop_CmpEQ16: case Iop_CmpNE16:
3137 return mkPCastTo(mce, Ity_I1, mkUifU16(mce, vatom1,vatom2));
3138
3139 case Iop_CmpEQ8: case Iop_CmpNE8:
3140 return mkPCastTo(mce, Ity_I1, mkUifU8(mce, vatom1,vatom2));
3141
sewardjafed4c52009-07-12 13:00:17 +00003142 case Iop_CasCmpEQ8: case Iop_CasCmpNE8:
3143 case Iop_CasCmpEQ16: case Iop_CasCmpNE16:
3144 case Iop_CasCmpEQ32: case Iop_CasCmpNE32:
3145 case Iop_CasCmpEQ64: case Iop_CasCmpNE64:
3146 /* Just say these all produce a defined result, regardless
3147 of their arguments. See COMMENT_ON_CasCmpEQ in this file. */
3148 return assignNew('V', mce, Ity_I1, definedOfType(Ity_I1));
3149
sewardjaaddbc22005-10-07 09:49:53 +00003150 case Iop_Shl64: case Iop_Shr64: case Iop_Sar64:
3151 return scalarShift( mce, Ity_I64, op, vatom1,vatom2, atom1,atom2 );
3152
sewardj95448072004-11-22 20:19:51 +00003153 case Iop_Shl32: case Iop_Shr32: case Iop_Sar32:
sewardjaaddbc22005-10-07 09:49:53 +00003154 return scalarShift( mce, Ity_I32, op, vatom1,vatom2, atom1,atom2 );
sewardj95448072004-11-22 20:19:51 +00003155
sewardjdb67f5f2004-12-14 01:15:31 +00003156 case Iop_Shl16: case Iop_Shr16: case Iop_Sar16:
sewardjaaddbc22005-10-07 09:49:53 +00003157 return scalarShift( mce, Ity_I16, op, vatom1,vatom2, atom1,atom2 );
sewardj95448072004-11-22 20:19:51 +00003158
3159 case Iop_Shl8: case Iop_Shr8:
sewardjaaddbc22005-10-07 09:49:53 +00003160 return scalarShift( mce, Ity_I8, op, vatom1,vatom2, atom1,atom2 );
sewardj95448072004-11-22 20:19:51 +00003161
sewardj20d38f22005-02-07 23:50:18 +00003162 case Iop_AndV128:
3163 uifu = mkUifUV128; difd = mkDifDV128;
3164 and_or_ty = Ity_V128; improve = mkImproveANDV128; goto do_And_Or;
sewardj7010f6e2004-12-10 13:35:22 +00003165 case Iop_And64:
3166 uifu = mkUifU64; difd = mkDifD64;
3167 and_or_ty = Ity_I64; improve = mkImproveAND64; goto do_And_Or;
sewardj95448072004-11-22 20:19:51 +00003168 case Iop_And32:
3169 uifu = mkUifU32; difd = mkDifD32;
3170 and_or_ty = Ity_I32; improve = mkImproveAND32; goto do_And_Or;
3171 case Iop_And16:
3172 uifu = mkUifU16; difd = mkDifD16;
3173 and_or_ty = Ity_I16; improve = mkImproveAND16; goto do_And_Or;
3174 case Iop_And8:
3175 uifu = mkUifU8; difd = mkDifD8;
3176 and_or_ty = Ity_I8; improve = mkImproveAND8; goto do_And_Or;
3177
sewardj20d38f22005-02-07 23:50:18 +00003178 case Iop_OrV128:
3179 uifu = mkUifUV128; difd = mkDifDV128;
3180 and_or_ty = Ity_V128; improve = mkImproveORV128; goto do_And_Or;
sewardj7010f6e2004-12-10 13:35:22 +00003181 case Iop_Or64:
3182 uifu = mkUifU64; difd = mkDifD64;
3183 and_or_ty = Ity_I64; improve = mkImproveOR64; goto do_And_Or;
sewardj95448072004-11-22 20:19:51 +00003184 case Iop_Or32:
3185 uifu = mkUifU32; difd = mkDifD32;
3186 and_or_ty = Ity_I32; improve = mkImproveOR32; goto do_And_Or;
3187 case Iop_Or16:
3188 uifu = mkUifU16; difd = mkDifD16;
3189 and_or_ty = Ity_I16; improve = mkImproveOR16; goto do_And_Or;
3190 case Iop_Or8:
3191 uifu = mkUifU8; difd = mkDifD8;
3192 and_or_ty = Ity_I8; improve = mkImproveOR8; goto do_And_Or;
3193
3194 do_And_Or:
3195 return
3196 assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +00003197 'V', mce,
sewardj95448072004-11-22 20:19:51 +00003198 and_or_ty,
3199 difd(mce, uifu(mce, vatom1, vatom2),
3200 difd(mce, improve(mce, atom1, vatom1),
3201 improve(mce, atom2, vatom2) ) ) );
3202
3203 case Iop_Xor8:
3204 return mkUifU8(mce, vatom1, vatom2);
3205 case Iop_Xor16:
3206 return mkUifU16(mce, vatom1, vatom2);
3207 case Iop_Xor32:
3208 return mkUifU32(mce, vatom1, vatom2);
sewardj7010f6e2004-12-10 13:35:22 +00003209 case Iop_Xor64:
3210 return mkUifU64(mce, vatom1, vatom2);
sewardj20d38f22005-02-07 23:50:18 +00003211 case Iop_XorV128:
3212 return mkUifUV128(mce, vatom1, vatom2);
njn25e49d8e72002-09-23 09:36:25 +00003213
3214 default:
sewardj95448072004-11-22 20:19:51 +00003215 ppIROp(op);
3216 VG_(tool_panic)("memcheck:expr2vbits_Binop");
njn25e49d8e72002-09-23 09:36:25 +00003217 }
njn25e49d8e72002-09-23 09:36:25 +00003218}
3219
njn25e49d8e72002-09-23 09:36:25 +00003220
static
IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
{
   /* Build the shadow (V-bits) expression for a unary op applied to
      'atom'.  The operand's vbits are computed first; each case below
      then transforms them, either exactly -- by applying the same op
      to the vbits, since the op merely steers bits around -- or
      pessimistically, by PCast-ing to the result type. */
   IRAtom* vatom = expr2vbits( mce, atom );
   tl_assert(isOriginalAtom(mce,atom));
   switch (op) {

      case Iop_Sqrt64Fx2:
         return unary64Fx2(mce, vatom);

      case Iop_Sqrt64F0x2:
         return unary64F0x2(mce, vatom);

      /* V128 ops with 32-bit FP lanes: pessimise per 32-bit lane. */
      case Iop_Sqrt32Fx4:
      case Iop_RSqrt32Fx4:
      case Iop_Recip32Fx4:
      case Iop_I32UtoFx4:
      case Iop_I32StoFx4:
      case Iop_QFtoI32Ux4_RZ:
      case Iop_QFtoI32Sx4_RZ:
      case Iop_RoundF32x4_RM:
      case Iop_RoundF32x4_RP:
      case Iop_RoundF32x4_RN:
      case Iop_RoundF32x4_RZ:
      case Iop_Recip32x4:
      case Iop_Abs32Fx4:
      case Iop_Neg32Fx4:
      case Iop_Rsqrte32Fx4:
         return unary32Fx4(mce, vatom);

      /* I64 ops with 32-bit FP lanes: pessimise per 32-bit lane. */
      case Iop_I32UtoFx2:
      case Iop_I32StoFx2:
      case Iop_Recip32Fx2:
      case Iop_Recip32x2:
      case Iop_Abs32Fx2:
      case Iop_Neg32Fx2:
      case Iop_Rsqrte32Fx2:
         return unary32Fx2(mce, vatom);

      /* Lowest-lane-only 32-bit FP ops. */
      case Iop_Sqrt32F0x4:
      case Iop_RSqrt32F0x4:
      case Iop_Recip32F0x4:
         return unary32F0x4(mce, vatom);

      /* Pure data-steering ops producing V128: vbits move exactly as
         the data does, so just apply the same op to the shadow. */
      case Iop_32UtoV128:
      case Iop_64UtoV128:
      case Iop_Dup8x16:
      case Iop_Dup16x8:
      case Iop_Dup32x4:
      case Iop_Reverse16_8x16:
      case Iop_Reverse32_8x16:
      case Iop_Reverse32_16x8:
      case Iop_Reverse64_8x16:
      case Iop_Reverse64_16x8:
      case Iop_Reverse64_32x4:
         return assignNew('V', mce, Ity_V128, unop(op, vatom));

      /* F128's shadow is an I128; select the relevant half. */
      case Iop_F128HItoF64:  /* F128 -> high half of F128 */
         return assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, vatom));
      case Iop_F128LOtoF64:  /* F128 -> low  half of F128 */
         return assignNew('V', mce, Ity_I64, unop(Iop_128to64, vatom));

      case Iop_NegF128:
      case Iop_AbsF128:
         return mkPCastTo(mce, Ity_I128, vatom);

      case Iop_I32StoF128: /* signed I32 -> F128 */
      case Iop_I64StoF128: /* signed I64 -> F128 */
      case Iop_F32toF128:  /* F32 -> F128 */
      case Iop_F64toF128:  /* F64 -> F128 */
         return mkPCastTo(mce, Ity_I128, vatom);

      /* Ops whose result (or its shadow) is 64 bits wide: pessimise
         the whole operand into a 64-bit all-or-nothing value. */
      case Iop_F32toF64: 
      case Iop_I32StoF64:
      case Iop_I32UtoF64:
      case Iop_NegF64:
      case Iop_AbsF64:
      case Iop_Est5FRSqrt:
      case Iop_RoundF64toF64_NEAREST:
      case Iop_RoundF64toF64_NegINF:
      case Iop_RoundF64toF64_PosINF:
      case Iop_RoundF64toF64_ZERO:
      case Iop_Clz64:
      case Iop_Ctz64:
         return mkPCastTo(mce, Ity_I64, vatom);

      /* As above, but 32-bit-wide results. */
      case Iop_Clz32:
      case Iop_Ctz32:
      case Iop_TruncF64asF32:
      case Iop_NegF32:
      case Iop_AbsF32:
         return mkPCastTo(mce, Ity_I32, vatom);

      /* Widenings / narrowings / dups / reverses to I64: exact
         tracking -- apply the same op to the vbits. */
      case Iop_1Uto64:
      case Iop_1Sto64:
      case Iop_8Uto64:
      case Iop_8Sto64:
      case Iop_16Uto64:
      case Iop_16Sto64:
      case Iop_32Sto64:
      case Iop_32Uto64:
      case Iop_V128to64:
      case Iop_V128HIto64:
      case Iop_128HIto64:
      case Iop_128to64:
      case Iop_Dup8x8:
      case Iop_Dup16x4:
      case Iop_Dup32x2:
      case Iop_Reverse16_8x8:
      case Iop_Reverse32_8x8:
      case Iop_Reverse32_16x4:
      case Iop_Reverse64_8x8:
      case Iop_Reverse64_16x4:
      case Iop_Reverse64_32x2:
         return assignNew('V', mce, Ity_I64, unop(op, vatom));

      /* Exact tracking, 32-bit results. */
      case Iop_I16StoF32:
      case Iop_64to32:
      case Iop_64HIto32:
      case Iop_1Uto32:
      case Iop_1Sto32:
      case Iop_8Uto32:
      case Iop_16Uto32:
      case Iop_16Sto32:
      case Iop_8Sto32:
      case Iop_V128to32:
         return assignNew('V', mce, Ity_I32, unop(op, vatom));

      /* Exact tracking, 16-bit results. */
      case Iop_8Sto16:
      case Iop_8Uto16:
      case Iop_32to16:
      case Iop_32HIto16:
      case Iop_64to16:
         return assignNew('V', mce, Ity_I16, unop(op, vatom));

      /* Exact tracking, 8-bit results. */
      case Iop_1Uto8:
      case Iop_1Sto8:
      case Iop_16to8:
      case Iop_16HIto8:
      case Iop_32to8:
      case Iop_64to8:
         return assignNew('V', mce, Ity_I8, unop(op, vatom));

      case Iop_32to1:
         return assignNew('V', mce, Ity_I1, unop(Iop_32to1, vatom));

      case Iop_64to1:
         return assignNew('V', mce, Ity_I1, unop(Iop_64to1, vatom));

      /* Reinterpretations and bitwise NOTs do not change which bits
         are defined, so the operand's vbits pass straight through. */
      case Iop_ReinterpF64asI64:
      case Iop_ReinterpI64asF64:
      case Iop_ReinterpI32asF32:
      case Iop_ReinterpF32asI32:
      case Iop_NotV128:
      case Iop_Not64:
      case Iop_Not32:
      case Iop_Not16:
      case Iop_Not8:
      case Iop_Not1:
         return vatom;

      /* Lane-wise pessimising casts, grouped by lane geometry. */
      case Iop_CmpNEZ8x8:
      case Iop_Cnt8x8:
      case Iop_Clz8Sx8:
      case Iop_Cls8Sx8:
      case Iop_Abs8x8:
         return mkPCast8x8(mce, vatom);

      case Iop_CmpNEZ8x16:
      case Iop_Cnt8x16:
      case Iop_Clz8Sx16:
      case Iop_Cls8Sx16:
      case Iop_Abs8x16:
         return mkPCast8x16(mce, vatom);

      case Iop_CmpNEZ16x4:
      case Iop_Clz16Sx4:
      case Iop_Cls16Sx4:
      case Iop_Abs16x4:
         return mkPCast16x4(mce, vatom);

      case Iop_CmpNEZ16x8:
      case Iop_Clz16Sx8:
      case Iop_Cls16Sx8:
      case Iop_Abs16x8:
         return mkPCast16x8(mce, vatom);

      case Iop_CmpNEZ32x2:
      case Iop_Clz32Sx2:
      case Iop_Cls32Sx2:
      case Iop_FtoI32Ux2_RZ:
      case Iop_FtoI32Sx2_RZ:
      case Iop_Abs32x2:
         return mkPCast32x2(mce, vatom);

      case Iop_CmpNEZ32x4:
      case Iop_Clz32Sx4:
      case Iop_Cls32Sx4:
      case Iop_FtoI32Ux4_RZ:
      case Iop_FtoI32Sx4_RZ:
      case Iop_Abs32x4:
         return mkPCast32x4(mce, vatom);

      case Iop_CmpwNEZ64:
         return mkPCastTo(mce, Ity_I64, vatom);

      case Iop_CmpNEZ64x2:
         return mkPCast64x2(mce, vatom);

      case Iop_NarrowUn16to8x8:
      case Iop_NarrowUn32to16x4:
      case Iop_NarrowUn64to32x2:
      case Iop_QNarrowUn16Sto8Sx8:
      case Iop_QNarrowUn16Sto8Ux8:
      case Iop_QNarrowUn16Uto8Ux8:
      case Iop_QNarrowUn32Sto16Sx4:
      case Iop_QNarrowUn32Sto16Ux4:
      case Iop_QNarrowUn32Uto16Ux4:
      case Iop_QNarrowUn64Sto32Sx2:
      case Iop_QNarrowUn64Sto32Ux2:
      case Iop_QNarrowUn64Uto32Ux2:
         return vectorNarrowUnV128(mce, op, vatom);

      case Iop_Widen8Sto16x8:
      case Iop_Widen8Uto16x8:
      case Iop_Widen16Sto32x4:
      case Iop_Widen16Uto32x4:
      case Iop_Widen32Sto64x2:
      case Iop_Widen32Uto64x2:
         return vectorWidenI64(mce, op, vatom);

      /* Pairwise add-and-widen: pessimise at the source lane size,
         apply the op, then re-pessimise at the result lane size. */
      case Iop_PwAddL32Ux2:
      case Iop_PwAddL32Sx2:
         return mkPCastTo(mce, Ity_I64,
               assignNew('V', mce, Ity_I64, unop(op, mkPCast32x2(mce, vatom))));

      case Iop_PwAddL16Ux4:
      case Iop_PwAddL16Sx4:
         return mkPCast32x2(mce,
               assignNew('V', mce, Ity_I64, unop(op, mkPCast16x4(mce, vatom))));

      case Iop_PwAddL8Ux8:
      case Iop_PwAddL8Sx8:
         return mkPCast16x4(mce,
               assignNew('V', mce, Ity_I64, unop(op, mkPCast8x8(mce, vatom))));

      case Iop_PwAddL32Ux4:
      case Iop_PwAddL32Sx4:
         return mkPCast64x2(mce,
               assignNew('V', mce, Ity_V128, unop(op, mkPCast32x4(mce, vatom))));

      case Iop_PwAddL16Ux8:
      case Iop_PwAddL16Sx8:
         return mkPCast32x4(mce,
               assignNew('V', mce, Ity_V128, unop(op, mkPCast16x8(mce, vatom))));

      case Iop_PwAddL8Ux16:
      case Iop_PwAddL8Sx16:
         return mkPCast16x8(mce,
               assignNew('V', mce, Ity_V128, unop(op, mkPCast8x16(mce, vatom))));

      /* NOTE(review): Iop_I64UtoF32 falls through to the panic below.
         It is handled as a *binop* (rounding mode x data) elsewhere in
         this file, so reaching it here looks intentional -- confirm. */
      case Iop_I64UtoF32:
      default:
         ppIROp(op);
         VG_(tool_panic)("memcheck:expr2vbits_Unop");
   }
}
3488
3489
sewardj170ee212004-12-10 18:57:51 +00003490/* Worker function; do not call directly. */
sewardj95448072004-11-22 20:19:51 +00003491static
sewardj2e595852005-06-30 23:33:37 +00003492IRAtom* expr2vbits_Load_WRK ( MCEnv* mce,
3493 IREndness end, IRType ty,
3494 IRAtom* addr, UInt bias )
sewardj95448072004-11-22 20:19:51 +00003495{
3496 void* helper;
3497 Char* hname;
3498 IRDirty* di;
3499 IRTemp datavbits;
3500 IRAtom* addrAct;
3501
3502 tl_assert(isOriginalAtom(mce,addr));
sewardj2e595852005-06-30 23:33:37 +00003503 tl_assert(end == Iend_LE || end == Iend_BE);
sewardj95448072004-11-22 20:19:51 +00003504
3505 /* First, emit a definedness test for the address. This also sets
3506 the address (shadow) to 'defined' following the test. */
3507 complainIfUndefined( mce, addr );
3508
3509 /* Now cook up a call to the relevant helper function, to read the
3510 data V bits from shadow memory. */
sewardj7cf4e6b2008-05-01 20:24:26 +00003511 ty = shadowTypeV(ty);
sewardj2e595852005-06-30 23:33:37 +00003512
3513 if (end == Iend_LE) {
3514 switch (ty) {
njn1d0825f2006-03-27 11:37:07 +00003515 case Ity_I64: helper = &MC_(helperc_LOADV64le);
3516 hname = "MC_(helperc_LOADV64le)";
sewardj2e595852005-06-30 23:33:37 +00003517 break;
njn1d0825f2006-03-27 11:37:07 +00003518 case Ity_I32: helper = &MC_(helperc_LOADV32le);
3519 hname = "MC_(helperc_LOADV32le)";
sewardj2e595852005-06-30 23:33:37 +00003520 break;
njn1d0825f2006-03-27 11:37:07 +00003521 case Ity_I16: helper = &MC_(helperc_LOADV16le);
3522 hname = "MC_(helperc_LOADV16le)";
sewardj2e595852005-06-30 23:33:37 +00003523 break;
njn1d0825f2006-03-27 11:37:07 +00003524 case Ity_I8: helper = &MC_(helperc_LOADV8);
3525 hname = "MC_(helperc_LOADV8)";
sewardj2e595852005-06-30 23:33:37 +00003526 break;
3527 default: ppIRType(ty);
3528 VG_(tool_panic)("memcheck:do_shadow_Load(LE)");
3529 }
3530 } else {
sewardj8cf88b72005-07-08 01:29:33 +00003531 switch (ty) {
njn1d0825f2006-03-27 11:37:07 +00003532 case Ity_I64: helper = &MC_(helperc_LOADV64be);
3533 hname = "MC_(helperc_LOADV64be)";
sewardj8cf88b72005-07-08 01:29:33 +00003534 break;
njn1d0825f2006-03-27 11:37:07 +00003535 case Ity_I32: helper = &MC_(helperc_LOADV32be);
3536 hname = "MC_(helperc_LOADV32be)";
sewardj8cf88b72005-07-08 01:29:33 +00003537 break;
njn1d0825f2006-03-27 11:37:07 +00003538 case Ity_I16: helper = &MC_(helperc_LOADV16be);
3539 hname = "MC_(helperc_LOADV16be)";
sewardj8cf88b72005-07-08 01:29:33 +00003540 break;
njn1d0825f2006-03-27 11:37:07 +00003541 case Ity_I8: helper = &MC_(helperc_LOADV8);
3542 hname = "MC_(helperc_LOADV8)";
sewardj8cf88b72005-07-08 01:29:33 +00003543 break;
3544 default: ppIRType(ty);
3545 VG_(tool_panic)("memcheck:do_shadow_Load(BE)");
3546 }
sewardj95448072004-11-22 20:19:51 +00003547 }
3548
3549 /* Generate the actual address into addrAct. */
3550 if (bias == 0) {
3551 addrAct = addr;
3552 } else {
sewardj7cf97ee2004-11-28 14:25:01 +00003553 IROp mkAdd;
3554 IRAtom* eBias;
sewardj95448072004-11-22 20:19:51 +00003555 IRType tyAddr = mce->hWordTy;
3556 tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
sewardj7cf97ee2004-11-28 14:25:01 +00003557 mkAdd = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
3558 eBias = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
sewardj7cf4e6b2008-05-01 20:24:26 +00003559 addrAct = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBias) );
sewardj95448072004-11-22 20:19:51 +00003560 }
3561
3562 /* We need to have a place to park the V bits we're just about to
3563 read. */
sewardj1c0ce7a2009-07-01 08:10:49 +00003564 datavbits = newTemp(mce, ty, VSh);
sewardj95448072004-11-22 20:19:51 +00003565 di = unsafeIRDirty_1_N( datavbits,
sewardj53ee1fc2005-12-23 02:29:58 +00003566 1/*regparms*/,
3567 hname, VG_(fnptr_to_fnentry)( helper ),
sewardj95448072004-11-22 20:19:51 +00003568 mkIRExprVec_1( addrAct ));
3569 setHelperAnns( mce, di );
sewardj7cf4e6b2008-05-01 20:24:26 +00003570 stmt( 'V', mce, IRStmt_Dirty(di) );
sewardj95448072004-11-22 20:19:51 +00003571
3572 return mkexpr(datavbits);
3573}
3574
3575
3576static
sewardj2e595852005-06-30 23:33:37 +00003577IRAtom* expr2vbits_Load ( MCEnv* mce,
3578 IREndness end, IRType ty,
3579 IRAtom* addr, UInt bias )
sewardj170ee212004-12-10 18:57:51 +00003580{
3581 IRAtom *v64hi, *v64lo;
sewardj2e595852005-06-30 23:33:37 +00003582 tl_assert(end == Iend_LE || end == Iend_BE);
sewardj7cf4e6b2008-05-01 20:24:26 +00003583 switch (shadowTypeV(ty)) {
sewardj170ee212004-12-10 18:57:51 +00003584 case Ity_I8:
3585 case Ity_I16:
3586 case Ity_I32:
3587 case Ity_I64:
sewardj2e595852005-06-30 23:33:37 +00003588 return expr2vbits_Load_WRK(mce, end, ty, addr, bias);
sewardj170ee212004-12-10 18:57:51 +00003589 case Ity_V128:
sewardj2e595852005-06-30 23:33:37 +00003590 if (end == Iend_LE) {
3591 v64lo = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias);
3592 v64hi = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+8);
3593 } else {
sewardj2e595852005-06-30 23:33:37 +00003594 v64hi = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias);
3595 v64lo = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+8);
3596 }
sewardj7cf4e6b2008-05-01 20:24:26 +00003597 return assignNew( 'V', mce,
sewardj170ee212004-12-10 18:57:51 +00003598 Ity_V128,
sewardj20d38f22005-02-07 23:50:18 +00003599 binop(Iop_64HLtoV128, v64hi, v64lo));
sewardj170ee212004-12-10 18:57:51 +00003600 default:
sewardj2e595852005-06-30 23:33:37 +00003601 VG_(tool_panic)("expr2vbits_Load");
sewardj170ee212004-12-10 18:57:51 +00003602 }
3603}
3604
3605
3606static
sewardj95448072004-11-22 20:19:51 +00003607IRAtom* expr2vbits_Mux0X ( MCEnv* mce,
3608 IRAtom* cond, IRAtom* expr0, IRAtom* exprX )
3609{
3610 IRAtom *vbitsC, *vbits0, *vbitsX;
3611 IRType ty;
3612 /* Given Mux0X(cond,expr0,exprX), generate
3613 Mux0X(cond,expr0#,exprX#) `UifU` PCast(cond#)
3614 That is, steer the V bits like the originals, but trash the
3615 result if the steering value is undefined. This gives
3616 lazy propagation. */
3617 tl_assert(isOriginalAtom(mce, cond));
3618 tl_assert(isOriginalAtom(mce, expr0));
3619 tl_assert(isOriginalAtom(mce, exprX));
3620
3621 vbitsC = expr2vbits(mce, cond);
3622 vbits0 = expr2vbits(mce, expr0);
3623 vbitsX = expr2vbits(mce, exprX);
sewardj1c0ce7a2009-07-01 08:10:49 +00003624 ty = typeOfIRExpr(mce->sb->tyenv, vbits0);
sewardj95448072004-11-22 20:19:51 +00003625
3626 return
sewardj7cf4e6b2008-05-01 20:24:26 +00003627 mkUifU(mce, ty, assignNew('V', mce, ty,
3628 IRExpr_Mux0X(cond, vbits0, vbitsX)),
sewardj95448072004-11-22 20:19:51 +00003629 mkPCastTo(mce, ty, vbitsC) );
3630}
3631
3632/* --------- This is the main expression-handling function. --------- */
3633
3634static
3635IRExpr* expr2vbits ( MCEnv* mce, IRExpr* e )
3636{
3637 switch (e->tag) {
3638
3639 case Iex_Get:
3640 return shadow_GET( mce, e->Iex.Get.offset, e->Iex.Get.ty );
3641
3642 case Iex_GetI:
3643 return shadow_GETI( mce, e->Iex.GetI.descr,
3644 e->Iex.GetI.ix, e->Iex.GetI.bias );
3645
sewardj0b9d74a2006-12-24 02:24:11 +00003646 case Iex_RdTmp:
sewardj7cf4e6b2008-05-01 20:24:26 +00003647 return IRExpr_RdTmp( findShadowTmpV(mce, e->Iex.RdTmp.tmp) );
sewardj95448072004-11-22 20:19:51 +00003648
3649 case Iex_Const:
sewardj1c0ce7a2009-07-01 08:10:49 +00003650 return definedOfType(shadowTypeV(typeOfIRExpr(mce->sb->tyenv, e)));
sewardj95448072004-11-22 20:19:51 +00003651
sewardje91cea72006-02-08 19:32:02 +00003652 case Iex_Qop:
3653 return expr2vbits_Qop(
3654 mce,
3655 e->Iex.Qop.op,
3656 e->Iex.Qop.arg1, e->Iex.Qop.arg2,
3657 e->Iex.Qop.arg3, e->Iex.Qop.arg4
3658 );
3659
sewardjed69fdb2006-02-03 16:12:27 +00003660 case Iex_Triop:
3661 return expr2vbits_Triop(
3662 mce,
3663 e->Iex.Triop.op,
3664 e->Iex.Triop.arg1, e->Iex.Triop.arg2, e->Iex.Triop.arg3
3665 );
3666
sewardj95448072004-11-22 20:19:51 +00003667 case Iex_Binop:
3668 return expr2vbits_Binop(
3669 mce,
3670 e->Iex.Binop.op,
3671 e->Iex.Binop.arg1, e->Iex.Binop.arg2
3672 );
3673
3674 case Iex_Unop:
3675 return expr2vbits_Unop( mce, e->Iex.Unop.op, e->Iex.Unop.arg );
3676
sewardj2e595852005-06-30 23:33:37 +00003677 case Iex_Load:
3678 return expr2vbits_Load( mce, e->Iex.Load.end,
3679 e->Iex.Load.ty,
3680 e->Iex.Load.addr, 0/*addr bias*/ );
sewardj95448072004-11-22 20:19:51 +00003681
3682 case Iex_CCall:
3683 return mkLazyN( mce, e->Iex.CCall.args,
3684 e->Iex.CCall.retty,
3685 e->Iex.CCall.cee );
3686
3687 case Iex_Mux0X:
3688 return expr2vbits_Mux0X( mce, e->Iex.Mux0X.cond, e->Iex.Mux0X.expr0,
3689 e->Iex.Mux0X.exprX);
njn25e49d8e72002-09-23 09:36:25 +00003690
3691 default:
sewardj95448072004-11-22 20:19:51 +00003692 VG_(printf)("\n");
3693 ppIRExpr(e);
3694 VG_(printf)("\n");
3695 VG_(tool_panic)("memcheck: expr2vbits");
njn25e49d8e72002-09-23 09:36:25 +00003696 }
njn25e49d8e72002-09-23 09:36:25 +00003697}
3698
3699/*------------------------------------------------------------*/
sewardj95448072004-11-22 20:19:51 +00003700/*--- Generate shadow stmts from all kinds of IRStmts. ---*/
njn25e49d8e72002-09-23 09:36:25 +00003701/*------------------------------------------------------------*/
3702
sewardj95448072004-11-22 20:19:51 +00003703/* Widen a value to the host word size. */
njn25e49d8e72002-09-23 09:36:25 +00003704
3705static
sewardj95448072004-11-22 20:19:51 +00003706IRExpr* zwidenToHostWord ( MCEnv* mce, IRAtom* vatom )
njn25e49d8e72002-09-23 09:36:25 +00003707{
sewardj7cf97ee2004-11-28 14:25:01 +00003708 IRType ty, tyH;
3709
sewardj95448072004-11-22 20:19:51 +00003710 /* vatom is vbits-value and as such can only have a shadow type. */
3711 tl_assert(isShadowAtom(mce,vatom));
njn25e49d8e72002-09-23 09:36:25 +00003712
sewardj1c0ce7a2009-07-01 08:10:49 +00003713 ty = typeOfIRExpr(mce->sb->tyenv, vatom);
sewardj7cf97ee2004-11-28 14:25:01 +00003714 tyH = mce->hWordTy;
njn25e49d8e72002-09-23 09:36:25 +00003715
sewardj95448072004-11-22 20:19:51 +00003716 if (tyH == Ity_I32) {
3717 switch (ty) {
sewardj7cf4e6b2008-05-01 20:24:26 +00003718 case Ity_I32:
3719 return vatom;
3720 case Ity_I16:
3721 return assignNew('V', mce, tyH, unop(Iop_16Uto32, vatom));
3722 case Ity_I8:
3723 return assignNew('V', mce, tyH, unop(Iop_8Uto32, vatom));
3724 default:
3725 goto unhandled;
sewardj8ec2cfc2002-10-13 00:57:26 +00003726 }
sewardj6cf40ff2005-04-20 22:31:26 +00003727 } else
3728 if (tyH == Ity_I64) {
3729 switch (ty) {
sewardj7cf4e6b2008-05-01 20:24:26 +00003730 case Ity_I32:
3731 return assignNew('V', mce, tyH, unop(Iop_32Uto64, vatom));
3732 case Ity_I16:
3733 return assignNew('V', mce, tyH, unop(Iop_32Uto64,
3734 assignNew('V', mce, Ity_I32, unop(Iop_16Uto32, vatom))));
3735 case Ity_I8:
3736 return assignNew('V', mce, tyH, unop(Iop_32Uto64,
3737 assignNew('V', mce, Ity_I32, unop(Iop_8Uto32, vatom))));
3738 default:
3739 goto unhandled;
sewardj6cf40ff2005-04-20 22:31:26 +00003740 }
sewardj95448072004-11-22 20:19:51 +00003741 } else {
3742 goto unhandled;
sewardj8ec2cfc2002-10-13 00:57:26 +00003743 }
sewardj95448072004-11-22 20:19:51 +00003744 unhandled:
3745 VG_(printf)("\nty = "); ppIRType(ty); VG_(printf)("\n");
3746 VG_(tool_panic)("zwidenToHostWord");
njn25e49d8e72002-09-23 09:36:25 +00003747}
3748
njn25e49d8e72002-09-23 09:36:25 +00003749
sewardj95448072004-11-22 20:19:51 +00003750/* Generate a shadow store. addr is always the original address atom.
3751 You can pass in either originals or V-bits for the data atom, but
sewardj1c0ce7a2009-07-01 08:10:49 +00003752 obviously not both. guard :: Ity_I1 controls whether the store
3753 really happens; NULL means it unconditionally does. Note that
3754 guard itself is not checked for definedness; the caller of this
3755 function must do that if necessary. */
njn25e49d8e72002-09-23 09:36:25 +00003756
sewardj95448072004-11-22 20:19:51 +00003757static
sewardj2e595852005-06-30 23:33:37 +00003758void do_shadow_Store ( MCEnv* mce,
3759 IREndness end,
3760 IRAtom* addr, UInt bias,
sewardj1c0ce7a2009-07-01 08:10:49 +00003761 IRAtom* data, IRAtom* vdata,
3762 IRAtom* guard )
njn25e49d8e72002-09-23 09:36:25 +00003763{
sewardj170ee212004-12-10 18:57:51 +00003764 IROp mkAdd;
3765 IRType ty, tyAddr;
sewardj95448072004-11-22 20:19:51 +00003766 void* helper = NULL;
3767 Char* hname = NULL;
njn1d0825f2006-03-27 11:37:07 +00003768 IRConst* c;
sewardj170ee212004-12-10 18:57:51 +00003769
3770 tyAddr = mce->hWordTy;
3771 mkAdd = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
3772 tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
sewardj2e595852005-06-30 23:33:37 +00003773 tl_assert( end == Iend_LE || end == Iend_BE );
sewardj170ee212004-12-10 18:57:51 +00003774
sewardj95448072004-11-22 20:19:51 +00003775 if (data) {
3776 tl_assert(!vdata);
3777 tl_assert(isOriginalAtom(mce, data));
3778 tl_assert(bias == 0);
3779 vdata = expr2vbits( mce, data );
3780 } else {
3781 tl_assert(vdata);
3782 }
njn25e49d8e72002-09-23 09:36:25 +00003783
sewardj95448072004-11-22 20:19:51 +00003784 tl_assert(isOriginalAtom(mce,addr));
3785 tl_assert(isShadowAtom(mce,vdata));
njn25e49d8e72002-09-23 09:36:25 +00003786
sewardj1c0ce7a2009-07-01 08:10:49 +00003787 if (guard) {
3788 tl_assert(isOriginalAtom(mce, guard));
3789 tl_assert(typeOfIRExpr(mce->sb->tyenv, guard) == Ity_I1);
3790 }
3791
3792 ty = typeOfIRExpr(mce->sb->tyenv, vdata);
njn25e49d8e72002-09-23 09:36:25 +00003793
njn1d0825f2006-03-27 11:37:07 +00003794 // If we're not doing undefined value checking, pretend that this value
3795 // is "all valid". That lets Vex's optimiser remove some of the V bit
3796 // shadow computation ops that precede it.
sewardj7cf4e6b2008-05-01 20:24:26 +00003797 if (MC_(clo_mc_level) == 1) {
njn1d0825f2006-03-27 11:37:07 +00003798 switch (ty) {
sewardj1c0ce7a2009-07-01 08:10:49 +00003799 case Ity_V128: // V128 weirdness
3800 c = IRConst_V128(V_BITS16_DEFINED); break;
njn1d0825f2006-03-27 11:37:07 +00003801 case Ity_I64: c = IRConst_U64 (V_BITS64_DEFINED); break;
3802 case Ity_I32: c = IRConst_U32 (V_BITS32_DEFINED); break;
3803 case Ity_I16: c = IRConst_U16 (V_BITS16_DEFINED); break;
3804 case Ity_I8: c = IRConst_U8 (V_BITS8_DEFINED); break;
3805 default: VG_(tool_panic)("memcheck:do_shadow_Store(LE)");
3806 }
3807 vdata = IRExpr_Const( c );
3808 }
3809
sewardj95448072004-11-22 20:19:51 +00003810 /* First, emit a definedness test for the address. This also sets
3811 the address (shadow) to 'defined' following the test. */
3812 complainIfUndefined( mce, addr );
njn25e49d8e72002-09-23 09:36:25 +00003813
sewardj170ee212004-12-10 18:57:51 +00003814 /* Now decide which helper function to call to write the data V
3815 bits into shadow memory. */
sewardj2e595852005-06-30 23:33:37 +00003816 if (end == Iend_LE) {
3817 switch (ty) {
3818 case Ity_V128: /* we'll use the helper twice */
njn1d0825f2006-03-27 11:37:07 +00003819 case Ity_I64: helper = &MC_(helperc_STOREV64le);
3820 hname = "MC_(helperc_STOREV64le)";
sewardj2e595852005-06-30 23:33:37 +00003821 break;
njn1d0825f2006-03-27 11:37:07 +00003822 case Ity_I32: helper = &MC_(helperc_STOREV32le);
3823 hname = "MC_(helperc_STOREV32le)";
sewardj2e595852005-06-30 23:33:37 +00003824 break;
njn1d0825f2006-03-27 11:37:07 +00003825 case Ity_I16: helper = &MC_(helperc_STOREV16le);
3826 hname = "MC_(helperc_STOREV16le)";
sewardj2e595852005-06-30 23:33:37 +00003827 break;
njn1d0825f2006-03-27 11:37:07 +00003828 case Ity_I8: helper = &MC_(helperc_STOREV8);
3829 hname = "MC_(helperc_STOREV8)";
sewardj2e595852005-06-30 23:33:37 +00003830 break;
3831 default: VG_(tool_panic)("memcheck:do_shadow_Store(LE)");
3832 }
3833 } else {
sewardj8cf88b72005-07-08 01:29:33 +00003834 switch (ty) {
3835 case Ity_V128: /* we'll use the helper twice */
njn1d0825f2006-03-27 11:37:07 +00003836 case Ity_I64: helper = &MC_(helperc_STOREV64be);
3837 hname = "MC_(helperc_STOREV64be)";
sewardj8cf88b72005-07-08 01:29:33 +00003838 break;
njn1d0825f2006-03-27 11:37:07 +00003839 case Ity_I32: helper = &MC_(helperc_STOREV32be);
3840 hname = "MC_(helperc_STOREV32be)";
sewardj8cf88b72005-07-08 01:29:33 +00003841 break;
njn1d0825f2006-03-27 11:37:07 +00003842 case Ity_I16: helper = &MC_(helperc_STOREV16be);
3843 hname = "MC_(helperc_STOREV16be)";
sewardj8cf88b72005-07-08 01:29:33 +00003844 break;
njn1d0825f2006-03-27 11:37:07 +00003845 case Ity_I8: helper = &MC_(helperc_STOREV8);
3846 hname = "MC_(helperc_STOREV8)";
sewardj8cf88b72005-07-08 01:29:33 +00003847 break;
3848 default: VG_(tool_panic)("memcheck:do_shadow_Store(BE)");
3849 }
sewardj95448072004-11-22 20:19:51 +00003850 }
njn25e49d8e72002-09-23 09:36:25 +00003851
sewardj170ee212004-12-10 18:57:51 +00003852 if (ty == Ity_V128) {
3853
sewardj20d38f22005-02-07 23:50:18 +00003854 /* V128-bit case */
sewardj170ee212004-12-10 18:57:51 +00003855 /* See comment in next clause re 64-bit regparms */
sewardj2e595852005-06-30 23:33:37 +00003856 /* also, need to be careful about endianness */
3857
njn4c245e52009-03-15 23:25:38 +00003858 Int offLo64, offHi64;
3859 IRDirty *diLo64, *diHi64;
3860 IRAtom *addrLo64, *addrHi64;
3861 IRAtom *vdataLo64, *vdataHi64;
3862 IRAtom *eBiasLo64, *eBiasHi64;
3863
sewardj2e595852005-06-30 23:33:37 +00003864 if (end == Iend_LE) {
3865 offLo64 = 0;
3866 offHi64 = 8;
3867 } else {
sewardj2e595852005-06-30 23:33:37 +00003868 offLo64 = 8;
3869 offHi64 = 0;
3870 }
3871
3872 eBiasLo64 = tyAddr==Ity_I32 ? mkU32(bias+offLo64) : mkU64(bias+offLo64);
sewardj7cf4e6b2008-05-01 20:24:26 +00003873 addrLo64 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasLo64) );
3874 vdataLo64 = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vdata));
sewardj170ee212004-12-10 18:57:51 +00003875 diLo64 = unsafeIRDirty_0_N(
sewardj53ee1fc2005-12-23 02:29:58 +00003876 1/*regparms*/,
3877 hname, VG_(fnptr_to_fnentry)( helper ),
3878 mkIRExprVec_2( addrLo64, vdataLo64 )
3879 );
sewardj2e595852005-06-30 23:33:37 +00003880 eBiasHi64 = tyAddr==Ity_I32 ? mkU32(bias+offHi64) : mkU64(bias+offHi64);
sewardj7cf4e6b2008-05-01 20:24:26 +00003881 addrHi64 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasHi64) );
3882 vdataHi64 = assignNew('V', mce, Ity_I64, unop(Iop_V128HIto64, vdata));
sewardj170ee212004-12-10 18:57:51 +00003883 diHi64 = unsafeIRDirty_0_N(
sewardj53ee1fc2005-12-23 02:29:58 +00003884 1/*regparms*/,
3885 hname, VG_(fnptr_to_fnentry)( helper ),
3886 mkIRExprVec_2( addrHi64, vdataHi64 )
3887 );
sewardj1c0ce7a2009-07-01 08:10:49 +00003888 if (guard) diLo64->guard = guard;
3889 if (guard) diHi64->guard = guard;
sewardj170ee212004-12-10 18:57:51 +00003890 setHelperAnns( mce, diLo64 );
3891 setHelperAnns( mce, diHi64 );
sewardj7cf4e6b2008-05-01 20:24:26 +00003892 stmt( 'V', mce, IRStmt_Dirty(diLo64) );
3893 stmt( 'V', mce, IRStmt_Dirty(diHi64) );
sewardj170ee212004-12-10 18:57:51 +00003894
sewardj95448072004-11-22 20:19:51 +00003895 } else {
sewardj170ee212004-12-10 18:57:51 +00003896
njn4c245e52009-03-15 23:25:38 +00003897 IRDirty *di;
3898 IRAtom *addrAct;
3899
sewardj170ee212004-12-10 18:57:51 +00003900 /* 8/16/32/64-bit cases */
3901 /* Generate the actual address into addrAct. */
3902 if (bias == 0) {
3903 addrAct = addr;
3904 } else {
njn4c245e52009-03-15 23:25:38 +00003905 IRAtom* eBias = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
sewardj7cf4e6b2008-05-01 20:24:26 +00003906 addrAct = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBias));
sewardj170ee212004-12-10 18:57:51 +00003907 }
3908
3909 if (ty == Ity_I64) {
3910 /* We can't do this with regparm 2 on 32-bit platforms, since
3911 the back ends aren't clever enough to handle 64-bit
3912 regparm args. Therefore be different. */
3913 di = unsafeIRDirty_0_N(
sewardj53ee1fc2005-12-23 02:29:58 +00003914 1/*regparms*/,
3915 hname, VG_(fnptr_to_fnentry)( helper ),
3916 mkIRExprVec_2( addrAct, vdata )
3917 );
sewardj170ee212004-12-10 18:57:51 +00003918 } else {
3919 di = unsafeIRDirty_0_N(
sewardj53ee1fc2005-12-23 02:29:58 +00003920 2/*regparms*/,
3921 hname, VG_(fnptr_to_fnentry)( helper ),
sewardj170ee212004-12-10 18:57:51 +00003922 mkIRExprVec_2( addrAct,
sewardj53ee1fc2005-12-23 02:29:58 +00003923 zwidenToHostWord( mce, vdata ))
3924 );
sewardj170ee212004-12-10 18:57:51 +00003925 }
sewardj1c0ce7a2009-07-01 08:10:49 +00003926 if (guard) di->guard = guard;
sewardj170ee212004-12-10 18:57:51 +00003927 setHelperAnns( mce, di );
sewardj7cf4e6b2008-05-01 20:24:26 +00003928 stmt( 'V', mce, IRStmt_Dirty(di) );
sewardj95448072004-11-22 20:19:51 +00003929 }
njn25e49d8e72002-09-23 09:36:25 +00003930
sewardj95448072004-11-22 20:19:51 +00003931}
njn25e49d8e72002-09-23 09:36:25 +00003932
njn25e49d8e72002-09-23 09:36:25 +00003933
sewardj95448072004-11-22 20:19:51 +00003934/* Do lazy pessimistic propagation through a dirty helper call, by
3935 looking at the annotations on it. This is the most complex part of
3936 Memcheck. */
njn25e49d8e72002-09-23 09:36:25 +00003937
sewardj95448072004-11-22 20:19:51 +00003938static IRType szToITy ( Int n )
3939{
3940 switch (n) {
3941 case 1: return Ity_I8;
3942 case 2: return Ity_I16;
3943 case 4: return Ity_I32;
3944 case 8: return Ity_I64;
3945 default: VG_(tool_panic)("szToITy(memcheck)");
3946 }
3947}
njn25e49d8e72002-09-23 09:36:25 +00003948
sewardj95448072004-11-22 20:19:51 +00003949static
3950void do_shadow_Dirty ( MCEnv* mce, IRDirty* d )
3951{
njn4c245e52009-03-15 23:25:38 +00003952 Int i, n, toDo, gSz, gOff;
sewardj2e595852005-06-30 23:33:37 +00003953 IRAtom *src, *here, *curr;
njn4c245e52009-03-15 23:25:38 +00003954 IRType tySrc, tyDst;
sewardj2e595852005-06-30 23:33:37 +00003955 IRTemp dst;
3956 IREndness end;
3957
3958 /* What's the native endianness? We need to know this. */
sewardj6e340c72005-07-10 00:53:42 +00003959# if defined(VG_BIGENDIAN)
sewardj2e595852005-06-30 23:33:37 +00003960 end = Iend_BE;
sewardj6e340c72005-07-10 00:53:42 +00003961# elif defined(VG_LITTLEENDIAN)
sewardj2e595852005-06-30 23:33:37 +00003962 end = Iend_LE;
3963# else
3964# error "Unknown endianness"
3965# endif
njn25e49d8e72002-09-23 09:36:25 +00003966
sewardj95448072004-11-22 20:19:51 +00003967 /* First check the guard. */
3968 complainIfUndefined(mce, d->guard);
3969
3970 /* Now round up all inputs and PCast over them. */
sewardj7cf97ee2004-11-28 14:25:01 +00003971 curr = definedOfType(Ity_I32);
sewardj95448072004-11-22 20:19:51 +00003972
3973 /* Inputs: unmasked args */
3974 for (i = 0; d->args[i]; i++) {
3975 if (d->cee->mcx_mask & (1<<i)) {
3976 /* ignore this arg */
njn25e49d8e72002-09-23 09:36:25 +00003977 } else {
sewardj95448072004-11-22 20:19:51 +00003978 here = mkPCastTo( mce, Ity_I32, expr2vbits(mce, d->args[i]) );
3979 curr = mkUifU32(mce, here, curr);
njn25e49d8e72002-09-23 09:36:25 +00003980 }
3981 }
sewardj95448072004-11-22 20:19:51 +00003982
3983 /* Inputs: guest state that we read. */
3984 for (i = 0; i < d->nFxState; i++) {
3985 tl_assert(d->fxState[i].fx != Ifx_None);
3986 if (d->fxState[i].fx == Ifx_Write)
3987 continue;
sewardja7203252004-11-26 19:17:47 +00003988
3989 /* Ignore any sections marked as 'always defined'. */
3990 if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size )) {
sewardje9e16d32004-12-10 13:17:55 +00003991 if (0)
sewardja7203252004-11-26 19:17:47 +00003992 VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
3993 d->fxState[i].offset, d->fxState[i].size );
3994 continue;
3995 }
3996
sewardj95448072004-11-22 20:19:51 +00003997 /* This state element is read or modified. So we need to
sewardje9e16d32004-12-10 13:17:55 +00003998 consider it. If larger than 8 bytes, deal with it in 8-byte
3999 chunks. */
4000 gSz = d->fxState[i].size;
4001 gOff = d->fxState[i].offset;
4002 tl_assert(gSz > 0);
4003 while (True) {
4004 if (gSz == 0) break;
4005 n = gSz <= 8 ? gSz : 8;
4006 /* update 'curr' with UifU of the state slice
4007 gOff .. gOff+n-1 */
4008 tySrc = szToITy( n );
sewardj7cf4e6b2008-05-01 20:24:26 +00004009 src = assignNew( 'V', mce, tySrc,
4010 shadow_GET(mce, gOff, tySrc ) );
sewardje9e16d32004-12-10 13:17:55 +00004011 here = mkPCastTo( mce, Ity_I32, src );
4012 curr = mkUifU32(mce, here, curr);
4013 gSz -= n;
4014 gOff += n;
4015 }
4016
sewardj95448072004-11-22 20:19:51 +00004017 }
4018
4019 /* Inputs: memory. First set up some info needed regardless of
4020 whether we're doing reads or writes. */
sewardj95448072004-11-22 20:19:51 +00004021
4022 if (d->mFx != Ifx_None) {
4023 /* Because we may do multiple shadow loads/stores from the same
4024 base address, it's best to do a single test of its
4025 definedness right now. Post-instrumentation optimisation
4026 should remove all but this test. */
njn4c245e52009-03-15 23:25:38 +00004027 IRType tyAddr;
sewardj95448072004-11-22 20:19:51 +00004028 tl_assert(d->mAddr);
4029 complainIfUndefined(mce, d->mAddr);
4030
sewardj1c0ce7a2009-07-01 08:10:49 +00004031 tyAddr = typeOfIRExpr(mce->sb->tyenv, d->mAddr);
sewardj95448072004-11-22 20:19:51 +00004032 tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);
4033 tl_assert(tyAddr == mce->hWordTy); /* not really right */
4034 }
4035
4036 /* Deal with memory inputs (reads or modifies) */
4037 if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
sewardj95448072004-11-22 20:19:51 +00004038 toDo = d->mSize;
sewardj2e595852005-06-30 23:33:37 +00004039 /* chew off 32-bit chunks. We don't care about the endianness
4040 since it's all going to be condensed down to a single bit,
4041 but nevertheless choose an endianness which is hopefully
4042 native to the platform. */
sewardj95448072004-11-22 20:19:51 +00004043 while (toDo >= 4) {
4044 here = mkPCastTo(
4045 mce, Ity_I32,
sewardj2e595852005-06-30 23:33:37 +00004046 expr2vbits_Load ( mce, end, Ity_I32,
sewardj95448072004-11-22 20:19:51 +00004047 d->mAddr, d->mSize - toDo )
4048 );
4049 curr = mkUifU32(mce, here, curr);
4050 toDo -= 4;
4051 }
4052 /* chew off 16-bit chunks */
4053 while (toDo >= 2) {
4054 here = mkPCastTo(
4055 mce, Ity_I32,
sewardj2e595852005-06-30 23:33:37 +00004056 expr2vbits_Load ( mce, end, Ity_I16,
sewardj95448072004-11-22 20:19:51 +00004057 d->mAddr, d->mSize - toDo )
4058 );
4059 curr = mkUifU32(mce, here, curr);
4060 toDo -= 2;
4061 }
4062 tl_assert(toDo == 0); /* also need to handle 1-byte excess */
4063 }
4064
4065 /* Whew! So curr is a 32-bit V-value summarising pessimistically
4066 all the inputs to the helper. Now we need to re-distribute the
4067 results to all destinations. */
4068
4069 /* Outputs: the destination temporary, if there is one. */
4070 if (d->tmp != IRTemp_INVALID) {
sewardj7cf4e6b2008-05-01 20:24:26 +00004071 dst = findShadowTmpV(mce, d->tmp);
sewardj1c0ce7a2009-07-01 08:10:49 +00004072 tyDst = typeOfIRTemp(mce->sb->tyenv, d->tmp);
sewardj7cf4e6b2008-05-01 20:24:26 +00004073 assign( 'V', mce, dst, mkPCastTo( mce, tyDst, curr) );
sewardj95448072004-11-22 20:19:51 +00004074 }
4075
4076 /* Outputs: guest state that we write or modify. */
4077 for (i = 0; i < d->nFxState; i++) {
4078 tl_assert(d->fxState[i].fx != Ifx_None);
4079 if (d->fxState[i].fx == Ifx_Read)
4080 continue;
sewardja7203252004-11-26 19:17:47 +00004081 /* Ignore any sections marked as 'always defined'. */
4082 if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size ))
4083 continue;
sewardje9e16d32004-12-10 13:17:55 +00004084 /* This state element is written or modified. So we need to
4085 consider it. If larger than 8 bytes, deal with it in 8-byte
4086 chunks. */
4087 gSz = d->fxState[i].size;
4088 gOff = d->fxState[i].offset;
4089 tl_assert(gSz > 0);
4090 while (True) {
4091 if (gSz == 0) break;
4092 n = gSz <= 8 ? gSz : 8;
4093 /* Write suitably-casted 'curr' to the state slice
4094 gOff .. gOff+n-1 */
4095 tyDst = szToITy( n );
4096 do_shadow_PUT( mce, gOff,
4097 NULL, /* original atom */
4098 mkPCastTo( mce, tyDst, curr ) );
4099 gSz -= n;
4100 gOff += n;
4101 }
sewardj95448072004-11-22 20:19:51 +00004102 }
4103
sewardj2e595852005-06-30 23:33:37 +00004104 /* Outputs: memory that we write or modify. Same comments about
4105 endianness as above apply. */
sewardj95448072004-11-22 20:19:51 +00004106 if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
sewardj95448072004-11-22 20:19:51 +00004107 toDo = d->mSize;
4108 /* chew off 32-bit chunks */
4109 while (toDo >= 4) {
sewardj2e595852005-06-30 23:33:37 +00004110 do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
4111 NULL, /* original data */
sewardj1c0ce7a2009-07-01 08:10:49 +00004112 mkPCastTo( mce, Ity_I32, curr ),
4113 NULL/*guard*/ );
sewardj95448072004-11-22 20:19:51 +00004114 toDo -= 4;
4115 }
4116 /* chew off 16-bit chunks */
4117 while (toDo >= 2) {
sewardj2e595852005-06-30 23:33:37 +00004118 do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
4119 NULL, /* original data */
sewardj1c0ce7a2009-07-01 08:10:49 +00004120 mkPCastTo( mce, Ity_I16, curr ),
4121 NULL/*guard*/ );
sewardj95448072004-11-22 20:19:51 +00004122 toDo -= 2;
4123 }
4124 tl_assert(toDo == 0); /* also need to handle 1-byte excess */
4125 }
4126
njn25e49d8e72002-09-23 09:36:25 +00004127}
4128
sewardj1c0ce7a2009-07-01 08:10:49 +00004129
sewardj826ec492005-05-12 18:05:00 +00004130/* We have an ABI hint telling us that [base .. base+len-1] is to
4131 become undefined ("writable"). Generate code to call a helper to
4132 notify the A/V bit machinery of this fact.
4133
4134 We call
sewardj7cf4e6b2008-05-01 20:24:26 +00004135 void MC_(helperc_MAKE_STACK_UNINIT) ( Addr base, UWord len,
4136 Addr nia );
sewardj826ec492005-05-12 18:05:00 +00004137*/
4138static
sewardj7cf4e6b2008-05-01 20:24:26 +00004139void do_AbiHint ( MCEnv* mce, IRExpr* base, Int len, IRExpr* nia )
sewardj826ec492005-05-12 18:05:00 +00004140{
4141 IRDirty* di;
sewardj7cf4e6b2008-05-01 20:24:26 +00004142 /* Minor optimisation: if not doing origin tracking, ignore the
4143 supplied nia and pass zero instead. This is on the basis that
4144 MC_(helperc_MAKE_STACK_UNINIT) will ignore it anyway, and we can
4145 almost always generate a shorter instruction to put zero into a
4146 register than any other value. */
4147 if (MC_(clo_mc_level) < 3)
4148 nia = mkIRExpr_HWord(0);
4149
sewardj826ec492005-05-12 18:05:00 +00004150 di = unsafeIRDirty_0_N(
4151 0/*regparms*/,
4152 "MC_(helperc_MAKE_STACK_UNINIT)",
sewardj53ee1fc2005-12-23 02:29:58 +00004153 VG_(fnptr_to_fnentry)( &MC_(helperc_MAKE_STACK_UNINIT) ),
sewardj7cf4e6b2008-05-01 20:24:26 +00004154 mkIRExprVec_3( base, mkIRExpr_HWord( (UInt)len), nia )
sewardj826ec492005-05-12 18:05:00 +00004155 );
sewardj7cf4e6b2008-05-01 20:24:26 +00004156 stmt( 'V', mce, IRStmt_Dirty(di) );
sewardj826ec492005-05-12 18:05:00 +00004157}
4158
njn25e49d8e72002-09-23 09:36:25 +00004159
sewardj1c0ce7a2009-07-01 08:10:49 +00004160/* ------ Dealing with IRCAS (big and complex) ------ */
4161
4162/* FWDS */
4163static IRAtom* gen_load_b ( MCEnv* mce, Int szB,
4164 IRAtom* baseaddr, Int offset );
4165static IRAtom* gen_maxU32 ( MCEnv* mce, IRAtom* b1, IRAtom* b2 );
4166static void gen_store_b ( MCEnv* mce, Int szB,
4167 IRAtom* baseaddr, Int offset, IRAtom* dataB,
4168 IRAtom* guard );
4169
4170static void do_shadow_CAS_single ( MCEnv* mce, IRCAS* cas );
4171static void do_shadow_CAS_double ( MCEnv* mce, IRCAS* cas );
4172
4173
4174/* Either ORIG and SHADOW are both IRExpr.RdTmps, or they are both
4175 IRExpr.Consts, else this asserts. If they are both Consts, it
4176 doesn't do anything. So that just leaves the RdTmp case.
4177
4178 In which case: this assigns the shadow value SHADOW to the IR
4179 shadow temporary associated with ORIG. That is, ORIG, being an
4180 original temporary, will have a shadow temporary associated with
4181 it. However, in the case envisaged here, there will so far have
4182 been no IR emitted to actually write a shadow value into that
4183 temporary. What this routine does is to (emit IR to) copy the
4184 value in SHADOW into said temporary, so that after this call,
4185 IRExpr.RdTmps of ORIG's shadow temp will correctly pick up the
4186 value in SHADOW.
4187
4188 Point is to allow callers to compute "by hand" a shadow value for
4189 ORIG, and force it to be associated with ORIG.
4190
4191 How do we know that that shadow associated with ORIG has not so far
4192 been assigned to? Well, we don't per se know that, but supposing
4193 it had. Then this routine would create a second assignment to it,
4194 and later the IR sanity checker would barf. But that never
4195 happens. QED.
4196*/
4197static void bind_shadow_tmp_to_orig ( UChar how,
4198 MCEnv* mce,
4199 IRAtom* orig, IRAtom* shadow )
4200{
4201 tl_assert(isOriginalAtom(mce, orig));
4202 tl_assert(isShadowAtom(mce, shadow));
4203 switch (orig->tag) {
4204 case Iex_Const:
4205 tl_assert(shadow->tag == Iex_Const);
4206 break;
4207 case Iex_RdTmp:
4208 tl_assert(shadow->tag == Iex_RdTmp);
4209 if (how == 'V') {
4210 assign('V', mce, findShadowTmpV(mce,orig->Iex.RdTmp.tmp),
4211 shadow);
4212 } else {
4213 tl_assert(how == 'B');
4214 assign('B', mce, findShadowTmpB(mce,orig->Iex.RdTmp.tmp),
4215 shadow);
4216 }
4217 break;
4218 default:
4219 tl_assert(0);
4220 }
4221}
4222
4223
static
void do_shadow_CAS ( MCEnv* mce, IRCAS* cas )
{
   /* Scheme is (both single- and double- cases):

         1. fetch data#,dataB (the proposed new value)

         2. fetch expd#,expdB (what we expect to see at the address)

         3. check definedness of address

         4. load old#,oldB from shadow memory; this also checks
            addressability of the address

         5. the CAS itself

         6. compute "expected == old".  See COMMENT_ON_CasCmpEQ below.

         7. if "expected == old" (as computed by (6))
               store data#,dataB to shadow memory

      Note that 5 reads 'old' but 4 reads 'old#'.  Similarly, 5 stores
      'data' but 7 stores 'data#'.  Hence it is possible for the
      shadow data to be incorrectly checked and/or updated:

      * 7 is at least gated correctly, since the 'expected == old'
        condition is derived from outputs of 5.  However, the shadow
        write could happen too late: imagine after 5 we are
        descheduled, a different thread runs, writes a different
        (shadow) value at the address, and then we resume, hence
        overwriting the shadow value written by the other thread.

      Because the original memory access is atomic, there's no way to
      make both the original and shadow accesses into a single atomic
      thing, hence this is unavoidable.

      At least as Valgrind stands, I don't think it's a problem, since
      we're single threaded *and* we guarantee that there are no
      context switches during the execution of any specific superblock
      -- context switches can only happen at superblock boundaries.

      If Valgrind ever becomes MT in the future, then it might be more
      of a problem.  A possible kludge would be to artificially
      associate with the location, a lock, which we must acquire and
      release around the transaction as a whole.  Hmm, that probably
      wouldn't work properly since it only guards us against other
      threads doing CASs on the same location, not against other
      threads doing normal reads and writes.

      ------------------------------------------------------------

      COMMENT_ON_CasCmpEQ:

      Note two things.  Firstly, in the sequence above, we compute
      "expected == old", but we don't check definedness of it.  Why
      not?  Also, the x86 and amd64 front ends use
      Iop_CmpCas{EQ,NE}{8,16,32,64} comparisons to make the equivalent
      determination (expected == old ?) for themselves, and we also
      don't check definedness for those primops; we just say that the
      result is defined.  Why?  Details follow.

      x86/amd64 contains various forms of locked insns:
      * lock prefix before all basic arithmetic insn;
        eg lock xorl %reg1,(%reg2)
      * atomic exchange reg-mem
      * compare-and-swaps

      Rather than attempt to represent them all, which would be a
      royal PITA, I used a result from Maurice Herlihy
      (http://en.wikipedia.org/wiki/Maurice_Herlihy), in which he
      demonstrates that compare-and-swap is a primitive more general
      than the other two, and so can be used to represent all of them.
      So the translation scheme for (eg) lock incl (%reg) is as
      follows:

        again:
         old = * %reg
         new = old + 1
         atomically { if (* %reg == old) { * %reg = new } else { goto again } }

      The "atomically" is the CAS bit.  The scheme is always the same:
      get old value from memory, compute new value, atomically stuff
      new value back in memory iff the old value has not changed (iow,
      no other thread modified it in the meantime).  If it has changed
      then we've been out-raced and we have to start over.

      Now that's all very neat, but it has the bad side effect of
      introducing an explicit equality test into the translation.
      Consider the behaviour of said code on a memory location which
      is uninitialised.  We will wind up doing a comparison on
      uninitialised data, and mc duly complains.

      What's difficult about this is, the common case is that the
      location is uncontended, and so we're usually comparing the same
      value (* %reg) with itself.  So we shouldn't complain even if it
      is undefined.  But mc doesn't know that.

      My solution is to mark the == in the IR specially, so as to tell
      mc that it almost certainly compares a value with itself, and we
      should just regard the result as always defined.  Rather than
      add a bit to all IROps, I just cloned Iop_CmpEQ{8,16,32,64} into
      Iop_CasCmpEQ{8,16,32,64} so as not to disturb anything else.

      So there's always the question of, can this give a false
      negative?  eg, imagine that initially, * %reg is defined; and we
      read that; but then in the gap between the read and the CAS, a
      different thread writes an undefined (and different) value at
      the location.  Then the CAS in this thread will fail and we will
      go back to "again:", but without knowing that the trip back
      there was based on an undefined comparison.  No matter; at least
      the other thread won the race and the location is correctly
      marked as undefined.  What if it wrote an uninitialised version
      of the same value that was there originally, though?

      etc etc.  Seems like there's a small corner case in which we
      might lose the fact that something's defined -- we're out-raced
      in between the "old = * reg" and the "atomically {", _and_ the
      other thread is writing in an undefined version of what's
      already there.  Well, that seems pretty unlikely.

      ---

      If we ever need to reinstate it .. code which generates a
      definedness test for "expected == old" was removed at r10432 of
      this file.
   */
   /* Dispatch: a CAS with no high half (oldHi invalid) is a
      single-element CAS; otherwise it's a double-element CAS. */
   if (cas->oldHi == IRTemp_INVALID) {
      do_shadow_CAS_single( mce, cas );
   } else {
      do_shadow_CAS_double( mce, cas );
   }
}
4356
4357
/* Instrument a single-element CAS, following the 7-step scheme laid
   out in the comment in do_shadow_CAS.  The ordering of the emitted
   statements matters: the shadow load (4) precedes the real CAS (5),
   and the shadow store (7) is guarded by the comparison built in (6). */
static void do_shadow_CAS_single ( MCEnv* mce, IRCAS* cas )
{
   IRAtom *vdataLo = NULL, *bdataLo = NULL;
   IRAtom *vexpdLo = NULL, *bexpdLo = NULL;
   IRAtom *voldLo  = NULL, *boldLo  = NULL;
   IRAtom *expd_eq_old = NULL;
   IROp   opCasCmpEQ;
   Int    elemSzB;
   IRType elemTy;
   Bool   otrak = MC_(clo_mc_level) >= 3; /* a shorthand: origin tracking on */

   /* single CAS */
   tl_assert(cas->oldHi == IRTemp_INVALID);
   tl_assert(cas->expdHi == NULL);
   tl_assert(cas->dataHi == NULL);

   elemTy = typeOfIRExpr(mce->sb->tyenv, cas->expdLo);
   switch (elemTy) {
      case Ity_I8:  elemSzB = 1; opCasCmpEQ = Iop_CasCmpEQ8;  break;
      case Ity_I16: elemSzB = 2; opCasCmpEQ = Iop_CasCmpEQ16; break;
      case Ity_I32: elemSzB = 4; opCasCmpEQ = Iop_CasCmpEQ32; break;
      case Ity_I64: elemSzB = 8; opCasCmpEQ = Iop_CasCmpEQ64; break;
      default: tl_assert(0); /* IR defn disallows any other types */
   }

   /* 1. fetch data# (the proposed new value) */
   tl_assert(isOriginalAtom(mce, cas->dataLo));
   vdataLo
      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataLo));
   tl_assert(isShadowAtom(mce, vdataLo));
   if (otrak) {
      bdataLo
         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataLo));
      tl_assert(isShadowAtom(mce, bdataLo));
   }

   /* 2. fetch expected# (what we expect to see at the address) */
   tl_assert(isOriginalAtom(mce, cas->expdLo));
   vexpdLo
      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdLo));
   tl_assert(isShadowAtom(mce, vexpdLo));
   if (otrak) {
      bexpdLo
         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdLo));
      tl_assert(isShadowAtom(mce, bexpdLo));
   }

   /* 3. check definedness of address */
   /* 4. fetch old# from shadow memory; this also checks
         addressability of the address */
   voldLo
      = assignNew(
           'V', mce, elemTy,
           expr2vbits_Load(
              mce,
              cas->end, elemTy, cas->addr, 0/*Addr bias*/
        ));
   bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldLo), voldLo);
   if (otrak) {
      boldLo
         = assignNew('B', mce, Ity_I32,
                     gen_load_b(mce, elemSzB, cas->addr, 0/*addr bias*/));
      bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldLo), boldLo);
   }

   /* 5. the CAS itself */
   stmt( 'C', mce, IRStmt_CAS(cas) );

   /* 6. compute "expected == old" */
   /* See COMMENT_ON_CasCmpEQ in this file for background/rationale. */
   /* Note that 'C' is kinda faking it; it is indeed a non-shadow
      tree, but it's not copied from the input block. */
   expd_eq_old
      = assignNew('C', mce, Ity_I1,
                  binop(opCasCmpEQ, cas->expdLo, mkexpr(cas->oldLo)));

   /* 7. if "expected == old"
            store data# to shadow memory */
   do_shadow_Store( mce, cas->end, cas->addr, 0/*bias*/,
                    NULL/*data*/, vdataLo/*vdata*/,
                    expd_eq_old/*guard for store*/ );
   if (otrak) {
      gen_store_b( mce, elemSzB, cas->addr, 0/*offset*/,
                   bdataLo/*bdata*/,
                   expd_eq_old/*guard for store*/ );
   }
}
4445
4446
/* Instrument a double-word CAS (IRCAS with a Hi:Lo pair of
   expected/new values).  Emits, into the output block:
     - V-bit (definedness) shadows for both halves of data# and
       expected#,
     - shadow loads of old# (which also check addressibility and
       definedness of the address),
     - the CAS itself,
     - a recomputation of "expected == old" (see COMMENT_ON_CasCmpEQ),
     - guarded shadow stores of data#'s shadows, performed only if the
       CAS succeeded.
   If origin tracking is enabled (MC_(clo_mc_level) >= 3), the same
   steps are mirrored for the B (origin) shadows.  Note: this copies
   the CAS statement to the output block itself; the caller must not
   copy it again. */
static void do_shadow_CAS_double ( MCEnv* mce, IRCAS* cas )
{
   IRAtom *vdataHi = NULL, *bdataHi = NULL;
   IRAtom *vdataLo = NULL, *bdataLo = NULL;
   IRAtom *vexpdHi = NULL, *bexpdHi = NULL;
   IRAtom *vexpdLo = NULL, *bexpdLo = NULL;
   IRAtom *voldHi = NULL, *boldHi = NULL;
   IRAtom *voldLo = NULL, *boldLo = NULL;
   IRAtom *xHi = NULL, *xLo = NULL, *xHL = NULL;
   IRAtom *expd_eq_old = NULL, *zero = NULL;
   IROp opCasCmpEQ, opOr, opXor;
   Int elemSzB, memOffsLo, memOffsHi;
   IRType elemTy;
   Bool otrak = MC_(clo_mc_level) >= 3; /* a shorthand */

   /* double CAS: the Hi fields must all be populated. */
   tl_assert(cas->oldHi != IRTemp_INVALID);
   tl_assert(cas->expdHi != NULL);
   tl_assert(cas->dataHi != NULL);

   /* Select the element size, the zero constant, and the Xor/Or/CmpEQ
      ops appropriate to the element type.  Both halves have the same
      type, so inspecting expdLo suffices. */
   elemTy = typeOfIRExpr(mce->sb->tyenv, cas->expdLo);
   switch (elemTy) {
      case Ity_I8:
         opCasCmpEQ = Iop_CasCmpEQ8; opOr = Iop_Or8; opXor = Iop_Xor8;
         elemSzB = 1; zero = mkU8(0);
         break;
      case Ity_I16:
         opCasCmpEQ = Iop_CasCmpEQ16; opOr = Iop_Or16; opXor = Iop_Xor16;
         elemSzB = 2; zero = mkU16(0);
         break;
      case Ity_I32:
         opCasCmpEQ = Iop_CasCmpEQ32; opOr = Iop_Or32; opXor = Iop_Xor32;
         elemSzB = 4; zero = mkU32(0);
         break;
      case Ity_I64:
         opCasCmpEQ = Iop_CasCmpEQ64; opOr = Iop_Or64; opXor = Iop_Xor64;
         elemSzB = 8; zero = mkU64(0);
         break;
      default:
         tl_assert(0); /* IR defn disallows any other types */
   }

   /* 1. fetch data# (the proposed new value) */
   tl_assert(isOriginalAtom(mce, cas->dataHi));
   tl_assert(isOriginalAtom(mce, cas->dataLo));
   vdataHi
      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataHi));
   vdataLo
      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataLo));
   tl_assert(isShadowAtom(mce, vdataHi));
   tl_assert(isShadowAtom(mce, vdataLo));
   if (otrak) {
      bdataHi
         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataHi));
      bdataLo
         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataLo));
      tl_assert(isShadowAtom(mce, bdataHi));
      tl_assert(isShadowAtom(mce, bdataLo));
   }

   /* 2. fetch expected# (what we expect to see at the address) */
   tl_assert(isOriginalAtom(mce, cas->expdHi));
   tl_assert(isOriginalAtom(mce, cas->expdLo));
   vexpdHi
      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdHi));
   vexpdLo
      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdLo));
   tl_assert(isShadowAtom(mce, vexpdHi));
   tl_assert(isShadowAtom(mce, vexpdLo));
   if (otrak) {
      bexpdHi
         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdHi));
      bexpdLo
         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdLo));
      tl_assert(isShadowAtom(mce, bexpdHi));
      tl_assert(isShadowAtom(mce, bexpdLo));
   }

   /* 3. check definedness of address */
   /* 4. fetch old# from shadow memory; this also checks
         addressibility of the address */
   /* The Hi and Lo halves sit at different offsets from cas->addr;
      which is which depends on endianness. */
   if (cas->end == Iend_LE) {
      memOffsLo = 0;
      memOffsHi = elemSzB;
   } else {
      tl_assert(cas->end == Iend_BE);
      memOffsLo = elemSzB;
      memOffsHi = 0;
   }
   voldHi
      = assignNew(
           'V', mce, elemTy,
           expr2vbits_Load(
              mce,
              cas->end, elemTy, cas->addr, memOffsHi/*Addr bias*/
        ));
   voldLo
      = assignNew(
           'V', mce, elemTy,
           expr2vbits_Load(
              mce,
              cas->end, elemTy, cas->addr, memOffsLo/*Addr bias*/
        ));
   /* Connect the loaded shadows to the shadow temps of cas->oldHi/Lo,
      so later uses of the old values see the right V bits. */
   bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldHi), voldHi);
   bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldLo), voldLo);
   if (otrak) {
      boldHi
         = assignNew('B', mce, Ity_I32,
                     gen_load_b(mce, elemSzB, cas->addr,
                                memOffsHi/*addr bias*/));
      boldLo
         = assignNew('B', mce, Ity_I32,
                     gen_load_b(mce, elemSzB, cas->addr,
                                memOffsLo/*addr bias*/));
      bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldHi), boldHi);
      bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldLo), boldLo);
   }

   /* 5. the CAS itself */
   stmt( 'C', mce, IRStmt_CAS(cas) );

   /* 6. compute "expected == old" */
   /* See COMMENT_ON_CasCmpEQ in this file for background/rationale. */
   /* Note that 'C' is kinda faking it; it is indeed a non-shadow
      tree, but it's not copied from the input block. */
   /*
      xHi = oldHi ^ expdHi;
      xLo = oldLo ^ expdLo;
      xHL = xHi | xLo;
      expd_eq_old = xHL == 0;
   */
   xHi = assignNew('C', mce, elemTy,
                   binop(opXor, cas->expdHi, mkexpr(cas->oldHi)));
   xLo = assignNew('C', mce, elemTy,
                   binop(opXor, cas->expdLo, mkexpr(cas->oldLo)));
   xHL = assignNew('C', mce, elemTy,
                   binop(opOr, xHi, xLo));
   expd_eq_old
      = assignNew('C', mce, Ity_I1,
                  binop(opCasCmpEQ, xHL, zero));

   /* 7. if "expected == old"
            store data# to shadow memory */
   /* The stores are guarded by expd_eq_old, so shadow memory is only
      updated when the real CAS succeeded. */
   do_shadow_Store( mce, cas->end, cas->addr, memOffsHi/*bias*/,
                    NULL/*data*/, vdataHi/*vdata*/,
                    expd_eq_old/*guard for store*/ );
   do_shadow_Store( mce, cas->end, cas->addr, memOffsLo/*bias*/,
                    NULL/*data*/, vdataLo/*vdata*/,
                    expd_eq_old/*guard for store*/ );
   if (otrak) {
      gen_store_b( mce, elemSzB, cas->addr, memOffsHi/*offset*/,
                   bdataHi/*bdata*/,
                   expd_eq_old/*guard for store*/ );
      gen_store_b( mce, elemSzB, cas->addr, memOffsLo/*offset*/,
                   bdataLo/*bdata*/,
                   expd_eq_old/*guard for store*/ );
   }
}
4605
4606
sewardjdb5907d2009-11-26 17:20:21 +00004607/* ------ Dealing with LL/SC (not difficult) ------ */
4608
4609static void do_shadow_LLSC ( MCEnv* mce,
4610 IREndness stEnd,
4611 IRTemp stResult,
4612 IRExpr* stAddr,
4613 IRExpr* stStoredata )
4614{
4615 /* In short: treat a load-linked like a normal load followed by an
4616 assignment of the loaded (shadow) data to the result temporary.
4617 Treat a store-conditional like a normal store, and mark the
4618 result temporary as defined. */
4619 IRType resTy = typeOfIRTemp(mce->sb->tyenv, stResult);
4620 IRTemp resTmp = findShadowTmpV(mce, stResult);
4621
4622 tl_assert(isIRAtom(stAddr));
4623 if (stStoredata)
4624 tl_assert(isIRAtom(stStoredata));
4625
4626 if (stStoredata == NULL) {
4627 /* Load Linked */
4628 /* Just treat this as a normal load, followed by an assignment of
4629 the value to .result. */
4630 /* Stay sane */
4631 tl_assert(resTy == Ity_I64 || resTy == Ity_I32
4632 || resTy == Ity_I16 || resTy == Ity_I8);
4633 assign( 'V', mce, resTmp,
4634 expr2vbits_Load(
4635 mce, stEnd, resTy, stAddr, 0/*addr bias*/));
4636 } else {
4637 /* Store Conditional */
4638 /* Stay sane */
4639 IRType dataTy = typeOfIRExpr(mce->sb->tyenv,
4640 stStoredata);
4641 tl_assert(dataTy == Ity_I64 || dataTy == Ity_I32
4642 || dataTy == Ity_I16 || dataTy == Ity_I8);
4643 do_shadow_Store( mce, stEnd,
4644 stAddr, 0/* addr bias */,
4645 stStoredata,
4646 NULL /* shadow data */,
4647 NULL/*guard*/ );
4648 /* This is a store conditional, so it writes to .result a value
4649 indicating whether or not the store succeeded. Just claim
4650 this value is always defined. In the PowerPC interpretation
4651 of store-conditional, definedness of the success indication
4652 depends on whether the address of the store matches the
4653 reservation address. But we can't tell that here (and
4654 anyway, we're not being PowerPC-specific). At least we are
4655 guaranteed that the definedness of the store address, and its
4656 addressibility, will be checked as per normal. So it seems
4657 pretty safe to just say that the success indication is always
4658 defined.
4659
4660 In schemeS, for origin tracking, we must correspondingly set
4661 a no-origin value for the origin shadow of .result.
4662 */
4663 tl_assert(resTy == Ity_I1);
4664 assign( 'V', mce, resTmp, definedOfType(resTy) );
4665 }
4666}
4667
4668
sewardj95448072004-11-22 20:19:51 +00004669/*------------------------------------------------------------*/
4670/*--- Memcheck main ---*/
4671/*------------------------------------------------------------*/
njn25e49d8e72002-09-23 09:36:25 +00004672
sewardj7cf4e6b2008-05-01 20:24:26 +00004673static void schemeS ( MCEnv* mce, IRStmt* st );
4674
sewardj95448072004-11-22 20:19:51 +00004675static Bool isBogusAtom ( IRAtom* at )
njn25e49d8e72002-09-23 09:36:25 +00004676{
sewardj95448072004-11-22 20:19:51 +00004677 ULong n = 0;
4678 IRConst* con;
sewardj710d6c22005-03-20 18:55:15 +00004679 tl_assert(isIRAtom(at));
sewardj0b9d74a2006-12-24 02:24:11 +00004680 if (at->tag == Iex_RdTmp)
sewardj95448072004-11-22 20:19:51 +00004681 return False;
4682 tl_assert(at->tag == Iex_Const);
4683 con = at->Iex.Const.con;
4684 switch (con->tag) {
sewardjd5204dc2004-12-31 01:16:11 +00004685 case Ico_U1: return False;
4686 case Ico_U8: n = (ULong)con->Ico.U8; break;
4687 case Ico_U16: n = (ULong)con->Ico.U16; break;
4688 case Ico_U32: n = (ULong)con->Ico.U32; break;
4689 case Ico_U64: n = (ULong)con->Ico.U64; break;
4690 case Ico_F64: return False;
sewardjb5b87402011-03-07 16:05:35 +00004691 case Ico_F32i: return False;
sewardjd5204dc2004-12-31 01:16:11 +00004692 case Ico_F64i: return False;
4693 case Ico_V128: return False;
sewardj95448072004-11-22 20:19:51 +00004694 default: ppIRExpr(at); tl_assert(0);
4695 }
4696 /* VG_(printf)("%llx\n", n); */
sewardj96a922e2005-04-23 23:26:29 +00004697 return (/*32*/ n == 0xFEFEFEFFULL
4698 /*32*/ || n == 0x80808080ULL
sewardj17b47432008-12-17 01:12:58 +00004699 /*32*/ || n == 0x7F7F7F7FULL
tomd9774d72005-06-27 08:11:01 +00004700 /*64*/ || n == 0xFFFFFFFFFEFEFEFFULL
sewardj96a922e2005-04-23 23:26:29 +00004701 /*64*/ || n == 0xFEFEFEFEFEFEFEFFULL
tomd9774d72005-06-27 08:11:01 +00004702 /*64*/ || n == 0x0000000000008080ULL
sewardj96a922e2005-04-23 23:26:29 +00004703 /*64*/ || n == 0x8080808080808080ULL
sewardj17b47432008-12-17 01:12:58 +00004704 /*64*/ || n == 0x0101010101010101ULL
sewardj96a922e2005-04-23 23:26:29 +00004705 );
sewardj95448072004-11-22 20:19:51 +00004706}
njn25e49d8e72002-09-23 09:36:25 +00004707
sewardj95448072004-11-22 20:19:51 +00004708static Bool checkForBogusLiterals ( /*FLAT*/ IRStmt* st )
4709{
sewardjd5204dc2004-12-31 01:16:11 +00004710 Int i;
4711 IRExpr* e;
4712 IRDirty* d;
sewardj1c0ce7a2009-07-01 08:10:49 +00004713 IRCAS* cas;
sewardj95448072004-11-22 20:19:51 +00004714 switch (st->tag) {
sewardj0b9d74a2006-12-24 02:24:11 +00004715 case Ist_WrTmp:
4716 e = st->Ist.WrTmp.data;
sewardj95448072004-11-22 20:19:51 +00004717 switch (e->tag) {
4718 case Iex_Get:
sewardj0b9d74a2006-12-24 02:24:11 +00004719 case Iex_RdTmp:
sewardj95448072004-11-22 20:19:51 +00004720 return False;
sewardjd5204dc2004-12-31 01:16:11 +00004721 case Iex_Const:
4722 return isBogusAtom(e);
sewardj95448072004-11-22 20:19:51 +00004723 case Iex_Unop:
4724 return isBogusAtom(e->Iex.Unop.arg);
sewardjd5204dc2004-12-31 01:16:11 +00004725 case Iex_GetI:
4726 return isBogusAtom(e->Iex.GetI.ix);
sewardj95448072004-11-22 20:19:51 +00004727 case Iex_Binop:
4728 return isBogusAtom(e->Iex.Binop.arg1)
4729 || isBogusAtom(e->Iex.Binop.arg2);
sewardjed69fdb2006-02-03 16:12:27 +00004730 case Iex_Triop:
4731 return isBogusAtom(e->Iex.Triop.arg1)
4732 || isBogusAtom(e->Iex.Triop.arg2)
4733 || isBogusAtom(e->Iex.Triop.arg3);
sewardje91cea72006-02-08 19:32:02 +00004734 case Iex_Qop:
4735 return isBogusAtom(e->Iex.Qop.arg1)
4736 || isBogusAtom(e->Iex.Qop.arg2)
4737 || isBogusAtom(e->Iex.Qop.arg3)
4738 || isBogusAtom(e->Iex.Qop.arg4);
sewardj95448072004-11-22 20:19:51 +00004739 case Iex_Mux0X:
4740 return isBogusAtom(e->Iex.Mux0X.cond)
4741 || isBogusAtom(e->Iex.Mux0X.expr0)
4742 || isBogusAtom(e->Iex.Mux0X.exprX);
sewardj2e595852005-06-30 23:33:37 +00004743 case Iex_Load:
4744 return isBogusAtom(e->Iex.Load.addr);
sewardj95448072004-11-22 20:19:51 +00004745 case Iex_CCall:
4746 for (i = 0; e->Iex.CCall.args[i]; i++)
4747 if (isBogusAtom(e->Iex.CCall.args[i]))
4748 return True;
4749 return False;
4750 default:
4751 goto unhandled;
4752 }
sewardjd5204dc2004-12-31 01:16:11 +00004753 case Ist_Dirty:
4754 d = st->Ist.Dirty.details;
4755 for (i = 0; d->args[i]; i++)
4756 if (isBogusAtom(d->args[i]))
4757 return True;
4758 if (d->guard && isBogusAtom(d->guard))
4759 return True;
4760 if (d->mAddr && isBogusAtom(d->mAddr))
4761 return True;
4762 return False;
sewardj95448072004-11-22 20:19:51 +00004763 case Ist_Put:
4764 return isBogusAtom(st->Ist.Put.data);
sewardjd5204dc2004-12-31 01:16:11 +00004765 case Ist_PutI:
4766 return isBogusAtom(st->Ist.PutI.ix)
4767 || isBogusAtom(st->Ist.PutI.data);
sewardj2e595852005-06-30 23:33:37 +00004768 case Ist_Store:
4769 return isBogusAtom(st->Ist.Store.addr)
4770 || isBogusAtom(st->Ist.Store.data);
sewardj95448072004-11-22 20:19:51 +00004771 case Ist_Exit:
sewardjd5204dc2004-12-31 01:16:11 +00004772 return isBogusAtom(st->Ist.Exit.guard);
sewardj826ec492005-05-12 18:05:00 +00004773 case Ist_AbiHint:
sewardj7cf4e6b2008-05-01 20:24:26 +00004774 return isBogusAtom(st->Ist.AbiHint.base)
4775 || isBogusAtom(st->Ist.AbiHint.nia);
sewardj21dc3452005-03-21 00:27:41 +00004776 case Ist_NoOp:
sewardj29faa502005-03-16 18:20:21 +00004777 case Ist_IMark:
sewardj72d75132007-11-09 23:06:35 +00004778 case Ist_MBE:
sewardjbd598e12005-01-07 12:10:21 +00004779 return False;
sewardj1c0ce7a2009-07-01 08:10:49 +00004780 case Ist_CAS:
4781 cas = st->Ist.CAS.details;
4782 return isBogusAtom(cas->addr)
4783 || (cas->expdHi ? isBogusAtom(cas->expdHi) : False)
4784 || isBogusAtom(cas->expdLo)
4785 || (cas->dataHi ? isBogusAtom(cas->dataHi) : False)
4786 || isBogusAtom(cas->dataLo);
sewardjdb5907d2009-11-26 17:20:21 +00004787 case Ist_LLSC:
4788 return isBogusAtom(st->Ist.LLSC.addr)
4789 || (st->Ist.LLSC.storedata
4790 ? isBogusAtom(st->Ist.LLSC.storedata)
4791 : False);
sewardj95448072004-11-22 20:19:51 +00004792 default:
4793 unhandled:
4794 ppIRStmt(st);
4795 VG_(tool_panic)("hasBogusLiterals");
4796 }
4797}
njn25e49d8e72002-09-23 09:36:25 +00004798
njn25e49d8e72002-09-23 09:36:25 +00004799
/* Memcheck's main instrumentation entry point: given an input
   superblock sb_in, build and return a new superblock sb_out in which
   every statement is preceded/followed by the shadow-value (V bits)
   and, at --track-origins=yes (MC_(clo_mc_level) == 3), origin (B)
   instrumentation.  Overall plan:
     1. sanity-check basic type sizes and the clo_mc_level setting;
     2. set up the MCEnv (output block + orig->shadow temp map);
     3. pre-scan for "bogus" literals, which switches the whole block
        to more expensive, more precise instrumentation;
     4. copy any pre-IMark preamble verbatim, then mark the shadows of
        preamble-written temps as defined;
     5. instrument each remaining statement, copying the original
        statement into the output as well (except IRCASs, which
        do_shadow_CAS copies itself);
     6. check the definedness of the block's jump target;
     7. tear down the temp map and return the new block. */
IRSB* MC_(instrument) ( VgCallbackClosure* closure,
                        IRSB* sb_in,
                        VexGuestLayout* layout,
                        VexGuestExtents* vge,
                        IRType gWordTy, IRType hWordTy )
{
   Bool verboze = 0||False;
   Bool bogus;
   Int i, j, first_stmt;
   IRStmt* st;
   MCEnv mce;
   IRSB* sb_out;

   if (gWordTy != hWordTy) {
      /* We don't currently support this case. */
      VG_(tool_panic)("host/guest word size mismatch");
   }

   /* Check we're not completely nuts */
   tl_assert(sizeof(UWord) == sizeof(void*));
   tl_assert(sizeof(Word) == sizeof(void*));
   tl_assert(sizeof(Addr) == sizeof(void*));
   tl_assert(sizeof(ULong) == 8);
   tl_assert(sizeof(Long) == 8);
   tl_assert(sizeof(Addr64) == 8);
   tl_assert(sizeof(UInt) == 4);
   tl_assert(sizeof(Int) == 4);

   tl_assert(MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3);

   /* Set up SB: same header/tyenv as the input, but no statements yet. */
   sb_out = deepCopyIRSBExceptStmts(sb_in);

   /* Set up the running environment.  Both .sb and .tmpMap are
      modified as we go along.  Note that tmps are added to both
      .sb->tyenv and .tmpMap together, so the valid index-set for
      those two arrays should always be identical. */
   VG_(memset)(&mce, 0, sizeof(mce));
   mce.sb = sb_out;
   mce.trace = verboze;
   mce.layout = layout;
   mce.hWordTy = hWordTy;
   mce.bogusLiterals = False;

   /* One TempMapEnt per original temp, initially with no shadows. */
   mce.tmpMap = VG_(newXA)( VG_(malloc), "mc.MC_(instrument).1", VG_(free),
                            sizeof(TempMapEnt));
   for (i = 0; i < sb_in->tyenv->types_used; i++) {
      TempMapEnt ent;
      ent.kind = Orig;
      ent.shadowV = IRTemp_INVALID;
      ent.shadowB = IRTemp_INVALID;
      VG_(addToXA)( mce.tmpMap, &ent );
   }
   tl_assert( VG_(sizeXA)( mce.tmpMap ) == sb_in->tyenv->types_used );

   /* Make a preliminary inspection of the statements, to see if there
      are any dodgy-looking literals.  If there are, we generate
      extra-detailed (hence extra-expensive) instrumentation in
      places.  Scan the whole bb even if dodgyness is found earlier,
      so that the flatness assertion is applied to all stmts. */

   bogus = False;

   for (i = 0; i < sb_in->stmts_used; i++) {

      st = sb_in->stmts[i];
      tl_assert(st);
      tl_assert(isFlatIRStmt(st));

      if (!bogus) {
         bogus = checkForBogusLiterals(st);
         if (0 && bogus) {
            VG_(printf)("bogus: ");
            ppIRStmt(st);
            VG_(printf)("\n");
         }
      }

   }

   mce.bogusLiterals = bogus;

   /* Copy verbatim any IR preamble preceding the first IMark */

   tl_assert(mce.sb == sb_out);
   tl_assert(mce.sb != sb_in);

   i = 0;
   while (i < sb_in->stmts_used && sb_in->stmts[i]->tag != Ist_IMark) {

      st = sb_in->stmts[i];
      tl_assert(st);
      tl_assert(isFlatIRStmt(st));

      stmt( 'C', &mce, sb_in->stmts[i] );
      i++;
   }

   /* Nasty problem.  IR optimisation of the pre-instrumented IR may
      cause the IR following the preamble to contain references to IR
      temporaries defined in the preamble.  Because the preamble isn't
      instrumented, these temporaries don't have any shadows.
      Nevertheless uses of them following the preamble will cause
      memcheck to generate references to their shadows.  End effect is
      to cause IR sanity check failures, due to references to
      non-existent shadows.  This is only evident for the complex
      preambles used for function wrapping on TOC-afflicted platforms
      (ppc64-linux).

      The following loop therefore scans the preamble looking for
      assignments to temporaries.  For each one found it creates an
      assignment to the corresponding (V) shadow temp, marking it as
      'defined'.  This is the same resulting IR as if the main
      instrumentation loop before had been applied to the statement
      'tmp = CONSTANT'.

      Similarly, if origin tracking is enabled, we must generate an
      assignment for the corresponding origin (B) shadow, claiming
      no-origin, as appropriate for a defined value.
   */
   for (j = 0; j < i; j++) {
      if (sb_in->stmts[j]->tag == Ist_WrTmp) {
         /* findShadowTmpV checks its arg is an original tmp;
            no need to assert that here. */
         IRTemp tmp_o = sb_in->stmts[j]->Ist.WrTmp.tmp;
         IRTemp tmp_v = findShadowTmpV(&mce, tmp_o);
         IRType ty_v = typeOfIRTemp(sb_out->tyenv, tmp_v);
         assign( 'V', &mce, tmp_v, definedOfType( ty_v ) );
         if (MC_(clo_mc_level) == 3) {
            IRTemp tmp_b = findShadowTmpB(&mce, tmp_o);
            tl_assert(typeOfIRTemp(sb_out->tyenv, tmp_b) == Ity_I32);
            assign( 'B', &mce, tmp_b, mkU32(0)/* UNKNOWN ORIGIN */);
         }
         if (0) {
            VG_(printf)("create shadow tmp(s) for preamble tmp [%d] ty ", j);
            ppIRType( ty_v );
            VG_(printf)("\n");
         }
      }
   }

   /* Iterate over the remaining stmts to generate instrumentation. */

   tl_assert(sb_in->stmts_used > 0);
   tl_assert(i >= 0);
   tl_assert(i < sb_in->stmts_used);
   tl_assert(sb_in->stmts[i]->tag == Ist_IMark);

   for (/* use current i*/; i < sb_in->stmts_used; i++) {

      st = sb_in->stmts[i];
      /* Remember where this stmt's instrumentation starts in the
         output block, for the verbose dump below. */
      first_stmt = sb_out->stmts_used;

      if (verboze) {
         VG_(printf)("\n");
         ppIRStmt(st);
         VG_(printf)("\n");
      }

      /* Origin-tracking instrumentation first ... */
      if (MC_(clo_mc_level) == 3) {
         /* See comments on case Ist_CAS below. */
         if (st->tag != Ist_CAS)
            schemeS( &mce, st );
      }

      /* Generate instrumentation code for each stmt ... */

      switch (st->tag) {

         case Ist_WrTmp:
            assign( 'V', &mce, findShadowTmpV(&mce, st->Ist.WrTmp.tmp),
                               expr2vbits( &mce, st->Ist.WrTmp.data) );
            break;

         case Ist_Put:
            do_shadow_PUT( &mce,
                           st->Ist.Put.offset,
                           st->Ist.Put.data,
                           NULL /* shadow atom */ );
            break;

         case Ist_PutI:
            do_shadow_PUTI( &mce,
                            st->Ist.PutI.descr,
                            st->Ist.PutI.ix,
                            st->Ist.PutI.bias,
                            st->Ist.PutI.data );
            break;

         case Ist_Store:
            do_shadow_Store( &mce, st->Ist.Store.end,
                                   st->Ist.Store.addr, 0/* addr bias */,
                                   st->Ist.Store.data,
                                   NULL /* shadow data */,
                                   NULL/*guard*/ );
            break;

         case Ist_Exit:
            complainIfUndefined( &mce, st->Ist.Exit.guard );
            break;

         case Ist_IMark:
            break;

         case Ist_NoOp:
         case Ist_MBE:
            /* No shadow state to update for these. */
            break;

         case Ist_Dirty:
            do_shadow_Dirty( &mce, st->Ist.Dirty.details );
            break;

         case Ist_AbiHint:
            do_AbiHint( &mce, st->Ist.AbiHint.base,
                              st->Ist.AbiHint.len,
                              st->Ist.AbiHint.nia );
            break;

         case Ist_CAS:
            do_shadow_CAS( &mce, st->Ist.CAS.details );
            /* Note, do_shadow_CAS copies the CAS itself to the output
               block, because it needs to add instrumentation both
               before and after it.  Hence skip the copy below.  Also
               skip the origin-tracking stuff (call to schemeS) above,
               since that's all tangled up with it too; do_shadow_CAS
               does it all. */
            break;

         case Ist_LLSC:
            do_shadow_LLSC( &mce,
                            st->Ist.LLSC.end,
                            st->Ist.LLSC.result,
                            st->Ist.LLSC.addr,
                            st->Ist.LLSC.storedata );
            break;

         default:
            VG_(printf)("\n");
            ppIRStmt(st);
            VG_(printf)("\n");
            VG_(tool_panic)("memcheck: unhandled IRStmt");

      } /* switch (st->tag) */

      if (0 && verboze) {
         for (j = first_stmt; j < sb_out->stmts_used; j++) {
            VG_(printf)("   ");
            ppIRStmt(sb_out->stmts[j]);
            VG_(printf)("\n");
         }
         VG_(printf)("\n");
      }

      /* ... and finally copy the stmt itself to the output.  Except,
         skip the copy of IRCASs; see comments on case Ist_CAS
         above. */
      if (st->tag != Ist_CAS)
         stmt('C', &mce, st);
   }

   /* Now we need to complain if the jump target is undefined. */
   first_stmt = sb_out->stmts_used;

   if (verboze) {
      VG_(printf)("sb_in->next = ");
      ppIRExpr(sb_in->next);
      VG_(printf)("\n\n");
   }

   complainIfUndefined( &mce, sb_in->next );

   if (0 && verboze) {
      for (j = first_stmt; j < sb_out->stmts_used; j++) {
         VG_(printf)("   ");
         ppIRStmt(sb_out->stmts[j]);
         VG_(printf)("\n");
      }
      VG_(printf)("\n");
   }

   /* If this fails, there's been some serious snafu with tmp management,
      that should be investigated. */
   tl_assert( VG_(sizeXA)( mce.tmpMap ) == mce.sb->tyenv->types_used );
   VG_(deleteXA)( mce.tmpMap );

   tl_assert(mce.sb == sb_out);
   return sb_out;
}
njn25e49d8e72002-09-23 09:36:25 +00005088
sewardj81651dc2007-08-28 06:05:20 +00005089/*------------------------------------------------------------*/
5090/*--- Post-tree-build final tidying ---*/
5091/*------------------------------------------------------------*/
5092
5093/* This exploits the observation that Memcheck often produces
5094 repeated conditional calls of the form
5095
sewardj7cf4e6b2008-05-01 20:24:26 +00005096 Dirty G MC_(helperc_value_check0/1/4/8_fail)(UInt otag)
sewardj81651dc2007-08-28 06:05:20 +00005097
5098 with the same guard expression G guarding the same helper call.
5099 The second and subsequent calls are redundant. This usually
5100 results from instrumentation of guest code containing multiple
5101 memory references at different constant offsets from the same base
5102 register. After optimisation of the instrumentation, you get a
5103 test for the definedness of the base register for each memory
5104 reference, which is kinda pointless. MC_(final_tidy) therefore
5105 looks for such repeated calls and removes all but the first. */
5106
5107/* A struct for recording which (helper, guard) pairs we have already
5108 seen. */
5109typedef
5110 struct { void* entry; IRExpr* guard; }
5111 Pair;
5112
5113/* Return True if e1 and e2 definitely denote the same value (used to
5114 compare guards). Return False if unknown; False is the safe
5115 answer. Since guest registers and guest memory do not have the
5116 SSA property we must return False if any Gets or Loads appear in
5117 the expression. */
5118
5119static Bool sameIRValue ( IRExpr* e1, IRExpr* e2 )
5120{
5121 if (e1->tag != e2->tag)
5122 return False;
5123 switch (e1->tag) {
5124 case Iex_Const:
5125 return eqIRConst( e1->Iex.Const.con, e2->Iex.Const.con );
5126 case Iex_Binop:
5127 return e1->Iex.Binop.op == e2->Iex.Binop.op
5128 && sameIRValue(e1->Iex.Binop.arg1, e2->Iex.Binop.arg1)
5129 && sameIRValue(e1->Iex.Binop.arg2, e2->Iex.Binop.arg2);
5130 case Iex_Unop:
5131 return e1->Iex.Unop.op == e2->Iex.Unop.op
5132 && sameIRValue(e1->Iex.Unop.arg, e2->Iex.Unop.arg);
5133 case Iex_RdTmp:
5134 return e1->Iex.RdTmp.tmp == e2->Iex.RdTmp.tmp;
5135 case Iex_Mux0X:
5136 return sameIRValue( e1->Iex.Mux0X.cond, e2->Iex.Mux0X.cond )
5137 && sameIRValue( e1->Iex.Mux0X.expr0, e2->Iex.Mux0X.expr0 )
5138 && sameIRValue( e1->Iex.Mux0X.exprX, e2->Iex.Mux0X.exprX );
5139 case Iex_Qop:
5140 case Iex_Triop:
5141 case Iex_CCall:
5142 /* be lazy. Could define equality for these, but they never
5143 appear to be used. */
5144 return False;
5145 case Iex_Get:
5146 case Iex_GetI:
5147 case Iex_Load:
5148 /* be conservative - these may not give the same value each
5149 time */
5150 return False;
5151 case Iex_Binder:
5152 /* should never see this */
5153 /* fallthrough */
5154 default:
5155 VG_(printf)("mc_translate.c: sameIRValue: unhandled: ");
5156 ppIRExpr(e1);
5157 VG_(tool_panic)("memcheck:sameIRValue");
5158 return False;
5159 }
5160}
5161
5162/* See if 'pairs' already has an entry for (entry, guard). Return
5163 True if so. If not, add an entry. */
5164
5165static
5166Bool check_or_add ( XArray* /*of Pair*/ pairs, IRExpr* guard, void* entry )
5167{
5168 Pair p;
5169 Pair* pp;
5170 Int i, n = VG_(sizeXA)( pairs );
5171 for (i = 0; i < n; i++) {
5172 pp = VG_(indexXA)( pairs, i );
5173 if (pp->entry == entry && sameIRValue(pp->guard, guard))
5174 return True;
5175 }
5176 p.guard = guard;
5177 p.entry = entry;
5178 VG_(addToXA)( pairs, &p );
5179 return False;
5180}
5181
5182static Bool is_helperc_value_checkN_fail ( HChar* name )
5183{
5184 return
sewardj7cf4e6b2008-05-01 20:24:26 +00005185 0==VG_(strcmp)(name, "MC_(helperc_value_check0_fail_no_o)")
5186 || 0==VG_(strcmp)(name, "MC_(helperc_value_check1_fail_no_o)")
5187 || 0==VG_(strcmp)(name, "MC_(helperc_value_check4_fail_no_o)")
5188 || 0==VG_(strcmp)(name, "MC_(helperc_value_check8_fail_no_o)")
5189 || 0==VG_(strcmp)(name, "MC_(helperc_value_check0_fail_w_o)")
5190 || 0==VG_(strcmp)(name, "MC_(helperc_value_check1_fail_w_o)")
5191 || 0==VG_(strcmp)(name, "MC_(helperc_value_check4_fail_w_o)")
5192 || 0==VG_(strcmp)(name, "MC_(helperc_value_check8_fail_w_o)");
sewardj81651dc2007-08-28 06:05:20 +00005193}
5194
5195IRSB* MC_(final_tidy) ( IRSB* sb_in )
5196{
5197 Int i;
5198 IRStmt* st;
5199 IRDirty* di;
5200 IRExpr* guard;
5201 IRCallee* cee;
5202 Bool alreadyPresent;
sewardj9c606bd2008-09-18 18:12:50 +00005203 XArray* pairs = VG_(newXA)( VG_(malloc), "mc.ft.1",
5204 VG_(free), sizeof(Pair) );
sewardj81651dc2007-08-28 06:05:20 +00005205 /* Scan forwards through the statements. Each time a call to one
5206 of the relevant helpers is seen, check if we have made a
5207 previous call to the same helper using the same guard
5208 expression, and if so, delete the call. */
5209 for (i = 0; i < sb_in->stmts_used; i++) {
5210 st = sb_in->stmts[i];
5211 tl_assert(st);
5212 if (st->tag != Ist_Dirty)
5213 continue;
5214 di = st->Ist.Dirty.details;
5215 guard = di->guard;
5216 if (!guard)
5217 continue;
5218 if (0) { ppIRExpr(guard); VG_(printf)("\n"); }
5219 cee = di->cee;
5220 if (!is_helperc_value_checkN_fail( cee->name ))
5221 continue;
5222 /* Ok, we have a call to helperc_value_check0/1/4/8_fail with
5223 guard 'guard'. Check if we have already seen a call to this
5224 function with the same guard. If so, delete it. If not,
5225 add it to the set of calls we do know about. */
5226 alreadyPresent = check_or_add( pairs, guard, cee->addr );
5227 if (alreadyPresent) {
5228 sb_in->stmts[i] = IRStmt_NoOp();
5229 if (0) VG_(printf)("XX\n");
5230 }
5231 }
5232 VG_(deleteXA)( pairs );
5233 return sb_in;
5234}
5235
5236
sewardj7cf4e6b2008-05-01 20:24:26 +00005237/*------------------------------------------------------------*/
5238/*--- Origin tracking stuff ---*/
5239/*------------------------------------------------------------*/
5240
sewardj1c0ce7a2009-07-01 08:10:49 +00005241/* Almost identical to findShadowTmpV. */
sewardj7cf4e6b2008-05-01 20:24:26 +00005242static IRTemp findShadowTmpB ( MCEnv* mce, IRTemp orig )
5243{
sewardj1c0ce7a2009-07-01 08:10:49 +00005244 TempMapEnt* ent;
5245 /* VG_(indexXA) range-checks 'orig', hence no need to check
5246 here. */
5247 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
5248 tl_assert(ent->kind == Orig);
5249 if (ent->shadowB == IRTemp_INVALID) {
5250 IRTemp tmpB
5251 = newTemp( mce, Ity_I32, BSh );
5252 /* newTemp may cause mce->tmpMap to resize, hence previous results
5253 from VG_(indexXA) are invalid. */
5254 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
5255 tl_assert(ent->kind == Orig);
5256 tl_assert(ent->shadowB == IRTemp_INVALID);
5257 ent->shadowB = tmpB;
sewardj7cf4e6b2008-05-01 20:24:26 +00005258 }
sewardj1c0ce7a2009-07-01 08:10:49 +00005259 return ent->shadowB;
sewardj7cf4e6b2008-05-01 20:24:26 +00005260}
5261
5262static IRAtom* gen_maxU32 ( MCEnv* mce, IRAtom* b1, IRAtom* b2 )
5263{
5264 return assignNew( 'B', mce, Ity_I32, binop(Iop_Max32U, b1, b2) );
5265}
5266
5267static IRAtom* gen_load_b ( MCEnv* mce, Int szB,
5268 IRAtom* baseaddr, Int offset )
5269{
5270 void* hFun;
5271 HChar* hName;
5272 IRTemp bTmp;
5273 IRDirty* di;
sewardj1c0ce7a2009-07-01 08:10:49 +00005274 IRType aTy = typeOfIRExpr( mce->sb->tyenv, baseaddr );
sewardj7cf4e6b2008-05-01 20:24:26 +00005275 IROp opAdd = aTy == Ity_I32 ? Iop_Add32 : Iop_Add64;
5276 IRAtom* ea = baseaddr;
5277 if (offset != 0) {
5278 IRAtom* off = aTy == Ity_I32 ? mkU32( offset )
5279 : mkU64( (Long)(Int)offset );
5280 ea = assignNew( 'B', mce, aTy, binop(opAdd, ea, off));
5281 }
sewardj1c0ce7a2009-07-01 08:10:49 +00005282 bTmp = newTemp(mce, mce->hWordTy, BSh);
sewardj7cf4e6b2008-05-01 20:24:26 +00005283
5284 switch (szB) {
5285 case 1: hFun = (void*)&MC_(helperc_b_load1);
5286 hName = "MC_(helperc_b_load1)";
5287 break;
5288 case 2: hFun = (void*)&MC_(helperc_b_load2);
5289 hName = "MC_(helperc_b_load2)";
5290 break;
5291 case 4: hFun = (void*)&MC_(helperc_b_load4);
5292 hName = "MC_(helperc_b_load4)";
5293 break;
5294 case 8: hFun = (void*)&MC_(helperc_b_load8);
5295 hName = "MC_(helperc_b_load8)";
5296 break;
5297 case 16: hFun = (void*)&MC_(helperc_b_load16);
5298 hName = "MC_(helperc_b_load16)";
5299 break;
5300 default:
5301 VG_(printf)("mc_translate.c: gen_load_b: unhandled szB == %d\n", szB);
5302 tl_assert(0);
5303 }
5304 di = unsafeIRDirty_1_N(
5305 bTmp, 1/*regparms*/, hName, VG_(fnptr_to_fnentry)( hFun ),
5306 mkIRExprVec_1( ea )
5307 );
5308 /* no need to mess with any annotations. This call accesses
5309 neither guest state nor guest memory. */
5310 stmt( 'B', mce, IRStmt_Dirty(di) );
5311 if (mce->hWordTy == Ity_I64) {
5312 /* 64-bit host */
sewardj1c0ce7a2009-07-01 08:10:49 +00005313 IRTemp bTmp32 = newTemp(mce, Ity_I32, BSh);
sewardj7cf4e6b2008-05-01 20:24:26 +00005314 assign( 'B', mce, bTmp32, unop(Iop_64to32, mkexpr(bTmp)) );
5315 return mkexpr(bTmp32);
5316 } else {
5317 /* 32-bit host */
5318 return mkexpr(bTmp);
5319 }
5320}
sewardj1c0ce7a2009-07-01 08:10:49 +00005321
5322/* Generate a shadow store. guard :: Ity_I1 controls whether the
5323 store really happens; NULL means it unconditionally does. */
sewardj7cf4e6b2008-05-01 20:24:26 +00005324static void gen_store_b ( MCEnv* mce, Int szB,
sewardj1c0ce7a2009-07-01 08:10:49 +00005325 IRAtom* baseaddr, Int offset, IRAtom* dataB,
5326 IRAtom* guard )
sewardj7cf4e6b2008-05-01 20:24:26 +00005327{
5328 void* hFun;
5329 HChar* hName;
5330 IRDirty* di;
sewardj1c0ce7a2009-07-01 08:10:49 +00005331 IRType aTy = typeOfIRExpr( mce->sb->tyenv, baseaddr );
sewardj7cf4e6b2008-05-01 20:24:26 +00005332 IROp opAdd = aTy == Ity_I32 ? Iop_Add32 : Iop_Add64;
5333 IRAtom* ea = baseaddr;
sewardj1c0ce7a2009-07-01 08:10:49 +00005334 if (guard) {
5335 tl_assert(isOriginalAtom(mce, guard));
5336 tl_assert(typeOfIRExpr(mce->sb->tyenv, guard) == Ity_I1);
5337 }
sewardj7cf4e6b2008-05-01 20:24:26 +00005338 if (offset != 0) {
5339 IRAtom* off = aTy == Ity_I32 ? mkU32( offset )
5340 : mkU64( (Long)(Int)offset );
5341 ea = assignNew( 'B', mce, aTy, binop(opAdd, ea, off));
5342 }
5343 if (mce->hWordTy == Ity_I64)
5344 dataB = assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, dataB));
5345
5346 switch (szB) {
5347 case 1: hFun = (void*)&MC_(helperc_b_store1);
5348 hName = "MC_(helperc_b_store1)";
5349 break;
5350 case 2: hFun = (void*)&MC_(helperc_b_store2);
5351 hName = "MC_(helperc_b_store2)";
5352 break;
5353 case 4: hFun = (void*)&MC_(helperc_b_store4);
5354 hName = "MC_(helperc_b_store4)";
5355 break;
5356 case 8: hFun = (void*)&MC_(helperc_b_store8);
5357 hName = "MC_(helperc_b_store8)";
5358 break;
5359 case 16: hFun = (void*)&MC_(helperc_b_store16);
5360 hName = "MC_(helperc_b_store16)";
5361 break;
5362 default:
5363 tl_assert(0);
5364 }
5365 di = unsafeIRDirty_0_N( 2/*regparms*/,
5366 hName, VG_(fnptr_to_fnentry)( hFun ),
5367 mkIRExprVec_2( ea, dataB )
5368 );
5369 /* no need to mess with any annotations. This call accesses
5370 neither guest state nor guest memory. */
sewardj1c0ce7a2009-07-01 08:10:49 +00005371 if (guard) di->guard = guard;
sewardj7cf4e6b2008-05-01 20:24:26 +00005372 stmt( 'B', mce, IRStmt_Dirty(di) );
5373}
5374
5375static IRAtom* narrowTo32 ( MCEnv* mce, IRAtom* e ) {
sewardj1c0ce7a2009-07-01 08:10:49 +00005376 IRType eTy = typeOfIRExpr(mce->sb->tyenv, e);
sewardj7cf4e6b2008-05-01 20:24:26 +00005377 if (eTy == Ity_I64)
5378 return assignNew( 'B', mce, Ity_I32, unop(Iop_64to32, e) );
5379 if (eTy == Ity_I32)
5380 return e;
5381 tl_assert(0);
5382}
5383
5384static IRAtom* zWidenFrom32 ( MCEnv* mce, IRType dstTy, IRAtom* e ) {
sewardj1c0ce7a2009-07-01 08:10:49 +00005385 IRType eTy = typeOfIRExpr(mce->sb->tyenv, e);
sewardj7cf4e6b2008-05-01 20:24:26 +00005386 tl_assert(eTy == Ity_I32);
5387 if (dstTy == Ity_I64)
5388 return assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, e) );
5389 tl_assert(0);
5390}
5391
sewardjdb5907d2009-11-26 17:20:21 +00005392
/* Compute, as a 32-bit B-value (origin tag), the origin for the
   expression 'e'.  Origins of subexpressions are folded together
   pessimistically with unsigned max (gen_maxU32).  Only called when
   origin tracking is enabled (MC_(clo_mc_level) == 3). */
static IRAtom* schemeE ( MCEnv* mce, IRExpr* e )
{
   tl_assert(MC_(clo_mc_level) == 3);

   switch (e->tag) {

      case Iex_GetI: {
         IRRegArray* descr_b;
         IRAtom *t1, *t2, *t3, *t4;
         IRRegArray* descr = e->Iex.GetI.descr;
         IRType equivIntTy
            = MC_(get_otrack_reg_array_equiv_int_type)(descr);
         /* If this array is unshadowable for whatever reason, use the
            usual approximation. */
         if (equivIntTy == Ity_INVALID)
            return mkU32(0);
         tl_assert(sizeofIRType(equivIntTy) >= 4);
         tl_assert(sizeofIRType(equivIntTy) == sizeofIRType(descr->elemTy));
         /* The B-shadow register array lives at an offset of twice the
            guest state size, mirroring the original array's layout. */
         descr_b = mkIRRegArray( descr->base + 2*mce->layout->total_sizeB,
                                 equivIntTy, descr->nElems );
         /* Do a shadow indexed get of the same size, giving t1.  Take
            the bottom 32 bits of it, giving t2.  Compute into t3 the
            origin for the index (almost certainly zero, but there's
            no harm in being completely general here, since iropt will
            remove any useless code), and fold it in, giving a final
            value t4. */
         t1 = assignNew( 'B', mce, equivIntTy,
                         IRExpr_GetI( descr_b, e->Iex.GetI.ix,
                                      e->Iex.GetI.bias ));
         t2 = narrowTo32( mce, t1 );
         t3 = schemeE( mce, e->Iex.GetI.ix );
         t4 = gen_maxU32( mce, t2, t3 );
         return t4;
      }
      case Iex_CCall: {
         Int i;
         IRAtom* here;
         IRExpr** args = e->Iex.CCall.args;
         IRAtom* curr = mkU32(0);
         for (i = 0; args[i]; i++) {
            tl_assert(i < 32);
            tl_assert(isOriginalAtom(mce, args[i]));
            /* Only take notice of this arg if the callee's
               mc-exclusion mask does not say it is to be excluded. */
            if (e->Iex.CCall.cee->mcx_mask & (1<<i)) {
               /* the arg is to be excluded from definedness checking.
                  Do nothing. */
               if (0) VG_(printf)("excluding %s(%d)\n",
                                  e->Iex.CCall.cee->name, i);
            } else {
               /* calculate the arg's definedness, and pessimistically
                  merge it in. */
               here = schemeE( mce, args[i] );
               curr = gen_maxU32( mce, curr, here );
            }
         }
         return curr;
      }
      case Iex_Load: {
         Int dszB;
         dszB = sizeofIRType(e->Iex.Load.ty);
         /* assert that the B value for the address is already
            available (somewhere) */
         tl_assert(isIRAtom(e->Iex.Load.addr));
         tl_assert(mce->hWordTy == Ity_I32 || mce->hWordTy == Ity_I64);
         return gen_load_b( mce, dszB, e->Iex.Load.addr, 0 );
      }
      case Iex_Mux0X: {
         /* Pessimistically join the origins of the condition and of
            both arms, rather than selecting one arm's origin. */
         IRAtom* b1 = schemeE( mce, e->Iex.Mux0X.cond );
         IRAtom* b2 = schemeE( mce, e->Iex.Mux0X.expr0 );
         IRAtom* b3 = schemeE( mce, e->Iex.Mux0X.exprX );
         return gen_maxU32( mce, b1, gen_maxU32( mce, b2, b3 ));
      }
      case Iex_Qop: {
         IRAtom* b1 = schemeE( mce, e->Iex.Qop.arg1 );
         IRAtom* b2 = schemeE( mce, e->Iex.Qop.arg2 );
         IRAtom* b3 = schemeE( mce, e->Iex.Qop.arg3 );
         IRAtom* b4 = schemeE( mce, e->Iex.Qop.arg4 );
         return gen_maxU32( mce, gen_maxU32( mce, b1, b2 ),
                                 gen_maxU32( mce, b3, b4 ) );
      }
      case Iex_Triop: {
         IRAtom* b1 = schemeE( mce, e->Iex.Triop.arg1 );
         IRAtom* b2 = schemeE( mce, e->Iex.Triop.arg2 );
         IRAtom* b3 = schemeE( mce, e->Iex.Triop.arg3 );
         return gen_maxU32( mce, b1, gen_maxU32( mce, b2, b3 ) );
      }
      case Iex_Binop: {
         switch (e->Iex.Binop.op) {
            case Iop_CasCmpEQ8:  case Iop_CasCmpNE8:
            case Iop_CasCmpEQ16: case Iop_CasCmpNE16:
            case Iop_CasCmpEQ32: case Iop_CasCmpNE32:
            case Iop_CasCmpEQ64: case Iop_CasCmpNE64:
               /* Just say these all produce a defined result,
                  regardless of their arguments.  See
                  COMMENT_ON_CasCmpEQ in this file. */
               return mkU32(0);
            default: {
               IRAtom* b1 = schemeE( mce, e->Iex.Binop.arg1 );
               IRAtom* b2 = schemeE( mce, e->Iex.Binop.arg2 );
               return gen_maxU32( mce, b1, b2 );
            }
         }
         tl_assert(0);
         /*NOTREACHED*/
      }
      case Iex_Unop: {
         /* A unop passes its argument's origin straight through. */
         IRAtom* b1 = schemeE( mce, e->Iex.Unop.arg );
         return b1;
      }
      case Iex_Const:
         /* Constants are always defined: origin 0 ("no origin"). */
         return mkU32(0);
      case Iex_RdTmp:
         return mkexpr( findShadowTmpB( mce, e->Iex.RdTmp.tmp ));
      case Iex_Get: {
         /* b_offset is -1 if this state slice has no B shadow. */
         Int b_offset = MC_(get_otrack_shadow_offset)(
                           e->Iex.Get.offset,
                           sizeofIRType(e->Iex.Get.ty)
                        );
         tl_assert(b_offset >= -1
                   && b_offset <= mce->layout->total_sizeB -4);
         if (b_offset >= 0) {
            /* FIXME: this isn't an atom! */
            return IRExpr_Get( b_offset + 2*mce->layout->total_sizeB,
                               Ity_I32 );
         }
         return mkU32(0);
      }
      default:
         VG_(printf)("mc_translate.c: schemeE: unhandled: ");
         ppIRExpr(e);
         VG_(tool_panic)("memcheck:schemeE");
   }
}
5527
sewardjdb5907d2009-11-26 17:20:21 +00005528
sewardj7cf4e6b2008-05-01 20:24:26 +00005529static void do_origins_Dirty ( MCEnv* mce, IRDirty* d )
5530{
5531 // This is a hacked version of do_shadow_Dirty
njn4c245e52009-03-15 23:25:38 +00005532 Int i, n, toDo, gSz, gOff;
sewardj7cf4e6b2008-05-01 20:24:26 +00005533 IRAtom *here, *curr;
5534 IRTemp dst;
sewardj7cf4e6b2008-05-01 20:24:26 +00005535
5536 /* First check the guard. */
5537 curr = schemeE( mce, d->guard );
5538
5539 /* Now round up all inputs and maxU32 over them. */
5540
5541 /* Inputs: unmasked args */
5542 for (i = 0; d->args[i]; i++) {
5543 if (d->cee->mcx_mask & (1<<i)) {
5544 /* ignore this arg */
5545 } else {
5546 here = schemeE( mce, d->args[i] );
5547 curr = gen_maxU32( mce, curr, here );
5548 }
5549 }
5550
5551 /* Inputs: guest state that we read. */
5552 for (i = 0; i < d->nFxState; i++) {
5553 tl_assert(d->fxState[i].fx != Ifx_None);
5554 if (d->fxState[i].fx == Ifx_Write)
5555 continue;
5556
5557 /* Ignore any sections marked as 'always defined'. */
5558 if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size )) {
5559 if (0)
5560 VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
5561 d->fxState[i].offset, d->fxState[i].size );
5562 continue;
5563 }
5564
5565 /* This state element is read or modified. So we need to
5566 consider it. If larger than 4 bytes, deal with it in 4-byte
5567 chunks. */
5568 gSz = d->fxState[i].size;
5569 gOff = d->fxState[i].offset;
5570 tl_assert(gSz > 0);
5571 while (True) {
5572 Int b_offset;
5573 if (gSz == 0) break;
5574 n = gSz <= 4 ? gSz : 4;
5575 /* update 'curr' with maxU32 of the state slice
5576 gOff .. gOff+n-1 */
5577 b_offset = MC_(get_otrack_shadow_offset)(gOff, 4);
5578 if (b_offset != -1) {
5579 here = assignNew( 'B',mce,
5580 Ity_I32,
5581 IRExpr_Get(b_offset + 2*mce->layout->total_sizeB,
5582 Ity_I32));
5583 curr = gen_maxU32( mce, curr, here );
5584 }
5585 gSz -= n;
5586 gOff += n;
5587 }
5588
5589 }
5590
5591 /* Inputs: memory */
5592
5593 if (d->mFx != Ifx_None) {
5594 /* Because we may do multiple shadow loads/stores from the same
5595 base address, it's best to do a single test of its
5596 definedness right now. Post-instrumentation optimisation
5597 should remove all but this test. */
5598 tl_assert(d->mAddr);
5599 here = schemeE( mce, d->mAddr );
5600 curr = gen_maxU32( mce, curr, here );
5601 }
5602
5603 /* Deal with memory inputs (reads or modifies) */
5604 if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
sewardj7cf4e6b2008-05-01 20:24:26 +00005605 toDo = d->mSize;
5606 /* chew off 32-bit chunks. We don't care about the endianness
5607 since it's all going to be condensed down to a single bit,
5608 but nevertheless choose an endianness which is hopefully
5609 native to the platform. */
5610 while (toDo >= 4) {
5611 here = gen_load_b( mce, 4, d->mAddr, d->mSize - toDo );
5612 curr = gen_maxU32( mce, curr, here );
5613 toDo -= 4;
5614 }
sewardj8c93fcc2008-10-30 13:08:31 +00005615 /* handle possible 16-bit excess */
5616 while (toDo >= 2) {
5617 here = gen_load_b( mce, 2, d->mAddr, d->mSize - toDo );
5618 curr = gen_maxU32( mce, curr, here );
5619 toDo -= 2;
5620 }
5621 tl_assert(toDo == 0); /* also need to handle 1-byte excess */
sewardj7cf4e6b2008-05-01 20:24:26 +00005622 }
5623
5624 /* Whew! So curr is a 32-bit B-value which should give an origin
5625 of some use if any of the inputs to the helper are undefined.
5626 Now we need to re-distribute the results to all destinations. */
5627
5628 /* Outputs: the destination temporary, if there is one. */
5629 if (d->tmp != IRTemp_INVALID) {
5630 dst = findShadowTmpB(mce, d->tmp);
5631 assign( 'V', mce, dst, curr );
5632 }
5633
5634 /* Outputs: guest state that we write or modify. */
5635 for (i = 0; i < d->nFxState; i++) {
5636 tl_assert(d->fxState[i].fx != Ifx_None);
5637 if (d->fxState[i].fx == Ifx_Read)
5638 continue;
5639
5640 /* Ignore any sections marked as 'always defined'. */
5641 if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size ))
5642 continue;
5643
5644 /* This state element is written or modified. So we need to
5645 consider it. If larger than 4 bytes, deal with it in 4-byte
5646 chunks. */
5647 gSz = d->fxState[i].size;
5648 gOff = d->fxState[i].offset;
5649 tl_assert(gSz > 0);
5650 while (True) {
5651 Int b_offset;
5652 if (gSz == 0) break;
5653 n = gSz <= 4 ? gSz : 4;
5654 /* Write 'curr' to the state slice gOff .. gOff+n-1 */
5655 b_offset = MC_(get_otrack_shadow_offset)(gOff, 4);
5656 if (b_offset != -1) {
5657 stmt( 'B', mce, IRStmt_Put(b_offset + 2*mce->layout->total_sizeB,
5658 curr ));
5659 }
5660 gSz -= n;
5661 gOff += n;
5662 }
5663 }
5664
5665 /* Outputs: memory that we write or modify. Same comments about
5666 endianness as above apply. */
5667 if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
sewardj7cf4e6b2008-05-01 20:24:26 +00005668 toDo = d->mSize;
5669 /* chew off 32-bit chunks */
5670 while (toDo >= 4) {
sewardj1c0ce7a2009-07-01 08:10:49 +00005671 gen_store_b( mce, 4, d->mAddr, d->mSize - toDo, curr,
5672 NULL/*guard*/ );
sewardj7cf4e6b2008-05-01 20:24:26 +00005673 toDo -= 4;
5674 }
sewardj8c93fcc2008-10-30 13:08:31 +00005675 /* handle possible 16-bit excess */
5676 while (toDo >= 2) {
sewardj1c0ce7a2009-07-01 08:10:49 +00005677 gen_store_b( mce, 2, d->mAddr, d->mSize - toDo, curr,
5678 NULL/*guard*/ );
sewardj8c93fcc2008-10-30 13:08:31 +00005679 toDo -= 2;
5680 }
5681 tl_assert(toDo == 0); /* also need to handle 1-byte excess */
sewardj7cf4e6b2008-05-01 20:24:26 +00005682 }
sewardj7cf4e6b2008-05-01 20:24:26 +00005683}
5684
sewardjdb5907d2009-11-26 17:20:21 +00005685
5686static void do_origins_Store ( MCEnv* mce,
5687 IREndness stEnd,
5688 IRExpr* stAddr,
5689 IRExpr* stData )
5690{
5691 Int dszB;
5692 IRAtom* dataB;
5693 /* assert that the B value for the address is already available
5694 (somewhere), since the call to schemeE will want to see it.
5695 XXXX how does this actually ensure that?? */
5696 tl_assert(isIRAtom(stAddr));
5697 tl_assert(isIRAtom(stData));
5698 dszB = sizeofIRType( typeOfIRExpr(mce->sb->tyenv, stData ) );
5699 dataB = schemeE( mce, stData );
5700 gen_store_b( mce, dszB, stAddr, 0/*offset*/, dataB,
5701 NULL/*guard*/ );
5702}
5703
5704
/* Generate origin-tracking (B) instrumentation for statement 'st'.
   Statement-level counterpart of schemeE.  Only called when origin
   tracking is enabled (MC_(clo_mc_level) == 3). */
static void schemeS ( MCEnv* mce, IRStmt* st )
{
   tl_assert(MC_(clo_mc_level) == 3);

   switch (st->tag) {

      case Ist_AbiHint:
         /* The value-check instrumenter handles this - by arranging
            to pass the address of the next instruction to
            MC_(helperc_MAKE_STACK_UNINIT).  This is all that needs to
            happen for origin tracking w.r.t. AbiHints.  So there is
            nothing to do here. */
         break;

      case Ist_PutI: {
         IRRegArray* descr_b;
         IRAtom *t1, *t2, *t3, *t4;
         IRRegArray* descr = st->Ist.PutI.descr;
         IRType equivIntTy
            = MC_(get_otrack_reg_array_equiv_int_type)(descr);
         /* If this array is unshadowable for whatever reason,
            generate no code. */
         if (equivIntTy == Ity_INVALID)
            break;
         tl_assert(sizeofIRType(equivIntTy) >= 4);
         tl_assert(sizeofIRType(equivIntTy) == sizeofIRType(descr->elemTy));
         /* The B-shadow register array mirrors the original at an
            offset of twice the guest state size. */
         descr_b
            = mkIRRegArray( descr->base + 2*mce->layout->total_sizeB,
                            equivIntTy, descr->nElems );
         /* Compute a value to Put - the conjoinment of the origin for
            the data to be Put-ted (obviously) and of the index value
            (not so obviously). */
         t1 = schemeE( mce, st->Ist.PutI.data );
         t2 = schemeE( mce, st->Ist.PutI.ix );
         t3 = gen_maxU32( mce, t1, t2 );
         t4 = zWidenFrom32( mce, equivIntTy, t3 );
         stmt( 'B', mce, IRStmt_PutI( descr_b, st->Ist.PutI.ix,
                                      st->Ist.PutI.bias, t4 ));
         break;
      }

      case Ist_Dirty:
         do_origins_Dirty( mce, st->Ist.Dirty.details );
         break;

      case Ist_Store:
         do_origins_Store( mce, st->Ist.Store.end,
                                st->Ist.Store.addr,
                                st->Ist.Store.data );
         break;

      case Ist_LLSC: {
         /* In short: treat a load-linked like a normal load followed
            by an assignment of the loaded (shadow) data the result
            temporary.  Treat a store-conditional like a normal store,
            and mark the result temporary as defined. */
         if (st->Ist.LLSC.storedata == NULL) {
            /* Load Linked */
            IRType resTy
               = typeOfIRTemp(mce->sb->tyenv, st->Ist.LLSC.result);
            IRExpr* vanillaLoad
               = IRExpr_Load(st->Ist.LLSC.end, resTy, st->Ist.LLSC.addr);
            tl_assert(resTy == Ity_I64 || resTy == Ity_I32
                      || resTy == Ity_I16 || resTy == Ity_I8);
            assign( 'B', mce, findShadowTmpB(mce, st->Ist.LLSC.result),
                              schemeE(mce, vanillaLoad));
         } else {
            /* Store conditional */
            do_origins_Store( mce, st->Ist.LLSC.end,
                                   st->Ist.LLSC.addr,
                                   st->Ist.LLSC.storedata );
            /* For the rationale behind this, see comments at the
               place where the V-shadow for .result is constructed, in
               do_shadow_LLSC.  In short, we regard .result as
               always-defined. */
            assign( 'B', mce, findShadowTmpB(mce, st->Ist.LLSC.result),
                              mkU32(0) );
         }
         break;
      }

      case Ist_Put: {
         /* b_offset is negative if this state slice has no B shadow,
            in which case no origin needs recording. */
         Int b_offset
            = MC_(get_otrack_shadow_offset)(
                 st->Ist.Put.offset,
                 sizeofIRType(typeOfIRExpr(mce->sb->tyenv, st->Ist.Put.data))
              );
         if (b_offset >= 0) {
            /* FIXME: this isn't an atom! */
            stmt( 'B', mce, IRStmt_Put(b_offset + 2*mce->layout->total_sizeB,
                                       schemeE( mce, st->Ist.Put.data )) );
         }
         break;
      }

      case Ist_WrTmp:
         assign( 'B', mce, findShadowTmpB(mce, st->Ist.WrTmp.tmp),
                           schemeE(mce, st->Ist.WrTmp.data) );
         break;

      /* No origin state to create or update for these. */
      case Ist_MBE:
      case Ist_NoOp:
      case Ist_Exit:
      case Ist_IMark:
         break;

      default:
         VG_(printf)("mc_translate.c: schemeS: unhandled: ");
         ppIRStmt(st);
         VG_(tool_panic)("memcheck:schemeS");
   }
}
5817
5818
njn25e49d8e72002-09-23 09:36:25 +00005819/*--------------------------------------------------------------------*/
njn25cac76cb2002-09-23 11:21:57 +00005820/*--- end mc_translate.c ---*/
njn25e49d8e72002-09-23 09:36:25 +00005821/*--------------------------------------------------------------------*/