blob: f07d90b61c4b54f16da7d2f779405aa977ef172d [file] [log] [blame]
nethercotebb1c9912004-01-04 16:43:23 +00001
njn25e49d8e72002-09-23 09:36:25 +00002/*--------------------------------------------------------------------*/
sewardj95448072004-11-22 20:19:51 +00003/*--- Instrument IR to perform memory checking operations. ---*/
njn25cac76cb2002-09-23 11:21:57 +00004/*--- mc_translate.c ---*/
njn25e49d8e72002-09-23 09:36:25 +00005/*--------------------------------------------------------------------*/
njnc9539842002-10-02 13:26:35 +00006
njn25e49d8e72002-09-23 09:36:25 +00007/*
nethercote137bc552003-11-14 17:47:54 +00008 This file is part of MemCheck, a heavyweight Valgrind tool for
njnc9539842002-10-02 13:26:35 +00009 detecting memory errors.
njn25e49d8e72002-09-23 09:36:25 +000010
sewardj9eecbbb2010-05-03 21:37:12 +000011 Copyright (C) 2000-2010 Julian Seward
njn25e49d8e72002-09-23 09:36:25 +000012 jseward@acm.org
13
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
18
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
23
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, write to the Free Software
26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27 02111-1307, USA.
28
29 The GNU General Public License is contained in the file COPYING.
30*/
31
njnc7561b92005-06-19 01:24:32 +000032#include "pub_tool_basics.h"
njn1d0825f2006-03-27 11:37:07 +000033#include "pub_tool_hashtable.h" // For mc_include.h
njn132bfcc2005-06-04 19:16:06 +000034#include "pub_tool_libcassert.h"
njn36a20fa2005-06-03 03:08:39 +000035#include "pub_tool_libcprint.h"
njnc7561b92005-06-19 01:24:32 +000036#include "pub_tool_tooliface.h"
sewardj53ee1fc2005-12-23 02:29:58 +000037#include "pub_tool_machine.h" // VG_(fnptr_to_fnentry)
sewardj81651dc2007-08-28 06:05:20 +000038#include "pub_tool_xarray.h"
39#include "pub_tool_mallocfree.h"
40#include "pub_tool_libcbase.h"
njn25e49d8e72002-09-23 09:36:25 +000041
sewardj7cf4e6b2008-05-01 20:24:26 +000042#include "mc_include.h"
43
44
sewardj7ee7d852011-06-16 11:37:21 +000045/* FIXMEs JRS 2011-June-16.
46
47 Check the interpretation for vector narrowing and widening ops,
48 particularly the saturating ones. I suspect they are either overly
49 pessimistic and/or wrong.
50*/
51
sewardj992dff92005-10-07 11:08:55 +000052/* This file implements the Memcheck instrumentation, and in
53 particular contains the core of its undefined value detection
54 machinery. For a comprehensive background of the terminology,
55 algorithms and rationale used herein, read:
56
57 Using Valgrind to detect undefined value errors with
58 bit-precision
59
60 Julian Seward and Nicholas Nethercote
61
62 2005 USENIX Annual Technical Conference (General Track),
63 Anaheim, CA, USA, April 10-15, 2005.
njn6665ea22007-05-24 23:14:41 +000064
65 ----
66
67 Here is as good a place as any to record exactly when V bits are and
68 should be checked, why, and what function is responsible.
69
70
71 Memcheck complains when an undefined value is used:
72
73 1. In the condition of a conditional branch. Because it could cause
74 incorrect control flow, and thus cause incorrect externally-visible
75 behaviour. [mc_translate.c:complainIfUndefined]
76
77 2. As an argument to a system call, or as the value that specifies
78 the system call number. Because it could cause an incorrect
79 externally-visible side effect. [mc_translate.c:mc_pre_reg_read]
80
81 3. As the address in a load or store. Because it could cause an
82 incorrect value to be used later, which could cause externally-visible
83 behaviour (eg. via incorrect control flow or an incorrect system call
84 argument) [complainIfUndefined]
85
86 4. As the target address of a branch. Because it could cause incorrect
87 control flow. [complainIfUndefined]
88
89 5. As an argument to setenv, unsetenv, or putenv. Because it could put
90 an incorrect value into the external environment.
91 [mc_replace_strmem.c:VG_WRAP_FUNCTION_ZU(*, *env)]
92
93 6. As the index in a GETI or PUTI operation. I'm not sure why... (njn).
94 [complainIfUndefined]
95
96 7. As an argument to the VALGRIND_CHECK_MEM_IS_DEFINED and
97 VALGRIND_CHECK_VALUE_IS_DEFINED client requests. Because the user
98 requested it. [in memcheck.h]
99
100
101 Memcheck also complains, but should not, when an undefined value is used:
102
103 8. As the shift value in certain SIMD shift operations (but not in the
104 standard integer shift operations). This inconsistency is due to
105 historical reasons.) [complainIfUndefined]
106
107
108 Memcheck does not complain, but should, when an undefined value is used:
109
110 9. As an input to a client request. Because the client request may
111 affect the visible behaviour -- see bug #144362 for an example
112 involving the malloc replacements in vg_replace_malloc.c and
113 VALGRIND_NON_SIMD_CALL* requests, where an uninitialised argument
114 isn't identified. That bug report also has some info on how to solve
115 the problem. [valgrind.h:VALGRIND_DO_CLIENT_REQUEST]
116
117
118 In practice, 1 and 2 account for the vast majority of cases.
sewardj992dff92005-10-07 11:08:55 +0000119*/
120
sewardj95448072004-11-22 20:19:51 +0000121/*------------------------------------------------------------*/
122/*--- Forward decls ---*/
123/*------------------------------------------------------------*/
124
125struct _MCEnv;
126
sewardj7cf4e6b2008-05-01 20:24:26 +0000127static IRType shadowTypeV ( IRType ty );
sewardj95448072004-11-22 20:19:51 +0000128static IRExpr* expr2vbits ( struct _MCEnv* mce, IRExpr* e );
sewardjafa617b2008-07-22 09:59:48 +0000129static IRTemp findShadowTmpB ( struct _MCEnv* mce, IRTemp orig );
sewardj95448072004-11-22 20:19:51 +0000130
sewardjb5b87402011-03-07 16:05:35 +0000131static IRExpr *i128_const_zero(void);
sewardj95448072004-11-22 20:19:51 +0000132
133/*------------------------------------------------------------*/
134/*--- Memcheck running state, and tmp management. ---*/
135/*------------------------------------------------------------*/
136
sewardj1c0ce7a2009-07-01 08:10:49 +0000137/* Carries info about a particular tmp. The tmp's number is not
138 recorded, as this is implied by (equal to) its index in the tmpMap
139 in MCEnv. The tmp's type is also not recorded, as this is present
140 in MCEnv.sb->tyenv.
141
142 When .kind is Orig, .shadowV and .shadowB may give the identities
143 of the temps currently holding the associated definedness (shadowV)
144 and origin (shadowB) values, or these may be IRTemp_INVALID if code
145 to compute such values has not yet been emitted.
146
147 When .kind is VSh or BSh then the tmp is holds a V- or B- value,
148 and so .shadowV and .shadowB must be IRTemp_INVALID, since it is
149 illogical for a shadow tmp itself to be shadowed.
150*/
151typedef
152 enum { Orig=1, VSh=2, BSh=3 }
153 TempKind;
154
155typedef
156 struct {
157 TempKind kind;
158 IRTemp shadowV;
159 IRTemp shadowB;
160 }
161 TempMapEnt;
162
163
sewardj95448072004-11-22 20:19:51 +0000164/* Carries around state during memcheck instrumentation. */
165typedef
166 struct _MCEnv {
sewardj0b9d74a2006-12-24 02:24:11 +0000167 /* MODIFIED: the superblock being constructed. IRStmts are
168 added. */
sewardj1c0ce7a2009-07-01 08:10:49 +0000169 IRSB* sb;
sewardj7cf4e6b2008-05-01 20:24:26 +0000170 Bool trace;
sewardj95448072004-11-22 20:19:51 +0000171
sewardj1c0ce7a2009-07-01 08:10:49 +0000172 /* MODIFIED: a table [0 .. #temps_in_sb-1] which gives the
173 current kind and possibly shadow temps for each temp in the
174 IRSB being constructed. Note that it does not contain the
175 type of each tmp. If you want to know the type, look at the
176 relevant entry in sb->tyenv. It follows that at all times
177 during the instrumentation process, the valid indices for
178 tmpMap and sb->tyenv are identical, being 0 .. N-1 where N is
179 total number of Orig, V- and B- temps allocated so far.
180
181 The reason for this strange split (types in one place, all
182 other info in another) is that we need the types to be
183 attached to sb so as to make it possible to do
184 "typeOfIRExpr(mce->bb->tyenv, ...)" at various places in the
185 instrumentation process. */
186 XArray* /* of TempMapEnt */ tmpMap;
sewardj95448072004-11-22 20:19:51 +0000187
sewardjd5204dc2004-12-31 01:16:11 +0000188 /* MODIFIED: indicates whether "bogus" literals have so far been
189 found. Starts off False, and may change to True. */
190 Bool bogusLiterals;
191
sewardj95448072004-11-22 20:19:51 +0000192 /* READONLY: the guest layout. This indicates which parts of
193 the guest state should be regarded as 'always defined'. */
194 VexGuestLayout* layout;
sewardj634ba772006-10-15 12:47:37 +0000195
sewardj95448072004-11-22 20:19:51 +0000196 /* READONLY: the host word type. Needed for constructing
197 arguments of type 'HWord' to be passed to helper functions.
198 Ity_I32 or Ity_I64 only. */
199 IRType hWordTy;
200 }
201 MCEnv;
202
203/* SHADOW TMP MANAGEMENT. Shadow tmps are allocated lazily (on
204 demand), as they are encountered. This is for two reasons.
205
206 (1) (less important reason): Many original tmps are unused due to
207 initial IR optimisation, and we do not want to waste space in tables
208 tracking them.
209
210 Shadow IRTemps are therefore allocated on demand. mce.tmpMap is a
211 table indexed [0 .. n_types-1], which gives the current shadow for
212 each original tmp, or INVALID_IRTEMP if none is so far assigned.
213 It is necessary to support making multiple assignments to a shadow
214 -- specifically, after testing a shadow for definedness, it needs
215 to be made defined. But IR's SSA property disallows this.
216
217 (2) (more important reason): Therefore, when a shadow needs to get
218 a new value, a new temporary is created, the value is assigned to
219 that, and the tmpMap is updated to reflect the new binding.
220
221 A corollary is that if the tmpMap maps a given tmp to
sewardjf1962d32006-10-19 13:22:16 +0000222 IRTemp_INVALID and we are hoping to read that shadow tmp, it means
sewardj95448072004-11-22 20:19:51 +0000223 there's a read-before-write error in the original tmps. The IR
224 sanity checker should catch all such anomalies, however.
njn25e49d8e72002-09-23 09:36:25 +0000225*/
sewardj95448072004-11-22 20:19:51 +0000226
sewardj1c0ce7a2009-07-01 08:10:49 +0000227/* Create a new IRTemp of type 'ty' and kind 'kind', and add it to
228 both the table in mce->sb and to our auxiliary mapping. Note that
229 newTemp may cause mce->tmpMap to resize, hence previous results
230 from VG_(indexXA)(mce->tmpMap) are invalidated. */
231static IRTemp newTemp ( MCEnv* mce, IRType ty, TempKind kind )
232{
233 Word newIx;
234 TempMapEnt ent;
235 IRTemp tmp = newIRTemp(mce->sb->tyenv, ty);
236 ent.kind = kind;
237 ent.shadowV = IRTemp_INVALID;
238 ent.shadowB = IRTemp_INVALID;
239 newIx = VG_(addToXA)( mce->tmpMap, &ent );
240 tl_assert(newIx == (Word)tmp);
241 return tmp;
242}
243
244
sewardj95448072004-11-22 20:19:51 +0000245/* Find the tmp currently shadowing the given original tmp. If none
246 so far exists, allocate one. */
sewardj7cf4e6b2008-05-01 20:24:26 +0000247static IRTemp findShadowTmpV ( MCEnv* mce, IRTemp orig )
njn25e49d8e72002-09-23 09:36:25 +0000248{
sewardj1c0ce7a2009-07-01 08:10:49 +0000249 TempMapEnt* ent;
250 /* VG_(indexXA) range-checks 'orig', hence no need to check
251 here. */
252 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
253 tl_assert(ent->kind == Orig);
254 if (ent->shadowV == IRTemp_INVALID) {
255 IRTemp tmpV
256 = newTemp( mce, shadowTypeV(mce->sb->tyenv->types[orig]), VSh );
257 /* newTemp may cause mce->tmpMap to resize, hence previous results
258 from VG_(indexXA) are invalid. */
259 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
260 tl_assert(ent->kind == Orig);
261 tl_assert(ent->shadowV == IRTemp_INVALID);
262 ent->shadowV = tmpV;
njn25e49d8e72002-09-23 09:36:25 +0000263 }
sewardj1c0ce7a2009-07-01 08:10:49 +0000264 return ent->shadowV;
njn25e49d8e72002-09-23 09:36:25 +0000265}
266
sewardj95448072004-11-22 20:19:51 +0000267/* Allocate a new shadow for the given original tmp. This means any
268 previous shadow is abandoned. This is needed because it is
269 necessary to give a new value to a shadow once it has been tested
270 for undefinedness, but unfortunately IR's SSA property disallows
271 this. Instead we must abandon the old shadow, allocate a new one
sewardj1c0ce7a2009-07-01 08:10:49 +0000272 and use that instead.
273
274 This is the same as findShadowTmpV, except we don't bother to see
275 if a shadow temp already existed -- we simply allocate a new one
276 regardless. */
sewardj7cf4e6b2008-05-01 20:24:26 +0000277static void newShadowTmpV ( MCEnv* mce, IRTemp orig )
njn25e49d8e72002-09-23 09:36:25 +0000278{
sewardj1c0ce7a2009-07-01 08:10:49 +0000279 TempMapEnt* ent;
280 /* VG_(indexXA) range-checks 'orig', hence no need to check
281 here. */
282 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
283 tl_assert(ent->kind == Orig);
284 if (1) {
285 IRTemp tmpV
286 = newTemp( mce, shadowTypeV(mce->sb->tyenv->types[orig]), VSh );
287 /* newTemp may cause mce->tmpMap to resize, hence previous results
288 from VG_(indexXA) are invalid. */
289 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
290 tl_assert(ent->kind == Orig);
291 ent->shadowV = tmpV;
292 }
sewardj95448072004-11-22 20:19:51 +0000293}
294
295
296/*------------------------------------------------------------*/
297/*--- IRAtoms -- a subset of IRExprs ---*/
298/*------------------------------------------------------------*/
299
300/* An atom is either an IRExpr_Const or an IRExpr_Tmp, as defined by
sewardj710d6c22005-03-20 18:55:15 +0000301 isIRAtom() in libvex_ir.h. Because this instrumenter expects flat
sewardj95448072004-11-22 20:19:51 +0000302 input, most of this code deals in atoms. Usefully, a value atom
303 always has a V-value which is also an atom: constants are shadowed
304 by constants, and temps are shadowed by the corresponding shadow
305 temporary. */
306
307typedef IRExpr IRAtom;
308
309/* (used for sanity checks only): is this an atom which looks
310 like it's from original code? */
311static Bool isOriginalAtom ( MCEnv* mce, IRAtom* a1 )
312{
313 if (a1->tag == Iex_Const)
314 return True;
sewardj1c0ce7a2009-07-01 08:10:49 +0000315 if (a1->tag == Iex_RdTmp) {
316 TempMapEnt* ent = VG_(indexXA)( mce->tmpMap, a1->Iex.RdTmp.tmp );
317 return ent->kind == Orig;
318 }
sewardj95448072004-11-22 20:19:51 +0000319 return False;
320}
321
322/* (used for sanity checks only): is this an atom which looks
323 like it's from shadow code? */
324static Bool isShadowAtom ( MCEnv* mce, IRAtom* a1 )
325{
326 if (a1->tag == Iex_Const)
327 return True;
sewardj1c0ce7a2009-07-01 08:10:49 +0000328 if (a1->tag == Iex_RdTmp) {
329 TempMapEnt* ent = VG_(indexXA)( mce->tmpMap, a1->Iex.RdTmp.tmp );
330 return ent->kind == VSh || ent->kind == BSh;
331 }
sewardj95448072004-11-22 20:19:51 +0000332 return False;
333}
334
335/* (used for sanity checks only): check that both args are atoms and
336 are identically-kinded. */
337static Bool sameKindedAtoms ( IRAtom* a1, IRAtom* a2 )
338{
sewardj0b9d74a2006-12-24 02:24:11 +0000339 if (a1->tag == Iex_RdTmp && a2->tag == Iex_RdTmp)
sewardj95448072004-11-22 20:19:51 +0000340 return True;
sewardjbef552a2005-08-30 12:54:36 +0000341 if (a1->tag == Iex_Const && a2->tag == Iex_Const)
sewardj95448072004-11-22 20:19:51 +0000342 return True;
343 return False;
344}
345
346
347/*------------------------------------------------------------*/
348/*--- Type management ---*/
349/*------------------------------------------------------------*/
350
351/* Shadow state is always accessed using integer types. This returns
352 an integer type with the same size (as per sizeofIRType) as the
353 given type. The only valid shadow types are Bit, I8, I16, I32,
sewardjb5b87402011-03-07 16:05:35 +0000354 I64, I128, V128. */
sewardj95448072004-11-22 20:19:51 +0000355
sewardj7cf4e6b2008-05-01 20:24:26 +0000356static IRType shadowTypeV ( IRType ty )
sewardj95448072004-11-22 20:19:51 +0000357{
358 switch (ty) {
359 case Ity_I1:
360 case Ity_I8:
361 case Ity_I16:
362 case Ity_I32:
sewardj6cf40ff2005-04-20 22:31:26 +0000363 case Ity_I64:
364 case Ity_I128: return ty;
sewardj3245c912004-12-10 14:58:26 +0000365 case Ity_F32: return Ity_I32;
366 case Ity_F64: return Ity_I64;
sewardjb5b87402011-03-07 16:05:35 +0000367 case Ity_F128: return Ity_I128;
sewardj3245c912004-12-10 14:58:26 +0000368 case Ity_V128: return Ity_V128;
sewardj95448072004-11-22 20:19:51 +0000369 default: ppIRType(ty);
sewardj7cf4e6b2008-05-01 20:24:26 +0000370 VG_(tool_panic)("memcheck:shadowTypeV");
sewardj95448072004-11-22 20:19:51 +0000371 }
372}
373
374/* Produce a 'defined' value of the given shadow type. Should only be
375 supplied shadow types (Bit/I8/I16/I32/UI64). */
376static IRExpr* definedOfType ( IRType ty ) {
377 switch (ty) {
sewardj170ee212004-12-10 18:57:51 +0000378 case Ity_I1: return IRExpr_Const(IRConst_U1(False));
379 case Ity_I8: return IRExpr_Const(IRConst_U8(0));
380 case Ity_I16: return IRExpr_Const(IRConst_U16(0));
381 case Ity_I32: return IRExpr_Const(IRConst_U32(0));
382 case Ity_I64: return IRExpr_Const(IRConst_U64(0));
sewardjb5b87402011-03-07 16:05:35 +0000383 case Ity_I128: return i128_const_zero();
sewardj170ee212004-12-10 18:57:51 +0000384 case Ity_V128: return IRExpr_Const(IRConst_V128(0x0000));
sewardjf1962d32006-10-19 13:22:16 +0000385 default: VG_(tool_panic)("memcheck:definedOfType");
njn25e49d8e72002-09-23 09:36:25 +0000386 }
387}
388
389
sewardj95448072004-11-22 20:19:51 +0000390/*------------------------------------------------------------*/
391/*--- Constructing IR fragments ---*/
392/*------------------------------------------------------------*/
393
sewardj95448072004-11-22 20:19:51 +0000394/* add stmt to a bb */
sewardj7cf4e6b2008-05-01 20:24:26 +0000395static inline void stmt ( HChar cat, MCEnv* mce, IRStmt* st ) {
396 if (mce->trace) {
397 VG_(printf)(" %c: ", cat);
398 ppIRStmt(st);
399 VG_(printf)("\n");
400 }
sewardj1c0ce7a2009-07-01 08:10:49 +0000401 addStmtToIRSB(mce->sb, st);
sewardj7cf4e6b2008-05-01 20:24:26 +0000402}
403
404/* assign value to tmp */
405static inline
406void assign ( HChar cat, MCEnv* mce, IRTemp tmp, IRExpr* expr ) {
sewardj1c0ce7a2009-07-01 08:10:49 +0000407 stmt(cat, mce, IRStmt_WrTmp(tmp,expr));
sewardj7cf4e6b2008-05-01 20:24:26 +0000408}
sewardj95448072004-11-22 20:19:51 +0000409
/* Shorthands for building the various kinds of expressions. */
#define triop(_op, _arg1, _arg2, _arg3) \
                                 IRExpr_Triop((_op),(_arg1),(_arg2),(_arg3))
#define binop(_op, _arg1, _arg2) IRExpr_Binop((_op),(_arg1),(_arg2))
#define unop(_op, _arg)          IRExpr_Unop((_op),(_arg))
#define mkU8(_n)                 IRExpr_Const(IRConst_U8(_n))
#define mkU16(_n)                IRExpr_Const(IRConst_U16(_n))
#define mkU32(_n)                IRExpr_Const(IRConst_U32(_n))
#define mkU64(_n)                IRExpr_Const(IRConst_U64(_n))
#define mkV128(_n)               IRExpr_Const(IRConst_V128(_n))
#define mkexpr(_tmp)             IRExpr_RdTmp((_tmp))
sewardj95448072004-11-22 20:19:51 +0000421
sewardj7cf4e6b2008-05-01 20:24:26 +0000422/* Bind the given expression to a new temporary, and return the
sewardj95448072004-11-22 20:19:51 +0000423 temporary. This effectively converts an arbitrary expression into
sewardj7cf4e6b2008-05-01 20:24:26 +0000424 an atom.
425
426 'ty' is the type of 'e' and hence the type that the new temporary
sewardj1c0ce7a2009-07-01 08:10:49 +0000427 needs to be. But passing it in is redundant, since we can deduce
428 the type merely by inspecting 'e'. So at least use that fact to
429 assert that the two types agree. */
430static IRAtom* assignNew ( HChar cat, MCEnv* mce, IRType ty, IRExpr* e )
431{
432 TempKind k;
433 IRTemp t;
434 IRType tyE = typeOfIRExpr(mce->sb->tyenv, e);
sewardj7cf4e6b2008-05-01 20:24:26 +0000435 tl_assert(tyE == ty); /* so 'ty' is redundant (!) */
sewardj1c0ce7a2009-07-01 08:10:49 +0000436 switch (cat) {
437 case 'V': k = VSh; break;
438 case 'B': k = BSh; break;
439 case 'C': k = Orig; break;
440 /* happens when we are making up new "orig"
441 expressions, for IRCAS handling */
442 default: tl_assert(0);
443 }
444 t = newTemp(mce, ty, k);
sewardj7cf4e6b2008-05-01 20:24:26 +0000445 assign(cat, mce, t, e);
sewardj95448072004-11-22 20:19:51 +0000446 return mkexpr(t);
447}
448
449
450/*------------------------------------------------------------*/
sewardjb5b87402011-03-07 16:05:35 +0000451/*--- Helper functions for 128-bit ops ---*/
452/*------------------------------------------------------------*/
453static IRExpr *i128_const_zero(void)
454{
455 return binop(Iop_64HLto128, IRExpr_Const(IRConst_U64(0)),
456 IRExpr_Const(IRConst_U64(0)));
457}
458
459/* There are no 128-bit loads and/or stores. So we do not need to worry
460 about that in expr2vbits_Load */
461
462/*------------------------------------------------------------*/
sewardj95448072004-11-22 20:19:51 +0000463/*--- Constructing definedness primitive ops ---*/
464/*------------------------------------------------------------*/
465
466/* --------- Defined-if-either-defined --------- */
467
468static IRAtom* mkDifD8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
469 tl_assert(isShadowAtom(mce,a1));
470 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000471 return assignNew('V', mce, Ity_I8, binop(Iop_And8, a1, a2));
sewardj95448072004-11-22 20:19:51 +0000472}
473
474static IRAtom* mkDifD16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
475 tl_assert(isShadowAtom(mce,a1));
476 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000477 return assignNew('V', mce, Ity_I16, binop(Iop_And16, a1, a2));
sewardj95448072004-11-22 20:19:51 +0000478}
479
480static IRAtom* mkDifD32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
481 tl_assert(isShadowAtom(mce,a1));
482 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000483 return assignNew('V', mce, Ity_I32, binop(Iop_And32, a1, a2));
sewardj95448072004-11-22 20:19:51 +0000484}
485
sewardj7010f6e2004-12-10 13:35:22 +0000486static IRAtom* mkDifD64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
487 tl_assert(isShadowAtom(mce,a1));
488 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000489 return assignNew('V', mce, Ity_I64, binop(Iop_And64, a1, a2));
sewardj7010f6e2004-12-10 13:35:22 +0000490}
491
sewardj20d38f22005-02-07 23:50:18 +0000492static IRAtom* mkDifDV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
sewardj170ee212004-12-10 18:57:51 +0000493 tl_assert(isShadowAtom(mce,a1));
494 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000495 return assignNew('V', mce, Ity_V128, binop(Iop_AndV128, a1, a2));
sewardj170ee212004-12-10 18:57:51 +0000496}
497
sewardj95448072004-11-22 20:19:51 +0000498/* --------- Undefined-if-either-undefined --------- */
499
500static IRAtom* mkUifU8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
501 tl_assert(isShadowAtom(mce,a1));
502 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000503 return assignNew('V', mce, Ity_I8, binop(Iop_Or8, a1, a2));
sewardj95448072004-11-22 20:19:51 +0000504}
505
506static IRAtom* mkUifU16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
507 tl_assert(isShadowAtom(mce,a1));
508 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000509 return assignNew('V', mce, Ity_I16, binop(Iop_Or16, a1, a2));
sewardj95448072004-11-22 20:19:51 +0000510}
511
512static IRAtom* mkUifU32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
513 tl_assert(isShadowAtom(mce,a1));
514 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000515 return assignNew('V', mce, Ity_I32, binop(Iop_Or32, a1, a2));
sewardj95448072004-11-22 20:19:51 +0000516}
517
518static IRAtom* mkUifU64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
519 tl_assert(isShadowAtom(mce,a1));
520 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000521 return assignNew('V', mce, Ity_I64, binop(Iop_Or64, a1, a2));
sewardj95448072004-11-22 20:19:51 +0000522}
523
sewardjb5b87402011-03-07 16:05:35 +0000524static IRAtom* mkUifU128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
525 IRAtom *tmp1, *tmp2, *tmp3, *tmp4, *tmp5, *tmp6;
526 tl_assert(isShadowAtom(mce,a1));
527 tl_assert(isShadowAtom(mce,a2));
528 tmp1 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, a1));
529 tmp2 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, a1));
530 tmp3 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, a2));
531 tmp4 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, a2));
532 tmp5 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp1, tmp3));
533 tmp6 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp2, tmp4));
534
535 return assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, tmp6, tmp5));
536}
537
sewardj20d38f22005-02-07 23:50:18 +0000538static IRAtom* mkUifUV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
sewardj3245c912004-12-10 14:58:26 +0000539 tl_assert(isShadowAtom(mce,a1));
540 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000541 return assignNew('V', mce, Ity_V128, binop(Iop_OrV128, a1, a2));
sewardj3245c912004-12-10 14:58:26 +0000542}
543
sewardje50a1b12004-12-17 01:24:54 +0000544static IRAtom* mkUifU ( MCEnv* mce, IRType vty, IRAtom* a1, IRAtom* a2 ) {
sewardj95448072004-11-22 20:19:51 +0000545 switch (vty) {
sewardje50a1b12004-12-17 01:24:54 +0000546 case Ity_I8: return mkUifU8(mce, a1, a2);
sewardja1d93302004-12-12 16:45:06 +0000547 case Ity_I16: return mkUifU16(mce, a1, a2);
548 case Ity_I32: return mkUifU32(mce, a1, a2);
549 case Ity_I64: return mkUifU64(mce, a1, a2);
sewardjb5b87402011-03-07 16:05:35 +0000550 case Ity_I128: return mkUifU128(mce, a1, a2);
sewardj20d38f22005-02-07 23:50:18 +0000551 case Ity_V128: return mkUifUV128(mce, a1, a2);
sewardj95448072004-11-22 20:19:51 +0000552 default:
553 VG_(printf)("\n"); ppIRType(vty); VG_(printf)("\n");
554 VG_(tool_panic)("memcheck:mkUifU");
njn25e49d8e72002-09-23 09:36:25 +0000555 }
556}
557
sewardj95448072004-11-22 20:19:51 +0000558/* --------- The Left-family of operations. --------- */
njn25e49d8e72002-09-23 09:36:25 +0000559
sewardj95448072004-11-22 20:19:51 +0000560static IRAtom* mkLeft8 ( MCEnv* mce, IRAtom* a1 ) {
561 tl_assert(isShadowAtom(mce,a1));
sewardj7cf4e6b2008-05-01 20:24:26 +0000562 return assignNew('V', mce, Ity_I8, unop(Iop_Left8, a1));
sewardj95448072004-11-22 20:19:51 +0000563}
564
565static IRAtom* mkLeft16 ( MCEnv* mce, IRAtom* a1 ) {
566 tl_assert(isShadowAtom(mce,a1));
sewardj7cf4e6b2008-05-01 20:24:26 +0000567 return assignNew('V', mce, Ity_I16, unop(Iop_Left16, a1));
sewardj95448072004-11-22 20:19:51 +0000568}
569
570static IRAtom* mkLeft32 ( MCEnv* mce, IRAtom* a1 ) {
571 tl_assert(isShadowAtom(mce,a1));
sewardj7cf4e6b2008-05-01 20:24:26 +0000572 return assignNew('V', mce, Ity_I32, unop(Iop_Left32, a1));
sewardj95448072004-11-22 20:19:51 +0000573}
574
sewardj681be302005-01-15 20:43:58 +0000575static IRAtom* mkLeft64 ( MCEnv* mce, IRAtom* a1 ) {
576 tl_assert(isShadowAtom(mce,a1));
sewardj7cf4e6b2008-05-01 20:24:26 +0000577 return assignNew('V', mce, Ity_I64, unop(Iop_Left64, a1));
sewardj681be302005-01-15 20:43:58 +0000578}
579
sewardj95448072004-11-22 20:19:51 +0000580/* --------- 'Improvement' functions for AND/OR. --------- */
581
582/* ImproveAND(data, vbits) = data OR vbits. Defined (0) data 0s give
583 defined (0); all other -> undefined (1).
584*/
585static IRAtom* mkImproveAND8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
njn25e49d8e72002-09-23 09:36:25 +0000586{
sewardj95448072004-11-22 20:19:51 +0000587 tl_assert(isOriginalAtom(mce, data));
588 tl_assert(isShadowAtom(mce, vbits));
589 tl_assert(sameKindedAtoms(data, vbits));
sewardj7cf4e6b2008-05-01 20:24:26 +0000590 return assignNew('V', mce, Ity_I8, binop(Iop_Or8, data, vbits));
sewardj95448072004-11-22 20:19:51 +0000591}
njn25e49d8e72002-09-23 09:36:25 +0000592
sewardj95448072004-11-22 20:19:51 +0000593static IRAtom* mkImproveAND16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
594{
595 tl_assert(isOriginalAtom(mce, data));
596 tl_assert(isShadowAtom(mce, vbits));
597 tl_assert(sameKindedAtoms(data, vbits));
sewardj7cf4e6b2008-05-01 20:24:26 +0000598 return assignNew('V', mce, Ity_I16, binop(Iop_Or16, data, vbits));
sewardj95448072004-11-22 20:19:51 +0000599}
njn25e49d8e72002-09-23 09:36:25 +0000600
sewardj95448072004-11-22 20:19:51 +0000601static IRAtom* mkImproveAND32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
602{
603 tl_assert(isOriginalAtom(mce, data));
604 tl_assert(isShadowAtom(mce, vbits));
605 tl_assert(sameKindedAtoms(data, vbits));
sewardj7cf4e6b2008-05-01 20:24:26 +0000606 return assignNew('V', mce, Ity_I32, binop(Iop_Or32, data, vbits));
sewardj95448072004-11-22 20:19:51 +0000607}
njn25e49d8e72002-09-23 09:36:25 +0000608
sewardj7010f6e2004-12-10 13:35:22 +0000609static IRAtom* mkImproveAND64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
610{
611 tl_assert(isOriginalAtom(mce, data));
612 tl_assert(isShadowAtom(mce, vbits));
613 tl_assert(sameKindedAtoms(data, vbits));
sewardj7cf4e6b2008-05-01 20:24:26 +0000614 return assignNew('V', mce, Ity_I64, binop(Iop_Or64, data, vbits));
sewardj7010f6e2004-12-10 13:35:22 +0000615}
616
sewardj20d38f22005-02-07 23:50:18 +0000617static IRAtom* mkImproveANDV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
sewardj170ee212004-12-10 18:57:51 +0000618{
619 tl_assert(isOriginalAtom(mce, data));
620 tl_assert(isShadowAtom(mce, vbits));
621 tl_assert(sameKindedAtoms(data, vbits));
sewardj7cf4e6b2008-05-01 20:24:26 +0000622 return assignNew('V', mce, Ity_V128, binop(Iop_OrV128, data, vbits));
sewardj170ee212004-12-10 18:57:51 +0000623}
624
sewardj95448072004-11-22 20:19:51 +0000625/* ImproveOR(data, vbits) = ~data OR vbits. Defined (0) data 1s give
626 defined (0); all other -> undefined (1).
627*/
628static IRAtom* mkImproveOR8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
629{
630 tl_assert(isOriginalAtom(mce, data));
631 tl_assert(isShadowAtom(mce, vbits));
632 tl_assert(sameKindedAtoms(data, vbits));
633 return assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +0000634 'V', mce, Ity_I8,
sewardj95448072004-11-22 20:19:51 +0000635 binop(Iop_Or8,
sewardj7cf4e6b2008-05-01 20:24:26 +0000636 assignNew('V', mce, Ity_I8, unop(Iop_Not8, data)),
sewardj95448072004-11-22 20:19:51 +0000637 vbits) );
638}
njn25e49d8e72002-09-23 09:36:25 +0000639
sewardj95448072004-11-22 20:19:51 +0000640static IRAtom* mkImproveOR16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
641{
642 tl_assert(isOriginalAtom(mce, data));
643 tl_assert(isShadowAtom(mce, vbits));
644 tl_assert(sameKindedAtoms(data, vbits));
645 return assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +0000646 'V', mce, Ity_I16,
sewardj95448072004-11-22 20:19:51 +0000647 binop(Iop_Or16,
sewardj7cf4e6b2008-05-01 20:24:26 +0000648 assignNew('V', mce, Ity_I16, unop(Iop_Not16, data)),
sewardj95448072004-11-22 20:19:51 +0000649 vbits) );
650}
njn25e49d8e72002-09-23 09:36:25 +0000651
sewardj95448072004-11-22 20:19:51 +0000652static IRAtom* mkImproveOR32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
653{
654 tl_assert(isOriginalAtom(mce, data));
655 tl_assert(isShadowAtom(mce, vbits));
656 tl_assert(sameKindedAtoms(data, vbits));
657 return assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +0000658 'V', mce, Ity_I32,
sewardj95448072004-11-22 20:19:51 +0000659 binop(Iop_Or32,
sewardj7cf4e6b2008-05-01 20:24:26 +0000660 assignNew('V', mce, Ity_I32, unop(Iop_Not32, data)),
sewardj95448072004-11-22 20:19:51 +0000661 vbits) );
662}
663
sewardj7010f6e2004-12-10 13:35:22 +0000664static IRAtom* mkImproveOR64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
665{
666 tl_assert(isOriginalAtom(mce, data));
667 tl_assert(isShadowAtom(mce, vbits));
668 tl_assert(sameKindedAtoms(data, vbits));
669 return assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +0000670 'V', mce, Ity_I64,
sewardj7010f6e2004-12-10 13:35:22 +0000671 binop(Iop_Or64,
sewardj7cf4e6b2008-05-01 20:24:26 +0000672 assignNew('V', mce, Ity_I64, unop(Iop_Not64, data)),
sewardj7010f6e2004-12-10 13:35:22 +0000673 vbits) );
674}
675
sewardj20d38f22005-02-07 23:50:18 +0000676static IRAtom* mkImproveORV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
sewardj170ee212004-12-10 18:57:51 +0000677{
678 tl_assert(isOriginalAtom(mce, data));
679 tl_assert(isShadowAtom(mce, vbits));
680 tl_assert(sameKindedAtoms(data, vbits));
681 return assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +0000682 'V', mce, Ity_V128,
sewardj20d38f22005-02-07 23:50:18 +0000683 binop(Iop_OrV128,
sewardj7cf4e6b2008-05-01 20:24:26 +0000684 assignNew('V', mce, Ity_V128, unop(Iop_NotV128, data)),
sewardj170ee212004-12-10 18:57:51 +0000685 vbits) );
686}
687
sewardj95448072004-11-22 20:19:51 +0000688/* --------- Pessimising casts. --------- */
689
sewardjb5b87402011-03-07 16:05:35 +0000690/* The function returns an expression of type DST_TY. If any of the VBITS
691 is undefined (value == 1) the resulting expression has all bits set to
692 1. Otherwise, all bits are 0. */
693
/* Pessimising cast: produce a value of (shadow) type dst_ty which is
   all-1s if any bit of 'vbits' is 1 (undefined), else all-0s. */
static IRAtom* mkPCastTo( MCEnv* mce, IRType dst_ty, IRAtom* vbits )
{
   IRType  src_ty;
   IRAtom* tmp1;
   /* Note, dst_ty is a shadow type, not an original type. */
   /* First of all, collapse vbits down to a single bit. */
   tl_assert(isShadowAtom(mce,vbits));
   src_ty = typeOfIRExpr(mce->sb->tyenv, vbits);

   /* Fast-track some common cases */
   /* CmpwNEZ collapses-and-widens in one IR op, avoiding the
      narrow-to-I1-then-sign-extend round trip below. */
   if (src_ty == Ity_I32 && dst_ty == Ity_I32)
      return assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));

   if (src_ty == Ity_I64 && dst_ty == Ity_I64)
      return assignNew('V', mce, Ity_I64, unop(Iop_CmpwNEZ64, vbits));

   if (src_ty == Ity_I32 && dst_ty == Ity_I64) {
      /* Collapse at 32 bits, then glue two copies to make 64. */
      IRAtom* tmp = assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));
      return assignNew('V', mce, Ity_I64, binop(Iop_32HLto64, tmp, tmp));
   }

   /* Else do it the slow way .. */
   /* Step 1: reduce 'vbits' to a single I1 which is 1 iff any source
      bit is 1. */
   tmp1   = NULL;
   switch (src_ty) {
      case Ity_I1:
         tmp1 = vbits;
         break;
      case Ity_I8:
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ8, vbits));
         break;
      case Ity_I16:
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ16, vbits));
         break;
      case Ity_I32:
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ32, vbits));
         break;
      case Ity_I64:
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ64, vbits));
         break;
      case Ity_I128: {
         /* Gah.  Chop it in half, OR the halves together, and compare
            that with zero. */
         IRAtom* tmp2 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, vbits));
         IRAtom* tmp3 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, vbits));
         IRAtom* tmp4 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp2, tmp3));
         tmp1         = assignNew('V', mce, Ity_I1,
                                       unop(Iop_CmpNEZ64, tmp4));
         break;
      }
      default:
         ppIRType(src_ty);
         VG_(tool_panic)("mkPCastTo(1)");
   }
   tl_assert(tmp1);
   /* Now widen up to the dst type. */
   /* Step 2: sign-extend the I1 so the result is all-0s or all-1s of
      dst_ty width. */
   switch (dst_ty) {
      case Ity_I1:
         return tmp1;
      case Ity_I8:
         return assignNew('V', mce, Ity_I8, unop(Iop_1Sto8, tmp1));
      case Ity_I16:
         return assignNew('V', mce, Ity_I16, unop(Iop_1Sto16, tmp1));
      case Ity_I32:
         return assignNew('V', mce, Ity_I32, unop(Iop_1Sto32, tmp1));
      case Ity_I64:
         return assignNew('V', mce, Ity_I64, unop(Iop_1Sto64, tmp1));
      case Ity_V128:
         /* Widen to 64, then duplicate into both 128-bit halves. */
         tmp1 = assignNew('V', mce, Ity_I64,  unop(Iop_1Sto64, tmp1));
         tmp1 = assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128, tmp1, tmp1));
         return tmp1;
      case Ity_I128:
         tmp1 = assignNew('V', mce, Ity_I64,  unop(Iop_1Sto64, tmp1));
         tmp1 = assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, tmp1, tmp1));
         return tmp1;
      default:
         ppIRType(dst_ty);
         VG_(tool_panic)("mkPCastTo(2)");
   }
}
773
sewardjd5204dc2004-12-31 01:16:11 +0000774/* --------- Accurate interpretation of CmpEQ/CmpNE. --------- */
775/*
776 Normally, we can do CmpEQ/CmpNE by doing UifU on the arguments, and
777 PCasting to Ity_U1. However, sometimes it is necessary to be more
778 accurate. The insight is that the result is defined if two
779 corresponding bits can be found, one from each argument, so that
780 both bits are defined but are different -- that makes EQ say "No"
781 and NE say "Yes". Hence, we compute an improvement term and DifD
782 it onto the "normal" (UifU) result.
783
784 The result is:
785
786 PCastTo<1> (
sewardje6f8af42005-07-06 18:48:59 +0000787 -- naive version
788 PCastTo<sz>( UifU<sz>(vxx, vyy) )
789
sewardjd5204dc2004-12-31 01:16:11 +0000790 `DifD<sz>`
sewardje6f8af42005-07-06 18:48:59 +0000791
792 -- improvement term
793 PCastTo<sz>( PCast<sz>( CmpEQ<sz> ( vec, 1...1 ) ) )
sewardjd5204dc2004-12-31 01:16:11 +0000794 )
sewardje6f8af42005-07-06 18:48:59 +0000795
sewardjd5204dc2004-12-31 01:16:11 +0000796 where
797 vec contains 0 (defined) bits where the corresponding arg bits
sewardje6f8af42005-07-06 18:48:59 +0000798 are defined but different, and 1 bits otherwise.
sewardjd5204dc2004-12-31 01:16:11 +0000799
sewardje6f8af42005-07-06 18:48:59 +0000800 vec = Or<sz>( vxx, // 0 iff bit defined
801 vyy, // 0 iff bit defined
802 Not<sz>(Xor<sz>( xx, yy )) // 0 iff bits different
803 )
804
805 If any bit of vec is 0, the result is defined and so the
806 improvement term should produce 0...0, else it should produce
807 1...1.
808
809 Hence require for the improvement term:
810
811 if vec == 1...1 then 1...1 else 0...0
812 ->
813 PCast<sz>( CmpEQ<sz> ( vec, 1...1 ) )
814
815 This was extensively re-analysed and checked on 6 July 05.
sewardjd5204dc2004-12-31 01:16:11 +0000816*/
/* Accurate definedness for equality comparison, per the scheme in the
   comment block above (re-analysed 6 July 05).  'ty' selects the
   32- or 64-bit op family; vxx/vyy are the shadows of xx/yy. */
static IRAtom* expensiveCmpEQorNE ( MCEnv*  mce,
                                    IRType  ty,
                                    IRAtom* vxx, IRAtom* vyy,
                                    IRAtom* xx,  IRAtom* yy )
{
   IRAtom *naive, *vec, *improvement_term;
   IRAtom *improved, *final_cast, *top;
   IROp   opDIFD, opUIFU, opXOR, opNOT, opCMP, opOR;

   tl_assert(isShadowAtom(mce,vxx));
   tl_assert(isShadowAtom(mce,vyy));
   tl_assert(isOriginalAtom(mce,xx));
   tl_assert(isOriginalAtom(mce,yy));
   tl_assert(sameKindedAtoms(vxx,xx));
   tl_assert(sameKindedAtoms(vyy,yy));

   /* Select the op family for the requested width.  Note DifD is And
      (defined-if-either-defined) and UifU is Or in the V-bits world. */
   switch (ty) {
      case Ity_I32:
         opOR   = Iop_Or32;
         opDIFD = Iop_And32;
         opUIFU = Iop_Or32;
         opNOT  = Iop_Not32;
         opXOR  = Iop_Xor32;
         opCMP  = Iop_CmpEQ32;
         top    = mkU32(0xFFFFFFFF);
         break;
      case Ity_I64:
         opOR   = Iop_Or64;
         opDIFD = Iop_And64;
         opUIFU = Iop_Or64;
         opNOT  = Iop_Not64;
         opXOR  = Iop_Xor64;
         opCMP  = Iop_CmpEQ64;
         top    = mkU64(0xFFFFFFFFFFFFFFFFULL);
         break;
      default:
         VG_(tool_panic)("expensiveCmpEQorNE");
   }

   /* naive = PCast( vxx `UifU` vyy ) -- the default interpretation */
   naive
      = mkPCastTo(mce,ty,
                  assignNew('V', mce, ty, binop(opUIFU, vxx, vyy)));

   /* vec has a 0 bit exactly where the corresponding arg bits are both
      defined AND different (see big comment above). */
   vec
      = assignNew(
           'V', mce,ty,
           binop( opOR,
                  assignNew('V', mce,ty, binop(opOR, vxx, vyy)),
                  assignNew(
                     'V', mce,ty,
                     unop( opNOT,
                           assignNew('V', mce,ty, binop(opXOR, xx, yy))))));

   /* improvement_term = PCast( vec == 1...1 ): all-0s (defined) if any
      vec bit is 0, else all-1s. */
   improvement_term
      = mkPCastTo( mce,ty,
                   assignNew('V', mce,Ity_I1, binop(opCMP, vec, top)));

   /* DifD the improvement onto the naive result. */
   improved
      = assignNew( 'V', mce,ty, binop(opDIFD, naive, improvement_term) );

   /* The comparison result is an I1, so PCast down to that. */
   final_cast
      = mkPCastTo( mce, Ity_I1, improved );

   return final_cast;
}
882
sewardj95448072004-11-22 20:19:51 +0000883
sewardj992dff92005-10-07 11:08:55 +0000884/* --------- Semi-accurate interpretation of CmpORD. --------- */
885
886/* CmpORD32{S,U} does PowerPC-style 3-way comparisons:
887
888 CmpORD32S(x,y) = 1<<3 if x <s y
889 = 1<<2 if x >s y
890 = 1<<1 if x == y
891
892 and similarly the unsigned variant. The default interpretation is:
893
894 CmpORD32{S,U}#(x,y,x#,y#) = PCast(x# `UifU` y#)
sewardj1bc82102005-12-23 00:16:24 +0000895 & (7<<1)
sewardj992dff92005-10-07 11:08:55 +0000896
897 The "& (7<<1)" reflects the fact that all result bits except 3,2,1
898 are zero and therefore defined (viz, zero).
sewardja9e62a92005-10-07 12:13:21 +0000899
900 Also deal with a special case better:
901
902 CmpORD32S(x,0)
903
904 Here, bit 3 (LT) of the result is a copy of the top bit of x and
905 will be defined even if the rest of x isn't. In which case we do:
906
907 CmpORD32S#(x,x#,0,{impliedly 0}#)
sewardj1bc82102005-12-23 00:16:24 +0000908 = PCast(x#) & (3<<1) -- standard interp for GT#,EQ#
909 | (x# >>u 31) << 3 -- LT# = x#[31]
sewardja9e62a92005-10-07 12:13:21 +0000910
sewardj1bc82102005-12-23 00:16:24 +0000911 Analogous handling for CmpORD64{S,U}.
sewardj992dff92005-10-07 11:08:55 +0000912*/
sewardja9e62a92005-10-07 12:13:21 +0000913static Bool isZeroU32 ( IRAtom* e )
914{
915 return
916 toBool( e->tag == Iex_Const
917 && e->Iex.Const.con->tag == Ico_U32
918 && e->Iex.Const.con->Ico.U32 == 0 );
919}
920
sewardj1bc82102005-12-23 00:16:24 +0000921static Bool isZeroU64 ( IRAtom* e )
sewardj992dff92005-10-07 11:08:55 +0000922{
sewardj1bc82102005-12-23 00:16:24 +0000923 return
924 toBool( e->tag == Iex_Const
925 && e->Iex.Const.con->tag == Ico_U64
926 && e->Iex.Const.con->Ico.U64 == 0 );
927}
928
/* Instrument CmpORD32/64{S,U} per the scheme in the comment above:
   the standard interpretation PCasts the UifU of the arg shadows and
   masks to bits 3..1; for the signed compare-against-literal-zero case
   bit 3 (LT) is recovered exactly from the top bit of xxhash. */
static IRAtom* doCmpORD ( MCEnv*  mce,
                          IROp    cmp_op,
                          IRAtom* xxhash, IRAtom* yyhash,
                          IRAtom* xx, IRAtom* yy )
{
   Bool   m64    = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U;
   Bool   syned  = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD32S;
   IROp   opOR   = m64 ? Iop_Or64  : Iop_Or32;
   IROp   opAND  = m64 ? Iop_And64 : Iop_And32;
   IROp   opSHL  = m64 ? Iop_Shl64 : Iop_Shl32;
   IROp   opSHR  = m64 ? Iop_Shr64 : Iop_Shr32;
   IRType ty     = m64 ? Ity_I64   : Ity_I32;
   Int    width  = m64 ? 64        : 32;

   /* width-appropriate zero-literal test */
   Bool (*isZero)(IRAtom*) = m64 ? isZeroU64 : isZeroU32;

   IRAtom* threeLeft1 = NULL;
   IRAtom* sevenLeft1 = NULL;

   tl_assert(isShadowAtom(mce,xxhash));
   tl_assert(isShadowAtom(mce,yyhash));
   tl_assert(isOriginalAtom(mce,xx));
   tl_assert(isOriginalAtom(mce,yy));
   tl_assert(sameKindedAtoms(xxhash,xx));
   tl_assert(sameKindedAtoms(yyhash,yy));
   tl_assert(cmp_op == Iop_CmpORD32S || cmp_op == Iop_CmpORD32U
             || cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U);

   if (0) {
      ppIROp(cmp_op); VG_(printf)(" ");
      ppIRExpr(xx); VG_(printf)(" "); ppIRExpr( yy ); VG_(printf)("\n");
   }

   if (syned && isZero(yy)) {
      /* fancy interpretation */
      /* if yy is zero, then it must be fully defined (zero#). */
      tl_assert(isZero(yyhash));
      threeLeft1 = m64 ? mkU64(3<<1) : mkU32(3<<1);
      /* Result = (PCast(x#) & (3<<1))     -- GT#,EQ# as usual
                | ((x# >>u width-1) << 3)  -- LT# = x#[msb] exactly */
      return
         binop(
            opOR,
            assignNew(
               'V', mce,ty,
               binop(
                  opAND,
                  mkPCastTo(mce,ty, xxhash),
                  threeLeft1
               )),
            assignNew(
               'V', mce,ty,
               binop(
                  opSHL,
                  assignNew(
                     'V', mce,ty,
                     binop(opSHR, xxhash, mkU8(width-1))),
                  mkU8(3)
               ))
         );
   } else {
      /* standard interpretation */
      /* Bits other than 3,2,1 are always zero, hence always defined. */
      sevenLeft1 = m64 ? mkU64(7<<1) : mkU32(7<<1);
      return
         binop(
            opAND,
            mkPCastTo( mce,ty,
                       mkUifU(mce,ty, xxhash,yyhash)),
            sevenLeft1
         );
   }
}
999
1000
sewardj95448072004-11-22 20:19:51 +00001001/*------------------------------------------------------------*/
1002/*--- Emit a test and complaint if something is undefined. ---*/
1003/*------------------------------------------------------------*/
1004
sewardj7cf4e6b2008-05-01 20:24:26 +00001005static IRAtom* schemeE ( MCEnv* mce, IRExpr* e ); /* fwds */
1006
1007
sewardj95448072004-11-22 20:19:51 +00001008/* Set the annotations on a dirty helper to indicate that the stack
1009 pointer and instruction pointers might be read. This is the
1010 behaviour of all 'emit-a-complaint' style functions we might
1011 call. */
1012
1013static void setHelperAnns ( MCEnv* mce, IRDirty* di ) {
1014 di->nFxState = 2;
1015 di->fxState[0].fx = Ifx_Read;
1016 di->fxState[0].offset = mce->layout->offset_SP;
1017 di->fxState[0].size = mce->layout->sizeof_SP;
1018 di->fxState[1].fx = Ifx_Read;
1019 di->fxState[1].offset = mce->layout->offset_IP;
1020 di->fxState[1].size = mce->layout->sizeof_IP;
1021}
1022
1023
1024/* Check the supplied **original** atom for undefinedness, and emit a
1025 complaint if so. Once that happens, mark it as defined. This is
1026 possible because the atom is either a tmp or literal. If it's a
1027 tmp, it will be shadowed by a tmp, and so we can set the shadow to
1028 be defined. In fact as mentioned above, we will have to allocate a
1029 new tmp to carry the new 'defined' shadow value, and update the
1030 original->tmp mapping accordingly; we cannot simply assign a new
1031 value to an existing shadow tmp as this breaks SSAness -- resulting
1032 in the post-instrumentation sanity checker spluttering in disapproval.
1033*/
/* Emit a guarded call to a MC_(helperc_value_check*_fail_*) helper
   that fires at run time iff 'atom' (an original tmp or literal) is
   not fully defined; afterwards mark the atom's shadow as defined so
   the complaint is raised at most once per use site.  See the long
   comment above for why a fresh shadow tmp must be allocated. */
static void complainIfUndefined ( MCEnv* mce, IRAtom* atom )
{
   IRAtom*  vatom;
   IRType   ty;
   Int      sz;
   IRDirty* di;
   IRAtom*  cond;
   IRAtom*  origin;
   void*    fn;
   HChar*   nm;
   IRExpr** args;
   Int      nargs;

   // Don't do V bit tests if we're not reporting undefined value errors.
   if (MC_(clo_mc_level) == 1)
      return;

   /* Since the original expression is atomic, there's no duplicated
      work generated by making multiple V-expressions for it.  So we
      don't really care about the possibility that someone else may
      also create a V-interpretion for it. */
   tl_assert(isOriginalAtom(mce, atom));
   vatom = expr2vbits( mce, atom );
   tl_assert(isShadowAtom(mce, vatom));
   tl_assert(sameKindedAtoms(atom, vatom));

   ty = typeOfIRExpr(mce->sb->tyenv, vatom);

   /* sz is only used for constructing the error message */
   sz = ty==Ity_I1 ? 0 : sizeofIRType(ty);

   cond = mkPCastTo( mce, Ity_I1, vatom );
   /* cond will be 0 if all defined, and 1 if any not defined. */

   /* Get the origin info for the value we are about to check.  At
      least, if we are doing origin tracking.  If not, use a dummy
      zero origin. */
   if (MC_(clo_mc_level) == 3) {
      origin = schemeE( mce, atom );
      if (mce->hWordTy == Ity_I64) {
         /* schemeE yields an I32 origin; widen for a 64-bit host. */
         origin = assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, origin) );
      }
   } else {
      origin = NULL;
   }

   fn    = NULL;
   nm    = NULL;
   args  = NULL;
   nargs = -1;

   /* Pick the helper matching the value's size, in a with-origin or
      no-origin variant depending on whether we have origin info. */
   switch (sz) {
      case 0:
         if (origin) {
            fn    = &MC_(helperc_value_check0_fail_w_o);
            nm    = "MC_(helperc_value_check0_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check0_fail_no_o);
            nm    = "MC_(helperc_value_check0_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 1:
         if (origin) {
            fn    = &MC_(helperc_value_check1_fail_w_o);
            nm    = "MC_(helperc_value_check1_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check1_fail_no_o);
            nm    = "MC_(helperc_value_check1_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 4:
         if (origin) {
            fn    = &MC_(helperc_value_check4_fail_w_o);
            nm    = "MC_(helperc_value_check4_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check4_fail_no_o);
            nm    = "MC_(helperc_value_check4_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 8:
         if (origin) {
            fn    = &MC_(helperc_value_check8_fail_w_o);
            nm    = "MC_(helperc_value_check8_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check8_fail_no_o);
            nm    = "MC_(helperc_value_check8_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 2:
      case 16:
         /* Odd sizes go through the generic checkN helper, which takes
            the size as an extra argument. */
         if (origin) {
            fn    = &MC_(helperc_value_checkN_fail_w_o);
            nm    = "MC_(helperc_value_checkN_fail_w_o)";
            args  = mkIRExprVec_2( mkIRExpr_HWord( sz ), origin);
            nargs = 2;
         } else {
            fn    = &MC_(helperc_value_checkN_fail_no_o);
            nm    = "MC_(helperc_value_checkN_fail_no_o)";
            args  = mkIRExprVec_1( mkIRExpr_HWord( sz ) );
            nargs = 1;
         }
         break;
      default:
         VG_(tool_panic)("unexpected szB");
   }

   tl_assert(fn);
   tl_assert(nm);
   tl_assert(args);
   tl_assert(nargs >= 0 && nargs <= 2);
   tl_assert( (MC_(clo_mc_level) == 3 && origin != NULL)
              || (MC_(clo_mc_level) == 2 && origin == NULL) );

   di = unsafeIRDirty_0_N( nargs/*regparms*/, nm,
                           VG_(fnptr_to_fnentry)( fn ), args );
   /* Guard the call so the helper only runs when cond == 1. */
   di->guard = cond;
   setHelperAnns( mce, di );
   stmt( 'V', mce, IRStmt_Dirty(di));

   /* Set the shadow tmp to be defined.  First, update the
      orig->shadow tmp mapping to reflect the fact that this shadow is
      getting a new value. */
   tl_assert(isIRAtom(vatom));
   /* sameKindedAtoms ... */
   if (vatom->tag == Iex_RdTmp) {
      tl_assert(atom->tag == Iex_RdTmp);
      newShadowTmpV(mce, atom->Iex.RdTmp.tmp);
      assign('V', mce, findShadowTmpV(mce, atom->Iex.RdTmp.tmp),
                       definedOfType(ty));
   }
}
1181
1182
1183/*------------------------------------------------------------*/
1184/*--- Shadowing PUTs/GETs, and indexed variants thereof ---*/
1185/*------------------------------------------------------------*/
1186
1187/* Examine the always-defined sections declared in layout to see if
1188 the (offset,size) section is within one. Note, it is an error to
1189 partially fall into such a region: (offset,size) should either be
1190 completely in such a region or completely not-in such a region.
1191*/
1192static Bool isAlwaysDefd ( MCEnv* mce, Int offset, Int size )
1193{
1194 Int minoffD, maxoffD, i;
1195 Int minoff = offset;
1196 Int maxoff = minoff + size - 1;
1197 tl_assert((minoff & ~0xFFFF) == 0);
1198 tl_assert((maxoff & ~0xFFFF) == 0);
1199
1200 for (i = 0; i < mce->layout->n_alwaysDefd; i++) {
1201 minoffD = mce->layout->alwaysDefd[i].offset;
1202 maxoffD = minoffD + mce->layout->alwaysDefd[i].size - 1;
1203 tl_assert((minoffD & ~0xFFFF) == 0);
1204 tl_assert((maxoffD & ~0xFFFF) == 0);
1205
1206 if (maxoff < minoffD || maxoffD < minoff)
1207 continue; /* no overlap */
1208 if (minoff >= minoffD && maxoff <= maxoffD)
1209 return True; /* completely contained in an always-defd section */
1210
1211 VG_(tool_panic)("memcheck:isAlwaysDefd:partial overlap");
1212 }
1213 return False; /* could not find any containing section */
1214}
1215
1216
1217/* Generate into bb suitable actions to shadow this Put. If the state
1218 slice is marked 'always defined', do nothing. Otherwise, write the
1219 supplied V bits to the shadow state. We can pass in either an
1220 original atom or a V-atom, but not both. In the former case the
1221 relevant V-bits are then generated from the original.
1222*/
/* Shadow a Put of 'atom' (or, alternatively, an already-computed
   shadow 'vatom') at guest-state 'offset'.  Exactly one of atom/vatom
   must be non-NULL.  Always-defined slices are skipped entirely. */
static
void do_shadow_PUT ( MCEnv* mce,  Int offset,
                     IRAtom* atom, IRAtom* vatom )
{
   IRType ty;

   // Don't do shadow PUTs if we're not doing undefined value checking.
   // Their absence lets Vex's optimiser remove all the shadow computation
   // that they depend on, which includes GETs of the shadow registers.
   if (MC_(clo_mc_level) == 1)
      return;

   if (atom) {
      tl_assert(!vatom);
      tl_assert(isOriginalAtom(mce, atom));
      vatom = expr2vbits( mce, atom );
   } else {
      tl_assert(vatom);
      tl_assert(isShadowAtom(mce, vatom));
   }

   ty = typeOfIRExpr(mce->sb->tyenv, vatom);
   tl_assert(ty != Ity_I1);
   tl_assert(ty != Ity_I128);
   if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
      /* later: no ... */
      /* emit code to emit a complaint if any of the vbits are 1. */
      /* complainIfUndefined(mce, atom); */
   } else {
      /* Do a plain shadow Put. */
      stmt( 'V', mce, IRStmt_Put( offset + mce->layout->total_sizeB, vatom ) );
   }
}
1256
1257
1258/* Generate into bb suitable actions to shadow this PutI: write the
1259 supplied V bits to the shadow of the indexed guest-state array slice.
1260*/
1261static
1262void do_shadow_PUTI ( MCEnv* mce,
sewardj0b9d74a2006-12-24 02:24:11 +00001263 IRRegArray* descr,
1264 IRAtom* ix, Int bias, IRAtom* atom )
sewardj95448072004-11-22 20:19:51 +00001265{
sewardj7cf97ee2004-11-28 14:25:01 +00001266 IRAtom* vatom;
1267 IRType ty, tyS;
1268 Int arrSize;;
1269
njn1d0825f2006-03-27 11:37:07 +00001270 // Don't do shadow PUTIs if we're not doing undefined value checking.
1271 // Their absence lets Vex's optimiser remove all the shadow computation
1272 // that they depend on, which includes GETIs of the shadow registers.
sewardj7cf4e6b2008-05-01 20:24:26 +00001273 if (MC_(clo_mc_level) == 1)
njn1d0825f2006-03-27 11:37:07 +00001274 return;
1275
sewardj95448072004-11-22 20:19:51 +00001276 tl_assert(isOriginalAtom(mce,atom));
sewardj7cf97ee2004-11-28 14:25:01 +00001277 vatom = expr2vbits( mce, atom );
sewardj95448072004-11-22 20:19:51 +00001278 tl_assert(sameKindedAtoms(atom, vatom));
sewardj7cf97ee2004-11-28 14:25:01 +00001279 ty = descr->elemTy;
sewardj7cf4e6b2008-05-01 20:24:26 +00001280 tyS = shadowTypeV(ty);
sewardj7cf97ee2004-11-28 14:25:01 +00001281 arrSize = descr->nElems * sizeofIRType(ty);
sewardj95448072004-11-22 20:19:51 +00001282 tl_assert(ty != Ity_I1);
1283 tl_assert(isOriginalAtom(mce,ix));
1284 complainIfUndefined(mce,ix);
1285 if (isAlwaysDefd(mce, descr->base, arrSize)) {
1286 /* later: no ... */
1287 /* emit code to emit a complaint if any of the vbits are 1. */
1288 /* complainIfUndefined(mce, atom); */
1289 } else {
1290 /* Do a cloned version of the Put that refers to the shadow
1291 area. */
sewardj0b9d74a2006-12-24 02:24:11 +00001292 IRRegArray* new_descr
1293 = mkIRRegArray( descr->base + mce->layout->total_sizeB,
1294 tyS, descr->nElems);
sewardj7cf4e6b2008-05-01 20:24:26 +00001295 stmt( 'V', mce, IRStmt_PutI( new_descr, ix, bias, vatom ));
sewardj95448072004-11-22 20:19:51 +00001296 }
1297}
1298
1299
1300/* Return an expression which contains the V bits corresponding to the
1301 given GET (passed in in pieces).
1302*/
/* Return an expression giving the V bits for a Get of (offset,ty):
   either constant all-zeroes (always-defined slice) or a Get from the
   shadow guest-state area. */
static
IRExpr* shadow_GET ( MCEnv* mce, Int offset, IRType ty )
{
   IRType tyS = shadowTypeV(ty);
   tl_assert(ty != Ity_I1);
   tl_assert(ty != Ity_I128);
   if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
      /* Always defined, return all zeroes of the relevant type */
      return definedOfType(tyS);
   } else {
      /* return a cloned version of the Get that refers to the shadow
         area. */
      /* FIXME: this isn't an atom! */
      return IRExpr_Get( offset + mce->layout->total_sizeB, tyS );
   }
}
1319
1320
1321/* Return an expression which contains the V bits corresponding to the
1322 given GETI (passed in in pieces).
1323*/
1324static
sewardj0b9d74a2006-12-24 02:24:11 +00001325IRExpr* shadow_GETI ( MCEnv* mce,
1326 IRRegArray* descr, IRAtom* ix, Int bias )
sewardj95448072004-11-22 20:19:51 +00001327{
1328 IRType ty = descr->elemTy;
sewardj7cf4e6b2008-05-01 20:24:26 +00001329 IRType tyS = shadowTypeV(ty);
sewardj95448072004-11-22 20:19:51 +00001330 Int arrSize = descr->nElems * sizeofIRType(ty);
1331 tl_assert(ty != Ity_I1);
1332 tl_assert(isOriginalAtom(mce,ix));
1333 complainIfUndefined(mce,ix);
1334 if (isAlwaysDefd(mce, descr->base, arrSize)) {
1335 /* Always defined, return all zeroes of the relevant type */
1336 return definedOfType(tyS);
1337 } else {
1338 /* return a cloned version of the Get that refers to the shadow
1339 area. */
sewardj0b9d74a2006-12-24 02:24:11 +00001340 IRRegArray* new_descr
1341 = mkIRRegArray( descr->base + mce->layout->total_sizeB,
1342 tyS, descr->nElems);
sewardj95448072004-11-22 20:19:51 +00001343 return IRExpr_GetI( new_descr, ix, bias );
1344 }
1345}
1346
1347
1348/*------------------------------------------------------------*/
1349/*--- Generating approximations for unknown operations, ---*/
1350/*--- using lazy-propagate semantics ---*/
1351/*------------------------------------------------------------*/
1352
1353/* Lazy propagation of undefinedness from two values, resulting in the
1354 specified shadow type.
1355*/
1356static
1357IRAtom* mkLazy2 ( MCEnv* mce, IRType finalVty, IRAtom* va1, IRAtom* va2 )
1358{
sewardj95448072004-11-22 20:19:51 +00001359 IRAtom* at;
sewardj1c0ce7a2009-07-01 08:10:49 +00001360 IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
1361 IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
sewardj95448072004-11-22 20:19:51 +00001362 tl_assert(isShadowAtom(mce,va1));
1363 tl_assert(isShadowAtom(mce,va2));
sewardj37c31cc2005-04-26 23:49:24 +00001364
1365 /* The general case is inefficient because PCast is an expensive
1366 operation. Here are some special cases which use PCast only
1367 once rather than twice. */
1368
1369 /* I64 x I64 -> I64 */
1370 if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I64) {
1371 if (0) VG_(printf)("mkLazy2: I64 x I64 -> I64\n");
1372 at = mkUifU(mce, Ity_I64, va1, va2);
1373 at = mkPCastTo(mce, Ity_I64, at);
1374 return at;
1375 }
1376
1377 /* I64 x I64 -> I32 */
1378 if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I32) {
1379 if (0) VG_(printf)("mkLazy2: I64 x I64 -> I32\n");
1380 at = mkUifU(mce, Ity_I64, va1, va2);
1381 at = mkPCastTo(mce, Ity_I32, at);
1382 return at;
1383 }
1384
1385 if (0) {
1386 VG_(printf)("mkLazy2 ");
1387 ppIRType(t1);
1388 VG_(printf)("_");
1389 ppIRType(t2);
1390 VG_(printf)("_");
1391 ppIRType(finalVty);
1392 VG_(printf)("\n");
1393 }
1394
1395 /* General case: force everything via 32-bit intermediaries. */
sewardj95448072004-11-22 20:19:51 +00001396 at = mkPCastTo(mce, Ity_I32, va1);
1397 at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
1398 at = mkPCastTo(mce, finalVty, at);
1399 return at;
1400}
1401
1402
/* 3-arg version of the above.  Lazily (i.e. inexactly) compute the
   shadow (V-bit) value of a 3-ary operation: UifU the three shadow
   args together and PCast the result to the final shadow type.  The
   typical client is an FP operation of the form rm x arg1 x arg2,
   where the I32 'rm' is a rounding mode.  Panics on argument-type
   combinations it does not know about. */
static
IRAtom* mkLazy3 ( MCEnv* mce, IRType finalVty,
                  IRAtom* va1, IRAtom* va2, IRAtom* va3 )
{
   IRAtom* at;
   IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
   IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
   IRType t3 = typeOfIRExpr(mce->sb->tyenv, va3);
   tl_assert(isShadowAtom(mce,va1));
   tl_assert(isShadowAtom(mce,va2));
   tl_assert(isShadowAtom(mce,va3));

   /* The general case is inefficient because PCast is an expensive
      operation.  Here are some special cases which use PCast only
      twice rather than three times. */

   /* I32 x I64 x I64 -> I64 */
   /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
   if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64
       && finalVty == Ity_I64) {
      if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I64\n");
      /* Widen 1st arg to I64.  Since 1st arg is typically a rounding
         mode indication which is fully defined, this should get
         folded out later. */
      at = mkPCastTo(mce, Ity_I64, va1);
      /* Now fold in 2nd and 3rd args. */
      at = mkUifU(mce, Ity_I64, at, va2);
      at = mkUifU(mce, Ity_I64, at, va3);
      /* and PCast once again. */
      at = mkPCastTo(mce, Ity_I64, at);
      return at;
   }

   /* I32 x I64 x I64 -> I32 */
   if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64
       && finalVty == Ity_I32) {
      if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I32\n");
      at = mkPCastTo(mce, Ity_I64, va1);
      at = mkUifU(mce, Ity_I64, at, va2);
      at = mkUifU(mce, Ity_I64, at, va3);
      at = mkPCastTo(mce, Ity_I32, at);
      return at;
   }

   /* I32 x I32 x I32 -> I32 */
   /* 32-bit FP idiom, as (eg) happens on ARM */
   if (t1 == Ity_I32 && t2 == Ity_I32 && t3 == Ity_I32
       && finalVty == Ity_I32) {
      if (0) VG_(printf)("mkLazy3: I32 x I32 x I32 -> I32\n");
      /* All args are already I32, so no initial widening PCast of
         va1 is needed. */
      at = va1;
      at = mkUifU(mce, Ity_I32, at, va2);
      at = mkUifU(mce, Ity_I32, at, va3);
      at = mkPCastTo(mce, Ity_I32, at);
      return at;
   }

   /* I32 x I128 x I128 -> I128 */
   /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
   if (t1 == Ity_I32 && t2 == Ity_I128 && t3 == Ity_I128
       && finalVty == Ity_I128) {
      if (0) VG_(printf)("mkLazy3: I32 x I128 x I128 -> I128\n");
      /* Widen 1st arg to I128.  Since 1st arg is typically a rounding
         mode indication which is fully defined, this should get
         folded out later. */
      at = mkPCastTo(mce, Ity_I128, va1);
      /* Now fold in 2nd and 3rd args. */
      at = mkUifU(mce, Ity_I128, at, va2);
      at = mkUifU(mce, Ity_I128, at, va3);
      /* and PCast once again. */
      at = mkPCastTo(mce, Ity_I128, at);
      return at;
   }

   /* No special case matched: print the offending signature and give
      up.  The general-case fallback below is intentionally disabled
      so that unhandled signatures get noticed. */
   if (1) {
      VG_(printf)("mkLazy3: ");
      ppIRType(t1);
      VG_(printf)(" x ");
      ppIRType(t2);
      VG_(printf)(" x ");
      ppIRType(t3);
      VG_(printf)(" -> ");
      ppIRType(finalVty);
      VG_(printf)("\n");
   }

   tl_assert(0);
   /* General case: force everything via 32-bit intermediaries. */
   /*
   at = mkPCastTo(mce, Ity_I32, va1);
   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va3));
   at = mkPCastTo(mce, finalVty, at);
   return at;
   */
}
1498
1499
/* 4-arg version of the above.  Typical client: FP fused multiply-add
   and friends, i.e. rm x arg1 x arg2 x arg3 -> result.  Panics on
   argument-type combinations it does not know about (there is no
   general fallback here). */
static
IRAtom* mkLazy4 ( MCEnv* mce, IRType finalVty,
                  IRAtom* va1, IRAtom* va2, IRAtom* va3, IRAtom* va4 )
{
   IRAtom* at;
   IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
   IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
   IRType t3 = typeOfIRExpr(mce->sb->tyenv, va3);
   IRType t4 = typeOfIRExpr(mce->sb->tyenv, va4);
   tl_assert(isShadowAtom(mce,va1));
   tl_assert(isShadowAtom(mce,va2));
   tl_assert(isShadowAtom(mce,va3));
   tl_assert(isShadowAtom(mce,va4));

   /* The general case is inefficient because PCast is an expensive
      operation.  Here are some special cases which use PCast only
      twice rather than four times. */

   /* I32 x I64 x I64 x I64 -> I64 */
   /* Standard FP idiom: rm x FParg1 x FParg2 x FParg3 -> FPresult */
   if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64 && t4 == Ity_I64
       && finalVty == Ity_I64) {
      if (0) VG_(printf)("mkLazy4: I32 x I64 x I64 x I64 -> I64\n");
      /* Widen 1st arg to I64.  Since 1st arg is typically a rounding
         mode indication which is fully defined, this should get
         folded out later. */
      at = mkPCastTo(mce, Ity_I64, va1);
      /* Now fold in 2nd, 3rd, 4th args. */
      at = mkUifU(mce, Ity_I64, at, va2);
      at = mkUifU(mce, Ity_I64, at, va3);
      at = mkUifU(mce, Ity_I64, at, va4);
      /* and PCast once again. */
      at = mkPCastTo(mce, Ity_I64, at);
      return at;
   }
   /* I32 x I32 x I32 x I32 -> I32 */
   /* Standard FP idiom: rm x FParg1 x FParg2 x FParg3 -> FPresult */
   if (t1 == Ity_I32 && t2 == Ity_I32 && t3 == Ity_I32 && t4 == Ity_I32
       && finalVty == Ity_I32) {
      if (0) VG_(printf)("mkLazy4: I32 x I32 x I32 x I32 -> I32\n");
      /* All args are already I32, so no initial widening is needed. */
      at = va1;
      /* Now fold in 2nd, 3rd, 4th args. */
      at = mkUifU(mce, Ity_I32, at, va2);
      at = mkUifU(mce, Ity_I32, at, va3);
      at = mkUifU(mce, Ity_I32, at, va4);
      at = mkPCastTo(mce, Ity_I32, at);
      return at;
   }

   /* No special case matched: print the offending signature and
      give up. */
   if (1) {
      VG_(printf)("mkLazy4: ");
      ppIRType(t1);
      VG_(printf)(" x ");
      ppIRType(t2);
      VG_(printf)(" x ");
      ppIRType(t3);
      VG_(printf)(" x ");
      ppIRType(t4);
      VG_(printf)(" -> ");
      ppIRType(finalVty);
      VG_(printf)("\n");
   }

   tl_assert(0);
}
1566
1567
sewardj95448072004-11-22 20:19:51 +00001568/* Do the lazy propagation game from a null-terminated vector of
1569 atoms. This is presumably the arguments to a helper call, so the
1570 IRCallee info is also supplied in order that we can know which
1571 arguments should be ignored (via the .mcx_mask field).
1572*/
1573static
1574IRAtom* mkLazyN ( MCEnv* mce,
1575 IRAtom** exprvec, IRType finalVtype, IRCallee* cee )
1576{
sewardj4cc684b2007-08-25 23:09:36 +00001577 Int i;
sewardj95448072004-11-22 20:19:51 +00001578 IRAtom* here;
sewardj4cc684b2007-08-25 23:09:36 +00001579 IRAtom* curr;
1580 IRType mergeTy;
sewardj99430032011-05-04 09:09:31 +00001581 Bool mergeTy64 = True;
sewardj4cc684b2007-08-25 23:09:36 +00001582
1583 /* Decide on the type of the merge intermediary. If all relevant
1584 args are I64, then it's I64. In all other circumstances, use
1585 I32. */
1586 for (i = 0; exprvec[i]; i++) {
1587 tl_assert(i < 32);
1588 tl_assert(isOriginalAtom(mce, exprvec[i]));
1589 if (cee->mcx_mask & (1<<i))
1590 continue;
sewardj1c0ce7a2009-07-01 08:10:49 +00001591 if (typeOfIRExpr(mce->sb->tyenv, exprvec[i]) != Ity_I64)
sewardj4cc684b2007-08-25 23:09:36 +00001592 mergeTy64 = False;
1593 }
1594
1595 mergeTy = mergeTy64 ? Ity_I64 : Ity_I32;
1596 curr = definedOfType(mergeTy);
1597
sewardj95448072004-11-22 20:19:51 +00001598 for (i = 0; exprvec[i]; i++) {
1599 tl_assert(i < 32);
1600 tl_assert(isOriginalAtom(mce, exprvec[i]));
1601 /* Only take notice of this arg if the callee's mc-exclusion
1602 mask does not say it is to be excluded. */
1603 if (cee->mcx_mask & (1<<i)) {
1604 /* the arg is to be excluded from definedness checking. Do
1605 nothing. */
1606 if (0) VG_(printf)("excluding %s(%d)\n", cee->name, i);
1607 } else {
1608 /* calculate the arg's definedness, and pessimistically merge
1609 it in. */
sewardj4cc684b2007-08-25 23:09:36 +00001610 here = mkPCastTo( mce, mergeTy, expr2vbits(mce, exprvec[i]) );
1611 curr = mergeTy64
1612 ? mkUifU64(mce, here, curr)
1613 : mkUifU32(mce, here, curr);
sewardj95448072004-11-22 20:19:51 +00001614 }
1615 }
1616 return mkPCastTo(mce, finalVtype, curr );
1617}
1618
1619
1620/*------------------------------------------------------------*/
1621/*--- Generating expensive sequences for exact carry-chain ---*/
1622/*--- propagation in add/sub and related operations. ---*/
1623/*------------------------------------------------------------*/
1624
static
IRAtom* expensiveAddSub ( MCEnv* mce,
                          Bool add,
                          IRType ty,
                          IRAtom* qaa, IRAtom* qbb,
                          IRAtom* aa, IRAtom* bb )
{
   /* Compute an exact (carry-chain aware) shadow value for aa+bb or
      aa-bb.  qaa/qbb are the shadow (V-bit) values of aa/bb; a 1 bit
      in a shadow means "undefined".

      Idea: form each operand's smallest possible value (undefined
      bits forced to 0) and largest (undefined bits forced to 1).  A
      result bit is undefined if it is undefined in either input, or
      if the min-combination and max-combination differ at that
      position -- i.e. undefined input bits could influence it via
      the carry/borrow chain. */
   IRAtom *a_min, *b_min, *a_max, *b_max;
   IROp   opAND, opOR, opXOR, opNOT, opADD, opSUB;

   tl_assert(isShadowAtom(mce,qaa));
   tl_assert(isShadowAtom(mce,qbb));
   tl_assert(isOriginalAtom(mce,aa));
   tl_assert(isOriginalAtom(mce,bb));
   tl_assert(sameKindedAtoms(qaa,aa));
   tl_assert(sameKindedAtoms(qbb,bb));

   /* Select word-size-specific ops; only I32 and I64 are handled. */
   switch (ty) {
      case Ity_I32:
         opAND = Iop_And32;
         opOR  = Iop_Or32;
         opXOR = Iop_Xor32;
         opNOT = Iop_Not32;
         opADD = Iop_Add32;
         opSUB = Iop_Sub32;
         break;
      case Ity_I64:
         opAND = Iop_And64;
         opOR  = Iop_Or64;
         opXOR = Iop_Xor64;
         opNOT = Iop_Not64;
         opADD = Iop_Add64;
         opSUB = Iop_Sub64;
         break;
      default:
         VG_(tool_panic)("expensiveAddSub");
   }

   // a_min = aa & ~qaa   (aa with all undefined bits cleared)
   a_min = assignNew('V', mce,ty,
                     binop(opAND, aa,
                                  assignNew('V', mce,ty, unop(opNOT, qaa))));

   // b_min = bb & ~qbb
   b_min = assignNew('V', mce,ty,
                     binop(opAND, bb,
                                  assignNew('V', mce,ty, unop(opNOT, qbb))));

   // a_max = aa | qaa    (aa with all undefined bits set)
   a_max = assignNew('V', mce,ty, binop(opOR, aa, qaa));

   // b_max = bb | qbb
   b_max = assignNew('V', mce,ty, binop(opOR, bb, qbb));

   if (add) {
      // result = (qaa | qbb) | ((a_min + b_min) ^ (a_max + b_max))
      return
      assignNew('V', mce,ty,
         binop( opOR,
                assignNew('V', mce,ty, binop(opOR, qaa, qbb)),
                assignNew('V', mce,ty,
                   binop( opXOR,
                          assignNew('V', mce,ty, binop(opADD, a_min, b_min)),
                          assignNew('V', mce,ty, binop(opADD, a_max, b_max))
                   )
                )
         )
      );
   } else {
      // The smallest possible difference is a_min - b_max, and the
      // largest is a_max - b_min, hence:
      // result = (qaa | qbb) | ((a_min - b_max) ^ (a_max - b_min))
      return
      assignNew('V', mce,ty,
         binop( opOR,
                assignNew('V', mce,ty, binop(opOR, qaa, qbb)),
                assignNew('V', mce,ty,
                   binop( opXOR,
                          assignNew('V', mce,ty, binop(opSUB, a_min, b_max)),
                          assignNew('V', mce,ty, binop(opSUB, a_max, b_min))
                   )
                )
         )
      );
   }

}
1710
1711
1712/*------------------------------------------------------------*/
sewardjaaddbc22005-10-07 09:49:53 +00001713/*--- Scalar shifts. ---*/
1714/*------------------------------------------------------------*/
1715
1716/* Produce an interpretation for (aa << bb) (or >>s, >>u). The basic
1717 idea is to shift the definedness bits by the original shift amount.
1718 This introduces 0s ("defined") in new positions for left shifts and
1719 unsigned right shifts, and copies the top definedness bit for
1720 signed right shifts. So, conveniently, applying the original shift
1721 operator to the definedness bits for the left arg is exactly the
1722 right thing to do:
1723
1724 (qaa << bb)
1725
1726 However if the shift amount is undefined then the whole result
1727 is undefined. Hence need:
1728
1729 (qaa << bb) `UifU` PCast(qbb)
1730
1731 If the shift amount bb is a literal than qbb will say 'all defined'
1732 and the UifU and PCast will get folded out by post-instrumentation
1733 optimisation.
1734*/
1735static IRAtom* scalarShift ( MCEnv* mce,
1736 IRType ty,
1737 IROp original_op,
1738 IRAtom* qaa, IRAtom* qbb,
1739 IRAtom* aa, IRAtom* bb )
1740{
1741 tl_assert(isShadowAtom(mce,qaa));
1742 tl_assert(isShadowAtom(mce,qbb));
1743 tl_assert(isOriginalAtom(mce,aa));
1744 tl_assert(isOriginalAtom(mce,bb));
1745 tl_assert(sameKindedAtoms(qaa,aa));
1746 tl_assert(sameKindedAtoms(qbb,bb));
1747 return
1748 assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +00001749 'V', mce, ty,
sewardjaaddbc22005-10-07 09:49:53 +00001750 mkUifU( mce, ty,
sewardj7cf4e6b2008-05-01 20:24:26 +00001751 assignNew('V', mce, ty, binop(original_op, qaa, bb)),
sewardjaaddbc22005-10-07 09:49:53 +00001752 mkPCastTo(mce, ty, qbb)
1753 )
1754 );
1755}
1756
1757
1758/*------------------------------------------------------------*/
1759/*--- Helpers for dealing with vector primops. ---*/
sewardj3245c912004-12-10 14:58:26 +00001760/*------------------------------------------------------------*/
1761
sewardja1d93302004-12-12 16:45:06 +00001762/* Vector pessimisation -- pessimise within each lane individually. */
1763
/* "Pessimising cast" per lane: each lane of the result becomes all
   1s (fully undefined) if the corresponding source lane contains any
   nonzero (undefined) bit, else all 0s (fully defined).
   V128 variants: */
static IRAtom* mkPCast8x16 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ8x16, at));
}

static IRAtom* mkPCast16x8 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ16x8, at));
}

static IRAtom* mkPCast32x4 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ32x4, at));
}

static IRAtom* mkPCast64x2 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ64x2, at));
}

/* 64-bit (I64 SIMD) variants: */
static IRAtom* mkPCast32x2 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ32x2, at));
}

static IRAtom* mkPCast16x4 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ16x4, at));
}

static IRAtom* mkPCast8x8 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ8x8, at));
}

/* 32-bit (I32 SIMD) variants: */
static IRAtom* mkPCast16x2 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_I32, unop(Iop_CmpNEZ16x2, at));
}

static IRAtom* mkPCast8x4 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_I32, unop(Iop_CmpNEZ8x4, at));
}
1808
sewardja1d93302004-12-12 16:45:06 +00001809
sewardj3245c912004-12-10 14:58:26 +00001810/* Here's a simple scheme capable of handling ops derived from SSE1
1811 code and while only generating ops that can be efficiently
1812 implemented in SSE1. */
1813
1814/* All-lanes versions are straightforward:
1815
sewardj20d38f22005-02-07 23:50:18 +00001816 binary32Fx4(x,y) ==> PCast32x4(UifUV128(x#,y#))
sewardj3245c912004-12-10 14:58:26 +00001817
1818 unary32Fx4(x,y) ==> PCast32x4(x#)
1819
1820 Lowest-lane-only versions are more complex:
1821
sewardj20d38f22005-02-07 23:50:18 +00001822 binary32F0x4(x,y) ==> SetV128lo32(
sewardj3245c912004-12-10 14:58:26 +00001823 x#,
sewardj20d38f22005-02-07 23:50:18 +00001824 PCast32(V128to32(UifUV128(x#,y#)))
sewardj3245c912004-12-10 14:58:26 +00001825 )
1826
1827 This is perhaps not so obvious. In particular, it's faster to
sewardj20d38f22005-02-07 23:50:18 +00001828 do a V128-bit UifU and then take the bottom 32 bits than the more
sewardj3245c912004-12-10 14:58:26 +00001829 obvious scheme of taking the bottom 32 bits of each operand
1830 and doing a 32-bit UifU. Basically since UifU is fast and
1831 chopping lanes off vector values is slow.
1832
1833 Finally:
1834
sewardj20d38f22005-02-07 23:50:18 +00001835 unary32F0x4(x) ==> SetV128lo32(
sewardj3245c912004-12-10 14:58:26 +00001836 x#,
sewardj20d38f22005-02-07 23:50:18 +00001837 PCast32(V128to32(x#))
sewardj3245c912004-12-10 14:58:26 +00001838 )
1839
1840 Where:
1841
1842 PCast32(v#) = 1Sto32(CmpNE32(v#,0))
1843 PCast32x4(v#) = CmpNEZ32x4(v#)
1844*/
1845
static
IRAtom* binary32Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
{
   /* All-lanes binary F32x4 op: UifU the two shadows, then pessimise
      each 32-bit lane of the result (see scheme comment above). */
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   tl_assert(isShadowAtom(mce, vatomY));
   at = mkUifUV128(mce, vatomX, vatomY);
   at = assignNew('V', mce, Ity_V128, mkPCast32x4(mce, at));
   return at;
}

static
IRAtom* unary32Fx4 ( MCEnv* mce, IRAtom* vatomX )
{
   /* All-lanes unary F32x4 op: just pessimise each 32-bit lane. */
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   at = assignNew('V', mce, Ity_V128, mkPCast32x4(mce, vatomX));
   return at;
}

static
IRAtom* binary32F0x4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
{
   /* Lowest-lane-only binary op: UifU the whole V128 shadows (cheap),
      pessimise just the low 32 bits of that, and splice the result
      back into the low lane of vatomX's shadow. */
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   tl_assert(isShadowAtom(mce, vatomY));
   at = mkUifUV128(mce, vatomX, vatomY);
   at = assignNew('V', mce, Ity_I32, unop(Iop_V128to32, at));
   at = mkPCastTo(mce, Ity_I32, at);
   at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
   return at;
}

static
IRAtom* unary32F0x4 ( MCEnv* mce, IRAtom* vatomX )
{
   /* Lowest-lane-only unary op: pessimise the low 32 bits of the
      shadow and splice back into the low lane. */
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   at = assignNew('V', mce, Ity_I32, unop(Iop_V128to32, vatomX));
   at = mkPCastTo(mce, Ity_I32, at);
   at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
   return at;
}
1889
sewardj0b070592004-12-10 21:44:22 +00001890/* --- ... and ... 64Fx2 versions of the same ... --- */
1891
static
IRAtom* binary64Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
{
   /* All-lanes binary F64x2 op: UifU the shadows, then pessimise
      each 64-bit lane. */
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   tl_assert(isShadowAtom(mce, vatomY));
   at = mkUifUV128(mce, vatomX, vatomY);
   at = assignNew('V', mce, Ity_V128, mkPCast64x2(mce, at));
   return at;
}

static
IRAtom* unary64Fx2 ( MCEnv* mce, IRAtom* vatomX )
{
   /* All-lanes unary F64x2 op: pessimise each 64-bit lane. */
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   at = assignNew('V', mce, Ity_V128, mkPCast64x2(mce, vatomX));
   return at;
}

static
IRAtom* binary64F0x2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
{
   /* Lowest-lane-only binary op: UifU the whole V128 shadows,
      pessimise just the low 64 bits, splice back into vatomX's low
      lane. */
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   tl_assert(isShadowAtom(mce, vatomY));
   at = mkUifUV128(mce, vatomX, vatomY);
   at = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, at));
   at = mkPCastTo(mce, Ity_I64, at);
   at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
   return at;
}

static
IRAtom* unary64F0x2 ( MCEnv* mce, IRAtom* vatomX )
{
   /* Lowest-lane-only unary op. */
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   at = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vatomX));
   at = mkPCastTo(mce, Ity_I64, at);
   at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
   return at;
}
1935
sewardj57f92b02010-08-22 11:54:14 +00001936/* --- --- ... and ... 32Fx2 versions of the same --- --- */
1937
static
IRAtom* binary32Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
{
   /* All-lanes binary F32x2 op on 64-bit SIMD values: UifU the
      shadows, then pessimise each 32-bit lane. */
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   tl_assert(isShadowAtom(mce, vatomY));
   at = mkUifU64(mce, vatomX, vatomY);
   at = assignNew('V', mce, Ity_I64, mkPCast32x2(mce, at));
   return at;
}

static
IRAtom* unary32Fx2 ( MCEnv* mce, IRAtom* vatomX )
{
   /* All-lanes unary F32x2 op: pessimise each 32-bit lane. */
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   at = assignNew('V', mce, Ity_I64, mkPCast32x2(mce, vatomX));
   return at;
}
1957
sewardja1d93302004-12-12 16:45:06 +00001958/* --- --- Vector saturated narrowing --- --- */
1959
1960/* This is quite subtle. What to do is simple:
1961
1962 Let the original narrowing op be QNarrowW{S,U}xN. Produce:
1963
1964 the-narrowing-op( PCastWxN(vatom1), PCastWxN(vatom2))
1965
1966 Why this is right is not so simple. Consider a lane in the args,
1967 vatom1 or 2, doesn't matter.
1968
1969 After the PCast, that lane is all 0s (defined) or all
1970 1s(undefined).
1971
1972 Both signed and unsigned saturating narrowing of all 0s produces
1973 all 0s, which is what we want.
1974
1975 The all-1s case is more complex. Unsigned narrowing interprets an
1976 all-1s input as the largest unsigned integer, and so produces all
1977 1s as a result since that is the largest unsigned value at the
1978 smaller width.
1979
1980 Signed narrowing interprets all 1s as -1. Fortunately, -1 narrows
1981 to -1, so we still wind up with all 1s at the smaller width.
1982
1983 So: In short, pessimise the args, then apply the original narrowing
1984 op.
sewardj9beeb0a2011-06-15 15:11:07 +00001985
1986 FIXME JRS 2011-Jun-15: figure out if this is still correct
1987 following today's rationalisation/cleanup of vector narrowing
1988 primops.
sewardja1d93302004-12-12 16:45:06 +00001989*/
1990static
sewardj7ee7d852011-06-16 11:37:21 +00001991IRAtom* vectorNarrowBinV128 ( MCEnv* mce, IROp narrow_op,
1992 IRAtom* vatom1, IRAtom* vatom2)
sewardja1d93302004-12-12 16:45:06 +00001993{
1994 IRAtom *at1, *at2, *at3;
1995 IRAtom* (*pcast)( MCEnv*, IRAtom* );
1996 switch (narrow_op) {
sewardj7ee7d852011-06-16 11:37:21 +00001997 case Iop_QNarrowBin32Sto16Sx8: pcast = mkPCast32x4; break;
1998 case Iop_QNarrowBin32Uto16Ux8: pcast = mkPCast32x4; break;
1999 case Iop_QNarrowBin32Sto16Ux8: pcast = mkPCast32x4; break;
2000 case Iop_QNarrowBin16Sto8Sx16: pcast = mkPCast16x8; break;
2001 case Iop_QNarrowBin16Uto8Ux16: pcast = mkPCast16x8; break;
2002 case Iop_QNarrowBin16Sto8Ux16: pcast = mkPCast16x8; break;
2003 default: VG_(tool_panic)("vectorNarrowBinV128");
sewardja1d93302004-12-12 16:45:06 +00002004 }
2005 tl_assert(isShadowAtom(mce,vatom1));
2006 tl_assert(isShadowAtom(mce,vatom2));
sewardj7cf4e6b2008-05-01 20:24:26 +00002007 at1 = assignNew('V', mce, Ity_V128, pcast(mce, vatom1));
2008 at2 = assignNew('V', mce, Ity_V128, pcast(mce, vatom2));
2009 at3 = assignNew('V', mce, Ity_V128, binop(narrow_op, at1, at2));
sewardja1d93302004-12-12 16:45:06 +00002010 return at3;
2011}
2012
sewardjacd2e912005-01-13 19:17:06 +00002013static
sewardj7ee7d852011-06-16 11:37:21 +00002014IRAtom* vectorNarrowBin64 ( MCEnv* mce, IROp narrow_op,
2015 IRAtom* vatom1, IRAtom* vatom2)
sewardjacd2e912005-01-13 19:17:06 +00002016{
2017 IRAtom *at1, *at2, *at3;
2018 IRAtom* (*pcast)( MCEnv*, IRAtom* );
2019 switch (narrow_op) {
sewardj7ee7d852011-06-16 11:37:21 +00002020 case Iop_QNarrowBin32Sto16Sx4: pcast = mkPCast32x2; break;
2021 case Iop_QNarrowBin16Sto8Sx8: pcast = mkPCast16x4; break;
2022 case Iop_QNarrowBin16Sto8Ux8: pcast = mkPCast16x4; break;
2023 default: VG_(tool_panic)("vectorNarrowBin64");
sewardjacd2e912005-01-13 19:17:06 +00002024 }
2025 tl_assert(isShadowAtom(mce,vatom1));
2026 tl_assert(isShadowAtom(mce,vatom2));
sewardj7cf4e6b2008-05-01 20:24:26 +00002027 at1 = assignNew('V', mce, Ity_I64, pcast(mce, vatom1));
2028 at2 = assignNew('V', mce, Ity_I64, pcast(mce, vatom2));
2029 at3 = assignNew('V', mce, Ity_I64, binop(narrow_op, at1, at2));
sewardjacd2e912005-01-13 19:17:06 +00002030 return at3;
2031}
2032
static
IRAtom* vectorNarrowUnV128 ( MCEnv* mce, IROp shorten_op,
                             IRAtom* vatom1)
{
   /* Unary V128 -> I64 narrowing (plain or saturating): pessimise
      the wide lanes of the shadow first, then apply the original
      narrowing op.  See the comment above vectorNarrowBinV128 for
      why this is sound for the saturating cases. */
   IRAtom *at1, *at2;
   IRAtom* (*pcast)( MCEnv*, IRAtom* );
   switch (shorten_op) {
      /* FIXME: first 3 are too pessimistic; we can just
         apply them directly to the V bits. */
      case Iop_NarrowUn16to8x8:     pcast = mkPCast16x8; break;
      case Iop_NarrowUn32to16x4:    pcast = mkPCast32x4; break;
      case Iop_NarrowUn64to32x2:    pcast = mkPCast64x2; break;
      case Iop_QNarrowUn16Sto8Sx8:  pcast = mkPCast16x8; break;
      case Iop_QNarrowUn16Sto8Ux8:  pcast = mkPCast16x8; break;
      case Iop_QNarrowUn16Uto8Ux8:  pcast = mkPCast16x8; break;
      case Iop_QNarrowUn32Sto16Sx4: pcast = mkPCast32x4; break;
      case Iop_QNarrowUn32Sto16Ux4: pcast = mkPCast32x4; break;
      case Iop_QNarrowUn32Uto16Ux4: pcast = mkPCast32x4; break;
      case Iop_QNarrowUn64Sto32Sx2: pcast = mkPCast64x2; break;
      case Iop_QNarrowUn64Sto32Ux2: pcast = mkPCast64x2; break;
      case Iop_QNarrowUn64Uto32Ux2: pcast = mkPCast64x2; break;
      default: VG_(tool_panic)("vectorNarrowUnV128");
   }
   tl_assert(isShadowAtom(mce,vatom1));
   at1 = assignNew('V', mce, Ity_V128, pcast(mce, vatom1));
   at2 = assignNew('V', mce, Ity_I64, unop(shorten_op, at1));
   return at2;
}
2061
2062static
sewardj7ee7d852011-06-16 11:37:21 +00002063IRAtom* vectorWidenI64 ( MCEnv* mce, IROp longen_op,
2064 IRAtom* vatom1)
sewardj57f92b02010-08-22 11:54:14 +00002065{
2066 IRAtom *at1, *at2;
2067 IRAtom* (*pcast)( MCEnv*, IRAtom* );
2068 switch (longen_op) {
sewardj7ee7d852011-06-16 11:37:21 +00002069 case Iop_Widen8Uto16x8: pcast = mkPCast16x8; break;
2070 case Iop_Widen8Sto16x8: pcast = mkPCast16x8; break;
2071 case Iop_Widen16Uto32x4: pcast = mkPCast32x4; break;
2072 case Iop_Widen16Sto32x4: pcast = mkPCast32x4; break;
2073 case Iop_Widen32Uto64x2: pcast = mkPCast64x2; break;
2074 case Iop_Widen32Sto64x2: pcast = mkPCast64x2; break;
2075 default: VG_(tool_panic)("vectorWidenI64");
sewardj57f92b02010-08-22 11:54:14 +00002076 }
2077 tl_assert(isShadowAtom(mce,vatom1));
2078 at1 = assignNew('V', mce, Ity_V128, unop(longen_op, vatom1));
2079 at2 = assignNew('V', mce, Ity_V128, pcast(mce, at1));
2080 return at2;
2081}
2082
sewardja1d93302004-12-12 16:45:06 +00002083
2084/* --- --- Vector integer arithmetic --- --- */
2085
2086/* Simple ... UifU the args and per-lane pessimise the results. */
sewardjacd2e912005-01-13 19:17:06 +00002087
sewardj20d38f22005-02-07 23:50:18 +00002088/* --- V128-bit versions --- */
sewardjacd2e912005-01-13 19:17:06 +00002089
static
IRAtom* binary8Ix16 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   /* UifU the shadow args, then pessimise each 8-bit lane. */
   IRAtom* at;
   at = mkUifUV128(mce, vatom1, vatom2);
   at = mkPCast8x16(mce, at);
   return at;
}

static
IRAtom* binary16Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   /* UifU the shadow args, then pessimise each 16-bit lane. */
   IRAtom* at;
   at = mkUifUV128(mce, vatom1, vatom2);
   at = mkPCast16x8(mce, at);
   return at;
}

static
IRAtom* binary32Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   /* UifU the shadow args, then pessimise each 32-bit lane. */
   IRAtom* at;
   at = mkUifUV128(mce, vatom1, vatom2);
   at = mkPCast32x4(mce, at);
   return at;
}

static
IRAtom* binary64Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   /* UifU the shadow args, then pessimise each 64-bit lane. */
   IRAtom* at;
   at = mkUifUV128(mce, vatom1, vatom2);
   at = mkPCast64x2(mce, at);
   return at;
}
sewardj3245c912004-12-10 14:58:26 +00002125
sewardjacd2e912005-01-13 19:17:06 +00002126/* --- 64-bit versions --- */
2127
static
IRAtom* binary8Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   /* UifU the I64 shadow args, then pessimise each 8-bit lane. */
   IRAtom* at;
   at = mkUifU64(mce, vatom1, vatom2);
   at = mkPCast8x8(mce, at);
   return at;
}

static
IRAtom* binary16Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   /* UifU the I64 shadow args, then pessimise each 16-bit lane. */
   IRAtom* at;
   at = mkUifU64(mce, vatom1, vatom2);
   at = mkPCast16x4(mce, at);
   return at;
}

static
IRAtom* binary32Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   /* UifU the I64 shadow args, then pessimise each 32-bit lane. */
   IRAtom* at;
   at = mkUifU64(mce, vatom1, vatom2);
   at = mkPCast32x2(mce, at);
   return at;
}

static
IRAtom* binary64Ix1 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   /* Single 64-bit lane: UifU then an ordinary (whole-value) PCast. */
   IRAtom* at;
   at = mkUifU64(mce, vatom1, vatom2);
   at = mkPCastTo(mce, Ity_I64, at);
   return at;
}
2163
sewardjc678b852010-09-22 00:58:51 +00002164/* --- 32-bit versions --- */
2165
static
IRAtom* binary8Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   /* UifU the I32 shadow args, then pessimise each 8-bit lane. */
   IRAtom* at;
   at = mkUifU32(mce, vatom1, vatom2);
   at = mkPCast8x4(mce, at);
   return at;
}

static
IRAtom* binary16Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   /* UifU the I32 shadow args, then pessimise each 16-bit lane. */
   IRAtom* at;
   at = mkUifU32(mce, vatom1, vatom2);
   at = mkPCast16x2(mce, at);
   return at;
}
2183
sewardj3245c912004-12-10 14:58:26 +00002184
2185/*------------------------------------------------------------*/
sewardj95448072004-11-22 20:19:51 +00002186/*--- Generate shadow values from all kinds of IRExprs. ---*/
2187/*------------------------------------------------------------*/
2188
/* Compute the shadow (V-bit) value for a 4-ary (Qop) expression by
   lazy propagation of its arguments' definedness.  Panics on any op
   it does not know about. */
static
IRAtom* expr2vbits_Qop ( MCEnv* mce,
                         IROp op,
                         IRAtom* atom1, IRAtom* atom2,
                         IRAtom* atom3, IRAtom* atom4 )
{
   IRAtom* vatom1 = expr2vbits( mce, atom1 );
   IRAtom* vatom2 = expr2vbits( mce, atom2 );
   IRAtom* vatom3 = expr2vbits( mce, atom3 );
   IRAtom* vatom4 = expr2vbits( mce, atom4 );

   tl_assert(isOriginalAtom(mce,atom1));
   tl_assert(isOriginalAtom(mce,atom2));
   tl_assert(isOriginalAtom(mce,atom3));
   tl_assert(isOriginalAtom(mce,atom4));
   tl_assert(isShadowAtom(mce,vatom1));
   tl_assert(isShadowAtom(mce,vatom2));
   tl_assert(isShadowAtom(mce,vatom3));
   tl_assert(isShadowAtom(mce,vatom4));
   tl_assert(sameKindedAtoms(atom1,vatom1));
   tl_assert(sameKindedAtoms(atom2,vatom2));
   tl_assert(sameKindedAtoms(atom3,vatom3));
   tl_assert(sameKindedAtoms(atom4,vatom4));
   switch (op) {
      case Iop_MAddF64:
      case Iop_MAddF64r32:
      case Iop_MSubF64:
      case Iop_MSubF64r32:
         /* I32(rm) x F64 x F64 x F64 -> F64 */
         return mkLazy4(mce, Ity_I64, vatom1, vatom2, vatom3, vatom4);

      case Iop_MAddF32:
      case Iop_MSubF32:
         /* I32(rm) x F32 x F32 x F32 -> F32 */
         return mkLazy4(mce, Ity_I32, vatom1, vatom2, vatom3, vatom4);

      default:
         ppIROp(op);
         VG_(tool_panic)("memcheck:expr2vbits_Qop");
   }
}
2230
2231
/* Produce the shadow (V-bit) value for a 3-ary IR operation.  The
   shadows of all three operands are computed first; for the arithmetic
   cases they are combined lazily (mkLazy3) into a shadow of the
   result's shadow type.  For the data-steering / shift-by-immediate
   cases, the operand that must be fully defined (a lane index or shift
   amount) is checked eagerly with complainIfUndefined and then the
   original (not the shadow) is fed to the reconstructed op. */
static
IRAtom* expr2vbits_Triop ( MCEnv* mce,
                           IROp op,
                           IRAtom* atom1, IRAtom* atom2, IRAtom* atom3 )
{
   IRAtom* vatom1 = expr2vbits( mce, atom1 );
   IRAtom* vatom2 = expr2vbits( mce, atom2 );
   IRAtom* vatom3 = expr2vbits( mce, atom3 );

   /* Sanity checks: originals vs shadows, pairwise kind agreement. */
   tl_assert(isOriginalAtom(mce,atom1));
   tl_assert(isOriginalAtom(mce,atom2));
   tl_assert(isOriginalAtom(mce,atom3));
   tl_assert(isShadowAtom(mce,vatom1));
   tl_assert(isShadowAtom(mce,vatom2));
   tl_assert(isShadowAtom(mce,vatom3));
   tl_assert(sameKindedAtoms(atom1,vatom1));
   tl_assert(sameKindedAtoms(atom2,vatom2));
   tl_assert(sameKindedAtoms(atom3,vatom3));
   switch (op) {
      case Iop_AddF128:
      case Iop_SubF128:
      case Iop_MulF128:
      case Iop_DivF128:
         /* I32(rm) x F128 x F128 -> F128; F128 shadows are I128. */
         return mkLazy3(mce, Ity_I128, vatom1, vatom2, vatom3);
      case Iop_AddF64:
      case Iop_AddF64r32:
      case Iop_SubF64:
      case Iop_SubF64r32:
      case Iop_MulF64:
      case Iop_MulF64r32:
      case Iop_DivF64:
      case Iop_DivF64r32:
      case Iop_ScaleF64:
      case Iop_Yl2xF64:
      case Iop_Yl2xp1F64:
      case Iop_AtanF64:
      case Iop_PRemF64:
      case Iop_PRem1F64:
         /* I32(rm) x F64 x F64 -> F64; F64 shadows are I64. */
         return mkLazy3(mce, Ity_I64, vatom1, vatom2, vatom3);
      case Iop_PRemC3210F64:
      case Iop_PRem1C3210F64:
         /* I32(rm) x F64 x F64 -> I32 */
         return mkLazy3(mce, Ity_I32, vatom1, vatom2, vatom3);
      case Iop_AddF32:
      case Iop_SubF32:
      case Iop_MulF32:
      case Iop_DivF32:
         /* I32(rm) x F32 x F32 -> F32; F32 shadows are I32. */
         return mkLazy3(mce, Ity_I32, vatom1, vatom2, vatom3);
      case Iop_ExtractV128:
         /* atom3 is the extraction index: it must be fully defined,
            since it steers data rather than flowing into the result. */
         complainIfUndefined(mce, atom3);
         return assignNew('V', mce, Ity_V128, triop(op, vatom1, vatom2, atom3));
      case Iop_Extract64:
         /* As above: atom3 is an index, checked eagerly. */
         complainIfUndefined(mce, atom3);
         return assignNew('V', mce, Ity_I64, triop(op, vatom1, vatom2, atom3));
      case Iop_SetElem8x8:
      case Iop_SetElem16x4:
      case Iop_SetElem32x2:
         /* atom2 is the lane index: must be fully defined; the vector
            and the new element value flow through as shadows. */
         complainIfUndefined(mce, atom2);
         return assignNew('V', mce, Ity_I64, triop(op, vatom1, atom2, vatom3));
      default:
         ppIROp(op);
         VG_(tool_panic)("memcheck:expr2vbits_Triop");
   }
}
2299
2300
2301static
sewardj95448072004-11-22 20:19:51 +00002302IRAtom* expr2vbits_Binop ( MCEnv* mce,
2303 IROp op,
2304 IRAtom* atom1, IRAtom* atom2 )
2305{
2306 IRType and_or_ty;
2307 IRAtom* (*uifu) (MCEnv*, IRAtom*, IRAtom*);
2308 IRAtom* (*difd) (MCEnv*, IRAtom*, IRAtom*);
2309 IRAtom* (*improve) (MCEnv*, IRAtom*, IRAtom*);
2310
2311 IRAtom* vatom1 = expr2vbits( mce, atom1 );
2312 IRAtom* vatom2 = expr2vbits( mce, atom2 );
2313
2314 tl_assert(isOriginalAtom(mce,atom1));
2315 tl_assert(isOriginalAtom(mce,atom2));
2316 tl_assert(isShadowAtom(mce,vatom1));
2317 tl_assert(isShadowAtom(mce,vatom2));
2318 tl_assert(sameKindedAtoms(atom1,vatom1));
2319 tl_assert(sameKindedAtoms(atom2,vatom2));
2320 switch (op) {
2321
sewardjc678b852010-09-22 00:58:51 +00002322 /* 32-bit SIMD */
2323
2324 case Iop_Add16x2:
2325 case Iop_HAdd16Ux2:
2326 case Iop_HAdd16Sx2:
2327 case Iop_Sub16x2:
2328 case Iop_HSub16Ux2:
2329 case Iop_HSub16Sx2:
2330 case Iop_QAdd16Sx2:
2331 case Iop_QSub16Sx2:
2332 return binary16Ix2(mce, vatom1, vatom2);
2333
2334 case Iop_Add8x4:
2335 case Iop_HAdd8Ux4:
2336 case Iop_HAdd8Sx4:
2337 case Iop_Sub8x4:
2338 case Iop_HSub8Ux4:
2339 case Iop_HSub8Sx4:
2340 case Iop_QSub8Ux4:
2341 case Iop_QAdd8Ux4:
2342 case Iop_QSub8Sx4:
2343 case Iop_QAdd8Sx4:
2344 return binary8Ix4(mce, vatom1, vatom2);
2345
sewardjacd2e912005-01-13 19:17:06 +00002346 /* 64-bit SIMD */
2347
sewardj57f92b02010-08-22 11:54:14 +00002348 case Iop_ShrN8x8:
sewardjacd2e912005-01-13 19:17:06 +00002349 case Iop_ShrN16x4:
2350 case Iop_ShrN32x2:
sewardj03809ae2006-12-27 01:16:58 +00002351 case Iop_SarN8x8:
sewardjacd2e912005-01-13 19:17:06 +00002352 case Iop_SarN16x4:
2353 case Iop_SarN32x2:
2354 case Iop_ShlN16x4:
2355 case Iop_ShlN32x2:
sewardj114a9172008-02-09 01:49:32 +00002356 case Iop_ShlN8x8:
sewardjacd2e912005-01-13 19:17:06 +00002357 /* Same scheme as with all other shifts. */
2358 complainIfUndefined(mce, atom2);
sewardj7cf4e6b2008-05-01 20:24:26 +00002359 return assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2));
sewardjacd2e912005-01-13 19:17:06 +00002360
sewardj7ee7d852011-06-16 11:37:21 +00002361 case Iop_QNarrowBin32Sto16Sx4:
2362 case Iop_QNarrowBin16Sto8Sx8:
2363 case Iop_QNarrowBin16Sto8Ux8:
2364 return vectorNarrowBin64(mce, op, vatom1, vatom2);
sewardjacd2e912005-01-13 19:17:06 +00002365
2366 case Iop_Min8Ux8:
sewardj57f92b02010-08-22 11:54:14 +00002367 case Iop_Min8Sx8:
sewardjacd2e912005-01-13 19:17:06 +00002368 case Iop_Max8Ux8:
sewardj57f92b02010-08-22 11:54:14 +00002369 case Iop_Max8Sx8:
sewardjacd2e912005-01-13 19:17:06 +00002370 case Iop_Avg8Ux8:
2371 case Iop_QSub8Sx8:
2372 case Iop_QSub8Ux8:
2373 case Iop_Sub8x8:
2374 case Iop_CmpGT8Sx8:
sewardj57f92b02010-08-22 11:54:14 +00002375 case Iop_CmpGT8Ux8:
sewardjacd2e912005-01-13 19:17:06 +00002376 case Iop_CmpEQ8x8:
2377 case Iop_QAdd8Sx8:
2378 case Iop_QAdd8Ux8:
sewardj57f92b02010-08-22 11:54:14 +00002379 case Iop_QSal8x8:
2380 case Iop_QShl8x8:
sewardjacd2e912005-01-13 19:17:06 +00002381 case Iop_Add8x8:
sewardj57f92b02010-08-22 11:54:14 +00002382 case Iop_Mul8x8:
2383 case Iop_PolynomialMul8x8:
sewardjacd2e912005-01-13 19:17:06 +00002384 return binary8Ix8(mce, vatom1, vatom2);
2385
2386 case Iop_Min16Sx4:
sewardj57f92b02010-08-22 11:54:14 +00002387 case Iop_Min16Ux4:
sewardjacd2e912005-01-13 19:17:06 +00002388 case Iop_Max16Sx4:
sewardj57f92b02010-08-22 11:54:14 +00002389 case Iop_Max16Ux4:
sewardjacd2e912005-01-13 19:17:06 +00002390 case Iop_Avg16Ux4:
2391 case Iop_QSub16Ux4:
2392 case Iop_QSub16Sx4:
2393 case Iop_Sub16x4:
2394 case Iop_Mul16x4:
2395 case Iop_MulHi16Sx4:
2396 case Iop_MulHi16Ux4:
2397 case Iop_CmpGT16Sx4:
sewardj57f92b02010-08-22 11:54:14 +00002398 case Iop_CmpGT16Ux4:
sewardjacd2e912005-01-13 19:17:06 +00002399 case Iop_CmpEQ16x4:
2400 case Iop_QAdd16Sx4:
2401 case Iop_QAdd16Ux4:
sewardj57f92b02010-08-22 11:54:14 +00002402 case Iop_QSal16x4:
2403 case Iop_QShl16x4:
sewardjacd2e912005-01-13 19:17:06 +00002404 case Iop_Add16x4:
sewardj57f92b02010-08-22 11:54:14 +00002405 case Iop_QDMulHi16Sx4:
2406 case Iop_QRDMulHi16Sx4:
sewardjacd2e912005-01-13 19:17:06 +00002407 return binary16Ix4(mce, vatom1, vatom2);
2408
2409 case Iop_Sub32x2:
sewardj114a9172008-02-09 01:49:32 +00002410 case Iop_Mul32x2:
sewardj57f92b02010-08-22 11:54:14 +00002411 case Iop_Max32Sx2:
2412 case Iop_Max32Ux2:
2413 case Iop_Min32Sx2:
2414 case Iop_Min32Ux2:
sewardjacd2e912005-01-13 19:17:06 +00002415 case Iop_CmpGT32Sx2:
sewardj57f92b02010-08-22 11:54:14 +00002416 case Iop_CmpGT32Ux2:
sewardjacd2e912005-01-13 19:17:06 +00002417 case Iop_CmpEQ32x2:
2418 case Iop_Add32x2:
sewardj57f92b02010-08-22 11:54:14 +00002419 case Iop_QAdd32Ux2:
2420 case Iop_QAdd32Sx2:
2421 case Iop_QSub32Ux2:
2422 case Iop_QSub32Sx2:
2423 case Iop_QSal32x2:
2424 case Iop_QShl32x2:
2425 case Iop_QDMulHi32Sx2:
2426 case Iop_QRDMulHi32Sx2:
sewardjacd2e912005-01-13 19:17:06 +00002427 return binary32Ix2(mce, vatom1, vatom2);
2428
sewardj57f92b02010-08-22 11:54:14 +00002429 case Iop_QSub64Ux1:
2430 case Iop_QSub64Sx1:
2431 case Iop_QAdd64Ux1:
2432 case Iop_QAdd64Sx1:
2433 case Iop_QSal64x1:
2434 case Iop_QShl64x1:
2435 case Iop_Sal64x1:
2436 return binary64Ix1(mce, vatom1, vatom2);
2437
2438 case Iop_QShlN8Sx8:
2439 case Iop_QShlN8x8:
2440 case Iop_QSalN8x8:
2441 complainIfUndefined(mce, atom2);
2442 return mkPCast8x8(mce, vatom1);
2443
2444 case Iop_QShlN16Sx4:
2445 case Iop_QShlN16x4:
2446 case Iop_QSalN16x4:
2447 complainIfUndefined(mce, atom2);
2448 return mkPCast16x4(mce, vatom1);
2449
2450 case Iop_QShlN32Sx2:
2451 case Iop_QShlN32x2:
2452 case Iop_QSalN32x2:
2453 complainIfUndefined(mce, atom2);
2454 return mkPCast32x2(mce, vatom1);
2455
2456 case Iop_QShlN64Sx1:
2457 case Iop_QShlN64x1:
2458 case Iop_QSalN64x1:
2459 complainIfUndefined(mce, atom2);
2460 return mkPCast32x2(mce, vatom1);
2461
2462 case Iop_PwMax32Sx2:
2463 case Iop_PwMax32Ux2:
2464 case Iop_PwMin32Sx2:
2465 case Iop_PwMin32Ux2:
2466 case Iop_PwMax32Fx2:
2467 case Iop_PwMin32Fx2:
2468 return assignNew('V', mce, Ity_I64, binop(Iop_PwMax32Ux2, mkPCast32x2(mce, vatom1),
2469 mkPCast32x2(mce, vatom2)));
2470
2471 case Iop_PwMax16Sx4:
2472 case Iop_PwMax16Ux4:
2473 case Iop_PwMin16Sx4:
2474 case Iop_PwMin16Ux4:
2475 return assignNew('V', mce, Ity_I64, binop(Iop_PwMax16Ux4, mkPCast16x4(mce, vatom1),
2476 mkPCast16x4(mce, vatom2)));
2477
2478 case Iop_PwMax8Sx8:
2479 case Iop_PwMax8Ux8:
2480 case Iop_PwMin8Sx8:
2481 case Iop_PwMin8Ux8:
2482 return assignNew('V', mce, Ity_I64, binop(Iop_PwMax8Ux8, mkPCast8x8(mce, vatom1),
2483 mkPCast8x8(mce, vatom2)));
2484
2485 case Iop_PwAdd32x2:
2486 case Iop_PwAdd32Fx2:
2487 return mkPCast32x2(mce,
2488 assignNew('V', mce, Ity_I64, binop(Iop_PwAdd32x2, mkPCast32x2(mce, vatom1),
2489 mkPCast32x2(mce, vatom2))));
2490
2491 case Iop_PwAdd16x4:
2492 return mkPCast16x4(mce,
2493 assignNew('V', mce, Ity_I64, binop(op, mkPCast16x4(mce, vatom1),
2494 mkPCast16x4(mce, vatom2))));
2495
2496 case Iop_PwAdd8x8:
2497 return mkPCast8x8(mce,
2498 assignNew('V', mce, Ity_I64, binop(op, mkPCast8x8(mce, vatom1),
2499 mkPCast8x8(mce, vatom2))));
2500
2501 case Iop_Shl8x8:
2502 case Iop_Shr8x8:
2503 case Iop_Sar8x8:
2504 case Iop_Sal8x8:
2505 return mkUifU64(mce,
2506 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
2507 mkPCast8x8(mce,vatom2)
2508 );
2509
2510 case Iop_Shl16x4:
2511 case Iop_Shr16x4:
2512 case Iop_Sar16x4:
2513 case Iop_Sal16x4:
2514 return mkUifU64(mce,
2515 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
2516 mkPCast16x4(mce,vatom2)
2517 );
2518
2519 case Iop_Shl32x2:
2520 case Iop_Shr32x2:
2521 case Iop_Sar32x2:
2522 case Iop_Sal32x2:
2523 return mkUifU64(mce,
2524 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
2525 mkPCast32x2(mce,vatom2)
2526 );
2527
sewardjacd2e912005-01-13 19:17:06 +00002528 /* 64-bit data-steering */
2529 case Iop_InterleaveLO32x2:
2530 case Iop_InterleaveLO16x4:
2531 case Iop_InterleaveLO8x8:
2532 case Iop_InterleaveHI32x2:
2533 case Iop_InterleaveHI16x4:
2534 case Iop_InterleaveHI8x8:
sewardj57f92b02010-08-22 11:54:14 +00002535 case Iop_CatOddLanes8x8:
2536 case Iop_CatEvenLanes8x8:
sewardj114a9172008-02-09 01:49:32 +00002537 case Iop_CatOddLanes16x4:
2538 case Iop_CatEvenLanes16x4:
sewardj57f92b02010-08-22 11:54:14 +00002539 case Iop_InterleaveOddLanes8x8:
2540 case Iop_InterleaveEvenLanes8x8:
2541 case Iop_InterleaveOddLanes16x4:
2542 case Iop_InterleaveEvenLanes16x4:
sewardj7cf4e6b2008-05-01 20:24:26 +00002543 return assignNew('V', mce, Ity_I64, binop(op, vatom1, vatom2));
sewardjacd2e912005-01-13 19:17:06 +00002544
sewardj57f92b02010-08-22 11:54:14 +00002545 case Iop_GetElem8x8:
2546 complainIfUndefined(mce, atom2);
2547 return assignNew('V', mce, Ity_I8, binop(op, vatom1, atom2));
2548 case Iop_GetElem16x4:
2549 complainIfUndefined(mce, atom2);
2550 return assignNew('V', mce, Ity_I16, binop(op, vatom1, atom2));
2551 case Iop_GetElem32x2:
2552 complainIfUndefined(mce, atom2);
2553 return assignNew('V', mce, Ity_I32, binop(op, vatom1, atom2));
2554
sewardj114a9172008-02-09 01:49:32 +00002555 /* Perm8x8: rearrange values in left arg using steering values
2556 from right arg. So rearrange the vbits in the same way but
2557 pessimise wrt steering values. */
2558 case Iop_Perm8x8:
2559 return mkUifU64(
2560 mce,
sewardj7cf4e6b2008-05-01 20:24:26 +00002561 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
sewardj114a9172008-02-09 01:49:32 +00002562 mkPCast8x8(mce, vatom2)
2563 );
2564
sewardj20d38f22005-02-07 23:50:18 +00002565 /* V128-bit SIMD */
sewardj0b070592004-12-10 21:44:22 +00002566
sewardj57f92b02010-08-22 11:54:14 +00002567 case Iop_ShrN8x16:
sewardja1d93302004-12-12 16:45:06 +00002568 case Iop_ShrN16x8:
2569 case Iop_ShrN32x4:
2570 case Iop_ShrN64x2:
sewardj57f92b02010-08-22 11:54:14 +00002571 case Iop_SarN8x16:
sewardja1d93302004-12-12 16:45:06 +00002572 case Iop_SarN16x8:
2573 case Iop_SarN32x4:
sewardj57f92b02010-08-22 11:54:14 +00002574 case Iop_SarN64x2:
2575 case Iop_ShlN8x16:
sewardja1d93302004-12-12 16:45:06 +00002576 case Iop_ShlN16x8:
2577 case Iop_ShlN32x4:
2578 case Iop_ShlN64x2:
sewardj620eb5b2005-10-22 12:50:43 +00002579 /* Same scheme as with all other shifts. Note: 22 Oct 05:
2580 this is wrong now, scalar shifts are done properly lazily.
2581 Vector shifts should be fixed too. */
sewardja1d93302004-12-12 16:45:06 +00002582 complainIfUndefined(mce, atom2);
sewardj7cf4e6b2008-05-01 20:24:26 +00002583 return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2));
sewardja1d93302004-12-12 16:45:06 +00002584
sewardjcbf8be72005-11-10 18:34:41 +00002585 /* V x V shifts/rotates are done using the standard lazy scheme. */
sewardj43d60752005-11-10 18:13:01 +00002586 case Iop_Shl8x16:
2587 case Iop_Shr8x16:
2588 case Iop_Sar8x16:
sewardj57f92b02010-08-22 11:54:14 +00002589 case Iop_Sal8x16:
sewardjcbf8be72005-11-10 18:34:41 +00002590 case Iop_Rol8x16:
sewardj43d60752005-11-10 18:13:01 +00002591 return mkUifUV128(mce,
sewardj7cf4e6b2008-05-01 20:24:26 +00002592 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
sewardj43d60752005-11-10 18:13:01 +00002593 mkPCast8x16(mce,vatom2)
2594 );
2595
2596 case Iop_Shl16x8:
2597 case Iop_Shr16x8:
2598 case Iop_Sar16x8:
sewardj57f92b02010-08-22 11:54:14 +00002599 case Iop_Sal16x8:
sewardjcbf8be72005-11-10 18:34:41 +00002600 case Iop_Rol16x8:
sewardj43d60752005-11-10 18:13:01 +00002601 return mkUifUV128(mce,
sewardj7cf4e6b2008-05-01 20:24:26 +00002602 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
sewardj43d60752005-11-10 18:13:01 +00002603 mkPCast16x8(mce,vatom2)
2604 );
2605
2606 case Iop_Shl32x4:
2607 case Iop_Shr32x4:
2608 case Iop_Sar32x4:
sewardj57f92b02010-08-22 11:54:14 +00002609 case Iop_Sal32x4:
sewardjcbf8be72005-11-10 18:34:41 +00002610 case Iop_Rol32x4:
sewardj43d60752005-11-10 18:13:01 +00002611 return mkUifUV128(mce,
sewardj7cf4e6b2008-05-01 20:24:26 +00002612 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
sewardj43d60752005-11-10 18:13:01 +00002613 mkPCast32x4(mce,vatom2)
2614 );
2615
sewardj57f92b02010-08-22 11:54:14 +00002616 case Iop_Shl64x2:
2617 case Iop_Shr64x2:
2618 case Iop_Sar64x2:
2619 case Iop_Sal64x2:
2620 return mkUifUV128(mce,
2621 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
2622 mkPCast64x2(mce,vatom2)
2623 );
2624
2625 case Iop_F32ToFixed32Ux4_RZ:
2626 case Iop_F32ToFixed32Sx4_RZ:
2627 case Iop_Fixed32UToF32x4_RN:
2628 case Iop_Fixed32SToF32x4_RN:
2629 complainIfUndefined(mce, atom2);
2630 return mkPCast32x4(mce, vatom1);
2631
2632 case Iop_F32ToFixed32Ux2_RZ:
2633 case Iop_F32ToFixed32Sx2_RZ:
2634 case Iop_Fixed32UToF32x2_RN:
2635 case Iop_Fixed32SToF32x2_RN:
2636 complainIfUndefined(mce, atom2);
2637 return mkPCast32x2(mce, vatom1);
2638
sewardja1d93302004-12-12 16:45:06 +00002639 case Iop_QSub8Ux16:
2640 case Iop_QSub8Sx16:
2641 case Iop_Sub8x16:
2642 case Iop_Min8Ux16:
sewardj43d60752005-11-10 18:13:01 +00002643 case Iop_Min8Sx16:
sewardja1d93302004-12-12 16:45:06 +00002644 case Iop_Max8Ux16:
sewardj43d60752005-11-10 18:13:01 +00002645 case Iop_Max8Sx16:
sewardja1d93302004-12-12 16:45:06 +00002646 case Iop_CmpGT8Sx16:
sewardj43d60752005-11-10 18:13:01 +00002647 case Iop_CmpGT8Ux16:
sewardja1d93302004-12-12 16:45:06 +00002648 case Iop_CmpEQ8x16:
2649 case Iop_Avg8Ux16:
sewardj43d60752005-11-10 18:13:01 +00002650 case Iop_Avg8Sx16:
sewardja1d93302004-12-12 16:45:06 +00002651 case Iop_QAdd8Ux16:
2652 case Iop_QAdd8Sx16:
sewardj57f92b02010-08-22 11:54:14 +00002653 case Iop_QSal8x16:
2654 case Iop_QShl8x16:
sewardja1d93302004-12-12 16:45:06 +00002655 case Iop_Add8x16:
sewardj57f92b02010-08-22 11:54:14 +00002656 case Iop_Mul8x16:
2657 case Iop_PolynomialMul8x16:
sewardja1d93302004-12-12 16:45:06 +00002658 return binary8Ix16(mce, vatom1, vatom2);
2659
2660 case Iop_QSub16Ux8:
2661 case Iop_QSub16Sx8:
2662 case Iop_Sub16x8:
2663 case Iop_Mul16x8:
2664 case Iop_MulHi16Sx8:
2665 case Iop_MulHi16Ux8:
2666 case Iop_Min16Sx8:
sewardj43d60752005-11-10 18:13:01 +00002667 case Iop_Min16Ux8:
sewardja1d93302004-12-12 16:45:06 +00002668 case Iop_Max16Sx8:
sewardj43d60752005-11-10 18:13:01 +00002669 case Iop_Max16Ux8:
sewardja1d93302004-12-12 16:45:06 +00002670 case Iop_CmpGT16Sx8:
sewardj43d60752005-11-10 18:13:01 +00002671 case Iop_CmpGT16Ux8:
sewardja1d93302004-12-12 16:45:06 +00002672 case Iop_CmpEQ16x8:
2673 case Iop_Avg16Ux8:
sewardj43d60752005-11-10 18:13:01 +00002674 case Iop_Avg16Sx8:
sewardja1d93302004-12-12 16:45:06 +00002675 case Iop_QAdd16Ux8:
2676 case Iop_QAdd16Sx8:
sewardj57f92b02010-08-22 11:54:14 +00002677 case Iop_QSal16x8:
2678 case Iop_QShl16x8:
sewardja1d93302004-12-12 16:45:06 +00002679 case Iop_Add16x8:
sewardj57f92b02010-08-22 11:54:14 +00002680 case Iop_QDMulHi16Sx8:
2681 case Iop_QRDMulHi16Sx8:
sewardja1d93302004-12-12 16:45:06 +00002682 return binary16Ix8(mce, vatom1, vatom2);
2683
2684 case Iop_Sub32x4:
2685 case Iop_CmpGT32Sx4:
sewardj43d60752005-11-10 18:13:01 +00002686 case Iop_CmpGT32Ux4:
sewardja1d93302004-12-12 16:45:06 +00002687 case Iop_CmpEQ32x4:
sewardj43d60752005-11-10 18:13:01 +00002688 case Iop_QAdd32Sx4:
2689 case Iop_QAdd32Ux4:
2690 case Iop_QSub32Sx4:
2691 case Iop_QSub32Ux4:
sewardj57f92b02010-08-22 11:54:14 +00002692 case Iop_QSal32x4:
2693 case Iop_QShl32x4:
sewardj43d60752005-11-10 18:13:01 +00002694 case Iop_Avg32Ux4:
2695 case Iop_Avg32Sx4:
sewardja1d93302004-12-12 16:45:06 +00002696 case Iop_Add32x4:
sewardj43d60752005-11-10 18:13:01 +00002697 case Iop_Max32Ux4:
2698 case Iop_Max32Sx4:
2699 case Iop_Min32Ux4:
2700 case Iop_Min32Sx4:
sewardjb823b852010-06-18 08:18:38 +00002701 case Iop_Mul32x4:
sewardj57f92b02010-08-22 11:54:14 +00002702 case Iop_QDMulHi32Sx4:
2703 case Iop_QRDMulHi32Sx4:
sewardja1d93302004-12-12 16:45:06 +00002704 return binary32Ix4(mce, vatom1, vatom2);
2705
2706 case Iop_Sub64x2:
2707 case Iop_Add64x2:
sewardjb823b852010-06-18 08:18:38 +00002708 case Iop_CmpGT64Sx2:
sewardj57f92b02010-08-22 11:54:14 +00002709 case Iop_QSal64x2:
2710 case Iop_QShl64x2:
2711 case Iop_QAdd64Ux2:
2712 case Iop_QAdd64Sx2:
2713 case Iop_QSub64Ux2:
2714 case Iop_QSub64Sx2:
sewardja1d93302004-12-12 16:45:06 +00002715 return binary64Ix2(mce, vatom1, vatom2);
2716
sewardj7ee7d852011-06-16 11:37:21 +00002717 case Iop_QNarrowBin32Sto16Sx8:
2718 case Iop_QNarrowBin32Uto16Ux8:
2719 case Iop_QNarrowBin32Sto16Ux8:
2720 case Iop_QNarrowBin16Sto8Sx16:
2721 case Iop_QNarrowBin16Uto8Ux16:
2722 case Iop_QNarrowBin16Sto8Ux16:
2723 return vectorNarrowBinV128(mce, op, vatom1, vatom2);
sewardja1d93302004-12-12 16:45:06 +00002724
sewardj0b070592004-12-10 21:44:22 +00002725 case Iop_Sub64Fx2:
2726 case Iop_Mul64Fx2:
2727 case Iop_Min64Fx2:
2728 case Iop_Max64Fx2:
2729 case Iop_Div64Fx2:
2730 case Iop_CmpLT64Fx2:
2731 case Iop_CmpLE64Fx2:
2732 case Iop_CmpEQ64Fx2:
sewardj545663e2005-11-05 01:55:04 +00002733 case Iop_CmpUN64Fx2:
sewardj0b070592004-12-10 21:44:22 +00002734 case Iop_Add64Fx2:
2735 return binary64Fx2(mce, vatom1, vatom2);
2736
2737 case Iop_Sub64F0x2:
2738 case Iop_Mul64F0x2:
2739 case Iop_Min64F0x2:
2740 case Iop_Max64F0x2:
2741 case Iop_Div64F0x2:
2742 case Iop_CmpLT64F0x2:
2743 case Iop_CmpLE64F0x2:
2744 case Iop_CmpEQ64F0x2:
sewardj545663e2005-11-05 01:55:04 +00002745 case Iop_CmpUN64F0x2:
sewardj0b070592004-12-10 21:44:22 +00002746 case Iop_Add64F0x2:
2747 return binary64F0x2(mce, vatom1, vatom2);
2748
sewardj170ee212004-12-10 18:57:51 +00002749 case Iop_Sub32Fx4:
2750 case Iop_Mul32Fx4:
2751 case Iop_Min32Fx4:
2752 case Iop_Max32Fx4:
2753 case Iop_Div32Fx4:
2754 case Iop_CmpLT32Fx4:
2755 case Iop_CmpLE32Fx4:
2756 case Iop_CmpEQ32Fx4:
sewardj545663e2005-11-05 01:55:04 +00002757 case Iop_CmpUN32Fx4:
cerione78ba2a2005-11-14 03:00:35 +00002758 case Iop_CmpGT32Fx4:
2759 case Iop_CmpGE32Fx4:
sewardj3245c912004-12-10 14:58:26 +00002760 case Iop_Add32Fx4:
sewardj57f92b02010-08-22 11:54:14 +00002761 case Iop_Recps32Fx4:
2762 case Iop_Rsqrts32Fx4:
sewardj3245c912004-12-10 14:58:26 +00002763 return binary32Fx4(mce, vatom1, vatom2);
2764
sewardj57f92b02010-08-22 11:54:14 +00002765 case Iop_Sub32Fx2:
2766 case Iop_Mul32Fx2:
2767 case Iop_Min32Fx2:
2768 case Iop_Max32Fx2:
2769 case Iop_CmpEQ32Fx2:
2770 case Iop_CmpGT32Fx2:
2771 case Iop_CmpGE32Fx2:
2772 case Iop_Add32Fx2:
2773 case Iop_Recps32Fx2:
2774 case Iop_Rsqrts32Fx2:
2775 return binary32Fx2(mce, vatom1, vatom2);
2776
sewardj170ee212004-12-10 18:57:51 +00002777 case Iop_Sub32F0x4:
2778 case Iop_Mul32F0x4:
2779 case Iop_Min32F0x4:
2780 case Iop_Max32F0x4:
2781 case Iop_Div32F0x4:
2782 case Iop_CmpLT32F0x4:
2783 case Iop_CmpLE32F0x4:
2784 case Iop_CmpEQ32F0x4:
sewardj545663e2005-11-05 01:55:04 +00002785 case Iop_CmpUN32F0x4:
sewardj170ee212004-12-10 18:57:51 +00002786 case Iop_Add32F0x4:
2787 return binary32F0x4(mce, vatom1, vatom2);
2788
sewardj57f92b02010-08-22 11:54:14 +00002789 case Iop_QShlN8Sx16:
2790 case Iop_QShlN8x16:
2791 case Iop_QSalN8x16:
2792 complainIfUndefined(mce, atom2);
2793 return mkPCast8x16(mce, vatom1);
2794
2795 case Iop_QShlN16Sx8:
2796 case Iop_QShlN16x8:
2797 case Iop_QSalN16x8:
2798 complainIfUndefined(mce, atom2);
2799 return mkPCast16x8(mce, vatom1);
2800
2801 case Iop_QShlN32Sx4:
2802 case Iop_QShlN32x4:
2803 case Iop_QSalN32x4:
2804 complainIfUndefined(mce, atom2);
2805 return mkPCast32x4(mce, vatom1);
2806
2807 case Iop_QShlN64Sx2:
2808 case Iop_QShlN64x2:
2809 case Iop_QSalN64x2:
2810 complainIfUndefined(mce, atom2);
2811 return mkPCast32x4(mce, vatom1);
2812
2813 case Iop_Mull32Sx2:
2814 case Iop_Mull32Ux2:
2815 case Iop_QDMulLong32Sx2:
sewardj7ee7d852011-06-16 11:37:21 +00002816 return vectorWidenI64(mce, Iop_Widen32Sto64x2,
2817 mkUifU64(mce, vatom1, vatom2));
sewardj57f92b02010-08-22 11:54:14 +00002818
2819 case Iop_Mull16Sx4:
2820 case Iop_Mull16Ux4:
2821 case Iop_QDMulLong16Sx4:
sewardj7ee7d852011-06-16 11:37:21 +00002822 return vectorWidenI64(mce, Iop_Widen16Sto32x4,
2823 mkUifU64(mce, vatom1, vatom2));
sewardj57f92b02010-08-22 11:54:14 +00002824
2825 case Iop_Mull8Sx8:
2826 case Iop_Mull8Ux8:
2827 case Iop_PolynomialMull8x8:
sewardj7ee7d852011-06-16 11:37:21 +00002828 return vectorWidenI64(mce, Iop_Widen8Sto16x8,
2829 mkUifU64(mce, vatom1, vatom2));
sewardj57f92b02010-08-22 11:54:14 +00002830
2831 case Iop_PwAdd32x4:
2832 return mkPCast32x4(mce,
2833 assignNew('V', mce, Ity_V128, binop(op, mkPCast32x4(mce, vatom1),
2834 mkPCast32x4(mce, vatom2))));
2835
2836 case Iop_PwAdd16x8:
2837 return mkPCast16x8(mce,
2838 assignNew('V', mce, Ity_V128, binop(op, mkPCast16x8(mce, vatom1),
2839 mkPCast16x8(mce, vatom2))));
2840
2841 case Iop_PwAdd8x16:
2842 return mkPCast8x16(mce,
2843 assignNew('V', mce, Ity_V128, binop(op, mkPCast8x16(mce, vatom1),
2844 mkPCast8x16(mce, vatom2))));
2845
sewardj20d38f22005-02-07 23:50:18 +00002846 /* V128-bit data-steering */
2847 case Iop_SetV128lo32:
2848 case Iop_SetV128lo64:
2849 case Iop_64HLtoV128:
sewardja1d93302004-12-12 16:45:06 +00002850 case Iop_InterleaveLO64x2:
2851 case Iop_InterleaveLO32x4:
2852 case Iop_InterleaveLO16x8:
2853 case Iop_InterleaveLO8x16:
2854 case Iop_InterleaveHI64x2:
2855 case Iop_InterleaveHI32x4:
2856 case Iop_InterleaveHI16x8:
2857 case Iop_InterleaveHI8x16:
sewardj57f92b02010-08-22 11:54:14 +00002858 case Iop_CatOddLanes8x16:
2859 case Iop_CatOddLanes16x8:
2860 case Iop_CatOddLanes32x4:
2861 case Iop_CatEvenLanes8x16:
2862 case Iop_CatEvenLanes16x8:
2863 case Iop_CatEvenLanes32x4:
2864 case Iop_InterleaveOddLanes8x16:
2865 case Iop_InterleaveOddLanes16x8:
2866 case Iop_InterleaveOddLanes32x4:
2867 case Iop_InterleaveEvenLanes8x16:
2868 case Iop_InterleaveEvenLanes16x8:
2869 case Iop_InterleaveEvenLanes32x4:
sewardj7cf4e6b2008-05-01 20:24:26 +00002870 return assignNew('V', mce, Ity_V128, binop(op, vatom1, vatom2));
sewardj57f92b02010-08-22 11:54:14 +00002871
2872 case Iop_GetElem8x16:
2873 complainIfUndefined(mce, atom2);
2874 return assignNew('V', mce, Ity_I8, binop(op, vatom1, atom2));
2875 case Iop_GetElem16x8:
2876 complainIfUndefined(mce, atom2);
2877 return assignNew('V', mce, Ity_I16, binop(op, vatom1, atom2));
2878 case Iop_GetElem32x4:
2879 complainIfUndefined(mce, atom2);
2880 return assignNew('V', mce, Ity_I32, binop(op, vatom1, atom2));
2881 case Iop_GetElem64x2:
2882 complainIfUndefined(mce, atom2);
2883 return assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2));
2884
sewardj620eb5b2005-10-22 12:50:43 +00002885 /* Perm8x16: rearrange values in left arg using steering values
2886 from right arg. So rearrange the vbits in the same way but
2887 pessimise wrt steering values. */
2888 case Iop_Perm8x16:
2889 return mkUifUV128(
2890 mce,
sewardj7cf4e6b2008-05-01 20:24:26 +00002891 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
sewardj620eb5b2005-10-22 12:50:43 +00002892 mkPCast8x16(mce, vatom2)
2893 );
sewardj170ee212004-12-10 18:57:51 +00002894
sewardj43d60752005-11-10 18:13:01 +00002895 /* These two take the lower half of each 16-bit lane, sign/zero
2896 extend it to 32, and multiply together, producing a 32x4
2897 result (and implicitly ignoring half the operand bits). So
2898 treat it as a bunch of independent 16x8 operations, but then
2899 do 32-bit shifts left-right to copy the lower half results
2900 (which are all 0s or all 1s due to PCasting in binary16Ix8)
2901 into the upper half of each result lane. */
2902 case Iop_MullEven16Ux8:
2903 case Iop_MullEven16Sx8: {
2904 IRAtom* at;
2905 at = binary16Ix8(mce,vatom1,vatom2);
sewardj7cf4e6b2008-05-01 20:24:26 +00002906 at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN32x4, at, mkU8(16)));
2907 at = assignNew('V', mce, Ity_V128, binop(Iop_SarN32x4, at, mkU8(16)));
sewardj43d60752005-11-10 18:13:01 +00002908 return at;
2909 }
2910
2911 /* Same deal as Iop_MullEven16{S,U}x8 */
2912 case Iop_MullEven8Ux16:
2913 case Iop_MullEven8Sx16: {
2914 IRAtom* at;
2915 at = binary8Ix16(mce,vatom1,vatom2);
sewardj7cf4e6b2008-05-01 20:24:26 +00002916 at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN16x8, at, mkU8(8)));
2917 at = assignNew('V', mce, Ity_V128, binop(Iop_SarN16x8, at, mkU8(8)));
sewardj43d60752005-11-10 18:13:01 +00002918 return at;
2919 }
2920
2921 /* narrow 2xV128 into 1xV128, hi half from left arg, in a 2 x
2922 32x4 -> 16x8 laneage, discarding the upper half of each lane.
2923 Simply apply same op to the V bits, since this really no more
2924 than a data steering operation. */
sewardj7ee7d852011-06-16 11:37:21 +00002925 case Iop_NarrowBin32to16x8:
2926 case Iop_NarrowBin16to8x16:
sewardj7cf4e6b2008-05-01 20:24:26 +00002927 return assignNew('V', mce, Ity_V128,
2928 binop(op, vatom1, vatom2));
sewardj43d60752005-11-10 18:13:01 +00002929
2930 case Iop_ShrV128:
2931 case Iop_ShlV128:
2932 /* Same scheme as with all other shifts. Note: 10 Nov 05:
2933 this is wrong now, scalar shifts are done properly lazily.
2934 Vector shifts should be fixed too. */
2935 complainIfUndefined(mce, atom2);
sewardj7cf4e6b2008-05-01 20:24:26 +00002936 return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2));
sewardj43d60752005-11-10 18:13:01 +00002937
sewardj69a13322005-04-23 01:14:51 +00002938 /* I128-bit data-steering */
2939 case Iop_64HLto128:
sewardj7cf4e6b2008-05-01 20:24:26 +00002940 return assignNew('V', mce, Ity_I128, binop(op, vatom1, vatom2));
sewardj69a13322005-04-23 01:14:51 +00002941
sewardj3245c912004-12-10 14:58:26 +00002942 /* Scalar floating point */
2943
sewardjb5b87402011-03-07 16:05:35 +00002944 case Iop_F32toI64S:
2945 /* I32(rm) x F32 -> I64 */
2946 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
2947
2948 case Iop_I64StoF32:
2949 /* I32(rm) x I64 -> F32 */
2950 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
2951
sewardjed69fdb2006-02-03 16:12:27 +00002952 case Iop_RoundF64toInt:
2953 case Iop_RoundF64toF32:
sewardj06f96d02009-12-31 19:24:12 +00002954 case Iop_F64toI64S:
2955 case Iop_I64StoF64:
sewardjf34eb492011-04-15 11:57:05 +00002956 case Iop_I64UtoF64:
sewardj22ac5f42006-02-03 22:55:04 +00002957 case Iop_SinF64:
2958 case Iop_CosF64:
2959 case Iop_TanF64:
2960 case Iop_2xm1F64:
2961 case Iop_SqrtF64:
2962 /* I32(rm) x I64/F64 -> I64/F64 */
sewardj95448072004-11-22 20:19:51 +00002963 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
2964
sewardjd376a762010-06-27 09:08:54 +00002965 case Iop_RoundF32toInt:
sewardjaec1be32010-01-03 22:29:32 +00002966 case Iop_SqrtF32:
2967 /* I32(rm) x I32/F32 -> I32/F32 */
2968 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
2969
sewardjb5b87402011-03-07 16:05:35 +00002970 case Iop_SqrtF128:
2971 /* I32(rm) x F128 -> F128 */
2972 return mkLazy2(mce, Ity_I128, vatom1, vatom2);
2973
2974 case Iop_I32StoF32:
2975 case Iop_F32toI32S:
2976 /* First arg is I32 (rounding mode), second is F32/I32 (data). */
2977 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
2978
2979 case Iop_F128toI32S: /* IRRoundingMode(I32) x F128 -> signed I32 */
2980 case Iop_F128toF32: /* IRRoundingMode(I32) x F128 -> F32 */
2981 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
2982
2983 case Iop_F128toI64S: /* IRRoundingMode(I32) x F128 -> signed I64 */
2984 case Iop_F128toF64: /* IRRoundingMode(I32) x F128 -> F64 */
2985 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
2986
2987 case Iop_F64HLtoF128:
2988 return assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, vatom1, vatom2));
2989
sewardj59570ff2010-01-01 11:59:33 +00002990 case Iop_F64toI32U:
sewardj06f96d02009-12-31 19:24:12 +00002991 case Iop_F64toI32S:
sewardje9e16d32004-12-10 13:17:55 +00002992 case Iop_F64toF32:
sewardjf34eb492011-04-15 11:57:05 +00002993 case Iop_I64UtoF32:
sewardj95448072004-11-22 20:19:51 +00002994 /* First arg is I32 (rounding mode), second is F64 (data). */
2995 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
2996
sewardj06f96d02009-12-31 19:24:12 +00002997 case Iop_F64toI16S:
sewardj95448072004-11-22 20:19:51 +00002998 /* First arg is I32 (rounding mode), second is F64 (data). */
2999 return mkLazy2(mce, Ity_I16, vatom1, vatom2);
3000
sewardjb5b87402011-03-07 16:05:35 +00003001 case Iop_CmpF32:
sewardj95448072004-11-22 20:19:51 +00003002 case Iop_CmpF64:
sewardjb5b87402011-03-07 16:05:35 +00003003 case Iop_CmpF128:
sewardj95448072004-11-22 20:19:51 +00003004 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3005
3006 /* non-FP after here */
3007
3008 case Iop_DivModU64to32:
3009 case Iop_DivModS64to32:
3010 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3011
sewardj69a13322005-04-23 01:14:51 +00003012 case Iop_DivModU128to64:
3013 case Iop_DivModS128to64:
3014 return mkLazy2(mce, Ity_I128, vatom1, vatom2);
3015
sewardj95448072004-11-22 20:19:51 +00003016 case Iop_16HLto32:
sewardj7cf4e6b2008-05-01 20:24:26 +00003017 return assignNew('V', mce, Ity_I32, binop(op, vatom1, vatom2));
sewardj95448072004-11-22 20:19:51 +00003018 case Iop_32HLto64:
sewardj7cf4e6b2008-05-01 20:24:26 +00003019 return assignNew('V', mce, Ity_I64, binop(op, vatom1, vatom2));
sewardj95448072004-11-22 20:19:51 +00003020
sewardjb5b87402011-03-07 16:05:35 +00003021 case Iop_DivModS64to64:
sewardj6cf40ff2005-04-20 22:31:26 +00003022 case Iop_MullS64:
3023 case Iop_MullU64: {
3024 IRAtom* vLo64 = mkLeft64(mce, mkUifU64(mce, vatom1,vatom2));
3025 IRAtom* vHi64 = mkPCastTo(mce, Ity_I64, vLo64);
sewardj7cf4e6b2008-05-01 20:24:26 +00003026 return assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, vHi64, vLo64));
sewardj6cf40ff2005-04-20 22:31:26 +00003027 }
3028
sewardj95448072004-11-22 20:19:51 +00003029 case Iop_MullS32:
3030 case Iop_MullU32: {
3031 IRAtom* vLo32 = mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
3032 IRAtom* vHi32 = mkPCastTo(mce, Ity_I32, vLo32);
sewardj7cf4e6b2008-05-01 20:24:26 +00003033 return assignNew('V', mce, Ity_I64, binop(Iop_32HLto64, vHi32, vLo32));
sewardj95448072004-11-22 20:19:51 +00003034 }
3035
3036 case Iop_MullS16:
3037 case Iop_MullU16: {
3038 IRAtom* vLo16 = mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
3039 IRAtom* vHi16 = mkPCastTo(mce, Ity_I16, vLo16);
sewardj7cf4e6b2008-05-01 20:24:26 +00003040 return assignNew('V', mce, Ity_I32, binop(Iop_16HLto32, vHi16, vLo16));
sewardj95448072004-11-22 20:19:51 +00003041 }
3042
3043 case Iop_MullS8:
3044 case Iop_MullU8: {
3045 IRAtom* vLo8 = mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
3046 IRAtom* vHi8 = mkPCastTo(mce, Ity_I8, vLo8);
sewardj7cf4e6b2008-05-01 20:24:26 +00003047 return assignNew('V', mce, Ity_I16, binop(Iop_8HLto16, vHi8, vLo8));
sewardj95448072004-11-22 20:19:51 +00003048 }
3049
sewardj5af05062010-10-18 16:31:14 +00003050 case Iop_Sad8Ux4: /* maybe we could do better? ftm, do mkLazy2. */
cerion9e591082005-06-23 15:28:34 +00003051 case Iop_DivS32:
3052 case Iop_DivU32:
3053 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3054
sewardjb00944a2005-12-23 12:47:16 +00003055 case Iop_DivS64:
3056 case Iop_DivU64:
3057 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3058
sewardj95448072004-11-22 20:19:51 +00003059 case Iop_Add32:
sewardjd5204dc2004-12-31 01:16:11 +00003060 if (mce->bogusLiterals)
3061 return expensiveAddSub(mce,True,Ity_I32,
3062 vatom1,vatom2, atom1,atom2);
3063 else
3064 goto cheap_AddSub32;
sewardj95448072004-11-22 20:19:51 +00003065 case Iop_Sub32:
sewardjd5204dc2004-12-31 01:16:11 +00003066 if (mce->bogusLiterals)
3067 return expensiveAddSub(mce,False,Ity_I32,
3068 vatom1,vatom2, atom1,atom2);
3069 else
3070 goto cheap_AddSub32;
3071
3072 cheap_AddSub32:
sewardj95448072004-11-22 20:19:51 +00003073 case Iop_Mul32:
sewardj992dff92005-10-07 11:08:55 +00003074 return mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
3075
sewardj463b3d92005-07-18 11:41:15 +00003076 case Iop_CmpORD32S:
3077 case Iop_CmpORD32U:
sewardj1bc82102005-12-23 00:16:24 +00003078 case Iop_CmpORD64S:
3079 case Iop_CmpORD64U:
3080 return doCmpORD(mce, op, vatom1,vatom2, atom1,atom2);
sewardj95448072004-11-22 20:19:51 +00003081
sewardj681be302005-01-15 20:43:58 +00003082 case Iop_Add64:
tomd9774d72005-06-27 08:11:01 +00003083 if (mce->bogusLiterals)
3084 return expensiveAddSub(mce,True,Ity_I64,
3085 vatom1,vatom2, atom1,atom2);
3086 else
3087 goto cheap_AddSub64;
sewardj681be302005-01-15 20:43:58 +00003088 case Iop_Sub64:
tomd9774d72005-06-27 08:11:01 +00003089 if (mce->bogusLiterals)
3090 return expensiveAddSub(mce,False,Ity_I64,
3091 vatom1,vatom2, atom1,atom2);
3092 else
3093 goto cheap_AddSub64;
3094
3095 cheap_AddSub64:
3096 case Iop_Mul64:
sewardj681be302005-01-15 20:43:58 +00003097 return mkLeft64(mce, mkUifU64(mce, vatom1,vatom2));
3098
sewardj95448072004-11-22 20:19:51 +00003099 case Iop_Mul16:
3100 case Iop_Add16:
3101 case Iop_Sub16:
3102 return mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
3103
3104 case Iop_Sub8:
3105 case Iop_Add8:
3106 return mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
3107
sewardj69a13322005-04-23 01:14:51 +00003108 case Iop_CmpEQ64:
sewardje6f8af42005-07-06 18:48:59 +00003109 case Iop_CmpNE64:
sewardj69a13322005-04-23 01:14:51 +00003110 if (mce->bogusLiterals)
3111 return expensiveCmpEQorNE(mce,Ity_I64, vatom1,vatom2, atom1,atom2 );
3112 else
3113 goto cheap_cmp64;
3114 cheap_cmp64:
tomcd986332005-04-26 07:44:48 +00003115 case Iop_CmpLE64S: case Iop_CmpLE64U:
3116 case Iop_CmpLT64U: case Iop_CmpLT64S:
sewardj69a13322005-04-23 01:14:51 +00003117 return mkPCastTo(mce, Ity_I1, mkUifU64(mce, vatom1,vatom2));
3118
sewardjd5204dc2004-12-31 01:16:11 +00003119 case Iop_CmpEQ32:
sewardje6f8af42005-07-06 18:48:59 +00003120 case Iop_CmpNE32:
sewardjd5204dc2004-12-31 01:16:11 +00003121 if (mce->bogusLiterals)
3122 return expensiveCmpEQorNE(mce,Ity_I32, vatom1,vatom2, atom1,atom2 );
3123 else
3124 goto cheap_cmp32;
sewardjd5204dc2004-12-31 01:16:11 +00003125 cheap_cmp32:
sewardj95448072004-11-22 20:19:51 +00003126 case Iop_CmpLE32S: case Iop_CmpLE32U:
3127 case Iop_CmpLT32U: case Iop_CmpLT32S:
sewardj95448072004-11-22 20:19:51 +00003128 return mkPCastTo(mce, Ity_I1, mkUifU32(mce, vatom1,vatom2));
3129
3130 case Iop_CmpEQ16: case Iop_CmpNE16:
3131 return mkPCastTo(mce, Ity_I1, mkUifU16(mce, vatom1,vatom2));
3132
3133 case Iop_CmpEQ8: case Iop_CmpNE8:
3134 return mkPCastTo(mce, Ity_I1, mkUifU8(mce, vatom1,vatom2));
3135
sewardjafed4c52009-07-12 13:00:17 +00003136 case Iop_CasCmpEQ8: case Iop_CasCmpNE8:
3137 case Iop_CasCmpEQ16: case Iop_CasCmpNE16:
3138 case Iop_CasCmpEQ32: case Iop_CasCmpNE32:
3139 case Iop_CasCmpEQ64: case Iop_CasCmpNE64:
3140 /* Just say these all produce a defined result, regardless
3141 of their arguments. See COMMENT_ON_CasCmpEQ in this file. */
3142 return assignNew('V', mce, Ity_I1, definedOfType(Ity_I1));
3143
sewardjaaddbc22005-10-07 09:49:53 +00003144 case Iop_Shl64: case Iop_Shr64: case Iop_Sar64:
3145 return scalarShift( mce, Ity_I64, op, vatom1,vatom2, atom1,atom2 );
3146
sewardj95448072004-11-22 20:19:51 +00003147 case Iop_Shl32: case Iop_Shr32: case Iop_Sar32:
sewardjaaddbc22005-10-07 09:49:53 +00003148 return scalarShift( mce, Ity_I32, op, vatom1,vatom2, atom1,atom2 );
sewardj95448072004-11-22 20:19:51 +00003149
sewardjdb67f5f2004-12-14 01:15:31 +00003150 case Iop_Shl16: case Iop_Shr16: case Iop_Sar16:
sewardjaaddbc22005-10-07 09:49:53 +00003151 return scalarShift( mce, Ity_I16, op, vatom1,vatom2, atom1,atom2 );
sewardj95448072004-11-22 20:19:51 +00003152
3153 case Iop_Shl8: case Iop_Shr8:
sewardjaaddbc22005-10-07 09:49:53 +00003154 return scalarShift( mce, Ity_I8, op, vatom1,vatom2, atom1,atom2 );
sewardj95448072004-11-22 20:19:51 +00003155
sewardj20d38f22005-02-07 23:50:18 +00003156 case Iop_AndV128:
3157 uifu = mkUifUV128; difd = mkDifDV128;
3158 and_or_ty = Ity_V128; improve = mkImproveANDV128; goto do_And_Or;
sewardj7010f6e2004-12-10 13:35:22 +00003159 case Iop_And64:
3160 uifu = mkUifU64; difd = mkDifD64;
3161 and_or_ty = Ity_I64; improve = mkImproveAND64; goto do_And_Or;
sewardj95448072004-11-22 20:19:51 +00003162 case Iop_And32:
3163 uifu = mkUifU32; difd = mkDifD32;
3164 and_or_ty = Ity_I32; improve = mkImproveAND32; goto do_And_Or;
3165 case Iop_And16:
3166 uifu = mkUifU16; difd = mkDifD16;
3167 and_or_ty = Ity_I16; improve = mkImproveAND16; goto do_And_Or;
3168 case Iop_And8:
3169 uifu = mkUifU8; difd = mkDifD8;
3170 and_or_ty = Ity_I8; improve = mkImproveAND8; goto do_And_Or;
3171
sewardj20d38f22005-02-07 23:50:18 +00003172 case Iop_OrV128:
3173 uifu = mkUifUV128; difd = mkDifDV128;
3174 and_or_ty = Ity_V128; improve = mkImproveORV128; goto do_And_Or;
sewardj7010f6e2004-12-10 13:35:22 +00003175 case Iop_Or64:
3176 uifu = mkUifU64; difd = mkDifD64;
3177 and_or_ty = Ity_I64; improve = mkImproveOR64; goto do_And_Or;
sewardj95448072004-11-22 20:19:51 +00003178 case Iop_Or32:
3179 uifu = mkUifU32; difd = mkDifD32;
3180 and_or_ty = Ity_I32; improve = mkImproveOR32; goto do_And_Or;
3181 case Iop_Or16:
3182 uifu = mkUifU16; difd = mkDifD16;
3183 and_or_ty = Ity_I16; improve = mkImproveOR16; goto do_And_Or;
3184 case Iop_Or8:
3185 uifu = mkUifU8; difd = mkDifD8;
3186 and_or_ty = Ity_I8; improve = mkImproveOR8; goto do_And_Or;
3187
3188 do_And_Or:
3189 return
3190 assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +00003191 'V', mce,
sewardj95448072004-11-22 20:19:51 +00003192 and_or_ty,
3193 difd(mce, uifu(mce, vatom1, vatom2),
3194 difd(mce, improve(mce, atom1, vatom1),
3195 improve(mce, atom2, vatom2) ) ) );
3196
3197 case Iop_Xor8:
3198 return mkUifU8(mce, vatom1, vatom2);
3199 case Iop_Xor16:
3200 return mkUifU16(mce, vatom1, vatom2);
3201 case Iop_Xor32:
3202 return mkUifU32(mce, vatom1, vatom2);
sewardj7010f6e2004-12-10 13:35:22 +00003203 case Iop_Xor64:
3204 return mkUifU64(mce, vatom1, vatom2);
sewardj20d38f22005-02-07 23:50:18 +00003205 case Iop_XorV128:
3206 return mkUifUV128(mce, vatom1, vatom2);
njn25e49d8e72002-09-23 09:36:25 +00003207
3208 default:
sewardj95448072004-11-22 20:19:51 +00003209 ppIROp(op);
3210 VG_(tool_panic)("memcheck:expr2vbits_Binop");
njn25e49d8e72002-09-23 09:36:25 +00003211 }
njn25e49d8e72002-09-23 09:36:25 +00003212}
3213
njn25e49d8e72002-09-23 09:36:25 +00003214
static 
IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
{
   /* Compute the shadow (V-bits) expression for the unary operation
      'op' applied to 'atom'.  Strategy per opcode group:
        - ops that merely move/reinterpret bits propagate the shadow
          through the same (or a bit-equivalent) op unchanged;
        - ops whose output definedness depends non-trivially on the
          input pessimise via mkPCastTo / per-lane PCast helpers
          ("any undefined input bit taints the whole result / lane"). */
   IRAtom* vatom = expr2vbits( mce, atom );
   tl_assert(isOriginalAtom(mce,atom));
   switch (op) {

      case Iop_Sqrt64Fx2:
         return unary64Fx2(mce, vatom);

      case Iop_Sqrt64F0x2:
         return unary64F0x2(mce, vatom);

      /* 4 x F32 SIMD ops: handled lane-wise by unary32Fx4. */
      case Iop_Sqrt32Fx4:
      case Iop_RSqrt32Fx4:
      case Iop_Recip32Fx4:
      case Iop_I32UtoFx4:
      case Iop_I32StoFx4:
      case Iop_QFtoI32Ux4_RZ:
      case Iop_QFtoI32Sx4_RZ:
      case Iop_RoundF32x4_RM:
      case Iop_RoundF32x4_RP:
      case Iop_RoundF32x4_RN:
      case Iop_RoundF32x4_RZ:
      case Iop_Recip32x4:
      case Iop_Abs32Fx4:
      case Iop_Neg32Fx4:
      case Iop_Rsqrte32Fx4:
         return unary32Fx4(mce, vatom);

      /* 2 x F32 (64-bit SIMD) ops. */
      case Iop_I32UtoFx2:
      case Iop_I32StoFx2:
      case Iop_Recip32Fx2:
      case Iop_Recip32x2:
      case Iop_Abs32Fx2:
      case Iop_Neg32Fx2:
      case Iop_Rsqrte32Fx2:
         return unary32Fx2(mce, vatom);

      /* Lowest-lane-only F32 ops on V128. */
      case Iop_Sqrt32F0x4:
      case Iop_RSqrt32F0x4:
      case Iop_Recip32F0x4:
         return unary32F0x4(mce, vatom);

      /* Pure data movement at V128: shadow moves the same way. */
      case Iop_32UtoV128:
      case Iop_64UtoV128:
      case Iop_Dup8x16:
      case Iop_Dup16x8:
      case Iop_Dup32x4:
      case Iop_Reverse16_8x16:
      case Iop_Reverse32_8x16:
      case Iop_Reverse32_16x8:
      case Iop_Reverse64_8x16:
      case Iop_Reverse64_16x8:
      case Iop_Reverse64_32x4:
         return assignNew('V', mce, Ity_V128, unop(op, vatom));

      /* F128 halves: shadow of an F128 is an I128, so extract the
         corresponding integer half of the shadow. */
      case Iop_F128HItoF64:  /* F128 -> high half of F128 */
         return assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, vatom));
      case Iop_F128LOtoF64:  /* F128 -> low  half of F128 */
         return assignNew('V', mce, Ity_I64, unop(Iop_128to64, vatom));

      case Iop_NegF128:
      case Iop_AbsF128:
         return mkPCastTo(mce, Ity_I128, vatom);

      case Iop_I32StoF128: /* signed I32 -> F128 */
      case Iop_I64StoF128: /* signed I64 -> F128 */
      case Iop_F32toF128:  /* F32 -> F128 */
      case Iop_F64toF128:  /* F64 -> F128 */
         return mkPCastTo(mce, Ity_I128, vatom);

      /* Ops producing a 64-bit result whose definedness is taken to
         depend on all input bits: pessimise to all-defined or
         all-undefined I64. */
      case Iop_F32toF64: 
      case Iop_I32StoF64:
      case Iop_I32UtoF64:
      case Iop_NegF64:
      case Iop_AbsF64:
      case Iop_Est5FRSqrt:
      case Iop_RoundF64toF64_NEAREST:
      case Iop_RoundF64toF64_NegINF:
      case Iop_RoundF64toF64_PosINF:
      case Iop_RoundF64toF64_ZERO:
      case Iop_Clz64:
      case Iop_Ctz64:
         return mkPCastTo(mce, Ity_I64, vatom);

      case Iop_Clz32:
      case Iop_Ctz32:
      case Iop_TruncF64asF32:
      case Iop_NegF32:
      case Iop_AbsF32:
         return mkPCastTo(mce, Ity_I32, vatom);

      /* Widenings/extractions/permutes to I64: exact shadow tracking
         by applying the identical op to the V bits. */
      case Iop_1Uto64:
      case Iop_8Uto64:
      case Iop_8Sto64:
      case Iop_16Uto64:
      case Iop_16Sto64:
      case Iop_32Sto64:
      case Iop_32Uto64:
      case Iop_V128to64:
      case Iop_V128HIto64:
      case Iop_128HIto64:
      case Iop_128to64:
      case Iop_Dup8x8:
      case Iop_Dup16x4:
      case Iop_Dup32x2:
      case Iop_Reverse16_8x8:
      case Iop_Reverse32_8x8:
      case Iop_Reverse32_16x4:
      case Iop_Reverse64_8x8:
      case Iop_Reverse64_16x4:
      case Iop_Reverse64_32x2:
         return assignNew('V', mce, Ity_I64, unop(op, vatom));

      /* Same idea, I32-valued results. */
      case Iop_I16StoF32:
      case Iop_64to32:
      case Iop_64HIto32:
      case Iop_1Uto32:
      case Iop_1Sto32:
      case Iop_8Uto32:
      case Iop_16Uto32:
      case Iop_16Sto32:
      case Iop_8Sto32:
      case Iop_V128to32:
         return assignNew('V', mce, Ity_I32, unop(op, vatom));

      case Iop_8Sto16:
      case Iop_8Uto16:
      case Iop_32to16:
      case Iop_32HIto16:
      case Iop_64to16:
         return assignNew('V', mce, Ity_I16, unop(op, vatom));

      case Iop_1Uto8:
      case Iop_16to8:
      case Iop_16HIto8:
      case Iop_32to8:
      case Iop_64to8:
         return assignNew('V', mce, Ity_I8, unop(op, vatom));

      case Iop_32to1:
         return assignNew('V', mce, Ity_I1, unop(Iop_32to1, vatom));

      case Iop_64to1:
         return assignNew('V', mce, Ity_I1, unop(Iop_64to1, vatom));

      /* Bit-for-bit reinterpretations and bitwise NOT: each output
         bit's definedness equals that of the corresponding input bit,
         so the shadow passes through untouched. */
      case Iop_ReinterpF64asI64:
      case Iop_ReinterpI64asF64:
      case Iop_ReinterpI32asF32:
      case Iop_ReinterpF32asI32:
      case Iop_NotV128:
      case Iop_Not64:
      case Iop_Not32:
      case Iop_Not16:
      case Iop_Not8:
      case Iop_Not1:
         return vatom;

      /* Per-lane pessimising casts: an undefined bit anywhere in a
         lane makes that whole output lane undefined. */
      case Iop_CmpNEZ8x8:
      case Iop_Cnt8x8:
      case Iop_Clz8Sx8:
      case Iop_Cls8Sx8:
      case Iop_Abs8x8:
         return mkPCast8x8(mce, vatom);

      case Iop_CmpNEZ8x16:
      case Iop_Cnt8x16:
      case Iop_Clz8Sx16:
      case Iop_Cls8Sx16:
      case Iop_Abs8x16:
         return mkPCast8x16(mce, vatom);

      case Iop_CmpNEZ16x4:
      case Iop_Clz16Sx4:
      case Iop_Cls16Sx4:
      case Iop_Abs16x4:
         return mkPCast16x4(mce, vatom);

      case Iop_CmpNEZ16x8:
      case Iop_Clz16Sx8:
      case Iop_Cls16Sx8:
      case Iop_Abs16x8:
         return mkPCast16x8(mce, vatom);

      case Iop_CmpNEZ32x2:
      case Iop_Clz32Sx2:
      case Iop_Cls32Sx2:
      case Iop_FtoI32Ux2_RZ:
      case Iop_FtoI32Sx2_RZ:
      case Iop_Abs32x2:
         return mkPCast32x2(mce, vatom);

      case Iop_CmpNEZ32x4:
      case Iop_Clz32Sx4:
      case Iop_Cls32Sx4:
      case Iop_FtoI32Ux4_RZ:
      case Iop_FtoI32Sx4_RZ:
      case Iop_Abs32x4:
         return mkPCast32x4(mce, vatom);

      case Iop_CmpwNEZ64:
         return mkPCastTo(mce, Ity_I64, vatom);

      case Iop_CmpNEZ64x2:
         return mkPCast64x2(mce, vatom);

      /* Narrowing ops (V128 -> I64): handled by a dedicated worker
         that narrows the shadow correspondingly. */
      case Iop_NarrowUn16to8x8:
      case Iop_NarrowUn32to16x4:
      case Iop_NarrowUn64to32x2:
      case Iop_QNarrowUn16Sto8Sx8:
      case Iop_QNarrowUn16Sto8Ux8:
      case Iop_QNarrowUn16Uto8Ux8:
      case Iop_QNarrowUn32Sto16Sx4:
      case Iop_QNarrowUn32Sto16Ux4:
      case Iop_QNarrowUn32Uto16Ux4:
      case Iop_QNarrowUn64Sto32Sx2:
      case Iop_QNarrowUn64Sto32Ux2:
      case Iop_QNarrowUn64Uto32Ux2:
         return vectorNarrowUnV128(mce, op, vatom);

      /* Widening ops (I64 -> V128). */
      case Iop_Widen8Sto16x8:
      case Iop_Widen8Uto16x8:
      case Iop_Widen16Sto32x4:
      case Iop_Widen16Uto32x4:
      case Iop_Widen32Sto64x2:
      case Iop_Widen32Uto64x2:
         return vectorWidenI64(mce, op, vatom);

      /* Pairwise-add-long: pessimise the input lanes, run the real op
         over those V bits, then pessimise again at the (wider) output
         lane size. */
      case Iop_PwAddL32Ux2:
      case Iop_PwAddL32Sx2:
         return mkPCastTo(mce, Ity_I64,
               assignNew('V', mce, Ity_I64, unop(op, mkPCast32x2(mce, vatom))));

      case Iop_PwAddL16Ux4:
      case Iop_PwAddL16Sx4:
         return mkPCast32x2(mce,
               assignNew('V', mce, Ity_I64, unop(op, mkPCast16x4(mce, vatom))));

      case Iop_PwAddL8Ux8:
      case Iop_PwAddL8Sx8:
         return mkPCast16x4(mce,
               assignNew('V', mce, Ity_I64, unop(op, mkPCast8x8(mce, vatom))));

      case Iop_PwAddL32Ux4:
      case Iop_PwAddL32Sx4:
         return mkPCast64x2(mce,
               assignNew('V', mce, Ity_V128, unop(op, mkPCast32x4(mce, vatom))));

      case Iop_PwAddL16Ux8:
      case Iop_PwAddL16Sx8:
         return mkPCast32x4(mce,
               assignNew('V', mce, Ity_V128, unop(op, mkPCast16x8(mce, vatom))));

      case Iop_PwAddL8Ux16:
      case Iop_PwAddL8Sx16:
         return mkPCast16x8(mce,
               assignNew('V', mce, Ity_V128, unop(op, mkPCast8x16(mce, vatom))));

      /* Iop_I64UtoF32 is a binary op (rounding mode + data) -- see
         expr2vbits_Binop -- so its appearance here is unexpected and
         falls through to the panic. */
      case Iop_I64UtoF32:
      default:
         ppIROp(op);
         VG_(tool_panic)("memcheck:expr2vbits_Unop");
   }
}
3480
3481
sewardj170ee212004-12-10 18:57:51 +00003482/* Worker function; do not call directly. */
sewardj95448072004-11-22 20:19:51 +00003483static
sewardj2e595852005-06-30 23:33:37 +00003484IRAtom* expr2vbits_Load_WRK ( MCEnv* mce,
3485 IREndness end, IRType ty,
3486 IRAtom* addr, UInt bias )
sewardj95448072004-11-22 20:19:51 +00003487{
3488 void* helper;
3489 Char* hname;
3490 IRDirty* di;
3491 IRTemp datavbits;
3492 IRAtom* addrAct;
3493
3494 tl_assert(isOriginalAtom(mce,addr));
sewardj2e595852005-06-30 23:33:37 +00003495 tl_assert(end == Iend_LE || end == Iend_BE);
sewardj95448072004-11-22 20:19:51 +00003496
3497 /* First, emit a definedness test for the address. This also sets
3498 the address (shadow) to 'defined' following the test. */
3499 complainIfUndefined( mce, addr );
3500
3501 /* Now cook up a call to the relevant helper function, to read the
3502 data V bits from shadow memory. */
sewardj7cf4e6b2008-05-01 20:24:26 +00003503 ty = shadowTypeV(ty);
sewardj2e595852005-06-30 23:33:37 +00003504
3505 if (end == Iend_LE) {
3506 switch (ty) {
njn1d0825f2006-03-27 11:37:07 +00003507 case Ity_I64: helper = &MC_(helperc_LOADV64le);
3508 hname = "MC_(helperc_LOADV64le)";
sewardj2e595852005-06-30 23:33:37 +00003509 break;
njn1d0825f2006-03-27 11:37:07 +00003510 case Ity_I32: helper = &MC_(helperc_LOADV32le);
3511 hname = "MC_(helperc_LOADV32le)";
sewardj2e595852005-06-30 23:33:37 +00003512 break;
njn1d0825f2006-03-27 11:37:07 +00003513 case Ity_I16: helper = &MC_(helperc_LOADV16le);
3514 hname = "MC_(helperc_LOADV16le)";
sewardj2e595852005-06-30 23:33:37 +00003515 break;
njn1d0825f2006-03-27 11:37:07 +00003516 case Ity_I8: helper = &MC_(helperc_LOADV8);
3517 hname = "MC_(helperc_LOADV8)";
sewardj2e595852005-06-30 23:33:37 +00003518 break;
3519 default: ppIRType(ty);
3520 VG_(tool_panic)("memcheck:do_shadow_Load(LE)");
3521 }
3522 } else {
sewardj8cf88b72005-07-08 01:29:33 +00003523 switch (ty) {
njn1d0825f2006-03-27 11:37:07 +00003524 case Ity_I64: helper = &MC_(helperc_LOADV64be);
3525 hname = "MC_(helperc_LOADV64be)";
sewardj8cf88b72005-07-08 01:29:33 +00003526 break;
njn1d0825f2006-03-27 11:37:07 +00003527 case Ity_I32: helper = &MC_(helperc_LOADV32be);
3528 hname = "MC_(helperc_LOADV32be)";
sewardj8cf88b72005-07-08 01:29:33 +00003529 break;
njn1d0825f2006-03-27 11:37:07 +00003530 case Ity_I16: helper = &MC_(helperc_LOADV16be);
3531 hname = "MC_(helperc_LOADV16be)";
sewardj8cf88b72005-07-08 01:29:33 +00003532 break;
njn1d0825f2006-03-27 11:37:07 +00003533 case Ity_I8: helper = &MC_(helperc_LOADV8);
3534 hname = "MC_(helperc_LOADV8)";
sewardj8cf88b72005-07-08 01:29:33 +00003535 break;
3536 default: ppIRType(ty);
3537 VG_(tool_panic)("memcheck:do_shadow_Load(BE)");
3538 }
sewardj95448072004-11-22 20:19:51 +00003539 }
3540
3541 /* Generate the actual address into addrAct. */
3542 if (bias == 0) {
3543 addrAct = addr;
3544 } else {
sewardj7cf97ee2004-11-28 14:25:01 +00003545 IROp mkAdd;
3546 IRAtom* eBias;
sewardj95448072004-11-22 20:19:51 +00003547 IRType tyAddr = mce->hWordTy;
3548 tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
sewardj7cf97ee2004-11-28 14:25:01 +00003549 mkAdd = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
3550 eBias = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
sewardj7cf4e6b2008-05-01 20:24:26 +00003551 addrAct = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBias) );
sewardj95448072004-11-22 20:19:51 +00003552 }
3553
3554 /* We need to have a place to park the V bits we're just about to
3555 read. */
sewardj1c0ce7a2009-07-01 08:10:49 +00003556 datavbits = newTemp(mce, ty, VSh);
sewardj95448072004-11-22 20:19:51 +00003557 di = unsafeIRDirty_1_N( datavbits,
sewardj53ee1fc2005-12-23 02:29:58 +00003558 1/*regparms*/,
3559 hname, VG_(fnptr_to_fnentry)( helper ),
sewardj95448072004-11-22 20:19:51 +00003560 mkIRExprVec_1( addrAct ));
3561 setHelperAnns( mce, di );
sewardj7cf4e6b2008-05-01 20:24:26 +00003562 stmt( 'V', mce, IRStmt_Dirty(di) );
sewardj95448072004-11-22 20:19:51 +00003563
3564 return mkexpr(datavbits);
3565}
3566
3567
3568static
sewardj2e595852005-06-30 23:33:37 +00003569IRAtom* expr2vbits_Load ( MCEnv* mce,
3570 IREndness end, IRType ty,
3571 IRAtom* addr, UInt bias )
sewardj170ee212004-12-10 18:57:51 +00003572{
3573 IRAtom *v64hi, *v64lo;
sewardj2e595852005-06-30 23:33:37 +00003574 tl_assert(end == Iend_LE || end == Iend_BE);
sewardj7cf4e6b2008-05-01 20:24:26 +00003575 switch (shadowTypeV(ty)) {
sewardj170ee212004-12-10 18:57:51 +00003576 case Ity_I8:
3577 case Ity_I16:
3578 case Ity_I32:
3579 case Ity_I64:
sewardj2e595852005-06-30 23:33:37 +00003580 return expr2vbits_Load_WRK(mce, end, ty, addr, bias);
sewardj170ee212004-12-10 18:57:51 +00003581 case Ity_V128:
sewardj2e595852005-06-30 23:33:37 +00003582 if (end == Iend_LE) {
3583 v64lo = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias);
3584 v64hi = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+8);
3585 } else {
sewardj2e595852005-06-30 23:33:37 +00003586 v64hi = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias);
3587 v64lo = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+8);
3588 }
sewardj7cf4e6b2008-05-01 20:24:26 +00003589 return assignNew( 'V', mce,
sewardj170ee212004-12-10 18:57:51 +00003590 Ity_V128,
sewardj20d38f22005-02-07 23:50:18 +00003591 binop(Iop_64HLtoV128, v64hi, v64lo));
sewardj170ee212004-12-10 18:57:51 +00003592 default:
sewardj2e595852005-06-30 23:33:37 +00003593 VG_(tool_panic)("expr2vbits_Load");
sewardj170ee212004-12-10 18:57:51 +00003594 }
3595}
3596
3597
3598static
sewardj95448072004-11-22 20:19:51 +00003599IRAtom* expr2vbits_Mux0X ( MCEnv* mce,
3600 IRAtom* cond, IRAtom* expr0, IRAtom* exprX )
3601{
3602 IRAtom *vbitsC, *vbits0, *vbitsX;
3603 IRType ty;
3604 /* Given Mux0X(cond,expr0,exprX), generate
3605 Mux0X(cond,expr0#,exprX#) `UifU` PCast(cond#)
3606 That is, steer the V bits like the originals, but trash the
3607 result if the steering value is undefined. This gives
3608 lazy propagation. */
3609 tl_assert(isOriginalAtom(mce, cond));
3610 tl_assert(isOriginalAtom(mce, expr0));
3611 tl_assert(isOriginalAtom(mce, exprX));
3612
3613 vbitsC = expr2vbits(mce, cond);
3614 vbits0 = expr2vbits(mce, expr0);
3615 vbitsX = expr2vbits(mce, exprX);
sewardj1c0ce7a2009-07-01 08:10:49 +00003616 ty = typeOfIRExpr(mce->sb->tyenv, vbits0);
sewardj95448072004-11-22 20:19:51 +00003617
3618 return
sewardj7cf4e6b2008-05-01 20:24:26 +00003619 mkUifU(mce, ty, assignNew('V', mce, ty,
3620 IRExpr_Mux0X(cond, vbits0, vbitsX)),
sewardj95448072004-11-22 20:19:51 +00003621 mkPCastTo(mce, ty, vbitsC) );
3622}
3623
3624/* --------- This is the main expression-handling function. --------- */
3625
3626static
3627IRExpr* expr2vbits ( MCEnv* mce, IRExpr* e )
3628{
3629 switch (e->tag) {
3630
3631 case Iex_Get:
3632 return shadow_GET( mce, e->Iex.Get.offset, e->Iex.Get.ty );
3633
3634 case Iex_GetI:
3635 return shadow_GETI( mce, e->Iex.GetI.descr,
3636 e->Iex.GetI.ix, e->Iex.GetI.bias );
3637
sewardj0b9d74a2006-12-24 02:24:11 +00003638 case Iex_RdTmp:
sewardj7cf4e6b2008-05-01 20:24:26 +00003639 return IRExpr_RdTmp( findShadowTmpV(mce, e->Iex.RdTmp.tmp) );
sewardj95448072004-11-22 20:19:51 +00003640
3641 case Iex_Const:
sewardj1c0ce7a2009-07-01 08:10:49 +00003642 return definedOfType(shadowTypeV(typeOfIRExpr(mce->sb->tyenv, e)));
sewardj95448072004-11-22 20:19:51 +00003643
sewardje91cea72006-02-08 19:32:02 +00003644 case Iex_Qop:
3645 return expr2vbits_Qop(
3646 mce,
3647 e->Iex.Qop.op,
3648 e->Iex.Qop.arg1, e->Iex.Qop.arg2,
3649 e->Iex.Qop.arg3, e->Iex.Qop.arg4
3650 );
3651
sewardjed69fdb2006-02-03 16:12:27 +00003652 case Iex_Triop:
3653 return expr2vbits_Triop(
3654 mce,
3655 e->Iex.Triop.op,
3656 e->Iex.Triop.arg1, e->Iex.Triop.arg2, e->Iex.Triop.arg3
3657 );
3658
sewardj95448072004-11-22 20:19:51 +00003659 case Iex_Binop:
3660 return expr2vbits_Binop(
3661 mce,
3662 e->Iex.Binop.op,
3663 e->Iex.Binop.arg1, e->Iex.Binop.arg2
3664 );
3665
3666 case Iex_Unop:
3667 return expr2vbits_Unop( mce, e->Iex.Unop.op, e->Iex.Unop.arg );
3668
sewardj2e595852005-06-30 23:33:37 +00003669 case Iex_Load:
3670 return expr2vbits_Load( mce, e->Iex.Load.end,
3671 e->Iex.Load.ty,
3672 e->Iex.Load.addr, 0/*addr bias*/ );
sewardj95448072004-11-22 20:19:51 +00003673
3674 case Iex_CCall:
3675 return mkLazyN( mce, e->Iex.CCall.args,
3676 e->Iex.CCall.retty,
3677 e->Iex.CCall.cee );
3678
3679 case Iex_Mux0X:
3680 return expr2vbits_Mux0X( mce, e->Iex.Mux0X.cond, e->Iex.Mux0X.expr0,
3681 e->Iex.Mux0X.exprX);
njn25e49d8e72002-09-23 09:36:25 +00003682
3683 default:
sewardj95448072004-11-22 20:19:51 +00003684 VG_(printf)("\n");
3685 ppIRExpr(e);
3686 VG_(printf)("\n");
3687 VG_(tool_panic)("memcheck: expr2vbits");
njn25e49d8e72002-09-23 09:36:25 +00003688 }
njn25e49d8e72002-09-23 09:36:25 +00003689}
3690
3691/*------------------------------------------------------------*/
sewardj95448072004-11-22 20:19:51 +00003692/*--- Generate shadow stmts from all kinds of IRStmts. ---*/
njn25e49d8e72002-09-23 09:36:25 +00003693/*------------------------------------------------------------*/
3694
sewardj95448072004-11-22 20:19:51 +00003695/* Widen a value to the host word size. */
njn25e49d8e72002-09-23 09:36:25 +00003696
3697static
sewardj95448072004-11-22 20:19:51 +00003698IRExpr* zwidenToHostWord ( MCEnv* mce, IRAtom* vatom )
njn25e49d8e72002-09-23 09:36:25 +00003699{
sewardj7cf97ee2004-11-28 14:25:01 +00003700 IRType ty, tyH;
3701
sewardj95448072004-11-22 20:19:51 +00003702 /* vatom is vbits-value and as such can only have a shadow type. */
3703 tl_assert(isShadowAtom(mce,vatom));
njn25e49d8e72002-09-23 09:36:25 +00003704
sewardj1c0ce7a2009-07-01 08:10:49 +00003705 ty = typeOfIRExpr(mce->sb->tyenv, vatom);
sewardj7cf97ee2004-11-28 14:25:01 +00003706 tyH = mce->hWordTy;
njn25e49d8e72002-09-23 09:36:25 +00003707
sewardj95448072004-11-22 20:19:51 +00003708 if (tyH == Ity_I32) {
3709 switch (ty) {
sewardj7cf4e6b2008-05-01 20:24:26 +00003710 case Ity_I32:
3711 return vatom;
3712 case Ity_I16:
3713 return assignNew('V', mce, tyH, unop(Iop_16Uto32, vatom));
3714 case Ity_I8:
3715 return assignNew('V', mce, tyH, unop(Iop_8Uto32, vatom));
3716 default:
3717 goto unhandled;
sewardj8ec2cfc2002-10-13 00:57:26 +00003718 }
sewardj6cf40ff2005-04-20 22:31:26 +00003719 } else
3720 if (tyH == Ity_I64) {
3721 switch (ty) {
sewardj7cf4e6b2008-05-01 20:24:26 +00003722 case Ity_I32:
3723 return assignNew('V', mce, tyH, unop(Iop_32Uto64, vatom));
3724 case Ity_I16:
3725 return assignNew('V', mce, tyH, unop(Iop_32Uto64,
3726 assignNew('V', mce, Ity_I32, unop(Iop_16Uto32, vatom))));
3727 case Ity_I8:
3728 return assignNew('V', mce, tyH, unop(Iop_32Uto64,
3729 assignNew('V', mce, Ity_I32, unop(Iop_8Uto32, vatom))));
3730 default:
3731 goto unhandled;
sewardj6cf40ff2005-04-20 22:31:26 +00003732 }
sewardj95448072004-11-22 20:19:51 +00003733 } else {
3734 goto unhandled;
sewardj8ec2cfc2002-10-13 00:57:26 +00003735 }
sewardj95448072004-11-22 20:19:51 +00003736 unhandled:
3737 VG_(printf)("\nty = "); ppIRType(ty); VG_(printf)("\n");
3738 VG_(tool_panic)("zwidenToHostWord");
njn25e49d8e72002-09-23 09:36:25 +00003739}
3740
njn25e49d8e72002-09-23 09:36:25 +00003741
/* Generate a shadow store.  addr is always the original address atom.
   You can pass in either originals or V-bits for the data atom, but
   obviously not both.  guard :: Ity_I1 controls whether the store
   really happens; NULL means it unconditionally does.  Note that
   guard itself is not checked for definedness; the caller of this
   function must do that if necessary.

   Emits, in order: (a) a definedness check on addr, then (b) one (or,
   for V128, two) dirty helper call(s) that write vdata into shadow
   memory at addr+bias.  The emission order matters and must not be
   changed. */

static
void do_shadow_Store ( MCEnv* mce,
                       IREndness end,
                       IRAtom* addr, UInt bias,
                       IRAtom* data, IRAtom* vdata,
                       IRAtom* guard )
{
   IROp     mkAdd;
   IRType   ty, tyAddr;
   void*    helper = NULL;
   Char*    hname = NULL;
   IRConst* c;

   /* Address arithmetic must be done at the host word width. */
   tyAddr = mce->hWordTy;
   mkAdd  = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
   tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
   tl_assert( end == Iend_LE || end == Iend_BE );

   /* Exactly one of data (original) / vdata (shadow) is supplied.
      If given the original, compute its V-bits here.  A nonzero bias
      is only expected alongside a precomputed vdata. */
   if (data) {
      tl_assert(!vdata);
      tl_assert(isOriginalAtom(mce, data));
      tl_assert(bias == 0);
      vdata = expr2vbits( mce, data );
   } else {
      tl_assert(vdata);
   }

   tl_assert(isOriginalAtom(mce,addr));
   tl_assert(isShadowAtom(mce,vdata));

   if (guard) {
      tl_assert(isOriginalAtom(mce, guard));
      tl_assert(typeOfIRExpr(mce->sb->tyenv, guard) == Ity_I1);
   }

   ty = typeOfIRExpr(mce->sb->tyenv, vdata);

   // If we're not doing undefined value checking, pretend that this value
   // is "all valid".  That lets Vex's optimiser remove some of the V bit
   // shadow computation ops that precede it.
   if (MC_(clo_mc_level) == 1) {
      switch (ty) {
         case Ity_V128: // V128 weirdness -- IRConst_V128 takes a 16-bit
                        // mask (1 bit per byte), hence the I16 constant.
                       c = IRConst_V128(V_BITS16_DEFINED); break;
         case Ity_I64: c = IRConst_U64 (V_BITS64_DEFINED); break;
         case Ity_I32: c = IRConst_U32 (V_BITS32_DEFINED); break;
         case Ity_I16: c = IRConst_U16 (V_BITS16_DEFINED); break;
         case Ity_I8:  c = IRConst_U8  (V_BITS8_DEFINED);  break;
         default: VG_(tool_panic)("memcheck:do_shadow_Store(LE)");
      }
      vdata = IRExpr_Const( c );
   }

   /* First, emit a definedness test for the address.  This also sets
      the address (shadow) to 'defined' following the test. */
   complainIfUndefined( mce, addr );

   /* Now decide which helper function to call to write the data V
      bits into shadow memory.  Note: there is no separate BE variant
      for the 8-bit store since endianness is irrelevant at 1 byte. */
   if (end == Iend_LE) {
      switch (ty) {
         case Ity_V128: /* we'll use the helper twice */
         case Ity_I64: helper = &MC_(helperc_STOREV64le);
                       hname = "MC_(helperc_STOREV64le)";
                       break;
         case Ity_I32: helper = &MC_(helperc_STOREV32le);
                       hname = "MC_(helperc_STOREV32le)";
                       break;
         case Ity_I16: helper = &MC_(helperc_STOREV16le);
                       hname = "MC_(helperc_STOREV16le)";
                       break;
         case Ity_I8:  helper = &MC_(helperc_STOREV8);
                       hname = "MC_(helperc_STOREV8)";
                       break;
         default: VG_(tool_panic)("memcheck:do_shadow_Store(LE)");
      }
   } else {
      switch (ty) {
         case Ity_V128: /* we'll use the helper twice */
         case Ity_I64: helper = &MC_(helperc_STOREV64be);
                       hname = "MC_(helperc_STOREV64be)";
                       break;
         case Ity_I32: helper = &MC_(helperc_STOREV32be);
                       hname = "MC_(helperc_STOREV32be)";
                       break;
         case Ity_I16: helper = &MC_(helperc_STOREV16be);
                       hname = "MC_(helperc_STOREV16be)";
                       break;
         case Ity_I8:  helper = &MC_(helperc_STOREV8);
                       hname = "MC_(helperc_STOREV8)";
                       break;
         default: VG_(tool_panic)("memcheck:do_shadow_Store(BE)");
      }
   }

   if (ty == Ity_V128) {

      /* V128-bit case */
      /* See comment in next clause re 64-bit regparms */
      /* also, need to be careful about endianness */

      Int     offLo64, offHi64;
      IRDirty *diLo64, *diHi64;
      IRAtom  *addrLo64, *addrHi64;
      IRAtom  *vdataLo64, *vdataHi64;
      IRAtom  *eBiasLo64, *eBiasHi64;

      /* Which memory offset holds the low-numbered 64-bit lane
         depends on endianness. */
      if (end == Iend_LE) {
         offLo64 = 0;
         offHi64 = 8;
      } else {
         offLo64 = 8;
         offHi64 = 0;
      }

      /* Split the 128-bit value into two 64-bit halves and store each
         via the (64-bit) helper, with suitably biased addresses. */
      eBiasLo64 = tyAddr==Ity_I32 ? mkU32(bias+offLo64) : mkU64(bias+offLo64);
      addrLo64  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasLo64) );
      vdataLo64 = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vdata));
      diLo64    = unsafeIRDirty_0_N(
                     1/*regparms*/,
                     hname, VG_(fnptr_to_fnentry)( helper ),
                     mkIRExprVec_2( addrLo64, vdataLo64 )
                  );
      eBiasHi64 = tyAddr==Ity_I32 ? mkU32(bias+offHi64) : mkU64(bias+offHi64);
      addrHi64  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasHi64) );
      vdataHi64 = assignNew('V', mce, Ity_I64, unop(Iop_V128HIto64, vdata));
      diHi64    = unsafeIRDirty_0_N(
                     1/*regparms*/,
                     hname, VG_(fnptr_to_fnentry)( helper ),
                     mkIRExprVec_2( addrHi64, vdataHi64 )
                  );
      if (guard) diLo64->guard = guard;
      if (guard) diHi64->guard = guard;
      setHelperAnns( mce, diLo64 );
      setHelperAnns( mce, diHi64 );
      stmt( 'V', mce, IRStmt_Dirty(diLo64) );
      stmt( 'V', mce, IRStmt_Dirty(diHi64) );

   } else {

      IRDirty *di;
      IRAtom  *addrAct;

      /* 8/16/32/64-bit cases */
      /* Generate the actual address into addrAct. */
      if (bias == 0) {
         addrAct = addr;
      } else {
         IRAtom* eBias   = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
         addrAct = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBias));
      }

      if (ty == Ity_I64) {
         /* We can't do this with regparm 2 on 32-bit platforms, since
            the back ends aren't clever enough to handle 64-bit
            regparm args.  Therefore be different. */
         di = unsafeIRDirty_0_N(
                 1/*regparms*/,
                 hname, VG_(fnptr_to_fnentry)( helper ),
                 mkIRExprVec_2( addrAct, vdata )
              );
      } else {
         /* Sub-64-bit payloads are zero-widened to a host word so the
            helper sees a uniform argument type. */
         di = unsafeIRDirty_0_N(
                 2/*regparms*/,
                 hname, VG_(fnptr_to_fnentry)( helper ),
                 mkIRExprVec_2( addrAct,
                                zwidenToHostWord( mce, vdata ))
              );
      }
      if (guard) di->guard = guard;
      setHelperAnns( mce, di );
      stmt( 'V', mce, IRStmt_Dirty(di) );
   }

}
njn25e49d8e72002-09-23 09:36:25 +00003924
njn25e49d8e72002-09-23 09:36:25 +00003925
sewardj95448072004-11-22 20:19:51 +00003926/* Do lazy pessimistic propagation through a dirty helper call, by
3927 looking at the annotations on it. This is the most complex part of
3928 Memcheck. */
njn25e49d8e72002-09-23 09:36:25 +00003929
sewardj95448072004-11-22 20:19:51 +00003930static IRType szToITy ( Int n )
3931{
3932 switch (n) {
3933 case 1: return Ity_I8;
3934 case 2: return Ity_I16;
3935 case 4: return Ity_I32;
3936 case 8: return Ity_I64;
3937 default: VG_(tool_panic)("szToITy(memcheck)");
3938 }
3939}
njn25e49d8e72002-09-23 09:36:25 +00003940
sewardj95448072004-11-22 20:19:51 +00003941static
3942void do_shadow_Dirty ( MCEnv* mce, IRDirty* d )
3943{
njn4c245e52009-03-15 23:25:38 +00003944 Int i, n, toDo, gSz, gOff;
sewardj2e595852005-06-30 23:33:37 +00003945 IRAtom *src, *here, *curr;
njn4c245e52009-03-15 23:25:38 +00003946 IRType tySrc, tyDst;
sewardj2e595852005-06-30 23:33:37 +00003947 IRTemp dst;
3948 IREndness end;
3949
3950 /* What's the native endianness? We need to know this. */
sewardj6e340c72005-07-10 00:53:42 +00003951# if defined(VG_BIGENDIAN)
sewardj2e595852005-06-30 23:33:37 +00003952 end = Iend_BE;
sewardj6e340c72005-07-10 00:53:42 +00003953# elif defined(VG_LITTLEENDIAN)
sewardj2e595852005-06-30 23:33:37 +00003954 end = Iend_LE;
3955# else
3956# error "Unknown endianness"
3957# endif
njn25e49d8e72002-09-23 09:36:25 +00003958
sewardj95448072004-11-22 20:19:51 +00003959 /* First check the guard. */
3960 complainIfUndefined(mce, d->guard);
3961
3962 /* Now round up all inputs and PCast over them. */
sewardj7cf97ee2004-11-28 14:25:01 +00003963 curr = definedOfType(Ity_I32);
sewardj95448072004-11-22 20:19:51 +00003964
3965 /* Inputs: unmasked args */
3966 for (i = 0; d->args[i]; i++) {
3967 if (d->cee->mcx_mask & (1<<i)) {
3968 /* ignore this arg */
njn25e49d8e72002-09-23 09:36:25 +00003969 } else {
sewardj95448072004-11-22 20:19:51 +00003970 here = mkPCastTo( mce, Ity_I32, expr2vbits(mce, d->args[i]) );
3971 curr = mkUifU32(mce, here, curr);
njn25e49d8e72002-09-23 09:36:25 +00003972 }
3973 }
sewardj95448072004-11-22 20:19:51 +00003974
3975 /* Inputs: guest state that we read. */
3976 for (i = 0; i < d->nFxState; i++) {
3977 tl_assert(d->fxState[i].fx != Ifx_None);
3978 if (d->fxState[i].fx == Ifx_Write)
3979 continue;
sewardja7203252004-11-26 19:17:47 +00003980
3981 /* Ignore any sections marked as 'always defined'. */
3982 if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size )) {
sewardje9e16d32004-12-10 13:17:55 +00003983 if (0)
sewardja7203252004-11-26 19:17:47 +00003984 VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
3985 d->fxState[i].offset, d->fxState[i].size );
3986 continue;
3987 }
3988
sewardj95448072004-11-22 20:19:51 +00003989 /* This state element is read or modified. So we need to
sewardje9e16d32004-12-10 13:17:55 +00003990 consider it. If larger than 8 bytes, deal with it in 8-byte
3991 chunks. */
3992 gSz = d->fxState[i].size;
3993 gOff = d->fxState[i].offset;
3994 tl_assert(gSz > 0);
3995 while (True) {
3996 if (gSz == 0) break;
3997 n = gSz <= 8 ? gSz : 8;
3998 /* update 'curr' with UifU of the state slice
3999 gOff .. gOff+n-1 */
4000 tySrc = szToITy( n );
sewardj7cf4e6b2008-05-01 20:24:26 +00004001 src = assignNew( 'V', mce, tySrc,
4002 shadow_GET(mce, gOff, tySrc ) );
sewardje9e16d32004-12-10 13:17:55 +00004003 here = mkPCastTo( mce, Ity_I32, src );
4004 curr = mkUifU32(mce, here, curr);
4005 gSz -= n;
4006 gOff += n;
4007 }
4008
sewardj95448072004-11-22 20:19:51 +00004009 }
4010
4011 /* Inputs: memory. First set up some info needed regardless of
4012 whether we're doing reads or writes. */
sewardj95448072004-11-22 20:19:51 +00004013
4014 if (d->mFx != Ifx_None) {
4015 /* Because we may do multiple shadow loads/stores from the same
4016 base address, it's best to do a single test of its
4017 definedness right now. Post-instrumentation optimisation
4018 should remove all but this test. */
njn4c245e52009-03-15 23:25:38 +00004019 IRType tyAddr;
sewardj95448072004-11-22 20:19:51 +00004020 tl_assert(d->mAddr);
4021 complainIfUndefined(mce, d->mAddr);
4022
sewardj1c0ce7a2009-07-01 08:10:49 +00004023 tyAddr = typeOfIRExpr(mce->sb->tyenv, d->mAddr);
sewardj95448072004-11-22 20:19:51 +00004024 tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);
4025 tl_assert(tyAddr == mce->hWordTy); /* not really right */
4026 }
4027
4028 /* Deal with memory inputs (reads or modifies) */
4029 if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
sewardj95448072004-11-22 20:19:51 +00004030 toDo = d->mSize;
sewardj2e595852005-06-30 23:33:37 +00004031 /* chew off 32-bit chunks. We don't care about the endianness
4032 since it's all going to be condensed down to a single bit,
4033 but nevertheless choose an endianness which is hopefully
4034 native to the platform. */
sewardj95448072004-11-22 20:19:51 +00004035 while (toDo >= 4) {
4036 here = mkPCastTo(
4037 mce, Ity_I32,
sewardj2e595852005-06-30 23:33:37 +00004038 expr2vbits_Load ( mce, end, Ity_I32,
sewardj95448072004-11-22 20:19:51 +00004039 d->mAddr, d->mSize - toDo )
4040 );
4041 curr = mkUifU32(mce, here, curr);
4042 toDo -= 4;
4043 }
4044 /* chew off 16-bit chunks */
4045 while (toDo >= 2) {
4046 here = mkPCastTo(
4047 mce, Ity_I32,
sewardj2e595852005-06-30 23:33:37 +00004048 expr2vbits_Load ( mce, end, Ity_I16,
sewardj95448072004-11-22 20:19:51 +00004049 d->mAddr, d->mSize - toDo )
4050 );
4051 curr = mkUifU32(mce, here, curr);
4052 toDo -= 2;
4053 }
4054 tl_assert(toDo == 0); /* also need to handle 1-byte excess */
4055 }
4056
4057 /* Whew! So curr is a 32-bit V-value summarising pessimistically
4058 all the inputs to the helper. Now we need to re-distribute the
4059 results to all destinations. */
4060
4061 /* Outputs: the destination temporary, if there is one. */
4062 if (d->tmp != IRTemp_INVALID) {
sewardj7cf4e6b2008-05-01 20:24:26 +00004063 dst = findShadowTmpV(mce, d->tmp);
sewardj1c0ce7a2009-07-01 08:10:49 +00004064 tyDst = typeOfIRTemp(mce->sb->tyenv, d->tmp);
sewardj7cf4e6b2008-05-01 20:24:26 +00004065 assign( 'V', mce, dst, mkPCastTo( mce, tyDst, curr) );
sewardj95448072004-11-22 20:19:51 +00004066 }
4067
4068 /* Outputs: guest state that we write or modify. */
4069 for (i = 0; i < d->nFxState; i++) {
4070 tl_assert(d->fxState[i].fx != Ifx_None);
4071 if (d->fxState[i].fx == Ifx_Read)
4072 continue;
sewardja7203252004-11-26 19:17:47 +00004073 /* Ignore any sections marked as 'always defined'. */
4074 if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size ))
4075 continue;
sewardje9e16d32004-12-10 13:17:55 +00004076 /* This state element is written or modified. So we need to
4077 consider it. If larger than 8 bytes, deal with it in 8-byte
4078 chunks. */
4079 gSz = d->fxState[i].size;
4080 gOff = d->fxState[i].offset;
4081 tl_assert(gSz > 0);
4082 while (True) {
4083 if (gSz == 0) break;
4084 n = gSz <= 8 ? gSz : 8;
4085 /* Write suitably-casted 'curr' to the state slice
4086 gOff .. gOff+n-1 */
4087 tyDst = szToITy( n );
4088 do_shadow_PUT( mce, gOff,
4089 NULL, /* original atom */
4090 mkPCastTo( mce, tyDst, curr ) );
4091 gSz -= n;
4092 gOff += n;
4093 }
sewardj95448072004-11-22 20:19:51 +00004094 }
4095
sewardj2e595852005-06-30 23:33:37 +00004096 /* Outputs: memory that we write or modify. Same comments about
4097 endianness as above apply. */
sewardj95448072004-11-22 20:19:51 +00004098 if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
sewardj95448072004-11-22 20:19:51 +00004099 toDo = d->mSize;
4100 /* chew off 32-bit chunks */
4101 while (toDo >= 4) {
sewardj2e595852005-06-30 23:33:37 +00004102 do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
4103 NULL, /* original data */
sewardj1c0ce7a2009-07-01 08:10:49 +00004104 mkPCastTo( mce, Ity_I32, curr ),
4105 NULL/*guard*/ );
sewardj95448072004-11-22 20:19:51 +00004106 toDo -= 4;
4107 }
4108 /* chew off 16-bit chunks */
4109 while (toDo >= 2) {
sewardj2e595852005-06-30 23:33:37 +00004110 do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
4111 NULL, /* original data */
sewardj1c0ce7a2009-07-01 08:10:49 +00004112 mkPCastTo( mce, Ity_I16, curr ),
4113 NULL/*guard*/ );
sewardj95448072004-11-22 20:19:51 +00004114 toDo -= 2;
4115 }
4116 tl_assert(toDo == 0); /* also need to handle 1-byte excess */
4117 }
4118
njn25e49d8e72002-09-23 09:36:25 +00004119}
4120
sewardj1c0ce7a2009-07-01 08:10:49 +00004121
sewardj826ec492005-05-12 18:05:00 +00004122/* We have an ABI hint telling us that [base .. base+len-1] is to
4123 become undefined ("writable"). Generate code to call a helper to
4124 notify the A/V bit machinery of this fact.
4125
4126 We call
sewardj7cf4e6b2008-05-01 20:24:26 +00004127 void MC_(helperc_MAKE_STACK_UNINIT) ( Addr base, UWord len,
4128 Addr nia );
sewardj826ec492005-05-12 18:05:00 +00004129*/
4130static
sewardj7cf4e6b2008-05-01 20:24:26 +00004131void do_AbiHint ( MCEnv* mce, IRExpr* base, Int len, IRExpr* nia )
sewardj826ec492005-05-12 18:05:00 +00004132{
4133 IRDirty* di;
sewardj7cf4e6b2008-05-01 20:24:26 +00004134 /* Minor optimisation: if not doing origin tracking, ignore the
4135 supplied nia and pass zero instead. This is on the basis that
4136 MC_(helperc_MAKE_STACK_UNINIT) will ignore it anyway, and we can
4137 almost always generate a shorter instruction to put zero into a
4138 register than any other value. */
4139 if (MC_(clo_mc_level) < 3)
4140 nia = mkIRExpr_HWord(0);
4141
sewardj826ec492005-05-12 18:05:00 +00004142 di = unsafeIRDirty_0_N(
4143 0/*regparms*/,
4144 "MC_(helperc_MAKE_STACK_UNINIT)",
sewardj53ee1fc2005-12-23 02:29:58 +00004145 VG_(fnptr_to_fnentry)( &MC_(helperc_MAKE_STACK_UNINIT) ),
sewardj7cf4e6b2008-05-01 20:24:26 +00004146 mkIRExprVec_3( base, mkIRExpr_HWord( (UInt)len), nia )
sewardj826ec492005-05-12 18:05:00 +00004147 );
sewardj7cf4e6b2008-05-01 20:24:26 +00004148 stmt( 'V', mce, IRStmt_Dirty(di) );
sewardj826ec492005-05-12 18:05:00 +00004149}
4150
njn25e49d8e72002-09-23 09:36:25 +00004151
sewardj1c0ce7a2009-07-01 08:10:49 +00004152/* ------ Dealing with IRCAS (big and complex) ------ */
4153
4154/* FWDS */
4155static IRAtom* gen_load_b ( MCEnv* mce, Int szB,
4156 IRAtom* baseaddr, Int offset );
4157static IRAtom* gen_maxU32 ( MCEnv* mce, IRAtom* b1, IRAtom* b2 );
4158static void gen_store_b ( MCEnv* mce, Int szB,
4159 IRAtom* baseaddr, Int offset, IRAtom* dataB,
4160 IRAtom* guard );
4161
4162static void do_shadow_CAS_single ( MCEnv* mce, IRCAS* cas );
4163static void do_shadow_CAS_double ( MCEnv* mce, IRCAS* cas );
4164
4165
4166/* Either ORIG and SHADOW are both IRExpr.RdTmps, or they are both
4167 IRExpr.Consts, else this asserts. If they are both Consts, it
4168 doesn't do anything. So that just leaves the RdTmp case.
4169
4170 In which case: this assigns the shadow value SHADOW to the IR
4171 shadow temporary associated with ORIG. That is, ORIG, being an
4172 original temporary, will have a shadow temporary associated with
4173 it. However, in the case envisaged here, there will so far have
4174 been no IR emitted to actually write a shadow value into that
4175 temporary. What this routine does is to (emit IR to) copy the
4176 value in SHADOW into said temporary, so that after this call,
4177 IRExpr.RdTmps of ORIG's shadow temp will correctly pick up the
4178 value in SHADOW.
4179
4180 Point is to allow callers to compute "by hand" a shadow value for
4181 ORIG, and force it to be associated with ORIG.
4182
4183 How do we know that that shadow associated with ORIG has not so far
4184 been assigned to? Well, we don't per se know that, but supposing
4185 it had. Then this routine would create a second assignment to it,
4186 and later the IR sanity checker would barf. But that never
4187 happens. QED.
4188*/
4189static void bind_shadow_tmp_to_orig ( UChar how,
4190 MCEnv* mce,
4191 IRAtom* orig, IRAtom* shadow )
4192{
4193 tl_assert(isOriginalAtom(mce, orig));
4194 tl_assert(isShadowAtom(mce, shadow));
4195 switch (orig->tag) {
4196 case Iex_Const:
4197 tl_assert(shadow->tag == Iex_Const);
4198 break;
4199 case Iex_RdTmp:
4200 tl_assert(shadow->tag == Iex_RdTmp);
4201 if (how == 'V') {
4202 assign('V', mce, findShadowTmpV(mce,orig->Iex.RdTmp.tmp),
4203 shadow);
4204 } else {
4205 tl_assert(how == 'B');
4206 assign('B', mce, findShadowTmpB(mce,orig->Iex.RdTmp.tmp),
4207 shadow);
4208 }
4209 break;
4210 default:
4211 tl_assert(0);
4212 }
4213}
4214
4215
/* Instrument an IRCAS statement: dispatch to the single- or
   double-element handler, chosen by whether the CAS uses the 'Hi'
   fields.  The detailed instrumentation scheme is described below. */
static
void do_shadow_CAS ( MCEnv* mce, IRCAS* cas )
{
   /* Scheme is (both single- and double- cases):

      1. fetch data#,dataB (the proposed new value)

      2. fetch expd#,expdB (what we expect to see at the address)

      3. check definedness of address

      4. load old#,oldB from shadow memory; this also checks
         addressibility of the address

      5. the CAS itself

      6. compute "expected == old".  See COMMENT_ON_CasCmpEQ below.

      7. if "expected == old" (as computed by (6))
            store data#,dataB to shadow memory

      Note that 5 reads 'old' but 4 reads 'old#'.  Similarly, 5 stores
      'data' but 7 stores 'data#'.  Hence it is possible for the
      shadow data to be incorrectly checked and/or updated:

      * 7 is at least gated correctly, since the 'expected == old'
        condition is derived from outputs of 5.  However, the shadow
        write could happen too late: imagine after 5 we are
        descheduled, a different thread runs, writes a different
        (shadow) value at the address, and then we resume, hence
        overwriting the shadow value written by the other thread.

      Because the original memory access is atomic, there's no way to
      make both the original and shadow accesses into a single atomic
      thing, hence this is unavoidable.

      At least as Valgrind stands, I don't think it's a problem, since
      we're single threaded *and* we guarantee that there are no
      context switches during the execution of any specific superblock
      -- context switches can only happen at superblock boundaries.

      If Valgrind ever becomes MT in the future, then it might be more
      of a problem.  A possible kludge would be to artificially
      associate with the location, a lock, which we must acquire and
      release around the transaction as a whole.  Hmm, that probably
      would't work properly since it only guards us against other
      threads doing CASs on the same location, not against other
      threads doing normal reads and writes.

      ------------------------------------------------------------

      COMMENT_ON_CasCmpEQ:

      Note two things.  Firstly, in the sequence above, we compute
      "expected == old", but we don't check definedness of it.  Why
      not?  Also, the x86 and amd64 front ends use
      Iop_CmpCas{EQ,NE}{8,16,32,64} comparisons to make the equivalent
      determination (expected == old ?) for themselves, and we also
      don't check definedness for those primops; we just say that the
      result is defined.  Why?  Details follow.

      x86/amd64 contains various forms of locked insns:
      * lock prefix before all basic arithmetic insn;
        eg lock xorl %reg1,(%reg2)
      * atomic exchange reg-mem
      * compare-and-swaps

      Rather than attempt to represent them all, which would be a
      royal PITA, I used a result from Maurice Herlihy
      (http://en.wikipedia.org/wiki/Maurice_Herlihy), in which he
      demonstrates that compare-and-swap is a primitive more general
      than the other two, and so can be used to represent all of them.
      So the translation scheme for (eg) lock incl (%reg) is as
      follows:

        again:
         old = * %reg
         new = old + 1
         atomically { if (* %reg == old) { * %reg = new } else { goto again } }

      The "atomically" is the CAS bit.  The scheme is always the same:
      get old value from memory, compute new value, atomically stuff
      new value back in memory iff the old value has not changed (iow,
      no other thread modified it in the meantime).  If it has changed
      then we've been out-raced and we have to start over.

      Now that's all very neat, but it has the bad side effect of
      introducing an explicit equality test into the translation.
      Consider the behaviour of said code on a memory location which
      is uninitialised.  We will wind up doing a comparison on
      uninitialised data, and mc duly complains.

      What's difficult about this is, the common case is that the
      location is uncontended, and so we're usually comparing the same
      value (* %reg) with itself.  So we shouldn't complain even if it
      is undefined.  But mc doesn't know that.

      My solution is to mark the == in the IR specially, so as to tell
      mc that it almost certainly compares a value with itself, and we
      should just regard the result as always defined.  Rather than
      add a bit to all IROps, I just cloned Iop_CmpEQ{8,16,32,64} into
      Iop_CasCmpEQ{8,16,32,64} so as not to disturb anything else.

      So there's always the question of, can this give a false
      negative?  eg, imagine that initially, * %reg is defined; and we
      read that; but then in the gap between the read and the CAS, a
      different thread writes an undefined (and different) value at
      the location.  Then the CAS in this thread will fail and we will
      go back to "again:", but without knowing that the trip back
      there was based on an undefined comparison.  No matter; at least
      the other thread won the race and the location is correctly
      marked as undefined.  What if it wrote an uninitialised version
      of the same value that was there originally, though?

      etc etc.  Seems like there's a small corner case in which we
      might lose the fact that something's defined -- we're out-raced
      in between the "old = * reg" and the "atomically {", _and_ the
      other thread is writing in an undefined version of what's
      already there.  Well, that seems pretty unlikely.

      ---

      If we ever need to reinstate it .. code which generates a
      definedness test for "expected == old" was removed at r10432 of
      this file.
   */
   /* oldHi is only used by the double-element (e.g. cmpxchg8b) form. */
   if (cas->oldHi == IRTemp_INVALID) {
      do_shadow_CAS_single( mce, cas );
   } else {
      do_shadow_CAS_double( mce, cas );
   }
}
4348
4349
/* Instrument a single-element CAS, following steps 1..7 of the scheme
   documented in do_shadow_CAS above.  The numbered comments below
   correspond to those steps; the emission order is load shadows ->
   the CAS itself -> guarded shadow store, and must not be changed.
   'otrak' selects whether origin (B-bit) shadows are also handled. */
static void do_shadow_CAS_single ( MCEnv* mce, IRCAS* cas )
{
   IRAtom *vdataLo = NULL, *bdataLo = NULL;
   IRAtom *vexpdLo = NULL, *bexpdLo = NULL;
   IRAtom *voldLo  = NULL, *boldLo  = NULL;
   IRAtom *expd_eq_old = NULL;
   IROp   opCasCmpEQ;
   Int    elemSzB;
   IRType elemTy;
   Bool   otrak = MC_(clo_mc_level) >= 3; /* a shorthand */

   /* single CAS */
   tl_assert(cas->oldHi == IRTemp_INVALID);
   tl_assert(cas->expdHi == NULL);
   tl_assert(cas->dataHi == NULL);

   /* Pick the element size and the defined-by-decree comparison op
      (see COMMENT_ON_CasCmpEQ) for this element type. */
   elemTy = typeOfIRExpr(mce->sb->tyenv, cas->expdLo);
   switch (elemTy) {
      case Ity_I8:  elemSzB = 1; opCasCmpEQ = Iop_CasCmpEQ8;  break;
      case Ity_I16: elemSzB = 2; opCasCmpEQ = Iop_CasCmpEQ16; break;
      case Ity_I32: elemSzB = 4; opCasCmpEQ = Iop_CasCmpEQ32; break;
      case Ity_I64: elemSzB = 8; opCasCmpEQ = Iop_CasCmpEQ64; break;
      default: tl_assert(0); /* IR defn disallows any other types */
   }

   /* 1. fetch data# (the proposed new value) */
   tl_assert(isOriginalAtom(mce, cas->dataLo));
   vdataLo
      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataLo));
   tl_assert(isShadowAtom(mce, vdataLo));
   if (otrak) {
      bdataLo
         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataLo));
      tl_assert(isShadowAtom(mce, bdataLo));
   }

   /* 2. fetch expected# (what we expect to see at the address) */
   tl_assert(isOriginalAtom(mce, cas->expdLo));
   vexpdLo
      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdLo));
   tl_assert(isShadowAtom(mce, vexpdLo));
   if (otrak) {
      bexpdLo
         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdLo));
      tl_assert(isShadowAtom(mce, bexpdLo));
   }

   /* 3. check definedness of address */
   /* 4. fetch old# from shadow memory; this also checks
         addressibility of the address */
   voldLo
      = assignNew(
           'V', mce, elemTy,
           expr2vbits_Load(
              mce,
              cas->end, elemTy, cas->addr, 0/*Addr bias*/
        ));
   /* Make the shadow of cas->oldLo pick up voldLo. */
   bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldLo), voldLo);
   if (otrak) {
      boldLo
         = assignNew('B', mce, Ity_I32,
                     gen_load_b(mce, elemSzB, cas->addr, 0/*addr bias*/));
      bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldLo), boldLo);
   }

   /* 5. the CAS itself */
   stmt( 'C', mce, IRStmt_CAS(cas) );

   /* 6. compute "expected == old" */
   /* See COMMENT_ON_CasCmpEQ in this file for background/rationale. */
   /* Note that 'C' is kinda faking it; it is indeed a non-shadow
      tree, but it's not copied from the input block. */
   expd_eq_old
      = assignNew('C', mce, Ity_I1,
                  binop(opCasCmpEQ, cas->expdLo, mkexpr(cas->oldLo)));

   /* 7. if "expected == old"
            store data# to shadow memory */
   do_shadow_Store( mce, cas->end, cas->addr, 0/*bias*/,
                    NULL/*data*/, vdataLo/*vdata*/,
                    expd_eq_old/*guard for store*/ );
   if (otrak) {
      gen_store_b( mce, elemSzB, cas->addr, 0/*offset*/,
                   bdataLo/*bdata*/,
                   expd_eq_old/*guard for store*/ );
   }
}
4437
4438
4439static void do_shadow_CAS_double ( MCEnv* mce, IRCAS* cas )
4440{
4441 IRAtom *vdataHi = NULL, *bdataHi = NULL;
4442 IRAtom *vdataLo = NULL, *bdataLo = NULL;
4443 IRAtom *vexpdHi = NULL, *bexpdHi = NULL;
4444 IRAtom *vexpdLo = NULL, *bexpdLo = NULL;
4445 IRAtom *voldHi = NULL, *boldHi = NULL;
4446 IRAtom *voldLo = NULL, *boldLo = NULL;
sewardjafed4c52009-07-12 13:00:17 +00004447 IRAtom *xHi = NULL, *xLo = NULL, *xHL = NULL;
4448 IRAtom *expd_eq_old = NULL, *zero = NULL;
4449 IROp opCasCmpEQ, opOr, opXor;
sewardj1c0ce7a2009-07-01 08:10:49 +00004450 Int elemSzB, memOffsLo, memOffsHi;
4451 IRType elemTy;
4452 Bool otrak = MC_(clo_mc_level) >= 3; /* a shorthand */
4453
4454 /* double CAS */
4455 tl_assert(cas->oldHi != IRTemp_INVALID);
4456 tl_assert(cas->expdHi != NULL);
4457 tl_assert(cas->dataHi != NULL);
4458
4459 elemTy = typeOfIRExpr(mce->sb->tyenv, cas->expdLo);
4460 switch (elemTy) {
4461 case Ity_I8:
sewardjafed4c52009-07-12 13:00:17 +00004462 opCasCmpEQ = Iop_CasCmpEQ8; opOr = Iop_Or8; opXor = Iop_Xor8;
sewardj1c0ce7a2009-07-01 08:10:49 +00004463 elemSzB = 1; zero = mkU8(0);
4464 break;
4465 case Ity_I16:
sewardjafed4c52009-07-12 13:00:17 +00004466 opCasCmpEQ = Iop_CasCmpEQ16; opOr = Iop_Or16; opXor = Iop_Xor16;
sewardj1c0ce7a2009-07-01 08:10:49 +00004467 elemSzB = 2; zero = mkU16(0);
4468 break;
4469 case Ity_I32:
sewardjafed4c52009-07-12 13:00:17 +00004470 opCasCmpEQ = Iop_CasCmpEQ32; opOr = Iop_Or32; opXor = Iop_Xor32;
sewardj1c0ce7a2009-07-01 08:10:49 +00004471 elemSzB = 4; zero = mkU32(0);
4472 break;
4473 case Ity_I64:
sewardjafed4c52009-07-12 13:00:17 +00004474 opCasCmpEQ = Iop_CasCmpEQ64; opOr = Iop_Or64; opXor = Iop_Xor64;
sewardj1c0ce7a2009-07-01 08:10:49 +00004475 elemSzB = 8; zero = mkU64(0);
4476 break;
4477 default:
4478 tl_assert(0); /* IR defn disallows any other types */
4479 }
4480
4481 /* 1. fetch data# (the proposed new value) */
4482 tl_assert(isOriginalAtom(mce, cas->dataHi));
4483 tl_assert(isOriginalAtom(mce, cas->dataLo));
4484 vdataHi
4485 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataHi));
4486 vdataLo
4487 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataLo));
4488 tl_assert(isShadowAtom(mce, vdataHi));
4489 tl_assert(isShadowAtom(mce, vdataLo));
4490 if (otrak) {
4491 bdataHi
4492 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataHi));
4493 bdataLo
4494 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataLo));
4495 tl_assert(isShadowAtom(mce, bdataHi));
4496 tl_assert(isShadowAtom(mce, bdataLo));
4497 }
4498
4499 /* 2. fetch expected# (what we expect to see at the address) */
4500 tl_assert(isOriginalAtom(mce, cas->expdHi));
4501 tl_assert(isOriginalAtom(mce, cas->expdLo));
4502 vexpdHi
4503 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdHi));
4504 vexpdLo
4505 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdLo));
4506 tl_assert(isShadowAtom(mce, vexpdHi));
4507 tl_assert(isShadowAtom(mce, vexpdLo));
4508 if (otrak) {
4509 bexpdHi
4510 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdHi));
4511 bexpdLo
4512 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdLo));
4513 tl_assert(isShadowAtom(mce, bexpdHi));
4514 tl_assert(isShadowAtom(mce, bexpdLo));
4515 }
4516
4517 /* 3. check definedness of address */
4518 /* 4. fetch old# from shadow memory; this also checks
4519 addressibility of the address */
4520 if (cas->end == Iend_LE) {
4521 memOffsLo = 0;
4522 memOffsHi = elemSzB;
4523 } else {
4524 tl_assert(cas->end == Iend_BE);
4525 memOffsLo = elemSzB;
4526 memOffsHi = 0;
4527 }
4528 voldHi
4529 = assignNew(
4530 'V', mce, elemTy,
4531 expr2vbits_Load(
4532 mce,
4533 cas->end, elemTy, cas->addr, memOffsHi/*Addr bias*/
4534 ));
4535 voldLo
4536 = assignNew(
4537 'V', mce, elemTy,
4538 expr2vbits_Load(
4539 mce,
4540 cas->end, elemTy, cas->addr, memOffsLo/*Addr bias*/
4541 ));
sewardjafed4c52009-07-12 13:00:17 +00004542 bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldHi), voldHi);
4543 bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldLo), voldLo);
sewardj1c0ce7a2009-07-01 08:10:49 +00004544 if (otrak) {
4545 boldHi
4546 = assignNew('B', mce, Ity_I32,
4547 gen_load_b(mce, elemSzB, cas->addr,
4548 memOffsHi/*addr bias*/));
4549 boldLo
4550 = assignNew('B', mce, Ity_I32,
4551 gen_load_b(mce, elemSzB, cas->addr,
4552 memOffsLo/*addr bias*/));
sewardjafed4c52009-07-12 13:00:17 +00004553 bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldHi), boldHi);
4554 bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldLo), boldLo);
sewardj1c0ce7a2009-07-01 08:10:49 +00004555 }
4556
4557 /* 5. the CAS itself */
4558 stmt( 'C', mce, IRStmt_CAS(cas) );
4559
sewardjafed4c52009-07-12 13:00:17 +00004560 /* 6. compute "expected == old" */
4561 /* See COMMENT_ON_CasCmpEQ in this file background/rationale. */
sewardj1c0ce7a2009-07-01 08:10:49 +00004562 /* Note that 'C' is kinda faking it; it is indeed a non-shadow
4563 tree, but it's not copied from the input block. */
4564 /*
4565 xHi = oldHi ^ expdHi;
4566 xLo = oldLo ^ expdLo;
4567 xHL = xHi | xLo;
4568 expd_eq_old = xHL == 0;
4569 */
sewardj1c0ce7a2009-07-01 08:10:49 +00004570 xHi = assignNew('C', mce, elemTy,
4571 binop(opXor, cas->expdHi, mkexpr(cas->oldHi)));
sewardj1c0ce7a2009-07-01 08:10:49 +00004572 xLo = assignNew('C', mce, elemTy,
4573 binop(opXor, cas->expdLo, mkexpr(cas->oldLo)));
sewardj1c0ce7a2009-07-01 08:10:49 +00004574 xHL = assignNew('C', mce, elemTy,
4575 binop(opOr, xHi, xLo));
sewardj1c0ce7a2009-07-01 08:10:49 +00004576 expd_eq_old
4577 = assignNew('C', mce, Ity_I1,
sewardjafed4c52009-07-12 13:00:17 +00004578 binop(opCasCmpEQ, xHL, zero));
sewardj1c0ce7a2009-07-01 08:10:49 +00004579
4580 /* 7. if "expected == old"
4581 store data# to shadow memory */
4582 do_shadow_Store( mce, cas->end, cas->addr, memOffsHi/*bias*/,
4583 NULL/*data*/, vdataHi/*vdata*/,
4584 expd_eq_old/*guard for store*/ );
4585 do_shadow_Store( mce, cas->end, cas->addr, memOffsLo/*bias*/,
4586 NULL/*data*/, vdataLo/*vdata*/,
4587 expd_eq_old/*guard for store*/ );
4588 if (otrak) {
4589 gen_store_b( mce, elemSzB, cas->addr, memOffsHi/*offset*/,
4590 bdataHi/*bdata*/,
4591 expd_eq_old/*guard for store*/ );
4592 gen_store_b( mce, elemSzB, cas->addr, memOffsLo/*offset*/,
4593 bdataLo/*bdata*/,
4594 expd_eq_old/*guard for store*/ );
4595 }
4596}
4597
4598
sewardjdb5907d2009-11-26 17:20:21 +00004599/* ------ Dealing with LL/SC (not difficult) ------ */
4600
4601static void do_shadow_LLSC ( MCEnv* mce,
4602 IREndness stEnd,
4603 IRTemp stResult,
4604 IRExpr* stAddr,
4605 IRExpr* stStoredata )
4606{
4607 /* In short: treat a load-linked like a normal load followed by an
4608 assignment of the loaded (shadow) data to the result temporary.
4609 Treat a store-conditional like a normal store, and mark the
4610 result temporary as defined. */
4611 IRType resTy = typeOfIRTemp(mce->sb->tyenv, stResult);
4612 IRTemp resTmp = findShadowTmpV(mce, stResult);
4613
4614 tl_assert(isIRAtom(stAddr));
4615 if (stStoredata)
4616 tl_assert(isIRAtom(stStoredata));
4617
4618 if (stStoredata == NULL) {
4619 /* Load Linked */
4620 /* Just treat this as a normal load, followed by an assignment of
4621 the value to .result. */
4622 /* Stay sane */
4623 tl_assert(resTy == Ity_I64 || resTy == Ity_I32
4624 || resTy == Ity_I16 || resTy == Ity_I8);
4625 assign( 'V', mce, resTmp,
4626 expr2vbits_Load(
4627 mce, stEnd, resTy, stAddr, 0/*addr bias*/));
4628 } else {
4629 /* Store Conditional */
4630 /* Stay sane */
4631 IRType dataTy = typeOfIRExpr(mce->sb->tyenv,
4632 stStoredata);
4633 tl_assert(dataTy == Ity_I64 || dataTy == Ity_I32
4634 || dataTy == Ity_I16 || dataTy == Ity_I8);
4635 do_shadow_Store( mce, stEnd,
4636 stAddr, 0/* addr bias */,
4637 stStoredata,
4638 NULL /* shadow data */,
4639 NULL/*guard*/ );
4640 /* This is a store conditional, so it writes to .result a value
4641 indicating whether or not the store succeeded. Just claim
4642 this value is always defined. In the PowerPC interpretation
4643 of store-conditional, definedness of the success indication
4644 depends on whether the address of the store matches the
4645 reservation address. But we can't tell that here (and
4646 anyway, we're not being PowerPC-specific). At least we are
4647 guaranteed that the definedness of the store address, and its
4648 addressibility, will be checked as per normal. So it seems
4649 pretty safe to just say that the success indication is always
4650 defined.
4651
4652 In schemeS, for origin tracking, we must correspondingly set
4653 a no-origin value for the origin shadow of .result.
4654 */
4655 tl_assert(resTy == Ity_I1);
4656 assign( 'V', mce, resTmp, definedOfType(resTy) );
4657 }
4658}
4659
4660
sewardj95448072004-11-22 20:19:51 +00004661/*------------------------------------------------------------*/
4662/*--- Memcheck main ---*/
4663/*------------------------------------------------------------*/
njn25e49d8e72002-09-23 09:36:25 +00004664
sewardj7cf4e6b2008-05-01 20:24:26 +00004665static void schemeS ( MCEnv* mce, IRStmt* st );
4666
sewardj95448072004-11-22 20:19:51 +00004667static Bool isBogusAtom ( IRAtom* at )
njn25e49d8e72002-09-23 09:36:25 +00004668{
sewardj95448072004-11-22 20:19:51 +00004669 ULong n = 0;
4670 IRConst* con;
sewardj710d6c22005-03-20 18:55:15 +00004671 tl_assert(isIRAtom(at));
sewardj0b9d74a2006-12-24 02:24:11 +00004672 if (at->tag == Iex_RdTmp)
sewardj95448072004-11-22 20:19:51 +00004673 return False;
4674 tl_assert(at->tag == Iex_Const);
4675 con = at->Iex.Const.con;
4676 switch (con->tag) {
sewardjd5204dc2004-12-31 01:16:11 +00004677 case Ico_U1: return False;
4678 case Ico_U8: n = (ULong)con->Ico.U8; break;
4679 case Ico_U16: n = (ULong)con->Ico.U16; break;
4680 case Ico_U32: n = (ULong)con->Ico.U32; break;
4681 case Ico_U64: n = (ULong)con->Ico.U64; break;
4682 case Ico_F64: return False;
sewardjb5b87402011-03-07 16:05:35 +00004683 case Ico_F32i: return False;
sewardjd5204dc2004-12-31 01:16:11 +00004684 case Ico_F64i: return False;
4685 case Ico_V128: return False;
sewardj95448072004-11-22 20:19:51 +00004686 default: ppIRExpr(at); tl_assert(0);
4687 }
4688 /* VG_(printf)("%llx\n", n); */
sewardj96a922e2005-04-23 23:26:29 +00004689 return (/*32*/ n == 0xFEFEFEFFULL
4690 /*32*/ || n == 0x80808080ULL
sewardj17b47432008-12-17 01:12:58 +00004691 /*32*/ || n == 0x7F7F7F7FULL
tomd9774d72005-06-27 08:11:01 +00004692 /*64*/ || n == 0xFFFFFFFFFEFEFEFFULL
sewardj96a922e2005-04-23 23:26:29 +00004693 /*64*/ || n == 0xFEFEFEFEFEFEFEFFULL
tomd9774d72005-06-27 08:11:01 +00004694 /*64*/ || n == 0x0000000000008080ULL
sewardj96a922e2005-04-23 23:26:29 +00004695 /*64*/ || n == 0x8080808080808080ULL
sewardj17b47432008-12-17 01:12:58 +00004696 /*64*/ || n == 0x0101010101010101ULL
sewardj96a922e2005-04-23 23:26:29 +00004697 );
sewardj95448072004-11-22 20:19:51 +00004698}
njn25e49d8e72002-09-23 09:36:25 +00004699
/* Return True iff the given (flat) statement mentions any literal
   atom for which isBogusAtom holds.  Used as a pre-scan over the
   incoming superblock: if any statement is "bogus", the whole block
   is instrumented in extra-detailed (extra-expensive) mode.  Panics
   on statement/expression forms it does not recognise, so it also
   acts as a completeness check against new IR constructs. */
static Bool checkForBogusLiterals ( /*FLAT*/ IRStmt* st )
{
   Int      i;
   IRExpr*  e;
   IRDirty* d;
   IRCAS*   cas;
   switch (st->tag) {
      case Ist_WrTmp:
         /* For a tmp write, inspect every atom in the RHS expression.
            Since the IR is flat, sub-expressions are atoms, so one
            level of inspection suffices. */
         e = st->Ist.WrTmp.data;
         switch (e->tag) {
            case Iex_Get:
            case Iex_RdTmp:
               return False;
            case Iex_Const:
               return isBogusAtom(e);
            case Iex_Unop:
               return isBogusAtom(e->Iex.Unop.arg);
            case Iex_GetI:
               return isBogusAtom(e->Iex.GetI.ix);
            case Iex_Binop:
               return isBogusAtom(e->Iex.Binop.arg1)
                      || isBogusAtom(e->Iex.Binop.arg2);
            case Iex_Triop:
               return isBogusAtom(e->Iex.Triop.arg1)
                      || isBogusAtom(e->Iex.Triop.arg2)
                      || isBogusAtom(e->Iex.Triop.arg3);
            case Iex_Qop:
               return isBogusAtom(e->Iex.Qop.arg1)
                      || isBogusAtom(e->Iex.Qop.arg2)
                      || isBogusAtom(e->Iex.Qop.arg3)
                      || isBogusAtom(e->Iex.Qop.arg4);
            case Iex_Mux0X:
               return isBogusAtom(e->Iex.Mux0X.cond)
                      || isBogusAtom(e->Iex.Mux0X.expr0)
                      || isBogusAtom(e->Iex.Mux0X.exprX);
            case Iex_Load:
               return isBogusAtom(e->Iex.Load.addr);
            case Iex_CCall:
               /* args is a NULL-terminated vector. */
               for (i = 0; e->Iex.CCall.args[i]; i++)
                  if (isBogusAtom(e->Iex.CCall.args[i]))
                     return True;
               return False;
            default:
               goto unhandled;
         }
      case Ist_Dirty:
         /* Check the argument vector, plus the optional guard and
            memory-address fields. */
         d = st->Ist.Dirty.details;
         for (i = 0; d->args[i]; i++)
            if (isBogusAtom(d->args[i]))
               return True;
         if (d->guard && isBogusAtom(d->guard))
            return True;
         if (d->mAddr && isBogusAtom(d->mAddr))
            return True;
         return False;
      case Ist_Put:
         return isBogusAtom(st->Ist.Put.data);
      case Ist_PutI:
         return isBogusAtom(st->Ist.PutI.ix)
                || isBogusAtom(st->Ist.PutI.data);
      case Ist_Store:
         return isBogusAtom(st->Ist.Store.addr)
                || isBogusAtom(st->Ist.Store.data);
      case Ist_Exit:
         return isBogusAtom(st->Ist.Exit.guard);
      case Ist_AbiHint:
         return isBogusAtom(st->Ist.AbiHint.base)
                || isBogusAtom(st->Ist.AbiHint.nia);
      case Ist_NoOp:
      case Ist_IMark:
      case Ist_MBE:
         /* No atoms to inspect. */
         return False;
      case Ist_CAS:
         /* expdHi/dataHi are NULL for single (non-double) CAS. */
         cas = st->Ist.CAS.details;
         return isBogusAtom(cas->addr)
                || (cas->expdHi ? isBogusAtom(cas->expdHi) : False)
                || isBogusAtom(cas->expdLo)
                || (cas->dataHi ? isBogusAtom(cas->dataHi) : False)
                || isBogusAtom(cas->dataLo);
      case Ist_LLSC:
         /* storedata is NULL for the load-linked form. */
         return isBogusAtom(st->Ist.LLSC.addr)
                || (st->Ist.LLSC.storedata
                       ? isBogusAtom(st->Ist.LLSC.storedata)
                       : False);
      default:
      unhandled:
         ppIRStmt(st);
         VG_(tool_panic)("hasBogusLiterals");
   }
}
njn25e49d8e72002-09-23 09:36:25 +00004790
njn25e49d8e72002-09-23 09:36:25 +00004791
/* Memcheck's main instrumentation entry point.  Takes the incoming
   superblock 'sb_in' and returns a new superblock containing the
   original statements interleaved with V-bit (definedness) and,
   at --track-origins=yes (MC_(clo_mc_level) == 3), B-bit (origin)
   shadow computation.  gWordTy/hWordTy are the guest and host word
   types; they must match, since the mixed case is unsupported. */
IRSB* MC_(instrument) ( VgCallbackClosure* closure,
                        IRSB* sb_in,
                        VexGuestLayout* layout,
                        VexGuestExtents* vge,
                        IRType gWordTy, IRType hWordTy )
{
   Bool    verboze = 0||False;
   Bool    bogus;
   Int     i, j, first_stmt;
   IRStmt* st;
   MCEnv   mce;
   IRSB*   sb_out;

   if (gWordTy != hWordTy) {
      /* We don't currently support this case. */
      VG_(tool_panic)("host/guest word size mismatch");
   }

   /* Check we're not completely nuts */
   tl_assert(sizeof(UWord)  == sizeof(void*));
   tl_assert(sizeof(Word)   == sizeof(void*));
   tl_assert(sizeof(Addr)   == sizeof(void*));
   tl_assert(sizeof(ULong)  == 8);
   tl_assert(sizeof(Long)   == 8);
   tl_assert(sizeof(Addr64) == 8);
   tl_assert(sizeof(UInt)   == 4);
   tl_assert(sizeof(Int)    == 4);

   tl_assert(MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3);

   /* Set up SB: the output block shares tyenv etc with the input but
      starts with no statements. */
   sb_out = deepCopyIRSBExceptStmts(sb_in);

   /* Set up the running environment.  Both .sb and .tmpMap are
      modified as we go along.  Note that tmps are added to both
      .sb->tyenv and .tmpMap together, so the valid index-set for
      those two arrays should always be identical. */
   VG_(memset)(&mce, 0, sizeof(mce));
   mce.sb             = sb_out;
   mce.trace          = verboze;
   mce.layout         = layout;
   mce.hWordTy        = hWordTy;
   mce.bogusLiterals  = False;

   /* One TempMapEnt per existing (original) tmp; shadow tmps are
      allocated lazily later. */
   mce.tmpMap = VG_(newXA)( VG_(malloc), "mc.MC_(instrument).1", VG_(free),
                            sizeof(TempMapEnt));
   for (i = 0; i < sb_in->tyenv->types_used; i++) {
      TempMapEnt ent;
      ent.kind    = Orig;
      ent.shadowV = IRTemp_INVALID;
      ent.shadowB = IRTemp_INVALID;
      VG_(addToXA)( mce.tmpMap, &ent );
   }
   tl_assert( VG_(sizeXA)( mce.tmpMap ) == sb_in->tyenv->types_used );

   /* Make a preliminary inspection of the statements, to see if there
      are any dodgy-looking literals.  If there are, we generate
      extra-detailed (hence extra-expensive) instrumentation in
      places.  Scan the whole bb even if dodgyness is found earlier,
      so that the flatness assertion is applied to all stmts. */

   bogus = False;

   for (i = 0; i < sb_in->stmts_used; i++) {

      st = sb_in->stmts[i];
      tl_assert(st);
      tl_assert(isFlatIRStmt(st));

      if (!bogus) {
         bogus = checkForBogusLiterals(st);
         if (0 && bogus) {
            VG_(printf)("bogus: ");
            ppIRStmt(st);
            VG_(printf)("\n");
         }
      }

   }

   mce.bogusLiterals = bogus;

   /* Copy verbatim any IR preamble preceding the first IMark */

   tl_assert(mce.sb == sb_out);
   tl_assert(mce.sb != sb_in);

   i = 0;
   while (i < sb_in->stmts_used && sb_in->stmts[i]->tag != Ist_IMark) {

      st = sb_in->stmts[i];
      tl_assert(st);
      tl_assert(isFlatIRStmt(st));

      stmt( 'C', &mce, sb_in->stmts[i] );
      i++;
   }

   /* Nasty problem.  IR optimisation of the pre-instrumented IR may
      cause the IR following the preamble to contain references to IR
      temporaries defined in the preamble.  Because the preamble isn't
      instrumented, these temporaries don't have any shadows.
      Nevertheless uses of them following the preamble will cause
      memcheck to generate references to their shadows.  End effect is
      to cause IR sanity check failures, due to references to
      non-existent shadows.  This is only evident for the complex
      preambles used for function wrapping on TOC-afflicted platforms
      (ppc64-linux).

      The following loop therefore scans the preamble looking for
      assignments to temporaries.  For each one found it creates an
      assignment to the corresponding (V) shadow temp, marking it as
      'defined'.  This is the same resulting IR as if the main
      instrumentation loop before had been applied to the statement
      'tmp = CONSTANT'.

      Similarly, if origin tracking is enabled, we must generate an
      assignment for the corresponding origin (B) shadow, claiming
      no-origin, as appropriate for a defined value.
   */
   for (j = 0; j < i; j++) {
      if (sb_in->stmts[j]->tag == Ist_WrTmp) {
         /* findShadowTmpV checks its arg is an original tmp;
            no need to assert that here. */
         IRTemp tmp_o = sb_in->stmts[j]->Ist.WrTmp.tmp;
         IRTemp tmp_v = findShadowTmpV(&mce, tmp_o);
         IRType ty_v  = typeOfIRTemp(sb_out->tyenv, tmp_v);
         assign( 'V', &mce, tmp_v, definedOfType( ty_v ) );
         if (MC_(clo_mc_level) == 3) {
            IRTemp tmp_b = findShadowTmpB(&mce, tmp_o);
            tl_assert(typeOfIRTemp(sb_out->tyenv, tmp_b) == Ity_I32);
            assign( 'B', &mce, tmp_b, mkU32(0)/* UNKNOWN ORIGIN */);
         }
         if (0) {
            VG_(printf)("create shadow tmp(s) for preamble tmp [%d] ty ", j);
            ppIRType( ty_v );
            VG_(printf)("\n");
         }
      }
   }

   /* Iterate over the remaining stmts to generate instrumentation. */

   tl_assert(sb_in->stmts_used > 0);
   tl_assert(i >= 0);
   tl_assert(i < sb_in->stmts_used);
   tl_assert(sb_in->stmts[i]->tag == Ist_IMark);

   for (/* use current i*/; i < sb_in->stmts_used; i++) {

      st = sb_in->stmts[i];
      /* Remember where this stmt's instrumentation starts in the
         output, for the optional debug printing below. */
      first_stmt = sb_out->stmts_used;

      if (verboze) {
         VG_(printf)("\n");
         ppIRStmt(st);
         VG_(printf)("\n");
      }

      if (MC_(clo_mc_level) == 3) {
         /* See comments on case Ist_CAS below. */
         if (st->tag != Ist_CAS)
            schemeS( &mce, st );
      }

      /* Generate instrumentation code for each stmt ... */

      switch (st->tag) {

         case Ist_WrTmp:
            assign( 'V', &mce, findShadowTmpV(&mce, st->Ist.WrTmp.tmp),
                               expr2vbits( &mce, st->Ist.WrTmp.data) );
            break;

         case Ist_Put:
            do_shadow_PUT( &mce,
                           st->Ist.Put.offset,
                           st->Ist.Put.data,
                           NULL /* shadow atom */ );
            break;

         case Ist_PutI:
            do_shadow_PUTI( &mce,
                            st->Ist.PutI.descr,
                            st->Ist.PutI.ix,
                            st->Ist.PutI.bias,
                            st->Ist.PutI.data );
            break;

         case Ist_Store:
            do_shadow_Store( &mce, st->Ist.Store.end,
                                   st->Ist.Store.addr, 0/* addr bias */,
                                   st->Ist.Store.data,
                                   NULL /* shadow data */,
                                   NULL/*guard*/ );
            break;

         case Ist_Exit:
            complainIfUndefined( &mce, st->Ist.Exit.guard );
            break;

         case Ist_IMark:
            break;

         case Ist_NoOp:
         case Ist_MBE:
            break;

         case Ist_Dirty:
            do_shadow_Dirty( &mce, st->Ist.Dirty.details );
            break;

         case Ist_AbiHint:
            do_AbiHint( &mce, st->Ist.AbiHint.base,
                              st->Ist.AbiHint.len,
                              st->Ist.AbiHint.nia );
            break;

         case Ist_CAS:
            do_shadow_CAS( &mce, st->Ist.CAS.details );
            /* Note, do_shadow_CAS copies the CAS itself to the output
               block, because it needs to add instrumentation both
               before and after it.  Hence skip the copy below.  Also
               skip the origin-tracking stuff (call to schemeS) above,
               since that's all tangled up with it too; do_shadow_CAS
               does it all. */
            break;

         case Ist_LLSC:
            do_shadow_LLSC( &mce,
                            st->Ist.LLSC.end,
                            st->Ist.LLSC.result,
                            st->Ist.LLSC.addr,
                            st->Ist.LLSC.storedata );
            break;

         default:
            VG_(printf)("\n");
            ppIRStmt(st);
            VG_(printf)("\n");
            VG_(tool_panic)("memcheck: unhandled IRStmt");

      } /* switch (st->tag) */

      if (0 && verboze) {
         for (j = first_stmt; j < sb_out->stmts_used; j++) {
            VG_(printf)("   ");
            ppIRStmt(sb_out->stmts[j]);
            VG_(printf)("\n");
         }
         VG_(printf)("\n");
      }

      /* ... and finally copy the stmt itself to the output.  Except,
         skip the copy of IRCASs; see comments on case Ist_CAS
         above. */
      if (st->tag != Ist_CAS)
         stmt('C', &mce, st);
   }

   /* Now we need to complain if the jump target is undefined. */
   first_stmt = sb_out->stmts_used;

   if (verboze) {
      VG_(printf)("sb_in->next = ");
      ppIRExpr(sb_in->next);
      VG_(printf)("\n\n");
   }

   complainIfUndefined( &mce, sb_in->next );

   if (0 && verboze) {
      for (j = first_stmt; j < sb_out->stmts_used; j++) {
         VG_(printf)("   ");
         ppIRStmt(sb_out->stmts[j]);
         VG_(printf)("\n");
      }
      VG_(printf)("\n");
   }

   /* If this fails, there's been some serious snafu with tmp management,
      that should be investigated. */
   tl_assert( VG_(sizeXA)( mce.tmpMap ) == mce.sb->tyenv->types_used );
   VG_(deleteXA)( mce.tmpMap );

   tl_assert(mce.sb == sb_out);
   return sb_out;
}
njn25e49d8e72002-09-23 09:36:25 +00005080
sewardj81651dc2007-08-28 06:05:20 +00005081/*------------------------------------------------------------*/
5082/*--- Post-tree-build final tidying ---*/
5083/*------------------------------------------------------------*/
5084
5085/* This exploits the observation that Memcheck often produces
5086 repeated conditional calls of the form
5087
sewardj7cf4e6b2008-05-01 20:24:26 +00005088 Dirty G MC_(helperc_value_check0/1/4/8_fail)(UInt otag)
sewardj81651dc2007-08-28 06:05:20 +00005089
5090 with the same guard expression G guarding the same helper call.
5091 The second and subsequent calls are redundant. This usually
5092 results from instrumentation of guest code containing multiple
5093 memory references at different constant offsets from the same base
5094 register. After optimisation of the instrumentation, you get a
5095 test for the definedness of the base register for each memory
5096 reference, which is kinda pointless. MC_(final_tidy) therefore
5097 looks for such repeated calls and removes all but the first. */
5098
5099/* A struct for recording which (helper, guard) pairs we have already
5100 seen. */
typedef
   struct { void* entry;    /* helper function address (IRCallee.addr) */
            IRExpr* guard;  /* guard expression of the conditional call */ }
   Pair;
5104
5105/* Return True if e1 and e2 definitely denote the same value (used to
5106 compare guards). Return False if unknown; False is the safe
5107 answer. Since guest registers and guest memory do not have the
5108 SSA property we must return False if any Gets or Loads appear in
5109 the expression. */
5110
5111static Bool sameIRValue ( IRExpr* e1, IRExpr* e2 )
5112{
5113 if (e1->tag != e2->tag)
5114 return False;
5115 switch (e1->tag) {
5116 case Iex_Const:
5117 return eqIRConst( e1->Iex.Const.con, e2->Iex.Const.con );
5118 case Iex_Binop:
5119 return e1->Iex.Binop.op == e2->Iex.Binop.op
5120 && sameIRValue(e1->Iex.Binop.arg1, e2->Iex.Binop.arg1)
5121 && sameIRValue(e1->Iex.Binop.arg2, e2->Iex.Binop.arg2);
5122 case Iex_Unop:
5123 return e1->Iex.Unop.op == e2->Iex.Unop.op
5124 && sameIRValue(e1->Iex.Unop.arg, e2->Iex.Unop.arg);
5125 case Iex_RdTmp:
5126 return e1->Iex.RdTmp.tmp == e2->Iex.RdTmp.tmp;
5127 case Iex_Mux0X:
5128 return sameIRValue( e1->Iex.Mux0X.cond, e2->Iex.Mux0X.cond )
5129 && sameIRValue( e1->Iex.Mux0X.expr0, e2->Iex.Mux0X.expr0 )
5130 && sameIRValue( e1->Iex.Mux0X.exprX, e2->Iex.Mux0X.exprX );
5131 case Iex_Qop:
5132 case Iex_Triop:
5133 case Iex_CCall:
5134 /* be lazy. Could define equality for these, but they never
5135 appear to be used. */
5136 return False;
5137 case Iex_Get:
5138 case Iex_GetI:
5139 case Iex_Load:
5140 /* be conservative - these may not give the same value each
5141 time */
5142 return False;
5143 case Iex_Binder:
5144 /* should never see this */
5145 /* fallthrough */
5146 default:
5147 VG_(printf)("mc_translate.c: sameIRValue: unhandled: ");
5148 ppIRExpr(e1);
5149 VG_(tool_panic)("memcheck:sameIRValue");
5150 return False;
5151 }
5152}
5153
5154/* See if 'pairs' already has an entry for (entry, guard). Return
5155 True if so. If not, add an entry. */
5156
5157static
5158Bool check_or_add ( XArray* /*of Pair*/ pairs, IRExpr* guard, void* entry )
5159{
5160 Pair p;
5161 Pair* pp;
5162 Int i, n = VG_(sizeXA)( pairs );
5163 for (i = 0; i < n; i++) {
5164 pp = VG_(indexXA)( pairs, i );
5165 if (pp->entry == entry && sameIRValue(pp->guard, guard))
5166 return True;
5167 }
5168 p.guard = guard;
5169 p.entry = entry;
5170 VG_(addToXA)( pairs, &p );
5171 return False;
5172}
5173
5174static Bool is_helperc_value_checkN_fail ( HChar* name )
5175{
5176 return
sewardj7cf4e6b2008-05-01 20:24:26 +00005177 0==VG_(strcmp)(name, "MC_(helperc_value_check0_fail_no_o)")
5178 || 0==VG_(strcmp)(name, "MC_(helperc_value_check1_fail_no_o)")
5179 || 0==VG_(strcmp)(name, "MC_(helperc_value_check4_fail_no_o)")
5180 || 0==VG_(strcmp)(name, "MC_(helperc_value_check8_fail_no_o)")
5181 || 0==VG_(strcmp)(name, "MC_(helperc_value_check0_fail_w_o)")
5182 || 0==VG_(strcmp)(name, "MC_(helperc_value_check1_fail_w_o)")
5183 || 0==VG_(strcmp)(name, "MC_(helperc_value_check4_fail_w_o)")
5184 || 0==VG_(strcmp)(name, "MC_(helperc_value_check8_fail_w_o)");
sewardj81651dc2007-08-28 06:05:20 +00005185}
5186
5187IRSB* MC_(final_tidy) ( IRSB* sb_in )
5188{
5189 Int i;
5190 IRStmt* st;
5191 IRDirty* di;
5192 IRExpr* guard;
5193 IRCallee* cee;
5194 Bool alreadyPresent;
sewardj9c606bd2008-09-18 18:12:50 +00005195 XArray* pairs = VG_(newXA)( VG_(malloc), "mc.ft.1",
5196 VG_(free), sizeof(Pair) );
sewardj81651dc2007-08-28 06:05:20 +00005197 /* Scan forwards through the statements. Each time a call to one
5198 of the relevant helpers is seen, check if we have made a
5199 previous call to the same helper using the same guard
5200 expression, and if so, delete the call. */
5201 for (i = 0; i < sb_in->stmts_used; i++) {
5202 st = sb_in->stmts[i];
5203 tl_assert(st);
5204 if (st->tag != Ist_Dirty)
5205 continue;
5206 di = st->Ist.Dirty.details;
5207 guard = di->guard;
5208 if (!guard)
5209 continue;
5210 if (0) { ppIRExpr(guard); VG_(printf)("\n"); }
5211 cee = di->cee;
5212 if (!is_helperc_value_checkN_fail( cee->name ))
5213 continue;
5214 /* Ok, we have a call to helperc_value_check0/1/4/8_fail with
5215 guard 'guard'. Check if we have already seen a call to this
5216 function with the same guard. If so, delete it. If not,
5217 add it to the set of calls we do know about. */
5218 alreadyPresent = check_or_add( pairs, guard, cee->addr );
5219 if (alreadyPresent) {
5220 sb_in->stmts[i] = IRStmt_NoOp();
5221 if (0) VG_(printf)("XX\n");
5222 }
5223 }
5224 VG_(deleteXA)( pairs );
5225 return sb_in;
5226}
5227
5228
sewardj7cf4e6b2008-05-01 20:24:26 +00005229/*------------------------------------------------------------*/
5230/*--- Origin tracking stuff ---*/
5231/*------------------------------------------------------------*/
5232
sewardj1c0ce7a2009-07-01 08:10:49 +00005233/* Almost identical to findShadowTmpV. */
sewardj7cf4e6b2008-05-01 20:24:26 +00005234static IRTemp findShadowTmpB ( MCEnv* mce, IRTemp orig )
5235{
sewardj1c0ce7a2009-07-01 08:10:49 +00005236 TempMapEnt* ent;
5237 /* VG_(indexXA) range-checks 'orig', hence no need to check
5238 here. */
5239 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
5240 tl_assert(ent->kind == Orig);
5241 if (ent->shadowB == IRTemp_INVALID) {
5242 IRTemp tmpB
5243 = newTemp( mce, Ity_I32, BSh );
5244 /* newTemp may cause mce->tmpMap to resize, hence previous results
5245 from VG_(indexXA) are invalid. */
5246 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
5247 tl_assert(ent->kind == Orig);
5248 tl_assert(ent->shadowB == IRTemp_INVALID);
5249 ent->shadowB = tmpB;
sewardj7cf4e6b2008-05-01 20:24:26 +00005250 }
sewardj1c0ce7a2009-07-01 08:10:49 +00005251 return ent->shadowB;
sewardj7cf4e6b2008-05-01 20:24:26 +00005252}
5253
5254static IRAtom* gen_maxU32 ( MCEnv* mce, IRAtom* b1, IRAtom* b2 )
5255{
5256 return assignNew( 'B', mce, Ity_I32, binop(Iop_Max32U, b1, b2) );
5257}
5258
5259static IRAtom* gen_load_b ( MCEnv* mce, Int szB,
5260 IRAtom* baseaddr, Int offset )
5261{
5262 void* hFun;
5263 HChar* hName;
5264 IRTemp bTmp;
5265 IRDirty* di;
sewardj1c0ce7a2009-07-01 08:10:49 +00005266 IRType aTy = typeOfIRExpr( mce->sb->tyenv, baseaddr );
sewardj7cf4e6b2008-05-01 20:24:26 +00005267 IROp opAdd = aTy == Ity_I32 ? Iop_Add32 : Iop_Add64;
5268 IRAtom* ea = baseaddr;
5269 if (offset != 0) {
5270 IRAtom* off = aTy == Ity_I32 ? mkU32( offset )
5271 : mkU64( (Long)(Int)offset );
5272 ea = assignNew( 'B', mce, aTy, binop(opAdd, ea, off));
5273 }
sewardj1c0ce7a2009-07-01 08:10:49 +00005274 bTmp = newTemp(mce, mce->hWordTy, BSh);
sewardj7cf4e6b2008-05-01 20:24:26 +00005275
5276 switch (szB) {
5277 case 1: hFun = (void*)&MC_(helperc_b_load1);
5278 hName = "MC_(helperc_b_load1)";
5279 break;
5280 case 2: hFun = (void*)&MC_(helperc_b_load2);
5281 hName = "MC_(helperc_b_load2)";
5282 break;
5283 case 4: hFun = (void*)&MC_(helperc_b_load4);
5284 hName = "MC_(helperc_b_load4)";
5285 break;
5286 case 8: hFun = (void*)&MC_(helperc_b_load8);
5287 hName = "MC_(helperc_b_load8)";
5288 break;
5289 case 16: hFun = (void*)&MC_(helperc_b_load16);
5290 hName = "MC_(helperc_b_load16)";
5291 break;
5292 default:
5293 VG_(printf)("mc_translate.c: gen_load_b: unhandled szB == %d\n", szB);
5294 tl_assert(0);
5295 }
5296 di = unsafeIRDirty_1_N(
5297 bTmp, 1/*regparms*/, hName, VG_(fnptr_to_fnentry)( hFun ),
5298 mkIRExprVec_1( ea )
5299 );
5300 /* no need to mess with any annotations. This call accesses
5301 neither guest state nor guest memory. */
5302 stmt( 'B', mce, IRStmt_Dirty(di) );
5303 if (mce->hWordTy == Ity_I64) {
5304 /* 64-bit host */
sewardj1c0ce7a2009-07-01 08:10:49 +00005305 IRTemp bTmp32 = newTemp(mce, Ity_I32, BSh);
sewardj7cf4e6b2008-05-01 20:24:26 +00005306 assign( 'B', mce, bTmp32, unop(Iop_64to32, mkexpr(bTmp)) );
5307 return mkexpr(bTmp32);
5308 } else {
5309 /* 32-bit host */
5310 return mkexpr(bTmp);
5311 }
5312}
sewardj1c0ce7a2009-07-01 08:10:49 +00005313
5314/* Generate a shadow store. guard :: Ity_I1 controls whether the
5315 store really happens; NULL means it unconditionally does. */
sewardj7cf4e6b2008-05-01 20:24:26 +00005316static void gen_store_b ( MCEnv* mce, Int szB,
sewardj1c0ce7a2009-07-01 08:10:49 +00005317 IRAtom* baseaddr, Int offset, IRAtom* dataB,
5318 IRAtom* guard )
sewardj7cf4e6b2008-05-01 20:24:26 +00005319{
5320 void* hFun;
5321 HChar* hName;
5322 IRDirty* di;
sewardj1c0ce7a2009-07-01 08:10:49 +00005323 IRType aTy = typeOfIRExpr( mce->sb->tyenv, baseaddr );
sewardj7cf4e6b2008-05-01 20:24:26 +00005324 IROp opAdd = aTy == Ity_I32 ? Iop_Add32 : Iop_Add64;
5325 IRAtom* ea = baseaddr;
sewardj1c0ce7a2009-07-01 08:10:49 +00005326 if (guard) {
5327 tl_assert(isOriginalAtom(mce, guard));
5328 tl_assert(typeOfIRExpr(mce->sb->tyenv, guard) == Ity_I1);
5329 }
sewardj7cf4e6b2008-05-01 20:24:26 +00005330 if (offset != 0) {
5331 IRAtom* off = aTy == Ity_I32 ? mkU32( offset )
5332 : mkU64( (Long)(Int)offset );
5333 ea = assignNew( 'B', mce, aTy, binop(opAdd, ea, off));
5334 }
5335 if (mce->hWordTy == Ity_I64)
5336 dataB = assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, dataB));
5337
5338 switch (szB) {
5339 case 1: hFun = (void*)&MC_(helperc_b_store1);
5340 hName = "MC_(helperc_b_store1)";
5341 break;
5342 case 2: hFun = (void*)&MC_(helperc_b_store2);
5343 hName = "MC_(helperc_b_store2)";
5344 break;
5345 case 4: hFun = (void*)&MC_(helperc_b_store4);
5346 hName = "MC_(helperc_b_store4)";
5347 break;
5348 case 8: hFun = (void*)&MC_(helperc_b_store8);
5349 hName = "MC_(helperc_b_store8)";
5350 break;
5351 case 16: hFun = (void*)&MC_(helperc_b_store16);
5352 hName = "MC_(helperc_b_store16)";
5353 break;
5354 default:
5355 tl_assert(0);
5356 }
5357 di = unsafeIRDirty_0_N( 2/*regparms*/,
5358 hName, VG_(fnptr_to_fnentry)( hFun ),
5359 mkIRExprVec_2( ea, dataB )
5360 );
5361 /* no need to mess with any annotations. This call accesses
5362 neither guest state nor guest memory. */
sewardj1c0ce7a2009-07-01 08:10:49 +00005363 if (guard) di->guard = guard;
sewardj7cf4e6b2008-05-01 20:24:26 +00005364 stmt( 'B', mce, IRStmt_Dirty(di) );
5365}
5366
5367static IRAtom* narrowTo32 ( MCEnv* mce, IRAtom* e ) {
sewardj1c0ce7a2009-07-01 08:10:49 +00005368 IRType eTy = typeOfIRExpr(mce->sb->tyenv, e);
sewardj7cf4e6b2008-05-01 20:24:26 +00005369 if (eTy == Ity_I64)
5370 return assignNew( 'B', mce, Ity_I32, unop(Iop_64to32, e) );
5371 if (eTy == Ity_I32)
5372 return e;
5373 tl_assert(0);
5374}
5375
5376static IRAtom* zWidenFrom32 ( MCEnv* mce, IRType dstTy, IRAtom* e ) {
sewardj1c0ce7a2009-07-01 08:10:49 +00005377 IRType eTy = typeOfIRExpr(mce->sb->tyenv, e);
sewardj7cf4e6b2008-05-01 20:24:26 +00005378 tl_assert(eTy == Ity_I32);
5379 if (dstTy == Ity_I64)
5380 return assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, e) );
5381 tl_assert(0);
5382}
5383
sewardjdb5907d2009-11-26 17:20:21 +00005384
sewardj7cf4e6b2008-05-01 20:24:26 +00005385static IRAtom* schemeE ( MCEnv* mce, IRExpr* e )
5386{
5387 tl_assert(MC_(clo_mc_level) == 3);
5388
5389 switch (e->tag) {
5390
5391 case Iex_GetI: {
5392 IRRegArray* descr_b;
5393 IRAtom *t1, *t2, *t3, *t4;
5394 IRRegArray* descr = e->Iex.GetI.descr;
5395 IRType equivIntTy
5396 = MC_(get_otrack_reg_array_equiv_int_type)(descr);
5397 /* If this array is unshadowable for whatever reason, use the
5398 usual approximation. */
5399 if (equivIntTy == Ity_INVALID)
5400 return mkU32(0);
5401 tl_assert(sizeofIRType(equivIntTy) >= 4);
5402 tl_assert(sizeofIRType(equivIntTy) == sizeofIRType(descr->elemTy));
5403 descr_b = mkIRRegArray( descr->base + 2*mce->layout->total_sizeB,
5404 equivIntTy, descr->nElems );
5405 /* Do a shadow indexed get of the same size, giving t1. Take
5406 the bottom 32 bits of it, giving t2. Compute into t3 the
5407 origin for the index (almost certainly zero, but there's
5408 no harm in being completely general here, since iropt will
5409 remove any useless code), and fold it in, giving a final
5410 value t4. */
5411 t1 = assignNew( 'B', mce, equivIntTy,
5412 IRExpr_GetI( descr_b, e->Iex.GetI.ix,
5413 e->Iex.GetI.bias ));
5414 t2 = narrowTo32( mce, t1 );
5415 t3 = schemeE( mce, e->Iex.GetI.ix );
5416 t4 = gen_maxU32( mce, t2, t3 );
5417 return t4;
5418 }
5419 case Iex_CCall: {
5420 Int i;
5421 IRAtom* here;
5422 IRExpr** args = e->Iex.CCall.args;
5423 IRAtom* curr = mkU32(0);
5424 for (i = 0; args[i]; i++) {
5425 tl_assert(i < 32);
5426 tl_assert(isOriginalAtom(mce, args[i]));
5427 /* Only take notice of this arg if the callee's
5428 mc-exclusion mask does not say it is to be excluded. */
5429 if (e->Iex.CCall.cee->mcx_mask & (1<<i)) {
5430 /* the arg is to be excluded from definedness checking.
5431 Do nothing. */
5432 if (0) VG_(printf)("excluding %s(%d)\n",
5433 e->Iex.CCall.cee->name, i);
5434 } else {
5435 /* calculate the arg's definedness, and pessimistically
5436 merge it in. */
5437 here = schemeE( mce, args[i] );
5438 curr = gen_maxU32( mce, curr, here );
5439 }
5440 }
5441 return curr;
5442 }
5443 case Iex_Load: {
5444 Int dszB;
5445 dszB = sizeofIRType(e->Iex.Load.ty);
5446 /* assert that the B value for the address is already
5447 available (somewhere) */
5448 tl_assert(isIRAtom(e->Iex.Load.addr));
5449 tl_assert(mce->hWordTy == Ity_I32 || mce->hWordTy == Ity_I64);
5450 return gen_load_b( mce, dszB, e->Iex.Load.addr, 0 );
5451 }
5452 case Iex_Mux0X: {
5453 IRAtom* b1 = schemeE( mce, e->Iex.Mux0X.cond );
5454 IRAtom* b2 = schemeE( mce, e->Iex.Mux0X.expr0 );
5455 IRAtom* b3 = schemeE( mce, e->Iex.Mux0X.exprX );
5456 return gen_maxU32( mce, b1, gen_maxU32( mce, b2, b3 ));
5457 }
5458 case Iex_Qop: {
5459 IRAtom* b1 = schemeE( mce, e->Iex.Qop.arg1 );
5460 IRAtom* b2 = schemeE( mce, e->Iex.Qop.arg2 );
5461 IRAtom* b3 = schemeE( mce, e->Iex.Qop.arg3 );
5462 IRAtom* b4 = schemeE( mce, e->Iex.Qop.arg4 );
5463 return gen_maxU32( mce, gen_maxU32( mce, b1, b2 ),
5464 gen_maxU32( mce, b3, b4 ) );
5465 }
5466 case Iex_Triop: {
5467 IRAtom* b1 = schemeE( mce, e->Iex.Triop.arg1 );
5468 IRAtom* b2 = schemeE( mce, e->Iex.Triop.arg2 );
5469 IRAtom* b3 = schemeE( mce, e->Iex.Triop.arg3 );
5470 return gen_maxU32( mce, b1, gen_maxU32( mce, b2, b3 ) );
5471 }
5472 case Iex_Binop: {
sewardjafed4c52009-07-12 13:00:17 +00005473 switch (e->Iex.Binop.op) {
5474 case Iop_CasCmpEQ8: case Iop_CasCmpNE8:
5475 case Iop_CasCmpEQ16: case Iop_CasCmpNE16:
5476 case Iop_CasCmpEQ32: case Iop_CasCmpNE32:
5477 case Iop_CasCmpEQ64: case Iop_CasCmpNE64:
5478 /* Just say these all produce a defined result,
5479 regardless of their arguments. See
5480 COMMENT_ON_CasCmpEQ in this file. */
5481 return mkU32(0);
5482 default: {
5483 IRAtom* b1 = schemeE( mce, e->Iex.Binop.arg1 );
5484 IRAtom* b2 = schemeE( mce, e->Iex.Binop.arg2 );
5485 return gen_maxU32( mce, b1, b2 );
5486 }
5487 }
5488 tl_assert(0);
5489 /*NOTREACHED*/
sewardj7cf4e6b2008-05-01 20:24:26 +00005490 }
5491 case Iex_Unop: {
5492 IRAtom* b1 = schemeE( mce, e->Iex.Unop.arg );
5493 return b1;
5494 }
5495 case Iex_Const:
5496 return mkU32(0);
5497 case Iex_RdTmp:
5498 return mkexpr( findShadowTmpB( mce, e->Iex.RdTmp.tmp ));
5499 case Iex_Get: {
5500 Int b_offset = MC_(get_otrack_shadow_offset)(
5501 e->Iex.Get.offset,
5502 sizeofIRType(e->Iex.Get.ty)
5503 );
5504 tl_assert(b_offset >= -1
5505 && b_offset <= mce->layout->total_sizeB -4);
5506 if (b_offset >= 0) {
5507 /* FIXME: this isn't an atom! */
5508 return IRExpr_Get( b_offset + 2*mce->layout->total_sizeB,
5509 Ity_I32 );
5510 }
5511 return mkU32(0);
5512 }
5513 default:
5514 VG_(printf)("mc_translate.c: schemeE: unhandled: ");
5515 ppIRExpr(e);
5516 VG_(tool_panic)("memcheck:schemeE");
5517 }
5518}
5519
sewardjdb5907d2009-11-26 17:20:21 +00005520
sewardj7cf4e6b2008-05-01 20:24:26 +00005521static void do_origins_Dirty ( MCEnv* mce, IRDirty* d )
5522{
5523 // This is a hacked version of do_shadow_Dirty
njn4c245e52009-03-15 23:25:38 +00005524 Int i, n, toDo, gSz, gOff;
sewardj7cf4e6b2008-05-01 20:24:26 +00005525 IRAtom *here, *curr;
5526 IRTemp dst;
sewardj7cf4e6b2008-05-01 20:24:26 +00005527
5528 /* First check the guard. */
5529 curr = schemeE( mce, d->guard );
5530
5531 /* Now round up all inputs and maxU32 over them. */
5532
5533 /* Inputs: unmasked args */
5534 for (i = 0; d->args[i]; i++) {
5535 if (d->cee->mcx_mask & (1<<i)) {
5536 /* ignore this arg */
5537 } else {
5538 here = schemeE( mce, d->args[i] );
5539 curr = gen_maxU32( mce, curr, here );
5540 }
5541 }
5542
5543 /* Inputs: guest state that we read. */
5544 for (i = 0; i < d->nFxState; i++) {
5545 tl_assert(d->fxState[i].fx != Ifx_None);
5546 if (d->fxState[i].fx == Ifx_Write)
5547 continue;
5548
5549 /* Ignore any sections marked as 'always defined'. */
5550 if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size )) {
5551 if (0)
5552 VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
5553 d->fxState[i].offset, d->fxState[i].size );
5554 continue;
5555 }
5556
5557 /* This state element is read or modified. So we need to
5558 consider it. If larger than 4 bytes, deal with it in 4-byte
5559 chunks. */
5560 gSz = d->fxState[i].size;
5561 gOff = d->fxState[i].offset;
5562 tl_assert(gSz > 0);
5563 while (True) {
5564 Int b_offset;
5565 if (gSz == 0) break;
5566 n = gSz <= 4 ? gSz : 4;
5567 /* update 'curr' with maxU32 of the state slice
5568 gOff .. gOff+n-1 */
5569 b_offset = MC_(get_otrack_shadow_offset)(gOff, 4);
5570 if (b_offset != -1) {
5571 here = assignNew( 'B',mce,
5572 Ity_I32,
5573 IRExpr_Get(b_offset + 2*mce->layout->total_sizeB,
5574 Ity_I32));
5575 curr = gen_maxU32( mce, curr, here );
5576 }
5577 gSz -= n;
5578 gOff += n;
5579 }
5580
5581 }
5582
5583 /* Inputs: memory */
5584
5585 if (d->mFx != Ifx_None) {
5586 /* Because we may do multiple shadow loads/stores from the same
5587 base address, it's best to do a single test of its
5588 definedness right now. Post-instrumentation optimisation
5589 should remove all but this test. */
5590 tl_assert(d->mAddr);
5591 here = schemeE( mce, d->mAddr );
5592 curr = gen_maxU32( mce, curr, here );
5593 }
5594
5595 /* Deal with memory inputs (reads or modifies) */
5596 if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
sewardj7cf4e6b2008-05-01 20:24:26 +00005597 toDo = d->mSize;
5598 /* chew off 32-bit chunks. We don't care about the endianness
5599 since it's all going to be condensed down to a single bit,
5600 but nevertheless choose an endianness which is hopefully
5601 native to the platform. */
5602 while (toDo >= 4) {
5603 here = gen_load_b( mce, 4, d->mAddr, d->mSize - toDo );
5604 curr = gen_maxU32( mce, curr, here );
5605 toDo -= 4;
5606 }
sewardj8c93fcc2008-10-30 13:08:31 +00005607 /* handle possible 16-bit excess */
5608 while (toDo >= 2) {
5609 here = gen_load_b( mce, 2, d->mAddr, d->mSize - toDo );
5610 curr = gen_maxU32( mce, curr, here );
5611 toDo -= 2;
5612 }
5613 tl_assert(toDo == 0); /* also need to handle 1-byte excess */
sewardj7cf4e6b2008-05-01 20:24:26 +00005614 }
5615
5616 /* Whew! So curr is a 32-bit B-value which should give an origin
5617 of some use if any of the inputs to the helper are undefined.
5618 Now we need to re-distribute the results to all destinations. */
5619
5620 /* Outputs: the destination temporary, if there is one. */
5621 if (d->tmp != IRTemp_INVALID) {
5622 dst = findShadowTmpB(mce, d->tmp);
5623 assign( 'V', mce, dst, curr );
5624 }
5625
5626 /* Outputs: guest state that we write or modify. */
5627 for (i = 0; i < d->nFxState; i++) {
5628 tl_assert(d->fxState[i].fx != Ifx_None);
5629 if (d->fxState[i].fx == Ifx_Read)
5630 continue;
5631
5632 /* Ignore any sections marked as 'always defined'. */
5633 if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size ))
5634 continue;
5635
5636 /* This state element is written or modified. So we need to
5637 consider it. If larger than 4 bytes, deal with it in 4-byte
5638 chunks. */
5639 gSz = d->fxState[i].size;
5640 gOff = d->fxState[i].offset;
5641 tl_assert(gSz > 0);
5642 while (True) {
5643 Int b_offset;
5644 if (gSz == 0) break;
5645 n = gSz <= 4 ? gSz : 4;
5646 /* Write 'curr' to the state slice gOff .. gOff+n-1 */
5647 b_offset = MC_(get_otrack_shadow_offset)(gOff, 4);
5648 if (b_offset != -1) {
5649 stmt( 'B', mce, IRStmt_Put(b_offset + 2*mce->layout->total_sizeB,
5650 curr ));
5651 }
5652 gSz -= n;
5653 gOff += n;
5654 }
5655 }
5656
5657 /* Outputs: memory that we write or modify. Same comments about
5658 endianness as above apply. */
5659 if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
sewardj7cf4e6b2008-05-01 20:24:26 +00005660 toDo = d->mSize;
5661 /* chew off 32-bit chunks */
5662 while (toDo >= 4) {
sewardj1c0ce7a2009-07-01 08:10:49 +00005663 gen_store_b( mce, 4, d->mAddr, d->mSize - toDo, curr,
5664 NULL/*guard*/ );
sewardj7cf4e6b2008-05-01 20:24:26 +00005665 toDo -= 4;
5666 }
sewardj8c93fcc2008-10-30 13:08:31 +00005667 /* handle possible 16-bit excess */
5668 while (toDo >= 2) {
sewardj1c0ce7a2009-07-01 08:10:49 +00005669 gen_store_b( mce, 2, d->mAddr, d->mSize - toDo, curr,
5670 NULL/*guard*/ );
sewardj8c93fcc2008-10-30 13:08:31 +00005671 toDo -= 2;
5672 }
5673 tl_assert(toDo == 0); /* also need to handle 1-byte excess */
sewardj7cf4e6b2008-05-01 20:24:26 +00005674 }
sewardj7cf4e6b2008-05-01 20:24:26 +00005675}
5676
sewardjdb5907d2009-11-26 17:20:21 +00005677
5678static void do_origins_Store ( MCEnv* mce,
5679 IREndness stEnd,
5680 IRExpr* stAddr,
5681 IRExpr* stData )
5682{
5683 Int dszB;
5684 IRAtom* dataB;
5685 /* assert that the B value for the address is already available
5686 (somewhere), since the call to schemeE will want to see it.
5687 XXXX how does this actually ensure that?? */
5688 tl_assert(isIRAtom(stAddr));
5689 tl_assert(isIRAtom(stData));
5690 dszB = sizeofIRType( typeOfIRExpr(mce->sb->tyenv, stData ) );
5691 dataB = schemeE( mce, stData );
5692 gen_store_b( mce, dszB, stAddr, 0/*offset*/, dataB,
5693 NULL/*guard*/ );
5694}
5695
5696
sewardj7cf4e6b2008-05-01 20:24:26 +00005697static void schemeS ( MCEnv* mce, IRStmt* st )
5698{
5699 tl_assert(MC_(clo_mc_level) == 3);
5700
5701 switch (st->tag) {
5702
5703 case Ist_AbiHint:
5704 /* The value-check instrumenter handles this - by arranging
5705 to pass the address of the next instruction to
5706 MC_(helperc_MAKE_STACK_UNINIT). This is all that needs to
5707 happen for origin tracking w.r.t. AbiHints. So there is
5708 nothing to do here. */
5709 break;
5710
5711 case Ist_PutI: {
5712 IRRegArray* descr_b;
5713 IRAtom *t1, *t2, *t3, *t4;
5714 IRRegArray* descr = st->Ist.PutI.descr;
5715 IRType equivIntTy
5716 = MC_(get_otrack_reg_array_equiv_int_type)(descr);
5717 /* If this array is unshadowable for whatever reason,
5718 generate no code. */
5719 if (equivIntTy == Ity_INVALID)
5720 break;
5721 tl_assert(sizeofIRType(equivIntTy) >= 4);
5722 tl_assert(sizeofIRType(equivIntTy) == sizeofIRType(descr->elemTy));
5723 descr_b
5724 = mkIRRegArray( descr->base + 2*mce->layout->total_sizeB,
5725 equivIntTy, descr->nElems );
5726 /* Compute a value to Put - the conjoinment of the origin for
5727 the data to be Put-ted (obviously) and of the index value
5728 (not so obviously). */
5729 t1 = schemeE( mce, st->Ist.PutI.data );
5730 t2 = schemeE( mce, st->Ist.PutI.ix );
5731 t3 = gen_maxU32( mce, t1, t2 );
5732 t4 = zWidenFrom32( mce, equivIntTy, t3 );
5733 stmt( 'B', mce, IRStmt_PutI( descr_b, st->Ist.PutI.ix,
5734 st->Ist.PutI.bias, t4 ));
5735 break;
5736 }
sewardjdb5907d2009-11-26 17:20:21 +00005737
sewardj7cf4e6b2008-05-01 20:24:26 +00005738 case Ist_Dirty:
5739 do_origins_Dirty( mce, st->Ist.Dirty.details );
5740 break;
sewardjdb5907d2009-11-26 17:20:21 +00005741
5742 case Ist_Store:
5743 do_origins_Store( mce, st->Ist.Store.end,
5744 st->Ist.Store.addr,
5745 st->Ist.Store.data );
5746 break;
5747
5748 case Ist_LLSC: {
5749 /* In short: treat a load-linked like a normal load followed
5750 by an assignment of the loaded (shadow) data the result
5751 temporary. Treat a store-conditional like a normal store,
5752 and mark the result temporary as defined. */
5753 if (st->Ist.LLSC.storedata == NULL) {
5754 /* Load Linked */
5755 IRType resTy
5756 = typeOfIRTemp(mce->sb->tyenv, st->Ist.LLSC.result);
5757 IRExpr* vanillaLoad
5758 = IRExpr_Load(st->Ist.LLSC.end, resTy, st->Ist.LLSC.addr);
5759 tl_assert(resTy == Ity_I64 || resTy == Ity_I32
5760 || resTy == Ity_I16 || resTy == Ity_I8);
5761 assign( 'B', mce, findShadowTmpB(mce, st->Ist.LLSC.result),
5762 schemeE(mce, vanillaLoad));
5763 } else {
5764 /* Store conditional */
5765 do_origins_Store( mce, st->Ist.LLSC.end,
5766 st->Ist.LLSC.addr,
5767 st->Ist.LLSC.storedata );
5768 /* For the rationale behind this, see comments at the
5769 place where the V-shadow for .result is constructed, in
5770 do_shadow_LLSC. In short, we regard .result as
5771 always-defined. */
5772 assign( 'B', mce, findShadowTmpB(mce, st->Ist.LLSC.result),
5773 mkU32(0) );
sewardj1c0ce7a2009-07-01 08:10:49 +00005774 }
sewardj7cf4e6b2008-05-01 20:24:26 +00005775 break;
5776 }
sewardjdb5907d2009-11-26 17:20:21 +00005777
sewardj7cf4e6b2008-05-01 20:24:26 +00005778 case Ist_Put: {
5779 Int b_offset
5780 = MC_(get_otrack_shadow_offset)(
5781 st->Ist.Put.offset,
sewardj1c0ce7a2009-07-01 08:10:49 +00005782 sizeofIRType(typeOfIRExpr(mce->sb->tyenv, st->Ist.Put.data))
sewardj7cf4e6b2008-05-01 20:24:26 +00005783 );
5784 if (b_offset >= 0) {
5785 /* FIXME: this isn't an atom! */
5786 stmt( 'B', mce, IRStmt_Put(b_offset + 2*mce->layout->total_sizeB,
5787 schemeE( mce, st->Ist.Put.data )) );
5788 }
5789 break;
5790 }
sewardjdb5907d2009-11-26 17:20:21 +00005791
sewardj7cf4e6b2008-05-01 20:24:26 +00005792 case Ist_WrTmp:
5793 assign( 'B', mce, findShadowTmpB(mce, st->Ist.WrTmp.tmp),
5794 schemeE(mce, st->Ist.WrTmp.data) );
5795 break;
sewardjdb5907d2009-11-26 17:20:21 +00005796
sewardj7cf4e6b2008-05-01 20:24:26 +00005797 case Ist_MBE:
5798 case Ist_NoOp:
5799 case Ist_Exit:
5800 case Ist_IMark:
5801 break;
sewardjdb5907d2009-11-26 17:20:21 +00005802
sewardj7cf4e6b2008-05-01 20:24:26 +00005803 default:
5804 VG_(printf)("mc_translate.c: schemeS: unhandled: ");
5805 ppIRStmt(st);
5806 VG_(tool_panic)("memcheck:schemeS");
5807 }
5808}
5809
5810
njn25e49d8e72002-09-23 09:36:25 +00005811/*--------------------------------------------------------------------*/
njn25cac76cb2002-09-23 11:21:57 +00005812/*--- end mc_translate.c ---*/
njn25e49d8e72002-09-23 09:36:25 +00005813/*--------------------------------------------------------------------*/