blob: 007c9c2ee779fd9f856062968e2639661b61cbbb [file] [log] [blame]
nethercotebb1c9912004-01-04 16:43:23 +00001
njn25e49d8e72002-09-23 09:36:25 +00002/*--------------------------------------------------------------------*/
sewardj95448072004-11-22 20:19:51 +00003/*--- Instrument IR to perform memory checking operations. ---*/
njn25cac76cb2002-09-23 11:21:57 +00004/*--- mc_translate.c ---*/
njn25e49d8e72002-09-23 09:36:25 +00005/*--------------------------------------------------------------------*/
njnc9539842002-10-02 13:26:35 +00006
njn25e49d8e72002-09-23 09:36:25 +00007/*
nethercote137bc552003-11-14 17:47:54 +00008 This file is part of MemCheck, a heavyweight Valgrind tool for
njnc9539842002-10-02 13:26:35 +00009 detecting memory errors.
njn25e49d8e72002-09-23 09:36:25 +000010
sewardj03f8d3f2012-08-05 15:46:46 +000011 Copyright (C) 2000-2012 Julian Seward
njn25e49d8e72002-09-23 09:36:25 +000012 jseward@acm.org
13
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
18
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
23
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, write to the Free Software
26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27 02111-1307, USA.
28
29 The GNU General Public License is contained in the file COPYING.
30*/
31
njnc7561b92005-06-19 01:24:32 +000032#include "pub_tool_basics.h"
philippe6643e962012-01-17 21:16:30 +000033#include "pub_tool_poolalloc.h" // For mc_include.h
njn1d0825f2006-03-27 11:37:07 +000034#include "pub_tool_hashtable.h" // For mc_include.h
njn132bfcc2005-06-04 19:16:06 +000035#include "pub_tool_libcassert.h"
njn36a20fa2005-06-03 03:08:39 +000036#include "pub_tool_libcprint.h"
njnc7561b92005-06-19 01:24:32 +000037#include "pub_tool_tooliface.h"
sewardj53ee1fc2005-12-23 02:29:58 +000038#include "pub_tool_machine.h" // VG_(fnptr_to_fnentry)
sewardj81651dc2007-08-28 06:05:20 +000039#include "pub_tool_xarray.h"
40#include "pub_tool_mallocfree.h"
41#include "pub_tool_libcbase.h"
njn25e49d8e72002-09-23 09:36:25 +000042
sewardj7cf4e6b2008-05-01 20:24:26 +000043#include "mc_include.h"
44
45
sewardj7ee7d852011-06-16 11:37:21 +000046/* FIXMEs JRS 2011-June-16.
47
48 Check the interpretation for vector narrowing and widening ops,
49 particularly the saturating ones. I suspect they are either overly
50 pessimistic and/or wrong.
51*/
52
sewardj992dff92005-10-07 11:08:55 +000053/* This file implements the Memcheck instrumentation, and in
54 particular contains the core of its undefined value detection
55 machinery. For a comprehensive background of the terminology,
56 algorithms and rationale used herein, read:
57
58 Using Valgrind to detect undefined value errors with
59 bit-precision
60
61 Julian Seward and Nicholas Nethercote
62
63 2005 USENIX Annual Technical Conference (General Track),
64 Anaheim, CA, USA, April 10-15, 2005.
njn6665ea22007-05-24 23:14:41 +000065
66 ----
67
68 Here is as good a place as any to record exactly when V bits are and
69 should be checked, why, and what function is responsible.
70
71
72 Memcheck complains when an undefined value is used:
73
74 1. In the condition of a conditional branch. Because it could cause
75 incorrect control flow, and thus cause incorrect externally-visible
76 behaviour. [mc_translate.c:complainIfUndefined]
77
78 2. As an argument to a system call, or as the value that specifies
79 the system call number. Because it could cause an incorrect
80 externally-visible side effect. [mc_translate.c:mc_pre_reg_read]
81
82 3. As the address in a load or store. Because it could cause an
83 incorrect value to be used later, which could cause externally-visible
84 behaviour (eg. via incorrect control flow or an incorrect system call
85 argument) [complainIfUndefined]
86
87 4. As the target address of a branch. Because it could cause incorrect
88 control flow. [complainIfUndefined]
89
90 5. As an argument to setenv, unsetenv, or putenv. Because it could put
91 an incorrect value into the external environment.
92 [mc_replace_strmem.c:VG_WRAP_FUNCTION_ZU(*, *env)]
93
94 6. As the index in a GETI or PUTI operation. I'm not sure why... (njn).
95 [complainIfUndefined]
96
97 7. As an argument to the VALGRIND_CHECK_MEM_IS_DEFINED and
98 VALGRIND_CHECK_VALUE_IS_DEFINED client requests. Because the user
99 requested it. [in memcheck.h]
100
101
102 Memcheck also complains, but should not, when an undefined value is used:
103
104 8. As the shift value in certain SIMD shift operations (but not in the
      standard integer shift operations).  This inconsistency is due to
      historical reasons.  [complainIfUndefined]
107
108
109 Memcheck does not complain, but should, when an undefined value is used:
110
111 9. As an input to a client request. Because the client request may
112 affect the visible behaviour -- see bug #144362 for an example
113 involving the malloc replacements in vg_replace_malloc.c and
114 VALGRIND_NON_SIMD_CALL* requests, where an uninitialised argument
115 isn't identified. That bug report also has some info on how to solve
116 the problem. [valgrind.h:VALGRIND_DO_CLIENT_REQUEST]
117
118
119 In practice, 1 and 2 account for the vast majority of cases.
sewardj992dff92005-10-07 11:08:55 +0000120*/
121
sewardj95448072004-11-22 20:19:51 +0000122/*------------------------------------------------------------*/
123/*--- Forward decls ---*/
124/*------------------------------------------------------------*/
125
126struct _MCEnv;
127
sewardj7cf4e6b2008-05-01 20:24:26 +0000128static IRType shadowTypeV ( IRType ty );
sewardj95448072004-11-22 20:19:51 +0000129static IRExpr* expr2vbits ( struct _MCEnv* mce, IRExpr* e );
sewardjafa617b2008-07-22 09:59:48 +0000130static IRTemp findShadowTmpB ( struct _MCEnv* mce, IRTemp orig );
sewardj95448072004-11-22 20:19:51 +0000131
sewardjb5b87402011-03-07 16:05:35 +0000132static IRExpr *i128_const_zero(void);
sewardj95448072004-11-22 20:19:51 +0000133
134/*------------------------------------------------------------*/
135/*--- Memcheck running state, and tmp management. ---*/
136/*------------------------------------------------------------*/
137
/* Carries info about a particular tmp.  The tmp's number is not
   recorded, as this is implied by (equal to) its index in the tmpMap
   in MCEnv.  The tmp's type is also not recorded, as this is present
   in MCEnv.sb->tyenv.

   When .kind is Orig, .shadowV and .shadowB may give the identities
   of the temps currently holding the associated definedness (shadowV)
   and origin (shadowB) values, or these may be IRTemp_INVALID if code
   to compute such values has not yet been emitted.

   When .kind is VSh or BSh then the tmp holds a V- or B- value,
   and so .shadowV and .shadowB must be IRTemp_INVALID, since it is
   illogical for a shadow tmp itself to be shadowed.
*/
typedef
   enum { Orig=1, VSh=2, BSh=3 }
   TempKind;

typedef
   struct {
      TempKind kind;     /* role of this tmp: original, V-shadow or B-shadow */
      IRTemp   shadowV;  /* definedness (V-bits) shadow, or IRTemp_INVALID */
      IRTemp   shadowB;  /* origin-tracking (B-bits) shadow, or IRTemp_INVALID */
   }
   TempMapEnt;
163
164
/* Carries around state during memcheck instrumentation. */
typedef
   struct _MCEnv {
      /* MODIFIED: the superblock being constructed.  IRStmts are
         added. */
      IRSB* sb;
      /* True => print each statement as it is added (debugging aid);
         consulted by stmt(). */
      Bool  trace;

      /* MODIFIED: a table [0 .. #temps_in_sb-1] which gives the
         current kind and possibly shadow temps for each temp in the
         IRSB being constructed.  Note that it does not contain the
         type of each tmp.  If you want to know the type, look at the
         relevant entry in sb->tyenv.  It follows that at all times
         during the instrumentation process, the valid indices for
         tmpMap and sb->tyenv are identical, being 0 .. N-1 where N is
         total number of Orig, V- and B- temps allocated so far.

         The reason for this strange split (types in one place, all
         other info in another) is that we need the types to be
         attached to sb so as to make it possible to do
         "typeOfIRExpr(mce->bb->tyenv, ...)" at various places in the
         instrumentation process. */
      XArray* /* of TempMapEnt */ tmpMap;

      /* MODIFIED: indicates whether "bogus" literals have so far been
         found.  Starts off False, and may change to True. */
      Bool bogusLiterals;

      /* READONLY: indicates whether we should use expensive
         interpretations of integer adds, since unfortunately LLVM
         uses them to do ORs in some circumstances.  Defaulted to True
         on MacOS and False everywhere else. */
      Bool useLLVMworkarounds;

      /* READONLY: the guest layout.  This indicates which parts of
         the guest state should be regarded as 'always defined'. */
      VexGuestLayout* layout;

      /* READONLY: the host word type.  Needed for constructing
         arguments of type 'HWord' to be passed to helper functions.
         Ity_I32 or Ity_I64 only. */
      IRType hWordTy;
   }
   MCEnv;
209
210/* SHADOW TMP MANAGEMENT. Shadow tmps are allocated lazily (on
211 demand), as they are encountered. This is for two reasons.
212
213 (1) (less important reason): Many original tmps are unused due to
   initial IR optimisation, and we do not want to waste space in
   tables tracking them.
216
217 Shadow IRTemps are therefore allocated on demand. mce.tmpMap is a
218 table indexed [0 .. n_types-1], which gives the current shadow for
219 each original tmp, or INVALID_IRTEMP if none is so far assigned.
220 It is necessary to support making multiple assignments to a shadow
221 -- specifically, after testing a shadow for definedness, it needs
222 to be made defined. But IR's SSA property disallows this.
223
224 (2) (more important reason): Therefore, when a shadow needs to get
225 a new value, a new temporary is created, the value is assigned to
226 that, and the tmpMap is updated to reflect the new binding.
227
228 A corollary is that if the tmpMap maps a given tmp to
sewardjf1962d32006-10-19 13:22:16 +0000229 IRTemp_INVALID and we are hoping to read that shadow tmp, it means
sewardj95448072004-11-22 20:19:51 +0000230 there's a read-before-write error in the original tmps. The IR
231 sanity checker should catch all such anomalies, however.
njn25e49d8e72002-09-23 09:36:25 +0000232*/
sewardj95448072004-11-22 20:19:51 +0000233
sewardj1c0ce7a2009-07-01 08:10:49 +0000234/* Create a new IRTemp of type 'ty' and kind 'kind', and add it to
235 both the table in mce->sb and to our auxiliary mapping. Note that
236 newTemp may cause mce->tmpMap to resize, hence previous results
237 from VG_(indexXA)(mce->tmpMap) are invalidated. */
238static IRTemp newTemp ( MCEnv* mce, IRType ty, TempKind kind )
239{
240 Word newIx;
241 TempMapEnt ent;
242 IRTemp tmp = newIRTemp(mce->sb->tyenv, ty);
243 ent.kind = kind;
244 ent.shadowV = IRTemp_INVALID;
245 ent.shadowB = IRTemp_INVALID;
246 newIx = VG_(addToXA)( mce->tmpMap, &ent );
247 tl_assert(newIx == (Word)tmp);
248 return tmp;
249}
250
251
sewardj95448072004-11-22 20:19:51 +0000252/* Find the tmp currently shadowing the given original tmp. If none
253 so far exists, allocate one. */
sewardj7cf4e6b2008-05-01 20:24:26 +0000254static IRTemp findShadowTmpV ( MCEnv* mce, IRTemp orig )
njn25e49d8e72002-09-23 09:36:25 +0000255{
sewardj1c0ce7a2009-07-01 08:10:49 +0000256 TempMapEnt* ent;
257 /* VG_(indexXA) range-checks 'orig', hence no need to check
258 here. */
259 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
260 tl_assert(ent->kind == Orig);
261 if (ent->shadowV == IRTemp_INVALID) {
262 IRTemp tmpV
263 = newTemp( mce, shadowTypeV(mce->sb->tyenv->types[orig]), VSh );
264 /* newTemp may cause mce->tmpMap to resize, hence previous results
265 from VG_(indexXA) are invalid. */
266 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
267 tl_assert(ent->kind == Orig);
268 tl_assert(ent->shadowV == IRTemp_INVALID);
269 ent->shadowV = tmpV;
njn25e49d8e72002-09-23 09:36:25 +0000270 }
sewardj1c0ce7a2009-07-01 08:10:49 +0000271 return ent->shadowV;
njn25e49d8e72002-09-23 09:36:25 +0000272}
273
sewardj95448072004-11-22 20:19:51 +0000274/* Allocate a new shadow for the given original tmp. This means any
275 previous shadow is abandoned. This is needed because it is
276 necessary to give a new value to a shadow once it has been tested
277 for undefinedness, but unfortunately IR's SSA property disallows
278 this. Instead we must abandon the old shadow, allocate a new one
sewardj1c0ce7a2009-07-01 08:10:49 +0000279 and use that instead.
280
281 This is the same as findShadowTmpV, except we don't bother to see
282 if a shadow temp already existed -- we simply allocate a new one
283 regardless. */
sewardj7cf4e6b2008-05-01 20:24:26 +0000284static void newShadowTmpV ( MCEnv* mce, IRTemp orig )
njn25e49d8e72002-09-23 09:36:25 +0000285{
sewardj1c0ce7a2009-07-01 08:10:49 +0000286 TempMapEnt* ent;
287 /* VG_(indexXA) range-checks 'orig', hence no need to check
288 here. */
289 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
290 tl_assert(ent->kind == Orig);
291 if (1) {
292 IRTemp tmpV
293 = newTemp( mce, shadowTypeV(mce->sb->tyenv->types[orig]), VSh );
294 /* newTemp may cause mce->tmpMap to resize, hence previous results
295 from VG_(indexXA) are invalid. */
296 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
297 tl_assert(ent->kind == Orig);
298 ent->shadowV = tmpV;
299 }
sewardj95448072004-11-22 20:19:51 +0000300}
301
302
303/*------------------------------------------------------------*/
304/*--- IRAtoms -- a subset of IRExprs ---*/
305/*------------------------------------------------------------*/
306
307/* An atom is either an IRExpr_Const or an IRExpr_Tmp, as defined by
sewardj710d6c22005-03-20 18:55:15 +0000308 isIRAtom() in libvex_ir.h. Because this instrumenter expects flat
sewardj95448072004-11-22 20:19:51 +0000309 input, most of this code deals in atoms. Usefully, a value atom
310 always has a V-value which is also an atom: constants are shadowed
311 by constants, and temps are shadowed by the corresponding shadow
312 temporary. */
313
314typedef IRExpr IRAtom;
315
316/* (used for sanity checks only): is this an atom which looks
317 like it's from original code? */
318static Bool isOriginalAtom ( MCEnv* mce, IRAtom* a1 )
319{
320 if (a1->tag == Iex_Const)
321 return True;
sewardj1c0ce7a2009-07-01 08:10:49 +0000322 if (a1->tag == Iex_RdTmp) {
323 TempMapEnt* ent = VG_(indexXA)( mce->tmpMap, a1->Iex.RdTmp.tmp );
324 return ent->kind == Orig;
325 }
sewardj95448072004-11-22 20:19:51 +0000326 return False;
327}
328
329/* (used for sanity checks only): is this an atom which looks
330 like it's from shadow code? */
331static Bool isShadowAtom ( MCEnv* mce, IRAtom* a1 )
332{
333 if (a1->tag == Iex_Const)
334 return True;
sewardj1c0ce7a2009-07-01 08:10:49 +0000335 if (a1->tag == Iex_RdTmp) {
336 TempMapEnt* ent = VG_(indexXA)( mce->tmpMap, a1->Iex.RdTmp.tmp );
337 return ent->kind == VSh || ent->kind == BSh;
338 }
sewardj95448072004-11-22 20:19:51 +0000339 return False;
340}
341
342/* (used for sanity checks only): check that both args are atoms and
343 are identically-kinded. */
344static Bool sameKindedAtoms ( IRAtom* a1, IRAtom* a2 )
345{
sewardj0b9d74a2006-12-24 02:24:11 +0000346 if (a1->tag == Iex_RdTmp && a2->tag == Iex_RdTmp)
sewardj95448072004-11-22 20:19:51 +0000347 return True;
sewardjbef552a2005-08-30 12:54:36 +0000348 if (a1->tag == Iex_Const && a2->tag == Iex_Const)
sewardj95448072004-11-22 20:19:51 +0000349 return True;
350 return False;
351}
352
353
354/*------------------------------------------------------------*/
355/*--- Type management ---*/
356/*------------------------------------------------------------*/
357
358/* Shadow state is always accessed using integer types. This returns
359 an integer type with the same size (as per sizeofIRType) as the
360 given type. The only valid shadow types are Bit, I8, I16, I32,
sewardj45fa9f42012-05-21 10:18:10 +0000361 I64, I128, V128, V256. */
sewardj95448072004-11-22 20:19:51 +0000362
sewardj7cf4e6b2008-05-01 20:24:26 +0000363static IRType shadowTypeV ( IRType ty )
sewardj95448072004-11-22 20:19:51 +0000364{
365 switch (ty) {
366 case Ity_I1:
367 case Ity_I8:
368 case Ity_I16:
369 case Ity_I32:
sewardj6cf40ff2005-04-20 22:31:26 +0000370 case Ity_I64:
371 case Ity_I128: return ty;
sewardj3245c912004-12-10 14:58:26 +0000372 case Ity_F32: return Ity_I32;
sewardjb0ccb4d2012-04-02 10:22:05 +0000373 case Ity_D32: return Ity_I32;
sewardj3245c912004-12-10 14:58:26 +0000374 case Ity_F64: return Ity_I64;
sewardjb0ccb4d2012-04-02 10:22:05 +0000375 case Ity_D64: return Ity_I64;
sewardjb5b87402011-03-07 16:05:35 +0000376 case Ity_F128: return Ity_I128;
sewardjb0ccb4d2012-04-02 10:22:05 +0000377 case Ity_D128: return Ity_I128;
sewardj3245c912004-12-10 14:58:26 +0000378 case Ity_V128: return Ity_V128;
sewardj45fa9f42012-05-21 10:18:10 +0000379 case Ity_V256: return Ity_V256;
sewardj95448072004-11-22 20:19:51 +0000380 default: ppIRType(ty);
sewardj7cf4e6b2008-05-01 20:24:26 +0000381 VG_(tool_panic)("memcheck:shadowTypeV");
sewardj95448072004-11-22 20:19:51 +0000382 }
383}
384
385/* Produce a 'defined' value of the given shadow type. Should only be
386 supplied shadow types (Bit/I8/I16/I32/UI64). */
387static IRExpr* definedOfType ( IRType ty ) {
388 switch (ty) {
sewardj170ee212004-12-10 18:57:51 +0000389 case Ity_I1: return IRExpr_Const(IRConst_U1(False));
390 case Ity_I8: return IRExpr_Const(IRConst_U8(0));
391 case Ity_I16: return IRExpr_Const(IRConst_U16(0));
392 case Ity_I32: return IRExpr_Const(IRConst_U32(0));
393 case Ity_I64: return IRExpr_Const(IRConst_U64(0));
sewardjb5b87402011-03-07 16:05:35 +0000394 case Ity_I128: return i128_const_zero();
sewardj170ee212004-12-10 18:57:51 +0000395 case Ity_V128: return IRExpr_Const(IRConst_V128(0x0000));
sewardjf1962d32006-10-19 13:22:16 +0000396 default: VG_(tool_panic)("memcheck:definedOfType");
njn25e49d8e72002-09-23 09:36:25 +0000397 }
398}
399
400
sewardj95448072004-11-22 20:19:51 +0000401/*------------------------------------------------------------*/
402/*--- Constructing IR fragments ---*/
403/*------------------------------------------------------------*/
404
/* add stmt to a bb */
static inline void stmt ( HChar cat, MCEnv* mce, IRStmt* st ) {
   /* 'cat' is a one-character category tag ('V', 'B' or 'C' -- see
      assignNew), used only to label trace output. */
   if (mce->trace) {
      VG_(printf)(" %c: ", cat);
      ppIRStmt(st);
      VG_(printf)("\n");
   }
   addStmtToIRSB(mce->sb, st);
}

/* assign value to tmp */
static inline
void assign ( HChar cat, MCEnv* mce, IRTemp tmp, IRExpr* expr ) {
   stmt(cat, mce, IRStmt_WrTmp(tmp,expr));
}
sewardj95448072004-11-22 20:19:51 +0000420
421/* build various kinds of expressions */
sewardj57f92b02010-08-22 11:54:14 +0000422#define triop(_op, _arg1, _arg2, _arg3) \
423 IRExpr_Triop((_op),(_arg1),(_arg2),(_arg3))
sewardj95448072004-11-22 20:19:51 +0000424#define binop(_op, _arg1, _arg2) IRExpr_Binop((_op),(_arg1),(_arg2))
425#define unop(_op, _arg) IRExpr_Unop((_op),(_arg))
426#define mkU8(_n) IRExpr_Const(IRConst_U8(_n))
427#define mkU16(_n) IRExpr_Const(IRConst_U16(_n))
428#define mkU32(_n) IRExpr_Const(IRConst_U32(_n))
429#define mkU64(_n) IRExpr_Const(IRConst_U64(_n))
sewardj170ee212004-12-10 18:57:51 +0000430#define mkV128(_n) IRExpr_Const(IRConst_V128(_n))
sewardj0b9d74a2006-12-24 02:24:11 +0000431#define mkexpr(_tmp) IRExpr_RdTmp((_tmp))
sewardj95448072004-11-22 20:19:51 +0000432
sewardj7cf4e6b2008-05-01 20:24:26 +0000433/* Bind the given expression to a new temporary, and return the
sewardj95448072004-11-22 20:19:51 +0000434 temporary. This effectively converts an arbitrary expression into
sewardj7cf4e6b2008-05-01 20:24:26 +0000435 an atom.
436
437 'ty' is the type of 'e' and hence the type that the new temporary
sewardj1c0ce7a2009-07-01 08:10:49 +0000438 needs to be. But passing it in is redundant, since we can deduce
439 the type merely by inspecting 'e'. So at least use that fact to
440 assert that the two types agree. */
441static IRAtom* assignNew ( HChar cat, MCEnv* mce, IRType ty, IRExpr* e )
442{
443 TempKind k;
444 IRTemp t;
445 IRType tyE = typeOfIRExpr(mce->sb->tyenv, e);
sewardjb0ccb4d2012-04-02 10:22:05 +0000446
sewardj7cf4e6b2008-05-01 20:24:26 +0000447 tl_assert(tyE == ty); /* so 'ty' is redundant (!) */
sewardj1c0ce7a2009-07-01 08:10:49 +0000448 switch (cat) {
449 case 'V': k = VSh; break;
450 case 'B': k = BSh; break;
451 case 'C': k = Orig; break;
452 /* happens when we are making up new "orig"
453 expressions, for IRCAS handling */
454 default: tl_assert(0);
455 }
456 t = newTemp(mce, ty, k);
sewardj7cf4e6b2008-05-01 20:24:26 +0000457 assign(cat, mce, t, e);
sewardj95448072004-11-22 20:19:51 +0000458 return mkexpr(t);
459}
460
461
462/*------------------------------------------------------------*/
sewardjb5b87402011-03-07 16:05:35 +0000463/*--- Helper functions for 128-bit ops ---*/
464/*------------------------------------------------------------*/
sewardj45fa9f42012-05-21 10:18:10 +0000465
sewardjb5b87402011-03-07 16:05:35 +0000466static IRExpr *i128_const_zero(void)
467{
sewardj45fa9f42012-05-21 10:18:10 +0000468 IRAtom* z64 = IRExpr_Const(IRConst_U64(0));
469 return binop(Iop_64HLto128, z64, z64);
sewardjb5b87402011-03-07 16:05:35 +0000470}
471
sewardj45fa9f42012-05-21 10:18:10 +0000472/* There are no I128-bit loads and/or stores [as generated by any
473 current front ends]. So we do not need to worry about that in
474 expr2vbits_Load */
475
sewardjb5b87402011-03-07 16:05:35 +0000476
477/*------------------------------------------------------------*/
sewardj95448072004-11-22 20:19:51 +0000478/*--- Constructing definedness primitive ops ---*/
479/*------------------------------------------------------------*/
480
481/* --------- Defined-if-either-defined --------- */
482
483static IRAtom* mkDifD8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
484 tl_assert(isShadowAtom(mce,a1));
485 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000486 return assignNew('V', mce, Ity_I8, binop(Iop_And8, a1, a2));
sewardj95448072004-11-22 20:19:51 +0000487}
488
489static IRAtom* mkDifD16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
490 tl_assert(isShadowAtom(mce,a1));
491 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000492 return assignNew('V', mce, Ity_I16, binop(Iop_And16, a1, a2));
sewardj95448072004-11-22 20:19:51 +0000493}
494
495static IRAtom* mkDifD32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
496 tl_assert(isShadowAtom(mce,a1));
497 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000498 return assignNew('V', mce, Ity_I32, binop(Iop_And32, a1, a2));
sewardj95448072004-11-22 20:19:51 +0000499}
500
sewardj7010f6e2004-12-10 13:35:22 +0000501static IRAtom* mkDifD64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
502 tl_assert(isShadowAtom(mce,a1));
503 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000504 return assignNew('V', mce, Ity_I64, binop(Iop_And64, a1, a2));
sewardj7010f6e2004-12-10 13:35:22 +0000505}
506
sewardj20d38f22005-02-07 23:50:18 +0000507static IRAtom* mkDifDV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
sewardj170ee212004-12-10 18:57:51 +0000508 tl_assert(isShadowAtom(mce,a1));
509 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000510 return assignNew('V', mce, Ity_V128, binop(Iop_AndV128, a1, a2));
sewardj170ee212004-12-10 18:57:51 +0000511}
512
sewardj350e8f72012-06-25 07:52:15 +0000513static IRAtom* mkDifDV256 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
514 tl_assert(isShadowAtom(mce,a1));
515 tl_assert(isShadowAtom(mce,a2));
516 return assignNew('V', mce, Ity_V256, binop(Iop_AndV256, a1, a2));
517}
518
sewardj95448072004-11-22 20:19:51 +0000519/* --------- Undefined-if-either-undefined --------- */
520
521static IRAtom* mkUifU8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
522 tl_assert(isShadowAtom(mce,a1));
523 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000524 return assignNew('V', mce, Ity_I8, binop(Iop_Or8, a1, a2));
sewardj95448072004-11-22 20:19:51 +0000525}
526
527static IRAtom* mkUifU16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
528 tl_assert(isShadowAtom(mce,a1));
529 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000530 return assignNew('V', mce, Ity_I16, binop(Iop_Or16, a1, a2));
sewardj95448072004-11-22 20:19:51 +0000531}
532
533static IRAtom* mkUifU32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
534 tl_assert(isShadowAtom(mce,a1));
535 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000536 return assignNew('V', mce, Ity_I32, binop(Iop_Or32, a1, a2));
sewardj95448072004-11-22 20:19:51 +0000537}
538
539static IRAtom* mkUifU64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
540 tl_assert(isShadowAtom(mce,a1));
541 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000542 return assignNew('V', mce, Ity_I64, binop(Iop_Or64, a1, a2));
sewardj95448072004-11-22 20:19:51 +0000543}
544
sewardjb5b87402011-03-07 16:05:35 +0000545static IRAtom* mkUifU128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
546 IRAtom *tmp1, *tmp2, *tmp3, *tmp4, *tmp5, *tmp6;
547 tl_assert(isShadowAtom(mce,a1));
548 tl_assert(isShadowAtom(mce,a2));
549 tmp1 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, a1));
550 tmp2 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, a1));
551 tmp3 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, a2));
552 tmp4 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, a2));
553 tmp5 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp1, tmp3));
554 tmp6 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp2, tmp4));
555
556 return assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, tmp6, tmp5));
557}
558
sewardj20d38f22005-02-07 23:50:18 +0000559static IRAtom* mkUifUV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
sewardj3245c912004-12-10 14:58:26 +0000560 tl_assert(isShadowAtom(mce,a1));
561 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000562 return assignNew('V', mce, Ity_V128, binop(Iop_OrV128, a1, a2));
sewardj3245c912004-12-10 14:58:26 +0000563}
564
sewardj350e8f72012-06-25 07:52:15 +0000565static IRAtom* mkUifUV256 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
566 tl_assert(isShadowAtom(mce,a1));
567 tl_assert(isShadowAtom(mce,a2));
568 return assignNew('V', mce, Ity_V256, binop(Iop_OrV256, a1, a2));
569}
570
sewardje50a1b12004-12-17 01:24:54 +0000571static IRAtom* mkUifU ( MCEnv* mce, IRType vty, IRAtom* a1, IRAtom* a2 ) {
sewardj95448072004-11-22 20:19:51 +0000572 switch (vty) {
sewardje50a1b12004-12-17 01:24:54 +0000573 case Ity_I8: return mkUifU8(mce, a1, a2);
sewardja1d93302004-12-12 16:45:06 +0000574 case Ity_I16: return mkUifU16(mce, a1, a2);
575 case Ity_I32: return mkUifU32(mce, a1, a2);
576 case Ity_I64: return mkUifU64(mce, a1, a2);
sewardjb5b87402011-03-07 16:05:35 +0000577 case Ity_I128: return mkUifU128(mce, a1, a2);
sewardj20d38f22005-02-07 23:50:18 +0000578 case Ity_V128: return mkUifUV128(mce, a1, a2);
sewardj95448072004-11-22 20:19:51 +0000579 default:
580 VG_(printf)("\n"); ppIRType(vty); VG_(printf)("\n");
581 VG_(tool_panic)("memcheck:mkUifU");
njn25e49d8e72002-09-23 09:36:25 +0000582 }
583}
584
sewardj95448072004-11-22 20:19:51 +0000585/* --------- The Left-family of operations. --------- */
njn25e49d8e72002-09-23 09:36:25 +0000586
/* Apply the width-matching Iop_LeftN op to a shadow value (see
   libvex_ir.h for the definition of the Left family; roughly,
   Left(x) = x | -x, smearing the lowest set bit leftwards). */

static IRAtom* mkLeft8 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   return assignNew('V', mce, Ity_I8, unop(Iop_Left8, a1));
}

static IRAtom* mkLeft16 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   return assignNew('V', mce, Ity_I16, unop(Iop_Left16, a1));
}

static IRAtom* mkLeft32 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   return assignNew('V', mce, Ity_I32, unop(Iop_Left32, a1));
}

static IRAtom* mkLeft64 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   return assignNew('V', mce, Ity_I64, unop(Iop_Left64, a1));
}
606
sewardj95448072004-11-22 20:19:51 +0000607/* --------- 'Improvement' functions for AND/OR. --------- */
608
609/* ImproveAND(data, vbits) = data OR vbits. Defined (0) data 0s give
610 defined (0); all other -> undefined (1).
611*/
612static IRAtom* mkImproveAND8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
njn25e49d8e72002-09-23 09:36:25 +0000613{
sewardj95448072004-11-22 20:19:51 +0000614 tl_assert(isOriginalAtom(mce, data));
615 tl_assert(isShadowAtom(mce, vbits));
616 tl_assert(sameKindedAtoms(data, vbits));
sewardj7cf4e6b2008-05-01 20:24:26 +0000617 return assignNew('V', mce, Ity_I8, binop(Iop_Or8, data, vbits));
sewardj95448072004-11-22 20:19:51 +0000618}
njn25e49d8e72002-09-23 09:36:25 +0000619
sewardj95448072004-11-22 20:19:51 +0000620static IRAtom* mkImproveAND16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
621{
622 tl_assert(isOriginalAtom(mce, data));
623 tl_assert(isShadowAtom(mce, vbits));
624 tl_assert(sameKindedAtoms(data, vbits));
sewardj7cf4e6b2008-05-01 20:24:26 +0000625 return assignNew('V', mce, Ity_I16, binop(Iop_Or16, data, vbits));
sewardj95448072004-11-22 20:19:51 +0000626}
njn25e49d8e72002-09-23 09:36:25 +0000627
sewardj95448072004-11-22 20:19:51 +0000628static IRAtom* mkImproveAND32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
629{
630 tl_assert(isOriginalAtom(mce, data));
631 tl_assert(isShadowAtom(mce, vbits));
632 tl_assert(sameKindedAtoms(data, vbits));
sewardj7cf4e6b2008-05-01 20:24:26 +0000633 return assignNew('V', mce, Ity_I32, binop(Iop_Or32, data, vbits));
sewardj95448072004-11-22 20:19:51 +0000634}
njn25e49d8e72002-09-23 09:36:25 +0000635
sewardj7010f6e2004-12-10 13:35:22 +0000636static IRAtom* mkImproveAND64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
637{
638 tl_assert(isOriginalAtom(mce, data));
639 tl_assert(isShadowAtom(mce, vbits));
640 tl_assert(sameKindedAtoms(data, vbits));
sewardj7cf4e6b2008-05-01 20:24:26 +0000641 return assignNew('V', mce, Ity_I64, binop(Iop_Or64, data, vbits));
sewardj7010f6e2004-12-10 13:35:22 +0000642}
643
sewardj20d38f22005-02-07 23:50:18 +0000644static IRAtom* mkImproveANDV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
sewardj170ee212004-12-10 18:57:51 +0000645{
646 tl_assert(isOriginalAtom(mce, data));
647 tl_assert(isShadowAtom(mce, vbits));
648 tl_assert(sameKindedAtoms(data, vbits));
sewardj7cf4e6b2008-05-01 20:24:26 +0000649 return assignNew('V', mce, Ity_V128, binop(Iop_OrV128, data, vbits));
sewardj170ee212004-12-10 18:57:51 +0000650}
651
sewardj350e8f72012-06-25 07:52:15 +0000652static IRAtom* mkImproveANDV256 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
653{
654 tl_assert(isOriginalAtom(mce, data));
655 tl_assert(isShadowAtom(mce, vbits));
656 tl_assert(sameKindedAtoms(data, vbits));
657 return assignNew('V', mce, Ity_V256, binop(Iop_OrV256, data, vbits));
658}
659
sewardj95448072004-11-22 20:19:51 +0000660/* ImproveOR(data, vbits) = ~data OR vbits. Defined (0) data 1s give
661 defined (0); all other -> undefined (1).
662*/
663static IRAtom* mkImproveOR8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
664{
665 tl_assert(isOriginalAtom(mce, data));
666 tl_assert(isShadowAtom(mce, vbits));
667 tl_assert(sameKindedAtoms(data, vbits));
668 return assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +0000669 'V', mce, Ity_I8,
sewardj95448072004-11-22 20:19:51 +0000670 binop(Iop_Or8,
sewardj7cf4e6b2008-05-01 20:24:26 +0000671 assignNew('V', mce, Ity_I8, unop(Iop_Not8, data)),
sewardj95448072004-11-22 20:19:51 +0000672 vbits) );
673}
njn25e49d8e72002-09-23 09:36:25 +0000674
sewardj95448072004-11-22 20:19:51 +0000675static IRAtom* mkImproveOR16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
676{
677 tl_assert(isOriginalAtom(mce, data));
678 tl_assert(isShadowAtom(mce, vbits));
679 tl_assert(sameKindedAtoms(data, vbits));
680 return assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +0000681 'V', mce, Ity_I16,
sewardj95448072004-11-22 20:19:51 +0000682 binop(Iop_Or16,
sewardj7cf4e6b2008-05-01 20:24:26 +0000683 assignNew('V', mce, Ity_I16, unop(Iop_Not16, data)),
sewardj95448072004-11-22 20:19:51 +0000684 vbits) );
685}
njn25e49d8e72002-09-23 09:36:25 +0000686
sewardj95448072004-11-22 20:19:51 +0000687static IRAtom* mkImproveOR32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
688{
689 tl_assert(isOriginalAtom(mce, data));
690 tl_assert(isShadowAtom(mce, vbits));
691 tl_assert(sameKindedAtoms(data, vbits));
692 return assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +0000693 'V', mce, Ity_I32,
sewardj95448072004-11-22 20:19:51 +0000694 binop(Iop_Or32,
sewardj7cf4e6b2008-05-01 20:24:26 +0000695 assignNew('V', mce, Ity_I32, unop(Iop_Not32, data)),
sewardj95448072004-11-22 20:19:51 +0000696 vbits) );
697}
698
sewardj7010f6e2004-12-10 13:35:22 +0000699static IRAtom* mkImproveOR64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
700{
701 tl_assert(isOriginalAtom(mce, data));
702 tl_assert(isShadowAtom(mce, vbits));
703 tl_assert(sameKindedAtoms(data, vbits));
704 return assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +0000705 'V', mce, Ity_I64,
sewardj7010f6e2004-12-10 13:35:22 +0000706 binop(Iop_Or64,
sewardj7cf4e6b2008-05-01 20:24:26 +0000707 assignNew('V', mce, Ity_I64, unop(Iop_Not64, data)),
sewardj7010f6e2004-12-10 13:35:22 +0000708 vbits) );
709}
710
sewardj20d38f22005-02-07 23:50:18 +0000711static IRAtom* mkImproveORV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
sewardj170ee212004-12-10 18:57:51 +0000712{
713 tl_assert(isOriginalAtom(mce, data));
714 tl_assert(isShadowAtom(mce, vbits));
715 tl_assert(sameKindedAtoms(data, vbits));
716 return assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +0000717 'V', mce, Ity_V128,
sewardj20d38f22005-02-07 23:50:18 +0000718 binop(Iop_OrV128,
sewardj7cf4e6b2008-05-01 20:24:26 +0000719 assignNew('V', mce, Ity_V128, unop(Iop_NotV128, data)),
sewardj170ee212004-12-10 18:57:51 +0000720 vbits) );
721}
722
sewardj350e8f72012-06-25 07:52:15 +0000723static IRAtom* mkImproveORV256 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
724{
725 tl_assert(isOriginalAtom(mce, data));
726 tl_assert(isShadowAtom(mce, vbits));
727 tl_assert(sameKindedAtoms(data, vbits));
728 return assignNew(
729 'V', mce, Ity_V256,
730 binop(Iop_OrV256,
731 assignNew('V', mce, Ity_V256, unop(Iop_NotV256, data)),
732 vbits) );
733}
734
sewardj95448072004-11-22 20:19:51 +0000735/* --------- Pessimising casts. --------- */
736
sewardjb5b87402011-03-07 16:05:35 +0000737/* The function returns an expression of type DST_TY. If any of the VBITS
738 is undefined (value == 1) the resulting expression has all bits set to
739 1. Otherwise, all bits are 0. */
740
static IRAtom* mkPCastTo( MCEnv* mce, IRType dst_ty, IRAtom* vbits )
{
   IRType  src_ty;
   IRAtom* tmp1;

   /* Note, dst_ty is a shadow type, not an original type. */
   tl_assert(isShadowAtom(mce,vbits));
   src_ty = typeOfIRExpr(mce->sb->tyenv, vbits);

   /* Fast-track some common cases: a same-width pessimising cast is
      just a "compare whole word with zero, smear result" op. */
   if (src_ty == Ity_I32 && dst_ty == Ity_I32)
      return assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));

   if (src_ty == Ity_I64 && dst_ty == Ity_I64)
      return assignNew('V', mce, Ity_I64, unop(Iop_CmpwNEZ64, vbits));

   if (src_ty == Ity_I32 && dst_ty == Ity_I64) {
      /* PCast the arg, then clone it: both halves of the I64 carry
         the same all-0s/all-1s value. */
      IRAtom* tmp = assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));
      return assignNew('V', mce, Ity_I64, binop(Iop_32HLto64, tmp, tmp));
   }

   if (src_ty == Ity_I64 && dst_ty == Ity_I32) {
      /* PCast the arg.  This gives all 0s or all 1s.  Then throw away
         the top half. */
      IRAtom* tmp = assignNew('V', mce, Ity_I64, unop(Iop_CmpwNEZ64, vbits));
      return assignNew('V', mce, Ity_I32, unop(Iop_64to32, tmp));
   }

   /* Else do it the slow way .. */
   /* First of all, collapse vbits down to a single bit. */
   tmp1 = NULL;
   switch (src_ty) {
      case Ity_I1:
         /* Already a single bit; nothing to collapse. */
         tmp1 = vbits;
         break;
      case Ity_I8:
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ8, vbits));
         break;
      case Ity_I16:
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ16, vbits));
         break;
      case Ity_I32:
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ32, vbits));
         break;
      case Ity_I64:
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ64, vbits));
         break;
      case Ity_I128: {
         /* Gah.  Chop it in half, OR the halves together, and compare
            that with zero. */
         IRAtom* tmp2 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, vbits));
         IRAtom* tmp3 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, vbits));
         IRAtom* tmp4 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp2, tmp3));
         tmp1 = assignNew('V', mce, Ity_I1,
                               unop(Iop_CmpNEZ64, tmp4));
         break;
      }
      default:
         /* No collapse rule for this source type. */
         ppIRType(src_ty);
         VG_(tool_panic)("mkPCastTo(1)");
   }
   tl_assert(tmp1);
   /* Now widen up to the dst type: sign-extend the single bit so the
      result is all 0s (defined) or all 1s (undefined). */
   switch (dst_ty) {
      case Ity_I1:
         return tmp1;
      case Ity_I8:
         return assignNew('V', mce, Ity_I8, unop(Iop_1Sto8, tmp1));
      case Ity_I16:
         return assignNew('V', mce, Ity_I16, unop(Iop_1Sto16, tmp1));
      case Ity_I32:
         return assignNew('V', mce, Ity_I32, unop(Iop_1Sto32, tmp1));
      case Ity_I64:
         return assignNew('V', mce, Ity_I64, unop(Iop_1Sto64, tmp1));
      case Ity_V128:
         /* Widen to I64 first, then duplicate into both halves. */
         tmp1 = assignNew('V', mce, Ity_I64, unop(Iop_1Sto64, tmp1));
         tmp1 = assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128, tmp1, tmp1));
         return tmp1;
      case Ity_I128:
         /* Same scheme as V128. */
         tmp1 = assignNew('V', mce, Ity_I64, unop(Iop_1Sto64, tmp1));
         tmp1 = assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, tmp1, tmp1));
         return tmp1;
      default:
         ppIRType(dst_ty);
         VG_(tool_panic)("mkPCastTo(2)");
   }
}
829
sewardjd5204dc2004-12-31 01:16:11 +0000830/* --------- Accurate interpretation of CmpEQ/CmpNE. --------- */
831/*
832 Normally, we can do CmpEQ/CmpNE by doing UifU on the arguments, and
833 PCasting to Ity_U1. However, sometimes it is necessary to be more
834 accurate. The insight is that the result is defined if two
835 corresponding bits can be found, one from each argument, so that
836 both bits are defined but are different -- that makes EQ say "No"
837 and NE say "Yes". Hence, we compute an improvement term and DifD
838 it onto the "normal" (UifU) result.
839
840 The result is:
841
842 PCastTo<1> (
sewardje6f8af42005-07-06 18:48:59 +0000843 -- naive version
844 PCastTo<sz>( UifU<sz>(vxx, vyy) )
845
sewardjd5204dc2004-12-31 01:16:11 +0000846 `DifD<sz>`
sewardje6f8af42005-07-06 18:48:59 +0000847
848 -- improvement term
849 PCastTo<sz>( PCast<sz>( CmpEQ<sz> ( vec, 1...1 ) ) )
sewardjd5204dc2004-12-31 01:16:11 +0000850 )
sewardje6f8af42005-07-06 18:48:59 +0000851
sewardjd5204dc2004-12-31 01:16:11 +0000852 where
853 vec contains 0 (defined) bits where the corresponding arg bits
sewardje6f8af42005-07-06 18:48:59 +0000854 are defined but different, and 1 bits otherwise.
sewardjd5204dc2004-12-31 01:16:11 +0000855
sewardje6f8af42005-07-06 18:48:59 +0000856 vec = Or<sz>( vxx, // 0 iff bit defined
857 vyy, // 0 iff bit defined
858 Not<sz>(Xor<sz>( xx, yy )) // 0 iff bits different
859 )
860
861 If any bit of vec is 0, the result is defined and so the
862 improvement term should produce 0...0, else it should produce
863 1...1.
864
865 Hence require for the improvement term:
866
867 if vec == 1...1 then 1...1 else 0...0
868 ->
869 PCast<sz>( CmpEQ<sz> ( vec, 1...1 ) )
870
871 This was extensively re-analysed and checked on 6 July 05.
sewardjd5204dc2004-12-31 01:16:11 +0000872*/
static IRAtom* expensiveCmpEQorNE ( MCEnv* mce,
                                    IRType  ty,
                                    IRAtom* vxx, IRAtom* vyy,
                                    IRAtom* xx, IRAtom* yy )
{
   IRAtom *naive, *vec, *improvement_term;
   IRAtom *improved, *final_cast, *top;
   IROp   opDIFD, opUIFU, opXOR, opNOT, opCMP, opOR;

   /* vxx/vyy are the shadow (V-bit) values of originals xx/yy. */
   tl_assert(isShadowAtom(mce,vxx));
   tl_assert(isShadowAtom(mce,vyy));
   tl_assert(isOriginalAtom(mce,xx));
   tl_assert(isOriginalAtom(mce,yy));
   tl_assert(sameKindedAtoms(vxx,xx));
   tl_assert(sameKindedAtoms(vyy,yy));

   /* Select the width-specific ops; only I32 and I64 are handled. */
   switch (ty) {
      case Ity_I32:
         opOR   = Iop_Or32;
         opDIFD = Iop_And32;
         opUIFU = Iop_Or32;
         opNOT  = Iop_Not32;
         opXOR  = Iop_Xor32;
         opCMP  = Iop_CmpEQ32;
         top    = mkU32(0xFFFFFFFF);
         break;
      case Ity_I64:
         opOR   = Iop_Or64;
         opDIFD = Iop_And64;
         opUIFU = Iop_Or64;
         opNOT  = Iop_Not64;
         opXOR  = Iop_Xor64;
         opCMP  = Iop_CmpEQ64;
         top    = mkU64(0xFFFFFFFFFFFFFFFFULL);
         break;
      default:
         VG_(tool_panic)("expensiveCmpEQorNE");
   }

   /* naive = PCast(vxx `UifU` vyy): undefined if any input bit is. */
   naive
      = mkPCastTo(mce,ty,
                  assignNew('V', mce, ty, binop(opUIFU, vxx, vyy)));

   /* vec has a 0 bit exactly where the corresponding xx/yy bits are
      both defined but differ (see block comment above). */
   vec
      = assignNew(
           'V', mce,ty,
           binop( opOR,
                  assignNew('V', mce,ty, binop(opOR, vxx, vyy)),
                  assignNew(
                     'V', mce,ty,
                     unop( opNOT,
                           assignNew('V', mce,ty, binop(opXOR, xx, yy))))));

   /* improvement_term is all-1s unless some bit of vec is 0, in which
      case the comparison result is known and the term is all-0s. */
   improvement_term
      = mkPCastTo( mce,ty,
                   assignNew('V', mce,Ity_I1, binop(opCMP, vec, top)));

   /* DifD the improvement onto the naive result. */
   improved
      = assignNew( 'V', mce,ty, binop(opDIFD, naive, improvement_term) );

   /* The comparison yields an I1, so pessimise down to one bit. */
   final_cast
      = mkPCastTo( mce, Ity_I1, improved );

   return final_cast;
}
938
sewardj95448072004-11-22 20:19:51 +0000939
sewardj992dff92005-10-07 11:08:55 +0000940/* --------- Semi-accurate interpretation of CmpORD. --------- */
941
942/* CmpORD32{S,U} does PowerPC-style 3-way comparisons:
943
944 CmpORD32S(x,y) = 1<<3 if x <s y
945 = 1<<2 if x >s y
946 = 1<<1 if x == y
947
948 and similarly the unsigned variant. The default interpretation is:
949
950 CmpORD32{S,U}#(x,y,x#,y#) = PCast(x# `UifU` y#)
sewardj1bc82102005-12-23 00:16:24 +0000951 & (7<<1)
sewardj992dff92005-10-07 11:08:55 +0000952
953 The "& (7<<1)" reflects the fact that all result bits except 3,2,1
954 are zero and therefore defined (viz, zero).
sewardja9e62a92005-10-07 12:13:21 +0000955
956 Also deal with a special case better:
957
958 CmpORD32S(x,0)
959
960 Here, bit 3 (LT) of the result is a copy of the top bit of x and
961 will be defined even if the rest of x isn't. In which case we do:
962
963 CmpORD32S#(x,x#,0,{impliedly 0}#)
sewardj1bc82102005-12-23 00:16:24 +0000964 = PCast(x#) & (3<<1) -- standard interp for GT#,EQ#
965 | (x# >>u 31) << 3 -- LT# = x#[31]
sewardja9e62a92005-10-07 12:13:21 +0000966
sewardj1bc82102005-12-23 00:16:24 +0000967 Analogous handling for CmpORD64{S,U}.
sewardj992dff92005-10-07 11:08:55 +0000968*/
sewardja9e62a92005-10-07 12:13:21 +0000969static Bool isZeroU32 ( IRAtom* e )
970{
971 return
972 toBool( e->tag == Iex_Const
973 && e->Iex.Const.con->tag == Ico_U32
974 && e->Iex.Const.con->Ico.U32 == 0 );
975}
976
sewardj1bc82102005-12-23 00:16:24 +0000977static Bool isZeroU64 ( IRAtom* e )
sewardj992dff92005-10-07 11:08:55 +0000978{
sewardj1bc82102005-12-23 00:16:24 +0000979 return
980 toBool( e->tag == Iex_Const
981 && e->Iex.Const.con->tag == Ico_U64
982 && e->Iex.Const.con->Ico.U64 == 0 );
983}
984
static IRAtom* doCmpORD ( MCEnv* mce,
                          IROp cmp_op,
                          IRAtom* xxhash, IRAtom* yyhash,
                          IRAtom* xx, IRAtom* yy )
{
   /* Width/signedness dispatch; xxhash/yyhash are the shadow values
      of xx/yy. */
   Bool   m64    = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U;
   Bool   syned  = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD32S;
   IROp   opOR   = m64 ? Iop_Or64  : Iop_Or32;
   IROp   opAND  = m64 ? Iop_And64 : Iop_And32;
   IROp   opSHL  = m64 ? Iop_Shl64 : Iop_Shl32;
   IROp   opSHR  = m64 ? Iop_Shr64 : Iop_Shr32;
   IRType ty     = m64 ? Ity_I64   : Ity_I32;
   Int    width  = m64 ? 64        : 32;

   Bool (*isZero)(IRAtom*) = m64 ? isZeroU64 : isZeroU32;

   IRAtom* threeLeft1 = NULL;
   IRAtom* sevenLeft1 = NULL;

   tl_assert(isShadowAtom(mce,xxhash));
   tl_assert(isShadowAtom(mce,yyhash));
   tl_assert(isOriginalAtom(mce,xx));
   tl_assert(isOriginalAtom(mce,yy));
   tl_assert(sameKindedAtoms(xxhash,xx));
   tl_assert(sameKindedAtoms(yyhash,yy));
   tl_assert(cmp_op == Iop_CmpORD32S || cmp_op == Iop_CmpORD32U
             || cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U);

   /* Debug printing of the comparison being instrumented. */
   if (0) {
      ppIROp(cmp_op); VG_(printf)(" ");
      ppIRExpr(xx); VG_(printf)(" "); ppIRExpr( yy ); VG_(printf)("\n");
   }

   if (syned && isZero(yy)) {
      /* fancy interpretation */
      /* if yy is zero, then it must be fully defined (zero#). */
      tl_assert(isZero(yyhash));
      threeLeft1 = m64 ? mkU64(3<<1) : mkU32(3<<1);
      /* Result V bits = (PCast(x#) & (3<<1))        -- GT#, EQ#
                       | ((x# >>u width-1) << 3)     -- LT# = x#[msb] */
      return
         binop(
            opOR,
            assignNew(
               'V', mce,ty,
               binop(
                  opAND,
                  mkPCastTo(mce,ty, xxhash),
                  threeLeft1
               )),
            assignNew(
               'V', mce,ty,
               binop(
                  opSHL,
                  assignNew(
                     'V', mce,ty,
                     binop(opSHR, xxhash, mkU8(width-1))),
                  mkU8(3)
               ))
         );
   } else {
      /* standard interpretation */
      /* PCast both shadows together, then mask to bits 3..1 -- all
         other result bits are always zero, hence defined. */
      sevenLeft1 = m64 ? mkU64(7<<1) : mkU32(7<<1);
      return
         binop(
            opAND,
            mkPCastTo( mce,ty,
                       mkUifU(mce,ty, xxhash,yyhash)),
            sevenLeft1
         );
   }
}
1055
1056
sewardj95448072004-11-22 20:19:51 +00001057/*------------------------------------------------------------*/
1058/*--- Emit a test and complaint if something is undefined. ---*/
1059/*------------------------------------------------------------*/
1060
sewardj7cf4e6b2008-05-01 20:24:26 +00001061static IRAtom* schemeE ( MCEnv* mce, IRExpr* e ); /* fwds */
1062
1063
sewardj95448072004-11-22 20:19:51 +00001064/* Set the annotations on a dirty helper to indicate that the stack
1065 pointer and instruction pointers might be read. This is the
1066 behaviour of all 'emit-a-complaint' style functions we might
1067 call. */
1068
1069static void setHelperAnns ( MCEnv* mce, IRDirty* di ) {
1070 di->nFxState = 2;
sewardj2eecb742012-06-01 16:11:41 +00001071 di->fxState[0].fx = Ifx_Read;
1072 di->fxState[0].offset = mce->layout->offset_SP;
1073 di->fxState[0].size = mce->layout->sizeof_SP;
1074 di->fxState[0].nRepeats = 0;
1075 di->fxState[0].repeatLen = 0;
1076 di->fxState[1].fx = Ifx_Read;
1077 di->fxState[1].offset = mce->layout->offset_IP;
1078 di->fxState[1].size = mce->layout->sizeof_IP;
1079 di->fxState[1].nRepeats = 0;
1080 di->fxState[1].repeatLen = 0;
sewardj95448072004-11-22 20:19:51 +00001081}
1082
1083
1084/* Check the supplied **original** atom for undefinedness, and emit a
1085 complaint if so. Once that happens, mark it as defined. This is
1086 possible because the atom is either a tmp or literal. If it's a
1087 tmp, it will be shadowed by a tmp, and so we can set the shadow to
1088 be defined. In fact as mentioned above, we will have to allocate a
1089 new tmp to carry the new 'defined' shadow value, and update the
1090 original->tmp mapping accordingly; we cannot simply assign a new
1091 value to an existing shadow tmp as this breaks SSAness -- resulting
1092 in the post-instrumentation sanity checker spluttering in disapproval.
1093*/
static void complainIfUndefined ( MCEnv* mce, IRAtom* atom, IRExpr *guard )
{
   IRAtom*  vatom;
   IRType   ty;
   Int      sz;
   IRDirty* di;
   IRAtom*  cond;
   IRAtom*  origin;
   void*    fn;       /* complaint helper to call */
   HChar*   nm;       /* its name, for the IRDirty */
   IRExpr** args;     /* its argument vector */
   Int      nargs;

   // Don't do V bit tests if we're not reporting undefined value errors.
   if (MC_(clo_mc_level) == 1)
      return;

   /* Since the original expression is atomic, there's no duplicated
      work generated by making multiple V-expressions for it.  So we
      don't really care about the possibility that someone else may
      also create a V-interpretion for it. */
   tl_assert(isOriginalAtom(mce, atom));
   vatom = expr2vbits( mce, atom );
   tl_assert(isShadowAtom(mce, vatom));
   tl_assert(sameKindedAtoms(atom, vatom));

   ty = typeOfIRExpr(mce->sb->tyenv, vatom);

   /* sz is only used for constructing the error message */
   sz = ty==Ity_I1 ? 0 : sizeofIRType(ty);

   cond = mkPCastTo( mce, Ity_I1, vatom );
   /* cond will be 0 if all defined, and 1 if any not defined. */

   /* Get the origin info for the value we are about to check.  At
      least, if we are doing origin tracking.  If not, use a dummy
      zero origin. */
   if (MC_(clo_mc_level) == 3) {
      origin = schemeE( mce, atom );
      if (mce->hWordTy == Ity_I64) {
         /* Origins are 32-bit; widen to the host word size. */
         origin = assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, origin) );
      }
   } else {
      origin = NULL;
   }

   fn    = NULL;
   nm    = NULL;
   args  = NULL;
   nargs = -1;

   /* Pick the size-specific complaint helper; the _w_o variants also
      take the origin as an argument. */
   switch (sz) {
      case 0:
         if (origin) {
            fn    = &MC_(helperc_value_check0_fail_w_o);
            nm    = "MC_(helperc_value_check0_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check0_fail_no_o);
            nm    = "MC_(helperc_value_check0_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 1:
         if (origin) {
            fn    = &MC_(helperc_value_check1_fail_w_o);
            nm    = "MC_(helperc_value_check1_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check1_fail_no_o);
            nm    = "MC_(helperc_value_check1_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 4:
         if (origin) {
            fn    = &MC_(helperc_value_check4_fail_w_o);
            nm    = "MC_(helperc_value_check4_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check4_fail_no_o);
            nm    = "MC_(helperc_value_check4_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 8:
         if (origin) {
            fn    = &MC_(helperc_value_check8_fail_w_o);
            nm    = "MC_(helperc_value_check8_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check8_fail_no_o);
            nm    = "MC_(helperc_value_check8_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 2:
      case 16:
         /* No dedicated helper for these sizes; the generic one takes
            the size as an extra argument. */
         if (origin) {
            fn    = &MC_(helperc_value_checkN_fail_w_o);
            nm    = "MC_(helperc_value_checkN_fail_w_o)";
            args  = mkIRExprVec_2( mkIRExpr_HWord( sz ), origin);
            nargs = 2;
         } else {
            fn    = &MC_(helperc_value_checkN_fail_no_o);
            nm    = "MC_(helperc_value_checkN_fail_no_o)";
            args  = mkIRExprVec_1( mkIRExpr_HWord( sz ) );
            nargs = 1;
         }
         break;
      default:
         VG_(tool_panic)("unexpected szB");
   }

   tl_assert(fn);
   tl_assert(nm);
   tl_assert(args);
   tl_assert(nargs >= 0 && nargs <= 2);
   tl_assert( (MC_(clo_mc_level) == 3 && origin != NULL)
              || (MC_(clo_mc_level) == 2 && origin == NULL) );

   /* Emit a guarded dirty call: the helper only runs when cond says
      "some bit undefined". */
   di = unsafeIRDirty_0_N( nargs/*regparms*/, nm,
                           VG_(fnptr_to_fnentry)( fn ), args );
   di->guard = cond;

   /* If the complaint is to be issued under a guard condition, AND that
      guard condition. */
   if (guard) {
      IRAtom *g1 = assignNew('V', mce, Ity_I32, unop(Iop_1Uto32, di->guard));
      IRAtom *g2 = assignNew('V', mce, Ity_I32, unop(Iop_1Uto32, guard));
      IRAtom *e  = assignNew('V', mce, Ity_I32, binop(Iop_And32, g1, g2));

      di->guard = assignNew('V', mce, Ity_I1, unop(Iop_32to1, e));
   }

   setHelperAnns( mce, di );
   stmt( 'V', mce, IRStmt_Dirty(di));

   /* Set the shadow tmp to be defined.  First, update the
      orig->shadow tmp mapping to reflect the fact that this shadow is
      getting a new value. */
   tl_assert(isIRAtom(vatom));
   /* sameKindedAtoms ... */
   if (vatom->tag == Iex_RdTmp) {
      tl_assert(atom->tag == Iex_RdTmp);
      newShadowTmpV(mce, atom->Iex.RdTmp.tmp);
      assign('V', mce, findShadowTmpV(mce, atom->Iex.RdTmp.tmp),
                       definedOfType(ty));
   }
}
1252
1253
1254/*------------------------------------------------------------*/
1255/*--- Shadowing PUTs/GETs, and indexed variants thereof ---*/
1256/*------------------------------------------------------------*/
1257
1258/* Examine the always-defined sections declared in layout to see if
   the (offset,size) section is within one.  Note, it is an error to
1260 partially fall into such a region: (offset,size) should either be
1261 completely in such a region or completely not-in such a region.
1262*/
1263static Bool isAlwaysDefd ( MCEnv* mce, Int offset, Int size )
1264{
1265 Int minoffD, maxoffD, i;
1266 Int minoff = offset;
1267 Int maxoff = minoff + size - 1;
1268 tl_assert((minoff & ~0xFFFF) == 0);
1269 tl_assert((maxoff & ~0xFFFF) == 0);
1270
1271 for (i = 0; i < mce->layout->n_alwaysDefd; i++) {
1272 minoffD = mce->layout->alwaysDefd[i].offset;
1273 maxoffD = minoffD + mce->layout->alwaysDefd[i].size - 1;
1274 tl_assert((minoffD & ~0xFFFF) == 0);
1275 tl_assert((maxoffD & ~0xFFFF) == 0);
1276
1277 if (maxoff < minoffD || maxoffD < minoff)
1278 continue; /* no overlap */
1279 if (minoff >= minoffD && maxoff <= maxoffD)
1280 return True; /* completely contained in an always-defd section */
1281
1282 VG_(tool_panic)("memcheck:isAlwaysDefd:partial overlap");
1283 }
1284 return False; /* could not find any containing section */
1285}
1286
1287
1288/* Generate into bb suitable actions to shadow this Put. If the state
1289 slice is marked 'always defined', do nothing. Otherwise, write the
1290 supplied V bits to the shadow state. We can pass in either an
1291 original atom or a V-atom, but not both. In the former case the
1292 relevant V-bits are then generated from the original.
florian434ffae2012-07-19 17:23:42 +00001293 We assume here, that the definedness of GUARD has already been checked.
sewardj95448072004-11-22 20:19:51 +00001294*/
static
void do_shadow_PUT ( MCEnv* mce, Int offset,
                     IRAtom* atom, IRAtom* vatom, IRExpr *guard )
{
   IRType ty;

   // Don't do shadow PUTs if we're not doing undefined value checking.
   // Their absence lets Vex's optimiser remove all the shadow computation
   // that they depend on, which includes GETs of the shadow registers.
   if (MC_(clo_mc_level) == 1)
      return;

   /* Exactly one of atom (original) and vatom (shadow) is supplied;
      if given the original, compute its V bits here. */
   if (atom) {
      tl_assert(!vatom);
      tl_assert(isOriginalAtom(mce, atom));
      vatom = expr2vbits( mce, atom );
   } else {
      tl_assert(vatom);
      tl_assert(isShadowAtom(mce, vatom));
   }

   ty = typeOfIRExpr(mce->sb->tyenv, vatom);
   tl_assert(ty != Ity_I1);
   tl_assert(ty != Ity_I128);
   if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
      /* later: no ... */
      /* emit code to emit a complaint if any of the vbits are 1. */
      /* complainIfUndefined(mce, atom); */
   } else {
      /* Do a plain shadow Put. */
      if (guard) {
         /* If the guard expression evaluates to false we simply Put the value
            that is already stored in the guest state slot */
         IRAtom *cond, *iffalse;

         cond = assignNew('V', mce, Ity_I8, unop(Iop_1Uto8, guard));
         iffalse = assignNew('V', mce, ty,
                             IRExpr_Get(offset + mce->layout->total_sizeB, ty));
         vatom = assignNew('V', mce, ty, IRExpr_Mux0X(cond, iffalse, vatom));
      }
      /* Shadow state lives at a fixed displacement (total_sizeB) past
         the real guest state. */
      stmt( 'V', mce, IRStmt_Put( offset + mce->layout->total_sizeB, vatom ));
   }
}
1338
1339
1340/* Return an expression which contains the V bits corresponding to the
1341 given GETI (passed in in pieces).
1342*/
1343static
floriand39b0222012-05-31 15:48:13 +00001344void do_shadow_PUTI ( MCEnv* mce, IRPutI *puti)
sewardj95448072004-11-22 20:19:51 +00001345{
sewardj7cf97ee2004-11-28 14:25:01 +00001346 IRAtom* vatom;
1347 IRType ty, tyS;
1348 Int arrSize;;
floriand39b0222012-05-31 15:48:13 +00001349 IRRegArray* descr = puti->descr;
1350 IRAtom* ix = puti->ix;
1351 Int bias = puti->bias;
1352 IRAtom* atom = puti->data;
sewardj7cf97ee2004-11-28 14:25:01 +00001353
njn1d0825f2006-03-27 11:37:07 +00001354 // Don't do shadow PUTIs if we're not doing undefined value checking.
1355 // Their absence lets Vex's optimiser remove all the shadow computation
1356 // that they depend on, which includes GETIs of the shadow registers.
sewardj7cf4e6b2008-05-01 20:24:26 +00001357 if (MC_(clo_mc_level) == 1)
njn1d0825f2006-03-27 11:37:07 +00001358 return;
1359
sewardj95448072004-11-22 20:19:51 +00001360 tl_assert(isOriginalAtom(mce,atom));
sewardj7cf97ee2004-11-28 14:25:01 +00001361 vatom = expr2vbits( mce, atom );
sewardj95448072004-11-22 20:19:51 +00001362 tl_assert(sameKindedAtoms(atom, vatom));
sewardj7cf97ee2004-11-28 14:25:01 +00001363 ty = descr->elemTy;
sewardj7cf4e6b2008-05-01 20:24:26 +00001364 tyS = shadowTypeV(ty);
sewardj7cf97ee2004-11-28 14:25:01 +00001365 arrSize = descr->nElems * sizeofIRType(ty);
sewardj95448072004-11-22 20:19:51 +00001366 tl_assert(ty != Ity_I1);
1367 tl_assert(isOriginalAtom(mce,ix));
florian434ffae2012-07-19 17:23:42 +00001368 complainIfUndefined(mce, ix, NULL);
sewardj95448072004-11-22 20:19:51 +00001369 if (isAlwaysDefd(mce, descr->base, arrSize)) {
1370 /* later: no ... */
1371 /* emit code to emit a complaint if any of the vbits are 1. */
1372 /* complainIfUndefined(mce, atom); */
1373 } else {
1374 /* Do a cloned version of the Put that refers to the shadow
1375 area. */
sewardj0b9d74a2006-12-24 02:24:11 +00001376 IRRegArray* new_descr
1377 = mkIRRegArray( descr->base + mce->layout->total_sizeB,
1378 tyS, descr->nElems);
floriand39b0222012-05-31 15:48:13 +00001379 stmt( 'V', mce, IRStmt_PutI( mkIRPutI(new_descr, ix, bias, vatom) ));
sewardj95448072004-11-22 20:19:51 +00001380 }
1381}
1382
1383
1384/* Return an expression which contains the V bits corresponding to the
1385 given GET (passed in in pieces).
1386*/
1387static
1388IRExpr* shadow_GET ( MCEnv* mce, Int offset, IRType ty )
1389{
sewardj7cf4e6b2008-05-01 20:24:26 +00001390 IRType tyS = shadowTypeV(ty);
sewardj95448072004-11-22 20:19:51 +00001391 tl_assert(ty != Ity_I1);
sewardjb5b87402011-03-07 16:05:35 +00001392 tl_assert(ty != Ity_I128);
sewardj95448072004-11-22 20:19:51 +00001393 if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
1394 /* Always defined, return all zeroes of the relevant type */
1395 return definedOfType(tyS);
1396 } else {
1397 /* return a cloned version of the Get that refers to the shadow
1398 area. */
sewardj7cf4e6b2008-05-01 20:24:26 +00001399 /* FIXME: this isn't an atom! */
sewardj95448072004-11-22 20:19:51 +00001400 return IRExpr_Get( offset + mce->layout->total_sizeB, tyS );
1401 }
1402}
1403
1404
1405/* Return an expression which contains the V bits corresponding to the
1406 given GETI (passed in in pieces).
1407*/
1408static
sewardj0b9d74a2006-12-24 02:24:11 +00001409IRExpr* shadow_GETI ( MCEnv* mce,
1410 IRRegArray* descr, IRAtom* ix, Int bias )
sewardj95448072004-11-22 20:19:51 +00001411{
1412 IRType ty = descr->elemTy;
sewardj7cf4e6b2008-05-01 20:24:26 +00001413 IRType tyS = shadowTypeV(ty);
sewardj95448072004-11-22 20:19:51 +00001414 Int arrSize = descr->nElems * sizeofIRType(ty);
1415 tl_assert(ty != Ity_I1);
1416 tl_assert(isOriginalAtom(mce,ix));
florian434ffae2012-07-19 17:23:42 +00001417 complainIfUndefined(mce, ix, NULL);
sewardj95448072004-11-22 20:19:51 +00001418 if (isAlwaysDefd(mce, descr->base, arrSize)) {
1419 /* Always defined, return all zeroes of the relevant type */
1420 return definedOfType(tyS);
1421 } else {
1422 /* return a cloned version of the Get that refers to the shadow
1423 area. */
sewardj0b9d74a2006-12-24 02:24:11 +00001424 IRRegArray* new_descr
1425 = mkIRRegArray( descr->base + mce->layout->total_sizeB,
1426 tyS, descr->nElems);
sewardj95448072004-11-22 20:19:51 +00001427 return IRExpr_GetI( new_descr, ix, bias );
1428 }
1429}
1430
1431
1432/*------------------------------------------------------------*/
1433/*--- Generating approximations for unknown operations, ---*/
1434/*--- using lazy-propagate semantics ---*/
1435/*------------------------------------------------------------*/
1436
1437/* Lazy propagation of undefinedness from two values, resulting in the
1438 specified shadow type.
1439*/
1440static
1441IRAtom* mkLazy2 ( MCEnv* mce, IRType finalVty, IRAtom* va1, IRAtom* va2 )
1442{
sewardj95448072004-11-22 20:19:51 +00001443 IRAtom* at;
sewardj1c0ce7a2009-07-01 08:10:49 +00001444 IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
1445 IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
sewardj95448072004-11-22 20:19:51 +00001446 tl_assert(isShadowAtom(mce,va1));
1447 tl_assert(isShadowAtom(mce,va2));
sewardj37c31cc2005-04-26 23:49:24 +00001448
1449 /* The general case is inefficient because PCast is an expensive
1450 operation. Here are some special cases which use PCast only
1451 once rather than twice. */
1452
1453 /* I64 x I64 -> I64 */
1454 if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I64) {
1455 if (0) VG_(printf)("mkLazy2: I64 x I64 -> I64\n");
1456 at = mkUifU(mce, Ity_I64, va1, va2);
1457 at = mkPCastTo(mce, Ity_I64, at);
1458 return at;
1459 }
1460
1461 /* I64 x I64 -> I32 */
1462 if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I32) {
1463 if (0) VG_(printf)("mkLazy2: I64 x I64 -> I32\n");
1464 at = mkUifU(mce, Ity_I64, va1, va2);
1465 at = mkPCastTo(mce, Ity_I32, at);
1466 return at;
1467 }
1468
1469 if (0) {
1470 VG_(printf)("mkLazy2 ");
1471 ppIRType(t1);
1472 VG_(printf)("_");
1473 ppIRType(t2);
1474 VG_(printf)("_");
1475 ppIRType(finalVty);
1476 VG_(printf)("\n");
1477 }
1478
1479 /* General case: force everything via 32-bit intermediaries. */
sewardj95448072004-11-22 20:19:51 +00001480 at = mkPCastTo(mce, Ity_I32, va1);
1481 at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
1482 at = mkPCastTo(mce, finalVty, at);
1483 return at;
1484}
1485
1486
/* 3-arg version of the above: lazily propagate undefinedness from
   va1, va2 and va3 into a result of shadow type finalVty.  Only the
   type combinations that actually occur are handled; anything else
   prints the offending combination and asserts. */
static
IRAtom* mkLazy3 ( MCEnv* mce, IRType finalVty,
                  IRAtom* va1, IRAtom* va2, IRAtom* va3 )
{
   IRAtom* at;
   IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
   IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
   IRType t3 = typeOfIRExpr(mce->sb->tyenv, va3);
   tl_assert(isShadowAtom(mce,va1));
   tl_assert(isShadowAtom(mce,va2));
   tl_assert(isShadowAtom(mce,va3));

   /* The general case is inefficient because PCast is an expensive
      operation.  Here are some special cases which use PCast only
      twice rather than three times. */

   /* I32 x I64 x I64 -> I64 */
   /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
   if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64
       && finalVty == Ity_I64) {
      if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I64\n");
      /* Widen 1st arg to I64.  Since 1st arg is typically a rounding
         mode indication which is fully defined, this should get
         folded out later. */
      at = mkPCastTo(mce, Ity_I64, va1);
      /* Now fold in 2nd and 3rd args. */
      at = mkUifU(mce, Ity_I64, at, va2);
      at = mkUifU(mce, Ity_I64, at, va3);
      /* and PCast once again. */
      at = mkPCastTo(mce, Ity_I64, at);
      return at;
   }

   /* I32 x I64 x I64 -> I32 */
   if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64
       && finalVty == Ity_I32) {
      if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I32\n");
      at = mkPCastTo(mce, Ity_I64, va1);
      at = mkUifU(mce, Ity_I64, at, va2);
      at = mkUifU(mce, Ity_I64, at, va3);
      at = mkPCastTo(mce, Ity_I32, at);
      return at;
   }

   /* I32 x I32 x I32 -> I32 */
   /* 32-bit FP idiom, as (eg) happens on ARM */
   if (t1 == Ity_I32 && t2 == Ity_I32 && t3 == Ity_I32
       && finalVty == Ity_I32) {
      if (0) VG_(printf)("mkLazy3: I32 x I32 x I32 -> I32\n");
      at = va1;
      at = mkUifU(mce, Ity_I32, at, va2);
      at = mkUifU(mce, Ity_I32, at, va3);
      at = mkPCastTo(mce, Ity_I32, at);
      return at;
   }

   /* I32 x I128 x I128 -> I128 */
   /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
   if (t1 == Ity_I32 && t2 == Ity_I128 && t3 == Ity_I128
       && finalVty == Ity_I128) {
      if (0) VG_(printf)("mkLazy3: I32 x I128 x I128 -> I128\n");
      /* Widen 1st arg to I128.  Since 1st arg is typically a rounding
         mode indication which is fully defined, this should get
         folded out later. */
      at = mkPCastTo(mce, Ity_I128, va1);
      /* Now fold in 2nd and 3rd args. */
      at = mkUifU(mce, Ity_I128, at, va2);
      at = mkUifU(mce, Ity_I128, at, va3);
      /* and PCast once again. */
      at = mkPCastTo(mce, Ity_I128, at);
      return at;
   }
   /* Unhandled combination: the printf is deliberately enabled
      ("if (1)") so the offending types are visible before the
      assertion below fires. */
   if (1) {
      VG_(printf)("mkLazy3: ");
      ppIRType(t1);
      VG_(printf)(" x ");
      ppIRType(t2);
      VG_(printf)(" x ");
      ppIRType(t3);
      VG_(printf)(" -> ");
      ppIRType(finalVty);
      VG_(printf)("\n");
   }

   tl_assert(0);
   /* General case: force everything via 32-bit intermediaries. */
   /* (kept for reference; deliberately unreachable behind the
      tl_assert above) */
   /*
   at = mkPCastTo(mce, Ity_I32, va1);
   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va3));
   at = mkPCastTo(mce, finalVty, at);
   return at;
   */
}
1582
1583
/* 4-arg version of the above: lazily propagate undefinedness from
   va1..va4 into a result of shadow type finalVty.  Only the type
   combinations that actually occur are handled; anything else prints
   the offending combination and asserts. */
static
IRAtom* mkLazy4 ( MCEnv* mce, IRType finalVty,
                  IRAtom* va1, IRAtom* va2, IRAtom* va3, IRAtom* va4 )
{
   IRAtom* at;
   IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
   IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
   IRType t3 = typeOfIRExpr(mce->sb->tyenv, va3);
   IRType t4 = typeOfIRExpr(mce->sb->tyenv, va4);
   tl_assert(isShadowAtom(mce,va1));
   tl_assert(isShadowAtom(mce,va2));
   tl_assert(isShadowAtom(mce,va3));
   tl_assert(isShadowAtom(mce,va4));

   /* The general case is inefficient because PCast is an expensive
      operation.  Here are some special cases which use PCast only
      twice rather than three times. */

   /* I32 x I64 x I64 x I64 -> I64 */
   /* Standard FP idiom: rm x FParg1 x FParg2 x FParg3 -> FPresult */
   if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64 && t4 == Ity_I64
       && finalVty == Ity_I64) {
      if (0) VG_(printf)("mkLazy4: I32 x I64 x I64 x I64 -> I64\n");
      /* Widen 1st arg to I64.  Since 1st arg is typically a rounding
         mode indication which is fully defined, this should get
         folded out later. */
      at = mkPCastTo(mce, Ity_I64, va1);
      /* Now fold in 2nd, 3rd, 4th args. */
      at = mkUifU(mce, Ity_I64, at, va2);
      at = mkUifU(mce, Ity_I64, at, va3);
      at = mkUifU(mce, Ity_I64, at, va4);
      /* and PCast once again. */
      at = mkPCastTo(mce, Ity_I64, at);
      return at;
   }
   /* I32 x I32 x I32 x I32 -> I32 */
   /* Standard FP idiom: rm x FParg1 x FParg2 x FParg3 -> FPresult */
   if (t1 == Ity_I32 && t2 == Ity_I32 && t3 == Ity_I32 && t4 == Ity_I32
       && finalVty == Ity_I32) {
      if (0) VG_(printf)("mkLazy4: I32 x I32 x I32 x I32 -> I32\n");
      at = va1;
      /* Now fold in 2nd, 3rd, 4th args. */
      at = mkUifU(mce, Ity_I32, at, va2);
      at = mkUifU(mce, Ity_I32, at, va3);
      at = mkUifU(mce, Ity_I32, at, va4);
      at = mkPCastTo(mce, Ity_I32, at);
      return at;
   }

   /* Unhandled combination: the printf is deliberately enabled
      ("if (1)") so the offending types are visible before the
      assertion below fires. */
   if (1) {
      VG_(printf)("mkLazy4: ");
      ppIRType(t1);
      VG_(printf)(" x ");
      ppIRType(t2);
      VG_(printf)(" x ");
      ppIRType(t3);
      VG_(printf)(" x ");
      ppIRType(t4);
      VG_(printf)(" -> ");
      ppIRType(finalVty);
      VG_(printf)("\n");
   }

   tl_assert(0);
}
1650
1651
sewardj95448072004-11-22 20:19:51 +00001652/* Do the lazy propagation game from a null-terminated vector of
1653 atoms. This is presumably the arguments to a helper call, so the
1654 IRCallee info is also supplied in order that we can know which
1655 arguments should be ignored (via the .mcx_mask field).
1656*/
1657static
1658IRAtom* mkLazyN ( MCEnv* mce,
1659 IRAtom** exprvec, IRType finalVtype, IRCallee* cee )
1660{
sewardj4cc684b2007-08-25 23:09:36 +00001661 Int i;
sewardj95448072004-11-22 20:19:51 +00001662 IRAtom* here;
sewardj4cc684b2007-08-25 23:09:36 +00001663 IRAtom* curr;
1664 IRType mergeTy;
sewardj99430032011-05-04 09:09:31 +00001665 Bool mergeTy64 = True;
sewardj4cc684b2007-08-25 23:09:36 +00001666
1667 /* Decide on the type of the merge intermediary. If all relevant
1668 args are I64, then it's I64. In all other circumstances, use
1669 I32. */
1670 for (i = 0; exprvec[i]; i++) {
1671 tl_assert(i < 32);
1672 tl_assert(isOriginalAtom(mce, exprvec[i]));
1673 if (cee->mcx_mask & (1<<i))
1674 continue;
sewardj1c0ce7a2009-07-01 08:10:49 +00001675 if (typeOfIRExpr(mce->sb->tyenv, exprvec[i]) != Ity_I64)
sewardj4cc684b2007-08-25 23:09:36 +00001676 mergeTy64 = False;
1677 }
1678
1679 mergeTy = mergeTy64 ? Ity_I64 : Ity_I32;
1680 curr = definedOfType(mergeTy);
1681
sewardj95448072004-11-22 20:19:51 +00001682 for (i = 0; exprvec[i]; i++) {
1683 tl_assert(i < 32);
1684 tl_assert(isOriginalAtom(mce, exprvec[i]));
1685 /* Only take notice of this arg if the callee's mc-exclusion
1686 mask does not say it is to be excluded. */
1687 if (cee->mcx_mask & (1<<i)) {
1688 /* the arg is to be excluded from definedness checking. Do
1689 nothing. */
1690 if (0) VG_(printf)("excluding %s(%d)\n", cee->name, i);
1691 } else {
1692 /* calculate the arg's definedness, and pessimistically merge
1693 it in. */
sewardj4cc684b2007-08-25 23:09:36 +00001694 here = mkPCastTo( mce, mergeTy, expr2vbits(mce, exprvec[i]) );
1695 curr = mergeTy64
1696 ? mkUifU64(mce, here, curr)
1697 : mkUifU32(mce, here, curr);
sewardj95448072004-11-22 20:19:51 +00001698 }
1699 }
1700 return mkPCastTo(mce, finalVtype, curr );
1701}
1702
1703
1704/*------------------------------------------------------------*/
1705/*--- Generating expensive sequences for exact carry-chain ---*/
1706/*--- propagation in add/sub and related operations. ---*/
1707/*------------------------------------------------------------*/
1708
/* Build an exact, carry-chain-aware definedness approximation for
   aa+bb (add==True) or aa-bb (add==False) at type ty (I32 or I64).
   qaa/qbb are the shadow (V-bit) values for aa/bb.  The scheme
   computes the result under the "all undefined bits at minimum" and
   "all undefined bits at maximum" interpretations; bits where those
   two disagree, or which are directly undefined, are undefined in the
   result. */
static
IRAtom* expensiveAddSub ( MCEnv* mce,
                          Bool add,
                          IRType ty,
                          IRAtom* qaa, IRAtom* qbb,
                          IRAtom* aa, IRAtom* bb )
{
   IRAtom *a_min, *b_min, *a_max, *b_max;
   IROp opAND, opOR, opXOR, opNOT, opADD, opSUB;

   tl_assert(isShadowAtom(mce,qaa));
   tl_assert(isShadowAtom(mce,qbb));
   tl_assert(isOriginalAtom(mce,aa));
   tl_assert(isOriginalAtom(mce,bb));
   tl_assert(sameKindedAtoms(qaa,aa));
   tl_assert(sameKindedAtoms(qbb,bb));

   /* Select the word-sized operation set for the given type. */
   switch (ty) {
      case Ity_I32:
         opAND = Iop_And32;
         opOR  = Iop_Or32;
         opXOR = Iop_Xor32;
         opNOT = Iop_Not32;
         opADD = Iop_Add32;
         opSUB = Iop_Sub32;
         break;
      case Ity_I64:
         opAND = Iop_And64;
         opOR  = Iop_Or64;
         opXOR = Iop_Xor64;
         opNOT = Iop_Not64;
         opADD = Iop_Add64;
         opSUB = Iop_Sub64;
         break;
      default:
         VG_(tool_panic)("expensiveAddSub");
   }

   // a_min = aa & ~qaa   (undefined bits forced to 0)
   a_min = assignNew('V', mce,ty,
                     binop(opAND, aa,
                           assignNew('V', mce,ty, unop(opNOT, qaa))));

   // b_min = bb & ~qbb
   b_min = assignNew('V', mce,ty,
                     binop(opAND, bb,
                           assignNew('V', mce,ty, unop(opNOT, qbb))));

   // a_max = aa | qaa   (undefined bits forced to 1)
   a_max = assignNew('V', mce,ty, binop(opOR, aa, qaa));

   // b_max = bb | qbb
   b_max = assignNew('V', mce,ty, binop(opOR, bb, qbb));

   if (add) {
      // result = (qaa | qbb) | ((a_min + b_min) ^ (a_max + b_max))
      return
      assignNew('V', mce,ty,
                binop( opOR,
                       assignNew('V', mce,ty, binop(opOR, qaa, qbb)),
                       assignNew('V', mce,ty,
                                 binop( opXOR,
                                        assignNew('V', mce,ty, binop(opADD, a_min, b_min)),
                                        assignNew('V', mce,ty, binop(opADD, a_max, b_max))
                                 )
                       )
                )
      );
   } else {
      // result = (qaa | qbb) | ((a_min - b_max) ^ (a_max - b_min))
      // (NB: comment fixed -- the second term is a subtraction, matching
      // the opSUB below; for subtraction the extremes are min-max and
      // max-min.)
      return
      assignNew('V', mce,ty,
                binop( opOR,
                       assignNew('V', mce,ty, binop(opOR, qaa, qbb)),
                       assignNew('V', mce,ty,
                                 binop( opXOR,
                                        assignNew('V', mce,ty, binop(opSUB, a_min, b_max)),
                                        assignNew('V', mce,ty, binop(opSUB, a_max, b_min))
                                 )
                       )
                )
      );
   }

}
1794
1795
1796/*------------------------------------------------------------*/
sewardjaaddbc22005-10-07 09:49:53 +00001797/*--- Scalar shifts. ---*/
1798/*------------------------------------------------------------*/
1799
1800/* Produce an interpretation for (aa << bb) (or >>s, >>u). The basic
1801 idea is to shift the definedness bits by the original shift amount.
1802 This introduces 0s ("defined") in new positions for left shifts and
1803 unsigned right shifts, and copies the top definedness bit for
1804 signed right shifts. So, conveniently, applying the original shift
1805 operator to the definedness bits for the left arg is exactly the
1806 right thing to do:
1807
1808 (qaa << bb)
1809
1810 However if the shift amount is undefined then the whole result
1811 is undefined. Hence need:
1812
1813 (qaa << bb) `UifU` PCast(qbb)
1814
1815 If the shift amount bb is a literal than qbb will say 'all defined'
1816 and the UifU and PCast will get folded out by post-instrumentation
1817 optimisation.
1818*/
1819static IRAtom* scalarShift ( MCEnv* mce,
1820 IRType ty,
1821 IROp original_op,
1822 IRAtom* qaa, IRAtom* qbb,
1823 IRAtom* aa, IRAtom* bb )
1824{
1825 tl_assert(isShadowAtom(mce,qaa));
1826 tl_assert(isShadowAtom(mce,qbb));
1827 tl_assert(isOriginalAtom(mce,aa));
1828 tl_assert(isOriginalAtom(mce,bb));
1829 tl_assert(sameKindedAtoms(qaa,aa));
1830 tl_assert(sameKindedAtoms(qbb,bb));
1831 return
1832 assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +00001833 'V', mce, ty,
sewardjaaddbc22005-10-07 09:49:53 +00001834 mkUifU( mce, ty,
sewardj7cf4e6b2008-05-01 20:24:26 +00001835 assignNew('V', mce, ty, binop(original_op, qaa, bb)),
sewardjaaddbc22005-10-07 09:49:53 +00001836 mkPCastTo(mce, ty, qbb)
1837 )
1838 );
1839}
1840
1841
1842/*------------------------------------------------------------*/
1843/*--- Helpers for dealing with vector primops. ---*/
sewardj3245c912004-12-10 14:58:26 +00001844/*------------------------------------------------------------*/
1845
sewardja1d93302004-12-12 16:45:06 +00001846/* Vector pessimisation -- pessimise within each lane individually. */
1847
1848static IRAtom* mkPCast8x16 ( MCEnv* mce, IRAtom* at )
1849{
sewardj7cf4e6b2008-05-01 20:24:26 +00001850 return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ8x16, at));
sewardja1d93302004-12-12 16:45:06 +00001851}
1852
/* Pessimise each 16-bit lane of a V128 (all-ones iff lane nonzero). */
static IRAtom* mkPCast16x8 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ16x8, at));
}
1857
/* Pessimise each 32-bit lane of a V128 (all-ones iff lane nonzero). */
static IRAtom* mkPCast32x4 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ32x4, at));
}
1862
/* Pessimise each 64-bit lane of a V128 (all-ones iff lane nonzero). */
static IRAtom* mkPCast64x2 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ64x2, at));
}
1867
/* Pessimise each 64-bit lane of a V256 (all-ones iff lane nonzero). */
static IRAtom* mkPCast64x4 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ64x4, at));
}
1872
/* Pessimise each 32-bit lane of a V256 (all-ones iff lane nonzero). */
static IRAtom* mkPCast32x8 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ32x8, at));
}
1877
/* Pessimise each 32-bit lane of an I64 (all-ones iff lane nonzero). */
static IRAtom* mkPCast32x2 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ32x2, at));
}
1882
/* Pessimise each 16-bit lane of an I64 (all-ones iff lane nonzero). */
static IRAtom* mkPCast16x4 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ16x4, at));
}
1887
/* Pessimise each 8-bit lane of an I64 (all-ones iff lane nonzero). */
static IRAtom* mkPCast8x8 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ8x8, at));
}
1892
/* Pessimise each 16-bit lane of an I32 (all-ones iff lane nonzero). */
static IRAtom* mkPCast16x2 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_I32, unop(Iop_CmpNEZ16x2, at));
}
1897
/* Pessimise each 8-bit lane of an I32 (all-ones iff lane nonzero). */
static IRAtom* mkPCast8x4 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_I32, unop(Iop_CmpNEZ8x4, at));
}
1902
sewardja1d93302004-12-12 16:45:06 +00001903
sewardj3245c912004-12-10 14:58:26 +00001904/* Here's a simple scheme capable of handling ops derived from SSE1
1905 code and while only generating ops that can be efficiently
1906 implemented in SSE1. */
1907
1908/* All-lanes versions are straightforward:
1909
sewardj20d38f22005-02-07 23:50:18 +00001910 binary32Fx4(x,y) ==> PCast32x4(UifUV128(x#,y#))
sewardj3245c912004-12-10 14:58:26 +00001911
1912 unary32Fx4(x,y) ==> PCast32x4(x#)
1913
1914 Lowest-lane-only versions are more complex:
1915
sewardj20d38f22005-02-07 23:50:18 +00001916 binary32F0x4(x,y) ==> SetV128lo32(
sewardj3245c912004-12-10 14:58:26 +00001917 x#,
sewardj20d38f22005-02-07 23:50:18 +00001918 PCast32(V128to32(UifUV128(x#,y#)))
sewardj3245c912004-12-10 14:58:26 +00001919 )
1920
1921 This is perhaps not so obvious. In particular, it's faster to
sewardj20d38f22005-02-07 23:50:18 +00001922 do a V128-bit UifU and then take the bottom 32 bits than the more
sewardj3245c912004-12-10 14:58:26 +00001923 obvious scheme of taking the bottom 32 bits of each operand
1924 and doing a 32-bit UifU. Basically since UifU is fast and
1925 chopping lanes off vector values is slow.
1926
1927 Finally:
1928
sewardj20d38f22005-02-07 23:50:18 +00001929 unary32F0x4(x) ==> SetV128lo32(
sewardj3245c912004-12-10 14:58:26 +00001930 x#,
sewardj20d38f22005-02-07 23:50:18 +00001931 PCast32(V128to32(x#))
sewardj3245c912004-12-10 14:58:26 +00001932 )
1933
1934 Where:
1935
1936 PCast32(v#) = 1Sto32(CmpNE32(v#,0))
1937 PCast32x4(v#) = CmpNEZ32x4(v#)
1938*/
1939
1940static
1941IRAtom* binary32Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1942{
1943 IRAtom* at;
1944 tl_assert(isShadowAtom(mce, vatomX));
1945 tl_assert(isShadowAtom(mce, vatomY));
sewardj20d38f22005-02-07 23:50:18 +00001946 at = mkUifUV128(mce, vatomX, vatomY);
sewardj7cf4e6b2008-05-01 20:24:26 +00001947 at = assignNew('V', mce, Ity_V128, mkPCast32x4(mce, at));
sewardj3245c912004-12-10 14:58:26 +00001948 return at;
1949}
1950
1951static
1952IRAtom* unary32Fx4 ( MCEnv* mce, IRAtom* vatomX )
1953{
1954 IRAtom* at;
1955 tl_assert(isShadowAtom(mce, vatomX));
sewardj7cf4e6b2008-05-01 20:24:26 +00001956 at = assignNew('V', mce, Ity_V128, mkPCast32x4(mce, vatomX));
sewardj3245c912004-12-10 14:58:26 +00001957 return at;
1958}
1959
1960static
1961IRAtom* binary32F0x4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1962{
1963 IRAtom* at;
1964 tl_assert(isShadowAtom(mce, vatomX));
1965 tl_assert(isShadowAtom(mce, vatomY));
sewardj20d38f22005-02-07 23:50:18 +00001966 at = mkUifUV128(mce, vatomX, vatomY);
sewardj7cf4e6b2008-05-01 20:24:26 +00001967 at = assignNew('V', mce, Ity_I32, unop(Iop_V128to32, at));
sewardj3245c912004-12-10 14:58:26 +00001968 at = mkPCastTo(mce, Ity_I32, at);
sewardj7cf4e6b2008-05-01 20:24:26 +00001969 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
sewardj3245c912004-12-10 14:58:26 +00001970 return at;
1971}
1972
1973static
1974IRAtom* unary32F0x4 ( MCEnv* mce, IRAtom* vatomX )
1975{
1976 IRAtom* at;
1977 tl_assert(isShadowAtom(mce, vatomX));
sewardj7cf4e6b2008-05-01 20:24:26 +00001978 at = assignNew('V', mce, Ity_I32, unop(Iop_V128to32, vatomX));
sewardj3245c912004-12-10 14:58:26 +00001979 at = mkPCastTo(mce, Ity_I32, at);
sewardj7cf4e6b2008-05-01 20:24:26 +00001980 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
sewardj3245c912004-12-10 14:58:26 +00001981 return at;
1982}
1983
sewardj0b070592004-12-10 21:44:22 +00001984/* --- ... and ... 64Fx2 versions of the same ... --- */
1985
1986static
1987IRAtom* binary64Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1988{
1989 IRAtom* at;
1990 tl_assert(isShadowAtom(mce, vatomX));
1991 tl_assert(isShadowAtom(mce, vatomY));
sewardj20d38f22005-02-07 23:50:18 +00001992 at = mkUifUV128(mce, vatomX, vatomY);
sewardj7cf4e6b2008-05-01 20:24:26 +00001993 at = assignNew('V', mce, Ity_V128, mkPCast64x2(mce, at));
sewardj0b070592004-12-10 21:44:22 +00001994 return at;
1995}
1996
1997static
1998IRAtom* unary64Fx2 ( MCEnv* mce, IRAtom* vatomX )
1999{
2000 IRAtom* at;
2001 tl_assert(isShadowAtom(mce, vatomX));
sewardj7cf4e6b2008-05-01 20:24:26 +00002002 at = assignNew('V', mce, Ity_V128, mkPCast64x2(mce, vatomX));
sewardj0b070592004-12-10 21:44:22 +00002003 return at;
2004}
2005
2006static
2007IRAtom* binary64F0x2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2008{
2009 IRAtom* at;
2010 tl_assert(isShadowAtom(mce, vatomX));
2011 tl_assert(isShadowAtom(mce, vatomY));
sewardj20d38f22005-02-07 23:50:18 +00002012 at = mkUifUV128(mce, vatomX, vatomY);
sewardj7cf4e6b2008-05-01 20:24:26 +00002013 at = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, at));
sewardj0b070592004-12-10 21:44:22 +00002014 at = mkPCastTo(mce, Ity_I64, at);
sewardj7cf4e6b2008-05-01 20:24:26 +00002015 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
sewardj0b070592004-12-10 21:44:22 +00002016 return at;
2017}
2018
2019static
2020IRAtom* unary64F0x2 ( MCEnv* mce, IRAtom* vatomX )
2021{
2022 IRAtom* at;
2023 tl_assert(isShadowAtom(mce, vatomX));
sewardj7cf4e6b2008-05-01 20:24:26 +00002024 at = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vatomX));
sewardj0b070592004-12-10 21:44:22 +00002025 at = mkPCastTo(mce, Ity_I64, at);
sewardj7cf4e6b2008-05-01 20:24:26 +00002026 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
sewardj0b070592004-12-10 21:44:22 +00002027 return at;
2028}
2029
sewardj57f92b02010-08-22 11:54:14 +00002030/* --- --- ... and ... 32Fx2 versions of the same --- --- */
2031
2032static
2033IRAtom* binary32Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2034{
2035 IRAtom* at;
2036 tl_assert(isShadowAtom(mce, vatomX));
2037 tl_assert(isShadowAtom(mce, vatomY));
2038 at = mkUifU64(mce, vatomX, vatomY);
2039 at = assignNew('V', mce, Ity_I64, mkPCast32x2(mce, at));
2040 return at;
2041}
2042
2043static
2044IRAtom* unary32Fx2 ( MCEnv* mce, IRAtom* vatomX )
2045{
2046 IRAtom* at;
2047 tl_assert(isShadowAtom(mce, vatomX));
2048 at = assignNew('V', mce, Ity_I64, mkPCast32x2(mce, vatomX));
2049 return at;
2050}
2051
sewardj350e8f72012-06-25 07:52:15 +00002052/* --- ... and ... 64Fx4 versions of the same ... --- */
2053
2054static
2055IRAtom* binary64Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2056{
2057 IRAtom* at;
2058 tl_assert(isShadowAtom(mce, vatomX));
2059 tl_assert(isShadowAtom(mce, vatomY));
2060 at = mkUifUV256(mce, vatomX, vatomY);
2061 at = assignNew('V', mce, Ity_V256, mkPCast64x4(mce, at));
2062 return at;
2063}
2064
2065static
2066IRAtom* unary64Fx4 ( MCEnv* mce, IRAtom* vatomX )
2067{
2068 IRAtom* at;
2069 tl_assert(isShadowAtom(mce, vatomX));
2070 at = assignNew('V', mce, Ity_V256, mkPCast64x4(mce, vatomX));
2071 return at;
2072}
2073
2074/* --- ... and ... 32Fx8 versions of the same ... --- */
2075
2076static
2077IRAtom* binary32Fx8 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2078{
2079 IRAtom* at;
2080 tl_assert(isShadowAtom(mce, vatomX));
2081 tl_assert(isShadowAtom(mce, vatomY));
2082 at = mkUifUV256(mce, vatomX, vatomY);
2083 at = assignNew('V', mce, Ity_V256, mkPCast32x8(mce, at));
2084 return at;
2085}
2086
2087static
2088IRAtom* unary32Fx8 ( MCEnv* mce, IRAtom* vatomX )
2089{
2090 IRAtom* at;
2091 tl_assert(isShadowAtom(mce, vatomX));
2092 at = assignNew('V', mce, Ity_V256, mkPCast32x8(mce, vatomX));
2093 return at;
2094}
2095
sewardja1d93302004-12-12 16:45:06 +00002096/* --- --- Vector saturated narrowing --- --- */
2097
sewardjb5a29232011-10-22 09:29:41 +00002098/* We used to do something very clever here, but on closer inspection
2099 (2011-Jun-15), and in particular bug #279698, it turns out to be
2100 wrong. Part of the problem came from the fact that for a long
2101 time, the IR primops to do with saturated narrowing were
2102 underspecified and managed to confuse multiple cases which needed
2103 to be separate: the op names had a signedness qualifier, but in
2104 fact the source and destination signednesses needed to be specified
2105 independently, so the op names really need two independent
2106 signedness specifiers.
sewardja1d93302004-12-12 16:45:06 +00002107
sewardjb5a29232011-10-22 09:29:41 +00002108 As of 2011-Jun-15 (ish) the underspecification was sorted out
2109 properly. The incorrect instrumentation remained, though. That
2110 has now (2011-Oct-22) been fixed.
sewardja1d93302004-12-12 16:45:06 +00002111
sewardjb5a29232011-10-22 09:29:41 +00002112 What we now do is simple:
sewardja1d93302004-12-12 16:45:06 +00002113
sewardjb5a29232011-10-22 09:29:41 +00002114 Let the original narrowing op be QNarrowBinXtoYxZ, where Z is a
2115 number of lanes, X is the source lane width and signedness, and Y
2116 is the destination lane width and signedness. In all cases the
2117 destination lane width is half the source lane width, so the names
2118 have a bit of redundancy, but are at least easy to read.
sewardja1d93302004-12-12 16:45:06 +00002119
sewardjb5a29232011-10-22 09:29:41 +00002120 For example, Iop_QNarrowBin32Sto16Ux8 narrows 8 lanes of signed 32s
2121 to unsigned 16s.
sewardja1d93302004-12-12 16:45:06 +00002122
sewardjb5a29232011-10-22 09:29:41 +00002123 Let Vanilla(OP) be a function that takes OP, one of these
2124 saturating narrowing ops, and produces the same "shaped" narrowing
2125 op which is not saturating, but merely dumps the most significant
2126 bits. "same shape" means that the lane numbers and widths are the
2127 same as with OP.
sewardja1d93302004-12-12 16:45:06 +00002128
sewardjb5a29232011-10-22 09:29:41 +00002129 For example, Vanilla(Iop_QNarrowBin32Sto16Ux8)
2130 = Iop_NarrowBin32to16x8,
2131 that is, narrow 8 lanes of 32 bits to 8 lanes of 16 bits, by
2132 dumping the top half of each lane.
sewardja1d93302004-12-12 16:45:06 +00002133
sewardjb5a29232011-10-22 09:29:41 +00002134 So, with that in place, the scheme is simple, and it is simple to
2135 pessimise each lane individually and then apply Vanilla(OP) so as
2136 to get the result in the right "shape". If the original OP is
2137 QNarrowBinXtoYxZ then we produce
sewardja1d93302004-12-12 16:45:06 +00002138
sewardjb5a29232011-10-22 09:29:41 +00002139 Vanilla(OP)( PCast-X-to-X-x-Z(vatom1), PCast-X-to-X-x-Z(vatom2) )
sewardj9beeb0a2011-06-15 15:11:07 +00002140
sewardjb5a29232011-10-22 09:29:41 +00002141 or for the case when OP is unary (Iop_QNarrowUn*)
2142
2143 Vanilla(OP)( PCast-X-to-X-x-Z(vatom) )
sewardja1d93302004-12-12 16:45:06 +00002144*/
/* Map a saturating narrowing IROp to the non-saturating ("vanilla")
   narrowing op of the same shape -- same lane count and widths --
   which simply drops the top half of each source lane.  See the big
   comment above for why the instrumentation wants this.  Panics on
   any op it does not recognise. */
static
IROp vanillaNarrowingOpOfShape ( IROp qnarrowOp )
{
   switch (qnarrowOp) {
      /* Binary: (128, 128) -> 128 */
      case Iop_QNarrowBin16Sto8Ux16:
      case Iop_QNarrowBin16Sto8Sx16:
      case Iop_QNarrowBin16Uto8Ux16:
         return Iop_NarrowBin16to8x16;
      case Iop_QNarrowBin32Sto16Ux8:
      case Iop_QNarrowBin32Sto16Sx8:
      case Iop_QNarrowBin32Uto16Ux8:
         return Iop_NarrowBin32to16x8;
      /* Binary: (64, 64) -> 64 */
      case Iop_QNarrowBin32Sto16Sx4:
         return Iop_NarrowBin32to16x4;
      case Iop_QNarrowBin16Sto8Ux8:
      case Iop_QNarrowBin16Sto8Sx8:
         return Iop_NarrowBin16to8x8;
      /* Unary: 128 -> 64 */
      case Iop_QNarrowUn64Uto32Ux2:
      case Iop_QNarrowUn64Sto32Sx2:
      case Iop_QNarrowUn64Sto32Ux2:
         return Iop_NarrowUn64to32x2;
      case Iop_QNarrowUn32Uto16Ux4:
      case Iop_QNarrowUn32Sto16Sx4:
      case Iop_QNarrowUn32Sto16Ux4:
         return Iop_NarrowUn32to16x4;
      case Iop_QNarrowUn16Uto8Ux8:
      case Iop_QNarrowUn16Sto8Sx8:
      case Iop_QNarrowUn16Sto8Ux8:
         return Iop_NarrowUn16to8x8;
      default:
         ppIROp(qnarrowOp);
         VG_(tool_panic)("vanillaNarrowOpOfShape");
   }
}
2182
2183static
sewardj7ee7d852011-06-16 11:37:21 +00002184IRAtom* vectorNarrowBinV128 ( MCEnv* mce, IROp narrow_op,
2185 IRAtom* vatom1, IRAtom* vatom2)
sewardja1d93302004-12-12 16:45:06 +00002186{
2187 IRAtom *at1, *at2, *at3;
2188 IRAtom* (*pcast)( MCEnv*, IRAtom* );
2189 switch (narrow_op) {
sewardj7ee7d852011-06-16 11:37:21 +00002190 case Iop_QNarrowBin32Sto16Sx8: pcast = mkPCast32x4; break;
2191 case Iop_QNarrowBin32Uto16Ux8: pcast = mkPCast32x4; break;
2192 case Iop_QNarrowBin32Sto16Ux8: pcast = mkPCast32x4; break;
2193 case Iop_QNarrowBin16Sto8Sx16: pcast = mkPCast16x8; break;
2194 case Iop_QNarrowBin16Uto8Ux16: pcast = mkPCast16x8; break;
2195 case Iop_QNarrowBin16Sto8Ux16: pcast = mkPCast16x8; break;
2196 default: VG_(tool_panic)("vectorNarrowBinV128");
sewardja1d93302004-12-12 16:45:06 +00002197 }
sewardjb5a29232011-10-22 09:29:41 +00002198 IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op);
sewardja1d93302004-12-12 16:45:06 +00002199 tl_assert(isShadowAtom(mce,vatom1));
2200 tl_assert(isShadowAtom(mce,vatom2));
sewardj7cf4e6b2008-05-01 20:24:26 +00002201 at1 = assignNew('V', mce, Ity_V128, pcast(mce, vatom1));
2202 at2 = assignNew('V', mce, Ity_V128, pcast(mce, vatom2));
sewardjb5a29232011-10-22 09:29:41 +00002203 at3 = assignNew('V', mce, Ity_V128, binop(vanilla_narrow, at1, at2));
sewardja1d93302004-12-12 16:45:06 +00002204 return at3;
2205}
2206
sewardjacd2e912005-01-13 19:17:06 +00002207static
sewardj7ee7d852011-06-16 11:37:21 +00002208IRAtom* vectorNarrowBin64 ( MCEnv* mce, IROp narrow_op,
2209 IRAtom* vatom1, IRAtom* vatom2)
sewardjacd2e912005-01-13 19:17:06 +00002210{
2211 IRAtom *at1, *at2, *at3;
2212 IRAtom* (*pcast)( MCEnv*, IRAtom* );
2213 switch (narrow_op) {
sewardj7ee7d852011-06-16 11:37:21 +00002214 case Iop_QNarrowBin32Sto16Sx4: pcast = mkPCast32x2; break;
2215 case Iop_QNarrowBin16Sto8Sx8: pcast = mkPCast16x4; break;
2216 case Iop_QNarrowBin16Sto8Ux8: pcast = mkPCast16x4; break;
2217 default: VG_(tool_panic)("vectorNarrowBin64");
sewardjacd2e912005-01-13 19:17:06 +00002218 }
sewardjb5a29232011-10-22 09:29:41 +00002219 IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op);
sewardjacd2e912005-01-13 19:17:06 +00002220 tl_assert(isShadowAtom(mce,vatom1));
2221 tl_assert(isShadowAtom(mce,vatom2));
sewardj7cf4e6b2008-05-01 20:24:26 +00002222 at1 = assignNew('V', mce, Ity_I64, pcast(mce, vatom1));
2223 at2 = assignNew('V', mce, Ity_I64, pcast(mce, vatom2));
sewardjb5a29232011-10-22 09:29:41 +00002224 at3 = assignNew('V', mce, Ity_I64, binop(vanilla_narrow, at1, at2));
sewardjacd2e912005-01-13 19:17:06 +00002225 return at3;
2226}
2227
sewardj57f92b02010-08-22 11:54:14 +00002228static
sewardjb5a29232011-10-22 09:29:41 +00002229IRAtom* vectorNarrowUnV128 ( MCEnv* mce, IROp narrow_op,
sewardj7ee7d852011-06-16 11:37:21 +00002230 IRAtom* vatom1)
sewardj57f92b02010-08-22 11:54:14 +00002231{
2232 IRAtom *at1, *at2;
2233 IRAtom* (*pcast)( MCEnv*, IRAtom* );
sewardjb5a29232011-10-22 09:29:41 +00002234 tl_assert(isShadowAtom(mce,vatom1));
2235 /* For vanilla narrowing (non-saturating), we can just apply
2236 the op directly to the V bits. */
2237 switch (narrow_op) {
2238 case Iop_NarrowUn16to8x8:
2239 case Iop_NarrowUn32to16x4:
2240 case Iop_NarrowUn64to32x2:
2241 at1 = assignNew('V', mce, Ity_I64, unop(narrow_op, vatom1));
2242 return at1;
2243 default:
2244 break; /* Do Plan B */
2245 }
2246 /* Plan B: for ops that involve a saturation operation on the args,
2247 we must PCast before the vanilla narrow. */
2248 switch (narrow_op) {
sewardj7ee7d852011-06-16 11:37:21 +00002249 case Iop_QNarrowUn16Sto8Sx8: pcast = mkPCast16x8; break;
2250 case Iop_QNarrowUn16Sto8Ux8: pcast = mkPCast16x8; break;
2251 case Iop_QNarrowUn16Uto8Ux8: pcast = mkPCast16x8; break;
2252 case Iop_QNarrowUn32Sto16Sx4: pcast = mkPCast32x4; break;
2253 case Iop_QNarrowUn32Sto16Ux4: pcast = mkPCast32x4; break;
2254 case Iop_QNarrowUn32Uto16Ux4: pcast = mkPCast32x4; break;
2255 case Iop_QNarrowUn64Sto32Sx2: pcast = mkPCast64x2; break;
2256 case Iop_QNarrowUn64Sto32Ux2: pcast = mkPCast64x2; break;
2257 case Iop_QNarrowUn64Uto32Ux2: pcast = mkPCast64x2; break;
2258 default: VG_(tool_panic)("vectorNarrowUnV128");
sewardj57f92b02010-08-22 11:54:14 +00002259 }
sewardjb5a29232011-10-22 09:29:41 +00002260 IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op);
sewardj57f92b02010-08-22 11:54:14 +00002261 at1 = assignNew('V', mce, Ity_V128, pcast(mce, vatom1));
sewardjb5a29232011-10-22 09:29:41 +00002262 at2 = assignNew('V', mce, Ity_I64, unop(vanilla_narrow, at1));
sewardj57f92b02010-08-22 11:54:14 +00002263 return at2;
2264}
2265
2266static
sewardj7ee7d852011-06-16 11:37:21 +00002267IRAtom* vectorWidenI64 ( MCEnv* mce, IROp longen_op,
2268 IRAtom* vatom1)
sewardj57f92b02010-08-22 11:54:14 +00002269{
2270 IRAtom *at1, *at2;
2271 IRAtom* (*pcast)( MCEnv*, IRAtom* );
2272 switch (longen_op) {
sewardj7ee7d852011-06-16 11:37:21 +00002273 case Iop_Widen8Uto16x8: pcast = mkPCast16x8; break;
2274 case Iop_Widen8Sto16x8: pcast = mkPCast16x8; break;
2275 case Iop_Widen16Uto32x4: pcast = mkPCast32x4; break;
2276 case Iop_Widen16Sto32x4: pcast = mkPCast32x4; break;
2277 case Iop_Widen32Uto64x2: pcast = mkPCast64x2; break;
2278 case Iop_Widen32Sto64x2: pcast = mkPCast64x2; break;
2279 default: VG_(tool_panic)("vectorWidenI64");
sewardj57f92b02010-08-22 11:54:14 +00002280 }
2281 tl_assert(isShadowAtom(mce,vatom1));
2282 at1 = assignNew('V', mce, Ity_V128, unop(longen_op, vatom1));
2283 at2 = assignNew('V', mce, Ity_V128, pcast(mce, at1));
2284 return at2;
2285}
2286
sewardja1d93302004-12-12 16:45:06 +00002287
2288/* --- --- Vector integer arithmetic --- --- */
2289
2290/* Simple ... UifU the args and per-lane pessimise the results. */
sewardjacd2e912005-01-13 19:17:06 +00002291
sewardj20d38f22005-02-07 23:50:18 +00002292/* --- V128-bit versions --- */
sewardjacd2e912005-01-13 19:17:06 +00002293
sewardja1d93302004-12-12 16:45:06 +00002294static
2295IRAtom* binary8Ix16 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2296{
2297 IRAtom* at;
sewardj20d38f22005-02-07 23:50:18 +00002298 at = mkUifUV128(mce, vatom1, vatom2);
sewardja1d93302004-12-12 16:45:06 +00002299 at = mkPCast8x16(mce, at);
2300 return at;
2301}
2302
2303static
2304IRAtom* binary16Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2305{
2306 IRAtom* at;
sewardj20d38f22005-02-07 23:50:18 +00002307 at = mkUifUV128(mce, vatom1, vatom2);
sewardja1d93302004-12-12 16:45:06 +00002308 at = mkPCast16x8(mce, at);
2309 return at;
2310}
2311
2312static
2313IRAtom* binary32Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2314{
2315 IRAtom* at;
sewardj20d38f22005-02-07 23:50:18 +00002316 at = mkUifUV128(mce, vatom1, vatom2);
sewardja1d93302004-12-12 16:45:06 +00002317 at = mkPCast32x4(mce, at);
2318 return at;
2319}
2320
2321static
2322IRAtom* binary64Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2323{
2324 IRAtom* at;
sewardj20d38f22005-02-07 23:50:18 +00002325 at = mkUifUV128(mce, vatom1, vatom2);
sewardja1d93302004-12-12 16:45:06 +00002326 at = mkPCast64x2(mce, at);
2327 return at;
2328}
sewardj3245c912004-12-10 14:58:26 +00002329
sewardjacd2e912005-01-13 19:17:06 +00002330/* --- 64-bit versions --- */
2331
2332static
2333IRAtom* binary8Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2334{
2335 IRAtom* at;
2336 at = mkUifU64(mce, vatom1, vatom2);
2337 at = mkPCast8x8(mce, at);
2338 return at;
2339}
2340
2341static
2342IRAtom* binary16Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2343{
2344 IRAtom* at;
2345 at = mkUifU64(mce, vatom1, vatom2);
2346 at = mkPCast16x4(mce, at);
2347 return at;
2348}
2349
2350static
2351IRAtom* binary32Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2352{
2353 IRAtom* at;
2354 at = mkUifU64(mce, vatom1, vatom2);
2355 at = mkPCast32x2(mce, at);
2356 return at;
2357}
2358
sewardj57f92b02010-08-22 11:54:14 +00002359static
2360IRAtom* binary64Ix1 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2361{
2362 IRAtom* at;
2363 at = mkUifU64(mce, vatom1, vatom2);
2364 at = mkPCastTo(mce, Ity_I64, at);
2365 return at;
2366}
2367
sewardjc678b852010-09-22 00:58:51 +00002368/* --- 32-bit versions --- */
2369
2370static
2371IRAtom* binary8Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2372{
2373 IRAtom* at;
2374 at = mkUifU32(mce, vatom1, vatom2);
2375 at = mkPCast8x4(mce, at);
2376 return at;
2377}
2378
2379static
2380IRAtom* binary16Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2381{
2382 IRAtom* at;
2383 at = mkUifU32(mce, vatom1, vatom2);
2384 at = mkPCast16x2(mce, at);
2385 return at;
2386}
2387
sewardj3245c912004-12-10 14:58:26 +00002388
2389/*------------------------------------------------------------*/
sewardj95448072004-11-22 20:19:51 +00002390/*--- Generate shadow values from all kinds of IRExprs. ---*/
2391/*------------------------------------------------------------*/
2392
2393static
sewardje91cea72006-02-08 19:32:02 +00002394IRAtom* expr2vbits_Qop ( MCEnv* mce,
2395 IROp op,
2396 IRAtom* atom1, IRAtom* atom2,
2397 IRAtom* atom3, IRAtom* atom4 )
2398{
2399 IRAtom* vatom1 = expr2vbits( mce, atom1 );
2400 IRAtom* vatom2 = expr2vbits( mce, atom2 );
2401 IRAtom* vatom3 = expr2vbits( mce, atom3 );
2402 IRAtom* vatom4 = expr2vbits( mce, atom4 );
2403
2404 tl_assert(isOriginalAtom(mce,atom1));
2405 tl_assert(isOriginalAtom(mce,atom2));
2406 tl_assert(isOriginalAtom(mce,atom3));
2407 tl_assert(isOriginalAtom(mce,atom4));
2408 tl_assert(isShadowAtom(mce,vatom1));
2409 tl_assert(isShadowAtom(mce,vatom2));
2410 tl_assert(isShadowAtom(mce,vatom3));
2411 tl_assert(isShadowAtom(mce,vatom4));
2412 tl_assert(sameKindedAtoms(atom1,vatom1));
2413 tl_assert(sameKindedAtoms(atom2,vatom2));
2414 tl_assert(sameKindedAtoms(atom3,vatom3));
2415 tl_assert(sameKindedAtoms(atom4,vatom4));
2416 switch (op) {
2417 case Iop_MAddF64:
2418 case Iop_MAddF64r32:
2419 case Iop_MSubF64:
2420 case Iop_MSubF64r32:
2421 /* I32(rm) x F64 x F64 x F64 -> F64 */
2422 return mkLazy4(mce, Ity_I64, vatom1, vatom2, vatom3, vatom4);
sewardjb5b87402011-03-07 16:05:35 +00002423
2424 case Iop_MAddF32:
2425 case Iop_MSubF32:
2426 /* I32(rm) x F32 x F32 x F32 -> F32 */
2427 return mkLazy4(mce, Ity_I32, vatom1, vatom2, vatom3, vatom4);
2428
sewardj350e8f72012-06-25 07:52:15 +00002429 /* V256-bit data-steering */
2430 case Iop_64x4toV256:
2431 return assignNew('V', mce, Ity_V256,
2432 IRExpr_Qop(op, vatom1, vatom2, vatom3, vatom4));
2433
sewardje91cea72006-02-08 19:32:02 +00002434 default:
2435 ppIROp(op);
2436 VG_(tool_panic)("memcheck:expr2vbits_Qop");
2437 }
2438}
2439
2440
static
IRAtom* expr2vbits_Triop ( MCEnv* mce,
                           IROp op,
                           IRAtom* atom1, IRAtom* atom2, IRAtom* atom3 )
{
   /* Compute the shadow (V-bit) value for a ternary primop.  Most
      cases are rounding-mode-carrying FP/DFP ops, handled lazily by
      combining all three shadow args into one result of the
      appropriate shadow width (mkLazy3).  The Extract/SetElem cases
      instead require a *defined* index/shift operand, which is
      checked eagerly via complainIfUndefined. */
   IRAtom* vatom1 = expr2vbits( mce, atom1 );
   IRAtom* vatom2 = expr2vbits( mce, atom2 );
   IRAtom* vatom3 = expr2vbits( mce, atom3 );

   tl_assert(isOriginalAtom(mce,atom1));
   tl_assert(isOriginalAtom(mce,atom2));
   tl_assert(isOriginalAtom(mce,atom3));
   tl_assert(isShadowAtom(mce,vatom1));
   tl_assert(isShadowAtom(mce,vatom2));
   tl_assert(isShadowAtom(mce,vatom3));
   tl_assert(sameKindedAtoms(atom1,vatom1));
   tl_assert(sameKindedAtoms(atom2,vatom2));
   tl_assert(sameKindedAtoms(atom3,vatom3));
   switch (op) {
      case Iop_AddF128:
      case Iop_AddD128:
      case Iop_SubF128:
      case Iop_SubD128:
      case Iop_MulF128:
      case Iop_MulD128:
      case Iop_DivF128:
      case Iop_DivD128:
      case Iop_QuantizeD128:
         /* I32(rm) x F128/D128 x F128/D128 -> F128/D128 */
         return mkLazy3(mce, Ity_I128, vatom1, vatom2, vatom3);
      case Iop_AddF64:
      case Iop_AddD64:
      case Iop_AddF64r32:
      case Iop_SubF64:
      case Iop_SubD64:
      case Iop_SubF64r32:
      case Iop_MulF64:
      case Iop_MulD64:
      case Iop_MulF64r32:
      case Iop_DivF64:
      case Iop_DivD64:
      case Iop_DivF64r32:
      case Iop_ScaleF64:
      case Iop_Yl2xF64:
      case Iop_Yl2xp1F64:
      case Iop_AtanF64:
      case Iop_PRemF64:
      case Iop_PRem1F64:
      case Iop_QuantizeD64:
         /* I32(rm) x F64/D64 x F64/D64 -> F64/D64 */
         return mkLazy3(mce, Ity_I64, vatom1, vatom2, vatom3);
      case Iop_PRemC3210F64:
      case Iop_PRem1C3210F64:
         /* I32(rm) x F64 x F64 -> I32 */
         return mkLazy3(mce, Ity_I32, vatom1, vatom2, vatom3);
      case Iop_AddF32:
      case Iop_SubF32:
      case Iop_MulF32:
      case Iop_DivF32:
         /* I32(rm) x F32 x F32 -> F32 (an F32's shadow is I32) */
         return mkLazy3(mce, Ity_I32, vatom1, vatom2, vatom3);
      case Iop_SignificanceRoundD64:
         /* IRRoundingModeDFP(I32) x I8 x D64 -> D64 */
         return mkLazy3(mce, Ity_I64, vatom1, vatom2, vatom3);
      case Iop_SignificanceRoundD128:
         /* IRRoundingModeDFP(I32) x I8 x D128 -> D128 */
         return mkLazy3(mce, Ity_I128, vatom1, vatom2, vatom3);
      case Iop_ExtractV128:
         /* The extraction index (atom3) steers the result, so it
            must be fully defined; complain if not, then use the
            original (not shadow) index in the shadow expression. */
         complainIfUndefined(mce, atom3, NULL);
         return assignNew('V', mce, Ity_V128, triop(op, vatom1, vatom2, atom3));
      case Iop_Extract64:
         /* Same scheme as Iop_ExtractV128, for the 64-bit variant. */
         complainIfUndefined(mce, atom3, NULL);
         return assignNew('V', mce, Ity_I64, triop(op, vatom1, vatom2, atom3));
      case Iop_SetElem8x8:
      case Iop_SetElem16x4:
      case Iop_SetElem32x2:
         /* Here the lane index is atom2; it must be defined, and the
            original index is used while the vector and new element
            carry their shadow values. */
         complainIfUndefined(mce, atom2, NULL);
         return assignNew('V', mce, Ity_I64, triop(op, vatom1, atom2, vatom3));
      default:
         ppIROp(op);
         VG_(tool_panic)("memcheck:expr2vbits_Triop");
   }
}
2524
2525
2526static
sewardj95448072004-11-22 20:19:51 +00002527IRAtom* expr2vbits_Binop ( MCEnv* mce,
2528 IROp op,
2529 IRAtom* atom1, IRAtom* atom2 )
2530{
2531 IRType and_or_ty;
2532 IRAtom* (*uifu) (MCEnv*, IRAtom*, IRAtom*);
2533 IRAtom* (*difd) (MCEnv*, IRAtom*, IRAtom*);
2534 IRAtom* (*improve) (MCEnv*, IRAtom*, IRAtom*);
2535
2536 IRAtom* vatom1 = expr2vbits( mce, atom1 );
2537 IRAtom* vatom2 = expr2vbits( mce, atom2 );
2538
2539 tl_assert(isOriginalAtom(mce,atom1));
2540 tl_assert(isOriginalAtom(mce,atom2));
2541 tl_assert(isShadowAtom(mce,vatom1));
2542 tl_assert(isShadowAtom(mce,vatom2));
2543 tl_assert(sameKindedAtoms(atom1,vatom1));
2544 tl_assert(sameKindedAtoms(atom2,vatom2));
2545 switch (op) {
2546
sewardjc678b852010-09-22 00:58:51 +00002547 /* 32-bit SIMD */
2548
2549 case Iop_Add16x2:
2550 case Iop_HAdd16Ux2:
2551 case Iop_HAdd16Sx2:
2552 case Iop_Sub16x2:
2553 case Iop_HSub16Ux2:
2554 case Iop_HSub16Sx2:
2555 case Iop_QAdd16Sx2:
2556 case Iop_QSub16Sx2:
2557 return binary16Ix2(mce, vatom1, vatom2);
2558
2559 case Iop_Add8x4:
2560 case Iop_HAdd8Ux4:
2561 case Iop_HAdd8Sx4:
2562 case Iop_Sub8x4:
2563 case Iop_HSub8Ux4:
2564 case Iop_HSub8Sx4:
2565 case Iop_QSub8Ux4:
2566 case Iop_QAdd8Ux4:
2567 case Iop_QSub8Sx4:
2568 case Iop_QAdd8Sx4:
2569 return binary8Ix4(mce, vatom1, vatom2);
2570
sewardjacd2e912005-01-13 19:17:06 +00002571 /* 64-bit SIMD */
2572
sewardj57f92b02010-08-22 11:54:14 +00002573 case Iop_ShrN8x8:
sewardjacd2e912005-01-13 19:17:06 +00002574 case Iop_ShrN16x4:
2575 case Iop_ShrN32x2:
sewardj03809ae2006-12-27 01:16:58 +00002576 case Iop_SarN8x8:
sewardjacd2e912005-01-13 19:17:06 +00002577 case Iop_SarN16x4:
2578 case Iop_SarN32x2:
2579 case Iop_ShlN16x4:
2580 case Iop_ShlN32x2:
sewardj114a9172008-02-09 01:49:32 +00002581 case Iop_ShlN8x8:
sewardjacd2e912005-01-13 19:17:06 +00002582 /* Same scheme as with all other shifts. */
florian434ffae2012-07-19 17:23:42 +00002583 complainIfUndefined(mce, atom2, NULL);
sewardj7cf4e6b2008-05-01 20:24:26 +00002584 return assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2));
sewardjacd2e912005-01-13 19:17:06 +00002585
sewardj7ee7d852011-06-16 11:37:21 +00002586 case Iop_QNarrowBin32Sto16Sx4:
2587 case Iop_QNarrowBin16Sto8Sx8:
2588 case Iop_QNarrowBin16Sto8Ux8:
2589 return vectorNarrowBin64(mce, op, vatom1, vatom2);
sewardjacd2e912005-01-13 19:17:06 +00002590
2591 case Iop_Min8Ux8:
sewardj57f92b02010-08-22 11:54:14 +00002592 case Iop_Min8Sx8:
sewardjacd2e912005-01-13 19:17:06 +00002593 case Iop_Max8Ux8:
sewardj57f92b02010-08-22 11:54:14 +00002594 case Iop_Max8Sx8:
sewardjacd2e912005-01-13 19:17:06 +00002595 case Iop_Avg8Ux8:
2596 case Iop_QSub8Sx8:
2597 case Iop_QSub8Ux8:
2598 case Iop_Sub8x8:
2599 case Iop_CmpGT8Sx8:
sewardj57f92b02010-08-22 11:54:14 +00002600 case Iop_CmpGT8Ux8:
sewardjacd2e912005-01-13 19:17:06 +00002601 case Iop_CmpEQ8x8:
2602 case Iop_QAdd8Sx8:
2603 case Iop_QAdd8Ux8:
sewardj57f92b02010-08-22 11:54:14 +00002604 case Iop_QSal8x8:
2605 case Iop_QShl8x8:
sewardjacd2e912005-01-13 19:17:06 +00002606 case Iop_Add8x8:
sewardj57f92b02010-08-22 11:54:14 +00002607 case Iop_Mul8x8:
2608 case Iop_PolynomialMul8x8:
sewardjacd2e912005-01-13 19:17:06 +00002609 return binary8Ix8(mce, vatom1, vatom2);
2610
2611 case Iop_Min16Sx4:
sewardj57f92b02010-08-22 11:54:14 +00002612 case Iop_Min16Ux4:
sewardjacd2e912005-01-13 19:17:06 +00002613 case Iop_Max16Sx4:
sewardj57f92b02010-08-22 11:54:14 +00002614 case Iop_Max16Ux4:
sewardjacd2e912005-01-13 19:17:06 +00002615 case Iop_Avg16Ux4:
2616 case Iop_QSub16Ux4:
2617 case Iop_QSub16Sx4:
2618 case Iop_Sub16x4:
2619 case Iop_Mul16x4:
2620 case Iop_MulHi16Sx4:
2621 case Iop_MulHi16Ux4:
2622 case Iop_CmpGT16Sx4:
sewardj57f92b02010-08-22 11:54:14 +00002623 case Iop_CmpGT16Ux4:
sewardjacd2e912005-01-13 19:17:06 +00002624 case Iop_CmpEQ16x4:
2625 case Iop_QAdd16Sx4:
2626 case Iop_QAdd16Ux4:
sewardj57f92b02010-08-22 11:54:14 +00002627 case Iop_QSal16x4:
2628 case Iop_QShl16x4:
sewardjacd2e912005-01-13 19:17:06 +00002629 case Iop_Add16x4:
sewardj57f92b02010-08-22 11:54:14 +00002630 case Iop_QDMulHi16Sx4:
2631 case Iop_QRDMulHi16Sx4:
sewardjacd2e912005-01-13 19:17:06 +00002632 return binary16Ix4(mce, vatom1, vatom2);
2633
2634 case Iop_Sub32x2:
sewardj114a9172008-02-09 01:49:32 +00002635 case Iop_Mul32x2:
sewardj57f92b02010-08-22 11:54:14 +00002636 case Iop_Max32Sx2:
2637 case Iop_Max32Ux2:
2638 case Iop_Min32Sx2:
2639 case Iop_Min32Ux2:
sewardjacd2e912005-01-13 19:17:06 +00002640 case Iop_CmpGT32Sx2:
sewardj57f92b02010-08-22 11:54:14 +00002641 case Iop_CmpGT32Ux2:
sewardjacd2e912005-01-13 19:17:06 +00002642 case Iop_CmpEQ32x2:
2643 case Iop_Add32x2:
sewardj57f92b02010-08-22 11:54:14 +00002644 case Iop_QAdd32Ux2:
2645 case Iop_QAdd32Sx2:
2646 case Iop_QSub32Ux2:
2647 case Iop_QSub32Sx2:
2648 case Iop_QSal32x2:
2649 case Iop_QShl32x2:
2650 case Iop_QDMulHi32Sx2:
2651 case Iop_QRDMulHi32Sx2:
sewardjacd2e912005-01-13 19:17:06 +00002652 return binary32Ix2(mce, vatom1, vatom2);
2653
sewardj57f92b02010-08-22 11:54:14 +00002654 case Iop_QSub64Ux1:
2655 case Iop_QSub64Sx1:
2656 case Iop_QAdd64Ux1:
2657 case Iop_QAdd64Sx1:
2658 case Iop_QSal64x1:
2659 case Iop_QShl64x1:
2660 case Iop_Sal64x1:
2661 return binary64Ix1(mce, vatom1, vatom2);
2662
2663 case Iop_QShlN8Sx8:
2664 case Iop_QShlN8x8:
2665 case Iop_QSalN8x8:
florian434ffae2012-07-19 17:23:42 +00002666 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00002667 return mkPCast8x8(mce, vatom1);
2668
2669 case Iop_QShlN16Sx4:
2670 case Iop_QShlN16x4:
2671 case Iop_QSalN16x4:
florian434ffae2012-07-19 17:23:42 +00002672 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00002673 return mkPCast16x4(mce, vatom1);
2674
2675 case Iop_QShlN32Sx2:
2676 case Iop_QShlN32x2:
2677 case Iop_QSalN32x2:
florian434ffae2012-07-19 17:23:42 +00002678 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00002679 return mkPCast32x2(mce, vatom1);
2680
2681 case Iop_QShlN64Sx1:
2682 case Iop_QShlN64x1:
2683 case Iop_QSalN64x1:
florian434ffae2012-07-19 17:23:42 +00002684 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00002685 return mkPCast32x2(mce, vatom1);
2686
2687 case Iop_PwMax32Sx2:
2688 case Iop_PwMax32Ux2:
2689 case Iop_PwMin32Sx2:
2690 case Iop_PwMin32Ux2:
2691 case Iop_PwMax32Fx2:
2692 case Iop_PwMin32Fx2:
sewardj350e8f72012-06-25 07:52:15 +00002693 return assignNew('V', mce, Ity_I64,
2694 binop(Iop_PwMax32Ux2,
2695 mkPCast32x2(mce, vatom1),
2696 mkPCast32x2(mce, vatom2)));
sewardj57f92b02010-08-22 11:54:14 +00002697
2698 case Iop_PwMax16Sx4:
2699 case Iop_PwMax16Ux4:
2700 case Iop_PwMin16Sx4:
2701 case Iop_PwMin16Ux4:
sewardj350e8f72012-06-25 07:52:15 +00002702 return assignNew('V', mce, Ity_I64,
2703 binop(Iop_PwMax16Ux4,
2704 mkPCast16x4(mce, vatom1),
2705 mkPCast16x4(mce, vatom2)));
sewardj57f92b02010-08-22 11:54:14 +00002706
2707 case Iop_PwMax8Sx8:
2708 case Iop_PwMax8Ux8:
2709 case Iop_PwMin8Sx8:
2710 case Iop_PwMin8Ux8:
sewardj350e8f72012-06-25 07:52:15 +00002711 return assignNew('V', mce, Ity_I64,
2712 binop(Iop_PwMax8Ux8,
2713 mkPCast8x8(mce, vatom1),
2714 mkPCast8x8(mce, vatom2)));
sewardj57f92b02010-08-22 11:54:14 +00002715
2716 case Iop_PwAdd32x2:
2717 case Iop_PwAdd32Fx2:
2718 return mkPCast32x2(mce,
sewardj350e8f72012-06-25 07:52:15 +00002719 assignNew('V', mce, Ity_I64,
2720 binop(Iop_PwAdd32x2,
2721 mkPCast32x2(mce, vatom1),
2722 mkPCast32x2(mce, vatom2))));
sewardj57f92b02010-08-22 11:54:14 +00002723
2724 case Iop_PwAdd16x4:
2725 return mkPCast16x4(mce,
sewardj350e8f72012-06-25 07:52:15 +00002726 assignNew('V', mce, Ity_I64,
2727 binop(op, mkPCast16x4(mce, vatom1),
2728 mkPCast16x4(mce, vatom2))));
sewardj57f92b02010-08-22 11:54:14 +00002729
2730 case Iop_PwAdd8x8:
2731 return mkPCast8x8(mce,
sewardj350e8f72012-06-25 07:52:15 +00002732 assignNew('V', mce, Ity_I64,
2733 binop(op, mkPCast8x8(mce, vatom1),
2734 mkPCast8x8(mce, vatom2))));
sewardj57f92b02010-08-22 11:54:14 +00002735
2736 case Iop_Shl8x8:
2737 case Iop_Shr8x8:
2738 case Iop_Sar8x8:
2739 case Iop_Sal8x8:
2740 return mkUifU64(mce,
2741 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
2742 mkPCast8x8(mce,vatom2)
2743 );
2744
2745 case Iop_Shl16x4:
2746 case Iop_Shr16x4:
2747 case Iop_Sar16x4:
2748 case Iop_Sal16x4:
2749 return mkUifU64(mce,
2750 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
2751 mkPCast16x4(mce,vatom2)
2752 );
2753
2754 case Iop_Shl32x2:
2755 case Iop_Shr32x2:
2756 case Iop_Sar32x2:
2757 case Iop_Sal32x2:
2758 return mkUifU64(mce,
2759 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
2760 mkPCast32x2(mce,vatom2)
2761 );
2762
sewardjacd2e912005-01-13 19:17:06 +00002763 /* 64-bit data-steering */
2764 case Iop_InterleaveLO32x2:
2765 case Iop_InterleaveLO16x4:
2766 case Iop_InterleaveLO8x8:
2767 case Iop_InterleaveHI32x2:
2768 case Iop_InterleaveHI16x4:
2769 case Iop_InterleaveHI8x8:
sewardj57f92b02010-08-22 11:54:14 +00002770 case Iop_CatOddLanes8x8:
2771 case Iop_CatEvenLanes8x8:
sewardj114a9172008-02-09 01:49:32 +00002772 case Iop_CatOddLanes16x4:
2773 case Iop_CatEvenLanes16x4:
sewardj57f92b02010-08-22 11:54:14 +00002774 case Iop_InterleaveOddLanes8x8:
2775 case Iop_InterleaveEvenLanes8x8:
2776 case Iop_InterleaveOddLanes16x4:
2777 case Iop_InterleaveEvenLanes16x4:
sewardj7cf4e6b2008-05-01 20:24:26 +00002778 return assignNew('V', mce, Ity_I64, binop(op, vatom1, vatom2));
sewardjacd2e912005-01-13 19:17:06 +00002779
sewardj57f92b02010-08-22 11:54:14 +00002780 case Iop_GetElem8x8:
florian434ffae2012-07-19 17:23:42 +00002781 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00002782 return assignNew('V', mce, Ity_I8, binop(op, vatom1, atom2));
2783 case Iop_GetElem16x4:
florian434ffae2012-07-19 17:23:42 +00002784 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00002785 return assignNew('V', mce, Ity_I16, binop(op, vatom1, atom2));
2786 case Iop_GetElem32x2:
florian434ffae2012-07-19 17:23:42 +00002787 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00002788 return assignNew('V', mce, Ity_I32, binop(op, vatom1, atom2));
2789
sewardj114a9172008-02-09 01:49:32 +00002790 /* Perm8x8: rearrange values in left arg using steering values
2791 from right arg. So rearrange the vbits in the same way but
2792 pessimise wrt steering values. */
2793 case Iop_Perm8x8:
2794 return mkUifU64(
2795 mce,
sewardj7cf4e6b2008-05-01 20:24:26 +00002796 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
sewardj114a9172008-02-09 01:49:32 +00002797 mkPCast8x8(mce, vatom2)
2798 );
2799
sewardj20d38f22005-02-07 23:50:18 +00002800 /* V128-bit SIMD */
sewardj0b070592004-12-10 21:44:22 +00002801
sewardj57f92b02010-08-22 11:54:14 +00002802 case Iop_ShrN8x16:
sewardja1d93302004-12-12 16:45:06 +00002803 case Iop_ShrN16x8:
2804 case Iop_ShrN32x4:
2805 case Iop_ShrN64x2:
sewardj57f92b02010-08-22 11:54:14 +00002806 case Iop_SarN8x16:
sewardja1d93302004-12-12 16:45:06 +00002807 case Iop_SarN16x8:
2808 case Iop_SarN32x4:
sewardj57f92b02010-08-22 11:54:14 +00002809 case Iop_SarN64x2:
2810 case Iop_ShlN8x16:
sewardja1d93302004-12-12 16:45:06 +00002811 case Iop_ShlN16x8:
2812 case Iop_ShlN32x4:
2813 case Iop_ShlN64x2:
sewardj620eb5b2005-10-22 12:50:43 +00002814 /* Same scheme as with all other shifts. Note: 22 Oct 05:
2815 this is wrong now, scalar shifts are done properly lazily.
2816 Vector shifts should be fixed too. */
florian434ffae2012-07-19 17:23:42 +00002817 complainIfUndefined(mce, atom2, NULL);
sewardj7cf4e6b2008-05-01 20:24:26 +00002818 return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2));
sewardja1d93302004-12-12 16:45:06 +00002819
sewardjcbf8be72005-11-10 18:34:41 +00002820 /* V x V shifts/rotates are done using the standard lazy scheme. */
sewardj43d60752005-11-10 18:13:01 +00002821 case Iop_Shl8x16:
2822 case Iop_Shr8x16:
2823 case Iop_Sar8x16:
sewardj57f92b02010-08-22 11:54:14 +00002824 case Iop_Sal8x16:
sewardjcbf8be72005-11-10 18:34:41 +00002825 case Iop_Rol8x16:
sewardj43d60752005-11-10 18:13:01 +00002826 return mkUifUV128(mce,
sewardj7cf4e6b2008-05-01 20:24:26 +00002827 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
sewardj43d60752005-11-10 18:13:01 +00002828 mkPCast8x16(mce,vatom2)
2829 );
2830
2831 case Iop_Shl16x8:
2832 case Iop_Shr16x8:
2833 case Iop_Sar16x8:
sewardj57f92b02010-08-22 11:54:14 +00002834 case Iop_Sal16x8:
sewardjcbf8be72005-11-10 18:34:41 +00002835 case Iop_Rol16x8:
sewardj43d60752005-11-10 18:13:01 +00002836 return mkUifUV128(mce,
sewardj7cf4e6b2008-05-01 20:24:26 +00002837 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
sewardj43d60752005-11-10 18:13:01 +00002838 mkPCast16x8(mce,vatom2)
2839 );
2840
2841 case Iop_Shl32x4:
2842 case Iop_Shr32x4:
2843 case Iop_Sar32x4:
sewardj57f92b02010-08-22 11:54:14 +00002844 case Iop_Sal32x4:
sewardjcbf8be72005-11-10 18:34:41 +00002845 case Iop_Rol32x4:
sewardj43d60752005-11-10 18:13:01 +00002846 return mkUifUV128(mce,
sewardj7cf4e6b2008-05-01 20:24:26 +00002847 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
sewardj43d60752005-11-10 18:13:01 +00002848 mkPCast32x4(mce,vatom2)
2849 );
2850
sewardj57f92b02010-08-22 11:54:14 +00002851 case Iop_Shl64x2:
2852 case Iop_Shr64x2:
2853 case Iop_Sar64x2:
2854 case Iop_Sal64x2:
2855 return mkUifUV128(mce,
2856 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
2857 mkPCast64x2(mce,vatom2)
2858 );
2859
2860 case Iop_F32ToFixed32Ux4_RZ:
2861 case Iop_F32ToFixed32Sx4_RZ:
2862 case Iop_Fixed32UToF32x4_RN:
2863 case Iop_Fixed32SToF32x4_RN:
florian434ffae2012-07-19 17:23:42 +00002864 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00002865 return mkPCast32x4(mce, vatom1);
2866
2867 case Iop_F32ToFixed32Ux2_RZ:
2868 case Iop_F32ToFixed32Sx2_RZ:
2869 case Iop_Fixed32UToF32x2_RN:
2870 case Iop_Fixed32SToF32x2_RN:
florian434ffae2012-07-19 17:23:42 +00002871 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00002872 return mkPCast32x2(mce, vatom1);
2873
sewardja1d93302004-12-12 16:45:06 +00002874 case Iop_QSub8Ux16:
2875 case Iop_QSub8Sx16:
2876 case Iop_Sub8x16:
2877 case Iop_Min8Ux16:
sewardj43d60752005-11-10 18:13:01 +00002878 case Iop_Min8Sx16:
sewardja1d93302004-12-12 16:45:06 +00002879 case Iop_Max8Ux16:
sewardj43d60752005-11-10 18:13:01 +00002880 case Iop_Max8Sx16:
sewardja1d93302004-12-12 16:45:06 +00002881 case Iop_CmpGT8Sx16:
sewardj43d60752005-11-10 18:13:01 +00002882 case Iop_CmpGT8Ux16:
sewardja1d93302004-12-12 16:45:06 +00002883 case Iop_CmpEQ8x16:
2884 case Iop_Avg8Ux16:
sewardj43d60752005-11-10 18:13:01 +00002885 case Iop_Avg8Sx16:
sewardja1d93302004-12-12 16:45:06 +00002886 case Iop_QAdd8Ux16:
2887 case Iop_QAdd8Sx16:
sewardj57f92b02010-08-22 11:54:14 +00002888 case Iop_QSal8x16:
2889 case Iop_QShl8x16:
sewardja1d93302004-12-12 16:45:06 +00002890 case Iop_Add8x16:
sewardj57f92b02010-08-22 11:54:14 +00002891 case Iop_Mul8x16:
2892 case Iop_PolynomialMul8x16:
sewardja1d93302004-12-12 16:45:06 +00002893 return binary8Ix16(mce, vatom1, vatom2);
2894
2895 case Iop_QSub16Ux8:
2896 case Iop_QSub16Sx8:
2897 case Iop_Sub16x8:
2898 case Iop_Mul16x8:
2899 case Iop_MulHi16Sx8:
2900 case Iop_MulHi16Ux8:
2901 case Iop_Min16Sx8:
sewardj43d60752005-11-10 18:13:01 +00002902 case Iop_Min16Ux8:
sewardja1d93302004-12-12 16:45:06 +00002903 case Iop_Max16Sx8:
sewardj43d60752005-11-10 18:13:01 +00002904 case Iop_Max16Ux8:
sewardja1d93302004-12-12 16:45:06 +00002905 case Iop_CmpGT16Sx8:
sewardj43d60752005-11-10 18:13:01 +00002906 case Iop_CmpGT16Ux8:
sewardja1d93302004-12-12 16:45:06 +00002907 case Iop_CmpEQ16x8:
2908 case Iop_Avg16Ux8:
sewardj43d60752005-11-10 18:13:01 +00002909 case Iop_Avg16Sx8:
sewardja1d93302004-12-12 16:45:06 +00002910 case Iop_QAdd16Ux8:
2911 case Iop_QAdd16Sx8:
sewardj57f92b02010-08-22 11:54:14 +00002912 case Iop_QSal16x8:
2913 case Iop_QShl16x8:
sewardja1d93302004-12-12 16:45:06 +00002914 case Iop_Add16x8:
sewardj57f92b02010-08-22 11:54:14 +00002915 case Iop_QDMulHi16Sx8:
2916 case Iop_QRDMulHi16Sx8:
sewardja1d93302004-12-12 16:45:06 +00002917 return binary16Ix8(mce, vatom1, vatom2);
2918
2919 case Iop_Sub32x4:
2920 case Iop_CmpGT32Sx4:
sewardj43d60752005-11-10 18:13:01 +00002921 case Iop_CmpGT32Ux4:
sewardja1d93302004-12-12 16:45:06 +00002922 case Iop_CmpEQ32x4:
sewardj43d60752005-11-10 18:13:01 +00002923 case Iop_QAdd32Sx4:
2924 case Iop_QAdd32Ux4:
2925 case Iop_QSub32Sx4:
2926 case Iop_QSub32Ux4:
sewardj57f92b02010-08-22 11:54:14 +00002927 case Iop_QSal32x4:
2928 case Iop_QShl32x4:
sewardj43d60752005-11-10 18:13:01 +00002929 case Iop_Avg32Ux4:
2930 case Iop_Avg32Sx4:
sewardja1d93302004-12-12 16:45:06 +00002931 case Iop_Add32x4:
sewardj43d60752005-11-10 18:13:01 +00002932 case Iop_Max32Ux4:
2933 case Iop_Max32Sx4:
2934 case Iop_Min32Ux4:
2935 case Iop_Min32Sx4:
sewardjb823b852010-06-18 08:18:38 +00002936 case Iop_Mul32x4:
sewardj57f92b02010-08-22 11:54:14 +00002937 case Iop_QDMulHi32Sx4:
2938 case Iop_QRDMulHi32Sx4:
sewardja1d93302004-12-12 16:45:06 +00002939 return binary32Ix4(mce, vatom1, vatom2);
2940
2941 case Iop_Sub64x2:
2942 case Iop_Add64x2:
sewardj9a2afe92011-10-19 15:24:55 +00002943 case Iop_CmpEQ64x2:
sewardjb823b852010-06-18 08:18:38 +00002944 case Iop_CmpGT64Sx2:
sewardj57f92b02010-08-22 11:54:14 +00002945 case Iop_QSal64x2:
2946 case Iop_QShl64x2:
2947 case Iop_QAdd64Ux2:
2948 case Iop_QAdd64Sx2:
2949 case Iop_QSub64Ux2:
2950 case Iop_QSub64Sx2:
sewardja1d93302004-12-12 16:45:06 +00002951 return binary64Ix2(mce, vatom1, vatom2);
2952
sewardj7ee7d852011-06-16 11:37:21 +00002953 case Iop_QNarrowBin32Sto16Sx8:
2954 case Iop_QNarrowBin32Uto16Ux8:
2955 case Iop_QNarrowBin32Sto16Ux8:
2956 case Iop_QNarrowBin16Sto8Sx16:
2957 case Iop_QNarrowBin16Uto8Ux16:
2958 case Iop_QNarrowBin16Sto8Ux16:
2959 return vectorNarrowBinV128(mce, op, vatom1, vatom2);
sewardja1d93302004-12-12 16:45:06 +00002960
sewardj0b070592004-12-10 21:44:22 +00002961 case Iop_Sub64Fx2:
2962 case Iop_Mul64Fx2:
2963 case Iop_Min64Fx2:
2964 case Iop_Max64Fx2:
2965 case Iop_Div64Fx2:
2966 case Iop_CmpLT64Fx2:
2967 case Iop_CmpLE64Fx2:
2968 case Iop_CmpEQ64Fx2:
sewardj545663e2005-11-05 01:55:04 +00002969 case Iop_CmpUN64Fx2:
sewardj0b070592004-12-10 21:44:22 +00002970 case Iop_Add64Fx2:
2971 return binary64Fx2(mce, vatom1, vatom2);
2972
2973 case Iop_Sub64F0x2:
2974 case Iop_Mul64F0x2:
2975 case Iop_Min64F0x2:
2976 case Iop_Max64F0x2:
2977 case Iop_Div64F0x2:
2978 case Iop_CmpLT64F0x2:
2979 case Iop_CmpLE64F0x2:
2980 case Iop_CmpEQ64F0x2:
sewardj545663e2005-11-05 01:55:04 +00002981 case Iop_CmpUN64F0x2:
sewardj0b070592004-12-10 21:44:22 +00002982 case Iop_Add64F0x2:
2983 return binary64F0x2(mce, vatom1, vatom2);
2984
sewardj170ee212004-12-10 18:57:51 +00002985 case Iop_Sub32Fx4:
2986 case Iop_Mul32Fx4:
2987 case Iop_Min32Fx4:
2988 case Iop_Max32Fx4:
2989 case Iop_Div32Fx4:
2990 case Iop_CmpLT32Fx4:
2991 case Iop_CmpLE32Fx4:
2992 case Iop_CmpEQ32Fx4:
sewardj545663e2005-11-05 01:55:04 +00002993 case Iop_CmpUN32Fx4:
cerione78ba2a2005-11-14 03:00:35 +00002994 case Iop_CmpGT32Fx4:
2995 case Iop_CmpGE32Fx4:
sewardj3245c912004-12-10 14:58:26 +00002996 case Iop_Add32Fx4:
sewardj57f92b02010-08-22 11:54:14 +00002997 case Iop_Recps32Fx4:
2998 case Iop_Rsqrts32Fx4:
sewardj3245c912004-12-10 14:58:26 +00002999 return binary32Fx4(mce, vatom1, vatom2);
3000
sewardj57f92b02010-08-22 11:54:14 +00003001 case Iop_Sub32Fx2:
3002 case Iop_Mul32Fx2:
3003 case Iop_Min32Fx2:
3004 case Iop_Max32Fx2:
3005 case Iop_CmpEQ32Fx2:
3006 case Iop_CmpGT32Fx2:
3007 case Iop_CmpGE32Fx2:
3008 case Iop_Add32Fx2:
3009 case Iop_Recps32Fx2:
3010 case Iop_Rsqrts32Fx2:
3011 return binary32Fx2(mce, vatom1, vatom2);
3012
sewardj170ee212004-12-10 18:57:51 +00003013 case Iop_Sub32F0x4:
3014 case Iop_Mul32F0x4:
3015 case Iop_Min32F0x4:
3016 case Iop_Max32F0x4:
3017 case Iop_Div32F0x4:
3018 case Iop_CmpLT32F0x4:
3019 case Iop_CmpLE32F0x4:
3020 case Iop_CmpEQ32F0x4:
sewardj545663e2005-11-05 01:55:04 +00003021 case Iop_CmpUN32F0x4:
sewardj170ee212004-12-10 18:57:51 +00003022 case Iop_Add32F0x4:
3023 return binary32F0x4(mce, vatom1, vatom2);
3024
sewardj57f92b02010-08-22 11:54:14 +00003025 case Iop_QShlN8Sx16:
3026 case Iop_QShlN8x16:
3027 case Iop_QSalN8x16:
florian434ffae2012-07-19 17:23:42 +00003028 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00003029 return mkPCast8x16(mce, vatom1);
3030
3031 case Iop_QShlN16Sx8:
3032 case Iop_QShlN16x8:
3033 case Iop_QSalN16x8:
florian434ffae2012-07-19 17:23:42 +00003034 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00003035 return mkPCast16x8(mce, vatom1);
3036
3037 case Iop_QShlN32Sx4:
3038 case Iop_QShlN32x4:
3039 case Iop_QSalN32x4:
florian434ffae2012-07-19 17:23:42 +00003040 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00003041 return mkPCast32x4(mce, vatom1);
3042
3043 case Iop_QShlN64Sx2:
3044 case Iop_QShlN64x2:
3045 case Iop_QSalN64x2:
florian434ffae2012-07-19 17:23:42 +00003046 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00003047 return mkPCast32x4(mce, vatom1);
3048
3049 case Iop_Mull32Sx2:
3050 case Iop_Mull32Ux2:
3051 case Iop_QDMulLong32Sx2:
sewardj7ee7d852011-06-16 11:37:21 +00003052 return vectorWidenI64(mce, Iop_Widen32Sto64x2,
3053 mkUifU64(mce, vatom1, vatom2));
sewardj57f92b02010-08-22 11:54:14 +00003054
3055 case Iop_Mull16Sx4:
3056 case Iop_Mull16Ux4:
3057 case Iop_QDMulLong16Sx4:
sewardj7ee7d852011-06-16 11:37:21 +00003058 return vectorWidenI64(mce, Iop_Widen16Sto32x4,
3059 mkUifU64(mce, vatom1, vatom2));
sewardj57f92b02010-08-22 11:54:14 +00003060
3061 case Iop_Mull8Sx8:
3062 case Iop_Mull8Ux8:
3063 case Iop_PolynomialMull8x8:
sewardj7ee7d852011-06-16 11:37:21 +00003064 return vectorWidenI64(mce, Iop_Widen8Sto16x8,
3065 mkUifU64(mce, vatom1, vatom2));
sewardj57f92b02010-08-22 11:54:14 +00003066
3067 case Iop_PwAdd32x4:
3068 return mkPCast32x4(mce,
3069 assignNew('V', mce, Ity_V128, binop(op, mkPCast32x4(mce, vatom1),
3070 mkPCast32x4(mce, vatom2))));
3071
3072 case Iop_PwAdd16x8:
3073 return mkPCast16x8(mce,
3074 assignNew('V', mce, Ity_V128, binop(op, mkPCast16x8(mce, vatom1),
3075 mkPCast16x8(mce, vatom2))));
3076
3077 case Iop_PwAdd8x16:
3078 return mkPCast8x16(mce,
3079 assignNew('V', mce, Ity_V128, binop(op, mkPCast8x16(mce, vatom1),
3080 mkPCast8x16(mce, vatom2))));
3081
sewardj20d38f22005-02-07 23:50:18 +00003082 /* V128-bit data-steering */
3083 case Iop_SetV128lo32:
3084 case Iop_SetV128lo64:
3085 case Iop_64HLtoV128:
sewardja1d93302004-12-12 16:45:06 +00003086 case Iop_InterleaveLO64x2:
3087 case Iop_InterleaveLO32x4:
3088 case Iop_InterleaveLO16x8:
3089 case Iop_InterleaveLO8x16:
3090 case Iop_InterleaveHI64x2:
3091 case Iop_InterleaveHI32x4:
3092 case Iop_InterleaveHI16x8:
3093 case Iop_InterleaveHI8x16:
sewardj57f92b02010-08-22 11:54:14 +00003094 case Iop_CatOddLanes8x16:
3095 case Iop_CatOddLanes16x8:
3096 case Iop_CatOddLanes32x4:
3097 case Iop_CatEvenLanes8x16:
3098 case Iop_CatEvenLanes16x8:
3099 case Iop_CatEvenLanes32x4:
3100 case Iop_InterleaveOddLanes8x16:
3101 case Iop_InterleaveOddLanes16x8:
3102 case Iop_InterleaveOddLanes32x4:
3103 case Iop_InterleaveEvenLanes8x16:
3104 case Iop_InterleaveEvenLanes16x8:
3105 case Iop_InterleaveEvenLanes32x4:
sewardj7cf4e6b2008-05-01 20:24:26 +00003106 return assignNew('V', mce, Ity_V128, binop(op, vatom1, vatom2));
sewardj57f92b02010-08-22 11:54:14 +00003107
3108 case Iop_GetElem8x16:
florian434ffae2012-07-19 17:23:42 +00003109 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00003110 return assignNew('V', mce, Ity_I8, binop(op, vatom1, atom2));
3111 case Iop_GetElem16x8:
florian434ffae2012-07-19 17:23:42 +00003112 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00003113 return assignNew('V', mce, Ity_I16, binop(op, vatom1, atom2));
3114 case Iop_GetElem32x4:
florian434ffae2012-07-19 17:23:42 +00003115 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00003116 return assignNew('V', mce, Ity_I32, binop(op, vatom1, atom2));
3117 case Iop_GetElem64x2:
florian434ffae2012-07-19 17:23:42 +00003118 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00003119 return assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2));
3120
sewardj620eb5b2005-10-22 12:50:43 +00003121 /* Perm8x16: rearrange values in left arg using steering values
3122 from right arg. So rearrange the vbits in the same way but
sewardj350e8f72012-06-25 07:52:15 +00003123 pessimise wrt steering values. Perm32x4 ditto. */
sewardj620eb5b2005-10-22 12:50:43 +00003124 case Iop_Perm8x16:
3125 return mkUifUV128(
3126 mce,
sewardj7cf4e6b2008-05-01 20:24:26 +00003127 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
sewardj620eb5b2005-10-22 12:50:43 +00003128 mkPCast8x16(mce, vatom2)
3129 );
sewardj350e8f72012-06-25 07:52:15 +00003130 case Iop_Perm32x4:
3131 return mkUifUV128(
3132 mce,
3133 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
3134 mkPCast32x4(mce, vatom2)
3135 );
sewardj170ee212004-12-10 18:57:51 +00003136
sewardj43d60752005-11-10 18:13:01 +00003137 /* These two take the lower half of each 16-bit lane, sign/zero
3138 extend it to 32, and multiply together, producing a 32x4
3139 result (and implicitly ignoring half the operand bits). So
3140 treat it as a bunch of independent 16x8 operations, but then
3141 do 32-bit shifts left-right to copy the lower half results
3142 (which are all 0s or all 1s due to PCasting in binary16Ix8)
3143 into the upper half of each result lane. */
3144 case Iop_MullEven16Ux8:
3145 case Iop_MullEven16Sx8: {
3146 IRAtom* at;
3147 at = binary16Ix8(mce,vatom1,vatom2);
sewardj7cf4e6b2008-05-01 20:24:26 +00003148 at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN32x4, at, mkU8(16)));
3149 at = assignNew('V', mce, Ity_V128, binop(Iop_SarN32x4, at, mkU8(16)));
sewardj43d60752005-11-10 18:13:01 +00003150 return at;
3151 }
3152
3153 /* Same deal as Iop_MullEven16{S,U}x8 */
3154 case Iop_MullEven8Ux16:
3155 case Iop_MullEven8Sx16: {
3156 IRAtom* at;
3157 at = binary8Ix16(mce,vatom1,vatom2);
sewardj7cf4e6b2008-05-01 20:24:26 +00003158 at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN16x8, at, mkU8(8)));
3159 at = assignNew('V', mce, Ity_V128, binop(Iop_SarN16x8, at, mkU8(8)));
sewardj43d60752005-11-10 18:13:01 +00003160 return at;
3161 }
3162
3163 /* narrow 2xV128 into 1xV128, hi half from left arg, in a 2 x
3164 32x4 -> 16x8 laneage, discarding the upper half of each lane.
3165 Simply apply same op to the V bits, since this really no more
3166 than a data steering operation. */
sewardj7ee7d852011-06-16 11:37:21 +00003167 case Iop_NarrowBin32to16x8:
3168 case Iop_NarrowBin16to8x16:
sewardj7cf4e6b2008-05-01 20:24:26 +00003169 return assignNew('V', mce, Ity_V128,
3170 binop(op, vatom1, vatom2));
sewardj43d60752005-11-10 18:13:01 +00003171
3172 case Iop_ShrV128:
3173 case Iop_ShlV128:
3174 /* Same scheme as with all other shifts. Note: 10 Nov 05:
3175 this is wrong now, scalar shifts are done properly lazily.
3176 Vector shifts should be fixed too. */
florian434ffae2012-07-19 17:23:42 +00003177 complainIfUndefined(mce, atom2, NULL);
sewardj7cf4e6b2008-05-01 20:24:26 +00003178 return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2));
sewardj43d60752005-11-10 18:13:01 +00003179
sewardj69a13322005-04-23 01:14:51 +00003180 /* I128-bit data-steering */
3181 case Iop_64HLto128:
sewardj7cf4e6b2008-05-01 20:24:26 +00003182 return assignNew('V', mce, Ity_I128, binop(op, vatom1, vatom2));
sewardj69a13322005-04-23 01:14:51 +00003183
sewardj350e8f72012-06-25 07:52:15 +00003184 /* V256-bit SIMD */
3185
3186 case Iop_Add64Fx4:
3187 case Iop_Sub64Fx4:
3188 case Iop_Mul64Fx4:
3189 case Iop_Div64Fx4:
3190 case Iop_Max64Fx4:
3191 case Iop_Min64Fx4:
3192 return binary64Fx4(mce, vatom1, vatom2);
3193
3194 case Iop_Add32Fx8:
3195 case Iop_Sub32Fx8:
3196 case Iop_Mul32Fx8:
3197 case Iop_Div32Fx8:
3198 case Iop_Max32Fx8:
3199 case Iop_Min32Fx8:
3200 return binary32Fx8(mce, vatom1, vatom2);
3201
3202 /* V256-bit data-steering */
3203 case Iop_V128HLtoV256:
3204 return assignNew('V', mce, Ity_V256, binop(op, vatom1, vatom2));
3205
sewardj3245c912004-12-10 14:58:26 +00003206 /* Scalar floating point */
3207
sewardjb5b87402011-03-07 16:05:35 +00003208 case Iop_F32toI64S:
3209 /* I32(rm) x F32 -> I64 */
3210 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3211
3212 case Iop_I64StoF32:
3213 /* I32(rm) x I64 -> F32 */
3214 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3215
sewardjed69fdb2006-02-03 16:12:27 +00003216 case Iop_RoundF64toInt:
3217 case Iop_RoundF64toF32:
sewardj06f96d02009-12-31 19:24:12 +00003218 case Iop_F64toI64S:
sewardja201c452011-07-24 14:15:54 +00003219 case Iop_F64toI64U:
sewardj06f96d02009-12-31 19:24:12 +00003220 case Iop_I64StoF64:
sewardjf34eb492011-04-15 11:57:05 +00003221 case Iop_I64UtoF64:
sewardj22ac5f42006-02-03 22:55:04 +00003222 case Iop_SinF64:
3223 case Iop_CosF64:
3224 case Iop_TanF64:
3225 case Iop_2xm1F64:
3226 case Iop_SqrtF64:
3227 /* I32(rm) x I64/F64 -> I64/F64 */
sewardj95448072004-11-22 20:19:51 +00003228 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3229
sewardjea8b02f2012-04-12 17:28:57 +00003230 case Iop_ShlD64:
3231 case Iop_ShrD64:
sewardj18c72fa2012-04-23 11:22:05 +00003232 case Iop_RoundD64toInt:
sewardjea8b02f2012-04-12 17:28:57 +00003233 /* I32(DFP rm) x D64 -> D64 */
3234 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3235
3236 case Iop_ShlD128:
3237 case Iop_ShrD128:
sewardj18c72fa2012-04-23 11:22:05 +00003238 case Iop_RoundD128toInt:
3239 /* I32(DFP rm) x D128 -> D128 */
sewardjea8b02f2012-04-12 17:28:57 +00003240 return mkLazy2(mce, Ity_I128, vatom1, vatom2);
3241
3242 case Iop_D64toI64S:
3243 case Iop_I64StoD64:
3244 /* I64(DFP rm) x I64 -> D64 */
3245 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3246
sewardjd376a762010-06-27 09:08:54 +00003247 case Iop_RoundF32toInt:
sewardjaec1be32010-01-03 22:29:32 +00003248 case Iop_SqrtF32:
3249 /* I32(rm) x I32/F32 -> I32/F32 */
3250 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3251
sewardjb5b87402011-03-07 16:05:35 +00003252 case Iop_SqrtF128:
3253 /* I32(rm) x F128 -> F128 */
3254 return mkLazy2(mce, Ity_I128, vatom1, vatom2);
3255
3256 case Iop_I32StoF32:
3257 case Iop_F32toI32S:
3258 /* First arg is I32 (rounding mode), second is F32/I32 (data). */
3259 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3260
3261 case Iop_F128toI32S: /* IRRoundingMode(I32) x F128 -> signed I32 */
3262 case Iop_F128toF32: /* IRRoundingMode(I32) x F128 -> F32 */
3263 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3264
3265 case Iop_F128toI64S: /* IRRoundingMode(I32) x F128 -> signed I64 */
3266 case Iop_F128toF64: /* IRRoundingMode(I32) x F128 -> F64 */
sewardjea8b02f2012-04-12 17:28:57 +00003267 case Iop_D128toD64: /* IRRoundingModeDFP(I64) x D128 -> D64 */
3268 case Iop_D128toI64S: /* IRRoundingModeDFP(I64) x D128 -> signed I64 */
sewardjb5b87402011-03-07 16:05:35 +00003269 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3270
3271 case Iop_F64HLtoF128:
sewardjb0ccb4d2012-04-02 10:22:05 +00003272 case Iop_D64HLtoD128:
sewardj350e8f72012-06-25 07:52:15 +00003273 return assignNew('V', mce, Ity_I128,
3274 binop(Iop_64HLto128, vatom1, vatom2));
sewardjb5b87402011-03-07 16:05:35 +00003275
sewardj59570ff2010-01-01 11:59:33 +00003276 case Iop_F64toI32U:
sewardj06f96d02009-12-31 19:24:12 +00003277 case Iop_F64toI32S:
sewardje9e16d32004-12-10 13:17:55 +00003278 case Iop_F64toF32:
sewardjf34eb492011-04-15 11:57:05 +00003279 case Iop_I64UtoF32:
sewardj95448072004-11-22 20:19:51 +00003280 /* First arg is I32 (rounding mode), second is F64 (data). */
3281 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3282
sewardjea8b02f2012-04-12 17:28:57 +00003283 case Iop_D64toD32:
3284 /* First arg is I64 (DFProunding mode), second is D64 (data). */
3285 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3286
sewardj06f96d02009-12-31 19:24:12 +00003287 case Iop_F64toI16S:
sewardj95448072004-11-22 20:19:51 +00003288 /* First arg is I32 (rounding mode), second is F64 (data). */
3289 return mkLazy2(mce, Ity_I16, vatom1, vatom2);
3290
sewardj18c72fa2012-04-23 11:22:05 +00003291 case Iop_InsertExpD64:
3292 /* I64 x I64 -> D64 */
3293 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3294
3295 case Iop_InsertExpD128:
3296 /* I64 x I128 -> D128 */
3297 return mkLazy2(mce, Ity_I128, vatom1, vatom2);
3298
sewardjb5b87402011-03-07 16:05:35 +00003299 case Iop_CmpF32:
sewardj95448072004-11-22 20:19:51 +00003300 case Iop_CmpF64:
sewardjb5b87402011-03-07 16:05:35 +00003301 case Iop_CmpF128:
sewardj18c72fa2012-04-23 11:22:05 +00003302 case Iop_CmpD64:
3303 case Iop_CmpD128:
sewardj95448072004-11-22 20:19:51 +00003304 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3305
3306 /* non-FP after here */
3307
3308 case Iop_DivModU64to32:
3309 case Iop_DivModS64to32:
3310 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3311
sewardj69a13322005-04-23 01:14:51 +00003312 case Iop_DivModU128to64:
3313 case Iop_DivModS128to64:
3314 return mkLazy2(mce, Ity_I128, vatom1, vatom2);
3315
florian537ed2d2012-08-20 16:51:39 +00003316 case Iop_8HLto16:
3317 return assignNew('V', mce, Ity_I16, binop(op, vatom1, vatom2));
sewardj95448072004-11-22 20:19:51 +00003318 case Iop_16HLto32:
sewardj7cf4e6b2008-05-01 20:24:26 +00003319 return assignNew('V', mce, Ity_I32, binop(op, vatom1, vatom2));
sewardj95448072004-11-22 20:19:51 +00003320 case Iop_32HLto64:
sewardj7cf4e6b2008-05-01 20:24:26 +00003321 return assignNew('V', mce, Ity_I64, binop(op, vatom1, vatom2));
sewardj95448072004-11-22 20:19:51 +00003322
sewardjb5b87402011-03-07 16:05:35 +00003323 case Iop_DivModS64to64:
sewardj6cf40ff2005-04-20 22:31:26 +00003324 case Iop_MullS64:
3325 case Iop_MullU64: {
3326 IRAtom* vLo64 = mkLeft64(mce, mkUifU64(mce, vatom1,vatom2));
3327 IRAtom* vHi64 = mkPCastTo(mce, Ity_I64, vLo64);
sewardj350e8f72012-06-25 07:52:15 +00003328 return assignNew('V', mce, Ity_I128,
3329 binop(Iop_64HLto128, vHi64, vLo64));
sewardj6cf40ff2005-04-20 22:31:26 +00003330 }
3331
sewardj95448072004-11-22 20:19:51 +00003332 case Iop_MullS32:
3333 case Iop_MullU32: {
3334 IRAtom* vLo32 = mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
3335 IRAtom* vHi32 = mkPCastTo(mce, Ity_I32, vLo32);
sewardj350e8f72012-06-25 07:52:15 +00003336 return assignNew('V', mce, Ity_I64,
3337 binop(Iop_32HLto64, vHi32, vLo32));
sewardj95448072004-11-22 20:19:51 +00003338 }
3339
3340 case Iop_MullS16:
3341 case Iop_MullU16: {
3342 IRAtom* vLo16 = mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
3343 IRAtom* vHi16 = mkPCastTo(mce, Ity_I16, vLo16);
sewardj350e8f72012-06-25 07:52:15 +00003344 return assignNew('V', mce, Ity_I32,
3345 binop(Iop_16HLto32, vHi16, vLo16));
sewardj95448072004-11-22 20:19:51 +00003346 }
3347
3348 case Iop_MullS8:
3349 case Iop_MullU8: {
3350 IRAtom* vLo8 = mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
3351 IRAtom* vHi8 = mkPCastTo(mce, Ity_I8, vLo8);
sewardj7cf4e6b2008-05-01 20:24:26 +00003352 return assignNew('V', mce, Ity_I16, binop(Iop_8HLto16, vHi8, vLo8));
sewardj95448072004-11-22 20:19:51 +00003353 }
3354
sewardj5af05062010-10-18 16:31:14 +00003355 case Iop_Sad8Ux4: /* maybe we could do better? ftm, do mkLazy2. */
cerion9e591082005-06-23 15:28:34 +00003356 case Iop_DivS32:
3357 case Iop_DivU32:
sewardja201c452011-07-24 14:15:54 +00003358 case Iop_DivU32E:
sewardj169ac042011-09-05 12:12:34 +00003359 case Iop_DivS32E:
sewardj2157b2c2012-07-11 13:20:58 +00003360 case Iop_QAdd32S: /* could probably do better */
3361 case Iop_QSub32S: /* could probably do better */
cerion9e591082005-06-23 15:28:34 +00003362 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3363
sewardjb00944a2005-12-23 12:47:16 +00003364 case Iop_DivS64:
3365 case Iop_DivU64:
sewardja201c452011-07-24 14:15:54 +00003366 case Iop_DivS64E:
sewardj169ac042011-09-05 12:12:34 +00003367 case Iop_DivU64E:
sewardjb00944a2005-12-23 12:47:16 +00003368 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3369
sewardj95448072004-11-22 20:19:51 +00003370 case Iop_Add32:
sewardj54eac252012-03-27 10:19:39 +00003371 if (mce->bogusLiterals || mce->useLLVMworkarounds)
sewardjd5204dc2004-12-31 01:16:11 +00003372 return expensiveAddSub(mce,True,Ity_I32,
3373 vatom1,vatom2, atom1,atom2);
3374 else
3375 goto cheap_AddSub32;
sewardj95448072004-11-22 20:19:51 +00003376 case Iop_Sub32:
sewardjd5204dc2004-12-31 01:16:11 +00003377 if (mce->bogusLiterals)
3378 return expensiveAddSub(mce,False,Ity_I32,
3379 vatom1,vatom2, atom1,atom2);
3380 else
3381 goto cheap_AddSub32;
3382
3383 cheap_AddSub32:
sewardj95448072004-11-22 20:19:51 +00003384 case Iop_Mul32:
sewardj992dff92005-10-07 11:08:55 +00003385 return mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
3386
sewardj463b3d92005-07-18 11:41:15 +00003387 case Iop_CmpORD32S:
3388 case Iop_CmpORD32U:
sewardj1bc82102005-12-23 00:16:24 +00003389 case Iop_CmpORD64S:
3390 case Iop_CmpORD64U:
3391 return doCmpORD(mce, op, vatom1,vatom2, atom1,atom2);
sewardj95448072004-11-22 20:19:51 +00003392
sewardj681be302005-01-15 20:43:58 +00003393 case Iop_Add64:
sewardj54eac252012-03-27 10:19:39 +00003394 if (mce->bogusLiterals || mce->useLLVMworkarounds)
tomd9774d72005-06-27 08:11:01 +00003395 return expensiveAddSub(mce,True,Ity_I64,
3396 vatom1,vatom2, atom1,atom2);
3397 else
3398 goto cheap_AddSub64;
sewardj681be302005-01-15 20:43:58 +00003399 case Iop_Sub64:
tomd9774d72005-06-27 08:11:01 +00003400 if (mce->bogusLiterals)
3401 return expensiveAddSub(mce,False,Ity_I64,
3402 vatom1,vatom2, atom1,atom2);
3403 else
3404 goto cheap_AddSub64;
3405
3406 cheap_AddSub64:
3407 case Iop_Mul64:
sewardj681be302005-01-15 20:43:58 +00003408 return mkLeft64(mce, mkUifU64(mce, vatom1,vatom2));
3409
sewardj95448072004-11-22 20:19:51 +00003410 case Iop_Mul16:
3411 case Iop_Add16:
3412 case Iop_Sub16:
3413 return mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
3414
florian537ed2d2012-08-20 16:51:39 +00003415 case Iop_Mul8:
sewardj95448072004-11-22 20:19:51 +00003416 case Iop_Sub8:
3417 case Iop_Add8:
3418 return mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
3419
sewardj69a13322005-04-23 01:14:51 +00003420 case Iop_CmpEQ64:
sewardje6f8af42005-07-06 18:48:59 +00003421 case Iop_CmpNE64:
sewardj69a13322005-04-23 01:14:51 +00003422 if (mce->bogusLiterals)
3423 return expensiveCmpEQorNE(mce,Ity_I64, vatom1,vatom2, atom1,atom2 );
3424 else
3425 goto cheap_cmp64;
3426 cheap_cmp64:
tomcd986332005-04-26 07:44:48 +00003427 case Iop_CmpLE64S: case Iop_CmpLE64U:
3428 case Iop_CmpLT64U: case Iop_CmpLT64S:
sewardj69a13322005-04-23 01:14:51 +00003429 return mkPCastTo(mce, Ity_I1, mkUifU64(mce, vatom1,vatom2));
3430
sewardjd5204dc2004-12-31 01:16:11 +00003431 case Iop_CmpEQ32:
sewardje6f8af42005-07-06 18:48:59 +00003432 case Iop_CmpNE32:
sewardjd5204dc2004-12-31 01:16:11 +00003433 if (mce->bogusLiterals)
3434 return expensiveCmpEQorNE(mce,Ity_I32, vatom1,vatom2, atom1,atom2 );
3435 else
3436 goto cheap_cmp32;
sewardjd5204dc2004-12-31 01:16:11 +00003437 cheap_cmp32:
sewardj95448072004-11-22 20:19:51 +00003438 case Iop_CmpLE32S: case Iop_CmpLE32U:
3439 case Iop_CmpLT32U: case Iop_CmpLT32S:
sewardj95448072004-11-22 20:19:51 +00003440 return mkPCastTo(mce, Ity_I1, mkUifU32(mce, vatom1,vatom2));
3441
3442 case Iop_CmpEQ16: case Iop_CmpNE16:
3443 return mkPCastTo(mce, Ity_I1, mkUifU16(mce, vatom1,vatom2));
3444
3445 case Iop_CmpEQ8: case Iop_CmpNE8:
3446 return mkPCastTo(mce, Ity_I1, mkUifU8(mce, vatom1,vatom2));
3447
sewardjafed4c52009-07-12 13:00:17 +00003448 case Iop_CasCmpEQ8: case Iop_CasCmpNE8:
3449 case Iop_CasCmpEQ16: case Iop_CasCmpNE16:
3450 case Iop_CasCmpEQ32: case Iop_CasCmpNE32:
3451 case Iop_CasCmpEQ64: case Iop_CasCmpNE64:
3452 /* Just say these all produce a defined result, regardless
3453 of their arguments. See COMMENT_ON_CasCmpEQ in this file. */
3454 return assignNew('V', mce, Ity_I1, definedOfType(Ity_I1));
3455
sewardjaaddbc22005-10-07 09:49:53 +00003456 case Iop_Shl64: case Iop_Shr64: case Iop_Sar64:
3457 return scalarShift( mce, Ity_I64, op, vatom1,vatom2, atom1,atom2 );
3458
sewardj95448072004-11-22 20:19:51 +00003459 case Iop_Shl32: case Iop_Shr32: case Iop_Sar32:
sewardjaaddbc22005-10-07 09:49:53 +00003460 return scalarShift( mce, Ity_I32, op, vatom1,vatom2, atom1,atom2 );
sewardj95448072004-11-22 20:19:51 +00003461
sewardjdb67f5f2004-12-14 01:15:31 +00003462 case Iop_Shl16: case Iop_Shr16: case Iop_Sar16:
sewardjaaddbc22005-10-07 09:49:53 +00003463 return scalarShift( mce, Ity_I16, op, vatom1,vatom2, atom1,atom2 );
sewardj95448072004-11-22 20:19:51 +00003464
florian537ed2d2012-08-20 16:51:39 +00003465 case Iop_Shl8: case Iop_Shr8: case Iop_Sar8:
sewardjaaddbc22005-10-07 09:49:53 +00003466 return scalarShift( mce, Ity_I8, op, vatom1,vatom2, atom1,atom2 );
sewardj95448072004-11-22 20:19:51 +00003467
sewardj350e8f72012-06-25 07:52:15 +00003468 case Iop_AndV256:
3469 uifu = mkUifUV256; difd = mkDifDV256;
3470 and_or_ty = Ity_V256; improve = mkImproveANDV256; goto do_And_Or;
sewardj20d38f22005-02-07 23:50:18 +00003471 case Iop_AndV128:
3472 uifu = mkUifUV128; difd = mkDifDV128;
3473 and_or_ty = Ity_V128; improve = mkImproveANDV128; goto do_And_Or;
sewardj7010f6e2004-12-10 13:35:22 +00003474 case Iop_And64:
3475 uifu = mkUifU64; difd = mkDifD64;
3476 and_or_ty = Ity_I64; improve = mkImproveAND64; goto do_And_Or;
sewardj95448072004-11-22 20:19:51 +00003477 case Iop_And32:
3478 uifu = mkUifU32; difd = mkDifD32;
3479 and_or_ty = Ity_I32; improve = mkImproveAND32; goto do_And_Or;
3480 case Iop_And16:
3481 uifu = mkUifU16; difd = mkDifD16;
3482 and_or_ty = Ity_I16; improve = mkImproveAND16; goto do_And_Or;
3483 case Iop_And8:
3484 uifu = mkUifU8; difd = mkDifD8;
3485 and_or_ty = Ity_I8; improve = mkImproveAND8; goto do_And_Or;
3486
sewardj350e8f72012-06-25 07:52:15 +00003487 case Iop_OrV256:
3488 uifu = mkUifUV256; difd = mkDifDV256;
3489 and_or_ty = Ity_V256; improve = mkImproveORV256; goto do_And_Or;
sewardj20d38f22005-02-07 23:50:18 +00003490 case Iop_OrV128:
3491 uifu = mkUifUV128; difd = mkDifDV128;
3492 and_or_ty = Ity_V128; improve = mkImproveORV128; goto do_And_Or;
sewardj7010f6e2004-12-10 13:35:22 +00003493 case Iop_Or64:
3494 uifu = mkUifU64; difd = mkDifD64;
3495 and_or_ty = Ity_I64; improve = mkImproveOR64; goto do_And_Or;
sewardj95448072004-11-22 20:19:51 +00003496 case Iop_Or32:
3497 uifu = mkUifU32; difd = mkDifD32;
3498 and_or_ty = Ity_I32; improve = mkImproveOR32; goto do_And_Or;
3499 case Iop_Or16:
3500 uifu = mkUifU16; difd = mkDifD16;
3501 and_or_ty = Ity_I16; improve = mkImproveOR16; goto do_And_Or;
3502 case Iop_Or8:
3503 uifu = mkUifU8; difd = mkDifD8;
3504 and_or_ty = Ity_I8; improve = mkImproveOR8; goto do_And_Or;
3505
3506 do_And_Or:
3507 return
3508 assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +00003509 'V', mce,
sewardj95448072004-11-22 20:19:51 +00003510 and_or_ty,
3511 difd(mce, uifu(mce, vatom1, vatom2),
3512 difd(mce, improve(mce, atom1, vatom1),
3513 improve(mce, atom2, vatom2) ) ) );
3514
3515 case Iop_Xor8:
3516 return mkUifU8(mce, vatom1, vatom2);
3517 case Iop_Xor16:
3518 return mkUifU16(mce, vatom1, vatom2);
3519 case Iop_Xor32:
3520 return mkUifU32(mce, vatom1, vatom2);
sewardj7010f6e2004-12-10 13:35:22 +00003521 case Iop_Xor64:
3522 return mkUifU64(mce, vatom1, vatom2);
sewardj20d38f22005-02-07 23:50:18 +00003523 case Iop_XorV128:
3524 return mkUifUV128(mce, vatom1, vatom2);
sewardj350e8f72012-06-25 07:52:15 +00003525 case Iop_XorV256:
3526 return mkUifUV256(mce, vatom1, vatom2);
njn25e49d8e72002-09-23 09:36:25 +00003527
3528 default:
sewardj95448072004-11-22 20:19:51 +00003529 ppIROp(op);
3530 VG_(tool_panic)("memcheck:expr2vbits_Binop");
njn25e49d8e72002-09-23 09:36:25 +00003531 }
njn25e49d8e72002-09-23 09:36:25 +00003532}
3533
njn25e49d8e72002-09-23 09:36:25 +00003534
sewardj95448072004-11-22 20:19:51 +00003535static
3536IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
3537{
3538 IRAtom* vatom = expr2vbits( mce, atom );
3539 tl_assert(isOriginalAtom(mce,atom));
3540 switch (op) {
3541
sewardj0b070592004-12-10 21:44:22 +00003542 case Iop_Sqrt64Fx2:
3543 return unary64Fx2(mce, vatom);
3544
3545 case Iop_Sqrt64F0x2:
3546 return unary64F0x2(mce, vatom);
3547
sewardj350e8f72012-06-25 07:52:15 +00003548 case Iop_Sqrt32Fx8:
3549 case Iop_RSqrt32Fx8:
3550 case Iop_Recip32Fx8:
3551 return unary32Fx8(mce, vatom);
3552
3553 case Iop_Sqrt64Fx4:
3554 return unary64Fx4(mce, vatom);
3555
sewardj170ee212004-12-10 18:57:51 +00003556 case Iop_Sqrt32Fx4:
3557 case Iop_RSqrt32Fx4:
3558 case Iop_Recip32Fx4:
cerion176cb4c2005-11-16 17:21:49 +00003559 case Iop_I32UtoFx4:
3560 case Iop_I32StoFx4:
3561 case Iop_QFtoI32Ux4_RZ:
3562 case Iop_QFtoI32Sx4_RZ:
3563 case Iop_RoundF32x4_RM:
3564 case Iop_RoundF32x4_RP:
3565 case Iop_RoundF32x4_RN:
3566 case Iop_RoundF32x4_RZ:
sewardj57f92b02010-08-22 11:54:14 +00003567 case Iop_Recip32x4:
3568 case Iop_Abs32Fx4:
3569 case Iop_Neg32Fx4:
3570 case Iop_Rsqrte32Fx4:
sewardj170ee212004-12-10 18:57:51 +00003571 return unary32Fx4(mce, vatom);
3572
sewardj57f92b02010-08-22 11:54:14 +00003573 case Iop_I32UtoFx2:
3574 case Iop_I32StoFx2:
3575 case Iop_Recip32Fx2:
3576 case Iop_Recip32x2:
3577 case Iop_Abs32Fx2:
3578 case Iop_Neg32Fx2:
3579 case Iop_Rsqrte32Fx2:
3580 return unary32Fx2(mce, vatom);
3581
sewardj170ee212004-12-10 18:57:51 +00003582 case Iop_Sqrt32F0x4:
3583 case Iop_RSqrt32F0x4:
3584 case Iop_Recip32F0x4:
3585 return unary32F0x4(mce, vatom);
3586
sewardj20d38f22005-02-07 23:50:18 +00003587 case Iop_32UtoV128:
3588 case Iop_64UtoV128:
sewardj620eb5b2005-10-22 12:50:43 +00003589 case Iop_Dup8x16:
3590 case Iop_Dup16x8:
3591 case Iop_Dup32x4:
sewardj57f92b02010-08-22 11:54:14 +00003592 case Iop_Reverse16_8x16:
3593 case Iop_Reverse32_8x16:
3594 case Iop_Reverse32_16x8:
3595 case Iop_Reverse64_8x16:
3596 case Iop_Reverse64_16x8:
3597 case Iop_Reverse64_32x4:
sewardj350e8f72012-06-25 07:52:15 +00003598 case Iop_V256toV128_1: case Iop_V256toV128_0:
sewardj7cf4e6b2008-05-01 20:24:26 +00003599 return assignNew('V', mce, Ity_V128, unop(op, vatom));
sewardj170ee212004-12-10 18:57:51 +00003600
sewardjb5b87402011-03-07 16:05:35 +00003601 case Iop_F128HItoF64: /* F128 -> high half of F128 */
sewardjb0ccb4d2012-04-02 10:22:05 +00003602 case Iop_D128HItoD64: /* D128 -> high half of D128 */
sewardjb5b87402011-03-07 16:05:35 +00003603 return assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, vatom));
3604 case Iop_F128LOtoF64: /* F128 -> low half of F128 */
sewardjb0ccb4d2012-04-02 10:22:05 +00003605 case Iop_D128LOtoD64: /* D128 -> low half of D128 */
sewardjb5b87402011-03-07 16:05:35 +00003606 return assignNew('V', mce, Ity_I64, unop(Iop_128to64, vatom));
3607
3608 case Iop_NegF128:
3609 case Iop_AbsF128:
3610 return mkPCastTo(mce, Ity_I128, vatom);
3611
3612 case Iop_I32StoF128: /* signed I32 -> F128 */
3613 case Iop_I64StoF128: /* signed I64 -> F128 */
3614 case Iop_F32toF128: /* F32 -> F128 */
3615 case Iop_F64toF128: /* F64 -> F128 */
sewardjea8b02f2012-04-12 17:28:57 +00003616 case Iop_I64StoD128: /* signed I64 -> D128 */
sewardjb5b87402011-03-07 16:05:35 +00003617 return mkPCastTo(mce, Ity_I128, vatom);
3618
sewardj95448072004-11-22 20:19:51 +00003619 case Iop_F32toF64:
sewardj06f96d02009-12-31 19:24:12 +00003620 case Iop_I32StoF64:
sewardj59570ff2010-01-01 11:59:33 +00003621 case Iop_I32UtoF64:
sewardj95448072004-11-22 20:19:51 +00003622 case Iop_NegF64:
sewardj95448072004-11-22 20:19:51 +00003623 case Iop_AbsF64:
sewardj4f3e0f22006-01-27 15:11:31 +00003624 case Iop_Est5FRSqrt:
sewardjdead90a2008-08-08 08:38:23 +00003625 case Iop_RoundF64toF64_NEAREST:
3626 case Iop_RoundF64toF64_NegINF:
3627 case Iop_RoundF64toF64_PosINF:
3628 case Iop_RoundF64toF64_ZERO:
sewardj39cc7352005-06-09 21:31:55 +00003629 case Iop_Clz64:
3630 case Iop_Ctz64:
sewardjea8b02f2012-04-12 17:28:57 +00003631 case Iop_D32toD64:
sewardj18c72fa2012-04-23 11:22:05 +00003632 case Iop_ExtractExpD64: /* D64 -> I64 */
3633 case Iop_ExtractExpD128: /* D128 -> I64 */
sewardj95448072004-11-22 20:19:51 +00003634 return mkPCastTo(mce, Ity_I64, vatom);
3635
sewardjea8b02f2012-04-12 17:28:57 +00003636 case Iop_D64toD128:
3637 return mkPCastTo(mce, Ity_I128, vatom);
3638
sewardj95448072004-11-22 20:19:51 +00003639 case Iop_Clz32:
3640 case Iop_Ctz32:
sewardjed69fdb2006-02-03 16:12:27 +00003641 case Iop_TruncF64asF32:
sewardj59570ff2010-01-01 11:59:33 +00003642 case Iop_NegF32:
3643 case Iop_AbsF32:
sewardj95448072004-11-22 20:19:51 +00003644 return mkPCastTo(mce, Ity_I32, vatom);
3645
sewardjd9dbc192005-04-27 11:40:27 +00003646 case Iop_1Uto64:
sewardja201c452011-07-24 14:15:54 +00003647 case Iop_1Sto64:
sewardjd9dbc192005-04-27 11:40:27 +00003648 case Iop_8Uto64:
3649 case Iop_8Sto64:
3650 case Iop_16Uto64:
3651 case Iop_16Sto64:
sewardj95448072004-11-22 20:19:51 +00003652 case Iop_32Sto64:
3653 case Iop_32Uto64:
sewardj20d38f22005-02-07 23:50:18 +00003654 case Iop_V128to64:
3655 case Iop_V128HIto64:
sewardj6cf40ff2005-04-20 22:31:26 +00003656 case Iop_128HIto64:
3657 case Iop_128to64:
sewardj57f92b02010-08-22 11:54:14 +00003658 case Iop_Dup8x8:
3659 case Iop_Dup16x4:
3660 case Iop_Dup32x2:
3661 case Iop_Reverse16_8x8:
3662 case Iop_Reverse32_8x8:
3663 case Iop_Reverse32_16x4:
3664 case Iop_Reverse64_8x8:
3665 case Iop_Reverse64_16x4:
3666 case Iop_Reverse64_32x2:
sewardj350e8f72012-06-25 07:52:15 +00003667 case Iop_V256to64_0: case Iop_V256to64_1:
3668 case Iop_V256to64_2: case Iop_V256to64_3:
sewardj7cf4e6b2008-05-01 20:24:26 +00003669 return assignNew('V', mce, Ity_I64, unop(op, vatom));
sewardj95448072004-11-22 20:19:51 +00003670
3671 case Iop_64to32:
3672 case Iop_64HIto32:
3673 case Iop_1Uto32:
sewardj463b3d92005-07-18 11:41:15 +00003674 case Iop_1Sto32:
sewardj95448072004-11-22 20:19:51 +00003675 case Iop_8Uto32:
3676 case Iop_16Uto32:
3677 case Iop_16Sto32:
3678 case Iop_8Sto32:
cerionfafaa0d2005-09-12 22:29:38 +00003679 case Iop_V128to32:
sewardj7cf4e6b2008-05-01 20:24:26 +00003680 return assignNew('V', mce, Ity_I32, unop(op, vatom));
sewardj95448072004-11-22 20:19:51 +00003681
3682 case Iop_8Sto16:
3683 case Iop_8Uto16:
3684 case Iop_32to16:
3685 case Iop_32HIto16:
sewardjd9dbc192005-04-27 11:40:27 +00003686 case Iop_64to16:
sewardj7cf4e6b2008-05-01 20:24:26 +00003687 return assignNew('V', mce, Ity_I16, unop(op, vatom));
sewardj95448072004-11-22 20:19:51 +00003688
3689 case Iop_1Uto8:
sewardja201c452011-07-24 14:15:54 +00003690 case Iop_1Sto8:
sewardj95448072004-11-22 20:19:51 +00003691 case Iop_16to8:
sewardj9a807e02006-12-17 14:20:31 +00003692 case Iop_16HIto8:
sewardj95448072004-11-22 20:19:51 +00003693 case Iop_32to8:
sewardjd9dbc192005-04-27 11:40:27 +00003694 case Iop_64to8:
sewardj7cf4e6b2008-05-01 20:24:26 +00003695 return assignNew('V', mce, Ity_I8, unop(op, vatom));
sewardj95448072004-11-22 20:19:51 +00003696
3697 case Iop_32to1:
sewardj7cf4e6b2008-05-01 20:24:26 +00003698 return assignNew('V', mce, Ity_I1, unop(Iop_32to1, vatom));
sewardj95448072004-11-22 20:19:51 +00003699
sewardjd9dbc192005-04-27 11:40:27 +00003700 case Iop_64to1:
sewardj7cf4e6b2008-05-01 20:24:26 +00003701 return assignNew('V', mce, Ity_I1, unop(Iop_64to1, vatom));
sewardjd9dbc192005-04-27 11:40:27 +00003702
sewardj95448072004-11-22 20:19:51 +00003703 case Iop_ReinterpF64asI64:
3704 case Iop_ReinterpI64asF64:
sewardj0b070592004-12-10 21:44:22 +00003705 case Iop_ReinterpI32asF32:
sewardj59570ff2010-01-01 11:59:33 +00003706 case Iop_ReinterpF32asI32:
sewardj18c72fa2012-04-23 11:22:05 +00003707 case Iop_ReinterpI64asD64:
sewardj0892b822012-04-29 20:20:16 +00003708 case Iop_ReinterpD64asI64:
sewardj457cba62012-06-02 23:48:06 +00003709 case Iop_DPBtoBCD:
3710 case Iop_BCDtoDPB:
sewardj350e8f72012-06-25 07:52:15 +00003711 case Iop_NotV256:
sewardj20d38f22005-02-07 23:50:18 +00003712 case Iop_NotV128:
sewardj7010f6e2004-12-10 13:35:22 +00003713 case Iop_Not64:
sewardj95448072004-11-22 20:19:51 +00003714 case Iop_Not32:
3715 case Iop_Not16:
3716 case Iop_Not8:
3717 case Iop_Not1:
3718 return vatom;
sewardj7010f6e2004-12-10 13:35:22 +00003719
sewardj57f92b02010-08-22 11:54:14 +00003720 case Iop_CmpNEZ8x8:
3721 case Iop_Cnt8x8:
3722 case Iop_Clz8Sx8:
3723 case Iop_Cls8Sx8:
3724 case Iop_Abs8x8:
3725 return mkPCast8x8(mce, vatom);
3726
3727 case Iop_CmpNEZ8x16:
3728 case Iop_Cnt8x16:
3729 case Iop_Clz8Sx16:
3730 case Iop_Cls8Sx16:
3731 case Iop_Abs8x16:
3732 return mkPCast8x16(mce, vatom);
3733
3734 case Iop_CmpNEZ16x4:
3735 case Iop_Clz16Sx4:
3736 case Iop_Cls16Sx4:
3737 case Iop_Abs16x4:
3738 return mkPCast16x4(mce, vatom);
3739
3740 case Iop_CmpNEZ16x8:
3741 case Iop_Clz16Sx8:
3742 case Iop_Cls16Sx8:
3743 case Iop_Abs16x8:
3744 return mkPCast16x8(mce, vatom);
3745
3746 case Iop_CmpNEZ32x2:
3747 case Iop_Clz32Sx2:
3748 case Iop_Cls32Sx2:
3749 case Iop_FtoI32Ux2_RZ:
3750 case Iop_FtoI32Sx2_RZ:
3751 case Iop_Abs32x2:
3752 return mkPCast32x2(mce, vatom);
3753
3754 case Iop_CmpNEZ32x4:
3755 case Iop_Clz32Sx4:
3756 case Iop_Cls32Sx4:
3757 case Iop_FtoI32Ux4_RZ:
3758 case Iop_FtoI32Sx4_RZ:
3759 case Iop_Abs32x4:
3760 return mkPCast32x4(mce, vatom);
3761
florian537ed2d2012-08-20 16:51:39 +00003762 case Iop_CmpwNEZ32:
3763 return mkPCastTo(mce, Ity_I32, vatom);
3764
sewardj57f92b02010-08-22 11:54:14 +00003765 case Iop_CmpwNEZ64:
3766 return mkPCastTo(mce, Ity_I64, vatom);
3767
3768 case Iop_CmpNEZ64x2:
3769 return mkPCast64x2(mce, vatom);
3770
sewardj7ee7d852011-06-16 11:37:21 +00003771 case Iop_NarrowUn16to8x8:
3772 case Iop_NarrowUn32to16x4:
3773 case Iop_NarrowUn64to32x2:
3774 case Iop_QNarrowUn16Sto8Sx8:
3775 case Iop_QNarrowUn16Sto8Ux8:
3776 case Iop_QNarrowUn16Uto8Ux8:
3777 case Iop_QNarrowUn32Sto16Sx4:
3778 case Iop_QNarrowUn32Sto16Ux4:
3779 case Iop_QNarrowUn32Uto16Ux4:
3780 case Iop_QNarrowUn64Sto32Sx2:
3781 case Iop_QNarrowUn64Sto32Ux2:
3782 case Iop_QNarrowUn64Uto32Ux2:
3783 return vectorNarrowUnV128(mce, op, vatom);
sewardj57f92b02010-08-22 11:54:14 +00003784
sewardj7ee7d852011-06-16 11:37:21 +00003785 case Iop_Widen8Sto16x8:
3786 case Iop_Widen8Uto16x8:
3787 case Iop_Widen16Sto32x4:
3788 case Iop_Widen16Uto32x4:
3789 case Iop_Widen32Sto64x2:
3790 case Iop_Widen32Uto64x2:
3791 return vectorWidenI64(mce, op, vatom);
sewardj57f92b02010-08-22 11:54:14 +00003792
3793 case Iop_PwAddL32Ux2:
3794 case Iop_PwAddL32Sx2:
3795 return mkPCastTo(mce, Ity_I64,
3796 assignNew('V', mce, Ity_I64, unop(op, mkPCast32x2(mce, vatom))));
3797
3798 case Iop_PwAddL16Ux4:
3799 case Iop_PwAddL16Sx4:
3800 return mkPCast32x2(mce,
3801 assignNew('V', mce, Ity_I64, unop(op, mkPCast16x4(mce, vatom))));
3802
3803 case Iop_PwAddL8Ux8:
3804 case Iop_PwAddL8Sx8:
3805 return mkPCast16x4(mce,
3806 assignNew('V', mce, Ity_I64, unop(op, mkPCast8x8(mce, vatom))));
3807
3808 case Iop_PwAddL32Ux4:
3809 case Iop_PwAddL32Sx4:
3810 return mkPCast64x2(mce,
3811 assignNew('V', mce, Ity_V128, unop(op, mkPCast32x4(mce, vatom))));
3812
3813 case Iop_PwAddL16Ux8:
3814 case Iop_PwAddL16Sx8:
3815 return mkPCast32x4(mce,
3816 assignNew('V', mce, Ity_V128, unop(op, mkPCast16x8(mce, vatom))));
3817
3818 case Iop_PwAddL8Ux16:
3819 case Iop_PwAddL8Sx16:
3820 return mkPCast16x8(mce,
3821 assignNew('V', mce, Ity_V128, unop(op, mkPCast8x16(mce, vatom))));
3822
sewardjf34eb492011-04-15 11:57:05 +00003823 case Iop_I64UtoF32:
sewardj95448072004-11-22 20:19:51 +00003824 default:
3825 ppIROp(op);
3826 VG_(tool_panic)("memcheck:expr2vbits_Unop");
3827 }
3828}
3829
3830
sewardj170ee212004-12-10 18:57:51 +00003831/* Worker function; do not call directly. */
sewardj95448072004-11-22 20:19:51 +00003832static
sewardj2e595852005-06-30 23:33:37 +00003833IRAtom* expr2vbits_Load_WRK ( MCEnv* mce,
3834 IREndness end, IRType ty,
3835 IRAtom* addr, UInt bias )
sewardj95448072004-11-22 20:19:51 +00003836{
3837 void* helper;
3838 Char* hname;
3839 IRDirty* di;
3840 IRTemp datavbits;
3841 IRAtom* addrAct;
3842
3843 tl_assert(isOriginalAtom(mce,addr));
sewardj2e595852005-06-30 23:33:37 +00003844 tl_assert(end == Iend_LE || end == Iend_BE);
sewardj95448072004-11-22 20:19:51 +00003845
3846 /* First, emit a definedness test for the address. This also sets
3847 the address (shadow) to 'defined' following the test. */
florian434ffae2012-07-19 17:23:42 +00003848 complainIfUndefined( mce, addr, NULL );
sewardj95448072004-11-22 20:19:51 +00003849
3850 /* Now cook up a call to the relevant helper function, to read the
3851 data V bits from shadow memory. */
sewardj7cf4e6b2008-05-01 20:24:26 +00003852 ty = shadowTypeV(ty);
sewardj2e595852005-06-30 23:33:37 +00003853
3854 if (end == Iend_LE) {
3855 switch (ty) {
njn1d0825f2006-03-27 11:37:07 +00003856 case Ity_I64: helper = &MC_(helperc_LOADV64le);
3857 hname = "MC_(helperc_LOADV64le)";
sewardj2e595852005-06-30 23:33:37 +00003858 break;
njn1d0825f2006-03-27 11:37:07 +00003859 case Ity_I32: helper = &MC_(helperc_LOADV32le);
3860 hname = "MC_(helperc_LOADV32le)";
sewardj2e595852005-06-30 23:33:37 +00003861 break;
njn1d0825f2006-03-27 11:37:07 +00003862 case Ity_I16: helper = &MC_(helperc_LOADV16le);
3863 hname = "MC_(helperc_LOADV16le)";
sewardj2e595852005-06-30 23:33:37 +00003864 break;
njn1d0825f2006-03-27 11:37:07 +00003865 case Ity_I8: helper = &MC_(helperc_LOADV8);
3866 hname = "MC_(helperc_LOADV8)";
sewardj2e595852005-06-30 23:33:37 +00003867 break;
3868 default: ppIRType(ty);
3869 VG_(tool_panic)("memcheck:do_shadow_Load(LE)");
3870 }
3871 } else {
sewardj8cf88b72005-07-08 01:29:33 +00003872 switch (ty) {
njn1d0825f2006-03-27 11:37:07 +00003873 case Ity_I64: helper = &MC_(helperc_LOADV64be);
3874 hname = "MC_(helperc_LOADV64be)";
sewardj8cf88b72005-07-08 01:29:33 +00003875 break;
njn1d0825f2006-03-27 11:37:07 +00003876 case Ity_I32: helper = &MC_(helperc_LOADV32be);
3877 hname = "MC_(helperc_LOADV32be)";
sewardj8cf88b72005-07-08 01:29:33 +00003878 break;
njn1d0825f2006-03-27 11:37:07 +00003879 case Ity_I16: helper = &MC_(helperc_LOADV16be);
3880 hname = "MC_(helperc_LOADV16be)";
sewardj8cf88b72005-07-08 01:29:33 +00003881 break;
njn1d0825f2006-03-27 11:37:07 +00003882 case Ity_I8: helper = &MC_(helperc_LOADV8);
3883 hname = "MC_(helperc_LOADV8)";
sewardj8cf88b72005-07-08 01:29:33 +00003884 break;
3885 default: ppIRType(ty);
3886 VG_(tool_panic)("memcheck:do_shadow_Load(BE)");
3887 }
sewardj95448072004-11-22 20:19:51 +00003888 }
3889
3890 /* Generate the actual address into addrAct. */
3891 if (bias == 0) {
3892 addrAct = addr;
3893 } else {
sewardj7cf97ee2004-11-28 14:25:01 +00003894 IROp mkAdd;
3895 IRAtom* eBias;
sewardj95448072004-11-22 20:19:51 +00003896 IRType tyAddr = mce->hWordTy;
3897 tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
sewardj7cf97ee2004-11-28 14:25:01 +00003898 mkAdd = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
3899 eBias = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
sewardj7cf4e6b2008-05-01 20:24:26 +00003900 addrAct = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBias) );
sewardj95448072004-11-22 20:19:51 +00003901 }
3902
3903 /* We need to have a place to park the V bits we're just about to
3904 read. */
sewardj1c0ce7a2009-07-01 08:10:49 +00003905 datavbits = newTemp(mce, ty, VSh);
sewardj95448072004-11-22 20:19:51 +00003906 di = unsafeIRDirty_1_N( datavbits,
sewardj53ee1fc2005-12-23 02:29:58 +00003907 1/*regparms*/,
3908 hname, VG_(fnptr_to_fnentry)( helper ),
sewardj95448072004-11-22 20:19:51 +00003909 mkIRExprVec_1( addrAct ));
3910 setHelperAnns( mce, di );
sewardj7cf4e6b2008-05-01 20:24:26 +00003911 stmt( 'V', mce, IRStmt_Dirty(di) );
sewardj95448072004-11-22 20:19:51 +00003912
3913 return mkexpr(datavbits);
3914}
3915
3916
3917static
sewardj2e595852005-06-30 23:33:37 +00003918IRAtom* expr2vbits_Load ( MCEnv* mce,
3919 IREndness end, IRType ty,
3920 IRAtom* addr, UInt bias )
sewardj170ee212004-12-10 18:57:51 +00003921{
sewardj2e595852005-06-30 23:33:37 +00003922 tl_assert(end == Iend_LE || end == Iend_BE);
sewardj7cf4e6b2008-05-01 20:24:26 +00003923 switch (shadowTypeV(ty)) {
sewardj170ee212004-12-10 18:57:51 +00003924 case Ity_I8:
3925 case Ity_I16:
3926 case Ity_I32:
3927 case Ity_I64:
sewardj2e595852005-06-30 23:33:37 +00003928 return expr2vbits_Load_WRK(mce, end, ty, addr, bias);
sewardj45fa9f42012-05-21 10:18:10 +00003929 case Ity_V128: {
3930 IRAtom *v64hi, *v64lo;
sewardj2e595852005-06-30 23:33:37 +00003931 if (end == Iend_LE) {
sewardj45fa9f42012-05-21 10:18:10 +00003932 v64lo = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+0);
sewardj2e595852005-06-30 23:33:37 +00003933 v64hi = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+8);
3934 } else {
sewardj45fa9f42012-05-21 10:18:10 +00003935 v64hi = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+0);
sewardj2e595852005-06-30 23:33:37 +00003936 v64lo = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+8);
3937 }
sewardj7cf4e6b2008-05-01 20:24:26 +00003938 return assignNew( 'V', mce,
sewardj170ee212004-12-10 18:57:51 +00003939 Ity_V128,
sewardj20d38f22005-02-07 23:50:18 +00003940 binop(Iop_64HLtoV128, v64hi, v64lo));
sewardj45fa9f42012-05-21 10:18:10 +00003941 }
3942 case Ity_V256: {
3943 /* V256-bit case -- phrased in terms of 64 bit units (Qs),
3944 with Q3 being the most significant lane. */
3945 if (end == Iend_BE) goto unhandled;
3946 IRAtom* v64Q0 = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+0);
3947 IRAtom* v64Q1 = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+8);
3948 IRAtom* v64Q2 = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+16);
3949 IRAtom* v64Q3 = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+24);
3950 return assignNew( 'V', mce,
3951 Ity_V256,
3952 IRExpr_Qop(Iop_64x4toV256,
3953 v64Q3, v64Q2, v64Q1, v64Q0));
3954 }
3955 unhandled:
sewardj170ee212004-12-10 18:57:51 +00003956 default:
sewardj2e595852005-06-30 23:33:37 +00003957 VG_(tool_panic)("expr2vbits_Load");
sewardj170ee212004-12-10 18:57:51 +00003958 }
3959}
3960
3961
florian434ffae2012-07-19 17:23:42 +00003962/* If there is no guard expression or the guard is always TRUE this function
3963 behaves like expr2vbits_Load. If the guard is not true at runtime, an
3964 all-bits-defined bit pattern will be returned.
3965 It is assumed that definedness of GUARD has already been checked at the call
3966 site. */
3967static
3968IRAtom* expr2vbits_guarded_Load ( MCEnv* mce,
3969 IREndness end, IRType ty,
3970 IRAtom* addr, UInt bias, IRAtom *guard )
3971{
3972 if (guard) {
3973 IRAtom *cond, *iffalse, *iftrue;
3974
3975 cond = assignNew('V', mce, Ity_I8, unop(Iop_1Uto8, guard));
3976 iftrue = assignNew('V', mce, ty,
3977 expr2vbits_Load(mce, end, ty, addr, bias));
3978 iffalse = assignNew('V', mce, ty, definedOfType(ty));
3979
3980 return assignNew('V', mce, ty, IRExpr_Mux0X(cond, iffalse, iftrue));
3981 }
3982
3983 /* No guard expression or unconditional load */
3984 return expr2vbits_Load(mce, end, ty, addr, bias);
3985}
3986
3987
sewardj170ee212004-12-10 18:57:51 +00003988static
sewardj95448072004-11-22 20:19:51 +00003989IRAtom* expr2vbits_Mux0X ( MCEnv* mce,
3990 IRAtom* cond, IRAtom* expr0, IRAtom* exprX )
3991{
3992 IRAtom *vbitsC, *vbits0, *vbitsX;
3993 IRType ty;
3994 /* Given Mux0X(cond,expr0,exprX), generate
3995 Mux0X(cond,expr0#,exprX#) `UifU` PCast(cond#)
3996 That is, steer the V bits like the originals, but trash the
3997 result if the steering value is undefined. This gives
3998 lazy propagation. */
3999 tl_assert(isOriginalAtom(mce, cond));
4000 tl_assert(isOriginalAtom(mce, expr0));
4001 tl_assert(isOriginalAtom(mce, exprX));
4002
4003 vbitsC = expr2vbits(mce, cond);
4004 vbits0 = expr2vbits(mce, expr0);
4005 vbitsX = expr2vbits(mce, exprX);
sewardj1c0ce7a2009-07-01 08:10:49 +00004006 ty = typeOfIRExpr(mce->sb->tyenv, vbits0);
sewardj95448072004-11-22 20:19:51 +00004007
4008 return
sewardj7cf4e6b2008-05-01 20:24:26 +00004009 mkUifU(mce, ty, assignNew('V', mce, ty,
4010 IRExpr_Mux0X(cond, vbits0, vbitsX)),
sewardj95448072004-11-22 20:19:51 +00004011 mkPCastTo(mce, ty, vbitsC) );
4012}
4013
4014/* --------- This is the main expression-handling function. --------- */
4015
/* Compute the shadow value (V bits) for expression E, dispatching on
   the expression tag to the appropriate specialised handler.  The
   returned expression has the shadow type corresponding to E's type.
   Unhandled tags are printed and cause a panic. */
static
IRExpr* expr2vbits ( MCEnv* mce, IRExpr* e )
{
   switch (e->tag) {

      case Iex_Get:
         return shadow_GET( mce, e->Iex.Get.offset, e->Iex.Get.ty );

      case Iex_GetI:
         return shadow_GETI( mce, e->Iex.GetI.descr,
                                  e->Iex.GetI.ix, e->Iex.GetI.bias );

      case Iex_RdTmp:
         /* Read of a temp: just read its shadow temp. */
         return IRExpr_RdTmp( findShadowTmpV(mce, e->Iex.RdTmp.tmp) );

      case Iex_Const:
         /* Constants are by definition fully defined. */
         return definedOfType(shadowTypeV(typeOfIRExpr(mce->sb->tyenv, e)));

      case Iex_Qop:
         return expr2vbits_Qop(
                   mce,
                   e->Iex.Qop.details->op,
                   e->Iex.Qop.details->arg1, e->Iex.Qop.details->arg2,
                   e->Iex.Qop.details->arg3, e->Iex.Qop.details->arg4
                );

      case Iex_Triop:
         return expr2vbits_Triop(
                   mce,
                   e->Iex.Triop.details->op,
                   e->Iex.Triop.details->arg1, e->Iex.Triop.details->arg2,
                   e->Iex.Triop.details->arg3
                );

      case Iex_Binop:
         return expr2vbits_Binop(
                   mce,
                   e->Iex.Binop.op,
                   e->Iex.Binop.arg1, e->Iex.Binop.arg2
                );

      case Iex_Unop:
         return expr2vbits_Unop( mce, e->Iex.Unop.op, e->Iex.Unop.arg );

      case Iex_Load:
         return expr2vbits_Load( mce, e->Iex.Load.end,
                                      e->Iex.Load.ty,
                                      e->Iex.Load.addr, 0/*addr bias*/ );

      case Iex_CCall:
         /* Clean helper call: lazy-propagate through the args. */
         return mkLazyN( mce, e->Iex.CCall.args,
                              e->Iex.CCall.retty,
                              e->Iex.CCall.cee );

      case Iex_Mux0X:
         return expr2vbits_Mux0X( mce, e->Iex.Mux0X.cond, e->Iex.Mux0X.expr0,
                                       e->Iex.Mux0X.exprX);

      default:
         VG_(printf)("\n");
         ppIRExpr(e);
         VG_(printf)("\n");
         VG_(tool_panic)("memcheck: expr2vbits");
   }
}
4081
4082/*------------------------------------------------------------*/
sewardj95448072004-11-22 20:19:51 +00004083/*--- Generate shadow stmts from all kinds of IRStmts. ---*/
njn25e49d8e72002-09-23 09:36:25 +00004084/*------------------------------------------------------------*/
4085
sewardj95448072004-11-22 20:19:51 +00004086/* Widen a value to the host word size. */
njn25e49d8e72002-09-23 09:36:25 +00004087
4088static
sewardj95448072004-11-22 20:19:51 +00004089IRExpr* zwidenToHostWord ( MCEnv* mce, IRAtom* vatom )
njn25e49d8e72002-09-23 09:36:25 +00004090{
sewardj7cf97ee2004-11-28 14:25:01 +00004091 IRType ty, tyH;
4092
sewardj95448072004-11-22 20:19:51 +00004093 /* vatom is vbits-value and as such can only have a shadow type. */
4094 tl_assert(isShadowAtom(mce,vatom));
njn25e49d8e72002-09-23 09:36:25 +00004095
sewardj1c0ce7a2009-07-01 08:10:49 +00004096 ty = typeOfIRExpr(mce->sb->tyenv, vatom);
sewardj7cf97ee2004-11-28 14:25:01 +00004097 tyH = mce->hWordTy;
njn25e49d8e72002-09-23 09:36:25 +00004098
sewardj95448072004-11-22 20:19:51 +00004099 if (tyH == Ity_I32) {
4100 switch (ty) {
sewardj7cf4e6b2008-05-01 20:24:26 +00004101 case Ity_I32:
4102 return vatom;
4103 case Ity_I16:
4104 return assignNew('V', mce, tyH, unop(Iop_16Uto32, vatom));
4105 case Ity_I8:
4106 return assignNew('V', mce, tyH, unop(Iop_8Uto32, vatom));
4107 default:
4108 goto unhandled;
sewardj8ec2cfc2002-10-13 00:57:26 +00004109 }
sewardj6cf40ff2005-04-20 22:31:26 +00004110 } else
4111 if (tyH == Ity_I64) {
4112 switch (ty) {
sewardj7cf4e6b2008-05-01 20:24:26 +00004113 case Ity_I32:
4114 return assignNew('V', mce, tyH, unop(Iop_32Uto64, vatom));
4115 case Ity_I16:
4116 return assignNew('V', mce, tyH, unop(Iop_32Uto64,
4117 assignNew('V', mce, Ity_I32, unop(Iop_16Uto32, vatom))));
4118 case Ity_I8:
4119 return assignNew('V', mce, tyH, unop(Iop_32Uto64,
4120 assignNew('V', mce, Ity_I32, unop(Iop_8Uto32, vatom))));
4121 default:
4122 goto unhandled;
sewardj6cf40ff2005-04-20 22:31:26 +00004123 }
sewardj95448072004-11-22 20:19:51 +00004124 } else {
4125 goto unhandled;
sewardj8ec2cfc2002-10-13 00:57:26 +00004126 }
sewardj95448072004-11-22 20:19:51 +00004127 unhandled:
4128 VG_(printf)("\nty = "); ppIRType(ty); VG_(printf)("\n");
4129 VG_(tool_panic)("zwidenToHostWord");
njn25e49d8e72002-09-23 09:36:25 +00004130}
4131
njn25e49d8e72002-09-23 09:36:25 +00004132
/* Generate a shadow store.  addr is always the original address atom.
   You can pass in either originals or V-bits for the data atom, but
   obviously not both.  guard :: Ity_I1 controls whether the store
   really happens; NULL means it unconditionally does.  Note that
   guard itself is not checked for definedness; the caller of this
   function must do that if necessary.

   Emits: a definedness complaint for addr, then one STOREV* helper
   call per 64-bit (or smaller) piece of the data -- one call for
   scalar types, two for V128, four for V256. */

static
void do_shadow_Store ( MCEnv* mce,
                       IREndness end,
                       IRAtom* addr, UInt bias,
                       IRAtom* data, IRAtom* vdata,
                       IRAtom* guard )
{
   IROp     mkAdd;
   IRType   ty, tyAddr;
   void*    helper = NULL;
   Char*    hname = NULL;
   IRConst* c;

   tyAddr = mce->hWordTy;
   mkAdd  = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
   tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
   tl_assert( end == Iend_LE || end == Iend_BE );

   /* Exactly one of data (original) and vdata (shadow) is supplied;
      if the original was given, compute its shadow here. */
   if (data) {
      tl_assert(!vdata);
      tl_assert(isOriginalAtom(mce, data));
      tl_assert(bias == 0);
      vdata = expr2vbits( mce, data );
   } else {
      tl_assert(vdata);
   }

   tl_assert(isOriginalAtom(mce,addr));
   tl_assert(isShadowAtom(mce,vdata));

   if (guard) {
      tl_assert(isOriginalAtom(mce, guard));
      tl_assert(typeOfIRExpr(mce->sb->tyenv, guard) == Ity_I1);
   }

   ty = typeOfIRExpr(mce->sb->tyenv, vdata);

   // If we're not doing undefined value checking, pretend that this value
   // is "all valid".  That lets Vex's optimiser remove some of the V bit
   // shadow computation ops that precede it.
   if (MC_(clo_mc_level) == 1) {
      switch (ty) {
         // Vector shadow constants use one bit per 8 data bits, hence
         // the narrower V_BITS*_DEFINED patterns below ("weirdness").
         case Ity_V256: // V256 weirdness -- used four times
                        c = IRConst_V256(V_BITS32_DEFINED); break;
         case Ity_V128: // V128 weirdness -- used twice
                        c = IRConst_V128(V_BITS16_DEFINED); break;
         case Ity_I64:  c = IRConst_U64 (V_BITS64_DEFINED); break;
         case Ity_I32:  c = IRConst_U32 (V_BITS32_DEFINED); break;
         case Ity_I16:  c = IRConst_U16 (V_BITS16_DEFINED); break;
         case Ity_I8:   c = IRConst_U8  (V_BITS8_DEFINED);  break;
         default:       VG_(tool_panic)("memcheck:do_shadow_Store(LE)");
      }
      vdata = IRExpr_Const( c );
   }

   /* First, emit a definedness test for the address.  This also sets
      the address (shadow) to 'defined' following the test. */
   complainIfUndefined( mce, addr, guard );

   /* Now decide which helper function to call to write the data V
      bits into shadow memory. */
   if (end == Iend_LE) {
      switch (ty) {
         case Ity_V256: /* we'll use the helper four times */
         case Ity_V128: /* we'll use the helper twice */
         case Ity_I64: helper = &MC_(helperc_STOREV64le);
                       hname = "MC_(helperc_STOREV64le)";
                       break;
         case Ity_I32: helper = &MC_(helperc_STOREV32le);
                       hname = "MC_(helperc_STOREV32le)";
                       break;
         case Ity_I16: helper = &MC_(helperc_STOREV16le);
                       hname = "MC_(helperc_STOREV16le)";
                       break;
         case Ity_I8:  helper = &MC_(helperc_STOREV8);
                       hname = "MC_(helperc_STOREV8)";
                       break;
         default: VG_(tool_panic)("memcheck:do_shadow_Store(LE)");
      }
   } else {
      switch (ty) {
         case Ity_V128: /* we'll use the helper twice */
         case Ity_I64: helper = &MC_(helperc_STOREV64be);
                       hname = "MC_(helperc_STOREV64be)";
                       break;
         case Ity_I32: helper = &MC_(helperc_STOREV32be);
                       hname = "MC_(helperc_STOREV32be)";
                       break;
         case Ity_I16: helper = &MC_(helperc_STOREV16be);
                       hname = "MC_(helperc_STOREV16be)";
                       break;
         case Ity_I8:  helper = &MC_(helperc_STOREV8);
                       hname = "MC_(helperc_STOREV8)";
                       break;
         /* Note, no V256 case here, because no big-endian target that
            we support, has 256 vectors. */
         default: VG_(tool_panic)("memcheck:do_shadow_Store(BE)");
      }
   }

   if (UNLIKELY(ty == Ity_V256)) {

      /* V256-bit case -- phrased in terms of 64 bit units (Qs), with
         Q3 being the most significant lane. */
      /* These are the offsets of the Qs in memory. */
      Int     offQ0, offQ1, offQ2, offQ3;

      /* Various bits for constructing the 4 lane helper calls */
      IRDirty *diQ0,    *diQ1,    *diQ2,    *diQ3;
      IRAtom  *addrQ0,  *addrQ1,  *addrQ2,  *addrQ3;
      IRAtom  *vdataQ0, *vdataQ1, *vdataQ2, *vdataQ3;
      IRAtom  *eBiasQ0, *eBiasQ1, *eBiasQ2, *eBiasQ3;

      if (end == Iend_LE) {
         offQ0 = 0; offQ1 = 8; offQ2 = 16; offQ3 = 24;
      } else {
         offQ3 = 0; offQ2 = 8; offQ1 = 16; offQ0 = 24;
      }

      /* For each lane: compute its address, extract its 64 shadow
         bits, and build a (possibly guarded) call to the helper. */
      eBiasQ0 = tyAddr==Ity_I32 ? mkU32(bias+offQ0) : mkU64(bias+offQ0);
      addrQ0  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ0) );
      vdataQ0 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_0, vdata));
      diQ0    = unsafeIRDirty_0_N(
                   1/*regparms*/,
                   hname, VG_(fnptr_to_fnentry)( helper ),
                   mkIRExprVec_2( addrQ0, vdataQ0 )
                );

      eBiasQ1 = tyAddr==Ity_I32 ? mkU32(bias+offQ1) : mkU64(bias+offQ1);
      addrQ1  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ1) );
      vdataQ1 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_1, vdata));
      diQ1    = unsafeIRDirty_0_N(
                   1/*regparms*/,
                   hname, VG_(fnptr_to_fnentry)( helper ),
                   mkIRExprVec_2( addrQ1, vdataQ1 )
                );

      eBiasQ2 = tyAddr==Ity_I32 ? mkU32(bias+offQ2) : mkU64(bias+offQ2);
      addrQ2  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ2) );
      vdataQ2 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_2, vdata));
      diQ2    = unsafeIRDirty_0_N(
                   1/*regparms*/,
                   hname, VG_(fnptr_to_fnentry)( helper ),
                   mkIRExprVec_2( addrQ2, vdataQ2 )
                );

      eBiasQ3 = tyAddr==Ity_I32 ? mkU32(bias+offQ3) : mkU64(bias+offQ3);
      addrQ3  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ3) );
      vdataQ3 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_3, vdata));
      diQ3    = unsafeIRDirty_0_N(
                   1/*regparms*/,
                   hname, VG_(fnptr_to_fnentry)( helper ),
                   mkIRExprVec_2( addrQ3, vdataQ3 )
                );

      if (guard)
         diQ0->guard = diQ1->guard = diQ2->guard = diQ3->guard = guard;

      setHelperAnns( mce, diQ0 );
      setHelperAnns( mce, diQ1 );
      setHelperAnns( mce, diQ2 );
      setHelperAnns( mce, diQ3 );
      stmt( 'V', mce, IRStmt_Dirty(diQ0) );
      stmt( 'V', mce, IRStmt_Dirty(diQ1) );
      stmt( 'V', mce, IRStmt_Dirty(diQ2) );
      stmt( 'V', mce, IRStmt_Dirty(diQ3) );

   }
   else if (UNLIKELY(ty == Ity_V128)) {

      /* V128-bit case */
      /* See comment in next clause re 64-bit regparms */
      /* also, need to be careful about endianness */

      Int     offLo64, offHi64;
      IRDirty *diLo64, *diHi64;
      IRAtom  *addrLo64, *addrHi64;
      IRAtom  *vdataLo64, *vdataHi64;
      IRAtom  *eBiasLo64, *eBiasHi64;

      if (end == Iend_LE) {
         offLo64 = 0;
         offHi64 = 8;
      } else {
         offLo64 = 8;
         offHi64 = 0;
      }

      eBiasLo64 = tyAddr==Ity_I32 ? mkU32(bias+offLo64) : mkU64(bias+offLo64);
      addrLo64  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasLo64) );
      vdataLo64 = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vdata));
      diLo64    = unsafeIRDirty_0_N(
                     1/*regparms*/,
                     hname, VG_(fnptr_to_fnentry)( helper ),
                     mkIRExprVec_2( addrLo64, vdataLo64 )
                  );
      eBiasHi64 = tyAddr==Ity_I32 ? mkU32(bias+offHi64) : mkU64(bias+offHi64);
      addrHi64  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasHi64) );
      vdataHi64 = assignNew('V', mce, Ity_I64, unop(Iop_V128HIto64, vdata));
      diHi64    = unsafeIRDirty_0_N(
                     1/*regparms*/,
                     hname, VG_(fnptr_to_fnentry)( helper ),
                     mkIRExprVec_2( addrHi64, vdataHi64 )
                  );
      if (guard) diLo64->guard = guard;
      if (guard) diHi64->guard = guard;
      setHelperAnns( mce, diLo64 );
      setHelperAnns( mce, diHi64 );
      stmt( 'V', mce, IRStmt_Dirty(diLo64) );
      stmt( 'V', mce, IRStmt_Dirty(diHi64) );

   } else {

      IRDirty *di;
      IRAtom  *addrAct;

      /* 8/16/32/64-bit cases */
      /* Generate the actual address into addrAct. */
      if (bias == 0) {
         addrAct = addr;
      } else {
         IRAtom* eBias   = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
         addrAct = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBias));
      }

      if (ty == Ity_I64) {
         /* We can't do this with regparm 2 on 32-bit platforms, since
            the back ends aren't clever enough to handle 64-bit
            regparm args.  Therefore be different. */
         di = unsafeIRDirty_0_N(
                 1/*regparms*/,
                 hname, VG_(fnptr_to_fnentry)( helper ),
                 mkIRExprVec_2( addrAct, vdata )
              );
      } else {
         /* Sub-word data is zero-widened to the host word so it can
            be passed as a regparm argument. */
         di = unsafeIRDirty_0_N(
                 2/*regparms*/,
                 hname, VG_(fnptr_to_fnentry)( helper ),
                 mkIRExprVec_2( addrAct,
                                zwidenToHostWord( mce, vdata ))
              );
      }
      if (guard) di->guard = guard;
      setHelperAnns( mce, di );
      stmt( 'V', mce, IRStmt_Dirty(di) );
   }

}
njn25e49d8e72002-09-23 09:36:25 +00004388
njn25e49d8e72002-09-23 09:36:25 +00004389
sewardj95448072004-11-22 20:19:51 +00004390/* Do lazy pessimistic propagation through a dirty helper call, by
4391 looking at the annotations on it. This is the most complex part of
4392 Memcheck. */
njn25e49d8e72002-09-23 09:36:25 +00004393
sewardj95448072004-11-22 20:19:51 +00004394static IRType szToITy ( Int n )
4395{
4396 switch (n) {
4397 case 1: return Ity_I8;
4398 case 2: return Ity_I16;
4399 case 4: return Ity_I32;
4400 case 8: return Ity_I64;
4401 default: VG_(tool_panic)("szToITy(memcheck)");
4402 }
4403}
njn25e49d8e72002-09-23 09:36:25 +00004404
/* Instrument a dirty helper call (IRDirty), using the annotations on
   it.  Scheme is lazy pessimistic propagation: every input -- the
   unmasked call arguments, the guest state the helper reads, and the
   memory it reads -- is PCast-ed down and UifU-ed into a single
   32-bit definedness summary 'curr'.  That summary is then PCast-ed
   back up to the required width and written to every output: the
   destination temporary, the guest state written, and the memory
   written.  So if any input bit is undefined, all output bits are
   flagged undefined. */
static
void do_shadow_Dirty ( MCEnv* mce, IRDirty* d )
{
   Int       i, k, n, toDo, gSz, gOff;
   IRAtom    *src, *here, *curr;
   IRType    tySrc, tyDst;
   IRTemp    dst;
   IREndness end;

   /* What's the native endianness?  We need to know this. */
#  if defined(VG_BIGENDIAN)
   end = Iend_BE;
#  elif defined(VG_LITTLEENDIAN)
   end = Iend_LE;
#  else
#    error "Unknown endianness"
#  endif

   /* First check the guard. */
   complainIfUndefined(mce, d->guard, NULL);

   /* Now round up all inputs and PCast over them. */
   curr = definedOfType(Ity_I32);

   /* Inputs: unmasked args
      Note: arguments are evaluated REGARDLESS of the guard expression.
      Bits set in cee->mcx_mask exempt the corresponding argument from
      definedness tracking. */
   for (i = 0; d->args[i]; i++) {
      if (d->cee->mcx_mask & (1<<i)) {
         /* ignore this arg */
      } else {
         here = mkPCastTo( mce, Ity_I32, expr2vbits(mce, d->args[i]) );
         curr = mkUifU32(mce, here, curr);
      }
   }

   /* Inputs: guest state that we read. */
   for (i = 0; i < d->nFxState; i++) {
      tl_assert(d->fxState[i].fx != Ifx_None);
      if (d->fxState[i].fx == Ifx_Write)
         continue;

      /* Enumerate the described state segments (nRepeats/repeatLen
         describe a strided sequence of identically-sized pieces). */
      for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) {
         gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen;
         gSz = d->fxState[i].size;

         /* Ignore any sections marked as 'always defined'. */
         if (isAlwaysDefd(mce, gOff, gSz)) {
            if (0)
            VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
                        gOff, gSz);
            continue;
         }

         /* This state element is read or modified.  So we need to
            consider it.  If larger than 8 bytes, deal with it in
            8-byte chunks. */
         while (True) {
            tl_assert(gSz >= 0);
            if (gSz == 0) break;
            n = gSz <= 8 ? gSz : 8;
            /* update 'curr' with UifU of the state slice
               gOff .. gOff+n-1 */
            tySrc = szToITy( n );

            /* Observe the guard expression. If it is false use an
               all-bits-defined bit pattern */
            IRAtom *cond, *iffalse, *iftrue;

            /* Mux0X selects its first payload arg (iffalse) when the
               condition is zero -- hence the defined pattern sits in
               that slot. */
            cond = assignNew('V', mce, Ity_I8, unop(Iop_1Uto8, d->guard));
            iftrue = assignNew('V', mce, tySrc, shadow_GET(mce, gOff, tySrc));
            iffalse = assignNew('V', mce, tySrc, definedOfType(tySrc));
            src = assignNew('V', mce, tySrc,
                            IRExpr_Mux0X(cond, iffalse, iftrue));

            here = mkPCastTo( mce, Ity_I32, src );
            curr = mkUifU32(mce, here, curr);
            gSz -= n;
            gOff += n;
         }
      }
   }

   /* Inputs: memory.  First set up some info needed regardless of
      whether we're doing reads or writes. */

   if (d->mFx != Ifx_None) {
      /* Because we may do multiple shadow loads/stores from the same
         base address, it's best to do a single test of its
         definedness right now.  Post-instrumentation optimisation
         should remove all but this test. */
      IRType tyAddr;
      tl_assert(d->mAddr);
      complainIfUndefined(mce, d->mAddr, d->guard);

      tyAddr = typeOfIRExpr(mce->sb->tyenv, d->mAddr);
      tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);
      tl_assert(tyAddr == mce->hWordTy); /* not really right */
   }

   /* Deal with memory inputs (reads or modifies) */
   if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
      toDo = d->mSize;
      /* chew off 32-bit chunks.  We don't care about the endianness
         since it's all going to be condensed down to a single bit,
         but nevertheless choose an endianness which is hopefully
         native to the platform. */
      while (toDo >= 4) {
         here = mkPCastTo(
                   mce, Ity_I32,
                   expr2vbits_guarded_Load ( mce, end, Ity_I32, d->mAddr,
                                             d->mSize - toDo, d->guard )
                );
         curr = mkUifU32(mce, here, curr);
         toDo -= 4;
      }
      /* chew off 16-bit chunks */
      while (toDo >= 2) {
         here = mkPCastTo(
                   mce, Ity_I32,
                   expr2vbits_guarded_Load ( mce, end, Ity_I16, d->mAddr,
                                             d->mSize - toDo, d->guard )
                );
         curr = mkUifU32(mce, here, curr);
         toDo -= 2;
      }
      /* chew off the remaining 8-bit chunk, if any */
      if (toDo == 1) {
         here = mkPCastTo(
                   mce, Ity_I32,
                   expr2vbits_guarded_Load ( mce, end, Ity_I8, d->mAddr,
                                             d->mSize - toDo, d->guard )
                );
         curr = mkUifU32(mce, here, curr);
         toDo -= 1;
      }
      /* NOTE(review): chunking handles sizes whose remainder mod 4 is
         0, 1, 2; a remainder of 3 would trip this assertion -- presumably
         no such mSize occurs in practice. */
      tl_assert(toDo == 0);
   }

   /* Whew!  So curr is a 32-bit V-value summarising pessimistically
      all the inputs to the helper.  Now we need to re-distribute the
      results to all destinations. */

   /* Outputs: the destination temporary, if there is one. */
   if (d->tmp != IRTemp_INVALID) {
      dst = findShadowTmpV(mce, d->tmp);
      tyDst = typeOfIRTemp(mce->sb->tyenv, d->tmp);
      assign( 'V', mce, dst, mkPCastTo( mce, tyDst, curr) );
   }

   /* Outputs: guest state that we write or modify. */
   for (i = 0; i < d->nFxState; i++) {
      tl_assert(d->fxState[i].fx != Ifx_None);
      if (d->fxState[i].fx == Ifx_Read)
         continue;

      /* Enumerate the described state segments */
      for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) {
         gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen;
         gSz = d->fxState[i].size;

         /* Ignore any sections marked as 'always defined'. */
         if (isAlwaysDefd(mce, gOff, gSz))
            continue;

         /* This state element is written or modified.  So we need to
            consider it.  If larger than 8 bytes, deal with it in
            8-byte chunks. */
         while (True) {
            tl_assert(gSz >= 0);
            if (gSz == 0) break;
            n = gSz <= 8 ? gSz : 8;
            /* Write suitably-casted 'curr' to the state slice
               gOff .. gOff+n-1 */
            tyDst = szToITy( n );
            do_shadow_PUT( mce, gOff,
                           NULL, /* original atom */
                           mkPCastTo( mce, tyDst, curr ), d->guard );
            gSz -= n;
            gOff += n;
         }
      }
   }

   /* Outputs: memory that we write or modify.  Same comments about
      endianness as above apply. */
   if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
      toDo = d->mSize;
      /* chew off 32-bit chunks */
      while (toDo >= 4) {
         do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
                          NULL, /* original data */
                          mkPCastTo( mce, Ity_I32, curr ),
                          d->guard );
         toDo -= 4;
      }
      /* chew off 16-bit chunks */
      while (toDo >= 2) {
         do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
                          NULL, /* original data */
                          mkPCastTo( mce, Ity_I16, curr ),
                          d->guard );
         toDo -= 2;
      }
      /* chew off the remaining 8-bit chunk, if any */
      if (toDo == 1) {
         do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
                          NULL, /* original data */
                          mkPCastTo( mce, Ity_I8, curr ),
                          d->guard );
         toDo -= 1;
      }
      tl_assert(toDo == 0);
   }

}
4621
sewardj1c0ce7a2009-07-01 08:10:49 +00004622
sewardj826ec492005-05-12 18:05:00 +00004623/* We have an ABI hint telling us that [base .. base+len-1] is to
4624 become undefined ("writable"). Generate code to call a helper to
4625 notify the A/V bit machinery of this fact.
4626
4627 We call
sewardj7cf4e6b2008-05-01 20:24:26 +00004628 void MC_(helperc_MAKE_STACK_UNINIT) ( Addr base, UWord len,
4629 Addr nia );
sewardj826ec492005-05-12 18:05:00 +00004630*/
4631static
sewardj7cf4e6b2008-05-01 20:24:26 +00004632void do_AbiHint ( MCEnv* mce, IRExpr* base, Int len, IRExpr* nia )
sewardj826ec492005-05-12 18:05:00 +00004633{
4634 IRDirty* di;
sewardj7cf4e6b2008-05-01 20:24:26 +00004635 /* Minor optimisation: if not doing origin tracking, ignore the
4636 supplied nia and pass zero instead. This is on the basis that
4637 MC_(helperc_MAKE_STACK_UNINIT) will ignore it anyway, and we can
4638 almost always generate a shorter instruction to put zero into a
4639 register than any other value. */
4640 if (MC_(clo_mc_level) < 3)
4641 nia = mkIRExpr_HWord(0);
4642
sewardj826ec492005-05-12 18:05:00 +00004643 di = unsafeIRDirty_0_N(
4644 0/*regparms*/,
4645 "MC_(helperc_MAKE_STACK_UNINIT)",
sewardj53ee1fc2005-12-23 02:29:58 +00004646 VG_(fnptr_to_fnentry)( &MC_(helperc_MAKE_STACK_UNINIT) ),
sewardj7cf4e6b2008-05-01 20:24:26 +00004647 mkIRExprVec_3( base, mkIRExpr_HWord( (UInt)len), nia )
sewardj826ec492005-05-12 18:05:00 +00004648 );
sewardj7cf4e6b2008-05-01 20:24:26 +00004649 stmt( 'V', mce, IRStmt_Dirty(di) );
sewardj826ec492005-05-12 18:05:00 +00004650}
4651
njn25e49d8e72002-09-23 09:36:25 +00004652
sewardj1c0ce7a2009-07-01 08:10:49 +00004653/* ------ Dealing with IRCAS (big and complex) ------ */
4654
4655/* FWDS */
4656static IRAtom* gen_load_b ( MCEnv* mce, Int szB,
4657 IRAtom* baseaddr, Int offset );
4658static IRAtom* gen_maxU32 ( MCEnv* mce, IRAtom* b1, IRAtom* b2 );
4659static void gen_store_b ( MCEnv* mce, Int szB,
4660 IRAtom* baseaddr, Int offset, IRAtom* dataB,
4661 IRAtom* guard );
4662
4663static void do_shadow_CAS_single ( MCEnv* mce, IRCAS* cas );
4664static void do_shadow_CAS_double ( MCEnv* mce, IRCAS* cas );
4665
4666
4667/* Either ORIG and SHADOW are both IRExpr.RdTmps, or they are both
4668 IRExpr.Consts, else this asserts. If they are both Consts, it
4669 doesn't do anything. So that just leaves the RdTmp case.
4670
4671 In which case: this assigns the shadow value SHADOW to the IR
4672 shadow temporary associated with ORIG. That is, ORIG, being an
4673 original temporary, will have a shadow temporary associated with
4674 it. However, in the case envisaged here, there will so far have
4675 been no IR emitted to actually write a shadow value into that
4676 temporary. What this routine does is to (emit IR to) copy the
4677 value in SHADOW into said temporary, so that after this call,
4678 IRExpr.RdTmps of ORIG's shadow temp will correctly pick up the
4679 value in SHADOW.
4680
4681 Point is to allow callers to compute "by hand" a shadow value for
4682 ORIG, and force it to be associated with ORIG.
4683
4684 How do we know that that shadow associated with ORIG has not so far
4685 been assigned to? Well, we don't per se know that, but supposing
4686 it had. Then this routine would create a second assignment to it,
4687 and later the IR sanity checker would barf. But that never
4688 happens. QED.
4689*/
4690static void bind_shadow_tmp_to_orig ( UChar how,
4691 MCEnv* mce,
4692 IRAtom* orig, IRAtom* shadow )
4693{
4694 tl_assert(isOriginalAtom(mce, orig));
4695 tl_assert(isShadowAtom(mce, shadow));
4696 switch (orig->tag) {
4697 case Iex_Const:
4698 tl_assert(shadow->tag == Iex_Const);
4699 break;
4700 case Iex_RdTmp:
4701 tl_assert(shadow->tag == Iex_RdTmp);
4702 if (how == 'V') {
4703 assign('V', mce, findShadowTmpV(mce,orig->Iex.RdTmp.tmp),
4704 shadow);
4705 } else {
4706 tl_assert(how == 'B');
4707 assign('B', mce, findShadowTmpB(mce,orig->Iex.RdTmp.tmp),
4708 shadow);
4709 }
4710 break;
4711 default:
4712 tl_assert(0);
4713 }
4714}
4715
4716
/* Instrument an IRCAS (atomic compare-and-swap), dispatching to the
   single- or double-element variant.  The shared scheme is described
   here once. */
static
void do_shadow_CAS ( MCEnv* mce, IRCAS* cas )
{
   /* Scheme is (both single- and double- cases):

      1. fetch data#,dataB (the proposed new value)

      2. fetch expd#,expdB (what we expect to see at the address)

      3. check definedness of address

      4. load old#,oldB from shadow memory; this also checks
         addressibility of the address

      5. the CAS itself

      6. compute "expected == old".  See COMMENT_ON_CasCmpEQ below.

      7. if "expected == old" (as computed by (6))
         store data#,dataB to shadow memory

      Note that 5 reads 'old' but 4 reads 'old#'.  Similarly, 5 stores
      'data' but 7 stores 'data#'.  Hence it is possible for the
      shadow data to be incorrectly checked and/or updated:

      * 7 is at least gated correctly, since the 'expected == old'
        condition is derived from outputs of 5.  However, the shadow
        write could happen too late: imagine after 5 we are
        descheduled, a different thread runs, writes a different
        (shadow) value at the address, and then we resume, hence
        overwriting the shadow value written by the other thread.

      Because the original memory access is atomic, there's no way to
      make both the original and shadow accesses into a single atomic
      thing, hence this is unavoidable.

      At least as Valgrind stands, I don't think it's a problem, since
      we're single threaded *and* we guarantee that there are no
      context switches during the execution of any specific superblock
      -- context switches can only happen at superblock boundaries.

      If Valgrind ever becomes MT in the future, then it might be more
      of a problem.  A possible kludge would be to artificially
      associate with the location, a lock, which we must acquire and
      release around the transaction as a whole.  Hmm, that probably
      wouldn't work properly since it only guards us against other
      threads doing CASs on the same location, not against other
      threads doing normal reads and writes.

      ------------------------------------------------------------

      COMMENT_ON_CasCmpEQ:

      Note two things.  Firstly, in the sequence above, we compute
      "expected == old", but we don't check definedness of it.  Why
      not?  Also, the x86 and amd64 front ends use
      Iop_CasCmp{EQ,NE}{8,16,32,64} comparisons to make the equivalent
      determination (expected == old ?) for themselves, and we also
      don't check definedness for those primops; we just say that the
      result is defined.  Why?  Details follow.

      x86/amd64 contains various forms of locked insns:
      * lock prefix before all basic arithmetic insn;
        eg lock xorl %reg1,(%reg2)
      * atomic exchange reg-mem
      * compare-and-swaps

      Rather than attempt to represent them all, which would be a
      royal PITA, I used a result from Maurice Herlihy
      (http://en.wikipedia.org/wiki/Maurice_Herlihy), in which he
      demonstrates that compare-and-swap is a primitive more general
      than the other two, and so can be used to represent all of them.
      So the translation scheme for (eg) lock incl (%reg) is as
      follows:

        again:
         old = * %reg
         new = old + 1
         atomically { if (* %reg == old) { * %reg = new } else { goto again } }

      The "atomically" is the CAS bit.  The scheme is always the same:
      get old value from memory, compute new value, atomically stuff
      new value back in memory iff the old value has not changed (iow,
      no other thread modified it in the meantime).  If it has changed
      then we've been out-raced and we have to start over.

      Now that's all very neat, but it has the bad side effect of
      introducing an explicit equality test into the translation.
      Consider the behaviour of said code on a memory location which
      is uninitialised.  We will wind up doing a comparison on
      uninitialised data, and mc duly complains.

      What's difficult about this is, the common case is that the
      location is uncontended, and so we're usually comparing the same
      value (* %reg) with itself.  So we shouldn't complain even if it
      is undefined.  But mc doesn't know that.

      My solution is to mark the == in the IR specially, so as to tell
      mc that it almost certainly compares a value with itself, and we
      should just regard the result as always defined.  Rather than
      add a bit to all IROps, I just cloned Iop_CmpEQ{8,16,32,64} into
      Iop_CasCmpEQ{8,16,32,64} so as not to disturb anything else.

      So there's always the question of, can this give a false
      negative?  eg, imagine that initially, * %reg is defined; and we
      read that; but then in the gap between the read and the CAS, a
      different thread writes an undefined (and different) value at
      the location.  Then the CAS in this thread will fail and we will
      go back to "again:", but without knowing that the trip back
      there was based on an undefined comparison.  No matter; at least
      the other thread won the race and the location is correctly
      marked as undefined.  What if it wrote an uninitialised version
      of the same value that was there originally, though?

      etc etc.  Seems like there's a small corner case in which we
      might lose the fact that something's defined -- we're out-raced
      in between the "old = * reg" and the "atomically {", _and_ the
      other thread is writing in an undefined version of what's
      already there.  Well, that seems pretty unlikely.

      ---

      If we ever need to reinstate it .. code which generates a
      definedness test for "expected == old" was removed at r10432 of
      this file.
   */
   /* An invalid oldHi temp marks the single-element form of IRCAS. */
   if (cas->oldHi == IRTemp_INVALID) {
      do_shadow_CAS_single( mce, cas );
   } else {
      do_shadow_CAS_double( mce, cas );
   }
}
4849
4850
/* Single-element CAS: implements steps 1..7 of the scheme described
   above do_shadow_CAS, for both V-bit (definedness) shadows and --
   when origin tracking is enabled -- B (origin) shadows. */
static void do_shadow_CAS_single ( MCEnv* mce, IRCAS* cas )
{
   IRAtom *vdataLo = NULL, *bdataLo = NULL;
   IRAtom *vexpdLo = NULL, *bexpdLo = NULL;
   IRAtom *voldLo  = NULL, *boldLo  = NULL;
   IRAtom *expd_eq_old = NULL;
   IROp   opCasCmpEQ;
   Int    elemSzB;
   IRType elemTy;
   Bool   otrak = MC_(clo_mc_level) >= 3; /* a shorthand */

   /* single CAS */
   tl_assert(cas->oldHi == IRTemp_INVALID);
   tl_assert(cas->expdHi == NULL);
   tl_assert(cas->dataHi == NULL);

   /* Pick the element size and the "blessed" CAS-comparison op for
      this element type (see COMMENT_ON_CasCmpEQ). */
   elemTy = typeOfIRExpr(mce->sb->tyenv, cas->expdLo);
   switch (elemTy) {
      case Ity_I8:  elemSzB = 1; opCasCmpEQ = Iop_CasCmpEQ8;  break;
      case Ity_I16: elemSzB = 2; opCasCmpEQ = Iop_CasCmpEQ16; break;
      case Ity_I32: elemSzB = 4; opCasCmpEQ = Iop_CasCmpEQ32; break;
      case Ity_I64: elemSzB = 8; opCasCmpEQ = Iop_CasCmpEQ64; break;
      default: tl_assert(0); /* IR defn disallows any other types */
   }

   /* 1. fetch data# (the proposed new value) */
   tl_assert(isOriginalAtom(mce, cas->dataLo));
   vdataLo
      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataLo));
   tl_assert(isShadowAtom(mce, vdataLo));
   if (otrak) {
      bdataLo
         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataLo));
      tl_assert(isShadowAtom(mce, bdataLo));
   }

   /* 2. fetch expected# (what we expect to see at the address) */
   tl_assert(isOriginalAtom(mce, cas->expdLo));
   vexpdLo
      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdLo));
   tl_assert(isShadowAtom(mce, vexpdLo));
   if (otrak) {
      bexpdLo
         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdLo));
      tl_assert(isShadowAtom(mce, bexpdLo));
   }

   /* 3. check definedness of address */
   /* 4. fetch old# from shadow memory; this also checks
         addressibility of the address */
   voldLo
      = assignNew(
           'V', mce, elemTy,
           expr2vbits_Load(
              mce,
              cas->end, elemTy, cas->addr, 0/*Addr bias*/
        ));
   /* Make later reads of oldLo's shadow temp see the loaded shadow. */
   bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldLo), voldLo);
   if (otrak) {
      boldLo
         = assignNew('B', mce, Ity_I32,
                     gen_load_b(mce, elemSzB, cas->addr, 0/*addr bias*/));
      bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldLo), boldLo);
   }

   /* 5. the CAS itself */
   stmt( 'C', mce, IRStmt_CAS(cas) );

   /* 6. compute "expected == old" */
   /* See COMMENT_ON_CasCmpEQ in this file for background/rationale. */
   /* Note that 'C' is kinda faking it; it is indeed a non-shadow
      tree, but it's not copied from the input block. */
   expd_eq_old
      = assignNew('C', mce, Ity_I1,
                  binop(opCasCmpEQ, cas->expdLo, mkexpr(cas->oldLo)));

   /* 7. if "expected == old"
            store data# to shadow memory */
   do_shadow_Store( mce, cas->end, cas->addr, 0/*bias*/,
                    NULL/*data*/, vdataLo/*vdata*/,
                    expd_eq_old/*guard for store*/ );
   if (otrak) {
      gen_store_b( mce, elemSzB, cas->addr, 0/*offset*/,
                   bdataLo/*bdata*/,
                   expd_eq_old/*guard for store*/ );
   }
}
4938
4939
/* Double-element CAS: same scheme as do_shadow_CAS_single, applied to
   the {Hi,Lo} element pair.  The equality test of step 6 is built as
   (expdHi^oldHi) | (expdLo^oldLo) == 0 so that a single blessed
   CasCmpEQ covers both halves. */
static void do_shadow_CAS_double ( MCEnv* mce, IRCAS* cas )
{
   IRAtom *vdataHi = NULL, *bdataHi = NULL;
   IRAtom *vdataLo = NULL, *bdataLo = NULL;
   IRAtom *vexpdHi = NULL, *bexpdHi = NULL;
   IRAtom *vexpdLo = NULL, *bexpdLo = NULL;
   IRAtom *voldHi  = NULL, *boldHi  = NULL;
   IRAtom *voldLo  = NULL, *boldLo  = NULL;
   IRAtom *xHi = NULL, *xLo = NULL, *xHL = NULL;
   IRAtom *expd_eq_old = NULL, *zero = NULL;
   IROp   opCasCmpEQ, opOr, opXor;
   Int    elemSzB, memOffsLo, memOffsHi;
   IRType elemTy;
   Bool   otrak = MC_(clo_mc_level) >= 3; /* a shorthand */

   /* double CAS */
   tl_assert(cas->oldHi != IRTemp_INVALID);
   tl_assert(cas->expdHi != NULL);
   tl_assert(cas->dataHi != NULL);

   /* Pick per-element size, zero constant, and the ops needed to
      build the combined equality test. */
   elemTy = typeOfIRExpr(mce->sb->tyenv, cas->expdLo);
   switch (elemTy) {
      case Ity_I8:
         opCasCmpEQ = Iop_CasCmpEQ8; opOr = Iop_Or8; opXor = Iop_Xor8;
         elemSzB = 1; zero = mkU8(0);
         break;
      case Ity_I16:
         opCasCmpEQ = Iop_CasCmpEQ16; opOr = Iop_Or16; opXor = Iop_Xor16;
         elemSzB = 2; zero = mkU16(0);
         break;
      case Ity_I32:
         opCasCmpEQ = Iop_CasCmpEQ32; opOr = Iop_Or32; opXor = Iop_Xor32;
         elemSzB = 4; zero = mkU32(0);
         break;
      case Ity_I64:
         opCasCmpEQ = Iop_CasCmpEQ64; opOr = Iop_Or64; opXor = Iop_Xor64;
         elemSzB = 8; zero = mkU64(0);
         break;
      default:
         tl_assert(0); /* IR defn disallows any other types */
   }

   /* 1. fetch data# (the proposed new value) */
   tl_assert(isOriginalAtom(mce, cas->dataHi));
   tl_assert(isOriginalAtom(mce, cas->dataLo));
   vdataHi
      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataHi));
   vdataLo
      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataLo));
   tl_assert(isShadowAtom(mce, vdataHi));
   tl_assert(isShadowAtom(mce, vdataLo));
   if (otrak) {
      bdataHi
         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataHi));
      bdataLo
         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataLo));
      tl_assert(isShadowAtom(mce, bdataHi));
      tl_assert(isShadowAtom(mce, bdataLo));
   }

   /* 2. fetch expected# (what we expect to see at the address) */
   tl_assert(isOriginalAtom(mce, cas->expdHi));
   tl_assert(isOriginalAtom(mce, cas->expdLo));
   vexpdHi
      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdHi));
   vexpdLo
      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdLo));
   tl_assert(isShadowAtom(mce, vexpdHi));
   tl_assert(isShadowAtom(mce, vexpdLo));
   if (otrak) {
      bexpdHi
         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdHi));
      bexpdLo
         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdLo));
      tl_assert(isShadowAtom(mce, bexpdHi));
      tl_assert(isShadowAtom(mce, bexpdLo));
   }

   /* 3. check definedness of address */
   /* 4. fetch old# from shadow memory; this also checks
         addressibility of the address */
   /* The in-memory placement of the Hi/Lo halves depends on
      endianness. */
   if (cas->end == Iend_LE) {
      memOffsLo = 0;
      memOffsHi = elemSzB;
   } else {
      tl_assert(cas->end == Iend_BE);
      memOffsLo = elemSzB;
      memOffsHi = 0;
   }
   voldHi
      = assignNew(
           'V', mce, elemTy,
           expr2vbits_Load(
              mce,
              cas->end, elemTy, cas->addr, memOffsHi/*Addr bias*/
        ));
   voldLo
      = assignNew(
           'V', mce, elemTy,
           expr2vbits_Load(
              mce,
              cas->end, elemTy, cas->addr, memOffsLo/*Addr bias*/
        ));
   bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldHi), voldHi);
   bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldLo), voldLo);
   if (otrak) {
      boldHi
         = assignNew('B', mce, Ity_I32,
                     gen_load_b(mce, elemSzB, cas->addr,
                                memOffsHi/*addr bias*/));
      boldLo
         = assignNew('B', mce, Ity_I32,
                     gen_load_b(mce, elemSzB, cas->addr,
                                memOffsLo/*addr bias*/));
      bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldHi), boldHi);
      bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldLo), boldLo);
   }

   /* 5. the CAS itself */
   stmt( 'C', mce, IRStmt_CAS(cas) );

   /* 6. compute "expected == old" */
   /* See COMMENT_ON_CasCmpEQ in this file for background/rationale. */
   /* Note that 'C' is kinda faking it; it is indeed a non-shadow
      tree, but it's not copied from the input block. */
   /*
      xHi = oldHi ^ expdHi;
      xLo = oldLo ^ expdLo;
      xHL = xHi | xLo;
      expd_eq_old = xHL == 0;
   */
   xHi = assignNew('C', mce, elemTy,
                   binop(opXor, cas->expdHi, mkexpr(cas->oldHi)));
   xLo = assignNew('C', mce, elemTy,
                   binop(opXor, cas->expdLo, mkexpr(cas->oldLo)));
   xHL = assignNew('C', mce, elemTy,
                   binop(opOr, xHi, xLo));
   expd_eq_old
      = assignNew('C', mce, Ity_I1,
                  binop(opCasCmpEQ, xHL, zero));

   /* 7. if "expected == old"
            store data# to shadow memory */
   do_shadow_Store( mce, cas->end, cas->addr, memOffsHi/*bias*/,
                    NULL/*data*/, vdataHi/*vdata*/,
                    expd_eq_old/*guard for store*/ );
   do_shadow_Store( mce, cas->end, cas->addr, memOffsLo/*bias*/,
                    NULL/*data*/, vdataLo/*vdata*/,
                    expd_eq_old/*guard for store*/ );
   if (otrak) {
      gen_store_b( mce, elemSzB, cas->addr, memOffsHi/*offset*/,
                   bdataHi/*bdata*/,
                   expd_eq_old/*guard for store*/ );
      gen_store_b( mce, elemSzB, cas->addr, memOffsLo/*offset*/,
                   bdataLo/*bdata*/,
                   expd_eq_old/*guard for store*/ );
   }
}
5098
5099
sewardjdb5907d2009-11-26 17:20:21 +00005100/* ------ Dealing with LL/SC (not difficult) ------ */
5101
/* Instrument a load-linked (stStoredata == NULL) or store-conditional
   (stStoredata != NULL) operation.  LL is shadowed as a plain load
   into stResult's V shadow; SC is shadowed as a plain store, with the
   success/failure flag in stResult marked always-defined. */
static void do_shadow_LLSC ( MCEnv*    mce,
                             IREndness stEnd,
                             IRTemp    stResult,
                             IRExpr*   stAddr,
                             IRExpr*   stStoredata )
{
   /* In short: treat a load-linked like a normal load followed by an
      assignment of the loaded (shadow) data to the result temporary.
      Treat a store-conditional like a normal store, and mark the
      result temporary as defined. */
   IRType resTy  = typeOfIRTemp(mce->sb->tyenv, stResult);
   IRTemp resTmp = findShadowTmpV(mce, stResult);

   tl_assert(isIRAtom(stAddr));
   if (stStoredata)
      tl_assert(isIRAtom(stStoredata));

   if (stStoredata == NULL) {
      /* Load Linked */
      /* Just treat this as a normal load, followed by an assignment of
         the value to .result. */
      /* Stay sane */
      tl_assert(resTy == Ity_I64 || resTy == Ity_I32
                || resTy == Ity_I16 || resTy == Ity_I8);
      assign( 'V', mce, resTmp,
              expr2vbits_Load(
                 mce, stEnd, resTy, stAddr, 0/*addr bias*/));
   } else {
      /* Store Conditional */
      /* Stay sane */
      IRType dataTy = typeOfIRExpr(mce->sb->tyenv,
                                   stStoredata);
      tl_assert(dataTy == Ity_I64 || dataTy == Ity_I32
                || dataTy == Ity_I16 || dataTy == Ity_I8);
      /* Passing the original data with a NULL shadow makes
         do_shadow_Store compute the shadow itself. */
      do_shadow_Store( mce, stEnd,
                       stAddr, 0/* addr bias */,
                       stStoredata,
                       NULL /* shadow data */,
                       NULL/*guard*/ );
      /* This is a store conditional, so it writes to .result a value
         indicating whether or not the store succeeded.  Just claim
         this value is always defined.  In the PowerPC interpretation
         of store-conditional, definedness of the success indication
         depends on whether the address of the store matches the
         reservation address.  But we can't tell that here (and
         anyway, we're not being PowerPC-specific).  At least we are
         guaranteed that the definedness of the store address, and its
         addressibility, will be checked as per normal.  So it seems
         pretty safe to just say that the success indication is always
         defined.

         In schemeS, for origin tracking, we must correspondingly set
         a no-origin value for the origin shadow of .result.
      */
      tl_assert(resTy == Ity_I1);
      assign( 'V', mce, resTmp, definedOfType(resTy) );
   }
}
5160
5161
sewardj95448072004-11-22 20:19:51 +00005162/*------------------------------------------------------------*/
5163/*--- Memcheck main ---*/
5164/*------------------------------------------------------------*/
njn25e49d8e72002-09-23 09:36:25 +00005165
sewardj7cf4e6b2008-05-01 20:24:26 +00005166static void schemeS ( MCEnv* mce, IRStmt* st );
5167
sewardj95448072004-11-22 20:19:51 +00005168static Bool isBogusAtom ( IRAtom* at )
njn25e49d8e72002-09-23 09:36:25 +00005169{
sewardj95448072004-11-22 20:19:51 +00005170 ULong n = 0;
5171 IRConst* con;
sewardj710d6c22005-03-20 18:55:15 +00005172 tl_assert(isIRAtom(at));
sewardj0b9d74a2006-12-24 02:24:11 +00005173 if (at->tag == Iex_RdTmp)
sewardj95448072004-11-22 20:19:51 +00005174 return False;
5175 tl_assert(at->tag == Iex_Const);
5176 con = at->Iex.Const.con;
5177 switch (con->tag) {
sewardjd5204dc2004-12-31 01:16:11 +00005178 case Ico_U1: return False;
5179 case Ico_U8: n = (ULong)con->Ico.U8; break;
5180 case Ico_U16: n = (ULong)con->Ico.U16; break;
5181 case Ico_U32: n = (ULong)con->Ico.U32; break;
5182 case Ico_U64: n = (ULong)con->Ico.U64; break;
5183 case Ico_F64: return False;
sewardjb5b87402011-03-07 16:05:35 +00005184 case Ico_F32i: return False;
sewardjd5204dc2004-12-31 01:16:11 +00005185 case Ico_F64i: return False;
5186 case Ico_V128: return False;
sewardj95448072004-11-22 20:19:51 +00005187 default: ppIRExpr(at); tl_assert(0);
5188 }
5189 /* VG_(printf)("%llx\n", n); */
sewardj96a922e2005-04-23 23:26:29 +00005190 return (/*32*/ n == 0xFEFEFEFFULL
5191 /*32*/ || n == 0x80808080ULL
sewardj17b47432008-12-17 01:12:58 +00005192 /*32*/ || n == 0x7F7F7F7FULL
tomd9774d72005-06-27 08:11:01 +00005193 /*64*/ || n == 0xFFFFFFFFFEFEFEFFULL
sewardj96a922e2005-04-23 23:26:29 +00005194 /*64*/ || n == 0xFEFEFEFEFEFEFEFFULL
tomd9774d72005-06-27 08:11:01 +00005195 /*64*/ || n == 0x0000000000008080ULL
sewardj96a922e2005-04-23 23:26:29 +00005196 /*64*/ || n == 0x8080808080808080ULL
sewardj17b47432008-12-17 01:12:58 +00005197 /*64*/ || n == 0x0101010101010101ULL
sewardj96a922e2005-04-23 23:26:29 +00005198 );
sewardj95448072004-11-22 20:19:51 +00005199}
njn25e49d8e72002-09-23 09:36:25 +00005200
sewardj95448072004-11-22 20:19:51 +00005201static Bool checkForBogusLiterals ( /*FLAT*/ IRStmt* st )
5202{
sewardjd5204dc2004-12-31 01:16:11 +00005203 Int i;
5204 IRExpr* e;
5205 IRDirty* d;
sewardj1c0ce7a2009-07-01 08:10:49 +00005206 IRCAS* cas;
sewardj95448072004-11-22 20:19:51 +00005207 switch (st->tag) {
sewardj0b9d74a2006-12-24 02:24:11 +00005208 case Ist_WrTmp:
5209 e = st->Ist.WrTmp.data;
sewardj95448072004-11-22 20:19:51 +00005210 switch (e->tag) {
5211 case Iex_Get:
sewardj0b9d74a2006-12-24 02:24:11 +00005212 case Iex_RdTmp:
sewardj95448072004-11-22 20:19:51 +00005213 return False;
sewardjd5204dc2004-12-31 01:16:11 +00005214 case Iex_Const:
5215 return isBogusAtom(e);
sewardj95448072004-11-22 20:19:51 +00005216 case Iex_Unop:
5217 return isBogusAtom(e->Iex.Unop.arg);
sewardjd5204dc2004-12-31 01:16:11 +00005218 case Iex_GetI:
5219 return isBogusAtom(e->Iex.GetI.ix);
sewardj95448072004-11-22 20:19:51 +00005220 case Iex_Binop:
5221 return isBogusAtom(e->Iex.Binop.arg1)
5222 || isBogusAtom(e->Iex.Binop.arg2);
sewardjed69fdb2006-02-03 16:12:27 +00005223 case Iex_Triop:
florian26441742012-06-02 20:30:41 +00005224 return isBogusAtom(e->Iex.Triop.details->arg1)
5225 || isBogusAtom(e->Iex.Triop.details->arg2)
5226 || isBogusAtom(e->Iex.Triop.details->arg3);
sewardje91cea72006-02-08 19:32:02 +00005227 case Iex_Qop:
floriane2ab2972012-06-01 20:43:03 +00005228 return isBogusAtom(e->Iex.Qop.details->arg1)
5229 || isBogusAtom(e->Iex.Qop.details->arg2)
5230 || isBogusAtom(e->Iex.Qop.details->arg3)
5231 || isBogusAtom(e->Iex.Qop.details->arg4);
sewardj95448072004-11-22 20:19:51 +00005232 case Iex_Mux0X:
5233 return isBogusAtom(e->Iex.Mux0X.cond)
5234 || isBogusAtom(e->Iex.Mux0X.expr0)
5235 || isBogusAtom(e->Iex.Mux0X.exprX);
sewardj2e595852005-06-30 23:33:37 +00005236 case Iex_Load:
5237 return isBogusAtom(e->Iex.Load.addr);
sewardj95448072004-11-22 20:19:51 +00005238 case Iex_CCall:
5239 for (i = 0; e->Iex.CCall.args[i]; i++)
5240 if (isBogusAtom(e->Iex.CCall.args[i]))
5241 return True;
5242 return False;
5243 default:
5244 goto unhandled;
5245 }
sewardjd5204dc2004-12-31 01:16:11 +00005246 case Ist_Dirty:
5247 d = st->Ist.Dirty.details;
5248 for (i = 0; d->args[i]; i++)
5249 if (isBogusAtom(d->args[i]))
5250 return True;
5251 if (d->guard && isBogusAtom(d->guard))
5252 return True;
5253 if (d->mAddr && isBogusAtom(d->mAddr))
5254 return True;
5255 return False;
sewardj95448072004-11-22 20:19:51 +00005256 case Ist_Put:
5257 return isBogusAtom(st->Ist.Put.data);
sewardjd5204dc2004-12-31 01:16:11 +00005258 case Ist_PutI:
floriand39b0222012-05-31 15:48:13 +00005259 return isBogusAtom(st->Ist.PutI.details->ix)
5260 || isBogusAtom(st->Ist.PutI.details->data);
sewardj2e595852005-06-30 23:33:37 +00005261 case Ist_Store:
5262 return isBogusAtom(st->Ist.Store.addr)
5263 || isBogusAtom(st->Ist.Store.data);
sewardj95448072004-11-22 20:19:51 +00005264 case Ist_Exit:
sewardjd5204dc2004-12-31 01:16:11 +00005265 return isBogusAtom(st->Ist.Exit.guard);
sewardj826ec492005-05-12 18:05:00 +00005266 case Ist_AbiHint:
sewardj7cf4e6b2008-05-01 20:24:26 +00005267 return isBogusAtom(st->Ist.AbiHint.base)
5268 || isBogusAtom(st->Ist.AbiHint.nia);
sewardj21dc3452005-03-21 00:27:41 +00005269 case Ist_NoOp:
sewardj29faa502005-03-16 18:20:21 +00005270 case Ist_IMark:
sewardj72d75132007-11-09 23:06:35 +00005271 case Ist_MBE:
sewardjbd598e12005-01-07 12:10:21 +00005272 return False;
sewardj1c0ce7a2009-07-01 08:10:49 +00005273 case Ist_CAS:
5274 cas = st->Ist.CAS.details;
5275 return isBogusAtom(cas->addr)
5276 || (cas->expdHi ? isBogusAtom(cas->expdHi) : False)
5277 || isBogusAtom(cas->expdLo)
5278 || (cas->dataHi ? isBogusAtom(cas->dataHi) : False)
5279 || isBogusAtom(cas->dataLo);
sewardjdb5907d2009-11-26 17:20:21 +00005280 case Ist_LLSC:
5281 return isBogusAtom(st->Ist.LLSC.addr)
5282 || (st->Ist.LLSC.storedata
5283 ? isBogusAtom(st->Ist.LLSC.storedata)
5284 : False);
sewardj95448072004-11-22 20:19:51 +00005285 default:
5286 unhandled:
5287 ppIRStmt(st);
5288 VG_(tool_panic)("hasBogusLiterals");
5289 }
5290}
njn25e49d8e72002-09-23 09:36:25 +00005291
njn25e49d8e72002-09-23 09:36:25 +00005292
sewardj0b9d74a2006-12-24 02:24:11 +00005293IRSB* MC_(instrument) ( VgCallbackClosure* closure,
sewardj1c0ce7a2009-07-01 08:10:49 +00005294 IRSB* sb_in,
sewardj461df9c2006-01-17 02:06:39 +00005295 VexGuestLayout* layout,
5296 VexGuestExtents* vge,
sewardjd54babf2005-03-21 00:55:49 +00005297 IRType gWordTy, IRType hWordTy )
sewardj95448072004-11-22 20:19:51 +00005298{
sewardj7cf4e6b2008-05-01 20:24:26 +00005299 Bool verboze = 0||False;
sewardj151b90d2005-07-06 19:42:23 +00005300 Bool bogus;
sewardjd5204dc2004-12-31 01:16:11 +00005301 Int i, j, first_stmt;
sewardj95448072004-11-22 20:19:51 +00005302 IRStmt* st;
sewardjd5204dc2004-12-31 01:16:11 +00005303 MCEnv mce;
sewardj1c0ce7a2009-07-01 08:10:49 +00005304 IRSB* sb_out;
sewardjd54babf2005-03-21 00:55:49 +00005305
5306 if (gWordTy != hWordTy) {
5307 /* We don't currently support this case. */
5308 VG_(tool_panic)("host/guest word size mismatch");
5309 }
njn25e49d8e72002-09-23 09:36:25 +00005310
sewardj6cf40ff2005-04-20 22:31:26 +00005311 /* Check we're not completely nuts */
sewardj7cf4e6b2008-05-01 20:24:26 +00005312 tl_assert(sizeof(UWord) == sizeof(void*));
5313 tl_assert(sizeof(Word) == sizeof(void*));
5314 tl_assert(sizeof(Addr) == sizeof(void*));
5315 tl_assert(sizeof(ULong) == 8);
5316 tl_assert(sizeof(Long) == 8);
5317 tl_assert(sizeof(Addr64) == 8);
5318 tl_assert(sizeof(UInt) == 4);
5319 tl_assert(sizeof(Int) == 4);
5320
5321 tl_assert(MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3);
sewardj6cf40ff2005-04-20 22:31:26 +00005322
sewardj0b9d74a2006-12-24 02:24:11 +00005323 /* Set up SB */
sewardj1c0ce7a2009-07-01 08:10:49 +00005324 sb_out = deepCopyIRSBExceptStmts(sb_in);
njn25e49d8e72002-09-23 09:36:25 +00005325
sewardj1c0ce7a2009-07-01 08:10:49 +00005326 /* Set up the running environment. Both .sb and .tmpMap are
5327 modified as we go along. Note that tmps are added to both
5328 .sb->tyenv and .tmpMap together, so the valid index-set for
5329 those two arrays should always be identical. */
5330 VG_(memset)(&mce, 0, sizeof(mce));
5331 mce.sb = sb_out;
sewardj7cf4e6b2008-05-01 20:24:26 +00005332 mce.trace = verboze;
sewardj95448072004-11-22 20:19:51 +00005333 mce.layout = layout;
sewardj95448072004-11-22 20:19:51 +00005334 mce.hWordTy = hWordTy;
sewardjd5204dc2004-12-31 01:16:11 +00005335 mce.bogusLiterals = False;
sewardj1c0ce7a2009-07-01 08:10:49 +00005336
sewardj54eac252012-03-27 10:19:39 +00005337 /* Do expensive interpretation for Iop_Add32 and Iop_Add64 on
5338 Darwin. 10.7 is mostly built with LLVM, which uses these for
5339 bitfield inserts, and we get a lot of false errors if the cheap
5340 interpretation is used, alas. Could solve this much better if
5341 we knew which of such adds came from x86/amd64 LEA instructions,
5342 since these are the only ones really needing the expensive
5343 interpretation, but that would require some way to tag them in
5344 the _toIR.c front ends, which is a lot of faffing around. So
5345 for now just use the slow and blunt-instrument solution. */
5346 mce.useLLVMworkarounds = False;
5347# if defined(VGO_darwin)
5348 mce.useLLVMworkarounds = True;
5349# endif
5350
sewardj1c0ce7a2009-07-01 08:10:49 +00005351 mce.tmpMap = VG_(newXA)( VG_(malloc), "mc.MC_(instrument).1", VG_(free),
5352 sizeof(TempMapEnt));
5353 for (i = 0; i < sb_in->tyenv->types_used; i++) {
5354 TempMapEnt ent;
5355 ent.kind = Orig;
5356 ent.shadowV = IRTemp_INVALID;
5357 ent.shadowB = IRTemp_INVALID;
5358 VG_(addToXA)( mce.tmpMap, &ent );
sewardj7cf4e6b2008-05-01 20:24:26 +00005359 }
sewardj1c0ce7a2009-07-01 08:10:49 +00005360 tl_assert( VG_(sizeXA)( mce.tmpMap ) == sb_in->tyenv->types_used );
sewardj95448072004-11-22 20:19:51 +00005361
sewardj151b90d2005-07-06 19:42:23 +00005362 /* Make a preliminary inspection of the statements, to see if there
5363 are any dodgy-looking literals. If there are, we generate
5364 extra-detailed (hence extra-expensive) instrumentation in
5365 places. Scan the whole bb even if dodgyness is found earlier,
5366 so that the flatness assertion is applied to all stmts. */
5367
5368 bogus = False;
sewardj95448072004-11-22 20:19:51 +00005369
sewardj1c0ce7a2009-07-01 08:10:49 +00005370 for (i = 0; i < sb_in->stmts_used; i++) {
sewardj151b90d2005-07-06 19:42:23 +00005371
sewardj1c0ce7a2009-07-01 08:10:49 +00005372 st = sb_in->stmts[i];
sewardj21dc3452005-03-21 00:27:41 +00005373 tl_assert(st);
sewardj95448072004-11-22 20:19:51 +00005374 tl_assert(isFlatIRStmt(st));
5375
sewardj151b90d2005-07-06 19:42:23 +00005376 if (!bogus) {
5377 bogus = checkForBogusLiterals(st);
5378 if (0 && bogus) {
sewardj95448072004-11-22 20:19:51 +00005379 VG_(printf)("bogus: ");
5380 ppIRStmt(st);
5381 VG_(printf)("\n");
5382 }
5383 }
sewardjd5204dc2004-12-31 01:16:11 +00005384
sewardj151b90d2005-07-06 19:42:23 +00005385 }
5386
5387 mce.bogusLiterals = bogus;
5388
sewardja0871482006-10-18 12:41:55 +00005389 /* Copy verbatim any IR preamble preceding the first IMark */
sewardj151b90d2005-07-06 19:42:23 +00005390
sewardj1c0ce7a2009-07-01 08:10:49 +00005391 tl_assert(mce.sb == sb_out);
5392 tl_assert(mce.sb != sb_in);
sewardjf1962d32006-10-19 13:22:16 +00005393
sewardja0871482006-10-18 12:41:55 +00005394 i = 0;
sewardj1c0ce7a2009-07-01 08:10:49 +00005395 while (i < sb_in->stmts_used && sb_in->stmts[i]->tag != Ist_IMark) {
sewardja0871482006-10-18 12:41:55 +00005396
sewardj1c0ce7a2009-07-01 08:10:49 +00005397 st = sb_in->stmts[i];
sewardja0871482006-10-18 12:41:55 +00005398 tl_assert(st);
5399 tl_assert(isFlatIRStmt(st));
5400
sewardj1c0ce7a2009-07-01 08:10:49 +00005401 stmt( 'C', &mce, sb_in->stmts[i] );
sewardja0871482006-10-18 12:41:55 +00005402 i++;
5403 }
5404
sewardjf1962d32006-10-19 13:22:16 +00005405 /* Nasty problem. IR optimisation of the pre-instrumented IR may
5406 cause the IR following the preamble to contain references to IR
5407 temporaries defined in the preamble. Because the preamble isn't
5408 instrumented, these temporaries don't have any shadows.
5409 Nevertheless uses of them following the preamble will cause
5410 memcheck to generate references to their shadows. End effect is
5411 to cause IR sanity check failures, due to references to
5412 non-existent shadows. This is only evident for the complex
5413 preambles used for function wrapping on TOC-afflicted platforms
sewardj6e9de462011-06-28 07:25:29 +00005414 (ppc64-linux).
sewardjf1962d32006-10-19 13:22:16 +00005415
5416 The following loop therefore scans the preamble looking for
5417 assignments to temporaries. For each one found it creates an
sewardjafa617b2008-07-22 09:59:48 +00005418 assignment to the corresponding (V) shadow temp, marking it as
sewardjf1962d32006-10-19 13:22:16 +00005419 'defined'. This is the same resulting IR as if the main
5420 instrumentation loop before had been applied to the statement
5421 'tmp = CONSTANT'.
sewardjafa617b2008-07-22 09:59:48 +00005422
5423 Similarly, if origin tracking is enabled, we must generate an
5424 assignment for the corresponding origin (B) shadow, claiming
5425 no-origin, as appropriate for a defined value.
sewardjf1962d32006-10-19 13:22:16 +00005426 */
5427 for (j = 0; j < i; j++) {
sewardj1c0ce7a2009-07-01 08:10:49 +00005428 if (sb_in->stmts[j]->tag == Ist_WrTmp) {
sewardj7cf4e6b2008-05-01 20:24:26 +00005429 /* findShadowTmpV checks its arg is an original tmp;
sewardjf1962d32006-10-19 13:22:16 +00005430 no need to assert that here. */
sewardj1c0ce7a2009-07-01 08:10:49 +00005431 IRTemp tmp_o = sb_in->stmts[j]->Ist.WrTmp.tmp;
sewardjafa617b2008-07-22 09:59:48 +00005432 IRTemp tmp_v = findShadowTmpV(&mce, tmp_o);
sewardj1c0ce7a2009-07-01 08:10:49 +00005433 IRType ty_v = typeOfIRTemp(sb_out->tyenv, tmp_v);
sewardjafa617b2008-07-22 09:59:48 +00005434 assign( 'V', &mce, tmp_v, definedOfType( ty_v ) );
5435 if (MC_(clo_mc_level) == 3) {
5436 IRTemp tmp_b = findShadowTmpB(&mce, tmp_o);
sewardj1c0ce7a2009-07-01 08:10:49 +00005437 tl_assert(typeOfIRTemp(sb_out->tyenv, tmp_b) == Ity_I32);
sewardjafa617b2008-07-22 09:59:48 +00005438 assign( 'B', &mce, tmp_b, mkU32(0)/* UNKNOWN ORIGIN */);
5439 }
sewardjf1962d32006-10-19 13:22:16 +00005440 if (0) {
sewardjafa617b2008-07-22 09:59:48 +00005441 VG_(printf)("create shadow tmp(s) for preamble tmp [%d] ty ", j);
5442 ppIRType( ty_v );
sewardjf1962d32006-10-19 13:22:16 +00005443 VG_(printf)("\n");
5444 }
5445 }
5446 }
5447
sewardja0871482006-10-18 12:41:55 +00005448 /* Iterate over the remaining stmts to generate instrumentation. */
5449
sewardj1c0ce7a2009-07-01 08:10:49 +00005450 tl_assert(sb_in->stmts_used > 0);
sewardja0871482006-10-18 12:41:55 +00005451 tl_assert(i >= 0);
sewardj1c0ce7a2009-07-01 08:10:49 +00005452 tl_assert(i < sb_in->stmts_used);
5453 tl_assert(sb_in->stmts[i]->tag == Ist_IMark);
sewardja0871482006-10-18 12:41:55 +00005454
sewardj1c0ce7a2009-07-01 08:10:49 +00005455 for (/* use current i*/; i < sb_in->stmts_used; i++) {
sewardj151b90d2005-07-06 19:42:23 +00005456
sewardj1c0ce7a2009-07-01 08:10:49 +00005457 st = sb_in->stmts[i];
5458 first_stmt = sb_out->stmts_used;
sewardj95448072004-11-22 20:19:51 +00005459
5460 if (verboze) {
sewardj7cf4e6b2008-05-01 20:24:26 +00005461 VG_(printf)("\n");
sewardj95448072004-11-22 20:19:51 +00005462 ppIRStmt(st);
sewardj7cf4e6b2008-05-01 20:24:26 +00005463 VG_(printf)("\n");
sewardj95448072004-11-22 20:19:51 +00005464 }
5465
sewardj1c0ce7a2009-07-01 08:10:49 +00005466 if (MC_(clo_mc_level) == 3) {
5467 /* See comments on case Ist_CAS below. */
5468 if (st->tag != Ist_CAS)
5469 schemeS( &mce, st );
5470 }
sewardj7cf4e6b2008-05-01 20:24:26 +00005471
sewardj29faa502005-03-16 18:20:21 +00005472 /* Generate instrumentation code for each stmt ... */
5473
sewardj95448072004-11-22 20:19:51 +00005474 switch (st->tag) {
5475
sewardj0b9d74a2006-12-24 02:24:11 +00005476 case Ist_WrTmp:
sewardj7cf4e6b2008-05-01 20:24:26 +00005477 assign( 'V', &mce, findShadowTmpV(&mce, st->Ist.WrTmp.tmp),
5478 expr2vbits( &mce, st->Ist.WrTmp.data) );
njn25e49d8e72002-09-23 09:36:25 +00005479 break;
5480
sewardj95448072004-11-22 20:19:51 +00005481 case Ist_Put:
5482 do_shadow_PUT( &mce,
5483 st->Ist.Put.offset,
5484 st->Ist.Put.data,
florian434ffae2012-07-19 17:23:42 +00005485 NULL /* shadow atom */, NULL /* guard */ );
njn25e49d8e72002-09-23 09:36:25 +00005486 break;
5487
sewardj95448072004-11-22 20:19:51 +00005488 case Ist_PutI:
floriand39b0222012-05-31 15:48:13 +00005489 do_shadow_PUTI( &mce, st->Ist.PutI.details);
njn25e49d8e72002-09-23 09:36:25 +00005490 break;
5491
sewardj2e595852005-06-30 23:33:37 +00005492 case Ist_Store:
5493 do_shadow_Store( &mce, st->Ist.Store.end,
5494 st->Ist.Store.addr, 0/* addr bias */,
5495 st->Ist.Store.data,
sewardj1c0ce7a2009-07-01 08:10:49 +00005496 NULL /* shadow data */,
5497 NULL/*guard*/ );
njn25e49d8e72002-09-23 09:36:25 +00005498 break;
5499
sewardj95448072004-11-22 20:19:51 +00005500 case Ist_Exit:
florian434ffae2012-07-19 17:23:42 +00005501 complainIfUndefined( &mce, st->Ist.Exit.guard, NULL );
njn25e49d8e72002-09-23 09:36:25 +00005502 break;
5503
sewardj29faa502005-03-16 18:20:21 +00005504 case Ist_IMark:
sewardj7cf4e6b2008-05-01 20:24:26 +00005505 break;
5506
5507 case Ist_NoOp:
sewardj72d75132007-11-09 23:06:35 +00005508 case Ist_MBE:
sewardjbd598e12005-01-07 12:10:21 +00005509 break;
5510
sewardj95448072004-11-22 20:19:51 +00005511 case Ist_Dirty:
5512 do_shadow_Dirty( &mce, st->Ist.Dirty.details );
njn25e49d8e72002-09-23 09:36:25 +00005513 break;
5514
sewardj826ec492005-05-12 18:05:00 +00005515 case Ist_AbiHint:
sewardj7cf4e6b2008-05-01 20:24:26 +00005516 do_AbiHint( &mce, st->Ist.AbiHint.base,
5517 st->Ist.AbiHint.len,
5518 st->Ist.AbiHint.nia );
sewardj826ec492005-05-12 18:05:00 +00005519 break;
5520
sewardj1c0ce7a2009-07-01 08:10:49 +00005521 case Ist_CAS:
5522 do_shadow_CAS( &mce, st->Ist.CAS.details );
5523 /* Note, do_shadow_CAS copies the CAS itself to the output
5524 block, because it needs to add instrumentation both
5525 before and after it. Hence skip the copy below. Also
5526 skip the origin-tracking stuff (call to schemeS) above,
5527 since that's all tangled up with it too; do_shadow_CAS
5528 does it all. */
5529 break;
5530
sewardjdb5907d2009-11-26 17:20:21 +00005531 case Ist_LLSC:
5532 do_shadow_LLSC( &mce,
5533 st->Ist.LLSC.end,
5534 st->Ist.LLSC.result,
5535 st->Ist.LLSC.addr,
5536 st->Ist.LLSC.storedata );
5537 break;
5538
njn25e49d8e72002-09-23 09:36:25 +00005539 default:
sewardj95448072004-11-22 20:19:51 +00005540 VG_(printf)("\n");
5541 ppIRStmt(st);
5542 VG_(printf)("\n");
5543 VG_(tool_panic)("memcheck: unhandled IRStmt");
5544
5545 } /* switch (st->tag) */
5546
sewardj7cf4e6b2008-05-01 20:24:26 +00005547 if (0 && verboze) {
sewardj1c0ce7a2009-07-01 08:10:49 +00005548 for (j = first_stmt; j < sb_out->stmts_used; j++) {
sewardj95448072004-11-22 20:19:51 +00005549 VG_(printf)(" ");
sewardj1c0ce7a2009-07-01 08:10:49 +00005550 ppIRStmt(sb_out->stmts[j]);
sewardj95448072004-11-22 20:19:51 +00005551 VG_(printf)("\n");
5552 }
5553 VG_(printf)("\n");
njn25e49d8e72002-09-23 09:36:25 +00005554 }
sewardj95448072004-11-22 20:19:51 +00005555
sewardj1c0ce7a2009-07-01 08:10:49 +00005556 /* ... and finally copy the stmt itself to the output. Except,
5557 skip the copy of IRCASs; see comments on case Ist_CAS
5558 above. */
5559 if (st->tag != Ist_CAS)
5560 stmt('C', &mce, st);
njn25e49d8e72002-09-23 09:36:25 +00005561 }
njn25e49d8e72002-09-23 09:36:25 +00005562
sewardj95448072004-11-22 20:19:51 +00005563 /* Now we need to complain if the jump target is undefined. */
sewardj1c0ce7a2009-07-01 08:10:49 +00005564 first_stmt = sb_out->stmts_used;
njn25e49d8e72002-09-23 09:36:25 +00005565
sewardj95448072004-11-22 20:19:51 +00005566 if (verboze) {
sewardj1c0ce7a2009-07-01 08:10:49 +00005567 VG_(printf)("sb_in->next = ");
5568 ppIRExpr(sb_in->next);
sewardj95448072004-11-22 20:19:51 +00005569 VG_(printf)("\n\n");
5570 }
njn25e49d8e72002-09-23 09:36:25 +00005571
florian434ffae2012-07-19 17:23:42 +00005572 complainIfUndefined( &mce, sb_in->next, NULL );
njn25e49d8e72002-09-23 09:36:25 +00005573
sewardj7cf4e6b2008-05-01 20:24:26 +00005574 if (0 && verboze) {
sewardj1c0ce7a2009-07-01 08:10:49 +00005575 for (j = first_stmt; j < sb_out->stmts_used; j++) {
sewardj95448072004-11-22 20:19:51 +00005576 VG_(printf)(" ");
sewardj1c0ce7a2009-07-01 08:10:49 +00005577 ppIRStmt(sb_out->stmts[j]);
sewardj95448072004-11-22 20:19:51 +00005578 VG_(printf)("\n");
njn25e49d8e72002-09-23 09:36:25 +00005579 }
sewardj95448072004-11-22 20:19:51 +00005580 VG_(printf)("\n");
njn25e49d8e72002-09-23 09:36:25 +00005581 }
njn25e49d8e72002-09-23 09:36:25 +00005582
sewardj1c0ce7a2009-07-01 08:10:49 +00005583 /* If this fails, there's been some serious snafu with tmp management,
5584 that should be investigated. */
5585 tl_assert( VG_(sizeXA)( mce.tmpMap ) == mce.sb->tyenv->types_used );
5586 VG_(deleteXA)( mce.tmpMap );
5587
5588 tl_assert(mce.sb == sb_out);
5589 return sb_out;
sewardj95448072004-11-22 20:19:51 +00005590}
njn25e49d8e72002-09-23 09:36:25 +00005591
sewardj81651dc2007-08-28 06:05:20 +00005592/*------------------------------------------------------------*/
5593/*--- Post-tree-build final tidying ---*/
5594/*------------------------------------------------------------*/
5595
5596/* This exploits the observation that Memcheck often produces
5597 repeated conditional calls of the form
5598
sewardj7cf4e6b2008-05-01 20:24:26 +00005599 Dirty G MC_(helperc_value_check0/1/4/8_fail)(UInt otag)
sewardj81651dc2007-08-28 06:05:20 +00005600
5601 with the same guard expression G guarding the same helper call.
5602 The second and subsequent calls are redundant. This usually
5603 results from instrumentation of guest code containing multiple
5604 memory references at different constant offsets from the same base
5605 register. After optimisation of the instrumentation, you get a
5606 test for the definedness of the base register for each memory
5607 reference, which is kinda pointless. MC_(final_tidy) therefore
5608 looks for such repeated calls and removes all but the first. */
5609
/* A struct for recording which (helper, guard) pairs we have already
   seen.  'entry' is the helper's code address (the IRCallee's .addr,
   as passed by MC_(final_tidy)); 'guard' is the guard expression of
   the corresponding dirty call. */
typedef
   struct { void* entry; IRExpr* guard; }
   Pair;
5615
5616/* Return True if e1 and e2 definitely denote the same value (used to
5617 compare guards). Return False if unknown; False is the safe
5618 answer. Since guest registers and guest memory do not have the
5619 SSA property we must return False if any Gets or Loads appear in
5620 the expression. */
5621
static Bool sameIRValue ( IRExpr* e1, IRExpr* e2 )
{
   /* Structural, conservative equality on expressions.  True means
      "definitely the same value"; False means "unknown" and is always
      the safe answer (see block comment above). */
   if (e1->tag != e2->tag)
      return False;
   switch (e1->tag) {
      case Iex_Const:
         return eqIRConst( e1->Iex.Const.con, e2->Iex.Const.con );
      case Iex_Binop:
         return e1->Iex.Binop.op == e2->Iex.Binop.op
                && sameIRValue(e1->Iex.Binop.arg1, e2->Iex.Binop.arg1)
                && sameIRValue(e1->Iex.Binop.arg2, e2->Iex.Binop.arg2);
      case Iex_Unop:
         return e1->Iex.Unop.op == e2->Iex.Unop.op
                && sameIRValue(e1->Iex.Unop.arg, e2->Iex.Unop.arg);
      case Iex_RdTmp:
         /* IR temps are SSA, so the same temp is the same value. */
         return e1->Iex.RdTmp.tmp == e2->Iex.RdTmp.tmp;
      case Iex_Mux0X:
         return sameIRValue( e1->Iex.Mux0X.cond, e2->Iex.Mux0X.cond )
                && sameIRValue( e1->Iex.Mux0X.expr0, e2->Iex.Mux0X.expr0 )
                && sameIRValue( e1->Iex.Mux0X.exprX, e2->Iex.Mux0X.exprX );
      case Iex_Qop:
      case Iex_Triop:
      case Iex_CCall:
         /* be lazy.  Could define equality for these, but they never
            appear to be used. */
         return False;
      case Iex_Get:
      case Iex_GetI:
      case Iex_Load:
         /* be conservative - these may not give the same value each
            time */
         return False;
      case Iex_Binder:
         /* should never see this */
         /* fallthrough */
      default:
         VG_(printf)("mc_translate.c: sameIRValue: unhandled: ");
         ppIRExpr(e1);
         VG_(tool_panic)("memcheck:sameIRValue");
         return False;
   }
}
5664
5665/* See if 'pairs' already has an entry for (entry, guard). Return
5666 True if so. If not, add an entry. */
5667
5668static
5669Bool check_or_add ( XArray* /*of Pair*/ pairs, IRExpr* guard, void* entry )
5670{
5671 Pair p;
5672 Pair* pp;
5673 Int i, n = VG_(sizeXA)( pairs );
5674 for (i = 0; i < n; i++) {
5675 pp = VG_(indexXA)( pairs, i );
5676 if (pp->entry == entry && sameIRValue(pp->guard, guard))
5677 return True;
5678 }
5679 p.guard = guard;
5680 p.entry = entry;
5681 VG_(addToXA)( pairs, &p );
5682 return False;
5683}
5684
5685static Bool is_helperc_value_checkN_fail ( HChar* name )
5686{
5687 return
sewardj7cf4e6b2008-05-01 20:24:26 +00005688 0==VG_(strcmp)(name, "MC_(helperc_value_check0_fail_no_o)")
5689 || 0==VG_(strcmp)(name, "MC_(helperc_value_check1_fail_no_o)")
5690 || 0==VG_(strcmp)(name, "MC_(helperc_value_check4_fail_no_o)")
5691 || 0==VG_(strcmp)(name, "MC_(helperc_value_check8_fail_no_o)")
5692 || 0==VG_(strcmp)(name, "MC_(helperc_value_check0_fail_w_o)")
5693 || 0==VG_(strcmp)(name, "MC_(helperc_value_check1_fail_w_o)")
5694 || 0==VG_(strcmp)(name, "MC_(helperc_value_check4_fail_w_o)")
5695 || 0==VG_(strcmp)(name, "MC_(helperc_value_check8_fail_w_o)");
sewardj81651dc2007-08-28 06:05:20 +00005696}
5697
/* Post-tree-build tidy pass (see block comment above): remove, in
   place, all but the first of any repeated guarded calls to the
   value-checkN-fail helpers that share the same guard expression.
   Redundant calls are overwritten with IRStmt_NoOp.  Returns the
   (modified) input superblock. */
IRSB* MC_(final_tidy) ( IRSB* sb_in )
{
   Int i;
   IRStmt*   st;
   IRDirty*  di;
   IRExpr*   guard;
   IRCallee* cee;
   Bool      alreadyPresent;
   /* Set of (helper address, guard) pairs seen so far. */
   XArray*   pairs = VG_(newXA)( VG_(malloc), "mc.ft.1",
                                 VG_(free), sizeof(Pair) );
   /* Scan forwards through the statements.  Each time a call to one
      of the relevant helpers is seen, check if we have made a
      previous call to the same helper using the same guard
      expression, and if so, delete the call. */
   for (i = 0; i < sb_in->stmts_used; i++) {
      st = sb_in->stmts[i];
      tl_assert(st);
      if (st->tag != Ist_Dirty)
         continue;
      di = st->Ist.Dirty.details;
      guard = di->guard;
      if (!guard)
         continue;
      if (0) { ppIRExpr(guard); VG_(printf)("\n"); }
      cee = di->cee;
      if (!is_helperc_value_checkN_fail( cee->name ))
         continue;
      /* Ok, we have a call to helperc_value_check0/1/4/8_fail with
         guard 'guard'.  Check if we have already seen a call to this
         function with the same guard.  If so, delete it.  If not,
         add it to the set of calls we do know about. */
      alreadyPresent = check_or_add( pairs, guard, cee->addr );
      if (alreadyPresent) {
         sb_in->stmts[i] = IRStmt_NoOp();
         if (0) VG_(printf)("XX\n");
      }
   }
   VG_(deleteXA)( pairs );
   return sb_in;
}
5738
5739
sewardj7cf4e6b2008-05-01 20:24:26 +00005740/*------------------------------------------------------------*/
5741/*--- Origin tracking stuff ---*/
5742/*------------------------------------------------------------*/
5743
sewardj1c0ce7a2009-07-01 08:10:49 +00005744/* Almost identical to findShadowTmpV. */
sewardj7cf4e6b2008-05-01 20:24:26 +00005745static IRTemp findShadowTmpB ( MCEnv* mce, IRTemp orig )
5746{
sewardj1c0ce7a2009-07-01 08:10:49 +00005747 TempMapEnt* ent;
5748 /* VG_(indexXA) range-checks 'orig', hence no need to check
5749 here. */
5750 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
5751 tl_assert(ent->kind == Orig);
5752 if (ent->shadowB == IRTemp_INVALID) {
5753 IRTemp tmpB
5754 = newTemp( mce, Ity_I32, BSh );
5755 /* newTemp may cause mce->tmpMap to resize, hence previous results
5756 from VG_(indexXA) are invalid. */
5757 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
5758 tl_assert(ent->kind == Orig);
5759 tl_assert(ent->shadowB == IRTemp_INVALID);
5760 ent->shadowB = tmpB;
sewardj7cf4e6b2008-05-01 20:24:26 +00005761 }
sewardj1c0ce7a2009-07-01 08:10:49 +00005762 return ent->shadowB;
sewardj7cf4e6b2008-05-01 20:24:26 +00005763}
5764
5765static IRAtom* gen_maxU32 ( MCEnv* mce, IRAtom* b1, IRAtom* b2 )
5766{
5767 return assignNew( 'B', mce, Ity_I32, binop(Iop_Max32U, b1, b2) );
5768}
5769
/* Emit IR that fetches the origin tag (B shadow) for the szB bytes at
   baseaddr+offset, by a dirty call to the appropriate
   MC_(helperc_b_loadN) helper.  Returns an Ity_I32 atom holding the
   tag (on a 64-bit host the helper's word-sized result is narrowed
   with Iop_64to32). */
static IRAtom* gen_load_b ( MCEnv* mce, Int szB,
                            IRAtom* baseaddr, Int offset )
{
   void*    hFun;
   HChar*   hName;
   IRTemp   bTmp;
   IRDirty* di;
   IRType   aTy   = typeOfIRExpr( mce->sb->tyenv, baseaddr );
   IROp     opAdd = aTy == Ity_I32 ? Iop_Add32 : Iop_Add64;
   IRAtom*  ea    = baseaddr;
   /* Fold a non-zero offset into the effective address. */
   if (offset != 0) {
      IRAtom* off = aTy == Ity_I32 ? mkU32( offset )
                                   : mkU64( (Long)(Int)offset );
      ea = assignNew( 'B', mce, aTy, binop(opAdd, ea, off));
   }
   /* Helper result arrives in a host-word-sized temp. */
   bTmp = newTemp(mce, mce->hWordTy, BSh);

   /* Select the helper matching the access size. */
   switch (szB) {
      case 1: hFun  = (void*)&MC_(helperc_b_load1);
              hName = "MC_(helperc_b_load1)";
              break;
      case 2: hFun  = (void*)&MC_(helperc_b_load2);
              hName = "MC_(helperc_b_load2)";
              break;
      case 4: hFun  = (void*)&MC_(helperc_b_load4);
              hName = "MC_(helperc_b_load4)";
              break;
      case 8: hFun  = (void*)&MC_(helperc_b_load8);
              hName = "MC_(helperc_b_load8)";
              break;
      case 16: hFun  = (void*)&MC_(helperc_b_load16);
               hName = "MC_(helperc_b_load16)";
               break;
      case 32: hFun  = (void*)&MC_(helperc_b_load32);
               hName = "MC_(helperc_b_load32)";
               break;
      default:
         VG_(printf)("mc_translate.c: gen_load_b: unhandled szB == %d\n", szB);
         tl_assert(0);
   }
   di = unsafeIRDirty_1_N(
           bTmp, 1/*regparms*/, hName, VG_(fnptr_to_fnentry)( hFun ),
           mkIRExprVec_1( ea )
        );
   /* no need to mess with any annotations.  This call accesses
      neither guest state nor guest memory. */
   stmt( 'B', mce, IRStmt_Dirty(di) );
   if (mce->hWordTy == Ity_I64) {
      /* 64-bit host */
      IRTemp bTmp32 = newTemp(mce, Ity_I32, BSh);
      assign( 'B', mce, bTmp32, unop(Iop_64to32, mkexpr(bTmp)) );
      return mkexpr(bTmp32);
   } else {
      /* 32-bit host */
      return mkexpr(bTmp);
   }
}
sewardj1c0ce7a2009-07-01 08:10:49 +00005827
florian434ffae2012-07-19 17:23:42 +00005828static IRAtom* gen_guarded_load_b ( MCEnv* mce, Int szB, IRAtom* baseaddr,
5829 Int offset, IRAtom* guard )
5830{
5831 if (guard) {
5832 IRAtom *cond, *iffalse, *iftrue;
5833
5834 cond = assignNew('B', mce, Ity_I8, unop(Iop_1Uto8, guard));
5835 iftrue = assignNew('B', mce, Ity_I32,
5836 gen_load_b(mce, szB, baseaddr, offset));
5837 iffalse = mkU32(0);
5838
5839 return assignNew('B', mce, Ity_I32, IRExpr_Mux0X(cond, iffalse, iftrue));
5840 }
5841
5842 return gen_load_b(mce, szB, baseaddr, offset);
5843}
5844
sewardj1c0ce7a2009-07-01 08:10:49 +00005845/* Generate a shadow store. guard :: Ity_I1 controls whether the
5846 store really happens; NULL means it unconditionally does. */
sewardj7cf4e6b2008-05-01 20:24:26 +00005847static void gen_store_b ( MCEnv* mce, Int szB,
sewardj1c0ce7a2009-07-01 08:10:49 +00005848 IRAtom* baseaddr, Int offset, IRAtom* dataB,
5849 IRAtom* guard )
sewardj7cf4e6b2008-05-01 20:24:26 +00005850{
5851 void* hFun;
5852 HChar* hName;
5853 IRDirty* di;
sewardj1c0ce7a2009-07-01 08:10:49 +00005854 IRType aTy = typeOfIRExpr( mce->sb->tyenv, baseaddr );
sewardj7cf4e6b2008-05-01 20:24:26 +00005855 IROp opAdd = aTy == Ity_I32 ? Iop_Add32 : Iop_Add64;
5856 IRAtom* ea = baseaddr;
sewardj1c0ce7a2009-07-01 08:10:49 +00005857 if (guard) {
5858 tl_assert(isOriginalAtom(mce, guard));
5859 tl_assert(typeOfIRExpr(mce->sb->tyenv, guard) == Ity_I1);
5860 }
sewardj7cf4e6b2008-05-01 20:24:26 +00005861 if (offset != 0) {
5862 IRAtom* off = aTy == Ity_I32 ? mkU32( offset )
5863 : mkU64( (Long)(Int)offset );
5864 ea = assignNew( 'B', mce, aTy, binop(opAdd, ea, off));
5865 }
5866 if (mce->hWordTy == Ity_I64)
5867 dataB = assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, dataB));
5868
5869 switch (szB) {
5870 case 1: hFun = (void*)&MC_(helperc_b_store1);
5871 hName = "MC_(helperc_b_store1)";
5872 break;
5873 case 2: hFun = (void*)&MC_(helperc_b_store2);
5874 hName = "MC_(helperc_b_store2)";
5875 break;
5876 case 4: hFun = (void*)&MC_(helperc_b_store4);
5877 hName = "MC_(helperc_b_store4)";
5878 break;
5879 case 8: hFun = (void*)&MC_(helperc_b_store8);
5880 hName = "MC_(helperc_b_store8)";
5881 break;
5882 case 16: hFun = (void*)&MC_(helperc_b_store16);
5883 hName = "MC_(helperc_b_store16)";
5884 break;
sewardj45fa9f42012-05-21 10:18:10 +00005885 case 32: hFun = (void*)&MC_(helperc_b_store32);
5886 hName = "MC_(helperc_b_store32)";
5887 break;
sewardj7cf4e6b2008-05-01 20:24:26 +00005888 default:
5889 tl_assert(0);
5890 }
5891 di = unsafeIRDirty_0_N( 2/*regparms*/,
5892 hName, VG_(fnptr_to_fnentry)( hFun ),
5893 mkIRExprVec_2( ea, dataB )
5894 );
5895 /* no need to mess with any annotations. This call accesses
5896 neither guest state nor guest memory. */
sewardj1c0ce7a2009-07-01 08:10:49 +00005897 if (guard) di->guard = guard;
sewardj7cf4e6b2008-05-01 20:24:26 +00005898 stmt( 'B', mce, IRStmt_Dirty(di) );
5899}
5900
5901static IRAtom* narrowTo32 ( MCEnv* mce, IRAtom* e ) {
sewardj1c0ce7a2009-07-01 08:10:49 +00005902 IRType eTy = typeOfIRExpr(mce->sb->tyenv, e);
sewardj7cf4e6b2008-05-01 20:24:26 +00005903 if (eTy == Ity_I64)
5904 return assignNew( 'B', mce, Ity_I32, unop(Iop_64to32, e) );
5905 if (eTy == Ity_I32)
5906 return e;
5907 tl_assert(0);
5908}
5909
5910static IRAtom* zWidenFrom32 ( MCEnv* mce, IRType dstTy, IRAtom* e ) {
sewardj1c0ce7a2009-07-01 08:10:49 +00005911 IRType eTy = typeOfIRExpr(mce->sb->tyenv, e);
sewardj7cf4e6b2008-05-01 20:24:26 +00005912 tl_assert(eTy == Ity_I32);
5913 if (dstTy == Ity_I64)
5914 return assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, e) );
5915 tl_assert(0);
5916}
5917
sewardjdb5907d2009-11-26 17:20:21 +00005918
/* Compute a 32-bit origin value (B-value, an "otag") for expression
   'e', for origin tracking (--track-origins=yes).  Origins of
   sub-expressions are combined pessimistically with gen_maxU32, so
   the result identifies *some* undefined input if any input is
   undefined.  May emit IR via assignNew/gen_load_b.  Returns an
   Ity_I32 atom.  Only meaningful at mc-level 3. */
static IRAtom* schemeE ( MCEnv* mce, IRExpr* e )
{
   tl_assert(MC_(clo_mc_level) == 3);

   switch (e->tag) {

      case Iex_GetI: {
         IRRegArray* descr_b;
         IRAtom *t1, *t2, *t3, *t4;
         IRRegArray* descr = e->Iex.GetI.descr;
         IRType equivIntTy
            = MC_(get_otrack_reg_array_equiv_int_type)(descr);
         /* If this array is unshadowable for whatever reason, use the
            usual approximation. */
         if (equivIntTy == Ity_INVALID)
            return mkU32(0);
         tl_assert(sizeofIRType(equivIntTy) >= 4);
         tl_assert(sizeofIRType(equivIntTy) == sizeofIRType(descr->elemTy));
         /* The B (origin) shadow of the guest state lives at offset
            2 * layout->total_sizeB from the original state. */
         descr_b = mkIRRegArray( descr->base + 2*mce->layout->total_sizeB,
                                 equivIntTy, descr->nElems );
         /* Do a shadow indexed get of the same size, giving t1.  Take
            the bottom 32 bits of it, giving t2.  Compute into t3 the
            origin for the index (almost certainly zero, but there's
            no harm in being completely general here, since iropt will
            remove any useless code), and fold it in, giving a final
            value t4. */
         t1 = assignNew( 'B', mce, equivIntTy,
                          IRExpr_GetI( descr_b, e->Iex.GetI.ix,
                                       e->Iex.GetI.bias ));
         t2 = narrowTo32( mce, t1 );
         t3 = schemeE( mce, e->Iex.GetI.ix );
         t4 = gen_maxU32( mce, t2, t3 );
         return t4;
      }
      case Iex_CCall: {
         Int i;
         IRAtom* here;
         IRExpr** args = e->Iex.CCall.args;
         IRAtom* curr = mkU32(0);
         for (i = 0; args[i]; i++) {
            tl_assert(i < 32);
            tl_assert(isOriginalAtom(mce, args[i]));
            /* Only take notice of this arg if the callee's
               mc-exclusion mask does not say it is to be excluded. */
            if (e->Iex.CCall.cee->mcx_mask & (1<<i)) {
               /* the arg is to be excluded from definedness checking.
                  Do nothing. */
               if (0) VG_(printf)("excluding %s(%d)\n",
                                  e->Iex.CCall.cee->name, i);
            } else {
               /* calculate the arg's definedness, and pessimistically
                  merge it in. */
               here = schemeE( mce, args[i] );
               curr = gen_maxU32( mce, curr, here );
            }
         }
         return curr;
      }
      case Iex_Load: {
         Int dszB;
         dszB = sizeofIRType(e->Iex.Load.ty);
         /* assert that the B value for the address is already
            available (somewhere) */
         tl_assert(isIRAtom(e->Iex.Load.addr));
         tl_assert(mce->hWordTy == Ity_I32 || mce->hWordTy == Ity_I64);
         return gen_load_b( mce, dszB, e->Iex.Load.addr, 0 );
      }
      case Iex_Mux0X: {
         /* Pessimistic: fold in the origins of the condition and of
            BOTH arms, even though only one arm is selected. */
         IRAtom* b1 = schemeE( mce, e->Iex.Mux0X.cond );
         IRAtom* b2 = schemeE( mce, e->Iex.Mux0X.expr0 );
         IRAtom* b3 = schemeE( mce, e->Iex.Mux0X.exprX );
         return gen_maxU32( mce, b1, gen_maxU32( mce, b2, b3 ));
      }
      case Iex_Qop: {
         IRAtom* b1 = schemeE( mce, e->Iex.Qop.details->arg1 );
         IRAtom* b2 = schemeE( mce, e->Iex.Qop.details->arg2 );
         IRAtom* b3 = schemeE( mce, e->Iex.Qop.details->arg3 );
         IRAtom* b4 = schemeE( mce, e->Iex.Qop.details->arg4 );
         return gen_maxU32( mce, gen_maxU32( mce, b1, b2 ),
                                 gen_maxU32( mce, b3, b4 ) );
      }
      case Iex_Triop: {
         IRAtom* b1 = schemeE( mce, e->Iex.Triop.details->arg1 );
         IRAtom* b2 = schemeE( mce, e->Iex.Triop.details->arg2 );
         IRAtom* b3 = schemeE( mce, e->Iex.Triop.details->arg3 );
         return gen_maxU32( mce, b1, gen_maxU32( mce, b2, b3 ) );
      }
      case Iex_Binop: {
         switch (e->Iex.Binop.op) {
            case Iop_CasCmpEQ8:  case Iop_CasCmpNE8:
            case Iop_CasCmpEQ16: case Iop_CasCmpNE16:
            case Iop_CasCmpEQ32: case Iop_CasCmpNE32:
            case Iop_CasCmpEQ64: case Iop_CasCmpNE64:
               /* Just say these all produce a defined result,
                  regardless of their arguments.  See
                  COMMENT_ON_CasCmpEQ in this file. */
               return mkU32(0);
            default: {
               IRAtom* b1 = schemeE( mce, e->Iex.Binop.arg1 );
               IRAtom* b2 = schemeE( mce, e->Iex.Binop.arg2 );
               return gen_maxU32( mce, b1, b2 );
            }
         }
         tl_assert(0);
         /*NOTREACHED*/
      }
      case Iex_Unop: {
         /* A unop's origin is just its argument's origin. */
         IRAtom* b1 = schemeE( mce, e->Iex.Unop.arg );
         return b1;
      }
      case Iex_Const:
         /* Constants are always defined: no origin to report. */
         return mkU32(0);
      case Iex_RdTmp:
         /* Use the previously established B-shadow of the temp. */
         return mkexpr( findShadowTmpB( mce, e->Iex.RdTmp.tmp ));
      case Iex_Get: {
         /* b_offset is -1 if this guest-state slice has no origin
            shadow slot, else the offset of its 32-bit shadow. */
         Int b_offset = MC_(get_otrack_shadow_offset)(
                           e->Iex.Get.offset,
                           sizeofIRType(e->Iex.Get.ty)
                        );
         tl_assert(b_offset >= -1
                   && b_offset <= mce->layout->total_sizeB -4);
         if (b_offset >= 0) {
            /* FIXME: this isn't an atom! */
            return IRExpr_Get( b_offset + 2*mce->layout->total_sizeB,
                               Ity_I32 );
         }
         return mkU32(0);
      }
      default:
         VG_(printf)("mc_translate.c: schemeE: unhandled: ");
         ppIRExpr(e);
         VG_(tool_panic)("memcheck:schemeE");
   }
}
6053
sewardjdb5907d2009-11-26 17:20:21 +00006054
sewardj7cf4e6b2008-05-01 20:24:26 +00006055static void do_origins_Dirty ( MCEnv* mce, IRDirty* d )
6056{
6057 // This is a hacked version of do_shadow_Dirty
sewardj2eecb742012-06-01 16:11:41 +00006058 Int i, k, n, toDo, gSz, gOff;
sewardj7cf4e6b2008-05-01 20:24:26 +00006059 IRAtom *here, *curr;
6060 IRTemp dst;
sewardj7cf4e6b2008-05-01 20:24:26 +00006061
6062 /* First check the guard. */
6063 curr = schemeE( mce, d->guard );
6064
6065 /* Now round up all inputs and maxU32 over them. */
6066
florian434ffae2012-07-19 17:23:42 +00006067 /* Inputs: unmasked args
6068 Note: arguments are evaluated REGARDLESS of the guard expression */
sewardj7cf4e6b2008-05-01 20:24:26 +00006069 for (i = 0; d->args[i]; i++) {
6070 if (d->cee->mcx_mask & (1<<i)) {
6071 /* ignore this arg */
6072 } else {
6073 here = schemeE( mce, d->args[i] );
6074 curr = gen_maxU32( mce, curr, here );
6075 }
6076 }
6077
6078 /* Inputs: guest state that we read. */
6079 for (i = 0; i < d->nFxState; i++) {
6080 tl_assert(d->fxState[i].fx != Ifx_None);
6081 if (d->fxState[i].fx == Ifx_Write)
6082 continue;
6083
sewardj2eecb742012-06-01 16:11:41 +00006084 /* Enumerate the described state segments */
6085 for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) {
6086 gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen;
6087 gSz = d->fxState[i].size;
sewardj7cf4e6b2008-05-01 20:24:26 +00006088
sewardj2eecb742012-06-01 16:11:41 +00006089 /* Ignore any sections marked as 'always defined'. */
6090 if (isAlwaysDefd(mce, gOff, gSz)) {
6091 if (0)
6092 VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
6093 gOff, gSz);
6094 continue;
sewardj7cf4e6b2008-05-01 20:24:26 +00006095 }
sewardj7cf4e6b2008-05-01 20:24:26 +00006096
sewardj2eecb742012-06-01 16:11:41 +00006097 /* This state element is read or modified. So we need to
6098 consider it. If larger than 4 bytes, deal with it in
6099 4-byte chunks. */
6100 while (True) {
6101 Int b_offset;
6102 tl_assert(gSz >= 0);
6103 if (gSz == 0) break;
6104 n = gSz <= 4 ? gSz : 4;
6105 /* update 'curr' with maxU32 of the state slice
6106 gOff .. gOff+n-1 */
6107 b_offset = MC_(get_otrack_shadow_offset)(gOff, 4);
6108 if (b_offset != -1) {
florian434ffae2012-07-19 17:23:42 +00006109 /* Observe the guard expression. If it is false use 0, i.e.
6110 nothing is known about the origin */
6111 IRAtom *cond, *iffalse, *iftrue;
6112
6113 cond = assignNew( 'B', mce, Ity_I8, unop(Iop_1Uto8, d->guard));
6114 iffalse = mkU32(0);
6115 iftrue = assignNew( 'B', mce, Ity_I32,
6116 IRExpr_Get(b_offset
6117 + 2*mce->layout->total_sizeB,
6118 Ity_I32));
6119 here = assignNew( 'B', mce, Ity_I32,
6120 IRExpr_Mux0X(cond, iffalse, iftrue));
sewardj2eecb742012-06-01 16:11:41 +00006121 curr = gen_maxU32( mce, curr, here );
6122 }
6123 gSz -= n;
6124 gOff += n;
6125 }
6126 }
sewardj7cf4e6b2008-05-01 20:24:26 +00006127 }
6128
6129 /* Inputs: memory */
6130
6131 if (d->mFx != Ifx_None) {
6132 /* Because we may do multiple shadow loads/stores from the same
6133 base address, it's best to do a single test of its
6134 definedness right now. Post-instrumentation optimisation
6135 should remove all but this test. */
6136 tl_assert(d->mAddr);
6137 here = schemeE( mce, d->mAddr );
6138 curr = gen_maxU32( mce, curr, here );
6139 }
6140
6141 /* Deal with memory inputs (reads or modifies) */
6142 if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
sewardj7cf4e6b2008-05-01 20:24:26 +00006143 toDo = d->mSize;
6144 /* chew off 32-bit chunks. We don't care about the endianness
6145 since it's all going to be condensed down to a single bit,
6146 but nevertheless choose an endianness which is hopefully
6147 native to the platform. */
6148 while (toDo >= 4) {
florian434ffae2012-07-19 17:23:42 +00006149 here = gen_guarded_load_b( mce, 4, d->mAddr, d->mSize - toDo,
6150 d->guard );
sewardj7cf4e6b2008-05-01 20:24:26 +00006151 curr = gen_maxU32( mce, curr, here );
6152 toDo -= 4;
6153 }
sewardj8c93fcc2008-10-30 13:08:31 +00006154 /* handle possible 16-bit excess */
6155 while (toDo >= 2) {
florian434ffae2012-07-19 17:23:42 +00006156 here = gen_guarded_load_b( mce, 2, d->mAddr, d->mSize - toDo,
6157 d->guard );
sewardj8c93fcc2008-10-30 13:08:31 +00006158 curr = gen_maxU32( mce, curr, here );
6159 toDo -= 2;
6160 }
floriancda994b2012-06-08 16:01:19 +00006161 /* chew off the remaining 8-bit chunk, if any */
6162 if (toDo == 1) {
florian434ffae2012-07-19 17:23:42 +00006163 here = gen_guarded_load_b( mce, 1, d->mAddr, d->mSize - toDo,
6164 d->guard );
floriancda994b2012-06-08 16:01:19 +00006165 curr = gen_maxU32( mce, curr, here );
6166 toDo -= 1;
6167 }
6168 tl_assert(toDo == 0);
sewardj7cf4e6b2008-05-01 20:24:26 +00006169 }
6170
6171 /* Whew! So curr is a 32-bit B-value which should give an origin
6172 of some use if any of the inputs to the helper are undefined.
6173 Now we need to re-distribute the results to all destinations. */
6174
6175 /* Outputs: the destination temporary, if there is one. */
6176 if (d->tmp != IRTemp_INVALID) {
6177 dst = findShadowTmpB(mce, d->tmp);
6178 assign( 'V', mce, dst, curr );
6179 }
6180
6181 /* Outputs: guest state that we write or modify. */
6182 for (i = 0; i < d->nFxState; i++) {
6183 tl_assert(d->fxState[i].fx != Ifx_None);
6184 if (d->fxState[i].fx == Ifx_Read)
6185 continue;
6186
sewardj2eecb742012-06-01 16:11:41 +00006187 /* Enumerate the described state segments */
6188 for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) {
6189 gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen;
6190 gSz = d->fxState[i].size;
sewardj7cf4e6b2008-05-01 20:24:26 +00006191
sewardj2eecb742012-06-01 16:11:41 +00006192 /* Ignore any sections marked as 'always defined'. */
6193 if (isAlwaysDefd(mce, gOff, gSz))
6194 continue;
6195
6196 /* This state element is written or modified. So we need to
6197 consider it. If larger than 4 bytes, deal with it in
6198 4-byte chunks. */
6199 while (True) {
6200 Int b_offset;
6201 tl_assert(gSz >= 0);
6202 if (gSz == 0) break;
6203 n = gSz <= 4 ? gSz : 4;
6204 /* Write 'curr' to the state slice gOff .. gOff+n-1 */
6205 b_offset = MC_(get_otrack_shadow_offset)(gOff, 4);
6206 if (b_offset != -1) {
florian434ffae2012-07-19 17:23:42 +00006207 if (d->guard) {
6208 /* If the guard expression evaluates to false we simply Put
6209 the value that is already stored in the guest state slot */
6210 IRAtom *cond, *iffalse;
6211
6212 cond = assignNew('B', mce, Ity_I8,
6213 unop(Iop_1Uto8, d->guard));
6214 iffalse = assignNew('B', mce, Ity_I32,
6215 IRExpr_Get(b_offset +
6216 2*mce->layout->total_sizeB,
6217 Ity_I32));
6218 curr = assignNew('V', mce, Ity_I32,
6219 IRExpr_Mux0X(cond, iffalse, curr));
6220 }
sewardj2eecb742012-06-01 16:11:41 +00006221 stmt( 'B', mce, IRStmt_Put(b_offset
6222 + 2*mce->layout->total_sizeB,
6223 curr ));
6224 }
6225 gSz -= n;
6226 gOff += n;
sewardj7cf4e6b2008-05-01 20:24:26 +00006227 }
sewardj7cf4e6b2008-05-01 20:24:26 +00006228 }
6229 }
6230
6231 /* Outputs: memory that we write or modify. Same comments about
6232 endianness as above apply. */
6233 if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
sewardj7cf4e6b2008-05-01 20:24:26 +00006234 toDo = d->mSize;
6235 /* chew off 32-bit chunks */
6236 while (toDo >= 4) {
sewardj1c0ce7a2009-07-01 08:10:49 +00006237 gen_store_b( mce, 4, d->mAddr, d->mSize - toDo, curr,
florian434ffae2012-07-19 17:23:42 +00006238 d->guard );
sewardj7cf4e6b2008-05-01 20:24:26 +00006239 toDo -= 4;
6240 }
sewardj8c93fcc2008-10-30 13:08:31 +00006241 /* handle possible 16-bit excess */
6242 while (toDo >= 2) {
sewardj1c0ce7a2009-07-01 08:10:49 +00006243 gen_store_b( mce, 2, d->mAddr, d->mSize - toDo, curr,
florian434ffae2012-07-19 17:23:42 +00006244 d->guard );
sewardj8c93fcc2008-10-30 13:08:31 +00006245 toDo -= 2;
6246 }
floriancda994b2012-06-08 16:01:19 +00006247 /* chew off the remaining 8-bit chunk, if any */
6248 if (toDo == 1) {
6249 gen_store_b( mce, 1, d->mAddr, d->mSize - toDo, curr,
florian434ffae2012-07-19 17:23:42 +00006250 d->guard );
floriancda994b2012-06-08 16:01:19 +00006251 toDo -= 1;
6252 }
6253 tl_assert(toDo == 0);
sewardj7cf4e6b2008-05-01 20:24:26 +00006254 }
sewardj7cf4e6b2008-05-01 20:24:26 +00006255}
6256
sewardjdb5907d2009-11-26 17:20:21 +00006257
6258static void do_origins_Store ( MCEnv* mce,
6259 IREndness stEnd,
6260 IRExpr* stAddr,
6261 IRExpr* stData )
6262{
6263 Int dszB;
6264 IRAtom* dataB;
6265 /* assert that the B value for the address is already available
6266 (somewhere), since the call to schemeE will want to see it.
6267 XXXX how does this actually ensure that?? */
6268 tl_assert(isIRAtom(stAddr));
6269 tl_assert(isIRAtom(stData));
6270 dszB = sizeofIRType( typeOfIRExpr(mce->sb->tyenv, stData ) );
6271 dataB = schemeE( mce, stData );
6272 gen_store_b( mce, dszB, stAddr, 0/*offset*/, dataB,
6273 NULL/*guard*/ );
6274}
6275
6276
/* Generate origin-tracking (B-value) shadow code for statement 'st'.
   This is the statement-level counterpart of schemeE and is the
   origin-tracking analogue of the V-bits instrumenter's statement
   dispatch.  Only called at mc-level 3. */
static void schemeS ( MCEnv* mce, IRStmt* st )
{
   tl_assert(MC_(clo_mc_level) == 3);

   switch (st->tag) {

      case Ist_AbiHint:
         /* The value-check instrumenter handles this - by arranging
            to pass the address of the next instruction to
            MC_(helperc_MAKE_STACK_UNINIT).  This is all that needs to
            happen for origin tracking w.r.t. AbiHints.  So there is
            nothing to do here. */
         break;

      case Ist_PutI: {
         IRPutI *puti = st->Ist.PutI.details;
         IRRegArray* descr_b;
         IRAtom *t1, *t2, *t3, *t4;
         IRRegArray* descr = puti->descr;
         IRType equivIntTy
            = MC_(get_otrack_reg_array_equiv_int_type)(descr);
         /* If this array is unshadowable for whatever reason,
            generate no code. */
         if (equivIntTy == Ity_INVALID)
            break;
         tl_assert(sizeofIRType(equivIntTy) >= 4);
         tl_assert(sizeofIRType(equivIntTy) == sizeofIRType(descr->elemTy));
         /* Shadow register array lives 2 * total_sizeB further on. */
         descr_b
            = mkIRRegArray( descr->base + 2*mce->layout->total_sizeB,
                            equivIntTy, descr->nElems );
         /* Compute a value to Put - the conjoinment of the origin for
            the data to be Put-ted (obviously) and of the index value
            (not so obviously). */
         t1 = schemeE( mce, puti->data );
         t2 = schemeE( mce, puti->ix );
         t3 = gen_maxU32( mce, t1, t2 );
         t4 = zWidenFrom32( mce, equivIntTy, t3 );
         stmt( 'B', mce, IRStmt_PutI( mkIRPutI(descr_b, puti->ix,
                                               puti->bias, t4) ));
         break;
      }

      case Ist_Dirty:
         do_origins_Dirty( mce, st->Ist.Dirty.details );
         break;

      case Ist_Store:
         do_origins_Store( mce, st->Ist.Store.end,
                                st->Ist.Store.addr,
                                st->Ist.Store.data );
         break;

      case Ist_LLSC: {
         /* In short: treat a load-linked like a normal load followed
            by an assignment of the loaded (shadow) data the result
            temporary.  Treat a store-conditional like a normal store,
            and mark the result temporary as defined. */
         if (st->Ist.LLSC.storedata == NULL) {
            /* Load Linked */
            IRType resTy
               = typeOfIRTemp(mce->sb->tyenv, st->Ist.LLSC.result);
            IRExpr* vanillaLoad
               = IRExpr_Load(st->Ist.LLSC.end, resTy, st->Ist.LLSC.addr);
            tl_assert(resTy == Ity_I64 || resTy == Ity_I32
                      || resTy == Ity_I16 || resTy == Ity_I8);
            assign( 'B', mce, findShadowTmpB(mce, st->Ist.LLSC.result),
                              schemeE(mce, vanillaLoad));
         } else {
            /* Store conditional */
            do_origins_Store( mce, st->Ist.LLSC.end,
                                   st->Ist.LLSC.addr,
                                   st->Ist.LLSC.storedata );
            /* For the rationale behind this, see comments at the
               place where the V-shadow for .result is constructed, in
               do_shadow_LLSC.  In short, we regard .result as
               always-defined. */
            assign( 'B', mce, findShadowTmpB(mce, st->Ist.LLSC.result),
                              mkU32(0) );
         }
         break;
      }

      case Ist_Put: {
         /* b_offset is -1 if this guest-state slice has no origin
            shadow slot, in which case no code is generated. */
         Int b_offset
            = MC_(get_otrack_shadow_offset)(
                 st->Ist.Put.offset,
                 sizeofIRType(typeOfIRExpr(mce->sb->tyenv, st->Ist.Put.data))
              );
         if (b_offset >= 0) {
            /* FIXME: this isn't an atom! */
            stmt( 'B', mce, IRStmt_Put(b_offset + 2*mce->layout->total_sizeB,
                                       schemeE( mce, st->Ist.Put.data )) );
         }
         break;
      }

      case Ist_WrTmp:
         assign( 'B', mce, findShadowTmpB(mce, st->Ist.WrTmp.tmp),
                           schemeE(mce, st->Ist.WrTmp.data) );
         break;

      case Ist_MBE:
      case Ist_NoOp:
      case Ist_Exit:
      case Ist_IMark:
         /* No data flow through these: nothing to track. */
         break;

      default:
         VG_(printf)("mc_translate.c: schemeS: unhandled: ");
         ppIRStmt(st);
         VG_(tool_panic)("memcheck:schemeS");
   }
}
6390
6391
njn25e49d8e72002-09-23 09:36:25 +00006392/*--------------------------------------------------------------------*/
njn25cac76cb2002-09-23 11:21:57 +00006393/*--- end mc_translate.c ---*/
njn25e49d8e72002-09-23 09:36:25 +00006394/*--------------------------------------------------------------------*/