blob: 6dcced38974a724567a2b3695e72e28f779de222 [file] [log] [blame]
nethercotebb1c9912004-01-04 16:43:23 +00001
njn25e49d8e72002-09-23 09:36:25 +00002/*--------------------------------------------------------------------*/
sewardj95448072004-11-22 20:19:51 +00003/*--- Instrument IR to perform memory checking operations. ---*/
njn25cac76cb2002-09-23 11:21:57 +00004/*--- mc_translate.c ---*/
njn25e49d8e72002-09-23 09:36:25 +00005/*--------------------------------------------------------------------*/
njnc9539842002-10-02 13:26:35 +00006
njn25e49d8e72002-09-23 09:36:25 +00007/*
nethercote137bc552003-11-14 17:47:54 +00008 This file is part of MemCheck, a heavyweight Valgrind tool for
njnc9539842002-10-02 13:26:35 +00009 detecting memory errors.
njn25e49d8e72002-09-23 09:36:25 +000010
sewardj03f8d3f2012-08-05 15:46:46 +000011 Copyright (C) 2000-2012 Julian Seward
njn25e49d8e72002-09-23 09:36:25 +000012 jseward@acm.org
13
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
18
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
23
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, write to the Free Software
26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27 02111-1307, USA.
28
29 The GNU General Public License is contained in the file COPYING.
30*/
31
njnc7561b92005-06-19 01:24:32 +000032#include "pub_tool_basics.h"
philippe6643e962012-01-17 21:16:30 +000033#include "pub_tool_poolalloc.h" // For mc_include.h
njn1d0825f2006-03-27 11:37:07 +000034#include "pub_tool_hashtable.h" // For mc_include.h
njn132bfcc2005-06-04 19:16:06 +000035#include "pub_tool_libcassert.h"
njn36a20fa2005-06-03 03:08:39 +000036#include "pub_tool_libcprint.h"
njnc7561b92005-06-19 01:24:32 +000037#include "pub_tool_tooliface.h"
sewardj53ee1fc2005-12-23 02:29:58 +000038#include "pub_tool_machine.h" // VG_(fnptr_to_fnentry)
sewardj81651dc2007-08-28 06:05:20 +000039#include "pub_tool_xarray.h"
40#include "pub_tool_mallocfree.h"
41#include "pub_tool_libcbase.h"
njn25e49d8e72002-09-23 09:36:25 +000042
sewardj7cf4e6b2008-05-01 20:24:26 +000043#include "mc_include.h"
44
45
sewardj7ee7d852011-06-16 11:37:21 +000046/* FIXMEs JRS 2011-June-16.
47
48 Check the interpretation for vector narrowing and widening ops,
49 particularly the saturating ones. I suspect they are either overly
50 pessimistic and/or wrong.
51*/
52
sewardj992dff92005-10-07 11:08:55 +000053/* This file implements the Memcheck instrumentation, and in
54 particular contains the core of its undefined value detection
55 machinery. For a comprehensive background of the terminology,
56 algorithms and rationale used herein, read:
57
58 Using Valgrind to detect undefined value errors with
59 bit-precision
60
61 Julian Seward and Nicholas Nethercote
62
63 2005 USENIX Annual Technical Conference (General Track),
64 Anaheim, CA, USA, April 10-15, 2005.
njn6665ea22007-05-24 23:14:41 +000065
66 ----
67
68 Here is as good a place as any to record exactly when V bits are and
69 should be checked, why, and what function is responsible.
70
71
72 Memcheck complains when an undefined value is used:
73
74 1. In the condition of a conditional branch. Because it could cause
75 incorrect control flow, and thus cause incorrect externally-visible
76 behaviour. [mc_translate.c:complainIfUndefined]
77
78 2. As an argument to a system call, or as the value that specifies
79 the system call number. Because it could cause an incorrect
80 externally-visible side effect. [mc_translate.c:mc_pre_reg_read]
81
82 3. As the address in a load or store. Because it could cause an
83 incorrect value to be used later, which could cause externally-visible
84 behaviour (eg. via incorrect control flow or an incorrect system call
85 argument) [complainIfUndefined]
86
87 4. As the target address of a branch. Because it could cause incorrect
88 control flow. [complainIfUndefined]
89
90 5. As an argument to setenv, unsetenv, or putenv. Because it could put
91 an incorrect value into the external environment.
92 [mc_replace_strmem.c:VG_WRAP_FUNCTION_ZU(*, *env)]
93
94 6. As the index in a GETI or PUTI operation. I'm not sure why... (njn).
95 [complainIfUndefined]
96
97 7. As an argument to the VALGRIND_CHECK_MEM_IS_DEFINED and
98 VALGRIND_CHECK_VALUE_IS_DEFINED client requests. Because the user
99 requested it. [in memcheck.h]
100
101
102 Memcheck also complains, but should not, when an undefined value is used:
103
104 8. As the shift value in certain SIMD shift operations (but not in the
105 standard integer shift operations). This inconsistency is due to
106 historical reasons.) [complainIfUndefined]
107
108
109 Memcheck does not complain, but should, when an undefined value is used:
110
111 9. As an input to a client request. Because the client request may
112 affect the visible behaviour -- see bug #144362 for an example
113 involving the malloc replacements in vg_replace_malloc.c and
114 VALGRIND_NON_SIMD_CALL* requests, where an uninitialised argument
115 isn't identified. That bug report also has some info on how to solve
116 the problem. [valgrind.h:VALGRIND_DO_CLIENT_REQUEST]
117
118
119 In practice, 1 and 2 account for the vast majority of cases.
sewardj992dff92005-10-07 11:08:55 +0000120*/
121
sewardj95448072004-11-22 20:19:51 +0000122/*------------------------------------------------------------*/
123/*--- Forward decls ---*/
124/*------------------------------------------------------------*/
125
126struct _MCEnv;
127
sewardj7cf4e6b2008-05-01 20:24:26 +0000128static IRType shadowTypeV ( IRType ty );
sewardj95448072004-11-22 20:19:51 +0000129static IRExpr* expr2vbits ( struct _MCEnv* mce, IRExpr* e );
sewardjafa617b2008-07-22 09:59:48 +0000130static IRTemp findShadowTmpB ( struct _MCEnv* mce, IRTemp orig );
sewardj95448072004-11-22 20:19:51 +0000131
sewardjb5b87402011-03-07 16:05:35 +0000132static IRExpr *i128_const_zero(void);
sewardj95448072004-11-22 20:19:51 +0000133
134/*------------------------------------------------------------*/
135/*--- Memcheck running state, and tmp management. ---*/
136/*------------------------------------------------------------*/
137
sewardj1c0ce7a2009-07-01 08:10:49 +0000138/* Carries info about a particular tmp. The tmp's number is not
139 recorded, as this is implied by (equal to) its index in the tmpMap
140 in MCEnv. The tmp's type is also not recorded, as this is present
141 in MCEnv.sb->tyenv.
142
143 When .kind is Orig, .shadowV and .shadowB may give the identities
144 of the temps currently holding the associated definedness (shadowV)
145 and origin (shadowB) values, or these may be IRTemp_INVALID if code
146 to compute such values has not yet been emitted.
147
148 When .kind is VSh or BSh then the tmp is holds a V- or B- value,
149 and so .shadowV and .shadowB must be IRTemp_INVALID, since it is
150 illogical for a shadow tmp itself to be shadowed.
151*/
152typedef
153 enum { Orig=1, VSh=2, BSh=3 }
154 TempKind;
155
156typedef
157 struct {
158 TempKind kind;
159 IRTemp shadowV;
160 IRTemp shadowB;
161 }
162 TempMapEnt;
163
164
sewardj95448072004-11-22 20:19:51 +0000165/* Carries around state during memcheck instrumentation. */
166typedef
167 struct _MCEnv {
sewardj0b9d74a2006-12-24 02:24:11 +0000168 /* MODIFIED: the superblock being constructed. IRStmts are
169 added. */
sewardj1c0ce7a2009-07-01 08:10:49 +0000170 IRSB* sb;
sewardj7cf4e6b2008-05-01 20:24:26 +0000171 Bool trace;
sewardj95448072004-11-22 20:19:51 +0000172
sewardj1c0ce7a2009-07-01 08:10:49 +0000173 /* MODIFIED: a table [0 .. #temps_in_sb-1] which gives the
174 current kind and possibly shadow temps for each temp in the
175 IRSB being constructed. Note that it does not contain the
176 type of each tmp. If you want to know the type, look at the
177 relevant entry in sb->tyenv. It follows that at all times
178 during the instrumentation process, the valid indices for
179 tmpMap and sb->tyenv are identical, being 0 .. N-1 where N is
180 total number of Orig, V- and B- temps allocated so far.
181
182 The reason for this strange split (types in one place, all
183 other info in another) is that we need the types to be
184 attached to sb so as to make it possible to do
185 "typeOfIRExpr(mce->bb->tyenv, ...)" at various places in the
186 instrumentation process. */
187 XArray* /* of TempMapEnt */ tmpMap;
sewardj95448072004-11-22 20:19:51 +0000188
sewardjd5204dc2004-12-31 01:16:11 +0000189 /* MODIFIED: indicates whether "bogus" literals have so far been
190 found. Starts off False, and may change to True. */
sewardj54eac252012-03-27 10:19:39 +0000191 Bool bogusLiterals;
192
193 /* READONLY: indicates whether we should use expensive
194 interpretations of integer adds, since unfortunately LLVM
195 uses them to do ORs in some circumstances. Defaulted to True
196 on MacOS and False everywhere else. */
197 Bool useLLVMworkarounds;
sewardjd5204dc2004-12-31 01:16:11 +0000198
sewardj95448072004-11-22 20:19:51 +0000199 /* READONLY: the guest layout. This indicates which parts of
200 the guest state should be regarded as 'always defined'. */
201 VexGuestLayout* layout;
sewardj634ba772006-10-15 12:47:37 +0000202
sewardj95448072004-11-22 20:19:51 +0000203 /* READONLY: the host word type. Needed for constructing
204 arguments of type 'HWord' to be passed to helper functions.
205 Ity_I32 or Ity_I64 only. */
206 IRType hWordTy;
207 }
208 MCEnv;
209
210/* SHADOW TMP MANAGEMENT. Shadow tmps are allocated lazily (on
211 demand), as they are encountered. This is for two reasons.
212
213 (1) (less important reason): Many original tmps are unused due to
214 initial IR optimisation, and we do not want to spaces in tables
215 tracking them.
216
217 Shadow IRTemps are therefore allocated on demand. mce.tmpMap is a
218 table indexed [0 .. n_types-1], which gives the current shadow for
219 each original tmp, or INVALID_IRTEMP if none is so far assigned.
220 It is necessary to support making multiple assignments to a shadow
221 -- specifically, after testing a shadow for definedness, it needs
222 to be made defined. But IR's SSA property disallows this.
223
224 (2) (more important reason): Therefore, when a shadow needs to get
225 a new value, a new temporary is created, the value is assigned to
226 that, and the tmpMap is updated to reflect the new binding.
227
228 A corollary is that if the tmpMap maps a given tmp to
sewardjf1962d32006-10-19 13:22:16 +0000229 IRTemp_INVALID and we are hoping to read that shadow tmp, it means
sewardj95448072004-11-22 20:19:51 +0000230 there's a read-before-write error in the original tmps. The IR
231 sanity checker should catch all such anomalies, however.
njn25e49d8e72002-09-23 09:36:25 +0000232*/
sewardj95448072004-11-22 20:19:51 +0000233
sewardj1c0ce7a2009-07-01 08:10:49 +0000234/* Create a new IRTemp of type 'ty' and kind 'kind', and add it to
235 both the table in mce->sb and to our auxiliary mapping. Note that
236 newTemp may cause mce->tmpMap to resize, hence previous results
237 from VG_(indexXA)(mce->tmpMap) are invalidated. */
238static IRTemp newTemp ( MCEnv* mce, IRType ty, TempKind kind )
239{
240 Word newIx;
241 TempMapEnt ent;
242 IRTemp tmp = newIRTemp(mce->sb->tyenv, ty);
243 ent.kind = kind;
244 ent.shadowV = IRTemp_INVALID;
245 ent.shadowB = IRTemp_INVALID;
246 newIx = VG_(addToXA)( mce->tmpMap, &ent );
247 tl_assert(newIx == (Word)tmp);
248 return tmp;
249}
250
251
sewardj95448072004-11-22 20:19:51 +0000252/* Find the tmp currently shadowing the given original tmp. If none
253 so far exists, allocate one. */
sewardj7cf4e6b2008-05-01 20:24:26 +0000254static IRTemp findShadowTmpV ( MCEnv* mce, IRTemp orig )
njn25e49d8e72002-09-23 09:36:25 +0000255{
sewardj1c0ce7a2009-07-01 08:10:49 +0000256 TempMapEnt* ent;
257 /* VG_(indexXA) range-checks 'orig', hence no need to check
258 here. */
259 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
260 tl_assert(ent->kind == Orig);
261 if (ent->shadowV == IRTemp_INVALID) {
262 IRTemp tmpV
263 = newTemp( mce, shadowTypeV(mce->sb->tyenv->types[orig]), VSh );
264 /* newTemp may cause mce->tmpMap to resize, hence previous results
265 from VG_(indexXA) are invalid. */
266 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
267 tl_assert(ent->kind == Orig);
268 tl_assert(ent->shadowV == IRTemp_INVALID);
269 ent->shadowV = tmpV;
njn25e49d8e72002-09-23 09:36:25 +0000270 }
sewardj1c0ce7a2009-07-01 08:10:49 +0000271 return ent->shadowV;
njn25e49d8e72002-09-23 09:36:25 +0000272}
273
sewardj95448072004-11-22 20:19:51 +0000274/* Allocate a new shadow for the given original tmp. This means any
275 previous shadow is abandoned. This is needed because it is
276 necessary to give a new value to a shadow once it has been tested
277 for undefinedness, but unfortunately IR's SSA property disallows
278 this. Instead we must abandon the old shadow, allocate a new one
sewardj1c0ce7a2009-07-01 08:10:49 +0000279 and use that instead.
280
281 This is the same as findShadowTmpV, except we don't bother to see
282 if a shadow temp already existed -- we simply allocate a new one
283 regardless. */
sewardj7cf4e6b2008-05-01 20:24:26 +0000284static void newShadowTmpV ( MCEnv* mce, IRTemp orig )
njn25e49d8e72002-09-23 09:36:25 +0000285{
sewardj1c0ce7a2009-07-01 08:10:49 +0000286 TempMapEnt* ent;
287 /* VG_(indexXA) range-checks 'orig', hence no need to check
288 here. */
289 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
290 tl_assert(ent->kind == Orig);
291 if (1) {
292 IRTemp tmpV
293 = newTemp( mce, shadowTypeV(mce->sb->tyenv->types[orig]), VSh );
294 /* newTemp may cause mce->tmpMap to resize, hence previous results
295 from VG_(indexXA) are invalid. */
296 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
297 tl_assert(ent->kind == Orig);
298 ent->shadowV = tmpV;
299 }
sewardj95448072004-11-22 20:19:51 +0000300}
301
302
303/*------------------------------------------------------------*/
304/*--- IRAtoms -- a subset of IRExprs ---*/
305/*------------------------------------------------------------*/
306
307/* An atom is either an IRExpr_Const or an IRExpr_Tmp, as defined by
sewardj710d6c22005-03-20 18:55:15 +0000308 isIRAtom() in libvex_ir.h. Because this instrumenter expects flat
sewardj95448072004-11-22 20:19:51 +0000309 input, most of this code deals in atoms. Usefully, a value atom
310 always has a V-value which is also an atom: constants are shadowed
311 by constants, and temps are shadowed by the corresponding shadow
312 temporary. */
313
314typedef IRExpr IRAtom;
315
316/* (used for sanity checks only): is this an atom which looks
317 like it's from original code? */
318static Bool isOriginalAtom ( MCEnv* mce, IRAtom* a1 )
319{
320 if (a1->tag == Iex_Const)
321 return True;
sewardj1c0ce7a2009-07-01 08:10:49 +0000322 if (a1->tag == Iex_RdTmp) {
323 TempMapEnt* ent = VG_(indexXA)( mce->tmpMap, a1->Iex.RdTmp.tmp );
324 return ent->kind == Orig;
325 }
sewardj95448072004-11-22 20:19:51 +0000326 return False;
327}
328
329/* (used for sanity checks only): is this an atom which looks
330 like it's from shadow code? */
331static Bool isShadowAtom ( MCEnv* mce, IRAtom* a1 )
332{
333 if (a1->tag == Iex_Const)
334 return True;
sewardj1c0ce7a2009-07-01 08:10:49 +0000335 if (a1->tag == Iex_RdTmp) {
336 TempMapEnt* ent = VG_(indexXA)( mce->tmpMap, a1->Iex.RdTmp.tmp );
337 return ent->kind == VSh || ent->kind == BSh;
338 }
sewardj95448072004-11-22 20:19:51 +0000339 return False;
340}
341
342/* (used for sanity checks only): check that both args are atoms and
343 are identically-kinded. */
344static Bool sameKindedAtoms ( IRAtom* a1, IRAtom* a2 )
345{
sewardj0b9d74a2006-12-24 02:24:11 +0000346 if (a1->tag == Iex_RdTmp && a2->tag == Iex_RdTmp)
sewardj95448072004-11-22 20:19:51 +0000347 return True;
sewardjbef552a2005-08-30 12:54:36 +0000348 if (a1->tag == Iex_Const && a2->tag == Iex_Const)
sewardj95448072004-11-22 20:19:51 +0000349 return True;
350 return False;
351}
352
353
354/*------------------------------------------------------------*/
355/*--- Type management ---*/
356/*------------------------------------------------------------*/
357
358/* Shadow state is always accessed using integer types. This returns
359 an integer type with the same size (as per sizeofIRType) as the
360 given type. The only valid shadow types are Bit, I8, I16, I32,
sewardj45fa9f42012-05-21 10:18:10 +0000361 I64, I128, V128, V256. */
sewardj95448072004-11-22 20:19:51 +0000362
sewardj7cf4e6b2008-05-01 20:24:26 +0000363static IRType shadowTypeV ( IRType ty )
sewardj95448072004-11-22 20:19:51 +0000364{
365 switch (ty) {
366 case Ity_I1:
367 case Ity_I8:
368 case Ity_I16:
369 case Ity_I32:
sewardj6cf40ff2005-04-20 22:31:26 +0000370 case Ity_I64:
371 case Ity_I128: return ty;
sewardj3245c912004-12-10 14:58:26 +0000372 case Ity_F32: return Ity_I32;
sewardjb0ccb4d2012-04-02 10:22:05 +0000373 case Ity_D32: return Ity_I32;
sewardj3245c912004-12-10 14:58:26 +0000374 case Ity_F64: return Ity_I64;
sewardjb0ccb4d2012-04-02 10:22:05 +0000375 case Ity_D64: return Ity_I64;
sewardjb5b87402011-03-07 16:05:35 +0000376 case Ity_F128: return Ity_I128;
sewardjb0ccb4d2012-04-02 10:22:05 +0000377 case Ity_D128: return Ity_I128;
sewardj3245c912004-12-10 14:58:26 +0000378 case Ity_V128: return Ity_V128;
sewardj45fa9f42012-05-21 10:18:10 +0000379 case Ity_V256: return Ity_V256;
sewardj95448072004-11-22 20:19:51 +0000380 default: ppIRType(ty);
sewardj7cf4e6b2008-05-01 20:24:26 +0000381 VG_(tool_panic)("memcheck:shadowTypeV");
sewardj95448072004-11-22 20:19:51 +0000382 }
383}
384
385/* Produce a 'defined' value of the given shadow type. Should only be
386 supplied shadow types (Bit/I8/I16/I32/UI64). */
387static IRExpr* definedOfType ( IRType ty ) {
388 switch (ty) {
sewardj170ee212004-12-10 18:57:51 +0000389 case Ity_I1: return IRExpr_Const(IRConst_U1(False));
390 case Ity_I8: return IRExpr_Const(IRConst_U8(0));
391 case Ity_I16: return IRExpr_Const(IRConst_U16(0));
392 case Ity_I32: return IRExpr_Const(IRConst_U32(0));
393 case Ity_I64: return IRExpr_Const(IRConst_U64(0));
sewardjb5b87402011-03-07 16:05:35 +0000394 case Ity_I128: return i128_const_zero();
sewardj170ee212004-12-10 18:57:51 +0000395 case Ity_V128: return IRExpr_Const(IRConst_V128(0x0000));
sewardjf1962d32006-10-19 13:22:16 +0000396 default: VG_(tool_panic)("memcheck:definedOfType");
njn25e49d8e72002-09-23 09:36:25 +0000397 }
398}
399
400
sewardj95448072004-11-22 20:19:51 +0000401/*------------------------------------------------------------*/
402/*--- Constructing IR fragments ---*/
403/*------------------------------------------------------------*/
404
sewardj95448072004-11-22 20:19:51 +0000405/* add stmt to a bb */
sewardj7cf4e6b2008-05-01 20:24:26 +0000406static inline void stmt ( HChar cat, MCEnv* mce, IRStmt* st ) {
407 if (mce->trace) {
408 VG_(printf)(" %c: ", cat);
409 ppIRStmt(st);
410 VG_(printf)("\n");
411 }
sewardj1c0ce7a2009-07-01 08:10:49 +0000412 addStmtToIRSB(mce->sb, st);
sewardj7cf4e6b2008-05-01 20:24:26 +0000413}
414
415/* assign value to tmp */
416static inline
417void assign ( HChar cat, MCEnv* mce, IRTemp tmp, IRExpr* expr ) {
sewardj1c0ce7a2009-07-01 08:10:49 +0000418 stmt(cat, mce, IRStmt_WrTmp(tmp,expr));
sewardj7cf4e6b2008-05-01 20:24:26 +0000419}
sewardj95448072004-11-22 20:19:51 +0000420
421/* build various kinds of expressions */
sewardj57f92b02010-08-22 11:54:14 +0000422#define triop(_op, _arg1, _arg2, _arg3) \
423 IRExpr_Triop((_op),(_arg1),(_arg2),(_arg3))
sewardj95448072004-11-22 20:19:51 +0000424#define binop(_op, _arg1, _arg2) IRExpr_Binop((_op),(_arg1),(_arg2))
425#define unop(_op, _arg) IRExpr_Unop((_op),(_arg))
426#define mkU8(_n) IRExpr_Const(IRConst_U8(_n))
427#define mkU16(_n) IRExpr_Const(IRConst_U16(_n))
428#define mkU32(_n) IRExpr_Const(IRConst_U32(_n))
429#define mkU64(_n) IRExpr_Const(IRConst_U64(_n))
sewardj170ee212004-12-10 18:57:51 +0000430#define mkV128(_n) IRExpr_Const(IRConst_V128(_n))
sewardj0b9d74a2006-12-24 02:24:11 +0000431#define mkexpr(_tmp) IRExpr_RdTmp((_tmp))
sewardj95448072004-11-22 20:19:51 +0000432
sewardj7cf4e6b2008-05-01 20:24:26 +0000433/* Bind the given expression to a new temporary, and return the
sewardj95448072004-11-22 20:19:51 +0000434 temporary. This effectively converts an arbitrary expression into
sewardj7cf4e6b2008-05-01 20:24:26 +0000435 an atom.
436
437 'ty' is the type of 'e' and hence the type that the new temporary
sewardj1c0ce7a2009-07-01 08:10:49 +0000438 needs to be. But passing it in is redundant, since we can deduce
439 the type merely by inspecting 'e'. So at least use that fact to
440 assert that the two types agree. */
441static IRAtom* assignNew ( HChar cat, MCEnv* mce, IRType ty, IRExpr* e )
442{
443 TempKind k;
444 IRTemp t;
445 IRType tyE = typeOfIRExpr(mce->sb->tyenv, e);
sewardjb0ccb4d2012-04-02 10:22:05 +0000446
sewardj7cf4e6b2008-05-01 20:24:26 +0000447 tl_assert(tyE == ty); /* so 'ty' is redundant (!) */
sewardj1c0ce7a2009-07-01 08:10:49 +0000448 switch (cat) {
449 case 'V': k = VSh; break;
450 case 'B': k = BSh; break;
451 case 'C': k = Orig; break;
452 /* happens when we are making up new "orig"
453 expressions, for IRCAS handling */
454 default: tl_assert(0);
455 }
456 t = newTemp(mce, ty, k);
sewardj7cf4e6b2008-05-01 20:24:26 +0000457 assign(cat, mce, t, e);
sewardj95448072004-11-22 20:19:51 +0000458 return mkexpr(t);
459}
460
461
462/*------------------------------------------------------------*/
sewardjb5b87402011-03-07 16:05:35 +0000463/*--- Helper functions for 128-bit ops ---*/
464/*------------------------------------------------------------*/
sewardj45fa9f42012-05-21 10:18:10 +0000465
sewardjb5b87402011-03-07 16:05:35 +0000466static IRExpr *i128_const_zero(void)
467{
sewardj45fa9f42012-05-21 10:18:10 +0000468 IRAtom* z64 = IRExpr_Const(IRConst_U64(0));
469 return binop(Iop_64HLto128, z64, z64);
sewardjb5b87402011-03-07 16:05:35 +0000470}
471
sewardj45fa9f42012-05-21 10:18:10 +0000472/* There are no I128-bit loads and/or stores [as generated by any
473 current front ends]. So we do not need to worry about that in
474 expr2vbits_Load */
475
sewardjb5b87402011-03-07 16:05:35 +0000476
477/*------------------------------------------------------------*/
sewardj95448072004-11-22 20:19:51 +0000478/*--- Constructing definedness primitive ops ---*/
479/*------------------------------------------------------------*/
480
481/* --------- Defined-if-either-defined --------- */
482
483static IRAtom* mkDifD8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
484 tl_assert(isShadowAtom(mce,a1));
485 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000486 return assignNew('V', mce, Ity_I8, binop(Iop_And8, a1, a2));
sewardj95448072004-11-22 20:19:51 +0000487}
488
489static IRAtom* mkDifD16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
490 tl_assert(isShadowAtom(mce,a1));
491 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000492 return assignNew('V', mce, Ity_I16, binop(Iop_And16, a1, a2));
sewardj95448072004-11-22 20:19:51 +0000493}
494
495static IRAtom* mkDifD32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
496 tl_assert(isShadowAtom(mce,a1));
497 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000498 return assignNew('V', mce, Ity_I32, binop(Iop_And32, a1, a2));
sewardj95448072004-11-22 20:19:51 +0000499}
500
sewardj7010f6e2004-12-10 13:35:22 +0000501static IRAtom* mkDifD64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
502 tl_assert(isShadowAtom(mce,a1));
503 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000504 return assignNew('V', mce, Ity_I64, binop(Iop_And64, a1, a2));
sewardj7010f6e2004-12-10 13:35:22 +0000505}
506
sewardj20d38f22005-02-07 23:50:18 +0000507static IRAtom* mkDifDV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
sewardj170ee212004-12-10 18:57:51 +0000508 tl_assert(isShadowAtom(mce,a1));
509 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000510 return assignNew('V', mce, Ity_V128, binop(Iop_AndV128, a1, a2));
sewardj170ee212004-12-10 18:57:51 +0000511}
512
sewardj350e8f72012-06-25 07:52:15 +0000513static IRAtom* mkDifDV256 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
514 tl_assert(isShadowAtom(mce,a1));
515 tl_assert(isShadowAtom(mce,a2));
516 return assignNew('V', mce, Ity_V256, binop(Iop_AndV256, a1, a2));
517}
518
sewardj95448072004-11-22 20:19:51 +0000519/* --------- Undefined-if-either-undefined --------- */
520
521static IRAtom* mkUifU8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
522 tl_assert(isShadowAtom(mce,a1));
523 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000524 return assignNew('V', mce, Ity_I8, binop(Iop_Or8, a1, a2));
sewardj95448072004-11-22 20:19:51 +0000525}
526
527static IRAtom* mkUifU16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
528 tl_assert(isShadowAtom(mce,a1));
529 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000530 return assignNew('V', mce, Ity_I16, binop(Iop_Or16, a1, a2));
sewardj95448072004-11-22 20:19:51 +0000531}
532
533static IRAtom* mkUifU32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
534 tl_assert(isShadowAtom(mce,a1));
535 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000536 return assignNew('V', mce, Ity_I32, binop(Iop_Or32, a1, a2));
sewardj95448072004-11-22 20:19:51 +0000537}
538
539static IRAtom* mkUifU64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
540 tl_assert(isShadowAtom(mce,a1));
541 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000542 return assignNew('V', mce, Ity_I64, binop(Iop_Or64, a1, a2));
sewardj95448072004-11-22 20:19:51 +0000543}
544
sewardjb5b87402011-03-07 16:05:35 +0000545static IRAtom* mkUifU128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
546 IRAtom *tmp1, *tmp2, *tmp3, *tmp4, *tmp5, *tmp6;
547 tl_assert(isShadowAtom(mce,a1));
548 tl_assert(isShadowAtom(mce,a2));
549 tmp1 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, a1));
550 tmp2 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, a1));
551 tmp3 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, a2));
552 tmp4 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, a2));
553 tmp5 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp1, tmp3));
554 tmp6 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp2, tmp4));
555
556 return assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, tmp6, tmp5));
557}
558
sewardj20d38f22005-02-07 23:50:18 +0000559static IRAtom* mkUifUV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
sewardj3245c912004-12-10 14:58:26 +0000560 tl_assert(isShadowAtom(mce,a1));
561 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000562 return assignNew('V', mce, Ity_V128, binop(Iop_OrV128, a1, a2));
sewardj3245c912004-12-10 14:58:26 +0000563}
564
sewardj350e8f72012-06-25 07:52:15 +0000565static IRAtom* mkUifUV256 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
566 tl_assert(isShadowAtom(mce,a1));
567 tl_assert(isShadowAtom(mce,a2));
568 return assignNew('V', mce, Ity_V256, binop(Iop_OrV256, a1, a2));
569}
570
sewardje50a1b12004-12-17 01:24:54 +0000571static IRAtom* mkUifU ( MCEnv* mce, IRType vty, IRAtom* a1, IRAtom* a2 ) {
sewardj95448072004-11-22 20:19:51 +0000572 switch (vty) {
sewardje50a1b12004-12-17 01:24:54 +0000573 case Ity_I8: return mkUifU8(mce, a1, a2);
sewardja1d93302004-12-12 16:45:06 +0000574 case Ity_I16: return mkUifU16(mce, a1, a2);
575 case Ity_I32: return mkUifU32(mce, a1, a2);
576 case Ity_I64: return mkUifU64(mce, a1, a2);
sewardjb5b87402011-03-07 16:05:35 +0000577 case Ity_I128: return mkUifU128(mce, a1, a2);
sewardj20d38f22005-02-07 23:50:18 +0000578 case Ity_V128: return mkUifUV128(mce, a1, a2);
sewardj95448072004-11-22 20:19:51 +0000579 default:
580 VG_(printf)("\n"); ppIRType(vty); VG_(printf)("\n");
581 VG_(tool_panic)("memcheck:mkUifU");
njn25e49d8e72002-09-23 09:36:25 +0000582 }
583}
584
sewardj95448072004-11-22 20:19:51 +0000585/* --------- The Left-family of operations. --------- */
njn25e49d8e72002-09-23 09:36:25 +0000586
sewardj95448072004-11-22 20:19:51 +0000587static IRAtom* mkLeft8 ( MCEnv* mce, IRAtom* a1 ) {
588 tl_assert(isShadowAtom(mce,a1));
sewardj7cf4e6b2008-05-01 20:24:26 +0000589 return assignNew('V', mce, Ity_I8, unop(Iop_Left8, a1));
sewardj95448072004-11-22 20:19:51 +0000590}
591
592static IRAtom* mkLeft16 ( MCEnv* mce, IRAtom* a1 ) {
593 tl_assert(isShadowAtom(mce,a1));
sewardj7cf4e6b2008-05-01 20:24:26 +0000594 return assignNew('V', mce, Ity_I16, unop(Iop_Left16, a1));
sewardj95448072004-11-22 20:19:51 +0000595}
596
597static IRAtom* mkLeft32 ( MCEnv* mce, IRAtom* a1 ) {
598 tl_assert(isShadowAtom(mce,a1));
sewardj7cf4e6b2008-05-01 20:24:26 +0000599 return assignNew('V', mce, Ity_I32, unop(Iop_Left32, a1));
sewardj95448072004-11-22 20:19:51 +0000600}
601
sewardj681be302005-01-15 20:43:58 +0000602static IRAtom* mkLeft64 ( MCEnv* mce, IRAtom* a1 ) {
603 tl_assert(isShadowAtom(mce,a1));
sewardj7cf4e6b2008-05-01 20:24:26 +0000604 return assignNew('V', mce, Ity_I64, unop(Iop_Left64, a1));
sewardj681be302005-01-15 20:43:58 +0000605}
606
sewardj95448072004-11-22 20:19:51 +0000607/* --------- 'Improvement' functions for AND/OR. --------- */
608
609/* ImproveAND(data, vbits) = data OR vbits. Defined (0) data 0s give
610 defined (0); all other -> undefined (1).
611*/
612static IRAtom* mkImproveAND8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
njn25e49d8e72002-09-23 09:36:25 +0000613{
sewardj95448072004-11-22 20:19:51 +0000614 tl_assert(isOriginalAtom(mce, data));
615 tl_assert(isShadowAtom(mce, vbits));
616 tl_assert(sameKindedAtoms(data, vbits));
sewardj7cf4e6b2008-05-01 20:24:26 +0000617 return assignNew('V', mce, Ity_I8, binop(Iop_Or8, data, vbits));
sewardj95448072004-11-22 20:19:51 +0000618}
njn25e49d8e72002-09-23 09:36:25 +0000619
sewardj95448072004-11-22 20:19:51 +0000620static IRAtom* mkImproveAND16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
621{
622 tl_assert(isOriginalAtom(mce, data));
623 tl_assert(isShadowAtom(mce, vbits));
624 tl_assert(sameKindedAtoms(data, vbits));
sewardj7cf4e6b2008-05-01 20:24:26 +0000625 return assignNew('V', mce, Ity_I16, binop(Iop_Or16, data, vbits));
sewardj95448072004-11-22 20:19:51 +0000626}
njn25e49d8e72002-09-23 09:36:25 +0000627
sewardj95448072004-11-22 20:19:51 +0000628static IRAtom* mkImproveAND32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
629{
630 tl_assert(isOriginalAtom(mce, data));
631 tl_assert(isShadowAtom(mce, vbits));
632 tl_assert(sameKindedAtoms(data, vbits));
sewardj7cf4e6b2008-05-01 20:24:26 +0000633 return assignNew('V', mce, Ity_I32, binop(Iop_Or32, data, vbits));
sewardj95448072004-11-22 20:19:51 +0000634}
njn25e49d8e72002-09-23 09:36:25 +0000635
sewardj7010f6e2004-12-10 13:35:22 +0000636static IRAtom* mkImproveAND64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
637{
638 tl_assert(isOriginalAtom(mce, data));
639 tl_assert(isShadowAtom(mce, vbits));
640 tl_assert(sameKindedAtoms(data, vbits));
sewardj7cf4e6b2008-05-01 20:24:26 +0000641 return assignNew('V', mce, Ity_I64, binop(Iop_Or64, data, vbits));
sewardj7010f6e2004-12-10 13:35:22 +0000642}
643
sewardj20d38f22005-02-07 23:50:18 +0000644static IRAtom* mkImproveANDV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
sewardj170ee212004-12-10 18:57:51 +0000645{
646 tl_assert(isOriginalAtom(mce, data));
647 tl_assert(isShadowAtom(mce, vbits));
648 tl_assert(sameKindedAtoms(data, vbits));
sewardj7cf4e6b2008-05-01 20:24:26 +0000649 return assignNew('V', mce, Ity_V128, binop(Iop_OrV128, data, vbits));
sewardj170ee212004-12-10 18:57:51 +0000650}
651
sewardj350e8f72012-06-25 07:52:15 +0000652static IRAtom* mkImproveANDV256 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
653{
654 tl_assert(isOriginalAtom(mce, data));
655 tl_assert(isShadowAtom(mce, vbits));
656 tl_assert(sameKindedAtoms(data, vbits));
657 return assignNew('V', mce, Ity_V256, binop(Iop_OrV256, data, vbits));
658}
659
sewardj95448072004-11-22 20:19:51 +0000660/* ImproveOR(data, vbits) = ~data OR vbits. Defined (0) data 1s give
661 defined (0); all other -> undefined (1).
662*/
663static IRAtom* mkImproveOR8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
664{
665 tl_assert(isOriginalAtom(mce, data));
666 tl_assert(isShadowAtom(mce, vbits));
667 tl_assert(sameKindedAtoms(data, vbits));
668 return assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +0000669 'V', mce, Ity_I8,
sewardj95448072004-11-22 20:19:51 +0000670 binop(Iop_Or8,
sewardj7cf4e6b2008-05-01 20:24:26 +0000671 assignNew('V', mce, Ity_I8, unop(Iop_Not8, data)),
sewardj95448072004-11-22 20:19:51 +0000672 vbits) );
673}
njn25e49d8e72002-09-23 09:36:25 +0000674
sewardj95448072004-11-22 20:19:51 +0000675static IRAtom* mkImproveOR16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
676{
677 tl_assert(isOriginalAtom(mce, data));
678 tl_assert(isShadowAtom(mce, vbits));
679 tl_assert(sameKindedAtoms(data, vbits));
680 return assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +0000681 'V', mce, Ity_I16,
sewardj95448072004-11-22 20:19:51 +0000682 binop(Iop_Or16,
sewardj7cf4e6b2008-05-01 20:24:26 +0000683 assignNew('V', mce, Ity_I16, unop(Iop_Not16, data)),
sewardj95448072004-11-22 20:19:51 +0000684 vbits) );
685}
njn25e49d8e72002-09-23 09:36:25 +0000686
sewardj95448072004-11-22 20:19:51 +0000687static IRAtom* mkImproveOR32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
688{
689 tl_assert(isOriginalAtom(mce, data));
690 tl_assert(isShadowAtom(mce, vbits));
691 tl_assert(sameKindedAtoms(data, vbits));
692 return assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +0000693 'V', mce, Ity_I32,
sewardj95448072004-11-22 20:19:51 +0000694 binop(Iop_Or32,
sewardj7cf4e6b2008-05-01 20:24:26 +0000695 assignNew('V', mce, Ity_I32, unop(Iop_Not32, data)),
sewardj95448072004-11-22 20:19:51 +0000696 vbits) );
697}
698
sewardj7010f6e2004-12-10 13:35:22 +0000699static IRAtom* mkImproveOR64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
700{
701 tl_assert(isOriginalAtom(mce, data));
702 tl_assert(isShadowAtom(mce, vbits));
703 tl_assert(sameKindedAtoms(data, vbits));
704 return assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +0000705 'V', mce, Ity_I64,
sewardj7010f6e2004-12-10 13:35:22 +0000706 binop(Iop_Or64,
sewardj7cf4e6b2008-05-01 20:24:26 +0000707 assignNew('V', mce, Ity_I64, unop(Iop_Not64, data)),
sewardj7010f6e2004-12-10 13:35:22 +0000708 vbits) );
709}
710
sewardj20d38f22005-02-07 23:50:18 +0000711static IRAtom* mkImproveORV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
sewardj170ee212004-12-10 18:57:51 +0000712{
713 tl_assert(isOriginalAtom(mce, data));
714 tl_assert(isShadowAtom(mce, vbits));
715 tl_assert(sameKindedAtoms(data, vbits));
716 return assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +0000717 'V', mce, Ity_V128,
sewardj20d38f22005-02-07 23:50:18 +0000718 binop(Iop_OrV128,
sewardj7cf4e6b2008-05-01 20:24:26 +0000719 assignNew('V', mce, Ity_V128, unop(Iop_NotV128, data)),
sewardj170ee212004-12-10 18:57:51 +0000720 vbits) );
721}
722
sewardj350e8f72012-06-25 07:52:15 +0000723static IRAtom* mkImproveORV256 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
724{
725 tl_assert(isOriginalAtom(mce, data));
726 tl_assert(isShadowAtom(mce, vbits));
727 tl_assert(sameKindedAtoms(data, vbits));
728 return assignNew(
729 'V', mce, Ity_V256,
730 binop(Iop_OrV256,
731 assignNew('V', mce, Ity_V256, unop(Iop_NotV256, data)),
732 vbits) );
733}
734
sewardj95448072004-11-22 20:19:51 +0000735/* --------- Pessimising casts. --------- */
736
sewardjb5b87402011-03-07 16:05:35 +0000737/* The function returns an expression of type DST_TY. If any of the VBITS
738 is undefined (value == 1) the resulting expression has all bits set to
739 1. Otherwise, all bits are 0. */
740
/* Pessimising cast: produce a value of shadow type DST_TY that is all
   1s (undefined) if any bit of VBITS is 1, and all 0s (defined)
   otherwise.  DST_TY is a shadow type, not an original type. */
static IRAtom* mkPCastTo( MCEnv* mce, IRType dst_ty, IRAtom* vbits )
{
   IRType  src_ty;
   IRAtom* tmp1;

   /* Note, dst_ty is a shadow type, not an original type. */
   tl_assert(isShadowAtom(mce,vbits));
   src_ty = typeOfIRExpr(mce->sb->tyenv, vbits);

   /* Fast-track some common cases */
   if (src_ty == Ity_I32 && dst_ty == Ity_I32)
      return assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));

   if (src_ty == Ity_I64 && dst_ty == Ity_I64)
      return assignNew('V', mce, Ity_I64, unop(Iop_CmpwNEZ64, vbits));

   if (src_ty == Ity_I32 && dst_ty == Ity_I64) {
      /* PCast the arg, then clone it.  Duplicating the 32-bit PCast
         result into both halves gives the required 64-bit value. */
      IRAtom* tmp = assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));
      return assignNew('V', mce, Ity_I64, binop(Iop_32HLto64, tmp, tmp));
   }

   if (src_ty == Ity_I64 && dst_ty == Ity_I32) {
      /* PCast the arg.  This gives all 0s or all 1s.  Then throw away
         the top half. */
      IRAtom* tmp = assignNew('V', mce, Ity_I64, unop(Iop_CmpwNEZ64, vbits));
      return assignNew('V', mce, Ity_I32, unop(Iop_64to32, tmp));
   }

   /* Else do it the slow way .. */
   /* First of all, collapse vbits down to a single bit. */
   tmp1   = NULL;
   switch (src_ty) {
      case Ity_I1:
         /* Already a single bit; nothing to collapse. */
         tmp1 = vbits;
         break;
      case Ity_I8:
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ8, vbits));
         break;
      case Ity_I16:
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ16, vbits));
         break;
      case Ity_I32:
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ32, vbits));
         break;
      case Ity_I64:
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ64, vbits));
         break;
      case Ity_I128: {
         /* Gah.  Chop it in half, OR the halves together, and compare
            that with zero. */
         IRAtom* tmp2 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, vbits));
         IRAtom* tmp3 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, vbits));
         IRAtom* tmp4 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp2, tmp3));
         tmp1         = assignNew('V', mce, Ity_I1,
                                       unop(Iop_CmpNEZ64, tmp4));
         break;
      }
      default:
         ppIRType(src_ty);
         VG_(tool_panic)("mkPCastTo(1)");
   }
   tl_assert(tmp1);
   /* Now widen up to the dst type.  1Sto* sign-extends the single bit
      so the result is all-0s or all-1s as required. */
   switch (dst_ty) {
      case Ity_I1:
         return tmp1;
      case Ity_I8:
         return assignNew('V', mce, Ity_I8, unop(Iop_1Sto8, tmp1));
      case Ity_I16:
         return assignNew('V', mce, Ity_I16, unop(Iop_1Sto16, tmp1));
      case Ity_I32:
         return assignNew('V', mce, Ity_I32, unop(Iop_1Sto32, tmp1));
      case Ity_I64:
         return assignNew('V', mce, Ity_I64, unop(Iop_1Sto64, tmp1));
      case Ity_V128:
         /* Widen to 64 bits, then duplicate into both vector halves. */
         tmp1 = assignNew('V', mce, Ity_I64, unop(Iop_1Sto64, tmp1));
         tmp1 = assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128, tmp1, tmp1));
         return tmp1;
      case Ity_I128:
         /* Widen to 64 bits, then duplicate into both halves. */
         tmp1 = assignNew('V', mce, Ity_I64, unop(Iop_1Sto64, tmp1));
         tmp1 = assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, tmp1, tmp1));
         return tmp1;
      default:
         ppIRType(dst_ty);
         VG_(tool_panic)("mkPCastTo(2)");
   }
}
829
sewardjd5204dc2004-12-31 01:16:11 +0000830/* --------- Accurate interpretation of CmpEQ/CmpNE. --------- */
831/*
832 Normally, we can do CmpEQ/CmpNE by doing UifU on the arguments, and
833 PCasting to Ity_U1. However, sometimes it is necessary to be more
834 accurate. The insight is that the result is defined if two
835 corresponding bits can be found, one from each argument, so that
836 both bits are defined but are different -- that makes EQ say "No"
837 and NE say "Yes". Hence, we compute an improvement term and DifD
838 it onto the "normal" (UifU) result.
839
840 The result is:
841
842 PCastTo<1> (
sewardje6f8af42005-07-06 18:48:59 +0000843 -- naive version
844 PCastTo<sz>( UifU<sz>(vxx, vyy) )
845
sewardjd5204dc2004-12-31 01:16:11 +0000846 `DifD<sz>`
sewardje6f8af42005-07-06 18:48:59 +0000847
848 -- improvement term
849 PCastTo<sz>( PCast<sz>( CmpEQ<sz> ( vec, 1...1 ) ) )
sewardjd5204dc2004-12-31 01:16:11 +0000850 )
sewardje6f8af42005-07-06 18:48:59 +0000851
sewardjd5204dc2004-12-31 01:16:11 +0000852 where
853 vec contains 0 (defined) bits where the corresponding arg bits
sewardje6f8af42005-07-06 18:48:59 +0000854 are defined but different, and 1 bits otherwise.
sewardjd5204dc2004-12-31 01:16:11 +0000855
sewardje6f8af42005-07-06 18:48:59 +0000856 vec = Or<sz>( vxx, // 0 iff bit defined
857 vyy, // 0 iff bit defined
858 Not<sz>(Xor<sz>( xx, yy )) // 0 iff bits different
859 )
860
861 If any bit of vec is 0, the result is defined and so the
862 improvement term should produce 0...0, else it should produce
863 1...1.
864
865 Hence require for the improvement term:
866
867 if vec == 1...1 then 1...1 else 0...0
868 ->
869 PCast<sz>( CmpEQ<sz> ( vec, 1...1 ) )
870
871 This was extensively re-analysed and checked on 6 July 05.
sewardjd5204dc2004-12-31 01:16:11 +0000872*/
/* Accurate shadow computation for CmpEQ/CmpNE (see the big comment
   above for the derivation).  Returns an Ity_I1 shadow value which is
   0 (defined) iff either (a) both operands are fully defined, or (b)
   some corresponding pair of bits is defined-and-different, which
   forces the comparison outcome regardless of the undefined bits. */
static IRAtom* expensiveCmpEQorNE ( MCEnv*  mce,
                                    IRType  ty,
                                    IRAtom* vxx, IRAtom* vyy,
                                    IRAtom* xx, IRAtom* yy )
{
   IRAtom *naive, *vec, *improvement_term;
   IRAtom *improved, *final_cast, *top;
   IROp   opDIFD, opUIFU, opXOR, opNOT, opCMP, opOR;

   tl_assert(isShadowAtom(mce,vxx));
   tl_assert(isShadowAtom(mce,vyy));
   tl_assert(isOriginalAtom(mce,xx));
   tl_assert(isOriginalAtom(mce,yy));
   tl_assert(sameKindedAtoms(vxx,xx));
   tl_assert(sameKindedAtoms(vyy,yy));

   /* Select the width-appropriate operations and the all-1s constant. */
   switch (ty) {
      case Ity_I32:
         opOR   = Iop_Or32;
         opDIFD = Iop_And32;
         opUIFU = Iop_Or32;
         opNOT  = Iop_Not32;
         opXOR  = Iop_Xor32;
         opCMP  = Iop_CmpEQ32;
         top    = mkU32(0xFFFFFFFF);
         break;
      case Ity_I64:
         opOR   = Iop_Or64;
         opDIFD = Iop_And64;
         opUIFU = Iop_Or64;
         opNOT  = Iop_Not64;
         opXOR  = Iop_Xor64;
         opCMP  = Iop_CmpEQ64;
         top    = mkU64(0xFFFFFFFFFFFFFFFFULL);
         break;
      default:
         VG_(tool_panic)("expensiveCmpEQorNE");
   }

   /* naive = PCast( vxx `UifU` vyy ) -- the standard interpretation. */
   naive
      = mkPCastTo(mce,ty,
                  assignNew('V', mce, ty, binop(opUIFU, vxx, vyy)));

   /* vec has a 0 bit exactly where the corresponding arg bits are
      defined (both vbits 0) but different (xor gives 1). */
   vec
      = assignNew(
           'V', mce,ty,
           binop( opOR,
                  assignNew('V', mce,ty, binop(opOR, vxx, vyy)),
                  assignNew(
                     'V', mce,ty,
                     unop( opNOT,
                           assignNew('V', mce,ty, binop(opXOR, xx, yy))))));

   /* improvement_term = all-1s unless some bit of vec is 0, in which
      case the result is forced defined (all-0s). */
   improvement_term
      = mkPCastTo( mce,ty,
                   assignNew('V', mce,Ity_I1, binop(opCMP, vec, top)));

   /* DifD the improvement onto the naive result. */
   improved
      = assignNew( 'V', mce,ty, binop(opDIFD, naive, improvement_term) );

   /* Narrow to the Ity_I1 shadow of the comparison result. */
   final_cast
      = mkPCastTo( mce, Ity_I1, improved );

   return final_cast;
}
938
sewardj95448072004-11-22 20:19:51 +0000939
sewardj992dff92005-10-07 11:08:55 +0000940/* --------- Semi-accurate interpretation of CmpORD. --------- */
941
942/* CmpORD32{S,U} does PowerPC-style 3-way comparisons:
943
944 CmpORD32S(x,y) = 1<<3 if x <s y
945 = 1<<2 if x >s y
946 = 1<<1 if x == y
947
948 and similarly the unsigned variant. The default interpretation is:
949
950 CmpORD32{S,U}#(x,y,x#,y#) = PCast(x# `UifU` y#)
sewardj1bc82102005-12-23 00:16:24 +0000951 & (7<<1)
sewardj992dff92005-10-07 11:08:55 +0000952
953 The "& (7<<1)" reflects the fact that all result bits except 3,2,1
954 are zero and therefore defined (viz, zero).
sewardja9e62a92005-10-07 12:13:21 +0000955
956 Also deal with a special case better:
957
958 CmpORD32S(x,0)
959
960 Here, bit 3 (LT) of the result is a copy of the top bit of x and
961 will be defined even if the rest of x isn't. In which case we do:
962
963 CmpORD32S#(x,x#,0,{impliedly 0}#)
sewardj1bc82102005-12-23 00:16:24 +0000964 = PCast(x#) & (3<<1) -- standard interp for GT#,EQ#
965 | (x# >>u 31) << 3 -- LT# = x#[31]
sewardja9e62a92005-10-07 12:13:21 +0000966
sewardj1bc82102005-12-23 00:16:24 +0000967 Analogous handling for CmpORD64{S,U}.
sewardj992dff92005-10-07 11:08:55 +0000968*/
sewardja9e62a92005-10-07 12:13:21 +0000969static Bool isZeroU32 ( IRAtom* e )
970{
971 return
972 toBool( e->tag == Iex_Const
973 && e->Iex.Const.con->tag == Ico_U32
974 && e->Iex.Const.con->Ico.U32 == 0 );
975}
976
sewardj1bc82102005-12-23 00:16:24 +0000977static Bool isZeroU64 ( IRAtom* e )
sewardj992dff92005-10-07 11:08:55 +0000978{
sewardj1bc82102005-12-23 00:16:24 +0000979 return
980 toBool( e->tag == Iex_Const
981 && e->Iex.Const.con->tag == Ico_U64
982 && e->Iex.Const.con->Ico.U64 == 0 );
983}
984
/* Shadow computation for PowerPC-style CmpORD{32,64}{S,U}; see the
   comment above for the interpretation being implemented. */
static IRAtom* doCmpORD ( MCEnv*  mce,
                          IROp    cmp_op,
                          IRAtom* xxhash, IRAtom* yyhash,
                          IRAtom* xx, IRAtom* yy )
{
   /* Decode operation width and signedness, and pick the matching ops. */
   Bool   m64    = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U;
   Bool   syned  = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD32S;
   IROp   opOR   = m64 ? Iop_Or64  : Iop_Or32;
   IROp   opAND  = m64 ? Iop_And64 : Iop_And32;
   IROp   opSHL  = m64 ? Iop_Shl64 : Iop_Shl32;
   IROp   opSHR  = m64 ? Iop_Shr64 : Iop_Shr32;
   IRType ty     = m64 ? Ity_I64   : Ity_I32;
   Int    width  = m64 ? 64 : 32;

   Bool (*isZero)(IRAtom*) = m64 ? isZeroU64 : isZeroU32;

   IRAtom* threeLeft1 = NULL;
   IRAtom* sevenLeft1 = NULL;

   tl_assert(isShadowAtom(mce,xxhash));
   tl_assert(isShadowAtom(mce,yyhash));
   tl_assert(isOriginalAtom(mce,xx));
   tl_assert(isOriginalAtom(mce,yy));
   tl_assert(sameKindedAtoms(xxhash,xx));
   tl_assert(sameKindedAtoms(yyhash,yy));
   tl_assert(cmp_op == Iop_CmpORD32S || cmp_op == Iop_CmpORD32U
             || cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U);

   if (0) {
      ppIROp(cmp_op); VG_(printf)(" ");
      ppIRExpr(xx); VG_(printf)(" "); ppIRExpr( yy ); VG_(printf)("\n");
   }

   if (syned && isZero(yy)) {
      /* fancy interpretation */
      /* if yy is zero, then it must be fully defined (zero#). */
      tl_assert(isZero(yyhash));
      threeLeft1 = m64 ? mkU64(3<<1) : mkU32(3<<1);
      /* Result = (PCast(x#) & (3<<1))      -- standard GT#/EQ# bits
                | ((x# >>u width-1) << 3)   -- LT# is just x#'s top bit */
      return
         binop(
            opOR,
            assignNew(
               'V', mce,ty,
               binop(
                  opAND,
                  mkPCastTo(mce,ty, xxhash),
                  threeLeft1
               )),
            assignNew(
               'V', mce,ty,
               binop(
                  opSHL,
                  assignNew(
                     'V', mce,ty,
                     binop(opSHR, xxhash, mkU8(width-1))),
                  mkU8(3)
               ))
         );
   } else {
      /* standard interpretation */
      sevenLeft1 = m64 ? mkU64(7<<1) : mkU32(7<<1);
      /* PCast(x# `UifU` y#) masked to bits 3..1; all other result bits
         are known to be zero and hence defined. */
      return
         binop(
            opAND,
            mkPCastTo( mce,ty,
                       mkUifU(mce,ty, xxhash,yyhash)),
            sevenLeft1
         );
   }
}
1055
1056
sewardj95448072004-11-22 20:19:51 +00001057/*------------------------------------------------------------*/
1058/*--- Emit a test and complaint if something is undefined. ---*/
1059/*------------------------------------------------------------*/
1060
sewardj7cf4e6b2008-05-01 20:24:26 +00001061static IRAtom* schemeE ( MCEnv* mce, IRExpr* e ); /* fwds */
1062
1063
sewardj95448072004-11-22 20:19:51 +00001064/* Set the annotations on a dirty helper to indicate that the stack
1065 pointer and instruction pointers might be read. This is the
1066 behaviour of all 'emit-a-complaint' style functions we might
1067 call. */
1068
1069static void setHelperAnns ( MCEnv* mce, IRDirty* di ) {
1070 di->nFxState = 2;
sewardj2eecb742012-06-01 16:11:41 +00001071 di->fxState[0].fx = Ifx_Read;
1072 di->fxState[0].offset = mce->layout->offset_SP;
1073 di->fxState[0].size = mce->layout->sizeof_SP;
1074 di->fxState[0].nRepeats = 0;
1075 di->fxState[0].repeatLen = 0;
1076 di->fxState[1].fx = Ifx_Read;
1077 di->fxState[1].offset = mce->layout->offset_IP;
1078 di->fxState[1].size = mce->layout->sizeof_IP;
1079 di->fxState[1].nRepeats = 0;
1080 di->fxState[1].repeatLen = 0;
sewardj95448072004-11-22 20:19:51 +00001081}
1082
1083
1084/* Check the supplied **original** atom for undefinedness, and emit a
1085 complaint if so. Once that happens, mark it as defined. This is
1086 possible because the atom is either a tmp or literal. If it's a
1087 tmp, it will be shadowed by a tmp, and so we can set the shadow to
1088 be defined. In fact as mentioned above, we will have to allocate a
1089 new tmp to carry the new 'defined' shadow value, and update the
1090 original->tmp mapping accordingly; we cannot simply assign a new
1091 value to an existing shadow tmp as this breaks SSAness -- resulting
1092 in the post-instrumentation sanity checker spluttering in disapproval.
1093*/
static void complainIfUndefined ( MCEnv* mce, IRAtom* atom, IRExpr *guard )
{
   IRAtom*  vatom;
   IRType   ty;
   Int      sz;
   IRDirty* di;
   IRAtom*  cond;
   IRAtom*  origin;   /* origin-tracking info, or NULL when not tracking */
   void*    fn;       /* helper to call on undefinedness */
   HChar*   nm;       /* its name, for the IRDirty */
   IRExpr** args;     /* its argument vector */
   Int      nargs;

   // Don't do V bit tests if we're not reporting undefined value errors.
   if (MC_(clo_mc_level) == 1)
      return;

   /* Since the original expression is atomic, there's no duplicated
      work generated by making multiple V-expressions for it.  So we
      don't really care about the possibility that someone else may
      also create a V-interpretion for it. */
   tl_assert(isOriginalAtom(mce, atom));
   vatom = expr2vbits( mce, atom );
   tl_assert(isShadowAtom(mce, vatom));
   tl_assert(sameKindedAtoms(atom, vatom));

   ty = typeOfIRExpr(mce->sb->tyenv, vatom);

   /* sz is only used for constructing the error message */
   sz = ty==Ity_I1 ? 0 : sizeofIRType(ty);

   cond = mkPCastTo( mce, Ity_I1, vatom );
   /* cond will be 0 if all defined, and 1 if any not defined. */

   /* Get the origin info for the value we are about to check.  At
      least, if we are doing origin tracking.  If not, use a dummy
      zero origin. */
   if (MC_(clo_mc_level) == 3) {
      origin = schemeE( mce, atom );
      if (mce->hWordTy == Ity_I64) {
         /* Helpers take a host word; widen the 32-bit origin tag. */
         origin = assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, origin) );
      }
   } else {
      origin = NULL;
   }

   fn    = NULL;
   nm    = NULL;
   args  = NULL;
   nargs = -1;

   /* Select the size-specific failure helper; the _w_o variants also
      receive the origin. */
   switch (sz) {
      case 0:
         if (origin) {
            fn    = &MC_(helperc_value_check0_fail_w_o);
            nm    = "MC_(helperc_value_check0_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check0_fail_no_o);
            nm    = "MC_(helperc_value_check0_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 1:
         if (origin) {
            fn    = &MC_(helperc_value_check1_fail_w_o);
            nm    = "MC_(helperc_value_check1_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check1_fail_no_o);
            nm    = "MC_(helperc_value_check1_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 4:
         if (origin) {
            fn    = &MC_(helperc_value_check4_fail_w_o);
            nm    = "MC_(helperc_value_check4_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check4_fail_no_o);
            nm    = "MC_(helperc_value_check4_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 8:
         if (origin) {
            fn    = &MC_(helperc_value_check8_fail_w_o);
            nm    = "MC_(helperc_value_check8_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check8_fail_no_o);
            nm    = "MC_(helperc_value_check8_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 2:
      case 16:
         /* Sizes without a dedicated helper: pass sz explicitly. */
         if (origin) {
            fn    = &MC_(helperc_value_checkN_fail_w_o);
            nm    = "MC_(helperc_value_checkN_fail_w_o)";
            args  = mkIRExprVec_2( mkIRExpr_HWord( sz ), origin);
            nargs = 2;
         } else {
            fn    = &MC_(helperc_value_checkN_fail_no_o);
            nm    = "MC_(helperc_value_checkN_fail_no_o)";
            args  = mkIRExprVec_1( mkIRExpr_HWord( sz ) );
            nargs = 1;
         }
         break;
      default:
         VG_(tool_panic)("unexpected szB");
   }

   tl_assert(fn);
   tl_assert(nm);
   tl_assert(args);
   tl_assert(nargs >= 0 && nargs <= 2);
   tl_assert( (MC_(clo_mc_level) == 3 && origin != NULL)
              || (MC_(clo_mc_level) == 2 && origin == NULL) );

   di = unsafeIRDirty_0_N( nargs/*regparms*/, nm,
                           VG_(fnptr_to_fnentry)( fn ), args );
   di->guard = cond;

   /* If the complaint is to be issued under a guard condition, AND that
      guard condition. */
   if (guard) {
      /* Widen both Ity_I1 conditions to I32, AND them, narrow back. */
      IRAtom *g1 = assignNew('V', mce, Ity_I32, unop(Iop_1Uto32, di->guard));
      IRAtom *g2 = assignNew('V', mce, Ity_I32, unop(Iop_1Uto32, guard));
      IRAtom *e  = assignNew('V', mce, Ity_I32, binop(Iop_And32, g1, g2));

      di->guard = assignNew('V', mce, Ity_I1, unop(Iop_32to1, e));
   }

   setHelperAnns( mce, di );
   stmt( 'V', mce, IRStmt_Dirty(di));

   /* Set the shadow tmp to be defined.  First, update the
      orig->shadow tmp mapping to reflect the fact that this shadow is
      getting a new value. */
   tl_assert(isIRAtom(vatom));
   /* sameKindedAtoms ... */
   if (vatom->tag == Iex_RdTmp) {
      tl_assert(atom->tag == Iex_RdTmp);
      /* A fresh shadow tmp is needed to preserve SSA form. */
      newShadowTmpV(mce, atom->Iex.RdTmp.tmp);
      assign('V', mce, findShadowTmpV(mce, atom->Iex.RdTmp.tmp),
                       definedOfType(ty));
   }
}
1252
1253
1254/*------------------------------------------------------------*/
1255/*--- Shadowing PUTs/GETs, and indexed variants thereof ---*/
1256/*------------------------------------------------------------*/
1257
1258/* Examine the always-defined sections declared in layout to see if
 1259   the (offset,size) section is within one.  Note, it is an error to
1260 partially fall into such a region: (offset,size) should either be
1261 completely in such a region or completely not-in such a region.
1262*/
1263static Bool isAlwaysDefd ( MCEnv* mce, Int offset, Int size )
1264{
1265 Int minoffD, maxoffD, i;
1266 Int minoff = offset;
1267 Int maxoff = minoff + size - 1;
1268 tl_assert((minoff & ~0xFFFF) == 0);
1269 tl_assert((maxoff & ~0xFFFF) == 0);
1270
1271 for (i = 0; i < mce->layout->n_alwaysDefd; i++) {
1272 minoffD = mce->layout->alwaysDefd[i].offset;
1273 maxoffD = minoffD + mce->layout->alwaysDefd[i].size - 1;
1274 tl_assert((minoffD & ~0xFFFF) == 0);
1275 tl_assert((maxoffD & ~0xFFFF) == 0);
1276
1277 if (maxoff < minoffD || maxoffD < minoff)
1278 continue; /* no overlap */
1279 if (minoff >= minoffD && maxoff <= maxoffD)
1280 return True; /* completely contained in an always-defd section */
1281
1282 VG_(tool_panic)("memcheck:isAlwaysDefd:partial overlap");
1283 }
1284 return False; /* could not find any containing section */
1285}
1286
1287
1288/* Generate into bb suitable actions to shadow this Put. If the state
1289 slice is marked 'always defined', do nothing. Otherwise, write the
1290 supplied V bits to the shadow state. We can pass in either an
1291 original atom or a V-atom, but not both. In the former case the
1292 relevant V-bits are then generated from the original.
florian434ffae2012-07-19 17:23:42 +00001293 We assume here, that the definedness of GUARD has already been checked.
sewardj95448072004-11-22 20:19:51 +00001294*/
static
void do_shadow_PUT ( MCEnv* mce,  Int offset,
                     IRAtom* atom, IRAtom* vatom, IRExpr *guard )
{
   IRType ty;

   // Don't do shadow PUTs if we're not doing undefined value checking.
   // Their absence lets Vex's optimiser remove all the shadow computation
   // that they depend on, which includes GETs of the shadow registers.
   if (MC_(clo_mc_level) == 1)
      return;

   /* Exactly one of atom/vatom is supplied; derive the V bits from the
      original atom if that is what we were given. */
   if (atom) {
      tl_assert(!vatom);
      tl_assert(isOriginalAtom(mce, atom));
      vatom = expr2vbits( mce, atom );
   } else {
      tl_assert(vatom);
      tl_assert(isShadowAtom(mce, vatom));
   }

   ty = typeOfIRExpr(mce->sb->tyenv, vatom);
   tl_assert(ty != Ity_I1);
   tl_assert(ty != Ity_I128);
   if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
      /* later: no ... */
      /* emit code to emit a complaint if any of the vbits are 1. */
      /* complainIfUndefined(mce, atom); */
   } else {
      /* Do a plain shadow Put. */
      if (guard) {
         /* If the guard expression evaluates to false we simply Put the value
            that is already stored in the guest state slot */
         IRAtom *cond, *iffalse;

         cond = assignNew('V', mce, Ity_I8, unop(Iop_1Uto8, guard));
         /* Current shadow-state contents, used when the guard is false. */
         iffalse = assignNew('V', mce, ty,
                             IRExpr_Get(offset + mce->layout->total_sizeB, ty));
         vatom = assignNew('V', mce, ty, IRExpr_Mux0X(cond, iffalse, vatom));
      }
      /* The shadow state lives at a fixed displacement past the real state. */
      stmt( 'V', mce, IRStmt_Put( offset + mce->layout->total_sizeB, vatom ));
   }
}
1338
1339
 1340/* Generate into bb suitable actions to shadow this PutI.  If the state
 1341   slice is marked 'always defined', do nothing; otherwise write the
 1342   supplied V bits to the shadow state.  */
1343static
floriand39b0222012-05-31 15:48:13 +00001344void do_shadow_PUTI ( MCEnv* mce, IRPutI *puti)
sewardj95448072004-11-22 20:19:51 +00001345{
sewardj7cf97ee2004-11-28 14:25:01 +00001346 IRAtom* vatom;
1347 IRType ty, tyS;
1348 Int arrSize;;
floriand39b0222012-05-31 15:48:13 +00001349 IRRegArray* descr = puti->descr;
1350 IRAtom* ix = puti->ix;
1351 Int bias = puti->bias;
1352 IRAtom* atom = puti->data;
sewardj7cf97ee2004-11-28 14:25:01 +00001353
njn1d0825f2006-03-27 11:37:07 +00001354 // Don't do shadow PUTIs if we're not doing undefined value checking.
1355 // Their absence lets Vex's optimiser remove all the shadow computation
1356 // that they depend on, which includes GETIs of the shadow registers.
sewardj7cf4e6b2008-05-01 20:24:26 +00001357 if (MC_(clo_mc_level) == 1)
njn1d0825f2006-03-27 11:37:07 +00001358 return;
1359
sewardj95448072004-11-22 20:19:51 +00001360 tl_assert(isOriginalAtom(mce,atom));
sewardj7cf97ee2004-11-28 14:25:01 +00001361 vatom = expr2vbits( mce, atom );
sewardj95448072004-11-22 20:19:51 +00001362 tl_assert(sameKindedAtoms(atom, vatom));
sewardj7cf97ee2004-11-28 14:25:01 +00001363 ty = descr->elemTy;
sewardj7cf4e6b2008-05-01 20:24:26 +00001364 tyS = shadowTypeV(ty);
sewardj7cf97ee2004-11-28 14:25:01 +00001365 arrSize = descr->nElems * sizeofIRType(ty);
sewardj95448072004-11-22 20:19:51 +00001366 tl_assert(ty != Ity_I1);
1367 tl_assert(isOriginalAtom(mce,ix));
florian434ffae2012-07-19 17:23:42 +00001368 complainIfUndefined(mce, ix, NULL);
sewardj95448072004-11-22 20:19:51 +00001369 if (isAlwaysDefd(mce, descr->base, arrSize)) {
1370 /* later: no ... */
1371 /* emit code to emit a complaint if any of the vbits are 1. */
1372 /* complainIfUndefined(mce, atom); */
1373 } else {
1374 /* Do a cloned version of the Put that refers to the shadow
1375 area. */
sewardj0b9d74a2006-12-24 02:24:11 +00001376 IRRegArray* new_descr
1377 = mkIRRegArray( descr->base + mce->layout->total_sizeB,
1378 tyS, descr->nElems);
floriand39b0222012-05-31 15:48:13 +00001379 stmt( 'V', mce, IRStmt_PutI( mkIRPutI(new_descr, ix, bias, vatom) ));
sewardj95448072004-11-22 20:19:51 +00001380 }
1381}
1382
1383
1384/* Return an expression which contains the V bits corresponding to the
1385 given GET (passed in in pieces).
1386*/
1387static
1388IRExpr* shadow_GET ( MCEnv* mce, Int offset, IRType ty )
1389{
sewardj7cf4e6b2008-05-01 20:24:26 +00001390 IRType tyS = shadowTypeV(ty);
sewardj95448072004-11-22 20:19:51 +00001391 tl_assert(ty != Ity_I1);
sewardjb5b87402011-03-07 16:05:35 +00001392 tl_assert(ty != Ity_I128);
sewardj95448072004-11-22 20:19:51 +00001393 if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
1394 /* Always defined, return all zeroes of the relevant type */
1395 return definedOfType(tyS);
1396 } else {
1397 /* return a cloned version of the Get that refers to the shadow
1398 area. */
sewardj7cf4e6b2008-05-01 20:24:26 +00001399 /* FIXME: this isn't an atom! */
sewardj95448072004-11-22 20:19:51 +00001400 return IRExpr_Get( offset + mce->layout->total_sizeB, tyS );
1401 }
1402}
1403
1404
1405/* Return an expression which contains the V bits corresponding to the
1406 given GETI (passed in in pieces).
1407*/
1408static
sewardj0b9d74a2006-12-24 02:24:11 +00001409IRExpr* shadow_GETI ( MCEnv* mce,
1410 IRRegArray* descr, IRAtom* ix, Int bias )
sewardj95448072004-11-22 20:19:51 +00001411{
1412 IRType ty = descr->elemTy;
sewardj7cf4e6b2008-05-01 20:24:26 +00001413 IRType tyS = shadowTypeV(ty);
sewardj95448072004-11-22 20:19:51 +00001414 Int arrSize = descr->nElems * sizeofIRType(ty);
1415 tl_assert(ty != Ity_I1);
1416 tl_assert(isOriginalAtom(mce,ix));
florian434ffae2012-07-19 17:23:42 +00001417 complainIfUndefined(mce, ix, NULL);
sewardj95448072004-11-22 20:19:51 +00001418 if (isAlwaysDefd(mce, descr->base, arrSize)) {
1419 /* Always defined, return all zeroes of the relevant type */
1420 return definedOfType(tyS);
1421 } else {
1422 /* return a cloned version of the Get that refers to the shadow
1423 area. */
sewardj0b9d74a2006-12-24 02:24:11 +00001424 IRRegArray* new_descr
1425 = mkIRRegArray( descr->base + mce->layout->total_sizeB,
1426 tyS, descr->nElems);
sewardj95448072004-11-22 20:19:51 +00001427 return IRExpr_GetI( new_descr, ix, bias );
1428 }
1429}
1430
1431
1432/*------------------------------------------------------------*/
1433/*--- Generating approximations for unknown operations, ---*/
1434/*--- using lazy-propagate semantics ---*/
1435/*------------------------------------------------------------*/
1436
1437/* Lazy propagation of undefinedness from two values, resulting in the
1438 specified shadow type.
1439*/
1440static
1441IRAtom* mkLazy2 ( MCEnv* mce, IRType finalVty, IRAtom* va1, IRAtom* va2 )
1442{
sewardj95448072004-11-22 20:19:51 +00001443 IRAtom* at;
sewardj1c0ce7a2009-07-01 08:10:49 +00001444 IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
1445 IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
sewardj95448072004-11-22 20:19:51 +00001446 tl_assert(isShadowAtom(mce,va1));
1447 tl_assert(isShadowAtom(mce,va2));
sewardj37c31cc2005-04-26 23:49:24 +00001448
1449 /* The general case is inefficient because PCast is an expensive
1450 operation. Here are some special cases which use PCast only
1451 once rather than twice. */
1452
1453 /* I64 x I64 -> I64 */
1454 if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I64) {
1455 if (0) VG_(printf)("mkLazy2: I64 x I64 -> I64\n");
1456 at = mkUifU(mce, Ity_I64, va1, va2);
1457 at = mkPCastTo(mce, Ity_I64, at);
1458 return at;
1459 }
1460
1461 /* I64 x I64 -> I32 */
1462 if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I32) {
1463 if (0) VG_(printf)("mkLazy2: I64 x I64 -> I32\n");
1464 at = mkUifU(mce, Ity_I64, va1, va2);
1465 at = mkPCastTo(mce, Ity_I32, at);
1466 return at;
1467 }
1468
1469 if (0) {
1470 VG_(printf)("mkLazy2 ");
1471 ppIRType(t1);
1472 VG_(printf)("_");
1473 ppIRType(t2);
1474 VG_(printf)("_");
1475 ppIRType(finalVty);
1476 VG_(printf)("\n");
1477 }
1478
1479 /* General case: force everything via 32-bit intermediaries. */
sewardj95448072004-11-22 20:19:51 +00001480 at = mkPCastTo(mce, Ity_I32, va1);
1481 at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
1482 at = mkPCastTo(mce, finalVty, at);
1483 return at;
1484}
1485
1486
sewardjed69fdb2006-02-03 16:12:27 +00001487/* 3-arg version of the above. */
static
IRAtom* mkLazy3 ( MCEnv* mce, IRType finalVty,
                  IRAtom* va1, IRAtom* va2, IRAtom* va3 )
{
   IRAtom* at;
   IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
   IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
   IRType t3 = typeOfIRExpr(mce->sb->tyenv, va3);
   tl_assert(isShadowAtom(mce,va1));
   tl_assert(isShadowAtom(mce,va2));
   tl_assert(isShadowAtom(mce,va3));

   /* The general case is inefficient because PCast is an expensive
      operation.  Here are some special cases which use PCast only
      twice rather than three times. */

   /* I32 x I64 x I64 -> I64 */
   /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
   if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64
       && finalVty == Ity_I64) {
      if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I64\n");
      /* Widen 1st arg to I64.  Since 1st arg is typically a rounding
         mode indication which is fully defined, this should get
         folded out later. */
      at = mkPCastTo(mce, Ity_I64, va1);
      /* Now fold in 2nd and 3rd args. */
      at = mkUifU(mce, Ity_I64, at, va2);
      at = mkUifU(mce, Ity_I64, at, va3);
      /* and PCast once again. */
      at = mkPCastTo(mce, Ity_I64, at);
      return at;
   }

   /* I32 x I64 x I64 -> I32 */
   if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64
       && finalVty == Ity_I32) {
      if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I32\n");
      at = mkPCastTo(mce, Ity_I64, va1);
      at = mkUifU(mce, Ity_I64, at, va2);
      at = mkUifU(mce, Ity_I64, at, va3);
      at = mkPCastTo(mce, Ity_I32, at);
      return at;
   }

   /* I32 x I32 x I32 -> I32 */
   /* 32-bit FP idiom, as (eg) happens on ARM */
   if (t1 == Ity_I32 && t2 == Ity_I32 && t3 == Ity_I32
       && finalVty == Ity_I32) {
      if (0) VG_(printf)("mkLazy3: I32 x I32 x I32 -> I32\n");
      at = va1;
      at = mkUifU(mce, Ity_I32, at, va2);
      at = mkUifU(mce, Ity_I32, at, va3);
      at = mkPCastTo(mce, Ity_I32, at);
      return at;
   }

   /* I32 x I128 x I128 -> I128 */
   /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
   if (t1 == Ity_I32 && t2 == Ity_I128 && t3 == Ity_I128
       && finalVty == Ity_I128) {
      if (0) VG_(printf)("mkLazy3: I32 x I128 x I128 -> I128\n");
      /* Widen 1st arg to I128.  Since 1st arg is typically a rounding
         mode indication which is fully defined, this should get
         folded out later. */
      at = mkPCastTo(mce, Ity_I128, va1);
      /* Now fold in 2nd and 3rd args. */
      at = mkUifU(mce, Ity_I128, at, va2);
      at = mkUifU(mce, Ity_I128, at, va3);
      /* and PCast once again. */
      at = mkPCastTo(mce, Ity_I128, at);
      return at;
   }
   /* Unhandled type combination: dump it and assert.  Note the
      general-case fallback below is deliberately disabled. */
   if (1) {
      VG_(printf)("mkLazy3: ");
      ppIRType(t1);
      VG_(printf)(" x ");
      ppIRType(t2);
      VG_(printf)(" x ");
      ppIRType(t3);
      VG_(printf)(" -> ");
      ppIRType(finalVty);
      VG_(printf)("\n");
   }

   tl_assert(0);
   /* General case: force everything via 32-bit intermediaries. */
   /*
   at = mkPCastTo(mce, Ity_I32, va1);
   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va3));
   at = mkPCastTo(mce, finalVty, at);
   return at;
   */
}
1582
1583
sewardje91cea72006-02-08 19:32:02 +00001584/* 4-arg version of the above. */
static
IRAtom* mkLazy4 ( MCEnv* mce, IRType finalVty,
                  IRAtom* va1, IRAtom* va2, IRAtom* va3, IRAtom* va4 )
{
   IRAtom* at;
   IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
   IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
   IRType t3 = typeOfIRExpr(mce->sb->tyenv, va3);
   IRType t4 = typeOfIRExpr(mce->sb->tyenv, va4);
   tl_assert(isShadowAtom(mce,va1));
   tl_assert(isShadowAtom(mce,va2));
   tl_assert(isShadowAtom(mce,va3));
   tl_assert(isShadowAtom(mce,va4));

   /* The general case is inefficient because PCast is an expensive
      operation.  Here are some special cases which use fewer PCasts
      than a fully general scheme would require. */

   /* I32 x I64 x I64 x I64 -> I64 */
   /* Standard FP idiom: rm x FParg1 x FParg2 x FParg3 -> FPresult */
   if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64 && t4 == Ity_I64
       && finalVty == Ity_I64) {
      if (0) VG_(printf)("mkLazy4: I32 x I64 x I64 x I64 -> I64\n");
      /* Widen 1st arg to I64.  Since 1st arg is typically a rounding
         mode indication which is fully defined, this should get
         folded out later. */
      at = mkPCastTo(mce, Ity_I64, va1);
      /* Now fold in 2nd, 3rd, 4th args. */
      at = mkUifU(mce, Ity_I64, at, va2);
      at = mkUifU(mce, Ity_I64, at, va3);
      at = mkUifU(mce, Ity_I64, at, va4);
      /* and PCast once again. */
      at = mkPCastTo(mce, Ity_I64, at);
      return at;
   }
   /* I32 x I32 x I32 x I32 -> I32 */
   /* Standard FP idiom: rm x FParg1 x FParg2 x FParg3 -> FPresult */
   if (t1 == Ity_I32 && t2 == Ity_I32 && t3 == Ity_I32 && t4 == Ity_I32
       && finalVty == Ity_I32) {
      if (0) VG_(printf)("mkLazy4: I32 x I32 x I32 x I32 -> I32\n");
      at = va1;
      /* Now fold in 2nd, 3rd, 4th args. */
      at = mkUifU(mce, Ity_I32, at, va2);
      at = mkUifU(mce, Ity_I32, at, va3);
      at = mkUifU(mce, Ity_I32, at, va4);
      at = mkPCastTo(mce, Ity_I32, at);
      return at;
   }

   /* Unhandled type combination: dump it and assert.  There is no
      general-case fallback here. */
   if (1) {
      VG_(printf)("mkLazy4: ");
      ppIRType(t1);
      VG_(printf)(" x ");
      ppIRType(t2);
      VG_(printf)(" x ");
      ppIRType(t3);
      VG_(printf)(" x ");
      ppIRType(t4);
      VG_(printf)(" -> ");
      ppIRType(finalVty);
      VG_(printf)("\n");
   }

   tl_assert(0);
}
1650
1651
sewardj95448072004-11-22 20:19:51 +00001652/* Do the lazy propagation game from a null-terminated vector of
1653 atoms. This is presumably the arguments to a helper call, so the
1654 IRCallee info is also supplied in order that we can know which
1655 arguments should be ignored (via the .mcx_mask field).
1656*/
static
IRAtom* mkLazyN ( MCEnv* mce,
                  IRAtom** exprvec, IRType finalVtype, IRCallee* cee )
{
   Int i;
   IRAtom* here;
   IRAtom* curr;
   IRType mergeTy;
   Bool mergeTy64 = True;

   /* Decide on the type of the merge intermediary.  If all relevant
      args are I64, then it's I64.  In all other circumstances, use
      I32. */
   for (i = 0; exprvec[i]; i++) {
      tl_assert(i < 32);
      tl_assert(isOriginalAtom(mce, exprvec[i]));
      /* Args masked out by the callee are irrelevant to the choice. */
      if (cee->mcx_mask & (1<<i))
         continue;
      if (typeOfIRExpr(mce->sb->tyenv, exprvec[i]) != Ity_I64)
         mergeTy64 = False;
   }

   mergeTy = mergeTy64  ? Ity_I64  : Ity_I32;
   curr    = definedOfType(mergeTy);

   for (i = 0; exprvec[i]; i++) {
      tl_assert(i < 32);
      tl_assert(isOriginalAtom(mce, exprvec[i]));
      /* Only take notice of this arg if the callee's mc-exclusion
         mask does not say it is to be excluded. */
      if (cee->mcx_mask & (1<<i)) {
         /* the arg is to be excluded from definedness checking.  Do
            nothing. */
         if (0) VG_(printf)("excluding %s(%d)\n", cee->name, i);
      } else {
         /* calculate the arg's definedness, and pessimistically merge
            it in. */
         here = mkPCastTo( mce, mergeTy, expr2vbits(mce, exprvec[i]) );
         curr = mergeTy64
                   ? mkUifU64(mce, here, curr)
                   : mkUifU32(mce, here, curr);
      }
   }
   return mkPCastTo(mce, finalVtype, curr );
}
1702
1703
1704/*------------------------------------------------------------*/
1705/*--- Generating expensive sequences for exact carry-chain ---*/
1706/*--- propagation in add/sub and related operations. ---*/
1707/*------------------------------------------------------------*/
1708
/* Exact (not pessimising) definedness for add/sub: compute the result
   over the interval of values consistent with the defined bits of each
   operand (a_min..a_max, b_min..b_max), and mark as undefined any result
   bit which can differ between the two extremes, plus any bit that is
   undefined in either input. */
static
IRAtom* expensiveAddSub ( MCEnv*  mce,
                          Bool    add,
                          IRType  ty,
                          IRAtom* qaa, IRAtom* qbb,
                          IRAtom* aa,  IRAtom* bb )
{
   IRAtom *a_min, *b_min, *a_max, *b_max;
   IROp   opAND, opOR, opXOR, opNOT, opADD, opSUB;

   tl_assert(isShadowAtom(mce,qaa));
   tl_assert(isShadowAtom(mce,qbb));
   tl_assert(isOriginalAtom(mce,aa));
   tl_assert(isOriginalAtom(mce,bb));
   tl_assert(sameKindedAtoms(qaa,aa));
   tl_assert(sameKindedAtoms(qbb,bb));

   switch (ty) {
      case Ity_I32:
         opAND = Iop_And32;
         opOR  = Iop_Or32;
         opXOR = Iop_Xor32;
         opNOT = Iop_Not32;
         opADD = Iop_Add32;
         opSUB = Iop_Sub32;
         break;
      case Ity_I64:
         opAND = Iop_And64;
         opOR  = Iop_Or64;
         opXOR = Iop_Xor64;
         opNOT = Iop_Not64;
         opADD = Iop_Add64;
         opSUB = Iop_Sub64;
         break;
      default:
         VG_(tool_panic)("expensiveAddSub");
   }

   // a_min = aa & ~qaa   (undefined bits forced to 0)
   a_min = assignNew('V', mce,ty,
                     binop(opAND, aa,
                           assignNew('V', mce,ty, unop(opNOT, qaa))));

   // b_min = bb & ~qbb
   b_min = assignNew('V', mce,ty,
                     binop(opAND, bb,
                           assignNew('V', mce,ty, unop(opNOT, qbb))));

   // a_max = aa | qaa   (undefined bits forced to 1)
   a_max = assignNew('V', mce,ty, binop(opOR, aa, qaa));

   // b_max = bb | qbb
   b_max = assignNew('V', mce,ty, binop(opOR, bb, qbb));

   if (add) {
      // result = (qaa | qbb) | ((a_min + b_min) ^ (a_max + b_max))
      return
      assignNew('V', mce,ty,
                binop( opOR,
                       assignNew('V', mce,ty, binop(opOR, qaa, qbb)),
                       assignNew('V', mce,ty,
                                 binop( opXOR,
                                        assignNew('V', mce,ty, binop(opADD, a_min, b_min)),
                                        assignNew('V', mce,ty, binop(opADD, a_max, b_max))
                                 )
                       )
                )
      );
   } else {
      // result = (qaa | qbb) | ((a_min - b_max) ^ (a_max - b_min))
      // (comment previously said 'a_max + b_min'; the code, correctly,
      // subtracts: for SUB the extremes are min-max and max-min.)
      return
      assignNew('V', mce,ty,
                binop( opOR,
                       assignNew('V', mce,ty, binop(opOR, qaa, qbb)),
                       assignNew('V', mce,ty,
                                 binop( opXOR,
                                        assignNew('V', mce,ty, binop(opSUB, a_min, b_max)),
                                        assignNew('V', mce,ty, binop(opSUB, a_max, b_min))
                                 )
                       )
                )
      );
   }

}
1794
1795
1796/*------------------------------------------------------------*/
sewardjaaddbc22005-10-07 09:49:53 +00001797/*--- Scalar shifts. ---*/
1798/*------------------------------------------------------------*/
1799
1800/* Produce an interpretation for (aa << bb) (or >>s, >>u). The basic
1801 idea is to shift the definedness bits by the original shift amount.
1802 This introduces 0s ("defined") in new positions for left shifts and
1803 unsigned right shifts, and copies the top definedness bit for
1804 signed right shifts. So, conveniently, applying the original shift
1805 operator to the definedness bits for the left arg is exactly the
1806 right thing to do:
1807
1808 (qaa << bb)
1809
1810 However if the shift amount is undefined then the whole result
1811 is undefined. Hence need:
1812
1813 (qaa << bb) `UifU` PCast(qbb)
1814
   If the shift amount bb is a literal then qbb will say 'all defined'
1816 and the UifU and PCast will get folded out by post-instrumentation
1817 optimisation.
1818*/
1819static IRAtom* scalarShift ( MCEnv* mce,
1820 IRType ty,
1821 IROp original_op,
1822 IRAtom* qaa, IRAtom* qbb,
1823 IRAtom* aa, IRAtom* bb )
1824{
1825 tl_assert(isShadowAtom(mce,qaa));
1826 tl_assert(isShadowAtom(mce,qbb));
1827 tl_assert(isOriginalAtom(mce,aa));
1828 tl_assert(isOriginalAtom(mce,bb));
1829 tl_assert(sameKindedAtoms(qaa,aa));
1830 tl_assert(sameKindedAtoms(qbb,bb));
1831 return
1832 assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +00001833 'V', mce, ty,
sewardjaaddbc22005-10-07 09:49:53 +00001834 mkUifU( mce, ty,
sewardj7cf4e6b2008-05-01 20:24:26 +00001835 assignNew('V', mce, ty, binop(original_op, qaa, bb)),
sewardjaaddbc22005-10-07 09:49:53 +00001836 mkPCastTo(mce, ty, qbb)
1837 )
1838 );
1839}
1840
1841
1842/*------------------------------------------------------------*/
1843/*--- Helpers for dealing with vector primops. ---*/
sewardj3245c912004-12-10 14:58:26 +00001844/*------------------------------------------------------------*/
1845
sewardja1d93302004-12-12 16:45:06 +00001846/* Vector pessimisation -- pessimise within each lane individually. */
1847
/* Pessimise each of the 16 8-bit lanes: a lane with any nonzero bit
   becomes all-ones. */
static IRAtom* mkPCast8x16 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ8x16, at));
}
1852
/* Pessimise each of the 8 16-bit lanes of a V128. */
static IRAtom* mkPCast16x8 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ16x8, at));
}
1857
/* Pessimise each of the 4 32-bit lanes of a V128. */
static IRAtom* mkPCast32x4 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ32x4, at));
}
1862
/* Pessimise each of the 2 64-bit lanes of a V128. */
static IRAtom* mkPCast64x2 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ64x2, at));
}
1867
sewardj350e8f72012-06-25 07:52:15 +00001868static IRAtom* mkPCast64x4 ( MCEnv* mce, IRAtom* at )
1869{
1870 return assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ64x4, at));
1871}
1872
/* Pessimise each of the 8 32-bit lanes of a V256. */
static IRAtom* mkPCast32x8 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ32x8, at));
}
1877
sewardjacd2e912005-01-13 19:17:06 +00001878static IRAtom* mkPCast32x2 ( MCEnv* mce, IRAtom* at )
1879{
sewardj7cf4e6b2008-05-01 20:24:26 +00001880 return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ32x2, at));
sewardjacd2e912005-01-13 19:17:06 +00001881}
1882
/* Pessimise each of the 4 16-bit lanes of an I64. */
static IRAtom* mkPCast16x4 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ16x4, at));
}
1887
/* Pessimise each of the 8 8-bit lanes of an I64. */
static IRAtom* mkPCast8x8 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ8x8, at));
}
1892
sewardjc678b852010-09-22 00:58:51 +00001893static IRAtom* mkPCast16x2 ( MCEnv* mce, IRAtom* at )
1894{
1895 return assignNew('V', mce, Ity_I32, unop(Iop_CmpNEZ16x2, at));
1896}
1897
/* Pessimise each of the 4 8-bit lanes of an I32. */
static IRAtom* mkPCast8x4 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_I32, unop(Iop_CmpNEZ8x4, at));
}
1902
sewardja1d93302004-12-12 16:45:06 +00001903
sewardj3245c912004-12-10 14:58:26 +00001904/* Here's a simple scheme capable of handling ops derived from SSE1
1905 code and while only generating ops that can be efficiently
1906 implemented in SSE1. */
1907
1908/* All-lanes versions are straightforward:
1909
sewardj20d38f22005-02-07 23:50:18 +00001910 binary32Fx4(x,y) ==> PCast32x4(UifUV128(x#,y#))
sewardj3245c912004-12-10 14:58:26 +00001911
   unary32Fx4(x) ==> PCast32x4(x#)
1913
1914 Lowest-lane-only versions are more complex:
1915
sewardj20d38f22005-02-07 23:50:18 +00001916 binary32F0x4(x,y) ==> SetV128lo32(
sewardj3245c912004-12-10 14:58:26 +00001917 x#,
sewardj20d38f22005-02-07 23:50:18 +00001918 PCast32(V128to32(UifUV128(x#,y#)))
sewardj3245c912004-12-10 14:58:26 +00001919 )
1920
1921 This is perhaps not so obvious. In particular, it's faster to
sewardj20d38f22005-02-07 23:50:18 +00001922 do a V128-bit UifU and then take the bottom 32 bits than the more
sewardj3245c912004-12-10 14:58:26 +00001923 obvious scheme of taking the bottom 32 bits of each operand
1924 and doing a 32-bit UifU. Basically since UifU is fast and
1925 chopping lanes off vector values is slow.
1926
1927 Finally:
1928
sewardj20d38f22005-02-07 23:50:18 +00001929 unary32F0x4(x) ==> SetV128lo32(
sewardj3245c912004-12-10 14:58:26 +00001930 x#,
sewardj20d38f22005-02-07 23:50:18 +00001931 PCast32(V128to32(x#))
sewardj3245c912004-12-10 14:58:26 +00001932 )
1933
1934 Where:
1935
1936 PCast32(v#) = 1Sto32(CmpNE32(v#,0))
1937 PCast32x4(v#) = CmpNEZ32x4(v#)
1938*/
1939
1940static
1941IRAtom* binary32Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1942{
1943 IRAtom* at;
1944 tl_assert(isShadowAtom(mce, vatomX));
1945 tl_assert(isShadowAtom(mce, vatomY));
sewardj20d38f22005-02-07 23:50:18 +00001946 at = mkUifUV128(mce, vatomX, vatomY);
sewardj7cf4e6b2008-05-01 20:24:26 +00001947 at = assignNew('V', mce, Ity_V128, mkPCast32x4(mce, at));
sewardj3245c912004-12-10 14:58:26 +00001948 return at;
1949}
1950
1951static
1952IRAtom* unary32Fx4 ( MCEnv* mce, IRAtom* vatomX )
1953{
1954 IRAtom* at;
1955 tl_assert(isShadowAtom(mce, vatomX));
sewardj7cf4e6b2008-05-01 20:24:26 +00001956 at = assignNew('V', mce, Ity_V128, mkPCast32x4(mce, vatomX));
sewardj3245c912004-12-10 14:58:26 +00001957 return at;
1958}
1959
1960static
1961IRAtom* binary32F0x4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1962{
1963 IRAtom* at;
1964 tl_assert(isShadowAtom(mce, vatomX));
1965 tl_assert(isShadowAtom(mce, vatomY));
sewardj20d38f22005-02-07 23:50:18 +00001966 at = mkUifUV128(mce, vatomX, vatomY);
sewardj7cf4e6b2008-05-01 20:24:26 +00001967 at = assignNew('V', mce, Ity_I32, unop(Iop_V128to32, at));
sewardj3245c912004-12-10 14:58:26 +00001968 at = mkPCastTo(mce, Ity_I32, at);
sewardj7cf4e6b2008-05-01 20:24:26 +00001969 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
sewardj3245c912004-12-10 14:58:26 +00001970 return at;
1971}
1972
1973static
1974IRAtom* unary32F0x4 ( MCEnv* mce, IRAtom* vatomX )
1975{
1976 IRAtom* at;
1977 tl_assert(isShadowAtom(mce, vatomX));
sewardj7cf4e6b2008-05-01 20:24:26 +00001978 at = assignNew('V', mce, Ity_I32, unop(Iop_V128to32, vatomX));
sewardj3245c912004-12-10 14:58:26 +00001979 at = mkPCastTo(mce, Ity_I32, at);
sewardj7cf4e6b2008-05-01 20:24:26 +00001980 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
sewardj3245c912004-12-10 14:58:26 +00001981 return at;
1982}
1983
sewardj0b070592004-12-10 21:44:22 +00001984/* --- ... and ... 64Fx2 versions of the same ... --- */
1985
1986static
1987IRAtom* binary64Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1988{
1989 IRAtom* at;
1990 tl_assert(isShadowAtom(mce, vatomX));
1991 tl_assert(isShadowAtom(mce, vatomY));
sewardj20d38f22005-02-07 23:50:18 +00001992 at = mkUifUV128(mce, vatomX, vatomY);
sewardj7cf4e6b2008-05-01 20:24:26 +00001993 at = assignNew('V', mce, Ity_V128, mkPCast64x2(mce, at));
sewardj0b070592004-12-10 21:44:22 +00001994 return at;
1995}
1996
1997static
1998IRAtom* unary64Fx2 ( MCEnv* mce, IRAtom* vatomX )
1999{
2000 IRAtom* at;
2001 tl_assert(isShadowAtom(mce, vatomX));
sewardj7cf4e6b2008-05-01 20:24:26 +00002002 at = assignNew('V', mce, Ity_V128, mkPCast64x2(mce, vatomX));
sewardj0b070592004-12-10 21:44:22 +00002003 return at;
2004}
2005
2006static
2007IRAtom* binary64F0x2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2008{
2009 IRAtom* at;
2010 tl_assert(isShadowAtom(mce, vatomX));
2011 tl_assert(isShadowAtom(mce, vatomY));
sewardj20d38f22005-02-07 23:50:18 +00002012 at = mkUifUV128(mce, vatomX, vatomY);
sewardj7cf4e6b2008-05-01 20:24:26 +00002013 at = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, at));
sewardj0b070592004-12-10 21:44:22 +00002014 at = mkPCastTo(mce, Ity_I64, at);
sewardj7cf4e6b2008-05-01 20:24:26 +00002015 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
sewardj0b070592004-12-10 21:44:22 +00002016 return at;
2017}
2018
2019static
2020IRAtom* unary64F0x2 ( MCEnv* mce, IRAtom* vatomX )
2021{
2022 IRAtom* at;
2023 tl_assert(isShadowAtom(mce, vatomX));
sewardj7cf4e6b2008-05-01 20:24:26 +00002024 at = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vatomX));
sewardj0b070592004-12-10 21:44:22 +00002025 at = mkPCastTo(mce, Ity_I64, at);
sewardj7cf4e6b2008-05-01 20:24:26 +00002026 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
sewardj0b070592004-12-10 21:44:22 +00002027 return at;
2028}
2029
sewardj57f92b02010-08-22 11:54:14 +00002030/* --- --- ... and ... 32Fx2 versions of the same --- --- */
2031
2032static
2033IRAtom* binary32Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2034{
2035 IRAtom* at;
2036 tl_assert(isShadowAtom(mce, vatomX));
2037 tl_assert(isShadowAtom(mce, vatomY));
2038 at = mkUifU64(mce, vatomX, vatomY);
2039 at = assignNew('V', mce, Ity_I64, mkPCast32x2(mce, at));
2040 return at;
2041}
2042
2043static
2044IRAtom* unary32Fx2 ( MCEnv* mce, IRAtom* vatomX )
2045{
2046 IRAtom* at;
2047 tl_assert(isShadowAtom(mce, vatomX));
2048 at = assignNew('V', mce, Ity_I64, mkPCast32x2(mce, vatomX));
2049 return at;
2050}
2051
sewardj350e8f72012-06-25 07:52:15 +00002052/* --- ... and ... 64Fx4 versions of the same ... --- */
2053
2054static
2055IRAtom* binary64Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2056{
2057 IRAtom* at;
2058 tl_assert(isShadowAtom(mce, vatomX));
2059 tl_assert(isShadowAtom(mce, vatomY));
2060 at = mkUifUV256(mce, vatomX, vatomY);
2061 at = assignNew('V', mce, Ity_V256, mkPCast64x4(mce, at));
2062 return at;
2063}
2064
2065static
2066IRAtom* unary64Fx4 ( MCEnv* mce, IRAtom* vatomX )
2067{
2068 IRAtom* at;
2069 tl_assert(isShadowAtom(mce, vatomX));
2070 at = assignNew('V', mce, Ity_V256, mkPCast64x4(mce, vatomX));
2071 return at;
2072}
2073
2074/* --- ... and ... 32Fx8 versions of the same ... --- */
2075
2076static
2077IRAtom* binary32Fx8 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2078{
2079 IRAtom* at;
2080 tl_assert(isShadowAtom(mce, vatomX));
2081 tl_assert(isShadowAtom(mce, vatomY));
2082 at = mkUifUV256(mce, vatomX, vatomY);
2083 at = assignNew('V', mce, Ity_V256, mkPCast32x8(mce, at));
2084 return at;
2085}
2086
2087static
2088IRAtom* unary32Fx8 ( MCEnv* mce, IRAtom* vatomX )
2089{
2090 IRAtom* at;
2091 tl_assert(isShadowAtom(mce, vatomX));
2092 at = assignNew('V', mce, Ity_V256, mkPCast32x8(mce, vatomX));
2093 return at;
2094}
2095
sewardja1d93302004-12-12 16:45:06 +00002096/* --- --- Vector saturated narrowing --- --- */
2097
sewardjb5a29232011-10-22 09:29:41 +00002098/* We used to do something very clever here, but on closer inspection
2099 (2011-Jun-15), and in particular bug #279698, it turns out to be
2100 wrong. Part of the problem came from the fact that for a long
2101 time, the IR primops to do with saturated narrowing were
2102 underspecified and managed to confuse multiple cases which needed
2103 to be separate: the op names had a signedness qualifier, but in
2104 fact the source and destination signednesses needed to be specified
2105 independently, so the op names really need two independent
2106 signedness specifiers.
sewardja1d93302004-12-12 16:45:06 +00002107
sewardjb5a29232011-10-22 09:29:41 +00002108 As of 2011-Jun-15 (ish) the underspecification was sorted out
2109 properly. The incorrect instrumentation remained, though. That
2110 has now (2011-Oct-22) been fixed.
sewardja1d93302004-12-12 16:45:06 +00002111
sewardjb5a29232011-10-22 09:29:41 +00002112 What we now do is simple:
sewardja1d93302004-12-12 16:45:06 +00002113
sewardjb5a29232011-10-22 09:29:41 +00002114 Let the original narrowing op be QNarrowBinXtoYxZ, where Z is a
2115 number of lanes, X is the source lane width and signedness, and Y
2116 is the destination lane width and signedness. In all cases the
2117 destination lane width is half the source lane width, so the names
2118 have a bit of redundancy, but are at least easy to read.
sewardja1d93302004-12-12 16:45:06 +00002119
sewardjb5a29232011-10-22 09:29:41 +00002120 For example, Iop_QNarrowBin32Sto16Ux8 narrows 8 lanes of signed 32s
2121 to unsigned 16s.
sewardja1d93302004-12-12 16:45:06 +00002122
sewardjb5a29232011-10-22 09:29:41 +00002123 Let Vanilla(OP) be a function that takes OP, one of these
2124 saturating narrowing ops, and produces the same "shaped" narrowing
2125 op which is not saturating, but merely dumps the most significant
2126 bits. "same shape" means that the lane numbers and widths are the
2127 same as with OP.
sewardja1d93302004-12-12 16:45:06 +00002128
sewardjb5a29232011-10-22 09:29:41 +00002129 For example, Vanilla(Iop_QNarrowBin32Sto16Ux8)
2130 = Iop_NarrowBin32to16x8,
2131 that is, narrow 8 lanes of 32 bits to 8 lanes of 16 bits, by
2132 dumping the top half of each lane.
sewardja1d93302004-12-12 16:45:06 +00002133
sewardjb5a29232011-10-22 09:29:41 +00002134 So, with that in place, the scheme is simple, and it is simple to
2135 pessimise each lane individually and then apply Vanilla(OP) so as
2136 to get the result in the right "shape". If the original OP is
2137 QNarrowBinXtoYxZ then we produce
sewardja1d93302004-12-12 16:45:06 +00002138
sewardjb5a29232011-10-22 09:29:41 +00002139 Vanilla(OP)( PCast-X-to-X-x-Z(vatom1), PCast-X-to-X-x-Z(vatom2) )
sewardj9beeb0a2011-06-15 15:11:07 +00002140
sewardjb5a29232011-10-22 09:29:41 +00002141 or for the case when OP is unary (Iop_QNarrowUn*)
2142
2143 Vanilla(OP)( PCast-X-to-X-x-Z(vatom) )
sewardja1d93302004-12-12 16:45:06 +00002144*/
/* Compute Vanilla(OP): the non-saturating narrowing op (one which just
   drops the top half of each lane) with the same lane geometry as the
   given saturating narrowing op.  See the discussion above. */
static
IROp vanillaNarrowingOpOfShape ( IROp qnarrowOp )
{
   switch (qnarrowOp) {
      /* Binary: (128, 128) -> 128 */
      case Iop_QNarrowBin16Sto8Ux16:
      case Iop_QNarrowBin16Sto8Sx16:
      case Iop_QNarrowBin16Uto8Ux16:
         return Iop_NarrowBin16to8x16;
      case Iop_QNarrowBin32Sto16Ux8:
      case Iop_QNarrowBin32Sto16Sx8:
      case Iop_QNarrowBin32Uto16Ux8:
         return Iop_NarrowBin32to16x8;
      /* Binary: (64, 64) -> 64 */
      case Iop_QNarrowBin32Sto16Sx4:
         return Iop_NarrowBin32to16x4;
      case Iop_QNarrowBin16Sto8Ux8:
      case Iop_QNarrowBin16Sto8Sx8:
         return Iop_NarrowBin16to8x8;
      /* Unary: 128 -> 64 */
      case Iop_QNarrowUn64Uto32Ux2:
      case Iop_QNarrowUn64Sto32Sx2:
      case Iop_QNarrowUn64Sto32Ux2:
         return Iop_NarrowUn64to32x2;
      case Iop_QNarrowUn32Uto16Ux4:
      case Iop_QNarrowUn32Sto16Sx4:
      case Iop_QNarrowUn32Sto16Ux4:
         return Iop_NarrowUn32to16x4;
      case Iop_QNarrowUn16Uto8Ux8:
      case Iop_QNarrowUn16Sto8Sx8:
      case Iop_QNarrowUn16Sto8Ux8:
         return Iop_NarrowUn16to8x8;
      default:
         ppIROp(qnarrowOp);
         VG_(tool_panic)("vanillaNarrowOpOfShape");
   }
}
2182
2183static
sewardj7ee7d852011-06-16 11:37:21 +00002184IRAtom* vectorNarrowBinV128 ( MCEnv* mce, IROp narrow_op,
2185 IRAtom* vatom1, IRAtom* vatom2)
sewardja1d93302004-12-12 16:45:06 +00002186{
2187 IRAtom *at1, *at2, *at3;
2188 IRAtom* (*pcast)( MCEnv*, IRAtom* );
2189 switch (narrow_op) {
sewardj7ee7d852011-06-16 11:37:21 +00002190 case Iop_QNarrowBin32Sto16Sx8: pcast = mkPCast32x4; break;
2191 case Iop_QNarrowBin32Uto16Ux8: pcast = mkPCast32x4; break;
2192 case Iop_QNarrowBin32Sto16Ux8: pcast = mkPCast32x4; break;
2193 case Iop_QNarrowBin16Sto8Sx16: pcast = mkPCast16x8; break;
2194 case Iop_QNarrowBin16Uto8Ux16: pcast = mkPCast16x8; break;
2195 case Iop_QNarrowBin16Sto8Ux16: pcast = mkPCast16x8; break;
2196 default: VG_(tool_panic)("vectorNarrowBinV128");
sewardja1d93302004-12-12 16:45:06 +00002197 }
sewardjb5a29232011-10-22 09:29:41 +00002198 IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op);
sewardja1d93302004-12-12 16:45:06 +00002199 tl_assert(isShadowAtom(mce,vatom1));
2200 tl_assert(isShadowAtom(mce,vatom2));
sewardj7cf4e6b2008-05-01 20:24:26 +00002201 at1 = assignNew('V', mce, Ity_V128, pcast(mce, vatom1));
2202 at2 = assignNew('V', mce, Ity_V128, pcast(mce, vatom2));
sewardjb5a29232011-10-22 09:29:41 +00002203 at3 = assignNew('V', mce, Ity_V128, binop(vanilla_narrow, at1, at2));
sewardja1d93302004-12-12 16:45:06 +00002204 return at3;
2205}
2206
sewardjacd2e912005-01-13 19:17:06 +00002207static
sewardj7ee7d852011-06-16 11:37:21 +00002208IRAtom* vectorNarrowBin64 ( MCEnv* mce, IROp narrow_op,
2209 IRAtom* vatom1, IRAtom* vatom2)
sewardjacd2e912005-01-13 19:17:06 +00002210{
2211 IRAtom *at1, *at2, *at3;
2212 IRAtom* (*pcast)( MCEnv*, IRAtom* );
2213 switch (narrow_op) {
sewardj7ee7d852011-06-16 11:37:21 +00002214 case Iop_QNarrowBin32Sto16Sx4: pcast = mkPCast32x2; break;
2215 case Iop_QNarrowBin16Sto8Sx8: pcast = mkPCast16x4; break;
2216 case Iop_QNarrowBin16Sto8Ux8: pcast = mkPCast16x4; break;
2217 default: VG_(tool_panic)("vectorNarrowBin64");
sewardjacd2e912005-01-13 19:17:06 +00002218 }
sewardjb5a29232011-10-22 09:29:41 +00002219 IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op);
sewardjacd2e912005-01-13 19:17:06 +00002220 tl_assert(isShadowAtom(mce,vatom1));
2221 tl_assert(isShadowAtom(mce,vatom2));
sewardj7cf4e6b2008-05-01 20:24:26 +00002222 at1 = assignNew('V', mce, Ity_I64, pcast(mce, vatom1));
2223 at2 = assignNew('V', mce, Ity_I64, pcast(mce, vatom2));
sewardjb5a29232011-10-22 09:29:41 +00002224 at3 = assignNew('V', mce, Ity_I64, binop(vanilla_narrow, at1, at2));
sewardjacd2e912005-01-13 19:17:06 +00002225 return at3;
2226}
2227
sewardj57f92b02010-08-22 11:54:14 +00002228static
sewardjb5a29232011-10-22 09:29:41 +00002229IRAtom* vectorNarrowUnV128 ( MCEnv* mce, IROp narrow_op,
sewardj7ee7d852011-06-16 11:37:21 +00002230 IRAtom* vatom1)
sewardj57f92b02010-08-22 11:54:14 +00002231{
2232 IRAtom *at1, *at2;
2233 IRAtom* (*pcast)( MCEnv*, IRAtom* );
sewardjb5a29232011-10-22 09:29:41 +00002234 tl_assert(isShadowAtom(mce,vatom1));
2235 /* For vanilla narrowing (non-saturating), we can just apply
2236 the op directly to the V bits. */
2237 switch (narrow_op) {
2238 case Iop_NarrowUn16to8x8:
2239 case Iop_NarrowUn32to16x4:
2240 case Iop_NarrowUn64to32x2:
2241 at1 = assignNew('V', mce, Ity_I64, unop(narrow_op, vatom1));
2242 return at1;
2243 default:
2244 break; /* Do Plan B */
2245 }
2246 /* Plan B: for ops that involve a saturation operation on the args,
2247 we must PCast before the vanilla narrow. */
2248 switch (narrow_op) {
sewardj7ee7d852011-06-16 11:37:21 +00002249 case Iop_QNarrowUn16Sto8Sx8: pcast = mkPCast16x8; break;
2250 case Iop_QNarrowUn16Sto8Ux8: pcast = mkPCast16x8; break;
2251 case Iop_QNarrowUn16Uto8Ux8: pcast = mkPCast16x8; break;
2252 case Iop_QNarrowUn32Sto16Sx4: pcast = mkPCast32x4; break;
2253 case Iop_QNarrowUn32Sto16Ux4: pcast = mkPCast32x4; break;
2254 case Iop_QNarrowUn32Uto16Ux4: pcast = mkPCast32x4; break;
2255 case Iop_QNarrowUn64Sto32Sx2: pcast = mkPCast64x2; break;
2256 case Iop_QNarrowUn64Sto32Ux2: pcast = mkPCast64x2; break;
2257 case Iop_QNarrowUn64Uto32Ux2: pcast = mkPCast64x2; break;
2258 default: VG_(tool_panic)("vectorNarrowUnV128");
sewardj57f92b02010-08-22 11:54:14 +00002259 }
sewardjb5a29232011-10-22 09:29:41 +00002260 IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op);
sewardj57f92b02010-08-22 11:54:14 +00002261 at1 = assignNew('V', mce, Ity_V128, pcast(mce, vatom1));
sewardjb5a29232011-10-22 09:29:41 +00002262 at2 = assignNew('V', mce, Ity_I64, unop(vanilla_narrow, at1));
sewardj57f92b02010-08-22 11:54:14 +00002263 return at2;
2264}
2265
2266static
sewardj7ee7d852011-06-16 11:37:21 +00002267IRAtom* vectorWidenI64 ( MCEnv* mce, IROp longen_op,
2268 IRAtom* vatom1)
sewardj57f92b02010-08-22 11:54:14 +00002269{
2270 IRAtom *at1, *at2;
2271 IRAtom* (*pcast)( MCEnv*, IRAtom* );
2272 switch (longen_op) {
sewardj7ee7d852011-06-16 11:37:21 +00002273 case Iop_Widen8Uto16x8: pcast = mkPCast16x8; break;
2274 case Iop_Widen8Sto16x8: pcast = mkPCast16x8; break;
2275 case Iop_Widen16Uto32x4: pcast = mkPCast32x4; break;
2276 case Iop_Widen16Sto32x4: pcast = mkPCast32x4; break;
2277 case Iop_Widen32Uto64x2: pcast = mkPCast64x2; break;
2278 case Iop_Widen32Sto64x2: pcast = mkPCast64x2; break;
2279 default: VG_(tool_panic)("vectorWidenI64");
sewardj57f92b02010-08-22 11:54:14 +00002280 }
2281 tl_assert(isShadowAtom(mce,vatom1));
2282 at1 = assignNew('V', mce, Ity_V128, unop(longen_op, vatom1));
2283 at2 = assignNew('V', mce, Ity_V128, pcast(mce, at1));
2284 return at2;
2285}
2286
sewardja1d93302004-12-12 16:45:06 +00002287
2288/* --- --- Vector integer arithmetic --- --- */
2289
2290/* Simple ... UifU the args and per-lane pessimise the results. */
sewardjacd2e912005-01-13 19:17:06 +00002291
sewardj20d38f22005-02-07 23:50:18 +00002292/* --- V128-bit versions --- */
sewardjacd2e912005-01-13 19:17:06 +00002293
sewardja1d93302004-12-12 16:45:06 +00002294static
2295IRAtom* binary8Ix16 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2296{
2297 IRAtom* at;
sewardj20d38f22005-02-07 23:50:18 +00002298 at = mkUifUV128(mce, vatom1, vatom2);
sewardja1d93302004-12-12 16:45:06 +00002299 at = mkPCast8x16(mce, at);
2300 return at;
2301}
2302
2303static
2304IRAtom* binary16Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2305{
2306 IRAtom* at;
sewardj20d38f22005-02-07 23:50:18 +00002307 at = mkUifUV128(mce, vatom1, vatom2);
sewardja1d93302004-12-12 16:45:06 +00002308 at = mkPCast16x8(mce, at);
2309 return at;
2310}
2311
2312static
2313IRAtom* binary32Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2314{
2315 IRAtom* at;
sewardj20d38f22005-02-07 23:50:18 +00002316 at = mkUifUV128(mce, vatom1, vatom2);
sewardja1d93302004-12-12 16:45:06 +00002317 at = mkPCast32x4(mce, at);
2318 return at;
2319}
2320
2321static
2322IRAtom* binary64Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2323{
2324 IRAtom* at;
sewardj20d38f22005-02-07 23:50:18 +00002325 at = mkUifUV128(mce, vatom1, vatom2);
sewardja1d93302004-12-12 16:45:06 +00002326 at = mkPCast64x2(mce, at);
2327 return at;
2328}
sewardj3245c912004-12-10 14:58:26 +00002329
sewardjacd2e912005-01-13 19:17:06 +00002330/* --- 64-bit versions --- */
2331
2332static
2333IRAtom* binary8Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2334{
2335 IRAtom* at;
2336 at = mkUifU64(mce, vatom1, vatom2);
2337 at = mkPCast8x8(mce, at);
2338 return at;
2339}
2340
2341static
2342IRAtom* binary16Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2343{
2344 IRAtom* at;
2345 at = mkUifU64(mce, vatom1, vatom2);
2346 at = mkPCast16x4(mce, at);
2347 return at;
2348}
2349
2350static
2351IRAtom* binary32Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2352{
2353 IRAtom* at;
2354 at = mkUifU64(mce, vatom1, vatom2);
2355 at = mkPCast32x2(mce, at);
2356 return at;
2357}
2358
sewardj57f92b02010-08-22 11:54:14 +00002359static
2360IRAtom* binary64Ix1 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2361{
2362 IRAtom* at;
2363 at = mkUifU64(mce, vatom1, vatom2);
2364 at = mkPCastTo(mce, Ity_I64, at);
2365 return at;
2366}
2367
sewardjc678b852010-09-22 00:58:51 +00002368/* --- 32-bit versions --- */
2369
2370static
2371IRAtom* binary8Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2372{
2373 IRAtom* at;
2374 at = mkUifU32(mce, vatom1, vatom2);
2375 at = mkPCast8x4(mce, at);
2376 return at;
2377}
2378
2379static
2380IRAtom* binary16Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2381{
2382 IRAtom* at;
2383 at = mkUifU32(mce, vatom1, vatom2);
2384 at = mkPCast16x2(mce, at);
2385 return at;
2386}
2387
sewardj3245c912004-12-10 14:58:26 +00002388
2389/*------------------------------------------------------------*/
sewardj95448072004-11-22 20:19:51 +00002390/*--- Generate shadow values from all kinds of IRExprs. ---*/
2391/*------------------------------------------------------------*/
2392
/* Compute the shadow (V-bit) value for a quaternary (4-operand) IR
   expression.  All four args have their shadow values computed
   first (emitting IR as a side effect), then the result vbits are
   formed per-op.  Panics on any op not listed. */
static
IRAtom* expr2vbits_Qop ( MCEnv* mce,
                         IROp op,
                         IRAtom* atom1, IRAtom* atom2,
                         IRAtom* atom3, IRAtom* atom4 )
{
   /* Shadow values for the four operands; these calls emit IR in
      order, so their sequence must be preserved. */
   IRAtom* vatom1 = expr2vbits( mce, atom1 );
   IRAtom* vatom2 = expr2vbits( mce, atom2 );
   IRAtom* vatom3 = expr2vbits( mce, atom3 );
   IRAtom* vatom4 = expr2vbits( mce, atom4 );

   /* Sanity: originals are original atoms, shadows are shadow
      atoms, and each original/shadow pair is of the same kind. */
   tl_assert(isOriginalAtom(mce,atom1));
   tl_assert(isOriginalAtom(mce,atom2));
   tl_assert(isOriginalAtom(mce,atom3));
   tl_assert(isOriginalAtom(mce,atom4));
   tl_assert(isShadowAtom(mce,vatom1));
   tl_assert(isShadowAtom(mce,vatom2));
   tl_assert(isShadowAtom(mce,vatom3));
   tl_assert(isShadowAtom(mce,vatom4));
   tl_assert(sameKindedAtoms(atom1,vatom1));
   tl_assert(sameKindedAtoms(atom2,vatom2));
   tl_assert(sameKindedAtoms(atom3,vatom3));
   tl_assert(sameKindedAtoms(atom4,vatom4));
   switch (op) {
      case Iop_MAddF64:
      case Iop_MAddF64r32:
      case Iop_MSubF64:
      case Iop_MSubF64r32:
         /* I32(rm) x F64 x F64 x F64 -> F64; shadow result is
            the lazy worst-case over all four operands. */
         return mkLazy4(mce, Ity_I64, vatom1, vatom2, vatom3, vatom4);

      case Iop_MAddF32:
      case Iop_MSubF32:
         /* I32(rm) x F32 x F32 x F32 -> F32 */
         return mkLazy4(mce, Ity_I32, vatom1, vatom2, vatom3, vatom4);

      /* V256-bit data-steering: lanes are just moved, so apply the
         same op to the shadow values directly. */
      case Iop_64x4toV256:
         return assignNew('V', mce, Ity_V256,
                          IRExpr_Qop(op, vatom1, vatom2, vatom3, vatom4));

      default:
         ppIROp(op);
         VG_(tool_panic)("memcheck:expr2vbits_Qop");
   }
}
2439
2440
/* Compute the shadow (V-bit) value for a ternary (3-operand) IR
   expression.  Shadow values for the operands are computed first
   (emitting IR in order), then the result vbits are formed per-op:
   floating/decimal arithmetic uses the lazy worst-case scheme;
   data-steering ops apply the op to the shadows, complaining if a
   steering operand is itself undefined.  Panics on unlisted ops. */
static
IRAtom* expr2vbits_Triop ( MCEnv* mce,
                           IROp op,
                           IRAtom* atom1, IRAtom* atom2, IRAtom* atom3 )
{
   /* Shadow values for the three operands; order of these calls
      fixes the order of emitted IR. */
   IRAtom* vatom1 = expr2vbits( mce, atom1 );
   IRAtom* vatom2 = expr2vbits( mce, atom2 );
   IRAtom* vatom3 = expr2vbits( mce, atom3 );

   /* Sanity: originals vs shadows, and kind agreement per pair. */
   tl_assert(isOriginalAtom(mce,atom1));
   tl_assert(isOriginalAtom(mce,atom2));
   tl_assert(isOriginalAtom(mce,atom3));
   tl_assert(isShadowAtom(mce,vatom1));
   tl_assert(isShadowAtom(mce,vatom2));
   tl_assert(isShadowAtom(mce,vatom3));
   tl_assert(sameKindedAtoms(atom1,vatom1));
   tl_assert(sameKindedAtoms(atom2,vatom2));
   tl_assert(sameKindedAtoms(atom3,vatom3));
   switch (op) {
      case Iop_AddF128:
      case Iop_AddD128:
      case Iop_SubF128:
      case Iop_SubD128:
      case Iop_MulF128:
      case Iop_MulD128:
      case Iop_DivF128:
      case Iop_DivD128:
      case Iop_QuantizeD128:
         /* I32(rm) x F128/D128 x F128/D128 -> F128/D128 */
         return mkLazy3(mce, Ity_I128, vatom1, vatom2, vatom3);
      case Iop_AddF64:
      case Iop_AddD64:
      case Iop_AddF64r32:
      case Iop_SubF64:
      case Iop_SubD64:
      case Iop_SubF64r32:
      case Iop_MulF64:
      case Iop_MulD64:
      case Iop_MulF64r32:
      case Iop_DivF64:
      case Iop_DivD64:
      case Iop_DivF64r32:
      case Iop_ScaleF64:
      case Iop_Yl2xF64:
      case Iop_Yl2xp1F64:
      case Iop_AtanF64:
      case Iop_PRemF64:
      case Iop_PRem1F64:
      case Iop_QuantizeD64:
         /* I32(rm) x F64/D64 x F64/D64 -> F64/D64 */
         return mkLazy3(mce, Ity_I64, vatom1, vatom2, vatom3);
      case Iop_PRemC3210F64:
      case Iop_PRem1C3210F64:
         /* I32(rm) x F64 x F64 -> I32 */
         return mkLazy3(mce, Ity_I32, vatom1, vatom2, vatom3);
      case Iop_AddF32:
      case Iop_SubF32:
      case Iop_MulF32:
      case Iop_DivF32:
         /* I32(rm) x F32 x F32 -> F32 (shadow type is I32, the
            same width as F32) */
         return mkLazy3(mce, Ity_I32, vatom1, vatom2, vatom3);
      case Iop_SignificanceRoundD64:
         /* IRRoundingModeDFP(I32) x I8 x D64 -> D64 */
         return mkLazy3(mce, Ity_I64, vatom1, vatom2, vatom3);
      case Iop_SignificanceRoundD128:
         /* IRRoundingModeDFP(I32) x I8 x D128 -> D128 */
         return mkLazy3(mce, Ity_I128, vatom1, vatom2, vatom3);
      case Iop_ExtractV128:
         /* The extraction index (atom3) steers data, so it must be
            fully defined; then apply the op to the shadows. */
         complainIfUndefined(mce, atom3, NULL);
         return assignNew('V', mce, Ity_V128, triop(op, vatom1, vatom2, atom3));
      case Iop_Extract64:
         /* As above, for the 64-bit variant. */
         complainIfUndefined(mce, atom3, NULL);
         return assignNew('V', mce, Ity_I64, triop(op, vatom1, vatom2, atom3));
      case Iop_SetElem8x8:
      case Iop_SetElem16x4:
      case Iop_SetElem32x2:
         /* Here the lane index is atom2; it must be defined. */
         complainIfUndefined(mce, atom2, NULL);
         return assignNew('V', mce, Ity_I64, triop(op, vatom1, atom2, vatom3));
      default:
         ppIROp(op);
         VG_(tool_panic)("memcheck:expr2vbits_Triop");
   }
}
2524
2525
2526static
sewardj95448072004-11-22 20:19:51 +00002527IRAtom* expr2vbits_Binop ( MCEnv* mce,
2528 IROp op,
2529 IRAtom* atom1, IRAtom* atom2 )
2530{
2531 IRType and_or_ty;
2532 IRAtom* (*uifu) (MCEnv*, IRAtom*, IRAtom*);
2533 IRAtom* (*difd) (MCEnv*, IRAtom*, IRAtom*);
2534 IRAtom* (*improve) (MCEnv*, IRAtom*, IRAtom*);
2535
2536 IRAtom* vatom1 = expr2vbits( mce, atom1 );
2537 IRAtom* vatom2 = expr2vbits( mce, atom2 );
2538
2539 tl_assert(isOriginalAtom(mce,atom1));
2540 tl_assert(isOriginalAtom(mce,atom2));
2541 tl_assert(isShadowAtom(mce,vatom1));
2542 tl_assert(isShadowAtom(mce,vatom2));
2543 tl_assert(sameKindedAtoms(atom1,vatom1));
2544 tl_assert(sameKindedAtoms(atom2,vatom2));
2545 switch (op) {
2546
sewardjc678b852010-09-22 00:58:51 +00002547 /* 32-bit SIMD */
2548
2549 case Iop_Add16x2:
2550 case Iop_HAdd16Ux2:
2551 case Iop_HAdd16Sx2:
2552 case Iop_Sub16x2:
2553 case Iop_HSub16Ux2:
2554 case Iop_HSub16Sx2:
2555 case Iop_QAdd16Sx2:
2556 case Iop_QSub16Sx2:
sewardj9fb31092012-09-17 15:28:46 +00002557 case Iop_QSub16Ux2:
sewardjc678b852010-09-22 00:58:51 +00002558 return binary16Ix2(mce, vatom1, vatom2);
2559
2560 case Iop_Add8x4:
2561 case Iop_HAdd8Ux4:
2562 case Iop_HAdd8Sx4:
2563 case Iop_Sub8x4:
2564 case Iop_HSub8Ux4:
2565 case Iop_HSub8Sx4:
2566 case Iop_QSub8Ux4:
2567 case Iop_QAdd8Ux4:
2568 case Iop_QSub8Sx4:
2569 case Iop_QAdd8Sx4:
2570 return binary8Ix4(mce, vatom1, vatom2);
2571
sewardjacd2e912005-01-13 19:17:06 +00002572 /* 64-bit SIMD */
2573
sewardj57f92b02010-08-22 11:54:14 +00002574 case Iop_ShrN8x8:
sewardjacd2e912005-01-13 19:17:06 +00002575 case Iop_ShrN16x4:
2576 case Iop_ShrN32x2:
sewardj03809ae2006-12-27 01:16:58 +00002577 case Iop_SarN8x8:
sewardjacd2e912005-01-13 19:17:06 +00002578 case Iop_SarN16x4:
2579 case Iop_SarN32x2:
2580 case Iop_ShlN16x4:
2581 case Iop_ShlN32x2:
sewardj114a9172008-02-09 01:49:32 +00002582 case Iop_ShlN8x8:
sewardjacd2e912005-01-13 19:17:06 +00002583 /* Same scheme as with all other shifts. */
florian434ffae2012-07-19 17:23:42 +00002584 complainIfUndefined(mce, atom2, NULL);
sewardj7cf4e6b2008-05-01 20:24:26 +00002585 return assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2));
sewardjacd2e912005-01-13 19:17:06 +00002586
sewardj7ee7d852011-06-16 11:37:21 +00002587 case Iop_QNarrowBin32Sto16Sx4:
2588 case Iop_QNarrowBin16Sto8Sx8:
2589 case Iop_QNarrowBin16Sto8Ux8:
2590 return vectorNarrowBin64(mce, op, vatom1, vatom2);
sewardjacd2e912005-01-13 19:17:06 +00002591
2592 case Iop_Min8Ux8:
sewardj57f92b02010-08-22 11:54:14 +00002593 case Iop_Min8Sx8:
sewardjacd2e912005-01-13 19:17:06 +00002594 case Iop_Max8Ux8:
sewardj57f92b02010-08-22 11:54:14 +00002595 case Iop_Max8Sx8:
sewardjacd2e912005-01-13 19:17:06 +00002596 case Iop_Avg8Ux8:
2597 case Iop_QSub8Sx8:
2598 case Iop_QSub8Ux8:
2599 case Iop_Sub8x8:
2600 case Iop_CmpGT8Sx8:
sewardj57f92b02010-08-22 11:54:14 +00002601 case Iop_CmpGT8Ux8:
sewardjacd2e912005-01-13 19:17:06 +00002602 case Iop_CmpEQ8x8:
2603 case Iop_QAdd8Sx8:
2604 case Iop_QAdd8Ux8:
sewardj57f92b02010-08-22 11:54:14 +00002605 case Iop_QSal8x8:
2606 case Iop_QShl8x8:
sewardjacd2e912005-01-13 19:17:06 +00002607 case Iop_Add8x8:
sewardj57f92b02010-08-22 11:54:14 +00002608 case Iop_Mul8x8:
2609 case Iop_PolynomialMul8x8:
sewardjacd2e912005-01-13 19:17:06 +00002610 return binary8Ix8(mce, vatom1, vatom2);
2611
2612 case Iop_Min16Sx4:
sewardj57f92b02010-08-22 11:54:14 +00002613 case Iop_Min16Ux4:
sewardjacd2e912005-01-13 19:17:06 +00002614 case Iop_Max16Sx4:
sewardj57f92b02010-08-22 11:54:14 +00002615 case Iop_Max16Ux4:
sewardjacd2e912005-01-13 19:17:06 +00002616 case Iop_Avg16Ux4:
2617 case Iop_QSub16Ux4:
2618 case Iop_QSub16Sx4:
2619 case Iop_Sub16x4:
2620 case Iop_Mul16x4:
2621 case Iop_MulHi16Sx4:
2622 case Iop_MulHi16Ux4:
2623 case Iop_CmpGT16Sx4:
sewardj57f92b02010-08-22 11:54:14 +00002624 case Iop_CmpGT16Ux4:
sewardjacd2e912005-01-13 19:17:06 +00002625 case Iop_CmpEQ16x4:
2626 case Iop_QAdd16Sx4:
2627 case Iop_QAdd16Ux4:
sewardj57f92b02010-08-22 11:54:14 +00002628 case Iop_QSal16x4:
2629 case Iop_QShl16x4:
sewardjacd2e912005-01-13 19:17:06 +00002630 case Iop_Add16x4:
sewardj57f92b02010-08-22 11:54:14 +00002631 case Iop_QDMulHi16Sx4:
2632 case Iop_QRDMulHi16Sx4:
sewardjacd2e912005-01-13 19:17:06 +00002633 return binary16Ix4(mce, vatom1, vatom2);
2634
2635 case Iop_Sub32x2:
sewardj114a9172008-02-09 01:49:32 +00002636 case Iop_Mul32x2:
sewardj57f92b02010-08-22 11:54:14 +00002637 case Iop_Max32Sx2:
2638 case Iop_Max32Ux2:
2639 case Iop_Min32Sx2:
2640 case Iop_Min32Ux2:
sewardjacd2e912005-01-13 19:17:06 +00002641 case Iop_CmpGT32Sx2:
sewardj57f92b02010-08-22 11:54:14 +00002642 case Iop_CmpGT32Ux2:
sewardjacd2e912005-01-13 19:17:06 +00002643 case Iop_CmpEQ32x2:
2644 case Iop_Add32x2:
sewardj57f92b02010-08-22 11:54:14 +00002645 case Iop_QAdd32Ux2:
2646 case Iop_QAdd32Sx2:
2647 case Iop_QSub32Ux2:
2648 case Iop_QSub32Sx2:
2649 case Iop_QSal32x2:
2650 case Iop_QShl32x2:
2651 case Iop_QDMulHi32Sx2:
2652 case Iop_QRDMulHi32Sx2:
sewardjacd2e912005-01-13 19:17:06 +00002653 return binary32Ix2(mce, vatom1, vatom2);
2654
sewardj57f92b02010-08-22 11:54:14 +00002655 case Iop_QSub64Ux1:
2656 case Iop_QSub64Sx1:
2657 case Iop_QAdd64Ux1:
2658 case Iop_QAdd64Sx1:
2659 case Iop_QSal64x1:
2660 case Iop_QShl64x1:
2661 case Iop_Sal64x1:
2662 return binary64Ix1(mce, vatom1, vatom2);
2663
2664 case Iop_QShlN8Sx8:
2665 case Iop_QShlN8x8:
2666 case Iop_QSalN8x8:
florian434ffae2012-07-19 17:23:42 +00002667 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00002668 return mkPCast8x8(mce, vatom1);
2669
2670 case Iop_QShlN16Sx4:
2671 case Iop_QShlN16x4:
2672 case Iop_QSalN16x4:
florian434ffae2012-07-19 17:23:42 +00002673 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00002674 return mkPCast16x4(mce, vatom1);
2675
2676 case Iop_QShlN32Sx2:
2677 case Iop_QShlN32x2:
2678 case Iop_QSalN32x2:
florian434ffae2012-07-19 17:23:42 +00002679 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00002680 return mkPCast32x2(mce, vatom1);
2681
2682 case Iop_QShlN64Sx1:
2683 case Iop_QShlN64x1:
2684 case Iop_QSalN64x1:
florian434ffae2012-07-19 17:23:42 +00002685 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00002686 return mkPCast32x2(mce, vatom1);
2687
2688 case Iop_PwMax32Sx2:
2689 case Iop_PwMax32Ux2:
2690 case Iop_PwMin32Sx2:
2691 case Iop_PwMin32Ux2:
2692 case Iop_PwMax32Fx2:
2693 case Iop_PwMin32Fx2:
sewardj350e8f72012-06-25 07:52:15 +00002694 return assignNew('V', mce, Ity_I64,
2695 binop(Iop_PwMax32Ux2,
2696 mkPCast32x2(mce, vatom1),
2697 mkPCast32x2(mce, vatom2)));
sewardj57f92b02010-08-22 11:54:14 +00002698
2699 case Iop_PwMax16Sx4:
2700 case Iop_PwMax16Ux4:
2701 case Iop_PwMin16Sx4:
2702 case Iop_PwMin16Ux4:
sewardj350e8f72012-06-25 07:52:15 +00002703 return assignNew('V', mce, Ity_I64,
2704 binop(Iop_PwMax16Ux4,
2705 mkPCast16x4(mce, vatom1),
2706 mkPCast16x4(mce, vatom2)));
sewardj57f92b02010-08-22 11:54:14 +00002707
2708 case Iop_PwMax8Sx8:
2709 case Iop_PwMax8Ux8:
2710 case Iop_PwMin8Sx8:
2711 case Iop_PwMin8Ux8:
sewardj350e8f72012-06-25 07:52:15 +00002712 return assignNew('V', mce, Ity_I64,
2713 binop(Iop_PwMax8Ux8,
2714 mkPCast8x8(mce, vatom1),
2715 mkPCast8x8(mce, vatom2)));
sewardj57f92b02010-08-22 11:54:14 +00002716
2717 case Iop_PwAdd32x2:
2718 case Iop_PwAdd32Fx2:
2719 return mkPCast32x2(mce,
sewardj350e8f72012-06-25 07:52:15 +00002720 assignNew('V', mce, Ity_I64,
2721 binop(Iop_PwAdd32x2,
2722 mkPCast32x2(mce, vatom1),
2723 mkPCast32x2(mce, vatom2))));
sewardj57f92b02010-08-22 11:54:14 +00002724
2725 case Iop_PwAdd16x4:
2726 return mkPCast16x4(mce,
sewardj350e8f72012-06-25 07:52:15 +00002727 assignNew('V', mce, Ity_I64,
2728 binop(op, mkPCast16x4(mce, vatom1),
2729 mkPCast16x4(mce, vatom2))));
sewardj57f92b02010-08-22 11:54:14 +00002730
2731 case Iop_PwAdd8x8:
2732 return mkPCast8x8(mce,
sewardj350e8f72012-06-25 07:52:15 +00002733 assignNew('V', mce, Ity_I64,
2734 binop(op, mkPCast8x8(mce, vatom1),
2735 mkPCast8x8(mce, vatom2))));
sewardj57f92b02010-08-22 11:54:14 +00002736
2737 case Iop_Shl8x8:
2738 case Iop_Shr8x8:
2739 case Iop_Sar8x8:
2740 case Iop_Sal8x8:
2741 return mkUifU64(mce,
2742 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
2743 mkPCast8x8(mce,vatom2)
2744 );
2745
2746 case Iop_Shl16x4:
2747 case Iop_Shr16x4:
2748 case Iop_Sar16x4:
2749 case Iop_Sal16x4:
2750 return mkUifU64(mce,
2751 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
2752 mkPCast16x4(mce,vatom2)
2753 );
2754
2755 case Iop_Shl32x2:
2756 case Iop_Shr32x2:
2757 case Iop_Sar32x2:
2758 case Iop_Sal32x2:
2759 return mkUifU64(mce,
2760 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
2761 mkPCast32x2(mce,vatom2)
2762 );
2763
sewardjacd2e912005-01-13 19:17:06 +00002764 /* 64-bit data-steering */
2765 case Iop_InterleaveLO32x2:
2766 case Iop_InterleaveLO16x4:
2767 case Iop_InterleaveLO8x8:
2768 case Iop_InterleaveHI32x2:
2769 case Iop_InterleaveHI16x4:
2770 case Iop_InterleaveHI8x8:
sewardj57f92b02010-08-22 11:54:14 +00002771 case Iop_CatOddLanes8x8:
2772 case Iop_CatEvenLanes8x8:
sewardj114a9172008-02-09 01:49:32 +00002773 case Iop_CatOddLanes16x4:
2774 case Iop_CatEvenLanes16x4:
sewardj57f92b02010-08-22 11:54:14 +00002775 case Iop_InterleaveOddLanes8x8:
2776 case Iop_InterleaveEvenLanes8x8:
2777 case Iop_InterleaveOddLanes16x4:
2778 case Iop_InterleaveEvenLanes16x4:
sewardj7cf4e6b2008-05-01 20:24:26 +00002779 return assignNew('V', mce, Ity_I64, binop(op, vatom1, vatom2));
sewardjacd2e912005-01-13 19:17:06 +00002780
sewardj57f92b02010-08-22 11:54:14 +00002781 case Iop_GetElem8x8:
florian434ffae2012-07-19 17:23:42 +00002782 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00002783 return assignNew('V', mce, Ity_I8, binop(op, vatom1, atom2));
2784 case Iop_GetElem16x4:
florian434ffae2012-07-19 17:23:42 +00002785 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00002786 return assignNew('V', mce, Ity_I16, binop(op, vatom1, atom2));
2787 case Iop_GetElem32x2:
florian434ffae2012-07-19 17:23:42 +00002788 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00002789 return assignNew('V', mce, Ity_I32, binop(op, vatom1, atom2));
2790
sewardj114a9172008-02-09 01:49:32 +00002791 /* Perm8x8: rearrange values in left arg using steering values
2792 from right arg. So rearrange the vbits in the same way but
2793 pessimise wrt steering values. */
2794 case Iop_Perm8x8:
2795 return mkUifU64(
2796 mce,
sewardj7cf4e6b2008-05-01 20:24:26 +00002797 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
sewardj114a9172008-02-09 01:49:32 +00002798 mkPCast8x8(mce, vatom2)
2799 );
2800
sewardj20d38f22005-02-07 23:50:18 +00002801 /* V128-bit SIMD */
sewardj0b070592004-12-10 21:44:22 +00002802
sewardj57f92b02010-08-22 11:54:14 +00002803 case Iop_ShrN8x16:
sewardja1d93302004-12-12 16:45:06 +00002804 case Iop_ShrN16x8:
2805 case Iop_ShrN32x4:
2806 case Iop_ShrN64x2:
sewardj57f92b02010-08-22 11:54:14 +00002807 case Iop_SarN8x16:
sewardja1d93302004-12-12 16:45:06 +00002808 case Iop_SarN16x8:
2809 case Iop_SarN32x4:
sewardj57f92b02010-08-22 11:54:14 +00002810 case Iop_SarN64x2:
2811 case Iop_ShlN8x16:
sewardja1d93302004-12-12 16:45:06 +00002812 case Iop_ShlN16x8:
2813 case Iop_ShlN32x4:
2814 case Iop_ShlN64x2:
sewardj620eb5b2005-10-22 12:50:43 +00002815 /* Same scheme as with all other shifts. Note: 22 Oct 05:
2816 this is wrong now, scalar shifts are done properly lazily.
2817 Vector shifts should be fixed too. */
florian434ffae2012-07-19 17:23:42 +00002818 complainIfUndefined(mce, atom2, NULL);
sewardj7cf4e6b2008-05-01 20:24:26 +00002819 return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2));
sewardja1d93302004-12-12 16:45:06 +00002820
sewardjcbf8be72005-11-10 18:34:41 +00002821 /* V x V shifts/rotates are done using the standard lazy scheme. */
sewardj43d60752005-11-10 18:13:01 +00002822 case Iop_Shl8x16:
2823 case Iop_Shr8x16:
2824 case Iop_Sar8x16:
sewardj57f92b02010-08-22 11:54:14 +00002825 case Iop_Sal8x16:
sewardjcbf8be72005-11-10 18:34:41 +00002826 case Iop_Rol8x16:
sewardj43d60752005-11-10 18:13:01 +00002827 return mkUifUV128(mce,
sewardj7cf4e6b2008-05-01 20:24:26 +00002828 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
sewardj43d60752005-11-10 18:13:01 +00002829 mkPCast8x16(mce,vatom2)
2830 );
2831
2832 case Iop_Shl16x8:
2833 case Iop_Shr16x8:
2834 case Iop_Sar16x8:
sewardj57f92b02010-08-22 11:54:14 +00002835 case Iop_Sal16x8:
sewardjcbf8be72005-11-10 18:34:41 +00002836 case Iop_Rol16x8:
sewardj43d60752005-11-10 18:13:01 +00002837 return mkUifUV128(mce,
sewardj7cf4e6b2008-05-01 20:24:26 +00002838 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
sewardj43d60752005-11-10 18:13:01 +00002839 mkPCast16x8(mce,vatom2)
2840 );
2841
2842 case Iop_Shl32x4:
2843 case Iop_Shr32x4:
2844 case Iop_Sar32x4:
sewardj57f92b02010-08-22 11:54:14 +00002845 case Iop_Sal32x4:
sewardjcbf8be72005-11-10 18:34:41 +00002846 case Iop_Rol32x4:
sewardj43d60752005-11-10 18:13:01 +00002847 return mkUifUV128(mce,
sewardj7cf4e6b2008-05-01 20:24:26 +00002848 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
sewardj43d60752005-11-10 18:13:01 +00002849 mkPCast32x4(mce,vatom2)
2850 );
2851
sewardj57f92b02010-08-22 11:54:14 +00002852 case Iop_Shl64x2:
2853 case Iop_Shr64x2:
2854 case Iop_Sar64x2:
2855 case Iop_Sal64x2:
2856 return mkUifUV128(mce,
2857 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
2858 mkPCast64x2(mce,vatom2)
2859 );
2860
2861 case Iop_F32ToFixed32Ux4_RZ:
2862 case Iop_F32ToFixed32Sx4_RZ:
2863 case Iop_Fixed32UToF32x4_RN:
2864 case Iop_Fixed32SToF32x4_RN:
florian434ffae2012-07-19 17:23:42 +00002865 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00002866 return mkPCast32x4(mce, vatom1);
2867
2868 case Iop_F32ToFixed32Ux2_RZ:
2869 case Iop_F32ToFixed32Sx2_RZ:
2870 case Iop_Fixed32UToF32x2_RN:
2871 case Iop_Fixed32SToF32x2_RN:
florian434ffae2012-07-19 17:23:42 +00002872 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00002873 return mkPCast32x2(mce, vatom1);
2874
sewardja1d93302004-12-12 16:45:06 +00002875 case Iop_QSub8Ux16:
2876 case Iop_QSub8Sx16:
2877 case Iop_Sub8x16:
2878 case Iop_Min8Ux16:
sewardj43d60752005-11-10 18:13:01 +00002879 case Iop_Min8Sx16:
sewardja1d93302004-12-12 16:45:06 +00002880 case Iop_Max8Ux16:
sewardj43d60752005-11-10 18:13:01 +00002881 case Iop_Max8Sx16:
sewardja1d93302004-12-12 16:45:06 +00002882 case Iop_CmpGT8Sx16:
sewardj43d60752005-11-10 18:13:01 +00002883 case Iop_CmpGT8Ux16:
sewardja1d93302004-12-12 16:45:06 +00002884 case Iop_CmpEQ8x16:
2885 case Iop_Avg8Ux16:
sewardj43d60752005-11-10 18:13:01 +00002886 case Iop_Avg8Sx16:
sewardja1d93302004-12-12 16:45:06 +00002887 case Iop_QAdd8Ux16:
2888 case Iop_QAdd8Sx16:
sewardj57f92b02010-08-22 11:54:14 +00002889 case Iop_QSal8x16:
2890 case Iop_QShl8x16:
sewardja1d93302004-12-12 16:45:06 +00002891 case Iop_Add8x16:
sewardj57f92b02010-08-22 11:54:14 +00002892 case Iop_Mul8x16:
2893 case Iop_PolynomialMul8x16:
sewardja1d93302004-12-12 16:45:06 +00002894 return binary8Ix16(mce, vatom1, vatom2);
2895
2896 case Iop_QSub16Ux8:
2897 case Iop_QSub16Sx8:
2898 case Iop_Sub16x8:
2899 case Iop_Mul16x8:
2900 case Iop_MulHi16Sx8:
2901 case Iop_MulHi16Ux8:
2902 case Iop_Min16Sx8:
sewardj43d60752005-11-10 18:13:01 +00002903 case Iop_Min16Ux8:
sewardja1d93302004-12-12 16:45:06 +00002904 case Iop_Max16Sx8:
sewardj43d60752005-11-10 18:13:01 +00002905 case Iop_Max16Ux8:
sewardja1d93302004-12-12 16:45:06 +00002906 case Iop_CmpGT16Sx8:
sewardj43d60752005-11-10 18:13:01 +00002907 case Iop_CmpGT16Ux8:
sewardja1d93302004-12-12 16:45:06 +00002908 case Iop_CmpEQ16x8:
2909 case Iop_Avg16Ux8:
sewardj43d60752005-11-10 18:13:01 +00002910 case Iop_Avg16Sx8:
sewardja1d93302004-12-12 16:45:06 +00002911 case Iop_QAdd16Ux8:
2912 case Iop_QAdd16Sx8:
sewardj57f92b02010-08-22 11:54:14 +00002913 case Iop_QSal16x8:
2914 case Iop_QShl16x8:
sewardja1d93302004-12-12 16:45:06 +00002915 case Iop_Add16x8:
sewardj57f92b02010-08-22 11:54:14 +00002916 case Iop_QDMulHi16Sx8:
2917 case Iop_QRDMulHi16Sx8:
sewardja1d93302004-12-12 16:45:06 +00002918 return binary16Ix8(mce, vatom1, vatom2);
2919
2920 case Iop_Sub32x4:
2921 case Iop_CmpGT32Sx4:
sewardj43d60752005-11-10 18:13:01 +00002922 case Iop_CmpGT32Ux4:
sewardja1d93302004-12-12 16:45:06 +00002923 case Iop_CmpEQ32x4:
sewardj43d60752005-11-10 18:13:01 +00002924 case Iop_QAdd32Sx4:
2925 case Iop_QAdd32Ux4:
2926 case Iop_QSub32Sx4:
2927 case Iop_QSub32Ux4:
sewardj57f92b02010-08-22 11:54:14 +00002928 case Iop_QSal32x4:
2929 case Iop_QShl32x4:
sewardj43d60752005-11-10 18:13:01 +00002930 case Iop_Avg32Ux4:
2931 case Iop_Avg32Sx4:
sewardja1d93302004-12-12 16:45:06 +00002932 case Iop_Add32x4:
sewardj43d60752005-11-10 18:13:01 +00002933 case Iop_Max32Ux4:
2934 case Iop_Max32Sx4:
2935 case Iop_Min32Ux4:
2936 case Iop_Min32Sx4:
sewardjb823b852010-06-18 08:18:38 +00002937 case Iop_Mul32x4:
sewardj57f92b02010-08-22 11:54:14 +00002938 case Iop_QDMulHi32Sx4:
2939 case Iop_QRDMulHi32Sx4:
sewardja1d93302004-12-12 16:45:06 +00002940 return binary32Ix4(mce, vatom1, vatom2);
2941
2942 case Iop_Sub64x2:
2943 case Iop_Add64x2:
sewardj9a2afe92011-10-19 15:24:55 +00002944 case Iop_CmpEQ64x2:
sewardjb823b852010-06-18 08:18:38 +00002945 case Iop_CmpGT64Sx2:
sewardj57f92b02010-08-22 11:54:14 +00002946 case Iop_QSal64x2:
2947 case Iop_QShl64x2:
2948 case Iop_QAdd64Ux2:
2949 case Iop_QAdd64Sx2:
2950 case Iop_QSub64Ux2:
2951 case Iop_QSub64Sx2:
sewardja1d93302004-12-12 16:45:06 +00002952 return binary64Ix2(mce, vatom1, vatom2);
2953
sewardj7ee7d852011-06-16 11:37:21 +00002954 case Iop_QNarrowBin32Sto16Sx8:
2955 case Iop_QNarrowBin32Uto16Ux8:
2956 case Iop_QNarrowBin32Sto16Ux8:
2957 case Iop_QNarrowBin16Sto8Sx16:
2958 case Iop_QNarrowBin16Uto8Ux16:
2959 case Iop_QNarrowBin16Sto8Ux16:
2960 return vectorNarrowBinV128(mce, op, vatom1, vatom2);
sewardja1d93302004-12-12 16:45:06 +00002961
sewardj0b070592004-12-10 21:44:22 +00002962 case Iop_Sub64Fx2:
2963 case Iop_Mul64Fx2:
2964 case Iop_Min64Fx2:
2965 case Iop_Max64Fx2:
2966 case Iop_Div64Fx2:
2967 case Iop_CmpLT64Fx2:
2968 case Iop_CmpLE64Fx2:
2969 case Iop_CmpEQ64Fx2:
sewardj545663e2005-11-05 01:55:04 +00002970 case Iop_CmpUN64Fx2:
sewardj0b070592004-12-10 21:44:22 +00002971 case Iop_Add64Fx2:
2972 return binary64Fx2(mce, vatom1, vatom2);
2973
2974 case Iop_Sub64F0x2:
2975 case Iop_Mul64F0x2:
2976 case Iop_Min64F0x2:
2977 case Iop_Max64F0x2:
2978 case Iop_Div64F0x2:
2979 case Iop_CmpLT64F0x2:
2980 case Iop_CmpLE64F0x2:
2981 case Iop_CmpEQ64F0x2:
sewardj545663e2005-11-05 01:55:04 +00002982 case Iop_CmpUN64F0x2:
sewardj0b070592004-12-10 21:44:22 +00002983 case Iop_Add64F0x2:
2984 return binary64F0x2(mce, vatom1, vatom2);
2985
sewardj170ee212004-12-10 18:57:51 +00002986 case Iop_Sub32Fx4:
2987 case Iop_Mul32Fx4:
2988 case Iop_Min32Fx4:
2989 case Iop_Max32Fx4:
2990 case Iop_Div32Fx4:
2991 case Iop_CmpLT32Fx4:
2992 case Iop_CmpLE32Fx4:
2993 case Iop_CmpEQ32Fx4:
sewardj545663e2005-11-05 01:55:04 +00002994 case Iop_CmpUN32Fx4:
cerione78ba2a2005-11-14 03:00:35 +00002995 case Iop_CmpGT32Fx4:
2996 case Iop_CmpGE32Fx4:
sewardj3245c912004-12-10 14:58:26 +00002997 case Iop_Add32Fx4:
sewardj57f92b02010-08-22 11:54:14 +00002998 case Iop_Recps32Fx4:
2999 case Iop_Rsqrts32Fx4:
sewardj3245c912004-12-10 14:58:26 +00003000 return binary32Fx4(mce, vatom1, vatom2);
3001
sewardj57f92b02010-08-22 11:54:14 +00003002 case Iop_Sub32Fx2:
3003 case Iop_Mul32Fx2:
3004 case Iop_Min32Fx2:
3005 case Iop_Max32Fx2:
3006 case Iop_CmpEQ32Fx2:
3007 case Iop_CmpGT32Fx2:
3008 case Iop_CmpGE32Fx2:
3009 case Iop_Add32Fx2:
3010 case Iop_Recps32Fx2:
3011 case Iop_Rsqrts32Fx2:
3012 return binary32Fx2(mce, vatom1, vatom2);
3013
sewardj170ee212004-12-10 18:57:51 +00003014 case Iop_Sub32F0x4:
3015 case Iop_Mul32F0x4:
3016 case Iop_Min32F0x4:
3017 case Iop_Max32F0x4:
3018 case Iop_Div32F0x4:
3019 case Iop_CmpLT32F0x4:
3020 case Iop_CmpLE32F0x4:
3021 case Iop_CmpEQ32F0x4:
sewardj545663e2005-11-05 01:55:04 +00003022 case Iop_CmpUN32F0x4:
sewardj170ee212004-12-10 18:57:51 +00003023 case Iop_Add32F0x4:
3024 return binary32F0x4(mce, vatom1, vatom2);
3025
sewardj57f92b02010-08-22 11:54:14 +00003026 case Iop_QShlN8Sx16:
3027 case Iop_QShlN8x16:
3028 case Iop_QSalN8x16:
florian434ffae2012-07-19 17:23:42 +00003029 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00003030 return mkPCast8x16(mce, vatom1);
3031
3032 case Iop_QShlN16Sx8:
3033 case Iop_QShlN16x8:
3034 case Iop_QSalN16x8:
florian434ffae2012-07-19 17:23:42 +00003035 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00003036 return mkPCast16x8(mce, vatom1);
3037
3038 case Iop_QShlN32Sx4:
3039 case Iop_QShlN32x4:
3040 case Iop_QSalN32x4:
florian434ffae2012-07-19 17:23:42 +00003041 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00003042 return mkPCast32x4(mce, vatom1);
3043
3044 case Iop_QShlN64Sx2:
3045 case Iop_QShlN64x2:
3046 case Iop_QSalN64x2:
florian434ffae2012-07-19 17:23:42 +00003047 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00003048 return mkPCast32x4(mce, vatom1);
3049
3050 case Iop_Mull32Sx2:
3051 case Iop_Mull32Ux2:
3052 case Iop_QDMulLong32Sx2:
sewardj7ee7d852011-06-16 11:37:21 +00003053 return vectorWidenI64(mce, Iop_Widen32Sto64x2,
3054 mkUifU64(mce, vatom1, vatom2));
sewardj57f92b02010-08-22 11:54:14 +00003055
3056 case Iop_Mull16Sx4:
3057 case Iop_Mull16Ux4:
3058 case Iop_QDMulLong16Sx4:
sewardj7ee7d852011-06-16 11:37:21 +00003059 return vectorWidenI64(mce, Iop_Widen16Sto32x4,
3060 mkUifU64(mce, vatom1, vatom2));
sewardj57f92b02010-08-22 11:54:14 +00003061
3062 case Iop_Mull8Sx8:
3063 case Iop_Mull8Ux8:
3064 case Iop_PolynomialMull8x8:
sewardj7ee7d852011-06-16 11:37:21 +00003065 return vectorWidenI64(mce, Iop_Widen8Sto16x8,
3066 mkUifU64(mce, vatom1, vatom2));
sewardj57f92b02010-08-22 11:54:14 +00003067
3068 case Iop_PwAdd32x4:
3069 return mkPCast32x4(mce,
3070 assignNew('V', mce, Ity_V128, binop(op, mkPCast32x4(mce, vatom1),
3071 mkPCast32x4(mce, vatom2))));
3072
3073 case Iop_PwAdd16x8:
3074 return mkPCast16x8(mce,
3075 assignNew('V', mce, Ity_V128, binop(op, mkPCast16x8(mce, vatom1),
3076 mkPCast16x8(mce, vatom2))));
3077
3078 case Iop_PwAdd8x16:
3079 return mkPCast8x16(mce,
3080 assignNew('V', mce, Ity_V128, binop(op, mkPCast8x16(mce, vatom1),
3081 mkPCast8x16(mce, vatom2))));
3082
sewardj20d38f22005-02-07 23:50:18 +00003083 /* V128-bit data-steering */
3084 case Iop_SetV128lo32:
3085 case Iop_SetV128lo64:
3086 case Iop_64HLtoV128:
sewardja1d93302004-12-12 16:45:06 +00003087 case Iop_InterleaveLO64x2:
3088 case Iop_InterleaveLO32x4:
3089 case Iop_InterleaveLO16x8:
3090 case Iop_InterleaveLO8x16:
3091 case Iop_InterleaveHI64x2:
3092 case Iop_InterleaveHI32x4:
3093 case Iop_InterleaveHI16x8:
3094 case Iop_InterleaveHI8x16:
sewardj57f92b02010-08-22 11:54:14 +00003095 case Iop_CatOddLanes8x16:
3096 case Iop_CatOddLanes16x8:
3097 case Iop_CatOddLanes32x4:
3098 case Iop_CatEvenLanes8x16:
3099 case Iop_CatEvenLanes16x8:
3100 case Iop_CatEvenLanes32x4:
3101 case Iop_InterleaveOddLanes8x16:
3102 case Iop_InterleaveOddLanes16x8:
3103 case Iop_InterleaveOddLanes32x4:
3104 case Iop_InterleaveEvenLanes8x16:
3105 case Iop_InterleaveEvenLanes16x8:
3106 case Iop_InterleaveEvenLanes32x4:
sewardj7cf4e6b2008-05-01 20:24:26 +00003107 return assignNew('V', mce, Ity_V128, binop(op, vatom1, vatom2));
sewardj57f92b02010-08-22 11:54:14 +00003108
3109 case Iop_GetElem8x16:
florian434ffae2012-07-19 17:23:42 +00003110 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00003111 return assignNew('V', mce, Ity_I8, binop(op, vatom1, atom2));
3112 case Iop_GetElem16x8:
florian434ffae2012-07-19 17:23:42 +00003113 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00003114 return assignNew('V', mce, Ity_I16, binop(op, vatom1, atom2));
3115 case Iop_GetElem32x4:
florian434ffae2012-07-19 17:23:42 +00003116 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00003117 return assignNew('V', mce, Ity_I32, binop(op, vatom1, atom2));
3118 case Iop_GetElem64x2:
florian434ffae2012-07-19 17:23:42 +00003119 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00003120 return assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2));
3121
sewardj620eb5b2005-10-22 12:50:43 +00003122 /* Perm8x16: rearrange values in left arg using steering values
3123 from right arg. So rearrange the vbits in the same way but
sewardj350e8f72012-06-25 07:52:15 +00003124 pessimise wrt steering values. Perm32x4 ditto. */
sewardj620eb5b2005-10-22 12:50:43 +00003125 case Iop_Perm8x16:
3126 return mkUifUV128(
3127 mce,
sewardj7cf4e6b2008-05-01 20:24:26 +00003128 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
sewardj620eb5b2005-10-22 12:50:43 +00003129 mkPCast8x16(mce, vatom2)
3130 );
sewardj350e8f72012-06-25 07:52:15 +00003131 case Iop_Perm32x4:
3132 return mkUifUV128(
3133 mce,
3134 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
3135 mkPCast32x4(mce, vatom2)
3136 );
sewardj170ee212004-12-10 18:57:51 +00003137
sewardj43d60752005-11-10 18:13:01 +00003138 /* These two take the lower half of each 16-bit lane, sign/zero
3139 extend it to 32, and multiply together, producing a 32x4
3140 result (and implicitly ignoring half the operand bits). So
3141 treat it as a bunch of independent 16x8 operations, but then
3142 do 32-bit shifts left-right to copy the lower half results
3143 (which are all 0s or all 1s due to PCasting in binary16Ix8)
3144 into the upper half of each result lane. */
3145 case Iop_MullEven16Ux8:
3146 case Iop_MullEven16Sx8: {
3147 IRAtom* at;
3148 at = binary16Ix8(mce,vatom1,vatom2);
sewardj7cf4e6b2008-05-01 20:24:26 +00003149 at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN32x4, at, mkU8(16)));
3150 at = assignNew('V', mce, Ity_V128, binop(Iop_SarN32x4, at, mkU8(16)));
sewardj43d60752005-11-10 18:13:01 +00003151 return at;
3152 }
3153
3154 /* Same deal as Iop_MullEven16{S,U}x8 */
3155 case Iop_MullEven8Ux16:
3156 case Iop_MullEven8Sx16: {
3157 IRAtom* at;
3158 at = binary8Ix16(mce,vatom1,vatom2);
sewardj7cf4e6b2008-05-01 20:24:26 +00003159 at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN16x8, at, mkU8(8)));
3160 at = assignNew('V', mce, Ity_V128, binop(Iop_SarN16x8, at, mkU8(8)));
sewardj43d60752005-11-10 18:13:01 +00003161 return at;
3162 }
3163
3164 /* narrow 2xV128 into 1xV128, hi half from left arg, in a 2 x
3165 32x4 -> 16x8 laneage, discarding the upper half of each lane.
3166 Simply apply same op to the V bits, since this really no more
3167 than a data steering operation. */
sewardj7ee7d852011-06-16 11:37:21 +00003168 case Iop_NarrowBin32to16x8:
3169 case Iop_NarrowBin16to8x16:
sewardj7cf4e6b2008-05-01 20:24:26 +00003170 return assignNew('V', mce, Ity_V128,
3171 binop(op, vatom1, vatom2));
sewardj43d60752005-11-10 18:13:01 +00003172
3173 case Iop_ShrV128:
3174 case Iop_ShlV128:
3175 /* Same scheme as with all other shifts. Note: 10 Nov 05:
3176 this is wrong now, scalar shifts are done properly lazily.
3177 Vector shifts should be fixed too. */
florian434ffae2012-07-19 17:23:42 +00003178 complainIfUndefined(mce, atom2, NULL);
sewardj7cf4e6b2008-05-01 20:24:26 +00003179 return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2));
sewardj43d60752005-11-10 18:13:01 +00003180
sewardj69a13322005-04-23 01:14:51 +00003181 /* I128-bit data-steering */
3182 case Iop_64HLto128:
sewardj7cf4e6b2008-05-01 20:24:26 +00003183 return assignNew('V', mce, Ity_I128, binop(op, vatom1, vatom2));
sewardj69a13322005-04-23 01:14:51 +00003184
sewardj350e8f72012-06-25 07:52:15 +00003185 /* V256-bit SIMD */
3186
3187 case Iop_Add64Fx4:
3188 case Iop_Sub64Fx4:
3189 case Iop_Mul64Fx4:
3190 case Iop_Div64Fx4:
3191 case Iop_Max64Fx4:
3192 case Iop_Min64Fx4:
3193 return binary64Fx4(mce, vatom1, vatom2);
3194
3195 case Iop_Add32Fx8:
3196 case Iop_Sub32Fx8:
3197 case Iop_Mul32Fx8:
3198 case Iop_Div32Fx8:
3199 case Iop_Max32Fx8:
3200 case Iop_Min32Fx8:
3201 return binary32Fx8(mce, vatom1, vatom2);
3202
3203 /* V256-bit data-steering */
3204 case Iop_V128HLtoV256:
3205 return assignNew('V', mce, Ity_V256, binop(op, vatom1, vatom2));
3206
sewardj3245c912004-12-10 14:58:26 +00003207 /* Scalar floating point */
3208
sewardjb5b87402011-03-07 16:05:35 +00003209 case Iop_F32toI64S:
florian1b9609a2012-09-01 00:15:45 +00003210 case Iop_F32toI64U:
sewardjb5b87402011-03-07 16:05:35 +00003211 /* I32(rm) x F32 -> I64 */
3212 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3213
3214 case Iop_I64StoF32:
3215 /* I32(rm) x I64 -> F32 */
3216 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3217
sewardjed69fdb2006-02-03 16:12:27 +00003218 case Iop_RoundF64toInt:
3219 case Iop_RoundF64toF32:
sewardj06f96d02009-12-31 19:24:12 +00003220 case Iop_F64toI64S:
sewardja201c452011-07-24 14:15:54 +00003221 case Iop_F64toI64U:
sewardj06f96d02009-12-31 19:24:12 +00003222 case Iop_I64StoF64:
sewardjf34eb492011-04-15 11:57:05 +00003223 case Iop_I64UtoF64:
sewardj22ac5f42006-02-03 22:55:04 +00003224 case Iop_SinF64:
3225 case Iop_CosF64:
3226 case Iop_TanF64:
3227 case Iop_2xm1F64:
3228 case Iop_SqrtF64:
3229 /* I32(rm) x I64/F64 -> I64/F64 */
sewardj95448072004-11-22 20:19:51 +00003230 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3231
sewardjea8b02f2012-04-12 17:28:57 +00003232 case Iop_ShlD64:
3233 case Iop_ShrD64:
sewardj18c72fa2012-04-23 11:22:05 +00003234 case Iop_RoundD64toInt:
sewardjea8b02f2012-04-12 17:28:57 +00003235 /* I32(DFP rm) x D64 -> D64 */
3236 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3237
3238 case Iop_ShlD128:
3239 case Iop_ShrD128:
sewardj18c72fa2012-04-23 11:22:05 +00003240 case Iop_RoundD128toInt:
3241 /* I32(DFP rm) x D128 -> D128 */
sewardjea8b02f2012-04-12 17:28:57 +00003242 return mkLazy2(mce, Ity_I128, vatom1, vatom2);
3243
3244 case Iop_D64toI64S:
3245 case Iop_I64StoD64:
3246 /* I64(DFP rm) x I64 -> D64 */
3247 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3248
sewardjd376a762010-06-27 09:08:54 +00003249 case Iop_RoundF32toInt:
sewardjaec1be32010-01-03 22:29:32 +00003250 case Iop_SqrtF32:
3251 /* I32(rm) x I32/F32 -> I32/F32 */
3252 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3253
sewardjb5b87402011-03-07 16:05:35 +00003254 case Iop_SqrtF128:
3255 /* I32(rm) x F128 -> F128 */
3256 return mkLazy2(mce, Ity_I128, vatom1, vatom2);
3257
3258 case Iop_I32StoF32:
florian1b9609a2012-09-01 00:15:45 +00003259 case Iop_I32UtoF32:
sewardjb5b87402011-03-07 16:05:35 +00003260 case Iop_F32toI32S:
florian1b9609a2012-09-01 00:15:45 +00003261 case Iop_F32toI32U:
sewardjb5b87402011-03-07 16:05:35 +00003262 /* First arg is I32 (rounding mode), second is F32/I32 (data). */
3263 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3264
3265 case Iop_F128toI32S: /* IRRoundingMode(I32) x F128 -> signed I32 */
florian1b9609a2012-09-01 00:15:45 +00003266 case Iop_F128toI32U: /* IRRoundingMode(I32) x F128 -> unsigned I32 */
sewardjb5b87402011-03-07 16:05:35 +00003267 case Iop_F128toF32: /* IRRoundingMode(I32) x F128 -> F32 */
3268 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3269
3270 case Iop_F128toI64S: /* IRRoundingMode(I32) x F128 -> signed I64 */
florian1b9609a2012-09-01 00:15:45 +00003271 case Iop_F128toI64U: /* IRRoundingMode(I32) x F128 -> unsigned I64 */
sewardjb5b87402011-03-07 16:05:35 +00003272 case Iop_F128toF64: /* IRRoundingMode(I32) x F128 -> F64 */
sewardjea8b02f2012-04-12 17:28:57 +00003273 case Iop_D128toD64: /* IRRoundingModeDFP(I64) x D128 -> D64 */
3274 case Iop_D128toI64S: /* IRRoundingModeDFP(I64) x D128 -> signed I64 */
sewardjb5b87402011-03-07 16:05:35 +00003275 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3276
3277 case Iop_F64HLtoF128:
sewardjb0ccb4d2012-04-02 10:22:05 +00003278 case Iop_D64HLtoD128:
sewardj350e8f72012-06-25 07:52:15 +00003279 return assignNew('V', mce, Ity_I128,
3280 binop(Iop_64HLto128, vatom1, vatom2));
sewardjb5b87402011-03-07 16:05:35 +00003281
sewardj59570ff2010-01-01 11:59:33 +00003282 case Iop_F64toI32U:
sewardj06f96d02009-12-31 19:24:12 +00003283 case Iop_F64toI32S:
sewardje9e16d32004-12-10 13:17:55 +00003284 case Iop_F64toF32:
sewardjf34eb492011-04-15 11:57:05 +00003285 case Iop_I64UtoF32:
sewardj95448072004-11-22 20:19:51 +00003286 /* First arg is I32 (rounding mode), second is F64 (data). */
3287 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3288
sewardjea8b02f2012-04-12 17:28:57 +00003289 case Iop_D64toD32:
3290 /* First arg is I64 (DFProunding mode), second is D64 (data). */
3291 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3292
sewardj06f96d02009-12-31 19:24:12 +00003293 case Iop_F64toI16S:
sewardj95448072004-11-22 20:19:51 +00003294 /* First arg is I32 (rounding mode), second is F64 (data). */
3295 return mkLazy2(mce, Ity_I16, vatom1, vatom2);
3296
sewardj18c72fa2012-04-23 11:22:05 +00003297 case Iop_InsertExpD64:
3298 /* I64 x I64 -> D64 */
3299 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3300
3301 case Iop_InsertExpD128:
3302 /* I64 x I128 -> D128 */
3303 return mkLazy2(mce, Ity_I128, vatom1, vatom2);
3304
sewardjb5b87402011-03-07 16:05:35 +00003305 case Iop_CmpF32:
sewardj95448072004-11-22 20:19:51 +00003306 case Iop_CmpF64:
sewardjb5b87402011-03-07 16:05:35 +00003307 case Iop_CmpF128:
sewardj18c72fa2012-04-23 11:22:05 +00003308 case Iop_CmpD64:
3309 case Iop_CmpD128:
sewardj95448072004-11-22 20:19:51 +00003310 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3311
3312 /* non-FP after here */
3313
3314 case Iop_DivModU64to32:
3315 case Iop_DivModS64to32:
3316 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3317
sewardj69a13322005-04-23 01:14:51 +00003318 case Iop_DivModU128to64:
3319 case Iop_DivModS128to64:
3320 return mkLazy2(mce, Ity_I128, vatom1, vatom2);
3321
florian537ed2d2012-08-20 16:51:39 +00003322 case Iop_8HLto16:
3323 return assignNew('V', mce, Ity_I16, binop(op, vatom1, vatom2));
sewardj95448072004-11-22 20:19:51 +00003324 case Iop_16HLto32:
sewardj7cf4e6b2008-05-01 20:24:26 +00003325 return assignNew('V', mce, Ity_I32, binop(op, vatom1, vatom2));
sewardj95448072004-11-22 20:19:51 +00003326 case Iop_32HLto64:
sewardj7cf4e6b2008-05-01 20:24:26 +00003327 return assignNew('V', mce, Ity_I64, binop(op, vatom1, vatom2));
sewardj95448072004-11-22 20:19:51 +00003328
sewardjb5b87402011-03-07 16:05:35 +00003329 case Iop_DivModS64to64:
sewardj6cf40ff2005-04-20 22:31:26 +00003330 case Iop_MullS64:
3331 case Iop_MullU64: {
3332 IRAtom* vLo64 = mkLeft64(mce, mkUifU64(mce, vatom1,vatom2));
3333 IRAtom* vHi64 = mkPCastTo(mce, Ity_I64, vLo64);
sewardj350e8f72012-06-25 07:52:15 +00003334 return assignNew('V', mce, Ity_I128,
3335 binop(Iop_64HLto128, vHi64, vLo64));
sewardj6cf40ff2005-04-20 22:31:26 +00003336 }
3337
sewardj95448072004-11-22 20:19:51 +00003338 case Iop_MullS32:
3339 case Iop_MullU32: {
3340 IRAtom* vLo32 = mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
3341 IRAtom* vHi32 = mkPCastTo(mce, Ity_I32, vLo32);
sewardj350e8f72012-06-25 07:52:15 +00003342 return assignNew('V', mce, Ity_I64,
3343 binop(Iop_32HLto64, vHi32, vLo32));
sewardj95448072004-11-22 20:19:51 +00003344 }
3345
3346 case Iop_MullS16:
3347 case Iop_MullU16: {
3348 IRAtom* vLo16 = mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
3349 IRAtom* vHi16 = mkPCastTo(mce, Ity_I16, vLo16);
sewardj350e8f72012-06-25 07:52:15 +00003350 return assignNew('V', mce, Ity_I32,
3351 binop(Iop_16HLto32, vHi16, vLo16));
sewardj95448072004-11-22 20:19:51 +00003352 }
3353
3354 case Iop_MullS8:
3355 case Iop_MullU8: {
3356 IRAtom* vLo8 = mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
3357 IRAtom* vHi8 = mkPCastTo(mce, Ity_I8, vLo8);
sewardj7cf4e6b2008-05-01 20:24:26 +00003358 return assignNew('V', mce, Ity_I16, binop(Iop_8HLto16, vHi8, vLo8));
sewardj95448072004-11-22 20:19:51 +00003359 }
3360
sewardj5af05062010-10-18 16:31:14 +00003361 case Iop_Sad8Ux4: /* maybe we could do better? ftm, do mkLazy2. */
cerion9e591082005-06-23 15:28:34 +00003362 case Iop_DivS32:
3363 case Iop_DivU32:
sewardja201c452011-07-24 14:15:54 +00003364 case Iop_DivU32E:
sewardj169ac042011-09-05 12:12:34 +00003365 case Iop_DivS32E:
sewardj2157b2c2012-07-11 13:20:58 +00003366 case Iop_QAdd32S: /* could probably do better */
3367 case Iop_QSub32S: /* could probably do better */
cerion9e591082005-06-23 15:28:34 +00003368 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3369
sewardjb00944a2005-12-23 12:47:16 +00003370 case Iop_DivS64:
3371 case Iop_DivU64:
sewardja201c452011-07-24 14:15:54 +00003372 case Iop_DivS64E:
sewardj169ac042011-09-05 12:12:34 +00003373 case Iop_DivU64E:
sewardjb00944a2005-12-23 12:47:16 +00003374 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3375
sewardj95448072004-11-22 20:19:51 +00003376 case Iop_Add32:
sewardj54eac252012-03-27 10:19:39 +00003377 if (mce->bogusLiterals || mce->useLLVMworkarounds)
sewardjd5204dc2004-12-31 01:16:11 +00003378 return expensiveAddSub(mce,True,Ity_I32,
3379 vatom1,vatom2, atom1,atom2);
3380 else
3381 goto cheap_AddSub32;
sewardj95448072004-11-22 20:19:51 +00003382 case Iop_Sub32:
sewardjd5204dc2004-12-31 01:16:11 +00003383 if (mce->bogusLiterals)
3384 return expensiveAddSub(mce,False,Ity_I32,
3385 vatom1,vatom2, atom1,atom2);
3386 else
3387 goto cheap_AddSub32;
3388
3389 cheap_AddSub32:
sewardj95448072004-11-22 20:19:51 +00003390 case Iop_Mul32:
sewardj992dff92005-10-07 11:08:55 +00003391 return mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
3392
sewardj463b3d92005-07-18 11:41:15 +00003393 case Iop_CmpORD32S:
3394 case Iop_CmpORD32U:
sewardj1bc82102005-12-23 00:16:24 +00003395 case Iop_CmpORD64S:
3396 case Iop_CmpORD64U:
3397 return doCmpORD(mce, op, vatom1,vatom2, atom1,atom2);
sewardj95448072004-11-22 20:19:51 +00003398
sewardj681be302005-01-15 20:43:58 +00003399 case Iop_Add64:
sewardj54eac252012-03-27 10:19:39 +00003400 if (mce->bogusLiterals || mce->useLLVMworkarounds)
tomd9774d72005-06-27 08:11:01 +00003401 return expensiveAddSub(mce,True,Ity_I64,
3402 vatom1,vatom2, atom1,atom2);
3403 else
3404 goto cheap_AddSub64;
sewardj681be302005-01-15 20:43:58 +00003405 case Iop_Sub64:
tomd9774d72005-06-27 08:11:01 +00003406 if (mce->bogusLiterals)
3407 return expensiveAddSub(mce,False,Ity_I64,
3408 vatom1,vatom2, atom1,atom2);
3409 else
3410 goto cheap_AddSub64;
3411
3412 cheap_AddSub64:
3413 case Iop_Mul64:
sewardj681be302005-01-15 20:43:58 +00003414 return mkLeft64(mce, mkUifU64(mce, vatom1,vatom2));
3415
sewardj95448072004-11-22 20:19:51 +00003416 case Iop_Mul16:
3417 case Iop_Add16:
3418 case Iop_Sub16:
3419 return mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
3420
florian537ed2d2012-08-20 16:51:39 +00003421 case Iop_Mul8:
sewardj95448072004-11-22 20:19:51 +00003422 case Iop_Sub8:
3423 case Iop_Add8:
3424 return mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
3425
sewardj69a13322005-04-23 01:14:51 +00003426 case Iop_CmpEQ64:
sewardje6f8af42005-07-06 18:48:59 +00003427 case Iop_CmpNE64:
sewardj69a13322005-04-23 01:14:51 +00003428 if (mce->bogusLiterals)
3429 return expensiveCmpEQorNE(mce,Ity_I64, vatom1,vatom2, atom1,atom2 );
3430 else
3431 goto cheap_cmp64;
3432 cheap_cmp64:
tomcd986332005-04-26 07:44:48 +00003433 case Iop_CmpLE64S: case Iop_CmpLE64U:
3434 case Iop_CmpLT64U: case Iop_CmpLT64S:
sewardj69a13322005-04-23 01:14:51 +00003435 return mkPCastTo(mce, Ity_I1, mkUifU64(mce, vatom1,vatom2));
3436
sewardjd5204dc2004-12-31 01:16:11 +00003437 case Iop_CmpEQ32:
sewardje6f8af42005-07-06 18:48:59 +00003438 case Iop_CmpNE32:
sewardjd5204dc2004-12-31 01:16:11 +00003439 if (mce->bogusLiterals)
3440 return expensiveCmpEQorNE(mce,Ity_I32, vatom1,vatom2, atom1,atom2 );
3441 else
3442 goto cheap_cmp32;
sewardjd5204dc2004-12-31 01:16:11 +00003443 cheap_cmp32:
sewardj95448072004-11-22 20:19:51 +00003444 case Iop_CmpLE32S: case Iop_CmpLE32U:
3445 case Iop_CmpLT32U: case Iop_CmpLT32S:
sewardj95448072004-11-22 20:19:51 +00003446 return mkPCastTo(mce, Ity_I1, mkUifU32(mce, vatom1,vatom2));
3447
3448 case Iop_CmpEQ16: case Iop_CmpNE16:
3449 return mkPCastTo(mce, Ity_I1, mkUifU16(mce, vatom1,vatom2));
3450
3451 case Iop_CmpEQ8: case Iop_CmpNE8:
3452 return mkPCastTo(mce, Ity_I1, mkUifU8(mce, vatom1,vatom2));
3453
sewardjafed4c52009-07-12 13:00:17 +00003454 case Iop_CasCmpEQ8: case Iop_CasCmpNE8:
3455 case Iop_CasCmpEQ16: case Iop_CasCmpNE16:
3456 case Iop_CasCmpEQ32: case Iop_CasCmpNE32:
3457 case Iop_CasCmpEQ64: case Iop_CasCmpNE64:
3458 /* Just say these all produce a defined result, regardless
3459 of their arguments. See COMMENT_ON_CasCmpEQ in this file. */
3460 return assignNew('V', mce, Ity_I1, definedOfType(Ity_I1));
3461
sewardjaaddbc22005-10-07 09:49:53 +00003462 case Iop_Shl64: case Iop_Shr64: case Iop_Sar64:
3463 return scalarShift( mce, Ity_I64, op, vatom1,vatom2, atom1,atom2 );
3464
sewardj95448072004-11-22 20:19:51 +00003465 case Iop_Shl32: case Iop_Shr32: case Iop_Sar32:
sewardjaaddbc22005-10-07 09:49:53 +00003466 return scalarShift( mce, Ity_I32, op, vatom1,vatom2, atom1,atom2 );
sewardj95448072004-11-22 20:19:51 +00003467
sewardjdb67f5f2004-12-14 01:15:31 +00003468 case Iop_Shl16: case Iop_Shr16: case Iop_Sar16:
sewardjaaddbc22005-10-07 09:49:53 +00003469 return scalarShift( mce, Ity_I16, op, vatom1,vatom2, atom1,atom2 );
sewardj95448072004-11-22 20:19:51 +00003470
florian537ed2d2012-08-20 16:51:39 +00003471 case Iop_Shl8: case Iop_Shr8: case Iop_Sar8:
sewardjaaddbc22005-10-07 09:49:53 +00003472 return scalarShift( mce, Ity_I8, op, vatom1,vatom2, atom1,atom2 );
sewardj95448072004-11-22 20:19:51 +00003473
sewardj350e8f72012-06-25 07:52:15 +00003474 case Iop_AndV256:
3475 uifu = mkUifUV256; difd = mkDifDV256;
3476 and_or_ty = Ity_V256; improve = mkImproveANDV256; goto do_And_Or;
sewardj20d38f22005-02-07 23:50:18 +00003477 case Iop_AndV128:
3478 uifu = mkUifUV128; difd = mkDifDV128;
3479 and_or_ty = Ity_V128; improve = mkImproveANDV128; goto do_And_Or;
sewardj7010f6e2004-12-10 13:35:22 +00003480 case Iop_And64:
3481 uifu = mkUifU64; difd = mkDifD64;
3482 and_or_ty = Ity_I64; improve = mkImproveAND64; goto do_And_Or;
sewardj95448072004-11-22 20:19:51 +00003483 case Iop_And32:
3484 uifu = mkUifU32; difd = mkDifD32;
3485 and_or_ty = Ity_I32; improve = mkImproveAND32; goto do_And_Or;
3486 case Iop_And16:
3487 uifu = mkUifU16; difd = mkDifD16;
3488 and_or_ty = Ity_I16; improve = mkImproveAND16; goto do_And_Or;
3489 case Iop_And8:
3490 uifu = mkUifU8; difd = mkDifD8;
3491 and_or_ty = Ity_I8; improve = mkImproveAND8; goto do_And_Or;
3492
sewardj350e8f72012-06-25 07:52:15 +00003493 case Iop_OrV256:
3494 uifu = mkUifUV256; difd = mkDifDV256;
3495 and_or_ty = Ity_V256; improve = mkImproveORV256; goto do_And_Or;
sewardj20d38f22005-02-07 23:50:18 +00003496 case Iop_OrV128:
3497 uifu = mkUifUV128; difd = mkDifDV128;
3498 and_or_ty = Ity_V128; improve = mkImproveORV128; goto do_And_Or;
sewardj7010f6e2004-12-10 13:35:22 +00003499 case Iop_Or64:
3500 uifu = mkUifU64; difd = mkDifD64;
3501 and_or_ty = Ity_I64; improve = mkImproveOR64; goto do_And_Or;
sewardj95448072004-11-22 20:19:51 +00003502 case Iop_Or32:
3503 uifu = mkUifU32; difd = mkDifD32;
3504 and_or_ty = Ity_I32; improve = mkImproveOR32; goto do_And_Or;
3505 case Iop_Or16:
3506 uifu = mkUifU16; difd = mkDifD16;
3507 and_or_ty = Ity_I16; improve = mkImproveOR16; goto do_And_Or;
3508 case Iop_Or8:
3509 uifu = mkUifU8; difd = mkDifD8;
3510 and_or_ty = Ity_I8; improve = mkImproveOR8; goto do_And_Or;
3511
3512 do_And_Or:
3513 return
3514 assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +00003515 'V', mce,
sewardj95448072004-11-22 20:19:51 +00003516 and_or_ty,
3517 difd(mce, uifu(mce, vatom1, vatom2),
3518 difd(mce, improve(mce, atom1, vatom1),
3519 improve(mce, atom2, vatom2) ) ) );
3520
3521 case Iop_Xor8:
3522 return mkUifU8(mce, vatom1, vatom2);
3523 case Iop_Xor16:
3524 return mkUifU16(mce, vatom1, vatom2);
3525 case Iop_Xor32:
3526 return mkUifU32(mce, vatom1, vatom2);
sewardj7010f6e2004-12-10 13:35:22 +00003527 case Iop_Xor64:
3528 return mkUifU64(mce, vatom1, vatom2);
sewardj20d38f22005-02-07 23:50:18 +00003529 case Iop_XorV128:
3530 return mkUifUV128(mce, vatom1, vatom2);
sewardj350e8f72012-06-25 07:52:15 +00003531 case Iop_XorV256:
3532 return mkUifUV256(mce, vatom1, vatom2);
njn25e49d8e72002-09-23 09:36:25 +00003533
3534 default:
sewardj95448072004-11-22 20:19:51 +00003535 ppIROp(op);
3536 VG_(tool_panic)("memcheck:expr2vbits_Binop");
njn25e49d8e72002-09-23 09:36:25 +00003537 }
njn25e49d8e72002-09-23 09:36:25 +00003538}
3539
njn25e49d8e72002-09-23 09:36:25 +00003540
sewardj95448072004-11-22 20:19:51 +00003541static
3542IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
3543{
3544 IRAtom* vatom = expr2vbits( mce, atom );
3545 tl_assert(isOriginalAtom(mce,atom));
3546 switch (op) {
3547
sewardj0b070592004-12-10 21:44:22 +00003548 case Iop_Sqrt64Fx2:
3549 return unary64Fx2(mce, vatom);
3550
3551 case Iop_Sqrt64F0x2:
3552 return unary64F0x2(mce, vatom);
3553
sewardj350e8f72012-06-25 07:52:15 +00003554 case Iop_Sqrt32Fx8:
3555 case Iop_RSqrt32Fx8:
3556 case Iop_Recip32Fx8:
3557 return unary32Fx8(mce, vatom);
3558
3559 case Iop_Sqrt64Fx4:
3560 return unary64Fx4(mce, vatom);
3561
sewardj170ee212004-12-10 18:57:51 +00003562 case Iop_Sqrt32Fx4:
3563 case Iop_RSqrt32Fx4:
3564 case Iop_Recip32Fx4:
cerion176cb4c2005-11-16 17:21:49 +00003565 case Iop_I32UtoFx4:
3566 case Iop_I32StoFx4:
3567 case Iop_QFtoI32Ux4_RZ:
3568 case Iop_QFtoI32Sx4_RZ:
3569 case Iop_RoundF32x4_RM:
3570 case Iop_RoundF32x4_RP:
3571 case Iop_RoundF32x4_RN:
3572 case Iop_RoundF32x4_RZ:
sewardj57f92b02010-08-22 11:54:14 +00003573 case Iop_Recip32x4:
3574 case Iop_Abs32Fx4:
3575 case Iop_Neg32Fx4:
3576 case Iop_Rsqrte32Fx4:
sewardj170ee212004-12-10 18:57:51 +00003577 return unary32Fx4(mce, vatom);
3578
sewardj57f92b02010-08-22 11:54:14 +00003579 case Iop_I32UtoFx2:
3580 case Iop_I32StoFx2:
3581 case Iop_Recip32Fx2:
3582 case Iop_Recip32x2:
3583 case Iop_Abs32Fx2:
3584 case Iop_Neg32Fx2:
3585 case Iop_Rsqrte32Fx2:
3586 return unary32Fx2(mce, vatom);
3587
sewardj170ee212004-12-10 18:57:51 +00003588 case Iop_Sqrt32F0x4:
3589 case Iop_RSqrt32F0x4:
3590 case Iop_Recip32F0x4:
3591 return unary32F0x4(mce, vatom);
3592
sewardj20d38f22005-02-07 23:50:18 +00003593 case Iop_32UtoV128:
3594 case Iop_64UtoV128:
sewardj620eb5b2005-10-22 12:50:43 +00003595 case Iop_Dup8x16:
3596 case Iop_Dup16x8:
3597 case Iop_Dup32x4:
sewardj57f92b02010-08-22 11:54:14 +00003598 case Iop_Reverse16_8x16:
3599 case Iop_Reverse32_8x16:
3600 case Iop_Reverse32_16x8:
3601 case Iop_Reverse64_8x16:
3602 case Iop_Reverse64_16x8:
3603 case Iop_Reverse64_32x4:
sewardj350e8f72012-06-25 07:52:15 +00003604 case Iop_V256toV128_1: case Iop_V256toV128_0:
sewardj7cf4e6b2008-05-01 20:24:26 +00003605 return assignNew('V', mce, Ity_V128, unop(op, vatom));
sewardj170ee212004-12-10 18:57:51 +00003606
sewardjb5b87402011-03-07 16:05:35 +00003607 case Iop_F128HItoF64: /* F128 -> high half of F128 */
sewardjb0ccb4d2012-04-02 10:22:05 +00003608 case Iop_D128HItoD64: /* D128 -> high half of D128 */
sewardjb5b87402011-03-07 16:05:35 +00003609 return assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, vatom));
3610 case Iop_F128LOtoF64: /* F128 -> low half of F128 */
sewardjb0ccb4d2012-04-02 10:22:05 +00003611 case Iop_D128LOtoD64: /* D128 -> low half of D128 */
sewardjb5b87402011-03-07 16:05:35 +00003612 return assignNew('V', mce, Ity_I64, unop(Iop_128to64, vatom));
3613
3614 case Iop_NegF128:
3615 case Iop_AbsF128:
3616 return mkPCastTo(mce, Ity_I128, vatom);
3617
3618 case Iop_I32StoF128: /* signed I32 -> F128 */
3619 case Iop_I64StoF128: /* signed I64 -> F128 */
florian1b9609a2012-09-01 00:15:45 +00003620 case Iop_I32UtoF128: /* unsigned I32 -> F128 */
3621 case Iop_I64UtoF128: /* unsigned I64 -> F128 */
sewardjb5b87402011-03-07 16:05:35 +00003622 case Iop_F32toF128: /* F32 -> F128 */
3623 case Iop_F64toF128: /* F64 -> F128 */
sewardjea8b02f2012-04-12 17:28:57 +00003624 case Iop_I64StoD128: /* signed I64 -> D128 */
sewardjb5b87402011-03-07 16:05:35 +00003625 return mkPCastTo(mce, Ity_I128, vatom);
3626
sewardj95448072004-11-22 20:19:51 +00003627 case Iop_F32toF64:
sewardj06f96d02009-12-31 19:24:12 +00003628 case Iop_I32StoF64:
sewardj59570ff2010-01-01 11:59:33 +00003629 case Iop_I32UtoF64:
sewardj95448072004-11-22 20:19:51 +00003630 case Iop_NegF64:
sewardj95448072004-11-22 20:19:51 +00003631 case Iop_AbsF64:
sewardj4f3e0f22006-01-27 15:11:31 +00003632 case Iop_Est5FRSqrt:
sewardjdead90a2008-08-08 08:38:23 +00003633 case Iop_RoundF64toF64_NEAREST:
3634 case Iop_RoundF64toF64_NegINF:
3635 case Iop_RoundF64toF64_PosINF:
3636 case Iop_RoundF64toF64_ZERO:
sewardj39cc7352005-06-09 21:31:55 +00003637 case Iop_Clz64:
3638 case Iop_Ctz64:
sewardjea8b02f2012-04-12 17:28:57 +00003639 case Iop_D32toD64:
sewardj18c72fa2012-04-23 11:22:05 +00003640 case Iop_ExtractExpD64: /* D64 -> I64 */
3641 case Iop_ExtractExpD128: /* D128 -> I64 */
florian1943eb52012-08-22 18:09:07 +00003642 case Iop_DPBtoBCD:
3643 case Iop_BCDtoDPB:
sewardj95448072004-11-22 20:19:51 +00003644 return mkPCastTo(mce, Ity_I64, vatom);
3645
sewardjea8b02f2012-04-12 17:28:57 +00003646 case Iop_D64toD128:
3647 return mkPCastTo(mce, Ity_I128, vatom);
3648
sewardj95448072004-11-22 20:19:51 +00003649 case Iop_Clz32:
3650 case Iop_Ctz32:
sewardjed69fdb2006-02-03 16:12:27 +00003651 case Iop_TruncF64asF32:
sewardj59570ff2010-01-01 11:59:33 +00003652 case Iop_NegF32:
3653 case Iop_AbsF32:
sewardj95448072004-11-22 20:19:51 +00003654 return mkPCastTo(mce, Ity_I32, vatom);
3655
sewardjd9dbc192005-04-27 11:40:27 +00003656 case Iop_1Uto64:
sewardja201c452011-07-24 14:15:54 +00003657 case Iop_1Sto64:
sewardjd9dbc192005-04-27 11:40:27 +00003658 case Iop_8Uto64:
3659 case Iop_8Sto64:
3660 case Iop_16Uto64:
3661 case Iop_16Sto64:
sewardj95448072004-11-22 20:19:51 +00003662 case Iop_32Sto64:
3663 case Iop_32Uto64:
sewardj20d38f22005-02-07 23:50:18 +00003664 case Iop_V128to64:
3665 case Iop_V128HIto64:
sewardj6cf40ff2005-04-20 22:31:26 +00003666 case Iop_128HIto64:
3667 case Iop_128to64:
sewardj57f92b02010-08-22 11:54:14 +00003668 case Iop_Dup8x8:
3669 case Iop_Dup16x4:
3670 case Iop_Dup32x2:
3671 case Iop_Reverse16_8x8:
3672 case Iop_Reverse32_8x8:
3673 case Iop_Reverse32_16x4:
3674 case Iop_Reverse64_8x8:
3675 case Iop_Reverse64_16x4:
3676 case Iop_Reverse64_32x2:
sewardj350e8f72012-06-25 07:52:15 +00003677 case Iop_V256to64_0: case Iop_V256to64_1:
3678 case Iop_V256to64_2: case Iop_V256to64_3:
sewardj7cf4e6b2008-05-01 20:24:26 +00003679 return assignNew('V', mce, Ity_I64, unop(op, vatom));
sewardj95448072004-11-22 20:19:51 +00003680
3681 case Iop_64to32:
3682 case Iop_64HIto32:
3683 case Iop_1Uto32:
sewardj463b3d92005-07-18 11:41:15 +00003684 case Iop_1Sto32:
sewardj95448072004-11-22 20:19:51 +00003685 case Iop_8Uto32:
3686 case Iop_16Uto32:
3687 case Iop_16Sto32:
3688 case Iop_8Sto32:
cerionfafaa0d2005-09-12 22:29:38 +00003689 case Iop_V128to32:
sewardj7cf4e6b2008-05-01 20:24:26 +00003690 return assignNew('V', mce, Ity_I32, unop(op, vatom));
sewardj95448072004-11-22 20:19:51 +00003691
3692 case Iop_8Sto16:
3693 case Iop_8Uto16:
3694 case Iop_32to16:
3695 case Iop_32HIto16:
sewardjd9dbc192005-04-27 11:40:27 +00003696 case Iop_64to16:
sewardj7cf4e6b2008-05-01 20:24:26 +00003697 return assignNew('V', mce, Ity_I16, unop(op, vatom));
sewardj95448072004-11-22 20:19:51 +00003698
3699 case Iop_1Uto8:
sewardja201c452011-07-24 14:15:54 +00003700 case Iop_1Sto8:
sewardj95448072004-11-22 20:19:51 +00003701 case Iop_16to8:
sewardj9a807e02006-12-17 14:20:31 +00003702 case Iop_16HIto8:
sewardj95448072004-11-22 20:19:51 +00003703 case Iop_32to8:
sewardjd9dbc192005-04-27 11:40:27 +00003704 case Iop_64to8:
sewardj7cf4e6b2008-05-01 20:24:26 +00003705 return assignNew('V', mce, Ity_I8, unop(op, vatom));
sewardj95448072004-11-22 20:19:51 +00003706
3707 case Iop_32to1:
sewardj7cf4e6b2008-05-01 20:24:26 +00003708 return assignNew('V', mce, Ity_I1, unop(Iop_32to1, vatom));
sewardj95448072004-11-22 20:19:51 +00003709
sewardjd9dbc192005-04-27 11:40:27 +00003710 case Iop_64to1:
sewardj7cf4e6b2008-05-01 20:24:26 +00003711 return assignNew('V', mce, Ity_I1, unop(Iop_64to1, vatom));
sewardjd9dbc192005-04-27 11:40:27 +00003712
sewardj95448072004-11-22 20:19:51 +00003713 case Iop_ReinterpF64asI64:
3714 case Iop_ReinterpI64asF64:
sewardj0b070592004-12-10 21:44:22 +00003715 case Iop_ReinterpI32asF32:
sewardj59570ff2010-01-01 11:59:33 +00003716 case Iop_ReinterpF32asI32:
sewardj18c72fa2012-04-23 11:22:05 +00003717 case Iop_ReinterpI64asD64:
sewardj0892b822012-04-29 20:20:16 +00003718 case Iop_ReinterpD64asI64:
sewardj350e8f72012-06-25 07:52:15 +00003719 case Iop_NotV256:
sewardj20d38f22005-02-07 23:50:18 +00003720 case Iop_NotV128:
sewardj7010f6e2004-12-10 13:35:22 +00003721 case Iop_Not64:
sewardj95448072004-11-22 20:19:51 +00003722 case Iop_Not32:
3723 case Iop_Not16:
3724 case Iop_Not8:
3725 case Iop_Not1:
3726 return vatom;
sewardj7010f6e2004-12-10 13:35:22 +00003727
sewardj57f92b02010-08-22 11:54:14 +00003728 case Iop_CmpNEZ8x8:
3729 case Iop_Cnt8x8:
3730 case Iop_Clz8Sx8:
3731 case Iop_Cls8Sx8:
3732 case Iop_Abs8x8:
3733 return mkPCast8x8(mce, vatom);
3734
3735 case Iop_CmpNEZ8x16:
3736 case Iop_Cnt8x16:
3737 case Iop_Clz8Sx16:
3738 case Iop_Cls8Sx16:
3739 case Iop_Abs8x16:
3740 return mkPCast8x16(mce, vatom);
3741
3742 case Iop_CmpNEZ16x4:
3743 case Iop_Clz16Sx4:
3744 case Iop_Cls16Sx4:
3745 case Iop_Abs16x4:
3746 return mkPCast16x4(mce, vatom);
3747
3748 case Iop_CmpNEZ16x8:
3749 case Iop_Clz16Sx8:
3750 case Iop_Cls16Sx8:
3751 case Iop_Abs16x8:
3752 return mkPCast16x8(mce, vatom);
3753
3754 case Iop_CmpNEZ32x2:
3755 case Iop_Clz32Sx2:
3756 case Iop_Cls32Sx2:
3757 case Iop_FtoI32Ux2_RZ:
3758 case Iop_FtoI32Sx2_RZ:
3759 case Iop_Abs32x2:
3760 return mkPCast32x2(mce, vatom);
3761
3762 case Iop_CmpNEZ32x4:
3763 case Iop_Clz32Sx4:
3764 case Iop_Cls32Sx4:
3765 case Iop_FtoI32Ux4_RZ:
3766 case Iop_FtoI32Sx4_RZ:
3767 case Iop_Abs32x4:
3768 return mkPCast32x4(mce, vatom);
3769
florian537ed2d2012-08-20 16:51:39 +00003770 case Iop_CmpwNEZ32:
3771 return mkPCastTo(mce, Ity_I32, vatom);
3772
sewardj57f92b02010-08-22 11:54:14 +00003773 case Iop_CmpwNEZ64:
3774 return mkPCastTo(mce, Ity_I64, vatom);
3775
3776 case Iop_CmpNEZ64x2:
3777 return mkPCast64x2(mce, vatom);
3778
sewardj7ee7d852011-06-16 11:37:21 +00003779 case Iop_NarrowUn16to8x8:
3780 case Iop_NarrowUn32to16x4:
3781 case Iop_NarrowUn64to32x2:
3782 case Iop_QNarrowUn16Sto8Sx8:
3783 case Iop_QNarrowUn16Sto8Ux8:
3784 case Iop_QNarrowUn16Uto8Ux8:
3785 case Iop_QNarrowUn32Sto16Sx4:
3786 case Iop_QNarrowUn32Sto16Ux4:
3787 case Iop_QNarrowUn32Uto16Ux4:
3788 case Iop_QNarrowUn64Sto32Sx2:
3789 case Iop_QNarrowUn64Sto32Ux2:
3790 case Iop_QNarrowUn64Uto32Ux2:
3791 return vectorNarrowUnV128(mce, op, vatom);
sewardj57f92b02010-08-22 11:54:14 +00003792
sewardj7ee7d852011-06-16 11:37:21 +00003793 case Iop_Widen8Sto16x8:
3794 case Iop_Widen8Uto16x8:
3795 case Iop_Widen16Sto32x4:
3796 case Iop_Widen16Uto32x4:
3797 case Iop_Widen32Sto64x2:
3798 case Iop_Widen32Uto64x2:
3799 return vectorWidenI64(mce, op, vatom);
sewardj57f92b02010-08-22 11:54:14 +00003800
3801 case Iop_PwAddL32Ux2:
3802 case Iop_PwAddL32Sx2:
3803 return mkPCastTo(mce, Ity_I64,
3804 assignNew('V', mce, Ity_I64, unop(op, mkPCast32x2(mce, vatom))));
3805
3806 case Iop_PwAddL16Ux4:
3807 case Iop_PwAddL16Sx4:
3808 return mkPCast32x2(mce,
3809 assignNew('V', mce, Ity_I64, unop(op, mkPCast16x4(mce, vatom))));
3810
3811 case Iop_PwAddL8Ux8:
3812 case Iop_PwAddL8Sx8:
3813 return mkPCast16x4(mce,
3814 assignNew('V', mce, Ity_I64, unop(op, mkPCast8x8(mce, vatom))));
3815
3816 case Iop_PwAddL32Ux4:
3817 case Iop_PwAddL32Sx4:
3818 return mkPCast64x2(mce,
3819 assignNew('V', mce, Ity_V128, unop(op, mkPCast32x4(mce, vatom))));
3820
3821 case Iop_PwAddL16Ux8:
3822 case Iop_PwAddL16Sx8:
3823 return mkPCast32x4(mce,
3824 assignNew('V', mce, Ity_V128, unop(op, mkPCast16x8(mce, vatom))));
3825
3826 case Iop_PwAddL8Ux16:
3827 case Iop_PwAddL8Sx16:
3828 return mkPCast16x8(mce,
3829 assignNew('V', mce, Ity_V128, unop(op, mkPCast8x16(mce, vatom))));
3830
sewardjf34eb492011-04-15 11:57:05 +00003831 case Iop_I64UtoF32:
sewardj95448072004-11-22 20:19:51 +00003832 default:
3833 ppIROp(op);
3834 VG_(tool_panic)("memcheck:expr2vbits_Unop");
3835 }
3836}
3837
3838
sewardj170ee212004-12-10 18:57:51 +00003839/* Worker function; do not call directly. */
static
IRAtom* expr2vbits_Load_WRK ( MCEnv* mce,
                              IREndness end, IRType ty,
                              IRAtom* addr, UInt bias )
{
   /* Generate IR to fetch the V (definedness) bits for a scalar load
      of type TY at address ADDR+BIAS, by emitting a dirty call to the
      endianness- and size-appropriate LOADV helper.  Returns an atom
      holding the loaded V bits.  As a side effect, also emits a
      definedness complaint-check on ADDR itself. */
   void*    helper;
   Char*    hname;
   IRDirty* di;
   IRTemp   datavbits;
   IRAtom*  addrAct;

   tl_assert(isOriginalAtom(mce,addr));
   tl_assert(end == Iend_LE || end == Iend_BE);

   /* First, emit a definedness test for the address.  This also sets
      the address (shadow) to 'defined' following the test. */
   complainIfUndefined( mce, addr, NULL );

   /* Now cook up a call to the relevant helper function, to read the
      data V bits from shadow memory. */
   ty = shadowTypeV(ty);

   if (end == Iend_LE) {
      switch (ty) {
         case Ity_I64: helper = &MC_(helperc_LOADV64le);
                       hname = "MC_(helperc_LOADV64le)";
                       break;
         case Ity_I32: helper = &MC_(helperc_LOADV32le);
                       hname = "MC_(helperc_LOADV32le)";
                       break;
         case Ity_I16: helper = &MC_(helperc_LOADV16le);
                       hname = "MC_(helperc_LOADV16le)";
                       break;
         case Ity_I8:  helper = &MC_(helperc_LOADV8);
                       hname = "MC_(helperc_LOADV8)";
                       break;
         default:      ppIRType(ty);
                       VG_(tool_panic)("memcheck:do_shadow_Load(LE)");
      }
   } else {
      /* Big-endian: single-byte loads share the LE helper since
         endianness is irrelevant at byte granularity. */
      switch (ty) {
         case Ity_I64: helper = &MC_(helperc_LOADV64be);
                       hname = "MC_(helperc_LOADV64be)";
                       break;
         case Ity_I32: helper = &MC_(helperc_LOADV32be);
                       hname = "MC_(helperc_LOADV32be)";
                       break;
         case Ity_I16: helper = &MC_(helperc_LOADV16be);
                       hname = "MC_(helperc_LOADV16be)";
                       break;
         case Ity_I8:  helper = &MC_(helperc_LOADV8);
                       hname = "MC_(helperc_LOADV8)";
                       break;
         default:      ppIRType(ty);
                       VG_(tool_panic)("memcheck:do_shadow_Load(BE)");
      }
   }

   /* Generate the actual address into addrAct. */
   if (bias == 0) {
      addrAct = addr;
   } else {
      /* Fold the bias into the address with a host-word-sized add. */
      IROp    mkAdd;
      IRAtom* eBias;
      IRType  tyAddr  = mce->hWordTy;
      tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
      mkAdd = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
      eBias = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
      addrAct = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBias) );
   }

   /* We need to have a place to park the V bits we're just about to
      read. */
   datavbits = newTemp(mce, ty, VSh);
   di = unsafeIRDirty_1_N( datavbits,
                           1/*regparms*/,
                           hname, VG_(fnptr_to_fnentry)( helper ),
                           mkIRExprVec_1( addrAct ));
   setHelperAnns( mce, di );
   stmt( 'V', mce, IRStmt_Dirty(di) );

   return mkexpr(datavbits);
}
3923
3924
3925static
sewardj2e595852005-06-30 23:33:37 +00003926IRAtom* expr2vbits_Load ( MCEnv* mce,
3927 IREndness end, IRType ty,
3928 IRAtom* addr, UInt bias )
sewardj170ee212004-12-10 18:57:51 +00003929{
sewardj2e595852005-06-30 23:33:37 +00003930 tl_assert(end == Iend_LE || end == Iend_BE);
sewardj7cf4e6b2008-05-01 20:24:26 +00003931 switch (shadowTypeV(ty)) {
sewardj170ee212004-12-10 18:57:51 +00003932 case Ity_I8:
3933 case Ity_I16:
3934 case Ity_I32:
3935 case Ity_I64:
sewardj2e595852005-06-30 23:33:37 +00003936 return expr2vbits_Load_WRK(mce, end, ty, addr, bias);
sewardj45fa9f42012-05-21 10:18:10 +00003937 case Ity_V128: {
3938 IRAtom *v64hi, *v64lo;
sewardj2e595852005-06-30 23:33:37 +00003939 if (end == Iend_LE) {
sewardj45fa9f42012-05-21 10:18:10 +00003940 v64lo = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+0);
sewardj2e595852005-06-30 23:33:37 +00003941 v64hi = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+8);
3942 } else {
sewardj45fa9f42012-05-21 10:18:10 +00003943 v64hi = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+0);
sewardj2e595852005-06-30 23:33:37 +00003944 v64lo = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+8);
3945 }
sewardj7cf4e6b2008-05-01 20:24:26 +00003946 return assignNew( 'V', mce,
sewardj170ee212004-12-10 18:57:51 +00003947 Ity_V128,
sewardj20d38f22005-02-07 23:50:18 +00003948 binop(Iop_64HLtoV128, v64hi, v64lo));
sewardj45fa9f42012-05-21 10:18:10 +00003949 }
3950 case Ity_V256: {
3951 /* V256-bit case -- phrased in terms of 64 bit units (Qs),
3952 with Q3 being the most significant lane. */
3953 if (end == Iend_BE) goto unhandled;
3954 IRAtom* v64Q0 = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+0);
3955 IRAtom* v64Q1 = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+8);
3956 IRAtom* v64Q2 = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+16);
3957 IRAtom* v64Q3 = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+24);
3958 return assignNew( 'V', mce,
3959 Ity_V256,
3960 IRExpr_Qop(Iop_64x4toV256,
3961 v64Q3, v64Q2, v64Q1, v64Q0));
3962 }
3963 unhandled:
sewardj170ee212004-12-10 18:57:51 +00003964 default:
sewardj2e595852005-06-30 23:33:37 +00003965 VG_(tool_panic)("expr2vbits_Load");
sewardj170ee212004-12-10 18:57:51 +00003966 }
3967}
3968
3969
florian434ffae2012-07-19 17:23:42 +00003970/* If there is no guard expression or the guard is always TRUE this function
3971 behaves like expr2vbits_Load. If the guard is not true at runtime, an
3972 all-bits-defined bit pattern will be returned.
3973 It is assumed that definedness of GUARD has already been checked at the call
3974 site. */
3975static
3976IRAtom* expr2vbits_guarded_Load ( MCEnv* mce,
3977 IREndness end, IRType ty,
3978 IRAtom* addr, UInt bias, IRAtom *guard )
3979{
3980 if (guard) {
3981 IRAtom *cond, *iffalse, *iftrue;
3982
3983 cond = assignNew('V', mce, Ity_I8, unop(Iop_1Uto8, guard));
3984 iftrue = assignNew('V', mce, ty,
3985 expr2vbits_Load(mce, end, ty, addr, bias));
3986 iffalse = assignNew('V', mce, ty, definedOfType(ty));
3987
3988 return assignNew('V', mce, ty, IRExpr_Mux0X(cond, iffalse, iftrue));
3989 }
3990
3991 /* No guard expression or unconditional load */
3992 return expr2vbits_Load(mce, end, ty, addr, bias);
3993}
3994
3995
sewardj170ee212004-12-10 18:57:51 +00003996static
sewardj95448072004-11-22 20:19:51 +00003997IRAtom* expr2vbits_Mux0X ( MCEnv* mce,
3998 IRAtom* cond, IRAtom* expr0, IRAtom* exprX )
3999{
4000 IRAtom *vbitsC, *vbits0, *vbitsX;
4001 IRType ty;
4002 /* Given Mux0X(cond,expr0,exprX), generate
4003 Mux0X(cond,expr0#,exprX#) `UifU` PCast(cond#)
4004 That is, steer the V bits like the originals, but trash the
4005 result if the steering value is undefined. This gives
4006 lazy propagation. */
4007 tl_assert(isOriginalAtom(mce, cond));
4008 tl_assert(isOriginalAtom(mce, expr0));
4009 tl_assert(isOriginalAtom(mce, exprX));
4010
4011 vbitsC = expr2vbits(mce, cond);
4012 vbits0 = expr2vbits(mce, expr0);
4013 vbitsX = expr2vbits(mce, exprX);
sewardj1c0ce7a2009-07-01 08:10:49 +00004014 ty = typeOfIRExpr(mce->sb->tyenv, vbits0);
sewardj95448072004-11-22 20:19:51 +00004015
4016 return
sewardj7cf4e6b2008-05-01 20:24:26 +00004017 mkUifU(mce, ty, assignNew('V', mce, ty,
4018 IRExpr_Mux0X(cond, vbits0, vbitsX)),
sewardj95448072004-11-22 20:19:51 +00004019 mkPCastTo(mce, ty, vbitsC) );
4020}
4021
4022/* --------- This is the main expression-handling function. --------- */
4023
4024static
4025IRExpr* expr2vbits ( MCEnv* mce, IRExpr* e )
4026{
4027 switch (e->tag) {
4028
4029 case Iex_Get:
4030 return shadow_GET( mce, e->Iex.Get.offset, e->Iex.Get.ty );
4031
4032 case Iex_GetI:
4033 return shadow_GETI( mce, e->Iex.GetI.descr,
4034 e->Iex.GetI.ix, e->Iex.GetI.bias );
4035
sewardj0b9d74a2006-12-24 02:24:11 +00004036 case Iex_RdTmp:
sewardj7cf4e6b2008-05-01 20:24:26 +00004037 return IRExpr_RdTmp( findShadowTmpV(mce, e->Iex.RdTmp.tmp) );
sewardj95448072004-11-22 20:19:51 +00004038
4039 case Iex_Const:
sewardj1c0ce7a2009-07-01 08:10:49 +00004040 return definedOfType(shadowTypeV(typeOfIRExpr(mce->sb->tyenv, e)));
sewardj95448072004-11-22 20:19:51 +00004041
sewardje91cea72006-02-08 19:32:02 +00004042 case Iex_Qop:
4043 return expr2vbits_Qop(
4044 mce,
floriane2ab2972012-06-01 20:43:03 +00004045 e->Iex.Qop.details->op,
4046 e->Iex.Qop.details->arg1, e->Iex.Qop.details->arg2,
4047 e->Iex.Qop.details->arg3, e->Iex.Qop.details->arg4
sewardje91cea72006-02-08 19:32:02 +00004048 );
4049
sewardjed69fdb2006-02-03 16:12:27 +00004050 case Iex_Triop:
4051 return expr2vbits_Triop(
4052 mce,
florian26441742012-06-02 20:30:41 +00004053 e->Iex.Triop.details->op,
4054 e->Iex.Triop.details->arg1, e->Iex.Triop.details->arg2,
4055 e->Iex.Triop.details->arg3
sewardjed69fdb2006-02-03 16:12:27 +00004056 );
4057
sewardj95448072004-11-22 20:19:51 +00004058 case Iex_Binop:
4059 return expr2vbits_Binop(
4060 mce,
4061 e->Iex.Binop.op,
4062 e->Iex.Binop.arg1, e->Iex.Binop.arg2
4063 );
4064
4065 case Iex_Unop:
4066 return expr2vbits_Unop( mce, e->Iex.Unop.op, e->Iex.Unop.arg );
4067
sewardj2e595852005-06-30 23:33:37 +00004068 case Iex_Load:
4069 return expr2vbits_Load( mce, e->Iex.Load.end,
4070 e->Iex.Load.ty,
4071 e->Iex.Load.addr, 0/*addr bias*/ );
sewardj95448072004-11-22 20:19:51 +00004072
4073 case Iex_CCall:
4074 return mkLazyN( mce, e->Iex.CCall.args,
4075 e->Iex.CCall.retty,
4076 e->Iex.CCall.cee );
4077
4078 case Iex_Mux0X:
4079 return expr2vbits_Mux0X( mce, e->Iex.Mux0X.cond, e->Iex.Mux0X.expr0,
4080 e->Iex.Mux0X.exprX);
njn25e49d8e72002-09-23 09:36:25 +00004081
4082 default:
sewardj95448072004-11-22 20:19:51 +00004083 VG_(printf)("\n");
4084 ppIRExpr(e);
4085 VG_(printf)("\n");
4086 VG_(tool_panic)("memcheck: expr2vbits");
njn25e49d8e72002-09-23 09:36:25 +00004087 }
njn25e49d8e72002-09-23 09:36:25 +00004088}
4089
4090/*------------------------------------------------------------*/
sewardj95448072004-11-22 20:19:51 +00004091/*--- Generate shadow stmts from all kinds of IRStmts. ---*/
njn25e49d8e72002-09-23 09:36:25 +00004092/*------------------------------------------------------------*/
4093
sewardj95448072004-11-22 20:19:51 +00004094/* Widen a value to the host word size. */
njn25e49d8e72002-09-23 09:36:25 +00004095
4096static
sewardj95448072004-11-22 20:19:51 +00004097IRExpr* zwidenToHostWord ( MCEnv* mce, IRAtom* vatom )
njn25e49d8e72002-09-23 09:36:25 +00004098{
sewardj7cf97ee2004-11-28 14:25:01 +00004099 IRType ty, tyH;
4100
sewardj95448072004-11-22 20:19:51 +00004101 /* vatom is vbits-value and as such can only have a shadow type. */
4102 tl_assert(isShadowAtom(mce,vatom));
njn25e49d8e72002-09-23 09:36:25 +00004103
sewardj1c0ce7a2009-07-01 08:10:49 +00004104 ty = typeOfIRExpr(mce->sb->tyenv, vatom);
sewardj7cf97ee2004-11-28 14:25:01 +00004105 tyH = mce->hWordTy;
njn25e49d8e72002-09-23 09:36:25 +00004106
sewardj95448072004-11-22 20:19:51 +00004107 if (tyH == Ity_I32) {
4108 switch (ty) {
sewardj7cf4e6b2008-05-01 20:24:26 +00004109 case Ity_I32:
4110 return vatom;
4111 case Ity_I16:
4112 return assignNew('V', mce, tyH, unop(Iop_16Uto32, vatom));
4113 case Ity_I8:
4114 return assignNew('V', mce, tyH, unop(Iop_8Uto32, vatom));
4115 default:
4116 goto unhandled;
sewardj8ec2cfc2002-10-13 00:57:26 +00004117 }
sewardj6cf40ff2005-04-20 22:31:26 +00004118 } else
4119 if (tyH == Ity_I64) {
4120 switch (ty) {
sewardj7cf4e6b2008-05-01 20:24:26 +00004121 case Ity_I32:
4122 return assignNew('V', mce, tyH, unop(Iop_32Uto64, vatom));
4123 case Ity_I16:
4124 return assignNew('V', mce, tyH, unop(Iop_32Uto64,
4125 assignNew('V', mce, Ity_I32, unop(Iop_16Uto32, vatom))));
4126 case Ity_I8:
4127 return assignNew('V', mce, tyH, unop(Iop_32Uto64,
4128 assignNew('V', mce, Ity_I32, unop(Iop_8Uto32, vatom))));
4129 default:
4130 goto unhandled;
sewardj6cf40ff2005-04-20 22:31:26 +00004131 }
sewardj95448072004-11-22 20:19:51 +00004132 } else {
4133 goto unhandled;
sewardj8ec2cfc2002-10-13 00:57:26 +00004134 }
sewardj95448072004-11-22 20:19:51 +00004135 unhandled:
4136 VG_(printf)("\nty = "); ppIRType(ty); VG_(printf)("\n");
4137 VG_(tool_panic)("zwidenToHostWord");
njn25e49d8e72002-09-23 09:36:25 +00004138}
4139
njn25e49d8e72002-09-23 09:36:25 +00004140
sewardj95448072004-11-22 20:19:51 +00004141/* Generate a shadow store. addr is always the original address atom.
4142 You can pass in either originals or V-bits for the data atom, but
sewardj1c0ce7a2009-07-01 08:10:49 +00004143 obviously not both. guard :: Ity_I1 controls whether the store
4144 really happens; NULL means it unconditionally does. Note that
4145 guard itself is not checked for definedness; the caller of this
4146 function must do that if necessary. */
njn25e49d8e72002-09-23 09:36:25 +00004147
static
void do_shadow_Store ( MCEnv* mce,
                       IREndness end,
                       IRAtom* addr, UInt bias,
                       IRAtom* data, IRAtom* vdata,
                       IRAtom* guard )
{
   /* Emit the shadow-memory updates for a store of DATA (or of
      already-shadowed VDATA -- exactly one of the two is non-NULL) to
      ADDR+BIAS, by calling the size/endianness-appropriate STOREV
      helper(s).  V128 stores are split into two 64-bit helper calls
      and V256 stores into four.  GUARD (Ity_I1 or NULL) conditions
      the helper calls; GUARD's own definedness is NOT checked here --
      the caller must do that if necessary. */
   IROp     mkAdd;
   IRType   ty, tyAddr;
   void*    helper = NULL;
   Char*    hname = NULL;
   IRConst* c;

   tyAddr = mce->hWordTy;
   mkAdd  = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
   tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
   tl_assert( end == Iend_LE || end == Iend_BE );

   if (data) {
      /* Caller gave the original data; shadow it ourselves. */
      tl_assert(!vdata);
      tl_assert(isOriginalAtom(mce, data));
      tl_assert(bias == 0);
      vdata = expr2vbits( mce, data );
   } else {
      tl_assert(vdata);
   }

   tl_assert(isOriginalAtom(mce,addr));
   tl_assert(isShadowAtom(mce,vdata));

   if (guard) {
      tl_assert(isOriginalAtom(mce, guard));
      tl_assert(typeOfIRExpr(mce->sb->tyenv, guard) == Ity_I1);
   }

   ty = typeOfIRExpr(mce->sb->tyenv, vdata);

   // If we're not doing undefined value checking, pretend that this value
   // is "all valid".  That lets Vex's optimiser remove some of the V bit
   // shadow computation ops that precede it.
   if (MC_(clo_mc_level) == 1) {
      switch (ty) {
         case Ity_V256: // V256 weirdness -- used four times
                        c = IRConst_V256(V_BITS32_DEFINED); break;
         case Ity_V128: // V128 weirdness -- used twice
                        c = IRConst_V128(V_BITS16_DEFINED); break;
         case Ity_I64:  c = IRConst_U64 (V_BITS64_DEFINED); break;
         case Ity_I32:  c = IRConst_U32 (V_BITS32_DEFINED); break;
         case Ity_I16:  c = IRConst_U16 (V_BITS16_DEFINED); break;
         case Ity_I8:   c = IRConst_U8  (V_BITS8_DEFINED);  break;
         default:       VG_(tool_panic)("memcheck:do_shadow_Store(LE)");
      }
      vdata = IRExpr_Const( c );
   }

   /* First, emit a definedness test for the address.  This also sets
      the address (shadow) to 'defined' following the test. */
   complainIfUndefined( mce, addr, guard );

   /* Now decide which helper function to call to write the data V
      bits into shadow memory. */
   if (end == Iend_LE) {
      switch (ty) {
         case Ity_V256: /* we'll use the helper four times */
         case Ity_V128: /* we'll use the helper twice */
         case Ity_I64: helper = &MC_(helperc_STOREV64le);
                       hname = "MC_(helperc_STOREV64le)";
                       break;
         case Ity_I32: helper = &MC_(helperc_STOREV32le);
                       hname = "MC_(helperc_STOREV32le)";
                       break;
         case Ity_I16: helper = &MC_(helperc_STOREV16le);
                       hname = "MC_(helperc_STOREV16le)";
                       break;
         case Ity_I8:  helper = &MC_(helperc_STOREV8);
                       hname = "MC_(helperc_STOREV8)";
                       break;
         default:      VG_(tool_panic)("memcheck:do_shadow_Store(LE)");
      }
   } else {
      switch (ty) {
         case Ity_V128: /* we'll use the helper twice */
         case Ity_I64: helper = &MC_(helperc_STOREV64be);
                       hname = "MC_(helperc_STOREV64be)";
                       break;
         case Ity_I32: helper = &MC_(helperc_STOREV32be);
                       hname = "MC_(helperc_STOREV32be)";
                       break;
         case Ity_I16: helper = &MC_(helperc_STOREV16be);
                       hname = "MC_(helperc_STOREV16be)";
                       break;
         case Ity_I8:  helper = &MC_(helperc_STOREV8);
                       hname = "MC_(helperc_STOREV8)";
                       break;
         /* Note, no V256 case here, because no big-endian target that
            we support, has 256 vectors. */
         default:      VG_(tool_panic)("memcheck:do_shadow_Store(BE)");
      }
   }

   if (UNLIKELY(ty == Ity_V256)) {

      /* V256-bit case -- phrased in terms of 64 bit units (Qs), with
         Q3 being the most significant lane. */
      /* These are the offsets of the Qs in memory. */
      Int     offQ0, offQ1, offQ2, offQ3;

      /* Various bits for constructing the 4 lane helper calls */
      IRDirty *diQ0,    *diQ1,    *diQ2,    *diQ3;
      IRAtom  *addrQ0,  *addrQ1,  *addrQ2,  *addrQ3;
      IRAtom  *vdataQ0, *vdataQ1, *vdataQ2, *vdataQ3;
      IRAtom  *eBiasQ0, *eBiasQ1, *eBiasQ2, *eBiasQ3;

      if (end == Iend_LE) {
         offQ0 = 0; offQ1 = 8; offQ2 = 16; offQ3 = 24;
      } else {
         offQ3 = 0; offQ2 = 8; offQ1 = 16; offQ0 = 24;
      }

      /* For each lane: compute its address, extract its V bits, and
         build (but do not yet emit) the helper call. */
      eBiasQ0 = tyAddr==Ity_I32 ? mkU32(bias+offQ0) : mkU64(bias+offQ0);
      addrQ0  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ0) );
      vdataQ0 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_0, vdata));
      diQ0    = unsafeIRDirty_0_N(
                   1/*regparms*/,
                   hname, VG_(fnptr_to_fnentry)( helper ),
                   mkIRExprVec_2( addrQ0, vdataQ0 )
                );

      eBiasQ1 = tyAddr==Ity_I32 ? mkU32(bias+offQ1) : mkU64(bias+offQ1);
      addrQ1  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ1) );
      vdataQ1 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_1, vdata));
      diQ1    = unsafeIRDirty_0_N(
                   1/*regparms*/,
                   hname, VG_(fnptr_to_fnentry)( helper ),
                   mkIRExprVec_2( addrQ1, vdataQ1 )
                );

      eBiasQ2 = tyAddr==Ity_I32 ? mkU32(bias+offQ2) : mkU64(bias+offQ2);
      addrQ2  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ2) );
      vdataQ2 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_2, vdata));
      diQ2    = unsafeIRDirty_0_N(
                   1/*regparms*/,
                   hname, VG_(fnptr_to_fnentry)( helper ),
                   mkIRExprVec_2( addrQ2, vdataQ2 )
                );

      eBiasQ3 = tyAddr==Ity_I32 ? mkU32(bias+offQ3) : mkU64(bias+offQ3);
      addrQ3  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ3) );
      vdataQ3 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_3, vdata));
      diQ3    = unsafeIRDirty_0_N(
                   1/*regparms*/,
                   hname, VG_(fnptr_to_fnentry)( helper ),
                   mkIRExprVec_2( addrQ3, vdataQ3 )
                );

      /* All four lane-stores share the same guard. */
      if (guard)
         diQ0->guard = diQ1->guard = diQ2->guard = diQ3->guard = guard;

      setHelperAnns( mce, diQ0 );
      setHelperAnns( mce, diQ1 );
      setHelperAnns( mce, diQ2 );
      setHelperAnns( mce, diQ3 );
      stmt( 'V', mce, IRStmt_Dirty(diQ0) );
      stmt( 'V', mce, IRStmt_Dirty(diQ1) );
      stmt( 'V', mce, IRStmt_Dirty(diQ2) );
      stmt( 'V', mce, IRStmt_Dirty(diQ3) );

   }
   else if (UNLIKELY(ty == Ity_V128)) {

      /* V128-bit case */
      /* See comment in next clause re 64-bit regparms */
      /* also, need to be careful about endianness */

      Int     offLo64, offHi64;
      IRDirty *diLo64, *diHi64;
      IRAtom  *addrLo64, *addrHi64;
      IRAtom  *vdataLo64, *vdataHi64;
      IRAtom  *eBiasLo64, *eBiasHi64;

      if (end == Iend_LE) {
         offLo64 = 0;
         offHi64 = 8;
      } else {
         offLo64 = 8;
         offHi64 = 0;
      }

      eBiasLo64 = tyAddr==Ity_I32 ? mkU32(bias+offLo64) : mkU64(bias+offLo64);
      addrLo64  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasLo64) );
      vdataLo64 = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vdata));
      diLo64    = unsafeIRDirty_0_N(
                     1/*regparms*/,
                     hname, VG_(fnptr_to_fnentry)( helper ),
                     mkIRExprVec_2( addrLo64, vdataLo64 )
                  );
      eBiasHi64 = tyAddr==Ity_I32 ? mkU32(bias+offHi64) : mkU64(bias+offHi64);
      addrHi64  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasHi64) );
      vdataHi64 = assignNew('V', mce, Ity_I64, unop(Iop_V128HIto64, vdata));
      diHi64    = unsafeIRDirty_0_N(
                     1/*regparms*/,
                     hname, VG_(fnptr_to_fnentry)( helper ),
                     mkIRExprVec_2( addrHi64, vdataHi64 )
                  );
      if (guard) diLo64->guard = guard;
      if (guard) diHi64->guard = guard;
      setHelperAnns( mce, diLo64 );
      setHelperAnns( mce, diHi64 );
      stmt( 'V', mce, IRStmt_Dirty(diLo64) );
      stmt( 'V', mce, IRStmt_Dirty(diHi64) );

   } else {

      IRDirty *di;
      IRAtom  *addrAct;

      /* 8/16/32/64-bit cases */
      /* Generate the actual address into addrAct. */
      if (bias == 0) {
         addrAct = addr;
      } else {
         IRAtom* eBias   = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
         addrAct = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBias));
      }

      if (ty == Ity_I64) {
         /* We can't do this with regparm 2 on 32-bit platforms, since
            the back ends aren't clever enough to handle 64-bit
            regparm args.  Therefore be different. */
         di = unsafeIRDirty_0_N(
                 1/*regparms*/,
                 hname, VG_(fnptr_to_fnentry)( helper ),
                 mkIRExprVec_2( addrAct, vdata )
              );
      } else {
         /* Sub-word data is zero-widened to a host word first. */
         di = unsafeIRDirty_0_N(
                 2/*regparms*/,
                 hname, VG_(fnptr_to_fnentry)( helper ),
                 mkIRExprVec_2( addrAct,
                                zwidenToHostWord( mce, vdata ))
              );
      }
      if (guard) di->guard = guard;
      setHelperAnns( mce, di );
      stmt( 'V', mce, IRStmt_Dirty(di) );
   }

}
njn25e49d8e72002-09-23 09:36:25 +00004396
njn25e49d8e72002-09-23 09:36:25 +00004397
sewardj95448072004-11-22 20:19:51 +00004398/* Do lazy pessimistic propagation through a dirty helper call, by
4399 looking at the annotations on it. This is the most complex part of
4400 Memcheck. */
njn25e49d8e72002-09-23 09:36:25 +00004401
sewardj95448072004-11-22 20:19:51 +00004402static IRType szToITy ( Int n )
4403{
4404 switch (n) {
4405 case 1: return Ity_I8;
4406 case 2: return Ity_I16;
4407 case 4: return Ity_I32;
4408 case 8: return Ity_I64;
4409 default: VG_(tool_panic)("szToITy(memcheck)");
4410 }
4411}
njn25e49d8e72002-09-23 09:36:25 +00004412
/* Instrument a dirty helper call, pessimistically: PCast every input
   (unmasked args, guest state read, memory read) down to one I32
   V-value 'curr', then fan 'curr' back out, suitably PCast-widened,
   to every output (the result temp, guest state written, memory
   written).  Guarded reads of guest state are Mux'd with an
   all-defined pattern so a false guard contributes no undefinedness. */
static
void do_shadow_Dirty ( MCEnv* mce, IRDirty* d )
{
   Int       i, k, n, toDo, gSz, gOff;
   IRAtom    *src, *here, *curr;
   IRType    tySrc, tyDst;
   IRTemp    dst;
   IREndness end;

   /* What's the native endianness?  We need to know this. */
#  if defined(VG_BIGENDIAN)
   end = Iend_BE;
#  elif defined(VG_LITTLEENDIAN)
   end = Iend_LE;
#  else
#    error "Unknown endianness"
#  endif

   /* First check the guard. */
   complainIfUndefined(mce, d->guard, NULL);

   /* Now round up all inputs and PCast over them. */
   curr = definedOfType(Ity_I32);

   /* Inputs: unmasked args
      Note: arguments are evaluated REGARDLESS of the guard expression */
   for (i = 0; d->args[i]; i++) {
      if (d->cee->mcx_mask & (1<<i)) {
         /* ignore this arg */
      } else {
         here = mkPCastTo( mce, Ity_I32, expr2vbits(mce, d->args[i]) );
         curr = mkUifU32(mce, here, curr);
      }
   }

   /* Inputs: guest state that we read. */
   for (i = 0; i < d->nFxState; i++) {
      tl_assert(d->fxState[i].fx != Ifx_None);
      if (d->fxState[i].fx == Ifx_Write)
         continue;

      /* Enumerate the described state segments */
      for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) {
         gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen;
         gSz  = d->fxState[i].size;

         /* Ignore any sections marked as 'always defined'. */
         if (isAlwaysDefd(mce, gOff, gSz)) {
            if (0)
            VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
                        gOff, gSz);
            continue;
         }

         /* This state element is read or modified.  So we need to
            consider it.  If larger than 8 bytes, deal with it in
            8-byte chunks. */
         while (True) {
            tl_assert(gSz >= 0);
            if (gSz == 0) break;
            n = gSz <= 8 ? gSz : 8;
            /* update 'curr' with UifU of the state slice 
               gOff .. gOff+n-1 */
            tySrc = szToITy( n );

            /* Observe the guard expression.  If it is false use an
               all-bits-defined bit pattern, so a skipped helper call
               cannot propagate undefinedness from unread state. */
            IRAtom *cond, *iffalse, *iftrue;

            cond    = assignNew('V', mce, Ity_I8, unop(Iop_1Uto8, d->guard));
            iftrue  = assignNew('V', mce, tySrc, shadow_GET(mce, gOff, tySrc));
            iffalse = assignNew('V', mce, tySrc, definedOfType(tySrc));
            src     = assignNew('V', mce, tySrc,
                                IRExpr_Mux0X(cond, iffalse, iftrue));

            here = mkPCastTo( mce, Ity_I32, src );
            curr = mkUifU32(mce, here, curr);
            gSz -= n;
            gOff += n;
         }
      }
   }

   /* Inputs: memory.  First set up some info needed regardless of
      whether we're doing reads or writes. */

   if (d->mFx != Ifx_None) {
      /* Because we may do multiple shadow loads/stores from the same
         base address, it's best to do a single test of its
         definedness right now.  Post-instrumentation optimisation
         should remove all but this test. */
      IRType tyAddr;
      tl_assert(d->mAddr);
      complainIfUndefined(mce, d->mAddr, d->guard);

      tyAddr = typeOfIRExpr(mce->sb->tyenv, d->mAddr);
      tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);
      tl_assert(tyAddr == mce->hWordTy); /* not really right */
   }

   /* Deal with memory inputs (reads or modifies) */
   if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
      toDo   = d->mSize;
      /* chew off 32-bit chunks.  We don't care about the endianness
         since it's all going to be condensed down to a single bit,
         but nevertheless choose an endianness which is hopefully
         native to the platform. */
      while (toDo >= 4) {
         here = mkPCastTo( 
                   mce, Ity_I32,
                   expr2vbits_guarded_Load ( mce, end, Ity_I32, d->mAddr, 
                                             d->mSize - toDo, d->guard )
                );
         curr = mkUifU32(mce, here, curr);
         toDo -= 4;
      }
      /* chew off 16-bit chunks */
      while (toDo >= 2) {
         here = mkPCastTo( 
                   mce, Ity_I32,
                   expr2vbits_guarded_Load ( mce, end, Ity_I16, d->mAddr, 
                                             d->mSize - toDo, d->guard )
                );
         curr = mkUifU32(mce, here, curr);
         toDo -= 2;
      }
      /* chew off the remaining 8-bit chunk, if any */
      if (toDo == 1) {
         here = mkPCastTo( 
                   mce, Ity_I32,
                   expr2vbits_guarded_Load ( mce, end, Ity_I8, d->mAddr, 
                                             d->mSize - toDo, d->guard )
                );
         curr = mkUifU32(mce, here, curr);
         toDo -= 1;
      }
      tl_assert(toDo == 0);
   }

   /* Whew!  So curr is a 32-bit V-value summarising pessimistically
      all the inputs to the helper.  Now we need to re-distribute the
      results to all destinations. */

   /* Outputs: the destination temporary, if there is one. */
   if (d->tmp != IRTemp_INVALID) {
      dst   = findShadowTmpV(mce, d->tmp);
      tyDst = typeOfIRTemp(mce->sb->tyenv, d->tmp);
      assign( 'V', mce, dst, mkPCastTo( mce, tyDst, curr) );
   }

   /* Outputs: guest state that we write or modify. */
   for (i = 0; i < d->nFxState; i++) {
      tl_assert(d->fxState[i].fx != Ifx_None);
      if (d->fxState[i].fx == Ifx_Read)
         continue;

      /* Enumerate the described state segments */
      for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) {
         gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen;
         gSz  = d->fxState[i].size;

         /* Ignore any sections marked as 'always defined'. */
         if (isAlwaysDefd(mce, gOff, gSz))
            continue;

         /* This state element is written or modified.  So we need to
            consider it.  If larger than 8 bytes, deal with it in
            8-byte chunks. */
         while (True) {
            tl_assert(gSz >= 0);
            if (gSz == 0) break;
            n = gSz <= 8 ? gSz : 8;
            /* Write suitably-casted 'curr' to the state slice 
               gOff .. gOff+n-1 */
            tyDst = szToITy( n );
            do_shadow_PUT( mce, gOff,
                           NULL, /* original atom */
                           mkPCastTo( mce, tyDst, curr ), d->guard );
            gSz -= n;
            gOff += n;
         }
      }
   }

   /* Outputs: memory that we write or modify.  Same comments about
      endianness as above apply. */
   if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
      toDo   = d->mSize;
      /* chew off 32-bit chunks */
      while (toDo >= 4) {
         do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
                          NULL, /* original data */
                          mkPCastTo( mce, Ity_I32, curr ),
                          d->guard );
         toDo -= 4;
      }
      /* chew off 16-bit chunks */
      while (toDo >= 2) {
         do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
                          NULL, /* original data */
                          mkPCastTo( mce, Ity_I16, curr ),
                          d->guard );
         toDo -= 2;
      }
      /* chew off the remaining 8-bit chunk, if any */
      if (toDo == 1) {
         do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
                          NULL, /* original data */
                          mkPCastTo( mce, Ity_I8, curr ),
                          d->guard );
         toDo -= 1;
      }
      tl_assert(toDo == 0);
   }

}
4629
sewardj1c0ce7a2009-07-01 08:10:49 +00004630
sewardj826ec492005-05-12 18:05:00 +00004631/* We have an ABI hint telling us that [base .. base+len-1] is to
4632 become undefined ("writable"). Generate code to call a helper to
4633 notify the A/V bit machinery of this fact.
4634
4635 We call
sewardj7cf4e6b2008-05-01 20:24:26 +00004636 void MC_(helperc_MAKE_STACK_UNINIT) ( Addr base, UWord len,
4637 Addr nia );
sewardj826ec492005-05-12 18:05:00 +00004638*/
4639static
sewardj7cf4e6b2008-05-01 20:24:26 +00004640void do_AbiHint ( MCEnv* mce, IRExpr* base, Int len, IRExpr* nia )
sewardj826ec492005-05-12 18:05:00 +00004641{
4642 IRDirty* di;
sewardj7cf4e6b2008-05-01 20:24:26 +00004643 /* Minor optimisation: if not doing origin tracking, ignore the
4644 supplied nia and pass zero instead. This is on the basis that
4645 MC_(helperc_MAKE_STACK_UNINIT) will ignore it anyway, and we can
4646 almost always generate a shorter instruction to put zero into a
4647 register than any other value. */
4648 if (MC_(clo_mc_level) < 3)
4649 nia = mkIRExpr_HWord(0);
4650
sewardj826ec492005-05-12 18:05:00 +00004651 di = unsafeIRDirty_0_N(
4652 0/*regparms*/,
4653 "MC_(helperc_MAKE_STACK_UNINIT)",
sewardj53ee1fc2005-12-23 02:29:58 +00004654 VG_(fnptr_to_fnentry)( &MC_(helperc_MAKE_STACK_UNINIT) ),
sewardj7cf4e6b2008-05-01 20:24:26 +00004655 mkIRExprVec_3( base, mkIRExpr_HWord( (UInt)len), nia )
sewardj826ec492005-05-12 18:05:00 +00004656 );
sewardj7cf4e6b2008-05-01 20:24:26 +00004657 stmt( 'V', mce, IRStmt_Dirty(di) );
sewardj826ec492005-05-12 18:05:00 +00004658}
4659
njn25e49d8e72002-09-23 09:36:25 +00004660
sewardj1c0ce7a2009-07-01 08:10:49 +00004661/* ------ Dealing with IRCAS (big and complex) ------ */
4662
/* FWDS */
/* Load the origin ('B') shadow for szB bytes at baseaddr+offset.
   (Definition appears later in this file.) */
static IRAtom* gen_load_b  ( MCEnv* mce, Int szB, 
                             IRAtom* baseaddr, Int offset );
/* Combine two origin tags; presumably returns max(b1,b2) as the name
   suggests -- see the definition later in this file to confirm. */
static IRAtom* gen_maxU32 ( MCEnv* mce, IRAtom* b1, IRAtom* b2 );
/* Store origin shadow dataB over szB bytes at baseaddr+offset,
   optionally conditional on 'guard'. */
static void gen_store_b ( MCEnv* mce, Int szB,
                          IRAtom* baseaddr, Int offset, IRAtom* dataB,
                          IRAtom* guard );

static void do_shadow_CAS_single ( MCEnv* mce, IRCAS* cas );
static void do_shadow_CAS_double ( MCEnv* mce, IRCAS* cas );
4673
4674
4675/* Either ORIG and SHADOW are both IRExpr.RdTmps, or they are both
4676 IRExpr.Consts, else this asserts. If they are both Consts, it
4677 doesn't do anything. So that just leaves the RdTmp case.
4678
4679 In which case: this assigns the shadow value SHADOW to the IR
4680 shadow temporary associated with ORIG. That is, ORIG, being an
4681 original temporary, will have a shadow temporary associated with
4682 it. However, in the case envisaged here, there will so far have
4683 been no IR emitted to actually write a shadow value into that
4684 temporary. What this routine does is to (emit IR to) copy the
4685 value in SHADOW into said temporary, so that after this call,
4686 IRExpr.RdTmps of ORIG's shadow temp will correctly pick up the
4687 value in SHADOW.
4688
4689 Point is to allow callers to compute "by hand" a shadow value for
4690 ORIG, and force it to be associated with ORIG.
4691
4692 How do we know that that shadow associated with ORIG has not so far
4693 been assigned to? Well, we don't per se know that, but supposing
4694 it had. Then this routine would create a second assignment to it,
4695 and later the IR sanity checker would barf. But that never
4696 happens. QED.
4697*/
4698static void bind_shadow_tmp_to_orig ( UChar how,
4699 MCEnv* mce,
4700 IRAtom* orig, IRAtom* shadow )
4701{
4702 tl_assert(isOriginalAtom(mce, orig));
4703 tl_assert(isShadowAtom(mce, shadow));
4704 switch (orig->tag) {
4705 case Iex_Const:
4706 tl_assert(shadow->tag == Iex_Const);
4707 break;
4708 case Iex_RdTmp:
4709 tl_assert(shadow->tag == Iex_RdTmp);
4710 if (how == 'V') {
4711 assign('V', mce, findShadowTmpV(mce,orig->Iex.RdTmp.tmp),
4712 shadow);
4713 } else {
4714 tl_assert(how == 'B');
4715 assign('B', mce, findShadowTmpB(mce,orig->Iex.RdTmp.tmp),
4716 shadow);
4717 }
4718 break;
4719 default:
4720 tl_assert(0);
4721 }
4722}
4723
4724
/* Instrument an IRCAS statement: dispatch to the single- or
   double-element case.  The overall scheme is described below. */
static
void do_shadow_CAS ( MCEnv* mce, IRCAS* cas )
{
   /* Scheme is (both single- and double- cases):

      1. fetch data#,dataB (the proposed new value)

      2. fetch expd#,expdB (what we expect to see at the address)

      3. check definedness of address

      4. load old#,oldB from shadow memory; this also checks
         addressibility of the address

      5. the CAS itself

      6. compute "expected == old".  See COMMENT_ON_CasCmpEQ below.

      7. if "expected == old" (as computed by (6))
            store data#,dataB to shadow memory

      Note that 5 reads 'old' but 4 reads 'old#'.  Similarly, 5 stores
      'data' but 7 stores 'data#'.  Hence it is possible for the
      shadow data to be incorrectly checked and/or updated:

      * 7 is at least gated correctly, since the 'expected == old'
        condition is derived from outputs of 5.  However, the shadow
        write could happen too late: imagine after 5 we are
        descheduled, a different thread runs, writes a different
        (shadow) value at the address, and then we resume, hence
        overwriting the shadow value written by the other thread.

      Because the original memory access is atomic, there's no way to
      make both the original and shadow accesses into a single atomic
      thing, hence this is unavoidable.

      At least as Valgrind stands, I don't think it's a problem, since
      we're single threaded *and* we guarantee that there are no
      context switches during the execution of any specific superblock
      -- context switches can only happen at superblock boundaries.

      If Valgrind ever becomes MT in the future, then it might be more
      of a problem.  A possible kludge would be to artificially
      associate with the location, a lock, which we must acquire and
      release around the transaction as a whole.  Hmm, that probably
      wouldn't work properly since it only guards us against other
      threads doing CASs on the same location, not against other
      threads doing normal reads and writes.

      ------------------------------------------------------------

      COMMENT_ON_CasCmpEQ:

      Note two things.  Firstly, in the sequence above, we compute
      "expected == old", but we don't check definedness of it.  Why
      not?  Also, the x86 and amd64 front ends use
      Iop_CmpCas{EQ,NE}{8,16,32,64} comparisons to make the equivalent
      determination (expected == old ?) for themselves, and we also
      don't check definedness for those primops; we just say that the
      result is defined.  Why?  Details follow.

      x86/amd64 contains various forms of locked insns:
      * lock prefix before all basic arithmetic insn; 
        eg lock xorl %reg1,(%reg2)
      * atomic exchange reg-mem
      * compare-and-swaps

      Rather than attempt to represent them all, which would be a
      royal PITA, I used a result from Maurice Herlihy
      (http://en.wikipedia.org/wiki/Maurice_Herlihy), in which he
      demonstrates that compare-and-swap is a primitive more general
      than the other two, and so can be used to represent all of them.
      So the translation scheme for (eg) lock incl (%reg) is as
      follows:

        again:
         old = * %reg
         new = old + 1
         atomically { if (* %reg == old) { * %reg = new } else { goto again } }

      The "atomically" is the CAS bit.  The scheme is always the same:
      get old value from memory, compute new value, atomically stuff
      new value back in memory iff the old value has not changed (iow,
      no other thread modified it in the meantime).  If it has changed
      then we've been out-raced and we have to start over.

      Now that's all very neat, but it has the bad side effect of
      introducing an explicit equality test into the translation.
      Consider the behaviour of said code on a memory location which
      is uninitialised.  We will wind up doing a comparison on
      uninitialised data, and mc duly complains.

      What's difficult about this is, the common case is that the
      location is uncontended, and so we're usually comparing the same
      value (* %reg) with itself.  So we shouldn't complain even if it
      is undefined.  But mc doesn't know that.

      My solution is to mark the == in the IR specially, so as to tell
      mc that it almost certainly compares a value with itself, and we
      should just regard the result as always defined.  Rather than
      add a bit to all IROps, I just cloned Iop_CmpEQ{8,16,32,64} into
      Iop_CasCmpEQ{8,16,32,64} so as not to disturb anything else.

      So there's always the question of, can this give a false
      negative?  eg, imagine that initially, * %reg is defined; and we
      read that; but then in the gap between the read and the CAS, a
      different thread writes an undefined (and different) value at
      the location.  Then the CAS in this thread will fail and we will
      go back to "again:", but without knowing that the trip back
      there was based on an undefined comparison.  No matter; at least
      the other thread won the race and the location is correctly
      marked as undefined.  What if it wrote an uninitialised version
      of the same value that was there originally, though?

      etc etc.  Seems like there's a small corner case in which we
      might lose the fact that something's defined -- we're out-raced
      in between the "old = * reg" and the "atomically {", _and_ the
      other thread is writing in an undefined version of what's
      already there.  Well, that seems pretty unlikely.

      ---

      If we ever need to reinstate it .. code which generates a
      definedness test for "expected == old" was removed at r10432 of
      this file.
   */
   if (cas->oldHi == IRTemp_INVALID) {
      do_shadow_CAS_single( mce, cas );
   } else {
      do_shadow_CAS_double( mce, cas );
   }
}
4857
4858
/* Instrument a single-element CAS, following steps 1..7 described in
   the big comment on do_shadow_CAS above.  'V' atoms are definedness
   shadows, 'B' atoms are origin shadows (only when origin tracking,
   MC_(clo_mc_level) >= 3, is enabled). */
static void do_shadow_CAS_single ( MCEnv* mce, IRCAS* cas )
{
   IRAtom *vdataLo = NULL, *bdataLo = NULL;
   IRAtom *vexpdLo = NULL, *bexpdLo = NULL;
   IRAtom *voldLo  = NULL, *boldLo  = NULL;
   IRAtom *expd_eq_old = NULL;
   IROp   opCasCmpEQ;
   Int    elemSzB;
   IRType elemTy;
   Bool   otrak = MC_(clo_mc_level) >= 3; /* a shorthand */

   /* single CAS */
   tl_assert(cas->oldHi == IRTemp_INVALID);
   tl_assert(cas->expdHi == NULL);
   tl_assert(cas->dataHi == NULL);

   elemTy = typeOfIRExpr(mce->sb->tyenv, cas->expdLo);
   switch (elemTy) {
      case Ity_I8:  elemSzB = 1; opCasCmpEQ = Iop_CasCmpEQ8;  break;
      case Ity_I16: elemSzB = 2; opCasCmpEQ = Iop_CasCmpEQ16; break;
      case Ity_I32: elemSzB = 4; opCasCmpEQ = Iop_CasCmpEQ32; break;
      case Ity_I64: elemSzB = 8; opCasCmpEQ = Iop_CasCmpEQ64; break;
      default: tl_assert(0); /* IR defn disallows any other types */
   }

   /* 1. fetch data# (the proposed new value) */
   tl_assert(isOriginalAtom(mce, cas->dataLo));
   vdataLo
      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataLo));
   tl_assert(isShadowAtom(mce, vdataLo));
   if (otrak) {
      bdataLo
         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataLo));
      tl_assert(isShadowAtom(mce, bdataLo));
   }

   /* 2. fetch expected# (what we expect to see at the address) */
   tl_assert(isOriginalAtom(mce, cas->expdLo));
   vexpdLo
      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdLo));
   tl_assert(isShadowAtom(mce, vexpdLo));
   if (otrak) {
      bexpdLo
         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdLo));
      tl_assert(isShadowAtom(mce, bexpdLo));
   }

   /* 3. check definedness of address */
   /* 4. fetch old# from shadow memory; this also checks
         addressibility of the address */
   voldLo
      = assignNew(
           'V', mce, elemTy,
           expr2vbits_Load( 
              mce,
              cas->end, elemTy, cas->addr, 0/*Addr bias*/
        ));
   bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldLo), voldLo);
   if (otrak) {
      boldLo
         = assignNew('B', mce, Ity_I32,
                     gen_load_b(mce, elemSzB, cas->addr, 0/*addr bias*/));
      bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldLo), boldLo);
   }

   /* 5. the CAS itself */
   stmt( 'C', mce, IRStmt_CAS(cas) );

   /* 6. compute "expected == old" */
   /* See COMMENT_ON_CasCmpEQ in this file for background/rationale. */
   /* Note that 'C' is kinda faking it; it is indeed a non-shadow
      tree, but it's not copied from the input block. */
   expd_eq_old
      = assignNew('C', mce, Ity_I1,
                  binop(opCasCmpEQ, cas->expdLo, mkexpr(cas->oldLo)));

   /* 7. if "expected == old"
            store data# to shadow memory */
   do_shadow_Store( mce, cas->end, cas->addr, 0/*bias*/,
                    NULL/*data*/, vdataLo/*vdata*/,
                    expd_eq_old/*guard for store*/ );
   if (otrak) {
      gen_store_b( mce, elemSzB, cas->addr, 0/*offset*/,
                   bdataLo/*bdata*/,
                   expd_eq_old/*guard for store*/ );
   }
}
4946
4947
/* Instrument a double-element CAS (steps 1..7 as per the big comment
   on do_shadow_CAS above).  The Hi/Lo halves live at adjacent memory
   offsets whose order depends on the CAS's endianness.  The
   "expected == old" test is computed as ((oldHi^expdHi)|(oldLo^expdLo))
   == 0 so a single CasCmpEQ covers both halves. */
static void do_shadow_CAS_double ( MCEnv* mce, IRCAS* cas )
{
   IRAtom *vdataHi = NULL, *bdataHi = NULL;
   IRAtom *vdataLo = NULL, *bdataLo = NULL;
   IRAtom *vexpdHi = NULL, *bexpdHi = NULL;
   IRAtom *vexpdLo = NULL, *bexpdLo = NULL;
   IRAtom *voldHi  = NULL, *boldHi  = NULL;
   IRAtom *voldLo  = NULL, *boldLo  = NULL;
   IRAtom *xHi = NULL, *xLo = NULL, *xHL = NULL;
   IRAtom *expd_eq_old = NULL, *zero = NULL;
   IROp   opCasCmpEQ, opOr, opXor;
   Int    elemSzB, memOffsLo, memOffsHi;
   IRType elemTy;
   Bool   otrak = MC_(clo_mc_level) >= 3; /* a shorthand */

   /* double CAS */
   tl_assert(cas->oldHi != IRTemp_INVALID);
   tl_assert(cas->expdHi != NULL);
   tl_assert(cas->dataHi != NULL);

   elemTy = typeOfIRExpr(mce->sb->tyenv, cas->expdLo);
   switch (elemTy) {
      case Ity_I8:
         opCasCmpEQ = Iop_CasCmpEQ8; opOr = Iop_Or8; opXor = Iop_Xor8;
         elemSzB = 1; zero = mkU8(0);
         break;
      case Ity_I16:
         opCasCmpEQ = Iop_CasCmpEQ16; opOr = Iop_Or16; opXor = Iop_Xor16;
         elemSzB = 2; zero = mkU16(0);
         break;
      case Ity_I32:
         opCasCmpEQ = Iop_CasCmpEQ32; opOr = Iop_Or32; opXor = Iop_Xor32;
         elemSzB = 4; zero = mkU32(0);
         break;
      case Ity_I64:
         opCasCmpEQ = Iop_CasCmpEQ64; opOr = Iop_Or64; opXor = Iop_Xor64;
         elemSzB = 8; zero = mkU64(0);
         break;
      default:
         tl_assert(0); /* IR defn disallows any other types */
   }

   /* 1. fetch data# (the proposed new value) */
   tl_assert(isOriginalAtom(mce, cas->dataHi));
   tl_assert(isOriginalAtom(mce, cas->dataLo));
   vdataHi
      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataHi));
   vdataLo
      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataLo));
   tl_assert(isShadowAtom(mce, vdataHi));
   tl_assert(isShadowAtom(mce, vdataLo));
   if (otrak) {
      bdataHi
         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataHi));
      bdataLo
         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataLo));
      tl_assert(isShadowAtom(mce, bdataHi));
      tl_assert(isShadowAtom(mce, bdataLo));
   }

   /* 2. fetch expected# (what we expect to see at the address) */
   tl_assert(isOriginalAtom(mce, cas->expdHi));
   tl_assert(isOriginalAtom(mce, cas->expdLo));
   vexpdHi
      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdHi));
   vexpdLo
      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdLo));
   tl_assert(isShadowAtom(mce, vexpdHi));
   tl_assert(isShadowAtom(mce, vexpdLo));
   if (otrak) {
      bexpdHi
         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdHi));
      bexpdLo
         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdLo));
      tl_assert(isShadowAtom(mce, bexpdHi));
      tl_assert(isShadowAtom(mce, bexpdLo));
   }

   /* 3. check definedness of address */
   /* 4. fetch old# from shadow memory; this also checks
         addressibility of the address */
   if (cas->end == Iend_LE) {
      memOffsLo = 0;
      memOffsHi = elemSzB;
   } else {
      tl_assert(cas->end == Iend_BE);
      memOffsLo = elemSzB;
      memOffsHi = 0;
   }
   voldHi
      = assignNew(
           'V', mce, elemTy,
           expr2vbits_Load( 
              mce,
              cas->end, elemTy, cas->addr, memOffsHi/*Addr bias*/
        ));
   voldLo
      = assignNew(
           'V', mce, elemTy,
           expr2vbits_Load( 
              mce,
              cas->end, elemTy, cas->addr, memOffsLo/*Addr bias*/
        ));
   bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldHi), voldHi);
   bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldLo), voldLo);
   if (otrak) {
      boldHi
         = assignNew('B', mce, Ity_I32,
                     gen_load_b(mce, elemSzB, cas->addr,
                                memOffsHi/*addr bias*/));
      boldLo
         = assignNew('B', mce, Ity_I32,
                     gen_load_b(mce, elemSzB, cas->addr,
                                memOffsLo/*addr bias*/));
      bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldHi), boldHi);
      bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldLo), boldLo);
   }

   /* 5. the CAS itself */
   stmt( 'C', mce, IRStmt_CAS(cas) );

   /* 6. compute "expected == old" */
   /* See COMMENT_ON_CasCmpEQ in this file for background/rationale. */
   /* Note that 'C' is kinda faking it; it is indeed a non-shadow
      tree, but it's not copied from the input block. */
   /*
      xHi = oldHi ^ expdHi;
      xLo = oldLo ^ expdLo;
      xHL = xHi | xLo;
      expd_eq_old = xHL == 0;
   */
   xHi = assignNew('C', mce, elemTy,
                   binop(opXor, cas->expdHi, mkexpr(cas->oldHi)));
   xLo = assignNew('C', mce, elemTy,
                   binop(opXor, cas->expdLo, mkexpr(cas->oldLo)));
   xHL = assignNew('C', mce, elemTy,
                   binop(opOr, xHi, xLo));
   expd_eq_old
      = assignNew('C', mce, Ity_I1,
                  binop(opCasCmpEQ, xHL, zero));

   /* 7. if "expected == old"
            store data# to shadow memory */
   do_shadow_Store( mce, cas->end, cas->addr, memOffsHi/*bias*/,
                    NULL/*data*/, vdataHi/*vdata*/,
                    expd_eq_old/*guard for store*/ );
   do_shadow_Store( mce, cas->end, cas->addr, memOffsLo/*bias*/,
                    NULL/*data*/, vdataLo/*vdata*/,
                    expd_eq_old/*guard for store*/ );
   if (otrak) {
      gen_store_b( mce, elemSzB, cas->addr, memOffsHi/*offset*/,
                   bdataHi/*bdata*/,
                   expd_eq_old/*guard for store*/ );
      gen_store_b( mce, elemSzB, cas->addr, memOffsLo/*offset*/,
                   bdataLo/*bdata*/,
                   expd_eq_old/*guard for store*/ );
   }
}
5106
5107
sewardjdb5907d2009-11-26 17:20:21 +00005108/* ------ Dealing with LL/SC (not difficult) ------ */
5109
/* Instrument a load-linked / store-conditional.  stStoredata == NULL
   means this is the load-linked half; otherwise it is the
   store-conditional half and stResult receives the success flag. */
static void do_shadow_LLSC ( MCEnv*    mce,
                             IREndness stEnd,
                             IRTemp    stResult,
                             IRExpr*   stAddr,
                             IRExpr*   stStoredata )
{
   /* In short: treat a load-linked like a normal load followed by an
      assignment of the loaded (shadow) data to the result temporary.
      Treat a store-conditional like a normal store, and mark the
      result temporary as defined. */
   IRType resTy  = typeOfIRTemp(mce->sb->tyenv, stResult);
   IRTemp resTmp = findShadowTmpV(mce, stResult);

   tl_assert(isIRAtom(stAddr));
   if (stStoredata)
      tl_assert(isIRAtom(stStoredata));

   if (stStoredata == NULL) {
      /* Load Linked */
      /* Just treat this as a normal load, followed by an assignment of
         the value to .result. */
      /* Stay sane */
      tl_assert(resTy == Ity_I64 || resTy == Ity_I32
                || resTy == Ity_I16 || resTy == Ity_I8);
      assign( 'V', mce, resTmp,
              expr2vbits_Load(
                 mce, stEnd, resTy, stAddr, 0/*addr bias*/));
   } else {
      /* Store Conditional */
      /* Stay sane */
      IRType dataTy = typeOfIRExpr(mce->sb->tyenv,
                                   stStoredata);
      tl_assert(dataTy == Ity_I64 || dataTy == Ity_I32
                || dataTy == Ity_I16 || dataTy == Ity_I8);
      do_shadow_Store( mce, stEnd,
                       stAddr, 0/* addr bias */,
                       stStoredata,
                       NULL /* shadow data */,
                       NULL/*guard*/ );
      /* This is a store conditional, so it writes to .result a value
         indicating whether or not the store succeeded.  Just claim
         this value is always defined.  In the PowerPC interpretation
         of store-conditional, definedness of the success indication
         depends on whether the address of the store matches the
         reservation address.  But we can't tell that here (and
         anyway, we're not being PowerPC-specific).  At least we are
         guaranteed that the definedness of the store address, and its
         addressibility, will be checked as per normal.  So it seems
         pretty safe to just say that the success indication is always
         defined.

         In schemeS, for origin tracking, we must correspondingly set
         a no-origin value for the origin shadow of .result.
      */
      tl_assert(resTy == Ity_I1);
      assign( 'V', mce, resTmp, definedOfType(resTy) );
   }
}
5168
5169
sewardj95448072004-11-22 20:19:51 +00005170/*------------------------------------------------------------*/
5171/*--- Memcheck main ---*/
5172/*------------------------------------------------------------*/
njn25e49d8e72002-09-23 09:36:25 +00005173
sewardj7cf4e6b2008-05-01 20:24:26 +00005174static void schemeS ( MCEnv* mce, IRStmt* st );
5175
sewardj95448072004-11-22 20:19:51 +00005176static Bool isBogusAtom ( IRAtom* at )
njn25e49d8e72002-09-23 09:36:25 +00005177{
sewardj95448072004-11-22 20:19:51 +00005178 ULong n = 0;
5179 IRConst* con;
sewardj710d6c22005-03-20 18:55:15 +00005180 tl_assert(isIRAtom(at));
sewardj0b9d74a2006-12-24 02:24:11 +00005181 if (at->tag == Iex_RdTmp)
sewardj95448072004-11-22 20:19:51 +00005182 return False;
5183 tl_assert(at->tag == Iex_Const);
5184 con = at->Iex.Const.con;
5185 switch (con->tag) {
sewardjd5204dc2004-12-31 01:16:11 +00005186 case Ico_U1: return False;
5187 case Ico_U8: n = (ULong)con->Ico.U8; break;
5188 case Ico_U16: n = (ULong)con->Ico.U16; break;
5189 case Ico_U32: n = (ULong)con->Ico.U32; break;
5190 case Ico_U64: n = (ULong)con->Ico.U64; break;
5191 case Ico_F64: return False;
sewardjb5b87402011-03-07 16:05:35 +00005192 case Ico_F32i: return False;
sewardjd5204dc2004-12-31 01:16:11 +00005193 case Ico_F64i: return False;
5194 case Ico_V128: return False;
sewardj95448072004-11-22 20:19:51 +00005195 default: ppIRExpr(at); tl_assert(0);
5196 }
5197 /* VG_(printf)("%llx\n", n); */
sewardj96a922e2005-04-23 23:26:29 +00005198 return (/*32*/ n == 0xFEFEFEFFULL
5199 /*32*/ || n == 0x80808080ULL
sewardj17b47432008-12-17 01:12:58 +00005200 /*32*/ || n == 0x7F7F7F7FULL
tomd9774d72005-06-27 08:11:01 +00005201 /*64*/ || n == 0xFFFFFFFFFEFEFEFFULL
sewardj96a922e2005-04-23 23:26:29 +00005202 /*64*/ || n == 0xFEFEFEFEFEFEFEFFULL
tomd9774d72005-06-27 08:11:01 +00005203 /*64*/ || n == 0x0000000000008080ULL
sewardj96a922e2005-04-23 23:26:29 +00005204 /*64*/ || n == 0x8080808080808080ULL
sewardj17b47432008-12-17 01:12:58 +00005205 /*64*/ || n == 0x0101010101010101ULL
sewardj96a922e2005-04-23 23:26:29 +00005206 );
sewardj95448072004-11-22 20:19:51 +00005207}
njn25e49d8e72002-09-23 09:36:25 +00005208
sewardj95448072004-11-22 20:19:51 +00005209static Bool checkForBogusLiterals ( /*FLAT*/ IRStmt* st )
5210{
sewardjd5204dc2004-12-31 01:16:11 +00005211 Int i;
5212 IRExpr* e;
5213 IRDirty* d;
sewardj1c0ce7a2009-07-01 08:10:49 +00005214 IRCAS* cas;
sewardj95448072004-11-22 20:19:51 +00005215 switch (st->tag) {
sewardj0b9d74a2006-12-24 02:24:11 +00005216 case Ist_WrTmp:
5217 e = st->Ist.WrTmp.data;
sewardj95448072004-11-22 20:19:51 +00005218 switch (e->tag) {
5219 case Iex_Get:
sewardj0b9d74a2006-12-24 02:24:11 +00005220 case Iex_RdTmp:
sewardj95448072004-11-22 20:19:51 +00005221 return False;
sewardjd5204dc2004-12-31 01:16:11 +00005222 case Iex_Const:
5223 return isBogusAtom(e);
sewardj95448072004-11-22 20:19:51 +00005224 case Iex_Unop:
5225 return isBogusAtom(e->Iex.Unop.arg);
sewardjd5204dc2004-12-31 01:16:11 +00005226 case Iex_GetI:
5227 return isBogusAtom(e->Iex.GetI.ix);
sewardj95448072004-11-22 20:19:51 +00005228 case Iex_Binop:
5229 return isBogusAtom(e->Iex.Binop.arg1)
5230 || isBogusAtom(e->Iex.Binop.arg2);
sewardjed69fdb2006-02-03 16:12:27 +00005231 case Iex_Triop:
florian26441742012-06-02 20:30:41 +00005232 return isBogusAtom(e->Iex.Triop.details->arg1)
5233 || isBogusAtom(e->Iex.Triop.details->arg2)
5234 || isBogusAtom(e->Iex.Triop.details->arg3);
sewardje91cea72006-02-08 19:32:02 +00005235 case Iex_Qop:
floriane2ab2972012-06-01 20:43:03 +00005236 return isBogusAtom(e->Iex.Qop.details->arg1)
5237 || isBogusAtom(e->Iex.Qop.details->arg2)
5238 || isBogusAtom(e->Iex.Qop.details->arg3)
5239 || isBogusAtom(e->Iex.Qop.details->arg4);
sewardj95448072004-11-22 20:19:51 +00005240 case Iex_Mux0X:
5241 return isBogusAtom(e->Iex.Mux0X.cond)
5242 || isBogusAtom(e->Iex.Mux0X.expr0)
5243 || isBogusAtom(e->Iex.Mux0X.exprX);
sewardj2e595852005-06-30 23:33:37 +00005244 case Iex_Load:
5245 return isBogusAtom(e->Iex.Load.addr);
sewardj95448072004-11-22 20:19:51 +00005246 case Iex_CCall:
5247 for (i = 0; e->Iex.CCall.args[i]; i++)
5248 if (isBogusAtom(e->Iex.CCall.args[i]))
5249 return True;
5250 return False;
5251 default:
5252 goto unhandled;
5253 }
sewardjd5204dc2004-12-31 01:16:11 +00005254 case Ist_Dirty:
5255 d = st->Ist.Dirty.details;
5256 for (i = 0; d->args[i]; i++)
5257 if (isBogusAtom(d->args[i]))
5258 return True;
5259 if (d->guard && isBogusAtom(d->guard))
5260 return True;
5261 if (d->mAddr && isBogusAtom(d->mAddr))
5262 return True;
5263 return False;
sewardj95448072004-11-22 20:19:51 +00005264 case Ist_Put:
5265 return isBogusAtom(st->Ist.Put.data);
sewardjd5204dc2004-12-31 01:16:11 +00005266 case Ist_PutI:
floriand39b0222012-05-31 15:48:13 +00005267 return isBogusAtom(st->Ist.PutI.details->ix)
5268 || isBogusAtom(st->Ist.PutI.details->data);
sewardj2e595852005-06-30 23:33:37 +00005269 case Ist_Store:
5270 return isBogusAtom(st->Ist.Store.addr)
5271 || isBogusAtom(st->Ist.Store.data);
sewardj95448072004-11-22 20:19:51 +00005272 case Ist_Exit:
sewardjd5204dc2004-12-31 01:16:11 +00005273 return isBogusAtom(st->Ist.Exit.guard);
sewardj826ec492005-05-12 18:05:00 +00005274 case Ist_AbiHint:
sewardj7cf4e6b2008-05-01 20:24:26 +00005275 return isBogusAtom(st->Ist.AbiHint.base)
5276 || isBogusAtom(st->Ist.AbiHint.nia);
sewardj21dc3452005-03-21 00:27:41 +00005277 case Ist_NoOp:
sewardj29faa502005-03-16 18:20:21 +00005278 case Ist_IMark:
sewardj72d75132007-11-09 23:06:35 +00005279 case Ist_MBE:
sewardjbd598e12005-01-07 12:10:21 +00005280 return False;
sewardj1c0ce7a2009-07-01 08:10:49 +00005281 case Ist_CAS:
5282 cas = st->Ist.CAS.details;
5283 return isBogusAtom(cas->addr)
5284 || (cas->expdHi ? isBogusAtom(cas->expdHi) : False)
5285 || isBogusAtom(cas->expdLo)
5286 || (cas->dataHi ? isBogusAtom(cas->dataHi) : False)
5287 || isBogusAtom(cas->dataLo);
sewardjdb5907d2009-11-26 17:20:21 +00005288 case Ist_LLSC:
5289 return isBogusAtom(st->Ist.LLSC.addr)
5290 || (st->Ist.LLSC.storedata
5291 ? isBogusAtom(st->Ist.LLSC.storedata)
5292 : False);
sewardj95448072004-11-22 20:19:51 +00005293 default:
5294 unhandled:
5295 ppIRStmt(st);
5296 VG_(tool_panic)("hasBogusLiterals");
5297 }
5298}
njn25e49d8e72002-09-23 09:36:25 +00005299
njn25e49d8e72002-09-23 09:36:25 +00005300
sewardj0b9d74a2006-12-24 02:24:11 +00005301IRSB* MC_(instrument) ( VgCallbackClosure* closure,
sewardj1c0ce7a2009-07-01 08:10:49 +00005302 IRSB* sb_in,
sewardj461df9c2006-01-17 02:06:39 +00005303 VexGuestLayout* layout,
5304 VexGuestExtents* vge,
florianca503be2012-10-07 21:59:42 +00005305 VexArchInfo* archinfo_host,
sewardjd54babf2005-03-21 00:55:49 +00005306 IRType gWordTy, IRType hWordTy )
sewardj95448072004-11-22 20:19:51 +00005307{
sewardj7cf4e6b2008-05-01 20:24:26 +00005308 Bool verboze = 0||False;
sewardj151b90d2005-07-06 19:42:23 +00005309 Bool bogus;
sewardjd5204dc2004-12-31 01:16:11 +00005310 Int i, j, first_stmt;
sewardj95448072004-11-22 20:19:51 +00005311 IRStmt* st;
sewardjd5204dc2004-12-31 01:16:11 +00005312 MCEnv mce;
sewardj1c0ce7a2009-07-01 08:10:49 +00005313 IRSB* sb_out;
sewardjd54babf2005-03-21 00:55:49 +00005314
5315 if (gWordTy != hWordTy) {
5316 /* We don't currently support this case. */
5317 VG_(tool_panic)("host/guest word size mismatch");
5318 }
njn25e49d8e72002-09-23 09:36:25 +00005319
sewardj6cf40ff2005-04-20 22:31:26 +00005320 /* Check we're not completely nuts */
sewardj7cf4e6b2008-05-01 20:24:26 +00005321 tl_assert(sizeof(UWord) == sizeof(void*));
5322 tl_assert(sizeof(Word) == sizeof(void*));
5323 tl_assert(sizeof(Addr) == sizeof(void*));
5324 tl_assert(sizeof(ULong) == 8);
5325 tl_assert(sizeof(Long) == 8);
5326 tl_assert(sizeof(Addr64) == 8);
5327 tl_assert(sizeof(UInt) == 4);
5328 tl_assert(sizeof(Int) == 4);
5329
5330 tl_assert(MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3);
sewardj6cf40ff2005-04-20 22:31:26 +00005331
sewardj0b9d74a2006-12-24 02:24:11 +00005332 /* Set up SB */
sewardj1c0ce7a2009-07-01 08:10:49 +00005333 sb_out = deepCopyIRSBExceptStmts(sb_in);
njn25e49d8e72002-09-23 09:36:25 +00005334
sewardj1c0ce7a2009-07-01 08:10:49 +00005335 /* Set up the running environment. Both .sb and .tmpMap are
5336 modified as we go along. Note that tmps are added to both
5337 .sb->tyenv and .tmpMap together, so the valid index-set for
5338 those two arrays should always be identical. */
5339 VG_(memset)(&mce, 0, sizeof(mce));
5340 mce.sb = sb_out;
sewardj7cf4e6b2008-05-01 20:24:26 +00005341 mce.trace = verboze;
sewardj95448072004-11-22 20:19:51 +00005342 mce.layout = layout;
sewardj95448072004-11-22 20:19:51 +00005343 mce.hWordTy = hWordTy;
sewardjd5204dc2004-12-31 01:16:11 +00005344 mce.bogusLiterals = False;
sewardj1c0ce7a2009-07-01 08:10:49 +00005345
sewardj54eac252012-03-27 10:19:39 +00005346 /* Do expensive interpretation for Iop_Add32 and Iop_Add64 on
5347 Darwin. 10.7 is mostly built with LLVM, which uses these for
5348 bitfield inserts, and we get a lot of false errors if the cheap
5349 interpretation is used, alas. Could solve this much better if
5350 we knew which of such adds came from x86/amd64 LEA instructions,
5351 since these are the only ones really needing the expensive
5352 interpretation, but that would require some way to tag them in
5353 the _toIR.c front ends, which is a lot of faffing around. So
5354 for now just use the slow and blunt-instrument solution. */
5355 mce.useLLVMworkarounds = False;
5356# if defined(VGO_darwin)
5357 mce.useLLVMworkarounds = True;
5358# endif
5359
sewardj1c0ce7a2009-07-01 08:10:49 +00005360 mce.tmpMap = VG_(newXA)( VG_(malloc), "mc.MC_(instrument).1", VG_(free),
5361 sizeof(TempMapEnt));
5362 for (i = 0; i < sb_in->tyenv->types_used; i++) {
5363 TempMapEnt ent;
5364 ent.kind = Orig;
5365 ent.shadowV = IRTemp_INVALID;
5366 ent.shadowB = IRTemp_INVALID;
5367 VG_(addToXA)( mce.tmpMap, &ent );
sewardj7cf4e6b2008-05-01 20:24:26 +00005368 }
sewardj1c0ce7a2009-07-01 08:10:49 +00005369 tl_assert( VG_(sizeXA)( mce.tmpMap ) == sb_in->tyenv->types_used );
sewardj95448072004-11-22 20:19:51 +00005370
sewardj151b90d2005-07-06 19:42:23 +00005371 /* Make a preliminary inspection of the statements, to see if there
5372 are any dodgy-looking literals. If there are, we generate
5373 extra-detailed (hence extra-expensive) instrumentation in
5374 places. Scan the whole bb even if dodgyness is found earlier,
5375 so that the flatness assertion is applied to all stmts. */
5376
5377 bogus = False;
sewardj95448072004-11-22 20:19:51 +00005378
sewardj1c0ce7a2009-07-01 08:10:49 +00005379 for (i = 0; i < sb_in->stmts_used; i++) {
sewardj151b90d2005-07-06 19:42:23 +00005380
sewardj1c0ce7a2009-07-01 08:10:49 +00005381 st = sb_in->stmts[i];
sewardj21dc3452005-03-21 00:27:41 +00005382 tl_assert(st);
sewardj95448072004-11-22 20:19:51 +00005383 tl_assert(isFlatIRStmt(st));
5384
sewardj151b90d2005-07-06 19:42:23 +00005385 if (!bogus) {
5386 bogus = checkForBogusLiterals(st);
5387 if (0 && bogus) {
sewardj95448072004-11-22 20:19:51 +00005388 VG_(printf)("bogus: ");
5389 ppIRStmt(st);
5390 VG_(printf)("\n");
5391 }
5392 }
sewardjd5204dc2004-12-31 01:16:11 +00005393
sewardj151b90d2005-07-06 19:42:23 +00005394 }
5395
5396 mce.bogusLiterals = bogus;
5397
sewardja0871482006-10-18 12:41:55 +00005398 /* Copy verbatim any IR preamble preceding the first IMark */
sewardj151b90d2005-07-06 19:42:23 +00005399
sewardj1c0ce7a2009-07-01 08:10:49 +00005400 tl_assert(mce.sb == sb_out);
5401 tl_assert(mce.sb != sb_in);
sewardjf1962d32006-10-19 13:22:16 +00005402
sewardja0871482006-10-18 12:41:55 +00005403 i = 0;
sewardj1c0ce7a2009-07-01 08:10:49 +00005404 while (i < sb_in->stmts_used && sb_in->stmts[i]->tag != Ist_IMark) {
sewardja0871482006-10-18 12:41:55 +00005405
sewardj1c0ce7a2009-07-01 08:10:49 +00005406 st = sb_in->stmts[i];
sewardja0871482006-10-18 12:41:55 +00005407 tl_assert(st);
5408 tl_assert(isFlatIRStmt(st));
5409
sewardj1c0ce7a2009-07-01 08:10:49 +00005410 stmt( 'C', &mce, sb_in->stmts[i] );
sewardja0871482006-10-18 12:41:55 +00005411 i++;
5412 }
5413
sewardjf1962d32006-10-19 13:22:16 +00005414 /* Nasty problem. IR optimisation of the pre-instrumented IR may
5415 cause the IR following the preamble to contain references to IR
5416 temporaries defined in the preamble. Because the preamble isn't
5417 instrumented, these temporaries don't have any shadows.
5418 Nevertheless uses of them following the preamble will cause
5419 memcheck to generate references to their shadows. End effect is
5420 to cause IR sanity check failures, due to references to
5421 non-existent shadows. This is only evident for the complex
5422 preambles used for function wrapping on TOC-afflicted platforms
sewardj6e9de462011-06-28 07:25:29 +00005423 (ppc64-linux).
sewardjf1962d32006-10-19 13:22:16 +00005424
5425 The following loop therefore scans the preamble looking for
5426 assignments to temporaries. For each one found it creates an
sewardjafa617b2008-07-22 09:59:48 +00005427 assignment to the corresponding (V) shadow temp, marking it as
sewardjf1962d32006-10-19 13:22:16 +00005428 'defined'. This is the same resulting IR as if the main
5429 instrumentation loop before had been applied to the statement
5430 'tmp = CONSTANT'.
sewardjafa617b2008-07-22 09:59:48 +00005431
5432 Similarly, if origin tracking is enabled, we must generate an
5433 assignment for the corresponding origin (B) shadow, claiming
5434 no-origin, as appropriate for a defined value.
sewardjf1962d32006-10-19 13:22:16 +00005435 */
5436 for (j = 0; j < i; j++) {
sewardj1c0ce7a2009-07-01 08:10:49 +00005437 if (sb_in->stmts[j]->tag == Ist_WrTmp) {
sewardj7cf4e6b2008-05-01 20:24:26 +00005438 /* findShadowTmpV checks its arg is an original tmp;
sewardjf1962d32006-10-19 13:22:16 +00005439 no need to assert that here. */
sewardj1c0ce7a2009-07-01 08:10:49 +00005440 IRTemp tmp_o = sb_in->stmts[j]->Ist.WrTmp.tmp;
sewardjafa617b2008-07-22 09:59:48 +00005441 IRTemp tmp_v = findShadowTmpV(&mce, tmp_o);
sewardj1c0ce7a2009-07-01 08:10:49 +00005442 IRType ty_v = typeOfIRTemp(sb_out->tyenv, tmp_v);
sewardjafa617b2008-07-22 09:59:48 +00005443 assign( 'V', &mce, tmp_v, definedOfType( ty_v ) );
5444 if (MC_(clo_mc_level) == 3) {
5445 IRTemp tmp_b = findShadowTmpB(&mce, tmp_o);
sewardj1c0ce7a2009-07-01 08:10:49 +00005446 tl_assert(typeOfIRTemp(sb_out->tyenv, tmp_b) == Ity_I32);
sewardjafa617b2008-07-22 09:59:48 +00005447 assign( 'B', &mce, tmp_b, mkU32(0)/* UNKNOWN ORIGIN */);
5448 }
sewardjf1962d32006-10-19 13:22:16 +00005449 if (0) {
sewardjafa617b2008-07-22 09:59:48 +00005450 VG_(printf)("create shadow tmp(s) for preamble tmp [%d] ty ", j);
5451 ppIRType( ty_v );
sewardjf1962d32006-10-19 13:22:16 +00005452 VG_(printf)("\n");
5453 }
5454 }
5455 }
5456
sewardja0871482006-10-18 12:41:55 +00005457 /* Iterate over the remaining stmts to generate instrumentation. */
5458
sewardj1c0ce7a2009-07-01 08:10:49 +00005459 tl_assert(sb_in->stmts_used > 0);
sewardja0871482006-10-18 12:41:55 +00005460 tl_assert(i >= 0);
sewardj1c0ce7a2009-07-01 08:10:49 +00005461 tl_assert(i < sb_in->stmts_used);
5462 tl_assert(sb_in->stmts[i]->tag == Ist_IMark);
sewardja0871482006-10-18 12:41:55 +00005463
sewardj1c0ce7a2009-07-01 08:10:49 +00005464 for (/* use current i*/; i < sb_in->stmts_used; i++) {
sewardj151b90d2005-07-06 19:42:23 +00005465
sewardj1c0ce7a2009-07-01 08:10:49 +00005466 st = sb_in->stmts[i];
5467 first_stmt = sb_out->stmts_used;
sewardj95448072004-11-22 20:19:51 +00005468
5469 if (verboze) {
sewardj7cf4e6b2008-05-01 20:24:26 +00005470 VG_(printf)("\n");
sewardj95448072004-11-22 20:19:51 +00005471 ppIRStmt(st);
sewardj7cf4e6b2008-05-01 20:24:26 +00005472 VG_(printf)("\n");
sewardj95448072004-11-22 20:19:51 +00005473 }
5474
sewardj1c0ce7a2009-07-01 08:10:49 +00005475 if (MC_(clo_mc_level) == 3) {
5476 /* See comments on case Ist_CAS below. */
5477 if (st->tag != Ist_CAS)
5478 schemeS( &mce, st );
5479 }
sewardj7cf4e6b2008-05-01 20:24:26 +00005480
sewardj29faa502005-03-16 18:20:21 +00005481 /* Generate instrumentation code for each stmt ... */
5482
sewardj95448072004-11-22 20:19:51 +00005483 switch (st->tag) {
5484
sewardj0b9d74a2006-12-24 02:24:11 +00005485 case Ist_WrTmp:
sewardj7cf4e6b2008-05-01 20:24:26 +00005486 assign( 'V', &mce, findShadowTmpV(&mce, st->Ist.WrTmp.tmp),
5487 expr2vbits( &mce, st->Ist.WrTmp.data) );
njn25e49d8e72002-09-23 09:36:25 +00005488 break;
5489
sewardj95448072004-11-22 20:19:51 +00005490 case Ist_Put:
5491 do_shadow_PUT( &mce,
5492 st->Ist.Put.offset,
5493 st->Ist.Put.data,
florian434ffae2012-07-19 17:23:42 +00005494 NULL /* shadow atom */, NULL /* guard */ );
njn25e49d8e72002-09-23 09:36:25 +00005495 break;
5496
sewardj95448072004-11-22 20:19:51 +00005497 case Ist_PutI:
floriand39b0222012-05-31 15:48:13 +00005498 do_shadow_PUTI( &mce, st->Ist.PutI.details);
njn25e49d8e72002-09-23 09:36:25 +00005499 break;
5500
sewardj2e595852005-06-30 23:33:37 +00005501 case Ist_Store:
5502 do_shadow_Store( &mce, st->Ist.Store.end,
5503 st->Ist.Store.addr, 0/* addr bias */,
5504 st->Ist.Store.data,
sewardj1c0ce7a2009-07-01 08:10:49 +00005505 NULL /* shadow data */,
5506 NULL/*guard*/ );
njn25e49d8e72002-09-23 09:36:25 +00005507 break;
5508
sewardj95448072004-11-22 20:19:51 +00005509 case Ist_Exit:
florian434ffae2012-07-19 17:23:42 +00005510 complainIfUndefined( &mce, st->Ist.Exit.guard, NULL );
njn25e49d8e72002-09-23 09:36:25 +00005511 break;
5512
sewardj29faa502005-03-16 18:20:21 +00005513 case Ist_IMark:
sewardj7cf4e6b2008-05-01 20:24:26 +00005514 break;
5515
5516 case Ist_NoOp:
sewardj72d75132007-11-09 23:06:35 +00005517 case Ist_MBE:
sewardjbd598e12005-01-07 12:10:21 +00005518 break;
5519
sewardj95448072004-11-22 20:19:51 +00005520 case Ist_Dirty:
5521 do_shadow_Dirty( &mce, st->Ist.Dirty.details );
njn25e49d8e72002-09-23 09:36:25 +00005522 break;
5523
sewardj826ec492005-05-12 18:05:00 +00005524 case Ist_AbiHint:
sewardj7cf4e6b2008-05-01 20:24:26 +00005525 do_AbiHint( &mce, st->Ist.AbiHint.base,
5526 st->Ist.AbiHint.len,
5527 st->Ist.AbiHint.nia );
sewardj826ec492005-05-12 18:05:00 +00005528 break;
5529
sewardj1c0ce7a2009-07-01 08:10:49 +00005530 case Ist_CAS:
5531 do_shadow_CAS( &mce, st->Ist.CAS.details );
5532 /* Note, do_shadow_CAS copies the CAS itself to the output
5533 block, because it needs to add instrumentation both
5534 before and after it. Hence skip the copy below. Also
5535 skip the origin-tracking stuff (call to schemeS) above,
5536 since that's all tangled up with it too; do_shadow_CAS
5537 does it all. */
5538 break;
5539
sewardjdb5907d2009-11-26 17:20:21 +00005540 case Ist_LLSC:
5541 do_shadow_LLSC( &mce,
5542 st->Ist.LLSC.end,
5543 st->Ist.LLSC.result,
5544 st->Ist.LLSC.addr,
5545 st->Ist.LLSC.storedata );
5546 break;
5547
njn25e49d8e72002-09-23 09:36:25 +00005548 default:
sewardj95448072004-11-22 20:19:51 +00005549 VG_(printf)("\n");
5550 ppIRStmt(st);
5551 VG_(printf)("\n");
5552 VG_(tool_panic)("memcheck: unhandled IRStmt");
5553
5554 } /* switch (st->tag) */
5555
sewardj7cf4e6b2008-05-01 20:24:26 +00005556 if (0 && verboze) {
sewardj1c0ce7a2009-07-01 08:10:49 +00005557 for (j = first_stmt; j < sb_out->stmts_used; j++) {
sewardj95448072004-11-22 20:19:51 +00005558 VG_(printf)(" ");
sewardj1c0ce7a2009-07-01 08:10:49 +00005559 ppIRStmt(sb_out->stmts[j]);
sewardj95448072004-11-22 20:19:51 +00005560 VG_(printf)("\n");
5561 }
5562 VG_(printf)("\n");
njn25e49d8e72002-09-23 09:36:25 +00005563 }
sewardj95448072004-11-22 20:19:51 +00005564
sewardj1c0ce7a2009-07-01 08:10:49 +00005565 /* ... and finally copy the stmt itself to the output. Except,
5566 skip the copy of IRCASs; see comments on case Ist_CAS
5567 above. */
5568 if (st->tag != Ist_CAS)
5569 stmt('C', &mce, st);
njn25e49d8e72002-09-23 09:36:25 +00005570 }
njn25e49d8e72002-09-23 09:36:25 +00005571
sewardj95448072004-11-22 20:19:51 +00005572 /* Now we need to complain if the jump target is undefined. */
sewardj1c0ce7a2009-07-01 08:10:49 +00005573 first_stmt = sb_out->stmts_used;
njn25e49d8e72002-09-23 09:36:25 +00005574
sewardj95448072004-11-22 20:19:51 +00005575 if (verboze) {
sewardj1c0ce7a2009-07-01 08:10:49 +00005576 VG_(printf)("sb_in->next = ");
5577 ppIRExpr(sb_in->next);
sewardj95448072004-11-22 20:19:51 +00005578 VG_(printf)("\n\n");
5579 }
njn25e49d8e72002-09-23 09:36:25 +00005580
florian434ffae2012-07-19 17:23:42 +00005581 complainIfUndefined( &mce, sb_in->next, NULL );
njn25e49d8e72002-09-23 09:36:25 +00005582
sewardj7cf4e6b2008-05-01 20:24:26 +00005583 if (0 && verboze) {
sewardj1c0ce7a2009-07-01 08:10:49 +00005584 for (j = first_stmt; j < sb_out->stmts_used; j++) {
sewardj95448072004-11-22 20:19:51 +00005585 VG_(printf)(" ");
sewardj1c0ce7a2009-07-01 08:10:49 +00005586 ppIRStmt(sb_out->stmts[j]);
sewardj95448072004-11-22 20:19:51 +00005587 VG_(printf)("\n");
njn25e49d8e72002-09-23 09:36:25 +00005588 }
sewardj95448072004-11-22 20:19:51 +00005589 VG_(printf)("\n");
njn25e49d8e72002-09-23 09:36:25 +00005590 }
njn25e49d8e72002-09-23 09:36:25 +00005591
sewardj1c0ce7a2009-07-01 08:10:49 +00005592 /* If this fails, there's been some serious snafu with tmp management,
5593 that should be investigated. */
5594 tl_assert( VG_(sizeXA)( mce.tmpMap ) == mce.sb->tyenv->types_used );
5595 VG_(deleteXA)( mce.tmpMap );
5596
5597 tl_assert(mce.sb == sb_out);
5598 return sb_out;
sewardj95448072004-11-22 20:19:51 +00005599}
njn25e49d8e72002-09-23 09:36:25 +00005600
sewardj81651dc2007-08-28 06:05:20 +00005601/*------------------------------------------------------------*/
5602/*--- Post-tree-build final tidying ---*/
5603/*------------------------------------------------------------*/
5604
5605/* This exploits the observation that Memcheck often produces
5606 repeated conditional calls of the form
5607
sewardj7cf4e6b2008-05-01 20:24:26 +00005608 Dirty G MC_(helperc_value_check0/1/4/8_fail)(UInt otag)
sewardj81651dc2007-08-28 06:05:20 +00005609
5610 with the same guard expression G guarding the same helper call.
5611 The second and subsequent calls are redundant. This usually
5612 results from instrumentation of guest code containing multiple
5613 memory references at different constant offsets from the same base
5614 register. After optimisation of the instrumentation, you get a
5615 test for the definedness of the base register for each memory
5616 reference, which is kinda pointless. MC_(final_tidy) therefore
5617 looks for such repeated calls and removes all but the first. */
5618
5619/* A struct for recording which (helper, guard) pairs we have already
5620 seen. */
typedef
   struct { void*   entry;   /* address of the helper function called */
            IRExpr* guard; } /* the guard expression for that call */
   Pair;
5624
5625/* Return True if e1 and e2 definitely denote the same value (used to
5626 compare guards). Return False if unknown; False is the safe
5627 answer. Since guest registers and guest memory do not have the
5628 SSA property we must return False if any Gets or Loads appear in
5629 the expression. */
5630
5631static Bool sameIRValue ( IRExpr* e1, IRExpr* e2 )
5632{
5633 if (e1->tag != e2->tag)
5634 return False;
5635 switch (e1->tag) {
5636 case Iex_Const:
5637 return eqIRConst( e1->Iex.Const.con, e2->Iex.Const.con );
5638 case Iex_Binop:
5639 return e1->Iex.Binop.op == e2->Iex.Binop.op
5640 && sameIRValue(e1->Iex.Binop.arg1, e2->Iex.Binop.arg1)
5641 && sameIRValue(e1->Iex.Binop.arg2, e2->Iex.Binop.arg2);
5642 case Iex_Unop:
5643 return e1->Iex.Unop.op == e2->Iex.Unop.op
5644 && sameIRValue(e1->Iex.Unop.arg, e2->Iex.Unop.arg);
5645 case Iex_RdTmp:
5646 return e1->Iex.RdTmp.tmp == e2->Iex.RdTmp.tmp;
5647 case Iex_Mux0X:
5648 return sameIRValue( e1->Iex.Mux0X.cond, e2->Iex.Mux0X.cond )
5649 && sameIRValue( e1->Iex.Mux0X.expr0, e2->Iex.Mux0X.expr0 )
5650 && sameIRValue( e1->Iex.Mux0X.exprX, e2->Iex.Mux0X.exprX );
5651 case Iex_Qop:
5652 case Iex_Triop:
5653 case Iex_CCall:
5654 /* be lazy. Could define equality for these, but they never
5655 appear to be used. */
5656 return False;
5657 case Iex_Get:
5658 case Iex_GetI:
5659 case Iex_Load:
5660 /* be conservative - these may not give the same value each
5661 time */
5662 return False;
5663 case Iex_Binder:
5664 /* should never see this */
5665 /* fallthrough */
5666 default:
5667 VG_(printf)("mc_translate.c: sameIRValue: unhandled: ");
5668 ppIRExpr(e1);
5669 VG_(tool_panic)("memcheck:sameIRValue");
5670 return False;
5671 }
5672}
5673
5674/* See if 'pairs' already has an entry for (entry, guard). Return
5675 True if so. If not, add an entry. */
5676
5677static
5678Bool check_or_add ( XArray* /*of Pair*/ pairs, IRExpr* guard, void* entry )
5679{
5680 Pair p;
5681 Pair* pp;
5682 Int i, n = VG_(sizeXA)( pairs );
5683 for (i = 0; i < n; i++) {
5684 pp = VG_(indexXA)( pairs, i );
5685 if (pp->entry == entry && sameIRValue(pp->guard, guard))
5686 return True;
5687 }
5688 p.guard = guard;
5689 p.entry = entry;
5690 VG_(addToXA)( pairs, &p );
5691 return False;
5692}
5693
florian11f3cc82012-10-21 02:19:35 +00005694static Bool is_helperc_value_checkN_fail ( const HChar* name )
sewardj81651dc2007-08-28 06:05:20 +00005695{
5696 return
sewardj7cf4e6b2008-05-01 20:24:26 +00005697 0==VG_(strcmp)(name, "MC_(helperc_value_check0_fail_no_o)")
5698 || 0==VG_(strcmp)(name, "MC_(helperc_value_check1_fail_no_o)")
5699 || 0==VG_(strcmp)(name, "MC_(helperc_value_check4_fail_no_o)")
5700 || 0==VG_(strcmp)(name, "MC_(helperc_value_check8_fail_no_o)")
5701 || 0==VG_(strcmp)(name, "MC_(helperc_value_check0_fail_w_o)")
5702 || 0==VG_(strcmp)(name, "MC_(helperc_value_check1_fail_w_o)")
5703 || 0==VG_(strcmp)(name, "MC_(helperc_value_check4_fail_w_o)")
5704 || 0==VG_(strcmp)(name, "MC_(helperc_value_check8_fail_w_o)");
sewardj81651dc2007-08-28 06:05:20 +00005705}
5706
5707IRSB* MC_(final_tidy) ( IRSB* sb_in )
5708{
5709 Int i;
5710 IRStmt* st;
5711 IRDirty* di;
5712 IRExpr* guard;
5713 IRCallee* cee;
5714 Bool alreadyPresent;
sewardj9c606bd2008-09-18 18:12:50 +00005715 XArray* pairs = VG_(newXA)( VG_(malloc), "mc.ft.1",
5716 VG_(free), sizeof(Pair) );
sewardj81651dc2007-08-28 06:05:20 +00005717 /* Scan forwards through the statements. Each time a call to one
5718 of the relevant helpers is seen, check if we have made a
5719 previous call to the same helper using the same guard
5720 expression, and if so, delete the call. */
5721 for (i = 0; i < sb_in->stmts_used; i++) {
5722 st = sb_in->stmts[i];
5723 tl_assert(st);
5724 if (st->tag != Ist_Dirty)
5725 continue;
5726 di = st->Ist.Dirty.details;
5727 guard = di->guard;
5728 if (!guard)
5729 continue;
5730 if (0) { ppIRExpr(guard); VG_(printf)("\n"); }
5731 cee = di->cee;
5732 if (!is_helperc_value_checkN_fail( cee->name ))
5733 continue;
5734 /* Ok, we have a call to helperc_value_check0/1/4/8_fail with
5735 guard 'guard'. Check if we have already seen a call to this
5736 function with the same guard. If so, delete it. If not,
5737 add it to the set of calls we do know about. */
5738 alreadyPresent = check_or_add( pairs, guard, cee->addr );
5739 if (alreadyPresent) {
5740 sb_in->stmts[i] = IRStmt_NoOp();
5741 if (0) VG_(printf)("XX\n");
5742 }
5743 }
5744 VG_(deleteXA)( pairs );
5745 return sb_in;
5746}
5747
5748
sewardj7cf4e6b2008-05-01 20:24:26 +00005749/*------------------------------------------------------------*/
5750/*--- Origin tracking stuff ---*/
5751/*------------------------------------------------------------*/
5752
sewardj1c0ce7a2009-07-01 08:10:49 +00005753/* Almost identical to findShadowTmpV. */
sewardj7cf4e6b2008-05-01 20:24:26 +00005754static IRTemp findShadowTmpB ( MCEnv* mce, IRTemp orig )
5755{
sewardj1c0ce7a2009-07-01 08:10:49 +00005756 TempMapEnt* ent;
5757 /* VG_(indexXA) range-checks 'orig', hence no need to check
5758 here. */
5759 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
5760 tl_assert(ent->kind == Orig);
5761 if (ent->shadowB == IRTemp_INVALID) {
5762 IRTemp tmpB
5763 = newTemp( mce, Ity_I32, BSh );
5764 /* newTemp may cause mce->tmpMap to resize, hence previous results
5765 from VG_(indexXA) are invalid. */
5766 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
5767 tl_assert(ent->kind == Orig);
5768 tl_assert(ent->shadowB == IRTemp_INVALID);
5769 ent->shadowB = tmpB;
sewardj7cf4e6b2008-05-01 20:24:26 +00005770 }
sewardj1c0ce7a2009-07-01 08:10:49 +00005771 return ent->shadowB;
sewardj7cf4e6b2008-05-01 20:24:26 +00005772}
5773
5774static IRAtom* gen_maxU32 ( MCEnv* mce, IRAtom* b1, IRAtom* b2 )
5775{
5776 return assignNew( 'B', mce, Ity_I32, binop(Iop_Max32U, b1, b2) );
5777}
5778
5779static IRAtom* gen_load_b ( MCEnv* mce, Int szB,
5780 IRAtom* baseaddr, Int offset )
5781{
5782 void* hFun;
5783 HChar* hName;
5784 IRTemp bTmp;
5785 IRDirty* di;
sewardj1c0ce7a2009-07-01 08:10:49 +00005786 IRType aTy = typeOfIRExpr( mce->sb->tyenv, baseaddr );
sewardj7cf4e6b2008-05-01 20:24:26 +00005787 IROp opAdd = aTy == Ity_I32 ? Iop_Add32 : Iop_Add64;
5788 IRAtom* ea = baseaddr;
5789 if (offset != 0) {
5790 IRAtom* off = aTy == Ity_I32 ? mkU32( offset )
5791 : mkU64( (Long)(Int)offset );
5792 ea = assignNew( 'B', mce, aTy, binop(opAdd, ea, off));
5793 }
sewardj1c0ce7a2009-07-01 08:10:49 +00005794 bTmp = newTemp(mce, mce->hWordTy, BSh);
sewardj7cf4e6b2008-05-01 20:24:26 +00005795
5796 switch (szB) {
5797 case 1: hFun = (void*)&MC_(helperc_b_load1);
5798 hName = "MC_(helperc_b_load1)";
5799 break;
5800 case 2: hFun = (void*)&MC_(helperc_b_load2);
5801 hName = "MC_(helperc_b_load2)";
5802 break;
5803 case 4: hFun = (void*)&MC_(helperc_b_load4);
5804 hName = "MC_(helperc_b_load4)";
5805 break;
5806 case 8: hFun = (void*)&MC_(helperc_b_load8);
5807 hName = "MC_(helperc_b_load8)";
5808 break;
5809 case 16: hFun = (void*)&MC_(helperc_b_load16);
5810 hName = "MC_(helperc_b_load16)";
5811 break;
sewardj45fa9f42012-05-21 10:18:10 +00005812 case 32: hFun = (void*)&MC_(helperc_b_load32);
5813 hName = "MC_(helperc_b_load32)";
5814 break;
sewardj7cf4e6b2008-05-01 20:24:26 +00005815 default:
5816 VG_(printf)("mc_translate.c: gen_load_b: unhandled szB == %d\n", szB);
5817 tl_assert(0);
5818 }
5819 di = unsafeIRDirty_1_N(
5820 bTmp, 1/*regparms*/, hName, VG_(fnptr_to_fnentry)( hFun ),
5821 mkIRExprVec_1( ea )
5822 );
5823 /* no need to mess with any annotations. This call accesses
5824 neither guest state nor guest memory. */
5825 stmt( 'B', mce, IRStmt_Dirty(di) );
5826 if (mce->hWordTy == Ity_I64) {
5827 /* 64-bit host */
sewardj1c0ce7a2009-07-01 08:10:49 +00005828 IRTemp bTmp32 = newTemp(mce, Ity_I32, BSh);
sewardj7cf4e6b2008-05-01 20:24:26 +00005829 assign( 'B', mce, bTmp32, unop(Iop_64to32, mkexpr(bTmp)) );
5830 return mkexpr(bTmp32);
5831 } else {
5832 /* 32-bit host */
5833 return mkexpr(bTmp);
5834 }
5835}
sewardj1c0ce7a2009-07-01 08:10:49 +00005836
florian434ffae2012-07-19 17:23:42 +00005837static IRAtom* gen_guarded_load_b ( MCEnv* mce, Int szB, IRAtom* baseaddr,
5838 Int offset, IRAtom* guard )
5839{
5840 if (guard) {
5841 IRAtom *cond, *iffalse, *iftrue;
5842
5843 cond = assignNew('B', mce, Ity_I8, unop(Iop_1Uto8, guard));
5844 iftrue = assignNew('B', mce, Ity_I32,
5845 gen_load_b(mce, szB, baseaddr, offset));
5846 iffalse = mkU32(0);
5847
5848 return assignNew('B', mce, Ity_I32, IRExpr_Mux0X(cond, iffalse, iftrue));
5849 }
5850
5851 return gen_load_b(mce, szB, baseaddr, offset);
5852}
5853
sewardj1c0ce7a2009-07-01 08:10:49 +00005854/* Generate a shadow store. guard :: Ity_I1 controls whether the
5855 store really happens; NULL means it unconditionally does. */
sewardj7cf4e6b2008-05-01 20:24:26 +00005856static void gen_store_b ( MCEnv* mce, Int szB,
sewardj1c0ce7a2009-07-01 08:10:49 +00005857 IRAtom* baseaddr, Int offset, IRAtom* dataB,
5858 IRAtom* guard )
sewardj7cf4e6b2008-05-01 20:24:26 +00005859{
5860 void* hFun;
5861 HChar* hName;
5862 IRDirty* di;
sewardj1c0ce7a2009-07-01 08:10:49 +00005863 IRType aTy = typeOfIRExpr( mce->sb->tyenv, baseaddr );
sewardj7cf4e6b2008-05-01 20:24:26 +00005864 IROp opAdd = aTy == Ity_I32 ? Iop_Add32 : Iop_Add64;
5865 IRAtom* ea = baseaddr;
sewardj1c0ce7a2009-07-01 08:10:49 +00005866 if (guard) {
5867 tl_assert(isOriginalAtom(mce, guard));
5868 tl_assert(typeOfIRExpr(mce->sb->tyenv, guard) == Ity_I1);
5869 }
sewardj7cf4e6b2008-05-01 20:24:26 +00005870 if (offset != 0) {
5871 IRAtom* off = aTy == Ity_I32 ? mkU32( offset )
5872 : mkU64( (Long)(Int)offset );
5873 ea = assignNew( 'B', mce, aTy, binop(opAdd, ea, off));
5874 }
5875 if (mce->hWordTy == Ity_I64)
5876 dataB = assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, dataB));
5877
5878 switch (szB) {
5879 case 1: hFun = (void*)&MC_(helperc_b_store1);
5880 hName = "MC_(helperc_b_store1)";
5881 break;
5882 case 2: hFun = (void*)&MC_(helperc_b_store2);
5883 hName = "MC_(helperc_b_store2)";
5884 break;
5885 case 4: hFun = (void*)&MC_(helperc_b_store4);
5886 hName = "MC_(helperc_b_store4)";
5887 break;
5888 case 8: hFun = (void*)&MC_(helperc_b_store8);
5889 hName = "MC_(helperc_b_store8)";
5890 break;
5891 case 16: hFun = (void*)&MC_(helperc_b_store16);
5892 hName = "MC_(helperc_b_store16)";
5893 break;
sewardj45fa9f42012-05-21 10:18:10 +00005894 case 32: hFun = (void*)&MC_(helperc_b_store32);
5895 hName = "MC_(helperc_b_store32)";
5896 break;
sewardj7cf4e6b2008-05-01 20:24:26 +00005897 default:
5898 tl_assert(0);
5899 }
5900 di = unsafeIRDirty_0_N( 2/*regparms*/,
5901 hName, VG_(fnptr_to_fnentry)( hFun ),
5902 mkIRExprVec_2( ea, dataB )
5903 );
5904 /* no need to mess with any annotations. This call accesses
5905 neither guest state nor guest memory. */
sewardj1c0ce7a2009-07-01 08:10:49 +00005906 if (guard) di->guard = guard;
sewardj7cf4e6b2008-05-01 20:24:26 +00005907 stmt( 'B', mce, IRStmt_Dirty(di) );
5908}
5909
5910static IRAtom* narrowTo32 ( MCEnv* mce, IRAtom* e ) {
sewardj1c0ce7a2009-07-01 08:10:49 +00005911 IRType eTy = typeOfIRExpr(mce->sb->tyenv, e);
sewardj7cf4e6b2008-05-01 20:24:26 +00005912 if (eTy == Ity_I64)
5913 return assignNew( 'B', mce, Ity_I32, unop(Iop_64to32, e) );
5914 if (eTy == Ity_I32)
5915 return e;
5916 tl_assert(0);
5917}
5918
5919static IRAtom* zWidenFrom32 ( MCEnv* mce, IRType dstTy, IRAtom* e ) {
sewardj1c0ce7a2009-07-01 08:10:49 +00005920 IRType eTy = typeOfIRExpr(mce->sb->tyenv, e);
sewardj7cf4e6b2008-05-01 20:24:26 +00005921 tl_assert(eTy == Ity_I32);
5922 if (dstTy == Ity_I64)
5923 return assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, e) );
5924 tl_assert(0);
5925}
5926
sewardjdb5907d2009-11-26 17:20:21 +00005927
/* Compute the origin ("B-value") for expression 'e' as an Ity_I32
   atom, emitting any IR needed into mce->sb.  This is the
   expression-level half of the origin-tracking instrumenter and is
   only used at MC_(clo_mc_level) == 3.  The general scheme is:
   origins of compound expressions are the pointwise maxU32 of the
   origins of their operands; constants have origin 0 ("none"). */
static IRAtom* schemeE ( MCEnv* mce, IRExpr* e )
{
   tl_assert(MC_(clo_mc_level) == 3);

   switch (e->tag) {

      case Iex_GetI: {
         IRRegArray* descr_b;
         IRAtom *t1, *t2, *t3, *t4;
         IRRegArray* descr = e->Iex.GetI.descr;
         IRType equivIntTy
            = MC_(get_otrack_reg_array_equiv_int_type)(descr);
         /* If this array is unshadowable for whatever reason, use the
            usual approximation. */
         if (equivIntTy == Ity_INVALID)
            return mkU32(0);
         tl_assert(sizeofIRType(equivIntTy) >= 4);
         tl_assert(sizeofIRType(equivIntTy) == sizeofIRType(descr->elemTy));
         /* The B shadow of the array lives at base + 2*total_sizeB,
            mirroring the layout used for Iex_Get below. */
         descr_b = mkIRRegArray( descr->base + 2*mce->layout->total_sizeB,
                                 equivIntTy, descr->nElems );
         /* Do a shadow indexed get of the same size, giving t1.  Take
            the bottom 32 bits of it, giving t2.  Compute into t3 the
            origin for the index (almost certainly zero, but there's
            no harm in being completely general here, since iropt will
            remove any useless code), and fold it in, giving a final
            value t4. */
         t1 = assignNew( 'B', mce, equivIntTy,
                          IRExpr_GetI( descr_b, e->Iex.GetI.ix,
                                       e->Iex.GetI.bias ));
         t2 = narrowTo32( mce, t1 );
         t3 = schemeE( mce, e->Iex.GetI.ix );
         t4 = gen_maxU32( mce, t2, t3 );
         return t4;
      }
      case Iex_CCall: {
         Int i;
         IRAtom* here;
         IRExpr** args = e->Iex.CCall.args;
         IRAtom* curr = mkU32(0);
         /* Fold together the origins of all non-excluded args. */
         for (i = 0; args[i]; i++) {
            tl_assert(i < 32);
            tl_assert(isOriginalAtom(mce, args[i]));
            /* Only take notice of this arg if the callee's
               mc-exclusion mask does not say it is to be excluded. */
            if (e->Iex.CCall.cee->mcx_mask & (1<<i)) {
               /* the arg is to be excluded from definedness checking.
                  Do nothing. */
               if (0) VG_(printf)("excluding %s(%d)\n",
                                  e->Iex.CCall.cee->name, i);
            } else {
               /* calculate the arg's definedness, and pessimistically
                  merge it in. */
               here = schemeE( mce, args[i] );
               curr = gen_maxU32( mce, curr, here );
            }
         }
         return curr;
      }
      case Iex_Load: {
         Int dszB;
         dszB = sizeofIRType(e->Iex.Load.ty);
         /* assert that the B value for the address is already
            available (somewhere) */
         tl_assert(isIRAtom(e->Iex.Load.addr));
         tl_assert(mce->hWordTy == Ity_I32 || mce->hWordTy == Ity_I64);
         return gen_load_b( mce, dszB, e->Iex.Load.addr, 0 );
      }
      case Iex_Mux0X: {
         /* We can't know statically which arm is taken, so merge the
            origins of the condition and of both arms. */
         IRAtom* b1 = schemeE( mce, e->Iex.Mux0X.cond );
         IRAtom* b2 = schemeE( mce, e->Iex.Mux0X.expr0 );
         IRAtom* b3 = schemeE( mce, e->Iex.Mux0X.exprX );
         return gen_maxU32( mce, b1, gen_maxU32( mce, b2, b3 ));
      }
      case Iex_Qop: {
         IRAtom* b1 = schemeE( mce, e->Iex.Qop.details->arg1 );
         IRAtom* b2 = schemeE( mce, e->Iex.Qop.details->arg2 );
         IRAtom* b3 = schemeE( mce, e->Iex.Qop.details->arg3 );
         IRAtom* b4 = schemeE( mce, e->Iex.Qop.details->arg4 );
         return gen_maxU32( mce, gen_maxU32( mce, b1, b2 ),
                                 gen_maxU32( mce, b3, b4 ) );
      }
      case Iex_Triop: {
         IRAtom* b1 = schemeE( mce, e->Iex.Triop.details->arg1 );
         IRAtom* b2 = schemeE( mce, e->Iex.Triop.details->arg2 );
         IRAtom* b3 = schemeE( mce, e->Iex.Triop.details->arg3 );
         return gen_maxU32( mce, b1, gen_maxU32( mce, b2, b3 ) );
      }
      case Iex_Binop: {
         switch (e->Iex.Binop.op) {
            case Iop_CasCmpEQ8:  case Iop_CasCmpNE8:
            case Iop_CasCmpEQ16: case Iop_CasCmpNE16:
            case Iop_CasCmpEQ32: case Iop_CasCmpNE32:
            case Iop_CasCmpEQ64: case Iop_CasCmpNE64:
               /* Just say these all produce a defined result,
                  regardless of their arguments.  See
                  COMMENT_ON_CasCmpEQ in this file. */
               return mkU32(0);
            default: {
               IRAtom* b1 = schemeE( mce, e->Iex.Binop.arg1 );
               IRAtom* b2 = schemeE( mce, e->Iex.Binop.arg2 );
               return gen_maxU32( mce, b1, b2 );
            }
         }
         tl_assert(0);
         /*NOTREACHED*/
      }
      case Iex_Unop: {
         /* Unary ops pass the operand's origin straight through. */
         IRAtom* b1 = schemeE( mce, e->Iex.Unop.arg );
         return b1;
      }
      case Iex_Const:
         /* Constants are always defined: no origin. */
         return mkU32(0);
      case Iex_RdTmp:
         /* Use the temporary's B-shadow. */
         return mkexpr( findShadowTmpB( mce, e->Iex.RdTmp.tmp ));
      case Iex_Get: {
         /* Origin of a guest-state read: fetch from the B shadow of
            the guest state, which lives at offset + 2*total_sizeB.
            Returns 0 if this slice of state is untracked. */
         Int b_offset = MC_(get_otrack_shadow_offset)(
                           e->Iex.Get.offset,
                           sizeofIRType(e->Iex.Get.ty)
                        );
         tl_assert(b_offset >= -1
                   && b_offset <= mce->layout->total_sizeB -4);
         if (b_offset >= 0) {
            /* FIXME: this isn't an atom! */
            return IRExpr_Get( b_offset + 2*mce->layout->total_sizeB,
                               Ity_I32 );
         }
         return mkU32(0);
      }
      default:
         VG_(printf)("mc_translate.c: schemeE: unhandled: ");
         ppIRExpr(e);
         VG_(tool_panic)("memcheck:schemeE");
   }
}
6062
sewardjdb5907d2009-11-26 17:20:21 +00006063
/* Origin-tracking instrumentation for an IRDirty (dirty helper
   call): fold the origins of the guard, the non-excluded args, any
   guest state read, and any memory read into a single 32-bit
   B-value 'curr', then distribute 'curr' to the destination
   temporary, any guest state written, and any memory written. */
static void do_origins_Dirty ( MCEnv* mce, IRDirty* d )
{
   // This is a hacked version of do_shadow_Dirty
   Int       i, k, n, toDo, gSz, gOff;
   IRAtom    *here, *curr;
   IRTemp    dst;

   /* First check the guard. */
   curr = schemeE( mce, d->guard );

   /* Now round up all inputs and maxU32 over them. */

   /* Inputs: unmasked args
      Note: arguments are evaluated REGARDLESS of the guard expression */
   for (i = 0; d->args[i]; i++) {
      if (d->cee->mcx_mask & (1<<i)) {
         /* ignore this arg */
      } else {
         here = schemeE( mce, d->args[i] );
         curr = gen_maxU32( mce, curr, here );
      }
   }

   /* Inputs: guest state that we read. */
   for (i = 0; i < d->nFxState; i++) {
      tl_assert(d->fxState[i].fx != Ifx_None);
      if (d->fxState[i].fx == Ifx_Write)
         continue;

      /* Enumerate the described state segments */
      for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) {
         gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen;
         gSz  = d->fxState[i].size;

         /* Ignore any sections marked as 'always defined'. */
         if (isAlwaysDefd(mce, gOff, gSz)) {
            if (0)
            VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
                        gOff, gSz);
            continue;
         }

         /* This state element is read or modified.  So we need to
            consider it.  If larger than 4 bytes, deal with it in
            4-byte chunks. */
         while (True) {
            Int b_offset;
            tl_assert(gSz >= 0);
            if (gSz == 0) break;
            n = gSz <= 4 ? gSz : 4;
            /* update 'curr' with maxU32 of the state slice
               gOff .. gOff+n-1 */
            b_offset = MC_(get_otrack_shadow_offset)(gOff, 4);
            if (b_offset != -1) {
               /* Observe the guard expression. If it is false use 0, i.e.
                  nothing is known about the origin */
               IRAtom *cond, *iffalse, *iftrue;

               cond = assignNew( 'B', mce, Ity_I8, unop(Iop_1Uto8, d->guard));
               iffalse = mkU32(0);
               iftrue = assignNew( 'B', mce, Ity_I32,
                                   IRExpr_Get(b_offset
                                                 + 2*mce->layout->total_sizeB,
                                              Ity_I32));
               here = assignNew( 'B', mce, Ity_I32,
                                 IRExpr_Mux0X(cond, iffalse, iftrue));
               curr = gen_maxU32( mce, curr, here );
            }
            gSz -= n;
            gOff += n;
         }
      }
   }

   /* Inputs: memory */

   if (d->mFx != Ifx_None) {
      /* Because we may do multiple shadow loads/stores from the same
         base address, it's best to do a single test of its
         definedness right now.  Post-instrumentation optimisation
         should remove all but this test. */
      tl_assert(d->mAddr);
      here = schemeE( mce, d->mAddr );
      curr = gen_maxU32( mce, curr, here );
   }

   /* Deal with memory inputs (reads or modifies) */
   if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
      toDo   = d->mSize;
      /* chew off 32-bit chunks.  We don't care about the endianness
         since it's all going to be condensed down to a single bit,
         but nevertheless choose an endianness which is hopefully
         native to the platform. */
      while (toDo >= 4) {
         here = gen_guarded_load_b( mce, 4, d->mAddr, d->mSize - toDo,
                                    d->guard );
         curr = gen_maxU32( mce, curr, here );
         toDo -= 4;
      }
      /* handle possible 16-bit excess */
      while (toDo >= 2) {
         here = gen_guarded_load_b( mce, 2, d->mAddr, d->mSize - toDo,
                                    d->guard );
         curr = gen_maxU32( mce, curr, here );
         toDo -= 2;
      }
      /* chew off the remaining 8-bit chunk, if any */
      if (toDo == 1) {
         here = gen_guarded_load_b( mce, 1, d->mAddr, d->mSize - toDo,
                                    d->guard );
         curr = gen_maxU32( mce, curr, here );
         toDo -= 1;
      }
      tl_assert(toDo == 0);
   }

   /* Whew!  So curr is a 32-bit B-value which should give an origin
      of some use if any of the inputs to the helper are undefined.
      Now we need to re-distribute the results to all destinations. */

   /* Outputs: the destination temporary, if there is one. */
   if (d->tmp != IRTemp_INVALID) {
      dst   = findShadowTmpB(mce, d->tmp);
      /* NOTE(review): category tag is 'V' here although this assigns
         a B (origin) shadow and every neighbouring statement uses
         'B' — looks inconsistent; confirm whether intentional. */
      assign( 'V', mce, dst, curr );
   }

   /* Outputs: guest state that we write or modify. */
   for (i = 0; i < d->nFxState; i++) {
      tl_assert(d->fxState[i].fx != Ifx_None);
      if (d->fxState[i].fx == Ifx_Read)
         continue;

      /* Enumerate the described state segments */
      for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) {
         gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen;
         gSz  = d->fxState[i].size;

         /* Ignore any sections marked as 'always defined'. */
         if (isAlwaysDefd(mce, gOff, gSz))
            continue;

         /* This state element is written or modified.  So we need to
            consider it.  If larger than 4 bytes, deal with it in
            4-byte chunks. */
         while (True) {
            Int b_offset;
            tl_assert(gSz >= 0);
            if (gSz == 0) break;
            n = gSz <= 4 ? gSz : 4;
            /* Write 'curr' to the state slice gOff .. gOff+n-1 */
            b_offset = MC_(get_otrack_shadow_offset)(gOff, 4);
            if (b_offset != -1) {
               if (d->guard) {
                  /* If the guard expression evaluates to false we simply Put
                     the value that is already stored in the guest state slot */
                  IRAtom *cond, *iffalse;

                  cond = assignNew('B', mce, Ity_I8,
                                   unop(Iop_1Uto8, d->guard));
                  iffalse = assignNew('B', mce, Ity_I32,
                                      IRExpr_Get(b_offset +
                                                 2*mce->layout->total_sizeB,
                                                 Ity_I32));
                  /* NOTE(review): tag 'V' on a B-shadow mux — same
                     apparent inconsistency as above; confirm. */
                  curr = assignNew('V', mce, Ity_I32,
                                   IRExpr_Mux0X(cond, iffalse, curr));
               }
               stmt( 'B', mce, IRStmt_Put(b_offset
                                             + 2*mce->layout->total_sizeB,
                                          curr ));
            }
            gSz -= n;
            gOff += n;
         }
      }
   }

   /* Outputs: memory that we write or modify.  Same comments about
      endianness as above apply. */
   if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
      toDo   = d->mSize;
      /* chew off 32-bit chunks */
      while (toDo >= 4) {
         gen_store_b( mce, 4, d->mAddr, d->mSize - toDo, curr,
                      d->guard );
         toDo -= 4;
      }
      /* handle possible 16-bit excess */
      while (toDo >= 2) {
         gen_store_b( mce, 2, d->mAddr, d->mSize - toDo, curr,
                      d->guard );
         toDo -= 2;
      }
      /* chew off the remaining 8-bit chunk, if any */
      if (toDo == 1) {
         gen_store_b( mce, 1, d->mAddr, d->mSize - toDo, curr,
                      d->guard );
         toDo -= 1;
      }
      tl_assert(toDo == 0);
   }
}
6265
sewardjdb5907d2009-11-26 17:20:21 +00006266
6267static void do_origins_Store ( MCEnv* mce,
6268 IREndness stEnd,
6269 IRExpr* stAddr,
6270 IRExpr* stData )
6271{
6272 Int dszB;
6273 IRAtom* dataB;
6274 /* assert that the B value for the address is already available
6275 (somewhere), since the call to schemeE will want to see it.
6276 XXXX how does this actually ensure that?? */
6277 tl_assert(isIRAtom(stAddr));
6278 tl_assert(isIRAtom(stData));
6279 dszB = sizeofIRType( typeOfIRExpr(mce->sb->tyenv, stData ) );
6280 dataB = schemeE( mce, stData );
6281 gen_store_b( mce, dszB, stAddr, 0/*offset*/, dataB,
6282 NULL/*guard*/ );
6283}
6284
6285
/* Statement-level origin-tracking instrumenter: the counterpart of
   schemeE.  Emits B-shadow statements for 'st' into mce->sb.  Only
   used at MC_(clo_mc_level) == 3. */
static void schemeS ( MCEnv* mce, IRStmt* st )
{
   tl_assert(MC_(clo_mc_level) == 3);

   switch (st->tag) {

      case Ist_AbiHint:
         /* The value-check instrumenter handles this - by arranging
            to pass the address of the next instruction to
            MC_(helperc_MAKE_STACK_UNINIT).  This is all that needs to
            happen for origin tracking w.r.t. AbiHints.  So there is
            nothing to do here. */
         break;

      case Ist_PutI: {
         IRPutI *puti = st->Ist.PutI.details;
         IRRegArray* descr_b;
         IRAtom *t1, *t2, *t3, *t4;
         IRRegArray* descr = puti->descr;
         IRType equivIntTy
            = MC_(get_otrack_reg_array_equiv_int_type)(descr);
         /* If this array is unshadowable for whatever reason,
            generate no code. */
         if (equivIntTy == Ity_INVALID)
            break;
         tl_assert(sizeofIRType(equivIntTy) >= 4);
         tl_assert(sizeofIRType(equivIntTy) == sizeofIRType(descr->elemTy));
         /* B shadow of the array: mirrors the offset scheme used by
            schemeE for Iex_GetI. */
         descr_b
            = mkIRRegArray( descr->base + 2*mce->layout->total_sizeB,
                            equivIntTy, descr->nElems );
         /* Compute a value to Put - the conjoinment of the origin for
            the data to be Put-ted (obviously) and of the index value
            (not so obviously). */
         t1 = schemeE( mce, puti->data );
         t2 = schemeE( mce, puti->ix );
         t3 = gen_maxU32( mce, t1, t2 );
         t4 = zWidenFrom32( mce, equivIntTy, t3 );
         stmt( 'B', mce, IRStmt_PutI( mkIRPutI(descr_b, puti->ix,
                                               puti->bias, t4) ));
         break;
      }

      case Ist_Dirty:
         do_origins_Dirty( mce, st->Ist.Dirty.details );
         break;

      case Ist_Store:
         do_origins_Store( mce, st->Ist.Store.end,
                                st->Ist.Store.addr,
                                st->Ist.Store.data );
         break;

      case Ist_LLSC: {
         /* In short: treat a load-linked like a normal load followed
            by an assignment of the loaded (shadow) data the result
            temporary.  Treat a store-conditional like a normal store,
            and mark the result temporary as defined. */
         if (st->Ist.LLSC.storedata == NULL) {
            /* Load Linked */
            IRType resTy
               = typeOfIRTemp(mce->sb->tyenv, st->Ist.LLSC.result);
            /* Build a synthetic plain load so schemeE can compute the
               origin of the loaded data the usual way. */
            IRExpr* vanillaLoad
               = IRExpr_Load(st->Ist.LLSC.end, resTy, st->Ist.LLSC.addr);
            tl_assert(resTy == Ity_I64 || resTy == Ity_I32
                      || resTy == Ity_I16 || resTy == Ity_I8);
            assign( 'B', mce, findShadowTmpB(mce, st->Ist.LLSC.result),
                              schemeE(mce, vanillaLoad));
         } else {
            /* Store conditional */
            do_origins_Store( mce, st->Ist.LLSC.end,
                              st->Ist.LLSC.addr,
                              st->Ist.LLSC.storedata );
            /* For the rationale behind this, see comments at the
               place where the V-shadow for .result is constructed, in
               do_shadow_LLSC.  In short, we regard .result as
               always-defined. */
            assign( 'B', mce, findShadowTmpB(mce, st->Ist.LLSC.result),
                              mkU32(0) );
         }
         break;
      }

      case Ist_Put: {
         /* Write the data's origin into the B shadow of the guest
            state slot, if that slot is tracked (b_offset >= 0). */
         Int b_offset
            = MC_(get_otrack_shadow_offset)(
                 st->Ist.Put.offset,
                 sizeofIRType(typeOfIRExpr(mce->sb->tyenv, st->Ist.Put.data))
              );
         if (b_offset >= 0) {
            /* FIXME: this isn't an atom! */
            stmt( 'B', mce, IRStmt_Put(b_offset + 2*mce->layout->total_sizeB,
                                       schemeE( mce, st->Ist.Put.data )) );
         }
         break;
      }

      case Ist_WrTmp:
         assign( 'B', mce, findShadowTmpB(mce, st->Ist.WrTmp.tmp),
                           schemeE(mce, st->Ist.WrTmp.data) );
         break;

      case Ist_MBE:
      case Ist_NoOp:
      case Ist_Exit:
      case Ist_IMark:
         /* No data flows here that could carry an origin. */
         break;

      default:
         VG_(printf)("mc_translate.c: schemeS: unhandled: ");
         ppIRStmt(st);
         VG_(tool_panic)("memcheck:schemeS");
   }
}
6399
6400
njn25e49d8e72002-09-23 09:36:25 +00006401/*--------------------------------------------------------------------*/
njn25cac76cb2002-09-23 11:21:57 +00006402/*--- end mc_translate.c ---*/
njn25e49d8e72002-09-23 09:36:25 +00006403/*--------------------------------------------------------------------*/