blob: 44a3a0b7dc1796aa36fcf0a32a3d994067cf08ce [file] [log] [blame]
nethercotebb1c9912004-01-04 16:43:23 +00001
njn25e49d8e72002-09-23 09:36:25 +00002/*--------------------------------------------------------------------*/
sewardj95448072004-11-22 20:19:51 +00003/*--- Instrument IR to perform memory checking operations. ---*/
njn25cac76cb2002-09-23 11:21:57 +00004/*--- mc_translate.c ---*/
njn25e49d8e72002-09-23 09:36:25 +00005/*--------------------------------------------------------------------*/
njnc9539842002-10-02 13:26:35 +00006
njn25e49d8e72002-09-23 09:36:25 +00007/*
nethercote137bc552003-11-14 17:47:54 +00008 This file is part of MemCheck, a heavyweight Valgrind tool for
njnc9539842002-10-02 13:26:35 +00009 detecting memory errors.
njn25e49d8e72002-09-23 09:36:25 +000010
sewardj03f8d3f2012-08-05 15:46:46 +000011 Copyright (C) 2000-2012 Julian Seward
njn25e49d8e72002-09-23 09:36:25 +000012 jseward@acm.org
13
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
18
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
23
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, write to the Free Software
26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27 02111-1307, USA.
28
29 The GNU General Public License is contained in the file COPYING.
30*/
31
njnc7561b92005-06-19 01:24:32 +000032#include "pub_tool_basics.h"
philippe6643e962012-01-17 21:16:30 +000033#include "pub_tool_poolalloc.h" // For mc_include.h
njn1d0825f2006-03-27 11:37:07 +000034#include "pub_tool_hashtable.h" // For mc_include.h
njn132bfcc2005-06-04 19:16:06 +000035#include "pub_tool_libcassert.h"
njn36a20fa2005-06-03 03:08:39 +000036#include "pub_tool_libcprint.h"
njnc7561b92005-06-19 01:24:32 +000037#include "pub_tool_tooliface.h"
sewardj53ee1fc2005-12-23 02:29:58 +000038#include "pub_tool_machine.h" // VG_(fnptr_to_fnentry)
sewardj81651dc2007-08-28 06:05:20 +000039#include "pub_tool_xarray.h"
40#include "pub_tool_mallocfree.h"
41#include "pub_tool_libcbase.h"
njn25e49d8e72002-09-23 09:36:25 +000042
sewardj7cf4e6b2008-05-01 20:24:26 +000043#include "mc_include.h"
44
45
sewardj7ee7d852011-06-16 11:37:21 +000046/* FIXMEs JRS 2011-June-16.
47
48 Check the interpretation for vector narrowing and widening ops,
49 particularly the saturating ones. I suspect they are either overly
50 pessimistic and/or wrong.
51*/
52
sewardj992dff92005-10-07 11:08:55 +000053/* This file implements the Memcheck instrumentation, and in
54 particular contains the core of its undefined value detection
55 machinery. For a comprehensive background of the terminology,
56 algorithms and rationale used herein, read:
57
58 Using Valgrind to detect undefined value errors with
59 bit-precision
60
61 Julian Seward and Nicholas Nethercote
62
63 2005 USENIX Annual Technical Conference (General Track),
64 Anaheim, CA, USA, April 10-15, 2005.
njn6665ea22007-05-24 23:14:41 +000065
66 ----
67
68 Here is as good a place as any to record exactly when V bits are and
69 should be checked, why, and what function is responsible.
70
71
72 Memcheck complains when an undefined value is used:
73
74 1. In the condition of a conditional branch. Because it could cause
75 incorrect control flow, and thus cause incorrect externally-visible
76 behaviour. [mc_translate.c:complainIfUndefined]
77
78 2. As an argument to a system call, or as the value that specifies
79 the system call number. Because it could cause an incorrect
80 externally-visible side effect. [mc_translate.c:mc_pre_reg_read]
81
82 3. As the address in a load or store. Because it could cause an
83 incorrect value to be used later, which could cause externally-visible
84 behaviour (eg. via incorrect control flow or an incorrect system call
85 argument) [complainIfUndefined]
86
87 4. As the target address of a branch. Because it could cause incorrect
88 control flow. [complainIfUndefined]
89
90 5. As an argument to setenv, unsetenv, or putenv. Because it could put
91 an incorrect value into the external environment.
92 [mc_replace_strmem.c:VG_WRAP_FUNCTION_ZU(*, *env)]
93
94 6. As the index in a GETI or PUTI operation. I'm not sure why... (njn).
95 [complainIfUndefined]
96
97 7. As an argument to the VALGRIND_CHECK_MEM_IS_DEFINED and
98 VALGRIND_CHECK_VALUE_IS_DEFINED client requests. Because the user
99 requested it. [in memcheck.h]
100
101
102 Memcheck also complains, but should not, when an undefined value is used:
103
104 8. As the shift value in certain SIMD shift operations (but not in the
105 standard integer shift operations). This inconsistency is due to
      historical reasons. [complainIfUndefined]
107
108
109 Memcheck does not complain, but should, when an undefined value is used:
110
111 9. As an input to a client request. Because the client request may
112 affect the visible behaviour -- see bug #144362 for an example
113 involving the malloc replacements in vg_replace_malloc.c and
114 VALGRIND_NON_SIMD_CALL* requests, where an uninitialised argument
115 isn't identified. That bug report also has some info on how to solve
116 the problem. [valgrind.h:VALGRIND_DO_CLIENT_REQUEST]
117
118
119 In practice, 1 and 2 account for the vast majority of cases.
sewardj992dff92005-10-07 11:08:55 +0000120*/
121
sewardj95448072004-11-22 20:19:51 +0000122/*------------------------------------------------------------*/
123/*--- Forward decls ---*/
124/*------------------------------------------------------------*/
125
126struct _MCEnv;
127
sewardj7cf4e6b2008-05-01 20:24:26 +0000128static IRType shadowTypeV ( IRType ty );
sewardj95448072004-11-22 20:19:51 +0000129static IRExpr* expr2vbits ( struct _MCEnv* mce, IRExpr* e );
sewardjafa617b2008-07-22 09:59:48 +0000130static IRTemp findShadowTmpB ( struct _MCEnv* mce, IRTemp orig );
sewardj95448072004-11-22 20:19:51 +0000131
sewardjb5b87402011-03-07 16:05:35 +0000132static IRExpr *i128_const_zero(void);
sewardj95448072004-11-22 20:19:51 +0000133
134/*------------------------------------------------------------*/
135/*--- Memcheck running state, and tmp management. ---*/
136/*------------------------------------------------------------*/
137
sewardj1c0ce7a2009-07-01 08:10:49 +0000138/* Carries info about a particular tmp. The tmp's number is not
139 recorded, as this is implied by (equal to) its index in the tmpMap
140 in MCEnv. The tmp's type is also not recorded, as this is present
141 in MCEnv.sb->tyenv.
142
143 When .kind is Orig, .shadowV and .shadowB may give the identities
144 of the temps currently holding the associated definedness (shadowV)
145 and origin (shadowB) values, or these may be IRTemp_INVALID if code
146 to compute such values has not yet been emitted.
147
148 When .kind is VSh or BSh then the tmp is holds a V- or B- value,
149 and so .shadowV and .shadowB must be IRTemp_INVALID, since it is
150 illogical for a shadow tmp itself to be shadowed.
151*/
typedef
   enum { Orig=1,   /* an original-program tmp */
          VSh=2,    /* a V-bits (definedness) shadow tmp */
          BSh=3 }   /* a B-bits (origin) shadow tmp */
   TempKind;
155
typedef
   struct {
      TempKind kind;     /* role of this tmp: Orig, VSh or BSh */
      IRTemp   shadowV;  /* for Orig tmps: V-shadow tmp, or IRTemp_INVALID */
      IRTemp   shadowB;  /* for Orig tmps: B-shadow tmp, or IRTemp_INVALID */
   }
   TempMapEnt;
163
164
sewardj95448072004-11-22 20:19:51 +0000165/* Carries around state during memcheck instrumentation. */
typedef
   struct _MCEnv {
      /* MODIFIED: the superblock being constructed.  IRStmts are
         added. */
      IRSB* sb;
      /* If True, every statement added via stmt() is also printed,
         for debugging. */
      Bool trace;

      /* MODIFIED: a table [0 .. #temps_in_sb-1] which gives the
         current kind and possibly shadow temps for each temp in the
         IRSB being constructed.  Note that it does not contain the
         type of each tmp.  If you want to know the type, look at the
         relevant entry in sb->tyenv.  It follows that at all times
         during the instrumentation process, the valid indices for
         tmpMap and sb->tyenv are identical, being 0 .. N-1 where N is
         total number of Orig, V- and B- temps allocated so far.

         The reason for this strange split (types in one place, all
         other info in another) is that we need the types to be
         attached to sb so as to make it possible to do
         "typeOfIRExpr(mce->bb->tyenv, ...)" at various places in the
         instrumentation process. */
      XArray* /* of TempMapEnt */ tmpMap;

      /* MODIFIED: indicates whether "bogus" literals have so far been
         found.  Starts off False, and may change to True. */
      Bool bogusLiterals;

      /* READONLY: indicates whether we should use expensive
         interpretations of integer adds, since unfortunately LLVM
         uses them to do ORs in some circumstances.  Defaulted to True
         on MacOS and False everywhere else. */
      Bool useLLVMworkarounds;

      /* READONLY: the guest layout.  This indicates which parts of
         the guest state should be regarded as 'always defined'. */
      VexGuestLayout* layout;

      /* READONLY: the host word type.  Needed for constructing
         arguments of type 'HWord' to be passed to helper functions.
         Ity_I32 or Ity_I64 only. */
      IRType hWordTy;
   }
   MCEnv;
209
210/* SHADOW TMP MANAGEMENT. Shadow tmps are allocated lazily (on
211 demand), as they are encountered. This is for two reasons.
212
213 (1) (less important reason): Many original tmps are unused due to
   initial IR optimisation, and we do not want to waste space in tables
215 tracking them.
216
217 Shadow IRTemps are therefore allocated on demand. mce.tmpMap is a
218 table indexed [0 .. n_types-1], which gives the current shadow for
219 each original tmp, or INVALID_IRTEMP if none is so far assigned.
220 It is necessary to support making multiple assignments to a shadow
221 -- specifically, after testing a shadow for definedness, it needs
222 to be made defined. But IR's SSA property disallows this.
223
224 (2) (more important reason): Therefore, when a shadow needs to get
225 a new value, a new temporary is created, the value is assigned to
226 that, and the tmpMap is updated to reflect the new binding.
227
228 A corollary is that if the tmpMap maps a given tmp to
sewardjf1962d32006-10-19 13:22:16 +0000229 IRTemp_INVALID and we are hoping to read that shadow tmp, it means
sewardj95448072004-11-22 20:19:51 +0000230 there's a read-before-write error in the original tmps. The IR
231 sanity checker should catch all such anomalies, however.
njn25e49d8e72002-09-23 09:36:25 +0000232*/
sewardj95448072004-11-22 20:19:51 +0000233
sewardj1c0ce7a2009-07-01 08:10:49 +0000234/* Create a new IRTemp of type 'ty' and kind 'kind', and add it to
235 both the table in mce->sb and to our auxiliary mapping. Note that
236 newTemp may cause mce->tmpMap to resize, hence previous results
237 from VG_(indexXA)(mce->tmpMap) are invalidated. */
238static IRTemp newTemp ( MCEnv* mce, IRType ty, TempKind kind )
239{
240 Word newIx;
241 TempMapEnt ent;
242 IRTemp tmp = newIRTemp(mce->sb->tyenv, ty);
243 ent.kind = kind;
244 ent.shadowV = IRTemp_INVALID;
245 ent.shadowB = IRTemp_INVALID;
246 newIx = VG_(addToXA)( mce->tmpMap, &ent );
247 tl_assert(newIx == (Word)tmp);
248 return tmp;
249}
250
251
sewardj95448072004-11-22 20:19:51 +0000252/* Find the tmp currently shadowing the given original tmp. If none
253 so far exists, allocate one. */
static IRTemp findShadowTmpV ( MCEnv* mce, IRTemp orig )
{
   TempMapEnt* ent;
   /* VG_(indexXA) range-checks 'orig', hence no need to check
      here. */
   ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
   tl_assert(ent->kind == Orig);
   /* Lazily allocate the V-shadow on first use. */
   if (ent->shadowV == IRTemp_INVALID) {
      IRTemp tmpV
         = newTemp( mce, shadowTypeV(mce->sb->tyenv->types[orig]), VSh );
      /* newTemp may cause mce->tmpMap to resize, hence previous results
         from VG_(indexXA) are invalid. */
      ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
      tl_assert(ent->kind == Orig);
      tl_assert(ent->shadowV == IRTemp_INVALID);
      ent->shadowV = tmpV;
   }
   return ent->shadowV;
}
273
sewardj95448072004-11-22 20:19:51 +0000274/* Allocate a new shadow for the given original tmp. This means any
275 previous shadow is abandoned. This is needed because it is
276 necessary to give a new value to a shadow once it has been tested
277 for undefinedness, but unfortunately IR's SSA property disallows
278 this. Instead we must abandon the old shadow, allocate a new one
sewardj1c0ce7a2009-07-01 08:10:49 +0000279 and use that instead.
280
281 This is the same as findShadowTmpV, except we don't bother to see
282 if a shadow temp already existed -- we simply allocate a new one
283 regardless. */
sewardj7cf4e6b2008-05-01 20:24:26 +0000284static void newShadowTmpV ( MCEnv* mce, IRTemp orig )
njn25e49d8e72002-09-23 09:36:25 +0000285{
sewardj1c0ce7a2009-07-01 08:10:49 +0000286 TempMapEnt* ent;
287 /* VG_(indexXA) range-checks 'orig', hence no need to check
288 here. */
289 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
290 tl_assert(ent->kind == Orig);
291 if (1) {
292 IRTemp tmpV
293 = newTemp( mce, shadowTypeV(mce->sb->tyenv->types[orig]), VSh );
294 /* newTemp may cause mce->tmpMap to resize, hence previous results
295 from VG_(indexXA) are invalid. */
296 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
297 tl_assert(ent->kind == Orig);
298 ent->shadowV = tmpV;
299 }
sewardj95448072004-11-22 20:19:51 +0000300}
301
302
303/*------------------------------------------------------------*/
304/*--- IRAtoms -- a subset of IRExprs ---*/
305/*------------------------------------------------------------*/
306
307/* An atom is either an IRExpr_Const or an IRExpr_Tmp, as defined by
sewardj710d6c22005-03-20 18:55:15 +0000308 isIRAtom() in libvex_ir.h. Because this instrumenter expects flat
sewardj95448072004-11-22 20:19:51 +0000309 input, most of this code deals in atoms. Usefully, a value atom
310 always has a V-value which is also an atom: constants are shadowed
311 by constants, and temps are shadowed by the corresponding shadow
312 temporary. */
313
314typedef IRExpr IRAtom;
315
316/* (used for sanity checks only): is this an atom which looks
317 like it's from original code? */
318static Bool isOriginalAtom ( MCEnv* mce, IRAtom* a1 )
319{
320 if (a1->tag == Iex_Const)
321 return True;
sewardj1c0ce7a2009-07-01 08:10:49 +0000322 if (a1->tag == Iex_RdTmp) {
323 TempMapEnt* ent = VG_(indexXA)( mce->tmpMap, a1->Iex.RdTmp.tmp );
324 return ent->kind == Orig;
325 }
sewardj95448072004-11-22 20:19:51 +0000326 return False;
327}
328
329/* (used for sanity checks only): is this an atom which looks
330 like it's from shadow code? */
331static Bool isShadowAtom ( MCEnv* mce, IRAtom* a1 )
332{
333 if (a1->tag == Iex_Const)
334 return True;
sewardj1c0ce7a2009-07-01 08:10:49 +0000335 if (a1->tag == Iex_RdTmp) {
336 TempMapEnt* ent = VG_(indexXA)( mce->tmpMap, a1->Iex.RdTmp.tmp );
337 return ent->kind == VSh || ent->kind == BSh;
338 }
sewardj95448072004-11-22 20:19:51 +0000339 return False;
340}
341
342/* (used for sanity checks only): check that both args are atoms and
343 are identically-kinded. */
344static Bool sameKindedAtoms ( IRAtom* a1, IRAtom* a2 )
345{
sewardj0b9d74a2006-12-24 02:24:11 +0000346 if (a1->tag == Iex_RdTmp && a2->tag == Iex_RdTmp)
sewardj95448072004-11-22 20:19:51 +0000347 return True;
sewardjbef552a2005-08-30 12:54:36 +0000348 if (a1->tag == Iex_Const && a2->tag == Iex_Const)
sewardj95448072004-11-22 20:19:51 +0000349 return True;
350 return False;
351}
352
353
354/*------------------------------------------------------------*/
355/*--- Type management ---*/
356/*------------------------------------------------------------*/
357
358/* Shadow state is always accessed using integer types. This returns
359 an integer type with the same size (as per sizeofIRType) as the
360 given type. The only valid shadow types are Bit, I8, I16, I32,
sewardj45fa9f42012-05-21 10:18:10 +0000361 I64, I128, V128, V256. */
sewardj95448072004-11-22 20:19:51 +0000362
sewardj7cf4e6b2008-05-01 20:24:26 +0000363static IRType shadowTypeV ( IRType ty )
sewardj95448072004-11-22 20:19:51 +0000364{
365 switch (ty) {
366 case Ity_I1:
367 case Ity_I8:
368 case Ity_I16:
369 case Ity_I32:
sewardj6cf40ff2005-04-20 22:31:26 +0000370 case Ity_I64:
371 case Ity_I128: return ty;
sewardj3245c912004-12-10 14:58:26 +0000372 case Ity_F32: return Ity_I32;
sewardjb0ccb4d2012-04-02 10:22:05 +0000373 case Ity_D32: return Ity_I32;
sewardj3245c912004-12-10 14:58:26 +0000374 case Ity_F64: return Ity_I64;
sewardjb0ccb4d2012-04-02 10:22:05 +0000375 case Ity_D64: return Ity_I64;
sewardjb5b87402011-03-07 16:05:35 +0000376 case Ity_F128: return Ity_I128;
sewardjb0ccb4d2012-04-02 10:22:05 +0000377 case Ity_D128: return Ity_I128;
sewardj3245c912004-12-10 14:58:26 +0000378 case Ity_V128: return Ity_V128;
sewardj45fa9f42012-05-21 10:18:10 +0000379 case Ity_V256: return Ity_V256;
sewardj95448072004-11-22 20:19:51 +0000380 default: ppIRType(ty);
sewardj7cf4e6b2008-05-01 20:24:26 +0000381 VG_(tool_panic)("memcheck:shadowTypeV");
sewardj95448072004-11-22 20:19:51 +0000382 }
383}
384
385/* Produce a 'defined' value of the given shadow type. Should only be
386 supplied shadow types (Bit/I8/I16/I32/UI64). */
387static IRExpr* definedOfType ( IRType ty ) {
388 switch (ty) {
sewardj170ee212004-12-10 18:57:51 +0000389 case Ity_I1: return IRExpr_Const(IRConst_U1(False));
390 case Ity_I8: return IRExpr_Const(IRConst_U8(0));
391 case Ity_I16: return IRExpr_Const(IRConst_U16(0));
392 case Ity_I32: return IRExpr_Const(IRConst_U32(0));
393 case Ity_I64: return IRExpr_Const(IRConst_U64(0));
sewardjb5b87402011-03-07 16:05:35 +0000394 case Ity_I128: return i128_const_zero();
sewardj170ee212004-12-10 18:57:51 +0000395 case Ity_V128: return IRExpr_Const(IRConst_V128(0x0000));
sewardjf1962d32006-10-19 13:22:16 +0000396 default: VG_(tool_panic)("memcheck:definedOfType");
njn25e49d8e72002-09-23 09:36:25 +0000397 }
398}
399
400
sewardj95448072004-11-22 20:19:51 +0000401/*------------------------------------------------------------*/
402/*--- Constructing IR fragments ---*/
403/*------------------------------------------------------------*/
404
sewardj95448072004-11-22 20:19:51 +0000405/* add stmt to a bb */
static inline void stmt ( HChar cat, MCEnv* mce, IRStmt* st ) {
   /* Append 'st' to the superblock under construction.  'cat' is a
      one-character category tag (e.g. 'V', 'B', 'C') shown when
      tracing is enabled. */
   if (mce->trace) {
      VG_(printf)("  %c: ", cat);
      ppIRStmt(st);
      VG_(printf)("\n");
   }
   addStmtToIRSB(mce->sb, st);
}
414
415/* assign value to tmp */
static inline
void assign ( HChar cat, MCEnv* mce, IRTemp tmp, IRExpr* expr ) {
   /* Emit "tmp = expr" into the superblock, routed through stmt() so
      it participates in tracing under category 'cat'. */
   stmt(cat, mce, IRStmt_WrTmp(tmp,expr));
}
sewardj95448072004-11-22 20:19:51 +0000420
421/* build various kinds of expressions */
sewardj57f92b02010-08-22 11:54:14 +0000422#define triop(_op, _arg1, _arg2, _arg3) \
423 IRExpr_Triop((_op),(_arg1),(_arg2),(_arg3))
sewardj95448072004-11-22 20:19:51 +0000424#define binop(_op, _arg1, _arg2) IRExpr_Binop((_op),(_arg1),(_arg2))
425#define unop(_op, _arg) IRExpr_Unop((_op),(_arg))
426#define mkU8(_n) IRExpr_Const(IRConst_U8(_n))
427#define mkU16(_n) IRExpr_Const(IRConst_U16(_n))
428#define mkU32(_n) IRExpr_Const(IRConst_U32(_n))
429#define mkU64(_n) IRExpr_Const(IRConst_U64(_n))
sewardj170ee212004-12-10 18:57:51 +0000430#define mkV128(_n) IRExpr_Const(IRConst_V128(_n))
sewardj0b9d74a2006-12-24 02:24:11 +0000431#define mkexpr(_tmp) IRExpr_RdTmp((_tmp))
sewardj95448072004-11-22 20:19:51 +0000432
sewardj7cf4e6b2008-05-01 20:24:26 +0000433/* Bind the given expression to a new temporary, and return the
sewardj95448072004-11-22 20:19:51 +0000434 temporary. This effectively converts an arbitrary expression into
sewardj7cf4e6b2008-05-01 20:24:26 +0000435 an atom.
436
437 'ty' is the type of 'e' and hence the type that the new temporary
sewardj1c0ce7a2009-07-01 08:10:49 +0000438 needs to be. But passing it in is redundant, since we can deduce
439 the type merely by inspecting 'e'. So at least use that fact to
440 assert that the two types agree. */
static IRAtom* assignNew ( HChar cat, MCEnv* mce, IRType ty, IRExpr* e )
{
   TempKind k;
   IRTemp   t;
   IRType   tyE = typeOfIRExpr(mce->sb->tyenv, e);

   tl_assert(tyE == ty); /* so 'ty' is redundant (!) */
   /* The category character determines what kind of tmp to make. */
   switch (cat) {
      case 'V': k = VSh;  break;
      case 'B': k = BSh;  break;
      case 'C': k = Orig; break;
                /* happens when we are making up new "orig"
                   expressions, for IRCAS handling */
      default: tl_assert(0);
   }
   t = newTemp(mce, ty, k);
   assign(cat, mce, t, e);
   return mkexpr(t);
}
460
461
462/*------------------------------------------------------------*/
sewardjb5b87402011-03-07 16:05:35 +0000463/*--- Helper functions for 128-bit ops ---*/
464/*------------------------------------------------------------*/
sewardj45fa9f42012-05-21 10:18:10 +0000465
sewardjb5b87402011-03-07 16:05:35 +0000466static IRExpr *i128_const_zero(void)
467{
sewardj45fa9f42012-05-21 10:18:10 +0000468 IRAtom* z64 = IRExpr_Const(IRConst_U64(0));
469 return binop(Iop_64HLto128, z64, z64);
sewardjb5b87402011-03-07 16:05:35 +0000470}
471
sewardj45fa9f42012-05-21 10:18:10 +0000472/* There are no I128-bit loads and/or stores [as generated by any
473 current front ends]. So we do not need to worry about that in
474 expr2vbits_Load */
475
sewardjb5b87402011-03-07 16:05:35 +0000476
477/*------------------------------------------------------------*/
sewardj95448072004-11-22 20:19:51 +0000478/*--- Constructing definedness primitive ops ---*/
479/*------------------------------------------------------------*/
480
481/* --------- Defined-if-either-defined --------- */
482
483static IRAtom* mkDifD8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
484 tl_assert(isShadowAtom(mce,a1));
485 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000486 return assignNew('V', mce, Ity_I8, binop(Iop_And8, a1, a2));
sewardj95448072004-11-22 20:19:51 +0000487}
488
489static IRAtom* mkDifD16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
490 tl_assert(isShadowAtom(mce,a1));
491 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000492 return assignNew('V', mce, Ity_I16, binop(Iop_And16, a1, a2));
sewardj95448072004-11-22 20:19:51 +0000493}
494
495static IRAtom* mkDifD32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
496 tl_assert(isShadowAtom(mce,a1));
497 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000498 return assignNew('V', mce, Ity_I32, binop(Iop_And32, a1, a2));
sewardj95448072004-11-22 20:19:51 +0000499}
500
sewardj7010f6e2004-12-10 13:35:22 +0000501static IRAtom* mkDifD64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
502 tl_assert(isShadowAtom(mce,a1));
503 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000504 return assignNew('V', mce, Ity_I64, binop(Iop_And64, a1, a2));
sewardj7010f6e2004-12-10 13:35:22 +0000505}
506
sewardj20d38f22005-02-07 23:50:18 +0000507static IRAtom* mkDifDV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
sewardj170ee212004-12-10 18:57:51 +0000508 tl_assert(isShadowAtom(mce,a1));
509 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000510 return assignNew('V', mce, Ity_V128, binop(Iop_AndV128, a1, a2));
sewardj170ee212004-12-10 18:57:51 +0000511}
512
sewardj350e8f72012-06-25 07:52:15 +0000513static IRAtom* mkDifDV256 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
514 tl_assert(isShadowAtom(mce,a1));
515 tl_assert(isShadowAtom(mce,a2));
516 return assignNew('V', mce, Ity_V256, binop(Iop_AndV256, a1, a2));
517}
518
sewardj95448072004-11-22 20:19:51 +0000519/* --------- Undefined-if-either-undefined --------- */
520
521static IRAtom* mkUifU8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
522 tl_assert(isShadowAtom(mce,a1));
523 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000524 return assignNew('V', mce, Ity_I8, binop(Iop_Or8, a1, a2));
sewardj95448072004-11-22 20:19:51 +0000525}
526
527static IRAtom* mkUifU16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
528 tl_assert(isShadowAtom(mce,a1));
529 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000530 return assignNew('V', mce, Ity_I16, binop(Iop_Or16, a1, a2));
sewardj95448072004-11-22 20:19:51 +0000531}
532
533static IRAtom* mkUifU32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
534 tl_assert(isShadowAtom(mce,a1));
535 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000536 return assignNew('V', mce, Ity_I32, binop(Iop_Or32, a1, a2));
sewardj95448072004-11-22 20:19:51 +0000537}
538
539static IRAtom* mkUifU64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
540 tl_assert(isShadowAtom(mce,a1));
541 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000542 return assignNew('V', mce, Ity_I64, binop(Iop_Or64, a1, a2));
sewardj95448072004-11-22 20:19:51 +0000543}
544
sewardjb5b87402011-03-07 16:05:35 +0000545static IRAtom* mkUifU128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
546 IRAtom *tmp1, *tmp2, *tmp3, *tmp4, *tmp5, *tmp6;
547 tl_assert(isShadowAtom(mce,a1));
548 tl_assert(isShadowAtom(mce,a2));
549 tmp1 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, a1));
550 tmp2 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, a1));
551 tmp3 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, a2));
552 tmp4 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, a2));
553 tmp5 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp1, tmp3));
554 tmp6 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp2, tmp4));
555
556 return assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, tmp6, tmp5));
557}
558
sewardj20d38f22005-02-07 23:50:18 +0000559static IRAtom* mkUifUV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
sewardj3245c912004-12-10 14:58:26 +0000560 tl_assert(isShadowAtom(mce,a1));
561 tl_assert(isShadowAtom(mce,a2));
sewardj7cf4e6b2008-05-01 20:24:26 +0000562 return assignNew('V', mce, Ity_V128, binop(Iop_OrV128, a1, a2));
sewardj3245c912004-12-10 14:58:26 +0000563}
564
sewardj350e8f72012-06-25 07:52:15 +0000565static IRAtom* mkUifUV256 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
566 tl_assert(isShadowAtom(mce,a1));
567 tl_assert(isShadowAtom(mce,a2));
568 return assignNew('V', mce, Ity_V256, binop(Iop_OrV256, a1, a2));
569}
570
sewardje50a1b12004-12-17 01:24:54 +0000571static IRAtom* mkUifU ( MCEnv* mce, IRType vty, IRAtom* a1, IRAtom* a2 ) {
sewardj95448072004-11-22 20:19:51 +0000572 switch (vty) {
sewardje50a1b12004-12-17 01:24:54 +0000573 case Ity_I8: return mkUifU8(mce, a1, a2);
sewardja1d93302004-12-12 16:45:06 +0000574 case Ity_I16: return mkUifU16(mce, a1, a2);
575 case Ity_I32: return mkUifU32(mce, a1, a2);
576 case Ity_I64: return mkUifU64(mce, a1, a2);
sewardjb5b87402011-03-07 16:05:35 +0000577 case Ity_I128: return mkUifU128(mce, a1, a2);
sewardj20d38f22005-02-07 23:50:18 +0000578 case Ity_V128: return mkUifUV128(mce, a1, a2);
sewardj95448072004-11-22 20:19:51 +0000579 default:
580 VG_(printf)("\n"); ppIRType(vty); VG_(printf)("\n");
581 VG_(tool_panic)("memcheck:mkUifU");
njn25e49d8e72002-09-23 09:36:25 +0000582 }
583}
584
sewardj95448072004-11-22 20:19:51 +0000585/* --------- The Left-family of operations. --------- */
njn25e49d8e72002-09-23 09:36:25 +0000586
sewardj95448072004-11-22 20:19:51 +0000587static IRAtom* mkLeft8 ( MCEnv* mce, IRAtom* a1 ) {
588 tl_assert(isShadowAtom(mce,a1));
sewardj7cf4e6b2008-05-01 20:24:26 +0000589 return assignNew('V', mce, Ity_I8, unop(Iop_Left8, a1));
sewardj95448072004-11-22 20:19:51 +0000590}
591
592static IRAtom* mkLeft16 ( MCEnv* mce, IRAtom* a1 ) {
593 tl_assert(isShadowAtom(mce,a1));
sewardj7cf4e6b2008-05-01 20:24:26 +0000594 return assignNew('V', mce, Ity_I16, unop(Iop_Left16, a1));
sewardj95448072004-11-22 20:19:51 +0000595}
596
597static IRAtom* mkLeft32 ( MCEnv* mce, IRAtom* a1 ) {
598 tl_assert(isShadowAtom(mce,a1));
sewardj7cf4e6b2008-05-01 20:24:26 +0000599 return assignNew('V', mce, Ity_I32, unop(Iop_Left32, a1));
sewardj95448072004-11-22 20:19:51 +0000600}
601
sewardj681be302005-01-15 20:43:58 +0000602static IRAtom* mkLeft64 ( MCEnv* mce, IRAtom* a1 ) {
603 tl_assert(isShadowAtom(mce,a1));
sewardj7cf4e6b2008-05-01 20:24:26 +0000604 return assignNew('V', mce, Ity_I64, unop(Iop_Left64, a1));
sewardj681be302005-01-15 20:43:58 +0000605}
606
sewardj95448072004-11-22 20:19:51 +0000607/* --------- 'Improvement' functions for AND/OR. --------- */
608
609/* ImproveAND(data, vbits) = data OR vbits. Defined (0) data 0s give
610 defined (0); all other -> undefined (1).
611*/
612static IRAtom* mkImproveAND8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
njn25e49d8e72002-09-23 09:36:25 +0000613{
sewardj95448072004-11-22 20:19:51 +0000614 tl_assert(isOriginalAtom(mce, data));
615 tl_assert(isShadowAtom(mce, vbits));
616 tl_assert(sameKindedAtoms(data, vbits));
sewardj7cf4e6b2008-05-01 20:24:26 +0000617 return assignNew('V', mce, Ity_I8, binop(Iop_Or8, data, vbits));
sewardj95448072004-11-22 20:19:51 +0000618}
njn25e49d8e72002-09-23 09:36:25 +0000619
sewardj95448072004-11-22 20:19:51 +0000620static IRAtom* mkImproveAND16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
621{
622 tl_assert(isOriginalAtom(mce, data));
623 tl_assert(isShadowAtom(mce, vbits));
624 tl_assert(sameKindedAtoms(data, vbits));
sewardj7cf4e6b2008-05-01 20:24:26 +0000625 return assignNew('V', mce, Ity_I16, binop(Iop_Or16, data, vbits));
sewardj95448072004-11-22 20:19:51 +0000626}
njn25e49d8e72002-09-23 09:36:25 +0000627
sewardj95448072004-11-22 20:19:51 +0000628static IRAtom* mkImproveAND32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
629{
630 tl_assert(isOriginalAtom(mce, data));
631 tl_assert(isShadowAtom(mce, vbits));
632 tl_assert(sameKindedAtoms(data, vbits));
sewardj7cf4e6b2008-05-01 20:24:26 +0000633 return assignNew('V', mce, Ity_I32, binop(Iop_Or32, data, vbits));
sewardj95448072004-11-22 20:19:51 +0000634}
njn25e49d8e72002-09-23 09:36:25 +0000635
sewardj7010f6e2004-12-10 13:35:22 +0000636static IRAtom* mkImproveAND64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
637{
638 tl_assert(isOriginalAtom(mce, data));
639 tl_assert(isShadowAtom(mce, vbits));
640 tl_assert(sameKindedAtoms(data, vbits));
sewardj7cf4e6b2008-05-01 20:24:26 +0000641 return assignNew('V', mce, Ity_I64, binop(Iop_Or64, data, vbits));
sewardj7010f6e2004-12-10 13:35:22 +0000642}
643
sewardj20d38f22005-02-07 23:50:18 +0000644static IRAtom* mkImproveANDV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
sewardj170ee212004-12-10 18:57:51 +0000645{
646 tl_assert(isOriginalAtom(mce, data));
647 tl_assert(isShadowAtom(mce, vbits));
648 tl_assert(sameKindedAtoms(data, vbits));
sewardj7cf4e6b2008-05-01 20:24:26 +0000649 return assignNew('V', mce, Ity_V128, binop(Iop_OrV128, data, vbits));
sewardj170ee212004-12-10 18:57:51 +0000650}
651
sewardj350e8f72012-06-25 07:52:15 +0000652static IRAtom* mkImproveANDV256 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
653{
654 tl_assert(isOriginalAtom(mce, data));
655 tl_assert(isShadowAtom(mce, vbits));
656 tl_assert(sameKindedAtoms(data, vbits));
657 return assignNew('V', mce, Ity_V256, binop(Iop_OrV256, data, vbits));
658}
659
sewardj95448072004-11-22 20:19:51 +0000660/* ImproveOR(data, vbits) = ~data OR vbits. Defined (0) data 1s give
661 defined (0); all other -> undefined (1).
662*/
663static IRAtom* mkImproveOR8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
664{
665 tl_assert(isOriginalAtom(mce, data));
666 tl_assert(isShadowAtom(mce, vbits));
667 tl_assert(sameKindedAtoms(data, vbits));
668 return assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +0000669 'V', mce, Ity_I8,
sewardj95448072004-11-22 20:19:51 +0000670 binop(Iop_Or8,
sewardj7cf4e6b2008-05-01 20:24:26 +0000671 assignNew('V', mce, Ity_I8, unop(Iop_Not8, data)),
sewardj95448072004-11-22 20:19:51 +0000672 vbits) );
673}
njn25e49d8e72002-09-23 09:36:25 +0000674
sewardj95448072004-11-22 20:19:51 +0000675static IRAtom* mkImproveOR16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
676{
677 tl_assert(isOriginalAtom(mce, data));
678 tl_assert(isShadowAtom(mce, vbits));
679 tl_assert(sameKindedAtoms(data, vbits));
680 return assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +0000681 'V', mce, Ity_I16,
sewardj95448072004-11-22 20:19:51 +0000682 binop(Iop_Or16,
sewardj7cf4e6b2008-05-01 20:24:26 +0000683 assignNew('V', mce, Ity_I16, unop(Iop_Not16, data)),
sewardj95448072004-11-22 20:19:51 +0000684 vbits) );
685}
njn25e49d8e72002-09-23 09:36:25 +0000686
sewardj95448072004-11-22 20:19:51 +0000687static IRAtom* mkImproveOR32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
688{
689 tl_assert(isOriginalAtom(mce, data));
690 tl_assert(isShadowAtom(mce, vbits));
691 tl_assert(sameKindedAtoms(data, vbits));
692 return assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +0000693 'V', mce, Ity_I32,
sewardj95448072004-11-22 20:19:51 +0000694 binop(Iop_Or32,
sewardj7cf4e6b2008-05-01 20:24:26 +0000695 assignNew('V', mce, Ity_I32, unop(Iop_Not32, data)),
sewardj95448072004-11-22 20:19:51 +0000696 vbits) );
697}
698
sewardj7010f6e2004-12-10 13:35:22 +0000699static IRAtom* mkImproveOR64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
700{
701 tl_assert(isOriginalAtom(mce, data));
702 tl_assert(isShadowAtom(mce, vbits));
703 tl_assert(sameKindedAtoms(data, vbits));
704 return assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +0000705 'V', mce, Ity_I64,
sewardj7010f6e2004-12-10 13:35:22 +0000706 binop(Iop_Or64,
sewardj7cf4e6b2008-05-01 20:24:26 +0000707 assignNew('V', mce, Ity_I64, unop(Iop_Not64, data)),
sewardj7010f6e2004-12-10 13:35:22 +0000708 vbits) );
709}
710
sewardj20d38f22005-02-07 23:50:18 +0000711static IRAtom* mkImproveORV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
sewardj170ee212004-12-10 18:57:51 +0000712{
713 tl_assert(isOriginalAtom(mce, data));
714 tl_assert(isShadowAtom(mce, vbits));
715 tl_assert(sameKindedAtoms(data, vbits));
716 return assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +0000717 'V', mce, Ity_V128,
sewardj20d38f22005-02-07 23:50:18 +0000718 binop(Iop_OrV128,
sewardj7cf4e6b2008-05-01 20:24:26 +0000719 assignNew('V', mce, Ity_V128, unop(Iop_NotV128, data)),
sewardj170ee212004-12-10 18:57:51 +0000720 vbits) );
721}
722
sewardj350e8f72012-06-25 07:52:15 +0000723static IRAtom* mkImproveORV256 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
724{
725 tl_assert(isOriginalAtom(mce, data));
726 tl_assert(isShadowAtom(mce, vbits));
727 tl_assert(sameKindedAtoms(data, vbits));
728 return assignNew(
729 'V', mce, Ity_V256,
730 binop(Iop_OrV256,
731 assignNew('V', mce, Ity_V256, unop(Iop_NotV256, data)),
732 vbits) );
733}
734
sewardj95448072004-11-22 20:19:51 +0000735/* --------- Pessimising casts. --------- */
736
sewardjb5b87402011-03-07 16:05:35 +0000737/* The function returns an expression of type DST_TY. If any of the VBITS
738 is undefined (value == 1) the resulting expression has all bits set to
739 1. Otherwise, all bits are 0. */
740
/* Pessimising cast: produce a value of shadow type DST_TY which is all
   1s (fully undefined) if any bit of VBITS is 1 (undefined), and all
   0s (fully defined) otherwise.  See the comment above for the
   contract. */
static IRAtom* mkPCastTo( MCEnv* mce, IRType dst_ty, IRAtom* vbits ) 
{
   IRType  src_ty;
   IRAtom* tmp1;

   /* Note, dst_ty is a shadow type, not an original type. */
   tl_assert(isShadowAtom(mce,vbits));
   src_ty = typeOfIRExpr(mce->sb->tyenv, vbits);

   /* Fast-track some common cases: CmpwNEZ{32,64} directly computes
      "all 1s if nonzero, else all 0s" at word width, avoiding the
      narrow-to-1-bit-then-widen sequence below. */
   if (src_ty == Ity_I32 && dst_ty == Ity_I32)
      return assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));

   if (src_ty == Ity_I64 && dst_ty == Ity_I64)
      return assignNew('V', mce, Ity_I64, unop(Iop_CmpwNEZ64, vbits));

   if (src_ty == Ity_I32 && dst_ty == Ity_I64) {
      /* PCast the arg, then clone it (the two 32-bit halves of the
         result are identical). */
      IRAtom* tmp = assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));
      return assignNew('V', mce, Ity_I64, binop(Iop_32HLto64, tmp, tmp));
   }

   if (src_ty == Ity_I64 && dst_ty == Ity_I32) {
      /* PCast the arg.  This gives all 0s or all 1s.  Then throw away
         the top half. */
      IRAtom* tmp = assignNew('V', mce, Ity_I64, unop(Iop_CmpwNEZ64, vbits));
      return assignNew('V', mce, Ity_I32, unop(Iop_64to32, tmp));
   }

   /* Else do it the slow way .. */
   /* First of all, collapse vbits down to a single bit: 1 iff any
      source bit is 1. */
   tmp1   = NULL;
   switch (src_ty) {
      case Ity_I1:
         tmp1 = vbits;
         break;
      case Ity_I8: 
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ8, vbits));
         break;
      case Ity_I16: 
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ16, vbits));
         break;
      case Ity_I32: 
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ32, vbits));
         break;
      case Ity_I64: 
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ64, vbits));
         break;
      case Ity_I128: {
         /* Gah.  Chop it in half, OR the halves together, and compare
            that with zero. */
         IRAtom* tmp2 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, vbits));
         IRAtom* tmp3 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, vbits));
         IRAtom* tmp4 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp2, tmp3));
         tmp1         = assignNew('V', mce, Ity_I1, 
                                       unop(Iop_CmpNEZ64, tmp4));
         break;
      }
      default:
         ppIRType(src_ty);
         VG_(tool_panic)("mkPCastTo(1)");
   }
   tl_assert(tmp1);
   /* Now widen up to the dst type, sign-extending the single bit so
      the result is all-0s or all-1s.
      NOTE(review): no Ity_V256 destination case here -- a 256-bit
      dst_ty would hit the panic below; confirm callers never request
      it. */
   switch (dst_ty) {
      case Ity_I1:
         return tmp1;
      case Ity_I8: 
         return assignNew('V', mce, Ity_I8, unop(Iop_1Sto8, tmp1));
      case Ity_I16: 
         return assignNew('V', mce, Ity_I16, unop(Iop_1Sto16, tmp1));
      case Ity_I32: 
         return assignNew('V', mce, Ity_I32, unop(Iop_1Sto32, tmp1));
      case Ity_I64: 
         return assignNew('V', mce, Ity_I64, unop(Iop_1Sto64, tmp1));
      case Ity_V128:
         /* Widen to 64 bits, then duplicate into both halves. */
         tmp1 = assignNew('V', mce, Ity_I64,  unop(Iop_1Sto64, tmp1));
         tmp1 = assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128, tmp1, tmp1));
         return tmp1;
      case Ity_I128:
         tmp1 = assignNew('V', mce, Ity_I64,  unop(Iop_1Sto64, tmp1));
         tmp1 = assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, tmp1, tmp1));
         return tmp1;
      default: 
         ppIRType(dst_ty);
         VG_(tool_panic)("mkPCastTo(2)");
   }
}
829
sewardjd5204dc2004-12-31 01:16:11 +0000830/* --------- Accurate interpretation of CmpEQ/CmpNE. --------- */
831/*
832 Normally, we can do CmpEQ/CmpNE by doing UifU on the arguments, and
833 PCasting to Ity_U1. However, sometimes it is necessary to be more
834 accurate. The insight is that the result is defined if two
835 corresponding bits can be found, one from each argument, so that
836 both bits are defined but are different -- that makes EQ say "No"
837 and NE say "Yes". Hence, we compute an improvement term and DifD
838 it onto the "normal" (UifU) result.
839
840 The result is:
841
842 PCastTo<1> (
sewardje6f8af42005-07-06 18:48:59 +0000843 -- naive version
844 PCastTo<sz>( UifU<sz>(vxx, vyy) )
845
sewardjd5204dc2004-12-31 01:16:11 +0000846 `DifD<sz>`
sewardje6f8af42005-07-06 18:48:59 +0000847
848 -- improvement term
849 PCastTo<sz>( PCast<sz>( CmpEQ<sz> ( vec, 1...1 ) ) )
sewardjd5204dc2004-12-31 01:16:11 +0000850 )
sewardje6f8af42005-07-06 18:48:59 +0000851
sewardjd5204dc2004-12-31 01:16:11 +0000852 where
853 vec contains 0 (defined) bits where the corresponding arg bits
sewardje6f8af42005-07-06 18:48:59 +0000854 are defined but different, and 1 bits otherwise.
sewardjd5204dc2004-12-31 01:16:11 +0000855
sewardje6f8af42005-07-06 18:48:59 +0000856 vec = Or<sz>( vxx, // 0 iff bit defined
857 vyy, // 0 iff bit defined
858 Not<sz>(Xor<sz>( xx, yy )) // 0 iff bits different
859 )
860
861 If any bit of vec is 0, the result is defined and so the
862 improvement term should produce 0...0, else it should produce
863 1...1.
864
865 Hence require for the improvement term:
866
867 if vec == 1...1 then 1...1 else 0...0
868 ->
869 PCast<sz>( CmpEQ<sz> ( vec, 1...1 ) )
870
871 This was extensively re-analysed and checked on 6 July 05.
sewardjd5204dc2004-12-31 01:16:11 +0000872*/
/* Accurate definedness for CmpEQ/CmpNE, per the scheme described in
   the comment above.  Returns an Ity_I1 shadow value for the
   comparison of XX with YY, given their shadows VXX and VYY. */
static IRAtom* expensiveCmpEQorNE ( MCEnv*  mce,
                                    IRType  ty,
                                    IRAtom* vxx, IRAtom* vyy, 
                                    IRAtom* xx,  IRAtom* yy )
{
   IRAtom *naive, *vec, *improvement_term;
   IRAtom *improved, *final_cast, *top;
   IROp   opDIFD, opUIFU, opXOR, opNOT, opCMP, opOR;

   tl_assert(isShadowAtom(mce,vxx));
   tl_assert(isShadowAtom(mce,vyy));
   tl_assert(isOriginalAtom(mce,xx));
   tl_assert(isOriginalAtom(mce,yy));
   tl_assert(sameKindedAtoms(vxx,xx));
   tl_assert(sameKindedAtoms(vyy,yy));
 
   /* Select width-specific ops.  DifD = And (defined-if-either-
      defined), UifU = Or (undefined-if-either-undefined). */
   switch (ty) {
      case Ity_I16:
         opOR   = Iop_Or16;
         opDIFD = Iop_And16;
         opUIFU = Iop_Or16;
         opNOT  = Iop_Not16;
         opXOR  = Iop_Xor16;
         opCMP  = Iop_CmpEQ16;
         top    = mkU16(0xFFFF);
         break;
      case Ity_I32:
         opOR   = Iop_Or32;
         opDIFD = Iop_And32;
         opUIFU = Iop_Or32;
         opNOT  = Iop_Not32;
         opXOR  = Iop_Xor32;
         opCMP  = Iop_CmpEQ32;
         top    = mkU32(0xFFFFFFFF);
         break;
      case Ity_I64:
         opOR   = Iop_Or64;
         opDIFD = Iop_And64;
         opUIFU = Iop_Or64;
         opNOT  = Iop_Not64;
         opXOR  = Iop_Xor64;
         opCMP  = Iop_CmpEQ64;
         top    = mkU64(0xFFFFFFFFFFFFFFFFULL);
         break;
      default:
         VG_(tool_panic)("expensiveCmpEQorNE");
   }

   /* naive = PCast<sz>( UifU<sz>(vxx, vyy) ) -- undefined if either
      argument has any undefined bits. */
   naive 
      = mkPCastTo(mce,ty,
                  assignNew('V', mce, ty, binop(opUIFU, vxx, vyy)));

   /* vec has a 0 bit exactly where the corresponding arg bits are
      both defined AND different. */
   vec 
      = assignNew(
           'V', mce,ty, 
           binop( opOR,
                  assignNew('V', mce,ty, binop(opOR, vxx, vyy)),
                  assignNew(
                     'V', mce,ty, 
                     unop( opNOT,
                           assignNew('V', mce,ty, binop(opXOR, xx, yy))))));

   /* improvement_term = all-0s (defined) unless vec == 1...1, i.e.
      unless no deciding bit-pair was found. */
   improvement_term
      = mkPCastTo( mce,ty,
                   assignNew('V', mce,Ity_I1, binop(opCMP, vec, top)));

   /* DifD the improvement onto the naive result. */
   improved
      = assignNew( 'V', mce,ty, binop(opDIFD, naive, improvement_term) );

   /* Narrow to the Ity_I1 shadow of the comparison result. */
   final_cast
      = mkPCastTo( mce, Ity_I1, improved );

   return final_cast;
}
947
sewardj95448072004-11-22 20:19:51 +0000948
sewardj992dff92005-10-07 11:08:55 +0000949/* --------- Semi-accurate interpretation of CmpORD. --------- */
950
951/* CmpORD32{S,U} does PowerPC-style 3-way comparisons:
952
953 CmpORD32S(x,y) = 1<<3 if x <s y
954 = 1<<2 if x >s y
955 = 1<<1 if x == y
956
957 and similarly the unsigned variant. The default interpretation is:
958
959 CmpORD32{S,U}#(x,y,x#,y#) = PCast(x# `UifU` y#)
sewardj1bc82102005-12-23 00:16:24 +0000960 & (7<<1)
sewardj992dff92005-10-07 11:08:55 +0000961
962 The "& (7<<1)" reflects the fact that all result bits except 3,2,1
963 are zero and therefore defined (viz, zero).
sewardja9e62a92005-10-07 12:13:21 +0000964
965 Also deal with a special case better:
966
967 CmpORD32S(x,0)
968
969 Here, bit 3 (LT) of the result is a copy of the top bit of x and
970 will be defined even if the rest of x isn't. In which case we do:
971
972 CmpORD32S#(x,x#,0,{impliedly 0}#)
sewardj1bc82102005-12-23 00:16:24 +0000973 = PCast(x#) & (3<<1) -- standard interp for GT#,EQ#
974 | (x# >>u 31) << 3 -- LT# = x#[31]
sewardja9e62a92005-10-07 12:13:21 +0000975
sewardj1bc82102005-12-23 00:16:24 +0000976 Analogous handling for CmpORD64{S,U}.
sewardj992dff92005-10-07 11:08:55 +0000977*/
sewardja9e62a92005-10-07 12:13:21 +0000978static Bool isZeroU32 ( IRAtom* e )
979{
980 return
981 toBool( e->tag == Iex_Const
982 && e->Iex.Const.con->tag == Ico_U32
983 && e->Iex.Const.con->Ico.U32 == 0 );
984}
985
sewardj1bc82102005-12-23 00:16:24 +0000986static Bool isZeroU64 ( IRAtom* e )
sewardj992dff92005-10-07 11:08:55 +0000987{
sewardj1bc82102005-12-23 00:16:24 +0000988 return
989 toBool( e->tag == Iex_Const
990 && e->Iex.Const.con->tag == Ico_U64
991 && e->Iex.Const.con->Ico.U64 == 0 );
992}
993
/* Compute the definedness of a CmpORD{32,64}{S,U} result, per the
   scheme described in the comment above.  xxhash/yyhash are the
   shadows of xx/yy. */
static IRAtom* doCmpORD ( MCEnv*  mce,
                          IROp    cmp_op,
                          IRAtom* xxhash, IRAtom* yyhash, 
                          IRAtom* xx, IRAtom* yy )
{
   /* Select 32- or 64-bit flavours of everything from cmp_op. */
   Bool   m64    = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U;
   Bool   syned  = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD32S;
   IROp   opOR   = m64 ? Iop_Or64  : Iop_Or32;
   IROp   opAND  = m64 ? Iop_And64 : Iop_And32;
   IROp   opSHL  = m64 ? Iop_Shl64 : Iop_Shl32;
   IROp   opSHR  = m64 ? Iop_Shr64 : Iop_Shr32;
   IRType ty     = m64 ? Ity_I64   : Ity_I32;
   Int    width  = m64 ? 64        : 32;

   Bool (*isZero)(IRAtom*) = m64 ? isZeroU64 : isZeroU32;

   IRAtom* threeLeft1 = NULL;
   IRAtom* sevenLeft1 = NULL;

   tl_assert(isShadowAtom(mce,xxhash));
   tl_assert(isShadowAtom(mce,yyhash));
   tl_assert(isOriginalAtom(mce,xx));
   tl_assert(isOriginalAtom(mce,yy));
   tl_assert(sameKindedAtoms(xxhash,xx));
   tl_assert(sameKindedAtoms(yyhash,yy));
   tl_assert(cmp_op == Iop_CmpORD32S || cmp_op == Iop_CmpORD32U
             || cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U);

   if (0) {
      ppIROp(cmp_op); VG_(printf)(" "); 
      ppIRExpr(xx); VG_(printf)(" "); ppIRExpr( yy ); VG_(printf)("\n");
   }

   if (syned && isZero(yy)) {
      /* fancy interpretation: signed comparison against a literal
         zero.  Bit 3 (LT) of the result is just the sign bit of xx,
         so it is defined whenever xx's sign bit is defined, even if
         the rest of xx isn't. */
      /* if yy is zero, then it must be fully defined (zero#). */
      tl_assert(isZero(yyhash));
      threeLeft1 = m64 ? mkU64(3<<1) : mkU32(3<<1);
      return
         binop(
            opOR,
            /* PCast(xx#) & (3<<1) -- standard interp for GT#, EQ# */
            assignNew(
               'V', mce,ty,
               binop(
                  opAND,
                  mkPCastTo(mce,ty, xxhash), 
                  threeLeft1
               )),
            /* (xx# >>u width-1) << 3  --  LT# = xx#[width-1] */
            assignNew(
               'V', mce,ty,
               binop(
                  opSHL,
                  assignNew(
                     'V', mce,ty,
                     binop(opSHR, xxhash, mkU8(width-1))),
                  mkU8(3)
               ))
         );
   } else {
      /* standard interpretation: PCast(xx# `UifU` yy#), masked down
         to bits 3..1 since all other result bits are always zero and
         hence defined. */
      sevenLeft1 = m64 ? mkU64(7<<1) : mkU32(7<<1);
      return 
         binop( 
            opAND, 
            mkPCastTo( mce,ty,
                       mkUifU(mce,ty, xxhash,yyhash)),
            sevenLeft1
         );
   }
}
1064
1065
sewardj95448072004-11-22 20:19:51 +00001066/*------------------------------------------------------------*/
1067/*--- Emit a test and complaint if something is undefined. ---*/
1068/*------------------------------------------------------------*/
1069
sewardj7cf4e6b2008-05-01 20:24:26 +00001070static IRAtom* schemeE ( MCEnv* mce, IRExpr* e ); /* fwds */
1071
1072
sewardj95448072004-11-22 20:19:51 +00001073/* Set the annotations on a dirty helper to indicate that the stack
1074 pointer and instruction pointers might be read. This is the
1075 behaviour of all 'emit-a-complaint' style functions we might
1076 call. */
1077
1078static void setHelperAnns ( MCEnv* mce, IRDirty* di ) {
1079 di->nFxState = 2;
sewardj2eecb742012-06-01 16:11:41 +00001080 di->fxState[0].fx = Ifx_Read;
1081 di->fxState[0].offset = mce->layout->offset_SP;
1082 di->fxState[0].size = mce->layout->sizeof_SP;
1083 di->fxState[0].nRepeats = 0;
1084 di->fxState[0].repeatLen = 0;
1085 di->fxState[1].fx = Ifx_Read;
1086 di->fxState[1].offset = mce->layout->offset_IP;
1087 di->fxState[1].size = mce->layout->sizeof_IP;
1088 di->fxState[1].nRepeats = 0;
1089 di->fxState[1].repeatLen = 0;
sewardj95448072004-11-22 20:19:51 +00001090}
1091
1092
1093/* Check the supplied **original** atom for undefinedness, and emit a
1094 complaint if so. Once that happens, mark it as defined. This is
1095 possible because the atom is either a tmp or literal. If it's a
1096 tmp, it will be shadowed by a tmp, and so we can set the shadow to
1097 be defined. In fact as mentioned above, we will have to allocate a
1098 new tmp to carry the new 'defined' shadow value, and update the
1099 original->tmp mapping accordingly; we cannot simply assign a new
1100 value to an existing shadow tmp as this breaks SSAness -- resulting
1101 in the post-instrumentation sanity checker spluttering in disapproval.
1102*/
static void complainIfUndefined ( MCEnv* mce, IRAtom* atom, IRExpr *guard )
{
   IRAtom*  vatom;
   IRType   ty;
   Int      sz;
   IRDirty* di;
   IRAtom*  cond;
   IRAtom*  origin;
   void*    fn;
   const HChar* nm;
   IRExpr** args;
   Int      nargs;

   // Don't do V bit tests if we're not reporting undefined value errors.
   if (MC_(clo_mc_level) == 1)
      return;

   /* Since the original expression is atomic, there's no duplicated
      work generated by making multiple V-expressions for it.  So we
      don't really care about the possibility that someone else may
      also create a V-interpretation for it. */
   tl_assert(isOriginalAtom(mce, atom));
   vatom = expr2vbits( mce, atom );
   tl_assert(isShadowAtom(mce, vatom));
   tl_assert(sameKindedAtoms(atom, vatom));

   ty = typeOfIRExpr(mce->sb->tyenv, vatom);

   /* sz is only used for constructing the error message */
   sz = ty==Ity_I1 ? 0 : sizeofIRType(ty);

   cond = mkPCastTo( mce, Ity_I1, vatom );
   /* cond will be 0 if all defined, and 1 if any not defined. */

   /* Get the origin info for the value we are about to check.  At
      least, if we are doing origin tracking.  If not, use a dummy
      zero origin. */
   if (MC_(clo_mc_level) == 3) {
      origin = schemeE( mce, atom );
      if (mce->hWordTy == Ity_I64) {
         /* Origins are 32 bits; widen to the 64-bit host word. */
         origin = assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, origin) );
      }
   } else {
      origin = NULL;
   }

   fn    = NULL;
   nm    = NULL;
   args  = NULL;
   nargs = -1;

   /* Select the complaint helper by value size; each size has a
      with-origin and a no-origin variant. */
   switch (sz) {
      case 0:
         if (origin) {
            fn    = &MC_(helperc_value_check0_fail_w_o);
            nm    = "MC_(helperc_value_check0_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check0_fail_no_o);
            nm    = "MC_(helperc_value_check0_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 1:
         if (origin) {
            fn    = &MC_(helperc_value_check1_fail_w_o);
            nm    = "MC_(helperc_value_check1_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check1_fail_no_o);
            nm    = "MC_(helperc_value_check1_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 4:
         if (origin) {
            fn    = &MC_(helperc_value_check4_fail_w_o);
            nm    = "MC_(helperc_value_check4_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check4_fail_no_o);
            nm    = "MC_(helperc_value_check4_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 8:
         if (origin) {
            fn    = &MC_(helperc_value_check8_fail_w_o);
            nm    = "MC_(helperc_value_check8_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check8_fail_no_o);
            nm    = "MC_(helperc_value_check8_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 2:
      case 16:
         /* 2- and 16-byte values go via the generic N-byte helper,
            which takes the size as an extra argument. */
         if (origin) {
            fn    = &MC_(helperc_value_checkN_fail_w_o);
            nm    = "MC_(helperc_value_checkN_fail_w_o)";
            args  = mkIRExprVec_2( mkIRExpr_HWord( sz ), origin);
            nargs = 2;
         } else {
            fn    = &MC_(helperc_value_checkN_fail_no_o);
            nm    = "MC_(helperc_value_checkN_fail_no_o)";
            args  = mkIRExprVec_1( mkIRExpr_HWord( sz ) );
            nargs = 1;
         }
         break;
      default:
         VG_(tool_panic)("unexpected szB");
   }

   tl_assert(fn);
   tl_assert(nm);
   tl_assert(args);
   tl_assert(nargs >= 0 && nargs <= 2);
   tl_assert( (MC_(clo_mc_level) == 3 && origin != NULL)
              || (MC_(clo_mc_level) == 2 && origin == NULL) );

   di = unsafeIRDirty_0_N( nargs/*regparms*/, nm, 
                           VG_(fnptr_to_fnentry)( fn ), args );
   /* The helper is only called when cond is 1, i.e. some bit of the
      value is undefined. */
   di->guard = cond;

   /* If the complaint is to be issued under a guard condition, AND that
      guard condition.  (Done at I32 width since there is no And1.) */
   if (guard) {
      IRAtom *g1 = assignNew('V', mce, Ity_I32, unop(Iop_1Uto32, di->guard));
      IRAtom *g2 = assignNew('V', mce, Ity_I32, unop(Iop_1Uto32, guard));
      IRAtom *e  = assignNew('V', mce, Ity_I32, binop(Iop_And32, g1, g2));

      di->guard = assignNew('V', mce, Ity_I1, unop(Iop_32to1, e));
   }

   setHelperAnns( mce, di );
   stmt( 'V', mce, IRStmt_Dirty(di));

   /* Set the shadow tmp to be defined.  First, update the
      orig->shadow tmp mapping to reflect the fact that this shadow is
      getting a new value. */
   tl_assert(isIRAtom(vatom));
   /* sameKindedAtoms ... */
   if (vatom->tag == Iex_RdTmp) {
      tl_assert(atom->tag == Iex_RdTmp);
      newShadowTmpV(mce, atom->Iex.RdTmp.tmp);
      assign('V', mce, findShadowTmpV(mce, atom->Iex.RdTmp.tmp),
                       definedOfType(ty));
   }
}
1261
1262
1263/*------------------------------------------------------------*/
1264/*--- Shadowing PUTs/GETs, and indexed variants thereof ---*/
1265/*------------------------------------------------------------*/
1266
1267/* Examine the always-defined sections declared in layout to see if
1268 the (offset,size) section is within one. Note, is is an error to
1269 partially fall into such a region: (offset,size) should either be
1270 completely in such a region or completely not-in such a region.
1271*/
1272static Bool isAlwaysDefd ( MCEnv* mce, Int offset, Int size )
1273{
1274 Int minoffD, maxoffD, i;
1275 Int minoff = offset;
1276 Int maxoff = minoff + size - 1;
1277 tl_assert((minoff & ~0xFFFF) == 0);
1278 tl_assert((maxoff & ~0xFFFF) == 0);
1279
1280 for (i = 0; i < mce->layout->n_alwaysDefd; i++) {
1281 minoffD = mce->layout->alwaysDefd[i].offset;
1282 maxoffD = minoffD + mce->layout->alwaysDefd[i].size - 1;
1283 tl_assert((minoffD & ~0xFFFF) == 0);
1284 tl_assert((maxoffD & ~0xFFFF) == 0);
1285
1286 if (maxoff < minoffD || maxoffD < minoff)
1287 continue; /* no overlap */
1288 if (minoff >= minoffD && maxoff <= maxoffD)
1289 return True; /* completely contained in an always-defd section */
1290
1291 VG_(tool_panic)("memcheck:isAlwaysDefd:partial overlap");
1292 }
1293 return False; /* could not find any containing section */
1294}
1295
1296
1297/* Generate into bb suitable actions to shadow this Put. If the state
1298 slice is marked 'always defined', do nothing. Otherwise, write the
1299 supplied V bits to the shadow state. We can pass in either an
1300 original atom or a V-atom, but not both. In the former case the
1301 relevant V-bits are then generated from the original.
florian434ffae2012-07-19 17:23:42 +00001302 We assume here, that the definedness of GUARD has already been checked.
sewardj95448072004-11-22 20:19:51 +00001303*/
static
void do_shadow_PUT ( MCEnv* mce,  Int offset, 
                     IRAtom* atom, IRAtom* vatom, IRExpr *guard )
{
   IRType ty;

   // Don't do shadow PUTs if we're not doing undefined value checking.
   // Their absence lets Vex's optimiser remove all the shadow computation
   // that they depend on, which includes GETs of the shadow registers.
   if (MC_(clo_mc_level) == 1)
      return;
   
   /* Exactly one of atom (original) and vatom (shadow) is supplied;
      derive the shadow from the original if needed. */
   if (atom) {
      tl_assert(!vatom);
      tl_assert(isOriginalAtom(mce, atom));
      vatom = expr2vbits( mce, atom );
   } else {
      tl_assert(vatom);
      tl_assert(isShadowAtom(mce, vatom));
   }

   ty = typeOfIRExpr(mce->sb->tyenv, vatom);
   tl_assert(ty != Ity_I1);
   tl_assert(ty != Ity_I128);
   if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
      /* later: no ... */
      /* emit code to emit a complaint if any of the vbits are 1. */
      /* complainIfUndefined(mce, atom); */
   } else {
      /* Do a plain shadow Put. */
      if (guard) {
         /* If the guard expression evaluates to false we simply Put the value
            that is already stored in the guest state slot */
         IRAtom *cond, *iffalse;

         cond    = assignNew('V', mce, Ity_I8, unop(Iop_1Uto8, guard));
         iffalse = assignNew('V', mce, ty,
                             IRExpr_Get(offset + mce->layout->total_sizeB, ty));
         vatom   = assignNew('V', mce, ty, IRExpr_Mux0X(cond, iffalse, vatom));
      }
      /* Shadow state lives at a fixed displacement (total_sizeB) past
         the real guest state. */
      stmt( 'V', mce, IRStmt_Put( offset + mce->layout->total_sizeB, vatom ));
   }
}
1347
1348
/* Generate into bb suitable actions to shadow this PutI.  This is the
   indexed-register (PutI) analogue of do_shadow_PUT; it emits shadow
   statements and returns nothing.
*/
1352static
floriand39b0222012-05-31 15:48:13 +00001353void do_shadow_PUTI ( MCEnv* mce, IRPutI *puti)
sewardj95448072004-11-22 20:19:51 +00001354{
sewardj7cf97ee2004-11-28 14:25:01 +00001355 IRAtom* vatom;
1356 IRType ty, tyS;
1357 Int arrSize;;
floriand39b0222012-05-31 15:48:13 +00001358 IRRegArray* descr = puti->descr;
1359 IRAtom* ix = puti->ix;
1360 Int bias = puti->bias;
1361 IRAtom* atom = puti->data;
sewardj7cf97ee2004-11-28 14:25:01 +00001362
njn1d0825f2006-03-27 11:37:07 +00001363 // Don't do shadow PUTIs if we're not doing undefined value checking.
1364 // Their absence lets Vex's optimiser remove all the shadow computation
1365 // that they depend on, which includes GETIs of the shadow registers.
sewardj7cf4e6b2008-05-01 20:24:26 +00001366 if (MC_(clo_mc_level) == 1)
njn1d0825f2006-03-27 11:37:07 +00001367 return;
1368
sewardj95448072004-11-22 20:19:51 +00001369 tl_assert(isOriginalAtom(mce,atom));
sewardj7cf97ee2004-11-28 14:25:01 +00001370 vatom = expr2vbits( mce, atom );
sewardj95448072004-11-22 20:19:51 +00001371 tl_assert(sameKindedAtoms(atom, vatom));
sewardj7cf97ee2004-11-28 14:25:01 +00001372 ty = descr->elemTy;
sewardj7cf4e6b2008-05-01 20:24:26 +00001373 tyS = shadowTypeV(ty);
sewardj7cf97ee2004-11-28 14:25:01 +00001374 arrSize = descr->nElems * sizeofIRType(ty);
sewardj95448072004-11-22 20:19:51 +00001375 tl_assert(ty != Ity_I1);
1376 tl_assert(isOriginalAtom(mce,ix));
florian434ffae2012-07-19 17:23:42 +00001377 complainIfUndefined(mce, ix, NULL);
sewardj95448072004-11-22 20:19:51 +00001378 if (isAlwaysDefd(mce, descr->base, arrSize)) {
1379 /* later: no ... */
1380 /* emit code to emit a complaint if any of the vbits are 1. */
1381 /* complainIfUndefined(mce, atom); */
1382 } else {
1383 /* Do a cloned version of the Put that refers to the shadow
1384 area. */
sewardj0b9d74a2006-12-24 02:24:11 +00001385 IRRegArray* new_descr
1386 = mkIRRegArray( descr->base + mce->layout->total_sizeB,
1387 tyS, descr->nElems);
floriand39b0222012-05-31 15:48:13 +00001388 stmt( 'V', mce, IRStmt_PutI( mkIRPutI(new_descr, ix, bias, vatom) ));
sewardj95448072004-11-22 20:19:51 +00001389 }
1390}
1391
1392
1393/* Return an expression which contains the V bits corresponding to the
1394 given GET (passed in in pieces).
1395*/
1396static
1397IRExpr* shadow_GET ( MCEnv* mce, Int offset, IRType ty )
1398{
sewardj7cf4e6b2008-05-01 20:24:26 +00001399 IRType tyS = shadowTypeV(ty);
sewardj95448072004-11-22 20:19:51 +00001400 tl_assert(ty != Ity_I1);
sewardjb5b87402011-03-07 16:05:35 +00001401 tl_assert(ty != Ity_I128);
sewardj95448072004-11-22 20:19:51 +00001402 if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
1403 /* Always defined, return all zeroes of the relevant type */
1404 return definedOfType(tyS);
1405 } else {
1406 /* return a cloned version of the Get that refers to the shadow
1407 area. */
sewardj7cf4e6b2008-05-01 20:24:26 +00001408 /* FIXME: this isn't an atom! */
sewardj95448072004-11-22 20:19:51 +00001409 return IRExpr_Get( offset + mce->layout->total_sizeB, tyS );
1410 }
1411}
1412
1413
1414/* Return an expression which contains the V bits corresponding to the
1415 given GETI (passed in in pieces).
1416*/
1417static
sewardj0b9d74a2006-12-24 02:24:11 +00001418IRExpr* shadow_GETI ( MCEnv* mce,
1419 IRRegArray* descr, IRAtom* ix, Int bias )
sewardj95448072004-11-22 20:19:51 +00001420{
1421 IRType ty = descr->elemTy;
sewardj7cf4e6b2008-05-01 20:24:26 +00001422 IRType tyS = shadowTypeV(ty);
sewardj95448072004-11-22 20:19:51 +00001423 Int arrSize = descr->nElems * sizeofIRType(ty);
1424 tl_assert(ty != Ity_I1);
1425 tl_assert(isOriginalAtom(mce,ix));
florian434ffae2012-07-19 17:23:42 +00001426 complainIfUndefined(mce, ix, NULL);
sewardj95448072004-11-22 20:19:51 +00001427 if (isAlwaysDefd(mce, descr->base, arrSize)) {
1428 /* Always defined, return all zeroes of the relevant type */
1429 return definedOfType(tyS);
1430 } else {
1431 /* return a cloned version of the Get that refers to the shadow
1432 area. */
sewardj0b9d74a2006-12-24 02:24:11 +00001433 IRRegArray* new_descr
1434 = mkIRRegArray( descr->base + mce->layout->total_sizeB,
1435 tyS, descr->nElems);
sewardj95448072004-11-22 20:19:51 +00001436 return IRExpr_GetI( new_descr, ix, bias );
1437 }
1438}
1439
1440
1441/*------------------------------------------------------------*/
1442/*--- Generating approximations for unknown operations, ---*/
1443/*--- using lazy-propagate semantics ---*/
1444/*------------------------------------------------------------*/
1445
1446/* Lazy propagation of undefinedness from two values, resulting in the
1447 specified shadow type.
1448*/
1449static
1450IRAtom* mkLazy2 ( MCEnv* mce, IRType finalVty, IRAtom* va1, IRAtom* va2 )
1451{
sewardj95448072004-11-22 20:19:51 +00001452 IRAtom* at;
sewardj1c0ce7a2009-07-01 08:10:49 +00001453 IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
1454 IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
sewardj95448072004-11-22 20:19:51 +00001455 tl_assert(isShadowAtom(mce,va1));
1456 tl_assert(isShadowAtom(mce,va2));
sewardj37c31cc2005-04-26 23:49:24 +00001457
1458 /* The general case is inefficient because PCast is an expensive
1459 operation. Here are some special cases which use PCast only
1460 once rather than twice. */
1461
1462 /* I64 x I64 -> I64 */
1463 if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I64) {
1464 if (0) VG_(printf)("mkLazy2: I64 x I64 -> I64\n");
1465 at = mkUifU(mce, Ity_I64, va1, va2);
1466 at = mkPCastTo(mce, Ity_I64, at);
1467 return at;
1468 }
1469
1470 /* I64 x I64 -> I32 */
1471 if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I32) {
1472 if (0) VG_(printf)("mkLazy2: I64 x I64 -> I32\n");
1473 at = mkUifU(mce, Ity_I64, va1, va2);
1474 at = mkPCastTo(mce, Ity_I32, at);
1475 return at;
1476 }
1477
1478 if (0) {
1479 VG_(printf)("mkLazy2 ");
1480 ppIRType(t1);
1481 VG_(printf)("_");
1482 ppIRType(t2);
1483 VG_(printf)("_");
1484 ppIRType(finalVty);
1485 VG_(printf)("\n");
1486 }
1487
1488 /* General case: force everything via 32-bit intermediaries. */
sewardj95448072004-11-22 20:19:51 +00001489 at = mkPCastTo(mce, Ity_I32, va1);
1490 at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
1491 at = mkPCastTo(mce, finalVty, at);
1492 return at;
1493}
1494
1495
sewardjed69fdb2006-02-03 16:12:27 +00001496/* 3-arg version of the above. */
1497static
1498IRAtom* mkLazy3 ( MCEnv* mce, IRType finalVty,
1499 IRAtom* va1, IRAtom* va2, IRAtom* va3 )
1500{
1501 IRAtom* at;
sewardj1c0ce7a2009-07-01 08:10:49 +00001502 IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
1503 IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
1504 IRType t3 = typeOfIRExpr(mce->sb->tyenv, va3);
sewardjed69fdb2006-02-03 16:12:27 +00001505 tl_assert(isShadowAtom(mce,va1));
1506 tl_assert(isShadowAtom(mce,va2));
1507 tl_assert(isShadowAtom(mce,va3));
1508
1509 /* The general case is inefficient because PCast is an expensive
1510 operation. Here are some special cases which use PCast only
1511 twice rather than three times. */
1512
1513 /* I32 x I64 x I64 -> I64 */
1514 /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
1515 if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64
1516 && finalVty == Ity_I64) {
1517 if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I64\n");
1518 /* Widen 1st arg to I64. Since 1st arg is typically a rounding
1519 mode indication which is fully defined, this should get
1520 folded out later. */
1521 at = mkPCastTo(mce, Ity_I64, va1);
1522 /* Now fold in 2nd and 3rd args. */
1523 at = mkUifU(mce, Ity_I64, at, va2);
1524 at = mkUifU(mce, Ity_I64, at, va3);
1525 /* and PCast once again. */
1526 at = mkPCastTo(mce, Ity_I64, at);
1527 return at;
1528 }
1529
sewardj453e8f82006-02-09 03:25:06 +00001530 /* I32 x I64 x I64 -> I32 */
1531 if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64
1532 && finalVty == Ity_I32) {
sewardj59570ff2010-01-01 11:59:33 +00001533 if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I32\n");
sewardj453e8f82006-02-09 03:25:06 +00001534 at = mkPCastTo(mce, Ity_I64, va1);
1535 at = mkUifU(mce, Ity_I64, at, va2);
1536 at = mkUifU(mce, Ity_I64, at, va3);
1537 at = mkPCastTo(mce, Ity_I32, at);
1538 return at;
1539 }
1540
sewardj59570ff2010-01-01 11:59:33 +00001541 /* I32 x I32 x I32 -> I32 */
1542 /* 32-bit FP idiom, as (eg) happens on ARM */
1543 if (t1 == Ity_I32 && t2 == Ity_I32 && t3 == Ity_I32
1544 && finalVty == Ity_I32) {
1545 if (0) VG_(printf)("mkLazy3: I32 x I32 x I32 -> I32\n");
1546 at = va1;
1547 at = mkUifU(mce, Ity_I32, at, va2);
1548 at = mkUifU(mce, Ity_I32, at, va3);
1549 at = mkPCastTo(mce, Ity_I32, at);
1550 return at;
1551 }
1552
sewardjb5b87402011-03-07 16:05:35 +00001553 /* I32 x I128 x I128 -> I128 */
1554 /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
1555 if (t1 == Ity_I32 && t2 == Ity_I128 && t3 == Ity_I128
1556 && finalVty == Ity_I128) {
1557 if (0) VG_(printf)("mkLazy3: I32 x I128 x I128 -> I128\n");
1558 /* Widen 1st arg to I128. Since 1st arg is typically a rounding
1559 mode indication which is fully defined, this should get
1560 folded out later. */
1561 at = mkPCastTo(mce, Ity_I128, va1);
1562 /* Now fold in 2nd and 3rd args. */
1563 at = mkUifU(mce, Ity_I128, at, va2);
1564 at = mkUifU(mce, Ity_I128, at, va3);
1565 /* and PCast once again. */
1566 at = mkPCastTo(mce, Ity_I128, at);
1567 return at;
1568 }
sewardj453e8f82006-02-09 03:25:06 +00001569 if (1) {
1570 VG_(printf)("mkLazy3: ");
sewardjed69fdb2006-02-03 16:12:27 +00001571 ppIRType(t1);
sewardj453e8f82006-02-09 03:25:06 +00001572 VG_(printf)(" x ");
sewardjed69fdb2006-02-03 16:12:27 +00001573 ppIRType(t2);
sewardj453e8f82006-02-09 03:25:06 +00001574 VG_(printf)(" x ");
sewardjed69fdb2006-02-03 16:12:27 +00001575 ppIRType(t3);
sewardj453e8f82006-02-09 03:25:06 +00001576 VG_(printf)(" -> ");
sewardjed69fdb2006-02-03 16:12:27 +00001577 ppIRType(finalVty);
1578 VG_(printf)("\n");
1579 }
1580
sewardj453e8f82006-02-09 03:25:06 +00001581 tl_assert(0);
sewardjed69fdb2006-02-03 16:12:27 +00001582 /* General case: force everything via 32-bit intermediaries. */
sewardj453e8f82006-02-09 03:25:06 +00001583 /*
sewardjed69fdb2006-02-03 16:12:27 +00001584 at = mkPCastTo(mce, Ity_I32, va1);
1585 at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
1586 at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va3));
1587 at = mkPCastTo(mce, finalVty, at);
1588 return at;
sewardj453e8f82006-02-09 03:25:06 +00001589 */
sewardjed69fdb2006-02-03 16:12:27 +00001590}
1591
1592
sewardje91cea72006-02-08 19:32:02 +00001593/* 4-arg version of the above. */
1594static
1595IRAtom* mkLazy4 ( MCEnv* mce, IRType finalVty,
1596 IRAtom* va1, IRAtom* va2, IRAtom* va3, IRAtom* va4 )
1597{
1598 IRAtom* at;
sewardj1c0ce7a2009-07-01 08:10:49 +00001599 IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
1600 IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
1601 IRType t3 = typeOfIRExpr(mce->sb->tyenv, va3);
1602 IRType t4 = typeOfIRExpr(mce->sb->tyenv, va4);
sewardje91cea72006-02-08 19:32:02 +00001603 tl_assert(isShadowAtom(mce,va1));
1604 tl_assert(isShadowAtom(mce,va2));
1605 tl_assert(isShadowAtom(mce,va3));
1606 tl_assert(isShadowAtom(mce,va4));
1607
1608 /* The general case is inefficient because PCast is an expensive
1609 operation. Here are some special cases which use PCast only
1610 twice rather than three times. */
1611
1612 /* I32 x I64 x I64 x I64 -> I64 */
1613 /* Standard FP idiom: rm x FParg1 x FParg2 x FParg3 -> FPresult */
1614 if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64 && t4 == Ity_I64
1615 && finalVty == Ity_I64) {
1616 if (0) VG_(printf)("mkLazy4: I32 x I64 x I64 x I64 -> I64\n");
1617 /* Widen 1st arg to I64. Since 1st arg is typically a rounding
1618 mode indication which is fully defined, this should get
1619 folded out later. */
1620 at = mkPCastTo(mce, Ity_I64, va1);
1621 /* Now fold in 2nd, 3rd, 4th args. */
1622 at = mkUifU(mce, Ity_I64, at, va2);
1623 at = mkUifU(mce, Ity_I64, at, va3);
1624 at = mkUifU(mce, Ity_I64, at, va4);
1625 /* and PCast once again. */
1626 at = mkPCastTo(mce, Ity_I64, at);
1627 return at;
1628 }
sewardjb5b87402011-03-07 16:05:35 +00001629 /* I32 x I32 x I32 x I32 -> I32 */
1630 /* Standard FP idiom: rm x FParg1 x FParg2 x FParg3 -> FPresult */
1631 if (t1 == Ity_I32 && t2 == Ity_I32 && t3 == Ity_I32 && t4 == Ity_I32
1632 && finalVty == Ity_I32) {
1633 if (0) VG_(printf)("mkLazy4: I32 x I32 x I32 x I32 -> I32\n");
1634 at = va1;
1635 /* Now fold in 2nd, 3rd, 4th args. */
1636 at = mkUifU(mce, Ity_I32, at, va2);
1637 at = mkUifU(mce, Ity_I32, at, va3);
1638 at = mkUifU(mce, Ity_I32, at, va4);
1639 at = mkPCastTo(mce, Ity_I32, at);
1640 return at;
1641 }
sewardje91cea72006-02-08 19:32:02 +00001642
1643 if (1) {
sewardj453e8f82006-02-09 03:25:06 +00001644 VG_(printf)("mkLazy4: ");
sewardje91cea72006-02-08 19:32:02 +00001645 ppIRType(t1);
1646 VG_(printf)(" x ");
1647 ppIRType(t2);
1648 VG_(printf)(" x ");
1649 ppIRType(t3);
1650 VG_(printf)(" x ");
1651 ppIRType(t4);
1652 VG_(printf)(" -> ");
1653 ppIRType(finalVty);
1654 VG_(printf)("\n");
1655 }
1656
1657 tl_assert(0);
1658}
1659
1660
sewardj95448072004-11-22 20:19:51 +00001661/* Do the lazy propagation game from a null-terminated vector of
1662 atoms. This is presumably the arguments to a helper call, so the
1663 IRCallee info is also supplied in order that we can know which
1664 arguments should be ignored (via the .mcx_mask field).
1665*/
1666static
1667IRAtom* mkLazyN ( MCEnv* mce,
1668 IRAtom** exprvec, IRType finalVtype, IRCallee* cee )
1669{
sewardj4cc684b2007-08-25 23:09:36 +00001670 Int i;
sewardj95448072004-11-22 20:19:51 +00001671 IRAtom* here;
sewardj4cc684b2007-08-25 23:09:36 +00001672 IRAtom* curr;
1673 IRType mergeTy;
sewardj99430032011-05-04 09:09:31 +00001674 Bool mergeTy64 = True;
sewardj4cc684b2007-08-25 23:09:36 +00001675
1676 /* Decide on the type of the merge intermediary. If all relevant
1677 args are I64, then it's I64. In all other circumstances, use
1678 I32. */
1679 for (i = 0; exprvec[i]; i++) {
1680 tl_assert(i < 32);
1681 tl_assert(isOriginalAtom(mce, exprvec[i]));
1682 if (cee->mcx_mask & (1<<i))
1683 continue;
sewardj1c0ce7a2009-07-01 08:10:49 +00001684 if (typeOfIRExpr(mce->sb->tyenv, exprvec[i]) != Ity_I64)
sewardj4cc684b2007-08-25 23:09:36 +00001685 mergeTy64 = False;
1686 }
1687
1688 mergeTy = mergeTy64 ? Ity_I64 : Ity_I32;
1689 curr = definedOfType(mergeTy);
1690
sewardj95448072004-11-22 20:19:51 +00001691 for (i = 0; exprvec[i]; i++) {
1692 tl_assert(i < 32);
1693 tl_assert(isOriginalAtom(mce, exprvec[i]));
1694 /* Only take notice of this arg if the callee's mc-exclusion
1695 mask does not say it is to be excluded. */
1696 if (cee->mcx_mask & (1<<i)) {
1697 /* the arg is to be excluded from definedness checking. Do
1698 nothing. */
1699 if (0) VG_(printf)("excluding %s(%d)\n", cee->name, i);
1700 } else {
1701 /* calculate the arg's definedness, and pessimistically merge
1702 it in. */
sewardj4cc684b2007-08-25 23:09:36 +00001703 here = mkPCastTo( mce, mergeTy, expr2vbits(mce, exprvec[i]) );
1704 curr = mergeTy64
1705 ? mkUifU64(mce, here, curr)
1706 : mkUifU32(mce, here, curr);
sewardj95448072004-11-22 20:19:51 +00001707 }
1708 }
1709 return mkPCastTo(mce, finalVtype, curr );
1710}
1711
1712
1713/*------------------------------------------------------------*/
1714/*--- Generating expensive sequences for exact carry-chain ---*/
1715/*--- propagation in add/sub and related operations. ---*/
1716/*------------------------------------------------------------*/
1717
/* Exact (expensive) definedness propagation for add/sub.  Compute the
   result for the smallest value each operand could take on given its
   undefined bits (a_min, b_min) and the largest (a_max, b_max); any
   result bit that differs between those two computations can be
   influenced by an undefined input bit, so it -- together with every
   bit already undefined in either operand -- is marked undefined. */
static
IRAtom* expensiveAddSub ( MCEnv*  mce,
                          Bool    add,
                          IRType  ty,
                          IRAtom* qaa, IRAtom* qbb,
                          IRAtom* aa,  IRAtom* bb )
{
   IRAtom *a_min, *b_min, *a_max, *b_max;
   IROp   opAND, opOR, opXOR, opNOT, opADD, opSUB;

   tl_assert(isShadowAtom(mce,qaa));
   tl_assert(isShadowAtom(mce,qbb));
   tl_assert(isOriginalAtom(mce,aa));
   tl_assert(isOriginalAtom(mce,bb));
   tl_assert(sameKindedAtoms(qaa,aa));
   tl_assert(sameKindedAtoms(qbb,bb));

   /* Pick the operator family for the requested width. */
   switch (ty) {
      case Ity_I32:
         opAND = Iop_And32;
         opOR  = Iop_Or32;
         opXOR = Iop_Xor32;
         opNOT = Iop_Not32;
         opADD = Iop_Add32;
         opSUB = Iop_Sub32;
         break;
      case Ity_I64:
         opAND = Iop_And64;
         opOR  = Iop_Or64;
         opXOR = Iop_Xor64;
         opNOT = Iop_Not64;
         opADD = Iop_Add64;
         opSUB = Iop_Sub64;
         break;
      default:
         VG_(tool_panic)("expensiveAddSub");
   }

   // a_min = aa & ~qaa   (undefined bits of aa forced to 0)
   a_min = assignNew('V', mce,ty,
                     binop(opAND, aa,
                           assignNew('V', mce,ty, unop(opNOT, qaa))));

   // b_min = bb & ~qbb   (undefined bits of bb forced to 0)
   b_min = assignNew('V', mce,ty,
                     binop(opAND, bb,
                           assignNew('V', mce,ty, unop(opNOT, qbb))));

   // a_max = aa | qaa    (undefined bits of aa forced to 1)
   a_max = assignNew('V', mce,ty, binop(opOR, aa, qaa));

   // b_max = bb | qbb    (undefined bits of bb forced to 1)
   b_max = assignNew('V', mce,ty, binop(opOR, bb, qbb));

   if (add) {
      // result = (qaa | qbb) | ((a_min + b_min) ^ (a_max + b_max))
      return
      assignNew('V', mce,ty,
                binop( opOR,
                       assignNew('V', mce,ty, binop(opOR, qaa, qbb)),
                       assignNew('V', mce,ty,
                                 binop( opXOR,
                                        assignNew('V', mce,ty, binop(opADD, a_min, b_min)),
                                        assignNew('V', mce,ty, binop(opADD, a_max, b_max))
                                 )
                       )
                )
      );
   } else {
      // result = (qaa | qbb) | ((a_min - b_max) ^ (a_max - b_min))
      // (for subtraction the extremes are min-of-a minus max-of-b and
      // max-of-a minus min-of-b; an earlier comment wrongly said '+')
      return
      assignNew('V', mce,ty,
                binop( opOR,
                       assignNew('V', mce,ty, binop(opOR, qaa, qbb)),
                       assignNew('V', mce,ty,
                                 binop( opXOR,
                                        assignNew('V', mce,ty, binop(opSUB, a_min, b_max)),
                                        assignNew('V', mce,ty, binop(opSUB, a_max, b_min))
                                 )
                       )
                )
      );
   }

}
1803
1804
sewardj4cfa81b2012-11-08 10:58:16 +00001805static
1806IRAtom* expensiveCountTrailingZeroes ( MCEnv* mce, IROp czop,
1807 IRAtom* atom, IRAtom* vatom )
1808{
1809 IRType ty;
1810 IROp xorOp, subOp, andOp;
1811 IRExpr *one;
1812 IRAtom *improver, *improved;
1813 tl_assert(isShadowAtom(mce,vatom));
1814 tl_assert(isOriginalAtom(mce,atom));
1815 tl_assert(sameKindedAtoms(atom,vatom));
1816
1817 switch (czop) {
1818 case Iop_Ctz32:
1819 ty = Ity_I32;
1820 xorOp = Iop_Xor32;
1821 subOp = Iop_Sub32;
1822 andOp = Iop_And32;
1823 one = mkU32(1);
1824 break;
1825 case Iop_Ctz64:
1826 ty = Ity_I64;
1827 xorOp = Iop_Xor64;
1828 subOp = Iop_Sub64;
1829 andOp = Iop_And64;
1830 one = mkU64(1);
1831 break;
1832 default:
1833 ppIROp(czop);
1834 VG_(tool_panic)("memcheck:expensiveCountTrailingZeroes");
1835 }
1836
1837 // improver = atom ^ (atom - 1)
1838 //
1839 // That is, improver has its low ctz(atom) bits equal to one;
1840 // higher bits (if any) equal to zero.
1841 improver = assignNew('V', mce,ty,
1842 binop(xorOp,
1843 atom,
1844 assignNew('V', mce, ty,
1845 binop(subOp, atom, one))));
1846
1847 // improved = vatom & improver
1848 //
1849 // That is, treat any V bits above the first ctz(atom) bits as
1850 // "defined".
1851 improved = assignNew('V', mce, ty,
1852 binop(andOp, vatom, improver));
1853
1854 // Return pessimizing cast of improved.
1855 return mkPCastTo(mce, ty, improved);
1856}
1857
1858
sewardj95448072004-11-22 20:19:51 +00001859/*------------------------------------------------------------*/
sewardjaaddbc22005-10-07 09:49:53 +00001860/*--- Scalar shifts. ---*/
1861/*------------------------------------------------------------*/
1862
1863/* Produce an interpretation for (aa << bb) (or >>s, >>u). The basic
1864 idea is to shift the definedness bits by the original shift amount.
1865 This introduces 0s ("defined") in new positions for left shifts and
1866 unsigned right shifts, and copies the top definedness bit for
1867 signed right shifts. So, conveniently, applying the original shift
1868 operator to the definedness bits for the left arg is exactly the
1869 right thing to do:
1870
1871 (qaa << bb)
1872
1873 However if the shift amount is undefined then the whole result
1874 is undefined. Hence need:
1875
1876 (qaa << bb) `UifU` PCast(qbb)
1877
1878 If the shift amount bb is a literal than qbb will say 'all defined'
1879 and the UifU and PCast will get folded out by post-instrumentation
1880 optimisation.
1881*/
1882static IRAtom* scalarShift ( MCEnv* mce,
1883 IRType ty,
1884 IROp original_op,
1885 IRAtom* qaa, IRAtom* qbb,
1886 IRAtom* aa, IRAtom* bb )
1887{
1888 tl_assert(isShadowAtom(mce,qaa));
1889 tl_assert(isShadowAtom(mce,qbb));
1890 tl_assert(isOriginalAtom(mce,aa));
1891 tl_assert(isOriginalAtom(mce,bb));
1892 tl_assert(sameKindedAtoms(qaa,aa));
1893 tl_assert(sameKindedAtoms(qbb,bb));
1894 return
1895 assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +00001896 'V', mce, ty,
sewardjaaddbc22005-10-07 09:49:53 +00001897 mkUifU( mce, ty,
sewardj7cf4e6b2008-05-01 20:24:26 +00001898 assignNew('V', mce, ty, binop(original_op, qaa, bb)),
sewardjaaddbc22005-10-07 09:49:53 +00001899 mkPCastTo(mce, ty, qbb)
1900 )
1901 );
1902}
1903
1904
1905/*------------------------------------------------------------*/
1906/*--- Helpers for dealing with vector primops. ---*/
sewardj3245c912004-12-10 14:58:26 +00001907/*------------------------------------------------------------*/
1908
sewardja1d93302004-12-12 16:45:06 +00001909/* Vector pessimisation -- pessimise within each lane individually. */
1910
1911static IRAtom* mkPCast8x16 ( MCEnv* mce, IRAtom* at )
1912{
sewardj7cf4e6b2008-05-01 20:24:26 +00001913 return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ8x16, at));
sewardja1d93302004-12-12 16:45:06 +00001914}
1915
1916static IRAtom* mkPCast16x8 ( MCEnv* mce, IRAtom* at )
1917{
sewardj7cf4e6b2008-05-01 20:24:26 +00001918 return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ16x8, at));
sewardja1d93302004-12-12 16:45:06 +00001919}
1920
1921static IRAtom* mkPCast32x4 ( MCEnv* mce, IRAtom* at )
1922{
sewardj7cf4e6b2008-05-01 20:24:26 +00001923 return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ32x4, at));
sewardja1d93302004-12-12 16:45:06 +00001924}
1925
1926static IRAtom* mkPCast64x2 ( MCEnv* mce, IRAtom* at )
1927{
sewardj7cf4e6b2008-05-01 20:24:26 +00001928 return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ64x2, at));
sewardja1d93302004-12-12 16:45:06 +00001929}
1930
sewardj350e8f72012-06-25 07:52:15 +00001931static IRAtom* mkPCast64x4 ( MCEnv* mce, IRAtom* at )
1932{
1933 return assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ64x4, at));
1934}
1935
1936static IRAtom* mkPCast32x8 ( MCEnv* mce, IRAtom* at )
1937{
1938 return assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ32x8, at));
1939}
1940
sewardjacd2e912005-01-13 19:17:06 +00001941static IRAtom* mkPCast32x2 ( MCEnv* mce, IRAtom* at )
1942{
sewardj7cf4e6b2008-05-01 20:24:26 +00001943 return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ32x2, at));
sewardjacd2e912005-01-13 19:17:06 +00001944}
1945
1946static IRAtom* mkPCast16x4 ( MCEnv* mce, IRAtom* at )
1947{
sewardj7cf4e6b2008-05-01 20:24:26 +00001948 return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ16x4, at));
sewardjacd2e912005-01-13 19:17:06 +00001949}
1950
1951static IRAtom* mkPCast8x8 ( MCEnv* mce, IRAtom* at )
1952{
sewardj7cf4e6b2008-05-01 20:24:26 +00001953 return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ8x8, at));
sewardjacd2e912005-01-13 19:17:06 +00001954}
1955
sewardjc678b852010-09-22 00:58:51 +00001956static IRAtom* mkPCast16x2 ( MCEnv* mce, IRAtom* at )
1957{
1958 return assignNew('V', mce, Ity_I32, unop(Iop_CmpNEZ16x2, at));
1959}
1960
1961static IRAtom* mkPCast8x4 ( MCEnv* mce, IRAtom* at )
1962{
1963 return assignNew('V', mce, Ity_I32, unop(Iop_CmpNEZ8x4, at));
1964}
1965
sewardja1d93302004-12-12 16:45:06 +00001966
sewardj3245c912004-12-10 14:58:26 +00001967/* Here's a simple scheme capable of handling ops derived from SSE1
1968 code and while only generating ops that can be efficiently
1969 implemented in SSE1. */
1970
1971/* All-lanes versions are straightforward:
1972
sewardj20d38f22005-02-07 23:50:18 +00001973 binary32Fx4(x,y) ==> PCast32x4(UifUV128(x#,y#))
sewardj3245c912004-12-10 14:58:26 +00001974
1975 unary32Fx4(x,y) ==> PCast32x4(x#)
1976
1977 Lowest-lane-only versions are more complex:
1978
sewardj20d38f22005-02-07 23:50:18 +00001979 binary32F0x4(x,y) ==> SetV128lo32(
sewardj3245c912004-12-10 14:58:26 +00001980 x#,
sewardj20d38f22005-02-07 23:50:18 +00001981 PCast32(V128to32(UifUV128(x#,y#)))
sewardj3245c912004-12-10 14:58:26 +00001982 )
1983
1984 This is perhaps not so obvious. In particular, it's faster to
sewardj20d38f22005-02-07 23:50:18 +00001985 do a V128-bit UifU and then take the bottom 32 bits than the more
sewardj3245c912004-12-10 14:58:26 +00001986 obvious scheme of taking the bottom 32 bits of each operand
1987 and doing a 32-bit UifU. Basically since UifU is fast and
1988 chopping lanes off vector values is slow.
1989
1990 Finally:
1991
sewardj20d38f22005-02-07 23:50:18 +00001992 unary32F0x4(x) ==> SetV128lo32(
sewardj3245c912004-12-10 14:58:26 +00001993 x#,
sewardj20d38f22005-02-07 23:50:18 +00001994 PCast32(V128to32(x#))
sewardj3245c912004-12-10 14:58:26 +00001995 )
1996
1997 Where:
1998
1999 PCast32(v#) = 1Sto32(CmpNE32(v#,0))
2000 PCast32x4(v#) = CmpNEZ32x4(v#)
2001*/
2002
2003static
2004IRAtom* binary32Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2005{
2006 IRAtom* at;
2007 tl_assert(isShadowAtom(mce, vatomX));
2008 tl_assert(isShadowAtom(mce, vatomY));
sewardj20d38f22005-02-07 23:50:18 +00002009 at = mkUifUV128(mce, vatomX, vatomY);
sewardj7cf4e6b2008-05-01 20:24:26 +00002010 at = assignNew('V', mce, Ity_V128, mkPCast32x4(mce, at));
sewardj3245c912004-12-10 14:58:26 +00002011 return at;
2012}
2013
2014static
2015IRAtom* unary32Fx4 ( MCEnv* mce, IRAtom* vatomX )
2016{
2017 IRAtom* at;
2018 tl_assert(isShadowAtom(mce, vatomX));
sewardj7cf4e6b2008-05-01 20:24:26 +00002019 at = assignNew('V', mce, Ity_V128, mkPCast32x4(mce, vatomX));
sewardj3245c912004-12-10 14:58:26 +00002020 return at;
2021}
2022
2023static
2024IRAtom* binary32F0x4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2025{
2026 IRAtom* at;
2027 tl_assert(isShadowAtom(mce, vatomX));
2028 tl_assert(isShadowAtom(mce, vatomY));
sewardj20d38f22005-02-07 23:50:18 +00002029 at = mkUifUV128(mce, vatomX, vatomY);
sewardj7cf4e6b2008-05-01 20:24:26 +00002030 at = assignNew('V', mce, Ity_I32, unop(Iop_V128to32, at));
sewardj3245c912004-12-10 14:58:26 +00002031 at = mkPCastTo(mce, Ity_I32, at);
sewardj7cf4e6b2008-05-01 20:24:26 +00002032 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
sewardj3245c912004-12-10 14:58:26 +00002033 return at;
2034}
2035
2036static
2037IRAtom* unary32F0x4 ( MCEnv* mce, IRAtom* vatomX )
2038{
2039 IRAtom* at;
2040 tl_assert(isShadowAtom(mce, vatomX));
sewardj7cf4e6b2008-05-01 20:24:26 +00002041 at = assignNew('V', mce, Ity_I32, unop(Iop_V128to32, vatomX));
sewardj3245c912004-12-10 14:58:26 +00002042 at = mkPCastTo(mce, Ity_I32, at);
sewardj7cf4e6b2008-05-01 20:24:26 +00002043 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
sewardj3245c912004-12-10 14:58:26 +00002044 return at;
2045}
2046
sewardj0b070592004-12-10 21:44:22 +00002047/* --- ... and ... 64Fx2 versions of the same ... --- */
2048
2049static
2050IRAtom* binary64Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2051{
2052 IRAtom* at;
2053 tl_assert(isShadowAtom(mce, vatomX));
2054 tl_assert(isShadowAtom(mce, vatomY));
sewardj20d38f22005-02-07 23:50:18 +00002055 at = mkUifUV128(mce, vatomX, vatomY);
sewardj7cf4e6b2008-05-01 20:24:26 +00002056 at = assignNew('V', mce, Ity_V128, mkPCast64x2(mce, at));
sewardj0b070592004-12-10 21:44:22 +00002057 return at;
2058}
2059
2060static
2061IRAtom* unary64Fx2 ( MCEnv* mce, IRAtom* vatomX )
2062{
2063 IRAtom* at;
2064 tl_assert(isShadowAtom(mce, vatomX));
sewardj7cf4e6b2008-05-01 20:24:26 +00002065 at = assignNew('V', mce, Ity_V128, mkPCast64x2(mce, vatomX));
sewardj0b070592004-12-10 21:44:22 +00002066 return at;
2067}
2068
2069static
2070IRAtom* binary64F0x2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2071{
2072 IRAtom* at;
2073 tl_assert(isShadowAtom(mce, vatomX));
2074 tl_assert(isShadowAtom(mce, vatomY));
sewardj20d38f22005-02-07 23:50:18 +00002075 at = mkUifUV128(mce, vatomX, vatomY);
sewardj7cf4e6b2008-05-01 20:24:26 +00002076 at = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, at));
sewardj0b070592004-12-10 21:44:22 +00002077 at = mkPCastTo(mce, Ity_I64, at);
sewardj7cf4e6b2008-05-01 20:24:26 +00002078 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
sewardj0b070592004-12-10 21:44:22 +00002079 return at;
2080}
2081
2082static
2083IRAtom* unary64F0x2 ( MCEnv* mce, IRAtom* vatomX )
2084{
2085 IRAtom* at;
2086 tl_assert(isShadowAtom(mce, vatomX));
sewardj7cf4e6b2008-05-01 20:24:26 +00002087 at = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vatomX));
sewardj0b070592004-12-10 21:44:22 +00002088 at = mkPCastTo(mce, Ity_I64, at);
sewardj7cf4e6b2008-05-01 20:24:26 +00002089 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
sewardj0b070592004-12-10 21:44:22 +00002090 return at;
2091}
2092
sewardj57f92b02010-08-22 11:54:14 +00002093/* --- --- ... and ... 32Fx2 versions of the same --- --- */
2094
2095static
2096IRAtom* binary32Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2097{
2098 IRAtom* at;
2099 tl_assert(isShadowAtom(mce, vatomX));
2100 tl_assert(isShadowAtom(mce, vatomY));
2101 at = mkUifU64(mce, vatomX, vatomY);
2102 at = assignNew('V', mce, Ity_I64, mkPCast32x2(mce, at));
2103 return at;
2104}
2105
2106static
2107IRAtom* unary32Fx2 ( MCEnv* mce, IRAtom* vatomX )
2108{
2109 IRAtom* at;
2110 tl_assert(isShadowAtom(mce, vatomX));
2111 at = assignNew('V', mce, Ity_I64, mkPCast32x2(mce, vatomX));
2112 return at;
2113}
2114
sewardj350e8f72012-06-25 07:52:15 +00002115/* --- ... and ... 64Fx4 versions of the same ... --- */
2116
2117static
2118IRAtom* binary64Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2119{
2120 IRAtom* at;
2121 tl_assert(isShadowAtom(mce, vatomX));
2122 tl_assert(isShadowAtom(mce, vatomY));
2123 at = mkUifUV256(mce, vatomX, vatomY);
2124 at = assignNew('V', mce, Ity_V256, mkPCast64x4(mce, at));
2125 return at;
2126}
2127
2128static
2129IRAtom* unary64Fx4 ( MCEnv* mce, IRAtom* vatomX )
2130{
2131 IRAtom* at;
2132 tl_assert(isShadowAtom(mce, vatomX));
2133 at = assignNew('V', mce, Ity_V256, mkPCast64x4(mce, vatomX));
2134 return at;
2135}
2136
2137/* --- ... and ... 32Fx8 versions of the same ... --- */
2138
2139static
2140IRAtom* binary32Fx8 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2141{
2142 IRAtom* at;
2143 tl_assert(isShadowAtom(mce, vatomX));
2144 tl_assert(isShadowAtom(mce, vatomY));
2145 at = mkUifUV256(mce, vatomX, vatomY);
2146 at = assignNew('V', mce, Ity_V256, mkPCast32x8(mce, at));
2147 return at;
2148}
2149
2150static
2151IRAtom* unary32Fx8 ( MCEnv* mce, IRAtom* vatomX )
2152{
2153 IRAtom* at;
2154 tl_assert(isShadowAtom(mce, vatomX));
2155 at = assignNew('V', mce, Ity_V256, mkPCast32x8(mce, vatomX));
2156 return at;
2157}
2158
sewardja1d93302004-12-12 16:45:06 +00002159/* --- --- Vector saturated narrowing --- --- */
2160
sewardjb5a29232011-10-22 09:29:41 +00002161/* We used to do something very clever here, but on closer inspection
2162 (2011-Jun-15), and in particular bug #279698, it turns out to be
2163 wrong. Part of the problem came from the fact that for a long
2164 time, the IR primops to do with saturated narrowing were
2165 underspecified and managed to confuse multiple cases which needed
2166 to be separate: the op names had a signedness qualifier, but in
2167 fact the source and destination signednesses needed to be specified
2168 independently, so the op names really need two independent
2169 signedness specifiers.
sewardja1d93302004-12-12 16:45:06 +00002170
sewardjb5a29232011-10-22 09:29:41 +00002171 As of 2011-Jun-15 (ish) the underspecification was sorted out
2172 properly. The incorrect instrumentation remained, though. That
2173 has now (2011-Oct-22) been fixed.
sewardja1d93302004-12-12 16:45:06 +00002174
sewardjb5a29232011-10-22 09:29:41 +00002175 What we now do is simple:
sewardja1d93302004-12-12 16:45:06 +00002176
sewardjb5a29232011-10-22 09:29:41 +00002177 Let the original narrowing op be QNarrowBinXtoYxZ, where Z is a
2178 number of lanes, X is the source lane width and signedness, and Y
2179 is the destination lane width and signedness. In all cases the
2180 destination lane width is half the source lane width, so the names
2181 have a bit of redundancy, but are at least easy to read.
sewardja1d93302004-12-12 16:45:06 +00002182
sewardjb5a29232011-10-22 09:29:41 +00002183 For example, Iop_QNarrowBin32Sto16Ux8 narrows 8 lanes of signed 32s
2184 to unsigned 16s.
sewardja1d93302004-12-12 16:45:06 +00002185
sewardjb5a29232011-10-22 09:29:41 +00002186 Let Vanilla(OP) be a function that takes OP, one of these
2187 saturating narrowing ops, and produces the same "shaped" narrowing
2188 op which is not saturating, but merely dumps the most significant
2189 bits. "same shape" means that the lane numbers and widths are the
2190 same as with OP.
sewardja1d93302004-12-12 16:45:06 +00002191
sewardjb5a29232011-10-22 09:29:41 +00002192 For example, Vanilla(Iop_QNarrowBin32Sto16Ux8)
2193 = Iop_NarrowBin32to16x8,
2194 that is, narrow 8 lanes of 32 bits to 8 lanes of 16 bits, by
2195 dumping the top half of each lane.
sewardja1d93302004-12-12 16:45:06 +00002196
sewardjb5a29232011-10-22 09:29:41 +00002197 So, with that in place, the scheme is simple, and it is simple to
2198 pessimise each lane individually and then apply Vanilla(OP) so as
2199 to get the result in the right "shape". If the original OP is
2200 QNarrowBinXtoYxZ then we produce
sewardja1d93302004-12-12 16:45:06 +00002201
sewardjb5a29232011-10-22 09:29:41 +00002202 Vanilla(OP)( PCast-X-to-X-x-Z(vatom1), PCast-X-to-X-x-Z(vatom2) )
sewardj9beeb0a2011-06-15 15:11:07 +00002203
sewardjb5a29232011-10-22 09:29:41 +00002204 or for the case when OP is unary (Iop_QNarrowUn*)
2205
2206 Vanilla(OP)( PCast-X-to-X-x-Z(vatom) )
sewardja1d93302004-12-12 16:45:06 +00002207*/
2208static
sewardjb5a29232011-10-22 09:29:41 +00002209IROp vanillaNarrowingOpOfShape ( IROp qnarrowOp )
2210{
2211 switch (qnarrowOp) {
2212 /* Binary: (128, 128) -> 128 */
2213 case Iop_QNarrowBin16Sto8Ux16:
2214 case Iop_QNarrowBin16Sto8Sx16:
2215 case Iop_QNarrowBin16Uto8Ux16:
2216 return Iop_NarrowBin16to8x16;
2217 case Iop_QNarrowBin32Sto16Ux8:
2218 case Iop_QNarrowBin32Sto16Sx8:
2219 case Iop_QNarrowBin32Uto16Ux8:
2220 return Iop_NarrowBin32to16x8;
2221 /* Binary: (64, 64) -> 64 */
2222 case Iop_QNarrowBin32Sto16Sx4:
2223 return Iop_NarrowBin32to16x4;
2224 case Iop_QNarrowBin16Sto8Ux8:
2225 case Iop_QNarrowBin16Sto8Sx8:
2226 return Iop_NarrowBin16to8x8;
2227 /* Unary: 128 -> 64 */
2228 case Iop_QNarrowUn64Uto32Ux2:
2229 case Iop_QNarrowUn64Sto32Sx2:
2230 case Iop_QNarrowUn64Sto32Ux2:
2231 return Iop_NarrowUn64to32x2;
2232 case Iop_QNarrowUn32Uto16Ux4:
2233 case Iop_QNarrowUn32Sto16Sx4:
2234 case Iop_QNarrowUn32Sto16Ux4:
2235 return Iop_NarrowUn32to16x4;
2236 case Iop_QNarrowUn16Uto8Ux8:
2237 case Iop_QNarrowUn16Sto8Sx8:
2238 case Iop_QNarrowUn16Sto8Ux8:
2239 return Iop_NarrowUn16to8x8;
2240 default:
2241 ppIROp(qnarrowOp);
2242 VG_(tool_panic)("vanillaNarrowOpOfShape");
2243 }
2244}
2245
2246static
sewardj7ee7d852011-06-16 11:37:21 +00002247IRAtom* vectorNarrowBinV128 ( MCEnv* mce, IROp narrow_op,
2248 IRAtom* vatom1, IRAtom* vatom2)
sewardja1d93302004-12-12 16:45:06 +00002249{
2250 IRAtom *at1, *at2, *at3;
2251 IRAtom* (*pcast)( MCEnv*, IRAtom* );
2252 switch (narrow_op) {
sewardj7ee7d852011-06-16 11:37:21 +00002253 case Iop_QNarrowBin32Sto16Sx8: pcast = mkPCast32x4; break;
2254 case Iop_QNarrowBin32Uto16Ux8: pcast = mkPCast32x4; break;
2255 case Iop_QNarrowBin32Sto16Ux8: pcast = mkPCast32x4; break;
2256 case Iop_QNarrowBin16Sto8Sx16: pcast = mkPCast16x8; break;
2257 case Iop_QNarrowBin16Uto8Ux16: pcast = mkPCast16x8; break;
2258 case Iop_QNarrowBin16Sto8Ux16: pcast = mkPCast16x8; break;
2259 default: VG_(tool_panic)("vectorNarrowBinV128");
sewardja1d93302004-12-12 16:45:06 +00002260 }
sewardjb5a29232011-10-22 09:29:41 +00002261 IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op);
sewardja1d93302004-12-12 16:45:06 +00002262 tl_assert(isShadowAtom(mce,vatom1));
2263 tl_assert(isShadowAtom(mce,vatom2));
sewardj7cf4e6b2008-05-01 20:24:26 +00002264 at1 = assignNew('V', mce, Ity_V128, pcast(mce, vatom1));
2265 at2 = assignNew('V', mce, Ity_V128, pcast(mce, vatom2));
sewardjb5a29232011-10-22 09:29:41 +00002266 at3 = assignNew('V', mce, Ity_V128, binop(vanilla_narrow, at1, at2));
sewardja1d93302004-12-12 16:45:06 +00002267 return at3;
2268}
2269
sewardjacd2e912005-01-13 19:17:06 +00002270static
sewardj7ee7d852011-06-16 11:37:21 +00002271IRAtom* vectorNarrowBin64 ( MCEnv* mce, IROp narrow_op,
2272 IRAtom* vatom1, IRAtom* vatom2)
sewardjacd2e912005-01-13 19:17:06 +00002273{
2274 IRAtom *at1, *at2, *at3;
2275 IRAtom* (*pcast)( MCEnv*, IRAtom* );
2276 switch (narrow_op) {
sewardj7ee7d852011-06-16 11:37:21 +00002277 case Iop_QNarrowBin32Sto16Sx4: pcast = mkPCast32x2; break;
2278 case Iop_QNarrowBin16Sto8Sx8: pcast = mkPCast16x4; break;
2279 case Iop_QNarrowBin16Sto8Ux8: pcast = mkPCast16x4; break;
2280 default: VG_(tool_panic)("vectorNarrowBin64");
sewardjacd2e912005-01-13 19:17:06 +00002281 }
sewardjb5a29232011-10-22 09:29:41 +00002282 IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op);
sewardjacd2e912005-01-13 19:17:06 +00002283 tl_assert(isShadowAtom(mce,vatom1));
2284 tl_assert(isShadowAtom(mce,vatom2));
sewardj7cf4e6b2008-05-01 20:24:26 +00002285 at1 = assignNew('V', mce, Ity_I64, pcast(mce, vatom1));
2286 at2 = assignNew('V', mce, Ity_I64, pcast(mce, vatom2));
sewardjb5a29232011-10-22 09:29:41 +00002287 at3 = assignNew('V', mce, Ity_I64, binop(vanilla_narrow, at1, at2));
sewardjacd2e912005-01-13 19:17:06 +00002288 return at3;
2289}
2290
sewardj57f92b02010-08-22 11:54:14 +00002291static
sewardjb5a29232011-10-22 09:29:41 +00002292IRAtom* vectorNarrowUnV128 ( MCEnv* mce, IROp narrow_op,
sewardj7ee7d852011-06-16 11:37:21 +00002293 IRAtom* vatom1)
sewardj57f92b02010-08-22 11:54:14 +00002294{
2295 IRAtom *at1, *at2;
2296 IRAtom* (*pcast)( MCEnv*, IRAtom* );
sewardjb5a29232011-10-22 09:29:41 +00002297 tl_assert(isShadowAtom(mce,vatom1));
2298 /* For vanilla narrowing (non-saturating), we can just apply
2299 the op directly to the V bits. */
2300 switch (narrow_op) {
2301 case Iop_NarrowUn16to8x8:
2302 case Iop_NarrowUn32to16x4:
2303 case Iop_NarrowUn64to32x2:
2304 at1 = assignNew('V', mce, Ity_I64, unop(narrow_op, vatom1));
2305 return at1;
2306 default:
2307 break; /* Do Plan B */
2308 }
2309 /* Plan B: for ops that involve a saturation operation on the args,
2310 we must PCast before the vanilla narrow. */
2311 switch (narrow_op) {
sewardj7ee7d852011-06-16 11:37:21 +00002312 case Iop_QNarrowUn16Sto8Sx8: pcast = mkPCast16x8; break;
2313 case Iop_QNarrowUn16Sto8Ux8: pcast = mkPCast16x8; break;
2314 case Iop_QNarrowUn16Uto8Ux8: pcast = mkPCast16x8; break;
2315 case Iop_QNarrowUn32Sto16Sx4: pcast = mkPCast32x4; break;
2316 case Iop_QNarrowUn32Sto16Ux4: pcast = mkPCast32x4; break;
2317 case Iop_QNarrowUn32Uto16Ux4: pcast = mkPCast32x4; break;
2318 case Iop_QNarrowUn64Sto32Sx2: pcast = mkPCast64x2; break;
2319 case Iop_QNarrowUn64Sto32Ux2: pcast = mkPCast64x2; break;
2320 case Iop_QNarrowUn64Uto32Ux2: pcast = mkPCast64x2; break;
2321 default: VG_(tool_panic)("vectorNarrowUnV128");
sewardj57f92b02010-08-22 11:54:14 +00002322 }
sewardjb5a29232011-10-22 09:29:41 +00002323 IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op);
sewardj57f92b02010-08-22 11:54:14 +00002324 at1 = assignNew('V', mce, Ity_V128, pcast(mce, vatom1));
sewardjb5a29232011-10-22 09:29:41 +00002325 at2 = assignNew('V', mce, Ity_I64, unop(vanilla_narrow, at1));
sewardj57f92b02010-08-22 11:54:14 +00002326 return at2;
2327}
2328
2329static
sewardj7ee7d852011-06-16 11:37:21 +00002330IRAtom* vectorWidenI64 ( MCEnv* mce, IROp longen_op,
2331 IRAtom* vatom1)
sewardj57f92b02010-08-22 11:54:14 +00002332{
2333 IRAtom *at1, *at2;
2334 IRAtom* (*pcast)( MCEnv*, IRAtom* );
2335 switch (longen_op) {
sewardj7ee7d852011-06-16 11:37:21 +00002336 case Iop_Widen8Uto16x8: pcast = mkPCast16x8; break;
2337 case Iop_Widen8Sto16x8: pcast = mkPCast16x8; break;
2338 case Iop_Widen16Uto32x4: pcast = mkPCast32x4; break;
2339 case Iop_Widen16Sto32x4: pcast = mkPCast32x4; break;
2340 case Iop_Widen32Uto64x2: pcast = mkPCast64x2; break;
2341 case Iop_Widen32Sto64x2: pcast = mkPCast64x2; break;
2342 default: VG_(tool_panic)("vectorWidenI64");
sewardj57f92b02010-08-22 11:54:14 +00002343 }
2344 tl_assert(isShadowAtom(mce,vatom1));
2345 at1 = assignNew('V', mce, Ity_V128, unop(longen_op, vatom1));
2346 at2 = assignNew('V', mce, Ity_V128, pcast(mce, at1));
2347 return at2;
2348}
2349
sewardja1d93302004-12-12 16:45:06 +00002350
2351/* --- --- Vector integer arithmetic --- --- */
2352
2353/* Simple ... UifU the args and per-lane pessimise the results. */
sewardjacd2e912005-01-13 19:17:06 +00002354
sewardj20d38f22005-02-07 23:50:18 +00002355/* --- V128-bit versions --- */
sewardjacd2e912005-01-13 19:17:06 +00002356
sewardja1d93302004-12-12 16:45:06 +00002357static
2358IRAtom* binary8Ix16 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2359{
2360 IRAtom* at;
sewardj20d38f22005-02-07 23:50:18 +00002361 at = mkUifUV128(mce, vatom1, vatom2);
sewardja1d93302004-12-12 16:45:06 +00002362 at = mkPCast8x16(mce, at);
2363 return at;
2364}
2365
2366static
2367IRAtom* binary16Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2368{
2369 IRAtom* at;
sewardj20d38f22005-02-07 23:50:18 +00002370 at = mkUifUV128(mce, vatom1, vatom2);
sewardja1d93302004-12-12 16:45:06 +00002371 at = mkPCast16x8(mce, at);
2372 return at;
2373}
2374
2375static
2376IRAtom* binary32Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2377{
2378 IRAtom* at;
sewardj20d38f22005-02-07 23:50:18 +00002379 at = mkUifUV128(mce, vatom1, vatom2);
sewardja1d93302004-12-12 16:45:06 +00002380 at = mkPCast32x4(mce, at);
2381 return at;
2382}
2383
2384static
2385IRAtom* binary64Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2386{
2387 IRAtom* at;
sewardj20d38f22005-02-07 23:50:18 +00002388 at = mkUifUV128(mce, vatom1, vatom2);
sewardja1d93302004-12-12 16:45:06 +00002389 at = mkPCast64x2(mce, at);
2390 return at;
2391}
sewardj3245c912004-12-10 14:58:26 +00002392
sewardjacd2e912005-01-13 19:17:06 +00002393/* --- 64-bit versions --- */
2394
2395static
2396IRAtom* binary8Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2397{
2398 IRAtom* at;
2399 at = mkUifU64(mce, vatom1, vatom2);
2400 at = mkPCast8x8(mce, at);
2401 return at;
2402}
2403
2404static
2405IRAtom* binary16Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2406{
2407 IRAtom* at;
2408 at = mkUifU64(mce, vatom1, vatom2);
2409 at = mkPCast16x4(mce, at);
2410 return at;
2411}
2412
2413static
2414IRAtom* binary32Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2415{
2416 IRAtom* at;
2417 at = mkUifU64(mce, vatom1, vatom2);
2418 at = mkPCast32x2(mce, at);
2419 return at;
2420}
2421
sewardj57f92b02010-08-22 11:54:14 +00002422static
2423IRAtom* binary64Ix1 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2424{
2425 IRAtom* at;
2426 at = mkUifU64(mce, vatom1, vatom2);
2427 at = mkPCastTo(mce, Ity_I64, at);
2428 return at;
2429}
2430
sewardjc678b852010-09-22 00:58:51 +00002431/* --- 32-bit versions --- */
2432
2433static
2434IRAtom* binary8Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2435{
2436 IRAtom* at;
2437 at = mkUifU32(mce, vatom1, vatom2);
2438 at = mkPCast8x4(mce, at);
2439 return at;
2440}
2441
2442static
2443IRAtom* binary16Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2444{
2445 IRAtom* at;
2446 at = mkUifU32(mce, vatom1, vatom2);
2447 at = mkPCast16x2(mce, at);
2448 return at;
2449}
2450
sewardj3245c912004-12-10 14:58:26 +00002451
2452/*------------------------------------------------------------*/
sewardj95448072004-11-22 20:19:51 +00002453/*--- Generate shadow values from all kinds of IRExprs. ---*/
2454/*------------------------------------------------------------*/
2455
2456static
sewardje91cea72006-02-08 19:32:02 +00002457IRAtom* expr2vbits_Qop ( MCEnv* mce,
2458 IROp op,
2459 IRAtom* atom1, IRAtom* atom2,
2460 IRAtom* atom3, IRAtom* atom4 )
2461{
2462 IRAtom* vatom1 = expr2vbits( mce, atom1 );
2463 IRAtom* vatom2 = expr2vbits( mce, atom2 );
2464 IRAtom* vatom3 = expr2vbits( mce, atom3 );
2465 IRAtom* vatom4 = expr2vbits( mce, atom4 );
2466
2467 tl_assert(isOriginalAtom(mce,atom1));
2468 tl_assert(isOriginalAtom(mce,atom2));
2469 tl_assert(isOriginalAtom(mce,atom3));
2470 tl_assert(isOriginalAtom(mce,atom4));
2471 tl_assert(isShadowAtom(mce,vatom1));
2472 tl_assert(isShadowAtom(mce,vatom2));
2473 tl_assert(isShadowAtom(mce,vatom3));
2474 tl_assert(isShadowAtom(mce,vatom4));
2475 tl_assert(sameKindedAtoms(atom1,vatom1));
2476 tl_assert(sameKindedAtoms(atom2,vatom2));
2477 tl_assert(sameKindedAtoms(atom3,vatom3));
2478 tl_assert(sameKindedAtoms(atom4,vatom4));
2479 switch (op) {
2480 case Iop_MAddF64:
2481 case Iop_MAddF64r32:
2482 case Iop_MSubF64:
2483 case Iop_MSubF64r32:
2484 /* I32(rm) x F64 x F64 x F64 -> F64 */
2485 return mkLazy4(mce, Ity_I64, vatom1, vatom2, vatom3, vatom4);
sewardjb5b87402011-03-07 16:05:35 +00002486
2487 case Iop_MAddF32:
2488 case Iop_MSubF32:
2489 /* I32(rm) x F32 x F32 x F32 -> F32 */
2490 return mkLazy4(mce, Ity_I32, vatom1, vatom2, vatom3, vatom4);
2491
sewardj350e8f72012-06-25 07:52:15 +00002492 /* V256-bit data-steering */
2493 case Iop_64x4toV256:
2494 return assignNew('V', mce, Ity_V256,
2495 IRExpr_Qop(op, vatom1, vatom2, vatom3, vatom4));
2496
sewardje91cea72006-02-08 19:32:02 +00002497 default:
2498 ppIROp(op);
2499 VG_(tool_panic)("memcheck:expr2vbits_Qop");
2500 }
2501}
2502
2503
/* Produce the shadow (V-bits) value for a ternary operation.
   For most FP/DFP ops the three operand shadows are combined lazily
   with mkLazy3 at the shadow type of the result, pessimistically
   marking the whole result undefined if any operand bit is undefined.
   For ops whose third (or second) argument is a lane/byte selector
   (Extract*, SetElem*), that argument must be completely defined:
   complain if it is not, and pass the original (not the shadow)
   through to the reconstructed op. */
static
IRAtom* expr2vbits_Triop ( MCEnv* mce,
                           IROp op,
                           IRAtom* atom1, IRAtom* atom2, IRAtom* atom3 )
{
   IRAtom* vatom1 = expr2vbits( mce, atom1 );
   IRAtom* vatom2 = expr2vbits( mce, atom2 );
   IRAtom* vatom3 = expr2vbits( mce, atom3 );

   tl_assert(isOriginalAtom(mce,atom1));
   tl_assert(isOriginalAtom(mce,atom2));
   tl_assert(isOriginalAtom(mce,atom3));
   tl_assert(isShadowAtom(mce,vatom1));
   tl_assert(isShadowAtom(mce,vatom2));
   tl_assert(isShadowAtom(mce,vatom3));
   tl_assert(sameKindedAtoms(atom1,vatom1));
   tl_assert(sameKindedAtoms(atom2,vatom2));
   tl_assert(sameKindedAtoms(atom3,vatom3));
   switch (op) {
      case Iop_AddF128:
      case Iop_AddD128:
      case Iop_SubF128:
      case Iop_SubD128:
      case Iop_MulF128:
      case Iop_MulD128:
      case Iop_DivF128:
      case Iop_DivD128:
      case Iop_QuantizeD128:
         /* I32(rm) x F128/D128 x F128/D128 -> F128/D128 */
         return mkLazy3(mce, Ity_I128, vatom1, vatom2, vatom3);
      case Iop_AddF64:
      case Iop_AddD64:
      case Iop_AddF64r32:
      case Iop_SubF64:
      case Iop_SubD64:
      case Iop_SubF64r32:
      case Iop_MulF64:
      case Iop_MulD64:
      case Iop_MulF64r32:
      case Iop_DivF64:
      case Iop_DivD64:
      case Iop_DivF64r32:
      case Iop_ScaleF64:
      case Iop_Yl2xF64:
      case Iop_Yl2xp1F64:
      case Iop_AtanF64:
      case Iop_PRemF64:
      case Iop_PRem1F64:
      case Iop_QuantizeD64:
         /* I32(rm) x F64/D64 x F64/D64 -> F64/D64 */
         return mkLazy3(mce, Ity_I64, vatom1, vatom2, vatom3);
      case Iop_PRemC3210F64:
      case Iop_PRem1C3210F64:
         /* I32(rm) x F64 x F64 -> I32 */
         return mkLazy3(mce, Ity_I32, vatom1, vatom2, vatom3);
      case Iop_AddF32:
      case Iop_SubF32:
      case Iop_MulF32:
      case Iop_DivF32:
         /* I32(rm) x F32 x F32 -> F32; the F32 result's shadow is I32.
            (Comment previously said "-> I32", inconsistent with the
            result-typed comments on the F64/F128 groups above.) */
         return mkLazy3(mce, Ity_I32, vatom1, vatom2, vatom3);
      case Iop_SignificanceRoundD64:
         /* IRRoundingModeDFP(I32) x I8 x D64 -> D64 */
         return mkLazy3(mce, Ity_I64, vatom1, vatom2, vatom3);
      case Iop_SignificanceRoundD128:
         /* IRRoundingModeDFP(I32) x I8 x D128 -> D128 */
         return mkLazy3(mce, Ity_I128, vatom1, vatom2, vatom3);
      case Iop_ExtractV128:
         /* Byte-shift amount (atom3) must be fully defined. */
         complainIfUndefined(mce, atom3, NULL);
         return assignNew('V', mce, Ity_V128, triop(op, vatom1, vatom2, atom3));
      case Iop_Extract64:
         /* Byte-shift amount (atom3) must be fully defined. */
         complainIfUndefined(mce, atom3, NULL);
         return assignNew('V', mce, Ity_I64, triop(op, vatom1, vatom2, atom3));
      case Iop_SetElem8x8:
      case Iop_SetElem16x4:
      case Iop_SetElem32x2:
         /* Lane index (atom2) must be fully defined. */
         complainIfUndefined(mce, atom2, NULL);
         return assignNew('V', mce, Ity_I64, triop(op, vatom1, atom2, vatom3));
      default:
         ppIROp(op);
         VG_(tool_panic)("memcheck:expr2vbits_Triop");
   }
}
2587
2588
2589static
sewardj95448072004-11-22 20:19:51 +00002590IRAtom* expr2vbits_Binop ( MCEnv* mce,
2591 IROp op,
2592 IRAtom* atom1, IRAtom* atom2 )
2593{
2594 IRType and_or_ty;
2595 IRAtom* (*uifu) (MCEnv*, IRAtom*, IRAtom*);
2596 IRAtom* (*difd) (MCEnv*, IRAtom*, IRAtom*);
2597 IRAtom* (*improve) (MCEnv*, IRAtom*, IRAtom*);
2598
2599 IRAtom* vatom1 = expr2vbits( mce, atom1 );
2600 IRAtom* vatom2 = expr2vbits( mce, atom2 );
2601
2602 tl_assert(isOriginalAtom(mce,atom1));
2603 tl_assert(isOriginalAtom(mce,atom2));
2604 tl_assert(isShadowAtom(mce,vatom1));
2605 tl_assert(isShadowAtom(mce,vatom2));
2606 tl_assert(sameKindedAtoms(atom1,vatom1));
2607 tl_assert(sameKindedAtoms(atom2,vatom2));
2608 switch (op) {
2609
sewardjc678b852010-09-22 00:58:51 +00002610 /* 32-bit SIMD */
2611
2612 case Iop_Add16x2:
2613 case Iop_HAdd16Ux2:
2614 case Iop_HAdd16Sx2:
2615 case Iop_Sub16x2:
2616 case Iop_HSub16Ux2:
2617 case Iop_HSub16Sx2:
2618 case Iop_QAdd16Sx2:
2619 case Iop_QSub16Sx2:
sewardj9fb31092012-09-17 15:28:46 +00002620 case Iop_QSub16Ux2:
sewardjc678b852010-09-22 00:58:51 +00002621 return binary16Ix2(mce, vatom1, vatom2);
2622
2623 case Iop_Add8x4:
2624 case Iop_HAdd8Ux4:
2625 case Iop_HAdd8Sx4:
2626 case Iop_Sub8x4:
2627 case Iop_HSub8Ux4:
2628 case Iop_HSub8Sx4:
2629 case Iop_QSub8Ux4:
2630 case Iop_QAdd8Ux4:
2631 case Iop_QSub8Sx4:
2632 case Iop_QAdd8Sx4:
2633 return binary8Ix4(mce, vatom1, vatom2);
2634
sewardjacd2e912005-01-13 19:17:06 +00002635 /* 64-bit SIMD */
2636
sewardj57f92b02010-08-22 11:54:14 +00002637 case Iop_ShrN8x8:
sewardjacd2e912005-01-13 19:17:06 +00002638 case Iop_ShrN16x4:
2639 case Iop_ShrN32x2:
sewardj03809ae2006-12-27 01:16:58 +00002640 case Iop_SarN8x8:
sewardjacd2e912005-01-13 19:17:06 +00002641 case Iop_SarN16x4:
2642 case Iop_SarN32x2:
2643 case Iop_ShlN16x4:
2644 case Iop_ShlN32x2:
sewardj114a9172008-02-09 01:49:32 +00002645 case Iop_ShlN8x8:
sewardjacd2e912005-01-13 19:17:06 +00002646 /* Same scheme as with all other shifts. */
florian434ffae2012-07-19 17:23:42 +00002647 complainIfUndefined(mce, atom2, NULL);
sewardj7cf4e6b2008-05-01 20:24:26 +00002648 return assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2));
sewardjacd2e912005-01-13 19:17:06 +00002649
sewardj7ee7d852011-06-16 11:37:21 +00002650 case Iop_QNarrowBin32Sto16Sx4:
2651 case Iop_QNarrowBin16Sto8Sx8:
2652 case Iop_QNarrowBin16Sto8Ux8:
2653 return vectorNarrowBin64(mce, op, vatom1, vatom2);
sewardjacd2e912005-01-13 19:17:06 +00002654
2655 case Iop_Min8Ux8:
sewardj57f92b02010-08-22 11:54:14 +00002656 case Iop_Min8Sx8:
sewardjacd2e912005-01-13 19:17:06 +00002657 case Iop_Max8Ux8:
sewardj57f92b02010-08-22 11:54:14 +00002658 case Iop_Max8Sx8:
sewardjacd2e912005-01-13 19:17:06 +00002659 case Iop_Avg8Ux8:
2660 case Iop_QSub8Sx8:
2661 case Iop_QSub8Ux8:
2662 case Iop_Sub8x8:
2663 case Iop_CmpGT8Sx8:
sewardj57f92b02010-08-22 11:54:14 +00002664 case Iop_CmpGT8Ux8:
sewardjacd2e912005-01-13 19:17:06 +00002665 case Iop_CmpEQ8x8:
2666 case Iop_QAdd8Sx8:
2667 case Iop_QAdd8Ux8:
sewardj57f92b02010-08-22 11:54:14 +00002668 case Iop_QSal8x8:
2669 case Iop_QShl8x8:
sewardjacd2e912005-01-13 19:17:06 +00002670 case Iop_Add8x8:
sewardj57f92b02010-08-22 11:54:14 +00002671 case Iop_Mul8x8:
2672 case Iop_PolynomialMul8x8:
sewardjacd2e912005-01-13 19:17:06 +00002673 return binary8Ix8(mce, vatom1, vatom2);
2674
2675 case Iop_Min16Sx4:
sewardj57f92b02010-08-22 11:54:14 +00002676 case Iop_Min16Ux4:
sewardjacd2e912005-01-13 19:17:06 +00002677 case Iop_Max16Sx4:
sewardj57f92b02010-08-22 11:54:14 +00002678 case Iop_Max16Ux4:
sewardjacd2e912005-01-13 19:17:06 +00002679 case Iop_Avg16Ux4:
2680 case Iop_QSub16Ux4:
2681 case Iop_QSub16Sx4:
2682 case Iop_Sub16x4:
2683 case Iop_Mul16x4:
2684 case Iop_MulHi16Sx4:
2685 case Iop_MulHi16Ux4:
2686 case Iop_CmpGT16Sx4:
sewardj57f92b02010-08-22 11:54:14 +00002687 case Iop_CmpGT16Ux4:
sewardjacd2e912005-01-13 19:17:06 +00002688 case Iop_CmpEQ16x4:
2689 case Iop_QAdd16Sx4:
2690 case Iop_QAdd16Ux4:
sewardj57f92b02010-08-22 11:54:14 +00002691 case Iop_QSal16x4:
2692 case Iop_QShl16x4:
sewardjacd2e912005-01-13 19:17:06 +00002693 case Iop_Add16x4:
sewardj57f92b02010-08-22 11:54:14 +00002694 case Iop_QDMulHi16Sx4:
2695 case Iop_QRDMulHi16Sx4:
sewardjacd2e912005-01-13 19:17:06 +00002696 return binary16Ix4(mce, vatom1, vatom2);
2697
2698 case Iop_Sub32x2:
sewardj114a9172008-02-09 01:49:32 +00002699 case Iop_Mul32x2:
sewardj57f92b02010-08-22 11:54:14 +00002700 case Iop_Max32Sx2:
2701 case Iop_Max32Ux2:
2702 case Iop_Min32Sx2:
2703 case Iop_Min32Ux2:
sewardjacd2e912005-01-13 19:17:06 +00002704 case Iop_CmpGT32Sx2:
sewardj57f92b02010-08-22 11:54:14 +00002705 case Iop_CmpGT32Ux2:
sewardjacd2e912005-01-13 19:17:06 +00002706 case Iop_CmpEQ32x2:
2707 case Iop_Add32x2:
sewardj57f92b02010-08-22 11:54:14 +00002708 case Iop_QAdd32Ux2:
2709 case Iop_QAdd32Sx2:
2710 case Iop_QSub32Ux2:
2711 case Iop_QSub32Sx2:
2712 case Iop_QSal32x2:
2713 case Iop_QShl32x2:
2714 case Iop_QDMulHi32Sx2:
2715 case Iop_QRDMulHi32Sx2:
sewardjacd2e912005-01-13 19:17:06 +00002716 return binary32Ix2(mce, vatom1, vatom2);
2717
sewardj57f92b02010-08-22 11:54:14 +00002718 case Iop_QSub64Ux1:
2719 case Iop_QSub64Sx1:
2720 case Iop_QAdd64Ux1:
2721 case Iop_QAdd64Sx1:
2722 case Iop_QSal64x1:
2723 case Iop_QShl64x1:
2724 case Iop_Sal64x1:
2725 return binary64Ix1(mce, vatom1, vatom2);
2726
2727 case Iop_QShlN8Sx8:
2728 case Iop_QShlN8x8:
2729 case Iop_QSalN8x8:
florian434ffae2012-07-19 17:23:42 +00002730 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00002731 return mkPCast8x8(mce, vatom1);
2732
2733 case Iop_QShlN16Sx4:
2734 case Iop_QShlN16x4:
2735 case Iop_QSalN16x4:
florian434ffae2012-07-19 17:23:42 +00002736 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00002737 return mkPCast16x4(mce, vatom1);
2738
2739 case Iop_QShlN32Sx2:
2740 case Iop_QShlN32x2:
2741 case Iop_QSalN32x2:
florian434ffae2012-07-19 17:23:42 +00002742 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00002743 return mkPCast32x2(mce, vatom1);
2744
2745 case Iop_QShlN64Sx1:
2746 case Iop_QShlN64x1:
2747 case Iop_QSalN64x1:
florian434ffae2012-07-19 17:23:42 +00002748 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00002749 return mkPCast32x2(mce, vatom1);
2750
2751 case Iop_PwMax32Sx2:
2752 case Iop_PwMax32Ux2:
2753 case Iop_PwMin32Sx2:
2754 case Iop_PwMin32Ux2:
2755 case Iop_PwMax32Fx2:
2756 case Iop_PwMin32Fx2:
sewardj350e8f72012-06-25 07:52:15 +00002757 return assignNew('V', mce, Ity_I64,
2758 binop(Iop_PwMax32Ux2,
2759 mkPCast32x2(mce, vatom1),
2760 mkPCast32x2(mce, vatom2)));
sewardj57f92b02010-08-22 11:54:14 +00002761
2762 case Iop_PwMax16Sx4:
2763 case Iop_PwMax16Ux4:
2764 case Iop_PwMin16Sx4:
2765 case Iop_PwMin16Ux4:
sewardj350e8f72012-06-25 07:52:15 +00002766 return assignNew('V', mce, Ity_I64,
2767 binop(Iop_PwMax16Ux4,
2768 mkPCast16x4(mce, vatom1),
2769 mkPCast16x4(mce, vatom2)));
sewardj57f92b02010-08-22 11:54:14 +00002770
2771 case Iop_PwMax8Sx8:
2772 case Iop_PwMax8Ux8:
2773 case Iop_PwMin8Sx8:
2774 case Iop_PwMin8Ux8:
sewardj350e8f72012-06-25 07:52:15 +00002775 return assignNew('V', mce, Ity_I64,
2776 binop(Iop_PwMax8Ux8,
2777 mkPCast8x8(mce, vatom1),
2778 mkPCast8x8(mce, vatom2)));
sewardj57f92b02010-08-22 11:54:14 +00002779
2780 case Iop_PwAdd32x2:
2781 case Iop_PwAdd32Fx2:
2782 return mkPCast32x2(mce,
sewardj350e8f72012-06-25 07:52:15 +00002783 assignNew('V', mce, Ity_I64,
2784 binop(Iop_PwAdd32x2,
2785 mkPCast32x2(mce, vatom1),
2786 mkPCast32x2(mce, vatom2))));
sewardj57f92b02010-08-22 11:54:14 +00002787
2788 case Iop_PwAdd16x4:
2789 return mkPCast16x4(mce,
sewardj350e8f72012-06-25 07:52:15 +00002790 assignNew('V', mce, Ity_I64,
2791 binop(op, mkPCast16x4(mce, vatom1),
2792 mkPCast16x4(mce, vatom2))));
sewardj57f92b02010-08-22 11:54:14 +00002793
2794 case Iop_PwAdd8x8:
2795 return mkPCast8x8(mce,
sewardj350e8f72012-06-25 07:52:15 +00002796 assignNew('V', mce, Ity_I64,
2797 binop(op, mkPCast8x8(mce, vatom1),
2798 mkPCast8x8(mce, vatom2))));
sewardj57f92b02010-08-22 11:54:14 +00002799
2800 case Iop_Shl8x8:
2801 case Iop_Shr8x8:
2802 case Iop_Sar8x8:
2803 case Iop_Sal8x8:
2804 return mkUifU64(mce,
2805 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
2806 mkPCast8x8(mce,vatom2)
2807 );
2808
2809 case Iop_Shl16x4:
2810 case Iop_Shr16x4:
2811 case Iop_Sar16x4:
2812 case Iop_Sal16x4:
2813 return mkUifU64(mce,
2814 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
2815 mkPCast16x4(mce,vatom2)
2816 );
2817
2818 case Iop_Shl32x2:
2819 case Iop_Shr32x2:
2820 case Iop_Sar32x2:
2821 case Iop_Sal32x2:
2822 return mkUifU64(mce,
2823 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
2824 mkPCast32x2(mce,vatom2)
2825 );
2826
sewardjacd2e912005-01-13 19:17:06 +00002827 /* 64-bit data-steering */
2828 case Iop_InterleaveLO32x2:
2829 case Iop_InterleaveLO16x4:
2830 case Iop_InterleaveLO8x8:
2831 case Iop_InterleaveHI32x2:
2832 case Iop_InterleaveHI16x4:
2833 case Iop_InterleaveHI8x8:
sewardj57f92b02010-08-22 11:54:14 +00002834 case Iop_CatOddLanes8x8:
2835 case Iop_CatEvenLanes8x8:
sewardj114a9172008-02-09 01:49:32 +00002836 case Iop_CatOddLanes16x4:
2837 case Iop_CatEvenLanes16x4:
sewardj57f92b02010-08-22 11:54:14 +00002838 case Iop_InterleaveOddLanes8x8:
2839 case Iop_InterleaveEvenLanes8x8:
2840 case Iop_InterleaveOddLanes16x4:
2841 case Iop_InterleaveEvenLanes16x4:
sewardj7cf4e6b2008-05-01 20:24:26 +00002842 return assignNew('V', mce, Ity_I64, binop(op, vatom1, vatom2));
sewardjacd2e912005-01-13 19:17:06 +00002843
sewardj57f92b02010-08-22 11:54:14 +00002844 case Iop_GetElem8x8:
florian434ffae2012-07-19 17:23:42 +00002845 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00002846 return assignNew('V', mce, Ity_I8, binop(op, vatom1, atom2));
2847 case Iop_GetElem16x4:
florian434ffae2012-07-19 17:23:42 +00002848 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00002849 return assignNew('V', mce, Ity_I16, binop(op, vatom1, atom2));
2850 case Iop_GetElem32x2:
florian434ffae2012-07-19 17:23:42 +00002851 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00002852 return assignNew('V', mce, Ity_I32, binop(op, vatom1, atom2));
2853
sewardj114a9172008-02-09 01:49:32 +00002854 /* Perm8x8: rearrange values in left arg using steering values
2855 from right arg. So rearrange the vbits in the same way but
2856 pessimise wrt steering values. */
2857 case Iop_Perm8x8:
2858 return mkUifU64(
2859 mce,
sewardj7cf4e6b2008-05-01 20:24:26 +00002860 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
sewardj114a9172008-02-09 01:49:32 +00002861 mkPCast8x8(mce, vatom2)
2862 );
2863
sewardj20d38f22005-02-07 23:50:18 +00002864 /* V128-bit SIMD */
sewardj0b070592004-12-10 21:44:22 +00002865
sewardj57f92b02010-08-22 11:54:14 +00002866 case Iop_ShrN8x16:
sewardja1d93302004-12-12 16:45:06 +00002867 case Iop_ShrN16x8:
2868 case Iop_ShrN32x4:
2869 case Iop_ShrN64x2:
sewardj57f92b02010-08-22 11:54:14 +00002870 case Iop_SarN8x16:
sewardja1d93302004-12-12 16:45:06 +00002871 case Iop_SarN16x8:
2872 case Iop_SarN32x4:
sewardj57f92b02010-08-22 11:54:14 +00002873 case Iop_SarN64x2:
2874 case Iop_ShlN8x16:
sewardja1d93302004-12-12 16:45:06 +00002875 case Iop_ShlN16x8:
2876 case Iop_ShlN32x4:
2877 case Iop_ShlN64x2:
sewardj620eb5b2005-10-22 12:50:43 +00002878 /* Same scheme as with all other shifts. Note: 22 Oct 05:
2879 this is wrong now, scalar shifts are done properly lazily.
2880 Vector shifts should be fixed too. */
florian434ffae2012-07-19 17:23:42 +00002881 complainIfUndefined(mce, atom2, NULL);
sewardj7cf4e6b2008-05-01 20:24:26 +00002882 return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2));
sewardja1d93302004-12-12 16:45:06 +00002883
sewardjcbf8be72005-11-10 18:34:41 +00002884 /* V x V shifts/rotates are done using the standard lazy scheme. */
sewardj43d60752005-11-10 18:13:01 +00002885 case Iop_Shl8x16:
2886 case Iop_Shr8x16:
2887 case Iop_Sar8x16:
sewardj57f92b02010-08-22 11:54:14 +00002888 case Iop_Sal8x16:
sewardjcbf8be72005-11-10 18:34:41 +00002889 case Iop_Rol8x16:
sewardj43d60752005-11-10 18:13:01 +00002890 return mkUifUV128(mce,
sewardj7cf4e6b2008-05-01 20:24:26 +00002891 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
sewardj43d60752005-11-10 18:13:01 +00002892 mkPCast8x16(mce,vatom2)
2893 );
2894
2895 case Iop_Shl16x8:
2896 case Iop_Shr16x8:
2897 case Iop_Sar16x8:
sewardj57f92b02010-08-22 11:54:14 +00002898 case Iop_Sal16x8:
sewardjcbf8be72005-11-10 18:34:41 +00002899 case Iop_Rol16x8:
sewardj43d60752005-11-10 18:13:01 +00002900 return mkUifUV128(mce,
sewardj7cf4e6b2008-05-01 20:24:26 +00002901 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
sewardj43d60752005-11-10 18:13:01 +00002902 mkPCast16x8(mce,vatom2)
2903 );
2904
2905 case Iop_Shl32x4:
2906 case Iop_Shr32x4:
2907 case Iop_Sar32x4:
sewardj57f92b02010-08-22 11:54:14 +00002908 case Iop_Sal32x4:
sewardjcbf8be72005-11-10 18:34:41 +00002909 case Iop_Rol32x4:
sewardj43d60752005-11-10 18:13:01 +00002910 return mkUifUV128(mce,
sewardj7cf4e6b2008-05-01 20:24:26 +00002911 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
sewardj43d60752005-11-10 18:13:01 +00002912 mkPCast32x4(mce,vatom2)
2913 );
2914
sewardj57f92b02010-08-22 11:54:14 +00002915 case Iop_Shl64x2:
2916 case Iop_Shr64x2:
2917 case Iop_Sar64x2:
2918 case Iop_Sal64x2:
2919 return mkUifUV128(mce,
2920 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
2921 mkPCast64x2(mce,vatom2)
2922 );
2923
2924 case Iop_F32ToFixed32Ux4_RZ:
2925 case Iop_F32ToFixed32Sx4_RZ:
2926 case Iop_Fixed32UToF32x4_RN:
2927 case Iop_Fixed32SToF32x4_RN:
florian434ffae2012-07-19 17:23:42 +00002928 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00002929 return mkPCast32x4(mce, vatom1);
2930
2931 case Iop_F32ToFixed32Ux2_RZ:
2932 case Iop_F32ToFixed32Sx2_RZ:
2933 case Iop_Fixed32UToF32x2_RN:
2934 case Iop_Fixed32SToF32x2_RN:
florian434ffae2012-07-19 17:23:42 +00002935 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00002936 return mkPCast32x2(mce, vatom1);
2937
sewardja1d93302004-12-12 16:45:06 +00002938 case Iop_QSub8Ux16:
2939 case Iop_QSub8Sx16:
2940 case Iop_Sub8x16:
2941 case Iop_Min8Ux16:
sewardj43d60752005-11-10 18:13:01 +00002942 case Iop_Min8Sx16:
sewardja1d93302004-12-12 16:45:06 +00002943 case Iop_Max8Ux16:
sewardj43d60752005-11-10 18:13:01 +00002944 case Iop_Max8Sx16:
sewardja1d93302004-12-12 16:45:06 +00002945 case Iop_CmpGT8Sx16:
sewardj43d60752005-11-10 18:13:01 +00002946 case Iop_CmpGT8Ux16:
sewardja1d93302004-12-12 16:45:06 +00002947 case Iop_CmpEQ8x16:
2948 case Iop_Avg8Ux16:
sewardj43d60752005-11-10 18:13:01 +00002949 case Iop_Avg8Sx16:
sewardja1d93302004-12-12 16:45:06 +00002950 case Iop_QAdd8Ux16:
2951 case Iop_QAdd8Sx16:
sewardj57f92b02010-08-22 11:54:14 +00002952 case Iop_QSal8x16:
2953 case Iop_QShl8x16:
sewardja1d93302004-12-12 16:45:06 +00002954 case Iop_Add8x16:
sewardj57f92b02010-08-22 11:54:14 +00002955 case Iop_Mul8x16:
2956 case Iop_PolynomialMul8x16:
sewardja1d93302004-12-12 16:45:06 +00002957 return binary8Ix16(mce, vatom1, vatom2);
2958
2959 case Iop_QSub16Ux8:
2960 case Iop_QSub16Sx8:
2961 case Iop_Sub16x8:
2962 case Iop_Mul16x8:
2963 case Iop_MulHi16Sx8:
2964 case Iop_MulHi16Ux8:
2965 case Iop_Min16Sx8:
sewardj43d60752005-11-10 18:13:01 +00002966 case Iop_Min16Ux8:
sewardja1d93302004-12-12 16:45:06 +00002967 case Iop_Max16Sx8:
sewardj43d60752005-11-10 18:13:01 +00002968 case Iop_Max16Ux8:
sewardja1d93302004-12-12 16:45:06 +00002969 case Iop_CmpGT16Sx8:
sewardj43d60752005-11-10 18:13:01 +00002970 case Iop_CmpGT16Ux8:
sewardja1d93302004-12-12 16:45:06 +00002971 case Iop_CmpEQ16x8:
2972 case Iop_Avg16Ux8:
sewardj43d60752005-11-10 18:13:01 +00002973 case Iop_Avg16Sx8:
sewardja1d93302004-12-12 16:45:06 +00002974 case Iop_QAdd16Ux8:
2975 case Iop_QAdd16Sx8:
sewardj57f92b02010-08-22 11:54:14 +00002976 case Iop_QSal16x8:
2977 case Iop_QShl16x8:
sewardja1d93302004-12-12 16:45:06 +00002978 case Iop_Add16x8:
sewardj57f92b02010-08-22 11:54:14 +00002979 case Iop_QDMulHi16Sx8:
2980 case Iop_QRDMulHi16Sx8:
sewardja1d93302004-12-12 16:45:06 +00002981 return binary16Ix8(mce, vatom1, vatom2);
2982
2983 case Iop_Sub32x4:
2984 case Iop_CmpGT32Sx4:
sewardj43d60752005-11-10 18:13:01 +00002985 case Iop_CmpGT32Ux4:
sewardja1d93302004-12-12 16:45:06 +00002986 case Iop_CmpEQ32x4:
sewardj43d60752005-11-10 18:13:01 +00002987 case Iop_QAdd32Sx4:
2988 case Iop_QAdd32Ux4:
2989 case Iop_QSub32Sx4:
2990 case Iop_QSub32Ux4:
sewardj57f92b02010-08-22 11:54:14 +00002991 case Iop_QSal32x4:
2992 case Iop_QShl32x4:
sewardj43d60752005-11-10 18:13:01 +00002993 case Iop_Avg32Ux4:
2994 case Iop_Avg32Sx4:
sewardja1d93302004-12-12 16:45:06 +00002995 case Iop_Add32x4:
sewardj43d60752005-11-10 18:13:01 +00002996 case Iop_Max32Ux4:
2997 case Iop_Max32Sx4:
2998 case Iop_Min32Ux4:
2999 case Iop_Min32Sx4:
sewardjb823b852010-06-18 08:18:38 +00003000 case Iop_Mul32x4:
sewardj57f92b02010-08-22 11:54:14 +00003001 case Iop_QDMulHi32Sx4:
3002 case Iop_QRDMulHi32Sx4:
sewardja1d93302004-12-12 16:45:06 +00003003 return binary32Ix4(mce, vatom1, vatom2);
3004
3005 case Iop_Sub64x2:
3006 case Iop_Add64x2:
sewardj9a2afe92011-10-19 15:24:55 +00003007 case Iop_CmpEQ64x2:
sewardjb823b852010-06-18 08:18:38 +00003008 case Iop_CmpGT64Sx2:
sewardj57f92b02010-08-22 11:54:14 +00003009 case Iop_QSal64x2:
3010 case Iop_QShl64x2:
3011 case Iop_QAdd64Ux2:
3012 case Iop_QAdd64Sx2:
3013 case Iop_QSub64Ux2:
3014 case Iop_QSub64Sx2:
sewardja1d93302004-12-12 16:45:06 +00003015 return binary64Ix2(mce, vatom1, vatom2);
3016
sewardj7ee7d852011-06-16 11:37:21 +00003017 case Iop_QNarrowBin32Sto16Sx8:
3018 case Iop_QNarrowBin32Uto16Ux8:
3019 case Iop_QNarrowBin32Sto16Ux8:
3020 case Iop_QNarrowBin16Sto8Sx16:
3021 case Iop_QNarrowBin16Uto8Ux16:
3022 case Iop_QNarrowBin16Sto8Ux16:
3023 return vectorNarrowBinV128(mce, op, vatom1, vatom2);
sewardja1d93302004-12-12 16:45:06 +00003024
sewardj0b070592004-12-10 21:44:22 +00003025 case Iop_Sub64Fx2:
3026 case Iop_Mul64Fx2:
3027 case Iop_Min64Fx2:
3028 case Iop_Max64Fx2:
3029 case Iop_Div64Fx2:
3030 case Iop_CmpLT64Fx2:
3031 case Iop_CmpLE64Fx2:
3032 case Iop_CmpEQ64Fx2:
sewardj545663e2005-11-05 01:55:04 +00003033 case Iop_CmpUN64Fx2:
sewardj0b070592004-12-10 21:44:22 +00003034 case Iop_Add64Fx2:
3035 return binary64Fx2(mce, vatom1, vatom2);
3036
3037 case Iop_Sub64F0x2:
3038 case Iop_Mul64F0x2:
3039 case Iop_Min64F0x2:
3040 case Iop_Max64F0x2:
3041 case Iop_Div64F0x2:
3042 case Iop_CmpLT64F0x2:
3043 case Iop_CmpLE64F0x2:
3044 case Iop_CmpEQ64F0x2:
sewardj545663e2005-11-05 01:55:04 +00003045 case Iop_CmpUN64F0x2:
sewardj0b070592004-12-10 21:44:22 +00003046 case Iop_Add64F0x2:
3047 return binary64F0x2(mce, vatom1, vatom2);
3048
sewardj170ee212004-12-10 18:57:51 +00003049 case Iop_Sub32Fx4:
3050 case Iop_Mul32Fx4:
3051 case Iop_Min32Fx4:
3052 case Iop_Max32Fx4:
3053 case Iop_Div32Fx4:
3054 case Iop_CmpLT32Fx4:
3055 case Iop_CmpLE32Fx4:
3056 case Iop_CmpEQ32Fx4:
sewardj545663e2005-11-05 01:55:04 +00003057 case Iop_CmpUN32Fx4:
cerione78ba2a2005-11-14 03:00:35 +00003058 case Iop_CmpGT32Fx4:
3059 case Iop_CmpGE32Fx4:
sewardj3245c912004-12-10 14:58:26 +00003060 case Iop_Add32Fx4:
sewardj57f92b02010-08-22 11:54:14 +00003061 case Iop_Recps32Fx4:
3062 case Iop_Rsqrts32Fx4:
sewardj3245c912004-12-10 14:58:26 +00003063 return binary32Fx4(mce, vatom1, vatom2);
3064
sewardj57f92b02010-08-22 11:54:14 +00003065 case Iop_Sub32Fx2:
3066 case Iop_Mul32Fx2:
3067 case Iop_Min32Fx2:
3068 case Iop_Max32Fx2:
3069 case Iop_CmpEQ32Fx2:
3070 case Iop_CmpGT32Fx2:
3071 case Iop_CmpGE32Fx2:
3072 case Iop_Add32Fx2:
3073 case Iop_Recps32Fx2:
3074 case Iop_Rsqrts32Fx2:
3075 return binary32Fx2(mce, vatom1, vatom2);
3076
sewardj170ee212004-12-10 18:57:51 +00003077 case Iop_Sub32F0x4:
3078 case Iop_Mul32F0x4:
3079 case Iop_Min32F0x4:
3080 case Iop_Max32F0x4:
3081 case Iop_Div32F0x4:
3082 case Iop_CmpLT32F0x4:
3083 case Iop_CmpLE32F0x4:
3084 case Iop_CmpEQ32F0x4:
sewardj545663e2005-11-05 01:55:04 +00003085 case Iop_CmpUN32F0x4:
sewardj170ee212004-12-10 18:57:51 +00003086 case Iop_Add32F0x4:
3087 return binary32F0x4(mce, vatom1, vatom2);
3088
sewardj57f92b02010-08-22 11:54:14 +00003089 case Iop_QShlN8Sx16:
3090 case Iop_QShlN8x16:
3091 case Iop_QSalN8x16:
florian434ffae2012-07-19 17:23:42 +00003092 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00003093 return mkPCast8x16(mce, vatom1);
3094
3095 case Iop_QShlN16Sx8:
3096 case Iop_QShlN16x8:
3097 case Iop_QSalN16x8:
florian434ffae2012-07-19 17:23:42 +00003098 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00003099 return mkPCast16x8(mce, vatom1);
3100
3101 case Iop_QShlN32Sx4:
3102 case Iop_QShlN32x4:
3103 case Iop_QSalN32x4:
florian434ffae2012-07-19 17:23:42 +00003104 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00003105 return mkPCast32x4(mce, vatom1);
3106
3107 case Iop_QShlN64Sx2:
3108 case Iop_QShlN64x2:
3109 case Iop_QSalN64x2:
florian434ffae2012-07-19 17:23:42 +00003110 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00003111 return mkPCast32x4(mce, vatom1);
3112
3113 case Iop_Mull32Sx2:
3114 case Iop_Mull32Ux2:
3115 case Iop_QDMulLong32Sx2:
sewardj7ee7d852011-06-16 11:37:21 +00003116 return vectorWidenI64(mce, Iop_Widen32Sto64x2,
3117 mkUifU64(mce, vatom1, vatom2));
sewardj57f92b02010-08-22 11:54:14 +00003118
3119 case Iop_Mull16Sx4:
3120 case Iop_Mull16Ux4:
3121 case Iop_QDMulLong16Sx4:
sewardj7ee7d852011-06-16 11:37:21 +00003122 return vectorWidenI64(mce, Iop_Widen16Sto32x4,
3123 mkUifU64(mce, vatom1, vatom2));
sewardj57f92b02010-08-22 11:54:14 +00003124
3125 case Iop_Mull8Sx8:
3126 case Iop_Mull8Ux8:
3127 case Iop_PolynomialMull8x8:
sewardj7ee7d852011-06-16 11:37:21 +00003128 return vectorWidenI64(mce, Iop_Widen8Sto16x8,
3129 mkUifU64(mce, vatom1, vatom2));
sewardj57f92b02010-08-22 11:54:14 +00003130
3131 case Iop_PwAdd32x4:
3132 return mkPCast32x4(mce,
3133 assignNew('V', mce, Ity_V128, binop(op, mkPCast32x4(mce, vatom1),
3134 mkPCast32x4(mce, vatom2))));
3135
3136 case Iop_PwAdd16x8:
3137 return mkPCast16x8(mce,
3138 assignNew('V', mce, Ity_V128, binop(op, mkPCast16x8(mce, vatom1),
3139 mkPCast16x8(mce, vatom2))));
3140
3141 case Iop_PwAdd8x16:
3142 return mkPCast8x16(mce,
3143 assignNew('V', mce, Ity_V128, binop(op, mkPCast8x16(mce, vatom1),
3144 mkPCast8x16(mce, vatom2))));
3145
sewardj20d38f22005-02-07 23:50:18 +00003146 /* V128-bit data-steering */
3147 case Iop_SetV128lo32:
3148 case Iop_SetV128lo64:
3149 case Iop_64HLtoV128:
sewardja1d93302004-12-12 16:45:06 +00003150 case Iop_InterleaveLO64x2:
3151 case Iop_InterleaveLO32x4:
3152 case Iop_InterleaveLO16x8:
3153 case Iop_InterleaveLO8x16:
3154 case Iop_InterleaveHI64x2:
3155 case Iop_InterleaveHI32x4:
3156 case Iop_InterleaveHI16x8:
3157 case Iop_InterleaveHI8x16:
sewardj57f92b02010-08-22 11:54:14 +00003158 case Iop_CatOddLanes8x16:
3159 case Iop_CatOddLanes16x8:
3160 case Iop_CatOddLanes32x4:
3161 case Iop_CatEvenLanes8x16:
3162 case Iop_CatEvenLanes16x8:
3163 case Iop_CatEvenLanes32x4:
3164 case Iop_InterleaveOddLanes8x16:
3165 case Iop_InterleaveOddLanes16x8:
3166 case Iop_InterleaveOddLanes32x4:
3167 case Iop_InterleaveEvenLanes8x16:
3168 case Iop_InterleaveEvenLanes16x8:
3169 case Iop_InterleaveEvenLanes32x4:
sewardj7cf4e6b2008-05-01 20:24:26 +00003170 return assignNew('V', mce, Ity_V128, binop(op, vatom1, vatom2));
sewardj57f92b02010-08-22 11:54:14 +00003171
3172 case Iop_GetElem8x16:
florian434ffae2012-07-19 17:23:42 +00003173 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00003174 return assignNew('V', mce, Ity_I8, binop(op, vatom1, atom2));
3175 case Iop_GetElem16x8:
florian434ffae2012-07-19 17:23:42 +00003176 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00003177 return assignNew('V', mce, Ity_I16, binop(op, vatom1, atom2));
3178 case Iop_GetElem32x4:
florian434ffae2012-07-19 17:23:42 +00003179 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00003180 return assignNew('V', mce, Ity_I32, binop(op, vatom1, atom2));
3181 case Iop_GetElem64x2:
florian434ffae2012-07-19 17:23:42 +00003182 complainIfUndefined(mce, atom2, NULL);
sewardj57f92b02010-08-22 11:54:14 +00003183 return assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2));
3184
sewardj620eb5b2005-10-22 12:50:43 +00003185 /* Perm8x16: rearrange values in left arg using steering values
3186 from right arg. So rearrange the vbits in the same way but
sewardj350e8f72012-06-25 07:52:15 +00003187 pessimise wrt steering values. Perm32x4 ditto. */
sewardj620eb5b2005-10-22 12:50:43 +00003188 case Iop_Perm8x16:
3189 return mkUifUV128(
3190 mce,
sewardj7cf4e6b2008-05-01 20:24:26 +00003191 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
sewardj620eb5b2005-10-22 12:50:43 +00003192 mkPCast8x16(mce, vatom2)
3193 );
sewardj350e8f72012-06-25 07:52:15 +00003194 case Iop_Perm32x4:
3195 return mkUifUV128(
3196 mce,
3197 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
3198 mkPCast32x4(mce, vatom2)
3199 );
sewardj170ee212004-12-10 18:57:51 +00003200
sewardj43d60752005-11-10 18:13:01 +00003201 /* These two take the lower half of each 16-bit lane, sign/zero
3202 extend it to 32, and multiply together, producing a 32x4
3203 result (and implicitly ignoring half the operand bits). So
3204 treat it as a bunch of independent 16x8 operations, but then
3205 do 32-bit shifts left-right to copy the lower half results
3206 (which are all 0s or all 1s due to PCasting in binary16Ix8)
3207 into the upper half of each result lane. */
3208 case Iop_MullEven16Ux8:
3209 case Iop_MullEven16Sx8: {
3210 IRAtom* at;
3211 at = binary16Ix8(mce,vatom1,vatom2);
sewardj7cf4e6b2008-05-01 20:24:26 +00003212 at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN32x4, at, mkU8(16)));
3213 at = assignNew('V', mce, Ity_V128, binop(Iop_SarN32x4, at, mkU8(16)));
sewardj43d60752005-11-10 18:13:01 +00003214 return at;
3215 }
3216
3217 /* Same deal as Iop_MullEven16{S,U}x8 */
3218 case Iop_MullEven8Ux16:
3219 case Iop_MullEven8Sx16: {
3220 IRAtom* at;
3221 at = binary8Ix16(mce,vatom1,vatom2);
sewardj7cf4e6b2008-05-01 20:24:26 +00003222 at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN16x8, at, mkU8(8)));
3223 at = assignNew('V', mce, Ity_V128, binop(Iop_SarN16x8, at, mkU8(8)));
sewardj43d60752005-11-10 18:13:01 +00003224 return at;
3225 }
3226
3227 /* narrow 2xV128 into 1xV128, hi half from left arg, in a 2 x
3228 32x4 -> 16x8 laneage, discarding the upper half of each lane.
3229 Simply apply same op to the V bits, since this really no more
3230 than a data steering operation. */
sewardj7ee7d852011-06-16 11:37:21 +00003231 case Iop_NarrowBin32to16x8:
3232 case Iop_NarrowBin16to8x16:
sewardj7cf4e6b2008-05-01 20:24:26 +00003233 return assignNew('V', mce, Ity_V128,
3234 binop(op, vatom1, vatom2));
sewardj43d60752005-11-10 18:13:01 +00003235
3236 case Iop_ShrV128:
3237 case Iop_ShlV128:
3238 /* Same scheme as with all other shifts. Note: 10 Nov 05:
3239 this is wrong now, scalar shifts are done properly lazily.
3240 Vector shifts should be fixed too. */
florian434ffae2012-07-19 17:23:42 +00003241 complainIfUndefined(mce, atom2, NULL);
sewardj7cf4e6b2008-05-01 20:24:26 +00003242 return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2));
sewardj43d60752005-11-10 18:13:01 +00003243
sewardj69a13322005-04-23 01:14:51 +00003244 /* I128-bit data-steering */
3245 case Iop_64HLto128:
sewardj7cf4e6b2008-05-01 20:24:26 +00003246 return assignNew('V', mce, Ity_I128, binop(op, vatom1, vatom2));
sewardj69a13322005-04-23 01:14:51 +00003247
sewardj350e8f72012-06-25 07:52:15 +00003248 /* V256-bit SIMD */
3249
3250 case Iop_Add64Fx4:
3251 case Iop_Sub64Fx4:
3252 case Iop_Mul64Fx4:
3253 case Iop_Div64Fx4:
3254 case Iop_Max64Fx4:
3255 case Iop_Min64Fx4:
3256 return binary64Fx4(mce, vatom1, vatom2);
3257
3258 case Iop_Add32Fx8:
3259 case Iop_Sub32Fx8:
3260 case Iop_Mul32Fx8:
3261 case Iop_Div32Fx8:
3262 case Iop_Max32Fx8:
3263 case Iop_Min32Fx8:
3264 return binary32Fx8(mce, vatom1, vatom2);
3265
3266 /* V256-bit data-steering */
3267 case Iop_V128HLtoV256:
3268 return assignNew('V', mce, Ity_V256, binop(op, vatom1, vatom2));
3269
sewardj3245c912004-12-10 14:58:26 +00003270 /* Scalar floating point */
3271
sewardjb5b87402011-03-07 16:05:35 +00003272 case Iop_F32toI64S:
florian1b9609a2012-09-01 00:15:45 +00003273 case Iop_F32toI64U:
sewardjb5b87402011-03-07 16:05:35 +00003274 /* I32(rm) x F32 -> I64 */
3275 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3276
3277 case Iop_I64StoF32:
3278 /* I32(rm) x I64 -> F32 */
3279 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3280
sewardjed69fdb2006-02-03 16:12:27 +00003281 case Iop_RoundF64toInt:
3282 case Iop_RoundF64toF32:
sewardj06f96d02009-12-31 19:24:12 +00003283 case Iop_F64toI64S:
sewardja201c452011-07-24 14:15:54 +00003284 case Iop_F64toI64U:
sewardj06f96d02009-12-31 19:24:12 +00003285 case Iop_I64StoF64:
sewardjf34eb492011-04-15 11:57:05 +00003286 case Iop_I64UtoF64:
sewardj22ac5f42006-02-03 22:55:04 +00003287 case Iop_SinF64:
3288 case Iop_CosF64:
3289 case Iop_TanF64:
3290 case Iop_2xm1F64:
3291 case Iop_SqrtF64:
3292 /* I32(rm) x I64/F64 -> I64/F64 */
sewardj95448072004-11-22 20:19:51 +00003293 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3294
sewardjea8b02f2012-04-12 17:28:57 +00003295 case Iop_ShlD64:
3296 case Iop_ShrD64:
sewardj18c72fa2012-04-23 11:22:05 +00003297 case Iop_RoundD64toInt:
sewardjea8b02f2012-04-12 17:28:57 +00003298 /* I32(DFP rm) x D64 -> D64 */
3299 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3300
3301 case Iop_ShlD128:
3302 case Iop_ShrD128:
sewardj18c72fa2012-04-23 11:22:05 +00003303 case Iop_RoundD128toInt:
3304 /* I32(DFP rm) x D128 -> D128 */
sewardjea8b02f2012-04-12 17:28:57 +00003305 return mkLazy2(mce, Ity_I128, vatom1, vatom2);
3306
3307 case Iop_D64toI64S:
3308 case Iop_I64StoD64:
3309 /* I64(DFP rm) x I64 -> D64 */
3310 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3311
sewardjd376a762010-06-27 09:08:54 +00003312 case Iop_RoundF32toInt:
sewardjaec1be32010-01-03 22:29:32 +00003313 case Iop_SqrtF32:
3314 /* I32(rm) x I32/F32 -> I32/F32 */
3315 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3316
sewardjb5b87402011-03-07 16:05:35 +00003317 case Iop_SqrtF128:
3318 /* I32(rm) x F128 -> F128 */
3319 return mkLazy2(mce, Ity_I128, vatom1, vatom2);
3320
3321 case Iop_I32StoF32:
florian1b9609a2012-09-01 00:15:45 +00003322 case Iop_I32UtoF32:
sewardjb5b87402011-03-07 16:05:35 +00003323 case Iop_F32toI32S:
florian1b9609a2012-09-01 00:15:45 +00003324 case Iop_F32toI32U:
sewardjb5b87402011-03-07 16:05:35 +00003325 /* First arg is I32 (rounding mode), second is F32/I32 (data). */
3326 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3327
3328 case Iop_F128toI32S: /* IRRoundingMode(I32) x F128 -> signed I32 */
florian1b9609a2012-09-01 00:15:45 +00003329 case Iop_F128toI32U: /* IRRoundingMode(I32) x F128 -> unsigned I32 */
sewardjb5b87402011-03-07 16:05:35 +00003330 case Iop_F128toF32: /* IRRoundingMode(I32) x F128 -> F32 */
3331 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3332
3333 case Iop_F128toI64S: /* IRRoundingMode(I32) x F128 -> signed I64 */
florian1b9609a2012-09-01 00:15:45 +00003334 case Iop_F128toI64U: /* IRRoundingMode(I32) x F128 -> unsigned I64 */
sewardjb5b87402011-03-07 16:05:35 +00003335 case Iop_F128toF64: /* IRRoundingMode(I32) x F128 -> F64 */
sewardjea8b02f2012-04-12 17:28:57 +00003336 case Iop_D128toD64: /* IRRoundingModeDFP(I64) x D128 -> D64 */
3337 case Iop_D128toI64S: /* IRRoundingModeDFP(I64) x D128 -> signed I64 */
sewardjb5b87402011-03-07 16:05:35 +00003338 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3339
3340 case Iop_F64HLtoF128:
sewardjb0ccb4d2012-04-02 10:22:05 +00003341 case Iop_D64HLtoD128:
sewardj350e8f72012-06-25 07:52:15 +00003342 return assignNew('V', mce, Ity_I128,
3343 binop(Iop_64HLto128, vatom1, vatom2));
sewardjb5b87402011-03-07 16:05:35 +00003344
sewardj59570ff2010-01-01 11:59:33 +00003345 case Iop_F64toI32U:
sewardj06f96d02009-12-31 19:24:12 +00003346 case Iop_F64toI32S:
sewardje9e16d32004-12-10 13:17:55 +00003347 case Iop_F64toF32:
sewardjf34eb492011-04-15 11:57:05 +00003348 case Iop_I64UtoF32:
sewardj95448072004-11-22 20:19:51 +00003349 /* First arg is I32 (rounding mode), second is F64 (data). */
3350 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3351
sewardjea8b02f2012-04-12 17:28:57 +00003352 case Iop_D64toD32:
3353 /* First arg is I64 (DFProunding mode), second is D64 (data). */
3354 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3355
sewardj06f96d02009-12-31 19:24:12 +00003356 case Iop_F64toI16S:
sewardj95448072004-11-22 20:19:51 +00003357 /* First arg is I32 (rounding mode), second is F64 (data). */
3358 return mkLazy2(mce, Ity_I16, vatom1, vatom2);
3359
sewardj18c72fa2012-04-23 11:22:05 +00003360 case Iop_InsertExpD64:
3361 /* I64 x I64 -> D64 */
3362 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3363
3364 case Iop_InsertExpD128:
3365 /* I64 x I128 -> D128 */
3366 return mkLazy2(mce, Ity_I128, vatom1, vatom2);
3367
sewardjb5b87402011-03-07 16:05:35 +00003368 case Iop_CmpF32:
sewardj95448072004-11-22 20:19:51 +00003369 case Iop_CmpF64:
sewardjb5b87402011-03-07 16:05:35 +00003370 case Iop_CmpF128:
sewardj18c72fa2012-04-23 11:22:05 +00003371 case Iop_CmpD64:
3372 case Iop_CmpD128:
sewardj95448072004-11-22 20:19:51 +00003373 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3374
3375 /* non-FP after here */
3376
3377 case Iop_DivModU64to32:
3378 case Iop_DivModS64to32:
3379 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3380
sewardj69a13322005-04-23 01:14:51 +00003381 case Iop_DivModU128to64:
3382 case Iop_DivModS128to64:
3383 return mkLazy2(mce, Ity_I128, vatom1, vatom2);
3384
florian537ed2d2012-08-20 16:51:39 +00003385 case Iop_8HLto16:
3386 return assignNew('V', mce, Ity_I16, binop(op, vatom1, vatom2));
sewardj95448072004-11-22 20:19:51 +00003387 case Iop_16HLto32:
sewardj7cf4e6b2008-05-01 20:24:26 +00003388 return assignNew('V', mce, Ity_I32, binop(op, vatom1, vatom2));
sewardj95448072004-11-22 20:19:51 +00003389 case Iop_32HLto64:
sewardj7cf4e6b2008-05-01 20:24:26 +00003390 return assignNew('V', mce, Ity_I64, binop(op, vatom1, vatom2));
sewardj95448072004-11-22 20:19:51 +00003391
sewardjb5b87402011-03-07 16:05:35 +00003392 case Iop_DivModS64to64:
sewardj6cf40ff2005-04-20 22:31:26 +00003393 case Iop_MullS64:
3394 case Iop_MullU64: {
3395 IRAtom* vLo64 = mkLeft64(mce, mkUifU64(mce, vatom1,vatom2));
3396 IRAtom* vHi64 = mkPCastTo(mce, Ity_I64, vLo64);
sewardj350e8f72012-06-25 07:52:15 +00003397 return assignNew('V', mce, Ity_I128,
3398 binop(Iop_64HLto128, vHi64, vLo64));
sewardj6cf40ff2005-04-20 22:31:26 +00003399 }
3400
sewardj95448072004-11-22 20:19:51 +00003401 case Iop_MullS32:
3402 case Iop_MullU32: {
3403 IRAtom* vLo32 = mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
3404 IRAtom* vHi32 = mkPCastTo(mce, Ity_I32, vLo32);
sewardj350e8f72012-06-25 07:52:15 +00003405 return assignNew('V', mce, Ity_I64,
3406 binop(Iop_32HLto64, vHi32, vLo32));
sewardj95448072004-11-22 20:19:51 +00003407 }
3408
3409 case Iop_MullS16:
3410 case Iop_MullU16: {
3411 IRAtom* vLo16 = mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
3412 IRAtom* vHi16 = mkPCastTo(mce, Ity_I16, vLo16);
sewardj350e8f72012-06-25 07:52:15 +00003413 return assignNew('V', mce, Ity_I32,
3414 binop(Iop_16HLto32, vHi16, vLo16));
sewardj95448072004-11-22 20:19:51 +00003415 }
3416
3417 case Iop_MullS8:
3418 case Iop_MullU8: {
3419 IRAtom* vLo8 = mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
3420 IRAtom* vHi8 = mkPCastTo(mce, Ity_I8, vLo8);
sewardj7cf4e6b2008-05-01 20:24:26 +00003421 return assignNew('V', mce, Ity_I16, binop(Iop_8HLto16, vHi8, vLo8));
sewardj95448072004-11-22 20:19:51 +00003422 }
3423
sewardj5af05062010-10-18 16:31:14 +00003424 case Iop_Sad8Ux4: /* maybe we could do better? ftm, do mkLazy2. */
cerion9e591082005-06-23 15:28:34 +00003425 case Iop_DivS32:
3426 case Iop_DivU32:
sewardja201c452011-07-24 14:15:54 +00003427 case Iop_DivU32E:
sewardj169ac042011-09-05 12:12:34 +00003428 case Iop_DivS32E:
sewardj2157b2c2012-07-11 13:20:58 +00003429 case Iop_QAdd32S: /* could probably do better */
3430 case Iop_QSub32S: /* could probably do better */
cerion9e591082005-06-23 15:28:34 +00003431 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3432
sewardjb00944a2005-12-23 12:47:16 +00003433 case Iop_DivS64:
3434 case Iop_DivU64:
sewardja201c452011-07-24 14:15:54 +00003435 case Iop_DivS64E:
sewardj169ac042011-09-05 12:12:34 +00003436 case Iop_DivU64E:
sewardjb00944a2005-12-23 12:47:16 +00003437 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3438
sewardj95448072004-11-22 20:19:51 +00003439 case Iop_Add32:
sewardj54eac252012-03-27 10:19:39 +00003440 if (mce->bogusLiterals || mce->useLLVMworkarounds)
sewardjd5204dc2004-12-31 01:16:11 +00003441 return expensiveAddSub(mce,True,Ity_I32,
3442 vatom1,vatom2, atom1,atom2);
3443 else
3444 goto cheap_AddSub32;
sewardj95448072004-11-22 20:19:51 +00003445 case Iop_Sub32:
sewardjd5204dc2004-12-31 01:16:11 +00003446 if (mce->bogusLiterals)
3447 return expensiveAddSub(mce,False,Ity_I32,
3448 vatom1,vatom2, atom1,atom2);
3449 else
3450 goto cheap_AddSub32;
3451
3452 cheap_AddSub32:
sewardj95448072004-11-22 20:19:51 +00003453 case Iop_Mul32:
sewardj992dff92005-10-07 11:08:55 +00003454 return mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
3455
sewardj463b3d92005-07-18 11:41:15 +00003456 case Iop_CmpORD32S:
3457 case Iop_CmpORD32U:
sewardj1bc82102005-12-23 00:16:24 +00003458 case Iop_CmpORD64S:
3459 case Iop_CmpORD64U:
3460 return doCmpORD(mce, op, vatom1,vatom2, atom1,atom2);
sewardj95448072004-11-22 20:19:51 +00003461
sewardj681be302005-01-15 20:43:58 +00003462 case Iop_Add64:
sewardj54eac252012-03-27 10:19:39 +00003463 if (mce->bogusLiterals || mce->useLLVMworkarounds)
tomd9774d72005-06-27 08:11:01 +00003464 return expensiveAddSub(mce,True,Ity_I64,
3465 vatom1,vatom2, atom1,atom2);
3466 else
3467 goto cheap_AddSub64;
sewardj681be302005-01-15 20:43:58 +00003468 case Iop_Sub64:
tomd9774d72005-06-27 08:11:01 +00003469 if (mce->bogusLiterals)
3470 return expensiveAddSub(mce,False,Ity_I64,
3471 vatom1,vatom2, atom1,atom2);
3472 else
3473 goto cheap_AddSub64;
3474
3475 cheap_AddSub64:
3476 case Iop_Mul64:
sewardj681be302005-01-15 20:43:58 +00003477 return mkLeft64(mce, mkUifU64(mce, vatom1,vatom2));
3478
sewardj95448072004-11-22 20:19:51 +00003479 case Iop_Mul16:
3480 case Iop_Add16:
3481 case Iop_Sub16:
3482 return mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
3483
florian537ed2d2012-08-20 16:51:39 +00003484 case Iop_Mul8:
sewardj95448072004-11-22 20:19:51 +00003485 case Iop_Sub8:
3486 case Iop_Add8:
3487 return mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
3488
sewardj69a13322005-04-23 01:14:51 +00003489 case Iop_CmpEQ64:
sewardje6f8af42005-07-06 18:48:59 +00003490 case Iop_CmpNE64:
sewardj69a13322005-04-23 01:14:51 +00003491 if (mce->bogusLiterals)
sewardj4cfa81b2012-11-08 10:58:16 +00003492 goto expensive_cmp64;
sewardj69a13322005-04-23 01:14:51 +00003493 else
3494 goto cheap_cmp64;
sewardj4cfa81b2012-11-08 10:58:16 +00003495
3496 expensive_cmp64:
3497 case Iop_ExpCmpNE64:
3498 return expensiveCmpEQorNE(mce,Ity_I64, vatom1,vatom2, atom1,atom2 );
3499
sewardj69a13322005-04-23 01:14:51 +00003500 cheap_cmp64:
tomcd986332005-04-26 07:44:48 +00003501 case Iop_CmpLE64S: case Iop_CmpLE64U:
3502 case Iop_CmpLT64U: case Iop_CmpLT64S:
sewardj69a13322005-04-23 01:14:51 +00003503 return mkPCastTo(mce, Ity_I1, mkUifU64(mce, vatom1,vatom2));
3504
sewardjd5204dc2004-12-31 01:16:11 +00003505 case Iop_CmpEQ32:
sewardje6f8af42005-07-06 18:48:59 +00003506 case Iop_CmpNE32:
sewardjd5204dc2004-12-31 01:16:11 +00003507 if (mce->bogusLiterals)
sewardj4cfa81b2012-11-08 10:58:16 +00003508 goto expensive_cmp32;
sewardjd5204dc2004-12-31 01:16:11 +00003509 else
3510 goto cheap_cmp32;
sewardj4cfa81b2012-11-08 10:58:16 +00003511
3512 expensive_cmp32:
3513 case Iop_ExpCmpNE32:
3514 return expensiveCmpEQorNE(mce,Ity_I32, vatom1,vatom2, atom1,atom2 );
3515
sewardjd5204dc2004-12-31 01:16:11 +00003516 cheap_cmp32:
sewardj95448072004-11-22 20:19:51 +00003517 case Iop_CmpLE32S: case Iop_CmpLE32U:
3518 case Iop_CmpLT32U: case Iop_CmpLT32S:
sewardj95448072004-11-22 20:19:51 +00003519 return mkPCastTo(mce, Ity_I1, mkUifU32(mce, vatom1,vatom2));
3520
3521 case Iop_CmpEQ16: case Iop_CmpNE16:
3522 return mkPCastTo(mce, Ity_I1, mkUifU16(mce, vatom1,vatom2));
3523
sewardj4cfa81b2012-11-08 10:58:16 +00003524 case Iop_ExpCmpNE16:
3525 return expensiveCmpEQorNE(mce,Ity_I16, vatom1,vatom2, atom1,atom2 );
3526
sewardj95448072004-11-22 20:19:51 +00003527 case Iop_CmpEQ8: case Iop_CmpNE8:
3528 return mkPCastTo(mce, Ity_I1, mkUifU8(mce, vatom1,vatom2));
3529
sewardjafed4c52009-07-12 13:00:17 +00003530 case Iop_CasCmpEQ8: case Iop_CasCmpNE8:
3531 case Iop_CasCmpEQ16: case Iop_CasCmpNE16:
3532 case Iop_CasCmpEQ32: case Iop_CasCmpNE32:
3533 case Iop_CasCmpEQ64: case Iop_CasCmpNE64:
3534 /* Just say these all produce a defined result, regardless
3535 of their arguments. See COMMENT_ON_CasCmpEQ in this file. */
3536 return assignNew('V', mce, Ity_I1, definedOfType(Ity_I1));
3537
sewardjaaddbc22005-10-07 09:49:53 +00003538 case Iop_Shl64: case Iop_Shr64: case Iop_Sar64:
3539 return scalarShift( mce, Ity_I64, op, vatom1,vatom2, atom1,atom2 );
3540
sewardj95448072004-11-22 20:19:51 +00003541 case Iop_Shl32: case Iop_Shr32: case Iop_Sar32:
sewardjaaddbc22005-10-07 09:49:53 +00003542 return scalarShift( mce, Ity_I32, op, vatom1,vatom2, atom1,atom2 );
sewardj95448072004-11-22 20:19:51 +00003543
sewardjdb67f5f2004-12-14 01:15:31 +00003544 case Iop_Shl16: case Iop_Shr16: case Iop_Sar16:
sewardjaaddbc22005-10-07 09:49:53 +00003545 return scalarShift( mce, Ity_I16, op, vatom1,vatom2, atom1,atom2 );
sewardj95448072004-11-22 20:19:51 +00003546
florian537ed2d2012-08-20 16:51:39 +00003547 case Iop_Shl8: case Iop_Shr8: case Iop_Sar8:
sewardjaaddbc22005-10-07 09:49:53 +00003548 return scalarShift( mce, Ity_I8, op, vatom1,vatom2, atom1,atom2 );
sewardj95448072004-11-22 20:19:51 +00003549
sewardj350e8f72012-06-25 07:52:15 +00003550 case Iop_AndV256:
3551 uifu = mkUifUV256; difd = mkDifDV256;
3552 and_or_ty = Ity_V256; improve = mkImproveANDV256; goto do_And_Or;
sewardj20d38f22005-02-07 23:50:18 +00003553 case Iop_AndV128:
3554 uifu = mkUifUV128; difd = mkDifDV128;
3555 and_or_ty = Ity_V128; improve = mkImproveANDV128; goto do_And_Or;
sewardj7010f6e2004-12-10 13:35:22 +00003556 case Iop_And64:
3557 uifu = mkUifU64; difd = mkDifD64;
3558 and_or_ty = Ity_I64; improve = mkImproveAND64; goto do_And_Or;
sewardj95448072004-11-22 20:19:51 +00003559 case Iop_And32:
3560 uifu = mkUifU32; difd = mkDifD32;
3561 and_or_ty = Ity_I32; improve = mkImproveAND32; goto do_And_Or;
3562 case Iop_And16:
3563 uifu = mkUifU16; difd = mkDifD16;
3564 and_or_ty = Ity_I16; improve = mkImproveAND16; goto do_And_Or;
3565 case Iop_And8:
3566 uifu = mkUifU8; difd = mkDifD8;
3567 and_or_ty = Ity_I8; improve = mkImproveAND8; goto do_And_Or;
3568
sewardj350e8f72012-06-25 07:52:15 +00003569 case Iop_OrV256:
3570 uifu = mkUifUV256; difd = mkDifDV256;
3571 and_or_ty = Ity_V256; improve = mkImproveORV256; goto do_And_Or;
sewardj20d38f22005-02-07 23:50:18 +00003572 case Iop_OrV128:
3573 uifu = mkUifUV128; difd = mkDifDV128;
3574 and_or_ty = Ity_V128; improve = mkImproveORV128; goto do_And_Or;
sewardj7010f6e2004-12-10 13:35:22 +00003575 case Iop_Or64:
3576 uifu = mkUifU64; difd = mkDifD64;
3577 and_or_ty = Ity_I64; improve = mkImproveOR64; goto do_And_Or;
sewardj95448072004-11-22 20:19:51 +00003578 case Iop_Or32:
3579 uifu = mkUifU32; difd = mkDifD32;
3580 and_or_ty = Ity_I32; improve = mkImproveOR32; goto do_And_Or;
3581 case Iop_Or16:
3582 uifu = mkUifU16; difd = mkDifD16;
3583 and_or_ty = Ity_I16; improve = mkImproveOR16; goto do_And_Or;
3584 case Iop_Or8:
3585 uifu = mkUifU8; difd = mkDifD8;
3586 and_or_ty = Ity_I8; improve = mkImproveOR8; goto do_And_Or;
3587
3588 do_And_Or:
3589 return
3590 assignNew(
sewardj7cf4e6b2008-05-01 20:24:26 +00003591 'V', mce,
sewardj95448072004-11-22 20:19:51 +00003592 and_or_ty,
3593 difd(mce, uifu(mce, vatom1, vatom2),
3594 difd(mce, improve(mce, atom1, vatom1),
3595 improve(mce, atom2, vatom2) ) ) );
3596
3597 case Iop_Xor8:
3598 return mkUifU8(mce, vatom1, vatom2);
3599 case Iop_Xor16:
3600 return mkUifU16(mce, vatom1, vatom2);
3601 case Iop_Xor32:
3602 return mkUifU32(mce, vatom1, vatom2);
sewardj7010f6e2004-12-10 13:35:22 +00003603 case Iop_Xor64:
3604 return mkUifU64(mce, vatom1, vatom2);
sewardj20d38f22005-02-07 23:50:18 +00003605 case Iop_XorV128:
3606 return mkUifUV128(mce, vatom1, vatom2);
sewardj350e8f72012-06-25 07:52:15 +00003607 case Iop_XorV256:
3608 return mkUifUV256(mce, vatom1, vatom2);
njn25e49d8e72002-09-23 09:36:25 +00003609
3610 default:
sewardj95448072004-11-22 20:19:51 +00003611 ppIROp(op);
3612 VG_(tool_panic)("memcheck:expr2vbits_Binop");
njn25e49d8e72002-09-23 09:36:25 +00003613 }
njn25e49d8e72002-09-23 09:36:25 +00003614}
3615
njn25e49d8e72002-09-23 09:36:25 +00003616
/* Compute the shadow (V-bit) expression for a unary operation 'op'
   applied to 'atom'.  The general scheme: first shadow the argument,
   then map the operation onto the appropriate V-bit propagation rule:
   lane-wise FP ops use the unary*Fx* helpers, narrowing/widening ops
   use their dedicated vector helpers, pure data-movement ops (casts,
   reinterprets, Not*) pass the V bits through the same movement, and
   everything else is approximated with a pessimising cast (mkPCastTo)
   to the result type.  Panics on any IROp with no rule. */
static
IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
{
   IRAtom* vatom = expr2vbits( mce, atom );
   tl_assert(isOriginalAtom(mce,atom));
   switch (op) {

      /* Lane-wise FP ops: V bits are handled per-lane by the helpers. */
      case Iop_Sqrt64Fx2:
         return unary64Fx2(mce, vatom);

      case Iop_Sqrt64F0x2:
         return unary64F0x2(mce, vatom);

      case Iop_Sqrt32Fx8:
      case Iop_RSqrt32Fx8:
      case Iop_Recip32Fx8:
         return unary32Fx8(mce, vatom);

      case Iop_Sqrt64Fx4:
         return unary64Fx4(mce, vatom);

      case Iop_Sqrt32Fx4:
      case Iop_RSqrt32Fx4:
      case Iop_Recip32Fx4:
      case Iop_I32UtoFx4:
      case Iop_I32StoFx4:
      case Iop_QFtoI32Ux4_RZ:
      case Iop_QFtoI32Sx4_RZ:
      case Iop_RoundF32x4_RM:
      case Iop_RoundF32x4_RP:
      case Iop_RoundF32x4_RN:
      case Iop_RoundF32x4_RZ:
      case Iop_Recip32x4:
      case Iop_Abs32Fx4:
      case Iop_Neg32Fx4:
      case Iop_Rsqrte32Fx4:
         return unary32Fx4(mce, vatom);

      case Iop_I32UtoFx2:
      case Iop_I32StoFx2:
      case Iop_Recip32Fx2:
      case Iop_Recip32x2:
      case Iop_Abs32Fx2:
      case Iop_Neg32Fx2:
      case Iop_Rsqrte32Fx2:
         return unary32Fx2(mce, vatom);

      case Iop_Sqrt32F0x4:
      case Iop_RSqrt32F0x4:
      case Iop_Recip32F0x4:
         return unary32F0x4(mce, vatom);

      /* Data movement into a V128: apply the same op to the V bits. */
      case Iop_32UtoV128:
      case Iop_64UtoV128:
      case Iop_Dup8x16:
      case Iop_Dup16x8:
      case Iop_Dup32x4:
      case Iop_Reverse16_8x16:
      case Iop_Reverse32_8x16:
      case Iop_Reverse32_16x8:
      case Iop_Reverse64_8x16:
      case Iop_Reverse64_16x8:
      case Iop_Reverse64_32x4:
      case Iop_V256toV128_1: case Iop_V256toV128_0:
         return assignNew('V', mce, Ity_V128, unop(op, vatom));

      /* 128-bit halving: select the matching half of the V bits. */
      case Iop_F128HItoF64:  /* F128 -> high half of F128 */
      case Iop_D128HItoD64:  /* D128 -> high half of D128 */
         return assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, vatom));
      case Iop_F128LOtoF64:  /* F128 -> low half of F128 */
      case Iop_D128LOtoD64:  /* D128 -> low half of D128 */
         return assignNew('V', mce, Ity_I64, unop(Iop_128to64, vatom));

      case Iop_NegF128:
      case Iop_AbsF128:
         return mkPCastTo(mce, Ity_I128, vatom);

      case Iop_I32StoF128: /* signed I32 -> F128 */
      case Iop_I64StoF128: /* signed I64 -> F128 */
      case Iop_I32UtoF128: /* unsigned I32 -> F128 */
      case Iop_I64UtoF128: /* unsigned I64 -> F128 */
      case Iop_F32toF128:  /* F32 -> F128 */
      case Iop_F64toF128:  /* F64 -> F128 */
      case Iop_I64StoD128: /* signed I64 -> D128 */
         return mkPCastTo(mce, Ity_I128, vatom);

      /* Ops producing a 64-bit result: pessimise the whole input. */
      case Iop_F32toF64: 
      case Iop_I32StoF64:
      case Iop_I32UtoF64:
      case Iop_NegF64:
      case Iop_AbsF64:
      case Iop_Est5FRSqrt:
      case Iop_RoundF64toF64_NEAREST:
      case Iop_RoundF64toF64_NegINF:
      case Iop_RoundF64toF64_PosINF:
      case Iop_RoundF64toF64_ZERO:
      case Iop_Clz64:
      case Iop_D32toD64:
      case Iop_ExtractExpD64:    /* D64  -> I64 */
      case Iop_ExtractExpD128:   /* D128 -> I64 */
      case Iop_DPBtoBCD:
      case Iop_BCDtoDPB:
         return mkPCastTo(mce, Ity_I64, vatom);

      case Iop_D64toD128:
         return mkPCastTo(mce, Ity_I128, vatom);

      case Iop_Clz32:
      case Iop_TruncF64asF32:
      case Iop_NegF32:
      case Iop_AbsF32:
         return mkPCastTo(mce, Ity_I32, vatom);

      /* Ctz gets a precise (expensive) treatment, since only the bits
         at and below the lowest set bit determine the result. */
      case Iop_Ctz32:
      case Iop_Ctz64:
         return expensiveCountTrailingZeroes(mce, op, atom, vatom);

      /* Widening/narrowing integer casts and 64-bit data movement:
         apply the identical op to the V bits. */
      case Iop_1Uto64:
      case Iop_1Sto64:
      case Iop_8Uto64:
      case Iop_8Sto64:
      case Iop_16Uto64:
      case Iop_16Sto64:
      case Iop_32Sto64:
      case Iop_32Uto64:
      case Iop_V128to64:
      case Iop_V128HIto64:
      case Iop_128HIto64:
      case Iop_128to64:
      case Iop_Dup8x8:
      case Iop_Dup16x4:
      case Iop_Dup32x2:
      case Iop_Reverse16_8x8:
      case Iop_Reverse32_8x8:
      case Iop_Reverse32_16x4:
      case Iop_Reverse64_8x8:
      case Iop_Reverse64_16x4:
      case Iop_Reverse64_32x2:
      case Iop_V256to64_0: case Iop_V256to64_1:
      case Iop_V256to64_2: case Iop_V256to64_3:
         return assignNew('V', mce, Ity_I64, unop(op, vatom));

      case Iop_64to32:
      case Iop_64HIto32:
      case Iop_1Uto32:
      case Iop_1Sto32:
      case Iop_8Uto32:
      case Iop_16Uto32:
      case Iop_16Sto32:
      case Iop_8Sto32:
      case Iop_V128to32:
         return assignNew('V', mce, Ity_I32, unop(op, vatom));

      case Iop_8Sto16:
      case Iop_8Uto16:
      case Iop_32to16:
      case Iop_32HIto16:
      case Iop_64to16:
      case Iop_GetMSBs8x16:
         return assignNew('V', mce, Ity_I16, unop(op, vatom));

      case Iop_1Uto8:
      case Iop_1Sto8:
      case Iop_16to8:
      case Iop_16HIto8:
      case Iop_32to8:
      case Iop_64to8:
      case Iop_GetMSBs8x8:
         return assignNew('V', mce, Ity_I8, unop(op, vatom));

      case Iop_32to1:
         return assignNew('V', mce, Ity_I1, unop(Iop_32to1, vatom));

      case Iop_64to1:
         return assignNew('V', mce, Ity_I1, unop(Iop_64to1, vatom));

      /* Reinterprets and bitwise Not do not change definedness:
         the V bits pass through unchanged. */
      case Iop_ReinterpF64asI64:
      case Iop_ReinterpI64asF64:
      case Iop_ReinterpI32asF32:
      case Iop_ReinterpF32asI32:
      case Iop_ReinterpI64asD64:
      case Iop_ReinterpD64asI64:
      case Iop_NotV256:
      case Iop_NotV128:
      case Iop_Not64:
      case Iop_Not32:
      case Iop_Not16:
      case Iop_Not8:
      case Iop_Not1:
         return vatom;

      /* Lane-wise "any undefined bit poisons the lane" ops: pessimise
         each lane with the matching PCast helper. */
      case Iop_CmpNEZ8x8:
      case Iop_Cnt8x8:
      case Iop_Clz8Sx8:
      case Iop_Cls8Sx8:
      case Iop_Abs8x8:
         return mkPCast8x8(mce, vatom);

      case Iop_CmpNEZ8x16:
      case Iop_Cnt8x16:
      case Iop_Clz8Sx16:
      case Iop_Cls8Sx16:
      case Iop_Abs8x16:
         return mkPCast8x16(mce, vatom);

      case Iop_CmpNEZ16x4:
      case Iop_Clz16Sx4:
      case Iop_Cls16Sx4:
      case Iop_Abs16x4:
         return mkPCast16x4(mce, vatom);

      case Iop_CmpNEZ16x8:
      case Iop_Clz16Sx8:
      case Iop_Cls16Sx8:
      case Iop_Abs16x8:
         return mkPCast16x8(mce, vatom);

      case Iop_CmpNEZ32x2:
      case Iop_Clz32Sx2:
      case Iop_Cls32Sx2:
      case Iop_FtoI32Ux2_RZ:
      case Iop_FtoI32Sx2_RZ:
      case Iop_Abs32x2:
         return mkPCast32x2(mce, vatom);

      case Iop_CmpNEZ32x4:
      case Iop_Clz32Sx4:
      case Iop_Cls32Sx4:
      case Iop_FtoI32Ux4_RZ:
      case Iop_FtoI32Sx4_RZ:
      case Iop_Abs32x4:
         return mkPCast32x4(mce, vatom);

      case Iop_CmpwNEZ32:
         return mkPCastTo(mce, Ity_I32, vatom);

      case Iop_CmpwNEZ64:
         return mkPCastTo(mce, Ity_I64, vatom);

      case Iop_CmpNEZ64x2:
         return mkPCast64x2(mce, vatom);

      /* Narrowing/widening vector ops have dedicated V-bit helpers. */
      case Iop_NarrowUn16to8x8:
      case Iop_NarrowUn32to16x4:
      case Iop_NarrowUn64to32x2:
      case Iop_QNarrowUn16Sto8Sx8:
      case Iop_QNarrowUn16Sto8Ux8:
      case Iop_QNarrowUn16Uto8Ux8:
      case Iop_QNarrowUn32Sto16Sx4:
      case Iop_QNarrowUn32Sto16Ux4:
      case Iop_QNarrowUn32Uto16Ux4:
      case Iop_QNarrowUn64Sto32Sx2:
      case Iop_QNarrowUn64Sto32Ux2:
      case Iop_QNarrowUn64Uto32Ux2:
         return vectorNarrowUnV128(mce, op, vatom);

      case Iop_Widen8Sto16x8:
      case Iop_Widen8Uto16x8:
      case Iop_Widen16Sto32x4:
      case Iop_Widen16Uto32x4:
      case Iop_Widen32Sto64x2:
      case Iop_Widen32Uto64x2:
         return vectorWidenI64(mce, op, vatom);

      /* Pairwise-add-long: pessimise the input lanes, apply the op,
         then pessimise at the (wider) output lane size. */
      case Iop_PwAddL32Ux2:
      case Iop_PwAddL32Sx2:
         return mkPCastTo(mce, Ity_I64,
               assignNew('V', mce, Ity_I64, unop(op, mkPCast32x2(mce, vatom))));

      case Iop_PwAddL16Ux4:
      case Iop_PwAddL16Sx4:
         return mkPCast32x2(mce,
               assignNew('V', mce, Ity_I64, unop(op, mkPCast16x4(mce, vatom))));

      case Iop_PwAddL8Ux8:
      case Iop_PwAddL8Sx8:
         return mkPCast16x4(mce,
               assignNew('V', mce, Ity_I64, unop(op, mkPCast8x8(mce, vatom))));

      case Iop_PwAddL32Ux4:
      case Iop_PwAddL32Sx4:
         return mkPCast64x2(mce,
               assignNew('V', mce, Ity_V128, unop(op, mkPCast32x4(mce, vatom))));

      case Iop_PwAddL16Ux8:
      case Iop_PwAddL16Sx8:
         return mkPCast32x4(mce,
               assignNew('V', mce, Ity_V128, unop(op, mkPCast16x8(mce, vatom))));

      case Iop_PwAddL8Ux16:
      case Iop_PwAddL8Sx16:
         return mkPCast16x8(mce,
               assignNew('V', mce, Ity_V128, unop(op, mkPCast8x16(mce, vatom))));

      /* Iop_I64UtoF32 deliberately falls through to the panic: it has
         no V-bit rule yet. */
      case Iop_I64UtoF32:
      default:
         ppIROp(op);
         VG_(tool_panic)("memcheck:expr2vbits_Unop");
   }
}
3917
3918
/* Worker function; do not call directly.  Generates the shadow IR for
   a memory load of integer type 'ty' (I8/I16/I32/I64 only) at address
   addr+bias: first checks the address for definedness, then emits a
   dirty helper call that fetches the corresponding V bits from shadow
   memory, and returns an atom holding those V bits.  'end' selects the
   little- or big-endian family of load helpers. */
static
IRAtom* expr2vbits_Load_WRK ( MCEnv* mce,
                              IREndness end, IRType ty,
                              IRAtom* addr, UInt bias )
{
   void*    helper;           /* shadow-memory load helper to call */
   const HChar* hname;        /* its name, for IRDirty annotation */
   IRDirty* di;
   IRTemp   datavbits;        /* temp receiving the loaded V bits */
   IRAtom*  addrAct;          /* actual (possibly biased) address */

   tl_assert(isOriginalAtom(mce,addr));
   tl_assert(end == Iend_LE || end == Iend_BE);

   /* First, emit a definedness test for the address.  This also sets
      the address (shadow) to 'defined' following the test. */
   complainIfUndefined( mce, addr, NULL );

   /* Now cook up a call to the relevant helper function, to read the
      data V bits from shadow memory. */
   ty = shadowTypeV(ty);

   if (end == Iend_LE) {
      switch (ty) {
         case Ity_I64: helper = &MC_(helperc_LOADV64le);
                       hname = "MC_(helperc_LOADV64le)";
                       break;
         case Ity_I32: helper = &MC_(helperc_LOADV32le);
                       hname = "MC_(helperc_LOADV32le)";
                       break;
         case Ity_I16: helper = &MC_(helperc_LOADV16le);
                       hname = "MC_(helperc_LOADV16le)";
                       break;
         case Ity_I8:  helper = &MC_(helperc_LOADV8);
                       hname = "MC_(helperc_LOADV8)";
                       break;
         default:      ppIRType(ty);
                       VG_(tool_panic)("memcheck:do_shadow_Load(LE)");
      }
   } else {
      switch (ty) {
         case Ity_I64: helper = &MC_(helperc_LOADV64be);
                       hname = "MC_(helperc_LOADV64be)";
                       break;
         case Ity_I32: helper = &MC_(helperc_LOADV32be);
                       hname = "MC_(helperc_LOADV32be)";
                       break;
         case Ity_I16: helper = &MC_(helperc_LOADV16be);
                       hname = "MC_(helperc_LOADV16be)";
                       break;
         case Ity_I8:  helper = &MC_(helperc_LOADV8);
                       hname = "MC_(helperc_LOADV8)";
                       break;
         default:      ppIRType(ty);
                       VG_(tool_panic)("memcheck:do_shadow_Load(BE)");
      }
   }

   /* Generate the actual address into addrAct. */
   if (bias == 0) {
      addrAct = addr;
   } else {
      IROp    mkAdd;
      IRAtom* eBias;
      IRType  tyAddr  = mce->hWordTy;
      tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
      mkAdd   = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
      eBias   = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
      addrAct = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBias) );
   }

   /* We need to have a place to park the V bits we're just about to
      read. */
   datavbits = newTemp(mce, ty, VSh);
   di = unsafeIRDirty_1_N( datavbits,
                           1/*regparms*/,
                           hname, VG_(fnptr_to_fnentry)( helper ),
                           mkIRExprVec_1( addrAct ));
   setHelperAnns( mce, di );
   stmt( 'V', mce, IRStmt_Dirty(di) );

   return mkexpr(datavbits);
}
4003
4004
/* Generate shadow IR for a load of any supported type at addr+bias.
   Integer types go straight to the worker; V128 and V256 loads are
   decomposed into 64-bit shadow loads and reassembled, taking the
   endianness into account so the most/least significant 64-bit units
   land in the right lanes.  Big-endian V256 is not supported (no such
   target exists). */
static
IRAtom* expr2vbits_Load ( MCEnv* mce,
                          IREndness end, IRType ty,
                          IRAtom* addr, UInt bias )
{
   tl_assert(end == Iend_LE || end == Iend_BE);
   switch (shadowTypeV(ty)) {
      case Ity_I8:
      case Ity_I16:
      case Ity_I32:
      case Ity_I64:
         return expr2vbits_Load_WRK(mce, end, ty, addr, bias);
      case Ity_V128: {
         IRAtom *v64hi, *v64lo;
         /* For LE the low half is at the lower address; for BE it is
            the other way round. */
         if (end == Iend_LE) {
            v64lo = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+0);
            v64hi = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+8);
         } else {
            v64hi = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+0);
            v64lo = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+8);
         }
         return assignNew( 'V', mce,
                           Ity_V128,
                           binop(Iop_64HLtoV128, v64hi, v64lo));
      }
      case Ity_V256: {
         /* V256-bit case -- phrased in terms of 64 bit units (Qs),
            with Q3 being the most significant lane. */
         if (end == Iend_BE) goto unhandled;
         IRAtom* v64Q0 = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+0);
         IRAtom* v64Q1 = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+8);
         IRAtom* v64Q2 = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+16);
         IRAtom* v64Q3 = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+24);
         return assignNew( 'V', mce,
                           Ity_V256,
                           IRExpr_Qop(Iop_64x4toV256,
                                      v64Q3, v64Q2, v64Q1, v64Q0));
      }
      unhandled:
      default:
         VG_(tool_panic)("expr2vbits_Load");
   }
}
4048
4049
florian434ffae2012-07-19 17:23:42 +00004050/* If there is no guard expression or the guard is always TRUE this function
4051 behaves like expr2vbits_Load. If the guard is not true at runtime, an
4052 all-bits-defined bit pattern will be returned.
4053 It is assumed that definedness of GUARD has already been checked at the call
4054 site. */
4055static
4056IRAtom* expr2vbits_guarded_Load ( MCEnv* mce,
4057 IREndness end, IRType ty,
4058 IRAtom* addr, UInt bias, IRAtom *guard )
4059{
4060 if (guard) {
4061 IRAtom *cond, *iffalse, *iftrue;
4062
4063 cond = assignNew('V', mce, Ity_I8, unop(Iop_1Uto8, guard));
4064 iftrue = assignNew('V', mce, ty,
4065 expr2vbits_Load(mce, end, ty, addr, bias));
4066 iffalse = assignNew('V', mce, ty, definedOfType(ty));
4067
4068 return assignNew('V', mce, ty, IRExpr_Mux0X(cond, iffalse, iftrue));
4069 }
4070
4071 /* No guard expression or unconditional load */
4072 return expr2vbits_Load(mce, end, ty, addr, bias);
4073}
4074
4075
sewardj170ee212004-12-10 18:57:51 +00004076static
sewardj95448072004-11-22 20:19:51 +00004077IRAtom* expr2vbits_Mux0X ( MCEnv* mce,
4078 IRAtom* cond, IRAtom* expr0, IRAtom* exprX )
4079{
4080 IRAtom *vbitsC, *vbits0, *vbitsX;
4081 IRType ty;
4082 /* Given Mux0X(cond,expr0,exprX), generate
4083 Mux0X(cond,expr0#,exprX#) `UifU` PCast(cond#)
4084 That is, steer the V bits like the originals, but trash the
4085 result if the steering value is undefined. This gives
4086 lazy propagation. */
4087 tl_assert(isOriginalAtom(mce, cond));
4088 tl_assert(isOriginalAtom(mce, expr0));
4089 tl_assert(isOriginalAtom(mce, exprX));
4090
4091 vbitsC = expr2vbits(mce, cond);
4092 vbits0 = expr2vbits(mce, expr0);
4093 vbitsX = expr2vbits(mce, exprX);
sewardj1c0ce7a2009-07-01 08:10:49 +00004094 ty = typeOfIRExpr(mce->sb->tyenv, vbits0);
sewardj95448072004-11-22 20:19:51 +00004095
4096 return
sewardj7cf4e6b2008-05-01 20:24:26 +00004097 mkUifU(mce, ty, assignNew('V', mce, ty,
4098 IRExpr_Mux0X(cond, vbits0, vbitsX)),
sewardj95448072004-11-22 20:19:51 +00004099 mkPCastTo(mce, ty, vbitsC) );
4100}
4101
4102/* --------- This is the main expression-handling function. --------- */
4103
/* Main expression-shadowing dispatcher: given an original IRExpr,
   return an expression computing its V bits, delegating to the
   per-tag workers above.  Constants are always fully defined;
   temporaries map to their shadow temporaries.  Panics on an
   unrecognised expression tag. */
static
IRExpr* expr2vbits ( MCEnv* mce, IRExpr* e )
{
   switch (e->tag) {

      case Iex_Get:
         return shadow_GET( mce, e->Iex.Get.offset, e->Iex.Get.ty );

      case Iex_GetI:
         return shadow_GETI( mce, e->Iex.GetI.descr,
                                  e->Iex.GetI.ix, e->Iex.GetI.bias );

      case Iex_RdTmp:
         /* Reading a temp reads its shadow temp. */
         return IRExpr_RdTmp( findShadowTmpV(mce, e->Iex.RdTmp.tmp) );

      case Iex_Const:
         /* Literals are fully defined by construction. */
         return definedOfType(shadowTypeV(typeOfIRExpr(mce->sb->tyenv, e)));

      case Iex_Qop:
         return expr2vbits_Qop(
                   mce,
                   e->Iex.Qop.details->op,
                   e->Iex.Qop.details->arg1, e->Iex.Qop.details->arg2,
                   e->Iex.Qop.details->arg3, e->Iex.Qop.details->arg4
                );

      case Iex_Triop:
         return expr2vbits_Triop(
                   mce,
                   e->Iex.Triop.details->op,
                   e->Iex.Triop.details->arg1, e->Iex.Triop.details->arg2,
                   e->Iex.Triop.details->arg3
                );

      case Iex_Binop:
         return expr2vbits_Binop(
                   mce,
                   e->Iex.Binop.op,
                   e->Iex.Binop.arg1, e->Iex.Binop.arg2
                );

      case Iex_Unop:
         return expr2vbits_Unop( mce, e->Iex.Unop.op, e->Iex.Unop.arg );

      case Iex_Load:
         return expr2vbits_Load( mce, e->Iex.Load.end,
                                      e->Iex.Load.ty,
                                      e->Iex.Load.addr, 0/*addr bias*/ );

      case Iex_CCall:
         /* Helper calls get the lazy worst-case approximation. */
         return mkLazyN( mce, e->Iex.CCall.args,
                              e->Iex.CCall.retty,
                              e->Iex.CCall.cee );

      case Iex_Mux0X:
         return expr2vbits_Mux0X( mce, e->Iex.Mux0X.cond, e->Iex.Mux0X.expr0,
                                       e->Iex.Mux0X.exprX);

      default:
         VG_(printf)("\n");
         ppIRExpr(e);
         VG_(printf)("\n");
         VG_(tool_panic)("memcheck: expr2vbits");
   }
}
4169
4170/*------------------------------------------------------------*/
sewardj95448072004-11-22 20:19:51 +00004171/*--- Generate shadow stmts from all kinds of IRStmts. ---*/
njn25e49d8e72002-09-23 09:36:25 +00004172/*------------------------------------------------------------*/
4173
sewardj95448072004-11-22 20:19:51 +00004174/* Widen a value to the host word size. */
njn25e49d8e72002-09-23 09:36:25 +00004175
4176static
sewardj95448072004-11-22 20:19:51 +00004177IRExpr* zwidenToHostWord ( MCEnv* mce, IRAtom* vatom )
njn25e49d8e72002-09-23 09:36:25 +00004178{
sewardj7cf97ee2004-11-28 14:25:01 +00004179 IRType ty, tyH;
4180
sewardj95448072004-11-22 20:19:51 +00004181 /* vatom is vbits-value and as such can only have a shadow type. */
4182 tl_assert(isShadowAtom(mce,vatom));
njn25e49d8e72002-09-23 09:36:25 +00004183
sewardj1c0ce7a2009-07-01 08:10:49 +00004184 ty = typeOfIRExpr(mce->sb->tyenv, vatom);
sewardj7cf97ee2004-11-28 14:25:01 +00004185 tyH = mce->hWordTy;
njn25e49d8e72002-09-23 09:36:25 +00004186
sewardj95448072004-11-22 20:19:51 +00004187 if (tyH == Ity_I32) {
4188 switch (ty) {
sewardj7cf4e6b2008-05-01 20:24:26 +00004189 case Ity_I32:
4190 return vatom;
4191 case Ity_I16:
4192 return assignNew('V', mce, tyH, unop(Iop_16Uto32, vatom));
4193 case Ity_I8:
4194 return assignNew('V', mce, tyH, unop(Iop_8Uto32, vatom));
4195 default:
4196 goto unhandled;
sewardj8ec2cfc2002-10-13 00:57:26 +00004197 }
sewardj6cf40ff2005-04-20 22:31:26 +00004198 } else
4199 if (tyH == Ity_I64) {
4200 switch (ty) {
sewardj7cf4e6b2008-05-01 20:24:26 +00004201 case Ity_I32:
4202 return assignNew('V', mce, tyH, unop(Iop_32Uto64, vatom));
4203 case Ity_I16:
4204 return assignNew('V', mce, tyH, unop(Iop_32Uto64,
4205 assignNew('V', mce, Ity_I32, unop(Iop_16Uto32, vatom))));
4206 case Ity_I8:
4207 return assignNew('V', mce, tyH, unop(Iop_32Uto64,
4208 assignNew('V', mce, Ity_I32, unop(Iop_8Uto32, vatom))));
4209 default:
4210 goto unhandled;
sewardj6cf40ff2005-04-20 22:31:26 +00004211 }
sewardj95448072004-11-22 20:19:51 +00004212 } else {
4213 goto unhandled;
sewardj8ec2cfc2002-10-13 00:57:26 +00004214 }
sewardj95448072004-11-22 20:19:51 +00004215 unhandled:
4216 VG_(printf)("\nty = "); ppIRType(ty); VG_(printf)("\n");
4217 VG_(tool_panic)("zwidenToHostWord");
njn25e49d8e72002-09-23 09:36:25 +00004218}
4219
njn25e49d8e72002-09-23 09:36:25 +00004220
sewardj95448072004-11-22 20:19:51 +00004221/* Generate a shadow store. addr is always the original address atom.
4222 You can pass in either originals or V-bits for the data atom, but
sewardj1c0ce7a2009-07-01 08:10:49 +00004223 obviously not both. guard :: Ity_I1 controls whether the store
4224 really happens; NULL means it unconditionally does. Note that
4225 guard itself is not checked for definedness; the caller of this
4226 function must do that if necessary. */
njn25e49d8e72002-09-23 09:36:25 +00004227
sewardj95448072004-11-22 20:19:51 +00004228static
sewardj2e595852005-06-30 23:33:37 +00004229void do_shadow_Store ( MCEnv* mce,
4230 IREndness end,
4231 IRAtom* addr, UInt bias,
sewardj1c0ce7a2009-07-01 08:10:49 +00004232 IRAtom* data, IRAtom* vdata,
4233 IRAtom* guard )
njn25e49d8e72002-09-23 09:36:25 +00004234{
sewardj170ee212004-12-10 18:57:51 +00004235 IROp mkAdd;
4236 IRType ty, tyAddr;
sewardj95448072004-11-22 20:19:51 +00004237 void* helper = NULL;
floriana5f894c2012-10-21 03:43:20 +00004238 const HChar* hname = NULL;
njn1d0825f2006-03-27 11:37:07 +00004239 IRConst* c;
sewardj170ee212004-12-10 18:57:51 +00004240
4241 tyAddr = mce->hWordTy;
4242 mkAdd = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
4243 tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
sewardj2e595852005-06-30 23:33:37 +00004244 tl_assert( end == Iend_LE || end == Iend_BE );
sewardj170ee212004-12-10 18:57:51 +00004245
sewardj95448072004-11-22 20:19:51 +00004246 if (data) {
4247 tl_assert(!vdata);
4248 tl_assert(isOriginalAtom(mce, data));
4249 tl_assert(bias == 0);
4250 vdata = expr2vbits( mce, data );
4251 } else {
4252 tl_assert(vdata);
4253 }
njn25e49d8e72002-09-23 09:36:25 +00004254
sewardj95448072004-11-22 20:19:51 +00004255 tl_assert(isOriginalAtom(mce,addr));
4256 tl_assert(isShadowAtom(mce,vdata));
njn25e49d8e72002-09-23 09:36:25 +00004257
sewardj1c0ce7a2009-07-01 08:10:49 +00004258 if (guard) {
4259 tl_assert(isOriginalAtom(mce, guard));
4260 tl_assert(typeOfIRExpr(mce->sb->tyenv, guard) == Ity_I1);
4261 }
4262
4263 ty = typeOfIRExpr(mce->sb->tyenv, vdata);
njn25e49d8e72002-09-23 09:36:25 +00004264
njn1d0825f2006-03-27 11:37:07 +00004265 // If we're not doing undefined value checking, pretend that this value
4266 // is "all valid". That lets Vex's optimiser remove some of the V bit
4267 // shadow computation ops that precede it.
sewardj7cf4e6b2008-05-01 20:24:26 +00004268 if (MC_(clo_mc_level) == 1) {
njn1d0825f2006-03-27 11:37:07 +00004269 switch (ty) {
sewardj45fa9f42012-05-21 10:18:10 +00004270 case Ity_V256: // V256 weirdness -- used four times
sewardjbd43bfa2012-06-29 15:29:37 +00004271 c = IRConst_V256(V_BITS32_DEFINED); break;
sewardj45fa9f42012-05-21 10:18:10 +00004272 case Ity_V128: // V128 weirdness -- used twice
sewardj1c0ce7a2009-07-01 08:10:49 +00004273 c = IRConst_V128(V_BITS16_DEFINED); break;
njn1d0825f2006-03-27 11:37:07 +00004274 case Ity_I64: c = IRConst_U64 (V_BITS64_DEFINED); break;
4275 case Ity_I32: c = IRConst_U32 (V_BITS32_DEFINED); break;
4276 case Ity_I16: c = IRConst_U16 (V_BITS16_DEFINED); break;
4277 case Ity_I8: c = IRConst_U8 (V_BITS8_DEFINED); break;
4278 default: VG_(tool_panic)("memcheck:do_shadow_Store(LE)");
4279 }
4280 vdata = IRExpr_Const( c );
4281 }
4282
sewardj95448072004-11-22 20:19:51 +00004283 /* First, emit a definedness test for the address. This also sets
4284 the address (shadow) to 'defined' following the test. */
florian434ffae2012-07-19 17:23:42 +00004285 complainIfUndefined( mce, addr, guard );
njn25e49d8e72002-09-23 09:36:25 +00004286
sewardj170ee212004-12-10 18:57:51 +00004287 /* Now decide which helper function to call to write the data V
4288 bits into shadow memory. */
sewardj2e595852005-06-30 23:33:37 +00004289 if (end == Iend_LE) {
4290 switch (ty) {
sewardj45fa9f42012-05-21 10:18:10 +00004291 case Ity_V256: /* we'll use the helper four times */
sewardj2e595852005-06-30 23:33:37 +00004292 case Ity_V128: /* we'll use the helper twice */
njn1d0825f2006-03-27 11:37:07 +00004293 case Ity_I64: helper = &MC_(helperc_STOREV64le);
4294 hname = "MC_(helperc_STOREV64le)";
sewardj2e595852005-06-30 23:33:37 +00004295 break;
njn1d0825f2006-03-27 11:37:07 +00004296 case Ity_I32: helper = &MC_(helperc_STOREV32le);
4297 hname = "MC_(helperc_STOREV32le)";
sewardj2e595852005-06-30 23:33:37 +00004298 break;
njn1d0825f2006-03-27 11:37:07 +00004299 case Ity_I16: helper = &MC_(helperc_STOREV16le);
4300 hname = "MC_(helperc_STOREV16le)";
sewardj2e595852005-06-30 23:33:37 +00004301 break;
njn1d0825f2006-03-27 11:37:07 +00004302 case Ity_I8: helper = &MC_(helperc_STOREV8);
4303 hname = "MC_(helperc_STOREV8)";
sewardj2e595852005-06-30 23:33:37 +00004304 break;
4305 default: VG_(tool_panic)("memcheck:do_shadow_Store(LE)");
4306 }
4307 } else {
sewardj8cf88b72005-07-08 01:29:33 +00004308 switch (ty) {
4309 case Ity_V128: /* we'll use the helper twice */
njn1d0825f2006-03-27 11:37:07 +00004310 case Ity_I64: helper = &MC_(helperc_STOREV64be);
4311 hname = "MC_(helperc_STOREV64be)";
sewardj8cf88b72005-07-08 01:29:33 +00004312 break;
njn1d0825f2006-03-27 11:37:07 +00004313 case Ity_I32: helper = &MC_(helperc_STOREV32be);
4314 hname = "MC_(helperc_STOREV32be)";
sewardj8cf88b72005-07-08 01:29:33 +00004315 break;
njn1d0825f2006-03-27 11:37:07 +00004316 case Ity_I16: helper = &MC_(helperc_STOREV16be);
4317 hname = "MC_(helperc_STOREV16be)";
sewardj8cf88b72005-07-08 01:29:33 +00004318 break;
njn1d0825f2006-03-27 11:37:07 +00004319 case Ity_I8: helper = &MC_(helperc_STOREV8);
4320 hname = "MC_(helperc_STOREV8)";
sewardj8cf88b72005-07-08 01:29:33 +00004321 break;
sewardj45fa9f42012-05-21 10:18:10 +00004322 /* Note, no V256 case here, because no big-endian target that
4323 we support, has 256 vectors. */
sewardj8cf88b72005-07-08 01:29:33 +00004324 default: VG_(tool_panic)("memcheck:do_shadow_Store(BE)");
4325 }
sewardj95448072004-11-22 20:19:51 +00004326 }
njn25e49d8e72002-09-23 09:36:25 +00004327
sewardj45fa9f42012-05-21 10:18:10 +00004328 if (UNLIKELY(ty == Ity_V256)) {
4329
4330 /* V256-bit case -- phrased in terms of 64 bit units (Qs), with
4331 Q3 being the most significant lane. */
4332 /* These are the offsets of the Qs in memory. */
4333 Int offQ0, offQ1, offQ2, offQ3;
4334
4335 /* Various bits for constructing the 4 lane helper calls */
4336 IRDirty *diQ0, *diQ1, *diQ2, *diQ3;
4337 IRAtom *addrQ0, *addrQ1, *addrQ2, *addrQ3;
4338 IRAtom *vdataQ0, *vdataQ1, *vdataQ2, *vdataQ3;
4339 IRAtom *eBiasQ0, *eBiasQ1, *eBiasQ2, *eBiasQ3;
4340
4341 if (end == Iend_LE) {
4342 offQ0 = 0; offQ1 = 8; offQ2 = 16; offQ3 = 24;
4343 } else {
4344 offQ3 = 0; offQ2 = 8; offQ1 = 16; offQ0 = 24;
4345 }
4346
4347 eBiasQ0 = tyAddr==Ity_I32 ? mkU32(bias+offQ0) : mkU64(bias+offQ0);
4348 addrQ0 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ0) );
4349 vdataQ0 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_0, vdata));
4350 diQ0 = unsafeIRDirty_0_N(
4351 1/*regparms*/,
4352 hname, VG_(fnptr_to_fnentry)( helper ),
4353 mkIRExprVec_2( addrQ0, vdataQ0 )
4354 );
4355
4356 eBiasQ1 = tyAddr==Ity_I32 ? mkU32(bias+offQ1) : mkU64(bias+offQ1);
4357 addrQ1 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ1) );
4358 vdataQ1 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_1, vdata));
4359 diQ1 = unsafeIRDirty_0_N(
4360 1/*regparms*/,
4361 hname, VG_(fnptr_to_fnentry)( helper ),
4362 mkIRExprVec_2( addrQ1, vdataQ1 )
4363 );
4364
4365 eBiasQ2 = tyAddr==Ity_I32 ? mkU32(bias+offQ2) : mkU64(bias+offQ2);
4366 addrQ2 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ2) );
4367 vdataQ2 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_2, vdata));
4368 diQ2 = unsafeIRDirty_0_N(
4369 1/*regparms*/,
4370 hname, VG_(fnptr_to_fnentry)( helper ),
4371 mkIRExprVec_2( addrQ2, vdataQ2 )
4372 );
4373
4374 eBiasQ3 = tyAddr==Ity_I32 ? mkU32(bias+offQ3) : mkU64(bias+offQ3);
4375 addrQ3 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ3) );
4376 vdataQ3 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_3, vdata));
4377 diQ3 = unsafeIRDirty_0_N(
4378 1/*regparms*/,
4379 hname, VG_(fnptr_to_fnentry)( helper ),
4380 mkIRExprVec_2( addrQ3, vdataQ3 )
4381 );
4382
4383 if (guard)
4384 diQ0->guard = diQ1->guard = diQ2->guard = diQ3->guard = guard;
4385
4386 setHelperAnns( mce, diQ0 );
4387 setHelperAnns( mce, diQ1 );
4388 setHelperAnns( mce, diQ2 );
4389 setHelperAnns( mce, diQ3 );
4390 stmt( 'V', mce, IRStmt_Dirty(diQ0) );
4391 stmt( 'V', mce, IRStmt_Dirty(diQ1) );
4392 stmt( 'V', mce, IRStmt_Dirty(diQ2) );
4393 stmt( 'V', mce, IRStmt_Dirty(diQ3) );
4394
4395 }
4396 else if (UNLIKELY(ty == Ity_V128)) {
sewardj170ee212004-12-10 18:57:51 +00004397
sewardj20d38f22005-02-07 23:50:18 +00004398 /* V128-bit case */
sewardj170ee212004-12-10 18:57:51 +00004399 /* See comment in next clause re 64-bit regparms */
sewardj2e595852005-06-30 23:33:37 +00004400 /* also, need to be careful about endianness */
4401
njn4c245e52009-03-15 23:25:38 +00004402 Int offLo64, offHi64;
4403 IRDirty *diLo64, *diHi64;
4404 IRAtom *addrLo64, *addrHi64;
4405 IRAtom *vdataLo64, *vdataHi64;
4406 IRAtom *eBiasLo64, *eBiasHi64;
4407
sewardj2e595852005-06-30 23:33:37 +00004408 if (end == Iend_LE) {
4409 offLo64 = 0;
4410 offHi64 = 8;
4411 } else {
sewardj2e595852005-06-30 23:33:37 +00004412 offLo64 = 8;
4413 offHi64 = 0;
4414 }
4415
4416 eBiasLo64 = tyAddr==Ity_I32 ? mkU32(bias+offLo64) : mkU64(bias+offLo64);
sewardj7cf4e6b2008-05-01 20:24:26 +00004417 addrLo64 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasLo64) );
4418 vdataLo64 = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vdata));
sewardj170ee212004-12-10 18:57:51 +00004419 diLo64 = unsafeIRDirty_0_N(
sewardj53ee1fc2005-12-23 02:29:58 +00004420 1/*regparms*/,
4421 hname, VG_(fnptr_to_fnentry)( helper ),
4422 mkIRExprVec_2( addrLo64, vdataLo64 )
4423 );
sewardj2e595852005-06-30 23:33:37 +00004424 eBiasHi64 = tyAddr==Ity_I32 ? mkU32(bias+offHi64) : mkU64(bias+offHi64);
sewardj7cf4e6b2008-05-01 20:24:26 +00004425 addrHi64 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasHi64) );
4426 vdataHi64 = assignNew('V', mce, Ity_I64, unop(Iop_V128HIto64, vdata));
sewardj170ee212004-12-10 18:57:51 +00004427 diHi64 = unsafeIRDirty_0_N(
sewardj53ee1fc2005-12-23 02:29:58 +00004428 1/*regparms*/,
4429 hname, VG_(fnptr_to_fnentry)( helper ),
4430 mkIRExprVec_2( addrHi64, vdataHi64 )
4431 );
sewardj1c0ce7a2009-07-01 08:10:49 +00004432 if (guard) diLo64->guard = guard;
4433 if (guard) diHi64->guard = guard;
sewardj170ee212004-12-10 18:57:51 +00004434 setHelperAnns( mce, diLo64 );
4435 setHelperAnns( mce, diHi64 );
sewardj7cf4e6b2008-05-01 20:24:26 +00004436 stmt( 'V', mce, IRStmt_Dirty(diLo64) );
4437 stmt( 'V', mce, IRStmt_Dirty(diHi64) );
sewardj170ee212004-12-10 18:57:51 +00004438
sewardj95448072004-11-22 20:19:51 +00004439 } else {
sewardj170ee212004-12-10 18:57:51 +00004440
njn4c245e52009-03-15 23:25:38 +00004441 IRDirty *di;
4442 IRAtom *addrAct;
4443
sewardj170ee212004-12-10 18:57:51 +00004444 /* 8/16/32/64-bit cases */
4445 /* Generate the actual address into addrAct. */
4446 if (bias == 0) {
4447 addrAct = addr;
4448 } else {
njn4c245e52009-03-15 23:25:38 +00004449 IRAtom* eBias = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
sewardj7cf4e6b2008-05-01 20:24:26 +00004450 addrAct = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBias));
sewardj170ee212004-12-10 18:57:51 +00004451 }
4452
4453 if (ty == Ity_I64) {
4454 /* We can't do this with regparm 2 on 32-bit platforms, since
4455 the back ends aren't clever enough to handle 64-bit
4456 regparm args. Therefore be different. */
4457 di = unsafeIRDirty_0_N(
sewardj53ee1fc2005-12-23 02:29:58 +00004458 1/*regparms*/,
4459 hname, VG_(fnptr_to_fnentry)( helper ),
4460 mkIRExprVec_2( addrAct, vdata )
4461 );
sewardj170ee212004-12-10 18:57:51 +00004462 } else {
4463 di = unsafeIRDirty_0_N(
sewardj53ee1fc2005-12-23 02:29:58 +00004464 2/*regparms*/,
4465 hname, VG_(fnptr_to_fnentry)( helper ),
sewardj170ee212004-12-10 18:57:51 +00004466 mkIRExprVec_2( addrAct,
sewardj53ee1fc2005-12-23 02:29:58 +00004467 zwidenToHostWord( mce, vdata ))
4468 );
sewardj170ee212004-12-10 18:57:51 +00004469 }
sewardj1c0ce7a2009-07-01 08:10:49 +00004470 if (guard) di->guard = guard;
sewardj170ee212004-12-10 18:57:51 +00004471 setHelperAnns( mce, di );
sewardj7cf4e6b2008-05-01 20:24:26 +00004472 stmt( 'V', mce, IRStmt_Dirty(di) );
sewardj95448072004-11-22 20:19:51 +00004473 }
njn25e49d8e72002-09-23 09:36:25 +00004474
sewardj95448072004-11-22 20:19:51 +00004475}
njn25e49d8e72002-09-23 09:36:25 +00004476
njn25e49d8e72002-09-23 09:36:25 +00004477
sewardj95448072004-11-22 20:19:51 +00004478/* Do lazy pessimistic propagation through a dirty helper call, by
4479 looking at the annotations on it. This is the most complex part of
4480 Memcheck. */
njn25e49d8e72002-09-23 09:36:25 +00004481
sewardj95448072004-11-22 20:19:51 +00004482static IRType szToITy ( Int n )
4483{
4484 switch (n) {
4485 case 1: return Ity_I8;
4486 case 2: return Ity_I16;
4487 case 4: return Ity_I32;
4488 case 8: return Ity_I64;
4489 default: VG_(tool_panic)("szToITy(memcheck)");
4490 }
4491}
njn25e49d8e72002-09-23 09:36:25 +00004492
static
void do_shadow_Dirty ( MCEnv* mce, IRDirty* d )
{
   /* Summarise pessimistically the definedness of everything the
      dirty helper can observe (its unmasked args, the guest state it
      reads, the memory it reads), folding it all into a single
      32-bit V-value 'curr' via PCast+UifU.  Then re-distribute that
      summary to everything the helper can write (its result temp,
      the guest state it writes, the memory it writes). */
   Int i, k, n, toDo, gSz, gOff;
   IRAtom *src, *here, *curr;
   IRType tySrc, tyDst;
   IRTemp dst;
   IREndness end;

   /* What's the native endianness?  We need to know this. */
#  if defined(VG_BIGENDIAN)
   end = Iend_BE;
#  elif defined(VG_LITTLEENDIAN)
   end = Iend_LE;
#  else
#    error "Unknown endianness"
#  endif

   /* First check the guard. */
   complainIfUndefined(mce, d->guard, NULL);

   /* Now round up all inputs and PCast over them.  Start with an
      all-defined 32-bit value; UifU-ing inputs into it can only make
      it "less defined". */
   curr = definedOfType(Ity_I32);

   /* Inputs: unmasked args
      Note: arguments are evaluated REGARDLESS of the guard expression */
   for (i = 0; d->args[i]; i++) {
      if (d->cee->mcx_mask & (1<<i)) {
         /* ignore this arg -- the callee declared it irrelevant for
            definedness purposes via its mcx_mask */
      } else {
         here = mkPCastTo( mce, Ity_I32, expr2vbits(mce, d->args[i]) );
         curr = mkUifU32(mce, here, curr);
      }
   }

   /* Inputs: guest state that we read. */
   for (i = 0; i < d->nFxState; i++) {
      tl_assert(d->fxState[i].fx != Ifx_None);
      if (d->fxState[i].fx == Ifx_Write)
         continue;

      /* Enumerate the described state segments */
      for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) {
         gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen;
         gSz = d->fxState[i].size;

         /* Ignore any sections marked as 'always defined'. */
         if (isAlwaysDefd(mce, gOff, gSz)) {
            if (0)
            VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
                        gOff, gSz);
            continue;
         }

         /* This state element is read or modified.  So we need to
            consider it.  If larger than 8 bytes, deal with it in
            8-byte chunks. */
         while (True) {
            tl_assert(gSz >= 0);
            if (gSz == 0) break;
            n = gSz <= 8 ? gSz : 8;
            /* update 'curr' with UifU of the state slice
               gOff .. gOff+n-1 */
            tySrc = szToITy( n );

            /* Observe the guard expression.  If it is false use an
               all-bits-defined bit pattern, so a guarded-out helper
               call cannot give rise to false undefinedness. */
            IRAtom *cond, *iffalse, *iftrue;

            cond = assignNew('V', mce, Ity_I8, unop(Iop_1Uto8, d->guard));
            iftrue = assignNew('V', mce, tySrc, shadow_GET(mce, gOff, tySrc));
            iffalse = assignNew('V', mce, tySrc, definedOfType(tySrc));
            src = assignNew('V', mce, tySrc,
                            IRExpr_Mux0X(cond, iffalse, iftrue));

            here = mkPCastTo( mce, Ity_I32, src );
            curr = mkUifU32(mce, here, curr);
            gSz -= n;
            gOff += n;
         }
      }
   }

   /* Inputs: memory.  First set up some info needed regardless of
      whether we're doing reads or writes. */

   if (d->mFx != Ifx_None) {
      /* Because we may do multiple shadow loads/stores from the same
         base address, it's best to do a single test of its
         definedness right now.  Post-instrumentation optimisation
         should remove all but this test. */
      IRType tyAddr;
      tl_assert(d->mAddr);
      complainIfUndefined(mce, d->mAddr, d->guard);

      tyAddr = typeOfIRExpr(mce->sb->tyenv, d->mAddr);
      tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);
      tl_assert(tyAddr == mce->hWordTy); /* not really right */
   }

   /* Deal with memory inputs (reads or modifies) */
   if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
      toDo = d->mSize;
      /* chew off 32-bit chunks.  We don't care about the endianness
         since it's all going to be condensed down to a single bit,
         but nevertheless choose an endianness which is hopefully
         native to the platform. */
      while (toDo >= 4) {
         here = mkPCastTo(
                   mce, Ity_I32,
                   expr2vbits_guarded_Load ( mce, end, Ity_I32, d->mAddr,
                                             d->mSize - toDo, d->guard )
                );
         curr = mkUifU32(mce, here, curr);
         toDo -= 4;
      }
      /* chew off 16-bit chunks */
      while (toDo >= 2) {
         here = mkPCastTo(
                   mce, Ity_I32,
                   expr2vbits_guarded_Load ( mce, end, Ity_I16, d->mAddr,
                                             d->mSize - toDo, d->guard )
                );
         curr = mkUifU32(mce, here, curr);
         toDo -= 2;
      }
      /* chew off the remaining 8-bit chunk, if any */
      if (toDo == 1) {
         here = mkPCastTo(
                   mce, Ity_I32,
                   expr2vbits_guarded_Load ( mce, end, Ity_I8, d->mAddr,
                                             d->mSize - toDo, d->guard )
                );
         curr = mkUifU32(mce, here, curr);
         toDo -= 1;
      }
      tl_assert(toDo == 0);
   }

   /* Whew!  So curr is a 32-bit V-value summarising pessimistically
      all the inputs to the helper.  Now we need to re-distribute the
      results to all destinations. */

   /* Outputs: the destination temporary, if there is one. */
   if (d->tmp != IRTemp_INVALID) {
      dst = findShadowTmpV(mce, d->tmp);
      tyDst = typeOfIRTemp(mce->sb->tyenv, d->tmp);
      assign( 'V', mce, dst, mkPCastTo( mce, tyDst, curr) );
   }

   /* Outputs: guest state that we write or modify. */
   for (i = 0; i < d->nFxState; i++) {
      tl_assert(d->fxState[i].fx != Ifx_None);
      if (d->fxState[i].fx == Ifx_Read)
         continue;

      /* Enumerate the described state segments */
      for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) {
         gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen;
         gSz = d->fxState[i].size;

         /* Ignore any sections marked as 'always defined'. */
         if (isAlwaysDefd(mce, gOff, gSz))
            continue;

         /* This state element is written or modified.  So we need to
            consider it.  If larger than 8 bytes, deal with it in
            8-byte chunks. */
         while (True) {
            tl_assert(gSz >= 0);
            if (gSz == 0) break;
            n = gSz <= 8 ? gSz : 8;
            /* Write suitably-casted 'curr' to the state slice
               gOff .. gOff+n-1 */
            tyDst = szToITy( n );
            do_shadow_PUT( mce, gOff,
                           NULL, /* original atom */
                           mkPCastTo( mce, tyDst, curr ), d->guard );
            gSz -= n;
            gOff += n;
         }
      }
   }

   /* Outputs: memory that we write or modify.  Same comments about
      endianness as above apply. */
   if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
      toDo = d->mSize;
      /* chew off 32-bit chunks */
      while (toDo >= 4) {
         do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
                          NULL, /* original data */
                          mkPCastTo( mce, Ity_I32, curr ),
                          d->guard );
         toDo -= 4;
      }
      /* chew off 16-bit chunks */
      while (toDo >= 2) {
         do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
                          NULL, /* original data */
                          mkPCastTo( mce, Ity_I16, curr ),
                          d->guard );
         toDo -= 2;
      }
      /* chew off the remaining 8-bit chunk, if any */
      if (toDo == 1) {
         do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
                          NULL, /* original data */
                          mkPCastTo( mce, Ity_I8, curr ),
                          d->guard );
         toDo -= 1;
      }
      tl_assert(toDo == 0);
   }

}
4709
sewardj1c0ce7a2009-07-01 08:10:49 +00004710
sewardj826ec492005-05-12 18:05:00 +00004711/* We have an ABI hint telling us that [base .. base+len-1] is to
4712 become undefined ("writable"). Generate code to call a helper to
4713 notify the A/V bit machinery of this fact.
4714
4715 We call
sewardj7cf4e6b2008-05-01 20:24:26 +00004716 void MC_(helperc_MAKE_STACK_UNINIT) ( Addr base, UWord len,
4717 Addr nia );
sewardj826ec492005-05-12 18:05:00 +00004718*/
4719static
sewardj7cf4e6b2008-05-01 20:24:26 +00004720void do_AbiHint ( MCEnv* mce, IRExpr* base, Int len, IRExpr* nia )
sewardj826ec492005-05-12 18:05:00 +00004721{
4722 IRDirty* di;
sewardj7cf4e6b2008-05-01 20:24:26 +00004723 /* Minor optimisation: if not doing origin tracking, ignore the
4724 supplied nia and pass zero instead. This is on the basis that
4725 MC_(helperc_MAKE_STACK_UNINIT) will ignore it anyway, and we can
4726 almost always generate a shorter instruction to put zero into a
4727 register than any other value. */
4728 if (MC_(clo_mc_level) < 3)
4729 nia = mkIRExpr_HWord(0);
4730
sewardj826ec492005-05-12 18:05:00 +00004731 di = unsafeIRDirty_0_N(
4732 0/*regparms*/,
4733 "MC_(helperc_MAKE_STACK_UNINIT)",
sewardj53ee1fc2005-12-23 02:29:58 +00004734 VG_(fnptr_to_fnentry)( &MC_(helperc_MAKE_STACK_UNINIT) ),
sewardj7cf4e6b2008-05-01 20:24:26 +00004735 mkIRExprVec_3( base, mkIRExpr_HWord( (UInt)len), nia )
sewardj826ec492005-05-12 18:05:00 +00004736 );
sewardj7cf4e6b2008-05-01 20:24:26 +00004737 stmt( 'V', mce, IRStmt_Dirty(di) );
sewardj826ec492005-05-12 18:05:00 +00004738}
4739
njn25e49d8e72002-09-23 09:36:25 +00004740
sewardj1c0ce7a2009-07-01 08:10:49 +00004741/* ------ Dealing with IRCAS (big and complex) ------ */
4742
4743/* FWDS */
4744static IRAtom* gen_load_b ( MCEnv* mce, Int szB,
4745 IRAtom* baseaddr, Int offset );
4746static IRAtom* gen_maxU32 ( MCEnv* mce, IRAtom* b1, IRAtom* b2 );
4747static void gen_store_b ( MCEnv* mce, Int szB,
4748 IRAtom* baseaddr, Int offset, IRAtom* dataB,
4749 IRAtom* guard );
4750
4751static void do_shadow_CAS_single ( MCEnv* mce, IRCAS* cas );
4752static void do_shadow_CAS_double ( MCEnv* mce, IRCAS* cas );
4753
4754
4755/* Either ORIG and SHADOW are both IRExpr.RdTmps, or they are both
4756 IRExpr.Consts, else this asserts. If they are both Consts, it
4757 doesn't do anything. So that just leaves the RdTmp case.
4758
4759 In which case: this assigns the shadow value SHADOW to the IR
4760 shadow temporary associated with ORIG. That is, ORIG, being an
4761 original temporary, will have a shadow temporary associated with
4762 it. However, in the case envisaged here, there will so far have
4763 been no IR emitted to actually write a shadow value into that
4764 temporary. What this routine does is to (emit IR to) copy the
4765 value in SHADOW into said temporary, so that after this call,
4766 IRExpr.RdTmps of ORIG's shadow temp will correctly pick up the
4767 value in SHADOW.
4768
4769 Point is to allow callers to compute "by hand" a shadow value for
4770 ORIG, and force it to be associated with ORIG.
4771
4772 How do we know that that shadow associated with ORIG has not so far
4773 been assigned to? Well, we don't per se know that, but supposing
4774 it had. Then this routine would create a second assignment to it,
4775 and later the IR sanity checker would barf. But that never
4776 happens. QED.
4777*/
4778static void bind_shadow_tmp_to_orig ( UChar how,
4779 MCEnv* mce,
4780 IRAtom* orig, IRAtom* shadow )
4781{
4782 tl_assert(isOriginalAtom(mce, orig));
4783 tl_assert(isShadowAtom(mce, shadow));
4784 switch (orig->tag) {
4785 case Iex_Const:
4786 tl_assert(shadow->tag == Iex_Const);
4787 break;
4788 case Iex_RdTmp:
4789 tl_assert(shadow->tag == Iex_RdTmp);
4790 if (how == 'V') {
4791 assign('V', mce, findShadowTmpV(mce,orig->Iex.RdTmp.tmp),
4792 shadow);
4793 } else {
4794 tl_assert(how == 'B');
4795 assign('B', mce, findShadowTmpB(mce,orig->Iex.RdTmp.tmp),
4796 shadow);
4797 }
4798 break;
4799 default:
4800 tl_assert(0);
4801 }
4802}
4803
4804
static
void do_shadow_CAS ( MCEnv* mce, IRCAS* cas )
{
   /* Scheme is (both single- and double- cases):

         1. fetch data#,dataB (the proposed new value)

         2. fetch expd#,expdB (what we expect to see at the address)

         3. check definedness of address

         4. load old#,oldB from shadow memory; this also checks
            addressibility of the address

         5. the CAS itself

         6. compute "expected == old".  See COMMENT_ON_CasCmpEQ below.

         7. if "expected == old" (as computed by (6))
               store data#,dataB to shadow memory

      Note that 5 reads 'old' but 4 reads 'old#'.  Similarly, 5 stores
      'data' but 7 stores 'data#'.  Hence it is possible for the
      shadow data to be incorrectly checked and/or updated:

      * 7 is at least gated correctly, since the 'expected == old'
        condition is derived from outputs of 5.  However, the shadow
        write could happen too late: imagine after 5 we are
        descheduled, a different thread runs, writes a different
        (shadow) value at the address, and then we resume, hence
        overwriting the shadow value written by the other thread.

      Because the original memory access is atomic, there's no way to
      make both the original and shadow accesses into a single atomic
      thing, hence this is unavoidable.

      At least as Valgrind stands, I don't think it's a problem, since
      we're single threaded *and* we guarantee that there are no
      context switches during the execution of any specific superblock
      -- context switches can only happen at superblock boundaries.

      If Valgrind ever becomes MT in the future, then it might be more
      of a problem.  A possible kludge would be to artificially
      associate with the location, a lock, which we must acquire and
      release around the transaction as a whole.  Hmm, that probably
      wouldn't work properly since it only guards us against other
      threads doing CASs on the same location, not against other
      threads doing normal reads and writes.

      ------------------------------------------------------------

      COMMENT_ON_CasCmpEQ:

      Note two things.  Firstly, in the sequence above, we compute
      "expected == old", but we don't check definedness of it.  Why
      not?  Also, the x86 and amd64 front ends use
      Iop_CmpCas{EQ,NE}{8,16,32,64} comparisons to make the equivalent
      determination (expected == old ?) for themselves, and we also
      don't check definedness for those primops; we just say that the
      result is defined.  Why?  Details follow.

      x86/amd64 contains various forms of locked insns:
      * lock prefix before all basic arithmetic insn;
        eg lock xorl %reg1,(%reg2)
      * atomic exchange reg-mem
      * compare-and-swaps

      Rather than attempt to represent them all, which would be a
      royal PITA, I used a result from Maurice Herlihy
      (http://en.wikipedia.org/wiki/Maurice_Herlihy), in which he
      demonstrates that compare-and-swap is a primitive more general
      than the other two, and so can be used to represent all of them.
      So the translation scheme for (eg) lock incl (%reg) is as
      follows:

        again:
         old = * %reg
         new = old + 1
         atomically { if (* %reg == old) { * %reg = new } else { goto again } }

      The "atomically" is the CAS bit.  The scheme is always the same:
      get old value from memory, compute new value, atomically stuff
      new value back in memory iff the old value has not changed (iow,
      no other thread modified it in the meantime).  If it has changed
      then we've been out-raced and we have to start over.

      Now that's all very neat, but it has the bad side effect of
      introducing an explicit equality test into the translation.
      Consider the behaviour of said code on a memory location which
      is uninitialised.  We will wind up doing a comparison on
      uninitialised data, and mc duly complains.

      What's difficult about this is, the common case is that the
      location is uncontended, and so we're usually comparing the same
      value (* %reg) with itself.  So we shouldn't complain even if it
      is undefined.  But mc doesn't know that.

      My solution is to mark the == in the IR specially, so as to tell
      mc that it almost certainly compares a value with itself, and we
      should just regard the result as always defined.  Rather than
      add a bit to all IROps, I just cloned Iop_CmpEQ{8,16,32,64} into
      Iop_CasCmpEQ{8,16,32,64} so as not to disturb anything else.

      So there's always the question of, can this give a false
      negative?  eg, imagine that initially, * %reg is defined; and we
      read that; but then in the gap between the read and the CAS, a
      different thread writes an undefined (and different) value at
      the location.  Then the CAS in this thread will fail and we will
      go back to "again:", but without knowing that the trip back
      there was based on an undefined comparison.  No matter; at least
      the other thread won the race and the location is correctly
      marked as undefined.  What if it wrote an uninitialised version
      of the same value that was there originally, though?

      etc etc.  Seems like there's a small corner case in which we
      might lose the fact that something's defined -- we're out-raced
      in between the "old = * reg" and the "atomically {", _and_ the
      other thread is writing in an undefined version of what's
      already there.  Well, that seems pretty unlikely.

      ---

      If we ever need to reinstate it .. code which generates a
      definedness test for "expected == old" was removed at r10432 of
      this file.
   */
   /* Dispatch on single- vs double-width CAS, distinguished by
      whether the 'oldHi' output temp is in use. */
   if (cas->oldHi == IRTemp_INVALID) {
      do_shadow_CAS_single( mce, cas );
   } else {
      do_shadow_CAS_double( mce, cas );
   }
}
4937
4938
static void do_shadow_CAS_single ( MCEnv* mce, IRCAS* cas )
{
   /* Instrument a single-width CAS, following steps 1..7 described
      in the comment in do_shadow_CAS above.  The statement order is
      significant: the shadow load (4) must precede the CAS itself
      (5), and the guarded shadow store (7) must follow it. */
   IRAtom *vdataLo = NULL, *bdataLo = NULL;
   IRAtom *vexpdLo = NULL, *bexpdLo = NULL;
   IRAtom *voldLo = NULL, *boldLo = NULL;
   IRAtom *expd_eq_old = NULL;
   IROp opCasCmpEQ;
   Int elemSzB;
   IRType elemTy;
   Bool otrak = MC_(clo_mc_level) >= 3; /* a shorthand: origin tracking on? */

   /* single CAS */
   tl_assert(cas->oldHi == IRTemp_INVALID);
   tl_assert(cas->expdHi == NULL);
   tl_assert(cas->dataHi == NULL);

   elemTy = typeOfIRExpr(mce->sb->tyenv, cas->expdLo);
   switch (elemTy) {
      case Ity_I8:  elemSzB = 1; opCasCmpEQ = Iop_CasCmpEQ8;  break;
      case Ity_I16: elemSzB = 2; opCasCmpEQ = Iop_CasCmpEQ16; break;
      case Ity_I32: elemSzB = 4; opCasCmpEQ = Iop_CasCmpEQ32; break;
      case Ity_I64: elemSzB = 8; opCasCmpEQ = Iop_CasCmpEQ64; break;
      default: tl_assert(0); /* IR defn disallows any other types */
   }

   /* 1. fetch data# (the proposed new value) */
   tl_assert(isOriginalAtom(mce, cas->dataLo));
   vdataLo
      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataLo));
   tl_assert(isShadowAtom(mce, vdataLo));
   if (otrak) {
      bdataLo
         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataLo));
      tl_assert(isShadowAtom(mce, bdataLo));
   }

   /* 2. fetch expected# (what we expect to see at the address) */
   tl_assert(isOriginalAtom(mce, cas->expdLo));
   vexpdLo
      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdLo));
   tl_assert(isShadowAtom(mce, vexpdLo));
   if (otrak) {
      bexpdLo
         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdLo));
      tl_assert(isShadowAtom(mce, bexpdLo));
   }

   /* 3. check definedness of address */
   /* 4. fetch old# from shadow memory; this also checks
         addressibility of the address */
   voldLo
      = assignNew(
           'V', mce, elemTy,
           expr2vbits_Load(
              mce,
              cas->end, elemTy, cas->addr, 0/*Addr bias*/
        ));
   bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldLo), voldLo);
   if (otrak) {
      boldLo
         = assignNew('B', mce, Ity_I32,
                     gen_load_b(mce, elemSzB, cas->addr, 0/*addr bias*/));
      bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldLo), boldLo);
   }

   /* 5. the CAS itself */
   stmt( 'C', mce, IRStmt_CAS(cas) );

   /* 6. compute "expected == old" */
   /* See COMMENT_ON_CasCmpEQ in this file for background/rationale. */
   /* Note that 'C' is kinda faking it; it is indeed a non-shadow
      tree, but it's not copied from the input block. */
   expd_eq_old
      = assignNew('C', mce, Ity_I1,
                  binop(opCasCmpEQ, cas->expdLo, mkexpr(cas->oldLo)));

   /* 7. if "expected == old"
            store data# to shadow memory */
   do_shadow_Store( mce, cas->end, cas->addr, 0/*bias*/,
                    NULL/*data*/, vdataLo/*vdata*/,
                    expd_eq_old/*guard for store*/ );
   if (otrak) {
      gen_store_b( mce, elemSzB, cas->addr, 0/*offset*/,
                   bdataLo/*bdata*/,
                   expd_eq_old/*guard for store*/ );
   }
}
5026
5027
5028static void do_shadow_CAS_double ( MCEnv* mce, IRCAS* cas )
5029{
5030 IRAtom *vdataHi = NULL, *bdataHi = NULL;
5031 IRAtom *vdataLo = NULL, *bdataLo = NULL;
5032 IRAtom *vexpdHi = NULL, *bexpdHi = NULL;
5033 IRAtom *vexpdLo = NULL, *bexpdLo = NULL;
5034 IRAtom *voldHi = NULL, *boldHi = NULL;
5035 IRAtom *voldLo = NULL, *boldLo = NULL;
sewardjafed4c52009-07-12 13:00:17 +00005036 IRAtom *xHi = NULL, *xLo = NULL, *xHL = NULL;
5037 IRAtom *expd_eq_old = NULL, *zero = NULL;
5038 IROp opCasCmpEQ, opOr, opXor;
sewardj1c0ce7a2009-07-01 08:10:49 +00005039 Int elemSzB, memOffsLo, memOffsHi;
5040 IRType elemTy;
5041 Bool otrak = MC_(clo_mc_level) >= 3; /* a shorthand */
5042
5043 /* double CAS */
5044 tl_assert(cas->oldHi != IRTemp_INVALID);
5045 tl_assert(cas->expdHi != NULL);
5046 tl_assert(cas->dataHi != NULL);
5047
5048 elemTy = typeOfIRExpr(mce->sb->tyenv, cas->expdLo);
5049 switch (elemTy) {
5050 case Ity_I8:
sewardjafed4c52009-07-12 13:00:17 +00005051 opCasCmpEQ = Iop_CasCmpEQ8; opOr = Iop_Or8; opXor = Iop_Xor8;
sewardj1c0ce7a2009-07-01 08:10:49 +00005052 elemSzB = 1; zero = mkU8(0);
5053 break;
5054 case Ity_I16:
sewardjafed4c52009-07-12 13:00:17 +00005055 opCasCmpEQ = Iop_CasCmpEQ16; opOr = Iop_Or16; opXor = Iop_Xor16;
sewardj1c0ce7a2009-07-01 08:10:49 +00005056 elemSzB = 2; zero = mkU16(0);
5057 break;
5058 case Ity_I32:
sewardjafed4c52009-07-12 13:00:17 +00005059 opCasCmpEQ = Iop_CasCmpEQ32; opOr = Iop_Or32; opXor = Iop_Xor32;
sewardj1c0ce7a2009-07-01 08:10:49 +00005060 elemSzB = 4; zero = mkU32(0);
5061 break;
5062 case Ity_I64:
sewardjafed4c52009-07-12 13:00:17 +00005063 opCasCmpEQ = Iop_CasCmpEQ64; opOr = Iop_Or64; opXor = Iop_Xor64;
sewardj1c0ce7a2009-07-01 08:10:49 +00005064 elemSzB = 8; zero = mkU64(0);
5065 break;
5066 default:
5067 tl_assert(0); /* IR defn disallows any other types */
5068 }
5069
5070 /* 1. fetch data# (the proposed new value) */
5071 tl_assert(isOriginalAtom(mce, cas->dataHi));
5072 tl_assert(isOriginalAtom(mce, cas->dataLo));
5073 vdataHi
5074 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataHi));
5075 vdataLo
5076 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataLo));
5077 tl_assert(isShadowAtom(mce, vdataHi));
5078 tl_assert(isShadowAtom(mce, vdataLo));
5079 if (otrak) {
5080 bdataHi
5081 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataHi));
5082 bdataLo
5083 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataLo));
5084 tl_assert(isShadowAtom(mce, bdataHi));
5085 tl_assert(isShadowAtom(mce, bdataLo));
5086 }
5087
5088 /* 2. fetch expected# (what we expect to see at the address) */
5089 tl_assert(isOriginalAtom(mce, cas->expdHi));
5090 tl_assert(isOriginalAtom(mce, cas->expdLo));
5091 vexpdHi
5092 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdHi));
5093 vexpdLo
5094 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdLo));
5095 tl_assert(isShadowAtom(mce, vexpdHi));
5096 tl_assert(isShadowAtom(mce, vexpdLo));
5097 if (otrak) {
5098 bexpdHi
5099 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdHi));
5100 bexpdLo
5101 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdLo));
5102 tl_assert(isShadowAtom(mce, bexpdHi));
5103 tl_assert(isShadowAtom(mce, bexpdLo));
5104 }
5105
5106 /* 3. check definedness of address */
5107 /* 4. fetch old# from shadow memory; this also checks
5108 addressibility of the address */
5109 if (cas->end == Iend_LE) {
5110 memOffsLo = 0;
5111 memOffsHi = elemSzB;
5112 } else {
5113 tl_assert(cas->end == Iend_BE);
5114 memOffsLo = elemSzB;
5115 memOffsHi = 0;
5116 }
5117 voldHi
5118 = assignNew(
5119 'V', mce, elemTy,
5120 expr2vbits_Load(
5121 mce,
5122 cas->end, elemTy, cas->addr, memOffsHi/*Addr bias*/
5123 ));
5124 voldLo
5125 = assignNew(
5126 'V', mce, elemTy,
5127 expr2vbits_Load(
5128 mce,
5129 cas->end, elemTy, cas->addr, memOffsLo/*Addr bias*/
5130 ));
sewardjafed4c52009-07-12 13:00:17 +00005131 bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldHi), voldHi);
5132 bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldLo), voldLo);
sewardj1c0ce7a2009-07-01 08:10:49 +00005133 if (otrak) {
5134 boldHi
5135 = assignNew('B', mce, Ity_I32,
5136 gen_load_b(mce, elemSzB, cas->addr,
5137 memOffsHi/*addr bias*/));
5138 boldLo
5139 = assignNew('B', mce, Ity_I32,
5140 gen_load_b(mce, elemSzB, cas->addr,
5141 memOffsLo/*addr bias*/));
sewardjafed4c52009-07-12 13:00:17 +00005142 bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldHi), boldHi);
5143 bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldLo), boldLo);
sewardj1c0ce7a2009-07-01 08:10:49 +00005144 }
5145
5146 /* 5. the CAS itself */
5147 stmt( 'C', mce, IRStmt_CAS(cas) );
5148
sewardjafed4c52009-07-12 13:00:17 +00005149 /* 6. compute "expected == old" */
   /* See COMMENT_ON_CasCmpEQ in this file for background/rationale. */
sewardj1c0ce7a2009-07-01 08:10:49 +00005151 /* Note that 'C' is kinda faking it; it is indeed a non-shadow
5152 tree, but it's not copied from the input block. */
5153 /*
5154 xHi = oldHi ^ expdHi;
5155 xLo = oldLo ^ expdLo;
5156 xHL = xHi | xLo;
5157 expd_eq_old = xHL == 0;
5158 */
sewardj1c0ce7a2009-07-01 08:10:49 +00005159 xHi = assignNew('C', mce, elemTy,
5160 binop(opXor, cas->expdHi, mkexpr(cas->oldHi)));
sewardj1c0ce7a2009-07-01 08:10:49 +00005161 xLo = assignNew('C', mce, elemTy,
5162 binop(opXor, cas->expdLo, mkexpr(cas->oldLo)));
sewardj1c0ce7a2009-07-01 08:10:49 +00005163 xHL = assignNew('C', mce, elemTy,
5164 binop(opOr, xHi, xLo));
sewardj1c0ce7a2009-07-01 08:10:49 +00005165 expd_eq_old
5166 = assignNew('C', mce, Ity_I1,
sewardjafed4c52009-07-12 13:00:17 +00005167 binop(opCasCmpEQ, xHL, zero));
sewardj1c0ce7a2009-07-01 08:10:49 +00005168
5169 /* 7. if "expected == old"
5170 store data# to shadow memory */
5171 do_shadow_Store( mce, cas->end, cas->addr, memOffsHi/*bias*/,
5172 NULL/*data*/, vdataHi/*vdata*/,
5173 expd_eq_old/*guard for store*/ );
5174 do_shadow_Store( mce, cas->end, cas->addr, memOffsLo/*bias*/,
5175 NULL/*data*/, vdataLo/*vdata*/,
5176 expd_eq_old/*guard for store*/ );
5177 if (otrak) {
5178 gen_store_b( mce, elemSzB, cas->addr, memOffsHi/*offset*/,
5179 bdataHi/*bdata*/,
5180 expd_eq_old/*guard for store*/ );
5181 gen_store_b( mce, elemSzB, cas->addr, memOffsLo/*offset*/,
5182 bdataLo/*bdata*/,
5183 expd_eq_old/*guard for store*/ );
5184 }
5185}
5186
5187
sewardjdb5907d2009-11-26 17:20:21 +00005188/* ------ Dealing with LL/SC (not difficult) ------ */
5189
5190static void do_shadow_LLSC ( MCEnv* mce,
5191 IREndness stEnd,
5192 IRTemp stResult,
5193 IRExpr* stAddr,
5194 IRExpr* stStoredata )
5195{
5196 /* In short: treat a load-linked like a normal load followed by an
5197 assignment of the loaded (shadow) data to the result temporary.
5198 Treat a store-conditional like a normal store, and mark the
5199 result temporary as defined. */
5200 IRType resTy = typeOfIRTemp(mce->sb->tyenv, stResult);
5201 IRTemp resTmp = findShadowTmpV(mce, stResult);
5202
5203 tl_assert(isIRAtom(stAddr));
5204 if (stStoredata)
5205 tl_assert(isIRAtom(stStoredata));
5206
5207 if (stStoredata == NULL) {
5208 /* Load Linked */
5209 /* Just treat this as a normal load, followed by an assignment of
5210 the value to .result. */
5211 /* Stay sane */
5212 tl_assert(resTy == Ity_I64 || resTy == Ity_I32
5213 || resTy == Ity_I16 || resTy == Ity_I8);
5214 assign( 'V', mce, resTmp,
5215 expr2vbits_Load(
5216 mce, stEnd, resTy, stAddr, 0/*addr bias*/));
5217 } else {
5218 /* Store Conditional */
5219 /* Stay sane */
5220 IRType dataTy = typeOfIRExpr(mce->sb->tyenv,
5221 stStoredata);
5222 tl_assert(dataTy == Ity_I64 || dataTy == Ity_I32
5223 || dataTy == Ity_I16 || dataTy == Ity_I8);
5224 do_shadow_Store( mce, stEnd,
5225 stAddr, 0/* addr bias */,
5226 stStoredata,
5227 NULL /* shadow data */,
5228 NULL/*guard*/ );
5229 /* This is a store conditional, so it writes to .result a value
5230 indicating whether or not the store succeeded. Just claim
5231 this value is always defined. In the PowerPC interpretation
5232 of store-conditional, definedness of the success indication
5233 depends on whether the address of the store matches the
5234 reservation address. But we can't tell that here (and
5235 anyway, we're not being PowerPC-specific). At least we are
5236 guaranteed that the definedness of the store address, and its
5237 addressibility, will be checked as per normal. So it seems
5238 pretty safe to just say that the success indication is always
5239 defined.
5240
5241 In schemeS, for origin tracking, we must correspondingly set
5242 a no-origin value for the origin shadow of .result.
5243 */
5244 tl_assert(resTy == Ity_I1);
5245 assign( 'V', mce, resTmp, definedOfType(resTy) );
5246 }
5247}
5248
5249
sewardj95448072004-11-22 20:19:51 +00005250/*------------------------------------------------------------*/
5251/*--- Memcheck main ---*/
5252/*------------------------------------------------------------*/
njn25e49d8e72002-09-23 09:36:25 +00005253
sewardj7cf4e6b2008-05-01 20:24:26 +00005254static void schemeS ( MCEnv* mce, IRStmt* st );
5255
sewardj95448072004-11-22 20:19:51 +00005256static Bool isBogusAtom ( IRAtom* at )
njn25e49d8e72002-09-23 09:36:25 +00005257{
sewardj95448072004-11-22 20:19:51 +00005258 ULong n = 0;
5259 IRConst* con;
sewardj710d6c22005-03-20 18:55:15 +00005260 tl_assert(isIRAtom(at));
sewardj0b9d74a2006-12-24 02:24:11 +00005261 if (at->tag == Iex_RdTmp)
sewardj95448072004-11-22 20:19:51 +00005262 return False;
5263 tl_assert(at->tag == Iex_Const);
5264 con = at->Iex.Const.con;
5265 switch (con->tag) {
sewardjd5204dc2004-12-31 01:16:11 +00005266 case Ico_U1: return False;
5267 case Ico_U8: n = (ULong)con->Ico.U8; break;
5268 case Ico_U16: n = (ULong)con->Ico.U16; break;
5269 case Ico_U32: n = (ULong)con->Ico.U32; break;
5270 case Ico_U64: n = (ULong)con->Ico.U64; break;
5271 case Ico_F64: return False;
sewardjb5b87402011-03-07 16:05:35 +00005272 case Ico_F32i: return False;
sewardjd5204dc2004-12-31 01:16:11 +00005273 case Ico_F64i: return False;
5274 case Ico_V128: return False;
sewardj95448072004-11-22 20:19:51 +00005275 default: ppIRExpr(at); tl_assert(0);
5276 }
5277 /* VG_(printf)("%llx\n", n); */
sewardj96a922e2005-04-23 23:26:29 +00005278 return (/*32*/ n == 0xFEFEFEFFULL
5279 /*32*/ || n == 0x80808080ULL
sewardj17b47432008-12-17 01:12:58 +00005280 /*32*/ || n == 0x7F7F7F7FULL
tomd9774d72005-06-27 08:11:01 +00005281 /*64*/ || n == 0xFFFFFFFFFEFEFEFFULL
sewardj96a922e2005-04-23 23:26:29 +00005282 /*64*/ || n == 0xFEFEFEFEFEFEFEFFULL
tomd9774d72005-06-27 08:11:01 +00005283 /*64*/ || n == 0x0000000000008080ULL
sewardj96a922e2005-04-23 23:26:29 +00005284 /*64*/ || n == 0x8080808080808080ULL
sewardj17b47432008-12-17 01:12:58 +00005285 /*64*/ || n == 0x0101010101010101ULL
sewardj96a922e2005-04-23 23:26:29 +00005286 );
sewardj95448072004-11-22 20:19:51 +00005287}
njn25e49d8e72002-09-23 09:36:25 +00005288
sewardj95448072004-11-22 20:19:51 +00005289static Bool checkForBogusLiterals ( /*FLAT*/ IRStmt* st )
5290{
sewardjd5204dc2004-12-31 01:16:11 +00005291 Int i;
5292 IRExpr* e;
5293 IRDirty* d;
sewardj1c0ce7a2009-07-01 08:10:49 +00005294 IRCAS* cas;
sewardj95448072004-11-22 20:19:51 +00005295 switch (st->tag) {
sewardj0b9d74a2006-12-24 02:24:11 +00005296 case Ist_WrTmp:
5297 e = st->Ist.WrTmp.data;
sewardj95448072004-11-22 20:19:51 +00005298 switch (e->tag) {
5299 case Iex_Get:
sewardj0b9d74a2006-12-24 02:24:11 +00005300 case Iex_RdTmp:
sewardj95448072004-11-22 20:19:51 +00005301 return False;
sewardjd5204dc2004-12-31 01:16:11 +00005302 case Iex_Const:
5303 return isBogusAtom(e);
sewardj95448072004-11-22 20:19:51 +00005304 case Iex_Unop:
5305 return isBogusAtom(e->Iex.Unop.arg);
sewardjd5204dc2004-12-31 01:16:11 +00005306 case Iex_GetI:
5307 return isBogusAtom(e->Iex.GetI.ix);
sewardj95448072004-11-22 20:19:51 +00005308 case Iex_Binop:
5309 return isBogusAtom(e->Iex.Binop.arg1)
5310 || isBogusAtom(e->Iex.Binop.arg2);
sewardjed69fdb2006-02-03 16:12:27 +00005311 case Iex_Triop:
florian26441742012-06-02 20:30:41 +00005312 return isBogusAtom(e->Iex.Triop.details->arg1)
5313 || isBogusAtom(e->Iex.Triop.details->arg2)
5314 || isBogusAtom(e->Iex.Triop.details->arg3);
sewardje91cea72006-02-08 19:32:02 +00005315 case Iex_Qop:
floriane2ab2972012-06-01 20:43:03 +00005316 return isBogusAtom(e->Iex.Qop.details->arg1)
5317 || isBogusAtom(e->Iex.Qop.details->arg2)
5318 || isBogusAtom(e->Iex.Qop.details->arg3)
5319 || isBogusAtom(e->Iex.Qop.details->arg4);
sewardj95448072004-11-22 20:19:51 +00005320 case Iex_Mux0X:
5321 return isBogusAtom(e->Iex.Mux0X.cond)
5322 || isBogusAtom(e->Iex.Mux0X.expr0)
5323 || isBogusAtom(e->Iex.Mux0X.exprX);
sewardj2e595852005-06-30 23:33:37 +00005324 case Iex_Load:
5325 return isBogusAtom(e->Iex.Load.addr);
sewardj95448072004-11-22 20:19:51 +00005326 case Iex_CCall:
5327 for (i = 0; e->Iex.CCall.args[i]; i++)
5328 if (isBogusAtom(e->Iex.CCall.args[i]))
5329 return True;
5330 return False;
5331 default:
5332 goto unhandled;
5333 }
sewardjd5204dc2004-12-31 01:16:11 +00005334 case Ist_Dirty:
5335 d = st->Ist.Dirty.details;
5336 for (i = 0; d->args[i]; i++)
5337 if (isBogusAtom(d->args[i]))
5338 return True;
5339 if (d->guard && isBogusAtom(d->guard))
5340 return True;
5341 if (d->mAddr && isBogusAtom(d->mAddr))
5342 return True;
5343 return False;
sewardj95448072004-11-22 20:19:51 +00005344 case Ist_Put:
5345 return isBogusAtom(st->Ist.Put.data);
sewardjd5204dc2004-12-31 01:16:11 +00005346 case Ist_PutI:
floriand39b0222012-05-31 15:48:13 +00005347 return isBogusAtom(st->Ist.PutI.details->ix)
5348 || isBogusAtom(st->Ist.PutI.details->data);
sewardj2e595852005-06-30 23:33:37 +00005349 case Ist_Store:
5350 return isBogusAtom(st->Ist.Store.addr)
5351 || isBogusAtom(st->Ist.Store.data);
sewardj95448072004-11-22 20:19:51 +00005352 case Ist_Exit:
sewardjd5204dc2004-12-31 01:16:11 +00005353 return isBogusAtom(st->Ist.Exit.guard);
sewardj826ec492005-05-12 18:05:00 +00005354 case Ist_AbiHint:
sewardj7cf4e6b2008-05-01 20:24:26 +00005355 return isBogusAtom(st->Ist.AbiHint.base)
5356 || isBogusAtom(st->Ist.AbiHint.nia);
sewardj21dc3452005-03-21 00:27:41 +00005357 case Ist_NoOp:
sewardj29faa502005-03-16 18:20:21 +00005358 case Ist_IMark:
sewardj72d75132007-11-09 23:06:35 +00005359 case Ist_MBE:
sewardjbd598e12005-01-07 12:10:21 +00005360 return False;
sewardj1c0ce7a2009-07-01 08:10:49 +00005361 case Ist_CAS:
5362 cas = st->Ist.CAS.details;
5363 return isBogusAtom(cas->addr)
5364 || (cas->expdHi ? isBogusAtom(cas->expdHi) : False)
5365 || isBogusAtom(cas->expdLo)
5366 || (cas->dataHi ? isBogusAtom(cas->dataHi) : False)
5367 || isBogusAtom(cas->dataLo);
sewardjdb5907d2009-11-26 17:20:21 +00005368 case Ist_LLSC:
5369 return isBogusAtom(st->Ist.LLSC.addr)
5370 || (st->Ist.LLSC.storedata
5371 ? isBogusAtom(st->Ist.LLSC.storedata)
5372 : False);
sewardj95448072004-11-22 20:19:51 +00005373 default:
5374 unhandled:
5375 ppIRStmt(st);
5376 VG_(tool_panic)("hasBogusLiterals");
5377 }
5378}
njn25e49d8e72002-09-23 09:36:25 +00005379
njn25e49d8e72002-09-23 09:36:25 +00005380
/* Top-level instrumentation entry point.  Builds and returns a new
   superblock (sb_out) containing, for each statement of sb_in, both
   the statement itself and the V-bit shadow computations for it; at
   --track-origins=yes (MC_(clo_mc_level) == 3) origin (B) shadow
   computations are emitted too, via schemeS.  The input block sb_in
   is not modified. */
IRSB* MC_(instrument) ( VgCallbackClosure* closure,
                        IRSB* sb_in,
                        VexGuestLayout* layout,
                        VexGuestExtents* vge,
                        VexArchInfo* archinfo_host,
                        IRType gWordTy, IRType hWordTy )
{
   Bool    verboze = 0||False;
   Bool    bogus;
   Int     i, j, first_stmt;
   IRStmt* st;
   MCEnv   mce;
   IRSB*   sb_out;

   if (gWordTy != hWordTy) {
      /* We don't currently support this case. */
      VG_(tool_panic)("host/guest word size mismatch");
   }

   /* Check we're not completely nuts */
   tl_assert(sizeof(UWord)  == sizeof(void*));
   tl_assert(sizeof(Word)   == sizeof(void*));
   tl_assert(sizeof(Addr)   == sizeof(void*));
   tl_assert(sizeof(ULong)  == 8);
   tl_assert(sizeof(Long)   == 8);
   tl_assert(sizeof(Addr64) == 8);
   tl_assert(sizeof(UInt)   == 4);
   tl_assert(sizeof(Int)    == 4);

   tl_assert(MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3);

   /* Set up SB: copy everything except the statement list, which is
      rebuilt below with instrumentation interleaved. */
   sb_out = deepCopyIRSBExceptStmts(sb_in);

   /* Set up the running environment.  Both .sb and .tmpMap are
      modified as we go along.  Note that tmps are added to both
      .sb->tyenv and .tmpMap together, so the valid index-set for
      those two arrays should always be identical. */
   VG_(memset)(&mce, 0, sizeof(mce));
   mce.sb             = sb_out;
   mce.trace          = verboze;
   mce.layout         = layout;
   mce.hWordTy        = hWordTy;
   mce.bogusLiterals  = False;

   /* Do expensive interpretation for Iop_Add32 and Iop_Add64 on
      Darwin.  10.7 is mostly built with LLVM, which uses these for
      bitfield inserts, and we get a lot of false errors if the cheap
      interpretation is used, alas.  Could solve this much better if
      we knew which of such adds came from x86/amd64 LEA instructions,
      since these are the only ones really needing the expensive
      interpretation, but that would require some way to tag them in
      the _toIR.c front ends, which is a lot of faffing around.  So
      for now just use the slow and blunt-instrument solution. */
   mce.useLLVMworkarounds = False;
#  if defined(VGO_darwin)
   mce.useLLVMworkarounds = True;
#  endif

   /* One tmpMap entry per pre-existing tmp in sb_in; shadow tmps are
      created lazily later. */
   mce.tmpMap = VG_(newXA)( VG_(malloc), "mc.MC_(instrument).1", VG_(free),
                            sizeof(TempMapEnt));
   for (i = 0; i < sb_in->tyenv->types_used; i++) {
      TempMapEnt ent;
      ent.kind    = Orig;
      ent.shadowV = IRTemp_INVALID;
      ent.shadowB = IRTemp_INVALID;
      VG_(addToXA)( mce.tmpMap, &ent );
   }
   tl_assert( VG_(sizeXA)( mce.tmpMap ) == sb_in->tyenv->types_used );

   /* Make a preliminary inspection of the statements, to see if there
      are any dodgy-looking literals.  If there are, we generate
      extra-detailed (hence extra-expensive) instrumentation in
      places.  Scan the whole bb even if dodgyness is found earlier,
      so that the flatness assertion is applied to all stmts. */

   bogus = False;

   for (i = 0; i < sb_in->stmts_used; i++) {

      st = sb_in->stmts[i];
      tl_assert(st);
      tl_assert(isFlatIRStmt(st));

      if (!bogus) {
         bogus = checkForBogusLiterals(st);
         if (0 && bogus) {
            VG_(printf)("bogus: ");
            ppIRStmt(st);
            VG_(printf)("\n");
         }
      }

   }

   mce.bogusLiterals = bogus;

   /* Copy verbatim any IR preamble preceding the first IMark */

   tl_assert(mce.sb == sb_out);
   tl_assert(mce.sb != sb_in);

   i = 0;
   while (i < sb_in->stmts_used && sb_in->stmts[i]->tag != Ist_IMark) {

      st = sb_in->stmts[i];
      tl_assert(st);
      tl_assert(isFlatIRStmt(st));

      stmt( 'C', &mce, sb_in->stmts[i] );
      i++;
   }

   /* Nasty problem.  IR optimisation of the pre-instrumented IR may
      cause the IR following the preamble to contain references to IR
      temporaries defined in the preamble.  Because the preamble isn't
      instrumented, these temporaries don't have any shadows.
      Nevertheless uses of them following the preamble will cause
      memcheck to generate references to their shadows.  End effect is
      to cause IR sanity check failures, due to references to
      non-existent shadows.  This is only evident for the complex
      preambles used for function wrapping on TOC-afflicted platforms
      (ppc64-linux).

      The following loop therefore scans the preamble looking for
      assignments to temporaries.  For each one found it creates an
      assignment to the corresponding (V) shadow temp, marking it as
      'defined'.  This is the same resulting IR as if the main
      instrumentation loop before had been applied to the statement
      'tmp = CONSTANT'.

      Similarly, if origin tracking is enabled, we must generate an
      assignment for the corresponding origin (B) shadow, claiming
      no-origin, as appropriate for a defined value.
   */
   for (j = 0; j < i; j++) {
      if (sb_in->stmts[j]->tag == Ist_WrTmp) {
         /* findShadowTmpV checks its arg is an original tmp;
            no need to assert that here. */
         IRTemp tmp_o = sb_in->stmts[j]->Ist.WrTmp.tmp;
         IRTemp tmp_v = findShadowTmpV(&mce, tmp_o);
         IRType ty_v  = typeOfIRTemp(sb_out->tyenv, tmp_v);
         assign( 'V', &mce, tmp_v, definedOfType( ty_v ) );
         if (MC_(clo_mc_level) == 3) {
            IRTemp tmp_b = findShadowTmpB(&mce, tmp_o);
            tl_assert(typeOfIRTemp(sb_out->tyenv, tmp_b) == Ity_I32);
            assign( 'B', &mce, tmp_b, mkU32(0)/* UNKNOWN ORIGIN */);
         }
         if (0) {
            VG_(printf)("create shadow tmp(s) for preamble tmp [%d] ty ", j);
            ppIRType( ty_v );
            VG_(printf)("\n");
         }
      }
   }

   /* Iterate over the remaining stmts to generate instrumentation. */

   tl_assert(sb_in->stmts_used > 0);
   tl_assert(i >= 0);
   tl_assert(i < sb_in->stmts_used);
   tl_assert(sb_in->stmts[i]->tag == Ist_IMark);

   for (/* use current i*/; i < sb_in->stmts_used; i++) {

      st = sb_in->stmts[i];
      /* Remember where the instrumentation for this stmt starts, so
         the verbose dump below can print just the new stmts. */
      first_stmt = sb_out->stmts_used;

      if (verboze) {
         VG_(printf)("\n");
         ppIRStmt(st);
         VG_(printf)("\n");
      }

      if (MC_(clo_mc_level) == 3) {
         /* See comments on case Ist_CAS below. */
         if (st->tag != Ist_CAS)
            schemeS( &mce, st );
      }

      /* Generate instrumentation code for each stmt ... */

      switch (st->tag) {

         case Ist_WrTmp:
            assign( 'V', &mce, findShadowTmpV(&mce, st->Ist.WrTmp.tmp),
                               expr2vbits( &mce, st->Ist.WrTmp.data) );
            break;

         case Ist_Put:
            do_shadow_PUT( &mce,
                           st->Ist.Put.offset,
                           st->Ist.Put.data,
                           NULL /* shadow atom */, NULL /* guard */ );
            break;

         case Ist_PutI:
            do_shadow_PUTI( &mce, st->Ist.PutI.details);
            break;

         case Ist_Store:
            do_shadow_Store( &mce, st->Ist.Store.end,
                                   st->Ist.Store.addr, 0/* addr bias */,
                                   st->Ist.Store.data,
                                   NULL /* shadow data */,
                                   NULL/*guard*/ );
            break;

         case Ist_Exit:
            complainIfUndefined( &mce, st->Ist.Exit.guard, NULL );
            break;

         case Ist_IMark:
            break;

         case Ist_NoOp:
         case Ist_MBE:
            break;

         case Ist_Dirty:
            do_shadow_Dirty( &mce, st->Ist.Dirty.details );
            break;

         case Ist_AbiHint:
            do_AbiHint( &mce, st->Ist.AbiHint.base,
                              st->Ist.AbiHint.len,
                              st->Ist.AbiHint.nia );
            break;

         case Ist_CAS:
            do_shadow_CAS( &mce, st->Ist.CAS.details );
            /* Note, do_shadow_CAS copies the CAS itself to the output
               block, because it needs to add instrumentation both
               before and after it.  Hence skip the copy below.  Also
               skip the origin-tracking stuff (call to schemeS) above,
               since that's all tangled up with it too; do_shadow_CAS
               does it all. */
            break;

         case Ist_LLSC:
            do_shadow_LLSC( &mce,
                            st->Ist.LLSC.end,
                            st->Ist.LLSC.result,
                            st->Ist.LLSC.addr,
                            st->Ist.LLSC.storedata );
            break;

         default:
            VG_(printf)("\n");
            ppIRStmt(st);
            VG_(printf)("\n");
            VG_(tool_panic)("memcheck: unhandled IRStmt");

      } /* switch (st->tag) */

      if (0 && verboze) {
         for (j = first_stmt; j < sb_out->stmts_used; j++) {
            VG_(printf)("   ");
            ppIRStmt(sb_out->stmts[j]);
            VG_(printf)("\n");
         }
         VG_(printf)("\n");
      }

      /* ... and finally copy the stmt itself to the output.  Except,
         skip the copy of IRCASs; see comments on case Ist_CAS
         above. */
      if (st->tag != Ist_CAS)
         stmt('C', &mce, st);
   }

   /* Now we need to complain if the jump target is undefined. */
   first_stmt = sb_out->stmts_used;

   if (verboze) {
      VG_(printf)("sb_in->next = ");
      ppIRExpr(sb_in->next);
      VG_(printf)("\n\n");
   }

   complainIfUndefined( &mce, sb_in->next, NULL );

   if (0 && verboze) {
      for (j = first_stmt; j < sb_out->stmts_used; j++) {
         VG_(printf)("   ");
         ppIRStmt(sb_out->stmts[j]);
         VG_(printf)("\n");
      }
      VG_(printf)("\n");
   }

   /* If this fails, there's been some serious snafu with tmp management,
      that should be investigated. */
   tl_assert( VG_(sizeXA)( mce.tmpMap ) == mce.sb->tyenv->types_used );
   VG_(deleteXA)( mce.tmpMap );

   tl_assert(mce.sb == sb_out);
   return sb_out;
}
njn25e49d8e72002-09-23 09:36:25 +00005680
sewardj81651dc2007-08-28 06:05:20 +00005681/*------------------------------------------------------------*/
5682/*--- Post-tree-build final tidying ---*/
5683/*------------------------------------------------------------*/
5684
5685/* This exploits the observation that Memcheck often produces
5686 repeated conditional calls of the form
5687
sewardj7cf4e6b2008-05-01 20:24:26 +00005688 Dirty G MC_(helperc_value_check0/1/4/8_fail)(UInt otag)
sewardj81651dc2007-08-28 06:05:20 +00005689
5690 with the same guard expression G guarding the same helper call.
5691 The second and subsequent calls are redundant. This usually
5692 results from instrumentation of guest code containing multiple
5693 memory references at different constant offsets from the same base
5694 register. After optimisation of the instrumentation, you get a
5695 test for the definedness of the base register for each memory
5696 reference, which is kinda pointless. MC_(final_tidy) therefore
5697 looks for such repeated calls and removes all but the first. */
5698
5699/* A struct for recording which (helper, guard) pairs we have already
5700 seen. */
typedef
   struct { void*   entry;  /* address of the helper function called */
            IRExpr* guard;  /* guard expression of that call */ }
   Pair;
5704
5705/* Return True if e1 and e2 definitely denote the same value (used to
5706 compare guards). Return False if unknown; False is the safe
5707 answer. Since guest registers and guest memory do not have the
5708 SSA property we must return False if any Gets or Loads appear in
5709 the expression. */
5710
5711static Bool sameIRValue ( IRExpr* e1, IRExpr* e2 )
5712{
5713 if (e1->tag != e2->tag)
5714 return False;
5715 switch (e1->tag) {
5716 case Iex_Const:
5717 return eqIRConst( e1->Iex.Const.con, e2->Iex.Const.con );
5718 case Iex_Binop:
5719 return e1->Iex.Binop.op == e2->Iex.Binop.op
5720 && sameIRValue(e1->Iex.Binop.arg1, e2->Iex.Binop.arg1)
5721 && sameIRValue(e1->Iex.Binop.arg2, e2->Iex.Binop.arg2);
5722 case Iex_Unop:
5723 return e1->Iex.Unop.op == e2->Iex.Unop.op
5724 && sameIRValue(e1->Iex.Unop.arg, e2->Iex.Unop.arg);
5725 case Iex_RdTmp:
5726 return e1->Iex.RdTmp.tmp == e2->Iex.RdTmp.tmp;
5727 case Iex_Mux0X:
5728 return sameIRValue( e1->Iex.Mux0X.cond, e2->Iex.Mux0X.cond )
5729 && sameIRValue( e1->Iex.Mux0X.expr0, e2->Iex.Mux0X.expr0 )
5730 && sameIRValue( e1->Iex.Mux0X.exprX, e2->Iex.Mux0X.exprX );
5731 case Iex_Qop:
5732 case Iex_Triop:
5733 case Iex_CCall:
5734 /* be lazy. Could define equality for these, but they never
5735 appear to be used. */
5736 return False;
5737 case Iex_Get:
5738 case Iex_GetI:
5739 case Iex_Load:
5740 /* be conservative - these may not give the same value each
5741 time */
5742 return False;
5743 case Iex_Binder:
5744 /* should never see this */
5745 /* fallthrough */
5746 default:
5747 VG_(printf)("mc_translate.c: sameIRValue: unhandled: ");
5748 ppIRExpr(e1);
5749 VG_(tool_panic)("memcheck:sameIRValue");
5750 return False;
5751 }
5752}
5753
5754/* See if 'pairs' already has an entry for (entry, guard). Return
5755 True if so. If not, add an entry. */
5756
5757static
5758Bool check_or_add ( XArray* /*of Pair*/ pairs, IRExpr* guard, void* entry )
5759{
5760 Pair p;
5761 Pair* pp;
5762 Int i, n = VG_(sizeXA)( pairs );
5763 for (i = 0; i < n; i++) {
5764 pp = VG_(indexXA)( pairs, i );
5765 if (pp->entry == entry && sameIRValue(pp->guard, guard))
5766 return True;
5767 }
5768 p.guard = guard;
5769 p.entry = entry;
5770 VG_(addToXA)( pairs, &p );
5771 return False;
5772}
5773
florian11f3cc82012-10-21 02:19:35 +00005774static Bool is_helperc_value_checkN_fail ( const HChar* name )
sewardj81651dc2007-08-28 06:05:20 +00005775{
5776 return
sewardj7cf4e6b2008-05-01 20:24:26 +00005777 0==VG_(strcmp)(name, "MC_(helperc_value_check0_fail_no_o)")
5778 || 0==VG_(strcmp)(name, "MC_(helperc_value_check1_fail_no_o)")
5779 || 0==VG_(strcmp)(name, "MC_(helperc_value_check4_fail_no_o)")
5780 || 0==VG_(strcmp)(name, "MC_(helperc_value_check8_fail_no_o)")
5781 || 0==VG_(strcmp)(name, "MC_(helperc_value_check0_fail_w_o)")
5782 || 0==VG_(strcmp)(name, "MC_(helperc_value_check1_fail_w_o)")
5783 || 0==VG_(strcmp)(name, "MC_(helperc_value_check4_fail_w_o)")
5784 || 0==VG_(strcmp)(name, "MC_(helperc_value_check8_fail_w_o)");
sewardj81651dc2007-08-28 06:05:20 +00005785}
5786
/* Remove redundant duplicate calls to the value-check-fail helpers:
   keep the first call for each (helper, guard) pair seen and replace
   later identical ones with no-ops.  Mutates sb_in in place and
   returns it. */
IRSB* MC_(final_tidy) ( IRSB* sb_in )
{
   Int i;
   IRStmt* st;
   IRDirty* di;
   IRExpr* guard;
   IRCallee* cee;
   Bool alreadyPresent;
   /* Set of (helper entry, guard) pairs seen so far in this block. */
   XArray* pairs = VG_(newXA)( VG_(malloc), "mc.ft.1",
                               VG_(free), sizeof(Pair) );
   /* Scan forwards through the statements.  Each time a call to one
      of the relevant helpers is seen, check if we have made a
      previous call to the same helper using the same guard
      expression, and if so, delete the call. */
   for (i = 0; i < sb_in->stmts_used; i++) {
      st = sb_in->stmts[i];
      tl_assert(st);
      /* Only guarded dirty calls to the checkN_fail helpers are of
         interest; skip everything else. */
      if (st->tag != Ist_Dirty)
         continue;
      di = st->Ist.Dirty.details;
      guard = di->guard;
      if (!guard)
         continue;
      if (0) { ppIRExpr(guard); VG_(printf)("\n"); }
      cee = di->cee;
      if (!is_helperc_value_checkN_fail( cee->name ))
         continue;
      /* Ok, we have a call to helperc_value_check0/1/4/8_fail with
         guard 'guard'.  Check if we have already seen a call to this
         function with the same guard.  If so, delete it.  If not,
         add it to the set of calls we do know about. */
      alreadyPresent = check_or_add( pairs, guard, cee->addr );
      if (alreadyPresent) {
         /* Deletion is done by overwriting with a no-op, which keeps
            the statement indices stable during the scan. */
         sb_in->stmts[i] = IRStmt_NoOp();
         if (0) VG_(printf)("XX\n");
      }
   }
   VG_(deleteXA)( pairs );
   return sb_in;
}
5827
5828
sewardj7cf4e6b2008-05-01 20:24:26 +00005829/*------------------------------------------------------------*/
5830/*--- Origin tracking stuff ---*/
5831/*------------------------------------------------------------*/
5832
sewardj1c0ce7a2009-07-01 08:10:49 +00005833/* Almost identical to findShadowTmpV. */
/* Lazily create (if necessary) and return the origin (B) shadow
   temporary for original temp 'orig'.  Almost identical to
   findShadowTmpV, but B shadows are always Ity_I32. */
static IRTemp findShadowTmpB ( MCEnv* mce, IRTemp orig )
{
   TempMapEnt* ent;
   /* VG_(indexXA) range-checks 'orig', hence no need to check
      here. */
   ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
   tl_assert(ent->kind == Orig);
   if (ent->shadowB == IRTemp_INVALID) {
      IRTemp tmpB
         = newTemp( mce, Ity_I32, BSh );
      /* newTemp may cause mce->tmpMap to resize, hence previous results
         from VG_(indexXA) are invalid. */
      ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
      tl_assert(ent->kind == Orig);
      tl_assert(ent->shadowB == IRTemp_INVALID);
      ent->shadowB = tmpB;
   }
   return ent->shadowB;
}
5853
5854static IRAtom* gen_maxU32 ( MCEnv* mce, IRAtom* b1, IRAtom* b2 )
5855{
5856 return assignNew( 'B', mce, Ity_I32, binop(Iop_Max32U, b1, b2) );
5857}
5858
5859static IRAtom* gen_load_b ( MCEnv* mce, Int szB,
5860 IRAtom* baseaddr, Int offset )
5861{
5862 void* hFun;
florian6bd9dc12012-11-23 16:17:43 +00005863 const HChar* hName;
sewardj7cf4e6b2008-05-01 20:24:26 +00005864 IRTemp bTmp;
5865 IRDirty* di;
sewardj1c0ce7a2009-07-01 08:10:49 +00005866 IRType aTy = typeOfIRExpr( mce->sb->tyenv, baseaddr );
sewardj7cf4e6b2008-05-01 20:24:26 +00005867 IROp opAdd = aTy == Ity_I32 ? Iop_Add32 : Iop_Add64;
5868 IRAtom* ea = baseaddr;
5869 if (offset != 0) {
5870 IRAtom* off = aTy == Ity_I32 ? mkU32( offset )
5871 : mkU64( (Long)(Int)offset );
5872 ea = assignNew( 'B', mce, aTy, binop(opAdd, ea, off));
5873 }
sewardj1c0ce7a2009-07-01 08:10:49 +00005874 bTmp = newTemp(mce, mce->hWordTy, BSh);
sewardj7cf4e6b2008-05-01 20:24:26 +00005875
5876 switch (szB) {
5877 case 1: hFun = (void*)&MC_(helperc_b_load1);
5878 hName = "MC_(helperc_b_load1)";
5879 break;
5880 case 2: hFun = (void*)&MC_(helperc_b_load2);
5881 hName = "MC_(helperc_b_load2)";
5882 break;
5883 case 4: hFun = (void*)&MC_(helperc_b_load4);
5884 hName = "MC_(helperc_b_load4)";
5885 break;
5886 case 8: hFun = (void*)&MC_(helperc_b_load8);
5887 hName = "MC_(helperc_b_load8)";
5888 break;
5889 case 16: hFun = (void*)&MC_(helperc_b_load16);
5890 hName = "MC_(helperc_b_load16)";
5891 break;
sewardj45fa9f42012-05-21 10:18:10 +00005892 case 32: hFun = (void*)&MC_(helperc_b_load32);
5893 hName = "MC_(helperc_b_load32)";
5894 break;
sewardj7cf4e6b2008-05-01 20:24:26 +00005895 default:
5896 VG_(printf)("mc_translate.c: gen_load_b: unhandled szB == %d\n", szB);
5897 tl_assert(0);
5898 }
5899 di = unsafeIRDirty_1_N(
5900 bTmp, 1/*regparms*/, hName, VG_(fnptr_to_fnentry)( hFun ),
5901 mkIRExprVec_1( ea )
5902 );
5903 /* no need to mess with any annotations. This call accesses
5904 neither guest state nor guest memory. */
5905 stmt( 'B', mce, IRStmt_Dirty(di) );
5906 if (mce->hWordTy == Ity_I64) {
5907 /* 64-bit host */
sewardj1c0ce7a2009-07-01 08:10:49 +00005908 IRTemp bTmp32 = newTemp(mce, Ity_I32, BSh);
sewardj7cf4e6b2008-05-01 20:24:26 +00005909 assign( 'B', mce, bTmp32, unop(Iop_64to32, mkexpr(bTmp)) );
5910 return mkexpr(bTmp32);
5911 } else {
5912 /* 32-bit host */
5913 return mkexpr(bTmp);
5914 }
5915}
sewardj1c0ce7a2009-07-01 08:10:49 +00005916
florian434ffae2012-07-19 17:23:42 +00005917static IRAtom* gen_guarded_load_b ( MCEnv* mce, Int szB, IRAtom* baseaddr,
5918 Int offset, IRAtom* guard )
5919{
5920 if (guard) {
5921 IRAtom *cond, *iffalse, *iftrue;
5922
5923 cond = assignNew('B', mce, Ity_I8, unop(Iop_1Uto8, guard));
5924 iftrue = assignNew('B', mce, Ity_I32,
5925 gen_load_b(mce, szB, baseaddr, offset));
5926 iffalse = mkU32(0);
5927
5928 return assignNew('B', mce, Ity_I32, IRExpr_Mux0X(cond, iffalse, iftrue));
5929 }
5930
5931 return gen_load_b(mce, szB, baseaddr, offset);
5932}
5933
sewardj1c0ce7a2009-07-01 08:10:49 +00005934/* Generate a shadow store. guard :: Ity_I1 controls whether the
5935 store really happens; NULL means it unconditionally does. */
sewardj7cf4e6b2008-05-01 20:24:26 +00005936static void gen_store_b ( MCEnv* mce, Int szB,
sewardj1c0ce7a2009-07-01 08:10:49 +00005937 IRAtom* baseaddr, Int offset, IRAtom* dataB,
5938 IRAtom* guard )
sewardj7cf4e6b2008-05-01 20:24:26 +00005939{
5940 void* hFun;
florian6bd9dc12012-11-23 16:17:43 +00005941 const HChar* hName;
sewardj7cf4e6b2008-05-01 20:24:26 +00005942 IRDirty* di;
sewardj1c0ce7a2009-07-01 08:10:49 +00005943 IRType aTy = typeOfIRExpr( mce->sb->tyenv, baseaddr );
sewardj7cf4e6b2008-05-01 20:24:26 +00005944 IROp opAdd = aTy == Ity_I32 ? Iop_Add32 : Iop_Add64;
5945 IRAtom* ea = baseaddr;
sewardj1c0ce7a2009-07-01 08:10:49 +00005946 if (guard) {
5947 tl_assert(isOriginalAtom(mce, guard));
5948 tl_assert(typeOfIRExpr(mce->sb->tyenv, guard) == Ity_I1);
5949 }
sewardj7cf4e6b2008-05-01 20:24:26 +00005950 if (offset != 0) {
5951 IRAtom* off = aTy == Ity_I32 ? mkU32( offset )
5952 : mkU64( (Long)(Int)offset );
5953 ea = assignNew( 'B', mce, aTy, binop(opAdd, ea, off));
5954 }
5955 if (mce->hWordTy == Ity_I64)
5956 dataB = assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, dataB));
5957
5958 switch (szB) {
5959 case 1: hFun = (void*)&MC_(helperc_b_store1);
5960 hName = "MC_(helperc_b_store1)";
5961 break;
5962 case 2: hFun = (void*)&MC_(helperc_b_store2);
5963 hName = "MC_(helperc_b_store2)";
5964 break;
5965 case 4: hFun = (void*)&MC_(helperc_b_store4);
5966 hName = "MC_(helperc_b_store4)";
5967 break;
5968 case 8: hFun = (void*)&MC_(helperc_b_store8);
5969 hName = "MC_(helperc_b_store8)";
5970 break;
5971 case 16: hFun = (void*)&MC_(helperc_b_store16);
5972 hName = "MC_(helperc_b_store16)";
5973 break;
sewardj45fa9f42012-05-21 10:18:10 +00005974 case 32: hFun = (void*)&MC_(helperc_b_store32);
5975 hName = "MC_(helperc_b_store32)";
5976 break;
sewardj7cf4e6b2008-05-01 20:24:26 +00005977 default:
5978 tl_assert(0);
5979 }
5980 di = unsafeIRDirty_0_N( 2/*regparms*/,
5981 hName, VG_(fnptr_to_fnentry)( hFun ),
5982 mkIRExprVec_2( ea, dataB )
5983 );
5984 /* no need to mess with any annotations. This call accesses
5985 neither guest state nor guest memory. */
sewardj1c0ce7a2009-07-01 08:10:49 +00005986 if (guard) di->guard = guard;
sewardj7cf4e6b2008-05-01 20:24:26 +00005987 stmt( 'B', mce, IRStmt_Dirty(di) );
5988}
5989
5990static IRAtom* narrowTo32 ( MCEnv* mce, IRAtom* e ) {
sewardj1c0ce7a2009-07-01 08:10:49 +00005991 IRType eTy = typeOfIRExpr(mce->sb->tyenv, e);
sewardj7cf4e6b2008-05-01 20:24:26 +00005992 if (eTy == Ity_I64)
5993 return assignNew( 'B', mce, Ity_I32, unop(Iop_64to32, e) );
5994 if (eTy == Ity_I32)
5995 return e;
5996 tl_assert(0);
5997}
5998
5999static IRAtom* zWidenFrom32 ( MCEnv* mce, IRType dstTy, IRAtom* e ) {
sewardj1c0ce7a2009-07-01 08:10:49 +00006000 IRType eTy = typeOfIRExpr(mce->sb->tyenv, e);
sewardj7cf4e6b2008-05-01 20:24:26 +00006001 tl_assert(eTy == Ity_I32);
6002 if (dstTy == Ity_I64)
6003 return assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, e) );
6004 tl_assert(0);
6005}
6006
sewardjdb5907d2009-11-26 17:20:21 +00006007
/* Compute the origin (B-shadow) value for expression 'e', emitting
   whatever IR is needed along the way.  The result is always an
   Ity_I32 origin tag; mkU32(0) denotes "no origin information"
   (cf. the guard handling in gen_guarded_load_b).  Only used when
   origin tracking is on (MC_(clo_mc_level) == 3). */
static IRAtom* schemeE ( MCEnv* mce, IRExpr* e )
{
   tl_assert(MC_(clo_mc_level) == 3);

   switch (e->tag) {

      case Iex_GetI: {
         IRRegArray* descr_b;
         IRAtom *t1, *t2, *t3, *t4;
         IRRegArray* descr = e->Iex.GetI.descr;
         IRType equivIntTy
            = MC_(get_otrack_reg_array_equiv_int_type)(descr);
         /* If this array is unshadowable for whatever reason, use the
            usual approximation. */
         if (equivIntTy == Ity_INVALID)
            return mkU32(0);
         tl_assert(sizeofIRType(equivIntTy) >= 4);
         tl_assert(sizeofIRType(equivIntTy) == sizeofIRType(descr->elemTy));
         /* The B-shadow of the guest state lives at offset
            2*total_sizeB from the real state (the V-shadow occupies
            the first copy). */
         descr_b = mkIRRegArray( descr->base + 2*mce->layout->total_sizeB,
                                 equivIntTy, descr->nElems );
         /* Do a shadow indexed get of the same size, giving t1.  Take
            the bottom 32 bits of it, giving t2.  Compute into t3 the
            origin for the index (almost certainly zero, but there's
            no harm in being completely general here, since iropt will
            remove any useless code), and fold it in, giving a final
            value t4. */
         t1 = assignNew( 'B', mce, equivIntTy,
                          IRExpr_GetI( descr_b, e->Iex.GetI.ix,
                                       e->Iex.GetI.bias ));
         t2 = narrowTo32( mce, t1 );
         t3 = schemeE( mce, e->Iex.GetI.ix );
         t4 = gen_maxU32( mce, t2, t3 );
         return t4;
      }
      case Iex_CCall: {
         Int i;
         IRAtom*  here;
         IRExpr** args = e->Iex.CCall.args;
         IRAtom*  curr = mkU32(0);
         for (i = 0; args[i]; i++) {
            tl_assert(i < 32);
            tl_assert(isOriginalAtom(mce, args[i]));
            /* Only take notice of this arg if the callee's
               mc-exclusion mask does not say it is to be excluded. */
            if (e->Iex.CCall.cee->mcx_mask & (1<<i)) {
               /* the arg is to be excluded from definedness checking.
                  Do nothing. */
               if (0) VG_(printf)("excluding %s(%d)\n",
                                  e->Iex.CCall.cee->name, i);
            } else {
               /* calculate the arg's definedness, and pessimistically
                  merge it in. */
               here = schemeE( mce, args[i] );
               curr = gen_maxU32( mce, curr, here );
            }
         }
         return curr;
      }
      case Iex_Load: {
         Int dszB;
         dszB = sizeofIRType(e->Iex.Load.ty);
         /* assert that the B value for the address is already
            available (somewhere) */
         tl_assert(isIRAtom(e->Iex.Load.addr));
         tl_assert(mce->hWordTy == Ity_I32 || mce->hWordTy == Ity_I64);
         return gen_load_b( mce, dszB, e->Iex.Load.addr, 0 );
      }
      case Iex_Mux0X: {
         /* Pessimistic: merge the origins of the condition and of
            both arms, regardless of which arm is selected. */
         IRAtom* b1 = schemeE( mce, e->Iex.Mux0X.cond );
         IRAtom* b2 = schemeE( mce, e->Iex.Mux0X.expr0 );
         IRAtom* b3 = schemeE( mce, e->Iex.Mux0X.exprX );
         return gen_maxU32( mce, b1, gen_maxU32( mce, b2, b3 ));
      }
      case Iex_Qop: {
         IRAtom* b1 = schemeE( mce, e->Iex.Qop.details->arg1 );
         IRAtom* b2 = schemeE( mce, e->Iex.Qop.details->arg2 );
         IRAtom* b3 = schemeE( mce, e->Iex.Qop.details->arg3 );
         IRAtom* b4 = schemeE( mce, e->Iex.Qop.details->arg4 );
         return gen_maxU32( mce, gen_maxU32( mce, b1, b2 ),
                                 gen_maxU32( mce, b3, b4 ) );
      }
      case Iex_Triop: {
         IRAtom* b1 = schemeE( mce, e->Iex.Triop.details->arg1 );
         IRAtom* b2 = schemeE( mce, e->Iex.Triop.details->arg2 );
         IRAtom* b3 = schemeE( mce, e->Iex.Triop.details->arg3 );
         return gen_maxU32( mce, b1, gen_maxU32( mce, b2, b3 ) );
      }
      case Iex_Binop: {
         switch (e->Iex.Binop.op) {
            case Iop_CasCmpEQ8:  case Iop_CasCmpNE8:
            case Iop_CasCmpEQ16: case Iop_CasCmpNE16:
            case Iop_CasCmpEQ32: case Iop_CasCmpNE32:
            case Iop_CasCmpEQ64: case Iop_CasCmpNE64:
               /* Just say these all produce a defined result,
                  regardless of their arguments.  See
                  COMMENT_ON_CasCmpEQ in this file. */
               return mkU32(0);
            default: {
               IRAtom* b1 = schemeE( mce, e->Iex.Binop.arg1 );
               IRAtom* b2 = schemeE( mce, e->Iex.Binop.arg2 );
               return gen_maxU32( mce, b1, b2 );
            }
         }
         tl_assert(0);
         /*NOTREACHED*/
      }
      case Iex_Unop: {
         /* Unary ops pass through the origin of their sole argument. */
         IRAtom* b1 = schemeE( mce, e->Iex.Unop.arg );
         return b1;
      }
      case Iex_Const:
         /* Constants carry no origin information. */
         return mkU32(0);
      case Iex_RdTmp:
         return mkexpr( findShadowTmpB( mce, e->Iex.RdTmp.tmp ));
      case Iex_Get: {
         Int b_offset = MC_(get_otrack_shadow_offset)(
                           e->Iex.Get.offset,
                           sizeofIRType(e->Iex.Get.ty)
                        );
         /* -1 means "this guest state slice has no B-shadow". */
         tl_assert(b_offset >= -1
                   && b_offset <= mce->layout->total_sizeB -4);
         if (b_offset >= 0) {
            /* FIXME: this isn't an atom! */
            return IRExpr_Get( b_offset + 2*mce->layout->total_sizeB,
                               Ity_I32 );
         }
         return mkU32(0);
      }
      default:
         VG_(printf)("mc_translate.c: schemeE: unhandled: ");
         ppIRExpr(e);
         VG_(tool_panic)("memcheck:schemeE");
   }
}
6142
sewardjdb5907d2009-11-26 17:20:21 +00006143
/* Instrument a dirty helper call for origin tracking: gather the
   origins of all inputs (guard, unmasked args, read guest state,
   read memory) into a single 32-bit B-value 'curr', then distribute
   that value to all outputs (result temp, written guest state,
   written memory). */
static void do_origins_Dirty ( MCEnv* mce, IRDirty* d )
{
   // This is a hacked version of do_shadow_Dirty
   Int       i, k, n, toDo, gSz, gOff;
   IRAtom    *here, *curr;
   IRTemp    dst;

   /* First check the guard. */
   curr = schemeE( mce, d->guard );

   /* Now round up all inputs and maxU32 over them. */

   /* Inputs: unmasked args
      Note: arguments are evaluated REGARDLESS of the guard expression */
   for (i = 0; d->args[i]; i++) {
      if (d->cee->mcx_mask & (1<<i)) {
         /* ignore this arg */
      } else {
         here = schemeE( mce, d->args[i] );
         curr = gen_maxU32( mce, curr, here );
      }
   }

   /* Inputs: guest state that we read. */
   for (i = 0; i < d->nFxState; i++) {
      tl_assert(d->fxState[i].fx != Ifx_None);
      if (d->fxState[i].fx == Ifx_Write)
         continue;

      /* Enumerate the described state segments */
      for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) {
         gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen;
         gSz  = d->fxState[i].size;

         /* Ignore any sections marked as 'always defined'. */
         if (isAlwaysDefd(mce, gOff, gSz)) {
            if (0)
            VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
                        gOff, gSz);
            continue;
         }

         /* This state element is read or modified.  So we need to
            consider it.  If larger than 4 bytes, deal with it in
            4-byte chunks. */
         while (True) {
            Int b_offset;
            tl_assert(gSz >= 0);
            if (gSz == 0) break;
            n = gSz <= 4 ? gSz : 4;
            /* update 'curr' with maxU32 of the state slice
               gOff .. gOff+n-1 */
            b_offset = MC_(get_otrack_shadow_offset)(gOff, 4);
            if (b_offset != -1) {
               /* Observe the guard expression. If it is false use 0, i.e.
                  nothing is known about the origin */
               IRAtom *cond, *iffalse, *iftrue;

               cond = assignNew( 'B', mce, Ity_I8, unop(Iop_1Uto8, d->guard));
               iffalse = mkU32(0);
               iftrue  = assignNew( 'B', mce, Ity_I32,
                                    IRExpr_Get(b_offset
                                                 + 2*mce->layout->total_sizeB,
                                               Ity_I32));
               here = assignNew( 'B', mce, Ity_I32,
                                 IRExpr_Mux0X(cond, iffalse, iftrue));
               curr = gen_maxU32( mce, curr, here );
            }
            gSz -= n;
            gOff += n;
         }
      }
   }

   /* Inputs: memory */

   if (d->mFx != Ifx_None) {
      /* Because we may do multiple shadow loads/stores from the same
         base address, it's best to do a single test of its
         definedness right now.  Post-instrumentation optimisation
         should remove all but this test. */
      tl_assert(d->mAddr);
      here = schemeE( mce, d->mAddr );
      curr = gen_maxU32( mce, curr, here );
   }

   /* Deal with memory inputs (reads or modifies) */
   if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
      toDo   = d->mSize;
      /* chew off 32-bit chunks.  We don't care about the endianness
         since it's all going to be condensed down to a single bit,
         but nevertheless choose an endianness which is hopefully
         native to the platform. */
      while (toDo >= 4) {
         here = gen_guarded_load_b( mce, 4, d->mAddr, d->mSize - toDo,
                                    d->guard );
         curr = gen_maxU32( mce, curr, here );
         toDo -= 4;
      }
      /* handle possible 16-bit excess */
      while (toDo >= 2) {
         here = gen_guarded_load_b( mce, 2, d->mAddr, d->mSize - toDo,
                                    d->guard );
         curr = gen_maxU32( mce, curr, here );
         toDo -= 2;
      }
      /* chew off the remaining 8-bit chunk, if any */
      if (toDo == 1) {
         here = gen_guarded_load_b( mce, 1, d->mAddr, d->mSize - toDo,
                                    d->guard );
         curr = gen_maxU32( mce, curr, here );
         toDo -= 1;
      }
      tl_assert(toDo == 0);
   }

   /* Whew!  So curr is a 32-bit B-value which should give an origin
      of some use if any of the inputs to the helper are undefined.
      Now we need to re-distribute the results to all destinations. */

   /* Outputs: the destination temporary, if there is one. */
   if (d->tmp != IRTemp_INVALID) {
      dst   = findShadowTmpB(mce, d->tmp);
      /* NOTE(review): category char is 'V' here although the rest of
         this function uses 'B' for B-shadow assignments — presumably
         harmless, but confirm against assign()'s handling of the
         category char. */
      assign( 'V', mce, dst, curr );
   }

   /* Outputs: guest state that we write or modify. */
   for (i = 0; i < d->nFxState; i++) {
      tl_assert(d->fxState[i].fx != Ifx_None);
      if (d->fxState[i].fx == Ifx_Read)
         continue;

      /* Enumerate the described state segments */
      for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) {
         gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen;
         gSz  = d->fxState[i].size;

         /* Ignore any sections marked as 'always defined'. */
         if (isAlwaysDefd(mce, gOff, gSz))
            continue;

         /* This state element is written or modified.  So we need to
            consider it.  If larger than 4 bytes, deal with it in
            4-byte chunks. */
         while (True) {
            Int b_offset;
            tl_assert(gSz >= 0);
            if (gSz == 0) break;
            n = gSz <= 4 ? gSz : 4;
            /* Write 'curr' to the state slice gOff .. gOff+n-1 */
            b_offset = MC_(get_otrack_shadow_offset)(gOff, 4);
            if (b_offset != -1) {
               if (d->guard) {
                  /* If the guard expression evaluates to false we simply Put
                     the value that is already stored in the guest state slot */
                  IRAtom *cond, *iffalse;

                  cond    = assignNew('B', mce, Ity_I8,
                                      unop(Iop_1Uto8, d->guard));
                  iffalse = assignNew('B', mce, Ity_I32,
                                      IRExpr_Get(b_offset +
                                                 2*mce->layout->total_sizeB,
                                                 Ity_I32));
                  /* NOTE(review): 'V' category char here, 'B' nearby —
                     same remark as for the dst assignment above. */
                  curr = assignNew('V', mce, Ity_I32,
                                   IRExpr_Mux0X(cond, iffalse, curr));
               }
               stmt( 'B', mce, IRStmt_Put(b_offset
                                          + 2*mce->layout->total_sizeB,
                                          curr ));
            }
            gSz -= n;
            gOff += n;
         }
      }
   }

   /* Outputs: memory that we write or modify.  Same comments about
      endianness as above apply. */
   if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
      toDo   = d->mSize;
      /* chew off 32-bit chunks */
      while (toDo >= 4) {
         gen_store_b( mce, 4, d->mAddr, d->mSize - toDo, curr,
                      d->guard );
         toDo -= 4;
      }
      /* handle possible 16-bit excess */
      while (toDo >= 2) {
         gen_store_b( mce, 2, d->mAddr, d->mSize - toDo, curr,
                      d->guard );
         toDo -= 2;
      }
      /* chew off the remaining 8-bit chunk, if any */
      if (toDo == 1) {
         gen_store_b( mce, 1, d->mAddr, d->mSize - toDo, curr,
                      d->guard );
         toDo -= 1;
      }
      tl_assert(toDo == 0);
   }
}
6345
sewardjdb5907d2009-11-26 17:20:21 +00006346
6347static void do_origins_Store ( MCEnv* mce,
6348 IREndness stEnd,
6349 IRExpr* stAddr,
6350 IRExpr* stData )
6351{
6352 Int dszB;
6353 IRAtom* dataB;
6354 /* assert that the B value for the address is already available
6355 (somewhere), since the call to schemeE will want to see it.
6356 XXXX how does this actually ensure that?? */
6357 tl_assert(isIRAtom(stAddr));
6358 tl_assert(isIRAtom(stData));
6359 dszB = sizeofIRType( typeOfIRExpr(mce->sb->tyenv, stData ) );
6360 dataB = schemeE( mce, stData );
6361 gen_store_b( mce, dszB, stAddr, 0/*offset*/, dataB,
6362 NULL/*guard*/ );
6363}
6364
6365
/* Generate origin-tracking instrumentation for the statement 'st'.
   This is the statement-level counterpart of schemeE, and is only
   used when origin tracking is on (MC_(clo_mc_level) == 3). */
static void schemeS ( MCEnv* mce, IRStmt* st )
{
   tl_assert(MC_(clo_mc_level) == 3);

   switch (st->tag) {

      case Ist_AbiHint:
         /* The value-check instrumenter handles this - by arranging
            to pass the address of the next instruction to
            MC_(helperc_MAKE_STACK_UNINIT).  This is all that needs to
            happen for origin tracking w.r.t. AbiHints.  So there is
            nothing to do here. */
         break;

      case Ist_PutI: {
         IRPutI *puti = st->Ist.PutI.details;
         IRRegArray* descr_b;
         IRAtom *t1, *t2, *t3, *t4;
         IRRegArray* descr = puti->descr;
         IRType equivIntTy
            = MC_(get_otrack_reg_array_equiv_int_type)(descr);
         /* If this array is unshadowable for whatever reason,
            generate no code. */
         if (equivIntTy == Ity_INVALID)
            break;
         tl_assert(sizeofIRType(equivIntTy) >= 4);
         tl_assert(sizeofIRType(equivIntTy) == sizeofIRType(descr->elemTy));
         /* B-shadow of the guest state sits at offset 2*total_sizeB. */
         descr_b
            = mkIRRegArray( descr->base + 2*mce->layout->total_sizeB,
                            equivIntTy, descr->nElems );
         /* Compute a value to Put - the conjoinment of the origin for
            the data to be Put-ted (obviously) and of the index value
            (not so obviously). */
         t1 = schemeE( mce, puti->data );
         t2 = schemeE( mce, puti->ix );
         t3 = gen_maxU32( mce, t1, t2 );
         t4 = zWidenFrom32( mce, equivIntTy, t3 );
         stmt( 'B', mce, IRStmt_PutI( mkIRPutI(descr_b, puti->ix,
                                               puti->bias, t4) ));
         break;
      }

      case Ist_Dirty:
         do_origins_Dirty( mce, st->Ist.Dirty.details );
         break;

      case Ist_Store:
         do_origins_Store( mce, st->Ist.Store.end,
                                st->Ist.Store.addr,
                                st->Ist.Store.data );
         break;

      case Ist_LLSC: {
         /* In short: treat a load-linked like a normal load followed
            by an assignment of the loaded (shadow) data the result
            temporary.  Treat a store-conditional like a normal store,
            and mark the result temporary as defined. */
         if (st->Ist.LLSC.storedata == NULL) {
            /* Load Linked */
            IRType resTy
               = typeOfIRTemp(mce->sb->tyenv, st->Ist.LLSC.result);
            IRExpr* vanillaLoad
               = IRExpr_Load(st->Ist.LLSC.end, resTy, st->Ist.LLSC.addr);
            tl_assert(resTy == Ity_I64 || resTy == Ity_I32
                      || resTy == Ity_I16 || resTy == Ity_I8);
            assign( 'B', mce, findShadowTmpB(mce, st->Ist.LLSC.result),
                              schemeE(mce, vanillaLoad));
         } else {
            /* Store conditional */
            do_origins_Store( mce, st->Ist.LLSC.end,
                                   st->Ist.LLSC.addr,
                                   st->Ist.LLSC.storedata );
            /* For the rationale behind this, see comments at the
               place where the V-shadow for .result is constructed, in
               do_shadow_LLSC.  In short, we regard .result as
               always-defined. */
            assign( 'B', mce, findShadowTmpB(mce, st->Ist.LLSC.result),
                              mkU32(0) );
         }
         break;
      }

      case Ist_Put: {
         Int b_offset
            = MC_(get_otrack_shadow_offset)(
                 st->Ist.Put.offset,
                 sizeofIRType(typeOfIRExpr(mce->sb->tyenv, st->Ist.Put.data))
              );
         /* b_offset < 0 means this state slice has no B-shadow, so
            nothing needs to be emitted. */
         if (b_offset >= 0) {
            /* FIXME: this isn't an atom! */
            stmt( 'B', mce, IRStmt_Put(b_offset + 2*mce->layout->total_sizeB,
                                       schemeE( mce, st->Ist.Put.data )) );
         }
         break;
      }

      case Ist_WrTmp:
         assign( 'B', mce, findShadowTmpB(mce, st->Ist.WrTmp.tmp),
                           schemeE(mce, st->Ist.WrTmp.data) );
         break;

      case Ist_MBE:
      case Ist_NoOp:
      case Ist_Exit:
      case Ist_IMark:
         /* No data flows through these, so no origin tracking is
            needed. */
         break;

      default:
         VG_(printf)("mc_translate.c: schemeS: unhandled: ");
         ppIRStmt(st);
         VG_(tool_panic)("memcheck:schemeS");
   }
}
6479
6480
njn25e49d8e72002-09-23 09:36:25 +00006481/*--------------------------------------------------------------------*/
njn25cac76cb2002-09-23 11:21:57 +00006482/*--- end mc_translate.c ---*/
njn25e49d8e72002-09-23 09:36:25 +00006483/*--------------------------------------------------------------------*/